From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux: 3.3
Patch-mainline: Never, SUSE-Xen specific

 This patch contains the differences between 3.2 and 3.3.

Automatically created from "patch-3.3" by xen-port-patches.py
Acked-by: jbeulich@suse.com

3.4/arch/x86/include/mach-xen/asm/i387.h (moved to fpu-internal.h)
3.13/drivers/acpi/processor_core.c (needs re-implementation)
3.13/drivers/acpi/processor_perflib.c (acpi_processor_load_module() disappeared)

--- a/arch/x86/entry/entry_64_compat-xen.S
+++ b/arch/x86/entry/entry_64_compat-xen.S
@@ -14,6 +14,7 @@
 #include <asm/segment.h>
 #include <asm/irqflags.h>
 #include <linux/linkage.h>
+#include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
 #include <linux/elf-em.h>
@@ -22,8 +23,6 @@
 
 	.section .entry.text, "ax"
 
-#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
-
 	.macro IA32_ARG_FIXUP noebp=0
 	movl	%edi,%r8d
 	.if \noebp
@@ -128,7 +127,7 @@ ENTRY(ia32_sysenter_target)
 	CFI_RESTORE	rcx
  	movl	%ebp,%ebp		/* zero extension */
 	movl	%eax,%eax
-	movl	48-THREAD_SIZE+TI_sysenter_return(%rsp),%r10d
+	movl	TI_sysenter_return+THREAD_INFO(%rsp,8*6-KERNEL_STACK_OFFSET),%r10d
 	movl	$__USER32_DS,40(%rsp)
 	movq	%rbp,32(%rsp)
 	movl	$__USER32_CS,16(%rsp)
@@ -142,9 +141,8 @@ ENTRY(ia32_sysenter_target)
  	.section __ex_table,"a"
  	.quad 1b,ia32_badarg
  	.previous	
-	GET_THREAD_INFO(%r10)
-	orl    $TS_COMPAT,TI_status(%r10)
-	testl  $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz  sysenter_tracesys
 	jmp .Lia32_check_call
 
@@ -156,7 +154,7 @@ ENTRY(ia32_sysenter_target)
 	movl %ebx,%edx			/* 3rd arg: 1st syscall arg */
 	movl %eax,%esi			/* 2nd arg: syscall number */
 	movl $AUDIT_ARCH_I386,%edi	/* 1st arg: audit arch */
-	call audit_syscall_entry
+	call __audit_syscall_entry
 	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall number */
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
@@ -219,9 +217,8 @@ ENTRY(ia32_cstar_target)
 	.section __ex_table,"a"
 	.quad 1b,ia32_badarg
 	.previous	
-	GET_THREAD_INFO(%r10)
-	orl   $TS_COMPAT,TI_status(%r10)
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz   cstar_tracesys
 	cmpq $IA32_NR_syscalls-1,%rax
 	ja  ia32_badsys
@@ -238,7 +235,7 @@ cstar_auditsys:
 
 cstar_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jz cstar_auditsys
 #endif
 	xchgl %r9d,%ebp
@@ -302,9 +299,8 @@ ENTRY(ia32_syscall)
 	/* note the registers are not zero extended to the sf.
 	   this could be a problem. */
 	SAVE_ARGS 0,1,0
-	GET_THREAD_INFO(%r10)
-	orl   $TS_COMPAT,TI_status(%r10)
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+	orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz ia32_tracesys
 .Lia32_check_call:
 	cmpq $(IA32_NR_syscalls-1),%rax
@@ -320,7 +316,7 @@ ia32_sysret:
 
 sysenter_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jz sysenter_auditsys
 #endif
 ia32_tracesys:			 
@@ -341,14 +337,11 @@ ia32_badsys:
 	movq $-ENOSYS,%rax
 	jmp ia32_sysret
 
-quiet_ni_syscall:
-	movq $-ENOSYS,%rax
-	ret
 	CFI_ENDPROC
 	
 	.macro PTREGSCALL label, func, arg
-	.globl \label
-\label:
+	ALIGN
+GLOBAL(\label)
 	leaq \func(%rip),%rax
 	leaq -ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
 	jmp  ia32_ptregs_common	
@@ -365,7 +358,8 @@ quiet_ni_syscall:
 	PTREGSCALL stub32_vfork, sys_vfork, %rdi
 	PTREGSCALL stub32_iopl, sys_iopl, %rsi
 
-ENTRY(ia32_ptregs_common)
+	ALIGN
+ia32_ptregs_common:
 	popq %r11
 	CFI_ENDPROC
 	CFI_STARTPROC32	simple
@@ -387,357 +381,3 @@ ENTRY(ia32_ptregs_common)
 	jmp  ia32_sysret	/* misbalances the return cache */
 	CFI_ENDPROC
 END(ia32_ptregs_common)
-
-	.section .rodata,"a"
-	.align 8
-ia32_sys_call_table:
-	.quad sys_restart_syscall
-	.quad sys_exit
-	.quad stub32_fork
-	.quad sys_read
-	.quad sys_write
-	.quad compat_sys_open		/* 5 */
-	.quad sys_close
-	.quad sys32_waitpid
-	.quad sys_creat
-	.quad sys_link
-	.quad sys_unlink		/* 10 */
-	.quad stub32_execve
-	.quad sys_chdir
-	.quad compat_sys_time
-	.quad sys_mknod
-	.quad sys_chmod		/* 15 */
-	.quad sys_lchown16
-	.quad quiet_ni_syscall			/* old break syscall holder */
-	.quad sys_stat
-	.quad sys32_lseek
-	.quad sys_getpid		/* 20 */
-	.quad compat_sys_mount	/* mount  */
-	.quad sys_oldumount	/* old_umount  */
-	.quad sys_setuid16
-	.quad sys_getuid16
-	.quad compat_sys_stime	/* stime */		/* 25 */
-	.quad compat_sys_ptrace	/* ptrace */
-	.quad sys_alarm
-	.quad sys_fstat	/* (old)fstat */
-	.quad sys_pause
-	.quad compat_sys_utime	/* 30 */
-	.quad quiet_ni_syscall	/* old stty syscall holder */
-	.quad quiet_ni_syscall	/* old gtty syscall holder */
-	.quad sys_access
-	.quad sys_nice	
-	.quad quiet_ni_syscall	/* 35 */	/* old ftime syscall holder */
-	.quad sys_sync
-	.quad sys32_kill
-	.quad sys_rename
-	.quad sys_mkdir
-	.quad sys_rmdir		/* 40 */
-	.quad sys_dup
-	.quad sys_pipe
-	.quad compat_sys_times
-	.quad quiet_ni_syscall			/* old prof syscall holder */
-	.quad sys_brk		/* 45 */
-	.quad sys_setgid16
-	.quad sys_getgid16
-	.quad sys_signal
-	.quad sys_geteuid16
-	.quad sys_getegid16	/* 50 */
-	.quad sys_acct
-	.quad sys_umount			/* new_umount */
-	.quad quiet_ni_syscall			/* old lock syscall holder */
-	.quad compat_sys_ioctl
-	.quad compat_sys_fcntl64		/* 55 */
-	.quad quiet_ni_syscall			/* old mpx syscall holder */
-	.quad sys_setpgid
-	.quad quiet_ni_syscall			/* old ulimit syscall holder */
-	.quad sys_olduname
-	.quad sys_umask		/* 60 */
-	.quad sys_chroot
-	.quad compat_sys_ustat
-	.quad sys_dup2
-	.quad sys_getppid
-	.quad sys_getpgrp		/* 65 */
-	.quad sys_setsid
-	.quad sys32_sigaction
-	.quad sys_sgetmask
-	.quad sys_ssetmask
-	.quad sys_setreuid16	/* 70 */
-	.quad sys_setregid16
-	.quad sys32_sigsuspend
-	.quad compat_sys_sigpending
-	.quad sys_sethostname
-	.quad compat_sys_setrlimit	/* 75 */
-	.quad compat_sys_old_getrlimit	/* old_getrlimit */
-	.quad compat_sys_getrusage
-	.quad compat_sys_gettimeofday
-	.quad compat_sys_settimeofday
-	.quad sys_getgroups16	/* 80 */
-	.quad sys_setgroups16
-	.quad compat_sys_old_select
-	.quad sys_symlink
-	.quad sys_lstat
-	.quad sys_readlink		/* 85 */
-	.quad sys_uselib
-	.quad sys_swapon
-	.quad sys_reboot
-	.quad compat_sys_old_readdir
-	.quad sys32_mmap		/* 90 */
-	.quad sys_munmap
-	.quad sys_truncate
-	.quad sys_ftruncate
-	.quad sys_fchmod
-	.quad sys_fchown16		/* 95 */
-	.quad sys_getpriority
-	.quad sys_setpriority
-	.quad quiet_ni_syscall			/* old profil syscall holder */
-	.quad compat_sys_statfs
-	.quad compat_sys_fstatfs		/* 100 */
-	.quad sys_ioperm
-	.quad compat_sys_socketcall
-	.quad sys_syslog
-	.quad compat_sys_setitimer
-	.quad compat_sys_getitimer	/* 105 */
-	.quad compat_sys_newstat
-	.quad compat_sys_newlstat
-	.quad compat_sys_newfstat
-	.quad sys_uname
-	.quad stub32_iopl		/* 110 */
-	.quad sys_vhangup
-	.quad quiet_ni_syscall	/* old "idle" system call */
-	.quad sys32_vm86_warning	/* vm86old */ 
-	.quad compat_sys_wait4
-	.quad sys_swapoff		/* 115 */
-	.quad compat_sys_sysinfo
-	.quad sys32_ipc
-	.quad sys_fsync
-	.quad stub32_sigreturn
-	.quad stub32_clone		/* 120 */
-	.quad sys_setdomainname
-	.quad sys_newuname
-	.quad sys_modify_ldt
-	.quad compat_sys_adjtimex
-	.quad sys32_mprotect		/* 125 */
-	.quad compat_sys_sigprocmask
-	.quad quiet_ni_syscall		/* create_module */
-	.quad sys_init_module
-	.quad sys_delete_module
-	.quad quiet_ni_syscall		/* 130  get_kernel_syms */
-	.quad sys32_quotactl
-	.quad sys_getpgid
-	.quad sys_fchdir
-	.quad quiet_ni_syscall	/* bdflush */
-	.quad sys_sysfs		/* 135 */
-	.quad sys_personality
-	.quad quiet_ni_syscall	/* for afs_syscall */
-	.quad sys_setfsuid16
-	.quad sys_setfsgid16
-	.quad sys_llseek		/* 140 */
-	.quad compat_sys_getdents
-	.quad compat_sys_select
-	.quad sys_flock
-	.quad sys_msync
-	.quad compat_sys_readv		/* 145 */
-	.quad compat_sys_writev
-	.quad sys_getsid
-	.quad sys_fdatasync
-	.quad compat_sys_sysctl	/* sysctl */
-	.quad sys_mlock		/* 150 */
-	.quad sys_munlock
-	.quad sys_mlockall
-	.quad sys_munlockall
-	.quad sys_sched_setparam
-	.quad sys_sched_getparam   /* 155 */
-	.quad sys_sched_setscheduler
-	.quad sys_sched_getscheduler
-	.quad sys_sched_yield
-	.quad sys_sched_get_priority_max
-	.quad sys_sched_get_priority_min  /* 160 */
-	.quad sys32_sched_rr_get_interval
-	.quad compat_sys_nanosleep
-	.quad sys_mremap
-	.quad sys_setresuid16
-	.quad sys_getresuid16	/* 165 */
-	.quad sys32_vm86_warning	/* vm86 */ 
-	.quad quiet_ni_syscall	/* query_module */
-	.quad sys_poll
-	.quad quiet_ni_syscall /* old nfsservctl */
-	.quad sys_setresgid16	/* 170 */
-	.quad sys_getresgid16
-	.quad sys_prctl
-	.quad stub32_rt_sigreturn
-	.quad sys32_rt_sigaction
-	.quad sys32_rt_sigprocmask	/* 175 */
-	.quad sys32_rt_sigpending
-	.quad compat_sys_rt_sigtimedwait
-	.quad sys32_rt_sigqueueinfo
-	.quad sys_rt_sigsuspend
-	.quad sys32_pread		/* 180 */
-	.quad sys32_pwrite
-	.quad sys_chown16
-	.quad sys_getcwd
-	.quad sys_capget
-	.quad sys_capset
-	.quad stub32_sigaltstack
-	.quad sys32_sendfile
-	.quad quiet_ni_syscall		/* streams1 */
-	.quad quiet_ni_syscall		/* streams2 */
-	.quad stub32_vfork            /* 190 */
-	.quad compat_sys_getrlimit
-	.quad sys_mmap_pgoff
-	.quad sys32_truncate64
-	.quad sys32_ftruncate64
-	.quad sys32_stat64		/* 195 */
-	.quad sys32_lstat64
-	.quad sys32_fstat64
-	.quad sys_lchown
-	.quad sys_getuid
-	.quad sys_getgid		/* 200 */
-	.quad sys_geteuid
-	.quad sys_getegid
-	.quad sys_setreuid
-	.quad sys_setregid
-	.quad sys_getgroups	/* 205 */
-	.quad sys_setgroups
-	.quad sys_fchown
-	.quad sys_setresuid
-	.quad sys_getresuid
-	.quad sys_setresgid	/* 210 */
-	.quad sys_getresgid
-	.quad sys_chown
-	.quad sys_setuid
-	.quad sys_setgid
-	.quad sys_setfsuid		/* 215 */
-	.quad sys_setfsgid
-	.quad sys_pivot_root
-	.quad sys_mincore
-	.quad sys_madvise
-	.quad compat_sys_getdents64	/* 220 getdents64 */
-	.quad compat_sys_fcntl64	
-	.quad quiet_ni_syscall		/* tux */
-	.quad quiet_ni_syscall    	/* security */
-	.quad sys_gettid	
-	.quad sys32_readahead	/* 225 */
-	.quad sys_setxattr
-	.quad sys_lsetxattr
-	.quad sys_fsetxattr
-	.quad sys_getxattr
-	.quad sys_lgetxattr	/* 230 */
-	.quad sys_fgetxattr
-	.quad sys_listxattr
-	.quad sys_llistxattr
-	.quad sys_flistxattr
-	.quad sys_removexattr	/* 235 */
-	.quad sys_lremovexattr
-	.quad sys_fremovexattr
-	.quad sys_tkill
-	.quad sys_sendfile64 
-	.quad compat_sys_futex		/* 240 */
-	.quad compat_sys_sched_setaffinity
-	.quad compat_sys_sched_getaffinity
-	.quad sys_set_thread_area
-	.quad sys_get_thread_area
-	.quad compat_sys_io_setup	/* 245 */
-	.quad sys_io_destroy
-	.quad compat_sys_io_getevents
-	.quad compat_sys_io_submit
-	.quad sys_io_cancel
-	.quad sys32_fadvise64		/* 250 */
-	.quad quiet_ni_syscall 	/* free_huge_pages */
-	.quad sys_exit_group
-	.quad sys32_lookup_dcookie
-	.quad sys_epoll_create
-	.quad sys_epoll_ctl		/* 255 */
-	.quad sys_epoll_wait
-	.quad sys_remap_file_pages
-	.quad sys_set_tid_address
-	.quad compat_sys_timer_create
-	.quad compat_sys_timer_settime	/* 260 */
-	.quad compat_sys_timer_gettime
-	.quad sys_timer_getoverrun
-	.quad sys_timer_delete
-	.quad compat_sys_clock_settime
-	.quad compat_sys_clock_gettime	/* 265 */
-	.quad compat_sys_clock_getres
-	.quad compat_sys_clock_nanosleep
-	.quad compat_sys_statfs64
-	.quad compat_sys_fstatfs64
-	.quad sys_tgkill		/* 270 */
-	.quad compat_sys_utimes
-	.quad sys32_fadvise64_64
-	.quad quiet_ni_syscall	/* sys_vserver */
-	.quad sys_mbind
-	.quad compat_sys_get_mempolicy	/* 275 */
-	.quad sys_set_mempolicy
-	.quad compat_sys_mq_open
-	.quad sys_mq_unlink
-	.quad compat_sys_mq_timedsend
-	.quad compat_sys_mq_timedreceive	/* 280 */
-	.quad compat_sys_mq_notify
-	.quad compat_sys_mq_getsetattr
-	.quad compat_sys_kexec_load	/* reserved for kexec */
-	.quad compat_sys_waitid
-	.quad quiet_ni_syscall		/* 285: sys_altroot */
-	.quad sys_add_key
-	.quad sys_request_key
-	.quad sys_keyctl
-	.quad sys_ioprio_set
-	.quad sys_ioprio_get		/* 290 */
-	.quad sys_inotify_init
-	.quad sys_inotify_add_watch
-	.quad sys_inotify_rm_watch
-	.quad sys_migrate_pages
-	.quad compat_sys_openat		/* 295 */
-	.quad sys_mkdirat
-	.quad sys_mknodat
-	.quad sys_fchownat
-	.quad compat_sys_futimesat
-	.quad sys32_fstatat		/* 300 */
-	.quad sys_unlinkat
-	.quad sys_renameat
-	.quad sys_linkat
-	.quad sys_symlinkat
-	.quad sys_readlinkat		/* 305 */
-	.quad sys_fchmodat
-	.quad sys_faccessat
-	.quad compat_sys_pselect6
-	.quad compat_sys_ppoll
-	.quad sys_unshare		/* 310 */
-	.quad compat_sys_set_robust_list
-	.quad compat_sys_get_robust_list
-	.quad sys_splice
-	.quad sys32_sync_file_range
-	.quad sys_tee			/* 315 */
-	.quad compat_sys_vmsplice
-	.quad compat_sys_move_pages
-	.quad sys_getcpu
-	.quad sys_epoll_pwait
-	.quad compat_sys_utimensat	/* 320 */
-	.quad compat_sys_signalfd
-	.quad sys_timerfd_create
-	.quad sys_eventfd
-	.quad sys32_fallocate
-	.quad compat_sys_timerfd_settime	/* 325 */
-	.quad compat_sys_timerfd_gettime
-	.quad compat_sys_signalfd4
-	.quad sys_eventfd2
-	.quad sys_epoll_create1
-	.quad sys_dup3				/* 330 */
-	.quad sys_pipe2
-	.quad sys_inotify_init1
-	.quad compat_sys_preadv
-	.quad compat_sys_pwritev
-	.quad compat_sys_rt_tgsigqueueinfo	/* 335 */
-	.quad sys_perf_event_open
-	.quad compat_sys_recvmmsg
-	.quad sys_fanotify_init
-	.quad sys32_fanotify_mark
-	.quad sys_prlimit64		/* 340 */
-	.quad sys_name_to_handle_at
-	.quad compat_sys_open_by_handle_at
-	.quad compat_sys_clock_adjtime
-	.quad sys_syncfs
-	.quad compat_sys_sendmmsg	/* 345 */
-	.quad sys_setns
-	.quad compat_sys_process_vm_readv
-	.quad compat_sys_process_vm_writev
-ia32_syscall_end:
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -93,7 +93,7 @@ extern void aout_dump_debugregs(struct u
 
 extern void hw_breakpoint_restore(void);
 
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_64) && !defined(CONFIG_X86_NO_IDT)
 DECLARE_PER_CPU(int, debug_stack_usage);
 static inline void debug_stack_usage_inc(void)
 {
--- a/arch/x86/include/mach-xen/asm/desc.h
+++ b/arch/x86/include/mach-xen/asm/desc.h
@@ -36,6 +36,8 @@ static inline void fill_ldt(struct desc_
 #ifndef CONFIG_X86_NO_IDT
 extern struct desc_ptr idt_descr;
 extern gate_desc idt_table[];
+extern struct desc_ptr nmi_idt_descr;
+extern gate_desc nmi_idt_table[];
 #endif
 
 struct gdt_page {
@@ -332,6 +334,16 @@ static inline void set_desc_limit(struct
 }
 
 #ifndef CONFIG_X86_NO_IDT
+#ifdef CONFIG_X86_64
+static inline void set_nmi_gate(int gate, void *addr)
+{
+	gate_desc s;
+
+	pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
+	write_idt_entry(nmi_idt_table, gate, &s);
+}
+#endif
+
 static inline void _set_gate(int gate, unsigned type, void *addr,
 			     unsigned dpl, unsigned ist, unsigned seg)
 {
--- a/arch/x86/include/mach-xen/asm/fixmap.h
+++ b/arch/x86/include/mach-xen/asm/fixmap.h
@@ -124,7 +124,7 @@ enum fixed_addresses {
 #endif
 	FIX_TEXT_POKE1,	/* reserve 2 pages for text_poke() */
 	FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
-#ifdef	CONFIG_X86_MRST
+#ifdef	CONFIG_X86_INTEL_MID
 	FIX_LNW_VRTC,
 #endif
 	__end_of_permanent_fixed_addresses,
--- a/arch/x86/include/mach-xen/asm/pci.h
+++ b/arch/x86/include/mach-xen/asm/pci.h
@@ -118,19 +118,28 @@ static inline void x86_teardown_msi_irq(
 {
 	x86_msi.teardown_msi_irq(irq);
 }
+static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq)
+{
+	x86_msi.restore_msi_irqs(dev, irq);
+}
 #define arch_setup_msi_irqs x86_setup_msi_irqs
 #define arch_teardown_msi_irqs x86_teardown_msi_irqs
 #define arch_teardown_msi_irq x86_teardown_msi_irq
+#define arch_restore_msi_irqs x86_restore_msi_irqs
 /* implemented in arch/x86/kernel/apic/io_apic. */
 int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
 void native_teardown_msi_irq(unsigned int irq);
+void native_restore_msi_irqs(struct pci_dev *dev, int irq);
 /* default to the implementation in drivers/lib/msi.c */
 #define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
+#define HAVE_DEFAULT_MSI_RESTORE_IRQS
 void default_teardown_msi_irqs(struct pci_dev *dev);
+void default_restore_msi_irqs(struct pci_dev *dev, int irq);
 #else
 #define native_setup_msi_irqs		NULL
 #define native_teardown_msi_irq		NULL
 #define default_teardown_msi_irqs	NULL
+#define default_restore_msi_irqs	NULL
 #endif
 
 #define PCI_DMA_BUS_IS_PHYS 0
--- a/arch/x86/include/mach-xen/asm/pgtable.h
+++ b/arch/x86/include/mach-xen/asm/pgtable.h
@@ -738,7 +738,7 @@ static inline void ptep_set_wrprotect(st
 		set_pte_at(mm, addr, ptep, pte_wrprotect(pte));
 }
 
-#define flush_tlb_fix_spurious_fault(vma, address)
+#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
 
 #define mk_pmd(page, pgprot)   pfn_pmd(page_to_pfn(page), (pgprot))
 
--- a/arch/x86/include/mach-xen/asm/processor.h
+++ b/arch/x86/include/mach-xen/asm/processor.h
@@ -109,7 +109,7 @@ struct cpuinfo_x86 {
 	u16			initial_apicid;
 #endif
 	u16			x86_clflush_size;
-#ifdef CONFIG_X86_HT
+#ifndef CONFIG_XEN
 	/* number of cores as seen by the OS: */
 	u16			booted_cores;
 	/* Physical processor id: */
@@ -119,10 +119,8 @@ struct cpuinfo_x86 {
 	/* Compute unit id */
 	u8			compute_unit_id;
 #endif
-#ifdef CONFIG_SMP
 	/* Index into per_cpu list: */
 	u16			cpu_index;
-#endif
 #ifndef CONFIG_XEN
 	u32			microcode;
 #endif
@@ -394,6 +392,8 @@ union thread_xstate {
 };
 
 struct fpu {
+	unsigned int last_cpu;
+	unsigned int has_fpu;
 	union thread_xstate *state;
 };
 
--- a/arch/x86/include/mach-xen/asm/smp.h
+++ b/arch/x86/include/mach-xen/asm/smp.h
@@ -231,5 +231,11 @@ extern int hard_smp_processor_id(void);
 
 #endif /* CONFIG_X86_LOCAL_APIC */
 
+#ifdef CONFIG_DEBUG_NMI_SELFTEST
+extern void nmi_selftest(void);
+#else
+#define nmi_selftest() do { } while (0)
+#endif
+
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_X86_SMP_H */
--- a/arch/x86/include/mach-xen/asm/spinlock.h
+++ b/arch/x86/include/mach-xen/asm/spinlock.h
@@ -137,19 +137,8 @@ static __always_inline void __ticket_spi
 {
 	register struct __raw_tickets new;
 
-#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
-# define UNLOCK_SUFFIX(n) "%z" #n
-#elif TICKET_SHIFT == 8
-# define UNLOCK_SUFFIX(n) "b"
-#elif TICKET_SHIFT == 16
-# define UNLOCK_SUFFIX(n) "w"
-#endif
-	asm volatile(UNLOCK_LOCK_PREFIX "inc" UNLOCK_SUFFIX(0) " %0"
-		     : "+m" (lock->tickets.head)
-		     :
-		     : "memory", "cc");
+	__add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX);
 #if !defined(XEN_SPINLOCK_SOURCE) || !CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
-# undef UNLOCK_SUFFIX
 # undef UNLOCK_LOCK_PREFIX
 #endif
 	new = ACCESS_ONCE(lock->tickets);
--- a/arch/x86/kernel/apic/io_apic-xen.c
+++ b/arch/x86/kernel/apic/io_apic-xen.c
@@ -2502,8 +2502,8 @@ asmlinkage void smp_irq_move_cleanup_int
 	unsigned vector, me;
 
 	ack_APIC_irq();
-	exit_idle();
 	irq_enter();
+	exit_idle();
 
 	me = smp_processor_id();
 	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
@@ -3035,6 +3035,10 @@ static inline void __init check_timer(vo
 	}
 	local_irq_disable();
 	apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
+	if (x2apic_preenabled)
+		apic_printk(APIC_QUIET, KERN_INFO
+			    "Perhaps problem with the pre-enabled x2apic mode\n"
+			    "Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
 	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
 		"report.  Then try booting with the 'noapic' option.\n");
 out:
--- a/arch/x86/kernel/cpu/common-xen.c
+++ b/arch/x86/kernel/cpu/common-xen.c
@@ -725,9 +725,7 @@ static void __init early_identify_cpu(st
 	if (this_cpu->c_early_init)
 		this_cpu->c_early_init(c);
 
-#ifdef CONFIG_SMP
 	c->cpu_index = 0;
-#endif
 	filter_cpuid_features(c, false);
 
 	setup_smep(c);
@@ -814,10 +812,7 @@ static void __cpuinit generic_identify(s
 		c->apicid = c->initial_apicid;
 # endif
 #endif
-
-#ifdef CONFIG_X86_HT
 		c->phys_proc_id = c->initial_apicid;
-#endif
 	}
 #endif
 
@@ -1086,6 +1081,8 @@ __setup("clearcpuid=", setup_disablecpui
 #ifdef CONFIG_X86_64
 #ifndef CONFIG_X86_NO_IDT
 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
+struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
+				    (unsigned long) nmi_idt_table };
 #endif
 
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
@@ -1116,6 +1113,9 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) =
 
 DEFINE_PER_CPU(unsigned int, irq_count) = -1;
 
+DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
+EXPORT_PER_CPU_SYMBOL(fpu_owner_task);
+
 #ifndef CONFIG_X86_NO_TSS
 /*
  * Special IST stacks which the CPU switches to when it calls
@@ -1177,10 +1177,34 @@ unsigned long kernel_eflags;
 DEFINE_PER_CPU(struct orig_ist, orig_ist);
 #endif
 
+#ifndef CONFIG_X86_NO_IDT
+static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
+DEFINE_PER_CPU(int, debug_stack_usage);
+
+int is_debug_stack(unsigned long addr)
+{
+	return __get_cpu_var(debug_stack_usage) ||
+		(addr <= __get_cpu_var(debug_stack_addr) &&
+		 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
+}
+
+void debug_stack_set_zero(void)
+{
+	load_idt((const struct desc_ptr *)&nmi_idt_descr);
+}
+
+void debug_stack_reset(void)
+{
+	load_idt((const struct desc_ptr *)&idt_descr);
+}
+#endif
+
 #else	/* CONFIG_X86_64 */
 
 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
 EXPORT_PER_CPU_SYMBOL(current_task);
+DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
+EXPORT_PER_CPU_SYMBOL(fpu_owner_task);
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
@@ -1227,6 +1251,17 @@ static void dbg_restore_debug_regs(void)
 #define dbg_restore_debug_regs()
 #endif /* ! CONFIG_KGDB */
 
+#ifndef CONFIG_XEN
+/*
+ * Prints an error where the NUMA and configured core-number mismatch and the
+ * platform didn't override this to fix it up
+ */
+void __cpuinit x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node)
+{
+	pr_err("NUMA core number %d differs from configured core number %d\n", node, c->phys_proc_id);
+}
+#endif
+
 /*
  * cpu_init() initializes state that is per-CPU. Some data is already
  * initialized (naturally) in the bootstrap process, such as the GDT
@@ -1307,6 +1342,10 @@ void __cpuinit cpu_init(void)
 			estacks += exception_stack_sizes[v];
 			oist->ist[v] = t->x86_tss.ist[v] =
 					(unsigned long)estacks;
+#ifndef CONFIG_X86_NO_IDT
+			if (v == DEBUG_STACK-1)
+				per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
+#endif
 		}
 	}
 
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -94,6 +94,7 @@ static int mce_raise_notify(unsigned int
 	return NMI_HANDLED;
 }
 
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
 static void mce_irq_ipi(void *info)
 {
 	int cpu = smp_processor_id();
@@ -105,6 +106,7 @@ static void mce_irq_ipi(void *info)
 		raise_exception(m, NULL);
 	}
 }
+#endif
 
 /* Inject mce on current CPU */
 static int raise_local(void)
--- a/arch/x86/kernel/e820-xen.c
+++ b/arch/x86/kernel/e820-xen.c
@@ -19,6 +19,7 @@
 #include <linux/acpi.h>
 #include <linux/firmware-map.h>
 #include <linux/memblock.h>
+#include <linux/sort.h>
 
 #include <asm/e820.h>
 #include <asm/proto.h>
@@ -250,22 +251,38 @@ static void __init _e820_print_map(const
  *	   ____________________33__
  *	   ______________________4_
  */
+struct change_member {
+	struct e820entry *pbios; /* pointer to original bios entry */
+	unsigned long long addr; /* address for this change point */
+};
+
+static int __init cpcompare(const void *a, const void *b)
+{
+	struct change_member * const *app = a, * const *bpp = b;
+	const struct change_member *ap = *app, *bp = *bpp;
+
+	/*
+	 * Inputs are pointers to two elements of change_point[].  If their
+	 * addresses are unequal, their difference dominates.  If the addresses
+	 * are equal, then consider one that represents the end of its region
+	 * to be greater than one that does not.
+	 */
+	if (ap->addr != bp->addr)
+		return ap->addr > bp->addr ? 1 : -1;
+
+	return (ap->addr != ap->pbios->addr) - (bp->addr != bp->pbios->addr);
+}
 
 int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
 			     u32 *pnr_map)
 {
-	struct change_member {
-		struct e820entry *pbios; /* pointer to original bios entry */
-		unsigned long long addr; /* address for this change point */
-	};
 	static struct change_member change_point_list[2*E820_X_MAX] __initdata;
 	static struct change_member *change_point[2*E820_X_MAX] __initdata;
 	static struct e820entry *overlap_list[E820_X_MAX] __initdata;
 	static struct e820entry new_bios[E820_X_MAX] __initdata;
-	struct change_member *change_tmp;
 	unsigned long current_type, last_type;
 	unsigned long long last_addr;
-	int chgidx, still_changing;
+	int chgidx;
 	int overlap_entries;
 	int new_bios_entry;
 	int old_nr, new_nr, chg_nr;
@@ -306,35 +323,7 @@ int __init sanitize_e820_map(struct e820
 	chg_nr = chgidx;
 
 	/* sort change-point list by memory addresses (low -> high) */
-	still_changing = 1;
-	while (still_changing)	{
-		still_changing = 0;
-		for (i = 1; i < chg_nr; i++)  {
-			unsigned long long curaddr, lastaddr;
-			unsigned long long curpbaddr, lastpbaddr;
-
-			curaddr = change_point[i]->addr;
-			lastaddr = change_point[i - 1]->addr;
-			curpbaddr = change_point[i]->pbios->addr;
-			lastpbaddr = change_point[i - 1]->pbios->addr;
-
-			/*
-			 * swap entries, when:
-			 *
-			 * curaddr > lastaddr or
-			 * curaddr == lastaddr and curaddr == curpbaddr and
-			 * lastaddr != lastpbaddr
-			 */
-			if (curaddr < lastaddr ||
-			    (curaddr == lastaddr && curaddr == curpbaddr &&
-			     lastaddr != lastpbaddr)) {
-				change_tmp = change_point[i];
-				change_point[i] = change_point[i-1];
-				change_point[i-1] = change_tmp;
-				still_changing = 1;
-			}
-		}
-	}
+	sort(change_point, chg_nr, sizeof *change_point, cpcompare, NULL);
 
 	/* create a new bios memory map, removing overlaps */
 	overlap_entries = 0;	 /* number of entries in the overlap table */
@@ -769,7 +758,7 @@ void __init e820_mark_nosave_regions(uns
 }
 #endif
 
-#ifdef CONFIG_HIBERNATION
+#ifdef CONFIG_ACPI
 /**
  * Mark ACPI NVS memory region, so that we can save/restore it during
  * hibernation and the subsequent resume.
@@ -782,7 +771,7 @@ static int __init e820_mark_nvs_memory(v
 		struct e820entry *ei = &e820.map[i];
 
 		if (ei->type == E820_NVS)
-			suspend_nvs_register(ei->addr, ei->size);
+			acpi_nvs_register(ei->addr, ei->size);
 	}
 
 	return 0;
@@ -795,47 +784,29 @@ core_initcall(e820_mark_nvs_memory);
 /*
  * pre allocated 4k and reserved it in memblock and e820_saved
  */
-u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
+u64 __init early_reserve_e820(u64 size, u64 align)
 {
-	u64 size = 0;
 	u64 addr;
-	u64 start;
 #ifdef CONFIG_XEN
-	unsigned int order = get_order(sizet);
+	unsigned int order = get_order(size);
 	int rc;
 	unsigned long max_initmap_pfn;
 
 	if (!is_initial_xendomain())
 		return 0;
-	sizet = PAGE_SIZE << order;
+	size = PAGE_SIZE << order;
 	if (align < PAGE_SIZE)
 		align = PAGE_SIZE;
 #endif
-	for (start = startt; ; start += size) {
-		start = memblock_x86_find_in_range_size(start, &size, align);
-		if (start == MEMBLOCK_ERROR)
-			return 0;
-		if (size >= sizet)
-			break;
+	addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
+	if (addr) {
+		e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED);
+		printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
+		update_e820_saved();
 	}
-
-#ifdef CONFIG_X86_32
-	if (start >= MAXMEM)
-		return 0;
-	if (start + size > MAXMEM)
-		size = MAXMEM - start;
-#endif
 #ifdef CONFIG_XEN
-	if ((start >> PAGE_SHIFT) >= xen_start_info->nr_pages)
-		return 0;
-	if (PFN_UP(start + size) > xen_start_info->nr_pages)
-		size = ((u64)xen_start_info->nr_pages << PAGE_SHIFT) - start;
-#endif
-
-	addr = round_down(start + size - sizet, align);
-	if (addr < start)
+	else
 		return 0;
-#ifdef CONFIG_XEN
 	max_initmap_pfn = ALIGN(PFN_UP(__pa(xen_start_info->pt_base))
 				       + xen_start_info->nr_pt_frames
 				       + 1 + (1 << (19 - PAGE_SHIFT)),
@@ -859,10 +830,6 @@ u64 __init early_reserve_e820(u64 startt
 	if (rc)
 		return 0;
 #endif
-	memblock_x86_reserve_range(addr, addr + sizet, "new next");
-	e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
-	printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
-	update_e820_saved();
 
 	return addr;
 }
@@ -1223,7 +1190,7 @@ void __init memblock_x86_fill(void)
 	 * We are safe to enable resizing, beause memblock_x86_fill()
 	 * is rather later for x86
 	 */
-	memblock_can_resize = 1;
+	memblock_allow_resize();
 
 	for (i = 0; i < e820.nr_map; i++) {
 		struct e820entry *ei = &e820.map[i];
@@ -1238,22 +1205,36 @@ void __init memblock_x86_fill(void)
 		memblock_add(ei->addr, ei->size);
 	}
 
-	memblock_analyze();
 	memblock_dump_all();
 }
 
 void __init memblock_find_dma_reserve(void)
 {
 #if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
-	u64 free_size_pfn;
-	u64 mem_size_pfn;
+	u64 nr_pages = 0, nr_free_pages = 0;
+	unsigned long start_pfn, end_pfn;
+	phys_addr_t start, end;
+	int i;
+	u64 u;
+
 	/*
 	 * need to find out used area below MAX_DMA_PFN
 	 * need to use memblock to get free size in [0, MAX_DMA_PFN]
 	 * at first, and assume boot_mem will not take below MAX_DMA_PFN
 	 */
-	mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
-	free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
-	set_dma_reserve(mem_size_pfn - free_size_pfn);
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
+		start_pfn = min_t(unsigned long, start_pfn, MAX_DMA_PFN);
+		end_pfn = min_t(unsigned long, end_pfn, MAX_DMA_PFN);
+		nr_pages += end_pfn - start_pfn;
+	}
+
+	for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) {
+		start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN);
+		end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN);
+		if (start_pfn < end_pfn)
+			nr_free_pages += end_pfn - start_pfn;
+	}
+
+	set_dma_reserve(nr_pages - nr_free_pages);
 #endif
 }
--- a/arch/x86/kernel/early_printk-xen.c
+++ b/arch/x86/kernel/early_printk-xen.c
@@ -272,14 +272,14 @@ static int __init setup_early_printk(cha
 		if (!strncmp(buf, "xen", 3))
 			early_console_register(&xenboot_console, keep);
 #endif
-#ifdef CONFIG_EARLY_PRINTK_MRST
+#ifdef CONFIG_EARLY_PRINTK_INTEL_MID
 		if (!strncmp(buf, "mrst", 4)) {
 			mrst_early_console_init();
 			early_console_register(&early_mrst_console, keep);
 		}
 
 		if (!strncmp(buf, "hsu", 3)) {
-			hsu_early_console_init();
+			hsu_early_console_init(buf + 3);
 			early_console_register(&early_hsu_console, keep);
 		}
 #endif
--- a/arch/x86/entry/entry_32-xen.S
+++ b/arch/x86/entry/entry_32-xen.S
@@ -42,6 +42,7 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/err.h>
 #include <asm/thread_info.h>
 #include <asm/irqflags.h>
 #include <asm/errno.h>
@@ -82,8 +83,6 @@
  * enough to patch inline, increasing performance.
  */
 
-#define nr_syscalls ((syscall_table_size)/4)
-
 /* Pseudo-eflags. */
 NMI_MASK	= 0x80000000
 
@@ -427,7 +426,7 @@ sysenter_past_esp:
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 	jnz sysenter_audit
 sysenter_do_call:
-	cmpl $(nr_syscalls), %eax
+	cmpl $(NR_syscalls), %eax
 	jae syscall_badsys
 	call *sys_call_table(,%eax,4)
 	movl %eax,PT_EAX(%esp)
@@ -459,7 +458,7 @@ sysenter_audit:
 	movl %ebx,%ecx			/* 3rd arg: 1st syscall arg */
 	movl %eax,%edx			/* 2nd arg: syscall number */
 	movl $AUDIT_ARCH_I386,%eax	/* 1st arg: audit arch */
-	call audit_syscall_entry
+	call __audit_syscall_entry
 	pushl_cfi %ebx
 	movl PT_EAX(%esp),%eax		/* reload syscall number */
 	jmp sysenter_do_call
@@ -470,11 +469,10 @@ sysexit_audit:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_ANY)
 	movl %eax,%edx		/* second arg, syscall return value */
-	cmpl $0,%eax		/* is it < 0? */
-	setl %al		/* 1 if so, 0 if not */
+	cmpl $-MAX_ERRNO,%eax	/* is it an error ? */
+	setbe %al		/* 1 if so, 0 if not */
 	movzbl %al,%eax		/* zero-extend that */
-	inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
-	call audit_syscall_exit
+	call __audit_syscall_exit
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
@@ -533,7 +531,7 @@ ENTRY(system_call)
 					# system call tracing in operation / emulation
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 	jnz syscall_trace_entry
-	cmpl $(nr_syscalls), %eax
+	cmpl $(NR_syscalls), %eax
 	jae syscall_badsys
 syscall_call:
 	call *sys_call_table(,%eax,4)
@@ -694,6 +692,8 @@ work_notifysig:				# deal with pending s
 	movl %esp, %eax
 	jne work_notifysig_v86		# returning to kernel-space or
 					# vm86-space
+	TRACE_IRQS_ON
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	xorl %edx, %edx
 	call do_notify_resume
 	jmp resume_userspace_sig
@@ -707,6 +707,8 @@ work_notifysig_v86:
 #else
 	movl %esp, %eax
 #endif
+	TRACE_IRQS_ON
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	xorl %edx, %edx
 	call do_notify_resume
 	jmp resume_userspace_sig
@@ -719,7 +721,7 @@ syscall_trace_entry:
 	movl %esp, %eax
 	call syscall_trace_enter
 	/* What it returned is what we'll actually use.  */
-	cmpl $(nr_syscalls), %eax
+	cmpl $(NR_syscalls), %eax
 	jnae syscall_call
 	jmp syscall_exit
 END(syscall_trace_entry)
@@ -759,29 +761,28 @@ END(syscall_badsys)
  * System calls that need a pt_regs pointer.
  */
 #define PTREGSCALL0(name) \
-	ALIGN; \
-ptregs_##name: \
+ENTRY(ptregs_##name) ;  \
 	leal 4(%esp),%eax; \
-	jmp sys_##name;
+	jmp sys_##name; \
+ENDPROC(ptregs_##name)
 
 #define PTREGSCALL1(name) \
-	ALIGN; \
-ptregs_##name: \
+ENTRY(ptregs_##name) ; \
 	leal 4(%esp),%edx; \
 	movl (PT_EBX+4)(%esp),%eax; \
-	jmp sys_##name;
+	jmp sys_##name; \
+ENDPROC(ptregs_##name)
 
 #define PTREGSCALL2(name) \
-	ALIGN; \
-ptregs_##name: \
+ENTRY(ptregs_##name) ; \
 	leal 4(%esp),%ecx; \
 	movl (PT_ECX+4)(%esp),%edx; \
 	movl (PT_EBX+4)(%esp),%eax; \
-	jmp sys_##name;
+	jmp sys_##name; \
+ENDPROC(ptregs_##name)
 
 #define PTREGSCALL3(name) \
-	ALIGN; \
-ptregs_##name: \
+ENTRY(ptregs_##name) ; \
 	CFI_STARTPROC; \
 	leal 4(%esp),%eax; \
 	pushl_cfi %eax; \
@@ -806,8 +807,7 @@ PTREGSCALL2(vm86)
 PTREGSCALL1(vm86old)
 
 /* Clone is an oddball.  The 4th arg is in %edi */
-	ALIGN;
-ptregs_clone:
+ENTRY(ptregs_clone)
 	CFI_STARTPROC
 	leal 4(%esp),%eax
 	pushl_cfi %eax
@@ -1363,7 +1363,7 @@ ENTRY(ia32pv_cstar_target)
 	GET_THREAD_INFO(%ebp)
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 	jnz cstar_trace_entry
-	cmpl $nr_syscalls,%eax
+	cmpl $NR_syscalls,%eax
 	jae cstar_badsys
 .Lcstar_call:
 	btl %eax,cstar_special
@@ -1378,7 +1378,7 @@ ENTRY(ia32pv_cstar_target)
 	movl PT_ECX(%esp),%ecx
 	movl %ecx,PT_EBP(%esp)		# put user EBP back in place
 	jmp syscall_call
-cstar_set_tif:
+GLOBAL(cstar_set_tif)
 	movl $cstar_clear_tif,(%esp)	# replace return address
 	LOCK_PREFIX
 	orl $_TIF_CSTAR,TI_flags(%ebp)
@@ -1390,7 +1390,7 @@ cstar_clear_tif:
 	jmp .Lcstar_exit
 cstar_trace_entry:
 	movl $-ENOSYS,PT_EAX(%esp)
-	cmpl $nr_syscalls,%eax
+	cmpl $NR_syscalls,%eax
 	jae 1f
 	btl %eax,cstar_special
 	jc .Lcstar_trace_special
@@ -1401,7 +1401,7 @@ cstar_trace_entry:
 	LOCK_PREFIX
 	andl $~_TIF_CSTAR,TI_flags(%ebp)
 	/* What it returned is what we'll actually use.  */
-	cmpl $nr_syscalls,%eax
+	cmpl $NR_syscalls,%eax
 	jb .Lcstar_call
 	jmp .Lcstar_exit
 .Lcstar_trace_special:
@@ -1410,7 +1410,7 @@ cstar_trace_entry:
 	movl %ecx,PT_EBP(%esp)		# put user EBP back in place
 	call syscall_trace_enter
 	/* What it returned is what we'll actually use.  */
-	cmpl $nr_syscalls,%eax
+	cmpl $NR_syscalls,%eax
 	jb syscall_call
 	jmp syscall_exit
 cstar_badsys:
@@ -1438,19 +1438,14 @@ ENTRY(cstar_ret_from_fork)
 	jmp ret_from_fork
 	CFI_ENDPROC
 END(cstar_ret_from_fork)
-#endif /* TIF_CSTAR */
-
-.section .rodata,"a"
-#include "syscall_table_32.S"
 
-syscall_table_size=(.-sys_call_table)
-
-#ifdef TIF_CSTAR
 #include <asm/unistd.h>
+.pushsection .rodata,"a"
+.balign 4
 cstar_special:
 nr=0
 mask=0
-.rept nr_syscalls+31
+.rept NR_syscalls+31
  .irp n, __NR_sigreturn, __NR_rt_sigreturn
   .if nr == \n
    mask = mask | (1 << (\n & 31))
@@ -1462,15 +1457,7 @@ mask=0
   mask = 0
  .endif
 .endr
-#define	sys_call_table cstar_call_table
-#define	ptregs_fork cstar_set_tif
-#define	ptregs_clone cstar_set_tif
-#define	ptregs_vfork cstar_set_tif
-#include "syscall_table_32.S"
-#undef	sys_call_table
-#undef	ptregs_fork
-#undef	ptregs_clone
-#undef	ptregs_vfork
+.popsection
 #endif /* TIF_CSTAR */
 
 /*
--- a/arch/x86/entry/entry_64-xen.S
+++ b/arch/x86/entry/entry_64-xen.S
@@ -58,6 +58,7 @@
 #include <asm/processor-flags.h>
 #include <asm/ftrace.h>
 #include <asm/percpu.h>
+#include <linux/err.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/features.h>
 
@@ -212,7 +213,7 @@ NMI_MASK = 0x80000000
 	/*CFI_REL_OFFSET	ss,0*/
 	pushq_cfi %rax /* rsp */
 	CFI_REL_OFFSET	rsp,0
-	pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */
+	pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */
 	/*CFI_REL_OFFSET	rflags,0*/
 	pushq_cfi $__KERNEL_CS /* cs */
 	/*CFI_REL_OFFSET	cs,0*/
@@ -448,8 +449,11 @@ ENTRY(ret_from_fork)
 	RESTORE_REST
 
 	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
-	je   int_ret_from_sys_call
-
+	jnz  1f
+	/* Need to set the proper %ss (not NULL) for ring 3 iretq */
+	movl $__KERNEL_DS,SS-ARGOFFSET(%rsp)
+	jmp  retint_restore_args
+1:
 	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
 	jnz  int_ret_from_sys_call
 
@@ -494,8 +498,7 @@ ENTRY(system_call)
 	INTR_FRAME start=2 offset=2*8
 	SAVE_ARGS -8,0
 	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
-	GET_THREAD_INFO(%rcx)
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz tracesys
 system_call_fastpath:
 	cmpq $__NR_syscall_max,%rax
@@ -512,10 +515,9 @@ ret_from_sys_call:
 	/* edi:	flagmask */
 sysret_check:
 	LOCKDEP_SYS_EXIT
-	GET_THREAD_INFO(%rcx)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	movl TI_flags(%rcx),%edx
+	movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
 	andl %edi,%edx
 	jnz  sysret_careful
 	CFI_REMEMBER_STATE
@@ -564,7 +566,7 @@ badsys:
 #ifdef CONFIG_AUDITSYSCALL
 	/*
 	 * Fast path for syscall audit without full syscall trace.
-	 * We just call audit_syscall_entry() directly, and then
+	 * We just call __audit_syscall_entry() directly, and then
 	 * jump back to the normal fast path.
 	 */
 auditsys:
@@ -574,22 +576,21 @@ auditsys:
 	movq %rdi,%rdx			/* 3rd arg: 1st syscall arg */
 	movq %rax,%rsi			/* 2nd arg: syscall number */
 	movl $AUDIT_ARCH_X86_64,%edi	/* 1st arg: audit arch */
-	call audit_syscall_entry
+	call __audit_syscall_entry
 	LOAD_ARGS 0		/* reload call-clobbered registers */
 	jmp system_call_fastpath
 
 	/*
-	 * Return fast path for syscall audit.  Call audit_syscall_exit()
+	 * Return fast path for syscall audit.  Call __audit_syscall_exit()
 	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
 	 * masked off.
 	 */
 sysret_audit:
 	movq RAX-ARGOFFSET(%rsp),%rsi	/* second arg, syscall return value */
-	cmpq $0,%rsi		/* is it < 0? */
-	setl %al		/* 1 if so, 0 if not */
+	cmpq $-MAX_ERRNO,%rsi	/* is it < -MAX_ERRNO? */
+	setbe %al		/* 1 if so, 0 if not */
 	movzbl %al,%edi		/* zero-extend that into %edi */
-	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
-	call audit_syscall_exit
+	call __audit_syscall_exit
 	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 	jmp sysret_check
 #endif	/* CONFIG_AUDITSYSCALL */
@@ -597,7 +598,7 @@ sysret_audit:
 	/* Do syscall tracing */
 tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jz auditsys
 #endif
 	SAVE_REST
@@ -626,12 +627,6 @@ tracesys:
 GLOBAL(int_ret_from_sys_call)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testb $3,CS-ARGOFFSET(%rsp)
-        jnz 1f
-        /* Need to set the proper %ss (not NULL) for ring 3 iretq */
-        movl $__KERNEL_DS,SS-ARGOFFSET(%rsp)
-        jmp retint_restore_args   # retrun from ring3 kernel
-1:              
 	movl $_TIF_ALLWORK_MASK,%edi
 	/* edi:	mask to check */
 GLOBAL(int_with_check)
@@ -1236,13 +1231,28 @@ ENTRY(error_exit)
 END(error_exit)
 
 
+#define extern #
+#include <asm-generic/percpu.h>
+
+.pushsection PER_CPU_BASE_SECTION, "aw", @progbits
+in_NMI:	.byte	0
+.popsection
+
 do_nmi_callback:
 	CFI_STARTPROC
 	addq $8, %rsp
 	CFI_ENDPROC
 	DEFAULT_FRAME
+	orb  $1, PER_CPU_VAR(in_NMI)
+	js   1f
+0:
+	movb $0x80, PER_CPU_VAR(in_NMI)
 	call do_nmi
+	movl $0x80, %eax
+	cmpxchgb %ah, PER_CPU_VAR(in_NMI)
+	jne  0b
 	orl  $NMI_MASK,EFLAGS(%rsp)
+1:
 	RESTORE_REST
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
--- a/arch/x86/kernel/head-xen.c
+++ b/arch/x86/kernel/head-xen.c
@@ -54,7 +54,7 @@ void __init reserve_ebda_region(void)
 		lowmem = 0x9f000;
 
 	/* reserve all memory between lowmem and the 1MB mark */
-	memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved");
+	memblock_reserve(lowmem, 0x100000 - lowmem);
 }
 #else /* CONFIG_XEN */
 #include <linux/export.h>
@@ -106,11 +106,10 @@ void __init xen_start_kernel(void)
 	WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
 				     VMASST_TYPE_writable_pagetables));
 
-	memblock_init();
-	memblock_x86_reserve_range(PAGE_ALIGN(__pa_symbol(&_end)),
-				   __pa(xen_start_info->pt_base)
-				   + PFN_PHYS(xen_start_info->nr_pt_frames),
-				   "Xen provided");
+	memblock_reserve(PAGE_ALIGN(__pa_symbol(&_end)),
+			 __pa(xen_start_info->pt_base)
+			 + PFN_PHYS(xen_start_info->nr_pt_frames)
+			 - PAGE_ALIGN(__pa_symbol(&_end)));
 
 	x86_configure_nx();
 
--- a/arch/x86/kernel/head32-xen.c
+++ b/arch/x86/kernel/head32-xen.c
@@ -47,9 +47,8 @@ void __init i386_start_kernel(void)
 	BUG_ON(pte_index(hypervisor_virt_start));
 #endif
 
-	memblock_init();
-
-	memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
+	memblock_reserve(__pa_symbol(&_text),
+			 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
 
 #ifndef CONFIG_XEN
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -59,7 +58,7 @@ void __init i386_start_kernel(void)
 		u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 		u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
 		u64 ramdisk_end   = PAGE_ALIGN(ramdisk_image + ramdisk_size);
-		memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK");
+		memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
 	}
 #endif
 
--- a/arch/x86/kernel/head64-xen.c
+++ b/arch/x86/kernel/head64-xen.c
@@ -117,9 +117,8 @@ void __init x86_64_start_reservations(ch
 {
 	copy_bootdata(__va(real_mode_data));
 
-	memblock_init();
-
-	memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
+	memblock_reserve(__pa_symbol(&_text),
+			 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
 
 	/*
 	 * At this point everything still needed from the boot loader
--- a/arch/x86/kernel/irq-xen.c
+++ b/arch/x86/kernel/irq-xen.c
@@ -78,6 +78,12 @@ int arch_show_interrupts(struct seq_file
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
 	seq_printf(p, "  IRQ work interrupts\n");
+#ifndef CONFIG_XEN
+	seq_printf(p, "%*s: ", prec, "RTR");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
+	seq_printf(p, "  APIC ICR read retries\n");
+#endif
 #endif
 #ifndef CONFIG_XEN
 	if (x86_platform_ipi_callback) {
@@ -149,6 +155,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 	sum += irq_stats(cpu)->irq_spurious_count;
 	sum += irq_stats(cpu)->apic_perf_irqs;
 	sum += irq_stats(cpu)->apic_irq_work_irqs;
+	sum += irq_stats(cpu)->icr_read_retry_count;
 #endif
 #ifndef CONFIG_XEN
 	if (x86_platform_ipi_callback)
@@ -200,8 +207,8 @@ unsigned int __irq_entry do_IRQ(struct p
 	unsigned vector = ~regs->orig_ax;
 	unsigned irq;
 
-	exit_idle();
 	irq_enter();
+	exit_idle();
 
 	irq = __this_cpu_read(vector_irq[vector]);
 
@@ -228,10 +235,10 @@ void smp_x86_platform_ipi(struct pt_regs
 
 	ack_APIC_irq();
 
-	exit_idle();
-
 	irq_enter();
 
+	exit_idle();
+
 	inc_irq_stat(x86_platform_ipis);
 
 	if (x86_platform_ipi_callback)
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -33,7 +33,9 @@ static inline void stack_overflow_check(
 {
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 #define STACK_TOP_MARGIN	128
+#ifndef CONFIG_X86_NO_TSS
 	struct orig_ist *oist;
+#endif
 	u64 irq_stack_top, irq_stack_bottom;
 	u64 estack_top, estack_bottom;
 	u64 curbase = (u64)task_stack_page(current);
@@ -52,11 +54,15 @@ static inline void stack_overflow_check(
 	if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom)
 		return;
 
+#ifndef CONFIG_X86_NO_TSS
 	oist = this_cpu_ptr(&orig_ist);
 	estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN;
 	estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1];
 	if (regs->sp >= estack_top && regs->sp <= estack_bottom)
 		return;
+#else
+	estack_top = estack_bottom = 0;
+#endif
 
 	WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
 		current->comm, curbase, regs->sp,
--- a/arch/x86/kernel/cpu/microcode/core-xen.c
+++ b/arch/x86/kernel/cpu/microcode/core-xen.c
@@ -186,16 +186,21 @@ static int request_microcode(const char 
 static int __init microcode_init(void)
 {
 	const struct cpuinfo_x86 *c = &boot_cpu_data;
-	char buf[32];
+	char buf[36];
 	const char *fw_name = buf;
 	int error;
 
 	if (c->x86_vendor == X86_VENDOR_INTEL)
 		snprintf(buf, sizeof(buf), "intel-ucode/%02x-%02x-%02x",
 			 c->x86, c->x86_model, c->x86_mask);
-	else if (c->x86_vendor == X86_VENDOR_AMD)
-		fw_name = "amd-ucode/microcode_amd.bin";
-	else {
+	else if (c->x86_vendor == X86_VENDOR_AMD) {
+		if (c->x86 >= 0x15)
+			snprintf(buf, sizeof(buf),
+				 "amd-ucode/microcode_amd_fam%xh.bin",
+				 c->x86);
+		else
+			fw_name = "amd-ucode/microcode_amd.bin";
+	} else {
 		pr_err("no support for this CPU vendor\n");
 		return -ENODEV;
 	}
--- a/arch/x86/kernel/mpparse-xen.c
+++ b/arch/x86/kernel/mpparse-xen.c
@@ -591,9 +591,7 @@ void __init default_get_smp_config(unsig
 #ifndef CONFIG_XEN
 static void __init smp_reserve_memory(struct mpf_intel *mpf)
 {
-	unsigned long size = get_mpc_size(mpf->physptr);
-
-	memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc");
+	memblock_reserve(mpf->physptr, get_mpc_size(mpf->physptr));
 }
 #endif
 
@@ -626,7 +624,7 @@ static int __init smp_scan_config(unsign
 			       mpf, (u64)virt_to_phys(mpf));
 
 			mem = virt_to_phys(mpf);
-			memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf");
+			memblock_reserve(mem, sizeof(*mpf));
 			if (mpf->physptr)
 				smp_reserve_memory(mpf);
 #else
@@ -874,10 +872,8 @@ early_param("alloc_mptable", parse_alloc
 
 void __init early_reserve_e820_mpc_new(void)
 {
-	if (enable_update_mptable && alloc_mptable) {
-		u64 startt = 0;
-		mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
-	}
+	if (enable_update_mptable && alloc_mptable)
+		mpc_new_phys = early_reserve_e820(mpc_new_length, 4);
 }
 
 static int __init update_mp_table(void)
--- a/arch/x86/kernel/pci-dma-xen.c
+++ b/arch/x86/kernel/pci-dma-xen.c
@@ -44,6 +44,15 @@ int iommu_detected __read_mostly = 0;
  * guests and not for driver dma translation.
  */
 int iommu_pass_through __read_mostly;
+
+/*
+ * Group multi-function PCI devices into a single device-group for the
+ * iommu_device_group interface.  This tells the iommu driver to pretend
+ * it cannot distinguish between functions of a device, exposing only one
+ * group for the device.  Useful for disallowing use of individual PCI
+ * functions from userspace drivers.
+ */
+int iommu_group_mf __read_mostly;
 #endif
 
 extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
@@ -235,6 +244,8 @@ static __init int iommu_setup(char *p)
 #ifndef CONFIG_XEN
 		if (!strncmp(p, "pt", 2))
 			iommu_pass_through = 1;
+		if (!strncmp(p, "group_mf", 8))
+			iommu_group_mf = 1;
 
 		gart_parse_options(p);
 #endif
--- a/arch/x86/kernel/process-xen.c
+++ b/arch/x86/kernel/process-xen.c
@@ -280,7 +280,7 @@ int kernel_thread(int (*fn)(void *), voi
 	regs.orig_ax = -1;
 	regs.ip = (unsigned long) kernel_thread_helper;
 	regs.cs = __KERNEL_CS | get_kernel_rpl();
-	regs.flags = X86_EFLAGS_IF | 0x2;
+	regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
 
 	/* Ok, create the new process.. */
 	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
--- a/arch/x86/kernel/process_32-xen.c
+++ b/arch/x86/kernel/process_32-xen.c
@@ -102,7 +102,8 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched()) {
 
 			check_pgt_cache();
@@ -119,7 +120,8 @@ void cpu_idle(void)
 				xen_idle();
 			start_critical_timings();
 		}
-		tick_nohz_restart_sched_tick();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
@@ -215,6 +217,7 @@ int copy_thread(unsigned long clone_flag
 
 	task_user_gs(p) = get_user_gs(regs);
 
+	p->fpu_counter = 0;
 	p->thread.io_bitmap_ptr = NULL;
 	tsk = current;
 	err = -ENOMEM;
@@ -303,11 +306,11 @@ __switch_to(struct task_struct *prev_p, 
 {
 	struct thread_struct *prev = &prev_p->thread,
 				 *next = &next_p->thread;
-	int cpu = smp_processor_id(), cr0_ts = 0;
+	int cpu = smp_processor_id(), cr0_ts;
 #ifndef CONFIG_X86_NO_TSS
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 #endif
-	bool preload_fpu;
+	fpu_switch_t fpu;
 #if CONFIG_XEN_COMPAT > 0x030002
 	struct physdev_set_iopl iopl_op;
 	struct physdev_set_iobitmap iobmp_op;
@@ -320,26 +323,7 @@ __switch_to(struct task_struct *prev_p, 
 
 	/* XEN NOTE: FS/GS saved in switch_mm(), not here. */
 
-	/*
-	 * If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 */
-	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
-	/*
-	 * This is basically '__unlazy_fpu', except that we queue a
-	 * multicall to indicate FPU task switch, rather than
-	 * synchronously trapping to Xen.
-	 */
-	if (task_thread_info(prev_p)->status & TS_USEDFPU) {
-		__save_init_fpu(prev_p); /* _not_ save_init_fpu() */
-		if (!preload_fpu) {
-			mcl->op = __HYPERVISOR_fpu_taskswitch;
-			mcl++->args[0] = 1;
-			cr0_ts = 1;
-		}
-	}
+	fpu = xen_switch_fpu_prepare(prev_p, next_p, cpu, &mcl);
 
 	/*
 	 * Reload sp0.
@@ -381,14 +365,6 @@ __switch_to(struct task_struct *prev_p, 
 		mcl++;
 	}
 
-	/* If we're going to preload the fpu context, make sure clts
-	   is run while we're batching the cpu state updates. */
-	if (preload_fpu) {
-		mcl->op = __HYPERVISOR_fpu_taskswitch;
-		mcl++->args[0] = 0;
-		cr0_ts = -1;
-	}
-
 	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
 		set_xen_guest_handle(iobmp_op.bitmap,
 				     (char *)next->io_bitmap_ptr);
@@ -409,8 +385,11 @@ __switch_to(struct task_struct *prev_p, 
 	BUG_ON(pdo > _pdo + ARRAY_SIZE(_pdo));
 #endif
 	BUG_ON(mcl > _mcl + ARRAY_SIZE(_mcl));
-	if (cr0_ts)
+	if (_mcl->op == __HYPERVISOR_fpu_taskswitch) {
 		percpu_write(xen_x86_cr0_upd, X86_CR0_TS);
+		cr0_ts = _mcl->args[0] ? 1 : -1;
+	} else
+		cr0_ts = 0;
 	if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL)))
 		BUG();
 	if (cr0_ts) {
@@ -421,10 +400,6 @@ __switch_to(struct task_struct *prev_p, 
 		xen_clear_cr0_upd();
 	}
 
-	/* we're going to use this soon, after a few expensive things */
-	if (preload_fpu)
-		prefetch(next->fpu.state);
-
 	/*
 	 * Now maybe handle debug registers
 	 */
@@ -441,15 +416,14 @@ __switch_to(struct task_struct *prev_p, 
 	 */
 	arch_end_context_switch(next_p);
 
-	if (preload_fpu)
-		__math_state_restore();
-
 	/*
 	 * Restore %gs if needed (which is common)
 	 */
 	if (prev->gs | next->gs)
 		lazy_load_gs(next->gs);
 
+	switch_fpu_finish(next_p, fpu);
+
 	percpu_write(current_task, next_p);
 
 	return prev_p;
--- a/arch/x86/kernel/process_64-xen.c
+++ b/arch/x86/kernel/process_64-xen.c
@@ -126,7 +126,7 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
 		while (!need_resched()) {
 
 			rmb();
@@ -143,8 +143,14 @@ void cpu_idle(void)
 			enter_idle();
 			/* Don't trace irqs off for idle */
 			stop_critical_timings();
+
+			/* enter_idle() needs rcu for notifiers */
+			rcu_idle_enter();
+
 			if (cpuidle_idle_call())
 				xen_idle();
+
+			rcu_idle_exit();
 			start_critical_timings();
 
 			/* In many cases the interrupt that ended idle
@@ -153,7 +159,7 @@ void cpu_idle(void)
 			__exit_idle();
 		}
 
-		tick_nohz_restart_sched_tick();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
@@ -289,6 +295,7 @@ int copy_thread(unsigned long clone_flag
 
 	set_tsk_thread_flag(p, TIF_FORK);
 
+	p->fpu_counter = 0;
 	p->thread.io_bitmap_ptr = NULL;
 
 	savesegment(gs, p->thread.gsindex);
@@ -302,13 +309,12 @@ int copy_thread(unsigned long clone_flag
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
 	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
-		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+		p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
+						  IO_BITMAP_BYTES, GFP_KERNEL);
 		if (!p->thread.io_bitmap_ptr) {
 			p->thread.io_bitmap_max = 0;
 			return -ENOMEM;
 		}
-		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
-				IO_BITMAP_BYTES);
 		set_tsk_thread_flag(p, TIF_IO_BITMAP);
 	}
 
@@ -392,7 +398,7 @@ __switch_to(struct task_struct *prev_p, 
 #ifndef CONFIG_X86_NO_TSS
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 #endif
-	bool preload_fpu;
+	fpu_switch_t fpu;
 #if CONFIG_XEN_COMPAT > 0x030002
 	struct physdev_set_iopl iopl_op;
 	struct physdev_set_iobitmap iobmp_op;
@@ -403,38 +409,7 @@ __switch_to(struct task_struct *prev_p, 
 #endif
 	multicall_entry_t _mcl[8], *mcl = _mcl;
 
-	/*
-	 * If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 */
-	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
-	/* we're going to use this soon, after a few expensive things */
-	if (preload_fpu)
-		prefetch(next->fpu.state);
-
-	/*
-	 * This is basically '__unlazy_fpu', except that we queue a
-	 * multicall to indicate FPU task switch, rather than
-	 * synchronously trapping to Xen.
-	 * The AMD workaround requires it to be after DS reload, or
-	 * after DS has been cleared, which we do in __prepare_arch_switch.
-	 */
-	if (task_thread_info(prev_p)->status & TS_USEDFPU) {
-		__save_init_fpu(prev_p); /* _not_ save_init_fpu() */
-		if (!preload_fpu) {
-			mcl->op = __HYPERVISOR_fpu_taskswitch;
-			mcl++->args[0] = 1;
-		}
-	} else
-		prev_p->fpu_counter = 0;
-
-	/* Make sure cpu is ready for new context */
-	if (preload_fpu) {
-		mcl->op = __HYPERVISOR_fpu_taskswitch;
-		mcl++->args[0] = 0;
-	}
+	fpu = xen_switch_fpu_prepare(prev_p, next_p, cpu, &mcl);
 
 	/*
 	 * Reload sp0.
@@ -549,6 +524,8 @@ __switch_to(struct task_struct *prev_p, 
 	if (next->gs)
 		WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_GS_USER, next->gs));
 
+	switch_fpu_finish(next_p, fpu);
+
 	/*
 	 * Switch the PDA context.
 	 */
@@ -565,13 +542,6 @@ __switch_to(struct task_struct *prev_p, 
 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p);
 
-	/*
-	 * Preload the FPU context, now that we've determined that the
-	 * task is likely to be using it.
-	 */
-	if (preload_fpu)
-		__math_state_restore();
-
 	return prev_p;
 }
 
--- a/arch/x86/kernel/setup-xen.c
+++ b/arch/x86/kernel/setup-xen.c
@@ -342,7 +342,8 @@ static void __init cleanup_highmap(void)
 static void __init reserve_brk(void)
 {
 	if (_brk_end > _brk_start)
-		memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK");
+		memblock_reserve(__pa(_brk_start),
+				 __pa(_brk_end) - __pa(_brk_start));
 
 	/* Mark brk area as locked down and no longer taking any
 	   new allocations */
@@ -368,13 +369,13 @@ static void __init relocate_initrd(void)
 	ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
 					 PAGE_SIZE);
 
-	if (ramdisk_here == MEMBLOCK_ERROR)
+	if (!ramdisk_here)
 		panic("Cannot find place for new RAMDISK of size %lld\n",
 			 ramdisk_size);
 
 	/* Note: this includes all the lowmem currently occupied by
 	   the initrd, we rely on that fact to keep the data intact. */
-	memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK");
+	memblock_reserve(ramdisk_here, area_size);
 	initrd_start = ramdisk_here + PAGE_OFFSET;
 	initrd_end   = initrd_start + ramdisk_size;
 	printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
@@ -447,7 +448,7 @@ static void __init reserve_initrd(void)
 	initrd_start = 0;
 
 	if (ramdisk_size >= (end_of_lowmem>>1)) {
-		memblock_x86_free_range(ramdisk_image, ramdisk_end);
+		memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
 		printk(KERN_ERR "initrd too large to handle, "
 		       "disabling initrd\n");
 		return;
@@ -473,7 +474,7 @@ static void __init reserve_initrd(void)
 
 	relocate_initrd();
 
-	memblock_x86_free_range(ramdisk_image, ramdisk_end);
+	memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
 }
 #else
 static void __init reserve_initrd(void)
@@ -552,15 +553,13 @@ static void __init memblock_x86_reserve_
 #ifndef CONFIG_XEN
 	struct setup_data *data;
 	u64 pa_data;
-	char buf[32];
 
 	if (boot_params.hdr.version < 0x0209)
 		return;
 	pa_data = boot_params.hdr.setup_data;
 	while (pa_data) {
 		data = early_memremap(pa_data, sizeof(*data));
-		sprintf(buf, "setup data %x", data->type);
-		memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf);
+		memblock_reserve(pa_data, sizeof(*data) + data->len);
 		pa_data = data->next;
 		early_iounmap(data, sizeof(*data));
 	}
@@ -617,7 +616,7 @@ static void __init reserve_crashkernel(v
 		crash_base = memblock_find_in_range(alignment,
 			       CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
 
-		if (crash_base == MEMBLOCK_ERROR) {
+		if (!crash_base) {
 			pr_info("crashkernel reservation failed - No suitable area found.\n");
 			return;
 		}
@@ -631,7 +630,7 @@ static void __init reserve_crashkernel(v
 			return;
 		}
 	}
-	memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL");
+	memblock_reserve(crash_base, crash_size);
 
 	printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
 			"for crashkernel (System RAM: %ldMB)\n",
@@ -694,7 +693,7 @@ static __init void reserve_ibft_region(v
 
 #ifndef CONFIG_XEN
 	if (size)
-		memblock_x86_reserve_range(addr, addr + size, "* ibft");
+		memblock_reserve(addr, size);
 #endif
 }
 
@@ -842,12 +841,7 @@ void __init setup_arch(char **cmdline_p)
 #endif
 #ifdef CONFIG_EFI
 	if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
-#ifdef CONFIG_X86_32
-		     "EL32",
-#else
-		     "EL64",
-#endif
-	 4)) {
+		     EFI_LOADER_SIGNATURE, 4)) {
 		efi_enabled = 1;
 		efi_memblock_x86_reserve_range();
 	}
@@ -994,8 +988,8 @@ void __init setup_arch(char **cmdline_p)
 		max_pfn = e820_end_of_ram_pfn();
 #else
 	if (max_pfn > xen_start_info->nr_pages)
-		memblock_x86_reserve_range(PFN_PHYS(xen_start_info->nr_pages),
-					   PFN_PHYS(max_pfn), "BALLOON");
+		memblock_reserve(PFN_PHYS(xen_start_info->nr_pages),
+				 PFN_PHYS(max_pfn - xen_start_info->nr_pages));
 #endif
 
 #ifdef CONFIG_X86_32
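
All of the setup-xen.c conversions above follow one pattern: the x86-only memblock wrappers that took a [start, end) pair plus a debugging label give way to the generic base/size interface, and the failure sentinel of memblock_find_in_range() changes from MEMBLOCK_ERROR to 0. A minimal sketch of the mapping, assuming the v3.3 prototypes from <linux/memblock.h>:

#include <linux/init.h>
#include <linux/memblock.h>

/* was: memblock_x86_reserve_range(start, end, "LABEL") */
static void __init reserve_phys_range(phys_addr_t start, phys_addr_t end)
{
	if (end > start)
		memblock_reserve(start, end - start);
}

/* was: result compared against MEMBLOCK_ERROR; now 0 means "nothing found" */
static phys_addr_t __init find_phys_range(phys_addr_t size, phys_addr_t align,
					  phys_addr_t limit)
{
	return memblock_find_in_range(0, limit, size, align);
}
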
--- a/arch/x86/kernel/smp-xen.c
+++ b/arch/x86/kernel/smp-xen.c
@@ -28,6 +28,7 @@
 #include <asm/mmu_context.h>
 #include <asm/proto.h>
 #include <asm/ipi.h>
+#include <asm/nmi.h>
 #include <xen/evtchn.h>
 /*
  *	Some notes on x86 processor bugs affecting SMP operation:
@@ -132,6 +133,20 @@ void xen_send_call_func_ipi(const struct
 	xen_send_IPI_mask_allbutself(mask, CALL_FUNCTION_VECTOR);
 }
 
+static atomic_t stopping_cpu = ATOMIC_INIT(-1);
+static bool __read_mostly xen_smp_disable_nmi_ipi;
+
+static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
+{
+	/* We are registered on stopping cpu too, avoid spurious NMI */
+	if (raw_smp_processor_id() == atomic_read(&stopping_cpu))
+		return NMI_HANDLED;
+
+	stop_this_cpu(NULL);
+
+	return NMI_HANDLED;
+}
+
 /*
  * this function calls the 'stop' function on all other CPUs in the system.
  */
@@ -158,7 +173,27 @@ void xen_stop_other_cpus(int wait)
 	 * currently)
 	 */
 	if (num_online_cpus() > 1) {
-		xen_send_IPI_allbutself(REBOOT_VECTOR);
+		unsigned int vector = REBOOT_VECTOR;
+
+		if (!xen_smp_disable_nmi_ipi) {
+			/* did someone beat us here? */
+			if (atomic_cmpxchg(&stopping_cpu, -1,
+					   safe_smp_processor_id()) != -1)
+				return;
+
+			if (register_nmi_handler(NMI_LOCAL,
+						 smp_stop_nmi_callback,
+						 NMI_FLAG_FIRST, "smp_stop"))
+				/* Note: we ignore failures here */
+				return;
+
+			/* sync above data before sending NMI */
+			wmb();
+
+			vector = NMI_VECTOR;
+		}
+
+		xen_send_IPI_allbutself(vector);
 
 		/*
 		 * Don't wait longer than a second if the caller
@@ -199,3 +234,11 @@ irqreturn_t smp_call_function_single_int
 
 	return IRQ_HANDLED;
 }
+
+static int __init nonmi_ipi_setup(char *str)
+{
+	xen_smp_disable_nmi_ipi = true;
+	return 1;
+}
+
+__setup("nonmi_ipi", nonmi_ipi_setup);
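
Because the new stop path is spread over several hunks, here is a condensed, non-diff view of its core idea: exactly one CPU wins the race to become the stopping CPU, and an NMI handler lets it halt CPUs that are sitting with interrupts disabled, while the "nonmi_ipi" boot option above falls back to the plain REBOOT_VECTOR IPI. This is a sketch only; stop_this_cpu() and safe_smp_processor_id() are assumed from the surrounding x86 code.

#include <linux/atomic.h>
#include <linux/ptrace.h>
#include <linux/smp.h>
#include <asm/nmi.h>

static atomic_t stopping_cpu = ATOMIC_INIT(-1);

static int stop_nmi_cb(unsigned int val, struct pt_regs *regs)
{
	/* The stopping CPU receives the broadcast NMI too; ignore it there. */
	if (raw_smp_processor_id() == atomic_read(&stopping_cpu))
		return NMI_HANDLED;

	stop_this_cpu(NULL);
	return NMI_HANDLED;
}

static bool try_become_stopping_cpu(void)
{
	/* Only the first caller wins; later callers simply give up. */
	if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1)
		return false;

	return register_nmi_handler(NMI_LOCAL, stop_nmi_cb,
				    NMI_FLAG_FIRST, "smp_stop") == 0;
}
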
--- /dev/null
+++ b/arch/x86/entry/syscall_32-xen.c
@@ -0,0 +1,20 @@
+#include "syscall_32.c"
+
+#include <linux/thread_info.h>
+
+#ifdef TIF_CSTAR
+extern asmlinkage void cstar_set_tif(void);
+
+#define	ptregs_fork cstar_set_tif
+#define	ptregs_clone cstar_set_tif
+#define	ptregs_vfork cstar_set_tif
+
+const sys_call_ptr_t cstar_call_table[__NR_syscall_max+1] = {
+	/*
+	 * Smells like a compiler bug -- it doesn't work
+	 * when the & below is removed.
+	 */
+	[0 ... __NR_syscall_max] = &sys_ni_syscall,
+#include <asm/syscalls_32.h>
+};
+#endif /* TIF_CSTAR */
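
The cstar_call_table initializer relies on GCC's designated range initializers: every slot is first set to sys_ni_syscall, then the entries generated into <asm/syscalls_32.h> override individual slots, since the last initializer for a given index wins. A standalone illustration of the idiom (all names below are made up for the example):

typedef long (*sys_call_ptr_t)(void);

static long sys_ni_syscall(void) { return -38; /* ENOSYS */ }
static long demo_read(void)      { return 0; }
static long demo_write(void)     { return 1; }

#define DEMO_NR_MAX 7

static const sys_call_ptr_t demo_call_table[DEMO_NR_MAX + 1] = {
	[0 ... DEMO_NR_MAX] = &sys_ni_syscall,	/* default every slot first */
	[3] = &demo_read,			/* then override a few; */
	[4] = &demo_write,			/* the last initializer wins */
};
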
--- a/arch/x86/kernel/traps-xen.c
+++ b/arch/x86/kernel/traps-xen.c
@@ -310,19 +310,20 @@ dotraplinkage void __kprobes do_int3(str
 			== NOTIFY_STOP)
 		return;
 #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
-#ifdef CONFIG_KPROBES
+
 	if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
 			== NOTIFY_STOP)
 		return;
-#else
-	if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
-			== NOTIFY_STOP)
-		return;
-#endif
 
+	/*
+	 * Let others (NMI) know that the debug stack is in use
+	 * as we may switch to the interrupt stack.
+	 */
+	debug_stack_usage_inc();
 	preempt_conditional_sti(regs);
 	do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
 	preempt_conditional_cli(regs);
+	debug_stack_usage_dec();
 }
 
 #if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
@@ -415,6 +416,12 @@ dotraplinkage void __kprobes do_debug(st
 							SIGTRAP) == NOTIFY_STOP)
 		return;
 
+	/*
+	 * Let others (NMI) know that the debug stack is in use
+	 * as we may switch to the interrupt stack.
+	 */
+	debug_stack_usage_inc();
+
 	/* It's safe to allow irq's after DR6 has been saved */
 	preempt_conditional_sti(regs);
 
@@ -422,6 +429,7 @@ dotraplinkage void __kprobes do_debug(st
 		handle_vm86_trap((struct kernel_vm86_regs *) regs,
 				error_code, 1);
 		preempt_conditional_cli(regs);
+		debug_stack_usage_dec();
 		return;
 	}
 
@@ -441,6 +449,7 @@ dotraplinkage void __kprobes do_debug(st
 	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
 		send_sigtrap(tsk, regs, error_code, si_code);
 	preempt_conditional_cli(regs);
+	debug_stack_usage_dec();
 
 	return;
 }
@@ -568,44 +577,19 @@ asmlinkage void __attribute__((weak)) sm
 #endif /* CONFIG_XEN */
 
 /*
- * __math_state_restore assumes that cr0.TS is already clear and the
- * fpu state is all ready for use.  Used during context switch.
- */
-void __math_state_restore(void)
-{
-	struct thread_info *thread = current_thread_info();
-	struct task_struct *tsk = thread->task;
-
-	/*
-	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
-	 */
-	if (unlikely(restore_fpu_checking(tsk))) {
-		stts();
-		force_sig(SIGSEGV, tsk);
-		return;
-	}
-
-	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
-	tsk->fpu_counter++;
-}
-
-/*
  * 'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
  *
  * Careful.. There are problems with IBM-designed IRQ13 behaviour.
  * Don't touch unless you *really* know how it works.
  *
- * Must be called with kernel preemption disabled (in this case,
- * local interrupts are disabled at the call-site in entry.S).
+ * Must be called with kernel preemption disabled (e.g. with local
+ * interrupts disabled, as in the case of do_device_not_available).
  */
-asmlinkage void math_state_restore(void)
+static void _math_state_restore(void)
 {
-	struct thread_info *thread = current_thread_info();
-	struct task_struct *tsk = thread->task;
+	struct task_struct *tsk = current;
 
-	/* NB. 'clts' is done for us by Xen during virtual trap. */
-	percpu_and(xen_x86_cr0, ~X86_CR0_TS);
 	if (!tsk_used_math(tsk)) {
 		stts();
 		local_irq_enable();
@@ -623,8 +607,23 @@ asmlinkage void math_state_restore(void)
 		clts();
 	}
 
-	/* clts();			Allow maths ops (or we recurse) */
-	__math_state_restore();
+	xen_thread_fpu_begin(tsk, NULL);
+	/*
+	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+	 */
+	if (unlikely(restore_fpu_checking(tsk))) {
+		__thread_fpu_end(tsk);
+		force_sig(SIGSEGV, tsk);
+		return;
+	}
+
+	tsk->fpu_counter++;
+}
+
+void math_state_restore(void)
+{
+	clts();
+	_math_state_restore();
 }
 
 dotraplinkage void __kprobes
@@ -641,7 +640,9 @@ do_device_not_available(struct pt_regs *
 		return;
 	}
 #endif
-	math_state_restore(); /* interrupts still off */
+	/* NB. 'clts' is done for us by Xen during virtual trap. */
+	percpu_and(xen_x86_cr0, ~X86_CR0_TS);
+	_math_state_restore(); /* interrupts still off */
 #ifdef CONFIG_X86_32
 	conditional_sti(regs);
 #endif
--- a/arch/x86/mm/fault-xen.c
+++ b/arch/x86/mm/fault-xen.c
@@ -635,7 +635,7 @@ pgtable_bad(struct pt_regs *regs, unsign
 
 static noinline void
 no_context(struct pt_regs *regs, unsigned long error_code,
-	   unsigned long address)
+	   unsigned long address, int signal, int si_code)
 {
 	struct task_struct *tsk = current;
 	unsigned long *stackend;
@@ -643,8 +643,17 @@ no_context(struct pt_regs *regs, unsigne
 	int sig;
 
 	/* Are we prepared to handle this kernel fault? */
-	if (fixup_exception(regs))
+	if (fixup_exception(regs)) {
+		if (current_thread_info()->sig_on_uaccess_error && signal) {
+			tsk->thread.trap_no = 14;
+			tsk->thread.error_code = error_code | PF_USER;
+			tsk->thread.cr2 = address;
+
+			/* XXX: hwpoison faults will set the wrong code. */
+			force_sig_info_fault(signal, si_code, address, tsk, 0);
+		}
 		return;
+	}
 
 	/*
 	 * 32-bit:
@@ -673,7 +682,7 @@ no_context(struct pt_regs *regs, unsigne
 
 	stackend = end_of_stack(tsk);
 	if (tsk != &init_task && *stackend != STACK_END_MAGIC)
-		printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
+		printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
 
 	tsk->thread.cr2		= address;
 	tsk->thread.trap_no	= 14;
@@ -684,7 +693,7 @@ no_context(struct pt_regs *regs, unsigne
 		sig = 0;
 
 	/* Executive summary in case the body of the oops scrolled away */
-	printk(KERN_EMERG "CR2: %016lx\n", address);
+	printk(KERN_DEFAULT "CR2: %016lx\n", address);
 
 	oops_end(flags, regs, sig);
 }
@@ -764,7 +773,7 @@ __bad_area_nosemaphore(struct pt_regs *r
 	if (is_f00f_bug(regs, address))
 		return;
 
-	no_context(regs, error_code, address);
+	no_context(regs, error_code, address, SIGSEGV, si_code);
 }
 
 static noinline void
@@ -828,7 +837,7 @@ do_sigbus(struct pt_regs *regs, unsigned
 
 	/* Kernel mode? Handle exceptions or die: */
 	if (!(error_code & PF_USER)) {
-		no_context(regs, error_code, address);
+		no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
 		return;
 	}
 
@@ -863,7 +872,7 @@ mm_fault_error(struct pt_regs *regs, uns
 		if (!(fault & VM_FAULT_RETRY))
 			up_read(&current->mm->mmap_sem);
 		if (!(error_code & PF_USER))
-			no_context(regs, error_code, address);
+			no_context(regs, error_code, address, 0, 0);
 		return 1;
 	}
 	if (!(fault & VM_FAULT_ERROR))
@@ -873,7 +882,8 @@ mm_fault_error(struct pt_regs *regs, uns
 		/* Kernel mode? Handle exceptions or die: */
 		if (!(error_code & PF_USER)) {
 			up_read(&current->mm->mmap_sem);
-			no_context(regs, error_code, address);
+			no_context(regs, error_code, address,
+				   SIGSEGV, SEGV_MAPERR);
 			return 1;
 		}
 
--- a/arch/x86/mm/init-xen.c
+++ b/arch/x86/mm/init-xen.c
@@ -16,6 +16,7 @@
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 #include <asm/proto.h>
+#include <asm/dma.h>		/* for MAX_DMA_PFN */
 
 unsigned long __meminitdata pgt_buf_start;
 unsigned long __meminitdata pgt_buf_end;
@@ -80,10 +81,10 @@ static void __init find_early_table_spac
 		pgt_buf_end = pgt_buf_start;
 	} else {
 		/*
-		 * [table_start, table_top) gets passed to
-		 * memblock_x86_reserve_range(), so we must not use table_end
-		 * here, despite continuing to allocate from there. table_end
-		 * possibly being below table_start is otoh not a problem.
+		 * [table_start, table_top) gets passed to memblock_reserve(),
+		 * so we must not use table_end here, despite continuing to
+		 * allocate from there. table_end possibly being below
+		 * table_start is otoh not a problem.
 		 */
 		pgt_buf_start = pgt_buf_top;
 	}
@@ -99,7 +100,8 @@ static void __init find_early_table_spac
 
 void __init xen_pagetable_reserve(u64 start, u64 end)
 {
-	memblock_x86_reserve_range(start, end, "PGTABLE");
+	if (end > start)
+		memblock_reserve(start, end - start);
 }
 
 struct map_range {
@@ -341,8 +343,8 @@ unsigned long __init_refok init_memory_m
 	 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
 	 * so that they can be reused for other purposes.
 	 *
-	 * On native it just means calling memblock_x86_reserve_range, on Xen it
-	 * also means marking RW the pagetable pages that we allocated before
+	 * On native it just means calling memblock_reserve, on Xen it also
+	 * means marking RW the pagetable pages that we allocated before
 	 * but that haven't been used.
 	 *
 	 * In fact on xen we mark RO the whole range pgt_buf_start -
@@ -468,3 +470,24 @@ void free_initrd_mem(unsigned long start
 	free_init_pages("initrd memory", start, PAGE_ALIGN(end));
 }
 #endif
+
+void __init zone_sizes_init(void)
+{
+	unsigned long max_zone_pfns[MAX_NR_ZONES];
+
+	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+
+#ifdef CONFIG_ZONE_DMA
+	max_zone_pfns[ZONE_DMA]		= MAX_DMA_PFN;
+#endif
+#ifdef CONFIG_ZONE_DMA32
+	max_zone_pfns[ZONE_DMA32]	= MAX_DMA32_PFN;
+#endif
+	max_zone_pfns[ZONE_NORMAL]	= max_low_pfn;
+#ifdef CONFIG_HIGHMEM
+	max_zone_pfns[ZONE_HIGHMEM]	= max_pfn;
+#endif
+
+	free_area_init_nodes(max_zone_pfns);
+}
+
--- a/arch/x86/mm/init_32-xen.c
+++ b/arch/x86/mm/init_32-xen.c
@@ -463,23 +463,17 @@ static void __init add_one_highpage_init
 void __init add_highpages_with_active_regions(int nid,
 			 unsigned long start_pfn, unsigned long end_pfn)
 {
-	struct range *range;
-	int nr_range;
-	int i;
-
-	nr_range = __get_free_all_memory_range(&range, nid, start_pfn, end_pfn);
-
-	for (i = 0; i < nr_range; i++) {
-		struct page *page;
-		int node_pfn;
-
-		for (node_pfn = range[i].start; node_pfn < range[i].end;
-		     node_pfn++) {
-			if (!pfn_valid(node_pfn))
-				continue;
-			page = pfn_to_page(node_pfn);
-			add_one_highpage_init(page);
-		}
+	phys_addr_t start, end;
+	u64 i;
+
+	for_each_free_mem_range(i, nid, &start, &end, NULL) {
+		unsigned long pfn = clamp_t(unsigned long, PFN_UP(start),
+					    start_pfn, end_pfn);
+		unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end),
+					      start_pfn, end_pfn);
+		for ( ; pfn < e_pfn; pfn++)
+			if (pfn_valid(pfn))
+				add_one_highpage_init(pfn_to_page(pfn));
 	}
 }
 #else
@@ -652,18 +646,18 @@ void __init initmem_init(void)
 	highstart_pfn = highend_pfn = max_pfn;
 	if (max_pfn > max_low_pfn)
 		highstart_pfn = max_low_pfn;
-	memblock_x86_register_active_regions(0, 0, highend_pfn);
-	sparse_memory_present_with_active_regions(0);
 	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
 		pages_to_mb(highend_pfn - highstart_pfn));
 	num_physpages = highend_pfn;
 	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
 #else
-	memblock_x86_register_active_regions(0, 0, max_low_pfn);
-	sparse_memory_present_with_active_regions(0);
 	num_physpages = max_low_pfn;
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
+
+	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
+	sparse_memory_present_with_active_regions(0);
+
 #ifdef CONFIG_FLATMEM
 	max_mapnr = num_physpages;
 #endif
@@ -676,22 +670,6 @@ void __init initmem_init(void)
 }
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
-static void __init zone_sizes_init(void)
-{
-	unsigned long max_zone_pfns[MAX_NR_ZONES];
-	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-#ifdef CONFIG_ZONE_DMA
-	max_zone_pfns[ZONE_DMA] =
-		virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-#endif
-	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
-#ifdef CONFIG_HIGHMEM
-	max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
-#endif
-
-	free_area_init_nodes(max_zone_pfns);
-}
-
 void __init setup_bootmem_allocator(void)
 {
 	printk(KERN_INFO "  mapped low ram: 0 - %08lx\n",
@@ -747,8 +725,7 @@ unsigned long __init extend_init_mapping
 	}
 
 	if (start_pfn > start)
-		memblock_x86_reserve_range(start << PAGE_SHIFT,
-					   start_pfn << PAGE_SHIFT, "INITMAP");
+		memblock_reserve(PFN_PHYS(start), PFN_PHYS(start_pfn - start));
 
 	return start_pfn;
 }
@@ -815,6 +792,17 @@ void __init mem_init(void)
 #ifdef CONFIG_FLATMEM
 	BUG_ON(!mem_map);
 #endif
+	/*
+	 * With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to
+	 * be done before free_all_bootmem(). Memblock uses free low memory for
+	 * temporary data (see find_range_array()) and for this purpose can use
+	 * pages that were already passed to the buddy allocator, hence marked as
+	 * not accessible in the page tables when compiled with
+	 * CONFIG_DEBUG_PAGEALLOC. Otherwise the order of initialization is not
+	 * important here.
+	 */
+	set_highmem_pages_init();
+
 	/* this will put all low memory onto the freelists */
 	totalram_pages += free_all_bootmem();
 	/* XEN: init low-mem pages outside initial allocation. */
@@ -831,8 +819,6 @@ void __init mem_init(void)
 		if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
 			reservedpages++;
 
-	set_highmem_pages_init();
-
 	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
 	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
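
The add_highpages_with_active_regions() rewrite above walks the free memblock ranges and intersects each one with the [start_pfn, end_pfn) window by clamping both ends. A small sketch of just that intersection step, assuming only clamp_t() and the PFN_UP()/PFN_DOWN() helpers:

#include <linux/kernel.h>	/* clamp_t() */
#include <linux/pfn.h>		/* PFN_UP(), PFN_DOWN() */

/* Returns the number of pfns of [start, end) that fall inside [lo, hi). */
static unsigned long overlap_pfns(phys_addr_t start, phys_addr_t end,
				  unsigned long lo, unsigned long hi,
				  unsigned long *first_pfn)
{
	unsigned long pfn  = clamp_t(unsigned long, PFN_UP(start), lo, hi);
	unsigned long last = clamp_t(unsigned long, PFN_DOWN(end), lo, hi);

	*first_pfn = pfn;
	return last > pfn ? last - pfn : 0;
}
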
--- a/arch/x86/mm/init_64-xen.c
+++ b/arch/x86/mm/init_64-xen.c
@@ -457,7 +457,7 @@ static inline int __meminit make_readonl
 	 * No need for writable mapping of kernel image. This also ensures that
 	 * page and descriptor tables embedded inside don't have writable
 	 * mappings. The range must be in sync with that passed to
-	 * memblock_x86_reserve_range() (as "TEXT DATA BSS"), since all other
+	 * memblock_reserve() (covering kernel code and data), since all other
 	 * regions can be allocated from under CONFIG_NO_BOOTMEM and thus must
 	 * be writable.
 	 */
@@ -863,21 +863,12 @@ kernel_physical_mapping_init(unsigned lo
 #ifndef CONFIG_NUMA
 void __init initmem_init(void)
 {
-	memblock_x86_register_active_regions(0, 0, max_pfn);
+	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
 }
 #endif
 
 void __init paging_init(void)
 {
-	unsigned long max_zone_pfns[MAX_NR_ZONES];
-
-	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-#ifdef CONFIG_ZONE_DMA
-	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
-#endif
-	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
-	max_zone_pfns[ZONE_NORMAL] = max_pfn;
-
 	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
 
@@ -889,7 +880,7 @@ void __init paging_init(void)
 	 */
 	node_clear_state(0, N_NORMAL_MEMORY);
 
-	free_area_init_nodes(max_zone_pfns);
+	zone_sizes_init();
 
 	SetPagePinned(virt_to_page(init_mm.pgd));
 }
--- a/arch/x86/mm/pageattr-xen.c
+++ b/arch/x86/mm/pageattr-xen.c
@@ -1083,7 +1083,7 @@ out_err:
 }
 EXPORT_SYMBOL(set_memory_uc);
 
-int _set_memory_array(unsigned long *addr, int addrinarray,
+static int _set_memory_array(unsigned long *addr, int addrinarray,
 		unsigned long new_type)
 {
 	int i, j;
@@ -1419,12 +1419,6 @@ void kernel_map_pages(struct page *page,
 	}
 
 	/*
-	 * If page allocator is not up yet then do not call c_p_a():
-	 */
-	if (!debug_pagealloc_enabled)
-		return;
-
-	/*
 	 * The return value is ignored as the calls cannot fail.
 	 * Large pages for identity mappings are not used at boot time
 	 * and hence no memory allocations during large page split.
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -337,6 +337,8 @@ acpi_map_lookup_virt(void __iomem *virt,
 #if defined(CONFIG_IA64) || defined(CONFIG_ARM64)
 /* ioremap will take care of cache attributes */
 #define should_use_kmap(pfn)   0
+#elif defined(CONFIG_XEN)
+#define should_use_kmap(mfn)   pfn_valid(pfn = mfn_to_local_pfn(mfn))
 #else
 #define should_use_kmap(pfn)   page_is_ram(pfn)
 #endif
--- a/drivers/hwmon/coretemp-xen.c
+++ b/drivers/hwmon/coretemp-xen.c
@@ -341,7 +341,7 @@ static int create_name_attr(struct platf
 }
 
 static int create_core_attrs(struct temp_data *tdata, struct device *dev,
-				int attr_no)
+			     int attr_no)
 {
 	int err, i;
 	static ssize_t (*const rd_ptr[TOTAL_ATTRS]) (struct device *dev,
--- a/drivers/pci/msi-xen.c
+++ b/drivers/pci/msi-xen.c
@@ -38,18 +38,21 @@ static int pci_seg_supported = 1;
 static LIST_HEAD(msi_dev_head);
 DEFINE_SPINLOCK(msi_dev_lock);
 
+struct msi_pirq_entry {
+	struct list_head list;
+	int pirq;
+	int entry_nr;
+	struct msi_dev_list *dev_entry;
+	struct kobject kobj;
+};
+
 struct msi_dev_list {
 	struct pci_dev *dev;
-	struct list_head list;
 	spinlock_t pirq_list_lock;
 	/* Store default pre-assigned irq */
 	unsigned int default_irq;
-};
-
-struct msi_pirq_entry {
-	struct list_head list;
-	int pirq;
-	int entry_nr;
+	domid_t owner;
+	struct msi_pirq_entry e;
 };
 
 /* Arch hooks */
@@ -89,6 +92,21 @@ static void msix_set_enable(struct pci_d
 	}
 }
 
+static int (*get_owner)(struct pci_dev *dev);
+
+static domid_t msi_get_dev_owner(struct pci_dev *dev)
+{
+	int owner;
+
+	if (is_initial_xendomain()
+	    && get_owner && (owner = get_owner(dev)) >= 0) {
+		dev_info(&dev->dev, "get owner: %u\n", owner);
+		return owner;
+	}
+
+	return DOMID_SELF;
+}
+
 static struct msi_dev_list *get_msi_dev_pirq_list(struct pci_dev *dev)
 {
 	struct msi_dev_list *msi_dev_list, *ret = NULL;
@@ -96,12 +114,14 @@ static struct msi_dev_list *get_msi_dev_
 
 	spin_lock_irqsave(&msi_dev_lock, flags);
 
-	list_for_each_entry(msi_dev_list, &msi_dev_head, list)
+	list_for_each_entry(msi_dev_list, &msi_dev_head, e.list)
 		if ( msi_dev_list->dev == dev )
 			ret = msi_dev_list;
 
 	if ( ret ) {
 		spin_unlock_irqrestore(&msi_dev_lock, flags);
+		if (ret->owner == DOMID_IO)
+			ret->owner = msi_get_dev_owner(dev);
 		return ret;
 	}
 
@@ -116,7 +136,10 @@ static struct msi_dev_list *get_msi_dev_
 
 	ret->dev = dev;
 	spin_lock_init(&ret->pirq_list_lock);
-	list_add_tail(&ret->list, &msi_dev_head);
+	ret->owner = msi_get_dev_owner(dev);
+	ret->e.entry_nr = -1;
+	ret->e.dev_entry = ret;
+	list_add_tail(&ret->e.list, &msi_dev_head);
 	spin_unlock_irqrestore(&msi_dev_lock, flags);
 	return ret;
 }
@@ -131,6 +154,8 @@ static int attach_pirq_entry(int pirq, i
 		return -ENOMEM;
 	entry->pirq = pirq;
 	entry->entry_nr = entry_nr;
+	entry->dev_entry = msi_dev_entry;
+	memset(&entry->kobj, 0, sizeof(entry->kobj));
 	spin_lock_irqsave(&msi_dev_entry->pirq_list_lock, flags);
 	list_add_tail(&entry->list, &msi_dev_entry->dev->msi_list);
 	spin_unlock_irqrestore(&msi_dev_entry->pirq_list_lock, flags);
@@ -154,11 +179,10 @@ static void detach_pirq_entry(int entry_
 	}
 }
 
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
 /*
  * pciback will provide device's owner
  */
-static int (*get_owner)(struct pci_dev *dev);
-
 int register_msi_get_owner(int (*func)(struct pci_dev *dev))
 {
 	if (get_owner) {
@@ -178,26 +202,15 @@ int unregister_msi_get_owner(int (*func)
 	return 0;
 }
 EXPORT_SYMBOL(unregister_msi_get_owner);
+#endif
 
-static int msi_get_dev_owner(struct pci_dev *dev)
-{
-	int owner;
-
-	BUG_ON(!is_initial_xendomain());
-	if (get_owner && (owner = get_owner(dev)) >= 0) {
-		dev_info(&dev->dev, "get owner: %x \n", owner);
-		return owner;
-	}
-
-	return DOMID_SELF;
-}
-
-static int msi_unmap_pirq(struct pci_dev *dev, int pirq)
+static int msi_unmap_pirq(struct pci_dev *dev, int pirq, domid_t owner,
+			  struct kobject *kobj)
 {
 	struct physdev_unmap_pirq unmap;
 	int rc;
 
-	unmap.domid = msi_get_dev_owner(dev);
+	unmap.domid = owner;
 	/* See comments in msi_map_vector, input parameter pirq means
 	 * irq number only if the device belongs to dom0 itself.
 	 */
@@ -210,6 +223,16 @@ static int msi_unmap_pirq(struct pci_dev
 	if (rc < 0)
 		return rc;
 
+	/*
+	 * It's possible that we get into this path when populate_msi_sysfs()
+	 * fails, which means the entries were not registered with sysfs.
+	 * In that case don't unregister them.
+	 */
+	if (kobj->parent) {
+		kobject_del(kobj);
+		kobject_put(kobj);
+	}
+
 	if (unmap.domid == DOMID_SELF)
 		evtchn_map_pirq(pirq, 0);
 
@@ -237,13 +260,11 @@ static u64 find_table_base(struct pci_de
 /*
  * Protected by msi_lock
  */
-static int msi_map_vector(struct pci_dev *dev, int entry_nr, u64 table_base)
+static int msi_map_vector(struct pci_dev *dev, int entry_nr, u64 table_base,
+			  domid_t domid)
 {
 	struct physdev_map_pirq map_irq;
 	int rc = -EINVAL;
-	domid_t domid = DOMID_SELF;
-
-	domid = msi_get_dev_owner(dev);
 
 	map_irq.domid = domid;
 	map_irq.type = MAP_PIRQ_TYPE_MSI_SEG;
@@ -351,6 +372,142 @@ void pci_restore_msi_state(struct pci_de
 }
 EXPORT_SYMBOL_GPL(pci_restore_msi_state);
 
+
+#define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr)
+#define to_pirq_entry(obj) container_of(obj, struct msi_pirq_entry, kobj)
+
+struct msi_attribute {
+	struct attribute        attr;
+	ssize_t (*show)(struct msi_pirq_entry *, struct msi_attribute *,
+			char *buf);
+	ssize_t (*store)(struct msi_pirq_entry *, struct msi_attribute *,
+			 const char *buf, size_t count);
+};
+
+static ssize_t show_msi_mode(struct msi_pirq_entry *entry,
+			     struct msi_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s\n", entry->entry_nr >= 0 ? "msix" : "msi");
+}
+
+static ssize_t show_xen_irq(struct msi_pirq_entry *entry,
+			    struct msi_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", entry->dev_entry->owner == DOMID_SELF
+				    ? evtchn_get_xen_pirq(entry->pirq)
+				    : entry->pirq);
+}
+
+static ssize_t msi_irq_attr_show(struct kobject *kobj,
+				 struct attribute *attr, char *buf)
+{
+	struct msi_attribute *attribute = to_msi_attr(attr);
+	struct msi_pirq_entry *entry = to_pirq_entry(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(entry, attribute, buf);
+}
+
+static const struct sysfs_ops msi_irq_sysfs_ops = {
+	.show = msi_irq_attr_show,
+};
+
+static struct msi_attribute mode_attribute =
+	__ATTR(mode, S_IRUGO, show_msi_mode, NULL);
+
+static struct msi_attribute xen_irq_attribute =
+	__ATTR(xen_irq, S_IRUGO, show_xen_irq, NULL);
+
+static struct attribute *msi_irq_default_attrs[] = {
+	&mode_attribute.attr,
+	&xen_irq_attribute.attr,
+	NULL
+};
+
+static struct attribute *msi_pirq_default_attrs[] = {
+	&mode_attribute.attr,
+	NULL
+};
+
+static void msi_kobj_release(struct kobject *kobj)
+{
+	struct msi_dev_list *entry = to_pirq_entry(kobj)->dev_entry;
+
+	pci_dev_put(entry->dev);
+}
+
+static struct kobj_type msi_irq_ktype = {
+	.release = msi_kobj_release,
+	.sysfs_ops = &msi_irq_sysfs_ops,
+	.default_attrs = msi_irq_default_attrs,
+};
+
+static struct kobj_type msi_pirq_ktype = {
+	.release = msi_kobj_release,
+	.sysfs_ops = &msi_irq_sysfs_ops,
+	.default_attrs = msi_pirq_default_attrs,
+};
+
+static int populate_msi_sysfs(struct pci_dev *pdev)
+{
+	struct msi_dev_list *dev_entry = get_msi_dev_pirq_list(pdev);
+	domid_t owner = dev_entry->owner;
+	struct msi_pirq_entry *pirq_entry;
+	struct kobject *kobj;
+	int ret;
+	int count = 0;
+
+	pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj);
+	if (!pdev->msi_kset)
+		return -ENOMEM;
+
+	if (pdev->msi_enabled) {
+		kobj = &dev_entry->e.kobj;
+		kobj->kset = pdev->msi_kset;
+		pci_dev_get(pdev);
+		if (owner == DOMID_SELF)
+			ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
+						   "%u", pdev->irq);
+		else
+			ret = kobject_init_and_add(kobj, &msi_pirq_ktype, NULL,
+						   "xen-%u", pdev->irq);
+		if (ret)
+			pci_dev_put(pdev);
+		return ret;
+	}
+
+	list_for_each_entry(pirq_entry, &pdev->msi_list, list) {
+		kobj = &pirq_entry->kobj;
+		kobj->kset = pdev->msi_kset;
+		pci_dev_get(pdev);
+		if (owner == DOMID_SELF)
+			ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
+						   "%u", pirq_entry->pirq);
+		else
+			ret = kobject_init_and_add(kobj, &msi_pirq_ktype, NULL,
+						   "xen-%u", pirq_entry->pirq);
+		if (ret)
+			goto out_unroll;
+
+		count++;
+	}
+
+	return 0;
+
+out_unroll:
+	pci_dev_put(pdev);
+	list_for_each_entry(pirq_entry, &pdev->msi_list, list) {
+		if (!count)
+			break;
+		kobject_del(&pirq_entry->kobj);
+		kobject_put(&pirq_entry->kobj);
+		count--;
+	}
+	return ret;
+}
+
 /**
  * msi_capability_init - configure device's MSI capability structure
  * @dev: pointer to the pci_dev data structure of MSI device function
@@ -364,12 +521,13 @@ EXPORT_SYMBOL_GPL(pci_restore_msi_state)
  */
 static int msi_capability_init(struct pci_dev *dev, int nvec)
 {
+	struct msi_dev_list *dev_entry = get_msi_dev_pirq_list(dev);
 	int pos, pirq;
 
 	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
 	msi_set_enable(dev, pos, 0);	/* Disable MSI during set up */
 
-	pirq = msi_map_vector(dev, 0, 0);
+	pirq = msi_map_vector(dev, 0, 0, dev_entry->owner);
 	if (pirq < 0)
 		return -EBUSY;
 
@@ -378,7 +536,8 @@ static int msi_capability_init(struct pc
 	msi_set_enable(dev, pos, 1);
 	dev->msi_enabled = 1;
 
-	dev->irq = pirq;
+	dev->irq = dev_entry->e.pirq = pirq;
+	populate_msi_sysfs(dev);
 	return 0;
 }
 
@@ -437,7 +596,8 @@ static int msix_capability_init(struct p
 		}
 		if (mapped)
 			continue;
-		pirq = msi_map_vector(dev, entries[i].entry, table_base);
+		pirq = msi_map_vector(dev, entries[i].entry, table_base,
+				      msi_dev_entry->owner);
 		if (pirq < 0)
 			break;
 		attach_pirq_entry(pirq, entries[i].entry, msi_dev_entry);
@@ -447,7 +607,12 @@ static int msix_capability_init(struct p
 	if (i != nvec) {
 		int avail = i - 1;
 		for (j = --i; j >= 0; j--) {
-			msi_unmap_pirq(dev, entries[j].vector);
+			list_for_each_entry(pirq_entry, &dev->msi_list, list)
+				if (pirq_entry->entry_nr == entries[j].entry)
+					break;
+			msi_unmap_pirq(dev, entries[j].vector,
+				       msi_dev_entry->owner,
+				       &pirq_entry->kobj);
 			detach_pirq_entry(entries[j].entry, msi_dev_entry);
 			entries[j].vector = 0;
 		}
@@ -462,6 +627,7 @@ static int msix_capability_init(struct p
 	/* Set MSI-X enabled bits and unmask the function */
 	pci_intx_for_msi(dev, 0);
 	dev->msix_enabled = 1;
+	populate_msi_sysfs(dev);
 
 	control &= ~PCI_MSIX_FLAGS_MASKALL;
 	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
@@ -560,7 +726,7 @@ int pci_enable_msi_block(struct pci_dev 
 		dev->irq = evtchn_map_pirq(-1, dev->irq);
 		dev->msi_enabled = 1;
 		msi_dev_entry->default_irq = temp;
-
+		populate_msi_sysfs(dev);
 		return ret;
 #else
 		return -EOPNOTSUPP;
@@ -605,7 +771,10 @@ void pci_msi_shutdown(struct pci_dev *de
 	pirq = dev->irq;
 	/* Restore dev->irq to its default pin-assertion irq */
 	dev->irq = msi_dev_entry->default_irq;
-	msi_unmap_pirq(dev, pirq);
+	msi_unmap_pirq(dev, pirq, msi_dev_entry->owner,
+		       &msi_dev_entry->e.kobj);
+	msi_dev_entry->owner = DOMID_IO;
+	memset(&msi_dev_entry->e.kobj, 0, sizeof(msi_dev_entry->e.kobj));
 
 	/* Disable MSI mode */
 	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
@@ -617,6 +786,8 @@ void pci_msi_shutdown(struct pci_dev *de
 void pci_disable_msi(struct pci_dev *dev)
 {
 	pci_msi_shutdown(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msi);
 
@@ -694,6 +865,7 @@ int pci_enable_msix(struct pci_dev *dev,
 			attach_pirq_entry(irq, entries[i].entry, msi_dev_entry);
 			entries[i].vector = irq;
 		}
+		populate_msi_sysfs(dev);
 		return 0;
 #else
 		return -EOPNOTSUPP;
@@ -760,6 +932,8 @@ void pci_msix_shutdown(struct pci_dev *d
 void pci_disable_msix(struct pci_dev *dev)
 {
 	pci_msix_shutdown(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msix);
 
@@ -774,25 +948,35 @@ EXPORT_SYMBOL(pci_disable_msix);
  **/
 void msi_remove_pci_irq_vectors(struct pci_dev *dev)
 {
-	unsigned long flags;
 	struct msi_dev_list *msi_dev_entry;
-	struct msi_pirq_entry *pirq_entry, *tmp;
 
 	if (!pci_msi_enable || !dev)
 		return;
 
 	msi_dev_entry = get_msi_dev_pirq_list(dev);
 
-	spin_lock_irqsave(&msi_dev_entry->pirq_list_lock, flags);
-	list_for_each_entry_safe(pirq_entry, tmp, &dev->msi_list, list) {
+	for (;;) {
+		struct msi_pirq_entry *pirq_entry;
+		unsigned long flags;
+
+		spin_lock_irqsave(&msi_dev_entry->pirq_list_lock, flags);
+		pirq_entry = list_first_entry_or_null(&dev->msi_list,
+						      struct msi_pirq_entry,
+						      list);
+		if (pirq_entry)
+			list_del(&pirq_entry->list);
+		spin_unlock_irqrestore(&msi_dev_entry->pirq_list_lock, flags);
+		if (!pirq_entry)
+			break;
 		if (is_initial_xendomain())
-			msi_unmap_pirq(dev, pirq_entry->pirq);
+			msi_unmap_pirq(dev, pirq_entry->pirq,
+				       msi_dev_entry->owner,
+				       &pirq_entry->kobj);
 		else
 			evtchn_map_pirq(pirq_entry->pirq, 0);
-		list_del(&pirq_entry->list);
 		kfree(pirq_entry);
 	}
-	spin_unlock_irqrestore(&msi_dev_entry->pirq_list_lock, flags);
+	msi_dev_entry->owner = DOMID_IO;
 	dev->irq = msi_dev_entry->default_irq;
 }
 
@@ -815,5 +999,23 @@ EXPORT_SYMBOL(pci_msi_enabled);
 
 void pci_msi_init_pci_dev(struct pci_dev *dev)
 {
+	int pos;
 	INIT_LIST_HEAD(&dev->msi_list);
+
+	/* Disable the msi hardware to avoid screaming interrupts
+	 * during boot.  This is the power on reset default so
+	 * usually this should be a noop.
+	 * But on a Xen host don't do this for
+	 * - IOMMUs which the hypervisor is in control of (and hence has
+	 *   already enabled on purpose),
+	 * - unprivileged domains.
+	 */
+	if (!is_initial_xendomain()
+	    || ((dev->class >> 8) == PCI_CLASS_SYSTEM_IOMMU
+	        && dev->vendor == PCI_VENDOR_ID_AMD))
+		return;
+	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
+	if (pos)
+		msi_set_enable(dev, pos, 0);
+	msix_set_enable(dev, 0);
 }
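
The MSI sysfs code added above is an instance of the stock kobject pattern: a struct kobject embedded in the per-IRQ entry, a kobj_type whose sysfs_ops dispatch to typed show handlers, and registration under the per-device "msi_irqs" kset via kobject_init_and_add(). A stripped-down sketch of that pattern; the attribute name and container type below are illustrative only.

#include <linux/kernel.h>
#include <linux/kobject.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/sysfs.h>

struct demo_irq {
	int pirq;
	struct kobject kobj;
};
#define to_demo_irq(obj) container_of(obj, struct demo_irq, kobj)

static ssize_t demo_attr_show(struct kobject *kobj, struct attribute *attr,
			      char *buf)
{
	return sprintf(buf, "%d\n", to_demo_irq(kobj)->pirq);
}

static const struct sysfs_ops demo_sysfs_ops = {
	.show = demo_attr_show,
};

static struct attribute demo_pirq_attr = { .name = "pirq", .mode = S_IRUGO };
static struct attribute *demo_default_attrs[] = { &demo_pirq_attr, NULL };

static void demo_irq_release(struct kobject *kobj)
{
	kfree(to_demo_irq(kobj));
}

static struct kobj_type demo_irq_ktype = {
	.release	= demo_irq_release,
	.sysfs_ops	= &demo_sysfs_ops,
	.default_attrs	= demo_default_attrs,
};

/* usage: kobject_init_and_add(&e->kobj, &demo_irq_ktype, parent, "%u", e->pirq); */
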
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -22,10 +22,6 @@ config XEN_UNPRIVILEGED_GUEST
 	select PM
 	select SUSPEND
 
-config XEN_PRIVCMD
-	def_bool y
-	depends on PROC_FS
-
 config XEN_XENBUS_DEV
 	def_bool y
 	depends on PROC_FS
@@ -632,7 +628,8 @@ config XEN_SCSI_BACKEND
 
 config XEN_PRIVCMD
 	tristate
-	depends on XEN
+	depends on PARAVIRT_XEN || (XEN && PROC_FS)
+	default y if XEN
 	default m
 
 config XEN_STUB
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -3,8 +3,10 @@ xen-biomerge-$(CONFIG_PARAVIRT_XEN) := b
 xen-hotplug-$(CONFIG_PARAVIRT_XEN) := cpu_hotplug.o
 xen-balloon_$(CONFIG_PARAVIRT_XEN) := xen-balloon.o
 xen-evtchn-name-$(CONFIG_PARAVIRT_XEN) := xen-evtchn
+xen-privcmd_$(CONFIG_PARAVIRT_XEN) := xen-privcmd.o
 
 xen-balloon_$(CONFIG_XEN)	:= balloon/
+xen-privcmd_$(CONFIG_XEN)	:= privcmd/
 obj-$(CONFIG_XEN)		+= core/
 obj-$(CONFIG_XEN)		+= console/
 obj-y				+= xenbus/
@@ -38,10 +40,12 @@ obj-$(CONFIG_XEN_TMEM)			+= tmem.o
 obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
 obj-$(CONFIG_XEN_DOM0)			+= pci.o
 obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= xen-pciback/
+obj-$(CONFIG_XEN_PRIVCMD)		+= $(xen-privcmd_y)
 
 xen-evtchn-y				:= evtchn.o
 xen-gntdev-y				:= gntdev.o
 xen-gntalloc-y				:= gntalloc.o
+xen-privcmd-y				:= privcmd.o
 
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
 obj-$(CONFIG_XEN_BLKDEV_TAP)		+= blktap/
@@ -59,7 +63,6 @@ obj-$(CONFIG_XEN_SCSI_BACKEND)		+= scsib
 obj-$(CONFIG_XEN_SCSI_FRONTEND)		+= scsifront/
 obj-$(CONFIG_XEN_USB_BACKEND)		+= usbback/
 obj-$(CONFIG_XEN_USB_FRONTEND)		+= usbfront/
-obj-$(CONFIG_XEN_PRIVCMD)	+= privcmd/
 obj-$(CONFIG_XEN_GRANT_DEV)	+= gntdev/
 obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_UTIL)		+= sfc_netutil/
 obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_FRONTEND)	+= sfc_netfront/
--- a/drivers/xen/balloon/balloon.c
+++ b/drivers/xen/balloon/balloon.c
@@ -73,11 +73,6 @@ static DEFINE_MUTEX(balloon_mutex);
  */
 DEFINE_SPINLOCK(balloon_lock);
 
-#ifndef MODULE
-#include <linux/pagevec.h>
-static struct pagevec free_pagevec;
-#endif
-
 struct balloon_stats balloon_stats;
 
 /* We increase/decrease in batches which fit in a page */
@@ -198,27 +193,14 @@ static struct page *balloon_next_page(st
 static inline void balloon_free_page(struct page *page)
 {
 #ifndef MODULE
-	if (put_page_testzero(page) && !pagevec_add(&free_pagevec, page)) {
-		__pagevec_free(&free_pagevec);
-		pagevec_reinit(&free_pagevec);
-	}
+	if (put_page_testzero(page))
+		free_hot_cold_page(page, 1);
 #else
-	/* pagevec interface is not being exported. */
+	/* free_hot_cold_page() is not being exported. */
 	__free_page(page);
 #endif
 }
 
-static inline void balloon_free_and_unlock(unsigned long flags)
-{
-#ifndef MODULE
-	if (pagevec_count(&free_pagevec)) {
-		__pagevec_free(&free_pagevec);
-		pagevec_reinit(&free_pagevec);
-	}
-#endif
-	balloon_unlock(flags);
-}
-
 static void balloon_alarm(unsigned long unused)
 {
 	schedule_work(&balloon_worker);
@@ -330,7 +312,7 @@ static int increase_reservation(unsigned
 	totalram_pages = bs.current_pages - totalram_bias;
 
  out:
-	balloon_free_and_unlock(flags);
+	balloon_unlock(flags);
 
 #ifndef MODULE
 	setup_per_zone_wmarks();
@@ -567,7 +549,6 @@ static int __init balloon_init(void)
 	IPRINTK("Initialising balloon driver.\n");
 
 #ifdef CONFIG_XEN
-	pagevec_init(&free_pagevec, true);
 	bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
 	totalram_pages   = bs.current_pages;
 #else 
@@ -720,7 +701,7 @@ struct page **alloc_empty_pages_and_page
 
 		if (ret != 0) {
 			balloon_free_page(page);
-			balloon_free_and_unlock(flags);
+			balloon_unlock(flags);
 			goto err;
 		}
 
--- a/drivers/xen/balloon/sysfs.c
+++ b/drivers/xen/balloon/sysfs.c
@@ -29,12 +29,11 @@
  */
 
 #include <linux/capability.h>
+#include <linux/device.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/stat.h>
 #include <linux/string.h>
-#include <linux/sysdev.h>
-#include <linux/module.h>
 #include <xen/balloon.h>
 #include "common.h"
 
@@ -45,27 +44,27 @@
 #define BALLOON_CLASS_NAME "xen_memory"
 
 #define BALLOON_SHOW(name, format, args...)			\
-	static ssize_t show_##name(struct sys_device *dev,	\
-				   struct sysdev_attribute *attr, \
+	static ssize_t show_##name(struct device *dev,		\
+				   struct device_attribute *attr, \
 				   char *buf)			\
 	{							\
 		return sprintf(buf, format, ##args);		\
 	}							\
-	static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
+	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
 
 BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(bs.current_pages));
 BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(bs.balloon_low));
 BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(bs.balloon_high));
 BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(bs.driver_pages));
 
-static ssize_t show_target_kb(struct sys_device *dev,
-			      struct sysdev_attribute *attr, char *buf)
+static ssize_t show_target_kb(struct device *dev,
+			      struct device_attribute *attr, char *buf)
 {
 	return sprintf(buf, "%lu\n", PAGES2KB(bs.target_pages));
 }
 
-static ssize_t store_target_kb(struct sys_device *dev,
-			       struct sysdev_attribute *attr,
+static ssize_t store_target_kb(struct device *dev,
+			       struct device_attribute *attr,
 			       const char *buf, size_t count)
 {
 	char *endchar;
@@ -83,19 +82,19 @@ static ssize_t store_target_kb(struct sy
 	return count;
 }
 
-static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(target_kb, S_IRUGO | S_IWUSR,
 		   show_target_kb, store_target_kb);
 
-static ssize_t show_target(struct sys_device *dev,
-			   struct sysdev_attribute *attr, char *buf)
+static ssize_t show_target(struct device *dev,
+			   struct device_attribute *attr, char *buf)
 {
 	return sprintf(buf, "%llu\n",
 		       (unsigned long long)balloon_stats.target_pages
 		       << PAGE_SHIFT);
 }
 
-static ssize_t store_target(struct sys_device *dev,
-			    struct sysdev_attribute *attr,
+static ssize_t store_target(struct device *dev,
+			    struct device_attribute *attr,
 			    const char *buf,
 			    size_t count)
 {
@@ -114,19 +113,19 @@ static ssize_t store_target(struct sys_d
 	return count;
 }
 
-static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(target, S_IRUGO | S_IWUSR,
 		   show_target, store_target);
 
-static struct sysdev_attribute *balloon_attrs[] = {
-	&attr_target_kb,
-	&attr_target,
+static struct device_attribute *balloon_attrs[] = {
+	&dev_attr_target_kb,
+	&dev_attr_target,
 };
 
 static struct attribute *balloon_info_attrs[] = {
-	&attr_current_kb.attr,
-	&attr_low_kb.attr,
-	&attr_high_kb.attr,
-	&attr_driver_kb.attr,
+	&dev_attr_current_kb.attr,
+	&dev_attr_low_kb.attr,
+	&dev_attr_high_kb.attr,
+	&dev_attr_driver_kb.attr,
 	NULL
 };
 
@@ -135,36 +134,37 @@ static const struct attribute_group ball
 	.attrs = balloon_info_attrs,
 };
 
-static struct sysdev_class balloon_sysdev_class = {
+static struct bus_type balloon_subsys = {
 	.name = BALLOON_CLASS_NAME,
+	.dev_name = BALLOON_CLASS_NAME,
 };
 
-static struct sys_device balloon_sysdev;
+static struct device balloon_dev;
 
-static int __init register_balloon(struct sys_device *sysdev)
+static int __init register_balloon(struct device *dev)
 {
 	int i, error;
 
-	error = sysdev_class_register(&balloon_sysdev_class);
+	error = subsys_system_register(&balloon_subsys, NULL);
 	if (error)
 		return error;
 
-	sysdev->id = 0;
-	sysdev->cls = &balloon_sysdev_class;
+	dev->id = 0;
+	dev->bus = &balloon_subsys;
 
-	error = sysdev_register(sysdev);
+	error = device_register(dev);
 	if (error) {
-		sysdev_class_unregister(&balloon_sysdev_class);
+		bus_unregister(&balloon_subsys);
 		return error;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
-		error = sysdev_create_file(sysdev, balloon_attrs[i]);
+		error = device_create_file(dev, balloon_attrs[i]);
 		if (error)
 			goto fail;
 	}
 
-	error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
+	error = sysfs_create_group(&dev->kobj, &balloon_info_group);
 	if (error)
 		goto fail;
 	
@@ -172,33 +172,33 @@ static int __init register_balloon(struc
 
  fail:
 	while (--i >= 0)
-		sysdev_remove_file(sysdev, balloon_attrs[i]);
-	sysdev_unregister(sysdev);
-	sysdev_class_unregister(&balloon_sysdev_class);
+		device_remove_file(dev, balloon_attrs[i]);
+	device_unregister(dev);
+	bus_unregister(&balloon_subsys);
 	return error;
 }
 
-static __exit void unregister_balloon(struct sys_device *sysdev)
+static __exit void unregister_balloon(struct device *dev)
 {
 	int i;
 
-	sysfs_remove_group(&sysdev->kobj, &balloon_info_group);
+	sysfs_remove_group(&dev->kobj, &balloon_info_group);
 	for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++)
-		sysdev_remove_file(sysdev, balloon_attrs[i]);
-	sysdev_unregister(sysdev);
-	sysdev_class_unregister(&balloon_sysdev_class);
+		device_remove_file(dev, balloon_attrs[i]);
+	device_unregister(dev);
+	bus_unregister(&balloon_subsys);
 }
 
 int __init balloon_sysfs_init(void)
 {
-	int rc = register_balloon(&balloon_sysdev);
+	int rc = register_balloon(&balloon_dev);
 
-	register_xen_selfballooning(&balloon_sysdev);
+	register_xen_selfballooning(&balloon_dev);
 
 	return rc;
 }
 
 void __exit balloon_sysfs_exit(void)
 {
-	unregister_balloon(&balloon_sysdev);
+	unregister_balloon(&balloon_dev);
 }
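
The sysdev-to-device conversion above is mechanical: SYSDEV_ATTR becomes DEVICE_ATTR (which emits a dev_attr_<name> variable), the sysdev class becomes a struct bus_type registered with subsys_system_register(), and the files are created with device_create_file(). A minimal sketch of the new-style registration, assuming the v3.3 driver-core API; all names below are placeholders.

#include <linux/device.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/stat.h>

static ssize_t show_answer(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	return sprintf(buf, "%d\n", 42);
}
static DEVICE_ATTR(answer, S_IRUGO, show_answer, NULL);

static struct bus_type demo_subsys = {
	.name = "demo",
	.dev_name = "demo",
};

static struct device demo_dev;

static int __init demo_sysfs_init(void)
{
	int err = subsys_system_register(&demo_subsys, NULL);

	if (err)
		return err;

	demo_dev.id = 0;
	demo_dev.bus = &demo_subsys;
	err = device_register(&demo_dev);
	if (!err)
		err = device_create_file(&demo_dev, &dev_attr_answer);
	return err;
}
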
--- a/drivers/xen/blkback/blkback.c
+++ b/drivers/xen/blkback/blkback.c
@@ -62,7 +62,7 @@ module_param_named(reqs, blkif_reqs, uin
 MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
 
 /* Run-time switchable: /sys/module/blkback/parameters/ */
-static int log_stats;
+static bool log_stats;
 static unsigned int debug_lvl;
 module_param(log_stats, bool, 0644);
 module_param(debug_lvl, uint, 0644);
@@ -330,8 +330,11 @@ irqreturn_t blkif_be_int(int irq, void *
 
 static void dispatch_discard(blkif_t *blkif, struct blkif_request_discard *req)
 {
+	unsigned long secure = (blkif->vbd.discard_secure &&
+				(req->flag & BLKIF_DISCARD_SECURE)) ?
+			       BLKDEV_DISCARD_SECURE : 0;
 	struct phys_req preq;
-	int err = -EOPNOTSUPP, status;
+	int status;
 
 	blkif->st_ds_req++;
 
@@ -348,12 +351,8 @@ static void dispatch_discard(blkif_t *bl
 		return;
 	}
 
-	if (blkif->blk_backend_type == BLKIF_BACKEND_PHY ||
-	    blkif->blk_backend_type == BLKIF_BACKEND_FILE)
-		err = blkdev_issue_discard(preq.bdev, preq.sector_number,
-					   preq.nr_sects, GFP_KERNEL, 0);
-
-	switch (err) {
+	switch (blkdev_issue_discard(preq.bdev, preq.sector_number,
+				     preq.nr_sects, GFP_KERNEL, secure)) {
 	case 0:
 		status = BLKIF_RSP_OKAY;
 		break;
--- a/drivers/xen/blkback/common.h
+++ b/drivers/xen/blkback/common.h
@@ -43,16 +43,12 @@
 	pr_debug("(file=%s, line=%d) " _f,	\
 		 __FILE__ , __LINE__ , ## _a )
 
-enum blkif_backend_type {
-	BLKIF_BACKEND_PHY  = 1,
-	BLKIF_BACKEND_FILE = 2,
-};
-
 struct vbd {
 	blkif_vdev_t   handle;      /* what the domain refers to this vbd as */
 	fmode_t        mode;        /* FMODE_xxx */
 	unsigned char  type;        /* VDISK_xxx */
 	bool           flush_support;
+	bool           discard_secure;
 	u32            pdevice;     /* phys device that this vbd maps to */
 	struct block_device *bdev;
 	sector_t       size;        /* Cached size parameter */
@@ -68,7 +64,6 @@ typedef struct blkif_st {
 	unsigned int      irq;
 	/* Comms information. */
 	enum blkif_protocol blk_protocol;
-	enum blkif_backend_type blk_backend_type;
 	blkif_back_rings_t blk_rings;
 	struct vm_struct *blk_ring_area;
 	/* The VBD attached to this interface. */
--- a/drivers/xen/blkback/vbd.c
+++ b/drivers/xen/blkback/vbd.c
@@ -92,6 +92,9 @@ int vbd_create(blkif_t *blkif, blkif_vde
 	if (q && q->flush_flags)
 		vbd->flush_support = true;
 
+	if (q && blk_queue_secdiscard(q))
+		vbd->discard_secure = true;
+
 	DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
 		handle, blkif->domid);
 	return 0;
--- a/drivers/xen/blkback/xenbus.c
+++ b/drivers/xen/blkback/xenbus.c
@@ -222,43 +222,34 @@ static void blkback_discard(struct xenbu
 			    struct backend_info *be)
 {
 	struct xenbus_device *dev = be->dev;
-	blkif_t *blkif = be->blkif;
-	char *type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
+	struct vbd *vbd = &be->blkif->vbd;
+	struct request_queue *q = bdev_get_queue(vbd->bdev);
 	int err, state = 0;
 
-	if (!IS_ERR(type)) {
-		if (strncmp(type, "file", 4) == 0) {
+	if (blk_queue_discard(q)) {
+		err = xenbus_printf(xbt, dev->nodename, "discard-granularity",
+				    "%u", q->limits.discard_granularity);
+		if (!err)
 			state = 1;
-			blkif->blk_backend_type = BLKIF_BACKEND_FILE;
+		else
+			xenbus_dev_error(dev, err,
+					 "writing discard-granularity");
+		err = xenbus_printf(xbt, dev->nodename, "discard-alignment",
+				    "%u", q->limits.discard_alignment);
+		if (err) {
+			xenbus_dev_error(dev, err,
+					 "writing discard-alignment");
+			state = 0;
 		}
-		if (strncmp(type, "phy", 3) == 0) {
-			struct request_queue *q;
+	}
 
-			q = bdev_get_queue(blkif->vbd.bdev);
-			if (blk_queue_discard(q)) {
-				blkif->blk_backend_type = BLKIF_BACKEND_PHY;
-				err = xenbus_printf(xbt, dev->nodename,
-					"discard-granularity", "%u",
-					q->limits.discard_granularity);
-				if (!err)
-					state = 1;
-				else
-					xenbus_dev_error(dev, err,
-						"writing discard-granularity");
-				err = xenbus_printf(xbt, dev->nodename,
-					"discard-alignment", "%u",
-					q->limits.discard_alignment);
-				if (err) {
-					xenbus_dev_error(dev, err,
-						"writing discard-alignment");
-					state = 0;
-				}
-			}
-		}
-		kfree(type);
-	} else
-		xenbus_dev_error(dev, PTR_ERR(type),
-				 "reading type for discard");
+	/* Optional. */
+	if (state) {
+		err = xenbus_printf(xbt, dev->nodename, "discard-secure",
+				    "%d", vbd->discard_secure);
+		if (err)
+			xenbus_dev_error(dev, err, "writing discard-secure");
+	}
 
 	err = xenbus_printf(xbt, dev->nodename, "feature-discard",
 			    "%d", state);
--- a/drivers/xen/blkfront/blkfront.c
+++ b/drivers/xen/blkfront/blkfront.c
@@ -332,6 +332,7 @@ static void blkfront_setup_discard(struc
 {
 	unsigned int discard_granularity;
 	unsigned int discard_alignment;
+	int discard_secure;
 
 	info->feature_discard = 1;
 	if (!xenbus_gather(XBT_NIL, info->xbdev->otherend,
@@ -341,6 +342,10 @@ static void blkfront_setup_discard(struc
 		info->discard_granularity = discard_granularity;
 		info->discard_alignment = discard_alignment;
 	}
+	if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+			 "discard-secure", "%d", &discard_secure) != 1)
+		discard_secure = 0;
+	info->feature_secdiscard = !!discard_secure;
 }
 
 /*
@@ -764,10 +769,13 @@ int blkif_ioctl(struct block_device *bd,
 				return scsi_cmd_ioctl(filep, info->rq,
 						      info->gd, command,
 						      (void __user *)argument);
-#else
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0)
 				return scsi_cmd_ioctl(info->rq, info->gd,
 						      mode, command,
 						      (void __user *)argument);
+#else
+				return scsi_cmd_blk_ioctl(bd, mode, command,
+							  (void __user *)argument);
 #endif
 			}
 		}
@@ -845,13 +853,15 @@ static int blkif_queue_request(struct re
 		ring_req->operation = BLKIF_OP_WRITE_BARRIER;
 #endif
 
-	if (unlikely(req->cmd_flags & REQ_DISCARD)) {
+	if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) {
 		struct blkif_request_discard *discard = (void *)ring_req;
 
 		/* id, sector_number and handle are set above. */
 		discard->operation = BLKIF_OP_DISCARD;
 		discard->flag = 0;
 		discard->nr_sectors = blk_rq_sectors(req);
+		if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
+			discard->flag = BLKIF_DISCARD_SECURE;
 	} else {
 		ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
 		BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
@@ -1036,7 +1046,9 @@ static irqreturn_t blkif_int(int irq, vo
 					info->gd->disk_name);
 				ret = -EOPNOTSUPP;
 				info->feature_discard = 0;
+				info->feature_secdiscard = 0;
 				queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
+				queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
 			}
 			__blk_end_request_all(req, ret);
 			break;
@@ -1093,6 +1105,9 @@ static void blkif_free(struct blkfront_i
 static void blkif_completion(struct blk_shadow *s)
 {
 	int i;
+
+	if (s->req.operation == BLKIF_OP_DISCARD)
+		return;
 	for (i = 0; i < s->req.nr_segments; i++)
 		gnttab_end_foreign_access(s->req.seg[i].gref, 0UL);
 }
--- a/drivers/xen/blkfront/block.h
+++ b/drivers/xen/blkfront/block.h
@@ -111,7 +111,8 @@ struct blkfront_info
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
 	unsigned int flush_op;
 #endif
-	unsigned int feature_discard;
+	bool feature_discard;
+	bool feature_secdiscard;
 	unsigned int discard_granularity;
 	unsigned int discard_alignment;
 	int is_ready;
--- a/drivers/xen/blkfront/vbd.c
+++ b/drivers/xen/blkfront/vbd.c
@@ -302,7 +302,7 @@ xlbd_reserve_minors(struct xlbd_major_in
 	if (end > ms->nr) {
 		unsigned long *bitmap, *old;
 
-		bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap),
+		bitmap = kcalloc(BITS_TO_LONGS(end), sizeof(*bitmap),
 				 GFP_KERNEL);
 		if (bitmap == NULL)
 			return -ENOMEM;
@@ -371,6 +371,8 @@ xlvbd_init_blk_queue(struct gendisk *gd,
 		blk_queue_max_discard_sectors(rq, get_capacity(gd));
 		rq->limits.discard_granularity = info->discard_granularity;
 		rq->limits.discard_alignment = info->discard_alignment;
+		if (info->feature_secdiscard)
+			queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
 	}
 
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
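
The kcalloc() conversion above is not just cosmetic: like kzalloc() it returns zeroed memory, but it also checks the element-count multiplication for overflow and returns NULL instead of a too-small buffer. A one-function sketch of the converted allocation:

#include <linux/bitops.h>	/* BITS_TO_LONGS() */
#include <linux/slab.h>

static unsigned long *alloc_minor_bitmap(unsigned int nr_minors)
{
	/* An overflowing BITS_TO_LONGS(nr_minors) * sizeof(long) yields NULL. */
	return kcalloc(BITS_TO_LONGS(nr_minors), sizeof(unsigned long),
		       GFP_KERNEL);
}
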
--- a/drivers/xen/blktap/blktap.c
+++ b/drivers/xen/blktap/blktap.c
@@ -131,7 +131,7 @@ static struct tap_blkif *tapfds[MAX_TAP_
 static int blktap_next_minor;
 
 /* Run-time switchable: /sys/module/blktap/parameters/ */
-static int log_stats;
+static bool log_stats;
 static unsigned int debug_lvl;
 module_param(log_stats, bool, 0644);
 module_param(debug_lvl, uint, 0644);
@@ -277,7 +277,7 @@ static inline unsigned int OFFSET_TO_SEG
     } while(0)
 
 
-static char *blktap_devnode(struct device *dev, mode_t *mode)
+static char *blktap_devnode(struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "xen/blktap%u", MINOR(dev->devt));
 }
--- a/drivers/xen/blktap2-new/device.c
+++ b/drivers/xen/blktap2-new/device.c
@@ -425,7 +425,7 @@ blktap_device_destroy_sync(struct blktap
 		   !blktap_device_try_destroy(tap));
 }
 
-static char *blktap_devnode(struct gendisk *gd, mode_t *mode)
+static char *blktap_devnode(struct gendisk *gd, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, BLKTAP2_DEV_DIR "tapdev%u",
 			 gd->first_minor);
--- a/drivers/xen/blktap2-new/sysfs.c
+++ b/drivers/xen/blktap2-new/sysfs.c
@@ -262,7 +262,7 @@ blktap_sysfs_show_devices(struct class *
 }
 static CLASS_ATTR(devices, S_IRUGO, blktap_sysfs_show_devices, NULL);
 
-static char *blktap_devnode(struct device *dev, mode_t *mode)
+static char *blktap_devnode(struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, BLKTAP2_DEV_DIR "blktap%u",
 			 MINOR(dev->devt));
--- a/drivers/xen/blktap2/device.c
+++ b/drivers/xen/blktap2/device.c
@@ -1068,7 +1068,7 @@ blktap_device_destroy(struct blktap *tap
 	return 0;
 }
 
-static char *blktap_devnode(struct gendisk *gd, mode_t *mode)
+static char *blktap_devnode(struct gendisk *gd, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, BLKTAP2_DEV_DIR "tapdev%u",
 			 gd->first_minor);
--- a/drivers/xen/blktap2/sysfs.c
+++ b/drivers/xen/blktap2/sysfs.c
@@ -439,7 +439,7 @@ blktap_sysfs_free(void)
 	class_destroy(class);
 }
 
-static char *blktap_devnode(struct device *dev, mode_t *mode)
+static char *blktap_devnode(struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, BLKTAP2_DEV_DIR "blktap%u",
 			 MINOR(dev->devt));
--- a/drivers/xen/core/cpu_hotplug.c
+++ b/drivers/xen/core/cpu_hotplug.c
@@ -25,7 +25,7 @@ static int local_cpu_hotplug_request(voi
 	return (current->mm != NULL);
 }
 
-static void vcpu_hotplug(unsigned int cpu, struct sys_device *dev)
+static void vcpu_hotplug(unsigned int cpu, struct device *dev)
 {
 	int err;
 	char dir[16], state[16];
@@ -63,7 +63,7 @@ static void handle_vcpu_hotplug_event(
 
 	if ((cpustr = strstr(node, "cpu/")) != NULL) {
 		sscanf(cpustr, "cpu/%u", &cpu);
-		vcpu_hotplug(cpu, get_cpu_sysdev(cpu));
+		vcpu_hotplug(cpu, get_cpu_device(cpu));
 	}
 }
 
@@ -96,7 +96,7 @@ static int setup_cpu_watcher(struct noti
 
 	if (!is_initial_xendomain()) {
 		for_each_possible_cpu(i)
-			vcpu_hotplug(i, get_cpu_sysdev(i));
+			vcpu_hotplug(i, get_cpu_device(i));
 		pr_info("Brought up %ld CPUs\n", (long)num_online_cpus());
 	}
 
--- a/drivers/xen/core/evtchn.c
+++ b/drivers/xen/core/evtchn.c
@@ -329,8 +329,8 @@ asmlinkage void __irq_entry evtchn_do_up
 
 	old_regs = set_irq_regs(regs);
 	xen_spin_irq_enter();
-	exit_idle();
 	irq_enter();
+	exit_idle();
 
 	do {
 		vcpu_info->evtchn_upcall_pending = 0;
--- a/drivers/xen/core/smpboot.c
+++ b/drivers/xen/core/smpboot.c
@@ -443,6 +443,7 @@ void __ref play_dead(void)
 
 void __init smp_cpus_done(unsigned int max_cpus)
 {
+	nmi_selftest();
 }
 
 #ifndef CONFIG_X86_LOCAL_APIC
--- a/drivers/xen/core/spinlock.c
+++ b/drivers/xen/core/spinlock.c
@@ -37,7 +37,7 @@ struct rm_seq {
 };
 static DEFINE_PER_CPU(struct rm_seq, rm_seq);
 
-static int __read_mostly nopoll;
+static bool __read_mostly nopoll;
 module_param(nopoll, bool, 0);
 
 int __cpuinit xen_spinlock_init(unsigned int cpu)
@@ -139,9 +139,7 @@ static unsigned int ticket_drop(struct s
 
 	if (cmpxchg(&spinning->ticket, ticket, -1) != ticket)
 		return -1;
-	asm volatile(UNLOCK_LOCK_PREFIX "inc" UNLOCK_SUFFIX(0) " %0"
-		     : "+m" (lock->tickets.head)
-		     : : "memory", "cc");
+	__add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX);
 	ticket = (__ticket_t)(ticket + 1);
 	return ticket != lock->tickets.tail ? ticket : -1;
 }
--- a/drivers/xen/netback/interface.c
+++ b/drivers/xen/netback/interface.c
@@ -99,7 +99,8 @@ static int netbk_change_mtu(struct net_d
 	return 0;
 }
 
-static u32 netbk_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t netbk_fix_features(struct net_device *dev,
+					    netdev_features_t features)
 {
 	netif_t *netif = netdev_priv(dev);
 
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -51,6 +51,12 @@ struct netbk_rx_meta {
 	u8 copy:1;
 };
 
+struct netbk_tx_cb {
+	u16 copy_slots;
+	u16 pending_idx[1 + XEN_NETIF_NR_SLOTS_MIN];
+};
+#define netbk_tx_cb(skb) ((struct netbk_tx_cb *)skb->cb)
+
 struct netbk_tx_pending_inuse {
 	struct list_head list;
 	unsigned long alloc_time;
@@ -155,6 +161,8 @@ static struct sk_buff_head tx_queue;
 static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
 static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
 static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
+static gnttab_copy_t tx_copy_ops[2 * MAX_PENDING_REQS];
+static netif_tx_request_t tx_slots[XEN_NETIF_NR_SLOTS_MIN];
 
 static struct list_head net_schedule_list;
 static spinlock_t net_schedule_list_lock;
@@ -163,11 +171,19 @@ static spinlock_t net_schedule_list_lock
 static unsigned long mfn_list[MAX_MFN_ALLOC];
 static unsigned int alloc_index = 0;
 
+/*
+ * This is the maximum number of slots a TX request may use. If a guest
+ * sends a TX request which exceeds this limit, it is considered malicious.
+ */
+static unsigned int max_tx_slots = XEN_NETIF_NR_SLOTS_MIN;
+module_param(max_tx_slots, uint, S_IRUGO);
+MODULE_PARM_DESC(max_tx_slots, "Maximum number of slots accepted in netfront TX requests");
+
 /* Setting this allows the safe use of this driver without netloop. */
-static int MODPARM_copy_skb = 1;
+static bool __initdata MODPARM_copy_skb = true;
 module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
 MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
-static int MODPARM_permute_returns = 0;
+static bool MODPARM_permute_returns;
 module_param_named(permute_returns, MODPARM_permute_returns, bool, S_IRUSR|S_IWUSR);
 MODULE_PARM_DESC(permute_returns, "Randomly permute the order in which TX responses are sent to the frontend");
 
@@ -1051,26 +1067,48 @@ static int netbk_count_requests(netif_t 
 				netif_tx_request_t *txp, int work_to_do)
 {
 	RING_IDX cons = netif->tx.req_cons;
-	int frags = 0, drop_err = 0;
+	int slots = 0, drop_err = 0;
 
 	if (!(first->flags & XEN_NETTXF_more_data))
 		return 0;
 
 	do {
-		if (frags >= work_to_do) {
-			netdev_err(netif->dev, "Need more frags\n");
+		if (slots >= work_to_do) {
+			netdev_err(netif->dev, "Need more slots\n");
 			netbk_fatal_tx_err(netif);
 			return -ENODATA;
 		}
 
-		if (unlikely(frags >= MAX_SKB_FRAGS)) {
-			netdev_err(netif->dev, "Too many frags\n");
+		if (unlikely(slots >= max_tx_slots)) {
+			netdev_err(netif->dev, "Too many slots\n");
 			netbk_fatal_tx_err(netif);
 			return -E2BIG;
 		}
 
-		memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
-		       sizeof(*txp));
+		/*
+		 * The Xen network protocol had an implicit dependency on
+		 * MAX_SKB_FRAGS. XEN_NETIF_NR_SLOTS_MIN is set to the
+		 * historical MAX_SKB_FRAGS value 18 to honor the same
+		 * behavior as before. Any packet using more than 18 slots
+		 * but less than max_tx_slots slots is dropped.
+		 */
+		switch (slots) {
+		case 0 ... XEN_NETIF_NR_SLOTS_MIN - 1:
+			break;
+		case XEN_NETIF_NR_SLOTS_MIN:
+			if (net_ratelimit())
+				netdev_dbg(netif->dev,
+					   "slot count exceeding limit of %d, dropping packet\n",
+					   XEN_NETIF_NR_SLOTS_MIN);
+			if (!drop_err)
+				drop_err = -E2BIG;
+			/* fall through */
+		default:
+			--txp;
+			break;
+		}
+
+		*txp = *RING_GET_REQUEST(&netif->tx, cons + slots);
 
 		/*
 		 * If the guest submitted a frame >= 64 KiB then first->size
@@ -1089,7 +1127,7 @@ static int netbk_count_requests(netif_t 
 		}
 
 		first->size -= txp->size;
-		frags++;
+		slots++;
 
 		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
 			netdev_err(netif->dev, "txp->offset: %x, size: %u\n",
@@ -1100,30 +1138,77 @@ static int netbk_count_requests(netif_t 
 	} while ((txp++)->flags & XEN_NETTXF_more_data);
 
 	if (drop_err) {
-		netbk_tx_err(netif, first, cons + frags);
+		netbk_tx_err(netif, first, cons + slots);
 		return drop_err;
 	}
 
-	return frags;
+	return slots;
+}
+
+struct netbk_tx_gop {
+	gnttab_map_grant_ref_t *map;
+	gnttab_copy_t *copy;
+	void *ptr;
+};
+
+static void netbk_fill_tx_copy(const netif_tx_request_t *txreq,
+			       struct netbk_tx_gop *gop, domid_t domid)
+{
+	gop->copy--;
+	gop->copy->source.u.ref = txreq->gref;
+	gop->copy->source.domid = domid;
+	gop->copy->source.offset = txreq->offset;
+	gop->copy->dest.u.gmfn = virt_to_mfn(gop->ptr);
+	gop->copy->dest.domid = DOMID_SELF;
+	gop->copy->dest.offset = offset_in_page(gop->ptr);
+	gop->copy->flags = GNTCOPY_source_gref;
+
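+	/*
+	 * A grant copy must not cross a page boundary in the destination
+	 * buffer; split the request into two copy operations if needed.
+	 */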
+	if (gop->copy->dest.offset + txreq->size > PAGE_SIZE) {
+		unsigned int first = PAGE_SIZE - gop->copy->dest.offset;
+
+		gop->copy->len = first;
+		gop->ptr += first;
+
+		gop->copy--;
+		gop->copy->source = gop->copy[1].source;
+		gop->copy->source.offset += first;
+		gop->copy->dest.u.gmfn = virt_to_mfn(gop->ptr);
+		gop->copy->dest.domid = DOMID_SELF;
+		gop->copy->dest.offset = 0;
+		gop->copy->flags = GNTCOPY_source_gref;
+		gop->copy->len = txreq->size - first;
+	} else
+		gop->copy->len = txreq->size;
+
+	gop->ptr += gop->copy->len;
 }
 
-static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
-						  struct sk_buff *skb,
-						  netif_tx_request_t *txp,
-						  gnttab_map_grant_ref_t *mop)
+static void netbk_get_requests(netif_t *netif, struct sk_buff *skb,
+				netif_tx_request_t *txp, struct netbk_tx_gop *gop)
 {
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	skb_frag_t *frags = shinfo->frags;
-	u16 pending_idx = *(u16 *)skb->data;
+	u16 pending_idx = netbk_tx_cb(skb)->pending_idx[0];
 	int i, start;
 
 	/* Skip first skb fragment if it is on same page as header fragment. */
 	start = (frag_get_pending_idx(frags) == pending_idx);
 
+	for (i = 0; i < netbk_tx_cb(skb)->copy_slots; ++i, txp++) {
+		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
+
+		netbk_fill_tx_copy(txp, gop, netif->domid);
+
+		pending_tx_info[pending_idx].req = *txp;
+		netif_get(netif);
+		pending_tx_info[pending_idx].netif = netif;
+		netbk_tx_cb(skb)->pending_idx[1 + i] = pending_idx;
+	}
+
 	for (i = start; i < shinfo->nr_frags; i++, txp++) {
 		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
 
-		gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
+		gnttab_set_map_op(gop->map++, idx_to_kaddr(pending_idx),
 				  GNTMAP_host_map | GNTMAP_readonly,
 				  txp->gref, netif->domid);
 
@@ -1133,14 +1218,17 @@ static gnttab_map_grant_ref_t *netbk_get
 		frag_set_pending_idx(&frags[i], pending_idx);
 	}
 
-	return mop;
+	if ((void *)gop->map > (void *)gop->copy && net_ratelimit())
+		netdev_warn(netif->dev, "Grant op overrun (%p > %p)\n",
+			    gop->map, gop->copy);
 }
 
-static int netbk_tx_check_mop(struct sk_buff *skb,
-			       gnttab_map_grant_ref_t **mopp)
+static int netbk_tx_check_gop(struct sk_buff *skb,
+			      struct netbk_tx_gop *gop, bool hdr_copied)
 {
-	gnttab_map_grant_ref_t *mop = *mopp;
-	u16 pending_idx = *(u16 *)skb->data;
+	gnttab_copy_t *cop = gop->copy;
+	gnttab_map_grant_ref_t *mop = gop->map;
+	u16 pending_idx = netbk_tx_cb(skb)->pending_idx[0];
 	netif_t *netif = pending_tx_info[pending_idx].netif;
 	netif_tx_request_t *txp;
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -1148,8 +1236,18 @@ static int netbk_tx_check_mop(struct sk_
 	int i, err, start;
 
 	/* Check status of header. */
-	err = mop->status;
-	if (unlikely(err != GNTST_okay)) {
+	if (hdr_copied) {
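+		/*
+		 * Copy ops were filled from the end of tx_copy_ops downwards
+		 * in packet order, so walk cop the same way here.
+		 */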
+		err = (--cop)->status;
+		txp = &pending_tx_info[pending_idx].req;
+		if (txp->size > cop->len)
+			cmpxchg_local(&err, GNTST_okay, (--cop)->status);
+		make_tx_response(netif, txp,
+				 err == GNTST_okay ? XEN_NETIF_RSP_OKAY
+						   : XEN_NETIF_RSP_ERROR);
+		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+		netif_put(netif);
+	} else if (unlikely((err = mop->status) != GNTST_okay)) {
+		++mop;
 		txp = &pending_tx_info[pending_idx].req;
 		make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
 		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
@@ -1157,19 +1255,34 @@ static int netbk_tx_check_mop(struct sk_
 	} else {
 		set_phys_to_machine(idx_to_pfn(pending_idx),
 			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
-		grant_tx_handle[pending_idx] = mop->handle;
+		grant_tx_handle[pending_idx] = mop++->handle;
 	}
 
 	/* Skip first skb fragment if it is on same page as header fragment. */
 	start = (frag_get_pending_idx(shinfo->frags) == pending_idx);
 
-	for (i = start; i < nr_frags; i++) {
+	for (i = 0; i < netbk_tx_cb(skb)->copy_slots; ++i) {
+		int newerr = (--cop)->status;
+
+		pending_idx = netbk_tx_cb(skb)->pending_idx[1 + i];
+		txp = &pending_tx_info[pending_idx].req;
+		if (txp->size > cop->len)
+			cmpxchg_local(&newerr, GNTST_okay, (--cop)->status);
+		make_tx_response(netif, txp,
+				 newerr == GNTST_okay ? XEN_NETIF_RSP_OKAY
+						      : XEN_NETIF_RSP_ERROR);
+		cmpxchg_local(&err, GNTST_okay, newerr);
+		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+		netif_put(netif);
+	}
+
+	for (i = start; i < nr_frags; i++, mop++) {
 		int j, newerr;
 
 		pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
 
 		/* Check error status: if okay then remember grant handle. */
-		newerr = (++mop)->status;
+		newerr = mop->status;
 		if (likely(newerr == GNTST_okay)) {
 			set_phys_to_machine(idx_to_pfn(pending_idx),
 				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
@@ -1191,8 +1304,10 @@ static int netbk_tx_check_mop(struct sk_
 			continue;
 
 		/* First error: invalidate header and preceding fragments. */
-		pending_idx = *((u16 *)skb->data);
-		netif_idx_release(pending_idx);
+		if (!hdr_copied) {
+			pending_idx = netbk_tx_cb(skb)->pending_idx[0];
+			netif_idx_release(pending_idx);
+		}
 		for (j = start; j < i; j++) {
 			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
 			netif_idx_release(pending_idx);
@@ -1202,7 +1317,11 @@ static int netbk_tx_check_mop(struct sk_
 		err = newerr;
 	}
 
-	*mopp = mop + 1;
+	gop->map = mop;
+	gop->copy = cop;
+	if ((void *)mop > (void *)cop && net_ratelimit())
+		netdev_warn(netif->dev, "Grant op check overrun (%p > %p)\n",
+			    mop, cop);
 	return err;
 }
 
@@ -1292,20 +1411,23 @@ static void net_tx_action(unsigned long 
 {
 	struct sk_buff *skb;
 	netif_t *netif;
-	netif_tx_request_t txreq;
-	netif_tx_request_t txfrags[MAX_SKB_FRAGS];
+	netif_tx_request_t txreq, *txslot;
 	struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
 	u16 pending_idx;
 	RING_IDX i;
-	gnttab_map_grant_ref_t *mop;
+	struct netbk_tx_gop gop;
+	multicall_entry_t mcl[2];
 	unsigned int data_len;
 	int ret, work_to_do;
 
+	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct netbk_tx_cb));
+
 	net_tx_action_dealloc();
 
-	mop = tx_map_ops;
-	while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
-		!list_empty(&net_schedule_list)) {
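+	/*
+	 * Map ops are filled upwards from the start of tx_map_ops; copy
+	 * ops are filled downwards from the end of tx_copy_ops.
+	 */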
+	gop.map = tx_map_ops;
+	gop.copy = tx_copy_ops + ARRAY_SIZE(tx_copy_ops);
+	while (NR_PENDING_REQS + XEN_NETIF_NR_SLOTS_MIN < MAX_PENDING_REQS
+	       && !list_empty(&net_schedule_list)) {
 		/* Get a netif from the list with work to do. */
 		netif = poll_net_schedule_list();
 		/*
@@ -1387,7 +1509,8 @@ static void net_tx_action(unsigned long 
 				continue;
 		}
 
-		ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
+		txslot = tx_slots;
+		ret = netbk_count_requests(netif, &txreq, txslot, work_to_do);
 		if (unlikely(ret < 0))
 			continue;
 
@@ -1415,6 +1538,12 @@ static void net_tx_action(unsigned long 
 		data_len = (txreq.size > PKT_PROT_LEN &&
 			    ret < MAX_SKB_FRAGS) ?
 			PKT_PROT_LEN : txreq.size;
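+		/*
+		 * Merge slots which will not fit into the skb frags, or which
+		 * fall entirely within the copied header area (all of them in
+		 * always-copy mode), into the linear part of the skb.
+		 */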
+		while (ret > MAX_SKB_FRAGS ||
+		       (ret && (data_len + txslot->size <= PKT_PROT_LEN ||
+				netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB))) {
+			data_len += txslot++->size;
+			--ret;
+		}
 
 		skb = alloc_skb(data_len + 16 + NET_IP_ALIGN,
 				GFP_ATOMIC | __GFP_NOWARN);
@@ -1440,39 +1569,38 @@ static void net_tx_action(unsigned long 
 			}
 		}
 
-		gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
-				  GNTMAP_host_map | GNTMAP_readonly,
-				  txreq.gref, netif->domid);
-		mop++;
-
 		memcpy(&pending_tx_info[pending_idx].req,
 		       &txreq, sizeof(txreq));
 		pending_tx_info[pending_idx].netif = netif;
-		*((u16 *)skb->data) = pending_idx;
+		netbk_tx_cb(skb)->pending_idx[0] = pending_idx;
+		netbk_tx_cb(skb)->copy_slots = txslot - tx_slots;
 
 		__skb_put(skb, data_len);
+		gop.ptr = skb->data;
 
 		skb_shinfo(skb)->nr_frags = ret;
-		if (data_len < txreq.size)
+		if (data_len < txreq.size) {
+			gnttab_set_map_op(gop.map++, idx_to_kaddr(pending_idx),
+					  GNTMAP_host_map | GNTMAP_readonly,
+					  txreq.gref, netif->domid);
 			skb_shinfo(skb)->nr_frags++;
-		else
+		} else {
+			netbk_fill_tx_copy(&txreq, &gop, netif->domid);
 			pending_idx = INVALID_PENDING_IDX;
+		}
 		frag_set_pending_idx(skb_shinfo(skb)->frags, pending_idx);
 
 		__skb_queue_tail(&tx_queue, skb);
 
 		pending_cons++;
 
-		mop = netbk_get_requests(netif, skb, txfrags, mop);
+		netbk_get_requests(netif, skb, tx_slots, &gop);
 
 		netif->tx.req_cons = i;
 		netif_schedule_work(netif);
-
-		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
-			break;
 	}
 
-	if (mop == tx_map_ops)
+	if (skb_queue_empty(&tx_queue))
 		goto out;
 
     /* NOTE: some maps may fail with GNTST_eagain, which could be successfully
@@ -1480,22 +1608,28 @@ static void net_tx_action(unsigned long 
      * req and let the frontend resend the relevant packet again. This is fine
      * because it is unlikely that a network buffer will be paged out or shared,
      * and therefore it is unlikely to fail with GNTST_eagain. */
-	ret = HYPERVISOR_grant_table_op(
-		GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
-	BUG_ON(ret);
+	MULTI_grant_table_op(&mcl[0], GNTTABOP_copy, gop.copy,
+			     tx_copy_ops + ARRAY_SIZE(tx_copy_ops) - gop.copy);
+	MULTI_grant_table_op(&mcl[1], GNTTABOP_map_grant_ref,
+			     tx_map_ops, gop.map - tx_map_ops);
+	if (HYPERVISOR_multicall_check(mcl, 2, NULL))
+		BUG();
 
-	mop = tx_map_ops;
+	gop.map = tx_map_ops;
+	gop.copy = tx_copy_ops + ARRAY_SIZE(tx_copy_ops);
 	while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
 		struct net_device *dev;
 		netif_tx_request_t *txp;
 
-		pending_idx = *((u16 *)skb->data);
+		pending_idx = netbk_tx_cb(skb)->pending_idx[0];
 		netif       = pending_tx_info[pending_idx].netif;
 		dev         = netif->dev;
 		txp         = &pending_tx_info[pending_idx].req;
+		data_len    = skb->len;
 
-		/* Check the remap error code. */
-		if (unlikely(netbk_tx_check_mop(skb, &mop))) {
+		/* Check the remap/copy error code. */
+		if (unlikely(netbk_tx_check_gop(skb, &gop,
+						data_len >= txp->size))) {
 			netdev_dbg(dev, "netback grant failed.\n");
 			skb_shinfo(skb)->nr_frags = 0;
 			kfree_skb(skb);
@@ -1503,17 +1637,13 @@ static void net_tx_action(unsigned long 
 			continue;
 		}
 
-		data_len = skb->len;
-		memcpy(skb->data,
-		       (void *)(idx_to_kaddr(pending_idx)|txp->offset),
-		       data_len);
 		if (data_len < txp->size) {
+			memcpy(skb->data,
+			       (void *)(idx_to_kaddr(pending_idx) + txp->offset),
+			       data_len);
 			/* Append the packet payload as a fragment. */
 			txp->offset += data_len;
 			txp->size -= data_len;
-		} else {
-			/* Schedule a response immediately. */
-			netif_idx_release(pending_idx);
 		}
 
 		if (txp->flags & XEN_NETTXF_csum_blank)
@@ -1545,15 +1675,6 @@ static void net_tx_action(unsigned long 
 			continue;
 		}
 
-		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
-		    unlikely(skb_linearize(skb))) {
-			netdev_dbg(dev,
-			           "Can't linearize skb in net_tx_action.\n");
-			kfree_skb(skb);
-			dev->stats.rx_dropped++;
-			continue;
-		}
-
 		dev->stats.rx_bytes += skb->len;
 		dev->stats.rx_packets++;
 
@@ -1704,6 +1825,13 @@ static int __init netback_init(void)
 	if (!is_running_on_xen())
 		return -ENODEV;
 
+	BUILD_BUG_ON(XEN_NETIF_NR_SLOTS_MIN >= MAX_PENDING_REQS);
+	if (max_tx_slots < XEN_NETIF_NR_SLOTS_MIN) {
+		pr_info("netback: max_tx_slots too small (%u), using XEN_NETIF_NR_SLOTS_MIN (%d)\n",
+			max_tx_slots, XEN_NETIF_NR_SLOTS_MIN);
+		max_tx_slots = XEN_NETIF_NR_SLOTS_MIN;
+	}
+
 	/* We can increase reservation by this much in net_rx_action(). */
 	balloon_update_driver_allowance(NET_RX_RING_SIZE);
 
--- a/drivers/xen/netfront/netfront.c
+++ b/drivers/xen/netfront/netfront.c
@@ -84,15 +84,15 @@ struct netfront_cb {
  * For paravirtualised guests, flipping is the default.
  */
 #ifdef CONFIG_XEN
-static int MODPARM_rx_copy = 0;
+static bool MODPARM_rx_copy;
 module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
 MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
-static int MODPARM_rx_flip = 0;
+static bool MODPARM_rx_flip;
 module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
 MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
 #else
-static const int MODPARM_rx_copy = 1;
-static const int MODPARM_rx_flip = 0;
+# define MODPARM_rx_copy true
+# define MODPARM_rx_flip false
 #endif
 
 #define RX_COPY_THRESHOLD 256
@@ -230,7 +230,7 @@ static void xennet_sysfs_delif(struct ne
 #define xennet_sysfs_delif(dev) do { } while(0)
 #endif
 
-static inline int xennet_can_sg(struct net_device *dev)
+static inline bool xennet_can_sg(struct net_device *dev)
 {
 	return dev->features & NETIF_F_SG;
 }
@@ -2069,7 +2069,8 @@ static void network_set_multicast_list(s
 {
 }
 
-static u32 xennet_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t xennet_fix_features(struct net_device *dev,
+					     netdev_features_t features)
 {
 	struct netfront_info *np = netdev_priv(dev);
 	int val;
@@ -2095,7 +2096,8 @@ static u32 xennet_fix_features(struct ne
 	return features;
 }
 
-static int xennet_set_features(struct net_device *dev, u32 features)
+static int xennet_set_features(struct net_device *dev,
+			       netdev_features_t features)
 {
 	if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) {
 		netdev_info(dev, "Reducing MTU because no SG offload");
@@ -2280,7 +2282,7 @@ static int __init netif_init(void)
 	}
 
 	if (!MODPARM_rx_flip && !MODPARM_rx_copy)
-		MODPARM_rx_copy = 1; /* Default is to copy. */
+		MODPARM_rx_copy = true; /* Default is to copy. */
 #endif
 
 	netif_init_accel();
--- a/drivers/xen/pcifront/pci_op.c
+++ b/drivers/xen/pcifront/pci_op.c
@@ -12,7 +12,7 @@
 #include <xen/evtchn.h>
 #include "pcifront.h"
 
-static int verbose_request;
+static bool verbose_request;
 module_param(verbose_request, bool, 0644);
 
 static void pcifront_init_sd(struct pcifront_sd *sd,
--- a/drivers/xen/pcifront/xenbus.c
+++ b/drivers/xen/pcifront/xenbus.c
@@ -371,7 +371,7 @@ static int pcifront_detach_devices(struc
 		pci_dev = pci_get_slot(pci_bus, PCI_DEVFN(slot, func));
 		if(!pci_dev) {
 			dev_dbg(&pdev->xdev->dev,
-				"Cannot get PCI device %04x:%02x:%02x.%02x\n",
+				"Cannot get PCI device %04x:%02x:%02x.%u\n",
 				domain, bus, slot, func);
 			continue;
 		}
@@ -379,7 +379,7 @@ static int pcifront_detach_devices(struc
 		pci_dev_put(pci_dev);
 
 		dev_dbg(&pdev->xdev->dev,
-			"PCI device %04x:%02x:%02x.%02x removed.\n",
+			"PCI device %04x:%02x:%02x.%u removed.\n",
 			domain, bus, slot, func);
 	}
 
--- a/drivers/xen/scsiback/scsiback.c
+++ b/drivers/xen/scsiback/scsiback.c
@@ -56,7 +56,7 @@ static unsigned int vscsiif_reqs = 128;
 module_param_named(reqs, vscsiif_reqs, uint, 0);
 MODULE_PARM_DESC(reqs, "Number of scsiback requests to allocate");
 
-static int log_print_stat;
+static bool log_print_stat;
 module_param(log_print_stat, bool, 0644);
 
 #define SCSIBACK_INVALID_HANDLE (~0)
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -323,7 +323,9 @@ void pcistub_put_pci_dev(struct pci_dev 
 	xen_pcibk_config_reset_dev(dev);
 	xen_pcibk_config_free_dyn_fields(dev);
 
+#ifndef CONFIG_XEN
 	xen_unregister_device_domain_owner(dev);
+#endif
 
 	spin_lock_irqsave(&found_psdev->lock, flags);
 	found_psdev->pdev = NULL;
--- a/drivers/xen/xenbus/Makefile
+++ b/drivers/xen/xenbus/Makefile
@@ -1,12 +1,17 @@
 obj-y += xenbus_client.o xenbus_comms.o xenbus_xs.o xenbus_probe.o
-obj-$(CONFIG_XEN_BACKEND) += xenbus_be.o
+backend-standalone-$(CONFIG_XEN) += xenbus_be.o
+obj-$(CONFIG_PARAVIRT_XEN) += xenbus_dev_frontend.o
 
 xenbus_be-objs =
 xenbus_be-objs += xenbus_backend_client.o
+xenbus_be-objs += xenbus_dev_backend.o
 
 xenbus-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o
 obj-y += $(xenbus-y) $(xenbus-m)
 obj-$(CONFIG_XEN_XENBUS_DEV) += xenbus_dev.o
 
 obj-$(CONFIG_PARAVIRT_XEN_BACKEND) += xenbus_probe_backend.o
+backend-standalone-$(CONFIG_PARAVIRT_XEN) += xenbus_dev_backend.o
 obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o
+
+obj-$(CONFIG_XEN_BACKEND) += $(backend-standalone-y)
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -36,18 +36,42 @@
 #include <xen/gnttab.h>
 #else
 #include <linux/types.h>
+#include <linux/spinlock.h>
 #include <linux/vmalloc.h>
 #include <linux/export.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/page.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/event_channel.h>
+#include <xen/balloon.h>
 #include <xen/events.h>
 #include <xen/grant_table.h>
 #endif
 #include <xen/xenbus.h>
+#include <xen/xen.h>
 
-#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#if defined(CONFIG_PARAVIRT_XEN)
+#include "xenbus_probe.h"
+
+struct xenbus_map_node {
+	struct list_head next;
+	union {
+		struct vm_struct *area; /* PV */
+		struct page *page;     /* HVM */
+	};
+	grant_handle_t handle;
+};
+
+static DEFINE_SPINLOCK(xenbus_valloc_lock);
+static LIST_HEAD(xenbus_valloc_pages);
+
+struct xenbus_ring_ops {
+	int (*map)(struct xenbus_device *dev, grant_ref_t gnt, void **vaddr);
+	int (*unmap)(struct xenbus_device *dev, void *vaddr);
+};
+
+static const struct xenbus_ring_ops *ring_ops __read_mostly;
+#elif defined(HAVE_XEN_PLATFORM_COMPAT_H)
 #include <xen/platform-compat.h>
 #endif
 
@@ -418,19 +442,33 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
  */
 int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t gnt_ref, void **vaddr)
 {
+	return ring_ops->map(dev, gnt_ref, vaddr);
+}
+EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
+
+static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
+				     grant_ref_t gnt_ref, void **vaddr)
+{
 	struct gnttab_map_grant_ref op = {
 		.flags = GNTMAP_host_map | GNTMAP_contains_pte,
 		.ref   = gnt_ref,
 		.dom   = dev->otherend_id,
 	};
+	struct xenbus_map_node *node;
 	struct vm_struct *area;
 	pte_t *pte;
 
 	*vaddr = NULL;
 
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
 	area = alloc_vm_area(PAGE_SIZE, &pte);
-	if (!area)
+	if (!area) {
+		kfree(node);
 		return -ENOMEM;
+	}
 
 	op.host_addr = arbitrary_virt_to_machine(pte).maddr;
 
@@ -439,19 +477,59 @@ int xenbus_map_ring_valloc(struct xenbus
 
 	if (op.status != GNTST_okay) {
 		free_vm_area(area);
+		kfree(node);
 		xenbus_dev_fatal(dev, op.status,
 				 "mapping in shared page %d from domain %d",
 				 gnt_ref, dev->otherend_id);
 		return op.status;
 	}
 
-	/* Stuff the handle in an unused field */
-	area->phys_addr = (unsigned long)op.handle;
+	node->handle = op.handle;
+	node->area = area;
+
+	spin_lock(&xenbus_valloc_lock);
+	list_add(&node->next, &xenbus_valloc_pages);
+	spin_unlock(&xenbus_valloc_lock);
 
 	*vaddr = area->addr;
 	return 0;
 }
-EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
+
+static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
+				      grant_ref_t gnt_ref, void **vaddr)
+{
+	struct xenbus_map_node *node;
+	int err;
+	void *addr;
+
+	*vaddr = NULL;
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */);
+	if (err)
+		goto out_err;
+
+	addr = pfn_to_kaddr(page_to_pfn(node->page));
+
+	err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr);
+	if (err)
+		goto out_err;
+
+	spin_lock(&xenbus_valloc_lock);
+	list_add(&node->next, &xenbus_valloc_pages);
+	spin_unlock(&xenbus_valloc_lock);
+
+	*vaddr = addr;
+	return 0;
+
+ out_err:
+	free_xenballooned_pages(1, &node->page);
+	kfree(node);
+	return err;
+}
 
 
 /**
@@ -471,12 +549,10 @@ EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc
 int xenbus_map_ring(struct xenbus_device *dev, grant_ref_t gnt_ref,
 		    grant_handle_t *handle, void *vaddr)
 {
-	struct gnttab_map_grant_ref op = {
-		.host_addr = (unsigned long)vaddr,
-		.flags     = GNTMAP_host_map,
-		.ref       = gnt_ref,
-		.dom       = dev->otherend_id,
-	};
+	struct gnttab_map_grant_ref op;
+
+	gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref,
+			  dev->otherend_id);
 
 	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
 		BUG();
@@ -507,32 +583,36 @@ EXPORT_SYMBOL_GPL(xenbus_map_ring);
  */
 int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
 {
-	struct vm_struct *area;
+	return ring_ops->unmap(dev, vaddr);
+}
+EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
+
+static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
+{
+	struct xenbus_map_node *node;
 	struct gnttab_unmap_grant_ref op = {
 		.host_addr = (unsigned long)vaddr,
 	};
 	unsigned int level;
 
-	/* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr)
-	 * method so that we don't have to muck with vmalloc internals here.
-	 * We could force the user to hang on to their struct vm_struct from
-	 * xenbus_map_ring_valloc, but these 6 lines considerably simplify
-	 * this API.
-	 */
-	read_lock(&vmlist_lock);
-	for (area = vmlist; area != NULL; area = area->next) {
-		if (area->addr == vaddr)
-			break;
+	spin_lock(&xenbus_valloc_lock);
+	list_for_each_entry(node, &xenbus_valloc_pages, next) {
+		if (node->area->addr == vaddr) {
+			list_del(&node->next);
+			goto found;
+		}
 	}
-	read_unlock(&vmlist_lock);
+	node = NULL;
+ found:
+	spin_unlock(&xenbus_valloc_lock);
 
-	if (!area) {
+	if (!node) {
 		xenbus_dev_error(dev, -ENOENT,
 				 "can't find mapped virtual address %p", vaddr);
 		return GNTST_bad_virt_addr;
 	}
 
-	op.handle = (grant_handle_t)area->phys_addr;
+	op.handle = node->handle;
 	op.host_addr = arbitrary_virt_to_machine(
 		lookup_address((unsigned long)vaddr, &level)).maddr;
 
@@ -540,16 +620,50 @@ int xenbus_unmap_ring_vfree(struct xenbu
 		BUG();
 
 	if (op.status == GNTST_okay)
-		free_vm_area(area);
+		free_vm_area(node->area);
 	else
 		xenbus_dev_error(dev, op.status,
 				 "unmapping page at handle %d error %d",
-				 (int16_t)area->phys_addr, op.status);
+				 node->handle, op.status);
 
+	kfree(node);
 	return op.status;
 }
-EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
 
+static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
+{
+	int rv;
+	struct xenbus_map_node *node;
+	void *addr;
+
+	spin_lock(&xenbus_valloc_lock);
+	list_for_each_entry(node, &xenbus_valloc_pages, next) {
+		addr = pfn_to_kaddr(page_to_pfn(node->page));
+		if (addr == vaddr) {
+			list_del(&node->next);
+			goto found;
+		}
+	}
+	node = NULL;
+ found:
+	spin_unlock(&xenbus_valloc_lock);
+
+	if (!node) {
+		xenbus_dev_error(dev, -ENOENT,
+				 "can't find mapped virtual address %p", vaddr);
+		return GNTST_bad_virt_addr;
+	}
+
+	rv = xenbus_unmap_ring(dev, node->handle, addr);
+
+	if (!rv)
+		free_xenballooned_pages(1, &node->page);
+	else
+		WARN(1, "Leaking %p\n", vaddr);
+
+	kfree(node);
+	return rv;
+}
 
 /**
  * xenbus_unmap_ring
@@ -564,10 +678,9 @@ EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfre
 int xenbus_unmap_ring(struct xenbus_device *dev,
 		      grant_handle_t handle, void *vaddr)
 {
-	struct gnttab_unmap_grant_ref op = {
-		.host_addr = (unsigned long)vaddr,
-		.handle    = handle,
-	};
+	struct gnttab_unmap_grant_ref op;
+
+	gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle);
 
 	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
 		BUG();
@@ -600,3 +713,23 @@ enum xenbus_state xenbus_read_driver_sta
 	return result;
 }
 EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
+
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+static const struct xenbus_ring_ops ring_ops_pv = {
+	.map = xenbus_map_ring_valloc_pv,
+	.unmap = xenbus_unmap_ring_vfree_pv,
+};
+
+static const struct xenbus_ring_ops ring_ops_hvm = {
+	.map = xenbus_map_ring_valloc_hvm,
+	.unmap = xenbus_unmap_ring_vfree_hvm,
+};
+
+void __init xenbus_ring_ops_init(void)
+{
+	if (xen_pv_domain())
+		ring_ops = &ring_ops_pv;
+	else
+		ring_ops = &ring_ops_hvm;
+}
+#endif
--- a/drivers/xen/xenbus/xenbus_comms.h
+++ b/drivers/xen/xenbus/xenbus_comms.h
@@ -31,6 +31,8 @@
 #ifndef _XENBUS_COMMS_H
 #define _XENBUS_COMMS_H
 
+#include <linux/fs.h>
+
 int xs_init(void);
 int xb_init_comms(void);
 
@@ -43,6 +45,8 @@ int xs_input_avail(void);
 extern struct xenstore_domain_interface *xen_store_interface;
 extern int xen_store_evtchn;
 
+extern const struct file_operations xen_xenbus_fops;
+
 /* For xenbus internal use. */
 enum {
 	XENBUS_XSD_UNCOMMITTED = 0,
--- a/drivers/xen/xenbus/xenbus_dev_backend.c
+++ b/drivers/xen/xenbus/xenbus_dev_backend.c
@@ -9,7 +9,9 @@
 #include <linux/capability.h>
 
 #include <xen/xen.h>
+#ifdef CONFIG_PARAVIRT_XEN
 #include <xen/page.h>
+#endif
 #include <xen/xenbus.h>
 #include <xen/xenbus_dev.h>
 #include <xen/grant_table.h>
@@ -101,7 +103,7 @@ static int xenbus_backend_mmap(struct fi
 		return -EINVAL;
 
 	if (remap_pfn_range(vma, vma->vm_start,
-			    virt_to_pfn(xen_store_interface),
+			    PFN_DOWN(__pa(xen_store_interface)),
 			    size, vma->vm_page_prot))
 		return -EAGAIN;
 
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -84,10 +84,10 @@
 #endif
 
 int xen_store_evtchn;
-PARAVIRT_EXPORT_SYMBOL(xen_store_evtchn);
+EXPORT_SYMBOL_GPL(xen_store_evtchn);
 
 struct xenstore_domain_interface *xen_store_interface;
-PARAVIRT_EXPORT_SYMBOL(xen_store_interface);
+EXPORT_SYMBOL_GPL(xen_store_interface);
 
 static unsigned long xen_store_mfn;
 
@@ -1330,6 +1330,8 @@ xenbus_init(void)
 
 	xenbus_dev_init();
 #else /* !defined(CONFIG_XEN) && !defined(MODULE) */
+	xenbus_ring_ops_init();
+
 	if (xen_hvm_domain()) {
 		uint64_t v = 0;
 		err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
--- a/drivers/xen/xenbus/xenbus_probe.h
+++ b/drivers/xen/xenbus/xenbus_probe.h
@@ -104,4 +104,6 @@ extern void xenbus_otherend_changed(stru
 extern int xenbus_read_otherend_details(struct xenbus_device *xendev,
 					char *id_node, char *path_node);
 
+void xenbus_ring_ops_init(void);
+
 #endif
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -77,6 +77,7 @@
 #define PCI_CLASS_SYSTEM_RTC		0x0803
 #define PCI_CLASS_SYSTEM_PCI_HOTPLUG	0x0804
 #define PCI_CLASS_SYSTEM_SDHCI		0x0805
+#define PCI_CLASS_SYSTEM_IOMMU		0x0806
 #define PCI_CLASS_SYSTEM_OTHER		0x0880
 
 #define PCI_BASE_CLASS_INPUT		0x09
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -88,11 +88,11 @@ void free_xenballooned_pages(int nr_page
 
 #endif /* CONFIG_PARAVIRT_XEN */
 
-struct sys_device;
+struct device;
 #ifdef CONFIG_XEN_SELFBALLOONING
-extern int register_xen_selfballooning(struct sys_device *sysdev);
+extern int register_xen_selfballooning(struct device *dev);
 #else
-static inline int register_xen_selfballooning(struct sys_device *sysdev)
+static inline int register_xen_selfballooning(struct device *dev)
 {
 	return -ENOSYS;
 }
--- a/include/xen/blkif.h
+++ b/include/xen/blkif.h
@@ -53,7 +53,7 @@ struct blkif_x86_32_request {
 };
 struct blkif_x86_32_discard {
 	uint8_t        operation;    /* BLKIF_OP_DISCARD                     */
-	uint8_t        reserved;     /*                                      */
+	uint8_t        flag;         /* BLKIF_DISCARD_*                      */
 	blkif_vdev_t   handle;       /* same as for read/write requests      */
 	uint64_t       id;           /* private guest value, echoed in resp  */
 	blkif_sector_t sector_number;/* start sector idx on disk             */
@@ -81,7 +81,7 @@ struct blkif_x86_64_request {
 };
 struct blkif_x86_64_discard {
 	uint8_t        operation;    /* BLKIF_OP_DISCARD                     */
-	uint8_t        reserved;     /*                                      */
+	uint8_t        flag;         /* BLKIF_DISCARD_*                      */
 	blkif_vdev_t   handle;       /* sane as for read/write requests      */
 	uint64_t       __attribute__((__aligned__(8))) id;
 	blkif_sector_t sector_number;/* start sector idx on disk             */
--- a/include/xen/evtchn.h
+++ b/include/xen/evtchn.h
@@ -56,6 +56,7 @@ struct irq_cfg {
 	};
 };
 struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node);
+static inline int evtchn_make_refcounted(unsigned int evtchn) { return 0; }
 #endif
 
 /*
--- a/include/xen/interface/grant_table.h
+++ b/include/xen/interface/grant_table.h
@@ -124,7 +124,9 @@ typedef uint32_t grant_ref_t;
  * Version 1 of the grant table entry structure is maintained purely
  * for backwards compatibility.  New guests should use version 2.
  */
-#if __XEN_INTERFACE_VERSION__ < 0x0003020a
+#if defined(CONFIG_PARAVIRT_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H)
+#define grant_entry grant_entry_v1
+#elif __XEN_INTERFACE_VERSION__ < 0x0003020a
 #define grant_entry_v1 grant_entry
 #define grant_entry_v1_t grant_entry_t
 #endif
@@ -212,7 +214,7 @@ typedef struct grant_entry_v1 grant_entr
  * The interface by which domains use grant references does not depend
  * on the grant table version in use by the other domain.
  */
-#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a || (defined(CONFIG_PARAVIRT_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H))
 /*
  * Version 1 and version 2 grant entries share a common prefix.  The
  * fields of the prefix are documented as part of struct
@@ -305,7 +307,7 @@ typedef uint16_t grant_status_t;
 #define GNTTABOP_copy                 5
 #define GNTTABOP_query_size           6
 #define GNTTABOP_unmap_and_replace    7
-#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a || (defined(CONFIG_PARAVIRT_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H))
 #define GNTTABOP_set_version          8
 #define GNTTABOP_get_status_frames    9
 #define GNTTABOP_get_version          10
@@ -514,10 +516,11 @@ struct gnttab_unmap_and_replace {
     /* OUT parameters. */
     int16_t  status;              /* => enum grant_status */
 };
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace);
 typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t;
 DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t);
 
-#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a || (defined(CONFIG_PARAVIRT_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H))
 /*
  * GNTTABOP_set_version: Request a particular version of the grant
  * table shared table structure.  This operation can only be performed
@@ -529,6 +532,7 @@ struct gnttab_set_version {
     /* IN/OUT parameters */
     uint32_t version;
 };
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version);
 typedef struct gnttab_set_version gnttab_set_version_t;
 DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t);
 
@@ -553,6 +557,7 @@ struct gnttab_get_status_frames {
     int16_t  status;              /* => enum grant_status */
     XEN_GUEST_HANDLE(uint64_t) frame_list;
 };
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames);
 typedef struct gnttab_get_status_frames gnttab_get_status_frames_t;
 DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t);
 
@@ -567,6 +572,7 @@ struct gnttab_get_version {
     /* OUT parameters */
     uint32_t version;
 };
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);
 typedef struct gnttab_get_version gnttab_get_version_t;
 DEFINE_XEN_GUEST_HANDLE(gnttab_get_version_t);
 
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -564,25 +564,40 @@ struct blkif_request_segment {
  */
 struct blkif_request {
     uint8_t        operation;    /* BLKIF_OP_???                         */
+#if !defined(CONFIG_PARAVIRT_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
     uint8_t        nr_segments;  /* number of segments                   */
     blkif_vdev_t   handle;       /* only for read/write requests         */
     uint64_t       id;           /* private guest value, echoed in resp  */
-#if !defined(CONFIG_PARAVIRT_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
     blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
     struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
 #else
     union {
-        struct blkif_request_rw {
+        struct __attribute__((__packed__)) blkif_request_rw {
+            uint8_t        nr_segments;  /* number of segments                  */
+            blkif_vdev_t   handle;       /* only for read/write requests        */
+#ifdef CONFIG_X86_64
+            uint32_t       _pad1;        /* offsetof(blkif_request,u.rw.id) == 8 */
+#endif
+            uint64_t       id;           /* private guest value, echoed in resp */
             blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
             struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
         } rw;
-        struct blkif_request_discard {
+        struct __attribute__((__packed__)) blkif_request_discard {
+            uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero.        */
+#define BLKIF_DISCARD_SECURE (1<<0)      /* ignored if discard-secure=0          */
+            blkif_vdev_t   _pad1;        /* only for read/write requests         */
+#ifdef CONFIG_X86_64
+            uint32_t       _pad2;        /* offsetof(blkif_req..,u.discard.id)==8*/
+#endif
+            uint64_t       id;           /* private guest value, echoed in resp  */
             blkif_sector_t sector_number;
-            uint64_t nr_sectors;
+            uint64_t       nr_sectors;
+            uint8_t        _pad3;
         } discard;
     } u;
+} __attribute__((__packed__));
 #endif
-};
 typedef struct blkif_request blkif_request_t;
 
 #if !defined(CONFIG_PARAVIRT_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
--- a/lib/swiotlb-xen.c
+++ b/lib/swiotlb-xen.c
@@ -114,11 +114,11 @@ setup_io_tlb_npages(char *str)
 __setup("swiotlb=", setup_io_tlb_npages);
 /* make io_tlb_overflow tunable too? */
 
-unsigned long swioltb_nr_tbl(void)
+unsigned long swiotlb_nr_tbl(void)
 {
 	return io_tlb_nslabs;
 }
-
+EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
 /* Note that this doesn't work with highmem page */
 static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
 				      volatile void *address)