Automatically created from "patch-3.3" by xen-port-patches.py
From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux: 3.3
Patch-mainline: 3.3
This patch contains the differences between 3.2 and 3.3.
Acked-by: Jan Beulich <jbeulich@suse.com>
3.4/arch/x86/include/mach-xen/asm/i387.h (moved to fpu-internal.h)
--- 12.2.orig/arch/x86/ia32/ia32entry-xen.S 2011-11-17 15:56:06.000000000 +0100
+++ 12.2/arch/x86/ia32/ia32entry-xen.S 2012-02-09 12:46:23.000000000 +0100
@@ -14,6 +14,7 @@
#include <asm/segment.h>
#include <asm/irqflags.h>
#include <linux/linkage.h>
+#include <linux/err.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
@@ -22,8 +23,6 @@
.section .entry.text, "ax"
-#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
-
.macro IA32_ARG_FIXUP noebp=0
movl %edi,%r8d
.if \noebp
@@ -128,7 +127,7 @@ ENTRY(ia32_sysenter_target)
CFI_RESTORE rcx
movl %ebp,%ebp /* zero extension */
movl %eax,%eax
- movl 48-THREAD_SIZE+TI_sysenter_return(%rsp),%r10d
+ movl TI_sysenter_return+THREAD_INFO(%rsp,8*6-KERNEL_STACK_OFFSET),%r10d
movl $__USER32_DS,40(%rsp)
movq %rbp,32(%rsp)
movl $__USER32_CS,16(%rsp)
@@ -142,9 +141,8 @@ ENTRY(ia32_sysenter_target)
.section __ex_table,"a"
.quad 1b,ia32_badarg
.previous
- GET_THREAD_INFO(%r10)
- orl $TS_COMPAT,TI_status(%r10)
- testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+ orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jnz sysenter_tracesys
jmp .Lia32_check_call
@@ -156,7 +154,7 @@ ENTRY(ia32_sysenter_target)
movl %ebx,%edx /* 3rd arg: 1st syscall arg */
movl %eax,%esi /* 2nd arg: syscall number */
movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */
- call audit_syscall_entry
+ call __audit_syscall_entry
movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
@@ -219,9 +217,8 @@ ENTRY(ia32_cstar_target)
.section __ex_table,"a"
.quad 1b,ia32_badarg
.previous
- GET_THREAD_INFO(%r10)
- orl $TS_COMPAT,TI_status(%r10)
- testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+ orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jnz cstar_tracesys
cmpq $IA32_NR_syscalls-1,%rax
ja ia32_badsys
@@ -238,7 +235,7 @@ cstar_auditsys:
cstar_tracesys:
#ifdef CONFIG_AUDITSYSCALL
- testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jz cstar_auditsys
#endif
xchgl %r9d,%ebp
@@ -302,9 +299,8 @@ ENTRY(ia32_syscall)
/* note the registers are not zero extended to the sf.
this could be a problem. */
SAVE_ARGS 0,1,0
- GET_THREAD_INFO(%r10)
- orl $TS_COMPAT,TI_status(%r10)
- testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+ orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jnz ia32_tracesys
.Lia32_check_call:
cmpq $(IA32_NR_syscalls-1),%rax
@@ -320,7 +316,7 @@ ia32_sysret:
sysenter_tracesys:
#ifdef CONFIG_AUDITSYSCALL
- testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jz sysenter_auditsys
#endif
ia32_tracesys:
@@ -341,14 +337,11 @@ ia32_badsys:
movq $-ENOSYS,%rax
jmp ia32_sysret
-quiet_ni_syscall:
- movq $-ENOSYS,%rax
- ret
CFI_ENDPROC
.macro PTREGSCALL label, func, arg
- .globl \label
-\label:
+ ALIGN
+GLOBAL(\label)
leaq \func(%rip),%rax
leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
jmp ia32_ptregs_common
@@ -365,7 +358,8 @@ quiet_ni_syscall:
PTREGSCALL stub32_vfork, sys_vfork, %rdi
PTREGSCALL stub32_iopl, sys_iopl, %rsi
-ENTRY(ia32_ptregs_common)
+ ALIGN
+ia32_ptregs_common:
popq %r11
CFI_ENDPROC
CFI_STARTPROC32 simple
@@ -387,357 +381,3 @@ ENTRY(ia32_ptregs_common)
jmp ia32_sysret /* misbalances the return cache */
CFI_ENDPROC
END(ia32_ptregs_common)
-
- .section .rodata,"a"
- .align 8
-ia32_sys_call_table:
- .quad sys_restart_syscall
- .quad sys_exit
- .quad stub32_fork
- .quad sys_read
- .quad sys_write
- .quad compat_sys_open /* 5 */
- .quad sys_close
- .quad sys32_waitpid
- .quad sys_creat
- .quad sys_link
- .quad sys_unlink /* 10 */
- .quad stub32_execve
- .quad sys_chdir
- .quad compat_sys_time
- .quad sys_mknod
- .quad sys_chmod /* 15 */
- .quad sys_lchown16
- .quad quiet_ni_syscall /* old break syscall holder */
- .quad sys_stat
- .quad sys32_lseek
- .quad sys_getpid /* 20 */
- .quad compat_sys_mount /* mount */
- .quad sys_oldumount /* old_umount */
- .quad sys_setuid16
- .quad sys_getuid16
- .quad compat_sys_stime /* stime */ /* 25 */
- .quad compat_sys_ptrace /* ptrace */
- .quad sys_alarm
- .quad sys_fstat /* (old)fstat */
- .quad sys_pause
- .quad compat_sys_utime /* 30 */
- .quad quiet_ni_syscall /* old stty syscall holder */
- .quad quiet_ni_syscall /* old gtty syscall holder */
- .quad sys_access
- .quad sys_nice
- .quad quiet_ni_syscall /* 35 */ /* old ftime syscall holder */
- .quad sys_sync
- .quad sys32_kill
- .quad sys_rename
- .quad sys_mkdir
- .quad sys_rmdir /* 40 */
- .quad sys_dup
- .quad sys_pipe
- .quad compat_sys_times
- .quad quiet_ni_syscall /* old prof syscall holder */
- .quad sys_brk /* 45 */
- .quad sys_setgid16
- .quad sys_getgid16
- .quad sys_signal
- .quad sys_geteuid16
- .quad sys_getegid16 /* 50 */
- .quad sys_acct
- .quad sys_umount /* new_umount */
- .quad quiet_ni_syscall /* old lock syscall holder */
- .quad compat_sys_ioctl
- .quad compat_sys_fcntl64 /* 55 */
- .quad quiet_ni_syscall /* old mpx syscall holder */
- .quad sys_setpgid
- .quad quiet_ni_syscall /* old ulimit syscall holder */
- .quad sys_olduname
- .quad sys_umask /* 60 */
- .quad sys_chroot
- .quad compat_sys_ustat
- .quad sys_dup2
- .quad sys_getppid
- .quad sys_getpgrp /* 65 */
- .quad sys_setsid
- .quad sys32_sigaction
- .quad sys_sgetmask
- .quad sys_ssetmask
- .quad sys_setreuid16 /* 70 */
- .quad sys_setregid16
- .quad sys32_sigsuspend
- .quad compat_sys_sigpending
- .quad sys_sethostname
- .quad compat_sys_setrlimit /* 75 */
- .quad compat_sys_old_getrlimit /* old_getrlimit */
- .quad compat_sys_getrusage
- .quad compat_sys_gettimeofday
- .quad compat_sys_settimeofday
- .quad sys_getgroups16 /* 80 */
- .quad sys_setgroups16
- .quad compat_sys_old_select
- .quad sys_symlink
- .quad sys_lstat
- .quad sys_readlink /* 85 */
- .quad sys_uselib
- .quad sys_swapon
- .quad sys_reboot
- .quad compat_sys_old_readdir
- .quad sys32_mmap /* 90 */
- .quad sys_munmap
- .quad sys_truncate
- .quad sys_ftruncate
- .quad sys_fchmod
- .quad sys_fchown16 /* 95 */
- .quad sys_getpriority
- .quad sys_setpriority
- .quad quiet_ni_syscall /* old profil syscall holder */
- .quad compat_sys_statfs
- .quad compat_sys_fstatfs /* 100 */
- .quad sys_ioperm
- .quad compat_sys_socketcall
- .quad sys_syslog
- .quad compat_sys_setitimer
- .quad compat_sys_getitimer /* 105 */
- .quad compat_sys_newstat
- .quad compat_sys_newlstat
- .quad compat_sys_newfstat
- .quad sys_uname
- .quad stub32_iopl /* 110 */
- .quad sys_vhangup
- .quad quiet_ni_syscall /* old "idle" system call */
- .quad sys32_vm86_warning /* vm86old */
- .quad compat_sys_wait4
- .quad sys_swapoff /* 115 */
- .quad compat_sys_sysinfo
- .quad sys32_ipc
- .quad sys_fsync
- .quad stub32_sigreturn
- .quad stub32_clone /* 120 */
- .quad sys_setdomainname
- .quad sys_newuname
- .quad sys_modify_ldt
- .quad compat_sys_adjtimex
- .quad sys32_mprotect /* 125 */
- .quad compat_sys_sigprocmask
- .quad quiet_ni_syscall /* create_module */
- .quad sys_init_module
- .quad sys_delete_module
- .quad quiet_ni_syscall /* 130 get_kernel_syms */
- .quad sys32_quotactl
- .quad sys_getpgid
- .quad sys_fchdir
- .quad quiet_ni_syscall /* bdflush */
- .quad sys_sysfs /* 135 */
- .quad sys_personality
- .quad quiet_ni_syscall /* for afs_syscall */
- .quad sys_setfsuid16
- .quad sys_setfsgid16
- .quad sys_llseek /* 140 */
- .quad compat_sys_getdents
- .quad compat_sys_select
- .quad sys_flock
- .quad sys_msync
- .quad compat_sys_readv /* 145 */
- .quad compat_sys_writev
- .quad sys_getsid
- .quad sys_fdatasync
- .quad compat_sys_sysctl /* sysctl */
- .quad sys_mlock /* 150 */
- .quad sys_munlock
- .quad sys_mlockall
- .quad sys_munlockall
- .quad sys_sched_setparam
- .quad sys_sched_getparam /* 155 */
- .quad sys_sched_setscheduler
- .quad sys_sched_getscheduler
- .quad sys_sched_yield
- .quad sys_sched_get_priority_max
- .quad sys_sched_get_priority_min /* 160 */
- .quad sys32_sched_rr_get_interval
- .quad compat_sys_nanosleep
- .quad sys_mremap
- .quad sys_setresuid16
- .quad sys_getresuid16 /* 165 */
- .quad sys32_vm86_warning /* vm86 */
- .quad quiet_ni_syscall /* query_module */
- .quad sys_poll
- .quad quiet_ni_syscall /* old nfsservctl */
- .quad sys_setresgid16 /* 170 */
- .quad sys_getresgid16
- .quad sys_prctl
- .quad stub32_rt_sigreturn
- .quad sys32_rt_sigaction
- .quad sys32_rt_sigprocmask /* 175 */
- .quad sys32_rt_sigpending
- .quad compat_sys_rt_sigtimedwait
- .quad sys32_rt_sigqueueinfo
- .quad sys_rt_sigsuspend
- .quad sys32_pread /* 180 */
- .quad sys32_pwrite
- .quad sys_chown16
- .quad sys_getcwd
- .quad sys_capget
- .quad sys_capset
- .quad stub32_sigaltstack
- .quad sys32_sendfile
- .quad quiet_ni_syscall /* streams1 */
- .quad quiet_ni_syscall /* streams2 */
- .quad stub32_vfork /* 190 */
- .quad compat_sys_getrlimit
- .quad sys_mmap_pgoff
- .quad sys32_truncate64
- .quad sys32_ftruncate64
- .quad sys32_stat64 /* 195 */
- .quad sys32_lstat64
- .quad sys32_fstat64
- .quad sys_lchown
- .quad sys_getuid
- .quad sys_getgid /* 200 */
- .quad sys_geteuid
- .quad sys_getegid
- .quad sys_setreuid
- .quad sys_setregid
- .quad sys_getgroups /* 205 */
- .quad sys_setgroups
- .quad sys_fchown
- .quad sys_setresuid
- .quad sys_getresuid
- .quad sys_setresgid /* 210 */
- .quad sys_getresgid
- .quad sys_chown
- .quad sys_setuid
- .quad sys_setgid
- .quad sys_setfsuid /* 215 */
- .quad sys_setfsgid
- .quad sys_pivot_root
- .quad sys_mincore
- .quad sys_madvise
- .quad compat_sys_getdents64 /* 220 getdents64 */
- .quad compat_sys_fcntl64
- .quad quiet_ni_syscall /* tux */
- .quad quiet_ni_syscall /* security */
- .quad sys_gettid
- .quad sys32_readahead /* 225 */
- .quad sys_setxattr
- .quad sys_lsetxattr
- .quad sys_fsetxattr
- .quad sys_getxattr
- .quad sys_lgetxattr /* 230 */
- .quad sys_fgetxattr
- .quad sys_listxattr
- .quad sys_llistxattr
- .quad sys_flistxattr
- .quad sys_removexattr /* 235 */
- .quad sys_lremovexattr
- .quad sys_fremovexattr
- .quad sys_tkill
- .quad sys_sendfile64
- .quad compat_sys_futex /* 240 */
- .quad compat_sys_sched_setaffinity
- .quad compat_sys_sched_getaffinity
- .quad sys_set_thread_area
- .quad sys_get_thread_area
- .quad compat_sys_io_setup /* 245 */
- .quad sys_io_destroy
- .quad compat_sys_io_getevents
- .quad compat_sys_io_submit
- .quad sys_io_cancel
- .quad sys32_fadvise64 /* 250 */
- .quad quiet_ni_syscall /* free_huge_pages */
- .quad sys_exit_group
- .quad sys32_lookup_dcookie
- .quad sys_epoll_create
- .quad sys_epoll_ctl /* 255 */
- .quad sys_epoll_wait
- .quad sys_remap_file_pages
- .quad sys_set_tid_address
- .quad compat_sys_timer_create
- .quad compat_sys_timer_settime /* 260 */
- .quad compat_sys_timer_gettime
- .quad sys_timer_getoverrun
- .quad sys_timer_delete
- .quad compat_sys_clock_settime
- .quad compat_sys_clock_gettime /* 265 */
- .quad compat_sys_clock_getres
- .quad compat_sys_clock_nanosleep
- .quad compat_sys_statfs64
- .quad compat_sys_fstatfs64
- .quad sys_tgkill /* 270 */
- .quad compat_sys_utimes
- .quad sys32_fadvise64_64
- .quad quiet_ni_syscall /* sys_vserver */
- .quad sys_mbind
- .quad compat_sys_get_mempolicy /* 275 */
- .quad sys_set_mempolicy
- .quad compat_sys_mq_open
- .quad sys_mq_unlink
- .quad compat_sys_mq_timedsend
- .quad compat_sys_mq_timedreceive /* 280 */
- .quad compat_sys_mq_notify
- .quad compat_sys_mq_getsetattr
- .quad compat_sys_kexec_load /* reserved for kexec */
- .quad compat_sys_waitid
- .quad quiet_ni_syscall /* 285: sys_altroot */
- .quad sys_add_key
- .quad sys_request_key
- .quad sys_keyctl
- .quad sys_ioprio_set
- .quad sys_ioprio_get /* 290 */
- .quad sys_inotify_init
- .quad sys_inotify_add_watch
- .quad sys_inotify_rm_watch
- .quad sys_migrate_pages
- .quad compat_sys_openat /* 295 */
- .quad sys_mkdirat
- .quad sys_mknodat
- .quad sys_fchownat
- .quad compat_sys_futimesat
- .quad sys32_fstatat /* 300 */
- .quad sys_unlinkat
- .quad sys_renameat
- .quad sys_linkat
- .quad sys_symlinkat
- .quad sys_readlinkat /* 305 */
- .quad sys_fchmodat
- .quad sys_faccessat
- .quad compat_sys_pselect6
- .quad compat_sys_ppoll
- .quad sys_unshare /* 310 */
- .quad compat_sys_set_robust_list
- .quad compat_sys_get_robust_list
- .quad sys_splice
- .quad sys32_sync_file_range
- .quad sys_tee /* 315 */
- .quad compat_sys_vmsplice
- .quad compat_sys_move_pages
- .quad sys_getcpu
- .quad sys_epoll_pwait
- .quad compat_sys_utimensat /* 320 */
- .quad compat_sys_signalfd
- .quad sys_timerfd_create
- .quad sys_eventfd
- .quad sys32_fallocate
- .quad compat_sys_timerfd_settime /* 325 */
- .quad compat_sys_timerfd_gettime
- .quad compat_sys_signalfd4
- .quad sys_eventfd2
- .quad sys_epoll_create1
- .quad sys_dup3 /* 330 */
- .quad sys_pipe2
- .quad sys_inotify_init1
- .quad compat_sys_preadv
- .quad compat_sys_pwritev
- .quad compat_sys_rt_tgsigqueueinfo /* 335 */
- .quad sys_perf_event_open
- .quad compat_sys_recvmmsg
- .quad sys_fanotify_init
- .quad sys32_fanotify_mark
- .quad sys_prlimit64 /* 340 */
- .quad sys_name_to_handle_at
- .quad compat_sys_open_by_handle_at
- .quad compat_sys_clock_adjtime
- .quad sys_syncfs
- .quad compat_sys_sendmmsg /* 345 */
- .quad sys_setns
- .quad compat_sys_process_vm_readv
- .quad compat_sys_process_vm_writev
-ia32_syscall_end:
--- 12.2.orig/arch/x86/include/asm/debugreg.h 2012-06-20 12:12:04.000000000 +0200
+++ 12.2/arch/x86/include/asm/debugreg.h 2012-04-10 17:25:00.000000000 +0200
@@ -168,7 +168,7 @@ extern void aout_dump_debugregs(struct u
extern void hw_breakpoint_restore(void);
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_64) && !defined(CONFIG_X86_NO_IDT)
DECLARE_PER_CPU(int, debug_stack_usage);
static inline void debug_stack_usage_inc(void)
{
--- 12.2.orig/arch/x86/include/mach-xen/asm/desc.h 2011-09-08 16:54:08.000000000 +0200
+++ 12.2/arch/x86/include/mach-xen/asm/desc.h 2012-02-09 12:32:50.000000000 +0100
@@ -36,6 +36,8 @@ static inline void fill_ldt(struct desc_
#ifndef CONFIG_X86_NO_IDT
extern struct desc_ptr idt_descr;
extern gate_desc idt_table[];
+extern struct desc_ptr nmi_idt_descr;
+extern gate_desc nmi_idt_table[];
#endif
struct gdt_page {
@@ -332,6 +334,16 @@ static inline void set_desc_limit(struct
}
#ifndef CONFIG_X86_NO_IDT
+#ifdef CONFIG_X86_64
+static inline void set_nmi_gate(int gate, void *addr)
+{
+ gate_desc s;
+
+ pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
+ write_idt_entry(nmi_idt_table, gate, &s);
+}
+#endif
+
static inline void _set_gate(int gate, unsigned type, void *addr,
unsigned dpl, unsigned ist, unsigned seg)
{
--- 12.2.orig/arch/x86/include/mach-xen/asm/fixmap.h 2011-09-08 16:54:08.000000000 +0200
+++ 12.2/arch/x86/include/mach-xen/asm/fixmap.h 2012-02-09 12:32:50.000000000 +0100
@@ -124,7 +124,7 @@ enum fixed_addresses {
#endif
FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */
FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
-#ifdef CONFIG_X86_MRST
+#ifdef CONFIG_X86_INTEL_MID
FIX_LNW_VRTC,
#endif
__end_of_permanent_fixed_addresses,
--- 12.2.orig/arch/x86/include/mach-xen/asm/pci.h 2011-07-01 15:19:34.000000000 +0200
+++ 12.2/arch/x86/include/mach-xen/asm/pci.h 2012-02-09 12:32:50.000000000 +0100
@@ -118,19 +118,28 @@ static inline void x86_teardown_msi_irq(
{
x86_msi.teardown_msi_irq(irq);
}
+static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq)
+{
+ x86_msi.restore_msi_irqs(dev, irq);
+}
#define arch_setup_msi_irqs x86_setup_msi_irqs
#define arch_teardown_msi_irqs x86_teardown_msi_irqs
#define arch_teardown_msi_irq x86_teardown_msi_irq
+#define arch_restore_msi_irqs x86_restore_msi_irqs
/* implemented in arch/x86/kernel/apic/io_apic. */
int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
void native_teardown_msi_irq(unsigned int irq);
+void native_restore_msi_irqs(struct pci_dev *dev, int irq);
/* default to the implementation in drivers/lib/msi.c */
#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
+#define HAVE_DEFAULT_MSI_RESTORE_IRQS
void default_teardown_msi_irqs(struct pci_dev *dev);
+void default_restore_msi_irqs(struct pci_dev *dev, int irq);
#else
#define native_setup_msi_irqs NULL
#define native_teardown_msi_irq NULL
#define default_teardown_msi_irqs NULL
+#define default_restore_msi_irqs NULL
#endif
#define PCI_DMA_BUS_IS_PHYS 0
--- 12.2.orig/arch/x86/include/mach-xen/asm/pgtable.h 2011-03-23 10:10:00.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/pgtable.h 2012-02-09 12:32:50.000000000 +0100
@@ -738,7 +738,7 @@ static inline void ptep_set_wrprotect(st
set_pte_at(mm, addr, ptep, pte_wrprotect(pte));
}
-#define flush_tlb_fix_spurious_fault(vma, address)
+#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
--- 12.2.orig/arch/x86/include/mach-xen/asm/processor.h 2011-11-17 16:53:30.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/processor.h 2012-02-29 10:59:05.000000000 +0100
@@ -109,7 +109,7 @@ struct cpuinfo_x86 {
u16 initial_apicid;
#endif
u16 x86_clflush_size;
-#ifdef CONFIG_X86_HT
+#ifndef CONFIG_XEN
/* number of cores as seen by the OS: */
u16 booted_cores;
/* Physical processor id: */
@@ -119,10 +119,8 @@ struct cpuinfo_x86 {
/* Compute unit id */
u8 compute_unit_id;
#endif
-#ifdef CONFIG_SMP
/* Index into per_cpu list: */
u16 cpu_index;
-#endif
#ifndef CONFIG_XEN
u32 microcode;
#endif
@@ -394,6 +392,8 @@ union thread_xstate {
};
struct fpu {
+ unsigned int last_cpu;
+ unsigned int has_fpu;
union thread_xstate *state;
};
--- 12.2.orig/arch/x86/include/mach-xen/asm/smp.h 2011-04-13 17:01:31.000000000 +0200
+++ 12.2/arch/x86/include/mach-xen/asm/smp.h 2012-02-09 12:32:50.000000000 +0100
@@ -231,5 +231,11 @@ extern int hard_smp_processor_id(void);
#endif /* CONFIG_X86_LOCAL_APIC */
+#ifdef CONFIG_DEBUG_NMI_SELFTEST
+extern void nmi_selftest(void);
+#else
+#define nmi_selftest() do { } while (0)
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_SMP_H */
--- 12.2.orig/arch/x86/include/mach-xen/asm/spinlock.h 2012-02-01 09:13:39.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/spinlock.h 2012-02-09 12:49:39.000000000 +0100
@@ -137,19 +137,8 @@ static __always_inline void __ticket_spi
{
register struct __raw_tickets new;
-#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
-# define UNLOCK_SUFFIX(n) "%z" #n
-#elif TICKET_SHIFT == 8
-# define UNLOCK_SUFFIX(n) "b"
-#elif TICKET_SHIFT == 16
-# define UNLOCK_SUFFIX(n) "w"
-#endif
- asm volatile(UNLOCK_LOCK_PREFIX "inc" UNLOCK_SUFFIX(0) " %0"
- : "+m" (lock->tickets.head)
- :
- : "memory", "cc");
+ __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX);
#if !defined(XEN_SPINLOCK_SOURCE) || !CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
-# undef UNLOCK_SUFFIX
# undef UNLOCK_LOCK_PREFIX
#endif
new = ACCESS_ONCE(lock->tickets);
--- 12.2.orig/arch/x86/include/mach-xen/asm/xor_64.h 2011-02-01 14:39:24.000000000 +0100
+++ 12.2/arch/x86/include/mach-xen/asm/xor_64.h 2012-02-21 13:49:31.000000000 +0100
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_XOR_64_H
#define _ASM_X86_XOR_64_H
+#include <asm/i387.h>
+
/*
* x86-64 changes / gcc fixes from Andi Kleen.
* Copyright 2002 Andi Kleen, SuSE Labs.
@@ -18,7 +20,7 @@ typedef struct {
#define XMMS_SAVE \
do { \
preempt_disable(); \
- if (!(current_thread_info()->status & TS_USEDFPU)) \
+ if (!__thread_has_fpu(current)) \
clts(); \
asm volatile( \
"movups %%xmm0,(%1) ;\n\t" \
@@ -41,7 +43,7 @@ do { \
: \
: "r" (cr0), "r" (xmm_save) \
: "memory"); \
- if (!(current_thread_info()->status & TS_USEDFPU)) \
+ if (!__thread_has_fpu(current)) \
stts(); \
preempt_enable(); \
} while (0)
--- 12.2.orig/arch/x86/kernel/apic/io_apic-xen.c 2011-11-28 10:08:44.000000000 +0100
+++ 12.2/arch/x86/kernel/apic/io_apic-xen.c 2012-02-09 12:32:50.000000000 +0100
@@ -2502,8 +2502,8 @@ asmlinkage void smp_irq_move_cleanup_int
unsigned vector, me;
ack_APIC_irq();
- exit_idle();
irq_enter();
+ exit_idle();
me = smp_processor_id();
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
@@ -3035,6 +3035,10 @@ static inline void __init check_timer(vo
}
local_irq_disable();
apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
+ if (x2apic_preenabled)
+ apic_printk(APIC_QUIET, KERN_INFO
+ "Perhaps problem with the pre-enabled x2apic mode\n"
+ "Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
"report. Then try booting with the 'noapic' option.\n");
out:
--- 12.2.orig/arch/x86/kernel/cpu/common-xen.c 2011-11-17 15:56:06.000000000 +0100
+++ 12.2/arch/x86/kernel/cpu/common-xen.c 2012-04-20 15:15:12.000000000 +0200
@@ -711,9 +711,7 @@ static void __init early_identify_cpu(st
if (this_cpu->c_early_init)
this_cpu->c_early_init(c);
-#ifdef CONFIG_SMP
c->cpu_index = 0;
-#endif
filter_cpuid_features(c, false);
setup_smep(c);
@@ -800,10 +798,7 @@ static void __cpuinit generic_identify(s
c->apicid = c->initial_apicid;
# endif
#endif
-
-#ifdef CONFIG_X86_HT
c->phys_proc_id = c->initial_apicid;
-#endif
}
#endif
@@ -1072,6 +1067,8 @@ __setup("clearcpuid=", setup_disablecpui
#ifdef CONFIG_X86_64
#ifndef CONFIG_X86_NO_IDT
struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
+struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
+ (unsigned long) nmi_idt_table };
#endif
DEFINE_PER_CPU_FIRST(union irq_stack_union,
@@ -1102,6 +1099,9 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) =
DEFINE_PER_CPU(unsigned int, irq_count) = -1;
+DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
+EXPORT_PER_CPU_SYMBOL(fpu_owner_task);
+
#ifndef CONFIG_X86_NO_TSS
/*
* Special IST stacks which the CPU switches to when it calls
@@ -1163,10 +1163,34 @@ unsigned long kernel_eflags;
DEFINE_PER_CPU(struct orig_ist, orig_ist);
#endif
+#ifndef CONFIG_X86_NO_IDT
+static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
+DEFINE_PER_CPU(int, debug_stack_usage);
+
+int is_debug_stack(unsigned long addr)
+{
+ return __get_cpu_var(debug_stack_usage) ||
+ (addr <= __get_cpu_var(debug_stack_addr) &&
+ addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
+}
+
+void debug_stack_set_zero(void)
+{
+ load_idt((const struct desc_ptr *)&nmi_idt_descr);
+}
+
+void debug_stack_reset(void)
+{
+ load_idt((const struct desc_ptr *)&idt_descr);
+}
+#endif
+
#else /* CONFIG_X86_64 */
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
+DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
+EXPORT_PER_CPU_SYMBOL(fpu_owner_task);
#ifdef CONFIG_CC_STACKPROTECTOR
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
@@ -1213,6 +1237,17 @@ static void dbg_restore_debug_regs(void)
#define dbg_restore_debug_regs()
#endif /* ! CONFIG_KGDB */
+#ifndef CONFIG_XEN
+/*
+ * Prints an error where the NUMA and configured core-number mismatch and the
+ * platform didn't override this to fix it up
+ */
+void __cpuinit x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node)
+{
+ pr_err("NUMA core number %d differs from configured core number %d\n", node, c->phys_proc_id);
+}
+#endif
+
/*
* cpu_init() initializes state that is per-CPU. Some data is already
* initialized (naturally) in the bootstrap process, such as the GDT
@@ -1293,6 +1328,10 @@ void __cpuinit cpu_init(void)
estacks += exception_stack_sizes[v];
oist->ist[v] = t->x86_tss.ist[v] =
(unsigned long)estacks;
+#ifndef CONFIG_X86_NO_IDT
+ if (v == DEBUG_STACK-1)
+ per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
+#endif
}
}
--- 12.2.orig/arch/x86/kernel/cpu/mcheck/mce-inject.c 2012-02-08 12:15:59.000000000 +0100
+++ 12.2/arch/x86/kernel/cpu/mcheck/mce-inject.c 2012-02-16 17:51:33.000000000 +0100
@@ -93,6 +93,7 @@ static int mce_raise_notify(unsigned int
return NMI_HANDLED;
}
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
static void mce_irq_ipi(void *info)
{
int cpu = smp_processor_id();
@@ -104,6 +105,7 @@ static void mce_irq_ipi(void *info)
raise_exception(m, NULL);
}
}
+#endif
/* Inject mce on current CPU */
static int raise_local(void)
--- 12.2.orig/arch/x86/kernel/e820-xen.c 2011-12-21 12:00:26.000000000 +0100
+++ 12.2/arch/x86/kernel/e820-xen.c 2012-02-16 17:12:00.000000000 +0100
@@ -19,6 +19,7 @@
#include <linux/acpi.h>
#include <linux/firmware-map.h>
#include <linux/memblock.h>
+#include <linux/sort.h>
#include <asm/e820.h>
#include <asm/proto.h>
@@ -250,22 +251,38 @@ static void __init _e820_print_map(const
* ____________________33__
* ______________________4_
*/
+struct change_member {
+ struct e820entry *pbios; /* pointer to original bios entry */
+ unsigned long long addr; /* address for this change point */
+};
+
+static int __init cpcompare(const void *a, const void *b)
+{
+ struct change_member * const *app = a, * const *bpp = b;
+ const struct change_member *ap = *app, *bp = *bpp;
+
+ /*
+ * Inputs are pointers to two elements of change_point[]. If their
+ * addresses are unequal, their difference dominates. If the addresses
+ * are equal, then consider one that represents the end of its region
+ * to be greater than one that does not.
+ */
+ if (ap->addr != bp->addr)
+ return ap->addr > bp->addr ? 1 : -1;
+
+ return (ap->addr != ap->pbios->addr) - (bp->addr != bp->pbios->addr);
+}
int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
u32 *pnr_map)
{
- struct change_member {
- struct e820entry *pbios; /* pointer to original bios entry */
- unsigned long long addr; /* address for this change point */
- };
static struct change_member change_point_list[2*E820_X_MAX] __initdata;
static struct change_member *change_point[2*E820_X_MAX] __initdata;
static struct e820entry *overlap_list[E820_X_MAX] __initdata;
static struct e820entry new_bios[E820_X_MAX] __initdata;
- struct change_member *change_tmp;
unsigned long current_type, last_type;
unsigned long long last_addr;
- int chgidx, still_changing;
+ int chgidx;
int overlap_entries;
int new_bios_entry;
int old_nr, new_nr, chg_nr;
@@ -306,35 +323,7 @@ int __init sanitize_e820_map(struct e820
chg_nr = chgidx;
/* sort change-point list by memory addresses (low -> high) */
- still_changing = 1;
- while (still_changing) {
- still_changing = 0;
- for (i = 1; i < chg_nr; i++) {
- unsigned long long curaddr, lastaddr;
- unsigned long long curpbaddr, lastpbaddr;
-
- curaddr = change_point[i]->addr;
- lastaddr = change_point[i - 1]->addr;
- curpbaddr = change_point[i]->pbios->addr;
- lastpbaddr = change_point[i - 1]->pbios->addr;
-
- /*
- * swap entries, when:
- *
- * curaddr > lastaddr or
- * curaddr == lastaddr and curaddr == curpbaddr and
- * lastaddr != lastpbaddr
- */
- if (curaddr < lastaddr ||
- (curaddr == lastaddr && curaddr == curpbaddr &&
- lastaddr != lastpbaddr)) {
- change_tmp = change_point[i];
- change_point[i] = change_point[i-1];
- change_point[i-1] = change_tmp;
- still_changing = 1;
- }
- }
- }
+ sort(change_point, chg_nr, sizeof *change_point, cpcompare, NULL);
/* create a new bios memory map, removing overlaps */
overlap_entries = 0; /* number of entries in the overlap table */
@@ -769,7 +758,7 @@ void __init e820_mark_nosave_regions(uns
}
#endif
-#ifdef CONFIG_HIBERNATION
+#ifdef CONFIG_ACPI
/**
* Mark ACPI NVS memory region, so that we can save/restore it during
* hibernation and the subsequent resume.
@@ -782,7 +771,7 @@ static int __init e820_mark_nvs_memory(v
struct e820entry *ei = &e820.map[i];
if (ei->type == E820_NVS)
- suspend_nvs_register(ei->addr, ei->size);
+ acpi_nvs_register(ei->addr, ei->size);
}
return 0;
@@ -795,47 +784,29 @@ core_initcall(e820_mark_nvs_memory);
/*
* pre allocated 4k and reserved it in memblock and e820_saved
*/
-u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
+u64 __init early_reserve_e820(u64 size, u64 align)
{
- u64 size = 0;
u64 addr;
- u64 start;
#ifdef CONFIG_XEN
- unsigned int order = get_order(sizet);
+ unsigned int order = get_order(size);
int rc;
unsigned long max_initmap_pfn;
if (!is_initial_xendomain())
return 0;
- sizet = PAGE_SIZE << order;
+ size = PAGE_SIZE << order;
if (align < PAGE_SIZE)
align = PAGE_SIZE;
#endif
- for (start = startt; ; start += size) {
- start = memblock_x86_find_in_range_size(start, &size, align);
- if (start == MEMBLOCK_ERROR)
- return 0;
- if (size >= sizet)
- break;
+ addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
+ if (addr) {
+ e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED);
+ printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
+ update_e820_saved();
}
-
-#ifdef CONFIG_X86_32
- if (start >= MAXMEM)
- return 0;
- if (start + size > MAXMEM)
- size = MAXMEM - start;
-#endif
#ifdef CONFIG_XEN
- if ((start >> PAGE_SHIFT) >= xen_start_info->nr_pages)
- return 0;
- if (PFN_UP(start + size) > xen_start_info->nr_pages)
- size = ((u64)xen_start_info->nr_pages << PAGE_SHIFT) - start;
-#endif
-
- addr = round_down(start + size - sizet, align);
- if (addr < start)
+ else
return 0;
-#ifdef CONFIG_XEN
max_initmap_pfn = ALIGN(PFN_UP(__pa(xen_start_info->pt_base))
+ xen_start_info->nr_pt_frames
+ 1 + (1 << (19 - PAGE_SHIFT)),
@@ -859,10 +830,6 @@ u64 __init early_reserve_e820(u64 startt
if (rc)
return 0;
#endif
- memblock_x86_reserve_range(addr, addr + sizet, "new next");
- e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
- printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
- update_e820_saved();
return addr;
}
@@ -1223,7 +1190,7 @@ void __init memblock_x86_fill(void)
* We are safe to enable resizing, beause memblock_x86_fill()
* is rather later for x86
*/
- memblock_can_resize = 1;
+ memblock_allow_resize();
for (i = 0; i < e820.nr_map; i++) {
struct e820entry *ei = &e820.map[i];
@@ -1238,22 +1205,42 @@ void __init memblock_x86_fill(void)
memblock_add(ei->addr, ei->size);
}
- memblock_analyze();
+#ifdef CONFIG_XEN
+ if (max_pfn > xen_start_info->nr_pages)
+ memblock_reserve(PFN_PHYS(xen_start_info->nr_pages),
+ PFN_PHYS(max_pfn - xen_start_info->nr_pages));
+#endif
+
memblock_dump_all();
}
void __init memblock_find_dma_reserve(void)
{
#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
- u64 free_size_pfn;
- u64 mem_size_pfn;
+ u64 nr_pages = 0, nr_free_pages = 0;
+ unsigned long start_pfn, end_pfn;
+ phys_addr_t start, end;
+ int i;
+ u64 u;
+
/*
* need to find out used area below MAX_DMA_PFN
* need to use memblock to get free size in [0, MAX_DMA_PFN]
* at first, and assume boot_mem will not take below MAX_DMA_PFN
*/
- mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
- free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
- set_dma_reserve(mem_size_pfn - free_size_pfn);
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
+ start_pfn = min_t(unsigned long, start_pfn, MAX_DMA_PFN);
+ end_pfn = min_t(unsigned long, end_pfn, MAX_DMA_PFN);
+ nr_pages += end_pfn - start_pfn;
+ }
+
+ for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) {
+ start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN);
+ end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN);
+ if (start_pfn < end_pfn)
+ nr_free_pages += end_pfn - start_pfn;
+ }
+
+ set_dma_reserve(nr_pages - nr_free_pages);
#endif
}
--- 12.2.orig/arch/x86/kernel/early_printk-xen.c 2011-02-01 15:41:35.000000000 +0100
+++ 12.2/arch/x86/kernel/early_printk-xen.c 2012-02-09 12:32:50.000000000 +0100
@@ -272,14 +272,14 @@ static int __init setup_early_printk(cha
if (!strncmp(buf, "xen", 3))
early_console_register(&xenboot_console, keep);
#endif
-#ifdef CONFIG_EARLY_PRINTK_MRST
+#ifdef CONFIG_EARLY_PRINTK_INTEL_MID
if (!strncmp(buf, "mrst", 4)) {
mrst_early_console_init();
early_console_register(&early_mrst_console, keep);
}
if (!strncmp(buf, "hsu", 3)) {
- hsu_early_console_init();
+ hsu_early_console_init(buf + 3);
early_console_register(&early_hsu_console, keep);
}
#endif
--- 12.2.orig/arch/x86/kernel/entry_32-xen.S 2012-02-29 14:20:19.000000000 +0100
+++ 12.2/arch/x86/kernel/entry_32-xen.S 2012-02-29 14:23:02.000000000 +0100
@@ -42,6 +42,7 @@
*/
#include <linux/linkage.h>
+#include <linux/err.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/errno.h>
@@ -82,8 +83,6 @@
* enough to patch inline, increasing performance.
*/
-#define nr_syscalls ((syscall_table_size)/4)
-
/* Pseudo-eflags. */
NMI_MASK = 0x80000000
@@ -427,7 +426,7 @@ sysenter_past_esp:
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
jnz sysenter_audit
sysenter_do_call:
- cmpl $(nr_syscalls), %eax
+ cmpl $(NR_syscalls), %eax
jae syscall_badsys
call *sys_call_table(,%eax,4)
movl %eax,PT_EAX(%esp)
@@ -459,7 +458,7 @@ sysenter_audit:
movl %ebx,%ecx /* 3rd arg: 1st syscall arg */
movl %eax,%edx /* 2nd arg: syscall number */
movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */
- call audit_syscall_entry
+ call __audit_syscall_entry
pushl_cfi %ebx
movl PT_EAX(%esp),%eax /* reload syscall number */
jmp sysenter_do_call
@@ -470,11 +469,10 @@ sysexit_audit:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_ANY)
movl %eax,%edx /* second arg, syscall return value */
- cmpl $0,%eax /* is it < 0? */
- setl %al /* 1 if so, 0 if not */
+ cmpl $-MAX_ERRNO,%eax /* is it an error ? */
+ setbe %al /* 1 if so, 0 if not */
movzbl %al,%eax /* zero-extend that */
- inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
- call audit_syscall_exit
+ call __audit_syscall_exit
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
@@ -533,7 +531,7 @@ ENTRY(system_call)
# system call tracing in operation / emulation
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
jnz syscall_trace_entry
- cmpl $(nr_syscalls), %eax
+ cmpl $(NR_syscalls), %eax
jae syscall_badsys
syscall_call:
call *sys_call_table(,%eax,4)
@@ -694,6 +692,8 @@ work_notifysig: # deal with pending s
movl %esp, %eax
jne work_notifysig_v86 # returning to kernel-space or
# vm86-space
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
xorl %edx, %edx
call do_notify_resume
jmp resume_userspace_sig
@@ -707,6 +707,8 @@ work_notifysig_v86:
#else
movl %esp, %eax
#endif
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
xorl %edx, %edx
call do_notify_resume
jmp resume_userspace_sig
@@ -719,7 +721,7 @@ syscall_trace_entry:
movl %esp, %eax
call syscall_trace_enter
/* What it returned is what we'll actually use. */
- cmpl $(nr_syscalls), %eax
+ cmpl $(NR_syscalls), %eax
jnae syscall_call
jmp syscall_exit
END(syscall_trace_entry)
@@ -759,29 +761,28 @@ END(syscall_badsys)
* System calls that need a pt_regs pointer.
*/
#define PTREGSCALL0(name) \
- ALIGN; \
-ptregs_##name: \
+ENTRY(ptregs_##name) ; \
leal 4(%esp),%eax; \
- jmp sys_##name;
+ jmp sys_##name; \
+ENDPROC(ptregs_##name)
#define PTREGSCALL1(name) \
- ALIGN; \
-ptregs_##name: \
+ENTRY(ptregs_##name) ; \
leal 4(%esp),%edx; \
movl (PT_EBX+4)(%esp),%eax; \
- jmp sys_##name;
+ jmp sys_##name; \
+ENDPROC(ptregs_##name)
#define PTREGSCALL2(name) \
- ALIGN; \
-ptregs_##name: \
+ENTRY(ptregs_##name) ; \
leal 4(%esp),%ecx; \
movl (PT_ECX+4)(%esp),%edx; \
movl (PT_EBX+4)(%esp),%eax; \
- jmp sys_##name;
+ jmp sys_##name; \
+ENDPROC(ptregs_##name)
#define PTREGSCALL3(name) \
- ALIGN; \
-ptregs_##name: \
+ENTRY(ptregs_##name) ; \
CFI_STARTPROC; \
leal 4(%esp),%eax; \
pushl_cfi %eax; \
@@ -806,8 +807,7 @@ PTREGSCALL2(vm86)
PTREGSCALL1(vm86old)
/* Clone is an oddball. The 4th arg is in %edi */
- ALIGN;
-ptregs_clone:
+ENTRY(ptregs_clone)
CFI_STARTPROC
leal 4(%esp),%eax
pushl_cfi %eax
@@ -1365,7 +1365,7 @@ ENTRY(ia32pv_cstar_target)
GET_THREAD_INFO(%ebp)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
jnz cstar_trace_entry
- cmpl $nr_syscalls,%eax
+ cmpl $NR_syscalls,%eax
jae cstar_badsys
.Lcstar_call:
btl %eax,cstar_special
@@ -1380,7 +1380,7 @@ ENTRY(ia32pv_cstar_target)
movl PT_ECX(%esp),%ecx
movl %ecx,PT_EBP(%esp) # put user EBP back in place
jmp syscall_call
-cstar_set_tif:
+GLOBAL(cstar_set_tif)
movl $cstar_clear_tif,(%esp) # replace return address
LOCK_PREFIX
orl $_TIF_CSTAR,TI_flags(%ebp)
@@ -1392,7 +1392,7 @@ cstar_clear_tif:
jmp .Lcstar_exit
cstar_trace_entry:
movl $-ENOSYS,PT_EAX(%esp)
- cmpl $nr_syscalls,%eax
+ cmpl $NR_syscalls,%eax
jae 1f
btl %eax,cstar_special
jc .Lcstar_trace_special
@@ -1403,7 +1403,7 @@ cstar_trace_entry:
LOCK_PREFIX
andl $~_TIF_CSTAR,TI_flags(%ebp)
/* What it returned is what we'll actually use. */
- cmpl $nr_syscalls,%eax
+ cmpl $NR_syscalls,%eax
jb .Lcstar_call
jmp .Lcstar_exit
.Lcstar_trace_special:
@@ -1412,7 +1412,7 @@ cstar_trace_entry:
movl %ecx,PT_EBP(%esp) # put user EBP back in place
call syscall_trace_enter
/* What it returned is what we'll actually use. */
- cmpl $nr_syscalls,%eax
+ cmpl $NR_syscalls,%eax
jb syscall_call
jmp syscall_exit
cstar_badsys:
@@ -1440,19 +1440,14 @@ ENTRY(cstar_ret_from_fork)
jmp ret_from_fork
CFI_ENDPROC
END(cstar_ret_from_fork)
-#endif /* TIF_CSTAR */
-
-.section .rodata,"a"
-#include "syscall_table_32.S"
-syscall_table_size=(.-sys_call_table)
-
-#ifdef TIF_CSTAR
#include <asm/unistd.h>
+.pushsection .rodata,"a"
+.balign 4
cstar_special:
nr=0
mask=0
-.rept nr_syscalls+31
+.rept NR_syscalls+31
.irp n, __NR_sigreturn, __NR_rt_sigreturn
.if nr == \n
mask = mask | (1 << (\n & 31))
@@ -1464,15 +1459,7 @@ mask=0
mask = 0
.endif
.endr
-#define sys_call_table cstar_call_table
-#define ptregs_fork cstar_set_tif
-#define ptregs_clone cstar_set_tif
-#define ptregs_vfork cstar_set_tif
-#include "syscall_table_32.S"
-#undef sys_call_table
-#undef ptregs_fork
-#undef ptregs_clone
-#undef ptregs_vfork
+.popsection
#endif /* TIF_CSTAR */
/*
--- 12.2.orig/arch/x86/kernel/entry_64-xen.S 2011-11-17 15:56:06.000000000 +0100
+++ 12.2/arch/x86/kernel/entry_64-xen.S 2012-02-10 08:50:41.000000000 +0100
@@ -58,6 +58,7 @@
#include <asm/processor-flags.h>
#include <asm/ftrace.h>
#include <asm/percpu.h>
+#include <linux/err.h>
#include <xen/interface/xen.h>
#include <xen/interface/features.h>
@@ -212,7 +213,7 @@ NMI_MASK = 0x80000000
/*CFI_REL_OFFSET ss,0*/
pushq_cfi %rax /* rsp */
CFI_REL_OFFSET rsp,0
- pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */
+ pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */
/*CFI_REL_OFFSET rflags,0*/
pushq_cfi $__KERNEL_CS /* cs */
/*CFI_REL_OFFSET cs,0*/
@@ -448,8 +449,11 @@ ENTRY(ret_from_fork)
RESTORE_REST
testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
- je int_ret_from_sys_call
-
+ jnz 1f
+ /* Need to set the proper %ss (not NULL) for ring 3 iretq */
+ movl $__KERNEL_DS,SS-ARGOFFSET(%rsp)
+ jmp retint_restore_args
+1:
testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
jnz int_ret_from_sys_call
@@ -494,8 +498,7 @@ ENTRY(system_call)
INTR_FRAME start=2 offset=2*8
SAVE_ARGS -8,0
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
- GET_THREAD_INFO(%rcx)
- testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jnz tracesys
system_call_fastpath:
cmpq $__NR_syscall_max,%rax
@@ -512,10 +515,9 @@ ret_from_sys_call:
/* edi: flagmask */
sysret_check:
LOCKDEP_SYS_EXIT
- GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- movl TI_flags(%rcx),%edx
+ movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
andl %edi,%edx
jnz sysret_careful
CFI_REMEMBER_STATE
@@ -564,7 +566,7 @@ badsys:
#ifdef CONFIG_AUDITSYSCALL
/*
* Fast path for syscall audit without full syscall trace.
- * We just call audit_syscall_entry() directly, and then
+ * We just call __audit_syscall_entry() directly, and then
* jump back to the normal fast path.
*/
auditsys:
@@ -574,22 +576,21 @@ auditsys:
movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
movq %rax,%rsi /* 2nd arg: syscall number */
movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
- call audit_syscall_entry
+ call __audit_syscall_entry
LOAD_ARGS 0 /* reload call-clobbered registers */
jmp system_call_fastpath
/*
- * Return fast path for syscall audit. Call audit_syscall_exit()
+ * Return fast path for syscall audit. Call __audit_syscall_exit()
* directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
* masked off.
*/
sysret_audit:
movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */
- cmpq $0,%rsi /* is it < 0? */
- setl %al /* 1 if so, 0 if not */
+ cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */
+ setbe %al /* 1 if so, 0 if not */
movzbl %al,%edi /* zero-extend that into %edi */
- inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
- call audit_syscall_exit
+ call __audit_syscall_exit
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
jmp sysret_check
#endif /* CONFIG_AUDITSYSCALL */
@@ -597,7 +598,7 @@ sysret_audit:
/* Do syscall tracing */
tracesys:
#ifdef CONFIG_AUDITSYSCALL
- testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jz auditsys
#endif
SAVE_REST
@@ -626,12 +627,6 @@ tracesys:
GLOBAL(int_ret_from_sys_call)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- testb $3,CS-ARGOFFSET(%rsp)
- jnz 1f
- /* Need to set the proper %ss (not NULL) for ring 3 iretq */
- movl $__KERNEL_DS,SS-ARGOFFSET(%rsp)
- jmp retint_restore_args # retrun from ring3 kernel
-1:
movl $_TIF_ALLWORK_MASK,%edi
/* edi: mask to check */
GLOBAL(int_with_check)
@@ -861,13 +856,21 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
x86_platform_ipi smp_x86_platform_ipi
#ifdef CONFIG_SMP
-.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
+ ALIGN
+ INTR_FRAME
+.irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
.if NUM_INVALIDATE_TLB_VECTORS > \idx
-apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
- invalidate_interrupt\idx smp_invalidate_interrupt
+ENTRY(invalidate_interrupt\idx)
+ pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx)
+ jmp .Lcommon_invalidate_interrupt0
+ CFI_ADJUST_CFA_OFFSET -8
+END(invalidate_interrupt\idx)
.endif
.endr
+ CFI_ENDPROC
+apicinterrupt INVALIDATE_TLB_VECTOR_START, \
+ invalidate_interrupt0, smp_invalidate_interrupt
#endif
apicinterrupt THRESHOLD_APIC_VECTOR \
@@ -1301,13 +1304,28 @@ ENTRY(error_exit)
END(error_exit)
+#define extern #
+#include <asm-generic/percpu.h>
+
+.pushsection PER_CPU_BASE_SECTION, "aw", @progbits
+in_NMI: .byte 0
+.popsection
+
do_nmi_callback:
CFI_STARTPROC
addq $8, %rsp
CFI_ENDPROC
DEFAULT_FRAME
+ orb $1, PER_CPU_VAR(in_NMI)
+ js 1f
+0:
+ movb $0x80, PER_CPU_VAR(in_NMI)
call do_nmi
+ movl $0x80, %eax
+ cmpxchgb %ah, PER_CPU_VAR(in_NMI)
+ jne 0b
orl $NMI_MASK,EFLAGS(%rsp)
+1:
RESTORE_REST
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
--- 12.2.orig/arch/x86/kernel/head-xen.c 2012-02-08 16:16:55.000000000 +0100
+++ 12.2/arch/x86/kernel/head-xen.c 2012-02-09 17:46:13.000000000 +0100
@@ -54,7 +54,7 @@ void __init reserve_ebda_region(void)
lowmem = 0x9f000;
/* reserve all memory between lowmem and the 1MB mark */
- memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved");
+ memblock_reserve(lowmem, 0x100000 - lowmem);
}
#else /* CONFIG_XEN */
#include <linux/export.h>
@@ -106,12 +106,9 @@ void __init xen_start_kernel(void)
WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
VMASST_TYPE_writable_pagetables));
- memblock_init();
- memblock_x86_reserve_range(ALIGN(__pa_symbol(&_end), PAGE_SIZE),
- __pa(xen_start_info->pt_base)
- + (xen_start_info->nr_pt_frames
- << PAGE_SHIFT),
- "Xen provided");
+ memblock_reserve(ALIGN(__pa_symbol(&_end), PAGE_SIZE),
+ __pa(xen_start_info->pt_base)
+ + PFN_PHYS(xen_start_info->nr_pt_frames));
#ifdef CONFIG_X86_32
{
--- 12.2.orig/arch/x86/kernel/head32-xen.c 2011-07-01 15:19:34.000000000 +0200
+++ 12.2/arch/x86/kernel/head32-xen.c 2012-02-09 12:32:50.000000000 +0100
@@ -47,9 +47,8 @@ void __init i386_start_kernel(void)
BUG_ON(pte_index(hypervisor_virt_start));
#endif
- memblock_init();
-
- memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
+ memblock_reserve(__pa_symbol(&_text),
+ __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
#ifndef CONFIG_XEN
#ifdef CONFIG_BLK_DEV_INITRD
@@ -59,7 +58,7 @@ void __init i386_start_kernel(void)
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
- memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK");
+ memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
}
#endif
--- 12.2.orig/arch/x86/kernel/head64-xen.c 2011-04-12 15:59:10.000000000 +0200
+++ 12.2/arch/x86/kernel/head64-xen.c 2012-02-09 12:32:50.000000000 +0100
@@ -117,9 +117,8 @@ void __init x86_64_start_reservations(ch
{
copy_bootdata(__va(real_mode_data));
- memblock_init();
-
- memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
+ memblock_reserve(__pa_symbol(&_text),
+ __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
/*
* At this point everything still needed from the boot loader
--- 12.2.orig/arch/x86/kernel/irq-xen.c 2011-11-17 15:56:06.000000000 +0100
+++ 12.2/arch/x86/kernel/irq-xen.c 2012-02-09 12:32:50.000000000 +0100
@@ -78,6 +78,12 @@ int arch_show_interrupts(struct seq_file
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
seq_printf(p, " IRQ work interrupts\n");
+#ifndef CONFIG_XEN
+ seq_printf(p, "%*s: ", prec, "RTR");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
+ seq_printf(p, " APIC ICR read retries\n");
+#endif
#endif
#ifndef CONFIG_XEN
if (x86_platform_ipi_callback) {
@@ -149,6 +155,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->irq_spurious_count;
sum += irq_stats(cpu)->apic_perf_irqs;
sum += irq_stats(cpu)->apic_irq_work_irqs;
+ sum += irq_stats(cpu)->icr_read_retry_count;
#endif
#ifndef CONFIG_XEN
if (x86_platform_ipi_callback)
@@ -201,8 +208,8 @@ unsigned int __irq_entry do_IRQ(struct p
unsigned vector = ~regs->orig_ax;
unsigned irq;
- exit_idle();
irq_enter();
+ exit_idle();
irq = __this_cpu_read(vector_irq[vector]);
@@ -229,10 +236,10 @@ void smp_x86_platform_ipi(struct pt_regs
ack_APIC_irq();
- exit_idle();
-
irq_enter();
+ exit_idle();
+
inc_irq_stat(x86_platform_ipis);
if (x86_platform_ipi_callback)
--- 12.2.orig/arch/x86/kernel/irq_64.c 2012-06-20 12:12:04.000000000 +0200
+++ 12.2/arch/x86/kernel/irq_64.c 2012-05-11 10:57:43.000000000 +0200
@@ -39,7 +39,9 @@ static inline void stack_overflow_check(
{
#ifdef CONFIG_DEBUG_STACKOVERFLOW
#define STACK_TOP_MARGIN 128
+#ifndef CONFIG_X86_NO_TSS
struct orig_ist *oist;
+#endif
u64 irq_stack_top, irq_stack_bottom;
u64 estack_top, estack_bottom;
u64 curbase = (u64)task_stack_page(current);
@@ -58,11 +60,15 @@ static inline void stack_overflow_check(
if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom)
return;
+#ifndef CONFIG_X86_NO_TSS
oist = &__get_cpu_var(orig_ist);
estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN;
estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1];
if (regs->sp >= estack_top && regs->sp <= estack_bottom)
return;
+#else
+ estack_top = estack_bottom = 0;
+#endif
WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
current->comm, curbase, regs->sp,
--- 12.2.orig/arch/x86/kernel/microcode_core-xen.c 2011-12-01 15:28:13.000000000 +0100
+++ 12.2/arch/x86/kernel/microcode_core-xen.c 2012-02-09 14:22:00.000000000 +0100
@@ -186,16 +186,21 @@ static int request_microcode(const char
static int __init microcode_init(void)
{
const struct cpuinfo_x86 *c = &boot_cpu_data;
- char buf[32];
+ char buf[36];
const char *fw_name = buf;
int error;
if (c->x86_vendor == X86_VENDOR_INTEL)
snprintf(buf, sizeof(buf), "intel-ucode/%02x-%02x-%02x",
c->x86, c->x86_model, c->x86_mask);
- else if (c->x86_vendor == X86_VENDOR_AMD)
- fw_name = "amd-ucode/microcode_amd.bin";
- else {
+ else if (c->x86_vendor == X86_VENDOR_AMD) {
+ if (c->x86 >= 0x15)
+ snprintf(buf, sizeof(buf),
+ "amd-ucode/microcode_amd_fam%xh.bin",
+ c->x86);
+ else
+ fw_name = "amd-ucode/microcode_amd.bin";
+ } else {
pr_err("no support for this CPU vendor\n");
return -ENODEV;
}
--- 12.2.orig/arch/x86/kernel/mpparse-xen.c 2011-12-21 11:56:23.000000000 +0100
+++ 12.2/arch/x86/kernel/mpparse-xen.c 2012-02-09 12:32:50.000000000 +0100
@@ -591,9 +591,7 @@ void __init default_get_smp_config(unsig
#ifndef CONFIG_XEN
static void __init smp_reserve_memory(struct mpf_intel *mpf)
{
- unsigned long size = get_mpc_size(mpf->physptr);
-
- memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc");
+ memblock_reserve(mpf->physptr, get_mpc_size(mpf->physptr));
}
#endif
@@ -626,7 +624,7 @@ static int __init smp_scan_config(unsign
mpf, (u64)virt_to_phys(mpf));
mem = virt_to_phys(mpf);
- memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf");
+ memblock_reserve(mem, sizeof(*mpf));
if (mpf->physptr)
smp_reserve_memory(mpf);
#else
@@ -874,10 +872,8 @@ early_param("alloc_mptable", parse_alloc
void __init early_reserve_e820_mpc_new(void)
{
- if (enable_update_mptable && alloc_mptable) {
- u64 startt = 0;
- mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
- }
+ if (enable_update_mptable && alloc_mptable)
+ mpc_new_phys = early_reserve_e820(mpc_new_length, 4);
}
static int __init update_mp_table(void)
--- 12.2.orig/arch/x86/kernel/pci-dma-xen.c 2012-04-04 14:32:31.000000000 +0200
+++ 12.2/arch/x86/kernel/pci-dma-xen.c 2012-04-04 14:32:53.000000000 +0200
@@ -42,6 +42,15 @@ int iommu_detected __read_mostly = 0;
* guests and not for driver dma translation.
*/
int iommu_pass_through __read_mostly;
+
+/*
+ * Group multi-function PCI devices into a single device-group for the
+ * iommu_device_group interface. This tells the iommu driver to pretend
+ * it cannot distinguish between functions of a device, exposing only one
+ * group for the device. Useful for disallowing use of individual PCI
+ * functions from userspace drivers.
+ */
+int iommu_group_mf __read_mostly;
#endif
extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
@@ -233,6 +242,8 @@ static __init int iommu_setup(char *p)
#ifndef CONFIG_XEN
if (!strncmp(p, "pt", 2))
iommu_pass_through = 1;
+ if (!strncmp(p, "group_mf", 8))
+ iommu_group_mf = 1;
gart_parse_options(p);
#endif
--- 12.2.orig/arch/x86/kernel/process-xen.c 2011-12-21 11:59:08.000000000 +0100
+++ 12.2/arch/x86/kernel/process-xen.c 2012-02-09 12:32:50.000000000 +0100
@@ -280,7 +280,7 @@ int kernel_thread(int (*fn)(void *), voi
regs.orig_ax = -1;
regs.ip = (unsigned long) kernel_thread_helper;
regs.cs = __KERNEL_CS | get_kernel_rpl();
- regs.flags = X86_EFLAGS_IF | 0x2;
+ regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
/* Ok, create the new process.. */
return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL);
--- 12.2.orig/arch/x86/kernel/process_32-xen.c 2012-02-29 14:20:36.000000000 +0100
+++ 12.2/arch/x86/kernel/process_32-xen.c 2012-02-29 14:23:23.000000000 +0100
@@ -102,7 +102,8 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
- tick_nohz_stop_sched_tick(1);
+ tick_nohz_idle_enter();
+ rcu_idle_enter();
while (!need_resched()) {
check_pgt_cache();
@@ -119,7 +120,8 @@ void cpu_idle(void)
xen_idle();
start_critical_timings();
}
- tick_nohz_restart_sched_tick();
+ rcu_idle_exit();
+ tick_nohz_idle_exit();
preempt_enable_no_resched();
schedule();
preempt_disable();
@@ -215,6 +217,7 @@ int copy_thread(unsigned long clone_flag
task_user_gs(p) = get_user_gs(regs);
+ p->fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
tsk = current;
err = -ENOMEM;
@@ -307,7 +310,7 @@ __switch_to(struct task_struct *prev_p,
#ifndef CONFIG_X86_NO_TSS
struct tss_struct *tss = &per_cpu(init_tss, cpu);
#endif
- bool preload_fpu;
+ fpu_switch_t fpu;
#if CONFIG_XEN_COMPAT > 0x030002
struct physdev_set_iopl iopl_op;
struct physdev_set_iobitmap iobmp_op;
@@ -320,29 +323,7 @@ __switch_to(struct task_struct *prev_p,
/* XEN NOTE: FS/GS saved in switch_mm(), not here. */
- /*
- * If the task has used fpu the last 5 timeslices, just do a full
- * restore of the math state immediately to avoid the trap; the
- * chances of needing FPU soon are obviously high now
- */
- preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
- /*
- * This is basically '__unlazy_fpu', except that we queue a
- * multicall to indicate FPU task switch, rather than
- * synchronously trapping to Xen.
- */
- if (task_thread_info(prev_p)->status & TS_USEDFPU) {
- __save_init_fpu(prev_p); /* _not_ save_init_fpu() */
- if (!preload_fpu) {
- mcl->op = __HYPERVISOR_fpu_taskswitch;
- mcl->args[0] = 1;
- mcl++;
- }
- }
-#if 0 /* lazy fpu sanity check */
- else BUG_ON(!(read_cr0() & 8));
-#endif
+ fpu = xen_switch_fpu_prepare(prev_p, next_p, cpu, &mcl);
/*
* Reload sp0.
@@ -384,14 +365,6 @@ __switch_to(struct task_struct *prev_p,
mcl++;
}
- /* If we're going to preload the fpu context, make sure clts
- is run while we're batching the cpu state updates. */
- if (preload_fpu) {
- mcl->op = __HYPERVISOR_fpu_taskswitch;
- mcl->args[0] = 0;
- mcl++;
- }
-
if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
set_xen_guest_handle(iobmp_op.bitmap,
(char *)next->io_bitmap_ptr);
@@ -415,10 +388,6 @@ __switch_to(struct task_struct *prev_p,
if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL)))
BUG();
- /* we're going to use this soon, after a few expensive things */
- if (preload_fpu)
- prefetch(next->fpu.state);
-
/*
* Now maybe handle debug registers
*/
@@ -435,15 +404,14 @@ __switch_to(struct task_struct *prev_p,
*/
arch_end_context_switch(next_p);
- if (preload_fpu)
- __math_state_restore();
-
/*
* Restore %gs if needed (which is common)
*/
if (prev->gs | next->gs)
lazy_load_gs(next->gs);
+ switch_fpu_finish(next_p, fpu);
+
percpu_write(current_task, next_p);
return prev_p;
--- 12.2.orig/arch/x86/kernel/process_64-xen.c 2011-11-17 15:56:06.000000000 +0100
+++ 12.2/arch/x86/kernel/process_64-xen.c 2012-02-29 10:59:05.000000000 +0100
@@ -126,7 +126,7 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
- tick_nohz_stop_sched_tick(1);
+ tick_nohz_idle_enter();
while (!need_resched()) {
rmb();
@@ -143,8 +143,14 @@ void cpu_idle(void)
enter_idle();
/* Don't trace irqs off for idle */
stop_critical_timings();
+
+ /* enter_idle() needs rcu for notifiers */
+ rcu_idle_enter();
+
if (cpuidle_idle_call())
xen_idle();
+
+ rcu_idle_exit();
start_critical_timings();
/* In many cases the interrupt that ended idle
@@ -153,7 +159,7 @@ void cpu_idle(void)
__exit_idle();
}
- tick_nohz_restart_sched_tick();
+ tick_nohz_idle_exit();
preempt_enable_no_resched();
schedule();
preempt_disable();
@@ -289,6 +295,7 @@ int copy_thread(unsigned long clone_flag
set_tsk_thread_flag(p, TIF_FORK);
+ p->fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
@@ -302,13 +309,12 @@ int copy_thread(unsigned long clone_flag
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
- p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+ p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
+ IO_BITMAP_BYTES, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr) {
p->thread.io_bitmap_max = 0;
return -ENOMEM;
}
- memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
- IO_BITMAP_BYTES);
set_tsk_thread_flag(p, TIF_IO_BITMAP);
}
@@ -392,7 +398,7 @@ __switch_to(struct task_struct *prev_p,
#ifndef CONFIG_X86_NO_TSS
struct tss_struct *tss = &per_cpu(init_tss, cpu);
#endif
- bool preload_fpu;
+ fpu_switch_t fpu;
#if CONFIG_XEN_COMPAT > 0x030002
struct physdev_set_iopl iopl_op;
struct physdev_set_iobitmap iobmp_op;
@@ -403,40 +409,7 @@ __switch_to(struct task_struct *prev_p,
#endif
multicall_entry_t _mcl[8], *mcl = _mcl;
- /*
- * If the task has used fpu the last 5 timeslices, just do a full
- * restore of the math state immediately to avoid the trap; the
- * chances of needing FPU soon are obviously high now
- */
- preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
- /* we're going to use this soon, after a few expensive things */
- if (preload_fpu)
- prefetch(next->fpu.state);
-
- /*
- * This is basically '__unlazy_fpu', except that we queue a
- * multicall to indicate FPU task switch, rather than
- * synchronously trapping to Xen.
- * The AMD workaround requires it to be after DS reload, or
- * after DS has been cleared, which we do in __prepare_arch_switch.
- */
- if (task_thread_info(prev_p)->status & TS_USEDFPU) {
- __save_init_fpu(prev_p); /* _not_ save_init_fpu() */
- if (!preload_fpu) {
- mcl->op = __HYPERVISOR_fpu_taskswitch;
- mcl->args[0] = 1;
- mcl++;
- }
- } else
- prev_p->fpu_counter = 0;
-
- /* Make sure cpu is ready for new context */
- if (preload_fpu) {
- mcl->op = __HYPERVISOR_fpu_taskswitch;
- mcl->args[0] = 0;
- mcl++;
- }
+ fpu = xen_switch_fpu_prepare(prev_p, next_p, cpu, &mcl);
/*
* Reload sp0.
@@ -539,6 +512,8 @@ __switch_to(struct task_struct *prev_p,
if (next->gs)
WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_GS_USER, next->gs));
+ switch_fpu_finish(next_p, fpu);
+
/*
* Switch the PDA context.
*/
@@ -555,13 +530,6 @@ __switch_to(struct task_struct *prev_p,
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev_p, next_p);
- /*
- * Preload the FPU context, now that we've determined that the
- * task is likely to be using it.
- */
- if (preload_fpu)
- __math_state_restore();
-
return prev_p;
}
--- 12.2.orig/arch/x86/kernel/setup-xen.c 2012-06-08 10:38:03.000000000 +0200
+++ 12.2/arch/x86/kernel/setup-xen.c 2012-02-09 12:32:50.000000000 +0100
@@ -341,7 +341,8 @@ static void __init cleanup_highmap(void)
static void __init reserve_brk(void)
{
if (_brk_end > _brk_start)
- memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK");
+ memblock_reserve(__pa(_brk_start),
+ __pa(_brk_end) - __pa(_brk_start));
/* Mark brk area as locked down and no longer taking any
new allocations */
@@ -367,13 +368,13 @@ static void __init relocate_initrd(void)
ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
PAGE_SIZE);
- if (ramdisk_here == MEMBLOCK_ERROR)
+ if (!ramdisk_here)
panic("Cannot find place for new RAMDISK of size %lld\n",
ramdisk_size);
/* Note: this includes all the lowmem currently occupied by
the initrd, we rely on that fact to keep the data intact. */
- memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK");
+ memblock_reserve(ramdisk_here, area_size);
initrd_start = ramdisk_here + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size;
printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
@@ -446,7 +447,7 @@ static void __init reserve_initrd(void)
initrd_start = 0;
if (ramdisk_size >= (end_of_lowmem>>1)) {
- memblock_x86_free_range(ramdisk_image, ramdisk_end);
+ memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
printk(KERN_ERR "initrd too large to handle, "
"disabling initrd\n");
return;
@@ -472,7 +473,7 @@ static void __init reserve_initrd(void)
relocate_initrd();
- memblock_x86_free_range(ramdisk_image, ramdisk_end);
+ memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
}
#else
static void __init reserve_initrd(void)
@@ -551,15 +552,13 @@ static void __init memblock_x86_reserve_
#ifndef CONFIG_XEN
struct setup_data *data;
u64 pa_data;
- char buf[32];
if (boot_params.hdr.version < 0x0209)
return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
data = early_memremap(pa_data, sizeof(*data));
- sprintf(buf, "setup data %x", data->type);
- memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf);
+ memblock_reserve(pa_data, sizeof(*data) + data->len);
pa_data = data->next;
early_iounmap(data, sizeof(*data));
}
@@ -617,7 +616,7 @@ static void __init reserve_crashkernel(v
crash_base = memblock_find_in_range(alignment,
CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
- if (crash_base == MEMBLOCK_ERROR) {
+ if (!crash_base) {
pr_info("crashkernel reservation failed - No suitable area found.\n");
return;
}
@@ -631,7 +630,7 @@ static void __init reserve_crashkernel(v
return;
}
}
- memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL");
+ memblock_reserve(crash_base, crash_size);
printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
"for crashkernel (System RAM: %ldMB)\n",
@@ -695,7 +694,7 @@ static __init void reserve_ibft_region(v
#ifndef CONFIG_XEN
if (size)
- memblock_x86_reserve_range(addr, addr + size, "* ibft");
+ memblock_reserve(addr, size);
#endif
}
@@ -843,12 +842,7 @@ void __init setup_arch(char **cmdline_p)
#endif
#ifdef CONFIG_EFI
if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
-#ifdef CONFIG_X86_32
- "EL32",
-#else
- "EL64",
-#endif
- 4)) {
+ EFI_LOADER_SIGNATURE, 4)) {
efi_enabled = 1;
efi_memblock_x86_reserve_range();
}
--- 12.2.orig/arch/x86/kernel/smp-xen.c 2011-07-01 15:47:44.000000000 +0200
+++ 12.2/arch/x86/kernel/smp-xen.c 2012-02-16 17:53:11.000000000 +0100
@@ -28,6 +28,7 @@
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/ipi.h>
+#include <asm/nmi.h>
#include <xen/evtchn.h>
/*
* Some notes on x86 processor bugs affecting SMP operation:
@@ -132,6 +133,20 @@ void xen_send_call_func_ipi(const struct
xen_send_IPI_mask_allbutself(mask, CALL_FUNCTION_VECTOR);
}
+static atomic_t stopping_cpu = ATOMIC_INIT(-1);
+static bool __read_mostly xen_smp_disable_nmi_ipi;
+
+static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
+{
+ /* We are registered on stopping cpu too, avoid spurious NMI */
+ if (raw_smp_processor_id() == atomic_read(&stopping_cpu))
+ return NMI_HANDLED;
+
+ stop_this_cpu(NULL);
+
+ return NMI_HANDLED;
+}
+
/*
* this function calls the 'stop' function on all other CPUs in the system.
*/
@@ -158,7 +173,27 @@ void xen_stop_other_cpus(int wait)
* currently)
*/
if (num_online_cpus() > 1) {
- xen_send_IPI_allbutself(REBOOT_VECTOR);
+ unsigned int vector = REBOOT_VECTOR;
+
+ if (!xen_smp_disable_nmi_ipi) {
+ /* did someone beat us here? */
+ if (atomic_cmpxchg(&stopping_cpu, -1,
+ safe_smp_processor_id()) != -1)
+ return;
+
+ if (register_nmi_handler(NMI_LOCAL,
+ smp_stop_nmi_callback,
+ NMI_FLAG_FIRST, "smp_stop"))
+ /* Note: we ignore failures here */
+ return;
+
+ /* sync above data before sending NMI */
+ wmb();
+
+ vector = NMI_VECTOR;
+ }
+
+ xen_send_IPI_allbutself(vector);
/*
* Don't wait longer than a second if the caller
@@ -199,3 +234,11 @@ irqreturn_t smp_call_function_single_int
return IRQ_HANDLED;
}
+
+static int __init nonmi_ipi_setup(char *str)
+{
+ xen_smp_disable_nmi_ipi = true;
+ return 1;
+}
+
+__setup("nonmi_ipi", nonmi_ipi_setup);
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ 12.2/arch/x86/kernel/syscall_32-xen.c 2012-02-29 14:36:52.000000000 +0100
@@ -0,0 +1,20 @@
+#include "syscall_32.c"
+
+#include <linux/thread_info.h>
+
+#ifdef TIF_CSTAR
+extern asmlinkage void cstar_set_tif(void);
+
+#define ptregs_fork cstar_set_tif
+#define ptregs_clone cstar_set_tif
+#define ptregs_vfork cstar_set_tif
+
+const sys_call_ptr_t cstar_call_table[__NR_syscall_max+1] = {
+ /*
+ * Smells like a compiler bug -- it doesn't work
+ * when the & below is removed.
+ */
+ [0 ... __NR_syscall_max] = &sys_ni_syscall,
+#include <asm/syscalls_32.h>
+};
+#endif /* TIF_CSTAR */
--- 12.2.orig/arch/x86/kernel/traps-xen.c 2011-11-17 16:50:15.000000000 +0100
+++ 12.2/arch/x86/kernel/traps-xen.c 2012-06-20 12:18:34.000000000 +0200
@@ -310,19 +310,20 @@ dotraplinkage void __kprobes do_int3(str
== NOTIFY_STOP)
return;
#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
-#ifdef CONFIG_KPROBES
+
if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
== NOTIFY_STOP)
return;
-#else
- if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
- == NOTIFY_STOP)
- return;
-#endif
+ /*
+ * Let others (NMI) know that the debug stack is in use
+ * as we may switch to the interrupt stack.
+ */
+ debug_stack_usage_inc();
preempt_conditional_sti(regs);
do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
preempt_conditional_cli(regs);
+ debug_stack_usage_dec();
}
#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
@@ -415,6 +416,12 @@ dotraplinkage void __kprobes do_debug(st
SIGTRAP) == NOTIFY_STOP)
return;
+ /*
+ * Let others (NMI) know that the debug stack is in use
+ * as we may switch to the interrupt stack.
+ */
+ debug_stack_usage_inc();
+
/* It's safe to allow irq's after DR6 has been saved */
preempt_conditional_sti(regs);
@@ -422,6 +429,7 @@ dotraplinkage void __kprobes do_debug(st
handle_vm86_trap((struct kernel_vm86_regs *) regs,
error_code, 1);
preempt_conditional_cli(regs);
+ debug_stack_usage_dec();
return;
}
@@ -441,6 +449,7 @@ dotraplinkage void __kprobes do_debug(st
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
send_sigtrap(tsk, regs, error_code, si_code);
preempt_conditional_cli(regs);
+ debug_stack_usage_dec();
return;
}
@@ -568,41 +577,18 @@ asmlinkage void __attribute__((weak)) sm
#endif /* CONFIG_XEN */
/*
- * __math_state_restore assumes that cr0.TS is already clear and the
- * fpu state is all ready for use. Used during context switch.
- */
-void __math_state_restore(void)
-{
- struct thread_info *thread = current_thread_info();
- struct task_struct *tsk = thread->task;
-
- /*
- * Paranoid restore. send a SIGSEGV if we fail to restore the state.
- */
- if (unlikely(restore_fpu_checking(tsk))) {
- stts();
- force_sig(SIGSEGV, tsk);
- return;
- }
-
- thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
- tsk->fpu_counter++;
-}
-
-/*
* 'math_state_restore()' saves the current math information in the
* old math state array, and gets the new ones from the current task
*
* Careful.. There are problems with IBM-designed IRQ13 behaviour.
* Don't touch unless you *really* know how it works.
*
- * Must be called with kernel preemption disabled (in this case,
- * local interrupts are disabled at the call-site in entry.S).
+ * Must be called with kernel preemption disabled (e.g. with local
+ * interrupts disabled, as in the case of do_device_not_available).
*/
-asmlinkage void math_state_restore(void)
+void math_state_restore(void)
{
- struct thread_info *thread = current_thread_info();
- struct task_struct *tsk = thread->task;
+ struct task_struct *tsk = current;
if (!tsk_used_math(tsk)) {
local_irq_enable();
@@ -620,7 +606,17 @@ asmlinkage void math_state_restore(void)
}
/* NB. 'clts' is done for us by Xen during virtual trap. */
- __math_state_restore();
+ xen_thread_fpu_begin(tsk, NULL);
+ /*
+ * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+ */
+ if (unlikely(restore_fpu_checking(tsk))) {
+ __thread_fpu_end(tsk);
+ force_sig(SIGSEGV, tsk);
+ return;
+ }
+
+ tsk->fpu_counter++;
}
dotraplinkage void __kprobes
--- 12.2.orig/arch/x86/kernel/vsyscall_64-xen.c 2011-11-17 15:56:06.000000000 +0100
+++ 12.2/arch/x86/kernel/vsyscall_64-xen.c 2012-02-09 12:32:50.000000000 +0100
@@ -57,7 +57,7 @@ DEFINE_VVAR(struct vsyscall_gtod_data, v
.lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
};
-static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE;
+static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
static int __init vsyscall_setup(char *str)
{
@@ -142,11 +142,40 @@ static int addr_to_vsyscall_nr(unsigned
return nr;
}
+static bool write_ok_or_segv(unsigned long ptr, size_t size)
+{
+ /*
+ * XXX: if access_ok, get_user, and put_user handled
+ * sig_on_uaccess_error, this could go away.
+ */
+
+ if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) {
+ siginfo_t info;
+ struct thread_struct *thread = &current->thread;
+
+ thread->error_code = 6; /* user fault, no page, write */
+ thread->cr2 = ptr;
+ thread->trap_no = 14;
+
+ memset(&info, 0, sizeof(info));
+ info.si_signo = SIGSEGV;
+ info.si_errno = 0;
+ info.si_code = SEGV_MAPERR;
+ info.si_addr = (void __user *)ptr;
+
+ force_sig_info(SIGSEGV, &info, current);
+ return false;
+ } else {
+ return true;
+ }
+}
+
bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
{
struct task_struct *tsk;
unsigned long caller;
int vsyscall_nr;
+ int prev_sig_on_uaccess_error;
long ret;
/*
@@ -182,18 +211,43 @@ bool emulate_vsyscall(struct pt_regs *re
if (seccomp_mode(&tsk->seccomp))
do_exit(SIGKILL);
+ /*
+ * With a real vsyscall, page faults cause SIGSEGV. We want to
+ * preserve that behavior to make writing exploits harder.
+ */
+ prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
+ current_thread_info()->sig_on_uaccess_error = 1;
+
+ /*
+ * 0 is a valid user pointer (in the access_ok sense) on 32-bit and
+ * 64-bit, so we don't need to special-case it here. For all the
+ * vsyscalls, 0 means "don't write anything" not "write it at
+ * address 0".
+ */
+ ret = -EFAULT;
switch (vsyscall_nr) {
case 0:
+ if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
+ !write_ok_or_segv(regs->si, sizeof(struct timezone)))
+ break;
+
ret = sys_gettimeofday(
(struct timeval __user *)regs->di,
(struct timezone __user *)regs->si);
break;
case 1:
+ if (!write_ok_or_segv(regs->di, sizeof(time_t)))
+ break;
+
ret = sys_time((time_t __user *)regs->di);
break;
case 2:
+ if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
+ !write_ok_or_segv(regs->si, sizeof(unsigned)))
+ break;
+
ret = sys_getcpu((unsigned __user *)regs->di,
(unsigned __user *)regs->si,
0);
@@ -203,17 +257,22 @@ bool emulate_vsyscall(struct pt_regs *re
break;
}
+ current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
+
if (ret == -EFAULT) {
- /*
- * Bad news -- userspace fed a bad pointer to a vsyscall.
- *
- * With a real vsyscall, that would have caused SIGSEGV.
- * To make writing reliable exploits using the emulated
- * vsyscalls harder, generate SIGSEGV here as well.
- */
+ /* Bad news -- userspace fed a bad pointer to a vsyscall. */
warn_bad_vsyscall(KERN_INFO, regs,
"vsyscall fault (exploit attempt?)");
- goto sigsegv;
+
+ /*
+ * If we failed to generate a signal for any reason,
+ * generate one here. (This should be impossible.)
+ */
+ if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
+ !sigismember(&tsk->pending.signal, SIGSEGV)))
+ goto sigsegv;
+
+ return true; /* Don't emulate the ret. */
}
regs->ax = ret;
--- 12.2.orig/arch/x86/mm/fault-xen.c 2011-11-17 15:56:06.000000000 +0100
+++ 12.2/arch/x86/mm/fault-xen.c 2012-02-16 13:54:07.000000000 +0100
@@ -635,7 +635,7 @@ pgtable_bad(struct pt_regs *regs, unsign
static noinline void
no_context(struct pt_regs *regs, unsigned long error_code,
- unsigned long address)
+ unsigned long address, int signal, int si_code)
{
struct task_struct *tsk = current;
unsigned long *stackend;
@@ -643,8 +643,17 @@ no_context(struct pt_regs *regs, unsigne
int sig;
/* Are we prepared to handle this kernel fault? */
- if (fixup_exception(regs))
+ if (fixup_exception(regs)) {
+ if (current_thread_info()->sig_on_uaccess_error && signal) {
+ tsk->thread.trap_no = 14;
+ tsk->thread.error_code = error_code | PF_USER;
+ tsk->thread.cr2 = address;
+
+ /* XXX: hwpoison faults will set the wrong code. */
+ force_sig_info_fault(signal, si_code, address, tsk, 0);
+ }
return;
+ }
/*
* 32-bit:
@@ -673,7 +682,7 @@ no_context(struct pt_regs *regs, unsigne
stackend = end_of_stack(tsk);
if (tsk != &init_task && *stackend != STACK_END_MAGIC)
- printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
+ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
tsk->thread.cr2 = address;
tsk->thread.trap_no = 14;
@@ -684,7 +693,7 @@ no_context(struct pt_regs *regs, unsigne
sig = 0;
/* Executive summary in case the body of the oops scrolled away */
- printk(KERN_EMERG "CR2: %016lx\n", address);
+ printk(KERN_DEFAULT "CR2: %016lx\n", address);
oops_end(flags, regs, sig);
}
@@ -764,7 +773,7 @@ __bad_area_nosemaphore(struct pt_regs *r
if (is_f00f_bug(regs, address))
return;
- no_context(regs, error_code, address);
+ no_context(regs, error_code, address, SIGSEGV, si_code);
}
static noinline void
@@ -828,7 +837,7 @@ do_sigbus(struct pt_regs *regs, unsigned
/* Kernel mode? Handle exceptions or die: */
if (!(error_code & PF_USER)) {
- no_context(regs, error_code, address);
+ no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
return;
}
@@ -863,7 +872,7 @@ mm_fault_error(struct pt_regs *regs, uns
if (!(fault & VM_FAULT_RETRY))
up_read(&current->mm->mmap_sem);
if (!(error_code & PF_USER))
- no_context(regs, error_code, address);
+ no_context(regs, error_code, address, 0, 0);
return 1;
}
if (!(fault & VM_FAULT_ERROR))
@@ -873,7 +882,8 @@ mm_fault_error(struct pt_regs *regs, uns
/* Kernel mode? Handle exceptions or die: */
if (!(error_code & PF_USER)) {
up_read(&current->mm->mmap_sem);
- no_context(regs, error_code, address);
+ no_context(regs, error_code, address,
+ SIGSEGV, SEGV_MAPERR);
return 1;
}
--- 12.2.orig/arch/x86/mm/init-xen.c 2011-07-01 15:48:40.000000000 +0200
+++ 12.2/arch/x86/mm/init-xen.c 2012-02-21 14:29:17.000000000 +0100
@@ -16,6 +16,7 @@
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/proto.h>
+#include <asm/dma.h> /* for MAX_DMA_PFN */
unsigned long __meminitdata pgt_buf_start;
unsigned long __meminitdata pgt_buf_end;
@@ -99,7 +100,8 @@ static void __init find_early_table_spac
void __init xen_pagetable_reserve(u64 start, u64 end)
{
- memblock_x86_reserve_range(start, end, "PGTABLE");
+ if (end > start)
+ memblock_reserve(start, end - start);
}
struct map_range {
@@ -341,8 +343,8 @@ unsigned long __init_refok init_memory_m
* pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
* so that they can be reused for other purposes.
*
- * On native it just means calling memblock_x86_reserve_range, on Xen it
- * also means marking RW the pagetable pages that we allocated before
+ * On native it just means calling memblock_reserve, on Xen it also
+ * means marking RW the pagetable pages that we allocated before
* but that haven't been used.
*
* In fact on xen we mark RO the whole range pgt_buf_start -
@@ -466,3 +468,24 @@ void free_initrd_mem(unsigned long start
free_init_pages("initrd memory", start, PAGE_ALIGN(end));
}
#endif
+
+void __init zone_sizes_init(void)
+{
+ unsigned long max_zone_pfns[MAX_NR_ZONES];
+
+ memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+
+#ifdef CONFIG_ZONE_DMA
+ max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
+#endif
+#ifdef CONFIG_ZONE_DMA32
+ max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
+#endif
+ max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
+#ifdef CONFIG_HIGHMEM
+ max_zone_pfns[ZONE_HIGHMEM] = max_pfn;
+#endif
+
+ free_area_init_nodes(max_zone_pfns);
+}
+
--- 12.2.orig/arch/x86/mm/init_32-xen.c 2011-07-01 15:19:35.000000000 +0200
+++ 12.2/arch/x86/mm/init_32-xen.c 2012-02-09 15:46:24.000000000 +0100
@@ -463,23 +463,17 @@ static void __init add_one_highpage_init
void __init add_highpages_with_active_regions(int nid,
unsigned long start_pfn, unsigned long end_pfn)
{
- struct range *range;
- int nr_range;
- int i;
-
- nr_range = __get_free_all_memory_range(&range, nid, start_pfn, end_pfn);
-
- for (i = 0; i < nr_range; i++) {
- struct page *page;
- int node_pfn;
-
- for (node_pfn = range[i].start; node_pfn < range[i].end;
- node_pfn++) {
- if (!pfn_valid(node_pfn))
- continue;
- page = pfn_to_page(node_pfn);
- add_one_highpage_init(page);
- }
+ phys_addr_t start, end;
+ u64 i;
+
+ for_each_free_mem_range(i, nid, &start, &end, NULL) {
+ unsigned long pfn = clamp_t(unsigned long, PFN_UP(start),
+ start_pfn, end_pfn);
+ unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end),
+ start_pfn, end_pfn);
+ for ( ; pfn < e_pfn; pfn++)
+ if (pfn_valid(pfn))
+ add_one_highpage_init(pfn_to_page(pfn));
}
}
#else
@@ -652,18 +646,18 @@ void __init initmem_init(void)
highstart_pfn = highend_pfn = max_pfn;
if (max_pfn > max_low_pfn)
highstart_pfn = max_low_pfn;
- memblock_x86_register_active_regions(0, 0, highend_pfn);
- sparse_memory_present_with_active_regions(0);
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
pages_to_mb(highend_pfn - highstart_pfn));
num_physpages = highend_pfn;
high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
- memblock_x86_register_active_regions(0, 0, max_low_pfn);
- sparse_memory_present_with_active_regions(0);
num_physpages = max_low_pfn;
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif
+
+ memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
+ sparse_memory_present_with_active_regions(0);
+
#ifdef CONFIG_FLATMEM
max_mapnr = num_physpages;
#endif
@@ -676,30 +670,8 @@ void __init initmem_init(void)
}
#endif /* !CONFIG_NEED_MULTIPLE_NODES */
-static void __init zone_sizes_init(void)
-{
- unsigned long max_zone_pfns[MAX_NR_ZONES];
- memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-#ifdef CONFIG_ZONE_DMA
- max_zone_pfns[ZONE_DMA] =
- virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-#endif
- max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
-#ifdef CONFIG_HIGHMEM
- max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
-#endif
-
- free_area_init_nodes(max_zone_pfns);
-}
-
void __init setup_bootmem_allocator(void)
{
-#ifdef CONFIG_XEN
- if (max_low_pfn > xen_start_info->nr_pages)
- memblock_x86_reserve_range(xen_start_info->nr_pages << PAGE_SHIFT,
- max_low_pfn << PAGE_SHIFT, "BALLOON");
-#endif
-
printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
max_pfn_mapped<<PAGE_SHIFT);
printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
@@ -753,8 +725,7 @@ unsigned long __init extend_init_mapping
}
if (start_pfn > start)
- memblock_x86_reserve_range(start << PAGE_SHIFT,
- start_pfn << PAGE_SHIFT, "INITMAP");
+ memblock_reserve(PFN_PHYS(start), PFN_PHYS(start_pfn - start));
return start_pfn;
}
@@ -821,6 +792,17 @@ void __init mem_init(void)
#ifdef CONFIG_FLATMEM
BUG_ON(!mem_map);
#endif
+ /*
+ * With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to
+ * be done before free_all_bootmem(). Memblock use free low memory for
+ * temporary data (see find_range_array()) and for this purpose can use
+ * pages that was already passed to the buddy allocator, hence marked as
+ * not accessible in the page tables when compiled with
+ * CONFIG_DEBUG_PAGEALLOC. Otherwise order of initialization is not
+ * important here.
+ */
+ set_highmem_pages_init();
+
/* this will put all low memory onto the freelists */
totalram_pages += free_all_bootmem();
/* XEN: init low-mem pages outside initial allocation. */
@@ -837,8 +819,6 @@ void __init mem_init(void)
if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
reservedpages++;
- set_highmem_pages_init();
-
codesize = (unsigned long) &_etext - (unsigned long) &_text;
datasize = (unsigned long) &_edata - (unsigned long) &_etext;
initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
--- 12.2.orig/arch/x86/mm/init_64-xen.c 2011-09-12 13:55:43.000000000 +0200
+++ 12.2/arch/x86/mm/init_64-xen.c 2012-02-09 15:47:16.000000000 +0100
@@ -862,26 +862,12 @@ kernel_physical_mapping_init(unsigned lo
#ifndef CONFIG_NUMA
void __init initmem_init(void)
{
- memblock_x86_register_active_regions(0, 0, max_pfn);
-#ifdef CONFIG_XEN
- if (max_pfn > xen_start_info->nr_pages)
- memblock_x86_reserve_range(xen_start_info->nr_pages << PAGE_SHIFT,
- max_pfn << PAGE_SHIFT, "BALLOON");
-#endif
+ memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
}
#endif
void __init paging_init(void)
{
- unsigned long max_zone_pfns[MAX_NR_ZONES];
-
- memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-#ifdef CONFIG_ZONE_DMA
- max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
-#endif
- max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
- max_zone_pfns[ZONE_NORMAL] = max_pfn;
-
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
@@ -893,7 +879,7 @@ void __init paging_init(void)
*/
node_clear_state(0, N_NORMAL_MEMORY);
- free_area_init_nodes(max_zone_pfns);
+ zone_sizes_init();
SetPagePinned(virt_to_page(init_mm.pgd));
}
--- 12.2.orig/arch/x86/mm/pageattr-xen.c 2011-04-13 17:01:32.000000000 +0200
+++ 12.2/arch/x86/mm/pageattr-xen.c 2012-02-09 12:32:50.000000000 +0100
@@ -1083,7 +1083,7 @@ out_err:
}
EXPORT_SYMBOL(set_memory_uc);
-int _set_memory_array(unsigned long *addr, int addrinarray,
+static int _set_memory_array(unsigned long *addr, int addrinarray,
unsigned long new_type)
{
int i, j;
@@ -1419,12 +1419,6 @@ void kernel_map_pages(struct page *page,
}
/*
- * If page allocator is not up yet then do not call c_p_a():
- */
- if (!debug_pagealloc_enabled)
- return;
-
- /*
* The return value is ignored as the calls cannot fail.
* Large pages for identity mappings are not used at boot time
* and hence no memory allocations during large page split.
--- 12.2.orig/drivers/acpi/osl.c 2012-06-20 12:12:04.000000000 +0200
+++ 12.2/drivers/acpi/osl.c 2012-04-10 17:24:36.000000000 +0200
@@ -326,8 +326,12 @@ acpi_map_lookup_virt(void __iomem *virt,
}
#ifndef CONFIG_IA64
+#ifndef CONFIG_XEN
#define should_use_kmap(pfn) page_is_ram(pfn)
#else
+#define should_use_kmap(mfn) pfn_valid(pfn = mfn_to_local_pfn(mfn))
+#endif
+#else
/* ioremap will take care of cache attributes */
#define should_use_kmap(pfn) 0
#endif
--- 12.2.orig/drivers/acpi/processor_core.c 2012-02-08 12:25:09.000000000 +0100
+++ 12.2/drivers/acpi/processor_core.c 2012-02-09 14:24:29.000000000 +0100
@@ -205,7 +205,7 @@ int acpi_get_cpuid(acpi_handle handle, i
* Ignores apic_id and always return 0 for CPU0's handle.
* Return -1 for other CPU's handle.
*/
- if (acpi_id == 0)
+ if (acpi_id == 0 && !i)
return acpi_id;
else
return apic_id;
--- 12.2.orig/drivers/acpi/processor_driver.c 2012-05-23 13:35:40.000000000 +0200
+++ 12.2/drivers/acpi/processor_driver.c 2012-05-23 13:40:47.000000000 +0200
@@ -492,7 +492,11 @@ static struct notifier_block acpi_cpu_no
*/
static __ref int acpi_processor_start(struct acpi_processor *pr)
{
+#ifndef CONFIG_XEN
struct acpi_device *device = per_cpu(processor_device_array, pr->id);
+#else
+ struct acpi_device *device = radix_tree_lookup(&processor_device_tree, pr->acpi_id);
+#endif
int result = 0;
#if defined(CONFIG_CPU_FREQ) || defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL)
--- 12.2.orig/drivers/acpi/processor_perflib.c 2011-06-30 16:41:01.000000000 +0200
+++ 12.2/drivers/acpi/processor_perflib.c 2012-02-16 13:47:34.000000000 +0100
@@ -253,7 +253,6 @@ void acpi_processor_ppc_exit(void)
acpi_processor_ppc_status &= ~PPC_REGISTERED;
}
-#endif /* CONFIG_CPU_FREQ */
/*
* Do a quick check if the systems looks like it should use ACPI
@@ -276,6 +275,7 @@ void acpi_processor_load_module(struct a
}
kfree(buffer.pointer);
}
+#endif /* CONFIG_CPU_FREQ */
static int acpi_processor_get_performance_control(struct acpi_processor *pr)
{
--- 12.2.orig/drivers/pci/msi-xen.c 2011-11-17 15:56:06.000000000 +0100
+++ 12.2/drivers/pci/msi-xen.c 2012-06-21 08:35:05.000000000 +0200
@@ -36,18 +36,21 @@ static int pci_seg_supported = 1;
static LIST_HEAD(msi_dev_head);
DEFINE_SPINLOCK(msi_dev_lock);
+struct msi_pirq_entry {
+ struct list_head list;
+ int pirq;
+ int entry_nr;
+ struct msi_dev_list *dev_entry;
+ struct kobject kobj;
+};
+
struct msi_dev_list {
struct pci_dev *dev;
- struct list_head list;
spinlock_t pirq_list_lock;
/* Store default pre-assigned irq */
unsigned int default_irq;
-};
-
-struct msi_pirq_entry {
- struct list_head list;
- int pirq;
- int entry_nr;
+ domid_t owner;
+ struct msi_pirq_entry e;
};
/* Arch hooks */
@@ -87,6 +90,21 @@ static void msix_set_enable(struct pci_d
}
}
+static int (*get_owner)(struct pci_dev *dev);
+
+static domid_t msi_get_dev_owner(struct pci_dev *dev)
+{
+ int owner;
+
+ if (is_initial_xendomain()
+ && get_owner && (owner = get_owner(dev)) >= 0) {
+ dev_info(&dev->dev, "get owner: %u\n", owner);
+ return owner;
+ }
+
+ return DOMID_SELF;
+}
+
static struct msi_dev_list *get_msi_dev_pirq_list(struct pci_dev *dev)
{
struct msi_dev_list *msi_dev_list, *ret = NULL;
@@ -94,12 +112,14 @@ static struct msi_dev_list *get_msi_dev_
spin_lock_irqsave(&msi_dev_lock, flags);
- list_for_each_entry(msi_dev_list, &msi_dev_head, list)
+ list_for_each_entry(msi_dev_list, &msi_dev_head, e.list)
if ( msi_dev_list->dev == dev )
ret = msi_dev_list;
if ( ret ) {
spin_unlock_irqrestore(&msi_dev_lock, flags);
+ if (ret->owner == DOMID_IO)
+ ret->owner = msi_get_dev_owner(dev);
return ret;
}
@@ -114,7 +134,10 @@ static struct msi_dev_list *get_msi_dev_
ret->dev = dev;
spin_lock_init(&ret->pirq_list_lock);
- list_add_tail(&ret->list, &msi_dev_head);
+ ret->owner = msi_get_dev_owner(dev);
+ ret->e.entry_nr = -1;
+ ret->e.dev_entry = ret;
+ list_add_tail(&ret->e.list, &msi_dev_head);
spin_unlock_irqrestore(&msi_dev_lock, flags);
return ret;
}
@@ -129,6 +152,8 @@ static int attach_pirq_entry(int pirq, i
return -ENOMEM;
entry->pirq = pirq;
entry->entry_nr = entry_nr;
+ entry->dev_entry = msi_dev_entry;
+ memset(&entry->kobj, 0, sizeof(entry->kobj));
spin_lock_irqsave(&msi_dev_entry->pirq_list_lock, flags);
list_add_tail(&entry->list, &msi_dev_entry->dev->msi_list);
spin_unlock_irqrestore(&msi_dev_entry->pirq_list_lock, flags);
@@ -152,11 +177,10 @@ static void detach_pirq_entry(int entry_
}
}
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
/*
* pciback will provide device's owner
*/
-static int (*get_owner)(struct pci_dev *dev);
-
int register_msi_get_owner(int (*func)(struct pci_dev *dev))
{
if (get_owner) {
@@ -176,26 +200,15 @@ int unregister_msi_get_owner(int (*func)
return 0;
}
EXPORT_SYMBOL(unregister_msi_get_owner);
+#endif
-static int msi_get_dev_owner(struct pci_dev *dev)
-{
- int owner;
-
- BUG_ON(!is_initial_xendomain());
- if (get_owner && (owner = get_owner(dev)) >= 0) {
- dev_info(&dev->dev, "get owner: %x \n", owner);
- return owner;
- }
-
- return DOMID_SELF;
-}
-
-static int msi_unmap_pirq(struct pci_dev *dev, int pirq)
+static int msi_unmap_pirq(struct pci_dev *dev, int pirq, domid_t owner,
+ struct kobject *kobj)
{
struct physdev_unmap_pirq unmap;
int rc;
- unmap.domid = msi_get_dev_owner(dev);
+ unmap.domid = owner;
/* See comments in msi_map_vector, input parameter pirq means
* irq number only if the device belongs to dom0 itself.
*/
@@ -208,6 +221,16 @@ static int msi_unmap_pirq(struct pci_dev
if (rc < 0)
return rc;
+ /*
+ * Its possible that we get into this path when populate_msi_sysfs()
+ * fails, which means the entries were not registered with sysfs.
+ * In that case don't unregister them.
+ */
+ if (kobj->parent) {
+ kobject_del(kobj);
+ kobject_put(kobj);
+ }
+
if (unmap.domid == DOMID_SELF)
evtchn_map_pirq(pirq, 0);
@@ -233,13 +256,11 @@ static u64 find_table_base(struct pci_de
/*
* Protected by msi_lock
*/
-static int msi_map_vector(struct pci_dev *dev, int entry_nr, u64 table_base)
+static int msi_map_vector(struct pci_dev *dev, int entry_nr, u64 table_base,
+ domid_t domid)
{
struct physdev_map_pirq map_irq;
int rc = -EINVAL;
- domid_t domid = DOMID_SELF;
-
- domid = msi_get_dev_owner(dev);
map_irq.domid = domid;
map_irq.type = MAP_PIRQ_TYPE_MSI_SEG;
@@ -338,6 +359,142 @@ void pci_restore_msi_state(struct pci_de
}
EXPORT_SYMBOL_GPL(pci_restore_msi_state);
+
+#define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr)
+#define to_pirq_entry(obj) container_of(obj, struct msi_pirq_entry, kobj)
+
+struct msi_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct msi_pirq_entry *, struct msi_attribute *,
+ char *buf);
+ ssize_t (*store)(struct msi_pirq_entry *, struct msi_attribute *,
+ const char *buf, size_t count);
+};
+
+static ssize_t show_msi_mode(struct msi_pirq_entry *entry,
+ struct msi_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%s\n", entry->entry_nr >= 0 ? "msix" : "msi");
+}
+
+static ssize_t show_xen_irq(struct msi_pirq_entry *entry,
+ struct msi_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", entry->dev_entry->owner == DOMID_SELF
+ ? evtchn_get_xen_pirq(entry->pirq)
+ : entry->pirq);
+}
+
+static ssize_t msi_irq_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct msi_attribute *attribute = to_msi_attr(attr);
+ struct msi_pirq_entry *entry = to_pirq_entry(kobj);
+
+ if (!attribute->show)
+ return -EIO;
+
+ return attribute->show(entry, attribute, buf);
+}
+
+static const struct sysfs_ops msi_irq_sysfs_ops = {
+ .show = msi_irq_attr_show,
+};
+
+static struct msi_attribute mode_attribute =
+ __ATTR(mode, S_IRUGO, show_msi_mode, NULL);
+
+static struct msi_attribute xen_irq_attribute =
+ __ATTR(xen_irq, S_IRUGO, show_xen_irq, NULL);
+
+static struct attribute *msi_irq_default_attrs[] = {
+ &mode_attribute.attr,
+ &xen_irq_attribute.attr,
+ NULL
+};
+
+static struct attribute *msi_pirq_default_attrs[] = {
+ &mode_attribute.attr,
+ NULL
+};
+
+static void msi_kobj_release(struct kobject *kobj)
+{
+ struct msi_dev_list *entry = to_pirq_entry(kobj)->dev_entry;
+
+ pci_dev_put(entry->dev);
+}
+
+static struct kobj_type msi_irq_ktype = {
+ .release = msi_kobj_release,
+ .sysfs_ops = &msi_irq_sysfs_ops,
+ .default_attrs = msi_irq_default_attrs,
+};
+
+static struct kobj_type msi_pirq_ktype = {
+ .release = msi_kobj_release,
+ .sysfs_ops = &msi_irq_sysfs_ops,
+ .default_attrs = msi_pirq_default_attrs,
+};
+
+static int populate_msi_sysfs(struct pci_dev *pdev)
+{
+ struct msi_dev_list *dev_entry = get_msi_dev_pirq_list(pdev);
+ domid_t owner = dev_entry->owner;
+ struct msi_pirq_entry *pirq_entry;
+ struct kobject *kobj;
+ int ret;
+ int count = 0;
+
+ pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj);
+ if (!pdev->msi_kset)
+ return -ENOMEM;
+
+ if (pdev->msi_enabled) {
+ kobj = &dev_entry->e.kobj;
+ kobj->kset = pdev->msi_kset;
+ pci_dev_get(pdev);
+ if (owner == DOMID_SELF)
+ ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
+ "%u", pdev->irq);
+ else
+ ret = kobject_init_and_add(kobj, &msi_pirq_ktype, NULL,
+ "xen-%u", pdev->irq);
+ if (ret)
+ pci_dev_put(pdev);
+ return ret;
+ }
+
+ list_for_each_entry(pirq_entry, &pdev->msi_list, list) {
+ kobj = &pirq_entry->kobj;
+ kobj->kset = pdev->msi_kset;
+ pci_dev_get(pdev);
+ if (owner == DOMID_SELF)
+ ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
+ "%u", pirq_entry->pirq);
+ else
+ ret = kobject_init_and_add(kobj, &msi_pirq_ktype, NULL,
+ "xen-%u", pirq_entry->pirq);
+ if (ret)
+ goto out_unroll;
+
+ count++;
+ }
+
+ return 0;
+
+out_unroll:
+ pci_dev_put(pdev);
+ list_for_each_entry(pirq_entry, &pdev->msi_list, list) {
+ if (!count)
+ break;
+ kobject_del(&pirq_entry->kobj);
+ kobject_put(&pirq_entry->kobj);
+ count--;
+ }
+ return ret;
+}
+
/**
* msi_capability_init - configure device's MSI capability structure
* @dev: pointer to the pci_dev data structure of MSI device function
@@ -351,6 +508,7 @@ EXPORT_SYMBOL_GPL(pci_restore_msi_state)
*/
static int msi_capability_init(struct pci_dev *dev, int nvec)
{
+ struct msi_dev_list *dev_entry = get_msi_dev_pirq_list(dev);
int pos, pirq;
u16 control;
@@ -359,7 +517,7 @@ static int msi_capability_init(struct pc
pci_read_config_word(dev, msi_control_reg(pos), &control);
- pirq = msi_map_vector(dev, 0, 0);
+ pirq = msi_map_vector(dev, 0, 0, dev_entry->owner);
if (pirq < 0)
return -EBUSY;
@@ -368,7 +526,8 @@ static int msi_capability_init(struct pc
msi_set_enable(dev, pos, 1);
dev->msi_enabled = 1;
- dev->irq = pirq;
+ dev->irq = dev_entry->e.pirq = pirq;
+ populate_msi_sysfs(dev);
return 0;
}
@@ -429,7 +588,8 @@ static int msix_capability_init(struct p
}
if (mapped)
continue;
- pirq = msi_map_vector(dev, entries[i].entry, table_base);
+ pirq = msi_map_vector(dev, entries[i].entry, table_base,
+ msi_dev_entry->owner);
if (pirq < 0)
break;
attach_pirq_entry(pirq, entries[i].entry, msi_dev_entry);
@@ -439,7 +599,12 @@ static int msix_capability_init(struct p
if (i != nvec) {
int avail = i - 1;
for (j = --i; j >= 0; j--) {
- msi_unmap_pirq(dev, entries[j].vector);
+ list_for_each_entry(pirq_entry, &dev->msi_list, list)
+ if (pirq_entry->entry_nr == entries[j].entry)
+ break;
+ msi_unmap_pirq(dev, entries[j].vector,
+ msi_dev_entry->owner,
+ &pirq_entry->kobj);
detach_pirq_entry(entries[j].entry, msi_dev_entry);
entries[j].vector = 0;
}
@@ -454,6 +619,7 @@ static int msix_capability_init(struct p
/* Set MSI-X enabled bits and unmask the function */
pci_intx_for_msi(dev, 0);
dev->msix_enabled = 1;
+ populate_msi_sysfs(dev);
control &= ~PCI_MSIX_FLAGS_MASKALL;
pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
@@ -553,7 +719,7 @@ int pci_enable_msi_block(struct pci_dev
dev->irq = evtchn_map_pirq(-1, dev->irq);
dev->msi_enabled = 1;
msi_dev_entry->default_irq = temp;
-
+ populate_msi_sysfs(dev);
return ret;
#else
return -EOPNOTSUPP;
@@ -599,7 +765,10 @@ void pci_msi_shutdown(struct pci_dev *de
pirq = dev->irq;
/* Restore dev->irq to its default pin-assertion vector */
dev->irq = msi_dev_entry->default_irq;
- msi_unmap_pirq(dev, pirq);
+ msi_unmap_pirq(dev, pirq, msi_dev_entry->owner,
+ &msi_dev_entry->e.kobj);
+ msi_dev_entry->owner = DOMID_IO;
+ memset(&msi_dev_entry->e.kobj, 0, sizeof(msi_dev_entry->e.kobj));
/* Disable MSI mode */
pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
@@ -611,6 +780,8 @@ void pci_msi_shutdown(struct pci_dev *de
void pci_disable_msi(struct pci_dev *dev)
{
pci_msi_shutdown(dev);
+ kset_unregister(dev->msi_kset);
+ dev->msi_kset = NULL;
}
EXPORT_SYMBOL(pci_disable_msi);
@@ -690,6 +861,7 @@ int pci_enable_msix(struct pci_dev *dev,
attach_pirq_entry(irq, entries[i].entry, msi_dev_entry);
entries[i].vector = irq;
}
+ populate_msi_sysfs(dev);
return 0;
#else
return -EOPNOTSUPP;
@@ -757,6 +929,8 @@ void pci_msix_shutdown(struct pci_dev *d
void pci_disable_msix(struct pci_dev *dev)
{
pci_msix_shutdown(dev);
+ kset_unregister(dev->msi_kset);
+ dev->msi_kset = NULL;
}
EXPORT_SYMBOL(pci_disable_msix);
@@ -783,13 +957,16 @@ void msi_remove_pci_irq_vectors(struct p
spin_lock_irqsave(&msi_dev_entry->pirq_list_lock, flags);
list_for_each_entry_safe(pirq_entry, tmp, &dev->msi_list, list) {
if (is_initial_xendomain())
- msi_unmap_pirq(dev, pirq_entry->pirq);
+ msi_unmap_pirq(dev, pirq_entry->pirq,
+ msi_dev_entry->owner,
+ &pirq_entry->kobj);
else
evtchn_map_pirq(pirq_entry->pirq, 0);
list_del(&pirq_entry->list);
kfree(pirq_entry);
}
spin_unlock_irqrestore(&msi_dev_entry->pirq_list_lock, flags);
+ msi_dev_entry->owner = DOMID_IO;
dev->irq = msi_dev_entry->default_irq;
}
@@ -812,5 +989,21 @@ EXPORT_SYMBOL(pci_msi_enabled);
void pci_msi_init_pci_dev(struct pci_dev *dev)
{
+ int pos;
INIT_LIST_HEAD(&dev->msi_list);
+
+ /* Disable the msi hardware to avoid screaming interrupts
+ * during boot. This is the power on reset default so
+ * usually this should be a noop.
+ * But on a Xen host don't do this for IOMMUs which the hypervisor
+ * is in control of (and hence has already enabled on purpose).
+ */
+ if (is_initial_xendomain()
+ && (dev->class >> 8) == PCI_CLASS_SYSTEM_IOMMU
+ && dev->vendor == PCI_VENDOR_ID_AMD)
+ return;
+ pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
+ if (pos)
+ msi_set_enable(dev, pos, 0);
+ msix_set_enable(dev, 0);
}
--- 12.2.orig/drivers/xen/Kconfig 2012-04-03 13:16:00.000000000 +0200
+++ 12.2/drivers/xen/Kconfig 2012-04-10 17:24:48.000000000 +0200
@@ -22,10 +22,6 @@ config XEN_UNPRIVILEGED_GUEST
select PM
select SUSPEND
-config XEN_PRIVCMD
- def_bool y
- depends on PROC_FS
-
config XEN_XENBUS_DEV
def_bool y
depends on PROC_FS
@@ -571,7 +567,8 @@ endmenu
config XEN_PRIVCMD
tristate
- depends on XEN
+ depends on PARAVIRT_XEN || (XEN && PROC_FS)
+ default y if XEN
default m
config XEN_ACPI_PROCESSOR
--- 12.2.orig/drivers/xen/Makefile 2011-11-17 16:59:30.000000000 +0100
+++ 12.2/drivers/xen/Makefile 2012-02-09 13:41:36.000000000 +0100
@@ -3,8 +3,10 @@ xen-biomerge-$(CONFIG_PARAVIRT_XEN) := b
xen-hotplug-$(CONFIG_PARAVIRT_XEN) := cpu_hotplug.o
xen-balloon_$(CONFIG_PARAVIRT_XEN) := xen-balloon.o
xen-evtchn-name-$(CONFIG_PARAVIRT_XEN) := xen-evtchn
+xen-privcmd_$(CONFIG_PARAVIRT_XEN) := xen-privcmd.o
xen-balloon_$(CONFIG_XEN) := balloon/
+xen-privcmd_$(CONFIG_XEN) := privcmd/
obj-$(CONFIG_XEN) += core/
obj-$(CONFIG_XEN) += console/
obj-y += xenbus/
@@ -37,10 +39,12 @@ obj-$(CONFIG_XEN_TMEM) += tmem.o
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
obj-$(CONFIG_XEN_DOM0) += pci.o
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
+obj-$(CONFIG_XEN_PRIVCMD) += $(xen-privcmd_y)
xen-evtchn-y := evtchn.o
xen-gntdev-y := gntdev.o
xen-gntalloc-y := gntalloc.o
+xen-privcmd-y := privcmd.o
obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
@@ -58,7 +62,6 @@ obj-$(CONFIG_XEN_SCSI_BACKEND) += scsib
obj-$(CONFIG_XEN_SCSI_FRONTEND) += scsifront/
obj-$(CONFIG_XEN_USB_BACKEND) += usbback/
obj-$(CONFIG_XEN_USB_FRONTEND) += usbfront/
-obj-$(CONFIG_XEN_PRIVCMD) += privcmd/
obj-$(CONFIG_XEN_GRANT_DEV) += gntdev/
obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_UTIL) += sfc_netutil/
obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_FRONTEND) += sfc_netfront/
--- 12.2.orig/drivers/xen/balloon/balloon.c 2012-06-06 14:04:25.000000000 +0200
+++ 12.2/drivers/xen/balloon/balloon.c 2012-06-08 10:38:14.000000000 +0200
@@ -73,11 +73,6 @@ static DEFINE_MUTEX(balloon_mutex);
*/
DEFINE_SPINLOCK(balloon_lock);
-#ifndef MODULE
-#include <linux/pagevec.h>
-static struct pagevec free_pagevec;
-#endif
-
struct balloon_stats balloon_stats;
/* We increase/decrease in batches which fit in a page */
@@ -198,27 +193,14 @@ static struct page *balloon_next_page(st
static inline void balloon_free_page(struct page *page)
{
#ifndef MODULE
- if (put_page_testzero(page) && !pagevec_add(&free_pagevec, page)) {
- __pagevec_free(&free_pagevec);
- pagevec_reinit(&free_pagevec);
- }
+ if (put_page_testzero(page))
+ free_hot_cold_page(page, 1);
#else
- /* pagevec interface is not being exported. */
+ /* free_hot_cold_page() is not being exported. */
__free_page(page);
#endif
}
-static inline void balloon_free_and_unlock(unsigned long flags)
-{
-#ifndef MODULE
- if (pagevec_count(&free_pagevec)) {
- __pagevec_free(&free_pagevec);
- pagevec_reinit(&free_pagevec);
- }
-#endif
- balloon_unlock(flags);
-}
-
static void balloon_alarm(unsigned long unused)
{
schedule_work(&balloon_worker);
@@ -330,7 +312,7 @@ static int increase_reservation(unsigned
totalram_pages = bs.current_pages - totalram_bias;
out:
- balloon_free_and_unlock(flags);
+ balloon_unlock(flags);
#ifndef MODULE
setup_per_zone_wmarks();
@@ -567,7 +549,6 @@ static int __init balloon_init(void)
IPRINTK("Initialising balloon driver.\n");
#ifdef CONFIG_XEN
- pagevec_init(&free_pagevec, true);
bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
totalram_pages = bs.current_pages;
#else
@@ -726,7 +707,7 @@ struct page **alloc_empty_pages_and_page
if (ret != 0) {
balloon_free_page(page);
- balloon_free_and_unlock(flags);
+ balloon_unlock(flags);
goto err;
}
--- 12.2.orig/drivers/xen/balloon/sysfs.c 2012-02-03 13:51:27.000000000 +0100
+++ 12.2/drivers/xen/balloon/sysfs.c 2012-02-16 17:19:42.000000000 +0100
@@ -29,12 +29,11 @@
*/
#include <linux/capability.h>
+#include <linux/device.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/stat.h>
#include <linux/string.h>
-#include <linux/sysdev.h>
-#include <linux/module.h>
#include <xen/balloon.h>
#include "common.h"
@@ -45,27 +44,27 @@
#define BALLOON_CLASS_NAME "xen_memory"
#define BALLOON_SHOW(name, format, args...) \
- static ssize_t show_##name(struct sys_device *dev, \
- struct sysdev_attribute *attr, \
+ static ssize_t show_##name(struct device *dev, \
+ struct device_attribute *attr, \
char *buf) \
{ \
return sprintf(buf, format, ##args); \
} \
- static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
+ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(bs.current_pages));
BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(bs.balloon_low));
BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(bs.balloon_high));
BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(bs.driver_pages));
-static ssize_t show_target_kb(struct sys_device *dev,
- struct sysdev_attribute *attr, char *buf)
+static ssize_t show_target_kb(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%lu\n", PAGES2KB(bs.target_pages));
}
-static ssize_t store_target_kb(struct sys_device *dev,
- struct sysdev_attribute *attr,
+static ssize_t store_target_kb(struct device *dev,
+ struct device_attribute *attr,
const char *buf, size_t count)
{
char *endchar;
@@ -83,19 +82,19 @@ static ssize_t store_target_kb(struct sy
return count;
}
-static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(target_kb, S_IRUGO | S_IWUSR,
show_target_kb, store_target_kb);
-static ssize_t show_target(struct sys_device *dev,
- struct sysdev_attribute *attr, char *buf)
+static ssize_t show_target(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%llu\n",
(unsigned long long)balloon_stats.target_pages
<< PAGE_SHIFT);
}
-static ssize_t store_target(struct sys_device *dev,
- struct sysdev_attribute *attr,
+static ssize_t store_target(struct device *dev,
+ struct device_attribute *attr,
const char *buf,
size_t count)
{
@@ -114,19 +113,19 @@ static ssize_t store_target(struct sys_d
return count;
}
-static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(target, S_IRUGO | S_IWUSR,
show_target, store_target);
-static struct sysdev_attribute *balloon_attrs[] = {
- &attr_target_kb,
- &attr_target,
+static struct device_attribute *balloon_attrs[] = {
+ &dev_attr_target_kb,
+ &dev_attr_target,
};
static struct attribute *balloon_info_attrs[] = {
- &attr_current_kb.attr,
- &attr_low_kb.attr,
- &attr_high_kb.attr,
- &attr_driver_kb.attr,
+ &dev_attr_current_kb.attr,
+ &dev_attr_low_kb.attr,
+ &dev_attr_high_kb.attr,
+ &dev_attr_driver_kb.attr,
NULL
};
@@ -135,36 +134,37 @@ static const struct attribute_group ball
.attrs = balloon_info_attrs,
};
-static struct sysdev_class balloon_sysdev_class = {
+static struct bus_type balloon_subsys = {
.name = BALLOON_CLASS_NAME,
+ .dev_name = BALLOON_CLASS_NAME,
};
-static struct sys_device balloon_sysdev;
+static struct device balloon_dev;
-static int __init register_balloon(struct sys_device *sysdev)
+static int __init register_balloon(struct device *dev)
{
int i, error;
- error = sysdev_class_register(&balloon_sysdev_class);
+ error = subsys_system_register(&balloon_subsys, NULL);
if (error)
return error;
- sysdev->id = 0;
- sysdev->cls = &balloon_sysdev_class;
+ dev->id = 0;
+ dev->bus = &balloon_subsys;
- error = sysdev_register(sysdev);
+ error = device_register(dev);
if (error) {
- sysdev_class_unregister(&balloon_sysdev_class);
+ bus_unregister(&balloon_subsys);
return error;
}
for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
- error = sysdev_create_file(sysdev, balloon_attrs[i]);
+ error = device_create_file(dev, balloon_attrs[i]);
if (error)
goto fail;
}
- error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
+ error = sysfs_create_group(&dev->kobj, &balloon_info_group);
if (error)
goto fail;
@@ -172,33 +172,33 @@ static int __init register_balloon(struc
fail:
while (--i >= 0)
- sysdev_remove_file(sysdev, balloon_attrs[i]);
- sysdev_unregister(sysdev);
- sysdev_class_unregister(&balloon_sysdev_class);
+ device_remove_file(dev, balloon_attrs[i]);
+ device_unregister(dev);
+ bus_unregister(&balloon_subsys);
return error;
}
-static __exit void unregister_balloon(struct sys_device *sysdev)
+static __exit void unregister_balloon(struct device *dev)
{
int i;
- sysfs_remove_group(&sysdev->kobj, &balloon_info_group);
+ sysfs_remove_group(&dev->kobj, &balloon_info_group);
for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++)
- sysdev_remove_file(sysdev, balloon_attrs[i]);
- sysdev_unregister(sysdev);
- sysdev_class_unregister(&balloon_sysdev_class);
+ device_remove_file(dev, balloon_attrs[i]);
+ device_unregister(dev);
+ bus_unregister(&balloon_subsys);
}
int __init balloon_sysfs_init(void)
{
- int rc = register_balloon(&balloon_sysdev);
+ int rc = register_balloon(&balloon_dev);
- register_xen_selfballooning(&balloon_sysdev);
+ register_xen_selfballooning(&balloon_dev);
return rc;
}
void __exit balloon_sysfs_exit(void)
{
- unregister_balloon(&balloon_sysdev);
+ unregister_balloon(&balloon_dev);
}
--- 12.2.orig/drivers/xen/blkback/blkback.c 2012-04-04 10:26:46.000000000 +0200
+++ 12.2/drivers/xen/blkback/blkback.c 2012-03-26 13:37:56.000000000 +0200
@@ -341,8 +341,11 @@ irqreturn_t blkif_be_int(int irq, void *
static void dispatch_discard(blkif_t *blkif, struct blkif_request_discard *req)
{
+ unsigned long secure = (blkif->vbd.discard_secure &&
+ (req->flag & BLKIF_DISCARD_SECURE)) ?
+ BLKDEV_DISCARD_SECURE : 0;
struct phys_req preq;
- int err = -EOPNOTSUPP, status;
+ int status;
blkif->st_ds_req++;
@@ -361,12 +364,8 @@ static void dispatch_discard(blkif_t *bl
plug_queue(blkif, preq.bdev);
- if (blkif->blk_backend_type == BLKIF_BACKEND_PHY ||
- blkif->blk_backend_type == BLKIF_BACKEND_FILE)
- err = blkdev_issue_discard(preq.bdev, preq.sector_number,
- preq.nr_sects, GFP_KERNEL, 0);
-
- switch (err) {
+ switch (blkdev_issue_discard(preq.bdev, preq.sector_number,
+ preq.nr_sects, GFP_KERNEL, secure)) {
case 0:
status = BLKIF_RSP_OKAY;
break;
--- 12.2.orig/drivers/xen/blkback/common.h 2012-06-08 10:37:58.000000000 +0200
+++ 12.2/drivers/xen/blkback/common.h 2012-06-08 10:38:21.000000000 +0200
@@ -43,16 +43,12 @@
pr_debug("(file=%s, line=%d) " _f, \
__FILE__ , __LINE__ , ## _a )
-enum blkif_backend_type {
- BLKIF_BACKEND_PHY = 1,
- BLKIF_BACKEND_FILE = 2,
-};
-
struct vbd {
blkif_vdev_t handle; /* what the domain refers to this vbd as */
fmode_t mode; /* FMODE_xxx */
unsigned char type; /* VDISK_xxx */
bool flush_support;
+ bool discard_secure;
u32 pdevice; /* phys device that this vbd maps to */
struct block_device *bdev;
sector_t size; /* Cached size parameter */
@@ -68,7 +64,6 @@ typedef struct blkif_st {
unsigned int irq;
/* Comms information. */
enum blkif_protocol blk_protocol;
- enum blkif_backend_type blk_backend_type;
blkif_back_rings_t blk_rings;
struct vm_struct *blk_ring_area;
/* The VBD attached to this interface. */
--- 12.2.orig/drivers/xen/blkback/vbd.c 2012-02-24 15:15:19.000000000 +0100
+++ 12.2/drivers/xen/blkback/vbd.c 2012-02-27 10:22:57.000000000 +0100
@@ -92,6 +92,9 @@ int vbd_create(blkif_t *blkif, blkif_vde
if (q && q->flush_flags)
vbd->flush_support = true;
+ if (q && blk_queue_secdiscard(q))
+ vbd->discard_secure = true;
+
DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
handle, blkif->domid);
return 0;
--- 12.2.orig/drivers/xen/blkback/xenbus.c 2012-03-22 14:25:55.000000000 +0100
+++ 12.2/drivers/xen/blkback/xenbus.c 2012-03-22 14:26:51.000000000 +0100
@@ -227,43 +227,34 @@ static void blkback_discard(struct xenbu
struct backend_info *be)
{
struct xenbus_device *dev = be->dev;
- blkif_t *blkif = be->blkif;
- char *type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
+ struct vbd *vbd = &be->blkif->vbd;
+ struct request_queue *q = bdev_get_queue(vbd->bdev);
int err, state = 0;
- if (!IS_ERR(type)) {
- if (strncmp(type, "file", 4) == 0) {
+ if (blk_queue_discard(q)) {
+ err = xenbus_printf(xbt, dev->nodename, "discard-granularity",
+ "%u", q->limits.discard_granularity);
+ if (!err)
state = 1;
- blkif->blk_backend_type = BLKIF_BACKEND_FILE;
+ else
+ xenbus_dev_error(dev, err,
+ "writing discard-granularity");
+ err = xenbus_printf(xbt, dev->nodename, "discard-alignment",
+ "%u", q->limits.discard_alignment);
+ if (err) {
+ xenbus_dev_error(dev, err,
+ "writing discard-alignment");
+ state = 0;
}
- if (strncmp(type, "phy", 3) == 0) {
- struct request_queue *q;
+ }
- q = bdev_get_queue(blkif->vbd.bdev);
- if (blk_queue_discard(q)) {
- blkif->blk_backend_type = BLKIF_BACKEND_PHY;
- err = xenbus_printf(xbt, dev->nodename,
- "discard-granularity", "%u",
- q->limits.discard_granularity);
- if (!err)
- state = 1;
- else
- xenbus_dev_error(dev, err,
- "writing discard-granularity");
- err = xenbus_printf(xbt, dev->nodename,
- "discard-alignment", "%u",
- q->limits.discard_alignment);
- if (err) {
- xenbus_dev_error(dev, err,
- "writing discard-alignment");
- state = 0;
- }
- }
- }
- kfree(type);
- } else
- xenbus_dev_error(dev, PTR_ERR(type),
- "reading type for discard");
+ /* Optional. */
+ if (state) {
+ err = xenbus_printf(xbt, dev->nodename, "discard-secure",
+ "%d", vbd->discard_secure);
+ if (err)
+ xenbus_dev_error(dev, err, "writing discard-secure");
+ }
err = xenbus_printf(xbt, dev->nodename, "feature-discard",
"%d", state);
--- 12.2.orig/drivers/xen/blkfront/blkfront.c 2012-06-12 15:36:36.000000000 +0200
+++ 12.2/drivers/xen/blkfront/blkfront.c 2012-06-12 15:36:44.000000000 +0200
@@ -331,11 +331,13 @@ static void blkfront_setup_discard(struc
char *type;
unsigned int discard_granularity;
unsigned int discard_alignment;
+ int discard_secure;
type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
if (IS_ERR(type))
return;
+ info->feature_secdiscard = 0;
if (strncmp(type, "phy", 3) == 0) {
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
"discard-granularity", "%u", &discard_granularity,
@@ -346,6 +348,10 @@ static void blkfront_setup_discard(struc
info->discard_granularity = discard_granularity;
info->discard_alignment = discard_alignment;
}
+ err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+ "discard-secure", "%d", &discard_secure);
+ if (err == 1)
+ info->feature_secdiscard = discard_secure;
} else if (strncmp(type, "file", 4) == 0)
info->feature_discard = 1;
@@ -764,10 +770,13 @@ int blkif_ioctl(struct block_device *bd,
return scsi_cmd_ioctl(filep, info->rq,
info->gd, command,
(void __user *)argument);
-#else
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0)
return scsi_cmd_ioctl(info->rq, info->gd,
mode, command,
(void __user *)argument);
+#else
+ return scsi_cmd_blk_ioctl(bd, mode, command,
+ (void __user *)argument);
#endif
}
}
@@ -844,13 +853,15 @@ static int blkif_queue_request(struct re
#endif
ring_req->operation = info->flush_op;
- if (unlikely(req->cmd_flags & REQ_DISCARD)) {
+ if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) {
struct blkif_request_discard *discard = (void *)ring_req;
/* id, sector_number and handle are set above. */
discard->operation = BLKIF_OP_DISCARD;
discard->flag = 0;
discard->nr_sectors = blk_rq_sectors(req);
+ if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
+ discard->flag = BLKIF_DISCARD_SECURE;
} else {
ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
@@ -1035,7 +1046,9 @@ static irqreturn_t blkif_int(int irq, vo
info->gd->disk_name);
ret = -EOPNOTSUPP;
info->feature_discard = 0;
+ info->feature_secdiscard = 0;
queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
+ queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
}
__blk_end_request_all(req, ret);
break;
@@ -1092,6 +1105,9 @@ static void blkif_free(struct blkfront_i
static void blkif_completion(struct blk_shadow *s)
{
int i;
+
+ if (s->req.operation == BLKIF_OP_DISCARD)
+ return;
for (i = 0; i < s->req.nr_segments; i++)
gnttab_end_foreign_access(s->req.seg[i].gref, 0UL);
}
--- 12.2.orig/drivers/xen/blkfront/block.h 2012-06-08 10:38:01.000000000 +0200
+++ 12.2/drivers/xen/blkfront/block.h 2012-06-08 10:38:23.000000000 +0200
@@ -109,7 +109,8 @@ struct blkfront_info
unsigned long shadow_free;
unsigned int feature_flush;
unsigned int flush_op;
- unsigned int feature_discard;
+ bool feature_discard;
+ bool feature_secdiscard;
unsigned int discard_granularity;
unsigned int discard_alignment;
int is_ready;
--- 12.2.orig/drivers/xen/blkfront/vbd.c 2012-03-12 16:18:35.000000000 +0100
+++ 12.2/drivers/xen/blkfront/vbd.c 2012-03-12 16:19:08.000000000 +0100
@@ -302,7 +302,7 @@ xlbd_reserve_minors(struct xlbd_major_in
if (end > ms->nr) {
unsigned long *bitmap, *old;
- bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap),
+ bitmap = kcalloc(BITS_TO_LONGS(end), sizeof(*bitmap),
GFP_KERNEL);
if (bitmap == NULL)
return -ENOMEM;
@@ -370,6 +370,8 @@ xlvbd_init_blk_queue(struct gendisk *gd,
blk_queue_max_discard_sectors(rq, get_capacity(gd));
rq->limits.discard_granularity = info->discard_granularity;
rq->limits.discard_alignment = info->discard_alignment;
+ if (info->feature_secdiscard)
+ queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
}
/* Hard sector size and max sectors impersonate the equiv. hardware. */
--- 12.2.orig/drivers/xen/blktap/blktap.c 2011-11-18 15:35:59.000000000 +0100
+++ 12.2/drivers/xen/blktap/blktap.c 2012-02-17 11:29:03.000000000 +0100
@@ -277,7 +277,7 @@ static inline unsigned int OFFSET_TO_SEG
} while(0)
-static char *blktap_devnode(struct device *dev, mode_t *mode)
+static char *blktap_devnode(struct device *dev, umode_t *mode)
{
return kasprintf(GFP_KERNEL, "xen/blktap%u", MINOR(dev->devt));
}
--- 12.2.orig/drivers/xen/blktap2-new/device.c 2011-11-21 15:50:27.000000000 +0100
+++ 12.2/drivers/xen/blktap2-new/device.c 2012-02-17 11:29:41.000000000 +0100
@@ -425,7 +425,7 @@ blktap_device_destroy_sync(struct blktap
!blktap_device_try_destroy(tap));
}
-static char *blktap_devnode(struct gendisk *gd, mode_t *mode)
+static char *blktap_devnode(struct gendisk *gd, umode_t *mode)
{
return kasprintf(GFP_KERNEL, BLKTAP2_DEV_DIR "tapdev%u",
gd->first_minor);
--- 12.2.orig/drivers/xen/blktap2-new/sysfs.c 2011-02-24 15:02:50.000000000 +0100
+++ 12.2/drivers/xen/blktap2-new/sysfs.c 2012-02-17 11:29:46.000000000 +0100
@@ -262,7 +262,7 @@ blktap_sysfs_show_devices(struct class *
}
static CLASS_ATTR(devices, S_IRUGO, blktap_sysfs_show_devices, NULL);
-static char *blktap_devnode(struct device *dev, mode_t *mode)
+static char *blktap_devnode(struct device *dev, umode_t *mode)
{
return kasprintf(GFP_KERNEL, BLKTAP2_DEV_DIR "blktap%u",
MINOR(dev->devt));
--- 12.2.orig/drivers/xen/blktap2/device.c 2012-02-16 13:44:17.000000000 +0100
+++ 12.2/drivers/xen/blktap2/device.c 2012-02-17 11:29:27.000000000 +0100
@@ -1068,7 +1068,7 @@ blktap_device_destroy(struct blktap *tap
return 0;
}
-static char *blktap_devnode(struct gendisk *gd, mode_t *mode)
+static char *blktap_devnode(struct gendisk *gd, umode_t *mode)
{
return kasprintf(GFP_KERNEL, BLKTAP2_DEV_DIR "tapdev%u",
gd->first_minor);
--- 12.2.orig/drivers/xen/blktap2/sysfs.c 2011-02-24 14:59:15.000000000 +0100
+++ 12.2/drivers/xen/blktap2/sysfs.c 2012-02-17 11:29:32.000000000 +0100
@@ -439,7 +439,7 @@ blktap_sysfs_free(void)
class_destroy(class);
}
-static char *blktap_devnode(struct device *dev, mode_t *mode)
+static char *blktap_devnode(struct device *dev, umode_t *mode)
{
return kasprintf(GFP_KERNEL, BLKTAP2_DEV_DIR "blktap%u",
MINOR(dev->devt));
--- 12.2.orig/drivers/xen/console/console.c 2012-03-22 14:10:10.000000000 +0100
+++ 12.2/drivers/xen/console/console.c 2012-03-22 14:26:59.000000000 +0100
@@ -341,7 +341,6 @@ void __init dom0_init_screen_info(const
((_tty)->index != (xc_num - 1)))
static struct ktermios *xencons_termios[MAX_NR_CONSOLES];
-static struct ktermios *xencons_termios_locked[MAX_NR_CONSOLES];
static struct tty_struct *xencons_tty;
static int xencons_priv_irq;
static char x_char;
@@ -679,7 +678,6 @@ static int __init xencons_init(void)
TTY_DRIVER_REAL_RAW |
TTY_DRIVER_RESET_TERMIOS;
DRV(xencons_driver)->termios = xencons_termios;
- DRV(xencons_driver)->termios_locked = xencons_termios_locked;
switch (xc_mode) {
case XC_XVC:
--- 12.2.orig/drivers/xen/core/cpu_hotplug.c 2011-02-01 14:42:26.000000000 +0100
+++ 12.2/drivers/xen/core/cpu_hotplug.c 2012-02-10 09:47:12.000000000 +0100
@@ -25,7 +25,7 @@ static int local_cpu_hotplug_request(voi
return (current->mm != NULL);
}
-static void vcpu_hotplug(unsigned int cpu, struct sys_device *dev)
+static void vcpu_hotplug(unsigned int cpu, struct device *dev)
{
int err;
char dir[32], state[32];
@@ -63,7 +63,7 @@ static void handle_vcpu_hotplug_event(
if ((cpustr = strstr(node, "cpu/")) != NULL) {
sscanf(cpustr, "cpu/%u", &cpu);
- vcpu_hotplug(cpu, get_cpu_sysdev(cpu));
+ vcpu_hotplug(cpu, get_cpu_device(cpu));
}
}
@@ -96,7 +96,7 @@ static int setup_cpu_watcher(struct noti
if (!is_initial_xendomain()) {
for_each_possible_cpu(i)
- vcpu_hotplug(i, get_cpu_sysdev(i));
+ vcpu_hotplug(i, get_cpu_device(i));
pr_info("Brought up %ld CPUs\n", (long)num_online_cpus());
}
--- 12.2.orig/drivers/xen/core/evtchn.c 2011-11-21 15:49:38.000000000 +0100
+++ 12.2/drivers/xen/core/evtchn.c 2012-04-03 17:08:41.000000000 +0200
@@ -311,8 +311,8 @@ asmlinkage void __irq_entry evtchn_do_up
old_regs = set_irq_regs(regs);
xen_spin_irq_enter();
- exit_idle();
irq_enter();
+ exit_idle();
do {
vcpu_info->evtchn_upcall_pending = 0;
--- 12.2.orig/drivers/xen/core/smpboot.c 2011-11-18 15:44:14.000000000 +0100
+++ 12.2/drivers/xen/core/smpboot.c 2012-03-22 16:22:50.000000000 +0100
@@ -443,6 +443,7 @@ void __ref play_dead(void)
void __init smp_cpus_done(unsigned int max_cpus)
{
+ nmi_selftest();
}
#ifndef CONFIG_X86_LOCAL_APIC
--- 12.2.orig/drivers/xen/core/spinlock.c 2012-02-07 11:59:21.000000000 +0100
+++ 12.2/drivers/xen/core/spinlock.c 2012-02-09 12:50:33.000000000 +0100
@@ -132,9 +132,7 @@ static unsigned int ticket_drop(struct s
if (cmpxchg(&spinning->ticket, ticket, -1) != ticket)
return -1;
- asm volatile(UNLOCK_LOCK_PREFIX "inc" UNLOCK_SUFFIX(0) " %0"
- : "+m" (lock->tickets.head)
- : : "memory", "cc");
+ __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX);
ticket = (__ticket_t)(ticket + 1);
return ticket != lock->tickets.tail ? ticket : -1;
}
--- 12.2.orig/drivers/xen/netback/interface.c 2012-01-24 14:12:08.000000000 +0100
+++ 12.2/drivers/xen/netback/interface.c 2012-02-10 08:56:24.000000000 +0100
@@ -95,7 +95,8 @@ static int netbk_change_mtu(struct net_d
return 0;
}
-static u32 netbk_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t netbk_fix_features(struct net_device *dev,
+ netdev_features_t features)
{
netif_t *netif = netdev_priv(dev);
--- 12.2.orig/drivers/xen/netback/netback.c 2012-06-08 10:37:44.000000000 +0200
+++ 12.2/drivers/xen/netback/netback.c 2012-02-17 09:00:33.000000000 +0100
@@ -164,10 +164,10 @@ static unsigned long mfn_list[MAX_MFN_AL
static unsigned int alloc_index = 0;
/* Setting this allows the safe use of this driver without netloop. */
-static int MODPARM_copy_skb = 1;
+static bool MODPARM_copy_skb = true;
module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
-static int MODPARM_permute_returns = 0;
+static bool MODPARM_permute_returns;
module_param_named(permute_returns, MODPARM_permute_returns, bool, S_IRUSR|S_IWUSR);
MODULE_PARM_DESC(permute_returns, "Randomly permute the order in which TX responses are sent to the frontend");
--- 12.2.orig/drivers/xen/netfront/netfront.c 2012-06-08 10:37:47.000000000 +0200
+++ 12.2/drivers/xen/netfront/netfront.c 2012-06-08 10:38:28.000000000 +0200
@@ -81,15 +81,15 @@ struct netfront_cb {
* For paravirtualised guests, flipping is the default.
*/
#ifdef CONFIG_XEN
-static int MODPARM_rx_copy = 0;
+static bool MODPARM_rx_copy;
module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
-static int MODPARM_rx_flip = 0;
+static bool MODPARM_rx_flip;
module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
#else
-static const int MODPARM_rx_copy = 1;
-static const int MODPARM_rx_flip = 0;
+# define MODPARM_rx_copy true
+# define MODPARM_rx_flip false
#endif
#define RX_COPY_THRESHOLD 256
@@ -229,7 +229,7 @@ static void xennet_sysfs_delif(struct ne
#define xennet_sysfs_delif(dev) do { } while(0)
#endif
-static inline int xennet_can_sg(struct net_device *dev)
+static inline bool xennet_can_sg(struct net_device *dev)
{
return dev->features & NETIF_F_SG;
}
@@ -2032,7 +2032,8 @@ static void network_set_multicast_list(s
{
}
-static u32 xennet_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t xennet_fix_features(struct net_device *dev,
+ netdev_features_t features)
{
struct netfront_info *np = netdev_priv(dev);
int val;
@@ -2058,7 +2059,8 @@ static u32 xennet_fix_features(struct ne
return features;
}
-static int xennet_set_features(struct net_device *dev, u32 features)
+static int xennet_set_features(struct net_device *dev,
+ netdev_features_t features)
{
if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) {
netdev_info(dev, "Reducing MTU because no SG offload");
@@ -2239,7 +2241,7 @@ static int __init netif_init(void)
}
if (!MODPARM_rx_flip && !MODPARM_rx_copy)
- MODPARM_rx_copy = 1; /* Default is to copy. */
+ MODPARM_rx_copy = true; /* Default is to copy. */
#endif
netif_init_accel();
--- 12.2.orig/drivers/xen/pcifront/xenbus.c 2012-03-12 13:53:21.000000000 +0100
+++ 12.2/drivers/xen/pcifront/xenbus.c 2012-03-12 13:55:45.000000000 +0100
@@ -371,7 +371,7 @@ static int pcifront_detach_devices(struc
pci_dev = pci_get_slot(pci_bus, PCI_DEVFN(slot, func));
if(!pci_dev) {
dev_dbg(&pdev->xdev->dev,
- "Cannot get PCI device %04x:%02x:%02x.%02x\n",
+ "Cannot get PCI device %04x:%02x:%02x.%u\n",
domain, bus, slot, func);
continue;
}
@@ -379,7 +379,7 @@ static int pcifront_detach_devices(struc
pci_dev_put(pci_dev);
dev_dbg(&pdev->xdev->dev,
- "PCI device %04x:%02x:%02x.%02x removed.\n",
+ "PCI device %04x:%02x:%02x.%u removed.\n",
domain, bus, slot, func);
}
--- 12.2.orig/drivers/xen/xen-pciback/pci_stub.c 2012-04-10 17:21:31.000000000 +0200
+++ 12.2/drivers/xen/xen-pciback/pci_stub.c 2012-04-10 17:24:42.000000000 +0200
@@ -267,7 +267,9 @@ void pcistub_put_pci_dev(struct pci_dev
xen_pcibk_config_free_dyn_fields(found_psdev->dev);
xen_pcibk_config_reset_dev(found_psdev->dev);
+#ifndef CONFIG_XEN
xen_unregister_device_domain_owner(found_psdev->dev);
+#endif
spin_lock_irqsave(&found_psdev->lock, flags);
found_psdev->pdev = NULL;
--- 12.2.orig/drivers/xen/xenbus/Makefile 2011-02-02 17:08:58.000000000 +0100
+++ 12.2/drivers/xen/xenbus/Makefile 2012-02-09 13:58:18.000000000 +0100
@@ -1,12 +1,17 @@
obj-y += xenbus_client.o xenbus_comms.o xenbus_xs.o xenbus_probe.o
-obj-$(CONFIG_XEN_BACKEND) += xenbus_be.o
+backend-standalone-$(CONFIG_XEN) += xenbus_be.o
+obj-$(CONFIG_PARAVIRT_XEN) += xenbus_dev_frontend.o
xenbus_be-objs =
xenbus_be-objs += xenbus_backend_client.o
+xenbus_be-objs += xenbus_dev_backend.o
xenbus-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o
obj-y += $(xenbus-y) $(xenbus-m)
obj-$(CONFIG_XEN_XENBUS_DEV) += xenbus_dev.o
obj-$(CONFIG_PARAVIRT_XEN_BACKEND) += xenbus_probe_backend.o
+backend-standalone-$(CONFIG_PARAVIRT_XEN) += xenbus_dev_backend.o
obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o
+
+obj-$(CONFIG_XEN_BACKEND) += $(backend-standalone-y)
--- 12.2.orig/drivers/xen/xenbus/xenbus_client.c 2011-11-28 10:14:06.000000000 +0100
+++ 12.2/drivers/xen/xenbus/xenbus_client.c 2012-02-17 09:16:09.000000000 +0100
@@ -36,18 +36,42 @@
#include <xen/gnttab.h>
#else
#include <linux/types.h>
+#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/event_channel.h>
+#include <xen/balloon.h>
#include <xen/events.h>
#include <xen/grant_table.h>
#endif
#include <xen/xenbus.h>
+#include <xen/xen.h>
-#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#if defined(CONFIG_PARAVIRT_XEN)
+#include "xenbus_probe.h"
+
+struct xenbus_map_node {
+ struct list_head next;
+ union {
+ struct vm_struct *area; /* PV */
+ struct page *page; /* HVM */
+ };
+ grant_handle_t handle;
+};
+
+static DEFINE_SPINLOCK(xenbus_valloc_lock);
+static LIST_HEAD(xenbus_valloc_pages);
+
+struct xenbus_ring_ops {
+ int (*map)(struct xenbus_device *dev, grant_ref_t gnt, void **vaddr);
+ int (*unmap)(struct xenbus_device *dev, void *vaddr);
+};
+
+static const struct xenbus_ring_ops *ring_ops __read_mostly;
+#elif defined(HAVE_XEN_PLATFORM_COMPAT_H)
#include <xen/platform-compat.h>
#endif
@@ -450,19 +474,33 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
*/
int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t gnt_ref, void **vaddr)
{
+ return ring_ops->map(dev, gnt_ref, vaddr);
+}
+EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
+
+static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
+ grant_ref_t gnt_ref, void **vaddr)
+{
struct gnttab_map_grant_ref op = {
.flags = GNTMAP_host_map | GNTMAP_contains_pte,
.ref = gnt_ref,
.dom = dev->otherend_id,
};
+ struct xenbus_map_node *node;
struct vm_struct *area;
pte_t *pte;
*vaddr = NULL;
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
area = alloc_vm_area(PAGE_SIZE, &pte);
- if (!area)
+ if (!area) {
+ kfree(node);
return -ENOMEM;
+ }
op.host_addr = arbitrary_virt_to_machine(pte).maddr;
@@ -471,19 +509,59 @@ int xenbus_map_ring_valloc(struct xenbus
if (op.status != GNTST_okay) {
free_vm_area(area);
+ kfree(node);
xenbus_dev_fatal(dev, op.status,
"mapping in shared page %d from domain %d",
gnt_ref, dev->otherend_id);
return op.status;
}
- /* Stuff the handle in an unused field */
- area->phys_addr = (unsigned long)op.handle;
+ node->handle = op.handle;
+ node->area = area;
+
+ spin_lock(&xenbus_valloc_lock);
+ list_add(&node->next, &xenbus_valloc_pages);
+ spin_unlock(&xenbus_valloc_lock);
*vaddr = area->addr;
return 0;
}
-EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
+
+static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
+ grant_ref_t gnt_ref, void **vaddr)
+{
+ struct xenbus_map_node *node;
+ int err;
+ void *addr;
+
+ *vaddr = NULL;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */);
+ if (err)
+ goto out_err;
+
+ addr = pfn_to_kaddr(page_to_pfn(node->page));
+
+ err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr);
+ if (err)
+ goto out_err;
+
+ spin_lock(&xenbus_valloc_lock);
+ list_add(&node->next, &xenbus_valloc_pages);
+ spin_unlock(&xenbus_valloc_lock);
+
+ *vaddr = addr;
+ return 0;
+
+ out_err:
+ free_xenballooned_pages(1, &node->page);
+ kfree(node);
+ return err;
+}
/**
@@ -503,12 +581,10 @@ EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc
int xenbus_map_ring(struct xenbus_device *dev, grant_ref_t gnt_ref,
grant_handle_t *handle, void *vaddr)
{
- struct gnttab_map_grant_ref op = {
- .host_addr = (unsigned long)vaddr,
- .flags = GNTMAP_host_map,
- .ref = gnt_ref,
- .dom = dev->otherend_id,
- };
+ struct gnttab_map_grant_ref op;
+
+ gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref,
+ dev->otherend_id);
if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
BUG();
@@ -539,32 +615,36 @@ EXPORT_SYMBOL_GPL(xenbus_map_ring);
*/
int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
{
- struct vm_struct *area;
+ return ring_ops->unmap(dev, vaddr);
+}
+EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
+
+static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
+{
+ struct xenbus_map_node *node;
struct gnttab_unmap_grant_ref op = {
.host_addr = (unsigned long)vaddr,
};
unsigned int level;
- /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr)
- * method so that we don't have to muck with vmalloc internals here.
- * We could force the user to hang on to their struct vm_struct from
- * xenbus_map_ring_valloc, but these 6 lines considerably simplify
- * this API.
- */
- read_lock(&vmlist_lock);
- for (area = vmlist; area != NULL; area = area->next) {
- if (area->addr == vaddr)
- break;
+ spin_lock(&xenbus_valloc_lock);
+ list_for_each_entry(node, &xenbus_valloc_pages, next) {
+ if (node->area->addr == vaddr) {
+ list_del(&node->next);
+ goto found;
+ }
}
- read_unlock(&vmlist_lock);
+ node = NULL;
+ found:
+ spin_unlock(&xenbus_valloc_lock);
- if (!area) {
+ if (!node) {
xenbus_dev_error(dev, -ENOENT,
"can't find mapped virtual address %p", vaddr);
return GNTST_bad_virt_addr;
}
- op.handle = (grant_handle_t)area->phys_addr;
+ op.handle = node->handle;
op.host_addr = arbitrary_virt_to_machine(
lookup_address((unsigned long)vaddr, &level)).maddr;
@@ -572,16 +652,50 @@ int xenbus_unmap_ring_vfree(struct xenbu
BUG();
if (op.status == GNTST_okay)
- free_vm_area(area);
+ free_vm_area(node->area);
else
xenbus_dev_error(dev, op.status,
"unmapping page at handle %d error %d",
- (int16_t)area->phys_addr, op.status);
+ node->handle, op.status);
+ kfree(node);
return op.status;
}
-EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
+static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
+{
+ int rv;
+ struct xenbus_map_node *node;
+ void *addr;
+
+ spin_lock(&xenbus_valloc_lock);
+ list_for_each_entry(node, &xenbus_valloc_pages, next) {
+ addr = pfn_to_kaddr(page_to_pfn(node->page));
+ if (addr == vaddr) {
+ list_del(&node->next);
+ goto found;
+ }
+ }
+ node = NULL;
+ found:
+ spin_unlock(&xenbus_valloc_lock);
+
+ if (!node) {
+ xenbus_dev_error(dev, -ENOENT,
+ "can't find mapped virtual address %p", vaddr);
+ return GNTST_bad_virt_addr;
+ }
+
+ rv = xenbus_unmap_ring(dev, node->handle, addr);
+
+ if (!rv)
+ free_xenballooned_pages(1, &node->page);
+ else
+ WARN(1, "Leaking %p\n", vaddr);
+
+ kfree(node);
+ return rv;
+}
/**
* xenbus_unmap_ring
@@ -596,10 +710,9 @@ EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfre
int xenbus_unmap_ring(struct xenbus_device *dev,
grant_handle_t handle, void *vaddr)
{
- struct gnttab_unmap_grant_ref op = {
- .host_addr = (unsigned long)vaddr,
- .handle = handle,
- };
+ struct gnttab_unmap_grant_ref op;
+
+ gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle);
if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
BUG();
@@ -632,3 +745,23 @@ enum xenbus_state xenbus_read_driver_sta
return result;
}
EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
+
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+static const struct xenbus_ring_ops ring_ops_pv = {
+ .map = xenbus_map_ring_valloc_pv,
+ .unmap = xenbus_unmap_ring_vfree_pv,
+};
+
+static const struct xenbus_ring_ops ring_ops_hvm = {
+ .map = xenbus_map_ring_valloc_hvm,
+ .unmap = xenbus_unmap_ring_vfree_hvm,
+};
+
+void __init xenbus_ring_ops_init(void)
+{
+ if (xen_pv_domain())
+ ring_ops = &ring_ops_pv;
+ else
+ ring_ops = &ring_ops_hvm;
+}
+#endif
--- 12.2.orig/drivers/xen/xenbus/xenbus_comms.h 2011-04-11 13:43:15.000000000 +0200
+++ 12.2/drivers/xen/xenbus/xenbus_comms.h 2012-02-09 12:32:50.000000000 +0100
@@ -31,6 +31,8 @@
#ifndef _XENBUS_COMMS_H
#define _XENBUS_COMMS_H
+#include <linux/fs.h>
+
int xs_init(void);
int xb_init_comms(void);
@@ -43,6 +45,8 @@ int xs_input_avail(void);
extern struct xenstore_domain_interface *xen_store_interface;
extern int xen_store_evtchn;
+extern const struct file_operations xen_xenbus_fops;
+
/* For xenbus internal use. */
enum {
XENBUS_XSD_UNCOMMITTED = 0,
--- 12.2.orig/drivers/xen/xenbus/xenbus_dev_backend.c 2012-06-20 12:12:04.000000000 +0200
+++ 12.2/drivers/xen/xenbus/xenbus_dev_backend.c 2012-02-17 09:50:33.000000000 +0100
@@ -7,7 +7,9 @@
#include <linux/capability.h>
#include <xen/xen.h>
+#ifdef CONFIG_PARAVIRT_XEN
#include <xen/page.h>
+#endif
#include <xen/xenbus_dev.h>
#include "xenbus_comms.h"
@@ -49,7 +51,7 @@ static int xenbus_backend_mmap(struct fi
return -EINVAL;
if (remap_pfn_range(vma, vma->vm_start,
- virt_to_pfn(xen_store_interface),
+ PFN_DOWN(__pa(xen_store_interface)),
size, vma->vm_page_prot))
return -EAGAIN;
--- 12.2.orig/drivers/xen/xenbus/xenbus_probe.c 2012-03-12 13:54:30.000000000 +0100
+++ 12.2/drivers/xen/xenbus/xenbus_probe.c 2012-03-12 13:55:51.000000000 +0100
@@ -84,10 +84,10 @@
#endif
int xen_store_evtchn;
-PARAVIRT_EXPORT_SYMBOL(xen_store_evtchn);
+EXPORT_SYMBOL_GPL(xen_store_evtchn);
struct xenstore_domain_interface *xen_store_interface;
-PARAVIRT_EXPORT_SYMBOL(xen_store_interface);
+EXPORT_SYMBOL_GPL(xen_store_interface);
static unsigned long xen_store_mfn;
@@ -1330,6 +1330,8 @@ xenbus_init(void)
xenbus_dev_init();
#else /* !defined(CONFIG_XEN) && !defined(MODULE) */
+ xenbus_ring_ops_init();
+
if (xen_hvm_domain()) {
uint64_t v = 0;
err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
--- 12.2.orig/drivers/xen/xenbus/xenbus_probe.h 2012-06-08 11:13:38.000000000 +0200
+++ 12.2/drivers/xen/xenbus/xenbus_probe.h 2012-06-08 11:23:49.000000000 +0200
@@ -106,4 +106,6 @@ extern void xenbus_otherend_changed(stru
extern int xenbus_read_otherend_details(struct xenbus_device *xendev,
char *id_node, char *path_node);
+void xenbus_ring_ops_init(void);
+
#endif
--- head.orig/include/linux/pci_ids.h 2012-06-13 12:14:11.000000000 +0200
+++ head/include/linux/pci_ids.h 2012-06-21 08:31:17.000000000 +0200
@@ -75,6 +75,7 @@
#define PCI_CLASS_SYSTEM_RTC 0x0803
#define PCI_CLASS_SYSTEM_PCI_HOTPLUG 0x0804
#define PCI_CLASS_SYSTEM_SDHCI 0x0805
+#define PCI_CLASS_SYSTEM_IOMMU 0x0806
#define PCI_CLASS_SYSTEM_OTHER 0x0880
#define PCI_BASE_CLASS_INPUT 0x09
--- 12.2.orig/include/xen/balloon.h 2012-02-03 13:44:44.000000000 +0100
+++ 12.2/include/xen/balloon.h 2012-02-09 12:32:50.000000000 +0100
@@ -88,11 +88,11 @@ void free_xenballooned_pages(int nr_page
#endif /* CONFIG_PARAVIRT_XEN */
-struct sys_device;
+struct device;
#ifdef CONFIG_XEN_SELFBALLOONING
-extern int register_xen_selfballooning(struct sys_device *sysdev);
+extern int register_xen_selfballooning(struct device *dev);
#else
-static inline int register_xen_selfballooning(struct sys_device *sysdev)
+static inline int register_xen_selfballooning(struct device *dev)
{
return -ENOSYS;
}
--- 12.2.orig/include/xen/blkif.h 2012-04-04 10:27:25.000000000 +0200
+++ 12.2/include/xen/blkif.h 2012-04-04 10:31:24.000000000 +0200
@@ -48,7 +48,7 @@ struct blkif_x86_32_request {
};
struct blkif_x86_32_discard {
uint8_t operation; /* BLKIF_OP_DISCARD */
- uint8_t reserved; /* */
+ uint8_t flag; /* BLKIF_DISCARD_* */
blkif_vdev_t handle; /* same as for read/write requests */
uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;/* start sector idx on disk */
@@ -75,7 +75,7 @@ struct blkif_x86_64_request {
};
struct blkif_x86_64_discard {
uint8_t operation; /* BLKIF_OP_DISCARD */
- uint8_t reserved; /* */
+ uint8_t flag; /* BLKIF_DISCARD_* */
blkif_vdev_t handle; /* sane as for read/write requests */
uint64_t __attribute__((__aligned__(8))) id;
blkif_sector_t sector_number;/* start sector idx on disk */
@@ -122,6 +122,8 @@ static void inline blkif_get_x86_32_req(
blkif_request_discard_t *d = (void *)dst;
const blkif_x86_32_discard_t *s = (const void *)src;
+ /* We should be doing "d->flag = s->flag;" but the
+ * copying of nr_segments does it for us already. */
d->nr_sectors = s->nr_sectors;
return;
}
@@ -144,6 +146,8 @@ static void inline blkif_get_x86_64_req(
blkif_request_discard_t *d = (void *)dst;
const blkif_x86_64_discard_t *s = (const void *)src;
+ /* We should be doing "d->flag = s->flag" but the
+ * copying of nr_segments does it for us already. */
d->nr_sectors = s->nr_sectors;
return;
}
--- 12.2.orig/include/xen/evtchn.h 2011-02-01 15:09:47.000000000 +0100
+++ 12.2/include/xen/evtchn.h 2012-02-10 09:14:30.000000000 +0100
@@ -59,6 +59,7 @@ struct irq_cfg {
};
};
struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node);
+static inline int evtchn_make_refcounted(unsigned int evtchn) { return 0; }
#endif
/*
--- 12.2.orig/include/xen/interface/grant_table.h 2012-02-16 12:34:07.000000000 +0100
+++ 12.2/include/xen/interface/grant_table.h 2012-02-16 13:45:23.000000000 +0100
@@ -100,7 +100,9 @@ typedef uint32_t grant_ref_t;
* Version 1 of the grant table entry structure is maintained purely
* for backwards compatibility. New guests should use version 2.
*/
-#if __XEN_INTERFACE_VERSION__ < 0x0003020a
+#if defined(CONFIG_PARAVIRT_XEN)
+#define grant_entry grant_entry_v1
+#elif __XEN_INTERFACE_VERSION__ < 0x0003020a
#define grant_entry_v1 grant_entry
#define grant_entry_v1_t grant_entry_t
#endif
@@ -188,7 +190,7 @@ typedef struct grant_entry_v1 grant_entr
* The interface by which domains use grant references does not depend
* on the grant table version in use by the other domain.
*/
-#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
+#if defined(CONFIG_PARAVIRT_XEN) || __XEN_INTERFACE_VERSION__ >= 0x0003020a
/*
* Version 1 and version 2 grant entries share a common prefix. The
* fields of the prefix are documented as part of struct
@@ -467,10 +469,11 @@ struct gnttab_unmap_and_replace {
/* OUT parameters. */
int16_t status; /* GNTST_* */
};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace);
typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t;
DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t);
-#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
+#if defined(CONFIG_PARAVIRT_XEN) || __XEN_INTERFACE_VERSION__ >= 0x0003020a
/*
* GNTTABOP_set_version: Request a particular version of the grant
* table shared table structure. This operation can only be performed
@@ -483,6 +486,7 @@ struct gnttab_set_version {
/* IN/OUT parameters */
uint32_t version;
};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version);
typedef struct gnttab_set_version gnttab_set_version_t;
DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t);
@@ -508,6 +512,7 @@ struct gnttab_get_status_frames {
int16_t status; /* GNTST_* */
XEN_GUEST_HANDLE(uint64_t) frame_list;
};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames);
typedef struct gnttab_get_status_frames gnttab_get_status_frames_t;
DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t);
@@ -523,6 +528,7 @@ struct gnttab_get_version {
/* OUT parameters */
uint32_t version;
};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);
typedef struct gnttab_get_version gnttab_get_version_t;
DEFINE_XEN_GUEST_HANDLE(gnttab_get_version_t);
--- 12.2.orig/include/xen/interface/io/blkif.h 2012-04-04 10:28:27.000000000 +0200
+++ 12.2/include/xen/interface/io/blkif.h 2012-04-04 10:31:20.000000000 +0200
@@ -428,25 +428,40 @@ struct blkif_request_segment {
*/
struct blkif_request {
uint8_t operation; /* BLKIF_OP_??? */
+#if !defined(CONFIG_PARAVIRT_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
uint64_t id; /* private guest value, echoed in resp */
-#if !defined(CONFIG_PARAVIRT_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
#else
union {
- struct blkif_request_rw {
+ struct __attribute__((__packed__)) blkif_request_rw {
+ uint8_t nr_segments; /* number of segments */
+ blkif_vdev_t handle; /* only for read/write requests */
+#ifdef CONFIG_X86_64
+ uint32_t _pad1; /* offsetof(blkif_request,u.rw.id) == 8 */
+#endif
+ uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} rw;
- struct blkif_request_discard {
+ struct __attribute__((__packed__)) blkif_request_discard {
+ uint8_t flag; /* BLKIF_DISCARD_SECURE or zero. */
+#define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */
+ blkif_vdev_t _pad1; /* only for read/write requests */
+#ifdef CONFIG_X86_64
+ uint32_t _pad2; /* offsetof(blkif_req..,u.discard.id)==8*/
+#endif
+ uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;
- uint64_t nr_sectors;
+ uint64_t nr_sectors;
+ uint8_t _pad3;
} discard;
} u;
+} __attribute__((__packed__));
#endif
-};
typedef struct blkif_request blkif_request_t;
#if !defined(CONFIG_PARAVIRT_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
--- 12.2.orig/lib/swiotlb-xen.c 2011-07-01 15:19:35.000000000 +0200
+++ 12.2/lib/swiotlb-xen.c 2012-02-09 12:32:50.000000000 +0100
@@ -114,11 +114,11 @@ setup_io_tlb_npages(char *str)
__setup("swiotlb=", setup_io_tlb_npages);
/* make io_tlb_overflow tunable too? */
-unsigned long swioltb_nr_tbl(void)
+unsigned long swiotlb_nr_tbl(void)
{
return io_tlb_nslabs;
}
-
+EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
/* Note that this doesn't work with highmem page */
static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
volatile void *address)