From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux: 2.6.37
Patch-mainline: 2.6.37
This patch contains the differences between 2.6.36 and 2.6.37.
Acked-by: Jeff Mahoney <jeffm@suse.com>
Automatically created from "patches.kernel.org/patch-2.6.37" by xen-port-patches.py
--- head.orig/arch/x86/Kconfig 2014-04-30 10:47:53.000000000 +0200
+++ head/arch/x86/Kconfig 2014-06-26 15:01:39.000000000 +0200
@@ -1947,7 +1947,6 @@ config ARCH_ENABLE_HUGEPAGE_MIGRATION
depends on X86_64 && HUGETLB_PAGE && MIGRATION
menu "Power management and ACPI options"
- depends on !XEN_UNPRIVILEGED_GUEST
config ARCH_HIBERNATION_HEADER
def_bool y
@@ -1955,6 +1954,8 @@ config ARCH_HIBERNATION_HEADER
source "kernel/power/Kconfig"
+if !XEN_UNPRIVILEGED_GUEST
+
source "drivers/acpi/Kconfig"
source "drivers/sfi/Kconfig"
@@ -2091,6 +2092,8 @@ source "drivers/cpuidle/Kconfig"
source "drivers/idle/Kconfig"
+endif # !XEN_UNPRIVILEGED_GUEST
+
endmenu
@@ -2169,7 +2172,7 @@ config PCI_OLPC
config PCI_XEN
def_bool y
- depends on PCI && XEN
+ depends on PCI && PARAVIRT_XEN
select SWIOTLB_XEN
config PCI_DOMAINS
@@ -2193,21 +2196,6 @@ config PCI_CNB20LE_QUIRK
You should say N unless you know you need this.
-config XEN_PCIDEV_FRONTEND
- def_bool y
- prompt "Xen PCI Frontend" if X86_64
- depends on PCI && XEN && (PCI_GOXEN_FE || PCI_GOANY || X86_64)
- select HOTPLUG
- help
- The PCI device frontend driver allows the kernel to import arbitrary
- PCI devices from a PCI backend to support PCI driver domains.
-
-config XEN_PCIDEV_FE_DEBUG
- bool "Xen PCI Frontend Debugging"
- depends on XEN_PCIDEV_FRONTEND
- help
- Enables some debug statements within the PCI Frontend.
-
source "drivers/pci/pcie/Kconfig"
source "drivers/pci/Kconfig"
--- head.orig/arch/x86/include/asm/hw_irq.h 2013-08-09 14:59:34.000000000 +0200
+++ head/arch/x86/include/asm/hw_irq.h 2013-09-26 13:00:29.000000000 +0200
@@ -194,6 +194,7 @@ extern irqreturn_t smp_reschedule_interr
extern irqreturn_t smp_call_function_interrupt(int, void *);
extern irqreturn_t smp_call_function_single_interrupt(int, void *);
extern irqreturn_t smp_reboot_interrupt(int, void *);
+extern irqreturn_t smp_irq_work_interrupt(int, void *);
#endif
#endif
--- head.orig/arch/x86/include/asm/io.h 2014-06-08 20:19:54.000000000 +0200
+++ head/arch/x86/include/asm/io.h 2014-04-30 10:50:21.000000000 +0200
@@ -319,7 +319,7 @@ extern void __iomem *ioremap_wc(resource
extern bool is_early_ioremap_ptep(pte_t *ptep);
-#ifdef CONFIG_XEN
+#ifdef CONFIG_PARAVIRT_XEN
#include <xen/xen.h>
struct bio_vec;
@@ -329,7 +329,7 @@ extern bool xen_biovec_phys_mergeable(co
#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
(__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \
(!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)))
-#endif /* CONFIG_XEN */
+#endif /* CONFIG_PARAVIRT_XEN */
#define IO_SPACE_LIMIT 0xffff
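
The two hunks above are why the Kconfig change earlier introduces PARAVIRT_XEN: the upstream pv-ops Xen code keeps its biovec merge override, now gated on CONFIG_PARAVIRT_XEN so it cannot collide with the forward-ported XEN configuration. The intent of the override can be modeled in plain C; the sketch below is a userspace illustration (all types and the xen_domain flag are simplified stand-ins; the real check is xen_biovec_phys_mergeable()):

    /*
     * Userspace model of the BIOVEC_PHYS_MERGEABLE override: two I/O
     * segments merge only if they are contiguous in pseudo-physical
     * space and, when running under Xen, in machine address space as
     * well.  All names are illustrative.
     */
    #include <stdbool.h>
    #include <stdio.h>

    struct seg {
        unsigned long pfn;    /* pseudo-physical frame number */
        unsigned long mfn;    /* machine frame number */
    };

    static bool xen_domain = true;    /* pretend we run under Xen */

    static bool phys_mergeable(const struct seg *a, const struct seg *b)
    {
        return a->pfn + 1 == b->pfn;
    }

    static bool machine_mergeable(const struct seg *a, const struct seg *b)
    {
        return a->mfn + 1 == b->mfn;
    }

    static bool biovec_mergeable(const struct seg *a, const struct seg *b)
    {
        /* mirrors: __BIOVEC_PHYS_MERGEABLE && (!xen_domain() || xen_...) */
        return phys_mergeable(a, b) &&
               (!xen_domain || machine_mergeable(a, b));
    }

    int main(void)
    {
        struct seg a = { 10, 70 }, b = { 11, 71 }, c = { 12, 99 };

        printf("a+b mergeable: %d\n", biovec_mergeable(&a, &b)); /* 1 */
        printf("b+c mergeable: %d\n", biovec_mergeable(&b, &c)); /* 0 */
        return 0;
    }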
--- head.orig/arch/x86/include/mach-xen/asm/fixmap.h 2011-02-01 15:03:03.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/fixmap.h 2011-02-01 15:09:47.000000000 +0100
@@ -217,5 +217,20 @@ static inline unsigned long virt_to_fix(
BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
return __virt_to_fix(vaddr);
}
+
+/* Return a pointer with the offset calculated */
+static __always_inline unsigned long
+__set_fixmap_offset(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
+{
+ __set_fixmap(idx, phys, flags);
+ return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1));
+}
+
+#define set_fixmap_offset(idx, phys) \
+ __set_fixmap_offset(idx, phys, PAGE_KERNEL)
+
+#define set_fixmap_offset_nocache(idx, phys) \
+ __set_fixmap_offset(idx, phys, PAGE_KERNEL_NOCACHE)
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_FIXMAP_H */
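
The new __set_fixmap_offset() maps the page containing phys into a fixmap slot and hands back a virtual address that keeps the sub-page offset, so callers can map an unaligned physical address in one call. A minimal userspace model of the arithmetic (fix_to_virt() is stubbed out; addresses are illustrative):

    /*
     * Model of the sub-page offset arithmetic added by
     * __set_fixmap_offset(): the fixmap maps whole pages, so the low
     * bits of the physical address are carried over into the returned
     * virtual address.
     */
    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    static unsigned long fix_to_virt(int idx)
    {
        /* illustrative fixed virtual area, one page per slot */
        return 0xffffc000UL - (unsigned long)idx * PAGE_SIZE;
    }

    static unsigned long set_fixmap_offset(int idx, unsigned long phys)
    {
        /* real code also programs the PTE via __set_fixmap(idx, phys, prot) */
        return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1));
    }

    int main(void)
    {
        unsigned long va = set_fixmap_offset(3, 0x12345678UL);

        printf("virt = %#lx (sub-page offset %#lx preserved)\n", va, 0x678UL);
        return 0;
    }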
--- head.orig/arch/x86/include/mach-xen/asm/highmem.h 2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/highmem.h 2011-02-01 15:09:47.000000000 +0100
@@ -58,15 +58,16 @@ extern void kunmap_high(struct page *pag
void *kmap(struct page *page);
void kunmap(struct page *page);
-void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
-void *kmap_atomic(struct page *page, enum km_type type);
-void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type);
-void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
-void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
+
+void *kmap_atomic_prot(struct page *page, pgprot_t prot);
+void *__kmap_atomic(struct page *page);
+void __kunmap_atomic(void *kvaddr);
+void *kmap_atomic_pfn(unsigned long pfn);
+void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
struct page *kmap_atomic_to_page(void *ptr);
-#define kmap_atomic_pte(page, type) \
- kmap_atomic_prot(page, type, \
+#define kmap_atomic_pte(page) \
+ kmap_atomic_prot(page, \
PagePinned(page) ? PAGE_KERNEL_RO : kmap_prot)
#define flush_cache_kmaps() do { } while (0)
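
This is the 2.6.37 switch from slot-typed (km_type) atomic kmaps to stack-based ones: kmap_atomic() now implicitly pushes a per-CPU slot and kunmap_atomic() must pop in strict LIFO order. A userspace model of the new discipline (a plain array replaces the per-CPU slot bookkeeping; not kernel code):

    /*
     * Userspace model of the 2.6.37 stackable kmap_atomic(): instead
     * of the caller naming a KM_* slot, slots are pushed and popped in
     * LIFO order.  The real kernel keeps this state per CPU.
     */
    #include <assert.h>
    #include <stdio.h>

    #define KM_SLOTS 8

    static int top;                      /* per-CPU counter in the kernel */
    static void *slot[KM_SLOTS];

    static void *kmap_atomic(void *page)
    {
        assert(top < KM_SLOTS);
        slot[top++] = page;              /* real code installs a PTE here */
        return page;                     /* and returns the slot's vaddr  */
    }

    static void kunmap_atomic(void *kvaddr)
    {
        assert(top > 0 && slot[top - 1] == kvaddr);  /* enforce LIFO */
        top--;
    }

    int main(void)
    {
        char p1, p2;
        void *a = kmap_atomic(&p1);
        void *b = kmap_atomic(&p2);      /* nesting replaces KM_PTE0/KM_PTE1 */

        kunmap_atomic(b);
        kunmap_atomic(a);
        printf("balanced: %d mappings left\n", top);
        return 0;
    }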
--- head.orig/arch/x86/include/mach-xen/asm/io.h 2011-02-01 15:03:03.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/io.h 2011-02-01 15:09:47.000000000 +0100
@@ -212,6 +212,7 @@ static inline void __iomem *ioremap(reso
extern void iounmap(volatile void __iomem *addr);
+extern void set_iounmap_nonlazy(void);
#ifdef __KERNEL__
@@ -353,6 +354,7 @@ extern void __iomem *early_memremap(reso
unsigned long size);
extern void early_iounmap(void __iomem *addr, unsigned long size);
extern void fixup_early_ioremap(void);
+extern bool is_early_ioremap_ptep(pte_t *ptep);
#define IO_SPACE_LIMIT 0xffff
--- head.orig/arch/x86/include/mach-xen/asm/irq_vectors.h 2011-02-15 17:49:16.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/irq_vectors.h 2011-02-15 17:50:13.000000000 +0100
@@ -13,7 +13,12 @@
#define NMI_VECTOR 0x02
#define CALL_FUNC_SINGLE_VECTOR 3
#define REBOOT_VECTOR 4
+#ifdef CONFIG_IRQ_WORK
+#define IRQ_WORK_VECTOR 5
+#define NR_IPIS 6
+#else
#define NR_IPIS 5
+#endif
/*
* The maximum number of vectors supported by i386 processors
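
Paired with the smp_irq_work_interrupt() declaration added to hw_irq.h earlier, this reserves IPI 5 for irq_work only when CONFIG_IRQ_WORK is set, growing NR_IPIS to 6. A compile-time model of the conditional numbering (the names for vectors 0 and 1 are assumptions about the surrounding Xen header, which this hunk does not show):

    /*
     * Compile-time model of the conditional IPI numbering.  Toggle the
     * define to see both layouts; the hunk itself only shows vectors 2-4.
     */
    #include <stdio.h>

    #define CONFIG_IRQ_WORK 1

    enum {
        RESCHEDULE_VECTOR       = 0,     /* assumed */
        CALL_FUNCTION_VECTOR    = 1,     /* assumed */
        NMI_VECTOR              = 2,
        CALL_FUNC_SINGLE_VECTOR = 3,
        REBOOT_VECTOR           = 4,
    #ifdef CONFIG_IRQ_WORK
        IRQ_WORK_VECTOR         = 5,
        NR_IPIS                 = 6,
    #else
        NR_IPIS                 = 5,
    #endif
    };

    int main(void)
    {
        printf("NR_IPIS = %d\n", NR_IPIS);
        return 0;
    }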
--- head.orig/arch/x86/include/mach-xen/asm/irqflags.h 2011-02-01 14:54:13.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/irqflags.h 2012-05-24 09:12:20.000000000 +0200
@@ -36,19 +36,19 @@ void xen_safe_halt(void);
void xen_halt(void);
-#define __raw_local_save_flags() xen_save_fl()
+#define arch_local_save_flags() xen_save_fl()
-#define raw_local_irq_restore(flags) xen_restore_fl(flags)
+#define arch_local_irq_restore(flags) xen_restore_fl(flags)
-#define raw_local_irq_disable() xen_irq_disable()
+#define arch_local_irq_disable() xen_irq_disable()
-#define raw_local_irq_enable() xen_irq_enable()
+#define arch_local_irq_enable() xen_irq_enable()
/*
* Used in the idle loop; sti takes one instruction cycle
* to complete:
*/
-static inline void raw_safe_halt(void)
+static inline void arch_safe_halt(void)
{
xen_safe_halt();
}
@@ -65,11 +65,11 @@ static inline void halt(void)
/*
* For spinlocks, etc:
*/
-#define __raw_local_irq_save() \
+#define arch_local_irq_save() \
({ \
- unsigned long flags = __raw_local_save_flags(); \
+ unsigned long flags = arch_local_save_flags(); \
\
- raw_local_irq_disable(); \
+ arch_local_irq_disable(); \
\
flags; \
})
@@ -129,22 +129,16 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
#endif /* __ASSEMBLY__ */
#ifndef __ASSEMBLY__
-#define raw_local_save_flags(flags) \
- do { (flags) = __raw_local_save_flags(); } while (0)
-
-#define raw_local_irq_save(flags) \
- do { (flags) = __raw_local_irq_save(); } while (0)
-
-static inline int raw_irqs_disabled_flags(unsigned long flags)
+static inline int arch_irqs_disabled_flags(unsigned long flags)
{
return (flags != 0);
}
-#define raw_irqs_disabled() \
+#define arch_irqs_disabled() \
({ \
- unsigned long flags = __raw_local_save_flags(); \
+ unsigned long flags = arch_local_save_flags(); \
\
- raw_irqs_disabled_flags(flags); \
+ arch_irqs_disabled_flags(flags); \
})
#else
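
This hunk tracks the 2.6.36-to-2.6.37 rename of the raw_local_* irqflags hooks to arch_local_*; the semantics are unchanged: save the current (virtual, event-channel based) interrupt mask, then disable. A userspace model of the save/disable/restore contract (the vCPU upcall mask is reduced to a plain variable):

    /*
     * Model of the arch_local_irq_save() contract under Xen:
     * "interrupts" are a virtual mask (evtchn_upcall_mask in real
     * code, a plain variable here).  Illustrative only.
     */
    #include <assert.h>
    #include <stdio.h>

    static unsigned long upcall_mask;    /* 0 = enabled, 1 = disabled */

    static unsigned long arch_local_save_flags(void)   { return upcall_mask; }
    static void arch_local_irq_disable(void)           { upcall_mask = 1; }
    static void arch_local_irq_restore(unsigned long f) { upcall_mask = f; }

    static unsigned long arch_local_irq_save(void)
    {
        unsigned long flags = arch_local_save_flags();

        arch_local_irq_disable();
        return flags;
    }

    static int arch_irqs_disabled_flags(unsigned long flags)
    {
        return flags != 0;
    }

    int main(void)
    {
        unsigned long flags = arch_local_irq_save();

        assert(arch_irqs_disabled_flags(arch_local_save_flags()));
        arch_local_irq_restore(flags);    /* back to enabled */
        printf("after restore, disabled = %d\n",
               arch_irqs_disabled_flags(arch_local_save_flags()));
        return 0;
    }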
--- head.orig/arch/x86/include/mach-xen/asm/pci.h 2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pci.h 2011-02-01 15:09:47.000000000 +0100
@@ -7,6 +7,7 @@
#include <linux/string.h>
#include <asm/scatterlist.h>
#include <asm/io.h>
+#include <asm/x86_init.h>
#ifdef __KERNEL__
@@ -100,9 +101,36 @@ static inline void early_quirks(void) {
extern void pci_iommu_alloc(void);
-/* MSI arch hooks */
-#define arch_setup_msi_irqs arch_setup_msi_irqs
-#define arch_teardown_msi_irqs arch_teardown_msi_irqs
+#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
+/* MSI arch specific hooks */
+static inline int x86_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+ return x86_msi.setup_msi_irqs(dev, nvec, type);
+}
+
+static inline void x86_teardown_msi_irqs(struct pci_dev *dev)
+{
+ x86_msi.teardown_msi_irqs(dev);
+}
+
+static inline void x86_teardown_msi_irq(unsigned int irq)
+{
+ x86_msi.teardown_msi_irq(irq);
+}
+#define arch_setup_msi_irqs x86_setup_msi_irqs
+#define arch_teardown_msi_irqs x86_teardown_msi_irqs
+#define arch_teardown_msi_irq x86_teardown_msi_irq
+/* implemented in arch/x86/kernel/apic/io_apic.c */
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
+void native_teardown_msi_irq(unsigned int irq);
+/* default to the implementation in drivers/pci/msi.c */
+#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
+void default_teardown_msi_irqs(struct pci_dev *dev);
+#else
+#define native_setup_msi_irqs NULL
+#define native_teardown_msi_irq NULL
+#define default_teardown_msi_irqs NULL
+#endif
#define PCI_DMA_BUS_IS_PHYS 0
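
The MSI arch hooks now dispatch through the x86_msi function-pointer table from asm/x86_init.h, so a platform can override them at boot while bare metal keeps the native_* implementations. A sketch of the pattern (struct layout and the xen_* override below are illustrative, not the exact kernel definitions):

    /*
     * Sketch of the x86_msi ops-table pattern: arch hooks dispatch
     * through function pointers that default to the native versions
     * and can be overridden at boot.
     */
    #include <stdio.h>

    struct pci_dev;                      /* opaque for the model */

    struct x86_msi_ops {
        int  (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
        void (*teardown_msi_irq)(unsigned int irq);
    };

    static int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
    {
        printf("native: program %d MSI vector(s), type %d\n", nvec, type);
        return 0;
    }

    static void native_teardown_msi_irq(unsigned int irq)
    {
        printf("native: free irq %u\n", irq);
    }

    static struct x86_msi_ops x86_msi = {
        .setup_msi_irqs   = native_setup_msi_irqs,
        .teardown_msi_irq = native_teardown_msi_irq,
    };

    /* what asm/pci.h's arch_setup_msi_irqs now expands to */
    static int x86_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
    {
        return x86_msi.setup_msi_irqs(dev, nvec, type);
    }

    static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
    {
        printf("xen: bind %d MSI vector(s) via the hypervisor\n", nvec);
        return 0;
    }

    int main(void)
    {
        x86_setup_msi_irqs(NULL, 1, 0);               /* native path   */
        x86_msi.setup_msi_irqs = xen_setup_msi_irqs;  /* boot override */
        x86_setup_msi_irqs(NULL, 1, 0);               /* xen path      */
        return 0;
    }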
--- head.orig/arch/x86/include/mach-xen/asm/pgtable.h 2011-03-23 10:00:38.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pgtable.h 2011-03-23 10:02:30.000000000 +0100
@@ -28,6 +28,8 @@ extern unsigned long empty_zero_page[PAG
extern spinlock_t pgd_lock;
extern struct list_head pgd_list;
+extern struct mm_struct *pgd_page_get_mm(struct page *page);
+
#define set_pte(ptep, pte) xen_set_pte(ptep, pte)
#define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte)
@@ -634,6 +636,8 @@ static inline void ptep_set_wrprotect(st
set_pte_at(mm, addr, ptep, pte_wrprotect(pte));
}
+#define flush_tlb_fix_spurious_fault(vma, address)
+
/*
* clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
*
--- head.orig/arch/x86/include/mach-xen/asm/pgtable_32.h 2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pgtable_32.h 2011-02-01 15:09:47.000000000 +0100
@@ -25,7 +25,7 @@
struct vm_area_struct;
extern pgd_t *swapper_pg_dir;
-extern pgd_t trampoline_pg_dir[1024];
+extern pgd_t initial_page_table[1024];
static inline void pgtable_cache_init(void) { }
static inline void check_pgt_cache(void) { }
@@ -48,24 +48,14 @@ extern void set_pmd_pfn(unsigned long, u
#endif
#if defined(CONFIG_HIGHPTE)
-#define __KM_PTE \
- (in_nmi() ? KM_NMI_PTE : \
- in_irq() ? KM_IRQ_PTE : \
- KM_PTE0)
#define pte_offset_map(dir, address) \
- ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \
+ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir))) + \
pte_index((address)))
-#define pte_offset_map_nested(dir, address) \
- ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \
- pte_index((address)))
-#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE)
-#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
+#define pte_unmap(pte) kunmap_atomic((pte))
#else
#define pte_offset_map(dir, address) \
((pte_t *)page_address(pmd_page(*(dir))) + pte_index((address)))
-#define pte_offset_map_nested(dir, address) pte_offset_map((dir), (address))
#define pte_unmap(pte) do { } while (0)
-#define pte_unmap_nested(pte) do { } while (0)
#endif
/* Clear a kernel PTE and flush it from the TLB */
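
With stackable atomic kmaps (see the highmem.h model above), a nested PTE mapping is just two ordinary pte_offset_map()/pte_unmap() pairs released in LIFO order, which is why the *_nested variants can be deleted. A toy model of the pairing (map()/unmap() stand in for the real calls):

    /*
     * Toy model: after 2.6.37 a "nested" PTE map is two ordinary
     * map/unmap pairs released in LIFO order; no KM_PTE1 bookkeeping.
     */
    #include <stdio.h>

    static int depth;

    static void map(const char *what)
    {
        printf("%*smap   %s\n", depth * 2, "", what);
        depth++;
    }

    static void unmap(const char *what)
    {
        depth--;
        printf("%*sunmap %s\n", depth * 2, "", what);
    }

    int main(void)
    {
        map("src pte");      /* old code used the __KM_PTE slot */
        map("dst pte");      /* old code used the KM_PTE1 slot  */
        unmap("dst pte");    /* was pte_unmap_nested()          */
        unmap("src pte");    /* was pte_unmap()                 */
        return 0;
    }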
--- head.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-03-23 10:02:08.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-03-23 10:02:27.000000000 +0100
@@ -104,6 +104,8 @@ static inline void xen_pgd_clear(pgd_t *
#define __pte_mfn(_pte) (((_pte).pte & PTE_PFN_MASK) >> PAGE_SHIFT)
+extern void sync_global_pgds(unsigned long start, unsigned long end);
+
/*
* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
@@ -127,9 +129,7 @@ static inline int pgd_large(pgd_t pgd) {
/* x86-64 always has all page tables mapped. */
#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
-#define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address))
#define pte_unmap(pte) ((void)(pte))/* NOP */
-#define pte_unmap_nested(pte) ((void)(pte)) /* NOP */
#define update_mmu_cache(vma, address, ptep) do { } while (0)
--- head.orig/arch/x86/include/mach-xen/asm/processor.h 2012-05-23 13:36:57.000000000 +0200
+++ head/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:47:48.000000000 +0100
@@ -120,6 +120,8 @@ struct cpuinfo_x86 {
u16 phys_proc_id;
/* Core id: */
u16 cpu_core_id;
+ /* Compute unit id */
+ u8 compute_unit_id;
#endif
#ifdef CONFIG_SMP
/* Index into per_cpu list: */
@@ -556,7 +558,7 @@ extern unsigned long mmu_cr4_features;
static inline void set_in_cr4(unsigned long mask)
{
- unsigned cr4;
+ unsigned long cr4;
mmu_cr4_features |= mask;
cr4 = read_cr4();
@@ -566,7 +568,7 @@ static inline void set_in_cr4(unsigned l
static inline void clear_in_cr4(unsigned long mask)
{
- unsigned cr4;
+ unsigned long cr4;
mmu_cr4_features &= ~mask;
cr4 = read_cr4();
@@ -718,31 +720,6 @@ extern unsigned long idle_halt;
extern unsigned long idle_nomwait;
extern bool c1e_detected;
-#ifndef CONFIG_XEN
-/*
- * on systems with caches, caches must be flashed as the absolute
- * last instruction before going into a suspended halt. Otherwise,
- * dirty data can linger in the cache and become stale on resume,
- * leading to strange errors.
- *
- * perform a variety of operations to guarantee that the compiler
- * will not reorder instructions. wbinvd itself is serializing
- * so the processor will not reorder.
- *
- * Systems without cache can just go into halt.
- */
-static inline void wbinvd_halt(void)
-{
- mb();
- /* check for clflush to determine if wbinvd is legal */
- if (cpu_has_clflush)
- asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory");
- else
- while (1)
- halt();
-}
-#endif
-
extern void enable_sep_cpu(void);
extern int sysenter_setup(void);
--- head.orig/arch/x86/include/mach-xen/asm/smp.h 2011-03-03 16:10:16.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/smp.h 2011-03-03 16:12:15.000000000 +0100
@@ -57,7 +57,7 @@ struct smp_ops {
void (*smp_prepare_cpus)(unsigned max_cpus);
void (*smp_cpus_done)(unsigned max_cpus);
- void (*smp_send_stop)(void);
+ void (*stop_other_cpus)(int wait);
void (*smp_send_reschedule)(int cpu);
int (*cpu_up)(unsigned cpu);
@@ -76,7 +76,12 @@ extern struct smp_ops smp_ops;
static inline void smp_send_stop(void)
{
- smp_ops.smp_send_stop();
+ smp_ops.stop_other_cpus(0);
+}
+
+static inline void stop_other_cpus(void)
+{
+ smp_ops.stop_other_cpus(1);
}
static inline void smp_prepare_boot_cpu(void)
@@ -148,12 +153,16 @@ void smp_store_cpu_info(int id);
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
-void xen_smp_send_stop(void);
+void xen_stop_other_cpus(int wait);
void xen_smp_send_reschedule(int cpu);
void xen_send_call_func_ipi(const struct cpumask *mask);
void xen_send_call_func_single_ipi(int cpu);
-#define smp_send_stop xen_smp_send_stop
+static inline void smp_send_stop(void)
+{
+ xen_stop_other_cpus(0);
+}
+
#define smp_send_reschedule xen_smp_send_reschedule
#define arch_send_call_function_single_ipi xen_send_call_func_single_ipi
#define arch_send_call_function_ipi_mask xen_send_call_func_ipi
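
smp_send_stop() becomes a wrapper around the new stop_other_cpus(wait) op: the reboot path keeps the old fire-and-forget behaviour (wait = 0), while callers that must know the other CPUs have really stopped pass wait = 1. A userspace model of the split (the other CPUs are simulated by a counter):

    /*
     * Model of the stop_other_cpus(wait) split: the op takes a wait
     * flag; smp_send_stop() keeps the old fire-and-forget behaviour.
     */
    #include <stdio.h>

    static int online_others = 3;

    static void send_stop_ipi(void) { printf("IPI: stop\n"); }

    static void xen_stop_other_cpus(int wait)
    {
        send_stop_ipi();
        if (wait) {
            while (online_others)        /* real code spins with a timeout */
                online_others--;         /* pretend CPUs ack one by one    */
            printf("all other CPUs confirmed stopped\n");
        }
    }

    static void smp_send_stop(void)   { xen_stop_other_cpus(0); }
    static void stop_other_cpus(void) { xen_stop_other_cpus(1); }

    int main(void)
    {
        smp_send_stop();     /* old behaviour: don't wait */
        stop_other_cpus();   /* new waiting variant       */
        return 0;
    }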
--- head.orig/arch/x86/include/mach-xen/asm/spinlock.h 2012-04-03 08:28:39.000000000 +0200
+++ head/arch/x86/include/mach-xen/asm/spinlock.h 2012-01-31 18:08:35.000000000 +0100
@@ -196,16 +196,16 @@ static __always_inline int __ticket_spin
static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
{
unsigned int token, count;
- unsigned int flags = __raw_local_irq_save();
+ unsigned int flags = arch_local_irq_save();
bool free;
__ticket_spin_lock_preamble;
if (likely(free)) {
- raw_local_irq_restore(flags);
+ arch_local_irq_restore(flags);
return;
}
token = xen_spin_adjust(lock, token);
- raw_local_irq_restore(flags);
+ arch_local_irq_restore(flags);
do {
count = 1 << 12;
__ticket_spin_lock_body;
--- head.orig/arch/x86/include/mach-xen/asm/swiotlb.h 2011-02-01 14:55:46.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/swiotlb.h 2011-02-01 15:09:47.000000000 +0100
@@ -1,6 +1,4 @@
#include_next <asm/swiotlb.h>
-#define pci_swiotlb_detect() 1
-
dma_addr_t swiotlb_map_single_phys(struct device *, phys_addr_t, size_t size,
int dir);
--- head.orig/arch/x86/include/mach-xen/asm/tlbflush.h 2013-09-26 14:51:44.000000000 +0200
+++ head/arch/x86/include/mach-xen/asm/tlbflush.h 2013-09-26 14:52:05.000000000 +0200
@@ -119,6 +119,4 @@ static inline void flush_tlb_kernel_rang
flush_tlb_all();
}
-extern void zap_low_mappings(bool early);
-
#endif /* _ASM_X86_TLBFLUSH_H */
--- head.orig/arch/x86/kernel/Makefile 2013-12-02 17:51:11.000000000 +0100
+++ head/arch/x86/kernel/Makefile 2013-12-02 17:57:49.000000000 +0100
@@ -121,7 +121,6 @@ ifeq ($(CONFIG_X86_64),y)
obj-y += vsmp_64.o
endif
-disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o i8237.o i8253.o \
- i8259.o irqinit.o pci-swiotlb.o reboot.o smpboot.o tsc.o tsc_sync.o \
- uv_%.o vsmp_64.o
+disabled-obj-$(CONFIG_XEN) := crash.o early-quirks.o i8237.o i8253.o i8259.o \
+ irqinit.o pci-swiotlb.o reboot.o smpboot.o tsc.o tsc_sync.o vsmp_64.o
disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += probe_roms_32.o
--- head.orig/arch/x86/kernel/apic/io_apic-xen.c 2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/kernel/apic/io_apic-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -144,13 +144,9 @@ struct irq_pin_list {
struct irq_pin_list *next;
};
-static struct irq_pin_list *get_one_free_irq_2_pin(int node)
+static struct irq_pin_list *alloc_irq_pin_list(int node)
{
- struct irq_pin_list *pin;
-
- pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
-
- return pin;
+ return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
}
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -163,10 +159,7 @@ static struct irq_cfg irq_cfgx[NR_IRQS];
int __init arch_early_irq_init(void)
{
struct irq_cfg *cfg;
- struct irq_desc *desc;
- int count;
- int node;
- int i;
+ int count, node, i;
if (!legacy_pic->nr_legacy_irqs) {
nr_irqs_gsi = 0;
@@ -175,13 +168,15 @@ int __init arch_early_irq_init(void)
cfg = irq_cfgx;
count = ARRAY_SIZE(irq_cfgx);
- node= cpu_to_node(boot_cpu_id);
+ node = cpu_to_node(0);
+
+ /* Make sure the legacy interrupts are marked in the bitmap */
+ irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);
for (i = 0; i < count; i++) {
- desc = irq_to_desc(i);
- desc->chip_data = &cfg[i];
- zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
- zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
+ set_irq_chip_data(i, &cfg[i]);
+ zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
+ zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
/*
* For legacy IRQ's, start with assigning irq0 to irq15 to
* IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
@@ -196,170 +191,88 @@ int __init arch_early_irq_init(void)
}
#ifdef CONFIG_SPARSE_IRQ
-struct irq_cfg *irq_cfg(unsigned int irq)
+static struct irq_cfg *irq_cfg(unsigned int irq)
{
- struct irq_cfg *cfg = NULL;
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
- if (desc)
- cfg = desc->chip_data;
-
- return cfg;
+ return get_irq_chip_data(irq);
}
-static struct irq_cfg *get_one_free_irq_cfg(int node)
+static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
{
struct irq_cfg *cfg;
- cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
- if (cfg) {
- if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
- kfree(cfg);
- cfg = NULL;
- } else if (!zalloc_cpumask_var_node(&cfg->old_domain,
- GFP_ATOMIC, node)) {
- free_cpumask_var(cfg->domain);
- kfree(cfg);
- cfg = NULL;
- }
- }
-
+ cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
+ if (!cfg)
+ return NULL;
+ if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
+ goto out_cfg;
+ if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
+ goto out_domain;
return cfg;
+out_domain:
+ free_cpumask_var(cfg->domain);
+out_cfg:
+ kfree(cfg);
+ return NULL;
}
-int arch_init_chip_data(struct irq_desc *desc, int node)
-{
- struct irq_cfg *cfg;
-
- cfg = desc->chip_data;
- if (!cfg) {
- desc->chip_data = get_one_free_irq_cfg(node);
- if (!desc->chip_data) {
- printk(KERN_ERR "can not alloc irq_cfg\n");
- BUG_ON(1);
- }
- }
-
- return 0;
-}
-
-/* for move_irq_desc */
-static void
-init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
+static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
{
- struct irq_pin_list *old_entry, *head, *tail, *entry;
-
- cfg->irq_2_pin = NULL;
- old_entry = old_cfg->irq_2_pin;
- if (!old_entry)
- return;
-
- entry = get_one_free_irq_2_pin(node);
- if (!entry)
+ if (!cfg)
return;
+ set_irq_chip_data(at, NULL);
+ free_cpumask_var(cfg->domain);
+ free_cpumask_var(cfg->old_domain);
+ kfree(cfg);
+}
- entry->apic = old_entry->apic;
- entry->pin = old_entry->pin;
- head = entry;
- tail = entry;
- old_entry = old_entry->next;
- while (old_entry) {
- entry = get_one_free_irq_2_pin(node);
- if (!entry) {
- entry = head;
- while (entry) {
- head = entry->next;
- kfree(entry);
- entry = head;
- }
- /* still use the old one */
- return;
- }
- entry->apic = old_entry->apic;
- entry->pin = old_entry->pin;
- tail->next = entry;
- tail = entry;
- old_entry = old_entry->next;
- }
+#else
- tail->next = NULL;
- cfg->irq_2_pin = head;
+struct irq_cfg *irq_cfg(unsigned int irq)
+{
+ return irq < nr_irqs ? irq_cfgx + irq : NULL;
}
-static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
+static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
{
- struct irq_pin_list *entry, *next;
-
- if (old_cfg->irq_2_pin == cfg->irq_2_pin)
- return;
+ return irq_cfgx + irq;
+}
- entry = old_cfg->irq_2_pin;
+static inline void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) { }
- while (entry) {
- next = entry->next;
- kfree(entry);
- entry = next;
- }
- old_cfg->irq_2_pin = NULL;
-}
+#endif
-void arch_init_copy_chip_data(struct irq_desc *old_desc,
- struct irq_desc *desc, int node)
+static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
{
+ int res = irq_alloc_desc_at(at, node);
struct irq_cfg *cfg;
- struct irq_cfg *old_cfg;
-
- cfg = get_one_free_irq_cfg(node);
-
- if (!cfg)
- return;
-
- desc->chip_data = cfg;
- old_cfg = old_desc->chip_data;
-
- cfg->vector = old_cfg->vector;
- cfg->move_in_progress = old_cfg->move_in_progress;
- cpumask_copy(cfg->domain, old_cfg->domain);
- cpumask_copy(cfg->old_domain, old_cfg->old_domain);
-
- init_copy_irq_2_pin(old_cfg, cfg, node);
-}
+ if (res < 0) {
+ if (res != -EEXIST)
+ return NULL;
+ cfg = get_irq_chip_data(at);
+ if (cfg)
+ return cfg;
+ }
-static void free_irq_cfg(struct irq_cfg *cfg)
-{
- free_cpumask_var(cfg->domain);
- free_cpumask_var(cfg->old_domain);
- kfree(cfg);
+ cfg = alloc_irq_cfg(at, node);
+ if (cfg)
+ set_irq_chip_data(at, cfg);
+ else
+ irq_free_desc(at);
+ return cfg;
}
-void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+static int alloc_irq_from(unsigned int from, int node)
{
- struct irq_cfg *old_cfg, *cfg;
-
- old_cfg = old_desc->chip_data;
- cfg = desc->chip_data;
-
- if (old_cfg == cfg)
- return;
-
- if (old_cfg) {
- free_irq_2_pin(old_cfg, cfg);
- free_irq_cfg(old_cfg);
- old_desc->chip_data = NULL;
- }
+ return irq_alloc_desc_from(from, node);
}
-/* end for move_irq_desc */
-#else
-struct irq_cfg *irq_cfg(unsigned int irq)
+static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
{
- return irq < nr_irqs ? irq_cfgx + irq : NULL;
+ free_irq_cfg(at, cfg);
+ irq_free_desc(at);
}
-#endif
-
struct io_apic {
unsigned int index;
unsigned int unused[3];
@@ -492,7 +405,7 @@ __ioapic_write_entry(int apic, int pin,
io_apic_write(apic, 0x10 + 2*pin, eu.w1);
}
-void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
unsigned long flags;
raw_spin_lock_irqsave(&ioapic_lock, flags);
@@ -523,7 +436,7 @@ static void ioapic_mask_entry(int apic,
* fast in the common case, and fast for shared ISA-space IRQs.
*/
static int
-add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
+__add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
{
struct irq_pin_list **last, *entry;
@@ -535,7 +448,7 @@ add_pin_to_irq_node_nopanic(struct irq_c
last = &entry->next;
}
- entry = get_one_free_irq_2_pin(node);
+ entry = alloc_irq_pin_list(node);
if (!entry) {
printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
node, apic, pin);
@@ -550,7 +463,7 @@ add_pin_to_irq_node_nopanic(struct irq_c
static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
{
- if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin))
+ if (__add_pin_to_irq_node(cfg, node, apic, pin))
panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
}
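
The alloc_irq_cfg() rewrite above replaces nested if/else cleanup with the kernel's goto-unwind error pattern: each successful allocation gains a matching label that is unwound in reverse order on a later failure. A self-contained model (calloc/free stand in for kzalloc_node and the cpumask allocators):

    /*
     * Model of the goto-unwind pattern used by the new alloc_irq_cfg():
     * unwind completed allocations in reverse order on failure.
     */
    #include <stdlib.h>
    #include <stdio.h>

    struct irq_cfg_model {
        void *domain;
        void *old_domain;
    };

    static struct irq_cfg_model *alloc_irq_cfg_model(void)
    {
        struct irq_cfg_model *cfg = calloc(1, sizeof(*cfg));

        if (!cfg)
            return NULL;
        cfg->domain = calloc(1, 64);
        if (!cfg->domain)
            goto out_cfg;
        cfg->old_domain = calloc(1, 64);
        if (!cfg->old_domain)
            goto out_domain;
        return cfg;

    out_domain:
        free(cfg->domain);
    out_cfg:
        free(cfg);
        return NULL;
    }

    int main(void)
    {
        struct irq_cfg_model *cfg = alloc_irq_cfg_model();

        if (cfg) {
            printf("allocated\n");
            free(cfg->old_domain);
            free(cfg->domain);
            free(cfg);
        }
        return 0;
    }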
@@ -613,11 +526,6 @@ static void __unmask_and_level_IO_APIC_i
IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
}
-static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
-{
- io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
-}
-
static void io_apic_sync(struct irq_pin_list *entry)
{
/*
@@ -629,44 +537,37 @@ static void io_apic_sync(struct irq_pin_
readl(&io_apic->data);
}
-static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
+static void mask_ioapic(struct irq_cfg *cfg)
{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
-static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
+static void mask_ioapic_irq(struct irq_data *data)
{
- struct irq_cfg *cfg = desc->chip_data;
- unsigned long flags;
-
- BUG_ON(!cfg);
+ mask_ioapic(data->chip_data);
+}
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- __mask_IO_APIC_irq(cfg);
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+static void __unmask_ioapic(struct irq_cfg *cfg)
+{
+ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
}
-static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
+static void unmask_ioapic(struct irq_cfg *cfg)
{
- struct irq_cfg *cfg = desc->chip_data;
unsigned long flags;
raw_spin_lock_irqsave(&ioapic_lock, flags);
- __unmask_IO_APIC_irq(cfg);
+ __unmask_ioapic(cfg);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
-static void mask_IO_APIC_irq(unsigned int irq)
+static void unmask_ioapic_irq(struct irq_data *data)
{
- struct irq_desc *desc = irq_to_desc(irq);
-
- mask_IO_APIC_irq_desc(desc);
-}
-static void unmask_IO_APIC_irq(unsigned int irq)
-{
- struct irq_desc *desc = irq_to_desc(irq);
-
- unmask_IO_APIC_irq_desc(desc);
+ unmask_ioapic(data->chip_data);
}
static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
@@ -693,7 +594,7 @@ static void clear_IO_APIC (void)
}
#else
#define add_pin_to_irq_node(cfg, node, apic, pin)
-#define add_pin_to_irq_node_nopanic(cfg, node, apic, pin) 0
+#define __add_pin_to_irq_node(cfg, node, apic, pin) 0
#endif /* !CONFIG_XEN */
#ifdef CONFIG_X86_32
@@ -741,14 +642,14 @@ struct IO_APIC_route_entry **alloc_ioapi
struct IO_APIC_route_entry **ioapic_entries;
ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics,
- GFP_ATOMIC);
+ GFP_KERNEL);
if (!ioapic_entries)
return 0;
for (apic = 0; apic < nr_ioapics; apic++) {
ioapic_entries[apic] =
kzalloc(sizeof(struct IO_APIC_route_entry) *
- nr_ioapic_registers[apic], GFP_ATOMIC);
+ nr_ioapic_registers[apic], GFP_KERNEL);
if (!ioapic_entries[apic])
goto nomem;
}
@@ -1310,7 +1211,6 @@ void __setup_vector_irq(int cpu)
/* Initialize vector_irq on a new cpu */
int irq, vector;
struct irq_cfg *cfg;
- struct irq_desc *desc;
/*
* vector_lock will make sure that we don't run into irq vector
@@ -1319,9 +1219,10 @@ void __setup_vector_irq(int cpu)
*/
raw_spin_lock(&vector_lock);
/* Mark the inuse vectors */
- for_each_irq_desc(irq, desc) {
- cfg = desc->chip_data;
-
+ for_each_active_irq(irq) {
+ cfg = get_irq_chip_data(irq);
+ if (!cfg)
+ continue;
/*
* If it is a legacy IRQ handled by the legacy PIC, this cpu
* will be part of the irq_cfg's domain.
@@ -1378,17 +1279,17 @@ static inline int IO_APIC_irq_trigger(in
}
#endif
-static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
+static void ioapic_register_intr(unsigned int irq, unsigned long trigger)
{
if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
trigger == IOAPIC_LEVEL)
- desc->status |= IRQ_LEVEL;
+ irq_set_status_flags(irq, IRQ_LEVEL);
else
- desc->status &= ~IRQ_LEVEL;
+ irq_clear_status_flags(irq, IRQ_LEVEL);
- if (irq_remapped(irq)) {
- desc->status |= IRQ_MOVE_PCNTXT;
+ if (irq_remapped(get_irq_chip_data(irq))) {
+ irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
if (trigger)
set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
handle_fasteoi_irq,
@@ -1410,13 +1311,13 @@ static void ioapic_register_intr(int irq
}
#else /* !CONFIG_XEN */
#define __clear_irq_vector(irq, cfg) ((void)0)
-#define ioapic_register_intr(irq, desc, trigger) evtchn_register_pirq(irq)
+#define ioapic_register_intr(irq, trigger) evtchn_register_pirq(irq)
#endif
-int setup_ioapic_entry(int apic_id, int irq,
- struct IO_APIC_route_entry *entry,
- unsigned int destination, int trigger,
- int polarity, int vector, int pin)
+static int setup_ioapic_entry(int apic_id, int irq,
+ struct IO_APIC_route_entry *entry,
+ unsigned int destination, int trigger,
+ int polarity, int vector, int pin)
{
/*
* add it to the IO-APIC irq-routing table:
@@ -1438,21 +1339,7 @@ int setup_ioapic_entry(int apic_id, int
if (index < 0)
panic("Failed to allocate IRTE for ioapic %d\n", apic_id);
- memset(&irte, 0, sizeof(irte));
-
- irte.present = 1;
- irte.dst_mode = apic->irq_dest_mode;
- /*
- * Trigger mode in the IRTE will always be edge, and the
- * actual level or edge trigger will be setup in the IO-APIC
- * RTE. This will help simplify level triggered irq migration.
- * For more details, see the comments above explainig IO-APIC
- * irq migration in the presence of interrupt-remapping.
- */
- irte.trigger_mode = 0;
- irte.dlvry_mode = apic->irq_delivery_mode;
- irte.vector = vector;
- irte.dest_id = IRTE_DEST(destination);
+ prepare_irte(&irte, vector, destination);
/* Set source-id of interrupt request */
set_ioapic_sid(&irte, apic_id);
@@ -1489,18 +1376,14 @@ int setup_ioapic_entry(int apic_id, int
return 0;
}
-static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc,
- int trigger, int polarity)
+static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq,
+ struct irq_cfg *cfg, int trigger, int polarity)
{
- struct irq_cfg *cfg;
struct IO_APIC_route_entry entry;
unsigned int dest;
if (!IO_APIC_IRQ(irq))
return;
-
- cfg = desc->chip_data;
-
#ifndef CONFIG_XEN
/*
* For legacy irqs, cfg->domain starts with cpu 0 for legacy
@@ -1535,10 +1418,10 @@ static void setup_IO_APIC_irq(int apic_i
return;
}
- ioapic_register_intr(irq, desc, trigger);
+ ioapic_register_intr(irq, trigger);
#ifndef CONFIG_XEN
if (irq < legacy_pic->nr_legacy_irqs)
- legacy_pic->chip->mask(irq);
+ legacy_pic->mask(irq);
#endif
ioapic_write_entry(apic_id, pin, entry);
@@ -1550,11 +1433,9 @@ static struct {
static void __init setup_IO_APIC_irqs(void)
{
- int apic_id, pin, idx, irq;
- int notcon = 0;
- struct irq_desc *desc;
+ int apic_id, pin, idx, irq, notcon = 0;
+ int node = cpu_to_node(0);
struct irq_cfg *cfg;
- int node = cpu_to_node(boot_cpu_id);
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
@@ -1596,19 +1477,17 @@ static void __init setup_IO_APIC_irqs(vo
continue;
#endif
- desc = irq_to_desc_alloc_node(irq, node);
- if (!desc) {
- printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+ cfg = alloc_irq_and_cfg_at(irq, node);
+ if (!cfg)
continue;
- }
- cfg = desc->chip_data;
+
add_pin_to_irq_node(cfg, node, apic_id, pin);
/*
* don't mark it in pin_programmed, so later acpi could
* set it correctly when irq < 16
*/
- setup_IO_APIC_irq(apic_id, pin, irq, desc,
- irq_trigger(idx), irq_polarity(idx));
+ setup_ioapic_irq(apic_id, pin, irq, cfg, irq_trigger(idx),
+ irq_polarity(idx));
}
if (notcon)
@@ -1623,9 +1502,7 @@ static void __init setup_IO_APIC_irqs(vo
*/
void setup_IO_APIC_irq_extra(u32 gsi)
{
- int apic_id = 0, pin, idx, irq;
- int node = cpu_to_node(boot_cpu_id);
- struct irq_desc *desc;
+ int apic_id = 0, pin, idx, irq, node = cpu_to_node(0);
struct irq_cfg *cfg;
/*
@@ -1645,18 +1522,15 @@ void setup_IO_APIC_irq_extra(u32 gsi)
if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
return;
#endif
-#ifdef CONFIG_SPARSE_IRQ
- desc = irq_to_desc(irq);
- if (desc)
+
+ /* Only handle the non-legacy irqs on secondary ioapics */
+ if (apic_id == 0 || irq < NR_IRQS_LEGACY)
return;
-#endif
- desc = irq_to_desc_alloc_node(irq, node);
- if (!desc) {
- printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+
+ cfg = alloc_irq_and_cfg_at(irq, node);
+ if (!cfg)
return;
- }
- cfg = desc->chip_data;
add_pin_to_irq_node(cfg, node, apic_id, pin);
if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
@@ -1666,7 +1540,7 @@ void setup_IO_APIC_irq_extra(u32 gsi)
}
set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
- setup_IO_APIC_irq(apic_id, pin, irq, desc,
+ setup_ioapic_irq(apic_id, pin, irq, cfg,
irq_trigger(idx), irq_polarity(idx));
}
@@ -1718,7 +1592,6 @@ __apicdebuginit(void) print_IO_APIC(void
union IO_APIC_reg_03 reg_03;
unsigned long flags;
struct irq_cfg *cfg;
- struct irq_desc *desc;
unsigned int irq;
printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
@@ -1805,10 +1678,10 @@ __apicdebuginit(void) print_IO_APIC(void
}
}
printk(KERN_DEBUG "IRQ to pin mappings:\n");
- for_each_irq_desc(irq, desc) {
+ for_each_active_irq(irq) {
struct irq_pin_list *entry;
- cfg = desc->chip_data;
+ cfg = get_irq_chip_data(irq);
if (!cfg)
continue;
entry = cfg->irq_2_pin;
@@ -2315,29 +2188,26 @@ static int __init timer_irq_works(void)
* an edge even if it isn't on the 8259A...
*/
-static unsigned int startup_ioapic_irq(unsigned int irq)
+static unsigned int startup_ioapic_irq(struct irq_data *data)
{
- int was_pending = 0;
+ int was_pending = 0, irq = data->irq;
unsigned long flags;
- struct irq_cfg *cfg;
raw_spin_lock_irqsave(&ioapic_lock, flags);
if (irq < legacy_pic->nr_legacy_irqs) {
- legacy_pic->chip->mask(irq);
+ legacy_pic->mask(irq);
if (legacy_pic->irq_pending(irq))
was_pending = 1;
}
- cfg = irq_cfg(irq);
- __unmask_IO_APIC_irq(cfg);
+ __unmask_ioapic(data->chip_data);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return was_pending;
}
-static int ioapic_retrigger_irq(unsigned int irq)
+static int ioapic_retrigger_irq(struct irq_data *data)
{
-
- struct irq_cfg *cfg = irq_cfg(irq);
+ struct irq_cfg *cfg = data->chip_data;
unsigned long flags;
raw_spin_lock_irqsave(&vector_lock, flags);
@@ -2388,7 +2258,7 @@ static void __target_IO_APIC_irq(unsigne
* With interrupt-remapping, destination information comes
* from interrupt-remapping table entry.
*/
- if (!irq_remapped(irq))
+ if (!irq_remapped(cfg))
io_apic_write(apic, 0x11 + pin*2, dest);
reg = io_apic_read(apic, 0x10 + pin*2);
reg &= ~IO_APIC_REDIR_VECTOR_MASK;
@@ -2398,65 +2268,46 @@ static void __target_IO_APIC_irq(unsigne
}
/*
- * Either sets desc->affinity to a valid value, and returns
+ * Either sets data->affinity to a valid value, and returns
* ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
- * leaves desc->affinity untouched.
+ * leaves data->affinity untouched.
*/
-unsigned int
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask,
- unsigned int *dest_id)
+int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ unsigned int *dest_id)
{
- struct irq_cfg *cfg;
- unsigned int irq;
+ struct irq_cfg *cfg = data->chip_data;
if (!cpumask_intersects(mask, cpu_online_mask))
return -1;
- irq = desc->irq;
- cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
+ if (assign_irq_vector(data->irq, data->chip_data, mask))
return -1;
- cpumask_copy(desc->affinity, mask);
+ cpumask_copy(data->affinity, mask);
- *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+ *dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain);
return 0;
}
static int
-set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
{
- struct irq_cfg *cfg;
+ unsigned int dest, irq = data->irq;
unsigned long flags;
- unsigned int dest;
- unsigned int irq;
- int ret = -1;
-
- irq = desc->irq;
- cfg = desc->chip_data;
+ int ret;
raw_spin_lock_irqsave(&ioapic_lock, flags);
- ret = set_desc_affinity(desc, mask, &dest);
+ ret = __ioapic_set_affinity(data, mask, &dest);
if (!ret) {
/* Only the high 8 bits are valid. */
dest = SET_APIC_LOGICAL_ID(dest);
- __target_IO_APIC_irq(irq, dest, cfg);
+ __target_IO_APIC_irq(irq, dest, data->chip_data);
}
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-
return ret;
}
-static int
-set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
-{
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
-
- return set_ioapic_affinity_irq_desc(desc, mask);
-}
-
#ifdef CONFIG_INTR_REMAP
/*
@@ -2471,24 +2322,21 @@ set_ioapic_affinity_irq(unsigned int irq
* the interrupt-remapping table entry.
*/
static int
-migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
{
- struct irq_cfg *cfg;
+ struct irq_cfg *cfg = data->chip_data;
+ unsigned int dest, irq = data->irq;
struct irte irte;
- unsigned int dest;
- unsigned int irq;
- int ret = -1;
if (!cpumask_intersects(mask, cpu_online_mask))
- return ret;
+ return -EINVAL;
- irq = desc->irq;
if (get_irte(irq, &irte))
- return ret;
+ return -EBUSY;
- cfg = desc->chip_data;
if (assign_irq_vector(irq, cfg, mask))
- return ret;
+ return -EBUSY;
dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
@@ -2503,29 +2351,14 @@ migrate_ioapic_irq_desc(struct irq_desc
if (cfg->move_in_progress)
send_cleanup_vector(cfg);
- cpumask_copy(desc->affinity, mask);
-
+ cpumask_copy(data->affinity, mask);
return 0;
}
-/*
- * Migrates the IRQ destination in the process context.
- */
-static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
- const struct cpumask *mask)
-{
- return migrate_ioapic_irq_desc(desc, mask);
-}
-static int set_ir_ioapic_affinity_irq(unsigned int irq,
- const struct cpumask *mask)
-{
- struct irq_desc *desc = irq_to_desc(irq);
-
- return set_ir_ioapic_affinity_irq_desc(desc, mask);
-}
#else
-static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
- const struct cpumask *mask)
+static inline int
+ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
{
return 0;
}
@@ -2587,10 +2420,8 @@ unlock:
irq_exit();
}
-static void __irq_complete_move(struct irq_desc **descp, unsigned vector)
+static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
{
- struct irq_desc *desc = *descp;
- struct irq_cfg *cfg = desc->chip_data;
unsigned me;
if (likely(!cfg->move_in_progress))
@@ -2602,31 +2433,28 @@ static void __irq_complete_move(struct i
send_cleanup_vector(cfg);
}
-static void irq_complete_move(struct irq_desc **descp)
+static void irq_complete_move(struct irq_cfg *cfg)
{
- __irq_complete_move(descp, ~get_irq_regs()->orig_ax);
+ __irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
}
void irq_force_complete_move(int irq)
{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg = desc->chip_data;
+ struct irq_cfg *cfg = get_irq_chip_data(irq);
if (!cfg)
return;
- __irq_complete_move(&desc, cfg->vector);
+ __irq_complete_move(cfg, cfg->vector);
}
#else
-static inline void irq_complete_move(struct irq_desc **descp) {}
+static inline void irq_complete_move(struct irq_cfg *cfg) { }
#endif
-static void ack_apic_edge(unsigned int irq)
+static void ack_apic_edge(struct irq_data *data)
{
- struct irq_desc *desc = irq_to_desc(irq);
-
- irq_complete_move(&desc);
- move_native_irq(irq);
+ irq_complete_move(data->chip_data);
+ move_native_irq(data->irq);
ack_APIC_irq();
}
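
From here on the file follows the 2.6.37 genirq conversion: chip callbacks take a struct irq_data * instead of a bare irq number, so a handler reaches its irq_cfg directly through data->chip_data rather than via irq_to_desc(). A minimal model of the new callback shape (types are simplified stand-ins):

    /*
     * Model of the irq_chip conversion: callbacks receive a struct
     * irq_data carrying the irq number and chip_data, so ack_apic_edge
     * no longer needs a descriptor lookup.
     */
    #include <stdio.h>

    struct irq_cfg  { int vector; };
    struct irq_data { unsigned int irq; void *chip_data; };

    struct irq_chip {
        void (*irq_ack)(struct irq_data *data); /* was (*ack)(unsigned int) */
    };

    static void ack_apic_edge(struct irq_data *data)
    {
        struct irq_cfg *cfg = data->chip_data;  /* direct, no desc lookup */

        printf("ack irq %u (vector %#x)\n", data->irq, cfg->vector);
    }

    static struct irq_chip ioapic_chip = {
        .irq_ack = ack_apic_edge,
    };

    int main(void)
    {
        struct irq_cfg cfg = { .vector = 0x31 };
        struct irq_data data = { .irq = 9, .chip_data = &cfg };

        ioapic_chip.irq_ack(&data);
        return 0;
    }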
@@ -2648,10 +2476,12 @@ atomic_t irq_mis_count;
* Otherwise, we simulate the EOI message manually by changing the trigger
* mode to edge and then back to level, with RTE being masked during this.
*/
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
{
struct irq_pin_list *entry;
+ unsigned long flags;
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
for_each_irq_pin(entry, cfg->irq_2_pin) {
if (mp_ioapics[entry->apic].apicver >= 0x20) {
/*
@@ -2660,7 +2490,7 @@ static void __eoi_ioapic_irq(unsigned in
* intr-remapping table entry. Hence for the io-apic
* EOI we use the pin number.
*/
- if (irq_remapped(irq))
+ if (irq_remapped(cfg))
io_apic_eoi(entry->apic, entry->pin);
else
io_apic_eoi(entry->apic, cfg->vector);
@@ -2669,36 +2499,21 @@ static void __eoi_ioapic_irq(unsigned in
__unmask_and_level_IO_APIC_irq(entry);
}
}
-}
-
-static void eoi_ioapic_irq(struct irq_desc *desc)
-{
- struct irq_cfg *cfg;
- unsigned long flags;
- unsigned int irq;
-
- irq = desc->irq;
- cfg = desc->chip_data;
-
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- __eoi_ioapic_irq(irq, cfg);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
-static void ack_apic_level(unsigned int irq)
+static void ack_apic_level(struct irq_data *data)
{
- struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg = data->chip_data;
+ int i, do_unmask_irq = 0, irq = data->irq;
unsigned long v;
- int i;
- struct irq_cfg *cfg;
- int do_unmask_irq = 0;
- irq_complete_move(&desc);
+ irq_complete_move(cfg);
#ifdef CONFIG_GENERIC_PENDING_IRQ
/* If we are moving the irq we need to mask it */
- if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
+ if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
do_unmask_irq = 1;
- mask_IO_APIC_irq_desc(desc);
+ mask_ioapic(cfg);
}
#endif
@@ -2734,7 +2549,6 @@ static void ack_apic_level(unsigned int
* we use the above logic (mask+edge followed by unmask+level) from
* Manfred Spraul to clear the remote IRR.
*/
- cfg = desc->chip_data;
i = cfg->vector;
v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
@@ -2754,7 +2568,7 @@ static void ack_apic_level(unsigned int
if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
- eoi_ioapic_irq(desc);
+ eoi_ioapic_irq(irq, cfg);
}
/* Now we can move and renable the irq */
@@ -2785,62 +2599,58 @@ static void ack_apic_level(unsigned int
* accurate and is causing problems then it is a hardware bug
* and you can go talk to the chipset vendor about it.
*/
- cfg = desc->chip_data;
if (!io_apic_level_ack_pending(cfg))
move_masked_irq(irq);
- unmask_IO_APIC_irq_desc(desc);
+ unmask_ioapic(cfg);
}
}
#ifdef CONFIG_INTR_REMAP
-static void ir_ack_apic_edge(unsigned int irq)
+static void ir_ack_apic_edge(struct irq_data *data)
{
ack_APIC_irq();
}
-static void ir_ack_apic_level(unsigned int irq)
+static void ir_ack_apic_level(struct irq_data *data)
{
- struct irq_desc *desc = irq_to_desc(irq);
-
ack_APIC_irq();
- eoi_ioapic_irq(desc);
+ eoi_ioapic_irq(data->irq, data->chip_data);
}
#endif /* CONFIG_INTR_REMAP */
static struct irq_chip ioapic_chip __read_mostly = {
- .name = "IO-APIC",
- .startup = startup_ioapic_irq,
- .mask = mask_IO_APIC_irq,
- .unmask = unmask_IO_APIC_irq,
- .ack = ack_apic_edge,
- .eoi = ack_apic_level,
+ .name = "IO-APIC",
+ .irq_startup = startup_ioapic_irq,
+ .irq_mask = mask_ioapic_irq,
+ .irq_unmask = unmask_ioapic_irq,
+ .irq_ack = ack_apic_edge,
+ .irq_eoi = ack_apic_level,
#ifdef CONFIG_SMP
- .set_affinity = set_ioapic_affinity_irq,
+ .irq_set_affinity = ioapic_set_affinity,
#endif
- .retrigger = ioapic_retrigger_irq,
+ .irq_retrigger = ioapic_retrigger_irq,
};
static struct irq_chip ir_ioapic_chip __read_mostly = {
- .name = "IR-IO-APIC",
- .startup = startup_ioapic_irq,
- .mask = mask_IO_APIC_irq,
- .unmask = unmask_IO_APIC_irq,
+ .name = "IR-IO-APIC",
+ .irq_startup = startup_ioapic_irq,
+ .irq_mask = mask_ioapic_irq,
+ .irq_unmask = unmask_ioapic_irq,
#ifdef CONFIG_INTR_REMAP
- .ack = ir_ack_apic_edge,
- .eoi = ir_ack_apic_level,
+ .irq_ack = ir_ack_apic_edge,
+ .irq_eoi = ir_ack_apic_level,
#ifdef CONFIG_SMP
- .set_affinity = set_ir_ioapic_affinity_irq,
+ .irq_set_affinity = ir_ioapic_set_affinity,
#endif
#endif
- .retrigger = ioapic_retrigger_irq,
+ .irq_retrigger = ioapic_retrigger_irq,
};
#endif /* !CONFIG_XEN */
static inline void init_IO_APIC_traps(void)
{
- int irq;
- struct irq_desc *desc;
struct irq_cfg *cfg;
+ unsigned int irq;
/*
* NOTE! The local APIC isn't very good at handling
@@ -2853,12 +2663,12 @@ static inline void init_IO_APIC_traps(vo
* Also, we've got to be careful not to trash gate
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
- for_each_irq_desc(irq, desc) {
+ for_each_active_irq(irq) {
#ifdef CONFIG_XEN
if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
continue;
#endif
- cfg = desc->chip_data;
+ cfg = get_irq_chip_data(irq);
if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
/*
* Hmm.. We don't have an entry for this,
@@ -2869,7 +2679,7 @@ static inline void init_IO_APIC_traps(vo
legacy_pic->make_irq(irq);
else
/* Strange. Oh, well.. */
- desc->chip = &no_irq_chip;
+ set_irq_chip(irq, &no_irq_chip);
}
}
}
@@ -2879,7 +2689,7 @@ static inline void init_IO_APIC_traps(vo
* The local APIC irq-chip implementation:
*/
-static void mask_lapic_irq(unsigned int irq)
+static void mask_lapic_irq(struct irq_data *data)
{
unsigned long v;
@@ -2887,7 +2697,7 @@ static void mask_lapic_irq(unsigned int
apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
}
-static void unmask_lapic_irq(unsigned int irq)
+static void unmask_lapic_irq(struct irq_data *data)
{
unsigned long v;
@@ -2895,21 +2705,21 @@ static void unmask_lapic_irq(unsigned in
apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
}
-static void ack_lapic_irq(unsigned int irq)
+static void ack_lapic_irq(struct irq_data *data)
{
ack_APIC_irq();
}
static struct irq_chip lapic_chip __read_mostly = {
.name = "local-APIC",
- .mask = mask_lapic_irq,
- .unmask = unmask_lapic_irq,
- .ack = ack_lapic_irq,
+ .irq_mask = mask_lapic_irq,
+ .irq_unmask = unmask_lapic_irq,
+ .irq_ack = ack_lapic_irq,
};
-static void lapic_register_intr(int irq, struct irq_desc *desc)
+static void lapic_register_intr(int irq)
{
- desc->status &= ~IRQ_LEVEL;
+ irq_clear_status_flags(irq, IRQ_LEVEL);
set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
"edge");
}
@@ -3012,9 +2822,8 @@ int timer_through_8259 __initdata;
*/
static inline void __init check_timer(void)
{
- struct irq_desc *desc = irq_to_desc(0);
- struct irq_cfg *cfg = desc->chip_data;
- int node = cpu_to_node(boot_cpu_id);
+ struct irq_cfg *cfg = get_irq_chip_data(0);
+ int node = cpu_to_node(0);
int apic1, pin1, apic2, pin2;
unsigned long flags;
int no_pin1 = 0;
@@ -3024,7 +2833,7 @@ static inline void __init check_timer(vo
/*
* get/set the timer IRQ vector:
*/
- legacy_pic->chip->mask(0);
+ legacy_pic->mask(0);
assign_irq_vector(0, cfg, apic->target_cpus());
/*
@@ -3083,7 +2892,7 @@ static inline void __init check_timer(vo
add_pin_to_irq_node(cfg, node, apic1, pin1);
setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
} else {
- /* for edge trigger, setup_IO_APIC_irq already
+ /* for edge trigger, setup_ioapic_irq already
* leave it unmasked.
* so only need to unmask if it is level-trigger
* do we really have level trigger timer?
@@ -3091,12 +2900,12 @@ static inline void __init check_timer(vo
int idx;
idx = find_irq_entry(apic1, pin1, mp_INT);
if (idx != -1 && irq_trigger(idx))
- unmask_IO_APIC_irq_desc(desc);
+ unmask_ioapic(cfg);
}
if (timer_irq_works()) {
if (nmi_watchdog == NMI_IO_APIC) {
setup_nmi();
- legacy_pic->chip->unmask(0);
+ legacy_pic->unmask(0);
}
if (disable_timer_pin_1 > 0)
clear_IO_APIC_pin(0, pin1);
@@ -3119,14 +2928,14 @@ static inline void __init check_timer(vo
*/
replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
- legacy_pic->chip->unmask(0);
+ legacy_pic->unmask(0);
if (timer_irq_works()) {
apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
timer_through_8259 = 1;
if (nmi_watchdog == NMI_IO_APIC) {
- legacy_pic->chip->mask(0);
+ legacy_pic->mask(0);
setup_nmi();
- legacy_pic->chip->unmask(0);
+ legacy_pic->unmask(0);
}
goto out;
}
@@ -3134,7 +2943,7 @@ static inline void __init check_timer(vo
* Cleanup, just in case ...
*/
local_irq_disable();
- legacy_pic->chip->mask(0);
+ legacy_pic->mask(0);
clear_IO_APIC_pin(apic2, pin2);
apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
}
@@ -3151,16 +2960,16 @@ static inline void __init check_timer(vo
apic_printk(APIC_QUIET, KERN_INFO
"...trying to set up timer as Virtual Wire IRQ...\n");
- lapic_register_intr(0, desc);
+ lapic_register_intr(0);
apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
- legacy_pic->chip->unmask(0);
+ legacy_pic->unmask(0);
if (timer_irq_works()) {
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
local_irq_disable();
- legacy_pic->chip->mask(0);
+ legacy_pic->mask(0);
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
@@ -3340,49 +3149,42 @@ device_initcall(ioapic_init_sysfs);
/*
* Dynamic irq allocate and deallocation
*/
-unsigned int create_irq_nr(unsigned int irq_want, int node)
+unsigned int create_irq_nr(unsigned int from, int node)
{
- /* Allocate an unused irq */
- unsigned int irq;
- unsigned int new;
+ struct irq_cfg *cfg;
unsigned long flags;
- struct irq_cfg *cfg_new = NULL;
- struct irq_desc *desc_new = NULL;
-
- irq = 0;
- if (irq_want < nr_irqs_gsi)
- irq_want = nr_irqs_gsi;
-
- raw_spin_lock_irqsave(&vector_lock, flags);
- for (new = irq_want; new < nr_irqs; new++) {
- desc_new = irq_to_desc_alloc_node(new, node);
- if (!desc_new) {
- printk(KERN_INFO "can not get irq_desc for %d\n", new);
- continue;
- }
- cfg_new = desc_new->chip_data;
-
- if (cfg_new->vector != 0)
- continue;
+ unsigned int ret = 0;
+ int irq;
- desc_new = move_irq_desc(desc_new, node);
- cfg_new = desc_new->chip_data;
+ if (from < nr_irqs_gsi)
+ from = nr_irqs_gsi;
- if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
- irq = new;
- break;
+ irq = alloc_irq_from(from, node);
+ if (irq < 0)
+ return 0;
+ cfg = alloc_irq_cfg(irq, node);
+ if (!cfg) {
+ free_irq_at(irq, NULL);
+ return 0;
}
- raw_spin_unlock_irqrestore(&vector_lock, flags);
- if (irq > 0)
- dynamic_irq_init_keep_chip_data(irq);
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ if (!__assign_irq_vector(irq, cfg, apic->target_cpus()))
+ ret = irq;
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
- return irq;
+ if (ret) {
+ set_irq_chip_data(irq, cfg);
+ irq_clear_status_flags(irq, IRQ_NOREQUEST);
+ } else {
+ free_irq_at(irq, cfg);
+ }
+ return ret;
}
int create_irq(void)
{
- int node = cpu_to_node(boot_cpu_id);
+ int node = cpu_to_node(0);
unsigned int irq_want;
int irq;
@@ -3397,14 +3199,17 @@ int create_irq(void)
void destroy_irq(unsigned int irq)
{
+ struct irq_cfg *cfg = get_irq_chip_data(irq);
unsigned long flags;
- dynamic_irq_cleanup_keep_chip_data(irq);
+ irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
- free_irte(irq);
+ if (irq_remapped(cfg))
+ free_irte(irq);
raw_spin_lock_irqsave(&vector_lock, flags);
- __clear_irq_vector(irq, get_irq_chip_data(irq));
+ __clear_irq_vector(irq, cfg);
raw_spin_unlock_irqrestore(&vector_lock, flags);
+ free_irq_at(irq, cfg);
}
#endif /* !CONFIG_XEN */
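
The rewritten create_irq_nr() delegates descriptor allocation to irq_alloc_desc_from(), attaches the cfg as chip data, and rolls both back through free_irq_at() if vector assignment fails; destroy_irq() mirrors the teardown. A model of the allocate-or-roll-back flow (the descriptor table and vector assignment are simulated):

    /*
     * Model of the new create_irq_nr() flow: reserve a descriptor,
     * attach chip data, assign a vector, roll everything back on
     * failure.  Returns 0 on failure, as in the kernel.
     */
    #include <stdio.h>

    #define NR 16
    static int used[NR], chip_data[NR];

    static int irq_alloc_desc_from(int from)
    {
        int i;

        for (i = from; i < NR; i++) {
            if (!used[i]) {
                used[i] = 1;
                return i;
            }
        }
        return -1;
    }

    static void irq_free_desc(int irq) { used[irq] = 0; }

    static int assign_irq_vector(int irq) { return irq < NR - 1 ? 0 : -1; }

    static unsigned int create_irq_nr(unsigned int from)
    {
        int irq = irq_alloc_desc_from(from);

        if (irq < 0)
            return 0;
        chip_data[irq] = 1;              /* stands in for set_irq_chip_data */
        if (assign_irq_vector(irq)) {
            chip_data[irq] = 0;          /* free_irq_at(): undo both steps */
            irq_free_desc(irq);
            return 0;
        }
        return irq;
    }

    int main(void)
    {
        printf("got irq %u\n", create_irq_nr(4));
        return 0;
    }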
@@ -3429,7 +3234,7 @@ static int msi_compose_msg(struct pci_de
dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
- if (irq_remapped(irq)) {
+ if (irq_remapped(get_irq_chip_data(irq))) {
struct irte irte;
int ir_index;
u16 sub_handle;
@@ -3437,14 +3242,7 @@ static int msi_compose_msg(struct pci_de
ir_index = map_irq_to_irte_handle(irq, &sub_handle);
BUG_ON(ir_index == -1);
- memset (&irte, 0, sizeof(irte));
-
- irte.present = 1;
- irte.dst_mode = apic->irq_dest_mode;
- irte.trigger_mode = 0; /* edge */
- irte.dlvry_mode = apic->irq_delivery_mode;
- irte.vector = cfg->vector;
- irte.dest_id = IRTE_DEST(dest);
+ prepare_irte(&irte, cfg->vector, dest);
/* Set source-id of interrupt request */
if (pdev)
@@ -3489,26 +3287,24 @@ static int msi_compose_msg(struct pci_de
}
#ifdef CONFIG_SMP
-static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int
+msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg;
+ struct irq_cfg *cfg = data->chip_data;
struct msi_msg msg;
unsigned int dest;
- if (set_desc_affinity(desc, mask, &dest))
+ if (__ioapic_set_affinity(data, mask, &dest))
return -1;
- cfg = desc->chip_data;
-
- get_cached_msi_msg_desc(desc, &msg);
+ __get_cached_msi_msg(data->msi_desc, &msg);
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(cfg->vector);
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
- write_msi_msg_desc(desc, &msg);
+ __write_msi_msg(data->msi_desc, &msg);
return 0;
}
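
msi_set_affinity() retargets an MSI by patching only the vector byte of msg.data and the destination-ID field of msg.address_lo in the cached message before writing it back. A model of that read-modify-write (the destination mask is simplified to bits 19:12; the real msidef.h mask also covers the extended destination bits):

    /*
     * Model of the read-modify-write in msi_set_affinity(): only the
     * vector byte of data and the destination-ID field of address_lo
     * change; all other bits are preserved.
     */
    #include <stdint.h>
    #include <stdio.h>

    struct msi_msg { uint32_t address_lo, address_hi, data; };

    #define MSI_DATA_VECTOR_MASK   0x000000ffu
    #define MSI_ADDR_DEST_ID_MASK  0x000ff000u   /* simplified */

    static void retarget(struct msi_msg *msg, uint8_t vector, uint8_t dest)
    {
        msg->data &= ~MSI_DATA_VECTOR_MASK;
        msg->data |= vector;
        msg->address_lo &= ~MSI_ADDR_DEST_ID_MASK;
        msg->address_lo |= (uint32_t)dest << 12;
    }

    int main(void)
    {
        struct msi_msg msg = { .address_lo = 0xfee00000u, .data = 0x4031u };

        retarget(&msg, 0x41, 2);    /* move to vector 0x41 on APIC ID 2 */
        printf("addr=%#x data=%#x\n", msg.address_lo, msg.data);
        return 0;
    }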
@@ -3518,17 +3314,17 @@ static int set_msi_irq_affinity(unsigned
* done in the process context using interrupt-remapping hardware.
*/
static int
-ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+ir_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg = desc->chip_data;
- unsigned int dest;
+ struct irq_cfg *cfg = data->chip_data;
+ unsigned int dest, irq = data->irq;
struct irte irte;
if (get_irte(irq, &irte))
return -1;
- if (set_desc_affinity(desc, mask, &dest))
+ if (__ioapic_set_affinity(data, mask, &dest))
return -1;
irte.vector = cfg->vector;
@@ -3558,27 +3354,27 @@ ir_set_msi_irq_affinity(unsigned int irq
* which implement the MSI or MSI-X Capability Structure.
*/
static struct irq_chip msi_chip = {
- .name = "PCI-MSI",
- .unmask = unmask_msi_irq,
- .mask = mask_msi_irq,
- .ack = ack_apic_edge,
+ .name = "PCI-MSI",
+ .irq_unmask = unmask_msi_irq,
+ .irq_mask = mask_msi_irq,
+ .irq_ack = ack_apic_edge,
#ifdef CONFIG_SMP
- .set_affinity = set_msi_irq_affinity,
+ .irq_set_affinity = msi_set_affinity,
#endif
- .retrigger = ioapic_retrigger_irq,
+ .irq_retrigger = ioapic_retrigger_irq,
};
static struct irq_chip msi_ir_chip = {
- .name = "IR-PCI-MSI",
- .unmask = unmask_msi_irq,
- .mask = mask_msi_irq,
+ .name = "IR-PCI-MSI",
+ .irq_unmask = unmask_msi_irq,
+ .irq_mask = mask_msi_irq,
#ifdef CONFIG_INTR_REMAP
- .ack = ir_ack_apic_edge,
+ .irq_ack = ir_ack_apic_edge,
#ifdef CONFIG_SMP
- .set_affinity = ir_set_msi_irq_affinity,
+ .irq_set_affinity = ir_msi_set_affinity,
#endif
#endif
- .retrigger = ioapic_retrigger_irq,
+ .irq_retrigger = ioapic_retrigger_irq,
};
/*
@@ -3610,8 +3406,8 @@ static int msi_alloc_irte(struct pci_dev
static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
{
- int ret;
struct msi_msg msg;
+ int ret;
ret = msi_compose_msg(dev, irq, &msg, -1);
if (ret < 0)
@@ -3620,12 +3416,8 @@ static int setup_msi_irq(struct pci_dev
set_irq_msi(irq, msidesc);
write_msi_msg(irq, &msg);
- if (irq_remapped(irq)) {
- struct irq_desc *desc = irq_to_desc(irq);
- /*
- * irq migration in process context
- */
- desc->status |= IRQ_MOVE_PCNTXT;
+ if (irq_remapped(get_irq_chip_data(irq))) {
+ irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
} else
set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
@@ -3635,15 +3427,12 @@ static int setup_msi_irq(struct pci_dev
return 0;
}
-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- unsigned int irq;
- int ret, sub_handle;
+ int node, ret, sub_handle, index = 0;
+ unsigned int irq, irq_want;
struct msi_desc *msidesc;
- unsigned int irq_want;
struct intel_iommu *iommu = NULL;
- int index = 0;
- int node;
/* x86 doesn't support multiple MSI yet */
if (type == PCI_CAP_ID_MSI && nvec > 1)
@@ -3696,31 +3485,31 @@ error:
return ret;
}
-void arch_teardown_msi_irq(unsigned int irq)
+void native_teardown_msi_irq(unsigned int irq)
{
destroy_irq(irq);
}
#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
#ifdef CONFIG_SMP
-static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int
+dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg;
+ struct irq_cfg *cfg = data->chip_data;
+ unsigned int dest, irq = data->irq;
struct msi_msg msg;
- unsigned int dest;
- if (set_desc_affinity(desc, mask, &dest))
+ if (__ioapic_set_affinity(data, mask, &dest))
return -1;
- cfg = desc->chip_data;
-
dmar_msi_read(irq, &msg);
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(cfg->vector);
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+ msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
dmar_msi_write(irq, &msg);
@@ -3730,14 +3519,14 @@ static int dmar_msi_set_affinity(unsigne
#endif /* CONFIG_SMP */
static struct irq_chip dmar_msi_type = {
- .name = "DMAR_MSI",
- .unmask = dmar_msi_unmask,
- .mask = dmar_msi_mask,
- .ack = ack_apic_edge,
+ .name = "DMAR_MSI",
+ .irq_unmask = dmar_msi_unmask,
+ .irq_mask = dmar_msi_mask,
+ .irq_ack = ack_apic_edge,
#ifdef CONFIG_SMP
- .set_affinity = dmar_msi_set_affinity,
+ .irq_set_affinity = dmar_msi_set_affinity,
#endif
- .retrigger = ioapic_retrigger_irq,
+ .irq_retrigger = ioapic_retrigger_irq,
};
int arch_setup_dmar_msi(unsigned int irq)
@@ -3758,26 +3547,24 @@ int arch_setup_dmar_msi(unsigned int irq
#ifdef CONFIG_HPET_TIMER
#ifdef CONFIG_SMP
-static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int hpet_msi_set_affinity(struct irq_data *data,
+ const struct cpumask *mask, bool force)
{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg;
+ struct irq_cfg *cfg = data->chip_data;
struct msi_msg msg;
unsigned int dest;
- if (set_desc_affinity(desc, mask, &dest))
+ if (__ioapic_set_affinity(data, mask, &dest))
return -1;
- cfg = desc->chip_data;
-
- hpet_msi_read(irq, &msg);
+ hpet_msi_read(data->handler_data, &msg);
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(cfg->vector);
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
- hpet_msi_write(irq, &msg);
+ hpet_msi_write(data->handler_data, &msg);
return 0;
}
@@ -3785,34 +3572,33 @@ static int hpet_msi_set_affinity(unsigne
#endif /* CONFIG_SMP */
static struct irq_chip ir_hpet_msi_type = {
- .name = "IR-HPET_MSI",
- .unmask = hpet_msi_unmask,
- .mask = hpet_msi_mask,
+ .name = "IR-HPET_MSI",
+ .irq_unmask = hpet_msi_unmask,
+ .irq_mask = hpet_msi_mask,
#ifdef CONFIG_INTR_REMAP
- .ack = ir_ack_apic_edge,
+ .irq_ack = ir_ack_apic_edge,
#ifdef CONFIG_SMP
- .set_affinity = ir_set_msi_irq_affinity,
+ .irq_set_affinity = ir_msi_set_affinity,
#endif
#endif
- .retrigger = ioapic_retrigger_irq,
+ .irq_retrigger = ioapic_retrigger_irq,
};
static struct irq_chip hpet_msi_type = {
.name = "HPET_MSI",
- .unmask = hpet_msi_unmask,
- .mask = hpet_msi_mask,
- .ack = ack_apic_edge,
+ .irq_unmask = hpet_msi_unmask,
+ .irq_mask = hpet_msi_mask,
+ .irq_ack = ack_apic_edge,
#ifdef CONFIG_SMP
- .set_affinity = hpet_msi_set_affinity,
+ .irq_set_affinity = hpet_msi_set_affinity,
#endif
- .retrigger = ioapic_retrigger_irq,
+ .irq_retrigger = ioapic_retrigger_irq,
};
int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
{
- int ret;
struct msi_msg msg;
- struct irq_desc *desc = irq_to_desc(irq);
+ int ret;
if (intr_remapping_enabled) {
struct intel_iommu *iommu = map_hpet_to_ir(id);
@@ -3830,9 +3616,9 @@ int arch_setup_hpet_msi(unsigned int irq
if (ret < 0)
return ret;
- hpet_msi_write(irq, &msg);
- desc->status |= IRQ_MOVE_PCNTXT;
- if (irq_remapped(irq))
+ hpet_msi_write(get_irq_data(irq), &msg);
+ irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+ if (irq_remapped(get_irq_chip_data(irq)))
set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type,
handle_edge_irq, "edge");
else
@@ -3865,33 +3651,30 @@ static void target_ht_irq(unsigned int i
write_ht_irq_msg(irq, &msg);
}
-static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int
+ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg;
+ struct irq_cfg *cfg = data->chip_data;
unsigned int dest;
- if (set_desc_affinity(desc, mask, &dest))
+ if (__ioapic_set_affinity(data, mask, &dest))
return -1;
- cfg = desc->chip_data;
-
- target_ht_irq(irq, dest, cfg->vector);
-
+ target_ht_irq(data->irq, dest, cfg->vector);
return 0;
}
#endif
static struct irq_chip ht_irq_chip = {
- .name = "PCI-HT",
- .mask = mask_ht_irq,
- .unmask = unmask_ht_irq,
- .ack = ack_apic_edge,
+ .name = "PCI-HT",
+ .irq_mask = mask_ht_irq,
+ .irq_unmask = unmask_ht_irq,
+ .irq_ack = ack_apic_edge,
#ifdef CONFIG_SMP
- .set_affinity = set_ht_irq_affinity,
+ .irq_set_affinity = ht_set_affinity,
#endif
- .retrigger = ioapic_retrigger_irq,
+ .irq_retrigger = ioapic_retrigger_irq,
};
int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
@@ -3965,6 +3748,11 @@ void __init probe_nr_irqs_gsi(void)
printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
}
+int get_nr_irqs_gsi(void)
+{
+ return nr_irqs_gsi;
+}
+
#ifdef CONFIG_SPARSE_IRQ
int __init arch_probe_nr_irqs(void)
{
@@ -3983,7 +3771,7 @@ int __init arch_probe_nr_irqs(void)
if (nr < nr_irqs)
nr_irqs = nr;
- return 0;
+ return NR_IRQS_LEGACY;
}
#endif
#endif /* CONFIG_XEN */
@@ -3991,7 +3779,6 @@ int __init arch_probe_nr_irqs(void)
static int __io_apic_set_pci_routing(struct device *dev, int irq,
struct io_apic_irq_attr *irq_attr)
{
- struct irq_desc *desc;
struct irq_cfg *cfg;
int node;
int ioapic, pin;
@@ -4014,13 +3801,11 @@ static int __io_apic_set_pci_routing(str
if (dev)
node = dev_to_node(dev);
else
- node = cpu_to_node(boot_cpu_id);
+ node = cpu_to_node(0);
- desc = irq_to_desc_alloc_node(irq, node);
- if (!desc) {
- printk(KERN_INFO "can not get irq_desc %d\n", irq);
+ cfg = alloc_irq_and_cfg_at(irq, node);
+ if (!cfg)
return 0;
- }
pin = irq_attr->ioapic_pin;
trigger = irq_attr->trigger;
@@ -4030,15 +3815,14 @@ static int __io_apic_set_pci_routing(str
* IRQs < 16 are already in the irq_2_pin[] map
*/
if (irq >= legacy_pic->nr_legacy_irqs) {
- cfg = desc->chip_data;
- if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
+ if (__add_pin_to_irq_node(cfg, node, ioapic, pin)) {
printk(KERN_INFO "can not add pin %d for irq %d\n",
pin, irq);
return 0;
}
}
- setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
+ setup_ioapic_irq(ioapic, pin, irq, cfg, trigger, polarity);
return 0;
}
@@ -4234,14 +4018,14 @@ void __init setup_ioapic_dest(void)
*/
if (desc->status &
(IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
- mask = desc->affinity;
+ mask = desc->irq_data.affinity;
else
mask = apic->target_cpus();
if (intr_remapping_enabled)
- set_ir_ioapic_affinity_irq_desc(desc, mask);
+ ir_ioapic_set_affinity(&desc->irq_data, mask, false);
else
- set_ioapic_affinity_irq_desc(desc, mask);
+ ioapic_set_affinity(&desc->irq_data, mask, false);
}
}
@@ -4429,20 +4213,19 @@ void __init mp_register_ioapic(int id, u
void __init pre_init_apic_IRQ0(void)
{
struct irq_cfg *cfg;
- struct irq_desc *desc;
printk(KERN_INFO "Early APIC setup for system timer0\n");
#ifndef CONFIG_SMP
phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
#endif
- desc = irq_to_desc_alloc_node(0, 0);
+ /* Make sure the irq descriptor is set up */
+ cfg = alloc_irq_and_cfg_at(0, 0);
setup_local_APIC();
- cfg = irq_cfg(0);
add_pin_to_irq_node(cfg, 0, 0, 0);
set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
- setup_IO_APIC_irq(0, 0, 0, desc, 0, 0);
+ setup_ioapic_irq(0, 0, 0, cfg, 0, 0);
}
#endif
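The io_apic changes above track the 2.6.37 genirq rework: irq_chip callbacks are renamed with an irq_ prefix and now receive a struct irq_data * (carrying the irq number, chip_data and affinity) instead of a bare irq number, so the irq_to_desc() lookups disappear. A minimal sketch of a chip written against the new contract — the demo_* names are hypothetical, purely for illustration:

#include <linux/irq.h>

static void demo_mask(struct irq_data *data)
{
	/*
	 * data->chip_data (installed via set_irq_chip_data() at setup
	 * time) identifies the hardware source for data->irq; mask it.
	 */
}

static int demo_set_affinity(struct irq_data *data,
			     const struct cpumask *mask, bool force)
{
	/* reroute data->irq towards a CPU in *mask; 0 means success */
	return 0;
}

static struct irq_chip demo_chip = {
	.name			= "DEMO",
	.irq_mask		= demo_mask,
	.irq_set_affinity	= demo_set_affinity,
};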
--- head.orig/arch/x86/kernel/cpu/common-xen.c 2012-08-01 12:03:22.000000000 +0200
+++ head/arch/x86/kernel/cpu/common-xen.c 2012-08-01 12:03:41.000000000 +0200
@@ -714,7 +714,7 @@ static void __init early_identify_cpu(st
this_cpu->c_early_init(c);
#ifdef CONFIG_SMP
- c->cpu_index = boot_cpu_id;
+ c->cpu_index = 0;
#endif
filter_cpuid_features(c, false);
}
@@ -753,16 +753,21 @@ void __init early_cpu_init(void)
}
/*
- * The NOPL instruction is supposed to exist on all CPUs with
- * family >= 6; unfortunately, that's not true in practice because
- * of early VIA chips and (more importantly) broken virtualizers that
- * are not easy to detect. In the latter case it doesn't even *fail*
- * reliably, so probing for it doesn't even work. Disable it completely
+ * The NOPL instruction is supposed to exist on all CPUs of family >= 6;
+ * unfortunately, that's not true in practice because of early VIA
+ * chips and (more importantly) broken virtualizers that are not easy
+ * to detect. In the latter case it doesn't even *fail* reliably, so
+ * probing for it doesn't even work. Disable it completely on 32-bit
* unless we can find a reliable way to detect all the broken cases.
+ * Enable it explicitly on 64-bit for non-constant inputs of cpu_has().
*/
static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
{
+#ifdef CONFIG_X86_32
clear_cpu_cap(c, X86_FEATURE_NOPL);
+#else
+ set_cpu_cap(c, X86_FEATURE_NOPL);
+#endif
}
static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
@@ -1376,13 +1381,6 @@ void __cpuinit cpu_init(void)
clear_all_debug_regs();
dbg_restore_debug_regs();
- /*
- * Force FPU initialization:
- */
- current_thread_info()->status = 0;
- clear_used_math();
- mxcsr_feature_mask_init();
-
fpu_init();
xsave_init();
}
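After the detect_nopl() change above, the NOPL feature bit is no longer probed at all: it is unconditionally cleared on 32-bit and set on 64-bit, so non-constant cpu_has() queries get a definite answer. A hypothetical caller (demo_* name invented) would simply see:

#include <linux/types.h>
#include <asm/cpufeature.h>

static bool demo_nopl_usable(struct cpuinfo_x86 *c)
{
	/* false on 32-bit, true on 64-bit once detect_nopl() has run */
	return cpu_has(c, X86_FEATURE_NOPL);
}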
--- head.orig/arch/x86/kernel/e820-xen.c 2011-09-23 15:56:09.000000000 +0200
+++ head/arch/x86/kernel/e820-xen.c 2011-09-23 15:58:51.000000000 +0200
@@ -15,6 +15,7 @@
#include <linux/pfn.h>
#include <linux/suspend.h>
#include <linux/firmware-map.h>
+#include <linux/memblock.h>
#include <asm/e820.h>
#include <asm/proto.h>
@@ -39,7 +40,7 @@ struct e820map e820;
#if !defined(CONFIG_XEN)
struct e820map e820_saved;
#elif defined(CONFIG_XEN_PRIVILEGED_GUEST)
-static struct e820map machine_e820;
+struct e820map machine_e820;
# define e820_saved machine_e820
#else
# define machine_e820 e820
@@ -793,75 +794,9 @@ core_initcall(e820_mark_nvs_memory);
#endif
#endif
-/*
- * Find a free area with specified alignment in a specific range.
- */
-u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
-{
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- u64 addr;
- u64 ei_start, ei_last;
-
- if (ei->type != E820_RAM)
- continue;
-
- ei_last = ei->addr + ei->size;
- ei_start = ei->addr;
- addr = find_early_area(ei_start, ei_last, start, end,
- size, align);
-
- if (addr != -1ULL)
- return addr;
- }
- return -1ULL;
-}
-
-u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align)
-{
- return find_e820_area(start, end, size, align);
-}
-
-u64 __init get_max_mapped(void)
-{
- u64 end = max_pfn_mapped;
-
- end <<= PAGE_SHIFT;
-
- return end;
-}
-/*
- * Find next free range after *start
- */
-u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
-{
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- u64 addr;
- u64 ei_start, ei_last;
-
- if (ei->type != E820_RAM)
- continue;
-
- ei_last = ei->addr + ei->size;
- ei_start = ei->addr;
- addr = find_early_area_size(ei_start, ei_last, start,
- sizep, align);
-
- if (addr != -1ULL)
- return addr;
- }
-
- return -1ULL;
-}
-
#ifndef CONFIG_XEN_UNPRIVILEGED_GUEST
/*
- * pre allocated 4k and reserved it in e820
+ * Pre-allocate 4k and reserve it in memblock and e820_saved
*/
u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
{
@@ -880,8 +815,8 @@ u64 __init early_reserve_e820(u64 startt
align = PAGE_SIZE;
#endif
for (start = startt; ; start += size) {
- start = find_e820_area_size(start, &size, align);
- if (!(start + 1))
+ start = memblock_x86_find_in_range_size(start, &size, align);
+ if (start == MEMBLOCK_ERROR)
return 0;
if (size >= sizet)
break;
@@ -927,10 +862,9 @@ u64 __init early_reserve_e820(u64 startt
if (rc)
return 0;
#endif
- e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
+ memblock_x86_reserve_range(addr, addr + sizet, "new next");
e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
- printk(KERN_INFO "update e820 for early_reserve_e820\n");
- update_e820();
+ printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
update_e820_saved();
return addr;
@@ -993,83 +927,6 @@ unsigned long __init e820_end_of_low_ram
{
return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
}
-/*
- * Finds an active region in the address range from start_pfn to last_pfn and
- * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
- */
-int __init e820_find_active_region(const struct e820entry *ei,
- unsigned long start_pfn,
- unsigned long last_pfn,
- unsigned long *ei_startpfn,
- unsigned long *ei_endpfn)
-{
- u64 align = PAGE_SIZE;
-
-#ifdef CONFIG_XEN
- if (last_pfn > xen_start_info->nr_pages)
- last_pfn = xen_start_info->nr_pages;
-#endif
-
- *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT;
- *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT;
-
- /* Skip map entries smaller than a page */
- if (*ei_startpfn >= *ei_endpfn)
- return 0;
-
- /* Skip if map is outside the node */
- if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
- *ei_startpfn >= last_pfn)
- return 0;
-
- /* Check for overlaps */
- if (*ei_startpfn < start_pfn)
- *ei_startpfn = start_pfn;
- if (*ei_endpfn > last_pfn)
- *ei_endpfn = last_pfn;
-
- return 1;
-}
-
-/* Walk the e820 map and register active regions within a node */
-void __init e820_register_active_regions(int nid, unsigned long start_pfn,
- unsigned long last_pfn)
-{
- unsigned long ei_startpfn;
- unsigned long ei_endpfn;
- int i;
-
- for (i = 0; i < e820.nr_map; i++)
- if (e820_find_active_region(&e820.map[i],
- start_pfn, last_pfn,
- &ei_startpfn, &ei_endpfn))
- add_active_range(nid, ei_startpfn, ei_endpfn);
-#ifdef CONFIG_XEN
- BUG_ON(nid);
- add_active_range(nid, last_pfn, last_pfn);
-#endif
-}
-
-/*
- * Find the hole size (in bytes) in the memory range.
- * @start: starting address of the memory range to scan
- * @end: ending address of the memory range to scan
- */
-u64 __init e820_hole_size(u64 start, u64 end)
-{
- unsigned long start_pfn = start >> PAGE_SHIFT;
- unsigned long last_pfn = end >> PAGE_SHIFT;
- unsigned long ei_startpfn, ei_endpfn, ram = 0;
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- if (e820_find_active_region(&e820.map[i],
- start_pfn, last_pfn,
- &ei_startpfn, &ei_endpfn))
- ram += ei_endpfn - ei_startpfn;
- }
- return end - start - ((u64)ram << PAGE_SHIFT);
-}
static void early_panic(char *msg)
{
@@ -1350,3 +1207,48 @@ void __init setup_memory_map(void)
printk(KERN_INFO "Xen-provided physical RAM map:\n");
_e820_print_map(&e820, who);
}
+
+void __init memblock_x86_fill(void)
+{
+ int i;
+ u64 end;
+
+ /*
+ * EFI may have more than 128 entries
+ * We are safe to enable resizing, because memblock_x86_fill()
+ * runs rather late for x86
+ */
+ memblock_can_resize = 1;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+
+ end = ei->addr + ei->size;
+ if (end != (resource_size_t)end)
+ continue;
+
+ if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
+ continue;
+
+ memblock_add(ei->addr, ei->size);
+ }
+
+ memblock_analyze();
+ memblock_dump_all();
+}
+
+void __init memblock_find_dma_reserve(void)
+{
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
+ u64 free_size_pfn;
+ u64 mem_size_pfn;
+ /*
+ * We need to find out how much memory is used below MAX_DMA_PFN:
+ * use memblock to get the free size in [0, MAX_DMA_PFN] first,
+ * and assume boot memory will not be allocated below MAX_DMA_PFN.
+ */
+ mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
+ free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
+ set_dma_reserve(mem_size_pfn - free_size_pfn);
+#endif
+}
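The e820 conversion above replaces the open-coded scanners (find_e820_area() and friends) with the memblock allocator. The find-then-reserve idiom that early_reserve_e820() now follows reduces to roughly this sketch, assuming the 2.6.37-era x86 helpers memblock_x86_reserve_range() and the MEMBLOCK_ERROR cookie (demo_* name hypothetical):

#include <linux/memblock.h>

static u64 __init demo_early_reserve(u64 goal, u64 limit, u64 size, u64 align)
{
	u64 addr = memblock_find_in_range(goal, limit, size, align);

	if (addr == MEMBLOCK_ERROR)	/* nothing free in the range */
		return 0;

	memblock_x86_reserve_range(addr, addr + size, "demo");
	return addr;
}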
--- head.orig/arch/x86/kernel/early_printk-xen.c 2011-02-01 15:03:10.000000000 +0100
+++ head/arch/x86/kernel/early_printk-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -13,6 +13,7 @@
#include <asm/setup.h>
#include <asm/pci-direct.h>
#include <asm/fixmap.h>
+#include <asm/mrst.h>
#include <asm/pgtable.h>
#include <linux/usb/ehci_def.h>
@@ -271,6 +272,18 @@ static int __init setup_early_printk(cha
if (!strncmp(buf, "xen", 3))
early_console_register(&xenboot_console, keep);
#endif
+#ifdef CONFIG_X86_MRST_EARLY_PRINTK
+ if (!strncmp(buf, "mrst", 4)) {
+ mrst_early_console_init();
+ early_console_register(&early_mrst_console, keep);
+ }
+
+ if (!strncmp(buf, "hsu", 3)) {
+ hsu_early_console_init();
+ early_console_register(&early_hsu_console, keep);
+ }
+
+#endif
buf++;
}
return 0;
--- head.orig/arch/x86/kernel/entry_32-xen.S 2013-01-30 11:51:38.000000000 +0100
+++ head/arch/x86/kernel/entry_32-xen.S 2013-01-30 11:53:28.000000000 +0100
@@ -119,8 +119,7 @@ NMI_MASK = 0x80000000
/* unfortunately push/pop can't be no-op */
.macro PUSH_GS
- pushl $0
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0
.endm
.macro POP_GS pop=0
addl $(4 + \pop), %esp
@@ -144,14 +143,12 @@ NMI_MASK = 0x80000000
#else /* CONFIG_X86_32_LAZY_GS */
.macro PUSH_GS
- pushl %gs
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %gs
/*CFI_REL_OFFSET gs, 0*/
.endm
.macro POP_GS pop=0
-98: popl %gs
- CFI_ADJUST_CFA_OFFSET -4
+98: popl_cfi %gs
/*CFI_RESTORE gs*/
.if \pop <> 0
add $\pop, %esp
@@ -199,35 +196,25 @@ NMI_MASK = 0x80000000
.macro SAVE_ALL
cld
PUSH_GS
- pushl %fs
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %fs
/*CFI_REL_OFFSET fs, 0;*/
- pushl %es
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %es
/*CFI_REL_OFFSET es, 0;*/
- pushl %ds
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ds
/*CFI_REL_OFFSET ds, 0;*/
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax
CFI_REL_OFFSET eax, 0
- pushl %ebp
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebp
CFI_REL_OFFSET ebp, 0
- pushl %edi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edi
CFI_REL_OFFSET edi, 0
- pushl %esi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %esi
CFI_REL_OFFSET esi, 0
- pushl %edx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edx
CFI_REL_OFFSET edx, 0
- pushl %ecx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ecx
CFI_REL_OFFSET ecx, 0
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
movl $(__USER_DS), %edx
movl %edx, %ds
@@ -238,39 +225,29 @@ NMI_MASK = 0x80000000
.endm
.macro RESTORE_INT_REGS
- popl %ebx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ebx
CFI_RESTORE ebx
- popl %ecx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ecx
CFI_RESTORE ecx
- popl %edx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %edx
CFI_RESTORE edx
- popl %esi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %esi
CFI_RESTORE esi
- popl %edi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %edi
CFI_RESTORE edi
- popl %ebp
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ebp
CFI_RESTORE ebp
- popl %eax
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %eax
CFI_RESTORE eax
.endm
.macro RESTORE_REGS pop=0
RESTORE_INT_REGS
-1: popl %ds
- CFI_ADJUST_CFA_OFFSET -4
+1: popl_cfi %ds
/*CFI_RESTORE ds;*/
-2: popl %es
- CFI_ADJUST_CFA_OFFSET -4
+2: popl_cfi %es
/*CFI_RESTORE es;*/
-3: popl %fs
- CFI_ADJUST_CFA_OFFSET -4
+3: popl_cfi %fs
/*CFI_RESTORE fs;*/
POP_GS \pop
.pushsection .fixup, "ax"
@@ -324,16 +301,12 @@ NMI_MASK = 0x80000000
ENTRY(ret_from_fork)
CFI_STARTPROC
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax
call schedule_tail
GET_THREAD_INFO(%ebp)
- popl %eax
- CFI_ADJUST_CFA_OFFSET -4
- pushl $0x0202 # Reset kernel eflags
- CFI_ADJUST_CFA_OFFSET 4
- popfl
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %eax
+ pushl_cfi $0x0202 # Reset kernel eflags
+ popfl_cfi
jmp syscall_exit
CFI_ENDPROC
END(ret_from_fork)
@@ -413,29 +386,23 @@ sysenter_past_esp:
* enough kernel state to call TRACE_IRQS_OFF can be called - but
* we immediately enable interrupts at that point anyway.
*/
- pushl $(__USER_DS)
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $__USER_DS
/*CFI_REL_OFFSET ss, 0*/
- pushl %ebp
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebp
CFI_REL_OFFSET esp, 0
- pushfl
+ pushfl_cfi
orl $X86_EFLAGS_IF, (%esp)
- CFI_ADJUST_CFA_OFFSET 4
- pushl $(__USER_CS)
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $__USER_CS
/*CFI_REL_OFFSET cs, 0*/
/*
* Push current_thread_info()->sysenter_return to the stack.
* A tiny bit of offset fixup is necessary - 4*4 means the 4 words
* pushed above; +8 corresponds to copy_thread's esp0 setting.
*/
- pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp)
CFI_REL_OFFSET eip, 0
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax
SAVE_ALL
ENABLE_INTERRUPTS(CLBR_NONE)
@@ -490,8 +457,7 @@ sysenter_audit:
movl %eax,%edx /* 2nd arg: syscall number */
movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */
call audit_syscall_entry
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebx
movl PT_EAX(%esp),%eax /* reload syscall number */
jmp sysenter_do_call
@@ -535,8 +501,7 @@ ENTRY(ia32pv_sysenter_target)
addl $4,%esp
CFI_ADJUST_CFA_OFFSET -4
/* +5*4 is SS:ESP,EFLAGS,CS:EIP. +8 is esp0 setting. */
- pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
/*
* Load the potential sixth argument from user stack.
* Careful about security.
@@ -559,8 +524,7 @@ ENDPROC(ia32pv_sysenter_target)
# system call handler stub
ENTRY(system_call)
RING0_INT_FRAME # can't unwind into user space anyway
- pushl %eax # save orig_eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax # save orig_eax
SAVE_ALL
GET_THREAD_INFO(%ebp)
# system call tracing in operation / emulation
@@ -610,7 +574,6 @@ restore_nocheck:
jnz restore_all_enable_events # != 0 => enable event delivery
#endif
RESTORE_REGS 4 # skip orig_eax/error_code
- CFI_ADJUST_CFA_OFFSET -4
irq_return:
INTERRUPT_RETURN
.section .fixup,"ax"
@@ -664,10 +627,8 @@ ldt_ss:
shr $16, %edx
mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
- pushl $__ESPFIX_SS
- CFI_ADJUST_CFA_OFFSET 4
- push %eax /* new kernel esp */
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $__ESPFIX_SS
+ pushl_cfi %eax /* new kernel esp */
/* Disable interrupts, but do not irqtrace this section: we
* will soon execute iret and the tracer was already set to
* the irqstate after the iret */
@@ -736,11 +697,9 @@ work_notifysig: # deal with pending s
ALIGN
work_notifysig_v86:
- pushl %ecx # save ti_flags for do_notify_resume
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ecx # save ti_flags for do_notify_resume
call save_v86_state # %eax contains pt_regs pointer
- popl %ecx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ecx
movl %eax, %esp
#else
movl %esp, %eax
@@ -820,14 +779,18 @@ ptregs_##name: \
#define PTREGSCALL3(name) \
ALIGN; \
ptregs_##name: \
+ CFI_STARTPROC; \
leal 4(%esp),%eax; \
- pushl %eax; \
+ pushl_cfi %eax; \
movl PT_EDX(%eax),%ecx; \
movl PT_ECX(%eax),%edx; \
movl PT_EBX(%eax),%eax; \
call sys_##name; \
addl $4,%esp; \
- ret
+ CFI_ADJUST_CFA_OFFSET -4; \
+ ret; \
+ CFI_ENDPROC; \
+ENDPROC(ptregs_##name)
PTREGSCALL1(iopl)
PTREGSCALL0(fork)
@@ -842,15 +805,19 @@ PTREGSCALL1(vm86old)
/* Clone is an oddball. The 4th arg is in %edi */
ALIGN;
ptregs_clone:
+ CFI_STARTPROC
leal 4(%esp),%eax
- pushl %eax
- pushl PT_EDI(%eax)
+ pushl_cfi %eax
+ pushl_cfi PT_EDI(%eax)
movl PT_EDX(%eax),%ecx
movl PT_ECX(%eax),%edx
movl PT_EBX(%eax),%eax
call sys_clone
addl $8,%esp
+ CFI_ADJUST_CFA_OFFSET -8
ret
+ CFI_ENDPROC
+ENDPROC(ptregs_clone)
#ifndef CONFIG_XEN
.macro FIXUP_ESPFIX_STACK
@@ -866,10 +833,8 @@ ptregs_clone:
mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
shl $16, %eax
addl %esp, %eax /* the adjusted stack pointer */
- pushl $__KERNEL_DS
- CFI_ADJUST_CFA_OFFSET 4
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $__KERNEL_DS
+ pushl_cfi %eax
lss (%esp), %esp /* switch to the normal stack segment */
CFI_ADJUST_CFA_OFFSET -8
.endm
@@ -906,8 +871,7 @@ vector=FIRST_EXTERNAL_VECTOR
.if vector <> FIRST_EXTERNAL_VECTOR
CFI_ADJUST_CFA_OFFSET -4
.endif
-1: pushl $(~vector+0x80) /* Note: always in signed byte range */
- CFI_ADJUST_CFA_OFFSET 4
+1: pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */
.if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
jmp 2f
.endif
@@ -947,8 +911,7 @@ ENDPROC(common_interrupt)
#define BUILD_INTERRUPT3(name, nr, fn) \
ENTRY(name) \
RING0_INT_FRAME; \
- pushl $~(nr); \
- CFI_ADJUST_CFA_OFFSET 4; \
+ pushl_cfi $~(nr); \
SAVE_ALL; \
TRACE_IRQS_OFF \
movl %esp,%eax; \
@@ -985,8 +948,7 @@ ENDPROC(name)
# so we can simply throw away the new one.
ENTRY(hypervisor_callback)
RING0_INT_FRAME
- pushl $-1
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $-1
SAVE_ALL
movl PT_CS(%esp),%ecx
movl PT_EIP(%esp),%eax
@@ -1006,8 +968,7 @@ ENTRY(hypervisor_callback)
addl $PT_OLDESP,%esp # Remove eflags...ebx from stack frame.
#endif
.Ldo_upcall:
- push %esp
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %esp
call evtchn_do_upcall
add $4,%esp
CFI_ADJUST_CFA_OFFSET -4
@@ -1081,8 +1042,7 @@ ENTRY(failsafe_callback)
leal 16(%esp),%esp
RING0_INT_FRAME
jnz iret_exc # EAX != 0 => Category 2 (Bad IRET)
- pushl $-1 # EAX == 0 => Category 1 (Bad segment)
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $-1 # EAX == 0 => Category 1 (Bad segment)
SAVE_ALL
jmp ret_from_exception
.section .fixup,"ax"; \
@@ -1111,21 +1071,18 @@ ENTRY(failsafe_callback)
ENTRY(coprocessor_error)
RING0_INT_FRAME
- pushl $0
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_coprocessor_error
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0
+ pushl_cfi $do_coprocessor_error
jmp error_code
CFI_ENDPROC
END(coprocessor_error)
ENTRY(simd_coprocessor_error)
RING0_INT_FRAME
- pushl $0
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0
#ifdef CONFIG_X86_INVD_BUG
/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
-661: pushl $do_general_protection
+661: pushl_cfi $do_general_protection
662:
.section .altinstructions,"a"
.balign 4
@@ -1140,19 +1097,16 @@ ENTRY(simd_coprocessor_error)
664:
.previous
#else
- pushl $do_simd_coprocessor_error
+ pushl_cfi $do_simd_coprocessor_error
#endif
- CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
END(simd_coprocessor_error)
ENTRY(device_not_available)
RING0_INT_FRAME
- pushl $-1 # mark this as an int
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_device_not_available
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $-1 # mark this as an int
+ pushl_cfi $do_device_not_available
jmp error_code
CFI_ENDPROC
END(device_not_available)
@@ -1174,82 +1128,68 @@ END(native_irq_enable_sysexit)
ENTRY(overflow)
RING0_INT_FRAME
- pushl $0
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_overflow
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0
+ pushl_cfi $do_overflow
jmp error_code
CFI_ENDPROC
END(overflow)
ENTRY(bounds)
RING0_INT_FRAME
- pushl $0
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_bounds
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0
+ pushl_cfi $do_bounds
jmp error_code
CFI_ENDPROC
END(bounds)
ENTRY(invalid_op)
RING0_INT_FRAME
- pushl $0
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_invalid_op
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0
+ pushl_cfi $do_invalid_op
jmp error_code
CFI_ENDPROC
END(invalid_op)
ENTRY(coprocessor_segment_overrun)
RING0_INT_FRAME
- pushl $0
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_coprocessor_segment_overrun
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0
+ pushl_cfi $do_coprocessor_segment_overrun
jmp error_code
CFI_ENDPROC
END(coprocessor_segment_overrun)
ENTRY(invalid_TSS)
RING0_EC_FRAME
- pushl $do_invalid_TSS
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $do_invalid_TSS
jmp error_code
CFI_ENDPROC
END(invalid_TSS)
ENTRY(segment_not_present)
RING0_EC_FRAME
- pushl $do_segment_not_present
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $do_segment_not_present
jmp error_code
CFI_ENDPROC
END(segment_not_present)
ENTRY(stack_segment)
RING0_EC_FRAME
- pushl $do_stack_segment
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $do_stack_segment
jmp error_code
CFI_ENDPROC
END(stack_segment)
ENTRY(alignment_check)
RING0_EC_FRAME
- pushl $do_alignment_check
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $do_alignment_check
jmp error_code
CFI_ENDPROC
END(alignment_check)
ENTRY(divide_error)
RING0_INT_FRAME
- pushl $0 # no error code
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_divide_error
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0 # no error code
+ pushl_cfi $do_divide_error
jmp error_code
CFI_ENDPROC
END(divide_error)
@@ -1257,10 +1197,8 @@ END(divide_error)
#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
RING0_INT_FRAME
- pushl $0
- CFI_ADJUST_CFA_OFFSET 4
- pushl machine_check_vector
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0
+ pushl_cfi machine_check_vector
jmp error_code
CFI_ENDPROC
END(machine_check)
@@ -1269,10 +1207,8 @@ END(machine_check)
#ifndef CONFIG_XEN
ENTRY(spurious_interrupt_bug)
RING0_INT_FRAME
- pushl $0
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_spurious_interrupt_bug
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0
+ pushl_cfi $do_spurious_interrupt_bug
jmp error_code
CFI_ENDPROC
END(spurious_interrupt_bug)
@@ -1280,8 +1216,7 @@ END(spurious_interrupt_bug)
ENTRY(fixup_4gb_segment)
RING0_EC_FRAME
- pushl $do_fixup_4gb_segment
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $do_fixup_4gb_segment
jmp error_code
CFI_ENDPROC
END(fixup_4gb_segment)
@@ -1415,8 +1350,7 @@ ENTRY(ia32pv_cstar_target)
movl %ebp,%ecx
movl $__USER_CS,4(%esp)
movl 12(%esp),%ebp
- pushl %eax # save orig_eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax # save orig_eax
/*
* Load the potential sixth argument from user stack.
* Careful about security.
@@ -1550,40 +1484,29 @@ mask=0
ENTRY(page_fault)
RING0_EC_FRAME
- pushl $do_page_fault
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $do_page_fault
ALIGN
error_code:
/* the function address is in %gs's slot on the stack */
- pushl %fs
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %fs
/*CFI_REL_OFFSET fs, 0*/
- pushl %es
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %es
/*CFI_REL_OFFSET es, 0*/
- pushl %ds
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ds
/*CFI_REL_OFFSET ds, 0*/
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax
CFI_REL_OFFSET eax, 0
- pushl %ebp
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebp
CFI_REL_OFFSET ebp, 0
- pushl %edi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edi
CFI_REL_OFFSET edi, 0
- pushl %esi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %esi
CFI_REL_OFFSET esi, 0
- pushl %edx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edx
CFI_REL_OFFSET edx, 0
- pushl %ecx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ecx
CFI_REL_OFFSET ecx, 0
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
cld
movl $(__KERNEL_PERCPU), %ecx
@@ -1626,12 +1549,9 @@ END(page_fault)
movl TSS_sysenter_sp0 + \offset(%esp), %esp
CFI_DEF_CFA esp, 0
CFI_UNDEFINED eip
- pushfl
- CFI_ADJUST_CFA_OFFSET 4
- pushl $__KERNEL_CS
- CFI_ADJUST_CFA_OFFSET 4
- pushl $sysenter_past_esp
- CFI_ADJUST_CFA_OFFSET 4
+ pushfl_cfi
+ pushl_cfi $__KERNEL_CS
+ pushl_cfi $sysenter_past_esp
CFI_REL_OFFSET eip, 0
.endm
#endif /* CONFIG_XEN */
@@ -1644,8 +1564,7 @@ ENTRY(debug)
FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
debug_stack_correct:
#endif /* !CONFIG_XEN */
- pushl $-1 # mark this as an int
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $-1 # mark this as an int
SAVE_ALL
TRACE_IRQS_OFF
xorl %edx,%edx # error code 0
@@ -1665,33 +1584,28 @@ END(debug)
*/
ENTRY(nmi)
RING0_INT_FRAME
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax
#ifndef CONFIG_XEN
movl %ss, %eax
cmpw $__ESPFIX_SS, %ax
- popl %eax
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %eax
je nmi_espfix_stack
cmpl $ia32_sysenter_target,(%esp)
je nmi_stack_fixup
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax
movl %esp,%eax
/* Do not access memory above the end of our stack page,
* it might not exist.
*/
andl $(THREAD_SIZE-1),%eax
cmpl $(THREAD_SIZE-20),%eax
- popl %eax
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %eax
jae nmi_stack_correct
cmpl $ia32_sysenter_target,12(%esp)
je nmi_debug_stack_check
nmi_stack_correct:
/* We have a RING0_INT_FRAME here */
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax
SAVE_ALL
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
@@ -1720,18 +1634,14 @@ nmi_espfix_stack:
*
* create the pointer to lss back
*/
- pushl %ss
- CFI_ADJUST_CFA_OFFSET 4
- pushl %esp
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ss
+ pushl_cfi %esp
addl $4, (%esp)
/* copy the iret frame of 12 bytes */
.rept 3
- pushl 16(%esp)
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi 16(%esp)
.endr
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax
SAVE_ALL
FIXUP_ESPFIX_STACK # %eax == %esp
xorl %edx,%edx # zero error code
@@ -1753,8 +1663,7 @@ END(nmi)
ENTRY(int3)
RING0_INT_FRAME
- pushl $-1 # mark this as an int
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $-1 # mark this as an int
SAVE_ALL
TRACE_IRQS_OFF
xorl %edx,%edx # zero error code
@@ -1766,8 +1675,7 @@ END(int3)
ENTRY(general_protection)
RING0_EC_FRAME
- pushl $do_general_protection
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $do_general_protection
jmp error_code
CFI_ENDPROC
END(general_protection)
--- head.orig/arch/x86/kernel/entry_64-xen.S 2013-05-24 08:24:12.000000000 +0200
+++ head/arch/x86/kernel/entry_64-xen.S 2013-05-24 08:24:37.000000000 +0200
@@ -204,23 +204,17 @@ NMI_MASK = 0x80000000
.macro FAKE_STACK_FRAME child_rip
/* push in order ss, rsp, eflags, cs, rip */
xorl %eax, %eax
- pushq $__KERNEL_DS /* ss */
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $__KERNEL_DS /* ss */
/*CFI_REL_OFFSET ss,0*/
- pushq %rax /* rsp */
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rax /* rsp */
CFI_REL_OFFSET rsp,0
- pushq $X86_EFLAGS_IF /* eflags - interrupts on */
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */
/*CFI_REL_OFFSET rflags,0*/
- pushq $__KERNEL_CS /* cs */
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $__KERNEL_CS /* cs */
/*CFI_REL_OFFSET cs,0*/
- pushq \child_rip /* rip */
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi \child_rip /* rip */
CFI_REL_OFFSET rip,0
- pushq %rax /* orig rax */
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rax /* orig rax */
.endm
.macro UNFAKE_STACK_FRAME
@@ -335,6 +329,7 @@ NMI_MASK = 0x80000000
#ifndef CONFIG_XEN
/* save partial stack frame */
+ .pushsection .kprobes.text, "ax"
ENTRY(save_args)
XCPT_FRAME
cld
@@ -374,6 +369,7 @@ ENTRY(save_args)
ret
CFI_ENDPROC
END(save_args)
+ .popsection
#endif
ENTRY(save_rest)
@@ -435,10 +431,8 @@ ENTRY(ret_from_fork)
LOCK ; btr $TIF_FORK,TI_flags(%r8)
- push kernel_eflags(%rip)
- CFI_ADJUST_CFA_OFFSET 8
- popf # reset kernel eflags
- CFI_ADJUST_CFA_OFFSET -8
+ pushq_cfi kernel_eflags(%rip)
+ popfq_cfi # reset kernel eflags
call schedule_tail # rdi: 'prev' task parameter
@@ -535,11 +529,9 @@ sysret_careful:
jnc sysret_signal
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
- pushq %rdi
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rdi
call schedule
- popq %rdi
- CFI_ADJUST_CFA_OFFSET -8
+ popq_cfi %rdi
jmp sysret_check
/* Handle a signal */
@@ -652,11 +644,9 @@ int_careful:
jnc int_very_careful
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
- pushq %rdi
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rdi
call schedule
- popq %rdi
- CFI_ADJUST_CFA_OFFSET -8
+ popq_cfi %rdi
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
@@ -670,12 +660,10 @@ int_check_syscall_exit_work:
/* Check for syscall exit trace */
testl $_TIF_WORK_SYSCALL_EXIT,%edx
jz int_signal
- pushq %rdi
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rdi
leaq 8(%rsp),%rdi # &ptregs -> arg1
call syscall_trace_leave
- popq %rdi
- CFI_ADJUST_CFA_OFFSET -8
+ popq_cfi %rdi
andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
jmp int_restore_rest
@@ -732,9 +720,8 @@ END(ptregscall_common)
ENTRY(stub_execve)
CFI_STARTPROC
- popq %r11
- CFI_ADJUST_CFA_OFFSET -8
- CFI_REGISTER rip, r11
+ addq $8, %rsp
+ PARTIAL_FRAME 0
SAVE_REST
FIXUP_TOP_OF_STACK %r11
movq %rsp, %rcx
@@ -753,7 +740,7 @@ END(stub_execve)
ENTRY(stub_rt_sigreturn)
CFI_STARTPROC
addq $8, %rsp
- CFI_ADJUST_CFA_OFFSET -8
+ PARTIAL_FRAME 0
SAVE_REST
movq %rsp,%rdi
FIXUP_TOP_OF_STACK %r11
@@ -795,11 +782,9 @@ retint_careful:
jnc retint_signal
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
- pushq %rdi
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rdi
call schedule
- popq %rdi
- CFI_ADJUST_CFA_OFFSET -8
+ popq_cfi %rdi
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
@@ -849,8 +834,8 @@ ENTRY(\sym)
movq 8(%rsp),%r11
CFI_RESTORE r11
movq $-1,8(%rsp) /* ORIG_RAX: no syscall to restart */
- subq $(15-1)*8,%rsp
- CFI_ADJUST_CFA_OFFSET (15-1)*8
+ subq $ORIG_RAX-R15-1*8,%rsp
+ CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15-1*8
call error_entry
DEFAULT_FRAME 0
movq %rsp,%rdi /* pt_regs pointer */
@@ -876,8 +861,8 @@ ENTRY(\sym)
CFI_RESTORE rcx
movq 8(%rsp),%r11
CFI_RESTORE r11
- subq $(15-2)*8,%rsp
- CFI_ADJUST_CFA_OFFSET (15-2)*8
+ subq $ORIG_RAX-R15-2*8,%rsp
+ CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15-2*8
call error_entry
DEFAULT_FRAME 0
movq %rsp,%rdi /* pt_regs pointer */
@@ -997,8 +982,7 @@ ENTRY(failsafe_callback)
CFI_RESTORE r11
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
- pushq $-1
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $-1
SAVE_ALL
jmp error_exit
CFI_ENDPROC
@@ -1066,8 +1050,7 @@ END(kernel_execve)
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
CFI_STARTPROC
- push %rbp
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rbp
CFI_REL_OFFSET rbp,0
mov %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
@@ -1076,6 +1059,7 @@ ENTRY(call_softirq)
push %rbp # backlink for old unwinder
call __do_softirq
leaveq
+ CFI_RESTORE rbp
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
decl PER_CPU_VAR(irq_count)
@@ -1114,7 +1098,7 @@ paranoidzeroentry machine_check *machine
/* ebx: no swapgs flag */
ENTRY(paranoid_exit)
- INTR_FRAME
+ DEFAULT_FRAME
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl %ebx,%ebx /* swapgs needed? */
@@ -1194,7 +1178,6 @@ error_sti:
#endif
TRACE_IRQS_OFF
ret
- CFI_ENDPROC
#ifndef CONFIG_XEN
/*
@@ -1221,6 +1204,7 @@ bstep_iret:
movq %rcx,RIP+8(%rsp)
jmp error_swapgs
#endif
+ CFI_ENDPROC
END(error_entry)
@@ -1261,11 +1245,9 @@ END(do_nmi_callback)
#ifndef CONFIG_IA32_EMULATION
ENTRY(ignore_sysret)
INTR_FRAME
- popq %rcx
- CFI_ADJUST_CFA_OFFSET -8
+ popq_cfi %rcx
CFI_RESTORE rcx
- popq %r11
- CFI_ADJUST_CFA_OFFSET -8
+ popq_cfi %r11
CFI_RESTORE r11
mov $-ENOSYS,%eax
# any non-zero value not having VGCF_in_syscall set will do:
--- head.orig/arch/x86/kernel/head-xen.c 2013-05-13 14:02:23.000000000 +0200
+++ head/arch/x86/kernel/head-xen.c 2013-04-05 09:12:57.000000000 +0200
@@ -1,5 +1,6 @@
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/memblock.h>
#include <linux/pci.h>
#include <asm/setup.h>
@@ -53,7 +54,7 @@ void __init reserve_ebda_region(void)
lowmem = 0x9f000;
/* reserve all memory between lowmem and the 1MB mark */
- reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved");
+ memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved");
}
#else /* CONFIG_XEN */
#include <linux/module.h>
@@ -105,10 +106,11 @@ void __init xen_start_kernel(void)
WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
VMASST_TYPE_writable_pagetables));
- reserve_early(PAGE_ALIGN(__pa_symbol(&_end)),
- __pa(xen_start_info->pt_base)
- + PFN_PHYS(xen_start_info->nr_pt_frames),
- "Xen provided");
+ memblock_init();
+ memblock_x86_reserve_range(PAGE_ALIGN(__pa_symbol(&_end)),
+ __pa(xen_start_info->pt_base)
+ + PFN_PHYS(xen_start_info->nr_pt_frames),
+ "Xen provided");
x86_configure_nx();
--- head.orig/arch/x86/kernel/head32-xen.c 2011-05-09 11:41:42.000000000 +0200
+++ head/arch/x86/kernel/head32-xen.c 2011-05-09 11:42:39.000000000 +0200
@@ -8,6 +8,7 @@
#include <linux/init.h>
#include <linux/start_kernel.h>
#include <linux/mm.h>
+#include <linux/memblock.h>
#include <asm/setup.h>
#include <asm/sections.h>
@@ -15,6 +16,7 @@
#include <asm/trampoline.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
+#include <asm/tlbflush.h>
static void __init i386_default_early_setup(void)
{
@@ -47,17 +49,18 @@ void __init i386_start_kernel(void)
BUG_ON(pte_index(hypervisor_virt_start));
#endif
+ memblock_init();
+
#ifdef CONFIG_X86_TRAMPOLINE
/*
* But first pinch a few for the stack/trampoline stuff
* FIXME: Don't need the extra page at 4K, but need to fix
* trampoline before removing it. (see the GDT stuff)
*/
- reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE,
- "EX TRAMPOLINE");
+ memblock_x86_reserve_range(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE");
#endif
- reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
+ memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
#ifndef CONFIG_XEN
#ifdef CONFIG_BLK_DEV_INITRD
@@ -67,7 +70,7 @@ void __init i386_start_kernel(void)
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
- reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
+ memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK");
}
#endif
--- head.orig/arch/x86/kernel/head64-xen.c 2011-02-01 14:55:46.000000000 +0100
+++ head/arch/x86/kernel/head64-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -15,6 +15,7 @@
#include <linux/percpu.h>
#include <linux/start_kernel.h>
#include <linux/io.h>
+#include <linux/memblock.h>
#include <asm/processor.h>
#include <asm/proto.h>
@@ -119,7 +120,9 @@ void __init x86_64_start_reservations(ch
{
copy_bootdata(__va(real_mode_data));
- reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
+ memblock_init();
+
+ memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
/*
* At this point everything still needed from the boot loader
--- head.orig/arch/x86/kernel/irq-xen.c 2013-05-24 10:36:37.000000000 +0200
+++ head/arch/x86/kernel/irq-xen.c 2013-05-24 10:37:09.000000000 +0200
@@ -71,10 +71,10 @@ static int show_other_interrupts(struct
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
seq_printf(p, " Performance monitoring interrupts\n");
- seq_printf(p, "%*s: ", prec, "PND");
+ seq_printf(p, "%*s: ", prec, "IWI");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
- seq_printf(p, " Performance pending work\n");
+ seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
+ seq_printf(p, " IRQ work interrupts\n");
#endif
#ifndef CONFIG_XEN
if (x86_platform_ipi_callback) {
@@ -172,7 +172,7 @@ int show_interrupts(struct seq_file *p,
seq_printf(p, "%*d: ", prec, i);
for_each_online_cpu(j)
seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
- seq_printf(p, " %8s", desc->chip->name);
+ seq_printf(p, " %8s", desc->irq_data.chip->name);
seq_printf(p, "-%-8s", desc->name);
if (action) {
@@ -198,7 +198,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->apic_timer_irqs;
sum += irq_stats(cpu)->irq_spurious_count;
sum += irq_stats(cpu)->apic_perf_irqs;
- sum += irq_stats(cpu)->apic_pending_irqs;
+ sum += irq_stats(cpu)->apic_irq_work_irqs;
#endif
#ifndef CONFIG_XEN
if (x86_platform_ipi_callback)
@@ -303,6 +303,7 @@ void fixup_irqs(void)
unsigned int irq;
static int warned;
struct irq_desc *desc;
+ struct irq_data *data;
static DECLARE_BITMAP(irqs_used, NR_IRQS);
for_each_irq_desc(irq, desc) {
@@ -318,7 +319,8 @@ void fixup_irqs(void)
/* interrupt's are disabled at this point */
raw_spin_lock(&desc->lock);
- affinity = desc->affinity;
+ data = &desc->irq_data;
+ affinity = data->affinity;
if (!irq_has_action(irq) ||
cpumask_subset(affinity, cpu_online_mask)) {
raw_spin_unlock(&desc->lock);
@@ -333,16 +335,16 @@ void fixup_irqs(void)
affinity = cpu_all_mask;
}
- if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask)
- desc->chip->mask(irq);
+ if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_mask)
+ data->chip->irq_mask(data);
- if (desc->chip->set_affinity)
- desc->chip->set_affinity(irq, affinity);
- else if (desc->chip != &no_irq_chip && !(warned++))
+ if (data->chip->irq_set_affinity)
+ data->chip->irq_set_affinity(data, affinity, true);
+ else if (data->chip != &no_irq_chip && !(warned++))
set_affinity = 0;
- if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask)
- desc->chip->unmask(irq);
+ if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_unmask)
+ data->chip->irq_unmask(data);
raw_spin_unlock(&desc->lock);
@@ -368,9 +370,10 @@ void fixup_irqs(void)
continue;
if (xen_test_irq_pending(irq)) {
+ data = irq_get_irq_data(irq);
raw_spin_lock(&desc->lock);
- if (desc->chip->retrigger)
- desc->chip->retrigger(irq);
+ if (data->chip->irq_retrigger)
+ data->chip->irq_retrigger(data);
raw_spin_unlock(&desc->lock);
}
}
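fixup_irqs() now reaches the chip methods through struct irq_data instead of calling desc->chip->...(irq) directly. The retrigger path above, reduced to a standalone sketch (demo_* name hypothetical; the real code also holds desc->lock around the call):

#include <linux/irq.h>

static void demo_retrigger(unsigned int irq)
{
	struct irq_data *data = irq_get_irq_data(irq);

	if (data && data->chip->irq_retrigger)
		data->chip->irq_retrigger(data);
}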
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ head/arch/x86/kernel/irq_work-xen.c 2011-02-03 11:19:35.000000000 +0100
@@ -0,0 +1,23 @@
+/*
+ * x86/Xen specific code for irq_work
+ */
+
+#include <linux/kernel.h>
+#include <linux/irq_work.h>
+#include <linux/hardirq.h>
+#include <asm/ipi.h>
+
+#ifdef CONFIG_SMP
+irqreturn_t smp_irq_work_interrupt(int irq, void *dev_id)
+{
+ inc_irq_stat(apic_irq_work_irqs);
+ irq_work_run();
+
+ return IRQ_HANDLED;
+}
+
+void arch_irq_work_raise(void)
+{
+ xen_send_IPI_self(IRQ_WORK_VECTOR);
+}
+#endif
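The new file above provides the two arch hooks that the irq_work facility (itself new in 2.6.37) needs under Xen: a handler that accounts and runs the queued work, and arch_irq_work_raise(), which posts a self-IPI on IRQ_WORK_VECTOR. A client of the facility then looks roughly like this sketch (demo_* names hypothetical):

#include <linux/irq_work.h>

static void demo_callback(struct irq_work *work)
{
	/* runs in hard-IRQ context shortly after the self-IPI fires */
}

static struct irq_work demo_work;

static void __init demo_setup(void)
{
	init_irq_work(&demo_work, demo_callback);
}

static void demo_poke(void)
{
	irq_work_queue(&demo_work);	/* usable even from NMI context */
}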
--- head.orig/arch/x86/kernel/cpu/microcode/core-xen.c 2011-12-01 15:26:48.000000000 +0100
+++ head/arch/x86/kernel/cpu/microcode/core-xen.c 2011-12-01 15:28:13.000000000 +0100
@@ -12,7 +12,7 @@
* Software Developer's Manual
* Order Number 253668 or free download from:
*
- * http://developer.intel.com/design/pentium4/manuals/253668.htm
+ * http://developer.intel.com/Assets/PDF/manual/253668.pdf
*
* For more information, go to http://www.urbanmyth.org/microcode
*
@@ -117,6 +117,7 @@ static const struct file_operations micr
.owner = THIS_MODULE,
.write = microcode_write,
.open = microcode_open,
+ .llseek = no_llseek,
};
static struct miscdevice microcode_dev = {
--- head.orig/arch/x86/kernel/mpparse-xen.c 2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/kernel/mpparse-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -11,6 +11,7 @@
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/bitops.h>
@@ -686,7 +687,7 @@ static void __init smp_reserve_memory(st
{
unsigned long size = get_mpc_size(mpf->physptr);
- reserve_early_overlap_ok(mpf->physptr, mpf->physptr+size, "MP-table mpc");
+ memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc");
}
#endif
@@ -719,7 +720,7 @@ static int __init smp_scan_config(unsign
mpf, (u64)virt_to_phys(mpf));
mem = virt_to_phys(mpf);
- reserve_early_overlap_ok(mem, mem + sizeof(*mpf), "MP-table mpf");
+ memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf");
if (mpf->physptr)
smp_reserve_memory(mpf);
#else
--- head.orig/arch/x86/kernel/pci-dma-xen.c 2012-04-04 14:10:53.000000000 +0200
+++ head/arch/x86/kernel/pci-dma-xen.c 2012-04-04 14:32:09.000000000 +0200
@@ -9,6 +9,7 @@
#include <asm/dma.h>
#include <asm/iommu.h>
#include <asm/x86_init.h>
+#include <asm/iommu_table.h>
static int forbid_dac __read_mostly;
@@ -42,6 +43,8 @@ int iommu_detected __read_mostly = 0;
int iommu_pass_through __read_mostly;
#endif
+extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
+
/* Dummy device used for NULL arguments (normally ISA). */
struct device x86_dma_fallback_dev = {
.init_name = "fallback device",
@@ -140,7 +143,10 @@ static struct dma_map_ops swiotlb_dma_op
.dma_supported = swiotlb_dma_supported
};
-#define pci_xen_swiotlb_detect() 1
+static int __init pci_xen_swiotlb_detect(void)
+{
+ return 1;
+}
static void __init pci_xen_swiotlb_init(void)
{
@@ -151,26 +157,28 @@ static void __init pci_xen_swiotlb_init(
}
}
+IOMMU_INIT_FINISH(pci_xen_swiotlb_detect, NULL, pci_xen_swiotlb_init, NULL);
+
void __init pci_iommu_alloc(void)
{
+ struct iommu_table_entry *p;
+
/* free the range so iommu could get some range less than 4G */
dma32_free_bootmem();
- if (pci_xen_swiotlb_detect() || pci_swiotlb_detect())
- goto out;
-
- gart_iommu_hole_init();
-
- detect_calgary();
+ sort_iommu_table(__iommu_table, __iommu_table_end);
+ check_iommu_entries(__iommu_table, __iommu_table_end);
- detect_intel_iommu();
-
- /* needs to be called after gart_iommu_hole_init */
- amd_iommu_detect();
-out:
- pci_xen_swiotlb_init();
+ for (p = __iommu_table; p < __iommu_table_end; p++) {
+ if (p && p->detect && p->detect() > 0) {
+ p->flags |= IOMMU_DETECTED;
+ if (p->early_init)
+ p->early_init();
+ if (p->flags & IOMMU_FINISH_IF_DETECTED)
+ break;
+ }
+ }
}
-
void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag)
{
@@ -375,6 +383,7 @@ EXPORT_SYMBOL(dma_supported);
static int __init pci_iommu_init(void)
{
+ struct iommu_table_entry *p;
dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
#ifdef CONFIG_PCI
@@ -382,14 +391,10 @@ static int __init pci_iommu_init(void)
#endif
x86_init.iommu.iommu_init();
-#ifndef CONFIG_XEN
- if (swiotlb || xen_swiotlb) {
- printk(KERN_INFO "PCI-DMA: "
- "Using software bounce buffering for IO (SWIOTLB)\n");
- swiotlb_print_info();
- } else
- swiotlb_free();
-#endif
+ for (p = __iommu_table; p < __iommu_table_end; p++) {
+ if (p && (p->flags & IOMMU_DETECTED) && p->late_init)
+ p->late_init();
+ }
return 0;
}
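pci_iommu_alloc() above stops hard-coding the probe order (swiotlb, GART, Calgary, VT-d, AMD) and instead walks a linker-built table of struct iommu_table_entry records, each registered with one of the IOMMU_INIT_* macros. IOMMU_INIT_FINISH marks its entry so the loop stops once it detects, which is how pci_xen_swiotlb_detect() — now a real function returning 1 — pins the Xen build to SWIOTLB. A hypothetical registration (demo_* names invented) would look like:

#include <asm/iommu_table.h>

static int __init demo_detect(void)
{
	return 0;	/* return >0 when the hardware is present */
}

static void __init demo_early_init(void)
{
	/* called from pci_iommu_alloc() once demo_detect() reports >0 */
}

IOMMU_INIT_FINISH(demo_detect, NULL, demo_early_init, NULL);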
--- head.orig/arch/x86/kernel/resource.c 2011-01-05 01:50:19.000000000 +0100
+++ head/arch/x86/kernel/resource.c 2011-09-23 14:48:43.000000000 +0200
@@ -1,3 +1,7 @@
+#ifdef CONFIG_XEN
+# define e820 machine_e820
+# include <asm/hypervisor.h>
+#endif
#include <linux/ioport.h>
#include <asm/e820.h>
@@ -37,6 +41,10 @@ static void remove_e820_regions(struct r
void arch_remove_reservations(struct resource *avail)
{
+#ifdef CONFIG_XEN
+ if (!is_initial_xendomain())
+ return;
+#endif
/*
* Trim out BIOS area (high 2MB) and E820 regions. We do not remove
* the low 1MB unconditionally, as this area is needed for some ISA
--- head.orig/arch/x86/kernel/setup-xen.c 2013-12-06 15:07:45.000000000 +0100
+++ head/arch/x86/kernel/setup-xen.c 2013-12-06 15:07:53.000000000 +0100
@@ -31,6 +31,7 @@
#include <linux/apm_bios.h>
#include <linux/initrd.h>
#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/seq_file.h>
#include <linux/console.h>
#include <linux/mca.h>
@@ -83,7 +84,6 @@
#include <asm/dmi.h>
#include <asm/io_apic.h>
#include <asm/ist.h>
-#include <asm/vmi.h>
#include <asm/setup_arch.h>
#include <asm/bios_ebda.h>
#include <asm/cacheflush.h>
@@ -107,11 +107,12 @@
#include <asm/percpu.h>
#include <asm/topology.h>
#include <asm/apicdef.h>
-#include <asm/k8.h>
+#include <asm/amd_nb.h>
#ifdef CONFIG_X86_64
#include <asm/numa_64.h>
#endif
#include <asm/mce.h>
+#include <asm/alternative.h>
#ifdef CONFIG_XEN
#include <asm/hypervisor.h>
@@ -156,7 +157,6 @@ unsigned long max_pfn_mapped;
RESERVE_BRK(dmi_alloc, 65536);
#endif
-unsigned int boot_cpu_id __read_mostly;
static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
unsigned long _brk_end = (unsigned long)__brk_base;
@@ -338,7 +338,7 @@ static inline void init_gbpages(void)
static void __init reserve_brk(void)
{
if (_brk_end > _brk_start)
- reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK");
+ memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK");
/* Mark brk area as locked down and no longer taking any
new allocations */
@@ -361,17 +361,16 @@ static void __init relocate_initrd(void)
char *p, *q;
/* We need to move the initrd down into lowmem */
- ramdisk_here = find_e820_area(0, end_of_lowmem, area_size,
+ ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
PAGE_SIZE);
- if (ramdisk_here == -1ULL)
+ if (ramdisk_here == MEMBLOCK_ERROR)
panic("Cannot find place for new RAMDISK of size %lld\n",
ramdisk_size);
/* Note: this includes all the lowmem currently occupied by
the initrd, we rely on that fact to keep the data intact. */
- reserve_early(ramdisk_here, ramdisk_here + area_size,
- "NEW RAMDISK");
+ memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK");
initrd_start = ramdisk_here + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size;
printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
@@ -444,7 +443,7 @@ static void __init reserve_initrd(void)
initrd_start = 0;
if (ramdisk_size >= (end_of_lowmem>>1)) {
- free_early(ramdisk_image, ramdisk_end);
+ memblock_x86_free_range(ramdisk_image, ramdisk_end);
printk(KERN_ERR "initrd too large to handle, "
"disabling initrd\n");
return;
@@ -470,7 +469,7 @@ static void __init reserve_initrd(void)
relocate_initrd();
- free_early(ramdisk_image, ramdisk_end);
+ memblock_x86_free_range(ramdisk_image, ramdisk_end);
}
#else
static void __init reserve_initrd(void)
@@ -530,7 +529,7 @@ static void __init e820_reserve_setup_da
#endif
}
-static void __init reserve_early_setup_data(void)
+static void __init memblock_x86_reserve_range_setup_data(void)
{
#ifndef CONFIG_XEN
struct setup_data *data;
@@ -543,7 +542,7 @@ static void __init reserve_early_setup_d
while (pa_data) {
data = early_memremap(pa_data, sizeof(*data));
sprintf(buf, "setup data %x", data->type);
- reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
+ memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf);
pa_data = data->next;
early_iounmap(data, sizeof(*data));
}
@@ -565,6 +564,18 @@ static inline unsigned long long get_tot
return total << PAGE_SHIFT;
}
+/*
+ * Keep the crash kernel below this limit. On 32 bits, earlier kernels
+ * would limit the kernel to the low 512 MiB due to mapping restrictions.
+ * On 64 bits, kexec-tools currently limits us to 896 MiB; increase this
+ * limit once kexec-tools are fixed.
+ */
+#ifdef CONFIG_X86_32
+# define CRASH_KERNEL_ADDR_MAX (512 << 20)
+#else
+# define CRASH_KERNEL_ADDR_MAX (896 << 20)
+#endif
+
static void __init reserve_crashkernel(void)
{
unsigned long long total_mem;
@@ -582,23 +593,27 @@ static void __init reserve_crashkernel(v
if (crash_base <= 0) {
const unsigned long long alignment = 16<<20; /* 16M */
- crash_base = find_e820_area(alignment, ULONG_MAX, crash_size,
- alignment);
- if (crash_base == -1ULL) {
+ /*
+ * kexec wants the bzImage to be below CRASH_KERNEL_ADDR_MAX
+ */
+ crash_base = memblock_find_in_range(alignment,
+ CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
+
+ if (crash_base == MEMBLOCK_ERROR) {
pr_info("crashkernel reservation failed - No suitable area found.\n");
return;
}
} else {
unsigned long long start;
- start = find_e820_area(crash_base, ULONG_MAX, crash_size,
- 1<<20);
+ start = memblock_find_in_range(crash_base,
+ crash_base + crash_size, crash_size, 1<<20);
if (start != crash_base) {
pr_info("crashkernel reservation failed - memory is in use.\n");
return;
}
}
- reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL");
+ memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL");
printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
"for crashkernel (System RAM: %ldMB)\n",
@@ -683,93 +698,27 @@ static __init void reserve_ibft_region(v
#ifndef CONFIG_XEN
if (size)
- reserve_early_overlap_ok(addr, addr + size, "ibft");
+ memblock_x86_reserve_range(addr, addr + size, "* ibft");
#endif
}
-#ifdef CONFIG_X86_RESERVE_LOW_64K
-static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
-{
- printk(KERN_NOTICE
- "%s detected: BIOS may corrupt low RAM, working around it.\n",
- d->ident);
-
- e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED);
- sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-
- return 0;
-}
-#endif
-
-/* List of systems that have known low memory corruption BIOS problems */
-static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
-#ifdef CONFIG_X86_RESERVE_LOW_64K
- {
- .callback = dmi_low_memory_corruption,
- .ident = "AMI BIOS",
- .matches = {
- DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
- },
- },
- {
- .callback = dmi_low_memory_corruption,
- .ident = "Phoenix BIOS",
- .matches = {
- DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"),
- },
- },
- {
- .callback = dmi_low_memory_corruption,
- .ident = "Phoenix/MSC BIOS",
- .matches = {
- DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"),
- },
- },
- /*
- * AMI BIOS with low memory corruption was found on Intel DG45ID and
- * DG45FC boards.
- * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
- * match only DMI_BOARD_NAME and see if there is more bad products
- * with this vendor.
- */
- {
- .callback = dmi_low_memory_corruption,
- .ident = "AMI BIOS",
- .matches = {
- DMI_MATCH(DMI_BOARD_NAME, "DG45ID"),
- },
- },
- {
- .callback = dmi_low_memory_corruption,
- .ident = "AMI BIOS",
- .matches = {
- DMI_MATCH(DMI_BOARD_NAME, "DG45FC"),
- },
- },
- /*
- * The Dell Inspiron Mini 1012 has DMI_BIOS_VENDOR = "Dell Inc.", so
- * match on the product name.
- */
- {
- .callback = dmi_low_memory_corruption,
- .ident = "Phoenix BIOS",
- .matches = {
- DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"),
- },
- },
-#endif
- {}
-};
-
#ifndef CONFIG_XEN
+static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
+
static void __init trim_bios_range(void)
{
/*
* A special case is the first 4Kb of memory;
* This is a BIOS owned area, not kernel ram, but generally
* not listed as such in the E820 table.
+ *
+ * This typically reserves additional memory (64KiB by default)
+ * since some BIOSes are known to corrupt low memory. See the
+ * Kconfig help text for X86_RESERVE_LOW.
*/
- e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
+ e820_update_range(0, ALIGN(reserve_low, PAGE_SIZE),
+ E820_RAM, E820_RESERVED);
+
/*
* special case: Some BIOSen report the PC BIOS
* area (640->1Mb) as ram even though it is not.
@@ -778,8 +727,39 @@ static void __init trim_bios_range(void)
e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
}
+
+static int __init parse_reservelow(char *p)
+{
+ unsigned long long size;
+
+ if (!p)
+ return -EINVAL;
+
+ size = memparse(p, &p);
+
+ if (size < 4096)
+ size = 4096;
+
+ if (size > 640*1024)
+ size = 640*1024;
+
+ reserve_low = size;
+
+ return 0;
+}
+
+early_param("reservelow", parse_reservelow);
#endif
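
parse_reservelow() above relies on the kernel's memparse() to accept suffixed sizes such as "64k" or "1M", then clamps the result to the 4 KiB to 640 KiB window that is meaningful for low memory. A standalone sketch of that behaviour; memparse_lite() here is a simplified stand-in for the real helper.

#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-in for the kernel's memparse(): number plus k/m/g. */
static unsigned long long memparse_lite(const char *s)
{
        char *end;
        unsigned long long v = strtoull(s, &end, 0);

        switch (*end) {
        case 'g': case 'G': v <<= 30; break;
        case 'm': case 'M': v <<= 20; break;
        case 'k': case 'K': v <<= 10; break;
        }
        return v;
}

/* Mirror parse_reservelow(): clamp to the [4 KiB, 640 KiB] window. */
static unsigned long parse_reservelow(const char *p)
{
        unsigned long long size = memparse_lite(p);

        if (size < 4096)
                size = 4096;
        if (size > 640 * 1024)
                size = 640 * 1024;
        return (unsigned long)size;
}

int main(void)
{
        const char *samples[] = { "1k", "64K", "2M", "0x10000" };
        size_t i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
                printf("reservelow=%-8s -> %lu bytes\n",
                       samples[i], parse_reservelow(samples[i]));
        return 0;
}
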
+static u64 __init get_max_mapped(void)
+{
+ u64 end = max_pfn_mapped;
+
+ end <<= PAGE_SHIFT;
+
+ return end;
+}
+
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@@ -797,6 +777,7 @@ void __init setup_arch(char **cmdline_p)
{
int acpi = 0;
int k8 = 0;
+ unsigned long flags;
#ifdef CONFIG_XEN
unsigned int i;
unsigned long p2m_pages;
@@ -819,14 +800,27 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
visws_early_detect();
+
+#ifndef CONFIG_XEN
+ /*
+ * copy kernel address range established so far and switch
+ * to the proper swapper page table
+ */
+ clone_pgd_range(swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ initial_page_table + KERNEL_PGD_BOUNDARY,
+ KERNEL_PGD_PTRS);
+
+ load_cr3(swapper_pg_dir);
+ __flush_tlb_all();
+#endif
#else
printk(KERN_INFO "Command line: %s\n", boot_command_line);
#endif
- /* VMI may relocate the fixmap; do this before touching ioremap area */
- vmi_init();
-
- /* OFW also may relocate the fixmap */
+ /*
+ * If we have OLPC OFW, we might end up relocating the fixmap due to
+ * reserve_top(), so do this before touching the ioremap area.
+ */
olpc_ofw_detect();
early_trap_init();
@@ -872,7 +866,7 @@ void __init setup_arch(char **cmdline_p)
#endif
4)) {
efi_enabled = 1;
- efi_reserve_early();
+ efi_memblock_x86_reserve_range();
}
#endif
#else /* CONFIG_XEN */
@@ -900,6 +894,7 @@ void __init setup_arch(char **cmdline_p)
x86_init.oem.arch_setup();
+ iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
setup_memory_map();
parse_setup_data();
/* update the e820_saved too */
@@ -952,11 +947,8 @@ void __init setup_arch(char **cmdline_p)
x86_report_nx();
- /* Must be before kernel pagetables are setup */
- vmi_activate();
-
/* after early param, so could get panic from serial */
- reserve_early_setup_data();
+ memblock_x86_reserve_range_setup_data();
if (acpi_mps_check()) {
#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
@@ -975,12 +967,9 @@ void __init setup_arch(char **cmdline_p)
if (efi_enabled)
efi_init();
- if (is_initial_xendomain()) {
+ if (is_initial_xendomain())
dmi_scan_machine();
- dmi_check_system(bad_bios_dmi_table);
- }
-
/*
* VMware detection requires dmi to be available, so this
* needs to be done after dmi_scan_machine, for the BP.
@@ -1015,8 +1004,6 @@ void __init setup_arch(char **cmdline_p)
*/
max_pfn = e820_end_of_ram_pfn();
- /* preallocate 4k for mptable mpc */
- early_reserve_e820_mpc_new();
/* update e820 for memory not covered by WB MTRRs */
mtrr_bp_init();
#ifndef CONFIG_XEN
@@ -1043,20 +1030,8 @@ void __init setup_arch(char **cmdline_p)
max_low_pfn = max_pfn;
high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
-#ifndef CONFIG_XEN
- max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
-#endif
#endif
-#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
- setup_bios_corruption_check();
-#endif
-
- printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
- max_pfn_mapped<<PAGE_SHIFT);
-
- reserve_brk();
-
/*
* Find and reserve possible boot-time SMP configuration:
*/
@@ -1064,6 +1039,26 @@ void __init setup_arch(char **cmdline_p)
reserve_ibft_region();
+	/*
+	 * Need to conclude brk before memblock_x86_fill(): the latter
+	 * could use memblock_find_in_range(), which could overlap with
+	 * the brk area.
+	 */
+ reserve_brk();
+
+ memblock.current_limit = get_max_mapped();
+ memblock_x86_fill();
+
+ /* preallocate 4k for mptable mpc */
+ early_reserve_e820_mpc_new();
+
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+ setup_bios_corruption_check();
+#endif
+
+ printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
+ max_pfn_mapped<<PAGE_SHIFT);
+
reserve_trampoline_memory();
#ifdef CONFIG_ACPI_SLEEP
@@ -1087,6 +1082,7 @@ void __init setup_arch(char **cmdline_p)
max_low_pfn = max_pfn;
}
#endif
+ memblock.current_limit = get_max_mapped();
/*
* NOTE: On x86-32, only from this point on, fixmaps are ready for use.
@@ -1132,10 +1128,7 @@ void __init setup_arch(char **cmdline_p)
#endif
initmem_init(0, max_pfn, acpi, k8);
-#ifndef CONFIG_NO_BOOTMEM
- early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
-#endif
-
+ memblock_find_dma_reserve();
dma32_reserve_bootmem();
#ifdef CONFIG_KVM_CLOCK
@@ -1146,7 +1139,12 @@ void __init setup_arch(char **cmdline_p)
paging_init();
x86_init.paging.pagetable_setup_done(swapper_pg_dir);
- setup_trampoline_page_table();
+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
+ /* sync back kernel address range */
+ clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
+ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ KERNEL_PGD_PTRS);
+#endif
tboot_probe();
@@ -1292,6 +1290,10 @@ void __init setup_arch(char **cmdline_p)
x86_init.oem.banner();
mcheck_init();
+
+ local_irq_save(flags);
+ arch_init_ideal_nop5();
+ local_irq_restore(flags);
}
#ifdef CONFIG_X86_32
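
On the new 32-bit boot path in the setup_arch() hunks above, the kernel briefly runs on initial_page_table, copies the kernel half of the page directory into swapper_pg_dir with clone_pgd_range(), and switches CR3; after paging_init() the same copy runs in the opposite direction to sync changes back. clone_pgd_range() is essentially a typed memcpy() over a slice of PGD entries, modeled standalone below; the 768-entry boundary assumes the default 3G/1G split without PAE.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

typedef uint32_t pgd_t;                         /* toy PGD entry, non-PAE */
#define PTRS_PER_PGD        1024
#define KERNEL_PGD_BOUNDARY 768                 /* pgd_index(0xC0000000) */
#define KERNEL_PGD_PTRS     (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)

/* As in the kernel: copy count PGD entries from src to dst. */
static void clone_pgd_range(pgd_t *dst, const pgd_t *src, int count)
{
        memcpy(dst, src, count * sizeof(pgd_t));
}

int main(void)
{
        static pgd_t initial_page_table[PTRS_PER_PGD];
        static pgd_t swapper_pg_dir[PTRS_PER_PGD];
        int i;

        for (i = 0; i < PTRS_PER_PGD; i++)
                initial_page_table[i] = i;      /* pretend: early mappings */

        /* Sync only the kernel address range; user entries stay clear. */
        clone_pgd_range(swapper_pg_dir + KERNEL_PGD_BOUNDARY,
                        initial_page_table + KERNEL_PGD_BOUNDARY,
                        KERNEL_PGD_PTRS);

        printf("entry %d: %u (user entry 0: %u)\n", KERNEL_PGD_BOUNDARY,
               (unsigned)swapper_pg_dir[KERNEL_PGD_BOUNDARY],
               (unsigned)swapper_pg_dir[0]);
        return 0;
}
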
--- head.orig/arch/x86/kernel/smp-xen.c 2011-02-01 15:03:03.000000000 +0100
+++ head/arch/x86/kernel/smp-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -143,10 +143,10 @@ irqreturn_t smp_reboot_interrupt(int irq
return IRQ_HANDLED;
}
-void xen_smp_send_stop(void)
+void xen_stop_other_cpus(int wait)
{
unsigned long flags;
- unsigned long wait;
+ unsigned long timeout;
/*
* Use an own vector here because smp_call_function
@@ -160,9 +160,12 @@ void xen_smp_send_stop(void)
if (num_online_cpus() > 1) {
xen_send_IPI_allbutself(REBOOT_VECTOR);
- /* Don't wait longer than a second */
- wait = USEC_PER_SEC;
- while (num_online_cpus() > 1 && wait--)
+ /*
+ * Don't wait longer than a second if the caller
+ * didn't ask us to wait.
+ */
+ timeout = USEC_PER_SEC;
+ while (num_online_cpus() > 1 && (wait || timeout--))
udelay(1);
}
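
The rename from xen_smp_send_stop() to xen_stop_other_cpus(int wait) changes the loop's contract: with wait == 0 the `(wait || timeout--)` condition bounds the spin to roughly a second, while a nonzero wait short-circuits the timeout and spins until the other CPUs really are offline. A standalone demonstration of that condition; the poll counter is a toy stand-in for CPUs going offline, and the real loop inserts udelay(1) per iteration.

#include <stdio.h>

static int polls, stop_after = 2500000; /* toy: CPUs stop after this many polls */

static int num_online_cpus(void)
{
        return ++polls > stop_after ? 1 : 2;
}

/* The loop shape from xen_stop_other_cpus(): bounded unless wait != 0. */
static void stop_other_cpus(int wait)
{
        unsigned long timeout = 1000000;        /* stands in for USEC_PER_SEC */

        polls = 0;
        while (num_online_cpus() > 1 && (wait || timeout--))
                ;                               /* udelay(1) in the real code */

        printf("wait=%d: %s after %d polls\n", wait,
               polls > stop_after ? "all stopped" : "gave up", polls);
}

int main(void)
{
        stop_other_cpus(0);                     /* gives up after ~1s of polls */
        stop_other_cpus(1);                     /* spins until CPUs are gone */
        return 0;
}
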
--- head.orig/arch/x86/kernel/traps-xen.c 2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/kernel/traps-xen.c 2013-11-07 11:51:33.000000000 +0100
@@ -568,6 +568,7 @@ dotraplinkage void __kprobes do_debug(st
if (regs->flags & X86_VM_MASK) {
handle_vm86_trap((struct kernel_vm86_regs *) regs,
error_code, 1);
+ preempt_conditional_cli(regs);
return;
}
@@ -773,21 +774,10 @@ asmlinkage void math_state_restore(void)
__math_state_restore();
}
-#ifndef CONFIG_MATH_EMULATION
-void math_emulate(struct math_emu_info *info)
-{
- printk(KERN_EMERG
- "math-emulation not enabled and no coprocessor found.\n");
- printk(KERN_EMERG "killing %s.\n", current->comm);
- force_sig(SIGFPE, current);
- schedule();
-}
-#endif /* CONFIG_MATH_EMULATION */
-
dotraplinkage void __kprobes
do_device_not_available(struct pt_regs *regs, long error_code)
{
-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
+#ifdef CONFIG_MATH_EMULATION
if (read_cr0() & X86_CR0_EM) {
struct math_emu_info info = { };
@@ -795,12 +785,12 @@ do_device_not_available(struct pt_regs *
info.regs = regs;
math_emulate(&info);
- } else {
- math_state_restore(); /* interrupts still off */
- conditional_sti(regs);
+ return;
}
-#else
- math_state_restore();
+#endif
+ math_state_restore(); /* interrupts still off */
+#ifdef CONFIG_X86_32
+ conditional_sti(regs);
#endif
}
@@ -882,20 +872,6 @@ void __init trap_init(void)
if (ret)
printk("HYPERVISOR_set_trap_table failed (%d)\n", ret);
-#ifdef CONFIG_X86_32
- if (cpu_has_fxsr) {
- printk(KERN_INFO "Enabling fast FPU save and restore... ");
- set_in_cr4(X86_CR4_OSFXSR);
- printk("done.\n");
- }
- if (cpu_has_xmm) {
- printk(KERN_INFO
- "Enabling unmasked SIMD FPU exception support... ");
- set_in_cr4(X86_CR4_OSXMMEXCPT);
- printk("done.\n");
- }
-
-#endif
/*
* Should be a barrier for any external CPU state:
*/
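
The rework of do_device_not_available() above collapses the old CONFIG_X86_32/CONFIG_XEN special cases into one flow: when math emulation is built in and CR0.EM indicates there is no FPU, emulate and return; otherwise restore the lazy FPU state, re-enabling interrupts on 32-bit only. A toy standalone rendering of that decision, with a variable standing in for the CONFIG_MATH_EMULATION compile-time switch.

#include <stdio.h>

#define X86_CR0_EM 0x4          /* CR0.EM: no FPU, trap FPU instructions */

static int math_emulation = 1;  /* stands in for CONFIG_MATH_EMULATION */
static unsigned long cr0 = X86_CR0_EM;

/* Which path the reworked handler takes. */
static const char *device_not_available(void)
{
        if (math_emulation && (cr0 & X86_CR0_EM))
                return "math_emulate()";        /* emulate, then return */
        return "math_state_restore()";          /* lazy FPU restore */
}

int main(void)
{
        printf("EM set, emulation built: %s\n", device_not_available());
        cr0 = 0;
        printf("EM clear:                %s\n", device_not_available());
        return 0;
}
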
--- head.orig/arch/x86/mm/fault-xen.c 2011-08-15 11:05:39.000000000 +0200
+++ head/arch/x86/mm/fault-xen.c 2011-08-15 11:05:47.000000000 +0200
@@ -11,6 +11,7 @@
#include <linux/kprobes.h> /* __kprobes, ... */
#include <linux/mmiotrace.h> /* kmmio_handler, ... */
#include <linux/perf_event.h> /* perf_sw_event */
+#include <linux/hugetlb.h> /* hstate_index_to_shift */
#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
@@ -161,15 +162,20 @@ is_prefetch(struct pt_regs *regs, unsign
static void
force_sig_info_fault(int si_signo, int si_code, unsigned long address,
- struct task_struct *tsk)
+ struct task_struct *tsk, int fault)
{
+ unsigned lsb = 0;
siginfo_t info;
info.si_signo = si_signo;
info.si_errno = 0;
info.si_code = si_code;
info.si_addr = (void __user *)address;
- info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
+ if (fault & VM_FAULT_HWPOISON_LARGE)
+ lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+ if (fault & VM_FAULT_HWPOISON)
+ lsb = PAGE_SHIFT;
+ info.si_addr_lsb = lsb;
force_sig_info(si_signo, &info, tsk);
}
@@ -177,9 +183,6 @@ force_sig_info_fault(int si_signo, int s
DEFINE_SPINLOCK(pgd_lock);
LIST_HEAD(pgd_list);
-#define pgd_page_table(what, pg) \
- spin_##what(&((struct mm_struct *)(pg)->private)->page_table_lock)
-
#ifdef CONFIG_X86_32
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
@@ -241,13 +244,16 @@ void vmalloc_sync_all(void)
spin_lock_irqsave(&pgd_lock, flags);
list_for_each_entry(page, &pgd_list, lru) {
- pmd_t *pmd;
+ spinlock_t *pgt_lock;
+ pmd_t *ret;
+
+ pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
- pgd_page_table(lock, page);
- pmd = vmalloc_sync_one(page_address(page), address);
- pgd_page_table(unlock, page);
+ spin_lock(pgt_lock);
+ ret = vmalloc_sync_one(page_address(page), address);
+ spin_unlock(pgt_lock);
- if (!pmd)
+ if (!ret)
break;
}
spin_unlock_irqrestore(&pgd_lock, flags);
@@ -269,6 +275,8 @@ static noinline __kprobes int vmalloc_fa
if (!(address >= VMALLOC_START && address < VMALLOC_END))
return -1;
+ WARN_ON_ONCE(in_nmi());
+
/*
* Synchronize this task's top level page-table
* with the 'reference' page table.
@@ -344,31 +352,7 @@ out:
void vmalloc_sync_all(void)
{
- unsigned long address;
-
- for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
- address += PGDIR_SIZE) {
-
- const pgd_t *pgd_ref = pgd_offset_k(address);
- unsigned long flags;
- struct page *page;
-
- if (pgd_none(*pgd_ref))
- continue;
-
- spin_lock_irqsave(&pgd_lock, flags);
- list_for_each_entry(page, &pgd_list, lru) {
- pgd_t *pgd;
- pgd = (pgd_t *)page_address(page) + pgd_index(address);
- pgd_page_table(lock, page);
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
- else
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
- pgd_page_table(unlock, page);
- }
- spin_unlock_irqrestore(&pgd_lock, flags);
- }
+ sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
}
/*
@@ -389,6 +373,8 @@ static noinline __kprobes int vmalloc_fa
if (!(address >= VMALLOC_START && address < VMALLOC_END))
return -1;
+ WARN_ON_ONCE(in_nmi());
+
/*
* Copy kernel mappings over when needed. This can also
* happen within a race in page table update. In the later
@@ -752,7 +738,7 @@ __bad_area_nosemaphore(struct pt_regs *r
tsk->thread.error_code = error_code | (address >= TASK_SIZE);
tsk->thread.trap_no = 14;
- force_sig_info_fault(SIGSEGV, si_code, address, tsk);
+ force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0);
return;
}
@@ -837,14 +823,14 @@ do_sigbus(struct pt_regs *regs, unsigned
tsk->thread.trap_no = 14;
#ifdef CONFIG_MEMORY_FAILURE
- if (fault & VM_FAULT_HWPOISON) {
+ if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
printk(KERN_ERR
"MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
tsk->comm, tsk->pid, address);
code = BUS_MCEERR_AR;
}
#endif
- force_sig_info_fault(SIGBUS, code, address, tsk);
+ force_sig_info_fault(SIGBUS, code, address, tsk, fault);
}
static noinline void
@@ -854,7 +840,8 @@ mm_fault_error(struct pt_regs *regs, uns
if (fault & VM_FAULT_OOM) {
out_of_memory(regs, error_code, address);
} else {
- if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
+ if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
+ VM_FAULT_HWPOISON_LARGE))
do_sigbus(regs, error_code, address, fault);
else
BUG();
@@ -915,8 +902,14 @@ spurious_fault(unsigned long error_code,
if (pmd_large(*pmd))
return spurious_fault_check(error_code, (pte_t *) pmd);
+ /*
+ * Note: don't use pte_present() here, since it returns true
+ * if the _PAGE_PROTNONE bit is set. However, this aliases the
+	 * _PAGE_GLOBAL bit, which for kernel pages gives false positives
+ * when CONFIG_DEBUG_PAGEALLOC is used.
+ */
pte = pte_offset_kernel(pmd, address);
- if (!pte_present(*pte))
+ if (!(pte_flags(*pte) & _PAGE_PRESENT))
return 0;
ret = spurious_fault_check(error_code, pte);
@@ -936,9 +929,9 @@ spurious_fault(unsigned long error_code,
int show_unhandled_signals = 1;
static inline int
-access_error(unsigned long error_code, int write, struct vm_area_struct *vma)
+access_error(unsigned long error_code, struct vm_area_struct *vma)
{
- if (write) {
+ if (error_code & PF_WRITE) {
/* write, present and write, not present: */
if (unlikely(!(vma->vm_flags & VM_WRITE)))
return 1;
@@ -973,8 +966,10 @@ do_page_fault(struct pt_regs *regs, unsi
struct task_struct *tsk;
unsigned long address;
struct mm_struct *mm;
- int write;
int fault;
+ int write = error_code & PF_WRITE;
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY |
+ (write ? FAULT_FLAG_WRITE : 0);
/* Set the "privileged fault" bit to something sane. */
if (user_mode_vm(regs))
@@ -1102,6 +1097,7 @@ do_page_fault(struct pt_regs *regs, unsi
bad_area_nosemaphore(regs, error_code, address);
return;
}
+retry:
down_read(&mm->mmap_sem);
} else {
/*
@@ -1145,9 +1141,7 @@ do_page_fault(struct pt_regs *regs, unsi
* we can handle it..
*/
good_area:
- write = error_code & PF_WRITE;
-
- if (unlikely(access_error(error_code, write, vma))) {
+ if (unlikely(access_error(error_code, vma))) {
bad_area_access_error(regs, error_code, address);
return;
}
@@ -1157,21 +1151,34 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault:
*/
- fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
+ fault = handle_mm_fault(mm, vma, address, flags);
if (unlikely(fault & VM_FAULT_ERROR)) {
mm_fault_error(regs, error_code, address, fault);
return;
}
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
- regs, address);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
- regs, address);
+ /*
+ * Major/minor page fault accounting is only done on the
+ * initial attempt. If we go through a retry, it is extremely
+ * likely that the page will be found in page cache at that point.
+ */
+ if (flags & FAULT_FLAG_ALLOW_RETRY) {
+ if (fault & VM_FAULT_MAJOR) {
+ tsk->maj_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+ regs, address);
+ } else {
+ tsk->min_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+ regs, address);
+ }
+ if (fault & VM_FAULT_RETRY) {
+ /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
+ * of starvation. */
+ flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ goto retry;
+ }
}
check_v8086_mode(regs, address, tsk);
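
The fault handler above adopts the then-new FAULT_FLAG_ALLOW_RETRY protocol: the first handle_mm_fault() call may return VM_FAULT_RETRY after dropping mmap_sem (typically having slept on page I/O), and the caller retries exactly once with the flag cleared so retries cannot starve. The shape of that loop as a standalone sketch, with a toy fault handler and illustrative flag values.

#include <stdio.h>

#define FAULT_FLAG_ALLOW_RETRY 0x1      /* illustrative bit values */
#define VM_FAULT_RETRY         0x2

/* Toy fault handler: asks for one retry, as if it slept on page I/O. */
static int handle_mm_fault(unsigned int flags)
{
        static int calls;

        if (++calls == 1 && (flags & FAULT_FLAG_ALLOW_RETRY))
                return VM_FAULT_RETRY;  /* mmap_sem was dropped meanwhile */
        return 0;                       /* fault handled */
}

int main(void)
{
        unsigned int flags = FAULT_FLAG_ALLOW_RETRY;
        int fault;

retry:
        /* down_read(&mm->mmap_sem) would be retaken here */
        fault = handle_mm_fault(flags);
        if (fault & VM_FAULT_RETRY) {
                /* Clear the flag so a second retry cannot starve us. */
                flags &= ~FAULT_FLAG_ALLOW_RETRY;
                puts("VM_FAULT_RETRY: retrying once without ALLOW_RETRY");
                goto retry;
        }
        puts("fault handled");
        return 0;
}
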
--- head.orig/arch/x86/mm/highmem_32-xen.c 2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/mm/highmem_32-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -9,6 +9,7 @@ void *kmap(struct page *page)
return page_address(page);
return kmap_high(page);
}
+EXPORT_SYMBOL(kmap);
void kunmap(struct page *page)
{
@@ -18,6 +19,7 @@ void kunmap(struct page *page)
return;
kunmap_high(page);
}
+EXPORT_SYMBOL(kunmap);
/*
* kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
@@ -27,10 +29,10 @@ void kunmap(struct page *page)
* However when holding an atomic kmap it is not legal to sleep, so atomic
* kmaps are appropriate for short, tight code paths only.
*/
-void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
+void *kmap_atomic_prot(struct page *page, pgprot_t prot)
{
- enum fixed_addresses idx;
unsigned long vaddr;
+ int idx, type;
/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
pagefault_disable();
@@ -38,8 +40,7 @@ void *kmap_atomic_prot(struct page *page
if (!PageHighMem(page))
return page_address(page);
- debug_kmap_atomic(type);
-
+ type = kmap_atomic_idx_push();
idx = type + KM_TYPE_NR*smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
BUG_ON(!pte_none(*(kmap_pte-idx)));
@@ -47,44 +48,57 @@ void *kmap_atomic_prot(struct page *page
return (void *)vaddr;
}
+EXPORT_SYMBOL(kmap_atomic_prot);
-void *kmap_atomic(struct page *page, enum km_type type)
+void *__kmap_atomic(struct page *page)
+{
+ return kmap_atomic_prot(page, kmap_prot);
+}
+EXPORT_SYMBOL(__kmap_atomic);
+
+/*
+ * This is the same as kmap_atomic() but can map memory that doesn't
+ * have a struct page associated with it.
+ */
+void *kmap_atomic_pfn(unsigned long pfn)
{
- return kmap_atomic_prot(page, type, kmap_prot);
+ return kmap_atomic_prot_pfn(pfn, kmap_prot);
}
+EXPORT_SYMBOL_GPL(kmap_atomic_pfn);
-void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type)
+void __kunmap_atomic(void *kvaddr)
{
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
- enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
- /*
- * Force other mappings to Oops if they'll try to access this pte
- * without first remap it. Keeping stale mappings around is a bad idea
- * also, in case the page changes cacheability attributes or becomes
- * a protected page in a hypervisor.
- */
- if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
+ if (vaddr >= __fix_to_virt(FIX_KMAP_END) &&
+ vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) {
+ int idx, type;
+
+ type = kmap_atomic_idx();
+ idx = type + KM_TYPE_NR * smp_processor_id();
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+ WARN_ON_ONCE(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+#endif
+ /*
+ * Force other mappings to Oops if they'll try to access this
+	 * pte without first remapping it. Keeping stale mappings around
+	 * is also a bad idea, in case the page changes cacheability
+ * attributes or becomes a protected page in a hypervisor.
+ */
kpte_clear_flush(kmap_pte-idx, vaddr);
- else {
+ kmap_atomic_idx_pop();
+ }
#ifdef CONFIG_DEBUG_HIGHMEM
+ else {
BUG_ON(vaddr < PAGE_OFFSET);
BUG_ON(vaddr >= (unsigned long)high_memory);
-#endif
}
+#endif
pagefault_enable();
}
-
-/*
- * This is the same as kmap_atomic() but can map memory that doesn't
- * have a struct page associated with it.
- */
-void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
-{
- return kmap_atomic_prot_pfn(pfn, type, kmap_prot);
-}
-EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */
+EXPORT_SYMBOL(__kunmap_atomic);
struct page *kmap_atomic_to_page(void *ptr)
{
@@ -98,6 +112,7 @@ struct page *kmap_atomic_to_page(void *p
pte = kmap_pte - (idx - FIX_KMAP_BEGIN);
return pte_page(*pte);
}
+EXPORT_SYMBOL(kmap_atomic_to_page);
void clear_highpage(struct page *page)
{
@@ -117,6 +132,7 @@ void clear_highpage(struct page *page)
clear_page(kaddr);
kunmap_atomic(kaddr, KM_USER0);
}
+EXPORT_SYMBOL(clear_highpage);
void copy_highpage(struct page *to, struct page *from)
{
@@ -143,14 +159,6 @@ void copy_highpage(struct page *to, stru
kunmap_atomic(vfrom, KM_USER0);
kunmap_atomic(vto, KM_USER1);
}
-
-EXPORT_SYMBOL(kmap);
-EXPORT_SYMBOL(kunmap);
-EXPORT_SYMBOL(kmap_atomic);
-EXPORT_SYMBOL(kunmap_atomic_notypecheck);
-EXPORT_SYMBOL(kmap_atomic_prot);
-EXPORT_SYMBOL(kmap_atomic_to_page);
-EXPORT_SYMBOL(clear_highpage);
EXPORT_SYMBOL(copy_highpage);
void __init set_highmem_pages_init(void)
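
The kmap_atomic conversion above drops the caller-supplied enum km_type in favour of a small per-CPU stack of fixmap slots: kmap_atomic_idx_push() hands out the next slot, kmap_atomic_idx_pop() releases it, and nested atomic kmaps must therefore unwind in strict LIFO order. A single-CPU standalone model of that slot stack; the slot count is illustrative.

#include <stdio.h>
#include <assert.h>

#define KM_TYPE_NR 16                   /* slots per CPU; count illustrative */

static int kmap_idx;                    /* a per-CPU variable in the kernel */

static int kmap_atomic_idx_push(void)
{
        assert(kmap_idx < KM_TYPE_NR);  /* nested too deeply */
        return kmap_idx++;
}

static int kmap_atomic_idx(void)
{
        return kmap_idx - 1;            /* slot of the innermost mapping */
}

static void kmap_atomic_idx_pop(void)
{
        assert(kmap_idx > 0);
        kmap_idx--;
}

int main(void)
{
        /* Nested atomic kmaps must be released in reverse order: */
        int outer = kmap_atomic_idx_push();
        int inner = kmap_atomic_idx_push();

        printf("outer slot %d, inner slot %d, current %d\n",
               outer, inner, kmap_atomic_idx());
        kmap_atomic_idx_pop();          /* unmap inner first */
        kmap_atomic_idx_pop();          /* then outer */
        return 0;
}
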
--- head.orig/arch/x86/mm/init-xen.c 2013-08-15 13:02:09.000000000 +0200
+++ head/arch/x86/mm/init-xen.c 2013-04-05 09:15:32.000000000 +0200
@@ -2,6 +2,7 @@
#include <linux/initrd.h>
#include <linux/ioport.h>
#include <linux/swap.h>
+#include <linux/memblock.h>
#include <linux/bootmem.h>
#include <asm/cacheflush.h>
@@ -88,10 +89,10 @@ static void __init find_early_table_spac
e820_table_end = e820_table_start;
} else {
/*
- * [table_start, table_top) gets passed to reserve_early(),
- * so we must not use table_end here, despite continuing
- * to allocate from there. table_end possibly being below
- * table_start is otoh not a problem.
+ * [table_start, table_top) gets passed to
+ * memblock_x86_reserve_range(), so we must not use table_end
+ * here, despite continuing to allocate from there. table_end
+ * possibly being below table_start is otoh not a problem.
*/
e820_table_start = e820_table_top;
}
@@ -340,7 +341,7 @@ unsigned long __init_refok init_memory_m
__flush_tlb_all();
if (!after_bootmem && e820_table_top > e820_table_start)
- reserve_early(e820_table_start << PAGE_SHIFT,
+ memblock_x86_reserve_range(e820_table_start << PAGE_SHIFT,
e820_table_top << PAGE_SHIFT, "PGTABLE");
if (!after_bootmem)
--- head.orig/arch/x86/mm/init_32-xen.c 2011-02-01 15:03:03.000000000 +0100
+++ head/arch/x86/mm/init_32-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -25,6 +25,7 @@
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/proc_fs.h>
#include <linux/memory_hotplug.h>
#include <linux/initrd.h>
@@ -70,7 +71,7 @@ static __init void *alloc_low_page(void)
panic("alloc_low_page: ran out of memory");
adr = __va(pfn * PAGE_SIZE);
- memset(adr, 0, PAGE_SIZE);
+ clear_page(adr);
return adr;
}
@@ -458,49 +459,28 @@ static void __init add_one_highpage_init
totalhigh_pages++;
}
-struct add_highpages_data {
- unsigned long start_pfn;
- unsigned long end_pfn;
-};
-
-static int __init add_highpages_work_fn(unsigned long start_pfn,
- unsigned long end_pfn, void *datax)
-{
- int node_pfn;
- struct page *page;
- unsigned long final_start_pfn, final_end_pfn;
- struct add_highpages_data *data;
-
- data = (struct add_highpages_data *)datax;
-
- final_start_pfn = max(start_pfn, data->start_pfn);
- final_end_pfn = min(end_pfn, data->end_pfn);
- if (final_start_pfn >= final_end_pfn)
- return 0;
-
- for (node_pfn = final_start_pfn; node_pfn < final_end_pfn;
- node_pfn++) {
- if (!pfn_valid(node_pfn))
- continue;
- page = pfn_to_page(node_pfn);
- add_one_highpage_init(page);
- }
-
- return 0;
-
-}
-
-void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
- unsigned long end_pfn)
+void __init add_highpages_with_active_regions(int nid,
+ unsigned long start_pfn, unsigned long end_pfn)
{
- struct add_highpages_data data;
+ struct range *range;
+ int nr_range;
+ int i;
- data.start_pfn = start_pfn;
- data.end_pfn = end_pfn;
+ nr_range = __get_free_all_memory_range(&range, nid, start_pfn, end_pfn);
- work_with_active_regions(nid, add_highpages_work_fn, &data);
+ for (i = 0; i < nr_range; i++) {
+ struct page *page;
+ int node_pfn;
+
+ for (node_pfn = range[i].start; node_pfn < range[i].end;
+ node_pfn++) {
+ if (!pfn_valid(node_pfn))
+ continue;
+ page = pfn_to_page(node_pfn);
+ add_one_highpage_init(page);
+ }
+ }
}
-
#else
static inline void permanent_kmaps_init(pgd_t *pgd_base)
{
@@ -550,48 +530,6 @@ static void __init pagetable_init(void)
permanent_kmaps_init(pgd_base);
}
-#if defined(CONFIG_ACPI_SLEEP) && !defined(CONFIG_XEN)
-/*
- * ACPI suspend needs this for resume, because things like the intel-agp
- * driver might have split up a kernel 4MB mapping.
- */
-char swsusp_pg_dir[PAGE_SIZE]
- __attribute__ ((aligned(PAGE_SIZE)));
-
-static inline void save_pg_dir(void)
-{
- memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
-}
-#else /* !CONFIG_ACPI_SLEEP */
-static inline void save_pg_dir(void)
-{
-}
-#endif /* !CONFIG_ACPI_SLEEP */
-
-void zap_low_mappings(bool early)
-{
- int i;
-
- /*
- * Zap initial low-memory mappings.
- *
- * Note that "pgd_clear()" doesn't do it for
- * us, because pgd_clear() is a no-op on i386.
- */
- for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
-#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
- set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
-#else
- set_pgd(swapper_pg_dir+i, __pgd(0));
-#endif
- }
-
- if (early)
- __flush_tlb();
- else
- flush_tlb_all();
-}
-
pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
EXPORT_SYMBOL_GPL(__supported_pte_mask);
@@ -714,14 +652,14 @@ void __init initmem_init(unsigned long s
highstart_pfn = highend_pfn = max_pfn;
if (max_pfn > max_low_pfn)
highstart_pfn = max_low_pfn;
- e820_register_active_regions(0, 0, highend_pfn);
+ memblock_x86_register_active_regions(0, 0, highend_pfn);
sparse_memory_present_with_active_regions(0);
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
pages_to_mb(highend_pfn - highstart_pfn));
num_physpages = highend_pfn;
high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
- e820_register_active_regions(0, 0, max_low_pfn);
+ memblock_x86_register_active_regions(0, 0, max_low_pfn);
sparse_memory_present_with_active_regions(0);
num_physpages = max_low_pfn;
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
@@ -752,75 +690,18 @@ static void __init zone_sizes_init(void)
free_area_init_nodes(max_zone_pfns);
}
-#ifndef CONFIG_NO_BOOTMEM
-static unsigned long __init setup_node_bootmem(int nodeid,
- unsigned long start_pfn,
- unsigned long end_pfn,
- unsigned long bootmap)
-{
- unsigned long bootmap_size;
-
- /* don't touch min_low_pfn */
- bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
- bootmap >> PAGE_SHIFT,
- start_pfn, end_pfn);
- printk(KERN_INFO " node %d low ram: %08lx - %08lx\n",
- nodeid, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
- printk(KERN_INFO " node %d bootmap %08lx - %08lx\n",
- nodeid, bootmap, bootmap + bootmap_size);
- free_bootmem_with_active_regions(nodeid, end_pfn);
-
- return bootmap + bootmap_size;
-}
-#endif
-
void __init setup_bootmem_allocator(void)
{
-#ifndef CONFIG_NO_BOOTMEM
- int nodeid;
- unsigned long bootmap_size, bootmap;
- unsigned long end_xen_pfn = min(max_low_pfn, xen_start_info->nr_pages);
-
- /*
- * Initialize the boot-time allocator (with low memory only):
- */
- bootmap_size = bootmem_bootmap_pages(end_xen_pfn)<<PAGE_SHIFT;
- bootmap = find_e820_area(0, min(max_pfn_mapped,
- xen_start_info->nr_pages)<<PAGE_SHIFT,
- bootmap_size, PAGE_SIZE);
- if (bootmap == -1L)
- panic("Cannot find bootmem map of size %ld\n", bootmap_size);
- reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
-#elif defined(CONFIG_XEN)
+#ifdef CONFIG_XEN
if (max_low_pfn > xen_start_info->nr_pages)
- reserve_early(xen_start_info->nr_pages << PAGE_SHIFT,
- max_low_pfn << PAGE_SHIFT, "BALLOON");
+ memblock_x86_reserve_range(xen_start_info->nr_pages << PAGE_SHIFT,
+ max_low_pfn << PAGE_SHIFT, "BALLOON");
#endif
printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
max_pfn_mapped<<PAGE_SHIFT);
printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
-#ifndef CONFIG_NO_BOOTMEM
- for_each_online_node(nodeid) {
- unsigned long start_pfn, end_pfn;
-
-#ifdef CONFIG_NEED_MULTIPLE_NODES
- start_pfn = node_start_pfn[nodeid];
- end_pfn = node_end_pfn[nodeid];
- if (start_pfn > end_xen_pfn)
- continue;
- if (end_pfn > end_xen_pfn)
- end_pfn = end_xen_pfn;
-#else
- start_pfn = 0;
- end_pfn = end_xen_pfn;
-#endif
- bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn,
- bootmap);
- }
-#endif
-
after_bootmem = 1;
}
@@ -870,8 +751,8 @@ unsigned long __init extend_init_mapping
}
if (start_pfn > start)
- reserve_early(start << PAGE_SHIFT,
- start_pfn << PAGE_SHIFT, "INITMAP");
+ memblock_x86_reserve_range(start << PAGE_SHIFT,
+ start_pfn << PAGE_SHIFT, "INITMAP");
return start_pfn;
}
@@ -1026,9 +907,6 @@ void __init mem_init(void)
if (boot_cpu_data.wp_works_ok < 0)
test_wp_bit();
- save_pg_dir();
- zap_low_mappings(true);
-
SetPagePinned(virt_to_page(init_mm.pgd));
}
@@ -1139,8 +1017,3 @@ void mark_rodata_ro(void)
}
#endif
-int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
- int flags)
-{
- return reserve_bootmem(phys, len, flags);
-}
--- head.orig/arch/x86/mm/init_64-xen.c 2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/mm/init_64-xen.c 2013-04-05 09:14:46.000000000 +0200
@@ -24,6 +24,7 @@
#include <linux/initrd.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
@@ -54,7 +55,6 @@
#include <asm/cacheflush.h>
#include <asm/init.h>
#include <asm/setup.h>
-#include <linux/bootmem.h>
#include <xen/features.h>
@@ -164,6 +164,43 @@ static int __init nonx32_setup(char *str
__setup("noexec32=", nonx32_setup);
/*
+ * When memory is added or removed, make sure all process MMs have
+ * suitable PGD entries in their local PGD-level page.
+ */
+void sync_global_pgds(unsigned long start, unsigned long end)
+{
+ unsigned long address;
+
+ for (address = start; address <= end; address += PGDIR_SIZE) {
+ const pgd_t *pgd_ref = pgd_offset_k(address);
+ unsigned long flags;
+ struct page *page;
+
+ if (pgd_none(*pgd_ref))
+ continue;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ list_for_each_entry(page, &pgd_list, lru) {
+ pgd_t *pgd;
+ spinlock_t *pgt_lock;
+
+ pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
+ spin_lock(pgt_lock);
+
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ else
+ BUG_ON(pgd_page_vaddr(*pgd)
+ != pgd_page_vaddr(*pgd_ref));
+
+ spin_unlock(pgt_lock);
+ }
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ }
+}
+
+/*
* NOTE: This function is marked __ref because it calls __init function
* (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
*/
@@ -405,9 +442,9 @@ static inline int __meminit make_readonl
* page and descriptor tables embedded inside don't have writable
* mappings. Exclude the vsyscall area here, allowing alternative
* instruction patching to work. The range must be in sync with that
- * passed to reserve_early() (as "TEXT DATA BSS"), since all other
- * regions can be allocated from under CONFIG_NO_BOOTMEM and thus must
- * be writable.
+ * passed to memblock_x86_reserve_range() (as "TEXT DATA BSS"), since
+ * all other regions can be allocated from under CONFIG_NO_BOOTMEM and
+ * thus must be writable.
*/
if ((paddr >= __pa_symbol(&_text))
&& (paddr < (__pa_symbol(__bss_stop) & PAGE_MASK))
@@ -778,11 +815,13 @@ kernel_physical_mapping_init(unsigned lo
unsigned long end,
unsigned long page_size_mask)
{
-
+ bool pgd_changed = false;
unsigned long next, last_map_addr = end;
+ unsigned long addr;
start = (unsigned long)__va(start);
end = (unsigned long)__va(end);
+ addr = start;
for (; start < end; start = next) {
pgd_t *pgd = pgd_offset_k(start);
@@ -814,9 +853,13 @@ kernel_physical_mapping_init(unsigned lo
spin_lock(&init_mm.page_table_lock);
pgd_populate(&init_mm, pgd, __va(pud_phys));
spin_unlock(&init_mm.page_table_lock);
+ pgd_changed = true;
}
}
+ if (pgd_changed)
+ sync_global_pgds(addr, end);
+
return last_map_addr;
}
@@ -824,31 +867,11 @@ kernel_physical_mapping_init(unsigned lo
void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
int acpi, int k8)
{
-#ifndef CONFIG_NO_BOOTMEM
- unsigned long bootmap_size, bootmap;
-
- e820_register_active_regions(0, start_pfn, end_pfn);
-#ifdef CONFIG_XEN
- if (end_pfn > xen_start_info->nr_pages)
- end_pfn = xen_start_info->nr_pages;
-#endif
- bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
- bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size,
- PAGE_SIZE);
- if (bootmap == -1L)
- panic("Cannot find bootmem map of size %ld\n", bootmap_size);
- reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
- /* don't touch min_low_pfn */
- bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
- 0, end_pfn);
- free_bootmem_with_active_regions(0, end_pfn);
-#else
- e820_register_active_regions(0, start_pfn, end_pfn);
+ memblock_x86_register_active_regions(0, start_pfn, end_pfn);
#ifdef CONFIG_XEN
if (end_pfn > xen_start_info->nr_pages)
- reserve_early(xen_start_info->nr_pages << PAGE_SHIFT,
- end_pfn << PAGE_SHIFT, "BALLOON");
-#endif
+ memblock_x86_reserve_range(xen_start_info->nr_pages << PAGE_SHIFT,
+ end_pfn << PAGE_SHIFT, "BALLOON");
#endif
}
#endif
@@ -1068,54 +1091,6 @@ void mark_rodata_ro(void)
#endif
-int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
- int flags)
-{
-#ifdef CONFIG_NUMA
- int nid, next_nid;
- int ret;
-#endif
- unsigned long pfn = phys >> PAGE_SHIFT;
-
- if (pfn >= max_pfn) {
- /*
- * This can happen with kdump kernels when accessing
- * firmware tables:
- */
- if (pfn < max_pfn_mapped)
- return -EFAULT;
-
- printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %lu\n",
- phys, len);
- return -EFAULT;
- }
-
- /* Should check here against the e820 map to avoid double free */
-#ifdef CONFIG_NUMA
- nid = phys_to_nid(phys);
- next_nid = phys_to_nid(phys + len - 1);
- if (nid == next_nid)
- ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags);
- else
- ret = reserve_bootmem(phys, len, flags);
-
- if (ret != 0)
- return ret;
-
-#else
- reserve_bootmem(phys, len, flags);
-#endif
-
-#ifndef CONFIG_XEN
- if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
- dma_reserve += len / PAGE_SIZE;
- set_dma_reserve(dma_reserve);
- }
-#endif
-
- return 0;
-}
-
int kern_addr_valid(unsigned long addr)
{
unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
@@ -1287,6 +1262,7 @@ vmemmap_populate(struct page *start_page
}
}
+ sync_global_pgds((unsigned long)start_page, end);
return 0;
}
--- head.orig/arch/x86/mm/iomap_32-xen.c 2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/mm/iomap_32-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -49,21 +49,20 @@ int iomap_create_wc(resource_size_t base
}
EXPORT_SYMBOL_GPL(iomap_create_wc);
-void
-iomap_free(resource_size_t base, unsigned long size)
+void iomap_free(resource_size_t base, unsigned long size)
{
io_free_memtype(base, base + size);
}
EXPORT_SYMBOL_GPL(iomap_free);
-void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
+void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
{
- enum fixed_addresses idx;
unsigned long vaddr;
+ int idx, type;
pagefault_disable();
- debug_kmap_atomic(type);
+ type = kmap_atomic_idx_push();
idx = type + KM_TYPE_NR * smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
set_pte_at(&init_mm, vaddr, kmap_pte - idx, pfn_pte(pfn, prot));
@@ -73,10 +72,10 @@ void *kmap_atomic_prot_pfn(unsigned long
}
/*
- * Map 'mfn' using fixed map 'type' and protections 'prot'
+ * Map 'mfn' using protections 'prot'
*/
void __iomem *
-iomap_atomic_prot_pfn(unsigned long mfn, enum km_type type, pgprot_t prot)
+iomap_atomic_prot_pfn(unsigned long mfn, pgprot_t prot)
{
/*
* For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS.
@@ -88,24 +87,34 @@ iomap_atomic_prot_pfn(unsigned long mfn,
prot = PAGE_KERNEL_UC_MINUS;
pgprot_val(prot) |= _PAGE_IOMAP;
- return (void __force __iomem *) kmap_atomic_prot_pfn(mfn, type, prot);
+ return (void __force __iomem *) kmap_atomic_prot_pfn(mfn, prot);
}
EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
void
-iounmap_atomic(void __iomem *kvaddr, enum km_type type)
+iounmap_atomic(void __iomem *kvaddr)
{
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
- enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
- /*
- * Force other mappings to Oops if they'll try to access this pte
- * without first remap it. Keeping stale mappings around is a bad idea
- * also, in case the page changes cacheability attributes or becomes
- * a protected page in a hypervisor.
- */
- if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
+ if (vaddr >= __fix_to_virt(FIX_KMAP_END) &&
+ vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) {
+ int idx, type;
+
+ type = kmap_atomic_idx();
+ idx = type + KM_TYPE_NR * smp_processor_id();
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+ WARN_ON_ONCE(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+#endif
+ /*
+ * Force other mappings to Oops if they'll try to access this
+	 * pte without first remapping it. Keeping stale mappings around
+	 * is also a bad idea, in case the page changes cacheability
+ * attributes or becomes a protected page in a hypervisor.
+ */
kpte_clear_flush(kmap_pte-idx, vaddr);
+ kmap_atomic_idx_pop();
+ }
pagefault_enable();
}
--- head.orig/arch/x86/mm/ioremap-xen.c 2012-11-26 14:22:41.000000000 +0100
+++ head/arch/x86/mm/ioremap-xen.c 2011-05-09 11:42:30.000000000 +0200
@@ -527,6 +527,11 @@ static inline pte_t * __init early_iorem
return &bm_pte[pte_index(addr)];
}
+bool __init is_early_ioremap_ptep(pte_t *ptep)
+{
+ return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
+}
+
static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata;
void __init early_ioremap_init(void)
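
is_early_ioremap_ptep(), added above, is a plain bounds check: does the given pte pointer fall inside the static bm_pte[] page that backs the early-ioremap fixmap slots? The same pointer-in-array test in isolation, with a toy pte_t.

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

#define PAGE_SIZE 4096
typedef uint64_t pte_t;                 /* toy PTE */

/* One page worth of PTEs, like the kernel's static bm_pte[]. */
static pte_t bm_pte[PAGE_SIZE / sizeof(pte_t)];

static bool is_early_ioremap_ptep(pte_t *ptep)
{
        return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE / sizeof(pte_t)];
}

int main(void)
{
        pte_t elsewhere;

        printf("&bm_pte[3]: %d\n", is_early_ioremap_ptep(&bm_pte[3]));
        printf("elsewhere:  %d\n", is_early_ioremap_ptep(&elsewhere));
        return 0;
}
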
--- head.orig/arch/x86/mm/pgtable-xen.c 2011-02-01 15:03:03.000000000 +0100
+++ head/arch/x86/mm/pgtable-xen.c 2013-12-10 11:38:00.000000000 +0100
@@ -429,7 +429,19 @@ static inline void pgd_list_del(pgd_t *p
#define UNSHARED_PTRS_PER_PGD \
(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
-static void pgd_ctor(pgd_t *pgd)
+
+static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
+{
+ BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
+ virt_to_page(pgd)->index = (pgoff_t)mm;
+}
+
+struct mm_struct *pgd_page_get_mm(struct page *page)
+{
+ return (struct mm_struct *)page->index;
+}
+
+static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
{
pgd_test_and_unpin(pgd);
@@ -442,10 +454,6 @@ static void pgd_ctor(pgd_t *pgd)
clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
KERNEL_PGD_PTRS);
- paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
- __pa(swapper_pg_dir) >> PAGE_SHIFT,
- KERNEL_PGD_BOUNDARY,
- KERNEL_PGD_PTRS);
}
#ifdef CONFIG_X86_64
@@ -455,8 +463,10 @@ static void pgd_ctor(pgd_t *pgd)
#endif
/* list required to sync kernel mapping updates */
- if (!SHARED_KERNEL_PMD)
+ if (!SHARED_KERNEL_PMD) {
+ pgd_set_mm(pgd, mm);
pgd_list_add(pgd);
+ }
}
static void pgd_dtor(pgd_t *pgd)
@@ -664,12 +674,9 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
}
#endif
- pgd_ctor(pgd);
+ pgd_ctor(mm, pgd);
pgd_prepopulate_pmd(mm, pgd, pmds);
- /* Store a back link for vmalloc_sync_all(). */
- set_page_private(virt_to_page(pgd), (unsigned long)mm);
-
spin_unlock_irqrestore(&pgd_lock, flags);
return pgd;
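
The pgtable hunk above replaces the old page->private back link with pgd_set_mm()/pgd_page_get_mm(): the owning mm_struct is stashed in the otherwise-unused page->index of the page holding the PGD, so vmalloc_sync can find the right page_table_lock. The trick reduced to a standalone model; struct page and virt_to_page() here are toy stand-ins for the kernel's.

#include <stdio.h>
#include <assert.h>

typedef unsigned long pgoff_t;

struct page { pgoff_t index; };         /* only the field being borrowed */
struct mm_struct { const char *name; };

static struct page pgd_page;            /* page backing our toy PGD */

/* Toy virt_to_page(): every pgd resolves to the single page above. */
static struct page *virt_to_page(void *pgd)
{
        (void)pgd;
        return &pgd_page;
}

static void pgd_set_mm(void *pgd, struct mm_struct *mm)
{
        /* The kernel checks at build time that index can hold a pointer. */
        assert(sizeof(pgoff_t) >= sizeof(mm));
        virt_to_page(pgd)->index = (pgoff_t)mm;
}

static struct mm_struct *pgd_page_get_mm(struct page *page)
{
        return (struct mm_struct *)page->index;
}

int main(void)
{
        unsigned long pgd[4];           /* toy PGD */
        struct mm_struct mm = { "some_mm" };

        pgd_set_mm(pgd, &mm);
        printf("owner: %s\n", pgd_page_get_mm(virt_to_page(pgd))->name);
        return 0;
}
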
--- head.orig/arch/x86/pci/pcifront.c 2011-02-01 14:50:44.000000000 +0100
+++ head/arch/x86/pci/pcifront.c 2011-02-01 15:09:47.000000000 +0100
@@ -16,7 +16,7 @@ static int pcifront_enable_irq(struct pc
{
u8 irq;
pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
- if (!irq_to_desc_alloc_node(irq, numa_node_id()))
+ if (!alloc_irq_and_cfg_at(irq, numa_node_id()))
return -ENOMEM;
evtchn_register_pirq(irq);
dev->irq = irq;
--- head.orig/arch/x86/xen/Kconfig 2013-01-08 11:58:39.000000000 +0100
+++ head/arch/x86/xen/Kconfig 2014-04-30 10:50:20.000000000 +0200
@@ -16,7 +16,7 @@ config PARAVIRT_XEN
config XEN_DOM0
def_bool y
- depends on XEN && PCI_XEN && SWIOTLB_XEN
+ depends on PARAVIRT_XEN && PCI_XEN && SWIOTLB_XEN
depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI
config XEN_PVHVM
--- head.orig/drivers/oprofile/oprofile_files.c 2012-01-06 10:50:28.000000000 +0100
+++ head/drivers/oprofile/oprofile_files.c 2012-02-16 13:43:26.000000000 +0100
@@ -296,6 +296,7 @@ static const struct file_operations acti
.open = adomain_open,
.read = domain_read,
.write = domain_write,
+ .llseek = default_llseek,
};
static DEFINE_DOMAIN_DATA(passive);
@@ -310,6 +311,7 @@ static const struct file_operations pass
.open = pdomain_open,
.read = domain_read,
.write = domain_write,
+ .llseek = default_llseek,
};
#endif /* CONFIG_XEN */
--- head.orig/drivers/pci/Kconfig 2014-02-18 17:26:10.000000000 +0100
+++ head/drivers/pci/Kconfig 2013-09-26 13:00:18.000000000 +0200
@@ -69,9 +69,9 @@ config PCI_STUB
When in doubt, say N.
-config XEN_PCIDEV_FRONTEND
+config PARAVIRT_XEN_PCIDEV_FRONTEND
tristate "Xen PCI Frontend"
- depends on PCI && X86 && XEN
+ depends on PCI && X86 && PARAVIRT_XEN
select PCI_XEN
select XEN_XENBUS_FRONTEND
default y
@@ -79,6 +79,15 @@ config XEN_PCIDEV_FRONTEND
The PCI device frontend driver allows the kernel to import arbitrary
PCI devices from a PCI backend to support PCI driver domains.
+config XEN_PCIDEV_FRONTEND
+ def_bool y
+ prompt "Xen PCI Frontend" if X86_64
+ depends on PCI && XEN && (PCI_GOXEN_FE || PCI_GOANY || X86_64)
+ select HOTPLUG
+ help
+ The PCI device frontend driver allows the kernel to import arbitrary
+ PCI devices from a PCI backend to support PCI driver domains.
+
config HT_IRQ
bool "Interrupts on hypertransport devices"
default y
--- head.orig/drivers/pci/Makefile 2011-01-31 14:32:40.000000000 +0100
+++ head/drivers/pci/Makefile 2013-01-08 11:58:55.000000000 +0100
@@ -60,7 +60,7 @@ obj-$(CONFIG_PCI_SYSCALL) += syscall.o
obj-$(CONFIG_PCI_STUB) += pci-stub.o
-obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
+obj-$(CONFIG_PARAVIRT_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
obj-$(CONFIG_OF) += of.o
--- head.orig/drivers/xen/Kconfig 2014-01-30 10:17:56.000000000 +0100
+++ head/drivers/xen/Kconfig 2012-02-17 14:34:57.000000000 +0100
@@ -20,10 +20,6 @@ config XEN_PRIVILEGED_GUEST
config XEN_UNPRIVILEGED_GUEST
def_bool !XEN_PRIVILEGED_GUEST
select PM
- select PM_SLEEP
- select PM_SLEEP_SMP if SMP
- select PM_RUNTIME if PCI
- select PM_OPS if PCI
select SUSPEND
config XEN_PRIVCMD
--- head.orig/drivers/xen/Makefile 2012-10-04 12:54:07.000000000 +0200
+++ head/drivers/xen/Makefile 2012-10-04 13:05:34.000000000 +0200
@@ -1,6 +1,8 @@
obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o features.o events.o manage.o
+xen-biomerge-$(CONFIG_PARAVIRT_XEN) := biomerge.o
xen-hotplug-$(CONFIG_PARAVIRT_XEN) := cpu_hotplug.o
xen-balloon-$(CONFIG_PARAVIRT_XEN) := balloon.o
+xen-evtchn-name-$(CONFIG_PARAVIRT_XEN) := xen-evtchn
xen-balloon-$(CONFIG_XEN) := balloon/
obj-$(CONFIG_XEN) += core/
@@ -9,6 +11,7 @@ obj-y += xenbus/
obj-$(CONFIG_XEN) += char/
xen-backend-$(CONFIG_XEN_BACKEND) := util.o
+xen-evtchn-name-$(CONFIG_XEN) := evtchn
nostackp := $(call cc-option, -fno-stack-protector)
ifeq ($(CONFIG_PARAVIRT_XEN),y)
@@ -16,17 +19,23 @@ CFLAGS_features.o := $(nostackp)
endif
priv-$(CONFIG_USB_SUPPORT) := dbgp.o
+priv-$(CONFIG_PCI) += pci.o
obj-$(CONFIG_XEN) += features.o $(xen-backend-y) $(xen-backend-m)
obj-$(CONFIG_XEN_PRIVILEGED_GUEST) += $(priv-y)
+obj-$(CONFIG_BLOCK) += $(xen-biomerge-y)
obj-$(CONFIG_HOTPLUG_CPU) += $(xen-hotplug-y)
obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
obj-$(CONFIG_XEN_BALLOON) += $(xen-balloon-y)
-obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o
+obj-$(CONFIG_XEN_DEV_EVTCHN) += $(xen-evtchn-name-y).o
obj-$(CONFIG_XENFS) += xenfs/
obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
+obj-$(CONFIG_XEN_DOM0) += pci.o
+
+xen-evtchn-y := evtchn.o
+
obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
obj-$(filter m,$(CONFIG_XEN_BLKDEV_TAP2)) += blktap2/ blktap2-new/
--- head.orig/drivers/xen/blkback/blkback.c 2013-06-20 15:24:06.000000000 +0200
+++ head/drivers/xen/blkback/blkback.c 2013-06-20 15:25:36.000000000 +0200
@@ -196,13 +196,17 @@ static void fast_flush_area(pending_req_
static void print_stats(blkif_t *blkif)
{
- printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n",
+ printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d"
+ " | fl %4d\n",
current->comm, blkif->st_oo_req,
- blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req);
+ blkif->st_rd_req, blkif->st_wr_req,
+ blkif->st_br_req, blkif->st_fl_req);
blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
blkif->st_rd_req = 0;
blkif->st_wr_req = 0;
blkif->st_oo_req = 0;
+ blkif->st_br_req = 0;
+ blkif->st_fl_req = 0;
}
int blkif_schedule(void *arg)
@@ -260,19 +264,43 @@ int blkif_schedule(void *arg)
return 0;
}
+static void drain_io(blkif_t *blkif)
+{
+ atomic_set(&blkif->drain, 1);
+ do {
+		/* The refcnt is initialized to one, and one more reference
+		 * is taken at the start of the blkif_schedule thread. */
+ if (atomic_read(&blkif->refcnt) <= 2)
+ break;
+
+ wait_for_completion_interruptible_timeout(
+ &blkif->drain_complete, HZ);
+
+ if (!atomic_read(&blkif->drain))
+ break;
+ } while (!kthread_should_stop());
+ atomic_set(&blkif->drain, 0);
+}
+
/******************************************************************
* COMPLETION CALLBACK -- Called as bh->b_end_io()
*/
static void __end_block_io_op(pending_req_t *pending_req, int error)
{
+ blkif_t *blkif = pending_req->blkif;
int status = BLKIF_RSP_OKAY;
/* An error fails the entire request. */
if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
(error == -EOPNOTSUPP)) {
DPRINTK("blkback: write barrier op failed, not supported\n");
- blkback_barrier(XBT_NIL, pending_req->blkif->be, 0);
+ blkback_barrier(XBT_NIL, blkif->be, 0);
+ status = BLKIF_RSP_EOPNOTSUPP;
+ } else if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
+ (error == -EOPNOTSUPP)) {
+ DPRINTK("blkback: flush diskcache op failed, not supported\n");
+ blkback_flush_diskcache(XBT_NIL, blkif->be, 0);
status = BLKIF_RSP_EOPNOTSUPP;
} else if (error) {
DPRINTK("Buffer not up-to-date at end of operation, "
@@ -282,10 +310,13 @@ static void __end_block_io_op(pending_re
if (atomic_dec_and_test(&pending_req->pendcnt)) {
fast_flush_area(pending_req);
- make_response(pending_req->blkif, pending_req->id,
+ make_response(blkif, pending_req->id,
pending_req->operation, status);
- blkif_put(pending_req->blkif);
free_req(pending_req);
+ if (atomic_read(&blkif->drain)
+ && atomic_read(&blkif->refcnt) <= 2)
+ complete(&blkif->drain_complete);
+ blkif_put(blkif);
}
}
@@ -366,6 +397,7 @@ static int _do_block_io_op(blkif_t *blki
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
case BLKIF_OP_WRITE_BARRIER:
+ case BLKIF_OP_FLUSH_DISKCACHE:
pending_req = alloc_req();
if (!pending_req) {
blkif->st_oo_req++;
@@ -443,7 +475,11 @@ static void dispatch_rw_block_io(blkif_t
break;
case BLKIF_OP_WRITE_BARRIER:
blkif->st_br_req++;
- operation = WRITE_BARRIER;
+ operation = WRITE_FLUSH_FUA;
+ break;
+ case BLKIF_OP_FLUSH_DISKCACHE:
+ blkif->st_fl_req++;
+ operation = WRITE_FLUSH;
break;
default:
operation = 0; /* make gcc happy */
@@ -452,7 +488,7 @@ static void dispatch_rw_block_io(blkif_t
/* Check that number of segments is sane. */
nseg = req->nr_segments;
- if (unlikely(nseg == 0 && operation != WRITE_BARRIER) ||
+ if (unlikely(nseg == 0 && !(operation & REQ_FLUSH)) ||
unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
DPRINTK("Bad number of segments in request (%d)\n", nseg);
goto fail_response;
@@ -524,6 +560,12 @@ static void dispatch_rw_block_io(blkif_t
goto fail_flush;
}
+	/* Wait on all outstanding I/Os and, once they have all completed,
+	 * issue the WRITE_FLUSH.
+ */
+ if (req->operation == BLKIF_OP_WRITE_BARRIER)
+ drain_io(blkif);
+
plug_queue(blkif, preq.bdev);
atomic_set(&pending_req->pendcnt, 1);
blkif_get(blkif);
@@ -560,7 +602,7 @@ static void dispatch_rw_block_io(blkif_t
}
if (!bio) {
- BUG_ON(operation != WRITE_BARRIER);
+ BUG_ON(!(operation & (REQ_FLUSH|REQ_FUA)));
bio = bio_alloc(GFP_KERNEL, 0);
if (unlikely(bio == NULL))
goto fail_put_bio;
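
With REQ_HARDBARRIER gone from the block layer, blkback above emulates a guest barrier by draining in-flight I/O and then submitting with WRITE_FLUSH_FUA, while the new BLKIF_OP_FLUSH_DISKCACHE maps to a plain WRITE_FLUSH. The dispatch extracted as a standalone sketch; the REQ_* bit values are illustrative, not the kernel's.

#include <stdio.h>
#include <stdbool.h>

/* Illustrative flag bits; the kernel's REQ_* values differ. */
#define REQ_WRITE (1u << 0)
#define REQ_FLUSH (1u << 1)
#define REQ_FUA   (1u << 2)

enum blkif_op { BLKIF_OP_READ, BLKIF_OP_WRITE,
                BLKIF_OP_WRITE_BARRIER, BLKIF_OP_FLUSH_DISKCACHE };

/* Map a guest ring operation to block-layer request flags. */
static unsigned int blkif_to_req_flags(enum blkif_op op, bool *drain_first)
{
        *drain_first = false;
        switch (op) {
        case BLKIF_OP_READ:
                return 0;
        case BLKIF_OP_WRITE:
                return REQ_WRITE;
        case BLKIF_OP_WRITE_BARRIER:
                *drain_first = true;    /* wait out in-flight I/O first */
                return REQ_WRITE | REQ_FLUSH | REQ_FUA; /* WRITE_FLUSH_FUA */
        case BLKIF_OP_FLUSH_DISKCACHE:
                return REQ_WRITE | REQ_FLUSH;           /* WRITE_FLUSH */
        }
        return 0;
}

int main(void)
{
        bool drain;
        unsigned int f = blkif_to_req_flags(BLKIF_OP_WRITE_BARRIER, &drain);

        printf("barrier: flags %#x, drain first: %s\n", f,
               drain ? "yes" : "no");
        return 0;
}
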
--- head.orig/drivers/xen/blkback/common.h 2012-06-06 13:52:57.000000000 +0200
+++ head/drivers/xen/blkback/common.h 2013-06-20 15:25:38.000000000 +0200
@@ -48,6 +48,7 @@ struct vbd {
blkif_vdev_t handle; /* what the domain refers to this vbd as */
unsigned char readonly; /* Non-zero -> read-only */
unsigned char type; /* VDISK_xxx */
+ bool flush_support;
u32 pdevice; /* phys device that this vbd maps to */
struct block_device *bdev;
sector_t size; /* Cached size parameter */
@@ -74,6 +75,9 @@ typedef struct blkif_st {
atomic_t refcnt;
wait_queue_head_t wq;
+ /* for barrier (drain) requests */
+ struct completion drain_complete;
+ atomic_t drain;
struct task_struct *xenblkd;
unsigned int waiting_reqs;
struct request_queue *plug;
@@ -84,6 +88,7 @@ typedef struct blkif_st {
int st_wr_req;
int st_oo_req;
int st_br_req;
+ int st_fl_req;
int st_rd_sect;
int st_wr_sect;
@@ -141,5 +146,7 @@ int blkif_schedule(void *arg);
void blkback_barrier(struct xenbus_transaction, struct backend_info *,
int state);
+void blkback_flush_diskcache(struct xenbus_transaction,
+ struct backend_info *, int state);
#endif /* __BLKIF__BACKEND__COMMON_H__ */
--- head.orig/drivers/xen/blkback/interface.c 2013-06-20 15:18:15.000000000 +0200
+++ head/drivers/xen/blkback/interface.c 2013-06-20 15:25:40.000000000 +0200
@@ -49,6 +49,8 @@ blkif_t *blkif_alloc(domid_t domid)
spin_lock_init(&blkif->blk_ring_lock);
atomic_set(&blkif->refcnt, 1);
init_waitqueue_head(&blkif->wq);
+ init_completion(&blkif->drain_complete);
+ atomic_set(&blkif->drain, 0);
blkif->st_print = jiffies;
init_waitqueue_head(&blkif->waiting_to_free);
init_waitqueue_head(&blkif->shutdown_wq);
--- head.orig/drivers/xen/blkback/vbd.c 2011-02-01 14:50:44.000000000 +0100
+++ head/drivers/xen/blkback/vbd.c 2011-09-07 12:35:54.000000000 +0200
@@ -55,6 +55,7 @@ int vbd_create(blkif_t *blkif, blkif_vde
{
struct vbd *vbd;
struct block_device *bdev;
+ struct request_queue *q;
vbd = &blkif->vbd;
vbd->handle = handle;
@@ -88,6 +89,10 @@ int vbd_create(blkif_t *blkif, blkif_vde
if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
vbd->type |= VDISK_REMOVABLE;
+ q = bdev_get_queue(bdev);
+ if (q && q->flush_flags)
+ vbd->flush_support = true;
+
DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
handle, blkif->domid);
return 0;
--- head.orig/drivers/xen/blkback/xenbus.c 2013-05-31 13:17:26.000000000 +0200
+++ head/drivers/xen/blkback/xenbus.c 2012-12-18 12:08:43.000000000 +0100
@@ -119,6 +119,7 @@ VBD_SHOW(oo_req, "%d\n", be->blkif->st_
VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req);
+VBD_SHOW(fl_req, "%d\n", be->blkif->st_fl_req);
VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
@@ -127,6 +128,7 @@ static struct attribute *vbdstat_attrs[]
&dev_attr_rd_req.attr,
&dev_attr_wr_req.attr,
&dev_attr_br_req.attr,
+ &dev_attr_fl_req.attr,
&dev_attr_rd_sect.attr,
&dev_attr_wr_sect.attr,
NULL
@@ -210,6 +212,17 @@ void blkback_barrier(struct xenbus_trans
xenbus_dev_error(dev, err, "writing feature-barrier");
}
+void blkback_flush_diskcache(struct xenbus_transaction xbt,
+ struct backend_info *be, int state)
+{
+ struct xenbus_device *dev = be->dev;
+ int err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
+ "%d", state);
+
+ if (err)
+ xenbus_dev_error(dev, err, "writing feature-flush-cache");
+}
+
/**
* Entry point to this code when a new device is created. Allocate the basic
* structures, and watch the store waiting for the hotplug scripts to tell us
@@ -426,7 +439,8 @@ again:
return;
}
- blkback_barrier(xbt, be, 1);
+ blkback_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);
+ blkback_barrier(xbt, be, be->blkif->vbd.flush_support);
err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
vbd_size(&be->blkif->vbd));
--- head.orig/drivers/xen/blkfront/blkfront.c 2013-05-31 13:33:41.000000000 +0200
+++ head/drivers/xen/blkfront/blkfront.c 2013-05-31 13:36:10.000000000 +0200
@@ -337,7 +337,7 @@ static void connect(struct blkfront_info
{
unsigned long long sectors;
unsigned int binfo, sector_size, physical_sector_size;
- int err, barrier;
+ int err, barrier, flush;
switch (info->connected) {
case BLKIF_STATE_CONNECTED:
@@ -382,25 +382,41 @@ static void connect(struct blkfront_info
if (err <= 0)
physical_sector_size = sector_size;
+ info->feature_flush = 0;
+ info->flush_op = 0;
+
err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
"feature-barrier", "%d", &barrier);
/*
* If there's no "feature-barrier" defined, then it means
* we're dealing with a very old backend which writes
- * synchronously; draining will do what needs to get done.
- *
- * If there are barriers, then we can do full queued writes
- * with tagged barriers.
+ * synchronously; nothing to do.
*
- * If barriers are not supported, then there's no much we can
- * do, so just set ordering to NONE.
+ * If there are barriers, then we use flush.
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
+ if (err > 0 && barrier) {
+ info->feature_flush = REQ_FLUSH | REQ_FUA;
+ info->flush_op = BLKIF_OP_WRITE_BARRIER;
+ }
+ /*
+	 * And if "feature-flush-cache" is present, prefer it over
+	 * barriers.
*/
+ err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+ "feature-flush-cache", "%d", &flush);
+ if (err > 0 && flush) {
+ info->feature_flush = REQ_FLUSH;
+ info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
+ }
+#else
if (err <= 0)
- info->feature_barrier = QUEUE_ORDERED_DRAIN;
+ info->feature_flush = QUEUE_ORDERED_DRAIN;
else if (barrier)
- info->feature_barrier = QUEUE_ORDERED_TAG;
+ info->feature_flush = QUEUE_ORDERED_TAG;
else
- info->feature_barrier = QUEUE_ORDERED_NONE;
+ info->feature_flush = QUEUE_ORDERED_NONE;
+#endif
err = xlvbd_add(sectors, info->vdevice, binfo, sector_size,
physical_sector_size, info);
@@ -539,7 +555,7 @@ static inline int ADD_ID_TO_FREELIST(
if (!info->shadow[id].request)
return -ENXIO;
info->shadow[id].req.id = info->shadow_free;
- info->shadow[id].request = 0;
+ info->shadow[id].request = NULL;
info->shadow_free = id;
return 0;
}
@@ -762,14 +778,10 @@ int blkif_getgeo(struct block_device *bd
/*
- * blkif_queue_request
+ * Generate a Xen blkfront IO request from a blk layer request. Reads
+ * and writes are handled as expected.
*
- * request block io
- *
- * id: for guest use only.
- * operation: BLKIF_OP_{READ,WRITE,PROBE}
- * buffer: buffer to read/write into. this should be a
- * virtual address in the guest os.
+ * @req: a request struct
*/
static int blkif_queue_request(struct request *req)
{
@@ -798,7 +810,7 @@ static int blkif_queue_request(struct re
/* Fill out a communications ring structure. */
ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
id = GET_ID_FROM_FREELIST(info);
- info->shadow[id].request = (unsigned long)req;
+ info->shadow[id].request = req;
ring_req->id = id;
ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
@@ -806,8 +818,12 @@ static int blkif_queue_request(struct re
ring_req->operation = rq_data_dir(req) ?
BLKIF_OP_WRITE : BLKIF_OP_READ;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
+ if (req->cmd_flags & (REQ_FLUSH | REQ_FUA))
+#else
if (req->cmd_flags & REQ_HARDBARRIER)
- ring_req->operation = BLKIF_OP_WRITE_BARRIER;
+#endif
+ ring_req->operation = info->flush_op;
ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
@@ -865,7 +881,9 @@ void do_blkif_request(struct request_que
blk_start_request(req);
- if (req->cmd_type != REQ_TYPE_FS) {
+ if ((req->cmd_type != REQ_TYPE_FS) ||
+ ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) &&
+ !info->flush_op)) {
req->errors = (DID_ERROR << 16) |
(DRIVER_INVALID << 24);
__blk_end_request_all(req, -EIO);
@@ -931,7 +949,7 @@ static irqreturn_t blkif_int(int irq, vo
continue;
}
id = bret->id;
- req = (struct request *)info->shadow[id].request;
+ req = info->shadow[id].request;
blkif_completion(&info->shadow[id]);
@@ -945,14 +963,31 @@ static irqreturn_t blkif_int(int irq, vo
ret = bret->status == BLKIF_RSP_OKAY ? 0 : -EIO;
switch (bret->operation) {
+ const char *kind;
+
+ case BLKIF_OP_FLUSH_DISKCACHE:
case BLKIF_OP_WRITE_BARRIER:
- if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
- pr_warning("blkfront: %s: %s op failed\n",
- info->gd->disk_name,
- op_name(bret->operation));
+ kind = "";
+ if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP))
ret = -EOPNOTSUPP;
- info->feature_barrier = QUEUE_ORDERED_NONE;
- xlvbd_barrier(info);
+ if (unlikely(bret->status == BLKIF_RSP_ERROR &&
+ info->shadow[id].req.nr_segments == 0)) {
+ kind = "empty ";
+ ret = -EOPNOTSUPP;
+ }
+ if (unlikely(ret)) {
+ if (ret == -EOPNOTSUPP) {
+ pr_warn("blkfront: %s: %s%s op failed\n",
+ info->gd->disk_name, kind,
+ op_name(bret->operation));
+ ret = 0;
+ }
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
+ info->feature_flush = 0;
+#else
+ info->feature_flush = QUEUE_ORDERED_NONE;
+#endif
+ xlvbd_flush(info);
}
/* fall through */
case BLKIF_OP_READ:
@@ -1044,7 +1079,7 @@ static int blkif_recover(struct blkfront
/* Stage 3: Find pending requests and requeue them. */
for (i = 0; i < BLK_RING_SIZE; i++) {
/* Not in use? */
- if (copy[i].request == 0)
+ if (!copy[i].request)
continue;
/* Grab a request slot and copy shadow state into it. */
@@ -1062,8 +1097,7 @@ static int blkif_recover(struct blkfront
req->seg[j].gref,
info->xbdev->otherend_id,
pfn_to_mfn(info->shadow[req->id].frame[j]),
- rq_data_dir((struct request *)
- info->shadow[req->id].request) ?
+ rq_data_dir(info->shadow[req->id].request) ?
GTF_readonly : 0);
info->shadow[req->id].req = *req;
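
On the frontend side, connect() above probes two xenstore keys: "feature-barrier" selects BLKIF_OP_WRITE_BARRIER with REQ_FLUSH|REQ_FUA, and "feature-flush-cache", when advertised, overrides it with the cheaper BLKIF_OP_FLUSH_DISKCACHE and bare REQ_FLUSH. The precedence logic as a standalone sketch, with plain ints standing in for the xenbus_scanf() results and illustrative flag values.

#include <stdio.h>

#define REQ_FLUSH (1u << 0)             /* illustrative values */
#define REQ_FUA   (1u << 1)

enum { BLKIF_OP_NONE, BLKIF_OP_WRITE_BARRIER, BLKIF_OP_FLUSH_DISKCACHE };

struct blkfront_info { unsigned int feature_flush; int flush_op; };

/* barrier/flush: -1 = key absent, 0/1 = value read from xenstore */
static void negotiate(struct blkfront_info *info, int barrier, int flush)
{
        info->feature_flush = 0;
        info->flush_op = BLKIF_OP_NONE;

        if (barrier > 0) {              /* feature-barrier = 1 */
                info->feature_flush = REQ_FLUSH | REQ_FUA;
                info->flush_op = BLKIF_OP_WRITE_BARRIER;
        }
        if (flush > 0) {                /* feature-flush-cache wins */
                info->feature_flush = REQ_FLUSH;
                info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
        }
}

int main(void)
{
        struct blkfront_info info;

        negotiate(&info, 1, -1);
        printf("barrier only:  op %d flags %#x\n", info.flush_op,
               info.feature_flush);
        negotiate(&info, 1, 1);
        printf("flush present: op %d flags %#x\n", info.flush_op,
               info.feature_flush);
        return 0;
}
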
--- head.orig/drivers/xen/blkfront/block.h 2013-05-31 13:26:15.000000000 +0200
+++ head/drivers/xen/blkfront/block.h 2013-05-31 13:36:13.000000000 +0200
@@ -77,7 +77,7 @@ struct xlbd_major_info
struct blk_shadow {
blkif_request_t req;
- unsigned long request;
+ struct request *request;
unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
@@ -107,7 +107,8 @@ struct blkfront_info
struct gnttab_free_callback callback;
struct blk_shadow shadow[BLK_RING_SIZE];
unsigned long shadow_free;
- int feature_barrier;
+ unsigned int feature_flush;
+ unsigned int flush_op;
int is_ready;
};
@@ -135,7 +136,7 @@ int xlvbd_add(blkif_sector_t capacity, i
unsigned int sector_size, unsigned int physical_sector_size,
struct blkfront_info *);
void xlvbd_del(struct blkfront_info *info);
-int xlvbd_barrier(struct blkfront_info *info);
+void xlvbd_flush(struct blkfront_info *info);
#ifdef CONFIG_SYSFS
int xlvbd_sysfs_addif(struct blkfront_info *info);
--- head.orig/drivers/xen/blkfront/vbd.c 2013-05-31 13:33:43.000000000 +0200
+++ head/drivers/xen/blkfront/vbd.c 2013-05-31 13:36:14.000000000 +0200
@@ -475,7 +475,7 @@ xlvbd_add(blkif_sector_t capacity, int v
info->gd = gd;
- xlvbd_barrier(info);
+ xlvbd_flush(info);
if (vdisk_info & VDISK_READONLY)
set_disk_ro(gd, 1);
@@ -521,36 +521,38 @@ xlvbd_del(struct blkfront_info *info)
info->rq = NULL;
}
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
-int
-xlvbd_barrier(struct blkfront_info *info)
+void
+xlvbd_flush(struct blkfront_info *info)
{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
+ blk_queue_flush(info->rq, info->feature_flush);
+ pr_info("blkfront: %s: %s: %s\n",
+ info->gd->disk_name,
+ info->flush_op == BLKIF_OP_WRITE_BARRIER ?
+ "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
+ "flush diskcache" : "barrier or flush"),
+ info->feature_flush ? "enabled" : "disabled");
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
int err;
const char *barrier;
- switch (info->feature_barrier) {
+ switch (info->feature_flush) {
case QUEUE_ORDERED_DRAIN: barrier = "enabled (drain)"; break;
case QUEUE_ORDERED_TAG: barrier = "enabled (tag)"; break;
case QUEUE_ORDERED_NONE: barrier = "disabled"; break;
default: return -EINVAL;
}
- err = blk_queue_ordered(info->rq, info->feature_barrier);
+ err = blk_queue_ordered(info->rq, info->feature_flush);
if (err)
return err;
pr_info("blkfront: %s: barriers %s\n",
info->gd->disk_name, barrier);
- return 0;
-}
#else
-int
-xlvbd_barrier(struct blkfront_info *info)
-{
- if (info->feature_barrier)
+ if (info->feature_flush)
pr_info("blkfront: %s: barriers disabled\n", info->gd->disk_name);
- return -ENOSYS;
-}
#endif
+}
#ifdef CONFIG_SYSFS
static ssize_t show_media(struct device *dev,
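
[Annotation] On 2.6.37+ xlvbd_flush() advertises the capability with blk_queue_flush() instead of the removed blk_queue_ordered(). A usage sketch, assuming a live request queue q:

    /* 2.6.37+: tell the block layer which flush semantics the device
     * supports; passing 0 disables REQ_FLUSH/REQ_FUA handling again. */
    blk_queue_flush(q, REQ_FLUSH | REQ_FUA);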
--- head.orig/drivers/xen/blktap/blktap.c 2013-06-20 15:24:44.000000000 +0200
+++ head/drivers/xen/blktap/blktap.c 2012-05-23 13:38:37.000000000 +0200
@@ -445,6 +445,7 @@ static const struct file_operations blkt
.unlocked_ioctl = blktap_ioctl,
.open = blktap_open,
.release = blktap_release,
+ .llseek = no_llseek,
.mmap = blktap_mmap,
};
@@ -578,6 +579,8 @@ static int blktap_open(struct inode *ino
tap_blkif_t *info;
int i;
+ nonseekable_open(inode, filp);
+
/* ctrl device, treat differently */
if (!idx)
return 0;
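
[Annotation] The .llseek = no_llseek / nonseekable_open() pairing recurs in several hunks below (gntdev, xenbus_dev, privcmd): 2.6.37's BKL removal made the default llseek behaviour explicit, so character devices without a meaningful file position should opt out. A generic sketch with hypothetical my_open/my_fops names:

    /* Non-seekable chardev pattern. */
    static int my_open(struct inode *inode, struct file *filp)
    {
            nonseekable_open(inode, filp);  /* clears FMODE_*SEEK, returns 0 */
            return 0;
    }

    static const struct file_operations my_fops = {
            .owner  = THIS_MODULE,
            .open   = my_open,
            .llseek = no_llseek,            /* lseek() now fails with -ESPIPE */
    };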
--- head.orig/drivers/xen/blktap2/device.c 2012-02-16 13:40:30.000000000 +0100
+++ head/drivers/xen/blktap2/device.c 2012-02-16 13:43:41.000000000 +0100
@@ -824,7 +824,7 @@ blktap_device_run_queue(struct blktap *t
continue;
}
- if (req->cmd_flags & REQ_HARDBARRIER) {
+ if (req->cmd_flags & (REQ_FLUSH|REQ_FUA)) {
blk_start_request(req);
__blk_end_request_all(req, -EOPNOTSUPP);
continue;
--- head.orig/drivers/xen/blktap2-new/device.c 2012-02-16 16:38:13.000000000 +0100
+++ head/drivers/xen/blktap2-new/device.c 2012-02-16 16:38:28.000000000 +0100
@@ -306,9 +306,6 @@ blktap_device_configure(struct blktap *t
/* Make sure buffer addresses are sector-aligned. */
blk_queue_dma_alignment(rq, 511);
- /* We are reordering, but cacheless. */
- blk_queue_ordered(rq, QUEUE_ORDERED_DRAIN);
-
spin_unlock_irq(&dev->lock);
}
--- head.orig/drivers/xen/core/Makefile 2012-02-17 14:28:52.000000000 +0100
+++ head/drivers/xen/core/Makefile 2012-02-17 14:35:00.000000000 +0100
@@ -4,7 +4,6 @@
obj-y := evtchn.o gnttab.o reboot.o machine_reboot.o fallback.o
-obj-$(CONFIG_PCI) += pci.o
obj-$(CONFIG_XEN_PRIVILEGED_GUEST) += firmware.o
obj-$(CONFIG_PROC_FS) += xen_proc.o
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
--- head.orig/drivers/xen/core/evtchn.c 2013-01-30 11:51:17.000000000 +0100
+++ head/drivers/xen/core/evtchn.c 2014-06-30 10:29:57.000000000 +0200
@@ -33,6 +33,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/irq.h>
+#include <linux/irqdesc.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
@@ -88,14 +89,17 @@ static struct irq_cfg _irq_cfg[] = {
static inline struct irq_cfg *__pure irq_cfg(unsigned int irq)
{
#ifdef CONFIG_SPARSE_IRQ
- struct irq_desc *desc = irq_to_desc(irq);
-
- return desc ? desc->chip_data : NULL;
+ return get_irq_chip_data(irq);
#else
return irq < NR_IRQS ? _irq_cfg + irq : NULL;
#endif
}
+static inline struct irq_cfg *__pure irq_data_cfg(struct irq_data *data)
+{
+ return irq_data_get_irq_chip_data(data);
+}
+
/* Constructor for packed IRQ information. */
static inline u32 mk_irq_info(u32 type, u32 index, u32 evtchn)
{
@@ -115,26 +119,47 @@ static inline u32 mk_irq_info(u32 type,
* Accessors for packed IRQ information.
*/
+static inline unsigned int evtchn_from_irq_cfg(const struct irq_cfg *cfg)
+{
+ return cfg->info & ((1U << _EVTCHN_BITS) - 1);
+}
+
+static inline unsigned int evtchn_from_irq_data(struct irq_data *data)
+{
+ const struct irq_cfg *cfg = irq_data_cfg(data);
+
+ return cfg ? evtchn_from_irq_cfg(cfg) : 0;
+}
+
static inline unsigned int evtchn_from_irq(int irq)
{
- const struct irq_cfg *cfg = irq_cfg(irq);
+ struct irq_data *data = irq_get_irq_data(irq);
- return cfg ? cfg->info & ((1U << _EVTCHN_BITS) - 1) : 0;
+ return data ? evtchn_from_irq_data(data) : 0;
+}
+
+static inline unsigned int index_from_irq_cfg(const struct irq_cfg *cfg)
+{
+ return (cfg->info >> _EVTCHN_BITS) & ((1U << _INDEX_BITS) - 1);
}
static inline unsigned int index_from_irq(int irq)
{
const struct irq_cfg *cfg = irq_cfg(irq);
- return cfg ? (cfg->info >> _EVTCHN_BITS) & ((1U << _INDEX_BITS) - 1)
- : 0;
+ return cfg ? index_from_irq_cfg(cfg) : 0;
+}
+
+static inline unsigned int type_from_irq_cfg(const struct irq_cfg *cfg)
+{
+ return cfg->info >> (32 - _IRQT_BITS);
}
static inline unsigned int type_from_irq(int irq)
{
const struct irq_cfg *cfg = irq_cfg(irq);
- return cfg ? cfg->info >> (32 - _IRQT_BITS) : IRQT_UNBOUND;
+ return cfg ? type_from_irq_cfg(cfg) : IRQT_UNBOUND;
}
unsigned int irq_from_evtchn(unsigned int port)
@@ -171,16 +196,17 @@ static inline unsigned long active_evtch
~sh->evtchn_mask[idx]);
}
-static void _bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu, int irq,
+static void _bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu,
+ struct irq_data *data,
const struct cpumask *cpumask)
{
shared_info_t *s = HYPERVISOR_shared_info;
BUG_ON(!test_bit(chn, s->evtchn_mask));
- if (irq >= 0) {
+ if (data) {
BUG_ON(!cpumask_test_cpu(cpu, cpumask));
- cpumask_copy(irq_to_desc(irq)->affinity, cpumask);
+ cpumask_copy(data->affinity, cpumask);
}
clear_bit(chn, per_cpu(cpu_evtchn_mask, cpu_evtchn[chn]));
@@ -190,7 +216,11 @@ static void _bind_evtchn_to_cpu(unsigned
static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
{
- _bind_evtchn_to_cpu(chn, cpu, evtchn_to_irq[chn], cpumask_of(cpu));
+ int irq = evtchn_to_irq[chn];
+
+ _bind_evtchn_to_cpu(chn, cpu,
+ irq != -1 ? irq_get_irq_data(irq) : NULL,
+ cpumask_of(cpu));
}
static void init_evtchn_cpu_bindings(void)
@@ -199,10 +229,10 @@ static void init_evtchn_cpu_bindings(voi
/* By default all event channels notify CPU#0. */
for (i = 0; i < nr_irqs; i++) {
- struct irq_desc *desc = irq_to_desc(i);
+ struct irq_data *data = irq_get_irq_data(i);
- if (desc)
- cpumask_copy(desc->affinity, cpumask_of(0));
+ if (data)
+ cpumask_copy(data->affinity, cpumask_of(0));
}
memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
@@ -225,7 +255,8 @@ static inline unsigned long active_evtch
return (sh->evtchn_pending[idx] & ~sh->evtchn_mask[idx]);
}
-static void _bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu, int irq,
+static void _bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu,
+ struct irq_data *data,
const struct cpumask *cpumask)
{
}
@@ -394,32 +425,37 @@ asmlinkage void __irq_entry evtchn_do_up
set_irq_regs(old_regs);
}
-static int find_unbound_irq(unsigned int node, struct irq_chip *chip)
+/*
+ * On success returns with cfg->bindcount set to 1 (checked and set
+ * under irq_mapping_update_lock, which is dropped again before return).
+ */
+static int find_unbound_irq(unsigned int node, struct irq_cfg **pcfg,
+ struct irq_chip *chip)
{
static int warned;
int irq;
for (irq = DYNIRQ_BASE; irq < nr_irqs; irq++) {
- struct irq_desc *desc;
- struct irq_cfg *cfg;
+ struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
+ struct irq_desc *desc = irq_to_desc(irq);
- desc = irq_to_desc(irq);
- if (!desc)
- desc = irq_to_desc_alloc_node(irq, node);
- else if (desc->chip != &no_irq_chip &&
- desc->chip != &dynirq_chip)
- continue;
- if (!desc)
+ if (unlikely(!cfg))
return -ENOMEM;
- cfg = desc->chip_data;
- if (cfg && !cfg->bindcount) {
+ spin_lock(&irq_mapping_update_lock);
+ if ((desc->irq_data.chip == &no_irq_chip ||
+ desc->irq_data.chip == chip) &&
+ !cfg->bindcount) {
+ cfg->bindcount = 1;
+ spin_unlock(&irq_mapping_update_lock);
+ *pcfg = cfg;
desc->status |= IRQ_NOPROBE;
set_irq_chip_and_handler_name(irq, chip,
handle_fasteoi_irq,
"fasteoi");
return irq;
}
+ spin_unlock(&irq_mapping_update_lock);
}
if (!warned) {
@@ -440,40 +476,45 @@ static int bind_caller_port_to_irq(unsig
spin_lock(&irq_mapping_update_lock);
if ((irq = evtchn_to_irq[caller_port]) == -1) {
- if ((irq = find_unbound_irq(numa_node_id(), &dynirq_chip)) < 0)
- goto out;
-
- evtchn_to_irq[caller_port] = irq;
- irq_cfg(irq)->info = mk_irq_info(IRQT_CALLER_PORT,
- 0, caller_port);
- }
+ struct irq_cfg *cfg;
- irq_cfg(irq)->bindcount++;
+ spin_unlock(&irq_mapping_update_lock);
+ if ((irq = find_unbound_irq(numa_node_id(), &cfg,
+ &dynirq_chip)) < 0)
+ return irq;
+ spin_lock(&irq_mapping_update_lock);
+ if (evtchn_to_irq[caller_port] == -1) {
+ evtchn_to_irq[caller_port] = irq;
+ cfg->info = mk_irq_info(IRQT_CALLER_PORT, 0, caller_port);
+ } else {
+ cfg->bindcount = 0;
+ irq = evtchn_to_irq[caller_port];
+ ++irq_cfg(irq)->bindcount;
+ }
+ } else
+ ++irq_cfg(irq)->bindcount;
- out:
spin_unlock(&irq_mapping_update_lock);
return irq;
}
static int bind_local_port_to_irq(unsigned int local_port)
{
+ struct irq_cfg *cfg;
int irq;
- spin_lock(&irq_mapping_update_lock);
-
- BUG_ON(evtchn_to_irq[local_port] != -1);
-
- if ((irq = find_unbound_irq(numa_node_id(), &dynirq_chip)) < 0) {
+ if ((irq = find_unbound_irq(numa_node_id(), &cfg, &dynirq_chip)) < 0) {
if (close_evtchn(local_port))
BUG();
- goto out;
+ return irq;
}
+ spin_lock(&irq_mapping_update_lock);
+
+ BUG_ON(evtchn_to_irq[local_port] != -1);
evtchn_to_irq[local_port] = irq;
- irq_cfg(irq)->info = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port);
- irq_cfg(irq)->bindcount++;
+ cfg->info = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port);
- out:
spin_unlock(&irq_mapping_update_lock);
return irq;
}
@@ -509,91 +550,107 @@ static int bind_interdomain_evtchn_to_ir
static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
{
- struct evtchn_bind_virq bind_virq;
- int evtchn, irq;
+ int irq;
spin_lock(&irq_mapping_update_lock);
if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) {
- if ((irq = find_unbound_irq(cpu_to_node(cpu),
+ struct irq_cfg *cfg;
+
+ spin_unlock(&irq_mapping_update_lock);
+ if ((irq = find_unbound_irq(cpu_to_node(cpu), &cfg,
&dynirq_chip)) < 0)
- goto out;
+ return irq;
+ spin_lock(&irq_mapping_update_lock);
- bind_virq.virq = virq;
- bind_virq.vcpu = cpu;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
- &bind_virq) != 0)
- BUG();
- evtchn = bind_virq.port;
+ if (per_cpu(virq_to_irq, cpu)[virq] == -1) {
+ struct evtchn_bind_virq bind_virq = {
+ .virq = virq,
+ .vcpu = cpu
+ };
- evtchn_to_irq[evtchn] = irq;
- irq_cfg(irq)->info = mk_irq_info(IRQT_VIRQ, virq, evtchn);
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+ &bind_virq) != 0)
+ BUG();
- per_cpu(virq_to_irq, cpu)[virq] = irq;
+ evtchn_to_irq[bind_virq.port] = irq;
+ cfg->info = mk_irq_info(IRQT_VIRQ, virq, bind_virq.port);
- bind_evtchn_to_cpu(evtchn, cpu);
- }
+ per_cpu(virq_to_irq, cpu)[virq] = irq;
- irq_cfg(irq)->bindcount++;
+ bind_evtchn_to_cpu(bind_virq.port, cpu);
+ } else {
+ cfg->bindcount = 0;
+ irq = per_cpu(virq_to_irq, cpu)[virq];
+ ++irq_cfg(irq)->bindcount;
+ }
+ } else
+ ++irq_cfg(irq)->bindcount;
- out:
spin_unlock(&irq_mapping_update_lock);
return irq;
}
static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
{
- struct evtchn_bind_ipi bind_ipi;
- int evtchn, irq;
+ int irq;
spin_lock(&irq_mapping_update_lock);
if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) {
- if ((irq = find_unbound_irq(cpu_to_node(cpu),
+ struct irq_cfg *cfg;
+
+ spin_unlock(&irq_mapping_update_lock);
+ if ((irq = find_unbound_irq(cpu_to_node(cpu), &cfg,
&dynirq_chip)) < 0)
- goto out;
+ return irq;
+ spin_lock(&irq_mapping_update_lock);
- bind_ipi.vcpu = cpu;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
- &bind_ipi) != 0)
- BUG();
- evtchn = bind_ipi.port;
+ if (per_cpu(ipi_to_irq, cpu)[ipi] == -1) {
+ struct evtchn_bind_ipi bind_ipi = { .vcpu = cpu };
- evtchn_to_irq[evtchn] = irq;
- irq_cfg(irq)->info = mk_irq_info(IRQT_IPI, ipi, evtchn);
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
+ &bind_ipi) != 0)
+ BUG();
- per_cpu(ipi_to_irq, cpu)[ipi] = irq;
+ evtchn_to_irq[bind_ipi.port] = irq;
+ cfg->info = mk_irq_info(IRQT_IPI, ipi, bind_ipi.port);
- bind_evtchn_to_cpu(evtchn, cpu);
- }
+ per_cpu(ipi_to_irq, cpu)[ipi] = irq;
- irq_cfg(irq)->bindcount++;
+ bind_evtchn_to_cpu(bind_ipi.port, cpu);
+ } else {
+ cfg->bindcount = 0;
+ irq = per_cpu(ipi_to_irq, cpu)[ipi];
+ ++irq_cfg(irq)->bindcount;
+ }
+ } else
+ ++irq_cfg(irq)->bindcount;
- out:
spin_unlock(&irq_mapping_update_lock);
return irq;
}
static void unbind_from_irq(unsigned int irq)
{
- unsigned int cpu;
- int evtchn = evtchn_from_irq(irq);
+ struct irq_cfg *cfg = irq_cfg(irq);
+ unsigned int cpu, evtchn = evtchn_from_irq_cfg(cfg);
spin_lock(&irq_mapping_update_lock);
- if (!--irq_cfg(irq)->bindcount && VALID_EVTCHN(evtchn)) {
- if ((type_from_irq(irq) != IRQT_CALLER_PORT) &&
+ if (!--cfg->bindcount && VALID_EVTCHN(evtchn)) {
+ if ((type_from_irq_cfg(cfg) != IRQT_CALLER_PORT) &&
close_evtchn(evtchn))
BUG();
- switch (type_from_irq(irq)) {
+ switch (type_from_irq_cfg(cfg)) {
case IRQT_VIRQ:
per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
- [index_from_irq(irq)] = -1;
+ [index_from_irq_cfg(cfg)] = -1;
break;
case IRQT_IPI:
per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
- [index_from_irq(irq)] = -1;
+ [index_from_irq_cfg(cfg)] = -1;
break;
default:
break;
@@ -603,7 +660,7 @@ static void unbind_from_irq(unsigned int
bind_evtchn_to_cpu(evtchn, 0);
evtchn_to_irq[evtchn] = -1;
- irq_cfg(irq)->info = IRQ_UNBOUND;
+ cfg->info = IRQ_UNBOUND;
/* Zap stats across IRQ changes of use. */
for_each_possible_cpu(cpu)
@@ -744,9 +801,10 @@ void unbind_from_irqhandler(unsigned int
EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
#ifdef CONFIG_SMP
-static int set_affinity_irq(unsigned int irq, const struct cpumask *dest)
+static int set_affinity_irq(struct irq_data *data,
+ const struct cpumask *dest, bool force)
{
- unsigned int port = evtchn_from_irq(irq);
+ unsigned int port = evtchn_from_irq_data(data);
unsigned int cpu = cpumask_any(dest);
struct evtchn_bind_vcpu ebv = { .port = port, .vcpu = cpu };
bool masked;
@@ -758,7 +816,7 @@ static int set_affinity_irq(unsigned int
masked = test_and_set_evtchn_mask(port);
rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &ebv);
if (rc == 0)
- _bind_evtchn_to_cpu(port, cpu, irq, dest);
+ _bind_evtchn_to_cpu(port, cpu, data, dest);
if (!masked)
unmask_evtchn(port);
@@ -766,9 +824,10 @@ static int set_affinity_irq(unsigned int
}
#endif
-int resend_irq_on_evtchn(unsigned int irq)
+int resend_irq_on_evtchn(struct irq_data *data)
{
- int masked, evtchn = evtchn_from_irq(irq);
+ unsigned int evtchn = evtchn_from_irq_data(data);
+ bool masked;
if (!VALID_EVTCHN(evtchn))
return 1;
@@ -785,52 +844,51 @@ int resend_irq_on_evtchn(unsigned int ir
* Interface to generic handling in irq.c
*/
-static void unmask_dynirq(unsigned int irq)
+static void unmask_dynirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(irq);
+ unsigned int evtchn = evtchn_from_irq_data(data);
if (VALID_EVTCHN(evtchn))
unmask_evtchn(evtchn);
}
-static void mask_dynirq(unsigned int irq)
+static void mask_dynirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(irq);
+ unsigned int evtchn = evtchn_from_irq_data(data);
if (VALID_EVTCHN(evtchn))
mask_evtchn(evtchn);
}
-static unsigned int startup_dynirq(unsigned int irq)
+static unsigned int startup_dynirq(struct irq_data *data)
{
- unmask_dynirq(irq);
+ unmask_dynirq(data);
return 0;
}
#define shutdown_dynirq mask_dynirq
-static void end_dynirq(unsigned int irq)
+static void end_dynirq(struct irq_data *data)
{
- if (!(irq_to_desc(irq)->status & IRQ_DISABLED)) {
- move_masked_irq(irq);
- unmask_dynirq(irq);
+ if (!(irq_to_desc(data->irq)->status & IRQ_DISABLED)) {
+ move_masked_irq(data->irq);
+ unmask_dynirq(data);
}
}
static struct irq_chip dynirq_chip = {
- .name = "Dynamic",
- .startup = startup_dynirq,
- .shutdown = shutdown_dynirq,
- .enable = unmask_dynirq,
- .disable = mask_dynirq,
- .mask = mask_dynirq,
- .unmask = unmask_dynirq,
- .end = end_dynirq,
- .eoi = end_dynirq,
+ .name = "Dynamic",
+ .irq_startup = startup_dynirq,
+ .irq_shutdown = shutdown_dynirq,
+ .irq_enable = unmask_dynirq,
+ .irq_disable = mask_dynirq,
+ .irq_mask = mask_dynirq,
+ .irq_unmask = unmask_dynirq,
+ .irq_eoi = end_dynirq,
#ifdef CONFIG_SMP
- .set_affinity = set_affinity_irq,
+ .irq_set_affinity = set_affinity_irq,
#endif
- .retrigger = resend_irq_on_evtchn,
+ .irq_retrigger = resend_irq_on_evtchn,
};
/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */
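
[Annotation] The chip conversion above follows the 2.6.37 genirq rework: the irq-number based callbacks (.mask, .unmask, .set_affinity, ...) become .irq_* variants taking a struct irq_data *, which carries both the irq number (data->irq) and the chip data. A minimal sketch with a hypothetical my_chip, reusing this file's evtchn helpers:

    static void my_mask(struct irq_data *data)
    {
            struct irq_cfg *cfg = irq_data_get_irq_chip_data(data);

            mask_evtchn(evtchn_from_irq_cfg(cfg));
    }

    static void my_unmask(struct irq_data *data)
    {
            struct irq_cfg *cfg = irq_data_get_irq_chip_data(data);

            unmask_evtchn(evtchn_from_irq_cfg(cfg));
    }

    static struct irq_chip my_chip = {
            .name       = "my",
            .irq_mask   = my_mask,
            .irq_unmask = my_unmask,
    };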
@@ -883,19 +941,20 @@ static inline void pirq_query_unmask(int
set_bit(irq - PIRQ_BASE, pirq_needs_eoi);
}
-static int set_type_pirq(unsigned int irq, unsigned int type)
+static int set_type_pirq(struct irq_data *data, unsigned int type)
{
if (type != IRQ_TYPE_PROBE)
return -EINVAL;
- set_bit(irq - PIRQ_BASE, probing_pirq);
+ set_bit(data->irq - PIRQ_BASE, probing_pirq);
return 0;
}
-static void enable_pirq(unsigned int irq)
+static void enable_pirq(struct irq_data *data)
{
struct evtchn_bind_pirq bind_pirq;
- int evtchn = evtchn_from_irq(irq);
- unsigned int pirq = irq - PIRQ_BASE;
+ struct irq_cfg *cfg = irq_data_cfg(data);
+ unsigned int evtchn = evtchn_from_irq_cfg(cfg);
+ unsigned int irq = data->irq, pirq = irq - PIRQ_BASE;
if (VALID_EVTCHN(evtchn)) {
if (pirq < nr_pirqs)
@@ -919,8 +978,8 @@ static void enable_pirq(unsigned int irq
pirq_query_unmask(irq);
evtchn_to_irq[evtchn] = irq;
- _bind_evtchn_to_cpu(evtchn, 0, -1, NULL);
- irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, bind_pirq.pirq, evtchn);
+ _bind_evtchn_to_cpu(evtchn, 0, NULL, NULL);
+ cfg->info = mk_irq_info(IRQT_PIRQ, bind_pirq.pirq, evtchn);
out:
pirq_unmask_and_notify(evtchn, irq);
@@ -928,15 +987,16 @@ static void enable_pirq(unsigned int irq
#define disable_pirq mask_pirq
-static unsigned int startup_pirq(unsigned int irq)
+static unsigned int startup_pirq(struct irq_data *data)
{
- enable_pirq(irq);
+ enable_pirq(data);
return 0;
}
-static void shutdown_pirq(unsigned int irq)
+static void shutdown_pirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(irq);
+ struct irq_cfg *cfg = irq_data_cfg(data);
+ unsigned int evtchn = evtchn_from_irq_cfg(cfg);
if (!VALID_EVTCHN(evtchn))
return;
@@ -948,48 +1008,47 @@ static void shutdown_pirq(unsigned int i
bind_evtchn_to_cpu(evtchn, 0);
evtchn_to_irq[evtchn] = -1;
- irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, index_from_irq(irq), 0);
+ cfg->info = mk_irq_info(IRQT_PIRQ, index_from_irq_cfg(cfg), 0);
}
-static void unmask_pirq(unsigned int irq)
+static void unmask_pirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(irq);
+ unsigned int evtchn = evtchn_from_irq_data(data);
if (VALID_EVTCHN(evtchn))
- pirq_unmask_and_notify(evtchn, irq);
+ pirq_unmask_and_notify(evtchn, data->irq);
}
#define mask_pirq mask_dynirq
-static void end_pirq(unsigned int irq)
+static void end_pirq(struct irq_data *data)
{
- const struct irq_desc *desc = irq_to_desc(irq);
+ const struct irq_desc *desc = irq_to_desc(data->irq);
if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) ==
(IRQ_DISABLED|IRQ_PENDING))
- shutdown_pirq(irq);
+ shutdown_pirq(data);
else {
if (!(desc->status & IRQ_DISABLED))
- move_masked_irq(irq);
- unmask_pirq(irq);
+ move_masked_irq(data->irq);
+ unmask_pirq(data);
}
}
static struct irq_chip pirq_chip = {
- .name = "Phys",
- .startup = startup_pirq,
- .shutdown = shutdown_pirq,
- .enable = enable_pirq,
- .disable = disable_pirq,
- .mask = mask_pirq,
- .unmask = unmask_pirq,
- .end = end_pirq,
- .eoi = end_pirq,
- .set_type = set_type_pirq,
+ .name = "Phys",
+ .irq_startup = startup_pirq,
+ .irq_shutdown = shutdown_pirq,
+ .irq_enable = enable_pirq,
+ .irq_disable = disable_pirq,
+ .irq_mask = mask_pirq,
+ .irq_unmask = unmask_pirq,
+ .irq_eoi = end_pirq,
+ .irq_set_type = set_type_pirq,
#ifdef CONFIG_SMP
- .set_affinity = set_affinity_irq,
+ .irq_set_affinity = set_affinity_irq,
#endif
- .retrigger = resend_irq_on_evtchn,
+ .irq_retrigger = resend_irq_on_evtchn,
};
int irq_ignore_unhandled(unsigned int irq)
@@ -1092,7 +1151,7 @@ static void restore_cpu_virqs(unsigned i
/* Record the new mapping. */
evtchn_to_irq[evtchn] = irq;
irq_cfg(irq)->info = mk_irq_info(IRQT_VIRQ, virq, evtchn);
- _bind_evtchn_to_cpu(evtchn, cpu, -1, NULL);
+ _bind_evtchn_to_cpu(evtchn, cpu, NULL, NULL);
/* Ready for use. */
unmask_evtchn(evtchn);
@@ -1120,7 +1179,7 @@ static void restore_cpu_ipis(unsigned in
/* Record the new mapping. */
evtchn_to_irq[evtchn] = irq;
irq_cfg(irq)->info = mk_irq_info(IRQT_IPI, ipi, evtchn);
- _bind_evtchn_to_cpu(evtchn, cpu, -1, NULL);
+ _bind_evtchn_to_cpu(evtchn, cpu, NULL, NULL);
/* Ready for use. */
if (!(irq_to_desc(irq)->status & IRQ_DISABLED))
@@ -1180,29 +1239,41 @@ int __init arch_early_irq_init(void)
unsigned int i;
for (i = 0; i < ARRAY_SIZE(_irq_cfg); i++)
- irq_to_desc(i)->chip_data = _irq_cfg + i;
+ set_irq_chip_data(i, _irq_cfg + i);
return 0;
}
-#ifdef CONFIG_SPARSE_IRQ
-int arch_init_chip_data(struct irq_desc *desc, int cpu)
+struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
{
- if (!desc->chip_data) {
+ int res = irq_alloc_desc_at(at, node);
+ struct irq_cfg *cfg = NULL;
+
+ if (res < 0) {
+ if (res != -EEXIST)
+ return NULL;
+ cfg = get_irq_chip_data(at);
+ if (cfg)
+ return cfg;
+ }
+
+#ifdef CONFIG_SPARSE_IRQ
#ifdef CONFIG_SMP
- /* By default all event channels notify CPU#0. */
- cpumask_copy(desc->affinity, cpumask_of(0));
+ /* By default all event channels notify CPU#0. */
+ cpumask_copy(irq_get_irq_data(at)->affinity, cpumask_of(0));
#endif
- desc->chip_data = kzalloc(sizeof(struct irq_cfg), GFP_ATOMIC);
- }
- if (!desc->chip_data) {
- pr_emerg("cannot alloc irq_cfg\n");
- BUG();
- }
- return 0;
-}
+ cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+ if (cfg)
+ set_irq_chip_data(at, cfg);
+ else
+ irq_free_desc(at);
+
+ return cfg;
+#else
+ return irq_cfg(at);
#endif
+}
#ifdef CONFIG_SPARSE_IRQ
#ifdef CONFIG_X86_IO_APIC
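
[Annotation] alloc_irq_and_cfg_at() condenses the 2.6.37 descriptor API: irq_alloc_desc_at() claims a descriptor at a fixed irq number (returning -EEXIST when one already sits there), and chip data is attached via set_irq_chip_data() rather than written into desc->chip_data directly. The idiom, restated as a sketch (assuming CONFIG_SPARSE_IRQ):

    int res = irq_alloc_desc_at(irq, node);         /* irq number or -errno */

    if (res == -EEXIST)
            cfg = get_irq_chip_data(irq);           /* reuse existing data */
    else if (res < 0)
            cfg = NULL;                             /* genuine failure */
    else {
            cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
            if (cfg)
                    set_irq_chip_data(irq, cfg);
            else
                    irq_free_desc(irq);             /* undo the allocation */
    }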
@@ -1239,7 +1310,7 @@ int __init arch_probe_nr_irqs(void)
printk(KERN_DEBUG "nr_pirqs: %d\n", nr_pirqs);
- return 0;
+ return ARRAY_SIZE(_irq_cfg);
}
#endif
@@ -1271,10 +1342,12 @@ int assign_irq_vector(int irq, struct ir
void evtchn_register_pirq(int irq)
{
+ struct irq_cfg *cfg = irq_cfg(irq);
+
BUG_ON(irq < PIRQ_BASE || irq - PIRQ_BASE >= nr_pirqs);
- if (identity_mapped_irq(irq) || type_from_irq(irq) != IRQT_UNBOUND)
+ if (identity_mapped_irq(irq) || type_from_irq_cfg(cfg) != IRQT_UNBOUND)
return;
- irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, irq, 0);
+ cfg->info = mk_irq_info(IRQT_PIRQ, irq, 0);
set_irq_chip_and_handler_name(irq, &pirq_chip, handle_fasteoi_irq,
"fasteoi");
}
@@ -1283,21 +1356,15 @@ int evtchn_map_pirq(int irq, int xen_pir
{
if (irq < 0) {
#ifdef CONFIG_SPARSE_IRQ
- spin_lock(&irq_mapping_update_lock);
- irq = find_unbound_irq(numa_node_id(), &pirq_chip);
- if (irq >= 0) {
- struct irq_desc *desc;
- struct irq_cfg *cfg;
+ struct irq_cfg *cfg;
- desc = irq_to_desc_alloc_node(irq, numa_node_id());
- cfg = desc->chip_data;
- BUG_ON(type_from_irq(irq) != IRQT_UNBOUND);
- cfg->bindcount++;
- cfg->info = mk_irq_info(IRQT_PIRQ, xen_pirq, 0);
- }
- spin_unlock(&irq_mapping_update_lock);
+ irq = find_unbound_irq(numa_node_id(), &cfg, &pirq_chip);
if (irq < 0)
return irq;
+ spin_lock(&irq_mapping_update_lock);
+ BUG_ON(type_from_irq_cfg(cfg) != IRQT_UNBOUND);
+ cfg->info = mk_irq_info(IRQT_PIRQ, xen_pirq, 0);
+ spin_unlock(&irq_mapping_update_lock);
} else if (irq >= PIRQ_BASE && irq < PIRQ_BASE + nr_pirqs) {
WARN_ONCE(1, "Non-MSI IRQ#%d (Xen %d)\n", irq, xen_pirq);
return -EINVAL;
@@ -1307,15 +1374,17 @@ int evtchn_map_pirq(int irq, int xen_pir
irq = PIRQ_BASE + nr_pirqs - 1;
spin_lock(&irq_alloc_lock);
do {
- struct irq_desc *desc;
struct irq_cfg *cfg;
if (identity_mapped_irq(irq))
continue;
- desc = irq_to_desc_alloc_node(irq, numa_node_id());
- cfg = desc->chip_data;
- if (!index_from_irq(irq)) {
- BUG_ON(type_from_irq(irq) != IRQT_UNBOUND);
+ cfg = alloc_irq_and_cfg_at(irq, numa_node_id());
+ if (unlikely(!cfg)) {
+ spin_unlock(&irq_alloc_lock);
+ return -ENOMEM;
+ }
+ if (!index_from_irq_cfg(cfg)) {
+ BUG_ON(type_from_irq_cfg(cfg) != IRQT_UNBOUND);
cfg->info = mk_irq_info(IRQT_PIRQ,
xen_pirq, 0);
break;
@@ -1328,18 +1397,14 @@ int evtchn_map_pirq(int irq, int xen_pir
handle_fasteoi_irq, "fasteoi");
#endif
} else if (!xen_pirq) {
- if (unlikely(type_from_irq(irq) != IRQT_PIRQ))
+ struct irq_cfg *cfg = irq_cfg(irq);
+
+ if (!cfg || unlikely(type_from_irq_cfg(cfg) != IRQT_PIRQ))
return -EINVAL;
- /*
- * dynamic_irq_cleanup(irq) would seem to be the correct thing
- * here, but cannot be used as we get here also during shutdown
- * when a driver didn't free_irq() its MSI(-X) IRQ(s), which
- * then causes a warning in dynamic_irq_cleanup().
- */
set_irq_chip_and_handler(irq, NULL, NULL);
- irq_cfg(irq)->info = IRQ_UNBOUND;
+ cfg->info = IRQ_UNBOUND;
#ifdef CONFIG_SPARSE_IRQ
- irq_cfg(irq)->bindcount--;
+ cfg->bindcount--;
#endif
return 0;
} else if (type_from_irq(irq) != IRQT_PIRQ
@@ -1354,10 +1419,12 @@ int evtchn_map_pirq(int irq, int xen_pir
int evtchn_get_xen_pirq(int irq)
{
+ struct irq_cfg *cfg = irq_cfg(irq);
+
if (identity_mapped_irq(irq))
return irq;
- BUG_ON(type_from_irq(irq) != IRQT_PIRQ);
- return index_from_irq(irq);
+ BUG_ON(type_from_irq_cfg(cfg) != IRQT_PIRQ);
+ return index_from_irq_cfg(cfg);
}
void __init xen_init_IRQ(void)
--- head.orig/drivers/xen/core/smpboot.c 2012-03-22 16:21:46.000000000 +0100
+++ head/drivers/xen/core/smpboot.c 2012-03-22 16:22:20.000000000 +0100
@@ -32,7 +32,7 @@ extern void smp_trap_init(trap_info_t *)
cpumask_var_t vcpu_initialized_mask;
-DEFINE_PER_CPU(struct cpuinfo_x86, cpu_info);
+DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
static DEFINE_PER_CPU(int, resched_irq);
@@ -44,6 +44,11 @@ static char callfunc_name[NR_CPUS][15];
static char call1func_name[NR_CPUS][15];
static char reboot_name[NR_CPUS][15];
+#ifdef CONFIG_IRQ_WORK
+static DEFINE_PER_CPU(int, irq_work_irq);
+static char irq_work_name[NR_CPUS][15];
+#endif
+
void __init prefill_possible_map(void)
{
int i, rc;
@@ -74,6 +79,9 @@ static int __cpuinit xen_smp_intr_init(u
int rc;
per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) =
+#ifdef CONFIG_IRQ_WORK
+ per_cpu(irq_work_irq, cpu) =
+#endif
per_cpu(call1func_irq, cpu) = per_cpu(reboot_irq, cpu) = -1;
sprintf(resched_name[cpu], "resched%u", cpu);
@@ -120,6 +128,19 @@ static int __cpuinit xen_smp_intr_init(u
goto fail;
per_cpu(reboot_irq, cpu) = rc;
+#ifdef CONFIG_IRQ_WORK
+ sprintf(irq_work_name[cpu], "irqwork%u", cpu);
+ rc = bind_ipi_to_irqhandler(IRQ_WORK_VECTOR,
+ cpu,
+ smp_irq_work_interrupt,
+ IRQF_DISABLED|IRQF_NOBALANCING,
+ irq_work_name[cpu],
+ NULL);
+ if (rc < 0)
+ goto fail;
+ per_cpu(irq_work_irq, cpu) = rc;
+#endif
+
rc = xen_spinlock_init(cpu);
if (rc < 0)
goto fail;
@@ -138,6 +159,10 @@ static int __cpuinit xen_smp_intr_init(u
unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL);
if (per_cpu(reboot_irq, cpu) >= 0)
unbind_from_irqhandler(per_cpu(reboot_irq, cpu), NULL);
+#ifdef CONFIG_IRQ_WORK
+ if (per_cpu(irq_work_irq, cpu) >= 0)
+ unbind_from_irqhandler(per_cpu(irq_work_irq, cpu), NULL);
+#endif
xen_spinlock_cleanup(cpu);
return rc;
}
@@ -151,6 +176,9 @@ static void __cpuinit xen_smp_intr_exit(
unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(reboot_irq, cpu), NULL);
+#ifdef CONFIG_IRQ_WORK
+ unbind_from_irqhandler(per_cpu(irq_work_irq, cpu), NULL);
+#endif
xen_spinlock_cleanup(cpu);
}
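
[Annotation] 2.6.37 introduced the irq_work framework (used by perf and friends); bare-metal x86 drives it from a dedicated vector, while under Xen it must be a per-CPU IPI event channel bound to smp_irq_work_interrupt(), declared in the hw_irq.h hunk earlier. A sketch of what the handler amounts to, assuming irq_work_run() from <linux/irq_work.h> as the drain entry point (the real handler may also bump interrupt statistics):

    irqreturn_t smp_irq_work_interrupt(int irq, void *dev_id)
    {
            irq_work_run();         /* run all pending irq_work entries */
            return IRQ_HANDLED;
    }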
--- head.orig/drivers/xen/core/spinlock.c 2014-01-07 17:14:50.000000000 +0100
+++ head/drivers/xen/core/spinlock.c 2014-01-07 17:15:16.000000000 +0100
@@ -24,7 +24,7 @@ struct spinning {
struct spinning *prev;
};
static DEFINE_PER_CPU(struct spinning *, _spinning);
-static DEFINE_PER_CPU(evtchn_port_t, poll_evtchn);
+static DEFINE_PER_CPU_READ_MOSTLY(evtchn_port_t, poll_evtchn);
/*
* Protect removal of objects: Addition can be done lockless, and even
* removal itself doesn't need protection - what needs to be prevented is
@@ -227,7 +227,7 @@ bool xen_spin_wait(arch_spinlock_t *lock
unsigned int flags)
{
typeof(vcpu_info(0)->evtchn_upcall_mask) upcall_mask
- = __raw_local_save_flags();
+ = arch_local_save_flags();
struct spinning spinning;
/* If kicker interrupt not initialized yet, just spin. */
@@ -251,7 +251,7 @@ bool xen_spin_wait(arch_spinlock_t *lock
break;
}
}
- raw_local_irq_disable();
+ arch_local_irq_disable();
#endif
smp_wmb();
percpu_write(_spinning, &spinning);
@@ -278,7 +278,7 @@ bool xen_spin_wait(arch_spinlock_t *lock
if (upcall_mask > flags) {
spinning.irq_count = percpu_read(_irq_count);
smp_wmb();
- raw_local_irq_restore(flags);
+ arch_local_irq_restore(flags);
}
#endif
@@ -287,7 +287,7 @@ bool xen_spin_wait(arch_spinlock_t *lock
BUG();
#if CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
- raw_local_irq_disable();
+ arch_local_irq_disable();
smp_wmb();
spinning.irq_count = UINT_MAX;
#endif
@@ -306,9 +306,9 @@ bool xen_spin_wait(arch_spinlock_t *lock
/* announce we're done */
percpu_write(_spinning, spinning.prev);
if (!CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING)
- raw_local_irq_disable();
+ arch_local_irq_disable();
sequence();
- raw_local_irq_restore(upcall_mask);
+ arch_local_irq_restore(upcall_mask);
smp_rmb();
if (lock->cur == spinning.ticket)
return true;
@@ -352,7 +352,7 @@ void xen_spin_kick(const arch_spinlock_t
atomic_t *rm_ctr = NULL;
struct spinning *spinning;
- flags = __raw_local_irq_save();
+ flags = arch_local_irq_save();
if (cpu == local)
spinning = percpu_read(_spinning);
else for (;;) {
@@ -387,7 +387,7 @@ void xen_spin_kick(const arch_spinlock_t
if (rm_ctr)
atomic_dec(rm_ctr);
- raw_local_irq_restore(flags);
+ arch_local_irq_restore(flags);
if (unlikely(spinning)) {
#if CONFIG_XEN_SPINLOCK_ACQUIRE_NESTING
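
[Annotation] The spinlock.c changes are purely mechanical: the 2.6.37 irqflags consolidation renamed the low-level hooks, so code that deliberately bypasses the lockdep-aware local_irq_* wrappers now spells them arch_local_*. The mapping, for reference (semantics are unchanged):

    /* __raw_local_save_flags()  -> arch_local_save_flags()   */
    /* __raw_local_irq_save()    -> arch_local_irq_save()     */
    /* raw_local_irq_disable()   -> arch_local_irq_disable()  */
    /* raw_local_irq_restore(f)  -> arch_local_irq_restore(f) */
    unsigned long flags = arch_local_irq_save();
    /* ... critical section, invisible to lockdep ... */
    arch_local_irq_restore(flags);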
--- head.orig/drivers/xen/evtchn.c 2013-09-26 13:00:02.000000000 +0200
+++ head/drivers/xen/evtchn.c 2013-09-26 13:00:15.000000000 +0200
@@ -563,7 +563,11 @@ static const struct file_operations evtc
static struct miscdevice evtchn_miscdev = {
.minor = MISC_DYNAMIC_MINOR,
+#ifdef CONFIG_PARAVIRT_XEN
.name = "xen/evtchn",
+#else
+ .name = "evtchn",
+#endif
.nodename = "xen/evtchn",
.fops = &evtchn_fops,
};
--- head.orig/drivers/xen/fbfront/xenfb.c 2011-02-01 15:03:03.000000000 +0100
+++ head/drivers/xen/fbfront/xenfb.c 2011-04-13 14:12:22.000000000 +0200
@@ -611,10 +611,9 @@ static int __devinit xenfb_probe(struct
info->refresh.data = (unsigned long)info;
INIT_LIST_HEAD(&info->mappings);
- info->fb = vmalloc(fb_size);
+ info->fb = vzalloc(fb_size);
if (info->fb == NULL)
goto error_nomem;
- memset(info->fb, 0, fb_size);
info->nr_pages = (fb_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
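
[Annotation] vzalloc() is new in 2.6.37 and folds the vmalloc()+memset() pair above into one call. Usage sketch:

    /* Zeroed, virtually contiguous allocation in one step. */
    void *fb = vzalloc(fb_size);
    if (!fb)
            return -ENOMEM;         /* error path, as in xenfb_probe() */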
--- head.orig/drivers/xen/gntdev/gntdev.c 2012-05-23 13:35:46.000000000 +0200
+++ head/drivers/xen/gntdev/gntdev.c 2012-05-23 13:38:40.000000000 +0200
@@ -140,6 +140,7 @@ static long gntdev_ioctl(struct file *fl
static const struct file_operations gntdev_fops = {
.owner = THIS_MODULE,
.open = gntdev_open,
+ .llseek = no_llseek,
.release = gntdev_release,
.mmap = gntdev_mmap,
.unlocked_ioctl = gntdev_ioctl
@@ -394,6 +395,8 @@ static int gntdev_open(struct inode *ino
{
gntdev_file_private_data_t *private_data;
+ nonseekable_open(inode, flip);
+
/* Allocate space for the per-instance private data. */
private_data = kmalloc(sizeof(*private_data), GFP_KERNEL);
if (!private_data)
--- head.orig/drivers/xen/pci.c 2014-03-31 05:40:15.000000000 +0200
+++ head/drivers/xen/pci.c 2014-02-18 17:29:14.000000000 +0100
@@ -23,14 +23,23 @@
#include <xen/interface/physdev.h>
#include <xen/interface/xen.h>
+#ifdef CONFIG_PARAVIRT_XEN
+#define CONFIG_XEN_COMPAT 0x040000
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
+#else
+#include <asm/hypervisor.h>
+#endif
#include "../pci/pci.h"
#ifdef CONFIG_PCI_MMCONFIG
#include <asm/pci_x86.h>
#endif
+#if CONFIG_XEN_COMPAT < 0x040200
static bool __read_mostly pci_seg_supported = true;
+#else
+#define pci_seg_supported true
+#endif
static int xen_add_device(struct device *dev)
{
@@ -89,7 +98,9 @@ static int xen_add_device(struct device
r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add);
if (r != -ENOSYS)
return r;
+#if CONFIG_XEN_COMPAT < 0x040200
pci_seg_supported = false;
+#endif
}
if (pci_domain_nr(pci_dev->bus))
--- head.orig/drivers/xen/privcmd/privcmd.c 2014-01-30 10:16:46.000000000 +0100
+++ head/drivers/xen/privcmd/privcmd.c 2014-01-30 10:18:11.000000000 +0100
@@ -449,7 +449,8 @@ static int privcmd_mmap(struct file * fi
if (xen_feature(XENFEAT_auto_translated_physmap))
return -ENOSYS;
- /* DONTCOPY is essential for Xen as copy_page_range is broken. */
+ /* DONTCOPY is essential for Xen because copy_page_range doesn't know
+ * how to recreate these mappings */
vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTCOPY;
vma->vm_ops = &privcmd_vm_ops;
vma->vm_private_data = NULL;
@@ -459,6 +460,8 @@ static int privcmd_mmap(struct file * fi
#endif
static const struct file_operations privcmd_file_ops = {
+ .open = nonseekable_open,
+ .llseek = no_llseek,
.unlocked_ioctl = privcmd_ioctl,
.mmap = privcmd_mmap,
};
--- head.orig/drivers/xen/scsifront/scsifront.c 2012-12-06 16:09:58.000000000 +0100
+++ head/drivers/xen/scsifront/scsifront.c 2012-12-06 16:13:46.000000000 +0100
@@ -324,11 +324,12 @@ static int map_data_for_request(struct v
return ref_cnt;
}
-static int scsifront_queuecommand(struct scsi_cmnd *sc,
- void (*done)(struct scsi_cmnd *))
+static int scsifront_queuecommand(struct Scsi_Host *shost,
+ struct scsi_cmnd *sc)
{
- struct vscsifrnt_info *info = shost_priv(sc->device->host);
+ struct vscsifrnt_info *info = shost_priv(shost);
vscsiif_request_t *ring_req;
+ unsigned long flags;
int ref_cnt;
uint16_t rqid;
@@ -339,11 +340,13 @@ static int scsifront_queuecommand(struct
sc->cmnd[2], sc->cmnd[3], sc->cmnd[4], sc->cmnd[5],
sc->cmnd[6], sc->cmnd[7], sc->cmnd[8], sc->cmnd[9]);
*/
+ spin_lock_irqsave(shost->host_lock, flags);
+ scsi_cmd_get_serial(shost, sc);
if (RING_FULL(&info->ring)) {
- goto out_host_busy;
+ spin_unlock_irqrestore(shost->host_lock, flags);
+ return SCSI_MLQUEUE_HOST_BUSY;
}
- sc->scsi_done = done;
sc->result = 0;
ring_req = scsifront_pre_request(info);
@@ -371,27 +374,21 @@ static int scsifront_queuecommand(struct
ref_cnt = map_data_for_request(info, sc, ring_req, rqid);
if (ref_cnt < 0) {
add_id_to_freelist(info, rqid);
+ spin_unlock_irqrestore(shost->host_lock, flags);
if (ref_cnt == (-ENOMEM))
- goto out_host_busy;
- else {
- sc->result = (DID_ERROR << 16);
- goto out_fail_command;
- }
+ return SCSI_MLQUEUE_HOST_BUSY;
+ sc->result = (DID_ERROR << 16);
+ sc->scsi_done(sc);
+ return 0;
}
ring_req->nr_segments = (uint8_t)ref_cnt;
info->shadow[rqid].nr_segments = ref_cnt;
scsifront_do_request(info);
+ spin_unlock_irqrestore(shost->host_lock, flags);
return 0;
-
-out_host_busy:
- return SCSI_MLQUEUE_HOST_BUSY;
-
-out_fail_command:
- done(sc);
- return 0;
}
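
[Annotation] The scsifront conversion tracks the 2.6.37 SCSI host-lock pushdown: ->queuecommand() is now called without shost->host_lock held, takes the Scsi_Host explicitly, and the completion callback lives in sc->scsi_done. scsifront re-takes the lock by hand because it needs it around the ring test; drivers that simply want the old locked semantics can instead wrap an unchanged body with DEF_SCSI_QCMD() from <scsi/scsi_host.h>. A sketch with hypothetical mydrv names:

    /* Pre-2.6.37 style body, kept as the _lck variant... */
    static int mydrv_queuecommand_lck(struct scsi_cmnd *sc,
                                      void (*done)(struct scsi_cmnd *))
    {
            sc->scsi_done = done;
            /* ... hand sc to the hardware ... */
            return 0;
    }

    /* ...and DEF_SCSI_QCMD emits mydrv_queuecommand(shost, sc), which
     * takes host_lock and calls scsi_cmd_get_serial() around the body. */
    static DEF_SCSI_QCMD(mydrv_queuecommand)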
--- head.orig/drivers/xen/sfc_netback/ci/tools/platform/linux_kernel.h 2008-02-20 09:32:49.000000000 +0100
+++ head/drivers/xen/sfc_netback/ci/tools/platform/linux_kernel.h 2011-02-01 15:09:47.000000000 +0100
@@ -54,7 +54,6 @@
#include <linux/in6.h>
#include <linux/spinlock.h>
#include <linux/highmem.h>
-#include <linux/smp_lock.h>
#include <linux/ctype.h>
#include <linux/uio.h>
#include <asm/current.h>
--- head.orig/drivers/xen/xenbus/xenbus_dev.c 2011-02-01 15:03:03.000000000 +0100
+++ head/drivers/xen/xenbus/xenbus_dev.c 2012-02-16 13:43:48.000000000 +0100
@@ -488,6 +488,7 @@ static const struct file_operations xenb
.write = xenbus_dev_write,
.open = xenbus_dev_open,
.release = xenbus_dev_release,
+ .llseek = no_llseek,
.poll = xenbus_dev_poll,
#ifdef CONFIG_XEN_PRIVILEGED_GUEST
.unlocked_ioctl = xenbus_dev_ioctl
--- head.orig/drivers/xen/xenbus/xenbus_probe.c 2012-03-12 13:52:22.000000000 +0100
+++ head/drivers/xen/xenbus/xenbus_probe.c 2012-03-12 13:52:29.000000000 +0100
@@ -58,6 +58,8 @@
#include <xen/evtchn.h>
#include <xen/features.h>
#include <xen/gnttab.h>
+
+#define PARAVIRT_EXPORT_SYMBOL(sym) __typeof__(sym) sym
#else
#include <asm/xen/hypervisor.h>
@@ -67,6 +69,8 @@
#include <xen/page.h>
#include <xen/platform_pci.h>
+
+#define PARAVIRT_EXPORT_SYMBOL EXPORT_SYMBOL_GPL
#endif
#ifndef CONFIG_XEN
@@ -81,11 +85,10 @@
#endif
int xen_store_evtchn;
-#if !defined(CONFIG_XEN) && !defined(MODULE)
-EXPORT_SYMBOL(xen_store_evtchn);
-#endif
+PARAVIRT_EXPORT_SYMBOL(xen_store_evtchn);
struct xenstore_domain_interface *xen_store_interface;
+PARAVIRT_EXPORT_SYMBOL(xen_store_interface);
static unsigned long xen_store_mfn;
@@ -1221,9 +1224,7 @@ int __devinit
xenbus_init(void)
{
int err = 0;
-#if defined(CONFIG_XEN) || defined(MODULE)
unsigned long page = 0;
-#endif
DPRINTK("");
@@ -1241,7 +1242,6 @@ xenbus_init(void)
* Domain0 doesn't have a store_evtchn or store_mfn yet.
*/
if (is_initial_xendomain()) {
-#if defined(CONFIG_XEN) || defined(MODULE)
struct evtchn_alloc_unbound alloc_unbound;
/* Allocate Xenstore page */
@@ -1280,9 +1280,6 @@ xenbus_init(void)
if (xsd_port_intf)
xsd_port_intf->read_proc = xsd_port_read;
#endif
-#else
- /* dom0 not yet supported */
-#endif
xen_store_interface = mfn_to_virt(xen_store_mfn);
} else {
#if !defined(CONFIG_XEN) && !defined(MODULE)
@@ -1368,10 +1365,8 @@ xenbus_init(void)
* registered.
*/
-#if defined(CONFIG_XEN) || defined(MODULE)
if (page != 0)
free_page(page);
-#endif
return err;
}
--- head.orig/include/uapi/xen/Kbuild 2014-02-18 17:22:08.000000000 +0100
+++ head/include/uapi/xen/Kbuild 2014-02-18 17:29:23.000000000 +0100
@@ -1,5 +1,4 @@
# UAPI Header export list
header-y += gntalloc.h
header-y += gntdev.h
-header-y += privcmd.h
header-y += public/
--- head.orig/include/xen/evtchn.h 2012-10-23 15:29:34.000000000 +0200
+++ head/include/xen/evtchn.h 2012-10-23 15:45:43.000000000 +0200
@@ -55,6 +55,7 @@ struct irq_cfg {
#endif
};
};
+struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node);
#endif
/*
--- head.orig/include/xen/interface/memory.h 2014-01-30 10:17:58.000000000 +0100
+++ head/include/xen/interface/memory.h 2014-01-30 10:18:07.000000000 +0100
@@ -206,6 +206,7 @@ struct xen_machphys_mapping {
xen_ulong_t v_start, v_end; /* Start and end virtual addresses. */
xen_ulong_t max_mfn; /* Maximum MFN that can be looked up. */
};
+DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping);
typedef struct xen_machphys_mapping xen_machphys_mapping_t;
DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t);
@@ -318,6 +319,7 @@ struct xen_memory_map {
*/
XEN_GUEST_HANDLE(void) buffer;
};
+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
typedef struct xen_memory_map xen_memory_map_t;
DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t);
--- head.orig/include/uapi/xen/privcmd.h 2012-12-11 04:30:57.000000000 +0100
+++ head/include/uapi/xen/privcmd.h 2012-10-23 15:44:46.000000000 +0200
@@ -1,98 +1,3 @@
-/******************************************************************************
- * privcmd.h
- *
- * Interface to /proc/xen/privcmd.
- *
- * Copyright (c) 2003-2005, K A Fraser
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef __LINUX_PUBLIC_PRIVCMD_H__
-#define __LINUX_PUBLIC_PRIVCMD_H__
-
-#include <linux/types.h>
-#include <linux/compiler.h>
-#include <xen/interface/xen.h>
-
-struct privcmd_hypercall {
- __u64 op;
- __u64 arg[5];
-};
-
-struct privcmd_mmap_entry {
- __u64 va;
- __u64 mfn;
- __u64 npages;
-};
-
-struct privcmd_mmap {
- int num;
- domid_t dom; /* target domain */
- struct privcmd_mmap_entry __user *entry;
-};
-
-struct privcmd_mmapbatch {
- int num; /* number of pages to populate */
- domid_t dom; /* target domain */
- __u64 addr; /* virtual address */
- xen_pfn_t __user *arr; /* array of mfns - or'd with
- PRIVCMD_MMAPBATCH_*_ERROR on err */
-};
-
-#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
-#define PRIVCMD_MMAPBATCH_PAGED_ERROR 0x80000000U
-
-struct privcmd_mmapbatch_v2 {
- unsigned int num; /* number of pages to populate */
- domid_t dom; /* target domain */
- __u64 addr; /* virtual address */
- const xen_pfn_t __user *arr; /* array of mfns */
- int __user *err; /* array of error codes */
-};
-
-/*
- * @cmd: IOCTL_PRIVCMD_HYPERCALL
- * @arg: &privcmd_hypercall_t
- * Return: Value returned from execution of the specified hypercall.
- *
- * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
- * @arg: &struct privcmd_mmapbatch_v2
- * Return: 0 on success (i.e., arg->err contains valid error codes for
- * each frame). On an error other than a failed frame remap, -1 is
- * returned and errno is set to EINVAL, EFAULT etc. As an exception,
- * if the operation was otherwise successful but any frame failed with
- * -ENOENT, then -1 is returned and errno is set to ENOENT.
- */
-#define IOCTL_PRIVCMD_HYPERCALL \
- _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
-#define IOCTL_PRIVCMD_MMAP \
- _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
-#define IOCTL_PRIVCMD_MMAPBATCH \
- _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
-#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
- _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
-
-#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
+#if defined(CONFIG_PARAVIRT_XEN) || !defined(__KERNEL__)
+#include "public/privcmd.h"
+#endif
--- head.orig/include/uapi/xen/public/privcmd.h 2011-02-01 14:38:38.000000000 +0100
+++ head/include/uapi/xen/public/privcmd.h 2011-02-01 15:09:47.000000000 +0100
@@ -34,6 +34,7 @@
#define __LINUX_PUBLIC_PRIVCMD_H__
#include <linux/types.h>
+#include <linux/compiler.h>
typedef struct privcmd_hypercall
{
--- head.orig/kernel/power/Kconfig 2014-06-26 11:21:16.000000000 +0200
+++ head/kernel/power/Kconfig 2013-12-02 17:57:40.000000000 +0100
@@ -165,7 +165,7 @@ config PM_ADVANCED_DEBUG
config PM_TEST_SUSPEND
bool "Test suspend/resume and wakealarm during bootup"
- depends on SUSPEND && PM_DEBUG && RTC_CLASS=y
+ depends on SUSPEND && PM_DEBUG && RTC_CLASS=y && !XEN_UNPRIVILEGED_GUEST
---help---
This option will let you suspend your machine during bootup, and
make it wake up a few seconds later using an RTC wakeup alarm.
@@ -212,7 +212,7 @@ config PM_TRACE
config PM_TRACE_RTC
bool "Suspend/resume event tracing"
depends on PM_SLEEP_DEBUG
- depends on X86
+ depends on X86 && !XEN_UNPRIVILEGED_GUEST
select PM_TRACE
---help---
This enables some cheesy code to save the last PM event point in the
--- head.orig/lib/swiotlb-xen.c 2011-02-01 15:04:27.000000000 +0100
+++ head/lib/swiotlb-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -58,7 +58,7 @@ static unsigned long io_tlb_nslabs;
*/
static unsigned long io_tlb_overflow = 32*1024;
-void *io_tlb_overflow_buffer;
+static void *io_tlb_overflow_buffer;
/*
* This is a free list describing the number of free entries available from
@@ -174,16 +174,16 @@ void __init swiotlb_init_with_tbl(char *
* Allocate and initialize the free list array. This array is used
* to find contiguous free memory regions of size up to IO_TLB_SEGSIZE.
*/
- io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
+ io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
for (i = 0; i < io_tlb_nslabs; i++)
io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
io_tlb_index = 0;
- io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t));
+ io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
/*
* Get the overflow emergency buffer
*/
- io_tlb_overflow_buffer = alloc_bootmem(io_tlb_overflow);
+ io_tlb_overflow_buffer = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_overflow));
if (!io_tlb_overflow_buffer)
panic("Cannot allocate SWIOTLB overflow buffer!\n");
@@ -218,7 +218,7 @@ swiotlb_init_with_default_size(size_t de
/*
* Get IO TLB memory from the low pages
*/
- io_tlb_start = alloc_bootmem_pages(bytes);
+ io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes));
if (!io_tlb_start)
panic("Cannot allocate SWIOTLB buffer");
--- head.orig/mm/vmalloc.c 2013-12-02 17:38:28.000000000 +0100
+++ head/mm/vmalloc.c 2013-12-02 17:57:32.000000000 +0100
@@ -541,8 +541,6 @@ static void vmap_debug_free_range(unsign
#ifdef CONFIG_DEBUG_PAGEALLOC
vunmap_page_range(start, end);
flush_tlb_kernel_range(start, end);
-#elif defined(CONFIG_XEN) && defined(CONFIG_X86)
- vunmap_page_range(start, end);
#endif
}