Automatically created from "patch-2.6.39" by xen-port-patches.py
From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux: 2.6.39
Patch-mainline: 2.6.39
This patch contains the differences between 2.6.38 and 2.6.39.
Acked-by: jbeulich@novell.com
--- head.orig/arch/x86/Kconfig 2012-04-10 17:07:07.000000000 +0200
+++ head/arch/x86/Kconfig 2012-04-10 17:12:02.000000000 +0200
@@ -1195,7 +1195,7 @@ config ARCH_PHYS_ADDR_T_64BIT
def_bool X86_64 || X86_PAE
config ARCH_DMA_ADDR_T_64BIT
- def_bool X86_64 || HIGHMEM64G
+ def_bool X86_64 || XEN || HIGHMEM64G
config DIRECT_GBPAGES
bool "Enable 1GB pages for kernel pagetables" if EXPERT
@@ -2052,7 +2052,7 @@ source "drivers/pci/Kconfig"
# x86_64 have no ISA slots, but can have ISA-style DMA.
config ISA_DMA_API
- bool "ISA-style DMA support" if (X86_64 && EXPERT)
+ bool "ISA-style DMA support" if ((X86_64 || XEN) && EXPERT) || XEN_UNPRIVILEGED_GUEST
default y
help
Enables ISA-style DMA support for devices requiring such controllers.
@@ -2122,7 +2122,7 @@ config SCx200HR_TIMER
config OLPC
bool "One Laptop Per Child support"
- depends on !X86_PAE
+ depends on !X86_PAE && !XEN
select GPIOLIB
select OF
select OF_PROMTREE
--- head.orig/arch/x86/ia32/ia32entry-xen.S 2011-02-01 15:04:27.000000000 +0100
+++ head/arch/x86/ia32/ia32entry-xen.S 2011-04-12 16:00:27.000000000 +0200
@@ -20,6 +20,8 @@
#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_LE 0x40000000
+ .section .entry.text, "ax"
+
#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
.macro IA32_ARG_FIXUP noebp=0
@@ -122,8 +124,7 @@ ENTRY(ia32_sysenter_target)
CFI_REL_OFFSET rcx,0
movq 8(%rsp),%r11
CFI_RESTORE r11
- popq %rcx
- CFI_ADJUST_CFA_OFFSET -8
+ popq_cfi %rcx
CFI_RESTORE rcx
movl %ebp,%ebp /* zero extension */
movl %eax,%eax
@@ -293,8 +294,7 @@ ENTRY(ia32_syscall)
CFI_REL_OFFSET rcx,0
movq 8(%rsp),%r11
CFI_RESTORE r11
- popq %rcx
- CFI_ADJUST_CFA_OFFSET -8
+ popq_cfi %rcx
CFI_RESTORE rcx
movl %eax,%eax
movq %rax,(%rsp)
@@ -732,4 +732,8 @@ ia32_sys_call_table:
.quad sys_fanotify_init
.quad sys32_fanotify_mark
.quad sys_prlimit64 /* 340 */
+ .quad sys_name_to_handle_at
+ .quad compat_sys_open_by_handle_at
+ .quad compat_sys_clock_adjtime
+ .quad sys_syncfs
ia32_syscall_end:
--- head.orig/arch/x86/include/asm/acpi.h 2011-08-23 14:04:36.000000000 +0200
+++ head/arch/x86/include/asm/acpi.h 2011-08-23 14:06:28.000000000 +0200
@@ -119,7 +119,11 @@ static inline void acpi_disable_pci(void
}
/* Low-level suspend routine. */
+#ifdef CONFIG_ACPI_PV_SLEEP
+#define acpi_suspend_lowlevel() acpi_enter_sleep_state(ACPI_STATE_S3)
+#else
extern int acpi_suspend_lowlevel(void);
+#endif
extern const unsigned char acpi_wakeup_code[];
#define acpi_wakeup_address (__pa(TRAMPOLINE_SYM(acpi_wakeup_code)))
--- head.orig/arch/x86/include/asm/trampoline.h 2012-06-06 13:23:56.000000000 +0200
+++ head/arch/x86/include/asm/trampoline.h 2011-04-14 14:59:31.000000000 +0200
@@ -1,4 +1,4 @@
-#ifndef _ASM_X86_TRAMPOLINE_H
+#if !defined(_ASM_X86_TRAMPOLINE_H) && !defined(CONFIG_XEN)
#define _ASM_X86_TRAMPOLINE_H
#ifndef __ASSEMBLY__
--- head.orig/arch/x86/include/mach-xen/asm/io.h 2011-02-01 15:09:47.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/io.h 2011-04-15 11:26:41.000000000 +0200
@@ -352,6 +352,8 @@ extern void __iomem *early_ioremap(resou
unsigned long size);
extern void __iomem *early_memremap(resource_size_t phys_addr,
unsigned long size);
+extern void __iomem *early_memremap_ro(resource_size_t phys_addr,
+ unsigned long size);
extern void early_iounmap(void __iomem *addr, unsigned long size);
extern void fixup_early_ioremap(void);
extern bool is_early_ioremap_ptep(pte_t *ptep);
--- head.orig/arch/x86/include/mach-xen/asm/pgtable-3level.h 2011-03-23 10:10:03.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pgtable-3level.h 2011-04-12 15:59:10.000000000 +0200
@@ -63,8 +63,6 @@ static inline void xen_pmd_clear(pmd_t *
static inline void pud_clear(pud_t *pudp)
{
- pgdval_t pgd;
-
set_pud(pudp, __pud(0));
/*
@@ -73,13 +71,10 @@ static inline void pud_clear(pud_t *pudp
* section 8.1: in PAE mode we explicitly have to flush the
* TLB via cr3 if the top-level pgd is changed...
*
- * Make sure the pud entry we're updating is within the
- * current pgd to avoid unnecessary TLB flushes.
+ * Currently every caller of pud_clear() either follows it with
+ * flush_tlb_mm() or doesn't need a TLB flush (x86_64 code or
+ * pud_clear_bad()), so no TLB flush is needed here.
*/
- pgd = read_cr3();
- if (__pa(pudp) >= pgd && __pa(pudp) <
- (pgd + sizeof(pgd_t)*PTRS_PER_PGD))
- xen_tlb_flush();
}
#ifdef CONFIG_SMP
--- head.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-03-23 10:10:05.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-04-12 15:59:10.000000000 +0200
@@ -165,7 +165,6 @@ static inline int pgd_large(pgd_t pgd) {
#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
extern int kern_addr_valid(unsigned long addr);
-extern void cleanup_highmap(void);
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
--- head.orig/arch/x86/include/mach-xen/asm/pgtable_types.h 2011-02-01 15:41:35.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/pgtable_types.h 2011-05-23 11:27:06.000000000 +0200
@@ -358,6 +358,8 @@ int phys_mem_access_prot_allowed(struct
/* Install a pte for a particular vaddr in kernel space. */
void set_pte_vaddr(unsigned long vaddr, pte_t pte);
+extern void xen_pagetable_reserve(u64 start, u64 end);
+
struct seq_file;
extern void arch_report_meminfo(struct seq_file *m);
--- head.orig/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:47:59.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/processor.h 2011-04-12 15:59:10.000000000 +0200
@@ -103,10 +103,6 @@ struct cpuinfo_x86 {
int x86_power;
unsigned long loops_per_jiffy;
#ifndef CONFIG_XEN
-#ifdef CONFIG_SMP
- /* cpus sharing the last level cache: */
- cpumask_var_t llc_shared_map;
-#endif
/* cpuid returned max cores value: */
u16 x86_max_cores;
u16 apicid;
--- head.orig/arch/x86/include/mach-xen/asm/smp.h 2011-03-03 16:12:54.000000000 +0100
+++ head/arch/x86/include/mach-xen/asm/smp.h 2011-04-13 17:01:31.000000000 +0200
@@ -17,12 +17,24 @@
#endif
#include <asm/thread_info.h>
#include <asm/cpumask.h>
+#include <asm/cpufeature.h>
extern unsigned int num_processors;
#ifndef CONFIG_XEN
+static inline bool cpu_has_ht_siblings(void)
+{
+ bool has_siblings = false;
+#ifdef CONFIG_SMP
+ has_siblings = cpu_has_ht && smp_num_siblings > 1;
+#endif
+ return has_siblings;
+}
+
DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
+/* cpus sharing the last level cache: */
+DECLARE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
DECLARE_PER_CPU(u16, cpu_llc_id);
DECLARE_PER_CPU(int, cpu_number);
#endif
@@ -38,8 +50,16 @@ static inline const struct cpumask *cpu_
}
#ifndef CONFIG_XEN
+static inline struct cpumask *cpu_llc_shared_mask(int cpu)
+{
+ return per_cpu(cpu_llc_shared_map, cpu);
+}
+
DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
+DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid);
+#endif
#endif
#ifdef CONFIG_SMP
--- head.orig/arch/x86/kernel/Makefile 2012-04-10 17:06:48.000000000 +0200
+++ head/arch/x86/kernel/Makefile 2012-04-10 17:11:59.000000000 +0200
@@ -117,5 +117,6 @@ ifeq ($(CONFIG_X86_64),y)
endif
disabled-obj-$(CONFIG_XEN) := crash.o early-quirks.o hpet.o i8237.o i8253.o \
- i8259.o irqinit.o pci-swiotlb.o reboot.o smpboot.o tsc.o tsc_sync.o vsmp_64.o
+ i8259.o irqinit.o pci-swiotlb.o reboot.o smpboot.o trampoline%.o \
+ tsc.o tsc_sync.o vsmp_64.o
disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += probe_roms_32.o
--- head.orig/arch/x86/kernel/acpi/Makefile 2011-01-31 18:07:35.000000000 +0100
+++ head/arch/x86/kernel/acpi/Makefile 2011-04-12 16:29:53.000000000 +0200
@@ -15,4 +15,4 @@ $(obj)/wakeup_rm.o: $(obj)/realmode/w
$(obj)/realmode/wakeup.bin: FORCE
$(Q)$(MAKE) $(build)=$(obj)/realmode
-disabled-obj-$(CONFIG_XEN) := cstate.o wakeup_%.o
+disabled-obj-$(CONFIG_XEN) := cstate.o sleep.o wakeup_%.o
--- head.orig/arch/x86/kernel/amd_nb.c 2011-04-13 13:47:56.000000000 +0200
+++ head/arch/x86/kernel/amd_nb.c 2012-02-08 12:53:26.000000000 +0100
@@ -154,6 +154,7 @@ struct resource *amd_get_mmconfig_range(
return res;
}
+#ifndef CONFIG_XEN
int amd_get_subcaches(int cpu)
{
struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
@@ -208,6 +209,7 @@ int amd_set_subcaches(int cpu, int mask)
return 0;
}
+#endif
static int amd_cache_gart(void)
{
--- head.orig/arch/x86/kernel/apic/io_apic-xen.c 2011-02-17 10:30:00.000000000 +0100
+++ head/arch/x86/kernel/apic/io_apic-xen.c 2011-05-09 11:46:50.000000000 +0200
@@ -30,7 +30,7 @@
#include <linux/compiler.h>
#include <linux/acpi.h>
#include <linux/module.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/jiffies.h> /* time_after() */
@@ -120,7 +120,10 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BU
int skip_ioapic_setup;
-static void __init _arch_disable_smp_support(void)
+/**
+ * _disable_ioapic_support() - disables ioapic support at runtime
+ */
+static void __init _disable_ioapic_support(void)
{
#ifdef CONFIG_PCI
noioapicquirk = 1;
@@ -132,11 +135,14 @@ static void __init _arch_disable_smp_sup
static int __init parse_noapic(char *str)
{
/* disable IO-APIC */
- _arch_disable_smp_support();
+ _disable_ioapic_support();
return 0;
}
early_param("noapic", parse_noapic);
+static int io_apic_setup_irq_pin(unsigned int irq, int node,
+ struct io_apic_irq_attr *attr);
+
/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
void mp_save_irq(struct mpc_intsrc *m)
{
@@ -194,7 +200,7 @@ int __init arch_early_irq_init(void)
irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);
for (i = 0; i < count; i++) {
- set_irq_chip_data(i, &cfg[i]);
+ irq_set_chip_data(i, &cfg[i]);
zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
/*
@@ -213,7 +219,7 @@ int __init arch_early_irq_init(void)
#ifdef CONFIG_SPARSE_IRQ
static struct irq_cfg *irq_cfg(unsigned int irq)
{
- return get_irq_chip_data(irq);
+ return irq_get_chip_data(irq);
}
static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
@@ -239,7 +245,7 @@ static void free_irq_cfg(unsigned int at
{
if (!cfg)
return;
- set_irq_chip_data(at, NULL);
+ irq_set_chip_data(at, NULL);
free_cpumask_var(cfg->domain);
free_cpumask_var(cfg->old_domain);
kfree(cfg);
@@ -269,14 +275,14 @@ static struct irq_cfg *alloc_irq_and_cfg
if (res < 0) {
if (res != -EEXIST)
return NULL;
- cfg = get_irq_chip_data(at);
+ cfg = irq_get_chip_data(at);
if (cfg)
return cfg;
}
cfg = alloc_irq_cfg(at, node);
if (cfg)
- set_irq_chip_data(at, cfg);
+ irq_set_chip_data(at, cfg);
else
irq_free_desc(at);
return cfg;
@@ -868,7 +874,7 @@ static int EISA_ELCR(unsigned int irq)
#define default_MCA_trigger(idx) (1)
#define default_MCA_polarity(idx) default_ISA_polarity(idx)
-static int MPBIOS_polarity(int idx)
+static int irq_polarity(int idx)
{
int bus = mp_irqs[idx].srcbus;
int polarity;
@@ -910,7 +916,7 @@ static int MPBIOS_polarity(int idx)
return polarity;
}
-static int MPBIOS_trigger(int idx)
+static int irq_trigger(int idx)
{
int bus = mp_irqs[idx].srcbus;
int trigger;
@@ -982,16 +988,6 @@ static int MPBIOS_trigger(int idx)
return trigger;
}
-static inline int irq_polarity(int idx)
-{
- return MPBIOS_polarity(idx);
-}
-
-static inline int irq_trigger(int idx)
-{
- return MPBIOS_trigger(idx);
-}
-
static int pin_2_irq(int idx, int apic, int pin)
{
int irq;
@@ -1244,7 +1240,7 @@ void __setup_vector_irq(int cpu)
raw_spin_lock(&vector_lock);
/* Mark the inuse vectors */
for_each_active_irq(irq) {
- cfg = get_irq_chip_data(irq);
+ cfg = irq_get_chip_data(irq);
if (!cfg)
continue;
/*
@@ -1275,10 +1271,6 @@ void __setup_vector_irq(int cpu)
static struct irq_chip ioapic_chip;
static struct irq_chip ir_ioapic_chip;
-#define IOAPIC_AUTO -1
-#define IOAPIC_EDGE 0
-#define IOAPIC_LEVEL 1
-
#ifdef CONFIG_X86_32
static inline int IO_APIC_irq_trigger(int irq)
{
@@ -1303,39 +1295,35 @@ static inline int IO_APIC_irq_trigger(in
}
#endif
-static void ioapic_register_intr(unsigned int irq, unsigned long trigger)
+static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
+ unsigned long trigger)
{
+ struct irq_chip *chip = &ioapic_chip;
+ irq_flow_handler_t hdl;
+ bool fasteoi;
if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL)
+ trigger == IOAPIC_LEVEL) {
irq_set_status_flags(irq, IRQ_LEVEL);
- else
+ fasteoi = true;
+ } else {
irq_clear_status_flags(irq, IRQ_LEVEL);
+ fasteoi = false;
+ }
- if (irq_remapped(get_irq_chip_data(irq))) {
+ if (irq_remapped(cfg)) {
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- if (trigger)
- set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
- handle_fasteoi_irq,
- "fasteoi");
- else
- set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
- handle_edge_irq, "edge");
- return;
+ chip = &ir_ioapic_chip;
+ fasteoi = trigger != 0;
}
- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL)
- set_irq_chip_and_handler_name(irq, &ioapic_chip,
- handle_fasteoi_irq,
- "fasteoi");
- else
- set_irq_chip_and_handler_name(irq, &ioapic_chip,
- handle_edge_irq, "edge");
+ hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
+ irq_set_chip_and_handler_name(irq, chip, hdl,
+ fasteoi ? "fasteoi" : "edge");
}
#else /* !CONFIG_XEN */
#define __clear_irq_vector(irq, cfg) ((void)0)
-#define ioapic_register_intr(irq, trigger) evtchn_register_pirq(irq)
+#define ioapic_register_intr(irq, cfg, trigger) evtchn_register_pirq(irq)
#endif
static int setup_ioapic_entry(int apic_id, int irq,
@@ -1442,7 +1430,7 @@ static void setup_ioapic_irq(int apic_id
return;
}
- ioapic_register_intr(irq, trigger);
+ ioapic_register_intr(irq, cfg, trigger);
#ifndef CONFIG_XEN
if (irq < legacy_pic->nr_legacy_irqs)
legacy_pic->mask(irq);
@@ -1455,33 +1443,26 @@ static struct {
DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
} mp_ioapic_routing[MAX_IO_APICS];
-static void __init setup_IO_APIC_irqs(void)
+static bool __init io_apic_pin_not_connected(int idx, int apic_id, int pin)
{
- int apic_id, pin, idx, irq, notcon = 0;
- int node = cpu_to_node(0);
- struct irq_cfg *cfg;
+ if (idx != -1)
+ return false;
- apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+ apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n",
+ mp_ioapics[apic_id].apicid, pin);
+ return true;
+}
+
+static void __init __io_apic_setup_irqs(unsigned int apic_id)
+{
+ int idx, node = cpu_to_node(0);
+ struct io_apic_irq_attr attr;
+ unsigned int pin, irq;
- for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
idx = find_irq_entry(apic_id, pin, mp_INT);
- if (idx == -1) {
- if (!notcon) {
- notcon = 1;
- apic_printk(APIC_VERBOSE,
- KERN_DEBUG " %d-%d",
- mp_ioapics[apic_id].apicid, pin);
- } else
- apic_printk(APIC_VERBOSE, " %d-%d",
- mp_ioapics[apic_id].apicid, pin);
+ if (io_apic_pin_not_connected(idx, apic_id, pin))
continue;
- }
- if (notcon) {
- apic_printk(APIC_VERBOSE,
- " (apicid-pin) not connected\n");
- notcon = 0;
- }
irq = pin_2_irq(idx, apic_id, pin);
@@ -1497,26 +1478,25 @@ static void __init setup_IO_APIC_irqs(vo
* installed and if it returns 1:
*/
if (apic->multi_timer_check &&
- apic->multi_timer_check(apic_id, irq))
+ apic->multi_timer_check(apic_id, irq))
continue;
#endif
- cfg = alloc_irq_and_cfg_at(irq, node);
- if (!cfg)
- continue;
+ set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
+ irq_polarity(idx));
- add_pin_to_irq_node(cfg, node, apic_id, pin);
- /*
- * don't mark it in pin_programmed, so later acpi could
- * set it correctly when irq < 16
- */
- setup_ioapic_irq(apic_id, pin, irq, cfg, irq_trigger(idx),
- irq_polarity(idx));
+ io_apic_setup_irq_pin(irq, node, &attr);
}
+}
- if (notcon)
- apic_printk(APIC_VERBOSE,
- " (apicid-pin) not connected\n");
+static void __init setup_IO_APIC_irqs(void)
+{
+ unsigned int apic_id;
+
+ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+ for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
+ __io_apic_setup_irqs(apic_id);
}
/*
@@ -1527,7 +1507,7 @@ static void __init setup_IO_APIC_irqs(vo
void setup_IO_APIC_irq_extra(u32 gsi)
{
int apic_id = 0, pin, idx, irq, node = cpu_to_node(0);
- struct irq_cfg *cfg;
+ struct io_apic_irq_attr attr;
/*
* Convert 'gsi' to 'ioapic.pin'.
@@ -1551,21 +1531,10 @@ void setup_IO_APIC_irq_extra(u32 gsi)
if (apic_id == 0 || irq < NR_IRQS_LEGACY)
return;
- cfg = alloc_irq_and_cfg_at(irq, node);
- if (!cfg)
- return;
-
- add_pin_to_irq_node(cfg, node, apic_id, pin);
+ set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
+ irq_polarity(idx));
- if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
- pr_debug("Pin %d-%d already programmed\n",
- mp_ioapics[apic_id].apicid, pin);
- return;
- }
- set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
-
- setup_ioapic_irq(apic_id, pin, irq, cfg,
- irq_trigger(idx), irq_polarity(idx));
+ io_apic_setup_irq_pin_once(irq, node, &attr);
}
#ifndef CONFIG_XEN
@@ -1598,7 +1567,8 @@ static void __init setup_timer_IRQ0_pin(
* The timer IRQ doesn't have to know that behind the
* scene we may have a 8259A-master in AEOI mode ...
*/
- set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
+ irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
+ "edge");
/*
* Add it to the IO-APIC irq-routing table:
@@ -1705,7 +1675,7 @@ __apicdebuginit(void) print_IO_APIC(void
for_each_active_irq(irq) {
struct irq_pin_list *entry;
- cfg = get_irq_chip_data(irq);
+ cfg = irq_get_chip_data(irq);
if (!cfg)
continue;
entry = cfg->irq_2_pin;
@@ -1996,7 +1966,7 @@ void disable_IO_APIC(void)
*
* With interrupt-remapping, for now we will use virtual wire A mode,
* as virtual wire B is little complex (need to configure both
- * IOAPIC RTE aswell as interrupt-remapping table entry).
+ * IOAPIC RTE as well as interrupt-remapping table entry).
* As this gets called during crash dump, keep this simple for now.
*/
if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) {
@@ -2471,7 +2441,7 @@ static void irq_complete_move(struct irq
void irq_force_complete_move(int irq)
{
- struct irq_cfg *cfg = get_irq_chip_data(irq);
+ struct irq_cfg *cfg = irq_get_chip_data(irq);
if (!cfg)
return;
@@ -2485,7 +2455,7 @@ static inline void irq_complete_move(str
static void ack_apic_edge(struct irq_data *data)
{
irq_complete_move(data->chip_data);
- move_native_irq(data->irq);
+ irq_move_irq(data);
ack_APIC_irq();
}
@@ -2542,7 +2512,7 @@ static void ack_apic_level(struct irq_da
irq_complete_move(cfg);
#ifdef CONFIG_GENERIC_PENDING_IRQ
/* If we are moving the irq we need to mask it */
- if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
+ if (unlikely(irqd_is_setaffinity_pending(data))) {
do_unmask_irq = 1;
mask_ioapic(cfg);
}
@@ -2631,7 +2601,7 @@ static void ack_apic_level(struct irq_da
* and you can go talk to the chipset vendor about it.
*/
if (!io_apic_level_ack_pending(cfg))
- move_masked_irq(irq);
+ irq_move_masked_irq(data);
unmask_ioapic(cfg);
}
}
@@ -2699,7 +2669,7 @@ static inline void init_IO_APIC_traps(vo
if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
continue;
#endif
- cfg = get_irq_chip_data(irq);
+ cfg = irq_get_chip_data(irq);
if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
/*
* Hmm.. We don't have an entry for this,
@@ -2710,7 +2680,7 @@ static inline void init_IO_APIC_traps(vo
legacy_pic->make_irq(irq);
else
/* Strange. Oh, well.. */
- set_irq_chip(irq, &no_irq_chip);
+ irq_set_chip(irq, &no_irq_chip);
}
}
}
@@ -2751,7 +2721,7 @@ static struct irq_chip lapic_chip __read
static void lapic_register_intr(int irq)
{
irq_clear_status_flags(irq, IRQ_LEVEL);
- set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
+ irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
"edge");
}
@@ -2835,7 +2805,7 @@ int timer_through_8259 __initdata;
*/
static inline void __init check_timer(void)
{
- struct irq_cfg *cfg = get_irq_chip_data(0);
+ struct irq_cfg *cfg = irq_get_chip_data(0);
int node = cpu_to_node(0);
int apic1, pin1, apic2, pin2;
unsigned long flags;
@@ -3026,7 +2996,7 @@ void __init setup_IO_APIC(void)
}
/*
- * Called after all the initialization is done. If we didnt find any
+ * Called after all the initialization is done. If we didn't find any
* APIC bugs then we can allow the modify fast path
*/
@@ -3048,89 +3018,84 @@ static int __init io_apic_bug_finalize(v
late_initcall(io_apic_bug_finalize);
#ifndef CONFIG_XEN
-struct sysfs_ioapic_data {
- struct sys_device dev;
- struct IO_APIC_route_entry entry[0];
-};
-static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
+static struct IO_APIC_route_entry *ioapic_saved_data[MAX_IO_APICS];
-static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
+static void suspend_ioapic(int ioapic_id)
{
- struct IO_APIC_route_entry *entry;
- struct sysfs_ioapic_data *data;
+ struct IO_APIC_route_entry *saved_data = ioapic_saved_data[ioapic_id];
int i;
- data = container_of(dev, struct sysfs_ioapic_data, dev);
- entry = data->entry;
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
- *entry = ioapic_read_entry(dev->id, i);
+ if (!saved_data)
+ return;
+
+ for (i = 0; i < nr_ioapic_registers[ioapic_id]; i++)
+ saved_data[i] = ioapic_read_entry(ioapic_id, i);
+}
+
+static int ioapic_suspend(void)
+{
+ int ioapic_id;
+
+ for (ioapic_id = 0; ioapic_id < nr_ioapics; ioapic_id++)
+ suspend_ioapic(ioapic_id);
return 0;
}
-static int ioapic_resume(struct sys_device *dev)
+static void resume_ioapic(int ioapic_id)
{
- struct IO_APIC_route_entry *entry;
- struct sysfs_ioapic_data *data;
+ struct IO_APIC_route_entry *saved_data = ioapic_saved_data[ioapic_id];
unsigned long flags;
union IO_APIC_reg_00 reg_00;
int i;
- data = container_of(dev, struct sysfs_ioapic_data, dev);
- entry = data->entry;
+ if (!saved_data)
+ return;
raw_spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(dev->id, 0);
- if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
- reg_00.bits.ID = mp_ioapics[dev->id].apicid;
- io_apic_write(dev->id, 0, reg_00.raw);
+ reg_00.raw = io_apic_read(ioapic_id, 0);
+ if (reg_00.bits.ID != mp_ioapics[ioapic_id].apicid) {
+ reg_00.bits.ID = mp_ioapics[ioapic_id].apicid;
+ io_apic_write(ioapic_id, 0, reg_00.raw);
}
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
- for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
- ioapic_write_entry(dev->id, i, entry[i]);
+ for (i = 0; i < nr_ioapic_registers[ioapic_id]; i++)
+ ioapic_write_entry(ioapic_id, i, saved_data[i]);
+}
- return 0;
+static void ioapic_resume(void)
+{
+ int ioapic_id;
+
+ for (ioapic_id = nr_ioapics - 1; ioapic_id >= 0; ioapic_id--)
+ resume_ioapic(ioapic_id);
}
-static struct sysdev_class ioapic_sysdev_class = {
- .name = "ioapic",
+static struct syscore_ops ioapic_syscore_ops = {
.suspend = ioapic_suspend,
.resume = ioapic_resume,
};
-static int __init ioapic_init_sysfs(void)
+static int __init ioapic_init_ops(void)
{
- struct sys_device * dev;
- int i, size, error;
+ int i;
- error = sysdev_class_register(&ioapic_sysdev_class);
- if (error)
- return error;
+ for (i = 0; i < nr_ioapics; i++) {
+ unsigned int size;
- for (i = 0; i < nr_ioapics; i++ ) {
- size = sizeof(struct sys_device) + nr_ioapic_registers[i]
+ size = nr_ioapic_registers[i]
* sizeof(struct IO_APIC_route_entry);
- mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
- if (!mp_ioapic_data[i]) {
- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
- continue;
- }
- dev = &mp_ioapic_data[i]->dev;
- dev->id = i;
- dev->cls = &ioapic_sysdev_class;
- error = sysdev_register(dev);
- if (error) {
- kfree(mp_ioapic_data[i]);
- mp_ioapic_data[i] = NULL;
- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
- continue;
- }
+ ioapic_saved_data[i] = kzalloc(size, GFP_KERNEL);
+ if (!ioapic_saved_data[i])
+ pr_err("IOAPIC %d: suspend/resume impossible!\n", i);
}
+ register_syscore_ops(&ioapic_syscore_ops);
+
return 0;
}
-device_initcall(ioapic_init_sysfs);
+device_initcall(ioapic_init_ops);
/*
* Dynamic irq allocate and deallocation
@@ -3160,7 +3125,7 @@ unsigned int create_irq_nr(unsigned int
raw_spin_unlock_irqrestore(&vector_lock, flags);
if (ret) {
- set_irq_chip_data(irq, cfg);
+ irq_set_chip_data(irq, cfg);
irq_clear_status_flags(irq, IRQ_NOREQUEST);
} else {
free_irq_at(irq, cfg);
@@ -3185,7 +3150,7 @@ int create_irq(void)
void destroy_irq(unsigned int irq)
{
- struct irq_cfg *cfg = get_irq_chip_data(irq);
+ struct irq_cfg *cfg = irq_get_chip_data(irq);
unsigned long flags;
irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
@@ -3220,7 +3185,7 @@ static int msi_compose_msg(struct pci_de
dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
- if (irq_remapped(get_irq_chip_data(irq))) {
+ if (irq_remapped(cfg)) {
struct irte irte;
int ir_index;
u16 sub_handle;
@@ -3392,6 +3357,7 @@ static int msi_alloc_irte(struct pci_dev
static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
{
+ struct irq_chip *chip = &msi_chip;
struct msi_msg msg;
int ret;
@@ -3399,14 +3365,15 @@ static int setup_msi_irq(struct pci_dev
if (ret < 0)
return ret;
- set_irq_msi(irq, msidesc);
+ irq_set_msi_desc(irq, msidesc);
write_msi_msg(irq, &msg);
- if (irq_remapped(get_irq_chip_data(irq))) {
+ if (irq_remapped(irq_get_chip_data(irq))) {
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
- } else
- set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+ chip = &msi_ir_chip;
+ }
+
+ irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
@@ -3524,8 +3491,8 @@ int arch_setup_dmar_msi(unsigned int irq
if (ret < 0)
return ret;
dmar_msi_write(irq, &msg);
- set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
- "edge");
+ irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+ "edge");
return 0;
}
#endif
@@ -3583,6 +3550,7 @@ static struct irq_chip hpet_msi_type = {
int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
{
+ struct irq_chip *chip = &hpet_msi_type;
struct msi_msg msg;
int ret;
@@ -3602,15 +3570,12 @@ int arch_setup_hpet_msi(unsigned int irq
if (ret < 0)
return ret;
- hpet_msi_write(get_irq_data(irq), &msg);
+ hpet_msi_write(irq_get_handler_data(irq), &msg);
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- if (irq_remapped(get_irq_chip_data(irq)))
- set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type,
- handle_edge_irq, "edge");
- else
- set_irq_chip_and_handler_name(irq, &hpet_msi_type,
- handle_edge_irq, "edge");
+ if (irq_remapped(irq_get_chip_data(irq)))
+ chip = &ir_hpet_msi_type;
+ irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
return 0;
}
#endif
@@ -3697,7 +3662,7 @@ int arch_setup_ht_irq(unsigned int irq,
write_ht_irq_msg(irq, &msg);
- set_irq_chip_and_handler_name(irq, &ht_irq_chip,
+ irq_set_chip_and_handler_name(irq, &ht_irq_chip,
handle_edge_irq, "edge");
dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
@@ -3706,7 +3671,40 @@ int arch_setup_ht_irq(unsigned int irq,
}
#endif /* CONFIG_HT_IRQ */
-int __init io_apic_get_redir_entries (int ioapic)
+static int
+io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
+{
+ struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
+ int ret;
+
+ if (!cfg)
+ return -EINVAL;
+ ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin);
+ if (!ret)
+ setup_ioapic_irq(attr->ioapic, attr->ioapic_pin, irq, cfg,
+ attr->trigger, attr->polarity);
+ return ret;
+}
+
+int io_apic_setup_irq_pin_once(unsigned int irq, int node,
+ struct io_apic_irq_attr *attr)
+{
+ unsigned int id = attr->ioapic, pin = attr->ioapic_pin;
+ int ret;
+
+ /* Avoid redundant programming */
+ if (test_bit(pin, mp_ioapic_routing[id].pin_programmed)) {
+ pr_debug("Pin %d-%d already programmed\n",
+ mp_ioapics[id].apicid, pin);
+ return 0;
+ }
+ ret = io_apic_setup_irq_pin(irq, node, attr);
+ if (!ret)
+ set_bit(pin, mp_ioapic_routing[id].pin_programmed);
+ return ret;
+}
+
+static int __init io_apic_get_redir_entries(int ioapic)
{
union IO_APIC_reg_01 reg_01;
unsigned long flags;
@@ -3762,105 +3760,32 @@ int __init arch_probe_nr_irqs(void)
#endif
#endif /* CONFIG_XEN */
-static int __io_apic_set_pci_routing(struct device *dev, int irq,
- struct io_apic_irq_attr *irq_attr)
+int io_apic_set_pci_routing(struct device *dev, int irq,
+ struct io_apic_irq_attr *irq_attr)
{
- struct irq_cfg *cfg;
int node;
- int ioapic, pin;
- int trigger, polarity;
- ioapic = irq_attr->ioapic;
#ifdef CONFIG_XEN
if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs) {
apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ %d\n",
- ioapic, irq);
+ irq_attr->ioapic, irq);
return -EINVAL;
}
#endif
if (!IO_APIC_IRQ(irq)) {
apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
- ioapic);
+ irq_attr->ioapic);
return -EINVAL;
}
- if (dev)
- node = dev_to_node(dev);
- else
- node = cpu_to_node(0);
-
- cfg = alloc_irq_and_cfg_at(irq, node);
- if (!cfg)
- return 0;
-
- pin = irq_attr->ioapic_pin;
- trigger = irq_attr->trigger;
- polarity = irq_attr->polarity;
-
- /*
- * IRQs < 16 are already in the irq_2_pin[] map
- */
- if (irq >= legacy_pic->nr_legacy_irqs) {
- if (__add_pin_to_irq_node(cfg, node, ioapic, pin)) {
- printk(KERN_INFO "can not add pin %d for irq %d\n",
- pin, irq);
- return 0;
- }
- }
-
- setup_ioapic_irq(ioapic, pin, irq, cfg, trigger, polarity);
+ node = dev ? dev_to_node(dev) : cpu_to_node(0);
- return 0;
-}
-
-int io_apic_set_pci_routing(struct device *dev, int irq,
- struct io_apic_irq_attr *irq_attr)
-{
- int ioapic, pin;
- /*
- * Avoid pin reprogramming. PRTs typically include entries
- * with redundant pin->gsi mappings (but unique PCI devices);
- * we only program the IOAPIC on the first.
- */
- ioapic = irq_attr->ioapic;
- pin = irq_attr->ioapic_pin;
- if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
- pr_debug("Pin %d-%d already programmed\n",
- mp_ioapics[ioapic].apicid, pin);
- return 0;
- }
- set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
-
- return __io_apic_set_pci_routing(dev, irq, irq_attr);
+ return io_apic_setup_irq_pin_once(irq, node, irq_attr);
}
-u8 __init io_apic_unique_id(u8 id)
-{
#ifdef CONFIG_X86_32
#ifndef CONFIG_XEN
- if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
- !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
- return io_apic_get_unique_id(nr_ioapics, id);
- else
-#endif
- return id;
-#else
- int i;
- DECLARE_BITMAP(used, 256);
-
- bitmap_zero(used, 256);
- for (i = 0; i < nr_ioapics; i++) {
- struct mpc_ioapic *ia = &mp_ioapics[i];
- __set_bit(ia->apicid, used);
- }
- if (!test_bit(id, used))
- return id;
- return find_first_zero_bit(used, 256);
-#endif
-}
-
-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
-int __init io_apic_get_unique_id(int ioapic, int apic_id)
+static int __init io_apic_get_unique_id(int ioapic, int apic_id)
{
union IO_APIC_reg_00 reg_00;
static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
@@ -3935,7 +3860,34 @@ int __init io_apic_get_unique_id(int ioa
}
#endif
-int __init io_apic_get_version(int ioapic)
+static u8 __init io_apic_unique_id(u8 id)
+{
+#ifndef CONFIG_XEN
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+ !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+ return io_apic_get_unique_id(nr_ioapics, id);
+ else
+#endif
+ return id;
+}
+#else
+static u8 __init io_apic_unique_id(u8 id)
+{
+ int i;
+ DECLARE_BITMAP(used, 256);
+
+ bitmap_zero(used, 256);
+ for (i = 0; i < nr_ioapics; i++) {
+ struct mpc_ioapic *ia = &mp_ioapics[i];
+ __set_bit(ia->apicid, used);
+ }
+ if (!test_bit(id, used))
+ return id;
+ return find_first_zero_bit(used, 256);
+}
+#endif
+
+static int __init io_apic_get_version(int ioapic)
{
union IO_APIC_reg_01 reg_01;
unsigned long flags;
@@ -3981,8 +3933,8 @@ int acpi_get_override_irq(u32 gsi, int *
void __init setup_ioapic_dest(void)
{
int pin, ioapic, irq, irq_entry;
- struct irq_desc *desc;
const struct cpumask *mask;
+ struct irq_data *idata;
if (skip_ioapic_setup == 1)
return;
@@ -3997,21 +3949,20 @@ void __init setup_ioapic_dest(void)
if ((ioapic > 0) && (irq > 16))
continue;
- desc = irq_to_desc(irq);
+ idata = irq_get_irq_data(irq);
/*
* Honour affinities which have been set in early boot
*/
- if (desc->status &
- (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
- mask = desc->irq_data.affinity;
+ if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata))
+ mask = idata->affinity;
else
mask = apic->target_cpus();
if (intr_remapping_enabled)
- ir_ioapic_set_affinity(&desc->irq_data, mask, false);
+ ir_ioapic_set_affinity(idata, mask, false);
else
- ioapic_set_affinity(&desc->irq_data, mask, false);
+ ioapic_set_affinity(idata, mask, false);
}
}
@@ -4140,10 +4091,10 @@ int mp_find_ioapic_pin(int ioapic, u32 g
return gsi - mp_gsi_routing[ioapic].gsi_base;
}
-static int bad_ioapic(unsigned long address)
+static __init int bad_ioapic(unsigned long address)
{
if (nr_ioapics >= MAX_IO_APICS) {
- printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded "
+ printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded "
"(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);
return 1;
}
@@ -4203,21 +4154,17 @@ void __init mp_register_ioapic(int id, u
/* Enable IOAPIC early just for system timer */
void __init pre_init_apic_IRQ0(void)
{
- struct irq_cfg *cfg;
+ struct io_apic_irq_attr attr = { 0, 0, 0, 0 };
printk(KERN_INFO "Early APIC setup for system timer0\n");
#ifndef CONFIG_SMP
physid_set_mask_of_physid(boot_cpu_physical_apicid,
&phys_cpu_present_map);
#endif
- /* Make sure the irq descriptor is set up */
- cfg = alloc_irq_and_cfg_at(0, 0);
-
setup_local_APIC();
- add_pin_to_irq_node(cfg, 0, 0, 0);
- set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
-
- setup_ioapic_irq(0, 0, 0, cfg, 0, 0);
+ io_apic_setup_irq_pin(0, 0, &attr);
+ irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
+ "edge");
}
#endif
--- head.orig/arch/x86/kernel/cpu/amd.c 2012-05-08 10:52:17.000000000 +0200
+++ head/arch/x86/kernel/cpu/amd.c 2012-05-08 10:52:53.000000000 +0200
@@ -671,6 +671,7 @@ static void __cpuinit init_amd(struct cp
if (c->x86 > 0x11)
set_cpu_cap(c, X86_FEATURE_ARAT);
+#ifndef CONFIG_XEN
/*
* Disable GART TLB Walk Errors on Fam10h. We do this here
* because this is always needed when GART is enabled, even in a
@@ -694,6 +695,7 @@ static void __cpuinit init_amd(struct cp
}
rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
+#endif
}
#ifdef CONFIG_X86_32
--- head.orig/arch/x86/kernel/cpu/common-xen.c 2011-05-18 10:47:16.000000000 +0200
+++ head/arch/x86/kernel/cpu/common-xen.c 2011-05-18 10:47:21.000000000 +0200
@@ -910,7 +910,7 @@ static void __cpuinit identify_cpu(struc
select_idle_routine(c);
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
numa_add_cpu(smp_processor_id());
#endif
}
--- head.orig/arch/x86/kernel/e820-xen.c 2011-04-26 09:19:42.000000000 +0200
+++ head/arch/x86/kernel/e820-xen.c 2011-09-23 16:02:14.000000000 +0200
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
+#include <linux/crash_dump.h>
#include <linux/bootmem.h>
#include <linux/pfn.h>
#include <linux/suspend.h>
@@ -722,21 +723,15 @@ __init void e820_setup_gap(void)
* boot_params.e820_map, others are passed via SETUP_E820_EXT node of
* linked list of struct setup_data, which is parsed here.
*/
-void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data)
+void __init parse_e820_ext(struct setup_data *sdata)
{
- u32 map_len;
int entries;
struct e820entry *extmap;
entries = sdata->len / sizeof(struct e820entry);
- map_len = sdata->len + sizeof(struct setup_data);
- if (map_len > PAGE_SIZE)
- sdata = early_ioremap(pa_data, map_len);
extmap = (struct e820entry *)(sdata->data);
__append_e820_map(extmap, entries);
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
- if (map_len > PAGE_SIZE)
- early_iounmap(sdata, map_len);
printk(KERN_INFO "extended physical RAM map:\n");
_e820_print_map(&e820, "extended");
}
@@ -946,15 +941,23 @@ static int __init parse_memopt(char *p)
if (!p)
return -EINVAL;
-#ifdef CONFIG_X86_32
+#ifndef CONFIG_XEN
if (!strcmp(p, "nopentium")) {
+#ifdef CONFIG_X86_32
setup_clear_cpu_cap(X86_FEATURE_PSE);
return 0;
+#else
+ printk(KERN_WARNING "mem=nopentium ignored! (only supported on x86_32)\n");
+ return -EINVAL;
+#endif
}
#endif
userdef = 1;
mem_size = memparse(p, &p);
+ /* don't remove all of memory when handling "mem={invalid}" param */
+ if (mem_size == 0)
+ return -EINVAL;
e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
i = e820.nr_map - 1;
--- head.orig/arch/x86/kernel/entry_32-xen.S 2012-02-29 14:18:49.000000000 +0100
+++ head/arch/x86/kernel/entry_32-xen.S 2012-02-29 14:19:12.000000000 +0100
@@ -66,6 +66,8 @@
#define sysexit_audit syscall_exit_work
#endif
+ .section .entry.text, "ax"
+
/*
* We use macros for low-level operations which need to be overridden
* for paravirtualization. The following will never clobber any registers:
@@ -399,7 +401,7 @@ sysenter_past_esp:
* A tiny bit of offset fixup is necessary - 4*4 means the 4 words
* pushed above; +8 corresponds to copy_thread's esp0 setting.
*/
- pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp)
+ pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
CFI_REL_OFFSET eip, 0
pushl_cfi %eax
@@ -858,7 +860,7 @@ ENDPROC(ptregs_clone)
*/
.section .init.rodata,"a"
ENTRY(interrupt)
-.text
+.section .entry.text, "ax"
.p2align 5
.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
@@ -877,7 +879,7 @@ vector=FIRST_EXTERNAL_VECTOR
.endif
.previous
.long 1b
- .text
+ .section .entry.text, "ax"
vector=vector+1
.endif
.endr
@@ -1685,11 +1687,10 @@ END(general_protection)
#ifdef CONFIG_KVM_GUEST
ENTRY(async_page_fault)
RING0_EC_FRAME
- pushl $do_async_page_fault
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $do_async_page_fault
jmp error_code
CFI_ENDPROC
-END(apf_page_fault)
+END(async_page_fault)
#endif
/*
--- head.orig/arch/x86/kernel/entry_64-xen.S 2011-10-07 11:41:33.000000000 +0200
+++ head/arch/x86/kernel/entry_64-xen.S 2011-10-07 11:41:39.000000000 +0200
@@ -21,7 +21,7 @@
* A note on terminology:
* - top of stack: Architecture defined interrupt frame from SS to RIP
* at the top of the kernel process stack.
- * - partial stack frame: partially saved registers upto R11.
+ * - partial stack frame: partially saved registers up to R11.
* - full stack frame: Like partial stack frame, but all register saved.
*
* Some macro usage:
@@ -66,6 +66,8 @@
#define __AUDIT_ARCH_LE 0x40000000
.code64
+ .section .entry.text, "ax"
+
#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
@@ -457,7 +459,7 @@ ENTRY(ret_from_fork)
END(ret_from_fork)
/*
- * System call entry. Upto 6 arguments in registers are supported.
+ * System call entry. Up to 6 arguments in registers are supported.
*
* SYSCALL does not save anything on the stack and does not change the
* stack pointer.
@@ -858,9 +860,12 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
x86_platform_ipi smp_x86_platform_ipi
#ifdef CONFIG_SMP
-.irpc idx, "01234567"
+.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+.if NUM_INVALIDATE_TLB_VECTORS > \idx
apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
invalidate_interrupt\idx smp_invalidate_interrupt
+.endif
.endr
#endif
--- head.orig/arch/x86/kernel/head32-xen.c 2011-05-09 11:42:46.000000000 +0200
+++ head/arch/x86/kernel/head32-xen.c 2011-05-09 11:43:03.000000000 +0200
@@ -51,15 +51,6 @@ void __init i386_start_kernel(void)
memblock_init();
-#ifdef CONFIG_X86_TRAMPOLINE
- /*
- * But first pinch a few for the stack/trampoline stuff
- * FIXME: Don't need the extra page at 4K, but need to fix
- * trampoline before removing it. (see the GDT stuff)
- */
- memblock_x86_reserve_range(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE");
-#endif
-
memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
#ifndef CONFIG_XEN
--- head.orig/arch/x86/kernel/head64-xen.c 2011-02-01 15:09:47.000000000 +0100
+++ head/arch/x86/kernel/head64-xen.c 2011-04-12 15:59:10.000000000 +0200
@@ -95,9 +95,6 @@ void __init x86_64_start_kernel(char * r
/* Make NULL pointers segfault */
zap_identity_mappings();
- /* Cleanup the over mapped high alias */
- cleanup_highmap();
-
for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
#ifdef CONFIG_EARLY_PRINTK
set_intr_gate(i, &early_idt_handlers[i]);
--- head.orig/arch/x86/kernel/head_32-xen.S 2011-08-09 11:07:43.000000000 +0200
+++ head/arch/x86/kernel/head_32-xen.S 2011-08-09 11:10:21.000000000 +0200
@@ -92,7 +92,7 @@ ENTRY(startup_32)
movl $__HYPERVISOR_update_va_mapping, %eax
int $0x82
- movl $(PAGE_SIZE_asm / 8), %ecx
+ movl $(PAGE_SIZE / 8), %ecx
movl %esp, %ebx
movl $__HYPERVISOR_set_gdt, %eax
int $0x82
@@ -121,7 +121,7 @@ ENTRY(hypercall_page)
* BSS section
*/
__PAGE_ALIGNED_BSS
- .align PAGE_SIZE_asm
+ .align PAGE_SIZE
ENTRY(swapper_pg_fixmap)
.fill 1024,4,0
ENTRY(empty_zero_page)
--- head.orig/arch/x86/kernel/ioport-xen.c 2011-02-01 14:55:46.000000000 +0100
+++ head/arch/x86/kernel/ioport-xen.c 2011-04-12 16:53:32.000000000 +0200
@@ -14,23 +14,10 @@
#include <linux/slab.h>
#include <linux/thread_info.h>
#include <linux/syscalls.h>
+#include <linux/bitmap.h>
#include <asm/syscalls.h>
#include <xen/interface/physdev.h>
-/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
-static void set_bitmap(unsigned long *bitmap, unsigned int base,
- unsigned int extent, int new_value)
-{
- unsigned int i;
-
- for (i = base; i < base + extent; i++) {
- if (new_value)
- __set_bit(i, bitmap);
- else
- __clear_bit(i, bitmap);
- }
-}
-
/*
* this changes the io permissions bitmap in the current task.
*/
@@ -65,7 +52,10 @@ asmlinkage long sys_ioperm(unsigned long
&set_iobitmap));
}
- set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
+ if (turn_on)
+ bitmap_clear(t->io_bitmap_ptr, from, num);
+ else
+ bitmap_set(t->io_bitmap_ptr, from, num);
return 0;
}
--- head.orig/arch/x86/kernel/irq-xen.c 2011-02-18 15:17:23.000000000 +0100
+++ head/arch/x86/kernel/irq-xen.c 2011-04-13 17:01:32.000000000 +0200
@@ -8,6 +8,7 @@
#include <linux/seq_file.h>
#include <linux/smp.h>
#include <linux/ftrace.h>
+#include <linux/delay.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
@@ -48,9 +49,9 @@ void ack_bad_irq(unsigned int irq)
#define irq_stats(x) (&per_cpu(irq_stat, x))
/*
- * /proc/interrupts printing:
+ * /proc/interrupts printing for arch specific interrupts
*/
-static int show_other_interrupts(struct seq_file *p, int prec)
+int arch_show_interrupts(struct seq_file *p, int prec)
{
int j;
@@ -135,59 +136,6 @@ static int show_other_interrupts(struct
return 0;
}
-int show_interrupts(struct seq_file *p, void *v)
-{
- unsigned long flags, any_count = 0;
- int i = *(loff_t *) v, j, prec;
- struct irqaction *action;
- struct irq_desc *desc;
-
- if (i > nr_irqs)
- return 0;
-
- for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
- j *= 10;
-
- if (i == nr_irqs)
- return show_other_interrupts(p, prec);
-
- /* print header */
- if (i == 0) {
- seq_printf(p, "%*s", prec + 8, "");
- for_each_online_cpu(j)
- seq_printf(p, "CPU%-8d", j);
- seq_putc(p, '\n');
- }
-
- desc = irq_to_desc(i);
- if (!desc)
- return 0;
-
- raw_spin_lock_irqsave(&desc->lock, flags);
- for_each_online_cpu(j)
- any_count |= kstat_irqs_cpu(i, j);
- action = desc->action;
- if (!action && !any_count)
- goto out;
-
- seq_printf(p, "%*d: ", prec, i);
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
- seq_printf(p, " %8s", desc->irq_data.chip->name);
- seq_printf(p, "-%-8s", desc->name);
-
- if (action) {
- seq_printf(p, " %s", action->name);
- while ((action = action->next) != NULL)
- seq_printf(p, ", %s", action->name);
- }
-
- seq_putc(p, '\n');
-out:
- raw_spin_unlock_irqrestore(&desc->lock, flags);
- return 0;
-}
-
/*
* /proc/stat helpers
*/
@@ -295,15 +243,6 @@ void smp_x86_platform_ipi(struct pt_regs
}
#endif
-#ifdef CONFIG_OF
-unsigned int irq_create_of_mapping(struct device_node *controller,
- const u32 *intspec, unsigned int intsize)
-{
- return intspec[0];
-}
-EXPORT_SYMBOL_GPL(irq_create_of_mapping);
-#endif
-
#ifdef CONFIG_HOTPLUG_CPU
#include <xen/evtchn.h>
/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
@@ -313,6 +252,7 @@ void fixup_irqs(void)
static int warned;
struct irq_desc *desc;
struct irq_data *data;
+ struct irq_chip *chip;
static DECLARE_BITMAP(irqs_used, NR_IRQS);
for_each_irq_desc(irq, desc) {
@@ -328,7 +268,7 @@ void fixup_irqs(void)
/* interrupt's are disabled at this point */
raw_spin_lock(&desc->lock);
- data = &desc->irq_data;
+ data = irq_desc_get_irq_data(desc);
affinity = data->affinity;
if (!irq_has_action(irq) ||
cpumask_subset(affinity, cpu_online_mask)) {
@@ -344,16 +284,17 @@ void fixup_irqs(void)
affinity = cpu_all_mask;
}
- if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_mask)
- data->chip->irq_mask(data);
+ chip = irq_data_get_irq_chip(data);
+ if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
+ chip->irq_mask(data);
- if (data->chip->irq_set_affinity)
- data->chip->irq_set_affinity(data, affinity, true);
+ if (chip->irq_set_affinity)
+ chip->irq_set_affinity(data, affinity, true);
else if (data->chip != &no_irq_chip && !(warned++))
set_affinity = 0;
- if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_unmask)
- data->chip->irq_unmask(data);
+ if (!irqd_can_move_in_process_context(data) && chip->irq_unmask)
+ chip->irq_unmask(data);
raw_spin_unlock(&desc->lock);
@@ -380,10 +321,11 @@ void fixup_irqs(void)
if (xen_test_irq_pending(irq)) {
desc = irq_to_desc(irq);
- data = &desc->irq_data;
+ data = irq_desc_get_irq_data(desc);
+ chip = irq_data_get_irq_chip(data);
raw_spin_lock(&desc->lock);
- if (data->chip->irq_retrigger)
- data->chip->irq_retrigger(data);
+ if (chip->irq_retrigger)
+ chip->irq_retrigger(data);
raw_spin_unlock(&desc->lock);
}
}
--- head.orig/arch/x86/kernel/mpparse-xen.c 2011-02-01 16:09:24.000000000 +0100
+++ head/arch/x86/kernel/mpparse-xen.c 2011-04-12 15:59:10.000000000 +0200
@@ -752,10 +752,6 @@ static void __init check_irq_src(struct
*nr_m_spare += 1;
}
}
-#else /* CONFIG_X86_IO_APIC */
-static
-inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
-#endif /* CONFIG_X86_IO_APIC */
static int
check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
@@ -769,6 +765,10 @@ check_slot(unsigned long mpc_new_phys, u
return ret;
}
+#else /* CONFIG_X86_IO_APIC */
+static
+inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
+#endif /* CONFIG_X86_IO_APIC */
static int __init replace_intsrc_all(struct mpc_table *mpc,
unsigned long mpc_new_phys,
@@ -921,7 +921,7 @@ static int __init update_mp_table(void)
if (!mpc_new_phys) {
unsigned char old, new;
- /* check if we can change the postion */
+ /* check if we can change the position */
mpc->checksum = 0;
old = mpf_checksum((unsigned char *)mpc, mpc->length);
mpc->checksum = 0xff;
@@ -930,7 +930,7 @@ static int __init update_mp_table(void)
printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n");
return 0;
}
- printk(KERN_INFO "use in-positon replacing\n");
+ printk(KERN_INFO "use in-position replacing\n");
} else {
maddr_t mpc_new_bus;
--- head.orig/arch/x86/kernel/process-xen.c 2011-03-03 16:13:18.000000000 +0100
+++ head/arch/x86/kernel/process-xen.c 2011-04-13 17:01:32.000000000 +0200
@@ -89,7 +89,7 @@ void exit_thread(void)
void show_regs(struct pt_regs *regs)
{
show_registers(regs);
- show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs));
+ show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), 0);
}
void show_regs_common(void)
@@ -112,12 +112,9 @@ void show_regs_common(void)
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
- printk(KERN_CONT " ");
- printk(KERN_CONT "%s %s", vendor, product);
- if (board) {
- printk(KERN_CONT "/");
- printk(KERN_CONT "%s", board);
- }
+ printk(KERN_CONT " %s %s", vendor, product);
+ if (board)
+ printk(KERN_CONT "/%s", board);
printk(KERN_CONT "\n");
}
--- head.orig/arch/x86/kernel/process_64-xen.c 2011-02-02 08:48:24.000000000 +0100
+++ head/arch/x86/kernel/process_64-xen.c 2011-04-12 15:59:10.000000000 +0200
@@ -569,6 +569,10 @@ void set_personality_64bit(void)
/* Make sure to be in 64bit mode */
clear_thread_flag(TIF_IA32);
+ /* Ensure the corresponding mm is not marked. */
+ if (current->mm)
+ current->mm->context.ia32_compat = 0;
+
/* TBD: overwrites user setup. Should have two bits.
But 64bit processes have always behaved this way,
so it's not too bad. The main problem is just that
@@ -584,6 +588,10 @@ void set_personality_ia32(void)
set_thread_flag(TIF_IA32);
current->personality |= force_personality32;
+ /* Mark the associated mm as containing 32-bit tasks. */
+ if (current->mm)
+ current->mm->context.ia32_compat = 1;
+
/* Prepare the first "return" to user space */
current_thread_info()->status |= TS_COMPAT;
}
--- head.orig/arch/x86/kernel/setup-xen.c 2012-06-08 10:35:55.000000000 +0200
+++ head/arch/x86/kernel/setup-xen.c 2012-06-08 10:36:10.000000000 +0200
@@ -113,6 +113,7 @@
#endif
#include <asm/mce.h>
#include <asm/alternative.h>
+#include <asm/prom.h>
#ifdef CONFIG_XEN
#include <asm/hypervisor.h>
@@ -332,6 +333,9 @@ static void __init init_gbpages(void)
static inline void init_gbpages(void)
{
}
+static void __init cleanup_highmap(void)
+{
+}
#endif
static void __init reserve_brk(void)
@@ -486,16 +490,30 @@ static void __init parse_setup_data(void
return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
- data = early_memremap(pa_data, PAGE_SIZE);
+ u32 data_len, map_len;
+
+ map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK),
+ (u64)sizeof(struct setup_data));
+ data = early_memremap(pa_data, map_len);
+ data_len = data->len + sizeof(struct setup_data);
+ if (data_len > map_len) {
+ early_iounmap(data, map_len);
+ data = early_memremap(pa_data, data_len);
+ map_len = data_len;
+ }
+
switch (data->type) {
case SETUP_E820_EXT:
- parse_e820_ext(data, pa_data);
+ parse_e820_ext(data);
+ break;
+ case SETUP_DTB:
+ add_dtb(pa_data);
break;
default:
break;
}
pa_data = data->next;
- early_iounmap(data, PAGE_SIZE);
+ early_iounmap(data, map_len);
}
#endif
}
@@ -669,28 +687,6 @@ void __init reserve_standard_io_resource
}
-/*
- * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
- * is_kdump_kernel() to determine if we are booting after a panic. Hence
- * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
- */
-
-#ifdef CONFIG_CRASH_DUMP
-/* elfcorehdr= specifies the location of elf core header
- * stored by the crashed kernel. This option will be passed
- * by kexec loader to the capture kernel.
- */
-static int __init setup_elfcorehdr(char *arg)
-{
- char *end;
- if (!arg)
- return -EINVAL;
- elfcorehdr_addr = memparse(arg, &end);
- return end > arg ? 0 : -EINVAL;
-}
-early_param("elfcorehdr", setup_elfcorehdr);
-#endif
-
static __init void reserve_ibft_region(void)
{
unsigned long addr, size = 0;
@@ -752,15 +748,6 @@ static int __init parse_reservelow(char
early_param("reservelow", parse_reservelow);
#endif
-static u64 __init get_max_mapped(void)
-{
- u64 end = max_pfn_mapped;
-
- end <<= PAGE_SHIFT;
-
- return end;
-}
-
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@@ -776,11 +763,8 @@ static u64 __init get_max_mapped(void)
void __init setup_arch(char **cmdline_p)
{
- int acpi = 0;
- int amd = 0;
unsigned long flags;
#ifdef CONFIG_XEN
- unsigned int i;
unsigned long p2m_pages;
struct physdev_set_iopl set_iopl;
@@ -1047,6 +1031,8 @@ void __init setup_arch(char **cmdline_p)
*/
reserve_brk();
+ cleanup_highmap();
+
memblock.current_limit = get_max_mapped();
memblock_x86_fill();
@@ -1060,15 +1046,10 @@ void __init setup_arch(char **cmdline_p)
printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
max_pfn_mapped<<PAGE_SHIFT);
- reserve_trampoline_memory();
-
-#ifdef CONFIG_ACPI_SLEEP
- /*
- * Reserve low memory region for sleep support.
- * even before init_memory_mapping
- */
- acpi_reserve_wakeup_memory();
+#ifndef CONFIG_XEN
+ setup_trampolines();
#endif
+
init_gbpages();
/* max_pfn_mapped is updated here */
@@ -1118,19 +1099,7 @@ void __init setup_arch(char **cmdline_p)
early_acpi_boot_init();
-#ifdef CONFIG_ACPI_NUMA
- /*
- * Parse SRAT to discover nodes.
- */
- acpi = acpi_numa_init();
-#endif
-
-#ifdef CONFIG_AMD_NUMA
- if (!acpi)
- amd = !amd_numa_init(0, max_pfn);
-#endif
-
- initmem_init(0, max_pfn, acpi, amd);
+ initmem_init();
memblock_find_dma_reserve();
dma32_reserve_bootmem();
@@ -1142,6 +1111,11 @@ void __init setup_arch(char **cmdline_p)
paging_init();
x86_init.paging.pagetable_setup_done(swapper_pg_dir);
+ if (boot_cpu_data.cpuid_level >= 0) {
+ /* A CPU has %cr4 if and only if it has CPUID */
+ mmu_cr4_features = read_cr4();
+ }
+
#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
/* sync back kernel address range */
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
@@ -1224,10 +1198,14 @@ void __init setup_arch(char **cmdline_p)
virt_to_mfn(pfn_to_mfn_frame_list_list);
}
+#ifdef CONFIG_ISA_DMA_API
+# define ch p2m_pages
/* Mark all ISA DMA channels in-use - using them wouldn't work. */
- for (i = 0; i < MAX_DMA_CHANNELS; ++i)
- if (i != 4 && request_dma(i, "xen") != 0)
+ for (ch = 0; ch < MAX_DMA_CHANNELS; ++ch)
+ if (ch != 4 && request_dma(ch, "xen") != 0)
BUG();
+# undef ch
+#endif
#else /* CONFIG_XEN */
generic_apic_probe();
@@ -1238,8 +1216,8 @@ void __init setup_arch(char **cmdline_p)
* Read APIC and some other early information from ACPI tables.
*/
acpi_boot_init();
-
sfi_init();
+ x86_dtb_init();
/*
* get boot-time SMP configuration:
@@ -1249,9 +1227,7 @@ void __init setup_arch(char **cmdline_p)
prefill_possible_map();
-#ifdef CONFIG_X86_64
init_cpu_to_node();
-#endif
#ifndef CONFIG_XEN
init_apic_mappings();
@@ -1289,6 +1265,8 @@ void __init setup_arch(char **cmdline_p)
#endif
x86_init.oem.banner();
+ x86_init.timers.wallclock_init();
+
mcheck_init();
local_irq_save(flags);
--- head.orig/arch/x86/kernel/setup_percpu.c 2012-05-23 13:34:18.000000000 +0200
+++ head/arch/x86/kernel/setup_percpu.c 2012-05-23 13:38:58.000000000 +0200
@@ -231,7 +231,8 @@ void __init setup_per_cpu_areas(void)
* are zeroed indicating that the static arrays are
* gone.
*/
-#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
+#ifndef CONFIG_XEN
+#ifdef CONFIG_X86_LOCAL_APIC
per_cpu(x86_cpu_to_apicid, cpu) =
early_per_cpu_map(x86_cpu_to_apicid, cpu);
per_cpu(x86_bios_cpu_apicid, cpu) =
@@ -241,6 +242,7 @@ void __init setup_per_cpu_areas(void)
per_cpu(x86_cpu_to_logical_apicid, cpu) =
early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
#endif
+#endif
#ifdef CONFIG_X86_64
per_cpu(irq_stack_ptr, cpu) =
per_cpu(irq_stack_union.irq_stack, cpu) +
@@ -268,13 +270,15 @@ void __init setup_per_cpu_areas(void)
}
/* indicate the early static arrays will soon be gone */
-#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
+#ifndef CONFIG_XEN
+#ifdef CONFIG_X86_LOCAL_APIC
early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#endif
#ifdef CONFIG_X86_32
early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
#endif
+#endif
#ifdef CONFIG_NUMA
early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif
--- head.orig/arch/x86/kernel/time-xen.c 2012-02-10 13:29:07.000000000 +0100
+++ head/arch/x86/kernel/time-xen.c 2012-02-10 13:29:28.000000000 +0100
@@ -19,6 +19,9 @@
#include <linux/cpufreq.h>
#include <linux/clocksource.h>
+extern seqlock_t xtime_lock;
+extern void do_timer(unsigned long ticks);
+
#include <asm/vsyscall.h>
#include <asm/delay.h>
#include <asm/time.h>
@@ -269,19 +272,14 @@ static void sync_xen_wallclock(unsigned
static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0);
static void sync_xen_wallclock(unsigned long dummy)
{
- struct timespec now;
- unsigned long seq;
+ struct timespec now, ignore;
struct xen_platform_op op;
BUG_ON(!is_initial_xendomain());
if (!ntp_synced() || independent_wallclock)
return;
- do {
- seq = read_seqbegin(&xtime_lock);
- now = __current_kernel_time();
- } while (read_seqretry(&xtime_lock, seq));
-
+ get_xtime_and_monotonic_and_sleep_offset(&now, &ignore, &ignore);
set_normalized_timespec(&now, now.tv_sec, now.tv_nsec);
op.cmd = XENPF_settime;
--- head.orig/arch/x86/kernel/x86_init-xen.c 2011-07-11 13:00:38.000000000 +0200
+++ head/arch/x86/kernel/x86_init-xen.c 2011-07-11 13:01:21.000000000 +0200
@@ -59,6 +59,10 @@ struct x86_init_ops x86_init __initdata
.banner = x86_init_noop,
},
+ .mapping = {
+ .pagetable_reserve = xen_pagetable_reserve,
+ },
+
.paging = {
.pagetable_setup_start = x86_init_pgd_noop,
.pagetable_setup_done = x86_init_pgd_noop,
@@ -68,6 +72,7 @@ struct x86_init_ops x86_init __initdata
.setup_percpu_clockev = NULL,
.tsc_pre_init = x86_init_noop,
.timer_init = x86_init_noop,
+ .wallclock_init = x86_init_noop,
},
.iommu = {
--- head.orig/arch/x86/mm/init-xen.c 2011-02-01 15:41:35.000000000 +0100
+++ head/arch/x86/mm/init-xen.c 2011-05-23 11:32:36.000000000 +0200
@@ -19,9 +19,9 @@
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-unsigned long __meminitdata e820_table_start;
-unsigned long __meminitdata e820_table_end;
-unsigned long __meminitdata e820_table_top;
+unsigned long __meminitdata pgt_buf_start;
+unsigned long __meminitdata pgt_buf_end;
+unsigned long __meminitdata pgt_buf_top;
int after_bootmem;
@@ -72,21 +72,14 @@ static void __init find_early_table_spac
#ifdef CONFIG_X86_32
/* for fixmap */
tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
-#endif
- /*
- * RED-PEN putting page tables only on node 0 could
- * cause a hotspot and fill up ZONE_DMA. The page tables
- * need roughly 0.5KB per GB.
- */
-#ifdef CONFIG_X86_32
- e820_table_start = extend_init_mapping(tables);
- e820_table_end = e820_table_start;
+ pgt_buf_start = extend_init_mapping(tables);
+ pgt_buf_end = pgt_buf_start;
#else /* CONFIG_X86_64 */
- if (!e820_table_top) {
- e820_table_start = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) +
+ if (!pgt_buf_top) {
+ pgt_buf_start = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) +
xen_start_info->nr_pt_frames;
- e820_table_end = e820_table_start;
+ pgt_buf_end = pgt_buf_start;
} else {
/*
* [table_start, table_top) gets passed to reserve_early(),
@@ -94,16 +87,21 @@ static void __init find_early_table_spac
* to allocate from there. table_end possibly being below
* table_start is otoh not a problem.
*/
- e820_table_start = e820_table_top;
+ pgt_buf_start = pgt_buf_top;
}
#endif
- if (e820_table_start == -1UL)
+ if (pgt_buf_start == -1UL)
panic("Cannot find space for the kernel page tables");
- e820_table_top = e820_table_start + (tables >> PAGE_SHIFT);
+ pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
- end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT);
+ end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
+}
+
+void __init xen_pagetable_reserve(u64 start, u64 end)
+{
+ memblock_x86_reserve_range(start, end, "PGTABLE");
}
struct map_range {
@@ -303,7 +301,7 @@ unsigned long __init_refok init_memory_m
}
/* Blow away any spurious initial mappings. */
- va = __START_KERNEL_map + (e820_table_start << PAGE_SHIFT);
+ va = __START_KERNEL_map + (pgt_buf_start << PAGE_SHIFT);
addr = page[pgd_index(va)];
page = addr_to_page(addr);
@@ -329,20 +327,35 @@ unsigned long __init_refok init_memory_m
#endif
#ifdef CONFIG_X86_64
- BUG_ON(e820_table_end > e820_table_top);
+ BUG_ON(pgt_buf_end > pgt_buf_top);
if (!start)
xen_finish_init_mapping();
else
#endif
- if (e820_table_end < e820_table_top)
+ if (pgt_buf_end < pgt_buf_top)
/* Disable the 'table_end' allocator. */
- e820_table_top = e820_table_end;
+ pgt_buf_top = pgt_buf_end;
__flush_tlb_all();
- if (!after_bootmem && e820_table_top > e820_table_start)
- memblock_x86_reserve_range(e820_table_start << PAGE_SHIFT,
- e820_table_top << PAGE_SHIFT, "PGTABLE");
+ /*
+ * Reserve the kernel pagetable pages we used (pgt_buf_start -
+ * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
+ * so that they can be reused for other purposes.
+ *
+ * On native it just means calling memblock_x86_reserve_range; on Xen it
+ * also means marking RW the pagetable pages that we allocated before
+ * but that haven't been used.
+ *
+ * In fact on Xen we mark RO the whole range pgt_buf_start -
+ * pgt_buf_top, because we have to make sure that when
+ * init_memory_mapping reaches the pagetable pages area, it maps
+ * RO all the pagetable pages, including the ones that are beyond
+ * pgt_buf_end at that time.
+ */
+ if (!after_bootmem && pgt_buf_top > pgt_buf_start)
+ x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
+ PFN_PHYS(pgt_buf_top));
if (!after_bootmem)
early_memtest(start, end);
--- head.orig/arch/x86/mm/init_32-xen.c 2011-02-01 15:41:35.000000000 +0100
+++ head/arch/x86/mm/init_32-xen.c 2011-04-13 17:01:32.000000000 +0200
@@ -65,10 +65,10 @@ bool __read_mostly __vmalloc_start_set =
static __init void *alloc_low_page(void)
{
- unsigned long pfn = e820_table_end++;
+ unsigned long pfn = pgt_buf_end++;
void *adr;
- if (pfn >= e820_table_top)
+ if (pfn >= pgt_buf_top)
panic("alloc_low_page: ran out of memory");
adr = __va(pfn * PAGE_SIZE);
@@ -173,8 +173,8 @@ static pte_t *__init page_table_kmap_che
if (pmd_idx_kmap_begin != pmd_idx_kmap_end
&& (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
&& (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
- && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start
- || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) {
+ && ((__pa(pte) >> PAGE_SHIFT) < pgt_buf_start
+ || (__pa(pte) >> PAGE_SHIFT) >= pgt_buf_end)) {
pte_t *newpte;
int i;
@@ -646,8 +646,7 @@ void __init find_low_pfn_range(void)
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
-void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
- int acpi, int k8)
+void __init initmem_init(void)
{
#ifdef CONFIG_HIGHMEM
highstart_pfn = highend_pfn = max_pfn;
@@ -986,7 +985,7 @@ static void mark_nxdata_nx(void)
{
/*
* When this called, init has already been executed and released,
- * so everything past _etext sould be NX.
+ * so everything past _etext should be NX.
*/
unsigned long start = PFN_ALIGN(_etext);
/*
--- head.orig/arch/x86/mm/init_64-xen.c 2011-03-17 14:22:21.000000000 +0100
+++ head/arch/x86/mm/init_64-xen.c 2011-11-03 12:51:20.000000000 +0100
@@ -210,9 +210,9 @@ static __ref void *spp_getpage(void)
if (after_bootmem)
ptr = (void *) get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
- else if (e820_table_end < e820_table_top) {
- ptr = __va(e820_table_end << PAGE_SHIFT);
- e820_table_end++;
+ else if (pgt_buf_end < pgt_buf_top) {
+ ptr = __va(pgt_buf_end << PAGE_SHIFT);
+ pgt_buf_end++;
clear_page(ptr);
} else
ptr = alloc_bootmem_pages(PAGE_SIZE);
@@ -369,18 +369,18 @@ void __init init_extra_mapping_uc(unsign
* to the compile time generated pmds. This results in invalid pmds up
* to the point where we hit the physaddr 0 mapping.
*
- * We limit the mappings to the region from _text to _end. _end is
- * rounded up to the 2MB boundary. This catches the invalid pmds as
+ * We limit the mappings to the region from _text to _brk_end. _brk_end
+ * is rounded up to the 2MB boundary. This catches the invalid pmds as
* well, as they are located before _text:
*/
void __init cleanup_highmap(void)
{
unsigned long vaddr = __START_KERNEL_map;
- unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1;
+ unsigned long vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT);
+ unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
pmd_t *pmd = level2_kernel_pgt;
- pmd_t *last_pmd = pmd + PTRS_PER_PMD;
- for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) {
+ for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) {
if (pmd_none(*pmd))
continue;
if (vaddr < (unsigned long) _text || vaddr > end)
@@ -401,9 +401,9 @@ static __ref void *alloc_low_page(unsign
return adr;
}
- BUG_ON(!e820_table_end);
- pfn = e820_table_end++;
- if (pfn >= e820_table_top)
+ BUG_ON(!pgt_buf_end);
+ pfn = pgt_buf_end++;
+ if (pfn >= pgt_buf_top)
panic("alloc_low_page: ran out of memory");
adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
@@ -412,12 +412,28 @@ static __ref void *alloc_low_page(unsign
return adr;
}
+static __ref void *map_low_page(void *virt)
+{
+ void *adr; /* early read-only mapping of the page holding @virt */
+ unsigned long phys, left; /* physical address of @virt and its in-page offset */
+
+ if (after_bootmem)
+ return virt; /* no temporary mapping is created in this case */
+
+ phys = __pa(virt);
+ left = phys & (PAGE_SIZE - 1);
+ adr = early_memremap_ro(phys & PAGE_MASK, PAGE_SIZE); /* maps the whole containing page */
+ adr = (void *)(((unsigned long)adr) | left); /* re-apply the in-page offset */
+
+ return adr;
+}
+
static __ref void unmap_low_page(void *adr)
{
if (after_bootmem)
return;
- early_iounmap(adr, PAGE_SIZE);
+ early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE);
}
static inline int __meminit make_readonly(unsigned long paddr)
@@ -428,13 +444,13 @@ static inline int __meminit make_readonl
/* Make new page tables read-only on the first pass. */
if (!xen_feature(XENFEAT_writable_page_tables)
&& !max_pfn_mapped
- && (paddr >= (e820_table_start << PAGE_SHIFT))
- && (paddr < (e820_table_top << PAGE_SHIFT)))
+ && (paddr >= (pgt_buf_start << PAGE_SHIFT))
+ && (paddr < (pgt_buf_top << PAGE_SHIFT)))
readonly = 1;
/* Make old page tables read-only. */
if (!xen_feature(XENFEAT_writable_page_tables)
&& (paddr >= (xen_start_info->pt_base - __START_KERNEL_map))
- && (paddr < (e820_table_end << PAGE_SHIFT)))
+ && (paddr < (pgt_buf_end << PAGE_SHIFT)))
readonly = 1;
/*
@@ -503,16 +519,6 @@ phys_pte_init(pte_t *pte_page, unsigned
}
static unsigned long __meminit
-phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end,
- pgprot_t prot)
-{
- pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
-
- BUG_ON(!max_pfn_mapped);
- return phys_pte_init(pte, address, end, prot);
-}
-
-static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
unsigned long page_size_mask, pgprot_t prot)
{
@@ -533,8 +539,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned
if (__pmd_val(*pmd)) {
if (!pmd_large(*pmd)) {
spin_lock(&init_mm.page_table_lock);
- last_map_addr = phys_pte_update(pmd, address,
+ pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd));
+ last_map_addr = phys_pte_init(pte, address,
end, prot);
+ unmap_low_page(pte);
spin_unlock(&init_mm.page_table_lock);
continue;
}
@@ -576,9 +584,15 @@ phys_pmd_init(pmd_t *pmd_page, unsigned
if (max_pfn_mapped)
make_page_readonly(__va(pte_phys),
XENFEAT_writable_page_tables);
- if (page_size_mask & (1 << PG_LEVEL_NUM))
- xen_l2_entry_update(pmd, __pmd(pte_phys | _PAGE_TABLE));
- else
+ if (page_size_mask & (1 << PG_LEVEL_NUM)) {
+ mmu_update_t u;
+
+ u.ptr = arbitrary_virt_to_machine(pmd);
+ u.val = phys_to_machine(pte_phys) | _PAGE_TABLE;
+ if (HYPERVISOR_mmu_update(&u, 1, NULL,
+ DOMID_SELF) < 0)
+ BUG();
+ } else
*pmd = __pmd(pte_phys | _PAGE_TABLE);
} else {
make_page_readonly(pte, XENFEAT_writable_page_tables);
@@ -592,21 +606,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned
}
static unsigned long __meminit
-phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
- unsigned long page_size_mask, pgprot_t prot)
-{
- pmd_t *pmd = pmd_offset(pud, 0);
- unsigned long last_map_addr;
-
- BUG_ON(!max_pfn_mapped);
- last_map_addr = phys_pmd_init(pmd, address, end,
- page_size_mask | (1 << PG_LEVEL_NUM),
- prot);
- __flush_tlb_all();
- return last_map_addr;
-}
-
-static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
unsigned long page_size_mask)
{
@@ -625,8 +624,12 @@ phys_pud_init(pud_t *pud_page, unsigned
if (__pud_val(*pud)) {
if (!pud_large(*pud)) {
- last_map_addr = phys_pmd_update(pud, addr, end,
- page_size_mask, prot);
+ pmd = map_low_page(pmd_offset(pud, 0));
+ last_map_addr = phys_pmd_init(pmd, addr, end,
+ page_size_mask | (1 << PG_LEVEL_NUM),
+ prot);
+ unmap_low_page(pmd);
+ __flush_tlb_all();
continue;
}
/*
@@ -668,9 +671,15 @@ phys_pud_init(pud_t *pud_page, unsigned
if (max_pfn_mapped)
make_page_readonly(__va(pmd_phys),
XENFEAT_writable_page_tables);
- if (page_size_mask & (1 << PG_LEVEL_NUM))
- xen_l3_entry_update(pud, __pud(pmd_phys | _PAGE_TABLE));
- else
+ if (page_size_mask & (1 << PG_LEVEL_NUM)) {
+ mmu_update_t u;
+
+ u.ptr = arbitrary_virt_to_machine(pud);
+ u.val = phys_to_machine(pmd_phys) | _PAGE_TABLE;
+ if (HYPERVISOR_mmu_update(&u, 1, NULL,
+ DOMID_SELF) < 0)
+ BUG();
+ } else
*pud = __pud(pmd_phys | _PAGE_TABLE);
} else {
make_page_readonly(pmd, XENFEAT_writable_page_tables);
@@ -686,17 +695,6 @@ phys_pud_init(pud_t *pud_page, unsigned
return last_map_addr;
}
-static unsigned long __meminit
-phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
- unsigned long page_size_mask)
-{
- pud_t *pud;
-
- pud = (pud_t *)pgd_page_vaddr(*pgd);
-
- return phys_pud_init(pud, addr, end, page_size_mask | (1 << PG_LEVEL_NUM));
-}
-
void __init xen_init_pt(void)
{
unsigned long addr, *page;
@@ -799,15 +797,15 @@ void __init xen_finish_init_mapping(void
/* Destroy the Xen-created mappings beyond the kernel image. */
start = PAGE_ALIGN(_brk_end);
- end = __START_KERNEL_map + (e820_table_start << PAGE_SHIFT);
+ end = __START_KERNEL_map + (pgt_buf_start << PAGE_SHIFT);
for (; start < end; start += PAGE_SIZE)
if (HYPERVISOR_update_va_mapping(start, __pte_ma(0), 0))
BUG();
- WARN(e820_table_end != e820_table_top, "start=%lx cur=%lx top=%lx\n",
- e820_table_start, e820_table_end, e820_table_top);
- if (e820_table_end > e820_table_top)
- e820_table_top = e820_table_end;
+ WARN(pgt_buf_end != pgt_buf_top, "start=%lx cur=%lx top=%lx\n",
+ pgt_buf_start, pgt_buf_end, pgt_buf_top);
+ if (pgt_buf_end > pgt_buf_top)
+ pgt_buf_top = pgt_buf_end;
}
unsigned long __meminit
@@ -833,8 +831,10 @@ kernel_physical_mapping_init(unsigned lo
next = end;
if (__pgd_val(*pgd)) {
- last_map_addr = phys_pud_update(pgd, __pa(start),
- __pa(end), page_size_mask);
+ pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
+ last_map_addr = phys_pud_init(pud, __pa(start),
+ __pa(end), page_size_mask | (1 << PG_LEVEL_NUM));
+ unmap_low_page(pud);
continue;
}
@@ -864,14 +864,13 @@ kernel_physical_mapping_init(unsigned lo
}
#ifndef CONFIG_NUMA
-void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
- int acpi, int k8)
+void __init initmem_init(void)
{
- memblock_x86_register_active_regions(0, start_pfn, end_pfn);
+ memblock_x86_register_active_regions(0, 0, max_pfn);
#ifdef CONFIG_XEN
- if (end_pfn > xen_start_info->nr_pages)
+ if (max_pfn > xen_start_info->nr_pages)
memblock_x86_reserve_range(xen_start_info->nr_pages << PAGE_SHIFT,
- end_pfn << PAGE_SHIFT, "BALLOON");
+ max_pfn << PAGE_SHIFT, "BALLOON");
#endif
}
#endif
@@ -1149,18 +1148,18 @@ static struct vm_area_struct gate_vma =
.vm_flags = VM_READ | VM_EXEC
};
-struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
#ifdef CONFIG_IA32_EMULATION
- if (test_tsk_thread_flag(tsk, TIF_IA32))
+ if (!mm || mm->context.ia32_compat)
return NULL;
#endif
return &gate_vma;
}
-int in_gate_area(struct task_struct *task, unsigned long addr)
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
{
- struct vm_area_struct *vma = get_gate_vma(task);
+ struct vm_area_struct *vma = get_gate_vma(mm);
if (!vma)
return 0;
@@ -1169,11 +1168,11 @@ int in_gate_area(struct task_struct *tas
}
/*
- * Use this when you have no reliable task/vma, typically from interrupt
- * context. It is less reliable than using the task's vma and may give
- * false positives:
+ * Use this when you have no reliable mm, typically from interrupt
+ * context. It is less reliable than using a task's mm and may give
+ * false positives.
*/
-int in_gate_area_no_task(unsigned long addr)
+int in_gate_area_no_mm(unsigned long addr)
{
return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}
@@ -1187,6 +1186,19 @@ const char *arch_vma_name(struct vm_area
return NULL;
}
+#ifdef CONFIG_X86_UV
+#define MIN_MEMORY_BLOCK_SIZE (1 << SECTION_SIZE_BITS) /* block size returned on non-UV systems */
+
+unsigned long memory_block_size_bytes(void)
+{
+ if (is_uv_system()) {
+ printk(KERN_INFO "UV: memory block size 2GB\n");
+ return 2UL * 1024 * 1024 * 1024; /* 2GB, as announced by the printk above */
+ }
+ return MIN_MEMORY_BLOCK_SIZE;
+}
+#endif
+
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
* Initialise the sparsemem vmemmap using huge-pages at the PMD level.
--- head.orig/arch/x86/mm/ioremap-xen.c 2011-05-09 11:42:49.000000000 +0200
+++ head/arch/x86/mm/ioremap-xen.c 2011-05-09 11:42:57.000000000 +0200
@@ -758,6 +758,12 @@ early_memremap(resource_size_t phys_addr
return __early_ioremap(phys_to_machine(phys_addr), size, PAGE_KERNEL);
}
+void __init __iomem * /* counterpart of early_memremap() that maps with PAGE_KERNEL_RO */
+early_memremap_ro(resource_size_t phys_addr, unsigned long size)
+{
+ return __early_ioremap(phys_to_machine(phys_addr), size, PAGE_KERNEL_RO);
+}
+
void __init early_iounmap(void __iomem *addr, unsigned long size)
{
unsigned long virt_addr;
--- head.orig/arch/x86/mm/pageattr-xen.c 2011-03-23 10:10:15.000000000 +0100
+++ head/arch/x86/mm/pageattr-xen.c 2011-04-13 17:01:32.000000000 +0200
@@ -310,7 +310,7 @@ static inline pgprot_t static_protection
* these shared mappings are made of small page mappings.
* Thus this don't enforce !RW mapping for small page kernel
* text mapping logic will help Linux Xen parvirt guest boot
- * aswell.
+ * as well.
*/
if (lookup_address(address, &level) && (level != PG_LEVEL_4K))
pgprot_val(forbidden) |= _PAGE_RW;
--- head.orig/arch/x86/mm/pgtable-xen.c 2011-03-17 14:26:03.000000000 +0100
+++ head/arch/x86/mm/pgtable-xen.c 2011-04-12 15:59:10.000000000 +0200
@@ -528,8 +528,7 @@ void pud_populate(struct mm_struct *mm,
* section 8.1: in PAE mode we explicitly have to flush the
* TLB via cr3 if the top-level pgd is changed...
*/
- if (mm == current->active_mm)
- xen_tlb_flush();
+ flush_tlb_mm(mm);
}
#else /* !CONFIG_X86_PAE */
--- head.orig/arch/x86/vdso/vdso32-setup-xen.c 2012-02-29 14:18:20.000000000 +0100
+++ head/arch/x86/vdso/vdso32-setup-xen.c 2012-02-29 14:19:15.000000000 +0100
@@ -465,24 +465,25 @@ const char *arch_vma_name(struct vm_area
return NULL;
}
-struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
- struct mm_struct *mm = tsk->mm;
-
- /* Check to see if this task was created in compat vdso mode */
+ /*
+ * Check to see if the corresponding task was created in compat vdso
+ * mode.
+ */
if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
return &gate_vma;
return NULL;
}
-int in_gate_area(struct task_struct *task, unsigned long addr)
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
{
- const struct vm_area_struct *vma = get_gate_vma(task);
+ const struct vm_area_struct *vma = get_gate_vma(mm);
return vma && addr >= vma->vm_start && addr < vma->vm_end;
}
-int in_gate_area_no_task(unsigned long addr)
+int in_gate_area_no_mm(unsigned long addr)
{
return 0;
}
--- head.orig/drivers/net/Kconfig 2012-02-08 12:16:22.000000000 +0100
+++ head/drivers/net/Kconfig 2012-02-08 12:54:02.000000000 +0100
@@ -306,15 +306,15 @@ config PARAVIRT_XEN_NETDEV_FRONTEND
domain 0).
The corresponding Linux backend driver is enabled by the
- CONFIG_XEN_NETDEV_BACKEND option.
+ PARAVIRT_XEN_NETDEV_BACKEND option.
If you are compiling a kernel for use as Xen guest, you
should say Y here. To compile this driver as a module, chose
M here: the module will be called xen-netfront.
-config XEN_NETDEV_BACKEND
+config PARAVIRT_XEN_NETDEV_BACKEND
tristate "Xen backend network device"
- depends on XEN_BACKEND
+ depends on PARAVIRT_XEN_BACKEND
help
This driver allows the kernel to act as a Xen network driver
domain which exports paravirtual network devices to other
@@ -322,7 +322,7 @@ config XEN_NETDEV_BACKEND
system that implements a compatible front end.
The corresponding Linux frontend driver is enabled by the
- CONFIG_XEN_NETDEV_FRONTEND configuration option.
+ PARAVIRT_XEN_NETDEV_FRONTEND configuration option.
The backend driver presents a standard network device
endpoint for each paravirtual network device to the driver
--- head.orig/drivers/net/Makefile 2012-02-08 11:35:14.000000000 +0100
+++ head/drivers/net/Makefile 2012-02-08 12:54:12.000000000 +0100
@@ -57,7 +57,7 @@ obj-$(CONFIG_WIMAX) += wimax/
obj-$(CONFIG_VMXNET3) += vmxnet3/
obj-$(CONFIG_PARAVIRT_XEN_NETDEV_FRONTEND) += xen-netfront.o
-obj-$(CONFIG_XEN_NETDEV_BACKEND) += xen-netback/
+obj-$(CONFIG_PARAVIRT_XEN_NETDEV_BACKEND) += xen-netback/
obj-$(CONFIG_USB_CATC) += usb/
obj-$(CONFIG_USB_KAWETH) += usb/
--- head.orig/drivers/net/xen-netback/Makefile 2012-06-06 13:23:56.000000000 +0200
+++ head/drivers/net/xen-netback/Makefile 2011-04-13 14:42:19.000000000 +0200
@@ -1,3 +1,3 @@
-obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
+obj-$(CONFIG_PARAVIRT_XEN_NETDEV_BACKEND) := xen-netback.o
xen-netback-y := netback.o xenbus.o interface.o
--- head.orig/drivers/watchdog/Kconfig 2012-06-06 13:23:56.000000000 +0200
+++ head/drivers/watchdog/Kconfig 2012-04-10 17:11:45.000000000 +0200
@@ -1184,7 +1184,7 @@ config WATCHDOG_RIO
config XEN_WDT
tristate "Xen Watchdog support"
- depends on XEN
+ depends on XEN || PARAVIRT_XEN
help
Say Y here to support the hypervisor watchdog capability provided
by Xen 4.0 and newer. The watchdog timeout period is normally one
--- head.orig/drivers/watchdog/xen_wdt.c 2012-06-06 13:23:56.000000000 +0200
+++ head/drivers/watchdog/xen_wdt.c 2012-04-10 17:11:39.000000000 +0200
@@ -1,7 +1,8 @@
/*
* Xen Watchdog Driver
*
- * (c) Copyright 2010 Novell, Inc.
+ * (c) Copyright 2010,2011 Novell, Inc.
+ * (c) Copyright 2011,2012 SuSE
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -28,8 +29,10 @@
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include <linux/watchdog.h>
+#ifdef CONFIG_PARAVIRT_XEN
#include <xen/xen.h>
#include <asm/xen/hypercall.h>
+#endif
#include <xen/interface/sched.h>
static struct platform_device *platform_device;
@@ -329,17 +332,19 @@ static int __init xen_wdt_init_module(vo
{
int err;
+#ifdef CONFIG_PARAVIRT_XEN
if (!xen_domain())
return -ENODEV;
+#endif
- pr_info("Xen WatchDog Timer Driver v%s\n", DRV_VERSION);
+ printk(KERN_INFO "Xen WatchDog Timer Driver v%s\n", DRV_VERSION);
err = platform_driver_register(&xen_wdt_driver);
if (err)
return err;
platform_device = platform_device_register_simple(DRV_NAME,
- -1, NULL, 0);
+ -1, NULL, 0);
if (IS_ERR(platform_device)) {
err = PTR_ERR(platform_device);
platform_driver_unregister(&xen_wdt_driver);
--- head.orig/drivers/xen/Kconfig 2012-04-03 13:15:48.000000000 +0200
+++ head/drivers/xen/Kconfig 2012-04-03 13:15:53.000000000 +0200
@@ -503,7 +503,7 @@ config XEN_GNTDEV
config XEN_GRANT_DEV_ALLOC
tristate "User-space grant reference allocator driver"
- depends on XEN
+ depends on PARAVIRT_XEN
default m
help
Allows userspace processes to create pages with access granted
--- head.orig/drivers/xen/Makefile 2011-08-18 11:16:13.000000000 +0200
+++ head/drivers/xen/Makefile 2011-08-18 11:16:19.000000000 +0200
@@ -1,10 +1,10 @@
-obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o features.o events.o manage.o
+obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o features.o events.o manage.o balloon.o
xen-biomerge-$(CONFIG_PARAVIRT_XEN) := biomerge.o
xen-hotplug-$(CONFIG_PARAVIRT_XEN) := cpu_hotplug.o
-xen-balloon-$(CONFIG_PARAVIRT_XEN) := balloon.o
+xen-balloon_$(CONFIG_PARAVIRT_XEN) := xen-balloon.o
xen-evtchn-name-$(CONFIG_PARAVIRT_XEN) := xen-evtchn
-xen-balloon-$(CONFIG_XEN) := balloon/
+xen-balloon_$(CONFIG_XEN) := balloon/
obj-$(CONFIG_XEN) += core/
obj-$(CONFIG_XEN) += console/
obj-y += xenbus/
@@ -25,10 +25,11 @@ obj-$(CONFIG_XEN_PRIVILEGED_GUEST) += $(
obj-$(CONFIG_BLOCK) += $(xen-biomerge-y)
obj-$(CONFIG_HOTPLUG_CPU) += $(xen-hotplug-y)
obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
-obj-$(CONFIG_XEN_BALLOON) += $(xen-balloon-y)
+obj-$(CONFIG_XEN_BALLOON) += $(xen-balloon_y)
obj-$(CONFIG_XEN_DEV_EVTCHN) += $(xen-evtchn-name-y).o
obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
obj-$(CONFIG_XENFS) += xenfs/
+obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o
obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
@@ -36,6 +37,7 @@ obj-$(CONFIG_XEN_DOM0) += pci.o
xen-evtchn-y := evtchn.o
xen-gntdev-y := gntdev.o
+xen-gntalloc-y := gntalloc.o
xen-platform-pci-y := platform-pci.o
--- head.orig/drivers/xen/blkback/blkback.c 2012-04-04 10:26:03.000000000 +0200
+++ head/drivers/xen/blkback/blkback.c 2012-03-26 12:28:48.000000000 +0200
@@ -149,8 +149,6 @@ static void unplug_queue(blkif_t *blkif)
{
if (blkif->plug == NULL)
return;
- if (blkif->plug->unplug_fn)
- blkif->plug->unplug_fn(blkif->plug);
kobject_put(&blkif->plug->kobj);
blkif->plug = NULL;
}
--- head.orig/drivers/xen/blktap2-new/device.c 2012-02-16 16:38:28.000000000 +0100
+++ head/drivers/xen/blktap2-new/device.c 2012-02-16 16:38:39.000000000 +0100
@@ -527,10 +527,9 @@ blktap_device_debug(struct blktap *tap,
queue_logical_block_size(q));
s += snprintf(s, end - s,
- "queue flags:%#lx plugged:%d stopped:%d empty:%d\n",
+ "queue flags:%#lx stopped:%d\n",
q->queue_flags,
- blk_queue_plugged(q), blk_queue_stopped(q),
- elv_queue_empty(q));
+ blk_queue_stopped(q));
bdev = bdget_disk(disk, 0);
if (bdev) {
--- head.orig/drivers/xen/core/evtchn.c 2012-04-03 17:02:16.000000000 +0200
+++ head/drivers/xen/core/evtchn.c 2012-06-08 10:36:16.000000000 +0200
@@ -33,7 +33,6 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/irq.h>
-#include <linux/irqdesc.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
@@ -47,6 +46,7 @@
#include <xen/interface/physdev.h>
#include <asm/hypervisor.h>
#include <linux/mc146818rtc.h> /* RTC_IRQ */
+#include "../../../kernel/irq/internals.h" /* IRQS_AUTODETECT, IRQS_PENDING */
/*
* This lock protects updates to the following mapping and reference-count
@@ -89,7 +89,7 @@ static struct irq_cfg _irq_cfg[] = {
static inline struct irq_cfg *__pure irq_cfg(unsigned int irq)
{
#ifdef CONFIG_SPARSE_IRQ
- return get_irq_chip_data(irq);
+ return irq_get_chip_data(irq);
#else
return irq < NR_IRQS ? _irq_cfg + irq : NULL;
#endif
@@ -420,18 +420,18 @@ static int find_unbound_irq(unsigned int
for (irq = DYNIRQ_BASE; irq < nr_irqs; irq++) {
struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
- struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_data *data = irq_get_irq_data(irq);
if (unlikely(!cfg))
return -ENOMEM;
- if (desc->irq_data.chip != &no_irq_chip &&
- desc->irq_data.chip != chip)
+ if (data->chip != &no_irq_chip &&
+ data->chip != chip)
continue;
if (!cfg->bindcount) {
*pcfg = cfg;
- desc->status |= IRQ_NOPROBE;
- set_irq_chip_and_handler_name(irq, chip,
+ irq_set_noprobe(irq);
+ irq_set_chip_and_handler_name(irq, chip,
handle_fasteoi_irq,
"fasteoi");
return irq;
@@ -600,7 +600,7 @@ static int bind_ipi_to_irq(unsigned int
static void unbind_from_irq(unsigned int irq)
{
struct irq_cfg *cfg = irq_cfg(irq);
- unsigned int cpu, evtchn = evtchn_from_irq_cfg(cfg);
+ unsigned int evtchn = evtchn_from_irq_cfg(cfg);
spin_lock(&irq_mapping_update_lock);
@@ -628,17 +628,7 @@ static void unbind_from_irq(unsigned int
evtchn_to_irq[evtchn] = -1;
cfg->info = IRQ_UNBOUND;
- /* Zap stats across IRQ changes of use. */
- for_each_possible_cpu(cpu) {
-#ifdef CONFIG_GENERIC_HARDIRQS
- struct irq_desc *desc = irq_to_desc(irq);
-
- if (desc->kstat_irqs)
- *per_cpu_ptr(desc->kstat_irqs, cpu) = 0;
-#else
- kstat_cpu(cpu).irqs[irq] = 0;
-#endif
- }
+ dynamic_irq_cleanup(irq);
}
spin_unlock(&irq_mapping_update_lock);
@@ -785,8 +775,11 @@ static int set_affinity_irq(struct irq_d
masked = test_and_set_evtchn_mask(port);
rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &ebv);
- if (rc == 0)
+ if (rc == 0) {
bind_evtchn_to_cpu(port, cpu);
+ rc = evtchn_to_irq[port] != -1 ? IRQ_SET_MASK_OK_NOCOPY
+ : IRQ_SET_MASK_OK;
+ }
if (!masked)
unmask_evtchn(port);
@@ -840,8 +833,8 @@ static unsigned int startup_dynirq(struc
static void end_dynirq(struct irq_data *data)
{
- if (!(irq_to_desc(data->irq)->status & IRQ_DISABLED)) {
- move_masked_irq(data->irq);
+ if (!irqd_irq_disabled(data)) {
+ irq_move_masked_irq(data);
unmask_dynirq(data);
}
}
@@ -936,7 +929,7 @@ static void enable_pirq(struct irq_data
/* NB. We are happy to share unless we are probing. */
bind_pirq.flags = (pirq < nr_pirqs
&& test_and_clear_bit(pirq, probing_pirq))
- || (irq_to_desc(irq)->status & IRQ_AUTODETECT)
+ || (irq_to_desc(irq)->istate & IRQS_AUTODETECT)
? 0 : BIND_PIRQ__WILL_SHARE;
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) {
if (bind_pirq.flags)
@@ -993,14 +986,13 @@ static void unmask_pirq(struct irq_data
static void end_pirq(struct irq_data *data)
{
- const struct irq_desc *desc = irq_to_desc(data->irq);
+ bool disabled = irqd_irq_disabled(data);
- if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) ==
- (IRQ_DISABLED|IRQ_PENDING))
+ if (disabled && (irq_to_desc(data->irq)->istate & IRQS_PENDING))
shutdown_pirq(data);
else {
- if (!(desc->status & IRQ_DISABLED))
- move_masked_irq(data->irq);
+ if (!disabled)
+ irq_move_masked_irq(data);
unmask_pirq(data);
}
}
@@ -1134,10 +1126,13 @@ static void restore_cpu_ipis(unsigned in
int ipi, irq, evtchn;
for (ipi = 0; ipi < NR_IPIS; ipi++) {
+ struct irq_data *data;
+
if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
continue;
- BUG_ON(irq_cfg(irq)->info != mk_irq_info(IRQT_IPI, ipi, 0));
+ data = irq_get_irq_data(irq);
+ BUG_ON(irq_data_cfg(data)->info != mk_irq_info(IRQT_IPI, ipi, 0));
/* Get a new binding from Xen. */
bind_ipi.vcpu = cpu;
@@ -1148,11 +1143,11 @@ static void restore_cpu_ipis(unsigned in
/* Record the new mapping. */
evtchn_to_irq[evtchn] = irq;
- irq_cfg(irq)->info = mk_irq_info(IRQT_IPI, ipi, evtchn);
+ irq_data_cfg(data)->info = mk_irq_info(IRQT_IPI, ipi, evtchn);
bind_evtchn_to_cpu(evtchn, cpu);
/* Ready for use. */
- if (!(irq_to_desc(irq)->status & IRQ_DISABLED))
+ if (!irqd_irq_disabled(data))
unmask_evtchn(evtchn);
}
}
@@ -1209,7 +1204,7 @@ int __init arch_early_irq_init(void)
unsigned int i;
for (i = 0; i < ARRAY_SIZE(_irq_cfg); i++)
- set_irq_chip_data(i, _irq_cfg + i);
+ irq_set_chip_data(i, _irq_cfg + i);
return 0;
}
@@ -1222,7 +1217,7 @@ struct irq_cfg *alloc_irq_and_cfg_at(uns
if (res < 0) {
if (res != -EEXIST)
return NULL;
- cfg = get_irq_chip_data(at);
+ cfg = irq_get_chip_data(at);
if (cfg)
return cfg;
}
@@ -1235,7 +1230,7 @@ struct irq_cfg *alloc_irq_and_cfg_at(uns
cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
if (cfg)
- set_irq_chip_data(at, cfg);
+ irq_set_chip_data(at, cfg);
else
irq_free_desc(at);
@@ -1318,7 +1313,7 @@ void evtchn_register_pirq(int irq)
if (identity_mapped_irq(irq) || type_from_irq_cfg(cfg) != IRQT_UNBOUND)
return;
cfg->info = mk_irq_info(IRQT_PIRQ, irq, 0);
- set_irq_chip_and_handler_name(irq, &pirq_chip, handle_fasteoi_irq,
+ irq_set_chip_and_handler_name(irq, &pirq_chip, handle_fasteoi_irq,
"fasteoi");
}
@@ -1366,7 +1361,7 @@ int evtchn_map_pirq(int irq, int xen_pir
spin_unlock(&irq_alloc_lock);
if (irq < PIRQ_BASE)
return -ENOSPC;
- set_irq_chip_and_handler_name(irq, &pirq_chip,
+ irq_set_chip_and_handler_name(irq, &pirq_chip,
handle_fasteoi_irq, "fasteoi");
#endif
} else if (!xen_pirq) {
@@ -1380,7 +1375,7 @@ int evtchn_map_pirq(int irq, int xen_pir
* when a driver didn't free_irq() its MSI(-X) IRQ(s), which
* then causes a warning in dynamic_irq_cleanup().
*/
- set_irq_chip_and_handler(irq, NULL, NULL);
+ irq_set_chip_and_handler(irq, NULL, NULL);
cfg->info = IRQ_UNBOUND;
#ifdef CONFIG_SPARSE_IRQ
cfg->bindcount--;
@@ -1431,8 +1426,8 @@ void __init xen_init_IRQ(void)
#ifndef CONFIG_SPARSE_IRQ
for (i = DYNIRQ_BASE; i < (DYNIRQ_BASE + NR_DYNIRQS); i++) {
- irq_to_desc(i)->status |= IRQ_NOPROBE;
- set_irq_chip_and_handler_name(i, &dynirq_chip,
+ irq_set_noprobe(i);
+ irq_set_chip_and_handler_name(i, &dynirq_chip,
handle_fasteoi_irq, "fasteoi");
}
@@ -1449,7 +1444,7 @@ void __init xen_init_IRQ(void)
continue;
#endif
- set_irq_chip_and_handler_name(i, &pirq_chip,
+ irq_set_chip_and_handler_name(i, &pirq_chip,
handle_fasteoi_irq, "fasteoi");
}
}
--- head.orig/drivers/xen/netback/netback.c 2011-04-11 15:05:22.000000000 +0200
+++ head/drivers/xen/netback/netback.c 2012-06-08 10:36:23.000000000 +0200
@@ -514,7 +514,7 @@ static int netbk_check_gop(int nr_frags,
multicall_entry_t *mcl;
gnttab_transfer_t *gop;
gnttab_copy_t *copy_op;
- int status = NETIF_RSP_OKAY;
+ int status = XEN_NETIF_RSP_OKAY;
int i;
for (i = 0; i <= nr_frags; i++) {
@@ -525,7 +525,7 @@ static int netbk_check_gop(int nr_frags,
if (unlikely(copy_op->status != GNTST_okay)) {
DPRINTK("Bad status %d from copy to DOM%d.\n",
copy_op->status, domid);
- status = NETIF_RSP_ERROR;
+ status = XEN_NETIF_RSP_ERROR;
}
} else {
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -545,7 +545,7 @@ static int netbk_check_gop(int nr_frags,
* a fatal error anyway.
*/
BUG_ON(gop->status == GNTST_bad_page);
- status = NETIF_RSP_ERROR;
+ status = XEN_NETIF_RSP_ERROR;
}
}
}
@@ -561,7 +561,7 @@ static void netbk_add_frag_responses(net
for (i = 0; i < nr_frags; i++) {
int id = meta[i].id;
- int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
+ int flags = (i == nr_frags - 1) ? 0 : XEN_NETRXF_more_data;
if (meta[i].copy)
offset = 0;
@@ -702,14 +702,15 @@ static void net_rx_action(unsigned long
skb->dev->stats.tx_packets++;
id = meta[npo.meta_cons].id;
- flags = nr_frags ? NETRXF_more_data : 0;
+ flags = nr_frags ? XEN_NETRXF_more_data : 0;
switch (skb->ip_summed) {
case CHECKSUM_PARTIAL: /* local packet? */
- flags |= NETRXF_csum_blank | NETRXF_data_validated;
+ flags |= XEN_NETRXF_csum_blank |
+ XEN_NETRXF_data_validated;
break;
case CHECKSUM_UNNECESSARY: /* remote but checksummed? */
- flags |= NETRXF_data_validated;
+ flags |= XEN_NETRXF_data_validated;
break;
}
@@ -726,7 +727,7 @@ static void net_rx_action(unsigned long
RING_GET_RESPONSE(&netif->rx,
netif->rx.rsp_prod_pvt++);
- resp->flags |= NETRXF_extra_info;
+ resp->flags |= XEN_NETRXF_extra_info;
gso->u.gso.size = meta[npo.meta_cons].frag.size;
gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
@@ -994,7 +995,7 @@ inline static void net_tx_action_dealloc
netif = pending_tx_info[pending_idx].netif;
make_tx_response(netif, &pending_tx_info[pending_idx].req,
- NETIF_RSP_OKAY);
+ XEN_NETIF_RSP_OKAY);
/* Ready for next use. */
gnttab_reset_grant_page(mmap_pages[pending_idx]);
@@ -1012,7 +1013,7 @@ static void netbk_tx_err(netif_t *netif,
RING_IDX cons = netif->tx.req_cons;
do {
- make_tx_response(netif, txp, NETIF_RSP_ERROR);
+ make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
if (cons >= end)
break;
txp = RING_GET_REQUEST(&netif->tx, cons++);
@@ -1028,7 +1029,7 @@ static int netbk_count_requests(netif_t
RING_IDX cons = netif->tx.req_cons;
int frags = 0;
- if (!(first->flags & NETTXF_more_data))
+ if (!(first->flags & XEN_NETTXF_more_data))
return 0;
do {
@@ -1057,7 +1058,7 @@ static int netbk_count_requests(netif_t
txp->offset, txp->size);
return -frags;
}
- } while ((txp++)->flags & NETTXF_more_data);
+ } while ((txp++)->flags & XEN_NETTXF_more_data);
return frags;
}
@@ -1106,7 +1107,7 @@ static int netbk_tx_check_mop(struct sk_
err = mop->status;
if (unlikely(err != GNTST_okay)) {
txp = &pending_tx_info[pending_idx].req;
- make_tx_response(netif, txp, NETIF_RSP_ERROR);
+ make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
netif_put(netif);
} else {
@@ -1137,7 +1138,7 @@ static int netbk_tx_check_mop(struct sk_
/* Error on this fragment: respond to client with an error. */
txp = &pending_tx_info[pending_idx].req;
- make_tx_response(netif, txp, NETIF_RSP_ERROR);
+ make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
netif_put(netif);
@@ -1310,7 +1311,7 @@ static void net_tx_action(unsigned long
netif->tx.req_cons = ++i;
memset(extras, 0, sizeof(extras));
- if (txreq.flags & NETTXF_extra_info) {
+ if (txreq.flags & XEN_NETTXF_extra_info) {
work_to_do = netbk_get_extras(netif, extras,
work_to_do);
i = netif->tx.req_cons;
@@ -1449,9 +1450,9 @@ static void net_tx_action(unsigned long
netif_idx_release(pending_idx);
}
- if (txp->flags & NETTXF_csum_blank)
+ if (txp->flags & XEN_NETTXF_csum_blank)
skb->ip_summed = CHECKSUM_PARTIAL;
- else if (txp->flags & NETTXF_data_validated)
+ else if (txp->flags & XEN_NETTXF_data_validated)
skb->ip_summed = CHECKSUM_UNNECESSARY;
else
skb->ip_summed = CHECKSUM_NONE;
@@ -1549,8 +1550,8 @@ static void make_tx_response(netif_t *ne
resp->id = txp->id;
resp->status = st;
- if (txp->flags & NETTXF_extra_info)
- RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
+ if (txp->flags & XEN_NETTXF_extra_info)
+ RING_GET_RESPONSE(&netif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
netif->tx.rsp_prod_pvt = ++i;
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
--- head.orig/drivers/xen/netfront/netfront.c 2012-03-12 13:53:17.000000000 +0100
+++ head/drivers/xen/netfront/netfront.c 2012-06-08 10:36:26.000000000 +0200
@@ -651,7 +651,7 @@ static void network_tx_buf_gc(struct net
struct netif_tx_response *txrsp;
txrsp = RING_GET_RESPONSE(&np->tx, cons);
- if (txrsp->status == NETIF_RSP_NULL)
+ if (txrsp->status == XEN_NETIF_RSP_NULL)
continue;
id = txrsp->id;
@@ -875,7 +875,7 @@ static void xennet_make_frags(struct sk_
while (len > PAGE_SIZE - offset) {
tx->size = PAGE_SIZE - offset;
- tx->flags |= NETTXF_more_data;
+ tx->flags |= XEN_NETTXF_more_data;
len -= tx->size;
data += tx->size;
offset = 0;
@@ -900,7 +900,7 @@ static void xennet_make_frags(struct sk_
for (i = 0; i < frags; i++) {
skb_frag_t *frag = skb_shinfo(skb)->frags + i;
- tx->flags |= NETTXF_more_data;
+ tx->flags |= XEN_NETTXF_more_data;
id = get_id_from_freelist(np->tx_skbs);
np->tx_skbs[id] = skb_get(skb);
@@ -981,9 +981,9 @@ static int network_start_xmit(struct sk_
extra = NULL;
if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
- tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
+ tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
- tx->flags |= NETTXF_data_validated;
+ tx->flags |= XEN_NETTXF_data_validated;
#if HAVE_TSO
if (skb_shinfo(skb)->gso_size) {
@@ -993,7 +993,7 @@ static int network_start_xmit(struct sk_
if (extra)
extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
else
- tx->flags |= NETTXF_extra_info;
+ tx->flags |= XEN_NETTXF_extra_info;
gso->u.gso.size = skb_shinfo(skb)->gso_size;
gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
@@ -1131,7 +1131,7 @@ static int xennet_get_responses(struct n
int err = 0;
unsigned long ret;
- if (rx->flags & NETRXF_extra_info) {
+ if (rx->flags & XEN_NETRXF_extra_info) {
err = xennet_get_extras(np, extras, rp);
cons = np->rx.rsp_cons;
}
@@ -1206,7 +1206,7 @@ static int xennet_get_responses(struct n
__skb_queue_tail(list, skb);
next:
- if (!(rx->flags & NETRXF_more_data))
+ if (!(rx->flags & XEN_NETRXF_more_data))
break;
if (cons + frags == rp) {
@@ -1407,9 +1407,9 @@ err:
skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
skb->len += skb->data_len;
- if (rx->flags & NETRXF_csum_blank)
+ if (rx->flags & XEN_NETRXF_csum_blank)
skb->ip_summed = CHECKSUM_PARTIAL;
- else if (rx->flags & NETRXF_data_validated)
+ else if (rx->flags & XEN_NETRXF_data_validated)
skb->ip_summed = CHECKSUM_UNNECESSARY;
else
skb->ip_summed = CHECKSUM_NONE;
--- head.orig/drivers/xen/usbfront/usbfront-hub.c 2009-10-15 11:45:41.000000000 +0200
+++ head/drivers/xen/usbfront/usbfront-hub.c 2011-04-14 17:50:18.000000000 +0200
@@ -271,8 +271,8 @@ static void xenhcd_hub_descriptor(struct
desc->bDescLength = 7 + 2 * temp;
/* bitmaps for DeviceRemovable and PortPwrCtrlMask */
- memset(&desc->bitmap[0], 0, temp);
- memset(&desc->bitmap[temp], 0xff, temp);
+ memset(&desc->u.hs.DeviceRemovable[0], 0, temp);
+ memset(&desc->u.hs.DeviceRemovable[temp], 0xff, temp);
/* per-port over current reporting and no power switching */
temp = 0x000a;
--- head.orig/drivers/xen/xenbus/xenbus_probe.c 2012-03-22 14:19:07.000000000 +0100
+++ head/drivers/xen/xenbus/xenbus_probe.c 2012-03-22 14:22:22.000000000 +0100
@@ -809,7 +809,7 @@ static struct xenbus_watch fe_watch = {
static int __maybe_unused suspend_dev(struct device *dev, void *data)
#else
-int xenbus_dev_suspend(struct device *dev, pm_message_t state)
+int xenbus_dev_suspend(struct device *dev)
#endif
{
int err = 0;
@@ -823,11 +823,7 @@ int xenbus_dev_suspend(struct device *de
return 0;
drv = to_xenbus_driver(dev->driver);
if (drv->suspend)
-#if !defined(CONFIG_XEN) && !defined(MODULE)
- err = drv->suspend(xdev, state);
-#else
err = drv->suspend(xdev);
-#endif
if (err)
pr_warning("xenbus: suspend %s failed: %i\n",
dev_name(dev), err);
@@ -900,7 +896,15 @@ int xenbus_dev_resume(struct device *dev
}
PARAVIRT_EXPORT_SYMBOL(xenbus_dev_resume);
-#if (defined(CONFIG_XEN) && defined(CONFIG_PM_SLEEP)) || defined(MODULE)
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+int xenbus_dev_cancel(struct device *dev)
+{
+ /* Nothing to undo here: only trace the call and report success (0). */
+ DPRINTK("cancel");
+ return 0;
+}
+PARAVIRT_EXPORT_SYMBOL(xenbus_dev_cancel);
+#elif defined(CONFIG_PM_SLEEP) || defined(MODULE)
void xenbus_suspend(void)
{
DPRINTK("");
--- head.orig/drivers/xen/xenbus/xenbus_probe.h 2011-12-21 11:24:56.000000000 +0100
+++ head/drivers/xen/xenbus/xenbus_probe.h 2011-12-21 11:28:53.000000000 +0100
@@ -91,8 +91,9 @@ extern void xenbus_dev_changed(const cha
extern void xenbus_dev_shutdown(struct device *_dev);
-extern int xenbus_dev_suspend(struct device *dev, pm_message_t state);
+extern int xenbus_dev_suspend(struct device *dev);
extern int xenbus_dev_resume(struct device *dev);
+extern int xenbus_dev_cancel(struct device *dev);
extern void xenbus_otherend_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len,
--- head.orig/drivers/xen/xenoprof/xenoprofile.c 2012-02-16 12:36:22.000000000 +0100
+++ head/drivers/xen/xenoprof/xenoprofile.c 2011-04-15 12:46:57.000000000 +0200
@@ -19,7 +19,7 @@
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/oprofile.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/vmalloc.h>
@@ -58,9 +58,9 @@ static int ovf_irq[NR_CPUS];
/* cpu model type string - copied from Xen on XENOPROF_init command */
static char cpu_type[XENOPROF_CPU_TYPE_SIZE];
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
-static int xenoprof_suspend(struct sys_device * dev, pm_message_t state)
+static int xenoprof_suspend(void)
{
if (xenoprof_enabled == 1)
xenoprof_stop();
@@ -68,46 +68,35 @@ static int xenoprof_suspend(struct sys_d
}
-static int xenoprof_resume(struct sys_device * dev)
+static void xenoprof_resume(void)
{
if (xenoprof_enabled == 1)
xenoprof_start();
- return 0;
}
-static struct sysdev_class oprofile_sysclass = {
- .name = "oprofile",
+static struct syscore_ops oprofile_syscore_ops = {
.resume = xenoprof_resume,
.suspend = xenoprof_suspend
};
-static struct sys_device device_oprofile = {
- .id = 0,
- .cls = &oprofile_sysclass,
-};
-
-
static int __init init_driverfs(void)
{
- int error;
- if (!(error = sysdev_class_register(&oprofile_sysclass)))
- error = sysdev_register(&device_oprofile);
- return error;
+ register_syscore_ops(&oprofile_syscore_ops);
+ return 0;
}
static void exit_driverfs(void)
{
- sysdev_unregister(&device_oprofile);
- sysdev_class_unregister(&oprofile_sysclass);
+ unregister_syscore_ops(&oprofile_syscore_ops);
}
#else
#define init_driverfs() do { } while (0)
#define exit_driverfs() do { } while (0)
-#endif /* CONFIG_PM */
+#endif /* CONFIG_PM_SLEEP */
static unsigned long long oprofile_samples;
static unsigned long long p_oprofile_samples;
--- head.orig/include/xen/balloon.h 2011-01-31 18:07:35.000000000 +0100
+++ head/include/xen/balloon.h 2011-04-13 17:01:31.000000000 +0200
@@ -56,6 +56,31 @@ void balloon_release_driver_page(struct
extern spinlock_t balloon_lock;
#define balloon_lock(__flags) spin_lock_irqsave(&balloon_lock, __flags)
#define balloon_unlock(__flags) spin_unlock_irqrestore(&balloon_lock, __flags)
-#endif
+
+#else /* CONFIG_PARAVIRT_XEN */
+
+#define RETRY_UNLIMITED 0
+
+struct balloon_stats {
+ /* We aim for 'current allocation' == 'target allocation'. */
+ unsigned long current_pages;
+ unsigned long target_pages;
+ /* Number of pages in high- and low-memory balloons. */
+ unsigned long balloon_low;
+ unsigned long balloon_high;
+ unsigned long schedule_delay;
+ unsigned long max_schedule_delay;
+ unsigned long retry_count;
+ unsigned long max_retry_count;
+};
+
+extern struct balloon_stats balloon_stats;
+
+void balloon_set_new_target(unsigned long target);
+
+int alloc_xenballooned_pages(int nr_pages, struct page** pages);
+void free_xenballooned_pages(int nr_pages, struct page** pages);
+
+#endif /* CONFIG_PARAVIRT_XEN */
#endif /* __XEN_BALLOON_H__ */
--- head.orig/include/xen/interface/io/blkif.h 2012-04-04 08:57:09.000000000 +0200
+++ head/include/xen/interface/io/blkif.h 2012-04-04 10:26:18.000000000 +0200
@@ -431,8 +431,17 @@ struct blkif_request {
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
uint64_t id; /* private guest value, echoed in resp */
+#if !defined(CONFIG_PARAVIRT_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+#else
+ union {
+ struct blkif_request_rw {
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ } rw;
+ } u;
+#endif
};
typedef struct blkif_request blkif_request_t;
--- head.orig/include/xen/interface/io/netif.h 2011-01-31 17:49:31.000000000 +0100
+++ head/include/xen/interface/io/netif.h 2011-04-13 15:41:23.000000000 +0200
@@ -50,20 +50,20 @@
*/
/* Protocol checksum field is blank in the packet (hardware offload)? */
-#define _NETTXF_csum_blank (0)
-#define NETTXF_csum_blank (1U<<_NETTXF_csum_blank)
+#define _XEN_NETTXF_csum_blank (0)
+#define XEN_NETTXF_csum_blank (1U<<_XEN_NETTXF_csum_blank)
/* Packet data has been validated against protocol checksum. */
-#define _NETTXF_data_validated (1)
-#define NETTXF_data_validated (1U<<_NETTXF_data_validated)
+#define _XEN_NETTXF_data_validated (1)
+#define XEN_NETTXF_data_validated (1U<<_XEN_NETTXF_data_validated)
/* Packet continues in the next request descriptor. */
-#define _NETTXF_more_data (2)
-#define NETTXF_more_data (1U<<_NETTXF_more_data)
+#define _XEN_NETTXF_more_data (2)
+#define XEN_NETTXF_more_data (1U<<_XEN_NETTXF_more_data)
/* Packet to be followed by extra descriptor(s). */
-#define _NETTXF_extra_info (3)
-#define NETTXF_extra_info (1U<<_NETTXF_extra_info)
+#define _XEN_NETTXF_extra_info (3)
+#define XEN_NETTXF_extra_info (1U<<_XEN_NETTXF_extra_info)
struct netif_tx_request {
grant_ref_t gref; /* Reference to buffer page */
@@ -160,20 +160,24 @@ struct netif_rx_request {
typedef struct netif_rx_request netif_rx_request_t;
/* Packet data has been validated against protocol checksum. */
-#define _NETRXF_data_validated (0)
-#define NETRXF_data_validated (1U<<_NETRXF_data_validated)
+#define _XEN_NETRXF_data_validated (0)
+#define XEN_NETRXF_data_validated (1U<<_XEN_NETRXF_data_validated)
/* Protocol checksum field is blank in the packet (hardware offload)? */
-#define _NETRXF_csum_blank (1)
-#define NETRXF_csum_blank (1U<<_NETRXF_csum_blank)
+#define _XEN_NETRXF_csum_blank (1)
+#define XEN_NETRXF_csum_blank (1U<<_XEN_NETRXF_csum_blank)
/* Packet continues in the next request descriptor. */
-#define _NETRXF_more_data (2)
-#define NETRXF_more_data (1U<<_NETRXF_more_data)
+#define _XEN_NETRXF_more_data (2)
+#define XEN_NETRXF_more_data (1U<<_XEN_NETRXF_more_data)
/* Packet to be followed by extra descriptor(s). */
-#define _NETRXF_extra_info (3)
-#define NETRXF_extra_info (1U<<_NETRXF_extra_info)
+#define _XEN_NETRXF_extra_info (3)
+#define XEN_NETRXF_extra_info (1U<<_XEN_NETRXF_extra_info)
+
+/* GSO Prefix descriptor. */
+#define _XEN_NETRXF_gso_prefix (4)
+#define XEN_NETRXF_gso_prefix (1U<<_XEN_NETRXF_gso_prefix)
struct netif_rx_response {
uint16_t id;
@@ -204,10 +208,10 @@ DEFINE_RING_TYPES(xen_netif_rx,
#define xen_netif_extra_info netif_extra_info
#endif
-#define NETIF_RSP_DROPPED -2
-#define NETIF_RSP_ERROR -1
-#define NETIF_RSP_OKAY 0
+#define XEN_NETIF_RSP_DROPPED -2
+#define XEN_NETIF_RSP_ERROR -1
+#define XEN_NETIF_RSP_OKAY 0
/* No response: used for auxiliary requests (e.g., netif_tx_extra). */
-#define NETIF_RSP_NULL 1
+#define XEN_NETIF_RSP_NULL 1
#endif
--- head.orig/include/xen/public/gntdev.h 2011-02-03 13:52:28.000000000 +0100
+++ head/include/xen/public/gntdev.h 2011-04-13 15:21:38.000000000 +0200
@@ -116,4 +116,35 @@ struct ioctl_gntdev_set_max_grants {
uint32_t count;
};
+/*
+ * Sets up an unmap notification within the page, so that the other side can do
+ * cleanup if this side crashes. Required to implement cross-domain robust
+ * mutexes or close notification on communication channels.
+ *
+ * Each mapped page supports only one notification; multiple calls referring to
+ * the same page overwrite the previous notification. If you do not want the
+ * notification to occur, you must clear it before the page is unmapped; the
+ * actions selected via UNMAP_NOTIFY_* below are otherwise taken on unmap.
+ */
+#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
+_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntdev_unmap_notify))
+struct ioctl_gntdev_unmap_notify {
+ /* IN parameters */
+ /* Offset in the file descriptor for a byte within the page (same as
+ * used in mmap). If using UNMAP_NOTIFY_CLEAR_BYTE, this is the byte to
+ * be cleared. Otherwise, it can be any byte in the page whose
+ * notification we are adjusting.
+ */
+ uint64_t index;
+ /* Action(s) to take on unmap: UNMAP_NOTIFY_* value(s); presumably ORed - confirm */
+ uint32_t action;
+ /* Event channel to notify when UNMAP_NOTIFY_SEND_EVENT is requested */
+ uint32_t event_channel_port;
+};
+
+/* Clear (set to zero) the byte specified by index */
+#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
+/* Send an interrupt on the indicated event channel */
+#define UNMAP_NOTIFY_SEND_EVENT 0x2
+
#endif /* __LINUX_PUBLIC_GNTDEV_H__ */
--- head.orig/include/xen/xenbus.h 2011-12-21 11:17:37.000000000 +0100
+++ head/include/xen/xenbus.h 2011-04-13 15:43:04.000000000 +0200
@@ -105,10 +105,8 @@ struct xenbus_driver {
void (*otherend_changed)(struct xenbus_device *dev,
enum xenbus_state backend_state);
int (*remove)(struct xenbus_device *dev);
-#if !defined(CONFIG_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H)
- int (*suspend)(struct xenbus_device *dev, pm_message_t state);
-#else
int (*suspend)(struct xenbus_device *dev);
+#if defined(CONFIG_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
int (*suspend_cancel)(struct xenbus_device *dev);
#endif
int (*resume)(struct xenbus_device *dev);