|
Jan Beulich |
4bfb4a |
From: jbeulich@novell.com
|
|
Jan Beulich |
4bfb4a |
Subject: eliminate scalability issues from initial mapping setup
|
|
Jan Beulich |
f4eb20 |
Patch-mainline: n/a
|
|
Jan Beulich |
4bfb4a |
References: bnc#417417
|
|
Jan Beulich |
4bfb4a |
|
|
Jan Beulich |
4bfb4a |
Direct Xen to place the initial P->M table outside of the initial
|
|
Jan Beulich |
4bfb4a |
mapping, as otherwise the 1G (implementation) / 2G (theoretical)
|
|
Jan Beulich |
4bfb4a |
restriction on the size of the initial mapping limits the amount
|
|
Jan Beulich |
4bfb4a |
of memory a domain can be handed initially.
|
|
Jan Beulich |
4bfb4a |
|
|
Jan Beulich |
4bfb4a |
Note that the flags passed to HYPERVISOR_update_va_mapping() from
|
|
Jan Beulich |
4bfb4a |
__make_page_writable() and make_lowmem_page_writable() are
|
|
Jan Beulich |
4bfb4a |
intentionally not including UVMF_ALL. This is intended to be an optimal
|
|
Jan Beulich |
4bfb4a |
choice between the overhead of a potential spurious page fault (as
|
|
Jan Beulich |
4bfb4a |
remote CPUs may still have read-only translations in their TLBs) and
|
|
Jan Beulich |
4bfb4a |
the overhead of cross processor flushes. Flushing on the local CPU
|
|
Jan Beulich |
4bfb4a |
shouldn't be as expensive (and hence can be viewed as an optimization
|
|
Jan Beulich |
4bfb4a |
avoiding the spurious page fault on the local CPU), but is required
|
|
Jan Beulich |
4bfb4a |
when the functions are used before the page fault handler gets set up.
|
|
Jan Beulich |
4bfb4a |
|
|
Jan Beulich |
f22562 |
--- head.orig/arch/x86/kernel/head64-xen.c 2012-02-09 12:32:50.000000000 +0100
|
|
Jan Beulich |
f22562 |
+++ head/arch/x86/kernel/head64-xen.c 2012-02-10 14:03:06.000000000 +0100
|
|
Jan Beulich |
f22562 |
@@ -120,6 +120,12 @@ void __init x86_64_start_reservations(ch
|
|
Jan Beulich |
f22562 |
memblock_reserve(__pa_symbol(&_text),
|
|
Jan Beulich |
f22562 |
__pa_symbol(&__bss_stop) - __pa_symbol(&_text));
|
|
Jan Beulich |
4bfb4a |
|
|
Jan Beulich |
4bfb4a |
+ if (xen_feature(XENFEAT_auto_translated_physmap))
|
|
Jan Beulich |
4bfb4a |
+ xen_start_info->mfn_list = ~0UL;
|
|
Jan Beulich |
4bfb4a |
+ else if (xen_start_info->mfn_list < __START_KERNEL_map)
|
|
Jan Beulich |
f22562 |
+ memblock_reserve(PFN_PHYS(xen_start_info->first_p2m_pfn),
|
|
Jan Beulich |
f22562 |
+ PFN_PHYS(xen_start_info->nr_p2m_frames));
|
|
Jan Beulich |
4bfb4a |
+
|
|
Jan Beulich |
4bfb4a |
/*
|
|
Jan Beulich |
4bfb4a |
* At this point everything still needed from the boot loader
|
|
Jan Beulich |
4bfb4a |
* or BIOS or kernel text should be early reserved or marked not
|
|
Jan Beulich |
f22562 |
--- head.orig/arch/x86/kernel/head_64-xen.S 2011-08-09 11:17:44.000000000 +0200
|
|
Jan Beulich |
f22562 |
+++ head/arch/x86/kernel/head_64-xen.S 2011-08-09 11:19:00.000000000 +0200
|
|
Jan Beulich |
48defc |
@@ -17,6 +17,7 @@
|
|
Jan Beulich |
48defc |
#include <linux/elfnote.h>
|
|
Jan Beulich |
4bfb4a |
#include <asm/segment.h>
|
|
Jan Beulich |
4bfb4a |
#include <asm/page.h>
|
|
Jan Beulich |
4bfb4a |
+#include <asm/pgtable.h>
|
|
Jan Beulich |
4bfb4a |
#include <asm/msr.h>
|
|
Jan Beulich |
4bfb4a |
#include <asm/cache.h>
|
|
Jan Beulich |
4bfb4a |
#include <asm/dwarf2.h>
|
|
Jan Beulich |
3692f4 |
@@ -159,6 +160,7 @@ ENTRY(empty_zero_page)
|
|
Jan Beulich |
4bfb4a |
ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .quad startup_64)
|
|
Jan Beulich |
4bfb4a |
ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .quad hypercall_page)
|
|
Jan Beulich |
4bfb4a |
ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad _PAGE_PRESENT, _PAGE_PRESENT)
|
|
Jan Beulich |
4bfb4a |
+ ELFNOTE(Xen, XEN_ELFNOTE_INIT_P2M, .quad VMEMMAP_START)
|
|
Jan Beulich |
3692f4 |
ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "writable_page_tables";
|
|
Jan Beulich |
3692f4 |
.ascii "|writable_descriptor_tables";
|
|
Jan Beulich |
3692f4 |
.ascii "|auto_translated_physmap";
|
|
Jan Beulich |
a3c5f2 |
--- head.orig/arch/x86/kernel/setup-xen.c 2012-05-23 13:48:26.000000000 +0200
|
|
Jan Beulich |
1f9dec |
+++ head/arch/x86/kernel/setup-xen.c 2012-06-08 10:49:58.000000000 +0200
|
|
Jan Beulich |
a3c5f2 |
@@ -1112,6 +1112,54 @@ void __init setup_arch(char **cmdline_p)
|
|
Jan Beulich |
4c2a76 |
init_gbpages();
|
|
Jan Beulich |
4c2a76 |
|
|
Jan Beulich |
4c2a76 |
/* max_pfn_mapped is updated here */
|
|
Jan Beulich |
4c2a76 |
+#ifdef CONFIG_X86_64_XEN
|
|
Jan Beulich |
4c2a76 |
+ if (xen_start_info->mfn_list < __START_KERNEL_map) {
|
|
Jan Beulich |
4c2a76 |
+ /* Map P2M space only after all usable memory. */
|
|
Jan Beulich |
4c2a76 |
+ unsigned long p2m_start = xen_start_info->first_p2m_pfn;
|
|
Jan Beulich |
4c2a76 |
+ unsigned long p2m_end = p2m_start
|
|
Jan Beulich |
4c2a76 |
+ + xen_start_info->nr_p2m_frames;
|
|
Jan Beulich |
4c2a76 |
+ unsigned long temp;
|
|
Jan Beulich |
4c2a76 |
+
|
|
Jan Beulich |
4c2a76 |
+ max_low_pfn_mapped = init_memory_mapping(
|
|
Jan Beulich |
4c2a76 |
+ 0, min(max_low_pfn, p2m_start) << PAGE_SHIFT);
|
|
Jan Beulich |
4c2a76 |
+ max_pfn_mapped = max_low_pfn_mapped;
|
|
Jan Beulich |
4c2a76 |
+
|
|
Jan Beulich |
4c2a76 |
+ if (p2m_end < max_low_pfn)
|
|
Jan Beulich |
4c2a76 |
+ max_low_pfn_mapped = init_memory_mapping(
|
|
Jan Beulich |
4c2a76 |
+ p2m_end << PAGE_SHIFT,
|
|
Jan Beulich |
4c2a76 |
+ max_low_pfn << PAGE_SHIFT);
|
|
Jan Beulich |
4c2a76 |
+ max_pfn_mapped = max_low_pfn_mapped;
|
|
Jan Beulich |
4c2a76 |
+
|
|
Jan Beulich |
4c2a76 |
+ if (max_low_pfn < p2m_start)
|
|
Jan Beulich |
4c2a76 |
+ max_pfn_mapped = init_memory_mapping(
|
|
Jan Beulich |
4c2a76 |
+ max_low_pfn << PAGE_SHIFT,
|
|
Jan Beulich |
4c2a76 |
+ p2m_start << PAGE_SHIFT);
|
|
Jan Beulich |
4c2a76 |
+
|
|
Jan Beulich |
4c2a76 |
+ if (max(max_low_pfn, p2m_end) < max_pfn)
|
|
Jan Beulich |
4c2a76 |
+ max_pfn_mapped = init_memory_mapping(
|
|
Jan Beulich |
4c2a76 |
+ max(max_low_pfn, p2m_end) << PAGE_SHIFT,
|
|
Jan Beulich |
4c2a76 |
+ max_pfn << PAGE_SHIFT);
|
|
Jan Beulich |
4c2a76 |
+
|
|
Jan Beulich |
4c2a76 |
+ temp = max_pfn_mapped;
|
|
Jan Beulich |
4c2a76 |
+ if (p2m_start < max_low_pfn) {
|
|
Jan Beulich |
4c2a76 |
+ temp = init_memory_mapping(
|
|
Jan Beulich |
4c2a76 |
+ p2m_start << PAGE_SHIFT,
|
|
Jan Beulich |
4c2a76 |
+ min(max_low_pfn, p2m_end) << PAGE_SHIFT);
|
|
Jan Beulich |
4c2a76 |
+ if (temp > max_low_pfn_mapped)
|
|
Jan Beulich |
4c2a76 |
+ max_low_pfn_mapped = temp;
|
|
Jan Beulich |
4c2a76 |
+ }
|
|
Jan Beulich |
4c2a76 |
+
|
|
Jan Beulich |
4c2a76 |
+ if (max_low_pfn < p2m_end)
|
|
Jan Beulich |
4c2a76 |
+ temp = init_memory_mapping(
|
|
Jan Beulich |
4c2a76 |
+ max(max_low_pfn, p2m_start) << PAGE_SHIFT,
|
|
Jan Beulich |
4c2a76 |
+ p2m_end << PAGE_SHIFT);
|
|
Jan Beulich |
4c2a76 |
+ if (temp > max_pfn_mapped)
|
|
Jan Beulich |
4c2a76 |
+ max_pfn_mapped = temp;
|
|
Jan Beulich |
4c2a76 |
+
|
|
Jan Beulich |
4c2a76 |
+ goto init_memory_mapping_done;
|
|
Jan Beulich |
4c2a76 |
+ }
|
|
Jan Beulich |
4c2a76 |
+#endif
|
|
Jan Beulich |
4c2a76 |
+
|
|
Jan Beulich |
4c2a76 |
max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
|
|
Jan Beulich |
4c2a76 |
max_pfn_mapped = max_low_pfn_mapped;
|
|
Jan Beulich |
4c2a76 |
|
|
Jan Beulich |
a3c5f2 |
@@ -1119,6 +1167,7 @@ void __init setup_arch(char **cmdline_p)
|
|
Jan Beulich |
4c2a76 |
if (max_pfn > max_low_pfn) {
|
|
Jan Beulich |
4c2a76 |
max_pfn_mapped = init_memory_mapping(1UL<<32,
|
|
Jan Beulich |
4c2a76 |
max_pfn<<PAGE_SHIFT);
|
|
Jan Beulich |
4c2a76 |
+ init_memory_mapping_done:
|
|
Jan Beulich |
4c2a76 |
/* can we preseve max_low_pfn ?*/
|
|
Jan Beulich |
4c2a76 |
max_low_pfn = max_pfn;
|
|
Jan Beulich |
4c2a76 |
}
|
|
Jan Beulich |
1f9dec |
@@ -1211,7 +1260,7 @@ void __init setup_arch(char **cmdline_p)
|
|
Jan Beulich |
4bfb4a |
difference = xen_start_info->nr_pages - max_pfn;
|
|
Jan Beulich |
4bfb4a |
|
|
Jan Beulich |
4bfb4a |
set_xen_guest_handle(reservation.extent_start,
|
|
Jan Beulich |
4bfb4a |
- ((unsigned long *)xen_start_info->mfn_list) + max_pfn);
|
|
Jan Beulich |
4bfb4a |
+ phys_to_machine_mapping + max_pfn);
|
|
Jan Beulich |
4bfb4a |
reservation.nr_extents = difference;
|
|
Jan Beulich |
4bfb4a |
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
|
|
Jan Beulich |
4bfb4a |
&reservation);
|
|
Jan Beulich |
1f9dec |
@@ -1224,13 +1273,83 @@ void __init setup_arch(char **cmdline_p)
|
|
Jan Beulich |
9f943f |
phys_to_machine_mapping = alloc_bootmem_pages(
|
|
Jan Beulich |
9f943f |
max_pfn * sizeof(unsigned long));
|
|
Jan Beulich |
9f943f |
memcpy(phys_to_machine_mapping,
|
|
Jan Beulich |
9f943f |
- (unsigned long *)xen_start_info->mfn_list,
|
|
Jan Beulich |
9f943f |
+ __va(__pa(xen_start_info->mfn_list)),
|
|
Jan Beulich |
9f943f |
p2m_pages * sizeof(unsigned long));
|
|
Jan Beulich |
f22562 |
memset(phys_to_machine_mapping + p2m_pages, ~0,
|
|
Jan Beulich |
f22562 |
(max_pfn - p2m_pages) * sizeof(unsigned long));
|
|
Jan Beulich |
f22562 |
- free_bootmem(__pa(xen_start_info->mfn_list),
|
|
Jan Beulich |
f22562 |
- PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
|
|
Jan Beulich |
f22562 |
- sizeof(unsigned long))));
|
|
Jan Beulich |
f22562 |
+#ifdef CONFIG_X86_64
|
|
Jan Beulich |
4bfb4a |
+ if (xen_start_info->mfn_list == VMEMMAP_START) {
|
|
Jan Beulich |
4bfb4a |
+ /*
|
|
Jan Beulich |
4bfb4a |
+ * Since it is well isolated we can (and since it is
|
|
Jan Beulich |
4bfb4a |
+ * perhaps large we should) also free the page tables
|
|
Jan Beulich |
4bfb4a |
+ * mapping the initial P->M table.
|
|
Jan Beulich |
4bfb4a |
+ */
|
|
Jan Beulich |
4bfb4a |
+ unsigned long va = VMEMMAP_START, pa;
|
|
Jan Beulich |
4bfb4a |
+ pgd_t *pgd = pgd_offset_k(va);
|
|
Jan Beulich |
4bfb4a |
+ pud_t *pud_page = pud_offset(pgd, 0);
|
|
Jan Beulich |
4bfb4a |
+
|
|
Jan Beulich |
4bfb4a |
+ BUILD_BUG_ON(VMEMMAP_START & ~PGDIR_MASK);
|
|
Jan Beulich |
4bfb4a |
+ xen_l4_entry_update(pgd, __pgd(0));
|
|
Jan Beulich |
9f943f |
+ do {
|
|
Jan Beulich |
4bfb4a |
+ pud_t *pud = pud_page + pud_index(va);
|
|
Jan Beulich |
4bfb4a |
+
|
|
Jan Beulich |
4bfb4a |
+ if (pud_none(*pud))
|
|
Jan Beulich |
4bfb4a |
+ va += PUD_SIZE;
|
|
Jan Beulich |
4bfb4a |
+ else if (pud_large(*pud)) {
|
|
Jan Beulich |
4bfb4a |
+ pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
|
|
Jan Beulich |
4bfb4a |
+ make_pages_writable(__va(pa),
|
|
Jan Beulich |
4bfb4a |
+ PUD_SIZE >> PAGE_SHIFT,
|
|
Jan Beulich |
4bfb4a |
+ XENFEAT_writable_page_tables);
|
|
Jan Beulich |
4bfb4a |
+ free_bootmem(pa, PUD_SIZE);
|
|
Jan Beulich |
4bfb4a |
+ va += PUD_SIZE;
|
|
Jan Beulich |
4bfb4a |
+ } else {
|
|
Jan Beulich |
4bfb4a |
+ pmd_t *pmd = pmd_offset(pud, va);
|
|
Jan Beulich |
4bfb4a |
+
|
|
Jan Beulich |
4bfb4a |
+ if (pmd_large(*pmd)) {
|
|
Jan Beulich |
4bfb4a |
+ pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
|
|
Jan Beulich |
4bfb4a |
+ make_pages_writable(__va(pa),
|
|
Jan Beulich |
4bfb4a |
+ PMD_SIZE >> PAGE_SHIFT,
|
|
Jan Beulich |
4bfb4a |
+ XENFEAT_writable_page_tables);
|
|
Jan Beulich |
4bfb4a |
+ free_bootmem(pa, PMD_SIZE);
|
|
Jan Beulich |
4bfb4a |
+ } else if (!pmd_none(*pmd)) {
|
|
Jan Beulich |
b08ea4 |
+ unsigned int i;
|
|
Jan Beulich |
4bfb4a |
+ pte_t *pte = pte_offset_kernel(pmd, va);
|
|
Jan Beulich |
4bfb4a |
+
|
|
Jan Beulich |
4bfb4a |
+ for (i = 0; i < PTRS_PER_PTE; ++i) {
|
|
Jan Beulich |
4bfb4a |
+ if (pte_none(pte[i]))
|
|
Jan Beulich |
4bfb4a |
+ break;
|
|
Jan Beulich |
4bfb4a |
+ pa = pte_pfn(pte[i]) << PAGE_SHIFT;
|
|
Jan Beulich |
4bfb4a |
+ make_page_writable(__va(pa),
|
|
Jan Beulich |
4bfb4a |
+ XENFEAT_writable_page_tables);
|
|
Jan Beulich |
4bfb4a |
+ free_bootmem(pa, PAGE_SIZE);
|
|
Jan Beulich |
4bfb4a |
+ }
|
|
Jan Beulich |
4bfb4a |
+ ClearPagePinned(virt_to_page(pte));
|
|
Jan Beulich |
4bfb4a |
+ make_page_writable(pte,
|
|
Jan Beulich |
4bfb4a |
+ XENFEAT_writable_page_tables);
|
|
Jan Beulich |
4bfb4a |
+ free_bootmem(__pa(pte), PAGE_SIZE);
|
|
Jan Beulich |
4bfb4a |
+ }
|
|
Jan Beulich |
4bfb4a |
+ va += PMD_SIZE;
|
|
Jan Beulich |
4bfb4a |
+ if (pmd_index(va))
|
|
Jan Beulich |
4bfb4a |
+ continue;
|
|
Jan Beulich |
4bfb4a |
+ ClearPagePinned(virt_to_page(pmd));
|
|
Jan Beulich |
4bfb4a |
+ make_page_writable(pmd,
|
|
Jan Beulich |
4bfb4a |
+ XENFEAT_writable_page_tables);
|
|
Jan Beulich |
4bfb4a |
+ free_bootmem(__pa((unsigned long)pmd
|
|
Jan Beulich |
4bfb4a |
+ & PAGE_MASK),
|
|
Jan Beulich |
9f943f |
+ PAGE_SIZE);
|
|
Jan Beulich |
4bfb4a |
+ }
|
|
Jan Beulich |
9f943f |
+ } while (pud_index(va));
|
|
Jan Beulich |
4bfb4a |
+ ClearPagePinned(virt_to_page(pud_page));
|
|
Jan Beulich |
4bfb4a |
+ make_page_writable(pud_page,
|
|
Jan Beulich |
9f943f |
+ XENFEAT_writable_page_tables);
|
|
Jan Beulich |
4bfb4a |
+ free_bootmem(__pa((unsigned long)pud_page & PAGE_MASK),
|
|
Jan Beulich |
9f943f |
+ PAGE_SIZE);
|
|
Jan Beulich |
4bfb4a |
+ } else if (!WARN_ON(xen_start_info->mfn_list
|
|
Jan Beulich |
4bfb4a |
+ < __START_KERNEL_map))
|
|
Jan Beulich |
4bfb4a |
+#endif
|
|
Jan Beulich |
4bfb4a |
+ free_bootmem(__pa(xen_start_info->mfn_list),
|
|
Jan Beulich |
f22562 |
+ PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
|
|
Jan Beulich |
f22562 |
+ sizeof(unsigned long))));
|
|
Jan Beulich |
4bfb4a |
|
|
Jan Beulich |
8ed7ef |
if (!is_initial_xendomain() || kexec_enabled())
|
|
Jan Beulich |
8ed7ef |
setup_pfn_to_mfn_frame_list(__alloc_bootmem);
|
|
Jan Beulich |
9f943f |
--- head.orig/arch/x86/mm/init-xen.c 2012-04-11 17:13:04.000000000 +0200
|
|
Jan Beulich |
9f943f |
+++ head/arch/x86/mm/init-xen.c 2012-04-11 18:02:45.000000000 +0200
|
|
Jan Beulich |
9f943f |
@@ -352,9 +352,20 @@ unsigned long __init_refok init_memory_m
|
|
Jan Beulich |
3b6edf |
* RO all the pagetable pages, including the ones that are beyond
|
|
Jan Beulich |
3b6edf |
* pgt_buf_end at that time.
|
|
Jan Beulich |
3b6edf |
*/
|
|
Jan Beulich |
d3bfd6 |
- if (!after_bootmem && pgt_buf_top > pgt_buf_start)
|
|
Jan Beulich |
d3bfd6 |
+ if (!after_bootmem && pgt_buf_top > pgt_buf_start) {
|
|
Jan Beulich |
9d5ae8 |
+#ifdef CONFIG_X86_64
|
|
Jan Beulich |
9d5ae8 |
+ if (xen_start_info->mfn_list < __START_KERNEL_map
|
|
Jan Beulich |
d3bfd6 |
+ && pgt_buf_start <= xen_start_info->first_p2m_pfn
|
|
Jan Beulich |
d3bfd6 |
+ && pgt_buf_top > xen_start_info->first_p2m_pfn) {
|
|
Jan Beulich |
3b6edf |
+ x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
|
|
Jan Beulich |
3b6edf |
+ PFN_PHYS(xen_start_info->first_p2m_pfn));
|
|
Jan Beulich |
d3bfd6 |
+ pgt_buf_start = xen_start_info->first_p2m_pfn
|
|
Jan Beulich |
d3bfd6 |
+ + xen_start_info->nr_p2m_frames;
|
|
Jan Beulich |
9d5ae8 |
+ }
|
|
Jan Beulich |
9d5ae8 |
+#endif
|
|
Jan Beulich |
3b6edf |
x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
|
|
Jan Beulich |
3b6edf |
PFN_PHYS(pgt_buf_top));
|
|
Jan Beulich |
9d5ae8 |
+ }
|
|
Jan Beulich |
9d5ae8 |
|
|
Jan Beulich |
9d5ae8 |
if (!after_bootmem)
|
|
Jan Beulich |
9d5ae8 |
early_memtest(start, end);
|
|
Jan Beulich |
9f943f |
--- head.orig/arch/x86/mm/init_64-xen.c 2012-04-11 17:55:48.000000000 +0200
|
|
Jan Beulich |
9f943f |
+++ head/arch/x86/mm/init_64-xen.c 2012-04-11 18:02:40.000000000 +0200
|
|
Jan Beulich |
9f943f |
@@ -220,6 +220,17 @@ void sync_global_pgds(unsigned long star
|
|
Jan Beulich |
cf6d99 |
}
|
|
Jan Beulich |
9d5ae8 |
}
|
|
Jan Beulich |
9d5ae8 |
|
|
Jan Beulich |
9d5ae8 |
+static __init unsigned long get_table_end(void)
|
|
Jan Beulich |
4bfb4a |
+{
|
|
Jan Beulich |
d3bfd6 |
+ BUG_ON(!pgt_buf_end);
|
|
Jan Beulich |
4bfb4a |
+ if (xen_start_info->mfn_list < __START_KERNEL_map
|
|
Jan Beulich |
d3bfd6 |
+ && pgt_buf_end == xen_start_info->first_p2m_pfn) {
|
|
Jan Beulich |
d3bfd6 |
+ pgt_buf_end += xen_start_info->nr_p2m_frames;
|
|
Jan Beulich |
d3bfd6 |
+ pgt_buf_top += xen_start_info->nr_p2m_frames;
|
|
Jan Beulich |
4bfb4a |
+ }
|
|
Jan Beulich |
d3bfd6 |
+ return pgt_buf_end++;
|
|
Jan Beulich |
4bfb4a |
+}
|
|
Jan Beulich |
4bfb4a |
+
|
|
Jan Beulich |
4bfb4a |
/*
|
|
Jan Beulich |
4bfb4a |
* NOTE: This function is marked __ref because it calls __init function
|
|
Jan Beulich |
4bfb4a |
* (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
|
|
Jan Beulich |
9f943f |
@@ -231,8 +242,7 @@ static __ref void *spp_getpage(void)
|
|
Jan Beulich |
4bfb4a |
if (after_bootmem)
|
|
Jan Beulich |
48defc |
ptr = (void *) get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
|
|
Jan Beulich |
d3bfd6 |
else if (pgt_buf_end < pgt_buf_top) {
|
|
Jan Beulich |
d3bfd6 |
- ptr = __va(pgt_buf_end << PAGE_SHIFT);
|
|
Jan Beulich |
d3bfd6 |
- pgt_buf_end++;
|
|
Jan Beulich |
9d5ae8 |
+ ptr = __va(get_table_end() << PAGE_SHIFT);
|
|
Jan Beulich |
683661 |
clear_page(ptr);
|
|
Jan Beulich |
4bfb4a |
} else
|
|
Jan Beulich |
4bfb4a |
ptr = alloc_bootmem_pages(PAGE_SIZE);
|
|
Jan Beulich |
9f943f |
@@ -427,8 +437,7 @@ static __ref void *alloc_low_page(unsign
|
|
Jan Beulich |
4bfb4a |
return adr;
|
|
Jan Beulich |
4bfb4a |
}
|
|
Jan Beulich |
4bfb4a |
|
|
Jan Beulich |
d3bfd6 |
- BUG_ON(!pgt_buf_end);
|
|
Jan Beulich |
d3bfd6 |
- pfn = pgt_buf_end++;
|
|
Jan Beulich |
9d5ae8 |
+ pfn = get_table_end();
|
|
Jan Beulich |
d3bfd6 |
if (pfn >= pgt_buf_top)
|
|
Jan Beulich |
4bfb4a |
panic("alloc_low_page: ran out of memory");
|
|
Jan Beulich |
4bfb4a |
|
|
Jan Beulich |
9f943f |
@@ -469,14 +478,29 @@ static inline int __meminit make_readonl
|
|
Jan Beulich |
4bfb4a |
/* Make new page tables read-only on the first pass. */
|
|
Jan Beulich |
4bfb4a |
if (!xen_feature(XENFEAT_writable_page_tables)
|
|
Jan Beulich |
4bfb4a |
&& !max_pfn_mapped
|
|
Jan Beulich |
d3bfd6 |
- && (paddr >= (pgt_buf_start << PAGE_SHIFT))
|
|
Jan Beulich |
d3bfd6 |
- && (paddr < (pgt_buf_top << PAGE_SHIFT)))
|
|
Jan Beulich |
4bfb4a |
- readonly = 1;
|
|
Jan Beulich |
d3bfd6 |
+ && (paddr >= (pgt_buf_start << PAGE_SHIFT))) {
|
|
Jan Beulich |
d3bfd6 |
+ unsigned long top = pgt_buf_top;
|
|
Jan Beulich |
4bfb4a |
+
|
|
Jan Beulich |
9d5ae8 |
+ /* Account for the range get_table_end() skips. */
|
|
Jan Beulich |
4bfb4a |
+ if (xen_start_info->mfn_list < __START_KERNEL_map
|
|
Jan Beulich |
d3bfd6 |
+ && pgt_buf_end <= xen_start_info->first_p2m_pfn
|
|
Jan Beulich |
4bfb4a |
+ && top > xen_start_info->first_p2m_pfn)
|
|
Jan Beulich |
4bfb4a |
+ top += xen_start_info->nr_p2m_frames;
|
|
Jan Beulich |
4bfb4a |
+ if (paddr < (top << PAGE_SHIFT))
|
|
Jan Beulich |
4bfb4a |
+ readonly = 1;
|
|
Jan Beulich |
4bfb4a |
+ }
|
|
Jan Beulich |
4bfb4a |
/* Make old page tables read-only. */
|
|
Jan Beulich |
4bfb4a |
if (!xen_feature(XENFEAT_writable_page_tables)
|
|
Jan Beulich |
4bfb4a |
&& (paddr >= (xen_start_info->pt_base - __START_KERNEL_map))
|
|
Jan Beulich |
d3bfd6 |
&& (paddr < (pgt_buf_end << PAGE_SHIFT)))
|
|
Jan Beulich |
4bfb4a |
readonly = 1;
|
|
Jan Beulich |
4bfb4a |
+ /* Make P->M table (and its page tables) read-only. */
|
|
Jan Beulich |
4bfb4a |
+ if (!xen_feature(XENFEAT_writable_page_tables)
|
|
Jan Beulich |
4bfb4a |
+ && xen_start_info->mfn_list < __START_KERNEL_map
|
|
Jan Beulich |
4bfb4a |
+ && paddr >= (xen_start_info->first_p2m_pfn << PAGE_SHIFT)
|
|
Jan Beulich |
4bfb4a |
+ && paddr < (xen_start_info->first_p2m_pfn
|
|
Jan Beulich |
4bfb4a |
+ + xen_start_info->nr_p2m_frames) << PAGE_SHIFT)
|
|
Jan Beulich |
4bfb4a |
+ readonly = 1;
|
|
Jan Beulich |
4bfb4a |
|
|
Jan Beulich |
4bfb4a |
/*
|
|
Jan Beulich |
4bfb4a |
* No need for writable mapping of kernel image. This also ensures that
|
|
Jan Beulich |
9f943f |
@@ -548,7 +572,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned
|
|
Jan Beulich |
4c2a76 |
|
|
Jan Beulich |
4c2a76 |
int i = pmd_index(address);
|
|
Jan Beulich |
4c2a76 |
|
|
Jan Beulich |
4c2a76 |
- for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
|
|
Jan Beulich |
4c2a76 |
+ for (; i < PTRS_PER_PMD; i++, address = (address & PMD_MASK) + PMD_SIZE) {
|
|
Jan Beulich |
4c2a76 |
unsigned long pte_phys;
|
|
Jan Beulich |
4c2a76 |
pmd_t *pmd = pmd_page + pmd_index(address);
|
|
Jan Beulich |
4c2a76 |
pte_t *pte;
|
|
Jan Beulich |
9f943f |
@@ -760,6 +784,12 @@ void __init xen_init_pt(void)
|
|
Jan Beulich |
2396a9 |
(PTRS_PER_PUD - pud_index(__START_KERNEL_map))
|
|
Jan Beulich |
2396a9 |
* sizeof(*level3_kernel_pgt));
|
|
Jan Beulich |
4bfb4a |
|
|
Jan Beulich |
4bfb4a |
+ /* Copy the initial P->M table mappings if necessary. */
|
|
Jan Beulich |
4bfb4a |
+ addr = pgd_index(xen_start_info->mfn_list);
|
|
Jan Beulich |
4bfb4a |
+ if (addr < pgd_index(__START_KERNEL_map))
|
|
Jan Beulich |
4bfb4a |
+ init_level4_pgt[addr] =
|
|
Jan Beulich |
4bfb4a |
+ ((pgd_t *)xen_start_info->pt_base)[addr];
|
|
Jan Beulich |
4bfb4a |
+
|
|
Jan Beulich |
4bfb4a |
/* Do an early initialization of the fixmap area. */
|
|
Jan Beulich |
4bfb4a |
addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE);
|
|
Jan Beulich |
2396a9 |
if (pud_present(level3_kernel_pgt[pud_index(addr)])) {
|
|
Jan Beulich |
9f943f |
@@ -791,22 +821,27 @@ void __init xen_init_pt(void)
|
|
Jan Beulich |
9d5ae8 |
void __init xen_finish_init_mapping(void)
|
|
Jan Beulich |
4bfb4a |
{
|
|
Jan Beulich |
9d5ae8 |
unsigned long start, end;
|
|
Jan Beulich |
4bfb4a |
+ struct mmuext_op mmuext;
|
|
Jan Beulich |
4bfb4a |
|
|
Jan Beulich |
4bfb4a |
/* Re-vector virtual addresses pointing into the initial
|
|
Jan Beulich |
4bfb4a |
mapping to the just-established permanent ones. */
|
|
Jan Beulich |
4bfb4a |
xen_start_info = __va(__pa(xen_start_info));
|
|
Jan Beulich |
4bfb4a |
xen_start_info->pt_base = (unsigned long)
|
|
Jan Beulich |
4bfb4a |
__va(__pa(xen_start_info->pt_base));
|
|
Jan Beulich |
4bfb4a |
- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
|
Jan Beulich |
4bfb4a |
+ if (!xen_feature(XENFEAT_auto_translated_physmap)
|
|
Jan Beulich |
4bfb4a |
+ && xen_start_info->mfn_list >= __START_KERNEL_map)
|
|
Jan Beulich |
4bfb4a |
phys_to_machine_mapping =
|
|
Jan Beulich |
4bfb4a |
__va(__pa(xen_start_info->mfn_list));
|
|
Jan Beulich |
4bfb4a |
- xen_start_info->mfn_list = (unsigned long)
|
|
Jan Beulich |
4bfb4a |
- phys_to_machine_mapping;
|
|
Jan Beulich |
4bfb4a |
- }
|
|
Jan Beulich |
4bfb4a |
if (xen_start_info->mod_start)
|
|
Jan Beulich |
4bfb4a |
xen_start_info->mod_start = (unsigned long)
|
|
Jan Beulich |
4bfb4a |
__va(__pa(xen_start_info->mod_start));
|
|
Jan Beulich |
4bfb4a |
|
|
Jan Beulich |
4bfb4a |
+ /* Unpin the no longer used Xen provided page tables. */
|
|
Jan Beulich |
4bfb4a |
+ mmuext.cmd = MMUEXT_UNPIN_TABLE;
|
|
Jan Beulich |
ab11d6 |
+ mmuext.arg1.mfn = virt_to_mfn(xen_start_info->pt_base);
|
|
Jan Beulich |
4bfb4a |
+ if (HYPERVISOR_mmuext_op(&mmuext, 1, NULL, DOMID_SELF))
|
|
Jan Beulich |
4bfb4a |
+ BUG();
|
|
Jan Beulich |
4bfb4a |
+
|
|
Jan Beulich |
4bfb4a |
/* Destroy the Xen-created mappings beyond the kernel image. */
|
|
Jan Beulich |
fdf2d6 |
start = PAGE_ALIGN(_brk_end);
|
|
Jan Beulich |
d3bfd6 |
end = __START_KERNEL_map + (pgt_buf_start << PAGE_SHIFT);
|
|
Jan Beulich |
f22562 |
--- head.orig/arch/x86/mm/pageattr-xen.c 2012-02-09 12:32:50.000000000 +0100
|
|
Jan Beulich |
f22562 |
+++ head/arch/x86/mm/pageattr-xen.c 2012-02-10 14:03:23.000000000 +0100
|
|
Jan Beulich |
f22562 |
@@ -1490,7 +1490,7 @@ static void __make_page_writable(unsigne
|
|
Jan Beulich |
4bfb4a |
|
|
Jan Beulich |
4bfb4a |
pte = lookup_address(va, &level);
|
|
Jan Beulich |
4bfb4a |
BUG_ON(!pte || level != PG_LEVEL_4K);
|
|
Jan Beulich |
4bfb4a |
- if (HYPERVISOR_update_va_mapping(va, pte_mkwrite(*pte), 0))
|
|
Jan Beulich |
4bfb4a |
+ if (HYPERVISOR_update_va_mapping(va, pte_mkwrite(*pte), UVMF_INVLPG))
|
|
Jan Beulich |
4bfb4a |
BUG();
|
|
Jan Beulich |
4bfb4a |
if (in_secondary_range(va)) {
|
|
Jan Beulich |
4bfb4a |
unsigned long pfn = pte_pfn(*pte);
|
|
Jan Beulich |
f22562 |
--- head.orig/arch/x86/mm/pgtable-xen.c 2011-04-11 16:14:31.000000000 +0200
|
|
Jan Beulich |
f22562 |
+++ head/arch/x86/mm/pgtable-xen.c 2011-02-03 14:42:41.000000000 +0100
|
|
Jan Beulich |
30e8a1 |
@@ -344,7 +344,7 @@ void __init xen_init_pgd_pin(void)
|
|
Jan Beulich |
4bfb4a |
if (PTRS_PER_PUD > 1) /* not folded */
|
|
Jan Beulich |
4bfb4a |
SetPagePinned(virt_to_page(pud));
|
|
Jan Beulich |
4bfb4a |
for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
|
|
Jan Beulich |
4bfb4a |
- if (!pud_present(*pud))
|
|
Jan Beulich |
4bfb4a |
+ if (!pud_present(*pud) || pud_large(*pud))
|
|
Jan Beulich |
4bfb4a |
continue;
|
|
Jan Beulich |
4bfb4a |
pmd = pmd_offset(pud, 0);
|
|
Jan Beulich |
4bfb4a |
if (PTRS_PER_PMD > 1) /* not folded */
|
|
Jan Beulich |
30e8a1 |
@@ -355,7 +355,7 @@ void __init xen_init_pgd_pin(void)
|
|
Jan Beulich |
4bfb4a |
&& m >= pmd_index(HYPERVISOR_VIRT_START))
|
|
Jan Beulich |
4bfb4a |
continue;
|
|
Jan Beulich |
4bfb4a |
#endif
|
|
Jan Beulich |
4bfb4a |
- if (!pmd_present(*pmd))
|
|
Jan Beulich |
4bfb4a |
+ if (!pmd_present(*pmd) || pmd_large(*pmd))
|
|
Jan Beulich |
4bfb4a |
continue;
|
|
Jan Beulich |
4bfb4a |
SetPagePinned(pmd_page(*pmd));
|
|
Jan Beulich |
4bfb4a |
}
|
|
Jan Beulich |
9f943f |
--- head.orig/arch/x86/mm/pgtable_32-xen.c 2012-04-11 13:26:23.000000000 +0200
|
|
Jan Beulich |
f22562 |
+++ head/arch/x86/mm/pgtable_32-xen.c 2011-02-03 14:42:41.000000000 +0100
|
|
Jan Beulich |
9f943f |
@@ -173,6 +173,6 @@ void make_lowmem_page_writable(void *va,
|
|
Jan Beulich |
4bfb4a |
pte = lookup_address((unsigned long)va, &level);
|
|
Jan Beulich |
4bfb4a |
BUG_ON(!pte || level != PG_LEVEL_4K || !pte_present(*pte));
|
|
Jan Beulich |
4bfb4a |
rc = HYPERVISOR_update_va_mapping(
|
|
Jan Beulich |
4bfb4a |
- (unsigned long)va, pte_mkwrite(*pte), 0);
|
|
Jan Beulich |
4bfb4a |
+ (unsigned long)va, pte_mkwrite(*pte), UVMF_INVLPG);
|
|
Jan Beulich |
4bfb4a |
BUG_ON(rc);
|
|
Jan Beulich |
4bfb4a |
}
|