From: jbeulich@novell.com
Subject: fix issues with the assignment of huge amounts of memory
Patch-mainline: obsolete
References: bnc#482614, bnc#537435, bnc#700856
--- head.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2012-05-31 14:50:05.000000000 +0200
+++ head/arch/x86/include/mach-xen/asm/hypervisor.h 2012-05-11 16:46:55.000000000 +0200
@@ -103,6 +103,10 @@ void xen_pgd_pin(unsigned long ptr);
void xen_pgd_unpin(unsigned long ptr);
void xen_init_pgd_pin(void);
+#ifdef CONFIG_PM_SLEEP /* otherwise the definition is static __init */
+void setup_pfn_to_mfn_frame_list(void *(*)(unsigned long, unsigned long,
+ unsigned long));
+#endif /* CONFIG_PM_SLEEP */
void xen_set_ldt(const void *ptr, unsigned int ents);
--- head.orig/arch/x86/kernel/e820-xen.c 2012-02-10 11:29:49.000000000 +0100
+++ head/arch/x86/kernel/e820-xen.c 2012-02-10 13:42:50.000000000 +0100
@@ -926,6 +926,26 @@ static int __init parse_memopt(char *p)
/* don't remove all of memory when handling "mem={invalid}" param */
if (mem_size == 0)
return -EINVAL;
+#ifdef CONFIG_XEN
+ /*
+ * A little less than 2% of available memory is needed for page
+ * tables, p2m map, and mem_map. Hence the maximum amount of memory
+ * we can potentially balloon up to can in no case exceed about 50
+ * times what we've been given initially. Since even with that we
+ * won't be able to boot (due to various calculations done based on
+ * the total number of pages) we further restrict this to a factor of 32.
+ */
+ if ((mem_size >> (PAGE_SHIFT + 5)) > xen_start_info->nr_pages) {
+ u64 size = (u64)xen_start_info->nr_pages << 5;
+
+ pr_warn("mem=%Luk is invalid for an initial"
+ " allocation of %luk, using %Luk\n",
+ (unsigned long long)mem_size >> 10,
+ xen_start_info->nr_pages << (PAGE_SHIFT - 10),
+ (unsigned long long)size << (PAGE_SHIFT - 10));
+ mem_size = size << PAGE_SHIFT;
+ }
+#endif
e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
i = e820.nr_map - 1;
@@ -1125,6 +1145,7 @@ void __init e820_reserve_resources_late(
char *__init default_machine_specific_memory_setup(void)
{
int rc, nr_map;
+ unsigned long maxmem;
struct xen_memory_map memmap;
static struct e820entry __initdata map[E820MAX];
@@ -1150,6 +1171,29 @@ char *__init default_machine_specific_me
BUG();
#ifdef CONFIG_XEN
+ /* See the comment in parse_memopt(). */
+ for (maxmem = rc = 0; rc < e820.nr_map; ++rc)
+ if (e820.map[rc].type == E820_RAM)
+ maxmem += e820.map[rc].size >> PAGE_SHIFT;
+ if (is_initial_xendomain()) {
+ domid_t domid = DOMID_SELF;
+
+ rc = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
+ if (rc > 0 && maxmem > rc)
+ maxmem = rc;
+ }
+ if ((maxmem >> 5) > xen_start_info->nr_pages) {
+ unsigned long long size = (u64)xen_start_info->nr_pages << 5;
+
+ pr_warn("maxmem of %luM is invalid for an initial"
+ " allocation of %luM, using %LuM\n",
+ maxmem >> (20 - PAGE_SHIFT),
+ xen_start_info->nr_pages >> (20 - PAGE_SHIFT),
+ size >> (20 - PAGE_SHIFT));
+ size <<= PAGE_SHIFT;
+ e820_remove_range(size, ULLONG_MAX - size, E820_RAM, 1);
+ }
+
if (is_initial_xendomain()) {
memmap.nr_entries = E820MAX;
set_xen_guest_handle(memmap.buffer, machine_e820.map);
--- head.orig/arch/x86/kernel/setup-xen.c 2012-04-11 17:11:34.000000000 +0200
+++ head/arch/x86/kernel/setup-xen.c 2012-06-08 10:49:46.000000000 +0200
@@ -132,12 +132,7 @@ static struct notifier_block xen_panic_b
unsigned long *phys_to_machine_mapping;
EXPORT_SYMBOL(phys_to_machine_mapping);
-unsigned long *pfn_to_mfn_frame_list_list,
-#ifdef CONFIG_X86_64
- *pfn_to_mfn_frame_list[512];
-#else
- *pfn_to_mfn_frame_list[128];
-#endif
+static unsigned long *pfn_to_mfn_frame_list_list, **pfn_to_mfn_frame_list;
/* Raw start-of-day parameters from the hypervisor. */
start_info_t *xen_start_info;
@@ -178,6 +173,62 @@ struct boot_params __initdata boot_param
#else
struct boot_params boot_params;
#endif
+#else /* CONFIG_XEN */
+/*
+ * Set up (boot: allocator given) or refresh in place (resume: NULL) the frame
+ * lists, and the list of frame lists, that locate the p2m table, and publish
+ * them through shared_info. Used by save/restore and kexec/crash.
+ */
+#ifdef CONFIG_PM_SLEEP
+void
+#else
+static void __init
+#endif
+setup_pfn_to_mfn_frame_list(typeof(__alloc_bootmem) *__alloc_bootmem)
+{
+	unsigned long i, j, size;
+	unsigned int k, order, fpp = PAGE_SIZE / sizeof(unsigned long);
+
+	size = (max_pfn + fpp - 1) / fpp;	/* p2m pages covering max_pfn */
+	size = (size + fpp - 1) / fpp;		/* frame-list pages needed */
+	++size; /* include a zero terminator for crash tools */
+	size *= sizeof(unsigned long);		/* bytes in the list of lists */
+	order = get_order(size);
+	if (__alloc_bootmem)	/* NULL: keep using the existing lists */
+		pfn_to_mfn_frame_list_list =
+			alloc_bootmem_pages(PAGE_SIZE << order);
+	if (order) {	/* list of lists spans more than one page */
+		if (xen_create_contiguous_region((unsigned long)
+						 pfn_to_mfn_frame_list_list,
+						 order, 0))
+			pr_err("List of P2M frame lists is not contiguous, %s will not work\n",
+			       is_initial_xendomain()
+			       ? "kdump" : "save/restore");
+		memset(pfn_to_mfn_frame_list_list, 0, size);
+	}
+	size -= sizeof(unsigned long);	/* drop the terminator slot again */
+	if (__alloc_bootmem)
+		pfn_to_mfn_frame_list = alloc_bootmem(size);
+
+	for (i = j = 0, k = -1; i < max_pfn; i += fpp, j++) {
+		if (j == fpp)	/* current frame-list page is full */
+			j = 0;
+		if (j == 0) {	/* start the next frame-list page */
+			k++;	/* first pass wraps k from -1 to 0 */
+			BUG_ON(k * sizeof(unsigned long) >= size);
+			if (__alloc_bootmem)
+				pfn_to_mfn_frame_list[k] =
+					alloc_bootmem_pages(PAGE_SIZE);
+			pfn_to_mfn_frame_list_list[k] =
+				virt_to_mfn(pfn_to_mfn_frame_list[k]);
+		}
+		pfn_to_mfn_frame_list[k][j] =
+			virt_to_mfn(&phys_to_machine_mapping[i]);
+	}
+	HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
+	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+		virt_to_mfn(pfn_to_mfn_frame_list_list);
+}
#endif
/*
@@ -1139,6 +1190,9 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_XEN
#ifdef CONFIG_KEXEC
xen_machine_kexec_setup_resources();
+# define kexec_enabled() (crashk_res.start < crashk_res.end)
+#else
+# define kexec_enabled() 0
#endif
p2m_pages = max_pfn;
if (xen_start_info->nr_pages > max_pfn) {
@@ -1164,45 +1218,20 @@ void __init setup_arch(char **cmdline_p)
p2m_pages = xen_start_info->nr_pages;
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
- unsigned long i, j;
- unsigned int k, fpp;
-
/* Make sure we have a large enough P->M table. */
phys_to_machine_mapping = alloc_bootmem_pages(
max_pfn * sizeof(unsigned long));
- memset(phys_to_machine_mapping, ~0,
- max_pfn * sizeof(unsigned long));
memcpy(phys_to_machine_mapping,
(unsigned long *)xen_start_info->mfn_list,
p2m_pages * sizeof(unsigned long));
+ memset(phys_to_machine_mapping + p2m_pages, ~0,
+ (max_pfn - p2m_pages) * sizeof(unsigned long));
free_bootmem(__pa(xen_start_info->mfn_list),
PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
sizeof(unsigned long))));
- /*
- * Initialise the list of the frames that specify the list of
- * frames that make up the p2m table. Used by save/restore.
- */
- pfn_to_mfn_frame_list_list = alloc_bootmem_pages(PAGE_SIZE);
-
- fpp = PAGE_SIZE/sizeof(unsigned long);
- for (i = j = 0, k = -1; i < max_pfn; i += fpp, j++) {
- if (j == fpp)
- j = 0;
- if (j == 0) {
- k++;
- BUG_ON(k>=ARRAY_SIZE(pfn_to_mfn_frame_list));
- pfn_to_mfn_frame_list[k] =
- alloc_bootmem_pages(PAGE_SIZE);
- pfn_to_mfn_frame_list_list[k] =
- virt_to_mfn(pfn_to_mfn_frame_list[k]);
- }
- pfn_to_mfn_frame_list[k][j] =
- virt_to_mfn(&phys_to_machine_mapping[i]);
- }
- HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
- HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
- virt_to_mfn(pfn_to_mfn_frame_list_list);
+ if (!is_initial_xendomain() || kexec_enabled())
+ setup_pfn_to_mfn_frame_list(__alloc_bootmem);
}
#ifdef CONFIG_ISA_DMA_API
--- head.orig/drivers/xen/core/machine_reboot.c 2011-11-18 16:11:15.000000000 +0100
+++ head/drivers/xen/core/machine_reboot.c 2011-11-18 17:16:21.000000000 +0100
@@ -71,11 +71,7 @@ static void pre_suspend(void)
static void post_suspend(int suspend_cancelled)
{
- int i, j, k, fpp;
unsigned long shinfo_mfn;
- extern unsigned long max_pfn;
- extern unsigned long *pfn_to_mfn_frame_list_list;
- extern unsigned long *pfn_to_mfn_frame_list[];
if (suspend_cancelled) {
xen_start_info->store_mfn =
@@ -83,6 +79,8 @@ static void post_suspend(int suspend_can
xen_start_info->console.domU.mfn =
pfn_to_mfn(xen_start_info->console.domU.mfn);
} else {
+ unsigned int i;
+
#ifdef CONFIG_SMP
cpumask_copy(vcpu_initialized_mask, cpu_online_mask);
#endif
@@ -99,20 +97,8 @@ static void post_suspend(int suspend_can
clear_page(empty_zero_page);
- fpp = PAGE_SIZE/sizeof(unsigned long);
- for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) {
- if ((j % fpp) == 0) {
- k++;
- pfn_to_mfn_frame_list_list[k] =
- virt_to_mfn(pfn_to_mfn_frame_list[k]);
- j = 0;
- }
- pfn_to_mfn_frame_list[k][j] =
- virt_to_mfn(&phys_to_machine_mapping[i]);
- }
- HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
- HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
- virt_to_mfn(pfn_to_mfn_frame_list_list);
+ if (!suspend_cancelled)
+ setup_pfn_to_mfn_frame_list(NULL);
}
#endif