From: jbeulich@novell.com
Subject: fix issues with the assignment of huge amounts of memory
Patch-mainline: obsolete
References: bnc#482614, bnc#537435, bnc#700856

--- head.orig/arch/x86/include/mach-xen/asm/hypervisor.h	2012-05-31 14:50:05.000000000 +0200
+++ head/arch/x86/include/mach-xen/asm/hypervisor.h	2012-05-11 16:46:55.000000000 +0200
@@ -103,6 +103,10 @@ void xen_pgd_pin(unsigned long ptr);
 void xen_pgd_unpin(unsigned long ptr);
 
 void xen_init_pgd_pin(void);
+#ifdef CONFIG_PM_SLEEP
+void setup_pfn_to_mfn_frame_list(void *(*)(unsigned long, unsigned long,
+					   unsigned long));
+#endif
 
 void xen_set_ldt(const void *ptr, unsigned int ents);
 
--- head.orig/arch/x86/kernel/e820-xen.c	2012-02-10 11:29:49.000000000 +0100
+++ head/arch/x86/kernel/e820-xen.c	2012-02-10 13:42:50.000000000 +0100
@@ -926,6 +926,26 @@ static int __init parse_memopt(char *p)
 	/* don't remove all of memory when handling "mem={invalid}" param */
 	if (mem_size == 0)
 		return -EINVAL;
+#ifdef CONFIG_XEN
+	/*
+	 * A little less than 2% of available memory is needed for page
+	 * tables, p2m map, and mem_map. Hence the maximum amount of memory
+	 * we can potentially balloon up to can in no case exceed about 50
+	 * times what we've been given initially. Since even with that we won't
+	 * be able to boot (due to various calculations done based on the total
+	 * number of pages) we further restrict this to a factor of 32.
+	 */
+	if ((mem_size >> (PAGE_SHIFT + 5)) > xen_start_info->nr_pages) {
+		u64 size = (u64)xen_start_info->nr_pages << 5;
+
+		pr_warn("mem=%Luk is invalid for an initial"
+			" allocation of %luk, using %Luk\n",
+			(unsigned long long)mem_size >> 10,
+			xen_start_info->nr_pages << (PAGE_SHIFT - 10),
+			(unsigned long long)size << (PAGE_SHIFT - 10));
+		mem_size = size << PAGE_SHIFT;
+	}
+#endif
 	e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
 
 	i = e820.nr_map - 1;
@@ -1125,6 +1145,7 @@ void __init e820_reserve_resources_late(
 char *__init default_machine_specific_memory_setup(void)
 {
 	int rc, nr_map;
+	unsigned long maxmem;
 	struct xen_memory_map memmap;
 	static struct e820entry __initdata map[E820MAX];
 
@@ -1150,6 +1171,29 @@ char *__init default_machine_specific_me
 		BUG();
 
 #ifdef CONFIG_XEN
+	/* See the comment in parse_memopt(). */
+	for (maxmem = rc = 0; rc < e820.nr_map; ++rc)
+		if (e820.map[rc].type == E820_RAM)
+			maxmem += e820.map[rc].size >> PAGE_SHIFT;
+	if (is_initial_xendomain()) {
+		domid_t domid = DOMID_SELF;
+
+		rc = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
+		if (rc > 0 && maxmem > rc)
+			maxmem = rc;
+	}
+	if ((maxmem >> 5) > xen_start_info->nr_pages) {
+		unsigned long long size = (u64)xen_start_info->nr_pages << 5;
+
+		pr_warn("maxmem of %luM is invalid for an initial"
+			" allocation of %luM, using %LuM\n",
+			maxmem >> (20 - PAGE_SHIFT),
+			xen_start_info->nr_pages >> (20 - PAGE_SHIFT),
+			size >> (20 - PAGE_SHIFT));
+		size <<= PAGE_SHIFT;
+		e820_remove_range(size, ULLONG_MAX - size, E820_RAM, 1);
+	}
+
 	if (is_initial_xendomain()) {
 		memmap.nr_entries = E820MAX;
 		set_xen_guest_handle(memmap.buffer, machine_e820.map);
--- head.orig/arch/x86/kernel/setup-xen.c	2012-04-11 17:11:34.000000000 +0200
+++ head/arch/x86/kernel/setup-xen.c	2012-06-08 10:49:46.000000000 +0200
@@ -132,12 +132,7 @@ static struct notifier_block xen_panic_b
 unsigned long *phys_to_machine_mapping;
 EXPORT_SYMBOL(phys_to_machine_mapping);
 
-unsigned long *pfn_to_mfn_frame_list_list,
-#ifdef CONFIG_X86_64
-	*pfn_to_mfn_frame_list[512];
-#else
-	*pfn_to_mfn_frame_list[128];
-#endif
+static unsigned long *pfn_to_mfn_frame_list_list, **pfn_to_mfn_frame_list;
 
 /* Raw start-of-day parameters from the hypervisor. */
 start_info_t *xen_start_info;
@@ -178,6 +173,62 @@ struct boot_params __initdata boot_param
 #else
 struct boot_params boot_params;
 #endif
+#else /* CONFIG_XEN */
+/*
+ * Set up the list of frame lists describing the p2m table and publish it
+ * in the shared info page. Used by save/restore and kexec/crash. A non-NULL
+ * argument makes the lists get allocated; NULL reuses the existing ones.
+ */
+#ifdef CONFIG_PM_SLEEP
+void
+#else
+static void __init
+#endif
+setup_pfn_to_mfn_frame_list(typeof(__alloc_bootmem) *__alloc_bootmem)
+{
+	unsigned long i, j, size;
+	unsigned int k, order, fpp = PAGE_SIZE / sizeof(unsigned long);
+
+	size = (max_pfn + fpp - 1) / fpp; /* frames holding the p2m table */
+	size = (size + fpp - 1) / fpp; /* frame lists referencing those */
+	++size; /* include a zero terminator for crash tools */
+	size *= sizeof(unsigned long); /* bytes in the list of frame lists */
+	order = get_order(size);
+	if (__alloc_bootmem)
+		pfn_to_mfn_frame_list_list =
+			alloc_bootmem_pages(PAGE_SIZE << order);
+	if (order) { /* the list of frame lists spans more than one page */
+		if (xen_create_contiguous_region((unsigned long)
+						 pfn_to_mfn_frame_list_list,
+						 order, 0))
+			pr_err("List of P2M frame lists is not contiguous, %s will not work\n",
+			       is_initial_xendomain()
+			       ? "kdump" : "save/restore");
+		memset(pfn_to_mfn_frame_list_list, 0, size);
+	}
+	size -= sizeof(unsigned long); /* drop the terminator slot again */
+	if (__alloc_bootmem)
+		pfn_to_mfn_frame_list = alloc_bootmem(size);
+
+	for (i = j = 0, k = -1; i < max_pfn; i += fpp, j++) {
+		if (j == fpp) /* current frame list is full */
+			j = 0;
+		if (j == 0) {
+			k++; /* the first increment wraps k from -1 to 0 */
+			BUG_ON(k * sizeof(unsigned long) >= size);
+			if (__alloc_bootmem)
+				pfn_to_mfn_frame_list[k] =
+					alloc_bootmem_pages(PAGE_SIZE);
+			pfn_to_mfn_frame_list_list[k] =
+				virt_to_mfn(pfn_to_mfn_frame_list[k]);
+		}
+		pfn_to_mfn_frame_list[k][j] =
+			virt_to_mfn(&phys_to_machine_mapping[i]);
+	}
+	HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
+	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+		virt_to_mfn(pfn_to_mfn_frame_list_list);
+}
 #endif
 
 /*
@@ -1139,6 +1190,9 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_XEN
 #ifdef CONFIG_KEXEC
 	xen_machine_kexec_setup_resources();
+# define kexec_enabled() (crashk_res.start < crashk_res.end)
+#else
+# define kexec_enabled() 0
 #endif
 	p2m_pages = max_pfn;
 	if (xen_start_info->nr_pages > max_pfn) {
@@ -1164,45 +1218,20 @@ void __init setup_arch(char **cmdline_p)
 		p2m_pages = xen_start_info->nr_pages;
 
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-		unsigned long i, j;
-		unsigned int k, fpp;
-
 		/* Make sure we have a large enough P->M table. */
 		phys_to_machine_mapping = alloc_bootmem_pages(
 			max_pfn * sizeof(unsigned long));
-		memset(phys_to_machine_mapping, ~0,
-		       max_pfn * sizeof(unsigned long));
 		memcpy(phys_to_machine_mapping,
 		       (unsigned long *)xen_start_info->mfn_list,
 		       p2m_pages * sizeof(unsigned long));
+		memset(phys_to_machine_mapping + p2m_pages, ~0,
+		       (max_pfn - p2m_pages) * sizeof(unsigned long));
 		free_bootmem(__pa(xen_start_info->mfn_list),
 			     PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
 					     sizeof(unsigned long))));
 
-		/*
-		 * Initialise the list of the frames that specify the list of
-		 * frames that make up the p2m table. Used by save/restore.
-		 */
-		pfn_to_mfn_frame_list_list = alloc_bootmem_pages(PAGE_SIZE);
-
-		fpp = PAGE_SIZE/sizeof(unsigned long);
-		for (i = j = 0, k = -1; i < max_pfn; i += fpp, j++) {
-			if (j == fpp)
-				j = 0;
-			if (j == 0) {
-				k++;
-				BUG_ON(k>=ARRAY_SIZE(pfn_to_mfn_frame_list));
-				pfn_to_mfn_frame_list[k] =
-					alloc_bootmem_pages(PAGE_SIZE);
-				pfn_to_mfn_frame_list_list[k] =
-					virt_to_mfn(pfn_to_mfn_frame_list[k]);
-			}
-			pfn_to_mfn_frame_list[k][j] =
-				virt_to_mfn(&phys_to_machine_mapping[i]);
-		}
-		HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
-		HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
-			virt_to_mfn(pfn_to_mfn_frame_list_list);
+		if (!is_initial_xendomain() || kexec_enabled())
+			setup_pfn_to_mfn_frame_list(__alloc_bootmem);
 	}
 
 #ifdef CONFIG_ISA_DMA_API
--- head.orig/drivers/xen/core/machine_reboot.c	2011-11-18 16:11:15.000000000 +0100
+++ head/drivers/xen/core/machine_reboot.c	2011-11-18 17:16:21.000000000 +0100
@@ -71,11 +71,7 @@ static void pre_suspend(void)
 
 static void post_suspend(int suspend_cancelled)
 {
-	int i, j, k, fpp;
 	unsigned long shinfo_mfn;
-	extern unsigned long max_pfn;
-	extern unsigned long *pfn_to_mfn_frame_list_list;
-	extern unsigned long *pfn_to_mfn_frame_list[];
 
 	if (suspend_cancelled) {
 		xen_start_info->store_mfn =
@@ -83,6 +79,8 @@ static void post_suspend(int suspend_can
 		xen_start_info->console.domU.mfn =
 			pfn_to_mfn(xen_start_info->console.domU.mfn);
 	} else {
+		unsigned int i;
+
 #ifdef CONFIG_SMP
 		cpumask_copy(vcpu_initialized_mask, cpu_online_mask);
 #endif
@@ -99,20 +97,8 @@ static void post_suspend(int suspend_can
 
 	clear_page(empty_zero_page);
 
-	fpp = PAGE_SIZE/sizeof(unsigned long);
-	for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) {
-		if ((j % fpp) == 0) {
-			k++;
-			pfn_to_mfn_frame_list_list[k] =
-				virt_to_mfn(pfn_to_mfn_frame_list[k]);
-			j = 0;
-		}
-		pfn_to_mfn_frame_list[k][j] =
-			virt_to_mfn(&phys_to_machine_mapping[i]);
-	}
-	HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
-	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
-		virt_to_mfn(pfn_to_mfn_frame_list_list);
+	if (!suspend_cancelled)
+		setup_pfn_to_mfn_frame_list(NULL);
 }
 #endif