From d0dc12e86b3197a14a908d4fe7cb35b73dda82b5 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Thu, 5 Apr 2018 16:23:00 -0700
Subject: [PATCH] mm/memory_hotplug: optimize memory hotplug
Git-commit: d0dc12e86b3197a14a908d4fe7cb35b73dda82b5
Patch-mainline: v4.17-rc1
References: bnc#1114830, fate#326764

During memory hotplugging we traverse struct pages three times:

1. memset(0) in sparse_add_one_section()
2. loop in __add_section() to call set_page_node(page, nid) and
   SetPageReserved(page) on every valid page
3. loop in memmap_init_zone() to call __init_single_pfn()

This patch removes the first two loops and leaves only loop 3.  All
struct pages are initialized in one place, just as is done during
boot.
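
For illustration only, here is a minimal user-space sketch of the
consolidation; struct fake_page, NPAGES and the two init functions are
made up for this example and are not the kernel's types (a real struct
page is larger and __init_single_page() sets far more state):

  #include <stdio.h>
  #include <string.h>

  #define NPAGES 32768  /* stand-in for PAGES_PER_SECTION */

  struct fake_page { unsigned long flags; int nid; };
  static struct fake_page memmap[NPAGES];

  /* Old flow: three separate walks over the same memmap. */
  static void init_three_passes(int nid)
  {
      int i;

      memset(memmap, 0, sizeof(memmap));    /* pass 1: memset(0)  */
      for (i = 0; i < NPAGES; i++)
          memmap[i].nid = nid;              /* pass 2: node setup */
      for (i = 0; i < NPAGES; i++)
          memmap[i].flags = 1;              /* pass 3: page init  */
  }

  /* New flow: each struct page's cache line is brought in once. */
  static void init_one_pass(int nid)
  {
      int i;

      for (i = 0; i < NPAGES; i++) {
          memset(&memmap[i], 0, sizeof(memmap[i]));
          memmap[i].nid = nid;
          memmap[i].flags = 1;
      }
  }

  int main(void)
  {
      init_three_passes(0);   /* old behaviour, for comparison */
      init_one_pass(1);       /* patched behaviour             */
      printf("page 0: nid=%d flags=%lu\n", memmap[0].nid, memmap[0].flags);
      return 0;
  }

The cache effect is the point: the one-pass variant touches each struct
page exactly once instead of three times.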

The benefits:

 - We improve memory hotplug performance because we no longer evict the
   cache several times, and we also reduce loop branching overhead.

 - Remove the condition from the hot path in __init_single_pfn() that
   was added to fix the problem reported by Bharata in the linked email
   thread, thus also improving performance during normal boot.

 - Make memory hotplug more similar to the boot memory initialization
   path because we zero and initialize struct pages only in one
   function.

 - Simplify the memory hotplug struct page initialization code, which
   enables future improvements, such as multi-threading the
   initialization of struct pages to improve hotplug performance even
   further on larger machines.
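
One consequence of deferring all initialization to onlining time,
visible in the mm/memory_hotplug.c hunk below: online_pages() can no
longer use pfn_to_nid(), because the node id it would read lives in a
struct page that is not initialized yet, so the node id is taken from
the memory block instead. A hedged user-space sketch of that lookup
shape (struct mem_block, blocks[] and find_block() are stand-ins
invented here, not the kernel's find_memory_block() and
__pfn_to_section() helpers):

  #include <assert.h>
  #include <stdio.h>

  #define PAGES_PER_FAKE_SECTION 1024

  /* Side-band bookkeeping, filled in at hot-add time. */
  struct mem_block { int nid; };
  static struct mem_block blocks[4];

  static struct mem_block *find_block(unsigned long pfn)
  {
      return &blocks[pfn / PAGES_PER_FAKE_SECTION];
  }

  int main(void)
  {
      unsigned long pfn = 2500;
      int nid;

      blocks[2].nid = 1;    /* recorded when the block was added */

      /*
       * The struct pages for this range are still uninitialized
       * (or poisoned), so reading the node id out of page metadata
       * would return garbage; use the block's record instead.
       */
      nid = find_block(pfn)->nid;
      assert(nid == 1);
      printf("pfn %lu belongs to node %d\n", pfn, nid);
      return 0;
  }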

[pasha.tatashin@oracle.com: v5]
  Link: http://lkml.kernel.org/r/20180228030308.1116-7-pasha.tatashin@oracle.com
Link: http://lkml.kernel.org/r/20180215165920.8570-7-pasha.tatashin@oracle.com
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Bharata B Rao <bharata@linux.vnet.ibm.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Steven Sistare <steven.sistare@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Michal Hocko <mhocko@suse.com>

---
 drivers/base/node.c    |    2 ++
 include/linux/memory.h |    1 +
 mm/memory_hotplug.c    |   27 ++++++++-------------------
 mm/page_alloc.c        |   12 +++++++-----
 mm/sparse.c            |    8 +++++++-
 5 files changed, 25 insertions(+), 25 deletions(-)

--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -397,6 +397,8 @@ int register_mem_sect_under_node(struct
 
 	if (!mem_blk)
 		return -EFAULT;
+
+	mem_blk->nid = nid;
 	if (!node_online(nid))
 		return 0;
 
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -32,6 +32,7 @@ struct memory_block {
 	void *hw;			/* optional pointer to fw/hw data */
 	int (*phys_callback)(struct memory_block *);
 	struct device dev;
+	int nid;			/* NID for this memory block */
 };
 
 int arch_get_memory_phys_device(unsigned long start_pfn);
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -310,7 +310,6 @@ static int __meminit __add_section(int n
 		struct vmem_altmap *altmap, bool want_memblock)
 {
 	int ret;
-	int i;
 
 	if (pfn_valid(phys_start_pfn))
 		return -EEXIST;
@@ -319,23 +318,6 @@ static int __meminit __add_section(int n
 	if (ret < 0)
 		return ret;
 
-	/*
-	 * Make all the pages reserved so that nobody will stumble over half
-	 * initialized state.
-	 * FIXME: We also have to associate it with a node because pfn_to_node
-	 * relies on having page with the proper node.
-	 */
-	for (i = 0; i < PAGES_PER_SECTION; i++) {
-		unsigned long pfn = phys_start_pfn + i;
-		struct page *page;
-		if (!pfn_valid(pfn))
-			continue;
-
-		page = pfn_to_page(pfn);
-		set_page_node(page, nid);
-		SetPageReserved(page);
-	}
-
 	if (!want_memblock)
 		return 0;
 
@@ -1004,8 +986,15 @@ int __ref online_pages(unsigned long pfn
 	int nid;
 	int ret;
 	struct memory_notify arg;
+	struct memory_block *mem;
+
+	/*
+	 * We can't use pfn_to_nid() because nid might be stored in struct page
+	 * which is not yet initialized. Instead, we find nid from memory block.
+	 */
+	mem = find_memory_block(__pfn_to_section(pfn));
+	nid = mem->nid;
 
-	nid = pfn_to_nid(pfn);
 	if (!allow_online_pfn_range(nid, pfn, nr_pages, online_type))
 		return -EINVAL;
 
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1180,6 +1180,7 @@ static void free_one_page(struct zone *z
 static void __meminit __init_single_page(struct page *page, unsigned long pfn,
 				unsigned long zone, int nid)
 {
+	memset(page, 0, sizeof(*page));
 	set_page_links(page, zone, nid, pfn);
 	init_page_count(page);
 	page_mapcount_reset(page);
@@ -5311,6 +5312,7 @@ void __meminit memmap_init_zone(unsigned
 	pg_data_t *pgdat = NODE_DATA(nid);
 	unsigned long pfn;
 	unsigned long nr_initialised = 0;
+	struct page *page;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 	struct memblock_region *r = NULL, *tmp;
 #endif
@@ -5363,6 +5365,11 @@ void __meminit memmap_init_zone(unsigned
 #endif
 
 not_early:
+		page = pfn_to_page(pfn);
+		__init_single_page(page, pfn, zone, nid);
+		if (context == MEMMAP_HOTPLUG)
+			SetPageReserved(page);
+
 		/*
 		 * Mark the block movable so that blocks are reserved for
 		 * movable at startup. This will force kernel allocations
@@ -5376,13 +5383,8 @@ not_early:
 		 * pfn out of zone.
 		 */
 		if (!(pfn & (pageblock_nr_pages - 1))) {
-			struct page *page = pfn_to_page(pfn);
-
-			__init_single_page(page, pfn, zone, nid);
 			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
 			cond_resched();
-		} else {
-			__init_single_pfn(pfn, zone, nid);
 		}
 	}
 }
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -768,7 +768,13 @@ int __meminit sparse_add_one_section(str
 		goto out;
 	}
 
-	memset(memmap, 0, sizeof(struct page) * PAGES_PER_SECTION);
+#ifdef CONFIG_DEBUG_VM
+	/*
+	 * Poison uninitialized struct pages in order to catch invalid flags
+	 * combinations.
+	 */
+	memset(memmap, PAGE_POISON_PATTERN, sizeof(struct page) * PAGES_PER_SECTION);
+#endif
 
 	ms->section_mem_map |= SECTION_MARKED_PRESENT;
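
As a closing note on the sparse.c hunk above: under CONFIG_DEBUG_VM the
memmap is poisoned rather than zeroed, so any code that touches a
struct page before memmap_init_zone() has initialized it sees an
obviously bogus bit pattern instead of silent zeroes. A small
user-space sketch of the idea (the 0xaa byte is a stand-in chosen for
this example, not the kernel's PAGE_POISON_PATTERN definition):

  #include <assert.h>
  #include <string.h>

  struct fake_page { unsigned long flags; };

  int main(void)
  {
      struct fake_page page;

      /* Hot-add time: poison instead of zeroing. */
      memset(&page, 0xaa, sizeof(page));

      /*
       * A use-before-init bug now fails loudly: a zero-filled page
       * would sail through a check like this, a poisoned one cannot.
       */
      assert(page.flags != 0);
      return 0;
  }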