From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 28 May 2018 15:24:21 +0200
Subject: [PATCH 2/4] Split IRQ-off and zone->lock while freeing pages from PCP
 list #2
Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/wagi/linux-stable-rt.git
Git-commit: 9cf4d2227a896a0aaf753dd8875167e0ab391fbc
Patch-mainline: Queued in subsystem maintainer repository
References: SLE Realtime Extension

Split the IRQ-off section, which accesses the PCP list, from the
zone->lock section, which frees the pages.
Introduce isolate_pcp_pages(), which detaches the pages from the PCP
list onto a temporary list; the temporary list is then freed via
free_pcppages_bulk().
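
A minimal sketch of the resulting pattern (sketch_drain() is a
hypothetical name for illustration only; the hunks below apply this
split to drain_pages_zone(), free_unref_page() and
free_unref_page_list(), and assume free_pcppages_bulk() acquires
zone->lock itself, as arranged earlier in this series):

  static void sketch_drain(struct zone *zone, struct per_cpu_pages *pcp)
  {
  	unsigned long flags;
  	LIST_HEAD(dst);

  	local_irq_save(flags);
  	/* IRQs off: only detach the pages from the PCP list */
  	isolate_pcp_pages(pcp->count, pcp, &dst);
  	local_irq_restore(flags);

  	/* IRQs on again: free_pcppages_bulk() takes zone->lock */
  	if (!list_empty(&dst))
  		free_pcppages_bulk(zone, &dst, false);
  }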

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
 mm/page_alloc.c |   60 ++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 50 insertions(+), 10 deletions(-)

--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1318,8 +1318,8 @@ static inline void prefetch_buddy(struct
  * And clear the zone's pages_scanned counter, to hold off the "all pages are
  * pinned" detection logic.
  */
-static void free_pcppages_bulk(struct zone *zone, int count,
-			       struct list_head *head)
+static void free_pcppages_bulk(struct zone *zone, struct list_head *head,
+			       bool zone_retry)
 {
 	bool isolated_pageblocks;
 	struct page *page, *tmp;
@@ -1334,12 +1334,27 @@ static void free_pcppages_bulk(struct zo
 	 */
 	list_for_each_entry_safe(page, tmp, head, lru) {
 		int mt = get_pcppage_migratetype(page);
+
+		if (page_zone(page) != zone) {
+			/*
+			 * free_unref_page_list() sorts pages by zone. If we end
+			 * up with pages from different NUMA nodes belonging to
+			 * the same zone index, then we need to redo the free
+			 * with the correct zone pointer. Skip the page for now
+			 * and redo it on the next iteration.
+			 */
+			WARN_ON_ONCE(zone_retry == false);
+			if (zone_retry)
+				continue;
+		}
+
 		/* MIGRATE_ISOLATE page should not go to pcplists */
 		VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
 		/* Pageblock could have been isolated meanwhile */
 		if (unlikely(isolated_pageblocks))
 			mt = get_pageblock_migratetype(page);
 
+		list_del(&page->lru);
 		__free_one_page(page, page_to_pfn(page), zone, 0, mt, true);
 		trace_mm_page_pcpu_drain(page, 0, mt);
 	}
@@ -2882,7 +2897,7 @@ void drain_zone_pages(struct zone *zone,
 	local_irq_restore(flags);
 
 	if (to_drain > 0)
-		free_pcppages_bulk(zone, to_drain, &dst);
+		free_pcppages_bulk(zone, &dst, false);
 }
 #endif
 
@@ -2912,7 +2927,7 @@ static void drain_pages_zone(unsigned in
 	local_irq_restore(flags);
 
 	if (count)
-		free_pcppages_bulk(zone, count, &dst);
+		free_pcppages_bulk(zone, &dst, false);
 }
 
 /*
@@ -3111,7 +3126,8 @@ static bool free_unref_page_prepare(stru
 	return true;
 }
 
-static void free_unref_page_commit(struct page *page, unsigned long pfn)
+static void free_unref_page_commit(struct page *page, unsigned long pfn,
+				   struct list_head *dst)
 {
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
@@ -3140,10 +3156,8 @@ static void free_unref_page_commit(struc
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
 		unsigned long batch = READ_ONCE(pcp->batch);
-		LIST_HEAD(dst);
 
-		isolate_pcp_pages(batch, pcp, &dst);
-		free_pcppages_bulk(zone, batch, &dst);
+		isolate_pcp_pages(batch, pcp, dst);
 	}
 }
 
@@ -3154,13 +3168,17 @@ void free_unref_page(struct page *page)
 {
 	unsigned long flags;
 	unsigned long pfn = page_to_pfn(page);
+	struct zone *zone = page_zone(page);
+	LIST_HEAD(dst);
 
 	if (!free_unref_page_prepare(page, pfn))
 		return;
 
 	local_irq_save(flags);
-	free_unref_page_commit(page, pfn);
+	free_unref_page_commit(page, pfn, &dst);
 	local_irq_restore(flags);
+	if (!list_empty(&dst))
+		free_pcppages_bulk(zone, &dst, false);
 }
 
 /*
@@ -3171,6 +3189,11 @@ void free_unref_page_list(struct list_he
 	struct page *page, *next;
 	unsigned long flags, pfn;
 	int batch_count = 0;
+	struct list_head dsts[__MAX_NR_ZONES];
+	int i;
+
+	for (i = 0; i < __MAX_NR_ZONES; i++)
+		INIT_LIST_HEAD(&dsts[i]);
 
 	/* Prepare pages for freeing */
 	list_for_each_entry_safe(page, next, list, lru) {
@@ -3183,10 +3206,12 @@ void free_unref_page_list(struct list_he
 	local_irq_save(flags);
 	list_for_each_entry_safe(page, next, list, lru) {
 		unsigned long pfn = page_private(page);
+		enum zone_type type;
 
 		set_page_private(page, 0);
 		trace_mm_page_free_batched(page);
-		free_unref_page_commit(page, pfn);
+		type = page_zonenum(page);
+		free_unref_page_commit(page, pfn, &dsts[type]);
 
 		/*
 		 * Guard against excessive IRQ disabled times when we get
@@ -3199,6 +3224,21 @@ void free_unref_page_list(struct list_he
 		}
 	}
 	local_irq_restore(flags);
+
+	for (i = 0; i < __MAX_NR_ZONES; ) {
+		struct page *page;
+		struct zone *zone;
+
+		if (list_empty(&dsts[i])) {
+			i++;
+			continue;
+		}
+
+		page = list_first_entry(&dsts[i], struct page, lru);
+		zone = page_zone(page);
+
+		free_pcppages_bulk(zone, &dsts[i], true);
+	}
 }
 
 /*