|
Mel Gorman |
7ca072 |
From 020f8d956f939bd6963977ac4dda9046341f19dd Mon Sep 17 00:00:00 2001
|
|
Mel Gorman |
a445f5 |
From: Mel Gorman <mgorman@techsingularity.net>
|
|
Mel Gorman |
a445f5 |
Date: Sun, 13 Feb 2022 12:33:44 +0000
|
|
Mel Gorman |
a445f5 |
Subject: [PATCH] mm/page_alloc: Limit number of high-order pages on PCP during
|
|
Mel Gorman |
a445f5 |
bulk free
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
References: bnc#1193239,bnc#1193199,bnc#1193329
|
|
Mel Gorman |
a445f5 |
Patch-mainline: Not yet, queued in -mm maintainer repository (no git tree)
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
When a PCP is mostly used for frees then high-order pages can exist on PCP
|
|
Mel Gorman |
a445f5 |
lists for some time. This is problematic when the allocation pattern is all
|
|
Mel Gorman |
a445f5 |
allocations from one CPU and all frees from another resulting in colder
|
|
Mel Gorman |
a445f5 |
pages being used. When bulk freeing pages, limit the number of high-order
|
|
Mel Gorman |
a445f5 |
pages that are stored on the PCP lists.
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
Netperf running on localhost exhibits this pattern and while it does
|
|
Mel Gorman |
a445f5 |
not matter for some machines, it does matter for others with smaller
|
|
Mel Gorman |
a445f5 |
caches where cache misses cause problems due to reduced page reuse.
|
|
Mel Gorman |
a445f5 |
Pages freed directly to the buddy list may be reused quickly while still
|
|
Mel Gorman |
a445f5 |
cache hot whereas storing on the PCP lists may be cold by the time
|
|
Mel Gorman |
a445f5 |
free_pcppages_bulk() is called.
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
Using perf kmem:mm_page_alloc, the 5 most used page frames were
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
5.17-rc3
|
|
Mel Gorman |
a445f5 |
13041 pfn=0x111a30
|
|
Mel Gorman |
a445f5 |
13081 pfn=0x5814d0
|
|
Mel Gorman |
a445f5 |
13097 pfn=0x108258
|
|
Mel Gorman |
a445f5 |
13121 pfn=0x689598
|
|
Mel Gorman |
a445f5 |
13128 pfn=0x5814d8
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
5.17-revert-highpcp
|
|
Mel Gorman |
a445f5 |
192009 pfn=0x54c140
|
|
Mel Gorman |
a445f5 |
195426 pfn=0x1081d0
|
|
Mel Gorman |
a445f5 |
200908 pfn=0x61c808
|
|
Mel Gorman |
a445f5 |
243515 pfn=0xa9dc20
|
|
Mel Gorman |
a445f5 |
402523 pfn=0x222bb8
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
5.17-full-series
|
|
Mel Gorman |
a445f5 |
142693 pfn=0x346208
|
|
Mel Gorman |
a445f5 |
162227 pfn=0x13bf08
|
|
Mel Gorman |
a445f5 |
166413 pfn=0x2711e0
|
|
Mel Gorman |
a445f5 |
166950 pfn=0x2702f8
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
The spread is wider as there is still time before pages freed to one
|
|
Mel Gorman |
a445f5 |
PCP get released with a tradeoff between fast reuse and reduced zone
|
|
Mel Gorman |
a445f5 |
lock acquisition.
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
From the machine used to gather the traces, the headline performance
|
|
Mel Gorman |
a445f5 |
was equivalent.
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
netperf-tcp
|
|
Mel Gorman |
a445f5 |
5.17.0-rc3 5.17.0-rc3 5.17.0-rc3
|
|
Mel Gorman |
7ca072 |
vanilla mm-reverthighpcp-v1r1 mm-highpcplimit-v2
|
|
Mel Gorman |
7ca072 |
Hmean 64 839.93 ( 0.00%) 840.77 ( 0.10%) 841.02 ( 0.13%)
|
|
Mel Gorman |
7ca072 |
Hmean 128 1614.22 ( 0.00%) 1622.07 * 0.49%* 1636.41 * 1.37%*
|
|
Mel Gorman |
7ca072 |
Hmean 256 2952.00 ( 0.00%) 2953.19 ( 0.04%) 2977.76 * 0.87%*
|
|
Mel Gorman |
7ca072 |
Hmean 1024 10291.67 ( 0.00%) 10239.17 ( -0.51%) 10434.41 * 1.39%*
|
|
Mel Gorman |
7ca072 |
Hmean 2048 17335.08 ( 0.00%) 17399.97 ( 0.37%) 17134.81 * -1.16%*
|
|
Mel Gorman |
7ca072 |
Hmean 3312 22628.15 ( 0.00%) 22471.97 ( -0.69%) 22422.78 ( -0.91%)
|
|
Mel Gorman |
7ca072 |
Hmean 4096 25009.50 ( 0.00%) 24752.83 * -1.03%* 24740.41 ( -1.08%)
|
|
Mel Gorman |
7ca072 |
Hmean 8192 32745.01 ( 0.00%) 31682.63 * -3.24%* 32153.50 * -1.81%*
|
|
Mel Gorman |
7ca072 |
Hmean 16384 39759.59 ( 0.00%) 36805.78 * -7.43%* 38948.13 * -2.04%*
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
From a 1-socket skylake machine with a small CPU cache that suffers
|
|
Mel Gorman |
a445f5 |
more if cache misses are too high
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
netperf-tcp
|
|
Mel Gorman |
a445f5 |
5.17.0-rc3 5.17.0-rc3 5.17.0-rc3
|
|
Mel Gorman |
7ca072 |
vanilla mm-reverthighpcp-v1 mm-highpcplimit-v2
|
|
Mel Gorman |
7ca072 |
Hmean 64 938.95 ( 0.00%) 941.50 * 0.27%* 943.61 * 0.50%*
|
|
Mel Gorman |
7ca072 |
Hmean 128 1843.10 ( 0.00%) 1857.58 * 0.79%* 1861.09 * 0.98%*
|
|
Mel Gorman |
7ca072 |
Hmean 256 3573.07 ( 0.00%) 3667.45 * 2.64%* 3674.91 * 2.85%*
|
|
Mel Gorman |
7ca072 |
Hmean 1024 13206.52 ( 0.00%) 13487.80 * 2.13%* 13393.21 * 1.41%*
|
|
Mel Gorman |
7ca072 |
Hmean 2048 22870.23 ( 0.00%) 23337.96 * 2.05%* 23188.41 * 1.39%*
|
|
Mel Gorman |
7ca072 |
Hmean 3312 31001.99 ( 0.00%) 32206.50 * 3.89%* 31863.62 * 2.78%*
|
|
Mel Gorman |
7ca072 |
Hmean 4096 35364.59 ( 0.00%) 36490.96 * 3.19%* 36112.54 * 2.11%*
|
|
Mel Gorman |
7ca072 |
Hmean 8192 48497.71 ( 0.00%) 49954.05 * 3.00%* 49588.26 * 2.25%*
|
|
Mel Gorman |
7ca072 |
Hmean 16384 58410.86 ( 0.00%) 60839.80 * 4.16%* 62282.96 * 6.63%*
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
Note that this was a machine that did not benefit from caching high-order
|
|
Mel Gorman |
a445f5 |
pages and performance is almost restored with the series applied. It's not
|
|
Mel Gorman |
a445f5 |
fully restored as cache misses are still higher. This is a trade-off
|
|
Mel Gorman |
a445f5 |
between optimising for a workload that does all allocs on one CPU and frees
|
|
Mel Gorman |
a445f5 |
on another or more general workloads that need high-order pages for SLUB
|
|
Mel Gorman |
a445f5 |
and benefit from avoiding zone->lock for every SLUB refill/drain.
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
|
|
Mel Gorman |
a445f5 |
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
|
|
Mel Gorman |
a445f5 |
Signed-off-by: Mel Gorman <mgorman@suse.de>
|
|
Mel Gorman |
a445f5 |
---
|
|
Mel Gorman |
a445f5 |
mm/page_alloc.c | 26 +++++++++++++++++++++-----
|
|
Mel Gorman |
a445f5 |
1 file changed, 21 insertions(+), 5 deletions(-)
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
|
|
Mel Gorman |
7ca072 |
index a942bdc656e6..14fddfdf4f46 100644
|
|
Mel Gorman |
a445f5 |
--- a/mm/page_alloc.c
|
|
Mel Gorman |
a445f5 |
+++ b/mm/page_alloc.c
|
|
Mel Gorman |
a445f5 |
@@ -3301,10 +3301,15 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn,
|
|
Mel Gorman |
a445f5 |
return true;
|
|
Mel Gorman |
a445f5 |
}
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
-static int nr_pcp_free(struct per_cpu_pages *pcp, int high, int batch)
|
|
Mel Gorman |
a445f5 |
+static int nr_pcp_free(struct per_cpu_pages *pcp, int high, int batch,
|
|
Mel Gorman |
a445f5 |
+ bool free_high)
|
|
Mel Gorman |
a445f5 |
{
|
|
Mel Gorman |
a445f5 |
int min_nr_free, max_nr_free;
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
+ /* Free everything if batch freeing high-order pages. */
|
|
Mel Gorman |
a445f5 |
+ if (unlikely(free_high))
|
|
Mel Gorman |
a445f5 |
+ return pcp->count;
|
|
Mel Gorman |
a445f5 |
+
|
|
Mel Gorman |
a445f5 |
/* Check for PCP disabled or boot pageset */
|
|
Mel Gorman |
a445f5 |
if (unlikely(high < batch))
|
|
Mel Gorman |
a445f5 |
return 1;
|
|
Mel Gorman |
a445f5 |
@@ -3325,11 +3330,12 @@ static int nr_pcp_free(struct per_cpu_pages *pcp, int high, int batch)
|
|
Mel Gorman |
a445f5 |
return batch;
|
|
Mel Gorman |
a445f5 |
}
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
-static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone)
|
|
Mel Gorman |
a445f5 |
+static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone,
|
|
Mel Gorman |
a445f5 |
+ bool free_high)
|
|
Mel Gorman |
a445f5 |
{
|
|
Mel Gorman |
a445f5 |
int high = READ_ONCE(pcp->high);
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
- if (unlikely(!high))
|
|
Mel Gorman |
a445f5 |
+ if (unlikely(!high || free_high))
|
|
Mel Gorman |
a445f5 |
return 0;
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
if (!test_bit(ZONE_RECLAIM_ACTIVE, &zone->flags))
|
|
Mel Gorman |
a445f5 |
@@ -3349,17 +3355,27 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn,
|
|
Mel Gorman |
a445f5 |
struct per_cpu_pages *pcp;
|
|
Mel Gorman |
a445f5 |
int high;
|
|
Mel Gorman |
a445f5 |
int pindex;
|
|
Mel Gorman |
a445f5 |
+ bool free_high;
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
__count_vm_event(PGFREE);
|
|
Mel Gorman |
a445f5 |
pcp = this_cpu_ptr(zone->per_cpu_pageset);
|
|
Mel Gorman |
a445f5 |
pindex = order_to_pindex(migratetype, order);
|
|
Mel Gorman |
a445f5 |
list_add(&page->lru, &pcp->lists[pindex]);
|
|
Mel Gorman |
a445f5 |
pcp->count += 1 << order;
|
|
Mel Gorman |
a445f5 |
- high = nr_pcp_high(pcp, zone);
|
|
Mel Gorman |
a445f5 |
+
|
|
Mel Gorman |
a445f5 |
+ /*
|
|
Mel Gorman |
a445f5 |
+ * As high-order pages other than THP's stored on PCP can contribute
|
|
Mel Gorman |
a445f5 |
+ * to fragmentation, limit the number stored when PCP is heavily
|
|
Mel Gorman |
a445f5 |
+ * freeing without allocation. The remainder after bulk freeing
|
|
Mel Gorman |
a445f5 |
+ * stops will be drained from vmstat refresh context.
|
|
Mel Gorman |
a445f5 |
+ */
|
|
Mel Gorman |
a445f5 |
+ free_high = (pcp->free_factor && order && order <= PAGE_ALLOC_COSTLY_ORDER);
|
|
Mel Gorman |
a445f5 |
+
|
|
Mel Gorman |
a445f5 |
+ high = nr_pcp_high(pcp, zone, free_high);
|
|
Mel Gorman |
a445f5 |
if (pcp->count >= high) {
|
|
Mel Gorman |
a445f5 |
int batch = READ_ONCE(pcp->batch);
|
|
Mel Gorman |
a445f5 |
|
|
Mel Gorman |
a445f5 |
- free_pcppages_bulk(zone, nr_pcp_free(pcp, high, batch), pcp, pindex);
|
|
Mel Gorman |
a445f5 |
+ free_pcppages_bulk(zone, nr_pcp_free(pcp, high, batch, free_high), pcp, pindex);
|
|
Mel Gorman |
a445f5 |
}
|
|
Mel Gorman |
a445f5 |
}
|
|
Mel Gorman |
a445f5 |
|