diff --git a/patches.fixes/mm-avoid-marking-swap-cached-page-as-lazyfree.patch b/patches.fixes/mm-avoid-marking-swap-cached-page-as-lazyfree.patch
new file mode 100644
index 0000000..d1d369b
--- /dev/null
+++ b/patches.fixes/mm-avoid-marking-swap-cached-page-as-lazyfree.patch
@@ -0,0 +1,59 @@
+From: Shaohua Li
+Date: Tue, 3 Oct 2017 16:15:29 -0700
+Subject: mm: avoid marking swap cached page as lazyfree
+Git-commit: 24c92eb7dce0a299b8e1a8c5fa585844a53bf7f0
+Patch-mainline: v4.14-rc4
+References: VM Functionality, bsc#1061775
+
+MADV_FREE clears the pte dirty bit and then marks the page lazyfree
+(clears SwapBacked). There is no lock to prevent page reclaim from
+adding the page to the swap cache between these two steps. Page reclaim
+could add the page to the swap cache and unmap the page. After page
+reclaim, the page is added back to the lru, and at that point we may
+start draining the per-cpu pagevec and mark the page lazyfree. So the
+page can end up with SwapBacked cleared and PG_swapcache set. On the
+next refault of the virtual address, do_swap_page can find the page in
+the swap cache, but PageSwapCache is false because SwapBacked is not
+set, so do_swap_page bails out and does nothing. The task keeps
+running into the fault handler.
+
+Fixes: 802a3a92ad7a ("mm: reclaim MADV_FREE pages")
+Link: http://lkml.kernel.org/r/6537ef3814398c0073630b03f176263bc81f0902.1506446061.git.shli@fb.com
+Signed-off-by: Shaohua Li
+Reported-by: Artem Savkov
+Tested-by: Artem Savkov
+Reviewed-by: Rik van Riel
+Acked-by: Johannes Weiner
+Acked-by: Michal Hocko
+Acked-by: Minchan Kim
+Cc: Hillf Danton
+Cc: Hugh Dickins
+Cc: Mel Gorman
+Cc: [4.12+]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Vlastimil Babka
+---
+ mm/swap.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -575,7 +575,7 @@ static void lru_lazyfree_fn(struct page
+ 			    void *arg)
+ {
+ 	if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
+-	    !PageUnevictable(page)) {
++	    !PageSwapCache(page) && !PageUnevictable(page)) {
+ 		bool active = PageActive(page);
+ 
+ 		del_page_from_lru_list(page, lruvec,
+@@ -664,7 +664,7 @@ void deactivate_file_page(struct page *p
+ void mark_page_lazyfree(struct page *page)
+ {
+ 	if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
+-	    !PageUnevictable(page)) {
++	    !PageSwapCache(page) && !PageUnevictable(page)) {
+ 		struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs);
+ 
+ 		get_page(page);
diff --git a/patches.fixes/mm-fix-data-corruption-caused-by-lazyfree-page.patch b/patches.fixes/mm-fix-data-corruption-caused-by-lazyfree-page.patch
new file mode 100644
index 0000000..b7ba675
--- /dev/null
+++ b/patches.fixes/mm-fix-data-corruption-caused-by-lazyfree-page.patch
@@ -0,0 +1,63 @@
+From: Shaohua Li
+Date: Tue, 3 Oct 2017 16:15:32 -0700
+Subject: mm: fix data corruption caused by lazyfree page
+Git-commit: 9625456cc76391b7f3f2809579126542a8ed4d39
+Patch-mainline: v4.14-rc4
+References: VM Functionality, bsc#1061775
+
+MADV_FREE clears the pte dirty bit and then marks the page lazyfree
+(clears SwapBacked). There is no lock to prevent page reclaim from
+adding the page to the swap cache between these two steps. If page
+reclaim finds such a page, it simply adds the page to the swap cache
+without writing the page out to swap, because the page is marked clean.
+The next page fault then reads data from a swap slot that does not hold
+the original data, so we have data corruption. To fix this issue, we
+mark the page dirty and write the page out.
+
+However, we should not dirty every page that is clean and in the swap
+cache: a swapped-in page is also in the swap cache and clean. We only
+want to dirty a page that is added to the swap cache by page reclaim,
+which cannot be a swapped-in page. As Minchan suggested, simply dirtying
+the page in add_to_swap does the job.
+
+Fixes: 802a3a92ad7a ("mm: reclaim MADV_FREE pages")
+Link: http://lkml.kernel.org/r/08c84256b007bf3f63c91d94383bd9eb6fee2daa.1506446061.git.shli@fb.com
+Signed-off-by: Shaohua Li
+Reported-by: Artem Savkov
+Acked-by: Michal Hocko
+Acked-by: Minchan Kim
+Cc: Johannes Weiner
+Cc: Hillf Danton
+Cc: Hugh Dickins
+Cc: Rik van Riel
+Cc: Mel Gorman
+Cc: [4.12+]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Vlastimil Babka
+---
+ mm/swap_state.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/mm/swap_state.c
++++ b/mm/swap_state.c
+@@ -208,6 +208,19 @@ int add_to_swap(struct page *page, struc
+ 			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
+ 
+ 	if (!err) {
++		/*
++		 * Normally the page will be dirtied in unmap because its pte
++		 * should be dirty. A special case is a MADV_FREE page: its pte
++		 * could have the dirty bit cleared while the page's SwapBacked
++		 * bit is still set, because clearing the dirty bit and the
++		 * SwapBacked bit is not protected by a lock. For such a page,
++		 * unmap will not set the dirty bit, so page reclaim will not
++		 * write the page out. This can cause data corruption when the
++		 * page is swapped in later. Always setting the dirty bit for
++		 * the page solves the problem.
++		 */
++		set_page_dirty(page);
++
+ 		return 1;
+ 	} else {	/* -ENOMEM radix-tree allocation failure */
+ 		/*
diff --git a/patches.fixes/mm-page_alloc-c-apply-gfp_allowed_mask-before-the-first-allocation-attempt.patch b/patches.fixes/mm-page_alloc-c-apply-gfp_allowed_mask-before-the-first-allocation-attempt.patch
new file mode 100644
index 0000000..7f7a17f
--- /dev/null
+++ b/patches.fixes/mm-page_alloc-c-apply-gfp_allowed_mask-before-the-first-allocation-attempt.patch
@@ -0,0 +1,41 @@
+From: Tetsuo Handa
+Date: Fri, 8 Sep 2017 16:13:22 -0700
+Subject: mm/page_alloc.c: apply gfp_allowed_mask before the first allocation
+ attempt
+Git-commit: f19360f015d338a80bec4d56c2e4fc01680ffd8f
+Patch-mainline: v4.14-rc1
+References: VM Functionality, git fixes
+
+We are mistakenly initializing alloc_flags before gfp_allowed_mask is
+applied. This could cause problems after pm_restrict_gfp_mask() is called
+during a suspend operation. Apply gfp_allowed_mask before initializing
+alloc_flags so that the first allocation attempt uses the correct flags.
+
+Link: http://lkml.kernel.org/r/201709020016.ADJ21342.OFLJHOOSMFVtFQ@I-love.SAKURA.ne.jp
+Fixes: 83d4ca8148fd9092 ("mm, page_alloc: move __GFP_HARDWALL modifications out of the fastpath")
+Signed-off-by: Tetsuo Handa
+Acked-by: Michal Hocko
+Acked-by: Vlastimil Babka
+Cc: Mel Gorman
+Cc: Jesper Dangaard Brouer
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+---
+ mm/page_alloc.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -4028,10 +4028,11 @@ __alloc_pages_nodemask(gfp_t gfp_mask, u
+ {
+ 	struct page *page;
+ 	unsigned int alloc_flags = ALLOC_WMARK_LOW;
+-	gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */
++	gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
+ 	struct alloc_context ac = { };
+ 
+ 	gfp_mask &= gfp_allowed_mask;
++	alloc_mask = gfp_mask;
+ 	if (!prepare_alloc_pages(gfp_mask, order, zonelist, nodemask, &ac, &alloc_mask, &alloc_flags))
+ 		return NULL;
+ 
diff --git a/series.conf b/series.conf
index d54f531..992223c 100644
--- a/series.conf
+++ b/series.conf
@@ -1426,6 +1426,9 @@
 	patches.fixes/getcwd.fix
 	patches.fixes/mm-oom-let-oom_reap_task-and-exit_mmap-run-concurrently.patch
+	patches.fixes/mm-page_alloc-c-apply-gfp_allowed_mask-before-the-first-allocation-attempt.patch
+	patches.fixes/mm-avoid-marking-swap-cached-page-as-lazyfree.patch
+	patches.fixes/mm-fix-data-corruption-caused-by-lazyfree-page.patch
 
 ########################################################
 # IPC patches