Michal Hocko 625579
From a7f40cfe3b7ada57af9b62fd28430eeb4a7cfcb7 Mon Sep 17 00:00:00 2001
Michal Hocko 625579
From: Yang Shi <yang.shi@linux.alibaba.com>
Michal Hocko 625579
Date: Thu, 28 Mar 2019 20:43:55 -0700
Michal Hocko 625579
Subject: [PATCH] mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT
Michal Hocko 625579
 is specified
Michal Hocko 625579
Git-commit: a7f40cfe3b7ada57af9b62fd28430eeb4a7cfcb7
Michal Hocko 625579
Patch-mainline: v5.1-rc3
Michal Hocko 625579
References: bsc#1185906
Michal Hocko 625579
Michal Hocko 625579
mhocko@suse.com:
Michal Hocko 625579
This kernel splits THPs rather than migrate them so we have to check for
Michal Hocko 625579
the mode and bail out early if this is a pure MPOL_MF_STRICT mode
Michal Hocko 625579
without move. Otherwise we would just have split the page and
Michal Hocko 625579
potentially end up on a different node breaking the syscall contract.
Michal Hocko 625579
Michal Hocko 625579
When MPOL_MF_STRICT was specified and an existing page was already on a
Michal Hocko 625579
node that does not follow the policy, mbind() should return -EIO.  But
Michal Hocko 625579
commit 6f4576e3687b ("mempolicy: apply page table walker on
Michal Hocko 625579
queue_pages_range()") broke the rule.
Michal Hocko 625579
Michal Hocko 625579
And commit c8633798497c ("mm: mempolicy: mbind and migrate_pages support
Michal Hocko 625579
thp migration") didn't return the correct value for THP mbind() too.
Michal Hocko 625579
Michal Hocko 625579
If MPOL_MF_STRICT is set, ignore vma_migratable() to make sure it
Michal Hocko 625579
reaches queue_pages_pte_range() or queue_pages_pmd() to check if an
Michal Hocko 625579
existing page was already on a node that does not follow the policy.
Michal Hocko 625579
And, non-migratable vma may be used, return -EIO too if MPOL_MF_MOVE or
Michal Hocko 625579
MPOL_MF_MOVE_ALL was specified.
Michal Hocko 625579
Michal Hocko 625579
Tested with https://github.com/metan-ucw/ltp/blob/master/testcases/kernel/syscalls/mbind/mbind02.c
Michal Hocko 625579
Michal Hocko 625579
[akpm@linux-foundation.org: tweak code comment]
Michal Hocko 625579
Link: http://lkml.kernel.org/r/1553020556-38583-1-git-send-email-yang.shi@linux.alibaba.com
Michal Hocko 625579
Fixes: 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()")
Michal Hocko 625579
Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
Michal Hocko 625579
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Michal Hocko 625579
Reported-by: Cyril Hrubis <chrubis@suse.cz>
Michal Hocko 625579
Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
Michal Hocko 625579
Acked-by: Rafael Aquini <aquini@redhat.com>
Michal Hocko 625579
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Michal Hocko 625579
Acked-by: David Rientjes <rientjes@google.com>
Michal Hocko 625579
Cc: Vlastimil Babka <vbabka@suse.cz>
Michal Hocko 625579
Cc: <stable@vger.kernel.org>
Michal Hocko 625579
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Michal Hocko 625579
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Michal Hocko 625579
Signed-off-by: Michal Hocko <mhocko@suse.com>
Michal Hocko 625579
Michal Hocko 625579
---
Michal Hocko 625579
 mm/mempolicy.c |   53 +++++++++++++++++++++++++++++++++++++++++++++++------
Michal Hocko 625579
 1 file changed, 47 insertions(+), 6 deletions(-)
Michal Hocko 625579
Michal Hocko 625579
--- a/mm/mempolicy.c
Michal Hocko 625579
+++ b/mm/mempolicy.c
Michal Hocko 625579
@@ -499,6 +499,22 @@ static int queue_pages_pte_range(pmd_t *
Michal Hocko 625579
 		ptl = pmd_lock(walk->mm, pmd);
Michal Hocko 625579
 		if (pmd_trans_huge(*pmd)) {
Michal Hocko 625579
 			page = pmd_page(*pmd);
Michal Hocko 625579
+
Michal Hocko 625579
+			nid = page_to_nid(page);
Michal Hocko 625579
+			if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT)) {
Michal Hocko 625579
+				spin_unlock(ptl);
Michal Hocko 625579
+				return 0;
Michal Hocko 625579
+			}
Michal Hocko 625579
+
Michal Hocko 625579
+			/*
Michal Hocko 625579
+			 * We cannot modify (split) THP in pure strict mode (STRICT without
Michal Hocko 625579
+			 * MOVE/MOVE_ALL). A misplaced page has to be reported by -EIO.
Michal Hocko 625579
+			 */
Michal Hocko 625579
+			if ((flags & (MPOL_MF_STRICT | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) == MPOL_MF_STRICT) {
Michal Hocko 625579
+				spin_unlock(ptl);
Michal Hocko 625579
+				return -EIO;
Michal Hocko 625579
+			}
Michal Hocko 625579
+
Michal Hocko 625579
 			if (is_huge_zero_page(page)) {
Michal Hocko 625579
 				spin_unlock(ptl);
Michal Hocko 625579
 				__split_huge_pmd(vma, pmd, addr, false, NULL);
Michal Hocko 625579
@@ -509,8 +525,15 @@ static int queue_pages_pte_range(pmd_t *
Michal Hocko 625579
 				ret = split_huge_page(page);
Michal Hocko 625579
 				unlock_page(page);
Michal Hocko 625579
 				put_page(page);
Michal Hocko 625579
-				if (ret)
Michal Hocko 625579
+				if (ret) {
Michal Hocko 625579
+					/*
Michal Hocko 625579
+					 * When moving pages in the strict mode we
Michal Hocko 625579
+					 * should report errors
Michal Hocko 625579
+					 */
Michal Hocko 625579
+					if (flags & MPOL_MF_STRICT)
Michal Hocko 625579
+						return -EIO;
Michal Hocko 625579
 					return 0;
Michal Hocko 625579
+				}
Michal Hocko 625579
 			}
Michal Hocko 625579
 		} else {
Michal Hocko 625579
 			spin_unlock(ptl);
Michal Hocko 625579
@@ -536,6 +559,12 @@ retry:
Michal Hocko 625579
 		nid = page_to_nid(page);
Michal Hocko 625579
 		if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
Michal Hocko 625579
 			continue;
Michal Hocko 625579
+		/*
Michal Hocko 625579
+		 * We cannot modify (split) THP in pure strict mode (STRICT without
Michal Hocko 625579
+		 * MOVE/MOVE_ALL). A misplaced page has to be reported by -EIO.
Michal Hocko 625579
+		 */
Michal Hocko 625579
+		if ((flags & (MPOL_MF_STRICT | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) == MPOL_MF_STRICT)
Michal Hocko 625579
+			break;
Michal Hocko 625579
 		if (PageTransCompound(page)) {
Michal Hocko 625579
 			get_page(page);
Michal Hocko 625579
 			pte_unmap_unlock(pte, ptl);
Michal Hocko 625579
@@ -543,8 +572,10 @@ retry:
Michal Hocko 625579
 			ret = split_huge_page(page);
Michal Hocko 625579
 			unlock_page(page);
Michal Hocko 625579
 			put_page(page);
Michal Hocko 625579
-			/* Failed to split -- skip. */
Michal Hocko 625579
+			/* Failed to split -- skip. unless in strict mode */
Michal Hocko 625579
 			if (ret) {
Michal Hocko 625579
+				if (flags & MPOL_MF_STRICT)
Michal Hocko 625579
+					return -EIO;
Michal Hocko 625579
 				pte = pte_offset_map_lock(walk->mm, pmd,
Michal Hocko 625579
 						addr, &ptl);
Michal Hocko 625579
 				continue;
Michal Hocko 625579
@@ -552,11 +583,16 @@ retry:
Michal Hocko 625579
 			goto retry;
Michal Hocko 625579
 		}
Michal Hocko 625579
 
Michal Hocko 625579
-		migrate_page_add(page, qp->pagelist, flags);
Michal Hocko 625579
+		if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
Michal Hocko 625579
+			if (!vma_migratable(vma))
Michal Hocko 625579
+				break;
Michal Hocko 625579
+			migrate_page_add(page, qp->pagelist, flags);
Michal Hocko 625579
+		} else
Michal Hocko 625579
+			break;
Michal Hocko 625579
 	}
Michal Hocko 625579
 	pte_unmap_unlock(pte - 1, ptl);
Michal Hocko 625579
 	cond_resched();
Michal Hocko 625579
-	return 0;
Michal Hocko 625579
+	return addr != end ? -EIO : 0;
Michal Hocko 625579
 }
Michal Hocko 625579
 
Michal Hocko 625579
 static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
Michal Hocko 625579
@@ -628,7 +664,12 @@ static int queue_pages_test_walk(unsigne
Michal Hocko 625579
 	unsigned long endvma = vma->vm_end;
Michal Hocko 625579
 	unsigned long flags = qp->flags;
Michal Hocko 625579
 
Michal Hocko 625579
-	if (!vma_migratable(vma))
Michal Hocko 625579
+	/*
Michal Hocko 625579
+	 * Need check MPOL_MF_STRICT to return -EIO if possible
Michal Hocko 625579
+	 * regardless of vma_migratable
Michal Hocko 625579
+	 */
Michal Hocko 625579
+	if (!vma_migratable(vma) &&
Michal Hocko 625579
+	    !(flags & MPOL_MF_STRICT))
Michal Hocko 625579
 		return 1;
Michal Hocko 625579
 
Michal Hocko 625579
 	if (endvma > end)
Michal Hocko 625579
@@ -655,7 +696,7 @@ static int queue_pages_test_walk(unsigne
Michal Hocko 625579
 	}
Michal Hocko 625579
 
Michal Hocko 625579
 	/* queue pages from current vma */
Michal Hocko 625579
-	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
Michal Hocko 625579
+	if (flags & MPOL_MF_VALID)
Michal Hocko 625579
 		return 0;
Michal Hocko 625579
 	return 1;
Michal Hocko 625579
 }