From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 11 Apr 2019 10:49:19 -0700
Subject: mm: prevent get_user_pages() from overflowing page refcount
Git-commit: 8fde12ca79aff9b5ba951fce1a2641901b8d8e64
Patch-mainline: v5.1-rc5
References: CVE-2019-11487, bsc#1133190
[ 4.12 backport notes: also adjust arch-specific gup.c for x86 and s390,
ignore sparc/sh/mips that we don't build ]
If the page refcount wraps around past zero, it will be freed while
there are still four billion references to it. One of the possible
avenues for an attacker to try to make this happen is by doing direct IO
on a page multiple times. This patch makes get_user_pages() refuse to
take a new page reference if there are already more than two billion
references to the page.
Reported-by: Jann Horn <jannh@google.com>
Acked-by: Matthew Wilcox <willy@infradead.org>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
arch/s390/mm/gup.c | 9 ++++++---
arch/x86/mm/gup.c | 14 ++++++++++++--
mm/gup.c | 46 +++++++++++++++++++++++++++++++++++-----------
mm/hugetlb.c | 13 +++++++++++++
4 files changed, 66 insertions(+), 16 deletions(-)
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -38,7 +38,8 @@ static inline int gup_pte_range(pmd_t *p
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0)
+ || !page_cache_get_speculative(head)))
return 0;
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
put_page(head);
@@ -76,7 +77,8 @@ static inline int gup_huge_pmd(pmd_t *pm
refs++;
} while (addr += PAGE_SIZE, addr != end);
- if (!page_cache_add_speculative(head, refs)) {
+ if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0)
+ || !page_cache_add_speculative(head, refs))) {
*nr -= refs;
return 0;
}
@@ -150,7 +152,8 @@ static int gup_huge_pud(pud_t *pudp, pud
refs++;
} while (addr += PAGE_SIZE, addr != end);
- if (!page_cache_add_speculative(head, refs)) {
+ if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0)
+ || !page_cache_add_speculative(head, refs))) {
*nr -= refs;
return 0;
}
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -137,7 +137,10 @@ static noinline int gup_pte_range(pmd_t
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
- get_page(page);
+ if (unlikely(!try_get_page(page))) {
+ put_dev_pagemap(pgmap);
+ break;
+ }
put_dev_pagemap(pgmap);
SetPageReferenced(page);
pages[*nr] = page;
@@ -173,9 +176,12 @@ static int __gup_device_huge(unsigned lo
undo_dev_pagemap(nr, nr_start, pages);
return 0;
}
+ if (unlikely(!try_get_page(page))) {
+ put_dev_pagemap(pgmap);
+ return 0;
+ }
SetPageReferenced(page);
pages[*nr] = page;
- get_page(page);
put_dev_pagemap(pgmap);
(*nr)++;
pfn++;
@@ -219,6 +225,8 @@ static noinline int gup_huge_pmd(pmd_t p
refs = 0;
head = pmd_page(pmd);
+ if (WARN_ON_ONCE(page_ref_count(head) <= 0))
+ return 0;
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
@@ -282,6 +290,8 @@ static noinline int gup_huge_pud(pud_t p
refs = 0;
head = pud_page(pud);
+ if (WARN_ON_ONCE(page_ref_count(head) <= 0))
+ return 0;
page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -153,7 +153,11 @@ retry:
}
if (flags & FOLL_GET) {
- get_page(page);
+ if (unlikely(!try_get_page(page))) {
+ page = ERR_PTR(-ENOMEM);
+ put_dev_pagemap(pgmap);
+ goto out;
+ }
/* drop the pgmap reference now that we hold the page */
if (pgmap) {
@@ -306,7 +310,10 @@ struct page *follow_page_mask(struct vm_
if (pmd_trans_unstable(pmd))
ret = -EBUSY;
} else {
- get_page(page);
+ if (unlikely(!try_get_page(page))) {
+ spin_unlock(ptl);
+ return ERR_PTR(-ENOMEM);
+ }
spin_unlock(ptl);
lock_page(page);
ret = split_huge_page(page);
@@ -372,7 +379,10 @@ static int get_gate_page(struct mm_struc
if (is_device_public_page(*page))
goto unmap;
}
- get_page(*page);
+ if (unlikely(!try_get_page(*page))) {
+ ret = -ENOMEM;
+ goto unmap;
+ }
out:
ret = 0;
unmap:
@@ -1275,6 +1285,20 @@ static void undo_dev_pagemap(int *nr, in
}
}
+/*
+ * Return the compund head page with ref appropriately incremented,
+ * or NULL if that failed.
+ */
+static inline struct page *try_get_compound_head(struct page *page, int refs)
+{
+ struct page *head = compound_head(page);
+ if (WARN_ON_ONCE(page_ref_count(head) < 0))
+ return NULL;
+ if (unlikely(!page_cache_add_speculative(head, refs)))
+ return NULL;
+ return head;
+}
+
#ifdef __HAVE_ARCH_PTE_SPECIAL
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
@@ -1309,9 +1333,9 @@ static int gup_pte_range(pmd_t pmd, unsi
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ head = try_get_compound_head(page, 1);
+ if (!head)
goto pte_unmap;
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
@@ -1447,8 +1471,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_
refs++;
} while (addr += PAGE_SIZE, addr != end);
- head = compound_head(pmd_page(orig));
- if (!page_cache_add_speculative(head, refs)) {
+ head = try_get_compound_head(pmd_page(orig), refs);
+ if (!head) {
*nr -= refs;
return 0;
}
@@ -1485,8 +1509,8 @@ static int gup_huge_pud(pud_t orig, pud_
refs++;
} while (addr += PAGE_SIZE, addr != end);
- head = compound_head(pud_page(orig));
- if (!page_cache_add_speculative(head, refs)) {
+ head = try_get_compound_head(pud_page(orig), refs);
+ if (!head) {
*nr -= refs;
return 0;
}
@@ -1522,8 +1546,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_
refs++;
} while (addr += PAGE_SIZE, addr != end);
- head = compound_head(pgd_page(orig));
- if (!page_cache_add_speculative(head, refs)) {
+ head = try_get_compound_head(pgd_page(orig), refs);
+ if (!head) {
*nr -= refs;
return 0;
}
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4263,6 +4263,19 @@ long follow_hugetlb_page(struct mm_struc
pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
page = pte_page(huge_ptep_get(pte));
+
+ /*
+ * Instead of doing 'try_get_page()' below in the same_page
+ * loop, just check the count once here.
+ */
+ if (unlikely(page_count(page) <= 0)) {
+ if (pages) {
+ spin_unlock(ptl);
+ remainder = 0;
+ err = -ENOMEM;
+ break;
+ }
+ }
same_page:
if (pages) {
pages[i] = mem_map_offset(page, pfn_offset);