From 2f6d444f6b57b11a777897f35004007e3bd4fc6d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 27 Jul 2018 21:48:17 +1000
Subject: [PATCH] powerpc/64s: Fix page table fragment refcount race vs
speculative references
References: bsc#1131326, bsc#1108937
Patch-mainline: v4.19-rc1
Git-commit: 4231aba000f5a4583dd9f67057aadb68c3eca99d
The page table fragment allocator uses the main page refcount racily
with respect to speculative references. A customer observed a BUG due
to page table page refcount underflow in the fragment allocator. This
can be caused by the fragment allocator set_page_count stomping on a
speculative reference, and then the speculative failure handler
decrements the new reference, and the underflow eventually pops when
the page tables are freed.
Fix this by using a dedicated field in the struct page for the page
table fragment allocator.
Fixes: 5c1f6ee9a31c ("powerpc: Reduce PTE table memory wastage")
Cc: stable@vger.kernel.org # v3.10+
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Backported-by: Gustavo Walbon <gwalbon@linux.ibm.com>
[
struct page uses another offset in the scruct to store the flag count
]
Acked-by: Michal Suchanek <msuchanek@suse.de>
---
arch/powerpc/mm/mmu_context_book3s64.c | 4 ++--
arch/powerpc/mm/pgtable_64.c | 10 +++++++---
include/linux/mm_types.h | 1 +
3 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 3f980baade4c..0aff84f8e08d 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -201,9 +201,9 @@ static void destroy_pagetable_page(struct mm_struct *mm)
/* drop all the pending references */
count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
/* We allow PTE_FRAG_NR fragments from a PTE page */
- if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) {
+ if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
pgtable_page_dtor(page);
- free_unref_page(page);
+ __free_page(page);
}
}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 8faaa44bea34..ac990323ecfb 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -364,6 +364,8 @@ static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
return NULL;
}
+ atomic_set(&page->pt_frag_refcount, 1);
+
ret = page_address(page);
spin_lock(&mm->page_table_lock);
/*
@@ -372,7 +374,7 @@ static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
* count.
*/
if (likely(!mm->context.pte_frag)) {
- set_page_count(page, PTE_FRAG_NR);
+ atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR);
mm->context.pte_frag = ret + PTE_FRAG_SIZE;
}
spin_unlock(&mm->page_table_lock);
@@ -395,10 +397,12 @@ pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel
void pte_fragment_free(unsigned long *table, int kernel)
{
struct page *page = virt_to_page(table);
- if (put_page_testzero(page)) {
+
+ BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
+ if (atomic_dec_and_test(&page->pt_frag_refcount)) {
if (!kernel)
pgtable_page_dtor(page);
- free_unref_page(page);
+ __free_page(page);
}
}
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0aacdfa6dc2e..2b9589f02601 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -57,6 +57,7 @@ struct page {
/* Second double word */
union {
+ atomic_t pt_frag_refcount; /* powerpc */
pgoff_t index; /* Our offset within mapping. */
void *freelist; /* sl[aou]b first free object */
/* page_deferred_list().prev -- second tail page */
--
2.20.1