From 2f6d444f6b57b11a777897f35004007e3bd4fc6d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 27 Jul 2018 21:48:17 +1000
Subject: [PATCH] powerpc/64s: Fix page table fragment refcount race vs
 speculative references

References: bsc#1131326, bsc#1108937
Patch-mainline: v4.19-rc1
Git-commit: 4231aba000f5a4583dd9f67057aadb68c3eca99d

The page table fragment allocator uses the main page refcount racily
with respect to speculative references. A customer observed a BUG due
to a page table page refcount underflow in the fragment allocator. This
can happen when the fragment allocator's set_page_count() stomps on a
speculative reference: the speculative failure handler then drops a
reference that the blind set_page_count() has already overwritten, and
the underflow eventually pops when the page tables are freed.
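
A minimal sketch of the problematic interleaving, assuming the
speculative reference comes from a get_page_unless_zero() user such as
GUP-fast (the exact caller is not identified above):

  CPU0 (__alloc_for_cache)              CPU1 (speculative reference)
  ------------------------              ----------------------------
  page = alloc_page(...);               /* page count == 1 */
                                        get_page_unless_zero(page);
                                        /* page count == 2 */
  set_page_count(page, PTE_FRAG_NR);    /* blind store: the
                                           speculative ref is lost */
                                        /* lookup turns out stale,
                                           back out: */
                                        put_page(page);
                                        /* count == PTE_FRAG_NR - 1 */

The lost count eventually shows up as an underflow once the page table
page's fragments have all been freed.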

Fix this by using a dedicated field in the struct page for the page
table fragment allocator.
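
With a dedicated pt_frag_refcount, the fragment lifetime becomes,
simplified from the diff below:

  atomic_set(&page->pt_frag_refcount, 1);        /* single fragment */
  ...
  if (likely(!mm->context.pte_frag)) {
          atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR);
          mm->context.pte_frag = ret + PTE_FRAG_SIZE;
  }
  ...
  if (atomic_dec_and_test(&page->pt_frag_refcount))
          __free_page(page);                     /* pte_fragment_free() */

Speculative references only ever take and drop the normal page
refcount, which the fragment allocator no longer overwrites with
set_page_count(), so a concurrent speculative get/put pair can no
longer corrupt the fragment count.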

Fixes: 5c1f6ee9a31c ("powerpc: Reduce PTE table memory wastage")
Cc: stable@vger.kernel.org # v3.10+
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Backported-by: Gustavo Walbon <gwalbon@linux.ibm.com>
[
struct page uses a different offset in the struct to store the frag count
]
Acked-by: Michal Suchanek <msuchanek@suse.de>
---
 arch/powerpc/mm/mmu_context_book3s64.c |  4 ++--
 arch/powerpc/mm/pgtable_64.c           | 10 +++++++---
 include/linux/mm_types.h               |  1 +
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 3f980baade4c..0aff84f8e08d 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -201,9 +201,9 @@ static void destroy_pagetable_page(struct mm_struct *mm)
 	/* drop all the pending references */
 	count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
 	/* We allow PTE_FRAG_NR fragments from a PTE page */
-	if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) {
+	if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
 		pgtable_page_dtor(page);
-		free_unref_page(page);
+		__free_page(page);
 	}
 }
 
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 8faaa44bea34..ac990323ecfb 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -364,6 +364,8 @@ static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
 		return NULL;
 	}
 
+	atomic_set(&page->pt_frag_refcount, 1);
+
 	ret = page_address(page);
 	spin_lock(&mm->page_table_lock);
 	/*
@@ -372,7 +374,7 @@ static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
 	 * count.
 	 */
 	if (likely(!mm->context.pte_frag)) {
-		set_page_count(page, PTE_FRAG_NR);
+		atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR);
 		mm->context.pte_frag = ret + PTE_FRAG_SIZE;
 	}
 	spin_unlock(&mm->page_table_lock);
@@ -395,10 +397,12 @@ pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel
 void pte_fragment_free(unsigned long *table, int kernel)
 {
 	struct page *page = virt_to_page(table);
-	if (put_page_testzero(page)) {
+
+	BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
+	if (atomic_dec_and_test(&page->pt_frag_refcount)) {
 		if (!kernel)
 			pgtable_page_dtor(page);
-		free_unref_page(page);
+		__free_page(page);
 	}
 }
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0aacdfa6dc2e..2b9589f02601 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -57,6 +57,7 @@ struct page {
 
 	/* Second double word */
 	union {
+		atomic_t pt_frag_refcount; /* powerpc */
 		pgoff_t index;		/* Our offset within mapping. */
 		void *freelist;		/* sl[aou]b first free object */
 		/* page_deferred_list().prev	-- second tail page */
-- 
2.20.1