Blob Blame History Raw
From: Alistair Popple <apopple@nvidia.com>
Date: Wed, 28 Sep 2022 22:01:16 +1000
Subject: mm: free device private pages have zero refcount
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Git-commit: ef233450898f8893dafa193a9f3211fa077a3d05
Patch-mainline: v6.1-rc1
References: jsc#PED-1294, bsc#1204363, CVE-2022-3523

Since 27674ef6c73f ("mm: remove the extra ZONE_DEVICE struct page
refcount") device private pages have no longer had an extra reference
count when the page is in use.  However before handing them back to the
owning device driver we add an extra reference count such that free pages
have a reference count of one.

This makes it difficult to tell if a page is free or not because both free
and in use pages will have a non-zero refcount.  Instead we should return
pages to the drivers page allocator with a zero reference count.  Kernel
code can then safely use kernel functions such as get_page_unless_zero().

Link: https://lkml.kernel.org/r/cf70cf6f8c0bdb8aaebdbfb0d790aea4c683c3c6.1664366292.git-series.apopple@nvidia.com
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Ben Skeggs <bskeggs@redhat.com>
Cc: Lyude Paul <lyude@redhat.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Alex Sierra <alex.sierra@amd.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 arch/powerpc/kvm/book3s_hv_uvmem.c       |    2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |    2 +-
 drivers/gpu/drm/nouveau/nouveau_dmem.c   |    2 +-
 include/linux/memremap.h                 |    1 +
 lib/test_hmm.c                           |    2 +-
 mm/memremap.c                            |    9 +++++++++
 mm/page_alloc.c                          |    8 ++++++++
 7 files changed, 22 insertions(+), 4 deletions(-)

--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -718,7 +718,7 @@ static struct page *kvmppc_uvmem_get_pag
 
 	dpage = pfn_to_page(uvmem_pfn);
 	dpage->zone_device_data = pvt;
-	lock_page(dpage);
+	zone_device_page_init(dpage);
 	return dpage;
 out_clear:
 	spin_lock(&kvmppc_uvmem_bitmap_lock);
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -225,7 +225,7 @@ svm_migrate_get_vram_page(struct svm_ran
 	page = pfn_to_page(pfn);
 	svm_range_bo_ref(prange->svm_bo);
 	page->zone_device_data = prange->svm_bo;
-	lock_page(page);
+	zone_device_page_init(page);
 }
 
 static void
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -326,7 +326,7 @@ nouveau_dmem_page_alloc_locked(struct no
 			return NULL;
 	}
 
-	lock_page(page);
+	zone_device_page_init(page);
 	return page;
 }
 
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -160,6 +160,7 @@ static inline bool is_device_coherent_pa
 }
 
 #ifdef CONFIG_ZONE_DEVICE
+void zone_device_page_init(struct page *page);
 void *memremap_pages(struct dev_pagemap *pgmap, int nid);
 void memunmap_pages(struct dev_pagemap *pgmap);
 void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -627,8 +627,8 @@ static struct page *dmirror_devmem_alloc
 			goto error;
 	}
 
+	zone_device_page_init(dpage);
 	dpage->zone_device_data = rpage;
-	lock_page(dpage);
 	return dpage;
 
 error:
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -496,8 +496,17 @@ void free_zone_device_page(struct page *
 	/*
 	 * Reset the page count to 1 to prepare for handing out the page again.
 	 */
+	if (page->pgmap->type != MEMORY_DEVICE_PRIVATE &&
+	    page->pgmap->type != MEMORY_DEVICE_COHERENT)
+		set_page_count(page, 1);
+}
+
+void zone_device_page_init(struct page *page)
+{
 	set_page_count(page, 1);
+	lock_page(page);
 }
+EXPORT_SYMBOL_GPL(zone_device_page_init);
 
 #ifdef CONFIG_FS_DAX
 bool __put_devmap_managed_page(struct page *page)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6589,6 +6589,14 @@ static void __ref __init_zone_device_pag
 		set_pageblock_migratetype(page, MIGRATE_MOVABLE);
 		cond_resched();
 	}
+
+	/*
+	 * ZONE_DEVICE pages are released directly to the driver page allocator
+	 * which will set the page count to 1 when allocating the page.
+	 */
+	if (pgmap->type == MEMORY_DEVICE_PRIVATE ||
+	    pgmap->type == MEMORY_DEVICE_COHERENT)
+		set_page_count(page, 0);
 }
 
 static void __ref memmap_init_compound(struct page *head,