From 70c7edc1c6d29aa9aee8b133da3fa8fd1b1248e4 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Tue, 7 Mar 2023 17:54:53 +0100
Subject: [PATCH] kabi: workaround for migrate_vma.fault_page
Patch-mainline: Never, KABI workaround
References: CVE-2022-3523, bsc#1204363

Third-party modules built against the GA KABI will instantiate struct
migrate_vma without the fault_page field, so we must not access it in
that case. Their migrate_to_ram callback will also not handle
fault_page, so the extra page reference added by do_swap_page() will
not be recognized as the fault_page reference and will cause migration
to fail.

To fix the first problem, add a MIGRATE_VMA_FAULT_PAGE flag and a
migrate_vma_fault_page() wrapper that returns migrate_vma.fault_page
only when we know the field exists. In-tree users set the flag. Rename
the field to __fault_page to catch any future backports that need to
be checked.
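
For illustration, a minimal sketch of how an updated in-tree caller
opts in (mirroring the nouveau hunk below; src, dst, addr and owner
stand for the caller's usual context):

	struct migrate_vma migrate = {
		.vma		= vmf->vma,
		.start		= addr,
		.end		= addr + PAGE_SIZE,
		.src		= &src,
		.dst		= &dst,
		.pgmap_owner	= owner,
		.flags		= MIGRATE_VMA_SELECT_DEVICE_PRIVATE |
				  MIGRATE_VMA_FAULT_PAGE,
		.__fault_page	= vmf->page,
	};

	/* Core mm code reads the field only through the wrapper: */
	struct page *fault_page = migrate_vma_fault_page(&migrate);

A module compiled against the old struct leaves MIGRATE_VMA_FAULT_PAGE
clear, so the wrapper returns NULL instead of reading past the end of
the shorter struct.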

To fix the second problem, add a PGMAP_MIGRATE_VMA_FAULT_PAGE flag
that in-kernel drivers set in their pgmap to tell do_swap_page() that
their migrate_to_ram() callback handles fault_page, so the extra page
reference can be taken and dropped there.
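
An in-kernel driver that handles fault_page in its migrate_to_ram()
callback advertises that while setting up its pgmap (sketch, matching
the amdkfd and nouveau hunks below; driver_pagemap_ops is a
placeholder name):

	pgmap->ops = &driver_pagemap_ops;	/* placeholder ops */
	pgmap->flags = PGMAP_MIGRATE_VMA_FAULT_PAGE;

do_swap_page() then takes and drops the extra page reference only when
this flag is set, preserving the old refcounting behaviour for
third-party pgmaps that do not handle fault_page.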

Note that in svm_migrate_init() we replace the assignment of
MIGRATE_VMA_SELECT_DEVICE_PRIVATE to pgmap->flags, which was bogus but
harmless (and was removed upstream as part of a larger change).

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  6 +++---
 drivers/gpu/drm/nouveau/nouveau_dmem.c   |  5 +++--
 include/linux/memremap.h                 |  1 +
 include/linux/migrate.h                  | 20 +++++++++++++++++++-
 lib/test_hmm.c                           |  4 ++--
 mm/memory.c                              |  9 +++++++--
 mm/migrate.c                             | 10 ++++++----
 7 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 8baacb8554ad..67b2895ece96 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -617,7 +617,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
 	migrate.vma = vma;
 	migrate.start = start;
 	migrate.end = end;
-	migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+	migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE | MIGRATE_VMA_FAULT_PAGE;
 	migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
 
 	size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
@@ -628,7 +628,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
 
 	migrate.src = buf;
 	migrate.dst = migrate.src + npages;
-	migrate.fault_page = fault_page;
+	migrate.__fault_page = fault_page;
 	scratch = (dma_addr_t *)(migrate.dst + npages);
 
 	r = migrate_vma_setup(&migrate);
@@ -913,7 +913,7 @@ int svm_migrate_init(struct amdgpu_device *adev)
 	pgmap->range.end = res->end;
 	pgmap->ops = &svm_migrate_pgmap_ops;
 	pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev);
-	pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+	pgmap->flags = PGMAP_MIGRATE_VMA_FAULT_PAGE;
 
 	/* Device manager releases device-specific resources, memory region and
 	 * pgmap when driver disconnects from device.
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 232d01cb7ad3..eea3deeab981 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -192,8 +192,8 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
 		.src		= &src,
 		.dst		= &dst,
 		.pgmap_owner	= drm->dev,
-		.fault_page	= vmf->page,
-		.flags		= MIGRATE_VMA_SELECT_DEVICE_PRIVATE,
+		.__fault_page	= vmf->page,
+		.flags		= MIGRATE_VMA_SELECT_DEVICE_PRIVATE | MIGRATE_VMA_FAULT_PAGE,
 	};
 
 	/*
@@ -254,6 +254,7 @@ nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage)
 	chunk->pagemap.range.end = res->end;
 	chunk->pagemap.nr_range = 1;
 	chunk->pagemap.ops = &nouveau_dmem_pagemap_ops;
+	chunk->pagemap.flags = PGMAP_MIGRATE_VMA_FAULT_PAGE;
 	chunk->pagemap.owner = drm->dev;
 
 	ret = nouveau_bo_new(&drm->client, DMEM_CHUNK_SIZE, 0,
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index c0e9d35889e8..69c33a5bcf6e 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -90,6 +90,7 @@ struct dev_pagemap_ops {
 };
 
 #define PGMAP_ALTMAP_VALID	(1 << 0)
+#define PGMAP_MIGRATE_VMA_FAULT_PAGE   (1 << 1)
 
 /**
  * struct dev_pagemap - metadata for ZONE_DEVICE mappings
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index ab4fd50f85d5..b482531448f2 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -135,6 +135,9 @@ static inline unsigned long migrate_pfn(unsigned long pfn)
 enum migrate_vma_direction {
 	MIGRATE_VMA_SELECT_SYSTEM = 1 << 0,
 	MIGRATE_VMA_SELECT_DEVICE_PRIVATE = 1 << 1,
+#ifndef __GENKSYMS__
+	MIGRATE_VMA_FAULT_PAGE = 1 << 2,
+#endif
 };
 
 struct migrate_vma {
@@ -169,9 +172,24 @@ struct migrate_vma {
 	 * Set to vmf->page if this is being called to migrate a page as part of
 	 * a migrate_to_ram() callback.
 	 */
-	struct page		*fault_page;
+#ifndef __GENKSYMS__
+	struct page		*__fault_page;
+#endif
 };
 
+/*
+ * KABI workaround for third-party modules built with a struct migrate_vma
+ * lacking fault_page: only touch the field when MIGRATE_VMA_FAULT_PAGE is set.
+ */
+static inline struct page *
+migrate_vma_fault_page(struct migrate_vma *migrate_vma)
+{
+	if (migrate_vma->flags & MIGRATE_VMA_FAULT_PAGE)
+		return migrate_vma->__fault_page;
+
+	return NULL;
+}
+
 int migrate_vma_setup(struct migrate_vma *args);
 void migrate_vma_pages(struct migrate_vma *migrate);
 void migrate_vma_finalize(struct migrate_vma *migrate);
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index f24d36a54967..240f7a74ee35 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -1168,8 +1168,8 @@ static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
 	args.src = &src_pfns;
 	args.dst = &dst_pfns;
 	args.pgmap_owner = dmirror->mdevice;
-	args.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
-	args.fault_page = vmf->page;
+	args.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE | MIGRATE_VMA_FAULT_PAGE;
+	args.__fault_page = vmf->page;
 
 	if (migrate_vma_setup(&args))
 		return VM_FAULT_SIGBUS;
diff --git a/mm/memory.c b/mm/memory.c
index ce6111409a6a..dcbe7688f129 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3490,6 +3490,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 			vmf->page = pfn_swap_entry_to_page(entry);
 			ret = remove_device_exclusive_entry(vmf);
 		} else if (is_device_private_entry(entry)) {
+			bool pgmap_has_fault_page;
+
 			vmf->page = pfn_swap_entry_to_page(entry);
 			vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
 					vmf->address, &vmf->ptl);
@@ -3502,10 +3504,13 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 			 * Get a page reference while we know the page can't be
 			 * freed.
 			 */
-			get_page(vmf->page);
+			pgmap_has_fault_page = vmf->page->pgmap->flags & PGMAP_MIGRATE_VMA_FAULT_PAGE;
+			if (pgmap_has_fault_page)
+				get_page(vmf->page);
 			pte_unmap_unlock(vmf->pte, vmf->ptl);
 			ret = vmf->page->pgmap->ops->migrate_to_ram(vmf);
-			put_page(vmf->page);
+			if (pgmap_has_fault_page)
+				put_page(vmf->page);
 		} else if (is_hwpoison_entry(entry)) {
 			ret = VM_FAULT_HWPOISON;
 		} else {
diff --git a/mm/migrate.c b/mm/migrate.c
index 22f3b0bf4baf..a5a8118a1c49 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2511,7 +2511,7 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
 			put_page(page);
 		}
 
-		if (!migrate_vma_check_page(page, migrate->fault_page)) {
+		if (!migrate_vma_check_page(page, migrate_vma_fault_page(migrate))) {
 			if (remap) {
 				migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
 				migrate->cpages--;
@@ -2578,7 +2578,7 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
 				goto restore;
 		}
 
-		if (migrate_vma_check_page(page, migrate->fault_page))
+		if (migrate_vma_check_page(page, migrate_vma_fault_page(migrate)))
 			continue;
 
 restore:
@@ -2673,6 +2673,7 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
 int migrate_vma_setup(struct migrate_vma *args)
 {
 	long nr_pages = (args->end - args->start) >> PAGE_SHIFT;
+	struct page *fault_page;
 
 	args->start &= PAGE_MASK;
 	args->end &= PAGE_MASK;
@@ -2688,7 +2689,8 @@ int migrate_vma_setup(struct migrate_vma *args)
 		return -EINVAL;
 	if (!args->src || !args->dst)
 		return -EINVAL;
-	if (args->fault_page && !is_device_private_page(args->fault_page))
+	fault_page = migrate_vma_fault_page(args);
+	if (fault_page && !is_device_private_page(fault_page))
 		return -EINVAL;
 
 	memset(args->src, 0, sizeof(*args->src) * nr_pages);
@@ -2922,7 +2924,7 @@ void migrate_vma_pages(struct migrate_vma *migrate)
 			}
 		}
 
-		if (migrate->fault_page == page)
+		if (migrate_vma_fault_page(migrate) == page)
 			r = migrate_page_extra(mapping, newpage, page,
 						MIGRATE_SYNC_NO_COPY, 1);
 		else
-- 
2.39.2