From 70c7edc1c6d29aa9aee8b133da3fa8fd1b1248e4 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Tue, 7 Mar 2023 17:54:53 +0100
Subject: [PATCH] kabi: workaround for migrate_vma.fault_page
Patch-mainline: Never, KABI workaround
References: CVE-2022-3523, bsc#1204363
3rd party modules built against the GA KABI will instantiate struct
migrate_vma without the fault_page, so we must not access it in that
case. Their migrate_to_ram callback will also not handle fault_page so
the extra page reference added by do_swap_page() will not be recognized
as the fault_page reference and cause migration to fail.
To fix the first problem, add MIGRATE_VMA_FAULT_PAGE flag and
migrate_vma_fault_page() wrapper to obtain migrate_vma.fault_page only
if we know it exists. In-tree modules will set the flag. Rename the
field to __fault_page to catch any future backports that need to be
checked.
To fix the second problem, add PGMAP_MIGRATE_VMA_FAULT_PAGE flag for
pgmap that in-kernel drivers set to make do_swap_page() know they handle
fault_page and so the page reference can be incremented and decremented
there.
Note that in svm_migrate_init() we replace the bogus assignment of
MIGRATE_VMA_SELECT_DEVICE_PRIVATE to pgmap->flags, which was harmless
(and was removed upstream as part of a larger change).
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 6 +++---
drivers/gpu/drm/nouveau/nouveau_dmem.c | 5 +++--
include/linux/memremap.h | 1 +
include/linux/migrate.h | 20 +++++++++++++++++++-
lib/test_hmm.c | 4 ++--
mm/memory.c | 9 +++++++--
mm/migrate.c | 10 ++++++----
7 files changed, 41 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 8baacb8554ad..67b2895ece96 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -617,7 +617,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
migrate.vma = vma;
migrate.start = start;
migrate.end = end;
- migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+ migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE | MIGRATE_VMA_FAULT_PAGE;
migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
@@ -628,7 +628,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
migrate.src = buf;
migrate.dst = migrate.src + npages;
- migrate.fault_page = fault_page;
+ migrate.__fault_page = fault_page;
scratch = (dma_addr_t *)(migrate.dst + npages);
r = migrate_vma_setup(&migrate);
@@ -913,7 +913,7 @@ int svm_migrate_init(struct amdgpu_device *adev)
pgmap->range.end = res->end;
pgmap->ops = &svm_migrate_pgmap_ops;
pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev);
- pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+ pgmap->flags = PGMAP_MIGRATE_VMA_FAULT_PAGE;
/* Device manager releases device-specific resources, memory region and
* pgmap when driver disconnects from device.
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 232d01cb7ad3..eea3deeab981 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -192,8 +192,8 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
.src = &src,
.dst = &dst,
.pgmap_owner = drm->dev,
- .fault_page = vmf->page,
- .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE,
+ .__fault_page = vmf->page,
+ .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE | MIGRATE_VMA_FAULT_PAGE,
};
/*
@@ -254,6 +254,7 @@ nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage)
chunk->pagemap.range.end = res->end;
chunk->pagemap.nr_range = 1;
chunk->pagemap.ops = &nouveau_dmem_pagemap_ops;
+ chunk->pagemap.flags = PGMAP_MIGRATE_VMA_FAULT_PAGE;
chunk->pagemap.owner = drm->dev;
ret = nouveau_bo_new(&drm->client, DMEM_CHUNK_SIZE, 0,
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index c0e9d35889e8..69c33a5bcf6e 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -90,6 +90,7 @@ struct dev_pagemap_ops {
};
#define PGMAP_ALTMAP_VALID (1 << 0)
+#define PGMAP_MIGRATE_VMA_FAULT_PAGE (1 << 1)
/**
* struct dev_pagemap - metadata for ZONE_DEVICE mappings
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index ab4fd50f85d5..b482531448f2 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -135,6 +135,9 @@ static inline unsigned long migrate_pfn(unsigned long pfn)
enum migrate_vma_direction {
MIGRATE_VMA_SELECT_SYSTEM = 1 << 0,
MIGRATE_VMA_SELECT_DEVICE_PRIVATE = 1 << 1,
+#ifndef __GENKSYMS__
+ MIGRATE_VMA_FAULT_PAGE = 1 << 2,
+#endif
};
struct migrate_vma {
@@ -169,9 +172,24 @@ struct migrate_vma {
* Set to vmf->page if this is being called to migrate a page as part of
* a migrate_to_ram() callback.
*/
- struct page *fault_page;
+#ifndef __GENKSYMS__
+ struct page *__fault_page;
+#endif
};
+/*
+ * KABI workaround for third-party modules built with struct migrate_vma
+ * definition without the new fault_page field. Only touch the field when
+ * the MIGRATE_VMA_FAULT_PAGE flag indicates that it exists. */
+static inline struct page *
+migrate_vma_fault_page(struct migrate_vma *migrate_vma)
+{
+ if (migrate_vma->flags & MIGRATE_VMA_FAULT_PAGE)
+ return migrate_vma->__fault_page;
+
+ return NULL;
+}
+
int migrate_vma_setup(struct migrate_vma *args);
void migrate_vma_pages(struct migrate_vma *migrate);
void migrate_vma_finalize(struct migrate_vma *migrate);
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index f24d36a54967..240f7a74ee35 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -1168,8 +1168,8 @@ static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
args.src = &src_pfns;
args.dst = &dst_pfns;
args.pgmap_owner = dmirror->mdevice;
- args.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
- args.fault_page = vmf->page;
+ args.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE | MIGRATE_VMA_FAULT_PAGE;
+ args.__fault_page = vmf->page;
if (migrate_vma_setup(&args))
return VM_FAULT_SIGBUS;
diff --git a/mm/memory.c b/mm/memory.c
index ce6111409a6a..dcbe7688f129 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3490,6 +3490,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
vmf->page = pfn_swap_entry_to_page(entry);
ret = remove_device_exclusive_entry(vmf);
} else if (is_device_private_entry(entry)) {
+ bool pgmap_has_fault_page;
+
vmf->page = pfn_swap_entry_to_page(entry);
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
vmf->address, &vmf->ptl);
@@ -3502,10 +3504,13 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
* Get a page reference while we know the page can't be
* freed.
*/
- get_page(vmf->page);
+ pgmap_has_fault_page = vmf->page->pgmap->flags & PGMAP_MIGRATE_VMA_FAULT_PAGE;
+ if (pgmap_has_fault_page)
+ get_page(vmf->page);
pte_unmap_unlock(vmf->pte, vmf->ptl);
ret = vmf->page->pgmap->ops->migrate_to_ram(vmf);
- put_page(vmf->page);
+ if (pgmap_has_fault_page)
+ put_page(vmf->page);
} else if (is_hwpoison_entry(entry)) {
ret = VM_FAULT_HWPOISON;
} else {
diff --git a/mm/migrate.c b/mm/migrate.c
index 22f3b0bf4baf..a5a8118a1c49 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2511,7 +2511,7 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
put_page(page);
}
- if (!migrate_vma_check_page(page, migrate->fault_page)) {
+ if (!migrate_vma_check_page(page, migrate_vma_fault_page(migrate))) {
if (remap) {
migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
migrate->cpages--;
@@ -2578,7 +2578,7 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
goto restore;
}
- if (migrate_vma_check_page(page, migrate->fault_page))
+ if (migrate_vma_check_page(page, migrate_vma_fault_page(migrate)))
continue;
restore:
@@ -2673,6 +2673,7 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
int migrate_vma_setup(struct migrate_vma *args)
{
long nr_pages = (args->end - args->start) >> PAGE_SHIFT;
+ struct page *fault_page;
args->start &= PAGE_MASK;
args->end &= PAGE_MASK;
@@ -2688,7 +2689,8 @@ int migrate_vma_setup(struct migrate_vma *args)
return -EINVAL;
if (!args->src || !args->dst)
return -EINVAL;
- if (args->fault_page && !is_device_private_page(args->fault_page))
+ fault_page = migrate_vma_fault_page(args);
+ if (fault_page && !is_device_private_page(fault_page))
return -EINVAL;
memset(args->src, 0, sizeof(*args->src) * nr_pages);
@@ -2922,7 +2924,7 @@ void migrate_vma_pages(struct migrate_vma *migrate)
}
}
- if (migrate->fault_page == page)
+ if (migrate_vma_fault_page(migrate) == page)
r = migrate_page_extra(mapping, newpage, page,
MIGRATE_SYNC_NO_COPY, 1);
else
--
2.39.2