diff --git a/blacklist.conf b/blacklist.conf index 4ed1227..307b925 100644 --- a/blacklist.conf +++ b/blacklist.conf @@ -1068,7 +1068,6 @@ bc714c8bd4b7f1f29f9b15d79211c5fb3aa63c4d # Temporarily blacklisted while DRM bac 3af4da165f487a3956fe5a7b4ee08b12c7a3a9af # Temporarily blacklisted while DRM backport is ongoing: hostap: convert to struct proc_ops 1d90b6491014ead775146726b81a78ed993c3188 # Temporarily blacklisted while DRM backport is ongoing: include/linux/memremap.h: remove stale comments 68d237056e007c88031d80900cdba0945121a287 # Temporarily blacklisted while DRM backport is ongoing: scatterlist: protect parameters of the sg_table related macros -c1a06df6ebf6ca98fb7a672fe447c7469d6c1968 # Temporarily blacklisted while DRM backport is ongoing: mm/migrate: fix migrate_pgmap_owner w/o CONFIG_MMU_NOTIFIER 6a9dc5fd6170d0a41c8a14eb19e63d94bea5705a # Temporarily blacklisted while DRM backport is ongoing: lib: Revert use of fallthrough pseudo-keyword in lib/ 693a8e936590f93451e6f5a3d748616f5a59c80b # Temporarily blacklisted while DRM backport is ongoing: driver code: print symbolic error code e1f82a0dcf388d98bcc7ad195c03bd812405e6b2 # Temporarily blacklisted while DRM backport is ongoing: driver core: Annotate dev_err_probe() with __must_check diff --git a/config/x86_64/default b/config/x86_64/default index 921103e..d2ba1e6 100644 --- a/config/x86_64/default +++ b/config/x86_64/default @@ -9659,6 +9659,7 @@ CONFIG_TEST_LIVEPATCH=m # CONFIG_TEST_OBJAGG is not set # CONFIG_TEST_STACKINIT is not set # CONFIG_TEST_MEMINIT is not set +CONFIG_TEST_HMM=m CONFIG_MEMTEST=y # CONFIG_BUG_ON_DATA_CORRUPTION is not set # CONFIG_SAMPLES is not set diff --git a/patches.suse/0012-mm-migrate-add-a-flags-parameter-to-migrate_vma.patch b/patches.suse/0012-mm-migrate-add-a-flags-parameter-to-migrate_vma.patch index e33dc63..7f79ec5 100644 --- a/patches.suse/0012-mm-migrate-add-a-flags-parameter-to-migrate_vma.patch +++ 
b/patches.suse/0012-mm-migrate-add-a-flags-parameter-to-migrate_vma.patch @@ -25,8 +25,9 @@ Signed-off-by: Patrik Jakobsson --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 4 +++- include/linux/migrate.h | 13 +++++++++---- + lib/test_hmm.c | 15 ++++----------- mm/migrate.c | 6 ++++-- - 3 files changed, 16 insertions(+), 7 deletions(-) + 4 files changed, 20 insertions(+), 18 deletions(-) --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -78,6 +79,44 @@ Signed-off-by: Patrik Jakobsson }; int migrate_vma_setup(struct migrate_vma *args); +--- a/lib/test_hmm.c ++++ b/lib/test_hmm.c +@@ -585,15 +585,6 @@ static void dmirror_migrate_alloc_and_co + */ + spage = migrate_pfn_to_page(*src); + +- /* +- * Don't migrate device private pages from our own driver or +- * others. For our own we would do a device private memory copy +- * not a migration and for others, we would need to fault the +- * other device's page into system memory first. +- */ +- if (spage && is_zone_device_page(spage)) +- continue; +- + dpage = dmirror_devmem_alloc_page(mdevice); + if (!dpage) + continue; +@@ -702,7 +693,8 @@ static int dmirror_migrate(struct dmirro + args.dst = dst_pfns; + args.start = addr; + args.end = next; +- args.src_owner = NULL; ++ args.pgmap_owner = NULL; ++ args.flags = MIGRATE_VMA_SELECT_SYSTEM; + ret = migrate_vma_setup(&args); + if (ret) + goto out; +@@ -1053,7 +1045,8 @@ static vm_fault_t dmirror_devmem_fault(s + args.end = args.start + PAGE_SIZE; + args.src = &src_pfns; + args.dst = &dst_pfns; +- args.src_owner = dmirror->mdevice; ++ args.pgmap_owner = dmirror->mdevice; ++ args.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; + + if (migrate_vma_setup(&args)) + return VM_FAULT_SIGBUS; --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2265,7 +2265,9 @@ again: diff --git a/patches.suse/lib-fix-test_hmm-c-reference-after-free.patch b/patches.suse/lib-fix-test_hmm-c-reference-after-free.patch new file mode 100644 index 0000000..fc2382b --- /dev/null +++ 
b/patches.suse/lib-fix-test_hmm-c-reference-after-free.patch @@ -0,0 +1,42 @@ +From: Randy Dunlap +Date: Thu, 25 Jun 2020 20:29:43 -0700 +Subject: lib: fix test_hmm.c reference after free +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Git-commit: 786ae133e07f2a6b352a0efad16b555ee45a2898 +Patch-mainline: v5.8-rc3 +References: jsc#SLE-16387 + +Coccinelle scripts report the following errors: + + lib/test_hmm.c:523:20-26: ERROR: reference preceded by free on line 521 + lib/test_hmm.c:524:21-27: ERROR: reference preceded by free on line 521 + lib/test_hmm.c:523:28-35: ERROR: devmem is NULL but dereferenced. + lib/test_hmm.c:524:29-36: ERROR: devmem is NULL but dereferenced. + +Fix these by using the local variable 'res' instead of devmem. + +Link: http://lkml.kernel.org/r/c845c158-9c65-9665-0d0b-00342846dd07@infradead.org +Signed-off-by: Randy Dunlap +Reviewed-by: Ralph Campbell +Cc: Jérôme Glisse +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Vlastimil Babka +--- + lib/test_hmm.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/lib/test_hmm.c ++++ b/lib/test_hmm.c +@@ -520,8 +520,7 @@ static bool dmirror_allocate_chunk(struc + err_free: + kfree(devmem); + err_release: +- release_mem_region(devmem->pagemap.res.start, +- resource_size(&devmem->pagemap.res)); ++ release_mem_region(res->start, resource_size(res)); + err: + mutex_unlock(&mdevice->devmem_lock); + return false; diff --git a/patches.suse/lib-test_hmm-c-remove-unused-dmirror_zero_page.patch b/patches.suse/lib-test_hmm-c-remove-unused-dmirror_zero_page.patch new file mode 100644 index 0000000..0dfcea8 --- /dev/null +++ b/patches.suse/lib-test_hmm-c-remove-unused-dmirror_zero_page.patch @@ -0,0 +1,60 @@ +From: Ralph Campbell +Date: Tue, 13 Oct 2020 16:54:32 -0700 +Subject: lib/test_hmm.c: remove unused dmirror_zero_page +Git-commit: 9b53122f616a74ddbbd6c97a3c8294c631a13d15 +Patch-mainline: v5.10-rc1 +References: 
jsc#SLE-16387 + +The variable dmirror_zero_page is unused in the HMM self test driver which +was probably intended to demonstrate how a driver could use +migrate_vma_setup() to share a single read-only device private zero page +similar to how the CPU does. However, this isn't needed for the self +tests so remove it. + +Signed-off-by: Ralph Campbell +Signed-off-by: Andrew Morton +Cc: Jerome Glisse +Link: https://lkml.kernel.org/r/20200914213801.16520-1-rcampbell@nvidia.com +Signed-off-by: Linus Torvalds +Signed-off-by: Vlastimil Babka +--- + lib/test_hmm.c | 14 -------------- + 1 file changed, 14 deletions(-) + +--- a/lib/test_hmm.c ++++ b/lib/test_hmm.c +@@ -36,7 +36,6 @@ + static const struct dev_pagemap_ops dmirror_devmem_ops; + static const struct mmu_interval_notifier_ops dmirror_min_ops; + static dev_t dmirror_dev; +-static struct page *dmirror_zero_page; + + struct dmirror_device; + +@@ -1126,17 +1125,6 @@ static int __init hmm_dmirror_init(void) + goto err_chrdev; + } + +- /* +- * Allocate a zero page to simulate a reserved page of device private +- * memory which is always zero. The zero_pfn page isn't used just to +- * make the code here simpler (i.e., we need a struct page for it). +- */ +- dmirror_zero_page = alloc_page(GFP_HIGHUSER | __GFP_ZERO); +- if (!dmirror_zero_page) { +- ret = -ENOMEM; +- goto err_chrdev; +- } +- + pr_info("HMM test module loaded. 
This is only for testing HMM.\n"); + return 0; + +@@ -1152,8 +1140,6 @@ static void __exit hmm_dmirror_exit(void + { + int id; + +- if (dmirror_zero_page) +- __free_page(dmirror_zero_page); + for (id = 0; id < DMIRROR_NDEVICES; id++) + dmirror_device_remove(dmirror_devices + id); + unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES); diff --git a/patches.suse/maintainers-add-hmm-selftests.patch b/patches.suse/maintainers-add-hmm-selftests.patch new file mode 100644 index 0000000..b1f40ac --- /dev/null +++ b/patches.suse/maintainers-add-hmm-selftests.patch @@ -0,0 +1,28 @@ +From: Ralph Campbell +Date: Wed, 22 Apr 2020 12:50:28 -0700 +Subject: MAINTAINERS: add HMM selftests +Git-commit: f07e2f6be37a750737b93f5635485171ad459eb9 +Patch-mainline: v5.8-rc1 +References: jsc#SLE-16387 + +Add files for HMM selftests. + +Link: https://lore.kernel.org/r/20200422195028.3684-4-rcampbell@nvidia.com +Signed-off-by: Ralph Campbell +Signed-off-by: Jason Gunthorpe +Signed-off-by: Vlastimil Babka +--- + MAINTAINERS | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -7363,6 +7363,8 @@ S: Maintained + F: mm/hmm* + F: include/linux/hmm* + F: Documentation/vm/hmm.rst ++F: lib/test_hmm* ++F: tools/testing/selftests/vm/*hmm* + + HOST AP DRIVER + M: Jouni Malinen diff --git a/patches.suse/mm-hmm-add-tests-for-hmm_pfn_to_map_order.patch b/patches.suse/mm-hmm-add-tests-for-hmm_pfn_to_map_order.patch new file mode 100644 index 0000000..3cab02e --- /dev/null +++ b/patches.suse/mm-hmm-add-tests-for-hmm_pfn_to_map_order.patch @@ -0,0 +1,138 @@ +From: Ralph Campbell +Date: Wed, 1 Jul 2020 15:53:52 -0700 +Subject: mm/hmm: add tests for hmm_pfn_to_map_order() +Git-commit: e478425bec930e9368c6efdc78d2e5d85eadc18e +Patch-mainline: v5.9-rc1 +References: jsc#SLE-16387 + +Add a sanity test for hmm_range_fault() returning the page mapping size +order. 
+ +Link: https://lore.kernel.org/r/20200701225352.9649-6-rcampbell@nvidia.com +Signed-off-by: Ralph Campbell +Signed-off-by: Jason Gunthorpe +Signed-off-by: Vlastimil Babka +--- + lib/test_hmm.c | 4 + + lib/test_hmm_uapi.h | 4 + + tools/testing/selftests/vm/hmm-tests.c | 76 +++++++++++++++++++++++++++++++++ + 3 files changed, 84 insertions(+) + +--- a/lib/test_hmm.c ++++ b/lib/test_hmm.c +@@ -766,6 +766,10 @@ static void dmirror_mkentry(struct dmirr + *perm |= HMM_DMIRROR_PROT_WRITE; + else + *perm |= HMM_DMIRROR_PROT_READ; ++ if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PMD_SHIFT) ++ *perm |= HMM_DMIRROR_PROT_PMD; ++ else if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PUD_SHIFT) ++ *perm |= HMM_DMIRROR_PROT_PUD; + } + + static bool dmirror_snapshot_invalidate(struct mmu_interval_notifier *mni, +--- a/lib/test_hmm_uapi.h ++++ b/lib/test_hmm_uapi.h +@@ -40,6 +40,8 @@ struct hmm_dmirror_cmd { + * HMM_DMIRROR_PROT_NONE: unpopulated PTE or PTE with no access + * HMM_DMIRROR_PROT_READ: read-only PTE + * HMM_DMIRROR_PROT_WRITE: read/write PTE ++ * HMM_DMIRROR_PROT_PMD: PMD sized page is fully mapped by same permissions ++ * HMM_DMIRROR_PROT_PUD: PUD sized page is fully mapped by same permissions + * HMM_DMIRROR_PROT_ZERO: special read-only zero page + * HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL: Migrated device private page on the + * device the ioctl() is made +@@ -51,6 +53,8 @@ enum { + HMM_DMIRROR_PROT_NONE = 0x00, + HMM_DMIRROR_PROT_READ = 0x01, + HMM_DMIRROR_PROT_WRITE = 0x02, ++ HMM_DMIRROR_PROT_PMD = 0x04, ++ HMM_DMIRROR_PROT_PUD = 0x08, + HMM_DMIRROR_PROT_ZERO = 0x10, + HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL = 0x20, + HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE = 0x30, +--- a/tools/testing/selftests/vm/hmm-tests.c ++++ b/tools/testing/selftests/vm/hmm-tests.c +@@ -1292,6 +1292,82 @@ TEST_F(hmm2, snapshot) + } + + /* ++ * Test the hmm_range_fault() HMM_PFN_PMD flag for large pages that ++ * should be mapped by a large page table entry. 
++ */ ++TEST_F(hmm, compound) ++{ ++ struct hmm_buffer *buffer; ++ unsigned long npages; ++ unsigned long size; ++ int *ptr; ++ unsigned char *m; ++ int ret; ++ long pagesizes[4]; ++ int n, idx; ++ unsigned long i; ++ ++ /* Skip test if we can't allocate a hugetlbfs page. */ ++ ++ n = gethugepagesizes(pagesizes, 4); ++ if (n <= 0) ++ return; ++ for (idx = 0; --n > 0; ) { ++ if (pagesizes[n] < pagesizes[idx]) ++ idx = n; ++ } ++ size = ALIGN(TWOMEG, pagesizes[idx]); ++ npages = size >> self->page_shift; ++ ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->ptr = get_hugepage_region(size, GHR_STRICT); ++ if (buffer->ptr == NULL) { ++ free(buffer); ++ return; ++ } ++ ++ buffer->size = size; ++ buffer->mirror = malloc(npages); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ /* Initialize the pages the device will snapshot in buffer->ptr. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ptr[i] = i; ++ ++ /* Simulate a device snapshotting CPU pagetables. */ ++ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ++ /* Check what the device saw. */ ++ m = buffer->mirror; ++ for (i = 0; i < npages; ++i) ++ ASSERT_EQ(m[i], HMM_DMIRROR_PROT_WRITE | ++ HMM_DMIRROR_PROT_PMD); ++ ++ /* Make the region read-only. */ ++ ret = mprotect(buffer->ptr, size, PROT_READ); ++ ASSERT_EQ(ret, 0); ++ ++ /* Simulate a device snapshotting CPU pagetables. */ ++ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ++ /* Check what the device saw. */ ++ m = buffer->mirror; ++ for (i = 0; i < npages; ++i) ++ ASSERT_EQ(m[i], HMM_DMIRROR_PROT_READ | ++ HMM_DMIRROR_PROT_PMD); ++ ++ free_hugepage_region(buffer->ptr); ++ buffer->ptr = NULL; ++ hmm_buffer_free(buffer); ++} ++ ++/* + * Test two devices reading the same memory (double mapped). 
+ */ + TEST_F(hmm2, double_map) diff --git a/patches.suse/mm-hmm-test-add-selftest-driver-for-hmm.patch b/patches.suse/mm-hmm-test-add-selftest-driver-for-hmm.patch new file mode 100644 index 0000000..32c248f --- /dev/null +++ b/patches.suse/mm-hmm-test-add-selftest-driver-for-hmm.patch @@ -0,0 +1,1292 @@ +From: Ralph Campbell +Date: Wed, 22 Apr 2020 12:50:26 -0700 +Subject: mm/hmm/test: add selftest driver for HMM +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Git-commit: b2ef9f5a5cb37643ca5def3516c546457074b882 +Patch-mainline: v5.8-rc1 +References: jsc#SLE-16387 + +This driver is for testing device private memory migration and devices +which use hmm_range_fault() to access system memory via device page tables. + +Link: https://lore.kernel.org/r/20200422195028.3684-2-rcampbell@nvidia.com +Link: https://lore.kernel.org/r/20200516010424.2013-1-rcampbell@nvidia.com +Signed-off-by: Ralph Campbell +Signed-off-by: Jérôme Glisse +Link: https://lore.kernel.org/r/20200509030225.14592-1-weiyongjun1@huawei.com +Link: https://lore.kernel.org/r/20200509030234.14747-1-weiyongjun1@huawei.com +Signed-off-by: Wei Yongjun +Link: https://lore.kernel.org/r/20200511183704.GA225608@mwanda +Signed-off-by: Dan Carpenter +Signed-off-by: Jason Gunthorpe +Signed-off-by: Vlastimil Babka +--- + lib/Kconfig.debug | 13 + lib/Makefile | 1 + lib/test_hmm.c | 1164 ++++++++++++++++++++++++++++++++++++++++++++++++++++ + lib/test_hmm_uapi.h | 59 ++ + 4 files changed, 1237 insertions(+) + +--- a/lib/Kconfig.debug ++++ b/lib/Kconfig.debug +@@ -2076,6 +2076,19 @@ config TEST_MEMINIT + + If unsure, say N. + ++config TEST_HMM ++ tristate "Test HMM (Heterogeneous Memory Management)" ++ depends on TRANSPARENT_HUGEPAGE ++ depends on DEVICE_PRIVATE ++ select HMM_MIRROR ++ select MMU_NOTIFIER ++ help ++ This is a pseudo device driver solely for testing HMM. ++ Say M here if you want to build the HMM test module. 
++ Doing so will allow you to run tools/testing/selftest/vm/hmm-tests. ++ ++ If unsure, say N. ++ + endif # RUNTIME_TESTING_MENU + + config MEMTEST +--- a/lib/Makefile ++++ b/lib/Makefile +@@ -92,6 +92,7 @@ obj-$(CONFIG_TEST_OBJAGG) += test_objagg + obj-$(CONFIG_TEST_STACKINIT) += test_stackinit.o + obj-$(CONFIG_TEST_BLACKHOLE_DEV) += test_blackhole_dev.o + obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o ++obj-$(CONFIG_TEST_HMM) += test_hmm.o + + obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/ + +--- /dev/null ++++ b/lib/test_hmm.c +@@ -0,0 +1,1164 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * This is a module to test the HMM (Heterogeneous Memory Management) ++ * mirror and zone device private memory migration APIs of the kernel. ++ * Userspace programs can register with the driver to mirror their own address ++ * space and can use the device to read/write any valid virtual address. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "test_hmm_uapi.h" ++ ++#define DMIRROR_NDEVICES 2 ++#define DMIRROR_RANGE_FAULT_TIMEOUT 1000 ++#define DEVMEM_CHUNK_SIZE (256 * 1024 * 1024U) ++#define DEVMEM_CHUNKS_RESERVE 16 ++ ++static const struct dev_pagemap_ops dmirror_devmem_ops; ++static const struct mmu_interval_notifier_ops dmirror_min_ops; ++static dev_t dmirror_dev; ++static struct page *dmirror_zero_page; ++ ++struct dmirror_device; ++ ++struct dmirror_bounce { ++ void *ptr; ++ unsigned long size; ++ unsigned long addr; ++ unsigned long cpages; ++}; ++ ++#define DPT_XA_TAG_WRITE 3UL ++ ++/* ++ * Data structure to track address ranges and register for mmu interval ++ * notifier updates. ++ */ ++struct dmirror_interval { ++ struct mmu_interval_notifier notifier; ++ struct dmirror *dmirror; ++}; ++ ++/* ++ * Data attached to the open device file. 
++ * Note that it might be shared after a fork(). ++ */ ++struct dmirror { ++ struct dmirror_device *mdevice; ++ struct xarray pt; ++ struct mmu_interval_notifier notifier; ++ struct mutex mutex; ++}; ++ ++/* ++ * ZONE_DEVICE pages for migration and simulating device memory. ++ */ ++struct dmirror_chunk { ++ struct dev_pagemap pagemap; ++ struct dmirror_device *mdevice; ++}; ++ ++/* ++ * Per device data. ++ */ ++struct dmirror_device { ++ struct cdev cdevice; ++ struct hmm_devmem *devmem; ++ ++ unsigned int devmem_capacity; ++ unsigned int devmem_count; ++ struct dmirror_chunk **devmem_chunks; ++ struct mutex devmem_lock; /* protects the above */ ++ ++ unsigned long calloc; ++ unsigned long cfree; ++ struct page *free_pages; ++ spinlock_t lock; /* protects the above */ ++}; ++ ++static struct dmirror_device dmirror_devices[DMIRROR_NDEVICES]; ++ ++static int dmirror_bounce_init(struct dmirror_bounce *bounce, ++ unsigned long addr, ++ unsigned long size) ++{ ++ bounce->addr = addr; ++ bounce->size = size; ++ bounce->cpages = 0; ++ bounce->ptr = vmalloc(size); ++ if (!bounce->ptr) ++ return -ENOMEM; ++ return 0; ++} ++ ++static void dmirror_bounce_fini(struct dmirror_bounce *bounce) ++{ ++ vfree(bounce->ptr); ++} ++ ++static int dmirror_fops_open(struct inode *inode, struct file *filp) ++{ ++ struct cdev *cdev = inode->i_cdev; ++ struct dmirror *dmirror; ++ int ret; ++ ++ /* Mirror this process address space */ ++ dmirror = kzalloc(sizeof(*dmirror), GFP_KERNEL); ++ if (dmirror == NULL) ++ return -ENOMEM; ++ ++ dmirror->mdevice = container_of(cdev, struct dmirror_device, cdevice); ++ mutex_init(&dmirror->mutex); ++ xa_init(&dmirror->pt); ++ ++ ret = mmu_interval_notifier_insert(&dmirror->notifier, current->mm, ++ 0, ULONG_MAX & PAGE_MASK, &dmirror_min_ops); ++ if (ret) { ++ kfree(dmirror); ++ return ret; ++ } ++ ++ filp->private_data = dmirror; ++ return 0; ++} ++ ++static int dmirror_fops_release(struct inode *inode, struct file *filp) ++{ ++ struct dmirror *dmirror = 
filp->private_data; ++ ++ mmu_interval_notifier_remove(&dmirror->notifier); ++ xa_destroy(&dmirror->pt); ++ kfree(dmirror); ++ return 0; ++} ++ ++static struct dmirror_device *dmirror_page_to_device(struct page *page) ++ ++{ ++ return container_of(page->pgmap, struct dmirror_chunk, ++ pagemap)->mdevice; ++} ++ ++static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range) ++{ ++ unsigned long *pfns = range->hmm_pfns; ++ unsigned long pfn; ++ ++ for (pfn = (range->start >> PAGE_SHIFT); ++ pfn < (range->end >> PAGE_SHIFT); ++ pfn++, pfns++) { ++ struct page *page; ++ void *entry; ++ ++ /* ++ * Since we asked for hmm_range_fault() to populate pages, ++ * it shouldn't return an error entry on success. ++ */ ++ WARN_ON(*pfns & HMM_PFN_ERROR); ++ WARN_ON(!(*pfns & HMM_PFN_VALID)); ++ ++ page = hmm_pfn_to_page(*pfns); ++ WARN_ON(!page); ++ ++ entry = page; ++ if (*pfns & HMM_PFN_WRITE) ++ entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE); ++ else if (WARN_ON(range->default_flags & HMM_PFN_WRITE)) ++ return -EFAULT; ++ entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC); ++ if (xa_is_err(entry)) ++ return xa_err(entry); ++ } ++ ++ return 0; ++} ++ ++static void dmirror_do_update(struct dmirror *dmirror, unsigned long start, ++ unsigned long end) ++{ ++ unsigned long pfn; ++ void *entry; ++ ++ /* ++ * The XArray doesn't hold references to pages since it relies on ++ * the mmu notifier to clear page pointers when they become stale. ++ * Therefore, it is OK to just clear the entry. 
++ */ ++ xa_for_each_range(&dmirror->pt, pfn, entry, start >> PAGE_SHIFT, ++ end >> PAGE_SHIFT) ++ xa_erase(&dmirror->pt, pfn); ++} ++ ++static bool dmirror_interval_invalidate(struct mmu_interval_notifier *mni, ++ const struct mmu_notifier_range *range, ++ unsigned long cur_seq) ++{ ++ struct dmirror *dmirror = container_of(mni, struct dmirror, notifier); ++ ++ if (mmu_notifier_range_blockable(range)) ++ mutex_lock(&dmirror->mutex); ++ else if (!mutex_trylock(&dmirror->mutex)) ++ return false; ++ ++ mmu_interval_set_seq(mni, cur_seq); ++ dmirror_do_update(dmirror, range->start, range->end); ++ ++ mutex_unlock(&dmirror->mutex); ++ return true; ++} ++ ++static const struct mmu_interval_notifier_ops dmirror_min_ops = { ++ .invalidate = dmirror_interval_invalidate, ++}; ++ ++static int dmirror_range_fault(struct dmirror *dmirror, ++ struct hmm_range *range) ++{ ++ struct mm_struct *mm = dmirror->notifier.mm; ++ unsigned long timeout = ++ jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); ++ int ret; ++ ++ while (true) { ++ if (time_after(jiffies, timeout)) { ++ ret = -EBUSY; ++ goto out; ++ } ++ ++ range->notifier_seq = mmu_interval_read_begin(range->notifier); ++ down_read(&mm->mmap_sem); ++ ret = hmm_range_fault(range); ++ up_read(&mm->mmap_sem); ++ if (ret) { ++ if (ret == -EBUSY) ++ continue; ++ goto out; ++ } ++ ++ mutex_lock(&dmirror->mutex); ++ if (mmu_interval_read_retry(range->notifier, ++ range->notifier_seq)) { ++ mutex_unlock(&dmirror->mutex); ++ continue; ++ } ++ break; ++ } ++ ++ ret = dmirror_do_fault(dmirror, range); ++ ++ mutex_unlock(&dmirror->mutex); ++out: ++ return ret; ++} ++ ++static int dmirror_fault(struct dmirror *dmirror, unsigned long start, ++ unsigned long end, bool write) ++{ ++ struct mm_struct *mm = dmirror->notifier.mm; ++ unsigned long addr; ++ unsigned long pfns[64]; ++ struct hmm_range range = { ++ .notifier = &dmirror->notifier, ++ .hmm_pfns = pfns, ++ .pfn_flags_mask = 0, ++ .default_flags = ++ HMM_PFN_REQ_FAULT | (write ? 
HMM_PFN_REQ_WRITE : 0), ++ .dev_private_owner = dmirror->mdevice, ++ }; ++ int ret = 0; ++ ++ /* Since the mm is for the mirrored process, get a reference first. */ ++ if (!mmget_not_zero(mm)) ++ return 0; ++ ++ for (addr = start; addr < end; addr = range.end) { ++ range.start = addr; ++ range.end = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end); ++ ++ ret = dmirror_range_fault(dmirror, &range); ++ if (ret) ++ break; ++ } ++ ++ mmput(mm); ++ return ret; ++} ++ ++static int dmirror_do_read(struct dmirror *dmirror, unsigned long start, ++ unsigned long end, struct dmirror_bounce *bounce) ++{ ++ unsigned long pfn; ++ void *ptr; ++ ++ ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK); ++ ++ for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) { ++ void *entry; ++ struct page *page; ++ void *tmp; ++ ++ entry = xa_load(&dmirror->pt, pfn); ++ page = xa_untag_pointer(entry); ++ if (!page) ++ return -ENOENT; ++ ++ tmp = kmap(page); ++ memcpy(ptr, tmp, PAGE_SIZE); ++ kunmap(page); ++ ++ ptr += PAGE_SIZE; ++ bounce->cpages++; ++ } ++ ++ return 0; ++} ++ ++static int dmirror_read(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd) ++{ ++ struct dmirror_bounce bounce; ++ unsigned long start, end; ++ unsigned long size = cmd->npages << PAGE_SHIFT; ++ int ret; ++ ++ start = cmd->addr; ++ end = start + size; ++ if (end < start) ++ return -EINVAL; ++ ++ ret = dmirror_bounce_init(&bounce, start, size); ++ if (ret) ++ return ret; ++ ++ while (1) { ++ mutex_lock(&dmirror->mutex); ++ ret = dmirror_do_read(dmirror, start, end, &bounce); ++ mutex_unlock(&dmirror->mutex); ++ if (ret != -ENOENT) ++ break; ++ ++ start = cmd->addr + (bounce.cpages << PAGE_SHIFT); ++ ret = dmirror_fault(dmirror, start, end, false); ++ if (ret) ++ break; ++ cmd->faults++; ++ } ++ ++ if (ret == 0) { ++ if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr, ++ bounce.size)) ++ ret = -EFAULT; ++ } ++ cmd->cpages = bounce.cpages; ++ dmirror_bounce_fini(&bounce); ++ return ret; ++} ++ 
++static int dmirror_do_write(struct dmirror *dmirror, unsigned long start, ++ unsigned long end, struct dmirror_bounce *bounce) ++{ ++ unsigned long pfn; ++ void *ptr; ++ ++ ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK); ++ ++ for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) { ++ void *entry; ++ struct page *page; ++ void *tmp; ++ ++ entry = xa_load(&dmirror->pt, pfn); ++ page = xa_untag_pointer(entry); ++ if (!page || xa_pointer_tag(entry) != DPT_XA_TAG_WRITE) ++ return -ENOENT; ++ ++ tmp = kmap(page); ++ memcpy(tmp, ptr, PAGE_SIZE); ++ kunmap(page); ++ ++ ptr += PAGE_SIZE; ++ bounce->cpages++; ++ } ++ ++ return 0; ++} ++ ++static int dmirror_write(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd) ++{ ++ struct dmirror_bounce bounce; ++ unsigned long start, end; ++ unsigned long size = cmd->npages << PAGE_SHIFT; ++ int ret; ++ ++ start = cmd->addr; ++ end = start + size; ++ if (end < start) ++ return -EINVAL; ++ ++ ret = dmirror_bounce_init(&bounce, start, size); ++ if (ret) ++ return ret; ++ if (copy_from_user(bounce.ptr, u64_to_user_ptr(cmd->ptr), ++ bounce.size)) { ++ ret = -EFAULT; ++ goto fini; ++ } ++ ++ while (1) { ++ mutex_lock(&dmirror->mutex); ++ ret = dmirror_do_write(dmirror, start, end, &bounce); ++ mutex_unlock(&dmirror->mutex); ++ if (ret != -ENOENT) ++ break; ++ ++ start = cmd->addr + (bounce.cpages << PAGE_SHIFT); ++ ret = dmirror_fault(dmirror, start, end, true); ++ if (ret) ++ break; ++ cmd->faults++; ++ } ++ ++fini: ++ cmd->cpages = bounce.cpages; ++ dmirror_bounce_fini(&bounce); ++ return ret; ++} ++ ++static bool dmirror_allocate_chunk(struct dmirror_device *mdevice, ++ struct page **ppage) ++{ ++ struct dmirror_chunk *devmem; ++ struct resource *res; ++ unsigned long pfn; ++ unsigned long pfn_first; ++ unsigned long pfn_last; ++ void *ptr; ++ ++ mutex_lock(&mdevice->devmem_lock); ++ ++ if (mdevice->devmem_count == mdevice->devmem_capacity) { ++ struct dmirror_chunk **new_chunks; ++ unsigned int new_capacity; 
++ ++ new_capacity = mdevice->devmem_capacity + ++ DEVMEM_CHUNKS_RESERVE; ++ new_chunks = krealloc(mdevice->devmem_chunks, ++ sizeof(new_chunks[0]) * new_capacity, ++ GFP_KERNEL); ++ if (!new_chunks) ++ goto err; ++ mdevice->devmem_capacity = new_capacity; ++ mdevice->devmem_chunks = new_chunks; ++ } ++ ++ res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE, ++ "hmm_dmirror"); ++ if (IS_ERR(res)) ++ goto err; ++ ++ devmem = kzalloc(sizeof(*devmem), GFP_KERNEL); ++ if (!devmem) ++ goto err_release; ++ ++ devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; ++ devmem->pagemap.res = *res; ++ devmem->pagemap.ops = &dmirror_devmem_ops; ++ devmem->pagemap.owner = mdevice; ++ ++ ptr = memremap_pages(&devmem->pagemap, numa_node_id()); ++ if (IS_ERR(ptr)) ++ goto err_free; ++ ++ devmem->mdevice = mdevice; ++ pfn_first = devmem->pagemap.res.start >> PAGE_SHIFT; ++ pfn_last = pfn_first + ++ (resource_size(&devmem->pagemap.res) >> PAGE_SHIFT); ++ mdevice->devmem_chunks[mdevice->devmem_count++] = devmem; ++ ++ mutex_unlock(&mdevice->devmem_lock); ++ ++ pr_info("added new %u MB chunk (total %u chunks, %u MB) PFNs [0x%lx 0x%lx)\n", ++ DEVMEM_CHUNK_SIZE / (1024 * 1024), ++ mdevice->devmem_count, ++ mdevice->devmem_count * (DEVMEM_CHUNK_SIZE / (1024 * 1024)), ++ pfn_first, pfn_last); ++ ++ spin_lock(&mdevice->lock); ++ for (pfn = pfn_first; pfn < pfn_last; pfn++) { ++ struct page *page = pfn_to_page(pfn); ++ ++ page->zone_device_data = mdevice->free_pages; ++ mdevice->free_pages = page; ++ } ++ if (ppage) { ++ *ppage = mdevice->free_pages; ++ mdevice->free_pages = (*ppage)->zone_device_data; ++ mdevice->calloc++; ++ } ++ spin_unlock(&mdevice->lock); ++ ++ return true; ++ ++err_free: ++ kfree(devmem); ++err_release: ++ release_mem_region(devmem->pagemap.res.start, ++ resource_size(&devmem->pagemap.res)); ++err: ++ mutex_unlock(&mdevice->devmem_lock); ++ return false; ++} ++ ++static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice) ++{ ++ struct page 
*dpage = NULL; ++ struct page *rpage; ++ ++ /* ++ * This is a fake device so we alloc real system memory to store ++ * our device memory. ++ */ ++ rpage = alloc_page(GFP_HIGHUSER); ++ if (!rpage) ++ return NULL; ++ ++ spin_lock(&mdevice->lock); ++ ++ if (mdevice->free_pages) { ++ dpage = mdevice->free_pages; ++ mdevice->free_pages = dpage->zone_device_data; ++ mdevice->calloc++; ++ spin_unlock(&mdevice->lock); ++ } else { ++ spin_unlock(&mdevice->lock); ++ if (!dmirror_allocate_chunk(mdevice, &dpage)) ++ goto error; ++ } ++ ++ dpage->zone_device_data = rpage; ++ get_page(dpage); ++ lock_page(dpage); ++ return dpage; ++ ++error: ++ __free_page(rpage); ++ return NULL; ++} ++ ++static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args, ++ struct dmirror *dmirror) ++{ ++ struct dmirror_device *mdevice = dmirror->mdevice; ++ const unsigned long *src = args->src; ++ unsigned long *dst = args->dst; ++ unsigned long addr; ++ ++ for (addr = args->start; addr < args->end; addr += PAGE_SIZE, ++ src++, dst++) { ++ struct page *spage; ++ struct page *dpage; ++ struct page *rpage; ++ ++ if (!(*src & MIGRATE_PFN_MIGRATE)) ++ continue; ++ ++ /* ++ * Note that spage might be NULL which is OK since it is an ++ * unallocated pte_none() or read-only zero page. ++ */ ++ spage = migrate_pfn_to_page(*src); ++ ++ /* ++ * Don't migrate device private pages from our own driver or ++ * others. For our own we would do a device private memory copy ++ * not a migration and for others, we would need to fault the ++ * other device's page into system memory first. 
++ */ ++ if (spage && is_zone_device_page(spage)) ++ continue; ++ ++ dpage = dmirror_devmem_alloc_page(mdevice); ++ if (!dpage) ++ continue; ++ ++ rpage = dpage->zone_device_data; ++ if (spage) ++ copy_highpage(rpage, spage); ++ else ++ clear_highpage(rpage); ++ ++ /* ++ * Normally, a device would use the page->zone_device_data to ++ * point to the mirror but here we use it to hold the page for ++ * the simulated device memory and that page holds the pointer ++ * to the mirror. ++ */ ++ rpage->zone_device_data = dmirror; ++ ++ *dst = migrate_pfn(page_to_pfn(dpage)) | ++ MIGRATE_PFN_LOCKED; ++ if ((*src & MIGRATE_PFN_WRITE) || ++ (!spage && args->vma->vm_flags & VM_WRITE)) ++ *dst |= MIGRATE_PFN_WRITE; ++ } ++} ++ ++static int dmirror_migrate_finalize_and_map(struct migrate_vma *args, ++ struct dmirror *dmirror) ++{ ++ unsigned long start = args->start; ++ unsigned long end = args->end; ++ const unsigned long *src = args->src; ++ const unsigned long *dst = args->dst; ++ unsigned long pfn; ++ ++ /* Map the migrated pages into the device's page tables. */ ++ mutex_lock(&dmirror->mutex); ++ ++ for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++, ++ src++, dst++) { ++ struct page *dpage; ++ void *entry; ++ ++ if (!(*src & MIGRATE_PFN_MIGRATE)) ++ continue; ++ ++ dpage = migrate_pfn_to_page(*dst); ++ if (!dpage) ++ continue; ++ ++ /* ++ * Store the page that holds the data so the page table ++ * doesn't have to deal with ZONE_DEVICE private pages. 
++ */ ++ entry = dpage->zone_device_data; ++ if (*dst & MIGRATE_PFN_WRITE) ++ entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE); ++ entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC); ++ if (xa_is_err(entry)) { ++ mutex_unlock(&dmirror->mutex); ++ return xa_err(entry); ++ } ++ } ++ ++ mutex_unlock(&dmirror->mutex); ++ return 0; ++} ++ ++static int dmirror_migrate(struct dmirror *dmirror, ++ struct hmm_dmirror_cmd *cmd) ++{ ++ unsigned long start, end, addr; ++ unsigned long size = cmd->npages << PAGE_SHIFT; ++ struct mm_struct *mm = dmirror->notifier.mm; ++ struct vm_area_struct *vma; ++ unsigned long src_pfns[64]; ++ unsigned long dst_pfns[64]; ++ struct dmirror_bounce bounce; ++ struct migrate_vma args; ++ unsigned long next; ++ int ret; ++ ++ start = cmd->addr; ++ end = start + size; ++ if (end < start) ++ return -EINVAL; ++ ++ /* Since the mm is for the mirrored process, get a reference first. */ ++ if (!mmget_not_zero(mm)) ++ return -EINVAL; ++ ++ down_read(&mm->mmap_sem); ++ for (addr = start; addr < end; addr = next) { ++ vma = find_vma(mm, addr); ++ if (!vma || addr < vma->vm_start || ++ !(vma->vm_flags & VM_READ)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT)); ++ if (next > vma->vm_end) ++ next = vma->vm_end; ++ ++ args.vma = vma; ++ args.src = src_pfns; ++ args.dst = dst_pfns; ++ args.start = addr; ++ args.end = next; ++ args.src_owner = NULL; ++ ret = migrate_vma_setup(&args); ++ if (ret) ++ goto out; ++ ++ dmirror_migrate_alloc_and_copy(&args, dmirror); ++ migrate_vma_pages(&args); ++ dmirror_migrate_finalize_and_map(&args, dmirror); ++ migrate_vma_finalize(&args); ++ } ++ up_read(&mm->mmap_sem); ++ mmput(mm); ++ ++ /* Return the migrated data for verification. 
*/ ++ ret = dmirror_bounce_init(&bounce, start, size); ++ if (ret) ++ return ret; ++ mutex_lock(&dmirror->mutex); ++ ret = dmirror_do_read(dmirror, start, end, &bounce); ++ mutex_unlock(&dmirror->mutex); ++ if (ret == 0) { ++ if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr, ++ bounce.size)) ++ ret = -EFAULT; ++ } ++ cmd->cpages = bounce.cpages; ++ dmirror_bounce_fini(&bounce); ++ return ret; ++ ++out: ++ up_read(&mm->mmap_sem); ++ mmput(mm); ++ return ret; ++} ++ ++static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range, ++ unsigned char *perm, unsigned long entry) ++{ ++ struct page *page; ++ ++ if (entry & HMM_PFN_ERROR) { ++ *perm = HMM_DMIRROR_PROT_ERROR; ++ return; ++ } ++ if (!(entry & HMM_PFN_VALID)) { ++ *perm = HMM_DMIRROR_PROT_NONE; ++ return; ++ } ++ ++ page = hmm_pfn_to_page(entry); ++ if (is_device_private_page(page)) { ++ /* Is the page migrated to this device or some other? */ ++ if (dmirror->mdevice == dmirror_page_to_device(page)) ++ *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL; ++ else ++ *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE; ++ } else if (is_zero_pfn(page_to_pfn(page))) ++ *perm = HMM_DMIRROR_PROT_ZERO; ++ else ++ *perm = HMM_DMIRROR_PROT_NONE; ++ if (entry & HMM_PFN_WRITE) ++ *perm |= HMM_DMIRROR_PROT_WRITE; ++ else ++ *perm |= HMM_DMIRROR_PROT_READ; ++} ++ ++static bool dmirror_snapshot_invalidate(struct mmu_interval_notifier *mni, ++ const struct mmu_notifier_range *range, ++ unsigned long cur_seq) ++{ ++ struct dmirror_interval *dmi = ++ container_of(mni, struct dmirror_interval, notifier); ++ struct dmirror *dmirror = dmi->dmirror; ++ ++ if (mmu_notifier_range_blockable(range)) ++ mutex_lock(&dmirror->mutex); ++ else if (!mutex_trylock(&dmirror->mutex)) ++ return false; ++ ++ /* ++ * Snapshots only need to set the sequence number since any ++ * invalidation in the interval invalidates the whole snapshot. 
++ */ ++ mmu_interval_set_seq(mni, cur_seq); ++ ++ mutex_unlock(&dmirror->mutex); ++ return true; ++} ++ ++static const struct mmu_interval_notifier_ops dmirror_mrn_ops = { ++ .invalidate = dmirror_snapshot_invalidate, ++}; ++ ++static int dmirror_range_snapshot(struct dmirror *dmirror, ++ struct hmm_range *range, ++ unsigned char *perm) ++{ ++ struct mm_struct *mm = dmirror->notifier.mm; ++ struct dmirror_interval notifier; ++ unsigned long timeout = ++ jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); ++ unsigned long i; ++ unsigned long n; ++ int ret = 0; ++ ++ notifier.dmirror = dmirror; ++ range->notifier = &notifier.notifier; ++ ++ ret = mmu_interval_notifier_insert(range->notifier, mm, ++ range->start, range->end - range->start, ++ &dmirror_mrn_ops); ++ if (ret) ++ return ret; ++ ++ while (true) { ++ if (time_after(jiffies, timeout)) { ++ ret = -EBUSY; ++ goto out; ++ } ++ ++ range->notifier_seq = mmu_interval_read_begin(range->notifier); ++ ++ down_read(&mm->mmap_sem); ++ ret = hmm_range_fault(range); ++ up_read(&mm->mmap_sem); ++ if (ret) { ++ if (ret == -EBUSY) ++ continue; ++ goto out; ++ } ++ ++ mutex_lock(&dmirror->mutex); ++ if (mmu_interval_read_retry(range->notifier, ++ range->notifier_seq)) { ++ mutex_unlock(&dmirror->mutex); ++ continue; ++ } ++ break; ++ } ++ ++ n = (range->end - range->start) >> PAGE_SHIFT; ++ for (i = 0; i < n; i++) ++ dmirror_mkentry(dmirror, range, perm + i, range->hmm_pfns[i]); ++ ++ mutex_unlock(&dmirror->mutex); ++out: ++ mmu_interval_notifier_remove(range->notifier); ++ return ret; ++} ++ ++static int dmirror_snapshot(struct dmirror *dmirror, ++ struct hmm_dmirror_cmd *cmd) ++{ ++ struct mm_struct *mm = dmirror->notifier.mm; ++ unsigned long start, end; ++ unsigned long size = cmd->npages << PAGE_SHIFT; ++ unsigned long addr; ++ unsigned long next; ++ unsigned long pfns[64]; ++ unsigned char perm[64]; ++ char __user *uptr; ++ struct hmm_range range = { ++ .hmm_pfns = pfns, ++ .dev_private_owner = dmirror->mdevice, ++ };
++ int ret = 0; ++ ++ start = cmd->addr; ++ end = start + size; ++ if (end < start) ++ return -EINVAL; ++ ++ /* Since the mm is for the mirrored process, get a reference first. */ ++ if (!mmget_not_zero(mm)) ++ return -EINVAL; ++ ++ /* ++ * Register a temporary notifier to detect invalidations even if it ++ * overlaps with other mmu_interval_notifiers. ++ */ ++ uptr = u64_to_user_ptr(cmd->ptr); ++ for (addr = start; addr < end; addr = next) { ++ unsigned long n; ++ ++ next = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end); ++ range.start = addr; ++ range.end = next; ++ ++ ret = dmirror_range_snapshot(dmirror, &range, perm); ++ if (ret) ++ break; ++ ++ n = (range.end - range.start) >> PAGE_SHIFT; ++ if (copy_to_user(uptr, perm, n)) { ++ ret = -EFAULT; ++ break; ++ } ++ ++ cmd->cpages += n; ++ uptr += n; ++ } ++ mmput(mm); ++ ++ return ret; ++} ++ ++static long dmirror_fops_unlocked_ioctl(struct file *filp, ++ unsigned int command, ++ unsigned long arg) ++{ ++ void __user *uarg = (void __user *)arg; ++ struct hmm_dmirror_cmd cmd; ++ struct dmirror *dmirror; ++ int ret; ++ ++ dmirror = filp->private_data; ++ if (!dmirror) ++ return -EINVAL; ++ ++ if (copy_from_user(&cmd, uarg, sizeof(cmd))) ++ return -EFAULT; ++ ++ if (cmd.addr & ~PAGE_MASK) ++ return -EINVAL; ++ if (cmd.addr >= (cmd.addr + (cmd.npages << PAGE_SHIFT))) ++ return -EINVAL; ++ ++ cmd.cpages = 0; ++ cmd.faults = 0; ++ ++ switch (command) { ++ case HMM_DMIRROR_READ: ++ ret = dmirror_read(dmirror, &cmd); ++ break; ++ ++ case HMM_DMIRROR_WRITE: ++ ret = dmirror_write(dmirror, &cmd); ++ break; ++ ++ case HMM_DMIRROR_MIGRATE: ++ ret = dmirror_migrate(dmirror, &cmd); ++ break; ++ ++ case HMM_DMIRROR_SNAPSHOT: ++ ret = dmirror_snapshot(dmirror, &cmd); ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ if (ret) ++ return ret; ++ ++ if (copy_to_user(uarg, &cmd, sizeof(cmd))) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++static const struct file_operations dmirror_fops = { ++ .open = dmirror_fops_open, ++ .release 
= dmirror_fops_release, ++ .unlocked_ioctl = dmirror_fops_unlocked_ioctl, ++ .llseek = default_llseek, ++ .owner = THIS_MODULE, ++}; ++ ++static void dmirror_devmem_free(struct page *page) ++{ ++ struct page *rpage = page->zone_device_data; ++ struct dmirror_device *mdevice; ++ ++ if (rpage) ++ __free_page(rpage); ++ ++ mdevice = dmirror_page_to_device(page); ++ ++ spin_lock(&mdevice->lock); ++ mdevice->cfree++; ++ page->zone_device_data = mdevice->free_pages; ++ mdevice->free_pages = page; ++ spin_unlock(&mdevice->lock); ++} ++ ++static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args, ++ struct dmirror_device *mdevice) ++{ ++ const unsigned long *src = args->src; ++ unsigned long *dst = args->dst; ++ unsigned long start = args->start; ++ unsigned long end = args->end; ++ unsigned long addr; ++ ++ for (addr = start; addr < end; addr += PAGE_SIZE, ++ src++, dst++) { ++ struct page *dpage, *spage; ++ ++ spage = migrate_pfn_to_page(*src); ++ if (!spage || !(*src & MIGRATE_PFN_MIGRATE)) ++ continue; ++ spage = spage->zone_device_data; ++ ++ dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr); ++ if (!dpage) ++ continue; ++ ++ lock_page(dpage); ++ copy_highpage(dpage, spage); ++ *dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; ++ if (*src & MIGRATE_PFN_WRITE) ++ *dst |= MIGRATE_PFN_WRITE; ++ } ++ return 0; ++} ++ ++static void dmirror_devmem_fault_finalize_and_map(struct migrate_vma *args, ++ struct dmirror *dmirror) ++{ ++ /* Invalidate the device's page table mapping. 
*/ ++ mutex_lock(&dmirror->mutex); ++ dmirror_do_update(dmirror, args->start, args->end); ++ mutex_unlock(&dmirror->mutex); ++} ++ ++static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf) ++{ ++ struct migrate_vma args; ++ unsigned long src_pfns; ++ unsigned long dst_pfns; ++ struct page *rpage; ++ struct dmirror *dmirror; ++ vm_fault_t ret; ++ ++ /* ++ * Normally, a device would use the page->zone_device_data to point to ++ * the mirror but here we use it to hold the page for the simulated ++ * device memory and that page holds the pointer to the mirror. ++ */ ++ rpage = vmf->page->zone_device_data; ++ dmirror = rpage->zone_device_data; ++ ++ /* FIXME demonstrate how we can adjust migrate range */ ++ args.vma = vmf->vma; ++ args.start = vmf->address; ++ args.end = args.start + PAGE_SIZE; ++ args.src = &src_pfns; ++ args.dst = &dst_pfns; ++ args.src_owner = dmirror->mdevice; ++ ++ if (migrate_vma_setup(&args)) ++ return VM_FAULT_SIGBUS; ++ ++ ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror->mdevice); ++ if (ret) ++ return ret; ++ migrate_vma_pages(&args); ++ dmirror_devmem_fault_finalize_and_map(&args, dmirror); ++ migrate_vma_finalize(&args); ++ return 0; ++} ++ ++static const struct dev_pagemap_ops dmirror_devmem_ops = { ++ .page_free = dmirror_devmem_free, ++ .migrate_to_ram = dmirror_devmem_fault, ++}; ++ ++static int dmirror_device_init(struct dmirror_device *mdevice, int id) ++{ ++ dev_t dev; ++ int ret; ++ ++ dev = MKDEV(MAJOR(dmirror_dev), id); ++ mutex_init(&mdevice->devmem_lock); ++ spin_lock_init(&mdevice->lock); ++ ++ cdev_init(&mdevice->cdevice, &dmirror_fops); ++ mdevice->cdevice.owner = THIS_MODULE; ++ ret = cdev_add(&mdevice->cdevice, dev, 1); ++ if (ret) ++ return ret; ++ ++ /* Build a list of free ZONE_DEVICE private struct pages */ ++ dmirror_allocate_chunk(mdevice, NULL); ++ ++ return 0; ++} ++ ++static void dmirror_device_remove(struct dmirror_device *mdevice) ++{ ++ unsigned int i; ++ ++ if (mdevice->devmem_chunks) { ++ for (i = 
0; i < mdevice->devmem_count; i++) { ++ struct dmirror_chunk *devmem = ++ mdevice->devmem_chunks[i]; ++ ++ memunmap_pages(&devmem->pagemap); ++ release_mem_region(devmem->pagemap.res.start, ++ resource_size(&devmem->pagemap.res)); ++ kfree(devmem); ++ } ++ kfree(mdevice->devmem_chunks); ++ } ++ ++ cdev_del(&mdevice->cdevice); ++} ++ ++static int __init hmm_dmirror_init(void) ++{ ++ int ret; ++ int id; ++ ++ ret = alloc_chrdev_region(&dmirror_dev, 0, DMIRROR_NDEVICES, ++ "HMM_DMIRROR"); ++ if (ret) ++ goto err_unreg; ++ ++ for (id = 0; id < DMIRROR_NDEVICES; id++) { ++ ret = dmirror_device_init(dmirror_devices + id, id); ++ if (ret) ++ goto err_chrdev; ++ } ++ ++ /* ++ * Allocate a zero page to simulate a reserved page of device private ++ * memory which is always zero. The zero_pfn page isn't used just to ++ * make the code here simpler (i.e., we need a struct page for it). ++ */ ++ dmirror_zero_page = alloc_page(GFP_HIGHUSER | __GFP_ZERO); ++ if (!dmirror_zero_page) { ++ ret = -ENOMEM; ++ goto err_chrdev; ++ } ++ ++ pr_info("HMM test module loaded. This is only for testing HMM.\n"); ++ return 0; ++ ++err_chrdev: ++ while (--id >= 0) ++ dmirror_device_remove(dmirror_devices + id); ++ unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES); ++err_unreg: ++ return ret; ++} ++ ++static void __exit hmm_dmirror_exit(void) ++{ ++ int id; ++ ++ if (dmirror_zero_page) ++ __free_page(dmirror_zero_page); ++ for (id = 0; id < DMIRROR_NDEVICES; id++) ++ dmirror_device_remove(dmirror_devices + id); ++ unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES); ++} ++ ++module_init(hmm_dmirror_init); ++module_exit(hmm_dmirror_exit); ++MODULE_LICENSE("GPL"); +--- /dev/null ++++ b/lib/test_hmm_uapi.h +@@ -0,0 +1,59 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * This is a module to test the HMM (Heterogeneous Memory Management) API ++ * of the kernel. 
It allows a userspace program to expose its entire address ++ * space through the HMM test module device file. ++ */ ++#ifndef _LIB_TEST_HMM_UAPI_H ++#define _LIB_TEST_HMM_UAPI_H ++ ++#include <linux/types.h> ++#include <linux/ioctl.h> ++ ++/* ++ * Structure to pass to the HMM test driver to mimic a device accessing ++ * system memory and ZONE_DEVICE private memory through device page tables. ++ * ++ * @addr: (in) user address the device will read/write ++ * @ptr: (in) user address where device data is copied to/from ++ * @npages: (in) number of pages to read/write ++ * @cpages: (out) number of pages copied ++ * @faults: (out) number of device page faults seen ++ */ ++struct hmm_dmirror_cmd { ++ __u64 addr; ++ __u64 ptr; ++ __u64 npages; ++ __u64 cpages; ++ __u64 faults; ++}; ++ ++/* Expose the address space of the calling process through hmm device file */ ++#define HMM_DMIRROR_READ _IOWR('H', 0x00, struct hmm_dmirror_cmd) ++#define HMM_DMIRROR_WRITE _IOWR('H', 0x01, struct hmm_dmirror_cmd) ++#define HMM_DMIRROR_MIGRATE _IOWR('H', 0x02, struct hmm_dmirror_cmd) ++#define HMM_DMIRROR_SNAPSHOT _IOWR('H', 0x03, struct hmm_dmirror_cmd) ++ ++/* ++ * Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT.
++ * HMM_DMIRROR_PROT_ERROR: no valid mirror PTE for this page ++ * HMM_DMIRROR_PROT_NONE: unpopulated PTE or PTE with no access ++ * HMM_DMIRROR_PROT_READ: read-only PTE ++ * HMM_DMIRROR_PROT_WRITE: read/write PTE ++ * HMM_DMIRROR_PROT_ZERO: special read-only zero page ++ * HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL: Migrated device private page on the ++ * device the ioctl() is made ++ * HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE: Migrated device private page on some ++ * other device ++ */ ++enum { ++ HMM_DMIRROR_PROT_ERROR = 0xFF, ++ HMM_DMIRROR_PROT_NONE = 0x00, ++ HMM_DMIRROR_PROT_READ = 0x01, ++ HMM_DMIRROR_PROT_WRITE = 0x02, ++ HMM_DMIRROR_PROT_ZERO = 0x10, ++ HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL = 0x20, ++ HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE = 0x30, ++}; ++ ++#endif /* _LIB_TEST_HMM_UAPI_H */ diff --git a/patches.suse/mm-hmm-test-add-selftests-for-hmm.patch b/patches.suse/mm-hmm-test-add-selftests-for-hmm.patch new file mode 100644 index 0000000..5314ce3 --- /dev/null +++ b/patches.suse/mm-hmm-test-add-selftests-for-hmm.patch @@ -0,0 +1,1540 @@ +From: Ralph Campbell +Date: Wed, 22 Apr 2020 12:50:27 -0700 +Subject: mm/hmm/test: add selftests for HMM +Git-commit: fee9f6d1b8df35ce4ec14a49f27a7d9e4e06fd57 +Patch-mainline: v5.8-rc1 +References: jsc#SLE-16387 + +Add some basic stand alone self tests for HMM. +The test program and shell scripts use the test_hmm.ko driver to exercise +HMM functionality in the kernel. 
+ +Link: https://lore.kernel.org/r/20200422195028.3684-3-rcampbell@nvidia.com +Signed-off-by: Ralph Campbell +Signed-off-by: Jason Gunthorpe +Signed-off-by: Vlastimil Babka +--- + tools/testing/selftests/vm/.gitignore | 1 + tools/testing/selftests/vm/Makefile | 3 + tools/testing/selftests/vm/config | 1 + tools/testing/selftests/vm/hmm-tests.c | 1359 +++++++++++++++++++++++++++++++++ + tools/testing/selftests/vm/run_vmtests | 15 + tools/testing/selftests/vm/test_hmm.sh | 97 ++ + 6 files changed, 1476 insertions(+) + +--- a/tools/testing/selftests/vm/.gitignore ++++ b/tools/testing/selftests/vm/.gitignore +@@ -14,3 +14,4 @@ virtual_address_range + gup_benchmark + va_128TBswitch + map_fixed_noreplace ++hmm-tests +--- a/tools/testing/selftests/vm/Makefile ++++ b/tools/testing/selftests/vm/Makefile +@@ -5,6 +5,7 @@ CFLAGS = -Wall -I ../../../../usr/includ + LDLIBS = -lrt + TEST_GEN_FILES = compaction_test + TEST_GEN_FILES += gup_benchmark ++TEST_GEN_FILES += hmm-tests + TEST_GEN_FILES += hugepage-mmap + TEST_GEN_FILES += hugepage-shm + TEST_GEN_FILES += map_hugetlb +@@ -26,6 +27,8 @@ TEST_FILES := test_vmalloc.sh + KSFT_KHDR_INSTALL := 1 + include ../lib.mk + ++$(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs -lpthread ++ + $(OUTPUT)/userfaultfd: LDLIBS += -lpthread + + $(OUTPUT)/mlock-random-test: LDLIBS += -lcap +--- a/tools/testing/selftests/vm/config ++++ b/tools/testing/selftests/vm/config +@@ -1,2 +1,3 @@ + CONFIG_SYSVIPC=y + CONFIG_USERFAULTFD=y ++CONFIG_TEST_HMM=m +--- /dev/null ++++ b/tools/testing/selftests/vm/hmm-tests.c +@@ -0,0 +1,1359 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * HMM stands for Heterogeneous Memory Management, it is a helper layer inside ++ * the linux kernel to help device drivers mirror a process address space in ++ * the device. This allows the device to use the same address space which ++ * makes communication and data exchange a lot easier. 
++ * ++ * This framework's sole purpose is to exercise various code paths inside ++ * the kernel to make sure that HMM performs as expected and to flush out any ++ * bugs. ++ */ ++ ++#include "../kselftest_harness.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * This is a private UAPI to the kernel test module so it isn't exported ++ * in the usual include/uapi/... directory. ++ */ ++#include "../../../../lib/test_hmm_uapi.h" ++ ++struct hmm_buffer { ++ void *ptr; ++ void *mirror; ++ unsigned long size; ++ int fd; ++ uint64_t cpages; ++ uint64_t faults; ++}; ++ ++#define TWOMEG (1 << 21) ++#define HMM_BUFFER_SIZE (1024 << 12) ++#define HMM_PATH_MAX 64 ++#define NTIMES 256 ++ ++#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) ++ ++FIXTURE(hmm) ++{ ++ int fd; ++ unsigned int page_size; ++ unsigned int page_shift; ++}; ++ ++FIXTURE(hmm2) ++{ ++ int fd0; ++ int fd1; ++ unsigned int page_size; ++ unsigned int page_shift; ++}; ++ ++static int hmm_open(int unit) ++{ ++ char pathname[HMM_PATH_MAX]; ++ int fd; ++ ++ snprintf(pathname, sizeof(pathname), "/dev/hmm_dmirror%d", unit); ++ fd = open(pathname, O_RDWR, 0); ++ if (fd < 0) ++ fprintf(stderr, "could not open hmm dmirror driver (%s)\n", ++ pathname); ++ return fd; ++} ++ ++FIXTURE_SETUP(hmm) ++{ ++ self->page_size = sysconf(_SC_PAGE_SIZE); ++ self->page_shift = ffs(self->page_size) - 1; ++ ++ self->fd = hmm_open(0); ++ ASSERT_GE(self->fd, 0); ++} ++ ++FIXTURE_SETUP(hmm2) ++{ ++ self->page_size = sysconf(_SC_PAGE_SIZE); ++ self->page_shift = ffs(self->page_size) - 1; ++ ++ self->fd0 = hmm_open(0); ++ ASSERT_GE(self->fd0, 0); ++ self->fd1 = hmm_open(1); ++ ASSERT_GE(self->fd1, 0); ++} ++ ++FIXTURE_TEARDOWN(hmm) ++{ ++ int ret = close(self->fd); ++ ++ ASSERT_EQ(ret, 0); ++ self->fd = -1; ++} ++ ++FIXTURE_TEARDOWN(hmm2) ++{ ++ int ret = close(self->fd0); ++ ++ ASSERT_EQ(ret, 0); ++ self->fd0 = -1; 
++ ++ ret = close(self->fd1); ++ ASSERT_EQ(ret, 0); ++ self->fd1 = -1; ++} ++ ++static int hmm_dmirror_cmd(int fd, ++ unsigned long request, ++ struct hmm_buffer *buffer, ++ unsigned long npages) ++{ ++ struct hmm_dmirror_cmd cmd; ++ int ret; ++ ++ /* Simulate a device reading system memory. */ ++ cmd.addr = (__u64)buffer->ptr; ++ cmd.ptr = (__u64)buffer->mirror; ++ cmd.npages = npages; ++ ++ for (;;) { ++ ret = ioctl(fd, request, &cmd); ++ if (ret == 0) ++ break; ++ if (errno == EINTR) ++ continue; ++ return -errno; ++ } ++ buffer->cpages = cmd.cpages; ++ buffer->faults = cmd.faults; ++ ++ return 0; ++} ++ ++static void hmm_buffer_free(struct hmm_buffer *buffer) ++{ ++ if (buffer == NULL) ++ return; ++ ++ if (buffer->ptr) ++ munmap(buffer->ptr, buffer->size); ++ free(buffer->mirror); ++ free(buffer); ++} ++ ++/* ++ * Create a temporary file that will be deleted on close. ++ */ ++static int hmm_create_file(unsigned long size) ++{ ++ char path[HMM_PATH_MAX]; ++ int fd; ++ ++ strcpy(path, "/tmp"); ++ fd = open(path, O_TMPFILE | O_EXCL | O_RDWR, 0600); ++ if (fd >= 0) { ++ int r; ++ ++ do { ++ r = ftruncate(fd, size); ++ } while (r == -1 && errno == EINTR); ++ if (!r) ++ return fd; ++ close(fd); ++ } ++ return -1; ++} ++ ++/* ++ * Return a random unsigned number. ++ */ ++static unsigned int hmm_random(void) ++{ ++ static int fd = -1; ++ unsigned int r; ++ ++ if (fd < 0) { ++ fd = open("/dev/urandom", O_RDONLY); ++ if (fd < 0) { ++ fprintf(stderr, "%s:%d failed to open /dev/urandom\n", ++ __FILE__, __LINE__); ++ return ~0U; ++ } ++ } ++ read(fd, &r, sizeof(r)); ++ return r; ++} ++ ++static void hmm_nanosleep(unsigned int n) ++{ ++ struct timespec t; ++ ++ t.tv_sec = 0; ++ t.tv_nsec = n; ++ nanosleep(&t, NULL); ++} ++ ++/* ++ * Simple NULL test of device open/close. ++ */ ++TEST_F(hmm, open_close) ++{ ++} ++ ++/* ++ * Read private anonymous memory. 
++ */ ++TEST_F(hmm, anon_read) ++{ ++ struct hmm_buffer *buffer; ++ unsigned long npages; ++ unsigned long size; ++ unsigned long i; ++ int *ptr; ++ int ret; ++ int val; ++ ++ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; ++ ASSERT_NE(npages, 0); ++ size = npages << self->page_shift; ++ ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->fd = -1; ++ buffer->size = size; ++ buffer->mirror = malloc(size); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ buffer->ptr = mmap(NULL, size, ++ PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, ++ buffer->fd, 0); ++ ASSERT_NE(buffer->ptr, MAP_FAILED); ++ ++ /* ++ * Initialize buffer in system memory but leave the first two pages ++ * zero (pte_none and pfn_zero). ++ */ ++ i = 2 * self->page_size / sizeof(*ptr); ++ for (ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ptr[i] = i; ++ ++ /* Set buffer permission to read-only. */ ++ ret = mprotect(buffer->ptr, size, PROT_READ); ++ ASSERT_EQ(ret, 0); ++ ++ /* Populate the CPU page table with a special zero page. */ ++ val = *(int *)(buffer->ptr + self->page_size); ++ ASSERT_EQ(val, 0); ++ ++ /* Simulate a device reading system memory. */ ++ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ASSERT_EQ(buffer->faults, 1); ++ ++ /* Check what the device read. */ ++ ptr = buffer->mirror; ++ for (i = 0; i < 2 * self->page_size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], 0); ++ for (; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ ++ hmm_buffer_free(buffer); ++} ++ ++/* ++ * Read private anonymous memory which has been protected with ++ * mprotect() PROT_NONE. 
++ */ ++TEST_F(hmm, anon_read_prot) ++{ ++ struct hmm_buffer *buffer; ++ unsigned long npages; ++ unsigned long size; ++ unsigned long i; ++ int *ptr; ++ int ret; ++ ++ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; ++ ASSERT_NE(npages, 0); ++ size = npages << self->page_shift; ++ ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->fd = -1; ++ buffer->size = size; ++ buffer->mirror = malloc(size); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ buffer->ptr = mmap(NULL, size, ++ PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, ++ buffer->fd, 0); ++ ASSERT_NE(buffer->ptr, MAP_FAILED); ++ ++ /* Initialize buffer in system memory. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ptr[i] = i; ++ ++ /* Initialize mirror buffer so we can verify it isn't written. */ ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ptr[i] = -i; ++ ++ /* Protect buffer from reading. */ ++ ret = mprotect(buffer->ptr, size, PROT_NONE); ++ ASSERT_EQ(ret, 0); ++ ++ /* Simulate a device reading system memory. */ ++ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages); ++ ASSERT_EQ(ret, -EFAULT); ++ ++ /* Allow CPU to read the buffer so we can check it. */ ++ ret = mprotect(buffer->ptr, size, PROT_READ); ++ ASSERT_EQ(ret, 0); ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ ++ /* Check what the device read. */ ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], -i); ++ ++ hmm_buffer_free(buffer); ++} ++ ++/* ++ * Write private anonymous memory. 
++ */ ++TEST_F(hmm, anon_write) ++{ ++ struct hmm_buffer *buffer; ++ unsigned long npages; ++ unsigned long size; ++ unsigned long i; ++ int *ptr; ++ int ret; ++ ++ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; ++ ASSERT_NE(npages, 0); ++ size = npages << self->page_shift; ++ ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->fd = -1; ++ buffer->size = size; ++ buffer->mirror = malloc(size); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ buffer->ptr = mmap(NULL, size, ++ PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, ++ buffer->fd, 0); ++ ASSERT_NE(buffer->ptr, MAP_FAILED); ++ ++ /* Initialize data that the device will write to buffer->ptr. */ ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ptr[i] = i; ++ ++ /* Simulate a device writing system memory. */ ++ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ASSERT_EQ(buffer->faults, 1); ++ ++ /* Check what the device wrote. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ ++ hmm_buffer_free(buffer); ++} ++ ++/* ++ * Write private anonymous memory which has been protected with ++ * mprotect() PROT_READ. ++ */ ++TEST_F(hmm, anon_write_prot) ++{ ++ struct hmm_buffer *buffer; ++ unsigned long npages; ++ unsigned long size; ++ unsigned long i; ++ int *ptr; ++ int ret; ++ ++ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; ++ ASSERT_NE(npages, 0); ++ size = npages << self->page_shift; ++ ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->fd = -1; ++ buffer->size = size; ++ buffer->mirror = malloc(size); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ buffer->ptr = mmap(NULL, size, ++ PROT_READ, ++ MAP_PRIVATE | MAP_ANONYMOUS, ++ buffer->fd, 0); ++ ASSERT_NE(buffer->ptr, MAP_FAILED); ++ ++ /* Simulate a device reading a zero page of memory. 
*/ ++ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, 1); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, 1); ++ ASSERT_EQ(buffer->faults, 1); ++ ++ /* Initialize data that the device will write to buffer->ptr. */ ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ptr[i] = i; ++ ++ /* Simulate a device writing system memory. */ ++ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); ++ ASSERT_EQ(ret, -EPERM); ++ ++ /* Check what the device wrote. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], 0); ++ ++ /* Now allow writing and see that the zero page is replaced. */ ++ ret = mprotect(buffer->ptr, size, PROT_WRITE | PROT_READ); ++ ASSERT_EQ(ret, 0); ++ ++ /* Simulate a device writing system memory. */ ++ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ASSERT_EQ(buffer->faults, 1); ++ ++ /* Check what the device wrote. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ ++ hmm_buffer_free(buffer); ++} ++ ++/* ++ * Check that a device writing an anonymous private mapping ++ * will copy-on-write if a child process inherits the mapping. ++ */ ++TEST_F(hmm, anon_write_child) ++{ ++ struct hmm_buffer *buffer; ++ unsigned long npages; ++ unsigned long size; ++ unsigned long i; ++ int *ptr; ++ pid_t pid; ++ int child_fd; ++ int ret; ++ ++ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; ++ ASSERT_NE(npages, 0); ++ size = npages << self->page_shift; ++ ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->fd = -1; ++ buffer->size = size; ++ buffer->mirror = malloc(size); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ buffer->ptr = mmap(NULL, size, ++ PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, ++ buffer->fd, 0); ++ ASSERT_NE(buffer->ptr, MAP_FAILED); ++ ++ /* Initialize buffer->ptr so we can tell if it is written. 
*/ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ptr[i] = i; ++ ++ /* Initialize data that the device will write to buffer->ptr. */ ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ptr[i] = -i; ++ ++ pid = fork(); ++ if (pid == -1) ++ ASSERT_EQ(pid, 0); ++ if (pid != 0) { ++ waitpid(pid, &ret, 0); ++ ASSERT_EQ(WIFEXITED(ret), 1); ++ ++ /* Check that the parent's buffer did not change. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ return; ++ } ++ ++ /* Check that we see the parent's values. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], -i); ++ ++ /* The child process needs its own mirror to its own mm. */ ++ child_fd = hmm_open(0); ++ ASSERT_GE(child_fd, 0); ++ ++ /* Simulate a device writing system memory. */ ++ ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ASSERT_EQ(buffer->faults, 1); ++ ++ /* Check what the device wrote. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], -i); ++ ++ close(child_fd); ++ exit(0); ++} ++ ++/* ++ * Check that a device writing an anonymous shared mapping ++ * will not copy-on-write if a child process inherits the mapping. 
++ */ ++TEST_F(hmm, anon_write_child_shared) ++{ ++ struct hmm_buffer *buffer; ++ unsigned long npages; ++ unsigned long size; ++ unsigned long i; ++ int *ptr; ++ pid_t pid; ++ int child_fd; ++ int ret; ++ ++ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; ++ ASSERT_NE(npages, 0); ++ size = npages << self->page_shift; ++ ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->fd = -1; ++ buffer->size = size; ++ buffer->mirror = malloc(size); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ buffer->ptr = mmap(NULL, size, ++ PROT_READ | PROT_WRITE, ++ MAP_SHARED | MAP_ANONYMOUS, ++ buffer->fd, 0); ++ ASSERT_NE(buffer->ptr, MAP_FAILED); ++ ++ /* Initialize buffer->ptr so we can tell if it is written. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ptr[i] = i; ++ ++ /* Initialize data that the device will write to buffer->ptr. */ ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ptr[i] = -i; ++ ++ pid = fork(); ++ if (pid == -1) ++ ASSERT_EQ(pid, 0); ++ if (pid != 0) { ++ waitpid(pid, &ret, 0); ++ ASSERT_EQ(WIFEXITED(ret), 1); ++ ++ /* Check that the parent's buffer did change. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], -i); ++ return; ++ } ++ ++ /* Check that we see the parent's values. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], -i); ++ ++ /* The child process needs its own mirror to its own mm. */ ++ child_fd = hmm_open(0); ++ ASSERT_GE(child_fd, 0); ++ ++ /* Simulate a device writing system memory. */ ++ ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ASSERT_EQ(buffer->faults, 1); ++ ++ /* Check what the device wrote. 
*/ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], -i); ++ ++ close(child_fd); ++ exit(0); ++} ++ ++/* ++ * Write private anonymous huge page. ++ */ ++TEST_F(hmm, anon_write_huge) ++{ ++ struct hmm_buffer *buffer; ++ unsigned long npages; ++ unsigned long size; ++ unsigned long i; ++ void *old_ptr; ++ void *map; ++ int *ptr; ++ int ret; ++ ++ size = 2 * TWOMEG; ++ ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->fd = -1; ++ buffer->size = size; ++ buffer->mirror = malloc(size); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ buffer->ptr = mmap(NULL, size, ++ PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, ++ buffer->fd, 0); ++ ASSERT_NE(buffer->ptr, MAP_FAILED); ++ ++ size = TWOMEG; ++ npages = size >> self->page_shift; ++ map = (void *)ALIGN((uintptr_t)buffer->ptr, size); ++ ret = madvise(map, size, MADV_HUGEPAGE); ++ ASSERT_EQ(ret, 0); ++ old_ptr = buffer->ptr; ++ buffer->ptr = map; ++ ++ /* Initialize data that the device will write to buffer->ptr. */ ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ptr[i] = i; ++ ++ /* Simulate a device writing system memory. */ ++ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ASSERT_EQ(buffer->faults, 1); ++ ++ /* Check what the device wrote. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ ++ buffer->ptr = old_ptr; ++ hmm_buffer_free(buffer); ++} ++ ++/* ++ * Write huge TLBFS page. ++ */ ++TEST_F(hmm, anon_write_hugetlbfs) ++{ ++ struct hmm_buffer *buffer; ++ unsigned long npages; ++ unsigned long size; ++ unsigned long i; ++ int *ptr; ++ int ret; ++ long pagesizes[4]; ++ int n, idx; ++ ++ /* Skip test if we can't allocate a hugetlbfs page. 
*/ ++ ++ n = gethugepagesizes(pagesizes, 4); ++ if (n <= 0) ++ return; ++ for (idx = 0; --n > 0; ) { ++ if (pagesizes[n] < pagesizes[idx]) ++ idx = n; ++ } ++ size = ALIGN(TWOMEG, pagesizes[idx]); ++ npages = size >> self->page_shift; ++ ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->ptr = get_hugepage_region(size, GHR_STRICT); ++ if (buffer->ptr == NULL) { ++ free(buffer); ++ return; ++ } ++ ++ buffer->fd = -1; ++ buffer->size = size; ++ buffer->mirror = malloc(size); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ /* Initialize data that the device will write to buffer->ptr. */ ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ptr[i] = i; ++ ++ /* Simulate a device writing system memory. */ ++ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ASSERT_EQ(buffer->faults, 1); ++ ++ /* Check what the device wrote. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ ++ free_hugepage_region(buffer->ptr); ++ buffer->ptr = NULL; ++ hmm_buffer_free(buffer); ++} ++ ++/* ++ * Read mmap'ed file memory. ++ */ ++TEST_F(hmm, file_read) ++{ ++ struct hmm_buffer *buffer; ++ unsigned long npages; ++ unsigned long size; ++ unsigned long i; ++ int *ptr; ++ int ret; ++ int fd; ++ ssize_t len; ++ ++ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; ++ ASSERT_NE(npages, 0); ++ size = npages << self->page_shift; ++ ++ fd = hmm_create_file(size); ++ ASSERT_GE(fd, 0); ++ ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->fd = fd; ++ buffer->size = size; ++ buffer->mirror = malloc(size); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ /* Write initial contents of the file. 
*/ ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ptr[i] = i; ++ len = pwrite(fd, buffer->mirror, size, 0); ++ ASSERT_EQ(len, size); ++ memset(buffer->mirror, 0, size); ++ ++ buffer->ptr = mmap(NULL, size, ++ PROT_READ, ++ MAP_SHARED, ++ buffer->fd, 0); ++ ASSERT_NE(buffer->ptr, MAP_FAILED); ++ ++ /* Simulate a device reading system memory. */ ++ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ASSERT_EQ(buffer->faults, 1); ++ ++ /* Check what the device read. */ ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ ++ hmm_buffer_free(buffer); ++} ++ ++/* ++ * Write mmap'ed file memory. ++ */ ++TEST_F(hmm, file_write) ++{ ++ struct hmm_buffer *buffer; ++ unsigned long npages; ++ unsigned long size; ++ unsigned long i; ++ int *ptr; ++ int ret; ++ int fd; ++ ssize_t len; ++ ++ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; ++ ASSERT_NE(npages, 0); ++ size = npages << self->page_shift; ++ ++ fd = hmm_create_file(size); ++ ASSERT_GE(fd, 0); ++ ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->fd = fd; ++ buffer->size = size; ++ buffer->mirror = malloc(size); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ buffer->ptr = mmap(NULL, size, ++ PROT_READ | PROT_WRITE, ++ MAP_SHARED, ++ buffer->fd, 0); ++ ASSERT_NE(buffer->ptr, MAP_FAILED); ++ ++ /* Initialize data that the device will write to buffer->ptr. */ ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ptr[i] = i; ++ ++ /* Simulate a device writing system memory. */ ++ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ASSERT_EQ(buffer->faults, 1); ++ ++ /* Check what the device wrote. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ ++ /* Check that the device also wrote the file. 
*/ ++ len = pread(fd, buffer->mirror, size, 0); ++ ASSERT_EQ(len, size); ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ ++ hmm_buffer_free(buffer); ++} ++ ++/* ++ * Migrate anonymous memory to device private memory. ++ */ ++TEST_F(hmm, migrate) ++{ ++ struct hmm_buffer *buffer; ++ unsigned long npages; ++ unsigned long size; ++ unsigned long i; ++ int *ptr; ++ int ret; ++ ++ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; ++ ASSERT_NE(npages, 0); ++ size = npages << self->page_shift; ++ ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->fd = -1; ++ buffer->size = size; ++ buffer->mirror = malloc(size); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ buffer->ptr = mmap(NULL, size, ++ PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, ++ buffer->fd, 0); ++ ASSERT_NE(buffer->ptr, MAP_FAILED); ++ ++ /* Initialize buffer in system memory. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ptr[i] = i; ++ ++ /* Migrate memory to device. */ ++ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ++ /* Check what the device read. */ ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ ++ hmm_buffer_free(buffer); ++} ++ ++/* ++ * Migrate anonymous memory to device private memory and fault it back to system ++ * memory. 
++ */ ++TEST_F(hmm, migrate_fault) ++{ ++ struct hmm_buffer *buffer; ++ unsigned long npages; ++ unsigned long size; ++ unsigned long i; ++ int *ptr; ++ int ret; ++ ++ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; ++ ASSERT_NE(npages, 0); ++ size = npages << self->page_shift; ++ ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->fd = -1; ++ buffer->size = size; ++ buffer->mirror = malloc(size); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ buffer->ptr = mmap(NULL, size, ++ PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, ++ buffer->fd, 0); ++ ASSERT_NE(buffer->ptr, MAP_FAILED); ++ ++ /* Initialize buffer in system memory. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ptr[i] = i; ++ ++ /* Migrate memory to device. */ ++ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ++ /* Check what the device read. */ ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ ++ /* Fault pages back to system memory and check them. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ ++ hmm_buffer_free(buffer); ++} ++ ++/* ++ * Try to migrate various memory types to device private memory. ++ */ ++TEST_F(hmm2, migrate_mixed) ++{ ++ struct hmm_buffer *buffer; ++ unsigned long npages; ++ unsigned long size; ++ int *ptr; ++ unsigned char *p; ++ int ret; ++ int val; ++ ++ npages = 6; ++ size = npages << self->page_shift; ++ ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->fd = -1; ++ buffer->size = size; ++ buffer->mirror = malloc(size); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ /* Reserve a range of addresses. 
*/ ++ buffer->ptr = mmap(NULL, size, ++ PROT_NONE, ++ MAP_PRIVATE | MAP_ANONYMOUS, ++ buffer->fd, 0); ++ ASSERT_NE(buffer->ptr, MAP_FAILED); ++ p = buffer->ptr; ++ ++ /* Migrating a protected area should be an error. */ ++ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, npages); ++ ASSERT_EQ(ret, -EINVAL); ++ ++ /* Punch a hole after the first page address. */ ++ ret = munmap(buffer->ptr + self->page_size, self->page_size); ++ ASSERT_EQ(ret, 0); ++ ++ /* We expect an error if the vma doesn't cover the range. */ ++ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 3); ++ ASSERT_EQ(ret, -EINVAL); ++ ++ /* Page 2 will be a read-only zero page. */ ++ ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size, ++ PROT_READ); ++ ASSERT_EQ(ret, 0); ++ ptr = (int *)(buffer->ptr + 2 * self->page_size); ++ val = *ptr + 3; ++ ASSERT_EQ(val, 3); ++ ++ /* Page 3 will be read-only. */ ++ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, ++ PROT_READ | PROT_WRITE); ++ ASSERT_EQ(ret, 0); ++ ptr = (int *)(buffer->ptr + 3 * self->page_size); ++ *ptr = val; ++ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, ++ PROT_READ); ++ ASSERT_EQ(ret, 0); ++ ++ /* Page 4-5 will be read-write. */ ++ ret = mprotect(buffer->ptr + 4 * self->page_size, 2 * self->page_size, ++ PROT_READ | PROT_WRITE); ++ ASSERT_EQ(ret, 0); ++ ptr = (int *)(buffer->ptr + 4 * self->page_size); ++ *ptr = val; ++ ptr = (int *)(buffer->ptr + 5 * self->page_size); ++ *ptr = val; ++ ++ /* Now try to migrate pages 2-5 to device 1. */ ++ buffer->ptr = p + 2 * self->page_size; ++ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 4); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, 4); ++ ++ /* Page 5 won't be migrated to device 0 because it's on device 1. 
*/ ++ buffer->ptr = p + 5 * self->page_size; ++ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1); ++ ASSERT_EQ(ret, -ENOENT); ++ buffer->ptr = p; ++ ++ buffer->ptr = p; ++ hmm_buffer_free(buffer); ++} ++ ++/* ++ * Migrate anonymous memory to device private memory and fault it back to system ++ * memory multiple times. ++ */ ++TEST_F(hmm, migrate_multiple) ++{ ++ struct hmm_buffer *buffer; ++ unsigned long npages; ++ unsigned long size; ++ unsigned long i; ++ unsigned long c; ++ int *ptr; ++ int ret; ++ ++ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; ++ ASSERT_NE(npages, 0); ++ size = npages << self->page_shift; ++ ++ for (c = 0; c < NTIMES; c++) { ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->fd = -1; ++ buffer->size = size; ++ buffer->mirror = malloc(size); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ buffer->ptr = mmap(NULL, size, ++ PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, ++ buffer->fd, 0); ++ ASSERT_NE(buffer->ptr, MAP_FAILED); ++ ++ /* Initialize buffer in system memory. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ptr[i] = i; ++ ++ /* Migrate memory to device. */ ++ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, ++ npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ++ /* Check what the device read. */ ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ ++ /* Fault pages back to system memory and check them. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ ++ hmm_buffer_free(buffer); ++ } ++} ++ ++/* ++ * Read anonymous memory multiple times. 
++ */ ++TEST_F(hmm, anon_read_multiple) ++{ ++ struct hmm_buffer *buffer; ++ unsigned long npages; ++ unsigned long size; ++ unsigned long i; ++ unsigned long c; ++ int *ptr; ++ int ret; ++ ++ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; ++ ASSERT_NE(npages, 0); ++ size = npages << self->page_shift; ++ ++ for (c = 0; c < NTIMES; c++) { ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->fd = -1; ++ buffer->size = size; ++ buffer->mirror = malloc(size); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ buffer->ptr = mmap(NULL, size, ++ PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, ++ buffer->fd, 0); ++ ASSERT_NE(buffer->ptr, MAP_FAILED); ++ ++ /* Initialize buffer in system memory. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ptr[i] = i + c; ++ ++ /* Simulate a device reading system memory. */ ++ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, ++ npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ASSERT_EQ(buffer->faults, 1); ++ ++ /* Check what the device read. */ ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i + c); ++ ++ hmm_buffer_free(buffer); ++ } ++} ++ ++void *unmap_buffer(void *p) ++{ ++ struct hmm_buffer *buffer = p; ++ ++ /* Delay for a bit and then unmap buffer while it is being read. */ ++ hmm_nanosleep(hmm_random() % 32000); ++ munmap(buffer->ptr + buffer->size / 2, buffer->size / 2); ++ buffer->ptr = NULL; ++ ++ return NULL; ++} ++ ++/* ++ * Try reading anonymous memory while it is being unmapped. 
++ */ ++TEST_F(hmm, anon_teardown) ++{ ++ unsigned long npages; ++ unsigned long size; ++ unsigned long c; ++ void *ret; ++ ++ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; ++ ASSERT_NE(npages, 0); ++ size = npages << self->page_shift; ++ ++ for (c = 0; c < NTIMES; ++c) { ++ pthread_t thread; ++ struct hmm_buffer *buffer; ++ unsigned long i; ++ int *ptr; ++ int rc; ++ ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->fd = -1; ++ buffer->size = size; ++ buffer->mirror = malloc(size); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ buffer->ptr = mmap(NULL, size, ++ PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, ++ buffer->fd, 0); ++ ASSERT_NE(buffer->ptr, MAP_FAILED); ++ ++ /* Initialize buffer in system memory. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ptr[i] = i + c; ++ ++ rc = pthread_create(&thread, NULL, unmap_buffer, buffer); ++ ASSERT_EQ(rc, 0); ++ ++ /* Simulate a device reading system memory. */ ++ rc = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, ++ npages); ++ if (rc == 0) { ++ ASSERT_EQ(buffer->cpages, npages); ++ ASSERT_EQ(buffer->faults, 1); ++ ++ /* Check what the device read. */ ++ for (i = 0, ptr = buffer->mirror; ++ i < size / sizeof(*ptr); ++ ++i) ++ ASSERT_EQ(ptr[i], i + c); ++ } ++ ++ pthread_join(thread, &ret); ++ hmm_buffer_free(buffer); ++ } ++} ++ ++/* ++ * Test memory snapshot without faulting in pages accessed by the device. ++ */ ++TEST_F(hmm2, snapshot) ++{ ++ struct hmm_buffer *buffer; ++ unsigned long npages; ++ unsigned long size; ++ int *ptr; ++ unsigned char *p; ++ unsigned char *m; ++ int ret; ++ int val; ++ ++ npages = 7; ++ size = npages << self->page_shift; ++ ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->fd = -1; ++ buffer->size = size; ++ buffer->mirror = malloc(npages); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ /* Reserve a range of addresses. 
*/ ++ buffer->ptr = mmap(NULL, size, ++ PROT_NONE, ++ MAP_PRIVATE | MAP_ANONYMOUS, ++ buffer->fd, 0); ++ ASSERT_NE(buffer->ptr, MAP_FAILED); ++ p = buffer->ptr; ++ ++ /* Punch a hole after the first page address. */ ++ ret = munmap(buffer->ptr + self->page_size, self->page_size); ++ ASSERT_EQ(ret, 0); ++ ++ /* Page 2 will be read-only zero page. */ ++ ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size, ++ PROT_READ); ++ ASSERT_EQ(ret, 0); ++ ptr = (int *)(buffer->ptr + 2 * self->page_size); ++ val = *ptr + 3; ++ ASSERT_EQ(val, 3); ++ ++ /* Page 3 will be read-only. */ ++ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, ++ PROT_READ | PROT_WRITE); ++ ASSERT_EQ(ret, 0); ++ ptr = (int *)(buffer->ptr + 3 * self->page_size); ++ *ptr = val; ++ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, ++ PROT_READ); ++ ASSERT_EQ(ret, 0); ++ ++ /* Page 4-6 will be read-write. */ ++ ret = mprotect(buffer->ptr + 4 * self->page_size, 3 * self->page_size, ++ PROT_READ | PROT_WRITE); ++ ASSERT_EQ(ret, 0); ++ ptr = (int *)(buffer->ptr + 4 * self->page_size); ++ *ptr = val; ++ ++ /* Page 5 will be migrated to device 0. */ ++ buffer->ptr = p + 5 * self->page_size; ++ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, 1); ++ ++ /* Page 6 will be migrated to device 1. */ ++ buffer->ptr = p + 6 * self->page_size; ++ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 1); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, 1); ++ ++ /* Simulate a device snapshotting CPU pagetables. */ ++ buffer->ptr = p; ++ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_SNAPSHOT, buffer, npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ++ /* Check what the device saw. 
*/ ++ m = buffer->mirror; ++ ASSERT_EQ(m[0], HMM_DMIRROR_PROT_ERROR); ++ ASSERT_EQ(m[1], HMM_DMIRROR_PROT_ERROR); ++ ASSERT_EQ(m[2], HMM_DMIRROR_PROT_ZERO | HMM_DMIRROR_PROT_READ); ++ ASSERT_EQ(m[3], HMM_DMIRROR_PROT_READ); ++ ASSERT_EQ(m[4], HMM_DMIRROR_PROT_WRITE); ++ ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL | ++ HMM_DMIRROR_PROT_WRITE); ++ ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE); ++ ++ hmm_buffer_free(buffer); ++} ++ ++/* ++ * Test two devices reading the same memory (double mapped). ++ */ ++TEST_F(hmm2, double_map) ++{ ++ struct hmm_buffer *buffer; ++ unsigned long npages; ++ unsigned long size; ++ unsigned long i; ++ int *ptr; ++ int ret; ++ ++ npages = 6; ++ size = npages << self->page_shift; ++ ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->fd = -1; ++ buffer->size = size; ++ buffer->mirror = malloc(npages); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ /* Reserve a range of addresses. */ ++ buffer->ptr = mmap(NULL, size, ++ PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, ++ buffer->fd, 0); ++ ASSERT_NE(buffer->ptr, MAP_FAILED); ++ ++ /* Initialize buffer in system memory. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ptr[i] = i; ++ ++ /* Make region read-only. */ ++ ret = mprotect(buffer->ptr, size, PROT_READ); ++ ASSERT_EQ(ret, 0); ++ ++ /* Simulate device 0 reading system memory. */ ++ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ASSERT_EQ(buffer->faults, 1); ++ ++ /* Check what the device read. */ ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ ++ /* Simulate device 1 reading system memory. */ ++ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_READ, buffer, npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ASSERT_EQ(buffer->faults, 1); ++ ++ /* Check what the device read. 
*/ ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ ++ /* Punch a hole after the first page address. */ ++ ret = munmap(buffer->ptr + self->page_size, self->page_size); ++ ASSERT_EQ(ret, 0); ++ ++ hmm_buffer_free(buffer); ++} ++ ++TEST_HARNESS_MAIN +--- a/tools/testing/selftests/vm/run_vmtests ++++ b/tools/testing/selftests/vm/run_vmtests +@@ -227,4 +227,19 @@ else + exitcode=1 + fi + ++echo "running HMM smoke test" ++echo "------------------------------------" ++./test_hmm.sh smoke ++ret_val=$? ++ ++if [ $ret_val -eq 0 ]; then ++ echo "[PASS]" ++elif [ $ret_val -eq $ksft_skip ]; then ++ echo "[SKIP]" ++ exitcode=$ksft_skip ++else ++ echo "[FAIL]" ++ exitcode=1 ++fi ++ + exit $exitcode +--- /dev/null ++++ b/tools/testing/selftests/vm/test_hmm.sh +@@ -0,0 +1,97 @@ ++#!/bin/bash ++# SPDX-License-Identifier: GPL-2.0 ++# ++# Copyright (C) 2018 Uladzislau Rezki (Sony) ++# ++# This is a test script for the kernel test driver to analyse vmalloc ++# allocator. Therefore it is just a kernel module loader. You can specify ++# and pass different parameters in order to: ++# a) analyse performance of vmalloc allocations; ++# b) stressing and stability check of vmalloc subsystem. ++ ++TEST_NAME="test_hmm" ++DRIVER="test_hmm" ++ ++# 1 if fails ++exitcode=1 ++ ++# Kselftest framework requirement - SKIP code is 4. ++ksft_skip=4 ++ ++check_test_requirements() ++{ ++ uid=$(id -u) ++ if [ $uid -ne 0 ]; then ++ echo "$0: Must be run as root" ++ exit $ksft_skip ++ fi ++ ++ if ! which modprobe > /dev/null 2>&1; then ++ echo "$0: You need modprobe installed" ++ exit $ksft_skip ++ fi ++ ++ if ! modinfo $DRIVER > /dev/null 2>&1; then ++ echo "$0: You must have the following enabled in your kernel:" ++ echo "CONFIG_TEST_HMM=m" ++ exit $ksft_skip ++ fi ++} ++ ++load_driver() ++{ ++ modprobe $DRIVER > /dev/null 2>&1 ++ if [ $? 
== 0 ]; then ++ major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices) ++ mknod /dev/hmm_dmirror0 c $major 0 ++ mknod /dev/hmm_dmirror1 c $major 1 ++ fi ++} ++ ++unload_driver() ++{ ++ modprobe -r $DRIVER > /dev/null 2>&1 ++ rm -f /dev/hmm_dmirror? ++} ++ ++run_smoke() ++{ ++ echo "Running smoke test. Note, this test provides basic coverage." ++ ++ load_driver ++ $(dirname "${BASH_SOURCE[0]}")/hmm-tests ++ unload_driver ++} ++ ++usage() ++{ ++ echo -n "Usage: $0" ++ echo ++ echo "Example usage:" ++ echo ++ echo "# Shows help message" ++ echo "./${TEST_NAME}.sh" ++ echo ++ echo "# Smoke testing" ++ echo "./${TEST_NAME}.sh smoke" ++ echo ++ exit 0 ++} ++ ++function run_test() ++{ ++ if [ $# -eq 0 ]; then ++ usage ++ else ++ if [ "$1" = "smoke" ]; then ++ run_smoke ++ else ++ usage ++ fi ++ fi ++} ++ ++check_test_requirements ++run_test $@ ++ ++exit 0 diff --git a/patches.suse/mm-hmm-test-use-the-new-migration-invalidation.patch b/patches.suse/mm-hmm-test-use-the-new-migration-invalidation.patch new file mode 100644 index 0000000..21a5dbe --- /dev/null +++ b/patches.suse/mm-hmm-test-use-the-new-migration-invalidation.patch @@ -0,0 +1,131 @@ +From: Ralph Campbell +Date: Thu, 23 Jul 2020 15:30:03 -0700 +Subject: mm/hmm/test: use the new migration invalidation +Git-commit: 7d17e83abec1be3355260b3e4812044c65c32907 +Patch-mainline: v5.9-rc1 +References: jsc#SLE-16387 + +Use the new MMU_NOTIFY_MIGRATE event to skip MMU invalidations of device +private memory and handle the invalidation in the driver as part of +migrating device private memory. 
+ +Link: https://lore.kernel.org/r/20200723223004.9586-6-rcampbell@nvidia.com +Signed-off-by: Ralph Campbell +Signed-off-by: Jason Gunthorpe +Signed-off-by: Vlastimil Babka +--- + lib/test_hmm.c | 30 +++++++++++++++++------------- + tools/testing/selftests/vm/hmm-tests.c | 18 ++++++++++++++---- + 2 files changed, 31 insertions(+), 17 deletions(-) + +--- a/lib/test_hmm.c ++++ b/lib/test_hmm.c +@@ -214,6 +214,14 @@ static bool dmirror_interval_invalidate( + { + struct dmirror *dmirror = container_of(mni, struct dmirror, notifier); + ++ /* ++ * Ignore invalidation callbacks for device private pages since ++ * the invalidation is handled as part of the migration process. ++ */ ++ if (range->event == MMU_NOTIFY_MIGRATE && ++ range->migrate_pgmap_owner == dmirror->mdevice) ++ return true; ++ + if (mmu_notifier_range_blockable(range)) + mutex_lock(&dmirror->mutex); + else if (!mutex_trylock(&dmirror->mutex)) +@@ -693,7 +701,7 @@ static int dmirror_migrate(struct dmirro + args.dst = dst_pfns; + args.start = addr; + args.end = next; +- args.pgmap_owner = NULL; ++ args.pgmap_owner = dmirror->mdevice; + args.flags = MIGRATE_VMA_SELECT_SYSTEM; + ret = migrate_vma_setup(&args); + if (ret) +@@ -983,7 +991,7 @@ static void dmirror_devmem_free(struct p + } + + static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args, +- struct dmirror_device *mdevice) ++ struct dmirror *dmirror) + { + const unsigned long *src = args->src; + unsigned long *dst = args->dst; +@@ -1005,6 +1013,7 @@ static vm_fault_t dmirror_devmem_fault_a + continue; + + lock_page(dpage); ++ xa_erase(&dmirror->pt, addr >> PAGE_SHIFT); + copy_highpage(dpage, spage); + *dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; + if (*src & MIGRATE_PFN_WRITE) +@@ -1013,15 +1022,6 @@ static vm_fault_t dmirror_devmem_fault_a + return 0; + } + +-static void dmirror_devmem_fault_finalize_and_map(struct migrate_vma *args, +- struct dmirror *dmirror) +-{ +- /* Invalidate the device's page table mapping. 
*/ +- mutex_lock(&dmirror->mutex); +- dmirror_do_update(dmirror, args->start, args->end); +- mutex_unlock(&dmirror->mutex); +-} +- + static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf) + { + struct migrate_vma args; +@@ -1051,11 +1051,15 @@ static vm_fault_t dmirror_devmem_fault(s + if (migrate_vma_setup(&args)) + return VM_FAULT_SIGBUS; + +- ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror->mdevice); ++ ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror); + if (ret) + return ret; + migrate_vma_pages(&args); +- dmirror_devmem_fault_finalize_and_map(&args, dmirror); ++ /* ++ * No device finalize step is needed since ++ * dmirror_devmem_fault_alloc_and_copy() will have already ++ * invalidated the device page table. ++ */ + migrate_vma_finalize(&args); + return 0; + } +--- a/tools/testing/selftests/vm/hmm-tests.c ++++ b/tools/testing/selftests/vm/hmm-tests.c +@@ -881,8 +881,9 @@ TEST_F(hmm, migrate) + } + + /* +- * Migrate anonymous memory to device private memory and fault it back to system +- * memory. ++ * Migrate anonymous memory to device private memory and fault some of it back ++ * to system memory, then try migrating the resulting mix of system and device ++ * private memory to the device. + */ + TEST_F(hmm, migrate_fault) + { +@@ -924,8 +925,17 @@ TEST_F(hmm, migrate_fault) + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + +- /* Fault pages back to system memory and check them. */ +- for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ /* Fault half the pages back to system memory and check them. */ ++ for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i) ++ ASSERT_EQ(ptr[i], i); ++ ++ /* Migrate memory to the device again. */ ++ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ++ /* Check what the device read. 
*/ ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); diff --git a/patches.suse/mm-memremap-c-convert-devmap-static-branch-to-inc-dec.patch b/patches.suse/mm-memremap-c-convert-devmap-static-branch-to-inc-dec.patch new file mode 100644 index 0000000..b423f78 --- /dev/null +++ b/patches.suse/mm-memremap-c-convert-devmap-static-branch-to-inc-dec.patch @@ -0,0 +1,55 @@ +From: Ira Weiny +Date: Tue, 13 Oct 2020 16:52:33 -0700 +Subject: mm/memremap.c: convert devmap static branch to {inc,dec} +Git-commit: 433e7d3177544c8cf0b6375abd310b0ef023fe9d +Patch-mainline: v5.10-rc1 +References: jsc#SLE-16387 + +While reviewing Protection Key Supervisor support it was pointed out that +using a counter to track static branch enable was an anti-pattern which +was better solved using the provided static_branch_{inc,dec} functions.[1] + +Fix up devmap_managed_key to work the same way. Also this should be safer +because there is a very small (very unlikely) race when multiple callers +try to enable at the same time. 
+ +[1] https://lore.kernel.org/lkml/20200714194031.GI5523@worktop.programming.kicks-ass.net/ + +Signed-off-by: Ira Weiny +Signed-off-by: Andrew Morton +Reviewed-by: William Kucharski +Cc: Dan Williams +Cc: Vishal Verma +Link: https://lkml.kernel.org/r/20200810235319.2796597-1-ira.weiny@intel.com +Signed-off-by: Linus Torvalds +Signed-off-by: Vlastimil Babka +--- + mm/memremap.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +--- a/mm/memremap.c ++++ b/mm/memremap.c +@@ -40,12 +40,10 @@ EXPORT_SYMBOL_GPL(memremap_compat_align) + #ifdef CONFIG_DEV_PAGEMAP_OPS + DEFINE_STATIC_KEY_FALSE(devmap_managed_key); + EXPORT_SYMBOL(devmap_managed_key); +-static atomic_t devmap_managed_enable; + + static void devmap_managed_enable_put(void) + { +- if (atomic_dec_and_test(&devmap_managed_enable)) +- static_branch_disable(&devmap_managed_key); ++ static_branch_dec(&devmap_managed_key); + } + + static int devmap_managed_enable_get(struct dev_pagemap *pgmap) +@@ -55,8 +53,7 @@ static int devmap_managed_enable_get(str + return -EINVAL; + } + +- if (atomic_inc_return(&devmap_managed_enable) == 1) +- static_branch_enable(&devmap_managed_key); ++ static_branch_inc(&devmap_managed_key); + return 0; + } + #else diff --git a/patches.suse/mm-migrate-fix-migrate_pgmap_owner-w-o-config_mmu_notifier.patch b/patches.suse/mm-migrate-fix-migrate_pgmap_owner-w-o-config_mmu_notifier.patch new file mode 100644 index 0000000..04199bb --- /dev/null +++ b/patches.suse/mm-migrate-fix-migrate_pgmap_owner-w-o-config_mmu_notifier.patch @@ -0,0 +1,77 @@ +From: Ralph Campbell +Date: Thu, 6 Aug 2020 23:17:09 -0700 +Subject: mm/migrate: fix migrate_pgmap_owner w/o CONFIG_MMU_NOTIFIER +Git-commit: c1a06df6ebf6ca98fb7a672fe447c7469d6c1968 +Patch-mainline: v5.9-rc1 +References: jsc#SLE-16387 + +On x86_64, when CONFIG_MMU_NOTIFIER is not set/enabled, there is a +compiler error: + + mm/migrate.c: In function 'migrate_vma_collect': + mm/migrate.c:2481:7: error: 'struct mmu_notifier_range' has no 
member named 'migrate_pgmap_owner' + range.migrate_pgmap_owner = migrate->pgmap_owner; + ^ + +Fixes: 998427b3ad2c ("mm/notifier: add migration invalidation type") +Reported-by: Randy Dunlap +Signed-off-by: Ralph Campbell +Signed-off-by: Andrew Morton +Tested-by: Randy Dunlap +Acked-by: Randy Dunlap +Cc: Jerome Glisse +Cc: John Hubbard +Cc: Christoph Hellwig +Cc: "Jason Gunthorpe" +Link: http://lkml.kernel.org/r/20200806193353.7124-1-rcampbell@nvidia.com +Signed-off-by: Linus Torvalds +Signed-off-by: Vlastimil Babka +--- + include/linux/mmu_notifier.h | 13 +++++++++++++ + mm/migrate.c | 6 +++--- + 2 files changed, 16 insertions(+), 3 deletions(-) + +--- a/include/linux/mmu_notifier.h ++++ b/include/linux/mmu_notifier.h +@@ -520,6 +520,16 @@ static inline void mmu_notifier_range_in + range->flags = flags; + } + ++static inline void mmu_notifier_range_init_migrate( ++ struct mmu_notifier_range *range, unsigned int flags, ++ struct vm_area_struct *vma, struct mm_struct *mm, ++ unsigned long start, unsigned long end, void *pgmap) ++{ ++ mmu_notifier_range_init(range, MMU_NOTIFY_MIGRATE, flags, vma, mm, ++ start, end); ++ range->migrate_pgmap_owner = pgmap; ++} ++ + #define ptep_clear_flush_young_notify(__vma, __address, __ptep) \ + ({ \ + int __young; \ +@@ -644,6 +654,9 @@ static inline void _mmu_notifier_range_i + + #define mmu_notifier_range_init(range,event,flags,vma,mm,start,end) \ + _mmu_notifier_range_init(range, start, end) ++#define mmu_notifier_range_init_migrate(range, flags, vma, mm, start, end, \ ++ pgmap) \ ++ _mmu_notifier_range_init(range, start, end) + + static inline bool + mmu_notifier_range_blockable(const struct mmu_notifier_range *range) +--- a/mm/migrate.c ++++ b/mm/migrate.c +@@ -2373,9 +2373,9 @@ static void migrate_vma_collect(struct m + * that the registered device driver can skip invalidating device + * private page mappings that won't be migrated. 
+ */ +- mmu_notifier_range_init(&range, MMU_NOTIFY_MIGRATE, 0, migrate->vma, +- migrate->vma->vm_mm, migrate->start, migrate->end); +- range.migrate_pgmap_owner = migrate->pgmap_owner; ++ mmu_notifier_range_init_migrate(&range, 0, migrate->vma, ++ migrate->vma->vm_mm, migrate->start, migrate->end, ++ migrate->pgmap_owner); + mmu_notifier_invalidate_range_start(&range); + + walk_page_range(migrate->vma->vm_mm, migrate->start, migrate->end, diff --git a/patches.suse/mm-mmu_notifier-fix-and-extend-kerneldoc.patch b/patches.suse/mm-mmu_notifier-fix-and-extend-kerneldoc.patch new file mode 100644 index 0000000..a95b7e8 --- /dev/null +++ b/patches.suse/mm-mmu_notifier-fix-and-extend-kerneldoc.patch @@ -0,0 +1,65 @@ +From: Krzysztof Kozlowski +Date: Tue, 11 Aug 2020 18:32:09 -0700 +Subject: mm: mmu_notifier: fix and extend kerneldoc +Git-commit: d49653f35adff8c778e7c5fbd4dbdf929594eca8 +Patch-mainline: v5.9-rc1 +References: jsc#SLE-16387 + +Fix W=1 compile warnings (invalid kerneldoc): + + mm/mmu_notifier.c:187: warning: Function parameter or member 'interval_sub' not described in 'mmu_interval_read_bgin' + mm/mmu_notifier.c:708: warning: Function parameter or member 'subscription' not described in 'mmu_notifier_registr' + mm/mmu_notifier.c:708: warning: Excess function parameter 'mn' description in 'mmu_notifier_register' + mm/mmu_notifier.c:880: warning: Function parameter or member 'subscription' not described in 'mmu_notifier_put' + mm/mmu_notifier.c:880: warning: Excess function parameter 'mn' description in 'mmu_notifier_put' + mm/mmu_notifier.c:982: warning: Function parameter or member 'ops' not described in 'mmu_interval_notifier_insert' + +Signed-off-by: Krzysztof Kozlowski +Signed-off-by: Andrew Morton +Reviewed-by: Jason Gunthorpe +Link: http://lkml.kernel.org/r/20200728171109.28687-4-krzk@kernel.org +Signed-off-by: Linus Torvalds +Signed-off-by: Vlastimil Babka +--- + mm/mmu_notifier.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- 
a/mm/mmu_notifier.c ++++ b/mm/mmu_notifier.c +@@ -166,7 +166,7 @@ static void mn_itree_inv_end(struct mmu_ + /** + * mmu_interval_read_begin - Begin a read side critical section against a VA + * range +- * interval_sub: The interval subscription ++ * @interval_sub: The interval subscription + * + * mmu_iterval_read_begin()/mmu_iterval_read_retry() implement a + * collision-retry scheme similar to seqcount for the VA range under +@@ -686,7 +686,7 @@ EXPORT_SYMBOL_GPL(__mmu_notifier_registe + + /** + * mmu_notifier_register - Register a notifier on a mm +- * @mn: The notifier to attach ++ * @subscription: The notifier to attach + * @mm: The mm to attach the notifier to + * + * Must not hold mmap_sem nor any other VM related lock when calling +@@ -856,7 +856,7 @@ static void mmu_notifier_free_rcu(struct + + /** + * mmu_notifier_put - Release the reference on the notifier +- * @mn: The notifier to act on ++ * @subscription: The notifier to act on + * + * This function must be paired with each mmu_notifier_get(), it releases the + * reference obtained by the get. If this is the last reference then process +@@ -965,7 +965,8 @@ static int __mmu_interval_notifier_inser + * @interval_sub: Interval subscription to register + * @start: Starting virtual address to monitor + * @length: Length of the range to monitor +- * @mm : mm_struct to attach to ++ * @mm: mm_struct to attach to ++ * @ops: Interval notifier operations to be called on matching events + * + * This function subscribes the interval notifier for notifications from the + * mm. 
Upon return the ops related to mmu_interval_notifier will be called diff --git a/patches.suse/xarray-add-xa_for_each_range.patch b/patches.suse/xarray-add-xa_for_each_range.patch new file mode 100644 index 0000000..fdc7e30 --- /dev/null +++ b/patches.suse/xarray-add-xa_for_each_range.patch @@ -0,0 +1,96 @@ +From: "Matthew Wilcox (Oracle)" +Date: Sun, 12 Jan 2020 15:54:10 -0500 +Subject: XArray: Add xa_for_each_range +Git-commit: 00ed452c210a0bc1ff3ee79e1ce6b199f00a0638 +Patch-mainline: v5.5 +References: jsc#SLE-16387 + +This function supports iterating over a range of an array. Also add +documentation links for xa_for_each_start(). + +Signed-off-by: Matthew Wilcox (Oracle) +Signed-off-by: Vlastimil Babka +--- + Documentation/core-api/xarray.rst | 10 ++++++---- + include/linux/xarray.h | 37 ++++++++++++++++++++++++++++++++----- + 2 files changed, 38 insertions(+), 9 deletions(-) + +--- a/Documentation/core-api/xarray.rst ++++ b/Documentation/core-api/xarray.rst +@@ -94,10 +94,10 @@ calling xa_clear_mark(). You can ask wh + XArray has a particular mark set by calling xa_marked(). + + You can copy entries out of the XArray into a plain array by calling +-xa_extract(). Or you can iterate over the present entries in +-the XArray by calling xa_for_each(). You may prefer to use +-xa_find() or xa_find_after() to move to the next present +-entry in the XArray. ++xa_extract(). Or you can iterate over the present entries in the XArray ++by calling xa_for_each(), xa_for_each_start() or xa_for_each_range(). ++You may prefer to use xa_find() or xa_find_after() to move to the next ++present entry in the XArray. + + Calling xa_store_range() stores the same entry in a range + of indices. 
If you do this, some of the other operations will behave +@@ -180,6 +180,8 @@ No lock needed: + Takes RCU read lock: + * xa_load() + * xa_for_each() ++ * xa_for_each_start() ++ * xa_for_each_range() + * xa_find() + * xa_find_after() + * xa_extract() +--- a/include/linux/xarray.h ++++ b/include/linux/xarray.h +@@ -417,6 +417,36 @@ static inline bool xa_marked(const struc + } + + /** ++ * xa_for_each_range() - Iterate over a portion of an XArray. ++ * @xa: XArray. ++ * @index: Index of @entry. ++ * @entry: Entry retrieved from array. ++ * @start: First index to retrieve from array. ++ * @last: Last index to retrieve from array. ++ * ++ * During the iteration, @entry will have the value of the entry stored ++ * in @xa at @index. You may modify @index during the iteration if you ++ * want to skip or reprocess indices. It is safe to modify the array ++ * during the iteration. At the end of the iteration, @entry will be set ++ * to NULL and @index will have a value less than or equal to max. ++ * ++ * xa_for_each_range() is O(n.log(n)) while xas_for_each() is O(n). You have ++ * to handle your own locking with xas_for_each(), and if you have to unlock ++ * after each iteration, it will also end up being O(n.log(n)). ++ * xa_for_each_range() will spin if it hits a retry entry; if you intend to ++ * see retry entries, you should use the xas_for_each() iterator instead. ++ * The xas_for_each() iterator will expand into more inline code than ++ * xa_for_each_range(). ++ * ++ * Context: Any context. Takes and releases the RCU lock. ++ */ ++#define xa_for_each_range(xa, index, entry, start, last) \ ++ for (index = start, \ ++ entry = xa_find(xa, &index, last, XA_PRESENT); \ ++ entry; \ ++ entry = xa_find_after(xa, &index, last, XA_PRESENT)) ++ ++/** + * xa_for_each_start() - Iterate over a portion of an XArray. + * @xa: XArray. + * @index: Index of @entry. +@@ -439,11 +469,8 @@ static inline bool xa_marked(const struc + * + * Context: Any context. 
Takes and releases the RCU lock. + */ +-#define xa_for_each_start(xa, index, entry, start) \ +- for (index = start, \ +- entry = xa_find(xa, &index, ULONG_MAX, XA_PRESENT); \ +- entry; \ +- entry = xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT)) ++#define xa_for_each_start(xa, index, entry, start) \ ++ xa_for_each_range(xa, index, entry, start, ULONG_MAX) + + /** + * xa_for_each() - Iterate over present entries in an XArray. diff --git a/series.conf b/series.conf index 0d5a3d3..eb728a4 100644 --- a/series.conf +++ b/series.conf @@ -15930,6 +15930,7 @@ patches.suse/XArray-Fix-infinite-loop-with-entry-at-ULONG_MAX.patch patches.suse/XArray-Fix-xa_find_after-with-multi-index-entries.patch patches.suse/XArray-Fix-xas_find-returning-too-many-entries.patch + patches.suse/xarray-add-xa_for_each_range.patch patches.suse/mmc-sdhci_am654-Remove-Inverted-Write-Protect-flag.patch patches.suse/mmc-sdhci_am654-Reset-Command-and-Data-line-after-tu.patch patches.suse/mmc-tegra-fix-SDR50-tuning-override.patch @@ -29312,6 +29313,9 @@ patches.suse/0013-drm-amdgpu-remove-dead-code-after-hmm_range_fault.patch patches.suse/0014-mm-hmm-remove-HMM_PFN_SPECIAL.patch patches.suse/0015-mm-hmm-remove-the-customizable-pfn-format-from-hmm_r.patch + patches.suse/mm-hmm-test-add-selftest-driver-for-hmm.patch + patches.suse/mm-hmm-test-add-selftests-for-hmm.patch + patches.suse/maintainers-add-hmm-selftests.patch patches.suse/0016-drm-i915-Move-GGTT-fence-registers-under-gt.patch patches.suse/0017-drm-i915-gt-Pull-restoration-of-GGTT-fences-undernea.patch patches.suse/0018-drm-i915-Remove-manual-save-resume-of-fence-register.patch @@ -34890,6 +34894,7 @@ patches.suse/0001-ocfs2-avoid-inode-removal-while-nfsd-is-accessing-it.patch patches.suse/0002-ocfs2-load-global_inode_alloc.patch patches.suse/0003-ocfs2-fix-panic-on-nfs-server-over-ocfs2.patch + patches.suse/lib-fix-test_hmm-c-reference-after-free.patch patches.suse/mm-memcontrol-c-add-missed-css_put.patch 
patches.suse/1352-drm-rcar-du-Fix-build-error.patch patches.suse/1353-gpu-host1x-Clean-up-debugfs-in-error-handling-path.patch @@ -36443,6 +36448,7 @@ patches.suse/mmc-sdhci-cadence-do-not-use-hardware-tuning-for-SD-.patch patches.suse/mmc-sdhci-pci-o2micro-Bug-fix-for-O2-host-controller.patch patches.suse/1931-mm-hmm-provide-the-page-mapping-order-in-hmm_range_f.patch + patches.suse/mm-hmm-add-tests-for-hmm_pfn_to_map_order.patch patches.suse/0008-nouveau-hmm-fault-one-page-at-a-time.patch patches.suse/0009-nouveau-fix-mapping-2MB-sysmem-pages.patch patches.suse/0010-nouveau-hmm-support-mapping-large-sysmem-pages.patch @@ -36450,6 +36456,7 @@ patches.suse/0012-mm-migrate-add-a-flags-parameter-to-migrate_vma.patch patches.suse/1930-mm-notifier-add-migration-invalidation-type.patch patches.suse/0013-nouveau-svm-use-the-new-migration-invalidation.patch + patches.suse/mm-hmm-test-use-the-new-migration-invalidation.patch patches.suse/leds-lm355x-avoid-enum-conversion-warning.patch patches.suse/leds-88pm860x-fix-use-after-free-on-unbind.patch patches.suse/leds-da903x-fix-use-after-free-on-unbind.patch @@ -40288,6 +40295,7 @@ patches.suse/xfs-don-t-eat-an-EIO-ENOSPC-writeback-error-when-scr.patch patches.suse/xfs-fix-reflink-quota-reservation-accounting-error.patch patches.suse/xfs-fix-inode-allocation-block-res-calculation-prece.patch + patches.suse/mm-migrate-fix-migrate_pgmap_owner-w-o-config_mmu_notifier.patch patches.suse/mm-shuffle-don-t-move-pages-between-zones-and-don-t-read-garbage-memmaps.patch patches.suse/mm-fix-kthread_use_mm-vs-tlb-invalidate.patch patches.suse/ocfs2-fix-remounting-needed-after-setfacl-command.patch @@ -40559,6 +40567,7 @@ patches.suse/1673-drm-virtio-convert-to-LE-accessors.patch patches.suse/virtio_pci_modern-Fix-the-comment-of-virtio_pci_find.patch patches.suse/platform-chrome-cros_ec_ishtp-Fix-a-double-unlock-is.patch + patches.suse/mm-mmu_notifier-fix-and-extend-kerneldoc.patch 
patches.suse/kernel.h-remove-duplicate-include-of-asm-div64.h.patch patches.suse/include-linux-poison.h-remove-obsolete-comment.patch patches.suse/lib-bitmap.c-fix-bitmap_cut-for-partial-overlapping-.patch @@ -41963,6 +41972,8 @@ patches.suse/nvme-fix-error-handling-in-nvme_ns_report_zones.patch patches.suse/mm-fadvise-improve-the-expensive-remote-LRU-cache-draining-after-FADV_DONTNEED.patch patches.suse/mmswapfile.c-fix-potential-memory-leak-in-sys_swapon.patch + patches.suse/mm-memremap-c-convert-devmap-static-branch-to-inc-dec.patch + patches.suse/lib-test_hmm-c-remove-unused-dmirror_zero_page.patch patches.suse/kvm-svm-add-ghcb-definitions patches.suse/kvm-svm-add-ghcb-accessor-functions patches.suse/kvm-svm-use-_packed-shorthand diff --git a/supported.conf b/supported.conf index dbeb61f..7412c36 100644 --- a/supported.conf +++ b/supported.conf @@ -32,6 +32,7 @@ +kselftests-kmp lib/test_firmware # FATE#323821 +kselftests-kmp lib/test_module # FATE#323821 - ensures kmp is never empty +kselftests-kmp lib/test_sysctl # FATE#323821 ++kselftests-kmp lib/test_hmm # jsc#SLE-16387 +ocfs2-kmp fs/ocfs2/cluster/ocfs2_nodemanager # fate#319339 +ocfs2-kmp fs/ocfs2/dlm/ocfs2_dlm # fate#319339 +ocfs2-kmp fs/ocfs2/dlmfs/ocfs2_dlmfs # fate#319339