From 3007b0a7083982e16cbeed9ce1b3b7f42f8f03bc Mon Sep 17 00:00:00 2001
From: Philip Yang <Philip.Yang@amd.com>
Date: Fri, 19 Nov 2021 16:16:39 -0500
Subject: drm/amdkfd: handle VMA remove race
Git-commit: 0cc53cb450669cf1def4ff89e8cbcd8ec3c62380
Patch-mainline: v5.16-rc3
References: jsc#PED-1166 jsc#PED-1168 jsc#PED-1170 jsc#PED-1218 jsc#PED-1220 jsc#PED-1222 jsc#PED-1223 jsc#PED-1225
VMA may be removed before unmap notifier callback, and deferred list
work remove range, return success for this special case as we are
handling stale retry fault.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Patrik Jakobsson <pjakobsson@suse.de>
---
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 22 +++++++++++++---------
1 file changed, 13 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index b553c34cc99b..c69748e693fd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2559,20 +2559,13 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
}
static bool
-svm_fault_allowed(struct mm_struct *mm, uint64_t addr, bool write_fault)
+svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
{
unsigned long requested = VM_READ;
- struct vm_area_struct *vma;
if (write_fault)
requested |= VM_WRITE;
- vma = find_vma(mm, addr << PAGE_SHIFT);
- if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
- pr_debug("address 0x%llx VMA is removed\n", addr);
- return true;
- }
-
pr_debug("requested 0x%lx, vma permission flags 0x%lx\n", requested,
vma->vm_flags);
return (vma->vm_flags & requested) == requested;
@@ -2590,6 +2583,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
int32_t best_loc;
int32_t gpuidx = MAX_GPU_INSTANCE;
bool write_locked = false;
+ struct vm_area_struct *vma;
int r = 0;
if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
@@ -2665,7 +2659,17 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
goto out_unlock_range;
}
- if (!svm_fault_allowed(mm, addr, write_fault)) {
+ /* __do_munmap removed VMA, return success as we are handling stale
+ * retry fault.
+ */
+ vma = find_vma(mm, addr << PAGE_SHIFT);
+ if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
+ pr_debug("address 0x%llx VMA is removed\n", addr);
+ r = 0;
+ goto out_unlock_range;
+ }
+
+ if (!svm_fault_allowed(vma, write_fault)) {
pr_debug("fault addr 0x%llx no %s permission\n", addr,
write_fault ? "write" : "read");
r = -EPERM;
--
2.38.1