From abcff59bd52dcad860ccf7468e66adbac167afe5 Mon Sep 17 00:00:00 2001
From: shaoyunl <shaoyun.liu@amd.com>
Date: Mon, 29 Nov 2021 21:29:05 -0500
Subject: drm/amdgpu: adjust the kfd reset sequence in reset sriov function
Git-commit: 428890a3fec131521cc59aac0d3c48bde9d76b7b
Patch-mainline: v5.16-rc4
References: jsc#PED-1166 jsc#PED-1168 jsc#PED-1170 jsc#PED-1218 jsc#PED-1220 jsc#PED-1222 jsc#PED-1223 jsc#PED-1225
This change revert previous commits:
9f4f2c1a3524 ("drm/amd/amdgpu: fix the kfd pre_reset sequence in sriov")
271fd38ce56d ("drm/amdgpu: move kfd post_reset out of reset_sriov function")
This change moves the amdgpu_amdkfd_pre_reset to an earlier place
in amdgpu_device_reset_sriov, presumably to address the sequence issue
that the first patch was originally meant to fix.
Some register access(GRBM_GFX_CNTL) only be allowed on full access
mode. Move kfd_pre_reset and kfd_post_reset back inside reset_sriov
function.
Fixes: 9f4f2c1a3524 ("drm/amd/amdgpu: fix the kfd pre_reset sequence in sriov")
Fixes: 271fd38ce56d ("drm/amdgpu: move kfd post_reset out of reset_sriov function")
Signed-off-by: shaoyunl <shaoyun.liu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Patrik Jakobsson <pjakobsson@suse.de>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 4b7a69ef721f..1e651b959141 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4289,6 +4289,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
{
int r;
+ amdgpu_amdkfd_pre_reset(adev);
+
if (from_hypervisor)
r = amdgpu_virt_request_full_gpu(adev, true);
else
@@ -4316,6 +4318,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
amdgpu_irq_gpu_reset_resume_helper(adev);
r = amdgpu_ib_ring_tests(adev);
+ amdgpu_amdkfd_post_reset(adev);
error:
if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
@@ -5030,7 +5033,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
- amdgpu_amdkfd_pre_reset(tmp_adev);
+ if (!amdgpu_sriov_vf(tmp_adev))
+ amdgpu_amdkfd_pre_reset(tmp_adev);
/*
* Mark these ASICs to be reseted as untracked first
@@ -5148,9 +5152,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
skip_sched_resume:
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
- /* unlock kfd */
- if (!need_emergency_restart)
- amdgpu_amdkfd_post_reset(tmp_adev);
+ /* unlock kfd: SRIOV would do it separately */
+ if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
+ amdgpu_amdkfd_post_reset(tmp_adev);
/* kfd_post_reset will do nothing if kfd device is not initialized,
* need to bring up kfd here if it's not be initialized before
--
2.38.1