From 2cf49e00d40d5132e3d067b5aa6d84791929ab15 Mon Sep 17 00:00:00 2001
From: shaoyunl <shaoyun.liu@amd.com>
Date: Sun, 14 Nov 2021 12:38:18 -0500
Subject: [PATCH] drm/amd/amdkfd: Fix kernel panic when reset failed and been triggered again
Git-commit: 2cf49e00d40d5132e3d067b5aa6d84791929ab15
Patch-mainline: v5.16-rc2
References: git-fixes
In SRIOV configuration, the reset may failed to bring asic back to normal but stop cpsch
already been called, the start_cpsch will not be called since there is no resume in this
case. When reset been triggered again, driver should avoid to do uninitialization again.
Signed-off-by: shaoyunl <shaoyun.liu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Takashi Iwai <tiwai@suse.de>
---
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 003ba6a373ff..93e33dd84dd4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1226,6 +1226,11 @@ static int stop_cpsch(struct device_queue_manager *dqm)
bool hanging;
dqm_lock(dqm);
+ if (!dqm->sched_running) {
+ dqm_unlock(dqm);
+ return 0;
+ }
+
if (!dqm->is_hws_hang)
unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
hanging = dqm->is_hws_hang || dqm->is_resetting;
--
2.31.1