diff --git a/patches.suse/drm-amdkfd-Reset-GPU-on-queue-preemption-failure.patch b/patches.suse/drm-amdkfd-Reset-GPU-on-queue-preemption-failure.patch new file mode 100644 index 0000000..42f7352 --- /dev/null +++ b/patches.suse/drm-amdkfd-Reset-GPU-on-queue-preemption-failure.patch @@ -0,0 +1,35 @@ +From 8bdfb4ea95ca738d33ef71376c21eba20130f2eb Mon Sep 17 00:00:00 2001 +From: Harish Kasiviswanathan +Date: Tue, 26 Mar 2024 15:32:46 -0400 +Subject: [PATCH] drm/amdkfd: Reset GPU on queue preemption failure +Git-commit: 8bdfb4ea95ca738d33ef71376c21eba20130f2eb +Patch-mainline: v6.9-rc4 +References: stable-fixes + +Currently, with F32 HWS GPU reset is only triggered when unmap queue fails. + +However, if compute queue doesn't respond to preemption request in time +unmap will return without any error. In this case, only preemption error +is logged and Reset is not triggered. Call GPU reset in this case also. + +Reviewed-by: Alex Deucher +Signed-off-by: Harish Kasiviswanathan +Reviewed-by: Mukul Joshi +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Acked-by: Takashi Iwai + +--- + drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +@@ -1805,6 +1805,7 @@ static int unmap_queues_cpsch(struct dev + pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n"); + while (halt_if_hws_hang) + schedule(); ++ kfd_hws_hang(dqm); + return -ETIME; + } + diff --git a/series.conf b/series.conf index 00f529f..005ac5d 100644 --- a/series.conf +++ b/series.conf @@ -46297,6 +46297,7 @@ patches.suse/drm-i915-cdclk-Fix-CDCLK-programming-order-when-pipe.patch patches.suse/drm-i915-Disable-port-sync-when-bigjoiner-is-used.patch patches.suse/drm-i915-vrr-Disable-VRR-when-using-bigjoiner.patch + patches.suse/drm-amdkfd-Reset-GPU-on-queue-preemption-failure.patch 
patches.suse/iommu-vt-d-Allocate-local-memory-for-page-request-qu.patch # out-of-tree patches