From 916b80a38ae86beecfad6d3ddecd2d845b2a2d97 Mon Sep 17 00:00:00 2001
From: Tao Zhou <tao.zhou1@amd.com>
Date: Wed, 16 Mar 2022 14:38:12 +0800
Subject: drm/amdkfd: add RAS poison consumption handling for UTCL2 (v2)
Git-commit: 1990e29b1900758f596434204d4067955f6e904e
Patch-mainline: v5.19-rc1
References: jsc#PED-1166 jsc#PED-1168 jsc#PED-1170 jsc#PED-1218 jsc#PED-1220 jsc#PED-1222 jsc#PED-1223 jsc#PED-1225
Do RAS page retirement and use gpu reset as fallback in UTCL2 fault
handler.
v2: replace vm fault event with poison consumed event in UTCL2
poison consumption.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Patrik Jakobsson <pjakobsson@suse.de>
---
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 7db2421a3340..56902b5bb7b6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -308,6 +308,12 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
struct kfd_vm_fault_info info = {0};
uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
+ if (client_id == SOC15_IH_CLIENTID_UTCL2 &&
+ amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) {
+ event_interrupt_poison_consumption(dev, pasid, client_id);
+ return;
+ }
+
info.vmid = vmid;
info.mc_id = client_id;
info.page_addr = ih_ring_entry[4] |
--
2.38.1