From 3508ea2831f180d9a2014b3596f808bf0ac7ed91 Mon Sep 17 00:00:00 2001
From: Tao Zhou <tao.zhou1@amd.com>
Date: Tue, 19 Apr 2022 14:45:09 +0800
Subject: drm/amdgpu: add RAS fatal error interrupt handler
Git-commit: b3c76814ce5b043faa2f07108f1c87ed1cbc8cd1
Patch-mainline: v5.19-rc1
References: jsc#PED-1166 jsc#PED-1168 jsc#PED-1170 jsc#PED-1218 jsc#PED-1220 jsc#PED-1222 jsc#PED-1223 jsc#PED-1225
The fatal error handler is independent from general ras interrupt
handler since there is no related IH ring.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Patrik Jakobsson <pjakobsson@suse.de>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 15 +--------------
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 20 ++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 +
3 files changed, 22 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index ea3e8c66211f..b4cf8717f554 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -193,20 +193,7 @@ static irqreturn_t amdgpu_irq_handler(int irq, void *arg)
if (ret == IRQ_HANDLED)
pm_runtime_mark_last_busy(dev->dev);
- /* For the hardware that cannot enable bif ring for both ras_controller_irq
- * and ras_err_evnet_athub_irq ih cookies, the driver has to poll status
- * register to check whether the interrupt is triggered or not, and properly
- * ack the interrupt if it is there
- */
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) {
- if (adev->nbio.ras &&
- adev->nbio.ras->handle_ras_controller_intr_no_bifring)
- adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
-
- if (adev->nbio.ras &&
- adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring)
- adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring(adev);
- }
+ amdgpu_ras_interrupt_fatal_error_handler(adev);
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1c86ec9ab139..03ce3ce913e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1515,6 +1515,26 @@ static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
/* ras fs end */
/* ih begin */
+
+/* For the hardware that cannot enable bif ring for both ras_controller_irq
+ * and ras_err_evnet_athub_irq ih cookies, the driver has to poll status
+ * register to check whether the interrupt is triggered or not, and properly
+ * ack the interrupt if it is there
+ */
+void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
+{
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF))
+ return;
+
+ if (adev->nbio.ras &&
+ adev->nbio.ras->handle_ras_controller_intr_no_bifring)
+ adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
+
+ if (adev->nbio.ras &&
+ adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring)
+ adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring(adev);
+}
+
static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *obj,
struct amdgpu_iv_entry *entry)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index c4b61785ab5c..b9a6fac2b8b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -683,4 +683,5 @@ int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_co
int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
struct amdgpu_ras_block_object *ras_block_obj);
+void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev);
#endif
--
2.38.1