Blob Blame History Raw
From b7081dbcee2cca30c3bccb2c8adcc676a58a2922 Mon Sep 17 00:00:00 2001
From: Le Ma <le.ma@amd.com>
Date: Fri, 22 Nov 2019 18:39:11 +0800
Subject: drm/amdgpu: clear uncorrectable parity error status bit
Git-commit: 5c39d600e315ce1a0d13c491693c0390d40c94f3
Patch-mainline: v5.6-rc1
References: jsc#SLE-12680, jsc#SLE-12880, jsc#SLE-12882, jsc#SLE-12883, jsc#SLE-13496, jsc#SLE-15322

The uncorrectable parity error status bit should be cleared every time the nbif uncorrectable error cleanup is performed.

Signed-off-by: Le Ma <le.ma@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Patrik Jakobsson <pjakobsson@suse.de>
---
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 9a3a65a0691c..bb701dbfd472 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -482,10 +482,12 @@ static int nbio_v7_4_init_ras_err_event_athub_interrupt (struct amdgpu_device *a
 	return 0;
 }
 
+#define smnPARITY_ERROR_STATUS_UNCORR_GRP2	0x13a20030
+
 static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
 					void *ras_error_status)
 {
-	uint32_t global_sts, central_sts, int_eoi;
+	uint32_t global_sts, central_sts, int_eoi, parity_sts;
 	uint32_t corr, fatal, non_fatal;
 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
 
@@ -494,6 +496,7 @@ static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
 	fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrFatal);
 	non_fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO,
 				ParityErrNonFatal);
+	parity_sts = RREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2);
 
 	if (corr)
 		err_data->ce_count++;
@@ -505,6 +508,11 @@ static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
 		/* clear error status register */
 		WREG32_PCIE(smnRAS_GLOBAL_STATUS_LO, global_sts);
 
+		if (fatal)
+			/* clear parity fatal error indication field */
+			WREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2,
+				    parity_sts);
+
 		if (REG_GET_FIELD(central_sts, BIFL_RAS_CENTRAL_STATUS,
 				BIFL_RasContller_Intr_Recv)) {
 			/* clear interrupt status register */
-- 
2.28.0