From cb439d643d5d87e99046a5e29b69a53f5d714c1c Mon Sep 17 00:00:00 2001
From: "Stanley.Yang" <Stanley.Yang@amd.com>
Date: Tue, 7 Dec 2021 14:28:58 +0800
Subject: drm/amdgpu: skip umc ras error count harvest
Git-commit: cf63b702720d734cb4144440d72d4b2ac6c494f8
Patch-mainline: v5.17-rc1
References: jsc#PED-1166 jsc#PED-1168 jsc#PED-1170 jsc#PED-1218 jsc#PED-1220 jsc#PED-1222 jsc#PED-1223 jsc#PED-1225
remove in recovery stat check, skip umc ras err cnt
harvest in amdgpu_ras_log_on_err_counter
Signed-off-by: Stanley.Yang <Stanley.Yang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Patrik Jakobsson <pjakobsson@suse.de>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index ccbfe52e0635..9f19db31c449 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -897,11 +897,6 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_d
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
int ret = 0;
- /* skip get ecc info during gpu recovery */
- if (atomic_read(&ras->in_recovery) == 1 &&
- adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2))
- return;
-
/*
* choosing right query method according to
* whether smu support query error information
@@ -1752,6 +1747,16 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
if (info.head.block == AMDGPU_RAS_BLOCK__PCIE_BIF)
continue;
+ /*
+ * this is a workaround for aldebaran, skip send msg to
+ * smu to get ecc_info table due to smu handle get ecc
+ * info table failed temporarily.
+ * should be removed until smu fix handle ecc_info table.
+ */
+ if ((info.head.block == AMDGPU_RAS_BLOCK__UMC) &&
+ (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2)))
+ continue;
+
amdgpu_ras_query_error_status(adev, &info);
}
}
--
2.38.1