From 0beee11b67b0b94bcb787fe54401ce0c1692969b Mon Sep 17 00:00:00 2001
From: Guchun Chen <guchun.chen@amd.com>
Date: Tue, 10 Mar 2020 18:27:08 +0800
Subject: drm/amdgpu: update ras capability's query based on mem ecc
Git-commit: 88474ccad5f85ee934785e2919689282ba6c6767
Patch-mainline: v5.7-rc1
References: jsc#SLE-12680, jsc#SLE-12880, jsc#SLE-12882, jsc#SLE-12883, jsc#SLE-13496, jsc#SLE-15322
configuration
RAS support capability needs to be updated on top of different
memeory ECC enablement, and remove redundant memory ecc check
in gmc module for vega20 and arcturus.
v2: check HBM ECC enablement and set ras mask accordingly.
v3: avoid to invoke atomfirmware interface to query twice.
Suggested-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Guchun Chen <guchun.chen@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Patrik Jakobsson <pjakobsson@suse.de>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 24 ++++++++++++----
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 38 +++++++++----------------
2 files changed, 32 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index ce8548d5fbf3..c3dca4ca8081 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1765,18 +1765,30 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
*hw_supported = 0;
*supported = 0;
- if (amdgpu_sriov_vf(adev) ||
+ if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
(adev->asic_type != CHIP_VEGA20 &&
adev->asic_type != CHIP_ARCTURUS))
return;
- if (adev->is_atom_fw &&
- (amdgpu_atomfirmware_mem_ecc_supported(adev) ||
- amdgpu_atomfirmware_sram_ecc_supported(adev)))
- *hw_supported = AMDGPU_RAS_BLOCK_MASK;
+ if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
+ DRM_INFO("HBM ECC is active.\n");
+ *hw_supported |= (1 << AMDGPU_RAS_BLOCK__UMC |
+ 1 << AMDGPU_RAS_BLOCK__DF);
+ } else
+ DRM_INFO("HBM ECC is not presented.\n");
+
+ if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
+ DRM_INFO("SRAM ECC is active.\n");
+ *hw_supported |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
+ 1 << AMDGPU_RAS_BLOCK__DF);
+ } else
+ DRM_INFO("SRAM ECC is not presented.\n");
+
+ /* hw_supported needs to be aligned with RAS block mask. */
+ *hw_supported &= AMDGPU_RAS_BLOCK_MASK;
*supported = amdgpu_ras_enable == 0 ?
- 0 : *hw_supported & amdgpu_ras_mask;
+ 0 : *hw_supported & amdgpu_ras_mask;
}
int amdgpu_ras_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 6ceaab553130..8606f877478f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -922,30 +922,20 @@ static int gmc_v9_0_late_init(void *handle)
if (r)
return r;
/* Check if ecc is available */
- if (!amdgpu_sriov_vf(adev)) {
- switch (adev->asic_type) {
- case CHIP_VEGA10:
- case CHIP_VEGA20:
- case CHIP_ARCTURUS:
- r = amdgpu_atomfirmware_mem_ecc_supported(adev);
- if (!r) {
- DRM_INFO("ECC is not present.\n");
- if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
- adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
- } else {
- DRM_INFO("ECC is active.\n");
- }
-
- r = amdgpu_atomfirmware_sram_ecc_supported(adev);
- if (!r) {
- DRM_INFO("SRAM ECC is not present.\n");
- } else {
- DRM_INFO("SRAM ECC is active.\n");
- }
- break;
- default:
- break;
- }
+ if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) {
+ r = amdgpu_atomfirmware_mem_ecc_supported(adev);
+ if (!r) {
+ DRM_INFO("ECC is not present.\n");
+ if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
+ adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
+ } else
+ DRM_INFO("ECC is active.\n");
+
+ r = amdgpu_atomfirmware_sram_ecc_supported(adev);
+ if (!r)
+ DRM_INFO("SRAM ECC is not present.\n");
+ else
+ DRM_INFO("SRAM ECC is active.\n");
}
if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
--
2.28.0