From 930411ef0a0a84559dbf8a007af34abe2789eb52 Mon Sep 17 00:00:00 2001
From: John Clements <john.clements@amd.com>
Date: Tue, 7 Jan 2020 14:16:05 +0800
Subject: drm/amdgpu: resolved bug in UMC RAS CE query
Git-commit: 0ee51f1d940e0715d2b82f97f4aa1bd333242f39
Patch-mainline: v5.6-rc1
References: jsc#SLE-12680, jsc#SLE-12880, jsc#SLE-12882, jsc#SLE-12883, jsc#SLE-13496, jsc#SLE-15322
switch CE counter register accesses to use SMN
disable UMC indexing mode
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: John Clements <john.clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Patrik Jakobsson <pjakobsson@suse.de>
---
drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 32 +++++++++++++++++----------
1 file changed, 20 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index 25e9e8b7d5fb..fe18ae33da61 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -52,6 +52,12 @@ const uint32_t
{9, 25, 0, 16}, {15, 31, 6, 22}
};
+static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev)
+{
+ WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+ RSMU_UMC_INDEX_MODE_EN, 0);
+}
+
static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev,
uint32_t umc_inst,
uint32_t ch_inst)
@@ -87,27 +93,27 @@ static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
}
/* select the lower chip and check the error count */
- ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset);
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
EccErrCntCsSel, 0);
- WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
- ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
*error_count +=
(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
UMC_V6_1_CE_CNT_INIT);
/* clear the lower chip err count */
- WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
/* select the higher chip and check the err counter */
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
EccErrCntCsSel, 1);
- WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
- ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
*error_count +=
(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
UMC_V6_1_CE_CNT_INIT);
/* clear the higher chip err count */
- WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
/* check for SRAM correctable error
MCUMC_STATUS is a 64 bit register */
@@ -283,21 +289,21 @@ static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev,
}
/* select the lower chip and check the error count */
- ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset);
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
EccErrCntCsSel, 0);
/* set ce error interrupt type to APIC based interrupt */
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
EccErrInt, 0x1);
- WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
/* set error count to initial value */
- WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
/* select the higher chip and check the err counter */
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
EccErrCntCsSel, 1);
- WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
- WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
}
static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
@@ -306,6 +312,8 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
uint32_t ch_inst = 0;
uint32_t umc_reg_offset = 0;
+ umc_v6_1_disable_umc_index_mode(adev);
+
for (umc_inst = 0; umc_inst < adev->umc.umc_inst_num; umc_inst++) {
for (ch_inst = 0; ch_inst < adev->umc.channel_inst_num; ch_inst++) {
umc_reg_offset = get_umc_6_reg_offset(adev,
--
2.28.0