Blob Blame History Raw
From d547353d128a4c25d0776516fb926ad5882636c3 Mon Sep 17 00:00:00 2001
From: John Clements <john.clements@amd.com>
Date: Fri, 17 Jan 2020 12:18:00 +0800
Subject: drm/amdgpu: added support to get mGPU DRAM base
Git-commit: a6c44d2538c469f412c3fded0de2290494d762d7
Patch-mainline: v5.6-rc1
References: jsc#SLE-12680, jsc#SLE-12880, jsc#SLE-12882, jsc#SLE-12883, jsc#SLE-13496, jsc#SLE-15322

resolves issue with RAS error injection in mGPU configuration

Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: John Clements <john.clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Patrik Jakobsson <pjakobsson@suse.de>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_df.h        |  3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c       | 20 +++++++
 drivers/gpu/drm/amd/amdgpu/df_v3_6.c          | 59 ++++++++++++++++++-
 .../amd/include/asic_reg/df/df_3_6_offset.h   |  3 +
 .../amd/include/asic_reg/df/df_3_6_sh_mask.h  |  8 +++
 5 files changed, 92 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
index 61a26c15c8dd..057f6ea645d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
@@ -52,6 +52,9 @@ struct amdgpu_df_funcs {
 	uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);
 	void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
 			 uint32_t ficadl_val, uint32_t ficadh_val);
+	uint64_t (*get_dram_base_addr)(struct amdgpu_device *adev,
+				       uint32_t df_inst);
+	uint32_t (*get_df_inst_id)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_df {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 766be7f18282..cef94e2169fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -742,6 +742,20 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
 	return 0;
 }
 
+uint64_t get_xgmi_relative_phy_addr(struct amdgpu_device *adev, uint64_t addr)
+{
+	uint32_t df_inst_id;
+
+	if ((!adev->df.funcs)                 ||
+	    (!adev->df.funcs->get_df_inst_id) ||
+	    (!adev->df.funcs->get_dram_base_addr))
+		return addr;
+
+	df_inst_id = adev->df.funcs->get_df_inst_id(adev);
+
+	return addr + adev->df.funcs->get_dram_base_addr(adev, df_inst_id);
+}
+
 /* wrapper of psp_ras_trigger_error */
 int amdgpu_ras_error_inject(struct amdgpu_device *adev,
 		struct ras_inject_if *info)
@@ -759,6 +773,12 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
 	if (!obj)
 		return -EINVAL;
 
+	/* Calculate XGMI relative offset */
+	if (adev->gmc.xgmi.num_physical_nodes > 1) {
+		block_info.address = get_xgmi_relative_phy_addr(adev,
+								block_info.address);
+	}
+
 	switch (info->head.block) {
 	case AMDGPU_RAS_BLOCK__GFX:
 		if (adev->gfx.funcs->ras_error_inject)
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index f51326598a8c..5a1bd8ed1a6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -27,6 +27,9 @@
 #include "df/df_3_6_offset.h"
 #include "df/df_3_6_sh_mask.h"
 
+#define DF_3_6_SMN_REG_INST_DIST        0x8
+#define DF_3_6_INST_CNT                 8
+
 static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
 				       16, 32, 0, 0, 0, 2, 4, 8};
 
@@ -683,6 +686,58 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
 	}
 }
 
+static uint64_t df_v3_6_get_dram_base_addr(struct amdgpu_device *adev,
+					   uint32_t df_inst)
+{
+	uint32_t base_addr_reg_val 	= 0;
+	uint64_t base_addr	 	= 0;
+
+	base_addr_reg_val = RREG32_PCIE(smnDF_CS_UMC_AON0_DramBaseAddress0 +
+					df_inst * DF_3_6_SMN_REG_INST_DIST);
+
+	if (REG_GET_FIELD(base_addr_reg_val,
+			  DF_CS_UMC_AON0_DramBaseAddress0,
+			  AddrRngVal) == 0) {
+		DRM_WARN("address range not valid");
+		return 0;
+	}
+
+	base_addr = REG_GET_FIELD(base_addr_reg_val,
+				  DF_CS_UMC_AON0_DramBaseAddress0,
+				  DramBaseAddr);
+
+	return base_addr << 28;
+}
+
+static uint32_t df_v3_6_get_df_inst_id(struct amdgpu_device *adev)
+{
+	uint32_t xgmi_node_id	= 0;
+	uint32_t df_inst_id 	= 0;
+
+	/* Walk through DF dst nodes to find current XGMI node */
+	for (df_inst_id = 0; df_inst_id < DF_3_6_INST_CNT; df_inst_id++) {
+
+		xgmi_node_id = RREG32_PCIE(smnDF_CS_UMC_AON0_DramLimitAddress0 +
+					   df_inst_id * DF_3_6_SMN_REG_INST_DIST);
+		xgmi_node_id = REG_GET_FIELD(xgmi_node_id,
+					     DF_CS_UMC_AON0_DramLimitAddress0,
+					     DstFabricID);
+
+		/* TODO: establish reason dest fabric id is offset by 7 */
+		xgmi_node_id = xgmi_node_id >> 7;
+
+		if (adev->gmc.xgmi.physical_node_id == xgmi_node_id)
+			break;
+	}
+
+	if (df_inst_id == DF_3_6_INST_CNT) {
+		DRM_WARN("cant match df dst id with gpu node");
+		return 0;
+	}
+
+	return df_inst_id;
+}
+
 const struct amdgpu_df_funcs df_v3_6_funcs = {
 	.sw_init = df_v3_6_sw_init,
 	.sw_fini = df_v3_6_sw_fini,
@@ -696,5 +751,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = {
 	.pmc_stop = df_v3_6_pmc_stop,
 	.pmc_get_count = df_v3_6_pmc_get_count,
 	.get_fica = df_v3_6_get_fica,
-	.set_fica = df_v3_6_set_fica
+	.set_fica = df_v3_6_set_fica,
+	.get_dram_base_addr = df_v3_6_get_dram_base_addr,
+	.get_df_inst_id = df_v3_6_get_df_inst_id
 };
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
index 87c84691b5be..bb2c9c7a18df 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
@@ -71,4 +71,7 @@
 #define smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3		0x1d098UL
 #define smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3		0x1d09cUL
 
+#define smnDF_CS_UMC_AON0_DramBaseAddress0 	0x1c110UL
+#define smnDF_CS_UMC_AON0_DramLimitAddress0 	0x1c114UL
+
 #endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h
index 65e9f756e86e..7afa87c7ff54 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h
@@ -53,4 +53,12 @@
 #define DF_CS_UMC_AON0_DramBaseAddress0__IntLvAddrSel_MASK						0x00000E00L
 #define DF_CS_UMC_AON0_DramBaseAddress0__DramBaseAddr_MASK						0xFFFFF000L
 
+//DF_CS_UMC_AON0_DramLimitAddress0
+#define DF_CS_UMC_AON0_DramLimitAddress0__DstFabricID__SHIFT                                                  0x0
+#define DF_CS_UMC_AON0_DramLimitAddress0__AllowReqIO__SHIFT                                                   0xa
+#define DF_CS_UMC_AON0_DramLimitAddress0__DramLimitAddr__SHIFT                                                0xc
+#define DF_CS_UMC_AON0_DramLimitAddress0__DstFabricID_MASK                                                    0x000003FFL
+#define DF_CS_UMC_AON0_DramLimitAddress0__AllowReqIO_MASK                                                     0x00000400L
+#define DF_CS_UMC_AON0_DramLimitAddress0__DramLimitAddr_MASK                                                  0xFFFFF000L
+
 #endif
-- 
2.28.0