From 029fbd437eb6a8cd20d983bdbf0037a9417ef2ee Mon Sep 17 00:00:00 2001
From: Hawking Zhang <Hawking.Zhang@amd.com>
Date: Tue, 10 Sep 2019 11:13:39 +0800
Subject: drm/amdgpu: initialize ras structures for xgmi block (v2)
Git-commit: 029fbd437eb6a8cd20d983bdbf0037a9417ef2ee
Patch-mainline: v5.5-rc1
References: bsc#1152489
init ras common interface and fs node for xgmi block
v2: remove unnecessary physical node number check before
invoking amdgpu_xgmi_ras_late_init
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 36 ++++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 1 +
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++-
4 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index d3be51ba6349..b36d4c686844 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -123,6 +123,7 @@ struct amdgpu_xgmi {
/* gpu list in the same hive */
struct list_head head;
bool supported;
+ struct ras_common_if *ras_if;
};
struct amdgpu_gmc {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 65aae75f80fd..7f6f2e964ae3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -25,6 +25,7 @@
#include "amdgpu.h"
#include "amdgpu_xgmi.h"
#include "amdgpu_smu.h"
+#include "amdgpu_ras.h"
#include "df/df_3_6_offset.h"
static DEFINE_MUTEX(xgmi_mutex);
@@ -437,3 +438,38 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
mutex_unlock(&hive->hive_lock);
}
}
+
+int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
+{
+ int r;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+ struct ras_fs_if fs_info = {
+ .sysfs_name = "xgmi_wafl_err_count",
+ .debugfs_name = "xgmi_wafl_err_inject",
+ };
+
+ if (!adev->gmc.xgmi.supported ||
+ adev->gmc.xgmi.num_physical_nodes == 0)
+ return 0;
+
+ if (!adev->gmc.xgmi.ras_if) {
+ adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+ if (!adev->gmc.xgmi.ras_if)
+ return -ENOMEM;
+ adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL;
+ adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->gmc.xgmi.ras_if->sub_block_index = 0;
+ strcpy(adev->gmc.xgmi.ras_if->name, "xgmi_wafl");
+ }
+ ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if;
+ r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if,
+ &fs_info, &ih_info);
+ if (r || !amdgpu_ras_is_supported(adev, adev->gmc.xgmi.ras_if->block)) {
+ kfree(adev->gmc.xgmi.ras_if);
+ adev->gmc.xgmi.ras_if = NULL;
+ }
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index fbcee31788c4..9023789397c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -42,6 +42,7 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
struct amdgpu_device *peer_adev);
+int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev);
static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
struct amdgpu_device *bo_adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 167e916e40be..a991a6c4f468 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -55,6 +55,7 @@
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
#include "amdgpu_ras.h"
+#include "amdgpu_xgmi.h"
/* add these here since we already include dce12 headers and these are for DCN */
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d
@@ -808,7 +809,8 @@ static int gmc_v9_0_ecc_late_init(void *handle)
if (r)
return r;
}
- return 0;
+
+ return amdgpu_xgmi_ras_late_init(adev);
}
static int gmc_v9_0_late_init(void *handle)
--
2.28.0