From 1c7c106f395c7167779206773f0f1d027fde82c0 Mon Sep 17 00:00:00 2001
From: John Clements <john.clements@amd.com>
Date: Wed, 22 Sep 2021 14:04:52 +0800
Subject: drm/amdgpu: Updated RAS infrastructure
Git-commit: 640ae42efb828be69a9ee6ac88fb3d5a3e678ddf
Patch-mainline: v5.16-rc1
References: jsc#PED-1166 jsc#PED-1168 jsc#PED-1170 jsc#PED-1218 jsc#PED-1220 jsc#PED-1222 jsc#PED-1223 jsc#PED-1225

Update RAS infrastructure to support RAS query for MCA sub-blocks:
replace the standalone MPIO RAS block with an MCA block that carries
MP0, MP1, MPIO and IOHC sub-blocks, give each MCA sub-block its own
ras_manager object, and add get_ras_block_str() so log messages and
debugfs nodes report the sub-block name. Also drop the extra '* 4'
scaling of the MCA status register address in the amdgpu_mca helpers
and the already-enabled state checks in the RAS feature enable paths.

Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: John Clements <john.clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Patrik Jakobsson <pjakobsson@suse.de>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c |   8 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 146 ++++++++++++++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |  19 ++-
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c   |   9 +-
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c  |   4 +-
 drivers/gpu/drm/amd/amdgpu/ta_ras_if.h  |  11 +-
 7 files changed, 146 insertions(+), 52 deletions(-)
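Reviewer note, below the cut (not part of the patch): a minimal user-space
sketch of the object indexing this change introduces, where MCA sub-block
objects are appended after the regular per-block objects at
objs[AMDGPU_RAS_BLOCK__LAST + sub_block_index]. All demo_-prefixed names are
invented for the illustration, and the enum values are only assumed to mirror
amdgpu_ras.h after this patch.

    /*
     * demo_ras_index.c: stand-alone illustration of the ras_manager
     * indexing scheme added by this patch.  MCA sub-block objects live
     * past the regular block objects, at
     * objs[AMDGPU_RAS_BLOCK__LAST + head->sub_block_index].
     * Build: gcc -Wall -o demo_ras_index demo_ras_index.c
     */
    #include <stdio.h>

    enum demo_ras_block {
            DEMO_RAS_BLOCK__UMC = 0,
            /* ... other IP blocks elided, as in enum amdgpu_ras_block ... */
            DEMO_RAS_BLOCK__MCA = 14,  /* assumed: last entry of the 15-name ras_block_string[] table */
            DEMO_RAS_BLOCK__LAST       /* AMDGPU_RAS_BLOCK_COUNT */
    };

    enum demo_ras_mca_block {
            DEMO_RAS_MCA_BLOCK__MP0 = 0,
            DEMO_RAS_MCA_BLOCK__MP1,
            DEMO_RAS_MCA_BLOCK__MPIO,
            DEMO_RAS_MCA_BLOCK__IOHC,
            DEMO_RAS_MCA_BLOCK__LAST   /* AMDGPU_RAS_MCA_BLOCK_COUNT */
    };

    static const char * const demo_mca_block_string[] = {
            "mca_mp0", "mca_mp1", "mca_mpio", "mca_iohc",
    };

    /* Same arithmetic as amdgpu_ras_create_obj()/amdgpu_ras_find_obj(). */
    static int demo_ras_obj_index(enum demo_ras_block block, unsigned int sub)
    {
            if (block == DEMO_RAS_BLOCK__MCA) {
                    if (sub >= DEMO_RAS_MCA_BLOCK__LAST)
                            return -1;  /* invalid sub-block */
                    return DEMO_RAS_BLOCK__LAST + sub;
            }
            return block;
    }

    int main(void)
    {
            /* amdgpu_ras_init() now allocates BLOCK_COUNT + MCA_BLOCK_COUNT objects. */
            printf("ras_manager objects: %d\n",
                   DEMO_RAS_BLOCK__LAST + DEMO_RAS_MCA_BLOCK__LAST);
            printf("mca_mpio -> objs[%d] (%s)\n",
                   demo_ras_obj_index(DEMO_RAS_BLOCK__MCA, DEMO_RAS_MCA_BLOCK__MPIO),
                   demo_mca_block_string[DEMO_RAS_MCA_BLOCK__MPIO]);
            return 0;
    }

With these demo values it prints 19 objects in total and maps mca_mpio to
objs[17], matching the enlarged con->objs allocation in amdgpu_ras_init().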

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index a2d3dbbf7d25..ce538f4819f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -31,7 +31,7 @@ void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
 					      uint64_t mc_status_addr,
 					      unsigned long *error_count)
 {
-	uint64_t mc_status = RREG64_PCIE(mc_status_addr * 4);
+	uint64_t mc_status = RREG64_PCIE(mc_status_addr);
 
 	if (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
 	    REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
@@ -42,7 +42,7 @@ void amdgpu_mca_query_uncorrectable_error_count(struct amdgpu_device *adev,
 						uint64_t mc_status_addr,
 						unsigned long *error_count)
 {
-	uint64_t mc_status = RREG64_PCIE(mc_status_addr * 4);
+	uint64_t mc_status = RREG64_PCIE(mc_status_addr);
 
 	if ((REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
 	    (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
@@ -56,7 +56,7 @@ void amdgpu_mca_query_uncorrectable_error_count(struct amdgpu_device *adev,
 void amdgpu_mca_reset_error_count(struct amdgpu_device *adev,
 				  uint64_t mc_status_addr)
 {
-	WREG64_PCIE(mc_status_addr * 4, 0x0ULL);
+	WREG64_PCIE(mc_status_addr, 0x0ULL);
 }
 
 void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
@@ -87,8 +87,8 @@ int amdgpu_mca_ras_late_init(struct amdgpu_device *adev,
 		if (!mca_dev->ras_if)
 			return -ENOMEM;
 		mca_dev->ras_if->block = mca_dev->ras_funcs->ras_block;
+		mca_dev->ras_if->sub_block_index = mca_dev->ras_funcs->ras_sub_block;
 		mca_dev->ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
-		mca_dev->ras_if->sub_block_index = 0;
 	}
 	ih_info.head = fs_info.head = *mca_dev->ras_if;
 	r = amdgpu_ras_late_init(adev, mca_dev->ras_if,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index f860f2f0e296..c74bc7177066 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -29,6 +29,7 @@ struct amdgpu_mca_ras_funcs {
 	void (*query_ras_error_address)(struct amdgpu_device *adev,
 					void *ras_error_status);
 	uint32_t ras_block;
+	uint32_t ras_sub_block;
 	const char* sysfs_name;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index b5332db4d287..912ea1f9fd04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -61,9 +61,30 @@ const char *ras_block_string[] = {
 	"mp0",
 	"mp1",
 	"fuse",
-	"mpio",
+	"mca",
 };
 
+const char *ras_mca_block_string[] = {
+	"mca_mp0",
+	"mca_mp1",
+	"mca_mpio",
+	"mca_iohc",
+};
+
+const char *get_ras_block_str(struct ras_common_if *ras_block)
+{
+	if (!ras_block)
+		return "NULL";
+
+	if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT)
+		return "OUT OF RANGE";
+
+	if (ras_block->block == AMDGPU_RAS_BLOCK__MCA)
+		return ras_mca_block_string[ras_block->sub_block_index];
+
+	return ras_block_string[ras_block->block];
+}
+
 #define ras_err_str(i) (ras_error_string[ffs(i)])
 
 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
@@ -188,7 +209,7 @@ static int amdgpu_ras_find_block_id_by_name(const char *name, int *block_id)
 
 	for (i = 0; i < ARRAY_SIZE(ras_block_string); i++) {
 		*block_id = i;
-		if (strcmp(name, ras_block_str(i)) == 0)
+		if (strcmp(name, ras_block_string[i]) == 0)
 			return 0;
 	}
 	return -EINVAL;
@@ -510,7 +531,6 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
 	if (amdgpu_ras_query_error_status(obj->adev, &info))
 		return -EINVAL;
 
-
 	if (obj->adev->asic_type == CHIP_ALDEBARAN) {
 		if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
 			DRM_WARN("Failed to reset error counter and error status");
@@ -530,7 +550,7 @@ static inline void put_obj(struct ras_manager *obj)
 	if (obj && (--obj->use == 0))
 		list_del(&obj->node);
 	if (obj && (obj->use < 0))
-		DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", ras_block_str(obj->head.block));
+		DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", get_ras_block_str(&obj->head));
 }
 
 /* make one obj and return it. */
@@ -546,7 +566,14 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
 	if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
 		return NULL;
 
-	obj = &con->objs[head->block];
+	if (head->block == AMDGPU_RAS_BLOCK__MCA) {
+		if (head->sub_block_index >= AMDGPU_RAS_MCA_BLOCK__LAST)
+			return NULL;
+
+		obj = &con->objs[AMDGPU_RAS_BLOCK__LAST + head->sub_block_index];
+	} else
+		obj = &con->objs[head->block];
+
 	/* already exist. return obj? */
 	if (alive_obj(obj))
 		return NULL;
@@ -574,19 +601,21 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
 		if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
 			return NULL;
 
-		obj = &con->objs[head->block];
+		if (head->block == AMDGPU_RAS_BLOCK__MCA) {
+			if (head->sub_block_index >= AMDGPU_RAS_MCA_BLOCK__LAST)
+				return NULL;
+
+			obj = &con->objs[AMDGPU_RAS_BLOCK__LAST + head->sub_block_index];
+		} else
+			obj = &con->objs[head->block];
 
-		if (alive_obj(obj)) {
-			WARN_ON(head->block != obj->head.block);
+		if (alive_obj(obj))
 			return obj;
-		}
 	} else {
-		for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT; i++) {
+		for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT + AMDGPU_RAS_MCA_BLOCK_COUNT; i++) {
 			obj = &con->objs[i];
-			if (alive_obj(obj)) {
-				WARN_ON(i != obj->head.block);
+			if (alive_obj(obj))
 				return obj;
-			}
 		}
 	}
 
@@ -627,8 +656,6 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 	 */
 	if (!amdgpu_ras_is_feature_allowed(adev, head))
 		return 0;
-	if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head)))
-		return 0;
 
 	if (enable) {
 		if (!obj) {
@@ -679,18 +706,13 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 
 	/* Do not enable if it is not allowed. */
 	WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head));
-	/* Are we alerady in that state we are going to set? */
-	if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) {
-		ret = 0;
-		goto out;
-	}
 
 	if (!amdgpu_ras_intr_triggered()) {
 		ret = psp_ras_enable_features(&adev->psp, info, enable);
 		if (ret) {
 			dev_err(adev->dev, "ras %s %s failed %d\n",
 				enable ? "enable":"disable",
-				ras_block_str(head->block),
+				get_ras_block_str(head),
 				ret);
 			goto out;
 		}
@@ -732,7 +754,7 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
 				if (!ret)
 					dev_info(adev->dev,
 						"RAS INFO: %s setup object\n",
-						ras_block_str(head->block));
+						get_ras_block_str(head));
 			}
 		} else {
 			/* setup the object then issue a ras TA disable cmd.*/
@@ -782,17 +804,39 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev,
 		bool bypass)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-	int ras_block_count = AMDGPU_RAS_BLOCK_COUNT;
 	int i;
-	const enum amdgpu_ras_error_type default_ras_type =
-		AMDGPU_RAS_ERROR__NONE;
+	const enum amdgpu_ras_error_type default_ras_type = AMDGPU_RAS_ERROR__NONE;
 
-	for (i = 0; i < ras_block_count; i++) {
+	for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT; i++) {
 		struct ras_common_if head = {
 			.block = i,
 			.type = default_ras_type,
 			.sub_block_index = 0,
 		};
+
+		if (i == AMDGPU_RAS_BLOCK__MCA)
+			continue;
+
+		if (bypass) {
+			/*
+			 * bypass psp. vbios enable ras for us.
+			 * so just create the obj
+			 */
+			if (__amdgpu_ras_feature_enable(adev, &head, 1))
+				break;
+		} else {
+			if (amdgpu_ras_feature_enable(adev, &head, 1))
+				break;
+		}
+	}
+
+	for (i = 0; i < AMDGPU_RAS_MCA_BLOCK_COUNT; i++) {
+		struct ras_common_if head = {
+			.block = AMDGPU_RAS_BLOCK__MCA,
+			.type = default_ras_type,
+			.sub_block_index = i,
+		};
+
 		if (bypass) {
 			/*
 			 * bypass psp. vbios enable ras for us.
@@ -810,6 +854,32 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev,
 }
 /* feature ctl end */
 
+
+void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev,
+				       struct ras_common_if *ras_block,
+				       struct ras_err_data  *err_data)
+{
+	switch (ras_block->sub_block_index) {
+	case AMDGPU_RAS_MCA_BLOCK__MP0:
+		if (adev->mca.mp0.ras_funcs &&
+		    adev->mca.mp0.ras_funcs->query_ras_error_count)
+			adev->mca.mp0.ras_funcs->query_ras_error_count(adev, &err_data);
+		break;
+	case AMDGPU_RAS_MCA_BLOCK__MP1:
+		if (adev->mca.mp1.ras_funcs &&
+		    adev->mca.mp1.ras_funcs->query_ras_error_count)
+			adev->mca.mp1.ras_funcs->query_ras_error_count(adev, &err_data);
+		break;
+	case AMDGPU_RAS_MCA_BLOCK__MPIO:
+		if (adev->mca.mpio.ras_funcs &&
+		    adev->mca.mpio.ras_funcs->query_ras_error_count)
+			adev->mca.mpio.ras_funcs->query_ras_error_count(adev, &err_data);
+		break;
+	default:
+		break;
+	}
+}
+
 /* query/inject/cure begin */
 int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 				  struct ras_query_if *info)
@@ -873,6 +943,9 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 		    adev->hdp.ras_funcs->query_ras_error_count)
 			adev->hdp.ras_funcs->query_ras_error_count(adev, &err_data);
 		break;
+	case AMDGPU_RAS_BLOCK__MCA:
+		amdgpu_ras_mca_query_error_status(adev, &info->head, &err_data);
+		break;
 	default:
 		break;
 	}
@@ -894,13 +967,13 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 					adev->smuio.funcs->get_socket_id(adev),
 					adev->smuio.funcs->get_die_id(adev),
 					obj->err_data.ce_count,
-					ras_block_str(info->head.block));
+					get_ras_block_str(&info->head));
 		} else {
 			dev_info(adev->dev, "%ld correctable hardware errors "
 					"detected in %s block, no user "
 					"action is needed.\n",
 					obj->err_data.ce_count,
-					ras_block_str(info->head.block));
+					get_ras_block_str(&info->head));
 		}
 	}
 	if (err_data.ue_count) {
@@ -913,12 +986,12 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 					adev->smuio.funcs->get_socket_id(adev),
 					adev->smuio.funcs->get_die_id(adev),
 					obj->err_data.ue_count,
-					ras_block_str(info->head.block));
+					get_ras_block_str(&info->head));
 		} else {
 			dev_info(adev->dev, "%ld uncorrectable hardware errors "
 					"detected in %s block\n",
 					obj->err_data.ue_count,
-					ras_block_str(info->head.block));
+					get_ras_block_str(&info->head));
 		}
 	}
 
@@ -1028,9 +1101,7 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
 	case AMDGPU_RAS_BLOCK__SDMA:
 	case AMDGPU_RAS_BLOCK__MMHUB:
 	case AMDGPU_RAS_BLOCK__PCIE_BIF:
-	case AMDGPU_RAS_BLOCK__MP0:
-	case AMDGPU_RAS_BLOCK__MP1:
-	case AMDGPU_RAS_BLOCK__MPIO:
+	case AMDGPU_RAS_BLOCK__MCA:
 		ret = psp_ras_trigger_error(&adev->psp, &block_info);
 		break;
 	case AMDGPU_RAS_BLOCK__XGMI_WAFL:
@@ -1038,13 +1109,13 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
 		break;
 	default:
 		dev_info(adev->dev, "%s error injection is not supported yet\n",
-			 ras_block_str(info->head.block));
+			 get_ras_block_str(&info->head));
 		ret = -EINVAL;
 	}
 
 	if (ret)
 		dev_err(adev->dev, "ras inject %s failed %d\n",
-			ras_block_str(info->head.block), ret);
+			get_ras_block_str(&info->head), ret);
 
 	return ret;
 }
@@ -1387,7 +1458,7 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
 		if (amdgpu_ras_is_supported(adev, obj->head.block) &&
 			(obj->attr_inuse == 1)) {
 			sprintf(fs_info.debugfs_name, "%s_err_inject",
-					ras_block_str(obj->head.block));
+					get_ras_block_str(&obj->head));
 			fs_info.head = obj->head;
 			amdgpu_ras_debugfs_create(adev, &fs_info, dir);
 		}
@@ -2185,7 +2256,8 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 		return 0;
 
 	con = kmalloc(sizeof(struct amdgpu_ras) +
-			sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT,
+			sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT +
+			sizeof(struct ras_manager) * AMDGPU_RAS_MCA_BLOCK_COUNT,
 			GFP_KERNEL|__GFP_ZERO);
 	if (!con)
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 1670467c2054..ec42e9873aaa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -49,15 +49,22 @@ enum amdgpu_ras_block {
 	AMDGPU_RAS_BLOCK__MP0,
 	AMDGPU_RAS_BLOCK__MP1,
 	AMDGPU_RAS_BLOCK__FUSE,
-	AMDGPU_RAS_BLOCK__MPIO,
+	AMDGPU_RAS_BLOCK__MCA,
 
 	AMDGPU_RAS_BLOCK__LAST
 };
 
-extern const char *ras_block_string[];
+enum amdgpu_ras_mca_block {
+	AMDGPU_RAS_MCA_BLOCK__MP0 = 0,
+	AMDGPU_RAS_MCA_BLOCK__MP1,
+	AMDGPU_RAS_MCA_BLOCK__MPIO,
+	AMDGPU_RAS_MCA_BLOCK__IOHC,
+
+	AMDGPU_RAS_MCA_BLOCK__LAST
+};
 
-#define ras_block_str(i) (ras_block_string[i])
 #define AMDGPU_RAS_BLOCK_COUNT	AMDGPU_RAS_BLOCK__LAST
+#define AMDGPU_RAS_MCA_BLOCK_COUNT	AMDGPU_RAS_MCA_BLOCK__LAST
 #define AMDGPU_RAS_BLOCK_MASK	((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1)
 
 enum amdgpu_ras_gfx_subblock {
@@ -544,8 +551,8 @@ amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
 		return TA_RAS_BLOCK__MP1;
 	case AMDGPU_RAS_BLOCK__FUSE:
 		return TA_RAS_BLOCK__FUSE;
-	case AMDGPU_RAS_BLOCK__MPIO:
-		return TA_RAS_BLOCK__MPIO;
+	case AMDGPU_RAS_BLOCK__MCA:
+		return TA_RAS_BLOCK__MCA;
 	default:
 		WARN_ONCE(1, "RAS ERROR: unexpected block id %d\n", block);
 		return TA_RAS_BLOCK__UMC;
@@ -640,4 +647,6 @@ void amdgpu_release_ras_context(struct amdgpu_device *adev);
 
 int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev);
 
+const char *get_ras_block_str(struct ras_common_if *ras_block);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
index 058b65730a84..8f7107d392af 100644
--- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
@@ -52,7 +52,8 @@ const struct amdgpu_mca_ras_funcs mca_v3_0_mp0_ras_funcs = {
 	.ras_fini = mca_v3_0_mp0_ras_fini,
 	.query_ras_error_count = mca_v3_0_mp0_query_ras_error_count,
 	.query_ras_error_address = NULL,
-	.ras_block = AMDGPU_RAS_BLOCK__MP0,
+	.ras_block = AMDGPU_RAS_BLOCK__MCA,
+	.ras_sub_block = AMDGPU_RAS_MCA_BLOCK__MP0,
 	.sysfs_name = "mp0_err_count",
 };
 
@@ -79,7 +80,8 @@ const struct amdgpu_mca_ras_funcs mca_v3_0_mp1_ras_funcs = {
 	.ras_fini = mca_v3_0_mp1_ras_fini,
 	.query_ras_error_count = mca_v3_0_mp1_query_ras_error_count,
 	.query_ras_error_address = NULL,
-	.ras_block = AMDGPU_RAS_BLOCK__MP1,
+	.ras_block = AMDGPU_RAS_BLOCK__MCA,
+	.ras_sub_block = AMDGPU_RAS_MCA_BLOCK__MP1,
 	.sysfs_name = "mp1_err_count",
 };
 
@@ -106,7 +108,8 @@ const struct amdgpu_mca_ras_funcs mca_v3_0_mpio_ras_funcs = {
 	.ras_fini = mca_v3_0_mpio_ras_fini,
 	.query_ras_error_count = mca_v3_0_mpio_query_ras_error_count,
 	.query_ras_error_address = NULL,
-	.ras_block = AMDGPU_RAS_BLOCK__MPIO,
+	.ras_block = AMDGPU_RAS_BLOCK__MCA,
+	.ras_sub_block = AMDGPU_RAS_MCA_BLOCK__MPIO,
 	.sysfs_name = "mpio_err_count",
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 0a9fc19b1be0..91b3afa946f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -387,13 +387,13 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
 						"errors detected in %s block, "
 						"no user action is needed.\n",
 						obj->err_data.ce_count,
-						ras_block_str(adev->nbio.ras_if->block));
+						get_ras_block_str(adev->nbio.ras_if));
 
 			if (err_data.ue_count)
 				dev_info(adev->dev, "%ld uncorrectable hardware "
 						"errors detected in %s block\n",
 						obj->err_data.ue_count,
-						ras_block_str(adev->nbio.ras_if->block));
+						get_ras_block_str(adev->nbio.ras_if));
 		}
 
 		dev_info(adev->dev, "RAS controller interrupt triggered "
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
index 532260fd64db..82d956d15b54 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
@@ -73,10 +73,19 @@ enum ta_ras_block {
 	TA_RAS_BLOCK__MP0,
 	TA_RAS_BLOCK__MP1,
 	TA_RAS_BLOCK__FUSE,
-	TA_RAS_BLOCK__MPIO,
+	TA_RAS_BLOCK__MCA,
 	TA_NUM_BLOCK_MAX
 };
 
+enum ta_ras_mca_block
+{
+	TA_RAS_MCA_BLOCK__MP0   = 0,
+	TA_RAS_MCA_BLOCK__MP1   = 1,
+	TA_RAS_MCA_BLOCK__MPIO  = 2,
+	TA_RAS_MCA_BLOCK__IOHC  = 3,
+	TA_MCA_NUM_BLOCK_MAX
+};
+
 enum ta_ras_error_type {
 	TA_RAS_ERROR__NONE			= 0,
 	TA_RAS_ERROR__PARITY			= 1,
-- 
2.38.1