From eb7c3a296d2f56a529a0f58b840b7cfea02af2e3 Mon Sep 17 00:00:00 2001
From: John Clements <john.clements@amd.com>
Date: Wed, 13 May 2020 20:23:51 +0800
Subject: drm/amdgpu: Update RAS XGMI error inject sequence
Git-commit: 5c23e9e05e42b5ea56a87a17f1da9ccf9b100465
Patch-mainline: v5.8-rc1
References: jsc#SLE-12680, jsc#SLE-12880, jsc#SLE-12882, jsc#SLE-12883, jsc#SLE-13496, jsc#SLE-15322
Disable XGMI link power down prior to issuing a XGMI RAS error
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: John Clements <john.clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Patrik Jakobsson <pjakobsson@suse.de>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 30 ++++++++++++++++++++++++-
1 file changed, 29 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 7348619253c7..50fe08bf2f72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -811,6 +811,32 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
return 0;
}
+/* Trigger XGMI/WAFL error */
+int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
+ struct ta_ras_trigger_error_input *block_info)
+{
+ int ret;
+
+ if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
+ dev_warn(adev->dev, "Failed to disallow df cstate");
+
+ if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
+ dev_warn(adev->dev, "Failed to disallow XGMI power down");
+
+ ret = psp_ras_trigger_error(&adev->psp, block_info);
+
+ if (amdgpu_ras_intr_triggered())
+ return ret;
+
+ if (amdgpu_dpm_allow_xgmi_power_down(adev, true))
+ dev_warn(adev->dev, "Failed to allow XGMI power down");
+
+ if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
+ dev_warn(adev->dev, "Failed to allow df cstate");
+
+ return ret;
+}
+
/* wrapper of psp_ras_trigger_error */
int amdgpu_ras_error_inject(struct amdgpu_device *adev,
struct ras_inject_if *info)
@@ -844,10 +870,12 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
break;
case AMDGPU_RAS_BLOCK__UMC:
case AMDGPU_RAS_BLOCK__MMHUB:
- case AMDGPU_RAS_BLOCK__XGMI_WAFL:
case AMDGPU_RAS_BLOCK__PCIE_BIF:
ret = psp_ras_trigger_error(&adev->psp, &block_info);
break;
+ case AMDGPU_RAS_BLOCK__XGMI_WAFL:
+ ret = amdgpu_ras_error_inject_xgmi(adev, &block_info);
+ break;
default:
dev_info(adev->dev, "%s error injection is not supported yet\n",
ras_block_str(info->head.block));
--
2.28.0