From 3d6455baf7157206345443de32248a45ea27e47c Mon Sep 17 00:00:00 2001
From: Victor Zhao <Victor.Zhao@amd.com>
Date: Fri, 24 Jun 2022 11:59:21 +0800
Subject: drm/amdgpu: revert context to stop engine before mode2 reset
Git-commit: 72fadb13674f807f10a168fb7d020dde58ce6b0b
Patch-mainline: v6.1-rc1
References: jsc#PED-1166 jsc#PED-1168 jsc#PED-1170 jsc#PED-1218 jsc#PED-1220 jsc#PED-1222 jsc#PED-1223 jsc#PED-1225 jsc#PED-2849
For some hang caused by slow tests, engine cannot be stopped which
may cause resume failure after reset. In this case, force halt
engine by reverting context addresses
Signed-off-by: Victor Zhao <Victor.Zhao@amd.com>
Acked-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Patrik Jakobsson <pjakobsson@suse.de>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h | 1 +
drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c | 36 +++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c | 2 ++
4 files changed, 40 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a18117ed86be..802c62eb780d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5072,6 +5072,7 @@ static void amdgpu_device_recheck_guilty_jobs(
/* set guilty */
drm_sched_increase_karma(s_job);
+ amdgpu_reset_prepare_hwcontext(adev, reset_context);
retry:
/* do hw reset */
if (amdgpu_sriov_vf(adev)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
index f8036f2b100e..c7b44aeb671b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
@@ -37,6 +37,7 @@ struct amdgpu_gfxhub_funcs {
void (*utcl2_harvest)(struct amdgpu_device *adev);
void (*mode2_save_regs)(struct amdgpu_device *adev);
void (*mode2_restore_regs)(struct amdgpu_device *adev);
+ void (*halt)(struct amdgpu_device *adev);
};
struct amdgpu_gfxhub {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
index 51cf8acd2d79..8cf53e039c11 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
@@ -646,6 +646,41 @@ static void gfxhub_v2_1_restore_regs(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, adev->gmc.MC_VM_MX_L1_TLB_CNTL);
}
+static void gfxhub_v2_1_halt(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ int i;
+ uint32_t tmp;
+ int time = 1000;
+
+ gfxhub_v2_1_set_fault_enable_default(adev, false);
+
+ for (i = 0; i <= 14; i++) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, ~0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, ~0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ 0);
+ }
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
+ while ((tmp & (GRBM_STATUS2__EA_BUSY_MASK |
+ GRBM_STATUS2__EA_LINK_BUSY_MASK)) != 0 &&
+ time) {
+ udelay(100);
+ time--;
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
+ }
+
+ if (!time) {
+ DRM_WARN("failed to wait for GRBM(EA) idle\n");
+ }
+}
+
const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = {
.get_fb_location = gfxhub_v2_1_get_fb_location,
.get_mc_fb_offset = gfxhub_v2_1_get_mc_fb_offset,
@@ -658,4 +693,5 @@ const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = {
.utcl2_harvest = gfxhub_v2_1_utcl2_harvest,
.mode2_save_regs = gfxhub_v2_1_save_regs,
.mode2_restore_regs = gfxhub_v2_1_restore_regs,
+ .halt = gfxhub_v2_1_halt,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
index 2dbbda17848b..7aa570c1ce4a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
+++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
@@ -97,6 +97,8 @@ sienna_cichlid_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl,
if (!amdgpu_sriov_vf(adev)) {
if (adev->gfxhub.funcs->mode2_save_regs)
adev->gfxhub.funcs->mode2_save_regs(adev);
+ if (adev->gfxhub.funcs->halt)
+ adev->gfxhub.funcs->halt(adev);
r = sienna_cichlid_mode2_suspend_ip(adev);
}
--
2.38.1