Jiri Slaby ed9267
From: Horatio Zhang <Hongkun.Zhang@amd.com>
Jiri Slaby ed9267
Date: Thu, 4 May 2023 01:46:12 -0400
Jiri Slaby ed9267
Subject: [PATCH] drm/amdgpu: drop gfx_v11_0_cp_ecc_error_irq_funcs
Jiri Slaby ed9267
MIME-Version: 1.0
Jiri Slaby ed9267
Content-Type: text/plain; charset=UTF-8
Jiri Slaby ed9267
Content-Transfer-Encoding: 8bit
Jiri Slaby ed9267
References: bsc#1012628
Jiri Slaby ed9267
Patch-mainline: 6.3.3
Jiri Slaby ed9267
Git-commit: 720b47229a5b24061d1c2e29ddb6043a59178d79
Jiri Slaby ed9267
Jiri Slaby ed9267
commit 720b47229a5b24061d1c2e29ddb6043a59178d79 upstream.
Jiri Slaby ed9267
Jiri Slaby ed9267
The gfx.cp_ecc_error_irq is retired in gfx11, but gfx_v11_0_hw_fini still
Jiri Slaby ed9267
uses amdgpu_irq_put to disable this interrupt, which caused the call trace
Jiri Slaby ed9267
below in this function.
Jiri Slaby ed9267
Jiri Slaby ed9267
[  102.873958] Call Trace:
Jiri Slaby ed9267
[  102.873959]  <TASK>
Jiri Slaby ed9267
[  102.873961]  gfx_v11_0_hw_fini+0x23/0x1e0 [amdgpu]
Jiri Slaby ed9267
[  102.874019]  gfx_v11_0_suspend+0xe/0x20 [amdgpu]
Jiri Slaby ed9267
[  102.874072]  amdgpu_device_ip_suspend_phase2+0x240/0x460 [amdgpu]
Jiri Slaby ed9267
[  102.874122]  amdgpu_device_ip_suspend+0x3d/0x80 [amdgpu]
Jiri Slaby ed9267
[  102.874172]  amdgpu_device_pre_asic_reset+0xd9/0x490 [amdgpu]
Jiri Slaby ed9267
[  102.874223]  amdgpu_device_gpu_recover.cold+0x548/0xce6 [amdgpu]
Jiri Slaby ed9267
[  102.874321]  amdgpu_debugfs_reset_work+0x4c/0x70 [amdgpu]
Jiri Slaby ed9267
[  102.874375]  process_one_work+0x21f/0x3f0
Jiri Slaby ed9267
[  102.874377]  worker_thread+0x200/0x3e0
Jiri Slaby ed9267
[  102.874378]  ? process_one_work+0x3f0/0x3f0
Jiri Slaby ed9267
[  102.874379]  kthread+0xfd/0x130
Jiri Slaby ed9267
[  102.874380]  ? kthread_complete_and_exit+0x20/0x20
Jiri Slaby ed9267
[  102.874381]  ret_from_fork+0x22/0x30
Jiri Slaby ed9267
Jiri Slaby ed9267
v2:
Jiri Slaby ed9267
- Handle umc and gfx ras cases in a separate patch
Jiri Slaby ed9267
- Retired the gfx_v11_0_cp_ecc_error_irq_funcs in gfx11
Jiri Slaby ed9267
Jiri Slaby ed9267
v3:
Jiri Slaby ed9267
- Improve the subject and code comments
Jiri Slaby ed9267
- Add a check for gfx11 in amdgpu_gfx_ras_late_init
Jiri Slaby ed9267
Jiri Slaby ed9267
v4:
Jiri Slaby ed9267
- Drop the definitions of CP_ME1_PIPE_INST_ADDR_INTERVAL and
Jiri Slaby ed9267
SET_ECC_ME_PIPE_STATE, which were used in gfx_v11_0_set_cp_ecc_error_state
Jiri Slaby ed9267
- Check cp_ecc_error_irq.funcs rather than the IP version, which is
Jiri Slaby ed9267
easier to maintain in the long run
Jiri Slaby ed9267
Jiri Slaby ed9267
v5:
Jiri Slaby ed9267
- Simplify the check conditions
Jiri Slaby ed9267
Jiri Slaby ed9267
Signed-off-by: Horatio Zhang <Hongkun.Zhang@amd.com>
Jiri Slaby ed9267
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Jiri Slaby ed9267
Acked-by: Christian König <christian.koenig@amd.com>
Jiri Slaby ed9267
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
Jiri Slaby ed9267
Reviewed-by: Feifei Xu <Feifei.Xu@amd.com>
Jiri Slaby ed9267
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Jiri Slaby ed9267
Cc: stable@vger.kernel.org
Jiri Slaby ed9267
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Jiri Slaby ed9267
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Jiri Slaby ed9267
---
Jiri Slaby ed9267
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c |  8 +++--
Jiri Slaby ed9267
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  | 46 -------------------------
Jiri Slaby ed9267
 2 files changed, 5 insertions(+), 49 deletions(-)
Jiri Slaby ed9267
Jiri Slaby ed9267
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
Jiri Slaby ed9267
index 35ed46b9..8a0a4464 100644
Jiri Slaby ed9267
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
Jiri Slaby ed9267
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
Jiri Slaby ed9267
@@ -686,9 +686,11 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r
Jiri Slaby ed9267
 		if (r)
Jiri Slaby ed9267
 			return r;
Jiri Slaby ed9267
 
Jiri Slaby ed9267
-		r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
Jiri Slaby ed9267
-		if (r)
Jiri Slaby ed9267
-			goto late_fini;
Jiri Slaby ed9267
+		if (adev->gfx.cp_ecc_error_irq.funcs) {
Jiri Slaby ed9267
+			r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
Jiri Slaby ed9267
+			if (r)
Jiri Slaby ed9267
+				goto late_fini;
Jiri Slaby ed9267
+		}
Jiri Slaby ed9267
 	} else {
Jiri Slaby ed9267
 		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
Jiri Slaby ed9267
 	}
Jiri Slaby ed9267
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
Jiri Slaby ed9267
index ecf8ceb5..7609d206 100644
Jiri Slaby ed9267
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
Jiri Slaby ed9267
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
Jiri Slaby ed9267
@@ -1313,13 +1313,6 @@ static int gfx_v11_0_sw_init(void *handle)
Jiri Slaby ed9267
 	if (r)
Jiri Slaby ed9267
 		return r;
Jiri Slaby ed9267
 
Jiri Slaby ed9267
-	/* ECC error */
Jiri Slaby ed9267
-	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
Jiri Slaby ed9267
-				  GFX_11_0_0__SRCID__CP_ECC_ERROR,
Jiri Slaby ed9267
-				  &adev->gfx.cp_ecc_error_irq);
Jiri Slaby ed9267
-	if (r)
Jiri Slaby ed9267
-		return r;
Jiri Slaby ed9267
-
Jiri Slaby ed9267
 	/* FED error */
Jiri Slaby ed9267
 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
Jiri Slaby ed9267
 				  GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT,
Jiri Slaby ed9267
@@ -4442,7 +4435,6 @@ static int gfx_v11_0_hw_fini(void *handle)
Jiri Slaby ed9267
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
Jiri Slaby ed9267
 	int r;
Jiri Slaby ed9267
 
Jiri Slaby ed9267
-	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
Jiri Slaby ed9267
 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
Jiri Slaby ed9267
 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
Jiri Slaby ed9267
 
Jiri Slaby ed9267
@@ -5882,36 +5874,6 @@ static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev
Jiri Slaby ed9267
 	}
Jiri Slaby ed9267
 }
Jiri Slaby ed9267
 
Jiri Slaby ed9267
-#define CP_ME1_PIPE_INST_ADDR_INTERVAL  0x1
Jiri Slaby ed9267
-#define SET_ECC_ME_PIPE_STATE(reg_addr, state) \
Jiri Slaby ed9267
-	do { \
Jiri Slaby ed9267
-		uint32_t tmp = RREG32_SOC15_IP(GC, reg_addr); \
Jiri Slaby ed9267
-		tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, state); \
Jiri Slaby ed9267
-		WREG32_SOC15_IP(GC, reg_addr, tmp); \
Jiri Slaby ed9267
-	} while (0)
Jiri Slaby ed9267
-
Jiri Slaby ed9267
-static int gfx_v11_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
Jiri Slaby ed9267
-							struct amdgpu_irq_src *source,
Jiri Slaby ed9267
-							unsigned type,
Jiri Slaby ed9267
-							enum amdgpu_interrupt_state state)
Jiri Slaby ed9267
-{
Jiri Slaby ed9267
-	uint32_t ecc_irq_state = 0;
Jiri Slaby ed9267
-	uint32_t pipe0_int_cntl_addr = 0;
Jiri Slaby ed9267
-	int i = 0;
Jiri Slaby ed9267
-
Jiri Slaby ed9267
-	ecc_irq_state = (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0;
Jiri Slaby ed9267
-
Jiri Slaby ed9267
-	pipe0_int_cntl_addr = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
Jiri Slaby ed9267
-
Jiri Slaby ed9267
-	WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, ecc_irq_state);
Jiri Slaby ed9267
-
Jiri Slaby ed9267
-	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++)
Jiri Slaby ed9267
-		SET_ECC_ME_PIPE_STATE(pipe0_int_cntl_addr + i * CP_ME1_PIPE_INST_ADDR_INTERVAL,
Jiri Slaby ed9267
-					ecc_irq_state);
Jiri Slaby ed9267
-
Jiri Slaby ed9267
-	return 0;
Jiri Slaby ed9267
-}
Jiri Slaby ed9267
-
Jiri Slaby ed9267
 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
Jiri Slaby ed9267
 					    struct amdgpu_irq_src *src,
Jiri Slaby ed9267
 					    unsigned type,
Jiri Slaby ed9267
@@ -6329,11 +6291,6 @@ static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
Jiri Slaby ed9267
 	.process = gfx_v11_0_priv_inst_irq,
Jiri Slaby ed9267
 };
Jiri Slaby ed9267
 
Jiri Slaby ed9267
-static const struct amdgpu_irq_src_funcs gfx_v11_0_cp_ecc_error_irq_funcs = {
Jiri Slaby ed9267
-	.set = gfx_v11_0_set_cp_ecc_error_state,
Jiri Slaby ed9267
-	.process = amdgpu_gfx_cp_ecc_error_irq,
Jiri Slaby ed9267
-};
Jiri Slaby ed9267
-
Jiri Slaby ed9267
 static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
Jiri Slaby ed9267
 	.process = gfx_v11_0_rlc_gc_fed_irq,
Jiri Slaby ed9267
 };
Jiri Slaby ed9267
@@ -6349,9 +6306,6 @@ static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
Jiri Slaby ed9267
 	adev->gfx.priv_inst_irq.num_types = 1;
Jiri Slaby ed9267
 	adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
Jiri Slaby ed9267
 
Jiri Slaby ed9267
-	adev->gfx.cp_ecc_error_irq.num_types = 1; /* CP ECC error */
Jiri Slaby ed9267
-	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v11_0_cp_ecc_error_irq_funcs;
Jiri Slaby ed9267
-
Jiri Slaby ed9267
 	adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
Jiri Slaby ed9267
 	adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
Jiri Slaby ed9267
 
Jiri Slaby ed9267
-- 
Jiri Slaby ed9267
2.35.3
Jiri Slaby ed9267