From ed9267b78b2f291d0cdfd05e1b49ba75e9370827 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: May 17 2023 19:15:55 +0000 Subject: drm/amdgpu: drop gfx_v11_0_cp_ecc_error_irq_funcs (bsc#1012628). --- diff --git a/patches.kernel.org/6.3.3-205-drm-amdgpu-drop-gfx_v11_0_cp_ecc_error_irq_func.patch b/patches.kernel.org/6.3.3-205-drm-amdgpu-drop-gfx_v11_0_cp_ecc_error_irq_func.patch new file mode 100644 index 0000000..6a87401 --- /dev/null +++ b/patches.kernel.org/6.3.3-205-drm-amdgpu-drop-gfx_v11_0_cp_ecc_error_irq_func.patch @@ -0,0 +1,170 @@ +From: Horatio Zhang +Date: Thu, 4 May 2023 01:46:12 -0400 +Subject: [PATCH] drm/amdgpu: drop gfx_v11_0_cp_ecc_error_irq_funcs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +References: bsc#1012628 +Patch-mainline: 6.3.3 +Git-commit: 720b47229a5b24061d1c2e29ddb6043a59178d79 + +commit 720b47229a5b24061d1c2e29ddb6043a59178d79 upstream. + +The gfx.cp_ecc_error_irq is retired in gfx11, but gfx_v11_0_hw_fini still +calls amdgpu_irq_put to disable this interrupt, which triggers the call +trace below in that function. + +[ 102.873958] Call Trace: +[ 102.873959] +[ 102.873961] gfx_v11_0_hw_fini+0x23/0x1e0 [amdgpu] +[ 102.874019] gfx_v11_0_suspend+0xe/0x20 [amdgpu] +[ 102.874072] amdgpu_device_ip_suspend_phase2+0x240/0x460 [amdgpu] +[ 102.874122] amdgpu_device_ip_suspend+0x3d/0x80 [amdgpu] +[ 102.874172] amdgpu_device_pre_asic_reset+0xd9/0x490 [amdgpu] +[ 102.874223] amdgpu_device_gpu_recover.cold+0x548/0xce6 [amdgpu] +[ 102.874321] amdgpu_debugfs_reset_work+0x4c/0x70 [amdgpu] +[ 102.874375] process_one_work+0x21f/0x3f0 +[ 102.874377] worker_thread+0x200/0x3e0 +[ 102.874378] ? process_one_work+0x3f0/0x3f0 +[ 102.874379] kthread+0xfd/0x130 +[ 102.874380] ? 
kthread_complete_and_exit+0x20/0x20 +[ 102.874381] ret_from_fork+0x22/0x30 + +v2: +- Handle umc and gfx ras cases in separate patches +- Retire the gfx_v11_0_cp_ecc_error_irq_funcs in gfx11 + +v3: +- Improve the subject and code comments +- Add a check for gfx11 in the function amdgpu_gfx_ras_late_init + +v4: +- Drop the definitions of CP_ME1_PIPE_INST_ADDR_INTERVAL and +SET_ECC_ME_PIPE_STATE, which are used in gfx_v11_0_set_cp_ecc_error_state +- Check cp_ecc_error_irq.funcs rather than ip version for better +long-term maintainability + +v5: +- Simplify the check conditions + +Signed-off-by: Horatio Zhang +Reviewed-by: Hawking Zhang +Acked-by: Christian König +Reviewed-by: Guchun Chen +Reviewed-by: Feifei Xu +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Jiri Slaby +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 8 +++-- + drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 46 ------------------------- + 2 files changed, 5 insertions(+), 49 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +index 35ed46b9..8a0a4464 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +@@ -686,9 +686,11 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r + if (r) + return r; + +- r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); +- if (r) +- goto late_fini; ++ if (adev->gfx.cp_ecc_error_irq.funcs) { ++ r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); ++ if (r) ++ goto late_fini; ++ } + } else { + amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); + } +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +index ecf8ceb5..7609d206 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +@@ -1313,13 +1313,6 @@ static int gfx_v11_0_sw_init(void *handle) + if (r) + return r; + +- /* ECC error */ +- r = 
amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, +- GFX_11_0_0__SRCID__CP_ECC_ERROR, +- &adev->gfx.cp_ecc_error_irq); +- if (r) +- return r; +- + /* FED error */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, + GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT, +@@ -4442,7 +4435,6 @@ static int gfx_v11_0_hw_fini(void *handle) + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + +- amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + +@@ -5882,36 +5874,6 @@ static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev + } + } + +-#define CP_ME1_PIPE_INST_ADDR_INTERVAL 0x1 +-#define SET_ECC_ME_PIPE_STATE(reg_addr, state) \ +- do { \ +- uint32_t tmp = RREG32_SOC15_IP(GC, reg_addr); \ +- tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, state); \ +- WREG32_SOC15_IP(GC, reg_addr, tmp); \ +- } while (0) +- +-static int gfx_v11_0_set_cp_ecc_error_state(struct amdgpu_device *adev, +- struct amdgpu_irq_src *source, +- unsigned type, +- enum amdgpu_interrupt_state state) +-{ +- uint32_t ecc_irq_state = 0; +- uint32_t pipe0_int_cntl_addr = 0; +- int i = 0; +- +- ecc_irq_state = (state == AMDGPU_IRQ_STATE_ENABLE) ? 
1 : 0; +- +- pipe0_int_cntl_addr = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); +- +- WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, ecc_irq_state); +- +- for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) +- SET_ECC_ME_PIPE_STATE(pipe0_int_cntl_addr + i * CP_ME1_PIPE_INST_ADDR_INTERVAL, +- ecc_irq_state); +- +- return 0; +-} +- + static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, +@@ -6329,11 +6291,6 @@ static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = { + .process = gfx_v11_0_priv_inst_irq, + }; + +-static const struct amdgpu_irq_src_funcs gfx_v11_0_cp_ecc_error_irq_funcs = { +- .set = gfx_v11_0_set_cp_ecc_error_state, +- .process = amdgpu_gfx_cp_ecc_error_irq, +-}; +- + static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = { + .process = gfx_v11_0_rlc_gc_fed_irq, + }; +@@ -6349,9 +6306,6 @@ static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev) + adev->gfx.priv_inst_irq.num_types = 1; + adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs; + +- adev->gfx.cp_ecc_error_irq.num_types = 1; /* CP ECC error */ +- adev->gfx.cp_ecc_error_irq.funcs = &gfx_v11_0_cp_ecc_error_irq_funcs; +- + adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */ + adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs; + +-- +2.35.3 + diff --git a/series.conf b/series.conf index 4d1f756..95bf853 100644 --- a/series.conf +++ b/series.conf @@ -938,6 +938,7 @@ patches.kernel.org/6.3.3-202-drm-amdgpu-Fix-vram-recover-doesn-t-work-after-.patch patches.kernel.org/6.3.3-203-drm-amd-display-Enforce-60us-prefetch-for-200Mh.patch patches.kernel.org/6.3.3-204-drm-amd-pm-parse-pp_handle-under-appropriate-co.patch + patches.kernel.org/6.3.3-205-drm-amdgpu-drop-gfx_v11_0_cp_ecc_error_irq_func.patch ######################################################## # Build fixes that apply to the vanilla kernel too.