diff --git a/patches.kernel.org/6.2.12-097-drm-amdgpu-Force-signal-hw_fences-that-are-emb.patch b/patches.kernel.org/6.2.12-097-drm-amdgpu-Force-signal-hw_fences-that-are-emb.patch new file mode 100644 index 0000000..4d04925 --- /dev/null +++ b/patches.kernel.org/6.2.12-097-drm-amdgpu-Force-signal-hw_fences-that-are-emb.patch @@ -0,0 +1,56 @@ +From: YuBiao Wang +Date: Thu, 16 Mar 2023 11:30:32 +0800 +Subject: [PATCH] drm/amdgpu: Force signal hw_fences that are embedded in + non-sched jobs +References: bsc#1012628 +Patch-mainline: 6.2.12 +Git-commit: 033c56474acf567a450f8bafca50e0b610f2b716 + +[ Upstream commit 033c56474acf567a450f8bafca50e0b610f2b716 ] + +[Why] +For engines not supporting soft reset, i.e. VCN, there will be a failed +ib test before mode 1 reset during asic reset. The fences in this case +are never signaled and next time when we try to free the sa_bo, kernel +will hang. + +[How] +During pre_asic_reset, driver will clear job fences and afterwards the +fences' refcount will be reduced to 1. For drm_sched_jobs it will be +released in job_free_cb, and for non-sched jobs like ib_test, it's meant +to be released in sa_bo_free but only when the fences are signaled. So +we have to force signal the non_sched bad job's fence during +pre_asic_reset or the clear is not complete. + +Signed-off-by: YuBiao Wang +Acked-by: Luben Tuikov +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +Signed-off-by: Jiri Slaby +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +index faff4a3f..f52d0ba9 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +@@ -678,6 +678,15 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) + ptr = &ring->fence_drv.fences[i]; + old = rcu_dereference_protected(*ptr, 1); + if (old && old->ops == &amdgpu_job_fence_ops) { ++ struct amdgpu_job *job; ++ ++ /* For non-scheduler bad job, i.e. failed ib test, we need to signal ++ * it right here or we won't be able to track them in fence_drv ++ * and they will remain unsignaled during sa_bo free. ++ */ ++ job = container_of(old, struct amdgpu_job, hw_fence); ++ if (!job->base.s_fence && !dma_fence_is_signaled(old)) ++ dma_fence_signal(old); + RCU_INIT_POINTER(*ptr, NULL); + dma_fence_put(old); + } +-- +2.35.3 + diff --git a/series.conf b/series.conf index e00f8c4..2a5690d 100644 --- a/series.conf +++ b/series.conf @@ -2324,6 +2324,7 @@ patches.kernel.org/6.2.12-094-wifi-iwlwifi-mvm-fix-mvmtxq-stopped-handling.patch patches.kernel.org/6.2.12-095-wifi-iwlwifi-mvm-protect-TXQ-list-manipulation.patch patches.kernel.org/6.2.12-096-drm-amdgpu-add-mes-resume-when-do-gfx-post-sof.patch + patches.kernel.org/6.2.12-097-drm-amdgpu-Force-signal-hw_fences-that-are-emb.patch ######################################################## # Build fixes that apply to the vanilla kernel too.