From: Monk Liu <Monk.Liu@amd.com>
Date: Mon, 25 Dec 2017 15:14:58 +0800
Subject: drm/amdgpu: stop all rings before doing gpu recover
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Git-commit: 711826656bebb09b814349fac21cb13f88f92665
Patch-mainline: v4.17-rc1
References: FATE#326289 FATE#326079 FATE#326049 FATE#322398 FATE#326166
found recover_vram_from_shadow sometimes get executed
in paralle with SDMA scheduler, should stop all
schedulers before doing gpu reset/recover
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Tested-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Petr Tesarik <ptesarik@suse.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 40 ++++++++++-------------------
1 file changed, 15 insertions(+), 25 deletions(-)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2651,22 +2651,23 @@ int amdgpu_device_gpu_recover(struct amd
/* block TTM */
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
+
/* store modesetting */
if (amdgpu_device_has_dc_support(adev))
state = drm_atomic_helper_suspend(adev->ddev);
- /* block scheduler */
+ /* block all schedulers and reset given job's ring */
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
if (!ring || !ring->sched.thread)
continue;
- /* only focus on the ring hit timeout if &job not NULL */
+ kthread_park(ring->sched.thread);
+
if (job && job->ring->idx != i)
continue;
- kthread_park(ring->sched.thread);
drm_sched_hw_job_reset(&ring->sched, &job->base);
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
@@ -2709,33 +2710,22 @@ int amdgpu_device_gpu_recover(struct amd
}
dma_fence_put(fence);
}
+ }
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
-
- if (!ring || !ring->sched.thread)
- continue;
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
- /* only focus on the ring hit timeout if &job not NULL */
- if (job && job->ring->idx != i)
- continue;
+ if (!ring || !ring->sched.thread)
+ continue;
+ /* only need recovery sched of the given job's ring
+ * or all rings (in the case @job is NULL)
+ * after above amdgpu_reset accomplished
+ */
+ if ((!job || job->ring->idx == i) && !r)
drm_sched_job_recovery(&ring->sched);
- kthread_unpark(ring->sched.thread);
- }
- } else {
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
-
- if (!ring || !ring->sched.thread)
- continue;
-
- /* only focus on the ring hit timeout if &job not NULL */
- if (job && job->ring->idx != i)
- continue;
- kthread_unpark(adev->rings[i]->sched.thread);
- }
+ kthread_unpark(ring->sched.thread);
}
if (amdgpu_device_has_dc_support(adev)) {