From: Rob Clark Date: Fri, 15 Sep 2017 11:04:44 -0400 Subject: drm/msm: dump submits which triggered gpu hang Git-commit: 96169f4e755a507a5c0b4fc12882d1d42abd5ab5 Patch-mainline: v4.15-rc1 References: FATE#326289 FATE#326079 FATE#326049 FATE#322398 FATE#326166 Note we need to move update_fences() to after msm_rd_dump_submit(), otherwise the bo's referenced by the submit may no longer be valid. Signed-off-by: Rob Clark Acked-by: Petr Tesarik --- drivers/gpu/drm/msm/msm_gpu.c | 47 ++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 20 deletions(-) --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -257,34 +257,16 @@ static void recover_worker(struct work_s { struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work); struct drm_device *dev = gpu->dev; + struct msm_drm_private *priv = dev->dev_private; struct msm_gem_submit *submit; struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu); - uint64_t fence; int i; - /* Update all the rings with the latest and greatest fence */ - for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { - struct msm_ringbuffer *ring = gpu->rb[i]; - - fence = ring->memptrs->fence; - - /* - * For the current (faulting?) ring/submit advance the fence by - * one more to clear the faulting submit - */ - if (ring == cur_ring) - fence = fence + 1; - - update_fences(gpu, ring, fence); - } - mutex_lock(&dev->struct_mutex); - dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name); - fence = cur_ring->memptrs->fence + 1; - submit = find_submit(cur_ring, fence); + submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1); if (submit) { struct task_struct *task; @@ -309,9 +291,34 @@ static void recover_worker(struct work_s dev_err(dev->dev, "%s: offending task: %s (%s)\n", gpu->name, task->comm, cmd); + + msm_rd_dump_submit(priv->hangrd, submit, + "offending task: %s (%s)", task->comm, cmd); + } else { + msm_rd_dump_submit(priv->hangrd, submit, NULL); } rcu_read_unlock(); + } + + + /* + * Update all the rings with the latest and greatest fence.. this + * needs to happen after msm_rd_dump_submit() to ensure that the + * bo's referenced by the offending submit are still around. + */ + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + uint32_t fence = ring->memptrs->fence; + /* + * For the current (faulting?) ring/submit advance the fence by + * one more to clear the faulting submit + */ + if (ring == cur_ring) + fence++; + + update_fences(gpu, ring, fence); } if (msm_gpu_active(gpu)) {