Blob Blame History Raw
From c77c6a68c6bf0fc42f4c5d2132f99e5801a875e4 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Thu, 10 Mar 2022 15:46:05 -0800
Subject: drm/msm/gpu: Park scheduler threads for system suspend
Git-commit: 7e4167c9e021afb01fb69abae8642d781c8907b6
Patch-mainline: v5.18-rc3
References: jsc#PED-1166 jsc#PED-1168 jsc#PED-1170 jsc#PED-1218 jsc#PED-1220 jsc#PED-1222 jsc#PED-1223 jsc#PED-1225

In the system suspend path, we don't want to be racing with the
scheduler kthreads pushing additional queued up jobs to the hw
queue (ringbuffer).  So park them first.  While we are at it,
move the wait for active jobs to complete into the new system-
suspend path.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Link: https://lore.kernel.org/r/20220310234611.424743-3-robdclark@gmail.com
Acked-by: Patrik Jakobsson <pjakobsson@suse.de>
---
 drivers/gpu/drm/msm/adreno/adreno_device.c | 68 ++++++++++++++++++++--
 1 file changed, 64 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 8859834b51b8..0440a98988fc 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -619,22 +619,82 @@ static int active_submits(struct msm_gpu *gpu)
 static int adreno_runtime_suspend(struct device *dev)
 {
 	struct msm_gpu *gpu = dev_to_gpu(dev);
-	int remaining;
+
+	/*
+	 * We should be holding a runpm ref, which will prevent
+	 * runtime suspend.  In the system suspend path, we've
+	 * already waited for active jobs to complete.
+	 */
+	WARN_ON_ONCE(gpu->active_submits);
+
+	return gpu->funcs->pm_suspend(gpu);
+}
+
+static void suspend_scheduler(struct msm_gpu *gpu)
+{
+	int i;
+
+	/*
+	 * Shut down the scheduler before we force suspend, so that
+	 * suspend isn't racing with scheduler kthread feeding us
+	 * more work.
+	 *
+	 * Note, we just want to park the thread, and let any jobs
+	 * that are already on the hw queue complete normally, as
+	 * opposed to the drm_sched_stop() path used for handling
+	 * faulting/timed-out jobs.  We can't really cancel any jobs
+	 * already on the hw queue without racing with the GPU.
+	 */
+	for (i = 0; i < gpu->nr_rings; i++) {
+		struct drm_gpu_scheduler *sched = &gpu->rb[i]->sched;
+		kthread_park(sched->thread);
+	}
+}
+
+static void resume_scheduler(struct msm_gpu *gpu)
+{
+	int i;
+
+	for (i = 0; i < gpu->nr_rings; i++) {
+		struct drm_gpu_scheduler *sched = &gpu->rb[i]->sched;
+		kthread_unpark(sched->thread);
+	}
+}
+
+static int adreno_system_suspend(struct device *dev)
+{
+	struct msm_gpu *gpu = dev_to_gpu(dev);
+	int remaining, ret;
+
+	suspend_scheduler(gpu);
 
 	remaining = wait_event_timeout(gpu->retire_event,
 				       active_submits(gpu) == 0,
 				       msecs_to_jiffies(1000));
 	if (remaining == 0) {
 		dev_err(dev, "Timeout waiting for GPU to suspend\n");
-		return -EBUSY;
+		ret = -EBUSY;
+		goto out;
 	}
 
-	return gpu->funcs->pm_suspend(gpu);
+	ret = pm_runtime_force_suspend(dev);
+out:
+	if (ret)
+		resume_scheduler(gpu);
+
+	return ret;
 }
+
+static int adreno_system_resume(struct device *dev)
+{
+	resume_scheduler(dev_to_gpu(dev));
+	return pm_runtime_force_resume(dev);
+}
+
 #endif
 
 static const struct dev_pm_ops adreno_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
+	SET_SYSTEM_SLEEP_PM_OPS(adreno_system_suspend, adreno_system_resume)
 	SET_RUNTIME_PM_OPS(adreno_runtime_suspend, adreno_runtime_resume, NULL)
 };
 
-- 
2.38.1