Blob Blame History Raw
From 24237b4cf0301ee7157482d84e18b1bf78c41b38 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime@cerno.tech>
Date: Mon, 25 Oct 2021 16:11:06 +0200
Subject: drm/vc4: Fix non-blocking commit getting stuck forever
Git-commit: 0c250c150c74a90db298bf2a8bcd0a1dabed2e2f
Patch-mainline: v5.17-rc1
References: jsc#PED-1166 jsc#PED-1168 jsc#PED-1170 jsc#PED-1218 jsc#PED-1220 jsc#PED-1222 jsc#PED-1223 jsc#PED-1225

In some situation, we can end up being stuck on a non-blocking that went
through properly.

The situation that seems to trigger it reliably is to first start a
non-blocking commit, and then right after, and before we had any vblank
interrupt), start a blocking commit.

This will lead to the first commit workqueue to be scheduled, setup the
display, while the second commit is waiting for the first one to be
completed.

The vblank interrupt will then be raised, vc4_crtc_handle_vblank() will
run and will compare the active dlist in the HVS channel to the one
associated with the crtc->state.

However, at that point, the second commit is waiting using
drm_atomic_helper_wait_for_dependencies that occurs after
drm_atomic_helper_swap_state has been called, so crtc->state points to
the second commit state. vc4_crtc_handle_vblank() will compare the two
dlist addresses and since they don't match will ignore the interrupt.

The vblank event will never be reported, and the first and second commit
will wait for the first commit completion until they timeout.

The underlying reason is that it was never safe to do so. Indeed,
accessing the ->state pointer access synchronization is based on
ownership guarantees that can only occur within the functions and hooks
defined as part of the KMS framework, and obviously the irq handler
isn't one of them. The rework to move to generic helpers only uncovered
the underlying issue.

However, since the code path between
drm_atomic_helper_wait_for_dependencies() and
drm_atomic_helper_wait_for_vblanks() is serialised and we can't get two
commits in that path at the same time, we can work around this issue by
setting a variable associated to struct drm_crtc to the dlist we expect,
and then using it from the vc4_crtc_handle_vblank() function.

Since that state is shared with the modesetting path, we also need to
introduce a spinlock to protect the code shared between the interrupt
handler and the modesetting path, protecting only our new variable for
now.

Link: https://lore.kernel.org/all/YWgteNaNeaS9uWDe@phenom.ffwll.local/
Link: https://lore.kernel.org/r/20211025141113.702757-3-maxime@cerno.tech
Fixes: 56d1fe0979dc ("drm/vc4: Make pageflip completion handling more robust.")
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Acked-by: Patrik Jakobsson <pjakobsson@suse.de>
---
 drivers/gpu/drm/vc4/vc4_crtc.c |  5 ++++-
 drivers/gpu/drm/vc4/vc4_drv.h  | 14 ++++++++++++++
 drivers/gpu/drm/vc4/vc4_hvs.c  |  7 +++++--
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 7d21ea150b98..63143a1333fd 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -721,8 +721,9 @@ static void vc4_crtc_handle_page_flip(struct vc4_crtc *vc4_crtc)
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev->event_lock, flags);
+	spin_lock(&vc4_crtc->irq_lock);
 	if (vc4_crtc->event &&
-	    (vc4_state->mm.start == HVS_READ(SCALER_DISPLACTX(chan)) ||
+	    (vc4_crtc->current_dlist == HVS_READ(SCALER_DISPLACTX(chan)) ||
 	     vc4_crtc->feeds_txp)) {
 		drm_crtc_send_vblank_event(crtc, vc4_crtc->event);
 		vc4_crtc->event = NULL;
@@ -736,6 +737,7 @@ static void vc4_crtc_handle_page_flip(struct vc4_crtc *vc4_crtc)
 		 */
 		vc4_hvs_unmask_underrun(dev, chan);
 	}
+	spin_unlock(&vc4_crtc->irq_lock);
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 }
 
@@ -1135,6 +1137,7 @@ int vc4_crtc_init(struct drm_device *drm, struct vc4_crtc *vc4_crtc,
 		return PTR_ERR(primary_plane);
 	}
 
+	spin_lock_init(&vc4_crtc->irq_lock);
 	drm_crtc_init_with_planes(drm, crtc, primary_plane, NULL,
 				  crtc_funcs, NULL);
 	drm_crtc_helper_add(crtc, crtc_helper_funcs);
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 3dc593a48d09..e979001c9233 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -497,6 +497,20 @@ struct vc4_crtc {
 	 * @feeds_txp: True if the CRTC feeds our writeback controller.
 	 */
 	bool feeds_txp;
+
+	/**
+	 * @irq_lock: Spinlock protecting the resources shared between
+	 * the atomic code and our vblank handler.
+	 */
+	spinlock_t irq_lock;
+
+	/**
+	 * @current_dlist: Start offset of the display list currently
+	 * set in the HVS for that CRTC. Protected by @irq_lock, and
+	 * copied in vc4_hvs_update_dlist() for the CRTC interrupt
+	 * handler to have access to that value.
+	 */
+	unsigned int current_dlist;
 };
 
 static inline struct vc4_crtc *
diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c
index 9ddaee6b368d..f8ed0f6a57e0 100644
--- a/drivers/gpu/drm/vc4/vc4_hvs.c
+++ b/drivers/gpu/drm/vc4/vc4_hvs.c
@@ -365,10 +365,9 @@ static void vc4_hvs_update_dlist(struct drm_crtc *crtc)
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
 	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
+	unsigned long flags;
 
 	if (crtc->state->event) {
-		unsigned long flags;
-
 		crtc->state->event->pipe = drm_crtc_index(crtc);
 
 		WARN_ON(drm_crtc_vblank_get(crtc) != 0);
@@ -388,6 +387,10 @@ static void vc4_hvs_update_dlist(struct drm_crtc *crtc)
 		HVS_WRITE(SCALER_DISPLISTX(vc4_state->assigned_channel),
 			  vc4_state->mm.start);
 	}
+
+	spin_lock_irqsave(&vc4_crtc->irq_lock, flags);
+	vc4_crtc->current_dlist = vc4_state->mm.start;
+	spin_unlock_irqrestore(&vc4_crtc->irq_lock, flags);
 }
 
 void vc4_hvs_atomic_enable(struct drm_crtc *crtc,
-- 
2.38.1