Blob Blame History Raw
From c2bea967160104309185fbf5debe9123aaf7dbae Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime@cerno.tech>
Date: Fri, 10 Jun 2022 13:51:48 +0200
Subject: drm/vc4: crtc: Fix out of order frames during asynchronous page flips
Git-commit: d19e00ee06a9abf590b178c34cad637a516752f8
Patch-mainline: v5.19-rc4
References: jsc#PED-1166 jsc#PED-1168 jsc#PED-1170 jsc#PED-1218 jsc#PED-1220 jsc#PED-1222 jsc#PED-1223 jsc#PED-1225

When doing an asynchronous page flip (PAGE_FLIP ioctl with the
DRM_MODE_PAGE_FLIP_ASYNC flag set), the current code waits for the
possible GPU buffer being rendered through a call to
vc4_queue_seqno_cb().

On the BCM2835-37, the GPU driver is part of the vc4 driver and that
function is defined in vc4_gem.c to wait for the buffer to be rendered,
and once it's done, call a callback.

However, on the BCM2711 used on the RaspberryPi4, the GPU driver is
separate (v3d) and that function won't do anything. This was working
because we were going into a path, due to uninitialized variables, that
was always scheduling the callback.

However, we were never actually waiting for the buffer to be rendered
which was resulting in frames being displayed out of order.

The generic API to signal those kind of completion in the kernel are the
DMA fences, and fortunately the v3d drivers supports them and signal
when its job is done. That API also provides an equivalent function that
allows to have a callback being executed when the fence is signalled as
done.

Let's change our driver a bit to rely on the previous function for the
older SoCs, and on DMA fences for the BCM2711.

Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Reviewed-by: Melissa Wen <mwen@igalia.com>
Link: https://lore.kernel.org/r/20220610115149.964394-14-maxime@cerno.tech
Acked-by: Patrik Jakobsson <pjakobsson@suse.de>
---
 drivers/gpu/drm/vc4/vc4_crtc.c | 50 +++++++++++++++++++++++++++++++---
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index a3c04d6cbd20..9355213dc883 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -776,6 +776,7 @@ struct vc4_async_flip_state {
 	struct drm_pending_vblank_event *event;
 
 	union {
+		struct dma_fence_cb fence;
 		struct vc4_seqno_cb seqno;
 	} cb;
 };
@@ -835,6 +836,50 @@ static void vc4_async_page_flip_seqno_complete(struct vc4_seqno_cb *cb)
 		vc4_bo_dec_usecnt(bo);
 }
 
+static void vc4_async_page_flip_fence_complete(struct dma_fence *fence,
+					       struct dma_fence_cb *cb)
+{
+	struct vc4_async_flip_state *flip_state =
+		container_of(cb, struct vc4_async_flip_state, cb.fence);
+
+	vc4_async_page_flip_complete(flip_state);
+	dma_fence_put(fence);
+}
+
+static int vc4_async_set_fence_cb(struct drm_device *dev,
+				  struct vc4_async_flip_state *flip_state)
+{
+	struct drm_framebuffer *fb = flip_state->fb;
+	struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct dma_fence *fence;
+	int ret;
+
+	if (!vc4->is_vc5) {
+		struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
+
+		return vc4_queue_seqno_cb(dev, &flip_state->cb.seqno, bo->seqno,
+					  vc4_async_page_flip_seqno_complete);
+	}
+
+	ret = dma_resv_get_singleton(cma_bo->base.resv, DMA_RESV_USAGE_READ, &fence);
+	if (ret)
+		return ret;
+
+	/* If there's no fence, complete the page flip immediately */
+	if (!fence) {
+		vc4_async_page_flip_fence_complete(fence, &flip_state->cb.fence);
+		return 0;
+	}
+
+	/* If the fence has already been completed, complete the page flip */
+	if (dma_fence_add_callback(fence, &flip_state->cb.fence,
+				   vc4_async_page_flip_fence_complete))
+		vc4_async_page_flip_fence_complete(fence, &flip_state->cb.fence);
+
+	return 0;
+}
+
 static int
 vc4_async_page_flip_common(struct drm_crtc *crtc,
 			   struct drm_framebuffer *fb,
@@ -844,8 +889,6 @@ vc4_async_page_flip_common(struct drm_crtc *crtc,
 	struct drm_device *dev = crtc->dev;
 	struct drm_plane *plane = crtc->primary;
 	struct vc4_async_flip_state *flip_state;
-	struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
-	struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
 
 	flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL);
 	if (!flip_state)
@@ -876,8 +919,7 @@ vc4_async_page_flip_common(struct drm_crtc *crtc,
 	 */
 	drm_atomic_set_fb_for_plane(plane->state, fb);
 
-	vc4_queue_seqno_cb(dev, &flip_state->cb.seqno, bo->seqno,
-			   vc4_async_page_flip_seqno_complete);
+	vc4_async_set_fence_cb(dev, flip_state);
 
 	/* Driver takes ownership of state on successful async commit. */
 	return 0;
-- 
2.38.1