Blob Blame History Raw
From fcae496153a3ce719082746a01c81dd581e7a239 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 8 May 2020 10:29:27 +0100
Subject: [PATCH] drm/i915: Prevent using semaphores to chain up to external fences
Git-commit: fcae496153a3ce719082746a01c81dd581e7a239
Patch-mainline: v5.8-rc1
References: bsc#1174737

The downside of using semaphores is that we lose metadata passing
along the signaling chain. This is particularly nasty when we
need to pass along a fatal error such as EFAULT or EDEADLK. For
fatal errors we want to scrub the request before it is executed,
which means that we cannot preload the request onto HW and have
it wait upon a semaphore.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200508092933.738-3-chris@chris-wilson.co.uk
Acked-by: Takashi Iwai <tiwai@suse.de>

---
 drivers/gpu/drm/i915/i915_request.c         |   26 ++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_scheduler_types.h |    1 +
 2 files changed, 27 insertions(+)

--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -922,6 +922,15 @@ emit_semaphore_wait(struct i915_request
 		    struct i915_request *from,
 		    gfp_t gfp)
 {
+	/*
+	 * If this or its dependents are waiting on an external fence
+	 * that may fail catastrophically, then we want to avoid using
+	 * sempahores as they bypass the fence signaling metadata, and we
+	 * lose the fence->error propagation.
+	 */
+	if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN)
+		goto await_fence;
+
 	/* Just emit the first semaphore we see as request space is limited. */
 	if (already_busywaiting(to) & from->engine->mask)
 		goto await_fence;
@@ -988,12 +997,29 @@ i915_request_await_request(struct i915_r
 			return ret;
 	}
 
+	if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN)
+		to->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN;
+
 	return 0;
 }
 
+static void mark_external(struct i915_request *rq)
+{
+	/*
+	 * The downside of using semaphores is that we lose metadata passing
+	 * along the signaling chain. This is particularly nasty when we
+	 * need to pass along a fatal error such as EFAULT or EDEADLK. For
+	 * fatal errors we want to scrub the request before it is executed,
+	 * which means that we cannot preload the request onto HW and have
+	 * it wait upon a semaphore.
+	 */
+	rq->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN;
+}
+
 static int
 __i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
 {
+	mark_external(rq);
 	return i915_sw_fence_await_dma_fence(&rq->submit, fence,
 					     fence->context ? I915_FENCE_TIMEOUT : 0,
 					     I915_FENCE_GFP);
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -57,6 +57,7 @@ struct i915_sched_node {
 	struct i915_sched_attr attr;
 	unsigned int flags;
 #define I915_SCHED_HAS_SEMAPHORE_CHAIN	BIT(0)
+#define I915_SCHED_HAS_EXTERNAL_CHAIN	BIT(1)
 	intel_engine_mask_t semaphores;
 };