Blob Blame History Raw
From 0b1570b7ffe68dfefa07cb092a0723f898bb8184 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 27 Feb 2020 08:57:14 +0000
Subject: [PATCH] drm/i915: Protect i915_request_await_start from early waits
Git-commit: 0b1570b7ffe68dfefa07cb092a0723f898bb8184
Patch-mainline: v5.6-rc5
No-fix: d22d2d073ef859b346bc32cb25299262e3973769
References: bsc#1174737

We need to be extremely careful inside i915_request_await_start() as it
needs to walk the list of requests in the foreign timeline with very
little protection. As we hold our own timeline mutex, we can not nest
inside the signaler's timeline mutex, so all that remains is our RCU
protection. However, to be safe we need to tell the compiler that we may
be traversing the list only under RCU protection, and furthermore we
need to start declaring requests as elements of the timeline from their
construction.

Fixes: 9ddc8ec027a3 ("drm/i915: Eliminate the trylock for awaiting an earlier re
quest")
Fixes: 6a79d848403d ("drm/i915: Lock signaler timeline while navigating")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200227085723.1961649-11-ch
ris@chris-wilson.co.uk
(cherry picked from commit d22d2d073ef859b346bc32cb25299262e3973769)

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Acked-by: Takashi Iwai <tiwai@suse.de>

---
 drivers/gpu/drm/i915/i915_request.c |   41 ++++++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 13 deletions(-)

--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -298,7 +298,7 @@ static bool i915_request_retire(struct i
 	spin_unlock_irq(&rq->lock);
 
 	remove_from_client(rq);
-	list_del(&rq->link);
+	list_del_rcu(&rq->link);
 
 	intel_context_exit(rq->hw_context);
 	intel_context_unpin(rq->hw_context);
@@ -734,6 +734,8 @@ __i915_request_create(struct intel_conte
 	rq->infix = rq->ring->emit; /* end of header; start of user payload */
 
 	intel_context_mark_active(ce);
+	list_add_tail_rcu(&rq->link, &tl->requests);
+
 	return rq;
 
 err_unwind:
@@ -791,13 +793,23 @@ i915_request_await_start(struct i915_req
 	GEM_BUG_ON(i915_request_timeline(rq) ==
 		   rcu_access_pointer(signal->timeline));
 
+	if (i915_request_started(signal))
+		return 0;
+
 	fence = NULL;
 	rcu_read_lock();
 	spin_lock_irq(&signal->lock);
-	if (!i915_request_started(signal) &&
-	    !list_is_first(&signal->link,
-			   &rcu_dereference(signal->timeline)->requests)) {
-		struct i915_request *prev = list_prev_entry(signal, link);
+	do {
+		struct list_head *pos = READ_ONCE(signal->link.prev);
+		struct i915_request *prev;
+
+		/* Confirm signal has not been retired, the link is valid */
+		if (unlikely(i915_request_started(signal)))
+			break;
+
+		/* Is signal the earliest request on its timeline? */
+		if (pos == &rcu_dereference(signal->timeline)->requests)
+			break;
 
 		/*
 		 * Peek at the request before us in the timeline. That
@@ -805,13 +817,18 @@ i915_request_await_start(struct i915_req
 		 * after acquiring a reference to it, confirm that it is
 		 * still part of the signaler's timeline.
 		 */
-		if (i915_request_get_rcu(prev)) {
-			if (list_next_entry(prev, link) == signal)
-				fence = &prev->fence;
-			else
-				i915_request_put(prev);
+		prev = list_entry(pos, typeof(*prev), link);
+		if (!i915_request_get_rcu(prev))
+			break;
+
+		/* After the strong barrier, confirm prev is still attached */
+		if (unlikely(READ_ONCE(prev->link.next) != &signal->link)) {
+			i915_request_put(prev);
+			break;
 		}
-	}
+
+		fence = &prev->fence;
+	} while (0);
 	spin_unlock_irq(&signal->lock);
 	rcu_read_unlock();
 	if (!fence)
@@ -1244,8 +1261,6 @@ __i915_request_add_to_timeline(struct i9
 							 0);
 	}
 
-	list_add_tail(&rq->link, &timeline->requests);
-
 	/*
 	 * Make sure that no request gazumped us - if it was allocated after
 	 * our i915_request_alloc() and called __i915_request_add() before