Blob Blame History Raw
From 750bde2fd4ffacc297473c36d6fdb29c395b06aa Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 21 Nov 2019 07:10:41 +0000
Subject: drm/i915: Serialise with remote retirement
Git-commit: 750bde2fd4ffacc297473c36d6fdb29c395b06aa
Patch-mainline: v5.5-rc2
References: bsc#1152489

Since retirement may be running in a worker on another CPU, it may be
skipped in the local intel_gt_wait_for_idle(). To ensure the state is
consistent for our sanity checks upon load, serialise with the remote
retirer by waiting on the timeline->mutex.

Outside of this use case, e.g. on suspend or module unload, we expect the
slack to be picked up by intel_gt_pm_wait_for_idle() and so prefer to
put the special case serialisation with retirement in its single user,
for now at least.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191121071044.97798-2-chris@chris-wilson.co.uk
(cherry picked from commit 2d0fb251360ab7eccbffd99f6933a2a4de678d52)
Fixes: 093b92287363 ("drm/i915: Split i915_active.mutex into an irq-safe spinlock for the rbtree")
Closes: https://gitlab.freedesktop.org/drm/intel/issues/754
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
---
 drivers/gpu/drm/i915/i915_gem.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b9eb6b3149b7..d034fa413164 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -45,6 +45,7 @@
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_ioctls.h"
 #include "gem/i915_gem_pm.h"
+#include "gt/intel_context.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
@@ -1053,6 +1054,18 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 	return err;
 }
 
+static int __intel_context_flush_retire(struct intel_context *ce)
+{
+	struct intel_timeline *tl;
+
+	tl = intel_context_timeline_lock(ce);
+	if (IS_ERR(tl))
+		return PTR_ERR(tl);
+
+	intel_context_timeline_unlock(tl);
+	return 0;
+}
+
 static int __intel_engines_record_defaults(struct intel_gt *gt)
 {
 	struct i915_request *requests[I915_NUM_ENGINES] = {};
@@ -1121,13 +1134,20 @@ static int __intel_engines_record_defaults(struct intel_gt *gt)
 		if (!rq)
 			continue;
 
-		/* We want to be able to unbind the state from the GGTT */
-		GEM_BUG_ON(intel_context_is_pinned(rq->hw_context));
-
+		GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT,
+				     &rq->hw_context->flags));
 		state = rq->hw_context->state;
 		if (!state)
 			continue;
 
+		/* Serialise with retirement on another CPU */
+		err = __intel_context_flush_retire(rq->hw_context);
+		if (err)
+			goto out;
+
+		/* We want to be able to unbind the state from the GGTT */
+		GEM_BUG_ON(intel_context_is_pinned(rq->hw_context));
+
 		/*
 		 * As we will hold a reference to the logical state, it will
 		 * not be torn down with the context, and importantly the
-- 
2.28.0