Blob Blame History Raw
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 19 Feb 2018 22:06:31 +0000
Subject: drm/i915: Track number of pending freed objects
Git-commit: c9c70471549a0f5956bb3da4fc2609cd567a809d
Patch-mainline: v4.17-rc1
References: FATE#326289 FATE#326079 FATE#326049 FATE#322398 FATE#326166

During igt, we frequently call into the driver to reset both HW and
driver state (idling the device, waiting for it to become idle and
freeing off old objects) to ensure that we start each test/subtest/pass
from known state. This process incurs an RCU barrier or two to ensure
that any such pending frees are indeed flushed before we return.
However, unconditionally waiting on the RCU barrier adds needless delay
to many callers, which adds up to several seconds when repeated thousands
of times. We can skip the rcu_barrier() if by tracking how many outstanding
frees we have, we know there are none.

The same path is used along suspend, where we may be able to save the
unconditional RCU barrier.

To put it into perspective with a completely meaningless
microbenchmark, igt/gem_sync/idle is improved from 50ms to 30us on bdw.

v2: Remove the extra synchronize_rcu() inside i915_drop_caches_set()

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180219220631.25001-1-chris@chris-wilson.co.uk

Acked-by: Petr Tesarik <ptesarik@suse.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c |    4 +---
 drivers/gpu/drm/i915/i915_drv.h     |    8 ++++++++
 drivers/gpu/drm/i915/i915_gem.c     |    7 ++++++-
 3 files changed, 15 insertions(+), 4 deletions(-)

--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4078,10 +4078,8 @@ i915_drop_caches_set(void *data, u64 val
 	if (val & DROP_IDLE)
 		drain_delayed_work(&dev_priv->gt.idle_work);
 
-	if (val & DROP_FREED) {
-		synchronize_rcu();
+	if (val & DROP_FREED)
 		i915_gem_drain_freed_objects(dev_priv);
-	}
 
 	return ret;
 }
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1095,6 +1095,11 @@ struct i915_gem_mm {
 	struct llist_head free_list;
 	struct work_struct free_work;
 	spinlock_t free_lock;
+	/**
+	 * Count of objects pending destructions. Used to skip needlessly
+	 * waiting on an RCU barrier if no objects are waiting to be freed.
+	 */
+	atomic_t free_count;
 
 	/**
 	 * Small stash of WC pages
@@ -3134,6 +3139,9 @@ void i915_gem_free_object(struct drm_gem
 
 static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
 {
+	if (!atomic_read(&i915->mm.free_count))
+		return;
+
 	/* A single pass should suffice to release all the freed objects (along
 	 * most call paths) , but be a little more paranoid in that freeing
 	 * the objects does take a little amount of time, during which the rcu
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4770,6 +4770,9 @@ static void __i915_gem_free_objects(stru
 		kfree(obj->bit_17);
 		i915_gem_object_free(obj);
 
+		GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
+		atomic_dec(&i915->mm.free_count);
+
 		if (on)
 			cond_resched();
 	}
@@ -4858,6 +4861,7 @@ void i915_gem_free_object(struct drm_gem
 	 * i915_gem_busy_ioctl(). For the corresponding synchronized
 	 * lookup see i915_gem_object_lookup_rcu().
 	 */
+	atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
 	call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
 }
 
@@ -5558,7 +5562,8 @@ err_out:
 void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
 {
 	i915_gem_drain_freed_objects(dev_priv);
-	WARN_ON(!llist_empty(&dev_priv->mm.free_list));
+	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
+	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
 	WARN_ON(dev_priv->mm.object_count);
 
 	mutex_lock(&dev_priv->drm.struct_mutex);