Blob Blame History Raw
From 7fc92e96c3eed6004ce8dab5e315264bff85db5a Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 16 Jun 2017 11:54:55 +0100
Subject: [PATCH] drm/i915: Store i915_gem_object_is_coherent() as a bit next to cache-dirty
Git-commit: 7fc92e96c3eed6004ce8dab5e315264bff85db5a
Patch-mainline: v4.13-rc1
References: FATE#322643 bsc#1055900

For ease of use (i.e. avoiding a few checks and function calls), store
the object's cache coherency next to the cache is dirty bit.

Specifically this patch aims to reduce the frequency of no-op calls to
i915_gem_object_clflush() to counter-act the increase of such calls for
GPU only objects in the previous patch.

V2: Replace cache_dirty & ~cache_coherent with cache_dirty &&
!cache_coherent as gcc generates much better code for the latter
(Tvrtko)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Tested-by: Dongwon Kim <dongwon.kim@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170616105455.16977-1-chris@chris-wilson.co.uk
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Acked-by: Takashi Iwai <tiwai@suse.de>

---
 drivers/gpu/drm/i915/i915_gem.c                  |   14 +++++++-------
 drivers/gpu/drm/i915/i915_gem_clflush.c          |    2 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c       |    2 +-
 drivers/gpu/drm/i915/i915_gem_internal.c         |    3 ++-
 drivers/gpu/drm/i915/i915_gem_object.h           |    1 +
 drivers/gpu/drm/i915/i915_gem_stolen.c           |    1 +
 drivers/gpu/drm/i915/i915_gem_userptr.c          |    3 ++-
 drivers/gpu/drm/i915/selftests/huge_gem_object.c |    3 ++-
 8 files changed, 17 insertions(+), 12 deletions(-)

--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -52,7 +52,7 @@ static bool cpu_write_needs_clflush(stru
 	if (obj->cache_dirty)
 		return false;
 
-	if (!i915_gem_object_is_coherent(obj))
+	if (!obj->cache_coherent)
 		return true;
 
 	return obj->pin_display;
@@ -253,7 +253,7 @@ __i915_gem_object_release_shmem(struct d
 
 	if (needs_clflush &&
 	    (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
-	    !i915_gem_object_is_coherent(obj))
+	    !obj->cache_coherent)
 		drm_clflush_sg(pages);
 
 	__start_cpu_write(obj);
@@ -856,8 +856,7 @@ int i915_gem_obj_prepare_shmem_read(stru
 	if (ret)
 		return ret;
 
-	if (i915_gem_object_is_coherent(obj) ||
-	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+	if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		ret = i915_gem_object_set_to_cpu_domain(obj, false);
 		if (ret)
 			goto err_unpin;
@@ -909,8 +908,7 @@ int i915_gem_obj_prepare_shmem_write(str
 	if (ret)
 		return ret;
 
-	if (i915_gem_object_is_coherent(obj) ||
-	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+	if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		ret = i915_gem_object_set_to_cpu_domain(obj, true);
 		if (ret)
 			goto err_unpin;
@@ -3684,6 +3682,7 @@ restart:
 	list_for_each_entry(vma, &obj->vma_list, obj_link)
 		vma->node.color = cache_level;
 	obj->cache_level = cache_level;
+	obj->cache_coherent = i915_gem_object_is_coherent(obj);
 	obj->cache_dirty = true; /* Always invalidate stale cachelines */
 
 	return 0;
@@ -4344,7 +4343,8 @@ i915_gem_object_create(struct drm_i915_p
 	} else
 		obj->cache_level = I915_CACHE_NONE;
 
-	obj->cache_dirty = !i915_gem_object_is_coherent(obj);
+	obj->cache_coherent = i915_gem_object_is_coherent(obj);
+	obj->cache_dirty = !obj->cache_coherent;
 
 	trace_i915_gem_object_create(obj);
 
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@@ -139,7 +139,7 @@ void i915_gem_clflush_object(struct drm_
 	 * snooping behaviour occurs naturally as the result of our domain
 	 * tracking.
 	 */
-	if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj))
+	if (!(flags & I915_CLFLUSH_FORCE) && obj->cache_coherent)
 		return;
 
 	trace_i915_gem_object_clflush(obj);
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1110,7 +1110,7 @@ eb_move_to_gpu(struct i915_execbuffer *e
 		if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
 			continue;
 
-		if (obj->cache_dirty)
+		if (unlikely(obj->cache_dirty && !obj->cache_coherent))
 			i915_gem_clflush_object(obj, 0);
 
 		ret = i915_gem_request_await_object
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -191,7 +191,8 @@ i915_gem_object_create_internal(struct d
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
-	obj->cache_dirty = !i915_gem_object_is_coherent(obj);
+	obj->cache_coherent = i915_gem_object_is_coherent(obj);
+	obj->cache_dirty = !obj->cache_coherent;
 
 	return obj;
 }
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -121,6 +121,7 @@ struct drm_i915_gem_object {
 	unsigned long gt_ro:1;
 	unsigned int cache_level:3;
 	unsigned int cache_dirty:1;
+	unsigned int cache_coherent:1;
 
 	atomic_t frontbuffer_bits;
 	unsigned int frontbuffer_ggtt_origin; /* write once */
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -590,6 +590,7 @@ _i915_gem_object_create_stolen(struct dr
 	obj->stolen = stolen;
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
 	obj->cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE;
+	obj->cache_coherent = true; /* assumptions! more like cache_oblivious */
 
 	if (i915_gem_object_pin_pages(obj))
 		goto cleanup;
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -805,7 +805,8 @@ i915_gem_userptr_ioctl(struct drm_device
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	obj->cache_level = I915_CACHE_LLC;
-	obj->cache_dirty = !i915_gem_object_is_coherent(obj);
+	obj->cache_coherent = i915_gem_object_is_coherent(obj);
+	obj->cache_dirty = !obj->cache_coherent;
 
 	obj->userptr.ptr = args->user_ptr;
 	obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
--- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
@@ -129,7 +129,8 @@ huge_gem_object(struct drm_i915_private
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
-	obj->cache_dirty = !i915_gem_object_is_coherent(obj);
+	obj->cache_coherent = i915_gem_object_is_coherent(obj);
+	obj->cache_dirty = !obj->cache_coherent;
 	obj->scratch = phys_size;
 
 	return obj;