Blob Blame History Raw
From 2fc7a06ad518610d197bafe7d2d3e8a8f9bb181e Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 7 Dec 2017 22:24:34 +0000
Subject: [PATCH] drm/i915/execlists: Cache ELSP register offset
Git-commit: 2fc7a06ad518610d197bafe7d2d3e8a8f9bb181e
Patch-mainline: v4.16-rc1
References: FATE#322643 bsc#1055900

Currently on every submission, we recalculate the ELSP register offset
for the engine, after chasing the pointers to find the iomem base. Since
this is fixed for the lifetime of the driver, record the offset in the
execlists struct.

In practice the difference is negligible, it just happens to remove 27
bytes of eyesore pointer dancing from next to the hottest instruction
(which is itself due to stalling for a cache miss) in perf profiles of
the execlists_submission_tasklet().

V2: Trim off one more elsp local.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Michel Thierry <michel.thierry@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20171207222434.17686-1-chris@chris-wilson.co.uk
Acked-by: Takashi Iwai <tiwai@suse.de>

---
 drivers/gpu/drm/i915/intel_lrc.c        |   13 ++++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.h |    5 +++++
 2 files changed, 11 insertions(+), 7 deletions(-)

--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -431,8 +431,6 @@ static inline void elsp_write(u64 desc,
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
 	struct execlist_port *port = engine->execlists.port;
-	u32 __iomem *elsp =
-		engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
 	unsigned int n;
 
 	for (n = execlists_num_ports(&engine->execlists); n--; ) {
@@ -458,7 +456,7 @@ static void execlists_submit_ports(struc
 			desc = 0;
 		}
 
-		elsp_write(desc, elsp);
+		elsp_write(desc, engine->execlists.elsp);
 	}
 	execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
 }
@@ -496,8 +494,6 @@ static void inject_preempt_context(struc
 {
 	struct intel_context *ce =
 		&engine->i915->preempt_context->engine[engine->id];
-	u32 __iomem *elsp =
-		engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
 	unsigned int n;
 
 	GEM_BUG_ON(engine->i915->preempt_context->hw_id != PREEMPT_ID);
@@ -510,9 +506,9 @@ static void inject_preempt_context(struc
 
 	GEM_TRACE("\n");
 	for (n = execlists_num_ports(&engine->execlists); --n; )
-		elsp_write(0, elsp);
+		elsp_write(0, engine->execlists.elsp);
 
-	elsp_write(ce->lrc_desc, elsp);
+	elsp_write(ce->lrc_desc, engine->execlists.elsp);
 	execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
 }
 
@@ -1512,6 +1508,9 @@ static int gen8_init_common_ring(struct
 	execlists->csb_head = -1;
 	execlists->active = 0;
 
+	execlists->elsp =
+		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+
 	/* After a GPU reset, we may have requests to replay */
 	if (execlists->first)
 		tasklet_schedule(&execlists->tasklet);
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -199,6 +199,11 @@ struct intel_engine_execlists {
 	bool no_priolist;
 
 	/**
+	 * @elsp: the ExecList Submission Port register
+	 */
+	u32 __iomem *elsp;
+
+	/**
 	 * @port: execlist port states
 	 *
 	 * For each hardware ELSP (ExecList Submission Port) we keep