Blob Blame History Raw
From 767a983ab25564e8fcc84fa203d1c51d2e50c6ef Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 13 Sep 2017 09:56:05 +0100
Subject: [PATCH] drm/i915/execlists: Read the context-status HEAD from the HWSP
Git-commit: 767a983ab25564e8fcc84fa203d1c51d2e50c6ef
Patch-mainline: v4.15-rc1
References: FATE#322643 bsc#1055900

The engine also provides a mirror of the CSB write pointer in the HWSP,
but not of our read pointer. To take advantage of this we need to
remember where we read up to on the last interrupt and continue off from
there. This poses a problem following a reset, as we don't know where
the hw will start writing from, and due to the use of power contexts we
cannot perform that query during the reset itself. So we continue the
current modus operandi of delaying the first read of the context-status
read/write pointers until after the first interrupt. With this we should
now have eliminated all uncached mmio reads in handling the
context-status interrupt, though we still have the uncached mmio writes
for submitting new work, and many uncached mmio reads in the global
interrupt handler itself. Still a step in the right direction towards
reducing our resubmit latency, although it appears lost in the noise!

V2: Cannonlake moved the CSB write index
V3: Include the sw/hwsp state in debugfs/i915_engine_info
V4: Also revert to using CSB mmio for GVT-g
V5: Prevent the compiler reloading tail (Mika)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Zhi Wang <zhi.a.wang@intel.com>
Acked-by: Michel Thierry <michel.thierry@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170913085605.18299-6-chris@chris-wilson.co.uk
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Acked-by: Takashi Iwai <tiwai@suse.de>

---
 drivers/gpu/drm/i915/i915_debugfs.c     |    6 ++++--
 drivers/gpu/drm/i915/i915_drv.h         |    8 ++++++++
 drivers/gpu/drm/i915/intel_lrc.c        |   27 ++++++++++++++++++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.h |    3 +++
 4 files changed, 35 insertions(+), 9 deletions(-)

--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3326,8 +3326,10 @@ static int i915_engine_info(struct seq_f
 			ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
 			read = GEN8_CSB_READ_PTR(ptr);
 			write = GEN8_CSB_WRITE_PTR(ptr);
-			seq_printf(m, "\tExeclist CSB read %d, write %d, interrupt posted? %s\n",
-				   read, write,
+			seq_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s\n",
+				   read, engine->csb_head,
+				   write,
+				   intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
 				   yesno(test_bit(ENGINE_IRQ_EXECLIST,
 						  &engine->irq_posted)));
 			if (read >= GEN8_CSB_ENTRIES)
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -4400,4 +4400,12 @@ int remap_io_mapping(struct vm_area_stru
 		     unsigned long addr, unsigned long pfn, unsigned long size,
 		     struct io_mapping *iomap);
 
+static inline int intel_hws_csb_write_index(struct drm_i915_private *i915)
+{
+	if (INTEL_GEN(i915) >= 10)
+		return CNL_HWS_CSB_WRITE_INDEX;
+	else
+		return I915_HWS_CSB_WRITE_INDEX;
+}
+
 #endif
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -539,8 +539,6 @@ static void intel_lrc_irq_handler(unsign
 	 * new request (outside of the context-switch interrupt).
 	 */
 	while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
-		u32 __iomem *csb_mmio =
-			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
 		/* The HWSP contains a (cacheable) mirror of the CSB */
 		const u32 *buf =
 			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
@@ -550,6 +548,7 @@ static void intel_lrc_irq_handler(unsign
 		if (unlikely(engine->csb_use_mmio)) {
 			buf = (u32 * __force)
 				(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+			engine->csb_head = -1; /* force mmio read of CSB ptrs */
 		}
 
 		/* The write will be ordered by the uncached read (itself
@@ -563,9 +562,19 @@ static void intel_lrc_irq_handler(unsign
 		 * is set and we do a new loop.
 		 */
 		__clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-		head = readl(csb_mmio);
-		tail = GEN8_CSB_WRITE_PTR(head);
-		head = GEN8_CSB_READ_PTR(head);
+		if (unlikely(engine->csb_head == -1)) { /* following a reset */
+			head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+			tail = GEN8_CSB_WRITE_PTR(head);
+			head = GEN8_CSB_READ_PTR(head);
+			engine->csb_head = head;
+		} else {
+			const int write_idx =
+				intel_hws_csb_write_index(dev_priv) -
+				I915_HWS_CSB_BUF0_INDEX;
+
+			head = engine->csb_head;
+			tail = READ_ONCE(buf[write_idx]);
+		}
 		while (head != tail) {
 			struct drm_i915_gem_request *rq;
 			unsigned int status;
@@ -619,8 +628,11 @@ static void intel_lrc_irq_handler(unsign
 				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
 		}
 
-		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
-		       csb_mmio);
+		if (head != engine->csb_head) {
+			engine->csb_head = head;
+			writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
+			       dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+		}
 	}
 
 	if (execlists_elsp_ready(engine))
@@ -1269,6 +1281,7 @@ static int gen8_init_common_ring(struct
 	I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
 		   GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+	engine->csb_head = -1;
 
 	/* After a GPU reset, we may have requests to replay */
 	submit = false;
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -391,6 +391,7 @@ struct intel_engine_cs {
 	struct rb_root execlist_queue;
 	struct rb_node *execlist_first;
 	unsigned int fw_domains;
+	unsigned int csb_head;
 	bool csb_use_mmio;
 
 	/* Contexts are pinned whilst they are active on the GPU. The last
@@ -498,6 +499,8 @@ intel_write_status_page(struct intel_eng
 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 
 #define I915_HWS_CSB_BUF0_INDEX		0x10
+#define I915_HWS_CSB_WRITE_INDEX	0x1f
+#define CNL_HWS_CSB_WRITE_INDEX		0x2f
 
 struct intel_ring *
 intel_engine_create_ring(struct intel_engine_cs *engine, int size);