From 02c2e9677e4ed0a8ba90651bd84cbd52ec596f3f Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 21 Apr 2020 18:13:51 +0100
Subject: drm/i915/selftests: Unroll the CS frequency loop
Git-commit: 33883310cd8ed365a4279600b329c50992e8f528
Patch-mainline: v5.8-rc1
References: jsc#SLE-12680, jsc#SLE-12880, jsc#SLE-12882, jsc#SLE-12883, jsc#SLE-13496, jsc#SLE-15322
Having noticed that MI_BB_START is incurring a memory stall (see the
correlation with uncore frequency), we have to unroll the loop in order
to diminish the impact of the MI_BB_START on the instruction throughput.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200421171351.19575-1-chris@chris-wilson.co.uk
Signed-off-by: Patrik Jakobsson <pjakobsson@suse.de>
---
drivers/gpu/drm/i915/gt/selftest_rps.c | 33 ++++++++++++++++----------
1 file changed, 20 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index e2afc2003caa..0d7ed000aff0 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -49,14 +49,17 @@ create_spin_counter(struct intel_engine_cs *engine,
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
+ unsigned long end;
u32 *base, *cs;
int loop, i;
int err;
- obj = i915_gem_object_create_internal(vm->i915, 4096);
+ obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
if (IS_ERR(obj))
return ERR_CAST(obj);
+ end = obj->base.size / sizeof(u32) - 1;
+
vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma)) {
i915_gem_object_put(obj);
@@ -90,27 +93,31 @@ create_spin_counter(struct intel_engine_cs *engine,
loop = cs - base;
- *cs++ = MI_MATH(4);
- *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
- *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
- *cs++ = MI_MATH_ADD;
- *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
-
- if (srm) {
- *cs++ = MI_STORE_REGISTER_MEM_GEN8;
- *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
- *cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
- *cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
+ /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
+ for (i = 0; i < 1024; i++) {
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
+
+ if (srm) {
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8;
+ *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
+ *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
+ *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
+ }
}
*cs++ = MI_BATCH_BUFFER_START_GEN8;
*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
+ GEM_BUG_ON(cs - base > end);
i915_gem_object_flush_map(obj);
*cancel = base + loop;
- *counter = srm ? memset32(base + 1000, 0, 1) : NULL;
+ *counter = srm ? memset32(base + end, 0, 1) : NULL;
return vma;
}
--
2.28.0