Blob Blame History Raw
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Mon, 30 Apr 2018 10:52:59 +0300
Subject: drm/i915: Print error state times relative to capture
Git-commit: 043477b088d2af61a0937754c6560002237e6741
Patch-mainline: v4.18-rc1
References: FATE#326289 FATE#326079 FATE#326049 FATE#322398 FATE#326166

Using plain jiffies in error state output makes the output
time differences relative to the current system time. This
is wrong as it makes output time differences dependent
of when the error state is printed rather than when it is
captured.

Store capture jiffies into error state and use it
when outputting the state to fix time differences output.

v2: use engine timestamp as epoch, output formatting (Chris)
v3: pass epoch to print_engine/request (Chris)

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20180430075259.4476-1-mika.kuoppala@linux.intel.com

Acked-by: Petr Tesarik <ptesarik@suse.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c |   47 +++++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/i915_gpu_error.h |    2 +
 2 files changed, 40 insertions(+), 9 deletions(-)

--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -404,15 +404,16 @@ static const char *bannable(const struct
 
 static void error_print_request(struct drm_i915_error_state_buf *m,
 				const char *prefix,
-				const struct drm_i915_error_request *erq)
+				const struct drm_i915_error_request *erq,
+				const unsigned long epoch)
 {
 	if (!erq->seqno)
 		return;
 
-	err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n",
+	err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, head %08x, tail %08x\n",
 		   prefix, erq->pid, erq->ban_score,
 		   erq->context, erq->seqno, erq->sched_attr.priority,
-		   jiffies_to_msecs(jiffies - erq->jiffies),
+		   jiffies_to_msecs(erq->jiffies - epoch),
 		   erq->head, erq->tail);
 }
 
@@ -427,7 +428,8 @@ static void error_print_context(struct d
 }
 
 static void error_print_engine(struct drm_i915_error_state_buf *m,
-			       const struct drm_i915_error_engine *ee)
+			       const struct drm_i915_error_engine *ee,
+			       const unsigned long epoch)
 {
 	int n;
 
@@ -497,14 +499,15 @@ static void error_print_engine(struct dr
 	err_printf(m, "  hangcheck stall: %s\n", yesno(ee->hangcheck_stalled));
 	err_printf(m, "  hangcheck action: %s\n",
 		   hangcheck_action_to_str(ee->hangcheck_action));
-	err_printf(m, "  hangcheck action timestamp: %lu, %u ms ago\n",
+	err_printf(m, "  hangcheck action timestamp: %dms (%lu%s)\n",
+		   jiffies_to_msecs(ee->hangcheck_timestamp - epoch),
 		   ee->hangcheck_timestamp,
-		   jiffies_to_msecs(jiffies - ee->hangcheck_timestamp));
+		   ee->hangcheck_timestamp == epoch ? "; epoch" : "");
 	err_printf(m, "  engine reset count: %u\n", ee->reset_count);
 
 	for (n = 0; n < ee->num_ports; n++) {
 		err_printf(m, "  ELSP[%d]:", n);
-		error_print_request(m, " ", &ee->execlist[n]);
+		error_print_request(m, " ", &ee->execlist[n], epoch);
 	}
 
 	error_print_context(m, "  Active context: ", &ee->context);
@@ -650,6 +653,11 @@ int i915_error_state_to_str(struct drm_i
 	ts = ktime_to_timespec64(error->uptime);
 	err_printf(m, "Uptime: %lld s %ld us\n",
 		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
+	err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ);
+	err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n",
+		   error->capture,
+		   jiffies_to_msecs(jiffies - error->capture),
+		   jiffies_to_msecs(error->capture - error->epoch));
 
 	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
 		if (error->engine[i].hangcheck_stalled &&
@@ -710,7 +718,7 @@ int i915_error_state_to_str(struct drm_i
 
 	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
 		if (error->engine[i].engine_id != -1)
-			error_print_engine(m, &error->engine[i]);
+			error_print_engine(m, &error->engine[i], error->epoch);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) {
@@ -769,7 +777,9 @@ int i915_error_state_to_str(struct drm_i
 				   dev_priv->engine[i]->name,
 				   ee->num_requests);
 			for (j = 0; j < ee->num_requests; j++)
-				error_print_request(m, " ", &ee->requests[j]);
+				error_print_request(m, " ",
+						    &ee->requests[j],
+						    error->epoch);
 		}
 
 		if (IS_ERR(ee->waiters)) {
@@ -1736,6 +1746,22 @@ static void capture_params(struct i915_g
 #undef DUP
 }
 
+static unsigned long capture_find_epoch(const struct i915_gpu_state *error)
+{
+	unsigned long epoch = error->capture;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
+		const struct drm_i915_error_engine *ee = &error->engine[i];
+
+		if (ee->hangcheck_stalled &&
+		    time_before(ee->hangcheck_timestamp, epoch))
+			epoch = ee->hangcheck_timestamp;
+	}
+
+	return epoch;
+}
+
 static int capture(void *data)
 {
 	struct i915_gpu_state *error = data;
@@ -1744,6 +1770,7 @@ static int capture(void *data)
 	error->boottime = ktime_get_boottime();
 	error->uptime = ktime_sub(ktime_get(),
 				  error->i915->gt.last_init_time);
+	error->capture = jiffies;
 
 	capture_params(error);
 	capture_gen_state(error);
@@ -1757,6 +1784,8 @@ static int capture(void *data)
 	error->overlay = intel_overlay_capture_error_state(error->i915);
 	error->display = intel_display_capture_error_state(error->i915);
 
+	error->epoch = capture_find_epoch(error);
+
 	return 0;
 }
 
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -31,6 +31,8 @@ struct i915_gpu_state {
 	ktime_t time;
 	ktime_t boottime;
 	ktime_t uptime;
+	unsigned long capture;
+	unsigned long epoch;
 
 	struct drm_i915_private *i915;