From: Chris Wilson Date: Tue, 24 Apr 2018 15:29:45 +0100 Subject: drm/i915/breadcrumbs: Keep the fake irq armed across reset Git-commit: 36a501a199336fbf87299ebbe598d0af00922949 Patch-mainline: v4.18-rc1 References: FATE#326289 FATE#326079 FATE#326049 FATE#322398 FATE#326166 Instead of synchronously cancelling the timer and re-enabling it inside the reset callbacks, keep the timer enabled and let it die on its next wakeup if no longer required. This allows intel_engine_reset_breadcrumbs() to be used from an atomic (timer/softirq) context such as required for resetting an engine. It also allows us to react better to the user poking around debugfs for testing missed irqs. v2: Tighten the order of del_timer_sync as the fake_irq timer may trigger the hangcheck timer, and so we should cancel it first and then cancel the hangcheck (Mika) Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180424142945.6787-1-chris@chris-wilson.co.uk Acked-by: Petr Tesarik --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -130,11 +130,12 @@ static void intel_breadcrumbs_hangcheck( static void intel_breadcrumbs_fake_irq(struct timer_list *t) { - struct intel_engine_cs *engine = from_timer(engine, t, - breadcrumbs.fake_irq); + struct intel_engine_cs *engine = + from_timer(engine, t, breadcrumbs.fake_irq); struct intel_breadcrumbs *b = &engine->breadcrumbs; - /* The timer persists in case we cannot enable interrupts, + /* + * The timer persists in case we cannot enable interrupts, * or if we have previously seen seqno/interrupt incoherency * ("missed interrupt" syndrome, better known as a "missed breadcrumb"). * Here the worker will wake up every jiffie in order to kick the @@ -148,6 +149,12 @@ static void intel_breadcrumbs_fake_irq(s if (!b->irq_armed) return; + /* If the user has disabled the fake-irq, restore the hangchecking */ + if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) { + mod_timer(&b->hangcheck, wait_timeout()); + return; + } + mod_timer(&b->fake_irq, jiffies + 1); } @@ -831,8 +838,8 @@ static void cancel_fake_irq(struct intel { struct intel_breadcrumbs *b = &engine->breadcrumbs; + del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */ del_timer_sync(&b->hangcheck); - del_timer_sync(&b->fake_irq); clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); } @@ -840,15 +847,22 @@ void intel_engine_reset_breadcrumbs(stru { struct intel_breadcrumbs *b = &engine->breadcrumbs; - cancel_fake_irq(engine); spin_lock_irq(&b->irq_lock); + /* + * Leave the fake_irq timer enabled (if it is running), but clear the + * bit so that it turns itself off on its next wake up and goes back + * to the long hangcheck interval if still required. + */ + clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + if (b->irq_enabled) irq_enable(engine); else irq_disable(engine); - /* We set the IRQ_BREADCRUMB bit when we enable the irq presuming the + /* + * We set the IRQ_BREADCRUMB bit when we enable the irq presuming the * GPU is active and may have already executed the MI_USER_INTERRUPT * before the CPU is ready to receive. However, the engine is currently * idle (we haven't started it yet), there is no possibility for a @@ -857,9 +871,6 @@ void intel_engine_reset_breadcrumbs(stru */ clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); - if (b->irq_armed) - enable_fake_irq(b); - spin_unlock_irq(&b->irq_lock); }