From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 13 Nov 2017 14:28:44 +0100
Subject: perf/core: Fix tree based event rotation
Git-commit: 8703a7cfe148f73062c568e9a8549ce692104864
Patch-mainline: v4.17-rc1
References: FATE#326324

Just as first programming cpu=-1 and then cpu=# is wrong, so is rotating
both. It was especially wrong when we were still programming the PMU in
this same order, because in that scenario we might never actually end up
running cpu=# events at all.

Cure this by using the active_list to pick the rotation event, since at
programming time we already select the left-most event.
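
As a rough illustration of the idea, a minimal, self-contained userspace
sketch follows (the struct, field and function names are purely
illustrative, not the kernel's): rotation picks the first event that is
actually active and moves only that one to the tail, instead of blindly
rotating both the cpu=-1 and cpu=# subtrees.

  #include <stdio.h>

  struct event {
      int id;
      int active;                 /* was this event scheduled on the PMU? */
      struct event *next;
  };

  /* Move the first active event to the tail of the list, if any. */
  static void rotate(struct event **head)
  {
      struct event **link, *ev, *tail;

      for (link = head; (ev = *link); link = &ev->next) {
          if (ev->active)
              break;
      }
      if (!ev || !ev->next)
          return;                 /* nothing active, or already last */

      *link = ev->next;           /* unlink the first active event */
      for (tail = *link; tail->next; tail = tail->next)
          ;
      tail->next = ev;            /* re-insert it at the tail */
      ev->next = NULL;
  }

  int main(void)
  {
      struct event c = { .id = 3, .active = 1 };
      struct event b = { .id = 2, .active = 0, .next = &c };
      struct event a = { .id = 1, .active = 1, .next = &b };
      struct event *head = &a, *ev;

      rotate(&head);              /* event 1 was running; it goes last */
      for (ev = head; ev; ev = ev->next)
          printf("%d ", ev->id);  /* prints: 2 3 1 */
      printf("\n");
      return 0;
  }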

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Carrillo-Cisneros <davidcc@google.com>
Cc: Dmitri Prokhorov <Dmitry.Prohorov@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Valery Cherepennikov <valery.cherepennikov@intel.com>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Tony Jones <tonyj@suse.de>
---
 kernel/events/core.c | 44 ++++++++++++++++++--------------------------
 1 file changed, 18 insertions(+), 26 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index fc5dd072c194..460e485220e8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1623,22 +1623,6 @@ perf_event_groups_next(struct perf_event *event)
 	return NULL;
 }
 
-/*
- * Rotate the @cpu subtree.
- *
- * Re-insert the leftmost event at the tail of the subtree.
- */
-static void
-perf_event_groups_rotate(struct perf_event_groups *groups, int cpu)
-{
-	struct perf_event *event = perf_event_groups_first(groups, cpu);
-
-	if (event) {
-		perf_event_groups_delete(groups, event);
-		perf_event_groups_insert(groups, event);
-	}
-}
-
 /*
  * Iterate through the whole groups tree.
  */
@@ -3601,24 +3585,24 @@ static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
 }
 
 /*
- * Round-robin a context's events:
+ * Move @event to the tail of the @ctx's eligible events.
  */
-static void rotate_ctx(struct perf_event_context *ctx)
+static void rotate_ctx(struct perf_event_context *ctx, struct perf_event *event)
 {
 	/*
 	 * Rotate the first entry last of non-pinned groups. Rotation might be
 	 * disabled by the inheritance code.
 	 */
-	if (!ctx->rotate_disable) {
-		int sw = -1, cpu = smp_processor_id();
+	if (ctx->rotate_disable)
+		return;
 
-		perf_event_groups_rotate(&ctx->flexible_groups, sw);
-		perf_event_groups_rotate(&ctx->flexible_groups, cpu);
-	}
+	perf_event_groups_delete(&ctx->flexible_groups, event);
+	perf_event_groups_insert(&ctx->flexible_groups, event);
 }
 
 static int perf_rotate_context(struct perf_cpu_context *cpuctx)
 {
+	struct perf_event *ctx_event = NULL, *cpuctx_event = NULL;
 	struct perf_event_context *ctx = NULL;
 	int rotate = 0;
 
@@ -3639,13 +3623,21 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx)
 	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
 	perf_pmu_disable(cpuctx->ctx.pmu);
 
+	cpuctx_event = list_first_entry_or_null(&cpuctx->ctx.flexible_active,
+						struct perf_event, active_list);
+	if (ctx) {
+		ctx_event = list_first_entry_or_null(&ctx->flexible_active,
+						     struct perf_event, active_list);
+	}
+
 	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
 	if (ctx)
 		ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
 
-	rotate_ctx(&cpuctx->ctx);
-	if (ctx)
-		rotate_ctx(ctx);
+	if (cpuctx_event)
+		rotate_ctx(&cpuctx->ctx, cpuctx_event);
+	if (ctx_event)
+		rotate_ctx(ctx, ctx_event);
 
 	perf_event_sched_in(cpuctx, ctx, current);