Blob Blame History Raw
From ae632606e6ea7d63fa86617b0ade72eba29f3b1f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 8 Nov 2019 14:15:57 +0100
Subject: [PATCH] sched/core: Optimize pick_next_task()

References: bnc#1155798 (CPU scheduler functional and performance backports)
Patch-mainline: v5.5-rc1
Git-commit: 5d7d605642b28a5911198a405a6072f091bfbee6

Ever since we moved the sched_class definitions into their own files,
the constant expression {fair,idle}_sched_class.pick_next_task() is
not in fact a compile time constant anymore and results in an indirect
call (barring LTO).

Fix that by exposing pick_next_task_{fair,idle}() directly, this gets
rid of the indirect call (and RETPOLINE) on the fast path.

Also remove the unlikely() from the idle case, it is in fact /the/ way
we select idle -- and that is a very common thing to do.

Performance for will-it-scale/sched_yield improves by 2% (as reported
by 0-day).

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bsegall@google.com
Cc: dietmar.eggemann@arm.com
Cc: juri.lelli@redhat.com
Cc: ktkhai@virtuozzo.com
Cc: mgorman@suse.de
Cc: qais.yousef@arm.com
Cc: qperret@google.com
Cc: rostedt@goodmis.org
Cc: valentin.schneider@arm.com
Cc: vincent.guittot@linaro.org
Link: https://lkml.kernel.org/r/20191108131909.603037345@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 kernel/sched/core.c  | 6 +++---
 kernel/sched/fair.c  | 2 +-
 kernel/sched/idle.c  | 2 +-
 kernel/sched/sched.h | 3 +++
 4 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index cb4f7078c576..bbaed50efd3a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3789,14 +3789,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 		    prev->sched_class == &fair_sched_class) &&
 		   rq->nr_running == rq->cfs.h_nr_running)) {
 
-		p = fair_sched_class.pick_next_task(rq, prev, rf);
+		p = pick_next_task_fair(rq, prev, rf);
 		if (unlikely(p == RETRY_TASK))
 			goto restart;
 
 		/* Assumes fair_sched_class->next == idle_sched_class */
-		if (unlikely(!p)) {
+		if (!p) {
 			put_prev_task(rq, prev);
-			p = idle_sched_class.pick_next_task(rq, NULL, NULL);
+			p = pick_next_task_idle(rq, NULL, NULL);
 		}
 
 		return p;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 82973047b0f4..0169bf766359 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6627,7 +6627,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 		set_last_buddy(se);
 }
 
-static struct task_struct *
+struct task_struct *
 pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
 	struct cfs_rq *cfs_rq = &rq->cfs;
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index bd5630373a94..a05caf7c7c5d 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -391,7 +391,7 @@ static void set_next_task_idle(struct rq *rq, struct task_struct *next)
 	schedstat_inc(rq->sched_goidle);
 }
 
-static struct task_struct *
+struct task_struct *
 pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
 	struct task_struct *next = rq->idle;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 96623f2108c3..588c8a2d95af 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1811,6 +1811,9 @@ static inline bool sched_fair_runnable(struct rq *rq)
 	return rq->cfs.nr_running > 0;
 }
 
+extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
+extern struct task_struct *pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
+
 #ifdef CONFIG_SMP
 
 extern void update_group_capacity(struct sched_domain *sd, int cpu);