| From b4bfa3fcfe3b827ddb8b16edd45896caac5a1194 Mon Sep 17 00:00:00 2001 |
| From: Thomas Gleixner <tglx@linutronix.de> |
| Date: Sun, 15 Aug 2021 23:27:46 +0200 |
| Subject: [PATCH] sched/core: Rework the __schedule() preempt argument |
| Git-commit: b4bfa3fcfe3b827ddb8b16edd45896caac5a1194 |
| Patch-mainline: v5.15-rc1 |
| References: bsc#1190137 bsc#1189998 |
| |
PREEMPT_RT needs to hand a special state into __schedule() when a task
blocks on a 'sleeping' spin/rwlock. This is required to handle
rcu_note_context_switch() correctly without special casing in the RCU
code. From an RCU point of view, blocking on the sleeping spinlock is
equivalent to preemption because the task might be in a read-side
critical section.
| |
schedule_debug() also has a check that would trigger in the !preempt
case, but that could be handled differently.
| |
| To avoid adding another argument and extra checks which cannot be optimized |
| out by the compiler, the following solution has been chosen: |
| |
- Replace the boolean 'preempt' argument with an unsigned integer
  'sched_mode' argument and define constants to hand in:
  (0 == no preemption, 1 == preemption).
| |
| - Add two masks to apply on that mode: one for the debug/rcu invocations, |
| and one for the actual scheduling decision. |
| |
For a non-RT kernel these masks are UINT_MAX, i.e. all bits are set,
which allows the compiler to optimize the AND operation out because it
does not mask out anything. IOW, it is no different from the boolean.
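
As a minimal sketch (not part of the patch; is_voluntary() is a made-up
helper standing in for the !preempt test in __schedule()), the folding
is easy to see in a stand-alone C program:

  #include <stdio.h>

  #define SM_NONE         0x0
  #define SM_PREEMPT      0x1
  #define SM_MASK_PREEMPT (~0U)	/* !RT: all bits set */

  /* (mode & ~0U) == mode, so the compiler reduces this to a plain
   * zero test, exactly what the old boolean 'preempt' compiled to. */
  static int is_voluntary(unsigned int sched_mode)
  {
  	return !(sched_mode & SM_MASK_PREEMPT);
  }

  int main(void)
  {
  	printf("%d %d\n", is_voluntary(SM_NONE), is_voluntary(SM_PREEMPT));
  	return 0;	/* prints: 1 0 */
  }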
| |
| RT enabled kernels will define these masks separately. |
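
A plausible shape for those RT definitions, as a sketch only (the RT
bits are not part of this patch, and SM_RTLOCK_WAIT is a hypothetical
mode for the sleeping-lock case):

  #define SM_NONE          0x0
  #define SM_PREEMPT       0x1
  #define SM_RTLOCK_WAIT   0x2	/* hypothetical: blocked on 'sleeping' lock */

  #ifndef CONFIG_PREEMPT_RT
  # define SM_MASK_PREEMPT (~0U)		/* AND is a no-op */
  #else
  # define SM_MASK_PREEMPT SM_PREEMPT	/* lock waits are not preemptions */
  #endif

With such a mask an RT kernel could hand SM_RTLOCK_WAIT to __schedule():
rcu_note_context_switch(!!sched_mode) would still see a preemption,
while the (sched_mode & SM_MASK_PREEMPT) test would take the non-preempt
path.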
| |
| No functional change. |
| |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> |
| Signed-off-by: Ingo Molnar <mingo@kernel.org> |
| Link: https://lore.kernel.org/r/20210815211302.315473019@linutronix.de |
| Signed-off-by: Davidlohr Bueso <dbueso@suse.de> |
---
| kernel/sched/core.c | 34 +++++++++++++++++++++++----------- |
| 1 file changed, 23 insertions(+), 11 deletions(-) |
| |
| diff --git a/kernel/sched/core.c b/kernel/sched/core.c |
| index e407c6ac4a26..ebc24e136222 100644 |
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
| @@ -5819,6 +5819,18 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) |
| |
| #endif /* CONFIG_SCHED_CORE */ |
| |
| +/* |
| + * Constants for the sched_mode argument of __schedule(). |
| + * |
| + * The mode argument allows RT enabled kernels to differentiate a |
+ * preemption from blocking on a 'sleeping' spin/rwlock. Note that
| + * SM_MASK_PREEMPT for !RT has all bits set, which allows the compiler to |
| + * optimize the AND operation out and just check for zero. |
| + */ |
| +#define SM_NONE 0x0 |
| +#define SM_PREEMPT 0x1 |
| +#define SM_MASK_PREEMPT (~0U) |
| + |
| /* |
| * __schedule() is the main scheduler function. |
| * |
| @@ -5858,7 +5870,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) |
| * |
| * WARNING: must be called with preemption disabled! |
| */ |
| -static void __sched notrace __schedule(bool preempt) |
| +static void __sched notrace __schedule(unsigned int sched_mode) |
| { |
| struct task_struct *prev, *next; |
| unsigned long *switch_count; |
| @@ -5871,13 +5883,13 @@ static void __sched notrace __schedule(bool preempt) |
| rq = cpu_rq(cpu); |
| prev = rq->curr; |
| |
| - schedule_debug(prev, preempt); |
| + schedule_debug(prev, !!sched_mode); |
| |
| if (sched_feat(HRTICK) || sched_feat(HRTICK_DL)) |
| hrtick_clear(rq); |
| |
| local_irq_disable(); |
| - rcu_note_context_switch(preempt); |
| + rcu_note_context_switch(!!sched_mode); |
| |
| /* |
| * Make sure that signal_pending_state()->signal_pending() below |
| @@ -5911,7 +5923,7 @@ static void __sched notrace __schedule(bool preempt) |
| * - ptrace_{,un}freeze_traced() can change ->state underneath us. |
| */ |
| prev_state = READ_ONCE(prev->__state); |
| - if (!preempt && prev_state) { |
| + if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) { |
| if (signal_pending_state(prev_state, prev)) { |
| WRITE_ONCE(prev->__state, TASK_RUNNING); |
| } else { |
| @@ -5977,7 +5989,7 @@ static void __sched notrace __schedule(bool preempt) |
| migrate_disable_switch(rq, prev); |
| psi_sched_switch(prev, next, !task_on_rq_queued(prev)); |
| |
| - trace_sched_switch(preempt, prev, next); |
| + trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next); |
| |
| /* Also unlocks the rq: */ |
| rq = context_switch(rq, prev, next, &rf); |
| @@ -5998,7 +6010,7 @@ void __noreturn do_task_dead(void) |
| /* Tell freezer to ignore us: */ |
| current->flags |= PF_NOFREEZE; |
| |
| - __schedule(false); |
| + __schedule(SM_NONE); |
| BUG(); |
| |
| /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ |
| @@ -6059,7 +6071,7 @@ asmlinkage __visible void __sched schedule(void) |
| sched_submit_work(tsk); |
| do { |
| preempt_disable(); |
| - __schedule(false); |
| + __schedule(SM_NONE); |
| sched_preempt_enable_no_resched(); |
| } while (need_resched()); |
| sched_update_worker(tsk); |
| @@ -6087,7 +6099,7 @@ void __sched schedule_idle(void) |
| */ |
| WARN_ON_ONCE(current->__state); |
| do { |
| - __schedule(false); |
| + __schedule(SM_NONE); |
| } while (need_resched()); |
| } |
| |
| @@ -6140,7 +6152,7 @@ static void __sched notrace preempt_schedule_common(void) |
| */ |
| preempt_disable_notrace(); |
| preempt_latency_start(1); |
| - __schedule(true); |
| + __schedule(SM_PREEMPT); |
| preempt_latency_stop(1); |
| preempt_enable_no_resched_notrace(); |
| |
| @@ -6219,7 +6231,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) |
| * an infinite recursion. |
| */ |
| prev_ctx = exception_enter(); |
| - __schedule(true); |
| + __schedule(SM_PREEMPT); |
| exception_exit(prev_ctx); |
| |
| preempt_latency_stop(1); |
| @@ -6368,7 +6380,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void) |
| do { |
| preempt_disable(); |
| local_irq_enable(); |
| - __schedule(true); |
| + __schedule(SM_PREEMPT); |
| local_irq_disable(); |
| sched_preempt_enable_no_resched(); |
| } while (need_resched()); |
| -- |
| 2.26.2 |
| |