From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 20 May 2019 13:09:08 +0200
Subject: [PATCH] softirq: Add preemptible softirq
Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git
Git-commit: da77ceac3d20f27310a07a7c346a4ee6b40d6c28
Patch-mainline: Queued in subsystem maintainer repository
References: SLE Realtime Extension

Add preemptible softirq for RT's needs. By removing the softirq count
from the preempt counter, softirq processing becomes preemptible. A
per-CPU lock ensures that there is no parallel softirq processing and
that per-CPU variables are not accessed in parallel by multiple
threads.

local_bh_enable() will process all softirq work that has been raised in
its BH-disabled section once the BH counter gets to 0.
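
A minimal userspace sketch of this nesting scheme (illustrative only,
not kernel code; the mutex, counter and pending mask are simplified
single-threaded stand-ins for the per-CPU bh_lock, softirq_counter and
local_softirq_pending()):

  #include <pthread.h>
  #include <stdio.h>

  static pthread_mutex_t bh_lock = PTHREAD_MUTEX_INITIALIZER;
  static long bh_count;        /* stand-in for the per-CPU softirq_counter */
  static unsigned int pending; /* stand-in for local_softirq_pending() */

  static void bh_disable(void)
  {
          if (bh_count++ == 0)
                  pthread_mutex_lock(&bh_lock);  /* outermost disable takes the lock */
  }

  static void bh_enable(void)
  {
          if (bh_count == 1) {                   /* outermost enable: run deferred work */
                  while (pending) {
                          unsigned int nr = __builtin_ctz(pending);

                          pending &= ~(1u << nr);
                          printf("run softirq %u\n", nr);
                  }
          }
          if (--bh_count == 0)
                  pthread_mutex_unlock(&bh_lock);
  }

  int main(void)
  {
          bh_disable();
          pending |= 1u << 3;  /* raise_softirq() analogue */
          bh_enable();         /* counter drops to zero, deferred work runs here */
          return 0;
  }

In the patch itself the lock is a per-CPU local lock and the counter is
per CPU, so softirq processing stays serialized per CPU while the
processing context remains preemptible.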

[+ rcu_read_lock() as part of local_bh_disable() by Scott Wood]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
 include/linux/bottom_half.h |    5 
 include/linux/interrupt.h   |    1 
 include/linux/preempt.h     |   17 ++-
 include/linux/rcupdate.h    |    3 
 include/linux/sched.h       |    3 
 kernel/softirq.c            |  228 +++++++++++++++++++++++++++++++++++++++++++-
 kernel/time/tick-sched.c    |    9 -
 7 files changed, 252 insertions(+), 14 deletions(-)

--- a/include/linux/bottom_half.h
+++ b/include/linux/bottom_half.h
@@ -4,6 +4,10 @@
 
 #include <linux/preempt.h>
 
+#ifdef CONFIG_PREEMPT_RT
+extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
+#else
+
 #ifdef CONFIG_TRACE_IRQFLAGS
 extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
 #else
@@ -13,6 +17,7 @@ static __always_inline void __local_bh_d
 	barrier();
 }
 #endif
+#endif
 
 static inline void local_bh_disable(void)
 {
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -561,6 +561,7 @@ extern void __raise_softirq_irqoff(unsig
 
 extern void raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq(unsigned int nr);
+extern void softirq_check_pending_idle(void);
 
 DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
 
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -78,10 +78,8 @@
 #include <asm/preempt.h>
 
 #define hardirq_count()	(preempt_count() & HARDIRQ_MASK)
-#define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
 #define irq_count()	(preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
 				 | NMI_MASK))
-
 /*
  * Are we doing bottom half or hardware interrupt processing?
  *
@@ -96,12 +94,23 @@
  *       should not be used in new code.
  */
 #define in_irq()		(hardirq_count())
-#define in_softirq()		(softirq_count())
 #define in_interrupt()		(irq_count())
-#define in_serving_softirq()	(softirq_count() & SOFTIRQ_OFFSET)
 #define in_nmi()		(preempt_count() & NMI_MASK)
 #define in_task()		(!(preempt_count() & \
 				   (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
+#ifdef CONFIG_PREEMPT_RT
+
+#define softirq_count()		((long)current->softirq_count)
+#define in_softirq()		(softirq_count())
+#define in_serving_softirq()	(current->softirq_count & SOFTIRQ_OFFSET)
+
+#else
+
+#define softirq_count()		(preempt_count() & SOFTIRQ_MASK)
+#define in_softirq()		(softirq_count())
+#define in_serving_softirq()	(softirq_count() & SOFTIRQ_OFFSET)
+
+#endif
 
 /*
  * The preempt_count offset after preempt_disable();
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -279,7 +279,8 @@ static inline void rcu_preempt_sleep_che
 #define rcu_sleep_check()						\
 	do {								\
 		rcu_preempt_sleep_check();				\
-		RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map),	\
+		if (!IS_ENABLED(CONFIG_PREEMPT_RT))		\
+		    RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map),	\
 				 "Illegal context switch in RCU-bh read-side critical section"); \
 		RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map),	\
 				 "Illegal context switch in RCU-sched read-side critical section"); \
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -978,6 +978,9 @@ struct task_struct {
 	int				softirqs_enabled;
 	int				softirq_context;
 #endif
+#ifdef CONFIG_PREEMPT_RT
+	int				softirq_count;
+#endif
 
 #ifdef CONFIG_LOCKDEP
 # define MAX_LOCK_DEPTH			48UL
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -25,6 +25,9 @@
 #include <linux/smpboot.h>
 #include <linux/tick.h>
 #include <linux/irq.h>
+#ifdef CONFIG_PREEMPT_RT
+#include <linux/locallock.h>
+#endif
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/irq.h>
@@ -102,6 +105,104 @@ static bool ksoftirqd_running(unsigned l
  * softirq and whether we just have bh disabled.
  */
 
+#ifdef CONFIG_PREEMPT_RT
+static DEFINE_LOCAL_IRQ_LOCK(bh_lock);
+static DEFINE_PER_CPU(long, softirq_counter);
+
+void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
+{
+	unsigned long __maybe_unused flags;
+	long soft_cnt;
+
+	WARN_ON_ONCE(in_irq());
+	if (!in_atomic()) {
+		local_lock(bh_lock);
+		rcu_read_lock();
+	}
+	soft_cnt = this_cpu_inc_return(softirq_counter);
+	WARN_ON_ONCE(soft_cnt == 0);
+	current->softirq_count += SOFTIRQ_DISABLE_OFFSET;
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+	local_irq_save(flags);
+	if (soft_cnt == 1)
+		trace_softirqs_off(ip);
+	local_irq_restore(flags);
+#endif
+}
+EXPORT_SYMBOL(__local_bh_disable_ip);
+
+static void local_bh_disable_rt(void)
+{
+	local_bh_disable();
+}
+
+void _local_bh_enable(void)
+{
+	unsigned long __maybe_unused flags;
+	long soft_cnt;
+
+	soft_cnt = this_cpu_dec_return(softirq_counter);
+	WARN_ON_ONCE(soft_cnt < 0);
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+	local_irq_save(flags);
+	if (soft_cnt == 0)
+		trace_softirqs_on(_RET_IP_);
+	local_irq_restore(flags);
+#endif
+
+	current->softirq_count -= SOFTIRQ_DISABLE_OFFSET;
+	if (!in_atomic()) {
+		rcu_read_unlock();
+		local_unlock(bh_lock);
+	}
+}
+
+void _local_bh_enable_rt(void)
+{
+	_local_bh_enable();
+}
+
+void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
+{
+	u32 pending;
+	long count;
+
+	WARN_ON_ONCE(in_irq());
+	lockdep_assert_irqs_enabled();
+
+	local_irq_disable();
+	count = this_cpu_read(softirq_counter);
+
+	if (unlikely(count == 1)) {
+		pending = local_softirq_pending();
+		if (pending && !ksoftirqd_running(pending)) {
+			if (!in_atomic())
+				__do_softirq();
+			else
+				wakeup_softirqd();
+		}
+		trace_softirqs_on(ip);
+	}
+	count = this_cpu_dec_return(softirq_counter);
+	WARN_ON_ONCE(count < 0);
+	local_irq_enable();
+
+	if (!in_atomic()) {
+		rcu_read_unlock();
+		local_unlock(bh_lock);
+	}
+
+	current->softirq_count -= SOFTIRQ_DISABLE_OFFSET;
+	preempt_check_resched();
+}
+EXPORT_SYMBOL(__local_bh_enable_ip);
+
+#else
+static void local_bh_disable_rt(void) { }
+static void _local_bh_enable_rt(void) { }
+
 /*
  * This one is for softirq.c-internal use,
  * where hardirqs are disabled legitimately:
@@ -196,6 +297,7 @@ void __local_bh_enable_ip(unsigned long
 	preempt_check_resched();
 }
 EXPORT_SYMBOL(__local_bh_enable_ip);
+#endif
 
 /*
  * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
@@ -266,7 +368,11 @@ asmlinkage __visible void __softirq_entr
 	pending = local_softirq_pending();
 	account_irq_enter_time(current);
 
+#ifdef CONFIG_PREEMPT_RT
+	current->softirq_count |= SOFTIRQ_OFFSET;
+#else
 	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
+#endif
 	in_hardirq = lockdep_softirq_start();
 
 restart:
@@ -300,9 +406,10 @@ asmlinkage __visible void __softirq_entr
 		h++;
 		pending >>= softirq_bit;
 	}
-
+#ifndef CONFIG_PREEMPT_RT
 	if (__this_cpu_read(ksoftirqd) == current)
 		rcu_softirq_qs();
+#endif
 	local_irq_disable();
 
 	pending = local_softirq_pending();
@@ -316,11 +423,16 @@ asmlinkage __visible void __softirq_entr
 
 	lockdep_softirq_end(in_hardirq);
 	account_irq_exit_time(current);
+#ifdef CONFIG_PREEMPT_RT
+	current->softirq_count &= ~SOFTIRQ_OFFSET;
+#else
 	__local_bh_enable(SOFTIRQ_OFFSET);
+#endif
 	WARN_ON_ONCE(in_interrupt());
 	current_restore_flags(old_flags, PF_MEMALLOC);
 }
 
+#ifndef CONFIG_PREEMPT_RT
 asmlinkage __visible void do_softirq(void)
 {
 	__u32 pending;
@@ -338,6 +450,7 @@ asmlinkage __visible void do_softirq(voi
 
 	local_irq_restore(flags);
 }
+#endif
 
 /*
  * Enter an interrupt context.
@@ -358,6 +471,16 @@ void irq_enter(void)
 	__irq_enter();
 }
 
+#ifdef CONFIG_PREEMPT_RT
+
+static inline void invoke_softirq(void)
+{
+	if (this_cpu_read(softirq_counter) == 0)
+		wakeup_softirqd();
+}
+
+#else
+
 static inline void invoke_softirq(void)
 {
 	if (ksoftirqd_running(local_softirq_pending()))
@@ -383,6 +506,7 @@ static inline void invoke_softirq(void)
 		wakeup_softirqd();
 	}
 }
+#endif
 
 static inline void tick_irq_exit(void)
 {
@@ -420,6 +544,27 @@ void irq_exit(void)
 /*
  * This function must run with irqs disabled!
  */
+#ifdef CONFIG_PREEMPT_RT
+void raise_softirq_irqoff(unsigned int nr)
+{
+	__raise_softirq_irqoff(nr);
+
+	/*
+	 * If we're in a hard interrupt we let the irq return code deal
+	 * with the wakeup of ksoftirqd.
+	 */
+	if (in_irq())
+		return;
+	/*
+	 * If we are not in a BH-disabled section then we have to wake
+	 * ksoftirqd.
+	 */
+	if (this_cpu_read(softirq_counter) == 0)
+		wakeup_softirqd();
+}
+
+#else
+
 inline void raise_softirq_irqoff(unsigned int nr)
 {
 	__raise_softirq_irqoff(nr);
@@ -437,6 +582,8 @@ inline void raise_softirq_irqoff(unsigne
 		wakeup_softirqd();
 }
 
+#endif
+
 void raise_softirq(unsigned int nr)
 {
 	unsigned long flags;
@@ -594,6 +741,7 @@ static int ksoftirqd_should_run(unsigned
 
 static void run_ksoftirqd(unsigned int cpu)
 {
+	local_bh_disable_rt();
 	local_irq_disable();
 	if (local_softirq_pending()) {
 		/*
@@ -602,10 +750,12 @@ static void run_ksoftirqd(unsigned int c
 		 */
 		__do_softirq();
 		local_irq_enable();
+		_local_bh_enable_rt();
 		cond_resched();
 		return;
 	}
 	local_irq_enable();
+	_local_bh_enable_rt();
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -679,6 +829,13 @@ static struct smp_hotplug_thread softirq
 
 static __init int spawn_ksoftirqd(void)
 {
+#ifdef CONFIG_PREEMPT_RT
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		lockdep_set_novalidate_class(per_cpu_ptr(&bh_lock.lock, cpu));
+#endif
+
 	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
 				  takeover_tasklets);
 	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
@@ -687,6 +844,75 @@ static __init int spawn_ksoftirqd(void)
 }
 early_initcall(spawn_ksoftirqd);
 
+#ifdef CONFIG_PREEMPT_RT
+
+/*
+ * On preempt-rt a softirq running context might be blocked on a
+ * lock. There might be no other runnable task on this CPU because the
+ * lock owner runs on some other CPU. So we have to go into idle with
+ * the pending bit set. Therefore we need to check this, otherwise we
+ * warn about false positives which confuse users and defeat the
+ * whole purpose of this test.
+ *
+ * This code is called with interrupts disabled.
+ */
+void softirq_check_pending_idle(void)
+{
+	struct task_struct *tsk = __this_cpu_read(ksoftirqd);
+	static int rate_limit;
+	bool okay = false;
+	u32 warnpending;
+
+	if (rate_limit >= 10)
+		return;
+
+	warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
+	if (!warnpending)
+		return;
+
+	if (!tsk)
+		return;
+	/*
+	 * If ksoftirqd is blocked on a lock then we may go idle with pending
+	 * softirq.
+	 */
+	raw_spin_lock(&tsk->pi_lock);
+	if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING ||
+	    (tsk->state == TASK_UNINTERRUPTIBLE && tsk->sleeping_lock)) {
+		okay = true;
+	}
+	raw_spin_unlock(&tsk->pi_lock);
+	if (okay)
+		return;
+	/*
+	 * The softirq lock is held in non-atomic context and the owner is
+	 * blocking on a lock. It will schedule softirqs once the counter goes
+	 * back to zero.
+	 */
+	if (this_cpu_read(softirq_counter) > 0)
+		return;
+
+	printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
+	       warnpending);
+	rate_limit++;
+}
+
+#else
+
+void softirq_check_pending_idle(void)
+{
+	static int ratelimit;
+
+	if (ratelimit < 10 &&
+	    (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
+		pr_warn("NOHZ: local_softirq_pending %02x\n",
+			(unsigned int) local_softirq_pending());
+		ratelimit++;
+	}
+}
+
+#endif
+
 /*
  * [ These __weak aliases are kept in a separate compilation unit, so that
  *   GCC does not inline them incorrectly. ]
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -899,14 +899,7 @@ static bool can_stop_idle_tick(int cpu,
 		return false;
 
 	if (unlikely(local_softirq_pending())) {
-		static int ratelimit;
-
-		if (ratelimit < 10 &&
-		    (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
-			pr_warn("NOHZ: local_softirq_pending %02x\n",
-				(unsigned int) local_softirq_pending());
-			ratelimit++;
-		}
+		softirq_check_pending_idle();
 		return false;
 	}