From: John Stultz <johnstul@us.ibm.com>
Date: Fri, 3 Jul 2009 08:29:58 -0500
Subject: posix-timers: Thread posix-cpu-timers on -rt
Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git
Git-commit: da77ceac3d20f27310a07a7c346a4ee6b40d6c28
Patch-mainline: Queued in subsystem maintainer repository
References: SLE Realtime Extension

The posix-cpu-timer code takes locks which are not hard irq safe
on -rt (spinlock_t becomes a sleeping lock under PREEMPT_RT) from
hard interrupt context. Move the timer expiry processing out of
the interrupt and into a per-CPU kthread.
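
A minimal user-space sketch of the per-CPU queueing convention
used below, for reference: the list is singly linked through the
task_struct, a NULL pointer means "not queued", and the tail entry
points to itself so that NULL stays free as the "not queued"
marker. The names here are illustrative only, not the kernel's:

	struct task { struct task *next; };

	static struct task *head;	/* stands in for the per-CPU list head */

	static void queue_task(struct task *t)
	{
		/* tail points to itself; a NULL ->next means "not queued" */
		t->next = head ? head : t;
		head = t;
	}

	static void expire_all(void)
	{
		struct task *t = head, *next;

		head = NULL;
		while (t) {
			next = t->next;
			t->next = NULL;		/* mark "not queued" again */
			/* ... run the expired timers of t ... */
			if (next == t)		/* self-pointer: end of list */
				break;
			t = next;
		}
	}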

[ 3.0 fixes from Peter Zijlstra <peterz@infradead.org> ]

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
 include/linux/posix-timers.h   |   10 ++
 kernel/time/posix-cpu-timers.c |  175 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 174 insertions(+), 11 deletions(-)

--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -123,6 +123,9 @@ struct posix_cputimers {
 	struct posix_cputimer_base	bases[CPUCLOCK_MAX];
 	unsigned int			timers_active;
 	unsigned int			expiry_active;
+#ifdef CONFIG_PREEMPT_RT
+	struct task_struct		*posix_timer_list;
+#endif
 };
 
 static inline void posix_cputimers_init(struct posix_cputimers *pct)
@@ -152,9 +155,16 @@ static inline void posix_cputimers_rt_wa
 	INIT_CPU_TIMERBASE(b[2]),					\
 }
 
+#ifdef CONFIG_PREEMPT_RT
+# define INIT_TIMER_LIST	.posix_timer_list = NULL,
+#else
+# define INIT_TIMER_LIST
+#endif
+
 #define INIT_CPU_TIMERS(s)						\
 	.posix_cputimers = {						\
 		.bases = INIT_CPU_TIMERBASES(s.posix_cputimers.bases),	\
+		INIT_TIMER_LIST						\
 	},
 #else
 struct posix_cputimers { };
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -3,8 +3,10 @@
  * Implement CPU time clocks for the POSIX clock interface.
  */
 
+#include <uapi/linux/sched/types.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/cputime.h>
+#include <linux/sched/rt.h>
 #include <linux/posix-timers.h>
 #include <linux/errno.h>
 #include <linux/math64.h>
@@ -15,6 +17,7 @@
 #include <linux/workqueue.h>
 #include <linux/compat.h>
 #include <linux/sched/deadline.h>
+#include <linux/smpboot.h>
 
 #include "posix-timers.h"
 
@@ -27,6 +30,9 @@ void posix_cputimers_group_init(struct p
 		pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC;
 		pct->timers_active = true;
 	}
+#ifdef CONFIG_PREEMPT_RT
+	pct->posix_timer_list = NULL;
+#endif
 }
 
 /*
@@ -804,7 +810,8 @@ static inline void check_dl_overrun(stru
 	}
 }
 
-static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard)
+static bool check_rlimit(struct task_struct *tsk, u64 time, u64 limit,
+			 int signo, bool rt, bool hard)
 {
 	if (time < limit)
 		return false;
@@ -812,9 +819,9 @@ static bool check_rlimit(u64 time, u64 l
 	if (print_fatal_signals) {
 		pr_info("%s Watchdog Timeout (%s): %s[%d]\n",
 			rt ? "RT" : "CPU", hard ? "hard" : "soft",
-			current->comm, task_pid_nr(current));
+			tsk->comm, task_pid_nr(tsk));
 	}
-	__group_send_sig_info(signo, SEND_SIG_PRIV, current);
+	__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
 	return true;
 }
 
@@ -850,11 +857,11 @@ static void check_thread_timers(struct t
 
 		/* At the hard limit, send SIGKILL. No further action. */
 		if (hard != RLIM_INFINITY &&
-		    check_rlimit(rttime, hard, SIGKILL, true, true))
+		    check_rlimit(tsk, rttime, hard, SIGKILL, true, true))
 			return;
 
 		/* At the soft limit, send a SIGXCPU every second */
-		if (check_rlimit(rttime, soft, SIGXCPU, true, false)) {
+		if (check_rlimit(tsk, rttime, soft, SIGXCPU, true, false)) {
 			soft += USEC_PER_SEC;
 			tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = soft;
 		}
@@ -949,11 +956,11 @@ static void check_process_timers(struct
 
 		/* At the hard limit, send SIGKILL. No further action. */
 		if (hard != RLIM_INFINITY &&
-		    check_rlimit(ptime, hardns, SIGKILL, false, true))
+		    check_rlimit(tsk, ptime, hardns, SIGKILL, false, true))
 			return;
 
 		/* At the soft limit, send a SIGXCPU every second */
-		if (check_rlimit(ptime, softns, SIGXCPU, false, false)) {
+		if (check_rlimit(tsk, ptime, softns, SIGXCPU, false, false)) {
 			sig->rlim[RLIMIT_CPU].rlim_cur = soft + 1;
 			softns += NSEC_PER_SEC;
 		}
@@ -1110,15 +1117,12 @@ static inline bool fastpath_timer_check(
  * already updated our counts.  We need to check if any timers fire now.
  * Interrupts are disabled.
  */
-void run_posix_cpu_timers(void)
+static void __run_posix_cpu_timers(struct task_struct *tsk)
 {
-	struct task_struct *tsk = current;
 	struct k_itimer *timer, *next;
 	unsigned long flags;
 	LIST_HEAD(firing);
 
-	lockdep_assert_irqs_disabled();
-
 	/*
 	 * The fast path checks that there are no expired thread or thread
 	 * group timers.  If that's so, just return.
@@ -1171,6 +1175,155 @@ void run_posix_cpu_timers(void)
 	}
 }
 
+#ifdef CONFIG_PREEMPT_RT
+#include <linux/kthread.h>
+#include <linux/cpu.h>
+DEFINE_PER_CPU(struct task_struct *, posix_timer_task);
+DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist);
+DEFINE_PER_CPU(bool, posix_timer_th_active);
+
+static void posix_cpu_kthread_fn(unsigned int cpu)
+{
+	struct task_struct *tsk = NULL;
+	struct task_struct *next = NULL;
+
+	BUG_ON(per_cpu(posix_timer_task, cpu) != current);
+
+	/* grab task list */
+	raw_local_irq_disable();
+	tsk = per_cpu(posix_timer_tasklist, cpu);
+	per_cpu(posix_timer_tasklist, cpu) = NULL;
+	raw_local_irq_enable();
+
+	/* It's possible the list is empty; just return. */
+	if (!tsk)
+		return;
+
+	/* Process task list */
+	while (1) {
+		/* Save the link before the entry is cleared below */
+		next = tsk->posix_cputimers.posix_timer_list;
+
+		/* Run the task's timers, clear its queueing pointer
+		 * and drop the reference taken when it was queued.
+		 */
+		__run_posix_cpu_timers(tsk);
+		tsk->posix_cputimers.posix_timer_list = NULL;
+		put_task_struct(tsk);
+
+		/* A self-pointing entry marks the end of the list */
+		if (next == tsk)
+			break;
+		tsk = next;
+	}
+}
+
+static inline int __fastpath_timer_check(struct task_struct *tsk)
+{
+	/* tsk == current, ensure it is safe to use ->signal/sighand */
+	if (unlikely(tsk->exit_state))
+		return 0;
+
+	if (!expiry_cache_is_inactive(&tsk->posix_cputimers))
+		return 1;
+
+	if (!expiry_cache_is_inactive(&tsk->signal->posix_cputimers))
+		return 1;
+
+	return 0;
+}
+
+void run_posix_cpu_timers(void)
+{
+	unsigned int cpu = smp_processor_id();
+	struct task_struct *tsk = current;
+	struct task_struct *tasklist;
+
+	BUG_ON(!irqs_disabled());
+
+	if (!per_cpu(posix_timer_th_active, cpu))
+		return;
+
+	/* get per-cpu references */
+	tasklist = per_cpu(posix_timer_tasklist, cpu);
+
+	/* Queue the task if not already queued and timers are pending */
+	if (!tsk->posix_cputimers.posix_timer_list && __fastpath_timer_check(tsk)) {
+		get_task_struct(tsk);
+		if (tasklist) {
+			tsk->posix_cputimers.posix_timer_list = tasklist;
+		} else {
+			/*
+			 * The list is terminated by a self-pointing
+			 * task_struct
+			 */
+			tsk->posix_cputimers.posix_timer_list = tsk;
+		}
+		per_cpu(posix_timer_tasklist, cpu) = tsk;
+
+		wake_up_process(per_cpu(posix_timer_task, cpu));
+	}
+}
+
+static int posix_cpu_kthread_should_run(unsigned int cpu)
+{
+	return __this_cpu_read(posix_timer_tasklist) != NULL;
+}
+
+static void posix_cpu_kthread_park(unsigned int cpu)
+{
+	this_cpu_write(posix_timer_th_active, false);
+}
+
+static void posix_cpu_kthread_unpark(unsigned int cpu)
+{
+	this_cpu_write(posix_timer_th_active, true);
+}
+
+static void posix_cpu_kthread_setup(unsigned int cpu)
+{
+	struct sched_param sp;
+
+	sp.sched_priority = MAX_RT_PRIO - 1;
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+	posix_cpu_kthread_unpark(cpu);
+}
+
+static struct smp_hotplug_thread posix_cpu_thread = {
+	.store			= &posix_timer_task,
+	.thread_should_run	= posix_cpu_kthread_should_run,
+	.thread_fn		= posix_cpu_kthread_fn,
+	.thread_comm		= "posixcputmr/%u",
+	.setup			= posix_cpu_kthread_setup,
+	.park			= posix_cpu_kthread_park,
+	.unpark			= posix_cpu_kthread_unpark,
+};
+
+static int __init posix_cpu_thread_init(void)
+{
+	/* smpboot starts one expiry thread per CPU, boot CPU included */
+	unsigned int cpu;
+	int ret;
+
+	/* init the per-cpu posix_timer_tasklist heads */
+	for_each_possible_cpu(cpu)
+		per_cpu(posix_timer_tasklist, cpu) = NULL;
+
+	ret = smpboot_register_percpu_thread(&posix_cpu_thread);
+	WARN_ON(ret);
+
+	return 0;
+}
+early_initcall(posix_cpu_thread_init);
+
+#else /* CONFIG_PREEMPT_RT */
+void run_posix_cpu_timers(void)
+{
+	lockdep_assert_irqs_disabled();
+	__run_posix_cpu_timers(current);
+}
+#endif /* CONFIG_PREEMPT_RT */
+
 /*
  * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
  * The tsk->sighand->siglock must be held by the caller.