From: John Stultz <johnstul@us.ibm.com>
Date: Fri, 3 Jul 2009 08:29:58 -0500
Subject: posix-timers: Thread posix-cpu-timers on -rt
Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git
Git-commit: da77ceac3d20f27310a07a7c346a4ee6b40d6c28
Patch-mainline: Queued in subsystem maintainer repository
References: SLE Realtime Extension

The posix-cpu-timer expiry code takes locks that are not -rt safe in
hard irq context. Move the expiry processing to a per-CPU thread.

[ 3.0 fixes from Peter Zijlstra <peterz@infradead.org> ]
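The queueing scheme is easiest to see in isolation. The sketch below is
illustrative only (struct task and the function names are placeholders,
not the kernel API): the hard irq path pushes the current task onto a
per-CPU, self-terminated singly linked list and wakes the worker thread,
and the worker detaches the whole list before walking it, so the irq
path itself never takes a sleeping lock.

  /* Illustrative sketch only -- placeholder names, not the kernel API. */
  struct task {
  	struct task *timer_next;	/* NULL: not queued; self: tail of list */
  };

  static struct task *tasklist;		/* one list head per CPU in the patch */

  /* hard irq context: O(1) push, no locks taken */
  static void queue_task(struct task *t)
  {
  	if (t->timer_next)
  		return;			/* already queued */
  	/* the tail points to itself, so NULL still means "not queued" */
  	t->timer_next = tasklist ? tasklist : t;
  	tasklist = t;
  	/* wake_up_process(posix_timer_task) in the real patch */
  }

  /* worker thread context: detach the list, then expire each entry */
  static void drain_tasks(void)
  {
  	struct task *t = tasklist, *next;

  	tasklist = NULL;	/* done with irqs disabled in the real patch */
  	while (t) {
  		next = t->timer_next;
  		t->timer_next = NULL;
  		/* __run_posix_cpu_timers(t) in the real patch */
  		t = (next == t) ? NULL : next;
  	}
  }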
Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
 include/linux/posix-timers.h   |   10 ++
 kernel/time/posix-cpu-timers.c |  175 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 174 insertions(+), 11 deletions(-)

--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -123,6 +123,9 @@ struct posix_cputimers {
 	struct posix_cputimer_base	bases[CPUCLOCK_MAX];
 	unsigned int			timers_active;
 	unsigned int			expiry_active;
+#ifdef CONFIG_PREEMPT_RT
+	struct task_struct		*posix_timer_list;
+#endif
 };
 
 static inline void posix_cputimers_init(struct posix_cputimers *pct)
@@ -152,9 +155,16 @@ static inline void posix_cputimers_rt_wa
 	INIT_CPU_TIMERBASE(b[2]),					\
 }
 
+#ifdef CONFIG_PREEMPT_RT
+# define INIT_TIMER_LIST	.posix_timer_list = NULL,
+#else
+# define INIT_TIMER_LIST
+#endif
+
 #define INIT_CPU_TIMERS(s)						\
 	.posix_cputimers = {						\
 		.bases = INIT_CPU_TIMERBASES(s.posix_cputimers.bases),	\
+		INIT_TIMER_LIST						\
 	},
 #else
 struct posix_cputimers { };
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -3,8 +3,10 @@
  * Implement CPU time clocks for the POSIX clock interface.
  */
 
+#include <uapi/linux/sched/types.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/cputime.h>
+#include <linux/sched/rt.h>
 #include <linux/posix-timers.h>
 #include <linux/errno.h>
 #include <linux/math64.h>
@@ -15,6 +17,7 @@
 #include <linux/workqueue.h>
 #include <linux/compat.h>
 #include <linux/sched/deadline.h>
+#include <linux/smpboot.h>
 
 #include "posix-timers.h"
 
@@ -27,6 +30,9 @@ void posix_cputimers_group_init(struct p
 		pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC;
 		pct->timers_active = true;
 	}
+#ifdef CONFIG_PREEMPT_RT
+	pct->posix_timer_list = NULL;
+#endif
 }
 
 /*
@@ -804,7 +810,8 @@ static inline void check_dl_overrun(stru
 	}
 }
 
-static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard)
+static bool check_rlimit(struct task_struct *tsk, u64 time, u64 limit,
+			 int signo, bool rt, bool hard)
 {
 	if (time < limit)
 		return false;
@@ -812,9 +819,9 @@ static bool check_rlimit(u64 time, u64 l
 	if (print_fatal_signals) {
 		pr_info("%s Watchdog Timeout (%s): %s[%d]\n",
 			rt ? "RT" : "CPU", hard ? "hard" : "soft",
-			current->comm, task_pid_nr(current));
+			tsk->comm, task_pid_nr(tsk));
 	}
-	__group_send_sig_info(signo, SEND_SIG_PRIV, current);
+	__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
 	return true;
 }
 
@@ -850,11 +857,11 @@ static void check_thread_timers(struct t
 
 		/* At the hard limit, send SIGKILL. No further action. */
 		if (hard != RLIM_INFINITY &&
-		    check_rlimit(rttime, hard, SIGKILL, true, true))
+		    check_rlimit(tsk, rttime, hard, SIGKILL, true, true))
 			return;
 
 		/* At the soft limit, send a SIGXCPU every second */
-		if (check_rlimit(rttime, soft, SIGXCPU, true, false)) {
+		if (check_rlimit(tsk, rttime, soft, SIGXCPU, true, false)) {
 			soft += USEC_PER_SEC;
 			tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = soft;
 		}
@@ -949,11 +956,11 @@ static void check_process_timers(struct
 
 		/* At the hard limit, send SIGKILL. No further action. */
 		if (hard != RLIM_INFINITY &&
-		    check_rlimit(ptime, hardns, SIGKILL, false, true))
+		    check_rlimit(tsk, ptime, hardns, SIGKILL, false, true))
 			return;
 
 		/* At the soft limit, send a SIGXCPU every second */
-		if (check_rlimit(ptime, softns, SIGXCPU, false, false)) {
+		if (check_rlimit(tsk, ptime, softns, SIGXCPU, false, false)) {
 			sig->rlim[RLIMIT_CPU].rlim_cur = soft + 1;
 			softns += NSEC_PER_SEC;
 		}
@@ -1110,15 +1117,12 @@ static inline bool fastpath_timer_check(
  * already updated our counts.  We need to check if any timers fire now.
  * Interrupts are disabled.
  */
-void run_posix_cpu_timers(void)
+static void __run_posix_cpu_timers(struct task_struct *tsk)
 {
-	struct task_struct *tsk = current;
 	struct k_itimer *timer, *next;
 	unsigned long flags;
 	LIST_HEAD(firing);
 
-	lockdep_assert_irqs_disabled();
-
 	/*
 	 * The fast path checks that there are no expired thread or thread
 	 * group timers.  If that's so, just return.
@@ -1171,6 +1175,155 @@ void run_posix_cpu_timers(void)
 	}
 }
 
+#ifdef CONFIG_PREEMPT_RT
+#include <linux/kthread.h>
+#include <linux/cpu.h>
+DEFINE_PER_CPU(struct task_struct *, posix_timer_task);
+DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist);
+DEFINE_PER_CPU(bool, posix_timer_th_active);
+
+static void posix_cpu_kthread_fn(unsigned int cpu)
+{
+	struct task_struct *tsk = NULL;
+	struct task_struct *next = NULL;
+
+	BUG_ON(per_cpu(posix_timer_task, cpu) != current);
+
+	/* grab task list */
+	raw_local_irq_disable();
+	tsk = per_cpu(posix_timer_tasklist, cpu);
+	per_cpu(posix_timer_tasklist, cpu) = NULL;
+	raw_local_irq_enable();
+
+	/* its possible the list is empty, just return */
+	if (!tsk)
+		return;
+
+	/* Process task list */
+	while (1) {
+		/* save next */
+		next = tsk->posix_cputimers.posix_timer_list;
+
+		/* run the task timers, clear its ptr and
+		 * unreference it
+		 */
+		__run_posix_cpu_timers(tsk);
+		tsk->posix_cputimers.posix_timer_list = NULL;
+		put_task_struct(tsk);
+
+		/* check if this is the last on the list */
+		if (next == tsk)
+			break;
+		tsk = next;
+	}
+}
+
+static inline int __fastpath_timer_check(struct task_struct *tsk)
+{
+	/* tsk == current, ensure it is safe to use ->signal/sighand */
+	if (unlikely(tsk->exit_state))
+		return 0;
+
+	if (!expiry_cache_is_inactive(&tsk->posix_cputimers))
+		return 1;
+
+	if (!expiry_cache_is_inactive(&tsk->signal->posix_cputimers))
+		return 1;
+
+	return 0;
+}
+
+void run_posix_cpu_timers(void)
+{
+	unsigned int cpu = smp_processor_id();
+	struct task_struct *tsk = current;
+	struct task_struct *tasklist;
+
+	BUG_ON(!irqs_disabled());
+
+	if (per_cpu(posix_timer_th_active, cpu) != true)
+		return;
+
+	/* get per-cpu references */
+	tasklist = per_cpu(posix_timer_tasklist, cpu);
+
+	/* check to see if we're already queued */
+	if (!tsk->posix_cputimers.posix_timer_list && __fastpath_timer_check(tsk)) {
+		get_task_struct(tsk);
+		if (tasklist) {
+			tsk->posix_cputimers.posix_timer_list = tasklist;
+		} else {
+			/*
+			 * The list is terminated by a self-pointing
+			 * task_struct
+			 */
+			tsk->posix_cputimers.posix_timer_list = tsk;
+		}
+		per_cpu(posix_timer_tasklist, cpu) = tsk;
+
+		wake_up_process(per_cpu(posix_timer_task, cpu));
+	}
+}
+
+static int posix_cpu_kthread_should_run(unsigned int cpu)
+{
+	return __this_cpu_read(posix_timer_tasklist) != NULL;
+}
+
+static void posix_cpu_kthread_park(unsigned int cpu)
+{
+	this_cpu_write(posix_timer_th_active, false);
+}
+
+static void posix_cpu_kthread_unpark(unsigned int cpu)
+{
+	this_cpu_write(posix_timer_th_active, true);
+}
+
+static void posix_cpu_kthread_setup(unsigned int cpu)
+{
+	struct sched_param sp;
+
+	sp.sched_priority = MAX_RT_PRIO - 1;
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+	posix_cpu_kthread_unpark(cpu);
+}
+
+static struct smp_hotplug_thread posix_cpu_thread = {
+	.store			= &posix_timer_task,
+	.thread_should_run	= posix_cpu_kthread_should_run,
+	.thread_fn		= posix_cpu_kthread_fn,
+	.thread_comm		= "posixcputmr/%u",
+	.setup			= posix_cpu_kthread_setup,
+	.park			= posix_cpu_kthread_park,
+	.unpark			= posix_cpu_kthread_unpark,
+};
+
+static int __init posix_cpu_thread_init(void)
+{
+	/* Start one for boot CPU. */
+	unsigned long cpu;
+	int ret;
+
+	/* init the per-cpu posix_timer_tasklets */
+	for_each_possible_cpu(cpu)
+		per_cpu(posix_timer_tasklist, cpu) = NULL;
+
+	ret = smpboot_register_percpu_thread(&posix_cpu_thread);
+	WARN_ON(ret);
+
+	return 0;
+}
+early_initcall(posix_cpu_thread_init);
+
+#else /* CONFIG_PREEMPT_RT */
+void run_posix_cpu_timers(void)
+{
+	lockdep_assert_irqs_disabled();
+	__run_posix_cpu_timers(current);
+}
+#endif /* CONFIG_PREEMPT_RT */
+
 /*
  * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
  * The tsk->sighand->siglock must be held by the caller.