Blob Blame History Raw
From: John Stultz <johnstul@us.ibm.com>
Date: Fri, 3 Jul 2009 08:29:58 -0500
Subject: posix-timers: Thread posix-cpu-timers on -rt
Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git
Git-commit: 470637c623fc39a4804c3611e480b5ccdf3dca1c
Patch-mainline: Queued in subsystem maintainer repository
References: SLE Realtime Extension

posix-cpu-timer code takes non -rt safe locks in hard irq
context. Move it to a thread.

[ 3.0 fixes from Peter Zijlstra <peterz@infradead.org> ]

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Mike Galbraith <mgalbraith@suse.de>
---
 include/linux/init_task.h      |    7 +
 include/linux/sched.h          |    3 
 kernel/fork.c                  |    3 
 kernel/time/posix-cpu-timers.c |  157 +++++++++++++++++++++++++++++++++++++++--
 4 files changed, 166 insertions(+), 4 deletions(-)

--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -168,6 +168,12 @@ extern struct cred init_cred;
 # define INIT_PERF_EVENTS(tsk)
 #endif
 
+#ifdef CONFIG_PREEMPT_RT_BASE
+# define INIT_TIMER_LIST		.posix_timer_list = NULL,
+#else
+# define INIT_TIMER_LIST
+#endif
+
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 # define INIT_VTIME(tsk)						\
 	.vtime.seqcount = SEQCNT_ZERO(tsk.vtime.seqcount),		\
@@ -282,6 +288,7 @@ extern struct cred init_cred;
 	INIT_CPU_TIMERS(tsk)						\
 	.pi_lock	= __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock),	\
 	.timer_slack_ns = 50000, /* 50 usec default slack */		\
+	INIT_TIMER_LIST							\
 	.pids = {							\
 		[PIDTYPE_PID]  = INIT_PID_LINK(PIDTYPE_PID),		\
 		[PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID),		\
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -720,6 +720,9 @@ struct task_struct {
 #ifdef CONFIG_POSIX_TIMERS
 	struct task_cputime		cputime_expires;
 	struct list_head		cpu_timers[3];
+#ifdef CONFIG_PREEMPT_RT_BASE
+	struct task_struct		*posix_timer_list;
+#endif
 #endif
 
 	/* Process credentials: */
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1461,6 +1461,9 @@ static void rt_mutex_init_task(struct ta
  */
 static void posix_cpu_timers_init(struct task_struct *tsk)
 {
+#ifdef CONFIG_PREEMPT_RT_BASE
+	tsk->posix_timer_list = NULL;
+#endif
 	tsk->cputime_expires.prof_exp = 0;
 	tsk->cputime_expires.virt_exp = 0;
 	tsk->cputime_expires.sched_exp = 0;
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -2,8 +2,10 @@
  * Implement CPU time clocks for the POSIX clock interface.
  */
 
+#include <uapi/linux/sched/types.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/cputime.h>
+#include <linux/sched/rt.h>
 #include <linux/posix-timers.h>
 #include <linux/errno.h>
 #include <linux/math64.h>
@@ -12,6 +14,7 @@
 #include <trace/events/timer.h>
 #include <linux/tick.h>
 #include <linux/workqueue.h>
+#include <linux/smpboot.h>
 
 /*
  * Called after updating RLIMIT_CPU to run cpu timer and update
@@ -590,7 +593,7 @@ static int posix_cpu_timer_set(struct k_
 	/*
 	 * Disarm any old timer after extracting its expiry time.
 	 */
-	lockdep_assert_irqs_disabled();
+	lockdep_assert_irqs_disabled_nonrt();
 
 	ret = 0;
 	old_incr = timer->it.cpu.incr;
@@ -1027,7 +1030,7 @@ void posix_cpu_timer_schedule(struct k_i
 	/*
 	 * Now re-arm for the new expiry time.
 	 */
-	lockdep_assert_irqs_disabled();
+	lockdep_assert_irqs_disabled_nonrt();
 	arm_timer(timer);
 	unlock_task_sighand(p, &flags);
 
@@ -1116,13 +1119,13 @@ static inline int fastpath_timer_check(s
  * already updated our counts.  We need to check if any timers fire now.
  * Interrupts are disabled.
  */
-void run_posix_cpu_timers(struct task_struct *tsk)
+static void __run_posix_cpu_timers(struct task_struct *tsk)
 {
 	LIST_HEAD(firing);
 	struct k_itimer *timer, *next;
 	unsigned long flags;
 
-	lockdep_assert_irqs_disabled();
+	lockdep_assert_irqs_disabled_nonrt();
 
 	/*
 	 * The fast path checks that there are no expired thread or thread
@@ -1176,6 +1179,152 @@ void run_posix_cpu_timers(struct task_st
 	}
 }
 
+#ifdef CONFIG_PREEMPT_RT_BASE
+#include <linux/kthread.h>
+#include <linux/cpu.h>
+DEFINE_PER_CPU(struct task_struct *, posix_timer_task);
+DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist);
+DEFINE_PER_CPU(bool, posix_timer_th_active);
+
+static void posix_cpu_kthread_fn(unsigned int cpu)
+{
+	struct task_struct *tsk = NULL;
+	struct task_struct *next = NULL;
+
+	BUG_ON(per_cpu(posix_timer_task, cpu) != current);
+
+	/* grab task list */
+	raw_local_irq_disable();
+	tsk = per_cpu(posix_timer_tasklist, cpu);
+	per_cpu(posix_timer_tasklist, cpu) = NULL;
+	raw_local_irq_enable();
+
+	/* its possible the list is empty, just return */
+	if (!tsk)
+		return;
+
+	/* Process task list */
+	while (1) {
+		/* save next */
+		next = tsk->posix_timer_list;
+
+		/* run the task timers, clear its ptr and
+		 * unreference it
+		 */
+		__run_posix_cpu_timers(tsk);
+		tsk->posix_timer_list = NULL;
+		put_task_struct(tsk);
+
+		/* check if this is the last on the list */
+		if (next == tsk)
+			break;
+		tsk = next;
+	}
+}
+
+static inline int __fastpath_timer_check(struct task_struct *tsk)
+{
+	/* tsk == current, ensure it is safe to use ->signal/sighand */
+	if (unlikely(tsk->exit_state))
+		return 0;
+
+	if (!task_cputime_zero(&tsk->cputime_expires))
+			return 1;
+
+	if (!task_cputime_zero(&tsk->signal->cputime_expires))
+			return 1;
+
+	return 0;
+}
+
+void run_posix_cpu_timers(struct task_struct *tsk)
+{
+	unsigned int cpu = smp_processor_id();
+	struct task_struct *tasklist;
+
+	BUG_ON(!irqs_disabled());
+
+	if (per_cpu(posix_timer_th_active, cpu) != true)
+		return;
+
+	/* get per-cpu references */
+	tasklist = per_cpu(posix_timer_tasklist, cpu);
+
+	/* check to see if we're already queued */
+	if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) {
+		get_task_struct(tsk);
+		if (tasklist) {
+			tsk->posix_timer_list = tasklist;
+		} else {
+			/*
+			 * The list is terminated by a self-pointing
+			 * task_struct
+			 */
+			tsk->posix_timer_list = tsk;
+		}
+		per_cpu(posix_timer_tasklist, cpu) = tsk;
+
+		wake_up_process(per_cpu(posix_timer_task, cpu));
+	}
+}
+
+static int posix_cpu_kthread_should_run(unsigned int cpu)
+{
+	return __this_cpu_read(posix_timer_tasklist) != NULL;
+}
+
+static void posix_cpu_kthread_park(unsigned int cpu)
+{
+	this_cpu_write(posix_timer_th_active, false);
+}
+
+static void posix_cpu_kthread_unpark(unsigned int cpu)
+{
+	this_cpu_write(posix_timer_th_active, true);
+}
+
+static void posix_cpu_kthread_setup(unsigned int cpu)
+{
+	struct sched_param sp;
+
+	sp.sched_priority = MAX_RT_PRIO - 1;
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+	posix_cpu_kthread_unpark(cpu);
+}
+
+static struct smp_hotplug_thread posix_cpu_thread = {
+	.store			= &posix_timer_task,
+	.thread_should_run	= posix_cpu_kthread_should_run,
+	.thread_fn		= posix_cpu_kthread_fn,
+	.thread_comm		= "posixcputmr/%u",
+	.setup			= posix_cpu_kthread_setup,
+	.park			= posix_cpu_kthread_park,
+	.unpark			= posix_cpu_kthread_unpark,
+};
+
+static int __init posix_cpu_thread_init(void)
+{
+	/* Start one for boot CPU. */
+	unsigned long cpu;
+	int ret;
+
+	/* init the per-cpu posix_timer_tasklets */
+	for_each_possible_cpu(cpu)
+		per_cpu(posix_timer_tasklist, cpu) = NULL;
+
+	ret = smpboot_register_percpu_thread(&posix_cpu_thread);
+	WARN_ON(ret);
+
+	return 0;
+}
+early_initcall(posix_cpu_thread_init);
+#else /* CONFIG_PREEMPT_RT_BASE */
+void run_posix_cpu_timers(struct task_struct *tsk)
+{
+	__run_posix_cpu_timers(tsk);
+}
+#endif /* CONFIG_PREEMPT_RT_BASE */
+
 /*
  * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
  * The tsk->sighand->siglock must be held by the caller.