From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 5 Sep 2017 16:22:16 +0200
Subject: Add Anna-Maria's "Provide softirq context hrtimers" + RT fixups
Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git
Git-commit: 3e71e14ef8bfec5651a13d6eeaec76b57e9eaf9a
Patch-mainline: Queued in subsystem maintainer repository
References: SLE12 Realtime Extension

The "Provide softirq context hrtimers" includes the following patches:
  0001-hrtimer-Use-predefined-function-for-updating-next_ti.patch
  0002-hrtimer-Correct-blantanly-wrong-comment.patch
  0003-hrtimer-Fix-kerneldoc-for-struct-hrtimer_cpu_base.patch
  0004-hrtimer-Cleanup-clock-argument-in-schedule_hrtimeout.patch
  0005-hrtimer-Switch-for-loop-to-_ffs-evaluation.patch
  0006-hrtimer-Store-running-timer-in-hrtimer_clock_base.patch
  0007-hrtimer-Reduce-conditional-code-hres_active.patch
  0008-hrtimer-Reduce-conditional-code-expires_next-next_ti.patch
  0009-hrtimer-Reduce-conditional-code-hrtimer_reprogram.patch
  0010-hrtimer-Make-handling-of-hrtimer-reprogramming-and-e.patch
  0011-hrtimer-Allow-remote-hrtimer-enqueue-with-expires_ne.patch
  0012-hrtimer-Simplify-hrtimer_reprogram-call.patch
  0013-hrtimer-Split-out-code-from-hrtimer_start_range_ns-f.patch
  0014-hrtimer-Split-out-code-from-__hrtimer_get_next_event.patch
  0015-hrtimer-Add-clock-bases-for-soft-irq-context.patch
  0016-hrtimer-Allow-function-reuse-for-softirq-based-hrtim.patch
  0017-hrtimer-Implementation-of-softirq-hrtimer-handling.patch
  0018-hrtimer-Enable-soft-and-hard-hrtimer.patch
  0019-can-bcm-Replace-hrtimer_tasklet-with-softirq-based-h.patch
  0020-mac80211_hwsim-Replace-hrtimer-tasklet-with-softirq-.patch
  0021-xfrm-Replace-hrtimer-tasklet-with-softirq-hrtimer.patch
  0022-softirq-Remove-tasklet_hrtimer.patch

The integration of this series resulted in the removal of the old
"irqsafe" member and the custom softirq infrastructure, which in turn
made the following patches obsolete:
  KVM-lapic-mark-LAPIC-timer-handler-as-irqsafe.patch
  kernel-perf-mark-perf_cpu_context-s-timer-as-irqsafe.patch
  perf-make-swevent-hrtimer-irqsafe.patch
  sched-deadline-dl_task_timer-has-to-be-irqsafe.patch
  tick-broadcast--Make-hrtimer-irqsafe.patch
  hrtimer-enfore-64byte-alignment.patch
  hrtimer-fixup-hrtimer-callback-changes-for-preempt-r.patch
  kernel-hrtimer-don-t-wakeup-a-process-while-holding-.patch
  kernel-hrtimer-hotplug-don-t-wake-ktimersoftd-while-.patch
  kernel-hrtimer-migrate-deferred-timer-on-CPU-down.patch
  timer-hrtimer-check-properly-for-a-running-timer.patch

The "old" functionality where all timers were most hrtimers are moved
into softirq context is preserved by
  hrtimer-consolidate-hrtimer_init-hrtimer_init_sleepe.patch
  hrtimer-by-timers-by-default-into-the-softirq-context.patch

and updating
  hrtimers-prepare-full-preemption.patch
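
For illustration, a minimal sketch of a user of the new clock ids
(hypothetical driver code, not part of this patch; it mirrors the
mac80211_hwsim conversion further down). A timer initialized with a
*_SOFT clock id expires in softirq context, a *_HARD one in hard
interrupt context; the plain clock ids keep the per-config default
(softirq on PREEMPT_RT_FULL, hardirq otherwise):

  #include <linux/hrtimer.h>
  #include <linux/ktime.h>

  static struct hrtimer demo_timer;   /* hypothetical example timer */

  /* Expires in softirq context because of CLOCK_MONOTONIC_SOFT. */
  static enum hrtimer_restart demo_timer_fn(struct hrtimer *t)
  {
          hrtimer_forward_now(t, ms_to_ktime(100));
          return HRTIMER_RESTART;
  }

  static void demo_timer_setup(void)
  {
          hrtimer_init(&demo_timer, CLOCK_MONOTONIC_SOFT, HRTIMER_MODE_REL);
          demo_timer.function = demo_timer_fn;
          hrtimer_start(&demo_timer, ms_to_ktime(100), HRTIMER_MODE_REL);
  }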

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Mike Galbraith <mgalbraith@suse.de>
---
 arch/x86/kvm/lapic.c                  |    3 
 block/blk-mq.c                        |    3 
 drivers/net/wireless/mac80211_hwsim.c |   44 --
 include/linux/hrtimer.h               |  116 +++--
 include/linux/interrupt.h             |   25 -
 include/linux/wait.h                  |    4 
 include/net/xfrm.h                    |    2 
 kernel/events/core.c                  |    6 
 kernel/futex.c                        |   19 
 kernel/sched/core.c                   |    3 
 kernel/sched/deadline.c               |    3 
 kernel/sched/rt.c                     |    5 
 kernel/softirq.c                      |   51 --
 kernel/time/hrtimer.c                 |  740 ++++++++++++++++++----------------
 kernel/time/tick-broadcast-hrtimer.c  |    3 
 kernel/time/tick-sched.c              |    3 
 kernel/watchdog.c                     |    3 
 net/can/bcm.c                         |  150 ++----
 net/core/pktgen.c                     |    4 
 net/xfrm/xfrm_state.c                 |   29 -
 20 files changed, 573 insertions(+), 643 deletions(-)

--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2184,10 +2184,9 @@ int kvm_create_lapic(struct kvm_vcpu *vc
 	}
 	apic->vcpu = vcpu;
 
-	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
+	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC_HARD,
 		     HRTIMER_MODE_ABS_PINNED);
 	apic->lapic_timer.timer.function = apic_timer_fn;
-	apic->lapic_timer.timer.irqsafe = 1;
 
 	/*
 	 * APIC is created enabled. This will prevent kvm_lapic_set_base from
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3230,10 +3230,9 @@ static bool blk_mq_poll_hybrid_sleep(str
 	kt = nsecs;
 
 	mode = HRTIMER_MODE_REL;
-	hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode);
+	hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode, current);
 	hrtimer_set_expires(&hs.timer, kt);
 
-	hrtimer_init_sleeper(&hs, current);
 	do {
 		if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
 			break;
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -545,7 +545,7 @@ struct mac80211_hwsim_data {
 	unsigned int rx_filter;
 	bool started, idle, scanning;
 	struct mutex mutex;
-	struct tasklet_hrtimer beacon_timer;
+	struct hrtimer beacon_timer;
 	enum ps_mode {
 		PS_DISABLED, PS_ENABLED, PS_AUTO_POLL, PS_MANUAL_POLL
 	} ps;
@@ -1479,7 +1479,7 @@ static void mac80211_hwsim_stop(struct i
 {
 	struct mac80211_hwsim_data *data = hw->priv;
 	data->started = false;
-	tasklet_hrtimer_cancel(&data->beacon_timer);
+	hrtimer_cancel(&data->beacon_timer);
 	wiphy_dbg(hw->wiphy, "%s\n", __func__);
 }
 
@@ -1602,14 +1602,12 @@ static enum hrtimer_restart
 mac80211_hwsim_beacon(struct hrtimer *timer)
 {
 	struct mac80211_hwsim_data *data =
-		container_of(timer, struct mac80211_hwsim_data,
-			     beacon_timer.timer);
+		container_of(timer, struct mac80211_hwsim_data, beacon_timer);
 	struct ieee80211_hw *hw = data->hw;
 	u64 bcn_int = data->beacon_int;
-	ktime_t next_bcn;
 
 	if (!data->started)
-		goto out;
+		return HRTIMER_NORESTART;
 
 	ieee80211_iterate_active_interfaces_atomic(
 		hw, IEEE80211_IFACE_ITER_NORMAL,
@@ -1621,11 +1619,9 @@ mac80211_hwsim_beacon(struct hrtimer *ti
 		data->bcn_delta = 0;
 	}
 
-	next_bcn = ktime_add(hrtimer_get_expires(timer),
-			     ns_to_ktime(bcn_int * 1000));
-	tasklet_hrtimer_start(&data->beacon_timer, next_bcn, HRTIMER_MODE_ABS);
-out:
-	return HRTIMER_NORESTART;
+	hrtimer_forward(&data->beacon_timer, hrtimer_get_expires(timer),
+			ns_to_ktime(bcn_int * NSEC_PER_USEC));
+	return HRTIMER_RESTART;
 }
 
 static const char * const hwsim_chanwidths[] = {
@@ -1699,15 +1695,15 @@ static int mac80211_hwsim_config(struct
 	mutex_unlock(&data->mutex);
 
 	if (!data->started || !data->beacon_int)
-		tasklet_hrtimer_cancel(&data->beacon_timer);
-	else if (!hrtimer_is_queued(&data->beacon_timer.timer)) {
+		hrtimer_cancel(&data->beacon_timer);
+	else if (!hrtimer_is_queued(&data->beacon_timer)) {
 		u64 tsf = mac80211_hwsim_get_tsf(hw, NULL);
 		u32 bcn_int = data->beacon_int;
 		u64 until_tbtt = bcn_int - do_div(tsf, bcn_int);
 
-		tasklet_hrtimer_start(&data->beacon_timer,
-				      ns_to_ktime(until_tbtt * 1000),
-				      HRTIMER_MODE_REL);
+		hrtimer_start(&data->beacon_timer,
+			      ns_to_ktime(until_tbtt * 1000),
+			      HRTIMER_MODE_REL);
 	}
 
 	return 0;
@@ -1770,7 +1766,7 @@ static void mac80211_hwsim_bss_info_chan
 			  info->enable_beacon, info->beacon_int);
 		vp->bcn_en = info->enable_beacon;
 		if (data->started &&
-		    !hrtimer_is_queued(&data->beacon_timer.timer) &&
+		    !hrtimer_is_queued(&data->beacon_timer) &&
 		    info->enable_beacon) {
 			u64 tsf, until_tbtt;
 			u32 bcn_int;
@@ -1778,9 +1774,9 @@ static void mac80211_hwsim_bss_info_chan
 			tsf = mac80211_hwsim_get_tsf(hw, vif);
 			bcn_int = data->beacon_int;
 			until_tbtt = bcn_int - do_div(tsf, bcn_int);
-			tasklet_hrtimer_start(&data->beacon_timer,
-					      ns_to_ktime(until_tbtt * 1000),
-					      HRTIMER_MODE_REL);
+			hrtimer_start(&data->beacon_timer,
+				      ns_to_ktime(until_tbtt * 1000),
+				      HRTIMER_MODE_REL);
 		} else if (!info->enable_beacon) {
 			unsigned int count = 0;
 			ieee80211_iterate_active_interfaces_atomic(
@@ -1789,7 +1785,7 @@ static void mac80211_hwsim_bss_info_chan
 			wiphy_dbg(hw->wiphy, "  beaconing vifs remaining: %u",
 				  count);
 			if (count == 0) {
-				tasklet_hrtimer_cancel(&data->beacon_timer);
+				hrtimer_cancel(&data->beacon_timer);
 				data->beacon_int = 0;
 			}
 		}
@@ -2878,9 +2874,9 @@ static int mac80211_hwsim_new_radio(stru
 
 	wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST);
 
-	tasklet_hrtimer_init(&data->beacon_timer,
-			     mac80211_hwsim_beacon,
-			     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&data->beacon_timer, CLOCK_MONOTONIC_SOFT,
+		     HRTIMER_MODE_ABS);
+	data->beacon_timer.function = mac80211_hwsim_beacon;
 
 	err = ieee80211_register_hw(hw);
 	if (err < 0) {
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -24,6 +24,23 @@
 #include <linux/timerqueue.h>
 #include <linux/wait.h>
 
+/*
+ * Clock ids for hrtimers which expire in softirq context. These clock ids
+ * are kernel internal and never exported to user space.
+ */
+#define HRTIMER_BASE_SOFT_MASK	MAX_CLOCKS
+#define HRTIMER_BASE_HARD_MASK	(MAX_CLOCKS << 1)
+
+#define CLOCK_REALTIME_SOFT	(CLOCK_REALTIME	 | HRTIMER_BASE_SOFT_MASK)
+#define CLOCK_MONOTONIC_SOFT	(CLOCK_MONOTONIC | HRTIMER_BASE_SOFT_MASK)
+#define CLOCK_BOOTTIME_SOFT	(CLOCK_BOOTTIME	 | HRTIMER_BASE_SOFT_MASK)
+#define CLOCK_TAI_SOFT		(CLOCK_TAI	 | HRTIMER_BASE_SOFT_MASK)
+
+#define CLOCK_REALTIME_HARD	(CLOCK_REALTIME	 | HRTIMER_BASE_HARD_MASK)
+#define CLOCK_MONOTONIC_HARD	(CLOCK_MONOTONIC | HRTIMER_BASE_HARD_MASK)
+#define CLOCK_BOOTTIME_HARD	(CLOCK_BOOTTIME	 | HRTIMER_BASE_HARD_MASK)
+#define CLOCK_TAI_HARD		(CLOCK_TAI	 | HRTIMER_BASE_HARD_MASK)
+
 struct hrtimer_clock_base;
 struct hrtimer_cpu_base;
 
@@ -86,8 +103,6 @@ enum hrtimer_restart {
  *		was armed.
  * @function:	timer expiry callback function
  * @base:	pointer to the timer base (per cpu and per clock)
- * @cb_entry:	list entry to defer timers from hardirq context
- * @irqsafe:	timer can run in hardirq context
  * @state:	state information (See bit values above)
  * @is_rel:	Set if the timer was armed relative
  *
@@ -98,8 +113,6 @@ struct hrtimer {
 	ktime_t				_softexpires;
 	enum hrtimer_restart		(*function)(struct hrtimer *);
 	struct hrtimer_clock_base	*base;
-	struct list_head		cb_entry;
-	int				irqsafe;
 	u8				state;
 	u8				is_rel;
 };
@@ -116,7 +129,11 @@ struct hrtimer_sleeper {
 	struct task_struct *task;
 };
 
-# define HRTIMER_CLOCK_BASE_ALIGN	64
+#ifdef CONFIG_64BIT
+# define __hrtimer_clock_base_align	____cacheline_aligned
+#else
+# define __hrtimer_clock_base_align
+#endif
 
 /**
  * struct hrtimer_clock_base - the timer base for a specific clock
@@ -124,43 +141,46 @@ struct hrtimer_sleeper {
  * @index:		clock type index for per_cpu support when moving a
  *			timer to a base on another cpu.
  * @clockid:		clock id for per_cpu support
+ * @seq:		seqcount around __run_hrtimer
+ * @running:		pointer to the currently running hrtimer
  * @active:		red black tree root node for the active timers
- * @expired:		list head for deferred timers.
  * @get_time:		function to retrieve the current time of the clock
  * @offset:		offset of this clock to the monotonic base
  */
 struct hrtimer_clock_base {
 	struct hrtimer_cpu_base	*cpu_base;
-	int			index;
+	unsigned int		index;
 	clockid_t		clockid;
+	seqcount_t		seq;
+	struct hrtimer		*running;
 	struct timerqueue_head	active;
-	struct list_head	expired;
 	ktime_t			(*get_time)(void);
 	ktime_t			offset;
-} __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN)));
+} __hrtimer_clock_base_align;
 
 enum  hrtimer_base_type {
 	HRTIMER_BASE_MONOTONIC,
 	HRTIMER_BASE_REALTIME,
 	HRTIMER_BASE_BOOTTIME,
 	HRTIMER_BASE_TAI,
+	HRTIMER_BASE_MONOTONIC_SOFT,
+	HRTIMER_BASE_REALTIME_SOFT,
+	HRTIMER_BASE_BOOTTIME_SOFT,
+	HRTIMER_BASE_TAI_SOFT,
 	HRTIMER_MAX_CLOCK_BASES,
 };
 
-/*
+/**
  * struct hrtimer_cpu_base - the per cpu clock bases
  * @lock:		lock protecting the base and associated clock bases
  *			and timers
- * @seq:		seqcount around __run_hrtimer
- * @running:		pointer to the currently running hrtimer
  * @cpu:		cpu number
  * @active_bases:	Bitfield to mark bases with active timers
  * @clock_was_set_seq:	Sequence counter of clock was set events
  * @migration_enabled:	The migration of hrtimers to other cpus is enabled
  * @nohz_active:	The nohz functionality is enabled
- * @expires_next:	absolute time of the next event which was scheduled
- *			via clock_set_next_event()
- * @next_timer:		Pointer to the first expiring timer
+ * @softirq_activated:	displays, if the softirq is raised - update of softirq
+ *			related settings is not required then.
  * @in_hrtirq:		hrtimer_interrupt() is currently executing
  * @hres_active:	State of high resolution mode
  * @hang_detected:	The last hrtimer interrupt detected a hang
@@ -168,6 +188,11 @@ enum  hrtimer_base_type {
  * @nr_retries:		Total number of hrtimer interrupt retries
  * @nr_hangs:		Total number of hrtimer interrupt hangs
  * @max_hang_time:	Maximum time spent in hrtimer_interrupt
+ * @expires_next:	absolute time of the next event, is required for remote
+ *			hrtimer enqueue; it is the total first expiry time (hard
+ *			and soft hrtimer are taken into account)
+ * @next_timer:		Pointer to the first expiring timer
+ * @softirq_expires_next: Time to check, if soft queues needs also to be expired
  * @clock_base:		array of clock bases for this cpu
  *
  * Note: next_timer is just an optimization for __remove_hrtimer().
@@ -176,25 +201,24 @@ enum  hrtimer_base_type {
  */
 struct hrtimer_cpu_base {
 	raw_spinlock_t			lock;
-	seqcount_t			seq;
-	struct hrtimer			*running;
-	struct hrtimer			*running_soft;
 	unsigned int			cpu;
 	unsigned int			active_bases;
 	unsigned int			clock_was_set_seq;
 	bool				migration_enabled;
 	bool				nohz_active;
-#ifdef CONFIG_HIGH_RES_TIMERS
-	unsigned int			in_hrtirq	: 1,
-					hres_active	: 1,
+	bool				softirq_activated;
+	unsigned int			hres_active	: 1,
+					in_hrtirq	: 1,
 					hang_detected	: 1;
-	ktime_t				expires_next;
-	struct hrtimer			*next_timer;
+#ifdef CONFIG_HIGH_RES_TIMERS
 	unsigned int			nr_events;
 	unsigned int			nr_retries;
 	unsigned int			nr_hangs;
 	unsigned int			max_hang_time;
 #endif
+	ktime_t				expires_next;
+	struct hrtimer			*next_timer;
+	ktime_t				softirq_expires_next;
 #ifdef CONFIG_PREEMPT_RT_BASE
 	wait_queue_head_t		wait;
 #endif
@@ -203,8 +227,6 @@ struct hrtimer_cpu_base {
 
 static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
 {
-	BUILD_BUG_ON(sizeof(struct hrtimer_clock_base) > HRTIMER_CLOCK_BASE_ALIGN);
-
 	timer->node.expires = time;
 	timer->_softexpires = time;
 }
@@ -273,16 +295,16 @@ static inline ktime_t hrtimer_cb_get_tim
 	return timer->base->get_time();
 }
 
-#ifdef CONFIG_HIGH_RES_TIMERS
-struct clock_event_device;
-
-extern void hrtimer_interrupt(struct clock_event_device *dev);
-
 static inline int hrtimer_is_hres_active(struct hrtimer *timer)
 {
 	return timer->base->cpu_base->hres_active;
 }
 
+#ifdef CONFIG_HIGH_RES_TIMERS
+struct clock_event_device;
+
+extern void hrtimer_interrupt(struct clock_event_device *dev);
+
 /*
  * The resolution of the clocks. The resolution value is returned in
  * the clock_getres() system call to give application programmers an
@@ -305,11 +327,6 @@ extern unsigned int hrtimer_resolution;
 
 #define hrtimer_resolution	(unsigned int)LOW_RES_NSEC
 
-static inline int hrtimer_is_hres_active(struct hrtimer *timer)
-{
-	return 0;
-}
-
 static inline void clock_was_set_delayed(void) { }
 
 #endif
@@ -351,10 +368,17 @@ DECLARE_PER_CPU(struct tick_device, tick
 /* Initialize timers: */
 extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock,
 			 enum hrtimer_mode mode);
+extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
+				 enum hrtimer_mode mode,
+				 struct task_struct *task);
 
 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock,
 				  enum hrtimer_mode mode);
+extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+					  clockid_t clock_id,
+					  enum hrtimer_mode mode,
+					  struct task_struct *task);
 
 extern void destroy_hrtimer_on_stack(struct hrtimer *timer);
 #else
@@ -364,6 +388,15 @@ static inline void hrtimer_init_on_stack
 {
 	hrtimer_init(timer, which_clock, mode);
 }
+
+static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+					    clockid_t clock_id,
+					    enum hrtimer_mode mode,
+					    struct task_struct *task)
+{
+	hrtimer_init_sleeper(sl, clock_id, mode, task);
+}
+
 static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { }
 #endif
 
@@ -442,13 +475,7 @@ static inline bool hrtimer_is_queued(str
  */
 static inline int hrtimer_callback_running(const struct hrtimer *timer)
 {
-	if (timer->base->cpu_base->running == timer)
-		return 1;
-#ifdef CONFIG_PREEMPT_RT_BASE
-	if (timer->base->cpu_base->running_soft == timer)
-		return 1;
-#endif
-	return 0;
+	return timer->base->running == timer;
 }
 
 /* Forward a hrtimer so it expires after now: */
@@ -484,15 +511,12 @@ extern long hrtimer_nanosleep(struct tim
 			      const clockid_t clockid);
 extern long hrtimer_nanosleep_restart(struct restart_block *restart_block);
 
-extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
-				 struct task_struct *tsk);
-
 extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta,
 						const enum hrtimer_mode mode);
 extern int schedule_hrtimeout_range_clock(ktime_t *expires,
 					  u64 delta,
 					  const enum hrtimer_mode mode,
-					  int clock);
+					  clockid_t clock_id);
 extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode);
 
 /* Soft interrupt function to run the hrtimer queues: */
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -653,31 +653,6 @@ extern void tasklet_kill_immediate(struc
 extern void tasklet_init(struct tasklet_struct *t,
 			 void (*func)(unsigned long), unsigned long data);
 
-struct tasklet_hrtimer {
-	struct hrtimer		timer;
-	struct tasklet_struct	tasklet;
-	enum hrtimer_restart	(*function)(struct hrtimer *);
-};
-
-extern void
-tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
-		     enum hrtimer_restart (*function)(struct hrtimer *),
-		     clockid_t which_clock, enum hrtimer_mode mode);
-
-static inline
-void tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time,
-			   const enum hrtimer_mode mode)
-{
-	hrtimer_start(&ttimer->timer, time, mode);
-}
-
-static inline
-void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer)
-{
-	hrtimer_cancel(&ttimer->timer);
-	tasklet_kill(&ttimer->tasklet);
-}
-
 #ifdef CONFIG_PREEMPT_RT_FULL
 extern void softirq_early_init(void);
 #else
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -486,8 +486,8 @@ do {										\
 	int __ret = 0;								\
 	struct hrtimer_sleeper __t;						\
 										\
-	hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);	\
-	hrtimer_init_sleeper(&__t, current);					\
+	hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC,			\
+				      HRTIMER_MODE_REL, current);		\
 	if ((timeout) != KTIME_MAX)						\
 		hrtimer_start_range_ns(&__t.timer, timeout,			\
 				       current->timer_slack_ns,			\
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -214,7 +214,7 @@ struct xfrm_state {
 	struct xfrm_stats	stats;
 
 	struct xfrm_lifetime_cur curlft;
-	struct tasklet_hrtimer	mtimer;
+	struct hrtimer		mtimer;
 
 	struct xfrm_state_offload xso;
 
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1084,9 +1084,8 @@ static void __perf_mux_hrtimer_init(stru
 	cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
 
 	raw_spin_lock_init(&cpuctx->hrtimer_lock);
-	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+	hrtimer_init(timer, CLOCK_MONOTONIC_HARD, HRTIMER_MODE_ABS_PINNED);
 	timer->function = perf_mux_hrtimer_handler;
-	timer->irqsafe = 1;
 }
 
 static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
@@ -9122,9 +9121,8 @@ static void perf_swevent_init_hrtimer(st
 	if (!is_sampling_event(event))
 		return;
 
-	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC_HARD, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swevent_hrtimer;
-	hwc->hrtimer.irqsafe = 1;
 
 	/*
 	 * Since hrtimers have a fixed rate, we can do a static freq->period
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2677,10 +2677,9 @@ static int futex_wait(u32 __user *uaddr,
 	if (abs_time) {
 		to = &timeout;
 
-		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
-				      CLOCK_REALTIME : CLOCK_MONOTONIC,
-				      HRTIMER_MODE_ABS);
-		hrtimer_init_sleeper(to, current);
+		hrtimer_init_sleeper_on_stack(to, (flags & FLAGS_CLOCKRT) ?
+					      CLOCK_REALTIME : CLOCK_MONOTONIC,
+					      HRTIMER_MODE_ABS, current);
 		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
 					     current->timer_slack_ns);
 	}
@@ -2776,9 +2775,8 @@ static int futex_lock_pi(u32 __user *uad
 
 	if (time) {
 		to = &timeout;
-		hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
-				      HRTIMER_MODE_ABS);
-		hrtimer_init_sleeper(to, current);
+		hrtimer_init_sleeper_on_stack(to, CLOCK_REALTIME,
+					      HRTIMER_MODE_ABS, current);
 		hrtimer_set_expires(&to->timer, *time);
 	}
 
@@ -3196,10 +3194,9 @@ static int futex_wait_requeue_pi(u32 __u
 
 	if (abs_time) {
 		to = &timeout;
-		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
-				      CLOCK_REALTIME : CLOCK_MONOTONIC,
-				      HRTIMER_MODE_ABS);
-		hrtimer_init_sleeper(to, current);
+		hrtimer_init_sleeper_on_stack(to, (flags & FLAGS_CLOCKRT) ?
+					      CLOCK_REALTIME : CLOCK_MONOTONIC,
+					      HRTIMER_MODE_ABS, current);
 		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
 					     current->timer_slack_ns);
 	}
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -344,9 +344,8 @@ static void init_rq_hrtick(struct rq *rq
 	rq->hrtick_csd.info = rq;
 #endif
 
-	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC_HARD, HRTIMER_MODE_REL);
 	rq->hrtick_timer.function = hrtick;
-	rq->hrtick_timer.irqsafe = 1;
 }
 #else	/* CONFIG_SCHED_HRTICK */
 static inline void hrtick_clear(struct rq *rq)
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -689,9 +689,8 @@ void init_dl_task_timer(struct sched_dl_
 {
 	struct hrtimer *timer = &dl_se->dl_timer;
 
-	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(timer, CLOCK_MONOTONIC_HARD, HRTIMER_MODE_REL);
 	timer->function = dl_task_timer;
-	timer->irqsafe = 1;
 }
 
 /*
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -46,9 +46,8 @@ void init_rt_bandwidth(struct rt_bandwid
 
 	raw_spin_lock_init(&rt_b->rt_runtime_lock);
 
-	hrtimer_init(&rt_b->rt_period_timer,
-			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	rt_b->rt_period_timer.irqsafe = 1;
+	hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC_HARD,
+		     HRTIMER_MODE_REL);
 	rt_b->rt_period_timer.function = sched_rt_period_timer;
 }
 
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -1095,57 +1095,6 @@ void tasklet_kill(struct tasklet_struct
 }
 EXPORT_SYMBOL(tasklet_kill);
 
-/*
- * tasklet_hrtimer
- */
-
-/*
- * The trampoline is called when the hrtimer expires. It schedules a tasklet
- * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
- * hrtimer callback, but from softirq context.
- */
-static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
-{
-	struct tasklet_hrtimer *ttimer =
-		container_of(timer, struct tasklet_hrtimer, timer);
-
-	tasklet_hi_schedule(&ttimer->tasklet);
-	return HRTIMER_NORESTART;
-}
-
-/*
- * Helper function which calls the hrtimer callback from
- * tasklet/softirq context
- */
-static void __tasklet_hrtimer_trampoline(unsigned long data)
-{
-	struct tasklet_hrtimer *ttimer = (void *)data;
-	enum hrtimer_restart restart;
-
-	restart = ttimer->function(&ttimer->timer);
-	if (restart != HRTIMER_NORESTART)
-		hrtimer_restart(&ttimer->timer);
-}
-
-/**
- * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
- * @ttimer:	 tasklet_hrtimer which is initialized
- * @function:	 hrtimer callback function which gets called from softirq context
- * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
- * @mode:	 hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
- */
-void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
-			  enum hrtimer_restart (*function)(struct hrtimer *),
-			  clockid_t which_clock, enum hrtimer_mode mode)
-{
-	hrtimer_init(&ttimer->timer, which_clock, mode);
-	ttimer->timer.function = __hrtimer_tasklet_trampoline;
-	tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
-		     (unsigned long)ttimer);
-	ttimer->function = function;
-}
-EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
-
 void __init softirq_init(void)
 {
 	int cpu;
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -59,6 +59,14 @@
 #include "tick-internal.h"
 
 /*
+ * Masks for selecting the soft and hard context timers from
+ * cpu_base->active
+ */
+#define MASK_SHIFT		(HRTIMER_BASE_MONOTONIC_SOFT)
+#define HRTIMER_ACTIVE_HARD	((1U << MASK_SHIFT) - 1)
+#define HRTIMER_ACTIVE_SOFT	(HRTIMER_ACTIVE_HARD << MASK_SHIFT)
+
+/*
  * The timer bases:
  *
  * There are more clockids than hrtimer bases. Thus, we index
@@ -69,7 +77,6 @@
 DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
 {
 	.lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
-	.seq = SEQCNT_ZERO(hrtimer_bases.seq),
 	.clock_base =
 	{
 		{
@@ -92,17 +99,55 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base,
 			.clockid = CLOCK_TAI,
 			.get_time = &ktime_get_clocktai,
 		},
+		{
+			.index = HRTIMER_BASE_MONOTONIC_SOFT,
+			.clockid = CLOCK_MONOTONIC_SOFT,
+			.get_time = &ktime_get,
+		},
+		{
+			.index = HRTIMER_BASE_REALTIME_SOFT,
+			.clockid = CLOCK_REALTIME_SOFT,
+			.get_time = &ktime_get_real,
+		},
+		{
+			.index = HRTIMER_BASE_BOOTTIME_SOFT,
+			.clockid = CLOCK_BOOTTIME_SOFT,
+			.get_time = &ktime_get_boottime,
+		},
+		{
+			.index = HRTIMER_BASE_TAI_SOFT,
+			.clockid = CLOCK_TAI_SOFT,
+			.get_time = &ktime_get_clocktai,
+		},
 	}
 };
 
-static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
+#define MAX_CLOCKS_HRT		(MAX_CLOCKS * 3)
+
+static const int hrtimer_clock_to_base_table[MAX_CLOCKS_HRT] = {
 	/* Make sure we catch unsupported clockids */
-	[0 ... MAX_CLOCKS - 1]	= HRTIMER_MAX_CLOCK_BASES,
+	[0 ... MAX_CLOCKS_HRT - 1]	= HRTIMER_MAX_CLOCK_BASES,
 
-	[CLOCK_REALTIME]	= HRTIMER_BASE_REALTIME,
-	[CLOCK_MONOTONIC]	= HRTIMER_BASE_MONOTONIC,
-	[CLOCK_BOOTTIME]	= HRTIMER_BASE_BOOTTIME,
-	[CLOCK_TAI]		= HRTIMER_BASE_TAI,
+#ifdef CONFIG_PREEMPT_RT_FULL
+	[CLOCK_REALTIME]		= HRTIMER_BASE_REALTIME_SOFT,
+	[CLOCK_MONOTONIC]		= HRTIMER_BASE_MONOTONIC_SOFT,
+	[CLOCK_BOOTTIME]		= HRTIMER_BASE_BOOTTIME_SOFT,
+	[CLOCK_TAI]			= HRTIMER_BASE_TAI_SOFT,
+#else
+	[CLOCK_REALTIME]		= HRTIMER_BASE_REALTIME,
+	[CLOCK_MONOTONIC]		= HRTIMER_BASE_MONOTONIC,
+	[CLOCK_BOOTTIME]		= HRTIMER_BASE_BOOTTIME,
+	[CLOCK_TAI]			= HRTIMER_BASE_TAI,
+#endif
+	[CLOCK_REALTIME_SOFT]		= HRTIMER_BASE_REALTIME_SOFT,
+	[CLOCK_MONOTONIC_SOFT]		= HRTIMER_BASE_MONOTONIC_SOFT,
+	[CLOCK_BOOTTIME_SOFT]		= HRTIMER_BASE_BOOTTIME_SOFT,
+	[CLOCK_TAI_SOFT]		= HRTIMER_BASE_TAI_SOFT,
+
+	[CLOCK_REALTIME_HARD]		= HRTIMER_BASE_REALTIME,
+	[CLOCK_MONOTONIC_HARD]		= HRTIMER_BASE_MONOTONIC,
+	[CLOCK_BOOTTIME_HARD]		= HRTIMER_BASE_BOOTTIME,
+	[CLOCK_TAI_HARD]		= HRTIMER_BASE_TAI,
 };
 
 /*
@@ -117,7 +162,6 @@ static const int hrtimer_clock_to_base_t
  * timer->base->cpu_base
  */
 static struct hrtimer_cpu_base migration_cpu_base = {
-	.seq = SEQCNT_ZERO(migration_cpu_base),
 	.clock_base = { { .cpu_base = &migration_cpu_base, }, },
 };
 
@@ -155,26 +199,21 @@ struct hrtimer_clock_base *lock_hrtimer_
 }
 
 /*
- * With HIGHRES=y we do not migrate the timer when it is expiring
- * before the next event on the target cpu because we cannot reprogram
- * the target cpu hardware and we would cause it to fire late.
+ * We do not migrate the timer when it is expiring before the next
+ * event on the target cpu. When high resolution is enabled, we cannot
+ * reprogram the target cpu hardware and we would cause it to fire
+ * late. To keep it simple, we handle the high resolution enabled and
+ * disabled case similar.
  *
  * Called with cpu_base->lock of target cpu held.
  */
 static int
 hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
 {
-#ifdef CONFIG_HIGH_RES_TIMERS
 	ktime_t expires;
 
-	if (!new_base->cpu_base->hres_active)
-		return 0;
-
 	expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
-	return expires <= new_base->cpu_base->expires_next;
-#else
-	return 0;
-#endif
+	return expires < new_base->cpu_base->expires_next;
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
@@ -453,28 +492,26 @@ static inline void debug_deactivate(stru
 	trace_hrtimer_cancel(timer);
 }
 
-#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
 static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base,
 					     struct hrtimer *timer)
 {
-#ifdef CONFIG_HIGH_RES_TIMERS
 	cpu_base->next_timer = timer;
-#endif
 }
 
-static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
+static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
+					 unsigned int active,
+					 ktime_t expires_next)
 {
-	struct hrtimer_clock_base *base = cpu_base->clock_base;
-	unsigned int active = cpu_base->active_bases;
-	ktime_t expires, expires_next = KTIME_MAX;
+	ktime_t expires;
 
-	hrtimer_update_next_timer(cpu_base, NULL);
-	for (; active; base++, active >>= 1) {
+	while (active) {
+		unsigned int id = __ffs(active);
+		struct hrtimer_clock_base *base;
 		struct timerqueue_node *next;
 		struct hrtimer *timer;
 
-		if (!(active & 0x01))
-			continue;
+		active &= ~(1U << id);
+		base = cpu_base->clock_base + id;
 
 		next = timerqueue_getnext(&base->active);
 		timer = container_of(next, struct hrtimer, node);
@@ -493,7 +530,31 @@ static ktime_t __hrtimer_get_next_event(
 		expires_next = 0;
 	return expires_next;
 }
-#endif
+
+static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
+{
+	unsigned int active;
+	ktime_t expires_next = KTIME_MAX;
+
+	hrtimer_update_next_timer(cpu_base, NULL);
+
+	if (!cpu_base->softirq_activated) {
+		active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
+		expires_next = __hrtimer_next_event_base(cpu_base, active,
+							 expires_next);
+		cpu_base->softirq_expires_next = expires_next;
+	}
+
+	active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
+	expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
+
+	/*
+	 * cpu_base->expires_next is not updated here. It is set only
+	 * in hrtimer_reprogramming path!
+	 */
+
+	return expires_next;
+}
 
 static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
 {
@@ -505,6 +566,19 @@ static inline ktime_t hrtimer_update_bas
 					    offs_real, offs_boot, offs_tai);
 }
 
+/*
+ * Is the high resolution mode active ?
+ */
+static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
+{
+	return cpu_base->hres_active;
+}
+
+static inline int hrtimer_hres_active(void)
+{
+	return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
+}
+
 /* High resolution timer related functions */
 #ifdef CONFIG_HIGH_RES_TIMERS
 
@@ -534,19 +608,6 @@ static inline int hrtimer_is_hres_enable
 }
 
 /*
- * Is the high resolution mode active ?
- */
-static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
-{
-	return cpu_base->hres_active;
-}
-
-static inline int hrtimer_hres_active(void)
-{
-	return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
-}
-
-/*
  * Reprogram the event source with checking both queues for the
  * next event
  * Called with interrupts disabled and base->lock held
@@ -587,79 +648,6 @@ hrtimer_force_reprogram(struct hrtimer_c
 }
 
 /*
- * When a timer is enqueued and expires earlier than the already enqueued
- * timers, we have to check, whether it expires earlier than the timer for
- * which the clock event device was armed.
- *
- * Called with interrupts disabled and base->cpu_base.lock held
- */
-static void hrtimer_reprogram(struct hrtimer *timer,
-			      struct hrtimer_clock_base *base)
-{
-	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
-	ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
-
-	WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
-
-	/*
-	 * If the timer is not on the current cpu, we cannot reprogram
-	 * the other cpus clock event device.
-	 */
-	if (base->cpu_base != cpu_base)
-		return;
-
-	/*
-	 * If the hrtimer interrupt is running, then it will
-	 * reevaluate the clock bases and reprogram the clock event
-	 * device. The callbacks are always executed in hard interrupt
-	 * context so we don't need an extra check for a running
-	 * callback.
-	 */
-	if (cpu_base->in_hrtirq)
-		return;
-
-	/*
-	 * CLOCK_REALTIME timer might be requested with an absolute
-	 * expiry time which is less than base->offset. Set it to 0.
-	 */
-	if (expires < 0)
-		expires = 0;
-
-	if (expires >= cpu_base->expires_next)
-		return;
-
-	/* Update the pointer to the next expiring timer */
-	cpu_base->next_timer = timer;
-
-	/*
-	 * If a hang was detected in the last timer interrupt then we
-	 * do not schedule a timer which is earlier than the expiry
-	 * which we enforced in the hang detection. We want the system
-	 * to make progress.
-	 */
-	if (cpu_base->hang_detected)
-		return;
-
-	/*
-	 * Program the timer hardware. We enforce the expiry for
-	 * events which are already in the past.
-	 */
-	cpu_base->expires_next = expires;
-	tick_program_event(expires, 1);
-}
-
-/*
- * Initialize the high resolution related parts of cpu_base
- */
-static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
-{
-	base->expires_next = KTIME_MAX;
-	base->hang_detected = 0;
-	base->hres_active = 0;
-	base->next_timer = NULL;
-}
-
-/*
  * Retrigger next event is called after clock was set
  *
  * Called with interrupts disabled via on_each_cpu()
@@ -739,20 +727,80 @@ void clock_was_set_delayed(void)
 
 #else
 
-static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *b) { return 0; }
-static inline int hrtimer_hres_active(void) { return 0; }
 static inline int hrtimer_is_hres_enabled(void) { return 0; }
 static inline void hrtimer_switch_to_hres(void) { }
 static inline void
 hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
-static inline void hrtimer_reprogram(struct hrtimer *timer,
-				     struct hrtimer_clock_base *base) { }
-static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
 static inline void retrigger_next_event(void *arg) { }
 
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
 /*
+ * When a timer is enqueued and expires earlier than the already enqueued
+ * timers, we have to check, whether it expires earlier than the timer for
+ * which the clock event device was armed.
+ *
+ * Called with interrupts disabled and base->cpu_base.lock held
+ */
+static void hrtimer_reprogram(struct hrtimer *timer)
+{
+	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+	struct hrtimer_clock_base *base = timer->base;
+	ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
+
+	WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
+
+	/*
+	 * If the timer is not on the current cpu, we cannot reprogram
+	 * the other cpus clock event device.
+	 */
+	if (base->cpu_base != cpu_base)
+		return;
+
+	/*
+	 * If the hrtimer interrupt is running, then it will
+	 * reevaluate the clock bases and reprogram the clock event
+	 * device. The callbacks are always executed in hard interrupt
+	 * context so we don't need an extra check for a running
+	 * callback.
+	 */
+	if (cpu_base->in_hrtirq)
+		return;
+
+	/*
+	 * CLOCK_REALTIME timer might be requested with an absolute
+	 * expiry time which is less than base->offset. Set it to 0.
+	 */
+	if (expires < 0)
+		expires = 0;
+
+	if (expires >= cpu_base->expires_next)
+		return;
+
+	/* Update the pointer to the next expiring timer */
+	hrtimer_update_next_timer(cpu_base, timer);
+	cpu_base->expires_next = expires;
+
+	/*
+	 * If hres is not active, hardware does not have to be
+	 * programmed yet.
+	 *
+	 * If a hang was detected in the last timer interrupt then we
+	 * do not schedule a timer which is earlier than the expiry
+	 * which we enforced in the hang detection. We want the system
+	 * to make progress.
+	 */
+	if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
+		return;
+
+	/*
+	 * Program the timer hardware. We enforce the expiry for
+	 * events which are already in the past.
+	 */
+	tick_program_event(expires, 1);
+}
+
+/*
  * Clock realtime was set
  *
  * Change the offset of the realtime clock vs. the monotonic
@@ -865,7 +913,8 @@ void hrtimer_wait_for_timer(const struct
 {
 	struct hrtimer_clock_base *base = timer->base;
 
-	if (base && base->cpu_base && !timer->irqsafe)
+	if (base && base->cpu_base &&
+	    base->index >= HRTIMER_BASE_MONOTONIC_SOFT)
 		wait_event(base->cpu_base->wait,
 				!(hrtimer_callback_running(timer)));
 }
@@ -917,11 +966,6 @@ static void __remove_hrtimer(struct hrti
 	if (!(state & HRTIMER_STATE_ENQUEUED))
 		return;
 
-	if (unlikely(!list_empty(&timer->cb_entry))) {
-		list_del_init(&timer->cb_entry);
-		return;
-	}
-
 	if (!timerqueue_del(&base->active, &timer->node))
 		cpu_base->active_bases &= ~(1 << base->index);
 
@@ -986,22 +1030,54 @@ static inline ktime_t hrtimer_update_low
 	return tim;
 }
 
-/**
- * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
- * @timer:	the timer to be added
- * @tim:	expiry time
- * @delta_ns:	"slack" range for the timer
- * @mode:	expiry mode: absolute (HRTIMER_MODE_ABS) or
- *		relative (HRTIMER_MODE_REL)
- */
-void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
-			    u64 delta_ns, const enum hrtimer_mode mode)
+static void hrtimer_reprogram_softirq(struct hrtimer *timer)
 {
-	struct hrtimer_clock_base *base, *new_base;
-	unsigned long flags;
-	int leftmost;
+	struct hrtimer_clock_base *base = timer->base;
+	struct hrtimer_cpu_base *cpu_base = base->cpu_base;
+	ktime_t expires;
 
-	base = lock_hrtimer_base(timer, &flags);
+	/*
+	 * The softirq timer is not rearmed, when the softirq was raised
+	 * and has not yet run to completion.
+	 */
+	if (cpu_base->softirq_activated)
+		return;
+
+	expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
+
+	if (!ktime_before(expires, cpu_base->softirq_expires_next))
+		return;
+
+	cpu_base->softirq_expires_next = expires;
+
+	if (!ktime_before(expires, cpu_base->expires_next))
+		return;
+	hrtimer_reprogram(timer);
+}
+
+static void hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base,
+					 bool reprogram)
+{
+	ktime_t expires;
+
+	expires = __hrtimer_get_next_event(cpu_base);
+
+	if (!reprogram || !ktime_before(expires, cpu_base->expires_next))
+		return;
+	/*
+	 * next_timer can be used here, because
+	 * hrtimer_get_next_event() updated the next
+	 * timer. expires_next is only set when reprogramming function
+	 * is called.
+	 */
+	hrtimer_reprogram(cpu_base->next_timer);
+}
+
+static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
+				    u64 delta_ns, const enum hrtimer_mode mode,
+				    struct hrtimer_clock_base *base)
+{
+	struct hrtimer_clock_base *new_base;
 
 	/* Remove an active timer from the queue: */
 	remove_hrtimer(timer, base, true);
@@ -1016,21 +1092,31 @@ void hrtimer_start_range_ns(struct hrtim
 	/* Switch the timer base, if necessary: */
 	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
 
-	leftmost = enqueue_hrtimer(timer, new_base);
-	if (!leftmost)
-		goto unlock;
+	return enqueue_hrtimer(timer, new_base);
+}
 
-	if (!hrtimer_is_hres_active(timer)) {
-		/*
-		 * Kick to reschedule the next tick to handle the new timer
-		 * on dynticks target.
-		 */
-		if (new_base->cpu_base->nohz_active)
-			wake_up_nohz_cpu(new_base->cpu_base->cpu);
-	} else {
-		hrtimer_reprogram(timer, new_base);
+/**
+ * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
+ * @timer:	the timer to be added
+ * @tim:	expiry time
+ * @delta_ns:	"slack" range for the timer
+ * @mode:	expiry mode: absolute (HRTIMER_MODE_ABS) or
+ *		relative (HRTIMER_MODE_REL)
+ */
+void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
+			    u64 delta_ns, const enum hrtimer_mode mode)
+{
+	struct hrtimer_clock_base *base;
+	unsigned long flags;
+
+	base = lock_hrtimer_base(timer, &flags);
+
+	if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base)) {
+		if (timer->base->index < HRTIMER_BASE_MONOTONIC_SOFT)
+			hrtimer_reprogram(timer);
+		else
+			hrtimer_reprogram_softirq(timer);
 	}
-unlock:
 	unlock_hrtimer_base(timer, &flags);
 }
 EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
@@ -1138,14 +1224,18 @@ u64 hrtimer_get_next_event(void)
 
 static inline int hrtimer_clockid_to_base(clockid_t clock_id)
 {
-	if (likely(clock_id < MAX_CLOCKS)) {
+	if (likely(clock_id < MAX_CLOCKS_HRT)) {
 		int base = hrtimer_clock_to_base_table[clock_id];
 
 		if (likely(base != HRTIMER_MAX_CLOCK_BASES))
 			return base;
 	}
 	WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id);
+#ifdef CONFIG_PREEMPT_RT_FULL
+	return HRTIMER_BASE_MONOTONIC_SOFT;
+#else
 	return HRTIMER_BASE_MONOTONIC;
+#endif
 }
 
 static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
@@ -1163,12 +1253,15 @@ static void __hrtimer_init(struct hrtime
 	 * clock modifications, so they needs to become CLOCK_MONOTONIC to
 	 * ensure POSIX compliance.
 	 */
-	if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
-		clock_id = CLOCK_MONOTONIC;
+	if (mode & HRTIMER_MODE_ABS) {
+		if (clock_id == CLOCK_REALTIME)
+			clock_id = CLOCK_MONOTONIC;
+		else if (clock_id == CLOCK_REALTIME_SOFT)
+			clock_id = CLOCK_MONOTONIC_SOFT;
+	}
 
 	base = hrtimer_clockid_to_base(clock_id);
 	timer->base = &cpu_base->clock_base[base];
-	INIT_LIST_HEAD(&timer->cb_entry);
 	timerqueue_init(&timer->node);
 }
 
@@ -1195,20 +1288,19 @@ EXPORT_SYMBOL_GPL(hrtimer_init);
  */
 bool hrtimer_active(const struct hrtimer *timer)
 {
-	struct hrtimer_cpu_base *cpu_base;
+	struct hrtimer_clock_base *base;
 	unsigned int seq;
 
 	do {
-		cpu_base = READ_ONCE(timer->base->cpu_base);
-		seq = raw_read_seqcount_begin(&cpu_base->seq);
+		base = READ_ONCE(timer->base);
+		seq = raw_read_seqcount_begin(&base->seq);
 
 		if (timer->state != HRTIMER_STATE_INACTIVE ||
-		    cpu_base->running_soft == timer ||
-		    cpu_base->running == timer)
+		    base->running == timer)
 			return true;
 
-	} while (read_seqcount_retry(&cpu_base->seq, seq) ||
-		 cpu_base != READ_ONCE(timer->base->cpu_base));
+	} while (read_seqcount_retry(&base->seq, seq) ||
+		 base != READ_ONCE(timer->base));
 
 	return false;
 }
@@ -1234,7 +1326,8 @@ EXPORT_SYMBOL_GPL(hrtimer_active);
 
 static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 			  struct hrtimer_clock_base *base,
-			  struct hrtimer *timer, ktime_t *now)
+			  struct hrtimer *timer, ktime_t *now,
+			  bool hardirq)
 {
 	enum hrtimer_restart (*fn)(struct hrtimer *);
 	int restart;
@@ -1242,16 +1335,16 @@ static void __run_hrtimer(struct hrtimer
 	lockdep_assert_held(&cpu_base->lock);
 
 	debug_deactivate(timer);
-	cpu_base->running = timer;
+	base->running = timer;
 
 	/*
 	 * Separate the ->running assignment from the ->state assignment.
 	 *
 	 * As with a regular write barrier, this ensures the read side in
-	 * hrtimer_active() cannot observe cpu_base->running == NULL &&
+	 * hrtimer_active() cannot observe base->running == NULL &&
 	 * timer->state == INACTIVE.
 	 */
-	raw_write_seqcount_barrier(&cpu_base->seq);
+	raw_write_seqcount_barrier(&base->seq);
 
 	__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
 	fn = timer->function;
@@ -1265,15 +1358,23 @@ static void __run_hrtimer(struct hrtimer
 		timer->is_rel = false;
 
 	/*
-	 * Because we run timers from hardirq context, there is no chance
-	 * they get migrated to another cpu, therefore its safe to unlock
-	 * the timer base.
+	 * The timer is marked as running in the cpu base, so it is
+	 * protected against migration to a different CPU even if the lock
+	 * is dropped.
 	 */
-	raw_spin_unlock(&cpu_base->lock);
+	if (hardirq)
+		raw_spin_unlock(&cpu_base->lock);
+	else
+		raw_spin_unlock_irq(&cpu_base->lock);
+
 	trace_hrtimer_expire_entry(timer, now);
 	restart = fn(timer);
 	trace_hrtimer_expire_exit(timer);
-	raw_spin_lock(&cpu_base->lock);
+
+	if (hardirq)
+		raw_spin_lock(&cpu_base->lock);
+	else
+		raw_spin_lock_irq(&cpu_base->lock);
 
 	/*
 	 * Note: We clear the running state after enqueue_hrtimer and
@@ -1292,125 +1393,28 @@ static void __run_hrtimer(struct hrtimer
 	 * Separate the ->running assignment from the ->state assignment.
 	 *
 	 * As with a regular write barrier, this ensures the read side in
-	 * hrtimer_active() cannot observe cpu_base->running == NULL &&
+	 * hrtimer_active() cannot observe base->running.timer == NULL &&
 	 * timer->state == INACTIVE.
 	 */
-	raw_write_seqcount_barrier(&cpu_base->seq);
-
-	WARN_ON_ONCE(cpu_base->running != timer);
-	cpu_base->running = NULL;
-}
-
-#ifdef CONFIG_PREEMPT_RT_BASE
-static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer,
-				 struct hrtimer_clock_base *base)
-{
-	int leftmost;
-
-	if (restart != HRTIMER_NORESTART &&
-	    !(timer->state & HRTIMER_STATE_ENQUEUED)) {
-
-		leftmost = enqueue_hrtimer(timer, base);
-		if (!leftmost)
-			return;
-#ifdef CONFIG_HIGH_RES_TIMERS
-		if (!hrtimer_is_hres_active(timer)) {
-			/*
-			 * Kick to reschedule the next tick to handle the new timer
-			 * on dynticks target.
-			 */
-			if (base->cpu_base->nohz_active)
-				wake_up_nohz_cpu(base->cpu_base->cpu);
-		} else {
-
-			hrtimer_reprogram(timer, base);
-		}
-#endif
-	}
-}
-
-/*
- * The changes in mainline which removed the callback modes from
- * hrtimer are not yet working with -rt. The non wakeup_process()
- * based callbacks which involve sleeping locks need to be treated
- * seperately.
- */
-static void hrtimer_rt_run_pending(void)
-{
-	enum hrtimer_restart (*fn)(struct hrtimer *);
-	struct hrtimer_cpu_base *cpu_base;
-	struct hrtimer_clock_base *base;
-	struct hrtimer *timer;
-	int index, restart;
-
-	local_irq_disable();
-	cpu_base = &per_cpu(hrtimer_bases, smp_processor_id());
-
-	raw_spin_lock(&cpu_base->lock);
-
-	for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
-		base = &cpu_base->clock_base[index];
-
-		while (!list_empty(&base->expired)) {
-			timer = list_first_entry(&base->expired,
-						 struct hrtimer, cb_entry);
+	raw_write_seqcount_barrier(&base->seq);
 
-			/*
-			 * Same as the above __run_hrtimer function
-			 * just we run with interrupts enabled.
-			 */
-			debug_deactivate(timer);
-			cpu_base->running_soft = timer;
-			raw_write_seqcount_barrier(&cpu_base->seq);
-
-			__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
-			fn = timer->function;
-
-			raw_spin_unlock_irq(&cpu_base->lock);
-			restart = fn(timer);
-			raw_spin_lock_irq(&cpu_base->lock);
-
-			hrtimer_rt_reprogram(restart, timer, base);
-			raw_write_seqcount_barrier(&cpu_base->seq);
-
-			WARN_ON_ONCE(cpu_base->running_soft != timer);
-			cpu_base->running_soft = NULL;
-		}
-	}
-
-	raw_spin_unlock_irq(&cpu_base->lock);
-
-	wake_up_timer_waiters(cpu_base);
+	WARN_ON_ONCE(base->running != timer);
+	base->running = NULL;
 }
 
-static int hrtimer_rt_defer(struct hrtimer *timer)
+static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
+				 unsigned int active_mask)
 {
-	if (timer->irqsafe)
-		return 0;
-
-	__remove_hrtimer(timer, timer->base, timer->state, 0);
-	list_add_tail(&timer->cb_entry, &timer->base->expired);
-	return 1;
-}
-
-#else
-
-static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
+	unsigned int active = cpu_base->active_bases & active_mask;
 
-#endif
-
-static int __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
-{
-	struct hrtimer_clock_base *base = cpu_base->clock_base;
-	unsigned int active = cpu_base->active_bases;
-	int raise = 0;
-
-	for (; active; base++, active >>= 1) {
+	while (active) {
+		unsigned int id = __ffs(active);
+		struct hrtimer_clock_base *base;
 		struct timerqueue_node *node;
 		ktime_t basenow;
 
-		if (!(active & 0x01))
-			continue;
+		active &= ~(1U << id);
+		base = cpu_base->clock_base + id;
 
 		basenow = ktime_add(now, base->offset);
 
@@ -1434,13 +1438,27 @@ static int __hrtimer_run_queues(struct h
 			if (basenow < hrtimer_get_softexpires_tv64(timer))
 				break;
 
-			if (!hrtimer_rt_defer(timer))
-				__run_hrtimer(cpu_base, base, timer, &basenow);
-			else
-				raise = 1;
+			__run_hrtimer(cpu_base, base, timer, &basenow,
+				      active_mask == HRTIMER_ACTIVE_HARD);
 		}
 	}
-	return raise;
+}
+
+static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
+{
+	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+	ktime_t now;
+
+	raw_spin_lock_irq(&cpu_base->lock);
+
+	now = hrtimer_update_base(cpu_base);
+	__hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_SOFT);
+
+	cpu_base->softirq_activated = 0;
+	hrtimer_update_softirq_timer(cpu_base, true);
+
+	raw_spin_unlock_irq(&cpu_base->lock);
+	wake_up_timer_waiters(cpu_base);
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1454,7 +1472,6 @@ void hrtimer_interrupt(struct clock_even
 	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
 	ktime_t expires_next, now, entry_time, delta;
 	int retries = 0;
-	int raise;
 
 	BUG_ON(!cpu_base->hres_active);
 	cpu_base->nr_events++;
@@ -1473,9 +1490,15 @@ retry:
 	 */
 	cpu_base->expires_next = KTIME_MAX;
 
-	raise = __hrtimer_run_queues(cpu_base, now);
+	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
+		cpu_base->softirq_expires_next = KTIME_MAX;
+		cpu_base->softirq_activated = 1;
+		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+	}
+
+	__hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_HARD);
 
-	/* Reevaluate the clock bases for the next expiry */
+	/* Reevaluate the hard interrupt clock bases for the next expiry */
 	expires_next = __hrtimer_get_next_event(cpu_base);
 	/*
 	 * Store the new expiry value so the migration code can verify
@@ -1484,8 +1507,6 @@ retry:
 	cpu_base->expires_next = expires_next;
 	cpu_base->in_hrtirq = 0;
 	raw_spin_unlock(&cpu_base->lock);
-	if (raise)
-		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 
 	/* Reprogramming necessary ? */
 	if (!tick_program_event(expires_next, 0)) {
@@ -1562,7 +1583,6 @@ void hrtimer_run_queues(void)
 {
 	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
 	ktime_t now;
-	int raise;
 
 	if (__hrtimer_hres_active(cpu_base))
 		return;
@@ -1581,10 +1601,15 @@ void hrtimer_run_queues(void)
 
 	raw_spin_lock(&cpu_base->lock);
 	now = hrtimer_update_base(cpu_base);
-	raise = __hrtimer_run_queues(cpu_base, now);
-	raw_spin_unlock(&cpu_base->lock);
-	if (raise)
+
+	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
+		cpu_base->softirq_expires_next = KTIME_MAX;
+		cpu_base->softirq_activated = 1;
 		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+	}
+
+	__hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_HARD);
+	raw_spin_unlock(&cpu_base->lock);
 }
 
 /*
@@ -1603,19 +1628,51 @@ static enum hrtimer_restart hrtimer_wake
 	return HRTIMER_NORESTART;
 }
 
-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
+static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id,
+				   enum hrtimer_mode mode,
+				   struct task_struct *task)
 {
+#ifdef CONFIG_PREEMPT_RT_FULL
+	if (!(clock_id & HRTIMER_BASE_SOFT_MASK))
+		clock_id |= HRTIMER_BASE_HARD_MASK;
+#endif
+	__hrtimer_init(&sl->timer, clock_id, mode);
 	sl->timer.function = hrtimer_wakeup;
-	sl->timer.irqsafe = 1;
 	sl->task = task;
 }
+
+/**
+ * hrtimer_init - initialize a timer to the given clock
+ * @timer:	the timer to be initialized
+ * @clock_id:	the clock to be used
+ * @mode:	timer mode abs/rel
+ */
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
+			  enum hrtimer_mode mode, struct task_struct *task)
+{
+	debug_init(&sl->timer, clock_id, mode);
+	__hrtimer_init_sleeper(sl, clock_id, mode, task);
+
+}
 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
 
+#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
+void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id,
+				   enum hrtimer_mode mode,
+				   struct task_struct *task)
+{
+	debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr);
+	__hrtimer_init_sleeper(sl, clock_id, mode, task);
+}
+EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack);
+#endif
+
+
 static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode,
 				unsigned long state)
 {
-	hrtimer_init_sleeper(t, current);
-
 	do {
 		set_current_state(state);
 		hrtimer_start_expires(&t->timer, mode);
@@ -1655,8 +1712,8 @@ long __sched hrtimer_nanosleep_restart(s
 	struct timespec __user  *rmtp;
 	int ret = 0;
 
-	hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
-				HRTIMER_MODE_ABS);
+	hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid,
+				      HRTIMER_MODE_ABS, current);
 	hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
 
 	/* cpu_chill() does not care about restart state. */
@@ -1691,8 +1748,9 @@ __hrtimer_nanosleep(struct timespec64 *r
 	if (dl_task(current) || rt_task(current))
 		slack = 0;
 
-	hrtimer_init_on_stack(&t.timer, clockid, mode);
+	hrtimer_init_sleeper_on_stack(&t, clockid, mode, current);
 	hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack);
+
 	if (do_nanosleep(&t, mode, state))
 		goto out;
 
@@ -1754,7 +1812,7 @@ void cpu_chill(void)
 	unsigned int freeze_flag = current->flags & PF_NOFREEZE;
 
 	current->flags |= PF_NOFREEZE;
-	__hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC,
+	__hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC_HARD,
 			    TASK_UNINTERRUPTIBLE);
 	if (!freeze_flag)
 		current->flags &= ~PF_NOFREEZE;
@@ -1773,12 +1831,15 @@ int hrtimers_prepare_cpu(unsigned int cp
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		cpu_base->clock_base[i].cpu_base = cpu_base;
 		timerqueue_init_head(&cpu_base->clock_base[i].active);
-		INIT_LIST_HEAD(&cpu_base->clock_base[i].expired);
 	}
 
 	cpu_base->active_bases = 0;
 	cpu_base->cpu = cpu;
-	hrtimer_init_hres(cpu_base);
+	cpu_base->hres_active = 0;
+	cpu_base->expires_next = KTIME_MAX;
+	cpu_base->softirq_expires_next = KTIME_MAX;
+	cpu_base->hang_detected = 0;
+	cpu_base->next_timer = NULL;
 #ifdef CONFIG_PREEMPT_RT_BASE
 	init_waitqueue_head(&cpu_base->wait);
 #endif
@@ -1787,7 +1848,7 @@ int hrtimers_prepare_cpu(unsigned int cp
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
+static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 				struct hrtimer_clock_base *new_base)
 {
 	struct hrtimer *timer;
@@ -1815,25 +1876,17 @@ static int migrate_hrtimer_list(struct h
 		 */
 		enqueue_hrtimer(timer, new_base);
 	}
-#ifdef CONFIG_PREEMPT_RT_BASE
-	list_splice_tail(&old_base->expired, &new_base->expired);
-	/*
-	 * Tell the caller to raise HRTIMER_SOFTIRQ.  We can't safely
-	 * acquire ktimersoftd->pi_lock while the base lock is held.
-	 */
-	return !list_empty(&new_base->expired);
-#endif
-	return 0;
 }
 
 int hrtimers_dead_cpu(unsigned int scpu)
 {
 	struct hrtimer_cpu_base *old_base, *new_base;
-	int i, raise = 0;
+	int i;
 
 	BUG_ON(cpu_online(scpu));
 	tick_cancel_sched_timer(scpu);
 
+	local_bh_disable();
 	local_irq_disable();
 	old_base = &per_cpu(hrtimer_bases, scpu);
 	new_base = this_cpu_ptr(&hrtimer_bases);
@@ -1845,56 +1898,50 @@ int hrtimers_dead_cpu(unsigned int scpu)
 	raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
-		raise |= migrate_hrtimer_list(&old_base->clock_base[i],
-					      &new_base->clock_base[i]);
+		migrate_hrtimer_list(&old_base->clock_base[i],
+				     &new_base->clock_base[i]);
 	}
 
+	/*
+	 * The migration might have changed the first expiring softirq
+	 * timer on this CPU. Update it.
+	 */
+	hrtimer_update_softirq_timer(new_base, false);
+
 	raw_spin_unlock(&old_base->lock);
 	raw_spin_unlock(&new_base->lock);
 
-	if (raise)
-		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
-
 	/* Check, if we got expired work to do */
 	__hrtimer_peek_ahead_timers();
 	local_irq_enable();
+	local_bh_enable();
 	return 0;
 }
 
 #endif /* CONFIG_HOTPLUG_CPU */
 
-#ifdef CONFIG_PREEMPT_RT_BASE
-
-static void run_hrtimer_softirq(struct softirq_action *h)
-{
-	hrtimer_rt_run_pending();
-}
-
-static void hrtimers_open_softirq(void)
-{
-	open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
-}
-
-#else
-static void hrtimers_open_softirq(void) { }
-#endif
-
 void __init hrtimers_init(void)
 {
+	/*
+	 * It is necessary that the soft base mask is a single
+	 * bit.
+	 */
+	BUILD_BUG_ON_NOT_POWER_OF_2(HRTIMER_BASE_SOFT_MASK);
+
 	hrtimers_prepare_cpu(smp_processor_id());
-	hrtimers_open_softirq();
+	open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq);
 }
 
 /**
  * schedule_hrtimeout_range_clock - sleep until timeout
  * @expires:	timeout value (ktime_t)
  * @delta:	slack in expires timeout (ktime_t)
- * @mode:	timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
- * @clock:	timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
+ * @mode:	timer mode
+ * @clock_id:	timer clock to be used
  */
 int __sched
 schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
-			       const enum hrtimer_mode mode, int clock)
+			       const enum hrtimer_mode mode, clockid_t clock_id)
 {
 	struct hrtimer_sleeper t;
 
@@ -1915,10 +1962,9 @@ schedule_hrtimeout_range_clock(ktime_t *
 		return -EINTR;
 	}
 
-	hrtimer_init_on_stack(&t.timer, clock, mode);
-	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
+	hrtimer_init_sleeper_on_stack(&t, clock_id, mode, current);
 
-	hrtimer_init_sleeper(&t, current);
+	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
 
 	hrtimer_start_expires(&t.timer, mode);
 
--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -106,8 +106,7 @@ static enum hrtimer_restart bc_handler(s
 
 void tick_setup_hrtimer_broadcast(void)
 {
-	hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&bctimer, CLOCK_MONOTONIC_HARD, HRTIMER_MODE_ABS);
 	bctimer.function = bc_handler;
-	bctimer.irqsafe = true;
 	clockevents_register_device(&ce_broadcast_hrtimer);
 }
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1217,8 +1217,7 @@ void tick_setup_sched_timer(void)
 	/*
 	 * Emulate tick processing via per-CPU hrtimers:
 	 */
-	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-	ts->sched_timer.irqsafe = 1;
+	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC_HARD, HRTIMER_MODE_ABS);
 	ts->sched_timer.function = tick_sched_timer;
 
 	/* Get the next period (per-CPU) */
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -383,9 +383,8 @@ static void watchdog_enable(unsigned int
 	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);
 
 	/* kick off the timer for the hardlockup detector */
-	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(hrtimer, CLOCK_MONOTONIC_HARD, HRTIMER_MODE_REL);
 	hrtimer->function = watchdog_timer_fn;
-	hrtimer->irqsafe = 1;
 
 	/* Enable the perf event */
 	watchdog_nmi_enable(cpu);
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -105,7 +105,6 @@ struct bcm_op {
 	unsigned long frames_abs, frames_filtered;
 	struct bcm_timeval ival1, ival2;
 	struct hrtimer timer, thrtimer;
-	struct tasklet_struct tsklet, thrtsklet;
 	ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg;
 	int rx_ifindex;
 	int cfsiz;
@@ -383,25 +382,34 @@ static void bcm_send_to_user(struct bcm_
 	}
 }
 
-static void bcm_tx_start_timer(struct bcm_op *op)
+static bool bcm_tx_set_expiry(struct bcm_op *op, struct hrtimer *hrt)
 {
+	ktime_t ival;
+
 	if (op->kt_ival1 && op->count)
-		hrtimer_start(&op->timer,
-			      ktime_add(ktime_get(), op->kt_ival1),
-			      HRTIMER_MODE_ABS);
+		ival = op->kt_ival1;
 	else if (op->kt_ival2)
-		hrtimer_start(&op->timer,
-			      ktime_add(ktime_get(), op->kt_ival2),
-			      HRTIMER_MODE_ABS);
+		ival = op->kt_ival2;
+	else
+		return false;
+
+	hrtimer_set_expires(hrt, ktime_add(ktime_get(), ival));
+	return true;
 }
 
-static void bcm_tx_timeout_tsklet(unsigned long data)
+static void bcm_tx_start_timer(struct bcm_op *op)
 {
-	struct bcm_op *op = (struct bcm_op *)data;
+	if (bcm_tx_set_expiry(op, &op->timer))
+		hrtimer_start_expires(&op->timer, HRTIMER_MODE_ABS);
+}
+
+/* bcm_tx_timeout_handler - performs cyclic CAN frame transmissions */
+static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
+{
+	struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer);
 	struct bcm_msg_head msg_head;
 
 	if (op->kt_ival1 && (op->count > 0)) {
-
 		op->count--;
 		if (!op->count && (op->flags & TX_COUNTEVT)) {
 
@@ -418,22 +426,12 @@ static void bcm_tx_timeout_tsklet(unsign
 		}
 		bcm_can_tx(op);
 
-	} else if (op->kt_ival2)
+	} else if (op->kt_ival2) {
 		bcm_can_tx(op);
+	}
 
-	bcm_tx_start_timer(op);
-}
-
-/*
- * bcm_tx_timeout_handler - performs cyclic CAN frame transmissions
- */
-static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
-{
-	struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer);
-
-	tasklet_schedule(&op->tsklet);
-
-	return HRTIMER_NORESTART;
+	return bcm_tx_set_expiry(op, &op->timer) ?
+		HRTIMER_RESTART : HRTIMER_NORESTART;
 }
 
 /*
@@ -561,11 +559,18 @@ static void bcm_rx_starttimer(struct bcm
 		hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL);
 }
 
-static void bcm_rx_timeout_tsklet(unsigned long data)
+/* bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out */
+static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
 {
-	struct bcm_op *op = (struct bcm_op *)data;
+	struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer);
 	struct bcm_msg_head msg_head;
 
+	/* if user wants to be informed, when cyclic CAN-Messages come back */
+	if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) {
+		/* clear received CAN frames to indicate 'nothing received' */
+		memset(op->last_frames, 0, op->nframes * op->cfsiz);
+	}
+
 	/* create notification to user */
 	msg_head.opcode  = RX_TIMEOUT;
 	msg_head.flags   = op->flags;
@@ -576,25 +581,6 @@ static void bcm_rx_timeout_tsklet(unsign
 	msg_head.nframes = 0;
 
 	bcm_send_to_user(op, &msg_head, NULL, 0);
-}
-
-/*
- * bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out
- */
-static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
-{
-	struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer);
-
-	/* schedule before NET_RX_SOFTIRQ */
-	tasklet_hi_schedule(&op->tsklet);
-
-	/* no restart of the timer is done here! */
-
-	/* if user wants to be informed, when cyclic CAN-Messages come back */
-	if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) {
-		/* clear received CAN frames to indicate 'nothing received' */
-		memset(op->last_frames, 0, op->nframes * op->cfsiz);
-	}
 
 	return HRTIMER_NORESTART;
 }
@@ -602,14 +588,12 @@ static enum hrtimer_restart bcm_rx_timeo
 /*
  * bcm_rx_do_flush - helper for bcm_rx_thr_flush
  */
-static inline int bcm_rx_do_flush(struct bcm_op *op, int update,
-				  unsigned int index)
+static inline int bcm_rx_do_flush(struct bcm_op *op, unsigned int index)
 {
 	struct canfd_frame *lcf = op->last_frames + op->cfsiz * index;
 
 	if ((op->last_frames) && (lcf->flags & RX_THR)) {
-		if (update)
-			bcm_rx_changed(op, lcf);
+		bcm_rx_changed(op, lcf);
 		return 1;
 	}
 	return 0;
@@ -617,11 +601,8 @@ static inline int bcm_rx_do_flush(struct
 
 /*
  * bcm_rx_thr_flush - Check for throttled data and send it to the userspace
- *
- * update == 0 : just check if throttled data is available  (any irq context)
- * update == 1 : check and send throttled data to userspace (soft_irq context)
  */
-static int bcm_rx_thr_flush(struct bcm_op *op, int update)
+static int bcm_rx_thr_flush(struct bcm_op *op)
 {
 	int updated = 0;
 
@@ -630,24 +611,16 @@ static int bcm_rx_thr_flush(struct bcm_o
 
 		/* for MUX filter we start at index 1 */
 		for (i = 1; i < op->nframes; i++)
-			updated += bcm_rx_do_flush(op, update, i);
+			updated += bcm_rx_do_flush(op, i);
 
 	} else {
 		/* for RX_FILTER_ID and simple filter */
-		updated += bcm_rx_do_flush(op, update, 0);
+		updated += bcm_rx_do_flush(op, 0);
 	}
 
 	return updated;
 }
 
-static void bcm_rx_thr_tsklet(unsigned long data)
-{
-	struct bcm_op *op = (struct bcm_op *)data;
-
-	/* push the changed data to the userspace */
-	bcm_rx_thr_flush(op, 1);
-}
-
 /*
  * bcm_rx_thr_handler - the time for blocked content updates is over now:
  *                      Check for throttled data and send it to the userspace
@@ -656,9 +629,7 @@ static enum hrtimer_restart bcm_rx_thr_h
 {
 	struct bcm_op *op = container_of(hrtimer, struct bcm_op, thrtimer);
 
-	tasklet_schedule(&op->thrtsklet);
-
-	if (bcm_rx_thr_flush(op, 0)) {
+	if (bcm_rx_thr_flush(op)) {
 		hrtimer_forward(hrtimer, ktime_get(), op->kt_ival2);
 		return HRTIMER_RESTART;
 	} else {
@@ -754,23 +725,8 @@ static struct bcm_op *bcm_find_op(struct
 
 static void bcm_remove_op(struct bcm_op *op)
 {
-	if (op->tsklet.func) {
-		while (test_bit(TASKLET_STATE_SCHED, &op->tsklet.state) ||
-		       test_bit(TASKLET_STATE_RUN, &op->tsklet.state) ||
-		       hrtimer_active(&op->timer)) {
-			hrtimer_cancel(&op->timer);
-			tasklet_kill(&op->tsklet);
-		}
-	}
-
-	if (op->thrtsklet.func) {
-		while (test_bit(TASKLET_STATE_SCHED, &op->thrtsklet.state) ||
-		       test_bit(TASKLET_STATE_RUN, &op->thrtsklet.state) ||
-		       hrtimer_active(&op->thrtimer)) {
-			hrtimer_cancel(&op->thrtimer);
-			tasklet_kill(&op->thrtsklet);
-		}
-	}
+	hrtimer_cancel(&op->timer);
+	hrtimer_cancel(&op->thrtimer);
 
 	if ((op->frames) && (op->frames != &op->sframe))
 		kfree(op->frames);
@@ -1002,15 +958,13 @@ static int bcm_tx_setup(struct bcm_msg_h
 		op->ifindex = ifindex;
 
 		/* initialize uninitialized (kzalloc) structure */
-		hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		hrtimer_init(&op->timer, CLOCK_MONOTONIC_SOFT,
+			     HRTIMER_MODE_REL);
 		op->timer.function = bcm_tx_timeout_handler;
 
-		/* initialize tasklet for tx countevent notification */
-		tasklet_init(&op->tsklet, bcm_tx_timeout_tsklet,
-			     (unsigned long) op);
-
 		/* currently unused in tx_ops */
-		hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC_SOFT,
+			     HRTIMER_MODE_REL);
 
 		/* add this bcm_op to the list of the tx_ops */
 		list_add(&op->list, &bo->tx_ops);
@@ -1177,20 +1131,14 @@ static int bcm_rx_setup(struct bcm_msg_h
 		op->rx_ifindex = ifindex;
 
 		/* initialize uninitialized (kzalloc) structure */
-		hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		hrtimer_init(&op->timer, CLOCK_MONOTONIC_SOFT,
+			     HRTIMER_MODE_REL);
 		op->timer.function = bcm_rx_timeout_handler;
 
-		/* initialize tasklet for rx timeout notification */
-		tasklet_init(&op->tsklet, bcm_rx_timeout_tsklet,
-			     (unsigned long) op);
-
-		hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC_SOFT,
+			     HRTIMER_MODE_REL);
 		op->thrtimer.function = bcm_rx_thr_handler;
 
-		/* initialize tasklet for rx throttle handling */
-		tasklet_init(&op->thrtsklet, bcm_rx_thr_tsklet,
-			     (unsigned long) op);
-
 		/* add this bcm_op to the list of the rx_ops */
 		list_add(&op->list, &bo->rx_ops);
 
@@ -1236,7 +1184,7 @@ static int bcm_rx_setup(struct bcm_msg_h
 			 */
 			op->kt_lastmsg = 0;
 			hrtimer_cancel(&op->thrtimer);
-			bcm_rx_thr_flush(op, 1);
+			bcm_rx_thr_flush(op);
 		}
 
 		if ((op->flags & STARTTIMER) && op->kt_ival1)
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2253,7 +2253,8 @@ static void spin(struct pktgen_dev *pkt_
 	s64 remaining;
 	struct hrtimer_sleeper t;
 
-	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS,
+				      current);
 	hrtimer_set_expires(&t.timer, spin_until);
 
 	remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer));
@@ -2268,7 +2269,6 @@ static void spin(struct pktgen_dev *pkt_
 		} while (ktime_compare(end_time, spin_until) < 0);
 	} else {
 		/* see do_nanosleep */
-		hrtimer_init_sleeper(&t, current);
 		do {
 			set_current_state(TASK_INTERRUPTIBLE);
 			hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -427,7 +427,7 @@ static void xfrm_put_mode(struct xfrm_mo
 
 static void xfrm_state_gc_destroy(struct xfrm_state *x)
 {
-	tasklet_hrtimer_cancel(&x->mtimer);
+	hrtimer_cancel(&x->mtimer);
 	del_timer_sync(&x->rtimer);
 	kfree(x->aead);
 	kfree(x->aalg);
@@ -472,8 +472,8 @@ static void xfrm_state_gc_task(struct wo
 
 static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
 {
-	struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer);
-	struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer);
+	struct xfrm_state *x = container_of(me, struct xfrm_state, mtimer);
+	enum hrtimer_restart ret = HRTIMER_NORESTART;
 	unsigned long now = get_seconds();
 	long next = LONG_MAX;
 	int warn = 0;
@@ -537,7 +537,8 @@ static enum hrtimer_restart xfrm_timer_h
 		km_state_expired(x, 0, 0);
 resched:
 	if (next != LONG_MAX) {
-		tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL);
+		hrtimer_forward_now(&x->mtimer, ktime_set(next, 0));
+		ret = HRTIMER_RESTART;
 	}
 
 	goto out;
@@ -554,7 +555,7 @@ expired:
 
 out:
 	spin_unlock(&x->lock);
-	return HRTIMER_NORESTART;
+	return ret;
 }
 
 static void xfrm_replay_timer_handler(unsigned long data);
@@ -573,8 +574,8 @@ struct xfrm_state *xfrm_state_alloc(stru
 		INIT_HLIST_NODE(&x->bydst);
 		INIT_HLIST_NODE(&x->bysrc);
 		INIT_HLIST_NODE(&x->byspi);
-		tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler,
-					CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
+		hrtimer_init(&x->mtimer, CLOCK_BOOTTIME_SOFT, HRTIMER_MODE_ABS);
+		x->mtimer.function = xfrm_timer_handler;
 		setup_timer(&x->rtimer, xfrm_replay_timer_handler,
 				(unsigned long)x);
 		x->curlft.add_time = get_seconds();
@@ -1030,7 +1031,9 @@ found:
 				hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
 			}
 			x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
-			tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
+			hrtimer_start(&x->mtimer,
+				      ktime_set(net->xfrm.sysctl_acq_expires, 0),
+				      HRTIMER_MODE_REL);
 			net->xfrm.state_num++;
 			xfrm_hash_grow_check(net, x->bydst.next != NULL);
 			spin_unlock_bh(&net->xfrm.xfrm_state_lock);
@@ -1141,7 +1144,7 @@ static void __xfrm_state_insert(struct x
 		hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
 	}
 
-	tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
+	hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
 	if (x->replay_maxage)
 		mod_timer(&x->rtimer, jiffies + x->replay_maxage);
 
@@ -1245,7 +1248,9 @@ static struct xfrm_state *__find_acq_cor
 		x->mark.m = m->m;
 		x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
 		xfrm_state_hold(x);
-		tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
+		hrtimer_start(&x->mtimer,
+			      ktime_set(net->xfrm.sysctl_acq_expires, 0),
+			      HRTIMER_MODE_REL);
 		list_add(&x->km.all, &net->xfrm.state_all);
 		hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
 		h = xfrm_src_hash(net, daddr, saddr, family);
@@ -1537,7 +1542,7 @@ out:
 		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
 		x1->km.dying = 0;
 
-		tasklet_hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
+		hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
 		if (x1->curlft.use_time)
 			xfrm_state_check_expire(x1);
 
@@ -1561,7 +1566,7 @@ int xfrm_state_check_expire(struct xfrm_
 	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
 	    x->curlft.packets >= x->lft.hard_packet_limit) {
 		x->km.state = XFRM_STATE_EXPIRED;
-		tasklet_hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL);
+		hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL);
 		return -EINVAL;
 	}