|
Jiri Wiesner |
018430 |
From db370a8b9f67ae5f17e3d5482493294467784504 Mon Sep 17 00:00:00 2001
|
|
Jiri Wiesner |
018430 |
From: Wander Lairson Costa <wander@redhat.com>
|
|
Jiri Wiesner |
018430 |
Date: Thu, 2 Feb 2023 09:30:20 -0300
|
|
Jiri Wiesner |
018430 |
Subject: [PATCH] rtmutex: Ensure that the top waiter is always woken up
|
|
Jiri Wiesner |
018430 |
Git-commit: db370a8b9f67ae5f17e3d5482493294467784504
|
|
Jiri Wiesner |
018430 |
Patch-mainline: v6.2-rc8
|
|
Jiri Wiesner |
018430 |
References: git-fixes
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
Let L1 and L2 be two spinlocks.
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
Let T1 be a task holding L1 and blocked on L2. T1, currently, is the top
|
|
Jiri Wiesner |
018430 |
waiter of L2.
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
Let T2 be the task holding L2.
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
Let T3 be a task trying to acquire L1.
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
The following events will lead to a state in which the wait queue of L2
|
|
Jiri Wiesner |
018430 |
isn't empty, but no task actually holds the lock.
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
T1 T2 T3
|
|
Jiri Wiesner |
018430 |
== == ==
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
spin_lock(L1)
|
|
Jiri Wiesner |
018430 |
| raw_spin_lock(L1->wait_lock)
|
|
Jiri Wiesner |
018430 |
| rtlock_slowlock_locked(L1)
|
|
Jiri Wiesner |
018430 |
| | task_blocks_on_rt_mutex(L1, T3)
|
|
Jiri Wiesner |
018430 |
| | | orig_waiter->lock = L1
|
|
Jiri Wiesner |
018430 |
| | | orig_waiter->task = T3
|
|
Jiri Wiesner |
018430 |
| | | raw_spin_unlock(L1->wait_lock)
|
|
Jiri Wiesner |
018430 |
| | | rt_mutex_adjust_prio_chain(T1, L1, L2, orig_waiter, T3)
|
|
Jiri Wiesner |
018430 |
spin_unlock(L2) | | | |
|
|
Jiri Wiesner |
018430 |
| rt_mutex_slowunlock(L2) | | | |
|
|
Jiri Wiesner |
018430 |
| | raw_spin_lock(L2->wait_lock) | | | |
|
|
Jiri Wiesner |
018430 |
| | wakeup(T1) | | | |
|
|
Jiri Wiesner |
018430 |
| | raw_spin_unlock(L2->wait_lock) | | | |
|
|
Jiri Wiesner |
018430 |
| | | | waiter = T1->pi_blocked_on
|
|
Jiri Wiesner |
018430 |
| | | | waiter == rt_mutex_top_waiter(L2)
|
|
Jiri Wiesner |
018430 |
| | | | waiter->task == T1
|
|
Jiri Wiesner |
018430 |
| | | | raw_spin_lock(L2->wait_lock)
|
|
Jiri Wiesner |
018430 |
| | | | dequeue(L2, waiter)
|
|
Jiri Wiesner |
018430 |
| | | | update_prio(waiter, T1)
|
|
Jiri Wiesner |
018430 |
| | | | enqueue(L2, waiter)
|
|
Jiri Wiesner |
018430 |
| | | | waiter != rt_mutex_top_waiter(L2)
|
|
Jiri Wiesner |
018430 |
| | | | L2->owner == NULL
|
|
Jiri Wiesner |
018430 |
| | | | wakeup(T1)
|
|
Jiri Wiesner |
018430 |
| | | | raw_spin_unlock(L2->wait_lock)
|
|
Jiri Wiesner |
018430 |
T1 wakes up
|
|
Jiri Wiesner |
018430 |
T1 != top_waiter(L2)
|
|
Jiri Wiesner |
018430 |
schedule_rtlock()
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
If the deadline of T1 is updated before the call to update_prio(), and the
|
|
Jiri Wiesner |
018430 |
new deadline is greater than the deadline of the second top waiter, then
|
|
Jiri Wiesner |
018430 |
after the requeue, T1 is no longer the top waiter, and the wrong task is
|
|
Jiri Wiesner |
018430 |
woken up which will then go back to sleep because it is not the top waiter.
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
This can be reproduced in PREEMPT_RT with stress-ng:
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
while true; do
|
|
Jiri Wiesner |
018430 |
stress-ng --sched deadline --sched-period 1000000000 \
|
|
Jiri Wiesner |
018430 |
--sched-runtime 800000000 --sched-deadline \
|
|
Jiri Wiesner |
018430 |
1000000000 --mmapfork 23 -t 20
|
|
Jiri Wiesner |
018430 |
done
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
A similar issue was pointed out by Thomas versus the cases where the top
|
|
Jiri Wiesner |
018430 |
waiter drops out early due to a signal or timeout, which is a general issue
|
|
Jiri Wiesner |
018430 |
for all regular rtmutex use cases, e.g. futex.
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
The problematic code is in rt_mutex_adjust_prio_chain():
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
// Save the top waiter before dequeue/enqueue
|
|
Jiri Wiesner |
018430 |
prerequeue_top_waiter = rt_mutex_top_waiter(lock);
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
rt_mutex_dequeue(lock, waiter);
|
|
Jiri Wiesner |
018430 |
waiter_update_prio(waiter, task);
|
|
Jiri Wiesner |
018430 |
rt_mutex_enqueue(lock, waiter);
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
// Lock has no owner?
|
|
Jiri Wiesner |
018430 |
if (!rt_mutex_owner(lock)) {
|
|
Jiri Wiesner |
018430 |
// Top waiter changed
|
|
Jiri Wiesner |
018430 |
----> if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
|
|
Jiri Wiesner |
018430 |
----> wake_up_state(waiter->task, waiter->wake_state);
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
This only takes the case into account where @waiter is the new top waiter
|
|
Jiri Wiesner |
018430 |
due to the requeue operation.
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
But it fails to handle the case where @waiter is not longer the top
|
|
Jiri Wiesner |
018430 |
waiter due to the requeue operation.
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
Ensure that the new top waiter is woken up so in all cases so it can take
|
|
Jiri Wiesner |
018430 |
over the ownerless lock.
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
[ tglx: Amend changelog, add Fixes tag ]
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
Fixes: c014ef69b3ac ("locking/rtmutex: Add wake_state to rt_mutex_waiter")
|
|
Jiri Wiesner |
018430 |
Signed-off-by: Wander Lairson Costa <wander@redhat.com>
|
|
Jiri Wiesner |
018430 |
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
|
Jiri Wiesner |
018430 |
Cc: stable@vger.kernel.org
|
|
Jiri Wiesner |
018430 |
Link: https://lore.kernel.org/r/20230117172649.52465-1-wander@redhat.com
|
|
Jiri Wiesner |
018430 |
Link: https://lore.kernel.org/r/20230202123020.14844-1-wander@redhat.com
|
|
Jiri Wiesner |
018430 |
Signed-off-by: Jiri Wiesner <jwiesner@suse.de>
|
|
Jiri Wiesner |
018430 |
---
|
|
Jiri Wiesner |
018430 |
kernel/locking/rtmutex.c | 5 +++--
|
|
Jiri Wiesner |
018430 |
1 file changed, 3 insertions(+), 2 deletions(-)
|
|
Jiri Wiesner |
018430 |
|
|
Jiri Wiesner |
018430 |
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
|
|
Jiri Wiesner |
018430 |
index 010cf4e6d0b8..728f434de2bb 100644
|
|
Jiri Wiesner |
018430 |
--- a/kernel/locking/rtmutex.c
|
|
Jiri Wiesner |
018430 |
+++ b/kernel/locking/rtmutex.c
|
|
Jiri Wiesner |
018430 |
@@ -901,8 +901,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
|
|
Jiri Wiesner |
018430 |
* then we need to wake the new top waiter up to try
|
|
Jiri Wiesner |
018430 |
* to get the lock.
|
|
Jiri Wiesner |
018430 |
*/
|
|
Jiri Wiesner |
018430 |
- if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
|
|
Jiri Wiesner |
018430 |
- wake_up_state(waiter->task, waiter->wake_state);
|
|
Jiri Wiesner |
018430 |
+ top_waiter = rt_mutex_top_waiter(lock);
|
|
Jiri Wiesner |
018430 |
+ if (prerequeue_top_waiter != top_waiter)
|
|
Jiri Wiesner |
018430 |
+ wake_up_state(top_waiter->task, top_waiter->wake_state);
|
|
Jiri Wiesner |
018430 |
raw_spin_unlock_irq(&lock->wait_lock);
|
|
Jiri Wiesner |
018430 |
return 0;
|
|
Jiri Wiesner |
018430 |
}
|
|
Jiri Wiesner |
018430 |
--
|
|
Jiri Wiesner |
018430 |
2.35.3
|
|
Jiri Wiesner |
018430 |
|