2023-11-20 2e7bd41e4e8ab3d1efdabd9e263a2f7fe79bff8c
kernel/kernel/locking/rtmutex.c
@@ -7,6 +7,11 @@
  * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
  * Copyright (C) 2006 Esben Nielsen
+ * Adaptive Spinlocks:
+ * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
+ * and Peter Morreale,
+ * Adaptive Spinlocks simplification:
+ * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
  *
  * See Documentation/locking/rt-mutex-design.txt for details.
  */
@@ -18,6 +23,8 @@
 #include <linux/sched/wake_q.h>
 #include <linux/sched/debug.h>
 #include <linux/timer.h>
+#include <linux/ww_mutex.h>
+#include <linux/blkdev.h>
 
 #include "rtmutex_common.h"
 
@@ -135,6 +142,12 @@
 	WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
 }
 
+static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
+{
+	return waiter && waiter != PI_WAKEUP_INPROGRESS &&
+		waiter != PI_REQUEUE_INPROGRESS;
+}
+
 /*
  * We can speed up the acquire/release, if there's no debugging state to be
  * set up.
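
The helper added above exists because on PREEMPT_RT a task's pi_blocked_on pointer may temporarily hold the markers PI_WAKEUP_INPROGRESS or PI_REQUEUE_INPROGRESS instead of a real waiter, and those markers must never be dereferenced or treated as a boosting-chain entry. A minimal userspace sketch of the same sentinel-pointer test follows; the names mirror the patch, everything else is illustrative only, not the kernel code:

/* Sentinel-pointer filtering, userspace toy version. */
#include <stdio.h>

struct waiter { int prio; };

#define PI_WAKEUP_INPROGRESS	((struct waiter *)1)
#define PI_REQUEUE_INPROGRESS	((struct waiter *)2)

static int real_waiter(struct waiter *w)
{
	/* NULL and the two marker values are not dereferenceable. */
	return w && w != PI_WAKEUP_INPROGRESS && w != PI_REQUEUE_INPROGRESS;
}

int main(void)
{
	struct waiter w = { .prio = 10 };
	struct waiter *cases[] = { NULL, PI_WAKEUP_INPROGRESS,
				   PI_REQUEUE_INPROGRESS, &w };

	for (unsigned i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		printf("case %u: real=%d\n", i, real_waiter(cases[i]));
	return 0;
}
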
@@ -228,7 +241,7 @@
  * Only use with rt_mutex_waiter_{less,equal}()
  */
 #define task_to_waiter(p) \
-	&(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline }
+	&(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline, .task = (p) }
 
 static inline int
 rt_mutex_waiter_less(struct rt_mutex_waiter *left,
@@ -266,6 +279,27 @@
 		return left->deadline == right->deadline;
 
 	return 1;
+}
+
+#define STEAL_NORMAL 0
+#define STEAL_LATERAL 1
+
+static inline int
+rt_mutex_steal(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, int mode)
+{
+	struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
+
+	if (waiter == top_waiter || rt_mutex_waiter_less(waiter, top_waiter))
+		return 1;
+
+	/*
+	 * Note that RT tasks are excluded from lateral-steals
+	 * to prevent the introduction of an unbounded latency.
+	 */
+	if (mode == STEAL_NORMAL || rt_task(waiter->task))
+		return 0;
+
+	return rt_mutex_waiter_equal(waiter, top_waiter);
 }
 
 static void
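
rt_mutex_steal() keeps the old behaviour for STEAL_NORMAL (only the queued top waiter, or a strictly higher-priority waiter, may take the lock) and adds STEAL_LATERAL, which additionally lets a non-RT waiter of equal priority grab the lock; RT tasks are excluded so their latency stays bounded. A small self-contained model of that decision, assuming a lower prio value means higher priority; the struct and helpers are simplified stand-ins, not the rtmutex code:

#include <stdbool.h>
#include <stdio.h>

#define STEAL_NORMAL	0
#define STEAL_LATERAL	1

struct waiter { int prio; bool rt; };

static bool waiter_less(struct waiter *a, struct waiter *b)
{
	return a->prio < b->prio;
}

static bool waiter_equal(struct waiter *a, struct waiter *b)
{
	return a->prio == b->prio;
}

static int can_steal(struct waiter *w, struct waiter *top, int mode)
{
	if (w == top || waiter_less(w, top))
		return 1;
	/* RT tasks never do lateral steals: bounded latency matters more. */
	if (mode == STEAL_NORMAL || w->rt)
		return 0;
	return waiter_equal(w, top);
}

int main(void)
{
	struct waiter top = { .prio = 120 };		/* queued top waiter */
	struct waiter peer = { .prio = 120 };		/* same priority, non-RT */
	struct waiter rt_peer = { .prio = 120, .rt = true };

	printf("normal steal by peer:  %d\n", can_steal(&peer, &top, STEAL_NORMAL));	/* 0 */
	printf("lateral steal by peer: %d\n", can_steal(&peer, &top, STEAL_LATERAL));	/* 1 */
	printf("lateral steal by RT:   %d\n", can_steal(&rt_peer, &top, STEAL_LATERAL));/* 0 */
	return 0;
}
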
@@ -372,6 +406,14 @@
 	return debug_rt_mutex_detect_deadlock(waiter, chwalk);
 }
 
+static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
+{
+	if (waiter->savestate)
+		wake_up_lock_sleeper(waiter->task);
+	else
+		wake_up_process(waiter->task);
+}
+
 /*
  * Max number of times we'll walk the boosting chain:
  */
@@ -379,7 +421,8 @@
 
 static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
 {
-	return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
+	return rt_mutex_real_waiter(p->pi_blocked_on) ?
+		p->pi_blocked_on->lock : NULL;
 }
 
 /*
@@ -515,7 +558,7 @@
 	 * reached or the state of the chain has changed while we
 	 * dropped the locks.
 	 */
-	if (!waiter)
+	if (!rt_mutex_real_waiter(waiter))
 		goto out_unlock_pi;
 
 	/*
@@ -696,13 +739,16 @@
 	 * follow here. This is the end of the chain we are walking.
 	 */
 	if (!rt_mutex_owner(lock)) {
+		struct rt_mutex_waiter *lock_top_waiter;
+
 		/*
 		 * If the requeue [7] above changed the top waiter,
 		 * then we need to wake the new top waiter up to try
 		 * to get the lock.
 		 */
-		if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
-			wake_up_process(rt_mutex_top_waiter(lock)->task);
+		lock_top_waiter = rt_mutex_top_waiter(lock);
+		if (prerequeue_top_waiter != lock_top_waiter)
+			rt_mutex_wake_waiter(lock_top_waiter);
 		raw_spin_unlock_irq(&lock->wait_lock);
 		return 0;
 	}
@@ -804,9 +850,11 @@
 * @task: The task which wants to acquire the lock
 * @waiter: The waiter that is queued to the lock's wait tree if the
 *	    callsite called task_blocked_on_lock(), otherwise NULL
+ * @mode: Lock steal mode (STEAL_NORMAL, STEAL_LATERAL)
 */
-static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
-				struct rt_mutex_waiter *waiter)
+static int __try_to_take_rt_mutex(struct rt_mutex *lock,
+				  struct task_struct *task,
+				  struct rt_mutex_waiter *waiter, int mode)
 {
 	lockdep_assert_held(&lock->wait_lock);
 
@@ -842,12 +890,11 @@
 	 */
 	if (waiter) {
 		/*
-		 * If waiter is not the highest priority waiter of
-		 * @lock, give up.
+		 * If waiter is not the highest priority waiter of @lock,
+		 * or its peer when lateral steal is allowed, give up.
 		 */
-		if (waiter != rt_mutex_top_waiter(lock))
+		if (!rt_mutex_steal(lock, waiter, mode))
 			return 0;
-
 		/*
 		 * We can acquire the lock. Remove the waiter from the
 		 * lock waiters tree.
@@ -865,14 +912,12 @@
 	 */
 	if (rt_mutex_has_waiters(lock)) {
 		/*
-		 * If @task->prio is greater than or equal to
-		 * the top waiter priority (kernel view),
-		 * @task lost.
+		 * If @task->prio is greater than the top waiter
+		 * priority (kernel view), or equal to it when a
+		 * lateral steal is forbidden, @task lost.
 		 */
-		if (!rt_mutex_waiter_less(task_to_waiter(task),
-					  rt_mutex_top_waiter(lock)))
+		if (!rt_mutex_steal(lock, task_to_waiter(task), mode))
			return 0;
-
 		/*
 		 * The current top waiter stays enqueued. We
 		 * don't have to change anything in the lock
@@ -919,6 +964,344 @@
 	return 1;
 }
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+/*
+ * preemptible spin_lock functions:
+ */
+static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
+					 void (*slowfn)(struct rt_mutex *lock))
+{
+	might_sleep_no_state_check();
+
+	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
+		return;
+	else
+		slowfn(lock);
+}
+
+static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
+					   void (*slowfn)(struct rt_mutex *lock))
+{
+	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
+		return;
+	else
+		slowfn(lock);
+}
+#ifdef CONFIG_SMP
+/*
+ * Note that owner is a speculative pointer and dereferencing relies
+ * on rcu_read_lock() and the check against the lock owner.
+ */
+static int adaptive_wait(struct rt_mutex *lock,
+			 struct task_struct *owner)
+{
+	int res = 0;
+
+	rcu_read_lock();
+	for (;;) {
+		if (owner != rt_mutex_owner(lock))
+			break;
+		/*
+		 * Ensure that owner->on_cpu is dereferenced _after_
+		 * checking the above to be valid.
+		 */
+		barrier();
+		if (!owner->on_cpu) {
+			res = 1;
+			break;
+		}
+		cpu_relax();
+	}
+	rcu_read_unlock();
+	return res;
+}
+#else
+static int adaptive_wait(struct rt_mutex *lock,
+			 struct task_struct *orig_owner)
+{
+	return 1;
+}
+#endif
+
+static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
+				   struct rt_mutex_waiter *waiter,
+				   struct task_struct *task,
+				   enum rtmutex_chainwalk chwalk);
+/*
+ * Slow path lock function spin_lock style: this variant is very
+ * careful not to miss any non-lock wakeups.
+ *
+ * We store the current state under p->pi_lock in p->saved_state and
+ * the try_to_wake_up() code handles this accordingly.
+ */
+void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock,
+					  struct rt_mutex_waiter *waiter,
+					  unsigned long flags)
+{
+	struct task_struct *lock_owner, *self = current;
+	struct rt_mutex_waiter *top_waiter;
+	int ret;
+
+	if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL))
+		return;
+
+	BUG_ON(rt_mutex_owner(lock) == self);
+
+	/*
+	 * We save whatever state the task is in and we'll restore it
+	 * after acquiring the lock taking real wakeups into account
+	 * as well. We are serialized via pi_lock against wakeups. See
+	 * try_to_wake_up().
+	 */
+	raw_spin_lock(&self->pi_lock);
+	self->saved_state = self->state;
+	__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+	raw_spin_unlock(&self->pi_lock);
+
+	ret = task_blocks_on_rt_mutex(lock, waiter, self, RT_MUTEX_MIN_CHAINWALK);
+	BUG_ON(ret);
+
+	for (;;) {
+		/* Try to acquire the lock again. */
+		if (__try_to_take_rt_mutex(lock, self, waiter, STEAL_LATERAL))
+			break;
+
+		top_waiter = rt_mutex_top_waiter(lock);
+		lock_owner = rt_mutex_owner(lock);
+
+		raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+		debug_rt_mutex_print_deadlock(waiter);
+
+		if (top_waiter != waiter || adaptive_wait(lock, lock_owner))
+			schedule();
+
+		raw_spin_lock_irqsave(&lock->wait_lock, flags);
+
+		raw_spin_lock(&self->pi_lock);
+		__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+		raw_spin_unlock(&self->pi_lock);
+	}
+
+	/*
+	 * Restore the task state to current->saved_state. We set it
+	 * to the original state above and the try_to_wake_up() code
+	 * has possibly updated it when a real (non-rtmutex) wakeup
+	 * happened while we were blocked. Clear saved_state so
+	 * try_to_wakeup() does not get confused.
+	 */
+	raw_spin_lock(&self->pi_lock);
+	__set_current_state_no_track(self->saved_state);
+	self->saved_state = TASK_RUNNING;
+	raw_spin_unlock(&self->pi_lock);
+
+	/*
+	 * try_to_take_rt_mutex() sets the waiter bit
+	 * unconditionally. We might have to fix that up:
+	 */
+	fixup_rt_mutex_waiters(lock);
+
+	BUG_ON(rt_mutex_has_waiters(lock) && waiter == rt_mutex_top_waiter(lock));
+	BUG_ON(!RB_EMPTY_NODE(&waiter->tree_entry));
+}
+
+static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock)
+{
+	struct rt_mutex_waiter waiter;
+	unsigned long flags;
+
+	rt_mutex_init_waiter(&waiter, true);
+
+	raw_spin_lock_irqsave(&lock->wait_lock, flags);
+	rt_spin_lock_slowlock_locked(lock, &waiter, flags);
+	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+	debug_rt_mutex_free_waiter(&waiter);
+}
+
+static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock,
+					     struct wake_q_head *wake_q,
+					     struct wake_q_head *wq_sleeper);
+/*
+ * Slow path to release a rt_mutex spin_lock style
+ */
+void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
+{
+	unsigned long flags;
+	DEFINE_WAKE_Q(wake_q);
+	DEFINE_WAKE_Q(wake_sleeper_q);
+	bool postunlock;
+
+	raw_spin_lock_irqsave(&lock->wait_lock, flags);
+	postunlock = __rt_mutex_unlock_common(lock, &wake_q, &wake_sleeper_q);
+	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+	if (postunlock)
+		rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
+}
+
+void __lockfunc rt_spin_lock(spinlock_t *lock)
+{
+	sleeping_lock_inc();
+	rcu_read_lock();
+	migrate_disable();
+	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
+}
+EXPORT_SYMBOL(rt_spin_lock);
+
+void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
+{
+	rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock);
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
+{
+	sleeping_lock_inc();
+	rcu_read_lock();
+	migrate_disable();
+	spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+	rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
+}
+EXPORT_SYMBOL(rt_spin_lock_nested);
+#endif
+
+void __lockfunc rt_spin_unlock(spinlock_t *lock)
+{
+	/* NOTE: we always pass in '1' for nested, for simplicity */
+	spin_release(&lock->dep_map, 1, _RET_IP_);
+	rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
+	migrate_enable();
+	rcu_read_unlock();
+	sleeping_lock_dec();
+}
+EXPORT_SYMBOL(rt_spin_unlock);
+
+void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
+{
+	rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
+}
+EXPORT_SYMBOL(__rt_spin_unlock);
+
+/*
+ * Wait for the lock to get unlocked: instead of polling for an unlock
+ * (like raw spinlocks do), we lock and unlock, to force the kernel to
+ * schedule if there's contention:
+ */
+void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
+{
+	spin_lock(lock);
+	spin_unlock(lock);
+}
+EXPORT_SYMBOL(rt_spin_unlock_wait);
+
+int __lockfunc rt_spin_trylock(spinlock_t *lock)
+{
+	int ret;
+
+	sleeping_lock_inc();
+	migrate_disable();
+	ret = __rt_mutex_trylock(&lock->lock);
+	if (ret) {
+		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+		rcu_read_lock();
+	} else {
+		migrate_enable();
+		sleeping_lock_dec();
+	}
+	return ret;
+}
+EXPORT_SYMBOL(rt_spin_trylock);
+
+int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
+{
+	int ret;
+
+	local_bh_disable();
+	ret = __rt_mutex_trylock(&lock->lock);
+	if (ret) {
+		sleeping_lock_inc();
+		rcu_read_lock();
+		migrate_disable();
+		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+	} else
+		local_bh_enable();
+	return ret;
+}
+EXPORT_SYMBOL(rt_spin_trylock_bh);
+
+int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
+{
+	int ret;
+
+	*flags = 0;
+	ret = __rt_mutex_trylock(&lock->lock);
+	if (ret) {
+		sleeping_lock_inc();
+		rcu_read_lock();
+		migrate_disable();
+		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(rt_spin_trylock_irqsave);
+
+void
+__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	/*
+	 * Make sure we are not reinitializing a held lock:
+	 */
+	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+	lockdep_init_map(&lock->dep_map, name, key, 0);
+#endif
+}
+EXPORT_SYMBOL(__rt_spin_lock_init);
+
+#endif /* PREEMPT_RT_FULL */
+
+#ifdef CONFIG_PREEMPT_RT_FULL
+ static inline int __sched
+__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
+{
+	struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
+	struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
+
+	if (!hold_ctx)
+		return 0;
+
+	if (unlikely(ctx == hold_ctx))
+		return -EALREADY;
+
+	if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
+	    (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
+#ifdef CONFIG_DEBUG_MUTEXES
+		DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
+		ctx->contending_lock = ww;
+#endif
+		return -EDEADLK;
+	}
+
+	return 0;
+}
+#else
+ static inline int __sched
+__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
+{
+	BUG();
+	return 0;
+}
+
+#endif
+
+static inline int
+try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+		     struct rt_mutex_waiter *waiter)
+{
+	return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
+}
+
 /*
 * Task blocks on lock.
 *
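
The core of the adaptive spinlock behaviour added in the hunk above is adaptive_wait(): a contended rt_spin_lock busy-waits only while the current lock owner is still running on another CPU, and falls back to schedule() as soon as the owner is preempted or the lock changes hands, while rt_spin_lock_slowlock_locked() keeps the caller's original task state in saved_state so non-lock wakeups are not lost. A rough userspace analogy of just the spin-or-sleep decision, using C11 atomics and pthreads; the kernel checks owner->on_cpu under rcu_read_lock(), everything below is illustrative:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct owner {
	atomic_bool on_cpu;
};

static struct owner *_Atomic lock_owner;	/* NULL means the lock is free */

/* Return true if the waiter should block instead of keep spinning. */
static bool adaptive_wait(struct owner *seen)
{
	for (;;) {
		if (atomic_load(&lock_owner) != seen)
			return false;	/* owner changed or released: retry the lock */
		if (!atomic_load(&seen->on_cpu))
			return true;	/* owner is off CPU: spinning is pointless */
		/* owner still running somewhere: keep spinning */
	}
}

static void *waiter_fn(void *arg)
{
	struct owner *o = atomic_load(&lock_owner);

	if (o && adaptive_wait(o))
		puts("owner was preempted -> waiter would schedule() here");
	else
		puts("owner released or changed -> waiter retries the fast path");
	return NULL;
}

int main(void)
{
	struct owner me = { .on_cpu = true };
	pthread_t t;

	atomic_store(&lock_owner, &me);
	pthread_create(&t, NULL, waiter_fn, NULL);
	usleep(1000);
	atomic_store(&me.on_cpu, false);	/* simulate the owner being preempted */
	pthread_join(t, NULL);
	return 0;
}
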
@@ -951,6 +1334,22 @@
 		return -EDEADLK;
 
 	raw_spin_lock(&task->pi_lock);
+	/*
+	 * In the case of futex requeue PI, this will be a proxy
+	 * lock. The task will wake unaware that it is enqueueed on
+	 * this lock. Avoid blocking on two locks and corrupting
+	 * pi_blocked_on via the PI_WAKEUP_INPROGRESS
+	 * flag. futex_wait_requeue_pi() sets this when it wakes up
+	 * before requeue (due to a signal or timeout). Do not enqueue
+	 * the task if PI_WAKEUP_INPROGRESS is set.
+	 */
+	if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) {
+		raw_spin_unlock(&task->pi_lock);
+		return -EAGAIN;
+	}
+
+	BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));
+
 	waiter->task = task;
 	waiter->lock = lock;
 	waiter->prio = task->prio;
@@ -974,7 +1373,7 @@
 		rt_mutex_enqueue_pi(owner, waiter);
 
 		rt_mutex_adjust_prio(owner);
-		if (owner->pi_blocked_on)
+		if (rt_mutex_real_waiter(owner->pi_blocked_on))
 			chain_walk = 1;
 	} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
 		chain_walk = 1;
@@ -1016,6 +1415,7 @@
 * Called with lock->wait_lock held and interrupts disabled.
 */
 static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
+				    struct wake_q_head *wake_sleeper_q,
 				    struct rt_mutex *lock)
 {
 	struct rt_mutex_waiter *waiter;
@@ -1055,7 +1455,10 @@
 	 * Pairs with preempt_enable() in rt_mutex_postunlock();
 	 */
 	preempt_disable();
-	wake_q_add(wake_q, waiter->task);
+	if (waiter->savestate)
+		wake_q_add_sleeper(wake_sleeper_q, waiter->task);
+	else
+		wake_q_add(wake_q, waiter->task);
 	raw_spin_unlock(&current->pi_lock);
 }
 
@@ -1070,7 +1473,7 @@
 {
 	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
 	struct task_struct *owner = rt_mutex_owner(lock);
-	struct rt_mutex *next_lock;
+	struct rt_mutex *next_lock = NULL;
 
 	lockdep_assert_held(&lock->wait_lock);
 
@@ -1096,7 +1499,8 @@
 	rt_mutex_adjust_prio(owner);
 
 	/* Store the lock on which owner is blocked or NULL */
-	next_lock = task_blocked_on_lock(owner);
+	if (rt_mutex_real_waiter(owner->pi_blocked_on))
+		next_lock = task_blocked_on_lock(owner);
 
 	raw_spin_unlock(&owner->pi_lock);
 
@@ -1132,26 +1536,28 @@
 	raw_spin_lock_irqsave(&task->pi_lock, flags);
 
 	waiter = task->pi_blocked_on;
-	if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
+	if (!rt_mutex_real_waiter(waiter) ||
+	    rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
 		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 		return;
 	}
 	next_lock = waiter->lock;
-	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 
 	/* gets dropped in rt_mutex_adjust_prio_chain()! */
 	get_task_struct(task);
 
+	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 	rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
 				   next_lock, NULL, task);
 }
 
-void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
+void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate)
 {
 	debug_rt_mutex_init_waiter(waiter);
 	RB_CLEAR_NODE(&waiter->pi_tree_entry);
 	RB_CLEAR_NODE(&waiter->tree_entry);
 	waiter->task = NULL;
+	waiter->savestate = savestate;
 }
 
 /**
@@ -1167,7 +1573,8 @@
 static int __sched
 __rt_mutex_slowlock(struct rt_mutex *lock, int state,
 		    struct hrtimer_sleeper *timeout,
-		    struct rt_mutex_waiter *waiter)
+		    struct rt_mutex_waiter *waiter,
+		    struct ww_acquire_ctx *ww_ctx)
 {
 	int ret = 0;
 
@@ -1176,16 +1583,17 @@
 		if (try_to_take_rt_mutex(lock, current, waiter))
 			break;
 
-		/*
-		 * TASK_INTERRUPTIBLE checks for signals and
-		 * timeout. Ignored otherwise.
-		 */
-		if (likely(state == TASK_INTERRUPTIBLE)) {
-			/* Signal pending? */
-			if (signal_pending(current))
-				ret = -EINTR;
-			if (timeout && !timeout->task)
-				ret = -ETIMEDOUT;
+		if (timeout && !timeout->task) {
+			ret = -ETIMEDOUT;
+			break;
+		}
+		if (signal_pending_state(state, current)) {
+			ret = -EINTR;
+			break;
+		}
+
+		if (ww_ctx && ww_ctx->acquired > 0) {
+			ret = __mutex_lock_check_stamp(lock, ww_ctx);
 			if (ret)
 				break;
 		}
@@ -1224,19 +1632,157 @@
 	}
 }
 
+static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
+						   struct ww_acquire_ctx *ww_ctx)
+{
+#ifdef CONFIG_DEBUG_MUTEXES
+	/*
+	 * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
+	 * but released with a normal mutex_unlock in this call.
+	 *
+	 * This should never happen, always use ww_mutex_unlock.
+	 */
+	DEBUG_LOCKS_WARN_ON(ww->ctx);
+
+	/*
+	 * Not quite done after calling ww_acquire_done() ?
+	 */
+	DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
+
+	if (ww_ctx->contending_lock) {
+		/*
+		 * After -EDEADLK you tried to
+		 * acquire a different ww_mutex? Bad!
+		 */
+		DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
+
+		/*
+		 * You called ww_mutex_lock after receiving -EDEADLK,
+		 * but 'forgot' to unlock everything else first?
+		 */
+		DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
+		ww_ctx->contending_lock = NULL;
+	}
+
+	/*
+	 * Naughty, using a different class will lead to undefined behavior!
+	 */
+	DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
+#endif
+	ww_ctx->acquired++;
+}
+
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void ww_mutex_account_lock(struct rt_mutex *lock,
+				  struct ww_acquire_ctx *ww_ctx)
+{
+	struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
+	struct rt_mutex_waiter *waiter, *n;
+
+	/*
+	 * This branch gets optimized out for the common case,
+	 * and is only important for ww_mutex_lock.
+	 */
+	ww_mutex_lock_acquired(ww, ww_ctx);
+	ww->ctx = ww_ctx;
+
+	/*
+	 * Give any possible sleeping processes the chance to wake up,
+	 * so they can recheck if they have to back off.
+	 */
+	rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters.rb_root,
+					     tree_entry) {
+		/* XXX debug rt mutex waiter wakeup */
+
+		BUG_ON(waiter->lock != lock);
+		rt_mutex_wake_waiter(waiter);
+	}
+}
+
+#else
+
+static void ww_mutex_account_lock(struct rt_mutex *lock,
+				  struct ww_acquire_ctx *ww_ctx)
+{
+	BUG();
+}
+#endif
+
+int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state,
+				     struct hrtimer_sleeper *timeout,
+				     enum rtmutex_chainwalk chwalk,
+				     struct ww_acquire_ctx *ww_ctx,
+				     struct rt_mutex_waiter *waiter)
+{
+	int ret;
+
+#ifdef CONFIG_PREEMPT_RT_FULL
+	if (ww_ctx) {
+		struct ww_mutex *ww;
+
+		ww = container_of(lock, struct ww_mutex, base.lock);
+		if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
+			return -EALREADY;
+	}
+#endif
+
+	/* Try to acquire the lock again: */
+	if (try_to_take_rt_mutex(lock, current, NULL)) {
+		if (ww_ctx)
+			ww_mutex_account_lock(lock, ww_ctx);
+		return 0;
+	}
+
+	set_current_state(state);
+
+	/* Setup the timer, when timeout != NULL */
+	if (unlikely(timeout))
+		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
+
+	ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk);
+
+	if (likely(!ret)) {
+		/* sleep on the mutex */
+		ret = __rt_mutex_slowlock(lock, state, timeout, waiter,
+					  ww_ctx);
+	} else if (ww_ctx) {
+		/* ww_mutex received EDEADLK, let it become EALREADY */
+		ret = __mutex_lock_check_stamp(lock, ww_ctx);
+		BUG_ON(!ret);
+	}
+
+	if (unlikely(ret)) {
+		__set_current_state(TASK_RUNNING);
+		remove_waiter(lock, waiter);
+		/* ww_mutex wants to report EDEADLK/EALREADY, let it */
+		if (!ww_ctx)
+			rt_mutex_handle_deadlock(ret, chwalk, waiter);
+	} else if (ww_ctx) {
+		ww_mutex_account_lock(lock, ww_ctx);
+	}
+
+	/*
+	 * try_to_take_rt_mutex() sets the waiter bit
+	 * unconditionally. We might have to fix that up.
+	 */
+	fixup_rt_mutex_waiters(lock);
+	return ret;
+}
+
 /*
 * Slow path lock function:
 */
 static int __sched
 rt_mutex_slowlock(struct rt_mutex *lock, int state,
 		  struct hrtimer_sleeper *timeout,
-		  enum rtmutex_chainwalk chwalk)
+		  enum rtmutex_chainwalk chwalk,
+		  struct ww_acquire_ctx *ww_ctx)
 {
 	struct rt_mutex_waiter waiter;
 	unsigned long flags;
 	int ret = 0;
 
-	rt_mutex_init_waiter(&waiter);
+	rt_mutex_init_waiter(&waiter, false);
 
 	/*
 	 * Technically we could use raw_spin_[un]lock_irq() here, but this can
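
For ww_mutex callers, rt_mutex_slowlock_locked() relies on __mutex_lock_check_stamp() to implement the wait/wound ordering: the acquire context that started later (larger stamp, ties broken by context address) is the one told to back off with -EDEADLK, while re-locking under the same context yields -EALREADY. A standalone sketch of just that stamp comparison, in plain C with errno constants from <errno.h>; not the kernel code:

#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>

struct acquire_ctx { unsigned long stamp; };

/* The younger context (larger stamp) backs off; ties broken by address. */
static int check_stamp(struct acquire_ctx *me, struct acquire_ctx *holder)
{
	if (!holder)
		return 0;			/* lock not held by a ww context */
	if (me == holder)
		return -EALREADY;		/* we already own it */
	if (me->stamp - holder->stamp <= LONG_MAX &&
	    (me->stamp != holder->stamp ||
	     (uintptr_t)me > (uintptr_t)holder))
		return -EDEADLK;		/* we are younger: back off */
	return 0;				/* we are older: keep waiting */
}

int main(void)
{
	struct acquire_ctx old = { .stamp = 100 };	/* started its transaction first */
	struct acquire_ctx young = { .stamp = 200 };

	printf("young vs old holder: %d (expect %d)\n",
	       check_stamp(&young, &old), -EDEADLK);
	printf("old vs young holder: %d (expect 0)\n",
	       check_stamp(&old, &young));
	printf("same context twice:  %d (expect %d)\n",
	       check_stamp(&old, &old), -EALREADY);
	return 0;
}
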
@@ -1248,35 +1794,8 @@
 	 */
 	raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
-	/* Try to acquire the lock again: */
-	if (try_to_take_rt_mutex(lock, current, NULL)) {
-		raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
-		return 0;
-	}
-
-	set_current_state(state);
-
-	/* Setup the timer, when timeout != NULL */
-	if (unlikely(timeout))
-		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
-
-	ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);
-
-	if (likely(!ret))
-		/* sleep on the mutex */
-		ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
-
-	if (unlikely(ret)) {
-		__set_current_state(TASK_RUNNING);
-		remove_waiter(lock, &waiter);
-		rt_mutex_handle_deadlock(ret, chwalk, &waiter);
-	}
-
-	/*
-	 * try_to_take_rt_mutex() sets the waiter bit
-	 * unconditionally. We might have to fix that up.
-	 */
-	fixup_rt_mutex_waiters(lock);
+	ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, ww_ctx,
+				       &waiter);
 
 	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
@@ -1337,7 +1856,8 @@
 * Return whether the current task needs to call rt_mutex_postunlock().
 */
 static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
-					struct wake_q_head *wake_q)
+					struct wake_q_head *wake_q,
+					struct wake_q_head *wake_sleeper_q)
 {
 	unsigned long flags;
 
@@ -1391,7 +1911,7 @@
 	 *
 	 * Queue the next waiter for wakeup once we release the wait_lock.
 	 */
-	mark_wakeup_next_waiter(wake_q, lock);
+	mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock);
 	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
 	return true; /* call rt_mutex_postunlock() */
@@ -1405,29 +1925,45 @@
 */
 static inline int
 rt_mutex_fastlock(struct rt_mutex *lock, int state,
+		  struct ww_acquire_ctx *ww_ctx,
 		  int (*slowfn)(struct rt_mutex *lock, int state,
 				struct hrtimer_sleeper *timeout,
-				enum rtmutex_chainwalk chwalk))
+				enum rtmutex_chainwalk chwalk,
+				struct ww_acquire_ctx *ww_ctx))
 {
 	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
 		return 0;
 
-	return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
+	/*
+	 * If rt_mutex blocks, the function sched_submit_work will not call
+	 * blk_schedule_flush_plug (because tsk_is_pi_blocked would be true).
+	 * We must call blk_schedule_flush_plug here, if we don't call it,
+	 * a deadlock in I/O may happen.
+	 */
+	if (unlikely(blk_needs_flush_plug(current)))
+		blk_schedule_flush_plug(current);
+
+	return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK, ww_ctx);
 }
 
 static inline int
 rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
			struct hrtimer_sleeper *timeout,
			enum rtmutex_chainwalk chwalk,
+			struct ww_acquire_ctx *ww_ctx,
			int (*slowfn)(struct rt_mutex *lock, int state,
				      struct hrtimer_sleeper *timeout,
-				      enum rtmutex_chainwalk chwalk))
+				      enum rtmutex_chainwalk chwalk,
+				      struct ww_acquire_ctx *ww_ctx))
 {
 	if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
	    likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		return 0;
 
-	return slowfn(lock, state, timeout, chwalk);
+	if (unlikely(blk_needs_flush_plug(current)))
+		blk_schedule_flush_plug(current);
+
+	return slowfn(lock, state, timeout, chwalk, ww_ctx);
 }
 
 static inline int
@@ -1443,9 +1979,11 @@
 /*
 * Performs the wakeup of the the top-waiter and re-enables preemption.
 */
-void rt_mutex_postunlock(struct wake_q_head *wake_q)
+void rt_mutex_postunlock(struct wake_q_head *wake_q,
+			 struct wake_q_head *wake_sleeper_q)
 {
 	wake_up_q(wake_q);
+	wake_up_q_sleeper(wake_sleeper_q);
 
 	/* Pairs with preempt_disable() in rt_mutex_slowunlock() */
 	preempt_enable();
@@ -1454,23 +1992,46 @@
 static inline void
 rt_mutex_fastunlock(struct rt_mutex *lock,
		    bool (*slowfn)(struct rt_mutex *lock,
-				   struct wake_q_head *wqh))
+				   struct wake_q_head *wqh,
+				   struct wake_q_head *wq_sleeper))
 {
 	DEFINE_WAKE_Q(wake_q);
+	DEFINE_WAKE_Q(wake_sleeper_q);
 
 	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
 		return;
 
-	if (slowfn(lock, &wake_q))
-		rt_mutex_postunlock(&wake_q);
+	if (slowfn(lock, &wake_q, &wake_sleeper_q))
+		rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
+}
+
+int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state)
+{
+	might_sleep();
+	return rt_mutex_fastlock(lock, state, NULL, rt_mutex_slowlock);
+}
+
+/**
+ * rt_mutex_lock_state - lock a rt_mutex with a given state
+ *
+ * @lock: The rt_mutex to be locked
+ * @state: The state to set when blocking on the rt_mutex
+ */
+static inline int __sched rt_mutex_lock_state(struct rt_mutex *lock,
+					      unsigned int subclass, int state)
+{
+	int ret;
+
+	mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+	ret = __rt_mutex_lock_state(lock, state);
+	if (ret)
+		mutex_release(&lock->dep_map, 1, _RET_IP_);
+	return ret;
 }
 
 static inline void __rt_mutex_lock(struct rt_mutex *lock, unsigned int subclass)
 {
-	might_sleep();
-
-	mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
-	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
+	rt_mutex_lock_state(lock, subclass, TASK_UNINTERRUPTIBLE);
 }
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -1511,16 +2072,7 @@
 */
 int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
 {
-	int ret;
-
-	might_sleep();
-
-	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
-	if (ret)
-		mutex_release(&lock->dep_map, 1, _RET_IP_);
-
-	return ret;
+	return rt_mutex_lock_state(lock, 0, TASK_INTERRUPTIBLE);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
 
@@ -1536,6 +2088,22 @@
 {
 	return __rt_mutex_slowtrylock(lock);
 }
+
+/**
+ * rt_mutex_lock_killable - lock a rt_mutex killable
+ *
+ * @lock: the rt_mutex to be locked
+ * @detect_deadlock: deadlock detection on/off
+ *
+ * Returns:
+ *  0 on success
+ * -EINTR when interrupted by a signal
+ */
+int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
+{
+	return rt_mutex_lock_state(lock, 0, TASK_KILLABLE);
+}
+EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
 
 /**
 * rt_mutex_timed_lock - lock a rt_mutex interruptible
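
rt_mutex_lock_killable() blocks in TASK_KILLABLE, so only a fatal signal interrupts the wait and the caller has to cope with -EINTR. A hedged usage sketch, with my_op() and my_lock as hypothetical names:

/* Hypothetical caller; only the error handling is the point here. */
static int my_op(struct rt_mutex *my_lock)
{
	int ret = rt_mutex_lock_killable(my_lock);

	if (ret)
		return ret;	/* -EINTR: interrupted by a fatal signal */
	/* ... critical section ... */
	rt_mutex_unlock(my_lock);
	return 0;
}
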
@@ -1560,6 +2128,7 @@
 	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 	ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
				      RT_MUTEX_MIN_CHAINWALK,
+				      NULL,
				      rt_mutex_slowlock);
 	if (ret)
		mutex_release(&lock->dep_map, 1, _RET_IP_);
@@ -1567,6 +2136,18 @@
 	return ret;
 }
 EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
+
+int __sched __rt_mutex_trylock(struct rt_mutex *lock)
+{
+#ifdef CONFIG_PREEMPT_RT_FULL
+	if (WARN_ON_ONCE(in_irq() || in_nmi()))
+#else
+	if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq()))
+#endif
+		return 0;
+
+	return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
+}
 
 /**
 * rt_mutex_trylock - try to lock a rt_mutex
@@ -1583,16 +2164,18 @@
 {
 	int ret;
 
-	if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq()))
-		return 0;
-
-	ret = rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
+	ret = __rt_mutex_trylock(lock);
 	if (ret)
		mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
 
 	return ret;
 }
 EXPORT_SYMBOL_GPL(rt_mutex_trylock);
+
+void __sched __rt_mutex_unlock(struct rt_mutex *lock)
+{
+	rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
+}
 
 /**
 * rt_mutex_unlock - unlock a rt_mutex
@@ -1602,16 +2185,13 @@
 void __sched rt_mutex_unlock(struct rt_mutex *lock)
 {
 	mutex_release(&lock->dep_map, 1, _RET_IP_);
-	rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
+	__rt_mutex_unlock(lock);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_unlock);
 
-/**
- * Futex variant, that since futex variants do not use the fast-path, can be
- * simple and will not need to retry.
- */
-bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
-				     struct wake_q_head *wake_q)
+static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock,
+					     struct wake_q_head *wake_q,
+					     struct wake_q_head *wq_sleeper)
 {
 	lockdep_assert_held(&lock->wait_lock);
 
@@ -1628,23 +2208,35 @@
 	 * avoid inversion prior to the wakeup. preempt_disable()
 	 * therein pairs with rt_mutex_postunlock().
 	 */
-	mark_wakeup_next_waiter(wake_q, lock);
+	mark_wakeup_next_waiter(wake_q, wq_sleeper, lock);
 
 	return true; /* call postunlock() */
+}
+
+/**
+ * Futex variant, that since futex variants do not use the fast-path, can be
+ * simple and will not need to retry.
+ */
+bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
+				     struct wake_q_head *wake_q,
+				     struct wake_q_head *wq_sleeper)
+{
+	return __rt_mutex_unlock_common(lock, wake_q, wq_sleeper);
 }
 
 void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
 {
 	DEFINE_WAKE_Q(wake_q);
+	DEFINE_WAKE_Q(wake_sleeper_q);
 	unsigned long flags;
 	bool postunlock;
 
 	raw_spin_lock_irqsave(&lock->wait_lock, flags);
-	postunlock = __rt_mutex_futex_unlock(lock, &wake_q);
+	postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q);
 	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
 	if (postunlock)
-		rt_mutex_postunlock(&wake_q);
+		rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
 }
 
 /**
@@ -1683,7 +2275,7 @@
 	if (name && key)
		debug_rt_mutex_init(lock, name, key);
 }
-EXPORT_SYMBOL_GPL(__rt_mutex_init);
+EXPORT_SYMBOL(__rt_mutex_init);
 
 /**
 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
@@ -1703,6 +2295,14 @@
			struct task_struct *proxy_owner)
 {
 	__rt_mutex_init(lock, NULL, NULL);
+#ifdef CONFIG_DEBUG_SPINLOCK
+	/*
+	 * get another key class for the wait_lock. LOCK_PI and UNLOCK_PI is
+	 * holding the ->wait_lock of the proxy_lock while unlocking a sleeping
+	 * lock.
+	 */
+	raw_spin_lock_init(&lock->wait_lock);
+#endif
 	debug_rt_mutex_proxy_lock(lock, proxy_owner);
 	rt_mutex_set_owner(lock, proxy_owner);
 }
@@ -1723,6 +2323,26 @@
 {
 	debug_rt_mutex_proxy_unlock(lock);
 	rt_mutex_set_owner(lock, NULL);
+}
+
+static void fixup_rt_mutex_blocked(struct rt_mutex *lock)
+{
+	struct task_struct *tsk = current;
+	/*
+	 * RT has a problem here when the wait got interrupted by a timeout
+	 * or a signal. task->pi_blocked_on is still set. The task must
+	 * acquire the hash bucket lock when returning from this function.
+	 *
+	 * If the hash bucket lock is contended then the
+	 * BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in
+	 * task_blocks_on_rt_mutex() will trigger. This can be avoided by
+	 * clearing task->pi_blocked_on which removes the task from the
+	 * boosting chain of the rtmutex. That's correct because the task
+	 * is not longer blocked on it.
+	 */
+	raw_spin_lock(&tsk->pi_lock);
+	tsk->pi_blocked_on = NULL;
+	raw_spin_unlock(&tsk->pi_lock);
 }
 
 /**
@@ -1755,6 +2375,34 @@
 	if (try_to_take_rt_mutex(lock, task, NULL))
		return 1;
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+	/*
+	 * In PREEMPT_RT there's an added race.
+	 * If the task, that we are about to requeue, times out,
+	 * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue
+	 * to skip this task. But right after the task sets
+	 * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
+	 * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
+	 * This will replace the PI_WAKEUP_INPROGRESS with the actual
+	 * lock that it blocks on. We *must not* place this task
+	 * on this proxy lock in that case.
+	 *
+	 * To prevent this race, we first take the task's pi_lock
+	 * and check if it has updated its pi_blocked_on. If it has,
+	 * we assume that it woke up and we return -EAGAIN.
+	 * Otherwise, we set the task's pi_blocked_on to
+	 * PI_REQUEUE_INPROGRESS, so that if the task is waking up
+	 * it will know that we are in the process of requeuing it.
+	 */
+	raw_spin_lock(&task->pi_lock);
+	if (task->pi_blocked_on) {
+		raw_spin_unlock(&task->pi_lock);
+		return -EAGAIN;
+	}
+	task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
+	raw_spin_unlock(&task->pi_lock);
+#endif
+
 	/* We enforce deadlock detection for futexes */
 	ret = task_blocks_on_rt_mutex(lock, waiter, task,
				      RT_MUTEX_FULL_CHAINWALK);
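
The requeue path above guards against this PREEMPT_RT-only race with a claim-or-back-off step under the task's pi_lock: if pi_blocked_on is already set, the requeue is abandoned with -EAGAIN; otherwise PI_REQUEUE_INPROGRESS is published so the waking task can see that a requeue is in flight. A userspace sketch of that pattern follows; a pthread mutex stands in for pi_lock, the names mirror the patch but the code is purely illustrative:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

struct waiter;
#define PI_REQUEUE_INPROGRESS ((struct waiter *)2)

struct task {
	pthread_mutex_t pi_lock;
	struct waiter *pi_blocked_on;
};

static int claim_for_requeue(struct task *t)
{
	int ret = 0;

	pthread_mutex_lock(&t->pi_lock);
	if (t->pi_blocked_on)
		ret = -EAGAIN;		/* task raced ahead and blocked elsewhere */
	else
		t->pi_blocked_on = PI_REQUEUE_INPROGRESS;
	pthread_mutex_unlock(&t->pi_lock);
	return ret;
}

int main(void)
{
	struct task t = { .pi_lock = PTHREAD_MUTEX_INITIALIZER };

	printf("first claim:  %d\n", claim_for_requeue(&t));	/* 0 */
	printf("second claim: %d\n", claim_for_requeue(&t));	/* -EAGAIN */
	return 0;
}
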
@@ -1768,6 +2416,9 @@
 		 */
 		ret = 0;
 	}
+
+	if (ret)
+		fixup_rt_mutex_blocked(lock);
 
 	debug_rt_mutex_print_deadlock(waiter);
 
@@ -1854,12 +2505,15 @@
 	raw_spin_lock_irq(&lock->wait_lock);
 	/* sleep on the mutex */
 	set_current_state(TASK_INTERRUPTIBLE);
-	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
+	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
 	/*
 	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
 	 * have to fix that up.
 	 */
 	fixup_rt_mutex_waiters(lock);
+	if (ret)
+		fixup_rt_mutex_blocked(lock);
+
 	raw_spin_unlock_irq(&lock->wait_lock);
 
 	return ret;
@@ -1921,3 +2575,99 @@
 
 	return cleanup;
 }
+
+static inline int
+ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+{
+#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
+	unsigned tmp;
+
+	if (ctx->deadlock_inject_countdown-- == 0) {
+		tmp = ctx->deadlock_inject_interval;
+		if (tmp > UINT_MAX/4)
+			tmp = UINT_MAX;
+		else
+			tmp = tmp*2 + tmp + tmp/2;
+
+		ctx->deadlock_inject_interval = tmp;
+		ctx->deadlock_inject_countdown = tmp;
+		ctx->contending_lock = lock;
+
+		ww_mutex_unlock(lock);
+
+		return -EDEADLK;
+	}
+#endif
+
+	return 0;
+}
+
+#ifdef CONFIG_PREEMPT_RT_FULL
+int __sched
+ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+{
+	int ret;
+
+	might_sleep();
+
+	mutex_acquire_nest(&lock->base.dep_map, 0, 0,
+			   ctx ? &ctx->dep_map : NULL, _RET_IP_);
+	ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0,
+				ctx);
+	if (ret)
+		mutex_release(&lock->base.dep_map, 1, _RET_IP_);
+	else if (!ret && ctx && ctx->acquired > 1)
+		return ww_mutex_deadlock_injection(lock, ctx);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);
+
+int __sched
+ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+{
+	int ret;
+
+	might_sleep();
+
+	mutex_acquire_nest(&lock->base.dep_map, 0, 0,
+			   ctx ? &ctx->dep_map : NULL, _RET_IP_);
+	ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0,
+				ctx);
+	if (ret)
+		mutex_release(&lock->base.dep_map, 1, _RET_IP_);
+	else if (!ret && ctx && ctx->acquired > 1)
+		return ww_mutex_deadlock_injection(lock, ctx);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ww_mutex_lock);
+
+void __sched ww_mutex_unlock(struct ww_mutex *lock)
+{
+	int nest = !!lock->ctx;
+
+	/*
+	 * The unlocking fastpath is the 0->1 transition from 'locked'
+	 * into 'unlocked' state:
+	 */
+	if (nest) {
+#ifdef CONFIG_DEBUG_MUTEXES
+		DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
+#endif
+		if (lock->ctx->acquired > 0)
+			lock->ctx->acquired--;
+		lock->ctx = NULL;
+	}
+
+	mutex_release(&lock->base.dep_map, nest, _RET_IP_);
+	__rt_mutex_unlock(&lock->base.lock);
+}
+EXPORT_SYMBOL(ww_mutex_unlock);
+
+int __rt_mutex_owner_current(struct rt_mutex *lock)
+{
+	return rt_mutex_owner(lock) == current;
+}
+EXPORT_SYMBOL(__rt_mutex_owner_current);
+#endif
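
The ww_mutex_lock()/ww_mutex_unlock() implementations added here for PREEMPT_RT_FULL are meant to be driven by the usual ww_mutex acquire protocol: take locks under one ww_acquire_ctx, and when -EDEADLK reports that an older transaction owns a lock, drop everything held, wait for the contended lock with the slow-path variant, and retry. A hedged caller-side sketch, assuming the standard ww_mutex API (DEFINE_WW_CLASS(), ww_acquire_init(), ww_acquire_done(), ww_mutex_lock_slow(), ww_acquire_fini()), none of which is part of this patch; struct my_obj is hypothetical:

#include <linux/ww_mutex.h>

struct my_obj {
	struct ww_mutex lock;
	/* ... payload ... */
};

static DEFINE_WW_CLASS(my_ww_class);

/* Returns with both a->lock and b->lock held under @ctx. */
static void lock_both(struct my_obj *a, struct my_obj *b,
		      struct ww_acquire_ctx *ctx)
{
	struct ww_mutex *contended = NULL;
	int ret;

retry:
	if (contended)
		/* We lost the last round: sleep until the winner is done. */
		ww_mutex_lock_slow(contended, ctx);

	if (contended != &a->lock) {
		ret = ww_mutex_lock(&a->lock, ctx);
		if (ret == -EDEADLK) {
			if (contended == &b->lock)
				ww_mutex_unlock(&b->lock);
			contended = &a->lock;
			goto retry;
		}
	}
	if (contended != &b->lock) {
		ret = ww_mutex_lock(&b->lock, ctx);
		if (ret == -EDEADLK) {
			ww_mutex_unlock(&a->lock);
			contended = &b->lock;
			goto retry;
		}
	}
	ww_acquire_done(ctx);
}

/*
 * Caller side:
 *	struct ww_acquire_ctx ctx;
 *
 *	ww_acquire_init(&ctx, &my_ww_class);
 *	lock_both(a, b, &ctx);
 *	... work on a and b ...
 *	ww_mutex_unlock(&a->lock);
 *	ww_mutex_unlock(&b->lock);
 *	ww_acquire_fini(&ctx);
 */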