2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/kernel/time/hrtimer.c
@@ -1,34 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * linux/kernel/hrtimer.c
- *
  * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
  * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
  * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
  *
  * High-resolution kernel timers
  *
- * In contrast to the low-resolution timeout API implemented in
- * kernel/timer.c, hrtimers provide finer resolution and accuracy
- * depending on system configuration and capabilities.
- *
- * These timers are currently used for:
- *  - itimers
- *  - POSIX timers
- *  - nanosleep
- *  - precise in-kernel timing
+ * In contrast to the low-resolution timeout API, aka timer wheel,
+ * hrtimers provide finer resolution and accuracy depending on system
+ * configuration and capabilities.
  *
  * Started by: Thomas Gleixner and Ingo Molnar
  *
  * Credits:
- * based on kernel/timer.c
+ * Based on the original timer wheel code
  *
  * Help, testing, suggestions, bugfixes, improvements were
  * provided by:
  *
  * George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
  * et. al.
- *
- * For licencing details see kernel-base/COPYING
  */

 #include <linux/cpu.h>
@@ -39,7 +30,6 @@
 #include <linux/syscalls.h>
 #include <linux/interrupt.h>
 #include <linux/tick.h>
-#include <linux/seq_file.h>
 #include <linux/err.h>
 #include <linux/debugobjects.h>
 #include <linux/sched/signal.h>
@@ -145,10 +135,19 @@
  * timer->base->cpu_base
  */
 static struct hrtimer_cpu_base migration_cpu_base = {
-	.clock_base = { { .cpu_base = &migration_cpu_base, }, },
+	.clock_base = { {
+		.cpu_base = &migration_cpu_base,
+		.seq = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq,
+						&migration_cpu_base.lock),
+	}, },
 };

 #define migration_base	migration_cpu_base.clock_base[0]
+
+static inline bool is_migration_base(struct hrtimer_clock_base *base)
+{
+	return base == &migration_base;
+}

 /*
  * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
@@ -169,7 +168,7 @@
 	struct hrtimer_clock_base *base;

 	for (;;) {
-		base = timer->base;
+		base = READ_ONCE(timer->base);
 		if (likely(base != &migration_base)) {
 			raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
 			if (likely(base == timer->base))
@@ -249,7 +248,7 @@
 			return base;

 		/* See the comment in lock_hrtimer_base() */
-		timer->base = &migration_base;
+		WRITE_ONCE(timer->base, &migration_base);
 		raw_spin_unlock(&base->cpu_base->lock);
 		raw_spin_lock(&new_base->cpu_base->lock);

@@ -258,10 +257,10 @@
 			raw_spin_unlock(&new_base->cpu_base->lock);
 			raw_spin_lock(&base->cpu_base->lock);
 			new_cpu_base = this_cpu_base;
-			timer->base = base;
+			WRITE_ONCE(timer->base, base);
 			goto again;
 		}
-		timer->base = new_base;
+		WRITE_ONCE(timer->base, new_base);
 	} else {
 		if (new_cpu_base != this_cpu_base &&
 		    hrtimer_check_target(timer, new_base)) {
@@ -273,6 +272,11 @@
 }

 #else /* CONFIG_SMP */
+
+static inline bool is_migration_base(struct hrtimer_clock_base *base)
+{
+	return false;
+}

 static inline struct hrtimer_clock_base *
 lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
@@ -311,7 +315,7 @@
 		div >>= 1;
 	}
 	tmp >>= sft;
-	do_div(tmp, (unsigned long) div);
+	do_div(tmp, (u32) div);
 	return dclc < 0 ? -tmp : tmp;
 }
 EXPORT_SYMBOL_GPL(__ktime_divns);
@@ -338,7 +342,7 @@

 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS

-static struct debug_obj_descr hrtimer_debug_descr;
+static const struct debug_obj_descr hrtimer_debug_descr;

 static void *hrtimer_debug_hint(void *addr)
 {
@@ -373,7 +377,7 @@
 	switch (state) {
 	case ODEBUG_STATE_ACTIVE:
 		WARN_ON(1);
-
+		fallthrough;
 	default:
 		return false;
 	}
@@ -397,7 +401,7 @@
 	}
 }

-static struct debug_obj_descr hrtimer_debug_descr = {
+static const struct debug_obj_descr hrtimer_debug_descr = {
 	.name = "hrtimer",
 	.debug_hint = hrtimer_debug_hint,
 	.fixup_init = hrtimer_fixup_init,
@@ -421,11 +425,6 @@
 	debug_object_deactivate(timer, &hrtimer_debug_descr);
 }

-static inline void debug_hrtimer_free(struct hrtimer *timer)
-{
-	debug_object_free(timer, &hrtimer_debug_descr);
-}
-
 static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 			   enum hrtimer_mode mode);

@@ -436,6 +435,17 @@
 	__hrtimer_init(timer, clock_id, mode);
 }
 EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
+
+static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode);
+
+void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode)
+{
+	debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr);
+	__hrtimer_init_sleeper(sl, clock_id, mode);
+}
+EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack);

 void destroy_hrtimer_on_stack(struct hrtimer *timer)
 {
@@ -748,22 +758,6 @@
 	retrigger_next_event(NULL);
 }

-static void clock_was_set_work(struct work_struct *work)
-{
-	clock_was_set();
-}
-
-static DECLARE_WORK(hrtimer_work, clock_was_set_work);
-
-/*
- * Called from timekeeping and resume code to reprogram the hrtimer
- * interrupt device on all cpus.
- */
-void clock_was_set_delayed(void)
-{
-	schedule_work(&hrtimer_work);
-}
-
 #else

 static inline int hrtimer_is_hres_enabled(void) { return 0; }
@@ -879,6 +873,22 @@
 	on_each_cpu(retrigger_next_event, NULL, 1);
 #endif
 	timerfd_clock_was_set();
+}
+
+static void clock_was_set_work(struct work_struct *work)
+{
+	clock_was_set();
+}
+
+static DECLARE_WORK(hrtimer_work, clock_was_set_work);
+
+/*
+ * Called from timekeeping and resume code to reprogram the hrtimer
+ * interrupt device on all cpus and to notify timerfd.
+ */
+void clock_was_set_delayed(void)
+{
+	schedule_work(&hrtimer_work);
 }

 /*
@@ -1173,9 +1183,13 @@

 	/*
 	 * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
-	 * match.
+	 * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard
+	 * expiry mode because unmarked timers are moved to softirq expiry.
 	 */
-	WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
+	else
+		WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard);

 	base = lock_hrtimer_base(timer, &flags);

@@ -1191,9 +1205,10 @@
  * @timer:	hrtimer to stop
  *
  * Returns:
- *  0 when the timer was not active
- *  1 when the timer was active
- * -1 when the timer is currently executing the callback function and
+ *
+ *  * 0 when the timer was not active
+ *  * 1 when the timer was active
+ *  * -1 when the timer is currently executing the callback function and
  *    cannot be stopped
  */
 int hrtimer_try_to_cancel(struct hrtimer *timer)
@@ -1223,6 +1238,93 @@
 }
 EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);

+#ifdef CONFIG_PREEMPT_RT
+static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
+{
+	spin_lock_init(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
+{
+	spin_lock(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
+{
+	spin_unlock(&base->softirq_expiry_lock);
+}
+
+/*
+ * The counterpart to hrtimer_cancel_wait_running().
+ *
+ * If there is a waiter for cpu_base->expiry_lock, then it was waiting for
+ * the timer callback to finish. Drop expiry_lock and reaquire it. That
+ * allows the waiter to acquire the lock and make progress.
+ */
+static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
+				      unsigned long flags)
+{
+	if (atomic_read(&cpu_base->timer_waiters)) {
+		raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+		spin_unlock(&cpu_base->softirq_expiry_lock);
+		spin_lock(&cpu_base->softirq_expiry_lock);
+		raw_spin_lock_irq(&cpu_base->lock);
+	}
+}
+
+/*
+ * This function is called on PREEMPT_RT kernels when the fast path
+ * deletion of a timer failed because the timer callback function was
+ * running.
+ *
+ * This prevents priority inversion: if the soft irq thread is preempted
+ * in the middle of a timer callback, then calling del_timer_sync() can
+ * lead to two issues:
+ *
+ *  - If the caller is on a remote CPU then it has to spin wait for the timer
+ *    handler to complete. This can result in unbound priority inversion.
+ *
+ *  - If the caller originates from the task which preempted the timer
+ *    handler on the same CPU, then spin waiting for the timer handler to
+ *    complete is never going to end.
+ */
+void hrtimer_cancel_wait_running(const struct hrtimer *timer)
+{
+	/* Lockless read. Prevent the compiler from reloading it below */
+	struct hrtimer_clock_base *base = READ_ONCE(timer->base);
+
+	/*
+	 * Just relax if the timer expires in hard interrupt context or if
+	 * it is currently on the migration base.
+	 */
+	if (!timer->is_soft || is_migration_base(base)) {
+		cpu_relax();
+		return;
+	}
+
+	/*
+	 * Mark the base as contended and grab the expiry lock, which is
+	 * held by the softirq across the timer callback. Drop the lock
+	 * immediately so the softirq can expire the next timer. In theory
+	 * the timer could already be running again, but that's more than
+	 * unlikely and just causes another wait loop.
+	 */
+	atomic_inc(&base->cpu_base->timer_waiters);
+	spin_lock_bh(&base->cpu_base->softirq_expiry_lock);
+	atomic_dec(&base->cpu_base->timer_waiters);
+	spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);
+}
+#else
+static inline void
+hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base,
+					     unsigned long flags) { }
+#endif
+
 /**
  * hrtimer_cancel - cancel a timer and wait for the handler to finish.
  * @timer:	the timer to be cancelled
@@ -1233,13 +1335,15 @@
  */
 int hrtimer_cancel(struct hrtimer *timer)
 {
-	for (;;) {
-		int ret = hrtimer_try_to_cancel(timer);
+	int ret;

-		if (ret >= 0)
-			return ret;
-		cpu_relax();
-	}
+	do {
+		ret = hrtimer_try_to_cancel(timer);
+
+		if (ret < 0)
+			hrtimer_cancel_wait_running(timer);
+	} while (ret < 0);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(hrtimer_cancel);

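With the expiry-lock machinery above, cancelling a softirq-expiring timer no longer busy-spins on PREEMPT_RT while its callback runs. A minimal caller-side sketch (illustrative only; the demo_* names are not part of this patch, and the hrtimer is assumed to be set up elsewhere):

#include <linux/hrtimer.h>

static struct hrtimer demo_timer;	/* assumed initialized with hrtimer_init() */

static void demo_teardown(void)
{
	/*
	 * Returns 0 if the timer was idle, 1 if it was queued. If the
	 * callback is currently executing, hrtimer_cancel() now calls
	 * hrtimer_cancel_wait_running() and blocks on the per-CPU
	 * softirq_expiry_lock instead of spinning with cpu_relax().
	 */
	hrtimer_cancel(&demo_timer);
}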
@@ -1336,8 +1440,17 @@
 			   enum hrtimer_mode mode)
 {
 	bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
-	int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
 	struct hrtimer_cpu_base *cpu_base;
+	int base;
+
+	/*
+	 * On PREEMPT_RT enabled kernels hrtimers which are not explicitely
+	 * marked for hard interrupt expiry mode are moved into soft
+	 * interrupt context for latency reasons and because the callbacks
+	 * can invoke functions which might sleep on RT, e.g. spin_lock().
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD))
+		softtimer = true;

 	memset(timer, 0, sizeof(struct hrtimer));

@@ -1351,8 +1464,10 @@
 	if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
 		clock_id = CLOCK_MONOTONIC;

+	base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
 	base += hrtimer_clockid_to_base(clock_id);
 	timer->is_soft = softtimer;
+	timer->is_hard = !!(mode & HRTIMER_MODE_HARD);
 	timer->base = &cpu_base->clock_base[base];
 	timerqueue_init(&timer->node);
 }
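Because __hrtimer_init() now demotes every timer that is not explicitly marked with a HRTIMER_MODE_*_HARD mode to softirq expiry on PREEMPT_RT, callers that genuinely need hard interrupt expiry have to say so both at init and at start time. A hypothetical example (the demo_* names are illustrative, not from this patch):

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer demo_tick;

static enum hrtimer_restart demo_tick_fn(struct hrtimer *t)
{
	/* Runs in hard interrupt context even on RT, so only irq-safe work here. */
	hrtimer_forward_now(t, ms_to_ktime(1));
	return HRTIMER_RESTART;
}

static void demo_tick_start(void)
{
	/*
	 * HRTIMER_MODE_REL_HARD sets timer->is_hard and keeps hard-irq
	 * expiry; plain HRTIMER_MODE_REL would be turned into a softirq
	 * timer (is_soft = true) on PREEMPT_RT by the hunk above.
	 */
	hrtimer_init(&demo_tick, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
	demo_tick.function = demo_tick_fn;
	hrtimer_start(&demo_tick, ms_to_ktime(1), HRTIMER_MODE_REL_HARD);
}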
@@ -1425,9 +1540,10 @@
 static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 			  struct hrtimer_clock_base *base,
 			  struct hrtimer *timer, ktime_t *now,
-			  unsigned long flags)
+			  unsigned long flags) __must_hold(&cpu_base->lock)
 {
 	enum hrtimer_restart (*fn)(struct hrtimer *);
+	bool expires_in_hardirq;
 	int restart;

 	lockdep_assert_held(&cpu_base->lock);
@@ -1462,7 +1578,11 @@
 	 */
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 	trace_hrtimer_expire_entry(timer, now);
+	expires_in_hardirq = lockdep_hrtimer_enter(timer);
+
 	restart = fn(timer);
+
+	lockdep_hrtimer_exit(expires_in_hardirq);
 	trace_hrtimer_expire_exit(timer);
 	raw_spin_lock_irq(&cpu_base->lock);

@@ -1525,6 +1645,8 @@
 				break;

 			__run_hrtimer(cpu_base, base, timer, &basenow, flags);
+			if (active_mask == HRTIMER_ACTIVE_SOFT)
+				hrtimer_sync_wait_running(cpu_base, flags);
 		}
 	}
 }
@@ -1535,6 +1657,7 @@
 	unsigned long flags;
 	ktime_t now;

+	hrtimer_cpu_base_lock_expiry(cpu_base);
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);

 	now = hrtimer_update_base(cpu_base);
@@ -1544,6 +1667,7 @@
 	hrtimer_update_softirq_timer(cpu_base, true);

 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+	hrtimer_cpu_base_unlock_expiry(cpu_base);
 }

 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1715,10 +1839,75 @@
 	return HRTIMER_NORESTART;
 }

-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
+/**
+ * hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer
+ * @sl:		sleeper to be started
+ * @mode:	timer mode abs/rel
+ *
+ * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers
+ * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context)
+ */
+void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
+				   enum hrtimer_mode mode)
 {
+	/*
+	 * Make the enqueue delivery mode check work on RT. If the sleeper
+	 * was initialized for hard interrupt delivery, force the mode bit.
+	 * This is a special case for hrtimer_sleepers because
+	 * hrtimer_init_sleeper() determines the delivery mode on RT so the
+	 * fiddling with this decision is avoided at the call sites.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard)
+		mode |= HRTIMER_MODE_HARD;
+
+	hrtimer_start_expires(&sl->timer, mode);
+}
+EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
+
+static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode)
+{
+	/*
+	 * On PREEMPT_RT enabled kernels hrtimers which are not explicitely
+	 * marked for hard interrupt expiry mode are moved into soft
+	 * interrupt context either for latency reasons or because the
+	 * hrtimer callback takes regular spinlocks or invokes other
+	 * functions which are not suitable for hard interrupt context on
+	 * PREEMPT_RT.
+	 *
+	 * The hrtimer_sleeper callback is RT compatible in hard interrupt
+	 * context, but there is a latency concern: Untrusted userspace can
+	 * spawn many threads which arm timers for the same expiry time on
+	 * the same CPU. That causes a latency spike due to the wakeup of
+	 * a gazillion threads.
+	 *
+	 * OTOH, priviledged real-time user space applications rely on the
+	 * low latency of hard interrupt wakeups. If the current task is in
+	 * a real-time scheduling class, mark the mode for hard interrupt
+	 * expiry.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
+			mode |= HRTIMER_MODE_HARD;
+	}
+
+	__hrtimer_init(&sl->timer, clock_id, mode);
 	sl->timer.function = hrtimer_wakeup;
-	sl->task = task;
+	sl->task = current;
+}
+
+/**
+ * hrtimer_init_sleeper - initialize sleeper to the given clock
+ * @sl:		sleeper to be initialized
+ * @clock_id:	the clock to be used
+ * @mode:	timer mode abs/rel
+ */
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
+			  enum hrtimer_mode mode)
+{
+	debug_init(&sl->timer, clock_id, mode);
+	__hrtimer_init_sleeper(sl, clock_id, mode);
+
 }
 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);

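The nanosleep and schedule_hrtimeout paths below are converted to this pair of helpers. As a rough caller-side sketch, loosely modelled on do_nanosleep (the demo_* wrapper is hypothetical and not part of this patch):

#include <linux/hrtimer.h>
#include <linux/sched.h>

/* Sleep until an absolute CLOCK_MONOTONIC expiry; returns 0 or -EINTR. */
static int demo_sleep_until(ktime_t expires)
{
	struct hrtimer_sleeper t;

	/* Clock and mode are now fixed at init time ... */
	hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	hrtimer_set_expires(&t.timer, expires);

	set_current_state(TASK_INTERRUPTIBLE);
	/* ... and the start wrapper lets PREEMPT_RT force hard-irq expiry. */
	hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_ABS);
	if (likely(t.task))
		schedule();

	hrtimer_cancel(&t.timer);
	__set_current_state(TASK_RUNNING);
	destroy_hrtimer_on_stack(&t.timer);

	/* hrtimer_wakeup() clears t.task; if it is still set, we were interrupted. */
	return t.task ? -EINTR : 0;
}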
@@ -1727,7 +1916,7 @@
 	switch(restart->nanosleep.type) {
 #ifdef CONFIG_COMPAT_32BIT_TIME
 	case TT_COMPAT:
-		if (compat_put_timespec64(ts, restart->nanosleep.compat_rmtp))
+		if (put_old_timespec32(ts, restart->nanosleep.compat_rmtp))
 			return -EFAULT;
 		break;
 #endif
@@ -1745,11 +1934,9 @@
 {
 	struct restart_block *restart;

-	hrtimer_init_sleeper(t, current);
-
 	do {
 		set_current_state(TASK_INTERRUPTIBLE);
-		hrtimer_start_expires(&t->timer, mode);
+		hrtimer_sleeper_start_expires(t, mode);

 		if (likely(t->task))
 			freezable_schedule();
@@ -1783,17 +1970,16 @@
 	struct hrtimer_sleeper t;
 	int ret;

-	hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
-				HRTIMER_MODE_ABS);
+	hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid,
+				      HRTIMER_MODE_ABS);
 	hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
-
 	ret = do_nanosleep(&t, HRTIMER_MODE_ABS);
 	destroy_hrtimer_on_stack(&t.timer);
 	return ret;
 }

-long hrtimer_nanosleep(const struct timespec64 *rqtp,
-		       const enum hrtimer_mode mode, const clockid_t clockid)
+long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,
+		       const clockid_t clockid)
 {
 	struct restart_block *restart;
 	struct hrtimer_sleeper t;
@@ -1804,8 +1990,8 @@
 	if (dl_task(current) || rt_task(current))
 		slack = 0;

-	hrtimer_init_on_stack(&t.timer, clockid, mode);
-	hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack);
+	hrtimer_init_sleeper_on_stack(&t, clockid, mode);
+	hrtimer_set_expires_range_ns(&t.timer, rqtp, slack);
 	ret = do_nanosleep(&t, mode);
 	if (ret != -ERESTART_RESTARTBLOCK)
 		goto out;
@@ -1825,7 +2011,7 @@
 	return ret;
 }

-#if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT)
+#ifdef CONFIG_64BIT

 SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp,
 		struct __kernel_timespec __user *, rmtp)
@@ -1840,19 +2026,20 @@

 	current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
 	current->restart_block.nanosleep.rmtp = rmtp;
-	return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
+	return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
+				 CLOCK_MONOTONIC);
 }

 #endif

 #ifdef CONFIG_COMPAT_32BIT_TIME

-COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp,
-		       struct compat_timespec __user *, rmtp)
+SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp,
+		struct old_timespec32 __user *, rmtp)
 {
 	struct timespec64 tu;

-	if (compat_get_timespec64(&tu, rqtp))
+	if (get_old_timespec32(&tu, rqtp))
 		return -EFAULT;

 	if (!timespec64_valid(&tu))
@@ -1860,8 +2047,39 @@

 	current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
 	current->restart_block.nanosleep.compat_rmtp = rmtp;
-	return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
+	return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
+				 CLOCK_MONOTONIC);
 }
+#endif
+
+#ifdef CONFIG_PREEMPT_RT
+/*
+ * Sleep for 1 ms in hope whoever holds what we want will let it go.
+ */
+void cpu_chill(void)
+{
+	unsigned int freeze_flag = current->flags & PF_NOFREEZE;
+	struct task_struct *self = current;
+	ktime_t chill_time;
+
+	raw_spin_lock_irq(&self->pi_lock);
+	self->saved_state = self->state;
+	__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+	raw_spin_unlock_irq(&self->pi_lock);
+
+	chill_time = ktime_set(0, NSEC_PER_MSEC);
+
+	current->flags |= PF_NOFREEZE;
+	schedule_hrtimeout(&chill_time, HRTIMER_MODE_REL_HARD);
+	if (!freeze_flag)
+		current->flags &= ~PF_NOFREEZE;
+
+	raw_spin_lock_irq(&self->pi_lock);
+	__set_current_state_no_track(self->saved_state);
+	self->saved_state = TASK_RUNNING;
+	raw_spin_unlock_irq(&self->pi_lock);
+}
+EXPORT_SYMBOL(cpu_chill);
 #endif

 /*
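cpu_chill() is intended as an RT-friendly replacement for cpu_relax() in retry loops that could otherwise starve the holder of the contended resource. A hypothetical caller (the demo_* names are illustrative; the declaration of cpu_chill() in linux/delay.h is an assumption about the RT tree, not something this patch shows):

#include <linux/atomic.h>
#include <linux/delay.h>	/* assumed home of the cpu_chill() declaration on RT */

static void demo_wait_for_done(atomic_t *done)
{
	/*
	 * Instead of spinning with cpu_relax() and possibly keeping the
	 * task that will set *done from running, sleep for ~1 ms per
	 * iteration on PREEMPT_RT.
	 */
	while (!atomic_read(done))
		cpu_chill();
}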
@@ -1873,8 +2091,11 @@
 	int i;

 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
-		cpu_base->clock_base[i].cpu_base = cpu_base;
-		timerqueue_init_head(&cpu_base->clock_base[i].active);
+		struct hrtimer_clock_base *clock_b = &cpu_base->clock_base[i];
+
+		clock_b->cpu_base = cpu_base;
+		seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock);
+		timerqueue_init_head(&clock_b->active);
 	}

 	cpu_base->cpu = cpu;
@@ -1885,6 +2106,7 @@
 	cpu_base->softirq_next_timer = NULL;
 	cpu_base->expires_next = KTIME_MAX;
 	cpu_base->softirq_expires_next = KTIME_MAX;
+	hrtimer_cpu_base_init_expiry_lock(cpu_base);
 	return 0;
 }

@@ -2003,12 +2225,9 @@
 		return -EINTR;
 	}

-	hrtimer_init_on_stack(&t.timer, clock_id, mode);
+	hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
 	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
-
-	hrtimer_init_sleeper(&t, current);
-
-	hrtimer_start_expires(&t.timer, mode);
+	hrtimer_sleeper_start_expires(&t, mode);

 	if (likely(t.task))
 		schedule();
@@ -2020,6 +2239,7 @@

 	return !t.task ? 0 : -EINTR;
 }
+EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock);

 /**
  * schedule_hrtimeout_range - sleep until timeout