| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
|---|
| 1 | 2 | /* |
|---|
| 2 | | - * linux/kernel/hrtimer.c |
|---|
| 3 | | - * |
|---|
| 4 | 3 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> |
|---|
| 5 | 4 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar |
|---|
| 6 | 5 | * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner |
|---|
| 7 | 6 | * |
|---|
| 8 | 7 | * High-resolution kernel timers |
|---|
| 9 | 8 | * |
|---|
| 10 | | - * In contrast to the low-resolution timeout API implemented in |
|---|
| 11 | | - * kernel/timer.c, hrtimers provide finer resolution and accuracy |
|---|
| 12 | | - * depending on system configuration and capabilities. |
|---|
| 13 | | - * |
|---|
| 14 | | - * These timers are currently used for: |
|---|
| 15 | | - * - itimers |
|---|
| 16 | | - * - POSIX timers |
|---|
| 17 | | - * - nanosleep |
|---|
| 18 | | - * - precise in-kernel timing |
|---|
| 9 | + * In contrast to the low-resolution timeout API, aka timer wheel, |
|---|
| 10 | + * hrtimers provide finer resolution and accuracy depending on system |
|---|
| 11 | + * configuration and capabilities. |
|---|
| 19 | 12 | * |
|---|
| 20 | 13 | * Started by: Thomas Gleixner and Ingo Molnar |
|---|
| 21 | 14 | * |
|---|
| 22 | 15 | * Credits: |
|---|
| 23 | | - * based on kernel/timer.c |
|---|
| 16 | + * Based on the original timer wheel code |
|---|
| 24 | 17 | * |
|---|
| 25 | 18 | * Help, testing, suggestions, bugfixes, improvements were |
|---|
| 26 | 19 | * provided by: |
|---|
| 27 | 20 | * |
|---|
| 28 | 21 | * George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel |
|---|
| 29 | 22 | * et. al. |
|---|
| 30 | | - * |
|---|
| 31 | | - * For licencing details see kernel-base/COPYING |
|---|
| 32 | 23 | */ |
|---|
| 33 | 24 | |
|---|
| 34 | 25 | #include <linux/cpu.h> |
|---|
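
The header comment above contrasts hrtimers with the timer-wheel based timeout API. The difference is visible from user space: with high-resolution timers active, CLOCK_MONOTONIC reports nanosecond resolution and a short absolute sleep overshoots by microseconds rather than by a whole jiffy. Below is a minimal user-space probe illustrating this; it is not part of the patch (older glibc may need `-lrt` when linking).

```c
/*
 * Illustrative user-space probe (not kernel code): print the reported
 * resolution of CLOCK_MONOTONIC and measure how far a 200 us absolute
 * sleep overshoots on a kernel with high-resolution timers enabled.
 */
#define _POSIX_C_SOURCE 200112L
#include <stdio.h>
#include <time.h>

static long long ts_ns(const struct timespec *ts)
{
	return (long long)ts->tv_sec * 1000000000LL + ts->tv_nsec;
}

int main(void)
{
	struct timespec res, start, deadline, end;

	clock_getres(CLOCK_MONOTONIC, &res);
	printf("CLOCK_MONOTONIC resolution: %lld ns\n", ts_ns(&res));

	clock_gettime(CLOCK_MONOTONIC, &start);
	deadline = start;
	deadline.tv_nsec += 200000;		/* ask for a 200 us absolute sleep */
	if (deadline.tv_nsec >= 1000000000L) {
		deadline.tv_sec++;
		deadline.tv_nsec -= 1000000000L;
	}
	clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &deadline, NULL);
	clock_gettime(CLOCK_MONOTONIC, &end);

	printf("requested 200000 ns, actually slept %lld ns\n",
	       ts_ns(&end) - ts_ns(&start));
	return 0;
}
```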
| .. | .. |
|---|
| 39 | 30 | #include <linux/syscalls.h> |
|---|
| 40 | 31 | #include <linux/interrupt.h> |
|---|
| 41 | 32 | #include <linux/tick.h> |
|---|
| 42 | | -#include <linux/seq_file.h> |
|---|
| 43 | 33 | #include <linux/err.h> |
|---|
| 44 | 34 | #include <linux/debugobjects.h> |
|---|
| 45 | 35 | #include <linux/sched/signal.h> |
|---|
| .. | .. |
|---|
| 145 | 135 | * timer->base->cpu_base |
|---|
| 146 | 136 | */ |
|---|
| 147 | 137 | static struct hrtimer_cpu_base migration_cpu_base = { |
|---|
| 148 | | - .clock_base = { { .cpu_base = &migration_cpu_base, }, }, |
|---|
| 138 | + .clock_base = { { |
|---|
| 139 | + .cpu_base = &migration_cpu_base, |
|---|
| 140 | + .seq = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq, |
|---|
| 141 | + &migration_cpu_base.lock), |
|---|
| 142 | + }, }, |
|---|
| 149 | 143 | }; |
|---|
| 150 | 144 | |
|---|
| 151 | 145 | #define migration_base migration_cpu_base.clock_base[0] |
|---|
| 146 | + |
|---|
| 147 | +static inline bool is_migration_base(struct hrtimer_clock_base *base) |
|---|
| 148 | +{ |
|---|
| 149 | + return base == &migration_base; |
|---|
| 150 | +} |
|---|
| 152 | 151 | |
|---|
| 153 | 152 | /* |
|---|
| 154 | 153 | * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock |
|---|
| .. | .. |
|---|
| 169 | 168 | struct hrtimer_clock_base *base; |
|---|
| 170 | 169 | |
|---|
| 171 | 170 | for (;;) { |
|---|
| 172 | | - base = timer->base; |
|---|
| 171 | + base = READ_ONCE(timer->base); |
|---|
| 173 | 172 | if (likely(base != &migration_base)) { |
|---|
| 174 | 173 | raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); |
|---|
| 175 | 174 | if (likely(base == timer->base)) |
|---|
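
lock_hrtimer_base() reads timer->base without holding any lock, so the plain load becomes READ_ONCE() here, and the updates in switch_hrtimer_base() below become WRITE_ONCE(), keeping the snapshot / lock / re-check sequence free of load and store tearing. A user-space sketch of that sequence using C11 atomics follows; the names are illustrative, not kernel APIs. The re-check works because the base pointer is only changed while the old base's lock is held.

```c
/*
 * User-space sketch of the "read pointer, lock it, re-check" pattern used
 * by lock_hrtimer_base(). Build with -pthread. Illustrative names only.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct base {
	pthread_mutex_t lock;
};

struct timer {
	_Atomic(struct base *) base;	/* only changed while the old base's lock is held */
};

static struct base *lock_timer_base(struct timer *t)
{
	for (;;) {
		/* unlocked snapshot; READ_ONCE() plays this role in the kernel */
		struct base *b = atomic_load_explicit(&t->base, memory_order_relaxed);

		pthread_mutex_lock(&b->lock);
		/* re-check: the timer may have migrated before we got the lock */
		if (b == atomic_load_explicit(&t->base, memory_order_relaxed))
			return b;
		pthread_mutex_unlock(&b->lock);
	}
}

int main(void)
{
	struct base b = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct timer t;
	struct base *locked;

	atomic_init(&t.base, &b);
	locked = lock_timer_base(&t);
	printf("locked base %p\n", (void *)locked);
	pthread_mutex_unlock(&locked->lock);
	return 0;
}
```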
| .. | .. |
|---|
| 249 | 248 | return base; |
|---|
| 250 | 249 | |
|---|
| 251 | 250 | /* See the comment in lock_hrtimer_base() */ |
|---|
| 252 | | - timer->base = &migration_base; |
|---|
| 251 | + WRITE_ONCE(timer->base, &migration_base); |
|---|
| 253 | 252 | raw_spin_unlock(&base->cpu_base->lock); |
|---|
| 254 | 253 | raw_spin_lock(&new_base->cpu_base->lock); |
|---|
| 255 | 254 | |
|---|
| .. | .. |
|---|
| 258 | 257 | raw_spin_unlock(&new_base->cpu_base->lock); |
|---|
| 259 | 258 | raw_spin_lock(&base->cpu_base->lock); |
|---|
| 260 | 259 | new_cpu_base = this_cpu_base; |
|---|
| 261 | | - timer->base = base; |
|---|
| 260 | + WRITE_ONCE(timer->base, base); |
|---|
| 262 | 261 | goto again; |
|---|
| 263 | 262 | } |
|---|
| 264 | | - timer->base = new_base; |
|---|
| 263 | + WRITE_ONCE(timer->base, new_base); |
|---|
| 265 | 264 | } else { |
|---|
| 266 | 265 | if (new_cpu_base != this_cpu_base && |
|---|
| 267 | 266 | hrtimer_check_target(timer, new_base)) { |
|---|
| .. | .. |
|---|
| 273 | 272 | } |
|---|
| 274 | 273 | |
|---|
| 275 | 274 | #else /* CONFIG_SMP */ |
|---|
| 275 | + |
|---|
| 276 | +static inline bool is_migration_base(struct hrtimer_clock_base *base) |
|---|
| 277 | +{ |
|---|
| 278 | + return false; |
|---|
| 279 | +} |
|---|
| 276 | 280 | |
|---|
| 277 | 281 | static inline struct hrtimer_clock_base * |
|---|
| 278 | 282 | lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) |
|---|
| .. | .. |
|---|
| 311 | 315 | div >>= 1; |
|---|
| 312 | 316 | } |
|---|
| 313 | 317 | tmp >>= sft; |
|---|
| 314 | | - do_div(tmp, (unsigned long) div); |
|---|
| 318 | + do_div(tmp, (u32) div); |
|---|
| 315 | 319 | return dclc < 0 ? -tmp : tmp; |
|---|
| 316 | 320 | } |
|---|
| 317 | 321 | EXPORT_SYMBOL_GPL(__ktime_divns); |
|---|
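
The function above keeps the divisor within 32 bits by shifting divisor and dividend right by the same amount, so the cheap 64-by-32 do_div() can be used; the change only makes the cast match the u32 that do_div() actually consumes. A standalone sketch of the same technique with plain user-space types, not kernel code:

```c
/*
 * Standalone sketch of the shift-then-divide trick used by __ktime_divns():
 * if the divisor does not fit in 32 bits, shift divisor and dividend right
 * by the same amount, then do a 64-by-32 division. The quotient is an
 * approximation, which is fine for the nanosecond magnitudes involved.
 */
#include <stdint.h>
#include <stdio.h>

static int64_t divns(int64_t ns, int64_t div)
{
	uint64_t tmp = ns < 0 ? -ns : ns;
	int sft = 0;

	while (div >> 32) {		/* make the divisor fit in 32 bits */
		sft++;
		div >>= 1;
	}
	tmp >>= sft;			/* keep the quotient roughly unchanged */
	tmp /= (uint32_t)div;		/* 64-by-32 division, like do_div() */
	return ns < 0 ? -(int64_t)tmp : (int64_t)tmp;
}

int main(void)
{
	int64_t ns = 123456789012345LL;		/* ~34 hours in nanoseconds */
	int64_t div = 6000000000LL;		/* 6 s, does not fit in 32 bits */

	printf("approx: %lld  exact: %lld\n",
	       (long long)divns(ns, div), (long long)(ns / div));
	return 0;
}
```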
| .. | .. |
|---|
| 338 | 342 | |
|---|
| 339 | 343 | #ifdef CONFIG_DEBUG_OBJECTS_TIMERS |
|---|
| 340 | 344 | |
|---|
| 341 | | -static struct debug_obj_descr hrtimer_debug_descr; |
|---|
| 345 | +static const struct debug_obj_descr hrtimer_debug_descr; |
|---|
| 342 | 346 | |
|---|
| 343 | 347 | static void *hrtimer_debug_hint(void *addr) |
|---|
| 344 | 348 | { |
|---|
| .. | .. |
|---|
| 373 | 377 | switch (state) { |
|---|
| 374 | 378 | case ODEBUG_STATE_ACTIVE: |
|---|
| 375 | 379 | WARN_ON(1); |
|---|
| 376 | | - |
|---|
| 380 | + fallthrough; |
|---|
| 377 | 381 | default: |
|---|
| 378 | 382 | return false; |
|---|
| 379 | 383 | } |
|---|
| .. | .. |
|---|
| 397 | 401 | } |
|---|
| 398 | 402 | } |
|---|
| 399 | 403 | |
|---|
| 400 | | -static struct debug_obj_descr hrtimer_debug_descr = { |
|---|
| 404 | +static const struct debug_obj_descr hrtimer_debug_descr = { |
|---|
| 401 | 405 | .name = "hrtimer", |
|---|
| 402 | 406 | .debug_hint = hrtimer_debug_hint, |
|---|
| 403 | 407 | .fixup_init = hrtimer_fixup_init, |
|---|
| .. | .. |
|---|
| 421 | 425 | debug_object_deactivate(timer, &hrtimer_debug_descr); |
|---|
| 422 | 426 | } |
|---|
| 423 | 427 | |
|---|
| 424 | | -static inline void debug_hrtimer_free(struct hrtimer *timer) |
|---|
| 425 | | -{ |
|---|
| 426 | | - debug_object_free(timer, &hrtimer_debug_descr); |
|---|
| 427 | | -} |
|---|
| 428 | | - |
|---|
| 429 | 428 | static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, |
|---|
| 430 | 429 | enum hrtimer_mode mode); |
|---|
| 431 | 430 | |
|---|
| .. | .. |
|---|
| 436 | 435 | __hrtimer_init(timer, clock_id, mode); |
|---|
| 437 | 436 | } |
|---|
| 438 | 437 | EXPORT_SYMBOL_GPL(hrtimer_init_on_stack); |
|---|
| 438 | + |
|---|
| 439 | +static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, |
|---|
| 440 | + clockid_t clock_id, enum hrtimer_mode mode); |
|---|
| 441 | + |
|---|
| 442 | +void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, |
|---|
| 443 | + clockid_t clock_id, enum hrtimer_mode mode) |
|---|
| 444 | +{ |
|---|
| 445 | + debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr); |
|---|
| 446 | + __hrtimer_init_sleeper(sl, clock_id, mode); |
|---|
| 447 | +} |
|---|
| 448 | +EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack); |
|---|
| 439 | 449 | |
|---|
| 440 | 450 | void destroy_hrtimer_on_stack(struct hrtimer *timer) |
|---|
| 441 | 451 | { |
|---|
| .. | .. |
|---|
| 748 | 758 | retrigger_next_event(NULL); |
|---|
| 749 | 759 | } |
|---|
| 750 | 760 | |
|---|
| 751 | | -static void clock_was_set_work(struct work_struct *work) |
|---|
| 752 | | -{ |
|---|
| 753 | | - clock_was_set(); |
|---|
| 754 | | -} |
|---|
| 755 | | - |
|---|
| 756 | | -static DECLARE_WORK(hrtimer_work, clock_was_set_work); |
|---|
| 757 | | - |
|---|
| 758 | | -/* |
|---|
| 759 | | - * Called from timekeeping and resume code to reprogram the hrtimer |
|---|
| 760 | | - * interrupt device on all cpus. |
|---|
| 761 | | - */ |
|---|
| 762 | | -void clock_was_set_delayed(void) |
|---|
| 763 | | -{ |
|---|
| 764 | | - schedule_work(&hrtimer_work); |
|---|
| 765 | | -} |
|---|
| 766 | | - |
|---|
| 767 | 761 | #else |
|---|
| 768 | 762 | |
|---|
| 769 | 763 | static inline int hrtimer_is_hres_enabled(void) { return 0; } |
|---|
| .. | .. |
|---|
| 879 | 873 | on_each_cpu(retrigger_next_event, NULL, 1); |
|---|
| 880 | 874 | #endif |
|---|
| 881 | 875 | timerfd_clock_was_set(); |
|---|
| 876 | +} |
|---|
| 877 | + |
|---|
| 878 | +static void clock_was_set_work(struct work_struct *work) |
|---|
| 879 | +{ |
|---|
| 880 | + clock_was_set(); |
|---|
| 881 | +} |
|---|
| 882 | + |
|---|
| 883 | +static DECLARE_WORK(hrtimer_work, clock_was_set_work); |
|---|
| 884 | + |
|---|
| 885 | +/* |
|---|
| 886 | + * Called from timekeeping and resume code to reprogram the hrtimer |
|---|
| 887 | + * interrupt device on all cpus and to notify timerfd. |
|---|
| 888 | + */ |
|---|
| 889 | +void clock_was_set_delayed(void) |
|---|
| 890 | +{ |
|---|
| 891 | + schedule_work(&hrtimer_work); |
|---|
| 882 | 892 | } |
|---|
| 883 | 893 | |
|---|
| 884 | 894 | /* |
|---|
| .. | .. |
|---|
| 1173 | 1183 | |
|---|
| 1174 | 1184 | /* |
|---|
| 1175 | 1185 | * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft |
|---|
| 1176 | | - * match. |
|---|
| 1186 | + * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard |
|---|
| 1187 | + * expiry mode because unmarked timers are moved to softirq expiry. |
|---|
| 1177 | 1188 | */ |
|---|
| 1178 | | - WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); |
|---|
| 1189 | + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) |
|---|
| 1190 | + WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); |
|---|
| 1191 | + else |
|---|
| 1192 | + WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard); |
|---|
| 1179 | 1193 | |
|---|
| 1180 | 1194 | base = lock_hrtimer_base(timer, &flags); |
|---|
| 1181 | 1195 | |
|---|
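
The check above relies on the `!(x) ^ !(y)` idiom: the expression is true exactly when one operand is non-zero and the other is zero, i.e. when the requested mode bit and the timer's recorded expiry context disagree. On PREEMPT_RT the same idiom is applied to the hard-expiry side, because timers that were not marked HRTIMER_MODE_HARD have been moved to softirq expiry by __hrtimer_init(). A tiny demo of the idiom:

```c
/*
 * Demo of the "!(x) ^ !(y)" mismatch idiom used in the WARN_ON_ONCE():
 * it evaluates to 1 exactly when one operand is non-zero and the other is 0.
 */
#include <stdio.h>

int main(void)
{
	for (int mode_soft = 0; mode_soft <= 1; mode_soft++)
		for (int is_soft = 0; is_soft <= 1; is_soft++)
			printf("mode_soft=%d is_soft=%d -> warn=%d\n",
			       mode_soft, is_soft, !mode_soft ^ !is_soft);
	return 0;
}
```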
| .. | .. |
|---|
| 1191 | 1205 | * @timer: hrtimer to stop |
|---|
| 1192 | 1206 | * |
|---|
| 1193 | 1207 | * Returns: |
|---|
| 1194 | | - * 0 when the timer was not active |
|---|
| 1195 | | - * 1 when the timer was active |
|---|
| 1196 | | - * -1 when the timer is currently executing the callback function and |
|---|
| 1208 | + * |
|---|
| 1209 | + * * 0 when the timer was not active |
|---|
| 1210 | + * * 1 when the timer was active |
|---|
| 1211 | + * * -1 when the timer is currently executing the callback function and |
|---|
| 1197 | 1212 | * cannot be stopped |
|---|
| 1198 | 1213 | */ |
|---|
| 1199 | 1214 | int hrtimer_try_to_cancel(struct hrtimer *timer) |
|---|
| .. | .. |
|---|
| 1223 | 1238 | } |
|---|
| 1224 | 1239 | EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel); |
|---|
| 1225 | 1240 | |
|---|
| 1241 | +#ifdef CONFIG_PREEMPT_RT |
|---|
| 1242 | +static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) |
|---|
| 1243 | +{ |
|---|
| 1244 | + spin_lock_init(&base->softirq_expiry_lock); |
|---|
| 1245 | +} |
|---|
| 1246 | + |
|---|
| 1247 | +static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) |
|---|
| 1248 | +{ |
|---|
| 1249 | + spin_lock(&base->softirq_expiry_lock); |
|---|
| 1250 | +} |
|---|
| 1251 | + |
|---|
| 1252 | +static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) |
|---|
| 1253 | +{ |
|---|
| 1254 | + spin_unlock(&base->softirq_expiry_lock); |
|---|
| 1255 | +} |
|---|
| 1256 | + |
|---|
| 1257 | +/* |
|---|
| 1258 | + * The counterpart to hrtimer_cancel_wait_running(). |
|---|
| 1259 | + * |
|---|
| 1260 | + * If there is a waiter for cpu_base->softirq_expiry_lock, then it was waiting |
|---|
| 1261 | + * for the timer callback to finish. Drop the lock and reacquire it. That |
|---|
| 1262 | + * allows the waiter to acquire the lock and make progress. |
|---|
| 1263 | + */ |
|---|
| 1264 | +static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, |
|---|
| 1265 | + unsigned long flags) |
|---|
| 1266 | +{ |
|---|
| 1267 | + if (atomic_read(&cpu_base->timer_waiters)) { |
|---|
| 1268 | + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); |
|---|
| 1269 | + spin_unlock(&cpu_base->softirq_expiry_lock); |
|---|
| 1270 | + spin_lock(&cpu_base->softirq_expiry_lock); |
|---|
| 1271 | + raw_spin_lock_irq(&cpu_base->lock); |
|---|
| 1272 | + } |
|---|
| 1273 | +} |
|---|
| 1274 | + |
|---|
| 1275 | +/* |
|---|
| 1276 | + * This function is called on PREEMPT_RT kernels when the fast path |
|---|
| 1277 | + * deletion of a timer failed because the timer callback function was |
|---|
| 1278 | + * running. |
|---|
| 1279 | + * |
|---|
| 1280 | + * This prevents priority inversion: if the soft irq thread is preempted |
|---|
| 1281 | + * in the middle of a timer callback, then calling del_timer_sync() can |
|---|
| 1282 | + * lead to two issues: |
|---|
| 1283 | + * |
|---|
| 1284 | + * - If the caller is on a remote CPU then it has to spin wait for the timer |
|---|
| 1285 | + * handler to complete. This can result in unbounded priority inversion. |
|---|
| 1286 | + * |
|---|
| 1287 | + * - If the caller originates from the task which preempted the timer |
|---|
| 1288 | + * handler on the same CPU, then spin waiting for the timer handler to |
|---|
| 1289 | + * complete is never going to end. |
|---|
| 1290 | + */ |
|---|
| 1291 | +void hrtimer_cancel_wait_running(const struct hrtimer *timer) |
|---|
| 1292 | +{ |
|---|
| 1293 | + /* Lockless read. Prevent the compiler from reloading it below */ |
|---|
| 1294 | + struct hrtimer_clock_base *base = READ_ONCE(timer->base); |
|---|
| 1295 | + |
|---|
| 1296 | + /* |
|---|
| 1297 | + * Just relax if the timer expires in hard interrupt context or if |
|---|
| 1298 | + * it is currently on the migration base. |
|---|
| 1299 | + */ |
|---|
| 1300 | + if (!timer->is_soft || is_migration_base(base)) { |
|---|
| 1301 | + cpu_relax(); |
|---|
| 1302 | + return; |
|---|
| 1303 | + } |
|---|
| 1304 | + |
|---|
| 1305 | + /* |
|---|
| 1306 | + * Mark the base as contended and grab the expiry lock, which is |
|---|
| 1307 | + * held by the softirq across the timer callback. Drop the lock |
|---|
| 1308 | + * immediately so the softirq can expire the next timer. In theory |
|---|
| 1309 | + * the timer could already be running again, but that's more than |
|---|
| 1310 | + * unlikely and just causes another wait loop. |
|---|
| 1311 | + */ |
|---|
| 1312 | + atomic_inc(&base->cpu_base->timer_waiters); |
|---|
| 1313 | + spin_lock_bh(&base->cpu_base->softirq_expiry_lock); |
|---|
| 1314 | + atomic_dec(&base->cpu_base->timer_waiters); |
|---|
| 1315 | + spin_unlock_bh(&base->cpu_base->softirq_expiry_lock); |
|---|
| 1316 | +} |
|---|
| 1317 | +#else |
|---|
| 1318 | +static inline void |
|---|
| 1319 | +hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { } |
|---|
| 1320 | +static inline void |
|---|
| 1321 | +hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { } |
|---|
| 1322 | +static inline void |
|---|
| 1323 | +hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { } |
|---|
| 1324 | +static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base, |
|---|
| 1325 | + unsigned long flags) { } |
|---|
| 1326 | +#endif |
|---|
| 1327 | + |
|---|
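
The machinery above replaces busy-waiting on a running soft callback: the softirq holds softirq_expiry_lock across callback execution, a canceller that hits a running callback registers itself in timer_waiters and blocks on that lock, and hrtimer_sync_wait_running() briefly drops the lock between callbacks so the canceller can get through. Below is a pthread-based sketch of the handshake, using illustrative names and simplifying away the cpu_base lock; on PREEMPT_RT the expiry lock is a priority-inheriting sleeping lock, so a blocked canceller boosts the softirq thread instead of spinning above it.

```c
/*
 * Pthread sketch of the PREEMPT_RT cancel/wait handshake (not kernel code).
 * A "softirq" thread runs callbacks while holding expiry_lock; a canceller
 * that finds the callback running registers itself in timer_waiters and
 * blocks on expiry_lock instead of spinning. Build with -pthread.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t expiry_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int timer_waiters;
static atomic_int callbacks_run;

static void run_one_callback(void)
{
	atomic_fetch_add(&callbacks_run, 1);
	usleep(1000);				/* pretend the callback takes 1 ms */
}

/* softirq side: the expiry lock is held across callback execution */
static void *softirq_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&expiry_lock);	/* hrtimer_cpu_base_lock_expiry() */
	for (int i = 0; i < 100; i++) {
		run_one_callback();
		/* hrtimer_sync_wait_running(): let a blocked canceller in */
		if (atomic_load(&timer_waiters)) {
			pthread_mutex_unlock(&expiry_lock);
			pthread_mutex_lock(&expiry_lock);
		}
	}
	pthread_mutex_unlock(&expiry_lock);	/* hrtimer_cpu_base_unlock_expiry() */
	return NULL;
}

/* canceller side: block on the expiry lock instead of spinning */
static void cancel_wait_running(void)
{
	atomic_fetch_add(&timer_waiters, 1);
	pthread_mutex_lock(&expiry_lock);
	atomic_fetch_sub(&timer_waiters, 1);
	pthread_mutex_unlock(&expiry_lock);
}

int main(void)
{
	pthread_t softirq;

	pthread_create(&softirq, NULL, softirq_thread, NULL);
	usleep(10000);				/* let a few callbacks run first */
	cancel_wait_running();			/* returns once the running callback is done */
	printf("canceller got through after %d callback(s)\n",
	       atomic_load(&callbacks_run));
	pthread_join(softirq, NULL);
	return 0;
}
```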
| 1226 | 1328 | /** |
|---|
| 1227 | 1329 | * hrtimer_cancel - cancel a timer and wait for the handler to finish. |
|---|
| 1228 | 1330 | * @timer: the timer to be cancelled |
|---|
| .. | .. |
|---|
| 1233 | 1335 | */ |
|---|
| 1234 | 1336 | int hrtimer_cancel(struct hrtimer *timer) |
|---|
| 1235 | 1337 | { |
|---|
| 1236 | | - for (;;) { |
|---|
| 1237 | | - int ret = hrtimer_try_to_cancel(timer); |
|---|
| 1338 | + int ret; |
|---|
| 1238 | 1339 | |
|---|
| 1239 | | - if (ret >= 0) |
|---|
| 1240 | | - return ret; |
|---|
| 1241 | | - cpu_relax(); |
|---|
| 1242 | | - } |
|---|
| 1340 | + do { |
|---|
| 1341 | + ret = hrtimer_try_to_cancel(timer); |
|---|
| 1342 | + |
|---|
| 1343 | + if (ret < 0) |
|---|
| 1344 | + hrtimer_cancel_wait_running(timer); |
|---|
| 1345 | + } while (ret < 0); |
|---|
| 1346 | + return ret; |
|---|
| 1243 | 1347 | } |
|---|
| 1244 | 1348 | EXPORT_SYMBOL_GPL(hrtimer_cancel); |
|---|
| 1245 | 1349 | |
|---|
| .. | .. |
|---|
| 1336 | 1440 | enum hrtimer_mode mode) |
|---|
| 1337 | 1441 | { |
|---|
| 1338 | 1442 | bool softtimer = !!(mode & HRTIMER_MODE_SOFT); |
|---|
| 1339 | | - int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0; |
|---|
| 1340 | 1443 | struct hrtimer_cpu_base *cpu_base; |
|---|
| 1444 | + int base; |
|---|
| 1445 | + |
|---|
| 1446 | + /* |
|---|
| 1447 | + * On PREEMPT_RT enabled kernels hrtimers which are not explicitly |
|---|
| 1448 | + * marked for hard interrupt expiry mode are moved into soft |
|---|
| 1449 | + * interrupt context for latency reasons and because the callbacks |
|---|
| 1450 | + * can invoke functions which might sleep on RT, e.g. spin_lock(). |
|---|
| 1451 | + */ |
|---|
| 1452 | + if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD)) |
|---|
| 1453 | + softtimer = true; |
|---|
| 1341 | 1454 | |
|---|
| 1342 | 1455 | memset(timer, 0, sizeof(struct hrtimer)); |
|---|
| 1343 | 1456 | |
|---|
| .. | .. |
|---|
| 1351 | 1464 | if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL) |
|---|
| 1352 | 1465 | clock_id = CLOCK_MONOTONIC; |
|---|
| 1353 | 1466 | |
|---|
| 1467 | + base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0; |
|---|
| 1354 | 1468 | base += hrtimer_clockid_to_base(clock_id); |
|---|
| 1355 | 1469 | timer->is_soft = softtimer; |
|---|
| 1470 | + timer->is_hard = !!(mode & HRTIMER_MODE_HARD); |
|---|
| 1356 | 1471 | timer->base = &cpu_base->clock_base[base]; |
|---|
| 1357 | 1472 | timerqueue_init(&timer->node); |
|---|
| 1358 | 1473 | } |
|---|
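
With this change the expiry context is fixed at init time: on PREEMPT_RT every hrtimer that is not explicitly marked HRTIMER_MODE_HARD is expired from softirq, and soft timers index into the upper half of the clock_base[] array. A small stand-alone demo of that decision, with the mode bits and base count mirrored locally for illustration:

```c
/*
 * Demo of the expiry-context decision made in __hrtimer_init(). The mode
 * bit values and the 8-entry clock base layout are mirrored locally for
 * illustration; only the decision logic is the point here.
 */
#include <stdbool.h>
#include <stdio.h>

#define MODE_SOFT		0x04	/* HRTIMER_MODE_SOFT */
#define MODE_HARD		0x08	/* HRTIMER_MODE_HARD */
#define MAX_CLOCK_BASES		8	/* 4 hard bases + 4 soft bases */

static void decide(unsigned int mode, bool preempt_rt)
{
	bool softtimer = mode & MODE_SOFT;

	/* on RT, anything not explicitly marked HARD is expired in softirq */
	if (preempt_rt && !(mode & MODE_HARD))
		softtimer = true;

	printf("rt=%d mode=0x%02x -> is_soft=%d is_hard=%d base offset=%d\n",
	       preempt_rt, mode, softtimer, !!(mode & MODE_HARD),
	       softtimer ? MAX_CLOCK_BASES / 2 : 0);
}

int main(void)
{
	unsigned int modes[] = { 0, MODE_SOFT, MODE_HARD };

	for (int rt = 0; rt <= 1; rt++)
		for (unsigned int i = 0; i < 3; i++)
			decide(modes[i], rt);
	return 0;
}
```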
| .. | .. |
|---|
| 1425 | 1540 | static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, |
|---|
| 1426 | 1541 | struct hrtimer_clock_base *base, |
|---|
| 1427 | 1542 | struct hrtimer *timer, ktime_t *now, |
|---|
| 1428 | | - unsigned long flags) |
|---|
| 1543 | + unsigned long flags) __must_hold(&cpu_base->lock) |
|---|
| 1429 | 1544 | { |
|---|
| 1430 | 1545 | enum hrtimer_restart (*fn)(struct hrtimer *); |
|---|
| 1546 | + bool expires_in_hardirq; |
|---|
| 1431 | 1547 | int restart; |
|---|
| 1432 | 1548 | |
|---|
| 1433 | 1549 | lockdep_assert_held(&cpu_base->lock); |
|---|
| .. | .. |
|---|
| 1462 | 1578 | */ |
|---|
| 1463 | 1579 | raw_spin_unlock_irqrestore(&cpu_base->lock, flags); |
|---|
| 1464 | 1580 | trace_hrtimer_expire_entry(timer, now); |
|---|
| 1581 | + expires_in_hardirq = lockdep_hrtimer_enter(timer); |
|---|
| 1582 | + |
|---|
| 1465 | 1583 | restart = fn(timer); |
|---|
| 1584 | + |
|---|
| 1585 | + lockdep_hrtimer_exit(expires_in_hardirq); |
|---|
| 1466 | 1586 | trace_hrtimer_expire_exit(timer); |
|---|
| 1467 | 1587 | raw_spin_lock_irq(&cpu_base->lock); |
|---|
| 1468 | 1588 | |
|---|
| .. | .. |
|---|
| 1525 | 1645 | break; |
|---|
| 1526 | 1646 | |
|---|
| 1527 | 1647 | __run_hrtimer(cpu_base, base, timer, &basenow, flags); |
|---|
| 1648 | + if (active_mask == HRTIMER_ACTIVE_SOFT) |
|---|
| 1649 | + hrtimer_sync_wait_running(cpu_base, flags); |
|---|
| 1528 | 1650 | } |
|---|
| 1529 | 1651 | } |
|---|
| 1530 | 1652 | } |
|---|
| .. | .. |
|---|
| 1535 | 1657 | unsigned long flags; |
|---|
| 1536 | 1658 | ktime_t now; |
|---|
| 1537 | 1659 | |
|---|
| 1660 | + hrtimer_cpu_base_lock_expiry(cpu_base); |
|---|
| 1538 | 1661 | raw_spin_lock_irqsave(&cpu_base->lock, flags); |
|---|
| 1539 | 1662 | |
|---|
| 1540 | 1663 | now = hrtimer_update_base(cpu_base); |
|---|
| .. | .. |
|---|
| 1544 | 1667 | hrtimer_update_softirq_timer(cpu_base, true); |
|---|
| 1545 | 1668 | |
|---|
| 1546 | 1669 | raw_spin_unlock_irqrestore(&cpu_base->lock, flags); |
|---|
| 1670 | + hrtimer_cpu_base_unlock_expiry(cpu_base); |
|---|
| 1547 | 1671 | } |
|---|
| 1548 | 1672 | |
|---|
| 1549 | 1673 | #ifdef CONFIG_HIGH_RES_TIMERS |
|---|
| .. | .. |
|---|
| 1715 | 1839 | return HRTIMER_NORESTART; |
|---|
| 1716 | 1840 | } |
|---|
| 1717 | 1841 | |
|---|
| 1718 | | -void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) |
|---|
| 1842 | +/** |
|---|
| 1843 | + * hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer |
|---|
| 1844 | + * @sl: sleeper to be started |
|---|
| 1845 | + * @mode: timer mode abs/rel |
|---|
| 1846 | + * |
|---|
| 1847 | + * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers |
|---|
| 1848 | + * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context) |
|---|
| 1849 | + */ |
|---|
| 1850 | +void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, |
|---|
| 1851 | + enum hrtimer_mode mode) |
|---|
| 1719 | 1852 | { |
|---|
| 1853 | + /* |
|---|
| 1854 | + * Make the enqueue delivery mode check work on RT. If the sleeper |
|---|
| 1855 | + * was initialized for hard interrupt delivery, force the mode bit. |
|---|
| 1856 | + * This is a special case for hrtimer_sleepers because |
|---|
| 1857 | + * hrtimer_init_sleeper() determines the delivery mode on RT so the |
|---|
| 1858 | + * fiddling with this decision is avoided at the call sites. |
|---|
| 1859 | + */ |
|---|
| 1860 | + if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard) |
|---|
| 1861 | + mode |= HRTIMER_MODE_HARD; |
|---|
| 1862 | + |
|---|
| 1863 | + hrtimer_start_expires(&sl->timer, mode); |
|---|
| 1864 | +} |
|---|
| 1865 | +EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires); |
|---|
| 1866 | + |
|---|
| 1867 | +static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, |
|---|
| 1868 | + clockid_t clock_id, enum hrtimer_mode mode) |
|---|
| 1869 | +{ |
|---|
| 1870 | + /* |
|---|
| 1871 | + * On PREEMPT_RT enabled kernels hrtimers which are not explicitly |
|---|
| 1872 | + * marked for hard interrupt expiry mode are moved into soft |
|---|
| 1873 | + * interrupt context either for latency reasons or because the |
|---|
| 1874 | + * hrtimer callback takes regular spinlocks or invokes other |
|---|
| 1875 | + * functions which are not suitable for hard interrupt context on |
|---|
| 1876 | + * PREEMPT_RT. |
|---|
| 1877 | + * |
|---|
| 1878 | + * The hrtimer_sleeper callback is RT compatible in hard interrupt |
|---|
| 1879 | + * context, but there is a latency concern: Untrusted userspace can |
|---|
| 1880 | + * spawn many threads which arm timers for the same expiry time on |
|---|
| 1881 | + * the same CPU. That causes a latency spike due to the wakeup of |
|---|
| 1882 | + * a gazillion threads. |
|---|
| 1883 | + * |
|---|
| 1884 | + * OTOH, privileged real-time user space applications rely on the |
|---|
| 1885 | + * low latency of hard interrupt wakeups. If the current task is in |
|---|
| 1886 | + * a real-time scheduling class, mark the mode for hard interrupt |
|---|
| 1887 | + * expiry. |
|---|
| 1888 | + */ |
|---|
| 1889 | + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { |
|---|
| 1890 | + if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT)) |
|---|
| 1891 | + mode |= HRTIMER_MODE_HARD; |
|---|
| 1892 | + } |
|---|
| 1893 | + |
|---|
| 1894 | + __hrtimer_init(&sl->timer, clock_id, mode); |
|---|
| 1720 | 1895 | sl->timer.function = hrtimer_wakeup; |
|---|
| 1721 | | - sl->task = task; |
|---|
| 1896 | + sl->task = current; |
|---|
| 1897 | +} |
|---|
| 1898 | + |
|---|
| 1899 | +/** |
|---|
| 1900 | + * hrtimer_init_sleeper - initialize sleeper to the given clock |
|---|
| 1901 | + * @sl: sleeper to be initialized |
|---|
| 1902 | + * @clock_id: the clock to be used |
|---|
| 1903 | + * @mode: timer mode abs/rel |
|---|
| 1904 | + */ |
|---|
| 1905 | +void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, |
|---|
| 1906 | + enum hrtimer_mode mode) |
|---|
| 1907 | +{ |
|---|
| 1908 | + debug_init(&sl->timer, clock_id, mode); |
|---|
| 1909 | + __hrtimer_init_sleeper(sl, clock_id, mode); |
|---|
| 1910 | + |
|---|
| 1722 | 1911 | } |
|---|
| 1723 | 1912 | EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); |
|---|
| 1724 | 1913 | |
|---|
| .. | .. |
|---|
| 1727 | 1916 | switch(restart->nanosleep.type) { |
|---|
| 1728 | 1917 | #ifdef CONFIG_COMPAT_32BIT_TIME |
|---|
| 1729 | 1918 | case TT_COMPAT: |
|---|
| 1730 | | - if (compat_put_timespec64(ts, restart->nanosleep.compat_rmtp)) |
|---|
| 1919 | + if (put_old_timespec32(ts, restart->nanosleep.compat_rmtp)) |
|---|
| 1731 | 1920 | return -EFAULT; |
|---|
| 1732 | 1921 | break; |
|---|
| 1733 | 1922 | #endif |
|---|
| .. | .. |
|---|
| 1745 | 1934 | { |
|---|
| 1746 | 1935 | struct restart_block *restart; |
|---|
| 1747 | 1936 | |
|---|
| 1748 | | - hrtimer_init_sleeper(t, current); |
|---|
| 1749 | | - |
|---|
| 1750 | 1937 | do { |
|---|
| 1751 | 1938 | set_current_state(TASK_INTERRUPTIBLE); |
|---|
| 1752 | | - hrtimer_start_expires(&t->timer, mode); |
|---|
| 1939 | + hrtimer_sleeper_start_expires(t, mode); |
|---|
| 1753 | 1940 | |
|---|
| 1754 | 1941 | if (likely(t->task)) |
|---|
| 1755 | 1942 | freezable_schedule(); |
|---|
| .. | .. |
|---|
| 1783 | 1970 | struct hrtimer_sleeper t; |
|---|
| 1784 | 1971 | int ret; |
|---|
| 1785 | 1972 | |
|---|
| 1786 | | - hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid, |
|---|
| 1787 | | - HRTIMER_MODE_ABS); |
|---|
| 1973 | + hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid, |
|---|
| 1974 | + HRTIMER_MODE_ABS); |
|---|
| 1788 | 1975 | hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); |
|---|
| 1789 | | - |
|---|
| 1790 | 1976 | ret = do_nanosleep(&t, HRTIMER_MODE_ABS); |
|---|
| 1791 | 1977 | destroy_hrtimer_on_stack(&t.timer); |
|---|
| 1792 | 1978 | return ret; |
|---|
| 1793 | 1979 | } |
|---|
| 1794 | 1980 | |
|---|
| 1795 | | -long hrtimer_nanosleep(const struct timespec64 *rqtp, |
|---|
| 1796 | | - const enum hrtimer_mode mode, const clockid_t clockid) |
|---|
| 1981 | +long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, |
|---|
| 1982 | + const clockid_t clockid) |
|---|
| 1797 | 1983 | { |
|---|
| 1798 | 1984 | struct restart_block *restart; |
|---|
| 1799 | 1985 | struct hrtimer_sleeper t; |
|---|
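
hrtimer_nanosleep() now takes the expiry as a ktime_t, a signed 64-bit nanosecond count, and the syscall stubs convert the validated timespec with timespec64_to_ktime(). That conversion is essentially seconds times NSEC_PER_SEC plus nanoseconds, saturated at KTIME_MAX for out-of-range values; a minimal sketch of what it amounts to:

```c
/*
 * Minimal sketch of what timespec64_to_ktime() amounts to: a saturating
 * conversion of {sec, nsec} into a signed 64-bit nanosecond count.
 */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define NSEC_PER_SEC	1000000000LL
#define KTIME_MAX	INT64_MAX
#define KTIME_SEC_MAX	(KTIME_MAX / NSEC_PER_SEC)

static int64_t to_ktime(struct timespec ts)
{
	if (ts.tv_sec >= KTIME_SEC_MAX)		/* saturate instead of overflowing */
		return KTIME_MAX;
	return (int64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}

int main(void)
{
	struct timespec ts = { .tv_sec = 1, .tv_nsec = 500000000 };

	printf("1.5s -> %lld ns\n", (long long)to_ktime(ts));
	return 0;
}
```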
| .. | .. |
|---|
| 1804 | 1990 | if (dl_task(current) || rt_task(current)) |
|---|
| 1805 | 1991 | slack = 0; |
|---|
| 1806 | 1992 | |
|---|
| 1807 | | - hrtimer_init_on_stack(&t.timer, clockid, mode); |
|---|
| 1808 | | - hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack); |
|---|
| 1993 | + hrtimer_init_sleeper_on_stack(&t, clockid, mode); |
|---|
| 1994 | + hrtimer_set_expires_range_ns(&t.timer, rqtp, slack); |
|---|
| 1809 | 1995 | ret = do_nanosleep(&t, mode); |
|---|
| 1810 | 1996 | if (ret != -ERESTART_RESTARTBLOCK) |
|---|
| 1811 | 1997 | goto out; |
|---|
| .. | .. |
|---|
| 1825 | 2011 | return ret; |
|---|
| 1826 | 2012 | } |
|---|
| 1827 | 2013 | |
|---|
| 1828 | | -#if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT) |
|---|
| 2014 | +#ifdef CONFIG_64BIT |
|---|
| 1829 | 2015 | |
|---|
| 1830 | 2016 | SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp, |
|---|
| 1831 | 2017 | struct __kernel_timespec __user *, rmtp) |
|---|
| .. | .. |
|---|
| 1838 | 2024 | if (!timespec64_valid(&tu)) |
|---|
| 1839 | 2025 | return -EINVAL; |
|---|
| 1840 | 2026 | |
|---|
| 2027 | + current->restart_block.fn = do_no_restart_syscall; |
|---|
| 1841 | 2028 | current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE; |
|---|
| 1842 | 2029 | current->restart_block.nanosleep.rmtp = rmtp; |
|---|
| 1843 | | - return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC); |
|---|
| 2030 | + return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, |
|---|
| 2031 | + CLOCK_MONOTONIC); |
|---|
| 1844 | 2032 | } |
|---|
| 1845 | 2033 | |
|---|
| 1846 | 2034 | #endif |
|---|
| 1847 | 2035 | |
|---|
| 1848 | 2036 | #ifdef CONFIG_COMPAT_32BIT_TIME |
|---|
| 1849 | 2037 | |
|---|
| 1850 | | -COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp, |
|---|
| 1851 | | - struct compat_timespec __user *, rmtp) |
|---|
| 2038 | +SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp, |
|---|
| 2039 | + struct old_timespec32 __user *, rmtp) |
|---|
| 1852 | 2040 | { |
|---|
| 1853 | 2041 | struct timespec64 tu; |
|---|
| 1854 | 2042 | |
|---|
| 1855 | | - if (compat_get_timespec64(&tu, rqtp)) |
|---|
| 2043 | + if (get_old_timespec32(&tu, rqtp)) |
|---|
| 1856 | 2044 | return -EFAULT; |
|---|
| 1857 | 2045 | |
|---|
| 1858 | 2046 | if (!timespec64_valid(&tu)) |
|---|
| 1859 | 2047 | return -EINVAL; |
|---|
| 1860 | 2048 | |
|---|
| 2049 | + current->restart_block.fn = do_no_restart_syscall; |
|---|
| 1861 | 2050 | current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; |
|---|
| 1862 | 2051 | current->restart_block.nanosleep.compat_rmtp = rmtp; |
|---|
| 1863 | | - return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC); |
|---|
| 2052 | + return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, |
|---|
| 2053 | + CLOCK_MONOTONIC); |
|---|
| 1864 | 2054 | } |
|---|
| 1865 | 2055 | #endif |
|---|
| 1866 | 2056 | |
|---|
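
Both syscall stubs now initialize restart_block.fn before calling hrtimer_nanosleep() and still honour rmtp: if a signal interrupts the sleep, the remaining time is copied back so user space can resume it. A small user-space program exercising that contract:

```c
/*
 * User-space demonstration of the nanosleep() rmtp contract implemented by
 * these syscall stubs: when a signal interrupts the sleep, -1/EINTR is
 * returned and the unslept time is written back to rmtp.
 */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

static void on_alarm(int sig)
{
	(void)sig;				/* only purpose: interrupt the sleep */
}

int main(void)
{
	struct sigaction sa;
	struct timespec req = { .tv_sec = 2, .tv_nsec = 0 };
	struct timespec rem = { 0, 0 };

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = on_alarm;		/* no SA_RESTART: nanosleep() returns -1/EINTR */
	sigaction(SIGALRM, &sa, NULL);
	alarm(1);				/* deliver SIGALRM after ~1 s */

	if (nanosleep(&req, &rem) == -1 && errno == EINTR)
		printf("interrupted, %ld.%09ld s left to sleep\n",
		       (long)rem.tv_sec, rem.tv_nsec);
	else
		printf("slept the full 2 seconds\n");
	return 0;
}
```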
| .. | .. |
|---|
| 1873 | 2063 | int i; |
|---|
| 1874 | 2064 | |
|---|
| 1875 | 2065 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { |
|---|
| 1876 | | - cpu_base->clock_base[i].cpu_base = cpu_base; |
|---|
| 1877 | | - timerqueue_init_head(&cpu_base->clock_base[i].active); |
|---|
| 2066 | + struct hrtimer_clock_base *clock_b = &cpu_base->clock_base[i]; |
|---|
| 2067 | + |
|---|
| 2068 | + clock_b->cpu_base = cpu_base; |
|---|
| 2069 | + seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock); |
|---|
| 2070 | + timerqueue_init_head(&clock_b->active); |
|---|
| 1878 | 2071 | } |
|---|
| 1879 | 2072 | |
|---|
| 1880 | 2073 | cpu_base->cpu = cpu; |
|---|
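
Each clock base's seqcount is now initialized together with cpu_base->lock, so lockdep knows which lock serializes the writers. The protocol itself is the usual seqcount scheme: the writer makes the count odd while updating, and lockless readers retry if they observed an odd or changed count. A C11 sketch of that scheme, with illustrative names and without the careful payload annotations (READ_ONCE/WRITE_ONCE) the kernel uses:

```c
/*
 * C11 sketch of the seqcount protocol used by the clock bases (not kernel
 * code): the writer, serialized by the base lock, makes the count odd while
 * updating; a reader retries if the count was odd or changed under it.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t base_lock = PTHREAD_MUTEX_INITIALIZER;	/* cpu_base->lock */
static atomic_uint seq;						/* clock_base->seq */
static long long soft_expires, hard_expires;			/* payload guarded by seq */

static void write_expiry(long long soft, long long hard)
{
	pthread_mutex_lock(&base_lock);		/* writers serialized by the base lock */
	atomic_fetch_add_explicit(&seq, 1, memory_order_release);	/* odd: update in progress */
	soft_expires = soft;
	hard_expires = hard;
	atomic_fetch_add_explicit(&seq, 1, memory_order_release);	/* even again: update done */
	pthread_mutex_unlock(&base_lock);
}

static void read_expiry(long long *soft, long long *hard)
{
	unsigned int start;

	do {
		start = atomic_load_explicit(&seq, memory_order_acquire);
		*soft = soft_expires;
		*hard = hard_expires;
	} while ((start & 1) ||		/* writer was in the middle of an update */
		 start != atomic_load_explicit(&seq, memory_order_acquire));
}

int main(void)
{
	long long soft, hard;

	write_expiry(1000000, 2000000);
	read_expiry(&soft, &hard);
	printf("soft=%lld hard=%lld\n", soft, hard);
	return 0;
}
```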
| .. | .. |
|---|
| 1885 | 2078 | cpu_base->softirq_next_timer = NULL; |
|---|
| 1886 | 2079 | cpu_base->expires_next = KTIME_MAX; |
|---|
| 1887 | 2080 | cpu_base->softirq_expires_next = KTIME_MAX; |
|---|
| 2081 | + hrtimer_cpu_base_init_expiry_lock(cpu_base); |
|---|
| 1888 | 2082 | return 0; |
|---|
| 1889 | 2083 | } |
|---|
| 1890 | 2084 | |
|---|
| .. | .. |
|---|
| 2003 | 2197 | return -EINTR; |
|---|
| 2004 | 2198 | } |
|---|
| 2005 | 2199 | |
|---|
| 2006 | | - hrtimer_init_on_stack(&t.timer, clock_id, mode); |
|---|
| 2200 | + hrtimer_init_sleeper_on_stack(&t, clock_id, mode); |
|---|
| 2007 | 2201 | hrtimer_set_expires_range_ns(&t.timer, *expires, delta); |
|---|
| 2008 | | - |
|---|
| 2009 | | - hrtimer_init_sleeper(&t, current); |
|---|
| 2010 | | - |
|---|
| 2011 | | - hrtimer_start_expires(&t.timer, mode); |
|---|
| 2202 | + hrtimer_sleeper_start_expires(&t, mode); |
|---|
| 2012 | 2203 | |
|---|
| 2013 | 2204 | if (likely(t.task)) |
|---|
| 2014 | 2205 | schedule(); |
|---|
| .. | .. |
|---|
| 2020 | 2211 | |
|---|
| 2021 | 2212 | return !t.task ? 0 : -EINTR; |
|---|
| 2022 | 2213 | } |
|---|
| 2214 | +EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock); |
|---|
| 2023 | 2215 | |
|---|
| 2024 | 2216 | /** |
|---|
| 2025 | 2217 | * schedule_hrtimeout_range - sleep until timeout |
|---|