.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
---|
1 | 2 | /* |
---|
2 | | - * linux/kernel/timer.c |
---|
3 | | - * |
---|
4 | 3 | * Kernel internal timers |
---|
5 | 4 | * |
---|
6 | 5 | * Copyright (C) 1991, 1992 Linus Torvalds |
---|
.. | .. |
---|
56 | 55 | |
---|
57 | 56 | #define CREATE_TRACE_POINTS |
---|
58 | 57 | #include <trace/events/timer.h> |
---|
| 58 | +#undef CREATE_TRACE_POINTS |
---|
| 59 | +#include <trace/hooks/timer.h> |
---|
| 60 | + |
---|
| 61 | +EXPORT_TRACEPOINT_SYMBOL_GPL(hrtimer_expire_entry); |
---|
| 62 | +EXPORT_TRACEPOINT_SYMBOL_GPL(hrtimer_expire_exit); |
---|
59 | 63 | |
---|
60 | 64 | __visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; |
---|
61 | 65 | |
---|
.. | .. |
---|
158 | 162 | |
---|
159 | 163 | /* |
---|
160 | 164 | * The time start value for each level to select the bucket at enqueue |
---|
161 | | - * time. |
---|
| 165 | + * time. We start from the last possible delta of the previous level |
---|
| 166 | + * so that we can later add an extra LVL_GRAN(n) to n (see calc_index()). |
---|
162 | 167 | */ |
---|
163 | 168 | #define LVL_START(n) ((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT)) |
---|
164 | 169 | |
---|
.. | .. |
---|
198 | 203 | struct timer_base { |
---|
199 | 204 | raw_spinlock_t lock; |
---|
200 | 205 | struct timer_list *running_timer; |
---|
| 206 | +#ifdef CONFIG_PREEMPT_RT |
---|
| 207 | + spinlock_t expiry_lock; |
---|
| 208 | + atomic_t timer_waiters; |
---|
| 209 | +#endif |
---|
201 | 210 | unsigned long clk; |
---|
202 | 211 | unsigned long next_expiry; |
---|
203 | 212 | unsigned int cpu; |
---|
| 213 | + bool next_expiry_recalc; |
---|
204 | 214 | bool is_idle; |
---|
205 | | - bool must_forward_clk; |
---|
| 215 | + bool timers_pending; |
---|
206 | 216 | DECLARE_BITMAP(pending_map, WHEEL_SIZE); |
---|
207 | 217 | struct hlist_head vectors[WHEEL_SIZE]; |
---|
208 | 218 | } ____cacheline_aligned; |
---|
.. | .. |
---|
247 | 257 | } |
---|
248 | 258 | |
---|
249 | 259 | int timer_migration_handler(struct ctl_table *table, int write, |
---|
250 | | - void __user *buffer, size_t *lenp, |
---|
251 | | - loff_t *ppos) |
---|
| 260 | + void *buffer, size_t *lenp, loff_t *ppos) |
---|
252 | 261 | { |
---|
253 | 262 | int ret; |
---|
254 | 263 | |
---|
.. | .. |
---|
486 | 495 | * Helper function to calculate the array index for a given expiry |
---|
487 | 496 | * time. |
---|
488 | 497 | */ |
---|
489 | | -static inline unsigned calc_index(unsigned expires, unsigned lvl) |
---|
| 498 | +static inline unsigned calc_index(unsigned long expires, unsigned lvl, |
---|
| 499 | + unsigned long *bucket_expiry) |
---|
490 | 500 | { |
---|
| 501 | + |
---|
| 502 | + /* |
---|
| 503 | + * The timer wheel has to guarantee that a timer does not fire |
---|
| 504 | + * early. Early expiry can happen due to: |
---|
| 505 | + * - Timer is armed at the edge of a tick |
---|
| 506 | + * - Truncation of the expiry time in the outer wheel levels |
---|
| 507 | + * |
---|
| 508 | + * Round up with level granularity to prevent this. |
---|
| 509 | + */ |
---|
| 510 | + trace_android_vh_timer_calc_index(lvl, &expires); |
---|
491 | 511 | expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl); |
---|
| 512 | + *bucket_expiry = expires << LVL_SHIFT(lvl); |
---|
492 | 513 | return LVL_OFFS(lvl) + (expires & LVL_MASK); |
---|
493 | 514 | } |
---|
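
Worked example (editor's note, not part of the patch): the rounding that calc_index() now reports back via *bucket_expiry can be reproduced in user space. The sketch below copies the LVL_* arithmetic with the values defined earlier in this file (LVL_CLK_SHIFT == 3, LVL_BITS == 6); the chosen expiry value and wheel level are illustrative assumptions.

```c
/* Standalone sketch of the calc_index() rounding; illustrative only. */
#include <stdio.h>

#define LVL_CLK_SHIFT	3
#define LVL_BITS	6
#define LVL_SIZE	(1UL << LVL_BITS)
#define LVL_MASK	(LVL_SIZE - 1)
#define LVL_SHIFT(n)	((n) * LVL_CLK_SHIFT)
#define LVL_GRAN(n)	(1UL << LVL_SHIFT(n))
#define LVL_OFFS(n)	((n) * LVL_SIZE)

static unsigned calc_index(unsigned long expires, unsigned lvl,
			   unsigned long *bucket_expiry)
{
	expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
	*bucket_expiry = expires << LVL_SHIFT(lvl);
	return LVL_OFFS(lvl) + (expires & LVL_MASK);
}

int main(void)
{
	unsigned long bucket_expiry;
	/* A timer for absolute jiffy 1000, assumed to land in wheel level 2. */
	unsigned idx = calc_index(1000, 2, &bucket_expiry);

	/* Level 2 granularity is 64 jiffies: 1000 is rounded up to 1024. */
	printf("idx=%u bucket_expiry=%lu\n", idx, bucket_expiry);
	return 0;
}
```

The point of handing back bucket_expiry is visible in the output: the bucket time (1024) is never before the requested expiry (1000), so the timer cannot fire early, and enqueue_timer() below compares this rounded value, not timer->expires, against base->next_expiry.
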
494 | 515 | |
---|
495 | | -static int calc_wheel_index(unsigned long expires, unsigned long clk) |
---|
| 516 | +static int calc_wheel_index(unsigned long expires, unsigned long clk, |
---|
| 517 | + unsigned long *bucket_expiry) |
---|
496 | 518 | { |
---|
497 | 519 | unsigned long delta = expires - clk; |
---|
498 | 520 | unsigned int idx; |
---|
499 | 521 | |
---|
500 | 522 | if (delta < LVL_START(1)) { |
---|
501 | | - idx = calc_index(expires, 0); |
---|
| 523 | + idx = calc_index(expires, 0, bucket_expiry); |
---|
502 | 524 | } else if (delta < LVL_START(2)) { |
---|
503 | | - idx = calc_index(expires, 1); |
---|
| 525 | + idx = calc_index(expires, 1, bucket_expiry); |
---|
504 | 526 | } else if (delta < LVL_START(3)) { |
---|
505 | | - idx = calc_index(expires, 2); |
---|
| 527 | + idx = calc_index(expires, 2, bucket_expiry); |
---|
506 | 528 | } else if (delta < LVL_START(4)) { |
---|
507 | | - idx = calc_index(expires, 3); |
---|
| 529 | + idx = calc_index(expires, 3, bucket_expiry); |
---|
508 | 530 | } else if (delta < LVL_START(5)) { |
---|
509 | | - idx = calc_index(expires, 4); |
---|
| 531 | + idx = calc_index(expires, 4, bucket_expiry); |
---|
510 | 532 | } else if (delta < LVL_START(6)) { |
---|
511 | | - idx = calc_index(expires, 5); |
---|
| 533 | + idx = calc_index(expires, 5, bucket_expiry); |
---|
512 | 534 | } else if (delta < LVL_START(7)) { |
---|
513 | | - idx = calc_index(expires, 6); |
---|
| 535 | + idx = calc_index(expires, 6, bucket_expiry); |
---|
514 | 536 | } else if (LVL_DEPTH > 8 && delta < LVL_START(8)) { |
---|
515 | | - idx = calc_index(expires, 7); |
---|
| 537 | + idx = calc_index(expires, 7, bucket_expiry); |
---|
516 | 538 | } else if ((long) delta < 0) { |
---|
517 | 539 | idx = clk & LVL_MASK; |
---|
| 540 | + *bucket_expiry = clk; |
---|
518 | 541 | } else { |
---|
519 | 542 | /* |
---|
520 | 543 | * Force expire obscene large timeouts to expire at the |
---|
.. | .. |
---|
523 | 546 | if (delta >= WHEEL_TIMEOUT_CUTOFF) |
---|
524 | 547 | expires = clk + WHEEL_TIMEOUT_MAX; |
---|
525 | 548 | |
---|
526 | | - idx = calc_index(expires, LVL_DEPTH - 1); |
---|
| 549 | + idx = calc_index(expires, LVL_DEPTH - 1, bucket_expiry); |
---|
527 | 550 | } |
---|
528 | 551 | return idx; |
---|
529 | | -} |
---|
530 | | - |
---|
531 | | -/* |
---|
532 | | - * Enqueue the timer into the hash bucket, mark it pending in |
---|
533 | | - * the bitmap and store the index in the timer flags. |
---|
534 | | - */ |
---|
535 | | -static void enqueue_timer(struct timer_base *base, struct timer_list *timer, |
---|
536 | | - unsigned int idx) |
---|
537 | | -{ |
---|
538 | | - hlist_add_head(&timer->entry, base->vectors + idx); |
---|
539 | | - __set_bit(idx, base->pending_map); |
---|
540 | | - timer_set_idx(timer, idx); |
---|
541 | | -} |
---|
542 | | - |
---|
543 | | -static void |
---|
544 | | -__internal_add_timer(struct timer_base *base, struct timer_list *timer) |
---|
545 | | -{ |
---|
546 | | - unsigned int idx; |
---|
547 | | - |
---|
548 | | - idx = calc_wheel_index(timer->expires, base->clk); |
---|
549 | | - enqueue_timer(base, timer, idx); |
---|
550 | 552 | } |
---|
551 | 553 | |
---|
552 | 554 | static void |
---|
.. | .. |
---|
570 | 572 | * timer is not deferrable. If the other CPU is on the way to idle |
---|
571 | 573 | * then it can't set base->is_idle as we hold the base lock: |
---|
572 | 574 | */ |
---|
573 | | - if (!base->is_idle) |
---|
574 | | - return; |
---|
575 | | - |
---|
576 | | - /* Check whether this is the new first expiring timer: */ |
---|
577 | | - if (time_after_eq(timer->expires, base->next_expiry)) |
---|
578 | | - return; |
---|
579 | | - |
---|
580 | | - /* |
---|
581 | | - * Set the next expiry time and kick the CPU so it can reevaluate the |
---|
582 | | - * wheel: |
---|
583 | | - */ |
---|
584 | | - if (time_before(timer->expires, base->clk)) { |
---|
585 | | - /* |
---|
586 | | - * Prevent from forward_timer_base() moving the base->clk |
---|
587 | | - * backward |
---|
588 | | - */ |
---|
589 | | - base->next_expiry = base->clk; |
---|
590 | | - } else { |
---|
591 | | - base->next_expiry = timer->expires; |
---|
592 | | - } |
---|
593 | | - wake_up_nohz_cpu(base->cpu); |
---|
| 575 | + if (base->is_idle) |
---|
| 576 | + wake_up_nohz_cpu(base->cpu); |
---|
594 | 577 | } |
---|
595 | 578 | |
---|
596 | | -static void |
---|
597 | | -internal_add_timer(struct timer_base *base, struct timer_list *timer) |
---|
| 579 | +/* |
---|
| 580 | + * Enqueue the timer into the hash bucket, mark it pending in |
---|
| 581 | + * the bitmap, store the index in the timer flags then wake up |
---|
| 582 | + * the target CPU if needed. |
---|
| 583 | + */ |
---|
| 584 | +static void enqueue_timer(struct timer_base *base, struct timer_list *timer, |
---|
| 585 | + unsigned int idx, unsigned long bucket_expiry) |
---|
598 | 586 | { |
---|
599 | | - __internal_add_timer(base, timer); |
---|
600 | | - trigger_dyntick_cpu(base, timer); |
---|
| 587 | + |
---|
| 588 | + hlist_add_head(&timer->entry, base->vectors + idx); |
---|
| 589 | + __set_bit(idx, base->pending_map); |
---|
| 590 | + timer_set_idx(timer, idx); |
---|
| 591 | + |
---|
| 592 | + trace_timer_start(timer, timer->expires, timer->flags); |
---|
| 593 | + |
---|
| 594 | + /* |
---|
| 595 | + * Check whether this is the new first expiring timer. The |
---|
| 596 | + * effective expiry time of the timer is required here |
---|
| 597 | + * (bucket_expiry) instead of timer->expires. |
---|
| 598 | + */ |
---|
| 599 | + if (time_before(bucket_expiry, base->next_expiry)) { |
---|
| 600 | + /* |
---|
| 601 | + * Set the next expiry time and kick the CPU so it |
---|
| 602 | + * can reevaluate the wheel: |
---|
| 603 | + */ |
---|
| 604 | + base->next_expiry = bucket_expiry; |
---|
| 605 | + base->timers_pending = true; |
---|
| 606 | + base->next_expiry_recalc = false; |
---|
| 607 | + trigger_dyntick_cpu(base, timer); |
---|
| 608 | + } |
---|
| 609 | +} |
---|
| 610 | + |
---|
| 611 | +static void internal_add_timer(struct timer_base *base, struct timer_list *timer) |
---|
| 612 | +{ |
---|
| 613 | + unsigned long bucket_expiry; |
---|
| 614 | + unsigned int idx; |
---|
| 615 | + |
---|
| 616 | + idx = calc_wheel_index(timer->expires, base->clk, &bucket_expiry); |
---|
| 617 | + enqueue_timer(base, timer, idx, bucket_expiry); |
---|
601 | 618 | } |
---|
602 | 619 | |
---|
603 | 620 | #ifdef CONFIG_DEBUG_OBJECTS_TIMERS |
---|
604 | 621 | |
---|
605 | | -static struct debug_obj_descr timer_debug_descr; |
---|
| 622 | +static const struct debug_obj_descr timer_debug_descr; |
---|
606 | 623 | |
---|
607 | 624 | static void *timer_debug_hint(void *addr) |
---|
608 | 625 | { |
---|
.. | .. |
---|
657 | 674 | |
---|
658 | 675 | case ODEBUG_STATE_ACTIVE: |
---|
659 | 676 | WARN_ON(1); |
---|
660 | | - |
---|
| 677 | + fallthrough; |
---|
661 | 678 | default: |
---|
662 | 679 | return false; |
---|
663 | 680 | } |
---|
.. | .. |
---|
698 | 715 | } |
---|
699 | 716 | } |
---|
700 | 717 | |
---|
701 | | -static struct debug_obj_descr timer_debug_descr = { |
---|
| 718 | +static const struct debug_obj_descr timer_debug_descr = { |
---|
702 | 719 | .name = "timer_list", |
---|
703 | 720 | .debug_hint = timer_debug_hint, |
---|
704 | 721 | .is_static_object = timer_is_static_object, |
---|
.. | .. |
---|
721 | 738 | static inline void debug_timer_deactivate(struct timer_list *timer) |
---|
722 | 739 | { |
---|
723 | 740 | debug_object_deactivate(timer, &timer_debug_descr); |
---|
724 | | -} |
---|
725 | | - |
---|
726 | | -static inline void debug_timer_free(struct timer_list *timer) |
---|
727 | | -{ |
---|
728 | | - debug_object_free(timer, &timer_debug_descr); |
---|
729 | 741 | } |
---|
730 | 742 | |
---|
731 | 743 | static inline void debug_timer_assert_init(struct timer_list *timer) |
---|
.. | .. |
---|
767 | 779 | trace_timer_init(timer); |
---|
768 | 780 | } |
---|
769 | 781 | |
---|
770 | | -static inline void |
---|
771 | | -debug_activate(struct timer_list *timer, unsigned long expires) |
---|
772 | | -{ |
---|
773 | | - debug_timer_activate(timer); |
---|
774 | | - trace_timer_start(timer, expires, timer->flags); |
---|
775 | | -} |
---|
776 | | - |
---|
777 | 782 | static inline void debug_deactivate(struct timer_list *timer) |
---|
778 | 783 | { |
---|
779 | 784 | debug_timer_deactivate(timer); |
---|
.. | .. |
---|
792 | 797 | { |
---|
793 | 798 | timer->entry.pprev = NULL; |
---|
794 | 799 | timer->function = func; |
---|
| 800 | + if (WARN_ON_ONCE(flags & ~TIMER_INIT_FLAGS)) |
---|
| 801 | + flags &= TIMER_INIT_FLAGS; |
---|
795 | 802 | timer->flags = flags | raw_smp_processor_id(); |
---|
796 | 803 | lockdep_init_map(&timer->lockdep_map, name, key, 0); |
---|
797 | 804 | } |
---|
.. | .. |
---|
837 | 844 | if (!timer_pending(timer)) |
---|
838 | 845 | return 0; |
---|
839 | 846 | |
---|
840 | | - if (hlist_is_singular_node(&timer->entry, base->vectors + idx)) |
---|
| 847 | + if (hlist_is_singular_node(&timer->entry, base->vectors + idx)) { |
---|
841 | 848 | __clear_bit(idx, base->pending_map); |
---|
| 849 | + base->next_expiry_recalc = true; |
---|
| 850 | + } |
---|
842 | 851 | |
---|
843 | 852 | detach_timer(timer, clear_pending); |
---|
844 | 853 | return 1; |
---|
.. | .. |
---|
888 | 897 | |
---|
889 | 898 | static inline void forward_timer_base(struct timer_base *base) |
---|
890 | 899 | { |
---|
891 | | -#ifdef CONFIG_NO_HZ_COMMON |
---|
892 | | - unsigned long jnow; |
---|
| 900 | + unsigned long jnow = READ_ONCE(jiffies); |
---|
893 | 901 | |
---|
894 | 902 | /* |
---|
895 | | - * We only forward the base when we are idle or have just come out of |
---|
896 | | - * idle (must_forward_clk logic), and have a delta between base clock |
---|
897 | | - * and jiffies. In the common case, run_timers will take care of it. |
---|
| 903 | + * No need to forward if we are close enough below jiffies. |
---|
| 904 | + * Also while executing timers, base->clk is 1 offset ahead |
---|
| 905 | + * of jiffies to avoid endless requeuing to current jiffies. |
---|
898 | 906 | */ |
---|
899 | | - if (likely(!base->must_forward_clk)) |
---|
900 | | - return; |
---|
901 | | - |
---|
902 | | - jnow = READ_ONCE(jiffies); |
---|
903 | | - base->must_forward_clk = base->is_idle; |
---|
904 | | - if ((long)(jnow - base->clk) < 2) |
---|
| 907 | + if ((long)(jnow - base->clk) < 1) |
---|
905 | 908 | return; |
---|
906 | 909 | |
---|
907 | 910 | /* |
---|
.. | .. |
---|
915 | 918 | return; |
---|
916 | 919 | base->clk = base->next_expiry; |
---|
917 | 920 | } |
---|
918 | | -#endif |
---|
919 | 921 | } |
---|
920 | 922 | |
---|
921 | 923 | |
---|
.. | .. |
---|
958 | 960 | |
---|
959 | 961 | #define MOD_TIMER_PENDING_ONLY 0x01 |
---|
960 | 962 | #define MOD_TIMER_REDUCE 0x02 |
---|
| 963 | +#define MOD_TIMER_NOTPENDING 0x04 |
---|
961 | 964 | |
---|
962 | 965 | static inline int |
---|
963 | 966 | __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options) |
---|
964 | 967 | { |
---|
| 968 | + unsigned long clk = 0, flags, bucket_expiry; |
---|
965 | 969 | struct timer_base *base, *new_base; |
---|
966 | 970 | unsigned int idx = UINT_MAX; |
---|
967 | | - unsigned long clk = 0, flags; |
---|
968 | 971 | int ret = 0; |
---|
969 | 972 | |
---|
970 | 973 | BUG_ON(!timer->function); |
---|
.. | .. |
---|
974 | 977 | * the timer is re-modified to have the same timeout or ends up in the |
---|
975 | 978 | * same array bucket then just return: |
---|
976 | 979 | */ |
---|
977 | | - if (timer_pending(timer)) { |
---|
| 980 | + if (!(options & MOD_TIMER_NOTPENDING) && timer_pending(timer)) { |
---|
978 | 981 | /* |
---|
979 | 982 | * The downside of this optimization is that it can result in |
---|
980 | 983 | * larger granularity than you would get from adding a new |
---|
.. | .. |
---|
1003 | 1006 | } |
---|
1004 | 1007 | |
---|
1005 | 1008 | clk = base->clk; |
---|
1006 | | - idx = calc_wheel_index(expires, clk); |
---|
| 1009 | + idx = calc_wheel_index(expires, clk, &bucket_expiry); |
---|
1007 | 1010 | |
---|
1008 | 1011 | /* |
---|
1009 | 1012 | * Retrieve and compare the array index of the pending |
---|
.. | .. |
---|
1050 | 1053 | } |
---|
1051 | 1054 | } |
---|
1052 | 1055 | |
---|
1053 | | - debug_activate(timer, expires); |
---|
| 1056 | + debug_timer_activate(timer); |
---|
1054 | 1057 | |
---|
1055 | 1058 | timer->expires = expires; |
---|
1056 | 1059 | /* |
---|
1057 | 1060 | * If 'idx' was calculated above and the base time did not advance |
---|
1058 | 1061 | * between calculating 'idx' and possibly switching the base, only |
---|
1059 | | - * enqueue_timer() and trigger_dyntick_cpu() is required. Otherwise |
---|
1060 | | - * we need to (re)calculate the wheel index via |
---|
1061 | | - * internal_add_timer(). |
---|
| 1062 | + * enqueue_timer() is required. Otherwise we need to (re)calculate |
---|
| 1063 | + * the wheel index via internal_add_timer(). |
---|
1062 | 1064 | */ |
---|
1063 | | - if (idx != UINT_MAX && clk == base->clk) { |
---|
1064 | | - enqueue_timer(base, timer, idx); |
---|
1065 | | - trigger_dyntick_cpu(base, timer); |
---|
1066 | | - } else { |
---|
| 1065 | + if (idx != UINT_MAX && clk == base->clk) |
---|
| 1066 | + enqueue_timer(base, timer, idx, bucket_expiry); |
---|
| 1067 | + else |
---|
1067 | 1068 | internal_add_timer(base, timer); |
---|
1068 | | - } |
---|
1069 | 1069 | |
---|
1070 | 1070 | out_unlock: |
---|
1071 | 1071 | raw_spin_unlock_irqrestore(&base->lock, flags); |
---|
.. | .. |
---|
1147 | 1147 | void add_timer(struct timer_list *timer) |
---|
1148 | 1148 | { |
---|
1149 | 1149 | BUG_ON(timer_pending(timer)); |
---|
1150 | | - mod_timer(timer, timer->expires); |
---|
| 1150 | + __mod_timer(timer, timer->expires, MOD_TIMER_NOTPENDING); |
---|
1151 | 1151 | } |
---|
1152 | 1152 | EXPORT_SYMBOL(add_timer); |
---|
1153 | 1153 | |
---|
.. | .. |
---|
1184 | 1184 | } |
---|
1185 | 1185 | forward_timer_base(base); |
---|
1186 | 1186 | |
---|
1187 | | - debug_activate(timer, timer->expires); |
---|
| 1187 | + debug_timer_activate(timer); |
---|
1188 | 1188 | internal_add_timer(base, timer); |
---|
1189 | 1189 | raw_spin_unlock_irqrestore(&base->lock, flags); |
---|
1190 | 1190 | } |
---|
.. | .. |
---|
1245 | 1245 | } |
---|
1246 | 1246 | EXPORT_SYMBOL(try_to_del_timer_sync); |
---|
1247 | 1247 | |
---|
1248 | | -#ifdef CONFIG_SMP |
---|
| 1248 | +#ifdef CONFIG_PREEMPT_RT |
---|
| 1249 | +static __init void timer_base_init_expiry_lock(struct timer_base *base) |
---|
| 1250 | +{ |
---|
| 1251 | + spin_lock_init(&base->expiry_lock); |
---|
| 1252 | +} |
---|
| 1253 | + |
---|
| 1254 | +static inline void timer_base_lock_expiry(struct timer_base *base) |
---|
| 1255 | +{ |
---|
| 1256 | + spin_lock(&base->expiry_lock); |
---|
| 1257 | +} |
---|
| 1258 | + |
---|
| 1259 | +static inline void timer_base_unlock_expiry(struct timer_base *base) |
---|
| 1260 | +{ |
---|
| 1261 | + spin_unlock(&base->expiry_lock); |
---|
| 1262 | +} |
---|
| 1263 | + |
---|
| 1264 | +/* |
---|
| 1265 | + * The counterpart to del_timer_wait_running(). |
---|
| 1266 | + * |
---|
| 1267 | + * If there is a waiter for base->expiry_lock, then it was waiting for the |
---|
| 1268 | + * timer callback to finish. Drop expiry_lock and reacquire it. That allows |
---|
| 1269 | + * the waiter to acquire the lock and make progress. |
---|
| 1270 | + */ |
---|
| 1271 | +static void timer_sync_wait_running(struct timer_base *base) |
---|
| 1272 | +{ |
---|
| 1273 | + if (atomic_read(&base->timer_waiters)) { |
---|
| 1274 | + raw_spin_unlock_irq(&base->lock); |
---|
| 1275 | + spin_unlock(&base->expiry_lock); |
---|
| 1276 | + spin_lock(&base->expiry_lock); |
---|
| 1277 | + raw_spin_lock_irq(&base->lock); |
---|
| 1278 | + } |
---|
| 1279 | +} |
---|
| 1280 | + |
---|
| 1281 | +/* |
---|
| 1282 | + * This function is called on PREEMPT_RT kernels when the fast path |
---|
| 1283 | + * deletion of a timer failed because the timer callback function was |
---|
| 1284 | + * running. |
---|
| 1285 | + * |
---|
| 1286 | + * This prevents priority inversion, if the softirq thread on a remote CPU |
---|
| 1287 | + * got preempted, and it prevents a live lock when the task which tries to |
---|
| 1288 | + * delete a timer preempted the softirq thread running the timer callback |
---|
| 1289 | + * function. |
---|
| 1290 | + */ |
---|
| 1291 | +static void del_timer_wait_running(struct timer_list *timer) |
---|
| 1292 | +{ |
---|
| 1293 | + u32 tf; |
---|
| 1294 | + |
---|
| 1295 | + tf = READ_ONCE(timer->flags); |
---|
| 1296 | + if (!(tf & TIMER_MIGRATING)) { |
---|
| 1297 | + struct timer_base *base = get_timer_base(tf); |
---|
| 1298 | + |
---|
| 1299 | + /* |
---|
| 1300 | + * Mark the base as contended and grab the expiry lock, |
---|
| 1301 | + * which is held by the softirq across the timer |
---|
| 1302 | + * callback. Drop the lock immediately so the softirq can |
---|
| 1303 | + * expire the next timer. In theory the timer could already |
---|
| 1304 | + * be running again, but that's more than unlikely and just |
---|
| 1305 | + * causes another wait loop. |
---|
| 1306 | + */ |
---|
| 1307 | + atomic_inc(&base->timer_waiters); |
---|
| 1308 | + spin_lock_bh(&base->expiry_lock); |
---|
| 1309 | + atomic_dec(&base->timer_waiters); |
---|
| 1310 | + spin_unlock_bh(&base->expiry_lock); |
---|
| 1311 | + } |
---|
| 1312 | +} |
---|
| 1313 | +#else |
---|
| 1314 | +static inline void timer_base_init_expiry_lock(struct timer_base *base) { } |
---|
| 1315 | +static inline void timer_base_lock_expiry(struct timer_base *base) { } |
---|
| 1316 | +static inline void timer_base_unlock_expiry(struct timer_base *base) { } |
---|
| 1317 | +static inline void timer_sync_wait_running(struct timer_base *base) { } |
---|
| 1318 | +static inline void del_timer_wait_running(struct timer_list *timer) { } |
---|
| 1319 | +#endif |
---|
| 1320 | + |
---|
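
Editor's sketch (not from the patch): the expiry-lock handoff added above can be mimicked in user space to see the ordering. pthread primitives stand in for the kernel's expiry_lock/timer_waiters pair; the raw base->lock juggling and the fairness/PI guarantees of real PREEMPT_RT spinlocks are deliberately omitted, so treat this purely as an illustration.

```c
/* Miniature model of timer_sync_wait_running()/del_timer_wait_running(). */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t expiry_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int timer_waiters;

static void *softirq_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&expiry_lock);		/* timer_base_lock_expiry() */
	for (int i = 0; i < 3; i++) {
		printf("softirq: timer callback %d runs under expiry_lock\n", i);
		usleep(1000);				/* the callback body */
		if (atomic_load(&timer_waiters)) {	/* timer_sync_wait_running() */
			pthread_mutex_unlock(&expiry_lock);
			pthread_mutex_lock(&expiry_lock);
		}
	}
	pthread_mutex_unlock(&expiry_lock);		/* timer_base_unlock_expiry() */
	return NULL;
}

static void *deleter_thread(void *arg)
{
	(void)arg;
	/* del_timer_wait_running(): announce the wait, then take the lock. */
	atomic_fetch_add(&timer_waiters, 1);
	pthread_mutex_lock(&expiry_lock);
	atomic_fetch_sub(&timer_waiters, 1);
	pthread_mutex_unlock(&expiry_lock);
	printf("deleter: running callback has finished, timer can be freed\n");
	return NULL;
}

int main(void)
{
	pthread_t s, d;

	pthread_create(&s, NULL, softirq_thread, NULL);
	usleep(500);					/* let the softirq side start first */
	pthread_create(&d, NULL, deleter_thread, NULL);
	pthread_join(s, NULL);
	pthread_join(d, NULL);
	return 0;
}
```
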
| 1321 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) |
---|
1249 | 1322 | /** |
---|
1250 | 1323 | * del_timer_sync - deactivate a timer and wait for the handler to finish. |
---|
1251 | 1324 | * @timer: the timer to be deactivated |
---|
.. | .. |
---|
1284 | 1357 | */ |
---|
1285 | 1358 | int del_timer_sync(struct timer_list *timer) |
---|
1286 | 1359 | { |
---|
| 1360 | + int ret; |
---|
| 1361 | + |
---|
1287 | 1362 | #ifdef CONFIG_LOCKDEP |
---|
1288 | 1363 | unsigned long flags; |
---|
1289 | 1364 | |
---|
.. | .. |
---|
1301 | 1376 | * could lead to deadlock. |
---|
1302 | 1377 | */ |
---|
1303 | 1378 | WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE)); |
---|
1304 | | - for (;;) { |
---|
1305 | | - int ret = try_to_del_timer_sync(timer); |
---|
1306 | | - if (ret >= 0) |
---|
1307 | | - return ret; |
---|
1308 | | - cpu_relax(); |
---|
1309 | | - } |
---|
| 1379 | + |
---|
| 1380 | + do { |
---|
| 1381 | + ret = try_to_del_timer_sync(timer); |
---|
| 1382 | + |
---|
| 1383 | + if (unlikely(ret < 0)) { |
---|
| 1384 | + del_timer_wait_running(timer); |
---|
| 1385 | + cpu_relax(); |
---|
| 1386 | + } |
---|
| 1387 | + } while (ret < 0); |
---|
| 1388 | + |
---|
| 1389 | + return ret; |
---|
1310 | 1390 | } |
---|
1311 | 1391 | EXPORT_SYMBOL(del_timer_sync); |
---|
1312 | 1392 | #endif |
---|
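
Usage sketch (editor's addition, names are illustrative): the kind of caller del_timer_sync() and del_timer_wait_running() protect is a self-rearming timer whose callback may still be running on another CPU when the owner tears it down. The stop flag is needed because del_timer_sync() only waits for the currently running callback; it cannot keep that callback from re-arming the timer.

```c
#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list poll_timer;
static bool poll_stopped;

static void poll_timer_fn(struct timer_list *t)
{
	/* ... periodic work ... */
	if (!READ_ONCE(poll_stopped))
		mod_timer(&poll_timer, jiffies + HZ);	/* re-arm in one second */
}

static void poll_start(void)
{
	poll_stopped = false;
	timer_setup(&poll_timer, poll_timer_fn, 0);
	mod_timer(&poll_timer, jiffies + HZ);
}

static void poll_stop(void)
{
	/* Forbid re-arming, then wait for a concurrently running callback. */
	WRITE_ONCE(poll_stopped, true);
	del_timer_sync(&poll_timer);
}
```
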
1313 | 1393 | |
---|
1314 | | -static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *)) |
---|
| 1394 | +static void call_timer_fn(struct timer_list *timer, |
---|
| 1395 | + void (*fn)(struct timer_list *), |
---|
| 1396 | + unsigned long baseclk) |
---|
1315 | 1397 | { |
---|
1316 | 1398 | int count = preempt_count(); |
---|
1317 | 1399 | |
---|
.. | .. |
---|
1334 | 1416 | */ |
---|
1335 | 1417 | lock_map_acquire(&lockdep_map); |
---|
1336 | 1418 | |
---|
1337 | | - trace_timer_expire_entry(timer); |
---|
| 1419 | + trace_timer_expire_entry(timer, baseclk); |
---|
1338 | 1420 | fn(timer); |
---|
1339 | 1421 | trace_timer_expire_exit(timer); |
---|
1340 | 1422 | |
---|
1341 | 1423 | lock_map_release(&lockdep_map); |
---|
1342 | 1424 | |
---|
1343 | 1425 | if (count != preempt_count()) { |
---|
1344 | | - WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n", |
---|
| 1426 | + WARN_ONCE(1, "timer: %pS preempt leak: %08x -> %08x\n", |
---|
1345 | 1427 | fn, count, preempt_count()); |
---|
1346 | 1428 | /* |
---|
1347 | 1429 | * Restore the preempt count. That gives us a decent |
---|
.. | .. |
---|
1355 | 1437 | |
---|
1356 | 1438 | static void expire_timers(struct timer_base *base, struct hlist_head *head) |
---|
1357 | 1439 | { |
---|
| 1440 | + /* |
---|
| 1441 | + * This value is required only for tracing. base->clk was |
---|
| 1442 | + * incremented directly before expire_timers was called. But expiry |
---|
| 1443 | + * is related to the old base->clk value. |
---|
| 1444 | + */ |
---|
| 1445 | + unsigned long baseclk = base->clk - 1; |
---|
| 1446 | + |
---|
1358 | 1447 | while (!hlist_empty(head)) { |
---|
1359 | 1448 | struct timer_list *timer; |
---|
1360 | 1449 | void (*fn)(struct timer_list *); |
---|
.. | .. |
---|
1368 | 1457 | |
---|
1369 | 1458 | if (timer->flags & TIMER_IRQSAFE) { |
---|
1370 | 1459 | raw_spin_unlock(&base->lock); |
---|
1371 | | - call_timer_fn(timer, fn); |
---|
| 1460 | + call_timer_fn(timer, fn, baseclk); |
---|
1372 | 1461 | raw_spin_lock(&base->lock); |
---|
| 1462 | + base->running_timer = NULL; |
---|
1373 | 1463 | } else { |
---|
1374 | 1464 | raw_spin_unlock_irq(&base->lock); |
---|
1375 | | - call_timer_fn(timer, fn); |
---|
| 1465 | + call_timer_fn(timer, fn, baseclk); |
---|
1376 | 1466 | raw_spin_lock_irq(&base->lock); |
---|
| 1467 | + base->running_timer = NULL; |
---|
| 1468 | + timer_sync_wait_running(base); |
---|
1377 | 1469 | } |
---|
1378 | 1470 | } |
---|
1379 | 1471 | } |
---|
1380 | 1472 | |
---|
1381 | | -static int __collect_expired_timers(struct timer_base *base, |
---|
1382 | | - struct hlist_head *heads) |
---|
| 1473 | +static int collect_expired_timers(struct timer_base *base, |
---|
| 1474 | + struct hlist_head *heads) |
---|
1383 | 1475 | { |
---|
1384 | | - unsigned long clk = base->clk; |
---|
| 1476 | + unsigned long clk = base->clk = base->next_expiry; |
---|
1385 | 1477 | struct hlist_head *vec; |
---|
1386 | 1478 | int i, levels = 0; |
---|
1387 | 1479 | unsigned int idx; |
---|
.. | .. |
---|
1403 | 1495 | return levels; |
---|
1404 | 1496 | } |
---|
1405 | 1497 | |
---|
1406 | | -#ifdef CONFIG_NO_HZ_COMMON |
---|
1407 | 1498 | /* |
---|
1408 | 1499 | * Find the next pending bucket of a level. Search from level start (@offset) |
---|
1409 | 1500 | * + @clk upwards and if nothing there, search from start of the level |
---|
.. | .. |
---|
1436 | 1527 | clk = base->clk; |
---|
1437 | 1528 | for (lvl = 0; lvl < LVL_DEPTH; lvl++, offset += LVL_SIZE) { |
---|
1438 | 1529 | int pos = next_pending_bucket(base, offset, clk & LVL_MASK); |
---|
| 1530 | + unsigned long lvl_clk = clk & LVL_CLK_MASK; |
---|
1439 | 1531 | |
---|
1440 | 1532 | if (pos >= 0) { |
---|
1441 | 1533 | unsigned long tmp = clk + (unsigned long) pos; |
---|
.. | .. |
---|
1443 | 1535 | tmp <<= LVL_SHIFT(lvl); |
---|
1444 | 1536 | if (time_before(tmp, next)) |
---|
1445 | 1537 | next = tmp; |
---|
| 1538 | + |
---|
| 1539 | + /* |
---|
| 1540 | + * If the next expiration happens before we reach |
---|
| 1541 | + * the next level, no need to check further. |
---|
| 1542 | + */ |
---|
| 1543 | + if (pos <= ((LVL_CLK_DIV - lvl_clk) & LVL_CLK_MASK)) |
---|
| 1544 | + break; |
---|
1446 | 1545 | } |
---|
1447 | 1546 | /* |
---|
1448 | 1547 | * Clock for the next level. If the current level clock lower |
---|
.. | .. |
---|
1480 | 1579 | * So the simple check whether the lower bits of the current |
---|
1481 | 1580 | * level are 0 or not is sufficient for all cases. |
---|
1482 | 1581 | */ |
---|
1483 | | - adj = clk & LVL_CLK_MASK ? 1 : 0; |
---|
| 1582 | + adj = lvl_clk ? 1 : 0; |
---|
1484 | 1583 | clk >>= LVL_CLK_SHIFT; |
---|
1485 | 1584 | clk += adj; |
---|
1486 | 1585 | } |
---|
| 1586 | + |
---|
| 1587 | + base->next_expiry_recalc = false; |
---|
| 1588 | + base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA); |
---|
| 1589 | + |
---|
1487 | 1590 | return next; |
---|
1488 | 1591 | } |
---|
1489 | 1592 | |
---|
| 1593 | +#ifdef CONFIG_NO_HZ_COMMON |
---|
1490 | 1594 | /* |
---|
1491 | 1595 | * Check, if the next hrtimer event is before the next timer wheel |
---|
1492 | 1596 | * event: |
---|
.. | .. |
---|
1533 | 1637 | struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); |
---|
1534 | 1638 | u64 expires = KTIME_MAX; |
---|
1535 | 1639 | unsigned long nextevt; |
---|
1536 | | - bool is_max_delta; |
---|
1537 | 1640 | |
---|
1538 | 1641 | /* |
---|
1539 | 1642 | * Pretend that there is no timer pending if the cpu is offline. |
---|
.. | .. |
---|
1543 | 1646 | return expires; |
---|
1544 | 1647 | |
---|
1545 | 1648 | raw_spin_lock(&base->lock); |
---|
1546 | | - nextevt = __next_timer_interrupt(base); |
---|
1547 | | - is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA); |
---|
1548 | | - base->next_expiry = nextevt; |
---|
| 1649 | + if (base->next_expiry_recalc) |
---|
| 1650 | + base->next_expiry = __next_timer_interrupt(base); |
---|
| 1651 | + nextevt = base->next_expiry; |
---|
| 1652 | + |
---|
1549 | 1653 | /* |
---|
1550 | 1654 | * We have a fresh next event. Check whether we can forward the |
---|
1551 | 1655 | * base. We can only do that when @basej is past base->clk |
---|
.. | .. |
---|
1562 | 1666 | expires = basem; |
---|
1563 | 1667 | base->is_idle = false; |
---|
1564 | 1668 | } else { |
---|
1565 | | - if (!is_max_delta) |
---|
| 1669 | + if (base->timers_pending) |
---|
1566 | 1670 | expires = basem + (u64)(nextevt - basej) * TICK_NSEC; |
---|
1567 | 1671 | /* |
---|
1568 | 1672 | * If we expect to sleep more than a tick, mark the base idle. |
---|
.. | .. |
---|
1571 | 1675 | * logic is only maintained for the BASE_STD base, deferrable |
---|
1572 | 1676 | * timers may still see large granularity skew (by design). |
---|
1573 | 1677 | */ |
---|
1574 | | - if ((expires - basem) > TICK_NSEC) { |
---|
1575 | | - base->must_forward_clk = true; |
---|
| 1678 | + if ((expires - basem) > TICK_NSEC) |
---|
1576 | 1679 | base->is_idle = true; |
---|
1577 | | - } |
---|
1578 | 1680 | } |
---|
1579 | 1681 | raw_spin_unlock(&base->lock); |
---|
1580 | 1682 | |
---|
.. | .. |
---|
1598 | 1700 | */ |
---|
1599 | 1701 | base->is_idle = false; |
---|
1600 | 1702 | } |
---|
1601 | | - |
---|
1602 | | -static int collect_expired_timers(struct timer_base *base, |
---|
1603 | | - struct hlist_head *heads) |
---|
1604 | | -{ |
---|
1605 | | - unsigned long now = READ_ONCE(jiffies); |
---|
1606 | | - |
---|
1607 | | - /* |
---|
1608 | | - * NOHZ optimization. After a long idle sleep we need to forward the |
---|
1609 | | - * base to current jiffies. Avoid a loop by searching the bitfield for |
---|
1610 | | - * the next expiring timer. |
---|
1611 | | - */ |
---|
1612 | | - if ((long)(now - base->clk) > 2) { |
---|
1613 | | - unsigned long next = __next_timer_interrupt(base); |
---|
1614 | | - |
---|
1615 | | - /* |
---|
1616 | | - * If the next timer is ahead of time forward to current |
---|
1617 | | - * jiffies, otherwise forward to the next expiry time: |
---|
1618 | | - */ |
---|
1619 | | - if (time_after(next, now)) { |
---|
1620 | | - /* |
---|
1621 | | - * The call site will increment base->clk and then |
---|
1622 | | - * terminate the expiry loop immediately. |
---|
1623 | | - */ |
---|
1624 | | - base->clk = now; |
---|
1625 | | - return 0; |
---|
1626 | | - } |
---|
1627 | | - base->clk = next; |
---|
1628 | | - } |
---|
1629 | | - return __collect_expired_timers(base, heads); |
---|
1630 | | -} |
---|
1631 | | -#else |
---|
1632 | | -static inline int collect_expired_timers(struct timer_base *base, |
---|
1633 | | - struct hlist_head *heads) |
---|
1634 | | -{ |
---|
1635 | | - return __collect_expired_timers(base, heads); |
---|
1636 | | -} |
---|
1637 | 1703 | #endif |
---|
1638 | 1704 | |
---|
1639 | 1705 | /* |
---|
.. | .. |
---|
1644 | 1710 | { |
---|
1645 | 1711 | struct task_struct *p = current; |
---|
1646 | 1712 | |
---|
| 1713 | + PRANDOM_ADD_NOISE(jiffies, user_tick, p, 0); |
---|
| 1714 | + |
---|
1647 | 1715 | /* Note: this timer irq context must be accounted for as well. */ |
---|
1648 | 1716 | account_process_tick(p, user_tick); |
---|
1649 | 1717 | run_local_timers(); |
---|
1650 | | - rcu_check_callbacks(user_tick); |
---|
| 1718 | + rcu_sched_clock_irq(user_tick); |
---|
1651 | 1719 | #ifdef CONFIG_IRQ_WORK |
---|
1652 | 1720 | if (in_irq()) |
---|
1653 | 1721 | irq_work_tick(); |
---|
1654 | 1722 | #endif |
---|
1655 | 1723 | scheduler_tick(); |
---|
1656 | 1724 | if (IS_ENABLED(CONFIG_POSIX_TIMERS)) |
---|
1657 | | - run_posix_cpu_timers(p); |
---|
| 1725 | + run_posix_cpu_timers(); |
---|
1658 | 1726 | } |
---|
1659 | 1727 | |
---|
1660 | 1728 | /** |
---|
.. | .. |
---|
1666 | 1734 | struct hlist_head heads[LVL_DEPTH]; |
---|
1667 | 1735 | int levels; |
---|
1668 | 1736 | |
---|
1669 | | - if (!time_after_eq(jiffies, base->clk)) |
---|
| 1737 | + if (time_before(jiffies, base->next_expiry)) |
---|
1670 | 1738 | return; |
---|
1671 | 1739 | |
---|
| 1740 | + timer_base_lock_expiry(base); |
---|
1672 | 1741 | raw_spin_lock_irq(&base->lock); |
---|
1673 | 1742 | |
---|
1674 | | - /* |
---|
1675 | | - * timer_base::must_forward_clk must be cleared before running |
---|
1676 | | - * timers so that any timer functions that call mod_timer() will |
---|
1677 | | - * not try to forward the base. Idle tracking / clock forwarding |
---|
1678 | | - * logic is only used with BASE_STD timers. |
---|
1679 | | - * |
---|
1680 | | - * The must_forward_clk flag is cleared unconditionally also for |
---|
1681 | | - * the deferrable base. The deferrable base is not affected by idle |
---|
1682 | | - * tracking and never forwarded, so clearing the flag is a NOOP. |
---|
1683 | | - * |
---|
1684 | | - * The fact that the deferrable base is never forwarded can cause |
---|
1685 | | - * large variations in granularity for deferrable timers, but they |
---|
1686 | | - * can be deferred for long periods due to idle anyway. |
---|
1687 | | - */ |
---|
1688 | | - base->must_forward_clk = false; |
---|
1689 | | - |
---|
1690 | | - while (time_after_eq(jiffies, base->clk)) { |
---|
1691 | | - |
---|
| 1743 | + while (time_after_eq(jiffies, base->clk) && |
---|
| 1744 | + time_after_eq(jiffies, base->next_expiry)) { |
---|
1692 | 1745 | levels = collect_expired_timers(base, heads); |
---|
| 1746 | + /* |
---|
| 1747 | + * The two possible reasons for not finding any expired |
---|
| 1748 | + * timer at this clk are that all matching timers have been |
---|
| 1749 | + * dequeued or no timer has been queued since |
---|
| 1750 | + * base::next_expiry was set to base::clk + |
---|
| 1751 | + * NEXT_TIMER_MAX_DELTA. |
---|
| 1752 | + */ |
---|
| 1753 | + WARN_ON_ONCE(!levels && !base->next_expiry_recalc |
---|
| 1754 | + && base->timers_pending); |
---|
1693 | 1755 | base->clk++; |
---|
| 1756 | + base->next_expiry = __next_timer_interrupt(base); |
---|
1694 | 1757 | |
---|
1695 | 1758 | while (levels--) |
---|
1696 | 1759 | expire_timers(base, heads + levels); |
---|
1697 | 1760 | } |
---|
1698 | | - base->running_timer = NULL; |
---|
1699 | 1761 | raw_spin_unlock_irq(&base->lock); |
---|
| 1762 | + timer_base_unlock_expiry(base); |
---|
1700 | 1763 | } |
---|
1701 | 1764 | |
---|
1702 | 1765 | /* |
---|
.. | .. |
---|
1720 | 1783 | |
---|
1721 | 1784 | hrtimer_run_queues(); |
---|
1722 | 1785 | /* Raise the softirq only if required. */ |
---|
1723 | | - if (time_before(jiffies, base->clk)) { |
---|
| 1786 | + if (time_before(jiffies, base->next_expiry)) { |
---|
1724 | 1787 | if (!IS_ENABLED(CONFIG_NO_HZ_COMMON)) |
---|
1725 | 1788 | return; |
---|
1726 | 1789 | /* CPU is awake, so check the deferrable base. */ |
---|
1727 | 1790 | base++; |
---|
1728 | | - if (time_before(jiffies, base->clk)) |
---|
| 1791 | + if (time_before(jiffies, base->next_expiry)) |
---|
1729 | 1792 | return; |
---|
1730 | 1793 | } |
---|
1731 | 1794 | raise_softirq(TIMER_SOFTIRQ); |
---|
.. | .. |
---|
1751 | 1814 | * schedule_timeout - sleep until timeout |
---|
1752 | 1815 | * @timeout: timeout value in jiffies |
---|
1753 | 1816 | * |
---|
1754 | | - * Make the current task sleep until @timeout jiffies have |
---|
1755 | | - * elapsed. The routine will return immediately unless |
---|
1756 | | - * the current task state has been set (see set_current_state()). |
---|
| 1817 | + * Make the current task sleep until @timeout jiffies have elapsed. |
---|
| 1818 | + * The function behavior depends on the current task state |
---|
| 1819 | + * (see also set_current_state() description): |
---|
1757 | 1820 | * |
---|
1758 | | - * You can set the task state as follows - |
---|
| 1821 | + * %TASK_RUNNING - the scheduler is called, but the task does not sleep |
---|
| 1822 | + * at all. That happens because sched_submit_work() does nothing for |
---|
| 1823 | + * tasks in %TASK_RUNNING state. |
---|
1759 | 1824 | * |
---|
1760 | 1825 | * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to |
---|
1761 | 1826 | * pass before the routine returns unless the current task is explicitly |
---|
1762 | | - * woken up, (e.g. by wake_up_process())". |
---|
| 1827 | + * woken up, (e.g. by wake_up_process()). |
---|
1763 | 1828 | * |
---|
1764 | 1829 | * %TASK_INTERRUPTIBLE - the routine may return early if a signal is |
---|
1765 | 1830 | * delivered to the current task or the current task is explicitly woken |
---|
1766 | 1831 | * up. |
---|
1767 | 1832 | * |
---|
1768 | | - * The current task state is guaranteed to be TASK_RUNNING when this |
---|
| 1833 | + * The current task state is guaranteed to be %TASK_RUNNING when this |
---|
1769 | 1834 | * routine returns. |
---|
1770 | 1835 | * |
---|
1771 | 1836 | * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule |
---|
.. | .. |
---|
1773 | 1838 | * value will be %MAX_SCHEDULE_TIMEOUT. |
---|
1774 | 1839 | * |
---|
1775 | 1840 | * Returns 0 when the timer has expired otherwise the remaining time in |
---|
1776 | | - * jiffies will be returned. In all cases the return value is guaranteed |
---|
| 1841 | + * jiffies will be returned. In all cases the return value is guaranteed |
---|
1777 | 1842 | * to be non-negative. |
---|
1778 | 1843 | */ |
---|
1779 | 1844 | signed long __sched schedule_timeout(signed long timeout) |
---|
.. | .. |
---|
1814 | 1879 | |
---|
1815 | 1880 | timer.task = current; |
---|
1816 | 1881 | timer_setup_on_stack(&timer.timer, process_timeout, 0); |
---|
1817 | | - __mod_timer(&timer.timer, expire, 0); |
---|
| 1882 | + __mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING); |
---|
1818 | 1883 | schedule(); |
---|
1819 | 1884 | del_singleshot_timer_sync(&timer.timer); |
---|
1820 | 1885 | |
---|
.. | .. |
---|
1887 | 1952 | base = per_cpu_ptr(&timer_bases[b], cpu); |
---|
1888 | 1953 | base->clk = jiffies; |
---|
1889 | 1954 | base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; |
---|
| 1955 | + base->timers_pending = false; |
---|
1890 | 1956 | base->is_idle = false; |
---|
1891 | | - base->must_forward_clk = true; |
---|
1892 | 1957 | } |
---|
1893 | 1958 | return 0; |
---|
1894 | 1959 | } |
---|
.. | .. |
---|
1941 | 2006 | base->cpu = cpu; |
---|
1942 | 2007 | raw_spin_lock_init(&base->lock); |
---|
1943 | 2008 | base->clk = jiffies; |
---|
| 2009 | + base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; |
---|
| 2010 | + timer_base_init_expiry_lock(base); |
---|
1944 | 2011 | } |
---|
1945 | 2012 | } |
---|
1946 | 2013 | |
---|
.. | .. |
---|
1955 | 2022 | void __init init_timers(void) |
---|
1956 | 2023 | { |
---|
1957 | 2024 | init_timer_cpus(); |
---|
| 2025 | + posix_cputimers_init_work(); |
---|
1958 | 2026 | open_softirq(TIMER_SOFTIRQ, run_timer_softirq); |
---|
1959 | 2027 | } |
---|
1960 | 2028 | |
---|
.. | .. |
---|
1988 | 2056 | EXPORT_SYMBOL(msleep_interruptible); |
---|
1989 | 2057 | |
---|
1990 | 2058 | /** |
---|
| 2059 | + * usleep_range_state - Sleep for an approximate time in a given state |
---|
| 2060 | + * @min: Minimum time in usecs to sleep |
---|
| 2061 | + * @max: Maximum time in usecs to sleep |
---|
| 2062 | + * @state: State the current task will be in while sleeping |
---|
| 2063 | + * |
---|
| 2064 | + * In non-atomic context where the exact wakeup time is flexible, use |
---|
| 2065 | + * usleep_range_state() instead of udelay(). The sleep improves responsiveness |
---|
| 2066 | + * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces |
---|
| 2067 | + * power usage by allowing hrtimers to take advantage of an already- |
---|
| 2068 | + * scheduled interrupt instead of scheduling a new one just for this sleep. |
---|
| 2069 | + */ |
---|
| 2070 | +void __sched usleep_range_state(unsigned long min, unsigned long max, |
---|
| 2071 | + unsigned int state) |
---|
| 2072 | +{ |
---|
| 2073 | + ktime_t exp = ktime_add_us(ktime_get(), min); |
---|
| 2074 | + u64 delta = (u64)(max - min) * NSEC_PER_USEC; |
---|
| 2075 | + |
---|
| 2076 | + for (;;) { |
---|
| 2077 | + __set_current_state(state); |
---|
| 2078 | + /* Do not return before the requested sleep time has elapsed */ |
---|
| 2079 | + if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) |
---|
| 2080 | + break; |
---|
| 2081 | + } |
---|
| 2082 | +} |
---|
| 2083 | + |
---|
| 2084 | +/** |
---|
1991 | 2085 | * usleep_range - Sleep for an approximate time |
---|
1992 | 2086 | * @min: Minimum time in usecs to sleep |
---|
1993 | 2087 | * @max: Maximum time in usecs to sleep |
---|
.. | .. |
---|
2000 | 2094 | */ |
---|
2001 | 2095 | void __sched usleep_range(unsigned long min, unsigned long max) |
---|
2002 | 2096 | { |
---|
2003 | | - ktime_t exp = ktime_add_us(ktime_get(), min); |
---|
2004 | | - u64 delta = (u64)(max - min) * NSEC_PER_USEC; |
---|
2005 | | - |
---|
2006 | | - for (;;) { |
---|
2007 | | - __set_current_state(TASK_UNINTERRUPTIBLE); |
---|
2008 | | - /* Do not return before the requested sleep time has elapsed */ |
---|
2009 | | - if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) |
---|
2010 | | - break; |
---|
2011 | | - } |
---|
| 2097 | + usleep_range_state(min, max, TASK_UNINTERRUPTIBLE); |
---|
2012 | 2098 | } |
---|
2013 | 2099 | EXPORT_SYMBOL(usleep_range); |
---|
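
Design note (editor's addition): with the task state factored out into usleep_range_state(), other sleep flavours reduce to one-line wrappers, and usleep_range() above is exactly that for TASK_UNINTERRUPTIBLE. As a hedged sketch, a TASK_IDLE variant (comparable to the usleep_idle_range() helper that later appeared in <linux/delay.h>; treat the exact name and location as an assumption here) would look like this:

```c
#include <linux/delay.h>	/* assumed to declare usleep_range_state() in this series */
#include <linux/sched.h>

/* Illustrative wrapper: sleep without contributing to the load average. */
static inline void usleep_idle_range_sketch(unsigned long min, unsigned long max)
{
	usleep_range_state(min, max, TASK_IDLE);
}
```
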