| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
|---|
| 1 | 2 | /* |
|---|
| 2 | | - * linux/kernel/timer.c |
|---|
| 3 | | - * |
|---|
| 4 | 3 | * Kernel internal timers |
|---|
| 5 | 4 | * |
|---|
| 6 | 5 | * Copyright (C) 1991, 1992 Linus Torvalds |
|---|
| .. | .. |
|---|
| 56 | 55 | |
|---|
| 57 | 56 | #define CREATE_TRACE_POINTS |
|---|
| 58 | 57 | #include <trace/events/timer.h> |
|---|
| 58 | +#undef CREATE_TRACE_POINTS |
|---|
| 59 | +#include <trace/hooks/timer.h> |
|---|
| 60 | + |
|---|
| 61 | +EXPORT_TRACEPOINT_SYMBOL_GPL(hrtimer_expire_entry); |
|---|
| 62 | +EXPORT_TRACEPOINT_SYMBOL_GPL(hrtimer_expire_exit); |
|---|
| 59 | 63 | |
|---|
| 60 | 64 | __visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; |
|---|
| 61 | 65 | |
|---|
| .. | .. |
|---|
| 158 | 162 | |
|---|
| 159 | 163 | /* |
|---|
| 160 | 164 | * The time start value for each level to select the bucket at enqueue |
|---|
| 161 | | - * time. |
|---|
| 165 | + * time. We start from the last possible delta of the previous level |
|---|
| 166 | + * so that we can later add an extra LVL_GRAN(n) to n (see calc_index()). |
|---|
| 162 | 167 | */ |
|---|
| 163 | 168 | #define LVL_START(n) ((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT)) |
|---|
| 164 | 169 | |
|---|
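For a concrete feel of these boundaries, the standalone sketch below (a user-space program, not part of timer.c) evaluates LVL_START() and LVL_GRAN() assuming the default wheel geometry of LVL_CLK_SHIFT = 3 and LVL_BITS = 6 (LVL_SIZE = 64): level 1 starts at a delta of 63 jiffies with 8-jiffy granularity, level 2 at 504 jiffies with 64-jiffy granularity, and so on.

```c
/*
 * Standalone sketch, not part of timer.c: print the start delta and the
 * granularity of each wheel level, assuming the LVL_CLK_SHIFT = 3 /
 * LVL_BITS = 6 (LVL_SIZE = 64) geometry used by timer.c.
 */
#include <stdio.h>

#define LVL_CLK_SHIFT	3
#define LVL_BITS	6
#define LVL_SIZE	(1UL << LVL_BITS)
#define LVL_SHIFT(n)	((n) * LVL_CLK_SHIFT)
#define LVL_GRAN(n)	(1UL << LVL_SHIFT(n))
#define LVL_START(n)	((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT))

int main(void)
{
	/* Level 0 covers deltas below LVL_START(1) with 1-jiffy granularity. */
	for (unsigned int n = 1; n < 8; n++)
		printf("level %u: starts at delta %lu, granularity %lu jiffies\n",
		       n, LVL_START(n), LVL_GRAN(n));
	return 0;
}
```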
| .. | .. |
|---|
| 198 | 203 | struct timer_base { |
|---|
| 199 | 204 | raw_spinlock_t lock; |
|---|
| 200 | 205 | struct timer_list *running_timer; |
|---|
| 206 | +#ifdef CONFIG_PREEMPT_RT |
|---|
| 201 | 207 | spinlock_t expiry_lock; |
|---|
| 208 | + atomic_t timer_waiters; |
|---|
| 209 | +#endif |
|---|
| 202 | 210 | unsigned long clk; |
|---|
| 203 | 211 | unsigned long next_expiry; |
|---|
| 204 | 212 | unsigned int cpu; |
|---|
| 213 | + bool next_expiry_recalc; |
|---|
| 205 | 214 | bool is_idle; |
|---|
| 206 | | - bool must_forward_clk; |
|---|
| 215 | + bool timers_pending; |
|---|
| 207 | 216 | DECLARE_BITMAP(pending_map, WHEEL_SIZE); |
|---|
| 208 | 217 | struct hlist_head vectors[WHEEL_SIZE]; |
|---|
| 209 | 218 | } ____cacheline_aligned; |
|---|
| .. | .. |
|---|
| 215 | 224 | static DEFINE_STATIC_KEY_FALSE(timers_nohz_active); |
|---|
| 216 | 225 | static DEFINE_MUTEX(timer_keys_mutex); |
|---|
| 217 | 226 | |
|---|
| 218 | | -static struct swork_event timer_update_swork; |
|---|
| 227 | +static void timer_update_keys(struct work_struct *work); |
|---|
| 228 | +static DECLARE_WORK(timer_update_work, timer_update_keys); |
|---|
| 219 | 229 | |
|---|
| 220 | 230 | #ifdef CONFIG_SMP |
|---|
| 221 | 231 | unsigned int sysctl_timer_migration = 1; |
|---|
| .. | .. |
|---|
| 233 | 243 | static inline void timers_update_migration(void) { } |
|---|
| 234 | 244 | #endif /* !CONFIG_SMP */ |
|---|
| 235 | 245 | |
|---|
| 236 | | -static void timer_update_keys(struct swork_event *event) |
|---|
| 246 | +static void timer_update_keys(struct work_struct *work) |
|---|
| 237 | 247 | { |
|---|
| 238 | 248 | mutex_lock(&timer_keys_mutex); |
|---|
| 239 | 249 | timers_update_migration(); |
|---|
| .. | .. |
|---|
| 243 | 253 | |
|---|
| 244 | 254 | void timers_update_nohz(void) |
|---|
| 245 | 255 | { |
|---|
| 246 | | - swork_queue(&timer_update_swork); |
|---|
| 256 | + schedule_work(&timer_update_work); |
|---|
| 247 | 257 | } |
|---|
| 248 | | - |
|---|
| 249 | | -static __init int hrtimer_init_thread(void) |
|---|
| 250 | | -{ |
|---|
| 251 | | - WARN_ON(swork_get()); |
|---|
| 252 | | - INIT_SWORK(&timer_update_swork, timer_update_keys); |
|---|
| 253 | | - return 0; |
|---|
| 254 | | -} |
|---|
| 255 | | -early_initcall(hrtimer_init_thread); |
|---|
| 256 | 258 | |
|---|
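The swork removal above switches timers_update_nohz() to the stock workqueue API. The snippet below sketches that pattern in isolation; example_work, its handler and kick_example_work() are invented for illustration and are not part of timer.c.

```c
/*
 * Illustrative sketch of the plain workqueue pattern used above:
 * a statically declared work item whose handler runs later in
 * process context once schedule_work() has queued it.
 */
#include <linux/workqueue.h>
#include <linux/printk.h>

static void example_work_fn(struct work_struct *work)
{
	pr_info("deferred work ran in process context\n");
}
static DECLARE_WORK(example_work, example_work_fn);

/* Safe to call from contexts that must not sleep: */
static void kick_example_work(void)
{
	schedule_work(&example_work);
}
```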
| 257 | 259 | int timer_migration_handler(struct ctl_table *table, int write, |
|---|
| 258 | | - void __user *buffer, size_t *lenp, |
|---|
| 259 | | - loff_t *ppos) |
|---|
| 260 | + void *buffer, size_t *lenp, loff_t *ppos) |
|---|
| 260 | 261 | { |
|---|
| 261 | 262 | int ret; |
|---|
| 262 | 263 | |
|---|
| .. | .. |
|---|
| 494 | 495 | * Helper function to calculate the array index for a given expiry |
|---|
| 495 | 496 | * time. |
|---|
| 496 | 497 | */ |
|---|
| 497 | | -static inline unsigned calc_index(unsigned expires, unsigned lvl) |
|---|
| 498 | +static inline unsigned calc_index(unsigned long expires, unsigned lvl, |
|---|
| 499 | + unsigned long *bucket_expiry) |
|---|
| 498 | 500 | { |
|---|
| 501 | + |
|---|
| 502 | + /* |
|---|
| 503 | + * The timer wheel has to guarantee that a timer does not fire |
|---|
| 504 | + * early. Early expiry can happen due to: |
|---|
| 505 | + * - Timer is armed at the edge of a tick |
|---|
| 506 | + * - Truncation of the expiry time in the outer wheel levels |
|---|
| 507 | + * |
|---|
| 508 | + * Round up with level granularity to prevent this. |
|---|
| 509 | + */ |
|---|
| 510 | + trace_android_vh_timer_calc_index(lvl, &expires); |
|---|
| 499 | 511 | expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl); |
|---|
| 512 | + *bucket_expiry = expires << LVL_SHIFT(lvl); |
|---|
| 500 | 513 | return LVL_OFFS(lvl) + (expires & LVL_MASK); |
|---|
| 501 | 514 | } |
|---|
| 502 | 515 | |
|---|
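To see the round-up in numbers, here is a minimal user-space rendition of calc_index() with the same default geometry as above (the trace_android_vh_timer_calc_index() vendor hook is omitted). For expires = 1000 on level 1 it yields bucket_expiry = 1008 and idx = 126, i.e. the chosen bucket can only fire at or after the requested expiry.

```c
/*
 * Minimal user-space sketch of calc_index() from the hunk above, assuming
 * the default LVL_CLK_SHIFT = 3 / LVL_BITS = 6 geometry; the vendor hook is
 * left out. Expected output for expires = 1000, lvl = 1:
 * "idx=126 bucket_expiry=1008", i.e. the expiry is rounded up, never down.
 */
#include <stdio.h>

#define LVL_CLK_SHIFT	3
#define LVL_BITS	6
#define LVL_SIZE	(1UL << LVL_BITS)
#define LVL_MASK	(LVL_SIZE - 1)
#define LVL_SHIFT(n)	((n) * LVL_CLK_SHIFT)
#define LVL_GRAN(n)	(1UL << LVL_SHIFT(n))
#define LVL_OFFS(n)	((n) * LVL_SIZE)

static unsigned int calc_index(unsigned long expires, unsigned int lvl,
			       unsigned long *bucket_expiry)
{
	/* Round up to the granularity of this level, then mask into the level. */
	expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
	*bucket_expiry = expires << LVL_SHIFT(lvl);
	return LVL_OFFS(lvl) + (expires & LVL_MASK);
}

int main(void)
{
	unsigned long bucket_expiry;
	unsigned int idx = calc_index(1000, 1, &bucket_expiry);

	printf("idx=%u bucket_expiry=%lu\n", idx, bucket_expiry);
	return 0;
}
```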
| 503 | | -static int calc_wheel_index(unsigned long expires, unsigned long clk) |
|---|
| 516 | +static int calc_wheel_index(unsigned long expires, unsigned long clk, |
|---|
| 517 | + unsigned long *bucket_expiry) |
|---|
| 504 | 518 | { |
|---|
| 505 | 519 | unsigned long delta = expires - clk; |
|---|
| 506 | 520 | unsigned int idx; |
|---|
| 507 | 521 | |
|---|
| 508 | 522 | if (delta < LVL_START(1)) { |
|---|
| 509 | | - idx = calc_index(expires, 0); |
|---|
| 523 | + idx = calc_index(expires, 0, bucket_expiry); |
|---|
| 510 | 524 | } else if (delta < LVL_START(2)) { |
|---|
| 511 | | - idx = calc_index(expires, 1); |
|---|
| 525 | + idx = calc_index(expires, 1, bucket_expiry); |
|---|
| 512 | 526 | } else if (delta < LVL_START(3)) { |
|---|
| 513 | | - idx = calc_index(expires, 2); |
|---|
| 527 | + idx = calc_index(expires, 2, bucket_expiry); |
|---|
| 514 | 528 | } else if (delta < LVL_START(4)) { |
|---|
| 515 | | - idx = calc_index(expires, 3); |
|---|
| 529 | + idx = calc_index(expires, 3, bucket_expiry); |
|---|
| 516 | 530 | } else if (delta < LVL_START(5)) { |
|---|
| 517 | | - idx = calc_index(expires, 4); |
|---|
| 531 | + idx = calc_index(expires, 4, bucket_expiry); |
|---|
| 518 | 532 | } else if (delta < LVL_START(6)) { |
|---|
| 519 | | - idx = calc_index(expires, 5); |
|---|
| 533 | + idx = calc_index(expires, 5, bucket_expiry); |
|---|
| 520 | 534 | } else if (delta < LVL_START(7)) { |
|---|
| 521 | | - idx = calc_index(expires, 6); |
|---|
| 535 | + idx = calc_index(expires, 6, bucket_expiry); |
|---|
| 522 | 536 | } else if (LVL_DEPTH > 8 && delta < LVL_START(8)) { |
|---|
| 523 | | - idx = calc_index(expires, 7); |
|---|
| 537 | + idx = calc_index(expires, 7, bucket_expiry); |
|---|
| 524 | 538 | } else if ((long) delta < 0) { |
|---|
| 525 | 539 | idx = clk & LVL_MASK; |
|---|
| 540 | + *bucket_expiry = clk; |
|---|
| 526 | 541 | } else { |
|---|
| 527 | 542 | /* |
|---|
| 528 | 543 | * Force expire obscene large timeouts to expire at the |
|---|
| .. | .. |
|---|
| 531 | 546 | if (delta >= WHEEL_TIMEOUT_CUTOFF) |
|---|
| 532 | 547 | expires = clk + WHEEL_TIMEOUT_MAX; |
|---|
| 533 | 548 | |
|---|
| 534 | | - idx = calc_index(expires, LVL_DEPTH - 1); |
|---|
| 549 | + idx = calc_index(expires, LVL_DEPTH - 1, bucket_expiry); |
|---|
| 535 | 550 | } |
|---|
| 536 | 551 | return idx; |
|---|
| 537 | | -} |
|---|
| 538 | | - |
|---|
| 539 | | -/* |
|---|
| 540 | | - * Enqueue the timer into the hash bucket, mark it pending in |
|---|
| 541 | | - * the bitmap and store the index in the timer flags. |
|---|
| 542 | | - */ |
|---|
| 543 | | -static void enqueue_timer(struct timer_base *base, struct timer_list *timer, |
|---|
| 544 | | - unsigned int idx) |
|---|
| 545 | | -{ |
|---|
| 546 | | - hlist_add_head(&timer->entry, base->vectors + idx); |
|---|
| 547 | | - __set_bit(idx, base->pending_map); |
|---|
| 548 | | - timer_set_idx(timer, idx); |
|---|
| 549 | | -} |
|---|
| 550 | | - |
|---|
| 551 | | -static void |
|---|
| 552 | | -__internal_add_timer(struct timer_base *base, struct timer_list *timer) |
|---|
| 553 | | -{ |
|---|
| 554 | | - unsigned int idx; |
|---|
| 555 | | - |
|---|
| 556 | | - idx = calc_wheel_index(timer->expires, base->clk); |
|---|
| 557 | | - enqueue_timer(base, timer, idx); |
|---|
| 558 | 552 | } |
|---|
| 559 | 553 | |
|---|
| 560 | 554 | static void |
|---|
| .. | .. |
|---|
| 578 | 572 | * timer is not deferrable. If the other CPU is on the way to idle |
|---|
| 579 | 573 | * then it can't set base->is_idle as we hold the base lock: |
|---|
| 580 | 574 | */ |
|---|
| 581 | | - if (!base->is_idle) |
|---|
| 582 | | - return; |
|---|
| 583 | | - |
|---|
| 584 | | - /* Check whether this is the new first expiring timer: */ |
|---|
| 585 | | - if (time_after_eq(timer->expires, base->next_expiry)) |
|---|
| 586 | | - return; |
|---|
| 587 | | - |
|---|
| 588 | | - /* |
|---|
| 589 | | - * Set the next expiry time and kick the CPU so it can reevaluate the |
|---|
| 590 | | - * wheel: |
|---|
| 591 | | - */ |
|---|
| 592 | | - if (time_before(timer->expires, base->clk)) { |
|---|
| 593 | | - /* |
|---|
| 594 | | - * Prevent from forward_timer_base() moving the base->clk |
|---|
| 595 | | - * backward |
|---|
| 596 | | - */ |
|---|
| 597 | | - base->next_expiry = base->clk; |
|---|
| 598 | | - } else { |
|---|
| 599 | | - base->next_expiry = timer->expires; |
|---|
| 600 | | - } |
|---|
| 601 | | - wake_up_nohz_cpu(base->cpu); |
|---|
| 575 | + if (base->is_idle) |
|---|
| 576 | + wake_up_nohz_cpu(base->cpu); |
|---|
| 602 | 577 | } |
|---|
| 603 | 578 | |
|---|
| 604 | | -static void |
|---|
| 605 | | -internal_add_timer(struct timer_base *base, struct timer_list *timer) |
|---|
| 579 | +/* |
|---|
| 580 | + * Enqueue the timer into the hash bucket, mark it pending in |
|---|
| 581 | + * the bitmap, store the index in the timer flags then wake up |
|---|
| 582 | + * the target CPU if needed. |
|---|
| 583 | + */ |
|---|
| 584 | +static void enqueue_timer(struct timer_base *base, struct timer_list *timer, |
|---|
| 585 | + unsigned int idx, unsigned long bucket_expiry) |
|---|
| 606 | 586 | { |
|---|
| 607 | | - __internal_add_timer(base, timer); |
|---|
| 608 | | - trigger_dyntick_cpu(base, timer); |
|---|
| 587 | + |
|---|
| 588 | + hlist_add_head(&timer->entry, base->vectors + idx); |
|---|
| 589 | + __set_bit(idx, base->pending_map); |
|---|
| 590 | + timer_set_idx(timer, idx); |
|---|
| 591 | + |
|---|
| 592 | + trace_timer_start(timer, timer->expires, timer->flags); |
|---|
| 593 | + |
|---|
| 594 | + /* |
|---|
| 595 | + * Check whether this is the new first expiring timer. The |
|---|
| 596 | + * effective expiry time of the timer is required here |
|---|
| 597 | + * (bucket_expiry) instead of timer->expires. |
|---|
| 598 | + */ |
|---|
| 599 | + if (time_before(bucket_expiry, base->next_expiry)) { |
|---|
| 600 | + /* |
|---|
| 601 | + * Set the next expiry time and kick the CPU so it |
|---|
| 602 | + * can reevaluate the wheel: |
|---|
| 603 | + */ |
|---|
| 604 | + base->next_expiry = bucket_expiry; |
|---|
| 605 | + base->timers_pending = true; |
|---|
| 606 | + base->next_expiry_recalc = false; |
|---|
| 607 | + trigger_dyntick_cpu(base, timer); |
|---|
| 608 | + } |
|---|
| 609 | +} |
|---|
| 610 | + |
|---|
| 611 | +static void internal_add_timer(struct timer_base *base, struct timer_list *timer) |
|---|
| 612 | +{ |
|---|
| 613 | + unsigned long bucket_expiry; |
|---|
| 614 | + unsigned int idx; |
|---|
| 615 | + |
|---|
| 616 | + idx = calc_wheel_index(timer->expires, base->clk, &bucket_expiry); |
|---|
| 617 | + enqueue_timer(base, timer, idx, bucket_expiry); |
|---|
| 609 | 618 | } |
|---|
| 610 | 619 | |
|---|
| 611 | 620 | #ifdef CONFIG_DEBUG_OBJECTS_TIMERS |
|---|
| 612 | 621 | |
|---|
| 613 | | -static struct debug_obj_descr timer_debug_descr; |
|---|
| 622 | +static const struct debug_obj_descr timer_debug_descr; |
|---|
| 614 | 623 | |
|---|
| 615 | 624 | static void *timer_debug_hint(void *addr) |
|---|
| 616 | 625 | { |
|---|
| .. | .. |
|---|
| 665 | 674 | |
|---|
| 666 | 675 | case ODEBUG_STATE_ACTIVE: |
|---|
| 667 | 676 | WARN_ON(1); |
|---|
| 668 | | - |
|---|
| 677 | + fallthrough; |
|---|
| 669 | 678 | default: |
|---|
| 670 | 679 | return false; |
|---|
| 671 | 680 | } |
|---|
| .. | .. |
|---|
| 706 | 715 | } |
|---|
| 707 | 716 | } |
|---|
| 708 | 717 | |
|---|
| 709 | | -static struct debug_obj_descr timer_debug_descr = { |
|---|
| 718 | +static const struct debug_obj_descr timer_debug_descr = { |
|---|
| 710 | 719 | .name = "timer_list", |
|---|
| 711 | 720 | .debug_hint = timer_debug_hint, |
|---|
| 712 | 721 | .is_static_object = timer_is_static_object, |
|---|
| .. | .. |
|---|
| 729 | 738 | static inline void debug_timer_deactivate(struct timer_list *timer) |
|---|
| 730 | 739 | { |
|---|
| 731 | 740 | debug_object_deactivate(timer, &timer_debug_descr); |
|---|
| 732 | | -} |
|---|
| 733 | | - |
|---|
| 734 | | -static inline void debug_timer_free(struct timer_list *timer) |
|---|
| 735 | | -{ |
|---|
| 736 | | - debug_object_free(timer, &timer_debug_descr); |
|---|
| 737 | 741 | } |
|---|
| 738 | 742 | |
|---|
| 739 | 743 | static inline void debug_timer_assert_init(struct timer_list *timer) |
|---|
| .. | .. |
|---|
| 775 | 779 | trace_timer_init(timer); |
|---|
| 776 | 780 | } |
|---|
| 777 | 781 | |
|---|
| 778 | | -static inline void |
|---|
| 779 | | -debug_activate(struct timer_list *timer, unsigned long expires) |
|---|
| 780 | | -{ |
|---|
| 781 | | - debug_timer_activate(timer); |
|---|
| 782 | | - trace_timer_start(timer, expires, timer->flags); |
|---|
| 783 | | -} |
|---|
| 784 | | - |
|---|
| 785 | 782 | static inline void debug_deactivate(struct timer_list *timer) |
|---|
| 786 | 783 | { |
|---|
| 787 | 784 | debug_timer_deactivate(timer); |
|---|
| .. | .. |
|---|
| 800 | 797 | { |
|---|
| 801 | 798 | timer->entry.pprev = NULL; |
|---|
| 802 | 799 | timer->function = func; |
|---|
| 800 | + if (WARN_ON_ONCE(flags & ~TIMER_INIT_FLAGS)) |
|---|
| 801 | + flags &= TIMER_INIT_FLAGS; |
|---|
| 803 | 802 | timer->flags = flags | raw_smp_processor_id(); |
|---|
| 804 | 803 | lockdep_init_map(&timer->lockdep_map, name, key, 0); |
|---|
| 805 | 804 | } |
|---|
| .. | .. |
|---|
| 845 | 844 | if (!timer_pending(timer)) |
|---|
| 846 | 845 | return 0; |
|---|
| 847 | 846 | |
|---|
| 848 | | - if (hlist_is_singular_node(&timer->entry, base->vectors + idx)) |
|---|
| 847 | + if (hlist_is_singular_node(&timer->entry, base->vectors + idx)) { |
|---|
| 849 | 848 | __clear_bit(idx, base->pending_map); |
|---|
| 849 | + base->next_expiry_recalc = true; |
|---|
| 850 | + } |
|---|
| 850 | 851 | |
|---|
| 851 | 852 | detach_timer(timer, clear_pending); |
|---|
| 852 | 853 | return 1; |
|---|
| .. | .. |
|---|
| 896 | 897 | |
|---|
| 897 | 898 | static inline void forward_timer_base(struct timer_base *base) |
|---|
| 898 | 899 | { |
|---|
| 899 | | -#ifdef CONFIG_NO_HZ_COMMON |
|---|
| 900 | | - unsigned long jnow; |
|---|
| 900 | + unsigned long jnow = READ_ONCE(jiffies); |
|---|
| 901 | 901 | |
|---|
| 902 | 902 | /* |
|---|
| 903 | | - * We only forward the base when we are idle or have just come out of |
|---|
| 904 | | - * idle (must_forward_clk logic), and have a delta between base clock |
|---|
| 905 | | - * and jiffies. In the common case, run_timers will take care of it. |
|---|
| 903 | + * No need to forward if we are close enough below jiffies. |
|---|
| 904 | + * Also while executing timers, base->clk is 1 offset ahead |
|---|
| 905 | + * of jiffies to avoid endless requeuing to current jiffies. |
|---|
| 906 | 906 | */ |
|---|
| 907 | | - if (likely(!base->must_forward_clk)) |
|---|
| 908 | | - return; |
|---|
| 909 | | - |
|---|
| 910 | | - jnow = READ_ONCE(jiffies); |
|---|
| 911 | | - base->must_forward_clk = base->is_idle; |
|---|
| 912 | | - if ((long)(jnow - base->clk) < 2) |
|---|
| 907 | + if ((long)(jnow - base->clk) < 1) |
|---|
| 913 | 908 | return; |
|---|
| 914 | 909 | |
|---|
| 915 | 910 | /* |
|---|
| .. | .. |
|---|
| 923 | 918 | return; |
|---|
| 924 | 919 | base->clk = base->next_expiry; |
|---|
| 925 | 920 | } |
|---|
| 926 | | -#endif |
|---|
| 927 | 921 | } |
|---|
| 928 | 922 | |
|---|
| 929 | 923 | |
|---|
| .. | .. |
|---|
| 966 | 960 | |
|---|
| 967 | 961 | #define MOD_TIMER_PENDING_ONLY 0x01 |
|---|
| 968 | 962 | #define MOD_TIMER_REDUCE 0x02 |
|---|
| 963 | +#define MOD_TIMER_NOTPENDING 0x04 |
|---|
| 969 | 964 | |
|---|
| 970 | 965 | static inline int |
|---|
| 971 | 966 | __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options) |
|---|
| 972 | 967 | { |
|---|
| 968 | + unsigned long clk = 0, flags, bucket_expiry; |
|---|
| 973 | 969 | struct timer_base *base, *new_base; |
|---|
| 974 | 970 | unsigned int idx = UINT_MAX; |
|---|
| 975 | | - unsigned long clk = 0, flags; |
|---|
| 976 | 971 | int ret = 0; |
|---|
| 977 | 972 | |
|---|
| 978 | 973 | BUG_ON(!timer->function); |
|---|
| .. | .. |
|---|
| 982 | 977 | * the timer is re-modified to have the same timeout or ends up in the |
|---|
| 983 | 978 | * same array bucket then just return: |
|---|
| 984 | 979 | */ |
|---|
| 985 | | - if (timer_pending(timer)) { |
|---|
| 980 | + if (!(options & MOD_TIMER_NOTPENDING) && timer_pending(timer)) { |
|---|
| 986 | 981 | /* |
|---|
| 987 | 982 | * The downside of this optimization is that it can result in |
|---|
| 988 | 983 | * larger granularity than you would get from adding a new |
|---|
| .. | .. |
|---|
| 1011 | 1006 | } |
|---|
| 1012 | 1007 | |
|---|
| 1013 | 1008 | clk = base->clk; |
|---|
| 1014 | | - idx = calc_wheel_index(expires, clk); |
|---|
| 1009 | + idx = calc_wheel_index(expires, clk, &bucket_expiry); |
|---|
| 1015 | 1010 | |
|---|
| 1016 | 1011 | /* |
|---|
| 1017 | 1012 | * Retrieve and compare the array index of the pending |
|---|
| .. | .. |
|---|
| 1058 | 1053 | } |
|---|
| 1059 | 1054 | } |
|---|
| 1060 | 1055 | |
|---|
| 1061 | | - debug_activate(timer, expires); |
|---|
| 1056 | + debug_timer_activate(timer); |
|---|
| 1062 | 1057 | |
|---|
| 1063 | 1058 | timer->expires = expires; |
|---|
| 1064 | 1059 | /* |
|---|
| 1065 | 1060 | * If 'idx' was calculated above and the base time did not advance |
|---|
| 1066 | 1061 | * between calculating 'idx' and possibly switching the base, only |
|---|
| 1067 | | - * enqueue_timer() and trigger_dyntick_cpu() is required. Otherwise |
|---|
| 1068 | | - * we need to (re)calculate the wheel index via |
|---|
| 1069 | | - * internal_add_timer(). |
|---|
| 1062 | + * enqueue_timer() is required. Otherwise we need to (re)calculate |
|---|
| 1063 | + * the wheel index via internal_add_timer(). |
|---|
| 1070 | 1064 | */ |
|---|
| 1071 | | - if (idx != UINT_MAX && clk == base->clk) { |
|---|
| 1072 | | - enqueue_timer(base, timer, idx); |
|---|
| 1073 | | - trigger_dyntick_cpu(base, timer); |
|---|
| 1074 | | - } else { |
|---|
| 1065 | + if (idx != UINT_MAX && clk == base->clk) |
|---|
| 1066 | + enqueue_timer(base, timer, idx, bucket_expiry); |
|---|
| 1067 | + else |
|---|
| 1075 | 1068 | internal_add_timer(base, timer); |
|---|
| 1076 | | - } |
|---|
| 1077 | 1069 | |
|---|
| 1078 | 1070 | out_unlock: |
|---|
| 1079 | 1071 | raw_spin_unlock_irqrestore(&base->lock, flags); |
|---|
| .. | .. |
|---|
| 1155 | 1147 | void add_timer(struct timer_list *timer) |
|---|
| 1156 | 1148 | { |
|---|
| 1157 | 1149 | BUG_ON(timer_pending(timer)); |
|---|
| 1158 | | - mod_timer(timer, timer->expires); |
|---|
| 1150 | + __mod_timer(timer, timer->expires, MOD_TIMER_NOTPENDING); |
|---|
| 1159 | 1151 | } |
|---|
| 1160 | 1152 | EXPORT_SYMBOL(add_timer); |
|---|
| 1161 | 1153 | |
|---|
| .. | .. |
|---|
| 1192 | 1184 | } |
|---|
| 1193 | 1185 | forward_timer_base(base); |
|---|
| 1194 | 1186 | |
|---|
| 1195 | | - debug_activate(timer, timer->expires); |
|---|
| 1187 | + debug_timer_activate(timer); |
|---|
| 1196 | 1188 | internal_add_timer(base, timer); |
|---|
| 1197 | 1189 | raw_spin_unlock_irqrestore(&base->lock, flags); |
|---|
| 1198 | 1190 | } |
|---|
| .. | .. |
|---|
| 1227 | 1219 | } |
|---|
| 1228 | 1220 | EXPORT_SYMBOL(del_timer); |
|---|
| 1229 | 1221 | |
|---|
| 1230 | | -static int __try_to_del_timer_sync(struct timer_list *timer, |
|---|
| 1231 | | - struct timer_base **basep) |
|---|
| 1232 | | -{ |
|---|
| 1233 | | - struct timer_base *base; |
|---|
| 1234 | | - unsigned long flags; |
|---|
| 1235 | | - int ret = -1; |
|---|
| 1236 | | - |
|---|
| 1237 | | - debug_assert_init(timer); |
|---|
| 1238 | | - |
|---|
| 1239 | | - *basep = base = lock_timer_base(timer, &flags); |
|---|
| 1240 | | - |
|---|
| 1241 | | - if (base->running_timer != timer) |
|---|
| 1242 | | - ret = detach_if_pending(timer, base, true); |
|---|
| 1243 | | - |
|---|
| 1244 | | - raw_spin_unlock_irqrestore(&base->lock, flags); |
|---|
| 1245 | | - |
|---|
| 1246 | | - return ret; |
|---|
| 1247 | | -} |
|---|
| 1248 | | - |
|---|
| 1249 | 1222 | /** |
|---|
| 1250 | 1223 | * try_to_del_timer_sync - Try to deactivate a timer |
|---|
| 1251 | 1224 | * @timer: timer to delete |
|---|
| .. | .. |
|---|
| 1256 | 1229 | int try_to_del_timer_sync(struct timer_list *timer) |
|---|
| 1257 | 1230 | { |
|---|
| 1258 | 1231 | struct timer_base *base; |
|---|
| 1232 | + unsigned long flags; |
|---|
| 1233 | + int ret = -1; |
|---|
| 1259 | 1234 | |
|---|
| 1260 | | - return __try_to_del_timer_sync(timer, &base); |
|---|
| 1235 | + debug_assert_init(timer); |
|---|
| 1236 | + |
|---|
| 1237 | + base = lock_timer_base(timer, &flags); |
|---|
| 1238 | + |
|---|
| 1239 | + if (base->running_timer != timer) |
|---|
| 1240 | + ret = detach_if_pending(timer, base, true); |
|---|
| 1241 | + |
|---|
| 1242 | + raw_spin_unlock_irqrestore(&base->lock, flags); |
|---|
| 1243 | + |
|---|
| 1244 | + return ret; |
|---|
| 1261 | 1245 | } |
|---|
| 1262 | 1246 | EXPORT_SYMBOL(try_to_del_timer_sync); |
|---|
| 1263 | 1247 | |
|---|
| 1264 | | -#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) |
|---|
| 1265 | | -static int __del_timer_sync(struct timer_list *timer) |
|---|
| 1248 | +#ifdef CONFIG_PREEMPT_RT |
|---|
| 1249 | +static __init void timer_base_init_expiry_lock(struct timer_base *base) |
|---|
| 1266 | 1250 | { |
|---|
| 1267 | | - struct timer_base *base; |
|---|
| 1268 | | - int ret; |
|---|
| 1251 | + spin_lock_init(&base->expiry_lock); |
|---|
| 1252 | +} |
|---|
| 1269 | 1253 | |
|---|
| 1270 | | - for (;;) { |
|---|
| 1271 | | - ret = __try_to_del_timer_sync(timer, &base); |
|---|
| 1272 | | - if (ret >= 0) |
|---|
| 1273 | | - return ret; |
|---|
| 1254 | +static inline void timer_base_lock_expiry(struct timer_base *base) |
|---|
| 1255 | +{ |
|---|
| 1256 | + spin_lock(&base->expiry_lock); |
|---|
| 1257 | +} |
|---|
| 1274 | 1258 | |
|---|
| 1275 | | - /* |
|---|
| 1276 | | - * When accessing the lock, timers of base are no longer expired |
|---|
| 1277 | | - * and so timer is no longer running. |
|---|
| 1278 | | - */ |
|---|
| 1279 | | - spin_lock(&base->expiry_lock); |
|---|
| 1259 | +static inline void timer_base_unlock_expiry(struct timer_base *base) |
|---|
| 1260 | +{ |
|---|
| 1261 | + spin_unlock(&base->expiry_lock); |
|---|
| 1262 | +} |
|---|
| 1263 | + |
|---|
| 1264 | +/* |
|---|
| 1265 | + * The counterpart to del_timer_wait_running(). |
|---|
| 1266 | + * |
|---|
| 1267 | + * If there is a waiter for base->expiry_lock, then it was waiting for the |
|---|
| 1268 | + * timer callback to finish. Drop expiry_lock and reacquire it. That allows |
|---|
| 1269 | + * the waiter to acquire the lock and make progress. |
|---|
| 1270 | + */ |
|---|
| 1271 | +static void timer_sync_wait_running(struct timer_base *base) |
|---|
| 1272 | +{ |
|---|
| 1273 | + if (atomic_read(&base->timer_waiters)) { |
|---|
| 1274 | + raw_spin_unlock_irq(&base->lock); |
|---|
| 1280 | 1275 | spin_unlock(&base->expiry_lock); |
|---|
| 1276 | + spin_lock(&base->expiry_lock); |
|---|
| 1277 | + raw_spin_lock_irq(&base->lock); |
|---|
| 1281 | 1278 | } |
|---|
| 1282 | 1279 | } |
|---|
| 1283 | 1280 | |
|---|
| 1281 | +/* |
|---|
| 1282 | + * This function is called on PREEMPT_RT kernels when the fast path |
|---|
| 1283 | + * deletion of a timer failed because the timer callback function was |
|---|
| 1284 | + * running. |
|---|
| 1285 | + * |
|---|
| 1286 | + * This prevents priority inversion, if the softirq thread on a remote CPU |
|---|
| 1287 | + * got preempted, and it prevents a life lock when the task which tries to |
|---|
| 1288 | + * delete a timer preempted the softirq thread running the timer callback |
|---|
| 1289 | + * function. |
|---|
| 1290 | + */ |
|---|
| 1291 | +static void del_timer_wait_running(struct timer_list *timer) |
|---|
| 1292 | +{ |
|---|
| 1293 | + u32 tf; |
|---|
| 1294 | + |
|---|
| 1295 | + tf = READ_ONCE(timer->flags); |
|---|
| 1296 | + if (!(tf & TIMER_MIGRATING)) { |
|---|
| 1297 | + struct timer_base *base = get_timer_base(tf); |
|---|
| 1298 | + |
|---|
| 1299 | + /* |
|---|
| 1300 | + * Mark the base as contended and grab the expiry lock, |
|---|
| 1301 | + * which is held by the softirq across the timer |
|---|
| 1302 | + * callback. Drop the lock immediately so the softirq can |
|---|
| 1303 | + * expire the next timer. In theory the timer could already |
|---|
| 1304 | + * be running again, but that's more than unlikely and just |
|---|
| 1305 | + * causes another wait loop. |
|---|
| 1306 | + */ |
|---|
| 1307 | + atomic_inc(&base->timer_waiters); |
|---|
| 1308 | + spin_lock_bh(&base->expiry_lock); |
|---|
| 1309 | + atomic_dec(&base->timer_waiters); |
|---|
| 1310 | + spin_unlock_bh(&base->expiry_lock); |
|---|
| 1311 | + } |
|---|
| 1312 | +} |
|---|
| 1313 | +#else |
|---|
| 1314 | +static inline void timer_base_init_expiry_lock(struct timer_base *base) { } |
|---|
| 1315 | +static inline void timer_base_lock_expiry(struct timer_base *base) { } |
|---|
| 1316 | +static inline void timer_base_unlock_expiry(struct timer_base *base) { } |
|---|
| 1317 | +static inline void timer_sync_wait_running(struct timer_base *base) { } |
|---|
| 1318 | +static inline void del_timer_wait_running(struct timer_list *timer) { } |
|---|
| 1319 | +#endif |
|---|
| 1320 | + |
|---|
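As a rough user-space analogy of the expiry_lock handshake described above, the pthread sketch below (an illustration under simplifying assumptions, not kernel code; base->lock, softirq context and timer re-arming are left out) shows a "softirq" thread holding the lock across callbacks and briefly dropping it when a canceler has announced itself via the waiter count.

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t expiry_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int timer_waiters;

/* Runner side: roughly what timer_sync_wait_running() does between callbacks. */
static void sync_wait_running(void)
{
	if (atomic_load(&timer_waiters)) {
		pthread_mutex_unlock(&expiry_lock);
		pthread_mutex_lock(&expiry_lock);
	}
}

static void *softirq_thread(void *arg)
{
	pthread_mutex_lock(&expiry_lock);
	for (int i = 0; i < 3; i++) {
		printf("running timer callback %d\n", i);	/* call_timer_fn() */
		usleep(1000);
		sync_wait_running();		/* let a waiting canceler in */
	}
	pthread_mutex_unlock(&expiry_lock);
	return NULL;
}

/* Canceler side: roughly what del_timer_wait_running() does. */
static void *canceler_thread(void *arg)
{
	atomic_fetch_add(&timer_waiters, 1);
	pthread_mutex_lock(&expiry_lock);	/* blocks while a callback runs */
	atomic_fetch_sub(&timer_waiters, 1);
	pthread_mutex_unlock(&expiry_lock);
	printf("previously running callback has finished\n");
	return NULL;
}

int main(void)
{
	pthread_t s, c;

	pthread_create(&s, NULL, softirq_thread, NULL);
	usleep(500);
	pthread_create(&c, NULL, canceler_thread, NULL);
	pthread_join(s, NULL);
	pthread_join(c, NULL);
	return 0;
}
```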
| 1321 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) |
|---|
| 1284 | 1322 | /** |
|---|
| 1285 | 1323 | * del_timer_sync - deactivate a timer and wait for the handler to finish. |
|---|
| 1286 | 1324 | * @timer: the timer to be deactivated |
|---|
| .. | .. |
|---|
| 1319 | 1357 | */ |
|---|
| 1320 | 1358 | int del_timer_sync(struct timer_list *timer) |
|---|
| 1321 | 1359 | { |
|---|
| 1360 | + int ret; |
|---|
| 1361 | + |
|---|
| 1322 | 1362 | #ifdef CONFIG_LOCKDEP |
|---|
| 1323 | 1363 | unsigned long flags; |
|---|
| 1324 | 1364 | |
|---|
| .. | .. |
|---|
| 1337 | 1377 | */ |
|---|
| 1338 | 1378 | WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE)); |
|---|
| 1339 | 1379 | |
|---|
| 1340 | | - return __del_timer_sync(timer); |
|---|
| 1380 | + do { |
|---|
| 1381 | + ret = try_to_del_timer_sync(timer); |
|---|
| 1382 | + |
|---|
| 1383 | + if (unlikely(ret < 0)) { |
|---|
| 1384 | + del_timer_wait_running(timer); |
|---|
| 1385 | + cpu_relax(); |
|---|
| 1386 | + } |
|---|
| 1387 | + } while (ret < 0); |
|---|
| 1388 | + |
|---|
| 1389 | + return ret; |
|---|
| 1341 | 1390 | } |
|---|
| 1342 | 1391 | EXPORT_SYMBOL(del_timer_sync); |
|---|
| 1343 | 1392 | #endif |
|---|
| 1344 | 1393 | |
|---|
| 1345 | | -static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *)) |
|---|
| 1394 | +static void call_timer_fn(struct timer_list *timer, |
|---|
| 1395 | + void (*fn)(struct timer_list *), |
|---|
| 1396 | + unsigned long baseclk) |
|---|
| 1346 | 1397 | { |
|---|
| 1347 | 1398 | int count = preempt_count(); |
|---|
| 1348 | 1399 | |
|---|
| .. | .. |
|---|
| 1365 | 1416 | */ |
|---|
| 1366 | 1417 | lock_map_acquire(&lockdep_map); |
|---|
| 1367 | 1418 | |
|---|
| 1368 | | - trace_timer_expire_entry(timer); |
|---|
| 1419 | + trace_timer_expire_entry(timer, baseclk); |
|---|
| 1369 | 1420 | fn(timer); |
|---|
| 1370 | 1421 | trace_timer_expire_exit(timer); |
|---|
| 1371 | 1422 | |
|---|
| 1372 | 1423 | lock_map_release(&lockdep_map); |
|---|
| 1373 | 1424 | |
|---|
| 1374 | 1425 | if (count != preempt_count()) { |
|---|
| 1375 | | - WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n", |
|---|
| 1426 | + WARN_ONCE(1, "timer: %pS preempt leak: %08x -> %08x\n", |
|---|
| 1376 | 1427 | fn, count, preempt_count()); |
|---|
| 1377 | 1428 | /* |
|---|
| 1378 | 1429 | * Restore the preempt count. That gives us a decent |
|---|
| .. | .. |
|---|
| 1386 | 1437 | |
|---|
| 1387 | 1438 | static void expire_timers(struct timer_base *base, struct hlist_head *head) |
|---|
| 1388 | 1439 | { |
|---|
| 1440 | + /* |
|---|
| 1441 | + * This value is required only for tracing. base->clk was |
|---|
| 1442 | + * incremented directly before expire_timers was called. But expiry |
|---|
| 1443 | + * is related to the old base->clk value. |
|---|
| 1444 | + */ |
|---|
| 1445 | + unsigned long baseclk = base->clk - 1; |
|---|
| 1446 | + |
|---|
| 1389 | 1447 | while (!hlist_empty(head)) { |
|---|
| 1390 | 1448 | struct timer_list *timer; |
|---|
| 1391 | 1449 | void (*fn)(struct timer_list *); |
|---|
| .. | .. |
|---|
| 1399 | 1457 | |
|---|
| 1400 | 1458 | if (timer->flags & TIMER_IRQSAFE) { |
|---|
| 1401 | 1459 | raw_spin_unlock(&base->lock); |
|---|
| 1402 | | - call_timer_fn(timer, fn); |
|---|
| 1403 | | - base->running_timer = NULL; |
|---|
| 1404 | | - spin_unlock(&base->expiry_lock); |
|---|
| 1405 | | - spin_lock(&base->expiry_lock); |
|---|
| 1460 | + call_timer_fn(timer, fn, baseclk); |
|---|
| 1406 | 1461 | raw_spin_lock(&base->lock); |
|---|
| 1462 | + base->running_timer = NULL; |
|---|
| 1407 | 1463 | } else { |
|---|
| 1408 | 1464 | raw_spin_unlock_irq(&base->lock); |
|---|
| 1409 | | - call_timer_fn(timer, fn); |
|---|
| 1410 | | - base->running_timer = NULL; |
|---|
| 1411 | | - spin_unlock(&base->expiry_lock); |
|---|
| 1412 | | - spin_lock(&base->expiry_lock); |
|---|
| 1465 | + call_timer_fn(timer, fn, baseclk); |
|---|
| 1413 | 1466 | raw_spin_lock_irq(&base->lock); |
|---|
| 1467 | + base->running_timer = NULL; |
|---|
| 1468 | + timer_sync_wait_running(base); |
|---|
| 1414 | 1469 | } |
|---|
| 1415 | 1470 | } |
|---|
| 1416 | 1471 | } |
|---|
| 1417 | 1472 | |
|---|
| 1418 | | -static int __collect_expired_timers(struct timer_base *base, |
|---|
| 1419 | | - struct hlist_head *heads) |
|---|
| 1473 | +static int collect_expired_timers(struct timer_base *base, |
|---|
| 1474 | + struct hlist_head *heads) |
|---|
| 1420 | 1475 | { |
|---|
| 1421 | | - unsigned long clk = base->clk; |
|---|
| 1476 | + unsigned long clk = base->clk = base->next_expiry; |
|---|
| 1422 | 1477 | struct hlist_head *vec; |
|---|
| 1423 | 1478 | int i, levels = 0; |
|---|
| 1424 | 1479 | unsigned int idx; |
|---|
| .. | .. |
|---|
| 1440 | 1495 | return levels; |
|---|
| 1441 | 1496 | } |
|---|
| 1442 | 1497 | |
|---|
| 1443 | | -#ifdef CONFIG_NO_HZ_COMMON |
|---|
| 1444 | 1498 | /* |
|---|
| 1445 | 1499 | * Find the next pending bucket of a level. Search from level start (@offset) |
|---|
| 1446 | 1500 | * + @clk upwards and if nothing there, search from start of the level |
|---|
| .. | .. |
|---|
| 1473 | 1527 | clk = base->clk; |
|---|
| 1474 | 1528 | for (lvl = 0; lvl < LVL_DEPTH; lvl++, offset += LVL_SIZE) { |
|---|
| 1475 | 1529 | int pos = next_pending_bucket(base, offset, clk & LVL_MASK); |
|---|
| 1530 | + unsigned long lvl_clk = clk & LVL_CLK_MASK; |
|---|
| 1476 | 1531 | |
|---|
| 1477 | 1532 | if (pos >= 0) { |
|---|
| 1478 | 1533 | unsigned long tmp = clk + (unsigned long) pos; |
|---|
| .. | .. |
|---|
| 1480 | 1535 | tmp <<= LVL_SHIFT(lvl); |
|---|
| 1481 | 1536 | if (time_before(tmp, next)) |
|---|
| 1482 | 1537 | next = tmp; |
|---|
| 1538 | + |
|---|
| 1539 | + /* |
|---|
| 1540 | + * If the next expiration happens before we reach |
|---|
| 1541 | + * the next level, no need to check further. |
|---|
| 1542 | + */ |
|---|
| 1543 | + if (pos <= ((LVL_CLK_DIV - lvl_clk) & LVL_CLK_MASK)) |
|---|
| 1544 | + break; |
|---|
| 1483 | 1545 | } |
|---|
| 1484 | 1546 | /* |
|---|
| 1485 | 1547 | * Clock for the next level. If the current level clock lower |
|---|
| .. | .. |
|---|
| 1517 | 1579 | * So the simple check whether the lower bits of the current |
|---|
| 1518 | 1580 | * level are 0 or not is sufficient for all cases. |
|---|
| 1519 | 1581 | */ |
|---|
| 1520 | | - adj = clk & LVL_CLK_MASK ? 1 : 0; |
|---|
| 1582 | + adj = lvl_clk ? 1 : 0; |
|---|
| 1521 | 1583 | clk >>= LVL_CLK_SHIFT; |
|---|
| 1522 | 1584 | clk += adj; |
|---|
| 1523 | 1585 | } |
|---|
| 1586 | + |
|---|
| 1587 | + base->next_expiry_recalc = false; |
|---|
| 1588 | + base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA); |
|---|
| 1589 | + |
|---|
| 1524 | 1590 | return next; |
|---|
| 1525 | 1591 | } |
|---|
| 1526 | 1592 | |
|---|
| 1593 | +#ifdef CONFIG_NO_HZ_COMMON |
|---|
| 1527 | 1594 | /* |
|---|
| 1528 | 1595 | * Check, if the next hrtimer event is before the next timer wheel |
|---|
| 1529 | 1596 | * event: |
|---|
| .. | .. |
|---|
| 1570 | 1637 | struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); |
|---|
| 1571 | 1638 | u64 expires = KTIME_MAX; |
|---|
| 1572 | 1639 | unsigned long nextevt; |
|---|
| 1573 | | - bool is_max_delta; |
|---|
| 1574 | 1640 | |
|---|
| 1575 | 1641 | /* |
|---|
| 1576 | 1642 | * Pretend that there is no timer pending if the cpu is offline. |
|---|
| .. | .. |
|---|
| 1580 | 1646 | return expires; |
|---|
| 1581 | 1647 | |
|---|
| 1582 | 1648 | raw_spin_lock(&base->lock); |
|---|
| 1583 | | - nextevt = __next_timer_interrupt(base); |
|---|
| 1584 | | - is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA); |
|---|
| 1585 | | - base->next_expiry = nextevt; |
|---|
| 1649 | + if (base->next_expiry_recalc) |
|---|
| 1650 | + base->next_expiry = __next_timer_interrupt(base); |
|---|
| 1651 | + nextevt = base->next_expiry; |
|---|
| 1652 | + |
|---|
| 1586 | 1653 | /* |
|---|
| 1587 | 1654 | * We have a fresh next event. Check whether we can forward the |
|---|
| 1588 | 1655 | * base. We can only do that when @basej is past base->clk |
|---|
| .. | .. |
|---|
| 1599 | 1666 | expires = basem; |
|---|
| 1600 | 1667 | base->is_idle = false; |
|---|
| 1601 | 1668 | } else { |
|---|
| 1602 | | - if (!is_max_delta) |
|---|
| 1669 | + if (base->timers_pending) |
|---|
| 1603 | 1670 | expires = basem + (u64)(nextevt - basej) * TICK_NSEC; |
|---|
| 1604 | 1671 | /* |
|---|
| 1605 | 1672 | * If we expect to sleep more than a tick, mark the base idle. |
|---|
| .. | .. |
|---|
| 1608 | 1675 | * logic is only maintained for the BASE_STD base, deferrable |
|---|
| 1609 | 1676 | * timers may still see large granularity skew (by design). |
|---|
| 1610 | 1677 | */ |
|---|
| 1611 | | - if ((expires - basem) > TICK_NSEC) { |
|---|
| 1612 | | - base->must_forward_clk = true; |
|---|
| 1678 | + if ((expires - basem) > TICK_NSEC) |
|---|
| 1613 | 1679 | base->is_idle = true; |
|---|
| 1614 | | - } |
|---|
| 1615 | 1680 | } |
|---|
| 1616 | 1681 | raw_spin_unlock(&base->lock); |
|---|
| 1617 | 1682 | |
|---|
| .. | .. |
|---|
| 1635 | 1700 | */ |
|---|
| 1636 | 1701 | base->is_idle = false; |
|---|
| 1637 | 1702 | } |
|---|
| 1638 | | - |
|---|
| 1639 | | -static int collect_expired_timers(struct timer_base *base, |
|---|
| 1640 | | - struct hlist_head *heads) |
|---|
| 1641 | | -{ |
|---|
| 1642 | | - unsigned long now = READ_ONCE(jiffies); |
|---|
| 1643 | | - |
|---|
| 1644 | | - /* |
|---|
| 1645 | | - * NOHZ optimization. After a long idle sleep we need to forward the |
|---|
| 1646 | | - * base to current jiffies. Avoid a loop by searching the bitfield for |
|---|
| 1647 | | - * the next expiring timer. |
|---|
| 1648 | | - */ |
|---|
| 1649 | | - if ((long)(now - base->clk) > 2) { |
|---|
| 1650 | | - unsigned long next = __next_timer_interrupt(base); |
|---|
| 1651 | | - |
|---|
| 1652 | | - /* |
|---|
| 1653 | | - * If the next timer is ahead of time forward to current |
|---|
| 1654 | | - * jiffies, otherwise forward to the next expiry time: |
|---|
| 1655 | | - */ |
|---|
| 1656 | | - if (time_after(next, now)) { |
|---|
| 1657 | | - /* |
|---|
| 1658 | | - * The call site will increment base->clk and then |
|---|
| 1659 | | - * terminate the expiry loop immediately. |
|---|
| 1660 | | - */ |
|---|
| 1661 | | - base->clk = now; |
|---|
| 1662 | | - return 0; |
|---|
| 1663 | | - } |
|---|
| 1664 | | - base->clk = next; |
|---|
| 1665 | | - } |
|---|
| 1666 | | - return __collect_expired_timers(base, heads); |
|---|
| 1667 | | -} |
|---|
| 1668 | | -#else |
|---|
| 1669 | | -static inline int collect_expired_timers(struct timer_base *base, |
|---|
| 1670 | | - struct hlist_head *heads) |
|---|
| 1671 | | -{ |
|---|
| 1672 | | - return __collect_expired_timers(base, heads); |
|---|
| 1673 | | -} |
|---|
| 1674 | 1703 | #endif |
|---|
| 1675 | 1704 | |
|---|
| 1676 | 1705 | /* |
|---|
| .. | .. |
|---|
| 1681 | 1710 | { |
|---|
| 1682 | 1711 | struct task_struct *p = current; |
|---|
| 1683 | 1712 | |
|---|
| 1713 | + PRANDOM_ADD_NOISE(jiffies, user_tick, p, 0); |
|---|
| 1714 | + |
|---|
| 1684 | 1715 | /* Note: this timer irq context must be accounted for as well. */ |
|---|
| 1685 | 1716 | account_process_tick(p, user_tick); |
|---|
| 1686 | 1717 | run_local_timers(); |
|---|
| 1687 | | - rcu_check_callbacks(user_tick); |
|---|
| 1718 | + rcu_sched_clock_irq(user_tick); |
|---|
| 1688 | 1719 | #ifdef CONFIG_IRQ_WORK |
|---|
| 1689 | 1720 | if (in_irq()) |
|---|
| 1690 | 1721 | irq_work_tick(); |
|---|
| 1691 | 1722 | #endif |
|---|
| 1692 | 1723 | scheduler_tick(); |
|---|
| 1693 | 1724 | if (IS_ENABLED(CONFIG_POSIX_TIMERS)) |
|---|
| 1694 | | - run_posix_cpu_timers(p); |
|---|
| 1725 | + run_posix_cpu_timers(); |
|---|
| 1695 | 1726 | } |
|---|
| 1696 | 1727 | |
|---|
| 1697 | 1728 | /** |
|---|
| .. | .. |
|---|
| 1703 | 1734 | struct hlist_head heads[LVL_DEPTH]; |
|---|
| 1704 | 1735 | int levels; |
|---|
| 1705 | 1736 | |
|---|
| 1706 | | - if (!time_after_eq(jiffies, base->clk)) |
|---|
| 1737 | + if (time_before(jiffies, base->next_expiry)) |
|---|
| 1707 | 1738 | return; |
|---|
| 1708 | 1739 | |
|---|
| 1709 | | - spin_lock(&base->expiry_lock); |
|---|
| 1740 | + timer_base_lock_expiry(base); |
|---|
| 1710 | 1741 | raw_spin_lock_irq(&base->lock); |
|---|
| 1711 | 1742 | |
|---|
| 1712 | | - /* |
|---|
| 1713 | | - * timer_base::must_forward_clk must be cleared before running |
|---|
| 1714 | | - * timers so that any timer functions that call mod_timer() will |
|---|
| 1715 | | - * not try to forward the base. Idle tracking / clock forwarding |
|---|
| 1716 | | - * logic is only used with BASE_STD timers. |
|---|
| 1717 | | - * |
|---|
| 1718 | | - * The must_forward_clk flag is cleared unconditionally also for |
|---|
| 1719 | | - * the deferrable base. The deferrable base is not affected by idle |
|---|
| 1720 | | - * tracking and never forwarded, so clearing the flag is a NOOP. |
|---|
| 1721 | | - * |
|---|
| 1722 | | - * The fact that the deferrable base is never forwarded can cause |
|---|
| 1723 | | - * large variations in granularity for deferrable timers, but they |
|---|
| 1724 | | - * can be deferred for long periods due to idle anyway. |
|---|
| 1725 | | - */ |
|---|
| 1726 | | - base->must_forward_clk = false; |
|---|
| 1727 | | - |
|---|
| 1728 | | - while (time_after_eq(jiffies, base->clk)) { |
|---|
| 1729 | | - |
|---|
| 1743 | + while (time_after_eq(jiffies, base->clk) && |
|---|
| 1744 | + time_after_eq(jiffies, base->next_expiry)) { |
|---|
| 1730 | 1745 | levels = collect_expired_timers(base, heads); |
|---|
| 1746 | + /* |
|---|
| 1747 | + * The two possible reasons for not finding any expired |
|---|
| 1748 | + * timer at this clk are that all matching timers have been |
|---|
| 1749 | + * dequeued or no timer has been queued since |
|---|
| 1750 | + * base::next_expiry was set to base::clk + |
|---|
| 1751 | + * NEXT_TIMER_MAX_DELTA. |
|---|
| 1752 | + */ |
|---|
| 1753 | + WARN_ON_ONCE(!levels && !base->next_expiry_recalc |
|---|
| 1754 | + && base->timers_pending); |
|---|
| 1731 | 1755 | base->clk++; |
|---|
| 1756 | + base->next_expiry = __next_timer_interrupt(base); |
|---|
| 1732 | 1757 | |
|---|
| 1733 | 1758 | while (levels--) |
|---|
| 1734 | 1759 | expire_timers(base, heads + levels); |
|---|
| 1735 | 1760 | } |
|---|
| 1736 | 1761 | raw_spin_unlock_irq(&base->lock); |
|---|
| 1737 | | - spin_unlock(&base->expiry_lock); |
|---|
| 1762 | + timer_base_unlock_expiry(base); |
|---|
| 1738 | 1763 | } |
|---|
| 1739 | 1764 | |
|---|
| 1740 | 1765 | /* |
|---|
| .. | .. |
|---|
| 1743 | 1768 | static __latent_entropy void run_timer_softirq(struct softirq_action *h) |
|---|
| 1744 | 1769 | { |
|---|
| 1745 | 1770 | struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); |
|---|
| 1746 | | - |
|---|
| 1747 | | - irq_work_tick_soft(); |
|---|
| 1748 | 1771 | |
|---|
| 1749 | 1772 | __run_timers(base); |
|---|
| 1750 | 1773 | if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) |
|---|
| .. | .. |
|---|
| 1760 | 1783 | |
|---|
| 1761 | 1784 | hrtimer_run_queues(); |
|---|
| 1762 | 1785 | /* Raise the softirq only if required. */ |
|---|
| 1763 | | - if (time_before(jiffies, base->clk)) { |
|---|
| 1786 | + if (time_before(jiffies, base->next_expiry)) { |
|---|
| 1764 | 1787 | if (!IS_ENABLED(CONFIG_NO_HZ_COMMON)) |
|---|
| 1765 | 1788 | return; |
|---|
| 1766 | 1789 | /* CPU is awake, so check the deferrable base. */ |
|---|
| 1767 | 1790 | base++; |
|---|
| 1768 | | - if (time_before(jiffies, base->clk)) |
|---|
| 1791 | + if (time_before(jiffies, base->next_expiry)) |
|---|
| 1769 | 1792 | return; |
|---|
| 1770 | 1793 | } |
|---|
| 1771 | 1794 | raise_softirq(TIMER_SOFTIRQ); |
|---|
| .. | .. |
|---|
| 1791 | 1814 | * schedule_timeout - sleep until timeout |
|---|
| 1792 | 1815 | * @timeout: timeout value in jiffies |
|---|
| 1793 | 1816 | * |
|---|
| 1794 | | - * Make the current task sleep until @timeout jiffies have |
|---|
| 1795 | | - * elapsed. The routine will return immediately unless |
|---|
| 1796 | | - * the current task state has been set (see set_current_state()). |
|---|
| 1817 | + * Make the current task sleep until @timeout jiffies have elapsed. |
|---|
| 1818 | + * The function behavior depends on the current task state |
|---|
| 1819 | + * (see also set_current_state() description): |
|---|
| 1797 | 1820 | * |
|---|
| 1798 | | - * You can set the task state as follows - |
|---|
| 1821 | + * %TASK_RUNNING - the scheduler is called, but the task does not sleep |
|---|
| 1822 | + * at all. That happens because sched_submit_work() does nothing for |
|---|
| 1823 | + * tasks in %TASK_RUNNING state. |
|---|
| 1799 | 1824 | * |
|---|
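For reference, a typical caller of this API looks like the hypothetical kernel-side sketch below (my_timer and its callback are invented for illustration, not part of timer.c): initialize with timer_setup(), set ->expires, then add_timer() for a not-yet-pending timer, or mod_timer() to (re)arm one.

```c
/*
 * Hypothetical usage sketch of the timer API (not a change to timer.c):
 * arm a one-shot timer that fires roughly one second from now.
 */
#include <linux/timer.h>
#include <linux/jiffies.h>
#include <linux/printk.h>

static struct timer_list my_timer;		/* invented example timer */

static void my_timer_fn(struct timer_list *t)
{
	pr_info("example timer fired\n");
}

static void arm_example_timer(void)
{
	timer_setup(&my_timer, my_timer_fn, 0);
	my_timer.expires = jiffies + HZ;
	add_timer(&my_timer);			/* timer must not be pending */

	/* To re-arm or push out an already queued timer instead: */
	/* mod_timer(&my_timer, jiffies + 2 * HZ); */
}
```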
| 1800 | 1825 | * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to |
|---|
| 1801 | 1826 | * pass before the routine returns unless the current task is explicitly |
|---|
| 1802 | | - * woken up, (e.g. by wake_up_process())". |
|---|
| 1827 | + * woken up (e.g. by wake_up_process()). |
|---|
| 1803 | 1828 | * |
|---|
| 1804 | 1829 | * %TASK_INTERRUPTIBLE - the routine may return early if a signal is |
|---|
| 1805 | 1830 | * delivered to the current task or the current task is explicitly woken |
|---|
| 1806 | 1831 | * up. |
|---|
| 1807 | 1832 | * |
|---|
| 1808 | | - * The current task state is guaranteed to be TASK_RUNNING when this |
|---|
| 1833 | + * The current task state is guaranteed to be %TASK_RUNNING when this |
|---|
| 1809 | 1834 | * routine returns. |
|---|
| 1810 | 1835 | * |
|---|
| 1811 | 1836 | * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule |
|---|
| .. | .. |
|---|
| 1813 | 1838 | * value will be %MAX_SCHEDULE_TIMEOUT. |
|---|
| 1814 | 1839 | * |
|---|
| 1815 | 1840 | * Returns 0 when the timer has expired otherwise the remaining time in |
|---|
| 1816 | | - * jiffies will be returned. In all cases the return value is guaranteed |
|---|
| 1841 | + * jiffies will be returned. In all cases the return value is guaranteed |
|---|
| 1817 | 1842 | * to be non-negative. |
|---|
| 1818 | 1843 | */ |
|---|
| 1819 | 1844 | signed long __sched schedule_timeout(signed long timeout) |
|---|
| .. | .. |
|---|
| 1854 | 1879 | |
|---|
| 1855 | 1880 | timer.task = current; |
|---|
| 1856 | 1881 | timer_setup_on_stack(&timer.timer, process_timeout, 0); |
|---|
| 1857 | | - __mod_timer(&timer.timer, expire, 0); |
|---|
| 1882 | + __mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING); |
|---|
| 1858 | 1883 | schedule(); |
|---|
| 1859 | 1884 | del_singleshot_timer_sync(&timer.timer); |
|---|
| 1860 | 1885 | |
|---|
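A minimal sketch of a typical schedule_timeout() caller, matching the state rules documented above (a kernel-side illustration, not a change to this file): set the task state first, then sleep; a non-zero return value means the task was woken before the timeout elapsed.

```c
/*
 * Illustrative kernel-side sketch (not part of timer.c): sleep for about
 * one second unless woken earlier, per the schedule_timeout() rules above.
 */
#include <linux/sched.h>
#include <linux/jiffies.h>

static long wait_about_a_second(void)
{
	long remaining;

	set_current_state(TASK_INTERRUPTIBLE);
	remaining = schedule_timeout(HZ);

	/* 0: the full second elapsed; >0: woken early, jiffies left over. */
	return remaining;
}
```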
| .. | .. |
|---|
| 1927 | 1952 | base = per_cpu_ptr(&timer_bases[b], cpu); |
|---|
| 1928 | 1953 | base->clk = jiffies; |
|---|
| 1929 | 1954 | base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; |
|---|
| 1955 | + base->timers_pending = false; |
|---|
| 1930 | 1956 | base->is_idle = false; |
|---|
| 1931 | | - base->must_forward_clk = true; |
|---|
| 1932 | 1957 | } |
|---|
| 1933 | 1958 | return 0; |
|---|
| 1934 | 1959 | } |
|---|
| .. | .. |
|---|
| 1981 | 2006 | base->cpu = cpu; |
|---|
| 1982 | 2007 | raw_spin_lock_init(&base->lock); |
|---|
| 1983 | 2008 | base->clk = jiffies; |
|---|
| 1984 | | - spin_lock_init(&base->expiry_lock); |
|---|
| 2009 | + base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; |
|---|
| 2010 | + timer_base_init_expiry_lock(base); |
|---|
| 1985 | 2011 | } |
|---|
| 1986 | 2012 | } |
|---|
| 1987 | 2013 | |
|---|
| .. | .. |
|---|
| 1996 | 2022 | void __init init_timers(void) |
|---|
| 1997 | 2023 | { |
|---|
| 1998 | 2024 | init_timer_cpus(); |
|---|
| 2025 | + posix_cputimers_init_work(); |
|---|
| 1999 | 2026 | open_softirq(TIMER_SOFTIRQ, run_timer_softirq); |
|---|
| 2000 | 2027 | } |
|---|
| 2001 | 2028 | |
|---|
| .. | .. |
|---|
| 2029 | 2056 | EXPORT_SYMBOL(msleep_interruptible); |
|---|
| 2030 | 2057 | |
|---|
| 2031 | 2058 | /** |
|---|
| 2059 | + * usleep_range_state - Sleep for an approximate time in a given state |
|---|
| 2060 | + * @min: Minimum time in usecs to sleep |
|---|
| 2061 | + * @max: Maximum time in usecs to sleep |
|---|
| 2062 | + * @state: State in which the current task will be while sleeping |
|---|
| 2063 | + * |
|---|
| 2064 | + * In non-atomic context where the exact wakeup time is flexible, use |
|---|
| 2065 | + * usleep_range_state() instead of udelay(). The sleep improves responsiveness |
|---|
| 2066 | + * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces |
|---|
| 2067 | + * power usage by allowing hrtimers to take advantage of an already- |
|---|
| 2068 | + * scheduled interrupt instead of scheduling a new one just for this sleep. |
|---|
| 2069 | + */ |
|---|
| 2070 | +void __sched usleep_range_state(unsigned long min, unsigned long max, |
|---|
| 2071 | + unsigned int state) |
|---|
| 2072 | +{ |
|---|
| 2073 | + ktime_t exp = ktime_add_us(ktime_get(), min); |
|---|
| 2074 | + u64 delta = (u64)(max - min) * NSEC_PER_USEC; |
|---|
| 2075 | + |
|---|
| 2076 | + for (;;) { |
|---|
| 2077 | + __set_current_state(state); |
|---|
| 2078 | + /* Do not return before the requested sleep time has elapsed */ |
|---|
| 2079 | + if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) |
|---|
| 2080 | + break; |
|---|
| 2081 | + } |
|---|
| 2082 | +} |
|---|
| 2083 | + |
|---|
| 2084 | +/** |
|---|
| 2032 | 2085 | * usleep_range - Sleep for an approximate time |
|---|
| 2033 | 2086 | * @min: Minimum time in usecs to sleep |
|---|
| 2034 | 2087 | * @max: Maximum time in usecs to sleep |
|---|
| .. | .. |
|---|
| 2041 | 2094 | */ |
|---|
| 2042 | 2095 | void __sched usleep_range(unsigned long min, unsigned long max) |
|---|
| 2043 | 2096 | { |
|---|
| 2044 | | - ktime_t exp = ktime_add_us(ktime_get(), min); |
|---|
| 2045 | | - u64 delta = (u64)(max - min) * NSEC_PER_USEC; |
|---|
| 2046 | | - |
|---|
| 2047 | | - for (;;) { |
|---|
| 2048 | | - __set_current_state(TASK_UNINTERRUPTIBLE); |
|---|
| 2049 | | - /* Do not return before the requested sleep time has elapsed */ |
|---|
| 2050 | | - if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) |
|---|
| 2051 | | - break; |
|---|
| 2052 | | - } |
|---|
| 2097 | + usleep_range_state(min, max, TASK_UNINTERRUPTIBLE); |
|---|
| 2053 | 2098 | } |
|---|
| 2054 | 2099 | EXPORT_SYMBOL(usleep_range); |
|---|
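A hypothetical caller illustrating the guidance above (the helper name and the 100-200us window are made up for the example): in non-atomic context, prefer a ranged sleep over udelay() so the wakeup can piggy-back on an already scheduled hrtimer interrupt.

```c
/*
 * Hypothetical usage sketch (not part of timer.c): give a device 100-200us
 * to settle without busy-waiting, letting the hrtimer pick any wakeup
 * point inside the range.
 */
#include <linux/delay.h>

static void example_settle_delay(void)
{
	usleep_range(100, 200);
}
```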