| .. | .. | 
|---|
 | 1 | +// SPDX-License-Identifier: GPL-2.0  | 
|---|
| 1 | 2 |  /* | 
|---|
| 2 |  | - *  linux/kernel/timer.c  | 
|---|
| 3 |  | - *  | 
|---|
| 4 | 3 |   *  Kernel internal timers | 
|---|
| 5 | 4 |   * | 
|---|
| 6 | 5 |   *  Copyright (C) 1991, 1992  Linus Torvalds | 
|---|
| .. | .. | 
|---|
| 56 | 55 |   | 
|---|
| 57 | 56 |  #define CREATE_TRACE_POINTS | 
|---|
| 58 | 57 |  #include <trace/events/timer.h> | 
|---|
 | 58 | +#undef CREATE_TRACE_POINTS  | 
|---|
 | 59 | +#include <trace/hooks/timer.h>  | 
|---|
 | 60 | +  | 
|---|
 | 61 | +EXPORT_TRACEPOINT_SYMBOL_GPL(hrtimer_expire_entry);  | 
|---|
 | 62 | +EXPORT_TRACEPOINT_SYMBOL_GPL(hrtimer_expire_exit);  | 
|---|
| 59 | 63 |   | 
|---|
| 60 | 64 |  __visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; | 
|---|
| 61 | 65 |   | 
|---|
| .. | .. | 
|---|
| 158 | 162 |   | 
|---|
| 159 | 163 |  /* | 
|---|
| 160 | 164 |   * The time start value for each level to select the bucket at enqueue | 
|---|
| 161 |  | - * time.  | 
|---|
 | 165 | + * time. We start from the last possible delta of the previous level  | 
|---|
 | 166 | + * so that we can later add an extra LVL_GRAN(n) to n (see calc_index()).  | 
|---|
| 162 | 167 |   */ | 
|---|
| 163 | 168 |  #define LVL_START(n)	((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT)) | 
|---|
| 164 | 169 |   | 
|---|
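To make the level-start arithmetic above concrete, here is a minimal user-space sketch (not part of the patch). It assumes the constants this file conventionally uses, LVL_CLK_SHIFT = 3 and LVL_BITS = 6 (so LVL_SIZE = 64), and prints where each level starts and how coarse it is.

```c
/* Standalone sketch of the wheel level arithmetic; the constant values
 * are assumptions taken from the usual timer.c defaults, not from this
 * hunk itself. */
#include <stdio.h>

#define LVL_CLK_SHIFT	3
#define LVL_SIZE	(1UL << 6)
#define LVL_GRAN(n)	(1UL << ((n) * LVL_CLK_SHIFT))
#define LVL_START(n)	((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT))

int main(void)
{
	/* Level 0 covers deltas 0..62 with 1-jiffy granularity; each
	 * further level starts at the last delta of the previous level
	 * and is 8x coarser, e.g. LVL_START(1)=63 with LVL_GRAN(1)=8,
	 * LVL_START(2)=504 with LVL_GRAN(2)=64. */
	for (unsigned int n = 1; n <= 3; n++)
		printf("LVL_START(%u) = %lu, LVL_GRAN(%u) = %lu\n",
		       n, LVL_START(n), n, LVL_GRAN(n));
	return 0;
}
```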
| .. | .. | 
|---|
| 198 | 203 |  struct timer_base { | 
|---|
| 199 | 204 |  	raw_spinlock_t		lock; | 
|---|
| 200 | 205 |  	struct timer_list	*running_timer; | 
|---|
 | 206 | +#ifdef CONFIG_PREEMPT_RT  | 
|---|
| 201 | 207 |  	spinlock_t		expiry_lock; | 
|---|
 | 208 | +	atomic_t		timer_waiters;  | 
|---|
 | 209 | +#endif  | 
|---|
| 202 | 210 |  	unsigned long		clk; | 
|---|
| 203 | 211 |  	unsigned long		next_expiry; | 
|---|
| 204 | 212 |  	unsigned int		cpu; | 
|---|
 | 213 | +	bool			next_expiry_recalc;  | 
|---|
| 205 | 214 |  	bool			is_idle; | 
|---|
| 206 |  | -	bool			must_forward_clk;  | 
|---|
 | 215 | +	bool			timers_pending;  | 
|---|
| 207 | 216 |  	DECLARE_BITMAP(pending_map, WHEEL_SIZE); | 
|---|
| 208 | 217 |  	struct hlist_head	vectors[WHEEL_SIZE]; | 
|---|
| 209 | 218 |  } ____cacheline_aligned; | 
|---|
| .. | .. | 
|---|
| 215 | 224 |  static DEFINE_STATIC_KEY_FALSE(timers_nohz_active); | 
|---|
| 216 | 225 |  static DEFINE_MUTEX(timer_keys_mutex); | 
|---|
| 217 | 226 |   | 
|---|
| 218 |  | -static struct swork_event timer_update_swork;  | 
|---|
 | 227 | +static void timer_update_keys(struct work_struct *work);  | 
|---|
 | 228 | +static DECLARE_WORK(timer_update_work, timer_update_keys);  | 
|---|
| 219 | 229 |   | 
|---|
| 220 | 230 |  #ifdef CONFIG_SMP | 
|---|
| 221 | 231 |  unsigned int sysctl_timer_migration = 1; | 
|---|
| .. | .. | 
|---|
| 233 | 243 |  static inline void timers_update_migration(void) { } | 
|---|
| 234 | 244 |  #endif /* !CONFIG_SMP */ | 
|---|
| 235 | 245 |   | 
|---|
| 236 |  | -static void timer_update_keys(struct swork_event *event)  | 
|---|
 | 246 | +static void timer_update_keys(struct work_struct *work)  | 
|---|
| 237 | 247 |  { | 
|---|
| 238 | 248 |  	mutex_lock(&timer_keys_mutex); | 
|---|
| 239 | 249 |  	timers_update_migration(); | 
|---|
| .. | .. | 
|---|
| 243 | 253 |   | 
|---|
| 244 | 254 |  void timers_update_nohz(void) | 
|---|
| 245 | 255 |  { | 
|---|
| 246 |  | -	swork_queue(&timer_update_swork);  | 
|---|
 | 256 | +	schedule_work(&timer_update_work);  | 
|---|
| 247 | 257 |  } | 
|---|
| 248 |  | -  | 
|---|
| 249 |  | -static __init int hrtimer_init_thread(void)  | 
|---|
| 250 |  | -{  | 
|---|
| 251 |  | -	WARN_ON(swork_get());  | 
|---|
| 252 |  | -	INIT_SWORK(&timer_update_swork, timer_update_keys);  | 
|---|
| 253 |  | -	return 0;  | 
|---|
| 254 |  | -}  | 
|---|
| 255 |  | -early_initcall(hrtimer_init_thread);  | 
|---|
| 256 | 258 |   | 
|---|
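The hunk above replaces the -rt swork machinery (which needed an early_initcall to set it up) with a statically declared work item. For readers unfamiliar with that pattern, a minimal sketch follows; the names are hypothetical and only illustrate the DECLARE_WORK()/schedule_work() pairing, they are not code from this file.

```c
#include <linux/workqueue.h>

/* Hypothetical example of the pattern used above: DECLARE_WORK()
 * initializes the work item statically, so no init-time setup (like
 * the removed swork_get()/INIT_SWORK()) is required. */
static void example_update(struct work_struct *work)
{
	/* Runs later in process context on the system workqueue. */
}
static DECLARE_WORK(example_work, example_update);

/* Callable from (soft)irq or other atomic context; the handler runs
 * later in process context. */
static void example_trigger(void)
{
	schedule_work(&example_work);
}
```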
| 257 | 259 |  int timer_migration_handler(struct ctl_table *table, int write, | 
|---|
| 258 |  | -			    void __user *buffer, size_t *lenp,  | 
|---|
| 259 |  | -			    loff_t *ppos)  | 
|---|
 | 260 | +			    void *buffer, size_t *lenp, loff_t *ppos)  | 
|---|
| 260 | 261 |  { | 
|---|
| 261 | 262 |  	int ret; | 
|---|
| 262 | 263 |   | 
|---|
| .. | .. | 
|---|
| 494 | 495 |   * Helper function to calculate the array index for a given expiry | 
|---|
| 495 | 496 |   * time. | 
|---|
| 496 | 497 |   */ | 
|---|
| 497 |  | -static inline unsigned calc_index(unsigned expires, unsigned lvl)  | 
|---|
 | 498 | +static inline unsigned calc_index(unsigned long expires, unsigned lvl,  | 
|---|
 | 499 | +				  unsigned long *bucket_expiry)  | 
|---|
| 498 | 500 |  { | 
|---|
 | 501 | +  | 
|---|
 | 502 | +	/*  | 
|---|
 | 503 | +	 * The timer wheel has to guarantee that a timer does not fire  | 
|---|
 | 504 | +	 * early. Early expiry can happen due to:  | 
|---|
 | 505 | +	 * - Timer is armed at the edge of a tick  | 
|---|
 | 506 | +	 * - Truncation of the expiry time in the outer wheel levels  | 
|---|
 | 507 | +	 *  | 
|---|
 | 508 | +	 * Round up with level granularity to prevent this.  | 
|---|
 | 509 | +	 */  | 
|---|
 | 510 | +	trace_android_vh_timer_calc_index(lvl, &expires);  | 
|---|
| 499 | 511 |  	expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl); | 
|---|
 | 512 | +	*bucket_expiry = expires << LVL_SHIFT(lvl);  | 
|---|
| 500 | 513 |  	return LVL_OFFS(lvl) + (expires & LVL_MASK); | 
|---|
| 501 | 514 |  } | 
|---|
| 502 | 515 |   | 
|---|
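A worked example may help here (a user-space sketch with the same assumed level constants as before, and the vendor hook omitted): rounding the expiry up by one level granularity is what makes the returned bucket_expiry the earliest point the bucket can fire, never earlier than the requested expiry.

```c
#include <assert.h>

/* Same calc_index() arithmetic as above, minus the vendor hook; the
 * level constants are assumptions (LVL_CLK_SHIFT = 3, LVL_BITS = 6). */
#define LVL_CLK_SHIFT	3
#define LVL_SHIFT(n)	((n) * LVL_CLK_SHIFT)
#define LVL_GRAN(n)	(1UL << LVL_SHIFT(n))
#define LVL_SIZE	(1UL << 6)
#define LVL_MASK	(LVL_SIZE - 1)
#define LVL_OFFS(n)	((n) * LVL_SIZE)

static unsigned int calc_index(unsigned long expires, unsigned int lvl,
			       unsigned long *bucket_expiry)
{
	expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
	*bucket_expiry = expires << LVL_SHIFT(lvl);
	return LVL_OFFS(lvl) + (expires & LVL_MASK);
}

int main(void)
{
	unsigned long bucket_expiry;
	unsigned int idx = calc_index(1000, 1, &bucket_expiry);

	/* expires = 1000 at level 1 (8-jiffy granularity) is rounded up
	 * to 1008, so the timer may fire a bit late but never early. */
	assert(bucket_expiry == 1008);
	assert(idx == LVL_OFFS(1) + ((1008 >> LVL_SHIFT(1)) & LVL_MASK));
	return 0;
}
```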
| 503 |  | -static int calc_wheel_index(unsigned long expires, unsigned long clk)  | 
|---|
 | 516 | +static int calc_wheel_index(unsigned long expires, unsigned long clk,  | 
|---|
 | 517 | +			    unsigned long *bucket_expiry)  | 
|---|
| 504 | 518 |  { | 
|---|
| 505 | 519 |  	unsigned long delta = expires - clk; | 
|---|
| 506 | 520 |  	unsigned int idx; | 
|---|
| 507 | 521 |   | 
|---|
| 508 | 522 |  	if (delta < LVL_START(1)) { | 
|---|
| 509 |  | -		idx = calc_index(expires, 0);  | 
|---|
 | 523 | +		idx = calc_index(expires, 0, bucket_expiry);  | 
|---|
| 510 | 524 |  	} else if (delta < LVL_START(2)) { | 
|---|
| 511 |  | -		idx = calc_index(expires, 1);  | 
|---|
 | 525 | +		idx = calc_index(expires, 1, bucket_expiry);  | 
|---|
| 512 | 526 |  	} else if (delta < LVL_START(3)) { | 
|---|
| 513 |  | -		idx = calc_index(expires, 2);  | 
|---|
 | 527 | +		idx = calc_index(expires, 2, bucket_expiry);  | 
|---|
| 514 | 528 |  	} else if (delta < LVL_START(4)) { | 
|---|
| 515 |  | -		idx = calc_index(expires, 3);  | 
|---|
 | 529 | +		idx = calc_index(expires, 3, bucket_expiry);  | 
|---|
| 516 | 530 |  	} else if (delta < LVL_START(5)) { | 
|---|
| 517 |  | -		idx = calc_index(expires, 4);  | 
|---|
 | 531 | +		idx = calc_index(expires, 4, bucket_expiry);  | 
|---|
| 518 | 532 |  	} else if (delta < LVL_START(6)) { | 
|---|
| 519 |  | -		idx = calc_index(expires, 5);  | 
|---|
 | 533 | +		idx = calc_index(expires, 5, bucket_expiry);  | 
|---|
| 520 | 534 |  	} else if (delta < LVL_START(7)) { | 
|---|
| 521 |  | -		idx = calc_index(expires, 6);  | 
|---|
 | 535 | +		idx = calc_index(expires, 6, bucket_expiry);  | 
|---|
| 522 | 536 |  	} else if (LVL_DEPTH > 8 && delta < LVL_START(8)) { | 
|---|
| 523 |  | -		idx = calc_index(expires, 7);  | 
|---|
 | 537 | +		idx = calc_index(expires, 7, bucket_expiry);  | 
|---|
| 524 | 538 |  	} else if ((long) delta < 0) { | 
|---|
| 525 | 539 |  		idx = clk & LVL_MASK; | 
|---|
 | 540 | +		*bucket_expiry = clk;  | 
|---|
| 526 | 541 |  	} else { | 
|---|
| 527 | 542 |  		/* | 
|---|
| 528 | 543 |  		 * Force expire obscene large timeouts to expire at the | 
|---|
| .. | .. | 
|---|
| 531 | 546 |  		if (delta >= WHEEL_TIMEOUT_CUTOFF) | 
|---|
| 532 | 547 |  			expires = clk + WHEEL_TIMEOUT_MAX; | 
|---|
| 533 | 548 |   | 
|---|
| 534 |  | -		idx = calc_index(expires, LVL_DEPTH - 1);  | 
|---|
 | 549 | +		idx = calc_index(expires, LVL_DEPTH - 1, bucket_expiry);  | 
|---|
| 535 | 550 |  	} | 
|---|
| 536 | 551 |  	return idx; | 
|---|
| 537 |  | -}  | 
|---|
| 538 |  | -  | 
|---|
| 539 |  | -/*  | 
|---|
| 540 |  | - * Enqueue the timer into the hash bucket, mark it pending in  | 
|---|
| 541 |  | - * the bitmap and store the index in the timer flags.  | 
|---|
| 542 |  | - */  | 
|---|
| 543 |  | -static void enqueue_timer(struct timer_base *base, struct timer_list *timer,  | 
|---|
| 544 |  | -			  unsigned int idx)  | 
|---|
| 545 |  | -{  | 
|---|
| 546 |  | -	hlist_add_head(&timer->entry, base->vectors + idx);  | 
|---|
| 547 |  | -	__set_bit(idx, base->pending_map);  | 
|---|
| 548 |  | -	timer_set_idx(timer, idx);  | 
|---|
| 549 |  | -}  | 
|---|
| 550 |  | -  | 
|---|
| 551 |  | -static void  | 
|---|
| 552 |  | -__internal_add_timer(struct timer_base *base, struct timer_list *timer)  | 
|---|
| 553 |  | -{  | 
|---|
| 554 |  | -	unsigned int idx;  | 
|---|
| 555 |  | -  | 
|---|
| 556 |  | -	idx = calc_wheel_index(timer->expires, base->clk);  | 
|---|
| 557 |  | -	enqueue_timer(base, timer, idx);  | 
|---|
| 558 | 552 |  } | 
|---|
| 559 | 553 |   | 
|---|
| 560 | 554 |  static void | 
|---|
| .. | .. | 
|---|
| 578 | 572 |  	 * timer is not deferrable. If the other CPU is on the way to idle | 
|---|
| 579 | 573 |  	 * then it can't set base->is_idle as we hold the base lock: | 
|---|
| 580 | 574 |  	 */ | 
|---|
| 581 |  | -	if (!base->is_idle)  | 
|---|
| 582 |  | -		return;  | 
|---|
| 583 |  | -  | 
|---|
| 584 |  | -	/* Check whether this is the new first expiring timer: */  | 
|---|
| 585 |  | -	if (time_after_eq(timer->expires, base->next_expiry))  | 
|---|
| 586 |  | -		return;  | 
|---|
| 587 |  | -  | 
|---|
| 588 |  | -	/*  | 
|---|
| 589 |  | -	 * Set the next expiry time and kick the CPU so it can reevaluate the  | 
|---|
| 590 |  | -	 * wheel:  | 
|---|
| 591 |  | -	 */  | 
|---|
| 592 |  | -	if (time_before(timer->expires, base->clk)) {  | 
|---|
| 593 |  | -		/*  | 
|---|
| 594 |  | -		 * Prevent from forward_timer_base() moving the base->clk  | 
|---|
| 595 |  | -		 * backward  | 
|---|
| 596 |  | -		 */  | 
|---|
| 597 |  | -		base->next_expiry = base->clk;  | 
|---|
| 598 |  | -	} else {  | 
|---|
| 599 |  | -		base->next_expiry = timer->expires;  | 
|---|
| 600 |  | -	}  | 
|---|
| 601 |  | -	wake_up_nohz_cpu(base->cpu);  | 
|---|
 | 575 | +	if (base->is_idle)  | 
|---|
 | 576 | +		wake_up_nohz_cpu(base->cpu);  | 
|---|
| 602 | 577 |  } | 
|---|
| 603 | 578 |   | 
|---|
| 604 |  | -static void  | 
|---|
| 605 |  | -internal_add_timer(struct timer_base *base, struct timer_list *timer)  | 
|---|
 | 579 | +/*  | 
|---|
 | 580 | + * Enqueue the timer into the hash bucket, mark it pending in  | 
|---|
 | 581 | + * the bitmap, store the index in the timer flags, then wake up  | 
|---|
 | 582 | + * the target CPU if needed.  | 
|---|
 | 583 | + */  | 
|---|
 | 584 | +static void enqueue_timer(struct timer_base *base, struct timer_list *timer,  | 
|---|
 | 585 | +			  unsigned int idx, unsigned long bucket_expiry)  | 
|---|
| 606 | 586 |  { | 
|---|
| 607 |  | -	__internal_add_timer(base, timer);  | 
|---|
| 608 |  | -	trigger_dyntick_cpu(base, timer);  | 
|---|
 | 587 | +  | 
|---|
 | 588 | +	hlist_add_head(&timer->entry, base->vectors + idx);  | 
|---|
 | 589 | +	__set_bit(idx, base->pending_map);  | 
|---|
 | 590 | +	timer_set_idx(timer, idx);  | 
|---|
 | 591 | +  | 
|---|
 | 592 | +	trace_timer_start(timer, timer->expires, timer->flags);  | 
|---|
 | 593 | +  | 
|---|
 | 594 | +	/*  | 
|---|
 | 595 | +	 * Check whether this is the new first expiring timer. The  | 
|---|
 | 596 | +	 * effective expiry time of the timer is required here  | 
|---|
 | 597 | +	 * (bucket_expiry) instead of timer->expires.  | 
|---|
 | 598 | +	 */  | 
|---|
 | 599 | +	if (time_before(bucket_expiry, base->next_expiry)) {  | 
|---|
 | 600 | +		/*  | 
|---|
 | 601 | +		 * Set the next expiry time and kick the CPU so it  | 
|---|
 | 602 | +		 * can reevaluate the wheel:  | 
|---|
 | 603 | +		 */  | 
|---|
 | 604 | +		base->next_expiry = bucket_expiry;  | 
|---|
 | 605 | +		base->timers_pending = true;  | 
|---|
 | 606 | +		base->next_expiry_recalc = false;  | 
|---|
 | 607 | +		trigger_dyntick_cpu(base, timer);  | 
|---|
 | 608 | +	}  | 
|---|
 | 609 | +}  | 
|---|
 | 610 | +  | 
|---|
 | 611 | +static void internal_add_timer(struct timer_base *base, struct timer_list *timer)  | 
|---|
 | 612 | +{  | 
|---|
 | 613 | +	unsigned long bucket_expiry;  | 
|---|
 | 614 | +	unsigned int idx;  | 
|---|
 | 615 | +  | 
|---|
 | 616 | +	idx = calc_wheel_index(timer->expires, base->clk, &bucket_expiry);  | 
|---|
 | 617 | +	enqueue_timer(base, timer, idx, bucket_expiry);  | 
|---|
| 609 | 618 |  } | 
|---|
| 610 | 619 |   | 
|---|
| 611 | 620 |  #ifdef CONFIG_DEBUG_OBJECTS_TIMERS | 
|---|
| 612 | 621 |   | 
|---|
| 613 |  | -static struct debug_obj_descr timer_debug_descr;  | 
|---|
 | 622 | +static const struct debug_obj_descr timer_debug_descr;  | 
|---|
| 614 | 623 |   | 
|---|
| 615 | 624 |  static void *timer_debug_hint(void *addr) | 
|---|
| 616 | 625 |  { | 
|---|
| .. | .. | 
|---|
| 665 | 674 |   | 
|---|
| 666 | 675 |  	case ODEBUG_STATE_ACTIVE: | 
|---|
| 667 | 676 |  		WARN_ON(1); | 
|---|
| 668 |  | -  | 
|---|
 | 677 | +		fallthrough;  | 
|---|
| 669 | 678 |  	default: | 
|---|
| 670 | 679 |  		return false; | 
|---|
| 671 | 680 |  	} | 
|---|
| .. | .. | 
|---|
| 706 | 715 |  	} | 
|---|
| 707 | 716 |  } | 
|---|
| 708 | 717 |   | 
|---|
| 709 |  | -static struct debug_obj_descr timer_debug_descr = {  | 
|---|
 | 718 | +static const struct debug_obj_descr timer_debug_descr = {  | 
|---|
| 710 | 719 |  	.name			= "timer_list", | 
|---|
| 711 | 720 |  	.debug_hint		= timer_debug_hint, | 
|---|
| 712 | 721 |  	.is_static_object	= timer_is_static_object, | 
|---|
| .. | .. | 
|---|
| 729 | 738 |  static inline void debug_timer_deactivate(struct timer_list *timer) | 
|---|
| 730 | 739 |  { | 
|---|
| 731 | 740 |  	debug_object_deactivate(timer, &timer_debug_descr); | 
|---|
| 732 |  | -}  | 
|---|
| 733 |  | -  | 
|---|
| 734 |  | -static inline void debug_timer_free(struct timer_list *timer)  | 
|---|
| 735 |  | -{  | 
|---|
| 736 |  | -	debug_object_free(timer, &timer_debug_descr);  | 
|---|
| 737 | 741 |  } | 
|---|
| 738 | 742 |   | 
|---|
| 739 | 743 |  static inline void debug_timer_assert_init(struct timer_list *timer) | 
|---|
| .. | .. | 
|---|
| 775 | 779 |  	trace_timer_init(timer); | 
|---|
| 776 | 780 |  } | 
|---|
| 777 | 781 |   | 
|---|
| 778 |  | -static inline void  | 
|---|
| 779 |  | -debug_activate(struct timer_list *timer, unsigned long expires)  | 
|---|
| 780 |  | -{  | 
|---|
| 781 |  | -	debug_timer_activate(timer);  | 
|---|
| 782 |  | -	trace_timer_start(timer, expires, timer->flags);  | 
|---|
| 783 |  | -}  | 
|---|
| 784 |  | -  | 
|---|
| 785 | 782 |  static inline void debug_deactivate(struct timer_list *timer) | 
|---|
| 786 | 783 |  { | 
|---|
| 787 | 784 |  	debug_timer_deactivate(timer); | 
|---|
| .. | .. | 
|---|
| 800 | 797 |  { | 
|---|
| 801 | 798 |  	timer->entry.pprev = NULL; | 
|---|
| 802 | 799 |  	timer->function = func; | 
|---|
 | 800 | +	if (WARN_ON_ONCE(flags & ~TIMER_INIT_FLAGS))  | 
|---|
 | 801 | +		flags &= TIMER_INIT_FLAGS;  | 
|---|
| 803 | 802 |  	timer->flags = flags | raw_smp_processor_id(); | 
|---|
| 804 | 803 |  	lockdep_init_map(&timer->lockdep_map, name, key, 0); | 
|---|
| 805 | 804 |  } | 
|---|
| .. | .. | 
|---|
| 845 | 844 |  	if (!timer_pending(timer)) | 
|---|
| 846 | 845 |  		return 0; | 
|---|
| 847 | 846 |   | 
|---|
| 848 |  | -	if (hlist_is_singular_node(&timer->entry, base->vectors + idx))  | 
|---|
 | 847 | +	if (hlist_is_singular_node(&timer->entry, base->vectors + idx)) {  | 
|---|
| 849 | 848 |  		__clear_bit(idx, base->pending_map); | 
|---|
 | 849 | +		base->next_expiry_recalc = true;  | 
|---|
 | 850 | +	}  | 
|---|
| 850 | 851 |   | 
|---|
| 851 | 852 |  	detach_timer(timer, clear_pending); | 
|---|
| 852 | 853 |  	return 1; | 
|---|
| .. | .. | 
|---|
| 896 | 897 |   | 
|---|
| 897 | 898 |  static inline void forward_timer_base(struct timer_base *base) | 
|---|
| 898 | 899 |  { | 
|---|
| 899 |  | -#ifdef CONFIG_NO_HZ_COMMON  | 
|---|
| 900 |  | -	unsigned long jnow;  | 
|---|
 | 900 | +	unsigned long jnow = READ_ONCE(jiffies);  | 
|---|
| 901 | 901 |   | 
|---|
| 902 | 902 |  	/* | 
|---|
| 903 |  | -	 * We only forward the base when we are idle or have just come out of  | 
|---|
| 904 |  | -	 * idle (must_forward_clk logic), and have a delta between base clock  | 
|---|
| 905 |  | -	 * and jiffies. In the common case, run_timers will take care of it.  | 
|---|
 | 903 | +	 * No need to forward if we are close enough below jiffies.  | 
|---|
 | 904 | +	 * Also while executing timers, base->clk is 1 offset ahead  | 
|---|
 | 905 | + * of jiffies to avoid endless requeuing to current jiffies.  | 
|---|
| 906 | 906 |  	 */ | 
|---|
| 907 |  | -	if (likely(!base->must_forward_clk))  | 
|---|
| 908 |  | -		return;  | 
|---|
| 909 |  | -  | 
|---|
| 910 |  | -	jnow = READ_ONCE(jiffies);  | 
|---|
| 911 |  | -	base->must_forward_clk = base->is_idle;  | 
|---|
| 912 |  | -	if ((long)(jnow - base->clk) < 2)  | 
|---|
 | 907 | +	if ((long)(jnow - base->clk) < 1)  | 
|---|
| 913 | 908 |  		return; | 
|---|
| 914 | 909 |   | 
|---|
| 915 | 910 |  	/* | 
|---|
| .. | .. | 
|---|
| 923 | 918 |  			return; | 
|---|
| 924 | 919 |  		base->clk = base->next_expiry; | 
|---|
| 925 | 920 |  	} | 
|---|
| 926 |  | -#endif  | 
|---|
| 927 | 921 |  } | 
|---|
| 928 | 922 |   | 
|---|
| 929 | 923 |   | 
|---|
| .. | .. | 
|---|
| 966 | 960 |   | 
|---|
| 967 | 961 |  #define MOD_TIMER_PENDING_ONLY		0x01 | 
|---|
| 968 | 962 |  #define MOD_TIMER_REDUCE		0x02 | 
|---|
 | 963 | +#define MOD_TIMER_NOTPENDING		0x04  | 
|---|
| 969 | 964 |   | 
|---|
| 970 | 965 |  static inline int | 
|---|
| 971 | 966 |  __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options) | 
|---|
| 972 | 967 |  { | 
|---|
 | 968 | +	unsigned long clk = 0, flags, bucket_expiry;  | 
|---|
| 973 | 969 |  	struct timer_base *base, *new_base; | 
|---|
| 974 | 970 |  	unsigned int idx = UINT_MAX; | 
|---|
| 975 |  | -	unsigned long clk = 0, flags;  | 
|---|
| 976 | 971 |  	int ret = 0; | 
|---|
| 977 | 972 |   | 
|---|
| 978 | 973 |  	BUG_ON(!timer->function); | 
|---|
| .. | .. | 
|---|
| 982 | 977 |  	 * the timer is re-modified to have the same timeout or ends up in the | 
|---|
| 983 | 978 |  	 * same array bucket then just return: | 
|---|
| 984 | 979 |  	 */ | 
|---|
| 985 |  | -	if (timer_pending(timer)) {  | 
|---|
 | 980 | +	if (!(options & MOD_TIMER_NOTPENDING) && timer_pending(timer)) {  | 
|---|
| 986 | 981 |  		/* | 
|---|
| 987 | 982 |  		 * The downside of this optimization is that it can result in | 
|---|
| 988 | 983 |  		 * larger granularity than you would get from adding a new | 
|---|
| .. | .. | 
|---|
| 1011 | 1006 |  		} | 
|---|
| 1012 | 1007 |   | 
|---|
| 1013 | 1008 |  		clk = base->clk; | 
|---|
| 1014 |  | -		idx = calc_wheel_index(expires, clk);  | 
|---|
 | 1009 | +		idx = calc_wheel_index(expires, clk, &bucket_expiry);  | 
|---|
| 1015 | 1010 |   | 
|---|
| 1016 | 1011 |  		/* | 
|---|
| 1017 | 1012 |  		 * Retrieve and compare the array index of the pending | 
|---|
| .. | .. | 
|---|
| 1058 | 1053 |  		} | 
|---|
| 1059 | 1054 |  	} | 
|---|
| 1060 | 1055 |   | 
|---|
| 1061 |  | -	debug_activate(timer, expires);  | 
|---|
 | 1056 | +	debug_timer_activate(timer);  | 
|---|
| 1062 | 1057 |   | 
|---|
| 1063 | 1058 |  	timer->expires = expires; | 
|---|
| 1064 | 1059 |  	/* | 
|---|
| 1065 | 1060 |  	 * If 'idx' was calculated above and the base time did not advance | 
|---|
| 1066 | 1061 |  	 * between calculating 'idx' and possibly switching the base, only | 
|---|
| 1067 |  | -	 * enqueue_timer() and trigger_dyntick_cpu() is required. Otherwise  | 
|---|
| 1068 |  | -	 * we need to (re)calculate the wheel index via  | 
|---|
| 1069 |  | -	 * internal_add_timer().  | 
|---|
 | 1062 | +	 * enqueue_timer() is required. Otherwise we need to (re)calculate  | 
|---|
 | 1063 | +	 * the wheel index via internal_add_timer().  | 
|---|
| 1070 | 1064 |  	 */ | 
|---|
| 1071 |  | -	if (idx != UINT_MAX && clk == base->clk) {  | 
|---|
| 1072 |  | -		enqueue_timer(base, timer, idx);  | 
|---|
| 1073 |  | -		trigger_dyntick_cpu(base, timer);  | 
|---|
| 1074 |  | -	} else {  | 
|---|
 | 1065 | +	if (idx != UINT_MAX && clk == base->clk)  | 
|---|
 | 1066 | +		enqueue_timer(base, timer, idx, bucket_expiry);  | 
|---|
 | 1067 | +	else  | 
|---|
| 1075 | 1068 |  		internal_add_timer(base, timer); | 
|---|
| 1076 |  | -	}  | 
|---|
| 1077 | 1069 |   | 
|---|
| 1078 | 1070 |  out_unlock: | 
|---|
| 1079 | 1071 |  	raw_spin_unlock_irqrestore(&base->lock, flags); | 
|---|
| .. | .. | 
|---|
| 1155 | 1147 |  void add_timer(struct timer_list *timer) | 
|---|
| 1156 | 1148 |  { | 
|---|
| 1157 | 1149 |  	BUG_ON(timer_pending(timer)); | 
|---|
| 1158 |  | -	mod_timer(timer, timer->expires);  | 
|---|
 | 1150 | +	__mod_timer(timer, timer->expires, MOD_TIMER_NOTPENDING);  | 
|---|
| 1159 | 1151 |  } | 
|---|
| 1160 | 1152 |  EXPORT_SYMBOL(add_timer); | 
|---|
| 1161 | 1153 |   | 
|---|
| .. | .. | 
|---|
| 1192 | 1184 |  	} | 
|---|
| 1193 | 1185 |  	forward_timer_base(base); | 
|---|
| 1194 | 1186 |   | 
|---|
| 1195 |  | -	debug_activate(timer, timer->expires);  | 
|---|
 | 1187 | +	debug_timer_activate(timer);  | 
|---|
| 1196 | 1188 |  	internal_add_timer(base, timer); | 
|---|
| 1197 | 1189 |  	raw_spin_unlock_irqrestore(&base->lock, flags); | 
|---|
| 1198 | 1190 |  } | 
|---|
| .. | .. | 
|---|
| 1227 | 1219 |  } | 
|---|
| 1228 | 1220 |  EXPORT_SYMBOL(del_timer); | 
|---|
| 1229 | 1221 |   | 
|---|
| 1230 |  | -static int __try_to_del_timer_sync(struct timer_list *timer,  | 
|---|
| 1231 |  | -				   struct timer_base **basep)  | 
|---|
| 1232 |  | -{  | 
|---|
| 1233 |  | -	struct timer_base *base;  | 
|---|
| 1234 |  | -	unsigned long flags;  | 
|---|
| 1235 |  | -	int ret = -1;  | 
|---|
| 1236 |  | -  | 
|---|
| 1237 |  | -	debug_assert_init(timer);  | 
|---|
| 1238 |  | -  | 
|---|
| 1239 |  | -	*basep = base = lock_timer_base(timer, &flags);  | 
|---|
| 1240 |  | -  | 
|---|
| 1241 |  | -	if (base->running_timer != timer)  | 
|---|
| 1242 |  | -		ret = detach_if_pending(timer, base, true);  | 
|---|
| 1243 |  | -  | 
|---|
| 1244 |  | -	raw_spin_unlock_irqrestore(&base->lock, flags);  | 
|---|
| 1245 |  | -  | 
|---|
| 1246 |  | -	return ret;  | 
|---|
| 1247 |  | -}  | 
|---|
| 1248 |  | -  | 
|---|
| 1249 | 1222 |  /** | 
|---|
| 1250 | 1223 |   * try_to_del_timer_sync - Try to deactivate a timer | 
|---|
| 1251 | 1224 |   * @timer: timer to delete | 
|---|
| .. | .. | 
|---|
| 1256 | 1229 |  int try_to_del_timer_sync(struct timer_list *timer) | 
|---|
| 1257 | 1230 |  { | 
|---|
| 1258 | 1231 |  	struct timer_base *base; | 
|---|
 | 1232 | +	unsigned long flags;  | 
|---|
 | 1233 | +	int ret = -1;  | 
|---|
| 1259 | 1234 |   | 
|---|
| 1260 |  | -	return __try_to_del_timer_sync(timer, &base);  | 
|---|
 | 1235 | +	debug_assert_init(timer);  | 
|---|
 | 1236 | +  | 
|---|
 | 1237 | +	base = lock_timer_base(timer, &flags);  | 
|---|
 | 1238 | +  | 
|---|
 | 1239 | +	if (base->running_timer != timer)  | 
|---|
 | 1240 | +		ret = detach_if_pending(timer, base, true);  | 
|---|
 | 1241 | +  | 
|---|
 | 1242 | +	raw_spin_unlock_irqrestore(&base->lock, flags);  | 
|---|
 | 1243 | +  | 
|---|
 | 1244 | +	return ret;  | 
|---|
| 1261 | 1245 |  } | 
|---|
| 1262 | 1246 |  EXPORT_SYMBOL(try_to_del_timer_sync); | 
|---|
| 1263 | 1247 |   | 
|---|
| 1264 |  | -#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)  | 
|---|
| 1265 |  | -static int __del_timer_sync(struct timer_list *timer)  | 
|---|
 | 1248 | +#ifdef CONFIG_PREEMPT_RT  | 
|---|
 | 1249 | +static __init void timer_base_init_expiry_lock(struct timer_base *base)  | 
|---|
| 1266 | 1250 |  { | 
|---|
| 1267 |  | -	struct timer_base *base;  | 
|---|
| 1268 |  | -	int ret;  | 
|---|
 | 1251 | +	spin_lock_init(&base->expiry_lock);  | 
|---|
 | 1252 | +}  | 
|---|
| 1269 | 1253 |   | 
|---|
| 1270 |  | -	for (;;) {  | 
|---|
| 1271 |  | -		ret = __try_to_del_timer_sync(timer, &base);  | 
|---|
| 1272 |  | -		if (ret >= 0)  | 
|---|
| 1273 |  | -			return ret;  | 
|---|
 | 1254 | +static inline void timer_base_lock_expiry(struct timer_base *base)  | 
|---|
 | 1255 | +{  | 
|---|
 | 1256 | +	spin_lock(&base->expiry_lock);  | 
|---|
 | 1257 | +}  | 
|---|
| 1274 | 1258 |   | 
|---|
| 1275 |  | -		/*  | 
|---|
| 1276 |  | -		 * When accessing the lock, timers of base are no longer expired  | 
|---|
| 1277 |  | -		 * and so timer is no longer running.  | 
|---|
| 1278 |  | -		 */  | 
|---|
| 1279 |  | -		spin_lock(&base->expiry_lock);  | 
|---|
 | 1259 | +static inline void timer_base_unlock_expiry(struct timer_base *base)  | 
|---|
 | 1260 | +{  | 
|---|
 | 1261 | +	spin_unlock(&base->expiry_lock);  | 
|---|
 | 1262 | +}  | 
|---|
 | 1263 | +  | 
|---|
 | 1264 | +/*  | 
|---|
 | 1265 | + * The counterpart to del_timer_wait_running().  | 
|---|
 | 1266 | + *  | 
|---|
 | 1267 | + * If there is a waiter for base->expiry_lock, then it was waiting for the  | 
|---|
 | 1268 | + * timer callback to finish. Drop expiry_lock and reaquire it. That allows  | 
|---|
 | 1269 | + * the waiter to acquire the lock and make progress.  | 
|---|
 | 1270 | + */  | 
|---|
 | 1271 | +static void timer_sync_wait_running(struct timer_base *base)  | 
|---|
 | 1272 | +{  | 
|---|
 | 1273 | +	if (atomic_read(&base->timer_waiters)) {  | 
|---|
 | 1274 | +		raw_spin_unlock_irq(&base->lock);  | 
|---|
| 1280 | 1275 |  		spin_unlock(&base->expiry_lock); | 
|---|
 | 1276 | +		spin_lock(&base->expiry_lock);  | 
|---|
 | 1277 | +		raw_spin_lock_irq(&base->lock);  | 
|---|
| 1281 | 1278 |  	} | 
|---|
| 1282 | 1279 |  } | 
|---|
| 1283 | 1280 |   | 
|---|
 | 1281 | +/*  | 
|---|
 | 1282 | + * This function is called on PREEMPT_RT kernels when the fast path  | 
|---|
 | 1283 | + * deletion of a timer failed because the timer callback function was  | 
|---|
 | 1284 | + * running.  | 
|---|
 | 1285 | + *  | 
|---|
 | 1286 | + * This prevents priority inversion if the softirq thread on a remote CPU  | 
|---|
 | 1287 | + * got preempted, and it prevents a live lock when the task which tries to  | 
|---|
 | 1288 | + * delete a timer preempted the softirq thread running the timer callback  | 
|---|
 | 1289 | + * function.  | 
|---|
 | 1290 | + */  | 
|---|
 | 1291 | +static void del_timer_wait_running(struct timer_list *timer)  | 
|---|
 | 1292 | +{  | 
|---|
 | 1293 | +	u32 tf;  | 
|---|
 | 1294 | +  | 
|---|
 | 1295 | +	tf = READ_ONCE(timer->flags);  | 
|---|
 | 1296 | +	if (!(tf & (TIMER_MIGRATING | TIMER_IRQSAFE))) {  | 
|---|
 | 1297 | +		struct timer_base *base = get_timer_base(tf);  | 
|---|
 | 1298 | +  | 
|---|
 | 1299 | +		/*  | 
|---|
 | 1300 | +		 * Mark the base as contended and grab the expiry lock,  | 
|---|
 | 1301 | +		 * which is held by the softirq across the timer  | 
|---|
 | 1302 | +		 * callback. Drop the lock immediately so the softirq can  | 
|---|
 | 1303 | +		 * expire the next timer. In theory the timer could already  | 
|---|
 | 1304 | +		 * be running again, but that's more than unlikely and just  | 
|---|
 | 1305 | +		 * causes another wait loop.  | 
|---|
 | 1306 | +		 */  | 
|---|
 | 1307 | +		atomic_inc(&base->timer_waiters);  | 
|---|
 | 1308 | +		spin_lock_bh(&base->expiry_lock);  | 
|---|
 | 1309 | +		atomic_dec(&base->timer_waiters);  | 
|---|
 | 1310 | +		spin_unlock_bh(&base->expiry_lock);  | 
|---|
 | 1311 | +	}  | 
|---|
 | 1312 | +}  | 
|---|
 | 1313 | +#else  | 
|---|
 | 1314 | +static inline void timer_base_init_expiry_lock(struct timer_base *base) { }  | 
|---|
 | 1315 | +static inline void timer_base_lock_expiry(struct timer_base *base) { }  | 
|---|
 | 1316 | +static inline void timer_base_unlock_expiry(struct timer_base *base) { }  | 
|---|
 | 1317 | +static inline void timer_sync_wait_running(struct timer_base *base) { }  | 
|---|
 | 1318 | +static inline void del_timer_wait_running(struct timer_list *timer) { }  | 
|---|
 | 1319 | +#endif  | 
|---|
 | 1320 | +  | 
|---|
 | 1321 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)  | 
|---|
| 1284 | 1322 |  /** | 
|---|
| 1285 | 1323 |   * del_timer_sync - deactivate a timer and wait for the handler to finish. | 
|---|
| 1286 | 1324 |   * @timer: the timer to be deactivated | 
|---|
| .. | .. | 
|---|
| 1319 | 1357 |   */ | 
|---|
| 1320 | 1358 |  int del_timer_sync(struct timer_list *timer) | 
|---|
| 1321 | 1359 |  { | 
|---|
 | 1360 | +	int ret;  | 
|---|
 | 1361 | +  | 
|---|
| 1322 | 1362 |  #ifdef CONFIG_LOCKDEP | 
|---|
| 1323 | 1363 |  	unsigned long flags; | 
|---|
| 1324 | 1364 |   | 
|---|
| .. | .. | 
|---|
| 1337 | 1377 |  	 */ | 
|---|
| 1338 | 1378 |  	WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE)); | 
|---|
| 1339 | 1379 |   | 
|---|
| 1340 |  | -	return __del_timer_sync(timer);  | 
|---|
 | 1380 | +	/*  | 
|---|
 | 1381 | +	 * Must be able to sleep on PREEMPT_RT because of the slowpath in  | 
|---|
 | 1382 | +	 * del_timer_wait_running().  | 
|---|
 | 1383 | +	 */  | 
|---|
 | 1384 | +	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(timer->flags & TIMER_IRQSAFE))  | 
|---|
 | 1385 | +		lockdep_assert_preemption_enabled();  | 
|---|
 | 1386 | +  | 
|---|
 | 1387 | +	do {  | 
|---|
 | 1388 | +		ret = try_to_del_timer_sync(timer);  | 
|---|
 | 1389 | +  | 
|---|
 | 1390 | +		if (unlikely(ret < 0)) {  | 
|---|
 | 1391 | +			del_timer_wait_running(timer);  | 
|---|
 | 1392 | +			cpu_relax();  | 
|---|
 | 1393 | +		}  | 
|---|
 | 1394 | +	} while (ret < 0);  | 
|---|
 | 1395 | +  | 
|---|
 | 1396 | +	return ret;  | 
|---|
| 1341 | 1397 |  } | 
|---|
| 1342 | 1398 |  EXPORT_SYMBOL(del_timer_sync); | 
|---|
| 1343 | 1399 |  #endif | 
|---|
| 1344 | 1400 |   | 
|---|
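As a usage-level illustration of the API whose slow path changed above (hypothetical driver names, not code from this patch): callers are unaffected by the PREEMPT_RT expiry-lock handling, del_timer_sync() still only promises that the callback is no longer running once it returns.

```c
#include <linux/jiffies.h>
#include <linux/timer.h>

/* Hypothetical periodic-poll timer; illustrates the caller contract
 * documented above, not code from this file. */
struct my_dev {
	struct timer_list poll_timer;
};

static void my_dev_poll(struct timer_list *t)
{
	struct my_dev *dev = from_timer(dev, t, poll_timer);

	/* ... poll hardware ..., then re-arm one second later. */
	mod_timer(&dev->poll_timer, jiffies + HZ);
}

static void my_dev_start(struct my_dev *dev)
{
	timer_setup(&dev->poll_timer, my_dev_poll, 0);
	mod_timer(&dev->poll_timer, jiffies + HZ);
}

static void my_dev_stop(struct my_dev *dev)
{
	/* May sleep on PREEMPT_RT if the callback is running, so call
	 * from preemptible context (see the lockdep assertion added in
	 * del_timer_sync() above). */
	del_timer_sync(&dev->poll_timer);
}
```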
| 1345 |  | -static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *))  | 
|---|
 | 1401 | +static void call_timer_fn(struct timer_list *timer,  | 
|---|
 | 1402 | +			  void (*fn)(struct timer_list *),  | 
|---|
 | 1403 | +			  unsigned long baseclk)  | 
|---|
| 1346 | 1404 |  { | 
|---|
| 1347 | 1405 |  	int count = preempt_count(); | 
|---|
| 1348 | 1406 |   | 
|---|
| .. | .. | 
|---|
| 1365 | 1423 |  	 */ | 
|---|
| 1366 | 1424 |  	lock_map_acquire(&lockdep_map); | 
|---|
| 1367 | 1425 |   | 
|---|
| 1368 |  | -	trace_timer_expire_entry(timer);  | 
|---|
 | 1426 | +	trace_timer_expire_entry(timer, baseclk);  | 
|---|
| 1369 | 1427 |  	fn(timer); | 
|---|
| 1370 | 1428 |  	trace_timer_expire_exit(timer); | 
|---|
| 1371 | 1429 |   | 
|---|
| 1372 | 1430 |  	lock_map_release(&lockdep_map); | 
|---|
| 1373 | 1431 |   | 
|---|
| 1374 | 1432 |  	if (count != preempt_count()) { | 
|---|
| 1375 |  | -		WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",  | 
|---|
 | 1433 | +		WARN_ONCE(1, "timer: %pS preempt leak: %08x -> %08x\n",  | 
|---|
| 1376 | 1434 |  			  fn, count, preempt_count()); | 
|---|
| 1377 | 1435 |  		/* | 
|---|
| 1378 | 1436 |  		 * Restore the preempt count. That gives us a decent | 
|---|
| .. | .. | 
|---|
| 1386 | 1444 |   | 
|---|
| 1387 | 1445 |  static void expire_timers(struct timer_base *base, struct hlist_head *head) | 
|---|
| 1388 | 1446 |  { | 
|---|
 | 1447 | +	/*  | 
|---|
 | 1448 | +	 * This value is required only for tracing. base->clk was  | 
|---|
 | 1449 | +	 * incremented directly before expire_timers was called. But expiry  | 
|---|
 | 1450 | +	 * is related to the old base->clk value.  | 
|---|
 | 1451 | +	 */  | 
|---|
 | 1452 | +	unsigned long baseclk = base->clk - 1;  | 
|---|
 | 1453 | +  | 
|---|
| 1389 | 1454 |  	while (!hlist_empty(head)) { | 
|---|
| 1390 | 1455 |  		struct timer_list *timer; | 
|---|
| 1391 | 1456 |  		void (*fn)(struct timer_list *); | 
|---|
| .. | .. | 
|---|
| 1399 | 1464 |   | 
|---|
| 1400 | 1465 |  		if (timer->flags & TIMER_IRQSAFE) { | 
|---|
| 1401 | 1466 |  			raw_spin_unlock(&base->lock); | 
|---|
| 1402 |  | -			call_timer_fn(timer, fn);  | 
|---|
| 1403 |  | -			base->running_timer = NULL;  | 
|---|
| 1404 |  | -			spin_unlock(&base->expiry_lock);  | 
|---|
| 1405 |  | -			spin_lock(&base->expiry_lock);  | 
|---|
 | 1467 | +			call_timer_fn(timer, fn, baseclk);  | 
|---|
| 1406 | 1468 |  			raw_spin_lock(&base->lock); | 
|---|
 | 1469 | +			base->running_timer = NULL;  | 
|---|
| 1407 | 1470 |  		} else { | 
|---|
| 1408 | 1471 |  			raw_spin_unlock_irq(&base->lock); | 
|---|
| 1409 |  | -			call_timer_fn(timer, fn);  | 
|---|
| 1410 |  | -			base->running_timer = NULL;  | 
|---|
| 1411 |  | -			spin_unlock(&base->expiry_lock);  | 
|---|
| 1412 |  | -			spin_lock(&base->expiry_lock);  | 
|---|
 | 1472 | +			call_timer_fn(timer, fn, baseclk);  | 
|---|
| 1413 | 1473 |  			raw_spin_lock_irq(&base->lock); | 
|---|
 | 1474 | +			base->running_timer = NULL;  | 
|---|
 | 1475 | +			timer_sync_wait_running(base);  | 
|---|
| 1414 | 1476 |  		} | 
|---|
| 1415 | 1477 |  	} | 
|---|
| 1416 | 1478 |  } | 
|---|
| 1417 | 1479 |   | 
|---|
| 1418 |  | -static int __collect_expired_timers(struct timer_base *base,  | 
|---|
| 1419 |  | -				    struct hlist_head *heads)  | 
|---|
 | 1480 | +static int collect_expired_timers(struct timer_base *base,  | 
|---|
 | 1481 | +				  struct hlist_head *heads)  | 
|---|
| 1420 | 1482 |  { | 
|---|
| 1421 |  | -	unsigned long clk = base->clk;  | 
|---|
 | 1483 | +	unsigned long clk = base->clk = base->next_expiry;  | 
|---|
| 1422 | 1484 |  	struct hlist_head *vec; | 
|---|
| 1423 | 1485 |  	int i, levels = 0; | 
|---|
| 1424 | 1486 |  	unsigned int idx; | 
|---|
| .. | .. | 
|---|
| 1440 | 1502 |  	return levels; | 
|---|
| 1441 | 1503 |  } | 
|---|
| 1442 | 1504 |   | 
|---|
| 1443 |  | -#ifdef CONFIG_NO_HZ_COMMON  | 
|---|
| 1444 | 1505 |  /* | 
|---|
| 1445 | 1506 |   * Find the next pending bucket of a level. Search from level start (@offset) | 
|---|
| 1446 | 1507 |   * + @clk upwards and if nothing there, search from start of the level | 
|---|
| .. | .. | 
|---|
| 1473 | 1534 |  	clk = base->clk; | 
|---|
| 1474 | 1535 |  	for (lvl = 0; lvl < LVL_DEPTH; lvl++, offset += LVL_SIZE) { | 
|---|
| 1475 | 1536 |  		int pos = next_pending_bucket(base, offset, clk & LVL_MASK); | 
|---|
 | 1537 | +		unsigned long lvl_clk = clk & LVL_CLK_MASK;  | 
|---|
| 1476 | 1538 |   | 
|---|
| 1477 | 1539 |  		if (pos >= 0) { | 
|---|
| 1478 | 1540 |  			unsigned long tmp = clk + (unsigned long) pos; | 
|---|
| .. | .. | 
|---|
| 1480 | 1542 |  			tmp <<= LVL_SHIFT(lvl); | 
|---|
| 1481 | 1543 |  			if (time_before(tmp, next)) | 
|---|
| 1482 | 1544 |  				next = tmp; | 
|---|
 | 1545 | +  | 
|---|
 | 1546 | +			/*  | 
|---|
 | 1547 | +			 * If the next expiration happens before we reach  | 
|---|
 | 1548 | +			 * the next level, no need to check further.  | 
|---|
 | 1549 | +			 */  | 
|---|
 | 1550 | +			if (pos <= ((LVL_CLK_DIV - lvl_clk) & LVL_CLK_MASK))  | 
|---|
 | 1551 | +				break;  | 
|---|
| 1483 | 1552 |  		} | 
|---|
| 1484 | 1553 |  		/* | 
|---|
| 1485 | 1554 |  		 * Clock for the next level. If the current level clock lower | 
|---|
| .. | .. | 
|---|
| 1517 | 1586 |  		 * So the simple check whether the lower bits of the current | 
|---|
| 1518 | 1587 |  		 * level are 0 or not is sufficient for all cases. | 
|---|
| 1519 | 1588 |  		 */ | 
|---|
| 1520 |  | -		adj = clk & LVL_CLK_MASK ? 1 : 0;  | 
|---|
 | 1589 | +		adj = lvl_clk ? 1 : 0;  | 
|---|
| 1521 | 1590 |  		clk >>= LVL_CLK_SHIFT; | 
|---|
| 1522 | 1591 |  		clk += adj; | 
|---|
| 1523 | 1592 |  	} | 
|---|
 | 1593 | +  | 
|---|
 | 1594 | +	base->next_expiry_recalc = false;  | 
|---|
 | 1595 | +	base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA);  | 
|---|
 | 1596 | +  | 
|---|
| 1524 | 1597 |  	return next; | 
|---|
| 1525 | 1598 |  } | 
|---|
| 1526 | 1599 |   | 
|---|
 | 1600 | +#ifdef CONFIG_NO_HZ_COMMON  | 
|---|
| 1527 | 1601 |  /* | 
|---|
| 1528 | 1602 |   * Check, if the next hrtimer event is before the next timer wheel | 
|---|
| 1529 | 1603 |   * event: | 
|---|
| .. | .. | 
|---|
| 1570 | 1644 |  	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); | 
|---|
| 1571 | 1645 |  	u64 expires = KTIME_MAX; | 
|---|
| 1572 | 1646 |  	unsigned long nextevt; | 
|---|
| 1573 |  | -	bool is_max_delta;  | 
|---|
| 1574 | 1647 |   | 
|---|
| 1575 | 1648 |  	/* | 
|---|
| 1576 | 1649 |  	 * Pretend that there is no timer pending if the cpu is offline. | 
|---|
| .. | .. | 
|---|
| 1580 | 1653 |  		return expires; | 
|---|
| 1581 | 1654 |   | 
|---|
| 1582 | 1655 |  	raw_spin_lock(&base->lock); | 
|---|
| 1583 |  | -	nextevt = __next_timer_interrupt(base);  | 
|---|
| 1584 |  | -	is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);  | 
|---|
| 1585 |  | -	base->next_expiry = nextevt;  | 
|---|
 | 1656 | +	if (base->next_expiry_recalc)  | 
|---|
 | 1657 | +		base->next_expiry = __next_timer_interrupt(base);  | 
|---|
 | 1658 | +	nextevt = base->next_expiry;  | 
|---|
 | 1659 | +  | 
|---|
| 1586 | 1660 |  	/* | 
|---|
| 1587 | 1661 |  	 * We have a fresh next event. Check whether we can forward the | 
|---|
| 1588 | 1662 |  	 * base. We can only do that when @basej is past base->clk | 
|---|
| .. | .. | 
|---|
| 1599 | 1673 |  		expires = basem; | 
|---|
| 1600 | 1674 |  		base->is_idle = false; | 
|---|
| 1601 | 1675 |  	} else { | 
|---|
| 1602 |  | -		if (!is_max_delta)  | 
|---|
 | 1676 | +		if (base->timers_pending)  | 
|---|
| 1603 | 1677 |  			expires = basem + (u64)(nextevt - basej) * TICK_NSEC; | 
|---|
| 1604 | 1678 |  		/* | 
|---|
| 1605 | 1679 |  		 * If we expect to sleep more than a tick, mark the base idle. | 
|---|
| .. | .. | 
|---|
| 1608 | 1682 |  		 * logic is only maintained for the BASE_STD base, deferrable | 
|---|
| 1609 | 1683 |  		 * timers may still see large granularity skew (by design). | 
|---|
| 1610 | 1684 |  		 */ | 
|---|
| 1611 |  | -		if ((expires - basem) > TICK_NSEC) {  | 
|---|
| 1612 |  | -			base->must_forward_clk = true;  | 
|---|
 | 1685 | +		if ((expires - basem) > TICK_NSEC)  | 
|---|
| 1613 | 1686 |  			base->is_idle = true; | 
|---|
| 1614 |  | -		}  | 
|---|
| 1615 | 1687 |  	} | 
|---|
| 1616 | 1688 |  	raw_spin_unlock(&base->lock); | 
|---|
| 1617 | 1689 |   | 
|---|
| .. | .. | 
|---|
| 1635 | 1707 |  	 */ | 
|---|
| 1636 | 1708 |  	base->is_idle = false; | 
|---|
| 1637 | 1709 |  } | 
|---|
| 1638 |  | -  | 
|---|
| 1639 |  | -static int collect_expired_timers(struct timer_base *base,  | 
|---|
| 1640 |  | -				  struct hlist_head *heads)  | 
|---|
| 1641 |  | -{  | 
|---|
| 1642 |  | -	unsigned long now = READ_ONCE(jiffies);  | 
|---|
| 1643 |  | -  | 
|---|
| 1644 |  | -	/*  | 
|---|
| 1645 |  | -	 * NOHZ optimization. After a long idle sleep we need to forward the  | 
|---|
| 1646 |  | -	 * base to current jiffies. Avoid a loop by searching the bitfield for  | 
|---|
| 1647 |  | -	 * the next expiring timer.  | 
|---|
| 1648 |  | -	 */  | 
|---|
| 1649 |  | -	if ((long)(now - base->clk) > 2) {  | 
|---|
| 1650 |  | -		unsigned long next = __next_timer_interrupt(base);  | 
|---|
| 1651 |  | -  | 
|---|
| 1652 |  | -		/*  | 
|---|
| 1653 |  | -		 * If the next timer is ahead of time forward to current  | 
|---|
| 1654 |  | -		 * jiffies, otherwise forward to the next expiry time:  | 
|---|
| 1655 |  | -		 */  | 
|---|
| 1656 |  | -		if (time_after(next, now)) {  | 
|---|
| 1657 |  | -			/*  | 
|---|
| 1658 |  | -			 * The call site will increment base->clk and then  | 
|---|
| 1659 |  | -			 * terminate the expiry loop immediately.  | 
|---|
| 1660 |  | -			 */  | 
|---|
| 1661 |  | -			base->clk = now;  | 
|---|
| 1662 |  | -			return 0;  | 
|---|
| 1663 |  | -		}  | 
|---|
| 1664 |  | -		base->clk = next;  | 
|---|
| 1665 |  | -	}  | 
|---|
| 1666 |  | -	return __collect_expired_timers(base, heads);  | 
|---|
| 1667 |  | -}  | 
|---|
| 1668 |  | -#else  | 
|---|
| 1669 |  | -static inline int collect_expired_timers(struct timer_base *base,  | 
|---|
| 1670 |  | -					 struct hlist_head *heads)  | 
|---|
| 1671 |  | -{  | 
|---|
| 1672 |  | -	return __collect_expired_timers(base, heads);  | 
|---|
| 1673 |  | -}  | 
|---|
| 1674 | 1710 |  #endif | 
|---|
| 1675 | 1711 |   | 
|---|
| 1676 | 1712 |  /* | 
|---|
| .. | .. | 
|---|
| 1681 | 1717 |  { | 
|---|
| 1682 | 1718 |  	struct task_struct *p = current; | 
|---|
| 1683 | 1719 |   | 
|---|
 | 1720 | +	PRANDOM_ADD_NOISE(jiffies, user_tick, p, 0);  | 
|---|
 | 1721 | +  | 
|---|
| 1684 | 1722 |  	/* Note: this timer irq context must be accounted for as well. */ | 
|---|
| 1685 | 1723 |  	account_process_tick(p, user_tick); | 
|---|
| 1686 | 1724 |  	run_local_timers(); | 
|---|
| 1687 |  | -	rcu_check_callbacks(user_tick);  | 
|---|
 | 1725 | +	rcu_sched_clock_irq(user_tick);  | 
|---|
| 1688 | 1726 |  #ifdef CONFIG_IRQ_WORK | 
|---|
| 1689 | 1727 |  	if (in_irq()) | 
|---|
| 1690 | 1728 |  		irq_work_tick(); | 
|---|
| 1691 | 1729 |  #endif | 
|---|
| 1692 | 1730 |  	scheduler_tick(); | 
|---|
| 1693 | 1731 |  	if (IS_ENABLED(CONFIG_POSIX_TIMERS)) | 
|---|
| 1694 |  | -		run_posix_cpu_timers(p);  | 
|---|
 | 1732 | +		run_posix_cpu_timers();  | 
|---|
| 1695 | 1733 |  } | 
|---|
| 1696 | 1734 |   | 
|---|
| 1697 | 1735 |  /** | 
|---|
| .. | .. | 
|---|
| 1703 | 1741 |  	struct hlist_head heads[LVL_DEPTH]; | 
|---|
| 1704 | 1742 |  	int levels; | 
|---|
| 1705 | 1743 |   | 
|---|
| 1706 |  | -	if (!time_after_eq(jiffies, base->clk))  | 
|---|
 | 1744 | +	if (time_before(jiffies, base->next_expiry))  | 
|---|
| 1707 | 1745 |  		return; | 
|---|
| 1708 | 1746 |   | 
|---|
| 1709 |  | -	spin_lock(&base->expiry_lock);  | 
|---|
 | 1747 | +	timer_base_lock_expiry(base);  | 
|---|
| 1710 | 1748 |  	raw_spin_lock_irq(&base->lock); | 
|---|
| 1711 | 1749 |   | 
|---|
| 1712 |  | -	/*  | 
|---|
| 1713 |  | -	 * timer_base::must_forward_clk must be cleared before running  | 
|---|
| 1714 |  | -	 * timers so that any timer functions that call mod_timer() will  | 
|---|
| 1715 |  | -	 * not try to forward the base. Idle tracking / clock forwarding  | 
|---|
| 1716 |  | -	 * logic is only used with BASE_STD timers.  | 
|---|
| 1717 |  | -	 *  | 
|---|
| 1718 |  | -	 * The must_forward_clk flag is cleared unconditionally also for  | 
|---|
| 1719 |  | -	 * the deferrable base. The deferrable base is not affected by idle  | 
|---|
| 1720 |  | -	 * tracking and never forwarded, so clearing the flag is a NOOP.  | 
|---|
| 1721 |  | -	 *  | 
|---|
| 1722 |  | -	 * The fact that the deferrable base is never forwarded can cause  | 
|---|
| 1723 |  | -	 * large variations in granularity for deferrable timers, but they  | 
|---|
| 1724 |  | -	 * can be deferred for long periods due to idle anyway.  | 
|---|
| 1725 |  | -	 */  | 
|---|
| 1726 |  | -	base->must_forward_clk = false;  | 
|---|
| 1727 |  | -  | 
|---|
| 1728 |  | -	while (time_after_eq(jiffies, base->clk)) {  | 
|---|
| 1729 |  | -  | 
|---|
 | 1750 | +	while (time_after_eq(jiffies, base->clk) &&  | 
|---|
 | 1751 | +	       time_after_eq(jiffies, base->next_expiry)) {  | 
|---|
| 1730 | 1752 |  		levels = collect_expired_timers(base, heads); | 
|---|
 | 1753 | +		/*  | 
|---|
 | 1754 | +		 * The two possible reasons for not finding any expired  | 
|---|
 | 1755 | +		 * timer at this clk are that all matching timers have been  | 
|---|
 | 1756 | +		 * dequeued or no timer has been queued since  | 
|---|
 | 1757 | +		 * base::next_expiry was set to base::clk +  | 
|---|
 | 1758 | +		 * NEXT_TIMER_MAX_DELTA.  | 
|---|
 | 1759 | +		 */  | 
|---|
 | 1760 | +		WARN_ON_ONCE(!levels && !base->next_expiry_recalc  | 
|---|
 | 1761 | +			     && base->timers_pending);  | 
|---|
| 1731 | 1762 |  		base->clk++; | 
|---|
 | 1763 | +		base->next_expiry = __next_timer_interrupt(base);  | 
|---|
| 1732 | 1764 |   | 
|---|
| 1733 | 1765 |  		while (levels--) | 
|---|
| 1734 | 1766 |  			expire_timers(base, heads + levels); | 
|---|
| 1735 | 1767 |  	} | 
|---|
| 1736 | 1768 |  	raw_spin_unlock_irq(&base->lock); | 
|---|
| 1737 |  | -	spin_unlock(&base->expiry_lock);  | 
|---|
 | 1769 | +	timer_base_unlock_expiry(base);  | 
|---|
| 1738 | 1770 |  } | 
|---|
| 1739 | 1771 |   | 
|---|
| 1740 | 1772 |  /* | 
|---|
| .. | .. | 
|---|
| 1743 | 1775 |  static __latent_entropy void run_timer_softirq(struct softirq_action *h) | 
|---|
| 1744 | 1776 |  { | 
|---|
| 1745 | 1777 |  	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); | 
|---|
| 1746 |  | -  | 
|---|
| 1747 |  | -	irq_work_tick_soft();  | 
|---|
| 1748 | 1778 |   | 
|---|
| 1749 | 1779 |  	__run_timers(base); | 
|---|
| 1750 | 1780 |  	if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) | 
|---|
| .. | .. | 
|---|
| 1760 | 1790 |   | 
|---|
| 1761 | 1791 |  	hrtimer_run_queues(); | 
|---|
| 1762 | 1792 |  	/* Raise the softirq only if required. */ | 
|---|
| 1763 |  | -	if (time_before(jiffies, base->clk)) {  | 
|---|
 | 1793 | +	if (time_before(jiffies, base->next_expiry)) {  | 
|---|
| 1764 | 1794 |  		if (!IS_ENABLED(CONFIG_NO_HZ_COMMON)) | 
|---|
| 1765 | 1795 |  			return; | 
|---|
| 1766 | 1796 |  		/* CPU is awake, so check the deferrable base. */ | 
|---|
| 1767 | 1797 |  		base++; | 
|---|
| 1768 |  | -		if (time_before(jiffies, base->clk))  | 
|---|
 | 1798 | +		if (time_before(jiffies, base->next_expiry))  | 
|---|
| 1769 | 1799 |  			return; | 
|---|
| 1770 | 1800 |  	} | 
|---|
| 1771 | 1801 |  	raise_softirq(TIMER_SOFTIRQ); | 
|---|
| .. | .. | 
|---|
| 1791 | 1821 |   * schedule_timeout - sleep until timeout | 
|---|
| 1792 | 1822 |   * @timeout: timeout value in jiffies | 
|---|
| 1793 | 1823 |   * | 
|---|
| 1794 |  | - * Make the current task sleep until @timeout jiffies have  | 
|---|
| 1795 |  | - * elapsed. The routine will return immediately unless  | 
|---|
| 1796 |  | - * the current task state has been set (see set_current_state()).  | 
|---|
 | 1824 | + * Make the current task sleep until @timeout jiffies have elapsed.  | 
|---|
 | 1825 | + * The function behavior depends on the current task state  | 
|---|
 | 1826 | + * (see also set_current_state() description):  | 
|---|
| 1797 | 1827 |   * | 
|---|
| 1798 |  | - * You can set the task state as follows -  | 
|---|
 | 1828 | + * %TASK_RUNNING - the scheduler is called, but the task does not sleep  | 
|---|
 | 1829 | + * at all. That happens because sched_submit_work() does nothing for  | 
|---|
 | 1830 | + * tasks in %TASK_RUNNING state.  | 
|---|
| 1799 | 1831 |   * | 
|---|
| 1800 | 1832 |   * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to | 
|---|
| 1801 | 1833 |   * pass before the routine returns unless the current task is explicitly | 
|---|
| 1802 |  | - * woken up, (e.g. by wake_up_process())".  | 
|---|
 | 1834 | + * woken up, (e.g. by wake_up_process()).  | 
|---|
| 1803 | 1835 |   * | 
|---|
| 1804 | 1836 |   * %TASK_INTERRUPTIBLE - the routine may return early if a signal is | 
|---|
| 1805 | 1837 |   * delivered to the current task or the current task is explicitly woken | 
|---|
| 1806 | 1838 |   * up. | 
|---|
| 1807 | 1839 |   * | 
|---|
| 1808 |  | - * The current task state is guaranteed to be TASK_RUNNING when this  | 
|---|
 | 1840 | + * The current task state is guaranteed to be %TASK_RUNNING when this  | 
|---|
| 1809 | 1841 |   * routine returns. | 
|---|
| 1810 | 1842 |   * | 
|---|
| 1811 | 1843 |   * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule | 
|---|
| .. | .. | 
|---|
| 1813 | 1845 |   * value will be %MAX_SCHEDULE_TIMEOUT. | 
|---|
| 1814 | 1846 |   * | 
|---|
| 1815 | 1847 |   * Returns 0 when the timer has expired otherwise the remaining time in | 
|---|
| 1816 |  | - * jiffies will be returned.  In all cases the return value is guaranteed  | 
|---|
 | 1848 | + * jiffies will be returned. In all cases the return value is guaranteed  | 
|---|
| 1817 | 1849 |   * to be non-negative. | 
|---|
| 1818 | 1850 |   */ | 
|---|
| 1819 | 1851 |  signed long __sched schedule_timeout(signed long timeout) | 
|---|
| .. | .. | 
|---|
| 1854 | 1886 |   | 
|---|
| 1855 | 1887 |  	timer.task = current; | 
|---|
| 1856 | 1888 |  	timer_setup_on_stack(&timer.timer, process_timeout, 0); | 
|---|
| 1857 |  | -	__mod_timer(&timer.timer, expire, 0);  | 
|---|
 | 1889 | +	__mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING);  | 
|---|
| 1858 | 1890 |  	schedule(); | 
|---|
| 1859 | 1891 |  	del_singleshot_timer_sync(&timer.timer); | 
|---|
| 1860 | 1892 |   | 
|---|
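The kernel-doc above stresses that the task state must be set before calling schedule_timeout(); a minimal sketch of a typical caller (hypothetical function, not from this file) looks like this.

```c
#include <linux/jiffies.h>
#include <linux/sched.h>

/* Hypothetical caller following the kernel-doc above: without the
 * set_current_state() the call degenerates to the TASK_RUNNING case
 * and the task does not actually sleep. */
static long sleep_up_to(unsigned long timeout_jiffies)
{
	set_current_state(TASK_INTERRUPTIBLE);

	/* Returns 0 if the full timeout elapsed, otherwise the jiffies
	 * remaining when the task was woken up or signalled. */
	return schedule_timeout(timeout_jiffies);
}
```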
| .. | .. | 
|---|
| 1927 | 1959 |  		base = per_cpu_ptr(&timer_bases[b], cpu); | 
|---|
| 1928 | 1960 |  		base->clk = jiffies; | 
|---|
| 1929 | 1961 |  		base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; | 
|---|
 | 1962 | +		base->timers_pending = false;  | 
|---|
| 1930 | 1963 |  		base->is_idle = false; | 
|---|
| 1931 |  | -		base->must_forward_clk = true;  | 
|---|
| 1932 | 1964 |  	} | 
|---|
| 1933 | 1965 |  	return 0; | 
|---|
| 1934 | 1966 |  } | 
|---|
| .. | .. | 
|---|
| 1981 | 2013 |  		base->cpu = cpu; | 
|---|
| 1982 | 2014 |  		raw_spin_lock_init(&base->lock); | 
|---|
| 1983 | 2015 |  		base->clk = jiffies; | 
|---|
| 1984 |  | -		spin_lock_init(&base->expiry_lock);  | 
|---|
 | 2016 | +		base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;  | 
|---|
 | 2017 | +		timer_base_init_expiry_lock(base);  | 
|---|
| 1985 | 2018 |  	} | 
|---|
| 1986 | 2019 |  } | 
|---|
| 1987 | 2020 |   | 
|---|
| .. | .. | 
|---|
| 1996 | 2029 |  void __init init_timers(void) | 
|---|
| 1997 | 2030 |  { | 
|---|
| 1998 | 2031 |  	init_timer_cpus(); | 
|---|
 | 2032 | +	posix_cputimers_init_work();  | 
|---|
| 1999 | 2033 |  	open_softirq(TIMER_SOFTIRQ, run_timer_softirq); | 
|---|
| 2000 | 2034 |  } | 
|---|
| 2001 | 2035 |   | 
|---|
| .. | .. | 
|---|
| 2029 | 2063 |  EXPORT_SYMBOL(msleep_interruptible); | 
|---|
| 2030 | 2064 |   | 
|---|
| 2031 | 2065 |  /** | 
|---|
 | 2066 | + * usleep_range_state - Sleep for an approximate time in a given state  | 
|---|
 | 2067 | + * @min:	Minimum time in usecs to sleep  | 
|---|
 | 2068 | + * @max:	Maximum time in usecs to sleep  | 
|---|
 | 2069 | + * @state:	State in which the current task will sleep  | 
|---|
 | 2070 | + *  | 
|---|
 | 2071 | + * In non-atomic context where the exact wakeup time is flexible, use  | 
|---|
 | 2072 | + * usleep_range_state() instead of udelay().  The sleep improves responsiveness  | 
|---|
 | 2073 | + * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces  | 
|---|
 | 2074 | + * power usage by allowing hrtimers to take advantage of an already-  | 
|---|
 | 2075 | + * scheduled interrupt instead of scheduling a new one just for this sleep.  | 
|---|
 | 2076 | + */  | 
|---|
 | 2077 | +void __sched usleep_range_state(unsigned long min, unsigned long max,  | 
|---|
 | 2078 | +				unsigned int state)  | 
|---|
 | 2079 | +{  | 
|---|
 | 2080 | +	ktime_t exp = ktime_add_us(ktime_get(), min);  | 
|---|
 | 2081 | +	u64 delta = (u64)(max - min) * NSEC_PER_USEC;  | 
|---|
 | 2082 | +  | 
|---|
 | 2083 | +	for (;;) {  | 
|---|
 | 2084 | +		__set_current_state(state);  | 
|---|
 | 2085 | +		/* Do not return before the requested sleep time has elapsed */  | 
|---|
 | 2086 | +		if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))  | 
|---|
 | 2087 | +			break;  | 
|---|
 | 2088 | +	}  | 
|---|
 | 2089 | +}  | 
|---|
 | 2090 | +  | 
|---|
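The new helper factors the sleep state out so that wrappers other than the TASK_UNINTERRUPTIBLE usleep_range() below can be built on it. A hedged sketch of such a wrapper follows; whether an equivalent exists in a given tree is an assumption, the point is only what the @state parameter selects.

```c
#include <linux/delay.h>
#include <linux/sched.h>

/* Hypothetical wrapper in the style of usleep_range() further below:
 * only the task state differs. TASK_IDLE keeps the sleeping task out
 * of the load average, which is one plausible use of the new @state
 * parameter (an assumption, not taken from this patch). */
static inline void usleep_idle_range_example(unsigned long min,
					     unsigned long max)
{
	usleep_range_state(min, max, TASK_IDLE);
}
```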
 | 2091 | +/**  | 
|---|
| 2032 | 2092 |   * usleep_range - Sleep for an approximate time | 
|---|
| 2033 | 2093 |   * @min: Minimum time in usecs to sleep | 
|---|
| 2034 | 2094 |   * @max: Maximum time in usecs to sleep | 
|---|
| .. | .. | 
|---|
| 2041 | 2101 |   */ | 
|---|
| 2042 | 2102 |  void __sched usleep_range(unsigned long min, unsigned long max) | 
|---|
| 2043 | 2103 |  { | 
|---|
| 2044 |  | -	ktime_t exp = ktime_add_us(ktime_get(), min);  | 
|---|
| 2045 |  | -	u64 delta = (u64)(max - min) * NSEC_PER_USEC;  | 
|---|
| 2046 |  | -  | 
|---|
| 2047 |  | -	for (;;) {  | 
|---|
| 2048 |  | -		__set_current_state(TASK_UNINTERRUPTIBLE);  | 
|---|
| 2049 |  | -		/* Do not return before the requested sleep time has elapsed */  | 
|---|
| 2050 |  | -		if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))  | 
|---|
| 2051 |  | -			break;  | 
|---|
| 2052 |  | -	}  | 
|---|
 | 2104 | +	usleep_range_state(min, max, TASK_UNINTERRUPTIBLE);  | 
|---|
| 2053 | 2105 |  } | 
|---|
| 2054 | 2106 |  EXPORT_SYMBOL(usleep_range); | 
|---|