| .. | .. | 
|---|
|  | 1 | +// SPDX-License-Identifier: GPL-2.0 | 
|---|
| 1 | 2 | /* | 
|---|
| 2 |  | - *  linux/kernel/time/tick-sched.c | 
|---|
| 3 |  | - * | 
|---|
| 4 | 3 | *  Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | 
|---|
| 5 | 4 | *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | 
|---|
| 6 | 5 | *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner | 
|---|
| .. | .. | 
|---|
| 8 | 7 | *  No idle tick implementation for low and high resolution timers | 
|---|
| 9 | 8 | * | 
|---|
| 10 | 9 | *  Started by: Thomas Gleixner and Ingo Molnar | 
|---|
| 11 |  | - * | 
|---|
| 12 |  | - *  Distribute under GPLv2. | 
|---|
| 13 | 10 | */ | 
|---|
| 14 | 11 | #include <linux/cpu.h> | 
|---|
| 15 | 12 | #include <linux/err.h> | 
|---|
| .. | .. | 
|---|
| 26 | 23 | #include <linux/module.h> | 
|---|
| 27 | 24 | #include <linux/irq_work.h> | 
|---|
| 28 | 25 | #include <linux/posix-timers.h> | 
|---|
| 29 |  | -#include <linux/timer.h> | 
|---|
| 30 | 26 | #include <linux/context_tracking.h> | 
|---|
| 31 | 27 | #include <linux/mm.h> | 
|---|
|  | 28 | +#include <trace/hooks/sched.h> | 
|---|
| 32 | 29 |  | 
|---|
| 33 | 30 | #include <asm/irq_regs.h> | 
|---|
| 34 | 31 |  | 
|---|
| .. | .. | 
|---|
| 134 | 131 | * into a long sleep. If two CPUs happen to assign themselves to | 
|---|
| 135 | 132 | * this duty, then the jiffies update is still serialized by | 
|---|
| 136 | 133 | * jiffies_lock. | 
|---|
|  | 134 | +	 * | 
|---|
|  | 135 | +	 * If nohz_full is enabled, this should not happen because the | 
|---|
|  | 136 | +	 * tick_do_timer_cpu never relinquishes this duty. | 
|---|
| 137 | 137 | */ | 
|---|
| 138 |  | -	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE) | 
|---|
| 139 |  | -	    && !tick_nohz_full_cpu(cpu)) | 
|---|
|  | 138 | +	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) { | 
|---|
|  | 139 | +#ifdef CONFIG_NO_HZ_FULL | 
|---|
|  | 140 | +		WARN_ON_ONCE(tick_nohz_full_running); | 
|---|
|  | 141 | +#endif | 
|---|
| 140 | 142 | tick_do_timer_cpu = cpu; | 
|---|
|  | 143 | +	} | 
|---|
| 141 | 144 | #endif | 
|---|
| 142 | 145 |  | 
|---|
| 143 | 146 | /* Check, if the jiffies need an update */ | 
|---|
| 144 |  | -	if (tick_do_timer_cpu == cpu) | 
|---|
|  | 147 | +	if (tick_do_timer_cpu == cpu) { | 
|---|
| 145 | 148 | tick_do_update_jiffies64(now); | 
|---|
|  | 149 | +		trace_android_vh_jiffies_update(NULL); | 
|---|
|  | 150 | +	} | 
|---|
| 146 | 151 |  | 
|---|
| 147 | 152 | if (ts->inidle) | 
|---|
| 148 | 153 | ts->got_idle_tick = 1; | 
|---|
| .. | .. | 
|---|
| 179 | 184 | #ifdef CONFIG_NO_HZ_FULL | 
|---|
| 180 | 185 | cpumask_var_t tick_nohz_full_mask; | 
|---|
| 181 | 186 | bool tick_nohz_full_running; | 
|---|
|  | 187 | +EXPORT_SYMBOL_GPL(tick_nohz_full_running); | 
|---|
| 182 | 188 | static atomic_t tick_dep_mask; | 
|---|
| 183 | 189 |  | 
|---|
| 184 | 190 | static bool check_tick_dependency(atomic_t *dep) | 
|---|
| .. | .. | 
|---|
| 202 | 208 |  | 
|---|
| 203 | 209 | if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) { | 
|---|
| 204 | 210 | trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE); | 
|---|
|  | 211 | +		return true; | 
|---|
|  | 212 | +	} | 
|---|
|  | 213 | + | 
|---|
|  | 214 | +	if (val & TICK_DEP_MASK_RCU) { | 
|---|
|  | 215 | +		trace_tick_stop(0, TICK_DEP_MASK_RCU); | 
|---|
| 205 | 216 | return true; | 
|---|
| 206 | 217 | } | 
|---|
| 207 | 218 |  | 
|---|
| .. | .. | 
|---|
| 237 | 248 |  | 
|---|
| 238 | 249 | static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { | 
|---|
| 239 | 250 | .func = nohz_full_kick_func, | 
|---|
| 240 |  | -	.flags = IRQ_WORK_HARD_IRQ, | 
|---|
|  | 251 | +	.flags = ATOMIC_INIT(IRQ_WORK_HARD_IRQ), | 
|---|
| 241 | 252 | }; | 
|---|
| 242 | 253 |  | 
|---|
| 243 | 254 | /* | 
|---|
| .. | .. | 
|---|
| 332 | 343 | preempt_enable(); | 
|---|
| 333 | 344 | } | 
|---|
| 334 | 345 | } | 
|---|
|  | 346 | +EXPORT_SYMBOL_GPL(tick_nohz_dep_set_cpu); | 
|---|
| 335 | 347 |  | 
|---|
| 336 | 348 | void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) | 
|---|
| 337 | 349 | { | 
|---|
| .. | .. | 
|---|
| 339 | 351 |  | 
|---|
| 340 | 352 | atomic_andnot(BIT(bit), &ts->tick_dep_mask); | 
|---|
| 341 | 353 | } | 
|---|
|  | 354 | +EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu); | 
|---|
| 342 | 355 |  | 
|---|
| 343 | 356 | /* | 
|---|
| 344 |  | - * Set a per-task tick dependency. Posix CPU timers need this in order to elapse | 
|---|
| 345 |  | - * per task timers. | 
|---|
|  | 357 | + * Set a per-task tick dependency. RCU needs this. Also posix CPU timers | 
|---|
|  | 358 | + * need it in order to elapse per task timers. | 
|---|
| 346 | 359 | */ | 
|---|
| 347 | 360 | void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit) | 
|---|
| 348 | 361 | { | 
|---|
| 349 |  | -	/* | 
|---|
| 350 |  | -	 * We could optimize this with just kicking the target running the task | 
|---|
| 351 |  | -	 * if that noise matters for nohz full users. | 
|---|
| 352 |  | -	 */ | 
|---|
| 353 |  | -	tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit); | 
|---|
|  | 362 | +	if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) { | 
|---|
|  | 363 | +		if (tsk == current) { | 
|---|
|  | 364 | +			preempt_disable(); | 
|---|
|  | 365 | +			tick_nohz_full_kick(); | 
|---|
|  | 366 | +			preempt_enable(); | 
|---|
|  | 367 | +		} else { | 
|---|
|  | 368 | +			/* | 
|---|
|  | 369 | +			 * Some future tick_nohz_full_kick_task() | 
|---|
|  | 370 | +			 * should optimize this. | 
|---|
|  | 371 | +			 */ | 
|---|
|  | 372 | +			tick_nohz_full_kick_all(); | 
|---|
|  | 373 | +		} | 
|---|
|  | 374 | +	} | 
|---|
| 354 | 375 | } | 
|---|
|  | 376 | +EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task); | 
|---|
| 355 | 377 |  | 
|---|
| 356 | 378 | void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit) | 
|---|
| 357 | 379 | { | 
|---|
| 358 | 380 | atomic_andnot(BIT(bit), &tsk->tick_dep_mask); | 
|---|
| 359 | 381 | } | 
|---|
|  | 382 | +EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task); | 
|---|
| 360 | 383 |  | 
|---|
| 361 | 384 | /* | 
|---|
| 362 | 385 | * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse | 
|---|
| .. | .. | 
|---|
| 409 | 432 | static int tick_nohz_cpu_down(unsigned int cpu) | 
|---|
| 410 | 433 | { | 
|---|
| 411 | 434 | /* | 
|---|
| 412 |  | -	 * The boot CPU handles housekeeping duty (unbound timers, | 
|---|
| 413 |  | -	 * workqueues, timekeeping, ...) on behalf of full dynticks | 
|---|
|  | 435 | +	 * The tick_do_timer_cpu CPU handles housekeeping duty (unbound | 
|---|
|  | 436 | +	 * timers, workqueues, timekeeping, ...) on behalf of full dynticks | 
|---|
| 414 | 437 | * CPUs. It must remain online when nohz full is enabled. | 
|---|
| 415 | 438 | */ | 
|---|
| 416 | 439 | if (tick_nohz_full_running && tick_do_timer_cpu == cpu) | 
|---|
| .. | .. | 
|---|
| 437 | 460 | return; | 
|---|
| 438 | 461 | } | 
|---|
| 439 | 462 |  | 
|---|
| 440 |  | -	cpu = smp_processor_id(); | 
|---|
|  | 463 | +	if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) && | 
|---|
|  | 464 | +			!IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) { | 
|---|
|  | 465 | +		cpu = smp_processor_id(); | 
|---|
| 441 | 466 |  | 
|---|
| 442 |  | -	if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) { | 
|---|
| 443 |  | -		pr_warn("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", | 
|---|
| 444 |  | -			cpu); | 
|---|
| 445 |  | -		cpumask_clear_cpu(cpu, tick_nohz_full_mask); | 
|---|
|  | 467 | +		if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) { | 
|---|
|  | 468 | +			pr_warn("NO_HZ: Clearing %d from nohz_full range " | 
|---|
|  | 469 | +				"for timekeeping\n", cpu); | 
|---|
|  | 470 | +			cpumask_clear_cpu(cpu, tick_nohz_full_mask); | 
|---|
|  | 471 | +		} | 
|---|
| 446 | 472 | } | 
|---|
| 447 | 473 |  | 
|---|
| 448 | 474 | for_each_cpu(cpu, tick_nohz_full_mask) | 
|---|
| .. | .. | 
|---|
| 639 | 665 | /* Forward the time to expire in the future */ | 
|---|
| 640 | 666 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 
|---|
| 641 | 667 |  | 
|---|
| 642 |  | -	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) | 
|---|
| 643 |  | -		hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED); | 
|---|
| 644 |  | -	else | 
|---|
|  | 668 | +	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | 
|---|
|  | 669 | +		hrtimer_start_expires(&ts->sched_timer, | 
|---|
|  | 670 | +				      HRTIMER_MODE_ABS_PINNED_HARD); | 
|---|
|  | 671 | +	} else { | 
|---|
| 645 | 672 | tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); | 
|---|
|  | 673 | +	} | 
|---|
| 646 | 674 |  | 
|---|
| 647 | 675 | /* | 
|---|
| 648 | 676 | * Reset to make sure next tick stop doesn't get fooled by past | 
|---|
| .. | .. | 
|---|
| 659 | 687 | static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) | 
|---|
| 660 | 688 | { | 
|---|
| 661 | 689 | u64 basemono, next_tick, next_tmr, next_rcu, delta, expires; | 
|---|
| 662 |  | -	unsigned long seq, basejiff; | 
|---|
|  | 690 | +	unsigned long basejiff; | 
|---|
|  | 691 | +	unsigned int seq; | 
|---|
| 663 | 692 |  | 
|---|
| 664 | 693 | /* Read jiffies and the time when jiffies were updated last */ | 
|---|
| 665 | 694 | do { | 
|---|
| .. | .. | 
|---|
| 786 | 815 | */ | 
|---|
| 787 | 816 | if (!ts->tick_stopped) { | 
|---|
| 788 | 817 | calc_load_nohz_start(); | 
|---|
| 789 |  | -		cpu_load_update_nohz_start(); | 
|---|
| 790 | 818 | quiet_vmstat(); | 
|---|
| 791 | 819 |  | 
|---|
| 792 | 820 | ts->last_tick = hrtimer_get_expires(&ts->sched_timer); | 
|---|
| .. | .. | 
|---|
| 807 | 835 | } | 
|---|
| 808 | 836 |  | 
|---|
| 809 | 837 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | 
|---|
| 810 |  | -		hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED); | 
|---|
|  | 838 | +		hrtimer_start(&ts->sched_timer, tick, | 
|---|
|  | 839 | +			      HRTIMER_MODE_ABS_PINNED_HARD); | 
|---|
| 811 | 840 | } else { | 
|---|
| 812 | 841 | hrtimer_set_expires(&ts->sched_timer, tick); | 
|---|
| 813 | 842 | tick_program_event(tick, 1); | 
|---|
| .. | .. | 
|---|
| 833 | 862 | { | 
|---|
| 834 | 863 | /* Update jiffies first */ | 
|---|
| 835 | 864 | tick_do_update_jiffies64(now); | 
|---|
| 836 |  | -	cpu_load_update_nohz_stop(); | 
|---|
| 837 | 865 |  | 
|---|
| 838 | 866 | /* | 
|---|
| 839 | 867 | * Clear the timer idle flag, so we avoid IPIs on remote queueing and | 
|---|
| .. | .. | 
|---|
| 896 | 924 | if (need_resched()) | 
|---|
| 897 | 925 | return false; | 
|---|
| 898 | 926 |  | 
|---|
| 899 |  | -	if (unlikely(local_softirq_pending() && cpu_online(cpu))) { | 
|---|
| 900 |  | -		softirq_check_pending_idle(); | 
|---|
|  | 927 | +	if (unlikely(local_softirq_pending())) { | 
|---|
|  | 928 | +		static int ratelimit; | 
|---|
|  | 929 | + | 
|---|
|  | 930 | +		if (ratelimit < 10 && | 
|---|
|  | 931 | +		    (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { | 
|---|
|  | 932 | +			pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n", | 
|---|
|  | 933 | +				(unsigned int) local_softirq_pending()); | 
|---|
|  | 934 | +			ratelimit++; | 
|---|
|  | 935 | +		} | 
|---|
| 901 | 936 | return false; | 
|---|
| 902 | 937 | } | 
|---|
| 903 | 938 |  | 
|---|
| .. | .. | 
|---|
| 908 | 943 | */ | 
|---|
| 909 | 944 | if (tick_do_timer_cpu == cpu) | 
|---|
| 910 | 945 | return false; | 
|---|
| 911 |  | -		/* | 
|---|
| 912 |  | -		 * Boot safety: make sure the timekeeping duty has been | 
|---|
| 913 |  | -		 * assigned before entering dyntick-idle mode, | 
|---|
| 914 |  | -		 */ | 
|---|
| 915 |  | -		if (tick_do_timer_cpu == TICK_DO_TIMER_NONE) | 
|---|
|  | 946 | + | 
|---|
|  | 947 | +		/* Should not happen for nohz-full */ | 
|---|
|  | 948 | +		if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) | 
|---|
| 916 | 949 | return false; | 
|---|
| 917 | 950 | } | 
|---|
| 918 | 951 |  | 
|---|
| .. | .. | 
|---|
| 1030 | 1063 | } | 
|---|
| 1031 | 1064 |  | 
|---|
| 1032 | 1065 | /** | 
|---|
|  | 1066 | + * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer | 
|---|
|  | 1067 | + * or the tick, whichever expires first. Note that, if the tick has been | 
|---|
|  | 1068 | + * stopped, it returns the next hrtimer. | 
|---|
|  | 1069 | + * | 
|---|
|  | 1070 | + * Called from power state control code with interrupts disabled | 
|---|
|  | 1071 | + */ | 
|---|
|  | 1072 | +ktime_t tick_nohz_get_next_hrtimer(void) | 
|---|
|  | 1073 | +{ | 
|---|
|  | 1074 | +	return __this_cpu_read(tick_cpu_device.evtdev)->next_event; | 
|---|
|  | 1075 | +} | 
|---|
|  | 1076 | + | 
|---|
|  | 1077 | +/** | 
|---|
| 1033 | 1078 | * tick_nohz_get_sleep_length - return the expected length of the current sleep | 
|---|
| 1034 | 1079 | * @delta_next: duration until the next event if the tick cannot be stopped | 
|---|
| 1035 | 1080 | * | 
|---|
| .. | .. | 
|---|
| 1081 | 1126 |  | 
|---|
| 1082 | 1127 | return ts->idle_calls; | 
|---|
| 1083 | 1128 | } | 
|---|
|  | 1129 | +EXPORT_SYMBOL_GPL(tick_nohz_get_idle_calls_cpu); | 
|---|
| 1084 | 1130 |  | 
|---|
| 1085 | 1131 | /** | 
|---|
| 1086 | 1132 | * tick_nohz_get_idle_calls - return the current idle calls counter value | 
|---|
| .. | .. | 
|---|
| 1099 | 1145 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE | 
|---|
| 1100 | 1146 | unsigned long ticks; | 
|---|
| 1101 | 1147 |  | 
|---|
| 1102 |  | -	if (vtime_accounting_cpu_enabled()) | 
|---|
|  | 1148 | +	if (vtime_accounting_enabled_this_cpu()) | 
|---|
| 1103 | 1149 | return; | 
|---|
| 1104 | 1150 | /* | 
|---|
| 1105 | 1151 | * We stopped the tick in idle. Update process times would miss the | 
|---|
| .. | .. | 
|---|
| 1213 | 1259 | * Recycle the hrtimer in ts, so we can share the | 
|---|
| 1214 | 1260 | * hrtimer_forward with the highres code. | 
|---|
| 1215 | 1261 | */ | 
|---|
| 1216 |  | -	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 
|---|
|  | 1262 | +	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); | 
|---|
| 1217 | 1263 | /* Get the next period */ | 
|---|
| 1218 | 1264 | next = tick_init_jiffy_update(); | 
|---|
| 1219 | 1265 |  | 
|---|
| .. | .. | 
|---|
| 1258 | 1304 | * High resolution timer specific code | 
|---|
| 1259 | 1305 | */ | 
|---|
| 1260 | 1306 | #ifdef CONFIG_HIGH_RES_TIMERS | 
|---|
| 1261 |  | - | 
|---|
| 1262 |  | -static void (*wake_callback)(void); | 
|---|
| 1263 |  | - | 
|---|
| 1264 |  | -void register_tick_sched_wakeup_callback(void (*cb)(void)) | 
|---|
| 1265 |  | -{ | 
|---|
| 1266 |  | -	if (!wake_callback) | 
|---|
| 1267 |  | -		wake_callback = cb; | 
|---|
| 1268 |  | -	else | 
|---|
| 1269 |  | -		pr_warn("tick-sched wake cb already exists; skipping.\n"); | 
|---|
| 1270 |  | -} | 
|---|
| 1271 |  | -EXPORT_SYMBOL_GPL(register_tick_sched_wakeup_callback); | 
|---|
| 1272 |  | - | 
|---|
| 1273 | 1307 | /* | 
|---|
| 1274 | 1308 | * We rearm the timer until we get disabled by the idle code. | 
|---|
| 1275 | 1309 | * Called with interrupts disabled. | 
|---|
| .. | .. | 
|---|
| 1287 | 1321 | * Do not call, when we are not in irq context and have | 
|---|
| 1288 | 1322 | * no valid regs pointer | 
|---|
| 1289 | 1323 | */ | 
|---|
| 1290 |  | -	if (regs) { | 
|---|
|  | 1324 | +	if (regs) | 
|---|
| 1291 | 1325 | tick_sched_handle(ts, regs); | 
|---|
| 1292 |  | -		if (wake_callback && tick_do_timer_cpu == smp_processor_id()) { | 
|---|
| 1293 |  | -			/* | 
|---|
| 1294 |  | -			 * wakeup user if needed | 
|---|
| 1295 |  | -			 */ | 
|---|
| 1296 |  | -			wake_callback(); | 
|---|
| 1297 |  | -		} | 
|---|
| 1298 |  | -	} | 
|---|
| 1299 | 1326 | else | 
|---|
| 1300 | 1327 | ts->next_tick = 0; | 
|---|
| 1301 | 1328 |  | 
|---|
| .. | .. | 
|---|
| 1344 | 1371 | } | 
|---|
| 1345 | 1372 |  | 
|---|
| 1346 | 1373 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 
|---|
| 1347 |  | -	hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED); | 
|---|
|  | 1374 | +	hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD); | 
|---|
| 1348 | 1375 | tick_nohz_activate(ts, NOHZ_MODE_HIGHRES); | 
|---|
| 1349 | 1376 | } | 
|---|
| 1350 | 1377 | #endif /* HIGH_RES_TIMERS */ | 
|---|
| .. | .. | 
|---|
| 1411 | 1438 | tick_nohz_switch_to_nohz(); | 
|---|
| 1412 | 1439 | return 0; | 
|---|
| 1413 | 1440 | } | 
|---|
| 1414 |  | - | 
|---|
| 1415 |  | -ktime_t *get_next_event_cpu(unsigned int cpu) | 
|---|
| 1416 |  | -{ | 
|---|
| 1417 |  | -	return &(per_cpu(tick_cpu_device, cpu).evtdev->next_event); | 
|---|
| 1418 |  | -} | 
|---|
| 1419 |  | -EXPORT_SYMBOL_GPL(get_next_event_cpu); | 
|---|