.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
---|
1 | 2 | /* |
---|
2 | | - * linux/kernel/time/tick-sched.c |
---|
3 | | - * |
---|
4 | 3 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> |
---|
5 | 4 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar |
---|
6 | 5 | * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner |
---|
.. | .. |
---|
8 | 7 | * No idle tick implementation for low and high resolution timers |
---|
9 | 8 | * |
---|
10 | 9 | * Started by: Thomas Gleixner and Ingo Molnar |
---|
11 | | - * |
---|
12 | | - * Distribute under GPLv2. |
---|
13 | 10 | */ |
---|
14 | 11 | #include <linux/cpu.h> |
---|
15 | 12 | #include <linux/err.h> |
---|
.. | .. |
---|
26 | 23 | #include <linux/module.h> |
---|
27 | 24 | #include <linux/irq_work.h> |
---|
28 | 25 | #include <linux/posix-timers.h> |
---|
29 | | -#include <linux/timer.h> |
---|
30 | 26 | #include <linux/context_tracking.h> |
---|
31 | 27 | #include <linux/mm.h> |
---|
| 28 | +#include <trace/hooks/sched.h> |
---|
32 | 29 | |
---|
33 | 30 | #include <asm/irq_regs.h> |
---|
34 | 31 | |
---|
.. | .. |
---|
69 | 66 | return; |
---|
70 | 67 | |
---|
71 | 68 | /* Reevaluate with jiffies_lock held */ |
---|
72 | | - write_seqlock(&jiffies_lock); |
---|
| 69 | + raw_spin_lock(&jiffies_lock); |
---|
| 70 | + write_seqcount_begin(&jiffies_seq); |
---|
73 | 71 | |
---|
74 | 72 | delta = ktime_sub(now, last_jiffies_update); |
---|
75 | 73 | if (delta >= tick_period) { |
---|
.. | .. |
---|
95 | 93 | /* Keep the tick_next_period variable up to date */ |
---|
96 | 94 | tick_next_period = ktime_add(last_jiffies_update, tick_period); |
---|
97 | 95 | } else { |
---|
98 | | - write_sequnlock(&jiffies_lock); |
---|
| 96 | + write_seqcount_end(&jiffies_seq); |
---|
| 97 | + raw_spin_unlock(&jiffies_lock); |
---|
99 | 98 | return; |
---|
100 | 99 | } |
---|
101 | | - write_sequnlock(&jiffies_lock); |
---|
| 100 | + write_seqcount_end(&jiffies_seq); |
---|
| 101 | + raw_spin_unlock(&jiffies_lock); |
---|
102 | 102 | update_wall_time(); |
---|
103 | 103 | } |
---|
104 | 104 | |
---|
.. | .. |
---|
109 | 109 | { |
---|
110 | 110 | ktime_t period; |
---|
111 | 111 | |
---|
112 | | - write_seqlock(&jiffies_lock); |
---|
| 112 | + raw_spin_lock(&jiffies_lock); |
---|
| 113 | + write_seqcount_begin(&jiffies_seq); |
---|
113 | 114 | /* Did we start the jiffies update yet ? */ |
---|
114 | 115 | if (last_jiffies_update == 0) |
---|
115 | 116 | last_jiffies_update = tick_next_period; |
---|
116 | 117 | period = last_jiffies_update; |
---|
117 | | - write_sequnlock(&jiffies_lock); |
---|
| 118 | + write_seqcount_end(&jiffies_seq); |
---|
| 119 | + raw_spin_unlock(&jiffies_lock); |
---|
118 | 120 | return period; |
---|
119 | 121 | } |
---|
120 | 122 | |
---|
.. | .. |
---|
129 | 131 | * into a long sleep. If two CPUs happen to assign themselves to |
---|
130 | 132 | * this duty, then the jiffies update is still serialized by |
---|
131 | 133 | * jiffies_lock. |
---|
| 134 | + * |
---|
| 135 | + * If nohz_full is enabled, this should not happen because the |
---|
| 136 | + * tick_do_timer_cpu never relinquishes the duty. |
---|
132 | 137 | */ |
---|
133 | | - if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE) |
---|
134 | | - && !tick_nohz_full_cpu(cpu)) |
---|
| 138 | + if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) { |
---|
| 139 | +#ifdef CONFIG_NO_HZ_FULL |
---|
| 140 | + WARN_ON_ONCE(tick_nohz_full_running); |
---|
| 141 | +#endif |
---|
135 | 142 | tick_do_timer_cpu = cpu; |
---|
| 143 | + } |
---|
136 | 144 | #endif |
---|
137 | 145 | |
---|
138 | 146 | /* Check, if the jiffies need an update */ |
---|
139 | | - if (tick_do_timer_cpu == cpu) |
---|
| 147 | + if (tick_do_timer_cpu == cpu) { |
---|
140 | 148 | tick_do_update_jiffies64(now); |
---|
| 149 | + trace_android_vh_jiffies_update(NULL); |
---|
| 150 | + } |
---|
141 | 151 | |
---|
142 | 152 | if (ts->inidle) |
---|
143 | 153 | ts->got_idle_tick = 1; |
---|
.. | .. |
---|
174 | 184 | #ifdef CONFIG_NO_HZ_FULL |
---|
175 | 185 | cpumask_var_t tick_nohz_full_mask; |
---|
176 | 186 | bool tick_nohz_full_running; |
---|
| 187 | +EXPORT_SYMBOL_GPL(tick_nohz_full_running); |
---|
177 | 188 | static atomic_t tick_dep_mask; |
---|
178 | 189 | |
---|
179 | 190 | static bool check_tick_dependency(atomic_t *dep) |
---|
.. | .. |
---|
197 | 208 | |
---|
198 | 209 | if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) { |
---|
199 | 210 | trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE); |
---|
| 211 | + return true; |
---|
| 212 | + } |
---|
| 213 | + |
---|
| 214 | + if (val & TICK_DEP_MASK_RCU) { |
---|
| 215 | + trace_tick_stop(0, TICK_DEP_MASK_RCU); |
---|
200 | 216 | return true; |
---|
201 | 217 | } |
---|
202 | 218 | |
---|
.. | .. |
---|
232 | 248 | |
---|
233 | 249 | static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { |
---|
234 | 250 | .func = nohz_full_kick_func, |
---|
| 251 | + .flags = ATOMIC_INIT(IRQ_WORK_HARD_IRQ), |
---|
235 | 252 | }; |
---|
236 | 253 | |
---|
237 | 254 | /* |
---|
.. | .. |
---|
326 | 343 | preempt_enable(); |
---|
327 | 344 | } |
---|
328 | 345 | } |
---|
| 346 | +EXPORT_SYMBOL_GPL(tick_nohz_dep_set_cpu); |
---|
329 | 347 | |
---|
330 | 348 | void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) |
---|
331 | 349 | { |
---|
.. | .. |
---|
333 | 351 | |
---|
334 | 352 | atomic_andnot(BIT(bit), &ts->tick_dep_mask); |
---|
335 | 353 | } |
---|
| 354 | +EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu); |
---|
336 | 355 | |
---|
337 | 356 | /* |
---|
338 | | - * Set a per-task tick dependency. Posix CPU timers need this in order to elapse |
---|
339 | | - * per task timers. |
---|
| 357 | + * Set a per-task tick dependency. RCU needs this; so do posix CPU timers, |
---|
| 358 | + * which rely on it to elapse per-task timers. |
---|
340 | 359 | */ |
---|
341 | 360 | void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit) |
---|
342 | 361 | { |
---|
343 | | - /* |
---|
344 | | - * We could optimize this with just kicking the target running the task |
---|
345 | | - * if that noise matters for nohz full users. |
---|
346 | | - */ |
---|
347 | | - tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit); |
---|
| 362 | + if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) { |
---|
| 363 | + if (tsk == current) { |
---|
| 364 | + preempt_disable(); |
---|
| 365 | + tick_nohz_full_kick(); |
---|
| 366 | + preempt_enable(); |
---|
| 367 | + } else { |
---|
| 368 | + /* |
---|
| 369 | + * Some future tick_nohz_full_kick_task() |
---|
| 370 | + * should optimize this. |
---|
| 371 | + */ |
---|
| 372 | + tick_nohz_full_kick_all(); |
---|
| 373 | + } |
---|
| 374 | + } |
---|
348 | 375 | } |
---|
| 376 | +EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task); |
---|
349 | 377 | |
---|
350 | 378 | void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit) |
---|
351 | 379 | { |
---|
352 | 380 | atomic_andnot(BIT(bit), &tsk->tick_dep_mask); |
---|
353 | 381 | } |
---|
| 382 | +EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task); |
---|
354 | 383 | |
---|
355 | 384 | /* |
---|
356 | 385 | * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse |
---|
.. | .. |
---|
403 | 432 | static int tick_nohz_cpu_down(unsigned int cpu) |
---|
404 | 433 | { |
---|
405 | 434 | /* |
---|
406 | | - * The boot CPU handles housekeeping duty (unbound timers, |
---|
407 | | - * workqueues, timekeeping, ...) on behalf of full dynticks |
---|
| 435 | + * The tick_do_timer_cpu CPU handles housekeeping duty (unbound |
---|
| 436 | + * timers, workqueues, timekeeping, ...) on behalf of full dynticks |
---|
408 | 437 | * CPUs. It must remain online when nohz full is enabled. |
---|
409 | 438 | */ |
---|
410 | 439 | if (tick_nohz_full_running && tick_do_timer_cpu == cpu) |
---|
.. | .. |
---|
431 | 460 | return; |
---|
432 | 461 | } |
---|
433 | 462 | |
---|
434 | | - cpu = smp_processor_id(); |
---|
| 463 | + if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) && |
---|
| 464 | + !IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) { |
---|
| 465 | + cpu = smp_processor_id(); |
---|
435 | 466 | |
---|
436 | | - if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) { |
---|
437 | | - pr_warn("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", |
---|
438 | | - cpu); |
---|
439 | | - cpumask_clear_cpu(cpu, tick_nohz_full_mask); |
---|
| 467 | + if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) { |
---|
| 468 | + pr_warn("NO_HZ: Clearing %d from nohz_full range " |
---|
| 469 | + "for timekeeping\n", cpu); |
---|
| 470 | + cpumask_clear_cpu(cpu, tick_nohz_full_mask); |
---|
| 471 | + } |
---|
440 | 472 | } |
---|
441 | 473 | |
---|
442 | 474 | for_each_cpu(cpu, tick_nohz_full_mask) |
---|
.. | .. |
---|
633 | 665 | /* Forward the time to expire in the future */ |
---|
634 | 666 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
---|
635 | 667 | |
---|
636 | | - if (ts->nohz_mode == NOHZ_MODE_HIGHRES) |
---|
637 | | - hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED); |
---|
638 | | - else |
---|
| 668 | + if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { |
---|
| 669 | + hrtimer_start_expires(&ts->sched_timer, |
---|
| 670 | + HRTIMER_MODE_ABS_PINNED_HARD); |
---|
| 671 | + } else { |
---|
639 | 672 | tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); |
---|
| 673 | + } |
---|
640 | 674 | |
---|
641 | 675 | /* |
---|
642 | 676 | * Reset to make sure next tick stop doesn't get fooled by past |
---|
.. | .. |
---|
653 | 687 | static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) |
---|
654 | 688 | { |
---|
655 | 689 | u64 basemono, next_tick, next_tmr, next_rcu, delta, expires; |
---|
656 | | - unsigned long seq, basejiff; |
---|
| 690 | + unsigned long basejiff; |
---|
| 691 | + unsigned int seq; |
---|
657 | 692 | |
---|
658 | 693 | /* Read jiffies and the time when jiffies were updated last */ |
---|
659 | 694 | do { |
---|
660 | | - seq = read_seqbegin(&jiffies_lock); |
---|
| 695 | + seq = read_seqcount_begin(&jiffies_seq); |
---|
661 | 696 | basemono = last_jiffies_update; |
---|
662 | 697 | basejiff = jiffies; |
---|
663 | | - } while (read_seqretry(&jiffies_lock, seq)); |
---|
| 698 | + } while (read_seqcount_retry(&jiffies_seq, seq)); |
---|
664 | 699 | ts->last_jiffies = basejiff; |
---|
665 | 700 | ts->timer_expires_base = basemono; |
---|
666 | 701 | |
---|
.. | .. |
---|
780 | 815 | */ |
---|
781 | 816 | if (!ts->tick_stopped) { |
---|
782 | 817 | calc_load_nohz_start(); |
---|
783 | | - cpu_load_update_nohz_start(); |
---|
784 | 818 | quiet_vmstat(); |
---|
785 | 819 | |
---|
786 | 820 | ts->last_tick = hrtimer_get_expires(&ts->sched_timer); |
---|
.. | .. |
---|
801 | 835 | } |
---|
802 | 836 | |
---|
803 | 837 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { |
---|
804 | | - hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED); |
---|
| 838 | + hrtimer_start(&ts->sched_timer, tick, |
---|
| 839 | + HRTIMER_MODE_ABS_PINNED_HARD); |
---|
805 | 840 | } else { |
---|
806 | 841 | hrtimer_set_expires(&ts->sched_timer, tick); |
---|
807 | 842 | tick_program_event(tick, 1); |
---|
.. | .. |
---|
827 | 862 | { |
---|
828 | 863 | /* Update jiffies first */ |
---|
829 | 864 | tick_do_update_jiffies64(now); |
---|
830 | | - cpu_load_update_nohz_stop(); |
---|
831 | 865 | |
---|
832 | 866 | /* |
---|
833 | 867 | * Clear the timer idle flag, so we avoid IPIs on remote queueing and |
---|
.. | .. |
---|
890 | 924 | if (need_resched()) |
---|
891 | 925 | return false; |
---|
892 | 926 | |
---|
893 | | - if (unlikely(local_softirq_pending() && cpu_online(cpu))) { |
---|
| 927 | + if (unlikely(local_softirq_pending())) { |
---|
894 | 928 | static int ratelimit; |
---|
895 | 929 | |
---|
896 | | - if (ratelimit < 10 && |
---|
| 930 | + if (ratelimit < 10 && !local_bh_blocked() && |
---|
897 | 931 | (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { |
---|
898 | | - pr_warn("NOHZ: local_softirq_pending %02x\n", |
---|
| 932 | + pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n", |
---|
899 | 933 | (unsigned int) local_softirq_pending()); |
---|
900 | 934 | ratelimit++; |
---|
901 | 935 | } |
---|
.. | .. |
---|
909 | 943 | */ |
---|
910 | 944 | if (tick_do_timer_cpu == cpu) |
---|
911 | 945 | return false; |
---|
912 | | - /* |
---|
913 | | - * Boot safety: make sure the timekeeping duty has been |
---|
914 | | - * assigned before entering dyntick-idle mode, |
---|
915 | | - */ |
---|
916 | | - if (tick_do_timer_cpu == TICK_DO_TIMER_NONE) |
---|
| 946 | + |
---|
| 947 | + /* Should not happen for nohz-full */ |
---|
| 948 | + if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) |
---|
917 | 949 | return false; |
---|
918 | 950 | } |
---|
919 | 951 | |
---|
.. | .. |
---|
1031 | 1063 | } |
---|
1032 | 1064 | |
---|
1033 | 1065 | /** |
---|
| 1066 | + * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer |
---|
| 1067 | + * or the tick, whichever expires first. Note that, if the tick has been |
---|
| 1068 | + * stopped, it returns the next hrtimer. |
---|
| 1069 | + * |
---|
| 1070 | + * Called from power state control code with interrupts disabled |
---|
| 1071 | + */ |
---|
| 1072 | +ktime_t tick_nohz_get_next_hrtimer(void) |
---|
| 1073 | +{ |
---|
| 1074 | + return __this_cpu_read(tick_cpu_device.evtdev)->next_event; |
---|
| 1075 | +} |
---|
| 1076 | + |
---|
| 1077 | +/** |
---|
1034 | 1078 | * tick_nohz_get_sleep_length - return the expected length of the current sleep |
---|
1035 | 1079 | * @delta_next: duration until the next event if the tick cannot be stopped |
---|
1036 | 1080 | * |
---|
.. | .. |
---|
1082 | 1126 | |
---|
1083 | 1127 | return ts->idle_calls; |
---|
1084 | 1128 | } |
---|
| 1129 | +EXPORT_SYMBOL_GPL(tick_nohz_get_idle_calls_cpu); |
---|
1085 | 1130 | |
---|
1086 | 1131 | /** |
---|
1087 | 1132 | * tick_nohz_get_idle_calls - return the current idle calls counter value |
---|
.. | .. |
---|
1100 | 1145 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE |
---|
1101 | 1146 | unsigned long ticks; |
---|
1102 | 1147 | |
---|
1103 | | - if (vtime_accounting_cpu_enabled()) |
---|
| 1148 | + if (vtime_accounting_enabled_this_cpu()) |
---|
1104 | 1149 | return; |
---|
1105 | 1150 | /* |
---|
1106 | 1151 | * We stopped the tick in idle. Update process times would miss the |
---|
.. | .. |
---|
1214 | 1259 | * Recycle the hrtimer in ts, so we can share the |
---|
1215 | 1260 | * hrtimer_forward with the highres code. |
---|
1216 | 1261 | */ |
---|
1217 | | - hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
---|
| 1262 | + hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); |
---|
1218 | 1263 | /* Get the next period */ |
---|
1219 | 1264 | next = tick_init_jiffy_update(); |
---|
1220 | 1265 | |
---|
.. | .. |
---|
1259 | 1304 | * High resolution timer specific code |
---|
1260 | 1305 | */ |
---|
1261 | 1306 | #ifdef CONFIG_HIGH_RES_TIMERS |
---|
1262 | | - |
---|
1263 | | -static void (*wake_callback)(void); |
---|
1264 | | - |
---|
1265 | | -void register_tick_sched_wakeup_callback(void (*cb)(void)) |
---|
1266 | | -{ |
---|
1267 | | - if (!wake_callback) |
---|
1268 | | - wake_callback = cb; |
---|
1269 | | - else |
---|
1270 | | - pr_warn("tick-sched wake cb already exists; skipping.\n"); |
---|
1271 | | -} |
---|
1272 | | -EXPORT_SYMBOL_GPL(register_tick_sched_wakeup_callback); |
---|
1273 | | - |
---|
1274 | 1307 | /* |
---|
1275 | 1308 | * We rearm the timer until we get disabled by the idle code. |
---|
1276 | 1309 | * Called with interrupts disabled. |
---|
.. | .. |
---|
1288 | 1321 | * Do not call, when we are not in irq context and have |
---|
1289 | 1322 | * no valid regs pointer |
---|
1290 | 1323 | */ |
---|
1291 | | - if (regs) { |
---|
| 1324 | + if (regs) |
---|
1292 | 1325 | tick_sched_handle(ts, regs); |
---|
1293 | | - if (wake_callback && tick_do_timer_cpu == smp_processor_id()) { |
---|
1294 | | - /* |
---|
1295 | | - * wakeup user if needed |
---|
1296 | | - */ |
---|
1297 | | - wake_callback(); |
---|
1298 | | - } |
---|
1299 | | - } |
---|
1300 | 1326 | else |
---|
1301 | 1327 | ts->next_tick = 0; |
---|
1302 | 1328 | |
---|
.. | .. |
---|
1330 | 1356 | /* |
---|
1331 | 1357 | * Emulate tick processing via per-CPU hrtimers: |
---|
1332 | 1358 | */ |
---|
1333 | | - hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
---|
| 1359 | + hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); |
---|
1334 | 1360 | ts->sched_timer.function = tick_sched_timer; |
---|
1335 | 1361 | |
---|
1336 | 1362 | /* Get the next period (per-CPU) */ |
---|
.. | .. |
---|
1345 | 1371 | } |
---|
1346 | 1372 | |
---|
1347 | 1373 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
---|
1348 | | - hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED); |
---|
| 1374 | + hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD); |
---|
1349 | 1375 | tick_nohz_activate(ts, NOHZ_MODE_HIGHRES); |
---|
1350 | 1376 | } |
---|
1351 | 1377 | #endif /* HIGH_RES_TIMERS */ |
---|
.. | .. |
---|
1412 | 1438 | tick_nohz_switch_to_nohz(); |
---|
1413 | 1439 | return 0; |
---|
1414 | 1440 | } |
---|
1415 | | - |
---|
1416 | | -ktime_t *get_next_event_cpu(unsigned int cpu) |
---|
1417 | | -{ |
---|
1418 | | - return &(per_cpu(tick_cpu_device, cpu).evtdev->next_event); |
---|
1419 | | -} |
---|
1420 | | -EXPORT_SYMBOL_GPL(get_next_event_cpu); |
---|