2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/kernel/time/tick-sched.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * linux/kernel/time/tick-sched.c
- *
  * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
  * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
  * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
@@ -8,8 +7,6 @@
  * No idle tick implementation for low and high resolution timers
  *
  * Started by: Thomas Gleixner and Ingo Molnar
- *
- * Distribute under GPLv2.
  */
 #include <linux/cpu.h>
 #include <linux/err.h>
@@ -26,9 +23,9 @@
 #include <linux/module.h>
 #include <linux/irq_work.h>
 #include <linux/posix-timers.h>
-#include <linux/timer.h>
 #include <linux/context_tracking.h>
 #include <linux/mm.h>
+#include <trace/hooks/sched.h>
 
 #include <asm/irq_regs.h>
 
@@ -134,15 +131,23 @@
 	 * into a long sleep. If two CPUs happen to assign themselves to
 	 * this duty, then the jiffies update is still serialized by
 	 * jiffies_lock.
+	 *
+	 * If nohz_full is enabled, this should not happen because the
+	 * tick_do_timer_cpu never relinquishes.
 	 */
-	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)
-	    && !tick_nohz_full_cpu(cpu))
+	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+#ifdef CONFIG_NO_HZ_FULL
+		WARN_ON_ONCE(tick_nohz_full_running);
+#endif
 		tick_do_timer_cpu = cpu;
+	}
 #endif
 
 	/* Check, if the jiffies need an update */
-	if (tick_do_timer_cpu == cpu)
+	if (tick_do_timer_cpu == cpu) {
 		tick_do_update_jiffies64(now);
+		trace_android_vh_jiffies_update(NULL);
+	}
 
 	if (ts->inidle)
 		ts->got_idle_tick = 1;
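
The trace_android_vh_jiffies_update() call added above is an Android vendor hook (declared via DECLARE_HOOK() in trace/hooks/sched.h, included earlier in this patch) that fires on the tick_do_timer_cpu each time the jiffies update actually runs. As a rough sketch of how a vendor module might attach to it, assuming the hook's TP_PROTO() is a single unused void * as the NULL argument suggests (module and probe names here are hypothetical; the probe's first parameter is the registration cookie that the generated register_trace_android_vh_*() helper takes):

	#include <linux/module.h>
	#include <trace/hooks/sched.h>

	/* Runs in hard-IRQ tick context: keep it short, never sleep. */
	static void demo_jiffies_update(void *data, void *unused)
	{
		/* e.g. sample a per-tick statistic */
	}

	static int __init demo_init(void)
	{
		return register_trace_android_vh_jiffies_update(demo_jiffies_update, NULL);
	}
	module_init(demo_init);
	MODULE_LICENSE("GPL");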
@@ -179,6 +184,7 @@
 #ifdef CONFIG_NO_HZ_FULL
 cpumask_var_t tick_nohz_full_mask;
 bool tick_nohz_full_running;
+EXPORT_SYMBOL_GPL(tick_nohz_full_running);
 static atomic_t tick_dep_mask;
 
 static bool check_tick_dependency(atomic_t *dep)
@@ -202,6 +208,11 @@
 
 	if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) {
 		trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
+		return true;
+	}
+
+	if (val & TICK_DEP_MASK_RCU) {
+		trace_tick_stop(0, TICK_DEP_MASK_RCU);
 		return true;
 	}
 
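
This adds the TICK_DEP_MASK_RCU case to check_tick_dependency(), so a tick dependency set by RCU now keeps the tick from being stopped, exactly like the existing dependency bits. The bit is driven through the usual <linux/tick.h> wrappers; a minimal illustrative sketch of the pattern (in-tree it is RCU itself, not a driver, that sets this bit, e.g. for a nohz_full CPU that still has callbacks to process):

	/* Keep the tick alive on this CPU until RCU no longer needs it. */
	tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU);
	/* ... tick keeps firing on cpu ... */
	tick_dep_clear_cpu(cpu, TICK_DEP_BIT_RCU);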
@@ -237,7 +248,7 @@
 
 static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
 	.func = nohz_full_kick_func,
-	.flags = IRQ_WORK_HARD_IRQ,
+	.flags = ATOMIC_INIT(IRQ_WORK_HARD_IRQ),
 };
 
 /*
@@ -332,6 +343,7 @@
 		preempt_enable();
 	}
 }
+EXPORT_SYMBOL_GPL(tick_nohz_dep_set_cpu);
 
 void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
 {
@@ -339,24 +351,35 @@
 
 	atomic_andnot(BIT(bit), &ts->tick_dep_mask);
 }
+EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
 
 /*
- * Set a per-task tick dependency. Posix CPU timers need this in order to elapse
- * per task timers.
+ * Set a per-task tick dependency. RCU need this. Also posix CPU timers
+ * in order to elapse per task timers.
  */
 void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
 {
-	/*
-	 * We could optimize this with just kicking the target running the task
-	 * if that noise matters for nohz full users.
-	 */
-	tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
+	if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) {
+		if (tsk == current) {
+			preempt_disable();
+			tick_nohz_full_kick();
+			preempt_enable();
+		} else {
+			/*
+			 * Some future tick_nohz_full_kick_task()
+			 * should optimize this.
+			 */
+			tick_nohz_full_kick_all();
+		}
+	}
 }
+EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);
 
 void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
 {
 	atomic_andnot(BIT(bit), &tsk->tick_dep_mask);
 }
+EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task);
 
 /*
  * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
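
Note the kick-once semantics introduced here: atomic_fetch_or() returns the dependency mask as it was before the OR, so the (IPI-based) kick is only issued when the mask goes from empty to non-empty. If any dependency bit was already set, the tick is already being kept alive and nothing more needs to be done. A caller-side sketch, following the posix CPU timers usage the comment mentions (the exact in-tree call sites vary by kernel version):

	/* Arming a per-task timer: make sure tsk's tick keeps elapsing it... */
	tick_nohz_dep_set_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
	/* ...and drop the dependency once the timer is disarmed. */
	tick_nohz_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);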
@@ -409,8 +432,8 @@
 static int tick_nohz_cpu_down(unsigned int cpu)
 {
 	/*
-	 * The boot CPU handles housekeeping duty (unbound timers,
-	 * workqueues, timekeeping, ...) on behalf of full dynticks
+	 * The tick_do_timer_cpu CPU handles housekeeping duty (unbound
+	 * timers, workqueues, timekeeping, ...) on behalf of full dynticks
 	 * CPUs. It must remain online when nohz full is enabled.
 	 */
 	if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
@@ -437,12 +460,15 @@
 		return;
 	}
 
-	cpu = smp_processor_id();
+	if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) &&
+	    !IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) {
+		cpu = smp_processor_id();
 
-	if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
-		pr_warn("NO_HZ: Clearing %d from nohz_full range for timekeeping\n",
-			cpu);
-		cpumask_clear_cpu(cpu, tick_nohz_full_mask);
+		if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
+			pr_warn("NO_HZ: Clearing %d from nohz_full range "
+				"for timekeeping\n", cpu);
+			cpumask_clear_cpu(cpu, tick_nohz_full_mask);
+		}
 	}
 
 	for_each_cpu(cpu, tick_nohz_full_mask)
@@ -639,10 +665,12 @@
 	/* Forward the time to expire in the future */
 	hrtimer_forward(&ts->sched_timer, now, tick_period);
 
-	if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
-		hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
-	else
+	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+		hrtimer_start_expires(&ts->sched_timer,
+				      HRTIMER_MODE_ABS_PINNED_HARD);
+	} else {
 		tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
+	}
 
 	/*
 	 * Reset to make sure next tick stop doesn't get fooled by past
@@ -659,7 +687,8 @@
 static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 {
 	u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
-	unsigned long seq, basejiff;
+	unsigned long basejiff;
+	unsigned int seq;
 
 	/* Read jiffies and the time when jiffies were updated last */
 	do {
@@ -786,7 +815,6 @@
 	 */
 	if (!ts->tick_stopped) {
 		calc_load_nohz_start();
-		cpu_load_update_nohz_start();
 		quiet_vmstat();
 
 		ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
@@ -807,7 +835,8 @@
 	}
 
 	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
-		hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start(&ts->sched_timer, tick,
+			      HRTIMER_MODE_ABS_PINNED_HARD);
 	} else {
 		hrtimer_set_expires(&ts->sched_timer, tick);
 		tick_program_event(tick, 1);
@@ -833,7 +862,6 @@
 {
 	/* Update jiffies first */
 	tick_do_update_jiffies64(now);
-	cpu_load_update_nohz_stop();
 
 	/*
 	 * Clear the timer idle flag, so we avoid IPIs on remote queueing and
@@ -896,8 +924,15 @@
 	if (need_resched())
 		return false;
 
-	if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
-		softirq_check_pending_idle();
+	if (unlikely(local_softirq_pending())) {
+		static int ratelimit;
+
+		if (ratelimit < 10 && !local_bh_blocked() &&
+		    (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
+			pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n",
+				(unsigned int) local_softirq_pending());
+			ratelimit++;
+		}
 		return false;
 	}
 
@@ -908,11 +943,9 @@
 		 */
 		if (tick_do_timer_cpu == cpu)
 			return false;
-		/*
-		 * Boot safety: make sure the timekeeping duty has been
-		 * assigned before entering dyntick-idle mode,
-		 */
-		if (tick_do_timer_cpu == TICK_DO_TIMER_NONE)
+
+		/* Should not happen for nohz-full */
+		if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
 			return false;
 	}
 
@@ -1030,6 +1063,18 @@
 }
 
 /**
+ * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer
+ * or the tick, whichever expires first. Note that, if the tick has been
+ * stopped, it returns the next hrtimer.
+ *
+ * Called from power state control code with interrupts disabled
+ */
+ktime_t tick_nohz_get_next_hrtimer(void)
+{
+	return __this_cpu_read(tick_cpu_device.evtdev)->next_event;
+}
+
+/**
  * tick_nohz_get_sleep_length - return the expected length of the current sleep
 * @delta_next: duration until the next event if the tick cannot be stopped
 *
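
Because the tick's own hrtimer is programmed into the same per-CPU clock event device as every other hrtimer, reading tick_cpu_device's next_event yields the earlier of the two in a single load. A sketch of a consumer on the power-management side (hypothetical governor code; per the kernel-doc it must run with interrupts disabled on the local CPU):

	ktime_t next = tick_nohz_get_next_hrtimer();
	s64 budget_us = ktime_us_delta(next, ktime_get());
	/* pick an idle state whose exit latency/residency fits budget_us */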
@@ -1081,6 +1126,7 @@
 
 	return ts->idle_calls;
 }
+EXPORT_SYMBOL_GPL(tick_nohz_get_idle_calls_cpu);
 
 /**
  * tick_nohz_get_idle_calls - return the current idle calls counter value
@@ -1099,7 +1145,7 @@
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	unsigned long ticks;
 
-	if (vtime_accounting_cpu_enabled())
+	if (vtime_accounting_enabled_this_cpu())
 		return;
 	/*
 	 * We stopped the tick in idle. Update process times would miss the
@@ -1213,7 +1259,7 @@
 	 * Recycle the hrtimer in ts, so we can share the
 	 * hrtimer_forward with the highres code.
 	 */
-	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
 	/* Get the next period */
 	next = tick_init_jiffy_update();
 
@@ -1258,18 +1304,6 @@
  * High resolution timer specific code
  */
 #ifdef CONFIG_HIGH_RES_TIMERS
-
-static void (*wake_callback)(void);
-
-void register_tick_sched_wakeup_callback(void (*cb)(void))
-{
-	if (!wake_callback)
-		wake_callback = cb;
-	else
-		pr_warn("tick-sched wake cb already exists; skipping.\n");
-}
-EXPORT_SYMBOL_GPL(register_tick_sched_wakeup_callback);
-
 /*
  * We rearm the timer until we get disabled by the idle code.
  * Called with interrupts disabled.
@@ -1287,15 +1321,8 @@
 	 * Do not call, when we are not in irq context and have
 	 * no valid regs pointer
 	 */
-	if (regs) {
+	if (regs)
 		tick_sched_handle(ts, regs);
-		if (wake_callback && tick_do_timer_cpu == smp_processor_id()) {
-			/*
-			 * wakeup user if needed
-			 */
-			wake_callback();
-		}
-	}
 	else
 		ts->next_tick = 0;
 
@@ -1344,7 +1371,7 @@
 	}
 
 	hrtimer_forward(&ts->sched_timer, now, tick_period);
-	hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
+	hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
 	tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
 }
 #endif /* HIGH_RES_TIMERS */
@@ -1411,9 +1438,3 @@
 	tick_nohz_switch_to_nohz();
 	return 0;
 }
-
-ktime_t *get_next_event_cpu(unsigned int cpu)
-{
-	return &(per_cpu(tick_cpu_device, cpu).evtdev->next_event);
-}
-EXPORT_SYMBOL_GPL(get_next_event_cpu);
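
With the out-of-tree get_next_event_cpu() helper removed, local-CPU callers can migrate to the tick_nohz_get_next_hrtimer() added earlier in this patch, which returns the same clock event device's next_event by value rather than a pointer into another CPU's per-CPU data; note the patch provides no direct replacement for cross-CPU queries. An assumed migration sketch:

	/* before: ktime_t *ev = get_next_event_cpu(smp_processor_id()); */
	ktime_t ev = tick_nohz_get_next_hrtimer();	/* local CPU, IRQs off */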