2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/kernel/time/tick-sched.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * linux/kernel/time/tick-sched.c
- *
  * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
  * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
  * Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
@@ -8,8 +7,6 @@
  *  No idle tick implementation for low and high resolution timers
  *
  *  Started by: Thomas Gleixner and Ingo Molnar
- *
- *  Distribute under GPLv2.
  */
 #include <linux/cpu.h>
 #include <linux/err.h>
@@ -26,9 +23,9 @@
 #include <linux/module.h>
 #include <linux/irq_work.h>
 #include <linux/posix-timers.h>
-#include <linux/timer.h>
 #include <linux/context_tracking.h>
 #include <linux/mm.h>
+#include <trace/hooks/sched.h>
 
 #include <asm/irq_regs.h>
 
@@ -69,7 +66,8 @@
 		return;
 
 	/* Reevaluate with jiffies_lock held */
-	write_seqlock(&jiffies_lock);
+	raw_spin_lock(&jiffies_lock);
+	write_seqcount_begin(&jiffies_seq);
 
 	delta = ktime_sub(now, last_jiffies_update);
 	if (delta >= tick_period) {
@@ -95,10 +93,12 @@
 		/* Keep the tick_next_period variable up to date */
 		tick_next_period = ktime_add(last_jiffies_update, tick_period);
 	} else {
-		write_sequnlock(&jiffies_lock);
+		write_seqcount_end(&jiffies_seq);
+		raw_spin_unlock(&jiffies_lock);
 		return;
 	}
-	write_sequnlock(&jiffies_lock);
+	write_seqcount_end(&jiffies_seq);
+	raw_spin_unlock(&jiffies_lock);
 	update_wall_time();
 }
 
@@ -109,12 +109,14 @@
 {
 	ktime_t period;
 
-	write_seqlock(&jiffies_lock);
+	raw_spin_lock(&jiffies_lock);
+	write_seqcount_begin(&jiffies_seq);
 	/* Did we start the jiffies update yet ? */
 	if (last_jiffies_update == 0)
 		last_jiffies_update = tick_next_period;
 	period = last_jiffies_update;
-	write_sequnlock(&jiffies_lock);
+	write_seqcount_end(&jiffies_seq);
+	raw_spin_unlock(&jiffies_lock);
 	return period;
 }
 
@@ -129,15 +131,23 @@
 	 * into a long sleep. If two CPUs happen to assign themselves to
 	 * this duty, then the jiffies update is still serialized by
 	 * jiffies_lock.
+	 *
+	 * If nohz_full is enabled, this should not happen because the
+	 * tick_do_timer_cpu never relinquishes.
 	 */
-	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)
-	    && !tick_nohz_full_cpu(cpu))
+	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+#ifdef CONFIG_NO_HZ_FULL
+		WARN_ON_ONCE(tick_nohz_full_running);
+#endif
 		tick_do_timer_cpu = cpu;
+	}
 #endif
 
 	/* Check, if the jiffies need an update */
-	if (tick_do_timer_cpu == cpu)
+	if (tick_do_timer_cpu == cpu) {
 		tick_do_update_jiffies64(now);
+		trace_android_vh_jiffies_update(NULL);
+	}
 
 	if (ts->inidle)
 		ts->got_idle_tick = 1;
@@ -174,6 +184,7 @@
 #ifdef CONFIG_NO_HZ_FULL
 cpumask_var_t tick_nohz_full_mask;
 bool tick_nohz_full_running;
+EXPORT_SYMBOL_GPL(tick_nohz_full_running);
 static atomic_t tick_dep_mask;
 
 static bool check_tick_dependency(atomic_t *dep)
@@ -197,6 +208,11 @@
 
 	if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) {
 		trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
+		return true;
+	}
+
+	if (val & TICK_DEP_MASK_RCU) {
+		trace_tick_stop(0, TICK_DEP_MASK_RCU);
 		return true;
 	}
 
@@ -232,6 +248,7 @@
 
 static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
 	.func = nohz_full_kick_func,
+	.flags = ATOMIC_INIT(IRQ_WORK_HARD_IRQ),
 };
 
 /*
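The .flags = ATOMIC_INIT(IRQ_WORK_HARD_IRQ) initializer forces the nohz_full kick to run from hard interrupt context even on configurations (e.g. PREEMPT_RT) where irq_work may otherwise be deferred to a preemptible context. A minimal sketch of the same pattern, assuming this tree's atomic_t irq_work flags and hypothetical my_kick_* names:

	#include <linux/irq_work.h>

	static void my_kick_func(struct irq_work *work)
	{
		/* Runs in hard interrupt context on the kicked CPU. */
	}

	/* Force hardirq execution, as nohz_full_kick_work does above. */
	static DEFINE_PER_CPU(struct irq_work, my_kick_work) = {
		.func	= my_kick_func,
		.flags	= ATOMIC_INIT(IRQ_WORK_HARD_IRQ),
	};

	/* Queue locally with: irq_work_queue(this_cpu_ptr(&my_kick_work)); */
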
@@ -326,6 +343,7 @@
 		preempt_enable();
 	}
 }
+EXPORT_SYMBOL_GPL(tick_nohz_dep_set_cpu);
 
 void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
 {
@@ -333,24 +351,35 @@
 
 	atomic_andnot(BIT(bit), &ts->tick_dep_mask);
 }
+EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
 
 /*
- * Set a per-task tick dependency. Posix CPU timers need this in order to elapse
- * per task timers.
+ * Set a per-task tick dependency. RCU needs this. Posix CPU timers also
+ * need it in order to elapse per task timers.
  */
 void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
 {
-	/*
-	 * We could optimize this with just kicking the target running the task
-	 * if that noise matters for nohz full users.
-	 */
-	tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
+	if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) {
+		if (tsk == current) {
+			preempt_disable();
+			tick_nohz_full_kick();
+			preempt_enable();
+		} else {
+			/*
+			 * Some future tick_nohz_full_kick_task()
+			 * should optimize this.
+			 */
+			tick_nohz_full_kick_all();
+		}
+	}
}
+EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);
 
 void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
 {
 	atomic_andnot(BIT(bit), &tsk->tick_dep_mask);
 }
+EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task);
 
 /*
  * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
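These EXPORT_SYMBOL_GPL additions let modular code pin the tick on a CPU or task through the dependency mask. A hedged usage sketch, with hypothetical profiler_* helpers (TICK_DEP_BIT_PERF_EVENTS is one of the existing tick_dep_bits values):

	#include <linux/tick.h>

	/* Keep the tick alive on @cpu while events are being sampled there. */
	static void profiler_attach_cpu(int cpu)
	{
		tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_PERF_EVENTS);
	}

	/* Allow @cpu to stop its tick again once sampling is done. */
	static void profiler_detach_cpu(int cpu)
	{
		tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_PERF_EVENTS);
	}
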
@@ -403,8 +432,8 @@
 static int tick_nohz_cpu_down(unsigned int cpu)
 {
 	/*
-	 * The boot CPU handles housekeeping duty (unbound timers,
-	 * workqueues, timekeeping, ...) on behalf of full dynticks
+	 * The tick_do_timer_cpu CPU handles housekeeping duty (unbound
+	 * timers, workqueues, timekeeping, ...) on behalf of full dynticks
 	 * CPUs. It must remain online when nohz full is enabled.
 	 */
 	if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
@@ -431,12 +460,15 @@
 		return;
 	}
 
-	cpu = smp_processor_id();
+	if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) &&
+	    !IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) {
+		cpu = smp_processor_id();
 
-	if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
-		pr_warn("NO_HZ: Clearing %d from nohz_full range for timekeeping\n",
-			cpu);
-		cpumask_clear_cpu(cpu, tick_nohz_full_mask);
+		if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
+			pr_warn("NO_HZ: Clearing %d from nohz_full range "
+				"for timekeeping\n", cpu);
+			cpumask_clear_cpu(cpu, tick_nohz_full_mask);
+		}
 	}
 
 	for_each_cpu(cpu, tick_nohz_full_mask)
@@ -633,10 +665,12 @@
 	/* Forward the time to expire in the future */
 	hrtimer_forward(&ts->sched_timer, now, tick_period);
 
-	if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
-		hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
-	else
+	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+		hrtimer_start_expires(&ts->sched_timer,
+				      HRTIMER_MODE_ABS_PINNED_HARD);
+	} else {
 		tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
+	}
 
 	/*
 	 * Reset to make sure next tick stop doesn't get fooled by past
@@ -653,14 +687,15 @@
 static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 {
 	u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
-	unsigned long seq, basejiff;
+	unsigned long basejiff;
+	unsigned int seq;
 
 	/* Read jiffies and the time when jiffies were updated last */
 	do {
-		seq = read_seqbegin(&jiffies_lock);
+		seq = read_seqcount_begin(&jiffies_seq);
 		basemono = last_jiffies_update;
 		basejiff = jiffies;
-	} while (read_seqretry(&jiffies_lock, seq));
+	} while (read_seqcount_retry(&jiffies_seq, seq));
 	ts->last_jiffies = basejiff;
 	ts->timer_expires_base = basemono;
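Together with the write-side hunks earlier, this reader shows the point of the conversion: the old seqlock_t is split into a raw_spinlock_t that serializes writers (and remains a true spinning lock on PREEMPT_RT) and a seqcount_t that readers poll locklessly, retrying if a write raced with them. A self-contained sketch of the pattern, using hypothetical names (my_lock, my_seq, my_data):

	#include <linux/seqlock.h>
	#include <linux/spinlock.h>

	static DEFINE_RAW_SPINLOCK(my_lock);
	static seqcount_t my_seq = SEQCNT_ZERO(my_seq);
	static u64 my_data;

	static void my_write(u64 val)
	{
		raw_spin_lock(&my_lock);	/* serialize writers */
		write_seqcount_begin(&my_seq);	/* odd count: readers retry */
		my_data = val;
		write_seqcount_end(&my_seq);
		raw_spin_unlock(&my_lock);
	}

	static u64 my_read(void)
	{
		unsigned int seq;
		u64 val;

		do {
			seq = read_seqcount_begin(&my_seq);
			val = my_data;
		} while (read_seqcount_retry(&my_seq, seq));

		return val;
	}
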
@@ -780,7 +815,6 @@
 	 */
 	if (!ts->tick_stopped) {
 		calc_load_nohz_start();
-		cpu_load_update_nohz_start();
 		quiet_vmstat();
 
 		ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
@@ -801,7 +835,8 @@
 	}
 
 	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
-		hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start(&ts->sched_timer, tick,
+			      HRTIMER_MODE_ABS_PINNED_HARD);
 	} else {
 		hrtimer_set_expires(&ts->sched_timer, tick);
 		tick_program_event(tick, 1);
@@ -827,7 +862,6 @@
 {
 	/* Update jiffies first */
 	tick_do_update_jiffies64(now);
-	cpu_load_update_nohz_stop();
 
 	/*
 	 * Clear the timer idle flag, so we avoid IPIs on remote queueing and
@@ -890,12 +924,12 @@
 	if (need_resched())
 		return false;
 
-	if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
+	if (unlikely(local_softirq_pending())) {
 		static int ratelimit;
 
-		if (ratelimit < 10 &&
+		if (ratelimit < 10 && !local_bh_blocked() &&
 		    (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
-			pr_warn("NOHZ: local_softirq_pending %02x\n",
+			pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n",
 				(unsigned int) local_softirq_pending());
 			ratelimit++;
 		}
@@ -909,11 +943,9 @@
 		 */
 		if (tick_do_timer_cpu == cpu)
 			return false;
-		/*
-		 * Boot safety: make sure the timekeeping duty has been
-		 * assigned before entering dyntick-idle mode,
-		 */
-		if (tick_do_timer_cpu == TICK_DO_TIMER_NONE)
+
+		/* Should not happen for nohz-full */
+		if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
 			return false;
 	}
 
@@ -1031,6 +1063,18 @@
 }
 
 /**
+ * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer
+ * or the tick, whichever expires first. Note that, if the tick has been
+ * stopped, it returns the next hrtimer.
+ *
+ * Called from power state control code with interrupts disabled
+ */
+ktime_t tick_nohz_get_next_hrtimer(void)
+{
+	return __this_cpu_read(tick_cpu_device.evtdev)->next_event;
+}
+
+/**
  * tick_nohz_get_sleep_length - return the expected length of the current sleep
  * @delta_next: duration until the next event if the tick cannot be stopped
 *
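tick_nohz_get_next_hrtimer() simply reports the next event programmed on this CPU's clock event device, so once the tick is stopped it reflects the next pending hrtimer. A hedged caller sketch (hypothetical cpuidle-style helper; per the kerneldoc it must run with interrupts disabled):

	#include <linux/tick.h>
	#include <linux/ktime.h>

	/* True if the next programmed event is within ~100us. */
	static bool next_event_is_close(void)
	{
		ktime_t next = tick_nohz_get_next_hrtimer();

		/* irqs are off, so 'next' cannot be reprogrammed under us */
		return ktime_before(next, ktime_add_us(ktime_get(), 100));
	}
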
@@ -1082,6 +1126,7 @@
 
 	return ts->idle_calls;
 }
+EXPORT_SYMBOL_GPL(tick_nohz_get_idle_calls_cpu);
 
 /**
  * tick_nohz_get_idle_calls - return the current idle calls counter value
@@ -1100,7 +1145,7 @@
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	unsigned long ticks;
 
-	if (vtime_accounting_cpu_enabled())
+	if (vtime_accounting_enabled_this_cpu())
 		return;
 	/*
 	 * We stopped the tick in idle. Update process times would miss the
@@ -1214,7 +1259,7 @@
 	 * Recycle the hrtimer in ts, so we can share the
 	 * hrtimer_forward with the highres code.
 	 */
-	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
 	/* Get the next period */
 	next = tick_init_jiffy_update();
 
@@ -1259,18 +1304,6 @@
  * High resolution timer specific code
  */
 #ifdef CONFIG_HIGH_RES_TIMERS
-
-static void (*wake_callback)(void);
-
-void register_tick_sched_wakeup_callback(void (*cb)(void))
-{
-	if (!wake_callback)
-		wake_callback = cb;
-	else
-		pr_warn("tick-sched wake cb already exists; skipping.\n");
-}
-EXPORT_SYMBOL_GPL(register_tick_sched_wakeup_callback);
-
 /*
  * We rearm the timer until we get disabled by the idle code.
  * Called with interrupts disabled.
@@ -1288,15 +1321,8 @@
 	 * Do not call, when we are not in irq context and have
 	 * no valid regs pointer
 	 */
-	if (regs) {
+	if (regs)
 		tick_sched_handle(ts, regs);
-		if (wake_callback && tick_do_timer_cpu == smp_processor_id()) {
-			/*
-			 * wakeup user if needed
-			 */
-			wake_callback();
-		}
-	}
 	else
 		ts->next_tick = 0;
 
@@ -1330,7 +1356,7 @@
 	/*
 	 * Emulate tick processing via per-CPU hrtimers:
 	 */
-	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
 	ts->sched_timer.function = tick_sched_timer;
 
 	/* Get the next period (per-CPU) */
@@ -1345,7 +1371,7 @@
 	}
 
 	hrtimer_forward(&ts->sched_timer, now, tick_period);
-	hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
+	hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
 	tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
 }
 #endif /* HIGH_RES_TIMERS */
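The _HARD hrtimer modes used above make the emulated tick expire in hard interrupt context even on kernels where hrtimers otherwise default to softirq expiry (PREEMPT_RT). A minimal sketch of arming a hard, CPU-pinned, self-rearming hrtimer, using hypothetical my_timer names:

	#include <linux/hrtimer.h>
	#include <linux/ktime.h>

	static struct hrtimer my_timer;

	static enum hrtimer_restart my_timer_fn(struct hrtimer *t)
	{
		/* Expires in hardirq context because of the _HARD mode. */
		hrtimer_forward_now(t, ms_to_ktime(1));	/* re-arm 1ms ahead */
		return HRTIMER_RESTART;
	}

	static void my_timer_start(void)
	{
		hrtimer_init(&my_timer, CLOCK_MONOTONIC,
			     HRTIMER_MODE_ABS_PINNED_HARD);
		my_timer.function = my_timer_fn;
		hrtimer_start(&my_timer, ktime_add(ktime_get(), ms_to_ktime(1)),
			      HRTIMER_MODE_ABS_PINNED_HARD);
	}
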
@@ -1412,9 +1438,3 @@
 	tick_nohz_switch_to_nohz();
 	return 0;
 }
-
-ktime_t *get_next_event_cpu(unsigned int cpu)
-{
-	return &(per_cpu(tick_cpu_device, cpu).evtdev->next_event);
-}
-EXPORT_SYMBOL_GPL(get_next_event_cpu);