2024-05-10 cde9070d9970eef1f7ec2360586c802a16230ad8
kernel/kernel/time/tick-sched.c
@@ -54,49 +54,67 @@
  */
 static void tick_do_update_jiffies64(ktime_t now)
 {
-	unsigned long ticks = 0;
+	unsigned long ticks = 1;
 	ktime_t delta;

 	/*
-	 * Do a quick check without holding jiffies_lock:
-	 * The READ_ONCE() pairs with two updates done later in this function.
+	 * Do a quick check without holding jiffies_lock. The READ_ONCE()
+	 * pairs with the update done later in this function.
+	 *
+	 * This is also an intentional data race which is even safe on
+	 * 32bit in theory. If there is a concurrent update then the check
+	 * might give a random answer. It does not matter because if it
+	 * returns then the concurrent update is already taking care, if it
+	 * falls through then it will pointlessly contend on jiffies_lock.
+	 *
+	 * Though there is one nasty case on 32bit due to store tearing of
+	 * the 64bit value. If the first 32bit store makes the quick check
+	 * return on all other CPUs and the writing CPU context gets
+	 * delayed to complete the second store (scheduled out on virt)
+	 * then jiffies can become stale for up to ~2^32 nanoseconds
+	 * without noticing. After that point all CPUs will wait for
+	 * jiffies lock.
+	 *
+	 * OTOH, this is not any different than the situation with NOHZ=off
+	 * where one CPU is responsible for updating jiffies and
+	 * timekeeping. If that CPU goes out for lunch then all other CPUs
+	 * will operate on stale jiffies until it decides to come back.
 	 */
-	delta = ktime_sub(now, READ_ONCE(last_jiffies_update));
-	if (delta < tick_period)
+	if (ktime_before(now, READ_ONCE(tick_next_period)))
 		return;

 	/* Reevaluate with jiffies_lock held */
 	raw_spin_lock(&jiffies_lock);
-	write_seqcount_begin(&jiffies_seq);
-
-	delta = ktime_sub(now, last_jiffies_update);
-	if (delta >= tick_period) {
-
-		delta = ktime_sub(delta, tick_period);
-		/* Pairs with the lockless read in this function. */
-		WRITE_ONCE(last_jiffies_update,
-			   ktime_add(last_jiffies_update, tick_period));
-
-		/* Slow path for long timeouts */
-		if (unlikely(delta >= tick_period)) {
-			s64 incr = ktime_to_ns(tick_period);
-
-			ticks = ktime_divns(delta, incr);
-
-			/* Pairs with the lockless read in this function. */
-			WRITE_ONCE(last_jiffies_update,
-				   ktime_add_ns(last_jiffies_update,
-						incr * ticks));
-		}
-		do_timer(++ticks);
-
-		/* Keep the tick_next_period variable up to date */
-		tick_next_period = ktime_add(last_jiffies_update, tick_period);
-	} else {
-		write_seqcount_end(&jiffies_seq);
+	if (ktime_before(now, tick_next_period)) {
 		raw_spin_unlock(&jiffies_lock);
 		return;
 	}
+
+	write_seqcount_begin(&jiffies_seq);
+
+	delta = ktime_sub(now, tick_next_period);
+	if (unlikely(delta >= TICK_NSEC)) {
+		/* Slow path for long idle sleep times */
+		s64 incr = TICK_NSEC;
+
+		ticks += ktime_divns(delta, incr);
+
+		last_jiffies_update = ktime_add_ns(last_jiffies_update,
+						   incr * ticks);
+	} else {
+		last_jiffies_update = ktime_add_ns(last_jiffies_update,
+						   TICK_NSEC);
+	}
+
+	do_timer(ticks);
+
+	/*
+	 * Keep the tick_next_period variable up to date. WRITE_ONCE()
+	 * pairs with the READ_ONCE() in the lockless quick check above.
+	 */
+	WRITE_ONCE(tick_next_period,
+		   ktime_add_ns(last_jiffies_update, TICK_NSEC));
+
 	write_seqcount_end(&jiffies_seq);
 	raw_spin_unlock(&jiffies_lock);
 	update_wall_time();
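The rewritten fast path is a check-then-recheck: read tick_next_period locklessly, return early if the tick has not elapsed, and re-evaluate under jiffies_lock before touching any state. Below is a minimal userspace sketch of that pattern, with C11 atomics and a pthread mutex standing in for READ_ONCE()/WRITE_ONCE() and the raw spinlock; all names are illustrative, not kernel API.

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdint.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static _Atomic int64_t next_period_ns;	/* plays the role of tick_next_period */
	static int64_t jiffies64;		/* protected by lock, like last_jiffies_update */

	static void update_jiffies(int64_t now_ns, int64_t tick_ns)
	{
		/* Lockless quick check: a stale read costs at most a wasted lock trip. */
		if (now_ns < atomic_load_explicit(&next_period_ns, memory_order_relaxed))
			return;

		pthread_mutex_lock(&lock);
		/* Re-evaluate: a concurrent updater may already have advanced the period. */
		int64_t next = atomic_load_explicit(&next_period_ns, memory_order_relaxed);
		if (now_ns < next) {
			pthread_mutex_unlock(&lock);
			return;
		}

		/* ticks starts at 1; long sleeps fold in extra whole ticks, as above. */
		int64_t ticks = 1 + (now_ns - next) / tick_ns;

		jiffies64 += ticks;
		atomic_store_explicit(&next_period_ns, next + ticks * tick_ns,
				      memory_order_relaxed);
		pthread_mutex_unlock(&lock);
	}

The relaxed ordering is deliberate: as the comment in the patch explains, a racy read of the deadline can only cause an early return (someone else is updating) or a pointless trip through the lock, never a wrong jiffies value.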
@@ -112,13 +130,26 @@
 	raw_spin_lock(&jiffies_lock);
 	write_seqcount_begin(&jiffies_seq);
 	/* Did we start the jiffies update yet ? */
-	if (last_jiffies_update == 0)
+	if (last_jiffies_update == 0) {
+		u32 rem;
+
+		/*
+		 * Ensure that the tick is aligned to a multiple of
+		 * TICK_NSEC.
+		 */
+		div_u64_rem(tick_next_period, TICK_NSEC, &rem);
+		if (rem)
+			tick_next_period += TICK_NSEC - rem;
+
 		last_jiffies_update = tick_next_period;
+	}
 	period = last_jiffies_update;
 	write_seqcount_end(&jiffies_seq);
 	raw_spin_unlock(&jiffies_lock);
 	return period;
 }
+
+#define MAX_STALLED_JIFFIES 5

 static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
 {
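The new branch in tick_init_jiffy_update() rounds tick_next_period up to the next multiple of TICK_NSEC so that jiffies updates stay tick-aligned; div_u64_rem() is simply 64-bit division with remainder. A standalone sketch of the same round-up arithmetic (hypothetical helper name):

	#include <stdint.h>

	/* Hypothetical helper; step must be non-zero. */
	static uint64_t round_up_to(uint64_t x, uint64_t step)
	{
		uint64_t rem = x % step;

		return rem ? x + (step - rem) : x;
	}

	/*
	 * With HZ=1000 (TICK_NSEC = 1000000):
	 *   round_up_to(1234567, 1000000) == 2000000
	 *   round_up_to(3000000, 1000000) == 3000000  (already aligned, untouched)
	 */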
@@ -147,6 +178,21 @@
 	if (tick_do_timer_cpu == cpu) {
 		tick_do_update_jiffies64(now);
 		trace_android_vh_jiffies_update(NULL);
+	}
+
+	/*
+	 * If jiffies update stalled for too long (timekeeper in stop_machine()
+	 * or VMEXIT'ed for several msecs), force an update.
+	 */
+	if (ts->last_tick_jiffies != jiffies) {
+		ts->stalled_jiffies = 0;
+		ts->last_tick_jiffies = READ_ONCE(jiffies);
+	} else {
+		if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) {
+			tick_do_update_jiffies64(now);
+			ts->stalled_jiffies = 0;
+			ts->last_tick_jiffies = READ_ONCE(jiffies);
+		}
 	}

 	if (ts->inidle)
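The added block is a stall detector: each CPU snapshots jiffies on its own tick, and if the snapshot has not moved for MAX_STALLED_JIFFIES consecutive local ticks it forces the update itself, covering a timekeeper CPU stuck in stop_machine() or a long VMEXIT. A self-contained sketch of that counter pattern (illustrative names; force_update() is a hypothetical stand-in for tick_do_update_jiffies64()):

	#include <stdint.h>

	#define MAX_STALLED 5	/* mirrors MAX_STALLED_JIFFIES above */

	struct tick_state {
		uint64_t last_seen;	/* last observed value of the global counter */
		unsigned int stalled;	/* consecutive local ticks without progress */
	};

	static void force_update(void)
	{
		/* hypothetical stand-in: advance the global counter here */
	}

	static void on_local_tick(struct tick_state *st, uint64_t global_counter)
	{
		if (st->last_seen != global_counter) {
			/* Someone advanced the counter; restart the stall window. */
			st->stalled = 0;
			st->last_seen = global_counter;
		} else if (++st->stalled == MAX_STALLED) {
			/*
			 * No progress for MAX_STALLED of our own ticks: the
			 * responsible CPU is stuck, so do the update ourselves.
			 * The kernel code then re-reads jiffies, since the
			 * forced update just advanced it.
			 */
			force_update();
			st->stalled = 0;
			st->last_seen = global_counter;
		}
	}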
@@ -213,6 +259,11 @@

 	if (val & TICK_DEP_MASK_RCU) {
 		trace_tick_stop(0, TICK_DEP_MASK_RCU);
+		return true;
+	}
+
+	if (val & TICK_DEP_MASK_RCU_EXP) {
+		trace_tick_stop(0, TICK_DEP_MASK_RCU_EXP);
 		return true;
 	}

@@ -429,7 +480,7 @@
 	tick_nohz_full_running = true;
 }

-static int tick_nohz_cpu_down(unsigned int cpu)
+bool tick_nohz_cpu_hotpluggable(unsigned int cpu)
 {
 	/*
 	 * The tick_do_timer_cpu CPU handles housekeeping duty (unbound
@@ -437,8 +488,13 @@
 	 * CPUs. It must remain online when nohz full is enabled.
 	 */
 	if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
-		return -EBUSY;
-	return 0;
+		return false;
+	return true;
+}
+
+static int tick_nohz_cpu_down(unsigned int cpu)
+{
+	return tick_nohz_cpu_hotpluggable(cpu) ? 0 : -EBUSY;
 }

 void __init tick_nohz_init(void)
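Splitting the -EBUSY policy out of tick_nohz_cpu_down() leaves a reusable predicate, so other code can ask whether a CPU may go offline without interpreting an errno. A hypothetical caller might look like this (only tick_nohz_cpu_hotpluggable() is real):

	#include <stdbool.h>

	bool tick_nohz_cpu_hotpluggable(unsigned int cpu);	/* from tick-sched.c */

	static bool can_offline_cpu(unsigned int cpu)
	{
		if (!tick_nohz_cpu_hotpluggable(cpu))
			return false;	/* the timekeeping CPU must stay online */
		/* ... further subsystem-specific checks would go here ... */
		return true;
	}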
@@ -663,7 +719,7 @@
 	hrtimer_set_expires(&ts->sched_timer, ts->last_tick);

 	/* Forward the time to expire in the future */
-	hrtimer_forward(&ts->sched_timer, now, tick_period);
+	hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);

 	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
 		hrtimer_start_expires(&ts->sched_timer,
@@ -831,6 +887,8 @@
 	if (unlikely(expires == KTIME_MAX)) {
 		if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
 			hrtimer_cancel(&ts->sched_timer);
+		else
+			tick_program_event(KTIME_MAX, 1);
 		return;
 	}

@@ -927,7 +985,7 @@
 	if (unlikely(local_softirq_pending())) {
 		static int ratelimit;

-		if (ratelimit < 10 && !local_bh_blocked() &&
+		if (ratelimit < 10 &&
 		    (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
 			pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n",
 				(unsigned int) local_softirq_pending());
@@ -1223,11 +1281,17 @@
 	tick_sched_do_timer(ts, now);
 	tick_sched_handle(ts, regs);

-	/* No need to reprogram if we are running tickless */
-	if (unlikely(ts->tick_stopped))
+	if (unlikely(ts->tick_stopped)) {
+		/*
+		 * The clockevent device is not reprogrammed, so change the
+		 * clock event device to ONESHOT_STOPPED to avoid spurious
+		 * interrupts on devices which might not be truly one shot.
+		 */
+		tick_program_event(KTIME_MAX, 1);
 		return;
+	}

-	hrtimer_forward(&ts->sched_timer, now, tick_period);
+	hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
 	tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
 }

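Both tick_program_event(KTIME_MAX, 1) call sites exist for the same reason: many clockevent devices emulate one-shot mode on hardware that is really periodic-capable, so a device that is not explicitly parked can keep firing after the tick is stopped. Programming KTIME_MAX moves it to the ONESHOT_STOPPED state. A toy model of that state machine (illustrative types only, not the clockevents API):

	#include <stdint.h>

	/* Toy model; the real state lives in struct clock_event_device. */
	enum toy_state { TOY_ONESHOT, TOY_ONESHOT_STOPPED };

	struct toy_clockevent {
		enum toy_state state;
		int64_t expires_ns;
	};

	static void toy_program_event(struct toy_clockevent *dev, int64_t expires_ns)
	{
		if (expires_ns == INT64_MAX) {
			/* Park the device: no interrupts until it is reprogrammed. */
			dev->state = TOY_ONESHOT_STOPPED;
			return;
		}
		dev->state = TOY_ONESHOT;
		dev->expires_ns = expires_ns;
		/* ...here the hardware comparator would be written... */
	}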
@@ -1264,7 +1328,7 @@
 	next = tick_init_jiffy_update();

 	hrtimer_set_expires(&ts->sched_timer, next);
-	hrtimer_forward_now(&ts->sched_timer, tick_period);
+	hrtimer_forward_now(&ts->sched_timer, TICK_NSEC);
 	tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
 	tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
 }
@@ -1330,7 +1394,7 @@
 	if (unlikely(ts->tick_stopped))
 		return HRTIMER_NORESTART;

-	hrtimer_forward(timer, now, tick_period);
+	hrtimer_forward(timer, now, TICK_NSEC);

 	return HRTIMER_RESTART;
 }
@@ -1364,13 +1428,13 @@

 	/* Offset the tick to avert jiffies_lock contention. */
 	if (sched_skew_tick) {
-		u64 offset = ktime_to_ns(tick_period) >> 1;
+		u64 offset = TICK_NSEC >> 1;
 		do_div(offset, num_possible_cpus());
 		offset *= smp_processor_id();
 		hrtimer_add_expires_ns(&ts->sched_timer, offset);
 	}

-	hrtimer_forward(&ts->sched_timer, now, tick_period);
+	hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
 	hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
 	tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
 }
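The skew block spreads per-CPU tick expiry across half a tick so the CPUs do not all pile onto jiffies_lock at the same instant; the change only swaps ktime_to_ns(tick_period) for the constant TICK_NSEC. A sketch of the same arithmetic with a worked example (hypothetical helper, userspace types):

	#include <stdint.h>

	/* Hypothetical mirror of the skew arithmetic; not a kernel function. */
	static uint64_t tick_skew_ns(unsigned int cpu, unsigned int ncpus,
				     uint64_t tick_ns)
	{
		uint64_t offset = tick_ns >> 1;	/* spread across half a tick */

		offset /= ncpus;		/* the do_div() above */
		return offset * cpu;
	}

	/*
	 * With HZ=250 (tick_ns = 4000000) and 4 possible CPUs:
	 *   CPU0 -> 0, CPU1 -> 500000, CPU2 -> 1000000, CPU3 -> 1500000 ns,
	 * so tick expiry is staggered instead of every CPU contending on
	 * jiffies_lock at the same instant.
	 */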