From 244b2c5ca8b14627e4a17755e5922221e121c771 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Wed, 09 Oct 2024 06:15:07 +0000
Subject: [PATCH] time/tick-sched: update NOHZ tick and jiffies handling

Replace the tick_period variable with TICK_NSEC, use tick_next_period for
the lockless quick check in tick_do_update_jiffies64(), force a jiffies
update when it stalls for MAX_STALLED_JIFFIES ticks, handle the RCU and
RCU_EXP tick dependency bits, export the nohz dependency helpers, move the
sched timer to HRTIMER_MODE_*_HARD, call trace_android_vh_jiffies_update()
after the jiffies update, and drop the out-of-tree wake-callback and
get_next_event_cpu() helpers.
---
kernel/kernel/time/tick-sched.c | 305 ++++++++++++++++++++++++++++++++------------------
1 file changed, 195 insertions(+), 110 deletions(-)
diff --git a/kernel/kernel/time/tick-sched.c b/kernel/kernel/time/tick-sched.c
index 28e4f21..3b9c648 100644
--- a/kernel/kernel/time/tick-sched.c
+++ b/kernel/kernel/time/tick-sched.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * linux/kernel/time/tick-sched.c
- *
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
@@ -8,8 +7,6 @@
* No idle tick implementation for low and high resolution timers
*
* Started by: Thomas Gleixner and Ingo Molnar
- *
- * Distribute under GPLv2.
*/
#include <linux/cpu.h>
#include <linux/err.h>
@@ -26,9 +23,9 @@
#include <linux/module.h>
#include <linux/irq_work.h>
#include <linux/posix-timers.h>
-#include <linux/timer.h>
#include <linux/context_tracking.h>
#include <linux/mm.h>
+#include <trace/hooks/sched.h>
#include <asm/irq_regs.h>
@@ -57,49 +54,67 @@
*/
static void tick_do_update_jiffies64(ktime_t now)
{
- unsigned long ticks = 0;
+ unsigned long ticks = 1;
ktime_t delta;
/*
- * Do a quick check without holding jiffies_lock:
- * The READ_ONCE() pairs with two updates done later in this function.
+ * Do a quick check without holding jiffies_lock. The READ_ONCE()
+ * pairs with the update done later in this function.
+ *
+ * This is also an intentional data race which is even safe on
+ * 32bit in theory. If there is a concurrent update then the check
+ * might give a random answer. It does not matter because if it
+ * returns then the concurrent update is already taking care of it;
+ * if it falls through then it will pointlessly contend on jiffies_lock.
+ *
+ * Though there is one nasty case on 32bit due to store tearing of
+ * the 64bit value. If the first 32bit store makes the quick check
+ * return on all other CPUs and the writing CPU context gets
+ * delayed to complete the second store (scheduled out on virt)
+ * then jiffies can become stale for up to ~2^32 nanoseconds
+ * without noticing. After that point all CPUs will wait for
+ * jiffies lock.
+ *
+ * OTOH, this is not any different than the situation with NOHZ=off
+ * where one CPU is responsible for updating jiffies and
+ * timekeeping. If that CPU goes out for lunch then all other CPUs
+ * will operate on stale jiffies until it decides to come back.
*/
- delta = ktime_sub(now, READ_ONCE(last_jiffies_update));
- if (delta < tick_period)
+ if (ktime_before(now, READ_ONCE(tick_next_period)))
return;
/* Reevaluate with jiffies_lock held */
raw_spin_lock(&jiffies_lock);
- write_seqcount_begin(&jiffies_seq);
-
- delta = ktime_sub(now, last_jiffies_update);
- if (delta >= tick_period) {
-
- delta = ktime_sub(delta, tick_period);
- /* Pairs with the lockless read in this function. */
- WRITE_ONCE(last_jiffies_update,
- ktime_add(last_jiffies_update, tick_period));
-
- /* Slow path for long timeouts */
- if (unlikely(delta >= tick_period)) {
- s64 incr = ktime_to_ns(tick_period);
-
- ticks = ktime_divns(delta, incr);
-
- /* Pairs with the lockless read in this function. */
- WRITE_ONCE(last_jiffies_update,
- ktime_add_ns(last_jiffies_update,
- incr * ticks));
- }
- do_timer(++ticks);
-
- /* Keep the tick_next_period variable up to date */
- tick_next_period = ktime_add(last_jiffies_update, tick_period);
- } else {
- write_seqcount_end(&jiffies_seq);
+ if (ktime_before(now, tick_next_period)) {
raw_spin_unlock(&jiffies_lock);
return;
}
+
+ write_seqcount_begin(&jiffies_seq);
+
+ delta = ktime_sub(now, tick_next_period);
+ if (unlikely(delta >= TICK_NSEC)) {
+ /* Slow path for long idle sleep times */
+ s64 incr = TICK_NSEC;
+
+ ticks += ktime_divns(delta, incr);
+
+ last_jiffies_update = ktime_add_ns(last_jiffies_update,
+ incr * ticks);
+ } else {
+ last_jiffies_update = ktime_add_ns(last_jiffies_update,
+ TICK_NSEC);
+ }
+
+ do_timer(ticks);
+
+ /*
+ * Keep the tick_next_period variable up to date. WRITE_ONCE()
+ * pairs with the READ_ONCE() in the lockless quick check above.
+ */
+ WRITE_ONCE(tick_next_period,
+ ktime_add_ns(last_jiffies_update, TICK_NSEC));
+
write_seqcount_end(&jiffies_seq);
raw_spin_unlock(&jiffies_lock);
update_wall_time();
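
To make the catch-up arithmetic in the hunk above easier to follow, here is a
minimal standalone userspace sketch of it (illustration only, not part of the
patch). It assumes HZ=100, so one tick is 10,000,000 ns, and replaces
do_timer() and the ktime helpers with plain integers; the names only mirror
the kernel variables.

/* Catch-up arithmetic of tick_do_update_jiffies64(), modelled in userspace. */
#include <stdio.h>
#include <stdint.h>

#define TICK_NSEC 10000000ULL			/* assumed: HZ=100 */

static uint64_t jiffies;
static uint64_t last_jiffies_update;		/* ns stamp of the last accounted tick */
static uint64_t tick_next_period;		/* ns stamp of the next tick edge */

static void do_update_jiffies64(uint64_t now)
{
	uint64_t ticks = 1, delta;

	/* Quick check: nothing to do before the next tick edge. */
	if (now < tick_next_period)
		return;

	delta = now - tick_next_period;
	if (delta >= TICK_NSEC) {
		/* Slow path: long idle sleep, account all missed ticks at once. */
		ticks += delta / TICK_NSEC;
		last_jiffies_update += TICK_NSEC * ticks;
	} else {
		last_jiffies_update += TICK_NSEC;
	}

	jiffies += ticks;			/* stands in for do_timer(ticks) */
	tick_next_period = last_jiffies_update + TICK_NSEC;
}

int main(void)
{
	tick_next_period = TICK_NSEC;

	do_update_jiffies64(TICK_NSEC);			/* exactly one tick elapsed */
	printf("jiffies = %llu\n", (unsigned long long)jiffies);	/* 1 */

	do_update_jiffies64(5 * TICK_NSEC + 123);	/* woke up four ticks late */
	printf("jiffies = %llu\n", (unsigned long long)jiffies);	/* 5 */
	return 0;
}
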
@@ -115,13 +130,26 @@
raw_spin_lock(&jiffies_lock);
write_seqcount_begin(&jiffies_seq);
/* Did we start the jiffies update yet ? */
- if (last_jiffies_update == 0)
+ if (last_jiffies_update == 0) {
+ u32 rem;
+
+ /*
+ * Ensure that the tick is aligned to a multiple of
+ * TICK_NSEC.
+ */
+ div_u64_rem(tick_next_period, TICK_NSEC, &rem);
+ if (rem)
+ tick_next_period += TICK_NSEC - rem;
+
last_jiffies_update = tick_next_period;
+ }
period = last_jiffies_update;
write_seqcount_end(&jiffies_seq);
raw_spin_unlock(&jiffies_lock);
return period;
}
+
+#define MAX_STALLED_JIFFIES 5
static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
@@ -134,15 +162,38 @@
* into a long sleep. If two CPUs happen to assign themselves to
* this duty, then the jiffies update is still serialized by
* jiffies_lock.
+ *
+ * If nohz_full is enabled, this should not happen because the
+ * tick_do_timer_cpu never relinquishes.
*/
- if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)
- && !tick_nohz_full_cpu(cpu))
+ if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+#ifdef CONFIG_NO_HZ_FULL
+ WARN_ON_ONCE(tick_nohz_full_running);
+#endif
tick_do_timer_cpu = cpu;
+ }
#endif
/* Check, if the jiffies need an update */
- if (tick_do_timer_cpu == cpu)
+ if (tick_do_timer_cpu == cpu) {
tick_do_update_jiffies64(now);
+ trace_android_vh_jiffies_update(NULL);
+ }
+
+ /*
+ * If jiffies update stalled for too long (timekeeper in stop_machine()
+ * or VMEXIT'ed for several msecs), force an update.
+ */
+ if (ts->last_tick_jiffies != jiffies) {
+ ts->stalled_jiffies = 0;
+ ts->last_tick_jiffies = READ_ONCE(jiffies);
+ } else {
+ if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) {
+ tick_do_update_jiffies64(now);
+ ts->stalled_jiffies = 0;
+ ts->last_tick_jiffies = READ_ONCE(jiffies);
+ }
+ }
if (ts->inidle)
ts->got_idle_tick = 1;
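
Below is a standalone model of the MAX_STALLED_JIFFIES logic introduced in the
hunk above, for illustration only: jiffies is an ordinary variable here and
the forced tick_do_update_jiffies64() call is simulated by advancing it
directly.

/* Userspace model of the stalled-jiffies detection in tick_sched_do_timer(). */
#include <stdio.h>
#include <stdint.h>

#define MAX_STALLED_JIFFIES 5

struct tick_state {
	uint64_t last_tick_jiffies;
	unsigned int stalled_jiffies;
};

static uint64_t jiffies = 100;	/* normally advanced by the timekeeping CPU */

static void on_tick(struct tick_state *ts)
{
	if (ts->last_tick_jiffies != jiffies) {
		ts->stalled_jiffies = 0;
		ts->last_tick_jiffies = jiffies;
	} else if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) {
		jiffies++;	/* stands in for tick_do_update_jiffies64(now) */
		ts->stalled_jiffies = 0;
		ts->last_tick_jiffies = jiffies;
		printf("forced jiffies update -> %llu\n",
		       (unsigned long long)jiffies);
	}
}

int main(void)
{
	struct tick_state ts = { 0, 0 };
	int i;

	/* The timekeeper has stalled, so jiffies never moves on its own. */
	for (i = 0; i < 12; i++)
		on_tick(&ts);
	return 0;
}
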
@@ -179,6 +230,7 @@
#ifdef CONFIG_NO_HZ_FULL
cpumask_var_t tick_nohz_full_mask;
bool tick_nohz_full_running;
+EXPORT_SYMBOL_GPL(tick_nohz_full_running);
static atomic_t tick_dep_mask;
static bool check_tick_dependency(atomic_t *dep)
@@ -202,6 +254,16 @@
if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) {
trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
+ return true;
+ }
+
+ if (val & TICK_DEP_MASK_RCU) {
+ trace_tick_stop(0, TICK_DEP_MASK_RCU);
+ return true;
+ }
+
+ if (val & TICK_DEP_MASK_RCU_EXP) {
+ trace_tick_stop(0, TICK_DEP_MASK_RCU_EXP);
return true;
}
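
The RCU and RCU_EXP additions above follow the existing dependency-mask
pattern: the tick may only be stopped when no bit is set. The sketch below
(illustration only, C11 atomics in userspace) shows the idea; the bit names
are local stand-ins for the kernel's TICK_DEP_* constants.

/* Userspace sketch of the dependency-mask check; bit names are stand-ins. */
#include <stdio.h>
#include <stdatomic.h>

enum { DEP_POSIX_TIMER, DEP_PERF_EVENTS, DEP_SCHED, DEP_CLOCK_UNSTABLE,
       DEP_RCU, DEP_RCU_EXP };
#define BIT(b)	(1U << (b))

static atomic_uint tick_dep_mask;

/* Returns nonzero when some dependency forbids stopping the tick. */
static int check_tick_dependency(atomic_uint *dep)
{
	unsigned int val = atomic_load(dep);

	if (val & BIT(DEP_RCU)) {
		printf("tick kept: RCU callbacks need it\n");
		return 1;
	}
	if (val & BIT(DEP_RCU_EXP)) {
		printf("tick kept: expedited RCU needs it\n");
		return 1;
	}
	return val != 0;	/* any other set bit also keeps the tick */
}

int main(void)
{
	printf("can stop tick: %d\n", !check_tick_dependency(&tick_dep_mask));
	atomic_fetch_or(&tick_dep_mask, BIT(DEP_RCU));
	printf("can stop tick: %d\n", !check_tick_dependency(&tick_dep_mask));
	return 0;
}
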
@@ -237,7 +299,7 @@
static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
.func = nohz_full_kick_func,
- .flags = IRQ_WORK_HARD_IRQ,
+ .flags = ATOMIC_INIT(IRQ_WORK_HARD_IRQ),
};
/*
@@ -332,6 +394,7 @@
preempt_enable();
}
}
+EXPORT_SYMBOL_GPL(tick_nohz_dep_set_cpu);
void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
{
@@ -339,24 +402,35 @@
atomic_andnot(BIT(bit), &ts->tick_dep_mask);
}
+EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
/*
- * Set a per-task tick dependency. Posix CPU timers need this in order to elapse
- * per task timers.
+ * Set a per-task tick dependency. RCU needs this, as do posix CPU timers
+ * in order to elapse per-task timers.
*/
void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
- /*
- * We could optimize this with just kicking the target running the task
- * if that noise matters for nohz full users.
- */
- tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
+ if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) {
+ if (tsk == current) {
+ preempt_disable();
+ tick_nohz_full_kick();
+ preempt_enable();
+ } else {
+ /*
+ * Some future tick_nohz_full_kick_task()
+ * should optimize this.
+ */
+ tick_nohz_full_kick_all();
+ }
+ }
}
+EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);
void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
atomic_andnot(BIT(bit), &tsk->tick_dep_mask);
}
+EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task);
/*
* Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
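
The atomic_fetch_or() change above means only the caller that transitions the
mask from empty to set pays for a kick. A userspace sketch of that pattern
follows (illustration only); the kick itself is mocked, whereas the kernel
uses tick_nohz_full_kick() or tick_nohz_full_kick_all().

/* Userspace sketch: kick only when the dependency mask goes from empty to set. */
#include <stdio.h>
#include <stdatomic.h>

#define BIT(b)	(1U << (b))

struct task {
	atomic_uint tick_dep_mask;
};

static void kick(struct task *tsk)
{
	(void)tsk;
	printf("kick issued\n");	/* tick_nohz_full_kick*() in the kernel */
}

static void dep_set_task(struct task *tsk, unsigned int bit)
{
	/* Old mask was empty: the tick may have been stopped, so kick. */
	if (!atomic_fetch_or(&tsk->tick_dep_mask, BIT(bit)))
		kick(tsk);
}

int main(void)
{
	struct task t;

	atomic_init(&t.tick_dep_mask, 0);
	dep_set_task(&t, 3);	/* kicks */
	dep_set_task(&t, 3);	/* bit already set: no second kick */
	return 0;
}
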
@@ -406,16 +480,21 @@
tick_nohz_full_running = true;
}
-static int tick_nohz_cpu_down(unsigned int cpu)
+bool tick_nohz_cpu_hotpluggable(unsigned int cpu)
{
/*
- * The boot CPU handles housekeeping duty (unbound timers,
- * workqueues, timekeeping, ...) on behalf of full dynticks
+ * The tick_do_timer_cpu CPU handles housekeeping duty (unbound
+ * timers, workqueues, timekeeping, ...) on behalf of full dynticks
* CPUs. It must remain online when nohz full is enabled.
*/
if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
- return -EBUSY;
- return 0;
+ return false;
+ return true;
+}
+
+static int tick_nohz_cpu_down(unsigned int cpu)
+{
+ return tick_nohz_cpu_hotpluggable(cpu) ? 0 : -EBUSY;
}
void __init tick_nohz_init(void)
@@ -437,12 +516,15 @@
return;
}
- cpu = smp_processor_id();
+ if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) &&
+ !IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) {
+ cpu = smp_processor_id();
- if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
- pr_warn("NO_HZ: Clearing %d from nohz_full range for timekeeping\n",
- cpu);
- cpumask_clear_cpu(cpu, tick_nohz_full_mask);
+ if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
+ pr_warn("NO_HZ: Clearing %d from nohz_full range for timekeeping\n",
+         cpu);
+ cpumask_clear_cpu(cpu, tick_nohz_full_mask);
+ }
}
for_each_cpu(cpu, tick_nohz_full_mask)
@@ -637,12 +719,14 @@
hrtimer_set_expires(&ts->sched_timer, ts->last_tick);
/* Forward the time to expire in the future */
- hrtimer_forward(&ts->sched_timer, now, tick_period);
+ hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
- if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
- hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
- else
+ if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+ hrtimer_start_expires(&ts->sched_timer,
+ HRTIMER_MODE_ABS_PINNED_HARD);
+ } else {
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
+ }
/*
* Reset to make sure next tick stop doesn't get fooled by past
@@ -659,7 +743,8 @@
static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
{
u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
- unsigned long seq, basejiff;
+ unsigned long basejiff;
+ unsigned int seq;
/* Read jiffies and the time when jiffies were updated last */
do {
@@ -786,7 +871,6 @@
*/
if (!ts->tick_stopped) {
calc_load_nohz_start();
- cpu_load_update_nohz_start();
quiet_vmstat();
ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
@@ -803,11 +887,14 @@
if (unlikely(expires == KTIME_MAX)) {
if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
hrtimer_cancel(&ts->sched_timer);
+ else
+ tick_program_event(KTIME_MAX, 1);
return;
}
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
- hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_start(&ts->sched_timer, tick,
+ HRTIMER_MODE_ABS_PINNED_HARD);
} else {
hrtimer_set_expires(&ts->sched_timer, tick);
tick_program_event(tick, 1);
@@ -833,7 +920,6 @@
{
/* Update jiffies first */
tick_do_update_jiffies64(now);
- cpu_load_update_nohz_stop();
/*
* Clear the timer idle flag, so we avoid IPIs on remote queueing and
@@ -896,8 +982,15 @@
if (need_resched())
return false;
- if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
- softirq_check_pending_idle();
+ if (unlikely(local_softirq_pending())) {
+ static int ratelimit;
+
+ if (ratelimit < 10 &&
+ (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
+ pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n",
+ (unsigned int) local_softirq_pending());
+ ratelimit++;
+ }
return false;
}
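
The replacement warning above is rate limited with a simple static counter.
The sketch below reproduces that pattern in isolation (illustration only);
SOFTIRQ_STOP_IDLE_MASK is given an assumed placeholder value, not the
kernel's.

/* Userspace sketch of the rate-limited "softirq pending" warning. */
#include <stdio.h>

#define SOFTIRQ_STOP_IDLE_MASK 0x0fU	/* assumed placeholder, not the kernel value */

/* Returns 0: the tick must not be stopped while softirq work is pending. */
static int can_stop_idle_tick(unsigned int pending)
{
	static int ratelimit;

	if (ratelimit < 10 && (pending & SOFTIRQ_STOP_IDLE_MASK)) {
		printf("NOHZ tick-stop error: work pending, handler #%02x!!!\n",
		       pending);
		ratelimit++;
	}
	return 0;
}

int main(void)
{
	int i;

	for (i = 0; i < 15; i++)	/* only the first 10 iterations warn */
		can_stop_idle_tick(0x02U);
	return 0;
}
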
@@ -908,11 +1001,9 @@
*/
if (tick_do_timer_cpu == cpu)
return false;
- /*
- * Boot safety: make sure the timekeeping duty has been
- * assigned before entering dyntick-idle mode,
- */
- if (tick_do_timer_cpu == TICK_DO_TIMER_NONE)
+
+ /* Should not happen for nohz-full */
+ if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
return false;
}
@@ -1030,6 +1121,18 @@
}
/**
+ * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer
+ * or the tick, whichever expires first. Note that, if the tick has been
+ * stopped, it returns the next hrtimer.
+ *
+ * Called from power state control code with interrupts disabled
+ */
+ktime_t tick_nohz_get_next_hrtimer(void)
+{
+ return __this_cpu_read(tick_cpu_device.evtdev)->next_event;
+}
+
+/**
* tick_nohz_get_sleep_length - return the expected length of the current sleep
* @delta_next: duration until the next event if the tick cannot be stopped
*
@@ -1081,6 +1184,7 @@
return ts->idle_calls;
}
+EXPORT_SYMBOL_GPL(tick_nohz_get_idle_calls_cpu);
/**
* tick_nohz_get_idle_calls - return the current idle calls counter value
@@ -1099,7 +1203,7 @@
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
unsigned long ticks;
- if (vtime_accounting_cpu_enabled())
+ if (vtime_accounting_enabled_this_cpu())
return;
/*
* We stopped the tick in idle. Update process times would miss the
@@ -1177,11 +1281,17 @@
tick_sched_do_timer(ts, now);
tick_sched_handle(ts, regs);
- /* No need to reprogram if we are running tickless */
- if (unlikely(ts->tick_stopped))
+ if (unlikely(ts->tick_stopped)) {
+ /*
+ * The clockevent device is not reprogrammed, so change the
+ * clock event device to ONESHOT_STOPPED to avoid spurious
+ * interrupts on devices which might not be truly one shot.
+ */
+ tick_program_event(KTIME_MAX, 1);
return;
+ }
- hrtimer_forward(&ts->sched_timer, now, tick_period);
+ hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
}
@@ -1213,12 +1323,12 @@
* Recycle the hrtimer in ts, so we can share the
* hrtimer_forward with the highres code.
*/
- hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
/* Get the next period */
next = tick_init_jiffy_update();
hrtimer_set_expires(&ts->sched_timer, next);
- hrtimer_forward_now(&ts->sched_timer, tick_period);
+ hrtimer_forward_now(&ts->sched_timer, TICK_NSEC);
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
}
@@ -1258,18 +1368,6 @@
* High resolution timer specific code
*/
#ifdef CONFIG_HIGH_RES_TIMERS
-
-static void (*wake_callback)(void);
-
-void register_tick_sched_wakeup_callback(void (*cb)(void))
-{
- if (!wake_callback)
- wake_callback = cb;
- else
- pr_warn("tick-sched wake cb already exists; skipping.\n");
-}
-EXPORT_SYMBOL_GPL(register_tick_sched_wakeup_callback);
-
/*
* We rearm the timer until we get disabled by the idle code.
* Called with interrupts disabled.
@@ -1287,15 +1385,8 @@
* Do not call, when we are not in irq context and have
* no valid regs pointer
*/
- if (regs) {
+ if (regs)
tick_sched_handle(ts, regs);
- if (wake_callback && tick_do_timer_cpu == smp_processor_id()) {
- /*
- * wakeup user if needed
- */
- wake_callback();
- }
- }
else
ts->next_tick = 0;
@@ -1303,7 +1394,7 @@
if (unlikely(ts->tick_stopped))
return HRTIMER_NORESTART;
- hrtimer_forward(timer, now, tick_period);
+ hrtimer_forward(timer, now, TICK_NSEC);
return HRTIMER_RESTART;
}
@@ -1337,14 +1428,14 @@
/* Offset the tick to avert jiffies_lock contention. */
if (sched_skew_tick) {
- u64 offset = ktime_to_ns(tick_period) >> 1;
+ u64 offset = TICK_NSEC >> 1;
do_div(offset, num_possible_cpus());
offset *= smp_processor_id();
hrtimer_add_expires_ns(&ts->sched_timer, offset);
}
- hrtimer_forward(&ts->sched_timer, now, tick_period);
- hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
+ hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
}
#endif /* HIGH_RES_TIMERS */
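
The skew computation above spreads each CPU's tick start across half a tick
period to avert jiffies_lock contention. A standalone sketch, assuming HZ=100
and eight possible CPUs (illustration only):

/* Userspace model of the sched_skew_tick offset computation. */
#include <stdio.h>
#include <stdint.h>

#define TICK_NSEC	10000000ULL	/* assumed: HZ=100 */
#define NR_POSSIBLE_CPUS 8U		/* assumed num_possible_cpus() */

static uint64_t skew_offset(unsigned int cpu)
{
	uint64_t offset = TICK_NSEC >> 1;	/* spread over half a period */

	offset /= NR_POSSIBLE_CPUS;		/* do_div() in the kernel */
	return offset * cpu;			/* per-CPU slot */
}

int main(void)
{
	unsigned int cpu;

	for (cpu = 0; cpu < NR_POSSIBLE_CPUS; cpu++)
		printf("cpu%u tick offset: %llu ns\n", cpu,
		       (unsigned long long)skew_offset(cpu));
	return 0;
}
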
@@ -1411,9 +1502,3 @@
tick_nohz_switch_to_nohz();
return 0;
}
-
-ktime_t *get_next_event_cpu(unsigned int cpu)
-{
- return &(per_cpu(tick_cpu_device, cpu).evtdev->next_event);
-}
-EXPORT_SYMBOL_GPL(get_next_event_cpu);
--
Gitblit v1.6.2