From a5969cabbb4660eab42b6ef0412cbbd1200cf14d Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Sat, 12 Oct 2024 07:10:09 +0000
Subject: [PATCH] Change LED to GPIO
---
kernel/kernel/time/timer.c | 557 ++++++++++++++++++++++++++++++-------------------------
1 file changed, 301 insertions(+), 256 deletions(-)
diff --git a/kernel/kernel/time/timer.c b/kernel/kernel/time/timer.c
index 3e2c0bd..f5147bd 100644
--- a/kernel/kernel/time/timer.c
+++ b/kernel/kernel/time/timer.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * linux/kernel/timer.c
- *
* Kernel internal timers
*
* Copyright (C) 1991, 1992 Linus Torvalds
@@ -56,6 +55,11 @@
#define CREATE_TRACE_POINTS
#include <trace/events/timer.h>
+#undef CREATE_TRACE_POINTS
+#include <trace/hooks/timer.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(hrtimer_expire_entry);
+EXPORT_TRACEPOINT_SYMBOL_GPL(hrtimer_expire_exit);
__visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
@@ -158,7 +162,8 @@
/*
* The time start value for each level to select the bucket at enqueue
- * time.
+ * time. We start from the last possible delta of the previous level
+ * so that we can later add an extra LVL_GRAN(n) to n (see calc_index()).
*/
#define LVL_START(n) ((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT))
@@ -198,12 +203,16 @@
struct timer_base {
raw_spinlock_t lock;
struct timer_list *running_timer;
+#ifdef CONFIG_PREEMPT_RT
spinlock_t expiry_lock;
+ atomic_t timer_waiters;
+#endif
unsigned long clk;
unsigned long next_expiry;
unsigned int cpu;
+ bool next_expiry_recalc;
bool is_idle;
- bool must_forward_clk;
+ bool timers_pending;
DECLARE_BITMAP(pending_map, WHEEL_SIZE);
struct hlist_head vectors[WHEEL_SIZE];
} ____cacheline_aligned;
@@ -215,7 +224,8 @@
static DEFINE_STATIC_KEY_FALSE(timers_nohz_active);
static DEFINE_MUTEX(timer_keys_mutex);
-static struct swork_event timer_update_swork;
+static void timer_update_keys(struct work_struct *work);
+static DECLARE_WORK(timer_update_work, timer_update_keys);
#ifdef CONFIG_SMP
unsigned int sysctl_timer_migration = 1;
@@ -233,7 +243,7 @@
static inline void timers_update_migration(void) { }
#endif /* !CONFIG_SMP */
-static void timer_update_keys(struct swork_event *event)
+static void timer_update_keys(struct work_struct *work)
{
mutex_lock(&timer_keys_mutex);
timers_update_migration();
@@ -243,20 +253,11 @@
void timers_update_nohz(void)
{
- swork_queue(&timer_update_swork);
+ schedule_work(&timer_update_work);
}
-
-static __init int hrtimer_init_thread(void)
-{
- WARN_ON(swork_get());
- INIT_SWORK(&timer_update_swork, timer_update_keys);
- return 0;
-}
-early_initcall(hrtimer_init_thread);
int timer_migration_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
@@ -494,35 +495,49 @@
* Helper function to calculate the array index for a given expiry
* time.
*/
-static inline unsigned calc_index(unsigned expires, unsigned lvl)
+static inline unsigned calc_index(unsigned long expires, unsigned lvl,
+ unsigned long *bucket_expiry)
{
+
+ /*
+ * The timer wheel has to guarantee that a timer does not fire
+ * early. Early expiry can happen due to:
+ * - Timer is armed at the edge of a tick
+ * - Truncation of the expiry time in the outer wheel levels
+ *
+ * Round up with level granularity to prevent this.
+ */
+ trace_android_vh_timer_calc_index(lvl, &expires);
expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
+ *bucket_expiry = expires << LVL_SHIFT(lvl);
return LVL_OFFS(lvl) + (expires & LVL_MASK);
}
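The round-up above is what keeps a timer from firing early: the expiry is bumped by one level granularity before it is truncated to the bucket, and the truncated value is reported back through bucket_expiry. A standalone userspace sketch of that arithmetic, assuming the usual wheel constants LVL_CLK_SHIFT = 3 and LVL_BITS = 6 (they are not shown in this hunk), is:

/* Userspace sketch only; the LVL_* values are assumed, not taken from this patch. */
#include <stdio.h>

#define LVL_CLK_SHIFT	3
#define LVL_BITS	6
#define LVL_SIZE	(1UL << LVL_BITS)
#define LVL_MASK	(LVL_SIZE - 1)
#define LVL_SHIFT(n)	((n) * LVL_CLK_SHIFT)
#define LVL_GRAN(n)	(1UL << LVL_SHIFT(n))
#define LVL_OFFS(n)	((n) * LVL_SIZE)

static unsigned calc_index(unsigned long expires, unsigned lvl,
			   unsigned long *bucket_expiry)
{
	/* Round up by one level granularity so the timer cannot fire early. */
	expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
	*bucket_expiry = expires << LVL_SHIFT(lvl);
	return LVL_OFFS(lvl) + (expires & LVL_MASK);
}

int main(void)
{
	unsigned long bucket_expiry;
	/* Level 2 has a granularity of LVL_GRAN(2) = 64 jiffies. */
	unsigned idx = calc_index(1000, 2, &bucket_expiry);

	/* Prints idx=144 bucket_expiry=1024: the bucket expires at 1024 >= 1000. */
	printf("idx=%u bucket_expiry=%lu\n", idx, bucket_expiry);
	return 0;
}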
-static int calc_wheel_index(unsigned long expires, unsigned long clk)
+static int calc_wheel_index(unsigned long expires, unsigned long clk,
+ unsigned long *bucket_expiry)
{
unsigned long delta = expires - clk;
unsigned int idx;
if (delta < LVL_START(1)) {
- idx = calc_index(expires, 0);
+ idx = calc_index(expires, 0, bucket_expiry);
} else if (delta < LVL_START(2)) {
- idx = calc_index(expires, 1);
+ idx = calc_index(expires, 1, bucket_expiry);
} else if (delta < LVL_START(3)) {
- idx = calc_index(expires, 2);
+ idx = calc_index(expires, 2, bucket_expiry);
} else if (delta < LVL_START(4)) {
- idx = calc_index(expires, 3);
+ idx = calc_index(expires, 3, bucket_expiry);
} else if (delta < LVL_START(5)) {
- idx = calc_index(expires, 4);
+ idx = calc_index(expires, 4, bucket_expiry);
} else if (delta < LVL_START(6)) {
- idx = calc_index(expires, 5);
+ idx = calc_index(expires, 5, bucket_expiry);
} else if (delta < LVL_START(7)) {
- idx = calc_index(expires, 6);
+ idx = calc_index(expires, 6, bucket_expiry);
} else if (LVL_DEPTH > 8 && delta < LVL_START(8)) {
- idx = calc_index(expires, 7);
+ idx = calc_index(expires, 7, bucket_expiry);
} else if ((long) delta < 0) {
idx = clk & LVL_MASK;
+ *bucket_expiry = clk;
} else {
/*
* Force expire obscene large timeouts to expire at the
@@ -531,30 +546,9 @@
if (delta >= WHEEL_TIMEOUT_CUTOFF)
expires = clk + WHEEL_TIMEOUT_MAX;
- idx = calc_index(expires, LVL_DEPTH - 1);
+ idx = calc_index(expires, LVL_DEPTH - 1, bucket_expiry);
}
return idx;
-}
-
-/*
- * Enqueue the timer into the hash bucket, mark it pending in
- * the bitmap and store the index in the timer flags.
- */
-static void enqueue_timer(struct timer_base *base, struct timer_list *timer,
- unsigned int idx)
-{
- hlist_add_head(&timer->entry, base->vectors + idx);
- __set_bit(idx, base->pending_map);
- timer_set_idx(timer, idx);
-}
-
-static void
-__internal_add_timer(struct timer_base *base, struct timer_list *timer)
-{
- unsigned int idx;
-
- idx = calc_wheel_index(timer->expires, base->clk);
- enqueue_timer(base, timer, idx);
}
static void
@@ -578,39 +572,54 @@
* timer is not deferrable. If the other CPU is on the way to idle
* then it can't set base->is_idle as we hold the base lock:
*/
- if (!base->is_idle)
- return;
-
- /* Check whether this is the new first expiring timer: */
- if (time_after_eq(timer->expires, base->next_expiry))
- return;
-
- /*
- * Set the next expiry time and kick the CPU so it can reevaluate the
- * wheel:
- */
- if (time_before(timer->expires, base->clk)) {
- /*
- * Prevent from forward_timer_base() moving the base->clk
- * backward
- */
- base->next_expiry = base->clk;
- } else {
- base->next_expiry = timer->expires;
- }
- wake_up_nohz_cpu(base->cpu);
+ if (base->is_idle)
+ wake_up_nohz_cpu(base->cpu);
}
-static void
-internal_add_timer(struct timer_base *base, struct timer_list *timer)
+/*
+ * Enqueue the timer into the hash bucket, mark it pending in
+ * the bitmap, store the index in the timer flags then wake up
+ * the target CPU if needed.
+ */
+static void enqueue_timer(struct timer_base *base, struct timer_list *timer,
+ unsigned int idx, unsigned long bucket_expiry)
{
- __internal_add_timer(base, timer);
- trigger_dyntick_cpu(base, timer);
+
+ hlist_add_head(&timer->entry, base->vectors + idx);
+ __set_bit(idx, base->pending_map);
+ timer_set_idx(timer, idx);
+
+ trace_timer_start(timer, timer->expires, timer->flags);
+
+ /*
+ * Check whether this is the new first expiring timer. The
+ * effective expiry time of the timer is required here
+ * (bucket_expiry) instead of timer->expires.
+ */
+ if (time_before(bucket_expiry, base->next_expiry)) {
+ /*
+ * Set the next expiry time and kick the CPU so it
+ * can reevaluate the wheel:
+ */
+ base->next_expiry = bucket_expiry;
+ base->timers_pending = true;
+ base->next_expiry_recalc = false;
+ trigger_dyntick_cpu(base, timer);
+ }
+}
+
+static void internal_add_timer(struct timer_base *base, struct timer_list *timer)
+{
+ unsigned long bucket_expiry;
+ unsigned int idx;
+
+ idx = calc_wheel_index(timer->expires, base->clk, &bucket_expiry);
+ enqueue_timer(base, timer, idx, bucket_expiry);
}
#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
-static struct debug_obj_descr timer_debug_descr;
+static const struct debug_obj_descr timer_debug_descr;
static void *timer_debug_hint(void *addr)
{
@@ -665,7 +674,7 @@
case ODEBUG_STATE_ACTIVE:
WARN_ON(1);
-
+ fallthrough;
default:
return false;
}
@@ -706,7 +715,7 @@
}
}
-static struct debug_obj_descr timer_debug_descr = {
+static const struct debug_obj_descr timer_debug_descr = {
.name = "timer_list",
.debug_hint = timer_debug_hint,
.is_static_object = timer_is_static_object,
@@ -729,11 +738,6 @@
static inline void debug_timer_deactivate(struct timer_list *timer)
{
debug_object_deactivate(timer, &timer_debug_descr);
-}
-
-static inline void debug_timer_free(struct timer_list *timer)
-{
- debug_object_free(timer, &timer_debug_descr);
}
static inline void debug_timer_assert_init(struct timer_list *timer)
@@ -775,13 +779,6 @@
trace_timer_init(timer);
}
-static inline void
-debug_activate(struct timer_list *timer, unsigned long expires)
-{
- debug_timer_activate(timer);
- trace_timer_start(timer, expires, timer->flags);
-}
-
static inline void debug_deactivate(struct timer_list *timer)
{
debug_timer_deactivate(timer);
@@ -800,6 +797,8 @@
{
timer->entry.pprev = NULL;
timer->function = func;
+ if (WARN_ON_ONCE(flags & ~TIMER_INIT_FLAGS))
+ flags &= TIMER_INIT_FLAGS;
timer->flags = flags | raw_smp_processor_id();
lockdep_init_map(&timer->lockdep_map, name, key, 0);
}
@@ -845,8 +844,10 @@
if (!timer_pending(timer))
return 0;
- if (hlist_is_singular_node(&timer->entry, base->vectors + idx))
+ if (hlist_is_singular_node(&timer->entry, base->vectors + idx)) {
__clear_bit(idx, base->pending_map);
+ base->next_expiry_recalc = true;
+ }
detach_timer(timer, clear_pending);
return 1;
@@ -896,20 +897,14 @@
static inline void forward_timer_base(struct timer_base *base)
{
-#ifdef CONFIG_NO_HZ_COMMON
- unsigned long jnow;
+ unsigned long jnow = READ_ONCE(jiffies);
/*
- * We only forward the base when we are idle or have just come out of
- * idle (must_forward_clk logic), and have a delta between base clock
- * and jiffies. In the common case, run_timers will take care of it.
+ * No need to forward if we are close enough below jiffies.
+ * Also while executing timers, base->clk is 1 offset ahead
+ * of jiffies to avoid endless requeuing to current jiffies.
*/
- if (likely(!base->must_forward_clk))
- return;
-
- jnow = READ_ONCE(jiffies);
- base->must_forward_clk = base->is_idle;
- if ((long)(jnow - base->clk) < 2)
+ if ((long)(jnow - base->clk) < 1)
return;
/*
@@ -923,7 +918,6 @@
return;
base->clk = base->next_expiry;
}
-#endif
}
@@ -966,13 +960,14 @@
#define MOD_TIMER_PENDING_ONLY 0x01
#define MOD_TIMER_REDUCE 0x02
+#define MOD_TIMER_NOTPENDING 0x04
static inline int
__mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options)
{
+ unsigned long clk = 0, flags, bucket_expiry;
struct timer_base *base, *new_base;
unsigned int idx = UINT_MAX;
- unsigned long clk = 0, flags;
int ret = 0;
BUG_ON(!timer->function);
@@ -982,7 +977,7 @@
* the timer is re-modified to have the same timeout or ends up in the
* same array bucket then just return:
*/
- if (timer_pending(timer)) {
+ if (!(options & MOD_TIMER_NOTPENDING) && timer_pending(timer)) {
/*
* The downside of this optimization is that it can result in
* larger granularity than you would get from adding a new
@@ -1011,7 +1006,7 @@
}
clk = base->clk;
- idx = calc_wheel_index(expires, clk);
+ idx = calc_wheel_index(expires, clk, &bucket_expiry);
/*
* Retrieve and compare the array index of the pending
@@ -1058,22 +1053,19 @@
}
}
- debug_activate(timer, expires);
+ debug_timer_activate(timer);
timer->expires = expires;
/*
* If 'idx' was calculated above and the base time did not advance
* between calculating 'idx' and possibly switching the base, only
- * enqueue_timer() and trigger_dyntick_cpu() is required. Otherwise
- * we need to (re)calculate the wheel index via
- * internal_add_timer().
+ * enqueue_timer() is required. Otherwise we need to (re)calculate
+ * the wheel index via internal_add_timer().
*/
- if (idx != UINT_MAX && clk == base->clk) {
- enqueue_timer(base, timer, idx);
- trigger_dyntick_cpu(base, timer);
- } else {
+ if (idx != UINT_MAX && clk == base->clk)
+ enqueue_timer(base, timer, idx, bucket_expiry);
+ else
internal_add_timer(base, timer);
- }
out_unlock:
raw_spin_unlock_irqrestore(&base->lock, flags);
@@ -1155,7 +1147,7 @@
void add_timer(struct timer_list *timer)
{
BUG_ON(timer_pending(timer));
- mod_timer(timer, timer->expires);
+ __mod_timer(timer, timer->expires, MOD_TIMER_NOTPENDING);
}
EXPORT_SYMBOL(add_timer);
@@ -1192,7 +1184,7 @@
}
forward_timer_base(base);
- debug_activate(timer, timer->expires);
+ debug_timer_activate(timer);
internal_add_timer(base, timer);
raw_spin_unlock_irqrestore(&base->lock, flags);
}
@@ -1227,25 +1219,6 @@
}
EXPORT_SYMBOL(del_timer);
-static int __try_to_del_timer_sync(struct timer_list *timer,
- struct timer_base **basep)
-{
- struct timer_base *base;
- unsigned long flags;
- int ret = -1;
-
- debug_assert_init(timer);
-
- *basep = base = lock_timer_base(timer, &flags);
-
- if (base->running_timer != timer)
- ret = detach_if_pending(timer, base, true);
-
- raw_spin_unlock_irqrestore(&base->lock, flags);
-
- return ret;
-}
-
/**
* try_to_del_timer_sync - Try to deactivate a timer
* @timer: timer to delete
@@ -1256,31 +1229,96 @@
int try_to_del_timer_sync(struct timer_list *timer)
{
struct timer_base *base;
+ unsigned long flags;
+ int ret = -1;
- return __try_to_del_timer_sync(timer, &base);
+ debug_assert_init(timer);
+
+ base = lock_timer_base(timer, &flags);
+
+ if (base->running_timer != timer)
+ ret = detach_if_pending(timer, base, true);
+
+ raw_spin_unlock_irqrestore(&base->lock, flags);
+
+ return ret;
}
EXPORT_SYMBOL(try_to_del_timer_sync);
-#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
-static int __del_timer_sync(struct timer_list *timer)
+#ifdef CONFIG_PREEMPT_RT
+static __init void timer_base_init_expiry_lock(struct timer_base *base)
{
- struct timer_base *base;
- int ret;
+ spin_lock_init(&base->expiry_lock);
+}
- for (;;) {
- ret = __try_to_del_timer_sync(timer, &base);
- if (ret >= 0)
- return ret;
+static inline void timer_base_lock_expiry(struct timer_base *base)
+{
+ spin_lock(&base->expiry_lock);
+}
- /*
- * When accessing the lock, timers of base are no longer expired
- * and so timer is no longer running.
- */
- spin_lock(&base->expiry_lock);
+static inline void timer_base_unlock_expiry(struct timer_base *base)
+{
+ spin_unlock(&base->expiry_lock);
+}
+
+/*
+ * The counterpart to del_timer_wait_running().
+ *
+ * If there is a waiter for base->expiry_lock, then it was waiting for the
+ * timer callback to finish. Drop expiry_lock and reacquire it. That allows
+ * the waiter to acquire the lock and make progress.
+ */
+static void timer_sync_wait_running(struct timer_base *base)
+{
+ if (atomic_read(&base->timer_waiters)) {
+ raw_spin_unlock_irq(&base->lock);
spin_unlock(&base->expiry_lock);
+ spin_lock(&base->expiry_lock);
+ raw_spin_lock_irq(&base->lock);
}
}
+/*
+ * This function is called on PREEMPT_RT kernels when the fast path
+ * deletion of a timer failed because the timer callback function was
+ * running.
+ *
+ * This prevents priority inversion, if the softirq thread on a remote CPU
+ * got preempted, and it prevents a live lock when the task which tries to
+ * delete a timer preempted the softirq thread running the timer callback
+ * function.
+ */
+static void del_timer_wait_running(struct timer_list *timer)
+{
+ u32 tf;
+
+ tf = READ_ONCE(timer->flags);
+ if (!(tf & TIMER_MIGRATING)) {
+ struct timer_base *base = get_timer_base(tf);
+
+ /*
+ * Mark the base as contended and grab the expiry lock,
+ * which is held by the softirq across the timer
+ * callback. Drop the lock immediately so the softirq can
+ * expire the next timer. In theory the timer could already
+ * be running again, but that's more than unlikely and just
+ * causes another wait loop.
+ */
+ atomic_inc(&base->timer_waiters);
+ spin_lock_bh(&base->expiry_lock);
+ atomic_dec(&base->timer_waiters);
+ spin_unlock_bh(&base->expiry_lock);
+ }
+}
+#else
+static inline void timer_base_init_expiry_lock(struct timer_base *base) { }
+static inline void timer_base_lock_expiry(struct timer_base *base) { }
+static inline void timer_base_unlock_expiry(struct timer_base *base) { }
+static inline void timer_sync_wait_running(struct timer_base *base) { }
+static inline void del_timer_wait_running(struct timer_list *timer) { }
+#endif
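The handshake above replaces the old busy-wait: a task that races with a running callback registers itself in timer_waiters and blocks once on expiry_lock, which the softirq holds across the callback and briefly drops when it sees waiters. A rough userspace analogue with pthreads (simplified to a single lock, with one atomic flag standing in for "try_to_del_timer_sync() failed because the callback is running") is sketched below; it is illustrative, not the kernel implementation:

/*
 * Build with: cc -pthread sketch.c
 * "expiry" plays the softirq holding expiry_lock across the callback;
 * "deleter" plays del_timer_wait_running(): mark contention, block once.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t expiry_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int timer_waiters;
static atomic_int callback_running;	/* stands in for "deletion fast path failed" */

static void *expiry_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&expiry_lock);
	for (int i = 0; i < 100; i++) {
		atomic_store(&callback_running, 1);
		usleep(1000);			/* the timer callback */
		atomic_store(&callback_running, 0);

		/* Like timer_sync_wait_running(): let a contending waiter in. */
		if (atomic_load(&timer_waiters)) {
			pthread_mutex_unlock(&expiry_lock);
			pthread_mutex_lock(&expiry_lock);
		}
	}
	pthread_mutex_unlock(&expiry_lock);
	return NULL;
}

static void *deleter_thread(void *arg)
{
	(void)arg;
	while (atomic_load(&callback_running)) {
		/* Like del_timer_wait_running(): block instead of spinning. */
		atomic_fetch_add(&timer_waiters, 1);
		pthread_mutex_lock(&expiry_lock);
		atomic_fetch_sub(&timer_waiters, 1);
		pthread_mutex_unlock(&expiry_lock);
	}
	puts("callback finished, timer can be freed");
	return NULL;
}

int main(void)
{
	pthread_t e, d;

	pthread_create(&e, NULL, expiry_thread, NULL);
	usleep(500);
	pthread_create(&d, NULL, deleter_thread, NULL);
	pthread_join(e, NULL);
	pthread_join(d, NULL);
	return 0;
}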
+
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
/**
* del_timer_sync - deactivate a timer and wait for the handler to finish.
* @timer: the timer to be deactivated
@@ -1319,6 +1357,8 @@
*/
int del_timer_sync(struct timer_list *timer)
{
+ int ret;
+
#ifdef CONFIG_LOCKDEP
unsigned long flags;
@@ -1337,12 +1377,23 @@
*/
WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
- return __del_timer_sync(timer);
+ do {
+ ret = try_to_del_timer_sync(timer);
+
+ if (unlikely(ret < 0)) {
+ del_timer_wait_running(timer);
+ cpu_relax();
+ }
+ } while (ret < 0);
+
+ return ret;
}
EXPORT_SYMBOL(del_timer_sync);
#endif
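del_timer_sync() above is the primitive drivers rely on to guarantee the callback is no longer running before freeing the memory it touches. A minimal teardown sketch, with a hypothetical driver-private structure, might look like this:

#include <linux/timer.h>
#include <linux/slab.h>

struct my_priv {			/* hypothetical driver state */
	struct timer_list poll_timer;
};

static void my_driver_remove(struct my_priv *priv)
{
	/* Once this returns, any running callback has finished ... */
	del_timer_sync(&priv->poll_timer);
	/* ... so freeing the memory the callback dereferences is safe. */
	kfree(priv);
}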
-static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *))
+static void call_timer_fn(struct timer_list *timer,
+ void (*fn)(struct timer_list *),
+ unsigned long baseclk)
{
int count = preempt_count();
@@ -1365,14 +1416,14 @@
*/
lock_map_acquire(&lockdep_map);
- trace_timer_expire_entry(timer);
+ trace_timer_expire_entry(timer, baseclk);
fn(timer);
trace_timer_expire_exit(timer);
lock_map_release(&lockdep_map);
if (count != preempt_count()) {
- WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
+ WARN_ONCE(1, "timer: %pS preempt leak: %08x -> %08x\n",
fn, count, preempt_count());
/*
* Restore the preempt count. That gives us a decent
@@ -1386,6 +1437,13 @@
static void expire_timers(struct timer_base *base, struct hlist_head *head)
{
+ /*
+ * This value is required only for tracing. base->clk was
+ * incremented directly before expire_timers was called. But expiry
+ * is related to the old base->clk value.
+ */
+ unsigned long baseclk = base->clk - 1;
+
while (!hlist_empty(head)) {
struct timer_list *timer;
void (*fn)(struct timer_list *);
@@ -1399,26 +1457,23 @@
if (timer->flags & TIMER_IRQSAFE) {
raw_spin_unlock(&base->lock);
- call_timer_fn(timer, fn);
- base->running_timer = NULL;
- spin_unlock(&base->expiry_lock);
- spin_lock(&base->expiry_lock);
+ call_timer_fn(timer, fn, baseclk);
raw_spin_lock(&base->lock);
+ base->running_timer = NULL;
} else {
raw_spin_unlock_irq(&base->lock);
- call_timer_fn(timer, fn);
- base->running_timer = NULL;
- spin_unlock(&base->expiry_lock);
- spin_lock(&base->expiry_lock);
+ call_timer_fn(timer, fn, baseclk);
raw_spin_lock_irq(&base->lock);
+ base->running_timer = NULL;
+ timer_sync_wait_running(base);
}
}
}
-static int __collect_expired_timers(struct timer_base *base,
- struct hlist_head *heads)
+static int collect_expired_timers(struct timer_base *base,
+ struct hlist_head *heads)
{
- unsigned long clk = base->clk;
+ unsigned long clk = base->clk = base->next_expiry;
struct hlist_head *vec;
int i, levels = 0;
unsigned int idx;
@@ -1440,7 +1495,6 @@
return levels;
}
-#ifdef CONFIG_NO_HZ_COMMON
/*
* Find the next pending bucket of a level. Search from level start (@offset)
* + @clk upwards and if nothing there, search from start of the level
@@ -1473,6 +1527,7 @@
clk = base->clk;
for (lvl = 0; lvl < LVL_DEPTH; lvl++, offset += LVL_SIZE) {
int pos = next_pending_bucket(base, offset, clk & LVL_MASK);
+ unsigned long lvl_clk = clk & LVL_CLK_MASK;
if (pos >= 0) {
unsigned long tmp = clk + (unsigned long) pos;
@@ -1480,6 +1535,13 @@
tmp <<= LVL_SHIFT(lvl);
if (time_before(tmp, next))
next = tmp;
+
+ /*
+ * If the next expiration happens before we reach
+ * the next level, no need to check further.
+ */
+ if (pos <= ((LVL_CLK_DIV - lvl_clk) & LVL_CLK_MASK))
+ break;
}
/*
* Clock for the next level. If the current level clock lower
@@ -1517,13 +1579,18 @@
* So the simple check whether the lower bits of the current
* level are 0 or not is sufficient for all cases.
*/
- adj = clk & LVL_CLK_MASK ? 1 : 0;
+ adj = lvl_clk ? 1 : 0;
clk >>= LVL_CLK_SHIFT;
clk += adj;
}
+
+ base->next_expiry_recalc = false;
+ base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA);
+
return next;
}
+#ifdef CONFIG_NO_HZ_COMMON
/*
* Check, if the next hrtimer event is before the next timer wheel
* event:
@@ -1570,7 +1637,6 @@
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
u64 expires = KTIME_MAX;
unsigned long nextevt;
- bool is_max_delta;
/*
* Pretend that there is no timer pending if the cpu is offline.
@@ -1580,9 +1646,10 @@
return expires;
raw_spin_lock(&base->lock);
- nextevt = __next_timer_interrupt(base);
- is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
- base->next_expiry = nextevt;
+ if (base->next_expiry_recalc)
+ base->next_expiry = __next_timer_interrupt(base);
+ nextevt = base->next_expiry;
+
/*
* We have a fresh next event. Check whether we can forward the
* base. We can only do that when @basej is past base->clk
@@ -1599,7 +1666,7 @@
expires = basem;
base->is_idle = false;
} else {
- if (!is_max_delta)
+ if (base->timers_pending)
expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
/*
* If we expect to sleep more than a tick, mark the base idle.
@@ -1608,10 +1675,8 @@
* logic is only maintained for the BASE_STD base, deferrable
* timers may still see large granularity skew (by design).
*/
- if ((expires - basem) > TICK_NSEC) {
- base->must_forward_clk = true;
+ if ((expires - basem) > TICK_NSEC)
base->is_idle = true;
- }
}
raw_spin_unlock(&base->lock);
@@ -1635,42 +1700,6 @@
*/
base->is_idle = false;
}
-
-static int collect_expired_timers(struct timer_base *base,
- struct hlist_head *heads)
-{
- unsigned long now = READ_ONCE(jiffies);
-
- /*
- * NOHZ optimization. After a long idle sleep we need to forward the
- * base to current jiffies. Avoid a loop by searching the bitfield for
- * the next expiring timer.
- */
- if ((long)(now - base->clk) > 2) {
- unsigned long next = __next_timer_interrupt(base);
-
- /*
- * If the next timer is ahead of time forward to current
- * jiffies, otherwise forward to the next expiry time:
- */
- if (time_after(next, now)) {
- /*
- * The call site will increment base->clk and then
- * terminate the expiry loop immediately.
- */
- base->clk = now;
- return 0;
- }
- base->clk = next;
- }
- return __collect_expired_timers(base, heads);
-}
-#else
-static inline int collect_expired_timers(struct timer_base *base,
- struct hlist_head *heads)
-{
- return __collect_expired_timers(base, heads);
-}
#endif
/*
@@ -1681,17 +1710,19 @@
{
struct task_struct *p = current;
+ PRANDOM_ADD_NOISE(jiffies, user_tick, p, 0);
+
/* Note: this timer irq context must be accounted for as well. */
account_process_tick(p, user_tick);
run_local_timers();
- rcu_check_callbacks(user_tick);
+ rcu_sched_clock_irq(user_tick);
#ifdef CONFIG_IRQ_WORK
if (in_irq())
irq_work_tick();
#endif
scheduler_tick();
if (IS_ENABLED(CONFIG_POSIX_TIMERS))
- run_posix_cpu_timers(p);
+ run_posix_cpu_timers();
}
/**
@@ -1703,38 +1734,32 @@
struct hlist_head heads[LVL_DEPTH];
int levels;
- if (!time_after_eq(jiffies, base->clk))
+ if (time_before(jiffies, base->next_expiry))
return;
- spin_lock(&base->expiry_lock);
+ timer_base_lock_expiry(base);
raw_spin_lock_irq(&base->lock);
- /*
- * timer_base::must_forward_clk must be cleared before running
- * timers so that any timer functions that call mod_timer() will
- * not try to forward the base. Idle tracking / clock forwarding
- * logic is only used with BASE_STD timers.
- *
- * The must_forward_clk flag is cleared unconditionally also for
- * the deferrable base. The deferrable base is not affected by idle
- * tracking and never forwarded, so clearing the flag is a NOOP.
- *
- * The fact that the deferrable base is never forwarded can cause
- * large variations in granularity for deferrable timers, but they
- * can be deferred for long periods due to idle anyway.
- */
- base->must_forward_clk = false;
-
- while (time_after_eq(jiffies, base->clk)) {
-
+ while (time_after_eq(jiffies, base->clk) &&
+ time_after_eq(jiffies, base->next_expiry)) {
levels = collect_expired_timers(base, heads);
+ /*
+ * The two possible reasons for not finding any expired
+ * timer at this clk are that all matching timers have been
+ * dequeued or no timer has been queued since
+ * base::next_expiry was set to base::clk +
+ * NEXT_TIMER_MAX_DELTA.
+ */
+ WARN_ON_ONCE(!levels && !base->next_expiry_recalc
+ && base->timers_pending);
base->clk++;
+ base->next_expiry = __next_timer_interrupt(base);
while (levels--)
expire_timers(base, heads + levels);
}
raw_spin_unlock_irq(&base->lock);
- spin_unlock(&base->expiry_lock);
+ timer_base_unlock_expiry(base);
}
/*
@@ -1743,8 +1768,6 @@
static __latent_entropy void run_timer_softirq(struct softirq_action *h)
{
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
-
- irq_work_tick_soft();
__run_timers(base);
if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
@@ -1760,12 +1783,12 @@
hrtimer_run_queues();
/* Raise the softirq only if required. */
- if (time_before(jiffies, base->clk)) {
+ if (time_before(jiffies, base->next_expiry)) {
if (!IS_ENABLED(CONFIG_NO_HZ_COMMON))
return;
/* CPU is awake, so check the deferrable base. */
base++;
- if (time_before(jiffies, base->clk))
+ if (time_before(jiffies, base->next_expiry))
return;
}
raise_softirq(TIMER_SOFTIRQ);
@@ -1791,21 +1814,23 @@
* schedule_timeout - sleep until timeout
* @timeout: timeout value in jiffies
*
- * Make the current task sleep until @timeout jiffies have
- * elapsed. The routine will return immediately unless
- * the current task state has been set (see set_current_state()).
+ * Make the current task sleep until @timeout jiffies have elapsed.
+ * The function behavior depends on the current task state
+ * (see also set_current_state() description):
*
- * You can set the task state as follows -
+ * %TASK_RUNNING - the scheduler is called, but the task does not sleep
+ * at all. That happens because sched_submit_work() does nothing for
+ * tasks in %TASK_RUNNING state.
*
* %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
* pass before the routine returns unless the current task is explicitly
- * woken up, (e.g. by wake_up_process())".
+ * woken up, (e.g. by wake_up_process()).
*
* %TASK_INTERRUPTIBLE - the routine may return early if a signal is
* delivered to the current task or the current task is explicitly woken
* up.
*
- * The current task state is guaranteed to be TASK_RUNNING when this
+ * The current task state is guaranteed to be %TASK_RUNNING when this
* routine returns.
*
* Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
@@ -1813,7 +1838,7 @@
* value will be %MAX_SCHEDULE_TIMEOUT.
*
* Returns 0 when the timer has expired otherwise the remaining time in
- * jiffies will be returned. In all cases the return value is guaranteed
+ * jiffies will be returned. In all cases the return value is guaranteed
* to be non-negative.
*/
signed long __sched schedule_timeout(signed long timeout)
@@ -1854,7 +1879,7 @@
timer.task = current;
timer_setup_on_stack(&timer.timer, process_timeout, 0);
- __mod_timer(&timer.timer, expire, 0);
+ __mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING);
schedule();
del_singleshot_timer_sync(&timer.timer);
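The schedule_timeout() kernel-doc above is reflected in the usual caller pattern: set the task state first, then call schedule_timeout() and inspect the remaining jiffies. A kernel-context sketch of that pattern (not part of the patch):

#include <linux/sched.h>
#include <linux/jiffies.h>
#include <linux/printk.h>

static void wait_for_event_or_timeout(void)
{
	signed long remaining;

	set_current_state(TASK_INTERRUPTIBLE);
	remaining = schedule_timeout(msecs_to_jiffies(100));
	if (remaining)
		/* Woken early by a signal or wake_up_process(). */
		pr_info("woken early, %ld jiffies left\n", remaining);
}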
@@ -1927,8 +1952,8 @@
base = per_cpu_ptr(&timer_bases[b], cpu);
base->clk = jiffies;
base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
+ base->timers_pending = false;
base->is_idle = false;
- base->must_forward_clk = true;
}
return 0;
}
@@ -1981,7 +2006,8 @@
base->cpu = cpu;
raw_spin_lock_init(&base->lock);
base->clk = jiffies;
- spin_lock_init(&base->expiry_lock);
+ base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
+ timer_base_init_expiry_lock(base);
}
}
@@ -1996,6 +2022,7 @@
void __init init_timers(void)
{
init_timer_cpus();
+ posix_cputimers_init_work();
open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}
@@ -2029,6 +2056,32 @@
EXPORT_SYMBOL(msleep_interruptible);
/**
+ * usleep_range_state - Sleep for an approximate time in a given state
+ * @min: Minimum time in usecs to sleep
+ * @max: Maximum time in usecs to sleep
+ * @state: State the current task will be in while sleeping
+ *
+ * In non-atomic context where the exact wakeup time is flexible, use
+ * usleep_range_state() instead of udelay(). The sleep improves responsiveness
+ * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces
+ * power usage by allowing hrtimers to take advantage of an already-
+ * scheduled interrupt instead of scheduling a new one just for this sleep.
+ */
+void __sched usleep_range_state(unsigned long min, unsigned long max,
+ unsigned int state)
+{
+ ktime_t exp = ktime_add_us(ktime_get(), min);
+ u64 delta = (u64)(max - min) * NSEC_PER_USEC;
+
+ for (;;) {
+ __set_current_state(state);
+ /* Do not return before the requested sleep time has elapsed */
+ if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
+ break;
+ }
+}
+
+/**
* usleep_range - Sleep for an approximate time
* @min: Minimum time in usecs to sleep
* @max: Maximum time in usecs to sleep
@@ -2041,14 +2094,6 @@
*/
void __sched usleep_range(unsigned long min, unsigned long max)
{
- ktime_t exp = ktime_add_us(ktime_get(), min);
- u64 delta = (u64)(max - min) * NSEC_PER_USEC;
-
- for (;;) {
- __set_current_state(TASK_UNINTERRUPTIBLE);
- /* Do not return before the requested sleep time has elapsed */
- if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
- break;
- }
+ usleep_range_state(min, max, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(usleep_range);
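usleep_range() keeps its documented behaviour; only the body moves into usleep_range_state(). A typical non-atomic polling loop of the kind the kernel-doc above recommends over udelay() might look like this (struct my_dev and my_dev_ready() are hypothetical placeholders):

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/types.h>

struct my_dev;					/* hypothetical device */
bool my_dev_ready(struct my_dev *dev);		/* hypothetical accessor */

static int wait_until_ready(struct my_dev *dev)
{
	int tries;

	for (tries = 0; tries < 50; tries++) {
		if (my_dev_ready(dev))
			return 0;
		/* Non-atomic context: sleep 100-200us instead of udelay(100). */
		usleep_range(100, 200);
	}
	return -ETIMEDOUT;
}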
--
Gitblit v1.6.2