From 1543e317f1da31b75942316931e8f491a8920811 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Thu, 04 Jan 2024 10:08:02 +0000
Subject: [PATCH] disable FB
---
kernel/kernel/workqueue.c | 733 ++++++++++++++++++++++++++++++++++++-------------------
1 files changed, 474 insertions(+), 259 deletions(-)
diff --git a/kernel/kernel/workqueue.c b/kernel/kernel/workqueue.c
index 4722041..0bcf043 100644
--- a/kernel/kernel/workqueue.c
+++ b/kernel/kernel/workqueue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* kernel/workqueue.c - generic async execution with shared worker pool
*
@@ -50,8 +51,13 @@
#include <linux/sched/isolation.h>
#include <linux/nmi.h>
#include <linux/kvm_para.h>
+#include <uapi/linux/sched/types.h>
#include "workqueue_internal.h"
+
+#include <trace/hooks/wqlockup.h>
+/* events/workqueue.h uses default TRACE_INCLUDE_PATH */
+#undef TRACE_INCLUDE_PATH
enum {
/*
@@ -128,16 +134,16 @@
*
* PL: wq_pool_mutex protected.
*
- * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads.
+ * PR: wq_pool_mutex protected for writes. RCU protected for reads.
*
* PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
*
* PWR: wq_pool_mutex and wq->mutex protected for writes. Either or
- * sched-RCU for reads.
+ * RCU for reads.
*
* WQ: wq->mutex protected.
*
- * WR: wq->mutex protected for writes. Sched-RCU protected for reads.
+ * WR: wq->mutex protected for writes. RCU protected for reads.
*
* MD: wq_mayday_lock protected.
*/
@@ -145,7 +151,7 @@
/* struct worker is defined in workqueue_internal.h */
struct worker_pool {
- spinlock_t lock; /* the pool lock */
+ raw_spinlock_t lock; /* the pool lock */
int cpu; /* I: the associated cpu */
int node; /* I: the associated node ID */
int id; /* I: pool ID */
@@ -184,7 +190,7 @@
atomic_t nr_running ____cacheline_aligned_in_smp;
/*
- * Destruction of pool is sched-RCU protected to allow dereferences
+ * Destruction of pool is RCU protected to allow dereferences
* from get_work_pool().
*/
struct rcu_head rcu;
@@ -213,7 +219,7 @@
/*
* Release of unbound pwq is punted to system_wq. See put_pwq()
* and pwq_unbound_release_workfn() for details. pool_workqueue
- * itself is also sched-RCU protected so that the first pwq can be
+ * itself is also RCU protected so that the first pwq can be
* determined without grabbing wq->mutex.
*/
struct work_struct unbound_release_work;
@@ -248,7 +254,7 @@
struct list_head flusher_overflow; /* WQ: flush overflow list */
struct list_head maydays; /* MD: pwqs requesting rescue */
- struct worker *rescuer; /* I: rescue worker */
+ struct worker *rescuer; /* MD: rescue worker */
int nr_drainers; /* WQ: drain in progress */
int saved_max_active; /* WQ: saved pwq max_active */
@@ -260,13 +266,15 @@
struct wq_device *wq_dev; /* I: for sysfs interface */
#endif
#ifdef CONFIG_LOCKDEP
+ char *lock_name;
+ struct lock_class_key key;
struct lockdep_map lockdep_map;
#endif
char name[WQ_NAME_LEN]; /* I: workqueue name */
/*
- * Destruction of workqueue_struct is sched-RCU protected to allow
- * walking the workqueues list without grabbing wq_pool_mutex.
+ * Destruction of workqueue_struct is RCU protected to allow walking
+ * the workqueues list without grabbing wq_pool_mutex.
* This is used to dump all workqueues from sysrq.
*/
struct rcu_head rcu;
@@ -298,8 +306,9 @@
static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */
static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
-static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
-static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
+static DEFINE_RAW_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
+/* wait for manager to go away */
+static struct rcuwait manager_wait = __RCUWAIT_INITIALIZER(manager_wait);
static LIST_HEAD(workqueues); /* PR: list of all workqueues */
static bool workqueue_freezing; /* PL: have wqs started freezing? */
@@ -353,25 +362,24 @@
static int worker_thread(void *__worker);
static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
+static void show_pwq(struct pool_workqueue *pwq);
#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>
-#define assert_rcu_or_pool_mutex() \
- RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
- !lockdep_is_held(&wq_pool_mutex), \
- "sched RCU or wq_pool_mutex should be held")
+EXPORT_TRACEPOINT_SYMBOL_GPL(workqueue_execute_start);
+EXPORT_TRACEPOINT_SYMBOL_GPL(workqueue_execute_end);
-#define assert_rcu_or_wq_mutex(wq) \
- RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
- !lockdep_is_held(&wq->mutex), \
- "sched RCU or wq->mutex should be held")
+#define assert_rcu_or_pool_mutex() \
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
+ !lockdep_is_held(&wq_pool_mutex), \
+ "RCU or wq_pool_mutex should be held")
#define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
- RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
!lockdep_is_held(&wq->mutex) && \
!lockdep_is_held(&wq_pool_mutex), \
- "sched RCU, wq->mutex or wq_pool_mutex should be held")
+ "RCU, wq->mutex or wq_pool_mutex should be held")
#define for_each_cpu_worker_pool(pool, cpu) \
for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
@@ -383,7 +391,7 @@
* @pool: iteration cursor
* @pi: integer used for iteration
*
- * This must be called either with wq_pool_mutex held or sched RCU read
+ * This must be called either with wq_pool_mutex held or RCU read
* locked. If the pool needs to be used beyond the locking in effect, the
* caller is responsible for guaranteeing that the pool stays online.
*
@@ -415,7 +423,7 @@
* @pwq: iteration cursor
* @wq: the target workqueue
*
- * This must be called either with wq->mutex held or sched RCU read locked.
+ * This must be called either with wq->mutex held or RCU read locked.
* If the pwq needs to be used beyond the locking in effect, the caller is
* responsible for guaranteeing that the pwq stays online.
*
@@ -423,13 +431,12 @@
* ignored.
*/
#define for_each_pwq(pwq, wq) \
- list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node) \
- if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
- else
+ list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node, \
+ lockdep_is_held(&(wq->mutex)))
#ifdef CONFIG_DEBUG_OBJECTS_WORK
-static struct debug_obj_descr work_debug_descr;
+static const struct debug_obj_descr work_debug_descr;
static void *work_debug_hint(void *addr)
{
@@ -479,7 +486,7 @@
}
}
-static struct debug_obj_descr work_debug_descr = {
+static const struct debug_obj_descr work_debug_descr = {
.name = "work_struct",
.debug_hint = work_debug_hint,
.is_static_object = work_is_static_object,
@@ -551,7 +558,7 @@
* @wq: the target workqueue
* @node: the node ID
*
- * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU
+ * This must be called with any of wq_pool_mutex, wq->mutex or RCU
* read locked.
* If the pwq needs to be used beyond the locking in effect, the caller is
* responsible for guaranteeing that the pwq stays online.
@@ -647,7 +654,7 @@
* The following mb guarantees that previous clear of a PENDING bit
* will not be reordered with any speculative LOADS or STORES from
* work->current_func, which is executed afterwards. This possible
- * reordering can lead to a missed execution on attempt to qeueue
+ * reordering can lead to a missed execution on attempt to queue
* the same @work. E.g. consider this case:
*
* CPU#0 CPU#1
@@ -680,12 +687,17 @@
set_work_data(work, WORK_STRUCT_NO_POOL, 0);
}
+static inline struct pool_workqueue *work_struct_pwq(unsigned long data)
+{
+ return (struct pool_workqueue *)(data & WORK_STRUCT_WQ_DATA_MASK);
+}
+
static struct pool_workqueue *get_work_pwq(struct work_struct *work)
{
unsigned long data = atomic_long_read(&work->data);
if (data & WORK_STRUCT_PWQ)
- return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
+ return work_struct_pwq(data);
else
return NULL;
}
@@ -695,8 +707,8 @@
* @work: the work item of interest
*
* Pools are created and destroyed under wq_pool_mutex, and allows read
- * access under sched-RCU read lock. As such, this function should be
- * called under wq_pool_mutex or with preemption disabled.
+ * access under RCU read lock. As such, this function should be
+ * called under wq_pool_mutex or inside of a rcu_read_lock() region.
*
* All fields of the returned pool are accessible as long as the above
* mentioned locking is in effect. If the returned pool needs to be used
@@ -713,8 +725,7 @@
assert_rcu_or_pool_mutex();
if (data & WORK_STRUCT_PWQ)
- return ((struct pool_workqueue *)
- (data & WORK_STRUCT_WQ_DATA_MASK))->pool;
+ return work_struct_pwq(data)->pool;
pool_id = data >> WORK_OFFQ_POOL_SHIFT;
if (pool_id == WORK_OFFQ_POOL_NONE)
@@ -735,8 +746,7 @@
unsigned long data = atomic_long_read(&work->data);
if (data & WORK_STRUCT_PWQ)
- return ((struct pool_workqueue *)
- (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
+ return work_struct_pwq(data)->pool->id;
return data >> WORK_OFFQ_POOL_SHIFT;
}
@@ -829,7 +839,7 @@
* Wake up the first idle worker of @pool.
*
* CONTEXT:
- * spin_lock_irq(pool->lock).
+ * raw_spin_lock_irq(pool->lock).
*/
static void wake_up_worker(struct worker_pool *pool)
{
@@ -840,43 +850,42 @@
}
/**
- * wq_worker_waking_up - a worker is waking up
+ * wq_worker_running - a worker is running again
* @task: task waking up
- * @cpu: CPU @task is waking up to
*
- * This function is called during try_to_wake_up() when a worker is
- * being awoken.
- *
- * CONTEXT:
- * spin_lock_irq(rq->lock)
+ * This function is called when a worker returns from schedule()
*/
-void wq_worker_waking_up(struct task_struct *task, int cpu)
+void wq_worker_running(struct task_struct *task)
{
struct worker *worker = kthread_data(task);
- if (!(worker->flags & WORKER_NOT_RUNNING)) {
- WARN_ON_ONCE(worker->pool->cpu != cpu);
+ if (!worker->sleeping)
+ return;
+
+ /*
+ * If preempted by unbind_workers() between the WORKER_NOT_RUNNING check
+ * and the nr_running increment below, we may ruin the nr_running reset
+ * and leave with an unexpected pool->nr_running == 1 on the newly unbound
+ * pool. Protect against such race.
+ */
+ preempt_disable();
+ if (!(worker->flags & WORKER_NOT_RUNNING))
atomic_inc(&worker->pool->nr_running);
- }
+ preempt_enable();
+ worker->sleeping = 0;
}
/**
* wq_worker_sleeping - a worker is going to sleep
* @task: task going to sleep
*
- * This function is called during schedule() when a busy worker is
- * going to sleep. Worker on the same cpu can be woken up by
- * returning pointer to its task.
- *
- * CONTEXT:
- * spin_lock_irq(rq->lock)
- *
- * Return:
- * Worker task on @cpu to wake up, %NULL if none.
+ * This function is called from schedule() when a busy worker is
+ * going to sleep. Preemption needs to be disabled to protect ->sleeping
+ * assignment.
*/
-struct task_struct *wq_worker_sleeping(struct task_struct *task)
+void wq_worker_sleeping(struct task_struct *task)
{
- struct worker *worker = kthread_data(task), *to_wakeup = NULL;
+ struct worker *next, *worker = kthread_data(task);
struct worker_pool *pool;
/*
@@ -885,13 +894,16 @@
* checking NOT_RUNNING.
*/
if (worker->flags & WORKER_NOT_RUNNING)
- return NULL;
+ return;
pool = worker->pool;
- /* this can only happen on the local cpu */
- if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id()))
- return NULL;
+ /* Return if preempted before wq_worker_running() was reached */
+ if (worker->sleeping)
+ return;
+
+ worker->sleeping = 1;
+ raw_spin_lock_irq(&pool->lock);
/*
* The counterpart of the following dec_and_test, implied mb,
@@ -905,19 +917,33 @@
* lock is safe.
*/
if (atomic_dec_and_test(&pool->nr_running) &&
- !list_empty(&pool->worklist))
- to_wakeup = first_idle_worker(pool);
- return to_wakeup ? to_wakeup->task : NULL;
+ !list_empty(&pool->worklist)) {
+ next = first_idle_worker(pool);
+ if (next)
+ wake_up_process(next->task);
+ }
+ raw_spin_unlock_irq(&pool->lock);
}
/**
* wq_worker_last_func - retrieve worker's last work function
+ * @task: Task to retrieve last work function of.
*
* Determine the last function a worker executed. This is called from
* the scheduler to get a worker's last known identity.
*
* CONTEXT:
- * spin_lock_irq(rq->lock)
+ * raw_spin_lock_irq(rq->lock)
+ *
+ * This function is called during schedule() when a kworker is going
+ * to sleep. It's used by psi to identify aggregation workers during
+ * dequeuing, to allow periodic aggregation to shut-off when that
+ * worker is the last task in the system or cgroup to go to sleep.
+ *
+ * As this function doesn't involve any workqueue-related locking, it
+ * only returns stable values when called from inside the scheduler's
+ * queuing and dequeuing paths, when @task, which must be a kworker,
+ * is guaranteed to not be processing any works.
*
* Return:
* The last work function %current executed as a worker, NULL if it
@@ -938,7 +964,7 @@
* Set @flags in @worker->flags and adjust nr_running accordingly.
*
* CONTEXT:
- * spin_lock_irq(pool->lock)
+ * raw_spin_lock_irq(pool->lock)
*/
static inline void worker_set_flags(struct worker *worker, unsigned int flags)
{
@@ -963,7 +989,7 @@
* Clear @flags in @worker->flags and adjust nr_running accordingly.
*
* CONTEXT:
- * spin_lock_irq(pool->lock)
+ * raw_spin_lock_irq(pool->lock)
*/
static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
{
@@ -1011,7 +1037,7 @@
* actually occurs, it should be easy to locate the culprit work function.
*
* CONTEXT:
- * spin_lock_irq(pool->lock).
+ * raw_spin_lock_irq(pool->lock).
*
* Return:
* Pointer to worker which is executing @work if found, %NULL
@@ -1046,7 +1072,7 @@
* nested inside outer list_for_each_entry_safe().
*
* CONTEXT:
- * spin_lock_irq(pool->lock).
+ * raw_spin_lock_irq(pool->lock).
*/
static void move_linked_works(struct work_struct *work, struct list_head *head,
struct work_struct **nextp)
@@ -1121,12 +1147,12 @@
{
if (pwq) {
/*
- * As both pwqs and pools are sched-RCU protected, the
+ * As both pwqs and pools are RCU protected, the
* following lock operations are safe.
*/
- spin_lock_irq(&pwq->pool->lock);
+ raw_spin_lock_irq(&pwq->pool->lock);
put_pwq(pwq);
- spin_unlock_irq(&pwq->pool->lock);
+ raw_spin_unlock_irq(&pwq->pool->lock);
}
}
@@ -1159,7 +1185,7 @@
* decrement nr_in_flight of its pwq and handle workqueue flushing.
*
* CONTEXT:
- * spin_lock_irq(pool->lock).
+ * raw_spin_lock_irq(pool->lock).
*/
static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
{
@@ -1207,11 +1233,14 @@
* stable state - idle, on timer or on worklist.
*
* Return:
+ *
+ * ======== ================================================================
* 1 if @work was pending and we successfully stole PENDING
* 0 if @work was idle and we claimed PENDING
* -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry
* -ENOENT if someone else is canceling @work, this state may persist
* for arbitrarily long
+ * ======== ================================================================
*
* Note:
* On >= 0 return, the caller owns @work's PENDING bit. To avoid getting
@@ -1249,6 +1278,7 @@
if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
return 0;
+ rcu_read_lock();
/*
* The queueing is in progress, or it is already queued. Try to
* steal it from ->worklist without clearing WORK_STRUCT_PENDING.
@@ -1257,7 +1287,7 @@
if (!pool)
goto fail;
- spin_lock(&pool->lock);
+ raw_spin_lock(&pool->lock);
/*
* work->data is guaranteed to point to pwq only while the work
* item is queued on pwq->wq, and both updating work->data to point
@@ -1286,11 +1316,13 @@
/* work->data points to pwq iff queued, point to pool */
set_work_pool_and_keep_pending(work, pool->id);
- spin_unlock(&pool->lock);
+ raw_spin_unlock(&pool->lock);
+ rcu_read_unlock();
return 1;
}
- spin_unlock(&pool->lock);
+ raw_spin_unlock(&pool->lock);
fail:
+ rcu_read_unlock();
local_irq_restore(*flags);
if (work_is_canceling(work))
return -ENOENT;
@@ -1309,12 +1341,15 @@
* work_struct flags.
*
* CONTEXT:
- * spin_lock_irq(pool->lock).
+ * raw_spin_lock_irq(pool->lock).
*/
static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
struct list_head *head, unsigned int extra_flags)
{
struct worker_pool *pool = pwq->pool;
+
+ /* record the work call stack in order to print it in KASAN reports */
+ kasan_record_aux_stack(work);
/* we own @work, set data and link */
set_work_pwq(work, pwq, extra_flags);
@@ -1342,7 +1377,7 @@
worker = current_wq_worker();
/*
- * Return %true iff I'm a worker execuing a work item on @wq. If
+ * Return %true iff I'm a worker executing a work item on @wq. If
* I'm @worker, it's safe to dereference it without locking.
*/
return worker && worker->current_pwq->wq == wq;
@@ -1403,6 +1438,7 @@
if (unlikely(wq->flags & __WQ_DRAINING) &&
WARN_ON_ONCE(!is_chained_work(wq)))
return;
+ rcu_read_lock();
retry:
/* pwq which will be used unless @work is executing elsewhere */
if (wq->flags & WQ_UNBOUND) {
@@ -1424,7 +1460,7 @@
if (last_pool && last_pool != pwq->pool) {
struct worker *worker;
- spin_lock(&last_pool->lock);
+ raw_spin_lock(&last_pool->lock);
worker = find_worker_executing_work(last_pool, work);
@@ -1432,11 +1468,11 @@
pwq = worker->current_pwq;
} else {
/* meh... not running there, queue here */
- spin_unlock(&last_pool->lock);
- spin_lock(&pwq->pool->lock);
+ raw_spin_unlock(&last_pool->lock);
+ raw_spin_lock(&pwq->pool->lock);
}
} else {
- spin_lock(&pwq->pool->lock);
+ raw_spin_lock(&pwq->pool->lock);
}
/*
@@ -1449,7 +1485,7 @@
*/
if (unlikely(!pwq->refcnt)) {
if (wq->flags & WQ_UNBOUND) {
- spin_unlock(&pwq->pool->lock);
+ raw_spin_unlock(&pwq->pool->lock);
cpu_relax();
goto retry;
}
@@ -1461,10 +1497,8 @@
/* pwq determined, queue */
trace_workqueue_queue_work(req_cpu, pwq, work);
- if (WARN_ON(!list_empty(&work->entry))) {
- spin_unlock(&pwq->pool->lock);
- return;
- }
+ if (WARN_ON(!list_empty(&work->entry)))
+ goto out;
pwq->nr_in_flight[pwq->work_color]++;
work_flags = work_color_to_flags(pwq->work_color);
@@ -1483,7 +1517,9 @@
debug_work_activate(work);
insert_work(pwq, work, worklist, work_flags);
- spin_unlock(&pwq->pool->lock);
+out:
+ raw_spin_unlock(&pwq->pool->lock);
+ rcu_read_unlock();
}
/**
@@ -1515,6 +1551,90 @@
}
EXPORT_SYMBOL(queue_work_on);
+/**
+ * workqueue_select_cpu_near - Select a CPU based on NUMA node
+ * @node: NUMA node ID that we want to select a CPU from
+ *
+ * This function will attempt to find a "random" cpu available on a given
+ * node. If there are no CPUs available on the given node it will return
+ * WORK_CPU_UNBOUND indicating that we should just schedule to any
+ * available CPU if we need to schedule this work.
+ */
+static int workqueue_select_cpu_near(int node)
+{
+ int cpu;
+
+ /* No point in doing this if NUMA isn't enabled for workqueues */
+ if (!wq_numa_enabled)
+ return WORK_CPU_UNBOUND;
+
+ /* Delay binding to CPU if node is not valid or online */
+ if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
+ return WORK_CPU_UNBOUND;
+
+ /* Use local node/cpu if we are already there */
+ cpu = raw_smp_processor_id();
+ if (node == cpu_to_node(cpu))
+ return cpu;
+
+ /* Use "random" otherwise know as "first" online CPU of node */
+ cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
+
+ /* If CPU is valid return that, otherwise just defer */
+ return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
+}
+
+/**
+ * queue_work_node - queue work on a "random" cpu for a given NUMA node
+ * @node: NUMA node that we are targeting the work for
+ * @wq: workqueue to use
+ * @work: work to queue
+ *
+ * We queue the work to a "random" CPU within a given NUMA node. The basic
+ * idea here is to provide a way to somehow associate work with a given
+ * NUMA node.
+ *
+ * This function will only make a best effort attempt at getting this onto
+ * the right NUMA node. If no node is requested or the requested node is
+ * offline then we just fall back to standard queue_work behavior.
+ *
+ * Currently the "random" CPU ends up being the first available CPU in the
+ * intersection of cpu_online_mask and the cpumask of the node, unless we
+ * are running on the node. In that case we just use the current CPU.
+ *
+ * Return: %false if @work was already on a queue, %true otherwise.
+ */
+bool queue_work_node(int node, struct workqueue_struct *wq,
+ struct work_struct *work)
+{
+ unsigned long flags;
+ bool ret = false;
+
+ /*
+ * This current implementation is specific to unbound workqueues.
+ * Specifically we only return the first available CPU for a given
+ * node instead of cycling through individual CPUs within the node.
+ *
+ * If this is used with a per-cpu workqueue then the logic in
+ * workqueue_select_cpu_near would need to be updated to allow for
+ * some round robin type logic.
+ */
+ WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
+
+ local_irq_save(flags);
+
+ if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
+ int cpu = workqueue_select_cpu_near(node);
+
+ __queue_work(cpu, wq, work);
+ ret = true;
+ }
+
+ local_irq_restore(flags);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(queue_work_node);
+
void delayed_work_timer_fn(struct timer_list *t)
{
struct delayed_work *dwork = from_timer(dwork, t, timer);
@@ -1531,9 +1651,14 @@
struct work_struct *work = &dwork->work;
WARN_ON_ONCE(!wq);
-#ifndef CONFIG_CFI_CLANG
- WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
-#endif
+ /*
+ * With CFI, timer->function can point to a jump table entry in a module,
+ * which fails the comparison. Disable the warning if CFI and modules are
+ * both enabled.
+ */
+ if (!IS_ENABLED(CONFIG_CFI_CLANG) || !IS_ENABLED(CONFIG_MODULES))
+ WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
+
WARN_ON_ONCE(timer_pending(timer));
WARN_ON_ONCE(!list_empty(&work->entry));
@@ -1644,7 +1769,7 @@
*
* Return: %false if @rwork was already pending, %true otherwise. Note
* that a full RCU grace period is guaranteed only after a %true return.
- * While @rwork is guarnateed to be executed after a %false return, the
+ * While @rwork is guaranteed to be executed after a %false return, the
* execution may happen before a full RCU grace period has passed.
*/
bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
@@ -1669,7 +1794,7 @@
* necessary.
*
* LOCKING:
- * spin_lock_irq(pool->lock).
+ * raw_spin_lock_irq(pool->lock).
*/
static void worker_enter_idle(struct worker *worker)
{
@@ -1709,7 +1834,7 @@
* @worker is leaving idle state. Update stats.
*
* LOCKING:
- * spin_lock_irq(pool->lock).
+ * raw_spin_lock_irq(pool->lock).
*/
static void worker_leave_idle(struct worker *worker)
{
@@ -1838,17 +1963,26 @@
goto fail;
set_user_nice(worker->task, pool->attrs->nice);
+ if (IS_ENABLED(CONFIG_ROCKCHIP_OPTIMIZE_RT_PRIO)) {
+ struct sched_param param;
+
+ if (pool->attrs->nice == 0)
+ param.sched_priority = MAX_RT_PRIO / 2 - 4;
+ else
+ param.sched_priority = MAX_RT_PRIO / 2 - 2;
+ sched_setscheduler_nocheck(worker->task, SCHED_RR, ¶m);
+ }
kthread_bind_mask(worker->task, pool->attrs->cpumask);
/* successful, attach the worker to the pool */
worker_attach_to_pool(worker, pool);
/* start the newly created worker */
- spin_lock_irq(&pool->lock);
+ raw_spin_lock_irq(&pool->lock);
worker->pool->nr_workers++;
worker_enter_idle(worker);
wake_up_process(worker->task);
- spin_unlock_irq(&pool->lock);
+ raw_spin_unlock_irq(&pool->lock);
return worker;
@@ -1867,7 +2001,7 @@
* be idle.
*
* CONTEXT:
- * spin_lock_irq(pool->lock).
+ * raw_spin_lock_irq(pool->lock).
*/
static void destroy_worker(struct worker *worker)
{
@@ -1893,7 +2027,7 @@
{
struct worker_pool *pool = from_timer(pool, t, idle_timer);
- spin_lock_irq(&pool->lock);
+ raw_spin_lock_irq(&pool->lock);
while (too_many_workers(pool)) {
struct worker *worker;
@@ -1911,7 +2045,7 @@
destroy_worker(worker);
}
- spin_unlock_irq(&pool->lock);
+ raw_spin_unlock_irq(&pool->lock);
}
static void send_mayday(struct work_struct *work)
@@ -1942,8 +2076,8 @@
struct worker_pool *pool = from_timer(pool, t, mayday_timer);
struct work_struct *work;
- spin_lock_irq(&pool->lock);
- spin_lock(&wq_mayday_lock); /* for wq->maydays */
+ raw_spin_lock_irq(&pool->lock);
+ raw_spin_lock(&wq_mayday_lock); /* for wq->maydays */
if (need_to_create_worker(pool)) {
/*
@@ -1956,8 +2090,8 @@
send_mayday(work);
}
- spin_unlock(&wq_mayday_lock);
- spin_unlock_irq(&pool->lock);
+ raw_spin_unlock(&wq_mayday_lock);
+ raw_spin_unlock_irq(&pool->lock);
mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
}
@@ -1976,7 +2110,7 @@
* may_start_working() %true.
*
* LOCKING:
- * spin_lock_irq(pool->lock) which may be released and regrabbed
+ * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
* multiple times. Does GFP_KERNEL allocations. Called only from
* manager.
*/
@@ -1985,7 +2119,7 @@
__acquires(&pool->lock)
{
restart:
- spin_unlock_irq(&pool->lock);
+ raw_spin_unlock_irq(&pool->lock);
/* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
@@ -2001,7 +2135,7 @@
}
del_timer_sync(&pool->mayday_timer);
- spin_lock_irq(&pool->lock);
+ raw_spin_lock_irq(&pool->lock);
/*
* This is necessary even after a new worker was just successfully
* created as @pool->lock was dropped and the new worker might have
@@ -2024,7 +2158,7 @@
* and may_start_working() is true.
*
* CONTEXT:
- * spin_lock_irq(pool->lock) which may be released and regrabbed
+ * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
* multiple times. Does GFP_KERNEL allocations.
*
* Return:
@@ -2047,7 +2181,7 @@
pool->manager = NULL;
pool->flags &= ~POOL_MANAGER_ACTIVE;
- wake_up(&wq_manager_wait);
+ rcuwait_wake_up(&manager_wait);
return true;
}
@@ -2063,7 +2197,7 @@
* call this function to process a work.
*
* CONTEXT:
- * spin_lock_irq(pool->lock) which is released and regrabbed.
+ * raw_spin_lock_irq(pool->lock) which is released and regrabbed.
*/
static void process_one_work(struct worker *worker, struct work_struct *work)
__releases(&pool->lock)
@@ -2145,7 +2279,7 @@
*/
set_work_pool_and_clear_pending(work, pool->id);
- spin_unlock_irq(&pool->lock);
+ raw_spin_unlock_irq(&pool->lock);
lock_map_acquire(&pwq->wq->lockdep_map);
lock_map_acquire(&lockdep_map);
@@ -2177,13 +2311,13 @@
* While we must be careful to not use "work" after this, the trace
* point will only record its address.
*/
- trace_workqueue_execute_end(work);
+ trace_workqueue_execute_end(work, worker->current_func);
lock_map_release(&lockdep_map);
lock_map_release(&pwq->wq->lockdep_map);
if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
- " last function: %pf\n",
+ " last function: %ps\n",
current->comm, preempt_count(), task_pid_nr(current),
worker->current_func);
debug_show_held_locks(current);
@@ -2191,7 +2325,7 @@
}
/*
- * The following prevents a kworker from hogging CPU on !PREEMPT
+ * The following prevents a kworker from hogging CPU on !PREEMPTION
* kernels, where a requeueing work item waiting for something to
* happen could deadlock with stop_machine as such work item could
* indefinitely requeue itself while all other CPUs are trapped in
@@ -2200,7 +2334,7 @@
*/
cond_resched();
- spin_lock_irq(&pool->lock);
+ raw_spin_lock_irq(&pool->lock);
/* clear cpu intensive status */
if (unlikely(cpu_intensive))
@@ -2226,7 +2360,7 @@
* fetches a work from the top and executes it.
*
* CONTEXT:
- * spin_lock_irq(pool->lock) which may be released and regrabbed
+ * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
* multiple times.
*/
static void process_scheduled_works(struct worker *worker)
@@ -2268,11 +2402,11 @@
/* tell the scheduler that this is a workqueue worker */
set_pf_worker(true);
woke_up:
- spin_lock_irq(&pool->lock);
+ raw_spin_lock_irq(&pool->lock);
/* am I supposed to die? */
if (unlikely(worker->flags & WORKER_DIE)) {
- spin_unlock_irq(&pool->lock);
+ raw_spin_unlock_irq(&pool->lock);
WARN_ON_ONCE(!list_empty(&worker->entry));
set_pf_worker(false);
@@ -2338,7 +2472,7 @@
*/
worker_enter_idle(worker);
__set_current_state(TASK_IDLE);
- spin_unlock_irq(&pool->lock);
+ raw_spin_unlock_irq(&pool->lock);
schedule();
goto woke_up;
}
@@ -2392,7 +2526,7 @@
should_stop = kthread_should_stop();
/* see whether any pwq is asking for help */
- spin_lock_irq(&wq_mayday_lock);
+ raw_spin_lock_irq(&wq_mayday_lock);
while (!list_empty(&wq->maydays)) {
struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
@@ -2404,11 +2538,11 @@
__set_current_state(TASK_RUNNING);
list_del_init(&pwq->mayday_node);
- spin_unlock_irq(&wq_mayday_lock);
+ raw_spin_unlock_irq(&wq_mayday_lock);
worker_attach_to_pool(rescuer, pool);
- spin_lock_irq(&pool->lock);
+ raw_spin_lock_irq(&pool->lock);
/*
* Slurp in all works issued via this workqueue and
@@ -2436,8 +2570,8 @@
* being used to relieve memory pressure, don't
* incur MAYDAY_INTERVAL delay inbetween.
*/
- if (need_to_create_worker(pool)) {
- spin_lock(&wq_mayday_lock);
+ if (pwq->nr_active && need_to_create_worker(pool)) {
+ raw_spin_lock(&wq_mayday_lock);
/*
* Queue iff we aren't racing destruction
* and somebody else hasn't queued it already.
@@ -2446,7 +2580,7 @@
get_pwq(pwq);
list_add_tail(&pwq->mayday_node, &wq->maydays);
}
- spin_unlock(&wq_mayday_lock);
+ raw_spin_unlock(&wq_mayday_lock);
}
}
@@ -2464,14 +2598,14 @@
if (need_more_worker(pool))
wake_up_worker(pool);
- spin_unlock_irq(&pool->lock);
+ raw_spin_unlock_irq(&pool->lock);
worker_detach_from_pool(rescuer);
- spin_lock_irq(&wq_mayday_lock);
+ raw_spin_lock_irq(&wq_mayday_lock);
}
- spin_unlock_irq(&wq_mayday_lock);
+ raw_spin_unlock_irq(&wq_mayday_lock);
if (should_stop) {
__set_current_state(TASK_RUNNING);
@@ -2508,11 +2642,11 @@
worker = current_wq_worker();
WARN_ONCE(current->flags & PF_MEMALLOC,
- "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
+ "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
current->pid, current->comm, target_wq->name, target_func);
WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
(WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
- "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
+ "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps",
worker->current_pwq->wq->name, worker->current_func,
target_wq->name, target_func);
}
@@ -2551,7 +2685,7 @@
* underneath us, so we can't reliably determine pwq from @target.
*
* CONTEXT:
- * spin_lock_irq(pool->lock).
+ * raw_spin_lock_irq(pool->lock).
*/
static void insert_wq_barrier(struct pool_workqueue *pwq,
struct wq_barrier *barr,
@@ -2638,7 +2772,7 @@
for_each_pwq(pwq, wq) {
struct worker_pool *pool = pwq->pool;
- spin_lock_irq(&pool->lock);
+ raw_spin_lock_irq(&pool->lock);
if (flush_color >= 0) {
WARN_ON_ONCE(pwq->flush_color != -1);
@@ -2655,7 +2789,7 @@
pwq->work_color = work_color;
}
- spin_unlock_irq(&pool->lock);
+ raw_spin_unlock_irq(&pool->lock);
}
if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
@@ -2743,7 +2877,7 @@
* First flushers are responsible for cascading flushes and
* handling overflow. Non-first flushers can simply return.
*/
- if (wq->first_flusher != &this_flusher)
+ if (READ_ONCE(wq->first_flusher) != &this_flusher)
return;
mutex_lock(&wq->mutex);
@@ -2752,7 +2886,7 @@
if (wq->first_flusher != &this_flusher)
goto out_unlock;
- wq->first_flusher = NULL;
+ WRITE_ONCE(wq->first_flusher, NULL);
WARN_ON_ONCE(!list_empty(&this_flusher.list));
WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
@@ -2855,9 +2989,9 @@
for_each_pwq(pwq, wq) {
bool drained;
- spin_lock_irq(&pwq->pool->lock);
+ raw_spin_lock_irq(&pwq->pool->lock);
drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
- spin_unlock_irq(&pwq->pool->lock);
+ raw_spin_unlock_irq(&pwq->pool->lock);
if (drained)
continue;
@@ -2886,14 +3020,14 @@
might_sleep();
- local_irq_disable();
+ rcu_read_lock();
pool = get_work_pool(work);
if (!pool) {
- local_irq_enable();
+ rcu_read_unlock();
return false;
}
- spin_lock(&pool->lock);
+ raw_spin_lock_irq(&pool->lock);
/* see the comment in try_to_grab_pending() with the same code */
pwq = get_work_pwq(work);
if (pwq) {
@@ -2909,7 +3043,7 @@
check_flush_dependency(pwq->wq, work);
insert_wq_barrier(pwq, barr, work, worker);
- spin_unlock_irq(&pool->lock);
+ raw_spin_unlock_irq(&pool->lock);
/*
* Force a lock recursion deadlock when using flush_work() inside a
@@ -2925,10 +3059,11 @@
lock_map_acquire(&pwq->wq->lockdep_map);
lock_map_release(&pwq->wq->lockdep_map);
}
-
+ rcu_read_unlock();
return true;
already_gone:
- spin_unlock_irq(&pool->lock);
+ raw_spin_unlock_irq(&pool->lock);
+ rcu_read_unlock();
return false;
}
@@ -2942,10 +3077,8 @@
if (WARN_ON(!work->func))
return false;
- if (!from_cancel) {
- lock_map_acquire(&work->lockdep_map);
- lock_map_release(&work->lockdep_map);
- }
+ lock_map_acquire(&work->lockdep_map);
+ lock_map_release(&work->lockdep_map);
if (start_flush_work(work, &barr, from_cancel)) {
wait_for_completion(&barr.done);
@@ -3250,21 +3383,20 @@
/**
* alloc_workqueue_attrs - allocate a workqueue_attrs
- * @gfp_mask: allocation mask to use
*
* Allocate a new workqueue_attrs, initialize with default settings and
* return it.
*
* Return: The allocated new workqueue_attr on success. %NULL on failure.
*/
-struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask)
+struct workqueue_attrs *alloc_workqueue_attrs(void)
{
struct workqueue_attrs *attrs;
- attrs = kzalloc(sizeof(*attrs), gfp_mask);
+ attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
if (!attrs)
goto fail;
- if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask))
+ if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL))
goto fail;
cpumask_copy(attrs->cpumask, cpu_possible_mask);
@@ -3321,7 +3453,7 @@
*/
static int init_worker_pool(struct worker_pool *pool)
{
- spin_lock_init(&pool->lock);
+ raw_spin_lock_init(&pool->lock);
pool->id = -1;
pool->cpu = -1;
pool->node = NUMA_NO_NODE;
@@ -3342,23 +3474,62 @@
pool->refcnt = 1;
/* shouldn't fail above this point */
- pool->attrs = alloc_workqueue_attrs(GFP_KERNEL);
+ pool->attrs = alloc_workqueue_attrs();
if (!pool->attrs)
return -ENOMEM;
return 0;
}
+
+#ifdef CONFIG_LOCKDEP
+static void wq_init_lockdep(struct workqueue_struct *wq)
+{
+ char *lock_name;
+
+ lockdep_register_key(&wq->key);
+ lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name);
+ if (!lock_name)
+ lock_name = wq->name;
+
+ wq->lock_name = lock_name;
+ lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0);
+}
+
+static void wq_unregister_lockdep(struct workqueue_struct *wq)
+{
+ lockdep_unregister_key(&wq->key);
+}
+
+static void wq_free_lockdep(struct workqueue_struct *wq)
+{
+ if (wq->lock_name != wq->name)
+ kfree(wq->lock_name);
+}
+#else
+static void wq_init_lockdep(struct workqueue_struct *wq)
+{
+}
+
+static void wq_unregister_lockdep(struct workqueue_struct *wq)
+{
+}
+
+static void wq_free_lockdep(struct workqueue_struct *wq)
+{
+}
+#endif
static void rcu_free_wq(struct rcu_head *rcu)
{
struct workqueue_struct *wq =
container_of(rcu, struct workqueue_struct, rcu);
+ wq_free_lockdep(wq);
+
if (!(wq->flags & WQ_UNBOUND))
free_percpu(wq->cpu_pwqs);
else
free_workqueue_attrs(wq->unbound_attrs);
- kfree(wq->rescuer);
kfree(wq);
}
@@ -3371,11 +3542,23 @@
kfree(pool);
}
+/* This returns with the lock held on success (pool manager is inactive). */
+static bool wq_manager_inactive(struct worker_pool *pool)
+{
+ raw_spin_lock_irq(&pool->lock);
+
+ if (pool->flags & POOL_MANAGER_ACTIVE) {
+ raw_spin_unlock_irq(&pool->lock);
+ return false;
+ }
+ return true;
+}
+
/**
* put_unbound_pool - put a worker_pool
* @pool: worker_pool to put
*
- * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU
+ * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU
* safe manner. get_unbound_pool() calls this function on its failure path
* and this function should be able to release pools which went through,
* successfully or not, init_worker_pool().
@@ -3406,16 +3589,17 @@
* Become the manager and destroy all workers. This prevents
* @pool's workers from blocking on attach_mutex. We're the last
* manager and @pool gets freed with the flag set.
+ * Because of how wq_manager_inactive() works, we will hold the
+ * spinlock after a successful wait.
*/
- spin_lock_irq(&pool->lock);
- wait_event_lock_irq(wq_manager_wait,
- !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
+ rcuwait_wait_event(&manager_wait, wq_manager_inactive(pool),
+ TASK_UNINTERRUPTIBLE);
pool->flags |= POOL_MANAGER_ACTIVE;
while ((worker = first_idle_worker(pool)))
destroy_worker(worker);
WARN_ON(pool->nr_workers || pool->nr_idle);
- spin_unlock_irq(&pool->lock);
+ raw_spin_unlock_irq(&pool->lock);
mutex_lock(&wq_pool_attach_mutex);
if (!list_empty(&pool->workers))
@@ -3429,8 +3613,8 @@
del_timer_sync(&pool->idle_timer);
del_timer_sync(&pool->mayday_timer);
- /* sched-RCU protected to allow dereferences from get_work_pool() */
- call_rcu_sched(&pool->rcu, rcu_free_pool);
+ /* RCU protected to allow dereferences from get_work_pool() */
+ call_rcu(&pool->rcu, rcu_free_pool);
}
/**
@@ -3543,14 +3727,16 @@
put_unbound_pool(pool);
mutex_unlock(&wq_pool_mutex);
- call_rcu_sched(&pwq->rcu, rcu_free_pwq);
+ call_rcu(&pwq->rcu, rcu_free_pwq);
/*
* If we're the last pwq going away, @wq is already dead and no one
* is gonna access it anymore. Schedule RCU free.
*/
- if (is_last)
- call_rcu_sched(&wq->rcu, rcu_free_wq);
+ if (is_last) {
+ wq_unregister_lockdep(wq);
+ call_rcu(&wq->rcu, rcu_free_wq);
+ }
}
/**
@@ -3575,7 +3761,7 @@
return;
/* this function can be called during early boot w/ irq disabled */
- spin_lock_irqsave(&pwq->pool->lock, flags);
+ raw_spin_lock_irqsave(&pwq->pool->lock, flags);
/*
* During [un]freezing, the caller is responsible for ensuring that
@@ -3605,7 +3791,7 @@
pwq->max_active = 0;
}
- spin_unlock_irqrestore(&pwq->pool->lock, flags);
+ raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
}
/* initialize newly alloced @pwq which is associated with @wq and @pool */
@@ -3778,8 +3964,8 @@
ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_node_ids), GFP_KERNEL);
- new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
- tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
+ new_attrs = alloc_workqueue_attrs();
+ tmp_attrs = alloc_workqueue_attrs();
if (!ctx || !new_attrs || !tmp_attrs)
goto out_free;
@@ -3913,6 +4099,8 @@
*
* Performs GFP_KERNEL allocations.
*
+ * Assumes caller has CPU hotplug read exclusion, i.e. get_online_cpus().
+ *
* Return: 0 on success and -errno on failure.
*/
int apply_workqueue_attrs(struct workqueue_struct *wq,
@@ -3920,13 +4108,14 @@
{
int ret;
- apply_wqattrs_lock();
+ lockdep_assert_cpus_held();
+
+ mutex_lock(&wq_pool_mutex);
ret = apply_workqueue_attrs_locked(wq, attrs);
- apply_wqattrs_unlock();
+ mutex_unlock(&wq_pool_mutex);
return ret;
}
-EXPORT_SYMBOL_GPL(apply_workqueue_attrs);
/**
* wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug
@@ -4004,9 +4193,9 @@
use_dfl_pwq:
mutex_lock(&wq->mutex);
- spin_lock_irq(&wq->dfl_pwq->pool->lock);
+ raw_spin_lock_irq(&wq->dfl_pwq->pool->lock);
get_pwq(wq->dfl_pwq);
- spin_unlock_irq(&wq->dfl_pwq->pool->lock);
+ raw_spin_unlock_irq(&wq->dfl_pwq->pool->lock);
old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
out_unlock:
mutex_unlock(&wq->mutex);
@@ -4036,16 +4225,21 @@
mutex_unlock(&wq->mutex);
}
return 0;
- } else if (wq->flags & __WQ_ORDERED) {
+ }
+
+ get_online_cpus();
+ if (wq->flags & __WQ_ORDERED) {
ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
/* there should only be single pwq for ordering guarantee */
WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
"ordering guarantee broken for workqueue %s\n", wq->name);
- return ret;
} else {
- return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
+ ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
}
+ put_online_cpus();
+
+ return ret;
}
static int wq_clamp_max_active(int max_active, unsigned int flags,
@@ -4078,8 +4272,8 @@
rescuer->rescue_wq = wq;
rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);
- ret = PTR_ERR_OR_ZERO(rescuer->task);
- if (ret) {
+ if (IS_ERR(rescuer->task)) {
+ ret = PTR_ERR(rescuer->task);
kfree(rescuer);
return ret;
}
@@ -4091,11 +4285,10 @@
return 0;
}
-struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
- unsigned int flags,
- int max_active,
- struct lock_class_key *key,
- const char *lock_name, ...)
+__printf(1, 4)
+struct workqueue_struct *alloc_workqueue(const char *fmt,
+ unsigned int flags,
+ int max_active, ...)
{
size_t tbl_size = 0;
va_list args;
@@ -4125,12 +4318,12 @@
return NULL;
if (flags & WQ_UNBOUND) {
- wq->unbound_attrs = alloc_workqueue_attrs(GFP_KERNEL);
+ wq->unbound_attrs = alloc_workqueue_attrs();
if (!wq->unbound_attrs)
goto err_free_wq;
}
- va_start(args, lock_name);
+ va_start(args, max_active);
vsnprintf(wq->name, sizeof(wq->name), fmt, args);
va_end(args);
@@ -4147,11 +4340,11 @@
INIT_LIST_HEAD(&wq->flusher_overflow);
INIT_LIST_HEAD(&wq->maydays);
- lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
+ wq_init_lockdep(wq);
INIT_LIST_HEAD(&wq->list);
if (alloc_and_link_pwqs(wq) < 0)
- goto err_free_wq;
+ goto err_unreg_lockdep;
if (wq_online && init_rescuer(wq) < 0)
goto err_destroy;
@@ -4177,6 +4370,9 @@
return wq;
+err_unreg_lockdep:
+ wq_unregister_lockdep(wq);
+ wq_free_lockdep(wq);
err_free_wq:
free_workqueue_attrs(wq->unbound_attrs);
kfree(wq);
@@ -4185,7 +4381,23 @@
destroy_workqueue(wq);
return NULL;
}
-EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
+EXPORT_SYMBOL_GPL(alloc_workqueue);
+
+static bool pwq_busy(struct pool_workqueue *pwq)
+{
+ int i;
+
+ for (i = 0; i < WORK_NR_COLORS; i++)
+ if (pwq->nr_in_flight[i])
+ return true;
+
+ if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
+ return true;
+ if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+ return true;
+
+ return false;
+}
/**
* destroy_workqueue - safely terminate a workqueue
@@ -4212,35 +4424,34 @@
struct worker *rescuer = wq->rescuer;
/* this prevents new queueing */
- spin_lock_irq(&wq_mayday_lock);
+ raw_spin_lock_irq(&wq_mayday_lock);
wq->rescuer = NULL;
- spin_unlock_irq(&wq_mayday_lock);
+ raw_spin_unlock_irq(&wq_mayday_lock);
/* rescuer will empty maydays list before exiting */
kthread_stop(rescuer->task);
kfree(rescuer);
}
- /* sanity checks */
+ /*
+ * Sanity checks - grab all the locks so that we wait for all
+ * in-flight operations which may do put_pwq().
+ */
+ mutex_lock(&wq_pool_mutex);
mutex_lock(&wq->mutex);
for_each_pwq(pwq, wq) {
- int i;
-
- for (i = 0; i < WORK_NR_COLORS; i++) {
- if (WARN_ON(pwq->nr_in_flight[i])) {
- mutex_unlock(&wq->mutex);
- show_workqueue_state();
- return;
- }
- }
-
- if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) ||
- WARN_ON(pwq->nr_active) ||
- WARN_ON(!list_empty(&pwq->delayed_works))) {
+ raw_spin_lock_irq(&pwq->pool->lock);
+ if (WARN_ON(pwq_busy(pwq))) {
+ pr_warn("%s: %s has the following busy pwq\n",
+ __func__, wq->name);
+ show_pwq(pwq);
+ raw_spin_unlock_irq(&pwq->pool->lock);
mutex_unlock(&wq->mutex);
+ mutex_unlock(&wq_pool_mutex);
show_workqueue_state();
return;
}
+ raw_spin_unlock_irq(&pwq->pool->lock);
}
mutex_unlock(&wq->mutex);
@@ -4248,16 +4459,16 @@
* wq list is used to freeze wq, remove from list after
* flushing is complete in case freeze races us.
*/
- mutex_lock(&wq_pool_mutex);
list_del_rcu(&wq->list);
mutex_unlock(&wq_pool_mutex);
if (!(wq->flags & WQ_UNBOUND)) {
+ wq_unregister_lockdep(wq);
/*
* The base ref is never dropped on per-cpu pwqs. Directly
* schedule RCU free.
*/
- call_rcu_sched(&wq->rcu, rcu_free_wq);
+ call_rcu(&wq->rcu, rcu_free_wq);
} else {
/*
* We're the sole accessor of @wq at this point. Directly
@@ -4367,7 +4578,8 @@
struct pool_workqueue *pwq;
bool ret;
- rcu_read_lock_sched();
+ rcu_read_lock();
+ preempt_disable();
if (cpu == WORK_CPU_UNBOUND)
cpu = smp_processor_id();
@@ -4378,7 +4590,8 @@
pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
ret = !list_empty(&pwq->delayed_works);
- rcu_read_unlock_sched();
+ preempt_enable();
+ rcu_read_unlock();
return ret;
}
@@ -4404,15 +4617,15 @@
if (work_pending(work))
ret |= WORK_BUSY_PENDING;
- local_irq_save(flags);
+ rcu_read_lock();
pool = get_work_pool(work);
if (pool) {
- spin_lock(&pool->lock);
+ raw_spin_lock_irqsave(&pool->lock, flags);
if (find_worker_executing_work(pool, work))
ret |= WORK_BUSY_RUNNING;
- spin_unlock(&pool->lock);
+ raw_spin_unlock_irqrestore(&pool->lock, flags);
}
- local_irq_restore(flags);
+ rcu_read_unlock();
return ret;
}
@@ -4476,14 +4689,14 @@
* Carefully copy the associated workqueue's workfn, name and desc.
* Keep the original last '\0' in case the original is garbage.
*/
- probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
- probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
- probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
- probe_kernel_read(name, wq->name, sizeof(name) - 1);
- probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
+ copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn));
+ copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq));
+ copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq));
+ copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1);
+ copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1);
if (fn || name[0] || desc[0]) {
- printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
+ printk("%sWorkqueue: %s %ps", log_lvl, name, fn);
if (strcmp(name, desc))
pr_cont(" (%s)", desc);
pr_cont("\n");
@@ -4508,7 +4721,7 @@
pr_cont("%s BAR(%d)", comma ? "," : "",
task_pid_nr(barr->task));
} else {
- pr_cont("%s %pf", comma ? "," : "", work->func);
+ pr_cont("%s %ps", comma ? "," : "", work->func);
}
}
@@ -4541,9 +4754,9 @@
if (worker->current_pwq != pwq)
continue;
- pr_cont("%s %d%s:%pf", comma ? "," : "",
+ pr_cont("%s %d%s:%ps", comma ? "," : "",
task_pid_nr(worker->task),
- worker == pwq->wq->rescuer ? "(RESCUER)" : "",
+ worker->rescue_wq ? "(RESCUER)" : "",
worker->current_func);
list_for_each_entry(work, &worker->scheduled, entry)
pr_cont_work(false, work);
@@ -4597,7 +4810,7 @@
unsigned long flags;
int pi;
- rcu_read_lock_sched();
+ rcu_read_lock();
pr_info("Showing busy workqueues and worker pools:\n");
@@ -4617,10 +4830,10 @@
pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
for_each_pwq(pwq, wq) {
- spin_lock_irqsave(&pwq->pool->lock, flags);
+ raw_spin_lock_irqsave(&pwq->pool->lock, flags);
if (pwq->nr_active || !list_empty(&pwq->delayed_works))
show_pwq(pwq);
- spin_unlock_irqrestore(&pwq->pool->lock, flags);
+ raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
/*
* We could be printing a lot from atomic context, e.g.
* sysrq-t -> show_workqueue_state(). Avoid triggering
@@ -4634,7 +4847,7 @@
struct worker *worker;
bool first = true;
- spin_lock_irqsave(&pool->lock, flags);
+ raw_spin_lock_irqsave(&pool->lock, flags);
if (pool->nr_workers == pool->nr_idle)
goto next_pool;
@@ -4653,7 +4866,7 @@
}
pr_cont("\n");
next_pool:
- spin_unlock_irqrestore(&pool->lock, flags);
+ raw_spin_unlock_irqrestore(&pool->lock, flags);
/*
* We could be printing a lot from atomic context, e.g.
* sysrq-t -> show_workqueue_state(). Avoid triggering
@@ -4662,7 +4875,7 @@
touch_nmi_watchdog();
}
- rcu_read_unlock_sched();
+ rcu_read_unlock();
}
/* used to show worker information through /proc/PID/{comm,stat,status} */
@@ -4683,7 +4896,7 @@
struct worker_pool *pool = worker->pool;
if (pool) {
- spin_lock_irq(&pool->lock);
+ raw_spin_lock_irq(&pool->lock);
/*
* ->desc tracks information (wq name or
* set_worker_desc()) for the latest execution. If
@@ -4697,12 +4910,13 @@
scnprintf(buf + off, size - off, "-%s",
worker->desc);
}
- spin_unlock_irq(&pool->lock);
+ raw_spin_unlock_irq(&pool->lock);
}
}
mutex_unlock(&wq_pool_attach_mutex);
}
+EXPORT_SYMBOL_GPL(wq_worker_comm);
#ifdef CONFIG_SMP
@@ -4728,7 +4942,7 @@
for_each_cpu_worker_pool(pool, cpu) {
mutex_lock(&wq_pool_attach_mutex);
- spin_lock_irq(&pool->lock);
+ raw_spin_lock_irq(&pool->lock);
/*
* We've blocked all attach/detach operations. Make all workers
@@ -4742,7 +4956,7 @@
pool->flags |= POOL_DISASSOCIATED;
- spin_unlock_irq(&pool->lock);
+ raw_spin_unlock_irq(&pool->lock);
mutex_unlock(&wq_pool_attach_mutex);
/*
@@ -4768,9 +4982,9 @@
* worker blocking could lead to lengthy stalls. Kick off
* unbound chain execution of currently pending work items.
*/
- spin_lock_irq(&pool->lock);
+ raw_spin_lock_irq(&pool->lock);
wake_up_worker(pool);
- spin_unlock_irq(&pool->lock);
+ raw_spin_unlock_irq(&pool->lock);
}
}
@@ -4797,7 +5011,7 @@
WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
pool->attrs->cpumask) < 0);
- spin_lock_irq(&pool->lock);
+ raw_spin_lock_irq(&pool->lock);
pool->flags &= ~POOL_DISASSOCIATED;
@@ -4826,7 +5040,7 @@
*
* WRITE_ONCE() is necessary because @worker->flags may be
* tested without holding any lock in
- * wq_worker_waking_up(). Without it, NOT_RUNNING test may
+ * wq_worker_running(). Without it, NOT_RUNNING test may
* fail incorrectly leading to premature concurrency
* management operations.
*/
@@ -4836,7 +5050,7 @@
WRITE_ONCE(worker->flags, worker_flags);
}
- spin_unlock_irq(&pool->lock);
+ raw_spin_unlock_irq(&pool->lock);
}
/**
@@ -5049,16 +5263,16 @@
* nr_active is monotonically decreasing. It's safe
* to peek without lock.
*/
- rcu_read_lock_sched();
+ rcu_read_lock();
for_each_pwq(pwq, wq) {
WARN_ON_ONCE(pwq->nr_active < 0);
if (pwq->nr_active) {
busy = true;
- rcu_read_unlock_sched();
+ rcu_read_unlock();
goto out_unlock;
}
}
- rcu_read_unlock_sched();
+ rcu_read_unlock();
}
out_unlock:
mutex_unlock(&wq_pool_mutex);
@@ -5260,7 +5474,8 @@
const char *delim = "";
int node, written = 0;
- rcu_read_lock_sched();
+ get_online_cpus();
+ rcu_read_lock();
for_each_node(node) {
written += scnprintf(buf + written, PAGE_SIZE - written,
"%s%d:%d", delim, node,
@@ -5268,7 +5483,8 @@
delim = " ";
}
written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
- rcu_read_unlock_sched();
+ rcu_read_unlock();
+ put_online_cpus();
return written;
}
@@ -5293,7 +5509,7 @@
lockdep_assert_held(&wq_pool_mutex);
- attrs = alloc_workqueue_attrs(GFP_KERNEL);
+ attrs = alloc_workqueue_attrs();
if (!attrs)
return NULL;
@@ -5639,6 +5855,7 @@
pr_cont_pool_info(pool);
pr_cont(" stuck for %us!\n",
jiffies_to_msecs(now - pool_ts) / 1000);
+ trace_android_vh_wq_lockup_pool(pool->cpu, pool_ts);
}
}
@@ -5722,7 +5939,14 @@
return;
}
- wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL);
+ for_each_possible_cpu(cpu) {
+ if (WARN_ON(cpu_to_node(cpu) == NUMA_NO_NODE)) {
+ pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
+ return;
+ }
+ }
+
+ wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
BUG_ON(!wq_update_unbound_numa_attrs_buf);
/*
@@ -5739,11 +5963,6 @@
for_each_possible_cpu(cpu) {
node = cpu_to_node(cpu);
- if (WARN_ON(node == NUMA_NO_NODE)) {
- pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
- /* happens iff arch is bonkers, let's just proceed */
- return;
- }
cpumask_set_cpu(cpu, tbl[node]);
}
@@ -5761,13 +5980,13 @@
* items. Actual work item execution starts only after kthreads can be
* created and scheduled right before early initcalls.
*/
-int __init workqueue_init_early(void)
+void __init workqueue_init_early(void)
{
int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
int i, cpu;
- WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
+ BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(hk_flags));
@@ -5797,7 +6016,7 @@
for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
struct workqueue_attrs *attrs;
- BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
+ BUG_ON(!(attrs = alloc_workqueue_attrs()));
attrs->nice = std_nice[i];
unbound_std_wq_attrs[i] = attrs;
@@ -5806,7 +6025,7 @@
* guaranteed by max_active which is enforced by pwqs.
* Turn off NUMA so that dfl_pwq is used for all nodes.
*/
- BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
+ BUG_ON(!(attrs = alloc_workqueue_attrs()));
attrs->nice = std_nice[i];
attrs->no_numa = true;
ordered_wq_attrs[i] = attrs;
@@ -5828,8 +6047,6 @@
!system_unbound_wq || !system_freezable_wq ||
!system_power_efficient_wq ||
!system_freezable_power_efficient_wq);
-
- return 0;
}
/**
@@ -5841,7 +6058,7 @@
* are no kworkers executing the work items yet. Populate the worker pools
* with the initial workers and enable future kworker creations.
*/
-int __init workqueue_init(void)
+void __init workqueue_init(void)
{
struct workqueue_struct *wq;
struct worker_pool *pool;
@@ -5888,6 +6105,4 @@
wq_online = true;
wq_watchdog_init();
-
- return 0;
}
--
Gitblit v1.6.2