From 6778948f9de86c3cfaf36725a7c87dcff9ba247f Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Mon, 11 Dec 2023 08:20:59 +0000 Subject: [PATCH] kernel_5.10 no rt --- kernel/kernel/workqueue.c | 411 ++++++++++++++++++++++++++++++++++++++++++++-------------- 1 files changed, 310 insertions(+), 101 deletions(-) diff --git a/kernel/kernel/workqueue.c b/kernel/kernel/workqueue.c index ad290b4..cb057e3 100644 --- a/kernel/kernel/workqueue.c +++ b/kernel/kernel/workqueue.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * kernel/workqueue.c - generic async execution with shared worker pool * @@ -50,8 +51,13 @@ #include <linux/sched/isolation.h> #include <linux/nmi.h> #include <linux/kvm_para.h> +#include <uapi/linux/sched/types.h> #include "workqueue_internal.h" + +#include <trace/hooks/wqlockup.h> +/* events/workqueue.h uses default TRACE_INCLUDE_PATH */ +#undef TRACE_INCLUDE_PATH enum { /* @@ -133,7 +139,7 @@ * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads. * * PWR: wq_pool_mutex and wq->mutex protected for writes. Either or - * sched-RCU for reads. + * RCU for reads. * * WQ: wq->mutex protected. * @@ -248,7 +254,7 @@ struct list_head flusher_overflow; /* WQ: flush overflow list */ struct list_head maydays; /* MD: pwqs requesting rescue */ - struct worker *rescuer; /* I: rescue worker */ + struct worker *rescuer; /* MD: rescue worker */ int nr_drainers; /* WQ: drain in progress */ int saved_max_active; /* WQ: saved pwq max_active */ @@ -260,13 +266,15 @@ struct wq_device *wq_dev; /* I: for sysfs interface */ #endif #ifdef CONFIG_LOCKDEP + char *lock_name; + struct lock_class_key key; struct lockdep_map lockdep_map; #endif char name[WQ_NAME_LEN]; /* I: workqueue name */ /* - * Destruction of workqueue_struct is sched-RCU protected to allow - * walking the workqueues list without grabbing wq_pool_mutex. + * Destruction of workqueue_struct is RCU protected to allow walking + * the workqueues list without grabbing wq_pool_mutex. * This is used to dump all workqueues from sysrq. */ struct rcu_head rcu; @@ -299,7 +307,8 @@ static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */ static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */ static DEFINE_RAW_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ -static DECLARE_SWAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */ +/* wait for manager to go away */ +static struct rcuwait manager_wait = __RCUWAIT_INITIALIZER(manager_wait); static LIST_HEAD(workqueues); /* PR: list of all workqueues */ static bool workqueue_freezing; /* PL: have wqs started freezing? */ @@ -353,19 +362,18 @@ static int worker_thread(void *__worker); static void workqueue_sysfs_unregister(struct workqueue_struct *wq); +static void show_pwq(struct pool_workqueue *pwq); #define CREATE_TRACE_POINTS #include <trace/events/workqueue.h> + +EXPORT_TRACEPOINT_SYMBOL_GPL(workqueue_execute_start); +EXPORT_TRACEPOINT_SYMBOL_GPL(workqueue_execute_end); #define assert_rcu_or_pool_mutex() \ RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ !lockdep_is_held(&wq_pool_mutex), \ "RCU or wq_pool_mutex should be held") - -#define assert_rcu_or_wq_mutex(wq) \ - RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ - !lockdep_is_held(&wq->mutex), \ - "RCU or wq->mutex should be held") #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \ RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ @@ -423,13 +431,12 @@ * ignored. 
*/ #define for_each_pwq(pwq, wq) \ - list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node) \ - if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \ - else + list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node, \ + lockdep_is_held(&(wq->mutex))) #ifdef CONFIG_DEBUG_OBJECTS_WORK -static struct debug_obj_descr work_debug_descr; +static const struct debug_obj_descr work_debug_descr; static void *work_debug_hint(void *addr) { @@ -479,7 +486,7 @@ } } -static struct debug_obj_descr work_debug_descr = { +static const struct debug_obj_descr work_debug_descr = { .name = "work_struct", .debug_hint = work_debug_hint, .is_static_object = work_is_static_object, @@ -647,7 +654,7 @@ * The following mb guarantees that previous clear of a PENDING bit * will not be reordered with any speculative LOADS or STORES from * work->current_func, which is executed afterwards. This possible - * reordering can lead to a missed execution on attempt to qeueue + * reordering can lead to a missed execution on attempt to queue * the same @work. E.g. consider this case: * * CPU#0 CPU#1 @@ -851,8 +858,17 @@ if (!worker->sleeping) return; + + /* + * If preempted by unbind_workers() between the WORKER_NOT_RUNNING check + * and the nr_running increment below, we may ruin the nr_running reset + * and leave with an unexpected pool->nr_running == 1 on the newly unbound + * pool. Protect against such race. + */ + preempt_disable(); if (!(worker->flags & WORKER_NOT_RUNNING)) atomic_inc(&worker->pool->nr_running); + preempt_enable(); worker->sleeping = 0; } @@ -861,7 +877,8 @@ * @task: task going to sleep * * This function is called from schedule() when a busy worker is - * going to sleep. + * going to sleep. Preemption needs to be disabled to protect ->sleeping + * assignment. */ void wq_worker_sleeping(struct task_struct *task) { @@ -878,7 +895,8 @@ pool = worker->pool; - if (WARN_ON_ONCE(worker->sleeping)) + /* Return if preempted before wq_worker_running() was reached */ + if (worker->sleeping) return; worker->sleeping = 1; @@ -906,12 +924,23 @@ /** * wq_worker_last_func - retrieve worker's last work function + * @task: Task to retrieve last work function of. * * Determine the last function a worker executed. This is called from * the scheduler to get a worker's last known identity. * * CONTEXT: - * spin_lock_irq(rq->lock) + * raw_spin_lock_irq(rq->lock) + * + * This function is called during schedule() when a kworker is going + * to sleep. It's used by psi to identify aggregation workers during + * dequeuing, to allow periodic aggregation to shut-off when that + * worker is the last task in the system or cgroup to go to sleep. + * + * As this function doesn't involve any workqueue-related locking, it + * only returns stable values when called from inside the scheduler's + * queuing and dequeuing paths, when @task, which must be a kworker, + * is guaranteed to not be processing any works. * * Return: * The last work function %current executed as a worker, NULL if it @@ -1201,11 +1230,14 @@ * stable state - idle, on timer or on worklist. 
* * Return: + * + * ======== ================================================================ * 1 if @work was pending and we successfully stole PENDING * 0 if @work was idle and we claimed PENDING * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry * -ENOENT if someone else is canceling @work, this state may persist * for arbitrarily long + * ======== ================================================================ * * Note: * On >= 0 return, the caller owns @work's PENDING bit. To avoid getting @@ -1313,6 +1345,9 @@ { struct worker_pool *pool = pwq->pool; + /* record the work call stack in order to print it in KASAN reports */ + kasan_record_aux_stack(work); + /* we own @work, set data and link */ set_work_pwq(work, pwq, extra_flags); list_add_tail(&work->entry, head); @@ -1339,7 +1374,7 @@ worker = current_wq_worker(); /* - * Return %true iff I'm a worker execuing a work item on @wq. If + * Return %true iff I'm a worker executing a work item on @wq. If * I'm @worker, it's safe to dereference it without locking. */ return worker && worker->current_pwq->wq == wq; @@ -1513,14 +1548,96 @@ } EXPORT_SYMBOL(queue_work_on); +/** + * workqueue_select_cpu_near - Select a CPU based on NUMA node + * @node: NUMA node ID that we want to select a CPU from + * + * This function will attempt to find a "random" cpu available on a given + * node. If there are no CPUs available on the given node it will return + * WORK_CPU_UNBOUND indicating that we should just schedule to any + * available CPU if we need to schedule this work. + */ +static int workqueue_select_cpu_near(int node) +{ + int cpu; + + /* No point in doing this if NUMA isn't enabled for workqueues */ + if (!wq_numa_enabled) + return WORK_CPU_UNBOUND; + + /* Delay binding to CPU if node is not valid or online */ + if (node < 0 || node >= MAX_NUMNODES || !node_online(node)) + return WORK_CPU_UNBOUND; + + /* Use local node/cpu if we are already there */ + cpu = raw_smp_processor_id(); + if (node == cpu_to_node(cpu)) + return cpu; + + /* Use "random" otherwise know as "first" online CPU of node */ + cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask); + + /* If CPU is valid return that, otherwise just defer */ + return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND; +} + +/** + * queue_work_node - queue work on a "random" cpu for a given NUMA node + * @node: NUMA node that we are targeting the work for + * @wq: workqueue to use + * @work: work to queue + * + * We queue the work to a "random" CPU within a given NUMA node. The basic + * idea here is to provide a way to somehow associate work with a given + * NUMA node. + * + * This function will only make a best effort attempt at getting this onto + * the right NUMA node. If no node is requested or the requested node is + * offline then we just fall back to standard queue_work behavior. + * + * Currently the "random" CPU ends up being the first available CPU in the + * intersection of cpu_online_mask and the cpumask of the node, unless we + * are running on the node. In that case we just use the current CPU. + * + * Return: %false if @work was already on a queue, %true otherwise. + */ +bool queue_work_node(int node, struct workqueue_struct *wq, + struct work_struct *work) +{ + unsigned long flags; + bool ret = false; + + /* + * This current implementation is specific to unbound workqueues. + * Specifically we only return the first available CPU for a given + * node instead of cycling through individual CPUs within the node. 
+ * + * If this is used with a per-cpu workqueue then the logic in + * workqueue_select_cpu_near would need to be updated to allow for + * some round robin type logic. + */ + WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)); + + local_irq_save(flags); + + if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { + int cpu = workqueue_select_cpu_near(node); + + __queue_work(cpu, wq, work); + ret = true; + } + + local_irq_restore(flags); + return ret; +} +EXPORT_SYMBOL_GPL(queue_work_node); + void delayed_work_timer_fn(struct timer_list *t) { struct delayed_work *dwork = from_timer(dwork, t, timer); - unsigned long flags; - local_irq_save(flags); + /* should have been called from irqsafe timer with irq already off */ __queue_work(dwork->cpu, dwork->wq, &dwork->work); - local_irq_restore(flags); } EXPORT_SYMBOL(delayed_work_timer_fn); @@ -1531,9 +1648,14 @@ struct work_struct *work = &dwork->work; WARN_ON_ONCE(!wq); -#ifndef CONFIG_CFI_CLANG - WARN_ON_ONCE(timer->function != delayed_work_timer_fn); -#endif + /* + * With CFI, timer->function can point to a jump table entry in a module, + * which fails the comparison. Disable the warning if CFI and modules are + * both enabled. + */ + if (!IS_ENABLED(CONFIG_CFI_CLANG) || !IS_ENABLED(CONFIG_MODULES)) + WARN_ON_ONCE(timer->function != delayed_work_timer_fn); + WARN_ON_ONCE(timer_pending(timer)); WARN_ON_ONCE(!list_empty(&work->entry)); @@ -1644,7 +1766,7 @@ * * Return: %false if @rwork was already pending, %true otherwise. Note * that a full RCU grace period is guaranteed only after a %true return. - * While @rwork is guarnateed to be executed after a %false return, the + * While @rwork is guaranteed to be executed after a %false return, the * execution may happen before a full RCU grace period has passed. */ bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork) @@ -1838,6 +1960,15 @@ goto fail; set_user_nice(worker->task, pool->attrs->nice); + if (IS_ENABLED(CONFIG_ROCKCHIP_OPTIMIZE_RT_PRIO)) { + struct sched_param param; + + if (pool->attrs->nice == 0) + param.sched_priority = MAX_RT_PRIO / 2 - 4; + else + param.sched_priority = MAX_RT_PRIO / 2 - 2; + sched_setscheduler_nocheck(worker->task, SCHED_RR, ¶m); + } kthread_bind_mask(worker->task, pool->attrs->cpumask); /* successful, attach the worker to the pool */ @@ -2047,7 +2178,7 @@ pool->manager = NULL; pool->flags &= ~POOL_MANAGER_ACTIVE; - swake_up_one(&wq_manager_wait); + rcuwait_wake_up(&manager_wait); return true; } @@ -2177,13 +2308,13 @@ * While we must be careful to not use "work" after this, the trace * point will only record its address. */ - trace_workqueue_execute_end(work); + trace_workqueue_execute_end(work, worker->current_func); lock_map_release(&lockdep_map); lock_map_release(&pwq->wq->lockdep_map); if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n" - " last function: %pf\n", + " last function: %ps\n", current->comm, preempt_count(), task_pid_nr(current), worker->current_func); debug_show_held_locks(current); @@ -2191,7 +2322,7 @@ } /* - * The following prevents a kworker from hogging CPU on !PREEMPT + * The following prevents a kworker from hogging CPU on !PREEMPTION * kernels, where a requeueing work item waiting for something to * happen could deadlock with stop_machine as such work item could * indefinitely requeue itself while all other CPUs are trapped in @@ -2436,7 +2567,7 @@ * being used to relieve memory pressure, don't * incur MAYDAY_INTERVAL delay inbetween. 
*/ - if (need_to_create_worker(pool)) { + if (pwq->nr_active && need_to_create_worker(pool)) { raw_spin_lock(&wq_mayday_lock); /* * Queue iff we aren't racing destruction @@ -2508,11 +2639,11 @@ worker = current_wq_worker(); WARN_ONCE(current->flags & PF_MEMALLOC, - "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf", + "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps", current->pid, current->comm, target_wq->name, target_func); WARN_ONCE(worker && ((worker->current_pwq->wq->flags & (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM), - "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf", + "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps", worker->current_pwq->wq->name, worker->current_func, target_wq->name, target_func); } @@ -2743,7 +2874,7 @@ * First flushers are responsible for cascading flushes and * handling overflow. Non-first flushers can simply return. */ - if (wq->first_flusher != &this_flusher) + if (READ_ONCE(wq->first_flusher) != &this_flusher) return; mutex_lock(&wq->mutex); @@ -2752,7 +2883,7 @@ if (wq->first_flusher != &this_flusher) goto out_unlock; - wq->first_flusher = NULL; + WRITE_ONCE(wq->first_flusher, NULL); WARN_ON_ONCE(!list_empty(&this_flusher.list)); WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color); @@ -2943,10 +3074,8 @@ if (WARN_ON(!work->func)) return false; - if (!from_cancel) { - lock_map_acquire(&work->lockdep_map); - lock_map_release(&work->lockdep_map); - } + lock_map_acquire(&work->lockdep_map); + lock_map_release(&work->lockdep_map); if (start_flush_work(work, &barr, from_cancel)) { wait_for_completion(&barr.done); @@ -3241,7 +3370,7 @@ * * Undo alloc_workqueue_attrs(). */ -static void free_workqueue_attrs(struct workqueue_attrs *attrs) +void free_workqueue_attrs(struct workqueue_attrs *attrs) { if (attrs) { free_cpumask_var(attrs->cpumask); @@ -3257,7 +3386,7 @@ * * Return: The allocated new workqueue_attr on success. %NULL on failure. */ -static struct workqueue_attrs *alloc_workqueue_attrs(void) +struct workqueue_attrs *alloc_workqueue_attrs(void) { struct workqueue_attrs *attrs; @@ -3348,17 +3477,56 @@ return 0; } +#ifdef CONFIG_LOCKDEP +static void wq_init_lockdep(struct workqueue_struct *wq) +{ + char *lock_name; + + lockdep_register_key(&wq->key); + lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name); + if (!lock_name) + lock_name = wq->name; + + wq->lock_name = lock_name; + lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0); +} + +static void wq_unregister_lockdep(struct workqueue_struct *wq) +{ + lockdep_unregister_key(&wq->key); +} + +static void wq_free_lockdep(struct workqueue_struct *wq) +{ + if (wq->lock_name != wq->name) + kfree(wq->lock_name); +} +#else +static void wq_init_lockdep(struct workqueue_struct *wq) +{ +} + +static void wq_unregister_lockdep(struct workqueue_struct *wq) +{ +} + +static void wq_free_lockdep(struct workqueue_struct *wq) +{ +} +#endif + static void rcu_free_wq(struct rcu_head *rcu) { struct workqueue_struct *wq = container_of(rcu, struct workqueue_struct, rcu); + + wq_free_lockdep(wq); if (!(wq->flags & WQ_UNBOUND)) free_percpu(wq->cpu_pwqs); else free_workqueue_attrs(wq->unbound_attrs); - kfree(wq->rescuer); kfree(wq); } @@ -3369,6 +3537,18 @@ ida_destroy(&pool->worker_ida); free_workqueue_attrs(pool->attrs); kfree(pool); +} + +/* This returns with the lock held on success (pool manager is inactive). 
*/ +static bool wq_manager_inactive(struct worker_pool *pool) +{ + raw_spin_lock_irq(&pool->lock); + + if (pool->flags & POOL_MANAGER_ACTIVE) { + raw_spin_unlock_irq(&pool->lock); + return false; + } + return true; } /** @@ -3406,10 +3586,11 @@ * Become the manager and destroy all workers. This prevents * @pool's workers from blocking on attach_mutex. We're the last * manager and @pool gets freed with the flag set. + * Because of how wq_manager_inactive() works, we will hold the + * spinlock after a successful wait. */ - raw_spin_lock_irq(&pool->lock); - swait_event_lock_irq(wq_manager_wait, - !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock); + rcuwait_wait_event(&manager_wait, wq_manager_inactive(pool), + TASK_UNINTERRUPTIBLE); pool->flags |= POOL_MANAGER_ACTIVE; while ((worker = first_idle_worker(pool))) @@ -3549,8 +3730,10 @@ * If we're the last pwq going away, @wq is already dead and no one * is gonna access it anymore. Schedule RCU free. */ - if (is_last) + if (is_last) { + wq_unregister_lockdep(wq); call_rcu(&wq->rcu, rcu_free_wq); + } } /** @@ -3913,16 +4096,20 @@ * * Performs GFP_KERNEL allocations. * + * Assumes caller has CPU hotplug read exclusion, i.e. get_online_cpus(). + * * Return: 0 on success and -errno on failure. */ -static int apply_workqueue_attrs(struct workqueue_struct *wq, +int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs) { int ret; - apply_wqattrs_lock(); + lockdep_assert_cpus_held(); + + mutex_lock(&wq_pool_mutex); ret = apply_workqueue_attrs_locked(wq, attrs); - apply_wqattrs_unlock(); + mutex_unlock(&wq_pool_mutex); return ret; } @@ -4035,16 +4222,21 @@ mutex_unlock(&wq->mutex); } return 0; - } else if (wq->flags & __WQ_ORDERED) { + } + + get_online_cpus(); + if (wq->flags & __WQ_ORDERED) { ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]); /* there should only be single pwq for ordering guarantee */ WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node || wq->pwqs.prev != &wq->dfl_pwq->pwqs_node), "ordering guarantee broken for workqueue %s\n", wq->name); - return ret; } else { - return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]); + ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]); } + put_online_cpus(); + + return ret; } static int wq_clamp_max_active(int max_active, unsigned int flags, @@ -4077,8 +4269,8 @@ rescuer->rescue_wq = wq; rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name); - ret = PTR_ERR_OR_ZERO(rescuer->task); - if (ret) { + if (IS_ERR(rescuer->task)) { + ret = PTR_ERR(rescuer->task); kfree(rescuer); return ret; } @@ -4090,11 +4282,10 @@ return 0; } -struct workqueue_struct *__alloc_workqueue_key(const char *fmt, - unsigned int flags, - int max_active, - struct lock_class_key *key, - const char *lock_name, ...) +__printf(1, 4) +struct workqueue_struct *alloc_workqueue(const char *fmt, + unsigned int flags, + int max_active, ...) 
{ size_t tbl_size = 0; va_list args; @@ -4129,7 +4320,7 @@ goto err_free_wq; } - va_start(args, lock_name); + va_start(args, max_active); vsnprintf(wq->name, sizeof(wq->name), fmt, args); va_end(args); @@ -4146,11 +4337,11 @@ INIT_LIST_HEAD(&wq->flusher_overflow); INIT_LIST_HEAD(&wq->maydays); - lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); + wq_init_lockdep(wq); INIT_LIST_HEAD(&wq->list); if (alloc_and_link_pwqs(wq) < 0) - goto err_free_wq; + goto err_unreg_lockdep; if (wq_online && init_rescuer(wq) < 0) goto err_destroy; @@ -4176,6 +4367,9 @@ return wq; +err_unreg_lockdep: + wq_unregister_lockdep(wq); + wq_free_lockdep(wq); err_free_wq: free_workqueue_attrs(wq->unbound_attrs); kfree(wq); @@ -4184,7 +4378,23 @@ destroy_workqueue(wq); return NULL; } -EXPORT_SYMBOL_GPL(__alloc_workqueue_key); +EXPORT_SYMBOL_GPL(alloc_workqueue); + +static bool pwq_busy(struct pool_workqueue *pwq) +{ + int i; + + for (i = 0; i < WORK_NR_COLORS; i++) + if (pwq->nr_in_flight[i]) + return true; + + if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1)) + return true; + if (pwq->nr_active || !list_empty(&pwq->delayed_works)) + return true; + + return false; +} /** * destroy_workqueue - safely terminate a workqueue @@ -4220,26 +4430,25 @@ kfree(rescuer); } - /* sanity checks */ + /* + * Sanity checks - grab all the locks so that we wait for all + * in-flight operations which may do put_pwq(). + */ + mutex_lock(&wq_pool_mutex); mutex_lock(&wq->mutex); for_each_pwq(pwq, wq) { - int i; - - for (i = 0; i < WORK_NR_COLORS; i++) { - if (WARN_ON(pwq->nr_in_flight[i])) { - mutex_unlock(&wq->mutex); - show_workqueue_state(); - return; - } - } - - if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) || - WARN_ON(pwq->nr_active) || - WARN_ON(!list_empty(&pwq->delayed_works))) { + raw_spin_lock_irq(&pwq->pool->lock); + if (WARN_ON(pwq_busy(pwq))) { + pr_warn("%s: %s has the following busy pwq\n", + __func__, wq->name); + show_pwq(pwq); + raw_spin_unlock_irq(&pwq->pool->lock); mutex_unlock(&wq->mutex); + mutex_unlock(&wq_pool_mutex); show_workqueue_state(); return; } + raw_spin_unlock_irq(&pwq->pool->lock); } mutex_unlock(&wq->mutex); @@ -4247,11 +4456,11 @@ * wq list is used to freeze wq, remove from list after * flushing is complete in case freeze races us. */ - mutex_lock(&wq_pool_mutex); list_del_rcu(&wq->list); mutex_unlock(&wq_pool_mutex); if (!(wq->flags & WQ_UNBOUND)) { + wq_unregister_lockdep(wq); /* * The base ref is never dropped on per-cpu pwqs. Directly * schedule RCU free. @@ -4477,14 +4686,14 @@ * Carefully copy the associated workqueue's workfn, name and desc. * Keep the original last '\0' in case the original is garbage. */ - probe_kernel_read(&fn, &worker->current_func, sizeof(fn)); - probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq)); - probe_kernel_read(&wq, &pwq->wq, sizeof(wq)); - probe_kernel_read(name, wq->name, sizeof(name) - 1); - probe_kernel_read(desc, worker->desc, sizeof(desc) - 1); + copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn)); + copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq)); + copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq)); + copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1); + copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1); if (fn || name[0] || desc[0]) { - printk("%sWorkqueue: %s %pf", log_lvl, name, fn); + printk("%sWorkqueue: %s %ps", log_lvl, name, fn); if (strcmp(name, desc)) pr_cont(" (%s)", desc); pr_cont("\n"); @@ -4509,7 +4718,7 @@ pr_cont("%s BAR(%d)", comma ? 
"," : "", task_pid_nr(barr->task)); } else { - pr_cont("%s %pf", comma ? "," : "", work->func); + pr_cont("%s %ps", comma ? "," : "", work->func); } } @@ -4542,9 +4751,9 @@ if (worker->current_pwq != pwq) continue; - pr_cont("%s %d%s:%pf", comma ? "," : "", + pr_cont("%s %d%s:%ps", comma ? "," : "", task_pid_nr(worker->task), - worker == pwq->wq->rescuer ? "(RESCUER)" : "", + worker->rescue_wq ? "(RESCUER)" : "", worker->current_func); list_for_each_entry(work, &worker->scheduled, entry) pr_cont_work(false, work); @@ -4704,6 +4913,7 @@ mutex_unlock(&wq_pool_attach_mutex); } +EXPORT_SYMBOL_GPL(wq_worker_comm); #ifdef CONFIG_SMP @@ -4827,7 +5037,7 @@ * * WRITE_ONCE() is necessary because @worker->flags may be * tested without holding any lock in - * wq_worker_waking_up(). Without it, NOT_RUNNING test may + * wq_worker_running(). Without it, NOT_RUNNING test may * fail incorrectly leading to premature concurrency * management operations. */ @@ -5642,6 +5852,7 @@ pr_cont_pool_info(pool); pr_cont(" stuck for %us!\n", jiffies_to_msecs(now - pool_ts) / 1000); + trace_android_vh_wq_lockup_pool(pool->cpu, pool_ts); } } @@ -5725,6 +5936,13 @@ return; } + for_each_possible_cpu(cpu) { + if (WARN_ON(cpu_to_node(cpu) == NUMA_NO_NODE)) { + pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu); + return; + } + } + wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(); BUG_ON(!wq_update_unbound_numa_attrs_buf); @@ -5742,11 +5960,6 @@ for_each_possible_cpu(cpu) { node = cpu_to_node(cpu); - if (WARN_ON(node == NUMA_NO_NODE)) { - pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu); - /* happens iff arch is bonkers, let's just proceed */ - return; - } cpumask_set_cpu(cpu, tbl[node]); } @@ -5764,13 +5977,13 @@ * items. Actual work item execution starts only after kthreads can be * created and scheduled right before early initcalls. */ -int __init workqueue_init_early(void) +void __init workqueue_init_early(void) { int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL }; int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ; int i, cpu; - WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); + BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL)); cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(hk_flags)); @@ -5831,8 +6044,6 @@ !system_unbound_wq || !system_freezable_wq || !system_power_efficient_wq || !system_freezable_power_efficient_wq); - - return 0; } /** @@ -5844,7 +6055,7 @@ * are no kworkers executing the work items yet. Populate the worker pools * with the initial workers and enable future kworker creations. */ -int __init workqueue_init(void) +void __init workqueue_init(void) { struct workqueue_struct *wq; struct worker_pool *pool; @@ -5891,6 +6102,4 @@ wq_online = true; wq_watchdog_init(); - - return 0; } -- Gitblit v1.6.2