2023-12-11 6778948f9de86c3cfaf36725a7c87dcff9ba247f
kernel/kernel/workqueue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
 * kernel/workqueue.c - generic async execution with shared worker pool
 *
@@ -50,8 +51,13 @@
 #include <linux/sched/isolation.h>
 #include <linux/nmi.h>
 #include <linux/kvm_para.h>
+#include <uapi/linux/sched/types.h>

 #include "workqueue_internal.h"
+
+#include <trace/hooks/wqlockup.h>
+/* events/workqueue.h uses default TRACE_INCLUDE_PATH */
+#undef TRACE_INCLUDE_PATH

 enum {
 /*
@@ -133,7 +139,7 @@
 * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
 *
 * PWR: wq_pool_mutex and wq->mutex protected for writes. Either or
- * sched-RCU for reads.
+ * RCU for reads.
 *
 * WQ: wq->mutex protected.
 *
@@ -248,7 +254,7 @@
 struct list_head flusher_overflow; /* WQ: flush overflow list */

 struct list_head maydays; /* MD: pwqs requesting rescue */
- struct worker *rescuer; /* I: rescue worker */
+ struct worker *rescuer; /* MD: rescue worker */

 int nr_drainers; /* WQ: drain in progress */
 int saved_max_active; /* WQ: saved pwq max_active */
@@ -260,13 +266,15 @@
 struct wq_device *wq_dev; /* I: for sysfs interface */
 #endif
 #ifdef CONFIG_LOCKDEP
+ char *lock_name;
+ struct lock_class_key key;
 struct lockdep_map lockdep_map;
 #endif
 char name[WQ_NAME_LEN]; /* I: workqueue name */

 /*
- * Destruction of workqueue_struct is sched-RCU protected to allow
- * walking the workqueues list without grabbing wq_pool_mutex.
+ * Destruction of workqueue_struct is RCU protected to allow walking
+ * the workqueues list without grabbing wq_pool_mutex.
 * This is used to dump all workqueues from sysrq.
 */
 struct rcu_head rcu;
@@ -299,7 +307,8 @@
 static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */
 static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
 static DEFINE_RAW_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
-static DECLARE_SWAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
+/* wait for manager to go away */
+static struct rcuwait manager_wait = __RCUWAIT_INITIALIZER(manager_wait);

 static LIST_HEAD(workqueues); /* PR: list of all workqueues */
 static bool workqueue_freezing; /* PL: have wqs started freezing? */
@@ -353,19 +362,18 @@

 static int worker_thread(void *__worker);
 static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
+static void show_pwq(struct pool_workqueue *pwq);

 #define CREATE_TRACE_POINTS
 #include <trace/events/workqueue.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(workqueue_execute_start);
+EXPORT_TRACEPOINT_SYMBOL_GPL(workqueue_execute_end);

 #define assert_rcu_or_pool_mutex() \
 RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
 !lockdep_is_held(&wq_pool_mutex), \
 "RCU or wq_pool_mutex should be held")
-
-#define assert_rcu_or_wq_mutex(wq) \
- RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
- !lockdep_is_held(&wq->mutex), \
- "RCU or wq->mutex should be held")

 #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
 RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
@@ -423,13 +431,12 @@
 * ignored.
 */
 #define for_each_pwq(pwq, wq) \
- list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node) \
- if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
- else
+ list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node, \
+ lockdep_is_held(&(wq->mutex)))

 #ifdef CONFIG_DEBUG_OBJECTS_WORK

-static struct debug_obj_descr work_debug_descr;
+static const struct debug_obj_descr work_debug_descr;

 static void *work_debug_hint(void *addr)
 {
@@ -479,7 +486,7 @@
 }
 }

-static struct debug_obj_descr work_debug_descr = {
+static const struct debug_obj_descr work_debug_descr = {
 .name = "work_struct",
 .debug_hint = work_debug_hint,
 .is_static_object = work_is_static_object,
@@ -647,7 +654,7 @@
 * The following mb guarantees that previous clear of a PENDING bit
 * will not be reordered with any speculative LOADS or STORES from
 * work->current_func, which is executed afterwards. This possible
- * reordering can lead to a missed execution on attempt to qeueue
+ * reordering can lead to a missed execution on attempt to queue
 * the same @work. E.g. consider this case:
 *
 * CPU#0 CPU#1
@@ -851,8 +858,17 @@

 if (!worker->sleeping)
 return;
+
+ /*
+ * If preempted by unbind_workers() between the WORKER_NOT_RUNNING check
+ * and the nr_running increment below, we may ruin the nr_running reset
+ * and leave with an unexpected pool->nr_running == 1 on the newly unbound
+ * pool. Protect against such race.
+ */
+ preempt_disable();
 if (!(worker->flags & WORKER_NOT_RUNNING))
 atomic_inc(&worker->pool->nr_running);
+ preempt_enable();
 worker->sleeping = 0;
 }

@@ -861,7 +877,8 @@
 * @task: task going to sleep
 *
 * This function is called from schedule() when a busy worker is
- * going to sleep.
+ * going to sleep. Preemption needs to be disabled to protect ->sleeping
+ * assignment.
 */
 void wq_worker_sleeping(struct task_struct *task)
 {
@@ -878,7 +895,8 @@

 pool = worker->pool;

- if (WARN_ON_ONCE(worker->sleeping))
+ /* Return if preempted before wq_worker_running() was reached */
+ if (worker->sleeping)
 return;

 worker->sleeping = 1;
@@ -906,12 +924,23 @@

 /**
 * wq_worker_last_func - retrieve worker's last work function
+ * @task: Task to retrieve last work function of.
 *
 * Determine the last function a worker executed. This is called from
 * the scheduler to get a worker's last known identity.
 *
 * CONTEXT:
- * spin_lock_irq(rq->lock)
+ * raw_spin_lock_irq(rq->lock)
+ *
+ * This function is called during schedule() when a kworker is going
+ * to sleep. It's used by psi to identify aggregation workers during
+ * dequeuing, to allow periodic aggregation to shut-off when that
+ * worker is the last task in the system or cgroup to go to sleep.
+ *
+ * As this function doesn't involve any workqueue-related locking, it
+ * only returns stable values when called from inside the scheduler's
+ * queuing and dequeuing paths, when @task, which must be a kworker,
+ * is guaranteed to not be processing any works.
 *
 * Return:
 * The last work function %current executed as a worker, NULL if it
@@ -1201,11 +1230,14 @@
 * stable state - idle, on timer or on worklist.
 *
 * Return:
+ *
+ * ======== ================================================================
 * 1 if @work was pending and we successfully stole PENDING
 * 0 if @work was idle and we claimed PENDING
 * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry
 * -ENOENT if someone else is canceling @work, this state may persist
 * for arbitrarily long
+ * ======== ================================================================
 *
 * Note:
 * On >= 0 return, the caller owns @work's PENDING bit. To avoid getting
@@ -1313,6 +1345,9 @@
 {
 struct worker_pool *pool = pwq->pool;

+ /* record the work call stack in order to print it in KASAN reports */
+ kasan_record_aux_stack(work);
+
 /* we own @work, set data and link */
 set_work_pwq(work, pwq, extra_flags);
 list_add_tail(&work->entry, head);
@@ -1339,7 +1374,7 @@

 worker = current_wq_worker();
 /*
- * Return %true iff I'm a worker execuing a work item on @wq. If
+ * Return %true iff I'm a worker executing a work item on @wq. If
 * I'm @worker, it's safe to dereference it without locking.
 */
 return worker && worker->current_pwq->wq == wq;
@@ -1513,14 +1548,96 @@
 }
 EXPORT_SYMBOL(queue_work_on);

+/**
+ * workqueue_select_cpu_near - Select a CPU based on NUMA node
+ * @node: NUMA node ID that we want to select a CPU from
+ *
+ * This function will attempt to find a "random" cpu available on a given
+ * node. If there are no CPUs available on the given node it will return
+ * WORK_CPU_UNBOUND indicating that we should just schedule to any
+ * available CPU if we need to schedule this work.
+ */
+static int workqueue_select_cpu_near(int node)
+{
+ int cpu;
+
+ /* No point in doing this if NUMA isn't enabled for workqueues */
+ if (!wq_numa_enabled)
+ return WORK_CPU_UNBOUND;
+
+ /* Delay binding to CPU if node is not valid or online */
+ if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
+ return WORK_CPU_UNBOUND;
+
+ /* Use local node/cpu if we are already there */
+ cpu = raw_smp_processor_id();
+ if (node == cpu_to_node(cpu))
+ return cpu;
+
+ /* Use "random" otherwise know as "first" online CPU of node */
+ cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
+
+ /* If CPU is valid return that, otherwise just defer */
+ return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
+}
+
+/**
+ * queue_work_node - queue work on a "random" cpu for a given NUMA node
+ * @node: NUMA node that we are targeting the work for
+ * @wq: workqueue to use
+ * @work: work to queue
+ *
+ * We queue the work to a "random" CPU within a given NUMA node. The basic
+ * idea here is to provide a way to somehow associate work with a given
+ * NUMA node.
+ *
+ * This function will only make a best effort attempt at getting this onto
+ * the right NUMA node. If no node is requested or the requested node is
+ * offline then we just fall back to standard queue_work behavior.
+ *
+ * Currently the "random" CPU ends up being the first available CPU in the
+ * intersection of cpu_online_mask and the cpumask of the node, unless we
+ * are running on the node. In that case we just use the current CPU.
+ *
+ * Return: %false if @work was already on a queue, %true otherwise.
+ */
+bool queue_work_node(int node, struct workqueue_struct *wq,
+ struct work_struct *work)
+{
+ unsigned long flags;
+ bool ret = false;
+
+ /*
+ * This current implementation is specific to unbound workqueues.
+ * Specifically we only return the first available CPU for a given
+ * node instead of cycling through individual CPUs within the node.
+ *
+ * If this is used with a per-cpu workqueue then the logic in
+ * workqueue_select_cpu_near would need to be updated to allow for
+ * some round robin type logic.
+ */
+ WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
+
+ local_irq_save(flags);
+
+ if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
+ int cpu = workqueue_select_cpu_near(node);
+
+ __queue_work(cpu, wq, work);
+ ret = true;
+ }
+
+ local_irq_restore(flags);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(queue_work_node);
+
 void delayed_work_timer_fn(struct timer_list *t)
 {
 struct delayed_work *dwork = from_timer(dwork, t, timer);
- unsigned long flags;

- local_irq_save(flags);
+ /* should have been called from irqsafe timer with irq already off */
 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
- local_irq_restore(flags);
 }
 EXPORT_SYMBOL(delayed_work_timer_fn);

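/*
 * Editor's note: a minimal usage sketch for the queue_work_node() added
 * above -- not part of this patch. The workqueue name, work function and
 * node id below are hypothetical; the calls themselves (alloc_workqueue,
 * DECLARE_WORK, queue_work_node) are the in-tree APIs.
 */
#include <linux/errno.h>
#include <linux/printk.h>
#include <linux/workqueue.h>

static void example_work_fn(struct work_struct *work)
{
	pr_info("ran on a CPU close to the requested NUMA node\n");
}

static DECLARE_WORK(example_work, example_work_fn);

static int example_queue_near_node(void)
{
	/* queue_work_node() warns unless the workqueue is WQ_UNBOUND */
	struct workqueue_struct *wq = alloc_workqueue("example_wq", WQ_UNBOUND, 0);

	if (!wq)
		return -ENOMEM;

	/* best effort: picks an online CPU of node 0, else falls back to any CPU */
	queue_work_node(0, wq, &example_work);
	return 0;
}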
@@ -1531,9 +1648,14 @@
 struct work_struct *work = &dwork->work;

 WARN_ON_ONCE(!wq);
-#ifndef CONFIG_CFI_CLANG
- WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
-#endif
+ /*
+ * With CFI, timer->function can point to a jump table entry in a module,
+ * which fails the comparison. Disable the warning if CFI and modules are
+ * both enabled.
+ */
+ if (!IS_ENABLED(CONFIG_CFI_CLANG) || !IS_ENABLED(CONFIG_MODULES))
+ WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
+
 WARN_ON_ONCE(timer_pending(timer));
 WARN_ON_ONCE(!list_empty(&work->entry));

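/*
 * Editor's note: a usage sketch, not part of this patch. The sanity check
 * above relies on INIT_DELAYED_WORK()/DECLARE_DELAYED_WORK() having installed
 * delayed_work_timer_fn as the timer callback; a typical caller looks like
 * this (the names, system_wq and the 2*HZ delay are arbitrary).
 */
#include <linux/jiffies.h>
#include <linux/workqueue.h>

static void example_dwork_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(example_dwork, example_dwork_fn);

static void example_dwork_fn(struct work_struct *work)
{
	/* runs roughly two seconds after example_kick() */
}

static void example_kick(void)
{
	/* __queue_delayed_work() above validates this dwork's timer callback */
	queue_delayed_work(system_wq, &example_dwork, 2 * HZ);
}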
@@ -1644,7 +1766,7 @@
 *
 * Return: %false if @rwork was already pending, %true otherwise. Note
 * that a full RCU grace period is guaranteed only after a %true return.
- * While @rwork is guarnateed to be executed after a %false return, the
+ * While @rwork is guaranteed to be executed after a %false return, the
 * execution may happen before a full RCU grace period has passed.
 */
 bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
@@ -1838,6 +1960,15 @@
 goto fail;

 set_user_nice(worker->task, pool->attrs->nice);
+ if (IS_ENABLED(CONFIG_ROCKCHIP_OPTIMIZE_RT_PRIO)) {
+ struct sched_param param;
+
+ if (pool->attrs->nice == 0)
+ param.sched_priority = MAX_RT_PRIO / 2 - 4;
+ else
+ param.sched_priority = MAX_RT_PRIO / 2 - 2;
+ sched_setscheduler_nocheck(worker->task, SCHED_RR, &param);
+ }
 kthread_bind_mask(worker->task, pool->attrs->cpumask);

 /* successful, attach the worker to the pool */
@@ -2047,7 +2178,7 @@

 pool->manager = NULL;
 pool->flags &= ~POOL_MANAGER_ACTIVE;
- swake_up_one(&wq_manager_wait);
+ rcuwait_wake_up(&manager_wait);
 return true;
 }

@@ -2177,13 +2308,13 @@
 * While we must be careful to not use "work" after this, the trace
 * point will only record its address.
 */
- trace_workqueue_execute_end(work);
+ trace_workqueue_execute_end(work, worker->current_func);
 lock_map_release(&lockdep_map);
 lock_map_release(&pwq->wq->lockdep_map);

 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
 pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
- " last function: %pf\n",
+ " last function: %ps\n",
 current->comm, preempt_count(), task_pid_nr(current),
 worker->current_func);
 debug_show_held_locks(current);
@@ -2191,7 +2322,7 @@
 }

 /*
- * The following prevents a kworker from hogging CPU on !PREEMPT
+ * The following prevents a kworker from hogging CPU on !PREEMPTION
 * kernels, where a requeueing work item waiting for something to
 * happen could deadlock with stop_machine as such work item could
 * indefinitely requeue itself while all other CPUs are trapped in
@@ -2436,7 +2567,7 @@
 * being used to relieve memory pressure, don't
 * incur MAYDAY_INTERVAL delay inbetween.
 */
- if (need_to_create_worker(pool)) {
+ if (pwq->nr_active && need_to_create_worker(pool)) {
 raw_spin_lock(&wq_mayday_lock);
 /*
 * Queue iff we aren't racing destruction
@@ -2508,11 +2639,11 @@
 worker = current_wq_worker();

 WARN_ONCE(current->flags & PF_MEMALLOC,
- "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
+ "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
 current->pid, current->comm, target_wq->name, target_func);
 WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
 (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
- "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
+ "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps",
 worker->current_pwq->wq->name, worker->current_func,
 target_wq->name, target_func);
 }
@@ -2743,7 +2874,7 @@
 * First flushers are responsible for cascading flushes and
 * handling overflow. Non-first flushers can simply return.
 */
- if (wq->first_flusher != &this_flusher)
+ if (READ_ONCE(wq->first_flusher) != &this_flusher)
 return;

 mutex_lock(&wq->mutex);
@@ -2752,7 +2883,7 @@
 if (wq->first_flusher != &this_flusher)
 goto out_unlock;

- wq->first_flusher = NULL;
+ WRITE_ONCE(wq->first_flusher, NULL);

 WARN_ON_ONCE(!list_empty(&this_flusher.list));
 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
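/*
 * Editor's note: a generic sketch of the annotation pattern used above, not
 * part of this patch. READ_ONCE() marks the lockless peek at wq->first_flusher
 * and the result is re-checked under wq->mutex before WRITE_ONCE() clears it.
 * The lock, pointer and function below are hypothetical.
 */
#include <linux/compiler.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(example_lock);
static void *example_owner;	/* written only under example_lock */

static void example_release(void *me)
{
	if (READ_ONCE(example_owner) != me)	/* cheap lockless early exit */
		return;

	mutex_lock(&example_lock);
	if (example_owner == me)		/* authoritative check under the lock */
		WRITE_ONCE(example_owner, NULL);
	mutex_unlock(&example_lock);
}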
@@ -2943,10 +3074,8 @@
 if (WARN_ON(!work->func))
 return false;

- if (!from_cancel) {
- lock_map_acquire(&work->lockdep_map);
- lock_map_release(&work->lockdep_map);
- }
+ lock_map_acquire(&work->lockdep_map);
+ lock_map_release(&work->lockdep_map);

 if (start_flush_work(work, &barr, from_cancel)) {
 wait_for_completion(&barr.done);
@@ -3241,7 +3370,7 @@
 *
 * Undo alloc_workqueue_attrs().
 */
-static void free_workqueue_attrs(struct workqueue_attrs *attrs)
+void free_workqueue_attrs(struct workqueue_attrs *attrs)
 {
 if (attrs) {
 free_cpumask_var(attrs->cpumask);
@@ -3257,7 +3386,7 @@
 *
 * Return: The allocated new workqueue_attr on success. %NULL on failure.
 */
-static struct workqueue_attrs *alloc_workqueue_attrs(void)
+struct workqueue_attrs *alloc_workqueue_attrs(void)
 {
 struct workqueue_attrs *attrs;

@@ -3348,17 +3477,56 @@
 return 0;
 }

+#ifdef CONFIG_LOCKDEP
+static void wq_init_lockdep(struct workqueue_struct *wq)
+{
+ char *lock_name;
+
+ lockdep_register_key(&wq->key);
+ lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name);
+ if (!lock_name)
+ lock_name = wq->name;
+
+ wq->lock_name = lock_name;
+ lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0);
+}
+
+static void wq_unregister_lockdep(struct workqueue_struct *wq)
+{
+ lockdep_unregister_key(&wq->key);
+}
+
+static void wq_free_lockdep(struct workqueue_struct *wq)
+{
+ if (wq->lock_name != wq->name)
+ kfree(wq->lock_name);
+}
+#else
+static void wq_init_lockdep(struct workqueue_struct *wq)
+{
+}
+
+static void wq_unregister_lockdep(struct workqueue_struct *wq)
+{
+}
+
+static void wq_free_lockdep(struct workqueue_struct *wq)
+{
+}
+#endif
+
 static void rcu_free_wq(struct rcu_head *rcu)
 {
 struct workqueue_struct *wq =
 container_of(rcu, struct workqueue_struct, rcu);
+
+ wq_free_lockdep(wq);

 if (!(wq->flags & WQ_UNBOUND))
 free_percpu(wq->cpu_pwqs);
 else
 free_workqueue_attrs(wq->unbound_attrs);

- kfree(wq->rescuer);
 kfree(wq);
 }

@@ -3369,6 +3537,18 @@
 ida_destroy(&pool->worker_ida);
 free_workqueue_attrs(pool->attrs);
 kfree(pool);
+}
+
+/* This returns with the lock held on success (pool manager is inactive). */
+static bool wq_manager_inactive(struct worker_pool *pool)
+{
+ raw_spin_lock_irq(&pool->lock);
+
+ if (pool->flags & POOL_MANAGER_ACTIVE) {
+ raw_spin_unlock_irq(&pool->lock);
+ return false;
+ }
+ return true;
 }

 /**
@@ -3406,10 +3586,11 @@
 * Become the manager and destroy all workers. This prevents
 * @pool's workers from blocking on attach_mutex. We're the last
 * manager and @pool gets freed with the flag set.
+ * Because of how wq_manager_inactive() works, we will hold the
+ * spinlock after a successful wait.
 */
- raw_spin_lock_irq(&pool->lock);
- swait_event_lock_irq(wq_manager_wait,
- !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
+ rcuwait_wait_event(&manager_wait, wq_manager_inactive(pool),
+ TASK_UNINTERRUPTIBLE);
 pool->flags |= POOL_MANAGER_ACTIVE;

 while ((worker = first_idle_worker(pool)))
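/*
 * Editor's note: a reduced sketch of the rcuwait pairing used above
 * (rcuwait_wait_event() here, rcuwait_wake_up() in the earlier hunk), not
 * part of this patch. The flag and helpers below are hypothetical.
 */
#include <linux/compiler.h>
#include <linux/rcuwait.h>
#include <linux/sched.h>

static struct rcuwait example_wait = __RCUWAIT_INITIALIZER(example_wait);
static bool example_done;

static void example_waiter(void)
{
	/* sleeps until example_done is observed true; no waitqueue spinlock needed */
	rcuwait_wait_event(&example_wait, READ_ONCE(example_done),
			   TASK_UNINTERRUPTIBLE);
}

static void example_waker(void)
{
	WRITE_ONCE(example_done, true);
	rcuwait_wake_up(&example_wait);	/* wakes the single registered waiter, if any */
}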
@@ -3549,8 +3730,10 @@
 * If we're the last pwq going away, @wq is already dead and no one
 * is gonna access it anymore. Schedule RCU free.
 */
- if (is_last)
+ if (is_last) {
+ wq_unregister_lockdep(wq);
 call_rcu(&wq->rcu, rcu_free_wq);
+ }
 }

 /**
@@ -3913,16 +4096,20 @@
 *
 * Performs GFP_KERNEL allocations.
 *
+ * Assumes caller has CPU hotplug read exclusion, i.e. get_online_cpus().
+ *
 * Return: 0 on success and -errno on failure.
 */
-static int apply_workqueue_attrs(struct workqueue_struct *wq,
+int apply_workqueue_attrs(struct workqueue_struct *wq,
 const struct workqueue_attrs *attrs)
 {
 int ret;

- apply_wqattrs_lock();
+ lockdep_assert_cpus_held();
+
+ mutex_lock(&wq_pool_mutex);
 ret = apply_workqueue_attrs_locked(wq, attrs);
- apply_wqattrs_unlock();
+ mutex_unlock(&wq_pool_mutex);

 return ret;
 }
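/*
 * Editor's note: a sketch of how an in-kernel caller might use the now
 * non-static attrs API, not part of this patch. Per the comment above, the
 * caller must hold CPU hotplug read exclusion, and @wq must be an unbound
 * workqueue. The helper name and values are hypothetical.
 */
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/workqueue.h>

static int example_set_wq_cpumask(struct workqueue_struct *wq,
				  const struct cpumask *mask)
{
	struct workqueue_attrs *attrs;
	int ret;

	attrs = alloc_workqueue_attrs();
	if (!attrs)
		return -ENOMEM;

	attrs->nice = 0;
	cpumask_copy(attrs->cpumask, mask);

	get_online_cpus();		/* CPU hotplug read exclusion */
	ret = apply_workqueue_attrs(wq, attrs);
	put_online_cpus();

	free_workqueue_attrs(attrs);
	return ret;
}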
@@ -4035,16 +4222,21 @@
 mutex_unlock(&wq->mutex);
 }
 return 0;
- } else if (wq->flags & __WQ_ORDERED) {
+ }
+
+ get_online_cpus();
+ if (wq->flags & __WQ_ORDERED) {
 ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
 /* there should only be single pwq for ordering guarantee */
 WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
 wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
 "ordering guarantee broken for workqueue %s\n", wq->name);
- return ret;
 } else {
- return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
+ ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
 }
+ put_online_cpus();
+
+ return ret;
 }

 static int wq_clamp_max_active(int max_active, unsigned int flags,
@@ -4077,8 +4269,8 @@

 rescuer->rescue_wq = wq;
 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);
- ret = PTR_ERR_OR_ZERO(rescuer->task);
- if (ret) {
+ if (IS_ERR(rescuer->task)) {
+ ret = PTR_ERR(rescuer->task);
 kfree(rescuer);
 return ret;
 }
@@ -4090,11 +4282,10 @@
 return 0;
 }

-struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
- unsigned int flags,
- int max_active,
- struct lock_class_key *key,
- const char *lock_name, ...)
+__printf(1, 4)
+struct workqueue_struct *alloc_workqueue(const char *fmt,
+ unsigned int flags,
+ int max_active, ...)
 {
 size_t tbl_size = 0;
 va_list args;
@@ -4129,7 +4320,7 @@
 goto err_free_wq;
 }

- va_start(args, lock_name);
+ va_start(args, max_active);
 vsnprintf(wq->name, sizeof(wq->name), fmt, args);
 va_end(args);

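/*
 * Editor's note: call sites are unchanged by the conversion of
 * alloc_workqueue() from a macro around __alloc_workqueue_key() into a real
 * varargs function; a typical (hypothetical) caller, not part of this patch:
 */
#include <linux/errno.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;

static int example_create(int id)
{
	/* printf-style name, flags, max_active (0 selects the default) */
	example_wq = alloc_workqueue("example_wq/%d",
				     WQ_UNBOUND | WQ_MEM_RECLAIM, 0, id);
	return example_wq ? 0 : -ENOMEM;
}

static void example_destroy(void)
{
	/* drains and frees; the pwq_busy() sanity check below catches leaked work */
	destroy_workqueue(example_wq);
}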
@@ -4146,11 +4337,11 @@
 INIT_LIST_HEAD(&wq->flusher_overflow);
 INIT_LIST_HEAD(&wq->maydays);

- lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
+ wq_init_lockdep(wq);
 INIT_LIST_HEAD(&wq->list);

 if (alloc_and_link_pwqs(wq) < 0)
- goto err_free_wq;
+ goto err_unreg_lockdep;

 if (wq_online && init_rescuer(wq) < 0)
 goto err_destroy;
@@ -4176,6 +4367,9 @@

 return wq;

+err_unreg_lockdep:
+ wq_unregister_lockdep(wq);
+ wq_free_lockdep(wq);
 err_free_wq:
 free_workqueue_attrs(wq->unbound_attrs);
 kfree(wq);
@@ -4184,7 +4378,23 @@
 destroy_workqueue(wq);
 return NULL;
 }
-EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
+EXPORT_SYMBOL_GPL(alloc_workqueue);
+
+static bool pwq_busy(struct pool_workqueue *pwq)
+{
+ int i;
+
+ for (i = 0; i < WORK_NR_COLORS; i++)
+ if (pwq->nr_in_flight[i])
+ return true;
+
+ if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
+ return true;
+ if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+ return true;
+
+ return false;
+}

 /**
 * destroy_workqueue - safely terminate a workqueue
@@ -4220,26 +4430,25 @@
 kfree(rescuer);
 }

- /* sanity checks */
+ /*
+ * Sanity checks - grab all the locks so that we wait for all
+ * in-flight operations which may do put_pwq().
+ */
+ mutex_lock(&wq_pool_mutex);
 mutex_lock(&wq->mutex);
 for_each_pwq(pwq, wq) {
- int i;
-
- for (i = 0; i < WORK_NR_COLORS; i++) {
- if (WARN_ON(pwq->nr_in_flight[i])) {
- mutex_unlock(&wq->mutex);
- show_workqueue_state();
- return;
- }
- }
-
- if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) ||
- WARN_ON(pwq->nr_active) ||
- WARN_ON(!list_empty(&pwq->delayed_works))) {
+ raw_spin_lock_irq(&pwq->pool->lock);
+ if (WARN_ON(pwq_busy(pwq))) {
+ pr_warn("%s: %s has the following busy pwq\n",
+ __func__, wq->name);
+ show_pwq(pwq);
+ raw_spin_unlock_irq(&pwq->pool->lock);
 mutex_unlock(&wq->mutex);
+ mutex_unlock(&wq_pool_mutex);
 show_workqueue_state();
 return;
 }
+ raw_spin_unlock_irq(&pwq->pool->lock);
 }
 mutex_unlock(&wq->mutex);

@@ -4247,11 +4456,11 @@
 * wq list is used to freeze wq, remove from list after
 * flushing is complete in case freeze races us.
 */
- mutex_lock(&wq_pool_mutex);
 list_del_rcu(&wq->list);
 mutex_unlock(&wq_pool_mutex);

 if (!(wq->flags & WQ_UNBOUND)) {
+ wq_unregister_lockdep(wq);
 /*
 * The base ref is never dropped on per-cpu pwqs. Directly
 * schedule RCU free.
@@ -4477,14 +4686,14 @@
 * Carefully copy the associated workqueue's workfn, name and desc.
 * Keep the original last '\0' in case the original is garbage.
 */
- probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
- probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
- probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
- probe_kernel_read(name, wq->name, sizeof(name) - 1);
- probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
+ copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn));
+ copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq));
+ copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq));
+ copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1);
+ copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1);

 if (fn || name[0] || desc[0]) {
- printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
+ printk("%sWorkqueue: %s %ps", log_lvl, name, fn);
 if (strcmp(name, desc))
 pr_cont(" (%s)", desc);
 pr_cont("\n");
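/*
 * Editor's note: copy_from_kernel_nofault() is the renamed
 * probe_kernel_read(); it returns 0 on success and a negative error if the
 * source address cannot be read safely. A hypothetical helper, not part of
 * this patch:
 */
#include <linux/uaccess.h>

static unsigned long example_peek(const unsigned long *maybe_bad_ptr)
{
	unsigned long val = 0;

	if (copy_from_kernel_nofault(&val, maybe_bad_ptr, sizeof(val)))
		return 0;	/* address was not readable */
	return val;
}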
@@ -4509,7 +4718,7 @@
 pr_cont("%s BAR(%d)", comma ? "," : "",
 task_pid_nr(barr->task));
 } else {
- pr_cont("%s %pf", comma ? "," : "", work->func);
+ pr_cont("%s %ps", comma ? "," : "", work->func);
 }
 }

@@ -4542,9 +4751,9 @@
 if (worker->current_pwq != pwq)
 continue;

- pr_cont("%s %d%s:%pf", comma ? "," : "",
+ pr_cont("%s %d%s:%ps", comma ? "," : "",
 task_pid_nr(worker->task),
- worker == pwq->wq->rescuer ? "(RESCUER)" : "",
+ worker->rescue_wq ? "(RESCUER)" : "",
 worker->current_func);
 list_for_each_entry(work, &worker->scheduled, entry)
 pr_cont_work(false, work);
@@ -4704,6 +4913,7 @@

 mutex_unlock(&wq_pool_attach_mutex);
 }
+EXPORT_SYMBOL_GPL(wq_worker_comm);

 #ifdef CONFIG_SMP

@@ -4827,7 +5037,7 @@
 *
 * WRITE_ONCE() is necessary because @worker->flags may be
 * tested without holding any lock in
- * wq_worker_waking_up(). Without it, NOT_RUNNING test may
+ * wq_worker_running(). Without it, NOT_RUNNING test may
 * fail incorrectly leading to premature concurrency
 * management operations.
 */
@@ -5642,6 +5852,7 @@
 pr_cont_pool_info(pool);
 pr_cont(" stuck for %us!\n",
 jiffies_to_msecs(now - pool_ts) / 1000);
+ trace_android_vh_wq_lockup_pool(pool->cpu, pool_ts);
 }
 }

@@ -5725,6 +5936,13 @@
 return;
 }

+ for_each_possible_cpu(cpu) {
+ if (WARN_ON(cpu_to_node(cpu) == NUMA_NO_NODE)) {
+ pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
+ return;
+ }
+ }
+
 wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
 BUG_ON(!wq_update_unbound_numa_attrs_buf);

@@ -5742,11 +5960,6 @@

 for_each_possible_cpu(cpu) {
 node = cpu_to_node(cpu);
- if (WARN_ON(node == NUMA_NO_NODE)) {
- pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
- /* happens iff arch is bonkers, let's just proceed */
- return;
- }
 cpumask_set_cpu(cpu, tbl[node]);
 }

@@ -5764,13 +5977,13 @@
 * items. Actual work item execution starts only after kthreads can be
 * created and scheduled right before early initcalls.
 */
-int __init workqueue_init_early(void)
+void __init workqueue_init_early(void)
 {
 int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
 int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
 int i, cpu;

- WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
+ BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));

 BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
 cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(hk_flags));
@@ -5831,8 +6044,6 @@
 !system_unbound_wq || !system_freezable_wq ||
 !system_power_efficient_wq ||
 !system_freezable_power_efficient_wq);
-
- return 0;
 }

 /**
@@ -5844,7 +6055,7 @@
 * are no kworkers executing the work items yet. Populate the worker pools
 * with the initial workers and enable future kworker creations.
 */
-int __init workqueue_init(void)
+void __init workqueue_init(void)
 {
 struct workqueue_struct *wq;
 struct worker_pool *pool;
@@ -5891,6 +6102,4 @@

 wq_online = true;
 wq_watchdog_init();
-
- return 0;
 }