2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/kernel/workqueue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
 * kernel/workqueue.c - generic async execution with shared worker pool
 *
@@ -50,8 +51,13 @@
 #include <linux/sched/isolation.h>
 #include <linux/nmi.h>
 #include <linux/kvm_para.h>
+#include <uapi/linux/sched/types.h>

 #include "workqueue_internal.h"
+
+#include <trace/hooks/wqlockup.h>
+/* events/workqueue.h uses default TRACE_INCLUDE_PATH */
+#undef TRACE_INCLUDE_PATH

 enum {
 /*
@@ -133,7 +139,7 @@
 * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
 *
 * PWR: wq_pool_mutex and wq->mutex protected for writes. Either or
- * sched-RCU for reads.
+ * RCU for reads.
 *
 * WQ: wq->mutex protected.
 *
@@ -248,7 +254,7 @@
 struct list_head flusher_overflow; /* WQ: flush overflow list */

 struct list_head maydays; /* MD: pwqs requesting rescue */
- struct worker *rescuer; /* I: rescue worker */
+ struct worker *rescuer; /* MD: rescue worker */

 int nr_drainers; /* WQ: drain in progress */
 int saved_max_active; /* WQ: saved pwq max_active */
@@ -260,13 +266,15 @@
 struct wq_device *wq_dev; /* I: for sysfs interface */
 #endif
 #ifdef CONFIG_LOCKDEP
+ char *lock_name;
+ struct lock_class_key key;
 struct lockdep_map lockdep_map;
 #endif
 char name[WQ_NAME_LEN]; /* I: workqueue name */

 /*
- * Destruction of workqueue_struct is sched-RCU protected to allow
- * walking the workqueues list without grabbing wq_pool_mutex.
+ * Destruction of workqueue_struct is RCU protected to allow walking
+ * the workqueues list without grabbing wq_pool_mutex.
 * This is used to dump all workqueues from sysrq.
 */
 struct rcu_head rcu;
@@ -299,7 +307,8 @@
 static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */
 static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
 static DEFINE_RAW_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
-static DECLARE_SWAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
+/* wait for manager to go away */
+static struct rcuwait manager_wait = __RCUWAIT_INITIALIZER(manager_wait);

 static LIST_HEAD(workqueues); /* PR: list of all workqueues */
 static bool workqueue_freezing; /* PL: have wqs started freezing? */
@@ -353,19 +362,18 @@

 static int worker_thread(void *__worker);
 static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
+static void show_pwq(struct pool_workqueue *pwq);

 #define CREATE_TRACE_POINTS
 #include <trace/events/workqueue.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(workqueue_execute_start);
+EXPORT_TRACEPOINT_SYMBOL_GPL(workqueue_execute_end);

 #define assert_rcu_or_pool_mutex() \
 RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
 !lockdep_is_held(&wq_pool_mutex), \
 "RCU or wq_pool_mutex should be held")
-
-#define assert_rcu_or_wq_mutex(wq) \
- RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
- !lockdep_is_held(&wq->mutex), \
- "RCU or wq->mutex should be held")

 #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
 RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
@@ -423,13 +431,12 @@
 * ignored.
 */
 #define for_each_pwq(pwq, wq) \
- list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node) \
- if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
- else
+ list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node, \
+ lockdep_is_held(&(wq->mutex)))

 #ifdef CONFIG_DEBUG_OBJECTS_WORK

-static struct debug_obj_descr work_debug_descr;
+static const struct debug_obj_descr work_debug_descr;

 static void *work_debug_hint(void *addr)
 {
@@ -479,7 +486,7 @@
 }
 }

-static struct debug_obj_descr work_debug_descr = {
+static const struct debug_obj_descr work_debug_descr = {
 .name = "work_struct",
 .debug_hint = work_debug_hint,
 .is_static_object = work_is_static_object,
@@ -647,7 +654,7 @@
 * The following mb guarantees that previous clear of a PENDING bit
 * will not be reordered with any speculative LOADS or STORES from
 * work->current_func, which is executed afterwards. This possible
- * reordering can lead to a missed execution on attempt to qeueue
+ * reordering can lead to a missed execution on attempt to queue
 * the same @work. E.g. consider this case:
 *
 * CPU#0 CPU#1
@@ -680,12 +687,17 @@
 set_work_data(work, WORK_STRUCT_NO_POOL, 0);
 }

+static inline struct pool_workqueue *work_struct_pwq(unsigned long data)
+{
+ return (struct pool_workqueue *)(data & WORK_STRUCT_WQ_DATA_MASK);
+}
+
 static struct pool_workqueue *get_work_pwq(struct work_struct *work)
 {
 unsigned long data = atomic_long_read(&work->data);

 if (data & WORK_STRUCT_PWQ)
- return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
+ return work_struct_pwq(data);
 else
 return NULL;
 }
@@ -713,8 +725,7 @@
 assert_rcu_or_pool_mutex();

 if (data & WORK_STRUCT_PWQ)
- return ((struct pool_workqueue *)
- (data & WORK_STRUCT_WQ_DATA_MASK))->pool;
+ return work_struct_pwq(data)->pool;

 pool_id = data >> WORK_OFFQ_POOL_SHIFT;
 if (pool_id == WORK_OFFQ_POOL_NONE)
@@ -735,8 +746,7 @@
 unsigned long data = atomic_long_read(&work->data);

 if (data & WORK_STRUCT_PWQ)
- return ((struct pool_workqueue *)
- (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
+ return work_struct_pwq(data)->pool->id;

 return data >> WORK_OFFQ_POOL_SHIFT;
 }
@@ -851,8 +861,17 @@

 if (!worker->sleeping)
 return;
+
+ /*
+ * If preempted by unbind_workers() between the WORKER_NOT_RUNNING check
+ * and the nr_running increment below, we may ruin the nr_running reset
+ * and leave with an unexpected pool->nr_running == 1 on the newly unbound
+ * pool. Protect against such race.
+ */
+ preempt_disable();
 if (!(worker->flags & WORKER_NOT_RUNNING))
 atomic_inc(&worker->pool->nr_running);
+ preempt_enable();
 worker->sleeping = 0;
 }

@@ -861,7 +880,8 @@
 * @task: task going to sleep
 *
 * This function is called from schedule() when a busy worker is
- * going to sleep.
+ * going to sleep. Preemption needs to be disabled to protect ->sleeping
+ * assignment.
 */
 void wq_worker_sleeping(struct task_struct *task)
 {
@@ -878,7 +898,8 @@

 pool = worker->pool;

- if (WARN_ON_ONCE(worker->sleeping))
+ /* Return if preempted before wq_worker_running() was reached */
+ if (worker->sleeping)
 return;

 worker->sleeping = 1;
@@ -906,12 +927,23 @@

 /**
 * wq_worker_last_func - retrieve worker's last work function
+ * @task: Task to retrieve last work function of.
 *
 * Determine the last function a worker executed. This is called from
 * the scheduler to get a worker's last known identity.
 *
 * CONTEXT:
- * spin_lock_irq(rq->lock)
+ * raw_spin_lock_irq(rq->lock)
+ *
+ * This function is called during schedule() when a kworker is going
+ * to sleep. It's used by psi to identify aggregation workers during
+ * dequeuing, to allow periodic aggregation to shut-off when that
+ * worker is the last task in the system or cgroup to go to sleep.
+ *
+ * As this function doesn't involve any workqueue-related locking, it
+ * only returns stable values when called from inside the scheduler's
+ * queuing and dequeuing paths, when @task, which must be a kworker,
+ * is guaranteed to not be processing any works.
 *
 * Return:
 * The last work function %current executed as a worker, NULL if it
@@ -1201,11 +1233,14 @@
 * stable state - idle, on timer or on worklist.
 *
 * Return:
+ *
+ * ======== ================================================================
 * 1 if @work was pending and we successfully stole PENDING
 * 0 if @work was idle and we claimed PENDING
 * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry
 * -ENOENT if someone else is canceling @work, this state may persist
 * for arbitrarily long
+ * ======== ================================================================
 *
 * Note:
 * On >= 0 return, the caller owns @work's PENDING bit. To avoid getting
@@ -1313,6 +1348,9 @@
 {
 struct worker_pool *pool = pwq->pool;

+ /* record the work call stack in order to print it in KASAN reports */
+ kasan_record_aux_stack(work);
+
 /* we own @work, set data and link */
 set_work_pwq(work, pwq, extra_flags);
 list_add_tail(&work->entry, head);
@@ -1339,7 +1377,7 @@

 worker = current_wq_worker();
 /*
- * Return %true iff I'm a worker execuing a work item on @wq. If
+ * Return %true iff I'm a worker executing a work item on @wq. If
 * I'm @worker, it's safe to dereference it without locking.
 */
 return worker && worker->current_pwq->wq == wq;
@@ -1513,14 +1551,96 @@
 }
 EXPORT_SYMBOL(queue_work_on);

+/**
+ * workqueue_select_cpu_near - Select a CPU based on NUMA node
+ * @node: NUMA node ID that we want to select a CPU from
+ *
+ * This function will attempt to find a "random" cpu available on a given
+ * node. If there are no CPUs available on the given node it will return
+ * WORK_CPU_UNBOUND indicating that we should just schedule to any
+ * available CPU if we need to schedule this work.
+ */
+static int workqueue_select_cpu_near(int node)
+{
+ int cpu;
+
+ /* No point in doing this if NUMA isn't enabled for workqueues */
+ if (!wq_numa_enabled)
+ return WORK_CPU_UNBOUND;
+
+ /* Delay binding to CPU if node is not valid or online */
+ if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
+ return WORK_CPU_UNBOUND;
+
+ /* Use local node/cpu if we are already there */
+ cpu = raw_smp_processor_id();
+ if (node == cpu_to_node(cpu))
+ return cpu;
+
+ /* Use "random" otherwise know as "first" online CPU of node */
+ cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
+
+ /* If CPU is valid return that, otherwise just defer */
+ return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
+}
+
+/**
+ * queue_work_node - queue work on a "random" cpu for a given NUMA node
+ * @node: NUMA node that we are targeting the work for
+ * @wq: workqueue to use
+ * @work: work to queue
+ *
+ * We queue the work to a "random" CPU within a given NUMA node. The basic
+ * idea here is to provide a way to somehow associate work with a given
+ * NUMA node.
+ *
+ * This function will only make a best effort attempt at getting this onto
+ * the right NUMA node. If no node is requested or the requested node is
+ * offline then we just fall back to standard queue_work behavior.
+ *
+ * Currently the "random" CPU ends up being the first available CPU in the
+ * intersection of cpu_online_mask and the cpumask of the node, unless we
+ * are running on the node. In that case we just use the current CPU.
+ *
+ * Return: %false if @work was already on a queue, %true otherwise.
+ */
+bool queue_work_node(int node, struct workqueue_struct *wq,
+ struct work_struct *work)
+{
+ unsigned long flags;
+ bool ret = false;
+
+ /*
+ * This current implementation is specific to unbound workqueues.
+ * Specifically we only return the first available CPU for a given
+ * node instead of cycling through individual CPUs within the node.
+ *
+ * If this is used with a per-cpu workqueue then the logic in
+ * workqueue_select_cpu_near would need to be updated to allow for
+ * some round robin type logic.
+ */
+ WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
+
+ local_irq_save(flags);
+
+ if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
+ int cpu = workqueue_select_cpu_near(node);
+
+ __queue_work(cpu, wq, work);
+ ret = true;
+ }
+
+ local_irq_restore(flags);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(queue_work_node);
+
 void delayed_work_timer_fn(struct timer_list *t)
 {
 struct delayed_work *dwork = from_timer(dwork, t, timer);
- unsigned long flags;

- local_irq_save(flags);
+ /* should have been called from irqsafe timer with irq already off */
 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
- local_irq_restore(flags);
 }
 EXPORT_SYMBOL(delayed_work_timer_fn);

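For illustration, queue_work_node() added above queues a work item, best effort, on a CPU of a given NUMA node and falls back to WORK_CPU_UNBOUND when the node has no online CPU. A minimal usage sketch, assuming an unbound workqueue; stats_wq, stats_work and stats_fn are hypothetical names, not taken from this patch:

    #include <linux/workqueue.h>

    /* hypothetical caller of queue_work_node(); names are illustrative */
    static struct workqueue_struct *stats_wq;
    static struct work_struct stats_work;

    static void stats_fn(struct work_struct *work)
    {
            /* runs on a CPU of the requested node when one is online */
    }

    static int stats_init(int node)
    {
            /* queue_work_node() warns unless the workqueue is WQ_UNBOUND */
            stats_wq = alloc_workqueue("stats_wq", WQ_UNBOUND, 0);
            if (!stats_wq)
                    return -ENOMEM;

            INIT_WORK(&stats_work, stats_fn);
            /* best effort: falls back to any CPU if @node has no online CPU */
            queue_work_node(node, stats_wq, &stats_work);
            return 0;
    }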
@@ -1531,9 +1651,14 @@
 struct work_struct *work = &dwork->work;

 WARN_ON_ONCE(!wq);
-#ifndef CONFIG_CFI_CLANG
- WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
-#endif
+ /*
+ * With CFI, timer->function can point to a jump table entry in a module,
+ * which fails the comparison. Disable the warning if CFI and modules are
+ * both enabled.
+ */
+ if (!IS_ENABLED(CONFIG_CFI_CLANG) || !IS_ENABLED(CONFIG_MODULES))
+ WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
+
 WARN_ON_ONCE(timer_pending(timer));
 WARN_ON_ONCE(!list_empty(&work->entry));

@@ -1644,7 +1769,7 @@
 *
 * Return: %false if @rwork was already pending, %true otherwise. Note
 * that a full RCU grace period is guaranteed only after a %true return.
- * While @rwork is guarnateed to be executed after a %false return, the
+ * While @rwork is guaranteed to be executed after a %false return, the
 * execution may happen before a full RCU grace period has passed.
 */
 bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
@@ -1838,6 +1963,15 @@
 goto fail;

 set_user_nice(worker->task, pool->attrs->nice);
+ if (IS_ENABLED(CONFIG_ROCKCHIP_OPTIMIZE_RT_PRIO)) {
+ struct sched_param param;
+
+ if (pool->attrs->nice == 0)
+ param.sched_priority = MAX_RT_PRIO / 2 - 4;
+ else
+ param.sched_priority = MAX_RT_PRIO / 2 - 2;
+ sched_setscheduler_nocheck(worker->task, SCHED_RR, &param);
+ }
 kthread_bind_mask(worker->task, pool->attrs->cpumask);

 /* successful, attach the worker to the pool */
@@ -2047,7 +2181,7 @@

 pool->manager = NULL;
 pool->flags &= ~POOL_MANAGER_ACTIVE;
- swake_up_one(&wq_manager_wait);
+ rcuwait_wake_up(&manager_wait);
 return true;
 }

@@ -2177,13 +2311,13 @@
 * While we must be careful to not use "work" after this, the trace
 * point will only record its address.
 */
- trace_workqueue_execute_end(work);
+ trace_workqueue_execute_end(work, worker->current_func);
 lock_map_release(&lockdep_map);
 lock_map_release(&pwq->wq->lockdep_map);

 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
 pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
- " last function: %pf\n",
+ " last function: %ps\n",
 current->comm, preempt_count(), task_pid_nr(current),
 worker->current_func);
 debug_show_held_locks(current);
@@ -2191,7 +2325,7 @@
 }

 /*
- * The following prevents a kworker from hogging CPU on !PREEMPT
+ * The following prevents a kworker from hogging CPU on !PREEMPTION
 * kernels, where a requeueing work item waiting for something to
 * happen could deadlock with stop_machine as such work item could
 * indefinitely requeue itself while all other CPUs are trapped in
@@ -2436,7 +2570,7 @@
 * being used to relieve memory pressure, don't
 * incur MAYDAY_INTERVAL delay inbetween.
 */
- if (need_to_create_worker(pool)) {
+ if (pwq->nr_active && need_to_create_worker(pool)) {
 raw_spin_lock(&wq_mayday_lock);
 /*
 * Queue iff we aren't racing destruction
@@ -2508,11 +2642,11 @@
 worker = current_wq_worker();

 WARN_ONCE(current->flags & PF_MEMALLOC,
- "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
+ "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
 current->pid, current->comm, target_wq->name, target_func);
 WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
 (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
- "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
+ "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps",
 worker->current_pwq->wq->name, worker->current_func,
 target_wq->name, target_func);
 }
@@ -2743,7 +2877,7 @@
 * First flushers are responsible for cascading flushes and
 * handling overflow. Non-first flushers can simply return.
 */
- if (wq->first_flusher != &this_flusher)
+ if (READ_ONCE(wq->first_flusher) != &this_flusher)
 return;

 mutex_lock(&wq->mutex);
@@ -2752,7 +2886,7 @@
 if (wq->first_flusher != &this_flusher)
 goto out_unlock;

- wq->first_flusher = NULL;
+ WRITE_ONCE(wq->first_flusher, NULL);

 WARN_ON_ONCE(!list_empty(&this_flusher.list));
 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
@@ -2943,10 +3077,8 @@
 if (WARN_ON(!work->func))
 return false;

- if (!from_cancel) {
- lock_map_acquire(&work->lockdep_map);
- lock_map_release(&work->lockdep_map);
- }
+ lock_map_acquire(&work->lockdep_map);
+ lock_map_release(&work->lockdep_map);

 if (start_flush_work(work, &barr, from_cancel)) {
 wait_for_completion(&barr.done);
@@ -3241,7 +3373,7 @@
 *
 * Undo alloc_workqueue_attrs().
 */
-static void free_workqueue_attrs(struct workqueue_attrs *attrs)
+void free_workqueue_attrs(struct workqueue_attrs *attrs)
 {
 if (attrs) {
 free_cpumask_var(attrs->cpumask);
@@ -3257,7 +3389,7 @@
 *
 * Return: The allocated new workqueue_attr on success. %NULL on failure.
 */
-static struct workqueue_attrs *alloc_workqueue_attrs(void)
+struct workqueue_attrs *alloc_workqueue_attrs(void)
 {
 struct workqueue_attrs *attrs;

@@ -3348,17 +3480,56 @@
 return 0;
 }

+#ifdef CONFIG_LOCKDEP
+static void wq_init_lockdep(struct workqueue_struct *wq)
+{
+ char *lock_name;
+
+ lockdep_register_key(&wq->key);
+ lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name);
+ if (!lock_name)
+ lock_name = wq->name;
+
+ wq->lock_name = lock_name;
+ lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0);
+}
+
+static void wq_unregister_lockdep(struct workqueue_struct *wq)
+{
+ lockdep_unregister_key(&wq->key);
+}
+
+static void wq_free_lockdep(struct workqueue_struct *wq)
+{
+ if (wq->lock_name != wq->name)
+ kfree(wq->lock_name);
+}
+#else
+static void wq_init_lockdep(struct workqueue_struct *wq)
+{
+}
+
+static void wq_unregister_lockdep(struct workqueue_struct *wq)
+{
+}
+
+static void wq_free_lockdep(struct workqueue_struct *wq)
+{
+}
+#endif
+
 static void rcu_free_wq(struct rcu_head *rcu)
 {
 struct workqueue_struct *wq =
 container_of(rcu, struct workqueue_struct, rcu);
+
+ wq_free_lockdep(wq);

 if (!(wq->flags & WQ_UNBOUND))
 free_percpu(wq->cpu_pwqs);
 else
 free_workqueue_attrs(wq->unbound_attrs);

- kfree(wq->rescuer);
 kfree(wq);
 }

@@ -3369,6 +3540,18 @@
 ida_destroy(&pool->worker_ida);
 free_workqueue_attrs(pool->attrs);
 kfree(pool);
+}
+
+/* This returns with the lock held on success (pool manager is inactive). */
+static bool wq_manager_inactive(struct worker_pool *pool)
+{
+ raw_spin_lock_irq(&pool->lock);
+
+ if (pool->flags & POOL_MANAGER_ACTIVE) {
+ raw_spin_unlock_irq(&pool->lock);
+ return false;
+ }
+ return true;
 }

 /**
@@ -3406,10 +3589,11 @@
 * Become the manager and destroy all workers. This prevents
 * @pool's workers from blocking on attach_mutex. We're the last
 * manager and @pool gets freed with the flag set.
+ * Because of how wq_manager_inactive() works, we will hold the
+ * spinlock after a successful wait.
 */
- raw_spin_lock_irq(&pool->lock);
- swait_event_lock_irq(wq_manager_wait,
- !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
+ rcuwait_wait_event(&manager_wait, wq_manager_inactive(pool),
+ TASK_UNINTERRUPTIBLE);
 pool->flags |= POOL_MANAGER_ACTIVE;

 while ((worker = first_idle_worker(pool)))
@@ -3549,8 +3733,10 @@
 * If we're the last pwq going away, @wq is already dead and no one
 * is gonna access it anymore. Schedule RCU free.
 */
- if (is_last)
+ if (is_last) {
+ wq_unregister_lockdep(wq);
 call_rcu(&wq->rcu, rcu_free_wq);
+ }
 }

 /**
@@ -3913,16 +4099,20 @@
 *
 * Performs GFP_KERNEL allocations.
 *
+ * Assumes caller has CPU hotplug read exclusion, i.e. get_online_cpus().
+ *
 * Return: 0 on success and -errno on failure.
 */
-static int apply_workqueue_attrs(struct workqueue_struct *wq,
+int apply_workqueue_attrs(struct workqueue_struct *wq,
 const struct workqueue_attrs *attrs)
 {
 int ret;

- apply_wqattrs_lock();
+ lockdep_assert_cpus_held();
+
+ mutex_lock(&wq_pool_mutex);
 ret = apply_workqueue_attrs_locked(wq, attrs);
- apply_wqattrs_unlock();
+ mutex_unlock(&wq_pool_mutex);

 return ret;
 }
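For illustration, apply_workqueue_attrs() as exported above no longer takes the CPU hotplug lock itself; as the new kernel-doc line says, the caller must hold CPU hotplug read exclusion. A sketch of a possible caller; mydrv_set_attrs and the nice value are hypothetical, and the target must be an unbound workqueue:

    #include <linux/cpu.h>          /* get_online_cpus()/put_online_cpus() */
    #include <linux/workqueue.h>

    /* hypothetical helper: retune an unbound workqueue's attributes */
    static int mydrv_set_attrs(struct workqueue_struct *unbound_wq)
    {
            struct workqueue_attrs *attrs;
            int ret;

            attrs = alloc_workqueue_attrs();
            if (!attrs)
                    return -ENOMEM;
            attrs->nice = -10;      /* arbitrary example value */

            get_online_cpus();      /* CPU hotplug read exclusion */
            ret = apply_workqueue_attrs(unbound_wq, attrs);
            put_online_cpus();

            free_workqueue_attrs(attrs);
            return ret;
    }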
@@ -4035,16 +4225,21 @@
 mutex_unlock(&wq->mutex);
 }
 return 0;
- } else if (wq->flags & __WQ_ORDERED) {
+ }
+
+ get_online_cpus();
+ if (wq->flags & __WQ_ORDERED) {
 ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
 /* there should only be single pwq for ordering guarantee */
 WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
 wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
 "ordering guarantee broken for workqueue %s\n", wq->name);
- return ret;
 } else {
- return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
+ ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
 }
+ put_online_cpus();
+
+ return ret;
 }

 static int wq_clamp_max_active(int max_active, unsigned int flags,
@@ -4077,8 +4272,8 @@

 rescuer->rescue_wq = wq;
 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);
- ret = PTR_ERR_OR_ZERO(rescuer->task);
- if (ret) {
+ if (IS_ERR(rescuer->task)) {
+ ret = PTR_ERR(rescuer->task);
 kfree(rescuer);
 return ret;
 }
@@ -4090,11 +4285,10 @@
 return 0;
 }

-struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
- unsigned int flags,
- int max_active,
- struct lock_class_key *key,
- const char *lock_name, ...)
+__printf(1, 4)
+struct workqueue_struct *alloc_workqueue(const char *fmt,
+ unsigned int flags,
+ int max_active, ...)
 {
 size_t tbl_size = 0;
 va_list args;
@@ -4129,7 +4323,7 @@
 goto err_free_wq;
 }

- va_start(args, lock_name);
+ va_start(args, max_active);
 vsnprintf(wq->name, sizeof(wq->name), fmt, args);
 va_end(args);

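For illustration, alloc_workqueue() is now a real __printf(1, 4) function (the __alloc_workqueue_key()/lock_name pair is gone), and the lockdep key and name are set up internally by wq_init_lockdep(). A usage sketch under those assumptions; mydrv_wq, the format string and the flags are hypothetical:

    #include <linux/workqueue.h>

    /* hypothetical driver setup/teardown; name, flags and id are illustrative */
    static struct workqueue_struct *mydrv_wq;

    static int mydrv_init(int id)
    {
            mydrv_wq = alloc_workqueue("mydrv_wq%d",
                                       WQ_UNBOUND | WQ_MEM_RECLAIM, 0, id);
            if (!mydrv_wq)
                    return -ENOMEM;
            return 0;
    }

    static void mydrv_exit(void)
    {
            destroy_workqueue(mydrv_wq);
    }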
@@ -4146,11 +4340,11 @@
 INIT_LIST_HEAD(&wq->flusher_overflow);
 INIT_LIST_HEAD(&wq->maydays);

- lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
+ wq_init_lockdep(wq);
 INIT_LIST_HEAD(&wq->list);

 if (alloc_and_link_pwqs(wq) < 0)
- goto err_free_wq;
+ goto err_unreg_lockdep;

 if (wq_online && init_rescuer(wq) < 0)
 goto err_destroy;
@@ -4176,6 +4370,9 @@

 return wq;

+err_unreg_lockdep:
+ wq_unregister_lockdep(wq);
+ wq_free_lockdep(wq);
 err_free_wq:
 free_workqueue_attrs(wq->unbound_attrs);
 kfree(wq);
@@ -4184,7 +4381,23 @@
 destroy_workqueue(wq);
 return NULL;
 }
-EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
+EXPORT_SYMBOL_GPL(alloc_workqueue);
+
+static bool pwq_busy(struct pool_workqueue *pwq)
+{
+ int i;
+
+ for (i = 0; i < WORK_NR_COLORS; i++)
+ if (pwq->nr_in_flight[i])
+ return true;
+
+ if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
+ return true;
+ if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+ return true;
+
+ return false;
+}

 /**
 * destroy_workqueue - safely terminate a workqueue
@@ -4220,26 +4433,25 @@
 kfree(rescuer);
 }

- /* sanity checks */
+ /*
+ * Sanity checks - grab all the locks so that we wait for all
+ * in-flight operations which may do put_pwq().
+ */
+ mutex_lock(&wq_pool_mutex);
 mutex_lock(&wq->mutex);
 for_each_pwq(pwq, wq) {
- int i;
-
- for (i = 0; i < WORK_NR_COLORS; i++) {
- if (WARN_ON(pwq->nr_in_flight[i])) {
- mutex_unlock(&wq->mutex);
- show_workqueue_state();
- return;
- }
- }
-
- if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) ||
- WARN_ON(pwq->nr_active) ||
- WARN_ON(!list_empty(&pwq->delayed_works))) {
+ raw_spin_lock_irq(&pwq->pool->lock);
+ if (WARN_ON(pwq_busy(pwq))) {
+ pr_warn("%s: %s has the following busy pwq\n",
+ __func__, wq->name);
+ show_pwq(pwq);
+ raw_spin_unlock_irq(&pwq->pool->lock);
 mutex_unlock(&wq->mutex);
+ mutex_unlock(&wq_pool_mutex);
 show_workqueue_state();
 return;
 }
+ raw_spin_unlock_irq(&pwq->pool->lock);
 }
 mutex_unlock(&wq->mutex);

@@ -4247,11 +4459,11 @@
 * wq list is used to freeze wq, remove from list after
 * flushing is complete in case freeze races us.
 */
- mutex_lock(&wq_pool_mutex);
 list_del_rcu(&wq->list);
 mutex_unlock(&wq_pool_mutex);

 if (!(wq->flags & WQ_UNBOUND)) {
+ wq_unregister_lockdep(wq);
 /*
 * The base ref is never dropped on per-cpu pwqs. Directly
 * schedule RCU free.
44774689 * Carefully copy the associated workqueue's workfn, name and desc.
44784690 * Keep the original last '\0' in case the original is garbage.
44794691 */
4480
- probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
4481
- probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
4482
- probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
4483
- probe_kernel_read(name, wq->name, sizeof(name) - 1);
4484
- probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
4692
+ copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn));
4693
+ copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq));
4694
+ copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq));
4695
+ copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1);
4696
+ copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1);
44854697
44864698 if (fn || name[0] || desc[0]) {
4487
- printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
4699
+ printk("%sWorkqueue: %s %ps", log_lvl, name, fn);
44884700 if (strcmp(name, desc))
44894701 pr_cont(" (%s)", desc);
44904702 pr_cont("\n");
....@@ -4509,7 +4721,7 @@
45094721 pr_cont("%s BAR(%d)", comma ? "," : "",
45104722 task_pid_nr(barr->task));
45114723 } else {
4512
- pr_cont("%s %pf", comma ? "," : "", work->func);
4724
+ pr_cont("%s %ps", comma ? "," : "", work->func);
45134725 }
45144726 }
45154727
....@@ -4542,9 +4754,9 @@
45424754 if (worker->current_pwq != pwq)
45434755 continue;
45444756
4545
- pr_cont("%s %d%s:%pf", comma ? "," : "",
4757
+ pr_cont("%s %d%s:%ps", comma ? "," : "",
45464758 task_pid_nr(worker->task),
4547
- worker == pwq->wq->rescuer ? "(RESCUER)" : "",
4759
+ worker->rescue_wq ? "(RESCUER)" : "",
45484760 worker->current_func);
45494761 list_for_each_entry(work, &worker->scheduled, entry)
45504762 pr_cont_work(false, work);
....@@ -4704,6 +4916,7 @@
47044916
47054917 mutex_unlock(&wq_pool_attach_mutex);
47064918 }
4919
+EXPORT_SYMBOL_GPL(wq_worker_comm);
47074920
47084921 #ifdef CONFIG_SMP
47094922
....@@ -4827,7 +5040,7 @@
48275040 *
48285041 * WRITE_ONCE() is necessary because @worker->flags may be
48295042 * tested without holding any lock in
4830
- * wq_worker_waking_up(). Without it, NOT_RUNNING test may
5043
+ * wq_worker_running(). Without it, NOT_RUNNING test may
48315044 * fail incorrectly leading to premature concurrency
48325045 * management operations.
48335046 */
....@@ -5642,6 +5855,7 @@
56425855 pr_cont_pool_info(pool);
56435856 pr_cont(" stuck for %us!\n",
56445857 jiffies_to_msecs(now - pool_ts) / 1000);
5858
+ trace_android_vh_wq_lockup_pool(pool->cpu, pool_ts);
56455859 }
56465860 }
56475861
....@@ -5725,6 +5939,13 @@
57255939 return;
57265940 }
57275941
5942
+ for_each_possible_cpu(cpu) {
5943
+ if (WARN_ON(cpu_to_node(cpu) == NUMA_NO_NODE)) {
5944
+ pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
5945
+ return;
5946
+ }
5947
+ }
5948
+
57285949 wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
57295950 BUG_ON(!wq_update_unbound_numa_attrs_buf);
57305951
....@@ -5742,11 +5963,6 @@
57425963
57435964 for_each_possible_cpu(cpu) {
57445965 node = cpu_to_node(cpu);
5745
- if (WARN_ON(node == NUMA_NO_NODE)) {
5746
- pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
5747
- /* happens iff arch is bonkers, let's just proceed */
5748
- return;
5749
- }
57505966 cpumask_set_cpu(cpu, tbl[node]);
57515967 }
57525968
....@@ -5764,13 +5980,13 @@
57645980 * items. Actual work item execution starts only after kthreads can be
57655981 * created and scheduled right before early initcalls.
57665982 */
5767
-int __init workqueue_init_early(void)
5983
+void __init workqueue_init_early(void)
57685984 {
57695985 int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
57705986 int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
57715987 int i, cpu;
57725988
5773
- WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
5989
+ BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
57745990
57755991 BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
57765992 cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(hk_flags));
....@@ -5831,8 +6047,6 @@
58316047 !system_unbound_wq || !system_freezable_wq ||
58326048 !system_power_efficient_wq ||
58336049 !system_freezable_power_efficient_wq);
5834
-
5835
- return 0;
58366050 }
58376051
58386052 /**
....@@ -5844,7 +6058,7 @@
58446058 * are no kworkers executing the work items yet. Populate the worker pools
58456059 * with the initial workers and enable future kworker creations.
58466060 */
5847
-int __init workqueue_init(void)
6061
+void __init workqueue_init(void)
58486062 {
58496063 struct workqueue_struct *wq;
58506064 struct worker_pool *pool;
....@@ -5891,6 +6105,4 @@
58916105
58926106 wq_online = true;
58936107 wq_watchdog_init();
5894
-
5895
- return 0;
58966108 }