2023-12-11 6778948f9de86c3cfaf36725a7c87dcff9ba247f
kernel/kernel/workqueue.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * kernel/workqueue.c - generic async execution with shared worker pool
34 *
....@@ -50,8 +51,13 @@
5051 #include <linux/sched/isolation.h>
5152 #include <linux/nmi.h>
5253 #include <linux/kvm_para.h>
54
+#include <uapi/linux/sched/types.h>
5355
5456 #include "workqueue_internal.h"
57
+
58
+#include <trace/hooks/wqlockup.h>
59
+/* events/workqueue.h uses default TRACE_INCLUDE_PATH */
60
+#undef TRACE_INCLUDE_PATH
5561
5662 enum {
5763 /*
....@@ -128,16 +134,16 @@
128134 *
129135 * PL: wq_pool_mutex protected.
130136 *
131
- * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads.
137
+ * PR: wq_pool_mutex protected for writes. RCU protected for reads.
132138 *
133139 * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
134140 *
135141 * PWR: wq_pool_mutex and wq->mutex protected for writes. Either or
136
- * sched-RCU for reads.
142
+ * RCU for reads.
137143 *
138144 * WQ: wq->mutex protected.
139145 *
140
- * WR: wq->mutex protected for writes. Sched-RCU protected for reads.
146
+ * WR: wq->mutex protected for writes. RCU protected for reads.
141147 *
142148 * MD: wq_mayday_lock protected.
143149 */
....@@ -145,7 +151,7 @@
145151 /* struct worker is defined in workqueue_internal.h */
146152
147153 struct worker_pool {
148
- spinlock_t lock; /* the pool lock */
154
+ raw_spinlock_t lock; /* the pool lock */
149155 int cpu; /* I: the associated cpu */
150156 int node; /* I: the associated node ID */
151157 int id; /* I: pool ID */
....@@ -184,7 +190,7 @@
184190 atomic_t nr_running ____cacheline_aligned_in_smp;
185191
186192 /*
187
- * Destruction of pool is sched-RCU protected to allow dereferences
193
+ * Destruction of pool is RCU protected to allow dereferences
188194 * from get_work_pool().
189195 */
190196 struct rcu_head rcu;
....@@ -213,7 +219,7 @@
213219 /*
214220 * Release of unbound pwq is punted to system_wq. See put_pwq()
215221 * and pwq_unbound_release_workfn() for details. pool_workqueue
216
- * itself is also sched-RCU protected so that the first pwq can be
222
+ * itself is also RCU protected so that the first pwq can be
217223 * determined without grabbing wq->mutex.
218224 */
219225 struct work_struct unbound_release_work;
....@@ -248,7 +254,7 @@
248254 struct list_head flusher_overflow; /* WQ: flush overflow list */
249255
250256 struct list_head maydays; /* MD: pwqs requesting rescue */
251
- struct worker *rescuer; /* I: rescue worker */
257
+ struct worker *rescuer; /* MD: rescue worker */
252258
253259 int nr_drainers; /* WQ: drain in progress */
254260 int saved_max_active; /* WQ: saved pwq max_active */
....@@ -260,13 +266,15 @@
260266 struct wq_device *wq_dev; /* I: for sysfs interface */
261267 #endif
262268 #ifdef CONFIG_LOCKDEP
269
+ char *lock_name;
270
+ struct lock_class_key key;
263271 struct lockdep_map lockdep_map;
264272 #endif
265273 char name[WQ_NAME_LEN]; /* I: workqueue name */
266274
267275 /*
268
- * Destruction of workqueue_struct is sched-RCU protected to allow
269
- * walking the workqueues list without grabbing wq_pool_mutex.
276
+ * Destruction of workqueue_struct is RCU protected to allow walking
277
+ * the workqueues list without grabbing wq_pool_mutex.
270278 * This is used to dump all workqueues from sysrq.
271279 */
272280 struct rcu_head rcu;
....@@ -298,8 +306,9 @@
298306
299307 static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */
300308 static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
301
-static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
302
-static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
309
+static DEFINE_RAW_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
310
+/* wait for manager to go away */
311
+static struct rcuwait manager_wait = __RCUWAIT_INITIALIZER(manager_wait);
303312
304313 static LIST_HEAD(workqueues); /* PR: list of all workqueues */
305314 static bool workqueue_freezing; /* PL: have wqs started freezing? */
....@@ -353,25 +362,24 @@
353362
354363 static int worker_thread(void *__worker);
355364 static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
365
+static void show_pwq(struct pool_workqueue *pwq);
356366
357367 #define CREATE_TRACE_POINTS
358368 #include <trace/events/workqueue.h>
359369
360
-#define assert_rcu_or_pool_mutex() \
361
- RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
362
- !lockdep_is_held(&wq_pool_mutex), \
363
- "sched RCU or wq_pool_mutex should be held")
370
+EXPORT_TRACEPOINT_SYMBOL_GPL(workqueue_execute_start);
371
+EXPORT_TRACEPOINT_SYMBOL_GPL(workqueue_execute_end);
364372
365
-#define assert_rcu_or_wq_mutex(wq) \
366
- RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
367
- !lockdep_is_held(&wq->mutex), \
368
- "sched RCU or wq->mutex should be held")
373
+#define assert_rcu_or_pool_mutex() \
374
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
375
+ !lockdep_is_held(&wq_pool_mutex), \
376
+ "RCU or wq_pool_mutex should be held")
369377
370378 #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
371
- RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
379
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
372380 !lockdep_is_held(&wq->mutex) && \
373381 !lockdep_is_held(&wq_pool_mutex), \
374
- "sched RCU, wq->mutex or wq_pool_mutex should be held")
382
+ "RCU, wq->mutex or wq_pool_mutex should be held")
375383
376384 #define for_each_cpu_worker_pool(pool, cpu) \
377385 for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
....@@ -383,7 +391,7 @@
383391 * @pool: iteration cursor
384392 * @pi: integer used for iteration
385393 *
386
- * This must be called either with wq_pool_mutex held or sched RCU read
394
+ * This must be called either with wq_pool_mutex held or RCU read
387395 * locked. If the pool needs to be used beyond the locking in effect, the
388396 * caller is responsible for guaranteeing that the pool stays online.
389397 *
....@@ -415,7 +423,7 @@
415423 * @pwq: iteration cursor
416424 * @wq: the target workqueue
417425 *
418
- * This must be called either with wq->mutex held or sched RCU read locked.
426
+ * This must be called either with wq->mutex held or RCU read locked.
419427 * If the pwq needs to be used beyond the locking in effect, the caller is
420428 * responsible for guaranteeing that the pwq stays online.
421429 *
....@@ -423,13 +431,12 @@
423431 * ignored.
424432 */
425433 #define for_each_pwq(pwq, wq) \
426
- list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node) \
427
- if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
428
- else
434
+ list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node, \
435
+ lockdep_is_held(&(wq->mutex)))
429436
430437 #ifdef CONFIG_DEBUG_OBJECTS_WORK
431438
432
-static struct debug_obj_descr work_debug_descr;
439
+static const struct debug_obj_descr work_debug_descr;
433440
434441 static void *work_debug_hint(void *addr)
435442 {
....@@ -479,7 +486,7 @@
479486 }
480487 }
481488
482
-static struct debug_obj_descr work_debug_descr = {
489
+static const struct debug_obj_descr work_debug_descr = {
483490 .name = "work_struct",
484491 .debug_hint = work_debug_hint,
485492 .is_static_object = work_is_static_object,
....@@ -551,7 +558,7 @@
551558 * @wq: the target workqueue
552559 * @node: the node ID
553560 *
554
- * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU
561
+ * This must be called with any of wq_pool_mutex, wq->mutex or RCU
555562 * read locked.
556563 * If the pwq needs to be used beyond the locking in effect, the caller is
557564 * responsible for guaranteeing that the pwq stays online.
....@@ -647,7 +654,7 @@
647654 * The following mb guarantees that previous clear of a PENDING bit
648655 * will not be reordered with any speculative LOADS or STORES from
649656 * work->current_func, which is executed afterwards. This possible
650
- * reordering can lead to a missed execution on attempt to qeueue
657
+ * reordering can lead to a missed execution on attempt to queue
651658 * the same @work. E.g. consider this case:
652659 *
653660 * CPU#0 CPU#1
....@@ -695,8 +702,8 @@
695702 * @work: the work item of interest
696703 *
697704 * Pools are created and destroyed under wq_pool_mutex, and allows read
698
- * access under sched-RCU read lock. As such, this function should be
699
- * called under wq_pool_mutex or with preemption disabled.
705
+ * access under RCU read lock. As such, this function should be
706
+ * called under wq_pool_mutex or inside of a rcu_read_lock() region.
700707 *
701708 * All fields of the returned pool are accessible as long as the above
702709 * mentioned locking is in effect. If the returned pool needs to be used
....@@ -829,7 +836,7 @@
829836 * Wake up the first idle worker of @pool.
830837 *
831838 * CONTEXT:
832
- * spin_lock_irq(pool->lock).
839
+ * raw_spin_lock_irq(pool->lock).
833840 */
834841 static void wake_up_worker(struct worker_pool *pool)
835842 {
....@@ -840,43 +847,42 @@
840847 }
841848
842849 /**
843
- * wq_worker_waking_up - a worker is waking up
850
+ * wq_worker_running - a worker is running again
844851 * @task: task waking up
845
- * @cpu: CPU @task is waking up to
846852 *
847
- * This function is called during try_to_wake_up() when a worker is
848
- * being awoken.
849
- *
850
- * CONTEXT:
851
- * spin_lock_irq(rq->lock)
853
+ * This function is called when a worker returns from schedule()
852854 */
853
-void wq_worker_waking_up(struct task_struct *task, int cpu)
855
+void wq_worker_running(struct task_struct *task)
854856 {
855857 struct worker *worker = kthread_data(task);
856858
857
- if (!(worker->flags & WORKER_NOT_RUNNING)) {
858
- WARN_ON_ONCE(worker->pool->cpu != cpu);
859
+ if (!worker->sleeping)
860
+ return;
861
+
862
+ /*
863
+ * If preempted by unbind_workers() between the WORKER_NOT_RUNNING check
864
+ * and the nr_running increment below, we may ruin the nr_running reset
865
+ * and leave with an unexpected pool->nr_running == 1 on the newly unbound
866
+ * pool. Protect against such race.
867
+ */
868
+ preempt_disable();
869
+ if (!(worker->flags & WORKER_NOT_RUNNING))
859870 atomic_inc(&worker->pool->nr_running);
860
- }
871
+ preempt_enable();
872
+ worker->sleeping = 0;
861873 }
862874
863875 /**
864876 * wq_worker_sleeping - a worker is going to sleep
865877 * @task: task going to sleep
866878 *
867
- * This function is called during schedule() when a busy worker is
868
- * going to sleep. Worker on the same cpu can be woken up by
869
- * returning pointer to its task.
870
- *
871
- * CONTEXT:
872
- * spin_lock_irq(rq->lock)
873
- *
874
- * Return:
875
- * Worker task on @cpu to wake up, %NULL if none.
879
+ * This function is called from schedule() when a busy worker is
880
+ * going to sleep. Preemption needs to be disabled to protect ->sleeping
881
+ * assignment.
876882 */
877
-struct task_struct *wq_worker_sleeping(struct task_struct *task)
883
+void wq_worker_sleeping(struct task_struct *task)
878884 {
879
- struct worker *worker = kthread_data(task), *to_wakeup = NULL;
885
+ struct worker *next, *worker = kthread_data(task);
880886 struct worker_pool *pool;
881887
882888 /*
....@@ -885,13 +891,16 @@
885891 * checking NOT_RUNNING.
886892 */
887893 if (worker->flags & WORKER_NOT_RUNNING)
888
- return NULL;
894
+ return;
889895
890896 pool = worker->pool;
891897
892
- /* this can only happen on the local cpu */
893
- if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id()))
894
- return NULL;
898
+ /* Return if preempted before wq_worker_running() was reached */
899
+ if (worker->sleeping)
900
+ return;
901
+
902
+ worker->sleeping = 1;
903
+ raw_spin_lock_irq(&pool->lock);
895904
896905 /*
897906 * The counterpart of the following dec_and_test, implied mb,
....@@ -905,19 +914,33 @@
905914 * lock is safe.
906915 */
907916 if (atomic_dec_and_test(&pool->nr_running) &&
908
- !list_empty(&pool->worklist))
909
- to_wakeup = first_idle_worker(pool);
910
- return to_wakeup ? to_wakeup->task : NULL;
917
+ !list_empty(&pool->worklist)) {
918
+ next = first_idle_worker(pool);
919
+ if (next)
920
+ wake_up_process(next->task);
921
+ }
922
+ raw_spin_unlock_irq(&pool->lock);
911923 }
912924
913925 /**
914926 * wq_worker_last_func - retrieve worker's last work function
927
+ * @task: Task to retrieve last work function of.
915928 *
916929 * Determine the last function a worker executed. This is called from
917930 * the scheduler to get a worker's last known identity.
918931 *
919932 * CONTEXT:
920
- * spin_lock_irq(rq->lock)
933
+ * raw_spin_lock_irq(rq->lock)
934
+ *
935
+ * This function is called during schedule() when a kworker is going
936
+ * to sleep. It's used by psi to identify aggregation workers during
937
+ * dequeuing, to allow periodic aggregation to shut-off when that
938
+ * worker is the last task in the system or cgroup to go to sleep.
939
+ *
940
+ * As this function doesn't involve any workqueue-related locking, it
941
+ * only returns stable values when called from inside the scheduler's
942
+ * queuing and dequeuing paths, when @task, which must be a kworker,
943
+ * is guaranteed to not be processing any works.
921944 *
922945 * Return:
923946 * The last work function %current executed as a worker, NULL if it
....@@ -938,7 +961,7 @@
938961 * Set @flags in @worker->flags and adjust nr_running accordingly.
939962 *
940963 * CONTEXT:
941
- * spin_lock_irq(pool->lock)
964
+ * raw_spin_lock_irq(pool->lock)
942965 */
943966 static inline void worker_set_flags(struct worker *worker, unsigned int flags)
944967 {
....@@ -963,7 +986,7 @@
963986 * Clear @flags in @worker->flags and adjust nr_running accordingly.
964987 *
965988 * CONTEXT:
966
- * spin_lock_irq(pool->lock)
989
+ * raw_spin_lock_irq(pool->lock)
967990 */
968991 static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
969992 {
....@@ -1011,7 +1034,7 @@
10111034 * actually occurs, it should be easy to locate the culprit work function.
10121035 *
10131036 * CONTEXT:
1014
- * spin_lock_irq(pool->lock).
1037
+ * raw_spin_lock_irq(pool->lock).
10151038 *
10161039 * Return:
10171040 * Pointer to worker which is executing @work if found, %NULL
....@@ -1046,7 +1069,7 @@
10461069 * nested inside outer list_for_each_entry_safe().
10471070 *
10481071 * CONTEXT:
1049
- * spin_lock_irq(pool->lock).
1072
+ * raw_spin_lock_irq(pool->lock).
10501073 */
10511074 static void move_linked_works(struct work_struct *work, struct list_head *head,
10521075 struct work_struct **nextp)
....@@ -1121,12 +1144,12 @@
11211144 {
11221145 if (pwq) {
11231146 /*
1124
- * As both pwqs and pools are sched-RCU protected, the
1147
+ * As both pwqs and pools are RCU protected, the
11251148 * following lock operations are safe.
11261149 */
1127
- spin_lock_irq(&pwq->pool->lock);
1150
+ raw_spin_lock_irq(&pwq->pool->lock);
11281151 put_pwq(pwq);
1129
- spin_unlock_irq(&pwq->pool->lock);
1152
+ raw_spin_unlock_irq(&pwq->pool->lock);
11301153 }
11311154 }
11321155
....@@ -1159,7 +1182,7 @@
11591182 * decrement nr_in_flight of its pwq and handle workqueue flushing.
11601183 *
11611184 * CONTEXT:
1162
- * spin_lock_irq(pool->lock).
1185
+ * raw_spin_lock_irq(pool->lock).
11631186 */
11641187 static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
11651188 {
....@@ -1207,11 +1230,14 @@
12071230 * stable state - idle, on timer or on worklist.
12081231 *
12091232 * Return:
1233
+ *
1234
+ * ======== ================================================================
12101235 * 1 if @work was pending and we successfully stole PENDING
12111236 * 0 if @work was idle and we claimed PENDING
12121237 * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry
12131238 * -ENOENT if someone else is canceling @work, this state may persist
12141239 * for arbitrarily long
1240
+ * ======== ================================================================
12151241 *
12161242 * Note:
12171243 * On >= 0 return, the caller owns @work's PENDING bit. To avoid getting
....@@ -1249,6 +1275,7 @@
12491275 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
12501276 return 0;
12511277
1278
+ rcu_read_lock();
12521279 /*
12531280 * The queueing is in progress, or it is already queued. Try to
12541281 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
....@@ -1257,7 +1284,7 @@
12571284 if (!pool)
12581285 goto fail;
12591286
1260
- spin_lock(&pool->lock);
1287
+ raw_spin_lock(&pool->lock);
12611288 /*
12621289 * work->data is guaranteed to point to pwq only while the work
12631290 * item is queued on pwq->wq, and both updating work->data to point
....@@ -1286,11 +1313,13 @@
12861313 /* work->data points to pwq iff queued, point to pool */
12871314 set_work_pool_and_keep_pending(work, pool->id);
12881315
1289
- spin_unlock(&pool->lock);
1316
+ raw_spin_unlock(&pool->lock);
1317
+ rcu_read_unlock();
12901318 return 1;
12911319 }
1292
- spin_unlock(&pool->lock);
1320
+ raw_spin_unlock(&pool->lock);
12931321 fail:
1322
+ rcu_read_unlock();
12941323 local_irq_restore(*flags);
12951324 if (work_is_canceling(work))
12961325 return -ENOENT;
....@@ -1309,12 +1338,15 @@
13091338 * work_struct flags.
13101339 *
13111340 * CONTEXT:
1312
- * spin_lock_irq(pool->lock).
1341
+ * raw_spin_lock_irq(pool->lock).
13131342 */
13141343 static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
13151344 struct list_head *head, unsigned int extra_flags)
13161345 {
13171346 struct worker_pool *pool = pwq->pool;
1347
+
1348
+ /* record the work call stack in order to print it in KASAN reports */
1349
+ kasan_record_aux_stack(work);
13181350
13191351 /* we own @work, set data and link */
13201352 set_work_pwq(work, pwq, extra_flags);
....@@ -1342,7 +1374,7 @@
13421374
13431375 worker = current_wq_worker();
13441376 /*
1345
- * Return %true iff I'm a worker execuing a work item on @wq. If
1377
+ * Return %true iff I'm a worker executing a work item on @wq. If
13461378 * I'm @worker, it's safe to dereference it without locking.
13471379 */
13481380 return worker && worker->current_pwq->wq == wq;
....@@ -1403,6 +1435,7 @@
14031435 if (unlikely(wq->flags & __WQ_DRAINING) &&
14041436 WARN_ON_ONCE(!is_chained_work(wq)))
14051437 return;
1438
+ rcu_read_lock();
14061439 retry:
14071440 /* pwq which will be used unless @work is executing elsewhere */
14081441 if (wq->flags & WQ_UNBOUND) {
....@@ -1424,7 +1457,7 @@
14241457 if (last_pool && last_pool != pwq->pool) {
14251458 struct worker *worker;
14261459
1427
- spin_lock(&last_pool->lock);
1460
+ raw_spin_lock(&last_pool->lock);
14281461
14291462 worker = find_worker_executing_work(last_pool, work);
14301463
....@@ -1432,11 +1465,11 @@
14321465 pwq = worker->current_pwq;
14331466 } else {
14341467 /* meh... not running there, queue here */
1435
- spin_unlock(&last_pool->lock);
1436
- spin_lock(&pwq->pool->lock);
1468
+ raw_spin_unlock(&last_pool->lock);
1469
+ raw_spin_lock(&pwq->pool->lock);
14371470 }
14381471 } else {
1439
- spin_lock(&pwq->pool->lock);
1472
+ raw_spin_lock(&pwq->pool->lock);
14401473 }
14411474
14421475 /*
....@@ -1449,7 +1482,7 @@
14491482 */
14501483 if (unlikely(!pwq->refcnt)) {
14511484 if (wq->flags & WQ_UNBOUND) {
1452
- spin_unlock(&pwq->pool->lock);
1485
+ raw_spin_unlock(&pwq->pool->lock);
14531486 cpu_relax();
14541487 goto retry;
14551488 }
....@@ -1461,10 +1494,8 @@
14611494 /* pwq determined, queue */
14621495 trace_workqueue_queue_work(req_cpu, pwq, work);
14631496
1464
- if (WARN_ON(!list_empty(&work->entry))) {
1465
- spin_unlock(&pwq->pool->lock);
1466
- return;
1467
- }
1497
+ if (WARN_ON(!list_empty(&work->entry)))
1498
+ goto out;
14681499
14691500 pwq->nr_in_flight[pwq->work_color]++;
14701501 work_flags = work_color_to_flags(pwq->work_color);
....@@ -1483,7 +1514,9 @@
14831514 debug_work_activate(work);
14841515 insert_work(pwq, work, worklist, work_flags);
14851516
1486
- spin_unlock(&pwq->pool->lock);
1517
+out:
1518
+ raw_spin_unlock(&pwq->pool->lock);
1519
+ rcu_read_unlock();
14871520 }
14881521
14891522 /**
....@@ -1515,6 +1548,90 @@
15151548 }
15161549 EXPORT_SYMBOL(queue_work_on);
15171550
1551
+/**
1552
+ * workqueue_select_cpu_near - Select a CPU based on NUMA node
1553
+ * @node: NUMA node ID that we want to select a CPU from
1554
+ *
1555
+ * This function will attempt to find a "random" cpu available on a given
1556
+ * node. If there are no CPUs available on the given node it will return
1557
+ * WORK_CPU_UNBOUND indicating that we should just schedule to any
1558
+ * available CPU if we need to schedule this work.
1559
+ */
1560
+static int workqueue_select_cpu_near(int node)
1561
+{
1562
+ int cpu;
1563
+
1564
+ /* No point in doing this if NUMA isn't enabled for workqueues */
1565
+ if (!wq_numa_enabled)
1566
+ return WORK_CPU_UNBOUND;
1567
+
1568
+ /* Delay binding to CPU if node is not valid or online */
1569
+ if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
1570
+ return WORK_CPU_UNBOUND;
1571
+
1572
+ /* Use local node/cpu if we are already there */
1573
+ cpu = raw_smp_processor_id();
1574
+ if (node == cpu_to_node(cpu))
1575
+ return cpu;
1576
+
1577
+ /* Use "random" otherwise know as "first" online CPU of node */
1578
+ cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
1579
+
1580
+ /* If CPU is valid return that, otherwise just defer */
1581
+ return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
1582
+}
1583
+
1584
+/**
1585
+ * queue_work_node - queue work on a "random" cpu for a given NUMA node
1586
+ * @node: NUMA node that we are targeting the work for
1587
+ * @wq: workqueue to use
1588
+ * @work: work to queue
1589
+ *
1590
+ * We queue the work to a "random" CPU within a given NUMA node. The basic
1591
+ * idea here is to provide a way to somehow associate work with a given
1592
+ * NUMA node.
1593
+ *
1594
+ * This function will only make a best effort attempt at getting this onto
1595
+ * the right NUMA node. If no node is requested or the requested node is
1596
+ * offline then we just fall back to standard queue_work behavior.
1597
+ *
1598
+ * Currently the "random" CPU ends up being the first available CPU in the
1599
+ * intersection of cpu_online_mask and the cpumask of the node, unless we
1600
+ * are running on the node. In that case we just use the current CPU.
1601
+ *
1602
+ * Return: %false if @work was already on a queue, %true otherwise.
1603
+ */
1604
+bool queue_work_node(int node, struct workqueue_struct *wq,
1605
+ struct work_struct *work)
1606
+{
1607
+ unsigned long flags;
1608
+ bool ret = false;
1609
+
1610
+ /*
1611
+ * This current implementation is specific to unbound workqueues.
1612
+ * Specifically we only return the first available CPU for a given
1613
+ * node instead of cycling through individual CPUs within the node.
1614
+ *
1615
+ * If this is used with a per-cpu workqueue then the logic in
1616
+ * workqueue_select_cpu_near would need to be updated to allow for
1617
+ * some round robin type logic.
1618
+ */
1619
+ WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
1620
+
1621
+ local_irq_save(flags);
1622
+
1623
+ if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1624
+ int cpu = workqueue_select_cpu_near(node);
1625
+
1626
+ __queue_work(cpu, wq, work);
1627
+ ret = true;
1628
+ }
1629
+
1630
+ local_irq_restore(flags);
1631
+ return ret;
1632
+}
1633
+EXPORT_SYMBOL_GPL(queue_work_node);
1634
+
15181635 void delayed_work_timer_fn(struct timer_list *t)
15191636 {
15201637 struct delayed_work *dwork = from_timer(dwork, t, timer);
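A minimal usage sketch for the queue_work_node() interface added in the hunk above (illustrative only, not part of the change; the unbound workqueue "node_wq" and the work item "node_work" are hypothetical placeholders a caller would define):

#include <linux/workqueue.h>
#include <linux/device.h>

static struct workqueue_struct *node_wq;	/* hypothetical, allocated with WQ_UNBOUND */
static struct work_struct node_work;		/* hypothetical work item */

/*
 * Queue node_work close to @dev's NUMA node; workqueue_select_cpu_near()
 * falls back to WORK_CPU_UNBOUND if the node is invalid or offline.
 */
static void queue_near_dev(struct device *dev)
{
	queue_work_node(dev_to_node(dev), node_wq, &node_work);
}

As the kernel-doc in the hunk notes, the helper only picks the first online CPU of the node, so this is a locality hint rather than a strict binding.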
....@@ -1531,9 +1648,14 @@
15311648 struct work_struct *work = &dwork->work;
15321649
15331650 WARN_ON_ONCE(!wq);
1534
-#ifndef CONFIG_CFI_CLANG
1535
- WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
1536
-#endif
1651
+ /*
1652
+ * With CFI, timer->function can point to a jump table entry in a module,
1653
+ * which fails the comparison. Disable the warning if CFI and modules are
1654
+ * both enabled.
1655
+ */
1656
+ if (!IS_ENABLED(CONFIG_CFI_CLANG) || !IS_ENABLED(CONFIG_MODULES))
1657
+ WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
1658
+
15371659 WARN_ON_ONCE(timer_pending(timer));
15381660 WARN_ON_ONCE(!list_empty(&work->entry));
15391661
....@@ -1644,7 +1766,7 @@
16441766 *
16451767 * Return: %false if @rwork was already pending, %true otherwise. Note
16461768 * that a full RCU grace period is guaranteed only after a %true return.
1647
- * While @rwork is guarnateed to be executed after a %false return, the
1769
+ * While @rwork is guaranteed to be executed after a %false return, the
16481770 * execution may happen before a full RCU grace period has passed.
16491771 */
16501772 bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
....@@ -1669,7 +1791,7 @@
16691791 * necessary.
16701792 *
16711793 * LOCKING:
1672
- * spin_lock_irq(pool->lock).
1794
+ * raw_spin_lock_irq(pool->lock).
16731795 */
16741796 static void worker_enter_idle(struct worker *worker)
16751797 {
....@@ -1709,7 +1831,7 @@
17091831 * @worker is leaving idle state. Update stats.
17101832 *
17111833 * LOCKING:
1712
- * spin_lock_irq(pool->lock).
1834
+ * raw_spin_lock_irq(pool->lock).
17131835 */
17141836 static void worker_leave_idle(struct worker *worker)
17151837 {
....@@ -1838,17 +1960,26 @@
18381960 goto fail;
18391961
18401962 set_user_nice(worker->task, pool->attrs->nice);
1963
+ if (IS_ENABLED(CONFIG_ROCKCHIP_OPTIMIZE_RT_PRIO)) {
1964
+ struct sched_param param;
1965
+
1966
+ if (pool->attrs->nice == 0)
1967
+ param.sched_priority = MAX_RT_PRIO / 2 - 4;
1968
+ else
1969
+ param.sched_priority = MAX_RT_PRIO / 2 - 2;
1970
+ sched_setscheduler_nocheck(worker->task, SCHED_RR, &param);
1971
+ }
18411972 kthread_bind_mask(worker->task, pool->attrs->cpumask);
18421973
18431974 /* successful, attach the worker to the pool */
18441975 worker_attach_to_pool(worker, pool);
18451976
18461977 /* start the newly created worker */
1847
- spin_lock_irq(&pool->lock);
1978
+ raw_spin_lock_irq(&pool->lock);
18481979 worker->pool->nr_workers++;
18491980 worker_enter_idle(worker);
18501981 wake_up_process(worker->task);
1851
- spin_unlock_irq(&pool->lock);
1982
+ raw_spin_unlock_irq(&pool->lock);
18521983
18531984 return worker;
18541985
....@@ -1867,7 +1998,7 @@
18671998 * be idle.
18681999 *
18692000 * CONTEXT:
1870
- * spin_lock_irq(pool->lock).
2001
+ * raw_spin_lock_irq(pool->lock).
18712002 */
18722003 static void destroy_worker(struct worker *worker)
18732004 {
....@@ -1893,7 +2024,7 @@
18932024 {
18942025 struct worker_pool *pool = from_timer(pool, t, idle_timer);
18952026
1896
- spin_lock_irq(&pool->lock);
2027
+ raw_spin_lock_irq(&pool->lock);
18972028
18982029 while (too_many_workers(pool)) {
18992030 struct worker *worker;
....@@ -1911,7 +2042,7 @@
19112042 destroy_worker(worker);
19122043 }
19132044
1914
- spin_unlock_irq(&pool->lock);
2045
+ raw_spin_unlock_irq(&pool->lock);
19152046 }
19162047
19172048 static void send_mayday(struct work_struct *work)
....@@ -1942,8 +2073,8 @@
19422073 struct worker_pool *pool = from_timer(pool, t, mayday_timer);
19432074 struct work_struct *work;
19442075
1945
- spin_lock_irq(&pool->lock);
1946
- spin_lock(&wq_mayday_lock); /* for wq->maydays */
2076
+ raw_spin_lock_irq(&pool->lock);
2077
+ raw_spin_lock(&wq_mayday_lock); /* for wq->maydays */
19472078
19482079 if (need_to_create_worker(pool)) {
19492080 /*
....@@ -1956,8 +2087,8 @@
19562087 send_mayday(work);
19572088 }
19582089
1959
- spin_unlock(&wq_mayday_lock);
1960
- spin_unlock_irq(&pool->lock);
2090
+ raw_spin_unlock(&wq_mayday_lock);
2091
+ raw_spin_unlock_irq(&pool->lock);
19612092
19622093 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
19632094 }
....@@ -1976,7 +2107,7 @@
19762107 * may_start_working() %true.
19772108 *
19782109 * LOCKING:
1979
- * spin_lock_irq(pool->lock) which may be released and regrabbed
2110
+ * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
19802111 * multiple times. Does GFP_KERNEL allocations. Called only from
19812112 * manager.
19822113 */
....@@ -1985,7 +2116,7 @@
19852116 __acquires(&pool->lock)
19862117 {
19872118 restart:
1988
- spin_unlock_irq(&pool->lock);
2119
+ raw_spin_unlock_irq(&pool->lock);
19892120
19902121 /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
19912122 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
....@@ -2001,7 +2132,7 @@
20012132 }
20022133
20032134 del_timer_sync(&pool->mayday_timer);
2004
- spin_lock_irq(&pool->lock);
2135
+ raw_spin_lock_irq(&pool->lock);
20052136 /*
20062137 * This is necessary even after a new worker was just successfully
20072138 * created as @pool->lock was dropped and the new worker might have
....@@ -2024,7 +2155,7 @@
20242155 * and may_start_working() is true.
20252156 *
20262157 * CONTEXT:
2027
- * spin_lock_irq(pool->lock) which may be released and regrabbed
2158
+ * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
20282159 * multiple times. Does GFP_KERNEL allocations.
20292160 *
20302161 * Return:
....@@ -2047,7 +2178,7 @@
20472178
20482179 pool->manager = NULL;
20492180 pool->flags &= ~POOL_MANAGER_ACTIVE;
2050
- wake_up(&wq_manager_wait);
2181
+ rcuwait_wake_up(&manager_wait);
20512182 return true;
20522183 }
20532184
....@@ -2063,7 +2194,7 @@
20632194 * call this function to process a work.
20642195 *
20652196 * CONTEXT:
2066
- * spin_lock_irq(pool->lock) which is released and regrabbed.
2197
+ * raw_spin_lock_irq(pool->lock) which is released and regrabbed.
20672198 */
20682199 static void process_one_work(struct worker *worker, struct work_struct *work)
20692200 __releases(&pool->lock)
....@@ -2145,7 +2276,7 @@
21452276 */
21462277 set_work_pool_and_clear_pending(work, pool->id);
21472278
2148
- spin_unlock_irq(&pool->lock);
2279
+ raw_spin_unlock_irq(&pool->lock);
21492280
21502281 lock_map_acquire(&pwq->wq->lockdep_map);
21512282 lock_map_acquire(&lockdep_map);
....@@ -2177,13 +2308,13 @@
21772308 * While we must be careful to not use "work" after this, the trace
21782309 * point will only record its address.
21792310 */
2180
- trace_workqueue_execute_end(work);
2311
+ trace_workqueue_execute_end(work, worker->current_func);
21812312 lock_map_release(&lockdep_map);
21822313 lock_map_release(&pwq->wq->lockdep_map);
21832314
21842315 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
21852316 pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
2186
- " last function: %pf\n",
2317
+ " last function: %ps\n",
21872318 current->comm, preempt_count(), task_pid_nr(current),
21882319 worker->current_func);
21892320 debug_show_held_locks(current);
....@@ -2191,7 +2322,7 @@
21912322 }
21922323
21932324 /*
2194
- * The following prevents a kworker from hogging CPU on !PREEMPT
2325
+ * The following prevents a kworker from hogging CPU on !PREEMPTION
21952326 * kernels, where a requeueing work item waiting for something to
21962327 * happen could deadlock with stop_machine as such work item could
21972328 * indefinitely requeue itself while all other CPUs are trapped in
....@@ -2200,7 +2331,7 @@
22002331 */
22012332 cond_resched();
22022333
2203
- spin_lock_irq(&pool->lock);
2334
+ raw_spin_lock_irq(&pool->lock);
22042335
22052336 /* clear cpu intensive status */
22062337 if (unlikely(cpu_intensive))
....@@ -2226,7 +2357,7 @@
22262357 * fetches a work from the top and executes it.
22272358 *
22282359 * CONTEXT:
2229
- * spin_lock_irq(pool->lock) which may be released and regrabbed
2360
+ * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
22302361 * multiple times.
22312362 */
22322363 static void process_scheduled_works(struct worker *worker)
....@@ -2268,11 +2399,11 @@
22682399 /* tell the scheduler that this is a workqueue worker */
22692400 set_pf_worker(true);
22702401 woke_up:
2271
- spin_lock_irq(&pool->lock);
2402
+ raw_spin_lock_irq(&pool->lock);
22722403
22732404 /* am I supposed to die? */
22742405 if (unlikely(worker->flags & WORKER_DIE)) {
2275
- spin_unlock_irq(&pool->lock);
2406
+ raw_spin_unlock_irq(&pool->lock);
22762407 WARN_ON_ONCE(!list_empty(&worker->entry));
22772408 set_pf_worker(false);
22782409
....@@ -2338,7 +2469,7 @@
23382469 */
23392470 worker_enter_idle(worker);
23402471 __set_current_state(TASK_IDLE);
2341
- spin_unlock_irq(&pool->lock);
2472
+ raw_spin_unlock_irq(&pool->lock);
23422473 schedule();
23432474 goto woke_up;
23442475 }
....@@ -2392,7 +2523,7 @@
23922523 should_stop = kthread_should_stop();
23932524
23942525 /* see whether any pwq is asking for help */
2395
- spin_lock_irq(&wq_mayday_lock);
2526
+ raw_spin_lock_irq(&wq_mayday_lock);
23962527
23972528 while (!list_empty(&wq->maydays)) {
23982529 struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
....@@ -2404,11 +2535,11 @@
24042535 __set_current_state(TASK_RUNNING);
24052536 list_del_init(&pwq->mayday_node);
24062537
2407
- spin_unlock_irq(&wq_mayday_lock);
2538
+ raw_spin_unlock_irq(&wq_mayday_lock);
24082539
24092540 worker_attach_to_pool(rescuer, pool);
24102541
2411
- spin_lock_irq(&pool->lock);
2542
+ raw_spin_lock_irq(&pool->lock);
24122543
24132544 /*
24142545 * Slurp in all works issued via this workqueue and
....@@ -2436,8 +2567,8 @@
24362567 * being used to relieve memory pressure, don't
24372568 * incur MAYDAY_INTERVAL delay inbetween.
24382569 */
2439
- if (need_to_create_worker(pool)) {
2440
- spin_lock(&wq_mayday_lock);
2570
+ if (pwq->nr_active && need_to_create_worker(pool)) {
2571
+ raw_spin_lock(&wq_mayday_lock);
24412572 /*
24422573 * Queue iff we aren't racing destruction
24432574 * and somebody else hasn't queued it already.
....@@ -2446,7 +2577,7 @@
24462577 get_pwq(pwq);
24472578 list_add_tail(&pwq->mayday_node, &wq->maydays);
24482579 }
2449
- spin_unlock(&wq_mayday_lock);
2580
+ raw_spin_unlock(&wq_mayday_lock);
24502581 }
24512582 }
24522583
....@@ -2464,14 +2595,14 @@
24642595 if (need_more_worker(pool))
24652596 wake_up_worker(pool);
24662597
2467
- spin_unlock_irq(&pool->lock);
2598
+ raw_spin_unlock_irq(&pool->lock);
24682599
24692600 worker_detach_from_pool(rescuer);
24702601
2471
- spin_lock_irq(&wq_mayday_lock);
2602
+ raw_spin_lock_irq(&wq_mayday_lock);
24722603 }
24732604
2474
- spin_unlock_irq(&wq_mayday_lock);
2605
+ raw_spin_unlock_irq(&wq_mayday_lock);
24752606
24762607 if (should_stop) {
24772608 __set_current_state(TASK_RUNNING);
....@@ -2508,11 +2639,11 @@
25082639 worker = current_wq_worker();
25092640
25102641 WARN_ONCE(current->flags & PF_MEMALLOC,
2511
- "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
2642
+ "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
25122643 current->pid, current->comm, target_wq->name, target_func);
25132644 WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
25142645 (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
2515
- "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
2646
+ "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps",
25162647 worker->current_pwq->wq->name, worker->current_func,
25172648 target_wq->name, target_func);
25182649 }
....@@ -2551,7 +2682,7 @@
25512682 * underneath us, so we can't reliably determine pwq from @target.
25522683 *
25532684 * CONTEXT:
2554
- * spin_lock_irq(pool->lock).
2685
+ * raw_spin_lock_irq(pool->lock).
25552686 */
25562687 static void insert_wq_barrier(struct pool_workqueue *pwq,
25572688 struct wq_barrier *barr,
....@@ -2638,7 +2769,7 @@
26382769 for_each_pwq(pwq, wq) {
26392770 struct worker_pool *pool = pwq->pool;
26402771
2641
- spin_lock_irq(&pool->lock);
2772
+ raw_spin_lock_irq(&pool->lock);
26422773
26432774 if (flush_color >= 0) {
26442775 WARN_ON_ONCE(pwq->flush_color != -1);
....@@ -2655,7 +2786,7 @@
26552786 pwq->work_color = work_color;
26562787 }
26572788
2658
- spin_unlock_irq(&pool->lock);
2789
+ raw_spin_unlock_irq(&pool->lock);
26592790 }
26602791
26612792 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
....@@ -2743,7 +2874,7 @@
27432874 * First flushers are responsible for cascading flushes and
27442875 * handling overflow. Non-first flushers can simply return.
27452876 */
2746
- if (wq->first_flusher != &this_flusher)
2877
+ if (READ_ONCE(wq->first_flusher) != &this_flusher)
27472878 return;
27482879
27492880 mutex_lock(&wq->mutex);
....@@ -2752,7 +2883,7 @@
27522883 if (wq->first_flusher != &this_flusher)
27532884 goto out_unlock;
27542885
2755
- wq->first_flusher = NULL;
2886
+ WRITE_ONCE(wq->first_flusher, NULL);
27562887
27572888 WARN_ON_ONCE(!list_empty(&this_flusher.list));
27582889 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
....@@ -2855,9 +2986,9 @@
28552986 for_each_pwq(pwq, wq) {
28562987 bool drained;
28572988
2858
- spin_lock_irq(&pwq->pool->lock);
2989
+ raw_spin_lock_irq(&pwq->pool->lock);
28592990 drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
2860
- spin_unlock_irq(&pwq->pool->lock);
2991
+ raw_spin_unlock_irq(&pwq->pool->lock);
28612992
28622993 if (drained)
28632994 continue;
....@@ -2886,14 +3017,14 @@
28863017
28873018 might_sleep();
28883019
2889
- local_irq_disable();
3020
+ rcu_read_lock();
28903021 pool = get_work_pool(work);
28913022 if (!pool) {
2892
- local_irq_enable();
3023
+ rcu_read_unlock();
28933024 return false;
28943025 }
28953026
2896
- spin_lock(&pool->lock);
3027
+ raw_spin_lock_irq(&pool->lock);
28973028 /* see the comment in try_to_grab_pending() with the same code */
28983029 pwq = get_work_pwq(work);
28993030 if (pwq) {
....@@ -2909,7 +3040,7 @@
29093040 check_flush_dependency(pwq->wq, work);
29103041
29113042 insert_wq_barrier(pwq, barr, work, worker);
2912
- spin_unlock_irq(&pool->lock);
3043
+ raw_spin_unlock_irq(&pool->lock);
29133044
29143045 /*
29153046 * Force a lock recursion deadlock when using flush_work() inside a
....@@ -2925,10 +3056,11 @@
29253056 lock_map_acquire(&pwq->wq->lockdep_map);
29263057 lock_map_release(&pwq->wq->lockdep_map);
29273058 }
2928
-
3059
+ rcu_read_unlock();
29293060 return true;
29303061 already_gone:
2931
- spin_unlock_irq(&pool->lock);
3062
+ raw_spin_unlock_irq(&pool->lock);
3063
+ rcu_read_unlock();
29323064 return false;
29333065 }
29343066
....@@ -2942,10 +3074,8 @@
29423074 if (WARN_ON(!work->func))
29433075 return false;
29443076
2945
- if (!from_cancel) {
2946
- lock_map_acquire(&work->lockdep_map);
2947
- lock_map_release(&work->lockdep_map);
2948
- }
3077
+ lock_map_acquire(&work->lockdep_map);
3078
+ lock_map_release(&work->lockdep_map);
29493079
29503080 if (start_flush_work(work, &barr, from_cancel)) {
29513081 wait_for_completion(&barr.done);
....@@ -3250,21 +3380,20 @@
32503380
32513381 /**
32523382 * alloc_workqueue_attrs - allocate a workqueue_attrs
3253
- * @gfp_mask: allocation mask to use
32543383 *
32553384 * Allocate a new workqueue_attrs, initialize with default settings and
32563385 * return it.
32573386 *
32583387 * Return: The allocated new workqueue_attr on success. %NULL on failure.
32593388 */
3260
-struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask)
3389
+struct workqueue_attrs *alloc_workqueue_attrs(void)
32613390 {
32623391 struct workqueue_attrs *attrs;
32633392
3264
- attrs = kzalloc(sizeof(*attrs), gfp_mask);
3393
+ attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
32653394 if (!attrs)
32663395 goto fail;
3267
- if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask))
3396
+ if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL))
32683397 goto fail;
32693398
32703399 cpumask_copy(attrs->cpumask, cpu_possible_mask);
....@@ -3321,7 +3450,7 @@
33213450 */
33223451 static int init_worker_pool(struct worker_pool *pool)
33233452 {
3324
- spin_lock_init(&pool->lock);
3453
+ raw_spin_lock_init(&pool->lock);
33253454 pool->id = -1;
33263455 pool->cpu = -1;
33273456 pool->node = NUMA_NO_NODE;
....@@ -3342,23 +3471,62 @@
33423471 pool->refcnt = 1;
33433472
33443473 /* shouldn't fail above this point */
3345
- pool->attrs = alloc_workqueue_attrs(GFP_KERNEL);
3474
+ pool->attrs = alloc_workqueue_attrs();
33463475 if (!pool->attrs)
33473476 return -ENOMEM;
33483477 return 0;
33493478 }
3479
+
3480
+#ifdef CONFIG_LOCKDEP
3481
+static void wq_init_lockdep(struct workqueue_struct *wq)
3482
+{
3483
+ char *lock_name;
3484
+
3485
+ lockdep_register_key(&wq->key);
3486
+ lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name);
3487
+ if (!lock_name)
3488
+ lock_name = wq->name;
3489
+
3490
+ wq->lock_name = lock_name;
3491
+ lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0);
3492
+}
3493
+
3494
+static void wq_unregister_lockdep(struct workqueue_struct *wq)
3495
+{
3496
+ lockdep_unregister_key(&wq->key);
3497
+}
3498
+
3499
+static void wq_free_lockdep(struct workqueue_struct *wq)
3500
+{
3501
+ if (wq->lock_name != wq->name)
3502
+ kfree(wq->lock_name);
3503
+}
3504
+#else
3505
+static void wq_init_lockdep(struct workqueue_struct *wq)
3506
+{
3507
+}
3508
+
3509
+static void wq_unregister_lockdep(struct workqueue_struct *wq)
3510
+{
3511
+}
3512
+
3513
+static void wq_free_lockdep(struct workqueue_struct *wq)
3514
+{
3515
+}
3516
+#endif
33503517
33513518 static void rcu_free_wq(struct rcu_head *rcu)
33523519 {
33533520 struct workqueue_struct *wq =
33543521 container_of(rcu, struct workqueue_struct, rcu);
33553522
3523
+ wq_free_lockdep(wq);
3524
+
33563525 if (!(wq->flags & WQ_UNBOUND))
33573526 free_percpu(wq->cpu_pwqs);
33583527 else
33593528 free_workqueue_attrs(wq->unbound_attrs);
33603529
3361
- kfree(wq->rescuer);
33623530 kfree(wq);
33633531 }
33643532
....@@ -3371,11 +3539,23 @@
33713539 kfree(pool);
33723540 }
33733541
3542
+/* This returns with the lock held on success (pool manager is inactive). */
3543
+static bool wq_manager_inactive(struct worker_pool *pool)
3544
+{
3545
+ raw_spin_lock_irq(&pool->lock);
3546
+
3547
+ if (pool->flags & POOL_MANAGER_ACTIVE) {
3548
+ raw_spin_unlock_irq(&pool->lock);
3549
+ return false;
3550
+ }
3551
+ return true;
3552
+}
3553
+
33743554 /**
33753555 * put_unbound_pool - put a worker_pool
33763556 * @pool: worker_pool to put
33773557 *
3378
- * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU
3558
+ * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU
33793559 * safe manner. get_unbound_pool() calls this function on its failure path
33803560 * and this function should be able to release pools which went through,
33813561 * successfully or not, init_worker_pool().
....@@ -3406,16 +3586,17 @@
34063586 * Become the manager and destroy all workers. This prevents
34073587 * @pool's workers from blocking on attach_mutex. We're the last
34083588 * manager and @pool gets freed with the flag set.
3589
+ * Because of how wq_manager_inactive() works, we will hold the
3590
+ * spinlock after a successful wait.
34093591 */
3410
- spin_lock_irq(&pool->lock);
3411
- wait_event_lock_irq(wq_manager_wait,
3412
- !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
3592
+ rcuwait_wait_event(&manager_wait, wq_manager_inactive(pool),
3593
+ TASK_UNINTERRUPTIBLE);
34133594 pool->flags |= POOL_MANAGER_ACTIVE;
34143595
34153596 while ((worker = first_idle_worker(pool)))
34163597 destroy_worker(worker);
34173598 WARN_ON(pool->nr_workers || pool->nr_idle);
3418
- spin_unlock_irq(&pool->lock);
3599
+ raw_spin_unlock_irq(&pool->lock);
34193600
34203601 mutex_lock(&wq_pool_attach_mutex);
34213602 if (!list_empty(&pool->workers))
....@@ -3429,8 +3610,8 @@
34293610 del_timer_sync(&pool->idle_timer);
34303611 del_timer_sync(&pool->mayday_timer);
34313612
3432
- /* sched-RCU protected to allow dereferences from get_work_pool() */
3433
- call_rcu_sched(&pool->rcu, rcu_free_pool);
3613
+ /* RCU protected to allow dereferences from get_work_pool() */
3614
+ call_rcu(&pool->rcu, rcu_free_pool);
34343615 }
34353616
34363617 /**
....@@ -3543,14 +3724,16 @@
35433724 put_unbound_pool(pool);
35443725 mutex_unlock(&wq_pool_mutex);
35453726
3546
- call_rcu_sched(&pwq->rcu, rcu_free_pwq);
3727
+ call_rcu(&pwq->rcu, rcu_free_pwq);
35473728
35483729 /*
35493730 * If we're the last pwq going away, @wq is already dead and no one
35503731 * is gonna access it anymore. Schedule RCU free.
35513732 */
3552
- if (is_last)
3553
- call_rcu_sched(&wq->rcu, rcu_free_wq);
3733
+ if (is_last) {
3734
+ wq_unregister_lockdep(wq);
3735
+ call_rcu(&wq->rcu, rcu_free_wq);
3736
+ }
35543737 }
35553738
35563739 /**
....@@ -3575,7 +3758,7 @@
35753758 return;
35763759
35773760 /* this function can be called during early boot w/ irq disabled */
3578
- spin_lock_irqsave(&pwq->pool->lock, flags);
3761
+ raw_spin_lock_irqsave(&pwq->pool->lock, flags);
35793762
35803763 /*
35813764 * During [un]freezing, the caller is responsible for ensuring that
....@@ -3605,7 +3788,7 @@
36053788 pwq->max_active = 0;
36063789 }
36073790
3608
- spin_unlock_irqrestore(&pwq->pool->lock, flags);
3791
+ raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
36093792 }
36103793
36113794 /* initialize newly alloced @pwq which is associated with @wq and @pool */
....@@ -3778,8 +3961,8 @@
37783961
37793962 ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_node_ids), GFP_KERNEL);
37803963
3781
- new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3782
- tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3964
+ new_attrs = alloc_workqueue_attrs();
3965
+ tmp_attrs = alloc_workqueue_attrs();
37833966 if (!ctx || !new_attrs || !tmp_attrs)
37843967 goto out_free;
37853968
....@@ -3913,6 +4096,8 @@
39134096 *
39144097 * Performs GFP_KERNEL allocations.
39154098 *
4099
+ * Assumes caller has CPU hotplug read exclusion, i.e. get_online_cpus().
4100
+ *
39164101 * Return: 0 on success and -errno on failure.
39174102 */
39184103 int apply_workqueue_attrs(struct workqueue_struct *wq,
....@@ -3920,13 +4105,14 @@
39204105 {
39214106 int ret;
39224107
3923
- apply_wqattrs_lock();
4108
+ lockdep_assert_cpus_held();
4109
+
4110
+ mutex_lock(&wq_pool_mutex);
39244111 ret = apply_workqueue_attrs_locked(wq, attrs);
3925
- apply_wqattrs_unlock();
4112
+ mutex_unlock(&wq_pool_mutex);
39264113
39274114 return ret;
39284115 }
3929
-EXPORT_SYMBOL_GPL(apply_workqueue_attrs);
39304116
39314117 /**
39324118 * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug
....@@ -4004,9 +4190,9 @@
40044190
40054191 use_dfl_pwq:
40064192 mutex_lock(&wq->mutex);
4007
- spin_lock_irq(&wq->dfl_pwq->pool->lock);
4193
+ raw_spin_lock_irq(&wq->dfl_pwq->pool->lock);
40084194 get_pwq(wq->dfl_pwq);
4009
- spin_unlock_irq(&wq->dfl_pwq->pool->lock);
4195
+ raw_spin_unlock_irq(&wq->dfl_pwq->pool->lock);
40104196 old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
40114197 out_unlock:
40124198 mutex_unlock(&wq->mutex);
....@@ -4036,16 +4222,21 @@
40364222 mutex_unlock(&wq->mutex);
40374223 }
40384224 return 0;
4039
- } else if (wq->flags & __WQ_ORDERED) {
4225
+ }
4226
+
4227
+ get_online_cpus();
4228
+ if (wq->flags & __WQ_ORDERED) {
40404229 ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
40414230 /* there should only be single pwq for ordering guarantee */
40424231 WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
40434232 wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
40444233 "ordering guarantee broken for workqueue %s\n", wq->name);
4045
- return ret;
40464234 } else {
4047
- return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
4235
+ ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
40484236 }
4237
+ put_online_cpus();
4238
+
4239
+ return ret;
40494240 }
40504241
40514242 static int wq_clamp_max_active(int max_active, unsigned int flags,
....@@ -4078,8 +4269,8 @@
40784269
40794270 rescuer->rescue_wq = wq;
40804271 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);
4081
- ret = PTR_ERR_OR_ZERO(rescuer->task);
4082
- if (ret) {
4272
+ if (IS_ERR(rescuer->task)) {
4273
+ ret = PTR_ERR(rescuer->task);
40834274 kfree(rescuer);
40844275 return ret;
40854276 }
....@@ -4091,11 +4282,10 @@
40914282 return 0;
40924283 }
40934284
4094
-struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
4095
- unsigned int flags,
4096
- int max_active,
4097
- struct lock_class_key *key,
4098
- const char *lock_name, ...)
4285
+__printf(1, 4)
4286
+struct workqueue_struct *alloc_workqueue(const char *fmt,
4287
+ unsigned int flags,
4288
+ int max_active, ...)
40994289 {
41004290 size_t tbl_size = 0;
41014291 va_list args;
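For comparison with the old __alloc_workqueue_key() entry point, a minimal sketch of a caller of the new alloc_workqueue() function (illustrative only; "example_wq", the flags and the format argument are hypothetical):

#include <linux/init.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;	/* hypothetical */

static int __init example_init(void)
{
	/* printf-style name format, flags, max_active (0 = default), format args */
	example_wq = alloc_workqueue("example_wq/%d", WQ_UNBOUND | WQ_FREEZABLE, 0, 1);
	return example_wq ? 0 : -ENOMEM;
}

The lockdep class key and lock name that callers previously passed through the alloc_workqueue() wrapper macro are now set up internally by wq_init_lockdep(), added further down in this patch.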
....@@ -4125,12 +4315,12 @@
41254315 return NULL;
41264316
41274317 if (flags & WQ_UNBOUND) {
4128
- wq->unbound_attrs = alloc_workqueue_attrs(GFP_KERNEL);
4318
+ wq->unbound_attrs = alloc_workqueue_attrs();
41294319 if (!wq->unbound_attrs)
41304320 goto err_free_wq;
41314321 }
41324322
4133
- va_start(args, lock_name);
4323
+ va_start(args, max_active);
41344324 vsnprintf(wq->name, sizeof(wq->name), fmt, args);
41354325 va_end(args);
41364326
....@@ -4147,11 +4337,11 @@
41474337 INIT_LIST_HEAD(&wq->flusher_overflow);
41484338 INIT_LIST_HEAD(&wq->maydays);
41494339
4150
- lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
4340
+ wq_init_lockdep(wq);
41514341 INIT_LIST_HEAD(&wq->list);
41524342
41534343 if (alloc_and_link_pwqs(wq) < 0)
4154
- goto err_free_wq;
4344
+ goto err_unreg_lockdep;
41554345
41564346 if (wq_online && init_rescuer(wq) < 0)
41574347 goto err_destroy;
....@@ -4177,6 +4367,9 @@
41774367
41784368 return wq;
41794369
4370
+err_unreg_lockdep:
4371
+ wq_unregister_lockdep(wq);
4372
+ wq_free_lockdep(wq);
41804373 err_free_wq:
41814374 free_workqueue_attrs(wq->unbound_attrs);
41824375 kfree(wq);
....@@ -4185,7 +4378,23 @@
41854378 destroy_workqueue(wq);
41864379 return NULL;
41874380 }
4188
-EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
4381
+EXPORT_SYMBOL_GPL(alloc_workqueue);
4382
+
4383
+static bool pwq_busy(struct pool_workqueue *pwq)
4384
+{
4385
+ int i;
4386
+
4387
+ for (i = 0; i < WORK_NR_COLORS; i++)
4388
+ if (pwq->nr_in_flight[i])
4389
+ return true;
4390
+
4391
+ if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
4392
+ return true;
4393
+ if (pwq->nr_active || !list_empty(&pwq->delayed_works))
4394
+ return true;
4395
+
4396
+ return false;
4397
+}
41894398
41904399 /**
41914400 * destroy_workqueue - safely terminate a workqueue
....@@ -4212,35 +4421,34 @@
42124421 struct worker *rescuer = wq->rescuer;
42134422
42144423 /* this prevents new queueing */
4215
- spin_lock_irq(&wq_mayday_lock);
4424
+ raw_spin_lock_irq(&wq_mayday_lock);
42164425 wq->rescuer = NULL;
4217
- spin_unlock_irq(&wq_mayday_lock);
4426
+ raw_spin_unlock_irq(&wq_mayday_lock);
42184427
42194428 /* rescuer will empty maydays list before exiting */
42204429 kthread_stop(rescuer->task);
42214430 kfree(rescuer);
42224431 }
42234432
4224
- /* sanity checks */
4433
+ /*
4434
+ * Sanity checks - grab all the locks so that we wait for all
4435
+ * in-flight operations which may do put_pwq().
4436
+ */
4437
+ mutex_lock(&wq_pool_mutex);
42254438 mutex_lock(&wq->mutex);
42264439 for_each_pwq(pwq, wq) {
4227
- int i;
4228
-
4229
- for (i = 0; i < WORK_NR_COLORS; i++) {
4230
- if (WARN_ON(pwq->nr_in_flight[i])) {
4231
- mutex_unlock(&wq->mutex);
4232
- show_workqueue_state();
4233
- return;
4234
- }
4235
- }
4236
-
4237
- if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) ||
4238
- WARN_ON(pwq->nr_active) ||
4239
- WARN_ON(!list_empty(&pwq->delayed_works))) {
4440
+ raw_spin_lock_irq(&pwq->pool->lock);
4441
+ if (WARN_ON(pwq_busy(pwq))) {
4442
+ pr_warn("%s: %s has the following busy pwq\n",
4443
+ __func__, wq->name);
4444
+ show_pwq(pwq);
4445
+ raw_spin_unlock_irq(&pwq->pool->lock);
42404446 mutex_unlock(&wq->mutex);
4447
+ mutex_unlock(&wq_pool_mutex);
42414448 show_workqueue_state();
42424449 return;
42434450 }
4451
+ raw_spin_unlock_irq(&pwq->pool->lock);
42444452 }
42454453 mutex_unlock(&wq->mutex);
42464454
....@@ -4248,16 +4456,16 @@
42484456 * wq list is used to freeze wq, remove from list after
42494457 * flushing is complete in case freeze races us.
42504458 */
4251
- mutex_lock(&wq_pool_mutex);
42524459 list_del_rcu(&wq->list);
42534460 mutex_unlock(&wq_pool_mutex);
42544461
42554462 if (!(wq->flags & WQ_UNBOUND)) {
4463
+ wq_unregister_lockdep(wq);
42564464 /*
42574465 * The base ref is never dropped on per-cpu pwqs. Directly
42584466 * schedule RCU free.
42594467 */
4260
- call_rcu_sched(&wq->rcu, rcu_free_wq);
4468
+ call_rcu(&wq->rcu, rcu_free_wq);
42614469 } else {
42624470 /*
42634471 * We're the sole accessor of @wq at this point. Directly
....@@ -4367,7 +4575,8 @@
43674575 struct pool_workqueue *pwq;
43684576 bool ret;
43694577
4370
- rcu_read_lock_sched();
4578
+ rcu_read_lock();
4579
+ preempt_disable();
43714580
43724581 if (cpu == WORK_CPU_UNBOUND)
43734582 cpu = smp_processor_id();
....@@ -4378,7 +4587,8 @@
43784587 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
43794588
43804589 ret = !list_empty(&pwq->delayed_works);
4381
- rcu_read_unlock_sched();
4590
+ preempt_enable();
4591
+ rcu_read_unlock();
43824592
43834593 return ret;
43844594 }
....@@ -4404,15 +4614,15 @@
44044614 if (work_pending(work))
44054615 ret |= WORK_BUSY_PENDING;
44064616
4407
- local_irq_save(flags);
4617
+ rcu_read_lock();
44084618 pool = get_work_pool(work);
44094619 if (pool) {
4410
- spin_lock(&pool->lock);
4620
+ raw_spin_lock_irqsave(&pool->lock, flags);
44114621 if (find_worker_executing_work(pool, work))
44124622 ret |= WORK_BUSY_RUNNING;
4413
- spin_unlock(&pool->lock);
4623
+ raw_spin_unlock_irqrestore(&pool->lock, flags);
44144624 }
4415
- local_irq_restore(flags);
4625
+ rcu_read_unlock();
44164626
44174627 return ret;
44184628 }
....@@ -4476,14 +4686,14 @@
44764686 * Carefully copy the associated workqueue's workfn, name and desc.
44774687 * Keep the original last '\0' in case the original is garbage.
44784688 */
4479
- probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
4480
- probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
4481
- probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
4482
- probe_kernel_read(name, wq->name, sizeof(name) - 1);
4483
- probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
4689
+ copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn));
4690
+ copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq));
4691
+ copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq));
4692
+ copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1);
4693
+ copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1);
44844694
44854695 if (fn || name[0] || desc[0]) {
4486
- printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
4696
+ printk("%sWorkqueue: %s %ps", log_lvl, name, fn);
44874697 if (strcmp(name, desc))
44884698 pr_cont(" (%s)", desc);
44894699 pr_cont("\n");
....@@ -4508,7 +4718,7 @@
45084718 pr_cont("%s BAR(%d)", comma ? "," : "",
45094719 task_pid_nr(barr->task));
45104720 } else {
4511
- pr_cont("%s %pf", comma ? "," : "", work->func);
4721
+ pr_cont("%s %ps", comma ? "," : "", work->func);
45124722 }
45134723 }
45144724
....@@ -4541,9 +4751,9 @@
45414751 if (worker->current_pwq != pwq)
45424752 continue;
45434753
4544
- pr_cont("%s %d%s:%pf", comma ? "," : "",
4754
+ pr_cont("%s %d%s:%ps", comma ? "," : "",
45454755 task_pid_nr(worker->task),
4546
- worker == pwq->wq->rescuer ? "(RESCUER)" : "",
4756
+ worker->rescue_wq ? "(RESCUER)" : "",
45474757 worker->current_func);
45484758 list_for_each_entry(work, &worker->scheduled, entry)
45494759 pr_cont_work(false, work);
....@@ -4597,7 +4807,7 @@
45974807 unsigned long flags;
45984808 int pi;
45994809
4600
- rcu_read_lock_sched();
4810
+ rcu_read_lock();
46014811
46024812 pr_info("Showing busy workqueues and worker pools:\n");
46034813
....@@ -4617,10 +4827,10 @@
46174827 pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
46184828
46194829 for_each_pwq(pwq, wq) {
4620
- spin_lock_irqsave(&pwq->pool->lock, flags);
4830
+ raw_spin_lock_irqsave(&pwq->pool->lock, flags);
46214831 if (pwq->nr_active || !list_empty(&pwq->delayed_works))
46224832 show_pwq(pwq);
4623
- spin_unlock_irqrestore(&pwq->pool->lock, flags);
4833
+ raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
46244834 /*
46254835 * We could be printing a lot from atomic context, e.g.
46264836 * sysrq-t -> show_workqueue_state(). Avoid triggering
....@@ -4634,7 +4844,7 @@
46344844 struct worker *worker;
46354845 bool first = true;
46364846
4637
- spin_lock_irqsave(&pool->lock, flags);
4847
+ raw_spin_lock_irqsave(&pool->lock, flags);
46384848 if (pool->nr_workers == pool->nr_idle)
46394849 goto next_pool;
46404850
....@@ -4653,7 +4863,7 @@
46534863 }
46544864 pr_cont("\n");
46554865 next_pool:
4656
- spin_unlock_irqrestore(&pool->lock, flags);
4866
+ raw_spin_unlock_irqrestore(&pool->lock, flags);
46574867 /*
46584868 * We could be printing a lot from atomic context, e.g.
46594869 * sysrq-t -> show_workqueue_state(). Avoid triggering
....@@ -4662,7 +4872,7 @@
46624872 touch_nmi_watchdog();
46634873 }
46644874
4665
- rcu_read_unlock_sched();
4875
+ rcu_read_unlock();
46664876 }
46674877
46684878 /* used to show worker information through /proc/PID/{comm,stat,status} */
....@@ -4683,7 +4893,7 @@
46834893 struct worker_pool *pool = worker->pool;
46844894
46854895 if (pool) {
4686
- spin_lock_irq(&pool->lock);
4896
+ raw_spin_lock_irq(&pool->lock);
46874897 /*
46884898 * ->desc tracks information (wq name or
46894899 * set_worker_desc()) for the latest execution. If
....@@ -4697,12 +4907,13 @@
46974907 scnprintf(buf + off, size - off, "-%s",
46984908 worker->desc);
46994909 }
4700
- spin_unlock_irq(&pool->lock);
4910
+ raw_spin_unlock_irq(&pool->lock);
47014911 }
47024912 }
47034913
47044914 mutex_unlock(&wq_pool_attach_mutex);
47054915 }
4916
+EXPORT_SYMBOL_GPL(wq_worker_comm);
47064917
47074918 #ifdef CONFIG_SMP
47084919
....@@ -4728,7 +4939,7 @@
47284939
47294940 for_each_cpu_worker_pool(pool, cpu) {
47304941 mutex_lock(&wq_pool_attach_mutex);
4731
- spin_lock_irq(&pool->lock);
4942
+ raw_spin_lock_irq(&pool->lock);
47324943
47334944 /*
47344945 * We've blocked all attach/detach operations. Make all workers
....@@ -4742,7 +4953,7 @@
47424953
47434954 pool->flags |= POOL_DISASSOCIATED;
47444955
4745
- spin_unlock_irq(&pool->lock);
4956
+ raw_spin_unlock_irq(&pool->lock);
47464957 mutex_unlock(&wq_pool_attach_mutex);
47474958
47484959 /*
....@@ -4768,9 +4979,9 @@
47684979 * worker blocking could lead to lengthy stalls. Kick off
47694980 * unbound chain execution of currently pending work items.
47704981 */
4771
- spin_lock_irq(&pool->lock);
4982
+ raw_spin_lock_irq(&pool->lock);
47724983 wake_up_worker(pool);
4773
- spin_unlock_irq(&pool->lock);
4984
+ raw_spin_unlock_irq(&pool->lock);
47744985 }
47754986 }
47764987
....@@ -4797,7 +5008,7 @@
47975008 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
47985009 pool->attrs->cpumask) < 0);
47995010
4800
- spin_lock_irq(&pool->lock);
5011
+ raw_spin_lock_irq(&pool->lock);
48015012
48025013 pool->flags &= ~POOL_DISASSOCIATED;
48035014
....@@ -4826,7 +5037,7 @@
48265037 *
48275038 * WRITE_ONCE() is necessary because @worker->flags may be
48285039 * tested without holding any lock in
4829
- * wq_worker_waking_up(). Without it, NOT_RUNNING test may
5040
+ * wq_worker_running(). Without it, NOT_RUNNING test may
48305041 * fail incorrectly leading to premature concurrency
48315042 * management operations.
48325043 */
....@@ -4836,7 +5047,7 @@
48365047 WRITE_ONCE(worker->flags, worker_flags);
48375048 }
48385049
4839
- spin_unlock_irq(&pool->lock);
5050
+ raw_spin_unlock_irq(&pool->lock);
48405051 }
48415052
48425053 /**
....@@ -5049,16 +5260,16 @@
50495260 * nr_active is monotonically decreasing. It's safe
50505261 * to peek without lock.
50515262 */
5052
- rcu_read_lock_sched();
5263
+ rcu_read_lock();
50535264 for_each_pwq(pwq, wq) {
50545265 WARN_ON_ONCE(pwq->nr_active < 0);
50555266 if (pwq->nr_active) {
50565267 busy = true;
5057
- rcu_read_unlock_sched();
5268
+ rcu_read_unlock();
50585269 goto out_unlock;
50595270 }
50605271 }
5061
- rcu_read_unlock_sched();
5272
+ rcu_read_unlock();
50625273 }
50635274 out_unlock:
50645275 mutex_unlock(&wq_pool_mutex);
....@@ -5260,7 +5471,8 @@
52605471 const char *delim = "";
52615472 int node, written = 0;
52625473
5263
- rcu_read_lock_sched();
5474
+ get_online_cpus();
5475
+ rcu_read_lock();
52645476 for_each_node(node) {
52655477 written += scnprintf(buf + written, PAGE_SIZE - written,
52665478 "%s%d:%d", delim, node,
....@@ -5268,7 +5480,8 @@
52685480 delim = " ";
52695481 }
52705482 written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
5271
- rcu_read_unlock_sched();
5483
+ rcu_read_unlock();
5484
+ put_online_cpus();
52725485
52735486 return written;
52745487 }
....@@ -5293,7 +5506,7 @@
52935506
52945507 lockdep_assert_held(&wq_pool_mutex);
52955508
5296
- attrs = alloc_workqueue_attrs(GFP_KERNEL);
5509
+ attrs = alloc_workqueue_attrs();
52975510 if (!attrs)
52985511 return NULL;
52995512
....@@ -5639,6 +5852,7 @@
56395852 pr_cont_pool_info(pool);
56405853 pr_cont(" stuck for %us!\n",
56415854 jiffies_to_msecs(now - pool_ts) / 1000);
5855
+ trace_android_vh_wq_lockup_pool(pool->cpu, pool_ts);
56425856 }
56435857 }
56445858
....@@ -5722,7 +5936,14 @@
57225936 return;
57235937 }
57245938
5725
- wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL);
5939
+ for_each_possible_cpu(cpu) {
5940
+ if (WARN_ON(cpu_to_node(cpu) == NUMA_NO_NODE)) {
5941
+ pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
5942
+ return;
5943
+ }
5944
+ }
5945
+
5946
+ wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
57265947 BUG_ON(!wq_update_unbound_numa_attrs_buf);
57275948
57285949 /*
....@@ -5739,11 +5960,6 @@
57395960
57405961 for_each_possible_cpu(cpu) {
57415962 node = cpu_to_node(cpu);
5742
- if (WARN_ON(node == NUMA_NO_NODE)) {
5743
- pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
5744
- /* happens iff arch is bonkers, let's just proceed */
5745
- return;
5746
- }
57475963 cpumask_set_cpu(cpu, tbl[node]);
57485964 }
57495965
....@@ -5761,13 +5977,13 @@
57615977 * items. Actual work item execution starts only after kthreads can be
57625978 * created and scheduled right before early initcalls.
57635979 */
5764
-int __init workqueue_init_early(void)
5980
+void __init workqueue_init_early(void)
57655981 {
57665982 int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
57675983 int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
57685984 int i, cpu;
57695985
5770
- WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
5986
+ BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
57715987
57725988 BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
57735989 cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(hk_flags));
....@@ -5797,7 +6013,7 @@
57976013 for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
57986014 struct workqueue_attrs *attrs;
57996015
5800
- BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
6016
+ BUG_ON(!(attrs = alloc_workqueue_attrs()));
58016017 attrs->nice = std_nice[i];
58026018 unbound_std_wq_attrs[i] = attrs;
58036019
....@@ -5806,7 +6022,7 @@
58066022 * guaranteed by max_active which is enforced by pwqs.
58076023 * Turn off NUMA so that dfl_pwq is used for all nodes.
58086024 */
5809
- BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
6025
+ BUG_ON(!(attrs = alloc_workqueue_attrs()));
58106026 attrs->nice = std_nice[i];
58116027 attrs->no_numa = true;
58126028 ordered_wq_attrs[i] = attrs;
....@@ -5828,8 +6044,6 @@
58286044 !system_unbound_wq || !system_freezable_wq ||
58296045 !system_power_efficient_wq ||
58306046 !system_freezable_power_efficient_wq);
5831
-
5832
- return 0;
58336047 }
58346048
58356049 /**
....@@ -5841,7 +6055,7 @@
58416055 * are no kworkers executing the work items yet. Populate the worker pools
58426056 * with the initial workers and enable future kworker creations.
58436057 */
5844
-int __init workqueue_init(void)
6058
+void __init workqueue_init(void)
58456059 {
58466060 struct workqueue_struct *wq;
58476061 struct worker_pool *pool;
....@@ -5888,6 +6102,4 @@
58886102
58896103 wq_online = true;
58906104 wq_watchdog_init();
5891
-
5892
- return 0;
58936105 }