2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/kernel/workqueue.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * kernel/workqueue.c - generic async execution with shared worker pool
34 *
....@@ -50,8 +51,13 @@
5051 #include <linux/sched/isolation.h>
5152 #include <linux/nmi.h>
5253 #include <linux/kvm_para.h>
54
+#include <uapi/linux/sched/types.h>
5355
5456 #include "workqueue_internal.h"
57
+
58
+#include <trace/hooks/wqlockup.h>
59
+/* events/workqueue.h uses default TRACE_INCLUDE_PATH */
60
+#undef TRACE_INCLUDE_PATH
5561
5662 enum {
5763 /*
....@@ -128,16 +134,16 @@
128134 *
129135 * PL: wq_pool_mutex protected.
130136 *
131
- * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads.
137
+ * PR: wq_pool_mutex protected for writes. RCU protected for reads.
132138 *
133139 * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
134140 *
135141 * PWR: wq_pool_mutex and wq->mutex protected for writes. Either or
136
- * sched-RCU for reads.
142
+ * RCU for reads.
137143 *
138144 * WQ: wq->mutex protected.
139145 *
140
- * WR: wq->mutex protected for writes. Sched-RCU protected for reads.
146
+ * WR: wq->mutex protected for writes. RCU protected for reads.
141147 *
142148 * MD: wq_mayday_lock protected.
143149 */
....@@ -145,7 +151,7 @@
145151 /* struct worker is defined in workqueue_internal.h */
146152
147153 struct worker_pool {
148
- spinlock_t lock; /* the pool lock */
154
+ raw_spinlock_t lock; /* the pool lock */
149155 int cpu; /* I: the associated cpu */
150156 int node; /* I: the associated node ID */
151157 int id; /* I: pool ID */
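
The pool lock conversion above follows the usual PREEMPT_RT pattern: spinlock_t becomes a sleeping lock on RT kernels, while raw_spinlock_t keeps busy-waiting, so a lock taken from scheduler hooks or hard-irq context must stay raw. A minimal sketch of the pattern (illustrative only, not part of this patch):

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_lock);

static void example_touch_shared_state(void)
{
	unsigned long flags;

	/* raw_spin_lock_irqsave() never sleeps, even on PREEMPT_RT */
	raw_spin_lock_irqsave(&example_lock, flags);
	/* keep the critical section short and non-sleeping */
	raw_spin_unlock_irqrestore(&example_lock, flags);
}
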
....@@ -184,7 +190,7 @@
184190 atomic_t nr_running ____cacheline_aligned_in_smp;
185191
186192 /*
187
- * Destruction of pool is sched-RCU protected to allow dereferences
193
+ * Destruction of pool is RCU protected to allow dereferences
188194 * from get_work_pool().
189195 */
190196 struct rcu_head rcu;
....@@ -213,7 +219,7 @@
213219 /*
214220 * Release of unbound pwq is punted to system_wq. See put_pwq()
215221 * and pwq_unbound_release_workfn() for details. pool_workqueue
216
- * itself is also sched-RCU protected so that the first pwq can be
222
+ * itself is also RCU protected so that the first pwq can be
217223 * determined without grabbing wq->mutex.
218224 */
219225 struct work_struct unbound_release_work;
....@@ -248,7 +254,7 @@
248254 struct list_head flusher_overflow; /* WQ: flush overflow list */
249255
250256 struct list_head maydays; /* MD: pwqs requesting rescue */
251
- struct worker *rescuer; /* I: rescue worker */
257
+ struct worker *rescuer; /* MD: rescue worker */
252258
253259 int nr_drainers; /* WQ: drain in progress */
254260 int saved_max_active; /* WQ: saved pwq max_active */
....@@ -260,13 +266,15 @@
260266 struct wq_device *wq_dev; /* I: for sysfs interface */
261267 #endif
262268 #ifdef CONFIG_LOCKDEP
269
+ char *lock_name;
270
+ struct lock_class_key key;
263271 struct lockdep_map lockdep_map;
264272 #endif
265273 char name[WQ_NAME_LEN]; /* I: workqueue name */
266274
267275 /*
268
- * Destruction of workqueue_struct is sched-RCU protected to allow
269
- * walking the workqueues list without grabbing wq_pool_mutex.
276
+ * Destruction of workqueue_struct is RCU protected to allow walking
277
+ * the workqueues list without grabbing wq_pool_mutex.
270278 * This is used to dump all workqueues from sysrq.
271279 */
272280 struct rcu_head rcu;
....@@ -298,8 +306,9 @@
298306
299307 static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */
300308 static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
301
-static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
302
-static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
309
+static DEFINE_RAW_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
310
+/* wait for manager to go away */
311
+static struct rcuwait manager_wait = __RCUWAIT_INITIALIZER(manager_wait);
303312
304313 static LIST_HEAD(workqueues); /* PR: list of all workqueues */
305314 static bool workqueue_freezing; /* PL: have wqs started freezing? */
....@@ -353,25 +362,24 @@
353362
354363 static int worker_thread(void *__worker);
355364 static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
365
+static void show_pwq(struct pool_workqueue *pwq);
356366
357367 #define CREATE_TRACE_POINTS
358368 #include <trace/events/workqueue.h>
359369
360
-#define assert_rcu_or_pool_mutex() \
361
- RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
362
- !lockdep_is_held(&wq_pool_mutex), \
363
- "sched RCU or wq_pool_mutex should be held")
370
+EXPORT_TRACEPOINT_SYMBOL_GPL(workqueue_execute_start);
371
+EXPORT_TRACEPOINT_SYMBOL_GPL(workqueue_execute_end);
364372
365
-#define assert_rcu_or_wq_mutex(wq) \
366
- RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
367
- !lockdep_is_held(&wq->mutex), \
368
- "sched RCU or wq->mutex should be held")
373
+#define assert_rcu_or_pool_mutex() \
374
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
375
+ !lockdep_is_held(&wq_pool_mutex), \
376
+ "RCU or wq_pool_mutex should be held")
369377
370378 #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
371
- RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
379
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
372380 !lockdep_is_held(&wq->mutex) && \
373381 !lockdep_is_held(&wq_pool_mutex), \
374
- "sched RCU, wq->mutex or wq_pool_mutex should be held")
382
+ "RCU, wq->mutex or wq_pool_mutex should be held")
375383
376384 #define for_each_cpu_worker_pool(pool, cpu) \
377385 for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
....@@ -383,7 +391,7 @@
383391 * @pool: iteration cursor
384392 * @pi: integer used for iteration
385393 *
386
- * This must be called either with wq_pool_mutex held or sched RCU read
394
+ * This must be called either with wq_pool_mutex held or RCU read
387395 * locked. If the pool needs to be used beyond the locking in effect, the
388396 * caller is responsible for guaranteeing that the pool stays online.
389397 *
....@@ -415,7 +423,7 @@
415423 * @pwq: iteration cursor
416424 * @wq: the target workqueue
417425 *
418
- * This must be called either with wq->mutex held or sched RCU read locked.
426
+ * This must be called either with wq->mutex held or RCU read locked.
419427 * If the pwq needs to be used beyond the locking in effect, the caller is
420428 * responsible for guaranteeing that the pwq stays online.
421429 *
....@@ -423,13 +431,12 @@
423431 * ignored.
424432 */
425433 #define for_each_pwq(pwq, wq) \
426
- list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node) \
427
- if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
428
- else
434
+ list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node, \
435
+ lockdep_is_held(&(wq->mutex)))
429436
430437 #ifdef CONFIG_DEBUG_OBJECTS_WORK
431438
432
-static struct debug_obj_descr work_debug_descr;
439
+static const struct debug_obj_descr work_debug_descr;
433440
434441 static void *work_debug_hint(void *addr)
435442 {
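
The reworked for_each_pwq() above relies on the four-argument form of list_for_each_entry_rcu(), whose last argument is a lockdep expression that makes the traversal legal either under rcu_read_lock() or while the named lock is held. A small illustrative sketch of that form (the names below are hypothetical):

#include <linux/rculist.h>
#include <linux/mutex.h>

struct example_item {
	struct list_head	node;
	int			val;
};

static LIST_HEAD(example_list);
static DEFINE_MUTEX(example_mutex);

static int example_sum(void)
{
	struct example_item *it;
	int sum = 0;

	rcu_read_lock();
	/* also accepted without rcu_read_lock() when example_mutex is held */
	list_for_each_entry_rcu(it, &example_list, node,
				lockdep_is_held(&example_mutex))
		sum += it->val;
	rcu_read_unlock();

	return sum;
}
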
....@@ -479,7 +486,7 @@
479486 }
480487 }
481488
482
-static struct debug_obj_descr work_debug_descr = {
489
+static const struct debug_obj_descr work_debug_descr = {
483490 .name = "work_struct",
484491 .debug_hint = work_debug_hint,
485492 .is_static_object = work_is_static_object,
....@@ -551,7 +558,7 @@
551558 * @wq: the target workqueue
552559 * @node: the node ID
553560 *
554
- * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU
561
+ * This must be called with any of wq_pool_mutex, wq->mutex or RCU
555562 * read locked.
556563 * If the pwq needs to be used beyond the locking in effect, the caller is
557564 * responsible for guaranteeing that the pwq stays online.
....@@ -647,7 +654,7 @@
647654 * The following mb guarantees that previous clear of a PENDING bit
648655 * will not be reordered with any speculative LOADS or STORES from
649656 * work->current_func, which is executed afterwards. This possible
650
- * reordering can lead to a missed execution on attempt to qeueue
657
+ * reordering can lead to a missed execution on attempt to queue
651658 * the same @work. E.g. consider this case:
652659 *
653660 * CPU#0 CPU#1
....@@ -680,12 +687,17 @@
680687 set_work_data(work, WORK_STRUCT_NO_POOL, 0);
681688 }
682689
690
+static inline struct pool_workqueue *work_struct_pwq(unsigned long data)
691
+{
692
+ return (struct pool_workqueue *)(data & WORK_STRUCT_WQ_DATA_MASK);
693
+}
694
+
683695 static struct pool_workqueue *get_work_pwq(struct work_struct *work)
684696 {
685697 unsigned long data = atomic_long_read(&work->data);
686698
687699 if (data & WORK_STRUCT_PWQ)
688
- return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
700
+ return work_struct_pwq(data);
689701 else
690702 return NULL;
691703 }
....@@ -695,8 +707,8 @@
695707 * @work: the work item of interest
696708 *
697709 * Pools are created and destroyed under wq_pool_mutex, and allows read
698
- * access under sched-RCU read lock. As such, this function should be
699
- * called under wq_pool_mutex or with preemption disabled.
710
+ * access under RCU read lock. As such, this function should be
711
+ * called under wq_pool_mutex or inside of a rcu_read_lock() region.
700712 *
701713 * All fields of the returned pool are accessible as long as the above
702714 * mentioned locking is in effect. If the returned pool needs to be used
....@@ -713,8 +725,7 @@
713725 assert_rcu_or_pool_mutex();
714726
715727 if (data & WORK_STRUCT_PWQ)
716
- return ((struct pool_workqueue *)
717
- (data & WORK_STRUCT_WQ_DATA_MASK))->pool;
728
+ return work_struct_pwq(data)->pool;
718729
719730 pool_id = data >> WORK_OFFQ_POOL_SHIFT;
720731 if (pool_id == WORK_OFFQ_POOL_NONE)
....@@ -735,8 +746,7 @@
735746 unsigned long data = atomic_long_read(&work->data);
736747
737748 if (data & WORK_STRUCT_PWQ)
738
- return ((struct pool_workqueue *)
739
- (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
749
+ return work_struct_pwq(data)->pool->id;
740750
741751 return data >> WORK_OFFQ_POOL_SHIFT;
742752 }
....@@ -829,7 +839,7 @@
829839 * Wake up the first idle worker of @pool.
830840 *
831841 * CONTEXT:
832
- * spin_lock_irq(pool->lock).
842
+ * raw_spin_lock_irq(pool->lock).
833843 */
834844 static void wake_up_worker(struct worker_pool *pool)
835845 {
....@@ -840,43 +850,42 @@
840850 }
841851
842852 /**
843
- * wq_worker_waking_up - a worker is waking up
853
+ * wq_worker_running - a worker is running again
844854 * @task: task waking up
845
- * @cpu: CPU @task is waking up to
846855 *
847
- * This function is called during try_to_wake_up() when a worker is
848
- * being awoken.
849
- *
850
- * CONTEXT:
851
- * spin_lock_irq(rq->lock)
856
+ * This function is called when a worker returns from schedule()
852857 */
853
-void wq_worker_waking_up(struct task_struct *task, int cpu)
858
+void wq_worker_running(struct task_struct *task)
854859 {
855860 struct worker *worker = kthread_data(task);
856861
857
- if (!(worker->flags & WORKER_NOT_RUNNING)) {
858
- WARN_ON_ONCE(worker->pool->cpu != cpu);
862
+ if (!worker->sleeping)
863
+ return;
864
+
865
+ /*
866
+ * If preempted by unbind_workers() between the WORKER_NOT_RUNNING check
867
+ * and the nr_running increment below, we may ruin the nr_running reset
868
+ * and leave with an unexpected pool->nr_running == 1 on the newly unbound
869
+ * pool. Protect against such race.
870
+ */
871
+ preempt_disable();
872
+ if (!(worker->flags & WORKER_NOT_RUNNING))
859873 atomic_inc(&worker->pool->nr_running);
860
- }
874
+ preempt_enable();
875
+ worker->sleeping = 0;
861876 }
862877
863878 /**
864879 * wq_worker_sleeping - a worker is going to sleep
865880 * @task: task going to sleep
866881 *
867
- * This function is called during schedule() when a busy worker is
868
- * going to sleep. Worker on the same cpu can be woken up by
869
- * returning pointer to its task.
870
- *
871
- * CONTEXT:
872
- * spin_lock_irq(rq->lock)
873
- *
874
- * Return:
875
- * Worker task on @cpu to wake up, %NULL if none.
882
+ * This function is called from schedule() when a busy worker is
883
+ * going to sleep. Preemption needs to be disabled to protect ->sleeping
884
+ * assignment.
876885 */
877
-struct task_struct *wq_worker_sleeping(struct task_struct *task)
886
+void wq_worker_sleeping(struct task_struct *task)
878887 {
879
- struct worker *worker = kthread_data(task), *to_wakeup = NULL;
888
+ struct worker *next, *worker = kthread_data(task);
880889 struct worker_pool *pool;
881890
882891 /*
....@@ -885,13 +894,16 @@
885894 * checking NOT_RUNNING.
886895 */
887896 if (worker->flags & WORKER_NOT_RUNNING)
888
- return NULL;
897
+ return;
889898
890899 pool = worker->pool;
891900
892
- /* this can only happen on the local cpu */
893
- if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id()))
894
- return NULL;
901
+ /* Return if preempted before wq_worker_running() was reached */
902
+ if (worker->sleeping)
903
+ return;
904
+
905
+ worker->sleeping = 1;
906
+ raw_spin_lock_irq(&pool->lock);
895907
896908 /*
897909 * The counterpart of the following dec_and_test, implied mb,
....@@ -905,19 +917,33 @@
905917 * lock is safe.
906918 */
907919 if (atomic_dec_and_test(&pool->nr_running) &&
908
- !list_empty(&pool->worklist))
909
- to_wakeup = first_idle_worker(pool);
910
- return to_wakeup ? to_wakeup->task : NULL;
920
+ !list_empty(&pool->worklist)) {
921
+ next = first_idle_worker(pool);
922
+ if (next)
923
+ wake_up_process(next->task);
924
+ }
925
+ raw_spin_unlock_irq(&pool->lock);
911926 }
912927
913928 /**
914929 * wq_worker_last_func - retrieve worker's last work function
930
+ * @task: Task to retrieve last work function of.
915931 *
916932 * Determine the last function a worker executed. This is called from
917933 * the scheduler to get a worker's last known identity.
918934 *
919935 * CONTEXT:
920
- * spin_lock_irq(rq->lock)
936
+ * raw_spin_lock_irq(rq->lock)
937
+ *
938
+ * This function is called during schedule() when a kworker is going
939
+ * to sleep. It's used by psi to identify aggregation workers during
940
+ * dequeuing, to allow periodic aggregation to shut-off when that
941
+ * worker is the last task in the system or cgroup to go to sleep.
942
+ *
943
+ * As this function doesn't involve any workqueue-related locking, it
944
+ * only returns stable values when called from inside the scheduler's
945
+ * queuing and dequeuing paths, when @task, which must be a kworker,
946
+ * is guaranteed to not be processing any works.
921947 *
922948 * Return:
923949 * The last work function %current executed as a worker, NULL if it
....@@ -938,7 +964,7 @@
938964 * Set @flags in @worker->flags and adjust nr_running accordingly.
939965 *
940966 * CONTEXT:
941
- * spin_lock_irq(pool->lock)
967
+ * raw_spin_lock_irq(pool->lock)
942968 */
943969 static inline void worker_set_flags(struct worker *worker, unsigned int flags)
944970 {
....@@ -963,7 +989,7 @@
963989 * Clear @flags in @worker->flags and adjust nr_running accordingly.
964990 *
965991 * CONTEXT:
966
- * spin_lock_irq(pool->lock)
992
+ * raw_spin_lock_irq(pool->lock)
967993 */
968994 static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
969995 {
....@@ -1011,7 +1037,7 @@
10111037 * actually occurs, it should be easy to locate the culprit work function.
10121038 *
10131039 * CONTEXT:
1014
- * spin_lock_irq(pool->lock).
1040
+ * raw_spin_lock_irq(pool->lock).
10151041 *
10161042 * Return:
10171043 * Pointer to worker which is executing @work if found, %NULL
....@@ -1046,7 +1072,7 @@
10461072 * nested inside outer list_for_each_entry_safe().
10471073 *
10481074 * CONTEXT:
1049
- * spin_lock_irq(pool->lock).
1075
+ * raw_spin_lock_irq(pool->lock).
10501076 */
10511077 static void move_linked_works(struct work_struct *work, struct list_head *head,
10521078 struct work_struct **nextp)
....@@ -1121,12 +1147,12 @@
11211147 {
11221148 if (pwq) {
11231149 /*
1124
- * As both pwqs and pools are sched-RCU protected, the
1150
+ * As both pwqs and pools are RCU protected, the
11251151 * following lock operations are safe.
11261152 */
1127
- spin_lock_irq(&pwq->pool->lock);
1153
+ raw_spin_lock_irq(&pwq->pool->lock);
11281154 put_pwq(pwq);
1129
- spin_unlock_irq(&pwq->pool->lock);
1155
+ raw_spin_unlock_irq(&pwq->pool->lock);
11301156 }
11311157 }
11321158
....@@ -1159,7 +1185,7 @@
11591185 * decrement nr_in_flight of its pwq and handle workqueue flushing.
11601186 *
11611187 * CONTEXT:
1162
- * spin_lock_irq(pool->lock).
1188
+ * raw_spin_lock_irq(pool->lock).
11631189 */
11641190 static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
11651191 {
....@@ -1207,11 +1233,14 @@
12071233 * stable state - idle, on timer or on worklist.
12081234 *
12091235 * Return:
1236
+ *
1237
+ * ======== ================================================================
12101238 * 1 if @work was pending and we successfully stole PENDING
12111239 * 0 if @work was idle and we claimed PENDING
12121240 * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry
12131241 * -ENOENT if someone else is canceling @work, this state may persist
12141242 * for arbitrarily long
1243
+ * ======== ================================================================
12151244 *
12161245 * Note:
12171246 * On >= 0 return, the caller owns @work's PENDING bit. To avoid getting
....@@ -1249,6 +1278,7 @@
12491278 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
12501279 return 0;
12511280
1281
+ rcu_read_lock();
12521282 /*
12531283 * The queueing is in progress, or it is already queued. Try to
12541284 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
....@@ -1257,7 +1287,7 @@
12571287 if (!pool)
12581288 goto fail;
12591289
1260
- spin_lock(&pool->lock);
1290
+ raw_spin_lock(&pool->lock);
12611291 /*
12621292 * work->data is guaranteed to point to pwq only while the work
12631293 * item is queued on pwq->wq, and both updating work->data to point
....@@ -1286,11 +1316,13 @@
12861316 /* work->data points to pwq iff queued, point to pool */
12871317 set_work_pool_and_keep_pending(work, pool->id);
12881318
1289
- spin_unlock(&pool->lock);
1319
+ raw_spin_unlock(&pool->lock);
1320
+ rcu_read_unlock();
12901321 return 1;
12911322 }
1292
- spin_unlock(&pool->lock);
1323
+ raw_spin_unlock(&pool->lock);
12931324 fail:
1325
+ rcu_read_unlock();
12941326 local_irq_restore(*flags);
12951327 if (work_is_canceling(work))
12961328 return -ENOENT;
....@@ -1309,12 +1341,15 @@
13091341 * work_struct flags.
13101342 *
13111343 * CONTEXT:
1312
- * spin_lock_irq(pool->lock).
1344
+ * raw_spin_lock_irq(pool->lock).
13131345 */
13141346 static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
13151347 struct list_head *head, unsigned int extra_flags)
13161348 {
13171349 struct worker_pool *pool = pwq->pool;
1350
+
1351
+ /* record the work call stack in order to print it in KASAN reports */
1352
+ kasan_record_aux_stack(work);
13181353
13191354 /* we own @work, set data and link */
13201355 set_work_pwq(work, pwq, extra_flags);
....@@ -1342,7 +1377,7 @@
13421377
13431378 worker = current_wq_worker();
13441379 /*
1345
- * Return %true iff I'm a worker execuing a work item on @wq. If
1380
+ * Return %true iff I'm a worker executing a work item on @wq. If
13461381 * I'm @worker, it's safe to dereference it without locking.
13471382 */
13481383 return worker && worker->current_pwq->wq == wq;
....@@ -1403,6 +1438,7 @@
14031438 if (unlikely(wq->flags & __WQ_DRAINING) &&
14041439 WARN_ON_ONCE(!is_chained_work(wq)))
14051440 return;
1441
+ rcu_read_lock();
14061442 retry:
14071443 /* pwq which will be used unless @work is executing elsewhere */
14081444 if (wq->flags & WQ_UNBOUND) {
....@@ -1424,7 +1460,7 @@
14241460 if (last_pool && last_pool != pwq->pool) {
14251461 struct worker *worker;
14261462
1427
- spin_lock(&last_pool->lock);
1463
+ raw_spin_lock(&last_pool->lock);
14281464
14291465 worker = find_worker_executing_work(last_pool, work);
14301466
....@@ -1432,11 +1468,11 @@
14321468 pwq = worker->current_pwq;
14331469 } else {
14341470 /* meh... not running there, queue here */
1435
- spin_unlock(&last_pool->lock);
1436
- spin_lock(&pwq->pool->lock);
1471
+ raw_spin_unlock(&last_pool->lock);
1472
+ raw_spin_lock(&pwq->pool->lock);
14371473 }
14381474 } else {
1439
- spin_lock(&pwq->pool->lock);
1475
+ raw_spin_lock(&pwq->pool->lock);
14401476 }
14411477
14421478 /*
....@@ -1449,7 +1485,7 @@
14491485 */
14501486 if (unlikely(!pwq->refcnt)) {
14511487 if (wq->flags & WQ_UNBOUND) {
1452
- spin_unlock(&pwq->pool->lock);
1488
+ raw_spin_unlock(&pwq->pool->lock);
14531489 cpu_relax();
14541490 goto retry;
14551491 }
....@@ -1461,10 +1497,8 @@
14611497 /* pwq determined, queue */
14621498 trace_workqueue_queue_work(req_cpu, pwq, work);
14631499
1464
- if (WARN_ON(!list_empty(&work->entry))) {
1465
- spin_unlock(&pwq->pool->lock);
1466
- return;
1467
- }
1500
+ if (WARN_ON(!list_empty(&work->entry)))
1501
+ goto out;
14681502
14691503 pwq->nr_in_flight[pwq->work_color]++;
14701504 work_flags = work_color_to_flags(pwq->work_color);
....@@ -1483,7 +1517,9 @@
14831517 debug_work_activate(work);
14841518 insert_work(pwq, work, worklist, work_flags);
14851519
1486
- spin_unlock(&pwq->pool->lock);
1520
+out:
1521
+ raw_spin_unlock(&pwq->pool->lock);
1522
+ rcu_read_unlock();
14871523 }
14881524
14891525 /**
....@@ -1515,6 +1551,90 @@
15151551 }
15161552 EXPORT_SYMBOL(queue_work_on);
15171553
1554
+/**
1555
+ * workqueue_select_cpu_near - Select a CPU based on NUMA node
1556
+ * @node: NUMA node ID that we want to select a CPU from
1557
+ *
1558
+ * This function will attempt to find a "random" cpu available on a given
1559
+ * node. If there are no CPUs available on the given node it will return
1560
+ * WORK_CPU_UNBOUND indicating that we should just schedule to any
1561
+ * available CPU if we need to schedule this work.
1562
+ */
1563
+static int workqueue_select_cpu_near(int node)
1564
+{
1565
+ int cpu;
1566
+
1567
+ /* No point in doing this if NUMA isn't enabled for workqueues */
1568
+ if (!wq_numa_enabled)
1569
+ return WORK_CPU_UNBOUND;
1570
+
1571
+ /* Delay binding to CPU if node is not valid or online */
1572
+ if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
1573
+ return WORK_CPU_UNBOUND;
1574
+
1575
+ /* Use local node/cpu if we are already there */
1576
+ cpu = raw_smp_processor_id();
1577
+ if (node == cpu_to_node(cpu))
1578
+ return cpu;
1579
+
1580
+ /* Use "random" otherwise know as "first" online CPU of node */
1581
+ cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
1582
+
1583
+ /* If CPU is valid return that, otherwise just defer */
1584
+ return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
1585
+}
1586
+
1587
+/**
1588
+ * queue_work_node - queue work on a "random" cpu for a given NUMA node
1589
+ * @node: NUMA node that we are targeting the work for
1590
+ * @wq: workqueue to use
1591
+ * @work: work to queue
1592
+ *
1593
+ * We queue the work to a "random" CPU within a given NUMA node. The basic
1594
+ * idea here is to provide a way to somehow associate work with a given
1595
+ * NUMA node.
1596
+ *
1597
+ * This function will only make a best effort attempt at getting this onto
1598
+ * the right NUMA node. If no node is requested or the requested node is
1599
+ * offline then we just fall back to standard queue_work behavior.
1600
+ *
1601
+ * Currently the "random" CPU ends up being the first available CPU in the
1602
+ * intersection of cpu_online_mask and the cpumask of the node, unless we
1603
+ * are running on the node. In that case we just use the current CPU.
1604
+ *
1605
+ * Return: %false if @work was already on a queue, %true otherwise.
1606
+ */
1607
+bool queue_work_node(int node, struct workqueue_struct *wq,
1608
+ struct work_struct *work)
1609
+{
1610
+ unsigned long flags;
1611
+ bool ret = false;
1612
+
1613
+ /*
1614
+ * This current implementation is specific to unbound workqueues.
1615
+ * Specifically we only return the first available CPU for a given
1616
+ * node instead of cycling through individual CPUs within the node.
1617
+ *
1618
+ * If this is used with a per-cpu workqueue then the logic in
1619
+ * workqueue_select_cpu_near would need to be updated to allow for
1620
+ * some round robin type logic.
1621
+ */
1622
+ WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
1623
+
1624
+ local_irq_save(flags);
1625
+
1626
+ if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1627
+ int cpu = workqueue_select_cpu_near(node);
1628
+
1629
+ __queue_work(cpu, wq, work);
1630
+ ret = true;
1631
+ }
1632
+
1633
+ local_irq_restore(flags);
1634
+ return ret;
1635
+}
1636
+EXPORT_SYMBOL_GPL(queue_work_node);
1637
+
15181638 void delayed_work_timer_fn(struct timer_list *t)
15191639 {
15201640 struct delayed_work *dwork = from_timer(dwork, t, timer);
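
queue_work_node() added above is a new exported entry point for unbound workqueues; a short usage sketch (the names below are illustrative, not from this patch):

#include <linux/workqueue.h>

static void example_fn(struct work_struct *work)
{
	/* runs on a CPU close to the requested NUMA node when possible */
}

static DECLARE_WORK(example_work, example_fn);

static bool example_queue_near(struct workqueue_struct *unbound_wq, int nid)
{
	/* falls back to WORK_CPU_UNBOUND if @nid has no online CPU */
	return queue_work_node(nid, unbound_wq, &example_work);
}
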
....@@ -1531,9 +1651,14 @@
15311651 struct work_struct *work = &dwork->work;
15321652
15331653 WARN_ON_ONCE(!wq);
1534
-#ifndef CONFIG_CFI_CLANG
1535
- WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
1536
-#endif
1654
+ /*
1655
+ * With CFI, timer->function can point to a jump table entry in a module,
1656
+ * which fails the comparison. Disable the warning if CFI and modules are
1657
+ * both enabled.
1658
+ */
1659
+ if (!IS_ENABLED(CONFIG_CFI_CLANG) || !IS_ENABLED(CONFIG_MODULES))
1660
+ WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
1661
+
15371662 WARN_ON_ONCE(timer_pending(timer));
15381663 WARN_ON_ONCE(!list_empty(&work->entry));
15391664
....@@ -1644,7 +1769,7 @@
16441769 *
16451770 * Return: %false if @rwork was already pending, %true otherwise. Note
16461771 * that a full RCU grace period is guaranteed only after a %true return.
1647
- * While @rwork is guarnateed to be executed after a %false return, the
1772
+ * While @rwork is guaranteed to be executed after a %false return, the
16481773 * execution may happen before a full RCU grace period has passed.
16491774 */
16501775 bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
....@@ -1669,7 +1794,7 @@
16691794 * necessary.
16701795 *
16711796 * LOCKING:
1672
- * spin_lock_irq(pool->lock).
1797
+ * raw_spin_lock_irq(pool->lock).
16731798 */
16741799 static void worker_enter_idle(struct worker *worker)
16751800 {
....@@ -1709,7 +1834,7 @@
17091834 * @worker is leaving idle state. Update stats.
17101835 *
17111836 * LOCKING:
1712
- * spin_lock_irq(pool->lock).
1837
+ * raw_spin_lock_irq(pool->lock).
17131838 */
17141839 static void worker_leave_idle(struct worker *worker)
17151840 {
....@@ -1838,17 +1963,26 @@
18381963 goto fail;
18391964
18401965 set_user_nice(worker->task, pool->attrs->nice);
1966
+ if (IS_ENABLED(CONFIG_ROCKCHIP_OPTIMIZE_RT_PRIO)) {
1967
+ struct sched_param param;
1968
+
1969
+ if (pool->attrs->nice == 0)
1970
+ param.sched_priority = MAX_RT_PRIO / 2 - 4;
1971
+ else
1972
+ param.sched_priority = MAX_RT_PRIO / 2 - 2;
1973
+ sched_setscheduler_nocheck(worker->task, SCHED_RR, &param);
1974
+ }
18411975 kthread_bind_mask(worker->task, pool->attrs->cpumask);
18421976
18431977 /* successful, attach the worker to the pool */
18441978 worker_attach_to_pool(worker, pool);
18451979
18461980 /* start the newly created worker */
1847
- spin_lock_irq(&pool->lock);
1981
+ raw_spin_lock_irq(&pool->lock);
18481982 worker->pool->nr_workers++;
18491983 worker_enter_idle(worker);
18501984 wake_up_process(worker->task);
1851
- spin_unlock_irq(&pool->lock);
1985
+ raw_spin_unlock_irq(&pool->lock);
18521986
18531987 return worker;
18541988
....@@ -1867,7 +2001,7 @@
18672001 * be idle.
18682002 *
18692003 * CONTEXT:
1870
- * spin_lock_irq(pool->lock).
2004
+ * raw_spin_lock_irq(pool->lock).
18712005 */
18722006 static void destroy_worker(struct worker *worker)
18732007 {
....@@ -1893,7 +2027,7 @@
18932027 {
18942028 struct worker_pool *pool = from_timer(pool, t, idle_timer);
18952029
1896
- spin_lock_irq(&pool->lock);
2030
+ raw_spin_lock_irq(&pool->lock);
18972031
18982032 while (too_many_workers(pool)) {
18992033 struct worker *worker;
....@@ -1911,7 +2045,7 @@
19112045 destroy_worker(worker);
19122046 }
19132047
1914
- spin_unlock_irq(&pool->lock);
2048
+ raw_spin_unlock_irq(&pool->lock);
19152049 }
19162050
19172051 static void send_mayday(struct work_struct *work)
....@@ -1942,8 +2076,8 @@
19422076 struct worker_pool *pool = from_timer(pool, t, mayday_timer);
19432077 struct work_struct *work;
19442078
1945
- spin_lock_irq(&pool->lock);
1946
- spin_lock(&wq_mayday_lock); /* for wq->maydays */
2079
+ raw_spin_lock_irq(&pool->lock);
2080
+ raw_spin_lock(&wq_mayday_lock); /* for wq->maydays */
19472081
19482082 if (need_to_create_worker(pool)) {
19492083 /*
....@@ -1956,8 +2090,8 @@
19562090 send_mayday(work);
19572091 }
19582092
1959
- spin_unlock(&wq_mayday_lock);
1960
- spin_unlock_irq(&pool->lock);
2093
+ raw_spin_unlock(&wq_mayday_lock);
2094
+ raw_spin_unlock_irq(&pool->lock);
19612095
19622096 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
19632097 }
....@@ -1976,7 +2110,7 @@
19762110 * may_start_working() %true.
19772111 *
19782112 * LOCKING:
1979
- * spin_lock_irq(pool->lock) which may be released and regrabbed
2113
+ * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
19802114 * multiple times. Does GFP_KERNEL allocations. Called only from
19812115 * manager.
19822116 */
....@@ -1985,7 +2119,7 @@
19852119 __acquires(&pool->lock)
19862120 {
19872121 restart:
1988
- spin_unlock_irq(&pool->lock);
2122
+ raw_spin_unlock_irq(&pool->lock);
19892123
19902124 /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
19912125 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
....@@ -2001,7 +2135,7 @@
20012135 }
20022136
20032137 del_timer_sync(&pool->mayday_timer);
2004
- spin_lock_irq(&pool->lock);
2138
+ raw_spin_lock_irq(&pool->lock);
20052139 /*
20062140 * This is necessary even after a new worker was just successfully
20072141 * created as @pool->lock was dropped and the new worker might have
....@@ -2024,7 +2158,7 @@
20242158 * and may_start_working() is true.
20252159 *
20262160 * CONTEXT:
2027
- * spin_lock_irq(pool->lock) which may be released and regrabbed
2161
+ * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
20282162 * multiple times. Does GFP_KERNEL allocations.
20292163 *
20302164 * Return:
....@@ -2047,7 +2181,7 @@
20472181
20482182 pool->manager = NULL;
20492183 pool->flags &= ~POOL_MANAGER_ACTIVE;
2050
- wake_up(&wq_manager_wait);
2184
+ rcuwait_wake_up(&manager_wait);
20512185 return true;
20522186 }
20532187
....@@ -2063,7 +2197,7 @@
20632197 * call this function to process a work.
20642198 *
20652199 * CONTEXT:
2066
- * spin_lock_irq(pool->lock) which is released and regrabbed.
2200
+ * raw_spin_lock_irq(pool->lock) which is released and regrabbed.
20672201 */
20682202 static void process_one_work(struct worker *worker, struct work_struct *work)
20692203 __releases(&pool->lock)
....@@ -2145,7 +2279,7 @@
21452279 */
21462280 set_work_pool_and_clear_pending(work, pool->id);
21472281
2148
- spin_unlock_irq(&pool->lock);
2282
+ raw_spin_unlock_irq(&pool->lock);
21492283
21502284 lock_map_acquire(&pwq->wq->lockdep_map);
21512285 lock_map_acquire(&lockdep_map);
....@@ -2177,13 +2311,13 @@
21772311 * While we must be careful to not use "work" after this, the trace
21782312 * point will only record its address.
21792313 */
2180
- trace_workqueue_execute_end(work);
2314
+ trace_workqueue_execute_end(work, worker->current_func);
21812315 lock_map_release(&lockdep_map);
21822316 lock_map_release(&pwq->wq->lockdep_map);
21832317
21842318 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
21852319 pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
2186
- " last function: %pf\n",
2320
+ " last function: %ps\n",
21872321 current->comm, preempt_count(), task_pid_nr(current),
21882322 worker->current_func);
21892323 debug_show_held_locks(current);
....@@ -2191,7 +2325,7 @@
21912325 }
21922326
21932327 /*
2194
- * The following prevents a kworker from hogging CPU on !PREEMPT
2328
+ * The following prevents a kworker from hogging CPU on !PREEMPTION
21952329 * kernels, where a requeueing work item waiting for something to
21962330 * happen could deadlock with stop_machine as such work item could
21972331 * indefinitely requeue itself while all other CPUs are trapped in
....@@ -2200,7 +2334,7 @@
22002334 */
22012335 cond_resched();
22022336
2203
- spin_lock_irq(&pool->lock);
2337
+ raw_spin_lock_irq(&pool->lock);
22042338
22052339 /* clear cpu intensive status */
22062340 if (unlikely(cpu_intensive))
....@@ -2226,7 +2360,7 @@
22262360 * fetches a work from the top and executes it.
22272361 *
22282362 * CONTEXT:
2229
- * spin_lock_irq(pool->lock) which may be released and regrabbed
2363
+ * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
22302364 * multiple times.
22312365 */
22322366 static void process_scheduled_works(struct worker *worker)
....@@ -2268,11 +2402,11 @@
22682402 /* tell the scheduler that this is a workqueue worker */
22692403 set_pf_worker(true);
22702404 woke_up:
2271
- spin_lock_irq(&pool->lock);
2405
+ raw_spin_lock_irq(&pool->lock);
22722406
22732407 /* am I supposed to die? */
22742408 if (unlikely(worker->flags & WORKER_DIE)) {
2275
- spin_unlock_irq(&pool->lock);
2409
+ raw_spin_unlock_irq(&pool->lock);
22762410 WARN_ON_ONCE(!list_empty(&worker->entry));
22772411 set_pf_worker(false);
22782412
....@@ -2338,7 +2472,7 @@
23382472 */
23392473 worker_enter_idle(worker);
23402474 __set_current_state(TASK_IDLE);
2341
- spin_unlock_irq(&pool->lock);
2475
+ raw_spin_unlock_irq(&pool->lock);
23422476 schedule();
23432477 goto woke_up;
23442478 }
....@@ -2392,7 +2526,7 @@
23922526 should_stop = kthread_should_stop();
23932527
23942528 /* see whether any pwq is asking for help */
2395
- spin_lock_irq(&wq_mayday_lock);
2529
+ raw_spin_lock_irq(&wq_mayday_lock);
23962530
23972531 while (!list_empty(&wq->maydays)) {
23982532 struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
....@@ -2404,11 +2538,11 @@
24042538 __set_current_state(TASK_RUNNING);
24052539 list_del_init(&pwq->mayday_node);
24062540
2407
- spin_unlock_irq(&wq_mayday_lock);
2541
+ raw_spin_unlock_irq(&wq_mayday_lock);
24082542
24092543 worker_attach_to_pool(rescuer, pool);
24102544
2411
- spin_lock_irq(&pool->lock);
2545
+ raw_spin_lock_irq(&pool->lock);
24122546
24132547 /*
24142548 * Slurp in all works issued via this workqueue and
....@@ -2436,8 +2570,8 @@
24362570 * being used to relieve memory pressure, don't
24372571 * incur MAYDAY_INTERVAL delay inbetween.
24382572 */
2439
- if (need_to_create_worker(pool)) {
2440
- spin_lock(&wq_mayday_lock);
2573
+ if (pwq->nr_active && need_to_create_worker(pool)) {
2574
+ raw_spin_lock(&wq_mayday_lock);
24412575 /*
24422576 * Queue iff we aren't racing destruction
24432577 * and somebody else hasn't queued it already.
....@@ -2446,7 +2580,7 @@
24462580 get_pwq(pwq);
24472581 list_add_tail(&pwq->mayday_node, &wq->maydays);
24482582 }
2449
- spin_unlock(&wq_mayday_lock);
2583
+ raw_spin_unlock(&wq_mayday_lock);
24502584 }
24512585 }
24522586
....@@ -2464,14 +2598,14 @@
24642598 if (need_more_worker(pool))
24652599 wake_up_worker(pool);
24662600
2467
- spin_unlock_irq(&pool->lock);
2601
+ raw_spin_unlock_irq(&pool->lock);
24682602
24692603 worker_detach_from_pool(rescuer);
24702604
2471
- spin_lock_irq(&wq_mayday_lock);
2605
+ raw_spin_lock_irq(&wq_mayday_lock);
24722606 }
24732607
2474
- spin_unlock_irq(&wq_mayday_lock);
2608
+ raw_spin_unlock_irq(&wq_mayday_lock);
24752609
24762610 if (should_stop) {
24772611 __set_current_state(TASK_RUNNING);
....@@ -2508,11 +2642,11 @@
25082642 worker = current_wq_worker();
25092643
25102644 WARN_ONCE(current->flags & PF_MEMALLOC,
2511
- "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
2645
+ "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
25122646 current->pid, current->comm, target_wq->name, target_func);
25132647 WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
25142648 (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
2515
- "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
2649
+ "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps",
25162650 worker->current_pwq->wq->name, worker->current_func,
25172651 target_wq->name, target_func);
25182652 }
....@@ -2551,7 +2685,7 @@
25512685 * underneath us, so we can't reliably determine pwq from @target.
25522686 *
25532687 * CONTEXT:
2554
- * spin_lock_irq(pool->lock).
2688
+ * raw_spin_lock_irq(pool->lock).
25552689 */
25562690 static void insert_wq_barrier(struct pool_workqueue *pwq,
25572691 struct wq_barrier *barr,
....@@ -2638,7 +2772,7 @@
26382772 for_each_pwq(pwq, wq) {
26392773 struct worker_pool *pool = pwq->pool;
26402774
2641
- spin_lock_irq(&pool->lock);
2775
+ raw_spin_lock_irq(&pool->lock);
26422776
26432777 if (flush_color >= 0) {
26442778 WARN_ON_ONCE(pwq->flush_color != -1);
....@@ -2655,7 +2789,7 @@
26552789 pwq->work_color = work_color;
26562790 }
26572791
2658
- spin_unlock_irq(&pool->lock);
2792
+ raw_spin_unlock_irq(&pool->lock);
26592793 }
26602794
26612795 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
....@@ -2743,7 +2877,7 @@
27432877 * First flushers are responsible for cascading flushes and
27442878 * handling overflow. Non-first flushers can simply return.
27452879 */
2746
- if (wq->first_flusher != &this_flusher)
2880
+ if (READ_ONCE(wq->first_flusher) != &this_flusher)
27472881 return;
27482882
27492883 mutex_lock(&wq->mutex);
....@@ -2752,7 +2886,7 @@
27522886 if (wq->first_flusher != &this_flusher)
27532887 goto out_unlock;
27542888
2755
- wq->first_flusher = NULL;
2889
+ WRITE_ONCE(wq->first_flusher, NULL);
27562890
27572891 WARN_ON_ONCE(!list_empty(&this_flusher.list));
27582892 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
....@@ -2855,9 +2989,9 @@
28552989 for_each_pwq(pwq, wq) {
28562990 bool drained;
28572991
2858
- spin_lock_irq(&pwq->pool->lock);
2992
+ raw_spin_lock_irq(&pwq->pool->lock);
28592993 drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
2860
- spin_unlock_irq(&pwq->pool->lock);
2994
+ raw_spin_unlock_irq(&pwq->pool->lock);
28612995
28622996 if (drained)
28632997 continue;
....@@ -2886,14 +3020,14 @@
28863020
28873021 might_sleep();
28883022
2889
- local_irq_disable();
3023
+ rcu_read_lock();
28903024 pool = get_work_pool(work);
28913025 if (!pool) {
2892
- local_irq_enable();
3026
+ rcu_read_unlock();
28933027 return false;
28943028 }
28953029
2896
- spin_lock(&pool->lock);
3030
+ raw_spin_lock_irq(&pool->lock);
28973031 /* see the comment in try_to_grab_pending() with the same code */
28983032 pwq = get_work_pwq(work);
28993033 if (pwq) {
....@@ -2909,7 +3043,7 @@
29093043 check_flush_dependency(pwq->wq, work);
29103044
29113045 insert_wq_barrier(pwq, barr, work, worker);
2912
- spin_unlock_irq(&pool->lock);
3046
+ raw_spin_unlock_irq(&pool->lock);
29133047
29143048 /*
29153049 * Force a lock recursion deadlock when using flush_work() inside a
....@@ -2925,10 +3059,11 @@
29253059 lock_map_acquire(&pwq->wq->lockdep_map);
29263060 lock_map_release(&pwq->wq->lockdep_map);
29273061 }
2928
-
3062
+ rcu_read_unlock();
29293063 return true;
29303064 already_gone:
2931
- spin_unlock_irq(&pool->lock);
3065
+ raw_spin_unlock_irq(&pool->lock);
3066
+ rcu_read_unlock();
29323067 return false;
29333068 }
29343069
....@@ -2942,10 +3077,8 @@
29423077 if (WARN_ON(!work->func))
29433078 return false;
29443079
2945
- if (!from_cancel) {
2946
- lock_map_acquire(&work->lockdep_map);
2947
- lock_map_release(&work->lockdep_map);
2948
- }
3080
+ lock_map_acquire(&work->lockdep_map);
3081
+ lock_map_release(&work->lockdep_map);
29493082
29503083 if (start_flush_work(work, &barr, from_cancel)) {
29513084 wait_for_completion(&barr.done);
....@@ -3250,21 +3383,20 @@
32503383
32513384 /**
32523385 * alloc_workqueue_attrs - allocate a workqueue_attrs
3253
- * @gfp_mask: allocation mask to use
32543386 *
32553387 * Allocate a new workqueue_attrs, initialize with default settings and
32563388 * return it.
32573389 *
32583390 * Return: The allocated new workqueue_attr on success. %NULL on failure.
32593391 */
3260
-struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask)
3392
+struct workqueue_attrs *alloc_workqueue_attrs(void)
32613393 {
32623394 struct workqueue_attrs *attrs;
32633395
3264
- attrs = kzalloc(sizeof(*attrs), gfp_mask);
3396
+ attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
32653397 if (!attrs)
32663398 goto fail;
3267
- if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask))
3399
+ if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL))
32683400 goto fail;
32693401
32703402 cpumask_copy(attrs->cpumask, cpu_possible_mask);
....@@ -3321,7 +3453,7 @@
33213453 */
33223454 static int init_worker_pool(struct worker_pool *pool)
33233455 {
3324
- spin_lock_init(&pool->lock);
3456
+ raw_spin_lock_init(&pool->lock);
33253457 pool->id = -1;
33263458 pool->cpu = -1;
33273459 pool->node = NUMA_NO_NODE;
....@@ -3342,23 +3474,62 @@
33423474 pool->refcnt = 1;
33433475
33443476 /* shouldn't fail above this point */
3345
- pool->attrs = alloc_workqueue_attrs(GFP_KERNEL);
3477
+ pool->attrs = alloc_workqueue_attrs();
33463478 if (!pool->attrs)
33473479 return -ENOMEM;
33483480 return 0;
33493481 }
3482
+
3483
+#ifdef CONFIG_LOCKDEP
3484
+static void wq_init_lockdep(struct workqueue_struct *wq)
3485
+{
3486
+ char *lock_name;
3487
+
3488
+ lockdep_register_key(&wq->key);
3489
+ lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name);
3490
+ if (!lock_name)
3491
+ lock_name = wq->name;
3492
+
3493
+ wq->lock_name = lock_name;
3494
+ lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0);
3495
+}
3496
+
3497
+static void wq_unregister_lockdep(struct workqueue_struct *wq)
3498
+{
3499
+ lockdep_unregister_key(&wq->key);
3500
+}
3501
+
3502
+static void wq_free_lockdep(struct workqueue_struct *wq)
3503
+{
3504
+ if (wq->lock_name != wq->name)
3505
+ kfree(wq->lock_name);
3506
+}
3507
+#else
3508
+static void wq_init_lockdep(struct workqueue_struct *wq)
3509
+{
3510
+}
3511
+
3512
+static void wq_unregister_lockdep(struct workqueue_struct *wq)
3513
+{
3514
+}
3515
+
3516
+static void wq_free_lockdep(struct workqueue_struct *wq)
3517
+{
3518
+}
3519
+#endif
33503520
33513521 static void rcu_free_wq(struct rcu_head *rcu)
33523522 {
33533523 struct workqueue_struct *wq =
33543524 container_of(rcu, struct workqueue_struct, rcu);
33553525
3526
+ wq_free_lockdep(wq);
3527
+
33563528 if (!(wq->flags & WQ_UNBOUND))
33573529 free_percpu(wq->cpu_pwqs);
33583530 else
33593531 free_workqueue_attrs(wq->unbound_attrs);
33603532
3361
- kfree(wq->rescuer);
33623533 kfree(wq);
33633534 }
33643535
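
wq_init_lockdep() above uses dynamically registered lockdep keys so every workqueue gets its own lock class without a static key at each alloc_workqueue() call site. A minimal sketch of that API (illustrative only, not part of the patch):

#include <linux/lockdep.h>

struct example_obj {
	struct lock_class_key	key;
	struct lockdep_map	dep_map;
};

static void example_obj_init(struct example_obj *obj, const char *name)
{
	lockdep_register_key(&obj->key);
	lockdep_init_map(&obj->dep_map, name, &obj->key, 0);
}

static void example_obj_destroy(struct example_obj *obj)
{
	/* must be unregistered before the memory holding the key is freed */
	lockdep_unregister_key(&obj->key);
}
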
....@@ -3371,11 +3542,23 @@
33713542 kfree(pool);
33723543 }
33733544
3545
+/* This returns with the lock held on success (pool manager is inactive). */
3546
+static bool wq_manager_inactive(struct worker_pool *pool)
3547
+{
3548
+ raw_spin_lock_irq(&pool->lock);
3549
+
3550
+ if (pool->flags & POOL_MANAGER_ACTIVE) {
3551
+ raw_spin_unlock_irq(&pool->lock);
3552
+ return false;
3553
+ }
3554
+ return true;
3555
+}
3556
+
33743557 /**
33753558 * put_unbound_pool - put a worker_pool
33763559 * @pool: worker_pool to put
33773560 *
3378
- * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU
3561
+ * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU
33793562 * safe manner. get_unbound_pool() calls this function on its failure path
33803563 * and this function should be able to release pools which went through,
33813564 * successfully or not, init_worker_pool().
....@@ -3406,16 +3589,17 @@
34063589 * Become the manager and destroy all workers. This prevents
34073590 * @pool's workers from blocking on attach_mutex. We're the last
34083591 * manager and @pool gets freed with the flag set.
3592
+ * Because of how wq_manager_inactive() works, we will hold the
3593
+ * spinlock after a successful wait.
34093594 */
3410
- spin_lock_irq(&pool->lock);
3411
- wait_event_lock_irq(wq_manager_wait,
3412
- !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
3595
+ rcuwait_wait_event(&manager_wait, wq_manager_inactive(pool),
3596
+ TASK_UNINTERRUPTIBLE);
34133597 pool->flags |= POOL_MANAGER_ACTIVE;
34143598
34153599 while ((worker = first_idle_worker(pool)))
34163600 destroy_worker(worker);
34173601 WARN_ON(pool->nr_workers || pool->nr_idle);
3418
- spin_unlock_irq(&pool->lock);
3602
+ raw_spin_unlock_irq(&pool->lock);
34193603
34203604 mutex_lock(&wq_pool_attach_mutex);
34213605 if (!list_empty(&pool->workers))
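
The manager wait above moves from a waitqueue to an rcuwait, whose wake side does not take a waitqueue lock, which matters once pool->lock is a raw spinlock. A minimal illustrative sketch of the wait/wake pairing (not part of the patch):

#include <linux/rcuwait.h>
#include <linux/sched.h>

static struct rcuwait example_wait = __RCUWAIT_INITIALIZER(example_wait);
static bool example_done;

static void example_wait_for_done(void)
{
	/* sleeps until the condition is observed true */
	rcuwait_wait_event(&example_wait, READ_ONCE(example_done),
			   TASK_UNINTERRUPTIBLE);
}

static void example_mark_done(void)
{
	WRITE_ONCE(example_done, true);
	rcuwait_wake_up(&example_wait);
}
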
....@@ -3429,8 +3613,8 @@
34293613 del_timer_sync(&pool->idle_timer);
34303614 del_timer_sync(&pool->mayday_timer);
34313615
3432
- /* sched-RCU protected to allow dereferences from get_work_pool() */
3433
- call_rcu_sched(&pool->rcu, rcu_free_pool);
3616
+ /* RCU protected to allow dereferences from get_work_pool() */
3617
+ call_rcu(&pool->rcu, rcu_free_pool);
34343618 }
34353619
34363620 /**
....@@ -3543,14 +3727,16 @@
35433727 put_unbound_pool(pool);
35443728 mutex_unlock(&wq_pool_mutex);
35453729
3546
- call_rcu_sched(&pwq->rcu, rcu_free_pwq);
3730
+ call_rcu(&pwq->rcu, rcu_free_pwq);
35473731
35483732 /*
35493733 * If we're the last pwq going away, @wq is already dead and no one
35503734 * is gonna access it anymore. Schedule RCU free.
35513735 */
3552
- if (is_last)
3553
- call_rcu_sched(&wq->rcu, rcu_free_wq);
3736
+ if (is_last) {
3737
+ wq_unregister_lockdep(wq);
3738
+ call_rcu(&wq->rcu, rcu_free_wq);
3739
+ }
35543740 }
35553741
35563742 /**
....@@ -3575,7 +3761,7 @@
35753761 return;
35763762
35773763 /* this function can be called during early boot w/ irq disabled */
3578
- spin_lock_irqsave(&pwq->pool->lock, flags);
3764
+ raw_spin_lock_irqsave(&pwq->pool->lock, flags);
35793765
35803766 /*
35813767 * During [un]freezing, the caller is responsible for ensuring that
....@@ -3605,7 +3791,7 @@
36053791 pwq->max_active = 0;
36063792 }
36073793
3608
- spin_unlock_irqrestore(&pwq->pool->lock, flags);
3794
+ raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
36093795 }
36103796
36113797 /* initialize newly alloced @pwq which is associated with @wq and @pool */
....@@ -3778,8 +3964,8 @@
37783964
37793965 ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_node_ids), GFP_KERNEL);
37803966
3781
- new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3782
- tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3967
+ new_attrs = alloc_workqueue_attrs();
3968
+ tmp_attrs = alloc_workqueue_attrs();
37833969 if (!ctx || !new_attrs || !tmp_attrs)
37843970 goto out_free;
37853971
....@@ -3913,6 +4099,8 @@
39134099 *
39144100 * Performs GFP_KERNEL allocations.
39154101 *
4102
+ * Assumes caller has CPU hotplug read exclusion, i.e. get_online_cpus().
4103
+ *
39164104 * Return: 0 on success and -errno on failure.
39174105 */
39184106 int apply_workqueue_attrs(struct workqueue_struct *wq,
....@@ -3920,13 +4108,14 @@
39204108 {
39214109 int ret;
39224110
3923
- apply_wqattrs_lock();
4111
+ lockdep_assert_cpus_held();
4112
+
4113
+ mutex_lock(&wq_pool_mutex);
39244114 ret = apply_workqueue_attrs_locked(wq, attrs);
3925
- apply_wqattrs_unlock();
4115
+ mutex_unlock(&wq_pool_mutex);
39264116
39274117 return ret;
39284118 }
3929
-EXPORT_SYMBOL_GPL(apply_workqueue_attrs);
39304119
39314120 /**
39324121 * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug
....@@ -4004,9 +4193,9 @@
40044193
40054194 use_dfl_pwq:
40064195 mutex_lock(&wq->mutex);
4007
- spin_lock_irq(&wq->dfl_pwq->pool->lock);
4196
+ raw_spin_lock_irq(&wq->dfl_pwq->pool->lock);
40084197 get_pwq(wq->dfl_pwq);
4009
- spin_unlock_irq(&wq->dfl_pwq->pool->lock);
4198
+ raw_spin_unlock_irq(&wq->dfl_pwq->pool->lock);
40104199 old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
40114200 out_unlock:
40124201 mutex_unlock(&wq->mutex);
....@@ -4036,16 +4225,21 @@
40364225 mutex_unlock(&wq->mutex);
40374226 }
40384227 return 0;
4039
- } else if (wq->flags & __WQ_ORDERED) {
4228
+ }
4229
+
4230
+ get_online_cpus();
4231
+ if (wq->flags & __WQ_ORDERED) {
40404232 ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
40414233 /* there should only be single pwq for ordering guarantee */
40424234 WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
40434235 wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
40444236 "ordering guarantee broken for workqueue %s\n", wq->name);
4045
- return ret;
40464237 } else {
4047
- return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
4238
+ ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
40484239 }
4240
+ put_online_cpus();
4241
+
4242
+ return ret;
40494243 }
40504244
40514245 static int wq_clamp_max_active(int max_active, unsigned int flags,
....@@ -4078,8 +4272,8 @@
40784272
40794273 rescuer->rescue_wq = wq;
40804274 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);
4081
- ret = PTR_ERR_OR_ZERO(rescuer->task);
4082
- if (ret) {
4275
+ if (IS_ERR(rescuer->task)) {
4276
+ ret = PTR_ERR(rescuer->task);
40834277 kfree(rescuer);
40844278 return ret;
40854279 }
....@@ -4091,11 +4285,10 @@
40914285 return 0;
40924286 }
40934287
4094
-struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
4095
- unsigned int flags,
4096
- int max_active,
4097
- struct lock_class_key *key,
4098
- const char *lock_name, ...)
4288
+__printf(1, 4)
4289
+struct workqueue_struct *alloc_workqueue(const char *fmt,
4290
+ unsigned int flags,
4291
+ int max_active, ...)
40994292 {
41004293 size_t tbl_size = 0;
41014294 va_list args;
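
With the change above, alloc_workqueue() is a real exported function (the old __alloc_workqueue_key() wrapper with an explicit lock_class_key is gone) and the lockdep key is set up internally by wq_init_lockdep(). A short usage sketch (names are illustrative):

#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;

static int example_init(void)
{
	/* printf-style name; 0 picks the default max_active */
	example_wq = alloc_workqueue("example/%d", WQ_UNBOUND | WQ_MEM_RECLAIM,
				     0, 0);
	if (!example_wq)
		return -ENOMEM;
	return 0;
}
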
....@@ -4125,12 +4318,12 @@
41254318 return NULL;
41264319
41274320 if (flags & WQ_UNBOUND) {
4128
- wq->unbound_attrs = alloc_workqueue_attrs(GFP_KERNEL);
4321
+ wq->unbound_attrs = alloc_workqueue_attrs();
41294322 if (!wq->unbound_attrs)
41304323 goto err_free_wq;
41314324 }
41324325
4133
- va_start(args, lock_name);
4326
+ va_start(args, max_active);
41344327 vsnprintf(wq->name, sizeof(wq->name), fmt, args);
41354328 va_end(args);
41364329
....@@ -4147,11 +4340,11 @@
41474340 INIT_LIST_HEAD(&wq->flusher_overflow);
41484341 INIT_LIST_HEAD(&wq->maydays);
41494342
4150
- lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
4343
+ wq_init_lockdep(wq);
41514344 INIT_LIST_HEAD(&wq->list);
41524345
41534346 if (alloc_and_link_pwqs(wq) < 0)
4154
- goto err_free_wq;
4347
+ goto err_unreg_lockdep;
41554348
41564349 if (wq_online && init_rescuer(wq) < 0)
41574350 goto err_destroy;
....@@ -4177,6 +4370,9 @@
41774370
41784371 return wq;
41794372
4373
+err_unreg_lockdep:
4374
+ wq_unregister_lockdep(wq);
4375
+ wq_free_lockdep(wq);
41804376 err_free_wq:
41814377 free_workqueue_attrs(wq->unbound_attrs);
41824378 kfree(wq);
....@@ -4185,7 +4381,23 @@
41854381 destroy_workqueue(wq);
41864382 return NULL;
41874383 }
4188
-EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
4384
+EXPORT_SYMBOL_GPL(alloc_workqueue);
4385
+
4386
+static bool pwq_busy(struct pool_workqueue *pwq)
4387
+{
4388
+ int i;
4389
+
4390
+ for (i = 0; i < WORK_NR_COLORS; i++)
4391
+ if (pwq->nr_in_flight[i])
4392
+ return true;
4393
+
4394
+ if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
4395
+ return true;
4396
+ if (pwq->nr_active || !list_empty(&pwq->delayed_works))
4397
+ return true;
4398
+
4399
+ return false;
4400
+}
41894401
41904402 /**
41914403 * destroy_workqueue - safely terminate a workqueue
....@@ -4212,35 +4424,34 @@
42124424 struct worker *rescuer = wq->rescuer;
42134425
42144426 /* this prevents new queueing */
4215
- spin_lock_irq(&wq_mayday_lock);
4427
+ raw_spin_lock_irq(&wq_mayday_lock);
42164428 wq->rescuer = NULL;
4217
- spin_unlock_irq(&wq_mayday_lock);
4429
+ raw_spin_unlock_irq(&wq_mayday_lock);
42184430
42194431 /* rescuer will empty maydays list before exiting */
42204432 kthread_stop(rescuer->task);
42214433 kfree(rescuer);
42224434 }
42234435
4224
- /* sanity checks */
4436
+ /*
4437
+ * Sanity checks - grab all the locks so that we wait for all
4438
+ * in-flight operations which may do put_pwq().
4439
+ */
4440
+ mutex_lock(&wq_pool_mutex);
42254441 mutex_lock(&wq->mutex);
42264442 for_each_pwq(pwq, wq) {
4227
- int i;
4228
-
4229
- for (i = 0; i < WORK_NR_COLORS; i++) {
4230
- if (WARN_ON(pwq->nr_in_flight[i])) {
4231
- mutex_unlock(&wq->mutex);
4232
- show_workqueue_state();
4233
- return;
4234
- }
4235
- }
4236
-
4237
- if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) ||
4238
- WARN_ON(pwq->nr_active) ||
4239
- WARN_ON(!list_empty(&pwq->delayed_works))) {
4443
+ raw_spin_lock_irq(&pwq->pool->lock);
4444
+ if (WARN_ON(pwq_busy(pwq))) {
4445
+ pr_warn("%s: %s has the following busy pwq\n",
4446
+ __func__, wq->name);
4447
+ show_pwq(pwq);
4448
+ raw_spin_unlock_irq(&pwq->pool->lock);
42404449 mutex_unlock(&wq->mutex);
4450
+ mutex_unlock(&wq_pool_mutex);
42414451 show_workqueue_state();
42424452 return;
42434453 }
4454
+ raw_spin_unlock_irq(&pwq->pool->lock);
42444455 }
42454456 mutex_unlock(&wq->mutex);
42464457
....@@ -4248,16 +4459,16 @@
42484459 * wq list is used to freeze wq, remove from list after
42494460 * flushing is complete in case freeze races us.
42504461 */
4251
- mutex_lock(&wq_pool_mutex);
42524462 list_del_rcu(&wq->list);
42534463 mutex_unlock(&wq_pool_mutex);
42544464
42554465 if (!(wq->flags & WQ_UNBOUND)) {
4466
+ wq_unregister_lockdep(wq);
42564467 /*
42574468 * The base ref is never dropped on per-cpu pwqs. Directly
42584469 * schedule RCU free.
42594470 */
4260
- call_rcu_sched(&wq->rcu, rcu_free_wq);
4471
+ call_rcu(&wq->rcu, rcu_free_wq);
42614472 } else {
42624473 /*
42634474 * We're the sole accessor of @wq at this point. Directly
....@@ -4367,7 +4578,8 @@
43674578 struct pool_workqueue *pwq;
43684579 bool ret;
43694580
4370
- rcu_read_lock_sched();
4581
+ rcu_read_lock();
4582
+ preempt_disable();
43714583
43724584 if (cpu == WORK_CPU_UNBOUND)
43734585 cpu = smp_processor_id();
....@@ -4378,7 +4590,8 @@
43784590 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
43794591
43804592 ret = !list_empty(&pwq->delayed_works);
4381
- rcu_read_unlock_sched();
4593
+ preempt_enable();
4594
+ rcu_read_unlock();
43824595
43834596 return ret;
43844597 }
....@@ -4404,15 +4617,15 @@
44044617 if (work_pending(work))
44054618 ret |= WORK_BUSY_PENDING;
44064619
4407
- local_irq_save(flags);
4620
+ rcu_read_lock();
44084621 pool = get_work_pool(work);
44094622 if (pool) {
4410
- spin_lock(&pool->lock);
4623
+ raw_spin_lock_irqsave(&pool->lock, flags);
44114624 if (find_worker_executing_work(pool, work))
44124625 ret |= WORK_BUSY_RUNNING;
4413
- spin_unlock(&pool->lock);
4626
+ raw_spin_unlock_irqrestore(&pool->lock, flags);
44144627 }
4415
- local_irq_restore(flags);
4628
+ rcu_read_unlock();
44164629
44174630 return ret;
44184631 }
....@@ -4476,14 +4689,14 @@
44764689 * Carefully copy the associated workqueue's workfn, name and desc.
44774690 * Keep the original last '\0' in case the original is garbage.
44784691 */
4479
- probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
4480
- probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
4481
- probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
4482
- probe_kernel_read(name, wq->name, sizeof(name) - 1);
4483
- probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
4692
+ copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn));
4693
+ copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq));
4694
+ copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq));
4695
+ copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1);
4696
+ copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1);
44844697
44854698 if (fn || name[0] || desc[0]) {
4486
- printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
4699
+ printk("%sWorkqueue: %s %ps", log_lvl, name, fn);
44874700 if (strcmp(name, desc))
44884701 pr_cont(" (%s)", desc);
44894702 pr_cont("\n");
....@@ -4508,7 +4721,7 @@
45084721 pr_cont("%s BAR(%d)", comma ? "," : "",
45094722 task_pid_nr(barr->task));
45104723 } else {
4511
- pr_cont("%s %pf", comma ? "," : "", work->func);
4724
+ pr_cont("%s %ps", comma ? "," : "", work->func);
45124725 }
45134726 }
45144727
....@@ -4541,9 +4754,9 @@
45414754 if (worker->current_pwq != pwq)
45424755 continue;
45434756
4544
- pr_cont("%s %d%s:%pf", comma ? "," : "",
4757
+ pr_cont("%s %d%s:%ps", comma ? "," : "",
45454758 task_pid_nr(worker->task),
4546
- worker == pwq->wq->rescuer ? "(RESCUER)" : "",
4759
+ worker->rescue_wq ? "(RESCUER)" : "",
45474760 worker->current_func);
45484761 list_for_each_entry(work, &worker->scheduled, entry)
45494762 pr_cont_work(false, work);
....@@ -4597,7 +4810,7 @@
45974810 unsigned long flags;
45984811 int pi;
45994812
4600
- rcu_read_lock_sched();
4813
+ rcu_read_lock();
46014814
46024815 pr_info("Showing busy workqueues and worker pools:\n");
46034816
....@@ -4617,10 +4830,10 @@
46174830 pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
46184831
46194832 for_each_pwq(pwq, wq) {
4620
- spin_lock_irqsave(&pwq->pool->lock, flags);
4833
+ raw_spin_lock_irqsave(&pwq->pool->lock, flags);
46214834 if (pwq->nr_active || !list_empty(&pwq->delayed_works))
46224835 show_pwq(pwq);
4623
- spin_unlock_irqrestore(&pwq->pool->lock, flags);
4836
+ raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
46244837 /*
46254838 * We could be printing a lot from atomic context, e.g.
46264839 * sysrq-t -> show_workqueue_state(). Avoid triggering
....@@ -4634,7 +4847,7 @@
46344847 struct worker *worker;
46354848 bool first = true;
46364849
4637
- spin_lock_irqsave(&pool->lock, flags);
4850
+ raw_spin_lock_irqsave(&pool->lock, flags);
46384851 if (pool->nr_workers == pool->nr_idle)
46394852 goto next_pool;
46404853
....@@ -4653,7 +4866,7 @@
46534866 }
46544867 pr_cont("\n");
46554868 next_pool:
4656
- spin_unlock_irqrestore(&pool->lock, flags);
4869
+ raw_spin_unlock_irqrestore(&pool->lock, flags);
46574870 /*
46584871 * We could be printing a lot from atomic context, e.g.
46594872 * sysrq-t -> show_workqueue_state(). Avoid triggering
....@@ -4662,7 +4875,7 @@
46624875 touch_nmi_watchdog();
46634876 }
46644877
4665
- rcu_read_unlock_sched();
4878
+ rcu_read_unlock();
46664879 }
46674880
46684881 /* used to show worker information through /proc/PID/{comm,stat,status} */
....@@ -4683,7 +4896,7 @@
46834896 struct worker_pool *pool = worker->pool;
46844897
46854898 if (pool) {
4686
- spin_lock_irq(&pool->lock);
4899
+ raw_spin_lock_irq(&pool->lock);
46874900 /*
46884901 * ->desc tracks information (wq name or
46894902 * set_worker_desc()) for the latest execution. If
....@@ -4697,12 +4910,13 @@
46974910 scnprintf(buf + off, size - off, "-%s",
46984911 worker->desc);
46994912 }
4700
- spin_unlock_irq(&pool->lock);
4913
+ raw_spin_unlock_irq(&pool->lock);
47014914 }
47024915 }
47034916
47044917 mutex_unlock(&wq_pool_attach_mutex);
47054918 }
4919
+EXPORT_SYMBOL_GPL(wq_worker_comm);
47064920
47074921 #ifdef CONFIG_SMP
47084922
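wq_worker_comm() additionally gains an EXPORT_SYMBOL_GPL(), making the symbol resolvable by GPL-compatible modules instead of built-in code only. The general pattern, shown on a hypothetical helper (the function name is made up):

#include <linux/module.h>

int my_driver_helper(int x)
{
        return x * 2;
}
/*
 * Visible to modules that declare a GPL-compatible license; a plain
 * EXPORT_SYMBOL() would expose it to all modules regardless of license.
 */
EXPORT_SYMBOL_GPL(my_driver_helper);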
....@@ -4728,7 +4942,7 @@
47284942
47294943 for_each_cpu_worker_pool(pool, cpu) {
47304944 mutex_lock(&wq_pool_attach_mutex);
4731
- spin_lock_irq(&pool->lock);
4945
+ raw_spin_lock_irq(&pool->lock);
47324946
47334947 /*
47344948 * We've blocked all attach/detach operations. Make all workers
....@@ -4742,7 +4956,7 @@
47424956
47434957 pool->flags |= POOL_DISASSOCIATED;
47444958
4745
- spin_unlock_irq(&pool->lock);
4959
+ raw_spin_unlock_irq(&pool->lock);
47464960 mutex_unlock(&wq_pool_attach_mutex);
47474961
47484962 /*
....@@ -4768,9 +4982,9 @@
47684982 * worker blocking could lead to lengthy stalls. Kick off
47694983 * unbound chain execution of currently pending work items.
47704984 */
4771
- spin_lock_irq(&pool->lock);
4985
+ raw_spin_lock_irq(&pool->lock);
47724986 wake_up_worker(pool);
4773
- spin_unlock_irq(&pool->lock);
4987
+ raw_spin_unlock_irq(&pool->lock);
47744988 }
47754989 }
47764990
....@@ -4797,7 +5011,7 @@
47975011 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
47985012 pool->attrs->cpumask) < 0);
47995013
4800
- spin_lock_irq(&pool->lock);
5014
+ raw_spin_lock_irq(&pool->lock);
48015015
48025016 pool->flags &= ~POOL_DISASSOCIATED;
48035017
....@@ -4826,7 +5040,7 @@
48265040 *
48275041 * WRITE_ONCE() is necessary because @worker->flags may be
48285042 * tested without holding any lock in
4829
- * wq_worker_waking_up(). Without it, NOT_RUNNING test may
5043
+ * wq_worker_running(). Without it, NOT_RUNNING test may
48305044 * fail incorrectly leading to premature concurrency
48315045 * management operations.
48325046 */
....@@ -4836,7 +5050,7 @@
48365050 WRITE_ONCE(worker->flags, worker_flags);
48375051 }
48385052
4839
- spin_unlock_irq(&pool->lock);
5053
+ raw_spin_unlock_irq(&pool->lock);
48405054 }
48415055
48425056 /**
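The comment fix above tracks the scheduler hooks: wq_worker_waking_up() is gone and wq_worker_running() is the function that now tests worker->flags without taking pool->lock, which is why the store a few lines earlier is done with WRITE_ONCE(). A minimal sketch of that store/load pairing on a hypothetical flags word (names are made up):

#include <linux/compiler.h>
#include <linux/types.h>

#define MY_FLAG_IDLE    0x1

struct my_worker {
        unsigned int flags;
};

/*
 * Writer: runs under the owner's lock, but marks the store so the lockless
 * reader below always sees a single, untorn value.
 */
static void my_worker_set_idle(struct my_worker *w)
{
        WRITE_ONCE(w->flags, w->flags | MY_FLAG_IDLE);
}

/* Reader: called without the lock, so the load is marked as well. */
static bool my_worker_is_idle(struct my_worker *w)
{
        return READ_ONCE(w->flags) & MY_FLAG_IDLE;
}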
....@@ -5049,16 +5263,16 @@
50495263 * nr_active is monotonically decreasing. It's safe
50505264 * to peek without lock.
50515265 */
5052
- rcu_read_lock_sched();
5266
+ rcu_read_lock();
50535267 for_each_pwq(pwq, wq) {
50545268 WARN_ON_ONCE(pwq->nr_active < 0);
50555269 if (pwq->nr_active) {
50565270 busy = true;
5057
- rcu_read_unlock_sched();
5271
+ rcu_read_unlock();
50585272 goto out_unlock;
50595273 }
50605274 }
5061
- rcu_read_unlock_sched();
5275
+ rcu_read_unlock();
50625276 }
50635277 out_unlock:
50645278 mutex_unlock(&wq_pool_mutex);
....@@ -5260,7 +5474,8 @@
52605474 const char *delim = "";
52615475 int node, written = 0;
52625476
5263
- rcu_read_lock_sched();
5477
+ get_online_cpus();
5478
+ rcu_read_lock();
52645479 for_each_node(node) {
52655480 written += scnprintf(buf + written, PAGE_SIZE - written,
52665481 "%s%d:%d", delim, node,
....@@ -5268,7 +5483,8 @@
52685483 delim = " ";
52695484 }
52705485 written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
5271
- rcu_read_unlock_sched();
5486
+ rcu_read_unlock();
5487
+ put_online_cpus();
52725488
52735489 return written;
52745490 }
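Besides switching to plain RCU, the sysfs show handler above now brackets the per-node walk with get_online_cpus()/put_online_cpus(): RCU only guarantees that the old pool_workqueues are not freed, while the hotplug read lock keeps a concurrent CPU hot(un)plug from rewriting the node-to-pwq mapping mid-walk. The hotplug read-side pattern on a hypothetical per-CPU counter dump:

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/printk.h>

static DEFINE_PER_CPU(unsigned long, my_counter);

static void dump_counters(void)
{
        int cpu;

        get_online_cpus();      /* block CPU hotplug for the duration of the walk */
        for_each_online_cpu(cpu)
                pr_info("cpu%d: %lu\n", cpu, per_cpu(my_counter, cpu));
        put_online_cpus();      /* newer kernels spell this cpus_read_lock()/unlock() */
}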
....@@ -5293,7 +5509,7 @@
52935509
52945510 lockdep_assert_held(&wq_pool_mutex);
52955511
5296
- attrs = alloc_workqueue_attrs(GFP_KERNEL);
5512
+ attrs = alloc_workqueue_attrs();
52975513 if (!attrs)
52985514 return NULL;
52995515
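alloc_workqueue_attrs() lost its gfp_t parameter upstream; it now always allocates with GFP_KERNEL, so callers simply drop the argument. Roughly how the attrs helpers are used by a caller that wants boosted unbound workers; the function name is invented, and whether these helpers are reachable outside kernel/workqueue.c varies by kernel version, so treat this only as an illustration of the calling convention:

#include <linux/errno.h>
#include <linux/workqueue.h>

static int boost_unbound_wq(struct workqueue_struct *wq)
{
        struct workqueue_attrs *attrs;
        int ret;

        attrs = alloc_workqueue_attrs();        /* no gfp_t argument any more */
        if (!attrs)
                return -ENOMEM;

        attrs->nice = -5;                       /* slightly boost the workers */
        ret = apply_workqueue_attrs(wq, attrs);
        free_workqueue_attrs(attrs);
        return ret;
}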
....@@ -5639,6 +5855,7 @@
56395855 pr_cont_pool_info(pool);
56405856 pr_cont(" stuck for %us!\n",
56415857 jiffies_to_msecs(now - pool_ts) / 1000);
5858
+ trace_android_vh_wq_lockup_pool(pool->cpu, pool_ts);
56425859 }
56435860 }
56445861
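trace_android_vh_wq_lockup_pool() is an Android vendor hook fired when the watchdog reports a stuck pool. Assuming it is declared in trace/hooks/wqlockup.h with the usual DECLARE_HOOK(android_vh_wq_lockup_pool, TP_PROTO(int cpu, unsigned long pool_ts), ...) pattern, a vendor module could attach a probe roughly as below; the registration helper name is generated from the hook name and the exact signature should be treated as an assumption:

#include <linux/module.h>
#include <trace/hooks/wqlockup.h>

/*
 * Probe: the first argument is the opaque data passed at registration,
 * followed by the hook's TP_PROTO arguments.
 */
static void my_wq_lockup_probe(void *data, int cpu, unsigned long pool_ts)
{
        pr_warn("vendor: workqueue pool on cpu%d stuck since jiffies=%lu\n",
                cpu, pool_ts);
}

static int __init my_wq_lockup_init(void)
{
        return register_trace_android_vh_wq_lockup_pool(my_wq_lockup_probe,
                                                        NULL);
}
module_init(my_wq_lockup_init);
MODULE_LICENSE("GPL");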
....@@ -5722,7 +5939,14 @@
57225939 return;
57235940 }
57245941
5725
- wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL);
5942
+ for_each_possible_cpu(cpu) {
5943
+ if (WARN_ON(cpu_to_node(cpu) == NUMA_NO_NODE)) {
5944
+ pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
5945
+ return;
5946
+ }
5947
+ }
5948
+
5949
+ wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
57265950 BUG_ON(!wq_update_unbound_numa_attrs_buf);
57275951
57285952 /*
....@@ -5739,11 +5963,6 @@
57395963
57405964 for_each_possible_cpu(cpu) {
57415965 node = cpu_to_node(cpu);
5742
- if (WARN_ON(node == NUMA_NO_NODE)) {
5743
- pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
5744
- /* happens iff arch is bonkers, let's just proceed */
5745
- return;
5746
- }
57475966 cpumask_set_cpu(cpu, tbl[node]);
57485967 }
57495968
....@@ -5761,13 +5980,13 @@
57615980 * items. Actual work item execution starts only after kthreads can be
57625981 * created and scheduled right before early initcalls.
57635982 */
5764
-int __init workqueue_init_early(void)
5983
+void __init workqueue_init_early(void)
57655984 {
57665985 int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
57675986 int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
57685987 int i, cpu;
57695988
5770
- WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
5989
+ BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
57715990
57725991 BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
57735992 cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(hk_flags));
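Two init-path cleanups show up in this hunk: the pool_workqueue alignment check becomes BUILD_BUG_ON(), turning a condition that is knowable at compile time into a build failure rather than a boot-time warning, and workqueue_init_early() now returns void, apparently because its return value was never checked by the early-init caller. The compile-time assertion pattern on a hypothetical on-wire structure:

#include <linux/build_bug.h>
#include <linux/init.h>
#include <linux/types.h>

struct packet_header {
        u32 magic;
        u32 len;
        u64 seq;
} __aligned(8);

static int __init layout_check_init(void)
{
        /* Breaks the build, not the boot, if the layout assumptions break. */
        BUILD_BUG_ON(sizeof(struct packet_header) != 16);
        BUILD_BUG_ON(__alignof__(struct packet_header) < __alignof__(u64));
        return 0;
}
early_initcall(layout_check_init);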
....@@ -5797,7 +6016,7 @@
57976016 for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
57986017 struct workqueue_attrs *attrs;
57996018
5800
- BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
6019
+ BUG_ON(!(attrs = alloc_workqueue_attrs()));
58016020 attrs->nice = std_nice[i];
58026021 unbound_std_wq_attrs[i] = attrs;
58036022
....@@ -5806,7 +6025,7 @@
58066025 * guaranteed by max_active which is enforced by pwqs.
58076026 * Turn off NUMA so that dfl_pwq is used for all nodes.
58086027 */
5809
- BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
6028
+ BUG_ON(!(attrs = alloc_workqueue_attrs()));
58106029 attrs->nice = std_nice[i];
58116030 attrs->no_numa = true;
58126031 ordered_wq_attrs[i] = attrs;
....@@ -5828,8 +6047,6 @@
58286047 !system_unbound_wq || !system_freezable_wq ||
58296048 !system_power_efficient_wq ||
58306049 !system_freezable_power_efficient_wq);
5831
-
5832
- return 0;
58336050 }
58346051
58356052 /**
....@@ -5841,7 +6058,7 @@
58416058 * are no kworkers executing the work items yet. Populate the worker pools
58426059 * with the initial workers and enable future kworker creations.
58436060 */
5844
-int __init workqueue_init(void)
6061
+void __init workqueue_init(void)
58456062 {
58466063 struct workqueue_struct *wq;
58476064 struct worker_pool *pool;
....@@ -5888,6 +6105,4 @@
58886105
58896106 wq_online = true;
58906107 wq_watchdog_init();
5891
-
5892
- return 0;
58936108 }