2023-12-11 6778948f9de86c3cfaf36725a7c87dcff9ba247f
kernel/kernel/workqueue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
 * kernel/workqueue.c - generic async execution with shared worker pool
 *
@@ -50,8 +51,13 @@
 #include <linux/sched/isolation.h>
 #include <linux/nmi.h>
 #include <linux/kvm_para.h>
+#include <uapi/linux/sched/types.h>

 #include "workqueue_internal.h"
+
+#include <trace/hooks/wqlockup.h>
+/* events/workqueue.h uses default TRACE_INCLUDE_PATH */
+#undef TRACE_INCLUDE_PATH

 enum {
 /*
@@ -133,7 +139,7 @@
 * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
 *
 * PWR: wq_pool_mutex and wq->mutex protected for writes. Either or
- * sched-RCU for reads.
+ * RCU for reads.
 *
 * WQ: wq->mutex protected.
 *
@@ -248,7 +254,7 @@
 struct list_head flusher_overflow; /* WQ: flush overflow list */

 struct list_head maydays; /* MD: pwqs requesting rescue */
- struct worker *rescuer; /* I: rescue worker */
+ struct worker *rescuer; /* MD: rescue worker */

 int nr_drainers; /* WQ: drain in progress */
 int saved_max_active; /* WQ: saved pwq max_active */
@@ -260,13 +266,15 @@
 struct wq_device *wq_dev; /* I: for sysfs interface */
 #endif
 #ifdef CONFIG_LOCKDEP
+ char *lock_name;
+ struct lock_class_key key;
 struct lockdep_map lockdep_map;
 #endif
 char name[WQ_NAME_LEN]; /* I: workqueue name */

 /*
- * Destruction of workqueue_struct is sched-RCU protected to allow
- * walking the workqueues list without grabbing wq_pool_mutex.
+ * Destruction of workqueue_struct is RCU protected to allow walking
+ * the workqueues list without grabbing wq_pool_mutex.
 * This is used to dump all workqueues from sysrq.
 */
 struct rcu_head rcu;
@@ -299,7 +307,8 @@
 static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */
 static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
 static DEFINE_RAW_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
-static DECLARE_SWAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
+/* wait for manager to go away */
+static struct rcuwait manager_wait = __RCUWAIT_INITIALIZER(manager_wait);

 static LIST_HEAD(workqueues); /* PR: list of all workqueues */
 static bool workqueue_freezing; /* PL: have wqs started freezing? */
@@ -353,19 +362,18 @@

 static int worker_thread(void *__worker);
 static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
+static void show_pwq(struct pool_workqueue *pwq);

 #define CREATE_TRACE_POINTS
 #include <trace/events/workqueue.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(workqueue_execute_start);
+EXPORT_TRACEPOINT_SYMBOL_GPL(workqueue_execute_end);

 #define assert_rcu_or_pool_mutex() \
 RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
 !lockdep_is_held(&wq_pool_mutex), \
 "RCU or wq_pool_mutex should be held")
-
-#define assert_rcu_or_wq_mutex(wq) \
- RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
- !lockdep_is_held(&wq->mutex), \
- "RCU or wq->mutex should be held")

 #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
 RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
@@ -423,13 +431,12 @@
 * ignored.
 */
 #define for_each_pwq(pwq, wq) \
- list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node) \
- if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
- else
+ list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node, \
+ lockdep_is_held(&(wq->mutex)))

 #ifdef CONFIG_DEBUG_OBJECTS_WORK

-static struct debug_obj_descr work_debug_descr;
+static const struct debug_obj_descr work_debug_descr;

 static void *work_debug_hint(void *addr)
 {
@@ -479,7 +486,7 @@
 }
 }

-static struct debug_obj_descr work_debug_descr = {
+static const struct debug_obj_descr work_debug_descr = {
 .name = "work_struct",
 .debug_hint = work_debug_hint,
 .is_static_object = work_is_static_object,
@@ -647,7 +654,7 @@
 * The following mb guarantees that previous clear of a PENDING bit
 * will not be reordered with any speculative LOADS or STORES from
 * work->current_func, which is executed afterwards. This possible
- * reordering can lead to a missed execution on attempt to qeueue
+ * reordering can lead to a missed execution on attempt to queue
 * the same @work. E.g. consider this case:
 *
 * CPU#0 CPU#1
@@ -851,8 +858,17 @@

 if (!worker->sleeping)
 return;
+
+ /*
+ * If preempted by unbind_workers() between the WORKER_NOT_RUNNING check
+ * and the nr_running increment below, we may ruin the nr_running reset
+ * and leave with an unexpected pool->nr_running == 1 on the newly unbound
+ * pool. Protect against such race.
+ */
+ preempt_disable();
 if (!(worker->flags & WORKER_NOT_RUNNING))
 atomic_inc(&worker->pool->nr_running);
+ preempt_enable();
 worker->sleeping = 0;
 }

@@ -861,7 +877,8 @@
 * @task: task going to sleep
 *
 * This function is called from schedule() when a busy worker is
- * going to sleep.
+ * going to sleep. Preemption needs to be disabled to protect ->sleeping
+ * assignment.
 */
 void wq_worker_sleeping(struct task_struct *task)
 {
@@ -878,7 +895,8 @@

 pool = worker->pool;

- if (WARN_ON_ONCE(worker->sleeping))
+ /* Return if preempted before wq_worker_running() was reached */
+ if (worker->sleeping)
 return;

 worker->sleeping = 1;
@@ -906,12 +924,23 @@

 /**
 * wq_worker_last_func - retrieve worker's last work function
+ * @task: Task to retrieve last work function of.
 *
 * Determine the last function a worker executed. This is called from
 * the scheduler to get a worker's last known identity.
 *
 * CONTEXT:
- * spin_lock_irq(rq->lock)
+ * raw_spin_lock_irq(rq->lock)
+ *
+ * This function is called during schedule() when a kworker is going
+ * to sleep. It's used by psi to identify aggregation workers during
+ * dequeuing, to allow periodic aggregation to shut-off when that
+ * worker is the last task in the system or cgroup to go to sleep.
+ *
+ * As this function doesn't involve any workqueue-related locking, it
+ * only returns stable values when called from inside the scheduler's
+ * queuing and dequeuing paths, when @task, which must be a kworker,
+ * is guaranteed to not be processing any works.
 *
 * Return:
 * The last work function %current executed as a worker, NULL if it
@@ -1201,11 +1230,14 @@
 * stable state - idle, on timer or on worklist.
 *
 * Return:
+ *
+ * ======== ================================================================
 * 1 if @work was pending and we successfully stole PENDING
 * 0 if @work was idle and we claimed PENDING
 * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry
 * -ENOENT if someone else is canceling @work, this state may persist
 * for arbitrarily long
+ * ======== ================================================================
 *
 * Note:
 * On >= 0 return, the caller owns @work's PENDING bit. To avoid getting
@@ -1313,6 +1345,9 @@
 {
 struct worker_pool *pool = pwq->pool;

+ /* record the work call stack in order to print it in KASAN reports */
+ kasan_record_aux_stack(work);
+
 /* we own @work, set data and link */
 set_work_pwq(work, pwq, extra_flags);
 list_add_tail(&work->entry, head);
@@ -1339,7 +1374,7 @@

 worker = current_wq_worker();
 /*
- * Return %true iff I'm a worker execuing a work item on @wq. If
+ * Return %true iff I'm a worker executing a work item on @wq. If
 * I'm @worker, it's safe to dereference it without locking.
 */
 return worker && worker->current_pwq->wq == wq;
@@ -1513,14 +1548,96 @@
 }
 EXPORT_SYMBOL(queue_work_on);

+/**
+ * workqueue_select_cpu_near - Select a CPU based on NUMA node
+ * @node: NUMA node ID that we want to select a CPU from
+ *
+ * This function will attempt to find a "random" cpu available on a given
+ * node. If there are no CPUs available on the given node it will return
+ * WORK_CPU_UNBOUND indicating that we should just schedule to any
+ * available CPU if we need to schedule this work.
+ */
+static int workqueue_select_cpu_near(int node)
+{
+ int cpu;
+
+ /* No point in doing this if NUMA isn't enabled for workqueues */
+ if (!wq_numa_enabled)
+ return WORK_CPU_UNBOUND;
+
+ /* Delay binding to CPU if node is not valid or online */
+ if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
+ return WORK_CPU_UNBOUND;
+
+ /* Use local node/cpu if we are already there */
+ cpu = raw_smp_processor_id();
+ if (node == cpu_to_node(cpu))
+ return cpu;
+
+ /* Use "random" otherwise know as "first" online CPU of node */
+ cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
+
+ /* If CPU is valid return that, otherwise just defer */
+ return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
+}
+
+/**
+ * queue_work_node - queue work on a "random" cpu for a given NUMA node
+ * @node: NUMA node that we are targeting the work for
+ * @wq: workqueue to use
+ * @work: work to queue
+ *
+ * We queue the work to a "random" CPU within a given NUMA node. The basic
+ * idea here is to provide a way to somehow associate work with a given
+ * NUMA node.
+ *
+ * This function will only make a best effort attempt at getting this onto
+ * the right NUMA node. If no node is requested or the requested node is
+ * offline then we just fall back to standard queue_work behavior.
+ *
+ * Currently the "random" CPU ends up being the first available CPU in the
+ * intersection of cpu_online_mask and the cpumask of the node, unless we
+ * are running on the node. In that case we just use the current CPU.
+ *
+ * Return: %false if @work was already on a queue, %true otherwise.
+ */
+bool queue_work_node(int node, struct workqueue_struct *wq,
+ struct work_struct *work)
+{
+ unsigned long flags;
+ bool ret = false;
+
+ /*
+ * This current implementation is specific to unbound workqueues.
+ * Specifically we only return the first available CPU for a given
+ * node instead of cycling through individual CPUs within the node.
+ *
+ * If this is used with a per-cpu workqueue then the logic in
+ * workqueue_select_cpu_near would need to be updated to allow for
+ * some round robin type logic.
+ */
+ WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
+
+ local_irq_save(flags);
+
+ if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
+ int cpu = workqueue_select_cpu_near(node);
+
+ __queue_work(cpu, wq, work);
+ ret = true;
+ }
+
+ local_irq_restore(flags);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(queue_work_node);
+
 void delayed_work_timer_fn(struct timer_list *t)
 {
 struct delayed_work *dwork = from_timer(dwork, t, timer);
- unsigned long flags;

- local_irq_save(flags);
+ /* should have been called from irqsafe timer with irq already off */
 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
- local_irq_restore(flags);
 }
 EXPORT_SYMBOL(delayed_work_timer_fn);

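/*
 * Editor's note: a minimal usage sketch for the queue_work_node() added
 * above -- not part of this patch. The workqueue name, work function and
 * node id below are hypothetical; the calls themselves (alloc_workqueue,
 * DECLARE_WORK, queue_work_node) are the in-tree APIs.
 */
#include <linux/errno.h>
#include <linux/printk.h>
#include <linux/workqueue.h>

static void example_work_fn(struct work_struct *work)
{
	pr_info("ran on a CPU close to the requested NUMA node\n");
}

static DECLARE_WORK(example_work, example_work_fn);

static int example_queue_near_node(void)
{
	/* queue_work_node() warns unless the workqueue is WQ_UNBOUND */
	struct workqueue_struct *wq = alloc_workqueue("example_wq", WQ_UNBOUND, 0);

	if (!wq)
		return -ENOMEM;

	/* best effort: picks an online CPU of node 0, else falls back to any CPU */
	queue_work_node(0, wq, &example_work);
	return 0;
}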
@@ -1531,9 +1648,14 @@
 struct work_struct *work = &dwork->work;

 WARN_ON_ONCE(!wq);
-#ifndef CONFIG_CFI_CLANG
- WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
-#endif
+ /*
+ * With CFI, timer->function can point to a jump table entry in a module,
+ * which fails the comparison. Disable the warning if CFI and modules are
+ * both enabled.
+ */
+ if (!IS_ENABLED(CONFIG_CFI_CLANG) || !IS_ENABLED(CONFIG_MODULES))
+ WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
+
 WARN_ON_ONCE(timer_pending(timer));
 WARN_ON_ONCE(!list_empty(&work->entry));

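/*
 * Editor's note: a usage sketch, not part of this patch. The sanity check
 * above relies on INIT_DELAYED_WORK()/DECLARE_DELAYED_WORK() having installed
 * delayed_work_timer_fn as the timer callback; a typical caller looks like
 * this (the names, system_wq and the 2*HZ delay are arbitrary).
 */
#include <linux/jiffies.h>
#include <linux/workqueue.h>

static void example_dwork_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(example_dwork, example_dwork_fn);

static void example_dwork_fn(struct work_struct *work)
{
	/* runs roughly two seconds after example_kick() */
}

static void example_kick(void)
{
	/* __queue_delayed_work() above validates this dwork's timer callback */
	queue_delayed_work(system_wq, &example_dwork, 2 * HZ);
}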
@@ -1644,7 +1766,7 @@
 *
 * Return: %false if @rwork was already pending, %true otherwise. Note
 * that a full RCU grace period is guaranteed only after a %true return.
- * While @rwork is guarnateed to be executed after a %false return, the
+ * While @rwork is guaranteed to be executed after a %false return, the
 * execution may happen before a full RCU grace period has passed.
 */
 bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
@@ -1838,6 +1960,15 @@
 goto fail;

 set_user_nice(worker->task, pool->attrs->nice);
+ if (IS_ENABLED(CONFIG_ROCKCHIP_OPTIMIZE_RT_PRIO)) {
+ struct sched_param param;
+
+ if (pool->attrs->nice == 0)
+ param.sched_priority = MAX_RT_PRIO / 2 - 4;
+ else
+ param.sched_priority = MAX_RT_PRIO / 2 - 2;
+ sched_setscheduler_nocheck(worker->task, SCHED_RR, &param);
+ }
 kthread_bind_mask(worker->task, pool->attrs->cpumask);

 /* successful, attach the worker to the pool */
@@ -2047,7 +2178,7 @@

 pool->manager = NULL;
 pool->flags &= ~POOL_MANAGER_ACTIVE;
- swake_up_one(&wq_manager_wait);
+ rcuwait_wake_up(&manager_wait);
 return true;
 }

@@ -2177,13 +2308,13 @@
 * While we must be careful to not use "work" after this, the trace
 * point will only record its address.
 */
- trace_workqueue_execute_end(work);
+ trace_workqueue_execute_end(work, worker->current_func);
 lock_map_release(&lockdep_map);
 lock_map_release(&pwq->wq->lockdep_map);

 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
 pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
- " last function: %pf\n",
+ " last function: %ps\n",
 current->comm, preempt_count(), task_pid_nr(current),
 worker->current_func);
 debug_show_held_locks(current);
@@ -2191,7 +2322,7 @@
 }

 /*
- * The following prevents a kworker from hogging CPU on !PREEMPT
+ * The following prevents a kworker from hogging CPU on !PREEMPTION
 * kernels, where a requeueing work item waiting for something to
 * happen could deadlock with stop_machine as such work item could
 * indefinitely requeue itself while all other CPUs are trapped in
@@ -2436,7 +2567,7 @@
 * being used to relieve memory pressure, don't
 * incur MAYDAY_INTERVAL delay inbetween.
 */
- if (need_to_create_worker(pool)) {
+ if (pwq->nr_active && need_to_create_worker(pool)) {
 raw_spin_lock(&wq_mayday_lock);
 /*
 * Queue iff we aren't racing destruction
@@ -2508,11 +2639,11 @@
 worker = current_wq_worker();

 WARN_ONCE(current->flags & PF_MEMALLOC,
- "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
+ "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
 current->pid, current->comm, target_wq->name, target_func);
 WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
 (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
- "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
+ "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps",
 worker->current_pwq->wq->name, worker->current_func,
 target_wq->name, target_func);
 }
@@ -2743,7 +2874,7 @@
 * First flushers are responsible for cascading flushes and
 * handling overflow. Non-first flushers can simply return.
 */
- if (wq->first_flusher != &this_flusher)
+ if (READ_ONCE(wq->first_flusher) != &this_flusher)
 return;

 mutex_lock(&wq->mutex);
@@ -2752,7 +2883,7 @@
 if (wq->first_flusher != &this_flusher)
 goto out_unlock;

- wq->first_flusher = NULL;
+ WRITE_ONCE(wq->first_flusher, NULL);

 WARN_ON_ONCE(!list_empty(&this_flusher.list));
 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
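/*
 * Editor's note: a generic sketch of the annotation pattern used above, not
 * part of this patch. READ_ONCE() marks the lockless peek at wq->first_flusher
 * and the result is re-checked under wq->mutex before WRITE_ONCE() clears it.
 * The lock, pointer and function below are hypothetical.
 */
#include <linux/compiler.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(example_lock);
static void *example_owner;	/* written only under example_lock */

static void example_release(void *me)
{
	if (READ_ONCE(example_owner) != me)	/* cheap lockless early exit */
		return;

	mutex_lock(&example_lock);
	if (example_owner == me)		/* authoritative check under the lock */
		WRITE_ONCE(example_owner, NULL);
	mutex_unlock(&example_lock);
}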
@@ -2943,10 +3074,8 @@
 if (WARN_ON(!work->func))
 return false;

- if (!from_cancel) {
- lock_map_acquire(&work->lockdep_map);
- lock_map_release(&work->lockdep_map);
- }
+ lock_map_acquire(&work->lockdep_map);
+ lock_map_release(&work->lockdep_map);

 if (start_flush_work(work, &barr, from_cancel)) {
 wait_for_completion(&barr.done);
@@ -3241,7 +3370,7 @@
 *
 * Undo alloc_workqueue_attrs().
 */
-static void free_workqueue_attrs(struct workqueue_attrs *attrs)
+void free_workqueue_attrs(struct workqueue_attrs *attrs)
 {
 if (attrs) {
 free_cpumask_var(attrs->cpumask);
@@ -3257,7 +3386,7 @@
 *
 * Return: The allocated new workqueue_attr on success. %NULL on failure.
 */
-static struct workqueue_attrs *alloc_workqueue_attrs(void)
+struct workqueue_attrs *alloc_workqueue_attrs(void)
 {
 struct workqueue_attrs *attrs;

@@ -3348,17 +3477,56 @@
 return 0;
 }

+#ifdef CONFIG_LOCKDEP
+static void wq_init_lockdep(struct workqueue_struct *wq)
+{
+ char *lock_name;
+
+ lockdep_register_key(&wq->key);
+ lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name);
+ if (!lock_name)
+ lock_name = wq->name;
+
+ wq->lock_name = lock_name;
+ lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0);
+}
+
+static void wq_unregister_lockdep(struct workqueue_struct *wq)
+{
+ lockdep_unregister_key(&wq->key);
+}
+
+static void wq_free_lockdep(struct workqueue_struct *wq)
+{
+ if (wq->lock_name != wq->name)
+ kfree(wq->lock_name);
+}
+#else
+static void wq_init_lockdep(struct workqueue_struct *wq)
+{
+}
+
+static void wq_unregister_lockdep(struct workqueue_struct *wq)
+{
+}
+
+static void wq_free_lockdep(struct workqueue_struct *wq)
+{
+}
+#endif
+
 static void rcu_free_wq(struct rcu_head *rcu)
 {
 struct workqueue_struct *wq =
 container_of(rcu, struct workqueue_struct, rcu);
+
+ wq_free_lockdep(wq);

 if (!(wq->flags & WQ_UNBOUND))
 free_percpu(wq->cpu_pwqs);
 else
 free_workqueue_attrs(wq->unbound_attrs);

- kfree(wq->rescuer);
 kfree(wq);
 }

@@ -3369,6 +3537,18 @@
 ida_destroy(&pool->worker_ida);
 free_workqueue_attrs(pool->attrs);
 kfree(pool);
+}
+
+/* This returns with the lock held on success (pool manager is inactive). */
+static bool wq_manager_inactive(struct worker_pool *pool)
+{
+ raw_spin_lock_irq(&pool->lock);
+
+ if (pool->flags & POOL_MANAGER_ACTIVE) {
+ raw_spin_unlock_irq(&pool->lock);
+ return false;
+ }
+ return true;
 }

 /**
@@ -3406,10 +3586,11 @@
 * Become the manager and destroy all workers. This prevents
 * @pool's workers from blocking on attach_mutex. We're the last
 * manager and @pool gets freed with the flag set.
+ * Because of how wq_manager_inactive() works, we will hold the
+ * spinlock after a successful wait.
 */
- raw_spin_lock_irq(&pool->lock);
- swait_event_lock_irq(wq_manager_wait,
- !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
+ rcuwait_wait_event(&manager_wait, wq_manager_inactive(pool),
+ TASK_UNINTERRUPTIBLE);
 pool->flags |= POOL_MANAGER_ACTIVE;

 while ((worker = first_idle_worker(pool)))
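/*
 * Editor's note: a reduced sketch of the rcuwait pairing used above
 * (rcuwait_wait_event() here, rcuwait_wake_up() in the earlier hunk), not
 * part of this patch. The flag and helpers below are hypothetical.
 */
#include <linux/compiler.h>
#include <linux/rcuwait.h>
#include <linux/sched.h>

static struct rcuwait example_wait = __RCUWAIT_INITIALIZER(example_wait);
static bool example_done;

static void example_waiter(void)
{
	/* sleeps until example_done is observed true; no waitqueue spinlock needed */
	rcuwait_wait_event(&example_wait, READ_ONCE(example_done),
			   TASK_UNINTERRUPTIBLE);
}

static void example_waker(void)
{
	WRITE_ONCE(example_done, true);
	rcuwait_wake_up(&example_wait);	/* wakes the single registered waiter, if any */
}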
@@ -3549,8 +3730,10 @@
 * If we're the last pwq going away, @wq is already dead and no one
 * is gonna access it anymore. Schedule RCU free.
 */
- if (is_last)
+ if (is_last) {
+ wq_unregister_lockdep(wq);
 call_rcu(&wq->rcu, rcu_free_wq);
+ }
 }

 /**
@@ -3913,16 +4096,20 @@
 *
 * Performs GFP_KERNEL allocations.
 *
+ * Assumes caller has CPU hotplug read exclusion, i.e. get_online_cpus().
+ *
 * Return: 0 on success and -errno on failure.
 */
-static int apply_workqueue_attrs(struct workqueue_struct *wq,
+int apply_workqueue_attrs(struct workqueue_struct *wq,
 const struct workqueue_attrs *attrs)
 {
 int ret;

- apply_wqattrs_lock();
+ lockdep_assert_cpus_held();
+
+ mutex_lock(&wq_pool_mutex);
 ret = apply_workqueue_attrs_locked(wq, attrs);
- apply_wqattrs_unlock();
+ mutex_unlock(&wq_pool_mutex);

 return ret;
 }
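/*
 * Editor's note: a sketch of how an in-kernel caller might use the now
 * non-static attrs API, not part of this patch. Per the comment above, the
 * caller must hold CPU hotplug read exclusion, and @wq must be an unbound
 * workqueue. The helper name and values are hypothetical.
 */
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/workqueue.h>

static int example_set_wq_cpumask(struct workqueue_struct *wq,
				  const struct cpumask *mask)
{
	struct workqueue_attrs *attrs;
	int ret;

	attrs = alloc_workqueue_attrs();
	if (!attrs)
		return -ENOMEM;

	attrs->nice = 0;
	cpumask_copy(attrs->cpumask, mask);

	get_online_cpus();		/* CPU hotplug read exclusion */
	ret = apply_workqueue_attrs(wq, attrs);
	put_online_cpus();

	free_workqueue_attrs(attrs);
	return ret;
}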
@@ -4035,16 +4222,21 @@
 mutex_unlock(&wq->mutex);
 }
 return 0;
- } else if (wq->flags & __WQ_ORDERED) {
+ }
+
+ get_online_cpus();
+ if (wq->flags & __WQ_ORDERED) {
 ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
 /* there should only be single pwq for ordering guarantee */
 WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
 wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
 "ordering guarantee broken for workqueue %s\n", wq->name);
- return ret;
 } else {
- return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
+ ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
 }
+ put_online_cpus();
+
+ return ret;
 }

 static int wq_clamp_max_active(int max_active, unsigned int flags,
@@ -4077,8 +4269,8 @@

 rescuer->rescue_wq = wq;
 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);
- ret = PTR_ERR_OR_ZERO(rescuer->task);
- if (ret) {
+ if (IS_ERR(rescuer->task)) {
+ ret = PTR_ERR(rescuer->task);
 kfree(rescuer);
 return ret;
 }
@@ -4090,11 +4282,10 @@
 return 0;
 }

-struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
- unsigned int flags,
- int max_active,
- struct lock_class_key *key,
- const char *lock_name, ...)
+__printf(1, 4)
+struct workqueue_struct *alloc_workqueue(const char *fmt,
+ unsigned int flags,
+ int max_active, ...)
 {
 size_t tbl_size = 0;
 va_list args;
@@ -4129,7 +4320,7 @@
 goto err_free_wq;
 }

- va_start(args, lock_name);
+ va_start(args, max_active);
 vsnprintf(wq->name, sizeof(wq->name), fmt, args);
 va_end(args);

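/*
 * Editor's note: call sites are unchanged by the conversion of
 * alloc_workqueue() from a macro around __alloc_workqueue_key() into a real
 * varargs function; a typical (hypothetical) caller, not part of this patch:
 */
#include <linux/errno.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;

static int example_create(int id)
{
	/* printf-style name, flags, max_active (0 selects the default) */
	example_wq = alloc_workqueue("example_wq/%d",
				     WQ_UNBOUND | WQ_MEM_RECLAIM, 0, id);
	return example_wq ? 0 : -ENOMEM;
}

static void example_destroy(void)
{
	/* drains and frees; the pwq_busy() sanity check below catches leaked work */
	destroy_workqueue(example_wq);
}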
@@ -4146,11 +4337,11 @@
 INIT_LIST_HEAD(&wq->flusher_overflow);
 INIT_LIST_HEAD(&wq->maydays);

- lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
+ wq_init_lockdep(wq);
 INIT_LIST_HEAD(&wq->list);

 if (alloc_and_link_pwqs(wq) < 0)
- goto err_free_wq;
+ goto err_unreg_lockdep;

 if (wq_online && init_rescuer(wq) < 0)
 goto err_destroy;
@@ -4176,6 +4367,9 @@

 return wq;

+err_unreg_lockdep:
+ wq_unregister_lockdep(wq);
+ wq_free_lockdep(wq);
 err_free_wq:
 free_workqueue_attrs(wq->unbound_attrs);
 kfree(wq);
@@ -4184,7 +4378,23 @@
 destroy_workqueue(wq);
 return NULL;
 }
-EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
+EXPORT_SYMBOL_GPL(alloc_workqueue);
+
+static bool pwq_busy(struct pool_workqueue *pwq)
+{
+ int i;
+
+ for (i = 0; i < WORK_NR_COLORS; i++)
+ if (pwq->nr_in_flight[i])
+ return true;
+
+ if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
+ return true;
+ if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+ return true;
+
+ return false;
+}

 /**
 * destroy_workqueue - safely terminate a workqueue
@@ -4220,26 +4430,25 @@
 kfree(rescuer);
 }

- /* sanity checks */
+ /*
+ * Sanity checks - grab all the locks so that we wait for all
+ * in-flight operations which may do put_pwq().
+ */
+ mutex_lock(&wq_pool_mutex);
 mutex_lock(&wq->mutex);
 for_each_pwq(pwq, wq) {
- int i;
-
- for (i = 0; i < WORK_NR_COLORS; i++) {
- if (WARN_ON(pwq->nr_in_flight[i])) {
- mutex_unlock(&wq->mutex);
- show_workqueue_state();
- return;
- }
- }
-
- if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) ||
- WARN_ON(pwq->nr_active) ||
- WARN_ON(!list_empty(&pwq->delayed_works))) {
+ raw_spin_lock_irq(&pwq->pool->lock);
+ if (WARN_ON(pwq_busy(pwq))) {
+ pr_warn("%s: %s has the following busy pwq\n",
+ __func__, wq->name);
+ show_pwq(pwq);
+ raw_spin_unlock_irq(&pwq->pool->lock);
 mutex_unlock(&wq->mutex);
+ mutex_unlock(&wq_pool_mutex);
 show_workqueue_state();
 return;
 }
+ raw_spin_unlock_irq(&pwq->pool->lock);
 }
 mutex_unlock(&wq->mutex);

@@ -4247,11 +4456,11 @@
 * wq list is used to freeze wq, remove from list after
 * flushing is complete in case freeze races us.
 */
- mutex_lock(&wq_pool_mutex);
 list_del_rcu(&wq->list);
 mutex_unlock(&wq_pool_mutex);

 if (!(wq->flags & WQ_UNBOUND)) {
+ wq_unregister_lockdep(wq);
 /*
 * The base ref is never dropped on per-cpu pwqs. Directly
 * schedule RCU free.
@@ -4477,14 +4686,14 @@
 * Carefully copy the associated workqueue's workfn, name and desc.
 * Keep the original last '\0' in case the original is garbage.
 */
- probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
- probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
- probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
- probe_kernel_read(name, wq->name, sizeof(name) - 1);
- probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
+ copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn));
+ copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq));
+ copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq));
+ copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1);
+ copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1);

 if (fn || name[0] || desc[0]) {
- printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
+ printk("%sWorkqueue: %s %ps", log_lvl, name, fn);
 if (strcmp(name, desc))
 pr_cont(" (%s)", desc);
 pr_cont("\n");
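/*
 * Editor's note: copy_from_kernel_nofault() is the renamed
 * probe_kernel_read(); it returns 0 on success and a negative error if the
 * source address cannot be read safely. A hypothetical helper, not part of
 * this patch:
 */
#include <linux/uaccess.h>

static unsigned long example_peek(const unsigned long *maybe_bad_ptr)
{
	unsigned long val = 0;

	if (copy_from_kernel_nofault(&val, maybe_bad_ptr, sizeof(val)))
		return 0;	/* address was not readable */
	return val;
}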
@@ -4509,7 +4718,7 @@
 pr_cont("%s BAR(%d)", comma ? "," : "",
 task_pid_nr(barr->task));
 } else {
- pr_cont("%s %pf", comma ? "," : "", work->func);
+ pr_cont("%s %ps", comma ? "," : "", work->func);
 }
 }

@@ -4542,9 +4751,9 @@
 if (worker->current_pwq != pwq)
 continue;

- pr_cont("%s %d%s:%pf", comma ? "," : "",
+ pr_cont("%s %d%s:%ps", comma ? "," : "",
 task_pid_nr(worker->task),
- worker == pwq->wq->rescuer ? "(RESCUER)" : "",
+ worker->rescue_wq ? "(RESCUER)" : "",
 worker->current_func);
 list_for_each_entry(work, &worker->scheduled, entry)
 pr_cont_work(false, work);
@@ -4704,6 +4913,7 @@

 mutex_unlock(&wq_pool_attach_mutex);
 }
+EXPORT_SYMBOL_GPL(wq_worker_comm);

 #ifdef CONFIG_SMP

@@ -4827,7 +5037,7 @@
 *
 * WRITE_ONCE() is necessary because @worker->flags may be
 * tested without holding any lock in
- * wq_worker_waking_up(). Without it, NOT_RUNNING test may
+ * wq_worker_running(). Without it, NOT_RUNNING test may
 * fail incorrectly leading to premature concurrency
 * management operations.
 */
@@ -5642,6 +5852,7 @@
 pr_cont_pool_info(pool);
 pr_cont(" stuck for %us!\n",
 jiffies_to_msecs(now - pool_ts) / 1000);
+ trace_android_vh_wq_lockup_pool(pool->cpu, pool_ts);
 }
 }

@@ -5725,6 +5936,13 @@
 return;
 }

+ for_each_possible_cpu(cpu) {
+ if (WARN_ON(cpu_to_node(cpu) == NUMA_NO_NODE)) {
+ pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
+ return;
+ }
+ }
+
 wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
 BUG_ON(!wq_update_unbound_numa_attrs_buf);

@@ -5742,11 +5960,6 @@

 for_each_possible_cpu(cpu) {
 node = cpu_to_node(cpu);
- if (WARN_ON(node == NUMA_NO_NODE)) {
- pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
- /* happens iff arch is bonkers, let's just proceed */
- return;
- }
 cpumask_set_cpu(cpu, tbl[node]);
 }

@@ -5764,13 +5977,13 @@
 * items. Actual work item execution starts only after kthreads can be
 * created and scheduled right before early initcalls.
 */
-int __init workqueue_init_early(void)
+void __init workqueue_init_early(void)
 {
 int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
 int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
 int i, cpu;

- WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
+ BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));

 BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
 cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(hk_flags));
@@ -5831,8 +6044,6 @@
 !system_unbound_wq || !system_freezable_wq ||
 !system_power_efficient_wq ||
 !system_freezable_power_efficient_wq);
-
- return 0;
 }

 /**
@@ -5844,7 +6055,7 @@
 * are no kworkers executing the work items yet. Populate the worker pools
 * with the initial workers and enable future kworker creations.
 */
-int __init workqueue_init(void)
+void __init workqueue_init(void)
 {
 struct workqueue_struct *wq;
 struct worker_pool *pool;
@@ -5891,6 +6102,4 @@

 wq_online = true;
 wq_watchdog_init();
-
- return 0;
 }