~hc/RK356X_SDK_RELEASE.git

..	..	@@ -1,63 +1,17 @@
	1	+/* SPDX-License-Identifier: GPL-2.0+ */
1	2	/*
2	3	* Read-Copy Update mechanism for mutual exclusion (tree-based version)
3	4	* Internal non-public definitions that provide either classic
4	5	* or preemptible semantics.
5	6	*
6		- * This program is free software; you can redistribute it and/or modify
7		- * it under the terms of the GNU General Public License as published by
8		- * the Free Software Foundation; either version 2 of the License, or
9		- * (at your option) any later version.
10		- *
11		- * This program is distributed in the hope that it will be useful,
12		- * but WITHOUT ANY WARRANTY; without even the implied warranty of
13		- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		- * GNU General Public License for more details.
15		- *
16		- * You should have received a copy of the GNU General Public License
17		- * along with this program; if not, you can access it online at
18		- * http://www.gnu.org/licenses/gpl-2.0.html.
19		- *
20	7	* Copyright Red Hat, 2009
21	8	* Copyright IBM Corporation, 2009
22	9	*
23	10	* Author: Ingo Molnar <mingo@elte.hu>
24		- * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
	11	+ * Paul E. McKenney <paulmck@linux.ibm.com>
25	12	*/
26		-
27		-#include <linux/delay.h>
28		-#include <linux/gfp.h>
29		-#include <linux/oom.h>
30		-#include <linux/sched/debug.h>
31		-#include <linux/smpboot.h>
32		-#include <linux/sched/isolation.h>
33		-#include <uapi/linux/sched/types.h>
34		-#include "../time/tick-internal.h"
35		-
36		-#ifdef CONFIG_RCU_BOOST
37	13
38	14	#include "../locking/rtmutex_common.h"
39		-
40		-/*
41		- * Control variables for per-CPU and per-rcu_node kthreads. These
42		- * handle all flavors of RCU.
43		- */
44		-static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
45		-DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
46		-DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
47		-DEFINE_PER_CPU(char, rcu_cpu_has_work);
48		-
49		-#else /* #ifdef CONFIG_RCU_BOOST */
50		-
51		-/*
52		- * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
53		- * all uses are in dead code. Provide a definition to keep the compiler
54		- * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
55		- * This probably needs to be excluded from -rt builds.
56		- */
57		-#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
58		-#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
59		-
60		-#endif /* #else #ifdef CONFIG_RCU_BOOST */
61	15
62	16	#ifdef CONFIG_RCU_NOCB_CPU
63	17	static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
..	..	@@ -82,6 +36,8 @@
82	36	pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
83	37	if (IS_ENABLED(CONFIG_PROVE_RCU))
84	38	pr_info("\tRCU lockdep checking is enabled.\n");
	39	+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
	40	+ pr_info("\tRCU strict (and thus non-scalable) grace periods enabled.\n");
85	41	if (RCU_NUM_LVLS >= 4)
86	42	pr_info("\tFour(or more)-level hierarchy is enabled.\n");
87	43	if (RCU_FANOUT_LEAF != 16)
..	..	@@ -102,10 +58,14 @@
102	58	pr_info("\tBoot-time adjustment of callback high-water mark to %ld.\n", qhimark);
103	59	if (qlowmark != DEFAULT_RCU_QLOMARK)
104	60	pr_info("\tBoot-time adjustment of callback low-water mark to %ld.\n", qlowmark);
	61	+ if (qovld != DEFAULT_RCU_QOVLD)
	62	+ pr_info("\tBoot-time adjustment of callback overload level to %ld.\n", qovld);
105	63	if (jiffies_till_first_fqs != ULONG_MAX)
106	64	pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs);
107	65	if (jiffies_till_next_fqs != ULONG_MAX)
108	66	pr_info("\tBoot-time adjustment of subsequent FQS scan delay to %ld jiffies.\n", jiffies_till_next_fqs);
	67	+ if (jiffies_till_sched_qs != ULONG_MAX)
	68	+ pr_info("\tBoot-time adjustment of scheduler-enlistment delay to %ld jiffies.\n", jiffies_till_sched_qs);
109	69	if (rcu_kick_kthreads)
110	70	pr_info("\tKick kthreads if too-long grace period.\n");
111	71	if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD))
..	..	@@ -116,6 +76,8 @@
116	76	pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
117	77	if (gp_cleanup_delay)
118	78	pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
	79	+ if (!use_softirq)
	80	+ pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
119	81	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
120	82	pr_info("\tRCU debug extended QS entry/exit.\n");
121	83	rcupdate_announce_bootup_oddness();
..	..	@@ -123,12 +85,7 @@
123	85
124	86	#ifdef CONFIG_PREEMPT_RCU
125	87
126		-RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
127		-static struct rcu_state *const rcu_state_p = &rcu_preempt_state;
128		-static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data;
129		-
130		-static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
131		- bool wake);
	88	+static void rcu_report_exp_rnp(struct rcu_node *rnp, bool wake);
132	89	static void rcu_read_unlock_special(struct task_struct *t);
133	90
134	91	/*
..	..	@@ -271,7 +228,7 @@
271	228	WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq);
272	229	}
273	230	if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
274		- rnp->exp_tasks = &t->rcu_node_entry;
	231	+ WRITE_ONCE(rnp->exp_tasks, &t->rcu_node_entry);
275	232	WARN_ON_ONCE(!(blkd_state & RCU_GP_BLKD) !=
276	233	!(rnp->qsmask & rdp->grpmask));
277	234	WARN_ON_ONCE(!(blkd_state & RCU_EXP_BLKD) !=
..	..	@@ -284,13 +241,10 @@
284	241	* no need to check for a subsequent expedited GP. (Though we are
285	242	* still in a quiescent state in any case.)
286	243	*/
287		- if (blkd_state & RCU_EXP_BLKD &&
288		- t->rcu_read_unlock_special.b.exp_need_qs) {
289		- t->rcu_read_unlock_special.b.exp_need_qs = false;
290		- rcu_report_exp_rdp(rdp->rsp, rdp, true);
291		- } else {
292		- WARN_ON_ONCE(t->rcu_read_unlock_special.b.exp_need_qs);
293		- }
	244	+ if (blkd_state & RCU_EXP_BLKD && rdp->exp_deferred_qs)
	245	+ rcu_report_exp_rdp(rdp);
	246	+ else
	247	+ WARN_ON_ONCE(rdp->exp_deferred_qs);
294	248	}
295	249
296	250	/*
..	..	@@ -306,16 +260,16 @@
306	260	*
307	261	* Callers to this function must disable preemption.
308	262	*/
309		-static void rcu_preempt_qs(void)
	263	+static void rcu_qs(void)
310	264	{
311		- RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_qs() invoked with preemption enabled!!!\n");
312		- if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) {
	265	+ RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n");
	266	+ if (__this_cpu_read(rcu_data.cpu_no_qs.s)) {
313	267	trace_rcu_grace_period(TPS("rcu_preempt"),
314		- __this_cpu_read(rcu_data_p->gp_seq),
	268	+ __this_cpu_read(rcu_data.gp_seq),
315	269	TPS("cpuqs"));
316		- __this_cpu_write(rcu_data_p->cpu_no_qs.b.norm, false);
317		- barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
318		- current->rcu_read_unlock_special.b.need_qs = false;
	270	+ __this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
	271	+ barrier(); /* Coordinate with rcu_flavor_sched_clock_irq(). */
	272	+ WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, false);
319	273	}
320	274	}
321	275
..	..	@@ -332,19 +286,19 @@
332	286	*
333	287	* Caller must disable interrupts.
334	288	*/
335		-static void rcu_preempt_note_context_switch(bool preempt)
	289	+void rcu_note_context_switch(bool preempt)
336	290	{
337	291	struct task_struct *t = current;
338		- struct rcu_data *rdp;
	292	+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
339	293	struct rcu_node *rnp;
340	294
	295	+ trace_rcu_utilization(TPS("Start context switch"));
341	296	lockdep_assert_irqs_disabled();
342		- WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);
343		- if (t->rcu_read_lock_nesting > 0 &&
	297	+ WARN_ON_ONCE(!preempt && rcu_preempt_depth() > 0);
	298	+ if (rcu_preempt_depth() > 0 &&
344	299	!t->rcu_read_unlock_special.b.blocked) {
345	300
346	301	/* Possibly blocking in an RCU read-side critical section. */
347		- rdp = this_cpu_ptr(rcu_state_p->rda);
348	302	rnp = rdp->mynode;
349	303	raw_spin_lock_rcu_node(rnp);
350	304	t->rcu_read_unlock_special.b.blocked = true;
..	..	@@ -357,20 +311,14 @@
357	311	*/
358	312	WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0);
359	313	WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
360		- trace_rcu_preempt_task(rdp->rsp->name,
	314	+ trace_rcu_preempt_task(rcu_state.name,
361	315	t->pid,
362	316	(rnp->qsmask & rdp->grpmask)
363	317	? rnp->gp_seq
364	318	: rcu_seq_snap(&rnp->gp_seq));
365	319	rcu_preempt_ctxt_queue(rnp, rdp);
366		- } else if (t->rcu_read_lock_nesting < 0 &&
367		- t->rcu_read_unlock_special.s) {
368		-
369		- /*
370		- * Complete exit from RCU read-side critical section on
371		- * behalf of preempted instance of __rcu_read_unlock().
372		- */
373		- rcu_read_unlock_special(t);
	320	+ } else {
	321	+ rcu_preempt_deferred_qs(t);
374	322	}
375	323
376	324	/*
..	..	@@ -382,8 +330,13 @@
382	330	* grace period, then the fact that the task has been enqueued
383	331	* means that we continue to block the current grace period.
384	332	*/
385		- rcu_preempt_qs();
	333	+ rcu_qs();
	334	+ if (rdp->exp_deferred_qs)
	335	+ rcu_report_exp_rdp(rdp);
	336	+ rcu_tasks_qs(current, preempt);
	337	+ trace_rcu_utilization(TPS("End context switch"));
386	338	}
	339	+EXPORT_SYMBOL_GPL(rcu_note_context_switch);
387	340
388	341	/*
389	342	* Check for preempted RCU readers blocking the current grace period
..	..	@@ -395,6 +348,24 @@
395	348	return READ_ONCE(rnp->gp_tasks) != NULL;
396	349	}
397	350
	351	+/* limit value for ->rcu_read_lock_nesting. */
	352	+#define RCU_NEST_PMAX (INT_MAX / 2)
	353	+
	354	+static void rcu_preempt_read_enter(void)
	355	+{
	356	+ current->rcu_read_lock_nesting++;
	357	+}
	358	+
	359	+static int rcu_preempt_read_exit(void)
	360	+{
	361	+ return --current->rcu_read_lock_nesting;
	362	+}
	363	+
	364	+static void rcu_preempt_depth_set(int val)
	365	+{
	366	+ current->rcu_read_lock_nesting = val;
	367	+}
	368	+
398	369	/*
399	370	* Preemptible RCU implementation for rcu_read_lock().
400	371	* Just increment ->rcu_read_lock_nesting, shared state will be updated
..	..	@@ -402,7 +373,11 @@
402	373	*/
403	374	void __rcu_read_lock(void)
404	375	{
405		- current->rcu_read_lock_nesting++;
	376	+ rcu_preempt_read_enter();
	377	+ if (IS_ENABLED(CONFIG_PROVE_LOCKING))
	378	+ WARN_ON_ONCE(rcu_preempt_depth() > RCU_NEST_PMAX);
	379	+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) && rcu_state.gp_kthread)
	380	+ WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, true);
406	381	barrier(); /* critical section after entry code. */
407	382	}
408	383	EXPORT_SYMBOL_GPL(__rcu_read_lock);
..	..	@@ -418,24 +393,16 @@
418	393	{
419	394	struct task_struct *t = current;
420	395
421		- if (t->rcu_read_lock_nesting != 1) {
422		- --t->rcu_read_lock_nesting;
423		- } else {
	396	+ if (rcu_preempt_read_exit() == 0) {
424	397	barrier(); /* critical section before exit code. */
425		- t->rcu_read_lock_nesting = INT_MIN;
426		- barrier(); /* assign before ->rcu_read_unlock_special load */
427	398	if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
428	399	rcu_read_unlock_special(t);
429		- barrier(); /* ->rcu_read_unlock_special load before assign */
430		- t->rcu_read_lock_nesting = 0;
431	400	}
432		-#ifdef CONFIG_PROVE_LOCKING
433		- {
434		- int rrln = READ_ONCE(t->rcu_read_lock_nesting);
	401	+ if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
	402	+ int rrln = rcu_preempt_depth();
435	403
436		- WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
	404	+ WARN_ON_ONCE(rrln < 0 || rrln > RCU_NEST_PMAX);
437	405	}
438		-#endif /* #ifdef CONFIG_PROVE_LOCKING */
439	406	}
440	407	EXPORT_SYMBOL_GPL(__rcu_read_unlock);
441	408
..	..	@@ -464,27 +431,21 @@
464	431	}
465	432
466	433	/*
467		- * Handle special cases during rcu_read_unlock(), such as needing to
468		- * notify RCU core processing or task having blocked during the RCU
469		- * read-side critical section.
	434	+ * Report deferred quiescent states. The deferral time can
	435	+ * be quite short, for example, in the case of the call from
	436	+ * rcu_read_unlock_special().
470	437	*/
471		-static void rcu_read_unlock_special(struct task_struct *t)
	438	+static void
	439	+rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
472	440	{
473	441	bool empty_exp;
474	442	bool empty_norm;
475	443	bool empty_exp_now;
476		- unsigned long flags;
477	444	struct list_head *np;
478	445	bool drop_boost_mutex = false;
479	446	struct rcu_data *rdp;
480	447	struct rcu_node *rnp;
481	448	union rcu_special special;
482		-
483		- /* NMI handlers cannot block and cannot safely manipulate state. */
484		- if (in_nmi())
485		- return;
486		-
487		- local_irq_save(flags);
488	449
489	450	/*
490	451	* If RCU core is waiting for this CPU to exit its critical section,
..	..	@@ -492,49 +453,32 @@
492	453	* t->rcu_read_unlock_special cannot change.
493	454	*/
494	455	special = t->rcu_read_unlock_special;
	456	+ rdp = this_cpu_ptr(&rcu_data);
	457	+ if (!special.s && !rdp->exp_deferred_qs) {
	458	+ local_irq_restore(flags);
	459	+ return;
	460	+ }
	461	+ t->rcu_read_unlock_special.s = 0;
495	462	if (special.b.need_qs) {
496		- rcu_preempt_qs();
497		- t->rcu_read_unlock_special.b.need_qs = false;
498		- if (!t->rcu_read_unlock_special.s) {
499		- local_irq_restore(flags);
500		- return;
	463	+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
	464	+ rcu_report_qs_rdp(rdp);
	465	+ udelay(rcu_unlock_delay);
	466	+ } else {
	467	+ rcu_qs();
501	468	}
502	469	}
503	470
504	471	/*
505		- * Respond to a request for an expedited grace period, but only if
506		- * we were not preempted, meaning that we were running on the same
507		- * CPU throughout. If we were preempted, the exp_need_qs flag
508		- * would have been cleared at the time of the first preemption,
509		- * and the quiescent state would be reported when we were dequeued.
	472	+ * Respond to a request by an expedited grace period for a
	473	+ * quiescent state from this CPU. Note that requests from
	474	+ * tasks are handled when removing the task from the
	475	+ * blocked-tasks list below.
510	476	*/
511		- if (special.b.exp_need_qs) {
512		- WARN_ON_ONCE(special.b.blocked);
513		- t->rcu_read_unlock_special.b.exp_need_qs = false;
514		- rdp = this_cpu_ptr(rcu_state_p->rda);
515		- rcu_report_exp_rdp(rcu_state_p, rdp, true);
516		- if (!t->rcu_read_unlock_special.s) {
517		- local_irq_restore(flags);
518		- return;
519		- }
520		- }
521		-
522		- /* Hardware IRQ handlers cannot block, complain if they get here. */
523		- if (in_irq() || in_serving_softirq()) {
524		- lockdep_rcu_suspicious(__FILE__, __LINE__,
525		- "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n");
526		- pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n",
527		- t->rcu_read_unlock_special.s,
528		- t->rcu_read_unlock_special.b.blocked,
529		- t->rcu_read_unlock_special.b.exp_need_qs,
530		- t->rcu_read_unlock_special.b.need_qs);
531		- local_irq_restore(flags);
532		- return;
533		- }
	477	+ if (rdp->exp_deferred_qs)
	478	+ rcu_report_exp_rdp(rdp);
534	479
535	480	/* Clean up if blocked during RCU read-side critical section. */
536	481	if (special.b.blocked) {
537		- t->rcu_read_unlock_special.b.blocked = false;
538	482
539	483	/*
540	484	* Remove this task from the list it blocked on. The task
..	..	@@ -549,7 +493,7 @@
549	493	empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
550	494	WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq &&
551	495	(!empty_norm || rnp->qsmask));
552		- empty_exp = sync_rcu_preempt_exp_done(rnp);
	496	+ empty_exp = sync_rcu_exp_done(rnp);
553	497	smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
554	498	np = rcu_next_node_entry(t, rnp);
555	499	list_del_init(&t->rcu_node_entry);
..	..	@@ -559,12 +503,12 @@
559	503	if (&t->rcu_node_entry == rnp->gp_tasks)
560	504	WRITE_ONCE(rnp->gp_tasks, np);
561	505	if (&t->rcu_node_entry == rnp->exp_tasks)
562		- rnp->exp_tasks = np;
	506	+ WRITE_ONCE(rnp->exp_tasks, np);
563	507	if (IS_ENABLED(CONFIG_RCU_BOOST)) {
564	508	/* Snapshot ->boost_mtx ownership w/rnp->lock held. */
565	509	drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
566	510	if (&t->rcu_node_entry == rnp->boost_tasks)
567		- rnp->boost_tasks = np;
	511	+ WRITE_ONCE(rnp->boost_tasks, np);
568	512	}
569	513
570	514	/*
..	..	@@ -573,7 +517,7 @@
573	517	* Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
574	518	* so we must take a snapshot of the expedited state.
575	519	*/
576		- empty_exp_now = sync_rcu_preempt_exp_done(rnp);
	520	+ empty_exp_now = sync_rcu_exp_done(rnp);
577	521	if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) {
578	522	trace_rcu_quiescent_state_report(TPS("preempt_rcu"),
579	523	rnp->gp_seq,
..	..	@@ -582,138 +526,141 @@
582	526	rnp->grplo,
583	527	rnp->grphi,
584	528	!!rnp->gp_tasks);
585		- rcu_report_unblock_qs_rnp(rcu_state_p, rnp, flags);
	529	+ rcu_report_unblock_qs_rnp(rnp, flags);
586	530	} else {
587	531	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
588	532	}
589		-
590		- /* Unboost if we were boosted. */
591		- if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
592		- rt_mutex_futex_unlock(&rnp->boost_mtx);
593	533
594	534	/*
595	535	* If this was the last task on the expedited lists,
596	536	* then we need to report up the rcu_node hierarchy.
597	537	*/
598	538	if (!empty_exp && empty_exp_now)
599		- rcu_report_exp_rnp(rcu_state_p, rnp, true);
	539	+ rcu_report_exp_rnp(rnp, true);
	540	+
	541	+ /* Unboost if we were boosted. */
	542	+ if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
	543	+ rt_mutex_futex_unlock(&rnp->boost_mtx);
	544	+
600	545	} else {
601	546	local_irq_restore(flags);
602	547	}
603	548	}
604	549
605	550	/*
606		- * Dump detailed information for all tasks blocking the current RCU
607		- * grace period on the specified rcu_node structure.
	551	+ * Is a deferred quiescent-state pending, and are we also not in
	552	+ * an RCU read-side critical section? It is the caller's responsibility
	553	+ * to ensure it is otherwise safe to report any deferred quiescent
	554	+ * states. The reason for this is that it is safe to report a
	555	+ * quiescent state during context switch even though preemption
	556	+ * is disabled. This function cannot be expected to understand these
	557	+ * nuances, so the caller must handle them.
608	558	*/
609		-static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
	559	+static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
	560	+{
	561	+ return (__this_cpu_read(rcu_data.exp_deferred_qs) ||
	562	+ READ_ONCE(t->rcu_read_unlock_special.s)) &&
	563	+ rcu_preempt_depth() == 0;
	564	+}
	565	+
	566	+/*
	567	+ * Report a deferred quiescent state if needed and safe to do so.
	568	+ * As with rcu_preempt_need_deferred_qs(), "safe" involves only
	569	+ * not being in an RCU read-side critical section. The caller must
	570	+ * evaluate safety in terms of interrupt, softirq, and preemption
	571	+ * disabling.
	572	+ */
	573	+static void rcu_preempt_deferred_qs(struct task_struct *t)
610	574	{
611	575	unsigned long flags;
612		- struct task_struct *t;
613	576
614		- raw_spin_lock_irqsave_rcu_node(rnp, flags);
615		- if (!rcu_preempt_blocked_readers_cgp(rnp)) {
616		- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	577	+ if (!rcu_preempt_need_deferred_qs(t))
	578	+ return;
	579	+ local_irq_save(flags);
	580	+ rcu_preempt_deferred_qs_irqrestore(t, flags);
	581	+}
	582	+
	583	+/*
	584	+ * Minimal handler to give the scheduler a chance to re-evaluate.
	585	+ */
	586	+static void rcu_preempt_deferred_qs_handler(struct irq_work *iwp)
	587	+{
	588	+ struct rcu_data *rdp;
	589	+
	590	+ rdp = container_of(iwp, struct rcu_data, defer_qs_iw);
	591	+ rdp->defer_qs_iw_pending = false;
	592	+}
	593	+
	594	+/*
	595	+ * Handle special cases during rcu_read_unlock(), such as needing to
	596	+ * notify RCU core processing or task having blocked during the RCU
	597	+ * read-side critical section.
	598	+ */
	599	+static void rcu_read_unlock_special(struct task_struct *t)
	600	+{
	601	+ unsigned long flags;
	602	+ bool preempt_bh_were_disabled =
	603	+ !!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK));
	604	+ bool irqs_were_disabled;
	605	+
	606	+ /* NMI handlers cannot block and cannot safely manipulate state. */
	607	+ if (in_nmi())
	608	+ return;
	609	+
	610	+ local_irq_save(flags);
	611	+ irqs_were_disabled = irqs_disabled_flags(flags);
	612	+ if (preempt_bh_were_disabled || irqs_were_disabled) {
	613	+ bool exp;
	614	+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	615	+ struct rcu_node *rnp = rdp->mynode;
	616	+
	617	+ exp = (t->rcu_blocked_node &&
	618	+ READ_ONCE(t->rcu_blocked_node->exp_tasks)) ||
	619	+ (rdp->grpmask & READ_ONCE(rnp->expmask));
	620	+ // Need to defer quiescent state until everything is enabled.
	621	+ if (use_softirq && (in_irq() || (exp && !irqs_were_disabled))) {
	622	+ // Using softirq, safe to awaken, and either the
	623	+ // wakeup is free or there is an expedited GP.
	624	+ raise_softirq_irqoff(RCU_SOFTIRQ);
	625	+ } else {
	626	+ // Enabling BH or preempt does reschedule, so...
	627	+ // Also if no expediting, slow is OK.
	628	+ // Plus nohz_full CPUs eventually get tick enabled.
	629	+ set_tsk_need_resched(current);
	630	+ set_preempt_need_resched();
	631	+ if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled &&
	632	+ !rdp->defer_qs_iw_pending && exp && cpu_online(rdp->cpu)) {
	633	+ // Get scheduler to re-evaluate and call hooks.
	634	+ // If !IRQ_WORK, FQS scan will eventually IPI.
	635	+ init_irq_work(&rdp->defer_qs_iw,
	636	+ rcu_preempt_deferred_qs_handler);
	637	+ rdp->defer_qs_iw_pending = true;
	638	+ irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
	639	+ }
	640	+ }
	641	+ local_irq_restore(flags);
617	642	return;
618	643	}
619		- t = list_entry(rnp->gp_tasks->prev,
620		- struct task_struct, rcu_node_entry);
621		- list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
622		- /*
623		- * We could be printing a lot while holding a spinlock.
624		- * Avoid triggering hard lockup.
625		- */
626		- touch_nmi_watchdog();
627		- sched_show_task(t);
628		- }
629		- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
630		-}
631		-
632		-/*
633		- * Dump detailed information for all tasks blocking the current RCU
634		- * grace period.
635		- */
636		-static void rcu_print_detail_task_stall(struct rcu_state *rsp)
637		-{
638		- struct rcu_node *rnp = rcu_get_root(rsp);
639		-
640		- rcu_print_detail_task_stall_rnp(rnp);
641		- rcu_for_each_leaf_node(rsp, rnp)
642		- rcu_print_detail_task_stall_rnp(rnp);
643		-}
644		-
645		-static void rcu_print_task_stall_begin(struct rcu_node *rnp)
646		-{
647		- pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
648		- rnp->level, rnp->grplo, rnp->grphi);
649		-}
650		-
651		-static void rcu_print_task_stall_end(void)
652		-{
653		- pr_cont("\n");
654		-}
655		-
656		-/*
657		- * Scan the current list of tasks blocked within RCU read-side critical
658		- * sections, printing out the tid of each.
659		- */
660		-static int rcu_print_task_stall(struct rcu_node *rnp)
661		-{
662		- struct task_struct *t;
663		- int ndetected = 0;
664		-
665		- if (!rcu_preempt_blocked_readers_cgp(rnp))
666		- return 0;
667		- rcu_print_task_stall_begin(rnp);
668		- t = list_entry(rnp->gp_tasks->prev,
669		- struct task_struct, rcu_node_entry);
670		- list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
671		- pr_cont(" P%d", t->pid);
672		- ndetected++;
673		- }
674		- rcu_print_task_stall_end();
675		- return ndetected;
676		-}
677		-
678		-/*
679		- * Scan the current list of tasks blocked within RCU read-side critical
680		- * sections, printing out the tid of each that is blocking the current
681		- * expedited grace period.
682		- */
683		-static int rcu_print_task_exp_stall(struct rcu_node *rnp)
684		-{
685		- struct task_struct *t;
686		- int ndetected = 0;
687		-
688		- if (!rnp->exp_tasks)
689		- return 0;
690		- t = list_entry(rnp->exp_tasks->prev,
691		- struct task_struct, rcu_node_entry);
692		- list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
693		- pr_cont(" P%d", t->pid);
694		- ndetected++;
695		- }
696		- return ndetected;
	644	+ rcu_preempt_deferred_qs_irqrestore(t, flags);
697	645	}
698	646
699	647	/*
700	648	* Check that the list of blocked tasks for the newly completed grace
701	649	* period is in fact empty. It is a serious bug to complete a grace
702	650	* period that still has RCU readers blocked! This function must be
703		- * invoked -before- updating this rnp's ->gp_seq, and the rnp's ->lock
704		- * must be held by the caller.
	651	+ * invoked -before- updating this rnp's ->gp_seq.
705	652	*
706	653	* Also, if there are blocked tasks on the list, they automatically
707	654	* block the newly created grace period, so set up ->gp_tasks accordingly.
708	655	*/
709		-static void
710		-rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
	656	+static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
711	657	{
712	658	struct task_struct *t;
713	659
714	660	RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n");
	661	+ raw_lockdep_assert_held_rcu_node(rnp);
715	662	if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
716		- dump_blkd_tasks(rsp, rnp, 10);
	663	+ dump_blkd_tasks(rnp, 10);
717	664	if (rcu_preempt_has_tasks(rnp) &&
718	665	(rnp->qsmaskinit || rnp->wait_blkd_tasks)) {
719	666	WRITE_ONCE(rnp->gp_tasks, rnp->blkd_tasks.next);
..	..	@@ -726,139 +673,67 @@
726	673	}
727	674
728	675	/*
729		- * Check for a quiescent state from the current CPU. When a task blocks,
730		- * the task is recorded in the corresponding CPU's rcu_node structure,
731		- * which is checked elsewhere.
732		- *
733		- * Caller must disable hard irqs.
	676	+ * Check for a quiescent state from the current CPU, including voluntary
	677	+ * context switches for Tasks RCU. When a task blocks, the task is
	678	+ * recorded in the corresponding CPU's rcu_node structure, which is checked
	679	+ * elsewhere, hence this function need only check for quiescent states
	680	+ * related to the current CPU, not to those related to tasks.
734	681	*/
735		-static void rcu_preempt_check_callbacks(void)
	682	+static void rcu_flavor_sched_clock_irq(int user)
736	683	{
737		- struct rcu_state *rsp = &rcu_preempt_state;
738	684	struct task_struct *t = current;
739	685
740		- if (t->rcu_read_lock_nesting == 0) {
741		- rcu_preempt_qs();
	686	+ lockdep_assert_irqs_disabled();
	687	+ if (user || rcu_is_cpu_rrupt_from_idle()) {
	688	+ rcu_note_voluntary_context_switch(current);
	689	+ }
	690	+ if (rcu_preempt_depth() > 0 ||
	691	+ (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
	692	+ /* No QS, force context switch if deferred. */
	693	+ if (rcu_preempt_need_deferred_qs(t)) {
	694	+ set_tsk_need_resched(t);
	695	+ set_preempt_need_resched();
	696	+ }
	697	+ } else if (rcu_preempt_need_deferred_qs(t)) {
	698	+ rcu_preempt_deferred_qs(t); /* Report deferred QS. */
	699	+ return;
	700	+ } else if (!WARN_ON_ONCE(rcu_preempt_depth())) {
	701	+ rcu_qs(); /* Report immediate QS. */
742	702	return;
743	703	}
744		- if (t->rcu_read_lock_nesting > 0 &&
745		- __this_cpu_read(rcu_data_p->core_needs_qs) &&
746		- __this_cpu_read(rcu_data_p->cpu_no_qs.b.norm) &&
	704	+
	705	+ /* If GP is oldish, ask for help from rcu_read_unlock_special(). */
	706	+ if (rcu_preempt_depth() > 0 &&
	707	+ __this_cpu_read(rcu_data.core_needs_qs) &&
	708	+ __this_cpu_read(rcu_data.cpu_no_qs.b.norm) &&
747	709	!t->rcu_read_unlock_special.b.need_qs &&
748		- time_after(jiffies, rsp->gp_start + HZ))
	710	+ time_after(jiffies, rcu_state.gp_start + HZ))
749	711	t->rcu_read_unlock_special.b.need_qs = true;
750		-}
751		-
752		-/**
753		- * call_rcu() - Queue an RCU callback for invocation after a grace period.
754		- * @head: structure to be used for queueing the RCU updates.
755		- * @func: actual callback function to be invoked after the grace period
756		- *
757		- * The callback function will be invoked some time after a full grace
758		- * period elapses, in other words after all pre-existing RCU read-side
759		- * critical sections have completed. However, the callback function
760		- * might well execute concurrently with RCU read-side critical sections
761		- * that started after call_rcu() was invoked. RCU read-side critical
762		- * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
763		- * and may be nested.
764		- *
765		- * Note that all CPUs must agree that the grace period extended beyond
766		- * all pre-existing RCU read-side critical section. On systems with more
767		- * than one CPU, this means that when "func()" is invoked, each CPU is
768		- * guaranteed to have executed a full memory barrier since the end of its
769		- * last RCU read-side critical section whose beginning preceded the call
770		- * to call_rcu(). It also means that each CPU executing an RCU read-side
771		- * critical section that continues beyond the start of "func()" must have
772		- * executed a memory barrier after the call_rcu() but before the beginning
773		- * of that RCU read-side critical section. Note that these guarantees
774		- * include CPUs that are offline, idle, or executing in user mode, as
775		- * well as CPUs that are executing in the kernel.
776		- *
777		- * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
778		- * resulting RCU callback function "func()", then both CPU A and CPU B are
779		- * guaranteed to execute a full memory barrier during the time interval
780		- * between the call to call_rcu() and the invocation of "func()" -- even
781		- * if CPU A and CPU B are the same CPU (but again only if the system has
782		- * more than one CPU).
783		- */
784		-void call_rcu(struct rcu_head *head, rcu_callback_t func)
785		-{
786		- __call_rcu(head, func, rcu_state_p, -1, 0);
787		-}
788		-EXPORT_SYMBOL_GPL(call_rcu);
789		-
790		-/**
791		- * synchronize_rcu - wait until a grace period has elapsed.
792		- *
793		- * Control will return to the caller some time after a full grace
794		- * period has elapsed, in other words after all currently executing RCU
795		- * read-side critical sections have completed. Note, however, that
796		- * upon return from synchronize_rcu(), the caller might well be executing
797		- * concurrently with new RCU read-side critical sections that began while
798		- * synchronize_rcu() was waiting. RCU read-side critical sections are
799		- * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
800		- *
801		- * See the description of synchronize_sched() for more detailed
802		- * information on memory-ordering guarantees. However, please note
803		- * that -only- the memory-ordering guarantees apply. For example,
804		- * synchronize_rcu() is -not- guaranteed to wait on things like code
805		- * protected by preempt_disable(), instead, synchronize_rcu() is -only-
806		- * guaranteed to wait on RCU read-side critical sections, that is, sections
807		- * of code protected by rcu_read_lock().
808		- */
809		-void synchronize_rcu(void)
810		-{
811		- RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
812		- lock_is_held(&rcu_lock_map) \|\|
813		- lock_is_held(&rcu_sched_lock_map),
814		- "Illegal synchronize_rcu() in RCU read-side critical section");
815		- if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
816		- return;
817		- if (rcu_gp_is_expedited())
818		- synchronize_rcu_expedited();
819		- else
820		- wait_rcu_gp(call_rcu);
821		-}
822		-EXPORT_SYMBOL_GPL(synchronize_rcu);
823		-
824		-/**
825		- * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
826		- *
827		- * Note that this primitive does not necessarily wait for an RCU grace period
828		- * to complete. For example, if there are no RCU callbacks queued anywhere
829		- * in the system, then rcu_barrier() is within its rights to return
830		- * immediately, without waiting for anything, much less an RCU grace period.
831		- */
832		-void rcu_barrier(void)
833		-{
834		- _rcu_barrier(rcu_state_p);
835		-}
836		-EXPORT_SYMBOL_GPL(rcu_barrier);
837		-
838		-/*
839		- * Initialize preemptible RCU's state structures.
840		- */
841		-static void __init __rcu_init_preempt(void)
842		-{
843		- rcu_init_one(rcu_state_p);
844	712	}
845	713
846	714	/*
847	715	* Check for a task exiting while in a preemptible-RCU read-side
848		- * critical section, clean up if so. No need to issue warnings,
849		- * as debug_check_no_locks_held() already does this if lockdep
850		- * is enabled.
	716	+ * critical section, clean up if so. No need to issue warnings, as
	717	+ * debug_check_no_locks_held() already does this if lockdep is enabled.
	718	+ * Besides, if this function does anything other than just immediately
	719	+ * return, there was a bug of some sort. Spewing warnings from this
	720	+ * function is like as not to simply obscure important prior warnings.
851	721	*/
852	722	void exit_rcu(void)
853	723	{
854	724	struct task_struct *t = current;
855	725
856		- if (likely(list_empty(&current->rcu_node_entry)))
	726	+ if (unlikely(!list_empty(&current->rcu_node_entry))) {
	727	+ rcu_preempt_depth_set(1);
	728	+ barrier();
	729	+ WRITE_ONCE(t->rcu_read_unlock_special.b.blocked, true);
	730	+ } else if (unlikely(rcu_preempt_depth())) {
	731	+ rcu_preempt_depth_set(1);
	732	+ } else {
857	733	return;
858		- t->rcu_read_lock_nesting = 1;
859		- barrier();
860		- t->rcu_read_unlock_special.b.blocked = true;
	734	+ }
861	735	__rcu_read_unlock();
	736	+ rcu_preempt_deferred_qs(current);
862	737	}
863	738
864	739	/*
..	..	@@ -866,7 +741,7 @@
866	741	* specified number of elements.
867	742	*/
868	743	static void
869		-dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp, int ncheck)
	744	+dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
870	745	{
871	746	int cpu;
872	747	int i;
..	..	@@ -878,23 +753,23 @@
878	753	raw_lockdep_assert_held_rcu_node(rnp);
879	754	pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
880	755	__func__, rnp->grplo, rnp->grphi, rnp->level,
881		- (long)rnp->gp_seq, (long)rnp->completedqs);
	756	+ (long)READ_ONCE(rnp->gp_seq), (long)rnp->completedqs);
882	757	for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
883	758	pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n",
884	759	__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext);
885	760	pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n",
886		- __func__, READ_ONCE(rnp->gp_tasks), rnp->boost_tasks,
887		- rnp->exp_tasks);
	761	+ __func__, READ_ONCE(rnp->gp_tasks), data_race(rnp->boost_tasks),
	762	+ READ_ONCE(rnp->exp_tasks));
888	763	pr_info("%s: ->blkd_tasks", __func__);
889	764	i = 0;
890	765	list_for_each(lhp, &rnp->blkd_tasks) {
891	766	pr_cont(" %p", lhp);
892		- if (++i >= 10)
	767	+ if (++i >= ncheck)
893	768	break;
894	769	}
895	770	pr_cont("\n");
896	771	for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) {
897		- rdp = per_cpu_ptr(rsp->rda, cpu);
	772	+ rdp = per_cpu_ptr(&rcu_data, cpu);
898	773	onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp));
899	774	pr_info("\t%d: %c online: %ld(%d) offline: %ld(%d)\n",
900	775	cpu, ".o"[onl],
..	..	@@ -905,7 +780,23 @@
905	780
906	781	#else /* #ifdef CONFIG_PREEMPT_RCU */
907	782
908		-static struct rcu_state *const rcu_state_p = &rcu_sched_state;
	783	+/*
	784	+ * If strict grace periods are enabled, and if the calling
	785	+ * __rcu_read_unlock() marks the beginning of a quiescent state, immediately
	786	+ * report that quiescent state and, if requested, spin for a bit.
	787	+ */
	788	+void rcu_read_unlock_strict(void)
	789	+{
	790	+ struct rcu_data *rdp;
	791	+
	792	+ if (!IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ||
	793	+ irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)
	794	+ return;
	795	+ rdp = this_cpu_ptr(&rcu_data);
	796	+ rcu_report_qs_rdp(rdp);
	797	+ udelay(rcu_unlock_delay);
	798	+}
	799	+EXPORT_SYMBOL_GPL(rcu_read_unlock_strict);
909	800
910	801	/*
911	802	* Tell them what RCU they are running.
..	..	@@ -917,12 +808,73 @@
917	808	}
918	809
919	810	/*
920		- * Because preemptible RCU does not exist, we never have to check for
921		- * CPUs being in quiescent states.
	811	+ * Note a quiescent state for PREEMPTION=n. Because we do not need to know
	812	+ * how many quiescent states passed, just if there was at least one since
	813	+ * the start of the grace period, this just sets a flag. The caller must
	814	+ * have disabled preemption.
922	815	*/
923		-static void rcu_preempt_note_context_switch(bool preempt)
	816	+static void rcu_qs(void)
924	817	{
	818	+ RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!");
	819	+ if (!__this_cpu_read(rcu_data.cpu_no_qs.s))
	820	+ return;
	821	+ trace_rcu_grace_period(TPS("rcu_sched"),
	822	+ __this_cpu_read(rcu_data.gp_seq), TPS("cpuqs"));
	823	+ __this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
	824	+ if (!__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
	825	+ return;
	826	+ __this_cpu_write(rcu_data.cpu_no_qs.b.exp, false);
	827	+ rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
925	828	}
	829	+
	830	+/*
	831	+ * Register an urgently needed quiescent state. If there is an
	832	+ * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight
	833	+ * dyntick-idle quiescent state visible to other CPUs, which will in
	834	+ * some cases serve for expedited as well as normal grace periods.
	835	+ * Either way, register a lightweight quiescent state.
	836	+ */
	837	+void rcu_all_qs(void)
	838	+{
	839	+ unsigned long flags;
	840	+
	841	+ if (!raw_cpu_read(rcu_data.rcu_urgent_qs))
	842	+ return;
	843	+ preempt_disable();
	844	+ /* Load rcu_urgent_qs before other flags. */
	845	+ if (!smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {
	846	+ preempt_enable();
	847	+ return;
	848	+ }
	849	+ this_cpu_write(rcu_data.rcu_urgent_qs, false);
	850	+ if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs))) {
	851	+ local_irq_save(flags);
	852	+ rcu_momentary_dyntick_idle();
	853	+ local_irq_restore(flags);
	854	+ }
	855	+ rcu_qs();
	856	+ preempt_enable();
	857	+}
	858	+EXPORT_SYMBOL_GPL(rcu_all_qs);
	859	+
	860	+/*
	861	+ * Note a PREEMPTION=n context switch. The caller must have disabled interrupts.
	862	+ */
	863	+void rcu_note_context_switch(bool preempt)
	864	+{
	865	+ trace_rcu_utilization(TPS("Start context switch"));
	866	+ rcu_qs();
	867	+ /* Load rcu_urgent_qs before other flags. */
	868	+ if (!smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs)))
	869	+ goto out;
	870	+ this_cpu_write(rcu_data.rcu_urgent_qs, false);
	871	+ if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs)))
	872	+ rcu_momentary_dyntick_idle();
	873	+ rcu_tasks_qs(current, preempt);
	874	+out:
	875	+ trace_rcu_utilization(TPS("End context switch"));
	876	+}
	877	+EXPORT_SYMBOL_GPL(rcu_note_context_switch);
926	878
927	879	/*
928	880	* Because preemptible RCU does not exist, there are never any preempted
..	..	@@ -942,66 +894,47 @@
942	894	}
943	895
944	896	/*
945		- * Because preemptible RCU does not exist, we never have to check for
946		- * tasks blocked within RCU read-side critical sections.
	897	+ * Because there is no preemptible RCU, there can be no deferred quiescent
	898	+ * states.
947	899	*/
948		-static void rcu_print_detail_task_stall(struct rcu_state *rsp)
	900	+static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
949	901	{
	902	+ return false;
950	903	}
951		-
952		-/*
953		- * Because preemptible RCU does not exist, we never have to check for
954		- * tasks blocked within RCU read-side critical sections.
955		- */
956		-static int rcu_print_task_stall(struct rcu_node *rnp)
957		-{
958		- return 0;
959		-}
960		-
961		-/*
962		- * Because preemptible RCU does not exist, we never have to check for
963		- * tasks blocked within RCU read-side critical sections that are
964		- * blocking the current expedited grace period.
965		- */
966		-static int rcu_print_task_exp_stall(struct rcu_node *rnp)
967		-{
968		- return 0;
969		-}
	904	+static void rcu_preempt_deferred_qs(struct task_struct *t) { }
970	905
971	906	/*
972	907	* Because there is no preemptible RCU, there can be no readers blocked,
973	908	* so there is no need to check for blocked tasks. So check only for
974	909	* bogus qsmask values.
975	910	*/
976		-static void
977		-rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
	911	+static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
978	912	{
979	913	WARN_ON_ONCE(rnp->qsmask);
980	914	}
981	915
982	916	/*
983		- * Because preemptible RCU does not exist, it never has any callbacks
984		- * to check.
	917	+ * Check to see if this CPU is in a non-context-switch quiescent state,
	918	+ * namely user mode and idle loop.
985	919	*/
986		-static void rcu_preempt_check_callbacks(void)
	920	+static void rcu_flavor_sched_clock_irq(int user)
987	921	{
988		-}
	922	+ if (user || rcu_is_cpu_rrupt_from_idle()) {
989	923
990		-/*
991		- * Because preemptible RCU does not exist, rcu_barrier() is just
992		- * another name for rcu_barrier_sched().
993		- */
994		-void rcu_barrier(void)
995		-{
996		- rcu_barrier_sched();
997		-}
998		-EXPORT_SYMBOL_GPL(rcu_barrier);
	924	+ /*
	925	+ * Get here if this CPU took its interrupt from user
	926	+ * mode or from the idle loop, and if this is not a
	927	+ * nested interrupt. In this case, the CPU is in
	928	+ * a quiescent state, so note it.
	929	+ *
	930	+ * No memory barrier is required here because rcu_qs()
	931	+ * references only CPU-local variables that other CPUs
	932	+ * neither access nor modify, at least not while the
	933	+ * corresponding CPU is online.
	934	+ */
999	935
1000		-/*
1001		- * Because preemptible RCU does not exist, it need not be initialized.
1002		- */
1003		-static void __init __rcu_init_preempt(void)
1004		-{
	936	+ rcu_qs();
	937	+ }
1005	938	}
1006	939
1007	940	/*
..	..	@@ -1016,24 +949,27 @@
1016	949	* Dump the guaranteed-empty blocked-tasks state. Trust but verify.
1017	950	*/
1018	951	static void
1019		-dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp, int ncheck)
	952	+dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
1020	953	{
1021	954	WARN_ON_ONCE(!list_empty(&rnp->blkd_tasks));
1022	955	}
1023	956
1024	957	#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
1025	958
1026		-#ifdef CONFIG_RCU_BOOST
1027		-
1028		-static void rcu_wake_cond(struct task_struct *t, int status)
	959	+/*
	960	+ * If boosting, set rcuc kthreads to realtime priority.
	961	+ */
	962	+static void rcu_cpu_kthread_setup(unsigned int cpu)
1029	963	{
1030		- /*
1031		- * If the thread is yielding, only wake it when this
1032		- * is invoked from idle
1033		- */
1034		- if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
1035		- wake_up_process(t);
	964	+#ifdef CONFIG_RCU_BOOST
	965	+ struct sched_param sp;
	966	+
	967	+ sp.sched_priority = kthread_prio;
	968	+ sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
	969	+#endif /* #ifdef CONFIG_RCU_BOOST */
1036	970	}
	971	+
	972	+#ifdef CONFIG_RCU_BOOST
1037	973
1038	974	/*
1039	975	* Carry out RCU priority boosting on the task indicated by ->exp_tasks
..	..	@@ -1113,20 +1049,21 @@
1113	1049
1114	1050	trace_rcu_utilization(TPS("Start boost kthread@init"));
1115	1051	for (;;) {
1116		- rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
	1052	+ WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_WAITING);
1117	1053	trace_rcu_utilization(TPS("End boost kthread@rcu_wait"));
1118		- rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
	1054	+ rcu_wait(READ_ONCE(rnp->boost_tasks) ||
	1055	+ READ_ONCE(rnp->exp_tasks));
1119	1056	trace_rcu_utilization(TPS("Start boost kthread@rcu_wait"));
1120		- rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
	1057	+ WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_RUNNING);
1121	1058	more2boost = rcu_boost(rnp);
1122	1059	if (more2boost)
1123	1060	spincnt++;
1124	1061	else
1125	1062	spincnt = 0;
1126	1063	if (spincnt > 10) {
1127		- rnp->boost_kthread_status = RCU_KTHREAD_YIELDING;
	1064	+ WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_YIELDING);
1128	1065	trace_rcu_utilization(TPS("End boost kthread@rcu_yield"));
1129		- schedule_timeout_interruptible(2);
	1066	+ schedule_timeout_idle(2);
1130	1067	trace_rcu_utilization(TPS("Start boost kthread@rcu_yield"));
1131	1068	spincnt = 0;
1132	1069	}
..	..	@@ -1149,8 +1086,6 @@
1149	1086	static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1150	1087	__releases(rnp->lock)
1151	1088	{
1152		- struct task_struct *t;
1153		-
1154	1089	raw_lockdep_assert_held_rcu_node(rnp);
1155	1090	if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
1156	1091	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
..	..	@@ -1160,33 +1095,15 @@
1160	1095	(rnp->gp_tasks != NULL &&
1161	1096	rnp->boost_tasks == NULL &&
1162	1097	rnp->qsmask == 0 &&
1163		- ULONG_CMP_GE(jiffies, rnp->boost_time))) {
	1098	+ (!time_after(rnp->boost_time, jiffies) || rcu_state.cbovld))) {
1164	1099	if (rnp->exp_tasks == NULL)
1165		- rnp->boost_tasks = rnp->gp_tasks;
	1100	+ WRITE_ONCE(rnp->boost_tasks, rnp->gp_tasks);
1166	1101	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1167		- t = rnp->boost_kthread_task;
1168		- if (t)
1169		- rcu_wake_cond(t, rnp->boost_kthread_status);
	1102	+ rcu_wake_cond(rnp->boost_kthread_task,
	1103	+ READ_ONCE(rnp->boost_kthread_status));
1170	1104	} else {
1171	1105	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1172	1106	}
1173		-}
1174		-
1175		-/*
1176		- * Wake up the per-CPU kthread to invoke RCU callbacks.
1177		- */
1178		-static void invoke_rcu_callbacks_kthread(void)
1179		-{
1180		- unsigned long flags;
1181		-
1182		- local_irq_save(flags);
1183		- __this_cpu_write(rcu_cpu_has_work, 1);
1184		- if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
1185		- current != __this_cpu_read(rcu_cpu_kthread_task)) {
1186		- rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
1187		- __this_cpu_read(rcu_cpu_kthread_status));
1188		- }
1189		- local_irq_restore(flags);
1190	1107	}
1191	1108
1192	1109	/*
..	..	@@ -1195,7 +1112,7 @@
1195	1112	*/
1196	1113	static bool rcu_is_callbacks_kthread(void)
1197	1114	{
1198		- return __this_cpu_read(rcu_cpu_kthread_task) == current;
	1115	+ return __this_cpu_read(rcu_data.rcu_cpu_kthread_task) == current;
1199	1116	}
1200	1117
1201	1118	#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
..	..	@@ -1213,95 +1130,35 @@
1213	1130	* already exist. We only create this kthread for preemptible RCU.
1214	1131	* Returns zero if all is well, a negated errno otherwise.
1215	1132	*/
1216		-static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1217		- struct rcu_node *rnp)
	1133	+static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
1218	1134	{
1219		- int rnp_index = rnp - &rsp->node[0];
	1135	+ int rnp_index = rnp - rcu_get_root();
1220	1136	unsigned long flags;
1221	1137	struct sched_param sp;
1222	1138	struct task_struct *t;
1223	1139
1224		- if (rcu_state_p != rsp)
1225		- return 0;
	1140	+ if (!IS_ENABLED(CONFIG_PREEMPT_RCU))
	1141	+ return;
1226	1142
1227	1143	if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0)
1228		- return 0;
	1144	+ return;
1229	1145
1230		- rsp->boost = 1;
	1146	+ rcu_state.boost = 1;
	1147	+
1231	1148	if (rnp->boost_kthread_task != NULL)
1232		- return 0;
	1149	+ return;
	1150	+
1233	1151	t = kthread_create(rcu_boost_kthread, (void *)rnp,
1234	1152	"rcub/%d", rnp_index);
1235		- if (IS_ERR(t))
1236		- return PTR_ERR(t);
	1153	+ if (WARN_ON_ONCE(IS_ERR(t)))
	1154	+ return;
	1155	+
1237	1156	raw_spin_lock_irqsave_rcu_node(rnp, flags);
1238	1157	rnp->boost_kthread_task = t;
1239	1158	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1240	1159	sp.sched_priority = kthread_prio;
1241	1160	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1242	1161	wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
1243		- return 0;
1244		-}
1245		-
1246		-static void rcu_kthread_do_work(void)
1247		-{
1248		- rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
1249		- rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
1250		- rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
1251		-}
1252		-
1253		-static void rcu_cpu_kthread_setup(unsigned int cpu)
1254		-{
1255		- struct sched_param sp;
1256		-
1257		- sp.sched_priority = kthread_prio;
1258		- sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1259		-}
1260		-
1261		-static void rcu_cpu_kthread_park(unsigned int cpu)
1262		-{
1263		- per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
1264		-}
1265		-
1266		-static int rcu_cpu_kthread_should_run(unsigned int cpu)
1267		-{
1268		- return __this_cpu_read(rcu_cpu_has_work);
1269		-}
1270		-
1271		-/*
1272		- * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
1273		- * RCU softirq used in flavors and configurations of RCU that do not
1274		- * support RCU priority boosting.
1275		- */
1276		-static void rcu_cpu_kthread(unsigned int cpu)
1277		-{
1278		- unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
1279		- char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
1280		- int spincnt;
1281		-
1282		- for (spincnt = 0; spincnt < 10; spincnt++) {
1283		- trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
1284		- local_bh_disable();
1285		- *statusp = RCU_KTHREAD_RUNNING;
1286		- this_cpu_inc(rcu_cpu_kthread_loops);
1287		- local_irq_disable();
1288		- work = *workp;
1289		- *workp = 0;
1290		- local_irq_enable();
1291		- if (work)
1292		- rcu_kthread_do_work();
1293		- local_bh_enable();
1294		- if (*workp == 0) {
1295		- trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
1296		- *statusp = RCU_KTHREAD_WAITING;
1297		- return;
1298		- }
1299		- }
1300		- *statusp = RCU_KTHREAD_YIELDING;
1301		- trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
1302		- schedule_timeout_interruptible(2);
1303		- trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
1304		- *statusp = RCU_KTHREAD_WAITING;
1305	1162	}
1306	1163
1307	1164	/*
..	..	@@ -1334,38 +1191,25 @@
1334	1191	free_cpumask_var(cm);
1335	1192	}
1336	1193
1337		-static struct smp_hotplug_thread rcu_cpu_thread_spec = {
1338		- .store = &rcu_cpu_kthread_task,
1339		- .thread_should_run = rcu_cpu_kthread_should_run,
1340		- .thread_fn = rcu_cpu_kthread,
1341		- .thread_comm = "rcuc/%u",
1342		- .setup = rcu_cpu_kthread_setup,
1343		- .park = rcu_cpu_kthread_park,
1344		-};
1345		-
1346	1194	/*
1347	1195	* Spawn boost kthreads -- called as soon as the scheduler is running.
1348	1196	*/
1349	1197	static void __init rcu_spawn_boost_kthreads(void)
1350	1198	{
1351	1199	struct rcu_node *rnp;
1352		- int cpu;
1353	1200
1354		- for_each_possible_cpu(cpu)
1355		- per_cpu(rcu_cpu_has_work, cpu) = 0;
1356		- BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
1357		- rcu_for_each_leaf_node(rcu_state_p, rnp)
1358		- (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
	1201	+ rcu_for_each_leaf_node(rnp)
	1202	+ rcu_spawn_one_boost_kthread(rnp);
1359	1203	}
1360	1204
1361	1205	static void rcu_prepare_kthreads(int cpu)
1362	1206	{
1363		- struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
	1207	+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
1364	1208	struct rcu_node *rnp = rdp->mynode;
1365	1209
1366	1210	/* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
1367	1211	if (rcu_scheduler_fully_active)
1368		- (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
	1212	+ rcu_spawn_one_boost_kthread(rnp);
1369	1213	}
1370	1214
1371	1215	#else /* #ifdef CONFIG_RCU_BOOST */
..	..	@@ -1374,11 +1218,6 @@
1374	1218	__releases(rnp->lock)
1375	1219	{
1376	1220	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1377		-}
1378		-
1379		-static void invoke_rcu_callbacks_kthread(void)
1380		-{
1381		- WARN_ON_ONCE(1);
1382	1221	}
1383	1222
1384	1223	static bool rcu_is_callbacks_kthread(void)
..	..	@@ -1407,18 +1246,19 @@
1407	1246	#if !defined(CONFIG_RCU_FAST_NO_HZ)
1408	1247
1409	1248	/*
1410		- * Check to see if any future RCU-related work will need to be done
1411		- * by the current CPU, even if none need be done immediately, returning
1412		- * 1 if so. This function is part of the RCU implementation; it is -not-
1413		- * an exported member of the RCU API.
	1249	+ * Check to see if any future non-offloaded RCU-related work will need
	1250	+ * to be done by the current CPU, even if none need be done immediately,
	1251	+ * returning 1 if so. This function is part of the RCU implementation;
	1252	+ * it is -not- an exported member of the RCU API.
1414	1253	*
1415		- * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs
1416		- * any flavor of RCU.
	1254	+ * Because we not have RCU_FAST_NO_HZ, just check whether or not this
	1255	+ * CPU has RCU callbacks queued.
1417	1256	*/
1418	1257	int rcu_needs_cpu(u64 basemono, u64 *nextevt)
1419	1258	{
1420	1259	*nextevt = KTIME_MAX;
1421		- return rcu_cpu_has_callbacks(NULL);
	1260	+ return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
	1261	+ !rcu_segcblist_is_offloaded(&this_cpu_ptr(&rcu_data)->cblist);
1422	1262	}
1423	1263
1424	1264	/*
..	..	@@ -1437,23 +1277,14 @@
1437	1277	{
1438	1278	}
1439	1279
1440		-/*
1441		- * Don't bother keeping a running count of the number of RCU callbacks
1442		- * posted because CONFIG_RCU_FAST_NO_HZ=n.
1443		- */
1444		-static void rcu_idle_count_callbacks_posted(void)
1445		-{
1446		-}
1447		-
1448	1280	#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
1449	1281
1450	1282	/*
1451	1283	* This code is invoked when a CPU goes idle, at which point we want
1452	1284	* to have the CPU do everything required for RCU so that it can enter
1453		- * the energy-efficient dyntick-idle mode. This is handled by a
1454		- * state machine implemented by rcu_prepare_for_idle() below.
	1285	+ * the energy-efficient dyntick-idle mode.
1455	1286	*
1456		- * The following three proprocessor symbols control this state machine:
	1287	+ * The following preprocessor symbol controls this:
1457	1288	*
1458	1289	* RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
1459	1290	* to sleep in dyntick-idle mode with RCU callbacks pending. This
..	..	@@ -1462,81 +1293,67 @@
1462	1293	* number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
1463	1294	* system. And if you are -that- concerned about energy efficiency,
1464	1295	* just power the system down and be done with it!
1465		- * RCU_IDLE_LAZY_GP_DELAY gives the number of jiffies that a CPU is
1466		- * permitted to sleep in dyntick-idle mode with only lazy RCU
1467		- * callbacks pending. Setting this too high can OOM your system.
1468	1296	*
1469		- * The values below work well in practice. If future workloads require
	1297	+ * The value below works well in practice. If future workloads require
1470	1298	* adjustment, they can be converted into kernel config parameters, though
1471	1299	* making the state machine smarter might be a better option.
1472	1300	*/
1473	1301	#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */
1474		-#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */
1475	1302
1476	1303	static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
1477	1304	module_param(rcu_idle_gp_delay, int, 0644);
1478		-static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
1479		-module_param(rcu_idle_lazy_gp_delay, int, 0644);
1480	1305
1481	1306	/*
1482		- * Try to advance callbacks for all flavors of RCU on the current CPU, but
1483		- * only if it has been awhile since the last time we did so. Afterwards,
1484		- * if there are any callbacks ready for immediate invocation, return true.
	1307	+ * Try to advance callbacks on the current CPU, but only if it has been
	1308	+ * awhile since the last time we did so. Afterwards, if there are any
	1309	+ * callbacks ready for immediate invocation, return true.
1485	1310	*/
1486	1311	static bool __maybe_unused rcu_try_advance_all_cbs(void)
1487	1312	{
1488	1313	bool cbs_ready = false;
1489		- struct rcu_data *rdp;
1490		- struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
	1314	+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
1491	1315	struct rcu_node *rnp;
1492		- struct rcu_state *rsp;
1493	1316
1494	1317	/* Exit early if we advanced recently. */
1495		- if (jiffies == rdtp->last_advance_all)
	1318	+ if (jiffies == rdp->last_advance_all)
1496	1319	return false;
1497		- rdtp->last_advance_all = jiffies;
	1320	+ rdp->last_advance_all = jiffies;
1498	1321
1499		- for_each_rcu_flavor(rsp) {
1500		- rdp = this_cpu_ptr(rsp->rda);
1501		- rnp = rdp->mynode;
	1322	+ rnp = rdp->mynode;
1502	1323
1503		- /*
1504		- * Don't bother checking unless a grace period has
1505		- * completed since we last checked and there are
1506		- * callbacks not yet ready to invoke.
1507		- */
1508		- if ((rcu_seq_completed_gp(rdp->gp_seq,
1509		- rcu_seq_current(&rnp->gp_seq)) ||
1510		- unlikely(READ_ONCE(rdp->gpwrap))) &&
1511		- rcu_segcblist_pend_cbs(&rdp->cblist))
1512		- note_gp_changes(rsp, rdp);
	1324	+ /*
	1325	+ * Don't bother checking unless a grace period has
	1326	+ * completed since we last checked and there are
	1327	+ * callbacks not yet ready to invoke.
	1328	+ */
	1329	+ if ((rcu_seq_completed_gp(rdp->gp_seq,
	1330	+ rcu_seq_current(&rnp->gp_seq)) ||
	1331	+ unlikely(READ_ONCE(rdp->gpwrap))) &&
	1332	+ rcu_segcblist_pend_cbs(&rdp->cblist))
	1333	+ note_gp_changes(rdp);
1513	1334
1514		- if (rcu_segcblist_ready_cbs(&rdp->cblist))
1515		- cbs_ready = true;
1516		- }
	1335	+ if (rcu_segcblist_ready_cbs(&rdp->cblist))
	1336	+ cbs_ready = true;
1517	1337	return cbs_ready;
1518	1338	}
1519	1339
1520	1340	/*
1521	1341	* Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
1522	1342	* to invoke. If the CPU has callbacks, try to advance them. Tell the
1523		- * caller to set the timeout based on whether or not there are non-lazy
1524		- * callbacks.
	1343	+ * caller about what to set the timeout.
1525	1344	*
1526	1345	* The caller must have disabled interrupts.
1527	1346	*/
1528	1347	int rcu_needs_cpu(u64 basemono, u64 *nextevt)
1529	1348	{
1530		- struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
	1349	+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
1531	1350	unsigned long dj;
1532	1351
1533	1352	lockdep_assert_irqs_disabled();
1534	1353
1535		- /* Snapshot to detect later posting of non-lazy callback. */
1536		- rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
1537		-
1538		- /* If no callbacks, RCU doesn't need the CPU. */
1539		- if (!rcu_cpu_has_callbacks(&rdtp->all_lazy)) {
	1354	+ /* If no non-offloaded callbacks, RCU doesn't need the CPU. */
	1355	+ if (rcu_segcblist_empty(&rdp->cblist) ||
	1356	+ rcu_segcblist_is_offloaded(&this_cpu_ptr(&rcu_data)->cblist)) {
1540	1357	*nextevt = KTIME_MAX;
1541	1358	return 0;
1542	1359	}
..	..	@@ -1547,83 +1364,59 @@
1547	1364	invoke_rcu_core();
1548	1365	return 1;
1549	1366	}
1550		- rdtp->last_accelerate = jiffies;
	1367	+ rdp->last_accelerate = jiffies;
1551	1368
1552		- /* Request timer delay depending on laziness, and round. */
1553		- if (!rdtp->all_lazy) {
1554		- dj = round_up(rcu_idle_gp_delay + jiffies,
1555		- rcu_idle_gp_delay) - jiffies;
1556		- } else {
1557		- dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies;
1558		- }
	1369	+ /* Request timer and round. */
	1370	+ dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies;
	1371	+
1559	1372	*nextevt = basemono + dj * TICK_NSEC;
1560	1373	return 0;
1561	1374	}
1562	1375
1563	1376	/*
1564		- * Prepare a CPU for idle from an RCU perspective. The first major task
1565		- * is to sense whether nohz mode has been enabled or disabled via sysfs.
1566		- * The second major task is to check to see if a non-lazy callback has
1567		- * arrived at a CPU that previously had only lazy callbacks. The third
1568		- * major task is to accelerate (that is, assign grace-period numbers to)
1569		- * any recently arrived callbacks.
	1377	+ * Prepare a CPU for idle from an RCU perspective. The first major task is to
	1378	+ * sense whether nohz mode has been enabled or disabled via sysfs. The second
	1379	+ * major task is to accelerate (that is, assign grace-period numbers to) any
	1380	+ * recently arrived callbacks.
1570	1381	*
1571	1382	* The caller must have disabled interrupts.
1572	1383	*/
1573	1384	static void rcu_prepare_for_idle(void)
1574	1385	{
1575	1386	bool needwake;
1576		- struct rcu_data *rdp;
1577		- struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
	1387	+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
1578	1388	struct rcu_node *rnp;
1579		- struct rcu_state *rsp;
1580	1389	int tne;
1581	1390
1582	1391	lockdep_assert_irqs_disabled();
1583		- if (rcu_is_nocb_cpu(smp_processor_id()))
	1392	+ if (rcu_segcblist_is_offloaded(&rdp->cblist))
1584	1393	return;
1585	1394
1586	1395	/* Handle nohz enablement switches conservatively. */
1587	1396	tne = READ_ONCE(tick_nohz_active);
1588		- if (tne != rdtp->tick_nohz_enabled_snap) {
1589		- if (rcu_cpu_has_callbacks(NULL))
	1397	+ if (tne != rdp->tick_nohz_enabled_snap) {
	1398	+ if (!rcu_segcblist_empty(&rdp->cblist))
1590	1399	invoke_rcu_core(); /* force nohz to see update. */
1591		- rdtp->tick_nohz_enabled_snap = tne;
	1400	+ rdp->tick_nohz_enabled_snap = tne;
1592	1401	return;
1593	1402	}
1594	1403	if (!tne)
1595	1404	return;
1596	1405
1597	1406	/*
1598		- * If a non-lazy callback arrived at a CPU having only lazy
1599		- * callbacks, invoke RCU core for the side-effect of recalculating
1600		- * idle duration on re-entry to idle.
1601		- */
1602		- if (rdtp->all_lazy &&
1603		- rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) {
1604		- rdtp->all_lazy = false;
1605		- rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
1606		- invoke_rcu_core();
1607		- return;
1608		- }
1609		-
1610		- /*
1611	1407	* If we have not yet accelerated this jiffy, accelerate all
1612	1408	* callbacks on this CPU.
1613	1409	*/
1614		- if (rdtp->last_accelerate == jiffies)
	1410	+ if (rdp->last_accelerate == jiffies)
1615	1411	return;
1616		- rdtp->last_accelerate = jiffies;
1617		- for_each_rcu_flavor(rsp) {
1618		- rdp = this_cpu_ptr(rsp->rda);
1619		- if (!rcu_segcblist_pend_cbs(&rdp->cblist))
1620		- continue;
	1412	+ rdp->last_accelerate = jiffies;
	1413	+ if (rcu_segcblist_pend_cbs(&rdp->cblist)) {
1621	1414	rnp = rdp->mynode;
1622	1415	raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
1623		- needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
	1416	+ needwake = rcu_accelerate_cbs(rnp, rdp);
1624	1417	raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
1625	1418	if (needwake)
1626		- rcu_gp_kthread_wake(rsp);
	1419	+ rcu_gp_kthread_wake();
1627	1420	}
1628	1421	}
1629	1422
..	..	@@ -1634,240 +1427,58 @@
1634	1427	*/
1635	1428	static void rcu_cleanup_after_idle(void)
1636	1429	{
	1430	+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	1431	+
1637	1432	lockdep_assert_irqs_disabled();
1638		- if (rcu_is_nocb_cpu(smp_processor_id()))
	1433	+ if (rcu_segcblist_is_offloaded(&rdp->cblist))
1639	1434	return;
1640	1435	if (rcu_try_advance_all_cbs())
1641	1436	invoke_rcu_core();
1642	1437	}
1643	1438
1644		-/*
1645		- * Keep a running count of the number of non-lazy callbacks posted
1646		- * on this CPU. This running counter (which is never decremented) allows
1647		- * rcu_prepare_for_idle() to detect when something out of the idle loop
1648		- * posts a callback, even if an equal number of callbacks are invoked.
1649		- * Of course, callbacks should only be posted from within a trace event
1650		- * designed to be called from idle or from within RCU_NONIDLE().
1651		- */
1652		-static void rcu_idle_count_callbacks_posted(void)
1653		-{
1654		- __this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
1655		-}
1656		-
1657		-/*
1658		- * Data for flushing lazy RCU callbacks at OOM time.
1659		- */
1660		-static atomic_t oom_callback_count;
1661		-static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq);
1662		-
1663		-/*
1664		- * RCU OOM callback -- decrement the outstanding count and deliver the
1665		- * wake-up if we are the last one.
1666		- */
1667		-static void rcu_oom_callback(struct rcu_head *rhp)
1668		-{
1669		- if (atomic_dec_and_test(&oom_callback_count))
1670		- wake_up(&oom_callback_wq);
1671		-}
1672		-
1673		-/*
1674		- * Post an rcu_oom_notify callback on the current CPU if it has at
1675		- * least one lazy callback. This will unnecessarily post callbacks
1676		- * to CPUs that already have a non-lazy callback at the end of their
1677		- * callback list, but this is an infrequent operation, so accept some
1678		- * extra overhead to keep things simple.
1679		- */
1680		-static void rcu_oom_notify_cpu(void *unused)
1681		-{
1682		- struct rcu_state *rsp;
1683		- struct rcu_data *rdp;
1684		-
1685		- for_each_rcu_flavor(rsp) {
1686		- rdp = raw_cpu_ptr(rsp->rda);
1687		- if (rcu_segcblist_n_lazy_cbs(&rdp->cblist)) {
1688		- atomic_inc(&oom_callback_count);
1689		- rsp->call(&rdp->oom_head, rcu_oom_callback);
1690		- }
1691		- }
1692		-}
1693		-
1694		-/*
1695		- * If low on memory, ensure that each CPU has a non-lazy callback.
1696		- * This will wake up CPUs that have only lazy callbacks, in turn
1697		- * ensuring that they free up the corresponding memory in a timely manner.
1698		- * Because an uncertain amount of memory will be freed in some uncertain
1699		- * timeframe, we do not claim to have freed anything.
1700		- */
1701		-static int rcu_oom_notify(struct notifier_block *self,
1702		- unsigned long notused, void *nfreed)
1703		-{
1704		- int cpu;
1705		-
1706		- /* Wait for callbacks from earlier instance to complete. */
1707		- wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0);
1708		- smp_mb(); /* Ensure callback reuse happens after callback invocation. */
1709		-
1710		- /*
1711		- * Prevent premature wakeup: ensure that all increments happen
1712		- * before there is a chance of the counter reaching zero.
1713		- */
1714		- atomic_set(&oom_callback_count, 1);
1715		-
1716		- for_each_online_cpu(cpu) {
1717		- smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
1718		- cond_resched_tasks_rcu_qs();
1719		- }
1720		-
1721		- /* Unconditionally decrement: no need to wake ourselves up. */
1722		- atomic_dec(&oom_callback_count);
1723		-
1724		- return NOTIFY_OK;
1725		-}
1726		-
1727		-static struct notifier_block rcu_oom_nb = {
1728		- .notifier_call = rcu_oom_notify
1729		-};
1730		-
1731		-static int __init rcu_register_oom_notifier(void)
1732		-{
1733		- register_oom_notifier(&rcu_oom_nb);
1734		- return 0;
1735		-}
1736		-early_initcall(rcu_register_oom_notifier);
1737		-
1738	1439	#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
1739		-
1740		-#ifdef CONFIG_RCU_FAST_NO_HZ
1741		-
1742		-static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
1743		-{
1744		- struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1745		- unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap;
1746		-
1747		- sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c",
1748		- rdtp->last_accelerate & 0xffff, jiffies & 0xffff,
1749		- ulong2long(nlpd),
1750		- rdtp->all_lazy ? 'L' : '.',
1751		- rdtp->tick_nohz_enabled_snap ? '.' : 'D');
1752		-}
1753		-
1754		-#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
1755		-
1756		-static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
1757		-{
1758		- *cp = '\0';
1759		-}
1760		-
1761		-#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
1762		-
1763		-/* Initiate the stall-info list. */
1764		-static void print_cpu_stall_info_begin(void)
1765		-{
1766		- pr_cont("\n");
1767		-}
1768		-
1769		-/*
1770		- * Print out diagnostic information for the specified stalled CPU.
1771		- *
1772		- * If the specified CPU is aware of the current RCU grace period
1773		- * (flavor specified by rsp), then print the number of scheduling
1774		- * clock interrupts the CPU has taken during the time that it has
1775		- * been aware. Otherwise, print the number of RCU grace periods
1776		- * that this CPU is ignorant of, for example, "1" if the CPU was
1777		- * aware of the previous grace period.
1778		- *
1779		- * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.
1780		- */
1781		-static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
1782		-{
1783		- unsigned long delta;
1784		- char fast_no_hz[72];
1785		- struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1786		- struct rcu_dynticks *rdtp = rdp->dynticks;
1787		- char *ticks_title;
1788		- unsigned long ticks_value;
1789		-
1790		- /*
1791		- * We could be printing a lot while holding a spinlock. Avoid
1792		- * triggering hard lockup.
1793		- */
1794		- touch_nmi_watchdog();
1795		-
1796		- ticks_value = rcu_seq_ctr(rsp->gp_seq - rdp->gp_seq);
1797		- if (ticks_value) {
1798		- ticks_title = "GPs behind";
1799		- } else {
1800		- ticks_title = "ticks this GP";
1801		- ticks_value = rdp->ticks_this_gp;
1802		- }
1803		- print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
1804		- delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq);
1805		- pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n",
1806		- cpu,
1807		- "O."[!!cpu_online(cpu)],
1808		- "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
1809		- "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)],
1810		- !IS_ENABLED(CONFIG_IRQ_WORK) ? '?' :
1811		- rdp->rcu_iw_pending ? (int)min(delta, 9UL) + '0' :
1812		- "!."[!delta],
1813		- ticks_value, ticks_title,
1814		- rcu_dynticks_snap(rdtp) & 0xfff,
1815		- rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
1816		- rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
1817		- READ_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart,
1818		- fast_no_hz);
1819		-}
1820		-
1821		-/* Terminate the stall-info list. */
1822		-static void print_cpu_stall_info_end(void)
1823		-{
1824		- pr_err("\t");
1825		-}
1826		-
1827		-/* Zero ->ticks_this_gp for all flavors of RCU. */
1828		-static void zero_cpu_stall_ticks(struct rcu_data *rdp)
1829		-{
1830		- rdp->ticks_this_gp = 0;
1831		- rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id());
1832		-}
1833		-
1834		-/* Increment ->ticks_this_gp for all flavors of RCU. */
1835		-static void increment_cpu_stall_ticks(void)
1836		-{
1837		- struct rcu_state *rsp;
1838		-
1839		- for_each_rcu_flavor(rsp)
1840		- raw_cpu_inc(rsp->rda->ticks_this_gp);
1841		-}
1842	1440
1843	1441	#ifdef CONFIG_RCU_NOCB_CPU
1844	1442
1845	1443	/*
1846	1444	* Offload callback processing from the boot-time-specified set of CPUs
1847		- * specified by rcu_nocb_mask. For each CPU in the set, there is a
1848		- * kthread created that pulls the callbacks from the corresponding CPU,
1849		- * waits for a grace period to elapse, and invokes the callbacks.
1850		- * The no-CBs CPUs do a wake_up() on their kthread when they insert
1851		- * a callback into any empty list, unless the rcu_nocb_poll boot parameter
1852		- * has been specified, in which case each kthread actively polls its
1853		- * CPU. (Which isn't so great for energy efficiency, but which does
1854		- * reduce RCU's overhead on that CPU.)
	1445	+ * specified by rcu_nocb_mask. For the CPUs in the set, there are kthreads
	1446	+ * created that pull the callbacks from the corresponding CPU, wait for
	1447	+ * a grace period to elapse, and invoke the callbacks. These kthreads
	1448	+ * are organized into GP kthreads, which manage incoming callbacks, wait for
	1449	+ * grace periods, and awaken CB kthreads, and the CB kthreads, which only
	1450	+ * invoke callbacks. Each GP kthread invokes its own CBs. The no-CBs CPUs
	1451	+ * do a wake_up() on their GP kthread when they insert a callback into any
	1452	+ * empty list, unless the rcu_nocb_poll boot parameter has been specified,
	1453	+ * in which case each kthread actively polls its CPU. (Which isn't so great
	1454	+ * for energy efficiency, but which does reduce RCU's overhead on that CPU.)
1855	1455	*
1856	1456	* This is intended to be used in conjunction with Frederic Weisbecker's
1857	1457	* adaptive-idle work, which would seriously reduce OS jitter on CPUs
1858	1458	* running CPU-bound user-mode computations.
1859	1459	*
1860		- * Offloading of callback processing could also in theory be used as
1861		- * an energy-efficiency measure because CPUs with no RCU callbacks
1862		- * queued are more aggressive about entering dyntick-idle mode.
	1460	+ * Offloading of callbacks can also be used as an energy-efficiency
	1461	+ * measure because CPUs with no RCU callbacks queued are more aggressive
	1462	+ * about entering dyntick-idle mode.
1863	1463	*/
1864	1464
1865	1465
1866		-/* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. */
	1466	+/*
	1467	+ * Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters.
	1468	+ * The string after the "rcu_nocbs=" is either "all" for all CPUs, or a
	1469	+ * comma-separated list of CPUs and/or CPU ranges. If an invalid list is
	1470	+ * given, a warning is emitted and all CPUs are offloaded.
	1471	+ */
1867	1472	static int __init rcu_nocb_setup(char *str)
1868	1473	{
1869	1474	alloc_bootmem_cpumask_var(&rcu_nocb_mask);
1870		- cpulist_parse(str, rcu_nocb_mask);
	1475	+ if (!strcasecmp(str, "all"))
	1476	+ cpumask_setall(rcu_nocb_mask);
	1477	+ else
	1478	+ if (cpulist_parse(str, rcu_nocb_mask)) {
	1479	+ pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n");
	1480	+ cpumask_setall(rcu_nocb_mask);
	1481	+ }
1871	1482	return 1;
1872	1483	}
1873	1484	__setup("rcu_nocbs=", rcu_nocb_setup);
..	..	@@ -1878,6 +1489,117 @@
1878	1489	return 0;
1879	1490	}
1880	1491	early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
	1492	+
	1493	+/*
	1494	+ * Don't bother bypassing ->cblist if the call_rcu() rate is low.
	1495	+ * After all, the main point of bypassing is to avoid lock contention
	1496	+ * on ->nocb_lock, which only can happen at high call_rcu() rates.
	1497	+ */
	1498	+int nocb_nobypass_lim_per_jiffy = 16 * 1000 / HZ;
	1499	+module_param(nocb_nobypass_lim_per_jiffy, int, 0);
	1500	+
	1501	+/*
	1502	+ * Acquire the specified rcu_data structure's ->nocb_bypass_lock. If the
	1503	+ * lock isn't immediately available, increment ->nocb_lock_contended to
	1504	+ * flag the contention.
	1505	+ */
	1506	+static void rcu_nocb_bypass_lock(struct rcu_data *rdp)
	1507	+ __acquires(&rdp->nocb_bypass_lock)
	1508	+{
	1509	+ lockdep_assert_irqs_disabled();
	1510	+ if (raw_spin_trylock(&rdp->nocb_bypass_lock))
	1511	+ return;
	1512	+ atomic_inc(&rdp->nocb_lock_contended);
	1513	+ WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
	1514	+ smp_mb__after_atomic(); /* atomic_inc() before lock. */
	1515	+ raw_spin_lock(&rdp->nocb_bypass_lock);
	1516	+ smp_mb__before_atomic(); /* atomic_dec() after lock. */
	1517	+ atomic_dec(&rdp->nocb_lock_contended);
	1518	+}
	1519	+
	1520	+/*
	1521	+ * Spinwait until the specified rcu_data structure's ->nocb_lock is
	1522	+ * not contended. Please note that this is extremely special-purpose,
	1523	+ * relying on the fact that at most two kthreads and one CPU contend for
	1524	+ * this lock, and also that the two kthreads are guaranteed to have frequent
	1525	+ * grace-period-duration time intervals between successive acquisitions
	1526	+ * of the lock. This allows us to use an extremely simple throttling
	1527	+ * mechanism, and further to apply it only to the CPU doing floods of
	1528	+ * call_rcu() invocations. Don't try this at home!
	1529	+ */
	1530	+static void rcu_nocb_wait_contended(struct rcu_data *rdp)
	1531	+{
	1532	+ WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
	1533	+ while (WARN_ON_ONCE(atomic_read(&rdp->nocb_lock_contended)))
	1534	+ cpu_relax();
	1535	+}
	1536	+
	1537	+/*
	1538	+ * Conditionally acquire the specified rcu_data structure's
	1539	+ * ->nocb_bypass_lock.
	1540	+ */
	1541	+static bool rcu_nocb_bypass_trylock(struct rcu_data *rdp)
	1542	+{
	1543	+ lockdep_assert_irqs_disabled();
	1544	+ return raw_spin_trylock(&rdp->nocb_bypass_lock);
	1545	+}
	1546	+
	1547	+/*
	1548	+ * Release the specified rcu_data structure's ->nocb_bypass_lock.
	1549	+ */
	1550	+static void rcu_nocb_bypass_unlock(struct rcu_data *rdp)
	1551	+ __releases(&rdp->nocb_bypass_lock)
	1552	+{
	1553	+ lockdep_assert_irqs_disabled();
	1554	+ raw_spin_unlock(&rdp->nocb_bypass_lock);
	1555	+}
	1556	+
	1557	+/*
	1558	+ * Acquire the specified rcu_data structure's ->nocb_lock, but only
	1559	+ * if it corresponds to a no-CBs CPU.
	1560	+ */
	1561	+static void rcu_nocb_lock(struct rcu_data *rdp)
	1562	+{
	1563	+ lockdep_assert_irqs_disabled();
	1564	+ if (!rcu_segcblist_is_offloaded(&rdp->cblist))
	1565	+ return;
	1566	+ raw_spin_lock(&rdp->nocb_lock);
	1567	+}
	1568	+
	1569	+/*
	1570	+ * Release the specified rcu_data structure's ->nocb_lock, but only
	1571	+ * if it corresponds to a no-CBs CPU.
	1572	+ */
	1573	+static void rcu_nocb_unlock(struct rcu_data *rdp)
	1574	+{
	1575	+ if (rcu_segcblist_is_offloaded(&rdp->cblist)) {
	1576	+ lockdep_assert_irqs_disabled();
	1577	+ raw_spin_unlock(&rdp->nocb_lock);
	1578	+ }
	1579	+}
	1580	+
	1581	+/*
	1582	+ * Release the specified rcu_data structure's ->nocb_lock and restore
	1583	+ * interrupts, but only if it corresponds to a no-CBs CPU.
	1584	+ */
	1585	+static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
	1586	+ unsigned long flags)
	1587	+{
	1588	+ if (rcu_segcblist_is_offloaded(&rdp->cblist)) {
	1589	+ lockdep_assert_irqs_disabled();
	1590	+ raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
	1591	+ } else {
	1592	+ local_irq_restore(flags);
	1593	+ }
	1594	+}
	1595	+
	1596	+/* Lockdep check that ->cblist may be safely accessed. */
	1597	+static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)
	1598	+{
	1599	+ lockdep_assert_irqs_disabled();
	1600	+ if (rcu_segcblist_is_offloaded(&rdp->cblist))
	1601	+ lockdep_assert_held(&rdp->nocb_lock);
	1602	+}
1881	1603
1882	1604	/*
1883	1605	* Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
..	..	@@ -1908,442 +1630,523 @@
1908	1630	}
1909	1631
1910	1632	/*
1911		- * Kick the leader kthread for this NOCB group. Caller holds ->nocb_lock
	1633	+ * Kick the GP kthread for this NOCB group. Caller holds ->nocb_lock
1912	1634	* and this function releases it.
1913	1635	*/
1914		-static void __wake_nocb_leader(struct rcu_data *rdp, bool force,
1915		- unsigned long flags)
	1636	+static void wake_nocb_gp(struct rcu_data *rdp, bool force,
	1637	+ unsigned long flags)
1916	1638	__releases(rdp->nocb_lock)
1917	1639	{
1918		- struct rcu_data *rdp_leader = rdp->nocb_leader;
	1640	+ bool needwake = false;
	1641	+ struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
1919	1642
1920	1643	lockdep_assert_held(&rdp->nocb_lock);
1921		- if (!READ_ONCE(rdp_leader->nocb_kthread)) {
1922		- raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
	1644	+ if (!READ_ONCE(rdp_gp->nocb_gp_kthread)) {
	1645	+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
	1646	+ TPS("AlreadyAwake"));
	1647	+ rcu_nocb_unlock_irqrestore(rdp, flags);
1923	1648	return;
1924	1649	}
1925		- if (rdp_leader->nocb_leader_sleep || force) {
1926		- /* Prior smp_mb__after_atomic() orders against prior enqueue. */
1927		- WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
	1650	+
	1651	+ if (READ_ONCE(rdp->nocb_defer_wakeup) > RCU_NOCB_WAKE_NOT) {
	1652	+ WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
1928	1653	del_timer(&rdp->nocb_timer);
1929		- raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
1930		- smp_mb(); /* ->nocb_leader_sleep before swake_up_one(). */
1931		- swake_up_one(&rdp_leader->nocb_wq);
1932		- } else {
1933		- raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
1934	1654	}
	1655	+ rcu_nocb_unlock_irqrestore(rdp, flags);
	1656	+ raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
	1657	+ if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
	1658	+ WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
	1659	+ needwake = true;
	1660	+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake"));
	1661	+ }
	1662	+ raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
	1663	+ if (needwake)
	1664	+ wake_up_process(rdp_gp->nocb_gp_kthread);
1935	1665	}
1936	1666
1937	1667	/*
1938		- * Kick the leader kthread for this NOCB group, but caller has not
1939		- * acquired locks.
	1668	+ * Arrange to wake the GP kthread for this NOCB group at some future
	1669	+ * time when it is safe to do so.
1940	1670	*/
1941		-static void wake_nocb_leader(struct rcu_data *rdp, bool force)
	1671	+static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
	1672	+ const char *reason)
1942	1673	{
1943		- unsigned long flags;
1944		-
1945		- raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
1946		- __wake_nocb_leader(rdp, force, flags);
1947		-}
1948		-
1949		-/*
1950		- * Arrange to wake the leader kthread for this NOCB group at some
1951		- * future time when it is safe to do so.
1952		- */
1953		-static void wake_nocb_leader_defer(struct rcu_data *rdp, int waketype,
1954		- const char *reason)
1955		-{
1956		- unsigned long flags;
1957		-
1958		- raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
1959	1674	if (rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT)
1960	1675	mod_timer(&rdp->nocb_timer, jiffies + 1);
1961		- WRITE_ONCE(rdp->nocb_defer_wakeup, waketype);
1962		- trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, reason);
1963		- raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
	1676	+ if (rdp->nocb_defer_wakeup < waketype)
	1677	+ WRITE_ONCE(rdp->nocb_defer_wakeup, waketype);
	1678	+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, reason);
1964	1679	}
1965	1680
1966	1681	/*
1967		- * Does the specified CPU need an RCU callback for the specified flavor
1968		- * of rcu_barrier()?
	1682	+ * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL.
	1683	+ * However, if there is a callback to be enqueued and if ->nocb_bypass
	1684	+ * proves to be initially empty, just return false because the no-CB GP
	1685	+ * kthread may need to be awakened in this case.
	1686	+ *
	1687	+ * Note that this function always returns true if rhp is NULL.
1969	1688	*/
1970		-static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
	1689	+static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
	1690	+ unsigned long j)
1971	1691	{
1972		- struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1973		- unsigned long ret;
1974		-#ifdef CONFIG_PROVE_RCU
1975		- struct rcu_head *rhp;
1976		-#endif /* #ifdef CONFIG_PROVE_RCU */
	1692	+ struct rcu_cblist rcl;
1977	1693
1978		- /*
1979		- * Check count of all no-CBs callbacks awaiting invocation.
1980		- * There needs to be a barrier before this function is called,
1981		- * but associated with a prior determination that no more
1982		- * callbacks would be posted. In the worst case, the first
1983		- * barrier in _rcu_barrier() suffices (but the caller cannot
1984		- * necessarily rely on this, not a substitute for the caller
1985		- * getting the concurrency design right!). There must also be
1986		- * a barrier between the following load an posting of a callback
1987		- * (if a callback is in fact needed). This is associated with an
1988		- * atomic_inc() in the caller.
1989		- */
1990		- ret = atomic_long_read(&rdp->nocb_q_count);
1991		-
1992		-#ifdef CONFIG_PROVE_RCU
1993		- rhp = READ_ONCE(rdp->nocb_head);
1994		- if (!rhp)
1995		- rhp = READ_ONCE(rdp->nocb_gp_head);
1996		- if (!rhp)
1997		- rhp = READ_ONCE(rdp->nocb_follower_head);
1998		-
1999		- /* Having no rcuo kthread but CBs after scheduler starts is bad! */
2000		- if (!READ_ONCE(rdp->nocb_kthread) && rhp &&
2001		- rcu_scheduler_fully_active) {
2002		- /* RCU callback enqueued before CPU first came online??? */
2003		- pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n",
2004		- cpu, rhp->func);
2005		- WARN_ON_ONCE(1);
	1694	+ WARN_ON_ONCE(!rcu_segcblist_is_offloaded(&rdp->cblist));
	1695	+ rcu_lockdep_assert_cblist_protected(rdp);
	1696	+ lockdep_assert_held(&rdp->nocb_bypass_lock);
	1697	+ if (rhp && !rcu_cblist_n_cbs(&rdp->nocb_bypass)) {
	1698	+ raw_spin_unlock(&rdp->nocb_bypass_lock);
	1699	+ return false;
2006	1700	}
2007		-#endif /* #ifdef CONFIG_PROVE_RCU */
2008		-
2009		- return !!ret;
	1701	+ /* Note: ->cblist.len already accounts for ->nocb_bypass contents. */
	1702	+ if (rhp)
	1703	+ rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
	1704	+ rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp);
	1705	+ rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl);
	1706	+ WRITE_ONCE(rdp->nocb_bypass_first, j);
	1707	+ rcu_nocb_bypass_unlock(rdp);
	1708	+ return true;
2010	1709	}
2011	1710
2012	1711	/*
2013		- * Enqueue the specified string of rcu_head structures onto the specified
2014		- * CPU's no-CBs lists. The CPU is specified by rdp, the head of the
2015		- * string by rhp, and the tail of the string by rhtp. The non-lazy/lazy
2016		- * counts are supplied by rhcount and rhcount_lazy.
	1712	+ * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL.
	1713	+ * However, if there is a callback to be enqueued and if ->nocb_bypass
	1714	+ * proves to be initially empty, just return false because the no-CB GP
	1715	+ * kthread may need to be awakened in this case.
	1716	+ *
	1717	+ * Note that this function always returns true if rhp is NULL.
	1718	+ */
	1719	+static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
	1720	+ unsigned long j)
	1721	+{
	1722	+ if (!rcu_segcblist_is_offloaded(&rdp->cblist))
	1723	+ return true;
	1724	+ rcu_lockdep_assert_cblist_protected(rdp);
	1725	+ rcu_nocb_bypass_lock(rdp);
	1726	+ return rcu_nocb_do_flush_bypass(rdp, rhp, j);
	1727	+}
	1728	+
	1729	+/*
	1730	+ * If the ->nocb_bypass_lock is immediately available, flush the
	1731	+ * ->nocb_bypass queue into ->cblist.
	1732	+ */
	1733	+static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)
	1734	+{
	1735	+ rcu_lockdep_assert_cblist_protected(rdp);
	1736	+ if (!rcu_segcblist_is_offloaded(&rdp->cblist) ||
	1737	+ !rcu_nocb_bypass_trylock(rdp))
	1738	+ return;
	1739	+ WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j));
	1740	+}
	1741	+
	1742	+/*
	1743	+ * See whether it is appropriate to use the ->nocb_bypass list in order
	1744	+ * to control contention on ->nocb_lock. A limited number of direct
	1745	+ * enqueues are permitted into ->cblist per jiffy. If ->nocb_bypass
	1746	+ * is non-empty, further callbacks must be placed into ->nocb_bypass,
	1747	+ * otherwise rcu_barrier() breaks. Use rcu_nocb_flush_bypass() to switch
	1748	+ * back to direct use of ->cblist. However, ->nocb_bypass should not be
	1749	+ * used if ->cblist is empty, because otherwise callbacks can be stranded
	1750	+ * on ->nocb_bypass because we cannot count on the current CPU ever again
	1751	+ * invoking call_rcu(). The general rule is that if ->nocb_bypass is
	1752	+ * non-empty, the corresponding no-CBs grace-period kthread must not be
	1753	+ * in an indefinite sleep state.
	1754	+ *
	1755	+ * Finally, it is not permitted to use the bypass during early boot,
	1756	+ * as doing so would confuse the auto-initialization code. Besides
	1757	+ * which, there is no point in worrying about lock contention while
	1758	+ * there is only one CPU in operation.
	1759	+ */
	1760	+static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
	1761	+ bool *was_alldone, unsigned long flags)
	1762	+{
	1763	+ unsigned long c;
	1764	+ unsigned long cur_gp_seq;
	1765	+ unsigned long j = jiffies;
	1766	+ long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
	1767	+
	1768	+ if (!rcu_segcblist_is_offloaded(&rdp->cblist)) {
	1769	+ *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
	1770	+ return false; /* Not offloaded, no bypassing. */
	1771	+ }
	1772	+ lockdep_assert_irqs_disabled();
	1773	+
	1774	+ // Don't use ->nocb_bypass during early boot.
	1775	+ if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {
	1776	+ rcu_nocb_lock(rdp);
	1777	+ WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
	1778	+ *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
	1779	+ return false;
	1780	+ }
	1781	+
	1782	+ // If we have advanced to a new jiffy, reset counts to allow
	1783	+ // moving back from ->nocb_bypass to ->cblist.
	1784	+ if (j == rdp->nocb_nobypass_last) {
	1785	+ c = rdp->nocb_nobypass_count + 1;
	1786	+ } else {
	1787	+ WRITE_ONCE(rdp->nocb_nobypass_last, j);
	1788	+ c = rdp->nocb_nobypass_count - nocb_nobypass_lim_per_jiffy;
	1789	+ if (ULONG_CMP_LT(rdp->nocb_nobypass_count,
	1790	+ nocb_nobypass_lim_per_jiffy))
	1791	+ c = 0;
	1792	+ else if (c > nocb_nobypass_lim_per_jiffy)
	1793	+ c = nocb_nobypass_lim_per_jiffy;
	1794	+ }
	1795	+ WRITE_ONCE(rdp->nocb_nobypass_count, c);
	1796	+
	1797	+ // If there hasn't yet been all that many ->cblist enqueues
	1798	+ // this jiffy, tell the caller to enqueue onto ->cblist. But flush
	1799	+ // ->nocb_bypass first.
	1800	+ if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy) {
	1801	+ rcu_nocb_lock(rdp);
	1802	+ *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
	1803	+ if (*was_alldone)
	1804	+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
	1805	+ TPS("FirstQ"));
	1806	+ WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j));
	1807	+ WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
	1808	+ return false; // Caller must enqueue the callback.
	1809	+ }
	1810	+
	1811	+ // If ->nocb_bypass has been used too long or is too full,
	1812	+ // flush ->nocb_bypass to ->cblist.
	1813	+ if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) ||
	1814	+ ncbs >= qhimark) {
	1815	+ rcu_nocb_lock(rdp);
	1816	+ if (!rcu_nocb_flush_bypass(rdp, rhp, j)) {
	1817	+ *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
	1818	+ if (*was_alldone)
	1819	+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
	1820	+ TPS("FirstQ"));
	1821	+ WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
	1822	+ return false; // Caller must enqueue the callback.
	1823	+ }
	1824	+ if (j != rdp->nocb_gp_adv_time &&
	1825	+ rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
	1826	+ rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
	1827	+ rcu_advance_cbs_nowake(rdp->mynode, rdp);
	1828	+ rdp->nocb_gp_adv_time = j;
	1829	+ }
	1830	+ rcu_nocb_unlock_irqrestore(rdp, flags);
	1831	+ return true; // Callback already enqueued.
	1832	+ }
	1833	+
	1834	+ // We need to use the bypass.
	1835	+ rcu_nocb_wait_contended(rdp);
	1836	+ rcu_nocb_bypass_lock(rdp);
	1837	+ ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
	1838	+ rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
	1839	+ rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);
	1840	+ if (!ncbs) {
	1841	+ WRITE_ONCE(rdp->nocb_bypass_first, j);
	1842	+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ"));
	1843	+ }
	1844	+ rcu_nocb_bypass_unlock(rdp);
	1845	+ smp_mb(); /* Order enqueue before wake. */
	1846	+ if (ncbs) {
	1847	+ local_irq_restore(flags);
	1848	+ } else {
	1849	+ // No-CBs GP kthread might be indefinitely asleep, if so, wake.
	1850	+ rcu_nocb_lock(rdp); // Rare during call_rcu() flood.
	1851	+ if (!rcu_segcblist_pend_cbs(&rdp->cblist)) {
	1852	+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
	1853	+ TPS("FirstBQwake"));
	1854	+ __call_rcu_nocb_wake(rdp, true, flags);
	1855	+ } else {
	1856	+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
	1857	+ TPS("FirstBQnoWake"));
	1858	+ rcu_nocb_unlock_irqrestore(rdp, flags);
	1859	+ }
	1860	+ }
	1861	+ return true; // Callback already enqueued.
	1862	+}
	1863	+
	1864	+/*
	1865	+ * Awaken the no-CBs grace-period kthread if needed, either due to it
	1866	+ * legitimately being asleep or due to overload conditions.
2017	1867	*
2018	1868	* If warranted, also wake up the kthread servicing this CPUs queues.
2019	1869	*/
2020		-static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
2021		- struct rcu_head *rhp,
2022		- struct rcu_head **rhtp,
2023		- int rhcount, int rhcount_lazy,
2024		- unsigned long flags)
	1870	+static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
	1871	+ unsigned long flags)
	1872	+ __releases(rdp->nocb_lock)
2025	1873	{
2026		- int len;
2027		- struct rcu_head **old_rhpp;
	1874	+ unsigned long cur_gp_seq;
	1875	+ unsigned long j;
	1876	+ long len;
2028	1877	struct task_struct *t;
2029	1878
2030		- /* Enqueue the callback on the nocb list and update counts. */
2031		- atomic_long_add(rhcount, &rdp->nocb_q_count);
2032		- /* rcu_barrier() relies on ->nocb_q_count add before xchg. */
2033		- old_rhpp = xchg(&rdp->nocb_tail, rhtp);
2034		- WRITE_ONCE(*old_rhpp, rhp);
2035		- atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
2036		- smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */
2037		-
2038		- /* If we are not being polled and there is a kthread, awaken it ... */
2039		- t = READ_ONCE(rdp->nocb_kthread);
	1879	+ // If we are being polled or there is no kthread, just leave.
	1880	+ t = READ_ONCE(rdp->nocb_gp_kthread);
2040	1881	if (rcu_nocb_poll || !t) {
2041		- trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
	1882	+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
2042	1883	TPS("WakeNotPoll"));
	1884	+ rcu_nocb_unlock_irqrestore(rdp, flags);
2043	1885	return;
2044	1886	}
2045		- len = atomic_long_read(&rdp->nocb_q_count);
2046		- if (old_rhpp == &rdp->nocb_head) {
	1887	+ // Need to actually do a wakeup.
	1888	+ len = rcu_segcblist_n_cbs(&rdp->cblist);
	1889	+ if (was_alldone) {
	1890	+ rdp->qlen_last_fqs_check = len;
2047	1891	if (!irqs_disabled_flags(flags)) {
2048	1892	/* ... if queue was empty ... */
2049		- wake_nocb_leader(rdp, false);
2050		- trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
	1893	+ wake_nocb_gp(rdp, false, flags);
	1894	+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
2051	1895	TPS("WakeEmpty"));
2052	1896	} else {
2053		- wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE,
2054		- TPS("WakeEmptyIsDeferred"));
	1897	+ wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
	1898	+ TPS("WakeEmptyIsDeferred"));
	1899	+ rcu_nocb_unlock_irqrestore(rdp, flags);
2055	1900	}
2056		- rdp->qlen_last_fqs_check = 0;
2057	1901	} else if (len > rdp->qlen_last_fqs_check + qhimark) {
2058	1902	/* ... or if many callbacks queued. */
2059		- if (!irqs_disabled_flags(flags)) {
2060		- wake_nocb_leader(rdp, true);
2061		- trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2062		- TPS("WakeOvf"));
2063		- } else {
2064		- wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE_FORCE,
2065		- TPS("WakeOvfIsDeferred"));
	1903	+ rdp->qlen_last_fqs_check = len;
	1904	+ j = jiffies;
	1905	+ if (j != rdp->nocb_gp_adv_time &&
	1906	+ rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
	1907	+ rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
	1908	+ rcu_advance_cbs_nowake(rdp->mynode, rdp);
	1909	+ rdp->nocb_gp_adv_time = j;
2066	1910	}
2067		- rdp->qlen_last_fqs_check = LONG_MAX / 2;
	1911	+ smp_mb(); /* Enqueue before timer_pending(). */
	1912	+ if ((rdp->nocb_cb_sleep ||
	1913	+ !rcu_segcblist_ready_cbs(&rdp->cblist)) &&
	1914	+ !timer_pending(&rdp->nocb_bypass_timer))
	1915	+ wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
	1916	+ TPS("WakeOvfIsDeferred"));
	1917	+ rcu_nocb_unlock_irqrestore(rdp, flags);
2068	1918	} else {
2069		- trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot"));
	1919	+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
	1920	+ rcu_nocb_unlock_irqrestore(rdp, flags);
2070	1921	}
2071	1922	return;
2072	1923	}
2073	1924
2074		-/*
2075		- * This is a helper for __call_rcu(), which invokes this when the normal
2076		- * callback queue is inoperable. If this is not a no-CBs CPU, this
2077		- * function returns failure back to __call_rcu(), which can complain
2078		- * appropriately.
2079		- *
2080		- * Otherwise, this function queues the callback where the corresponding
2081		- * "rcuo" kthread can find it.
2082		- */
2083		-static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
2084		- bool lazy, unsigned long flags)
	1925	+/* Wake up the no-CBs GP kthread to flush ->nocb_bypass. */
	1926	+static void do_nocb_bypass_wakeup_timer(struct timer_list *t)
2085	1927	{
	1928	+ unsigned long flags;
	1929	+ struct rcu_data *rdp = from_timer(rdp, t, nocb_bypass_timer);
2086	1930
2087		- if (!rcu_is_nocb_cpu(rdp->cpu))
2088		- return false;
2089		- __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags);
2090		- if (__is_kfree_rcu_offset((unsigned long)rhp->func))
2091		- trace_rcu_kfree_callback(rdp->rsp->name, rhp,
2092		- (unsigned long)rhp->func,
2093		- -atomic_long_read(&rdp->nocb_q_count_lazy),
2094		- -atomic_long_read(&rdp->nocb_q_count));
2095		- else
2096		- trace_rcu_callback(rdp->rsp->name, rhp,
2097		- -atomic_long_read(&rdp->nocb_q_count_lazy),
2098		- -atomic_long_read(&rdp->nocb_q_count));
	1931	+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Timer"));
	1932	+ rcu_nocb_lock_irqsave(rdp, flags);
	1933	+ smp_mb__after_spinlock(); /* Timer expire before wakeup. */
	1934	+ __call_rcu_nocb_wake(rdp, true, flags);
	1935	+}
	1936	+
	1937	+/*
	1938	+ * No-CBs GP kthreads come here to wait for additional callbacks to show up
	1939	+ * or for grace periods to end.
	1940	+ */
	1941	+static void nocb_gp_wait(struct rcu_data *my_rdp)
	1942	+{
	1943	+ bool bypass = false;
	1944	+ long bypass_ncbs;
	1945	+ int __maybe_unused cpu = my_rdp->cpu;
	1946	+ unsigned long cur_gp_seq;
	1947	+ unsigned long flags;
	1948	+ bool gotcbs = false;
	1949	+ unsigned long j = jiffies;
	1950	+ bool needwait_gp = false; // This prevents actual uninitialized use.
	1951	+ bool needwake;
	1952	+ bool needwake_gp;
	1953	+ struct rcu_data *rdp;
	1954	+ struct rcu_node *rnp;
	1955	+ unsigned long wait_gp_seq = 0; // Suppress "use uninitialized" warning.
	1956	+ bool wasempty = false;
2099	1957
2100	1958	/*
2101		- * If called from an extended quiescent state with interrupts
2102		- * disabled, invoke the RCU core in order to allow the idle-entry
2103		- * deferred-wakeup check to function.
	1959	+ * Each pass through the following loop checks for CBs and for the
	1960	+ * nearest grace period (if any) to wait for next. The CB kthreads
	1961	+ * and the global grace-period kthread are awakened if needed.
2104	1962	*/
2105		- if (irqs_disabled_flags(flags) &&
2106		- !rcu_is_watching() &&
2107		- cpu_online(smp_processor_id()))
2108		- invoke_rcu_core();
	1963	+ WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp);
	1964	+ for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_cb_rdp) {
	1965	+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check"));
	1966	+ rcu_nocb_lock_irqsave(rdp, flags);
	1967	+ bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
	1968	+ if (bypass_ncbs &&
	1969	+ (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) ||
	1970	+ bypass_ncbs > 2 * qhimark)) {
	1971	+ // Bypass full or old, so flush it.
	1972	+ (void)rcu_nocb_try_flush_bypass(rdp, j);
	1973	+ bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
	1974	+ } else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) {
	1975	+ rcu_nocb_unlock_irqrestore(rdp, flags);
	1976	+ continue; /* No callbacks here, try next. */
	1977	+ }
	1978	+ if (bypass_ncbs) {
	1979	+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
	1980	+ TPS("Bypass"));
	1981	+ bypass = true;
	1982	+ }
	1983	+ rnp = rdp->mynode;
	1984	+ if (bypass) { // Avoid race with first bypass CB.
	1985	+ WRITE_ONCE(my_rdp->nocb_defer_wakeup,
	1986	+ RCU_NOCB_WAKE_NOT);
	1987	+ del_timer(&my_rdp->nocb_timer);
	1988	+ }
	1989	+ // Advance callbacks if helpful and low contention.
	1990	+ needwake_gp = false;
	1991	+ if (!rcu_segcblist_restempty(&rdp->cblist,
	1992	+ RCU_NEXT_READY_TAIL) ||
	1993	+ (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
	1994	+ rcu_seq_done(&rnp->gp_seq, cur_gp_seq))) {
	1995	+ raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
	1996	+ needwake_gp = rcu_advance_cbs(rnp, rdp);
	1997	+ wasempty = rcu_segcblist_restempty(&rdp->cblist,
	1998	+ RCU_NEXT_READY_TAIL);
	1999	+ raw_spin_unlock_rcu_node(rnp); /* irqs disabled. */
	2000	+ }
	2001	+ // Need to wait on some grace period?
	2002	+ WARN_ON_ONCE(wasempty &&
	2003	+ !rcu_segcblist_restempty(&rdp->cblist,
	2004	+ RCU_NEXT_READY_TAIL));
	2005	+ if (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq)) {
	2006	+ if (!needwait_gp ||
	2007	+ ULONG_CMP_LT(cur_gp_seq, wait_gp_seq))
	2008	+ wait_gp_seq = cur_gp_seq;
	2009	+ needwait_gp = true;
	2010	+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
	2011	+ TPS("NeedWaitGP"));
	2012	+ }
	2013	+ if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
	2014	+ needwake = rdp->nocb_cb_sleep;
	2015	+ WRITE_ONCE(rdp->nocb_cb_sleep, false);
	2016	+ smp_mb(); /* CB invocation -after- GP end. */
	2017	+ } else {
	2018	+ needwake = false;
	2019	+ }
	2020	+ rcu_nocb_unlock_irqrestore(rdp, flags);
	2021	+ if (needwake) {
	2022	+ swake_up_one(&rdp->nocb_cb_wq);
	2023	+ gotcbs = true;
	2024	+ }
	2025	+ if (needwake_gp)
	2026	+ rcu_gp_kthread_wake();
	2027	+ }
2109	2028
2110		- return true;
	2029	+ my_rdp->nocb_gp_bypass = bypass;
	2030	+ my_rdp->nocb_gp_gp = needwait_gp;
	2031	+ my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0;
	2032	+ if (bypass && !rcu_nocb_poll) {
	2033	+ // At least one child with non-empty ->nocb_bypass, so set
	2034	+ // timer in order to avoid stranding its callbacks.
	2035	+ raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
	2036	+ mod_timer(&my_rdp->nocb_bypass_timer, j + 2);
	2037	+ raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
	2038	+ }
	2039	+ if (rcu_nocb_poll) {
	2040	+ /* Polling, so trace if first poll in the series. */
	2041	+ if (gotcbs)
	2042	+ trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
	2043	+ schedule_timeout_idle(1);
	2044	+ } else if (!needwait_gp) {
	2045	+ /* Wait for callbacks to appear. */
	2046	+ trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
	2047	+ swait_event_interruptible_exclusive(my_rdp->nocb_gp_wq,
	2048	+ !READ_ONCE(my_rdp->nocb_gp_sleep));
	2049	+ trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("EndSleep"));
	2050	+ } else {
	2051	+ rnp = my_rdp->mynode;
	2052	+ trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("StartWait"));
	2053	+ swait_event_interruptible_exclusive(
	2054	+ rnp->nocb_gp_wq[rcu_seq_ctr(wait_gp_seq) & 0x1],
	2055	+ rcu_seq_done(&rnp->gp_seq, wait_gp_seq) ||
	2056	+ !READ_ONCE(my_rdp->nocb_gp_sleep));
	2057	+ trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("EndWait"));
	2058	+ }
	2059	+ if (!rcu_nocb_poll) {
	2060	+ raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
	2061	+ if (bypass)
	2062	+ del_timer(&my_rdp->nocb_bypass_timer);
	2063	+ WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
	2064	+ raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
	2065	+ }
	2066	+ my_rdp->nocb_gp_seq = -1;
	2067	+ WARN_ON(signal_pending(current));
2111	2068	}
2112	2069
2113	2070	/*
2114		- * Adopt orphaned callbacks on a no-CBs CPU, or return 0 if this is
2115		- * not a no-CBs CPU.
	2071	+ * No-CBs grace-period-wait kthread. There is one of these per group
	2072	+ * of CPUs, but only once at least one CPU in that group has come online
	2073	+ * at least once since boot. This kthread checks for newly posted
	2074	+ * callbacks from any of the CPUs it is responsible for, waits for a
	2075	+ * grace period, then awakens all of the rcu_nocb_cb_kthread() instances
	2076	+ * that then have callback-invocation work to do.
2116	2077	*/
2117		-static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp,
2118		- struct rcu_data *rdp,
2119		- unsigned long flags)
	2078	+static int rcu_nocb_gp_kthread(void *arg)
2120	2079	{
2121		- lockdep_assert_irqs_disabled();
2122		- if (!rcu_is_nocb_cpu(smp_processor_id()))
2123		- return false; /* Not NOCBs CPU, caller must migrate CBs. */
2124		- __call_rcu_nocb_enqueue(my_rdp, rcu_segcblist_head(&rdp->cblist),
2125		- rcu_segcblist_tail(&rdp->cblist),
2126		- rcu_segcblist_n_cbs(&rdp->cblist),
2127		- rcu_segcblist_n_lazy_cbs(&rdp->cblist), flags);
2128		- rcu_segcblist_init(&rdp->cblist);
2129		- rcu_segcblist_disable(&rdp->cblist);
2130		- return true;
	2080	+ struct rcu_data *rdp = arg;
	2081	+
	2082	+ for (;;) {
	2083	+ WRITE_ONCE(rdp->nocb_gp_loops, rdp->nocb_gp_loops + 1);
	2084	+ nocb_gp_wait(rdp);
	2085	+ cond_resched_tasks_rcu_qs();
	2086	+ }
	2087	+ return 0;
2131	2088	}
2132	2089
2133	2090	/*
2134		- * If necessary, kick off a new grace period, and either way wait
2135		- * for a subsequent grace period to complete.
	2091	+ * Invoke any ready callbacks from the corresponding no-CBs CPU,
	2092	+ * then, if there are no more, wait for more to appear.
2136	2093	*/
2137		-static void rcu_nocb_wait_gp(struct rcu_data *rdp)
	2094	+static void nocb_cb_wait(struct rcu_data *rdp)
2138	2095	{
2139		- unsigned long c;
2140		- bool d;
	2096	+ unsigned long cur_gp_seq;
2141	2097	unsigned long flags;
2142		- bool needwake;
	2098	+ bool needwake_gp = false;
2143	2099	struct rcu_node *rnp = rdp->mynode;
2144	2100
2145	2101	local_irq_save(flags);
2146		- c = rcu_seq_snap(&rdp->rsp->gp_seq);
2147		- if (!rdp->gpwrap && ULONG_CMP_GE(rdp->gp_seq_needed, c)) {
2148		- local_irq_restore(flags);
2149		- } else {
2150		- raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
2151		- needwake = rcu_start_this_gp(rnp, rdp, c);
2152		- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2153		- if (needwake)
2154		- rcu_gp_kthread_wake(rdp->rsp);
	2102	+ rcu_momentary_dyntick_idle();
	2103	+ local_irq_restore(flags);
	2104	+ local_bh_disable();
	2105	+ rcu_do_batch(rdp);
	2106	+ local_bh_enable();
	2107	+ lockdep_assert_irqs_enabled();
	2108	+ rcu_nocb_lock_irqsave(rdp, flags);
	2109	+ if (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
	2110	+ rcu_seq_done(&rnp->gp_seq, cur_gp_seq) &&
	2111	+ raw_spin_trylock_rcu_node(rnp)) { /* irqs already disabled. */
	2112	+ needwake_gp = rcu_advance_cbs(rdp->mynode, rdp);
	2113	+ raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
	2114	+ }
	2115	+ if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
	2116	+ rcu_nocb_unlock_irqrestore(rdp, flags);
	2117	+ if (needwake_gp)
	2118	+ rcu_gp_kthread_wake();
	2119	+ return;
2155	2120	}
2156	2121
2157		- /*
2158		- * Wait for the grace period. Do so interruptibly to avoid messing
2159		- * up the load average.
2160		- */
2161		- trace_rcu_this_gp(rnp, rdp, c, TPS("StartWait"));
2162		- for (;;) {
2163		- swait_event_interruptible_exclusive(
2164		- rnp->nocb_gp_wq[rcu_seq_ctr(c) & 0x1],
2165		- (d = rcu_seq_done(&rnp->gp_seq, c)));
2166		- if (likely(d))
2167		- break;
2168		- WARN_ON(signal_pending(current));
2169		- trace_rcu_this_gp(rnp, rdp, c, TPS("ResumeWait"));
	2122	+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("CBSleep"));
	2123	+ WRITE_ONCE(rdp->nocb_cb_sleep, true);
	2124	+ rcu_nocb_unlock_irqrestore(rdp, flags);
	2125	+ if (needwake_gp)
	2126	+ rcu_gp_kthread_wake();
	2127	+ swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
	2128	+ !READ_ONCE(rdp->nocb_cb_sleep));
	2129	+ if (!smp_load_acquire(&rdp->nocb_cb_sleep)) { /* VVV */
	2130	+ /* ^^^ Ensure CB invocation follows _sleep test. */
	2131	+ return;
2170	2132	}
2171		- trace_rcu_this_gp(rnp, rdp, c, TPS("EndWait"));
2172		- smp_mb(); /* Ensure that CB invocation happens after GP end. */
	2133	+ WARN_ON(signal_pending(current));
	2134	+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
2173	2135	}
2174	2136
2175	2137	/*
2176		- * Leaders come here to wait for additional callbacks to show up.
2177		- * This function does not return until callbacks appear.
	2138	+ * Per-rcu_data kthread, but only for no-CBs CPUs. Repeatedly invoke
	2139	+ * nocb_cb_wait() to do the dirty work.
2178	2140	*/
2179		-static void nocb_leader_wait(struct rcu_data *my_rdp)
	2141	+static int rcu_nocb_cb_kthread(void *arg)
2180	2142	{
2181		- bool firsttime = true;
2182		- unsigned long flags;
2183		- bool gotcbs;
2184		- struct rcu_data *rdp;
2185		- struct rcu_head **tail;
2186		-
2187		-wait_again:
2188		-
2189		- /* Wait for callbacks to appear. */
2190		- if (!rcu_nocb_poll) {
2191		- trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Sleep"));
2192		- swait_event_interruptible_exclusive(my_rdp->nocb_wq,
2193		- !READ_ONCE(my_rdp->nocb_leader_sleep));
2194		- raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);
2195		- my_rdp->nocb_leader_sleep = true;
2196		- WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
2197		- del_timer(&my_rdp->nocb_timer);
2198		- raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags);
2199		- } else if (firsttime) {
2200		- firsttime = false; /* Don't drown trace log with "Poll"! */
2201		- trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Poll"));
2202		- }
2203		-
2204		- /*
2205		- * Each pass through the following loop checks a follower for CBs.
2206		- * We are our own first follower. Any CBs found are moved to
2207		- * nocb_gp_head, where they await a grace period.
2208		- */
2209		- gotcbs = false;
2210		- smp_mb(); /* wakeup and _sleep before ->nocb_head reads. */
2211		- for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
2212		- rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head);
2213		- if (!rdp->nocb_gp_head)
2214		- continue; /* No CBs here, try next follower. */
2215		-
2216		- /* Move callbacks to wait-for-GP list, which is empty. */
2217		- WRITE_ONCE(rdp->nocb_head, NULL);
2218		- rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head);
2219		- gotcbs = true;
2220		- }
2221		-
2222		- /* No callbacks? Sleep a bit if polling, and go retry. */
2223		- if (unlikely(!gotcbs)) {
2224		- WARN_ON(signal_pending(current));
2225		- if (rcu_nocb_poll) {
2226		- schedule_timeout_interruptible(1);
2227		- } else {
2228		- trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu,
2229		- TPS("WokeEmpty"));
2230		- }
2231		- goto wait_again;
2232		- }
2233		-
2234		- /* Wait for one grace period. */
2235		- rcu_nocb_wait_gp(my_rdp);
2236		-
2237		- /* Each pass through the following loop wakes a follower, if needed. */
2238		- for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
2239		- if (!rcu_nocb_poll &&
2240		- READ_ONCE(rdp->nocb_head) &&
2241		- READ_ONCE(my_rdp->nocb_leader_sleep)) {
2242		- raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);
2243		- my_rdp->nocb_leader_sleep = false;/* No need to sleep.*/
2244		- raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags);
2245		- }
2246		- if (!rdp->nocb_gp_head)
2247		- continue; /* No CBs, so no need to wake follower. */
2248		-
2249		- /* Append callbacks to follower's "done" list. */
2250		- raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
2251		- tail = rdp->nocb_follower_tail;
2252		- rdp->nocb_follower_tail = rdp->nocb_gp_tail;
2253		- *tail = rdp->nocb_gp_head;
2254		- raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
2255		- if (rdp != my_rdp && tail == &rdp->nocb_follower_head) {
2256		- /* List was empty, so wake up the follower. */
2257		- swake_up_one(&rdp->nocb_wq);
2258		- }
2259		- }
2260		-
2261		- /* If we (the leader) don't have CBs, go wait some more. */
2262		- if (!my_rdp->nocb_follower_head)
2263		- goto wait_again;
2264		-}
2265		-
2266		-/*
2267		- * Followers come here to wait for additional callbacks to show up.
2268		- * This function does not return until callbacks appear.
2269		- */
2270		-static void nocb_follower_wait(struct rcu_data *rdp)
2271		-{
2272		- for (;;) {
2273		- trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("FollowerSleep"));
2274		- swait_event_interruptible_exclusive(rdp->nocb_wq,
2275		- READ_ONCE(rdp->nocb_follower_head));
2276		- if (smp_load_acquire(&rdp->nocb_follower_head)) {
2277		- /* ^^^ Ensure CB invocation follows _head test. */
2278		- return;
2279		- }
2280		- WARN_ON(signal_pending(current));
2281		- trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WokeEmpty"));
2282		- }
2283		-}
2284		-
2285		-/*
2286		- * Per-rcu_data kthread, but only for no-CBs CPUs. Each kthread invokes
2287		- * callbacks queued by the corresponding no-CBs CPU, however, there is
2288		- * an optional leader-follower relationship so that the grace-period
2289		- * kthreads don't have to do quite so many wakeups.
2290		- */
2291		-static int rcu_nocb_kthread(void *arg)
2292		-{
2293		- int c, cl;
2294		- unsigned long flags;
2295		- struct rcu_head *list;
2296		- struct rcu_head *next;
2297		- struct rcu_head **tail;
2298	2143	struct rcu_data *rdp = arg;
2299	2144
2300		- /* Each pass through this loop invokes one batch of callbacks */
	2145	+ // Each pass through this loop does one callback batch, and,
	2146	+ // if there are no more ready callbacks, waits for them.
2301	2147	for (;;) {
2302		- /* Wait for callbacks. */
2303		- if (rdp->nocb_leader == rdp)
2304		- nocb_leader_wait(rdp);
2305		- else
2306		- nocb_follower_wait(rdp);
2307		-
2308		- /* Pull the ready-to-invoke callbacks onto local list. */
2309		- raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
2310		- list = rdp->nocb_follower_head;
2311		- rdp->nocb_follower_head = NULL;
2312		- tail = rdp->nocb_follower_tail;
2313		- rdp->nocb_follower_tail = &rdp->nocb_follower_head;
2314		- raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
2315		- BUG_ON(!list);
2316		- trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WokeNonEmpty"));
2317		-
2318		- /* Each pass through the following loop invokes a callback. */
2319		- trace_rcu_batch_start(rdp->rsp->name,
2320		- atomic_long_read(&rdp->nocb_q_count_lazy),
2321		- atomic_long_read(&rdp->nocb_q_count), -1);
2322		- c = cl = 0;
2323		- while (list) {
2324		- next = list->next;
2325		- /* Wait for enqueuing to complete, if needed. */
2326		- while (next == NULL && &list->next != tail) {
2327		- trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2328		- TPS("WaitQueue"));
2329		- schedule_timeout_interruptible(1);
2330		- trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2331		- TPS("WokeQueue"));
2332		- next = list->next;
2333		- }
2334		- debug_rcu_head_unqueue(list);
2335		- local_bh_disable();
2336		- if (__rcu_reclaim(rdp->rsp->name, list))
2337		- cl++;
2338		- c++;
2339		- local_bh_enable();
2340		- cond_resched_tasks_rcu_qs();
2341		- list = next;
2342		- }
2343		- trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
2344		- smp_mb__before_atomic(); /* _add after CB invocation. */
2345		- atomic_long_add(-c, &rdp->nocb_q_count);
2346		- atomic_long_add(-cl, &rdp->nocb_q_count_lazy);
	2148	+ nocb_cb_wait(rdp);
	2149	+ cond_resched_tasks_rcu_qs();
2347	2150	}
2348	2151	return 0;
2349	2152	}
..	..	@@ -2360,15 +2163,14 @@
2360	2163	unsigned long flags;
2361	2164	int ndw;
2362	2165
2363		- raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
	2166	+ rcu_nocb_lock_irqsave(rdp, flags);
2364	2167	if (!rcu_nocb_need_deferred_wakeup(rdp)) {
2365		- raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
	2168	+ rcu_nocb_unlock_irqrestore(rdp, flags);
2366	2169	return;
2367	2170	}
2368	2171	ndw = READ_ONCE(rdp->nocb_defer_wakeup);
2369		- WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
2370		- __wake_nocb_leader(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
2371		- trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake"));
	2172	+ wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
	2173	+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
2372	2174	}
2373	2175
2374	2176	/* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */
..	..	@@ -2390,11 +2192,16 @@
2390	2192	do_nocb_deferred_wakeup_common(rdp);
2391	2193	}
2392	2194
	2195	+void rcu_nocb_flush_deferred_wakeup(void)
	2196	+{
	2197	+ do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data));
	2198	+}
	2199	+
2393	2200	void __init rcu_init_nohz(void)
2394	2201	{
2395	2202	int cpu;
2396	2203	bool need_rcu_nocb_mask = false;
2397		- struct rcu_state *rsp;
	2204	+ struct rcu_data *rdp;
2398	2205
2399	2206	#if defined(CONFIG_NO_HZ_FULL)
2400	2207	if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
..	..	@@ -2428,82 +2235,73 @@
2428	2235	if (rcu_nocb_poll)
2429	2236	pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
2430	2237
2431		- for_each_rcu_flavor(rsp) {
2432		- for_each_cpu(cpu, rcu_nocb_mask)
2433		- init_nocb_callback_list(per_cpu_ptr(rsp->rda, cpu));
2434		- rcu_organize_nocb_kthreads(rsp);
	2238	+ for_each_cpu(cpu, rcu_nocb_mask) {
	2239	+ rdp = per_cpu_ptr(&rcu_data, cpu);
	2240	+ if (rcu_segcblist_empty(&rdp->cblist))
	2241	+ rcu_segcblist_init(&rdp->cblist);
	2242	+ rcu_segcblist_offload(&rdp->cblist);
2435	2243	}
	2244	+ rcu_organize_nocb_kthreads();
2436	2245	}
2437	2246
2438	2247	/* Initialize per-rcu_data variables for no-CBs CPUs. */
2439	2248	static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
2440	2249	{
2441		- rdp->nocb_tail = &rdp->nocb_head;
2442		- init_swait_queue_head(&rdp->nocb_wq);
2443		- rdp->nocb_follower_tail = &rdp->nocb_follower_head;
	2250	+ init_swait_queue_head(&rdp->nocb_cb_wq);
	2251	+ init_swait_queue_head(&rdp->nocb_gp_wq);
2444	2252	raw_spin_lock_init(&rdp->nocb_lock);
	2253	+ raw_spin_lock_init(&rdp->nocb_bypass_lock);
	2254	+ raw_spin_lock_init(&rdp->nocb_gp_lock);
2445	2255	timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0);
	2256	+ timer_setup(&rdp->nocb_bypass_timer, do_nocb_bypass_wakeup_timer, 0);
	2257	+ rcu_cblist_init(&rdp->nocb_bypass);
2446	2258	}
2447	2259
2448	2260	/*
2449	2261	* If the specified CPU is a no-CBs CPU that does not already have its
2450		- * rcuo kthread for the specified RCU flavor, spawn it. If the CPUs are
2451		- * brought online out of order, this can require re-organizing the
2452		- * leader-follower relationships.
	2262	+ * rcuo CB kthread, spawn it. Additionally, if the rcuo GP kthread
	2263	+ * for this CPU's group has not yet been created, spawn it as well.
2453	2264	*/
2454		-static void rcu_spawn_one_nocb_kthread(struct rcu_state *rsp, int cpu)
	2265	+static void rcu_spawn_one_nocb_kthread(int cpu)
2455	2266	{
2456		- struct rcu_data *rdp;
2457		- struct rcu_data *rdp_last;
2458		- struct rcu_data *rdp_old_leader;
2459		- struct rcu_data *rdp_spawn = per_cpu_ptr(rsp->rda, cpu);
	2267	+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
	2268	+ struct rcu_data *rdp_gp;
2460	2269	struct task_struct *t;
2461	2270
2462	2271	/*
2463	2272	* If this isn't a no-CBs CPU or if it already has an rcuo kthread,
2464	2273	* then nothing to do.
2465	2274	*/
2466		- if (!rcu_is_nocb_cpu(cpu) || rdp_spawn->nocb_kthread)
	2275	+ if (!rcu_is_nocb_cpu(cpu) || rdp->nocb_cb_kthread)
2467	2276	return;
2468	2277
2469		- /* If we didn't spawn the leader first, reorganize! */
2470		- rdp_old_leader = rdp_spawn->nocb_leader;
2471		- if (rdp_old_leader != rdp_spawn && !rdp_old_leader->nocb_kthread) {
2472		- rdp_last = NULL;
2473		- rdp = rdp_old_leader;
2474		- do {
2475		- rdp->nocb_leader = rdp_spawn;
2476		- if (rdp_last && rdp != rdp_spawn)
2477		- rdp_last->nocb_next_follower = rdp;
2478		- if (rdp == rdp_spawn) {
2479		- rdp = rdp->nocb_next_follower;
2480		- } else {
2481		- rdp_last = rdp;
2482		- rdp = rdp->nocb_next_follower;
2483		- rdp_last->nocb_next_follower = NULL;
2484		- }
2485		- } while (rdp);
2486		- rdp_spawn->nocb_next_follower = rdp_old_leader;
	2278	+ /* If we didn't spawn the GP kthread first, reorganize! */
	2279	+ rdp_gp = rdp->nocb_gp_rdp;
	2280	+ if (!rdp_gp->nocb_gp_kthread) {
	2281	+ t = kthread_run(rcu_nocb_gp_kthread, rdp_gp,
	2282	+ "rcuog/%d", rdp_gp->cpu);
	2283	+ if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__))
	2284	+ return;
	2285	+ WRITE_ONCE(rdp_gp->nocb_gp_kthread, t);
2487	2286	}
2488	2287
2489		- /* Spawn the kthread for this CPU and RCU flavor. */
2490		- t = kthread_run(rcu_nocb_kthread, rdp_spawn,
2491		- "rcuo%c/%d", rsp->abbr, cpu);
2492		- BUG_ON(IS_ERR(t));
2493		- WRITE_ONCE(rdp_spawn->nocb_kthread, t);
	2288	+ /* Spawn the kthread for this CPU. */
	2289	+ t = kthread_run(rcu_nocb_cb_kthread, rdp,
	2290	+ "rcuo%c/%d", rcu_state.abbr, cpu);
	2291	+ if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__))
	2292	+ return;
	2293	+ WRITE_ONCE(rdp->nocb_cb_kthread, t);
	2294	+ WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread);
2494	2295	}
2495	2296
2496	2297	/*
2497	2298	* If the specified CPU is a no-CBs CPU that does not already have its
2498		- * rcuo kthreads, spawn them.
	2299	+ * rcuo kthread, spawn it.
2499	2300	*/
2500		-static void rcu_spawn_all_nocb_kthreads(int cpu)
	2301	+static void rcu_spawn_cpu_nocb_kthread(int cpu)
2501	2302	{
2502		- struct rcu_state *rsp;
2503		-
2504	2303	if (rcu_scheduler_fully_active)
2505		- for_each_rcu_flavor(rsp)
2506		- rcu_spawn_one_nocb_kthread(rsp, cpu);
	2304	+ rcu_spawn_one_nocb_kthread(cpu);
2507	2305	}
2508	2306
2509	2307	/*
..	..	@@ -2517,30 +2315,33 @@
2517	2315	int cpu;
2518	2316
2519	2317	for_each_online_cpu(cpu)
2520		- rcu_spawn_all_nocb_kthreads(cpu);
	2318	+ rcu_spawn_cpu_nocb_kthread(cpu);
2521	2319	}
2522	2320
2523		-/* How many follower CPU IDs per leader? Default of -1 for sqrt(nr_cpu_ids). */
2524		-static int rcu_nocb_leader_stride = -1;
2525		-module_param(rcu_nocb_leader_stride, int, 0444);
	2321	+/* How many CB CPU IDs per GP kthread? Default of -1 for sqrt(nr_cpu_ids). */
	2322	+static int rcu_nocb_gp_stride = -1;
	2323	+module_param(rcu_nocb_gp_stride, int, 0444);
2526	2324
2527	2325	/*
2528		- * Initialize leader-follower relationships for all no-CBs CPU.
	2326	+ * Initialize GP-CB relationships for all no-CBs CPU.
2529	2327	*/
2530		-static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp)
	2328	+static void __init rcu_organize_nocb_kthreads(void)
2531	2329	{
2532	2330	int cpu;
2533		- int ls = rcu_nocb_leader_stride;
2534		- int nl = 0; /* Next leader. */
	2331	+ bool firsttime = true;
	2332	+ bool gotnocbs = false;
	2333	+ bool gotnocbscbs = true;
	2334	+ int ls = rcu_nocb_gp_stride;
	2335	+ int nl = 0; /* Next GP kthread. */
2535	2336	struct rcu_data *rdp;
2536		- struct rcu_data *rdp_leader = NULL; /* Suppress misguided gcc warn. */
	2337	+ struct rcu_data *rdp_gp = NULL; /* Suppress misguided gcc warn. */
2537	2338	struct rcu_data *rdp_prev = NULL;
2538	2339
2539	2340	if (!cpumask_available(rcu_nocb_mask))
2540	2341	return;
2541	2342	if (ls == -1) {
2542		- ls = int_sqrt(nr_cpu_ids);
2543		- rcu_nocb_leader_stride = ls;
	2343	+ ls = nr_cpu_ids / int_sqrt(nr_cpu_ids);
	2344	+ rcu_nocb_gp_stride = ls;
2544	2345	}
2545	2346
2546	2347	/*
..	..	@@ -2549,47 +2350,142 @@
2549	2350	* we will spawn the needed set of rcu_nocb_kthread() kthreads.
2550	2351	*/
2551	2352	for_each_cpu(cpu, rcu_nocb_mask) {
2552		- rdp = per_cpu_ptr(rsp->rda, cpu);
	2353	+ rdp = per_cpu_ptr(&rcu_data, cpu);
2553	2354	if (rdp->cpu >= nl) {
2554		- /* New leader, set up for followers & next leader. */
	2355	+ /* New GP kthread, set up for CBs & next GP. */
	2356	+ gotnocbs = true;
2555	2357	nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls;
2556		- rdp->nocb_leader = rdp;
2557		- rdp_leader = rdp;
	2358	+ rdp->nocb_gp_rdp = rdp;
	2359	+ rdp_gp = rdp;
	2360	+ if (dump_tree) {
	2361	+ if (!firsttime)
	2362	+ pr_cont("%s\n", gotnocbscbs
	2363	+ ? "" : " (self only)");
	2364	+ gotnocbscbs = false;
	2365	+ firsttime = false;
	2366	+ pr_alert("%s: No-CB GP kthread CPU %d:",
	2367	+ __func__, cpu);
	2368	+ }
2558	2369	} else {
2559		- /* Another follower, link to previous leader. */
2560		- rdp->nocb_leader = rdp_leader;
2561		- rdp_prev->nocb_next_follower = rdp;
	2370	+ /* Another CB kthread, link to previous GP kthread. */
	2371	+ gotnocbscbs = true;
	2372	+ rdp->nocb_gp_rdp = rdp_gp;
	2373	+ rdp_prev->nocb_next_cb_rdp = rdp;
	2374	+ if (dump_tree)
	2375	+ pr_cont(" %d", cpu);
2562	2376	}
2563	2377	rdp_prev = rdp;
2564	2378	}
	2379	+ if (gotnocbs && dump_tree)
	2380	+ pr_cont("%s\n", gotnocbscbs ? "" : " (self only)");
2565	2381	}
2566	2382
2567		-/* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */
2568		-static bool init_nocb_callback_list(struct rcu_data *rdp)
	2383	+/*
	2384	+ * Bind the current task to the offloaded CPUs. If there are no offloaded
	2385	+ * CPUs, leave the task unbound. Splat if the bind attempt fails.
	2386	+ */
	2387	+void rcu_bind_current_to_nocb(void)
2569	2388	{
2570		- if (!rcu_is_nocb_cpu(rdp->cpu))
2571		- return false;
	2389	+ if (cpumask_available(rcu_nocb_mask) && cpumask_weight(rcu_nocb_mask))
	2390	+ WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask));
	2391	+}
	2392	+EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb);
2572	2393
2573		- /* If there are early-boot callbacks, move them to nocb lists. */
2574		- if (!rcu_segcblist_empty(&rdp->cblist)) {
2575		- rdp->nocb_head = rcu_segcblist_head(&rdp->cblist);
2576		- rdp->nocb_tail = rcu_segcblist_tail(&rdp->cblist);
2577		- atomic_long_set(&rdp->nocb_q_count,
2578		- rcu_segcblist_n_cbs(&rdp->cblist));
2579		- atomic_long_set(&rdp->nocb_q_count_lazy,
2580		- rcu_segcblist_n_lazy_cbs(&rdp->cblist));
2581		- rcu_segcblist_init(&rdp->cblist);
2582		- }
2583		- rcu_segcblist_disable(&rdp->cblist);
2584		- return true;
	2394	+/*
	2395	+ * Dump out nocb grace-period kthread state for the specified rcu_data
	2396	+ * structure.
	2397	+ */
	2398	+static void show_rcu_nocb_gp_state(struct rcu_data *rdp)
	2399	+{
	2400	+ struct rcu_node *rnp = rdp->mynode;
	2401	+
	2402	+ pr_info("nocb GP %d %c%c%c%c%c%c %c[%c%c] %c%c:%ld rnp %d:%d %lu\n",
	2403	+ rdp->cpu,
	2404	+ "kK"[!!rdp->nocb_gp_kthread],
	2405	+ "lL"[raw_spin_is_locked(&rdp->nocb_gp_lock)],
	2406	+ "dD"[!!rdp->nocb_defer_wakeup],
	2407	+ "tT"[timer_pending(&rdp->nocb_timer)],
	2408	+ "bB"[timer_pending(&rdp->nocb_bypass_timer)],
	2409	+ "sS"[!!rdp->nocb_gp_sleep],
	2410	+ ".W"[swait_active(&rdp->nocb_gp_wq)],
	2411	+ ".W"[swait_active(&rnp->nocb_gp_wq[0])],
	2412	+ ".W"[swait_active(&rnp->nocb_gp_wq[1])],
	2413	+ ".B"[!!rdp->nocb_gp_bypass],
	2414	+ ".G"[!!rdp->nocb_gp_gp],
	2415	+ (long)rdp->nocb_gp_seq,
	2416	+ rnp->grplo, rnp->grphi, READ_ONCE(rdp->nocb_gp_loops));
	2417	+}
	2418	+
	2419	+/* Dump out nocb kthread state for the specified rcu_data structure. */
	2420	+static void show_rcu_nocb_state(struct rcu_data *rdp)
	2421	+{
	2422	+ struct rcu_segcblist *rsclp = &rdp->cblist;
	2423	+ bool waslocked;
	2424	+ bool wastimer;
	2425	+ bool wassleep;
	2426	+
	2427	+ if (rdp->nocb_gp_rdp == rdp)
	2428	+ show_rcu_nocb_gp_state(rdp);
	2429	+
	2430	+ pr_info(" CB %d->%d %c%c%c%c%c%c F%ld L%ld C%d %c%c%c%c%c q%ld\n",
	2431	+ rdp->cpu, rdp->nocb_gp_rdp->cpu,
	2432	+ "kK"[!!rdp->nocb_cb_kthread],
	2433	+ "bB"[raw_spin_is_locked(&rdp->nocb_bypass_lock)],
	2434	+ "cC"[!!atomic_read(&rdp->nocb_lock_contended)],
	2435	+ "lL"[raw_spin_is_locked(&rdp->nocb_lock)],
	2436	+ "sS"[!!rdp->nocb_cb_sleep],
	2437	+ ".W"[swait_active(&rdp->nocb_cb_wq)],
	2438	+ jiffies - rdp->nocb_bypass_first,
	2439	+ jiffies - rdp->nocb_nobypass_last,
	2440	+ rdp->nocb_nobypass_count,
	2441	+ ".D"[rcu_segcblist_ready_cbs(rsclp)],
	2442	+ ".W"[!rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)],
	2443	+ ".R"[!rcu_segcblist_restempty(rsclp, RCU_WAIT_TAIL)],
	2444	+ ".N"[!rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL)],
	2445	+ ".B"[!!rcu_cblist_n_cbs(&rdp->nocb_bypass)],
	2446	+ rcu_segcblist_n_cbs(&rdp->cblist));
	2447	+
	2448	+ /* It is OK for GP kthreads to have GP state. */
	2449	+ if (rdp->nocb_gp_rdp == rdp)
	2450	+ return;
	2451	+
	2452	+ waslocked = raw_spin_is_locked(&rdp->nocb_gp_lock);
	2453	+ wastimer = timer_pending(&rdp->nocb_bypass_timer);
	2454	+ wassleep = swait_active(&rdp->nocb_gp_wq);
	2455	+ if (!rdp->nocb_gp_sleep && !waslocked && !wastimer && !wassleep)
	2456	+ return; /* Nothing untowards. */
	2457	+
	2458	+ pr_info(" nocb GP activity on CB-only CPU!!! %c%c%c%c %c\n",
	2459	+ "lL"[waslocked],
	2460	+ "dD"[!!rdp->nocb_defer_wakeup],
	2461	+ "tT"[wastimer],
	2462	+ "sS"[!!rdp->nocb_gp_sleep],
	2463	+ ".W"[wassleep]);
2585	2464	}
2586	2465
2587	2466	#else /* #ifdef CONFIG_RCU_NOCB_CPU */
2588	2467
2589		-static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
	2468	+/* No ->nocb_lock to acquire. */
	2469	+static void rcu_nocb_lock(struct rcu_data *rdp)
2590	2470	{
2591		- WARN_ON_ONCE(1); /* Should be dead code. */
2592		- return false;
	2471	+}
	2472	+
	2473	+/* No ->nocb_lock to release. */
	2474	+static void rcu_nocb_unlock(struct rcu_data *rdp)
	2475	+{
	2476	+}
	2477	+
	2478	+/* No ->nocb_lock to release. */
	2479	+static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
	2480	+ unsigned long flags)
	2481	+{
	2482	+ local_irq_restore(flags);
	2483	+}
	2484	+
	2485	+/* Lockdep check that ->cblist may be safely accessed. */
	2486	+static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)
	2487	+{
	2488	+ lockdep_assert_irqs_disabled(); /* the only condition asserted in this variant; rdp is not used */
2593	2489	}
2594	2490
2595	2491	static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
..	..	@@ -2605,17 +2501,22 @@
2605	2501	{
2606	2502	}
2607	2503
2608		-static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
2609		- bool lazy, unsigned long flags)
	2504	+static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
	2505	+ unsigned long j)
	2506	+{
	2507	+ return true; /* Stub for builds without CONFIG_RCU_NOCB_CPU: arguments are ignored, always true. */
	2508	+}
	2509	+
	2510	+static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
	2511	+ bool *was_alldone, unsigned long flags)
2610	2512	{
2611	2513	return false; /* Stub for builds without CONFIG_RCU_NOCB_CPU: arguments are ignored, always false. */
2612	2514	}
2613	2515
2614		-static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp,
2615		- struct rcu_data *rdp,
2616		- unsigned long flags)
	2516	+static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
	2517	+ unsigned long flags)
2617	2518	{
2618		- return false;
	2519	+ WARN_ON_ONCE(1); /* Should be dead code without CONFIG_RCU_NOCB_CPU, so warn (once) if ever called! */
2619	2520	}
2620	2521
2621	2522	static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
..	..	@@ -2631,7 +2532,7 @@
2631	2532	{
2632	2533	}
2633	2534
2634		-static void rcu_spawn_all_nocb_kthreads(int cpu)
	2535	+static void rcu_spawn_cpu_nocb_kthread(int cpu)
2635	2536	{ /* Empty stub for builds without CONFIG_RCU_NOCB_CPU: cpu is ignored. */
2636	2537	}
2637	2538
..	..	@@ -2639,9 +2540,8 @@
2639	2540	{
2640	2541	}
2641	2542
2642		-static bool init_nocb_callback_list(struct rcu_data *rdp)
	2543	+static void show_rcu_nocb_state(struct rcu_data *rdp)
2643	2544	{ /* Empty stub for builds without CONFIG_RCU_NOCB_CPU: rdp is ignored and nothing is printed. */
2644		- return false;
2645	2545	}
2646	2546
2647	2547	#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
..	..	@@ -2655,12 +2555,12 @@
2655	2555	* This code relies on the fact that all NO_HZ_FULL CPUs are also
2656	2556	* CONFIG_RCU_NOCB_CPU CPUs.
2657	2557	*/
2658		-static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
	2558	+static bool rcu_nohz_full_cpu(void)
2659	2559	{
2660	2560	#ifdef CONFIG_NO_HZ_FULL
2661	2561	if (tick_nohz_full_cpu(smp_processor_id()) &&
2662		- (!rcu_gp_in_progress(rsp) ||
2663		- ULONG_CMP_LT(jiffies, READ_ONCE(rsp->gp_start) + HZ)))
	2562	+ (!rcu_gp_in_progress() ||
	2563	+ time_before(jiffies, READ_ONCE(rcu_state.gp_start) + HZ)))
2664	2564	return true; /* nohz_full CPU, and either no grace period is in progress or jiffies is still before gp_start + HZ */
2665	2565	#endif /* #ifdef CONFIG_NO_HZ_FULL */
2666	2566	return false; /* also the unconditional result when CONFIG_NO_HZ_FULL is not set */
..	..	@@ -2677,7 +2577,7 @@
2677	2577	}
2678	2578
2679	2579	/* On dyntick-idle entry, record this CPU's number in current->rcu_tasks_idle_cpu (only with both CONFIG_TASKS_RCU and CONFIG_NO_HZ_FULL). */
2680		-static void rcu_dynticks_task_enter(void)
	2580	+static __always_inline void rcu_dynticks_task_enter(void)
2681	2581	{
2682	2582	#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
2683	2583	WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
..	..	@@ -2685,9 +2585,27 @@
2685	2585	}
2686	2586
2687	2587	/* On dyntick-idle exit, record no CPU for the current task by writing -1 to current->rcu_tasks_idle_cpu. */
2688		-static void rcu_dynticks_task_exit(void)
	2588	+static __always_inline void rcu_dynticks_task_exit(void)
2689	2589	{
2690	2590	#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
2691	2591	WRITE_ONCE(current->rcu_tasks_idle_cpu, -1); /* compiled out unless both options above are set */
2692	2592	#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
2693	2593	}
	2594	+
	2595	+/* Turn on heavyweight RCU tasks trace readers on idle/user entry by setting current->trc_reader_special.b.need_mb. */
	2596	+static __always_inline void rcu_dynticks_task_trace_enter(void)
	2597	+{
	2598	+#ifdef CONFIG_TASKS_TRACE_RCU
	2599	+ if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) /* flag is touched only when this option is enabled */
	2600	+ current->trc_reader_special.b.need_mb = true;
	2601	+#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
	2602	+}
	2603	+
	2604	+/* Turn off heavyweight RCU tasks trace readers on idle/user exit by clearing current->trc_reader_special.b.need_mb. */
	2605	+static __always_inline void rcu_dynticks_task_trace_exit(void)
	2606	+{
	2607	+#ifdef CONFIG_TASKS_TRACE_RCU
	2608	+ if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) /* flag is touched only when this option is enabled */
	2609	+ current->trc_reader_special.b.need_mb = false;
	2610	+#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
	2611	+}