~hc/RK356X_SDK_RELEASE.git

..	..	@@ -1,63 +1,55 @@
	1	+/* SPDX-License-Identifier: GPL-2.0+ */
1	2	/*
2	3	* RCU expedited grace periods
3	4	*
4		- * This program is free software; you can redistribute it and/or modify
5		- * it under the terms of the GNU General Public License as published by
6		- * the Free Software Foundation; either version 2 of the License, or
7		- * (at your option) any later version.
8		- *
9		- * This program is distributed in the hope that it will be useful,
10		- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11		- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12		- * GNU General Public License for more details.
13		- *
14		- * You should have received a copy of the GNU General Public License
15		- * along with this program; if not, you can access it online at
16		- * http://www.gnu.org/licenses/gpl-2.0.html.
17		- *
18	5	* Copyright IBM Corporation, 2016
19	6	*
20		- * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
	7	+ * Authors: Paul E. McKenney <paulmck@linux.ibm.com>
21	8	*/
22	9
23	10	#include <linux/lockdep.h>
24	11
	12	+static void rcu_exp_handler(void *unused);
	13	+static int rcu_print_task_exp_stall(struct rcu_node *rnp);
	14	+
25	15	/*
26	16	* Record the start of an expedited grace period.
27	17	*/
28		-static void rcu_exp_gp_seq_start(struct rcu_state *rsp)
	18	+static void rcu_exp_gp_seq_start(void)
29	19	{
30		- rcu_seq_start(&rsp->expedited_sequence);
	20	+ rcu_seq_start(&rcu_state.expedited_sequence);
31	21	}
32	22
33	23	/*
34		- * Return then value that expedited-grace-period counter will have
	24	+ * Return the value that the expedited-grace-period counter will have
35	25	* at the end of the current grace period.
36	26	*/
37		-static __maybe_unused unsigned long rcu_exp_gp_seq_endval(struct rcu_state *rsp)
	27	+static __maybe_unused unsigned long rcu_exp_gp_seq_endval(void)
38	28	{
39		- return rcu_seq_endval(&rsp->expedited_sequence);
	29	+ return rcu_seq_endval(&rcu_state.expedited_sequence);
40	30	}
41	31
42	32	/*
43	33	* Record the end of an expedited grace period.
44	34	*/
45		-static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
	35	+static void rcu_exp_gp_seq_end(void)
46	36	{
47		- rcu_seq_end(&rsp->expedited_sequence);
	37	+ rcu_seq_end(&rcu_state.expedited_sequence);
48	38	smp_mb(); /* Ensure that consecutive grace periods serialize. */
49	39	}
50	40
51	41	/*
52		- * Take a snapshot of the expedited-grace-period counter.
	42	+ * Take a snapshot of the expedited-grace-period counter, which is the
	43	+ * earliest value that will indicate that a full grace period has
	44	+ * elapsed since the current time.
53	45	*/
54		-static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
	46	+static unsigned long rcu_exp_gp_seq_snap(void)
55	47	{
56	48	unsigned long s;
57	49
58	50	smp_mb(); /* Caller's modifications seen first by other CPUs. */
59		- s = rcu_seq_snap(&rsp->expedited_sequence);
60		- trace_rcu_exp_grace_period(rsp->name, s, TPS("snap"));
	51	+ s = rcu_seq_snap(&rcu_state.expedited_sequence);
	52	+ trace_rcu_exp_grace_period(rcu_state.name, s, TPS("snap"));
61	53	return s;
62	54	}
63	55
..	..	@@ -66,9 +58,9 @@
66	58	* if a full expedited grace period has elapsed since that snapshot
67	59	* was taken.
68	60	*/
69		-static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
	61	+static bool rcu_exp_gp_seq_done(unsigned long s)
70	62	{
71		- return rcu_seq_done(&rsp->expedited_sequence, s);
	63	+ return rcu_seq_done(&rcu_state.expedited_sequence, s);
72	64	}
73	65
74	66	/*
..	..	@@ -78,26 +70,26 @@
78	70	* ever been online. This means that this function normally takes its
79	71	* no-work-to-do fastpath.
80	72	*/
81		-static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
	73	+static void sync_exp_reset_tree_hotplug(void)
82	74	{
83	75	bool done;
84	76	unsigned long flags;
85	77	unsigned long mask;
86	78	unsigned long oldmask;
87		- int ncpus = smp_load_acquire(&rsp->ncpus); /* Order against locking. */
	79	+ int ncpus = smp_load_acquire(&rcu_state.ncpus); /* Order vs. locking. */
88	80	struct rcu_node *rnp;
89	81	struct rcu_node *rnp_up;
90	82
91	83	/* If no new CPUs onlined since last time, nothing to do. */
92		- if (likely(ncpus == rsp->ncpus_snap))
	84	+ if (likely(ncpus == rcu_state.ncpus_snap))
93	85	return;
94		- rsp->ncpus_snap = ncpus;
	86	+ rcu_state.ncpus_snap = ncpus;
95	87
96	88	/*
97	89	* Each pass through the following loop propagates newly onlined
98	90	* CPUs for the current rcu_node structure up the rcu_node tree.
99	91	*/
100		- rcu_for_each_leaf_node(rsp, rnp) {
	92	+ rcu_for_each_leaf_node(rnp) {
101	93	raw_spin_lock_irqsave_rcu_node(rnp, flags);
102	94	if (rnp->expmaskinit == rnp->expmaskinitnext) {
103	95	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
..	..	@@ -135,16 +127,16 @@
135	127	* Reset the ->expmask values in the rcu_node tree in preparation for
136	128	* a new expedited grace period.
137	129	*/
138		-static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
	130	+static void __maybe_unused sync_exp_reset_tree(void)
139	131	{
140	132	unsigned long flags;
141	133	struct rcu_node *rnp;
142	134
143		- sync_exp_reset_tree_hotplug(rsp);
144		- rcu_for_each_node_breadth_first(rsp, rnp) {
	135	+ sync_exp_reset_tree_hotplug();
	136	+ rcu_for_each_node_breadth_first(rnp) {
145	137	raw_spin_lock_irqsave_rcu_node(rnp, flags);
146	138	WARN_ON_ONCE(rnp->expmask);
147		- rnp->expmask = rnp->expmaskinit;
	139	+ WRITE_ONCE(rnp->expmask, rnp->expmaskinit);
148	140	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
149	141	}
150	142	}
..	..	@@ -153,31 +145,26 @@
153	145	* Return non-zero if there is no RCU expedited grace period in progress
154	146	* for the specified rcu_node structure, in other words, if all CPUs and
155	147	* tasks covered by the specified rcu_node structure have done their bit
156		- * for the current expedited grace period. Works only for preemptible
157		- * RCU -- other RCU implementation use other means.
158		- *
159		- * Caller must hold the specificed rcu_node structure's ->lock
	148	+ * for the current expedited grace period.
160	149	*/
161		-static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)
	150	+static bool sync_rcu_exp_done(struct rcu_node *rnp)
162	151	{
163	152	raw_lockdep_assert_held_rcu_node(rnp);
164		-
165		- return rnp->exp_tasks == NULL &&
	153	+ return READ_ONCE(rnp->exp_tasks) == NULL &&
166	154	READ_ONCE(rnp->expmask) == 0;
167	155	}
168	156
169	157	/*
170		- * Like sync_rcu_preempt_exp_done(), but this function assumes the caller
171		- * doesn't hold the rcu_node's ->lock, and will acquire and release the lock
172		- * itself
	158	+ * Like sync_rcu_exp_done(), but where the caller does not hold the
	159	+ * rcu_node's ->lock.
173	160	*/
174		-static bool sync_rcu_preempt_exp_done_unlocked(struct rcu_node *rnp)
	161	+static bool sync_rcu_exp_done_unlocked(struct rcu_node *rnp)
175	162	{
176	163	unsigned long flags;
177	164	bool ret;
178	165
179	166	raw_spin_lock_irqsave_rcu_node(rnp, flags);
180		- ret = sync_rcu_preempt_exp_done(rnp);
	167	+ ret = sync_rcu_exp_done(rnp);
181	168	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
182	169
183	170	return ret;
..	..	@@ -191,17 +178,16 @@
191	178	* which the task was queued or to one of that rcu_node structure's ancestors,
192	179	* recursively up the tree. (Calm down, calm down, we do the recursion
193	180	* iteratively!)
194		- *
195		- * Caller must hold the specified rcu_node structure's ->lock.
196	181	*/
197		-static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
	182	+static void __rcu_report_exp_rnp(struct rcu_node *rnp,
198	183	bool wake, unsigned long flags)
199	184	__releases(rnp->lock)
200	185	{
201	186	unsigned long mask;
202	187
	188	+ raw_lockdep_assert_held_rcu_node(rnp);
203	189	for (;;) {
204		- if (!sync_rcu_preempt_exp_done(rnp)) {
	190	+ if (!sync_rcu_exp_done(rnp)) {
205	191	if (!rnp->expmask)
206	192	rcu_initiate_boost(rnp, flags);
207	193	else
..	..	@@ -212,7 +198,7 @@
212	198	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
213	199	if (wake) {
214	200	smp_mb(); /* EGP done before wake_up(). */
215		- swake_up_one(&rsp->expedited_wq);
	201	+ swake_up_one(&rcu_state.expedited_wq);
216	202	}
217	203	break;
218	204	}
..	..	@@ -221,7 +207,7 @@
221	207	rnp = rnp->parent;
222	208	raw_spin_lock_rcu_node(rnp); /* irqs already disabled */
223	209	WARN_ON_ONCE(!(rnp->expmask & mask));
224		- rnp->expmask &= ~mask;
	210	+ WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask);
225	211	}
226	212	}
227	213
..	..	@@ -229,49 +215,56 @@
229	215	* Report expedited quiescent state for specified node. This is a
230	216	* lock-acquisition wrapper function for __rcu_report_exp_rnp().
231	217	*/
232		-static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
233		- struct rcu_node *rnp, bool wake)
	218	+static void __maybe_unused rcu_report_exp_rnp(struct rcu_node *rnp, bool wake)
234	219	{
235	220	unsigned long flags;
236	221
237	222	raw_spin_lock_irqsave_rcu_node(rnp, flags);
238		- __rcu_report_exp_rnp(rsp, rnp, wake, flags);
	223	+ __rcu_report_exp_rnp(rnp, wake, flags);
239	224	}
240	225
241	226	/*
242	227	* Report expedited quiescent state for multiple CPUs, all covered by the
243	228	* specified leaf rcu_node structure.
244	229	*/
245		-static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
	230	+static void rcu_report_exp_cpu_mult(struct rcu_node *rnp,
246	231	unsigned long mask, bool wake)
247	232	{
	233	+ int cpu;
248	234	unsigned long flags;
	235	+ struct rcu_data *rdp;
249	236
250	237	raw_spin_lock_irqsave_rcu_node(rnp, flags);
251	238	if (!(rnp->expmask & mask)) {
252	239	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
253	240	return;
254	241	}
255		- rnp->expmask &= ~mask;
256		- __rcu_report_exp_rnp(rsp, rnp, wake, flags); /* Releases rnp->lock. */
	242	+ WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask);
	243	+ for_each_leaf_node_cpu_mask(rnp, cpu, mask) {
	244	+ rdp = per_cpu_ptr(&rcu_data, cpu);
	245	+ if (!IS_ENABLED(CONFIG_NO_HZ_FULL) || !rdp->rcu_forced_tick_exp)
	246	+ continue;
	247	+ rdp->rcu_forced_tick_exp = false;
	248	+ tick_dep_clear_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
	249	+ }
	250	+ __rcu_report_exp_rnp(rnp, wake, flags); /* Releases rnp->lock. */
257	251	}
258	252
259	253	/*
260	254	* Report expedited quiescent state for specified rcu_data (CPU).
261	255	*/
262		-static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
263		- bool wake)
	256	+static void rcu_report_exp_rdp(struct rcu_data *rdp)
264	257	{
265		- rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, wake);
	258	+ WRITE_ONCE(rdp->exp_deferred_qs, false);
	259	+ rcu_report_exp_cpu_mult(rdp->mynode, rdp->grpmask, true);
266	260	}
267	261
268		-/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
269		-static bool sync_exp_work_done(struct rcu_state *rsp, unsigned long s)
	262	+/* Common code for work-done checking. */
	263	+static bool sync_exp_work_done(unsigned long s)
270	264	{
271		- if (rcu_exp_gp_seq_done(rsp, s)) {
272		- trace_rcu_exp_grace_period(rsp->name, s, TPS("done"));
273		- /* Ensure test happens before caller kfree(). */
274		- smp_mb__before_atomic(); /* ^^^ */
	265	+ if (rcu_exp_gp_seq_done(s)) {
	266	+ trace_rcu_exp_grace_period(rcu_state.name, s, TPS("done"));
	267	+ smp_mb(); /* Ensure test happens before caller kfree(). */
275	268	return true;
276	269	}
277	270	return false;
..	..	@@ -284,28 +277,28 @@
284	277	* with the mutex held, indicating that the caller must actually do the
285	278	* expedited grace period.
286	279	*/
287		-static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
	280	+static bool exp_funnel_lock(unsigned long s)
288	281	{
289		- struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
	282	+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, raw_smp_processor_id());
290	283	struct rcu_node *rnp = rdp->mynode;
291		- struct rcu_node *rnp_root = rcu_get_root(rsp);
	284	+ struct rcu_node *rnp_root = rcu_get_root();
292	285
293	286	/* Low-contention fastpath. */
294	287	if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
295	288	(rnp == rnp_root ||
296	289	ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
297		- mutex_trylock(&rsp->exp_mutex))
	290	+ mutex_trylock(&rcu_state.exp_mutex))
298	291	goto fastpath;
299	292
300	293	/*
301	294	* Each pass through the following loop works its way up
302	295	* the rcu_node tree, returning if others have done the work or
303		- * otherwise falls through to acquire rsp->exp_mutex. The mapping
	296	+ * otherwise falls through to acquire ->exp_mutex. The mapping
304	297	* from CPU to rcu_node structure can be inexact, as it is just
305	298	* promoting locality and is not strictly needed for correctness.
306	299	*/
307	300	for (; rnp != NULL; rnp = rnp->parent) {
308		- if (sync_exp_work_done(rsp, s))
	301	+ if (sync_exp_work_done(s))
309	302	return true;
310	303
311	304	/* Work not done, either wait here or go up. */
..	..	@@ -314,66 +307,27 @@
314	307
315	308	/* Someone else doing GP, so wait for them. */
316	309	spin_unlock(&rnp->exp_lock);
317		- trace_rcu_exp_funnel_lock(rsp->name, rnp->level,
	310	+ trace_rcu_exp_funnel_lock(rcu_state.name, rnp->level,
318	311	rnp->grplo, rnp->grphi,
319	312	TPS("wait"));
320	313	wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
321		- sync_exp_work_done(rsp, s));
	314	+ sync_exp_work_done(s));
322	315	return true;
323	316	}
324		- rnp->exp_seq_rq = s; /* Followers can wait on us. */
	317	+ WRITE_ONCE(rnp->exp_seq_rq, s); /* Followers can wait on us. */
325	318	spin_unlock(&rnp->exp_lock);
326		- trace_rcu_exp_funnel_lock(rsp->name, rnp->level, rnp->grplo,
327		- rnp->grphi, TPS("nxtlvl"));
	319	+ trace_rcu_exp_funnel_lock(rcu_state.name, rnp->level,
	320	+ rnp->grplo, rnp->grphi, TPS("nxtlvl"));
328	321	}
329		- mutex_lock(&rsp->exp_mutex);
	322	+ mutex_lock(&rcu_state.exp_mutex);
330	323	fastpath:
331		- if (sync_exp_work_done(rsp, s)) {
332		- mutex_unlock(&rsp->exp_mutex);
	324	+ if (sync_exp_work_done(s)) {
	325	+ mutex_unlock(&rcu_state.exp_mutex);
333	326	return true;
334	327	}
335		- rcu_exp_gp_seq_start(rsp);
336		- trace_rcu_exp_grace_period(rsp->name, s, TPS("start"));
	328	+ rcu_exp_gp_seq_start();
	329	+ trace_rcu_exp_grace_period(rcu_state.name, s, TPS("start"));
337	330	return false;
338		-}
339		-
340		-/* Invoked on each online non-idle CPU for expedited quiescent state. */
341		-static void sync_sched_exp_handler(void *data)
342		-{
343		- struct rcu_data *rdp;
344		- struct rcu_node *rnp;
345		- struct rcu_state *rsp = data;
346		-
347		- rdp = this_cpu_ptr(rsp->rda);
348		- rnp = rdp->mynode;
349		- if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
350		- __this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
351		- return;
352		- if (rcu_is_cpu_rrupt_from_idle()) {
353		- rcu_report_exp_rdp(&rcu_sched_state,
354		- this_cpu_ptr(&rcu_sched_data), true);
355		- return;
356		- }
357		- __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
358		- /* Store .exp before .rcu_urgent_qs. */
359		- smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
360		- resched_cpu(smp_processor_id());
361		-}
362		-
363		-/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
364		-static void sync_sched_exp_online_cleanup(int cpu)
365		-{
366		- struct rcu_data *rdp;
367		- int ret;
368		- struct rcu_node *rnp;
369		- struct rcu_state *rsp = &rcu_sched_state;
370		-
371		- rdp = per_cpu_ptr(rsp->rda, cpu);
372		- rnp = rdp->mynode;
373		- if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
374		- return;
375		- ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0);
376		- WARN_ON_ONCE(ret);
377	331	}
378	332
379	333	/*
..	..	@@ -384,31 +338,27 @@
384	338	{
385	339	int cpu;
386	340	unsigned long flags;
387		- smp_call_func_t func;
388	341	unsigned long mask_ofl_test;
389	342	unsigned long mask_ofl_ipi;
390	343	int ret;
391	344	struct rcu_exp_work *rewp =
392	345	container_of(wp, struct rcu_exp_work, rew_work);
393	346	struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew);
394		- struct rcu_state *rsp = rewp->rew_rsp;
395	347
396		- func = rewp->rew_func;
397	348	raw_spin_lock_irqsave_rcu_node(rnp, flags);
398	349
399	350	/* Each pass checks a CPU for identity, offline, and idle. */
400	351	mask_ofl_test = 0;
401	352	for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
402		- unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
403		- struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
404		- struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
	353	+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
	354	+ unsigned long mask = rdp->grpmask;
405	355	int snap;
406	356
407	357	if (raw_smp_processor_id() == cpu ||
408	358	!(rnp->qsmaskinitnext & mask)) {
409	359	mask_ofl_test |= mask;
410	360	} else {
411		- snap = rcu_dynticks_snap(rdtp);
	361	+ snap = rcu_dynticks_snap(rdp);
412	362	if (rcu_dynticks_in_eqs(snap))
413	363	mask_ofl_test |= mask;
414	364	else
..	..	@@ -423,132 +373,169 @@
423	373	* until such time as the ->expmask bits are cleared.
424	374	*/
425	375	if (rcu_preempt_has_tasks(rnp))
426		- rnp->exp_tasks = rnp->blkd_tasks.next;
	376	+ WRITE_ONCE(rnp->exp_tasks, rnp->blkd_tasks.next);
427	377	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
428	378
429	379	/* IPI the remaining CPUs for expedited quiescent state. */
430		- for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
431		- unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
432		- struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
	380	+ for_each_leaf_node_cpu_mask(rnp, cpu, mask_ofl_ipi) {
	381	+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
	382	+ unsigned long mask = rdp->grpmask;
433	383
434		- if (!(mask_ofl_ipi & mask))
435		- continue;
436	384	retry_ipi:
437		- if (rcu_dynticks_in_eqs_since(rdp->dynticks,
438		- rdp->exp_dynticks_snap)) {
	385	+ if (rcu_dynticks_in_eqs_since(rdp, rdp->exp_dynticks_snap)) {
439	386	mask_ofl_test |= mask;
440	387	continue;
441	388	}
442		- ret = smp_call_function_single(cpu, func, rsp, 0);
443		- if (!ret) {
444		- mask_ofl_ipi &= ~mask;
	389	+ if (get_cpu() == cpu) {
	390	+ mask_ofl_test |= mask;
	391	+ put_cpu();
445	392	continue;
446	393	}
	394	+ ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0);
	395	+ put_cpu();
	396	+ /* The CPU will report the QS in response to the IPI. */
	397	+ if (!ret)
	398	+ continue;
	399	+
447	400	/* Failed, raced with CPU hotplug operation. */
448	401	raw_spin_lock_irqsave_rcu_node(rnp, flags);
449	402	if ((rnp->qsmaskinitnext & mask) &&
450	403	(rnp->expmask & mask)) {
451	404	/* Online, so delay for a bit and try again. */
452	405	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
453		- trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
454		- schedule_timeout_uninterruptible(1);
	406	+ trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("selectofl"));
	407	+ schedule_timeout_idle(1);
455	408	goto retry_ipi;
456	409	}
457		- /* CPU really is offline, so we can ignore it. */
458		- if (!(rnp->expmask & mask))
459		- mask_ofl_ipi &= ~mask;
	410	+ /* CPU really is offline, so we must report its QS. */
	411	+ if (rnp->expmask & mask)
	412	+ mask_ofl_test |= mask;
460	413	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
461	414	}
462	415	/* Report quiescent states for those that went offline. */
463		- mask_ofl_test |= mask_ofl_ipi;
464	416	if (mask_ofl_test)
465		- rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
	417	+ rcu_report_exp_cpu_mult(rnp, mask_ofl_test, false);
466	418	}
467	419
468	420	/*
469	421	* Select the nodes that the upcoming expedited grace period needs
470	422	* to wait for.
471	423	*/
472		-static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
473		- smp_call_func_t func)
	424	+static void sync_rcu_exp_select_cpus(void)
474	425	{
475	426	int cpu;
476	427	struct rcu_node *rnp;
477	428
478		- trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
479		- sync_exp_reset_tree(rsp);
480		- trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
	429	+ trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("reset"));
	430	+ sync_exp_reset_tree();
	431	+ trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("select"));
481	432
482	433	/* Schedule work for each leaf rcu_node structure. */
483		- rcu_for_each_leaf_node(rsp, rnp) {
	434	+ rcu_for_each_leaf_node(rnp) {
484	435	rnp->exp_need_flush = false;
485	436	if (!READ_ONCE(rnp->expmask))
486	437	continue; /* Avoid early boot non-existent wq. */
487		- rnp->rew.rew_func = func;
488		- rnp->rew.rew_rsp = rsp;
489	438	if (!READ_ONCE(rcu_par_gp_wq) ||
490	439	rcu_scheduler_active != RCU_SCHEDULER_RUNNING ||
491		- rcu_is_last_leaf_node(rsp, rnp)) {
	440	+ rcu_is_last_leaf_node(rnp)) {
492	441	/* No workqueues yet or last leaf, do direct call. */
493	442	sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work);
494	443	continue;
495	444	}
496	445	INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
497		- preempt_disable();
498		- cpu = cpumask_next(rnp->grplo - 1, cpu_online_mask);
	446	+ cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1);
499	447	/* If all offline, queue the work on an unbound CPU. */
500		- if (unlikely(cpu > rnp->grphi))
	448	+ if (unlikely(cpu > rnp->grphi - rnp->grplo))
501	449	cpu = WORK_CPU_UNBOUND;
	450	+ else
	451	+ cpu += rnp->grplo;
502	452	queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work);
503		- preempt_enable();
504	453	rnp->exp_need_flush = true;
505	454	}
506	455
507	456	/* Wait for workqueue jobs (if any) to complete. */
508		- rcu_for_each_leaf_node(rsp, rnp)
	457	+ rcu_for_each_leaf_node(rnp)
509	458	if (rnp->exp_need_flush)
510	459	flush_work(&rnp->rew.rew_work);
511	460	}
512	461
513		-static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
	462	+/*
	463	+ * Wait for the expedited grace period to elapse, within time limit.
	464	+ * If the time limit is exceeded without the grace period elapsing,
	465	+ * return false, otherwise return true.
	466	+ */
	467	+static bool synchronize_rcu_expedited_wait_once(long tlimit)
	468	+{
	469	+ int t;
	470	+ struct rcu_node *rnp_root = rcu_get_root();
	471	+
	472	+ t = swait_event_timeout_exclusive(rcu_state.expedited_wq,
	473	+ sync_rcu_exp_done_unlocked(rnp_root),
	474	+ tlimit);
	475	+ // Workqueues should not be signaled.
	476	+ if (t > 0 || sync_rcu_exp_done_unlocked(rnp_root))
	477	+ return true;
	478	+ WARN_ON(t < 0); /* workqueues should not be signaled. */
	479	+ return false;
	480	+}
	481	+
	482	+/*
	483	+ * Wait for the expedited grace period to elapse, issuing any needed
	484	+ * RCU CPU stall warnings along the way.
	485	+ */
	486	+static void synchronize_rcu_expedited_wait(void)
514	487	{
515	488	int cpu;
	489	+ unsigned long j;
516	490	unsigned long jiffies_stall;
517	491	unsigned long jiffies_start;
518	492	unsigned long mask;
519	493	int ndetected;
	494	+ struct rcu_data *rdp;
520	495	struct rcu_node *rnp;
521		- struct rcu_node *rnp_root = rcu_get_root(rsp);
522		- int ret;
	496	+ struct rcu_node *rnp_root = rcu_get_root();
523	497
524		- trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("startwait"));
	498	+ trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
525	499	jiffies_stall = rcu_jiffies_till_stall_check();
526	500	jiffies_start = jiffies;
	501	+ if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended()) {
	502	+ if (synchronize_rcu_expedited_wait_once(1))
	503	+ return;
	504	+ rcu_for_each_leaf_node(rnp) {
	505	+ for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
	506	+ rdp = per_cpu_ptr(&rcu_data, cpu);
	507	+ if (rdp->rcu_forced_tick_exp)
	508	+ continue;
	509	+ rdp->rcu_forced_tick_exp = true;
	510	+ tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
	511	+ }
	512	+ }
	513	+ j = READ_ONCE(jiffies_till_first_fqs);
	514	+ if (synchronize_rcu_expedited_wait_once(j + HZ))
	515	+ return;
	516	+ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT));
	517	+ }
527	518
528	519	for (;;) {
529		- ret = swait_event_timeout_exclusive(
530		- rsp->expedited_wq,
531		- sync_rcu_preempt_exp_done_unlocked(rnp_root),
532		- jiffies_stall);
533		- if (ret > 0 || sync_rcu_preempt_exp_done_unlocked(rnp_root))
	520	+ if (synchronize_rcu_expedited_wait_once(jiffies_stall))
534	521	return;
535		- WARN_ON(ret < 0); /* workqueues should not be signaled. */
536		- if (rcu_cpu_stall_suppress)
	522	+ if (rcu_stall_is_suppressed())
537	523	continue;
538	524	panic_on_rcu_stall();
	525	+ trace_rcu_stall_warning(rcu_state.name, TPS("ExpeditedStall"));
539	526	pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
540		- rsp->name);
	527	+ rcu_state.name);
541	528	ndetected = 0;
542		- rcu_for_each_leaf_node(rsp, rnp) {
	529	+ rcu_for_each_leaf_node(rnp) {
543	530	ndetected += rcu_print_task_exp_stall(rnp);
544	531	for_each_leaf_node_possible_cpu(rnp, cpu) {
545	532	struct rcu_data *rdp;
546	533
547	534	mask = leaf_node_cpu_bit(rnp, cpu);
548		- if (!(rnp->expmask & mask))
	535	+ if (!(READ_ONCE(rnp->expmask) & mask))
549	536	continue;
550	537	ndetected++;
551		- rdp = per_cpu_ptr(rsp->rda, cpu);
	538	+ rdp = per_cpu_ptr(&rcu_data, cpu);
552	539	pr_cont(" %d-%c%c%c", cpu,
553	540	"O."[!!cpu_online(cpu)],
554	541	"o."[!!(rdp->grpmask & rnp->expmaskinit)],
..	..	@@ -556,26 +543,27 @@
556	543	}
557	544	}
558	545	pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
559		- jiffies - jiffies_start, rsp->expedited_sequence,
560		- rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]);
	546	+ jiffies - jiffies_start, rcu_state.expedited_sequence,
	547	+ data_race(rnp_root->expmask),
	548	+ ".T"[!!data_race(rnp_root->exp_tasks)]);
561	549	if (ndetected) {
562	550	pr_err("blocking rcu_node structures:");
563		- rcu_for_each_node_breadth_first(rsp, rnp) {
	551	+ rcu_for_each_node_breadth_first(rnp) {
564	552	if (rnp == rnp_root)
565	553	continue; /* printed unconditionally */
566		- if (sync_rcu_preempt_exp_done_unlocked(rnp))
	554	+ if (sync_rcu_exp_done_unlocked(rnp))
567	555	continue;
568	556	pr_cont(" l=%u:%d-%d:%#lx/%c",
569	557	rnp->level, rnp->grplo, rnp->grphi,
570		- rnp->expmask,
571		- ".T"[!!rnp->exp_tasks]);
	558	+ data_race(rnp->expmask),
	559	+ ".T"[!!data_race(rnp->exp_tasks)]);
572	560	}
573	561	pr_cont("\n");
574	562	}
575		- rcu_for_each_leaf_node(rsp, rnp) {
	563	+ rcu_for_each_leaf_node(rnp) {
576	564	for_each_leaf_node_possible_cpu(rnp, cpu) {
577	565	mask = leaf_node_cpu_bit(rnp, cpu);
578		- if (!(rnp->expmask & mask))
	566	+ if (!(READ_ONCE(rnp->expmask) & mask))
579	567	continue;
580	568	dump_cpu_task(cpu);
581	569	}
..	..	@@ -590,47 +578,45 @@
590	578	* grace period. Also update all the ->exp_seq_rq counters as needed
591	579	* in order to avoid counter-wrap problems.
592	580	*/
593		-static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
	581	+static void rcu_exp_wait_wake(unsigned long s)
594	582	{
595	583	struct rcu_node *rnp;
596	584
597		- synchronize_sched_expedited_wait(rsp);
598		- rcu_exp_gp_seq_end(rsp);
599		- trace_rcu_exp_grace_period(rsp->name, s, TPS("end"));
	585	+ synchronize_rcu_expedited_wait();
600	586
601		- /*
602		- * Switch over to wakeup mode, allowing the next GP, but -only- the
603		- * next GP, to proceed.
604		- */
605		- mutex_lock(&rsp->exp_wake_mutex);
	587	+ // Switch over to wakeup mode, allowing the next GP to proceed.
	588	+ // End the previous grace period only after acquiring the mutex
	589	+ // to ensure that only one GP runs concurrently with wakeups.
	590	+ mutex_lock(&rcu_state.exp_wake_mutex);
	591	+ rcu_exp_gp_seq_end();
	592	+ trace_rcu_exp_grace_period(rcu_state.name, s, TPS("end"));
606	593
607		- rcu_for_each_node_breadth_first(rsp, rnp) {
	594	+ rcu_for_each_node_breadth_first(rnp) {
608	595	if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
609	596	spin_lock(&rnp->exp_lock);
610	597	/* Recheck, avoid hang in case someone just arrived. */
611	598	if (ULONG_CMP_LT(rnp->exp_seq_rq, s))
612		- rnp->exp_seq_rq = s;
	599	+ WRITE_ONCE(rnp->exp_seq_rq, s);
613	600	spin_unlock(&rnp->exp_lock);
614	601	}
615	602	smp_mb(); /* All above changes before wakeup. */
616	603	wake_up_all(&rnp->exp_wq[rcu_seq_ctr(s) & 0x3]);
617	604	}
618		- trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake"));
619		- mutex_unlock(&rsp->exp_wake_mutex);
	605	+ trace_rcu_exp_grace_period(rcu_state.name, s, TPS("endwake"));
	606	+ mutex_unlock(&rcu_state.exp_wake_mutex);
620	607	}
621	608
622	609	/*
623	610	* Common code to drive an expedited grace period forward, used by
624	611	* workqueues and mid-boot-time tasks.
625	612	*/
626		-static void rcu_exp_sel_wait_wake(struct rcu_state *rsp,
627		- smp_call_func_t func, unsigned long s)
	613	+static void rcu_exp_sel_wait_wake(unsigned long s)
628	614	{
629	615	/* Initialize the rcu_node tree in preparation for the wait. */
630		- sync_rcu_exp_select_cpus(rsp, func);
	616	+ sync_rcu_exp_select_cpus();
631	617
632	618	/* Wait and clean up, including waking everyone. */
633		- rcu_exp_wait_wake(rsp, s);
	619	+ rcu_exp_wait_wake(s);
634	620	}
635	621
636	622	/*
..	..	@@ -641,88 +627,8 @@
641	627	struct rcu_exp_work *rewp;
642	628
643	629	rewp = container_of(wp, struct rcu_exp_work, rew_work);
644		- rcu_exp_sel_wait_wake(rewp->rew_rsp, rewp->rew_func, rewp->rew_s);
	630	+ rcu_exp_sel_wait_wake(rewp->rew_s);
645	631	}
646		-
647		-/*
648		- * Given an rcu_state pointer and a smp_call_function() handler, kick
649		- * off the specified flavor of expedited grace period.
650		- */
651		-static void _synchronize_rcu_expedited(struct rcu_state *rsp,
652		- smp_call_func_t func)
653		-{
654		- struct rcu_data *rdp;
655		- struct rcu_exp_work rew;
656		- struct rcu_node *rnp;
657		- unsigned long s;
658		-
659		- /* If expedited grace periods are prohibited, fall back to normal. */
660		- if (rcu_gp_is_normal()) {
661		- wait_rcu_gp(rsp->call);
662		- return;
663		- }
664		-
665		- /* Take a snapshot of the sequence number. */
666		- s = rcu_exp_gp_seq_snap(rsp);
667		- if (exp_funnel_lock(rsp, s))
668		- return; /* Someone else did our work for us. */
669		-
670		- /* Ensure that load happens before action based on it. */
671		- if (unlikely(rcu_scheduler_active == RCU_SCHEDULER_INIT)) {
672		- /* Direct call during scheduler init and early_initcalls(). */
673		- rcu_exp_sel_wait_wake(rsp, func, s);
674		- } else {
675		- /* Marshall arguments & schedule the expedited grace period. */
676		- rew.rew_func = func;
677		- rew.rew_rsp = rsp;
678		- rew.rew_s = s;
679		- INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
680		- queue_work(rcu_gp_wq, &rew.rew_work);
681		- }
682		-
683		- /* Wait for expedited grace period to complete. */
684		- rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
685		- rnp = rcu_get_root(rsp);
686		- wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
687		- sync_exp_work_done(rsp, s));
688		- smp_mb(); /* Workqueue actions happen before return. */
689		-
690		- /* Let the next expedited grace period start. */
691		- mutex_unlock(&rsp->exp_mutex);
692		-}
693		-
694		-/**
695		- * synchronize_sched_expedited - Brute-force RCU-sched grace period
696		- *
697		- * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
698		- * approach to force the grace period to end quickly. This consumes
699		- * significant time on all CPUs and is unfriendly to real-time workloads,
700		- * so is thus not recommended for any sort of common-case code. In fact,
701		- * if you are using synchronize_sched_expedited() in a loop, please
702		- * restructure your code to batch your updates, and then use a single
703		- * synchronize_sched() instead.
704		- *
705		- * This implementation can be thought of as an application of sequence
706		- * locking to expedited grace periods, but using the sequence counter to
707		- * determine when someone else has already done the work instead of for
708		- * retrying readers.
709		- */
710		-void synchronize_sched_expedited(void)
711		-{
712		- struct rcu_state *rsp = &rcu_sched_state;
713		-
714		- RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
715		- lock_is_held(&rcu_lock_map) ||
716		- lock_is_held(&rcu_sched_lock_map),
717		- "Illegal synchronize_sched_expedited() in RCU read-side critical section");
718		-
719		- /* If only one CPU, this is automatically a grace period. */
720		- if (rcu_blocking_is_gp())
721		- return;
722		-
723		- _synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
724		-}
725		-EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
726	632
727	633	#ifdef CONFIG_PREEMPT_RCU
728	634
..	..	@@ -733,79 +639,232 @@
733	639	* ->expmask fields in the rcu_node tree. Otherwise, immediately
734	640	* report the quiescent state.
735	641	*/
736		-static void sync_rcu_exp_handler(void *info)
	642	+static void rcu_exp_handler(void *unused)
737	643	{
738		- struct rcu_data *rdp;
739		- struct rcu_state *rsp = info;
	644	+ int depth = rcu_preempt_depth();
	645	+ unsigned long flags;
	646	+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	647	+ struct rcu_node *rnp = rdp->mynode;
740	648	struct task_struct *t = current;
741	649
742	650	/*
743		- * Within an RCU read-side critical section, request that the next
744		- * rcu_read_unlock() report. Unless this RCU read-side critical
745		- * section has already blocked, in which case it is already set
746		- * up for the expedited grace period to wait on it.
	651	+ * First, the common case of not being in an RCU read-side critical
	652	+ * section. If preemption and softirqs are also enabled, or if the CPU
	653	+ * is idle, immediately report the quiescent state, otherwise defer.
747	654	*/
748		- if (t->rcu_read_lock_nesting > 0 &&
749		- !t->rcu_read_unlock_special.b.blocked) {
750		- t->rcu_read_unlock_special.b.exp_need_qs = true;
	655	+ if (!depth) {
	656	+ if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
	657	+ rcu_dynticks_curr_cpu_in_eqs()) {
	658	+ rcu_report_exp_rdp(rdp);
	659	+ } else {
	660	+ rdp->exp_deferred_qs = true;
	661	+ set_tsk_need_resched(t);
	662	+ set_preempt_need_resched();
	663	+ }
751	664	return;
752	665	}
753	666
754	667	/*
755		- * We are either exiting an RCU read-side critical section (negative
756		- * values of t->rcu_read_lock_nesting) or are not in one at all
757		- * (zero value of t->rcu_read_lock_nesting). Or we are in an RCU
758		- * read-side critical section that blocked before this expedited
759		- * grace period started. Either way, we can immediately report
760		- * the quiescent state.
	668	+ * Second, the less-common case of being in an RCU read-side
	669	+ * critical section. In this case we can count on a future
	670	+ * rcu_read_unlock(). However, this rcu_read_unlock() might
	671	+ * execute on some other CPU, but in that case there will be
	672	+ * a future context switch. Either way, if the expedited
	673	+ * grace period is still waiting on this CPU, set ->exp_deferred_qs
	674	+ * so that the eventual quiescent state will be reported.
	675	+ * Note that there is a large group of race conditions that
	676	+ * can have caused this quiescent state to already have been
	677	+ * reported, so we really do need to check ->expmask.
761	678	*/
762		- rdp = this_cpu_ptr(rsp->rda);
763		- rcu_report_exp_rdp(rsp, rdp, true);
	679	+ if (depth > 0) {
	680	+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
	681	+ if (rnp->expmask & rdp->grpmask) {
	682	+ rdp->exp_deferred_qs = true;
	683	+ t->rcu_read_unlock_special.b.exp_hint = true;
	684	+ }
	685	+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	686	+ return;
	687	+ }
	688	+
	689	+ // Finally, negative nesting depth should not happen.
	690	+ WARN_ON_ONCE(1);
764	691	}
	692	+
	693	+/* PREEMPTION=y, so no PREEMPTION=n expedited grace period to clean up after. */
	694	+static void sync_sched_exp_online_cleanup(int cpu)
	695	+{
	696	+}
	697	+
	698	+/*
	699	+ * Scan the current list of tasks blocked within RCU read-side critical
	700	+ * sections, printing out the tid of each that is blocking the current
	701	+ * expedited grace period.
	702	+ */
	703	+static int rcu_print_task_exp_stall(struct rcu_node *rnp)
	704	+{
	705	+ unsigned long flags;
	706	+ int ndetected = 0;
	707	+ struct task_struct *t;
	708	+
	709	+ if (!READ_ONCE(rnp->exp_tasks))
	710	+ return 0;
	711	+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
	712	+ t = list_entry(rnp->exp_tasks->prev,
	713	+ struct task_struct, rcu_node_entry);
	714	+ list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
	715	+ pr_cont(" P%d", t->pid);
	716	+ ndetected++;
	717	+ }
	718	+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	719	+ return ndetected;
	720	+}
	721	+
	722	+#else /* #ifdef CONFIG_PREEMPT_RCU */
	723	+
	724	+/* Request an expedited quiescent state. */
	725	+static void rcu_exp_need_qs(void)
	726	+{
	727	+ __this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
	728	+ /* Store .exp before .rcu_urgent_qs. */
	729	+ smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true);
	730	+ set_tsk_need_resched(current);
	731	+ set_preempt_need_resched();
	732	+}
	733	+
	734	+/* Invoked on each online non-idle CPU for expedited quiescent state. */
	735	+static void rcu_exp_handler(void *unused)
	736	+{
	737	+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	738	+ struct rcu_node *rnp = rdp->mynode;
	739	+
	740	+ if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
	741	+ __this_cpu_read(rcu_data.cpu_no_qs.b.exp))
	742	+ return;
	743	+ if (rcu_is_cpu_rrupt_from_idle()) {
	744	+ rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
	745	+ return;
	746	+ }
	747	+ rcu_exp_need_qs();
	748	+}
	749	+
	750	+/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
	751	+static void sync_sched_exp_online_cleanup(int cpu)
	752	+{
	753	+ unsigned long flags;
	754	+ int my_cpu;
	755	+ struct rcu_data *rdp;
	756	+ int ret;
	757	+ struct rcu_node *rnp;
	758	+
	759	+ rdp = per_cpu_ptr(&rcu_data, cpu);
	760	+ rnp = rdp->mynode;
	761	+ my_cpu = get_cpu();
	762	+ /* Quiescent state either not needed or already requested, leave. */
	763	+ if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
	764	+ rdp->cpu_no_qs.b.exp) {
	765	+ put_cpu();
	766	+ return;
	767	+ }
	768	+ /* Quiescent state needed on current CPU, so set it up locally. */
	769	+ if (my_cpu == cpu) {
	770	+ local_irq_save(flags);
	771	+ rcu_exp_need_qs();
	772	+ local_irq_restore(flags);
	773	+ put_cpu();
	774	+ return;
	775	+ }
	776	+ /* Quiescent state needed on some other CPU, send IPI. */
	777	+ ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0);
	778	+ put_cpu();
	779	+ WARN_ON_ONCE(ret);
	780	+}
	781	+
	782	+/*
	783	+ * Because preemptible RCU does not exist, we never have to check for
	784	+ * tasks blocked within RCU read-side critical sections that are
	785	+ * blocking the current expedited grace period.
	786	+ */
	787	+static int rcu_print_task_exp_stall(struct rcu_node *rnp)
	788	+{
	789	+ return 0;
	790	+}
	791	+
	792	+#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
765	793
766	794	/**
767	795	* synchronize_rcu_expedited - Brute-force RCU grace period
768	796	*
769		- * Wait for an RCU-preempt grace period, but expedite it. The basic
770		- * idea is to IPI all non-idle non-nohz online CPUs. The IPI handler
771		- * checks whether the CPU is in an RCU-preempt critical section, and
772		- * if so, it sets a flag that causes the outermost rcu_read_unlock()
773		- * to report the quiescent state. On the other hand, if the CPU is
774		- * not in an RCU read-side critical section, the IPI handler reports
775		- * the quiescent state immediately.
	797	+ * Wait for an RCU grace period, but expedite it. The basic idea is to
	798	+ * IPI all non-idle non-nohz online CPUs. The IPI handler checks whether
	799	+ * the CPU is in an RCU critical section, and if so, it sets a flag that
	800	+ * causes the outermost rcu_read_unlock() to report the quiescent state
	801	+ * for RCU-preempt or asks the scheduler for help for RCU-sched. On the
	802	+ * other hand, if the CPU is not in an RCU read-side critical section,
	803	+ * the IPI handler reports the quiescent state immediately.
776	804	*
777		- * Although this is a greate improvement over previous expedited
	805	+ * Although this is a great improvement over previous expedited
778	806	* implementations, it is still unfriendly to real-time workloads, so is
779	807	* thus not recommended for any sort of common-case code. In fact, if
780	808	* you are using synchronize_rcu_expedited() in a loop, please restructure
781		- * your code to batch your updates, and then Use a single synchronize_rcu()
	809	+ * your code to batch your updates, and then use a single synchronize_rcu()
782	810	* instead.
	811	+ *
	812	+ * This has the same semantics as (but is more brutal than) synchronize_rcu().
783	813	*/
784	814	void synchronize_rcu_expedited(void)
785	815	{
786		- struct rcu_state *rsp = rcu_state_p;
	816	+ bool no_wq;
	817	+ struct rcu_exp_work rew;
	818	+ struct rcu_node *rnp;
	819	+ unsigned long s;
787	820
788	821	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
789	822	lock_is_held(&rcu_lock_map) ||
790	823	lock_is_held(&rcu_sched_lock_map),
791	824	"Illegal synchronize_rcu_expedited() in RCU read-side critical section");
792	825
793		- if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
	826	+ /* Is the state such that the call is a grace period? */
	827	+ if (rcu_blocking_is_gp())
794	828	return;
795		- _synchronize_rcu_expedited(rsp, sync_rcu_exp_handler);
	829	+
	830	+ /* If expedited grace periods are prohibited, fall back to normal. */
	831	+ if (rcu_gp_is_normal()) {
	832	+ wait_rcu_gp(call_rcu);
	833	+ return;
	834	+ }
	835	+
	836	+ /* Take a snapshot of the sequence number. */
	837	+ s = rcu_exp_gp_seq_snap();
	838	+ if (exp_funnel_lock(s))
	839	+ return; /* Someone else did our work for us. */
	840	+
	841	+ /* Don't use workqueue during boot or from an incoming CPU. */
	842	+ preempt_disable();
	843	+ no_wq = rcu_scheduler_active == RCU_SCHEDULER_INIT ||
	844	+ !cpumask_test_cpu(smp_processor_id(), cpu_active_mask);
	845	+ preempt_enable();
	846	+
	847	+ /* Ensure that load happens before action based on it. */
	848	+ if (unlikely(no_wq)) {
	849	+ /* Direct call for scheduler init, early_initcall()s, and incoming CPUs. */
	850	+ rcu_exp_sel_wait_wake(s);
	851	+ } else {
	852	+ /* Marshall arguments & schedule the expedited grace period. */
	853	+ rew.rew_s = s;
	854	+ INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
	855	+ queue_work(rcu_gp_wq, &rew.rew_work);
	856	+ }
	857	+
	858	+ /* Wait for expedited grace period to complete. */
	859	+ rnp = rcu_get_root();
	860	+ wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
	861	+ sync_exp_work_done(s));
	862	+ smp_mb(); /* Workqueue actions happen before return. */
	863	+
	864	+ /* Let the next expedited grace period start. */
	865	+ mutex_unlock(&rcu_state.exp_mutex);
	866	+
	867	+ if (likely(!no_wq))
	868	+ destroy_work_on_stack(&rew.rew_work);
796	869	}
797	870	EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
798		-
799		-#else /* #ifdef CONFIG_PREEMPT_RCU */
800		-
801		-/*
802		- * Wait for an rcu-preempt grace period, but make it happen quickly.
803		- * But because preemptible RCU does not exist, map to rcu-sched.
804		- */
805		-void synchronize_rcu_expedited(void)
806		-{
807		- synchronize_sched_expedited();
808		-}
809		-EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
810		-
811		-#endif /* #else #ifdef CONFIG_PREEMPT_RCU */