.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0+ |
---|
1 | 2 | /* |
---|
2 | | - * Read-Copy Update mechanism for mutual exclusion |
---|
3 | | - * |
---|
4 | | - * This program is free software; you can redistribute it and/or modify |
---|
5 | | - * it under the terms of the GNU General Public License as published by |
---|
6 | | - * the Free Software Foundation; either version 2 of the License, or |
---|
7 | | - * (at your option) any later version. |
---|
8 | | - * |
---|
9 | | - * This program is distributed in the hope that it will be useful, |
---|
10 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
11 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
12 | | - * GNU General Public License for more details. |
---|
13 | | - * |
---|
14 | | - * You should have received a copy of the GNU General Public License |
---|
15 | | - * along with this program; if not, you can access it online at |
---|
16 | | - * http://www.gnu.org/licenses/gpl-2.0.html. |
---|
| 3 | + * Read-Copy Update mechanism for mutual exclusion (tree-based version) |
---|
17 | 4 | * |
---|
18 | 5 | * Copyright IBM Corporation, 2008 |
---|
19 | 6 | * |
---|
20 | 7 | * Authors: Dipankar Sarma <dipankar@in.ibm.com> |
---|
21 | 8 | * Manfred Spraul <manfred@colorfullife.com> |
---|
22 | | - * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version |
---|
| 9 | + * Paul E. McKenney <paulmck@linux.ibm.com> |
---|
23 | 10 | * |
---|
24 | | - * Based on the original work by Paul McKenney <paulmck@us.ibm.com> |
---|
| 11 | + * Based on the original work by Paul McKenney <paulmck@linux.ibm.com> |
---|
25 | 12 | * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. |
---|
26 | 13 | * |
---|
27 | 14 | * For detailed explanation of Read-Copy Update mechanism see - |
---|
.. | .. |
---|
56 | 43 | #include <uapi/linux/sched/types.h> |
---|
57 | 44 | #include <linux/prefetch.h> |
---|
58 | 45 | #include <linux/delay.h> |
---|
59 | | -#include <linux/stop_machine.h> |
---|
60 | 46 | #include <linux/random.h> |
---|
61 | 47 | #include <linux/trace_events.h> |
---|
62 | 48 | #include <linux/suspend.h> |
---|
63 | 49 | #include <linux/ftrace.h> |
---|
| 50 | +#include <linux/tick.h> |
---|
| 51 | +#include <linux/sysrq.h> |
---|
| 52 | +#include <linux/kprobes.h> |
---|
| 53 | +#include <linux/gfp.h> |
---|
| 54 | +#include <linux/oom.h> |
---|
| 55 | +#include <linux/smpboot.h> |
---|
| 56 | +#include <linux/jiffies.h> |
---|
| 57 | +#include <linux/slab.h> |
---|
| 58 | +#include <linux/sched/isolation.h> |
---|
| 59 | +#include <linux/sched/clock.h> |
---|
| 60 | +#include <linux/vmalloc.h> |
---|
| 61 | +#include <linux/mm.h> |
---|
| 62 | +#include <linux/kasan.h> |
---|
| 63 | +#include "../time/tick-internal.h" |
---|
64 | 64 | |
---|
65 | 65 | #include "tree.h" |
---|
66 | 66 | #include "rcu.h" |
---|
.. | .. |
---|
73 | 73 | /* Data structures. */ |
---|
74 | 74 | |
---|
75 | 75 | /* |
---|
76 | | - * In order to export the rcu_state name to the tracing tools, it |
---|
77 | | - * needs to be added in the __tracepoint_string section. |
---|
78 | | - * This requires defining a separate variable tp_<sname>_varname |
---|
79 | | - * that points to the string being used, and this will allow |
---|
80 | | - * the tracing userspace tools to be able to decipher the string |
---|
81 | | - * address to the matching string. |
---|
| 76 | + * Steal a bit from the bottom of ->dynticks for idle entry/exit |
---|
| 77 | + * control. Initially this is for TLB flushing. |
---|
82 | 78 | */ |
---|
83 | | -#ifdef CONFIG_TRACING |
---|
84 | | -# define DEFINE_RCU_TPS(sname) \ |
---|
85 | | -static char sname##_varname[] = #sname; \ |
---|
86 | | -static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; |
---|
87 | | -# define RCU_STATE_NAME(sname) sname##_varname |
---|
88 | | -#else |
---|
89 | | -# define DEFINE_RCU_TPS(sname) |
---|
90 | | -# define RCU_STATE_NAME(sname) __stringify(sname) |
---|
91 | | -#endif |
---|
| 79 | +#define RCU_DYNTICK_CTRL_MASK 0x1 |
---|
| 80 | +#define RCU_DYNTICK_CTRL_CTR (RCU_DYNTICK_CTRL_MASK + 1) |
---|
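Editor's note: the two macros above carve ->dynticks into a low special-action bit (initially used to request TLB flushes) and a counter that advances in steps of RCU_DYNTICK_CTRL_CTR on every idle entry and exit, with the CTR bit doubling as the "RCU is watching" indicator. The userspace sketch below models only that encoding with C11 atomics; the function names are invented for illustration and this is not kernel code.

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define CTRL_MASK 0x1              /* low bit: special-action request       */
#define CTRL_CTR  (CTRL_MASK + 1)  /* counter advances in units of 2        */

static atomic_int dynticks = CTRL_CTR;   /* CPU starts out "watching"       */

/* CTR bit set => RCU is watching; clear => extended quiescent state (EQS). */
static bool in_eqs(void)
{
	return !(atomic_load(&dynticks) & CTRL_CTR);
}

static void eqs_enter(void)              /* e.g. entering the idle loop     */
{
	atomic_fetch_add(&dynticks, CTRL_CTR);
}

static void eqs_exit(void)               /* e.g. leaving the idle loop      */
{
	int seq = atomic_fetch_add(&dynticks, CTRL_CTR) + CTRL_CTR;

	if (seq & CTRL_MASK)                       /* a special action was queued */
		atomic_fetch_and(&dynticks, ~CTRL_MASK);   /* acknowledge it      */
}

int main(void)
{
	printf("watching? %d\n", !in_eqs());   /* 1 */
	eqs_enter();
	printf("watching? %d\n", !in_eqs());   /* 0: idle from RCU's viewpoint */
	eqs_exit();
	printf("watching? %d\n", !in_eqs());   /* 1 again, counter moved by 2  */
	return 0;
}
```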
92 | 81 | |
---|
93 | | -#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ |
---|
94 | | -DEFINE_RCU_TPS(sname) \ |
---|
95 | | -static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \ |
---|
96 | | -struct rcu_state sname##_state = { \ |
---|
97 | | - .level = { &sname##_state.node[0] }, \ |
---|
98 | | - .rda = &sname##_data, \ |
---|
99 | | - .call = cr, \ |
---|
100 | | - .gp_state = RCU_GP_IDLE, \ |
---|
101 | | - .gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT, \ |
---|
102 | | - .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ |
---|
103 | | - .name = RCU_STATE_NAME(sname), \ |
---|
104 | | - .abbr = sabbr, \ |
---|
105 | | - .exp_mutex = __MUTEX_INITIALIZER(sname##_state.exp_mutex), \ |
---|
106 | | - .exp_wake_mutex = __MUTEX_INITIALIZER(sname##_state.exp_wake_mutex), \ |
---|
107 | | - .ofl_lock = __SPIN_LOCK_UNLOCKED(sname##_state.ofl_lock), \ |
---|
108 | | -} |
---|
109 | | - |
---|
110 | | -RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); |
---|
111 | | -RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); |
---|
112 | | - |
---|
113 | | -static struct rcu_state *const rcu_state_p; |
---|
114 | | -LIST_HEAD(rcu_struct_flavors); |
---|
| 82 | +static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = { |
---|
| 83 | + .dynticks_nesting = 1, |
---|
| 84 | + .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE, |
---|
| 85 | + .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), |
---|
| 86 | +}; |
---|
| 87 | +static struct rcu_state rcu_state = { |
---|
| 88 | + .level = { &rcu_state.node[0] }, |
---|
| 89 | + .gp_state = RCU_GP_IDLE, |
---|
| 90 | + .gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT, |
---|
| 91 | + .barrier_mutex = __MUTEX_INITIALIZER(rcu_state.barrier_mutex), |
---|
| 92 | + .name = RCU_NAME, |
---|
| 93 | + .abbr = RCU_ABBR, |
---|
| 94 | + .exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex), |
---|
| 95 | + .exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex), |
---|
| 96 | + .ofl_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.ofl_lock), |
---|
| 97 | +}; |
---|
115 | 98 | |
---|
116 | 99 | /* Dump rcu_node combining tree at boot to verify correct setup. */ |
---|
117 | 100 | static bool dump_tree; |
---|
118 | 101 | module_param(dump_tree, bool, 0444); |
---|
| 102 | +/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */ |
---|
| 103 | +static bool use_softirq = true; |
---|
| 104 | +module_param(use_softirq, bool, 0444); |
---|
119 | 105 | /* Control rcu_node-tree auto-balancing at boot time. */ |
---|
120 | 106 | static bool rcu_fanout_exact; |
---|
121 | 107 | module_param(rcu_fanout_exact, bool, 0444); |
---|
.. | .. |
---|
126 | 112 | /* Number of rcu_nodes at specified level. */ |
---|
127 | 113 | int num_rcu_lvl[] = NUM_RCU_LVL_INIT; |
---|
128 | 114 | int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ |
---|
129 | | -/* panic() on RCU Stall sysctl. */ |
---|
130 | | -int sysctl_panic_on_rcu_stall __read_mostly = CONFIG_BOOTPARAM_RCU_STALL_PANIC_VALUE; |
---|
131 | | -ATOMIC_NOTIFIER_HEAD(rcu_stall_notifier_list); |
---|
132 | 115 | |
---|
133 | 116 | /* |
---|
134 | 117 | * The rcu_scheduler_active variable is initialized to the value |
---|
.. | .. |
---|
159 | 142 | */ |
---|
160 | 143 | static int rcu_scheduler_fully_active __read_mostly; |
---|
161 | 144 | |
---|
162 | | -static void |
---|
163 | | -rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, |
---|
164 | | - struct rcu_node *rnp, unsigned long gps, unsigned long flags); |
---|
| 145 | +static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp, |
---|
| 146 | + unsigned long gps, unsigned long flags); |
---|
165 | 147 | static void rcu_init_new_rnp(struct rcu_node *rnp_leaf); |
---|
166 | 148 | static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf); |
---|
167 | 149 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); |
---|
168 | 150 | static void invoke_rcu_core(void); |
---|
169 | | -static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); |
---|
170 | | -static void rcu_report_exp_rdp(struct rcu_state *rsp, |
---|
171 | | - struct rcu_data *rdp, bool wake); |
---|
| 151 | +static void rcu_report_exp_rdp(struct rcu_data *rdp); |
---|
172 | 152 | static void sync_sched_exp_online_cleanup(int cpu); |
---|
| 153 | +static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp); |
---|
173 | 154 | |
---|
174 | 155 | /* rcuc/rcub kthread realtime priority */ |
---|
175 | 156 | static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0; |
---|
176 | | -module_param(kthread_prio, int, 0644); |
---|
| 157 | +module_param(kthread_prio, int, 0444); |
---|
177 | 158 | |
---|
178 | 159 | /* Delay in jiffies for grace-period initialization delays, debug only. */ |
---|
179 | 160 | |
---|
.. | .. |
---|
184 | 165 | static int gp_cleanup_delay; |
---|
185 | 166 | module_param(gp_cleanup_delay, int, 0444); |
---|
186 | 167 | |
---|
187 | | -/* Retreive RCU kthreads priority for rcutorture */ |
---|
| 168 | +// Add delay to rcu_read_unlock() for strict grace periods. |
---|
| 169 | +static int rcu_unlock_delay; |
---|
| 170 | +#ifdef CONFIG_RCU_STRICT_GRACE_PERIOD |
---|
| 171 | +module_param(rcu_unlock_delay, int, 0444); |
---|
| 172 | +#endif |
---|
| 173 | + |
---|
| 174 | +/* |
---|
| 175 | + * This rcu parameter is runtime-read-only. It reflects |
---|
| 176 | + * a minimum allowed number of objects which can be cached |
---|
| 177 | + * per-CPU. Object size is equal to one page. This value |
---|
| 178 | + * can be changed at boot time. |
---|
| 179 | + */ |
---|
| 180 | +static int rcu_min_cached_objs = 5; |
---|
| 181 | +module_param(rcu_min_cached_objs, int, 0444); |
---|
| 182 | + |
---|
| 183 | +/* Retrieve RCU kthreads priority for rcutorture */ |
---|
188 | 184 | int rcu_get_gp_kthreads_prio(void) |
---|
189 | 185 | { |
---|
190 | 186 | return kthread_prio; |
---|
.. | .. |
---|
208 | 204 | * held, but the bit corresponding to the current CPU will be stable |
---|
209 | 205 | * in most contexts. |
---|
210 | 206 | */ |
---|
211 | | -unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp) |
---|
| 207 | +static unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp) |
---|
212 | 208 | { |
---|
213 | 209 | return READ_ONCE(rnp->qsmaskinitnext); |
---|
214 | 210 | } |
---|
.. | .. |
---|
218 | 214 | * permit this function to be invoked without holding the root rcu_node |
---|
219 | 215 | * structure's ->lock, but of course results can be subject to change. |
---|
220 | 216 | */ |
---|
221 | | -static int rcu_gp_in_progress(struct rcu_state *rsp) |
---|
| 217 | +static int rcu_gp_in_progress(void) |
---|
222 | 218 | { |
---|
223 | | - return rcu_seq_state(rcu_seq_current(&rsp->gp_seq)); |
---|
| 219 | + return rcu_seq_state(rcu_seq_current(&rcu_state.gp_seq)); |
---|
224 | 220 | } |
---|
225 | 221 | |
---|
226 | 222 | /* |
---|
227 | | - * Note a quiescent state. Because we do not need to know |
---|
228 | | - * how many quiescent states passed, just if there was at least |
---|
229 | | - * one since the start of the grace period, this just sets a flag. |
---|
230 | | - * The caller must have disabled preemption. |
---|
| 223 | + * Return the number of callbacks queued on the specified CPU. |
---|
| 224 | + * Handles both the nocbs and normal cases. |
---|
231 | 225 | */ |
---|
232 | | -void rcu_sched_qs(void) |
---|
| 226 | +static long rcu_get_n_cbs_cpu(int cpu) |
---|
233 | 227 | { |
---|
234 | | - RCU_LOCKDEP_WARN(preemptible(), "rcu_sched_qs() invoked with preemption enabled!!!"); |
---|
235 | | - if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s)) |
---|
236 | | - return; |
---|
237 | | - trace_rcu_grace_period(TPS("rcu_sched"), |
---|
238 | | - __this_cpu_read(rcu_sched_data.gp_seq), |
---|
239 | | - TPS("cpuqs")); |
---|
240 | | - __this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false); |
---|
241 | | - if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)) |
---|
242 | | - return; |
---|
243 | | - __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, false); |
---|
244 | | - rcu_report_exp_rdp(&rcu_sched_state, |
---|
245 | | - this_cpu_ptr(&rcu_sched_data), true); |
---|
| 228 | + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
---|
| 229 | + |
---|
| 230 | + if (rcu_segcblist_is_enabled(&rdp->cblist)) |
---|
| 231 | + return rcu_segcblist_n_cbs(&rdp->cblist); |
---|
| 232 | + return 0; |
---|
246 | 233 | } |
---|
247 | 234 | |
---|
248 | | -void rcu_bh_qs(void) |
---|
| 235 | +void rcu_softirq_qs(void) |
---|
249 | 236 | { |
---|
250 | | - RCU_LOCKDEP_WARN(preemptible(), "rcu_bh_qs() invoked with preemption enabled!!!"); |
---|
251 | | - if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) { |
---|
252 | | - trace_rcu_grace_period(TPS("rcu_bh"), |
---|
253 | | - __this_cpu_read(rcu_bh_data.gp_seq), |
---|
254 | | - TPS("cpuqs")); |
---|
255 | | - __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false); |
---|
256 | | - } |
---|
| 237 | + rcu_qs(); |
---|
| 238 | + rcu_preempt_deferred_qs(current); |
---|
257 | 239 | } |
---|
258 | | - |
---|
259 | | -/* |
---|
260 | | - * Steal a bit from the bottom of ->dynticks for idle entry/exit |
---|
261 | | - * control. Initially this is for TLB flushing. |
---|
262 | | - */ |
---|
263 | | -#define RCU_DYNTICK_CTRL_MASK 0x1 |
---|
264 | | -#define RCU_DYNTICK_CTRL_CTR (RCU_DYNTICK_CTRL_MASK + 1) |
---|
265 | | -#ifndef rcu_eqs_special_exit |
---|
266 | | -#define rcu_eqs_special_exit() do { } while (0) |
---|
267 | | -#endif |
---|
268 | | - |
---|
269 | | -static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { |
---|
270 | | - .dynticks_nesting = 1, |
---|
271 | | - .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE, |
---|
272 | | - .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), |
---|
273 | | -}; |
---|
274 | 240 | |
---|
275 | 241 | /* |
---|
276 | 242 | * Record entry into an extended quiescent state. This is only to be |
---|
277 | | - * called when not already in an extended quiescent state. |
---|
| 243 | + * called when not already in an extended quiescent state, that is, |
---|
| 244 | + * RCU is watching prior to the call to this function and is no longer |
---|
| 245 | + * watching upon return. |
---|
278 | 246 | */ |
---|
279 | | -static void rcu_dynticks_eqs_enter(void) |
---|
| 247 | +static noinstr void rcu_dynticks_eqs_enter(void) |
---|
280 | 248 | { |
---|
281 | | - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); |
---|
| 249 | + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); |
---|
282 | 250 | int seq; |
---|
283 | 251 | |
---|
284 | 252 | /* |
---|
.. | .. |
---|
286 | 254 | * critical sections, and we also must force ordering with the |
---|
287 | 255 | * next idle sojourn. |
---|
288 | 256 | */ |
---|
289 | | - seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks); |
---|
290 | | - /* Better be in an extended quiescent state! */ |
---|
| 257 | + rcu_dynticks_task_trace_enter(); // Before ->dynticks update! |
---|
| 258 | + seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks); |
---|
| 259 | + // RCU is no longer watching. Better be in extended quiescent state! |
---|
291 | 260 | WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && |
---|
292 | 261 | (seq & RCU_DYNTICK_CTRL_CTR)); |
---|
293 | 262 | /* Better not have special action (TLB flush) pending! */ |
---|
.. | .. |
---|
297 | 266 | |
---|
298 | 267 | /* |
---|
299 | 268 | * Record exit from an extended quiescent state. This is only to be |
---|
300 | | - * called from an extended quiescent state. |
---|
| 269 | + * called from an extended quiescent state, that is, RCU is not watching |
---|
| 270 | + * prior to the call to this function and is watching upon return. |
---|
301 | 271 | */ |
---|
302 | | -static void rcu_dynticks_eqs_exit(void) |
---|
| 272 | +static noinstr void rcu_dynticks_eqs_exit(void) |
---|
303 | 273 | { |
---|
304 | | - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); |
---|
| 274 | + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); |
---|
305 | 275 | int seq; |
---|
306 | 276 | |
---|
307 | 277 | /* |
---|
.. | .. |
---|
309 | 279 | * and we also must force ordering with the next RCU read-side |
---|
310 | 280 | * critical section. |
---|
311 | 281 | */ |
---|
312 | | - seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks); |
---|
| 282 | + seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks); |
---|
| 283 | + // RCU is now watching. Better not be in an extended quiescent state! |
---|
| 284 | + rcu_dynticks_task_trace_exit(); // After ->dynticks update! |
---|
313 | 285 | WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && |
---|
314 | 286 | !(seq & RCU_DYNTICK_CTRL_CTR)); |
---|
315 | 287 | if (seq & RCU_DYNTICK_CTRL_MASK) { |
---|
316 | | - atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdtp->dynticks); |
---|
| 288 | + arch_atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks); |
---|
317 | 289 | smp_mb__after_atomic(); /* _exit after clearing mask. */ |
---|
318 | | - /* Prefer duplicate flushes to losing a flush. */ |
---|
319 | | - rcu_eqs_special_exit(); |
---|
320 | 290 | } |
---|
321 | 291 | } |
---|
322 | 292 | |
---|
.. | .. |
---|
332 | 302 | */ |
---|
333 | 303 | static void rcu_dynticks_eqs_online(void) |
---|
334 | 304 | { |
---|
335 | | - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); |
---|
| 305 | + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); |
---|
336 | 306 | |
---|
337 | | - if (atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR) |
---|
| 307 | + if (atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR) |
---|
338 | 308 | return; |
---|
339 | | - atomic_add(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks); |
---|
| 309 | + atomic_add(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks); |
---|
340 | 310 | } |
---|
341 | 311 | |
---|
342 | 312 | /* |
---|
.. | .. |
---|
344 | 314 | * |
---|
345 | 315 | * No ordering, as we are sampling CPU-local information. |
---|
346 | 316 | */ |
---|
347 | | -bool rcu_dynticks_curr_cpu_in_eqs(void) |
---|
| 317 | +static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) |
---|
348 | 318 | { |
---|
349 | | - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); |
---|
| 319 | + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); |
---|
350 | 320 | |
---|
351 | | - return !(atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR); |
---|
| 321 | + return !(arch_atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR); |
---|
352 | 322 | } |
---|
353 | 323 | |
---|
354 | 324 | /* |
---|
355 | 325 | * Snapshot the ->dynticks counter with full ordering so as to allow |
---|
356 | 326 | * stable comparison of this counter with past and future snapshots. |
---|
357 | 327 | */ |
---|
358 | | -int rcu_dynticks_snap(struct rcu_dynticks *rdtp) |
---|
| 328 | +static int rcu_dynticks_snap(struct rcu_data *rdp) |
---|
359 | 329 | { |
---|
360 | | - int snap = atomic_add_return(0, &rdtp->dynticks); |
---|
| 330 | + int snap = atomic_add_return(0, &rdp->dynticks); |
---|
361 | 331 | |
---|
362 | 332 | return snap & ~RCU_DYNTICK_CTRL_MASK; |
---|
363 | 333 | } |
---|
.. | .. |
---|
372 | 342 | } |
---|
373 | 343 | |
---|
374 | 344 | /* |
---|
375 | | - * Return true if the CPU corresponding to the specified rcu_dynticks |
---|
| 345 | + * Return true if the CPU corresponding to the specified rcu_data |
---|
376 | 346 | * structure has spent some time in an extended quiescent state since |
---|
377 | 347 | * rcu_dynticks_snap() returned the specified snapshot. |
---|
378 | 348 | */ |
---|
379 | | -static bool rcu_dynticks_in_eqs_since(struct rcu_dynticks *rdtp, int snap) |
---|
| 349 | +static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap) |
---|
380 | 350 | { |
---|
381 | | - return snap != rcu_dynticks_snap(rdtp); |
---|
| 351 | + return snap != rcu_dynticks_snap(rdp); |
---|
| 352 | +} |
---|
| 353 | + |
---|
| 354 | +/* |
---|
| 355 | + * Return true if the referenced integer is zero while the specified |
---|
| 356 | + * CPU remains within a single extended quiescent state. |
---|
| 357 | + */ |
---|
| 358 | +bool rcu_dynticks_zero_in_eqs(int cpu, int *vp) |
---|
| 359 | +{ |
---|
| 360 | + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
---|
| 361 | + int snap; |
---|
| 362 | + |
---|
| 363 | + // If not quiescent, force back to earlier extended quiescent state. |
---|
| 364 | + snap = atomic_read(&rdp->dynticks) & ~(RCU_DYNTICK_CTRL_MASK | |
---|
| 365 | + RCU_DYNTICK_CTRL_CTR); |
---|
| 366 | + |
---|
| 367 | + smp_rmb(); // Order ->dynticks and *vp reads. |
---|
| 368 | + if (READ_ONCE(*vp)) |
---|
| 369 | + return false; // Non-zero, so report failure; |
---|
| 370 | + smp_rmb(); // Order *vp read and ->dynticks re-read. |
---|
| 371 | + |
---|
| 372 | + // If still in the same extended quiescent state, we are good! |
---|
| 373 | + return snap == (atomic_read(&rdp->dynticks) & ~RCU_DYNTICK_CTRL_MASK); |
---|
382 | 374 | } |
---|
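Editor's note: rcu_dynticks_zero_in_eqs() above is a lock-free handshake: snapshot the EQS counter, verify that the referenced integer is zero, then confirm the counter never moved, with read barriers ordering the three steps. The sketch below models that shape in userspace C11, using acquire fences as a stand-in for the kernel's smp_rmb(); the struct and names are invented for the illustration.

```c
#include <stdatomic.h>
#include <stdbool.h>

#define CTRL_MASK 0x1
#define CTRL_CTR  (CTRL_MASK + 1)

struct cpu_state {
	atomic_int dynticks;   /* EQS counter, encoded as in the patch above */
	atomic_int value;      /* the integer that must stay zero (the *vp)  */
};

/*
 * True only if ->value was seen as zero while the CPU stayed inside one
 * extended quiescent state, i.e. the counter did not change across the read.
 */
bool zero_in_eqs(struct cpu_state *cs)
{
	int snap = atomic_load(&cs->dynticks) & ~(CTRL_MASK | CTRL_CTR);

	atomic_thread_fence(memory_order_acquire);   /* order counter vs. value */
	if (atomic_load(&cs->value))
		return false;                        /* non-zero: report failure */
	atomic_thread_fence(memory_order_acquire);   /* order value vs. re-read */

	return snap == (atomic_load(&cs->dynticks) & ~CTRL_MASK);
}
```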
383 | 375 | |
---|
384 | 376 | /* |
---|
.. | .. |
---|
392 | 384 | { |
---|
393 | 385 | int old; |
---|
394 | 386 | int new; |
---|
395 | | - struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); |
---|
| 387 | + int new_old; |
---|
| 388 | + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); |
---|
396 | 389 | |
---|
| 390 | + new_old = atomic_read(&rdp->dynticks); |
---|
397 | 391 | do { |
---|
398 | | - old = atomic_read(&rdtp->dynticks); |
---|
| 392 | + old = new_old; |
---|
399 | 393 | if (old & RCU_DYNTICK_CTRL_CTR) |
---|
400 | 394 | return false; |
---|
401 | 395 | new = old | RCU_DYNTICK_CTRL_MASK; |
---|
402 | | - } while (atomic_cmpxchg(&rdtp->dynticks, old, new) != old); |
---|
| 396 | + new_old = atomic_cmpxchg(&rdp->dynticks, old, new); |
---|
| 397 | + } while (new_old != old); |
---|
403 | 398 | return true; |
---|
404 | 399 | } |
---|
405 | 400 | |
---|
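Editor's note: the loop above was reworked to reuse the value returned by atomic_cmpxchg() instead of re-reading ->dynticks on every pass. C11's compare-exchange expresses the same idea directly, because a failed exchange writes the freshly observed value back into the "expected" argument. Sketch only, with invented names:

```c
#include <stdatomic.h>
#include <stdbool.h>

#define CTRL_MASK 0x1
#define CTRL_CTR  (CTRL_MASK + 1)

/*
 * Request a special action (such as a TLB flush) for a remote CPU, but only
 * while that CPU is in an extended quiescent state (its CTR bit is clear).
 */
bool eqs_special_set(atomic_int *dynticks)
{
	int old = atomic_load(dynticks);

	do {
		if (old & CTRL_CTR)
			return false;   /* CPU is not idle: caller must IPI it */
		/* On failure, 'old' is updated to the value actually observed. */
	} while (!atomic_compare_exchange_weak(dynticks, &old, old | CTRL_MASK));

	return true;                    /* flag set; seen on the next EQS exit */
}
```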
.. | .. |
---|
414 | 409 | * |
---|
415 | 410 | * The caller must have disabled interrupts and must not be idle. |
---|
416 | 411 | */ |
---|
417 | | -static void rcu_momentary_dyntick_idle(void) |
---|
| 412 | +notrace void rcu_momentary_dyntick_idle(void) |
---|
418 | 413 | { |
---|
419 | | - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); |
---|
420 | 414 | int special; |
---|
421 | 415 | |
---|
422 | | - raw_cpu_write(rcu_dynticks.rcu_need_heavy_qs, false); |
---|
423 | | - special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks); |
---|
| 416 | + raw_cpu_write(rcu_data.rcu_need_heavy_qs, false); |
---|
| 417 | + special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR, |
---|
| 418 | + &this_cpu_ptr(&rcu_data)->dynticks); |
---|
424 | 419 | /* It is illegal to call this from idle state. */ |
---|
425 | 420 | WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR)); |
---|
| 421 | + rcu_preempt_deferred_qs(current); |
---|
426 | 422 | } |
---|
| 423 | +EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle); |
---|
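Editor's note: rcu_momentary_dyntick_idle() adds 2 * RCU_DYNTICK_CTRL_CTR, so any CPU comparing snapshots of ->dynticks sees the counter move, exactly as if this CPU had passed through idle, even though the CTR ("watching") bit never changes. A toy demonstration of that property, with invented names:

```c
#include <assert.h>
#include <stdatomic.h>

#define CTRL_MASK 0x1
#define CTRL_CTR  (CTRL_MASK + 1)

int main(void)
{
	atomic_int dynticks = CTRL_CTR;          /* CPU is busy: watching      */
	int snap_before = atomic_load(&dynticks) & ~CTRL_MASK;
	int special;

	/* Momentary dyntick idle: bump the counter by two full steps. */
	special = atomic_fetch_add(&dynticks, 2 * CTRL_CTR) + 2 * CTRL_CTR;
	assert(special & CTRL_CTR);              /* still watching afterwards  */

	/* Snapshots now differ, so a comparison like the one made by
	 * rcu_dynticks_in_eqs_since() reports a quiescent state. */
	assert(snap_before != (atomic_load(&dynticks) & ~CTRL_MASK));
	return 0;
}
```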
427 | 424 | |
---|
428 | | -/* |
---|
429 | | - * Note a context switch. This is a quiescent state for RCU-sched, |
---|
430 | | - * and requires special handling for preemptible RCU. |
---|
431 | | - * The caller must have disabled interrupts. |
---|
432 | | - */ |
---|
433 | | -void rcu_note_context_switch(bool preempt) |
---|
434 | | -{ |
---|
435 | | - barrier(); /* Avoid RCU read-side critical sections leaking down. */ |
---|
436 | | - trace_rcu_utilization(TPS("Start context switch")); |
---|
437 | | - rcu_sched_qs(); |
---|
438 | | - rcu_preempt_note_context_switch(preempt); |
---|
439 | | - /* Load rcu_urgent_qs before other flags. */ |
---|
440 | | - if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) |
---|
441 | | - goto out; |
---|
442 | | - this_cpu_write(rcu_dynticks.rcu_urgent_qs, false); |
---|
443 | | - if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) |
---|
444 | | - rcu_momentary_dyntick_idle(); |
---|
445 | | - this_cpu_inc(rcu_dynticks.rcu_qs_ctr); |
---|
446 | | - if (!preempt) |
---|
447 | | - rcu_tasks_qs(current); |
---|
448 | | -out: |
---|
449 | | - trace_rcu_utilization(TPS("End context switch")); |
---|
450 | | - barrier(); /* Avoid RCU read-side critical sections leaking up. */ |
---|
451 | | -} |
---|
452 | | -EXPORT_SYMBOL_GPL(rcu_note_context_switch); |
---|
453 | | - |
---|
454 | | -/* |
---|
455 | | - * Register a quiescent state for all RCU flavors. If there is an |
---|
456 | | - * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight |
---|
457 | | - * dyntick-idle quiescent state visible to other CPUs (but only for those |
---|
458 | | - * RCU flavors in desperate need of a quiescent state, which will normally |
---|
459 | | - * be none of them). Either way, do a lightweight quiescent state for |
---|
460 | | - * all RCU flavors. |
---|
| 425 | +/** |
---|
| 426 | + * rcu_is_cpu_rrupt_from_idle - see if 'interrupted' from idle |
---|
461 | 427 | * |
---|
462 | | - * The barrier() calls are redundant in the common case when this is |
---|
463 | | - * called externally, but just in case this is called from within this |
---|
464 | | - * file. |
---|
| 428 | + * If the current CPU is idle and running at a first-level (not nested) |
---|
| 429 | + * interrupt, or directly, from idle, return true. |
---|
465 | 430 | * |
---|
| 431 | + * The caller must have at least disabled IRQs. |
---|
466 | 432 | */ |
---|
467 | | -void rcu_all_qs(void) |
---|
| 433 | +static int rcu_is_cpu_rrupt_from_idle(void) |
---|
468 | 434 | { |
---|
469 | | - unsigned long flags; |
---|
| 435 | + long nesting; |
---|
470 | 436 | |
---|
471 | | - if (!raw_cpu_read(rcu_dynticks.rcu_urgent_qs)) |
---|
472 | | - return; |
---|
473 | | - preempt_disable(); |
---|
474 | | - /* Load rcu_urgent_qs before other flags. */ |
---|
475 | | - if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) { |
---|
476 | | - preempt_enable(); |
---|
477 | | - return; |
---|
478 | | - } |
---|
479 | | - this_cpu_write(rcu_dynticks.rcu_urgent_qs, false); |
---|
480 | | - barrier(); /* Avoid RCU read-side critical sections leaking down. */ |
---|
481 | | - if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) { |
---|
482 | | - local_irq_save(flags); |
---|
483 | | - rcu_momentary_dyntick_idle(); |
---|
484 | | - local_irq_restore(flags); |
---|
485 | | - } |
---|
486 | | - if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))) |
---|
487 | | - rcu_sched_qs(); |
---|
488 | | - this_cpu_inc(rcu_dynticks.rcu_qs_ctr); |
---|
489 | | - barrier(); /* Avoid RCU read-side critical sections leaking up. */ |
---|
490 | | - preempt_enable(); |
---|
| 437 | + /* |
---|
| 438 | + * Usually called from the tick; but also used from smp_call_function() |
---|
| 439 | + * for expedited grace periods. This latter can result in running from |
---|
| 440 | + * the idle task, instead of an actual IPI. |
---|
| 441 | + */ |
---|
| 442 | + lockdep_assert_irqs_disabled(); |
---|
| 443 | + |
---|
| 444 | + /* Check for counter underflows */ |
---|
| 445 | + RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) < 0, |
---|
| 446 | + "RCU dynticks_nesting counter underflow!"); |
---|
| 447 | + RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) <= 0, |
---|
| 448 | + "RCU dynticks_nmi_nesting counter underflow/zero!"); |
---|
| 449 | + |
---|
| 450 | + /* Are we at first interrupt nesting level? */ |
---|
| 451 | + nesting = __this_cpu_read(rcu_data.dynticks_nmi_nesting); |
---|
| 452 | + if (nesting > 1) |
---|
| 453 | + return false; |
---|
| 454 | + |
---|
| 455 | + /* |
---|
| 456 | + * If we're not in an interrupt, we must be in the idle task! |
---|
| 457 | + */ |
---|
| 458 | + WARN_ON_ONCE(!nesting && !is_idle_task(current)); |
---|
| 459 | + |
---|
| 460 | + /* Does CPU appear to be idle from an RCU standpoint? */ |
---|
| 461 | + return __this_cpu_read(rcu_data.dynticks_nesting) == 0; |
---|
491 | 462 | } |
---|
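Editor's note: rcu_is_cpu_rrupt_from_idle() above reads two nesting counters: ->dynticks_nesting tracks process-level non-idle nesting (zero means the idle loop or nohz_full userspace), while ->dynticks_nmi_nesting tracks irq/NMI nesting (one means a first-level interrupt taken from idle; the large DYNTICK_IRQ_NONIDLE sentinel is used when the interrupted context was not idle). A condensed model of just that decision, with a made-up sentinel value:

```c
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for DYNTICK_IRQ_NONIDLE; the exact kernel value does not matter
 * here, only that it is far larger than any real interrupt nesting depth. */
#define IRQ_NONIDLE_SENTINEL (1L << 30)

/*
 * Given the two per-CPU nesting counters, decide whether we are running in,
 * or in a first-level interrupt from, the idle extended quiescent state.
 */
static bool rrupt_from_idle(long dynticks_nesting, long dynticks_nmi_nesting)
{
	if (dynticks_nmi_nesting > 1)     /* nested interrupt: not from idle */
		return false;
	return dynticks_nesting == 0;     /* zero only in idle / nohz_full user */
}

int main(void)
{
	printf("%d\n", rrupt_from_idle(0, 1));                    /* 1: tick hit idle */
	printf("%d\n", rrupt_from_idle(1, IRQ_NONIDLE_SENTINEL)); /* 0: task context  */
	printf("%d\n", rrupt_from_idle(0, 2));                    /* 0: nested irq    */
	return 0;
}
```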
492 | | -EXPORT_SYMBOL_GPL(rcu_all_qs); |
---|
493 | 463 | |
---|
494 | | -#define DEFAULT_RCU_BLIMIT 10 /* Maximum callbacks per rcu_do_batch. */ |
---|
| 464 | +#define DEFAULT_RCU_BLIMIT (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 1000 : 10) |
---|
| 465 | + // Maximum callbacks per rcu_do_batch ... |
---|
| 466 | +#define DEFAULT_MAX_RCU_BLIMIT 10000 // ... even during callback flood. |
---|
495 | 467 | static long blimit = DEFAULT_RCU_BLIMIT; |
---|
496 | | -#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */ |
---|
| 468 | +#define DEFAULT_RCU_QHIMARK 10000 // If this many pending, ignore blimit. |
---|
497 | 469 | static long qhimark = DEFAULT_RCU_QHIMARK; |
---|
498 | | -#define DEFAULT_RCU_QLOMARK 100 /* Once only this many pending, use blimit. */ |
---|
| 470 | +#define DEFAULT_RCU_QLOMARK 100 // Once only this many pending, use blimit. |
---|
499 | 471 | static long qlowmark = DEFAULT_RCU_QLOMARK; |
---|
| 472 | +#define DEFAULT_RCU_QOVLD_MULT 2 |
---|
| 473 | +#define DEFAULT_RCU_QOVLD (DEFAULT_RCU_QOVLD_MULT * DEFAULT_RCU_QHIMARK) |
---|
| 474 | +static long qovld = DEFAULT_RCU_QOVLD; // If this many pending, hammer QS. |
---|
| 475 | +static long qovld_calc = -1; // No pre-initialization lock acquisitions! |
---|
500 | 476 | |
---|
501 | 477 | module_param(blimit, long, 0444); |
---|
502 | 478 | module_param(qhimark, long, 0444); |
---|
503 | 479 | module_param(qlowmark, long, 0444); |
---|
| 480 | +module_param(qovld, long, 0444); |
---|
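Editor's note: the four knobs above form a simple hysteresis on callback processing: up to blimit callbacks per rcu_do_batch() pass, the limit ignored once a CPU has more than qhimark callbacks queued, normal batching restored below qlowmark, and qovld (twice qhimark by default) marking the backlog at which RCU starts hammering the CPU for quiescent states. The sketch below is an editor's simplification of that policy, not the kernel's actual rcu_do_batch() logic:

```c
#include <stdbool.h>
#include <stdio.h>

#define BLIMIT    10             /* max callbacks per batch, normal case    */
#define QHIMARK   10000          /* above this backlog, ignore BLIMIT       */
#define QLOMARK   100            /* below this backlog, go back to BLIMIT   */
#define QOVLD     (2 * QHIMARK)  /* backlog at which QSes are "hammered"    */

struct cpu_cbs {
	long n_cbs;              /* callbacks currently queued on this CPU  */
	bool limit_lifted;       /* sticky "ignore BLIMIT" state            */
};

/* How many callbacks may the next batch invoke, and is the CPU overloaded? */
static long next_batch_limit(struct cpu_cbs *c, bool *overloaded)
{
	if (c->n_cbs > QHIMARK)
		c->limit_lifted = true;    /* flood: run as many as possible    */
	else if (c->n_cbs < QLOMARK)
		c->limit_lifted = false;   /* drained: back to gentle batching  */

	*overloaded = c->n_cbs >= QOVLD;   /* ask for urgent quiescent states */
	return c->limit_lifted ? c->n_cbs : BLIMIT;
}

int main(void)
{
	bool ovld;
	struct cpu_cbs c = { .n_cbs = 25000 };

	printf("limit=%ld overloaded=%d\n", next_batch_limit(&c, &ovld), ovld);
	c.n_cbs = 50;
	printf("limit=%ld overloaded=%d\n", next_batch_limit(&c, &ovld), ovld);
	return 0;
}
```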
504 | 481 | |
---|
505 | | -static ulong jiffies_till_first_fqs = ULONG_MAX; |
---|
| 482 | +static ulong jiffies_till_first_fqs = IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 0 : ULONG_MAX; |
---|
506 | 483 | static ulong jiffies_till_next_fqs = ULONG_MAX; |
---|
507 | 484 | static bool rcu_kick_kthreads; |
---|
| 485 | +static int rcu_divisor = 7; |
---|
| 486 | +module_param(rcu_divisor, int, 0644); |
---|
| 487 | + |
---|
| 488 | +/* Force an exit from rcu_do_batch() after 3 milliseconds. */ |
---|
| 489 | +static long rcu_resched_ns = 3 * NSEC_PER_MSEC; |
---|
| 490 | +module_param(rcu_resched_ns, long, 0644); |
---|
| 491 | + |
---|
| 492 | +/* |
---|
| 493 | + * How long the grace period must be before we start recruiting |
---|
| 494 | + * quiescent-state help from rcu_note_context_switch(). |
---|
| 495 | + */ |
---|
| 496 | +static ulong jiffies_till_sched_qs = ULONG_MAX; |
---|
| 497 | +module_param(jiffies_till_sched_qs, ulong, 0444); |
---|
| 498 | +static ulong jiffies_to_sched_qs; /* See adjust_jiffies_till_sched_qs(). */ |
---|
| 499 | +module_param(jiffies_to_sched_qs, ulong, 0444); /* Display only! */ |
---|
| 500 | + |
---|
| 501 | +/* |
---|
| 502 | + * Make sure that we give the grace-period kthread time to detect any |
---|
| 503 | + * idle CPUs before taking active measures to force quiescent states. |
---|
| 504 | + * However, don't go below 100 milliseconds, adjusted upwards for really |
---|
| 505 | + * large systems. |
---|
| 506 | + */ |
---|
| 507 | +static void adjust_jiffies_till_sched_qs(void) |
---|
| 508 | +{ |
---|
| 509 | + unsigned long j; |
---|
| 510 | + |
---|
| 511 | + /* If jiffies_till_sched_qs was specified, respect the request. */ |
---|
| 512 | + if (jiffies_till_sched_qs != ULONG_MAX) { |
---|
| 513 | + WRITE_ONCE(jiffies_to_sched_qs, jiffies_till_sched_qs); |
---|
| 514 | + return; |
---|
| 515 | + } |
---|
| 516 | + /* Otherwise, set to third fqs scan, but bound below on large system. */ |
---|
| 517 | + j = READ_ONCE(jiffies_till_first_fqs) + |
---|
| 518 | + 2 * READ_ONCE(jiffies_till_next_fqs); |
---|
| 519 | + if (j < HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV) |
---|
| 520 | + j = HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV; |
---|
| 521 | + pr_info("RCU calculated value of scheduler-enlistment delay is %ld jiffies.\n", j); |
---|
| 522 | + WRITE_ONCE(jiffies_to_sched_qs, j); |
---|
| 523 | +} |
---|
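Editor's note: unless the administrator set jiffies_till_sched_qs explicitly, the function above derives the scheduler-enlistment delay from the two FQS intervals and clamps it from below. A worked example follows; the HZ value and RCU_JIFFIES_FQS_DIV divisor used here are assumptions chosen for illustration, not values quoted from the patch.

```c
#include <stdio.h>

#define HZ                   1000  /* assumed tick rate for the example  */
#define RCU_JIFFIES_FQS_DIV  256   /* assumed per-CPU scaling divisor    */

/* Mirrors the calculation in adjust_jiffies_till_sched_qs(). */
static unsigned long sched_qs_delay(unsigned long first_fqs,
				    unsigned long next_fqs,
				    unsigned long nr_cpu_ids)
{
	unsigned long j = first_fqs + 2 * next_fqs;   /* "third fqs scan"   */
	unsigned long floor = HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;

	return j < floor ? floor : j;                 /* never below ~100 ms */
}

int main(void)
{
	/* 3-jiffy FQS intervals on a 64-CPU box: the floor of 100 wins. */
	printf("%lu jiffies\n", sched_qs_delay(3, 3, 64));     /* -> 100 */
	/* Larger FQS intervals dominate instead. */
	printf("%lu jiffies\n", sched_qs_delay(300, 300, 64)); /* -> 900 */
	return 0;
}
```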
508 | 524 | |
---|
509 | 525 | static int param_set_first_fqs_jiffies(const char *val, const struct kernel_param *kp) |
---|
510 | 526 | { |
---|
511 | 527 | ulong j; |
---|
512 | 528 | int ret = kstrtoul(val, 0, &j); |
---|
513 | 529 | |
---|
514 | | - if (!ret) |
---|
| 530 | + if (!ret) { |
---|
515 | 531 | WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : j); |
---|
| 532 | + adjust_jiffies_till_sched_qs(); |
---|
| 533 | + } |
---|
516 | 534 | return ret; |
---|
517 | 535 | } |
---|
518 | 536 | |
---|
.. | .. |
---|
521 | 539 | ulong j; |
---|
522 | 540 | int ret = kstrtoul(val, 0, &j); |
---|
523 | 541 | |
---|
524 | | - if (!ret) |
---|
| 542 | + if (!ret) { |
---|
525 | 543 | WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : (j ?: 1)); |
---|
| 544 | + adjust_jiffies_till_sched_qs(); |
---|
| 545 | + } |
---|
526 | 546 | return ret; |
---|
527 | 547 | } |
---|
528 | 548 | |
---|
.. | .. |
---|
540 | 560 | module_param_cb(jiffies_till_next_fqs, &next_fqs_jiffies_ops, &jiffies_till_next_fqs, 0644); |
---|
541 | 561 | module_param(rcu_kick_kthreads, bool, 0644); |
---|
542 | 562 | |
---|
543 | | -/* |
---|
544 | | - * How long the grace period must be before we start recruiting |
---|
545 | | - * quiescent-state help from rcu_note_context_switch(). |
---|
546 | | - */ |
---|
547 | | -static ulong jiffies_till_sched_qs = HZ / 10; |
---|
548 | | -module_param(jiffies_till_sched_qs, ulong, 0444); |
---|
549 | | - |
---|
550 | | -static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp)); |
---|
551 | | -static void force_quiescent_state(struct rcu_state *rsp); |
---|
552 | | -static int rcu_pending(void); |
---|
| 563 | +static void force_qs_rnp(int (*f)(struct rcu_data *rdp)); |
---|
| 564 | +static int rcu_pending(int user); |
---|
553 | 565 | |
---|
554 | 566 | /* |
---|
555 | 567 | * Return the number of RCU GPs completed thus far for debug & stats. |
---|
556 | 568 | */ |
---|
557 | 569 | unsigned long rcu_get_gp_seq(void) |
---|
558 | 570 | { |
---|
559 | | - return READ_ONCE(rcu_state_p->gp_seq); |
---|
| 571 | + return READ_ONCE(rcu_state.gp_seq); |
---|
560 | 572 | } |
---|
561 | 573 | EXPORT_SYMBOL_GPL(rcu_get_gp_seq); |
---|
562 | | - |
---|
563 | | -/* |
---|
564 | | - * Return the number of RCU-sched GPs completed thus far for debug & stats. |
---|
565 | | - */ |
---|
566 | | -unsigned long rcu_sched_get_gp_seq(void) |
---|
567 | | -{ |
---|
568 | | - return READ_ONCE(rcu_sched_state.gp_seq); |
---|
569 | | -} |
---|
570 | | -EXPORT_SYMBOL_GPL(rcu_sched_get_gp_seq); |
---|
571 | | - |
---|
572 | | -/* |
---|
573 | | - * Return the number of RCU-bh GPs completed thus far for debug & stats. |
---|
574 | | - */ |
---|
575 | | -unsigned long rcu_bh_get_gp_seq(void) |
---|
576 | | -{ |
---|
577 | | - return READ_ONCE(rcu_bh_state.gp_seq); |
---|
578 | | -} |
---|
579 | | -EXPORT_SYMBOL_GPL(rcu_bh_get_gp_seq); |
---|
580 | 574 | |
---|
581 | 575 | /* |
---|
582 | 576 | * Return the number of RCU expedited batches completed thus far for |
---|
.. | .. |
---|
586 | 580 | */ |
---|
587 | 581 | unsigned long rcu_exp_batches_completed(void) |
---|
588 | 582 | { |
---|
589 | | - return rcu_state_p->expedited_sequence; |
---|
| 583 | + return rcu_state.expedited_sequence; |
---|
590 | 584 | } |
---|
591 | 585 | EXPORT_SYMBOL_GPL(rcu_exp_batches_completed); |
---|
592 | 586 | |
---|
593 | 587 | /* |
---|
594 | | - * Return the number of RCU-sched expedited batches completed thus far |
---|
595 | | - * for debug & stats. Similar to rcu_exp_batches_completed(). |
---|
| 588 | + * Return the root node of the rcu_state structure. |
---|
596 | 589 | */ |
---|
597 | | -unsigned long rcu_exp_batches_completed_sched(void) |
---|
| 590 | +static struct rcu_node *rcu_get_root(void) |
---|
598 | 591 | { |
---|
599 | | - return rcu_sched_state.expedited_sequence; |
---|
| 592 | + return &rcu_state.node[0]; |
---|
600 | 593 | } |
---|
601 | | -EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched); |
---|
602 | | - |
---|
603 | | -/* |
---|
604 | | - * Force a quiescent state. |
---|
605 | | - */ |
---|
606 | | -void rcu_force_quiescent_state(void) |
---|
607 | | -{ |
---|
608 | | - force_quiescent_state(rcu_state_p); |
---|
609 | | -} |
---|
610 | | -EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); |
---|
611 | | - |
---|
612 | | -/* |
---|
613 | | - * Force a quiescent state for RCU BH. |
---|
614 | | - */ |
---|
615 | | -void rcu_bh_force_quiescent_state(void) |
---|
616 | | -{ |
---|
617 | | - force_quiescent_state(&rcu_bh_state); |
---|
618 | | -} |
---|
619 | | -EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); |
---|
620 | | - |
---|
621 | | -/* |
---|
622 | | - * Force a quiescent state for RCU-sched. |
---|
623 | | - */ |
---|
624 | | -void rcu_sched_force_quiescent_state(void) |
---|
625 | | -{ |
---|
626 | | - force_quiescent_state(&rcu_sched_state); |
---|
627 | | -} |
---|
628 | | -EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); |
---|
629 | | - |
---|
630 | | -/* |
---|
631 | | - * Show the state of the grace-period kthreads. |
---|
632 | | - */ |
---|
633 | | -void show_rcu_gp_kthreads(void) |
---|
634 | | -{ |
---|
635 | | - int cpu; |
---|
636 | | - struct rcu_data *rdp; |
---|
637 | | - struct rcu_node *rnp; |
---|
638 | | - struct rcu_state *rsp; |
---|
639 | | - |
---|
640 | | - for_each_rcu_flavor(rsp) { |
---|
641 | | - pr_info("%s: wait state: %d ->state: %#lx\n", |
---|
642 | | - rsp->name, rsp->gp_state, rsp->gp_kthread->state); |
---|
643 | | - rcu_for_each_node_breadth_first(rsp, rnp) { |
---|
644 | | - if (ULONG_CMP_GE(rsp->gp_seq, rnp->gp_seq_needed)) |
---|
645 | | - continue; |
---|
646 | | - pr_info("\trcu_node %d:%d ->gp_seq %lu ->gp_seq_needed %lu\n", |
---|
647 | | - rnp->grplo, rnp->grphi, rnp->gp_seq, |
---|
648 | | - rnp->gp_seq_needed); |
---|
649 | | - if (!rcu_is_leaf_node(rnp)) |
---|
650 | | - continue; |
---|
651 | | - for_each_leaf_node_possible_cpu(rnp, cpu) { |
---|
652 | | - rdp = per_cpu_ptr(rsp->rda, cpu); |
---|
653 | | - if (rdp->gpwrap || |
---|
654 | | - ULONG_CMP_GE(rsp->gp_seq, |
---|
655 | | - rdp->gp_seq_needed)) |
---|
656 | | - continue; |
---|
657 | | - pr_info("\tcpu %d ->gp_seq_needed %lu\n", |
---|
658 | | - cpu, rdp->gp_seq_needed); |
---|
659 | | - } |
---|
660 | | - } |
---|
661 | | - /* sched_show_task(rsp->gp_kthread); */ |
---|
662 | | - } |
---|
663 | | -} |
---|
664 | | -EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads); |
---|
665 | 594 | |
---|
666 | 595 | /* |
---|
667 | 596 | * Send along grace-period-related data for rcutorture diagnostics. |
---|
.. | .. |
---|
669 | 598 | void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, |
---|
670 | 599 | unsigned long *gp_seq) |
---|
671 | 600 | { |
---|
672 | | - struct rcu_state *rsp = NULL; |
---|
673 | | - |
---|
674 | 601 | switch (test_type) { |
---|
675 | 602 | case RCU_FLAVOR: |
---|
676 | | - rsp = rcu_state_p; |
---|
677 | | - break; |
---|
678 | | - case RCU_BH_FLAVOR: |
---|
679 | | - rsp = &rcu_bh_state; |
---|
680 | | - break; |
---|
681 | | - case RCU_SCHED_FLAVOR: |
---|
682 | | - rsp = &rcu_sched_state; |
---|
| 603 | + *flags = READ_ONCE(rcu_state.gp_flags); |
---|
| 604 | + *gp_seq = rcu_seq_current(&rcu_state.gp_seq); |
---|
683 | 605 | break; |
---|
684 | 606 | default: |
---|
685 | 607 | break; |
---|
686 | 608 | } |
---|
687 | | - if (rsp == NULL) |
---|
688 | | - return; |
---|
689 | | - *flags = READ_ONCE(rsp->gp_flags); |
---|
690 | | - *gp_seq = rcu_seq_current(&rsp->gp_seq); |
---|
691 | 609 | } |
---|
692 | 610 | EXPORT_SYMBOL_GPL(rcutorture_get_gp_data); |
---|
693 | | - |
---|
694 | | -/* |
---|
695 | | - * Return the root node of the specified rcu_state structure. |
---|
696 | | - */ |
---|
697 | | -static struct rcu_node *rcu_get_root(struct rcu_state *rsp) |
---|
698 | | -{ |
---|
699 | | - return &rsp->node[0]; |
---|
700 | | -} |
---|
701 | 611 | |
---|
702 | 612 | /* |
---|
703 | 613 | * Enter an RCU extended quiescent state, which can be either the |
---|
.. | .. |
---|
707 | 617 | * the possibility of usermode upcalls having messed up our count |
---|
708 | 618 | * of interrupt nesting level during the prior busy period. |
---|
709 | 619 | */ |
---|
710 | | -static void rcu_eqs_enter(bool user) |
---|
| 620 | +static noinstr void rcu_eqs_enter(bool user) |
---|
711 | 621 | { |
---|
712 | | - struct rcu_state *rsp; |
---|
713 | | - struct rcu_data *rdp; |
---|
714 | | - struct rcu_dynticks *rdtp; |
---|
| 622 | + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); |
---|
715 | 623 | |
---|
716 | | - rdtp = this_cpu_ptr(&rcu_dynticks); |
---|
717 | | - WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); |
---|
| 624 | + WARN_ON_ONCE(rdp->dynticks_nmi_nesting != DYNTICK_IRQ_NONIDLE); |
---|
| 625 | + WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); |
---|
718 | 626 | WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && |
---|
719 | | - rdtp->dynticks_nesting == 0); |
---|
720 | | - if (rdtp->dynticks_nesting != 1) { |
---|
721 | | - rdtp->dynticks_nesting--; |
---|
| 627 | + rdp->dynticks_nesting == 0); |
---|
| 628 | + if (rdp->dynticks_nesting != 1) { |
---|
| 629 | + // RCU will still be watching, so just do accounting and leave. |
---|
| 630 | + rdp->dynticks_nesting--; |
---|
722 | 631 | return; |
---|
723 | 632 | } |
---|
724 | 633 | |
---|
725 | 634 | lockdep_assert_irqs_disabled(); |
---|
726 | | - trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0, rdtp->dynticks); |
---|
| 635 | + instrumentation_begin(); |
---|
| 636 | + trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks)); |
---|
727 | 637 | WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); |
---|
728 | | - for_each_rcu_flavor(rsp) { |
---|
729 | | - rdp = this_cpu_ptr(rsp->rda); |
---|
730 | | - do_nocb_deferred_wakeup(rdp); |
---|
731 | | - } |
---|
| 638 | + rdp = this_cpu_ptr(&rcu_data); |
---|
732 | 639 | rcu_prepare_for_idle(); |
---|
733 | | - WRITE_ONCE(rdtp->dynticks_nesting, 0); /* Avoid irq-access tearing. */ |
---|
| 640 | + rcu_preempt_deferred_qs(current); |
---|
| 641 | + |
---|
| 642 | + // instrumentation for the noinstr rcu_dynticks_eqs_enter() |
---|
| 643 | + instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks)); |
---|
| 644 | + |
---|
| 645 | + instrumentation_end(); |
---|
| 646 | + WRITE_ONCE(rdp->dynticks_nesting, 0); /* Avoid irq-access tearing. */ |
---|
| 647 | + // RCU is watching here ... |
---|
734 | 648 | rcu_dynticks_eqs_enter(); |
---|
| 649 | + // ... but is no longer watching here. |
---|
735 | 650 | rcu_dynticks_task_enter(); |
---|
736 | 651 | } |
---|
737 | 652 | |
---|
.. | .. |
---|
751 | 666 | lockdep_assert_irqs_disabled(); |
---|
752 | 667 | rcu_eqs_enter(false); |
---|
753 | 668 | } |
---|
| 669 | +EXPORT_SYMBOL_GPL(rcu_idle_enter); |
---|
754 | 670 | |
---|
755 | 671 | #ifdef CONFIG_NO_HZ_FULL |
---|
756 | 672 | /** |
---|
.. | .. |
---|
764 | 680 | * If you add or remove a call to rcu_user_enter(), be sure to test with |
---|
765 | 681 | * CONFIG_RCU_EQS_DEBUG=y. |
---|
766 | 682 | */ |
---|
767 | | -void rcu_user_enter(void) |
---|
| 683 | +noinstr void rcu_user_enter(void) |
---|
768 | 684 | { |
---|
| 685 | + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); |
---|
| 686 | + |
---|
769 | 687 | lockdep_assert_irqs_disabled(); |
---|
| 688 | + |
---|
| 689 | + instrumentation_begin(); |
---|
| 690 | + do_nocb_deferred_wakeup(rdp); |
---|
| 691 | + instrumentation_end(); |
---|
| 692 | + |
---|
770 | 693 | rcu_eqs_enter(true); |
---|
771 | 694 | } |
---|
772 | 695 | #endif /* CONFIG_NO_HZ_FULL */ |
---|
.. | .. |
---|
775 | 698 | * rcu_nmi_exit - inform RCU of exit from NMI context |
---|
776 | 699 | * |
---|
777 | 700 | * If we are returning from the outermost NMI handler that interrupted an |
---|
778 | | - * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting |
---|
| 701 | + * RCU-idle period, update rdp->dynticks and rdp->dynticks_nmi_nesting |
---|
779 | 702 | * to let the RCU grace-period handling know that the CPU is back to |
---|
780 | 703 | * being RCU-idle. |
---|
781 | 704 | * |
---|
782 | 705 | * If you add or remove a call to rcu_nmi_exit(), be sure to test |
---|
783 | 706 | * with CONFIG_RCU_EQS_DEBUG=y. |
---|
784 | 707 | */ |
---|
785 | | -void rcu_nmi_exit(void) |
---|
| 708 | +noinstr void rcu_nmi_exit(void) |
---|
786 | 709 | { |
---|
787 | | - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); |
---|
| 710 | + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); |
---|
788 | 711 | |
---|
| 712 | + instrumentation_begin(); |
---|
789 | 713 | /* |
---|
790 | 714 | * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks. |
---|
791 | 715 | * (We are exiting an NMI handler, so RCU better be paying attention |
---|
792 | 716 | * to us!) |
---|
793 | 717 | */ |
---|
794 | | - WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0); |
---|
| 718 | + WARN_ON_ONCE(rdp->dynticks_nmi_nesting <= 0); |
---|
795 | 719 | WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs()); |
---|
796 | 720 | |
---|
797 | 721 | /* |
---|
798 | 722 | * If the nesting level is not 1, the CPU wasn't RCU-idle, so |
---|
799 | 723 | * leave it in non-RCU-idle state. |
---|
800 | 724 | */ |
---|
801 | | - if (rdtp->dynticks_nmi_nesting != 1) { |
---|
802 | | - trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nmi_nesting, rdtp->dynticks_nmi_nesting - 2, rdtp->dynticks); |
---|
803 | | - WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */ |
---|
804 | | - rdtp->dynticks_nmi_nesting - 2); |
---|
| 725 | + if (rdp->dynticks_nmi_nesting != 1) { |
---|
| 726 | + trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2, |
---|
| 727 | + atomic_read(&rdp->dynticks)); |
---|
| 728 | + WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */ |
---|
| 729 | + rdp->dynticks_nmi_nesting - 2); |
---|
| 730 | + instrumentation_end(); |
---|
805 | 731 | return; |
---|
806 | 732 | } |
---|
807 | 733 | |
---|
808 | 734 | /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ |
---|
809 | | - trace_rcu_dyntick(TPS("Startirq"), rdtp->dynticks_nmi_nesting, 0, rdtp->dynticks); |
---|
810 | | - WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ |
---|
| 735 | + trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks)); |
---|
| 736 | + WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ |
---|
| 737 | + |
---|
| 738 | + if (!in_nmi()) |
---|
| 739 | + rcu_prepare_for_idle(); |
---|
| 740 | + |
---|
| 741 | + // instrumentation for the noinstr rcu_dynticks_eqs_enter() |
---|
| 742 | + instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks)); |
---|
| 743 | + instrumentation_end(); |
---|
| 744 | + |
---|
| 745 | + // RCU is watching here ... |
---|
811 | 746 | rcu_dynticks_eqs_enter(); |
---|
| 747 | + // ... but is no longer watching here. |
---|
| 748 | + |
---|
| 749 | + if (!in_nmi()) |
---|
| 750 | + rcu_dynticks_task_enter(); |
---|
812 | 751 | } |
---|
813 | 752 | |
---|
814 | 753 | /** |
---|
.. | .. |
---|
830 | 769 | * If you add or remove a call to rcu_irq_exit(), be sure to test with |
---|
831 | 770 | * CONFIG_RCU_EQS_DEBUG=y. |
---|
832 | 771 | */ |
---|
833 | | -void rcu_irq_exit(void) |
---|
| 772 | +void noinstr rcu_irq_exit(void) |
---|
834 | 773 | { |
---|
835 | | - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); |
---|
836 | | - |
---|
837 | 774 | lockdep_assert_irqs_disabled(); |
---|
838 | | - if (rdtp->dynticks_nmi_nesting == 1) |
---|
839 | | - rcu_prepare_for_idle(); |
---|
840 | 775 | rcu_nmi_exit(); |
---|
841 | | - if (rdtp->dynticks_nmi_nesting == 0) |
---|
842 | | - rcu_dynticks_task_enter(); |
---|
843 | 776 | } |
---|
| 777 | + |
---|
| 778 | +/** |
---|
| 779 | + * rcu_irq_exit_preempt - Inform RCU that current CPU is exiting irq |
---|
| 780 | + * towards in-kernel preemption |
---|
| 781 | + * |
---|
| 782 | + * Same as rcu_irq_exit() but has a sanity check that scheduling is safe |
---|
| 783 | + * from RCU point of view. Invoked from return from interrupt before kernel |
---|
| 784 | + * preemption. |
---|
| 785 | + */ |
---|
| 786 | +void rcu_irq_exit_preempt(void) |
---|
| 787 | +{ |
---|
| 788 | + lockdep_assert_irqs_disabled(); |
---|
| 789 | + rcu_nmi_exit(); |
---|
| 790 | + |
---|
| 791 | + RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) <= 0, |
---|
| 792 | + "RCU dynticks_nesting counter underflow/zero!"); |
---|
| 793 | + RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) != |
---|
| 794 | + DYNTICK_IRQ_NONIDLE, |
---|
| 795 | + "Bad RCU dynticks_nmi_nesting counter\n"); |
---|
| 796 | + RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(), |
---|
| 797 | + "RCU in extended quiescent state!"); |
---|
| 798 | +} |
---|
| 799 | + |
---|
| 800 | +#ifdef CONFIG_PROVE_RCU |
---|
| 801 | +/** |
---|
| 802 | + * rcu_irq_exit_check_preempt - Validate that scheduling is possible |
---|
| 803 | + */ |
---|
| 804 | +void rcu_irq_exit_check_preempt(void) |
---|
| 805 | +{ |
---|
| 806 | + lockdep_assert_irqs_disabled(); |
---|
| 807 | + |
---|
| 808 | + RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) <= 0, |
---|
| 809 | + "RCU dynticks_nesting counter underflow/zero!"); |
---|
| 810 | + RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) != |
---|
| 811 | + DYNTICK_IRQ_NONIDLE, |
---|
| 812 | + "Bad RCU dynticks_nmi_nesting counter\n"); |
---|
| 813 | + RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(), |
---|
| 814 | + "RCU in extended quiescent state!"); |
---|
| 815 | +} |
---|
| 816 | +#endif /* #ifdef CONFIG_PROVE_RCU */ |
---|
844 | 817 | |
---|
845 | 818 | /* |
---|
846 | 819 | * Wrapper for rcu_irq_exit() where interrupts are enabled. |
---|
.. | .. |
---|
865 | 838 | * allow for the possibility of usermode upcalls messing up our count of |
---|
866 | 839 | * interrupt nesting level during the busy period that is just now starting. |
---|
867 | 840 | */ |
---|
868 | | -static void rcu_eqs_exit(bool user) |
---|
| 841 | +static void noinstr rcu_eqs_exit(bool user) |
---|
869 | 842 | { |
---|
870 | | - struct rcu_dynticks *rdtp; |
---|
| 843 | + struct rcu_data *rdp; |
---|
871 | 844 | long oldval; |
---|
872 | 845 | |
---|
873 | 846 | lockdep_assert_irqs_disabled(); |
---|
874 | | - rdtp = this_cpu_ptr(&rcu_dynticks); |
---|
875 | | - oldval = rdtp->dynticks_nesting; |
---|
| 847 | + rdp = this_cpu_ptr(&rcu_data); |
---|
| 848 | + oldval = rdp->dynticks_nesting; |
---|
876 | 849 | WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0); |
---|
877 | 850 | if (oldval) { |
---|
878 | | - rdtp->dynticks_nesting++; |
---|
| 851 | + // RCU was already watching, so just do accounting and leave. |
---|
| 852 | + rdp->dynticks_nesting++; |
---|
879 | 853 | return; |
---|
880 | 854 | } |
---|
881 | 855 | rcu_dynticks_task_exit(); |
---|
| 856 | + // RCU is not watching here ... |
---|
882 | 857 | rcu_dynticks_eqs_exit(); |
---|
| 858 | + // ... but is watching here. |
---|
| 859 | + instrumentation_begin(); |
---|
| 860 | + |
---|
| 861 | + // instrumentation for the noinstr rcu_dynticks_eqs_exit() |
---|
| 862 | + instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks)); |
---|
| 863 | + |
---|
883 | 864 | rcu_cleanup_after_idle(); |
---|
884 | | - trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, 1, rdtp->dynticks); |
---|
| 865 | + trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks)); |
---|
885 | 866 | WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); |
---|
886 | | - WRITE_ONCE(rdtp->dynticks_nesting, 1); |
---|
887 | | - WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); |
---|
| 867 | + WRITE_ONCE(rdp->dynticks_nesting, 1); |
---|
| 868 | + WARN_ON_ONCE(rdp->dynticks_nmi_nesting); |
---|
| 869 | + WRITE_ONCE(rdp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); |
---|
| 870 | + instrumentation_end(); |
---|
888 | 871 | } |
---|
889 | 872 | |
---|
890 | 873 | /** |
---|
.. | .. |
---|
904 | 887 | rcu_eqs_exit(false); |
---|
905 | 888 | local_irq_restore(flags); |
---|
906 | 889 | } |
---|
| 890 | +EXPORT_SYMBOL_GPL(rcu_idle_exit); |
---|
907 | 891 | |
---|
908 | 892 | #ifdef CONFIG_NO_HZ_FULL |
---|
909 | 893 | /** |
---|
.. | .. |
---|
915 | 899 | * If you add or remove a call to rcu_user_exit(), be sure to test with |
---|
916 | 900 | * CONFIG_RCU_EQS_DEBUG=y. |
---|
917 | 901 | */ |
---|
918 | | -void rcu_user_exit(void) |
---|
| 902 | +void noinstr rcu_user_exit(void) |
---|
919 | 903 | { |
---|
920 | 904 | rcu_eqs_exit(1); |
---|
921 | 905 | } |
---|
| 906 | + |
---|
| 907 | +/** |
---|
| 908 | + * __rcu_irq_enter_check_tick - Enable scheduler tick on CPU if RCU needs it. |
---|
| 909 | + * |
---|
| 910 | + * The scheduler tick is not normally enabled when CPUs enter the kernel |
---|
| 911 | + * from nohz_full userspace execution. After all, nohz_full userspace |
---|
| 912 | + * execution is an RCU quiescent state and the time executing in the kernel |
---|
| 913 | + * is quite short. Except of course when it isn't. And it is not hard to |
---|
| 914 | + * cause a large system to spend tens of seconds or even minutes looping |
---|
| 915 | + * in the kernel, which can cause a number of problems, including RCU CPU |
---|
| 916 | + * stall warnings. |
---|
| 917 | + * |
---|
| 918 | + * Therefore, if a nohz_full CPU fails to report a quiescent state |
---|
| 919 | + * in a timely manner, the RCU grace-period kthread sets that CPU's |
---|
| 920 | + * ->rcu_urgent_qs flag with the expectation that the next interrupt or |
---|
| 921 | + * exception will invoke this function, which will turn on the scheduler |
---|
| 922 | + * tick, which will enable RCU to detect that CPU's quiescent states, |
---|
| 923 | + * for example, due to cond_resched() calls in CONFIG_PREEMPT=n kernels. |
---|
| 924 | + * The tick will be disabled once a quiescent state is reported for |
---|
| 925 | + * this CPU. |
---|
| 926 | + * |
---|
| 927 | + * Of course, in carefully tuned systems, there might never be an |
---|
| 928 | + * interrupt or exception. In that case, the RCU grace-period kthread |
---|
| 929 | + * will eventually cause one to happen. However, in less carefully |
---|
| 930 | + * controlled environments, this function allows RCU to get what it |
---|
| 931 | + * needs without creating otherwise useless interruptions. |
---|
| 932 | + */ |
---|
| 933 | +void __rcu_irq_enter_check_tick(void) |
---|
| 934 | +{ |
---|
| 935 | + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); |
---|
| 936 | + |
---|
| 937 | + // If we're here from NMI there's nothing to do. |
---|
| 938 | + if (in_nmi()) |
---|
| 939 | + return; |
---|
| 940 | + |
---|
| 941 | + RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(), |
---|
| 942 | + "Illegal rcu_irq_enter_check_tick() from extended quiescent state"); |
---|
| 943 | + |
---|
| 944 | + if (!tick_nohz_full_cpu(rdp->cpu) || |
---|
| 945 | + !READ_ONCE(rdp->rcu_urgent_qs) || |
---|
| 946 | + READ_ONCE(rdp->rcu_forced_tick)) { |
---|
| 947 | + // RCU doesn't need nohz_full help from this CPU, or it is |
---|
| 948 | + // already getting that help. |
---|
| 949 | + return; |
---|
| 950 | + } |
---|
| 951 | + |
---|
| 952 | + // We get here only when not in an extended quiescent state and |
---|
| 953 | + // from interrupts (as opposed to NMIs). Therefore, (1) RCU is |
---|
| 954 | + // already watching and (2) The fact that we are in an interrupt |
---|
| 955 | + // handler and that the rcu_node lock is an irq-disabled lock |
---|
| 956 | + // prevents self-deadlock. So we can safely recheck under the lock. |
---|
| 957 | + // Note that the nohz_full state currently cannot change. |
---|
| 958 | + raw_spin_lock_rcu_node(rdp->mynode); |
---|
| 959 | + if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) { |
---|
| 960 | + // A nohz_full CPU is in the kernel and RCU needs a |
---|
| 961 | + // quiescent state. Turn on the tick! |
---|
| 962 | + WRITE_ONCE(rdp->rcu_forced_tick, true); |
---|
| 963 | + tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU); |
---|
| 964 | + } |
---|
| 965 | + raw_spin_unlock_rcu_node(rdp->mynode); |
---|
| 966 | +} |
---|
| 967 | +NOKPROBE_SYMBOL(__rcu_irq_enter_check_tick); |
---|
922 | 968 | #endif /* CONFIG_NO_HZ_FULL */ |
---|
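Editor's note: __rcu_irq_enter_check_tick() above uses a common two-stage pattern: a cheap lockless check of ->rcu_urgent_qs and ->rcu_forced_tick filters out the overwhelmingly common case, and only then is the rcu_node lock taken and the flags rechecked before the tick dependency is actually set. A generic pthread sketch of that same check/recheck shape (all names here are invented):

```c
#include <pthread.h>
#include <stdatomic.h>

static pthread_mutex_t node_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_bool urgent_qs;     /* set by the grace-period kthread      */
static atomic_bool forced_tick;   /* set once the tick has been turned on */

static void turn_on_tick(void)    /* stand-in for enabling the tick       */
{
}

void irq_enter_check_tick(void)
{
	/* Stage 1: cheap, lockless filter for the common do-nothing case. */
	if (!atomic_load(&urgent_qs) || atomic_load(&forced_tick))
		return;                 /* nothing to do, or already done   */

	/* Stage 2: recheck under the lock so only one path enables the tick. */
	pthread_mutex_lock(&node_lock);
	if (atomic_load(&urgent_qs) && !atomic_load(&forced_tick)) {
		atomic_store(&forced_tick, true);
		turn_on_tick();
	}
	pthread_mutex_unlock(&node_lock);
}
```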
923 | 969 | |
---|
924 | 970 | /** |
---|
925 | 971 | * rcu_nmi_enter - inform RCU of entry to NMI context |
---|
926 | 972 | * |
---|
927 | | - * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and |
---|
928 | | - * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling know |
---|
| 973 | + * If the CPU was idle from RCU's viewpoint, update rdp->dynticks and |
---|
| 974 | + * rdp->dynticks_nmi_nesting to let the RCU grace-period handling know |
---|
929 | 975 | * that the CPU is active. This implementation permits nested NMIs, as |
---|
930 | 976 | * long as the nesting level does not overflow an int. (You will probably |
---|
931 | 977 | * run out of stack space first.) |
---|
.. | .. |
---|
933 | 979 | * If you add or remove a call to rcu_nmi_enter(), be sure to test |
---|
934 | 980 | * with CONFIG_RCU_EQS_DEBUG=y. |
---|
935 | 981 | */ |
---|
936 | | -void rcu_nmi_enter(void) |
---|
| 982 | +noinstr void rcu_nmi_enter(void) |
---|
937 | 983 | { |
---|
938 | | - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); |
---|
939 | 984 | long incby = 2; |
---|
| 985 | + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); |
---|
940 | 986 | |
---|
941 | 987 | /* Complain about underflow. */ |
---|
942 | | - WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0); |
---|
| 988 | + WARN_ON_ONCE(rdp->dynticks_nmi_nesting < 0); |
---|
943 | 989 | |
---|
944 | 990 | /* |
---|
945 | 991 | * If idle from RCU viewpoint, atomically increment ->dynticks |
---|
.. | .. |
---|
950 | 996 | * period (observation due to Andy Lutomirski). |
---|
951 | 997 | */ |
---|
952 | 998 | if (rcu_dynticks_curr_cpu_in_eqs()) { |
---|
| 999 | + |
---|
| 1000 | + if (!in_nmi()) |
---|
| 1001 | + rcu_dynticks_task_exit(); |
---|
| 1002 | + |
---|
| 1003 | + // RCU is not watching here ... |
---|
953 | 1004 | rcu_dynticks_eqs_exit(); |
---|
| 1005 | + // ... but is watching here. |
---|
| 1006 | + |
---|
| 1007 | + if (!in_nmi()) { |
---|
| 1008 | + instrumentation_begin(); |
---|
| 1009 | + rcu_cleanup_after_idle(); |
---|
| 1010 | + instrumentation_end(); |
---|
| 1011 | + } |
---|
| 1012 | + |
---|
| 1013 | + instrumentation_begin(); |
---|
| 1014 | + // instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs() |
---|
| 1015 | + instrument_atomic_read(&rdp->dynticks, sizeof(rdp->dynticks)); |
---|
| 1016 | + // instrumentation for the noinstr rcu_dynticks_eqs_exit() |
---|
| 1017 | + instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks)); |
---|
| 1018 | + |
---|
954 | 1019 | incby = 1; |
---|
| 1020 | + } else if (!in_nmi()) { |
---|
| 1021 | + instrumentation_begin(); |
---|
| 1022 | + rcu_irq_enter_check_tick(); |
---|
| 1023 | + } else { |
---|
| 1024 | + instrumentation_begin(); |
---|
955 | 1025 | } |
---|
| 1026 | + |
---|
956 | 1027 | trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="), |
---|
957 | | - rdtp->dynticks_nmi_nesting, |
---|
958 | | - rdtp->dynticks_nmi_nesting + incby, rdtp->dynticks); |
---|
959 | | - WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */ |
---|
960 | | - rdtp->dynticks_nmi_nesting + incby); |
---|
| 1028 | + rdp->dynticks_nmi_nesting, |
---|
| 1029 | + rdp->dynticks_nmi_nesting + incby, atomic_read(&rdp->dynticks)); |
---|
| 1030 | + instrumentation_end(); |
---|
| 1031 | + WRITE_ONCE(rdp->dynticks_nmi_nesting, /* Prevent store tearing. */ |
---|
| 1032 | + rdp->dynticks_nmi_nesting + incby); |
---|
961 | 1033 | barrier(); |
---|
962 | 1034 | } |
---|
963 | 1035 | |
---|
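The incby dance above is what lets the matching rcu_nmi_exit() tell an outermost entry from idle apart from a nested one: leaving an extended quiescent state bumps ->dynticks_nmi_nesting by only 1, while every nested entry adds 2. An informal trace of the from-idle case (a sketch for orientation, not code from this patch):

    dynticks_nmi_nesting:  0        /* CPU idle, RCU not watching */
    irq from idle:         0 -> 1   /* incby == 1, rcu_dynticks_eqs_exit() */
    NMI during that irq:   1 -> 3   /* incby == 2 */
    NMI exit:              3 -> 1   /* value != 1, so just subtract 2 */
    irq exit:              1 -> 0   /* value == 1, so rcu_dynticks_eqs_enter() */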
.. | .. |
---|
983 | 1055 | * If you add or remove a call to rcu_irq_enter(), be sure to test with |
---|
984 | 1056 | * CONFIG_RCU_EQS_DEBUG=y. |
---|
985 | 1057 | */ |
---|
986 | | -void rcu_irq_enter(void) |
---|
| 1058 | +noinstr void rcu_irq_enter(void) |
---|
987 | 1059 | { |
---|
988 | | - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); |
---|
989 | | - |
---|
990 | 1060 | lockdep_assert_irqs_disabled(); |
---|
991 | | - if (rdtp->dynticks_nmi_nesting == 0) |
---|
992 | | - rcu_dynticks_task_exit(); |
---|
993 | 1061 | rcu_nmi_enter(); |
---|
994 | | - if (rdtp->dynticks_nmi_nesting == 1) |
---|
995 | | - rcu_cleanup_after_idle(); |
---|
996 | 1062 | } |
---|
997 | 1063 | |
---|
998 | 1064 | /* |
---|
.. | .. |
---|
1010 | 1076 | local_irq_restore(flags); |
---|
1011 | 1077 | } |
---|
1012 | 1078 | |
---|
| 1079 | +/* |
---|
| 1080 | + * If any sort of urgency was applied to the current CPU (for example, |
---|
| 1081 | + * the scheduler-clock interrupt was enabled on a nohz_full CPU) in order |
---|
| 1082 | + * to get to a quiescent state, disable it. |
---|
| 1083 | + */ |
---|
| 1084 | +static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp) |
---|
| 1085 | +{ |
---|
| 1086 | + raw_lockdep_assert_held_rcu_node(rdp->mynode); |
---|
| 1087 | + WRITE_ONCE(rdp->rcu_urgent_qs, false); |
---|
| 1088 | + WRITE_ONCE(rdp->rcu_need_heavy_qs, false); |
---|
| 1089 | + if (tick_nohz_full_cpu(rdp->cpu) && rdp->rcu_forced_tick) { |
---|
| 1090 | + tick_dep_clear_cpu(rdp->cpu, TICK_DEP_BIT_RCU); |
---|
| 1091 | + WRITE_ONCE(rdp->rcu_forced_tick, false); |
---|
| 1092 | + } |
---|
| 1093 | +} |
---|
| 1094 | + |
---|
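rcu_disable_urgency_upon_qs() is the other half of the forced-tick handshake begun in __rcu_irq_enter_check_tick() above: the interrupt path sets the TICK_DEP_BIT_RCU dependency, and the quiescent-state reporting path clears it here. Reduced to its essentials (an illustrative sketch only, with locking and the flag rechecks omitted):

    /* Interrupt entry on a nohz_full CPU that owes RCU a quiescent state. */
    WRITE_ONCE(rdp->rcu_forced_tick, true);
    tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);    /* keep the tick running */

    /* Later, once this CPU's quiescent state reaches the rcu_node tree. */
    tick_dep_clear_cpu(rdp->cpu, TICK_DEP_BIT_RCU);  /* the tick may stop again */
    WRITE_ONCE(rdp->rcu_forced_tick, false);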
1013 | 1095 | /** |
---|
1014 | | - * rcu_is_watching - see if RCU thinks that the current CPU is idle |
---|
| 1096 | + * rcu_is_watching - see if RCU thinks that the current CPU is not idle |
---|
1015 | 1097 | * |
---|
1016 | 1098 | * Return true if RCU is watching the running CPU, which means that this |
---|
1017 | 1099 | * CPU can safely enter RCU read-side critical sections. In other words, |
---|
1018 | | - * if the current CPU is in its idle loop and is neither in an interrupt |
---|
1019 | | - * or NMI handler, return true. |
---|
| 1100 | + * if the current CPU is not in its idle loop or is in an interrupt or |
---|
| 1101 | + * NMI handler, return true. |
---|
| 1102 | + * |
---|
| 1103 | + * Make notrace because it can be called by ftrace's internal functions, |
---|
| 1104 | + * and marking it notrace avoids unnecessary recursive calls. |
---|
1020 | 1105 | */ |
---|
1021 | | -bool notrace rcu_is_watching(void) |
---|
| 1106 | +notrace bool rcu_is_watching(void) |
---|
1022 | 1107 | { |
---|
1023 | 1108 | bool ret; |
---|
1024 | 1109 | |
---|
.. | .. |
---|
1044 | 1129 | cpu = task_cpu(t); |
---|
1045 | 1130 | if (!task_curr(t)) |
---|
1046 | 1131 | return; /* This task is not running on that CPU. */ |
---|
1047 | | - smp_store_release(per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, cpu), true); |
---|
| 1132 | + smp_store_release(per_cpu_ptr(&rcu_data.rcu_urgent_qs, cpu), true); |
---|
1048 | 1133 | } |
---|
1049 | 1134 | |
---|
1050 | 1135 | #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) |
---|
.. | .. |
---|
1055 | 1140 | * Disable preemption to avoid false positives that could otherwise |
---|
1056 | 1141 | * happen due to the current CPU number being sampled, this task being |
---|
1057 | 1142 | * preempted, its old CPU being taken offline, resuming on some other CPU, |
---|
1058 | | - * then determining that its old CPU is now offline. Because there are |
---|
1059 | | - * multiple flavors of RCU, and because this function can be called in the |
---|
1060 | | - * midst of updating the flavors while a given CPU coming online or going |
---|
1061 | | - * offline, it is necessary to check all flavors. If any of the flavors |
---|
1062 | | - * believe that given CPU is online, it is considered to be online. |
---|
| 1143 | + * then determining that its old CPU is now offline. |
---|
1063 | 1144 | * |
---|
1064 | 1145 | * Disable checking if in an NMI handler because we cannot safely |
---|
1065 | 1146 | * report errors from NMI handlers anyway. In addition, it is OK to use |
---|
.. | .. |
---|
1070 | 1151 | { |
---|
1071 | 1152 | struct rcu_data *rdp; |
---|
1072 | 1153 | struct rcu_node *rnp; |
---|
1073 | | - struct rcu_state *rsp; |
---|
| 1154 | + bool ret = false; |
---|
1074 | 1155 | |
---|
1075 | 1156 | if (in_nmi() || !rcu_scheduler_fully_active) |
---|
1076 | 1157 | return true; |
---|
1077 | | - preempt_disable(); |
---|
1078 | | - for_each_rcu_flavor(rsp) { |
---|
1079 | | - rdp = this_cpu_ptr(rsp->rda); |
---|
1080 | | - rnp = rdp->mynode; |
---|
1081 | | - if (rdp->grpmask & rcu_rnp_online_cpus(rnp)) { |
---|
1082 | | - preempt_enable(); |
---|
1083 | | - return true; |
---|
1084 | | - } |
---|
1085 | | - } |
---|
1086 | | - preempt_enable(); |
---|
1087 | | - return false; |
---|
| 1158 | + preempt_disable_notrace(); |
---|
| 1159 | + rdp = this_cpu_ptr(&rcu_data); |
---|
| 1160 | + rnp = rdp->mynode; |
---|
| 1161 | + if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || READ_ONCE(rnp->ofl_seq) & 0x1) |
---|
| 1162 | + ret = true; |
---|
| 1163 | + preempt_enable_notrace(); |
---|
| 1164 | + return ret; |
---|
1088 | 1165 | } |
---|
1089 | 1166 | EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); |
---|
1090 | 1167 | |
---|
1091 | 1168 | #endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */ |
---|
1092 | | - |
---|
1093 | | -/** |
---|
1094 | | - * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle |
---|
1095 | | - * |
---|
1096 | | - * If the current CPU is idle or running at a first-level (not nested) |
---|
1097 | | - * interrupt from idle, return true. The caller must have at least |
---|
1098 | | - * disabled preemption. |
---|
1099 | | - */ |
---|
1100 | | -static int rcu_is_cpu_rrupt_from_idle(void) |
---|
1101 | | -{ |
---|
1102 | | - return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 0 && |
---|
1103 | | - __this_cpu_read(rcu_dynticks.dynticks_nmi_nesting) <= 1; |
---|
1104 | | -} |
---|
1105 | 1169 | |
---|
1106 | 1170 | /* |
---|
1107 | 1171 | * We are reporting a quiescent state on behalf of some other CPU, so |
---|
.. | .. |
---|
1127 | 1191 | */ |
---|
1128 | 1192 | static int dyntick_save_progress_counter(struct rcu_data *rdp) |
---|
1129 | 1193 | { |
---|
1130 | | - rdp->dynticks_snap = rcu_dynticks_snap(rdp->dynticks); |
---|
| 1194 | + rdp->dynticks_snap = rcu_dynticks_snap(rdp); |
---|
1131 | 1195 | if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) { |
---|
1132 | | - trace_rcu_fqs(rdp->rsp->name, rdp->gp_seq, rdp->cpu, TPS("dti")); |
---|
| 1196 | + trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti")); |
---|
1133 | 1197 | rcu_gpnum_ovf(rdp->mynode, rdp); |
---|
1134 | 1198 | return 1; |
---|
1135 | 1199 | } |
---|
1136 | 1200 | return 0; |
---|
1137 | | -} |
---|
1138 | | - |
---|
1139 | | -/* |
---|
1140 | | - * Handler for the irq_work request posted when a grace period has |
---|
1141 | | - * gone on for too long, but not yet long enough for an RCU CPU |
---|
1142 | | - * stall warning. Set state appropriately, but just complain if |
---|
1143 | | - * there is unexpected state on entry. |
---|
1144 | | - */ |
---|
1145 | | -static void rcu_iw_handler(struct irq_work *iwp) |
---|
1146 | | -{ |
---|
1147 | | - struct rcu_data *rdp; |
---|
1148 | | - struct rcu_node *rnp; |
---|
1149 | | - |
---|
1150 | | - rdp = container_of(iwp, struct rcu_data, rcu_iw); |
---|
1151 | | - rnp = rdp->mynode; |
---|
1152 | | - raw_spin_lock_rcu_node(rnp); |
---|
1153 | | - if (!WARN_ON_ONCE(!rdp->rcu_iw_pending)) { |
---|
1154 | | - rdp->rcu_iw_gp_seq = rnp->gp_seq; |
---|
1155 | | - rdp->rcu_iw_pending = false; |
---|
1156 | | - } |
---|
1157 | | - raw_spin_unlock_rcu_node(rnp); |
---|
1158 | 1201 | } |
---|
1159 | 1202 | |
---|
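dyntick_save_progress_counter() is the first half of a two-step check run from the force-quiescent-state scans: the first scan snapshots each holdout CPU's ->dynticks, and later scans (rcu_implicit_dynticks_qs() below) compare against that snapshot. In outline, using the same helpers that appear in this patch (a simplified sketch, with tracing and the overflow bookkeeping omitted):

    /* First scan: */
    rdp->dynticks_snap = rcu_dynticks_snap(rdp);            /* record the counter */
    if (rcu_dynticks_in_eqs(rdp->dynticks_snap))
            return 1;                   /* CPU is idle right now: quiescent */

    /* Later scans: */
    if (rcu_dynticks_in_eqs_since(rdp, rdp->dynticks_snap))
            return 1;                   /* CPU was, or passed through, idle */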
1160 | 1203 | /* |
---|
.. | .. |
---|
1178 | 1221 | * read-side critical section that started before the beginning |
---|
1179 | 1222 | * of the current RCU grace period. |
---|
1180 | 1223 | */ |
---|
1181 | | - if (rcu_dynticks_in_eqs_since(rdp->dynticks, rdp->dynticks_snap)) { |
---|
1182 | | - trace_rcu_fqs(rdp->rsp->name, rdp->gp_seq, rdp->cpu, TPS("dti")); |
---|
1183 | | - rdp->dynticks_fqs++; |
---|
| 1224 | + if (rcu_dynticks_in_eqs_since(rdp, rdp->dynticks_snap)) { |
---|
| 1225 | + trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti")); |
---|
1184 | 1226 | rcu_gpnum_ovf(rnp, rdp); |
---|
1185 | 1227 | return 1; |
---|
1186 | 1228 | } |
---|
1187 | 1229 | |
---|
1188 | 1230 | /* |
---|
1189 | | - * Has this CPU encountered a cond_resched() since the beginning |
---|
1190 | | - * of the grace period? For this to be the case, the CPU has to |
---|
1191 | | - * have noticed the current grace period. This might not be the |
---|
1192 | | - * case for nohz_full CPUs looping in the kernel. |
---|
| 1231 | + * Complain if a CPU that is considered to be offline from RCU's |
---|
| 1232 | + * perspective has not yet reported a quiescent state. After all, |
---|
| 1233 | + * the offline CPU should have reported a quiescent state during |
---|
| 1234 | + * the CPU-offline process, or, failing that, by rcu_gp_init() |
---|
| 1235 | + * if it ran concurrently with either the CPU going offline or the |
---|
| 1236 | + * last task on a leaf rcu_node structure exiting its RCU read-side |
---|
| 1237 | + * critical section while all CPUs corresponding to that structure |
---|
| 1238 | + * are offline. This added warning detects bugs in any of these |
---|
| 1239 | + * code paths. |
---|
| 1240 | + * |
---|
| 1241 | + * The rcu_node structure's ->lock is held here, which excludes |
---|
| 1242 | + * the relevant portions of the CPU-hotplug code, the grace-period |
---|
| 1243 | + * initialization code, and the rcu_read_unlock() code paths. |
---|
| 1244 | + * |
---|
| 1245 | + * For more detail, please refer to the "Hotplug CPU" section |
---|
| 1246 | + * of RCU's Requirements documentation. |
---|
1193 | 1247 | */ |
---|
1194 | | - jtsq = jiffies_till_sched_qs; |
---|
1195 | | - ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu); |
---|
1196 | | - if (time_after(jiffies, rdp->rsp->gp_start + jtsq) && |
---|
1197 | | - READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) && |
---|
1198 | | - rcu_seq_current(&rdp->gp_seq) == rnp->gp_seq && !rdp->gpwrap) { |
---|
1199 | | - trace_rcu_fqs(rdp->rsp->name, rdp->gp_seq, rdp->cpu, TPS("rqc")); |
---|
1200 | | - rcu_gpnum_ovf(rnp, rdp); |
---|
1201 | | - return 1; |
---|
1202 | | - } else if (time_after(jiffies, rdp->rsp->gp_start + jtsq)) { |
---|
1203 | | - /* Load rcu_qs_ctr before store to rcu_urgent_qs. */ |
---|
1204 | | - smp_store_release(ruqp, true); |
---|
1205 | | - } |
---|
1206 | | - |
---|
1207 | | - /* If waiting too long on an offline CPU, complain. */ |
---|
1208 | | - if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp)) && |
---|
1209 | | - time_after(jiffies, rdp->rsp->gp_start + HZ)) { |
---|
| 1248 | + if (WARN_ON_ONCE(!(rdp->grpmask & rcu_rnp_online_cpus(rnp)))) { |
---|
1210 | 1249 | bool onl; |
---|
1211 | 1250 | struct rcu_node *rnp1; |
---|
1212 | 1251 | |
---|
1213 | | - WARN_ON(1); /* Offline CPUs are supposed to report QS! */ |
---|
1214 | 1252 | pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n", |
---|
1215 | 1253 | __func__, rnp->grplo, rnp->grphi, rnp->level, |
---|
1216 | 1254 | (long)rnp->gp_seq, (long)rnp->completedqs); |
---|
.. | .. |
---|
1227 | 1265 | |
---|
1228 | 1266 | /* |
---|
1229 | 1267 | * A CPU running for an extended time within the kernel can |
---|
1230 | | - * delay RCU grace periods. When the CPU is in NO_HZ_FULL mode, |
---|
1231 | | - * even context-switching back and forth between a pair of |
---|
1232 | | - * in-kernel CPU-bound tasks cannot advance grace periods. |
---|
1233 | | - * So if the grace period is old enough, make the CPU pay attention. |
---|
1234 | | - * Note that the unsynchronized assignments to the per-CPU |
---|
1235 | | - * rcu_need_heavy_qs variable are safe. Yes, setting of |
---|
1236 | | - * bits can be lost, but they will be set again on the next |
---|
1237 | | - * force-quiescent-state pass. So lost bit sets do not result |
---|
1238 | | - * in incorrect behavior, merely in a grace period lasting |
---|
1239 | | - * a few jiffies longer than it might otherwise. Because |
---|
1240 | | - * there are at most four threads involved, and because the |
---|
1241 | | - * updates are only once every few jiffies, the probability of |
---|
1242 | | - * lossage (and thus of slight grace-period extension) is |
---|
1243 | | - * quite low. |
---|
| 1268 | + * delay RCU grace periods: (1) At age jiffies_to_sched_qs, |
---|
| 1269 | + * set .rcu_urgent_qs, (2) At age 2*jiffies_to_sched_qs, set |
---|
| 1270 | + * both .rcu_need_heavy_qs and .rcu_urgent_qs. Note that the |
---|
| 1271 | + * unsynchronized assignments to the per-CPU rcu_need_heavy_qs |
---|
| 1272 | + * variable are safe because the assignments are repeated if this |
---|
| 1273 | + * CPU failed to pass through a quiescent state. This code |
---|
| 1274 | + * also checks .jiffies_resched in case jiffies_to_sched_qs |
---|
| 1275 | + * is set way high. |
---|
1244 | 1276 | */ |
---|
1245 | | - rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu); |
---|
| 1277 | + jtsq = READ_ONCE(jiffies_to_sched_qs); |
---|
| 1278 | + ruqp = per_cpu_ptr(&rcu_data.rcu_urgent_qs, rdp->cpu); |
---|
| 1279 | + rnhqp = &per_cpu(rcu_data.rcu_need_heavy_qs, rdp->cpu); |
---|
1246 | 1280 | if (!READ_ONCE(*rnhqp) && |
---|
1247 | | - (time_after(jiffies, rdp->rsp->gp_start + jtsq) || |
---|
1248 | | - time_after(jiffies, rdp->rsp->jiffies_resched))) { |
---|
| 1281 | + (time_after(jiffies, rcu_state.gp_start + jtsq * 2) || |
---|
| 1282 | + time_after(jiffies, rcu_state.jiffies_resched) || |
---|
| 1283 | + rcu_state.cbovld)) { |
---|
1249 | 1284 | WRITE_ONCE(*rnhqp, true); |
---|
1250 | 1285 | /* Store rcu_need_heavy_qs before rcu_urgent_qs. */ |
---|
1251 | 1286 | smp_store_release(ruqp, true); |
---|
1252 | | - rdp->rsp->jiffies_resched += jtsq; /* Re-enable beating. */ |
---|
| 1287 | + } else if (time_after(jiffies, rcu_state.gp_start + jtsq)) { |
---|
| 1288 | + WRITE_ONCE(*ruqp, true); |
---|
1253 | 1289 | } |
---|
1254 | 1290 | |
---|
1255 | 1291 | /* |
---|
1256 | | - * If more than halfway to RCU CPU stall-warning time, do a |
---|
1257 | | - * resched_cpu() to try to loosen things up a bit. Also check to |
---|
1258 | | - * see if the CPU is getting hammered with interrupts, but only |
---|
1259 | | - * once per grace period, just to keep the IPIs down to a dull roar. |
---|
| 1292 | + * NO_HZ_FULL CPUs can run in-kernel without rcu_sched_clock_irq! |
---|
| 1293 | + * The above code handles this, but only for straight cond_resched(). |
---|
| 1294 | + * And some in-kernel loops check need_resched() before calling |
---|
| 1295 | + * cond_resched(), which defeats the above code for CPUs that are |
---|
| 1296 | + * running in-kernel with scheduling-clock interrupts disabled. |
---|
| 1297 | + * So hit them over the head with the resched_cpu() hammer! |
---|
1260 | 1298 | */ |
---|
1261 | | - if (jiffies - rdp->rsp->gp_start > rcu_jiffies_till_stall_check() / 2) { |
---|
| 1299 | + if (tick_nohz_full_cpu(rdp->cpu) && |
---|
| 1300 | + (time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq * 3) || |
---|
| 1301 | + rcu_state.cbovld)) { |
---|
| 1302 | + WRITE_ONCE(*ruqp, true); |
---|
1262 | 1303 | resched_cpu(rdp->cpu); |
---|
| 1304 | + WRITE_ONCE(rdp->last_fqs_resched, jiffies); |
---|
| 1305 | + } |
---|
| 1306 | + |
---|
| 1307 | + /* |
---|
| 1308 | + * If more than halfway to RCU CPU stall-warning time, invoke |
---|
| 1309 | + * resched_cpu() more frequently to try to loosen things up a bit. |
---|
| 1310 | + * Also check to see if the CPU is getting hammered with interrupts, |
---|
| 1311 | + * but only once per grace period, just to keep the IPIs down to |
---|
| 1312 | + * a dull roar. |
---|
| 1313 | + */ |
---|
| 1314 | + if (time_after(jiffies, rcu_state.jiffies_resched)) { |
---|
| 1315 | + if (time_after(jiffies, |
---|
| 1316 | + READ_ONCE(rdp->last_fqs_resched) + jtsq)) { |
---|
| 1317 | + resched_cpu(rdp->cpu); |
---|
| 1318 | + WRITE_ONCE(rdp->last_fqs_resched, jiffies); |
---|
| 1319 | + } |
---|
1263 | 1320 | if (IS_ENABLED(CONFIG_IRQ_WORK) && |
---|
1264 | 1321 | !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq && |
---|
1265 | 1322 | (rnp->ffmask & rdp->grpmask)) { |
---|
1266 | 1323 | init_irq_work(&rdp->rcu_iw, rcu_iw_handler); |
---|
| 1324 | + atomic_set(&rdp->rcu_iw.flags, IRQ_WORK_HARD_IRQ); |
---|
1267 | 1325 | rdp->rcu_iw_pending = true; |
---|
1268 | 1326 | rdp->rcu_iw_gp_seq = rnp->gp_seq; |
---|
1269 | 1327 | irq_work_queue_on(&rdp->rcu_iw, rdp->cpu); |
---|
.. | .. |
---|
1273 | 1331 | return 0; |
---|
1274 | 1332 | } |
---|
1275 | 1333 | |
---|
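Pulling the pieces of rcu_implicit_dynticks_qs() together, the pressure applied to a holdout CPU escalates roughly as follows, writing jtsq for jiffies_to_sched_qs (a summary of the code above, not an additional mechanism):

    after gp_start + jtsq        set .rcu_urgent_qs
    after gp_start + 2*jtsq      also set .rcu_need_heavy_qs (or earlier if
                                 jiffies_resched has passed or callbacks are
                                 overloaded)
    nohz_full CPUs               resched_cpu() once last_fqs_resched + 3*jtsq
                                 has passed
    after jiffies_resched        resched_cpu() at most once per jtsq, plus at
                                 most one irq_work per grace period to check
                                 for an interrupt storm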
1276 | | -static void record_gp_stall_check_time(struct rcu_state *rsp) |
---|
1277 | | -{ |
---|
1278 | | - unsigned long j = jiffies; |
---|
1279 | | - unsigned long j1; |
---|
1280 | | - |
---|
1281 | | - rsp->gp_start = j; |
---|
1282 | | - j1 = rcu_jiffies_till_stall_check(); |
---|
1283 | | - /* Record ->gp_start before ->jiffies_stall. */ |
---|
1284 | | - smp_store_release(&rsp->jiffies_stall, j + j1); /* ^^^ */ |
---|
1285 | | - rsp->jiffies_resched = j + j1 / 2; |
---|
1286 | | - rsp->n_force_qs_gpstart = READ_ONCE(rsp->n_force_qs); |
---|
1287 | | -} |
---|
1288 | | - |
---|
1289 | | -/* |
---|
1290 | | - * Convert a ->gp_state value to a character string. |
---|
1291 | | - */ |
---|
1292 | | -static const char *gp_state_getname(short gs) |
---|
1293 | | -{ |
---|
1294 | | - if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names)) |
---|
1295 | | - return "???"; |
---|
1296 | | - return gp_state_names[gs]; |
---|
1297 | | -} |
---|
1298 | | - |
---|
1299 | | -/* |
---|
1300 | | - * Complain about starvation of grace-period kthread. |
---|
1301 | | - */ |
---|
1302 | | -static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp) |
---|
1303 | | -{ |
---|
1304 | | - unsigned long gpa; |
---|
1305 | | - unsigned long j; |
---|
1306 | | - |
---|
1307 | | - j = jiffies; |
---|
1308 | | - gpa = READ_ONCE(rsp->gp_activity); |
---|
1309 | | - if (j - gpa > 2 * HZ) { |
---|
1310 | | - pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n", |
---|
1311 | | - rsp->name, j - gpa, |
---|
1312 | | - (long)rcu_seq_current(&rsp->gp_seq), |
---|
1313 | | - rsp->gp_flags, |
---|
1314 | | - gp_state_getname(rsp->gp_state), rsp->gp_state, |
---|
1315 | | - rsp->gp_kthread ? rsp->gp_kthread->state : ~0, |
---|
1316 | | - rsp->gp_kthread ? task_cpu(rsp->gp_kthread) : -1); |
---|
1317 | | - if (rsp->gp_kthread) { |
---|
1318 | | - pr_err("RCU grace-period kthread stack dump:\n"); |
---|
1319 | | - sched_show_task(rsp->gp_kthread); |
---|
1320 | | - wake_up_process(rsp->gp_kthread); |
---|
1321 | | - } |
---|
1322 | | - } |
---|
1323 | | -} |
---|
1324 | | - |
---|
1325 | | -/* |
---|
1326 | | - * Dump stacks of all tasks running on stalled CPUs. First try using |
---|
1327 | | - * NMIs, but fall back to manual remote stack tracing on architectures |
---|
1328 | | - * that don't support NMI-based stack dumps. The NMI-triggered stack |
---|
1329 | | - * traces are more accurate because they are printed by the target CPU. |
---|
1330 | | - */ |
---|
1331 | | -static void rcu_dump_cpu_stacks(struct rcu_state *rsp) |
---|
1332 | | -{ |
---|
1333 | | - int cpu; |
---|
1334 | | - unsigned long flags; |
---|
1335 | | - struct rcu_node *rnp; |
---|
1336 | | - |
---|
1337 | | - rcu_for_each_leaf_node(rsp, rnp) { |
---|
1338 | | - raw_spin_lock_irqsave_rcu_node(rnp, flags); |
---|
1339 | | - for_each_leaf_node_possible_cpu(rnp, cpu) |
---|
1340 | | - if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) |
---|
1341 | | - if (!trigger_single_cpu_backtrace(cpu)) |
---|
1342 | | - dump_cpu_task(cpu); |
---|
1343 | | - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
1344 | | - } |
---|
1345 | | -} |
---|
1346 | | - |
---|
1347 | | -/* |
---|
1348 | | - * If too much time has passed in the current grace period, and if |
---|
1349 | | - * so configured, go kick the relevant kthreads. |
---|
1350 | | - */ |
---|
1351 | | -static void rcu_stall_kick_kthreads(struct rcu_state *rsp) |
---|
1352 | | -{ |
---|
1353 | | - unsigned long j; |
---|
1354 | | - |
---|
1355 | | - if (!rcu_kick_kthreads) |
---|
1356 | | - return; |
---|
1357 | | - j = READ_ONCE(rsp->jiffies_kick_kthreads); |
---|
1358 | | - if (time_after(jiffies, j) && rsp->gp_kthread && |
---|
1359 | | - (rcu_gp_in_progress(rsp) || READ_ONCE(rsp->gp_flags))) { |
---|
1360 | | - WARN_ONCE(1, "Kicking %s grace-period kthread\n", rsp->name); |
---|
1361 | | - rcu_ftrace_dump(DUMP_ALL); |
---|
1362 | | - wake_up_process(rsp->gp_kthread); |
---|
1363 | | - WRITE_ONCE(rsp->jiffies_kick_kthreads, j + HZ); |
---|
1364 | | - } |
---|
1365 | | -} |
---|
1366 | | - |
---|
1367 | | -static void panic_on_rcu_stall(void) |
---|
1368 | | -{ |
---|
1369 | | - if (sysctl_panic_on_rcu_stall) |
---|
1370 | | - panic("RCU Stall\n"); |
---|
1371 | | -} |
---|
1372 | | - |
---|
1373 | | -static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gp_seq) |
---|
1374 | | -{ |
---|
1375 | | - int cpu; |
---|
1376 | | - unsigned long flags; |
---|
1377 | | - unsigned long gpa; |
---|
1378 | | - unsigned long j; |
---|
1379 | | - int ndetected = 0; |
---|
1380 | | - struct rcu_node *rnp = rcu_get_root(rsp); |
---|
1381 | | - long totqlen = 0; |
---|
1382 | | - |
---|
1383 | | - /* Kick and suppress, if so configured. */ |
---|
1384 | | - rcu_stall_kick_kthreads(rsp); |
---|
1385 | | - if (rcu_cpu_stall_suppress) |
---|
1386 | | - return; |
---|
1387 | | - |
---|
1388 | | - /* |
---|
1389 | | - * OK, time to rat on our buddy... |
---|
1390 | | - * See Documentation/RCU/stallwarn.txt for info on how to debug |
---|
1391 | | - * RCU CPU stall warnings. |
---|
1392 | | - */ |
---|
1393 | | - pr_err("INFO: %s detected stalls on CPUs/tasks:", rsp->name); |
---|
1394 | | - print_cpu_stall_info_begin(); |
---|
1395 | | - rcu_for_each_leaf_node(rsp, rnp) { |
---|
1396 | | - raw_spin_lock_irqsave_rcu_node(rnp, flags); |
---|
1397 | | - ndetected += rcu_print_task_stall(rnp); |
---|
1398 | | - if (rnp->qsmask != 0) { |
---|
1399 | | - for_each_leaf_node_possible_cpu(rnp, cpu) |
---|
1400 | | - if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) { |
---|
1401 | | - print_cpu_stall_info(rsp, cpu); |
---|
1402 | | - ndetected++; |
---|
1403 | | - } |
---|
1404 | | - } |
---|
1405 | | - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
1406 | | - } |
---|
1407 | | - |
---|
1408 | | - print_cpu_stall_info_end(); |
---|
1409 | | - for_each_possible_cpu(cpu) |
---|
1410 | | - totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda, |
---|
1411 | | - cpu)->cblist); |
---|
1412 | | - pr_cont("(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n", |
---|
1413 | | - smp_processor_id(), (long)(jiffies - rsp->gp_start), |
---|
1414 | | - (long)rcu_seq_current(&rsp->gp_seq), totqlen); |
---|
1415 | | - if (ndetected) { |
---|
1416 | | - rcu_dump_cpu_stacks(rsp); |
---|
1417 | | - |
---|
1418 | | - /* Complain about tasks blocking the grace period. */ |
---|
1419 | | - rcu_print_detail_task_stall(rsp); |
---|
1420 | | - } else { |
---|
1421 | | - if (rcu_seq_current(&rsp->gp_seq) != gp_seq) { |
---|
1422 | | - pr_err("INFO: Stall ended before state dump start\n"); |
---|
1423 | | - } else { |
---|
1424 | | - j = jiffies; |
---|
1425 | | - gpa = READ_ONCE(rsp->gp_activity); |
---|
1426 | | - pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n", |
---|
1427 | | - rsp->name, j - gpa, j, gpa, |
---|
1428 | | - jiffies_till_next_fqs, |
---|
1429 | | - rcu_get_root(rsp)->qsmask); |
---|
1430 | | - /* In this case, the current CPU might be at fault. */ |
---|
1431 | | - sched_show_task(current); |
---|
1432 | | - } |
---|
1433 | | - } |
---|
1434 | | - /* Rewrite if needed in case of slow consoles. */ |
---|
1435 | | - if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall))) |
---|
1436 | | - WRITE_ONCE(rsp->jiffies_stall, |
---|
1437 | | - jiffies + 3 * rcu_jiffies_till_stall_check() + 3); |
---|
1438 | | - |
---|
1439 | | - rcu_check_gp_kthread_starvation(rsp); |
---|
1440 | | - |
---|
1441 | | - atomic_notifier_call_chain(&rcu_stall_notifier_list, 0, NULL); |
---|
1442 | | - |
---|
1443 | | - panic_on_rcu_stall(); |
---|
1444 | | - |
---|
1445 | | - force_quiescent_state(rsp); /* Kick them all. */ |
---|
1446 | | -} |
---|
1447 | | - |
---|
1448 | | -static void print_cpu_stall(struct rcu_state *rsp) |
---|
1449 | | -{ |
---|
1450 | | - int cpu; |
---|
1451 | | - unsigned long flags; |
---|
1452 | | - struct rcu_data *rdp = this_cpu_ptr(rsp->rda); |
---|
1453 | | - struct rcu_node *rnp = rcu_get_root(rsp); |
---|
1454 | | - long totqlen = 0; |
---|
1455 | | - |
---|
1456 | | - /* Kick and suppress, if so configured. */ |
---|
1457 | | - rcu_stall_kick_kthreads(rsp); |
---|
1458 | | - if (rcu_cpu_stall_suppress) |
---|
1459 | | - return; |
---|
1460 | | - |
---|
1461 | | - /* |
---|
1462 | | - * OK, time to rat on ourselves... |
---|
1463 | | - * See Documentation/RCU/stallwarn.txt for info on how to debug |
---|
1464 | | - * RCU CPU stall warnings. |
---|
1465 | | - */ |
---|
1466 | | - pr_err("INFO: %s self-detected stall on CPU", rsp->name); |
---|
1467 | | - print_cpu_stall_info_begin(); |
---|
1468 | | - raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); |
---|
1469 | | - print_cpu_stall_info(rsp, smp_processor_id()); |
---|
1470 | | - raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags); |
---|
1471 | | - print_cpu_stall_info_end(); |
---|
1472 | | - for_each_possible_cpu(cpu) |
---|
1473 | | - totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda, |
---|
1474 | | - cpu)->cblist); |
---|
1475 | | - pr_cont(" (t=%lu jiffies g=%ld q=%lu)\n", |
---|
1476 | | - jiffies - rsp->gp_start, |
---|
1477 | | - (long)rcu_seq_current(&rsp->gp_seq), totqlen); |
---|
1478 | | - |
---|
1479 | | - rcu_check_gp_kthread_starvation(rsp); |
---|
1480 | | - |
---|
1481 | | - rcu_dump_cpu_stacks(rsp); |
---|
1482 | | - |
---|
1483 | | - raw_spin_lock_irqsave_rcu_node(rnp, flags); |
---|
1484 | | - /* Rewrite if needed in case of slow consoles. */ |
---|
1485 | | - if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall))) |
---|
1486 | | - WRITE_ONCE(rsp->jiffies_stall, |
---|
1487 | | - jiffies + 3 * rcu_jiffies_till_stall_check() + 3); |
---|
1488 | | - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
1489 | | - |
---|
1490 | | - panic_on_rcu_stall(); |
---|
1491 | | - |
---|
1492 | | - /* |
---|
1493 | | - * Attempt to revive the RCU machinery by forcing a context switch. |
---|
1494 | | - * |
---|
1495 | | - * A context switch would normally allow the RCU state machine to make |
---|
1496 | | - * progress and it could be we're stuck in kernel space without context |
---|
1497 | | - * switches for an entirely unreasonable amount of time. |
---|
1498 | | - */ |
---|
1499 | | - resched_cpu(smp_processor_id()); |
---|
1500 | | -} |
---|
1501 | | - |
---|
1502 | | -static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) |
---|
1503 | | -{ |
---|
1504 | | - unsigned long gs1; |
---|
1505 | | - unsigned long gs2; |
---|
1506 | | - unsigned long gps; |
---|
1507 | | - unsigned long j; |
---|
1508 | | - unsigned long jn; |
---|
1509 | | - unsigned long js; |
---|
1510 | | - struct rcu_node *rnp; |
---|
1511 | | - |
---|
1512 | | - if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) || |
---|
1513 | | - !rcu_gp_in_progress(rsp)) |
---|
1514 | | - return; |
---|
1515 | | - rcu_stall_kick_kthreads(rsp); |
---|
1516 | | - j = jiffies; |
---|
1517 | | - |
---|
1518 | | - /* |
---|
1519 | | - * Lots of memory barriers to reject false positives. |
---|
1520 | | - * |
---|
1521 | | - * The idea is to pick up rsp->gp_seq, then rsp->jiffies_stall, |
---|
1522 | | - * then rsp->gp_start, and finally another copy of rsp->gp_seq. |
---|
1523 | | - * These values are updated in the opposite order with memory |
---|
1524 | | - * barriers (or equivalent) during grace-period initialization |
---|
1525 | | - * and cleanup. Now, a false positive can occur if we get an new |
---|
1526 | | - * value of rsp->gp_start and a old value of rsp->jiffies_stall. |
---|
1527 | | - * But given the memory barriers, the only way that this can happen |
---|
1528 | | - * is if one grace period ends and another starts between these |
---|
1529 | | - * two fetches. This is detected by comparing the second fetch |
---|
1530 | | - * of rsp->gp_seq with the previous fetch from rsp->gp_seq. |
---|
1531 | | - * |
---|
1532 | | - * Given this check, comparisons of jiffies, rsp->jiffies_stall, |
---|
1533 | | - * and rsp->gp_start suffice to forestall false positives. |
---|
1534 | | - */ |
---|
1535 | | - gs1 = READ_ONCE(rsp->gp_seq); |
---|
1536 | | - smp_rmb(); /* Pick up ->gp_seq first... */ |
---|
1537 | | - js = READ_ONCE(rsp->jiffies_stall); |
---|
1538 | | - smp_rmb(); /* ...then ->jiffies_stall before the rest... */ |
---|
1539 | | - gps = READ_ONCE(rsp->gp_start); |
---|
1540 | | - smp_rmb(); /* ...and finally ->gp_start before ->gp_seq again. */ |
---|
1541 | | - gs2 = READ_ONCE(rsp->gp_seq); |
---|
1542 | | - if (gs1 != gs2 || |
---|
1543 | | - ULONG_CMP_LT(j, js) || |
---|
1544 | | - ULONG_CMP_GE(gps, js)) |
---|
1545 | | - return; /* No stall or GP completed since entering function. */ |
---|
1546 | | - rnp = rdp->mynode; |
---|
1547 | | - jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; |
---|
1548 | | - if (rcu_gp_in_progress(rsp) && |
---|
1549 | | - (READ_ONCE(rnp->qsmask) & rdp->grpmask) && |
---|
1550 | | - cmpxchg(&rsp->jiffies_stall, js, jn) == js) { |
---|
1551 | | - |
---|
1552 | | - /* We haven't checked in, so go dump stack. */ |
---|
1553 | | - print_cpu_stall(rsp); |
---|
1554 | | - |
---|
1555 | | - } else if (rcu_gp_in_progress(rsp) && |
---|
1556 | | - ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) && |
---|
1557 | | - cmpxchg(&rsp->jiffies_stall, js, jn) == js) { |
---|
1558 | | - |
---|
1559 | | - /* They had a few time units to dump stack, so complain. */ |
---|
1560 | | - print_other_cpu_stall(rsp, gs2); |
---|
1561 | | - } |
---|
1562 | | -} |
---|
1563 | | - |
---|
1564 | | -/** |
---|
1565 | | - * rcu_cpu_stall_reset - prevent further stall warnings in current grace period |
---|
1566 | | - * |
---|
1567 | | - * Set the stall-warning timeout way off into the future, thus preventing |
---|
1568 | | - * any RCU CPU stall-warning messages from appearing in the current set of |
---|
1569 | | - * RCU grace periods. |
---|
1570 | | - * |
---|
1571 | | - * The caller must disable hard irqs. |
---|
1572 | | - */ |
---|
1573 | | -void rcu_cpu_stall_reset(void) |
---|
1574 | | -{ |
---|
1575 | | - struct rcu_state *rsp; |
---|
1576 | | - |
---|
1577 | | - for_each_rcu_flavor(rsp) |
---|
1578 | | - WRITE_ONCE(rsp->jiffies_stall, jiffies + ULONG_MAX / 2); |
---|
1579 | | -} |
---|
1580 | | - |
---|
1581 | 1334 | /* Trace-event wrapper function for trace_rcu_future_grace_period. */ |
---|
1582 | 1335 | static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp, |
---|
1583 | 1336 | unsigned long gp_seq_req, const char *s) |
---|
1584 | 1337 | { |
---|
1585 | | - trace_rcu_future_grace_period(rdp->rsp->name, rnp->gp_seq, gp_seq_req, |
---|
1586 | | - rnp->level, rnp->grplo, rnp->grphi, s); |
---|
| 1338 | + trace_rcu_future_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq), |
---|
| 1339 | + gp_seq_req, rnp->level, |
---|
| 1340 | + rnp->grplo, rnp->grphi, s); |
---|
1587 | 1341 | } |
---|
1588 | 1342 | |
---|
1589 | 1343 | /* |
---|
.. | .. |
---|
1606 | 1360 | unsigned long gp_seq_req) |
---|
1607 | 1361 | { |
---|
1608 | 1362 | bool ret = false; |
---|
1609 | | - struct rcu_state *rsp = rdp->rsp; |
---|
1610 | 1363 | struct rcu_node *rnp; |
---|
1611 | 1364 | |
---|
1612 | 1365 | /* |
---|
.. | .. |
---|
1631 | 1384 | TPS("Prestarted")); |
---|
1632 | 1385 | goto unlock_out; |
---|
1633 | 1386 | } |
---|
1634 | | - rnp->gp_seq_needed = gp_seq_req; |
---|
| 1387 | + WRITE_ONCE(rnp->gp_seq_needed, gp_seq_req); |
---|
1635 | 1388 | if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq))) { |
---|
1636 | 1389 | /* |
---|
1637 | 1390 | * We just marked the leaf or internal node, and a |
---|
.. | .. |
---|
1650 | 1403 | } |
---|
1651 | 1404 | |
---|
1652 | 1405 | /* If GP already in progress, just leave, otherwise start one. */ |
---|
1653 | | - if (rcu_gp_in_progress(rsp)) { |
---|
| 1406 | + if (rcu_gp_in_progress()) { |
---|
1654 | 1407 | trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("Startedleafroot")); |
---|
1655 | 1408 | goto unlock_out; |
---|
1656 | 1409 | } |
---|
1657 | 1410 | trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("Startedroot")); |
---|
1658 | | - WRITE_ONCE(rsp->gp_flags, rsp->gp_flags | RCU_GP_FLAG_INIT); |
---|
1659 | | - rsp->gp_req_activity = jiffies; |
---|
1660 | | - if (!rsp->gp_kthread) { |
---|
| 1411 | + WRITE_ONCE(rcu_state.gp_flags, rcu_state.gp_flags | RCU_GP_FLAG_INIT); |
---|
| 1412 | + WRITE_ONCE(rcu_state.gp_req_activity, jiffies); |
---|
| 1413 | + if (!READ_ONCE(rcu_state.gp_kthread)) { |
---|
1661 | 1414 | trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("NoGPkthread")); |
---|
1662 | 1415 | goto unlock_out; |
---|
1663 | 1416 | } |
---|
1664 | | - trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gp_seq), TPS("newreq")); |
---|
| 1417 | + trace_rcu_grace_period(rcu_state.name, data_race(rcu_state.gp_seq), TPS("newreq")); |
---|
1665 | 1418 | ret = true; /* Caller must wake GP kthread. */ |
---|
1666 | 1419 | unlock_out: |
---|
1667 | 1420 | /* Push furthest requested GP to leaf node and rcu_data structure. */ |
---|
1668 | 1421 | if (ULONG_CMP_LT(gp_seq_req, rnp->gp_seq_needed)) { |
---|
1669 | | - rnp_start->gp_seq_needed = rnp->gp_seq_needed; |
---|
1670 | | - rdp->gp_seq_needed = rnp->gp_seq_needed; |
---|
| 1422 | + WRITE_ONCE(rnp_start->gp_seq_needed, rnp->gp_seq_needed); |
---|
| 1423 | + WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed); |
---|
1671 | 1424 | } |
---|
1672 | 1425 | if (rnp != rnp_start) |
---|
1673 | 1426 | raw_spin_unlock_rcu_node(rnp); |
---|
.. | .. |
---|
1678 | 1431 | * Clean up any old requests for the just-ended grace period. Also return |
---|
1679 | 1432 | * whether any additional grace periods have been requested. |
---|
1680 | 1433 | */ |
---|
1681 | | -static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) |
---|
| 1434 | +static bool rcu_future_gp_cleanup(struct rcu_node *rnp) |
---|
1682 | 1435 | { |
---|
1683 | 1436 | bool needmore; |
---|
1684 | | - struct rcu_data *rdp = this_cpu_ptr(rsp->rda); |
---|
| 1437 | + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); |
---|
1685 | 1438 | |
---|
1686 | 1439 | needmore = ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed); |
---|
1687 | 1440 | if (!needmore) |
---|
.. | .. |
---|
1692 | 1445 | } |
---|
1693 | 1446 | |
---|
1694 | 1447 | /* |
---|
1695 | | - * Awaken the grace-period kthread. Don't do a self-awaken (unless in |
---|
1696 | | - * an interrupt or softirq handler), and don't bother awakening when there |
---|
1697 | | - * is nothing for the grace-period kthread to do (as in several CPUs raced |
---|
1698 | | - * to awaken, and we lost), and finally don't try to awaken a kthread that |
---|
1699 | | - * has not yet been created. If all those checks are passed, track some |
---|
1700 | | - * debug information and awaken. |
---|
| 1448 | + * Awaken the grace-period kthread. Don't do a self-awaken (unless in an |
---|
| 1449 | + * interrupt or softirq handler, in which case we just might immediately |
---|
| 1450 | + * sleep upon return, resulting in a grace-period hang), and don't bother |
---|
| 1451 | + * awakening when there is nothing for the grace-period kthread to do |
---|
| 1452 | + * (as in several CPUs raced to awaken, we lost), and finally don't try |
---|
| 1453 | + * to awaken a kthread that has not yet been created. If all those checks |
---|
| 1454 | + * are passed, track some debug information and awaken. |
---|
1701 | 1455 | * |
---|
1702 | 1456 | * So why do the self-wakeup when in an interrupt or softirq handler |
---|
1703 | 1457 | * in the grace-period kthread's context? Because the kthread might have |
---|
.. | .. |
---|
1705 | 1459 | * pre-sleep check of the awaken condition. In this case, a wakeup really |
---|
1706 | 1460 | * is required, and is therefore supplied. |
---|
1707 | 1461 | */ |
---|
1708 | | -static void rcu_gp_kthread_wake(struct rcu_state *rsp) |
---|
| 1462 | +static void rcu_gp_kthread_wake(void) |
---|
1709 | 1463 | { |
---|
1710 | | - if ((current == rsp->gp_kthread && |
---|
1711 | | - !in_interrupt() && !in_serving_softirq()) || |
---|
1712 | | - !READ_ONCE(rsp->gp_flags) || |
---|
1713 | | - !rsp->gp_kthread) |
---|
| 1464 | + struct task_struct *t = READ_ONCE(rcu_state.gp_kthread); |
---|
| 1465 | + |
---|
| 1466 | + if ((current == t && !in_irq() && !in_serving_softirq()) || |
---|
| 1467 | + !READ_ONCE(rcu_state.gp_flags) || !t) |
---|
1714 | 1468 | return; |
---|
1715 | | - swake_up_one(&rsp->gp_wq); |
---|
| 1469 | + WRITE_ONCE(rcu_state.gp_wake_time, jiffies); |
---|
| 1470 | + WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq)); |
---|
| 1471 | + swake_up_one(&rcu_state.gp_wq); |
---|
1716 | 1472 | } |
---|
1717 | 1473 | |
---|
1718 | 1474 | /* |
---|
.. | .. |
---|
1727 | 1483 | * |
---|
1728 | 1484 | * The caller must hold rnp->lock with interrupts disabled. |
---|
1729 | 1485 | */ |
---|
1730 | | -static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, |
---|
1731 | | - struct rcu_data *rdp) |
---|
| 1486 | +static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp) |
---|
1732 | 1487 | { |
---|
1733 | 1488 | unsigned long gp_seq_req; |
---|
1734 | 1489 | bool ret = false; |
---|
1735 | 1490 | |
---|
| 1491 | + rcu_lockdep_assert_cblist_protected(rdp); |
---|
1736 | 1492 | raw_lockdep_assert_held_rcu_node(rnp); |
---|
1737 | 1493 | |
---|
1738 | 1494 | /* If no pending (not yet ready to invoke) callbacks, nothing to do. */ |
---|
.. | .. |
---|
1749 | 1505 | * accelerating callback invocation to an earlier grace-period |
---|
1750 | 1506 | * number. |
---|
1751 | 1507 | */ |
---|
1752 | | - gp_seq_req = rcu_seq_snap(&rsp->gp_seq); |
---|
| 1508 | + gp_seq_req = rcu_seq_snap(&rcu_state.gp_seq); |
---|
1753 | 1509 | if (rcu_segcblist_accelerate(&rdp->cblist, gp_seq_req)) |
---|
1754 | 1510 | ret = rcu_start_this_gp(rnp, rdp, gp_seq_req); |
---|
1755 | 1511 | |
---|
1756 | 1512 | /* Trace depending on how much we were able to accelerate. */ |
---|
1757 | 1513 | if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL)) |
---|
1758 | | - trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("AccWaitCB")); |
---|
| 1514 | + trace_rcu_grace_period(rcu_state.name, gp_seq_req, TPS("AccWaitCB")); |
---|
1759 | 1515 | else |
---|
1760 | | - trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("AccReadyCB")); |
---|
| 1516 | + trace_rcu_grace_period(rcu_state.name, gp_seq_req, TPS("AccReadyCB")); |
---|
| 1517 | + |
---|
1761 | 1518 | return ret; |
---|
1762 | 1519 | } |
---|
1763 | 1520 | |
---|
.. | .. |
---|
1768 | 1525 | * that a new grace-period request be made, invokes rcu_accelerate_cbs() |
---|
1769 | 1526 | * while holding the leaf rcu_node structure's ->lock. |
---|
1770 | 1527 | */ |
---|
1771 | | -static void rcu_accelerate_cbs_unlocked(struct rcu_state *rsp, |
---|
1772 | | - struct rcu_node *rnp, |
---|
| 1528 | +static void rcu_accelerate_cbs_unlocked(struct rcu_node *rnp, |
---|
1773 | 1529 | struct rcu_data *rdp) |
---|
1774 | 1530 | { |
---|
1775 | 1531 | unsigned long c; |
---|
1776 | 1532 | bool needwake; |
---|
1777 | 1533 | |
---|
1778 | | - lockdep_assert_irqs_disabled(); |
---|
1779 | | - c = rcu_seq_snap(&rsp->gp_seq); |
---|
1780 | | - if (!rdp->gpwrap && ULONG_CMP_GE(rdp->gp_seq_needed, c)) { |
---|
| 1534 | + rcu_lockdep_assert_cblist_protected(rdp); |
---|
| 1535 | + c = rcu_seq_snap(&rcu_state.gp_seq); |
---|
| 1536 | + if (!READ_ONCE(rdp->gpwrap) && ULONG_CMP_GE(rdp->gp_seq_needed, c)) { |
---|
1781 | 1537 | /* Old request still live, so mark recent callbacks. */ |
---|
1782 | 1538 | (void)rcu_segcblist_accelerate(&rdp->cblist, c); |
---|
1783 | 1539 | return; |
---|
1784 | 1540 | } |
---|
1785 | 1541 | raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ |
---|
1786 | | - needwake = rcu_accelerate_cbs(rsp, rnp, rdp); |
---|
| 1542 | + needwake = rcu_accelerate_cbs(rnp, rdp); |
---|
1787 | 1543 | raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ |
---|
1788 | 1544 | if (needwake) |
---|
1789 | | - rcu_gp_kthread_wake(rsp); |
---|
| 1545 | + rcu_gp_kthread_wake(); |
---|
1790 | 1546 | } |
---|
1791 | 1547 | |
---|
1792 | 1548 | /* |
---|
.. | .. |
---|
1799 | 1555 | * |
---|
1800 | 1556 | * The caller must hold rnp->lock with interrupts disabled. |
---|
1801 | 1557 | */ |
---|
1802 | | -static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp, |
---|
1803 | | - struct rcu_data *rdp) |
---|
| 1558 | +static bool rcu_advance_cbs(struct rcu_node *rnp, struct rcu_data *rdp) |
---|
1804 | 1559 | { |
---|
| 1560 | + rcu_lockdep_assert_cblist_protected(rdp); |
---|
1805 | 1561 | raw_lockdep_assert_held_rcu_node(rnp); |
---|
1806 | 1562 | |
---|
1807 | 1563 | /* If no pending (not yet ready to invoke) callbacks, nothing to do. */ |
---|
.. | .. |
---|
1815 | 1571 | rcu_segcblist_advance(&rdp->cblist, rnp->gp_seq); |
---|
1816 | 1572 | |
---|
1817 | 1573 | /* Classify any remaining callbacks. */ |
---|
1818 | | - return rcu_accelerate_cbs(rsp, rnp, rdp); |
---|
| 1574 | + return rcu_accelerate_cbs(rnp, rdp); |
---|
| 1575 | +} |
---|
| 1576 | + |
---|
| 1577 | +/* |
---|
| 1578 | + * Move and classify callbacks, but only if doing so won't require |
---|
| 1579 | + * that the RCU grace-period kthread be awakened. |
---|
| 1580 | + */ |
---|
| 1581 | +static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp, |
---|
| 1582 | + struct rcu_data *rdp) |
---|
| 1583 | +{ |
---|
| 1584 | + rcu_lockdep_assert_cblist_protected(rdp); |
---|
| 1585 | + if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || !raw_spin_trylock_rcu_node(rnp)) |
---|
| 1586 | + return; |
---|
| 1587 | + // The grace period cannot end while we hold the rcu_node lock. |
---|
| 1588 | + if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq))) |
---|
| 1589 | + WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp)); |
---|
| 1590 | + raw_spin_unlock_rcu_node(rnp); |
---|
| 1591 | +} |
---|
| 1592 | + |
---|
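The rcu_seq_*() helpers used throughout these callback-acceleration functions treat a grace-period sequence number as a counter in the upper bits plus a small state field in the low-order bits (two bits at the time of this patch, see kernel/rcu/rcu.h), so one value answers both "which grace period?" and "is one in flight?". A rough summary of the conventions relied on above, not the helpers' exact implementations:

    /*
     * rcu_seq_state(s)    - s & RCU_SEQ_STATE_MASK; nonzero means a grace
     *                       period is currently in progress.
     * rcu_seq_current(sp) - lockless READ_ONCE() of the current value.
     * rcu_seq_snap(sp)    - smallest value the counter must reach before a
     *                       full grace period has elapsed; callbacks tagged
     *                       with it by rcu_segcblist_accelerate() become
     *                       ready to invoke once that value is reached.
     */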
| 1593 | +/* |
---|
| 1594 | + * In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels, attempt to generate a |
---|
| 1595 | + * quiescent state. This is intended to be invoked when the CPU notices |
---|
| 1596 | + * a new grace period. |
---|
| 1597 | + */ |
---|
| 1598 | +static void rcu_strict_gp_check_qs(void) |
---|
| 1599 | +{ |
---|
| 1600 | + if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) { |
---|
| 1601 | + rcu_read_lock(); |
---|
| 1602 | + rcu_read_unlock(); |
---|
| 1603 | + } |
---|
1819 | 1604 | } |
---|
1820 | 1605 | |
---|
1821 | 1606 | /* |
---|
.. | .. |
---|
1824 | 1609 | * structure corresponding to the current CPU, and must have irqs disabled. |
---|
1825 | 1610 | * Returns true if the grace-period kthread needs to be awakened. |
---|
1826 | 1611 | */ |
---|
1827 | | -static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, |
---|
1828 | | - struct rcu_data *rdp) |
---|
| 1612 | +static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp) |
---|
1829 | 1613 | { |
---|
1830 | | - bool ret; |
---|
1831 | | - bool need_gp; |
---|
| 1614 | + bool ret = false; |
---|
| 1615 | + bool need_qs; |
---|
| 1616 | + const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) && |
---|
| 1617 | + rcu_segcblist_is_offloaded(&rdp->cblist); |
---|
1832 | 1618 | |
---|
1833 | 1619 | raw_lockdep_assert_held_rcu_node(rnp); |
---|
1834 | 1620 | |
---|
.. | .. |
---|
1838 | 1624 | /* Handle the ends of any preceding grace periods first. */ |
---|
1839 | 1625 | if (rcu_seq_completed_gp(rdp->gp_seq, rnp->gp_seq) || |
---|
1840 | 1626 | unlikely(READ_ONCE(rdp->gpwrap))) { |
---|
1841 | | - ret = rcu_advance_cbs(rsp, rnp, rdp); /* Advance callbacks. */ |
---|
1842 | | - trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("cpuend")); |
---|
| 1627 | + if (!offloaded) |
---|
| 1628 | + ret = rcu_advance_cbs(rnp, rdp); /* Advance CBs. */ |
---|
| 1629 | + rdp->core_needs_qs = false; |
---|
| 1630 | + trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuend")); |
---|
1843 | 1631 | } else { |
---|
1844 | | - ret = rcu_accelerate_cbs(rsp, rnp, rdp); /* Recent callbacks. */ |
---|
| 1632 | + if (!offloaded) |
---|
| 1633 | + ret = rcu_accelerate_cbs(rnp, rdp); /* Recent CBs. */ |
---|
| 1634 | + if (rdp->core_needs_qs) |
---|
| 1635 | + rdp->core_needs_qs = !!(rnp->qsmask & rdp->grpmask); |
---|
1845 | 1636 | } |
---|
1846 | 1637 | |
---|
1847 | 1638 | /* Now handle the beginnings of any new-to-this-CPU grace periods. */ |
---|
.. | .. |
---|
1852 | 1643 | * set up to detect a quiescent state, otherwise don't |
---|
1853 | 1644 | * go looking for one. |
---|
1854 | 1645 | */ |
---|
1855 | | - trace_rcu_grace_period(rsp->name, rnp->gp_seq, TPS("cpustart")); |
---|
1856 | | - need_gp = !!(rnp->qsmask & rdp->grpmask); |
---|
1857 | | - rdp->cpu_no_qs.b.norm = need_gp; |
---|
1858 | | - rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr); |
---|
1859 | | - rdp->core_needs_qs = need_gp; |
---|
| 1646 | + trace_rcu_grace_period(rcu_state.name, rnp->gp_seq, TPS("cpustart")); |
---|
| 1647 | + need_qs = !!(rnp->qsmask & rdp->grpmask); |
---|
| 1648 | + rdp->cpu_no_qs.b.norm = need_qs; |
---|
| 1649 | + rdp->core_needs_qs = need_qs; |
---|
1860 | 1650 | zero_cpu_stall_ticks(rdp); |
---|
1861 | 1651 | } |
---|
1862 | 1652 | rdp->gp_seq = rnp->gp_seq; /* Remember new grace-period state. */ |
---|
1863 | | - if (ULONG_CMP_GE(rnp->gp_seq_needed, rdp->gp_seq_needed) || rdp->gpwrap) |
---|
1864 | | - rdp->gp_seq_needed = rnp->gp_seq_needed; |
---|
| 1653 | + if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap) |
---|
| 1654 | + WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed); |
---|
1865 | 1655 | WRITE_ONCE(rdp->gpwrap, false); |
---|
1866 | 1656 | rcu_gpnum_ovf(rnp, rdp); |
---|
1867 | 1657 | return ret; |
---|
1868 | 1658 | } |
---|
1869 | 1659 | |
---|
1870 | | -static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) |
---|
| 1660 | +static void note_gp_changes(struct rcu_data *rdp) |
---|
1871 | 1661 | { |
---|
1872 | 1662 | unsigned long flags; |
---|
1873 | 1663 | bool needwake; |
---|
.. | .. |
---|
1881 | 1671 | local_irq_restore(flags); |
---|
1882 | 1672 | return; |
---|
1883 | 1673 | } |
---|
1884 | | - needwake = __note_gp_changes(rsp, rnp, rdp); |
---|
| 1674 | + needwake = __note_gp_changes(rnp, rdp); |
---|
1885 | 1675 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
| 1676 | + rcu_strict_gp_check_qs(); |
---|
1886 | 1677 | if (needwake) |
---|
1887 | | - rcu_gp_kthread_wake(rsp); |
---|
| 1678 | + rcu_gp_kthread_wake(); |
---|
1888 | 1679 | } |
---|
1889 | 1680 | |
---|
1890 | | -static void rcu_gp_slow(struct rcu_state *rsp, int delay) |
---|
| 1681 | +static void rcu_gp_slow(int delay) |
---|
1891 | 1682 | { |
---|
1892 | 1683 | if (delay > 0 && |
---|
1893 | | - !(rcu_seq_ctr(rsp->gp_seq) % |
---|
| 1684 | + !(rcu_seq_ctr(rcu_state.gp_seq) % |
---|
1894 | 1685 | (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay))) |
---|
1895 | | - schedule_timeout_uninterruptible(delay); |
---|
| 1686 | + schedule_timeout_idle(delay); |
---|
| 1687 | +} |
---|
| 1688 | + |
---|
| 1689 | +static unsigned long sleep_duration; |
---|
| 1690 | + |
---|
| 1691 | +/* Allow rcutorture to stall the grace-period kthread. */ |
---|
| 1692 | +void rcu_gp_set_torture_wait(int duration) |
---|
| 1693 | +{ |
---|
| 1694 | + if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST) && duration > 0) |
---|
| 1695 | + WRITE_ONCE(sleep_duration, duration); |
---|
| 1696 | +} |
---|
| 1697 | +EXPORT_SYMBOL_GPL(rcu_gp_set_torture_wait); |
---|
| 1698 | + |
---|
| 1699 | +/* Actually implement the aforementioned wait. */ |
---|
| 1700 | +static void rcu_gp_torture_wait(void) |
---|
| 1701 | +{ |
---|
| 1702 | + unsigned long duration; |
---|
| 1703 | + |
---|
| 1704 | + if (!IS_ENABLED(CONFIG_RCU_TORTURE_TEST)) |
---|
| 1705 | + return; |
---|
| 1706 | + duration = xchg(&sleep_duration, 0UL); |
---|
| 1707 | + if (duration > 0) { |
---|
| 1708 | + pr_alert("%s: Waiting %lu jiffies\n", __func__, duration); |
---|
| 1709 | + schedule_timeout_idle(duration); |
---|
| 1710 | + pr_alert("%s: Wait complete\n", __func__); |
---|
| 1711 | + } |
---|
| 1712 | +} |
---|
| 1713 | + |
---|
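These two hooks give rcutorture a way to stall the grace-period kthread on demand: the test side stores a duration, and the kthread sleeps it off at the rcu_gp_torture_wait() call points in the grace-period loop. A hypothetical caller on the test side (the duration shown is illustrative):

    /* Stall the grace-period kthread for 30 seconds at its next wait point. */
    rcu_gp_set_torture_wait(30 * HZ);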
| 1714 | +/* |
---|
| 1715 | + * Handler for on_each_cpu() to invoke the target CPU's RCU core |
---|
| 1716 | + * processing. |
---|
| 1717 | + */ |
---|
| 1718 | +static void rcu_strict_gp_boundary(void *unused) |
---|
| 1719 | +{ |
---|
| 1720 | + invoke_rcu_core(); |
---|
1896 | 1721 | } |
---|
1897 | 1722 | |
---|
1898 | 1723 | /* |
---|
1899 | 1724 | * Initialize a new grace period. Return false if no grace period required. |
---|
1900 | 1725 | */ |
---|
1901 | | -static bool rcu_gp_init(struct rcu_state *rsp) |
---|
| 1726 | +static bool rcu_gp_init(void) |
---|
1902 | 1727 | { |
---|
| 1728 | + unsigned long firstseq; |
---|
1903 | 1729 | unsigned long flags; |
---|
1904 | 1730 | unsigned long oldmask; |
---|
1905 | 1731 | unsigned long mask; |
---|
1906 | 1732 | struct rcu_data *rdp; |
---|
1907 | | - struct rcu_node *rnp = rcu_get_root(rsp); |
---|
| 1733 | + struct rcu_node *rnp = rcu_get_root(); |
---|
1908 | 1734 | |
---|
1909 | | - WRITE_ONCE(rsp->gp_activity, jiffies); |
---|
| 1735 | + WRITE_ONCE(rcu_state.gp_activity, jiffies); |
---|
1910 | 1736 | raw_spin_lock_irq_rcu_node(rnp); |
---|
1911 | | - if (!READ_ONCE(rsp->gp_flags)) { |
---|
| 1737 | + if (!READ_ONCE(rcu_state.gp_flags)) { |
---|
1912 | 1738 | /* Spurious wakeup, tell caller to go back to sleep. */ |
---|
1913 | 1739 | raw_spin_unlock_irq_rcu_node(rnp); |
---|
1914 | 1740 | return false; |
---|
1915 | 1741 | } |
---|
1916 | | - WRITE_ONCE(rsp->gp_flags, 0); /* Clear all flags: New grace period. */ |
---|
| 1742 | + WRITE_ONCE(rcu_state.gp_flags, 0); /* Clear all flags: New GP. */ |
---|
1917 | 1743 | |
---|
1918 | | - if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) { |
---|
| 1744 | + if (WARN_ON_ONCE(rcu_gp_in_progress())) { |
---|
1919 | 1745 | /* |
---|
1920 | 1746 | * Grace period already in progress, don't start another. |
---|
1921 | 1747 | * Not supposed to be able to happen. |
---|
.. | .. |
---|
1925 | 1751 | } |
---|
1926 | 1752 | |
---|
1927 | 1753 | /* Advance to a new grace period and initialize state. */ |
---|
1928 | | - record_gp_stall_check_time(rsp); |
---|
| 1754 | + record_gp_stall_check_time(); |
---|
1929 | 1755 | /* Record GP times before starting GP, hence rcu_seq_start(). */ |
---|
1930 | | - rcu_seq_start(&rsp->gp_seq); |
---|
1931 | | - trace_rcu_grace_period(rsp->name, rsp->gp_seq, TPS("start")); |
---|
| 1756 | + rcu_seq_start(&rcu_state.gp_seq); |
---|
| 1757 | + ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq); |
---|
| 1758 | + trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start")); |
---|
1932 | 1759 | raw_spin_unlock_irq_rcu_node(rnp); |
---|
1933 | 1760 | |
---|
1934 | 1761 | /* |
---|
1935 | | - * Apply per-leaf buffered online and offline operations to the |
---|
1936 | | - * rcu_node tree. Note that this new grace period need not wait |
---|
1937 | | - * for subsequent online CPUs, and that quiescent-state forcing |
---|
1938 | | - * will handle subsequent offline CPUs. |
---|
| 1762 | + * Apply per-leaf buffered online and offline operations to |
---|
| 1763 | + * the rcu_node tree. Note that this new grace period need not |
---|
| 1764 | + * wait for subsequent online CPUs, and that RCU hooks in the CPU |
---|
| 1765 | + * offlining path, when combined with checks in this function, |
---|
| 1766 | + * will handle CPUs that are currently going offline or that will |
---|
| 1767 | + * go offline later. Please also refer to the "Hotplug CPU" section |
---|
| 1768 | + * of RCU's Requirements documentation. |
---|
1939 | 1769 | */ |
---|
1940 | | - rsp->gp_state = RCU_GP_ONOFF; |
---|
1941 | | - rcu_for_each_leaf_node(rsp, rnp) { |
---|
1942 | | - spin_lock(&rsp->ofl_lock); |
---|
| 1770 | + rcu_state.gp_state = RCU_GP_ONOFF; |
---|
| 1771 | + rcu_for_each_leaf_node(rnp) { |
---|
| 1772 | + smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values. |
---|
| 1773 | + firstseq = READ_ONCE(rnp->ofl_seq); |
---|
| 1774 | + if (firstseq & 0x1) |
---|
| 1775 | + while (firstseq == READ_ONCE(rnp->ofl_seq)) |
---|
| 1776 | + schedule_timeout_idle(1); // Can't wake unless RCU is watching. |
---|
| 1777 | + smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values. |
---|
| 1778 | + raw_spin_lock(&rcu_state.ofl_lock); |
---|
1943 | 1779 | raw_spin_lock_irq_rcu_node(rnp); |
---|
1944 | 1780 | if (rnp->qsmaskinit == rnp->qsmaskinitnext && |
---|
1945 | 1781 | !rnp->wait_blkd_tasks) { |
---|
1946 | 1782 | /* Nothing to do on this leaf rcu_node structure. */ |
---|
1947 | 1783 | raw_spin_unlock_irq_rcu_node(rnp); |
---|
1948 | | - spin_unlock(&rsp->ofl_lock); |
---|
| 1784 | + raw_spin_unlock(&rcu_state.ofl_lock); |
---|
1949 | 1785 | continue; |
---|
1950 | 1786 | } |
---|
1951 | 1787 | |
---|
.. | .. |
---|
1981 | 1817 | } |
---|
1982 | 1818 | |
---|
1983 | 1819 | raw_spin_unlock_irq_rcu_node(rnp); |
---|
1984 | | - spin_unlock(&rsp->ofl_lock); |
---|
| 1820 | + raw_spin_unlock(&rcu_state.ofl_lock); |
---|
1985 | 1821 | } |
---|
1986 | | - rcu_gp_slow(rsp, gp_preinit_delay); /* Races with CPU hotplug. */ |
---|
| 1822 | + rcu_gp_slow(gp_preinit_delay); /* Races with CPU hotplug. */ |
---|
1987 | 1823 | |
---|
1988 | 1824 | /* |
---|
1989 | 1825 | * Set the quiescent-state-needed bits in all the rcu_node |
---|
1990 | | - * structures for all currently online CPUs in breadth-first order, |
---|
1991 | | - * starting from the root rcu_node structure, relying on the layout |
---|
1992 | | - * of the tree within the rsp->node[] array. Note that other CPUs |
---|
1993 | | - * will access only the leaves of the hierarchy, thus seeing that no |
---|
1994 | | - * grace period is in progress, at least until the corresponding |
---|
1995 | | - * leaf node has been initialized. |
---|
| 1826 | + * structures for all currently online CPUs in breadth-first |
---|
| 1827 | + * order, starting from the root rcu_node structure, relying on the |
---|
| 1828 | + * layout of the tree within the rcu_state.node[] array. Note that |
---|
| 1829 | + * other CPUs will access only the leaves of the hierarchy, thus |
---|
| 1830 | + * seeing that no grace period is in progress, at least until the |
---|
| 1831 | + * corresponding leaf node has been initialized. |
---|
1996 | 1832 | * |
---|
1997 | 1833 | * The grace period cannot complete until the initialization |
---|
1998 | 1834 | * process finishes, because this kthread handles both. |
---|
1999 | 1835 | */ |
---|
2000 | | - rsp->gp_state = RCU_GP_INIT; |
---|
2001 | | - rcu_for_each_node_breadth_first(rsp, rnp) { |
---|
2002 | | - rcu_gp_slow(rsp, gp_init_delay); |
---|
| 1836 | + rcu_state.gp_state = RCU_GP_INIT; |
---|
| 1837 | + rcu_for_each_node_breadth_first(rnp) { |
---|
| 1838 | + rcu_gp_slow(gp_init_delay); |
---|
2003 | 1839 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
---|
2004 | | - rdp = this_cpu_ptr(rsp->rda); |
---|
2005 | | - rcu_preempt_check_blocked_tasks(rsp, rnp); |
---|
| 1840 | + rdp = this_cpu_ptr(&rcu_data); |
---|
| 1841 | + rcu_preempt_check_blocked_tasks(rnp); |
---|
2006 | 1842 | rnp->qsmask = rnp->qsmaskinit; |
---|
2007 | | - WRITE_ONCE(rnp->gp_seq, rsp->gp_seq); |
---|
| 1843 | + WRITE_ONCE(rnp->gp_seq, rcu_state.gp_seq); |
---|
2008 | 1844 | if (rnp == rdp->mynode) |
---|
2009 | | - (void)__note_gp_changes(rsp, rnp, rdp); |
---|
| 1845 | + (void)__note_gp_changes(rnp, rdp); |
---|
2010 | 1846 | rcu_preempt_boost_start_gp(rnp); |
---|
2011 | | - trace_rcu_grace_period_init(rsp->name, rnp->gp_seq, |
---|
| 1847 | + trace_rcu_grace_period_init(rcu_state.name, rnp->gp_seq, |
---|
2012 | 1848 | rnp->level, rnp->grplo, |
---|
2013 | 1849 | rnp->grphi, rnp->qsmask); |
---|
2014 | 1850 | /* Quiescent states for tasks on any now-offline CPUs. */ |
---|
2015 | 1851 | mask = rnp->qsmask & ~rnp->qsmaskinitnext; |
---|
2016 | 1852 | rnp->rcu_gp_init_mask = mask; |
---|
2017 | 1853 | if ((mask || rnp->wait_blkd_tasks) && rcu_is_leaf_node(rnp)) |
---|
2018 | | - rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags); |
---|
| 1854 | + rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); |
---|
2019 | 1855 | else |
---|
2020 | 1856 | raw_spin_unlock_irq_rcu_node(rnp); |
---|
2021 | 1857 | cond_resched_tasks_rcu_qs(); |
---|
2022 | | - WRITE_ONCE(rsp->gp_activity, jiffies); |
---|
| 1858 | + WRITE_ONCE(rcu_state.gp_activity, jiffies); |
---|
2023 | 1859 | } |
---|
| 1860 | + |
---|
| 1861 | + // If strict, make all CPUs aware of new grace period. |
---|
| 1862 | + if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) |
---|
| 1863 | + on_each_cpu(rcu_strict_gp_boundary, NULL, 0); |
---|
2024 | 1864 | |
---|
2025 | 1865 | return true; |
---|
2026 | 1866 | } |
---|
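The ->ofl_seq dance near the top of rcu_gp_init() is an even/odd sequence-counter handshake: as the barrier comments above indicate, the CPU-hotplug path bumps the counter to an odd value while it updates a leaf and back to even when it is done, so grace-period initialization waits out any in-flight transition before trusting the leaf's masks. A minimal user-space sketch of the same pattern, using C11 atomics and hypothetical names rather than anything from tree.c:

#include <stdatomic.h>

/* Hypothetical stand-in for rnp->ofl_seq: odd means a CPU-hotplug
 * transition is in flight, even means the leaf is quiescent. */
static _Atomic unsigned long ofl_seq;

/* Updater (hotplug path): bump to odd before touching the leaf, even after. */
static void hotplug_begin(void) { atomic_fetch_add(&ofl_seq, 1); }
static void hotplug_end(void)   { atomic_fetch_add(&ofl_seq, 1); }

/* Reader (grace-period initialization): if an update is in flight, wait for
 * the counter to move on before looking at the leaf.  The kernel additionally
 * issues smp_mb() on both sides and sleeps rather than spinning. */
static void wait_for_quiet_hotplug(void)
{
	unsigned long firstseq = atomic_load(&ofl_seq);

	if (firstseq & 0x1)
		while (firstseq == atomic_load(&ofl_seq))
			;	/* tree.c uses schedule_timeout_idle(1) here */
}

int main(void)
{
	hotplug_begin();
	hotplug_end();
	wait_for_quiet_hotplug();	/* returns immediately: counter is even */
	return 0;
}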
.. | .. |
---|
2029 | 1869 | * Helper function for swait_event_idle_exclusive() wakeup at force-quiescent-state |
---|
2030 | 1870 | * time. |
---|
2031 | 1871 | */ |
---|
2032 | | -static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp) |
---|
| 1872 | +static bool rcu_gp_fqs_check_wake(int *gfp) |
---|
2033 | 1873 | { |
---|
2034 | | - struct rcu_node *rnp = rcu_get_root(rsp); |
---|
| 1874 | + struct rcu_node *rnp = rcu_get_root(); |
---|
2035 | 1875 | |
---|
2036 | | - /* Someone like call_rcu() requested a force-quiescent-state scan. */ |
---|
2037 | | - *gfp = READ_ONCE(rsp->gp_flags); |
---|
| 1876 | + // If under overload conditions, force an immediate FQS scan. |
---|
| 1877 | + if (*gfp & RCU_GP_FLAG_OVLD) |
---|
| 1878 | + return true; |
---|
| 1879 | + |
---|
| 1880 | + // Someone like call_rcu() requested a force-quiescent-state scan. |
---|
| 1881 | + *gfp = READ_ONCE(rcu_state.gp_flags); |
---|
2038 | 1882 | if (*gfp & RCU_GP_FLAG_FQS) |
---|
2039 | 1883 | return true; |
---|
2040 | 1884 | |
---|
2041 | | - /* The current grace period has completed. */ |
---|
| 1885 | + // The current grace period has completed. |
---|
2042 | 1886 | if (!READ_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp)) |
---|
2043 | 1887 | return true; |
---|
2044 | 1888 | |
---|
.. | .. |
---|
2048 | 1892 | /* |
---|
2049 | 1893 | * Do one round of quiescent-state forcing. |
---|
2050 | 1894 | */ |
---|
2051 | | -static void rcu_gp_fqs(struct rcu_state *rsp, bool first_time) |
---|
| 1895 | +static void rcu_gp_fqs(bool first_time) |
---|
2052 | 1896 | { |
---|
2053 | | - struct rcu_node *rnp = rcu_get_root(rsp); |
---|
| 1897 | + struct rcu_node *rnp = rcu_get_root(); |
---|
2054 | 1898 | |
---|
2055 | | - WRITE_ONCE(rsp->gp_activity, jiffies); |
---|
2056 | | - rsp->n_force_qs++; |
---|
| 1899 | + WRITE_ONCE(rcu_state.gp_activity, jiffies); |
---|
| 1900 | + WRITE_ONCE(rcu_state.n_force_qs, rcu_state.n_force_qs + 1); |
---|
2057 | 1901 | if (first_time) { |
---|
2058 | 1902 | /* Collect dyntick-idle snapshots. */ |
---|
2059 | | - force_qs_rnp(rsp, dyntick_save_progress_counter); |
---|
| 1903 | + force_qs_rnp(dyntick_save_progress_counter); |
---|
2060 | 1904 | } else { |
---|
2061 | 1905 | /* Handle dyntick-idle and offline CPUs. */ |
---|
2062 | | - force_qs_rnp(rsp, rcu_implicit_dynticks_qs); |
---|
| 1906 | + force_qs_rnp(rcu_implicit_dynticks_qs); |
---|
2063 | 1907 | } |
---|
2064 | 1908 | /* Clear flag to prevent immediate re-entry. */ |
---|
2065 | | - if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { |
---|
| 1909 | + if (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) { |
---|
2066 | 1910 | raw_spin_lock_irq_rcu_node(rnp); |
---|
2067 | | - WRITE_ONCE(rsp->gp_flags, |
---|
2068 | | - READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS); |
---|
| 1911 | + WRITE_ONCE(rcu_state.gp_flags, |
---|
| 1912 | + READ_ONCE(rcu_state.gp_flags) & ~RCU_GP_FLAG_FQS); |
---|
2069 | 1913 | raw_spin_unlock_irq_rcu_node(rnp); |
---|
| 1914 | + } |
---|
| 1915 | +} |
---|
| 1916 | + |
---|
| 1917 | +/* |
---|
| 1918 | + * Loop doing repeated quiescent-state forcing until the grace period ends. |
---|
| 1919 | + */ |
---|
| 1920 | +static void rcu_gp_fqs_loop(void) |
---|
| 1921 | +{ |
---|
| 1922 | + bool first_gp_fqs; |
---|
| 1923 | + int gf = 0; |
---|
| 1924 | + unsigned long j; |
---|
| 1925 | + int ret; |
---|
| 1926 | + struct rcu_node *rnp = rcu_get_root(); |
---|
| 1927 | + |
---|
| 1928 | + first_gp_fqs = true; |
---|
| 1929 | + j = READ_ONCE(jiffies_till_first_fqs); |
---|
| 1930 | + if (rcu_state.cbovld) |
---|
| 1931 | + gf = RCU_GP_FLAG_OVLD; |
---|
| 1932 | + ret = 0; |
---|
| 1933 | + for (;;) { |
---|
| 1934 | + if (!ret) { |
---|
| 1935 | + rcu_state.jiffies_force_qs = jiffies + j; |
---|
| 1936 | + WRITE_ONCE(rcu_state.jiffies_kick_kthreads, |
---|
| 1937 | + jiffies + (j ? 3 * j : 2)); |
---|
| 1938 | + } |
---|
| 1939 | + trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, |
---|
| 1940 | + TPS("fqswait")); |
---|
| 1941 | + rcu_state.gp_state = RCU_GP_WAIT_FQS; |
---|
| 1942 | + ret = swait_event_idle_timeout_exclusive( |
---|
| 1943 | + rcu_state.gp_wq, rcu_gp_fqs_check_wake(&gf), j); |
---|
| 1944 | + rcu_gp_torture_wait(); |
---|
| 1945 | + rcu_state.gp_state = RCU_GP_DOING_FQS; |
---|
| 1946 | + /* Locking provides needed memory barriers. */ |
---|
| 1947 | + /* If grace period done, leave loop. */ |
---|
| 1948 | + if (!READ_ONCE(rnp->qsmask) && |
---|
| 1949 | + !rcu_preempt_blocked_readers_cgp(rnp)) |
---|
| 1950 | + break; |
---|
| 1951 | + /* If time for quiescent-state forcing, do it. */ |
---|
| 1952 | + if (!time_after(rcu_state.jiffies_force_qs, jiffies) || |
---|
| 1953 | + (gf & (RCU_GP_FLAG_FQS | RCU_GP_FLAG_OVLD))) { |
---|
| 1954 | + trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, |
---|
| 1955 | + TPS("fqsstart")); |
---|
| 1956 | + rcu_gp_fqs(first_gp_fqs); |
---|
| 1957 | + gf = 0; |
---|
| 1958 | + if (first_gp_fqs) { |
---|
| 1959 | + first_gp_fqs = false; |
---|
| 1960 | + gf = rcu_state.cbovld ? RCU_GP_FLAG_OVLD : 0; |
---|
| 1961 | + } |
---|
| 1962 | + trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, |
---|
| 1963 | + TPS("fqsend")); |
---|
| 1964 | + cond_resched_tasks_rcu_qs(); |
---|
| 1965 | + WRITE_ONCE(rcu_state.gp_activity, jiffies); |
---|
| 1966 | + ret = 0; /* Force full wait till next FQS. */ |
---|
| 1967 | + j = READ_ONCE(jiffies_till_next_fqs); |
---|
| 1968 | + } else { |
---|
| 1969 | + /* Deal with stray signal. */ |
---|
| 1970 | + cond_resched_tasks_rcu_qs(); |
---|
| 1971 | + WRITE_ONCE(rcu_state.gp_activity, jiffies); |
---|
| 1972 | + WARN_ON(signal_pending(current)); |
---|
| 1973 | + trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, |
---|
| 1974 | + TPS("fqswaitsig")); |
---|
| 1975 | + ret = 1; /* Keep old FQS timing. */ |
---|
| 1976 | + j = jiffies; |
---|
| 1977 | + if (time_after(jiffies, rcu_state.jiffies_force_qs)) |
---|
| 1978 | + j = 1; |
---|
| 1979 | + else |
---|
| 1980 | + j = rcu_state.jiffies_force_qs - j; |
---|
| 1981 | + gf = 0; |
---|
| 1982 | + } |
---|
2070 | 1983 | } |
---|
2071 | 1984 | } |
---|
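One detail worth calling out in rcu_gp_fqs_loop(): the FQS deadline lives in jiffies, which wraps, so the code compares with time_after() and computes the remaining wait as an unsigned difference rather than with plain relational operators. A self-contained illustration of why that works (plain C, hypothetical names; the kernel's time_after() is the real macro this mimics):

#include <stdio.h>

typedef unsigned long ulong;

/* Wrap-safe "a is after b", in the spirit of the kernel's time_after(). */
static int after(ulong a, ulong b)
{
	return (long)(b - a) < 0;
}

int main(void)
{
	ulong now      = (ulong)-10;	/* jiffies just before wrapping around */
	ulong deadline = now + 25;	/* wraps past zero */

	/* A naive comparison claims the deadline already passed; after() does not. */
	printf("naive: %d  wrap-safe: %d\n", deadline > now, after(deadline, now));

	/* Remaining ticks until the deadline, as in "j = jiffies_force_qs - j". */
	printf("remaining: %lu ticks\n", after(deadline, now) ? deadline - now : 1);
	return 0;
}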
2072 | 1985 | |
---|
2073 | 1986 | /* |
---|
2074 | 1987 | * Clean up after the old grace period. |
---|
2075 | 1988 | */ |
---|
2076 | | -static void rcu_gp_cleanup(struct rcu_state *rsp) |
---|
| 1989 | +static void rcu_gp_cleanup(void) |
---|
2077 | 1990 | { |
---|
2078 | | - unsigned long gp_duration; |
---|
| 1991 | + int cpu; |
---|
2079 | 1992 | bool needgp = false; |
---|
| 1993 | + unsigned long gp_duration; |
---|
2080 | 1994 | unsigned long new_gp_seq; |
---|
| 1995 | + bool offloaded; |
---|
2081 | 1996 | struct rcu_data *rdp; |
---|
2082 | | - struct rcu_node *rnp = rcu_get_root(rsp); |
---|
| 1997 | + struct rcu_node *rnp = rcu_get_root(); |
---|
2083 | 1998 | struct swait_queue_head *sq; |
---|
2084 | 1999 | |
---|
2085 | | - WRITE_ONCE(rsp->gp_activity, jiffies); |
---|
| 2000 | + WRITE_ONCE(rcu_state.gp_activity, jiffies); |
---|
2086 | 2001 | raw_spin_lock_irq_rcu_node(rnp); |
---|
2087 | | - gp_duration = jiffies - rsp->gp_start; |
---|
2088 | | - if (gp_duration > rsp->gp_max) |
---|
2089 | | - rsp->gp_max = gp_duration; |
---|
| 2002 | + rcu_state.gp_end = jiffies; |
---|
| 2003 | + gp_duration = rcu_state.gp_end - rcu_state.gp_start; |
---|
| 2004 | + if (gp_duration > rcu_state.gp_max) |
---|
| 2005 | + rcu_state.gp_max = gp_duration; |
---|
2090 | 2006 | |
---|
2091 | 2007 | /* |
---|
2092 | 2008 | * We know the grace period is complete, but to everyone else |
---|
.. | .. |
---|
2107 | 2023 | * the rcu_node structures before the beginning of the next grace |
---|
2108 | 2024 | * period is recorded in any of the rcu_node structures. |
---|
2109 | 2025 | */ |
---|
2110 | | - new_gp_seq = rsp->gp_seq; |
---|
| 2026 | + new_gp_seq = rcu_state.gp_seq; |
---|
2111 | 2027 | rcu_seq_end(&new_gp_seq); |
---|
2112 | | - rcu_for_each_node_breadth_first(rsp, rnp) { |
---|
| 2028 | + rcu_for_each_node_breadth_first(rnp) { |
---|
2113 | 2029 | raw_spin_lock_irq_rcu_node(rnp); |
---|
2114 | 2030 | if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp))) |
---|
2115 | | - dump_blkd_tasks(rsp, rnp, 10); |
---|
| 2031 | + dump_blkd_tasks(rnp, 10); |
---|
2116 | 2032 | WARN_ON_ONCE(rnp->qsmask); |
---|
2117 | 2033 | WRITE_ONCE(rnp->gp_seq, new_gp_seq); |
---|
2118 | | - rdp = this_cpu_ptr(rsp->rda); |
---|
| 2034 | + rdp = this_cpu_ptr(&rcu_data); |
---|
2119 | 2035 | if (rnp == rdp->mynode) |
---|
2120 | | - needgp = __note_gp_changes(rsp, rnp, rdp) || needgp; |
---|
| 2036 | + needgp = __note_gp_changes(rnp, rdp) || needgp; |
---|
2121 | 2037 | /* smp_mb() provided by prior unlock-lock pair. */ |
---|
2122 | | - needgp = rcu_future_gp_cleanup(rsp, rnp) || needgp; |
---|
| 2038 | + needgp = rcu_future_gp_cleanup(rnp) || needgp; |
---|
| 2039 | + // Reset overload indication for CPUs no longer overloaded |
---|
| 2040 | + if (rcu_is_leaf_node(rnp)) |
---|
| 2041 | + for_each_leaf_node_cpu_mask(rnp, cpu, rnp->cbovldmask) { |
---|
| 2042 | + rdp = per_cpu_ptr(&rcu_data, cpu); |
---|
| 2043 | + check_cb_ovld_locked(rdp, rnp); |
---|
| 2044 | + } |
---|
2123 | 2045 | sq = rcu_nocb_gp_get(rnp); |
---|
2124 | 2046 | raw_spin_unlock_irq_rcu_node(rnp); |
---|
2125 | 2047 | rcu_nocb_gp_cleanup(sq); |
---|
2126 | 2048 | cond_resched_tasks_rcu_qs(); |
---|
2127 | | - WRITE_ONCE(rsp->gp_activity, jiffies); |
---|
2128 | | - rcu_gp_slow(rsp, gp_cleanup_delay); |
---|
| 2049 | + WRITE_ONCE(rcu_state.gp_activity, jiffies); |
---|
| 2050 | + rcu_gp_slow(gp_cleanup_delay); |
---|
2129 | 2051 | } |
---|
2130 | | - rnp = rcu_get_root(rsp); |
---|
2131 | | - raw_spin_lock_irq_rcu_node(rnp); /* GP before rsp->gp_seq update. */ |
---|
| 2052 | + rnp = rcu_get_root(); |
---|
| 2053 | + raw_spin_lock_irq_rcu_node(rnp); /* GP before ->gp_seq update. */ |
---|
2132 | 2054 | |
---|
2133 | | - /* Declare grace period done. */ |
---|
2134 | | - rcu_seq_end(&rsp->gp_seq); |
---|
2135 | | - trace_rcu_grace_period(rsp->name, rsp->gp_seq, TPS("end")); |
---|
2136 | | - rsp->gp_state = RCU_GP_IDLE; |
---|
| 2055 | + /* Declare grace period done, trace first to use old GP number. */ |
---|
| 2056 | + trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("end")); |
---|
| 2057 | + rcu_seq_end(&rcu_state.gp_seq); |
---|
| 2058 | + ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq); |
---|
| 2059 | + rcu_state.gp_state = RCU_GP_IDLE; |
---|
2137 | 2060 | /* Check for GP requests since above loop. */ |
---|
2138 | | - rdp = this_cpu_ptr(rsp->rda); |
---|
| 2061 | + rdp = this_cpu_ptr(&rcu_data); |
---|
2139 | 2062 | if (!needgp && ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed)) { |
---|
2140 | 2063 | trace_rcu_this_gp(rnp, rdp, rnp->gp_seq_needed, |
---|
2141 | 2064 | TPS("CleanupMore")); |
---|
2142 | 2065 | needgp = true; |
---|
2143 | 2066 | } |
---|
2144 | 2067 | /* Advance CBs to reduce false positives below. */ |
---|
2145 | | - if (!rcu_accelerate_cbs(rsp, rnp, rdp) && needgp) { |
---|
2146 | | - WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT); |
---|
2147 | | - rsp->gp_req_activity = jiffies; |
---|
2148 | | - trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gp_seq), |
---|
| 2068 | + offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) && |
---|
| 2069 | + rcu_segcblist_is_offloaded(&rdp->cblist); |
---|
| 2070 | + if ((offloaded || !rcu_accelerate_cbs(rnp, rdp)) && needgp) { |
---|
| 2071 | + WRITE_ONCE(rcu_state.gp_flags, RCU_GP_FLAG_INIT); |
---|
| 2072 | + WRITE_ONCE(rcu_state.gp_req_activity, jiffies); |
---|
| 2073 | + trace_rcu_grace_period(rcu_state.name, |
---|
| 2074 | + rcu_state.gp_seq, |
---|
2149 | 2075 | TPS("newreq")); |
---|
2150 | 2076 | } else { |
---|
2151 | | - WRITE_ONCE(rsp->gp_flags, rsp->gp_flags & RCU_GP_FLAG_INIT); |
---|
| 2077 | + WRITE_ONCE(rcu_state.gp_flags, |
---|
| 2078 | + rcu_state.gp_flags & RCU_GP_FLAG_INIT); |
---|
2152 | 2079 | } |
---|
2153 | 2080 | raw_spin_unlock_irq_rcu_node(rnp); |
---|
| 2081 | + |
---|
| 2082 | + // If strict, make all CPUs aware of the end of the old grace period. |
---|
| 2083 | + if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) |
---|
| 2084 | + on_each_cpu(rcu_strict_gp_boundary, NULL, 0); |
---|
2154 | 2085 | } |
---|
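rcu_seq_start() and rcu_seq_end() above treat rcu_state.gp_seq as a combined value: the bottom bits record the grace-period phase and the remaining bits count grace periods, which is also why the "end" tracepoint fires before the counter advances. A simplified sketch of that encoding; the shift width and helper names are illustrative stand-ins for the definitions in kernel/rcu/rcu.h, not copies of them:

#include <assert.h>

#define SEQ_CTR_SHIFT  2
#define SEQ_STATE_MASK ((1UL << SEQ_CTR_SHIFT) - 1)

static unsigned long seq_state(unsigned long s) { return s & SEQ_STATE_MASK; }
static unsigned long seq_ctr(unsigned long s)   { return s >> SEQ_CTR_SHIFT; }

/* Mark a grace period as started: low bits go from zero to nonzero. */
static void seq_start(unsigned long *sp)
{
	*sp += 1;
	assert(seq_state(*sp) == 1);
}

/* Mark it as ended: round up to the next counter value with zero state bits. */
static void seq_end(unsigned long *sp)
{
	*sp = (*sp | SEQ_STATE_MASK) + 1;
	assert(seq_state(*sp) == 0);
}

int main(void)
{
	unsigned long gp_seq = 0;

	seq_start(&gp_seq);	/* in progress: counter 0, state 1 */
	seq_end(&gp_seq);	/* idle again:  counter 1, state 0 */
	assert(seq_ctr(gp_seq) == 1 && seq_state(gp_seq) == 0);
	return 0;
}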
2155 | 2086 | |
---|
2156 | 2087 | /* |
---|
2157 | 2088 | * Body of kthread that handles grace periods. |
---|
2158 | 2089 | */ |
---|
2159 | | -static int __noreturn rcu_gp_kthread(void *arg) |
---|
| 2090 | +static int __noreturn rcu_gp_kthread(void *unused) |
---|
2160 | 2091 | { |
---|
2161 | | - bool first_gp_fqs; |
---|
2162 | | - int gf; |
---|
2163 | | - unsigned long j; |
---|
2164 | | - int ret; |
---|
2165 | | - struct rcu_state *rsp = arg; |
---|
2166 | | - struct rcu_node *rnp = rcu_get_root(rsp); |
---|
2167 | | - |
---|
2168 | 2092 | rcu_bind_gp_kthread(); |
---|
2169 | 2093 | for (;;) { |
---|
2170 | 2094 | |
---|
2171 | 2095 | /* Handle grace-period start. */ |
---|
2172 | 2096 | for (;;) { |
---|
2173 | | - trace_rcu_grace_period(rsp->name, |
---|
2174 | | - READ_ONCE(rsp->gp_seq), |
---|
| 2097 | + trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, |
---|
2175 | 2098 | TPS("reqwait")); |
---|
2176 | | - rsp->gp_state = RCU_GP_WAIT_GPS; |
---|
2177 | | - swait_event_idle_exclusive(rsp->gp_wq, READ_ONCE(rsp->gp_flags) & |
---|
2178 | | - RCU_GP_FLAG_INIT); |
---|
2179 | | - rsp->gp_state = RCU_GP_DONE_GPS; |
---|
| 2099 | + rcu_state.gp_state = RCU_GP_WAIT_GPS; |
---|
| 2100 | + swait_event_idle_exclusive(rcu_state.gp_wq, |
---|
| 2101 | + READ_ONCE(rcu_state.gp_flags) & |
---|
| 2102 | + RCU_GP_FLAG_INIT); |
---|
| 2103 | + rcu_gp_torture_wait(); |
---|
| 2104 | + rcu_state.gp_state = RCU_GP_DONE_GPS; |
---|
2180 | 2105 | /* Locking provides needed memory barrier. */ |
---|
2181 | | - if (rcu_gp_init(rsp)) |
---|
| 2106 | + if (rcu_gp_init()) |
---|
2182 | 2107 | break; |
---|
2183 | 2108 | cond_resched_tasks_rcu_qs(); |
---|
2184 | | - WRITE_ONCE(rsp->gp_activity, jiffies); |
---|
| 2109 | + WRITE_ONCE(rcu_state.gp_activity, jiffies); |
---|
2185 | 2110 | WARN_ON(signal_pending(current)); |
---|
2186 | | - trace_rcu_grace_period(rsp->name, |
---|
2187 | | - READ_ONCE(rsp->gp_seq), |
---|
| 2111 | + trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, |
---|
2188 | 2112 | TPS("reqwaitsig")); |
---|
2189 | 2113 | } |
---|
2190 | 2114 | |
---|
2191 | 2115 | /* Handle quiescent-state forcing. */ |
---|
2192 | | - first_gp_fqs = true; |
---|
2193 | | - j = jiffies_till_first_fqs; |
---|
2194 | | - ret = 0; |
---|
2195 | | - for (;;) { |
---|
2196 | | - if (!ret) { |
---|
2197 | | - rsp->jiffies_force_qs = jiffies + j; |
---|
2198 | | - WRITE_ONCE(rsp->jiffies_kick_kthreads, |
---|
2199 | | - jiffies + 3 * j); |
---|
2200 | | - } |
---|
2201 | | - trace_rcu_grace_period(rsp->name, |
---|
2202 | | - READ_ONCE(rsp->gp_seq), |
---|
2203 | | - TPS("fqswait")); |
---|
2204 | | - rsp->gp_state = RCU_GP_WAIT_FQS; |
---|
2205 | | - ret = swait_event_idle_timeout_exclusive(rsp->gp_wq, |
---|
2206 | | - rcu_gp_fqs_check_wake(rsp, &gf), j); |
---|
2207 | | - rsp->gp_state = RCU_GP_DOING_FQS; |
---|
2208 | | - /* Locking provides needed memory barriers. */ |
---|
2209 | | - /* If grace period done, leave loop. */ |
---|
2210 | | - if (!READ_ONCE(rnp->qsmask) && |
---|
2211 | | - !rcu_preempt_blocked_readers_cgp(rnp)) |
---|
2212 | | - break; |
---|
2213 | | - /* If time for quiescent-state forcing, do it. */ |
---|
2214 | | - if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) || |
---|
2215 | | - (gf & RCU_GP_FLAG_FQS)) { |
---|
2216 | | - trace_rcu_grace_period(rsp->name, |
---|
2217 | | - READ_ONCE(rsp->gp_seq), |
---|
2218 | | - TPS("fqsstart")); |
---|
2219 | | - rcu_gp_fqs(rsp, first_gp_fqs); |
---|
2220 | | - first_gp_fqs = false; |
---|
2221 | | - trace_rcu_grace_period(rsp->name, |
---|
2222 | | - READ_ONCE(rsp->gp_seq), |
---|
2223 | | - TPS("fqsend")); |
---|
2224 | | - cond_resched_tasks_rcu_qs(); |
---|
2225 | | - WRITE_ONCE(rsp->gp_activity, jiffies); |
---|
2226 | | - ret = 0; /* Force full wait till next FQS. */ |
---|
2227 | | - j = jiffies_till_next_fqs; |
---|
2228 | | - } else { |
---|
2229 | | - /* Deal with stray signal. */ |
---|
2230 | | - cond_resched_tasks_rcu_qs(); |
---|
2231 | | - WRITE_ONCE(rsp->gp_activity, jiffies); |
---|
2232 | | - WARN_ON(signal_pending(current)); |
---|
2233 | | - trace_rcu_grace_period(rsp->name, |
---|
2234 | | - READ_ONCE(rsp->gp_seq), |
---|
2235 | | - TPS("fqswaitsig")); |
---|
2236 | | - ret = 1; /* Keep old FQS timing. */ |
---|
2237 | | - j = jiffies; |
---|
2238 | | - if (time_after(jiffies, rsp->jiffies_force_qs)) |
---|
2239 | | - j = 1; |
---|
2240 | | - else |
---|
2241 | | - j = rsp->jiffies_force_qs - j; |
---|
2242 | | - } |
---|
2243 | | - } |
---|
| 2116 | + rcu_gp_fqs_loop(); |
---|
2244 | 2117 | |
---|
2245 | 2118 | /* Handle grace-period end. */ |
---|
2246 | | - rsp->gp_state = RCU_GP_CLEANUP; |
---|
2247 | | - rcu_gp_cleanup(rsp); |
---|
2248 | | - rsp->gp_state = RCU_GP_CLEANED; |
---|
| 2119 | + rcu_state.gp_state = RCU_GP_CLEANUP; |
---|
| 2120 | + rcu_gp_cleanup(); |
---|
| 2121 | + rcu_state.gp_state = RCU_GP_CLEANED; |
---|
2249 | 2122 | } |
---|
2250 | 2123 | } |
---|
2251 | 2124 | |
---|
2252 | 2125 | /* |
---|
2253 | | - * Report a full set of quiescent states to the specified rcu_state data |
---|
2254 | | - * structure. Invoke rcu_gp_kthread_wake() to awaken the grace-period |
---|
2255 | | - * kthread if another grace period is required. Whether we wake |
---|
2256 | | - * the grace-period kthread or it awakens itself for the next round |
---|
2257 | | - * of quiescent-state forcing, that kthread will clean up after the |
---|
2258 | | - * just-completed grace period. Note that the caller must hold rnp->lock, |
---|
2259 | | - * which is released before return. |
---|
| 2126 | + * Report a full set of quiescent states to the rcu_state data structure. |
---|
| 2127 | + * Invoke rcu_gp_kthread_wake() to awaken the grace-period kthread if |
---|
| 2128 | + * another grace period is required. Whether we wake the grace-period |
---|
| 2129 | + * kthread or it awakens itself for the next round of quiescent-state |
---|
| 2130 | + * forcing, that kthread will clean up after the just-completed grace |
---|
| 2131 | + * period. Note that the caller must hold rnp->lock, which is released |
---|
| 2132 | + * before return. |
---|
2260 | 2133 | */ |
---|
2261 | | -static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) |
---|
2262 | | - __releases(rcu_get_root(rsp)->lock) |
---|
| 2134 | +static void rcu_report_qs_rsp(unsigned long flags) |
---|
| 2135 | + __releases(rcu_get_root()->lock) |
---|
2263 | 2136 | { |
---|
2264 | | - raw_lockdep_assert_held_rcu_node(rcu_get_root(rsp)); |
---|
2265 | | - WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); |
---|
2266 | | - WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS); |
---|
2267 | | - raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags); |
---|
2268 | | - rcu_gp_kthread_wake(rsp); |
---|
| 2137 | + raw_lockdep_assert_held_rcu_node(rcu_get_root()); |
---|
| 2138 | + WARN_ON_ONCE(!rcu_gp_in_progress()); |
---|
| 2139 | + WRITE_ONCE(rcu_state.gp_flags, |
---|
| 2140 | + READ_ONCE(rcu_state.gp_flags) | RCU_GP_FLAG_FQS); |
---|
| 2141 | + raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(), flags); |
---|
| 2142 | + rcu_gp_kthread_wake(); |
---|
2269 | 2143 | } |
---|
2270 | 2144 | |
---|
2271 | 2145 | /* |
---|
.. | .. |
---|
2282 | 2156 | * disabled. This allows propagating quiescent state due to resumed tasks |
---|
2283 | 2157 | * during grace-period initialization. |
---|
2284 | 2158 | */ |
---|
2285 | | -static void |
---|
2286 | | -rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, |
---|
2287 | | - struct rcu_node *rnp, unsigned long gps, unsigned long flags) |
---|
| 2159 | +static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp, |
---|
| 2160 | + unsigned long gps, unsigned long flags) |
---|
2288 | 2161 | __releases(rnp->lock) |
---|
2289 | 2162 | { |
---|
2290 | 2163 | unsigned long oldmask = 0; |
---|
.. | .. |
---|
2306 | 2179 | WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */ |
---|
2307 | 2180 | WARN_ON_ONCE(!rcu_is_leaf_node(rnp) && |
---|
2308 | 2181 | rcu_preempt_blocked_readers_cgp(rnp)); |
---|
2309 | | - rnp->qsmask &= ~mask; |
---|
2310 | | - trace_rcu_quiescent_state_report(rsp->name, rnp->gp_seq, |
---|
| 2182 | + WRITE_ONCE(rnp->qsmask, rnp->qsmask & ~mask); |
---|
| 2183 | + trace_rcu_quiescent_state_report(rcu_state.name, rnp->gp_seq, |
---|
2311 | 2184 | mask, rnp->qsmask, rnp->level, |
---|
2312 | 2185 | rnp->grplo, rnp->grphi, |
---|
2313 | 2186 | !!rnp->gp_tasks); |
---|
.. | .. |
---|
2329 | 2202 | rnp_c = rnp; |
---|
2330 | 2203 | rnp = rnp->parent; |
---|
2331 | 2204 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
---|
2332 | | - oldmask = rnp_c->qsmask; |
---|
| 2205 | + oldmask = READ_ONCE(rnp_c->qsmask); |
---|
2333 | 2206 | } |
---|
2334 | 2207 | |
---|
2335 | 2208 | /* |
---|
.. | .. |
---|
2337 | 2210 | * state for this grace period. Invoke rcu_report_qs_rsp() |
---|
2338 | 2211 | * to clean up and start the next grace period if one is needed. |
---|
2339 | 2212 | */ |
---|
2340 | | - rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */ |
---|
| 2213 | + rcu_report_qs_rsp(flags); /* releases rnp->lock. */ |
---|
2341 | 2214 | } |
---|
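The loop in rcu_report_qs_rnp() is the heart of the tree: clear the reporting CPUs' bits in the leaf's ->qsmask, and only when a node empties does its own bit get cleared in the parent, so contention on the upper levels is limited to the last reporter in each subtree. A toy two-level version of that propagation, with made-up structures and no locking or blocked-task handling:

#include <stdio.h>

struct node {
	unsigned long qsmask;	/* CPUs/children still owing a quiescent state */
	unsigned long grpmask;	/* this node's bit in its parent's qsmask */
	struct node *parent;
};

/* Clear @mask in @np; if the node empties, propagate one level up. */
static void report_qs(struct node *np, unsigned long mask)
{
	for (; np; np = np->parent) {
		np->qsmask &= ~mask;
		if (np->qsmask)		/* others still pending at this level */
			return;
		mask = np->grpmask;	/* this whole subtree is now quiescent */
	}
	printf("root empty: grace period may end\n");
}

int main(void)
{
	struct node root = { .qsmask = 0x1 };
	struct node leaf = { .qsmask = 0x3, .grpmask = 0x1, .parent = &root };

	report_qs(&leaf, 0x1);	/* CPU 0 reports; CPU 1 still pending */
	report_qs(&leaf, 0x2);	/* CPU 1 reports; leaf empties, then root */
	return 0;
}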
2342 | 2215 | |
---|
2343 | 2216 | /* |
---|
2344 | 2217 | * Record a quiescent state for all tasks that were previously queued |
---|
2345 | 2218 | * on the specified rcu_node structure and that were blocking the current |
---|
2346 | | - * RCU grace period. The caller must hold the specified rnp->lock with |
---|
| 2219 | + * RCU grace period. The caller must hold the corresponding rnp->lock with |
---|
2347 | 2220 | * irqs disabled, and this lock is released upon return, but irqs remain |
---|
2348 | 2221 | * disabled. |
---|
2349 | 2222 | */ |
---|
2350 | 2223 | static void __maybe_unused |
---|
2351 | | -rcu_report_unblock_qs_rnp(struct rcu_state *rsp, |
---|
2352 | | - struct rcu_node *rnp, unsigned long flags) |
---|
| 2224 | +rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) |
---|
2353 | 2225 | __releases(rnp->lock) |
---|
2354 | 2226 | { |
---|
2355 | 2227 | unsigned long gps; |
---|
.. | .. |
---|
2357 | 2229 | struct rcu_node *rnp_p; |
---|
2358 | 2230 | |
---|
2359 | 2231 | raw_lockdep_assert_held_rcu_node(rnp); |
---|
2360 | | - if (WARN_ON_ONCE(rcu_state_p == &rcu_sched_state) || |
---|
2361 | | - WARN_ON_ONCE(rsp != rcu_state_p) || |
---|
| 2232 | + if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT_RCU)) || |
---|
2362 | 2233 | WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)) || |
---|
2363 | 2234 | rnp->qsmask != 0) { |
---|
2364 | 2235 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
.. | .. |
---|
2372 | 2243 | * Only one rcu_node structure in the tree, so don't |
---|
2373 | 2244 | * try to report up to its nonexistent parent! |
---|
2374 | 2245 | */ |
---|
2375 | | - rcu_report_qs_rsp(rsp, flags); |
---|
| 2246 | + rcu_report_qs_rsp(flags); |
---|
2376 | 2247 | return; |
---|
2377 | 2248 | } |
---|
2378 | 2249 | |
---|
.. | .. |
---|
2381 | 2252 | mask = rnp->grpmask; |
---|
2382 | 2253 | raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ |
---|
2383 | 2254 | raw_spin_lock_rcu_node(rnp_p); /* irqs already disabled. */ |
---|
2384 | | - rcu_report_qs_rnp(mask, rsp, rnp_p, gps, flags); |
---|
| 2255 | + rcu_report_qs_rnp(mask, rnp_p, gps, flags); |
---|
2385 | 2256 | } |
---|
2386 | 2257 | |
---|
2387 | 2258 | /* |
---|
.. | .. |
---|
2389 | 2260 | * structure. This must be called from the specified CPU. |
---|
2390 | 2261 | */ |
---|
2391 | 2262 | static void |
---|
2392 | | -rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) |
---|
| 2263 | +rcu_report_qs_rdp(struct rcu_data *rdp) |
---|
2393 | 2264 | { |
---|
2394 | 2265 | unsigned long flags; |
---|
2395 | 2266 | unsigned long mask; |
---|
2396 | | - bool needwake; |
---|
| 2267 | + bool needwake = false; |
---|
| 2268 | + const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) && |
---|
| 2269 | + rcu_segcblist_is_offloaded(&rdp->cblist); |
---|
2397 | 2270 | struct rcu_node *rnp; |
---|
2398 | 2271 | |
---|
| 2272 | + WARN_ON_ONCE(rdp->cpu != smp_processor_id()); |
---|
2399 | 2273 | rnp = rdp->mynode; |
---|
2400 | 2274 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
---|
2401 | 2275 | if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq || |
---|
.. | .. |
---|
2408 | 2282 | * within the current grace period. |
---|
2409 | 2283 | */ |
---|
2410 | 2284 | rdp->cpu_no_qs.b.norm = true; /* need qs for new gp. */ |
---|
2411 | | - rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr); |
---|
2412 | 2285 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
2413 | 2286 | return; |
---|
2414 | 2287 | } |
---|
2415 | 2288 | mask = rdp->grpmask; |
---|
| 2289 | + rdp->core_needs_qs = false; |
---|
2416 | 2290 | if ((rnp->qsmask & mask) == 0) { |
---|
2417 | 2291 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
2418 | 2292 | } else { |
---|
2419 | | - rdp->core_needs_qs = false; |
---|
2420 | | - |
---|
2421 | 2293 | /* |
---|
2422 | 2294 | * This GP can't end until cpu checks in, so all of our |
---|
2423 | 2295 | * callbacks can be processed during the next GP. |
---|
2424 | 2296 | */ |
---|
2425 | | - needwake = rcu_accelerate_cbs(rsp, rnp, rdp); |
---|
| 2297 | + if (!offloaded) |
---|
| 2298 | + needwake = rcu_accelerate_cbs(rnp, rdp); |
---|
2426 | 2299 | |
---|
2427 | | - rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags); |
---|
| 2300 | + rcu_disable_urgency_upon_qs(rdp); |
---|
| 2301 | + rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); |
---|
2428 | 2302 | /* ^^^ Released rnp->lock */ |
---|
2429 | 2303 | if (needwake) |
---|
2430 | | - rcu_gp_kthread_wake(rsp); |
---|
| 2304 | + rcu_gp_kthread_wake(); |
---|
2431 | 2305 | } |
---|
2432 | 2306 | } |
---|
2433 | 2307 | |
---|
.. | .. |
---|
2438 | 2312 | * quiescent state for this grace period, and record that fact if so. |
---|
2439 | 2313 | */ |
---|
2440 | 2314 | static void |
---|
2441 | | -rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) |
---|
| 2315 | +rcu_check_quiescent_state(struct rcu_data *rdp) |
---|
2442 | 2316 | { |
---|
2443 | 2317 | /* Check for grace-period ends and beginnings. */ |
---|
2444 | | - note_gp_changes(rsp, rdp); |
---|
| 2318 | + note_gp_changes(rdp); |
---|
2445 | 2319 | |
---|
2446 | 2320 | /* |
---|
2447 | 2321 | * Does this CPU still need to do its part for current grace period? |
---|
.. | .. |
---|
2461 | 2335 | * Tell RCU we are done (but rcu_report_qs_rdp() will be the |
---|
2462 | 2336 | * judge of that). |
---|
2463 | 2337 | */ |
---|
2464 | | - rcu_report_qs_rdp(rdp->cpu, rsp, rdp); |
---|
| 2338 | + rcu_report_qs_rdp(rdp); |
---|
2465 | 2339 | } |
---|
2466 | 2340 | |
---|
2467 | 2341 | /* |
---|
2468 | | - * Trace the fact that this CPU is going offline. |
---|
| 2342 | + * Near the end of the offline process. Trace the fact that this CPU |
---|
| 2343 | + * is going offline. |
---|
2469 | 2344 | */ |
---|
2470 | | -static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) |
---|
| 2345 | +int rcutree_dying_cpu(unsigned int cpu) |
---|
2471 | 2346 | { |
---|
2472 | | - RCU_TRACE(bool blkd;) |
---|
2473 | | - RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda);) |
---|
2474 | | - RCU_TRACE(struct rcu_node *rnp = rdp->mynode;) |
---|
| 2347 | + bool blkd; |
---|
| 2348 | + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); |
---|
| 2349 | + struct rcu_node *rnp = rdp->mynode; |
---|
2475 | 2350 | |
---|
2476 | 2351 | if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) |
---|
2477 | | - return; |
---|
| 2352 | + return 0; |
---|
2478 | 2353 | |
---|
2479 | | - RCU_TRACE(blkd = !!(rnp->qsmask & rdp->grpmask);) |
---|
2480 | | - trace_rcu_grace_period(rsp->name, rnp->gp_seq, |
---|
| 2354 | + blkd = !!(rnp->qsmask & rdp->grpmask); |
---|
| 2355 | + trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq), |
---|
2481 | 2356 | blkd ? TPS("cpuofl") : TPS("cpuofl-bgp")); |
---|
| 2357 | + return 0; |
---|
2482 | 2358 | } |
---|
2483 | 2359 | |
---|
2484 | 2360 | /* |
---|
.. | .. |
---|
2532 | 2408 | * There can only be one CPU hotplug operation at a time, so no need for |
---|
2533 | 2409 | * explicit locking. |
---|
2534 | 2410 | */ |
---|
2535 | | -static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) |
---|
| 2411 | +int rcutree_dead_cpu(unsigned int cpu) |
---|
2536 | 2412 | { |
---|
2537 | | - struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
---|
| 2413 | + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
---|
2538 | 2414 | struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ |
---|
2539 | 2415 | |
---|
2540 | 2416 | if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) |
---|
2541 | | - return; |
---|
| 2417 | + return 0; |
---|
2542 | 2418 | |
---|
2543 | 2419 | /* Adjust any no-longer-needed kthreads. */ |
---|
2544 | 2420 | rcu_boost_kthread_setaffinity(rnp, -1); |
---|
| 2421 | + /* Do any needed no-CB deferred wakeups from this CPU. */ |
---|
| 2422 | + do_nocb_deferred_wakeup(per_cpu_ptr(&rcu_data, cpu)); |
---|
| 2423 | + |
---|
| 2424 | + // Stop-machine done, so allow nohz_full to disable tick. |
---|
| 2425 | + tick_dep_clear(TICK_DEP_BIT_RCU); |
---|
| 2426 | + return 0; |
---|
2545 | 2427 | } |
---|
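rcutree_dying_cpu() and rcutree_dead_cpu() now carry the CPU-hotplug callback signature (int return, unsigned int cpu argument). For comparison, this is roughly how an ordinary subsystem would register callbacks of that shape through the cpuhp API; RCU's own callbacks are wired up by the core hotplug code rather than by a runtime cpuhp_setup_state() call, and every name below is hypothetical:

#include <linux/cpuhotplug.h>
#include <linux/module.h>
#include <linux/printk.h>

static int my_subsys_online(unsigned int cpu)
{
	pr_info("my_subsys: cpu %u coming up\n", cpu);
	return 0;	/* nonzero would abort the hotplug operation */
}

static int my_subsys_offline(unsigned int cpu)
{
	pr_info("my_subsys: cpu %u going down\n", cpu);
	return 0;
}

static int __init my_subsys_init(void)
{
	int ret;

	/* Dynamic state: startup runs at online, teardown as the CPU goes down. */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "my_subsys:online",
				my_subsys_online, my_subsys_offline);
	return ret < 0 ? ret : 0;
}
module_init(my_subsys_init);
MODULE_LICENSE("GPL");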
2546 | 2428 | |
---|
2547 | 2429 | /* |
---|
2548 | 2430 | * Invoke any RCU callbacks that have made it to the end of their grace |
---|
2549 | 2431 | * period. Throttle as specified by rdp->blimit. |
---|
2550 | 2432 | */ |
---|
2551 | | -static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) |
---|
| 2433 | +static void rcu_do_batch(struct rcu_data *rdp) |
---|
2552 | 2434 | { |
---|
| 2435 | + int div; |
---|
2553 | 2436 | unsigned long flags; |
---|
| 2437 | + const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) && |
---|
| 2438 | + rcu_segcblist_is_offloaded(&rdp->cblist); |
---|
2554 | 2439 | struct rcu_head *rhp; |
---|
2555 | 2440 | struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl); |
---|
2556 | 2441 | long bl, count; |
---|
| 2442 | + long pending, tlimit = 0; |
---|
2557 | 2443 | |
---|
2558 | 2444 | /* If no callbacks are ready, just return. */ |
---|
2559 | 2445 | if (!rcu_segcblist_ready_cbs(&rdp->cblist)) { |
---|
2560 | | - trace_rcu_batch_start(rsp->name, |
---|
2561 | | - rcu_segcblist_n_lazy_cbs(&rdp->cblist), |
---|
| 2446 | + trace_rcu_batch_start(rcu_state.name, |
---|
2562 | 2447 | rcu_segcblist_n_cbs(&rdp->cblist), 0); |
---|
2563 | | - trace_rcu_batch_end(rsp->name, 0, |
---|
| 2448 | + trace_rcu_batch_end(rcu_state.name, 0, |
---|
2564 | 2449 | !rcu_segcblist_empty(&rdp->cblist), |
---|
2565 | 2450 | need_resched(), is_idle_task(current), |
---|
2566 | 2451 | rcu_is_callbacks_kthread()); |
---|
.. | .. |
---|
2573 | 2458 | * callback counts, as rcu_barrier() needs to be conservative. |
---|
2574 | 2459 | */ |
---|
2575 | 2460 | local_irq_save(flags); |
---|
| 2461 | + rcu_nocb_lock(rdp); |
---|
2576 | 2462 | WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); |
---|
2577 | | - bl = rdp->blimit; |
---|
2578 | | - trace_rcu_batch_start(rsp->name, rcu_segcblist_n_lazy_cbs(&rdp->cblist), |
---|
| 2463 | + pending = rcu_segcblist_n_cbs(&rdp->cblist); |
---|
| 2464 | + div = READ_ONCE(rcu_divisor); |
---|
| 2465 | + div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div; |
---|
| 2466 | + bl = max(rdp->blimit, pending >> div); |
---|
| 2467 | + if (in_serving_softirq() && unlikely(bl > 100)) { |
---|
| 2468 | + long rrn = READ_ONCE(rcu_resched_ns); |
---|
| 2469 | + |
---|
| 2470 | + rrn = rrn < NSEC_PER_MSEC ? NSEC_PER_MSEC : rrn > NSEC_PER_SEC ? NSEC_PER_SEC : rrn; |
---|
| 2471 | + tlimit = local_clock() + rrn; |
---|
| 2472 | + } |
---|
| 2473 | + trace_rcu_batch_start(rcu_state.name, |
---|
2579 | 2474 | rcu_segcblist_n_cbs(&rdp->cblist), bl); |
---|
2580 | 2475 | rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl); |
---|
2581 | | - local_irq_restore(flags); |
---|
| 2476 | + if (offloaded) |
---|
| 2477 | + rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist); |
---|
| 2478 | + rcu_nocb_unlock_irqrestore(rdp, flags); |
---|
2582 | 2479 | |
---|
2583 | 2480 | /* Invoke callbacks. */ |
---|
| 2481 | + tick_dep_set_task(current, TICK_DEP_BIT_RCU); |
---|
2584 | 2482 | rhp = rcu_cblist_dequeue(&rcl); |
---|
2585 | 2483 | for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) { |
---|
| 2484 | + rcu_callback_t f; |
---|
| 2485 | + |
---|
2586 | 2486 | debug_rcu_head_unqueue(rhp); |
---|
2587 | | - if (__rcu_reclaim(rsp->name, rhp)) |
---|
2588 | | - rcu_cblist_dequeued_lazy(&rcl); |
---|
| 2487 | + |
---|
| 2488 | + rcu_lock_acquire(&rcu_callback_map); |
---|
| 2489 | + trace_rcu_invoke_callback(rcu_state.name, rhp); |
---|
| 2490 | + |
---|
| 2491 | + f = rhp->func; |
---|
| 2492 | + WRITE_ONCE(rhp->func, (rcu_callback_t)0L); |
---|
| 2493 | + f(rhp); |
---|
| 2494 | + |
---|
| 2495 | + rcu_lock_release(&rcu_callback_map); |
---|
| 2496 | + |
---|
2589 | 2497 | /* |
---|
2590 | 2498 | * Stop only if limit reached and CPU has something to do. |
---|
2591 | 2499 | * Note: The rcl structure counts down from zero. |
---|
2592 | 2500 | */ |
---|
2593 | | - if (-rcl.len >= bl && |
---|
2594 | | - (need_resched() || |
---|
2595 | | - (!is_idle_task(current) && !rcu_is_callbacks_kthread()))) |
---|
2596 | | - break; |
---|
| 2501 | + if (in_serving_softirq()) { |
---|
| 2502 | + if (-rcl.len >= bl && (need_resched() || |
---|
| 2503 | + (!is_idle_task(current) && !rcu_is_callbacks_kthread()))) |
---|
| 2504 | + break; |
---|
| 2505 | + |
---|
| 2506 | + /* |
---|
| 2507 | + * Make sure we don't spend too much time here and deprive other |
---|
| 2508 | + * softirq vectors of CPU cycles. |
---|
| 2509 | + */ |
---|
| 2510 | + if (unlikely(tlimit)) { |
---|
| 2511 | + /* only call local_clock() every 32 callbacks */ |
---|
| 2512 | + if (likely((-rcl.len & 31) || local_clock() < tlimit)) |
---|
| 2513 | + continue; |
---|
| 2514 | + /* Exceeded the time limit, so leave. */ |
---|
| 2515 | + break; |
---|
| 2516 | + } |
---|
| 2517 | + } else { |
---|
| 2518 | + local_bh_enable(); |
---|
| 2519 | + lockdep_assert_irqs_enabled(); |
---|
| 2520 | + cond_resched_tasks_rcu_qs(); |
---|
| 2521 | + lockdep_assert_irqs_enabled(); |
---|
| 2522 | + local_bh_disable(); |
---|
| 2523 | + } |
---|
2597 | 2524 | } |
---|
2598 | 2525 | |
---|
2599 | 2526 | local_irq_save(flags); |
---|
| 2527 | + rcu_nocb_lock(rdp); |
---|
2600 | 2528 | count = -rcl.len; |
---|
2601 | | - trace_rcu_batch_end(rsp->name, count, !!rcl.head, need_resched(), |
---|
| 2529 | + rdp->n_cbs_invoked += count; |
---|
| 2530 | + trace_rcu_batch_end(rcu_state.name, count, !!rcl.head, need_resched(), |
---|
2602 | 2531 | is_idle_task(current), rcu_is_callbacks_kthread()); |
---|
2603 | 2532 | |
---|
2604 | 2533 | /* Update counts and requeue any remaining callbacks. */ |
---|
.. | .. |
---|
2608 | 2537 | |
---|
2609 | 2538 | /* Reinstate batch limit if we have worked down the excess. */ |
---|
2610 | 2539 | count = rcu_segcblist_n_cbs(&rdp->cblist); |
---|
2611 | | - if (rdp->blimit == LONG_MAX && count <= qlowmark) |
---|
| 2540 | + if (rdp->blimit >= DEFAULT_MAX_RCU_BLIMIT && count <= qlowmark) |
---|
2612 | 2541 | rdp->blimit = blimit; |
---|
2613 | 2542 | |
---|
2614 | 2543 | /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ |
---|
2615 | 2544 | if (count == 0 && rdp->qlen_last_fqs_check != 0) { |
---|
2616 | 2545 | rdp->qlen_last_fqs_check = 0; |
---|
2617 | | - rdp->n_force_qs_snap = rsp->n_force_qs; |
---|
| 2546 | + rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); |
---|
2618 | 2547 | } else if (count < rdp->qlen_last_fqs_check - qhimark) |
---|
2619 | 2548 | rdp->qlen_last_fqs_check = count; |
---|
2620 | 2549 | |
---|
.. | .. |
---|
2622 | 2551 | * The following usually indicates a double call_rcu(). To track |
---|
2623 | 2552 | * this down, try building with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y. |
---|
2624 | 2553 | */ |
---|
2625 | | - WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) != (count == 0)); |
---|
| 2554 | + WARN_ON_ONCE(count == 0 && !rcu_segcblist_empty(&rdp->cblist)); |
---|
| 2555 | + WARN_ON_ONCE(!IS_ENABLED(CONFIG_RCU_NOCB_CPU) && |
---|
| 2556 | + count != 0 && rcu_segcblist_empty(&rdp->cblist)); |
---|
2626 | 2557 | |
---|
2627 | | - local_irq_restore(flags); |
---|
| 2558 | + rcu_nocb_unlock_irqrestore(rdp, flags); |
---|
2628 | 2559 | |
---|
2629 | 2560 | /* Re-invoke RCU core processing if there are callbacks remaining. */ |
---|
2630 | | - if (rcu_segcblist_ready_cbs(&rdp->cblist)) |
---|
| 2561 | + if (!offloaded && rcu_segcblist_ready_cbs(&rdp->cblist)) |
---|
2631 | 2562 | invoke_rcu_core(); |
---|
| 2563 | + tick_dep_clear_task(current, TICK_DEP_BIT_RCU); |
---|
2632 | 2564 | } |
---|
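Two clamps in rcu_do_batch() deserve a closer look: the batch size becomes max(rdp->blimit, pending >> rcu_divisor) with the divisor forced into a sane shift range, and large softirq batches additionally get a wall-clock budget with rcu_resched_ns clamped to between one millisecond and one second. A standalone arithmetic sketch of both clamps (the numbers are examples, not values taken from the kernel):

#include <stdio.h>

#define NSEC_PER_MSEC 1000000L
#define NSEC_PER_SEC  1000000000L

int main(void)
{
	long blimit = 10;		/* per-invocation callback limit */
	long pending = 20000;		/* callbacks queued on this CPU */
	int div = 7;			/* mirrors the rcu_divisor parameter */
	long rcu_resched_ns = 3 * NSEC_PER_MSEC;

	/* Clamp the divisor to a usable shift count, then take the larger limit. */
	div = div < 0 ? 7 : div > (int)(sizeof(long) * 8 - 2) ? (int)(sizeof(long) * 8 - 2) : div;
	long bl = pending >> div;
	if (bl < blimit)
		bl = blimit;
	printf("batch limit: %ld callbacks (pending >> %d = %ld)\n", bl, div, pending >> div);

	/* Large softirq batches also get a time budget, clamped to [1 ms, 1 s]. */
	if (bl > 100) {
		long rrn = rcu_resched_ns;

		rrn = rrn < NSEC_PER_MSEC ? NSEC_PER_MSEC : rrn > NSEC_PER_SEC ? NSEC_PER_SEC : rrn;
		printf("time budget: %ld ns\n", rrn);
	}
	return 0;
}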
2633 | 2565 | |
---|
2634 | 2566 | /* |
---|
2635 | | - * Check to see if this CPU is in a non-context-switch quiescent state |
---|
2636 | | - * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). |
---|
2637 | | - * Also schedule RCU core processing. |
---|
2638 | | - * |
---|
2639 | | - * This function must be called from hardirq context. It is normally |
---|
2640 | | - * invoked from the scheduling-clock interrupt. |
---|
| 2567 | + * This function is invoked from each scheduling-clock interrupt, |
---|
| 2568 | + * and checks to see if this CPU is in a non-context-switch quiescent |
---|
| 2569 | + * state, for example, user mode or idle loop. It also schedules RCU |
---|
| 2570 | + * core processing. If the current grace period has gone on too long, |
---|
| 2571 | + * it will ask the scheduler to manufacture a context switch for the sole |
---|
| 2572 | + * purpose of providing the needed quiescent state. |
---|
2641 | 2573 | */ |
---|
2642 | | -void rcu_check_callbacks(int user) |
---|
| 2574 | +void rcu_sched_clock_irq(int user) |
---|
2643 | 2575 | { |
---|
2644 | 2576 | trace_rcu_utilization(TPS("Start scheduler-tick")); |
---|
2645 | | - increment_cpu_stall_ticks(); |
---|
2646 | | - if (user || rcu_is_cpu_rrupt_from_idle()) { |
---|
2647 | | - |
---|
2648 | | - /* |
---|
2649 | | - * Get here if this CPU took its interrupt from user |
---|
2650 | | - * mode or from the idle loop, and if this is not a |
---|
2651 | | - * nested interrupt. In this case, the CPU is in |
---|
2652 | | - * a quiescent state, so note it. |
---|
2653 | | - * |
---|
2654 | | - * No memory barrier is required here because both |
---|
2655 | | - * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local |
---|
2656 | | - * variables that other CPUs neither access nor modify, |
---|
2657 | | - * at least not while the corresponding CPU is online. |
---|
2658 | | - */ |
---|
2659 | | - |
---|
2660 | | - rcu_sched_qs(); |
---|
2661 | | - rcu_bh_qs(); |
---|
2662 | | - rcu_note_voluntary_context_switch(current); |
---|
2663 | | - |
---|
2664 | | - } else if (!in_softirq()) { |
---|
2665 | | - |
---|
2666 | | - /* |
---|
2667 | | - * Get here if this CPU did not take its interrupt from |
---|
2668 | | - * softirq, in other words, if it is not interrupting |
---|
2669 | | - * a rcu_bh read-side critical section. This is an _bh |
---|
2670 | | - * critical section, so note it. |
---|
2671 | | - */ |
---|
2672 | | - |
---|
2673 | | - rcu_bh_qs(); |
---|
2674 | | - } |
---|
2675 | | - rcu_preempt_check_callbacks(); |
---|
| 2577 | + lockdep_assert_irqs_disabled(); |
---|
| 2578 | + raw_cpu_inc(rcu_data.ticks_this_gp); |
---|
2676 | 2579 | /* The load-acquire pairs with the store-release setting to true. */ |
---|
2677 | | - if (smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) { |
---|
| 2580 | + if (smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) { |
---|
2678 | 2581 | /* Idle and userspace execution already are quiescent states. */ |
---|
2679 | 2582 | if (!rcu_is_cpu_rrupt_from_idle() && !user) { |
---|
2680 | 2583 | set_tsk_need_resched(current); |
---|
2681 | 2584 | set_preempt_need_resched(); |
---|
2682 | 2585 | } |
---|
2683 | | - __this_cpu_write(rcu_dynticks.rcu_urgent_qs, false); |
---|
| 2586 | + __this_cpu_write(rcu_data.rcu_urgent_qs, false); |
---|
2684 | 2587 | } |
---|
2685 | | - if (rcu_pending()) |
---|
| 2588 | + rcu_flavor_sched_clock_irq(user); |
---|
| 2589 | + if (rcu_pending(user)) |
---|
2686 | 2590 | invoke_rcu_core(); |
---|
| 2591 | + lockdep_assert_irqs_disabled(); |
---|
2687 | 2592 | |
---|
2688 | 2593 | trace_rcu_utilization(TPS("End scheduler-tick")); |
---|
2689 | 2594 | } |
---|
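The "load-acquire pairs with the store-release" comment in rcu_sched_clock_irq() is the classic message-passing idiom: whoever sets rcu_urgent_qs publishes its earlier writes with release semantics, and the acquire load here guarantees those writes are visible before the tick path acts on the flag. Sketched below with C11 atomics in place of the kernel's smp_store_release()/smp_load_acquire(); the payload and function names are invented:

#include <stdatomic.h>
#include <stdbool.h>
#include <assert.h>

static unsigned long payload;		/* e.g. which grace period needs help */
static _Atomic bool urgent_qs;		/* stands in for rcu_urgent_qs */

/* Writer: publish the payload, then raise the flag with release ordering. */
static void request_urgent_qs(unsigned long gp)
{
	payload = gp;
	atomic_store_explicit(&urgent_qs, true, memory_order_release);
}

/* Reader (the tick path): acquire load, then the payload is guaranteed visible. */
static void tick_handler(void)
{
	if (atomic_load_explicit(&urgent_qs, memory_order_acquire)) {
		assert(payload != 0);	/* safe: ordered after the flag */
		atomic_store_explicit(&urgent_qs, false, memory_order_relaxed);
	}
}

int main(void)
{
	request_urgent_qs(42);
	tick_handler();
	return 0;
}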
2690 | 2595 | |
---|
2691 | 2596 | /* |
---|
2692 | | - * Scan the leaf rcu_node structures, processing dyntick state for any that |
---|
2693 | | - * have not yet encountered a quiescent state, using the function specified. |
---|
2694 | | - * Also initiate boosting for any threads blocked on the root rcu_node. |
---|
2695 | | - * |
---|
2696 | | - * The caller must have suppressed start of new grace periods. |
---|
| 2597 | + * Scan the leaf rcu_node structures. For each structure on which all |
---|
| 2598 | + * CPUs have reported a quiescent state and on which there are tasks |
---|
| 2599 | + * blocking the current grace period, initiate RCU priority boosting. |
---|
| 2600 | + * Otherwise, invoke the specified function to check dyntick state for |
---|
| 2601 | + * each CPU that has not yet reported a quiescent state. |
---|
2697 | 2602 | */ |
---|
2698 | | -static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp)) |
---|
| 2603 | +static void force_qs_rnp(int (*f)(struct rcu_data *rdp)) |
---|
2699 | 2604 | { |
---|
2700 | 2605 | int cpu; |
---|
2701 | 2606 | unsigned long flags; |
---|
2702 | 2607 | unsigned long mask; |
---|
| 2608 | + struct rcu_data *rdp; |
---|
2703 | 2609 | struct rcu_node *rnp; |
---|
2704 | 2610 | |
---|
2705 | | - rcu_for_each_leaf_node(rsp, rnp) { |
---|
| 2611 | + rcu_state.cbovld = rcu_state.cbovldnext; |
---|
| 2612 | + rcu_state.cbovldnext = false; |
---|
| 2613 | + rcu_for_each_leaf_node(rnp) { |
---|
2706 | 2614 | cond_resched_tasks_rcu_qs(); |
---|
2707 | 2615 | mask = 0; |
---|
2708 | 2616 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
---|
| 2617 | + rcu_state.cbovldnext |= !!rnp->cbovldmask; |
---|
2709 | 2618 | if (rnp->qsmask == 0) { |
---|
2710 | | - if (rcu_state_p == &rcu_sched_state || |
---|
2711 | | - rsp != rcu_state_p || |
---|
2712 | | - rcu_preempt_blocked_readers_cgp(rnp)) { |
---|
| 2619 | + if (rcu_preempt_blocked_readers_cgp(rnp)) { |
---|
2713 | 2620 | /* |
---|
2714 | 2621 | * No point in scanning bits because they |
---|
2715 | 2622 | * are all zero. But we might need to |
---|
.. | .. |
---|
2722 | 2629 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
2723 | 2630 | continue; |
---|
2724 | 2631 | } |
---|
2725 | | - for_each_leaf_node_possible_cpu(rnp, cpu) { |
---|
2726 | | - unsigned long bit = leaf_node_cpu_bit(rnp, cpu); |
---|
2727 | | - if ((rnp->qsmask & bit) != 0) { |
---|
2728 | | - if (f(per_cpu_ptr(rsp->rda, cpu))) |
---|
2729 | | - mask |= bit; |
---|
| 2632 | + for_each_leaf_node_cpu_mask(rnp, cpu, rnp->qsmask) { |
---|
| 2633 | + rdp = per_cpu_ptr(&rcu_data, cpu); |
---|
| 2634 | + if (f(rdp)) { |
---|
| 2635 | + mask |= rdp->grpmask; |
---|
| 2636 | + rcu_disable_urgency_upon_qs(rdp); |
---|
2730 | 2637 | } |
---|
2731 | 2638 | } |
---|
2732 | 2639 | if (mask != 0) { |
---|
2733 | 2640 | /* Idle/offline CPUs, report (releases rnp->lock). */ |
---|
2734 | | - rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags); |
---|
| 2641 | + rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); |
---|
2735 | 2642 | } else { |
---|
2736 | 2643 | /* Nothing to do here, so just drop the lock. */ |
---|
2737 | 2644 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
.. | .. |
---|
2743 | 2650 | * Force quiescent states on reluctant CPUs, and also detect which |
---|
2744 | 2651 | * CPUs are in dyntick-idle mode. |
---|
2745 | 2652 | */ |
---|
2746 | | -static void force_quiescent_state(struct rcu_state *rsp) |
---|
| 2653 | +void rcu_force_quiescent_state(void) |
---|
2747 | 2654 | { |
---|
2748 | 2655 | unsigned long flags; |
---|
2749 | 2656 | bool ret; |
---|
.. | .. |
---|
2751 | 2658 | struct rcu_node *rnp_old = NULL; |
---|
2752 | 2659 | |
---|
2753 | 2660 | /* Funnel through hierarchy to reduce memory contention. */ |
---|
2754 | | - rnp = __this_cpu_read(rsp->rda->mynode); |
---|
| 2661 | + rnp = raw_cpu_read(rcu_data.mynode); |
---|
2755 | 2662 | for (; rnp != NULL; rnp = rnp->parent) { |
---|
2756 | | - ret = (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) || |
---|
2757 | | - !raw_spin_trylock(&rnp->fqslock); |
---|
| 2663 | + ret = (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) || |
---|
| 2664 | + !raw_spin_trylock(&rnp->fqslock); |
---|
2758 | 2665 | if (rnp_old != NULL) |
---|
2759 | 2666 | raw_spin_unlock(&rnp_old->fqslock); |
---|
2760 | 2667 | if (ret) |
---|
2761 | 2668 | return; |
---|
2762 | 2669 | rnp_old = rnp; |
---|
2763 | 2670 | } |
---|
2764 | | - /* rnp_old == rcu_get_root(rsp), rnp == NULL. */ |
---|
| 2671 | + /* rnp_old == rcu_get_root(), rnp == NULL. */ |
---|
2765 | 2672 | |
---|
2766 | 2673 | /* Reached the root of the rcu_node tree, acquire lock. */ |
---|
2767 | 2674 | raw_spin_lock_irqsave_rcu_node(rnp_old, flags); |
---|
2768 | 2675 | raw_spin_unlock(&rnp_old->fqslock); |
---|
2769 | | - if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { |
---|
| 2676 | + if (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) { |
---|
2770 | 2677 | raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags); |
---|
2771 | 2678 | return; /* Someone beat us to it. */ |
---|
2772 | 2679 | } |
---|
2773 | | - WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS); |
---|
| 2680 | + WRITE_ONCE(rcu_state.gp_flags, |
---|
| 2681 | + READ_ONCE(rcu_state.gp_flags) | RCU_GP_FLAG_FQS); |
---|
2774 | 2682 | raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags); |
---|
2775 | | - rcu_gp_kthread_wake(rsp); |
---|
| 2683 | + rcu_gp_kthread_wake(); |
---|
| 2684 | +} |
---|
| 2685 | +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); |
---|
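rcu_force_quiescent_state() climbs the tree with funnel locking: trylock the current level's ->fqslock, drop the previous level's lock, and bail out as soon as a trylock fails or the FQS flag is already set, so at most one requester per subtree ever reaches the root. A compact pthread sketch of the same idea, leaving out interrupts and the flag check; all names are hypothetical:

#include <pthread.h>
#include <stdio.h>

#define LEVELS 3

/* One trylock per level, leaf (LEVELS - 1) up to root (0). */
static pthread_mutex_t fqslock[LEVELS] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};

/* Climb toward the root; only one winner per subtree makes it all the way. */
static void funnel_request(void)
{
	int held = -1;		/* level whose lock we currently hold, if any */

	for (int level = LEVELS - 1; level >= 0; level--) {
		int failed = pthread_mutex_trylock(&fqslock[level]);

		if (held >= 0)
			pthread_mutex_unlock(&fqslock[held]);
		if (failed)
			return;	/* someone else is already funnelling this request */
		held = level;
	}
	printf("reached the root: set the force-quiescent-state flag here\n");
	pthread_mutex_unlock(&fqslock[0]);
}

int main(void)
{
	funnel_request();
	return 0;
}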
| 2686 | + |
---|
| 2687 | +// Workqueue handler for an RCU reader for kernels enforcing strict RCU |
---|
| 2688 | +// grace periods. |
---|
| 2689 | +static void strict_work_handler(struct work_struct *work) |
---|
| 2690 | +{ |
---|
| 2691 | + rcu_read_lock(); |
---|
| 2692 | + rcu_read_unlock(); |
---|
2776 | 2693 | } |
---|
2777 | 2694 | |
---|
2778 | | -/* |
---|
2779 | | - * This function checks for grace-period requests that fail to motivate |
---|
2780 | | - * RCU to come out of its idle mode. |
---|
2781 | | - */ |
---|
2782 | | -static void |
---|
2783 | | -rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp, |
---|
2784 | | - struct rcu_data *rdp) |
---|
2785 | | -{ |
---|
2786 | | - const unsigned long gpssdelay = rcu_jiffies_till_stall_check() * HZ; |
---|
2787 | | - unsigned long flags; |
---|
2788 | | - unsigned long j; |
---|
2789 | | - struct rcu_node *rnp_root = rcu_get_root(rsp); |
---|
2790 | | - static atomic_t warned = ATOMIC_INIT(0); |
---|
2791 | | - |
---|
2792 | | - if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress(rsp) || |
---|
2793 | | - ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed)) |
---|
2794 | | - return; |
---|
2795 | | - j = jiffies; /* Expensive access, and in common case don't get here. */ |
---|
2796 | | - if (time_before(j, READ_ONCE(rsp->gp_req_activity) + gpssdelay) || |
---|
2797 | | - time_before(j, READ_ONCE(rsp->gp_activity) + gpssdelay) || |
---|
2798 | | - atomic_read(&warned)) |
---|
2799 | | - return; |
---|
2800 | | - |
---|
2801 | | - raw_spin_lock_irqsave_rcu_node(rnp, flags); |
---|
2802 | | - j = jiffies; |
---|
2803 | | - if (rcu_gp_in_progress(rsp) || |
---|
2804 | | - ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) || |
---|
2805 | | - time_before(j, READ_ONCE(rsp->gp_req_activity) + gpssdelay) || |
---|
2806 | | - time_before(j, READ_ONCE(rsp->gp_activity) + gpssdelay) || |
---|
2807 | | - atomic_read(&warned)) { |
---|
2808 | | - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
2809 | | - return; |
---|
2810 | | - } |
---|
2811 | | - /* Hold onto the leaf lock to make others see warned==1. */ |
---|
2812 | | - |
---|
2813 | | - if (rnp_root != rnp) |
---|
2814 | | - raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */ |
---|
2815 | | - j = jiffies; |
---|
2816 | | - if (rcu_gp_in_progress(rsp) || |
---|
2817 | | - ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) || |
---|
2818 | | - time_before(j, rsp->gp_req_activity + gpssdelay) || |
---|
2819 | | - time_before(j, rsp->gp_activity + gpssdelay) || |
---|
2820 | | - atomic_xchg(&warned, 1)) { |
---|
2821 | | - raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */ |
---|
2822 | | - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
2823 | | - return; |
---|
2824 | | - } |
---|
2825 | | - pr_alert("%s: g%ld->%ld gar:%lu ga:%lu f%#x gs:%d %s->state:%#lx\n", |
---|
2826 | | - __func__, (long)READ_ONCE(rsp->gp_seq), |
---|
2827 | | - (long)READ_ONCE(rnp_root->gp_seq_needed), |
---|
2828 | | - j - rsp->gp_req_activity, j - rsp->gp_activity, |
---|
2829 | | - rsp->gp_flags, rsp->gp_state, rsp->name, |
---|
2830 | | - rsp->gp_kthread ? rsp->gp_kthread->state : 0x1ffffL); |
---|
2831 | | - WARN_ON(1); |
---|
2832 | | - if (rnp_root != rnp) |
---|
2833 | | - raw_spin_unlock_rcu_node(rnp_root); |
---|
2834 | | - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
2835 | | -} |
---|
2836 | | - |
---|
2837 | | -/* |
---|
2838 | | - * This does the RCU core processing work for the specified rcu_state |
---|
2839 | | - * and rcu_data structures. This may be called only from the CPU to |
---|
2840 | | - * whom the rdp belongs. |
---|
2841 | | - */ |
---|
2842 | | -static void |
---|
2843 | | -__rcu_process_callbacks(struct rcu_state *rsp) |
---|
| 2695 | +/* Perform RCU core processing work for the current CPU. */ |
---|
| 2696 | +static __latent_entropy void rcu_core(void) |
---|
2844 | 2697 | { |
---|
2845 | 2698 | unsigned long flags; |
---|
2846 | | - struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); |
---|
| 2699 | + struct rcu_data *rdp = raw_cpu_ptr(&rcu_data); |
---|
2847 | 2700 | struct rcu_node *rnp = rdp->mynode; |
---|
2848 | | - |
---|
2849 | | - WARN_ON_ONCE(!rdp->beenonline); |
---|
2850 | | - |
---|
2851 | | - /* Update RCU state based on any recent quiescent states. */ |
---|
2852 | | - rcu_check_quiescent_state(rsp, rdp); |
---|
2853 | | - |
---|
2854 | | - /* No grace period and unregistered callbacks? */ |
---|
2855 | | - if (!rcu_gp_in_progress(rsp) && |
---|
2856 | | - rcu_segcblist_is_enabled(&rdp->cblist)) { |
---|
2857 | | - local_irq_save(flags); |
---|
2858 | | - if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) |
---|
2859 | | - rcu_accelerate_cbs_unlocked(rsp, rnp, rdp); |
---|
2860 | | - local_irq_restore(flags); |
---|
2861 | | - } |
---|
2862 | | - |
---|
2863 | | - rcu_check_gp_start_stall(rsp, rnp, rdp); |
---|
2864 | | - |
---|
2865 | | - /* If there are callbacks ready, invoke them. */ |
---|
2866 | | - if (rcu_segcblist_ready_cbs(&rdp->cblist)) |
---|
2867 | | - invoke_rcu_callbacks(rsp, rdp); |
---|
2868 | | - |
---|
2869 | | - /* Do any needed deferred wakeups of rcuo kthreads. */ |
---|
2870 | | - do_nocb_deferred_wakeup(rdp); |
---|
2871 | | -} |
---|
2872 | | - |
---|
2873 | | -/* |
---|
2874 | | - * Do RCU core processing for the current CPU. |
---|
2875 | | - */ |
---|
2876 | | -static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused) |
---|
2877 | | -{ |
---|
2878 | | - struct rcu_state *rsp; |
---|
| 2701 | + const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) && |
---|
| 2702 | + rcu_segcblist_is_offloaded(&rdp->cblist); |
---|
2879 | 2703 | |
---|
2880 | 2704 | if (cpu_is_offline(smp_processor_id())) |
---|
2881 | 2705 | return; |
---|
2882 | 2706 | trace_rcu_utilization(TPS("Start RCU core")); |
---|
2883 | | - for_each_rcu_flavor(rsp) |
---|
2884 | | - __rcu_process_callbacks(rsp); |
---|
| 2707 | + WARN_ON_ONCE(!rdp->beenonline); |
---|
| 2708 | + |
---|
| 2709 | + /* Report any deferred quiescent states if preemption enabled. */ |
---|
| 2710 | + if (!(preempt_count() & PREEMPT_MASK)) { |
---|
| 2711 | + rcu_preempt_deferred_qs(current); |
---|
| 2712 | + } else if (rcu_preempt_need_deferred_qs(current)) { |
---|
| 2713 | + set_tsk_need_resched(current); |
---|
| 2714 | + set_preempt_need_resched(); |
---|
| 2715 | + } |
---|
| 2716 | + |
---|
| 2717 | + /* Update RCU state based on any recent quiescent states. */ |
---|
| 2718 | + rcu_check_quiescent_state(rdp); |
---|
| 2719 | + |
---|
| 2720 | + /* No grace period and unregistered callbacks? */ |
---|
| 2721 | + if (!rcu_gp_in_progress() && |
---|
| 2722 | + rcu_segcblist_is_enabled(&rdp->cblist) && !offloaded) { |
---|
| 2723 | + local_irq_save(flags); |
---|
| 2724 | + if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) |
---|
| 2725 | + rcu_accelerate_cbs_unlocked(rnp, rdp); |
---|
| 2726 | + local_irq_restore(flags); |
---|
| 2727 | + } |
---|
| 2728 | + |
---|
| 2729 | + rcu_check_gp_start_stall(rnp, rdp, rcu_jiffies_till_stall_check()); |
---|
| 2730 | + |
---|
| 2731 | + /* If there are callbacks ready, invoke them. */ |
---|
| 2732 | + if (!offloaded && rcu_segcblist_ready_cbs(&rdp->cblist) && |
---|
| 2733 | + likely(READ_ONCE(rcu_scheduler_fully_active))) |
---|
| 2734 | + rcu_do_batch(rdp); |
---|
| 2735 | + |
---|
| 2736 | + /* Do any needed deferred wakeups of rcuo kthreads. */ |
---|
| 2737 | + do_nocb_deferred_wakeup(rdp); |
---|
2885 | 2738 | trace_rcu_utilization(TPS("End RCU core")); |
---|
| 2739 | + |
---|
| 2740 | + // If strict GPs, schedule an RCU reader in a clean environment. |
---|
| 2741 | + if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) |
---|
| 2742 | + queue_work_on(rdp->cpu, rcu_gp_wq, &rdp->strict_work); |
---|
| 2743 | +} |
---|
| 2744 | + |
---|
| 2745 | +static void rcu_core_si(struct softirq_action *h) |
---|
| 2746 | +{ |
---|
| 2747 | + rcu_core(); |
---|
| 2748 | +} |
---|
| 2749 | + |
---|
| 2750 | +static void rcu_wake_cond(struct task_struct *t, int status) |
---|
| 2751 | +{ |
---|
| 2752 | + /* |
---|
| 2753 | + * If the thread is yielding, only wake it when this |
---|
| 2754 | + * is invoked from the idle loop. |
---|
| 2755 | + */ |
---|
| 2756 | + if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current))) |
---|
| 2757 | + wake_up_process(t); |
---|
| 2758 | +} |
---|
| 2759 | + |
---|
| 2760 | +static void invoke_rcu_core_kthread(void) |
---|
| 2761 | +{ |
---|
| 2762 | + struct task_struct *t; |
---|
| 2763 | + unsigned long flags; |
---|
| 2764 | + |
---|
| 2765 | + local_irq_save(flags); |
---|
| 2766 | + __this_cpu_write(rcu_data.rcu_cpu_has_work, 1); |
---|
| 2767 | + t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task); |
---|
| 2768 | + if (t != NULL && t != current) |
---|
| 2769 | + rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status)); |
---|
| 2770 | + local_irq_restore(flags); |
---|
2886 | 2771 | } |
---|
2887 | 2772 | |
---|
2888 | 2773 | /* |
---|
2889 | | - * Schedule RCU callback invocation. If the specified type of RCU |
---|
2890 | | - * does not support RCU priority boosting, just do a direct call, |
---|
2891 | | - * otherwise wake up the per-CPU kernel kthread. Note that because we |
---|
2892 | | - * are running on the current CPU with softirqs disabled, the |
---|
2893 | | - * rcu_cpu_kthread_task cannot disappear out from under us. |
---|
| 2774 | + * Wake up this CPU's rcuc kthread to do RCU core processing. |
---|
2894 | 2775 | */ |
---|
2895 | | -static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) |
---|
2896 | | -{ |
---|
2897 | | - if (unlikely(!READ_ONCE(rcu_scheduler_fully_active))) |
---|
2898 | | - return; |
---|
2899 | | - if (likely(!rsp->boost)) { |
---|
2900 | | - rcu_do_batch(rsp, rdp); |
---|
2901 | | - return; |
---|
2902 | | - } |
---|
2903 | | - invoke_rcu_callbacks_kthread(); |
---|
2904 | | -} |
---|
2905 | | - |
---|
2906 | 2776 | static void invoke_rcu_core(void) |
---|
2907 | 2777 | { |
---|
2908 | | - if (cpu_online(smp_processor_id())) |
---|
| 2778 | + if (!cpu_online(smp_processor_id())) |
---|
| 2779 | + return; |
---|
| 2780 | + if (use_softirq) |
---|
2909 | 2781 | raise_softirq(RCU_SOFTIRQ); |
---|
| 2782 | + else |
---|
| 2783 | + invoke_rcu_core_kthread(); |
---|
| 2784 | +} |
---|
| 2785 | + |
---|
| 2786 | +static void rcu_cpu_kthread_park(unsigned int cpu) |
---|
| 2787 | +{ |
---|
| 2788 | + per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; |
---|
| 2789 | +} |
---|
| 2790 | + |
---|
| 2791 | +static int rcu_cpu_kthread_should_run(unsigned int cpu) |
---|
| 2792 | +{ |
---|
| 2793 | + return __this_cpu_read(rcu_data.rcu_cpu_has_work); |
---|
| 2794 | +} |
---|
| 2795 | + |
---|
| 2796 | +/* |
---|
| 2797 | + * Per-CPU kernel thread that invokes RCU callbacks. This replaces |
---|
| 2798 | + * the RCU softirq used in configurations of RCU that do not support RCU |
---|
| 2799 | + * priority boosting. |
---|
| 2800 | + */ |
---|
| 2801 | +static void rcu_cpu_kthread(unsigned int cpu) |
---|
| 2802 | +{ |
---|
| 2803 | + unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status); |
---|
| 2804 | + char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work); |
---|
| 2805 | + int spincnt; |
---|
| 2806 | + |
---|
| 2807 | + trace_rcu_utilization(TPS("Start CPU kthread@rcu_run")); |
---|
| 2808 | + for (spincnt = 0; spincnt < 10; spincnt++) { |
---|
| 2809 | + local_bh_disable(); |
---|
| 2810 | + *statusp = RCU_KTHREAD_RUNNING; |
---|
| 2811 | + local_irq_disable(); |
---|
| 2812 | + work = *workp; |
---|
| 2813 | + *workp = 0; |
---|
| 2814 | + local_irq_enable(); |
---|
| 2815 | + if (work) |
---|
| 2816 | + rcu_core(); |
---|
| 2817 | + local_bh_enable(); |
---|
| 2818 | + if (*workp == 0) { |
---|
| 2819 | + trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); |
---|
| 2820 | + *statusp = RCU_KTHREAD_WAITING; |
---|
| 2821 | + return; |
---|
| 2822 | + } |
---|
| 2823 | + } |
---|
| 2824 | + *statusp = RCU_KTHREAD_YIELDING; |
---|
| 2825 | + trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield")); |
---|
| 2826 | + schedule_timeout_idle(2); |
---|
| 2827 | + trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); |
---|
| 2828 | + *statusp = RCU_KTHREAD_WAITING; |
---|
| 2829 | +} |
---|
| 2830 | + |
---|
| 2831 | +static struct smp_hotplug_thread rcu_cpu_thread_spec = { |
---|
| 2832 | + .store = &rcu_data.rcu_cpu_kthread_task, |
---|
| 2833 | + .thread_should_run = rcu_cpu_kthread_should_run, |
---|
| 2834 | + .thread_fn = rcu_cpu_kthread, |
---|
| 2835 | + .thread_comm = "rcuc/%u", |
---|
| 2836 | + .setup = rcu_cpu_kthread_setup, |
---|
| 2837 | + .park = rcu_cpu_kthread_park, |
---|
| 2838 | +}; |
---|
| 2839 | + |
---|
| 2840 | +/* |
---|
| 2841 | + * Spawn per-CPU RCU core processing kthreads. |
---|
| 2842 | + */ |
---|
| 2843 | +static int __init rcu_spawn_core_kthreads(void) |
---|
| 2844 | +{ |
---|
| 2845 | + int cpu; |
---|
| 2846 | + |
---|
| 2847 | + for_each_possible_cpu(cpu) |
---|
| 2848 | + per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0; |
---|
| 2849 | + if (!IS_ENABLED(CONFIG_RCU_BOOST) && use_softirq) |
---|
| 2850 | + return 0; |
---|
| 2851 | + WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), |
---|
| 2852 | + "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__); |
---|
| 2853 | + return 0; |
---|
2910 | 2854 | } |
---|
2911 | 2855 | |
---|
2912 | 2856 | /* |
---|
2913 | 2857 | * Handle any core-RCU processing required by a call_rcu() invocation. |
---|
2914 | 2858 | */ |
---|
2915 | | -static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, |
---|
2916 | | - struct rcu_head *head, unsigned long flags) |
---|
| 2859 | +static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head, |
---|
| 2860 | + unsigned long flags) |
---|
2917 | 2861 | { |
---|
2918 | 2862 | /* |
---|
2919 | 2863 | * If called from an extended quiescent state, invoke the RCU |
---|
.. | .. |
---|
2928 | 2872 | |
---|
2929 | 2873 | /* |
---|
2930 | 2874 | * Force the grace period if too many callbacks or too long waiting. |
---|
2931 | | - * Enforce hysteresis, and don't invoke force_quiescent_state() |
---|
| 2875 | + * Enforce hysteresis, and don't invoke rcu_force_quiescent_state() |
---|
2932 | 2876 | * if some other CPU has recently done so. Also, don't bother |
---|
2933 | | - * invoking force_quiescent_state() if the newly enqueued callback |
---|
| 2877 | + * invoking rcu_force_quiescent_state() if the newly enqueued callback |
---|
2934 | 2878 | * is the only one waiting for a grace period to complete. |
---|
2935 | 2879 | */ |
---|
2936 | 2880 | if (unlikely(rcu_segcblist_n_cbs(&rdp->cblist) > |
---|
2937 | 2881 | rdp->qlen_last_fqs_check + qhimark)) { |
---|
2938 | 2882 | |
---|
2939 | 2883 | /* Are we ignoring a completed grace period? */ |
---|
2940 | | - note_gp_changes(rsp, rdp); |
---|
| 2884 | + note_gp_changes(rdp); |
---|
2941 | 2885 | |
---|
2942 | 2886 | /* Start a new grace period if one not already started. */ |
---|
2943 | | - if (!rcu_gp_in_progress(rsp)) { |
---|
2944 | | - rcu_accelerate_cbs_unlocked(rsp, rdp->mynode, rdp); |
---|
| 2887 | + if (!rcu_gp_in_progress()) { |
---|
| 2888 | + rcu_accelerate_cbs_unlocked(rdp->mynode, rdp); |
---|
2945 | 2889 | } else { |
---|
2946 | 2890 | /* Give the grace period a kick. */ |
---|
2947 | | - rdp->blimit = LONG_MAX; |
---|
2948 | | - if (rsp->n_force_qs == rdp->n_force_qs_snap && |
---|
| 2891 | + rdp->blimit = DEFAULT_MAX_RCU_BLIMIT; |
---|
| 2892 | + if (READ_ONCE(rcu_state.n_force_qs) == rdp->n_force_qs_snap && |
---|
2949 | 2893 | rcu_segcblist_first_pend_cb(&rdp->cblist) != head) |
---|
2950 | | - force_quiescent_state(rsp); |
---|
2951 | | - rdp->n_force_qs_snap = rsp->n_force_qs; |
---|
| 2894 | + rcu_force_quiescent_state(); |
---|
| 2895 | + rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); |
---|
2952 | 2896 | rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist); |
---|
2953 | 2897 | } |
---|
2954 | 2898 | } |
---|
.. | .. |
---|
2962 | 2906 | } |
---|
2963 | 2907 | |
---|
2964 | 2908 | /* |
---|
2965 | | - * Helper function for call_rcu() and friends. The cpu argument will |
---|
2966 | | - * normally be -1, indicating "currently running CPU". It may specify |
---|
2967 | | - * a CPU only if that CPU is a no-CBs CPU. Currently, only _rcu_barrier() |
---|
2968 | | - * is expected to specify a CPU. |
---|
| 2909 | + * Check and if necessary update the leaf rcu_node structure's |
---|
| 2910 | + * ->cbovldmask bit corresponding to the current CPU based on that CPU's |
---|
| 2911 | + * number of queued RCU callbacks. The caller must hold the leaf rcu_node |
---|
| 2912 | + * structure's ->lock. |
---|
2969 | 2913 | */ |
---|
| 2914 | +static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp) |
---|
| 2915 | +{ |
---|
| 2916 | + raw_lockdep_assert_held_rcu_node(rnp); |
---|
| 2917 | + if (qovld_calc <= 0) |
---|
| 2918 | + return; // Early boot and wildcard value set. |
---|
| 2919 | + if (rcu_segcblist_n_cbs(&rdp->cblist) >= qovld_calc) |
---|
| 2920 | + WRITE_ONCE(rnp->cbovldmask, rnp->cbovldmask | rdp->grpmask); |
---|
| 2921 | + else |
---|
| 2922 | + WRITE_ONCE(rnp->cbovldmask, rnp->cbovldmask & ~rdp->grpmask); |
---|
| 2923 | +} |
---|
| 2924 | + |
---|
| 2925 | +/* |
---|
| 2926 | + * Check and if necessary update the leaf rcu_node structure's |
---|
| 2927 | + * ->cbovldmask bit corresponding to the current CPU based on that CPU's |
---|
| 2928 | + * number of queued RCU callbacks. No locks need be held, but the |
---|
| 2929 | + * caller must have disabled interrupts. |
---|
| 2930 | + * |
---|
| 2931 | + * Note that this function ignores the possibility that there are a lot |
---|
| 2932 | + * of callbacks all of which have already seen the end of their respective |
---|
| 2933 | + * grace periods. This omission is due to the need for no-CBs CPUs to |
---|
| 2934 | + * be holding ->nocb_lock to do this check, which is too heavy for a |
---|
| 2935 | + * common-case operation. |
---|
| 2936 | + */ |
---|
| 2937 | +static void check_cb_ovld(struct rcu_data *rdp) |
---|
| 2938 | +{ |
---|
| 2939 | + struct rcu_node *const rnp = rdp->mynode; |
---|
| 2940 | + |
---|
| 2941 | + if (qovld_calc <= 0 || |
---|
| 2942 | + ((rcu_segcblist_n_cbs(&rdp->cblist) >= qovld_calc) == |
---|
| 2943 | + !!(READ_ONCE(rnp->cbovldmask) & rdp->grpmask))) |
---|
| 2944 | + return; // Early boot wildcard value or already set correctly. |
---|
| 2945 | + raw_spin_lock_rcu_node(rnp); |
---|
| 2946 | + check_cb_ovld_locked(rdp, rnp); |
---|
| 2947 | + raw_spin_unlock_rcu_node(rnp); |
---|
| 2948 | +} |
---|
| 2949 | + |
---|
| 2950 | +/* Helper function for call_rcu() and friends. */ |
---|
2970 | 2951 | static void |
---|
2971 | | -__call_rcu(struct rcu_head *head, rcu_callback_t func, |
---|
2972 | | - struct rcu_state *rsp, int cpu, bool lazy) |
---|
| 2952 | +__call_rcu(struct rcu_head *head, rcu_callback_t func) |
---|
2973 | 2953 | { |
---|
2974 | 2954 | unsigned long flags; |
---|
2975 | 2955 | struct rcu_data *rdp; |
---|
| 2956 | + bool was_alldone; |
---|
2976 | 2957 | |
---|
2977 | 2958 | /* Misaligned rcu_head! */ |
---|
2978 | 2959 | WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1)); |
---|
.. | .. |
---|
2983 | 2964 | * Use rcu:rcu_callback trace event to find the previous |
---|
2984 | 2965 | * time callback was passed to __call_rcu(). |
---|
2985 | 2966 | */ |
---|
2986 | | - WARN_ONCE(1, "__call_rcu(): Double-freed CB %p->%pF()!!!\n", |
---|
| 2967 | + WARN_ONCE(1, "__call_rcu(): Double-freed CB %p->%pS()!!!\n", |
---|
2987 | 2968 | head, head->func); |
---|
2988 | 2969 | WRITE_ONCE(head->func, rcu_leak_callback); |
---|
2989 | 2970 | return; |
---|
.. | .. |
---|
2991 | 2972 | head->func = func; |
---|
2992 | 2973 | head->next = NULL; |
---|
2993 | 2974 | local_irq_save(flags); |
---|
2994 | | - rdp = this_cpu_ptr(rsp->rda); |
---|
| 2975 | + kasan_record_aux_stack(head); |
---|
| 2976 | + rdp = this_cpu_ptr(&rcu_data); |
---|
2995 | 2977 | |
---|
2996 | 2978 | /* Add the callback to our list. */ |
---|
2997 | | - if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist)) || cpu != -1) { |
---|
2998 | | - int offline; |
---|
2999 | | - |
---|
3000 | | - if (cpu != -1) |
---|
3001 | | - rdp = per_cpu_ptr(rsp->rda, cpu); |
---|
3002 | | - if (likely(rdp->mynode)) { |
---|
3003 | | - /* Post-boot, so this should be for a no-CBs CPU. */ |
---|
3004 | | - offline = !__call_rcu_nocb(rdp, head, lazy, flags); |
---|
3005 | | - WARN_ON_ONCE(offline); |
---|
3006 | | - /* Offline CPU, _call_rcu() illegal, leak callback. */ |
---|
3007 | | - local_irq_restore(flags); |
---|
3008 | | - return; |
---|
3009 | | - } |
---|
3010 | | - /* |
---|
3011 | | - * Very early boot, before rcu_init(). Initialize if needed |
---|
3012 | | - * and then drop through to queue the callback. |
---|
3013 | | - */ |
---|
3014 | | - BUG_ON(cpu != -1); |
---|
| 2979 | + if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist))) { |
---|
| 2980 | + // This can trigger due to call_rcu() from offline CPU: |
---|
| 2981 | + WARN_ON_ONCE(rcu_scheduler_active != RCU_SCHEDULER_INACTIVE); |
---|
3015 | 2982 | WARN_ON_ONCE(!rcu_is_watching()); |
---|
| 2983 | + // Very early boot, before rcu_init(). Initialize if needed |
---|
| 2984 | + // and then drop through to queue the callback. |
---|
3016 | 2985 | if (rcu_segcblist_empty(&rdp->cblist)) |
---|
3017 | 2986 | rcu_segcblist_init(&rdp->cblist); |
---|
3018 | 2987 | } |
---|
3019 | | - rcu_segcblist_enqueue(&rdp->cblist, head, lazy); |
---|
3020 | | - if (!lazy) |
---|
3021 | | - rcu_idle_count_callbacks_posted(); |
---|
3022 | 2988 | |
---|
3023 | | - if (__is_kfree_rcu_offset((unsigned long)func)) |
---|
3024 | | - trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, |
---|
3025 | | - rcu_segcblist_n_lazy_cbs(&rdp->cblist), |
---|
| 2989 | + check_cb_ovld(rdp); |
---|
| 2990 | + if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags)) |
---|
| 2991 | + return; // Enqueued onto ->nocb_bypass, so just leave. |
---|
| 2992 | + // If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock. |
---|
| 2993 | + rcu_segcblist_enqueue(&rdp->cblist, head); |
---|
| 2994 | + if (__is_kvfree_rcu_offset((unsigned long)func)) |
---|
| 2995 | + trace_rcu_kvfree_callback(rcu_state.name, head, |
---|
| 2996 | + (unsigned long)func, |
---|
3026 | 2997 | rcu_segcblist_n_cbs(&rdp->cblist)); |
---|
3027 | 2998 | else |
---|
3028 | | - trace_rcu_callback(rsp->name, head, |
---|
3029 | | - rcu_segcblist_n_lazy_cbs(&rdp->cblist), |
---|
| 2999 | + trace_rcu_callback(rcu_state.name, head, |
---|
3030 | 3000 | rcu_segcblist_n_cbs(&rdp->cblist)); |
---|
3031 | 3001 | |
---|
3032 | 3002 | /* Go handle any RCU core processing required. */ |
---|
3033 | | - __call_rcu_core(rsp, rdp, head, flags); |
---|
| 3003 | + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) && |
---|
| 3004 | + unlikely(rcu_segcblist_is_offloaded(&rdp->cblist))) { |
---|
| 3005 | + __call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */ |
---|
| 3006 | + } else { |
---|
| 3007 | + __call_rcu_core(rdp, head, flags); |
---|
| 3008 | + local_irq_restore(flags); |
---|
| 3009 | + } |
---|
| 3010 | +} |
---|
| 3011 | + |
---|
| 3012 | +/** |
---|
| 3013 | + * call_rcu() - Queue an RCU callback for invocation after a grace period. |
---|
| 3014 | + * @head: structure to be used for queueing the RCU updates. |
---|
| 3015 | + * @func: actual callback function to be invoked after the grace period |
---|
| 3016 | + * |
---|
| 3017 | + * The callback function will be invoked some time after a full grace |
---|
| 3018 | + * period elapses, in other words after all pre-existing RCU read-side |
---|
| 3019 | + * critical sections have completed. However, the callback function |
---|
| 3020 | + * might well execute concurrently with RCU read-side critical sections |
---|
| 3021 | + * that started after call_rcu() was invoked. RCU read-side critical |
---|
| 3022 | + * sections are delimited by rcu_read_lock() and rcu_read_unlock(), and |
---|
| 3023 | + * may be nested. In addition, regions of code across which interrupts, |
---|
| 3024 | + * preemption, or softirqs have been disabled also serve as RCU read-side |
---|
| 3025 | + * critical sections. This includes hardware interrupt handlers, softirq |
---|
| 3026 | + * handlers, and NMI handlers. |
---|
| 3027 | + * |
---|
| 3028 | + * Note that all CPUs must agree that the grace period extended beyond |
---|
| 3029 | + * all pre-existing RCU read-side critical sections. On systems with more |
---|
| 3030 | + * than one CPU, this means that when "func()" is invoked, each CPU is |
---|
| 3031 | + * guaranteed to have executed a full memory barrier since the end of its |
---|
| 3032 | + * last RCU read-side critical section whose beginning preceded the call |
---|
| 3033 | + * to call_rcu(). It also means that each CPU executing an RCU read-side |
---|
| 3034 | + * critical section that continues beyond the start of "func()" must have |
---|
| 3035 | + * executed a memory barrier after the call_rcu() but before the beginning |
---|
| 3036 | + * of that RCU read-side critical section. Note that these guarantees |
---|
| 3037 | + * include CPUs that are offline, idle, or executing in user mode, as |
---|
| 3038 | + * well as CPUs that are executing in the kernel. |
---|
| 3039 | + * |
---|
| 3040 | + * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the |
---|
| 3041 | + * resulting RCU callback function "func()", then both CPU A and CPU B are |
---|
| 3042 | + * guaranteed to execute a full memory barrier during the time interval |
---|
| 3043 | + * between the call to call_rcu() and the invocation of "func()" -- even |
---|
| 3044 | + * if CPU A and CPU B are the same CPU (but again only if the system has |
---|
| 3045 | + * more than one CPU). |
---|
| 3046 | + */ |
---|
| 3047 | +void call_rcu(struct rcu_head *head, rcu_callback_t func) |
---|
| 3048 | +{ |
---|
| 3049 | + __call_rcu(head, func); |
---|
| 3050 | +} |
---|
| 3051 | +EXPORT_SYMBOL_GPL(call_rcu); |
---|
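For orientation, here is a minimal sketch of typical call_rcu() usage as described in the kernel-doc above; the struct foo type and its helper functions are hypothetical and not part of this file:

#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Hypothetical example structure with an embedded rcu_head. */
struct foo {
	int data;
	struct rcu_head rcu;
};

/* Invoked some time after a full grace period has elapsed. */
static void foo_reclaim(struct rcu_head *rp)
{
	struct foo *fp = container_of(rp, struct foo, rcu);

	kfree(fp);
}

static void foo_remove(struct foo *fp)
{
	/* Unpublish fp from all RCU-protected pointers first, then: */
	call_rcu(&fp->rcu, foo_reclaim);
}

Because the callback can run in softirq context, foo_reclaim() must not sleep.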
| 3052 | + |
---|
| 3053 | + |
---|
| 3054 | +/* Maximum number of jiffies to wait before draining a batch. */ |
---|
| 3055 | +#define KFREE_DRAIN_JIFFIES (HZ / 50) |
---|
| 3056 | +#define KFREE_N_BATCHES 2 |
---|
| 3057 | +#define FREE_N_CHANNELS 2 |
---|
| 3058 | + |
---|
| 3059 | +/** |
---|
| 3060 | + * struct kvfree_rcu_bulk_data - single block to store kvfree_rcu() pointers |
---|
| 3061 | + * @nr_records: Number of active pointers in the array |
---|
| 3062 | + * @next: Next bulk object in the block chain |
---|
| 3063 | + * @records: Array of the kvfree_rcu() pointers |
---|
| 3064 | + */ |
---|
| 3065 | +struct kvfree_rcu_bulk_data { |
---|
| 3066 | + unsigned long nr_records; |
---|
| 3067 | + struct kvfree_rcu_bulk_data *next; |
---|
| 3068 | + void *records[]; |
---|
| 3069 | +}; |
---|
| 3070 | + |
---|
| 3071 | +/* |
---|
| 3072 | + * This macro defines how many entries the "records" array |
---|
| 3073 | + * will contain. It is chosen so that the size of a |
---|
| 3074 | + * kvfree_rcu_bulk_data structure is exactly one page. |
---|
| 3075 | + */ |
---|
| 3076 | +#define KVFREE_BULK_MAX_ENTR \ |
---|
| 3077 | + ((PAGE_SIZE - sizeof(struct kvfree_rcu_bulk_data)) / sizeof(void *)) |
---|
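As a concrete illustration of this sizing, assuming 4 KiB pages and a 64-bit build (an 8-byte nr_records plus an 8-byte next pointer, so a 16-byte header), KVFREE_BULK_MAX_ENTR works out to (4096 - 16) / 8 = 510 record pointers per block.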
| 3078 | + |
---|
| 3079 | +/** |
---|
| 3080 | + * struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests |
---|
| 3081 | + * @rcu_work: Let queue_rcu_work() invoke workqueue handler after grace period |
---|
| 3082 | + * @head_free: List of kfree_rcu() objects waiting for a grace period |
---|
| 3083 | + * @bkvhead_free: Bulk-List of kvfree_rcu() objects waiting for a grace period |
---|
| 3084 | + * @krcp: Pointer to @kfree_rcu_cpu structure |
---|
| 3085 | + */ |
---|
| 3086 | + |
---|
| 3087 | +struct kfree_rcu_cpu_work { |
---|
| 3088 | + struct rcu_work rcu_work; |
---|
| 3089 | + struct rcu_head *head_free; |
---|
| 3090 | + struct kvfree_rcu_bulk_data *bkvhead_free[FREE_N_CHANNELS]; |
---|
| 3091 | + struct kfree_rcu_cpu *krcp; |
---|
| 3092 | +}; |
---|
| 3093 | + |
---|
| 3094 | +/** |
---|
| 3095 | + * struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period |
---|
| 3096 | + * @head: List of kfree_rcu() objects not yet waiting for a grace period |
---|
| 3097 | + * @bkvhead: Bulk-List of kvfree_rcu() objects not yet waiting for a grace period |
---|
| 3098 | + * @krw_arr: Array of batches of kfree_rcu() objects waiting for a grace period |
---|
| 3099 | + * @lock: Synchronize access to this structure |
---|
| 3100 | + * @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES |
---|
| 3101 | + * @monitor_todo: Tracks whether a @monitor_work delayed work is pending |
---|
| 3102 | + * @initialized: The @rcu_work fields have been initialized |
---|
| 3103 | + * @count: Number of objects for which GP not started |
---|
| 3104 | + * @bkvcache: |
---|
| 3105 | + * A simple cache list that contains objects for reuse. |
---|
| 3106 | + * In order to save some per-cpu space the list is singly linked. |
---|
| 3107 | + * Even though it is lockless, access to it must be protected by the |
---|
| 3108 | + * per-cpu lock. |
---|
| 3109 | + * @page_cache_work: A work to refill the cache when it is empty |
---|
| 3110 | + * @work_in_progress: Indicates that page_cache_work is running |
---|
| 3111 | + * @hrtimer: A hrtimer for scheduling a page_cache_work |
---|
| 3112 | + * @nr_bkv_objs: number of allocated objects at @bkvcache. |
---|
| 3113 | + * |
---|
| 3114 | + * This is a per-CPU structure. The reason that it is not included in |
---|
| 3115 | + * the rcu_data structure is to permit this code to be extracted from |
---|
| 3116 | + * the RCU files. Such extraction could allow further optimization of |
---|
| 3117 | + * the interactions with the slab allocators. |
---|
| 3118 | + */ |
---|
| 3119 | +struct kfree_rcu_cpu { |
---|
| 3120 | + struct rcu_head *head; |
---|
| 3121 | + struct kvfree_rcu_bulk_data *bkvhead[FREE_N_CHANNELS]; |
---|
| 3122 | + struct kfree_rcu_cpu_work krw_arr[KFREE_N_BATCHES]; |
---|
| 3123 | + raw_spinlock_t lock; |
---|
| 3124 | + struct delayed_work monitor_work; |
---|
| 3125 | + bool monitor_todo; |
---|
| 3126 | + bool initialized; |
---|
| 3127 | + int count; |
---|
| 3128 | + |
---|
| 3129 | + struct work_struct page_cache_work; |
---|
| 3130 | + atomic_t work_in_progress; |
---|
| 3131 | + struct hrtimer hrtimer; |
---|
| 3132 | + |
---|
| 3133 | + struct llist_head bkvcache; |
---|
| 3134 | + int nr_bkv_objs; |
---|
| 3135 | +}; |
---|
| 3136 | + |
---|
| 3137 | +static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc) = { |
---|
| 3138 | + .lock = __RAW_SPIN_LOCK_UNLOCKED(krc.lock), |
---|
| 3139 | +}; |
---|
| 3140 | + |
---|
| 3141 | +static __always_inline void |
---|
| 3142 | +debug_rcu_bhead_unqueue(struct kvfree_rcu_bulk_data *bhead) |
---|
| 3143 | +{ |
---|
| 3144 | +#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD |
---|
| 3145 | + int i; |
---|
| 3146 | + |
---|
| 3147 | + for (i = 0; i < bhead->nr_records; i++) |
---|
| 3148 | + debug_rcu_head_unqueue((struct rcu_head *)(bhead->records[i])); |
---|
| 3149 | +#endif |
---|
| 3150 | +} |
---|
| 3151 | + |
---|
| 3152 | +static inline struct kfree_rcu_cpu * |
---|
| 3153 | +krc_this_cpu_lock(unsigned long *flags) |
---|
| 3154 | +{ |
---|
| 3155 | + struct kfree_rcu_cpu *krcp; |
---|
| 3156 | + |
---|
| 3157 | + local_irq_save(*flags); // For safely calling this_cpu_ptr(). |
---|
| 3158 | + krcp = this_cpu_ptr(&krc); |
---|
| 3159 | + raw_spin_lock(&krcp->lock); |
---|
| 3160 | + |
---|
| 3161 | + return krcp; |
---|
| 3162 | +} |
---|
| 3163 | + |
---|
| 3164 | +static inline void |
---|
| 3165 | +krc_this_cpu_unlock(struct kfree_rcu_cpu *krcp, unsigned long flags) |
---|
| 3166 | +{ |
---|
| 3167 | + raw_spin_unlock(&krcp->lock); |
---|
3034 | 3168 | local_irq_restore(flags); |
---|
3035 | 3169 | } |
---|
3036 | 3170 | |
---|
3037 | | -/** |
---|
3038 | | - * call_rcu_sched() - Queue an RCU for invocation after sched grace period. |
---|
3039 | | - * @head: structure to be used for queueing the RCU updates. |
---|
3040 | | - * @func: actual callback function to be invoked after the grace period |
---|
3041 | | - * |
---|
3042 | | - * The callback function will be invoked some time after a full grace |
---|
3043 | | - * period elapses, in other words after all currently executing RCU |
---|
3044 | | - * read-side critical sections have completed. call_rcu_sched() assumes |
---|
3045 | | - * that the read-side critical sections end on enabling of preemption |
---|
3046 | | - * or on voluntary preemption. |
---|
3047 | | - * RCU read-side critical sections are delimited by: |
---|
3048 | | - * |
---|
3049 | | - * - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR |
---|
3050 | | - * - anything that disables preemption. |
---|
3051 | | - * |
---|
3052 | | - * These may be nested. |
---|
3053 | | - * |
---|
3054 | | - * See the description of call_rcu() for more detailed information on |
---|
3055 | | - * memory ordering guarantees. |
---|
3056 | | - */ |
---|
3057 | | -void call_rcu_sched(struct rcu_head *head, rcu_callback_t func) |
---|
| 3171 | +static inline struct kvfree_rcu_bulk_data * |
---|
| 3172 | +get_cached_bnode(struct kfree_rcu_cpu *krcp) |
---|
3058 | 3173 | { |
---|
3059 | | - __call_rcu(head, func, &rcu_sched_state, -1, 0); |
---|
3060 | | -} |
---|
3061 | | -EXPORT_SYMBOL_GPL(call_rcu_sched); |
---|
| 3174 | + if (!krcp->nr_bkv_objs) |
---|
| 3175 | + return NULL; |
---|
3062 | 3176 | |
---|
3063 | | -/** |
---|
3064 | | - * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. |
---|
3065 | | - * @head: structure to be used for queueing the RCU updates. |
---|
3066 | | - * @func: actual callback function to be invoked after the grace period |
---|
3067 | | - * |
---|
3068 | | - * The callback function will be invoked some time after a full grace |
---|
3069 | | - * period elapses, in other words after all currently executing RCU |
---|
3070 | | - * read-side critical sections have completed. call_rcu_bh() assumes |
---|
3071 | | - * that the read-side critical sections end on completion of a softirq |
---|
3072 | | - * handler. This means that read-side critical sections in process |
---|
3073 | | - * context must not be interrupted by softirqs. This interface is to be |
---|
3074 | | - * used when most of the read-side critical sections are in softirq context. |
---|
3075 | | - * RCU read-side critical sections are delimited by: |
---|
3076 | | - * |
---|
3077 | | - * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context, OR |
---|
3078 | | - * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. |
---|
3079 | | - * |
---|
3080 | | - * These may be nested. |
---|
3081 | | - * |
---|
3082 | | - * See the description of call_rcu() for more detailed information on |
---|
3083 | | - * memory ordering guarantees. |
---|
3084 | | - */ |
---|
3085 | | -void call_rcu_bh(struct rcu_head *head, rcu_callback_t func) |
---|
3086 | | -{ |
---|
3087 | | - __call_rcu(head, func, &rcu_bh_state, -1, 0); |
---|
| 3177 | + krcp->nr_bkv_objs--; |
---|
| 3178 | + return (struct kvfree_rcu_bulk_data *) |
---|
| 3179 | + llist_del_first(&krcp->bkvcache); |
---|
3088 | 3180 | } |
---|
3089 | | -EXPORT_SYMBOL_GPL(call_rcu_bh); |
---|
| 3181 | + |
---|
| 3182 | +static inline bool |
---|
| 3183 | +put_cached_bnode(struct kfree_rcu_cpu *krcp, |
---|
| 3184 | + struct kvfree_rcu_bulk_data *bnode) |
---|
| 3185 | +{ |
---|
| 3186 | + // Check the limit. |
---|
| 3187 | + if (krcp->nr_bkv_objs >= rcu_min_cached_objs) |
---|
| 3188 | + return false; |
---|
| 3189 | + |
---|
| 3190 | + llist_add((struct llist_node *) bnode, &krcp->bkvcache); |
---|
| 3191 | + krcp->nr_bkv_objs++; |
---|
| 3192 | + return true; |
---|
| 3193 | + |
---|
| 3194 | +} |
---|
3090 | 3195 | |
---|
3091 | 3196 | /* |
---|
3092 | | - * Queue an RCU callback for lazy invocation after a grace period. |
---|
3093 | | - * This will likely be later named something like "call_rcu_lazy()", |
---|
3094 | | - * but this change will require some way of tagging the lazy RCU |
---|
3095 | | - * callbacks in the list of pending callbacks. Until then, this |
---|
3096 | | - * function may only be called from __kfree_rcu(). |
---|
| 3197 | + * This function is invoked in workqueue context after a grace period. |
---|
| 3198 | + * It frees all the objects queued on ->bkvhead_free or ->head_free. |
---|
3097 | 3199 | */ |
---|
3098 | | -void kfree_call_rcu(struct rcu_head *head, |
---|
3099 | | - rcu_callback_t func) |
---|
| 3200 | +static void kfree_rcu_work(struct work_struct *work) |
---|
3100 | 3201 | { |
---|
3101 | | - __call_rcu(head, func, rcu_state_p, -1, 1); |
---|
| 3202 | + unsigned long flags; |
---|
| 3203 | + struct kvfree_rcu_bulk_data *bkvhead[FREE_N_CHANNELS], *bnext; |
---|
| 3204 | + struct rcu_head *head, *next; |
---|
| 3205 | + struct kfree_rcu_cpu *krcp; |
---|
| 3206 | + struct kfree_rcu_cpu_work *krwp; |
---|
| 3207 | + int i, j; |
---|
| 3208 | + |
---|
| 3209 | + krwp = container_of(to_rcu_work(work), |
---|
| 3210 | + struct kfree_rcu_cpu_work, rcu_work); |
---|
| 3211 | + krcp = krwp->krcp; |
---|
| 3212 | + |
---|
| 3213 | + raw_spin_lock_irqsave(&krcp->lock, flags); |
---|
| 3214 | + // Channels 1 and 2. |
---|
| 3215 | + for (i = 0; i < FREE_N_CHANNELS; i++) { |
---|
| 3216 | + bkvhead[i] = krwp->bkvhead_free[i]; |
---|
| 3217 | + krwp->bkvhead_free[i] = NULL; |
---|
| 3218 | + } |
---|
| 3219 | + |
---|
| 3220 | + // Channel 3. |
---|
| 3221 | + head = krwp->head_free; |
---|
| 3222 | + krwp->head_free = NULL; |
---|
| 3223 | + raw_spin_unlock_irqrestore(&krcp->lock, flags); |
---|
| 3224 | + |
---|
| 3225 | + // Handle the first two channels. |
---|
| 3226 | + for (i = 0; i < FREE_N_CHANNELS; i++) { |
---|
| 3227 | + for (; bkvhead[i]; bkvhead[i] = bnext) { |
---|
| 3228 | + bnext = bkvhead[i]->next; |
---|
| 3229 | + debug_rcu_bhead_unqueue(bkvhead[i]); |
---|
| 3230 | + |
---|
| 3231 | + rcu_lock_acquire(&rcu_callback_map); |
---|
| 3232 | + if (i == 0) { // kmalloc() / kfree(). |
---|
| 3233 | + trace_rcu_invoke_kfree_bulk_callback( |
---|
| 3234 | + rcu_state.name, bkvhead[i]->nr_records, |
---|
| 3235 | + bkvhead[i]->records); |
---|
| 3236 | + |
---|
| 3237 | + kfree_bulk(bkvhead[i]->nr_records, |
---|
| 3238 | + bkvhead[i]->records); |
---|
| 3239 | + } else { // vmalloc() / vfree(). |
---|
| 3240 | + for (j = 0; j < bkvhead[i]->nr_records; j++) { |
---|
| 3241 | + trace_rcu_invoke_kvfree_callback( |
---|
| 3242 | + rcu_state.name, |
---|
| 3243 | + bkvhead[i]->records[j], 0); |
---|
| 3244 | + |
---|
| 3245 | + vfree(bkvhead[i]->records[j]); |
---|
| 3246 | + } |
---|
| 3247 | + } |
---|
| 3248 | + rcu_lock_release(&rcu_callback_map); |
---|
| 3249 | + |
---|
| 3250 | + raw_spin_lock_irqsave(&krcp->lock, flags); |
---|
| 3251 | + if (put_cached_bnode(krcp, bkvhead[i])) |
---|
| 3252 | + bkvhead[i] = NULL; |
---|
| 3253 | + raw_spin_unlock_irqrestore(&krcp->lock, flags); |
---|
| 3254 | + |
---|
| 3255 | + if (bkvhead[i]) |
---|
| 3256 | + free_page((unsigned long) bkvhead[i]); |
---|
| 3257 | + |
---|
| 3258 | + cond_resched_tasks_rcu_qs(); |
---|
| 3259 | + } |
---|
| 3260 | + } |
---|
| 3261 | + |
---|
| 3262 | + /* |
---|
| 3263 | + * Emergency case only. It can happen under low-memory |
---|
| 3264 | + * conditions when an allocation fails, so the "bulk" |
---|
| 3265 | + * path cannot be maintained temporarily. |
---|
| 3266 | + */ |
---|
| 3267 | + for (; head; head = next) { |
---|
| 3268 | + unsigned long offset = (unsigned long)head->func; |
---|
| 3269 | + void *ptr = (void *)head - offset; |
---|
| 3270 | + |
---|
| 3271 | + next = head->next; |
---|
| 3272 | + debug_rcu_head_unqueue((struct rcu_head *)ptr); |
---|
| 3273 | + rcu_lock_acquire(&rcu_callback_map); |
---|
| 3274 | + trace_rcu_invoke_kvfree_callback(rcu_state.name, head, offset); |
---|
| 3275 | + |
---|
| 3276 | + if (!WARN_ON_ONCE(!__is_kvfree_rcu_offset(offset))) |
---|
| 3277 | + kvfree(ptr); |
---|
| 3278 | + |
---|
| 3279 | + rcu_lock_release(&rcu_callback_map); |
---|
| 3280 | + cond_resched_tasks_rcu_qs(); |
---|
| 3281 | + } |
---|
3102 | 3282 | } |
---|
3103 | | -EXPORT_SYMBOL_GPL(kfree_call_rcu); |
---|
| 3283 | + |
---|
| 3284 | +static bool |
---|
| 3285 | +need_offload_krc(struct kfree_rcu_cpu *krcp) |
---|
| 3286 | +{ |
---|
| 3287 | + int i; |
---|
| 3288 | + |
---|
| 3289 | + for (i = 0; i < FREE_N_CHANNELS; i++) |
---|
| 3290 | + if (krcp->bkvhead[i]) |
---|
| 3291 | + return true; |
---|
| 3292 | + |
---|
| 3293 | + return !!krcp->head; |
---|
| 3294 | +} |
---|
| 3295 | + |
---|
| 3296 | +static bool |
---|
| 3297 | +need_wait_for_krwp_work(struct kfree_rcu_cpu_work *krwp) |
---|
| 3298 | +{ |
---|
| 3299 | + int i; |
---|
| 3300 | + |
---|
| 3301 | + for (i = 0; i < FREE_N_CHANNELS; i++) |
---|
| 3302 | + if (krwp->bkvhead_free[i]) |
---|
| 3303 | + return true; |
---|
| 3304 | + |
---|
| 3305 | + return !!krwp->head_free; |
---|
| 3306 | +} |
---|
3104 | 3307 | |
---|
3105 | 3308 | /* |
---|
3106 | | - * Because a context switch is a grace period for RCU-sched and RCU-bh, |
---|
3107 | | - * any blocking grace-period wait automatically implies a grace period |
---|
3108 | | - * if there is only one CPU online at any point time during execution |
---|
3109 | | - * of either synchronize_sched() or synchronize_rcu_bh(). It is OK to |
---|
| 3309 | + * Schedule the kfree batch RCU work to run in workqueue context after a GP. |
---|
| 3310 | + * |
---|
| 3311 | + * This function is invoked by kfree_rcu_monitor() when the KFREE_DRAIN_JIFFIES |
---|
| 3312 | + * timeout has been reached. |
---|
| 3313 | + */ |
---|
| 3314 | +static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp) |
---|
| 3315 | +{ |
---|
| 3316 | + struct kfree_rcu_cpu_work *krwp; |
---|
| 3317 | + bool repeat = false; |
---|
| 3318 | + int i, j; |
---|
| 3319 | + |
---|
| 3320 | + lockdep_assert_held(&krcp->lock); |
---|
| 3321 | + |
---|
| 3322 | + for (i = 0; i < KFREE_N_BATCHES; i++) { |
---|
| 3323 | + krwp = &(krcp->krw_arr[i]); |
---|
| 3324 | + |
---|
| 3325 | + // Try to detach bulk_head or head and attach it, but only when |
---|
| 3326 | + // all channels are free. If any channel is busy, krwp still has |
---|
| 3327 | + // in-flight RCU work handling a previous batch of frees. |
---|
| 3328 | + if (need_wait_for_krwp_work(krwp)) |
---|
| 3329 | + continue; |
---|
| 3330 | + |
---|
| 3331 | + if (need_offload_krc(krcp)) { |
---|
| 3332 | + // Channel 1 corresponds to SLAB ptrs. |
---|
| 3333 | + // Channel 2 corresponds to vmalloc ptrs. |
---|
| 3334 | + for (j = 0; j < FREE_N_CHANNELS; j++) { |
---|
| 3335 | + if (!krwp->bkvhead_free[j]) { |
---|
| 3336 | + krwp->bkvhead_free[j] = krcp->bkvhead[j]; |
---|
| 3337 | + krcp->bkvhead[j] = NULL; |
---|
| 3338 | + } |
---|
| 3339 | + } |
---|
| 3340 | + |
---|
| 3341 | + // Channel 3 corresponds to emergency path. |
---|
| 3342 | + if (!krwp->head_free) { |
---|
| 3343 | + krwp->head_free = krcp->head; |
---|
| 3344 | + krcp->head = NULL; |
---|
| 3345 | + } |
---|
| 3346 | + |
---|
| 3347 | + WRITE_ONCE(krcp->count, 0); |
---|
| 3348 | + |
---|
| 3349 | + /* |
---|
| 3350 | + * One work item covers one batch, so there are three |
---|
| 3351 | + * "free channels" that the batch can handle. It can |
---|
| 3352 | + * be that the work is already pending when the |
---|
| 3353 | + * channels have been detached one after the |
---|
| 3354 | + * other. |
---|
| 3355 | + */ |
---|
| 3356 | + queue_rcu_work(system_wq, &krwp->rcu_work); |
---|
| 3357 | + } |
---|
| 3358 | + } |
---|
| 3359 | + |
---|
| 3360 | + // Repeat if any corresponding "free" channel is still busy. |
---|
| 3361 | + if (need_offload_krc(krcp)) |
---|
| 3362 | + repeat = true; |
---|
| 3363 | + |
---|
| 3364 | + return !repeat; |
---|
| 3365 | +} |
---|
| 3366 | + |
---|
| 3367 | +static inline void kfree_rcu_drain_unlock(struct kfree_rcu_cpu *krcp, |
---|
| 3368 | + unsigned long flags) |
---|
| 3369 | +{ |
---|
| 3370 | + // Attempt to start a new batch. |
---|
| 3371 | + krcp->monitor_todo = false; |
---|
| 3372 | + if (queue_kfree_rcu_work(krcp)) { |
---|
| 3373 | + // Success! Our job is done here. |
---|
| 3374 | + raw_spin_unlock_irqrestore(&krcp->lock, flags); |
---|
| 3375 | + return; |
---|
| 3376 | + } |
---|
| 3377 | + |
---|
| 3378 | + // Previous RCU batch still in progress, try again later. |
---|
| 3379 | + krcp->monitor_todo = true; |
---|
| 3380 | + schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES); |
---|
| 3381 | + raw_spin_unlock_irqrestore(&krcp->lock, flags); |
---|
| 3382 | +} |
---|
| 3383 | + |
---|
| 3384 | +/* |
---|
| 3385 | + * This function is invoked after the KFREE_DRAIN_JIFFIES timeout. |
---|
| 3386 | + * It invokes kfree_rcu_drain_unlock() to attempt to start another batch. |
---|
| 3387 | + */ |
---|
| 3388 | +static void kfree_rcu_monitor(struct work_struct *work) |
---|
| 3389 | +{ |
---|
| 3390 | + unsigned long flags; |
---|
| 3391 | + struct kfree_rcu_cpu *krcp = container_of(work, struct kfree_rcu_cpu, |
---|
| 3392 | + monitor_work.work); |
---|
| 3393 | + |
---|
| 3394 | + raw_spin_lock_irqsave(&krcp->lock, flags); |
---|
| 3395 | + if (krcp->monitor_todo) |
---|
| 3396 | + kfree_rcu_drain_unlock(krcp, flags); |
---|
| 3397 | + else |
---|
| 3398 | + raw_spin_unlock_irqrestore(&krcp->lock, flags); |
---|
| 3399 | +} |
---|
| 3400 | + |
---|
| 3401 | +static enum hrtimer_restart |
---|
| 3402 | +schedule_page_work_fn(struct hrtimer *t) |
---|
| 3403 | +{ |
---|
| 3404 | + struct kfree_rcu_cpu *krcp = |
---|
| 3405 | + container_of(t, struct kfree_rcu_cpu, hrtimer); |
---|
| 3406 | + |
---|
| 3407 | + queue_work(system_highpri_wq, &krcp->page_cache_work); |
---|
| 3408 | + return HRTIMER_NORESTART; |
---|
| 3409 | +} |
---|
| 3410 | + |
---|
| 3411 | +static void fill_page_cache_func(struct work_struct *work) |
---|
| 3412 | +{ |
---|
| 3413 | + struct kvfree_rcu_bulk_data *bnode; |
---|
| 3414 | + struct kfree_rcu_cpu *krcp = |
---|
| 3415 | + container_of(work, struct kfree_rcu_cpu, |
---|
| 3416 | + page_cache_work); |
---|
| 3417 | + unsigned long flags; |
---|
| 3418 | + bool pushed; |
---|
| 3419 | + int i; |
---|
| 3420 | + |
---|
| 3421 | + for (i = 0; i < rcu_min_cached_objs; i++) { |
---|
| 3422 | + bnode = (struct kvfree_rcu_bulk_data *) |
---|
| 3423 | + __get_free_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); |
---|
| 3424 | + |
---|
| 3425 | + if (!bnode) |
---|
| 3426 | + break; |
---|
| 3427 | + |
---|
| 3428 | + raw_spin_lock_irqsave(&krcp->lock, flags); |
---|
| 3429 | + pushed = put_cached_bnode(krcp, bnode); |
---|
| 3430 | + raw_spin_unlock_irqrestore(&krcp->lock, flags); |
---|
| 3431 | + |
---|
| 3432 | + if (!pushed) { |
---|
| 3433 | + free_page((unsigned long) bnode); |
---|
| 3434 | + break; |
---|
| 3435 | + } |
---|
| 3436 | + } |
---|
| 3437 | + |
---|
| 3438 | + atomic_set(&krcp->work_in_progress, 0); |
---|
| 3439 | +} |
---|
| 3440 | + |
---|
| 3441 | +static void |
---|
| 3442 | +run_page_cache_worker(struct kfree_rcu_cpu *krcp) |
---|
| 3443 | +{ |
---|
| 3444 | + if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING && |
---|
| 3445 | + !atomic_xchg(&krcp->work_in_progress, 1)) { |
---|
| 3446 | + hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC, |
---|
| 3447 | + HRTIMER_MODE_REL); |
---|
| 3448 | + krcp->hrtimer.function = schedule_page_work_fn; |
---|
| 3449 | + hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL); |
---|
| 3450 | + } |
---|
| 3451 | +} |
---|
| 3452 | + |
---|
| 3453 | +static inline bool |
---|
| 3454 | +kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr) |
---|
| 3455 | +{ |
---|
| 3456 | + struct kvfree_rcu_bulk_data *bnode; |
---|
| 3457 | + int idx; |
---|
| 3458 | + |
---|
| 3459 | + if (unlikely(!krcp->initialized)) |
---|
| 3460 | + return false; |
---|
| 3461 | + |
---|
| 3462 | + lockdep_assert_held(&krcp->lock); |
---|
| 3463 | + idx = !!is_vmalloc_addr(ptr); |
---|
| 3464 | + |
---|
| 3465 | + /* Check if a new block is required. */ |
---|
| 3466 | + if (!krcp->bkvhead[idx] || |
---|
| 3467 | + krcp->bkvhead[idx]->nr_records == KVFREE_BULK_MAX_ENTR) { |
---|
| 3468 | + bnode = get_cached_bnode(krcp); |
---|
| 3469 | + /* Switch to emergency path. */ |
---|
| 3470 | + if (!bnode) |
---|
| 3471 | + return false; |
---|
| 3472 | + |
---|
| 3473 | + /* Initialize the new block. */ |
---|
| 3474 | + bnode->nr_records = 0; |
---|
| 3475 | + bnode->next = krcp->bkvhead[idx]; |
---|
| 3476 | + |
---|
| 3477 | + /* Attach it to the head. */ |
---|
| 3478 | + krcp->bkvhead[idx] = bnode; |
---|
| 3479 | + } |
---|
| 3480 | + |
---|
| 3481 | + /* Finally insert. */ |
---|
| 3482 | + krcp->bkvhead[idx]->records |
---|
| 3483 | + [krcp->bkvhead[idx]->nr_records++] = ptr; |
---|
| 3484 | + |
---|
| 3485 | + return true; |
---|
| 3486 | +} |
---|
| 3487 | + |
---|
| 3488 | +/* |
---|
| 3489 | + * Queue a request for lazy invocation of the appropriate free routine after a |
---|
| 3490 | + * grace period. Note that three paths are maintained: two main ones that use |
---|
| 3491 | + * the array-of-pointers interface, and a third emergency one that is used |
---|
| 3492 | + * only when the main paths cannot be maintained temporarily due to |
---|
| 3493 | + * memory pressure. |
---|
| 3494 | + * |
---|
| 3495 | + * Each kvfree_call_rcu() request is added to a batch. The batch is drained |
---|
| 3496 | + * every KFREE_DRAIN_JIFFIES. All the objects in the batch are |
---|
| 3497 | + * freed in workqueue context. This allows requests to be batched together to |
---|
| 3498 | + * reduce the number of grace periods during heavy kfree_rcu()/kvfree_rcu() load. |
---|
| 3499 | + */ |
---|
| 3500 | +void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) |
---|
| 3501 | +{ |
---|
| 3502 | + unsigned long flags; |
---|
| 3503 | + struct kfree_rcu_cpu *krcp; |
---|
| 3504 | + bool success; |
---|
| 3505 | + void *ptr; |
---|
| 3506 | + |
---|
| 3507 | + if (head) { |
---|
| 3508 | + ptr = (void *) head - (unsigned long) func; |
---|
| 3509 | + } else { |
---|
| 3510 | + /* |
---|
| 3511 | + * Please note there is a limitation for the head-less |
---|
| 3512 | + * variant, that is why there is a clear rule for such |
---|
| 3513 | + * objects: it can be used from might_sleep() context |
---|
| 3514 | + * only. For other places please embed an rcu_head to |
---|
| 3515 | + * your data. |
---|
| 3516 | + */ |
---|
| 3517 | + might_sleep(); |
---|
| 3518 | + ptr = (unsigned long *) func; |
---|
| 3519 | + } |
---|
| 3520 | + |
---|
| 3521 | + krcp = krc_this_cpu_lock(&flags); |
---|
| 3522 | + |
---|
| 3523 | + // Queue the object but don't yet schedule the batch. |
---|
| 3524 | + if (debug_rcu_head_queue(ptr)) { |
---|
| 3525 | + // Probable double kfree_rcu(), just leak. |
---|
| 3526 | + WARN_ONCE(1, "%s(): Double-freed call. rcu_head %p\n", |
---|
| 3527 | + __func__, head); |
---|
| 3528 | + |
---|
| 3529 | + // Mark as success and leave. |
---|
| 3530 | + success = true; |
---|
| 3531 | + goto unlock_return; |
---|
| 3532 | + } |
---|
| 3533 | + |
---|
| 3534 | + success = kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr); |
---|
| 3535 | + if (!success) { |
---|
| 3536 | + run_page_cache_worker(krcp); |
---|
| 3537 | + |
---|
| 3538 | + if (head == NULL) |
---|
| 3539 | + // Inline if kvfree_rcu(one_arg) call. |
---|
| 3540 | + goto unlock_return; |
---|
| 3541 | + |
---|
| 3542 | + head->func = func; |
---|
| 3543 | + head->next = krcp->head; |
---|
| 3544 | + krcp->head = head; |
---|
| 3545 | + success = true; |
---|
| 3546 | + } |
---|
| 3547 | + |
---|
| 3548 | + WRITE_ONCE(krcp->count, krcp->count + 1); |
---|
| 3549 | + |
---|
| 3550 | + // Set timer to drain after KFREE_DRAIN_JIFFIES. |
---|
| 3551 | + if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING && |
---|
| 3552 | + !krcp->monitor_todo) { |
---|
| 3553 | + krcp->monitor_todo = true; |
---|
| 3554 | + schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES); |
---|
| 3555 | + } |
---|
| 3556 | + |
---|
| 3557 | +unlock_return: |
---|
| 3558 | + krc_this_cpu_unlock(krcp, flags); |
---|
| 3559 | + |
---|
| 3560 | + /* |
---|
| 3561 | + * Inline kvfree() after synchronize_rcu(). We can do |
---|
| 3562 | + * it from might_sleep() context only, so the current |
---|
| 3563 | + * CPU can pass through a quiescent state. |
---|
| 3564 | + */ |
---|
| 3565 | + if (!success) { |
---|
| 3566 | + debug_rcu_head_unqueue((struct rcu_head *) ptr); |
---|
| 3567 | + synchronize_rcu(); |
---|
| 3568 | + kvfree(ptr); |
---|
| 3569 | + } |
---|
| 3570 | +} |
---|
| 3571 | +EXPORT_SYMBOL_GPL(kvfree_call_rcu); |
---|
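Callers normally reach kvfree_call_rcu() through the kfree_rcu()/kvfree_rcu() wrappers rather than invoking it directly. A minimal sketch of both forms, with a hypothetical struct foo, might look like this:

#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Hypothetical example structure with an embedded rcu_head. */
struct foo {
	int data;
	struct rcu_head rcu;
};

static void foo_release(struct foo *fp)
{
	/* Double-argument form: usable from any context. */
	kfree_rcu(fp, rcu);
}

static void blob_release(void *blob)
{
	/* Single-argument (head-less) form: may sleep, per the rule above. */
	kvfree_rcu(blob);
}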
| 3572 | + |
---|
| 3573 | +static unsigned long |
---|
| 3574 | +kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) |
---|
| 3575 | +{ |
---|
| 3576 | + int cpu; |
---|
| 3577 | + unsigned long count = 0; |
---|
| 3578 | + |
---|
| 3579 | + /* Snapshot count of all CPUs */ |
---|
| 3580 | + for_each_possible_cpu(cpu) { |
---|
| 3581 | + struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu); |
---|
| 3582 | + |
---|
| 3583 | + count += READ_ONCE(krcp->count); |
---|
| 3584 | + } |
---|
| 3585 | + |
---|
| 3586 | + return count; |
---|
| 3587 | +} |
---|
| 3588 | + |
---|
| 3589 | +static unsigned long |
---|
| 3590 | +kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) |
---|
| 3591 | +{ |
---|
| 3592 | + int cpu, freed = 0; |
---|
| 3593 | + unsigned long flags; |
---|
| 3594 | + |
---|
| 3595 | + for_each_possible_cpu(cpu) { |
---|
| 3596 | + int count; |
---|
| 3597 | + struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu); |
---|
| 3598 | + |
---|
| 3599 | + count = krcp->count; |
---|
| 3600 | + raw_spin_lock_irqsave(&krcp->lock, flags); |
---|
| 3601 | + if (krcp->monitor_todo) |
---|
| 3602 | + kfree_rcu_drain_unlock(krcp, flags); |
---|
| 3603 | + else |
---|
| 3604 | + raw_spin_unlock_irqrestore(&krcp->lock, flags); |
---|
| 3605 | + |
---|
| 3606 | + sc->nr_to_scan -= count; |
---|
| 3607 | + freed += count; |
---|
| 3608 | + |
---|
| 3609 | + if (sc->nr_to_scan <= 0) |
---|
| 3610 | + break; |
---|
| 3611 | + } |
---|
| 3612 | + |
---|
| 3613 | + return freed == 0 ? SHRINK_STOP : freed; |
---|
| 3614 | +} |
---|
| 3615 | + |
---|
| 3616 | +static struct shrinker kfree_rcu_shrinker = { |
---|
| 3617 | + .count_objects = kfree_rcu_shrink_count, |
---|
| 3618 | + .scan_objects = kfree_rcu_shrink_scan, |
---|
| 3619 | + .batch = 0, |
---|
| 3620 | + .seeks = DEFAULT_SEEKS, |
---|
| 3621 | +}; |
---|
| 3622 | + |
---|
| 3623 | +void __init kfree_rcu_scheduler_running(void) |
---|
| 3624 | +{ |
---|
| 3625 | + int cpu; |
---|
| 3626 | + unsigned long flags; |
---|
| 3627 | + |
---|
| 3628 | + for_each_possible_cpu(cpu) { |
---|
| 3629 | + struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu); |
---|
| 3630 | + |
---|
| 3631 | + raw_spin_lock_irqsave(&krcp->lock, flags); |
---|
| 3632 | + if (!krcp->head || krcp->monitor_todo) { |
---|
| 3633 | + raw_spin_unlock_irqrestore(&krcp->lock, flags); |
---|
| 3634 | + continue; |
---|
| 3635 | + } |
---|
| 3636 | + krcp->monitor_todo = true; |
---|
| 3637 | + schedule_delayed_work_on(cpu, &krcp->monitor_work, |
---|
| 3638 | + KFREE_DRAIN_JIFFIES); |
---|
| 3639 | + raw_spin_unlock_irqrestore(&krcp->lock, flags); |
---|
| 3640 | + } |
---|
| 3641 | +} |
---|
| 3642 | + |
---|
| 3643 | +/* |
---|
| 3644 | + * During early boot, any blocking grace-period wait automatically |
---|
| 3645 | + * implies a grace period. Later on, this is never the case for PREEMPTION. |
---|
| 3646 | + * |
---|
| 3647 | + * However, because a context switch is a grace period for !PREEMPTION, any |
---|
| 3648 | + * blocking grace-period wait automatically implies a grace period if |
---|
| 3649 | + * there is only one CPU online at any point in time during execution of |
---|
| 3650 | + * either synchronize_rcu() or synchronize_rcu_expedited(). It is OK to |
---|
3110 | 3651 | * occasionally incorrectly indicate that there are multiple CPUs online |
---|
3111 | | - * when there was in fact only one the whole time, as this just adds |
---|
3112 | | - * some overhead: RCU still operates correctly. |
---|
| 3652 | + * when there was in fact only one the whole time, as this just adds some |
---|
| 3653 | + * overhead: RCU still operates correctly. |
---|
3113 | 3654 | */ |
---|
3114 | 3655 | static int rcu_blocking_is_gp(void) |
---|
3115 | 3656 | { |
---|
3116 | 3657 | int ret; |
---|
3117 | 3658 | |
---|
| 3659 | + if (IS_ENABLED(CONFIG_PREEMPTION)) |
---|
| 3660 | + return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE; |
---|
3118 | 3661 | might_sleep(); /* Check for RCU read-side critical section. */ |
---|
3119 | 3662 | preempt_disable(); |
---|
3120 | 3663 | ret = num_online_cpus() <= 1; |
---|
.. | .. |
---|
3123 | 3666 | } |
---|
3124 | 3667 | |
---|
3125 | 3668 | /** |
---|
3126 | | - * synchronize_sched - wait until an rcu-sched grace period has elapsed. |
---|
| 3669 | + * synchronize_rcu - wait until a grace period has elapsed. |
---|
3127 | 3670 | * |
---|
3128 | | - * Control will return to the caller some time after a full rcu-sched |
---|
3129 | | - * grace period has elapsed, in other words after all currently executing |
---|
3130 | | - * rcu-sched read-side critical sections have completed. These read-side |
---|
3131 | | - * critical sections are delimited by rcu_read_lock_sched() and |
---|
3132 | | - * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(), |
---|
3133 | | - * local_irq_disable(), and so on may be used in place of |
---|
3134 | | - * rcu_read_lock_sched(). |
---|
3135 | | - * |
---|
3136 | | - * This means that all preempt_disable code sequences, including NMI and |
---|
3137 | | - * non-threaded hardware-interrupt handlers, in progress on entry will |
---|
3138 | | - * have completed before this primitive returns. However, this does not |
---|
3139 | | - * guarantee that softirq handlers will have completed, since in some |
---|
3140 | | - * kernels, these handlers can run in process context, and can block. |
---|
| 3671 | + * Control will return to the caller some time after a full grace |
---|
| 3672 | + * period has elapsed, in other words after all currently executing RCU |
---|
| 3673 | + * read-side critical sections have completed. Note, however, that |
---|
| 3674 | + * upon return from synchronize_rcu(), the caller might well be executing |
---|
| 3675 | + * concurrently with new RCU read-side critical sections that began while |
---|
| 3676 | + * synchronize_rcu() was waiting. RCU read-side critical sections are |
---|
| 3677 | + * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. |
---|
| 3678 | + * In addition, regions of code across which interrupts, preemption, or |
---|
| 3679 | + * softirqs have been disabled also serve as RCU read-side critical |
---|
| 3680 | + * sections. This includes hardware interrupt handlers, softirq handlers, |
---|
| 3681 | + * and NMI handlers. |
---|
3141 | 3682 | * |
---|
3142 | 3683 | * Note that this guarantee implies further memory-ordering guarantees. |
---|
3143 | | - * On systems with more than one CPU, when synchronize_sched() returns, |
---|
3144 | | - * each CPU is guaranteed to have executed a full memory barrier since the |
---|
3145 | | - * end of its last RCU-sched read-side critical section whose beginning |
---|
3146 | | - * preceded the call to synchronize_sched(). In addition, each CPU having |
---|
| 3684 | + * On systems with more than one CPU, when synchronize_rcu() returns, |
---|
| 3685 | + * each CPU is guaranteed to have executed a full memory barrier since |
---|
| 3686 | + * the end of its last RCU read-side critical section whose beginning |
---|
| 3687 | + * preceded the call to synchronize_rcu(). In addition, each CPU having |
---|
3147 | 3688 | * an RCU read-side critical section that extends beyond the return from |
---|
3148 | | - * synchronize_sched() is guaranteed to have executed a full memory barrier |
---|
3149 | | - * after the beginning of synchronize_sched() and before the beginning of |
---|
| 3689 | + * synchronize_rcu() is guaranteed to have executed a full memory barrier |
---|
| 3690 | + * after the beginning of synchronize_rcu() and before the beginning of |
---|
3150 | 3691 | * that RCU read-side critical section. Note that these guarantees include |
---|
3151 | 3692 | * CPUs that are offline, idle, or executing in user mode, as well as CPUs |
---|
3152 | 3693 | * that are executing in the kernel. |
---|
3153 | 3694 | * |
---|
3154 | | - * Furthermore, if CPU A invoked synchronize_sched(), which returned |
---|
| 3695 | + * Furthermore, if CPU A invoked synchronize_rcu(), which returned |
---|
3155 | 3696 | * to its caller on CPU B, then both CPU A and CPU B are guaranteed |
---|
3156 | 3697 | * to have executed a full memory barrier during the execution of |
---|
3157 | | - * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but |
---|
| 3698 | + * synchronize_rcu() -- even if CPU A and CPU B are the same CPU (but |
---|
3158 | 3699 | * again only if the system has more than one CPU). |
---|
3159 | 3700 | */ |
---|
3160 | | -void synchronize_sched(void) |
---|
| 3701 | +void synchronize_rcu(void) |
---|
3161 | 3702 | { |
---|
3162 | 3703 | RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || |
---|
3163 | 3704 | lock_is_held(&rcu_lock_map) || |
---|
3164 | 3705 | lock_is_held(&rcu_sched_lock_map), |
---|
3165 | | - "Illegal synchronize_sched() in RCU-sched read-side critical section"); |
---|
| 3706 | + "Illegal synchronize_rcu() in RCU read-side critical section"); |
---|
3166 | 3707 | if (rcu_blocking_is_gp()) |
---|
3167 | 3708 | return; |
---|
3168 | 3709 | if (rcu_gp_is_expedited()) |
---|
3169 | | - synchronize_sched_expedited(); |
---|
| 3710 | + synchronize_rcu_expedited(); |
---|
3170 | 3711 | else |
---|
3171 | | - wait_rcu_gp(call_rcu_sched); |
---|
| 3712 | + wait_rcu_gp(call_rcu); |
---|
3172 | 3713 | } |
---|
3173 | | -EXPORT_SYMBOL_GPL(synchronize_sched); |
---|
3174 | | - |
---|
3175 | | -/** |
---|
3176 | | - * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. |
---|
3177 | | - * |
---|
3178 | | - * Control will return to the caller some time after a full rcu_bh grace |
---|
3179 | | - * period has elapsed, in other words after all currently executing rcu_bh |
---|
3180 | | - * read-side critical sections have completed. RCU read-side critical |
---|
3181 | | - * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(), |
---|
3182 | | - * and may be nested. |
---|
3183 | | - * |
---|
3184 | | - * See the description of synchronize_sched() for more detailed information |
---|
3185 | | - * on memory ordering guarantees. |
---|
3186 | | - */ |
---|
3187 | | -void synchronize_rcu_bh(void) |
---|
3188 | | -{ |
---|
3189 | | - RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || |
---|
3190 | | - lock_is_held(&rcu_lock_map) || |
---|
3191 | | - lock_is_held(&rcu_sched_lock_map), |
---|
3192 | | - "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section"); |
---|
3193 | | - if (rcu_blocking_is_gp()) |
---|
3194 | | - return; |
---|
3195 | | - if (rcu_gp_is_expedited()) |
---|
3196 | | - synchronize_rcu_bh_expedited(); |
---|
3197 | | - else |
---|
3198 | | - wait_rcu_gp(call_rcu_bh); |
---|
3199 | | -} |
---|
3200 | | -EXPORT_SYMBOL_GPL(synchronize_rcu_bh); |
---|
| 3714 | +EXPORT_SYMBOL_GPL(synchronize_rcu); |
---|
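As a reminder of the classic updater pattern that relies on synchronize_rcu(), here is a minimal sketch; the foo structure, gp pointer, and gp_lock are hypothetical:

#include <linux/errno.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo {
	int a;
};

static struct foo __rcu *gp;		/* Hypothetical RCU-protected pointer. */
static DEFINE_SPINLOCK(gp_lock);

static int foo_update(int new_a)
{
	struct foo *newp = kmalloc(sizeof(*newp), GFP_KERNEL);
	struct foo *oldp;

	if (!newp)
		return -ENOMEM;
	newp->a = new_a;

	spin_lock(&gp_lock);
	oldp = rcu_dereference_protected(gp, lockdep_is_held(&gp_lock));
	rcu_assign_pointer(gp, newp);
	spin_unlock(&gp_lock);

	synchronize_rcu();	/* Wait for pre-existing readers to finish. */
	kfree(oldp);
	return 0;
}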
3201 | 3715 | |
---|
3202 | 3716 | /** |
---|
3203 | 3717 | * get_state_synchronize_rcu - Snapshot current RCU state |
---|
.. | .. |
---|
3213 | 3727 | * before the load from ->gp_seq. |
---|
3214 | 3728 | */ |
---|
3215 | 3729 | smp_mb(); /* ^^^ */ |
---|
3216 | | - return rcu_seq_snap(&rcu_state_p->gp_seq); |
---|
| 3730 | + return rcu_seq_snap(&rcu_state.gp_seq); |
---|
3217 | 3731 | } |
---|
3218 | 3732 | EXPORT_SYMBOL_GPL(get_state_synchronize_rcu); |
---|
3219 | 3733 | |
---|
.. | .. |
---|
3233 | 3747 | */ |
---|
3234 | 3748 | void cond_synchronize_rcu(unsigned long oldstate) |
---|
3235 | 3749 | { |
---|
3236 | | - if (!rcu_seq_done(&rcu_state_p->gp_seq, oldstate)) |
---|
| 3750 | + if (!rcu_seq_done(&rcu_state.gp_seq, oldstate)) |
---|
3237 | 3751 | synchronize_rcu(); |
---|
3238 | 3752 | else |
---|
3239 | 3753 | smp_mb(); /* Ensure GP ends before subsequent accesses. */ |
---|
3240 | 3754 | } |
---|
3241 | 3755 | EXPORT_SYMBOL_GPL(cond_synchronize_rcu); |
---|
3242 | 3756 | |
---|
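get_state_synchronize_rcu() and cond_synchronize_rcu() are intended to be used as a pair: snapshot a grace-period cookie, do unrelated work, and block only if a full grace period has not already elapsed in the meantime. A caller-side sketch, assuming hypothetical names (lazy_reclaim(), do_other_work()) that are not part of this file:

#include <linux/slab.h>
#include <linux/rcupdate.h>

static void do_other_work(void)         /* Hypothetical placeholder. */
{
}

/* Hypothetical deferred-free path that tries to avoid blocking. */
static void lazy_reclaim(void *old_p)
{
        unsigned long gp_cookie;

        gp_cookie = get_state_synchronize_rcu();  /* Snapshot current GP state. */

        do_other_work();        /* Anything that takes a while. */

        /*
         * Returns immediately if a full grace period has elapsed since the
         * snapshot; otherwise falls back to synchronize_rcu().
         */
        cond_synchronize_rcu(gp_cookie);
        kfree(old_p);
}

As the kernel-doc notes, counter wrap is tolerated: a wrapped cookie at worst causes one extra grace-period wait.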
3243 | | -/** |
---|
3244 | | - * get_state_synchronize_sched - Snapshot current RCU-sched state |
---|
3245 | | - * |
---|
3246 | | - * Returns a cookie that is used by a later call to cond_synchronize_sched() |
---|
3247 | | - * to determine whether or not a full grace period has elapsed in the |
---|
3248 | | - * meantime. |
---|
3249 | | - */ |
---|
3250 | | -unsigned long get_state_synchronize_sched(void) |
---|
3251 | | -{ |
---|
3252 | | - /* |
---|
3253 | | - * Any prior manipulation of RCU-protected data must happen |
---|
3254 | | - * before the load from ->gp_seq. |
---|
3255 | | - */ |
---|
3256 | | - smp_mb(); /* ^^^ */ |
---|
3257 | | - return rcu_seq_snap(&rcu_sched_state.gp_seq); |
---|
3258 | | -} |
---|
3259 | | -EXPORT_SYMBOL_GPL(get_state_synchronize_sched); |
---|
3260 | | - |
---|
3261 | | -/** |
---|
3262 | | - * cond_synchronize_sched - Conditionally wait for an RCU-sched grace period |
---|
3263 | | - * |
---|
3264 | | - * @oldstate: return value from earlier call to get_state_synchronize_sched() |
---|
3265 | | - * |
---|
3266 | | - * If a full RCU-sched grace period has elapsed since the earlier call to |
---|
3267 | | - * get_state_synchronize_sched(), just return. Otherwise, invoke |
---|
3268 | | - * synchronize_sched() to wait for a full grace period. |
---|
3269 | | - * |
---|
3270 | | - * Yes, this function does not take counter wrap into account. But |
---|
3271 | | - * counter wrap is harmless. If the counter wraps, we have waited for |
---|
3272 | | - * more than 2 billion grace periods (and way more on a 64-bit system!), |
---|
3273 | | - * so waiting for one additional grace period should be just fine. |
---|
3274 | | - */ |
---|
3275 | | -void cond_synchronize_sched(unsigned long oldstate) |
---|
3276 | | -{ |
---|
3277 | | - if (!rcu_seq_done(&rcu_sched_state.gp_seq, oldstate)) |
---|
3278 | | - synchronize_sched(); |
---|
3279 | | - else |
---|
3280 | | - smp_mb(); /* Ensure GP ends before subsequent accesses. */ |
---|
3281 | | -} |
---|
3282 | | -EXPORT_SYMBOL_GPL(cond_synchronize_sched); |
---|
3283 | | - |
---|
3284 | 3757 | /* |
---|
3285 | | - * Check to see if there is any immediate RCU-related work to be done |
---|
3286 | | - * by the current CPU, for the specified type of RCU, returning 1 if so. |
---|
3287 | | - * The checks are in order of increasing expense: checks that can be |
---|
3288 | | - * carried out against CPU-local state are performed first. However, |
---|
3289 | | - * we must check for CPU stalls first, else we might not get a chance. |
---|
| 3758 | + * Check to see if there is any immediate RCU-related work to be done by |
---|
| 3759 | + * the current CPU, returning 1 if so and zero otherwise. The checks are |
---|
| 3760 | + * in order of increasing expense: checks that can be carried out against |
---|
| 3761 | + * CPU-local state are performed first. However, we must check for CPU |
---|
| 3762 | + * stalls first, else we might not get a chance. |
---|
3290 | 3763 | */ |
---|
3291 | | -static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) |
---|
| 3764 | +static int rcu_pending(int user) |
---|
3292 | 3765 | { |
---|
| 3766 | + bool gp_in_progress; |
---|
| 3767 | + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); |
---|
3293 | 3768 | struct rcu_node *rnp = rdp->mynode; |
---|
3294 | 3769 | |
---|
3295 | | - /* Check for CPU stalls, if enabled. */ |
---|
3296 | | - check_cpu_stall(rsp, rdp); |
---|
| 3770 | + lockdep_assert_irqs_disabled(); |
---|
3297 | 3771 | |
---|
3298 | | - /* Is this CPU a NO_HZ_FULL CPU that should ignore RCU? */ |
---|
3299 | | - if (rcu_nohz_full_cpu(rsp)) |
---|
| 3772 | + /* Check for CPU stalls, if enabled. */ |
---|
| 3773 | + check_cpu_stall(rdp); |
---|
| 3774 | + |
---|
| 3775 | + /* Does this CPU need a deferred NOCB wakeup? */ |
---|
| 3776 | + if (rcu_nocb_need_deferred_wakeup(rdp)) |
---|
| 3777 | + return 1; |
---|
| 3778 | + |
---|
| 3779 | + /* Is this a nohz_full CPU in userspace or idle? (Ignore RCU if so.) */ |
---|
| 3780 | + if ((user || rcu_is_cpu_rrupt_from_idle()) && rcu_nohz_full_cpu()) |
---|
3300 | 3781 | return 0; |
---|
3301 | 3782 | |
---|
3302 | 3783 | /* Is the RCU core waiting for a quiescent state from this CPU? */ |
---|
3303 | | - if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm) |
---|
| 3784 | + gp_in_progress = rcu_gp_in_progress(); |
---|
| 3785 | + if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm && gp_in_progress) |
---|
3304 | 3786 | return 1; |
---|
3305 | 3787 | |
---|
3306 | 3788 | /* Does this CPU have callbacks ready to invoke? */ |
---|
.. | .. |
---|
3308 | 3790 | return 1; |
---|
3309 | 3791 | |
---|
3310 | 3792 | /* Has RCU gone idle with this CPU needing another grace period? */ |
---|
3311 | | - if (!rcu_gp_in_progress(rsp) && |
---|
3312 | | - rcu_segcblist_is_enabled(&rdp->cblist) && |
---|
| 3793 | + if (!gp_in_progress && rcu_segcblist_is_enabled(&rdp->cblist) && |
---|
| 3794 | + (!IS_ENABLED(CONFIG_RCU_NOCB_CPU) || |
---|
| 3795 | + !rcu_segcblist_is_offloaded(&rdp->cblist)) && |
---|
3313 | 3796 | !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) |
---|
3314 | 3797 | return 1; |
---|
3315 | 3798 | |
---|
.. | .. |
---|
3318 | 3801 | unlikely(READ_ONCE(rdp->gpwrap))) /* outside lock */ |
---|
3319 | 3802 | return 1; |
---|
3320 | 3803 | |
---|
3321 | | - /* Does this CPU need a deferred NOCB wakeup? */ |
---|
3322 | | - if (rcu_nocb_need_deferred_wakeup(rdp)) |
---|
3323 | | - return 1; |
---|
3324 | | - |
---|
3325 | 3804 | /* nothing to do */ |
---|
3326 | 3805 | return 0; |
---|
3327 | 3806 | } |
---|
3328 | 3807 | |
---|
3329 | 3808 | /* |
---|
3330 | | - * Check to see if there is any immediate RCU-related work to be done |
---|
3331 | | - * by the current CPU, returning 1 if so. This function is part of the |
---|
3332 | | - * RCU implementation; it is -not- an exported member of the RCU API. |
---|
3333 | | - */ |
---|
3334 | | -static int rcu_pending(void) |
---|
3335 | | -{ |
---|
3336 | | - struct rcu_state *rsp; |
---|
3337 | | - |
---|
3338 | | - for_each_rcu_flavor(rsp) |
---|
3339 | | - if (__rcu_pending(rsp, this_cpu_ptr(rsp->rda))) |
---|
3340 | | - return 1; |
---|
3341 | | - return 0; |
---|
3342 | | -} |
---|
3343 | | - |
---|
3344 | | -/* |
---|
3345 | | - * Return true if the specified CPU has any callback. If all_lazy is |
---|
3346 | | - * non-NULL, store an indication of whether all callbacks are lazy. |
---|
3347 | | - * (If there are no callbacks, all of them are deemed to be lazy.) |
---|
3348 | | - */ |
---|
3349 | | -static bool rcu_cpu_has_callbacks(bool *all_lazy) |
---|
3350 | | -{ |
---|
3351 | | - bool al = true; |
---|
3352 | | - bool hc = false; |
---|
3353 | | - struct rcu_data *rdp; |
---|
3354 | | - struct rcu_state *rsp; |
---|
3355 | | - |
---|
3356 | | - for_each_rcu_flavor(rsp) { |
---|
3357 | | - rdp = this_cpu_ptr(rsp->rda); |
---|
3358 | | - if (rcu_segcblist_empty(&rdp->cblist)) |
---|
3359 | | - continue; |
---|
3360 | | - hc = true; |
---|
3361 | | - if (rcu_segcblist_n_nonlazy_cbs(&rdp->cblist) || !all_lazy) { |
---|
3362 | | - al = false; |
---|
3363 | | - break; |
---|
3364 | | - } |
---|
3365 | | - } |
---|
3366 | | - if (all_lazy) |
---|
3367 | | - *all_lazy = al; |
---|
3368 | | - return hc; |
---|
3369 | | -} |
---|
3370 | | - |
---|
3371 | | -/* |
---|
3372 | | - * Helper function for _rcu_barrier() tracing. If tracing is disabled, |
---|
| 3809 | + * Helper function for rcu_barrier() tracing. If tracing is disabled, |
---|
3373 | 3810 | * the compiler is expected to optimize this away. |
---|
3374 | 3811 | */ |
---|
3375 | | -static void _rcu_barrier_trace(struct rcu_state *rsp, const char *s, |
---|
3376 | | - int cpu, unsigned long done) |
---|
| 3812 | +static void rcu_barrier_trace(const char *s, int cpu, unsigned long done) |
---|
3377 | 3813 | { |
---|
3378 | | - trace_rcu_barrier(rsp->name, s, cpu, |
---|
3379 | | - atomic_read(&rsp->barrier_cpu_count), done); |
---|
| 3814 | + trace_rcu_barrier(rcu_state.name, s, cpu, |
---|
| 3815 | + atomic_read(&rcu_state.barrier_cpu_count), done); |
---|
3380 | 3816 | } |
---|
3381 | 3817 | |
---|
3382 | 3818 | /* |
---|
3383 | | - * RCU callback function for _rcu_barrier(). If we are last, wake |
---|
3384 | | - * up the task executing _rcu_barrier(). |
---|
| 3819 | + * RCU callback function for rcu_barrier(). If we are last, wake |
---|
| 3820 | + * up the task executing rcu_barrier(). |
---|
| 3821 | + * |
---|
| 3822 | + * Note that the value of rcu_state.barrier_sequence must be captured |
---|
| 3823 | + * before the atomic_dec_and_test(). Otherwise, if this CPU is not last, |
---|
| 3824 | + * other CPUs might count the value down to zero before this CPU gets |
---|
| 3825 | + * around to invoking rcu_barrier_trace(), which might result in bogus |
---|
| 3826 | + * data from the next instance of rcu_barrier(). |
---|
3385 | 3827 | */ |
---|
3386 | 3828 | static void rcu_barrier_callback(struct rcu_head *rhp) |
---|
3387 | 3829 | { |
---|
3388 | | - struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head); |
---|
3389 | | - struct rcu_state *rsp = rdp->rsp; |
---|
| 3830 | + unsigned long __maybe_unused s = rcu_state.barrier_sequence; |
---|
3390 | 3831 | |
---|
3391 | | - if (atomic_dec_and_test(&rsp->barrier_cpu_count)) { |
---|
3392 | | - _rcu_barrier_trace(rsp, TPS("LastCB"), -1, |
---|
3393 | | - rsp->barrier_sequence); |
---|
3394 | | - complete(&rsp->barrier_completion); |
---|
| 3832 | + if (atomic_dec_and_test(&rcu_state.barrier_cpu_count)) { |
---|
| 3833 | + rcu_barrier_trace(TPS("LastCB"), -1, s); |
---|
| 3834 | + complete(&rcu_state.barrier_completion); |
---|
3395 | 3835 | } else { |
---|
3396 | | - _rcu_barrier_trace(rsp, TPS("CB"), -1, rsp->barrier_sequence); |
---|
| 3836 | + rcu_barrier_trace(TPS("CB"), -1, s); |
---|
3397 | 3837 | } |
---|
3398 | 3838 | } |
---|
3399 | 3839 | |
---|
3400 | 3840 | /* |
---|
3401 | 3841 | * Called with preemption disabled, and from cross-cpu IRQ context. |
---|
3402 | 3842 | */ |
---|
3403 | | -static void rcu_barrier_func(void *type) |
---|
| 3843 | +static void rcu_barrier_func(void *cpu_in) |
---|
3404 | 3844 | { |
---|
3405 | | - struct rcu_state *rsp = type; |
---|
3406 | | - struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); |
---|
| 3845 | + uintptr_t cpu = (uintptr_t)cpu_in; |
---|
| 3846 | + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
---|
3407 | 3847 | |
---|
3408 | | - _rcu_barrier_trace(rsp, TPS("IRQ"), -1, rsp->barrier_sequence); |
---|
| 3848 | + rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence); |
---|
3409 | 3849 | rdp->barrier_head.func = rcu_barrier_callback; |
---|
3410 | 3850 | debug_rcu_head_queue(&rdp->barrier_head); |
---|
3411 | | - if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) { |
---|
3412 | | - atomic_inc(&rsp->barrier_cpu_count); |
---|
| 3851 | + rcu_nocb_lock(rdp); |
---|
| 3852 | + WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies)); |
---|
| 3853 | + if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) { |
---|
| 3854 | + atomic_inc(&rcu_state.barrier_cpu_count); |
---|
3413 | 3855 | } else { |
---|
3414 | 3856 | debug_rcu_head_unqueue(&rdp->barrier_head); |
---|
3415 | | - _rcu_barrier_trace(rsp, TPS("IRQNQ"), -1, |
---|
3416 | | - rsp->barrier_sequence); |
---|
| 3857 | + rcu_barrier_trace(TPS("IRQNQ"), -1, |
---|
| 3858 | + rcu_state.barrier_sequence); |
---|
3417 | 3859 | } |
---|
| 3860 | + rcu_nocb_unlock(rdp); |
---|
3418 | 3861 | } |
---|
3419 | 3862 | |
---|
3420 | | -/* |
---|
3421 | | - * Orchestrate the specified type of RCU barrier, waiting for all |
---|
3422 | | - * RCU callbacks of the specified type to complete. |
---|
| 3863 | +/** |
---|
| 3864 | + * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. |
---|
| 3865 | + * |
---|
| 3866 | + * Note that this primitive does not necessarily wait for an RCU grace period |
---|
| 3867 | + * to complete. For example, if there are no RCU callbacks queued anywhere |
---|
| 3868 | + * in the system, then rcu_barrier() is within its rights to return |
---|
| 3869 | + * immediately, without waiting for anything, much less an RCU grace period. |
---|
3423 | 3870 | */ |
---|
3424 | | -static void _rcu_barrier(struct rcu_state *rsp) |
---|
| 3871 | +void rcu_barrier(void) |
---|
3425 | 3872 | { |
---|
3426 | | - int cpu; |
---|
| 3873 | + uintptr_t cpu; |
---|
3427 | 3874 | struct rcu_data *rdp; |
---|
3428 | | - unsigned long s = rcu_seq_snap(&rsp->barrier_sequence); |
---|
| 3875 | + unsigned long s = rcu_seq_snap(&rcu_state.barrier_sequence); |
---|
3429 | 3876 | |
---|
3430 | | - _rcu_barrier_trace(rsp, TPS("Begin"), -1, s); |
---|
| 3877 | + rcu_barrier_trace(TPS("Begin"), -1, s); |
---|
3431 | 3878 | |
---|
3432 | 3879 | /* Take mutex to serialize concurrent rcu_barrier() requests. */ |
---|
3433 | | - mutex_lock(&rsp->barrier_mutex); |
---|
| 3880 | + mutex_lock(&rcu_state.barrier_mutex); |
---|
3434 | 3881 | |
---|
3435 | 3882 | /* Did someone else do our work for us? */ |
---|
3436 | | - if (rcu_seq_done(&rsp->barrier_sequence, s)) { |
---|
3437 | | - _rcu_barrier_trace(rsp, TPS("EarlyExit"), -1, |
---|
3438 | | - rsp->barrier_sequence); |
---|
| 3883 | + if (rcu_seq_done(&rcu_state.barrier_sequence, s)) { |
---|
| 3884 | + rcu_barrier_trace(TPS("EarlyExit"), -1, |
---|
| 3885 | + rcu_state.barrier_sequence); |
---|
3439 | 3886 | smp_mb(); /* caller's subsequent code after above check. */ |
---|
3440 | | - mutex_unlock(&rsp->barrier_mutex); |
---|
| 3887 | + mutex_unlock(&rcu_state.barrier_mutex); |
---|
3441 | 3888 | return; |
---|
3442 | 3889 | } |
---|
3443 | 3890 | |
---|
3444 | 3891 | /* Mark the start of the barrier operation. */ |
---|
3445 | | - rcu_seq_start(&rsp->barrier_sequence); |
---|
3446 | | - _rcu_barrier_trace(rsp, TPS("Inc1"), -1, rsp->barrier_sequence); |
---|
| 3892 | + rcu_seq_start(&rcu_state.barrier_sequence); |
---|
| 3893 | + rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence); |
---|
3447 | 3894 | |
---|
3448 | 3895 | /* |
---|
3449 | | - * Initialize the count to one rather than to zero in order to |
---|
3450 | | - * avoid a too-soon return to zero in case of a short grace period |
---|
3451 | | - * (or preemption of this task). Exclude CPU-hotplug operations |
---|
3452 | | - * to ensure that no offline CPU has callbacks queued. |
---|
| 3896 | + * Initialize the count to two rather than to zero in order |
---|
| 3897 | + * to avoid a too-soon return to zero in case of an immediate |
---|
| 3898 | + * invocation of the just-enqueued callback (or preemption of |
---|
| 3899 | + * this task). Exclude CPU-hotplug operations to ensure that no |
---|
| 3900 | + * offline non-offloaded CPU has callbacks queued. |
---|
3453 | 3901 | */ |
---|
3454 | | - init_completion(&rsp->barrier_completion); |
---|
3455 | | - atomic_set(&rsp->barrier_cpu_count, 1); |
---|
| 3902 | + init_completion(&rcu_state.barrier_completion); |
---|
| 3903 | + atomic_set(&rcu_state.barrier_cpu_count, 2); |
---|
3456 | 3904 | get_online_cpus(); |
---|
3457 | 3905 | |
---|
3458 | 3906 | /* |
---|
.. | .. |
---|
3461 | 3909 | * corresponding CPU's preceding callbacks have been invoked. |
---|
3462 | 3910 | */ |
---|
3463 | 3911 | for_each_possible_cpu(cpu) { |
---|
3464 | | - if (!cpu_online(cpu) && !rcu_is_nocb_cpu(cpu)) |
---|
| 3912 | + rdp = per_cpu_ptr(&rcu_data, cpu); |
---|
| 3913 | + if (cpu_is_offline(cpu) && |
---|
| 3914 | + !rcu_segcblist_is_offloaded(&rdp->cblist)) |
---|
3465 | 3915 | continue; |
---|
3466 | | - rdp = per_cpu_ptr(rsp->rda, cpu); |
---|
3467 | | - if (rcu_is_nocb_cpu(cpu)) { |
---|
3468 | | - if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) { |
---|
3469 | | - _rcu_barrier_trace(rsp, TPS("OfflineNoCB"), cpu, |
---|
3470 | | - rsp->barrier_sequence); |
---|
3471 | | - } else { |
---|
3472 | | - _rcu_barrier_trace(rsp, TPS("OnlineNoCB"), cpu, |
---|
3473 | | - rsp->barrier_sequence); |
---|
3474 | | - smp_mb__before_atomic(); |
---|
3475 | | - atomic_inc(&rsp->barrier_cpu_count); |
---|
3476 | | - __call_rcu(&rdp->barrier_head, |
---|
3477 | | - rcu_barrier_callback, rsp, cpu, 0); |
---|
3478 | | - } |
---|
3479 | | - } else if (rcu_segcblist_n_cbs(&rdp->cblist)) { |
---|
3480 | | - _rcu_barrier_trace(rsp, TPS("OnlineQ"), cpu, |
---|
3481 | | - rsp->barrier_sequence); |
---|
3482 | | - smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); |
---|
| 3916 | + if (rcu_segcblist_n_cbs(&rdp->cblist) && cpu_online(cpu)) { |
---|
| 3917 | + rcu_barrier_trace(TPS("OnlineQ"), cpu, |
---|
| 3918 | + rcu_state.barrier_sequence); |
---|
| 3919 | + smp_call_function_single(cpu, rcu_barrier_func, (void *)cpu, 1); |
---|
| 3920 | + } else if (rcu_segcblist_n_cbs(&rdp->cblist) && |
---|
| 3921 | + cpu_is_offline(cpu)) { |
---|
| 3922 | + rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu, |
---|
| 3923 | + rcu_state.barrier_sequence); |
---|
| 3924 | + local_irq_disable(); |
---|
| 3925 | + rcu_barrier_func((void *)cpu); |
---|
| 3926 | + local_irq_enable(); |
---|
| 3927 | + } else if (cpu_is_offline(cpu)) { |
---|
| 3928 | + rcu_barrier_trace(TPS("OfflineNoCBNoQ"), cpu, |
---|
| 3929 | + rcu_state.barrier_sequence); |
---|
3483 | 3930 | } else { |
---|
3484 | | - _rcu_barrier_trace(rsp, TPS("OnlineNQ"), cpu, |
---|
3485 | | - rsp->barrier_sequence); |
---|
| 3931 | + rcu_barrier_trace(TPS("OnlineNQ"), cpu, |
---|
| 3932 | + rcu_state.barrier_sequence); |
---|
3486 | 3933 | } |
---|
3487 | 3934 | } |
---|
3488 | 3935 | put_online_cpus(); |
---|
.. | .. |
---|
3491 | 3938 | * Now that we have an rcu_barrier_callback() callback on each |
---|
3492 | 3939 | * CPU, and thus each counted, remove the initial count. |
---|
3493 | 3940 | */ |
---|
3494 | | - if (atomic_dec_and_test(&rsp->barrier_cpu_count)) |
---|
3495 | | - complete(&rsp->barrier_completion); |
---|
| 3941 | + if (atomic_sub_and_test(2, &rcu_state.barrier_cpu_count)) |
---|
| 3942 | + complete(&rcu_state.barrier_completion); |
---|
3496 | 3943 | |
---|
3497 | 3944 | /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ |
---|
3498 | | - wait_for_completion(&rsp->barrier_completion); |
---|
| 3945 | + wait_for_completion(&rcu_state.barrier_completion); |
---|
3499 | 3946 | |
---|
3500 | 3947 | /* Mark the end of the barrier operation. */ |
---|
3501 | | - _rcu_barrier_trace(rsp, TPS("Inc2"), -1, rsp->barrier_sequence); |
---|
3502 | | - rcu_seq_end(&rsp->barrier_sequence); |
---|
| 3948 | + rcu_barrier_trace(TPS("Inc2"), -1, rcu_state.barrier_sequence); |
---|
| 3949 | + rcu_seq_end(&rcu_state.barrier_sequence); |
---|
3503 | 3950 | |
---|
3504 | 3951 | /* Other rcu_barrier() invocations can now safely proceed. */ |
---|
3505 | | - mutex_unlock(&rsp->barrier_mutex); |
---|
| 3952 | + mutex_unlock(&rcu_state.barrier_mutex); |
---|
3506 | 3953 | } |
---|
3507 | | - |
---|
3508 | | -/** |
---|
3509 | | - * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. |
---|
3510 | | - */ |
---|
3511 | | -void rcu_barrier_bh(void) |
---|
3512 | | -{ |
---|
3513 | | - _rcu_barrier(&rcu_bh_state); |
---|
3514 | | -} |
---|
3515 | | -EXPORT_SYMBOL_GPL(rcu_barrier_bh); |
---|
3516 | | - |
---|
3517 | | -/** |
---|
3518 | | - * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. |
---|
3519 | | - */ |
---|
3520 | | -void rcu_barrier_sched(void) |
---|
3521 | | -{ |
---|
3522 | | - _rcu_barrier(&rcu_sched_state); |
---|
3523 | | -} |
---|
3524 | | -EXPORT_SYMBOL_GPL(rcu_barrier_sched); |
---|
| 3954 | +EXPORT_SYMBOL_GPL(rcu_barrier); |
---|
3525 | 3955 | |
---|
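A common use of rcu_barrier() is in a teardown path: once the last call_rcu() has been queued, rcu_barrier() guarantees that every previously queued callback has finished before, say, a module's text or a subsystem's data structures go away. A minimal sketch with purely illustrative names (struct widget, retire_widget(), widget_teardown()):

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>

struct widget {
        struct rcu_head rcu;
        int data;
};

static void free_widget_rcu(struct rcu_head *rhp)
{
        struct widget *wp = container_of(rhp, struct widget, rcu);

        kfree(wp);
}

static void retire_widget(struct widget *wp)
{
        call_rcu(&wp->rcu, free_widget_rcu);    /* Deferred free. */
}

static void widget_teardown(void)
{
        /*
         * Wait for every free_widget_rcu() queued via retire_widget() to
         * run; after this, no widget callback is still in flight.
         */
        rcu_barrier();
}

Note, per the kernel-doc above, that rcu_barrier() waits only for already-queued callbacks, not for a grace period as such; with no callbacks queued it may return immediately.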
3526 | 3956 | /* |
---|
3527 | 3957 | * Propagate ->qsinitmask bits up the rcu_node tree to account for the |
---|
.. | .. |
---|
3555 | 3985 | * Do boot-time initialization of a CPU's per-CPU RCU data. |
---|
3556 | 3986 | */ |
---|
3557 | 3987 | static void __init |
---|
3558 | | -rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) |
---|
| 3988 | +rcu_boot_init_percpu_data(int cpu) |
---|
3559 | 3989 | { |
---|
3560 | | - struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
---|
| 3990 | + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
---|
3561 | 3991 | |
---|
3562 | 3992 | /* Set up local state, ensuring consistent view of global state. */ |
---|
3563 | 3993 | rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu); |
---|
3564 | | - rdp->dynticks = &per_cpu(rcu_dynticks, cpu); |
---|
3565 | | - WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != 1); |
---|
3566 | | - WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp->dynticks))); |
---|
3567 | | - rdp->rcu_ofl_gp_seq = rsp->gp_seq; |
---|
| 3994 | + INIT_WORK(&rdp->strict_work, strict_work_handler); |
---|
| 3995 | + WARN_ON_ONCE(rdp->dynticks_nesting != 1); |
---|
| 3996 | + WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp))); |
---|
| 3997 | + rdp->rcu_ofl_gp_seq = rcu_state.gp_seq; |
---|
3568 | 3998 | rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED; |
---|
3569 | | - rdp->rcu_onl_gp_seq = rsp->gp_seq; |
---|
| 3999 | + rdp->rcu_onl_gp_seq = rcu_state.gp_seq; |
---|
3570 | 4000 | rdp->rcu_onl_gp_flags = RCU_GP_CLEANED; |
---|
3571 | 4001 | rdp->cpu = cpu; |
---|
3572 | | - rdp->rsp = rsp; |
---|
3573 | 4002 | rcu_boot_init_nocb_percpu_data(rdp); |
---|
3574 | 4003 | } |
---|
3575 | 4004 | |
---|
3576 | 4005 | /* |
---|
3577 | | - * Initialize a CPU's per-CPU RCU data. Note that only one online or |
---|
| 4006 | + * Invoked early in the CPU-online process, when pretty much all services |
---|
| 4007 | + * are available. The incoming CPU is not present. |
---|
| 4008 | + * |
---|
| 4009 | + * Initializes a CPU's per-CPU RCU data. Note that only one online or |
---|
3578 | 4010 | * offline event can be happening at a given time. Note also that we can |
---|
3579 | 4011 | * accept some slop in the rsp->gp_seq access due to the fact that this |
---|
3580 | | - * CPU cannot possibly have any RCU callbacks in flight yet. |
---|
| 4012 | + * CPU cannot possibly have any non-offloaded RCU callbacks in flight yet. |
---|
| 4013 | + * And any offloaded callbacks are being numbered elsewhere. |
---|
3581 | 4014 | */ |
---|
3582 | | -static void |
---|
3583 | | -rcu_init_percpu_data(int cpu, struct rcu_state *rsp) |
---|
| 4015 | +int rcutree_prepare_cpu(unsigned int cpu) |
---|
3584 | 4016 | { |
---|
3585 | 4017 | unsigned long flags; |
---|
3586 | | - struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
---|
3587 | | - struct rcu_node *rnp = rcu_get_root(rsp); |
---|
| 4018 | + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
---|
| 4019 | + struct rcu_node *rnp = rcu_get_root(); |
---|
3588 | 4020 | |
---|
3589 | 4021 | /* Set up local state, ensuring consistent view of global state. */ |
---|
3590 | 4022 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
---|
3591 | 4023 | rdp->qlen_last_fqs_check = 0; |
---|
3592 | | - rdp->n_force_qs_snap = rsp->n_force_qs; |
---|
| 4024 | + rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); |
---|
3593 | 4025 | rdp->blimit = blimit; |
---|
3594 | 4026 | if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */ |
---|
3595 | | - !init_nocb_callback_list(rdp)) |
---|
| 4027 | + !rcu_segcblist_is_offloaded(&rdp->cblist)) |
---|
3596 | 4028 | rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */ |
---|
3597 | | - rdp->dynticks->dynticks_nesting = 1; /* CPU not up, no tearing. */ |
---|
| 4029 | + rdp->dynticks_nesting = 1; /* CPU not up, no tearing. */ |
---|
3598 | 4030 | rcu_dynticks_eqs_online(); |
---|
3599 | 4031 | raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ |
---|
3600 | 4032 | |
---|
.. | .. |
---|
3606 | 4038 | rnp = rdp->mynode; |
---|
3607 | 4039 | raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ |
---|
3608 | 4040 | rdp->beenonline = true; /* We have now been online. */ |
---|
3609 | | - rdp->gp_seq = rnp->gp_seq; |
---|
3610 | | - rdp->gp_seq_needed = rnp->gp_seq; |
---|
| 4041 | + rdp->gp_seq = READ_ONCE(rnp->gp_seq); |
---|
| 4042 | + rdp->gp_seq_needed = rdp->gp_seq; |
---|
3611 | 4043 | rdp->cpu_no_qs.b.norm = true; |
---|
3612 | | - rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu); |
---|
3613 | 4044 | rdp->core_needs_qs = false; |
---|
3614 | 4045 | rdp->rcu_iw_pending = false; |
---|
3615 | | - rdp->rcu_iw_gp_seq = rnp->gp_seq - 1; |
---|
3616 | | - trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("cpuonl")); |
---|
| 4046 | + rdp->rcu_iw_gp_seq = rdp->gp_seq - 1; |
---|
| 4047 | + trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl")); |
---|
3617 | 4048 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
3618 | | -} |
---|
3619 | | - |
---|
3620 | | -/* |
---|
3621 | | - * Invoked early in the CPU-online process, when pretty much all |
---|
3622 | | - * services are available. The incoming CPU is not present. |
---|
3623 | | - */ |
---|
3624 | | -int rcutree_prepare_cpu(unsigned int cpu) |
---|
3625 | | -{ |
---|
3626 | | - struct rcu_state *rsp; |
---|
3627 | | - |
---|
3628 | | - for_each_rcu_flavor(rsp) |
---|
3629 | | - rcu_init_percpu_data(cpu, rsp); |
---|
3630 | | - |
---|
3631 | 4049 | rcu_prepare_kthreads(cpu); |
---|
3632 | | - rcu_spawn_all_nocb_kthreads(cpu); |
---|
| 4050 | + rcu_spawn_cpu_nocb_kthread(cpu); |
---|
3633 | 4051 | |
---|
3634 | 4052 | return 0; |
---|
3635 | 4053 | } |
---|
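rcutree_prepare_cpu() and the rcutree_online_cpu()/rcutree_offline_cpu() hooks that follow all use the standard CPU-hotplug callback shape, int fn(unsigned int cpu) returning 0 on success. RCU's hooks are wired up statically in the hotplug core rather than registered at runtime, but for comparison, a hypothetical driver would register callbacks of the same shape dynamically as sketched here (the mydrv_* names are illustrative only):

#include <linux/cpuhotplug.h>

static int mydrv_cpu_online(unsigned int cpu)
{
        /* Per-CPU setup for the incoming CPU. */
        return 0;
}

static int mydrv_cpu_offline(unsigned int cpu)
{
        /* Per-CPU teardown for the outgoing CPU. */
        return 0;
}

static int __init mydrv_init(void)
{
        int ret;

        /* Dynamically allocate an online state; callbacks run on the CPU itself. */
        ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mydrv:online",
                                mydrv_cpu_online, mydrv_cpu_offline);
        return ret < 0 ? ret : 0;
}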
.. | .. |
---|
3639 | 4057 | */ |
---|
3640 | 4058 | static void rcutree_affinity_setting(unsigned int cpu, int outgoing) |
---|
3641 | 4059 | { |
---|
3642 | | - struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu); |
---|
| 4060 | + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
---|
3643 | 4061 | |
---|
3644 | 4062 | rcu_boost_kthread_setaffinity(rdp->mynode, outgoing); |
---|
3645 | 4063 | } |
---|
.. | .. |
---|
3653 | 4071 | unsigned long flags; |
---|
3654 | 4072 | struct rcu_data *rdp; |
---|
3655 | 4073 | struct rcu_node *rnp; |
---|
3656 | | - struct rcu_state *rsp; |
---|
3657 | 4074 | |
---|
3658 | | - for_each_rcu_flavor(rsp) { |
---|
3659 | | - rdp = per_cpu_ptr(rsp->rda, cpu); |
---|
3660 | | - rnp = rdp->mynode; |
---|
3661 | | - raw_spin_lock_irqsave_rcu_node(rnp, flags); |
---|
3662 | | - rnp->ffmask |= rdp->grpmask; |
---|
3663 | | - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
3664 | | - } |
---|
3665 | | - if (IS_ENABLED(CONFIG_TREE_SRCU)) |
---|
3666 | | - srcu_online_cpu(cpu); |
---|
| 4075 | + rdp = per_cpu_ptr(&rcu_data, cpu); |
---|
| 4076 | + rnp = rdp->mynode; |
---|
| 4077 | + raw_spin_lock_irqsave_rcu_node(rnp, flags); |
---|
| 4078 | + rnp->ffmask |= rdp->grpmask; |
---|
| 4079 | + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
3667 | 4080 | if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) |
---|
3668 | 4081 | return 0; /* Too early in boot for scheduler work. */ |
---|
3669 | 4082 | sync_sched_exp_online_cleanup(cpu); |
---|
3670 | 4083 | rcutree_affinity_setting(cpu, -1); |
---|
| 4084 | + |
---|
| 4085 | + // Stop-machine done, so allow nohz_full to disable tick. |
---|
| 4086 | + tick_dep_clear(TICK_DEP_BIT_RCU); |
---|
3671 | 4087 | return 0; |
---|
3672 | 4088 | } |
---|
3673 | 4089 | |
---|
.. | .. |
---|
3680 | 4096 | unsigned long flags; |
---|
3681 | 4097 | struct rcu_data *rdp; |
---|
3682 | 4098 | struct rcu_node *rnp; |
---|
3683 | | - struct rcu_state *rsp; |
---|
3684 | 4099 | |
---|
3685 | | - for_each_rcu_flavor(rsp) { |
---|
3686 | | - rdp = per_cpu_ptr(rsp->rda, cpu); |
---|
3687 | | - rnp = rdp->mynode; |
---|
3688 | | - raw_spin_lock_irqsave_rcu_node(rnp, flags); |
---|
3689 | | - rnp->ffmask &= ~rdp->grpmask; |
---|
3690 | | - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
3691 | | - } |
---|
| 4100 | + rdp = per_cpu_ptr(&rcu_data, cpu); |
---|
| 4101 | + rnp = rdp->mynode; |
---|
| 4102 | + raw_spin_lock_irqsave_rcu_node(rnp, flags); |
---|
| 4103 | + rnp->ffmask &= ~rdp->grpmask; |
---|
| 4104 | + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
3692 | 4105 | |
---|
3693 | 4106 | rcutree_affinity_setting(cpu, cpu); |
---|
3694 | | - if (IS_ENABLED(CONFIG_TREE_SRCU)) |
---|
3695 | | - srcu_offline_cpu(cpu); |
---|
| 4107 | + |
---|
| 4108 | + // nohz_full CPUs need the tick for stop-machine to work quickly |
---|
| 4109 | + tick_dep_set(TICK_DEP_BIT_RCU); |
---|
3696 | 4110 | return 0; |
---|
3697 | 4111 | } |
---|
3698 | | - |
---|
3699 | | -/* |
---|
3700 | | - * Near the end of the offline process. We do only tracing here. |
---|
3701 | | - */ |
---|
3702 | | -int rcutree_dying_cpu(unsigned int cpu) |
---|
3703 | | -{ |
---|
3704 | | - struct rcu_state *rsp; |
---|
3705 | | - |
---|
3706 | | - for_each_rcu_flavor(rsp) |
---|
3707 | | - rcu_cleanup_dying_cpu(rsp); |
---|
3708 | | - return 0; |
---|
3709 | | -} |
---|
3710 | | - |
---|
3711 | | -/* |
---|
3712 | | - * The outgoing CPU is gone and we are running elsewhere. |
---|
3713 | | - */ |
---|
3714 | | -int rcutree_dead_cpu(unsigned int cpu) |
---|
3715 | | -{ |
---|
3716 | | - struct rcu_state *rsp; |
---|
3717 | | - |
---|
3718 | | - for_each_rcu_flavor(rsp) { |
---|
3719 | | - rcu_cleanup_dead_cpu(cpu, rsp); |
---|
3720 | | - do_nocb_deferred_wakeup(per_cpu_ptr(rsp->rda, cpu)); |
---|
3721 | | - } |
---|
3722 | | - return 0; |
---|
3723 | | -} |
---|
3724 | | - |
---|
3725 | | -static DEFINE_PER_CPU(int, rcu_cpu_started); |
---|
3726 | 4112 | |
---|
3727 | 4113 | /* |
---|
3728 | 4114 | * Mark the specified CPU as being online so that subsequent grace periods |
---|
.. | .. |
---|
3739 | 4125 | { |
---|
3740 | 4126 | unsigned long flags; |
---|
3741 | 4127 | unsigned long mask; |
---|
3742 | | - int nbits; |
---|
3743 | | - unsigned long oldmask; |
---|
3744 | 4128 | struct rcu_data *rdp; |
---|
3745 | 4129 | struct rcu_node *rnp; |
---|
3746 | | - struct rcu_state *rsp; |
---|
| 4130 | + bool newcpu; |
---|
3747 | 4131 | |
---|
3748 | | - if (per_cpu(rcu_cpu_started, cpu)) |
---|
| 4132 | + rdp = per_cpu_ptr(&rcu_data, cpu); |
---|
| 4133 | + if (rdp->cpu_started) |
---|
3749 | 4134 | return; |
---|
| 4135 | + rdp->cpu_started = true; |
---|
3750 | 4136 | |
---|
3751 | | - per_cpu(rcu_cpu_started, cpu) = 1; |
---|
3752 | | - |
---|
3753 | | - for_each_rcu_flavor(rsp) { |
---|
3754 | | - rdp = per_cpu_ptr(rsp->rda, cpu); |
---|
3755 | | - rnp = rdp->mynode; |
---|
3756 | | - mask = rdp->grpmask; |
---|
3757 | | - raw_spin_lock_irqsave_rcu_node(rnp, flags); |
---|
3758 | | - rnp->qsmaskinitnext |= mask; |
---|
3759 | | - oldmask = rnp->expmaskinitnext; |
---|
3760 | | - rnp->expmaskinitnext |= mask; |
---|
3761 | | - oldmask ^= rnp->expmaskinitnext; |
---|
3762 | | - nbits = bitmap_weight(&oldmask, BITS_PER_LONG); |
---|
3763 | | - /* Allow lockless access for expedited grace periods. */ |
---|
3764 | | - smp_store_release(&rsp->ncpus, rsp->ncpus + nbits); /* ^^^ */ |
---|
3765 | | - rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */ |
---|
3766 | | - rdp->rcu_onl_gp_seq = READ_ONCE(rsp->gp_seq); |
---|
3767 | | - rdp->rcu_onl_gp_flags = READ_ONCE(rsp->gp_flags); |
---|
3768 | | - if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */ |
---|
3769 | | - /* Report QS -after- changing ->qsmaskinitnext! */ |
---|
3770 | | - rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags); |
---|
3771 | | - } else { |
---|
3772 | | - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
3773 | | - } |
---|
3774 | | - } |
---|
3775 | | - smp_mb(); /* Ensure RCU read-side usage follows above initialization. */ |
---|
3776 | | -} |
---|
3777 | | - |
---|
3778 | | -#ifdef CONFIG_HOTPLUG_CPU |
---|
3779 | | -/* |
---|
3780 | | - * The CPU is exiting the idle loop into the arch_cpu_idle_dead() |
---|
3781 | | - * function. We now remove it from the rcu_node tree's ->qsmaskinitnext |
---|
3782 | | - * bit masks. |
---|
3783 | | - */ |
---|
3784 | | -static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp) |
---|
3785 | | -{ |
---|
3786 | | - unsigned long flags; |
---|
3787 | | - unsigned long mask; |
---|
3788 | | - struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
---|
3789 | | - struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ |
---|
3790 | | - |
---|
3791 | | - /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ |
---|
| 4137 | + rnp = rdp->mynode; |
---|
3792 | 4138 | mask = rdp->grpmask; |
---|
3793 | | - spin_lock(&rsp->ofl_lock); |
---|
3794 | | - raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */ |
---|
3795 | | - rdp->rcu_ofl_gp_seq = READ_ONCE(rsp->gp_seq); |
---|
3796 | | - rdp->rcu_ofl_gp_flags = READ_ONCE(rsp->gp_flags); |
---|
3797 | | - if (rnp->qsmask & mask) { /* RCU waiting on outgoing CPU? */ |
---|
3798 | | - /* Report quiescent state -before- changing ->qsmaskinitnext! */ |
---|
3799 | | - rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags); |
---|
3800 | | - raw_spin_lock_irqsave_rcu_node(rnp, flags); |
---|
| 4139 | + WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); |
---|
| 4140 | + WARN_ON_ONCE(!(rnp->ofl_seq & 0x1)); |
---|
| 4141 | + smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). |
---|
| 4142 | + raw_spin_lock_irqsave_rcu_node(rnp, flags); |
---|
| 4143 | + WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask); |
---|
| 4144 | + newcpu = !(rnp->expmaskinitnext & mask); |
---|
| 4145 | + rnp->expmaskinitnext |= mask; |
---|
| 4146 | + /* Allow lockless access for expedited grace periods. */ |
---|
| 4147 | + smp_store_release(&rcu_state.ncpus, rcu_state.ncpus + newcpu); /* ^^^ */ |
---|
| 4148 | + ASSERT_EXCLUSIVE_WRITER(rcu_state.ncpus); |
---|
| 4149 | + rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */ |
---|
| 4150 | + rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq); |
---|
| 4151 | + rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags); |
---|
| 4152 | + if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */ |
---|
| 4153 | + rcu_disable_urgency_upon_qs(rdp); |
---|
| 4154 | + /* Report QS -after- changing ->qsmaskinitnext! */ |
---|
| 4155 | + rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); |
---|
| 4156 | + } else { |
---|
| 4157 | + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
3801 | 4158 | } |
---|
3802 | | - rnp->qsmaskinitnext &= ~mask; |
---|
3803 | | - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
3804 | | - spin_unlock(&rsp->ofl_lock); |
---|
| 4159 | + smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). |
---|
| 4160 | + WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); |
---|
| 4161 | + WARN_ON_ONCE(rnp->ofl_seq & 0x1); |
---|
| 4162 | + smp_mb(); /* Ensure RCU read-side usage follows above initialization. */ |
---|
3805 | 4163 | } |
---|
3806 | 4164 | |
---|
3807 | 4165 | /* |
---|
3808 | 4166 | * The outgoing function has no further need of RCU, so remove it from |
---|
3809 | | - * the list of CPUs that RCU must track. |
---|
| 4167 | + * the rcu_node tree's ->qsmaskinitnext bit masks. |
---|
3810 | 4168 | * |
---|
3811 | 4169 | * Note that this function is special in that it is invoked directly |
---|
3812 | 4170 | * from the outgoing CPU rather than from the cpuhp_step mechanism. |
---|
.. | .. |
---|
3814 | 4172 | */ |
---|
3815 | 4173 | void rcu_report_dead(unsigned int cpu) |
---|
3816 | 4174 | { |
---|
3817 | | - struct rcu_state *rsp; |
---|
| 4175 | + unsigned long flags; |
---|
| 4176 | + unsigned long mask; |
---|
| 4177 | + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
---|
| 4178 | + struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ |
---|
3818 | 4179 | |
---|
3819 | | - /* QS for any half-done expedited RCU-sched GP. */ |
---|
| 4180 | + /* QS for any half-done expedited grace period. */ |
---|
3820 | 4181 | preempt_disable(); |
---|
3821 | | - rcu_report_exp_rdp(&rcu_sched_state, |
---|
3822 | | - this_cpu_ptr(rcu_sched_state.rda), true); |
---|
| 4182 | + rcu_report_exp_rdp(this_cpu_ptr(&rcu_data)); |
---|
3823 | 4183 | preempt_enable(); |
---|
3824 | | - for_each_rcu_flavor(rsp) |
---|
3825 | | - rcu_cleanup_dying_idle_cpu(cpu, rsp); |
---|
| 4184 | + rcu_preempt_deferred_qs(current); |
---|
3826 | 4185 | |
---|
3827 | | - per_cpu(rcu_cpu_started, cpu) = 0; |
---|
| 4186 | + /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ |
---|
| 4187 | + mask = rdp->grpmask; |
---|
| 4188 | + WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); |
---|
| 4189 | + WARN_ON_ONCE(!(rnp->ofl_seq & 0x1)); |
---|
| 4190 | + smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). |
---|
| 4191 | + raw_spin_lock(&rcu_state.ofl_lock); |
---|
| 4192 | + raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */ |
---|
| 4193 | + rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq); |
---|
| 4194 | + rdp->rcu_ofl_gp_flags = READ_ONCE(rcu_state.gp_flags); |
---|
| 4195 | + if (rnp->qsmask & mask) { /* RCU waiting on outgoing CPU? */ |
---|
| 4196 | + /* Report quiescent state -before- changing ->qsmaskinitnext! */ |
---|
| 4197 | + rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); |
---|
| 4198 | + raw_spin_lock_irqsave_rcu_node(rnp, flags); |
---|
| 4199 | + } |
---|
| 4200 | + WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask); |
---|
| 4201 | + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
| 4202 | + raw_spin_unlock(&rcu_state.ofl_lock); |
---|
| 4203 | + smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). |
---|
| 4204 | + WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); |
---|
| 4205 | + WARN_ON_ONCE(rnp->ofl_seq & 0x1); |
---|
| 4206 | + |
---|
| 4207 | + rdp->cpu_started = false; |
---|
3828 | 4208 | } |
---|
3829 | 4209 | |
---|
3830 | | -/* Migrate the dead CPU's callbacks to the current CPU. */ |
---|
3831 | | -static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp) |
---|
| 4210 | +#ifdef CONFIG_HOTPLUG_CPU |
---|
| 4211 | +/* |
---|
| 4212 | + * The outgoing CPU has just passed through the dying-idle state, and we |
---|
| 4213 | + * are being invoked from the CPU that was IPIed to continue the offline |
---|
| 4214 | + * operation. Migrate the outgoing CPU's callbacks to the current CPU. |
---|
| 4215 | + */ |
---|
| 4216 | +void rcutree_migrate_callbacks(int cpu) |
---|
3832 | 4217 | { |
---|
3833 | 4218 | unsigned long flags; |
---|
3834 | 4219 | struct rcu_data *my_rdp; |
---|
3835 | | - struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
---|
3836 | | - struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); |
---|
| 4220 | + struct rcu_node *my_rnp; |
---|
| 4221 | + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); |
---|
3837 | 4222 | bool needwake; |
---|
3838 | 4223 | |
---|
3839 | | - if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist)) |
---|
| 4224 | + if (rcu_segcblist_is_offloaded(&rdp->cblist) || |
---|
| 4225 | + rcu_segcblist_empty(&rdp->cblist)) |
---|
3840 | 4226 | return; /* No callbacks to migrate. */ |
---|
3841 | 4227 | |
---|
3842 | 4228 | local_irq_save(flags); |
---|
3843 | | - my_rdp = this_cpu_ptr(rsp->rda); |
---|
3844 | | - if (rcu_nocb_adopt_orphan_cbs(my_rdp, rdp, flags)) { |
---|
3845 | | - local_irq_restore(flags); |
---|
3846 | | - return; |
---|
3847 | | - } |
---|
3848 | | - raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */ |
---|
| 4229 | + my_rdp = this_cpu_ptr(&rcu_data); |
---|
| 4230 | + my_rnp = my_rdp->mynode; |
---|
| 4231 | + rcu_nocb_lock(my_rdp); /* irqs already disabled. */ |
---|
| 4232 | + WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies)); |
---|
| 4233 | + raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */ |
---|
3849 | 4234 | /* Leverage recent GPs and set GP for new callbacks. */ |
---|
3850 | | - needwake = rcu_advance_cbs(rsp, rnp_root, rdp) || |
---|
3851 | | - rcu_advance_cbs(rsp, rnp_root, my_rdp); |
---|
| 4235 | + needwake = rcu_advance_cbs(my_rnp, rdp) || |
---|
| 4236 | + rcu_advance_cbs(my_rnp, my_rdp); |
---|
3852 | 4237 | rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist); |
---|
| 4238 | + needwake = needwake || rcu_advance_cbs(my_rnp, my_rdp); |
---|
| 4239 | + rcu_segcblist_disable(&rdp->cblist); |
---|
3853 | 4240 | WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != |
---|
3854 | 4241 | !rcu_segcblist_n_cbs(&my_rdp->cblist)); |
---|
3855 | | - raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags); |
---|
| 4242 | + if (rcu_segcblist_is_offloaded(&my_rdp->cblist)) { |
---|
| 4243 | + raw_spin_unlock_rcu_node(my_rnp); /* irqs remain disabled. */ |
---|
| 4244 | + __call_rcu_nocb_wake(my_rdp, true, flags); |
---|
| 4245 | + } else { |
---|
| 4246 | + rcu_nocb_unlock(my_rdp); /* irqs remain disabled. */ |
---|
| 4247 | + raw_spin_unlock_irqrestore_rcu_node(my_rnp, flags); |
---|
| 4248 | + } |
---|
3856 | 4249 | if (needwake) |
---|
3857 | | - rcu_gp_kthread_wake(rsp); |
---|
| 4250 | + rcu_gp_kthread_wake(); |
---|
| 4251 | + lockdep_assert_irqs_enabled(); |
---|
3858 | 4252 | WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 || |
---|
3859 | 4253 | !rcu_segcblist_empty(&rdp->cblist), |
---|
3860 | 4254 | "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n", |
---|
3861 | 4255 | cpu, rcu_segcblist_n_cbs(&rdp->cblist), |
---|
3862 | 4256 | rcu_segcblist_first_cb(&rdp->cblist)); |
---|
3863 | | -} |
---|
3864 | | - |
---|
3865 | | -/* |
---|
3866 | | - * The outgoing CPU has just passed through the dying-idle state, |
---|
3867 | | - * and we are being invoked from the CPU that was IPIed to continue the |
---|
3868 | | - * offline operation. We need to migrate the outgoing CPU's callbacks. |
---|
3869 | | - */ |
---|
3870 | | -void rcutree_migrate_callbacks(int cpu) |
---|
3871 | | -{ |
---|
3872 | | - struct rcu_state *rsp; |
---|
3873 | | - |
---|
3874 | | - for_each_rcu_flavor(rsp) |
---|
3875 | | - rcu_migrate_callbacks(cpu, rsp); |
---|
3876 | 4257 | } |
---|
3877 | 4258 | #endif |
---|
3878 | 4259 | |
---|
.. | .. |
---|
3886 | 4267 | switch (action) { |
---|
3887 | 4268 | case PM_HIBERNATION_PREPARE: |
---|
3888 | 4269 | case PM_SUSPEND_PREPARE: |
---|
3889 | | - if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */ |
---|
3890 | | - rcu_expedite_gp(); |
---|
| 4270 | + rcu_expedite_gp(); |
---|
3891 | 4271 | break; |
---|
3892 | 4272 | case PM_POST_HIBERNATION: |
---|
3893 | 4273 | case PM_POST_SUSPEND: |
---|
3894 | | - if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */ |
---|
3895 | | - rcu_unexpedite_gp(); |
---|
| 4274 | + rcu_unexpedite_gp(); |
---|
3896 | 4275 | break; |
---|
3897 | 4276 | default: |
---|
3898 | 4277 | break; |
---|
.. | .. |
---|
3901 | 4280 | } |
---|
3902 | 4281 | |
---|
3903 | 4282 | /* |
---|
3904 | | - * Spawn the kthreads that handle each RCU flavor's grace periods. |
---|
| 4283 | + * Spawn the kthreads that handle RCU's grace periods. |
---|
3905 | 4284 | */ |
---|
3906 | 4285 | static int __init rcu_spawn_gp_kthread(void) |
---|
3907 | 4286 | { |
---|
3908 | 4287 | unsigned long flags; |
---|
3909 | 4288 | int kthread_prio_in = kthread_prio; |
---|
3910 | 4289 | struct rcu_node *rnp; |
---|
3911 | | - struct rcu_state *rsp; |
---|
3912 | 4290 | struct sched_param sp; |
---|
3913 | 4291 | struct task_struct *t; |
---|
3914 | 4292 | |
---|
.. | .. |
---|
3928 | 4306 | kthread_prio, kthread_prio_in); |
---|
3929 | 4307 | |
---|
3930 | 4308 | rcu_scheduler_fully_active = 1; |
---|
3931 | | - for_each_rcu_flavor(rsp) { |
---|
3932 | | - t = kthread_create(rcu_gp_kthread, rsp, "%s", rsp->name); |
---|
3933 | | - BUG_ON(IS_ERR(t)); |
---|
3934 | | - rnp = rcu_get_root(rsp); |
---|
3935 | | - raw_spin_lock_irqsave_rcu_node(rnp, flags); |
---|
3936 | | - rsp->gp_kthread = t; |
---|
3937 | | - if (kthread_prio) { |
---|
3938 | | - sp.sched_priority = kthread_prio; |
---|
3939 | | - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
---|
3940 | | - } |
---|
3941 | | - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
3942 | | - wake_up_process(t); |
---|
| 4309 | + t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name); |
---|
| 4310 | + if (WARN_ONCE(IS_ERR(t), "%s: Could not start grace-period kthread, OOM is now expected behavior\n", __func__)) |
---|
| 4311 | + return 0; |
---|
| 4312 | + if (kthread_prio) { |
---|
| 4313 | + sp.sched_priority = kthread_prio; |
---|
| 4314 | + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
---|
3943 | 4315 | } |
---|
| 4316 | + rnp = rcu_get_root(); |
---|
| 4317 | + raw_spin_lock_irqsave_rcu_node(rnp, flags); |
---|
| 4318 | + WRITE_ONCE(rcu_state.gp_activity, jiffies); |
---|
| 4319 | + WRITE_ONCE(rcu_state.gp_req_activity, jiffies); |
---|
| 4320 | + // Reset .gp_activity and .gp_req_activity before setting .gp_kthread. |
---|
| 4321 | + smp_store_release(&rcu_state.gp_kthread, t); /* ^^^ */ |
---|
| 4322 | + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
---|
| 4323 | + wake_up_process(t); |
---|
3944 | 4324 | rcu_spawn_nocb_kthreads(); |
---|
3945 | 4325 | rcu_spawn_boost_kthreads(); |
---|
| 4326 | + rcu_spawn_core_kthreads(); |
---|
3946 | 4327 | return 0; |
---|
3947 | 4328 | } |
---|
3948 | 4329 | early_initcall(rcu_spawn_gp_kthread); |
---|
.. | .. |
---|
3967 | 4348 | } |
---|
3968 | 4349 | |
---|
3969 | 4350 | /* |
---|
3970 | | - * Helper function for rcu_init() that initializes one rcu_state structure. |
---|
| 4351 | + * Helper function for rcu_init() that initializes the rcu_state structure. |
---|
3971 | 4352 | */ |
---|
3972 | | -static void __init rcu_init_one(struct rcu_state *rsp) |
---|
| 4353 | +static void __init rcu_init_one(void) |
---|
3973 | 4354 | { |
---|
3974 | 4355 | static const char * const buf[] = RCU_NODE_NAME_INIT; |
---|
3975 | 4356 | static const char * const fqs[] = RCU_FQS_NAME_INIT; |
---|
.. | .. |
---|
3991 | 4372 | /* Initialize the level-tracking arrays. */ |
---|
3992 | 4373 | |
---|
3993 | 4374 | for (i = 1; i < rcu_num_lvls; i++) |
---|
3994 | | - rsp->level[i] = rsp->level[i - 1] + num_rcu_lvl[i - 1]; |
---|
| 4375 | + rcu_state.level[i] = |
---|
| 4376 | + rcu_state.level[i - 1] + num_rcu_lvl[i - 1]; |
---|
3995 | 4377 | rcu_init_levelspread(levelspread, num_rcu_lvl); |
---|
3996 | 4378 | |
---|
3997 | 4379 | /* Initialize the elements themselves, starting from the leaves. */ |
---|
3998 | 4380 | |
---|
3999 | 4381 | for (i = rcu_num_lvls - 1; i >= 0; i--) { |
---|
4000 | 4382 | cpustride *= levelspread[i]; |
---|
4001 | | - rnp = rsp->level[i]; |
---|
| 4383 | + rnp = rcu_state.level[i]; |
---|
4002 | 4384 | for (j = 0; j < num_rcu_lvl[i]; j++, rnp++) { |
---|
4003 | 4385 | raw_spin_lock_init(&ACCESS_PRIVATE(rnp, lock)); |
---|
4004 | 4386 | lockdep_set_class_and_name(&ACCESS_PRIVATE(rnp, lock), |
---|
.. | .. |
---|
4006 | 4388 | raw_spin_lock_init(&rnp->fqslock); |
---|
4007 | 4389 | lockdep_set_class_and_name(&rnp->fqslock, |
---|
4008 | 4390 | &rcu_fqs_class[i], fqs[i]); |
---|
4009 | | - rnp->gp_seq = rsp->gp_seq; |
---|
4010 | | - rnp->gp_seq_needed = rsp->gp_seq; |
---|
4011 | | - rnp->completedqs = rsp->gp_seq; |
---|
| 4391 | + rnp->gp_seq = rcu_state.gp_seq; |
---|
| 4392 | + rnp->gp_seq_needed = rcu_state.gp_seq; |
---|
| 4393 | + rnp->completedqs = rcu_state.gp_seq; |
---|
4012 | 4394 | rnp->qsmask = 0; |
---|
4013 | 4395 | rnp->qsmaskinit = 0; |
---|
4014 | 4396 | rnp->grplo = j * cpustride; |
---|
.. | .. |
---|
4021 | 4403 | rnp->parent = NULL; |
---|
4022 | 4404 | } else { |
---|
4023 | 4405 | rnp->grpnum = j % levelspread[i - 1]; |
---|
4024 | | - rnp->grpmask = 1UL << rnp->grpnum; |
---|
4025 | | - rnp->parent = rsp->level[i - 1] + |
---|
| 4406 | + rnp->grpmask = BIT(rnp->grpnum); |
---|
| 4407 | + rnp->parent = rcu_state.level[i - 1] + |
---|
4026 | 4408 | j / levelspread[i - 1]; |
---|
4027 | 4409 | } |
---|
4028 | 4410 | rnp->level = i; |
---|
.. | .. |
---|
4036 | 4418 | } |
---|
4037 | 4419 | } |
---|
4038 | 4420 | |
---|
4039 | | - init_swait_queue_head(&rsp->gp_wq); |
---|
4040 | | - init_swait_queue_head(&rsp->expedited_wq); |
---|
4041 | | - rnp = rcu_first_leaf_node(rsp); |
---|
| 4421 | + init_swait_queue_head(&rcu_state.gp_wq); |
---|
| 4422 | + init_swait_queue_head(&rcu_state.expedited_wq); |
---|
| 4423 | + rnp = rcu_first_leaf_node(); |
---|
4042 | 4424 | for_each_possible_cpu(i) { |
---|
4043 | 4425 | while (i > rnp->grphi) |
---|
4044 | 4426 | rnp++; |
---|
4045 | | - per_cpu_ptr(rsp->rda, i)->mynode = rnp; |
---|
4046 | | - rcu_boot_init_percpu_data(i, rsp); |
---|
| 4427 | + per_cpu_ptr(&rcu_data, i)->mynode = rnp; |
---|
| 4428 | + rcu_boot_init_percpu_data(i); |
---|
4047 | 4429 | } |
---|
4048 | | - list_add(&rsp->flavors, &rcu_struct_flavors); |
---|
4049 | 4430 | } |
---|
4050 | 4431 | |
---|
4051 | 4432 | /* |
---|
.. | .. |
---|
4053 | 4434 | * replace the definitions in tree.h because those are needed to size |
---|
4054 | 4435 | * the ->node array in the rcu_state structure. |
---|
4055 | 4436 | */ |
---|
4056 | | -static void __init rcu_init_geometry(void) |
---|
| 4437 | +void rcu_init_geometry(void) |
---|
4057 | 4438 | { |
---|
4058 | 4439 | ulong d; |
---|
4059 | 4440 | int i; |
---|
| 4441 | + static unsigned long old_nr_cpu_ids; |
---|
4060 | 4442 | int rcu_capacity[RCU_NUM_LVLS]; |
---|
| 4443 | + static bool initialized; |
---|
| 4444 | + |
---|
| 4445 | + if (initialized) { |
---|
| 4446 | + /* |
---|
| 4447 | + * Warn if setup_nr_cpu_ids() had not yet been invoked, |
---|
 | 4448 | + * unless nr_cpu_ids == NR_CPUS, in which case who cares? |
---|
| 4449 | + */ |
---|
| 4450 | + WARN_ON_ONCE(old_nr_cpu_ids != nr_cpu_ids); |
---|
| 4451 | + return; |
---|
| 4452 | + } |
---|
| 4453 | + |
---|
| 4454 | + old_nr_cpu_ids = nr_cpu_ids; |
---|
| 4455 | + initialized = true; |
---|
4061 | 4456 | |
---|
4062 | 4457 | /* |
---|
4063 | 4458 | * Initialize any unspecified boot parameters. |
---|
.. | .. |
---|
4071 | 4466 | jiffies_till_first_fqs = d; |
---|
4072 | 4467 | if (jiffies_till_next_fqs == ULONG_MAX) |
---|
4073 | 4468 | jiffies_till_next_fqs = d; |
---|
| 4469 | + adjust_jiffies_till_sched_qs(); |
---|
4074 | 4470 | |
---|
4075 | 4471 | /* If the compile-time values are accurate, just leave. */ |
---|
4076 | 4472 | if (rcu_fanout_leaf == RCU_FANOUT_LEAF && |
---|
.. | .. |
---|
4129 | 4525 | |
---|
4130 | 4526 | /* |
---|
4131 | 4527 | * Dump out the structure of the rcu_node combining tree associated |
---|
4132 | | - * with the rcu_state structure referenced by rsp. |
---|
| 4528 | + * with the rcu_state structure. |
---|
4133 | 4529 | */ |
---|
4134 | | -static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp) |
---|
| 4530 | +static void __init rcu_dump_rcu_node_tree(void) |
---|
4135 | 4531 | { |
---|
4136 | 4532 | int level = 0; |
---|
4137 | 4533 | struct rcu_node *rnp; |
---|
4138 | 4534 | |
---|
4139 | 4535 | pr_info("rcu_node tree layout dump\n"); |
---|
4140 | 4536 | pr_info(" "); |
---|
4141 | | - rcu_for_each_node_breadth_first(rsp, rnp) { |
---|
| 4537 | + rcu_for_each_node_breadth_first(rnp) { |
---|
4142 | 4538 | if (rnp->level != level) { |
---|
4143 | 4539 | pr_cont("\n"); |
---|
4144 | 4540 | pr_info(" "); |
---|
.. | .. |
---|
4152 | 4548 | struct workqueue_struct *rcu_gp_wq; |
---|
4153 | 4549 | struct workqueue_struct *rcu_par_gp_wq; |
---|
4154 | 4550 | |
---|
| 4551 | +static void __init kfree_rcu_batch_init(void) |
---|
| 4552 | +{ |
---|
| 4553 | + int cpu; |
---|
| 4554 | + int i; |
---|
| 4555 | + |
---|
| 4556 | + for_each_possible_cpu(cpu) { |
---|
| 4557 | + struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu); |
---|
| 4558 | + |
---|
| 4559 | + for (i = 0; i < KFREE_N_BATCHES; i++) { |
---|
| 4560 | + INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work); |
---|
| 4561 | + krcp->krw_arr[i].krcp = krcp; |
---|
| 4562 | + } |
---|
| 4563 | + |
---|
| 4564 | + INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor); |
---|
| 4565 | + INIT_WORK(&krcp->page_cache_work, fill_page_cache_func); |
---|
| 4566 | + krcp->initialized = true; |
---|
| 4567 | + } |
---|
| 4568 | + if (register_shrinker(&kfree_rcu_shrinker)) |
---|
| 4569 | + pr_err("Failed to register kfree_rcu() shrinker!\n"); |
---|
| 4570 | +} |
---|
| 4571 | + |
---|
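kfree_rcu_batch_init() above sets up the per-CPU batching state that backs kfree_rcu(). From a caller's point of view, the interface is just the two-argument kfree_rcu() on a structure that embeds an rcu_head; a brief sketch with illustrative names (struct gadget, retire_gadget()) that are not part of this file:

#include <linux/slab.h>
#include <linux/rcupdate.h>

struct gadget {
        int value;
        struct rcu_head rcu;
};

static void retire_gadget(struct gadget *gp)
{
        /*
         * Queue gp for freeing after a grace period; the per-CPU batching
         * initialized in kfree_rcu_batch_init() coalesces these requests.
         */
        kfree_rcu(gp, rcu);
}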
4155 | 4572 | void __init rcu_init(void) |
---|
4156 | 4573 | { |
---|
4157 | 4574 | int cpu; |
---|
4158 | 4575 | |
---|
4159 | 4576 | rcu_early_boot_tests(); |
---|
4160 | 4577 | |
---|
| 4578 | + kfree_rcu_batch_init(); |
---|
4161 | 4579 | rcu_bootup_announce(); |
---|
4162 | 4580 | rcu_init_geometry(); |
---|
4163 | | - rcu_init_one(&rcu_bh_state); |
---|
4164 | | - rcu_init_one(&rcu_sched_state); |
---|
| 4581 | + rcu_init_one(); |
---|
4165 | 4582 | if (dump_tree) |
---|
4166 | | - rcu_dump_rcu_node_tree(&rcu_sched_state); |
---|
4167 | | - __rcu_init_preempt(); |
---|
4168 | | - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
---|
| 4583 | + rcu_dump_rcu_node_tree(); |
---|
| 4584 | + if (use_softirq) |
---|
| 4585 | + open_softirq(RCU_SOFTIRQ, rcu_core_si); |
---|
4169 | 4586 | |
---|
4170 | 4587 | /* |
---|
4171 | 4588 | * We don't need protection against CPU-hotplug here because |
---|
.. | .. |
---|
4184 | 4601 | WARN_ON(!rcu_gp_wq); |
---|
4185 | 4602 | rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0); |
---|
4186 | 4603 | WARN_ON(!rcu_par_gp_wq); |
---|
| 4604 | + srcu_init(); |
---|
| 4605 | + |
---|
| 4606 | + /* Fill in default value for rcutree.qovld boot parameter. */ |
---|
| 4607 | + /* -After- the rcu_node ->lock fields are initialized! */ |
---|
| 4608 | + if (qovld < 0) |
---|
| 4609 | + qovld_calc = DEFAULT_RCU_QOVLD_MULT * qhimark; |
---|
| 4610 | + else |
---|
| 4611 | + qovld_calc = qovld; |
---|
4187 | 4612 | } |
---|
4188 | 4613 | |
---|
| 4614 | +#include "tree_stall.h" |
---|
4189 | 4615 | #include "tree_exp.h" |
---|
4190 | 4616 | #include "tree_plugin.h" |
---|