| .. | .. | 
|---|
 | 1 | +// SPDX-License-Identifier: GPL-2.0+  | 
|---|
| 1 | 2 |  /* | 
|---|
| 2 |  | - * Read-Copy Update mechanism for mutual exclusion  | 
|---|
| 3 |  | - *  | 
|---|
| 4 |  | - * This program is free software; you can redistribute it and/or modify  | 
|---|
| 5 |  | - * it under the terms of the GNU General Public License as published by  | 
|---|
| 6 |  | - * the Free Software Foundation; either version 2 of the License, or  | 
|---|
| 7 |  | - * (at your option) any later version.  | 
|---|
| 8 |  | - *  | 
|---|
| 9 |  | - * This program is distributed in the hope that it will be useful,  | 
|---|
| 10 |  | - * but WITHOUT ANY WARRANTY; without even the implied warranty of  | 
|---|
| 11 |  | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  | 
|---|
| 12 |  | - * GNU General Public License for more details.  | 
|---|
| 13 |  | - *  | 
|---|
| 14 |  | - * You should have received a copy of the GNU General Public License  | 
|---|
| 15 |  | - * along with this program; if not, you can access it online at  | 
|---|
| 16 |  | - * http://www.gnu.org/licenses/gpl-2.0.html.  | 
|---|
 | 3 | + * Read-Copy Update mechanism for mutual exclusion (tree-based version)  | 
|---|
| 17 | 4 |   * | 
|---|
| 18 | 5 |   * Copyright IBM Corporation, 2008 | 
|---|
| 19 | 6 |   * | 
|---|
| 20 | 7 |   * Authors: Dipankar Sarma <dipankar@in.ibm.com> | 
|---|
| 21 | 8 |   *	    Manfred Spraul <manfred@colorfullife.com> | 
|---|
| 22 |  | - *	    Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version  | 
|---|
 | 9 | + *	    Paul E. McKenney <paulmck@linux.ibm.com>  | 
|---|
| 23 | 10 |   * | 
|---|
| 24 |  | - * Based on the original work by Paul McKenney <paulmck@us.ibm.com>  | 
|---|
 | 11 | + * Based on the original work by Paul McKenney <paulmck@linux.ibm.com>  | 
|---|
| 25 | 12 |   * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. | 
|---|
| 26 | 13 |   * | 
|---|
| 27 | 14 |   * For detailed explanation of Read-Copy Update mechanism see - | 
|---|
| .. | .. | 
|---|
| 56 | 43 |  #include <uapi/linux/sched/types.h> | 
|---|
| 57 | 44 |  #include <linux/prefetch.h> | 
|---|
| 58 | 45 |  #include <linux/delay.h> | 
|---|
| 59 |  | -#include <linux/stop_machine.h>  | 
|---|
| 60 | 46 |  #include <linux/random.h> | 
|---|
| 61 | 47 |  #include <linux/trace_events.h> | 
|---|
| 62 | 48 |  #include <linux/suspend.h> | 
|---|
| 63 | 49 |  #include <linux/ftrace.h> | 
|---|
| 64 |  | -#include <linux/delay.h>  | 
|---|
 | 50 | +#include <linux/tick.h>  | 
|---|
 | 51 | +#include <linux/sysrq.h>  | 
|---|
 | 52 | +#include <linux/kprobes.h>  | 
|---|
| 65 | 53 |  #include <linux/gfp.h> | 
|---|
| 66 | 54 |  #include <linux/oom.h> | 
|---|
| 67 | 55 |  #include <linux/smpboot.h> | 
|---|
| 68 | 56 |  #include <linux/jiffies.h> | 
|---|
 | 57 | +#include <linux/slab.h>  | 
|---|
| 69 | 58 |  #include <linux/sched/isolation.h> | 
|---|
 | 59 | +#include <linux/sched/clock.h>  | 
|---|
 | 60 | +#include <linux/vmalloc.h>  | 
|---|
 | 61 | +#include <linux/mm.h>  | 
|---|
 | 62 | +#include <linux/kasan.h>  | 
|---|
| 70 | 63 |  #include "../time/tick-internal.h" | 
|---|
| 71 | 64 |   | 
|---|
| 72 | 65 |  #include "tree.h" | 
|---|
| .. | .. | 
|---|
| 80 | 73 |  /* Data structures. */ | 
|---|
| 81 | 74 |   | 
|---|
| 82 | 75 |  /* | 
|---|
| 83 |  | - * In order to export the rcu_state name to the tracing tools, it  | 
|---|
| 84 |  | - * needs to be added in the __tracepoint_string section.  | 
|---|
| 85 |  | - * This requires defining a separate variable tp_<sname>_varname  | 
|---|
| 86 |  | - * that points to the string being used, and this will allow  | 
|---|
| 87 |  | - * the tracing userspace tools to be able to decipher the string  | 
|---|
| 88 |  | - * address to the matching string.  | 
|---|
 | 76 | + * Steal a bit from the bottom of ->dynticks for idle entry/exit  | 
|---|
 | 77 | + * control.  Initially this is for TLB flushing.  | 
|---|
| 89 | 78 |   */ | 
|---|
| 90 |  | -#ifdef CONFIG_TRACING  | 
|---|
| 91 |  | -# define DEFINE_RCU_TPS(sname) \  | 
|---|
| 92 |  | -static char sname##_varname[] = #sname; \  | 
|---|
| 93 |  | -static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname;  | 
|---|
| 94 |  | -# define RCU_STATE_NAME(sname) sname##_varname  | 
|---|
| 95 |  | -#else  | 
|---|
| 96 |  | -# define DEFINE_RCU_TPS(sname)  | 
|---|
| 97 |  | -# define RCU_STATE_NAME(sname) __stringify(sname)  | 
|---|
| 98 |  | -#endif  | 
|---|
 | 79 | +#define RCU_DYNTICK_CTRL_MASK 0x1  | 
|---|
 | 80 | +#define RCU_DYNTICK_CTRL_CTR  (RCU_DYNTICK_CTRL_MASK + 1)  | 
|---|
| 99 | 81 |   | 
|---|
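The two macros above encode everything RCU needs in a single per-CPU atomic: bit 0 (RCU_DYNTICK_CTRL_MASK) requests special work (initially a deferred TLB flush) on the next idle exit, and the remaining bits form a counter that every idle entry or exit advances by RCU_DYNTICK_CTRL_CTR. A minimal standalone model of that encoding, consistent with rcu_dynticks_eqs_enter()/rcu_dynticks_curr_cpu_in_eqs() later in this file (illustration only, not kernel code):

```c
/*
 * Standalone model of the ->dynticks encoding (illustration only).
 * Bit 0 asks for special work on the next idle exit; the rest is a
 * counter that each idle entry/exit advances by RCU_DYNTICK_CTRL_CTR,
 * so a clear bit 1 means "extended quiescent state", matching
 * rcu_dynticks_curr_cpu_in_eqs().
 */
#include <stdio.h>

#define CTRL_MASK 0x1
#define CTRL_CTR  (CTRL_MASK + 1)

static int in_eqs(unsigned int dynticks)
{
	return !(dynticks & CTRL_CTR);
}

int main(void)
{
	unsigned int dynticks = CTRL_CTR;  /* boot value: RCU watching */

	printf("boot:     %#x  in_eqs=%d\n", dynticks, in_eqs(dynticks));
	dynticks += CTRL_CTR;              /* rcu_dynticks_eqs_enter() */
	printf("idle:     %#x  in_eqs=%d\n", dynticks, in_eqs(dynticks));
	dynticks |= CTRL_MASK;             /* rcu_eqs_special_set()    */
	dynticks += CTRL_CTR;              /* rcu_dynticks_eqs_exit()  */
	dynticks &= ~CTRL_MASK;            /* ...which clears the bit  */
	printf("running:  %#x  in_eqs=%d\n", dynticks, in_eqs(dynticks));
	return 0;
}
```

With this encoding, bit 1 alone says whether RCU is currently watching the CPU, while the moving counter lets a remote observer detect that an idle transition happened.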
| 100 |  | -#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \  | 
|---|
| 101 |  | -DEFINE_RCU_TPS(sname) \  | 
|---|
| 102 |  | -static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \  | 
|---|
| 103 |  | -struct rcu_state sname##_state = { \  | 
|---|
| 104 |  | -	.level = { &sname##_state.node[0] }, \  | 
|---|
| 105 |  | -	.rda = &sname##_data, \  | 
|---|
| 106 |  | -	.call = cr, \  | 
|---|
| 107 |  | -	.gp_state = RCU_GP_IDLE, \  | 
|---|
| 108 |  | -	.gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT, \  | 
|---|
| 109 |  | -	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \  | 
|---|
| 110 |  | -	.name = RCU_STATE_NAME(sname), \  | 
|---|
| 111 |  | -	.abbr = sabbr, \  | 
|---|
| 112 |  | -	.exp_mutex = __MUTEX_INITIALIZER(sname##_state.exp_mutex), \  | 
|---|
| 113 |  | -	.exp_wake_mutex = __MUTEX_INITIALIZER(sname##_state.exp_wake_mutex), \  | 
|---|
| 114 |  | -	.ofl_lock = __SPIN_LOCK_UNLOCKED(sname##_state.ofl_lock), \  | 
|---|
| 115 |  | -}  | 
|---|
| 116 |  | -  | 
|---|
| 117 |  | -RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);  | 
|---|
| 118 |  | -RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);  | 
|---|
| 119 |  | -  | 
|---|
| 120 |  | -static struct rcu_state *const rcu_state_p;  | 
|---|
| 121 |  | -LIST_HEAD(rcu_struct_flavors);  | 
|---|
 | 82 | +static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {  | 
|---|
 | 83 | +	.dynticks_nesting = 1,  | 
|---|
 | 84 | +	.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,  | 
|---|
 | 85 | +	.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),  | 
|---|
 | 86 | +};  | 
|---|
 | 87 | +static struct rcu_state rcu_state = {  | 
|---|
 | 88 | +	.level = { &rcu_state.node[0] },  | 
|---|
 | 89 | +	.gp_state = RCU_GP_IDLE,  | 
|---|
 | 90 | +	.gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT,  | 
|---|
 | 91 | +	.barrier_mutex = __MUTEX_INITIALIZER(rcu_state.barrier_mutex),  | 
|---|
 | 92 | +	.name = RCU_NAME,  | 
|---|
 | 93 | +	.abbr = RCU_ABBR,  | 
|---|
 | 94 | +	.exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex),  | 
|---|
 | 95 | +	.exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex),  | 
|---|
 | 96 | +	.ofl_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.ofl_lock),  | 
|---|
 | 97 | +};  | 
|---|
| 122 | 98 |   | 
|---|
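The block above is the heart of the flavor consolidation: the per-flavor RCU_STATE_INITIALIZER()/rcu_struct_flavors machinery is gone, the old rcu_dynticks fields now live directly in the single per-CPU rcu_data, and there is exactly one statically initialized rcu_state. A minimal sketch of the access pattern this enables, as used by later hunks in this file (illustrative fragment, not part of the patch):

```c
/* With a single flavor there is no rsp/->rda indirection left: */
struct rcu_data *rdp   = per_cpu_ptr(&rcu_data, cpu);  /* some CPU  */
struct rcu_data *myrdp = this_cpu_ptr(&rcu_data);      /* this CPU  */
struct rcu_node *rnp   = rcu_get_root();     /* &rcu_state.node[0] */
```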
| 123 | 99 |  /* Dump rcu_node combining tree at boot to verify correct setup. */ | 
|---|
| 124 | 100 |  static bool dump_tree; | 
|---|
| 125 | 101 |  module_param(dump_tree, bool, 0444); | 
|---|
 | 102 | +/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */  | 
|---|
 | 103 | +static bool use_softirq = !IS_ENABLED(CONFIG_PREEMPT_RT);  | 
|---|
 | 104 | +#ifndef CONFIG_PREEMPT_RT  | 
|---|
 | 105 | +module_param(use_softirq, bool, 0444);  | 
|---|
 | 106 | +#endif  | 
|---|
| 126 | 107 |  /* Control rcu_node-tree auto-balancing at boot time. */ | 
|---|
| 127 | 108 |  static bool rcu_fanout_exact; | 
|---|
| 128 | 109 |  module_param(rcu_fanout_exact, bool, 0444); | 
|---|
| .. | .. | 
|---|
| 133 | 114 |  /* Number of rcu_nodes at specified level. */ | 
|---|
| 134 | 115 |  int num_rcu_lvl[] = NUM_RCU_LVL_INIT; | 
|---|
| 135 | 116 |  int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ | 
|---|
| 136 |  | -/* panic() on RCU Stall sysctl. */  | 
|---|
| 137 |  | -int sysctl_panic_on_rcu_stall __read_mostly = CONFIG_BOOTPARAM_RCU_STALL_PANIC_VALUE;  | 
|---|
| 138 |  | -ATOMIC_NOTIFIER_HEAD(rcu_stall_notifier_list);  | 
|---|
| 139 | 117 |   | 
|---|
| 140 | 118 |  /* | 
|---|
| 141 | 119 |   * The rcu_scheduler_active variable is initialized to the value | 
|---|
| .. | .. | 
|---|
| 166 | 144 |   */ | 
|---|
| 167 | 145 |  static int rcu_scheduler_fully_active __read_mostly; | 
|---|
| 168 | 146 |   | 
|---|
| 169 |  | -static void  | 
|---|
| 170 |  | -rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,  | 
|---|
| 171 |  | -		  struct rcu_node *rnp, unsigned long gps, unsigned long flags);  | 
|---|
 | 147 | +static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp,  | 
|---|
 | 148 | +			      unsigned long gps, unsigned long flags);  | 
|---|
| 172 | 149 |  static void rcu_init_new_rnp(struct rcu_node *rnp_leaf); | 
|---|
| 173 | 150 |  static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf); | 
|---|
| 174 | 151 |  static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); | 
|---|
| 175 | 152 |  static void invoke_rcu_core(void); | 
|---|
| 176 |  | -static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);  | 
|---|
| 177 |  | -static void rcu_report_exp_rdp(struct rcu_state *rsp,  | 
|---|
| 178 |  | -			       struct rcu_data *rdp, bool wake);  | 
|---|
 | 153 | +static void rcu_report_exp_rdp(struct rcu_data *rdp);  | 
|---|
| 179 | 154 |  static void sync_sched_exp_online_cleanup(int cpu); | 
|---|
 | 155 | +static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp);  | 
|---|
| 180 | 156 |   | 
|---|
| 181 | 157 |  /* rcuc/rcub kthread realtime priority */ | 
|---|
| 182 | 158 |  static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0; | 
|---|
| 183 |  | -module_param(kthread_prio, int, 0644);  | 
|---|
 | 159 | +module_param(kthread_prio, int, 0444);  | 
|---|
| 184 | 160 |   | 
|---|
| 185 | 161 |  /* Delay in jiffies for grace-period initialization delays, debug only. */ | 
|---|
| 186 | 162 |   | 
|---|
| .. | .. | 
|---|
| 191 | 167 |  static int gp_cleanup_delay; | 
|---|
| 192 | 168 |  module_param(gp_cleanup_delay, int, 0444); | 
|---|
| 193 | 169 |   | 
|---|
| 194 |  | -/* Retreive RCU kthreads priority for rcutorture */  | 
|---|
 | 170 | +// Add delay to rcu_read_unlock() for strict grace periods.  | 
|---|
 | 171 | +static int rcu_unlock_delay;  | 
|---|
 | 172 | +#ifdef CONFIG_RCU_STRICT_GRACE_PERIOD  | 
|---|
 | 173 | +module_param(rcu_unlock_delay, int, 0444);  | 
|---|
 | 174 | +#endif  | 
|---|
 | 175 | +  | 
|---|
 | 176 | +/*  | 
|---|
 | 177 | + * This rcu parameter is runtime-read-only. It reflects  | 
|---|
 | 178 | + * a minimum allowed number of objects which can be cached  | 
|---|
 | 179 | + * per-CPU. Object size is equal to one page. This value  | 
|---|
 | 180 | + * can be changed at boot time.  | 
|---|
 | 181 | + */  | 
|---|
 | 182 | +static int rcu_min_cached_objs = 5;  | 
|---|
 | 183 | +module_param(rcu_min_cached_objs, int, 0444);  | 
|---|
 | 184 | +  | 
|---|
 | 185 | +/* Retrieve RCU kthreads priority for rcutorture */  | 
|---|
| 195 | 186 |  int rcu_get_gp_kthreads_prio(void) | 
|---|
| 196 | 187 |  { | 
|---|
| 197 | 188 |  	return kthread_prio; | 
|---|
| .. | .. | 
|---|
| 215 | 206 |   * held, but the bit corresponding to the current CPU will be stable | 
|---|
| 216 | 207 |   * in most contexts. | 
|---|
| 217 | 208 |   */ | 
|---|
| 218 |  | -unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp)  | 
|---|
 | 209 | +static unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp)  | 
|---|
| 219 | 210 |  { | 
|---|
| 220 | 211 |  	return READ_ONCE(rnp->qsmaskinitnext); | 
|---|
| 221 | 212 |  } | 
|---|
| .. | .. | 
|---|
| 225 | 216 |   * permit this function to be invoked without holding the root rcu_node | 
|---|
| 226 | 217 |   * structure's ->lock, but of course results can be subject to change. | 
|---|
| 227 | 218 |   */ | 
|---|
| 228 |  | -static int rcu_gp_in_progress(struct rcu_state *rsp)  | 
|---|
 | 219 | +static int rcu_gp_in_progress(void)  | 
|---|
| 229 | 220 |  { | 
|---|
| 230 |  | -	return rcu_seq_state(rcu_seq_current(&rsp->gp_seq));  | 
|---|
 | 221 | +	return rcu_seq_state(rcu_seq_current(&rcu_state.gp_seq));  | 
|---|
| 231 | 222 |  } | 
|---|
| 232 | 223 |   | 
|---|
| 233 | 224 |  /* | 
|---|
| 234 |  | - * Note a quiescent state.  Because we do not need to know  | 
|---|
| 235 |  | - * how many quiescent states passed, just if there was at least  | 
|---|
| 236 |  | - * one since the start of the grace period, this just sets a flag.  | 
|---|
| 237 |  | - * The caller must have disabled preemption.  | 
|---|
 | 225 | + * Return the number of callbacks queued on the specified CPU.  | 
|---|
 | 226 | + * Handles both the nocbs and normal cases.  | 
|---|
| 238 | 227 |   */ | 
|---|
| 239 |  | -void rcu_sched_qs(void)  | 
|---|
 | 228 | +static long rcu_get_n_cbs_cpu(int cpu)  | 
|---|
| 240 | 229 |  { | 
|---|
| 241 |  | -	RCU_LOCKDEP_WARN(preemptible(), "rcu_sched_qs() invoked with preemption enabled!!!");  | 
|---|
| 242 |  | -	if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s))  | 
|---|
| 243 |  | -		return;  | 
|---|
| 244 |  | -	trace_rcu_grace_period(TPS("rcu_sched"),  | 
|---|
| 245 |  | -			       __this_cpu_read(rcu_sched_data.gp_seq),  | 
|---|
| 246 |  | -			       TPS("cpuqs"));  | 
|---|
| 247 |  | -	__this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false);  | 
|---|
| 248 |  | -	if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))  | 
|---|
| 249 |  | -		return;  | 
|---|
| 250 |  | -	__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, false);  | 
|---|
| 251 |  | -	rcu_report_exp_rdp(&rcu_sched_state,  | 
|---|
| 252 |  | -			   this_cpu_ptr(&rcu_sched_data), true);  | 
|---|
 | 230 | +	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
 | 231 | +  | 
|---|
 | 232 | +	if (rcu_segcblist_is_enabled(&rdp->cblist))  | 
|---|
 | 233 | +		return rcu_segcblist_n_cbs(&rdp->cblist);  | 
|---|
 | 234 | +	return 0;  | 
|---|
| 253 | 235 |  } | 
|---|
| 254 | 236 |   | 
|---|
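rcu_get_n_cbs_cpu() above folds the ->cblist accounting into one helper that, per its comment, covers both the no-CBs and normal cases. A hedged sketch of the kind of caller it is meant for, for example when diagnostics want a system-wide total (the loop below is an assumed illustration, not part of this hunk):

```c
/* Illustrative only: total RCU callbacks queued across all CPUs. */
static long rcu_get_n_cbs_all_cpus(void)
{
	long total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += rcu_get_n_cbs_cpu(cpu);
	return total;
}
```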
| 255 |  | -#ifdef CONFIG_PREEMPT_RT_FULL  | 
|---|
| 256 |  | -static void rcu_preempt_qs(void);  | 
|---|
| 257 |  | -  | 
|---|
| 258 |  | -void rcu_bh_qs(void)  | 
|---|
 | 237 | +void rcu_softirq_qs(void)  | 
|---|
| 259 | 238 |  { | 
|---|
| 260 |  | -	unsigned long flags;  | 
|---|
| 261 |  | -  | 
|---|
| 262 |  | -	/* Callers to this function, rcu_preempt_qs(), must disable irqs. */  | 
|---|
| 263 |  | -	local_irq_save(flags);  | 
|---|
| 264 |  | -	rcu_preempt_qs();  | 
|---|
| 265 |  | -	local_irq_restore(flags);  | 
|---|
 | 239 | +	rcu_qs();  | 
|---|
 | 240 | +	rcu_preempt_deferred_qs(current);  | 
|---|
| 266 | 241 |  } | 
|---|
| 267 |  | -#else  | 
|---|
| 268 |  | -void rcu_bh_qs(void)  | 
|---|
| 269 |  | -{  | 
|---|
| 270 |  | -	RCU_LOCKDEP_WARN(preemptible(), "rcu_bh_qs() invoked with preemption enabled!!!");  | 
|---|
| 271 |  | -	if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) {  | 
|---|
| 272 |  | -		trace_rcu_grace_period(TPS("rcu_bh"),  | 
|---|
| 273 |  | -				       __this_cpu_read(rcu_bh_data.gp_seq),  | 
|---|
| 274 |  | -				       TPS("cpuqs"));  | 
|---|
| 275 |  | -		__this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false);  | 
|---|
| 276 |  | -	}  | 
|---|
| 277 |  | -}  | 
|---|
| 278 |  | -#endif  | 
|---|
| 279 |  | -  | 
|---|
| 280 |  | -/*  | 
|---|
| 281 |  | - * Steal a bit from the bottom of ->dynticks for idle entry/exit  | 
|---|
| 282 |  | - * control.  Initially this is for TLB flushing.  | 
|---|
| 283 |  | - */  | 
|---|
| 284 |  | -#define RCU_DYNTICK_CTRL_MASK 0x1  | 
|---|
| 285 |  | -#define RCU_DYNTICK_CTRL_CTR  (RCU_DYNTICK_CTRL_MASK + 1)  | 
|---|
| 286 |  | -#ifndef rcu_eqs_special_exit  | 
|---|
| 287 |  | -#define rcu_eqs_special_exit() do { } while (0)  | 
|---|
| 288 |  | -#endif  | 
|---|
| 289 |  | -  | 
|---|
| 290 |  | -static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {  | 
|---|
| 291 |  | -	.dynticks_nesting = 1,  | 
|---|
| 292 |  | -	.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,  | 
|---|
| 293 |  | -	.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),  | 
|---|
| 294 |  | -};  | 
|---|
| 295 | 242 |   | 
|---|
| 296 | 243 |  /* | 
|---|
| 297 | 244 |   * Record entry into an extended quiescent state.  This is only to be | 
|---|
| 298 |  | - * called when not already in an extended quiescent state.  | 
|---|
 | 245 | + * called when not already in an extended quiescent state, that is,  | 
|---|
 | 246 | + * RCU is watching prior to the call to this function and is no longer  | 
|---|
 | 247 | + * watching upon return.  | 
|---|
| 299 | 248 |   */ | 
|---|
| 300 |  | -static void rcu_dynticks_eqs_enter(void)  | 
|---|
 | 249 | +static noinstr void rcu_dynticks_eqs_enter(void)  | 
|---|
| 301 | 250 |  { | 
|---|
| 302 |  | -	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);  | 
|---|
 | 251 | +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);  | 
|---|
| 303 | 252 |  	int seq; | 
|---|
| 304 | 253 |   | 
|---|
| 305 | 254 |  	/* | 
|---|
| .. | .. | 
|---|
| 307 | 256 |  	 * critical sections, and we also must force ordering with the | 
|---|
| 308 | 257 |  	 * next idle sojourn. | 
|---|
| 309 | 258 |  	 */ | 
|---|
| 310 |  | -	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);  | 
|---|
| 311 |  | -	/* Better be in an extended quiescent state! */  | 
|---|
 | 259 | +	rcu_dynticks_task_trace_enter();  // Before ->dynticks update!  | 
|---|
 | 260 | +	seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);  | 
|---|
 | 261 | +	// RCU is no longer watching.  Better be in extended quiescent state!  | 
|---|
| 312 | 262 |  	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && | 
|---|
| 313 | 263 |  		     (seq & RCU_DYNTICK_CTRL_CTR)); | 
|---|
| 314 | 264 |  	/* Better not have special action (TLB flush) pending! */ | 
|---|
| .. | .. | 
|---|
| 318 | 268 |   | 
|---|
| 319 | 269 |  /* | 
|---|
| 320 | 270 |   * Record exit from an extended quiescent state.  This is only to be | 
|---|
| 321 |  | - * called from an extended quiescent state.  | 
|---|
 | 271 | + * called from an extended quiescent state, that is, RCU is not watching  | 
|---|
 | 272 | + * prior to the call to this function and is watching upon return.  | 
|---|
| 322 | 273 |   */ | 
|---|
| 323 |  | -static void rcu_dynticks_eqs_exit(void)  | 
|---|
 | 274 | +static noinstr void rcu_dynticks_eqs_exit(void)  | 
|---|
| 324 | 275 |  { | 
|---|
| 325 |  | -	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);  | 
|---|
 | 276 | +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);  | 
|---|
| 326 | 277 |  	int seq; | 
|---|
| 327 | 278 |   | 
|---|
| 328 | 279 |  	/* | 
|---|
| .. | .. | 
|---|
| 330 | 281 |  	 * and we also must force ordering with the next RCU read-side | 
|---|
| 331 | 282 |  	 * critical section. | 
|---|
| 332 | 283 |  	 */ | 
|---|
| 333 |  | -	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);  | 
|---|
 | 284 | +	seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);  | 
|---|
 | 285 | +	// RCU is now watching.  Better not be in an extended quiescent state!  | 
|---|
 | 286 | +	rcu_dynticks_task_trace_exit();  // After ->dynticks update!  | 
|---|
| 334 | 287 |  	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && | 
|---|
| 335 | 288 |  		     !(seq & RCU_DYNTICK_CTRL_CTR)); | 
|---|
| 336 | 289 |  	if (seq & RCU_DYNTICK_CTRL_MASK) { | 
|---|
| 337 |  | -		atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdtp->dynticks);  | 
|---|
 | 290 | +		arch_atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);  | 
|---|
| 338 | 291 |  		smp_mb__after_atomic(); /* _exit after clearing mask. */ | 
|---|
| 339 |  | -		/* Prefer duplicate flushes to losing a flush. */  | 
|---|
| 340 |  | -		rcu_eqs_special_exit();  | 
|---|
| 341 | 292 |  	} | 
|---|
| 342 | 293 |  } | 
|---|
| 343 | 294 |   | 
|---|
| .. | .. | 
|---|
| 353 | 304 |   */ | 
|---|
| 354 | 305 |  static void rcu_dynticks_eqs_online(void) | 
|---|
| 355 | 306 |  { | 
|---|
| 356 |  | -	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);  | 
|---|
 | 307 | +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);  | 
|---|
| 357 | 308 |   | 
|---|
| 358 |  | -	if (atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR)  | 
|---|
 | 309 | +	if (atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR)  | 
|---|
| 359 | 310 |  		return; | 
|---|
| 360 |  | -	atomic_add(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);  | 
|---|
 | 311 | +	atomic_add(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);  | 
|---|
| 361 | 312 |  } | 
|---|
| 362 | 313 |   | 
|---|
| 363 | 314 |  /* | 
|---|
| .. | .. | 
|---|
| 365 | 316 |   * | 
|---|
| 366 | 317 |   * No ordering, as we are sampling CPU-local information. | 
|---|
| 367 | 318 |   */ | 
|---|
| 368 |  | -bool rcu_dynticks_curr_cpu_in_eqs(void)  | 
|---|
 | 319 | +static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)  | 
|---|
| 369 | 320 |  { | 
|---|
| 370 |  | -	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);  | 
|---|
 | 321 | +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);  | 
|---|
| 371 | 322 |   | 
|---|
| 372 |  | -	return !(atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR);  | 
|---|
 | 323 | +	return !(arch_atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR);  | 
|---|
| 373 | 324 |  } | 
|---|
| 374 | 325 |   | 
|---|
| 375 | 326 |  /* | 
|---|
| 376 | 327 |   * Snapshot the ->dynticks counter with full ordering so as to allow | 
|---|
| 377 | 328 |   * stable comparison of this counter with past and future snapshots. | 
|---|
| 378 | 329 |   */ | 
|---|
| 379 |  | -int rcu_dynticks_snap(struct rcu_dynticks *rdtp)  | 
|---|
 | 330 | +static int rcu_dynticks_snap(struct rcu_data *rdp)  | 
|---|
| 380 | 331 |  { | 
|---|
| 381 |  | -	int snap = atomic_add_return(0, &rdtp->dynticks);  | 
|---|
 | 332 | +	int snap = atomic_add_return(0, &rdp->dynticks);  | 
|---|
| 382 | 333 |   | 
|---|
| 383 | 334 |  	return snap & ~RCU_DYNTICK_CTRL_MASK; | 
|---|
| 384 | 335 |  } | 
|---|
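rcu_dynticks_snap() masks off the special-action bit so that two fully ordered samples of ->dynticks can be compared exactly; since every idle entry or exit adds RCU_DYNTICK_CTRL_CTR, a later sample that differs from the saved snapshot proves the CPU spent time in an extended quiescent state in between, which is what rcu_dynticks_in_eqs_since() in the next hunk reports. A standalone model of that comparison (illustration only, assumed values):

```c
#include <assert.h>

#define CTRL_MASK 0x1
#define CTRL_CTR  (CTRL_MASK + 1)

static unsigned int snap_of(unsigned int dynticks)
{
	return dynticks & ~CTRL_MASK;          /* rcu_dynticks_snap()    */
}

int main(void)
{
	unsigned int dynticks = CTRL_CTR;      /* CPU busy, RCU watching */
	unsigned int snap = snap_of(dynticks); /* grace-period kthread   */

	assert(snap == snap_of(dynticks));     /* no quiescent state yet */
	dynticks += CTRL_CTR;                  /* CPU went idle...       */
	dynticks += CTRL_CTR;                  /* ...and woke up again   */
	assert(snap != snap_of(dynticks));     /* idle period detected   */
	return 0;
}
```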
| .. | .. | 
|---|
| 393 | 344 |  } | 
|---|
| 394 | 345 |   | 
|---|
| 395 | 346 |  /* | 
|---|
| 396 |  | - * Return true if the CPU corresponding to the specified rcu_dynticks  | 
|---|
 | 347 | + * Return true if the CPU corresponding to the specified rcu_data  | 
|---|
| 397 | 348 |   * structure has spent some time in an extended quiescent state since | 
|---|
| 398 | 349 |   * rcu_dynticks_snap() returned the specified snapshot. | 
|---|
| 399 | 350 |   */ | 
|---|
| 400 |  | -static bool rcu_dynticks_in_eqs_since(struct rcu_dynticks *rdtp, int snap)  | 
|---|
 | 351 | +static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap)  | 
|---|
| 401 | 352 |  { | 
|---|
| 402 |  | -	return snap != rcu_dynticks_snap(rdtp);  | 
|---|
 | 353 | +	return snap != rcu_dynticks_snap(rdp);  | 
|---|
 | 354 | +}  | 
|---|
 | 355 | +  | 
|---|
 | 356 | +/*  | 
|---|
 | 357 | + * Return true if the referenced integer is zero while the specified  | 
|---|
 | 358 | + * CPU remains within a single extended quiescent state.  | 
|---|
 | 359 | + */  | 
|---|
 | 360 | +bool rcu_dynticks_zero_in_eqs(int cpu, int *vp)  | 
|---|
 | 361 | +{  | 
|---|
 | 362 | +	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
 | 363 | +	int snap;  | 
|---|
 | 364 | +  | 
|---|
 | 365 | +	// If not quiescent, force back to earlier extended quiescent state.  | 
|---|
 | 366 | +	snap = atomic_read(&rdp->dynticks) & ~(RCU_DYNTICK_CTRL_MASK |  | 
|---|
 | 367 | +					       RCU_DYNTICK_CTRL_CTR);  | 
|---|
 | 368 | +  | 
|---|
 | 369 | +	smp_rmb(); // Order ->dynticks and *vp reads.  | 
|---|
 | 370 | +	if (READ_ONCE(*vp))  | 
|---|
 | 371 | +		return false;  // Non-zero, so report failure;  | 
|---|
 | 372 | +	smp_rmb(); // Order *vp read and ->dynticks re-read.  | 
|---|
 | 373 | +  | 
|---|
 | 374 | +	// If still in the same extended quiescent state, we are good!  | 
|---|
 | 375 | +	return snap == (atomic_read(&rdp->dynticks) & ~RCU_DYNTICK_CTRL_MASK);  | 
|---|
| 403 | 376 |  } | 
|---|
| 404 | 377 |   | 
|---|
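rcu_dynticks_zero_in_eqs() above is a "sample, read, re-sample" check: the first sample forces the counter bit clear so the final comparison can only succeed if the CPU is idle, and the two smp_rmb() calls order the *vp read between the two samples. If the epoch did not move, the observed zero is known to have been read while the CPU stayed inside one extended quiescent state. A rough standalone model of the idiom (illustration only; it uses sequentially consistent atomics instead of the explicit barriers):

```c
#include <stdatomic.h>
#include <stdbool.h>

#define CTRL_MASK 0x1
#define CTRL_CTR  (CTRL_MASK + 1)

struct cpu_model {
	atomic_uint dynticks;   /* as in struct rcu_data  */
	atomic_int  value;      /* *vp, owned by that CPU */
};

static bool zero_in_eqs(struct cpu_model *c)
{
	unsigned int snap = atomic_load(&c->dynticks) &
			    ~(CTRL_MASK | CTRL_CTR);   /* force "idle" */

	if (atomic_load(&c->value))
		return false;                          /* non-zero     */
	/* Same epoch before and after the read => the zero is trusted. */
	return snap == (atomic_load(&c->dynticks) & ~CTRL_MASK);
}
```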
| 405 | 378 |  /* | 
|---|
| .. | .. | 
|---|
| 413 | 386 |  { | 
|---|
| 414 | 387 |  	int old; | 
|---|
| 415 | 388 |  	int new; | 
|---|
| 416 |  | -	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);  | 
|---|
 | 389 | +	int new_old;  | 
|---|
 | 390 | +	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);  | 
|---|
| 417 | 391 |   | 
|---|
 | 392 | +	new_old = atomic_read(&rdp->dynticks);  | 
|---|
| 418 | 393 |  	do { | 
|---|
| 419 |  | -		old = atomic_read(&rdtp->dynticks);  | 
|---|
 | 394 | +		old = new_old;  | 
|---|
| 420 | 395 |  		if (old & RCU_DYNTICK_CTRL_CTR) | 
|---|
| 421 | 396 |  			return false; | 
|---|
| 422 | 397 |  		new = old | RCU_DYNTICK_CTRL_MASK; | 
|---|
| 423 |  | -	} while (atomic_cmpxchg(&rdtp->dynticks, old, new) != old);  | 
|---|
 | 398 | +		new_old = atomic_cmpxchg(&rdp->dynticks, old, new);  | 
|---|
 | 399 | +	} while (new_old != old);  | 
|---|
| 424 | 400 |  	return true; | 
|---|
| 425 | 401 |  } | 
|---|
| 426 | 402 |   | 
|---|
| .. | .. | 
|---|
| 435 | 411 |   * | 
|---|
| 436 | 412 |   * The caller must have disabled interrupts and must not be idle. | 
|---|
| 437 | 413 |   */ | 
|---|
| 438 |  | -static void rcu_momentary_dyntick_idle(void)  | 
|---|
 | 414 | +notrace void rcu_momentary_dyntick_idle(void)  | 
|---|
| 439 | 415 |  { | 
|---|
| 440 |  | -	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);  | 
|---|
| 441 | 416 |  	int special; | 
|---|
| 442 | 417 |   | 
|---|
| 443 |  | -	raw_cpu_write(rcu_dynticks.rcu_need_heavy_qs, false);  | 
|---|
| 444 |  | -	special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);  | 
|---|
 | 418 | +	raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);  | 
|---|
 | 419 | +	special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR,  | 
|---|
 | 420 | +				    &this_cpu_ptr(&rcu_data)->dynticks);  | 
|---|
| 445 | 421 |  	/* It is illegal to call this from idle state. */ | 
|---|
| 446 | 422 |  	WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR)); | 
|---|
 | 423 | +	rcu_preempt_deferred_qs(current);  | 
|---|
| 447 | 424 |  } | 
|---|
 | 425 | +EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle);  | 
|---|
| 448 | 426 |   | 
|---|
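rcu_momentary_dyntick_idle() adds 2 * RCU_DYNTICK_CTRL_CTR in a single step, the arithmetic equivalent of a full idle entry plus exit: bit 1 keeps its value, so the CPU never stops being watched (the WARN_ON_ONCE checks exactly that), yet any snapshot taken earlier now compares unequal and rcu_dynticks_in_eqs_since() reports a quiescent state. A short worked example under the encoding above (illustration only, assumed starting value):

```c
/* Assume ->dynticks == 6, i.e. the CPU is busy and RCU is watching.  */
/* Grace-period kthread:  snap = 6 & ~0x1 = 6                         */
/* rcu_momentary_dyntick_idle():  6 + 2 * RCU_DYNTICK_CTRL_CTR = 10   */
/* 10 & RCU_DYNTICK_CTRL_CTR != 0, so the CPU is still "watching",    */
/* but 10 != snap, so rcu_dynticks_in_eqs_since() now returns true.   */
```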
| 449 |  | -/*  | 
|---|
| 450 |  | - * Note a context switch.  This is a quiescent state for RCU-sched,  | 
|---|
| 451 |  | - * and requires special handling for preemptible RCU.  | 
|---|
| 452 |  | - * The caller must have disabled interrupts.  | 
|---|
| 453 |  | - */  | 
|---|
| 454 |  | -void rcu_note_context_switch(bool preempt)  | 
|---|
| 455 |  | -{  | 
|---|
| 456 |  | -	barrier(); /* Avoid RCU read-side critical sections leaking down. */  | 
|---|
| 457 |  | -	trace_rcu_utilization(TPS("Start context switch"));  | 
|---|
| 458 |  | -	rcu_sched_qs();  | 
|---|
| 459 |  | -	rcu_preempt_note_context_switch(preempt);  | 
|---|
| 460 |  | -	/* Load rcu_urgent_qs before other flags. */  | 
|---|
| 461 |  | -	if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs)))  | 
|---|
| 462 |  | -		goto out;  | 
|---|
| 463 |  | -	this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);  | 
|---|
| 464 |  | -	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs)))  | 
|---|
| 465 |  | -		rcu_momentary_dyntick_idle();  | 
|---|
| 466 |  | -	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);  | 
|---|
| 467 |  | -	if (!preempt)  | 
|---|
| 468 |  | -		rcu_tasks_qs(current);  | 
|---|
| 469 |  | -out:  | 
|---|
| 470 |  | -	trace_rcu_utilization(TPS("End context switch"));  | 
|---|
| 471 |  | -	barrier(); /* Avoid RCU read-side critical sections leaking up. */  | 
|---|
| 472 |  | -}  | 
|---|
| 473 |  | -EXPORT_SYMBOL_GPL(rcu_note_context_switch);  | 
|---|
| 474 |  | -  | 
|---|
| 475 |  | -/*  | 
|---|
| 476 |  | - * Register a quiescent state for all RCU flavors.  If there is an  | 
|---|
| 477 |  | - * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight  | 
|---|
| 478 |  | - * dyntick-idle quiescent state visible to other CPUs (but only for those  | 
|---|
| 479 |  | - * RCU flavors in desperate need of a quiescent state, which will normally  | 
|---|
| 480 |  | - * be none of them).  Either way, do a lightweight quiescent state for  | 
|---|
| 481 |  | - * all RCU flavors.  | 
|---|
 | 427 | +/**  | 
|---|
 | 428 | + * rcu_is_cpu_rrupt_from_idle - see if 'interrupted' from idle  | 
|---|
| 482 | 429 |   * | 
|---|
| 483 |  | - * The barrier() calls are redundant in the common case when this is  | 
|---|
| 484 |  | - * called externally, but just in case this is called from within this  | 
|---|
| 485 |  | - * file.  | 
|---|
 | 430 | + * If the current CPU is idle and running at a first-level (not nested)  | 
|---|
 | 431 | + * interrupt, or directly, from idle, return true.  | 
|---|
| 486 | 432 |   * | 
|---|
 | 433 | + * The caller must have at least disabled IRQs.  | 
|---|
| 487 | 434 |   */ | 
|---|
| 488 |  | -void rcu_all_qs(void)  | 
|---|
 | 435 | +static int rcu_is_cpu_rrupt_from_idle(void)  | 
|---|
| 489 | 436 |  { | 
|---|
| 490 |  | -	unsigned long flags;  | 
|---|
 | 437 | +	long nesting;  | 
|---|
| 491 | 438 |   | 
|---|
| 492 |  | -	if (!raw_cpu_read(rcu_dynticks.rcu_urgent_qs))  | 
|---|
| 493 |  | -		return;  | 
|---|
| 494 |  | -	preempt_disable();  | 
|---|
| 495 |  | -	/* Load rcu_urgent_qs before other flags. */  | 
|---|
| 496 |  | -	if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) {  | 
|---|
| 497 |  | -		preempt_enable();  | 
|---|
| 498 |  | -		return;  | 
|---|
| 499 |  | -	}  | 
|---|
| 500 |  | -	this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);  | 
|---|
| 501 |  | -	barrier(); /* Avoid RCU read-side critical sections leaking down. */  | 
|---|
| 502 |  | -	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) {  | 
|---|
| 503 |  | -		local_irq_save(flags);  | 
|---|
| 504 |  | -		rcu_momentary_dyntick_idle();  | 
|---|
| 505 |  | -		local_irq_restore(flags);  | 
|---|
| 506 |  | -	}  | 
|---|
| 507 |  | -	if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)))  | 
|---|
| 508 |  | -		rcu_sched_qs();  | 
|---|
| 509 |  | -	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);  | 
|---|
| 510 |  | -	barrier(); /* Avoid RCU read-side critical sections leaking up. */  | 
|---|
| 511 |  | -	preempt_enable();  | 
|---|
 | 439 | +	/*  | 
|---|
 | 440 | +	 * Usually called from the tick; but also used from smp_function_call()  | 
|---|
 | 441 | +	 * for expedited grace periods. This latter can result in running from  | 
|---|
 | 442 | +	 * the idle task, instead of an actual IPI.  | 
|---|
 | 443 | +	 */  | 
|---|
 | 444 | +	lockdep_assert_irqs_disabled();  | 
|---|
 | 445 | +  | 
|---|
 | 446 | +	/* Check for counter underflows */  | 
|---|
 | 447 | +	RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) < 0,  | 
|---|
 | 448 | +			 "RCU dynticks_nesting counter underflow!");  | 
|---|
 | 449 | +	RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) <= 0,  | 
|---|
 | 450 | +			 "RCU dynticks_nmi_nesting counter underflow/zero!");  | 
|---|
 | 451 | +  | 
|---|
 | 452 | +	/* Are we at first interrupt nesting level? */  | 
|---|
 | 453 | +	nesting = __this_cpu_read(rcu_data.dynticks_nmi_nesting);  | 
|---|
 | 454 | +	if (nesting > 1)  | 
|---|
 | 455 | +		return false;  | 
|---|
 | 456 | +  | 
|---|
 | 457 | +	/*  | 
|---|
 | 458 | +	 * If we're not in an interrupt, we must be in the idle task!  | 
|---|
 | 459 | +	 */  | 
|---|
 | 460 | +	WARN_ON_ONCE(!nesting && !is_idle_task(current));  | 
|---|
 | 461 | +  | 
|---|
 | 462 | +	/* Does CPU appear to be idle from an RCU standpoint? */  | 
|---|
 | 463 | +	return __this_cpu_read(rcu_data.dynticks_nesting) == 0;  | 
|---|
| 512 | 464 |  } | 
|---|
| 513 |  | -EXPORT_SYMBOL_GPL(rcu_all_qs);  | 
|---|
| 514 | 465 |   | 
|---|
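The checks in rcu_is_cpu_rrupt_from_idle() above classify the context purely from the two per-CPU counters: dynticks_nesting is zero only when the CPU is idle (or in nohz_full userspace) from RCU's point of view, and dynticks_nmi_nesting counts the irq/NMI nesting on top of that, with DYNTICK_IRQ_NONIDLE as a large sentinel for non-idle process context. A summary of the cases as the function reads them (illustrative comment, values per the code above):

```c
/*
 *  dynticks_nesting  dynticks_nmi_nesting   result
 *  ----------------  --------------------   ----------------------------
 *        > 0          DYNTICK_IRQ_NONIDLE   busy process context -> false
 *                     (or higher)           (caught by the "> 1" check)
 *          0                  0             the idle task itself -> true
 *          0                  1             first-level irq that
 *                                           interrupted idle     -> true
 *          0                > 1             nested irq/NMI over
 *                                           idle                 -> false
 */
```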
| 515 |  | -#define DEFAULT_RCU_BLIMIT 10     /* Maximum callbacks per rcu_do_batch. */  | 
|---|
 | 466 | +#define DEFAULT_RCU_BLIMIT (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 1000 : 10)  | 
|---|
 | 467 | +				// Maximum callbacks per rcu_do_batch ...  | 
|---|
 | 468 | +#define DEFAULT_MAX_RCU_BLIMIT 10000 // ... even during callback flood.  | 
|---|
| 516 | 469 |  static long blimit = DEFAULT_RCU_BLIMIT; | 
|---|
| 517 |  | -#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */  | 
|---|
 | 470 | +#define DEFAULT_RCU_QHIMARK 10000 // If this many pending, ignore blimit.  | 
|---|
| 518 | 471 |  static long qhimark = DEFAULT_RCU_QHIMARK; | 
|---|
| 519 |  | -#define DEFAULT_RCU_QLOMARK 100   /* Once only this many pending, use blimit. */  | 
|---|
 | 472 | +#define DEFAULT_RCU_QLOMARK 100   // Once only this many pending, use blimit.  | 
|---|
| 520 | 473 |  static long qlowmark = DEFAULT_RCU_QLOMARK; | 
|---|
 | 474 | +#define DEFAULT_RCU_QOVLD_MULT 2  | 
|---|
 | 475 | +#define DEFAULT_RCU_QOVLD (DEFAULT_RCU_QOVLD_MULT * DEFAULT_RCU_QHIMARK)  | 
|---|
 | 476 | +static long qovld = DEFAULT_RCU_QOVLD; // If this many pending, hammer QS.  | 
|---|
 | 477 | +static long qovld_calc = -1;	  // No pre-initialization lock acquisitions!  | 
|---|
| 521 | 478 |   | 
|---|
| 522 | 479 |  module_param(blimit, long, 0444); | 
|---|
| 523 | 480 |  module_param(qhimark, long, 0444); | 
|---|
| 524 | 481 |  module_param(qlowmark, long, 0444); | 
|---|
 | 482 | +module_param(qovld, long, 0444);  | 
|---|
| 525 | 483 |   | 
|---|
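Taken together, the knobs above form a ladder: each rcu_do_batch() pass normally invokes at most blimit callbacks, a backlog above qhimark lifts that limit toward DEFAULT_MAX_RCU_BLIMIT, and a backlog above qovld (twice qhimark by default) additionally makes RCU force quiescent states out of the overloaded CPU via check_cb_ovld_locked(). A rough summary with the default values shown above (illustration, not the exact rcu_do_batch() logic):

```c
/*
 *  callbacks queued on a CPU          effect (defaults, non-strict kernel)
 *  -------------------------------    ------------------------------------
 *  <= qlowmark (100)                  batch limit returns to blimit (10)
 *  >  qhimark  (10000)                blimit ignored, batches may grow
 *                                     up to DEFAULT_MAX_RCU_BLIMIT (10000)
 *  >  qovld    (2 * 10000 = 20000)    CPU flagged as overloaded so that
 *                                     quiescent states are hammered out
 */
```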
| 526 |  | -static ulong jiffies_till_first_fqs = ULONG_MAX;  | 
|---|
 | 484 | +static ulong jiffies_till_first_fqs = IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 0 : ULONG_MAX;  | 
|---|
| 527 | 485 |  static ulong jiffies_till_next_fqs = ULONG_MAX; | 
|---|
| 528 | 486 |  static bool rcu_kick_kthreads; | 
|---|
 | 487 | +static int rcu_divisor = 7;  | 
|---|
 | 488 | +module_param(rcu_divisor, int, 0644);  | 
|---|
 | 489 | +  | 
|---|
 | 490 | +/* Force an exit from rcu_do_batch() after 3 milliseconds. */  | 
|---|
 | 491 | +static long rcu_resched_ns = 3 * NSEC_PER_MSEC;  | 
|---|
 | 492 | +module_param(rcu_resched_ns, long, 0644);  | 
|---|
 | 493 | +  | 
|---|
 | 494 | +/*  | 
|---|
 | 495 | + * How long the grace period must be before we start recruiting  | 
|---|
 | 496 | + * quiescent-state help from rcu_note_context_switch().  | 
|---|
 | 497 | + */  | 
|---|
 | 498 | +static ulong jiffies_till_sched_qs = ULONG_MAX;  | 
|---|
 | 499 | +module_param(jiffies_till_sched_qs, ulong, 0444);  | 
|---|
 | 500 | +static ulong jiffies_to_sched_qs; /* See adjust_jiffies_till_sched_qs(). */  | 
|---|
 | 501 | +module_param(jiffies_to_sched_qs, ulong, 0444); /* Display only! */  | 
|---|
 | 502 | +  | 
|---|
 | 503 | +/*  | 
|---|
 | 504 | + * Make sure that we give the grace-period kthread time to detect any  | 
|---|
 | 505 | + * idle CPUs before taking active measures to force quiescent states.  | 
|---|
 | 506 | + * However, don't go below 100 milliseconds, adjusted upwards for really  | 
|---|
 | 507 | + * large systems.  | 
|---|
 | 508 | + */  | 
|---|
 | 509 | +static void adjust_jiffies_till_sched_qs(void)  | 
|---|
 | 510 | +{  | 
|---|
 | 511 | +	unsigned long j;  | 
|---|
 | 512 | +  | 
|---|
 | 513 | +	/* If jiffies_till_sched_qs was specified, respect the request. */  | 
|---|
 | 514 | +	if (jiffies_till_sched_qs != ULONG_MAX) {  | 
|---|
 | 515 | +		WRITE_ONCE(jiffies_to_sched_qs, jiffies_till_sched_qs);  | 
|---|
 | 516 | +		return;  | 
|---|
 | 517 | +	}  | 
|---|
 | 518 | +	/* Otherwise, set to third fqs scan, but bound below on large system. */  | 
|---|
 | 519 | +	j = READ_ONCE(jiffies_till_first_fqs) +  | 
|---|
 | 520 | +		      2 * READ_ONCE(jiffies_till_next_fqs);  | 
|---|
 | 521 | +	if (j < HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV)  | 
|---|
 | 522 | +		j = HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;  | 
|---|
 | 523 | +	pr_info("RCU calculated value of scheduler-enlistment delay is %ld jiffies.\n", j);  | 
|---|
 | 524 | +	WRITE_ONCE(jiffies_to_sched_qs, j);  | 
|---|
 | 525 | +}  | 
|---|
| 529 | 526 |   | 
|---|
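When jiffies_till_sched_qs is left at its ULONG_MAX default, adjust_jiffies_till_sched_qs() above derives jiffies_to_sched_qs as the first FQS delay plus twice the normal FQS delay, bounded below by HZ/10 plus a per-CPU term. A worked example under assumed values (HZ = 1000, jiffies_till_first_fqs = jiffies_till_next_fqs = 3, nr_cpu_ids = 64, and RCU_JIFFIES_FQS_DIV taken as 256, which is an assumption of this sketch):

```c
/* j     = 3 + 2 * 3                  = 9 jiffies                     */
/* floor = HZ / 10 + nr_cpu_ids / 256 = 100 + 0 = 100 jiffies         */
/* 9 < 100, so jiffies_to_sched_qs is set to 100 jiffies (~100 ms).   */
```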
| 530 | 527 |  static int param_set_first_fqs_jiffies(const char *val, const struct kernel_param *kp) | 
|---|
| 531 | 528 |  { | 
|---|
| 532 | 529 |  	ulong j; | 
|---|
| 533 | 530 |  	int ret = kstrtoul(val, 0, &j); | 
|---|
| 534 | 531 |   | 
|---|
| 535 |  | -	if (!ret)  | 
|---|
 | 532 | +	if (!ret) {  | 
|---|
| 536 | 533 |  		WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : j); | 
|---|
 | 534 | +		adjust_jiffies_till_sched_qs();  | 
|---|
 | 535 | +	}  | 
|---|
| 537 | 536 |  	return ret; | 
|---|
| 538 | 537 |  } | 
|---|
| 539 | 538 |   | 
|---|
| .. | .. | 
|---|
| 542 | 541 |  	ulong j; | 
|---|
| 543 | 542 |  	int ret = kstrtoul(val, 0, &j); | 
|---|
| 544 | 543 |   | 
|---|
| 545 |  | -	if (!ret)  | 
|---|
 | 544 | +	if (!ret) {  | 
|---|
| 546 | 545 |  		WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : (j ?: 1)); | 
|---|
 | 546 | +		adjust_jiffies_till_sched_qs();  | 
|---|
 | 547 | +	}  | 
|---|
| 547 | 548 |  	return ret; | 
|---|
| 548 | 549 |  } | 
|---|
| 549 | 550 |   | 
|---|
| .. | .. | 
|---|
| 561 | 562 |  module_param_cb(jiffies_till_next_fqs, &next_fqs_jiffies_ops, &jiffies_till_next_fqs, 0644); | 
|---|
| 562 | 563 |  module_param(rcu_kick_kthreads, bool, 0644); | 
|---|
| 563 | 564 |   | 
|---|
| 564 |  | -/*  | 
|---|
| 565 |  | - * How long the grace period must be before we start recruiting  | 
|---|
| 566 |  | - * quiescent-state help from rcu_note_context_switch().  | 
|---|
| 567 |  | - */  | 
|---|
| 568 |  | -static ulong jiffies_till_sched_qs = HZ / 10;  | 
|---|
| 569 |  | -module_param(jiffies_till_sched_qs, ulong, 0444);  | 
|---|
| 570 |  | -  | 
|---|
| 571 |  | -static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp));  | 
|---|
| 572 |  | -static void force_quiescent_state(struct rcu_state *rsp);  | 
|---|
| 573 |  | -static int rcu_pending(void);  | 
|---|
 | 565 | +static void force_qs_rnp(int (*f)(struct rcu_data *rdp));  | 
|---|
 | 566 | +static int rcu_pending(int user);  | 
|---|
| 574 | 567 |   | 
|---|
| 575 | 568 |  /* | 
|---|
| 576 | 569 |   * Return the number of RCU GPs completed thus far for debug & stats. | 
|---|
| 577 | 570 |   */ | 
|---|
| 578 | 571 |  unsigned long rcu_get_gp_seq(void) | 
|---|
| 579 | 572 |  { | 
|---|
| 580 |  | -	return READ_ONCE(rcu_state_p->gp_seq);  | 
|---|
 | 573 | +	return READ_ONCE(rcu_state.gp_seq);  | 
|---|
| 581 | 574 |  } | 
|---|
| 582 | 575 |  EXPORT_SYMBOL_GPL(rcu_get_gp_seq); | 
|---|
| 583 |  | -  | 
|---|
| 584 |  | -/*  | 
|---|
| 585 |  | - * Return the number of RCU-sched GPs completed thus far for debug & stats.  | 
|---|
| 586 |  | - */  | 
|---|
| 587 |  | -unsigned long rcu_sched_get_gp_seq(void)  | 
|---|
| 588 |  | -{  | 
|---|
| 589 |  | -	return READ_ONCE(rcu_sched_state.gp_seq);  | 
|---|
| 590 |  | -}  | 
|---|
| 591 |  | -EXPORT_SYMBOL_GPL(rcu_sched_get_gp_seq);  | 
|---|
| 592 |  | -  | 
|---|
| 593 |  | -#ifndef CONFIG_PREEMPT_RT_FULL  | 
|---|
| 594 |  | -/*  | 
|---|
| 595 |  | - * Return the number of RCU-bh GPs completed thus far for debug & stats.  | 
|---|
| 596 |  | - */  | 
|---|
| 597 |  | -unsigned long rcu_bh_get_gp_seq(void)  | 
|---|
| 598 |  | -{  | 
|---|
| 599 |  | -	return READ_ONCE(rcu_bh_state.gp_seq);  | 
|---|
| 600 |  | -}  | 
|---|
| 601 |  | -EXPORT_SYMBOL_GPL(rcu_bh_get_gp_seq);  | 
|---|
| 602 |  | -#endif  | 
|---|
| 603 | 576 |   | 
|---|
| 604 | 577 |  /* | 
|---|
| 605 | 578 |   * Return the number of RCU expedited batches completed thus far for | 
|---|
| .. | .. | 
|---|
| 609 | 582 |   */ | 
|---|
| 610 | 583 |  unsigned long rcu_exp_batches_completed(void) | 
|---|
| 611 | 584 |  { | 
|---|
| 612 |  | -	return rcu_state_p->expedited_sequence;  | 
|---|
 | 585 | +	return rcu_state.expedited_sequence;  | 
|---|
| 613 | 586 |  } | 
|---|
| 614 | 587 |  EXPORT_SYMBOL_GPL(rcu_exp_batches_completed); | 
|---|
| 615 | 588 |   | 
|---|
| 616 | 589 |  /* | 
|---|
| 617 |  | - * Return the number of RCU-sched expedited batches completed thus far  | 
|---|
| 618 |  | - * for debug & stats.  Similar to rcu_exp_batches_completed().  | 
|---|
 | 590 | + * Return the root node of the rcu_state structure.  | 
|---|
| 619 | 591 |   */ | 
|---|
| 620 |  | -unsigned long rcu_exp_batches_completed_sched(void)  | 
|---|
 | 592 | +static struct rcu_node *rcu_get_root(void)  | 
|---|
| 621 | 593 |  { | 
|---|
| 622 |  | -	return rcu_sched_state.expedited_sequence;  | 
|---|
 | 594 | +	return &rcu_state.node[0];  | 
|---|
| 623 | 595 |  } | 
|---|
| 624 |  | -EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched);  | 
|---|
| 625 |  | -  | 
|---|
| 626 |  | -#ifndef CONFIG_PREEMPT_RT_FULL  | 
|---|
| 627 |  | -/*  | 
|---|
| 628 |  | - * Force a quiescent state.  | 
|---|
| 629 |  | - */  | 
|---|
| 630 |  | -void rcu_force_quiescent_state(void)  | 
|---|
| 631 |  | -{  | 
|---|
| 632 |  | -	force_quiescent_state(rcu_state_p);  | 
|---|
| 633 |  | -}  | 
|---|
| 634 |  | -EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);  | 
|---|
| 635 |  | -  | 
|---|
| 636 |  | -/*  | 
|---|
| 637 |  | - * Force a quiescent state for RCU BH.  | 
|---|
| 638 |  | - */  | 
|---|
| 639 |  | -void rcu_bh_force_quiescent_state(void)  | 
|---|
| 640 |  | -{  | 
|---|
| 641 |  | -	force_quiescent_state(&rcu_bh_state);  | 
|---|
| 642 |  | -}  | 
|---|
| 643 |  | -EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);  | 
|---|
| 644 |  | -  | 
|---|
| 645 |  | -#else  | 
|---|
| 646 |  | -void rcu_force_quiescent_state(void)  | 
|---|
| 647 |  | -{  | 
|---|
| 648 |  | -}  | 
|---|
| 649 |  | -EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);  | 
|---|
| 650 |  | -#endif  | 
|---|
| 651 |  | -  | 
|---|
| 652 |  | -/*  | 
|---|
| 653 |  | - * Force a quiescent state for RCU-sched.  | 
|---|
| 654 |  | - */  | 
|---|
| 655 |  | -void rcu_sched_force_quiescent_state(void)  | 
|---|
| 656 |  | -{  | 
|---|
| 657 |  | -	force_quiescent_state(&rcu_sched_state);  | 
|---|
| 658 |  | -}  | 
|---|
| 659 |  | -EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);  | 
|---|
| 660 |  | -  | 
|---|
| 661 |  | -/*  | 
|---|
| 662 |  | - * Show the state of the grace-period kthreads.  | 
|---|
| 663 |  | - */  | 
|---|
| 664 |  | -void show_rcu_gp_kthreads(void)  | 
|---|
| 665 |  | -{  | 
|---|
| 666 |  | -	int cpu;  | 
|---|
| 667 |  | -	struct rcu_data *rdp;  | 
|---|
| 668 |  | -	struct rcu_node *rnp;  | 
|---|
| 669 |  | -	struct rcu_state *rsp;  | 
|---|
| 670 |  | -  | 
|---|
| 671 |  | -	for_each_rcu_flavor(rsp) {  | 
|---|
| 672 |  | -		pr_info("%s: wait state: %d ->state: %#lx\n",  | 
|---|
| 673 |  | -			rsp->name, rsp->gp_state, rsp->gp_kthread->state);  | 
|---|
| 674 |  | -		rcu_for_each_node_breadth_first(rsp, rnp) {  | 
|---|
| 675 |  | -			if (ULONG_CMP_GE(rsp->gp_seq, rnp->gp_seq_needed))  | 
|---|
| 676 |  | -				continue;  | 
|---|
| 677 |  | -			pr_info("\trcu_node %d:%d ->gp_seq %lu ->gp_seq_needed %lu\n",  | 
|---|
| 678 |  | -				rnp->grplo, rnp->grphi, rnp->gp_seq,  | 
|---|
| 679 |  | -				rnp->gp_seq_needed);  | 
|---|
| 680 |  | -			if (!rcu_is_leaf_node(rnp))  | 
|---|
| 681 |  | -				continue;  | 
|---|
| 682 |  | -			for_each_leaf_node_possible_cpu(rnp, cpu) {  | 
|---|
| 683 |  | -				rdp = per_cpu_ptr(rsp->rda, cpu);  | 
|---|
| 684 |  | -				if (rdp->gpwrap ||  | 
|---|
| 685 |  | -				    ULONG_CMP_GE(rsp->gp_seq,  | 
|---|
| 686 |  | -						 rdp->gp_seq_needed))  | 
|---|
| 687 |  | -					continue;  | 
|---|
| 688 |  | -				pr_info("\tcpu %d ->gp_seq_needed %lu\n",  | 
|---|
| 689 |  | -					cpu, rdp->gp_seq_needed);  | 
|---|
| 690 |  | -			}  | 
|---|
| 691 |  | -		}  | 
|---|
| 692 |  | -		/* sched_show_task(rsp->gp_kthread); */  | 
|---|
| 693 |  | -	}  | 
|---|
| 694 |  | -}  | 
|---|
| 695 |  | -EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);  | 
|---|
| 696 | 596 |   | 
|---|
| 697 | 597 |  /* | 
|---|
| 698 | 598 |   * Send along grace-period-related data for rcutorture diagnostics. | 
|---|
| .. | .. | 
|---|
| 700 | 600 |  void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, | 
|---|
| 701 | 601 |  			    unsigned long *gp_seq) | 
|---|
| 702 | 602 |  { | 
|---|
| 703 |  | -	struct rcu_state *rsp = NULL;  | 
|---|
| 704 |  | -  | 
|---|
| 705 | 603 |  	switch (test_type) { | 
|---|
| 706 | 604 |  	case RCU_FLAVOR: | 
|---|
| 707 |  | -		rsp = rcu_state_p;  | 
|---|
| 708 |  | -		break;  | 
|---|
| 709 |  | -#ifndef CONFIG_PREEMPT_RT_FULL  | 
|---|
| 710 |  | -	case RCU_BH_FLAVOR:  | 
|---|
| 711 |  | -		rsp = &rcu_bh_state;  | 
|---|
| 712 |  | -		break;  | 
|---|
| 713 |  | -#endif  | 
|---|
| 714 |  | -	case RCU_SCHED_FLAVOR:  | 
|---|
| 715 |  | -		rsp = &rcu_sched_state;  | 
|---|
 | 605 | +		*flags = READ_ONCE(rcu_state.gp_flags);  | 
|---|
 | 606 | +		*gp_seq = rcu_seq_current(&rcu_state.gp_seq);  | 
|---|
| 716 | 607 |  		break; | 
|---|
| 717 | 608 |  	default: | 
|---|
| 718 | 609 |  		break; | 
|---|
| 719 | 610 |  	} | 
|---|
| 720 |  | -	if (rsp == NULL)  | 
|---|
| 721 |  | -		return;  | 
|---|
| 722 |  | -	*flags = READ_ONCE(rsp->gp_flags);  | 
|---|
| 723 |  | -	*gp_seq = rcu_seq_current(&rsp->gp_seq);  | 
|---|
| 724 | 611 |  } | 
|---|
| 725 | 612 |  EXPORT_SYMBOL_GPL(rcutorture_get_gp_data); | 
|---|
| 726 |  | -  | 
|---|
| 727 |  | -/*  | 
|---|
| 728 |  | - * Return the root node of the specified rcu_state structure.  | 
|---|
| 729 |  | - */  | 
|---|
| 730 |  | -static struct rcu_node *rcu_get_root(struct rcu_state *rsp)  | 
|---|
| 731 |  | -{  | 
|---|
| 732 |  | -	return &rsp->node[0];  | 
|---|
| 733 |  | -}  | 
|---|
| 734 | 613 |   | 
|---|
| 735 | 614 |  /* | 
|---|
| 736 | 615 |   * Enter an RCU extended quiescent state, which can be either the | 
|---|
| .. | .. | 
|---|
| 740 | 619 |   * the possibility of usermode upcalls having messed up our count | 
|---|
| 741 | 620 |   * of interrupt nesting level during the prior busy period. | 
|---|
| 742 | 621 |   */ | 
|---|
| 743 |  | -static void rcu_eqs_enter(bool user)  | 
|---|
 | 622 | +static noinstr void rcu_eqs_enter(bool user)  | 
|---|
| 744 | 623 |  { | 
|---|
| 745 |  | -	struct rcu_state *rsp;  | 
|---|
| 746 |  | -	struct rcu_data *rdp;  | 
|---|
| 747 |  | -	struct rcu_dynticks *rdtp;  | 
|---|
 | 624 | +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);  | 
|---|
| 748 | 625 |   | 
|---|
| 749 |  | -	rdtp = this_cpu_ptr(&rcu_dynticks);  | 
|---|
| 750 |  | -	WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0);  | 
|---|
 | 626 | +	WARN_ON_ONCE(rdp->dynticks_nmi_nesting != DYNTICK_IRQ_NONIDLE);  | 
|---|
 | 627 | +	WRITE_ONCE(rdp->dynticks_nmi_nesting, 0);  | 
|---|
| 751 | 628 |  	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && | 
|---|
| 752 |  | -		     rdtp->dynticks_nesting == 0);  | 
|---|
| 753 |  | -	if (rdtp->dynticks_nesting != 1) {  | 
|---|
| 754 |  | -		rdtp->dynticks_nesting--;  | 
|---|
 | 629 | +		     rdp->dynticks_nesting == 0);  | 
|---|
 | 630 | +	if (rdp->dynticks_nesting != 1) {  | 
|---|
 | 631 | +		// RCU will still be watching, so just do accounting and leave.  | 
|---|
 | 632 | +		rdp->dynticks_nesting--;  | 
|---|
| 755 | 633 |  		return; | 
|---|
| 756 | 634 |  	} | 
|---|
| 757 | 635 |   | 
|---|
| 758 | 636 |  	lockdep_assert_irqs_disabled(); | 
|---|
| 759 |  | -	trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0, rdtp->dynticks);  | 
|---|
 | 637 | +	instrumentation_begin();  | 
|---|
 | 638 | +	trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks));  | 
|---|
| 760 | 639 |  	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); | 
|---|
| 761 |  | -	for_each_rcu_flavor(rsp) {  | 
|---|
| 762 |  | -		rdp = this_cpu_ptr(rsp->rda);  | 
|---|
| 763 |  | -		do_nocb_deferred_wakeup(rdp);  | 
|---|
| 764 |  | -	}  | 
|---|
 | 640 | +	rdp = this_cpu_ptr(&rcu_data);  | 
|---|
| 765 | 641 |  	rcu_prepare_for_idle(); | 
|---|
| 766 |  | -	WRITE_ONCE(rdtp->dynticks_nesting, 0); /* Avoid irq-access tearing. */  | 
|---|
 | 642 | +	rcu_preempt_deferred_qs(current);  | 
|---|
 | 643 | +  | 
|---|
 | 644 | +	// instrumentation for the noinstr rcu_dynticks_eqs_enter()  | 
|---|
 | 645 | +	instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));  | 
|---|
 | 646 | +  | 
|---|
 | 647 | +	instrumentation_end();  | 
|---|
 | 648 | +	WRITE_ONCE(rdp->dynticks_nesting, 0); /* Avoid irq-access tearing. */  | 
|---|
 | 649 | +	// RCU is watching here ...  | 
|---|
| 767 | 650 |  	rcu_dynticks_eqs_enter(); | 
|---|
 | 651 | +	// ... but is no longer watching here.  | 
|---|
| 768 | 652 |  	rcu_dynticks_task_enter(); | 
|---|
| 769 | 653 |  } | 
|---|
| 770 | 654 |   | 
|---|
| .. | .. | 
|---|
| 784 | 668 |  	lockdep_assert_irqs_disabled(); | 
|---|
| 785 | 669 |  	rcu_eqs_enter(false); | 
|---|
| 786 | 670 |  } | 
|---|
 | 671 | +EXPORT_SYMBOL_GPL(rcu_idle_enter);  | 
|---|
| 787 | 672 |   | 
|---|
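rcu_eqs_enter() above only does the real work when dynticks_nesting is exactly 1; otherwise it just updates the count and RCU keeps watching. A hedged trace of the counters for a plain rcu_idle_enter()/rcu_idle_exit() pair, starting from the per-CPU initializer earlier in this file (illustrative comment only):

```c
/*
 * Initial state:  dynticks_nesting = 1,
 *                 dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE
 *
 * rcu_idle_enter() -> rcu_eqs_enter(false):
 *     dynticks_nmi_nesting = 0
 *     dynticks_nesting     = 0          (outermost, so fall through)
 *     rcu_dynticks_eqs_enter()          -> RCU stops watching this CPU
 *
 * rcu_idle_exit() -> rcu_eqs_exit(false):
 *     rcu_dynticks_eqs_exit()           -> RCU is watching again
 *     dynticks_nesting     = 1
 *     dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE
 */
```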
| 788 | 673 |  #ifdef CONFIG_NO_HZ_FULL | 
|---|
| 789 | 674 |  /** | 
|---|
| .. | .. | 
|---|
| 797 | 682 |   * If you add or remove a call to rcu_user_enter(), be sure to test with | 
|---|
| 798 | 683 |   * CONFIG_RCU_EQS_DEBUG=y. | 
|---|
| 799 | 684 |   */ | 
|---|
| 800 |  | -void rcu_user_enter(void)  | 
|---|
 | 685 | +noinstr void rcu_user_enter(void)  | 
|---|
| 801 | 686 |  { | 
|---|
 | 687 | +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);  | 
|---|
 | 688 | +  | 
|---|
| 802 | 689 |  	lockdep_assert_irqs_disabled(); | 
|---|
 | 690 | +  | 
|---|
 | 691 | +	instrumentation_begin();  | 
|---|
 | 692 | +	do_nocb_deferred_wakeup(rdp);  | 
|---|
 | 693 | +	instrumentation_end();  | 
|---|
 | 694 | +  | 
|---|
| 803 | 695 |  	rcu_eqs_enter(true); | 
|---|
| 804 | 696 |  } | 
|---|
| 805 | 697 |  #endif /* CONFIG_NO_HZ_FULL */ | 
|---|
| .. | .. | 
|---|
| 808 | 700 |   * rcu_nmi_exit - inform RCU of exit from NMI context | 
|---|
| 809 | 701 |   * | 
|---|
| 810 | 702 |   * If we are returning from the outermost NMI handler that interrupted an | 
|---|
| 811 |  | - * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting  | 
|---|
 | 703 | + * RCU-idle period, update rdp->dynticks and rdp->dynticks_nmi_nesting  | 
|---|
| 812 | 704 |   * to let the RCU grace-period handling know that the CPU is back to | 
|---|
| 813 | 705 |   * being RCU-idle. | 
|---|
| 814 | 706 |   * | 
|---|
| 815 | 707 |   * If you add or remove a call to rcu_nmi_exit(), be sure to test | 
|---|
| 816 | 708 |   * with CONFIG_RCU_EQS_DEBUG=y. | 
|---|
| 817 | 709 |   */ | 
|---|
| 818 |  | -void rcu_nmi_exit(void)  | 
|---|
 | 710 | +noinstr void rcu_nmi_exit(void)  | 
|---|
| 819 | 711 |  { | 
|---|
| 820 |  | -	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);  | 
|---|
 | 712 | +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);  | 
|---|
| 821 | 713 |   | 
|---|
 | 714 | +	instrumentation_begin();  | 
|---|
| 822 | 715 |  	/* | 
|---|
| 823 | 716 |  	 * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks. | 
|---|
| 824 | 717 |  	 * (We are exiting an NMI handler, so RCU better be paying attention | 
|---|
| 825 | 718 |  	 * to us!) | 
|---|
| 826 | 719 |  	 */ | 
|---|
| 827 |  | -	WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0);  | 
|---|
 | 720 | +	WARN_ON_ONCE(rdp->dynticks_nmi_nesting <= 0);  | 
|---|
| 828 | 721 |  	WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs()); | 
|---|
| 829 | 722 |   | 
|---|
| 830 | 723 |  	/* | 
|---|
| 831 | 724 |  	 * If the nesting level is not 1, the CPU wasn't RCU-idle, so | 
|---|
| 832 | 725 |  	 * leave it in non-RCU-idle state. | 
|---|
| 833 | 726 |  	 */ | 
|---|
| 834 |  | -	if (rdtp->dynticks_nmi_nesting != 1) {  | 
|---|
| 835 |  | -		trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nmi_nesting, rdtp->dynticks_nmi_nesting - 2, rdtp->dynticks);  | 
|---|
| 836 |  | -		WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */  | 
|---|
| 837 |  | -			   rdtp->dynticks_nmi_nesting - 2);  | 
|---|
 | 727 | +	if (rdp->dynticks_nmi_nesting != 1) {  | 
|---|
 | 728 | +		trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2,  | 
|---|
 | 729 | +				  atomic_read(&rdp->dynticks));  | 
|---|
 | 730 | +		WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */  | 
|---|
 | 731 | +			   rdp->dynticks_nmi_nesting - 2);  | 
|---|
 | 732 | +		instrumentation_end();  | 
|---|
| 838 | 733 |  		return; | 
|---|
| 839 | 734 |  	} | 
|---|
| 840 | 735 |   | 
|---|
| 841 | 736 |  	/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ | 
|---|
| 842 |  | -	trace_rcu_dyntick(TPS("Startirq"), rdtp->dynticks_nmi_nesting, 0, rdtp->dynticks);  | 
|---|
| 843 |  | -	WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */  | 
|---|
 | 737 | +	trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks));  | 
|---|
 | 738 | +	WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */  | 
|---|
 | 739 | +  | 
|---|
 | 740 | +	if (!in_nmi())  | 
|---|
 | 741 | +		rcu_prepare_for_idle();  | 
|---|
 | 742 | +  | 
|---|
 | 743 | +	// instrumentation for the noinstr rcu_dynticks_eqs_enter()  | 
|---|
 | 744 | +	instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));  | 
|---|
 | 745 | +	instrumentation_end();  | 
|---|
 | 746 | +  | 
|---|
 | 747 | +	// RCU is watching here ...  | 
|---|
| 844 | 748 |  	rcu_dynticks_eqs_enter(); | 
|---|
 | 749 | +	// ... but is no longer watching here.  | 
|---|
 | 750 | +  | 
|---|
 | 751 | +	if (!in_nmi())  | 
|---|
 | 752 | +		rcu_dynticks_task_enter();  | 
|---|
| 845 | 753 |  } | 
|---|
| 846 | 754 |   | 
|---|
| 847 | 755 |  /** | 
|---|
| .. | .. | 
|---|
| 863 | 771 |   * If you add or remove a call to rcu_irq_exit(), be sure to test with | 
|---|
| 864 | 772 |   * CONFIG_RCU_EQS_DEBUG=y. | 
|---|
| 865 | 773 |   */ | 
|---|
| 866 |  | -void rcu_irq_exit(void)  | 
|---|
 | 774 | +void noinstr rcu_irq_exit(void)  | 
|---|
| 867 | 775 |  { | 
|---|
| 868 |  | -	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);  | 
|---|
| 869 |  | -  | 
|---|
| 870 | 776 |  	lockdep_assert_irqs_disabled(); | 
|---|
| 871 |  | -	if (rdtp->dynticks_nmi_nesting == 1)  | 
|---|
| 872 |  | -		rcu_prepare_for_idle();  | 
|---|
| 873 | 777 |  	rcu_nmi_exit(); | 
|---|
| 874 |  | -	if (rdtp->dynticks_nmi_nesting == 0)  | 
|---|
| 875 |  | -		rcu_dynticks_task_enter();  | 
|---|
| 876 | 778 |  } | 
|---|
 | 779 | +  | 
|---|
 | 780 | +/**  | 
|---|
 | 781 | + * rcu_irq_exit_preempt - Inform RCU that current CPU is exiting irq  | 
|---|
 | 782 | + *			  towards in kernel preemption  | 
|---|
 | 783 | + *  | 
|---|
 | 784 | + * Same as rcu_irq_exit() but has a sanity check that scheduling is safe  | 
|---|
 | 785 | + * from RCU point of view. Invoked from return from interrupt before kernel  | 
|---|
 | 786 | + * preemption.  | 
|---|
 | 787 | + */  | 
|---|
 | 788 | +void rcu_irq_exit_preempt(void)  | 
|---|
 | 789 | +{  | 
|---|
 | 790 | +	lockdep_assert_irqs_disabled();  | 
|---|
 | 791 | +	rcu_nmi_exit();  | 
|---|
 | 792 | +  | 
|---|
 | 793 | +	RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) <= 0,  | 
|---|
 | 794 | +			 "RCU dynticks_nesting counter underflow/zero!");  | 
|---|
 | 795 | +	RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) !=  | 
|---|
 | 796 | +			 DYNTICK_IRQ_NONIDLE,  | 
|---|
 | 797 | +			 "Bad RCU  dynticks_nmi_nesting counter\n");  | 
|---|
 | 798 | +	RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),  | 
|---|
 | 799 | +			 "RCU in extended quiescent state!");  | 
|---|
 | 800 | +}  | 
|---|
 | 801 | +  | 
|---|
 | 802 | +#ifdef CONFIG_PROVE_RCU  | 
|---|
 | 803 | +/**  | 
|---|
 | 804 | + * rcu_irq_exit_check_preempt - Validate that scheduling is possible  | 
|---|
 | 805 | + */  | 
|---|
 | 806 | +void rcu_irq_exit_check_preempt(void)  | 
|---|
 | 807 | +{  | 
|---|
 | 808 | +	lockdep_assert_irqs_disabled();  | 
|---|
 | 809 | +  | 
|---|
 | 810 | +	RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) <= 0,  | 
|---|
 | 811 | +			 "RCU dynticks_nesting counter underflow/zero!");  | 
|---|
 | 812 | +	RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) !=  | 
|---|
 | 813 | +			 DYNTICK_IRQ_NONIDLE,  | 
|---|
 | 814 | +			 "Bad RCU  dynticks_nmi_nesting counter\n");  | 
|---|
 | 815 | +	RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),  | 
|---|
 | 816 | +			 "RCU in extended quiescent state!");  | 
|---|
 | 817 | +}  | 
|---|
 | 818 | +#endif /* #ifdef CONFIG_PROVE_RCU */  | 
|---|
| 877 | 819 |   | 
|---|
| 878 | 820 |  /* | 
|---|
| 879 | 821 |   * Wrapper for rcu_irq_exit() where interrupts are enabled. | 
|---|
| .. | .. | 
|---|
| 898 | 840 |   * allow for the possibility of usermode upcalls messing up our count of | 
|---|
| 899 | 841 |   * interrupt nesting level during the busy period that is just now starting. | 
|---|
| 900 | 842 |   */ | 
|---|
| 901 |  | -static void rcu_eqs_exit(bool user)  | 
|---|
 | 843 | +static void noinstr rcu_eqs_exit(bool user)  | 
|---|
| 902 | 844 |  { | 
|---|
| 903 |  | -	struct rcu_dynticks *rdtp;  | 
|---|
 | 845 | +	struct rcu_data *rdp;  | 
|---|
| 904 | 846 |  	long oldval; | 
|---|
| 905 | 847 |   | 
|---|
| 906 | 848 |  	lockdep_assert_irqs_disabled(); | 
|---|
| 907 |  | -	rdtp = this_cpu_ptr(&rcu_dynticks);  | 
|---|
| 908 |  | -	oldval = rdtp->dynticks_nesting;  | 
|---|
 | 849 | +	rdp = this_cpu_ptr(&rcu_data);  | 
|---|
 | 850 | +	oldval = rdp->dynticks_nesting;  | 
|---|
| 909 | 851 |  	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0); | 
|---|
| 910 | 852 |  	if (oldval) { | 
|---|
| 911 |  | -		rdtp->dynticks_nesting++;  | 
|---|
 | 853 | +		// RCU was already watching, so just do accounting and leave.  | 
|---|
 | 854 | +		rdp->dynticks_nesting++;  | 
|---|
| 912 | 855 |  		return; | 
|---|
| 913 | 856 |  	} | 
|---|
| 914 | 857 |  	rcu_dynticks_task_exit(); | 
|---|
 | 858 | +	// RCU is not watching here ...  | 
|---|
| 915 | 859 |  	rcu_dynticks_eqs_exit(); | 
|---|
 | 860 | +	// ... but is watching here.  | 
|---|
 | 861 | +	instrumentation_begin();  | 
|---|
 | 862 | +  | 
|---|
 | 863 | +	// instrumentation for the noinstr rcu_dynticks_eqs_exit()  | 
|---|
 | 864 | +	instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));  | 
|---|
 | 865 | +  | 
|---|
| 916 | 866 |  	rcu_cleanup_after_idle(); | 
|---|
| 917 |  | -	trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, 1, rdtp->dynticks);  | 
|---|
 | 867 | +	trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks));  | 
|---|
| 918 | 868 |  	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); | 
|---|
| 919 |  | -	WRITE_ONCE(rdtp->dynticks_nesting, 1);  | 
|---|
| 920 |  | -	WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);  | 
|---|
 | 869 | +	WRITE_ONCE(rdp->dynticks_nesting, 1);  | 
|---|
 | 870 | +	WARN_ON_ONCE(rdp->dynticks_nmi_nesting);  | 
|---|
 | 871 | +	WRITE_ONCE(rdp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);  | 
|---|
 | 872 | +	instrumentation_end();  | 
|---|
| 921 | 873 |  } | 
|---|
| 922 | 874 |   | 
|---|
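The noinstr annotation plus the instrumentation_begin()/instrumentation_end() brackets form a pattern that recurs throughout this patch: code that may run while RCU is not watching must not be instrumented, and any instrumentable work is fenced off explicitly. A minimal sketch of the idea (the function name is made up):

```c
/* Sketch only: the function is hypothetical, the bracketing is the point. */
static noinstr void example_eqs_transition(void)
{
	/* May run while RCU is not watching: no tracing, kprobes, or
	 * KASAN-instrumented helpers may be called from here. */

	instrumentation_begin();
	/* RCU is watching again, so tracing/lockdep/debug code is
	 * permitted inside this bracket. */
	instrumentation_end();
}
```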
| 923 | 875 |  /** | 
|---|
| .. | .. | 
|---|
| 937 | 889 |  	rcu_eqs_exit(false); | 
|---|
| 938 | 890 |  	local_irq_restore(flags); | 
|---|
| 939 | 891 |  } | 
|---|
 | 892 | +EXPORT_SYMBOL_GPL(rcu_idle_exit);  | 
|---|
| 940 | 893 |   | 
|---|
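For context, rcu_idle_exit() is the counterpart of rcu_idle_enter(); the idle loop itself is outside this patch, so the sketch below is only an assumed shape of that pairing.

```c
/* Assumed idle-loop shape, for illustration only. */
static void example_idle_iteration(void)
{
	rcu_idle_enter();	/* RCU stops watching this CPU        */
	/* ... low-power wait until the next interrupt ...         */
	rcu_idle_exit();	/* RCU is watching the CPU again      */
}
```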
| 941 | 894 |  #ifdef CONFIG_NO_HZ_FULL | 
|---|
| 942 | 895 |  /** | 
|---|
| .. | .. | 
|---|
| 948 | 901 |   * If you add or remove a call to rcu_user_exit(), be sure to test with | 
|---|
| 949 | 902 |   * CONFIG_RCU_EQS_DEBUG=y. | 
|---|
| 950 | 903 |   */ | 
|---|
| 951 |  | -void rcu_user_exit(void)  | 
|---|
 | 904 | +void noinstr rcu_user_exit(void)  | 
|---|
| 952 | 905 |  { | 
|---|
| 953 | 906 |  	rcu_eqs_exit(1); | 
|---|
 | 907 | +}  | 
|---|
 | 908 | +  | 
|---|
 | 909 | +/**  | 
|---|
 | 910 | + * __rcu_irq_enter_check_tick - Enable scheduler tick on CPU if RCU needs it.  | 
|---|
 | 911 | + *  | 
|---|
 | 912 | + * The scheduler tick is not normally enabled when CPUs enter the kernel  | 
|---|
 | 913 | + * from nohz_full userspace execution.  After all, nohz_full userspace  | 
|---|
 | 914 | + * execution is an RCU quiescent state and the time executing in the kernel  | 
|---|
 | 915 | + * is quite short.  Except of course when it isn't.  And it is not hard to  | 
|---|
 | 916 | + * cause a large system to spend tens of seconds or even minutes looping  | 
|---|
|  | 917 | + * in the kernel, which can cause a number of problems, including RCU CPU  | 
|---|
 | 918 | + * stall warnings.  | 
|---|
 | 919 | + *  | 
|---|
 | 920 | + * Therefore, if a nohz_full CPU fails to report a quiescent state  | 
|---|
 | 921 | + * in a timely manner, the RCU grace-period kthread sets that CPU's  | 
|---|
 | 922 | + * ->rcu_urgent_qs flag with the expectation that the next interrupt or  | 
|---|
 | 923 | + * exception will invoke this function, which will turn on the scheduler  | 
|---|
 | 924 | + * tick, which will enable RCU to detect that CPU's quiescent states,  | 
|---|
 | 925 | + * for example, due to cond_resched() calls in CONFIG_PREEMPT=n kernels.  | 
|---|
 | 926 | + * The tick will be disabled once a quiescent state is reported for  | 
|---|
 | 927 | + * this CPU.  | 
|---|
 | 928 | + *  | 
|---|
 | 929 | + * Of course, in carefully tuned systems, there might never be an  | 
|---|
 | 930 | + * interrupt or exception.  In that case, the RCU grace-period kthread  | 
|---|
 | 931 | + * will eventually cause one to happen.  However, in less carefully  | 
|---|
 | 932 | + * controlled environments, this function allows RCU to get what it  | 
|---|
 | 933 | + * needs without creating otherwise useless interruptions.  | 
|---|
 | 934 | + */  | 
|---|
 | 935 | +void __rcu_irq_enter_check_tick(void)  | 
|---|
 | 936 | +{  | 
|---|
 | 937 | +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);  | 
|---|
 | 938 | +  | 
|---|
 | 939 | +	// If we're here from NMI there's nothing to do.  | 
|---|
 | 940 | +	if (in_nmi())  | 
|---|
 | 941 | +		return;  | 
|---|
 | 942 | +  | 
|---|
 | 943 | +	RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),  | 
|---|
 | 944 | +			 "Illegal rcu_irq_enter_check_tick() from extended quiescent state");  | 
|---|
 | 945 | +  | 
|---|
 | 946 | +	if (!tick_nohz_full_cpu(rdp->cpu) ||  | 
|---|
 | 947 | +	    !READ_ONCE(rdp->rcu_urgent_qs) ||  | 
|---|
 | 948 | +	    READ_ONCE(rdp->rcu_forced_tick)) {  | 
|---|
 | 949 | +		// RCU doesn't need nohz_full help from this CPU, or it is  | 
|---|
 | 950 | +		// already getting that help.  | 
|---|
 | 951 | +		return;  | 
|---|
 | 952 | +	}  | 
|---|
 | 953 | +  | 
|---|
 | 954 | +	// We get here only when not in an extended quiescent state and  | 
|---|
 | 955 | +	// from interrupts (as opposed to NMIs).  Therefore, (1) RCU is  | 
|---|
 | 956 | +	// already watching and (2) The fact that we are in an interrupt  | 
|---|
 | 957 | +	// handler and that the rcu_node lock is an irq-disabled lock  | 
|---|
 | 958 | +	// prevents self-deadlock.  So we can safely recheck under the lock.  | 
|---|
 | 959 | +	// Note that the nohz_full state currently cannot change.  | 
|---|
 | 960 | +	raw_spin_lock_rcu_node(rdp->mynode);  | 
|---|
 | 961 | +	if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {  | 
|---|
 | 962 | +		// A nohz_full CPU is in the kernel and RCU needs a  | 
|---|
 | 963 | +		// quiescent state.  Turn on the tick!  | 
|---|
 | 964 | +		WRITE_ONCE(rdp->rcu_forced_tick, true);  | 
|---|
 | 965 | +		tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);  | 
|---|
 | 966 | +	}  | 
|---|
 | 967 | +	raw_spin_unlock_rcu_node(rdp->mynode);  | 
|---|
| 954 | 968 |  } | 
|---|
| 955 | 969 |  #endif /* CONFIG_NO_HZ_FULL */ | 
|---|
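The tick side of the handshake described above is symmetric: __rcu_irq_enter_check_tick() turns the tick on for a holdout nohz_full CPU, and rcu_disable_urgency_upon_qs() (later in this file) turns it back off once that CPU reports a quiescent state. A compressed, illustrative restatement, wrapped in a hypothetical helper:

```c
/* Illustrative wrapper; both tick_dep_*() calls appear verbatim in this file. */
static void example_rcu_tick_handshake(int cpu)
{
	tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU);	/* __rcu_irq_enter_check_tick()  */
	/* ... the holdout CPU eventually reports a quiescent state ...             */
	tick_dep_clear_cpu(cpu, TICK_DEP_BIT_RCU);	/* rcu_disable_urgency_upon_qs() */
}
```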
| 956 | 970 |   | 
|---|
| 957 | 971 |  /** | 
|---|
| 958 | 972 |   * rcu_nmi_enter - inform RCU of entry to NMI context | 
|---|
| 959 | 973 |   * | 
|---|
| 960 |  | - * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and  | 
|---|
| 961 |  | - * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling know  | 
|---|
 | 974 | + * If the CPU was idle from RCU's viewpoint, update rdp->dynticks and  | 
|---|
 | 975 | + * rdp->dynticks_nmi_nesting to let the RCU grace-period handling know  | 
|---|
| 962 | 976 |   * that the CPU is active.  This implementation permits nested NMIs, as | 
|---|
| 963 | 977 |   * long as the nesting level does not overflow an int.  (You will probably | 
|---|
| 964 | 978 |   * run out of stack space first.) | 
|---|
| .. | .. | 
|---|
| 966 | 980 |   * If you add or remove a call to rcu_nmi_enter(), be sure to test | 
|---|
| 967 | 981 |   * with CONFIG_RCU_EQS_DEBUG=y. | 
|---|
| 968 | 982 |   */ | 
|---|
| 969 |  | -void rcu_nmi_enter(void)  | 
|---|
 | 983 | +noinstr void rcu_nmi_enter(void)  | 
|---|
| 970 | 984 |  { | 
|---|
| 971 |  | -	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);  | 
|---|
| 972 | 985 |  	long incby = 2; | 
|---|
 | 986 | +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);  | 
|---|
| 973 | 987 |   | 
|---|
| 974 | 988 |  	/* Complain about underflow. */ | 
|---|
| 975 |  | -	WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0);  | 
|---|
 | 989 | +	WARN_ON_ONCE(rdp->dynticks_nmi_nesting < 0);  | 
|---|
| 976 | 990 |   | 
|---|
| 977 | 991 |  	/* | 
|---|
| 978 | 992 |  	 * If idle from RCU viewpoint, atomically increment ->dynticks | 
|---|
| .. | .. | 
|---|
| 983 | 997 |  	 * period (observation due to Andy Lutomirski). | 
|---|
| 984 | 998 |  	 */ | 
|---|
| 985 | 999 |  	if (rcu_dynticks_curr_cpu_in_eqs()) { | 
|---|
 | 1000 | +  | 
|---|
 | 1001 | +		if (!in_nmi())  | 
|---|
 | 1002 | +			rcu_dynticks_task_exit();  | 
|---|
 | 1003 | +  | 
|---|
 | 1004 | +		// RCU is not watching here ...  | 
|---|
| 986 | 1005 |  		rcu_dynticks_eqs_exit(); | 
|---|
 | 1006 | +		// ... but is watching here.  | 
|---|
 | 1007 | +  | 
|---|
 | 1008 | +		if (!in_nmi()) {  | 
|---|
 | 1009 | +			instrumentation_begin();  | 
|---|
 | 1010 | +			rcu_cleanup_after_idle();  | 
|---|
 | 1011 | +			instrumentation_end();  | 
|---|
 | 1012 | +		}  | 
|---|
 | 1013 | +  | 
|---|
 | 1014 | +		instrumentation_begin();  | 
|---|
 | 1015 | +		// instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()  | 
|---|
 | 1016 | +		instrument_atomic_read(&rdp->dynticks, sizeof(rdp->dynticks));  | 
|---|
 | 1017 | +		// instrumentation for the noinstr rcu_dynticks_eqs_exit()  | 
|---|
 | 1018 | +		instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));  | 
|---|
 | 1019 | +  | 
|---|
| 987 | 1020 |  		incby = 1; | 
|---|
 | 1021 | +	} else if (!in_nmi()) {  | 
|---|
 | 1022 | +		instrumentation_begin();  | 
|---|
 | 1023 | +		rcu_irq_enter_check_tick();  | 
|---|
|  | 1024 | +	} else {  | 
|---|
 | 1025 | +		instrumentation_begin();  | 
|---|
| 988 | 1026 |  	} | 
|---|
 | 1027 | +  | 
|---|
| 989 | 1028 |  	trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="), | 
|---|
| 990 |  | -			  rdtp->dynticks_nmi_nesting,  | 
|---|
| 991 |  | -			  rdtp->dynticks_nmi_nesting + incby, rdtp->dynticks);  | 
|---|
| 992 |  | -	WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */  | 
|---|
| 993 |  | -		   rdtp->dynticks_nmi_nesting + incby);  | 
|---|
 | 1029 | +			  rdp->dynticks_nmi_nesting,  | 
|---|
 | 1030 | +			  rdp->dynticks_nmi_nesting + incby, atomic_read(&rdp->dynticks));  | 
|---|
 | 1031 | +	instrumentation_end();  | 
|---|
 | 1032 | +	WRITE_ONCE(rdp->dynticks_nmi_nesting, /* Prevent store tearing. */  | 
|---|
 | 1033 | +		   rdp->dynticks_nmi_nesting + incby);  | 
|---|
| 994 | 1034 |  	barrier(); | 
|---|
| 995 | 1035 |  } | 
|---|
| 996 | 1036 |   | 
|---|
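The incby bookkeeping above is easiest to see as a short trace starting from an RCU-idle CPU; the exit-side decrements live in rcu_nmi_exit(), which is outside this hunk, so the numbers below are an illustrative summary rather than new code.

```c
/*
 * Illustrative ->dynticks_nmi_nesting trace:
 *
 *   rcu_nmi_enter()   idle CPU, RCU starts watching:  0 -> 1  (incby == 1)
 *   rcu_nmi_enter()   nested NMI, already watching:   1 -> 3  (incby == 2)
 *   rcu_nmi_exit()    still nested, keep watching:    3 -> 1
 *   rcu_nmi_exit()    outermost, RCU stops watching:  1 -> 0
 */
```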
| .. | .. | 
|---|
| 1016 | 1056 |   * If you add or remove a call to rcu_irq_enter(), be sure to test with | 
|---|
| 1017 | 1057 |   * CONFIG_RCU_EQS_DEBUG=y. | 
|---|
| 1018 | 1058 |   */ | 
|---|
| 1019 |  | -void rcu_irq_enter(void)  | 
|---|
 | 1059 | +noinstr void rcu_irq_enter(void)  | 
|---|
| 1020 | 1060 |  { | 
|---|
| 1021 |  | -	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);  | 
|---|
| 1022 |  | -  | 
|---|
| 1023 | 1061 |  	lockdep_assert_irqs_disabled(); | 
|---|
| 1024 |  | -	if (rdtp->dynticks_nmi_nesting == 0)  | 
|---|
| 1025 |  | -		rcu_dynticks_task_exit();  | 
|---|
| 1026 | 1062 |  	rcu_nmi_enter(); | 
|---|
| 1027 |  | -	if (rdtp->dynticks_nmi_nesting == 1)  | 
|---|
| 1028 |  | -		rcu_cleanup_after_idle();  | 
|---|
| 1029 | 1063 |  } | 
|---|
| 1030 | 1064 |   | 
|---|
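rcu_irq_enter() and rcu_irq_exit() are now thin wrappers around the NMI-nesting code above; the architecture/entry code that pairs them is not part of this patch, so the following is only an assumed shape of a call site.

```c
/* Assumed interrupt path, for illustration only (irqs disabled throughout). */
static void example_handle_irq(void)
{
	rcu_irq_enter();	/* nests via rcu_nmi_enter(): RCU is watching */
	/* ... run the interrupt handler ...                              */
	rcu_irq_exit();		/* nests via rcu_nmi_exit()                   */
}
```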
| 1031 | 1065 |  /* | 
|---|
| .. | .. | 
|---|
| 1043 | 1077 |  	local_irq_restore(flags); | 
|---|
| 1044 | 1078 |  } | 
|---|
| 1045 | 1079 |   | 
|---|
 | 1080 | +/*  | 
|---|
 | 1081 | + * If any sort of urgency was applied to the current CPU (for example,  | 
|---|
 | 1082 | + * the scheduler-clock interrupt was enabled on a nohz_full CPU) in order  | 
|---|
 | 1083 | + * to get to a quiescent state, disable it.  | 
|---|
 | 1084 | + */  | 
|---|
 | 1085 | +static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)  | 
|---|
 | 1086 | +{  | 
|---|
 | 1087 | +	raw_lockdep_assert_held_rcu_node(rdp->mynode);  | 
|---|
 | 1088 | +	WRITE_ONCE(rdp->rcu_urgent_qs, false);  | 
|---|
 | 1089 | +	WRITE_ONCE(rdp->rcu_need_heavy_qs, false);  | 
|---|
 | 1090 | +	if (tick_nohz_full_cpu(rdp->cpu) && rdp->rcu_forced_tick) {  | 
|---|
 | 1091 | +		tick_dep_clear_cpu(rdp->cpu, TICK_DEP_BIT_RCU);  | 
|---|
 | 1092 | +		WRITE_ONCE(rdp->rcu_forced_tick, false);  | 
|---|
 | 1093 | +	}  | 
|---|
 | 1094 | +}  | 
|---|
 | 1095 | +  | 
|---|
| 1046 | 1096 |  /** | 
|---|
| 1047 |  | - * rcu_is_watching - see if RCU thinks that the current CPU is idle  | 
|---|
 | 1097 | + * rcu_is_watching - see if RCU thinks that the current CPU is not idle  | 
|---|
| 1048 | 1098 |   * | 
|---|
| 1049 | 1099 |   * Return true if RCU is watching the running CPU, which means that this | 
|---|
| 1050 | 1100 |   * CPU can safely enter RCU read-side critical sections.  In other words, | 
|---|
| 1051 |  | - * if the current CPU is in its idle loop and is neither in an interrupt  | 
|---|
| 1052 |  | - * or NMI handler, return true.  | 
|---|
 | 1101 | + * if the current CPU is not in its idle loop or is in an interrupt or  | 
|---|
 | 1102 | + * NMI handler, return true.  | 
|---|
 | 1103 | + *  | 
|---|
|  | 1104 | + * Make this function notrace because it can be called by the internal  | 
|---|
|  | 1105 | + * functions of ftrace, and making it notrace avoids unnecessary recursion.  | 
|---|
| 1053 | 1106 |   */ | 
|---|
| 1054 |  | -bool notrace rcu_is_watching(void)  | 
|---|
 | 1107 | +notrace bool rcu_is_watching(void)  | 
|---|
| 1055 | 1108 |  { | 
|---|
| 1056 | 1109 |  	bool ret; | 
|---|
| 1057 | 1110 |   | 
|---|
| .. | .. | 
|---|
| 1077 | 1130 |  	cpu = task_cpu(t); | 
|---|
| 1078 | 1131 |  	if (!task_curr(t)) | 
|---|
| 1079 | 1132 |  		return; /* This task is not running on that CPU. */ | 
|---|
| 1080 |  | -	smp_store_release(per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, cpu), true);  | 
|---|
 | 1133 | +	smp_store_release(per_cpu_ptr(&rcu_data.rcu_urgent_qs, cpu), true);  | 
|---|
| 1081 | 1134 |  } | 
|---|
| 1082 | 1135 |   | 
|---|
| 1083 | 1136 |  #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) | 
|---|
| .. | .. | 
|---|
| 1088 | 1141 |   * Disable preemption to avoid false positives that could otherwise | 
|---|
| 1089 | 1142 |   * happen due to the current CPU number being sampled, this task being | 
|---|
| 1090 | 1143 |   * preempted, its old CPU being taken offline, resuming on some other CPU, | 
|---|
| 1091 |  | - * then determining that its old CPU is now offline.  Because there are  | 
|---|
| 1092 |  | - * multiple flavors of RCU, and because this function can be called in the  | 
|---|
| 1093 |  | - * midst of updating the flavors while a given CPU coming online or going  | 
|---|
| 1094 |  | - * offline, it is necessary to check all flavors.  If any of the flavors  | 
|---|
| 1095 |  | - * believe that given CPU is online, it is considered to be online.  | 
|---|
 | 1144 | + * then determining that its old CPU is now offline.  | 
|---|
| 1096 | 1145 |   * | 
|---|
| 1097 | 1146 |   * Disable checking if in an NMI handler because we cannot safely | 
|---|
| 1098 | 1147 |   * report errors from NMI handlers anyway.  In addition, it is OK to use | 
|---|
| .. | .. | 
|---|
| 1103 | 1152 |  { | 
|---|
| 1104 | 1153 |  	struct rcu_data *rdp; | 
|---|
| 1105 | 1154 |  	struct rcu_node *rnp; | 
|---|
| 1106 |  | -	struct rcu_state *rsp;  | 
|---|
 | 1155 | +	bool ret = false;  | 
|---|
| 1107 | 1156 |   | 
|---|
| 1108 | 1157 |  	if (in_nmi() || !rcu_scheduler_fully_active) | 
|---|
| 1109 | 1158 |  		return true; | 
|---|
| 1110 |  | -	preempt_disable();  | 
|---|
| 1111 |  | -	for_each_rcu_flavor(rsp) {  | 
|---|
| 1112 |  | -		rdp = this_cpu_ptr(rsp->rda);  | 
|---|
| 1113 |  | -		rnp = rdp->mynode;  | 
|---|
| 1114 |  | -		if (rdp->grpmask & rcu_rnp_online_cpus(rnp)) {  | 
|---|
| 1115 |  | -			preempt_enable();  | 
|---|
| 1116 |  | -			return true;  | 
|---|
| 1117 |  | -		}  | 
|---|
| 1118 |  | -	}  | 
|---|
| 1119 |  | -	preempt_enable();  | 
|---|
| 1120 |  | -	return false;  | 
|---|
 | 1159 | +	preempt_disable_notrace();  | 
|---|
 | 1160 | +	rdp = this_cpu_ptr(&rcu_data);  | 
|---|
 | 1161 | +	rnp = rdp->mynode;  | 
|---|
 | 1162 | +	if (rdp->grpmask & rcu_rnp_online_cpus(rnp))  | 
|---|
 | 1163 | +		ret = true;  | 
|---|
 | 1164 | +	preempt_enable_notrace();  | 
|---|
 | 1165 | +	return ret;  | 
|---|
| 1121 | 1166 |  } | 
|---|
| 1122 | 1167 |  EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); | 
|---|
| 1123 | 1168 |   | 
|---|
| 1124 | 1169 |  #endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */ | 
|---|
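A hedged sketch of how a debug check might consume rcu_lockdep_current_cpu_online(); the real lockdep call sites live outside this hunk, and the wrapper function and message text below are invented for illustration.

```c
/* Hypothetical debug check using the helper above. */
static void example_offline_check(void)
{
	RCU_LOCKDEP_WARN(!rcu_lockdep_current_cpu_online(),
			 "RCU used on a CPU that RCU believes to be offline!");
}
```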
| 1125 |  | -  | 
|---|
| 1126 |  | -/**  | 
|---|
| 1127 |  | - * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle  | 
|---|
| 1128 |  | - *  | 
|---|
| 1129 |  | - * If the current CPU is idle or running at a first-level (not nested)  | 
|---|
| 1130 |  | - * interrupt from idle, return true.  The caller must have at least  | 
|---|
| 1131 |  | - * disabled preemption.  | 
|---|
| 1132 |  | - */  | 
|---|
| 1133 |  | -static int rcu_is_cpu_rrupt_from_idle(void)  | 
|---|
| 1134 |  | -{  | 
|---|
| 1135 |  | -	return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 0 &&  | 
|---|
| 1136 |  | -	       __this_cpu_read(rcu_dynticks.dynticks_nmi_nesting) <= 1;  | 
|---|
| 1137 |  | -}  | 
|---|
| 1138 | 1170 |   | 
|---|
| 1139 | 1171 |  /* | 
|---|
| 1140 | 1172 |   * We are reporting a quiescent state on behalf of some other CPU, so | 
|---|
| .. | .. | 
|---|
| 1160 | 1192 |   */ | 
|---|
| 1161 | 1193 |  static int dyntick_save_progress_counter(struct rcu_data *rdp) | 
|---|
| 1162 | 1194 |  { | 
|---|
| 1163 |  | -	rdp->dynticks_snap = rcu_dynticks_snap(rdp->dynticks);  | 
|---|
 | 1195 | +	rdp->dynticks_snap = rcu_dynticks_snap(rdp);  | 
|---|
| 1164 | 1196 |  	if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) { | 
|---|
| 1165 |  | -		trace_rcu_fqs(rdp->rsp->name, rdp->gp_seq, rdp->cpu, TPS("dti"));  | 
|---|
 | 1197 | +		trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti"));  | 
|---|
| 1166 | 1198 |  		rcu_gpnum_ovf(rdp->mynode, rdp); | 
|---|
| 1167 | 1199 |  		return 1; | 
|---|
| 1168 | 1200 |  	} | 
|---|
| 1169 | 1201 |  	return 0; | 
|---|
| 1170 |  | -}  | 
|---|
| 1171 |  | -  | 
|---|
| 1172 |  | -/*  | 
|---|
| 1173 |  | - * Handler for the irq_work request posted when a grace period has  | 
|---|
| 1174 |  | - * gone on for too long, but not yet long enough for an RCU CPU  | 
|---|
| 1175 |  | - * stall warning.  Set state appropriately, but just complain if  | 
|---|
| 1176 |  | - * there is unexpected state on entry.  | 
|---|
| 1177 |  | - */  | 
|---|
| 1178 |  | -static void rcu_iw_handler(struct irq_work *iwp)  | 
|---|
| 1179 |  | -{  | 
|---|
| 1180 |  | -	struct rcu_data *rdp;  | 
|---|
| 1181 |  | -	struct rcu_node *rnp;  | 
|---|
| 1182 |  | -  | 
|---|
| 1183 |  | -	rdp = container_of(iwp, struct rcu_data, rcu_iw);  | 
|---|
| 1184 |  | -	rnp = rdp->mynode;  | 
|---|
| 1185 |  | -	raw_spin_lock_rcu_node(rnp);  | 
|---|
| 1186 |  | -	if (!WARN_ON_ONCE(!rdp->rcu_iw_pending)) {  | 
|---|
| 1187 |  | -		rdp->rcu_iw_gp_seq = rnp->gp_seq;  | 
|---|
| 1188 |  | -		rdp->rcu_iw_pending = false;  | 
|---|
| 1189 |  | -	}  | 
|---|
| 1190 |  | -	raw_spin_unlock_rcu_node(rnp);  | 
|---|
| 1191 | 1202 |  } | 
|---|
| 1192 | 1203 |   | 
|---|
| 1193 | 1204 |  /* | 
|---|
| .. | .. | 
|---|
| 1211 | 1222 |  	 * read-side critical section that started before the beginning | 
|---|
| 1212 | 1223 |  	 * of the current RCU grace period. | 
|---|
| 1213 | 1224 |  	 */ | 
|---|
| 1214 |  | -	if (rcu_dynticks_in_eqs_since(rdp->dynticks, rdp->dynticks_snap)) {  | 
|---|
| 1215 |  | -		trace_rcu_fqs(rdp->rsp->name, rdp->gp_seq, rdp->cpu, TPS("dti"));  | 
|---|
| 1216 |  | -		rdp->dynticks_fqs++;  | 
|---|
 | 1225 | +	if (rcu_dynticks_in_eqs_since(rdp, rdp->dynticks_snap)) {  | 
|---|
 | 1226 | +		trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti"));  | 
|---|
| 1217 | 1227 |  		rcu_gpnum_ovf(rnp, rdp); | 
|---|
| 1218 | 1228 |  		return 1; | 
|---|
| 1219 | 1229 |  	} | 
|---|
| 1220 | 1230 |   | 
|---|
| 1221 | 1231 |  	/* | 
|---|
| 1222 |  | -	 * Has this CPU encountered a cond_resched() since the beginning  | 
|---|
| 1223 |  | -	 * of the grace period?  For this to be the case, the CPU has to  | 
|---|
| 1224 |  | -	 * have noticed the current grace period.  This might not be the  | 
|---|
| 1225 |  | -	 * case for nohz_full CPUs looping in the kernel.  | 
|---|
 | 1232 | +	 * Complain if a CPU that is considered to be offline from RCU's  | 
|---|
 | 1233 | +	 * perspective has not yet reported a quiescent state.  After all,  | 
|---|
 | 1234 | +	 * the offline CPU should have reported a quiescent state during  | 
|---|
 | 1235 | +	 * the CPU-offline process, or, failing that, by rcu_gp_init()  | 
|---|
 | 1236 | +	 * if it ran concurrently with either the CPU going offline or the  | 
|---|
 | 1237 | +	 * last task on a leaf rcu_node structure exiting its RCU read-side  | 
|---|
 | 1238 | +	 * critical section while all CPUs corresponding to that structure  | 
|---|
 | 1239 | +	 * are offline.  This added warning detects bugs in any of these  | 
|---|
 | 1240 | +	 * code paths.  | 
|---|
 | 1241 | +	 *  | 
|---|
 | 1242 | +	 * The rcu_node structure's ->lock is held here, which excludes  | 
|---|
 | 1243 | +	 * the relevant portions the CPU-hotplug code, the grace-period  | 
|---|
 | 1244 | +	 * initialization code, and the rcu_read_unlock() code paths.  | 
|---|
 | 1245 | +	 *  | 
|---|
 | 1246 | +	 * For more detail, please refer to the "Hotplug CPU" section  | 
|---|
 | 1247 | +	 * of RCU's Requirements documentation.  | 
|---|
| 1226 | 1248 |  	 */ | 
|---|
| 1227 |  | -	jtsq = jiffies_till_sched_qs;  | 
|---|
| 1228 |  | -	ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);  | 
|---|
| 1229 |  | -	if (time_after(jiffies, rdp->rsp->gp_start + jtsq) &&  | 
|---|
| 1230 |  | -	    READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) &&  | 
|---|
| 1231 |  | -	    rcu_seq_current(&rdp->gp_seq) == rnp->gp_seq && !rdp->gpwrap) {  | 
|---|
| 1232 |  | -		trace_rcu_fqs(rdp->rsp->name, rdp->gp_seq, rdp->cpu, TPS("rqc"));  | 
|---|
| 1233 |  | -		rcu_gpnum_ovf(rnp, rdp);  | 
|---|
| 1234 |  | -		return 1;  | 
|---|
| 1235 |  | -	} else if (time_after(jiffies, rdp->rsp->gp_start + jtsq)) {  | 
|---|
| 1236 |  | -		/* Load rcu_qs_ctr before store to rcu_urgent_qs. */  | 
|---|
| 1237 |  | -		smp_store_release(ruqp, true);  | 
|---|
| 1238 |  | -	}  | 
|---|
| 1239 |  | -  | 
|---|
| 1240 |  | -	/* If waiting too long on an offline CPU, complain. */  | 
|---|
| 1241 |  | -	if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp)) &&  | 
|---|
| 1242 |  | -	    time_after(jiffies, rdp->rsp->gp_start + HZ)) {  | 
|---|
 | 1249 | +	if (WARN_ON_ONCE(!(rdp->grpmask & rcu_rnp_online_cpus(rnp)))) {  | 
|---|
| 1243 | 1250 |  		bool onl; | 
|---|
| 1244 | 1251 |  		struct rcu_node *rnp1; | 
|---|
| 1245 | 1252 |   | 
|---|
| 1246 |  | -		WARN_ON(1);  /* Offline CPUs are supposed to report QS! */  | 
|---|
| 1247 | 1253 |  		pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n", | 
|---|
| 1248 | 1254 |  			__func__, rnp->grplo, rnp->grphi, rnp->level, | 
|---|
| 1249 | 1255 |  			(long)rnp->gp_seq, (long)rnp->completedqs); | 
|---|
| .. | .. | 
|---|
| 1260 | 1266 |   | 
|---|
| 1261 | 1267 |  	/* | 
|---|
| 1262 | 1268 |  	 * A CPU running for an extended time within the kernel can | 
|---|
| 1263 |  | -	 * delay RCU grace periods.  When the CPU is in NO_HZ_FULL mode,  | 
|---|
| 1264 |  | -	 * even context-switching back and forth between a pair of  | 
|---|
| 1265 |  | -	 * in-kernel CPU-bound tasks cannot advance grace periods.  | 
|---|
| 1266 |  | -	 * So if the grace period is old enough, make the CPU pay attention.  | 
|---|
| 1267 |  | -	 * Note that the unsynchronized assignments to the per-CPU  | 
|---|
| 1268 |  | -	 * rcu_need_heavy_qs variable are safe.  Yes, setting of  | 
|---|
| 1269 |  | -	 * bits can be lost, but they will be set again on the next  | 
|---|
| 1270 |  | -	 * force-quiescent-state pass.  So lost bit sets do not result  | 
|---|
| 1271 |  | -	 * in incorrect behavior, merely in a grace period lasting  | 
|---|
| 1272 |  | -	 * a few jiffies longer than it might otherwise.  Because  | 
|---|
| 1273 |  | -	 * there are at most four threads involved, and because the  | 
|---|
| 1274 |  | -	 * updates are only once every few jiffies, the probability of  | 
|---|
| 1275 |  | -	 * lossage (and thus of slight grace-period extension) is  | 
|---|
| 1276 |  | -	 * quite low.  | 
|---|
 | 1269 | +	 * delay RCU grace periods: (1) At age jiffies_to_sched_qs,  | 
|---|
 | 1270 | +	 * set .rcu_urgent_qs, (2) At age 2*jiffies_to_sched_qs, set  | 
|---|
 | 1271 | +	 * both .rcu_need_heavy_qs and .rcu_urgent_qs.  Note that the  | 
|---|
 | 1272 | +	 * unsynchronized assignments to the per-CPU rcu_need_heavy_qs  | 
|---|
 | 1273 | +	 * variable are safe because the assignments are repeated if this  | 
|---|
 | 1274 | +	 * CPU failed to pass through a quiescent state.  This code  | 
|---|
 | 1275 | +	 * also checks .jiffies_resched in case jiffies_to_sched_qs  | 
|---|
 | 1276 | +	 * is set way high.  | 
|---|
| 1277 | 1277 |  	 */ | 
|---|
| 1278 |  | -	rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu);  | 
|---|
 | 1278 | +	jtsq = READ_ONCE(jiffies_to_sched_qs);  | 
|---|
 | 1279 | +	ruqp = per_cpu_ptr(&rcu_data.rcu_urgent_qs, rdp->cpu);  | 
|---|
 | 1280 | +	rnhqp = &per_cpu(rcu_data.rcu_need_heavy_qs, rdp->cpu);  | 
|---|
| 1279 | 1281 |  	if (!READ_ONCE(*rnhqp) && | 
|---|
| 1280 |  | -	    (time_after(jiffies, rdp->rsp->gp_start + jtsq) ||  | 
|---|
| 1281 |  | -	     time_after(jiffies, rdp->rsp->jiffies_resched))) {  | 
|---|
 | 1282 | +	    (time_after(jiffies, rcu_state.gp_start + jtsq * 2) ||  | 
|---|
 | 1283 | +	     time_after(jiffies, rcu_state.jiffies_resched) ||  | 
|---|
 | 1284 | +	     rcu_state.cbovld)) {  | 
|---|
| 1282 | 1285 |  		WRITE_ONCE(*rnhqp, true); | 
|---|
| 1283 | 1286 |  		/* Store rcu_need_heavy_qs before rcu_urgent_qs. */ | 
|---|
| 1284 | 1287 |  		smp_store_release(ruqp, true); | 
|---|
| 1285 |  | -		rdp->rsp->jiffies_resched += jtsq; /* Re-enable beating. */  | 
|---|
 | 1288 | +	} else if (time_after(jiffies, rcu_state.gp_start + jtsq)) {  | 
|---|
 | 1289 | +		WRITE_ONCE(*ruqp, true);  | 
|---|
| 1286 | 1290 |  	} | 
|---|
| 1287 | 1291 |   | 
|---|
| 1288 | 1292 |  	/* | 
|---|
| 1289 |  | -	 * If more than halfway to RCU CPU stall-warning time, do a  | 
|---|
| 1290 |  | -	 * resched_cpu() to try to loosen things up a bit.  Also check to  | 
|---|
| 1291 |  | -	 * see if the CPU is getting hammered with interrupts, but only  | 
|---|
| 1292 |  | -	 * once per grace period, just to keep the IPIs down to a dull roar.  | 
|---|
 | 1293 | +	 * NO_HZ_FULL CPUs can run in-kernel without rcu_sched_clock_irq!  | 
|---|
 | 1294 | +	 * The above code handles this, but only for straight cond_resched().  | 
|---|
 | 1295 | +	 * And some in-kernel loops check need_resched() before calling  | 
|---|
 | 1296 | +	 * cond_resched(), which defeats the above code for CPUs that are  | 
|---|
 | 1297 | +	 * running in-kernel with scheduling-clock interrupts disabled.  | 
|---|
 | 1298 | +	 * So hit them over the head with the resched_cpu() hammer!  | 
|---|
| 1293 | 1299 |  	 */ | 
|---|
| 1294 |  | -	if (jiffies - rdp->rsp->gp_start > rcu_jiffies_till_stall_check() / 2) {  | 
|---|
 | 1300 | +	if (tick_nohz_full_cpu(rdp->cpu) &&  | 
|---|
 | 1301 | +	    (time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq * 3) ||  | 
|---|
 | 1302 | +	     rcu_state.cbovld)) {  | 
|---|
 | 1303 | +		WRITE_ONCE(*ruqp, true);  | 
|---|
| 1295 | 1304 |  		resched_cpu(rdp->cpu); | 
|---|
 | 1305 | +		WRITE_ONCE(rdp->last_fqs_resched, jiffies);  | 
|---|
 | 1306 | +	}  | 
|---|
 | 1307 | +  | 
|---|
 | 1308 | +	/*  | 
|---|
 | 1309 | +	 * If more than halfway to RCU CPU stall-warning time, invoke  | 
|---|
 | 1310 | +	 * resched_cpu() more frequently to try to loosen things up a bit.  | 
|---|
 | 1311 | +	 * Also check to see if the CPU is getting hammered with interrupts,  | 
|---|
 | 1312 | +	 * but only once per grace period, just to keep the IPIs down to  | 
|---|
 | 1313 | +	 * a dull roar.  | 
|---|
 | 1314 | +	 */  | 
|---|
 | 1315 | +	if (time_after(jiffies, rcu_state.jiffies_resched)) {  | 
|---|
 | 1316 | +		if (time_after(jiffies,  | 
|---|
 | 1317 | +			       READ_ONCE(rdp->last_fqs_resched) + jtsq)) {  | 
|---|
 | 1318 | +			resched_cpu(rdp->cpu);  | 
|---|
 | 1319 | +			WRITE_ONCE(rdp->last_fqs_resched, jiffies);  | 
|---|
 | 1320 | +		}  | 
|---|
| 1296 | 1321 |  		if (IS_ENABLED(CONFIG_IRQ_WORK) && | 
|---|
| 1297 | 1322 |  		    !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq && | 
|---|
| 1298 | 1323 |  		    (rnp->ffmask & rdp->grpmask)) { | 
|---|
| 1299 | 1324 |  			init_irq_work(&rdp->rcu_iw, rcu_iw_handler); | 
|---|
| 1300 |  | -			rdp->rcu_iw.flags = IRQ_WORK_HARD_IRQ;  | 
|---|
 | 1325 | +			atomic_set(&rdp->rcu_iw.flags, IRQ_WORK_HARD_IRQ);  | 
|---|
| 1301 | 1326 |  			rdp->rcu_iw_pending = true; | 
|---|
| 1302 | 1327 |  			rdp->rcu_iw_gp_seq = rnp->gp_seq; | 
|---|
| 1303 | 1328 |  			irq_work_queue_on(&rdp->rcu_iw, rdp->cpu); | 
|---|
| .. | .. | 
|---|
| 1307 | 1332 |  	return 0; | 
|---|
| 1308 | 1333 |  } | 
|---|
| 1309 | 1334 |   | 
|---|
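The escalation schedule in rcu_implicit_dynticks_qs() can be summarized as follows; this is a simplified, illustrative restatement using that function's local variable names (it omits the cbovld and rate-limiting details), not additional kernel code.

```c
/* Simplified restatement of the thresholds above. */
if (time_after(jiffies, rcu_state.gp_start + jtsq))
	WRITE_ONCE(*ruqp, true);	/* ask cond_resched() and friends for help */
if (time_after(jiffies, rcu_state.gp_start + jtsq * 2))
	WRITE_ONCE(*rnhqp, true);	/* also request a heavy quiescent state    */
if (time_after(jiffies, rcu_state.jiffies_resched))
	resched_cpu(rdp->cpu);		/* finally, force a context switch         */
```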
| 1310 |  | -static void record_gp_stall_check_time(struct rcu_state *rsp)  | 
|---|
| 1311 |  | -{  | 
|---|
| 1312 |  | -	unsigned long j = jiffies;  | 
|---|
| 1313 |  | -	unsigned long j1;  | 
|---|
| 1314 |  | -  | 
|---|
| 1315 |  | -	rsp->gp_start = j;  | 
|---|
| 1316 |  | -	j1 = rcu_jiffies_till_stall_check();  | 
|---|
| 1317 |  | -	/* Record ->gp_start before ->jiffies_stall. */  | 
|---|
| 1318 |  | -	smp_store_release(&rsp->jiffies_stall, j + j1); /* ^^^ */  | 
|---|
| 1319 |  | -	rsp->jiffies_resched = j + j1 / 2;  | 
|---|
| 1320 |  | -	rsp->n_force_qs_gpstart = READ_ONCE(rsp->n_force_qs);  | 
|---|
| 1321 |  | -}  | 
|---|
| 1322 |  | -  | 
|---|
| 1323 |  | -/*  | 
|---|
| 1324 |  | - * Convert a ->gp_state value to a character string.  | 
|---|
| 1325 |  | - */  | 
|---|
| 1326 |  | -static const char *gp_state_getname(short gs)  | 
|---|
| 1327 |  | -{  | 
|---|
| 1328 |  | -	if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names))  | 
|---|
| 1329 |  | -		return "???";  | 
|---|
| 1330 |  | -	return gp_state_names[gs];  | 
|---|
| 1331 |  | -}  | 
|---|
| 1332 |  | -  | 
|---|
| 1333 |  | -/*  | 
|---|
| 1334 |  | - * Complain about starvation of grace-period kthread.  | 
|---|
| 1335 |  | - */  | 
|---|
| 1336 |  | -static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)  | 
|---|
| 1337 |  | -{  | 
|---|
| 1338 |  | -	unsigned long gpa;  | 
|---|
| 1339 |  | -	unsigned long j;  | 
|---|
| 1340 |  | -  | 
|---|
| 1341 |  | -	j = jiffies;  | 
|---|
| 1342 |  | -	gpa = READ_ONCE(rsp->gp_activity);  | 
|---|
| 1343 |  | -	if (j - gpa > 2 * HZ) {  | 
|---|
| 1344 |  | -		pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",  | 
|---|
| 1345 |  | -		       rsp->name, j - gpa,  | 
|---|
| 1346 |  | -		       (long)rcu_seq_current(&rsp->gp_seq),  | 
|---|
| 1347 |  | -		       rsp->gp_flags,  | 
|---|
| 1348 |  | -		       gp_state_getname(rsp->gp_state), rsp->gp_state,  | 
|---|
| 1349 |  | -		       rsp->gp_kthread ? rsp->gp_kthread->state : ~0,  | 
|---|
| 1350 |  | -		       rsp->gp_kthread ? task_cpu(rsp->gp_kthread) : -1);  | 
|---|
| 1351 |  | -		if (rsp->gp_kthread) {  | 
|---|
| 1352 |  | -			pr_err("RCU grace-period kthread stack dump:\n");  | 
|---|
| 1353 |  | -			sched_show_task(rsp->gp_kthread);  | 
|---|
| 1354 |  | -			wake_up_process(rsp->gp_kthread);  | 
|---|
| 1355 |  | -		}  | 
|---|
| 1356 |  | -	}  | 
|---|
| 1357 |  | -}  | 
|---|
| 1358 |  | -  | 
|---|
| 1359 |  | -/*  | 
|---|
| 1360 |  | - * Dump stacks of all tasks running on stalled CPUs.  First try using  | 
|---|
| 1361 |  | - * NMIs, but fall back to manual remote stack tracing on architectures  | 
|---|
| 1362 |  | - * that don't support NMI-based stack dumps.  The NMI-triggered stack  | 
|---|
| 1363 |  | - * traces are more accurate because they are printed by the target CPU.  | 
|---|
| 1364 |  | - */  | 
|---|
| 1365 |  | -static void rcu_dump_cpu_stacks(struct rcu_state *rsp)  | 
|---|
| 1366 |  | -{  | 
|---|
| 1367 |  | -	int cpu;  | 
|---|
| 1368 |  | -	unsigned long flags;  | 
|---|
| 1369 |  | -	struct rcu_node *rnp;  | 
|---|
| 1370 |  | -  | 
|---|
| 1371 |  | -	rcu_for_each_leaf_node(rsp, rnp) {  | 
|---|
| 1372 |  | -		raw_spin_lock_irqsave_rcu_node(rnp, flags);  | 
|---|
| 1373 |  | -		for_each_leaf_node_possible_cpu(rnp, cpu)  | 
|---|
| 1374 |  | -			if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu))  | 
|---|
| 1375 |  | -				if (!trigger_single_cpu_backtrace(cpu))  | 
|---|
| 1376 |  | -					dump_cpu_task(cpu);  | 
|---|
| 1377 |  | -		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  | 
|---|
| 1378 |  | -	}  | 
|---|
| 1379 |  | -}  | 
|---|
| 1380 |  | -  | 
|---|
| 1381 |  | -/*  | 
|---|
| 1382 |  | - * If too much time has passed in the current grace period, and if  | 
|---|
| 1383 |  | - * so configured, go kick the relevant kthreads.  | 
|---|
| 1384 |  | - */  | 
|---|
| 1385 |  | -static void rcu_stall_kick_kthreads(struct rcu_state *rsp)  | 
|---|
| 1386 |  | -{  | 
|---|
| 1387 |  | -	unsigned long j;  | 
|---|
| 1388 |  | -  | 
|---|
| 1389 |  | -	if (!rcu_kick_kthreads)  | 
|---|
| 1390 |  | -		return;  | 
|---|
| 1391 |  | -	j = READ_ONCE(rsp->jiffies_kick_kthreads);  | 
|---|
| 1392 |  | -	if (time_after(jiffies, j) && rsp->gp_kthread &&  | 
|---|
| 1393 |  | -	    (rcu_gp_in_progress(rsp) || READ_ONCE(rsp->gp_flags))) {  | 
|---|
| 1394 |  | -		WARN_ONCE(1, "Kicking %s grace-period kthread\n", rsp->name);  | 
|---|
| 1395 |  | -		rcu_ftrace_dump(DUMP_ALL);  | 
|---|
| 1396 |  | -		wake_up_process(rsp->gp_kthread);  | 
|---|
| 1397 |  | -		WRITE_ONCE(rsp->jiffies_kick_kthreads, j + HZ);  | 
|---|
| 1398 |  | -	}  | 
|---|
| 1399 |  | -}  | 
|---|
| 1400 |  | -  | 
|---|
| 1401 |  | -static void panic_on_rcu_stall(void)  | 
|---|
| 1402 |  | -{  | 
|---|
| 1403 |  | -	if (sysctl_panic_on_rcu_stall)  | 
|---|
| 1404 |  | -		panic("RCU Stall\n");  | 
|---|
| 1405 |  | -}  | 
|---|
| 1406 |  | -  | 
|---|
| 1407 |  | -static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gp_seq)  | 
|---|
| 1408 |  | -{  | 
|---|
| 1409 |  | -	int cpu;  | 
|---|
| 1410 |  | -	unsigned long flags;  | 
|---|
| 1411 |  | -	unsigned long gpa;  | 
|---|
| 1412 |  | -	unsigned long j;  | 
|---|
| 1413 |  | -	int ndetected = 0;  | 
|---|
| 1414 |  | -	struct rcu_node *rnp = rcu_get_root(rsp);  | 
|---|
| 1415 |  | -	long totqlen = 0;  | 
|---|
| 1416 |  | -  | 
|---|
| 1417 |  | -	/* Kick and suppress, if so configured. */  | 
|---|
| 1418 |  | -	rcu_stall_kick_kthreads(rsp);  | 
|---|
| 1419 |  | -	if (rcu_cpu_stall_suppress)  | 
|---|
| 1420 |  | -		return;  | 
|---|
| 1421 |  | -  | 
|---|
| 1422 |  | -	/*  | 
|---|
| 1423 |  | -	 * OK, time to rat on our buddy...  | 
|---|
| 1424 |  | -	 * See Documentation/RCU/stallwarn.txt for info on how to debug  | 
|---|
| 1425 |  | -	 * RCU CPU stall warnings.  | 
|---|
| 1426 |  | -	 */  | 
|---|
| 1427 |  | -	pr_err("INFO: %s detected stalls on CPUs/tasks:", rsp->name);  | 
|---|
| 1428 |  | -	print_cpu_stall_info_begin();  | 
|---|
| 1429 |  | -	rcu_for_each_leaf_node(rsp, rnp) {  | 
|---|
| 1430 |  | -		raw_spin_lock_irqsave_rcu_node(rnp, flags);  | 
|---|
| 1431 |  | -		ndetected += rcu_print_task_stall(rnp);  | 
|---|
| 1432 |  | -		if (rnp->qsmask != 0) {  | 
|---|
| 1433 |  | -			for_each_leaf_node_possible_cpu(rnp, cpu)  | 
|---|
| 1434 |  | -				if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {  | 
|---|
| 1435 |  | -					print_cpu_stall_info(rsp, cpu);  | 
|---|
| 1436 |  | -					ndetected++;  | 
|---|
| 1437 |  | -				}  | 
|---|
| 1438 |  | -		}  | 
|---|
| 1439 |  | -		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  | 
|---|
| 1440 |  | -	}  | 
|---|
| 1441 |  | -  | 
|---|
| 1442 |  | -	print_cpu_stall_info_end();  | 
|---|
| 1443 |  | -	for_each_possible_cpu(cpu)  | 
|---|
| 1444 |  | -		totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda,  | 
|---|
| 1445 |  | -							    cpu)->cblist);  | 
|---|
| 1446 |  | -	pr_cont("(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n",  | 
|---|
| 1447 |  | -	       smp_processor_id(), (long)(jiffies - rsp->gp_start),  | 
|---|
| 1448 |  | -	       (long)rcu_seq_current(&rsp->gp_seq), totqlen);  | 
|---|
| 1449 |  | -	if (ndetected) {  | 
|---|
| 1450 |  | -		rcu_dump_cpu_stacks(rsp);  | 
|---|
| 1451 |  | -  | 
|---|
| 1452 |  | -		/* Complain about tasks blocking the grace period. */  | 
|---|
| 1453 |  | -		rcu_print_detail_task_stall(rsp);  | 
|---|
| 1454 |  | -	} else {  | 
|---|
| 1455 |  | -		if (rcu_seq_current(&rsp->gp_seq) != gp_seq) {  | 
|---|
| 1456 |  | -			pr_err("INFO: Stall ended before state dump start\n");  | 
|---|
| 1457 |  | -		} else {  | 
|---|
| 1458 |  | -			j = jiffies;  | 
|---|
| 1459 |  | -			gpa = READ_ONCE(rsp->gp_activity);  | 
|---|
| 1460 |  | -			pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",  | 
|---|
| 1461 |  | -			       rsp->name, j - gpa, j, gpa,  | 
|---|
| 1462 |  | -			       jiffies_till_next_fqs,  | 
|---|
| 1463 |  | -			       rcu_get_root(rsp)->qsmask);  | 
|---|
| 1464 |  | -			/* In this case, the current CPU might be at fault. */  | 
|---|
| 1465 |  | -			sched_show_task(current);  | 
|---|
| 1466 |  | -		}  | 
|---|
| 1467 |  | -	}  | 
|---|
| 1468 |  | -	/* Rewrite if needed in case of slow consoles. */  | 
|---|
| 1469 |  | -	if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall)))  | 
|---|
| 1470 |  | -		WRITE_ONCE(rsp->jiffies_stall,  | 
|---|
| 1471 |  | -			   jiffies + 3 * rcu_jiffies_till_stall_check() + 3);  | 
|---|
| 1472 |  | -  | 
|---|
| 1473 |  | -	rcu_check_gp_kthread_starvation(rsp);  | 
|---|
| 1474 |  | -  | 
|---|
| 1475 |  | -	atomic_notifier_call_chain(&rcu_stall_notifier_list, 0, NULL);  | 
|---|
| 1476 |  | -  | 
|---|
| 1477 |  | -	panic_on_rcu_stall();  | 
|---|
| 1478 |  | -  | 
|---|
| 1479 |  | -	force_quiescent_state(rsp);  /* Kick them all. */  | 
|---|
| 1480 |  | -}  | 
|---|
| 1481 |  | -  | 
|---|
| 1482 |  | -static void print_cpu_stall(struct rcu_state *rsp)  | 
|---|
| 1483 |  | -{  | 
|---|
| 1484 |  | -	int cpu;  | 
|---|
| 1485 |  | -	unsigned long flags;  | 
|---|
| 1486 |  | -	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);  | 
|---|
| 1487 |  | -	struct rcu_node *rnp = rcu_get_root(rsp);  | 
|---|
| 1488 |  | -	long totqlen = 0;  | 
|---|
| 1489 |  | -  | 
|---|
| 1490 |  | -	/* Kick and suppress, if so configured. */  | 
|---|
| 1491 |  | -	rcu_stall_kick_kthreads(rsp);  | 
|---|
| 1492 |  | -	if (rcu_cpu_stall_suppress)  | 
|---|
| 1493 |  | -		return;  | 
|---|
| 1494 |  | -  | 
|---|
| 1495 |  | -	/*  | 
|---|
| 1496 |  | -	 * OK, time to rat on ourselves...  | 
|---|
| 1497 |  | -	 * See Documentation/RCU/stallwarn.txt for info on how to debug  | 
|---|
| 1498 |  | -	 * RCU CPU stall warnings.  | 
|---|
| 1499 |  | -	 */  | 
|---|
| 1500 |  | -	pr_err("INFO: %s self-detected stall on CPU", rsp->name);  | 
|---|
| 1501 |  | -	print_cpu_stall_info_begin();  | 
|---|
| 1502 |  | -	raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags);  | 
|---|
| 1503 |  | -	print_cpu_stall_info(rsp, smp_processor_id());  | 
|---|
| 1504 |  | -	raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags);  | 
|---|
| 1505 |  | -	print_cpu_stall_info_end();  | 
|---|
| 1506 |  | -	for_each_possible_cpu(cpu)  | 
|---|
| 1507 |  | -		totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda,  | 
|---|
| 1508 |  | -							    cpu)->cblist);  | 
|---|
| 1509 |  | -	pr_cont(" (t=%lu jiffies g=%ld q=%lu)\n",  | 
|---|
| 1510 |  | -		jiffies - rsp->gp_start,  | 
|---|
| 1511 |  | -		(long)rcu_seq_current(&rsp->gp_seq), totqlen);  | 
|---|
| 1512 |  | -  | 
|---|
| 1513 |  | -	rcu_check_gp_kthread_starvation(rsp);  | 
|---|
| 1514 |  | -  | 
|---|
| 1515 |  | -	rcu_dump_cpu_stacks(rsp);  | 
|---|
| 1516 |  | -  | 
|---|
| 1517 |  | -	raw_spin_lock_irqsave_rcu_node(rnp, flags);  | 
|---|
| 1518 |  | -	/* Rewrite if needed in case of slow consoles. */  | 
|---|
| 1519 |  | -	if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall)))  | 
|---|
| 1520 |  | -		WRITE_ONCE(rsp->jiffies_stall,  | 
|---|
| 1521 |  | -			   jiffies + 3 * rcu_jiffies_till_stall_check() + 3);  | 
|---|
| 1522 |  | -	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  | 
|---|
| 1523 |  | -  | 
|---|
| 1524 |  | -	panic_on_rcu_stall();  | 
|---|
| 1525 |  | -  | 
|---|
| 1526 |  | -	/*  | 
|---|
| 1527 |  | -	 * Attempt to revive the RCU machinery by forcing a context switch.  | 
|---|
| 1528 |  | -	 *  | 
|---|
| 1529 |  | -	 * A context switch would normally allow the RCU state machine to make  | 
|---|
| 1530 |  | -	 * progress and it could be we're stuck in kernel space without context  | 
|---|
| 1531 |  | -	 * switches for an entirely unreasonable amount of time.  | 
|---|
| 1532 |  | -	 */  | 
|---|
| 1533 |  | -	resched_cpu(smp_processor_id());  | 
|---|
| 1534 |  | -}  | 
|---|
| 1535 |  | -  | 
|---|
| 1536 |  | -static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)  | 
|---|
| 1537 |  | -{  | 
|---|
| 1538 |  | -	unsigned long gs1;  | 
|---|
| 1539 |  | -	unsigned long gs2;  | 
|---|
| 1540 |  | -	unsigned long gps;  | 
|---|
| 1541 |  | -	unsigned long j;  | 
|---|
| 1542 |  | -	unsigned long jn;  | 
|---|
| 1543 |  | -	unsigned long js;  | 
|---|
| 1544 |  | -	struct rcu_node *rnp;  | 
|---|
| 1545 |  | -  | 
|---|
| 1546 |  | -	if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) ||  | 
|---|
| 1547 |  | -	    !rcu_gp_in_progress(rsp))  | 
|---|
| 1548 |  | -		return;  | 
|---|
| 1549 |  | -	rcu_stall_kick_kthreads(rsp);  | 
|---|
| 1550 |  | -	j = jiffies;  | 
|---|
| 1551 |  | -  | 
|---|
| 1552 |  | -	/*  | 
|---|
| 1553 |  | -	 * Lots of memory barriers to reject false positives.  | 
|---|
| 1554 |  | -	 *  | 
|---|
| 1555 |  | -	 * The idea is to pick up rsp->gp_seq, then rsp->jiffies_stall,  | 
|---|
| 1556 |  | -	 * then rsp->gp_start, and finally another copy of rsp->gp_seq.  | 
|---|
| 1557 |  | -	 * These values are updated in the opposite order with memory  | 
|---|
| 1558 |  | -	 * barriers (or equivalent) during grace-period initialization  | 
|---|
| 1559 |  | -	 * and cleanup.  Now, a false positive can occur if we get an new  | 
|---|
| 1560 |  | -	 * value of rsp->gp_start and a old value of rsp->jiffies_stall.  | 
|---|
| 1561 |  | -	 * But given the memory barriers, the only way that this can happen  | 
|---|
| 1562 |  | -	 * is if one grace period ends and another starts between these  | 
|---|
| 1563 |  | -	 * two fetches.  This is detected by comparing the second fetch  | 
|---|
| 1564 |  | -	 * of rsp->gp_seq with the previous fetch from rsp->gp_seq.  | 
|---|
| 1565 |  | -	 *  | 
|---|
| 1566 |  | -	 * Given this check, comparisons of jiffies, rsp->jiffies_stall,  | 
|---|
| 1567 |  | -	 * and rsp->gp_start suffice to forestall false positives.  | 
|---|
| 1568 |  | -	 */  | 
|---|
| 1569 |  | -	gs1 = READ_ONCE(rsp->gp_seq);  | 
|---|
| 1570 |  | -	smp_rmb(); /* Pick up ->gp_seq first... */  | 
|---|
| 1571 |  | -	js = READ_ONCE(rsp->jiffies_stall);  | 
|---|
| 1572 |  | -	smp_rmb(); /* ...then ->jiffies_stall before the rest... */  | 
|---|
| 1573 |  | -	gps = READ_ONCE(rsp->gp_start);  | 
|---|
| 1574 |  | -	smp_rmb(); /* ...and finally ->gp_start before ->gp_seq again. */  | 
|---|
| 1575 |  | -	gs2 = READ_ONCE(rsp->gp_seq);  | 
|---|
| 1576 |  | -	if (gs1 != gs2 ||  | 
|---|
| 1577 |  | -	    ULONG_CMP_LT(j, js) ||  | 
|---|
| 1578 |  | -	    ULONG_CMP_GE(gps, js))  | 
|---|
| 1579 |  | -		return; /* No stall or GP completed since entering function. */  | 
|---|
| 1580 |  | -	rnp = rdp->mynode;  | 
|---|
| 1581 |  | -	jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;  | 
|---|
| 1582 |  | -	if (rcu_gp_in_progress(rsp) &&  | 
|---|
| 1583 |  | -	    (READ_ONCE(rnp->qsmask) & rdp->grpmask) &&  | 
|---|
| 1584 |  | -	    cmpxchg(&rsp->jiffies_stall, js, jn) == js) {  | 
|---|
| 1585 |  | -  | 
|---|
| 1586 |  | -		/* We haven't checked in, so go dump stack. */  | 
|---|
| 1587 |  | -		print_cpu_stall(rsp);  | 
|---|
| 1588 |  | -  | 
|---|
| 1589 |  | -	} else if (rcu_gp_in_progress(rsp) &&  | 
|---|
| 1590 |  | -		   ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) &&  | 
|---|
| 1591 |  | -		   cmpxchg(&rsp->jiffies_stall, js, jn) == js) {  | 
|---|
| 1592 |  | -  | 
|---|
| 1593 |  | -		/* They had a few time units to dump stack, so complain. */  | 
|---|
| 1594 |  | -		print_other_cpu_stall(rsp, gs2);  | 
|---|
| 1595 |  | -	}  | 
|---|
| 1596 |  | -}  | 
|---|
| 1597 |  | -  | 
|---|
| 1598 |  | -/**  | 
|---|
| 1599 |  | - * rcu_cpu_stall_reset - prevent further stall warnings in current grace period  | 
|---|
| 1600 |  | - *  | 
|---|
| 1601 |  | - * Set the stall-warning timeout way off into the future, thus preventing  | 
|---|
| 1602 |  | - * any RCU CPU stall-warning messages from appearing in the current set of  | 
|---|
| 1603 |  | - * RCU grace periods.  | 
|---|
| 1604 |  | - *  | 
|---|
| 1605 |  | - * The caller must disable hard irqs.  | 
|---|
| 1606 |  | - */  | 
|---|
| 1607 |  | -void rcu_cpu_stall_reset(void)  | 
|---|
| 1608 |  | -{  | 
|---|
| 1609 |  | -	struct rcu_state *rsp;  | 
|---|
| 1610 |  | -  | 
|---|
| 1611 |  | -	for_each_rcu_flavor(rsp)  | 
|---|
| 1612 |  | -		WRITE_ONCE(rsp->jiffies_stall, jiffies + ULONG_MAX / 2);  | 
|---|
| 1613 |  | -}  | 
|---|
| 1614 |  | -  | 
|---|
| 1615 | 1335 |  /* Trace-event wrapper function for trace_rcu_future_grace_period.  */ | 
|---|
| 1616 | 1336 |  static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp, | 
|---|
| 1617 | 1337 |  			      unsigned long gp_seq_req, const char *s) | 
|---|
| 1618 | 1338 |  { | 
|---|
| 1619 |  | -	trace_rcu_future_grace_period(rdp->rsp->name, rnp->gp_seq, gp_seq_req,  | 
|---|
| 1620 |  | -				      rnp->level, rnp->grplo, rnp->grphi, s);  | 
|---|
 | 1339 | +	trace_rcu_future_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),  | 
|---|
 | 1340 | +				      gp_seq_req, rnp->level,  | 
|---|
 | 1341 | +				      rnp->grplo, rnp->grphi, s);  | 
|---|
| 1621 | 1342 |  } | 
|---|
| 1622 | 1343 |   | 
|---|
| 1623 | 1344 |  /* | 
|---|
| .. | .. | 
|---|
| 1640 | 1361 |  			      unsigned long gp_seq_req) | 
|---|
| 1641 | 1362 |  { | 
|---|
| 1642 | 1363 |  	bool ret = false; | 
|---|
| 1643 |  | -	struct rcu_state *rsp = rdp->rsp;  | 
|---|
| 1644 | 1364 |  	struct rcu_node *rnp; | 
|---|
| 1645 | 1365 |   | 
|---|
| 1646 | 1366 |  	/* | 
|---|
| .. | .. | 
|---|
| 1665 | 1385 |  					  TPS("Prestarted")); | 
|---|
| 1666 | 1386 |  			goto unlock_out; | 
|---|
| 1667 | 1387 |  		} | 
|---|
| 1668 |  | -		rnp->gp_seq_needed = gp_seq_req;  | 
|---|
 | 1388 | +		WRITE_ONCE(rnp->gp_seq_needed, gp_seq_req);  | 
|---|
| 1669 | 1389 |  		if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq))) { | 
|---|
| 1670 | 1390 |  			/* | 
|---|
| 1671 | 1391 |  			 * We just marked the leaf or internal node, and a | 
|---|
| .. | .. | 
|---|
| 1684 | 1404 |  	} | 
|---|
| 1685 | 1405 |   | 
|---|
| 1686 | 1406 |  	/* If GP already in progress, just leave, otherwise start one. */ | 
|---|
| 1687 |  | -	if (rcu_gp_in_progress(rsp)) {  | 
|---|
 | 1407 | +	if (rcu_gp_in_progress()) {  | 
|---|
| 1688 | 1408 |  		trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("Startedleafroot")); | 
|---|
| 1689 | 1409 |  		goto unlock_out; | 
|---|
| 1690 | 1410 |  	} | 
|---|
| 1691 | 1411 |  	trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("Startedroot")); | 
|---|
| 1692 |  | -	WRITE_ONCE(rsp->gp_flags, rsp->gp_flags | RCU_GP_FLAG_INIT);  | 
|---|
| 1693 |  | -	rsp->gp_req_activity = jiffies;  | 
|---|
| 1694 |  | -	if (!rsp->gp_kthread) {  | 
|---|
 | 1412 | +	WRITE_ONCE(rcu_state.gp_flags, rcu_state.gp_flags | RCU_GP_FLAG_INIT);  | 
|---|
 | 1413 | +	WRITE_ONCE(rcu_state.gp_req_activity, jiffies);  | 
|---|
 | 1414 | +	if (!READ_ONCE(rcu_state.gp_kthread)) {  | 
|---|
| 1695 | 1415 |  		trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("NoGPkthread")); | 
|---|
| 1696 | 1416 |  		goto unlock_out; | 
|---|
| 1697 | 1417 |  	} | 
|---|
| 1698 |  | -	trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gp_seq), TPS("newreq"));  | 
|---|
 | 1418 | +	trace_rcu_grace_period(rcu_state.name, data_race(rcu_state.gp_seq), TPS("newreq"));  | 
|---|
| 1699 | 1419 |  	ret = true;  /* Caller must wake GP kthread. */ | 
|---|
| 1700 | 1420 |  unlock_out: | 
|---|
| 1701 | 1421 |  	/* Push furthest requested GP to leaf node and rcu_data structure. */ | 
|---|
| 1702 | 1422 |  	if (ULONG_CMP_LT(gp_seq_req, rnp->gp_seq_needed)) { | 
|---|
| 1703 |  | -		rnp_start->gp_seq_needed = rnp->gp_seq_needed;  | 
|---|
| 1704 |  | -		rdp->gp_seq_needed = rnp->gp_seq_needed;  | 
|---|
 | 1423 | +		WRITE_ONCE(rnp_start->gp_seq_needed, rnp->gp_seq_needed);  | 
|---|
 | 1424 | +		WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed);  | 
|---|
| 1705 | 1425 |  	} | 
|---|
| 1706 | 1426 |  	if (rnp != rnp_start) | 
|---|
| 1707 | 1427 |  		raw_spin_unlock_rcu_node(rnp); | 
|---|
| .. | .. | 
|---|
| 1712 | 1432 |   * Clean up any old requests for the just-ended grace period.  Also return | 
|---|
| 1713 | 1433 |   * whether any additional grace periods have been requested. | 
|---|
| 1714 | 1434 |   */ | 
|---|
| 1715 |  | -static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)  | 
|---|
 | 1435 | +static bool rcu_future_gp_cleanup(struct rcu_node *rnp)  | 
|---|
| 1716 | 1436 |  { | 
|---|
| 1717 | 1437 |  	bool needmore; | 
|---|
| 1718 |  | -	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);  | 
|---|
 | 1438 | +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);  | 
|---|
| 1719 | 1439 |   | 
|---|
| 1720 | 1440 |  	needmore = ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed); | 
|---|
| 1721 | 1441 |  	if (!needmore) | 
|---|
| .. | .. | 
|---|
| 1726 | 1446 |  } | 
|---|
| 1727 | 1447 |   | 
|---|
| 1728 | 1448 |  /* | 
|---|
| 1729 |  | - * Awaken the grace-period kthread.  Don't do a self-awaken (unless in  | 
|---|
| 1730 |  | - * an interrupt or softirq handler), and don't bother awakening when there  | 
|---|
| 1731 |  | - * is nothing for the grace-period kthread to do (as in several CPUs raced  | 
|---|
| 1732 |  | - * to awaken, and we lost), and finally don't try to awaken a kthread that  | 
|---|
| 1733 |  | - * has not yet been created.  If all those checks are passed, track some  | 
|---|
| 1734 |  | - * debug information and awaken.  | 
|---|
 | 1449 | + * Awaken the grace-period kthread.  Don't do a self-awaken (unless in an  | 
|---|
 | 1450 | + * interrupt or softirq handler, in which case we just might immediately  | 
|---|
 | 1451 | + * sleep upon return, resulting in a grace-period hang), and don't bother  | 
|---|
 | 1452 | + * awakening when there is nothing for the grace-period kthread to do  | 
|---|
|  | 1454 | + * (as in several CPUs raced to awaken, and we lost), and finally don't try  | 
|---|
 | 1454 | + * to awaken a kthread that has not yet been created.  If all those checks  | 
|---|
 | 1455 | + * are passed, track some debug information and awaken.  | 
|---|
| 1735 | 1456 |   * | 
|---|
| 1736 | 1457 |   * So why do the self-wakeup when in an interrupt or softirq handler | 
|---|
| 1737 | 1458 |   * in the grace-period kthread's context?  Because the kthread might have | 
|---|
| .. | .. | 
|---|
| 1739 | 1460 |   * pre-sleep check of the awaken condition.  In this case, a wakeup really | 
|---|
| 1740 | 1461 |   * is required, and is therefore supplied. | 
|---|
| 1741 | 1462 |   */ | 
|---|
| 1742 |  | -static void rcu_gp_kthread_wake(struct rcu_state *rsp)  | 
|---|
 | 1463 | +static void rcu_gp_kthread_wake(void)  | 
|---|
| 1743 | 1464 |  { | 
|---|
| 1744 |  | -	if ((current == rsp->gp_kthread &&  | 
|---|
| 1745 |  | -	     !in_interrupt() && !in_serving_softirq()) ||  | 
|---|
| 1746 |  | -	    !READ_ONCE(rsp->gp_flags) ||  | 
|---|
| 1747 |  | -	    !rsp->gp_kthread)  | 
|---|
 | 1465 | +	struct task_struct *t = READ_ONCE(rcu_state.gp_kthread);  | 
|---|
 | 1466 | +  | 
|---|
 | 1467 | +	if ((current == t && !in_irq() && !in_serving_softirq()) ||  | 
|---|
 | 1468 | +	    !READ_ONCE(rcu_state.gp_flags) || !t)  | 
|---|
| 1748 | 1469 |  		return; | 
|---|
| 1749 |  | -	swake_up_one(&rsp->gp_wq);  | 
|---|
 | 1470 | +	WRITE_ONCE(rcu_state.gp_wake_time, jiffies);  | 
|---|
 | 1471 | +	WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq));  | 
|---|
 | 1472 | +	swake_up_one(&rcu_state.gp_wq);  | 
|---|
| 1750 | 1473 |  } | 
|---|
| 1751 | 1474 |   | 
|---|
| 1752 | 1475 |  /* | 
|---|
| .. | .. | 
|---|
| 1761 | 1484 |   * | 
|---|
| 1762 | 1485 |   * The caller must hold rnp->lock with interrupts disabled. | 
|---|
| 1763 | 1486 |   */ | 
|---|
| 1764 |  | -static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,  | 
|---|
| 1765 |  | -			       struct rcu_data *rdp)  | 
|---|
 | 1487 | +static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp)  | 
|---|
| 1766 | 1488 |  { | 
|---|
| 1767 | 1489 |  	unsigned long gp_seq_req; | 
|---|
| 1768 | 1490 |  	bool ret = false; | 
|---|
| 1769 | 1491 |   | 
|---|
 | 1492 | +	rcu_lockdep_assert_cblist_protected(rdp);  | 
|---|
| 1770 | 1493 |  	raw_lockdep_assert_held_rcu_node(rnp); | 
|---|
| 1771 | 1494 |   | 
|---|
| 1772 | 1495 |  	/* If no pending (not yet ready to invoke) callbacks, nothing to do. */ | 
|---|
| .. | .. | 
|---|
| 1783 | 1506 |  	 * accelerating callback invocation to an earlier grace-period | 
|---|
| 1784 | 1507 |  	 * number. | 
|---|
| 1785 | 1508 |  	 */ | 
|---|
| 1786 |  | -	gp_seq_req = rcu_seq_snap(&rsp->gp_seq);  | 
|---|
 | 1509 | +	gp_seq_req = rcu_seq_snap(&rcu_state.gp_seq);  | 
|---|
| 1787 | 1510 |  	if (rcu_segcblist_accelerate(&rdp->cblist, gp_seq_req)) | 
|---|
| 1788 | 1511 |  		ret = rcu_start_this_gp(rnp, rdp, gp_seq_req); | 
|---|
| 1789 | 1512 |   | 
|---|
| 1790 | 1513 |  	/* Trace depending on how much we were able to accelerate. */ | 
|---|
| 1791 | 1514 |  	if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL)) | 
|---|
| 1792 |  | -		trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("AccWaitCB"));  | 
|---|
 | 1515 | +		trace_rcu_grace_period(rcu_state.name, gp_seq_req, TPS("AccWaitCB"));  | 
|---|
| 1793 | 1516 |  	else | 
|---|
| 1794 |  | -		trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("AccReadyCB"));  | 
|---|
 | 1517 | +		trace_rcu_grace_period(rcu_state.name, gp_seq_req, TPS("AccReadyCB"));  | 
|---|
 | 1518 | +  | 
|---|
| 1795 | 1519 |  	return ret; | 
|---|
| 1796 | 1520 |  } | 
|---|
| 1797 | 1521 |   | 
|---|
| .. | .. | 
|---|
| 1802 | 1526 |   * that a new grace-period request be made, invokes rcu_accelerate_cbs() | 
|---|
| 1803 | 1527 |   * while holding the leaf rcu_node structure's ->lock. | 
|---|
| 1804 | 1528 |   */ | 
|---|
| 1805 |  | -static void rcu_accelerate_cbs_unlocked(struct rcu_state *rsp,  | 
|---|
| 1806 |  | -					struct rcu_node *rnp,  | 
|---|
 | 1529 | +static void rcu_accelerate_cbs_unlocked(struct rcu_node *rnp,  | 
|---|
| 1807 | 1530 |  					struct rcu_data *rdp) | 
|---|
| 1808 | 1531 |  { | 
|---|
| 1809 | 1532 |  	unsigned long c; | 
|---|
| 1810 | 1533 |  	bool needwake; | 
|---|
| 1811 | 1534 |   | 
|---|
| 1812 |  | -	lockdep_assert_irqs_disabled();  | 
|---|
| 1813 |  | -	c = rcu_seq_snap(&rsp->gp_seq);  | 
|---|
| 1814 |  | -	if (!rdp->gpwrap && ULONG_CMP_GE(rdp->gp_seq_needed, c)) {  | 
|---|
 | 1535 | +	rcu_lockdep_assert_cblist_protected(rdp);  | 
|---|
 | 1536 | +	c = rcu_seq_snap(&rcu_state.gp_seq);  | 
|---|
 | 1537 | +	if (!READ_ONCE(rdp->gpwrap) && ULONG_CMP_GE(rdp->gp_seq_needed, c)) {  | 
|---|
| 1815 | 1538 |  		/* Old request still live, so mark recent callbacks. */ | 
|---|
| 1816 | 1539 |  		(void)rcu_segcblist_accelerate(&rdp->cblist, c); | 
|---|
| 1817 | 1540 |  		return; | 
|---|
| 1818 | 1541 |  	} | 
|---|
| 1819 | 1542 |  	raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ | 
|---|
| 1820 |  | -	needwake = rcu_accelerate_cbs(rsp, rnp, rdp);  | 
|---|
 | 1543 | +	needwake = rcu_accelerate_cbs(rnp, rdp);  | 
|---|
| 1821 | 1544 |  	raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ | 
|---|
| 1822 | 1545 |  	if (needwake) | 
|---|
| 1823 |  | -		rcu_gp_kthread_wake(rsp);  | 
|---|
 | 1546 | +		rcu_gp_kthread_wake();  | 
|---|
| 1824 | 1547 |  } | 
|---|
| 1825 | 1548 |   | 
|---|
| 1826 | 1549 |  /* | 
|---|
| .. | .. | 
|---|
| 1833 | 1556 |   * | 
|---|
| 1834 | 1557 |   * The caller must hold rnp->lock with interrupts disabled. | 
|---|
| 1835 | 1558 |   */ | 
|---|
| 1836 |  | -static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,  | 
|---|
| 1837 |  | -			    struct rcu_data *rdp)  | 
|---|
 | 1559 | +static bool rcu_advance_cbs(struct rcu_node *rnp, struct rcu_data *rdp)  | 
|---|
| 1838 | 1560 |  { | 
|---|
 | 1561 | +	rcu_lockdep_assert_cblist_protected(rdp);  | 
|---|
| 1839 | 1562 |  	raw_lockdep_assert_held_rcu_node(rnp); | 
|---|
| 1840 | 1563 |   | 
|---|
| 1841 | 1564 |  	/* If no pending (not yet ready to invoke) callbacks, nothing to do. */ | 
|---|
| .. | .. | 
|---|
| 1849 | 1572 |  	rcu_segcblist_advance(&rdp->cblist, rnp->gp_seq); | 
|---|
| 1850 | 1573 |   | 
|---|
| 1851 | 1574 |  	/* Classify any remaining callbacks. */ | 
|---|
| 1852 |  | -	return rcu_accelerate_cbs(rsp, rnp, rdp);  | 
|---|
 | 1575 | +	return rcu_accelerate_cbs(rnp, rdp);  | 
|---|
 | 1576 | +}  | 
|---|
 | 1577 | +  | 
|---|
 | 1578 | +/*  | 
|---|
 | 1579 | + * Move and classify callbacks, but only if doing so won't require  | 
|---|
 | 1580 | + * that the RCU grace-period kthread be awakened.  | 
|---|
 | 1581 | + */  | 
|---|
 | 1582 | +static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp,  | 
|---|
 | 1583 | +						  struct rcu_data *rdp)  | 
|---|
 | 1584 | +{  | 
|---|
 | 1585 | +	rcu_lockdep_assert_cblist_protected(rdp);  | 
|---|
 | 1586 | +	if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || !raw_spin_trylock_rcu_node(rnp))  | 
|---|
 | 1587 | +		return;  | 
|---|
 | 1588 | +	// The grace period cannot end while we hold the rcu_node lock.  | 
|---|
 | 1589 | +	if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq)))  | 
|---|
 | 1590 | +		WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp));  | 
|---|
 | 1591 | +	raw_spin_unlock_rcu_node(rnp);  | 
|---|
 | 1592 | +}  | 
|---|
 | 1593 | +  | 
|---|
 | 1594 | +/*  | 
|---|
 | 1595 | + * In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels, attempt to generate a  | 
|---|
 | 1596 | + * quiescent state.  This is intended to be invoked when the CPU notices  | 
|---|
 | 1597 | + * a new grace period.  | 
|---|
 | 1598 | + */  | 
|---|
 | 1599 | +static void rcu_strict_gp_check_qs(void)  | 
|---|
 | 1600 | +{  | 
|---|
 | 1601 | +	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {  | 
|---|
 | 1602 | +		rcu_read_lock();  | 
|---|
 | 1603 | +		rcu_read_unlock();  | 
|---|
 | 1604 | +	}  | 
|---|
| 1853 | 1605 |  } | 
|---|
| 1854 | 1606 |   | 
|---|
| 1855 | 1607 |  /* | 
|---|
| .. | .. | 
|---|
| 1858 | 1610 |   * structure corresponding to the current CPU, and must have irqs disabled. | 
|---|
| 1859 | 1611 |   * Returns true if the grace-period kthread needs to be awakened. | 
|---|
| 1860 | 1612 |   */ | 
|---|
| 1861 |  | -static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,  | 
|---|
| 1862 |  | -			      struct rcu_data *rdp)  | 
|---|
 | 1613 | +static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)  | 
|---|
| 1863 | 1614 |  { | 
|---|
| 1864 |  | -	bool ret;  | 
|---|
| 1865 |  | -	bool need_gp;  | 
|---|
 | 1615 | +	bool ret = false;  | 
|---|
 | 1616 | +	bool need_qs;  | 
|---|
 | 1617 | +	const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&  | 
|---|
 | 1618 | +			       rcu_segcblist_is_offloaded(&rdp->cblist);  | 
|---|
| 1866 | 1619 |   | 
|---|
| 1867 | 1620 |  	raw_lockdep_assert_held_rcu_node(rnp); | 
|---|
| 1868 | 1621 |   | 
|---|
| .. | .. | 
|---|
| 1872 | 1625 |  	/* Handle the ends of any preceding grace periods first. */ | 
|---|
| 1873 | 1626 |  	if (rcu_seq_completed_gp(rdp->gp_seq, rnp->gp_seq) || | 
|---|
| 1874 | 1627 |  	    unlikely(READ_ONCE(rdp->gpwrap))) { | 
|---|
| 1875 |  | -		ret = rcu_advance_cbs(rsp, rnp, rdp); /* Advance callbacks. */  | 
|---|
| 1876 |  | -		trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("cpuend"));  | 
|---|
 | 1628 | +		if (!offloaded)  | 
|---|
 | 1629 | +			ret = rcu_advance_cbs(rnp, rdp); /* Advance CBs. */  | 
|---|
 | 1630 | +		rdp->core_needs_qs = false;  | 
|---|
 | 1631 | +		trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuend"));  | 
|---|
| 1877 | 1632 |  	} else { | 
|---|
| 1878 |  | -		ret = rcu_accelerate_cbs(rsp, rnp, rdp); /* Recent callbacks. */  | 
|---|
 | 1633 | +		if (!offloaded)  | 
|---|
 | 1634 | +			ret = rcu_accelerate_cbs(rnp, rdp); /* Recent CBs. */  | 
|---|
 | 1635 | +		if (rdp->core_needs_qs)  | 
|---|
 | 1636 | +			rdp->core_needs_qs = !!(rnp->qsmask & rdp->grpmask);  | 
|---|
| 1879 | 1637 |  	} | 
|---|
| 1880 | 1638 |   | 
|---|
| 1881 | 1639 |  	/* Now handle the beginnings of any new-to-this-CPU grace periods. */ | 
|---|
| .. | .. | 
|---|
| 1886 | 1644 |  		 * set up to detect a quiescent state, otherwise don't | 
|---|
| 1887 | 1645 |  		 * go looking for one. | 
|---|
| 1888 | 1646 |  		 */ | 
|---|
| 1889 |  | -		trace_rcu_grace_period(rsp->name, rnp->gp_seq, TPS("cpustart"));  | 
|---|
| 1890 |  | -		need_gp = !!(rnp->qsmask & rdp->grpmask);  | 
|---|
| 1891 |  | -		rdp->cpu_no_qs.b.norm = need_gp;  | 
|---|
| 1892 |  | -		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr);  | 
|---|
| 1893 |  | -		rdp->core_needs_qs = need_gp;  | 
|---|
 | 1647 | +		trace_rcu_grace_period(rcu_state.name, rnp->gp_seq, TPS("cpustart"));  | 
|---|
 | 1648 | +		need_qs = !!(rnp->qsmask & rdp->grpmask);  | 
|---|
 | 1649 | +		rdp->cpu_no_qs.b.norm = need_qs;  | 
|---|
 | 1650 | +		rdp->core_needs_qs = need_qs;  | 
|---|
| 1894 | 1651 |  		zero_cpu_stall_ticks(rdp); | 
|---|
| 1895 | 1652 |  	} | 
|---|
| 1896 | 1653 |  	rdp->gp_seq = rnp->gp_seq;  /* Remember new grace-period state. */ | 
|---|
| 1897 |  | -	if (ULONG_CMP_GE(rnp->gp_seq_needed, rdp->gp_seq_needed) || rdp->gpwrap)  | 
|---|
| 1898 |  | -		rdp->gp_seq_needed = rnp->gp_seq_needed;  | 
|---|
 | 1654 | +	if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap)  | 
|---|
 | 1655 | +		WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed);  | 
|---|
| 1899 | 1656 |  	WRITE_ONCE(rdp->gpwrap, false); | 
|---|
| 1900 | 1657 |  	rcu_gpnum_ovf(rnp, rdp); | 
|---|
| 1901 | 1658 |  	return ret; | 
|---|
| 1902 | 1659 |  } | 
|---|
| 1903 | 1660 |   | 
|---|
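
The ULONG_CMP_LT()/ULONG_CMP_GE() tests above compare free-running sequence numbers in a wrap-safe way, by examining the modular difference rather than the raw values. A minimal demonstration follows, using what I believe are the rcupdate.h definitions; treat the exact macro bodies as an assumption.

```c
#include <assert.h>
#include <limits.h>

/* Wrap-safe "a >= b" / "a < b" for free-running unsigned counters: a
 * modular difference in the lower half of the range means "a" is at or
 * ahead of "b", even if the raw value of "a" has wrapped past zero. */
#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
#define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))

int main(void)
{
	unsigned long before = ULONG_MAX - 1;	/* counter just before wrap */
	unsigned long after = before + 4;	/* wrapped around to 2 */

	assert(after < before);			/* raw comparison misleads */
	assert(ULONG_CMP_GE(after, before));	/* wrap-safe comparison does not */
	assert(ULONG_CMP_LT(before, after));
	return 0;
}
```
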
| 1904 |  | -static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)  | 
|---|
 | 1661 | +static void note_gp_changes(struct rcu_data *rdp)  | 
|---|
| 1905 | 1662 |  { | 
|---|
| 1906 | 1663 |  	unsigned long flags; | 
|---|
| 1907 | 1664 |  	bool needwake; | 
|---|
| .. | .. | 
|---|
| 1915 | 1672 |  		local_irq_restore(flags); | 
|---|
| 1916 | 1673 |  		return; | 
|---|
| 1917 | 1674 |  	} | 
|---|
| 1918 |  | -	needwake = __note_gp_changes(rsp, rnp, rdp);  | 
|---|
 | 1675 | +	needwake = __note_gp_changes(rnp, rdp);  | 
|---|
| 1919 | 1676 |  	raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | 
|---|
 | 1677 | +	rcu_strict_gp_check_qs();  | 
|---|
| 1920 | 1678 |  	if (needwake) | 
|---|
| 1921 |  | -		rcu_gp_kthread_wake(rsp);  | 
|---|
 | 1679 | +		rcu_gp_kthread_wake();  | 
|---|
| 1922 | 1680 |  } | 
|---|
| 1923 | 1681 |   | 
|---|
| 1924 |  | -static void rcu_gp_slow(struct rcu_state *rsp, int delay)  | 
|---|
 | 1682 | +static void rcu_gp_slow(int delay)  | 
|---|
| 1925 | 1683 |  { | 
|---|
| 1926 | 1684 |  	if (delay > 0 && | 
|---|
| 1927 |  | -	    !(rcu_seq_ctr(rsp->gp_seq) %  | 
|---|
 | 1685 | +	    !(rcu_seq_ctr(rcu_state.gp_seq) %  | 
|---|
| 1928 | 1686 |  	      (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay))) | 
|---|
| 1929 |  | -		schedule_timeout_uninterruptible(delay);  | 
|---|
 | 1687 | +		schedule_timeout_idle(delay);  | 
|---|
 | 1688 | +}  | 
|---|
 | 1689 | +  | 
|---|
 | 1690 | +static unsigned long sleep_duration;  | 
|---|
 | 1691 | +  | 
|---|
 | 1692 | +/* Allow rcutorture to stall the grace-period kthread. */  | 
|---|
 | 1693 | +void rcu_gp_set_torture_wait(int duration)  | 
|---|
 | 1694 | +{  | 
|---|
 | 1695 | +	if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST) && duration > 0)  | 
|---|
 | 1696 | +		WRITE_ONCE(sleep_duration, duration);  | 
|---|
 | 1697 | +}  | 
|---|
 | 1698 | +EXPORT_SYMBOL_GPL(rcu_gp_set_torture_wait);  | 
|---|
 | 1699 | +  | 
|---|
 | 1700 | +/* Actually implement the aforementioned wait. */  | 
|---|
 | 1701 | +static void rcu_gp_torture_wait(void)  | 
|---|
 | 1702 | +{  | 
|---|
 | 1703 | +	unsigned long duration;  | 
|---|
 | 1704 | +  | 
|---|
 | 1705 | +	if (!IS_ENABLED(CONFIG_RCU_TORTURE_TEST))  | 
|---|
 | 1706 | +		return;  | 
|---|
 | 1707 | +	duration = xchg(&sleep_duration, 0UL);  | 
|---|
 | 1708 | +	if (duration > 0) {  | 
|---|
 | 1709 | +		pr_alert("%s: Waiting %lu jiffies\n", __func__, duration);  | 
|---|
 | 1710 | +		schedule_timeout_idle(duration);  | 
|---|
 | 1711 | +		pr_alert("%s: Wait complete\n", __func__);  | 
|---|
 | 1712 | +	}  | 
|---|
 | 1713 | +}  | 
|---|
 | 1714 | +  | 
|---|
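
rcu_gp_torture_wait() consumes the posted duration with xchg() so that one request produces at most one stall, without any lock shared between rcutorture and the grace-period kthread. Here is a user-space sketch of the same claim-and-clear pattern using C11 atomics; the names are invented for the example.

```c
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long stall_request;

/* Producer: ask the consumer to stall for "d" ticks. */
static void post_stall(unsigned long d)
{
	atomic_store(&stall_request, d);
}

/* Consumer: atomically claim any pending request while clearing it, so
 * a single request produces at most one stall. */
static void maybe_stall(void)
{
	unsigned long d = atomic_exchange(&stall_request, 0UL);

	if (d)
		printf("would stall for %lu ticks\n", d);
}

int main(void)
{
	post_stall(42);
	maybe_stall();		/* consumes the request */
	maybe_stall();		/* sees zero, does nothing */
	return 0;
}
```
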
 | 1715 | +/*  | 
|---|
 | 1716 | + * Handler for on_each_cpu() to invoke the target CPU's RCU core  | 
|---|
 | 1717 | + * processing.  | 
|---|
 | 1718 | + */  | 
|---|
 | 1719 | +static void rcu_strict_gp_boundary(void *unused)  | 
|---|
 | 1720 | +{  | 
|---|
 | 1721 | +	invoke_rcu_core();  | 
|---|
| 1930 | 1722 |  } | 
|---|
| 1931 | 1723 |   | 
|---|
| 1932 | 1724 |  /* | 
|---|
| 1933 | 1725 |   * Initialize a new grace period.  Return false if no grace period required. | 
|---|
| 1934 | 1726 |   */ | 
|---|
| 1935 |  | -static bool rcu_gp_init(struct rcu_state *rsp)  | 
|---|
 | 1727 | +static bool rcu_gp_init(void)  | 
|---|
| 1936 | 1728 |  { | 
|---|
| 1937 | 1729 |  	unsigned long flags; | 
|---|
| 1938 | 1730 |  	unsigned long oldmask; | 
|---|
| 1939 | 1731 |  	unsigned long mask; | 
|---|
| 1940 | 1732 |  	struct rcu_data *rdp; | 
|---|
| 1941 |  | -	struct rcu_node *rnp = rcu_get_root(rsp);  | 
|---|
 | 1733 | +	struct rcu_node *rnp = rcu_get_root();  | 
|---|
| 1942 | 1734 |   | 
|---|
| 1943 |  | -	WRITE_ONCE(rsp->gp_activity, jiffies);  | 
|---|
 | 1735 | +	WRITE_ONCE(rcu_state.gp_activity, jiffies);  | 
|---|
| 1944 | 1736 |  	raw_spin_lock_irq_rcu_node(rnp); | 
|---|
| 1945 |  | -	if (!READ_ONCE(rsp->gp_flags)) {  | 
|---|
 | 1737 | +	if (!READ_ONCE(rcu_state.gp_flags)) {  | 
|---|
| 1946 | 1738 |  		/* Spurious wakeup, tell caller to go back to sleep.  */ | 
|---|
| 1947 | 1739 |  		raw_spin_unlock_irq_rcu_node(rnp); | 
|---|
| 1948 | 1740 |  		return false; | 
|---|
| 1949 | 1741 |  	} | 
|---|
| 1950 |  | -	WRITE_ONCE(rsp->gp_flags, 0); /* Clear all flags: New grace period. */  | 
|---|
 | 1742 | +	WRITE_ONCE(rcu_state.gp_flags, 0); /* Clear all flags: New GP. */  | 
|---|
| 1951 | 1743 |   | 
|---|
| 1952 |  | -	if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) {  | 
|---|
 | 1744 | +	if (WARN_ON_ONCE(rcu_gp_in_progress())) {  | 
|---|
| 1953 | 1745 |  		/* | 
|---|
| 1954 | 1746 |  		 * Grace period already in progress, don't start another. | 
|---|
| 1955 | 1747 |  		 * Not supposed to be able to happen. | 
|---|
| .. | .. | 
|---|
| 1959 | 1751 |  	} | 
|---|
| 1960 | 1752 |   | 
|---|
| 1961 | 1753 |  	/* Advance to a new grace period and initialize state. */ | 
|---|
| 1962 |  | -	record_gp_stall_check_time(rsp);  | 
|---|
 | 1754 | +	record_gp_stall_check_time();  | 
|---|
| 1963 | 1755 |  	/* Record GP times before starting GP, hence rcu_seq_start(). */ | 
|---|
| 1964 |  | -	rcu_seq_start(&rsp->gp_seq);  | 
|---|
| 1965 |  | -	trace_rcu_grace_period(rsp->name, rsp->gp_seq, TPS("start"));  | 
|---|
 | 1756 | +	rcu_seq_start(&rcu_state.gp_seq);  | 
|---|
 | 1757 | +	ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);  | 
|---|
 | 1758 | +	trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start"));  | 
|---|
| 1966 | 1759 |  	raw_spin_unlock_irq_rcu_node(rnp); | 
|---|
| 1967 | 1760 |   | 
|---|
| 1968 | 1761 |  	/* | 
|---|
| 1969 |  | -	 * Apply per-leaf buffered online and offline operations to the  | 
|---|
| 1970 |  | -	 * rcu_node tree.  Note that this new grace period need not wait  | 
|---|
| 1971 |  | -	 * for subsequent online CPUs, and that quiescent-state forcing  | 
|---|
| 1972 |  | -	 * will handle subsequent offline CPUs.  | 
|---|
 | 1762 | +	 * Apply per-leaf buffered online and offline operations to  | 
|---|
 | 1763 | +	 * the rcu_node tree. Note that this new grace period need not  | 
|---|
 | 1764 | +	 * wait for subsequent online CPUs, and that RCU hooks in the CPU  | 
|---|
 | 1765 | +	 * offlining path, when combined with checks in this function,  | 
|---|
 | 1766 | +	 * will handle CPUs that are currently going offline or that will  | 
|---|
 | 1767 | +	 * go offline later.  Please also refer to the "Hotplug CPU" section  | 
|---|
 | 1768 | +	 * of RCU's Requirements documentation.  | 
|---|
| 1973 | 1769 |  	 */ | 
|---|
| 1974 |  | -	rsp->gp_state = RCU_GP_ONOFF;  | 
|---|
| 1975 |  | -	rcu_for_each_leaf_node(rsp, rnp) {  | 
|---|
| 1976 |  | -		spin_lock(&rsp->ofl_lock);  | 
|---|
 | 1770 | +	rcu_state.gp_state = RCU_GP_ONOFF;  | 
|---|
 | 1771 | +	rcu_for_each_leaf_node(rnp) {  | 
|---|
 | 1772 | +		raw_spin_lock(&rcu_state.ofl_lock);  | 
|---|
| 1977 | 1773 |  		raw_spin_lock_irq_rcu_node(rnp); | 
|---|
| 1978 | 1774 |  		if (rnp->qsmaskinit == rnp->qsmaskinitnext && | 
|---|
| 1979 | 1775 |  		    !rnp->wait_blkd_tasks) { | 
|---|
| 1980 | 1776 |  			/* Nothing to do on this leaf rcu_node structure. */ | 
|---|
| 1981 | 1777 |  			raw_spin_unlock_irq_rcu_node(rnp); | 
|---|
| 1982 |  | -			spin_unlock(&rsp->ofl_lock);  | 
|---|
 | 1778 | +			raw_spin_unlock(&rcu_state.ofl_lock);  | 
|---|
| 1983 | 1779 |  			continue; | 
|---|
| 1984 | 1780 |  		} | 
|---|
| 1985 | 1781 |   | 
|---|
| .. | .. | 
|---|
| 2015 | 1811 |  		} | 
|---|
| 2016 | 1812 |   | 
|---|
| 2017 | 1813 |  		raw_spin_unlock_irq_rcu_node(rnp); | 
|---|
| 2018 |  | -		spin_unlock(&rsp->ofl_lock);  | 
|---|
 | 1814 | +		raw_spin_unlock(&rcu_state.ofl_lock);  | 
|---|
| 2019 | 1815 |  	} | 
|---|
| 2020 |  | -	rcu_gp_slow(rsp, gp_preinit_delay); /* Races with CPU hotplug. */  | 
|---|
 | 1816 | +	rcu_gp_slow(gp_preinit_delay); /* Races with CPU hotplug. */  | 
|---|
| 2021 | 1817 |   | 
|---|
| 2022 | 1818 |  	/* | 
|---|
| 2023 | 1819 |  	 * Set the quiescent-state-needed bits in all the rcu_node | 
|---|
| 2024 |  | -	 * structures for all currently online CPUs in breadth-first order,  | 
|---|
| 2025 |  | -	 * starting from the root rcu_node structure, relying on the layout  | 
|---|
| 2026 |  | -	 * of the tree within the rsp->node[] array.  Note that other CPUs  | 
|---|
| 2027 |  | -	 * will access only the leaves of the hierarchy, thus seeing that no  | 
|---|
| 2028 |  | -	 * grace period is in progress, at least until the corresponding  | 
|---|
| 2029 |  | -	 * leaf node has been initialized.  | 
|---|
 | 1820 | +	 * structures for all currently online CPUs in breadth-first  | 
|---|
 | 1821 | +	 * order, starting from the root rcu_node structure, relying on the  | 
|---|
 | 1822 | +	 * layout of the tree within the rcu_state.node[] array.  Note that  | 
|---|
 | 1823 | +	 * other CPUs will access only the leaves of the hierarchy, thus  | 
|---|
 | 1824 | +	 * seeing that no grace period is in progress, at least until the  | 
|---|
 | 1825 | +	 * corresponding leaf node has been initialized.  | 
|---|
| 2030 | 1826 |  	 * | 
|---|
| 2031 | 1827 |  	 * The grace period cannot complete until the initialization | 
|---|
| 2032 | 1828 |  	 * process finishes, because this kthread handles both. | 
|---|
| 2033 | 1829 |  	 */ | 
|---|
| 2034 |  | -	rsp->gp_state = RCU_GP_INIT;  | 
|---|
| 2035 |  | -	rcu_for_each_node_breadth_first(rsp, rnp) {  | 
|---|
| 2036 |  | -		rcu_gp_slow(rsp, gp_init_delay);  | 
|---|
 | 1830 | +	rcu_state.gp_state = RCU_GP_INIT;  | 
|---|
 | 1831 | +	rcu_for_each_node_breadth_first(rnp) {  | 
|---|
 | 1832 | +		rcu_gp_slow(gp_init_delay);  | 
|---|
| 2037 | 1833 |  		raw_spin_lock_irqsave_rcu_node(rnp, flags); | 
|---|
| 2038 |  | -		rdp = this_cpu_ptr(rsp->rda);  | 
|---|
| 2039 |  | -		rcu_preempt_check_blocked_tasks(rsp, rnp);  | 
|---|
 | 1834 | +		rdp = this_cpu_ptr(&rcu_data);  | 
|---|
 | 1835 | +		rcu_preempt_check_blocked_tasks(rnp);  | 
|---|
| 2040 | 1836 |  		rnp->qsmask = rnp->qsmaskinit; | 
|---|
| 2041 |  | -		WRITE_ONCE(rnp->gp_seq, rsp->gp_seq);  | 
|---|
 | 1837 | +		WRITE_ONCE(rnp->gp_seq, rcu_state.gp_seq);  | 
|---|
| 2042 | 1838 |  		if (rnp == rdp->mynode) | 
|---|
| 2043 |  | -			(void)__note_gp_changes(rsp, rnp, rdp);  | 
|---|
 | 1839 | +			(void)__note_gp_changes(rnp, rdp);  | 
|---|
| 2044 | 1840 |  		rcu_preempt_boost_start_gp(rnp); | 
|---|
| 2045 |  | -		trace_rcu_grace_period_init(rsp->name, rnp->gp_seq,  | 
|---|
 | 1841 | +		trace_rcu_grace_period_init(rcu_state.name, rnp->gp_seq,  | 
|---|
| 2046 | 1842 |  					    rnp->level, rnp->grplo, | 
|---|
| 2047 | 1843 |  					    rnp->grphi, rnp->qsmask); | 
|---|
| 2048 | 1844 |  		/* Quiescent states for tasks on any now-offline CPUs. */ | 
|---|
| 2049 | 1845 |  		mask = rnp->qsmask & ~rnp->qsmaskinitnext; | 
|---|
| 2050 | 1846 |  		rnp->rcu_gp_init_mask = mask; | 
|---|
| 2051 | 1847 |  		if ((mask || rnp->wait_blkd_tasks) && rcu_is_leaf_node(rnp)) | 
|---|
| 2052 |  | -			rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags);  | 
|---|
 | 1848 | +			rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);  | 
|---|
| 2053 | 1849 |  		else | 
|---|
| 2054 | 1850 |  			raw_spin_unlock_irq_rcu_node(rnp); | 
|---|
| 2055 | 1851 |  		cond_resched_tasks_rcu_qs(); | 
|---|
| 2056 |  | -		WRITE_ONCE(rsp->gp_activity, jiffies);  | 
|---|
 | 1852 | +		WRITE_ONCE(rcu_state.gp_activity, jiffies);  | 
|---|
| 2057 | 1853 |  	} | 
|---|
 | 1854 | +  | 
|---|
 | 1855 | +	// If strict, make all CPUs aware of new grace period.  | 
|---|
 | 1856 | +	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))  | 
|---|
 | 1857 | +		on_each_cpu(rcu_strict_gp_boundary, NULL, 0);  | 
|---|
| 2058 | 1858 |   | 
|---|
| 2059 | 1859 |  	return true; | 
|---|
| 2060 | 1860 |  } | 
|---|
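
rcu_for_each_node_breadth_first() works because the rcu_node structures live in the rcu_state.node[] array in breadth-first order, so a plain index walk always reaches a parent before any of its children, and readers that consult only leaves never see a leaf initialized ahead of its ancestors. A toy illustration of that layout invariant follows; the fanout and array size are made up.

```c
#include <assert.h>
#include <stdio.h>

#define NR_NODES 7	/* root + 2 inner nodes + 4 leaves, fanout 2 */

struct node {
	int parent;		/* index into the same array, -1 for the root */
	unsigned long gp_seq;	/* this node's view of the grace period */
};

/* Breadth-first layout: index 0 is the root and every child has a
 * larger index than its parent. */
static struct node tree[NR_NODES] = {
	{ -1, 0 }, { 0, 0 }, { 0, 0 }, { 1, 0 }, { 1, 0 }, { 2, 0 }, { 2, 0 },
};

int main(void)
{
	unsigned long new_gp_seq = 4;
	int i;

	/* An index-order walk is a breadth-first walk, so by the time any
	 * leaf carries new_gp_seq, all of its ancestors already do. */
	for (i = 0; i < NR_NODES; i++) {
		tree[i].gp_seq = new_gp_seq;
		if (tree[i].parent >= 0)
			assert(tree[tree[i].parent].gp_seq == new_gp_seq);
	}
	printf("all %d nodes now see gp_seq %lu\n", NR_NODES, new_gp_seq);
	return 0;
}
```
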
| .. | .. | 
|---|
| 2063 | 1863 |   * Helper function for swait_event_idle_exclusive() wakeup at force-quiescent-state | 
|---|
| 2064 | 1864 |   * time. | 
|---|
| 2065 | 1865 |   */ | 
|---|
| 2066 |  | -static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)  | 
|---|
 | 1866 | +static bool rcu_gp_fqs_check_wake(int *gfp)  | 
|---|
| 2067 | 1867 |  { | 
|---|
| 2068 |  | -	struct rcu_node *rnp = rcu_get_root(rsp);  | 
|---|
 | 1868 | +	struct rcu_node *rnp = rcu_get_root();  | 
|---|
| 2069 | 1869 |   | 
|---|
| 2070 |  | -	/* Someone like call_rcu() requested a force-quiescent-state scan. */  | 
|---|
| 2071 |  | -	*gfp = READ_ONCE(rsp->gp_flags);  | 
|---|
 | 1870 | +	// If under overload conditions, force an immediate FQS scan.  | 
|---|
 | 1871 | +	if (*gfp & RCU_GP_FLAG_OVLD)  | 
|---|
 | 1872 | +		return true;  | 
|---|
 | 1873 | +  | 
|---|
 | 1874 | +	// Someone like call_rcu() requested a force-quiescent-state scan.  | 
|---|
 | 1875 | +	*gfp = READ_ONCE(rcu_state.gp_flags);  | 
|---|
| 2072 | 1876 |  	if (*gfp & RCU_GP_FLAG_FQS) | 
|---|
| 2073 | 1877 |  		return true; | 
|---|
| 2074 | 1878 |   | 
|---|
| 2075 |  | -	/* The current grace period has completed. */  | 
|---|
 | 1879 | +	// The current grace period has completed.  | 
|---|
| 2076 | 1880 |  	if (!READ_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp)) | 
|---|
| 2077 | 1881 |  		return true; | 
|---|
| 2078 | 1882 |   | 
|---|
| .. | .. | 
|---|
| 2082 | 1886 |  /* | 
|---|
| 2083 | 1887 |   * Do one round of quiescent-state forcing. | 
|---|
| 2084 | 1888 |   */ | 
|---|
| 2085 |  | -static void rcu_gp_fqs(struct rcu_state *rsp, bool first_time)  | 
|---|
 | 1889 | +static void rcu_gp_fqs(bool first_time)  | 
|---|
| 2086 | 1890 |  { | 
|---|
| 2087 |  | -	struct rcu_node *rnp = rcu_get_root(rsp);  | 
|---|
 | 1891 | +	struct rcu_node *rnp = rcu_get_root();  | 
|---|
| 2088 | 1892 |   | 
|---|
| 2089 |  | -	WRITE_ONCE(rsp->gp_activity, jiffies);  | 
|---|
| 2090 |  | -	rsp->n_force_qs++;  | 
|---|
 | 1893 | +	WRITE_ONCE(rcu_state.gp_activity, jiffies);  | 
|---|
 | 1894 | +	WRITE_ONCE(rcu_state.n_force_qs, rcu_state.n_force_qs + 1);  | 
|---|
| 2091 | 1895 |  	if (first_time) { | 
|---|
| 2092 | 1896 |  		/* Collect dyntick-idle snapshots. */ | 
|---|
| 2093 |  | -		force_qs_rnp(rsp, dyntick_save_progress_counter);  | 
|---|
 | 1897 | +		force_qs_rnp(dyntick_save_progress_counter);  | 
|---|
| 2094 | 1898 |  	} else { | 
|---|
| 2095 | 1899 |  		/* Handle dyntick-idle and offline CPUs. */ | 
|---|
| 2096 |  | -		force_qs_rnp(rsp, rcu_implicit_dynticks_qs);  | 
|---|
 | 1900 | +		force_qs_rnp(rcu_implicit_dynticks_qs);  | 
|---|
| 2097 | 1901 |  	} | 
|---|
| 2098 | 1902 |  	/* Clear flag to prevent immediate re-entry. */ | 
|---|
| 2099 |  | -	if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {  | 
|---|
 | 1903 | +	if (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) {  | 
|---|
| 2100 | 1904 |  		raw_spin_lock_irq_rcu_node(rnp); | 
|---|
| 2101 |  | -		WRITE_ONCE(rsp->gp_flags,  | 
|---|
| 2102 |  | -			   READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS);  | 
|---|
 | 1905 | +		WRITE_ONCE(rcu_state.gp_flags,  | 
|---|
 | 1906 | +			   READ_ONCE(rcu_state.gp_flags) & ~RCU_GP_FLAG_FQS);  | 
|---|
| 2103 | 1907 |  		raw_spin_unlock_irq_rcu_node(rnp); | 
|---|
 | 1908 | +	}  | 
|---|
 | 1909 | +}  | 
|---|
 | 1910 | +  | 
|---|
 | 1911 | +/*  | 
|---|
 | 1912 | + * Loop doing repeated quiescent-state forcing until the grace period ends.  | 
|---|
 | 1913 | + */  | 
|---|
 | 1914 | +static void rcu_gp_fqs_loop(void)  | 
|---|
 | 1915 | +{  | 
|---|
 | 1916 | +	bool first_gp_fqs;  | 
|---|
 | 1917 | +	int gf = 0;  | 
|---|
 | 1918 | +	unsigned long j;  | 
|---|
 | 1919 | +	int ret;  | 
|---|
 | 1920 | +	struct rcu_node *rnp = rcu_get_root();  | 
|---|
 | 1921 | +  | 
|---|
 | 1922 | +	first_gp_fqs = true;  | 
|---|
 | 1923 | +	j = READ_ONCE(jiffies_till_first_fqs);  | 
|---|
 | 1924 | +	if (rcu_state.cbovld)  | 
|---|
 | 1925 | +		gf = RCU_GP_FLAG_OVLD;  | 
|---|
 | 1926 | +	ret = 0;  | 
|---|
 | 1927 | +	for (;;) {  | 
|---|
 | 1928 | +		if (!ret) {  | 
|---|
 | 1929 | +			rcu_state.jiffies_force_qs = jiffies + j;  | 
|---|
 | 1930 | +			WRITE_ONCE(rcu_state.jiffies_kick_kthreads,  | 
|---|
 | 1931 | +				   jiffies + (j ? 3 * j : 2));  | 
|---|
 | 1932 | +		}  | 
|---|
 | 1933 | +		trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,  | 
|---|
 | 1934 | +				       TPS("fqswait"));  | 
|---|
 | 1935 | +		rcu_state.gp_state = RCU_GP_WAIT_FQS;  | 
|---|
 | 1936 | +		ret = swait_event_idle_timeout_exclusive(  | 
|---|
 | 1937 | +				rcu_state.gp_wq, rcu_gp_fqs_check_wake(&gf), j);  | 
|---|
 | 1938 | +		rcu_gp_torture_wait();  | 
|---|
 | 1939 | +		rcu_state.gp_state = RCU_GP_DOING_FQS;  | 
|---|
 | 1940 | +		/* Locking provides needed memory barriers. */  | 
|---|
 | 1941 | +		/* If grace period done, leave loop. */  | 
|---|
 | 1942 | +		if (!READ_ONCE(rnp->qsmask) &&  | 
|---|
 | 1943 | +		    !rcu_preempt_blocked_readers_cgp(rnp))  | 
|---|
 | 1944 | +			break;  | 
|---|
 | 1945 | +		/* If time for quiescent-state forcing, do it. */  | 
|---|
 | 1946 | +		if (!time_after(rcu_state.jiffies_force_qs, jiffies) ||  | 
|---|
 | 1947 | +		    (gf & (RCU_GP_FLAG_FQS | RCU_GP_FLAG_OVLD))) {  | 
|---|
 | 1948 | +			trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,  | 
|---|
 | 1949 | +					       TPS("fqsstart"));  | 
|---|
 | 1950 | +			rcu_gp_fqs(first_gp_fqs);  | 
|---|
 | 1951 | +			gf = 0;  | 
|---|
 | 1952 | +			if (first_gp_fqs) {  | 
|---|
 | 1953 | +				first_gp_fqs = false;  | 
|---|
 | 1954 | +				gf = rcu_state.cbovld ? RCU_GP_FLAG_OVLD : 0;  | 
|---|
 | 1955 | +			}  | 
|---|
 | 1956 | +			trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,  | 
|---|
 | 1957 | +					       TPS("fqsend"));  | 
|---|
 | 1958 | +			cond_resched_tasks_rcu_qs();  | 
|---|
 | 1959 | +			WRITE_ONCE(rcu_state.gp_activity, jiffies);  | 
|---|
 | 1960 | +			ret = 0; /* Force full wait till next FQS. */  | 
|---|
 | 1961 | +			j = READ_ONCE(jiffies_till_next_fqs);  | 
|---|
 | 1962 | +		} else {  | 
|---|
 | 1963 | +			/* Deal with stray signal. */  | 
|---|
 | 1964 | +			cond_resched_tasks_rcu_qs();  | 
|---|
 | 1965 | +			WRITE_ONCE(rcu_state.gp_activity, jiffies);  | 
|---|
 | 1966 | +			WARN_ON(signal_pending(current));  | 
|---|
 | 1967 | +			trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,  | 
|---|
 | 1968 | +					       TPS("fqswaitsig"));  | 
|---|
 | 1969 | +			ret = 1; /* Keep old FQS timing. */  | 
|---|
 | 1970 | +			j = jiffies;  | 
|---|
 | 1971 | +			if (time_after(jiffies, rcu_state.jiffies_force_qs))  | 
|---|
 | 1972 | +				j = 1;  | 
|---|
 | 1973 | +			else  | 
|---|
 | 1974 | +				j = rcu_state.jiffies_force_qs - j;  | 
|---|
 | 1975 | +			gf = 0;  | 
|---|
 | 1976 | +		}  | 
|---|
| 2104 | 1977 |  	} | 
|---|
| 2105 | 1978 |  } | 
|---|
| 2106 | 1979 |   | 
|---|
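
The ret/j bookkeeping in rcu_gp_fqs_loop() keeps the force-quiescent-state deadline stable across stray wakeups: only a timeout or an explicit FQS request re-arms the deadline, while a spurious wakeup merely recomputes the time remaining (at least one tick). The fragment below sketches that arithmetic in isolation; the helper name and tick values are invented.

```c
#include <stdio.h>

/* Mirror of the "stray signal" branch above: keep the existing deadline
 * and wait only for whatever remains of it, at least one tick if the
 * deadline has already passed (the kernel uses time_after() here to
 * stay wrap-safe; a plain comparison is enough for the sketch). */
static unsigned long remaining_ticks(unsigned long now, unsigned long deadline)
{
	if (now >= deadline)
		return 1;
	return deadline - now;
}

int main(void)
{
	unsigned long now = 100, timeout = 30;
	unsigned long deadline = now + timeout;	/* jiffies_force_qs analogue */

	/* Spurious wakeup 12 ticks in: same deadline, shorter wait. */
	now += 12;
	printf("wait %lu more ticks toward the old deadline\n",
	       remaining_ticks(now, deadline));		/* 18 */

	/* Wakeup past the deadline: force quiescent states, after which
	 * the loop re-arms the deadline to now + timeout. */
	now = deadline + 5;
	printf("remaining %lu tick -> force quiescent states now\n",
	       remaining_ticks(now, deadline));		/* 1 */
	return 0;
}
```
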
| 2107 | 1980 |  /* | 
|---|
| 2108 | 1981 |   * Clean up after the old grace period. | 
|---|
| 2109 | 1982 |   */ | 
|---|
| 2110 |  | -static void rcu_gp_cleanup(struct rcu_state *rsp)  | 
|---|
 | 1983 | +static void rcu_gp_cleanup(void)  | 
|---|
| 2111 | 1984 |  { | 
|---|
| 2112 |  | -	unsigned long gp_duration;  | 
|---|
 | 1985 | +	int cpu;  | 
|---|
| 2113 | 1986 |  	bool needgp = false; | 
|---|
 | 1987 | +	unsigned long gp_duration;  | 
|---|
| 2114 | 1988 |  	unsigned long new_gp_seq; | 
|---|
 | 1989 | +	bool offloaded;  | 
|---|
| 2115 | 1990 |  	struct rcu_data *rdp; | 
|---|
| 2116 |  | -	struct rcu_node *rnp = rcu_get_root(rsp);  | 
|---|
 | 1991 | +	struct rcu_node *rnp = rcu_get_root();  | 
|---|
| 2117 | 1992 |  	struct swait_queue_head *sq; | 
|---|
| 2118 | 1993 |   | 
|---|
| 2119 |  | -	WRITE_ONCE(rsp->gp_activity, jiffies);  | 
|---|
 | 1994 | +	WRITE_ONCE(rcu_state.gp_activity, jiffies);  | 
|---|
| 2120 | 1995 |  	raw_spin_lock_irq_rcu_node(rnp); | 
|---|
| 2121 |  | -	gp_duration = jiffies - rsp->gp_start;  | 
|---|
| 2122 |  | -	if (gp_duration > rsp->gp_max)  | 
|---|
| 2123 |  | -		rsp->gp_max = gp_duration;  | 
|---|
 | 1996 | +	rcu_state.gp_end = jiffies;  | 
|---|
 | 1997 | +	gp_duration = rcu_state.gp_end - rcu_state.gp_start;  | 
|---|
 | 1998 | +	if (gp_duration > rcu_state.gp_max)  | 
|---|
 | 1999 | +		rcu_state.gp_max = gp_duration;  | 
|---|
| 2124 | 2000 |   | 
|---|
| 2125 | 2001 |  	/* | 
|---|
| 2126 | 2002 |  	 * We know the grace period is complete, but to everyone else | 
|---|
| .. | .. | 
|---|
| 2141 | 2017 |  	 * the rcu_node structures before the beginning of the next grace | 
|---|
| 2142 | 2018 |  	 * period is recorded in any of the rcu_node structures. | 
|---|
| 2143 | 2019 |  	 */ | 
|---|
| 2144 |  | -	new_gp_seq = rsp->gp_seq;  | 
|---|
 | 2020 | +	new_gp_seq = rcu_state.gp_seq;  | 
|---|
| 2145 | 2021 |  	rcu_seq_end(&new_gp_seq); | 
|---|
| 2146 |  | -	rcu_for_each_node_breadth_first(rsp, rnp) {  | 
|---|
 | 2022 | +	rcu_for_each_node_breadth_first(rnp) {  | 
|---|
| 2147 | 2023 |  		raw_spin_lock_irq_rcu_node(rnp); | 
|---|
| 2148 | 2024 |  		if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp))) | 
|---|
| 2149 |  | -			dump_blkd_tasks(rsp, rnp, 10);  | 
|---|
 | 2025 | +			dump_blkd_tasks(rnp, 10);  | 
|---|
| 2150 | 2026 |  		WARN_ON_ONCE(rnp->qsmask); | 
|---|
| 2151 | 2027 |  		WRITE_ONCE(rnp->gp_seq, new_gp_seq); | 
|---|
| 2152 |  | -		rdp = this_cpu_ptr(rsp->rda);  | 
|---|
 | 2028 | +		rdp = this_cpu_ptr(&rcu_data);  | 
|---|
| 2153 | 2029 |  		if (rnp == rdp->mynode) | 
|---|
| 2154 |  | -			needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;  | 
|---|
 | 2030 | +			needgp = __note_gp_changes(rnp, rdp) || needgp;  | 
|---|
| 2155 | 2031 |  		/* smp_mb() provided by prior unlock-lock pair. */ | 
|---|
| 2156 |  | -		needgp = rcu_future_gp_cleanup(rsp, rnp) || needgp;  | 
|---|
 | 2032 | +		needgp = rcu_future_gp_cleanup(rnp) || needgp;  | 
|---|
 | 2033 | +		// Reset overload indication for CPUs no longer overloaded  | 
|---|
 | 2034 | +		if (rcu_is_leaf_node(rnp))  | 
|---|
 | 2035 | +			for_each_leaf_node_cpu_mask(rnp, cpu, rnp->cbovldmask) {  | 
|---|
 | 2036 | +				rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
 | 2037 | +				check_cb_ovld_locked(rdp, rnp);  | 
|---|
 | 2038 | +			}  | 
|---|
| 2157 | 2039 |  		sq = rcu_nocb_gp_get(rnp); | 
|---|
| 2158 | 2040 |  		raw_spin_unlock_irq_rcu_node(rnp); | 
|---|
| 2159 | 2041 |  		rcu_nocb_gp_cleanup(sq); | 
|---|
| 2160 | 2042 |  		cond_resched_tasks_rcu_qs(); | 
|---|
| 2161 |  | -		WRITE_ONCE(rsp->gp_activity, jiffies);  | 
|---|
| 2162 |  | -		rcu_gp_slow(rsp, gp_cleanup_delay);  | 
|---|
 | 2043 | +		WRITE_ONCE(rcu_state.gp_activity, jiffies);  | 
|---|
 | 2044 | +		rcu_gp_slow(gp_cleanup_delay);  | 
|---|
| 2163 | 2045 |  	} | 
|---|
| 2164 |  | -	rnp = rcu_get_root(rsp);  | 
|---|
| 2165 |  | -	raw_spin_lock_irq_rcu_node(rnp); /* GP before rsp->gp_seq update. */  | 
|---|
 | 2046 | +	rnp = rcu_get_root();  | 
|---|
 | 2047 | +	raw_spin_lock_irq_rcu_node(rnp); /* GP before ->gp_seq update. */  | 
|---|
| 2166 | 2048 |   | 
|---|
| 2167 |  | -	/* Declare grace period done. */  | 
|---|
| 2168 |  | -	rcu_seq_end(&rsp->gp_seq);  | 
|---|
| 2169 |  | -	trace_rcu_grace_period(rsp->name, rsp->gp_seq, TPS("end"));  | 
|---|
| 2170 |  | -	rsp->gp_state = RCU_GP_IDLE;  | 
|---|
 | 2049 | +	/* Declare grace period done, trace first to use old GP number. */  | 
|---|
 | 2050 | +	trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("end"));  | 
|---|
 | 2051 | +	rcu_seq_end(&rcu_state.gp_seq);  | 
|---|
 | 2052 | +	ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);  | 
|---|
 | 2053 | +	rcu_state.gp_state = RCU_GP_IDLE;  | 
|---|
| 2171 | 2054 |  	/* Check for GP requests since above loop. */ | 
|---|
| 2172 |  | -	rdp = this_cpu_ptr(rsp->rda);  | 
|---|
 | 2055 | +	rdp = this_cpu_ptr(&rcu_data);  | 
|---|
| 2173 | 2056 |  	if (!needgp && ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed)) { | 
|---|
| 2174 | 2057 |  		trace_rcu_this_gp(rnp, rdp, rnp->gp_seq_needed, | 
|---|
| 2175 | 2058 |  				  TPS("CleanupMore")); | 
|---|
| 2176 | 2059 |  		needgp = true; | 
|---|
| 2177 | 2060 |  	} | 
|---|
| 2178 | 2061 |  	/* Advance CBs to reduce false positives below. */ | 
|---|
| 2179 |  | -	if (!rcu_accelerate_cbs(rsp, rnp, rdp) && needgp) {  | 
|---|
| 2180 |  | -		WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);  | 
|---|
| 2181 |  | -		rsp->gp_req_activity = jiffies;  | 
|---|
| 2182 |  | -		trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gp_seq),  | 
|---|
 | 2062 | +	offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&  | 
|---|
 | 2063 | +		    rcu_segcblist_is_offloaded(&rdp->cblist);  | 
|---|
 | 2064 | +	if ((offloaded || !rcu_accelerate_cbs(rnp, rdp)) && needgp) {  | 
|---|
 | 2065 | +		WRITE_ONCE(rcu_state.gp_flags, RCU_GP_FLAG_INIT);  | 
|---|
 | 2066 | +		WRITE_ONCE(rcu_state.gp_req_activity, jiffies);  | 
|---|
 | 2067 | +		trace_rcu_grace_period(rcu_state.name,  | 
|---|
 | 2068 | +				       rcu_state.gp_seq,  | 
|---|
| 2183 | 2069 |  				       TPS("newreq")); | 
|---|
| 2184 | 2070 |  	} else { | 
|---|
| 2185 |  | -		WRITE_ONCE(rsp->gp_flags, rsp->gp_flags & RCU_GP_FLAG_INIT);  | 
|---|
 | 2071 | +		WRITE_ONCE(rcu_state.gp_flags,  | 
|---|
 | 2072 | +			   rcu_state.gp_flags & RCU_GP_FLAG_INIT);  | 
|---|
| 2186 | 2073 |  	} | 
|---|
| 2187 | 2074 |  	raw_spin_unlock_irq_rcu_node(rnp); | 
|---|
 | 2075 | +  | 
|---|
 | 2076 | +	// If strict, make all CPUs aware of the end of the old grace period.  | 
|---|
 | 2077 | +	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))  | 
|---|
 | 2078 | +		on_each_cpu(rcu_strict_gp_boundary, NULL, 0);  | 
|---|
| 2188 | 2079 |  } | 
|---|
| 2189 | 2080 |   | 
|---|
| 2190 | 2081 |  /* | 
|---|
| 2191 | 2082 |   * Body of kthread that handles grace periods. | 
|---|
| 2192 | 2083 |   */ | 
|---|
| 2193 |  | -static int __noreturn rcu_gp_kthread(void *arg)  | 
|---|
 | 2084 | +static int __noreturn rcu_gp_kthread(void *unused)  | 
|---|
| 2194 | 2085 |  { | 
|---|
| 2195 |  | -	bool first_gp_fqs;  | 
|---|
| 2196 |  | -	int gf;  | 
|---|
| 2197 |  | -	unsigned long j;  | 
|---|
| 2198 |  | -	int ret;  | 
|---|
| 2199 |  | -	struct rcu_state *rsp = arg;  | 
|---|
| 2200 |  | -	struct rcu_node *rnp = rcu_get_root(rsp);  | 
|---|
| 2201 |  | -  | 
|---|
| 2202 | 2086 |  	rcu_bind_gp_kthread(); | 
|---|
| 2203 | 2087 |  	for (;;) { | 
|---|
| 2204 | 2088 |   | 
|---|
| 2205 | 2089 |  		/* Handle grace-period start. */ | 
|---|
| 2206 | 2090 |  		for (;;) { | 
|---|
| 2207 |  | -			trace_rcu_grace_period(rsp->name,  | 
|---|
| 2208 |  | -					       READ_ONCE(rsp->gp_seq),  | 
|---|
 | 2091 | +			trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,  | 
|---|
| 2209 | 2092 |  					       TPS("reqwait")); | 
|---|
| 2210 |  | -			rsp->gp_state = RCU_GP_WAIT_GPS;  | 
|---|
| 2211 |  | -			swait_event_idle_exclusive(rsp->gp_wq, READ_ONCE(rsp->gp_flags) &  | 
|---|
| 2212 |  | -						     RCU_GP_FLAG_INIT);  | 
|---|
| 2213 |  | -			rsp->gp_state = RCU_GP_DONE_GPS;  | 
|---|
 | 2093 | +			rcu_state.gp_state = RCU_GP_WAIT_GPS;  | 
|---|
 | 2094 | +			swait_event_idle_exclusive(rcu_state.gp_wq,  | 
|---|
 | 2095 | +					 READ_ONCE(rcu_state.gp_flags) &  | 
|---|
 | 2096 | +					 RCU_GP_FLAG_INIT);  | 
|---|
 | 2097 | +			rcu_gp_torture_wait();  | 
|---|
 | 2098 | +			rcu_state.gp_state = RCU_GP_DONE_GPS;  | 
|---|
| 2214 | 2099 |  			/* Locking provides needed memory barrier. */ | 
|---|
| 2215 |  | -			if (rcu_gp_init(rsp))  | 
|---|
 | 2100 | +			if (rcu_gp_init())  | 
|---|
| 2216 | 2101 |  				break; | 
|---|
| 2217 | 2102 |  			cond_resched_tasks_rcu_qs(); | 
|---|
| 2218 |  | -			WRITE_ONCE(rsp->gp_activity, jiffies);  | 
|---|
 | 2103 | +			WRITE_ONCE(rcu_state.gp_activity, jiffies);  | 
|---|
| 2219 | 2104 |  			WARN_ON(signal_pending(current)); | 
|---|
| 2220 |  | -			trace_rcu_grace_period(rsp->name,  | 
|---|
| 2221 |  | -					       READ_ONCE(rsp->gp_seq),  | 
|---|
 | 2105 | +			trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,  | 
|---|
| 2222 | 2106 |  					       TPS("reqwaitsig")); | 
|---|
| 2223 | 2107 |  		} | 
|---|
| 2224 | 2108 |   | 
|---|
| 2225 | 2109 |  		/* Handle quiescent-state forcing. */ | 
|---|
| 2226 |  | -		first_gp_fqs = true;  | 
|---|
| 2227 |  | -		j = jiffies_till_first_fqs;  | 
|---|
| 2228 |  | -		ret = 0;  | 
|---|
| 2229 |  | -		for (;;) {  | 
|---|
| 2230 |  | -			if (!ret) {  | 
|---|
| 2231 |  | -				rsp->jiffies_force_qs = jiffies + j;  | 
|---|
| 2232 |  | -				WRITE_ONCE(rsp->jiffies_kick_kthreads,  | 
|---|
| 2233 |  | -					   jiffies + 3 * j);  | 
|---|
| 2234 |  | -			}  | 
|---|
| 2235 |  | -			trace_rcu_grace_period(rsp->name,  | 
|---|
| 2236 |  | -					       READ_ONCE(rsp->gp_seq),  | 
|---|
| 2237 |  | -					       TPS("fqswait"));  | 
|---|
| 2238 |  | -			rsp->gp_state = RCU_GP_WAIT_FQS;  | 
|---|
| 2239 |  | -			ret = swait_event_idle_timeout_exclusive(rsp->gp_wq,  | 
|---|
| 2240 |  | -					rcu_gp_fqs_check_wake(rsp, &gf), j);  | 
|---|
| 2241 |  | -			rsp->gp_state = RCU_GP_DOING_FQS;  | 
|---|
| 2242 |  | -			/* Locking provides needed memory barriers. */  | 
|---|
| 2243 |  | -			/* If grace period done, leave loop. */  | 
|---|
| 2244 |  | -			if (!READ_ONCE(rnp->qsmask) &&  | 
|---|
| 2245 |  | -			    !rcu_preempt_blocked_readers_cgp(rnp))  | 
|---|
| 2246 |  | -				break;  | 
|---|
| 2247 |  | -			/* If time for quiescent-state forcing, do it. */  | 
|---|
| 2248 |  | -			if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) ||  | 
|---|
| 2249 |  | -			    (gf & RCU_GP_FLAG_FQS)) {  | 
|---|
| 2250 |  | -				trace_rcu_grace_period(rsp->name,  | 
|---|
| 2251 |  | -						       READ_ONCE(rsp->gp_seq),  | 
|---|
| 2252 |  | -						       TPS("fqsstart"));  | 
|---|
| 2253 |  | -				rcu_gp_fqs(rsp, first_gp_fqs);  | 
|---|
| 2254 |  | -				first_gp_fqs = false;  | 
|---|
| 2255 |  | -				trace_rcu_grace_period(rsp->name,  | 
|---|
| 2256 |  | -						       READ_ONCE(rsp->gp_seq),  | 
|---|
| 2257 |  | -						       TPS("fqsend"));  | 
|---|
| 2258 |  | -				cond_resched_tasks_rcu_qs();  | 
|---|
| 2259 |  | -				WRITE_ONCE(rsp->gp_activity, jiffies);  | 
|---|
| 2260 |  | -				ret = 0; /* Force full wait till next FQS. */  | 
|---|
| 2261 |  | -				j = jiffies_till_next_fqs;  | 
|---|
| 2262 |  | -			} else {  | 
|---|
| 2263 |  | -				/* Deal with stray signal. */  | 
|---|
| 2264 |  | -				cond_resched_tasks_rcu_qs();  | 
|---|
| 2265 |  | -				WRITE_ONCE(rsp->gp_activity, jiffies);  | 
|---|
| 2266 |  | -				WARN_ON(signal_pending(current));  | 
|---|
| 2267 |  | -				trace_rcu_grace_period(rsp->name,  | 
|---|
| 2268 |  | -						       READ_ONCE(rsp->gp_seq),  | 
|---|
| 2269 |  | -						       TPS("fqswaitsig"));  | 
|---|
| 2270 |  | -				ret = 1; /* Keep old FQS timing. */  | 
|---|
| 2271 |  | -				j = jiffies;  | 
|---|
| 2272 |  | -				if (time_after(jiffies, rsp->jiffies_force_qs))  | 
|---|
| 2273 |  | -					j = 1;  | 
|---|
| 2274 |  | -				else  | 
|---|
| 2275 |  | -					j = rsp->jiffies_force_qs - j;  | 
|---|
| 2276 |  | -			}  | 
|---|
| 2277 |  | -		}  | 
|---|
 | 2110 | +		rcu_gp_fqs_loop();  | 
|---|
| 2278 | 2111 |   | 
|---|
| 2279 | 2112 |  		/* Handle grace-period end. */ | 
|---|
| 2280 |  | -		rsp->gp_state = RCU_GP_CLEANUP;  | 
|---|
| 2281 |  | -		rcu_gp_cleanup(rsp);  | 
|---|
| 2282 |  | -		rsp->gp_state = RCU_GP_CLEANED;  | 
|---|
 | 2113 | +		rcu_state.gp_state = RCU_GP_CLEANUP;  | 
|---|
 | 2114 | +		rcu_gp_cleanup();  | 
|---|
 | 2115 | +		rcu_state.gp_state = RCU_GP_CLEANED;  | 
|---|
| 2283 | 2116 |  	} | 
|---|
| 2284 | 2117 |  } | 
|---|
| 2285 | 2118 |   | 
|---|
| 2286 | 2119 |  /* | 
|---|
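
After the conversion, rcu_gp_kthread() reads as a three-phase loop: sleep until a grace period is requested, initialize it, drive quiescent-state forcing, then clean up. The following is a compressed user-space analogy using a pthread and a condition variable; it is purely illustrative and uses none of the kernel's APIs.

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wake = PTHREAD_COND_INITIALIZER;
static bool gp_requested;
static bool shutting_down;

static void *gp_thread(void *unused)
{
	(void)unused;
	for (;;) {
		pthread_mutex_lock(&lock);
		while (!gp_requested && !shutting_down)	/* "reqwait" */
			pthread_cond_wait(&wake, &lock);
		if (!gp_requested && shutting_down) {
			pthread_mutex_unlock(&lock);
			return NULL;
		}
		gp_requested = false;
		pthread_mutex_unlock(&lock);

		puts("initialize grace period");	/* rcu_gp_init() analogue */
		puts("force quiescent states");		/* rcu_gp_fqs_loop() analogue */
		puts("clean up grace period");		/* rcu_gp_cleanup() analogue */
	}
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, gp_thread, NULL);

	pthread_mutex_lock(&lock);
	gp_requested = true;		/* like setting RCU_GP_FLAG_INIT */
	pthread_cond_signal(&wake);
	pthread_mutex_unlock(&lock);

	pthread_mutex_lock(&lock);
	shutting_down = true;		/* no kernel analogue; ends the demo */
	pthread_cond_signal(&wake);
	pthread_mutex_unlock(&lock);

	pthread_join(tid, NULL);
	return 0;
}
```
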
| 2287 |  | - * Report a full set of quiescent states to the specified rcu_state data  | 
|---|
| 2288 |  | - * structure.  Invoke rcu_gp_kthread_wake() to awaken the grace-period  | 
|---|
| 2289 |  | - * kthread if another grace period is required.  Whether we wake  | 
|---|
| 2290 |  | - * the grace-period kthread or it awakens itself for the next round  | 
|---|
| 2291 |  | - * of quiescent-state forcing, that kthread will clean up after the  | 
|---|
| 2292 |  | - * just-completed grace period.  Note that the caller must hold rnp->lock,  | 
|---|
| 2293 |  | - * which is released before return.  | 
|---|
 | 2120 | + * Report a full set of quiescent states to the rcu_state data structure.  | 
|---|
 | 2121 | + * Invoke rcu_gp_kthread_wake() to awaken the grace-period kthread if  | 
|---|
 | 2122 | + * another grace period is required.  Whether we wake the grace-period  | 
|---|
 | 2123 | + * kthread or it awakens itself for the next round of quiescent-state  | 
|---|
 | 2124 | + * forcing, that kthread will clean up after the just-completed grace  | 
|---|
 | 2125 | + * period.  Note that the caller must hold rnp->lock, which is released  | 
|---|
 | 2126 | + * before return.  | 
|---|
| 2294 | 2127 |   */ | 
|---|
| 2295 |  | -static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)  | 
|---|
| 2296 |  | -	__releases(rcu_get_root(rsp)->lock)  | 
|---|
 | 2128 | +static void rcu_report_qs_rsp(unsigned long flags)  | 
|---|
 | 2129 | +	__releases(rcu_get_root()->lock)  | 
|---|
| 2297 | 2130 |  { | 
|---|
| 2298 |  | -	raw_lockdep_assert_held_rcu_node(rcu_get_root(rsp));  | 
|---|
| 2299 |  | -	WARN_ON_ONCE(!rcu_gp_in_progress(rsp));  | 
|---|
| 2300 |  | -	WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);  | 
|---|
| 2301 |  | -	raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);  | 
|---|
| 2302 |  | -	rcu_gp_kthread_wake(rsp);  | 
|---|
 | 2131 | +	raw_lockdep_assert_held_rcu_node(rcu_get_root());  | 
|---|
 | 2132 | +	WARN_ON_ONCE(!rcu_gp_in_progress());  | 
|---|
 | 2133 | +	WRITE_ONCE(rcu_state.gp_flags,  | 
|---|
 | 2134 | +		   READ_ONCE(rcu_state.gp_flags) | RCU_GP_FLAG_FQS);  | 
|---|
 | 2135 | +	raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(), flags);  | 
|---|
 | 2136 | +	rcu_gp_kthread_wake();  | 
|---|
| 2303 | 2137 |  } | 
|---|
| 2304 | 2138 |   | 
|---|
| 2305 | 2139 |  /* | 
|---|
| .. | .. | 
|---|
| 2316 | 2150 |   * disabled.  This allows propagating quiescent state due to resumed tasks | 
|---|
| 2317 | 2151 |   * during grace-period initialization. | 
|---|
| 2318 | 2152 |   */ | 
|---|
| 2319 |  | -static void  | 
|---|
| 2320 |  | -rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,  | 
|---|
| 2321 |  | -		  struct rcu_node *rnp, unsigned long gps, unsigned long flags)  | 
|---|
 | 2153 | +static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp,  | 
|---|
 | 2154 | +			      unsigned long gps, unsigned long flags)  | 
|---|
| 2322 | 2155 |  	__releases(rnp->lock) | 
|---|
| 2323 | 2156 |  { | 
|---|
| 2324 | 2157 |  	unsigned long oldmask = 0; | 
|---|
| .. | .. | 
|---|
| 2340 | 2173 |  		WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */ | 
|---|
| 2341 | 2174 |  		WARN_ON_ONCE(!rcu_is_leaf_node(rnp) && | 
|---|
| 2342 | 2175 |  			     rcu_preempt_blocked_readers_cgp(rnp)); | 
|---|
| 2343 |  | -		rnp->qsmask &= ~mask;  | 
|---|
| 2344 |  | -		trace_rcu_quiescent_state_report(rsp->name, rnp->gp_seq,  | 
|---|
 | 2176 | +		WRITE_ONCE(rnp->qsmask, rnp->qsmask & ~mask);  | 
|---|
 | 2177 | +		trace_rcu_quiescent_state_report(rcu_state.name, rnp->gp_seq,  | 
|---|
| 2345 | 2178 |  						 mask, rnp->qsmask, rnp->level, | 
|---|
| 2346 | 2179 |  						 rnp->grplo, rnp->grphi, | 
|---|
| 2347 | 2180 |  						 !!rnp->gp_tasks); | 
|---|
| .. | .. | 
|---|
| 2363 | 2196 |  		rnp_c = rnp; | 
|---|
| 2364 | 2197 |  		rnp = rnp->parent; | 
|---|
| 2365 | 2198 |  		raw_spin_lock_irqsave_rcu_node(rnp, flags); | 
|---|
| 2366 |  | -		oldmask = rnp_c->qsmask;  | 
|---|
 | 2199 | +		oldmask = READ_ONCE(rnp_c->qsmask);  | 
|---|
| 2367 | 2200 |  	} | 
|---|
| 2368 | 2201 |   | 
|---|
| 2369 | 2202 |  	/* | 
|---|
| .. | .. | 
|---|
| 2371 | 2204 |  	 * state for this grace period.  Invoke rcu_report_qs_rsp() | 
|---|
| 2372 | 2205 |  	 * to clean up and start the next grace period if one is needed. | 
|---|
| 2373 | 2206 |  	 */ | 
|---|
| 2374 |  | -	rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */  | 
|---|
 | 2207 | +	rcu_report_qs_rsp(flags); /* releases rnp->lock. */  | 
|---|
| 2375 | 2208 |  } | 
|---|
| 2376 | 2209 |   | 
|---|
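
rcu_report_qs_rnp() clears the reporter's bit in ->qsmask and, whenever that empties a node, repeats the operation one level up, so a complete set of quiescent states eventually drains the root and lets the grace period end. A single-threaded toy version of that upward walk over a tiny static tree follows; the structure and names are invented for the example.

```c
#include <stdio.h>

struct tnode {
	struct tnode *parent;
	unsigned long qsmask;	/* one bit per child (or per CPU at a leaf) */
	unsigned long grpmask;	/* this node's bit in its parent's qsmask */
};

/* Two leaves under one root; leaf 0 covers CPUs 0-1, leaf 1 covers CPU 2. */
static struct tnode root = { NULL, 0x3, 0x0 };
static struct tnode leaf[2] = {
	{ &root, 0x3, 0x1 },
	{ &root, 0x1, 0x2 },
};

/* Report "mask" quiescent at "rnp", walking up while masks drain to zero. */
static void report_qs(struct tnode *rnp, unsigned long mask)
{
	while (rnp) {
		rnp->qsmask &= ~mask;
		if (rnp->qsmask)	/* siblings still owe a QS: stop here */
			return;
		mask = rnp->grpmask;	/* node done: clear its bit one level up */
		rnp = rnp->parent;
	}
	puts("root qsmask empty: the grace period may end");
}

int main(void)
{
	report_qs(&leaf[0], 0x1);	/* CPU 0 */
	report_qs(&leaf[1], 0x1);	/* CPU 2: drains leaf 1 and root bit 1 */
	report_qs(&leaf[0], 0x2);	/* CPU 1: drains leaf 0, then the root */
	return 0;
}
```
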
| 2377 | 2210 |  /* | 
|---|
| 2378 | 2211 |   * Record a quiescent state for all tasks that were previously queued | 
|---|
| 2379 | 2212 |   * on the specified rcu_node structure and that were blocking the current | 
|---|
| 2380 |  | - * RCU grace period.  The caller must hold the specified rnp->lock with  | 
|---|
 | 2213 | + * RCU grace period.  The caller must hold the corresponding rnp->lock with  | 
|---|
| 2381 | 2214 |   * irqs disabled, and this lock is released upon return, but irqs remain | 
|---|
| 2382 | 2215 |   * disabled. | 
|---|
| 2383 | 2216 |   */ | 
|---|
| 2384 | 2217 |  static void __maybe_unused | 
|---|
| 2385 |  | -rcu_report_unblock_qs_rnp(struct rcu_state *rsp,  | 
|---|
| 2386 |  | -			  struct rcu_node *rnp, unsigned long flags)  | 
|---|
 | 2218 | +rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)  | 
|---|
| 2387 | 2219 |  	__releases(rnp->lock) | 
|---|
| 2388 | 2220 |  { | 
|---|
| 2389 | 2221 |  	unsigned long gps; | 
|---|
| .. | .. | 
|---|
| 2391 | 2223 |  	struct rcu_node *rnp_p; | 
|---|
| 2392 | 2224 |   | 
|---|
| 2393 | 2225 |  	raw_lockdep_assert_held_rcu_node(rnp); | 
|---|
| 2394 |  | -	if (WARN_ON_ONCE(rcu_state_p == &rcu_sched_state) ||  | 
|---|
| 2395 |  | -	    WARN_ON_ONCE(rsp != rcu_state_p) ||  | 
|---|
 | 2226 | +	if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT_RCU)) ||  | 
|---|
| 2396 | 2227 |  	    WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)) || | 
|---|
| 2397 | 2228 |  	    rnp->qsmask != 0) { | 
|---|
| 2398 | 2229 |  		raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | 
|---|
| .. | .. | 
|---|
| 2406 | 2237 |  		 * Only one rcu_node structure in the tree, so don't | 
|---|
| 2407 | 2238 |  		 * try to report up to its nonexistent parent! | 
|---|
| 2408 | 2239 |  		 */ | 
|---|
| 2409 |  | -		rcu_report_qs_rsp(rsp, flags);  | 
|---|
 | 2240 | +		rcu_report_qs_rsp(flags);  | 
|---|
| 2410 | 2241 |  		return; | 
|---|
| 2411 | 2242 |  	} | 
|---|
| 2412 | 2243 |   | 
|---|
| .. | .. | 
|---|
| 2415 | 2246 |  	mask = rnp->grpmask; | 
|---|
| 2416 | 2247 |  	raw_spin_unlock_rcu_node(rnp);	/* irqs remain disabled. */ | 
|---|
| 2417 | 2248 |  	raw_spin_lock_rcu_node(rnp_p);	/* irqs already disabled. */ | 
|---|
| 2418 |  | -	rcu_report_qs_rnp(mask, rsp, rnp_p, gps, flags);  | 
|---|
 | 2249 | +	rcu_report_qs_rnp(mask, rnp_p, gps, flags);  | 
|---|
| 2419 | 2250 |  } | 
|---|
| 2420 | 2251 |   | 
|---|
| 2421 | 2252 |  /* | 
|---|
| .. | .. | 
|---|
| 2423 | 2254 |   * structure.  This must be called from the specified CPU. | 
|---|
| 2424 | 2255 |   */ | 
|---|
| 2425 | 2256 |  static void | 
|---|
| 2426 |  | -rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)  | 
|---|
 | 2257 | +rcu_report_qs_rdp(struct rcu_data *rdp)  | 
|---|
| 2427 | 2258 |  { | 
|---|
| 2428 | 2259 |  	unsigned long flags; | 
|---|
| 2429 | 2260 |  	unsigned long mask; | 
|---|
| 2430 |  | -	bool needwake;  | 
|---|
 | 2261 | +	bool needwake = false;  | 
|---|
 | 2262 | +	const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&  | 
|---|
 | 2263 | +			       rcu_segcblist_is_offloaded(&rdp->cblist);  | 
|---|
| 2431 | 2264 |  	struct rcu_node *rnp; | 
|---|
| 2432 | 2265 |   | 
|---|
 | 2266 | +	WARN_ON_ONCE(rdp->cpu != smp_processor_id());  | 
|---|
| 2433 | 2267 |  	rnp = rdp->mynode; | 
|---|
| 2434 | 2268 |  	raw_spin_lock_irqsave_rcu_node(rnp, flags); | 
|---|
| 2435 | 2269 |  	if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq || | 
|---|
| .. | .. | 
|---|
| 2442 | 2276 |  		 * within the current grace period. | 
|---|
| 2443 | 2277 |  		 */ | 
|---|
| 2444 | 2278 |  		rdp->cpu_no_qs.b.norm = true;	/* need qs for new gp. */ | 
|---|
| 2445 |  | -		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr);  | 
|---|
| 2446 | 2279 |  		raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | 
|---|
| 2447 | 2280 |  		return; | 
|---|
| 2448 | 2281 |  	} | 
|---|
| 2449 | 2282 |  	mask = rdp->grpmask; | 
|---|
 | 2283 | +	rdp->core_needs_qs = false;  | 
|---|
| 2450 | 2284 |  	if ((rnp->qsmask & mask) == 0) { | 
|---|
| 2451 | 2285 |  		raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | 
|---|
| 2452 | 2286 |  	} else { | 
|---|
| 2453 |  | -		rdp->core_needs_qs = false;  | 
|---|
| 2454 |  | -  | 
|---|
| 2455 | 2287 |  		/* | 
|---|
| 2456 | 2288 |  		 * This GP can't end until cpu checks in, so all of our | 
|---|
| 2457 | 2289 |  		 * callbacks can be processed during the next GP. | 
|---|
| 2458 | 2290 |  		 */ | 
|---|
| 2459 |  | -		needwake = rcu_accelerate_cbs(rsp, rnp, rdp);  | 
|---|
 | 2291 | +		if (!offloaded)  | 
|---|
 | 2292 | +			needwake = rcu_accelerate_cbs(rnp, rdp);  | 
|---|
| 2460 | 2293 |   | 
|---|
| 2461 |  | -		rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags);  | 
|---|
 | 2294 | +		rcu_disable_urgency_upon_qs(rdp);  | 
|---|
 | 2295 | +		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);  | 
|---|
| 2462 | 2296 |  		/* ^^^ Released rnp->lock */ | 
|---|
| 2463 | 2297 |  		if (needwake) | 
|---|
| 2464 |  | -			rcu_gp_kthread_wake(rsp);  | 
|---|
 | 2298 | +			rcu_gp_kthread_wake();  | 
|---|
| 2465 | 2299 |  	} | 
|---|
| 2466 | 2300 |  } | 
|---|
| 2467 | 2301 |   | 
|---|
| .. | .. | 
|---|
| 2472 | 2306 |   * quiescent state for this grace period, and record that fact if so. | 
|---|
| 2473 | 2307 |   */ | 
|---|
| 2474 | 2308 |  static void | 
|---|
| 2475 |  | -rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)  | 
|---|
 | 2309 | +rcu_check_quiescent_state(struct rcu_data *rdp)  | 
|---|
| 2476 | 2310 |  { | 
|---|
| 2477 | 2311 |  	/* Check for grace-period ends and beginnings. */ | 
|---|
| 2478 |  | -	note_gp_changes(rsp, rdp);  | 
|---|
 | 2312 | +	note_gp_changes(rdp);  | 
|---|
| 2479 | 2313 |   | 
|---|
| 2480 | 2314 |  	/* | 
|---|
| 2481 | 2315 |  	 * Does this CPU still need to do its part for current grace period? | 
|---|
| .. | .. | 
|---|
| 2495 | 2329 |  	 * Tell RCU we are done (but rcu_report_qs_rdp() will be the | 
|---|
| 2496 | 2330 |  	 * judge of that). | 
|---|
| 2497 | 2331 |  	 */ | 
|---|
| 2498 |  | -	rcu_report_qs_rdp(rdp->cpu, rsp, rdp);  | 
|---|
 | 2332 | +	rcu_report_qs_rdp(rdp);  | 
|---|
| 2499 | 2333 |  } | 
|---|
| 2500 | 2334 |   | 
|---|
| 2501 | 2335 |  /* | 
|---|
| 2502 |  | - * Trace the fact that this CPU is going offline.  | 
|---|
 | 2336 | + * Near the end of the offline process.  Trace the fact that this CPU  | 
|---|
 | 2337 | + * is going offline.  | 
|---|
| 2503 | 2338 |   */ | 
|---|
| 2504 |  | -static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)  | 
|---|
 | 2339 | +int rcutree_dying_cpu(unsigned int cpu)  | 
|---|
| 2505 | 2340 |  { | 
|---|
| 2506 |  | -	RCU_TRACE(bool blkd;)  | 
|---|
| 2507 |  | -	RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda);)  | 
|---|
| 2508 |  | -	RCU_TRACE(struct rcu_node *rnp = rdp->mynode;)  | 
|---|
 | 2341 | +	bool blkd;  | 
|---|
 | 2342 | +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);  | 
|---|
 | 2343 | +	struct rcu_node *rnp = rdp->mynode;  | 
|---|
| 2509 | 2344 |   | 
|---|
| 2510 | 2345 |  	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) | 
|---|
| 2511 |  | -		return;  | 
|---|
 | 2346 | +		return 0;  | 
|---|
| 2512 | 2347 |   | 
|---|
| 2513 |  | -	RCU_TRACE(blkd = !!(rnp->qsmask & rdp->grpmask);)  | 
|---|
| 2514 |  | -	trace_rcu_grace_period(rsp->name, rnp->gp_seq,  | 
|---|
 | 2348 | +	blkd = !!(rnp->qsmask & rdp->grpmask);  | 
|---|
 | 2349 | +	trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),  | 
|---|
| 2515 | 2350 |  			       blkd ? TPS("cpuofl") : TPS("cpuofl-bgp")); | 
|---|
 | 2351 | +	return 0;  | 
|---|
| 2516 | 2352 |  } | 
|---|
| 2517 | 2353 |   | 
|---|
| 2518 | 2354 |  /* | 
|---|
| .. | .. | 
|---|
| 2566 | 2402 |   * There can only be one CPU hotplug operation at a time, so no need for | 
|---|
| 2567 | 2403 |   * explicit locking. | 
|---|
| 2568 | 2404 |   */ | 
|---|
| 2569 |  | -static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)  | 
|---|
 | 2405 | +int rcutree_dead_cpu(unsigned int cpu)  | 
|---|
| 2570 | 2406 |  { | 
|---|
| 2571 |  | -	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);  | 
|---|
 | 2407 | +	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
| 2572 | 2408 |  	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */ | 
|---|
| 2573 | 2409 |   | 
|---|
| 2574 | 2410 |  	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) | 
|---|
| 2575 |  | -		return;  | 
|---|
 | 2411 | +		return 0;  | 
|---|
| 2576 | 2412 |   | 
|---|
| 2577 | 2413 |  	/* Adjust any no-longer-needed kthreads. */ | 
|---|
| 2578 | 2414 |  	rcu_boost_kthread_setaffinity(rnp, -1); | 
|---|
 | 2415 | +	/* Do any needed no-CB deferred wakeups from this CPU. */  | 
|---|
 | 2416 | +	do_nocb_deferred_wakeup(per_cpu_ptr(&rcu_data, cpu));  | 
|---|
 | 2417 | +  | 
|---|
 | 2418 | +	// Stop-machine done, so allow nohz_full to disable tick.  | 
|---|
 | 2419 | +	tick_dep_clear(TICK_DEP_BIT_RCU);  | 
|---|
 | 2420 | +	return 0;  | 
|---|
| 2579 | 2421 |  } | 
|---|
| 2580 | 2422 |   | 
|---|
| 2581 | 2423 |  /* | 
|---|
| 2582 | 2424 |   * Invoke any RCU callbacks that have made it to the end of their grace | 
|---|
| 2583 | 2425 |   * period.  Throttle as specified by rdp->blimit. | 
|---|
| 2584 | 2426 |   */ | 
|---|
| 2585 |  | -static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)  | 
|---|
 | 2427 | +static void rcu_do_batch(struct rcu_data *rdp)  | 
|---|
| 2586 | 2428 |  { | 
|---|
 | 2429 | +	int div;  | 
|---|
| 2587 | 2430 |  	unsigned long flags; | 
|---|
 | 2431 | +	const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&  | 
|---|
 | 2432 | +			       rcu_segcblist_is_offloaded(&rdp->cblist);  | 
|---|
| 2588 | 2433 |  	struct rcu_head *rhp; | 
|---|
| 2589 | 2434 |  	struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl); | 
|---|
| 2590 | 2435 |  	long bl, count; | 
|---|
 | 2436 | +	long pending, tlimit = 0;  | 
|---|
| 2591 | 2437 |   | 
|---|
| 2592 | 2438 |  	/* If no callbacks are ready, just return. */ | 
|---|
| 2593 | 2439 |  	if (!rcu_segcblist_ready_cbs(&rdp->cblist)) { | 
|---|
| 2594 |  | -		trace_rcu_batch_start(rsp->name,  | 
|---|
| 2595 |  | -				      rcu_segcblist_n_lazy_cbs(&rdp->cblist),  | 
|---|
 | 2440 | +		trace_rcu_batch_start(rcu_state.name,  | 
|---|
| 2596 | 2441 |  				      rcu_segcblist_n_cbs(&rdp->cblist), 0); | 
|---|
| 2597 |  | -		trace_rcu_batch_end(rsp->name, 0,  | 
|---|
 | 2442 | +		trace_rcu_batch_end(rcu_state.name, 0,  | 
|---|
| 2598 | 2443 |  				    !rcu_segcblist_empty(&rdp->cblist), | 
|---|
| 2599 | 2444 |  				    need_resched(), is_idle_task(current), | 
|---|
| 2600 | 2445 |  				    rcu_is_callbacks_kthread()); | 
|---|
| .. | .. | 
|---|
| 2607 | 2452 |  	 * callback counts, as rcu_barrier() needs to be conservative. | 
|---|
| 2608 | 2453 |  	 */ | 
|---|
| 2609 | 2454 |  	local_irq_save(flags); | 
|---|
 | 2455 | +	rcu_nocb_lock(rdp);  | 
|---|
| 2610 | 2456 |  	WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); | 
|---|
| 2611 |  | -	bl = rdp->blimit;  | 
|---|
| 2612 |  | -	trace_rcu_batch_start(rsp->name, rcu_segcblist_n_lazy_cbs(&rdp->cblist),  | 
|---|
 | 2457 | +	pending = rcu_segcblist_n_cbs(&rdp->cblist);  | 
|---|
 | 2458 | +	div = READ_ONCE(rcu_divisor);  | 
|---|
 | 2459 | +	div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div;  | 
|---|
 | 2460 | +	bl = max(rdp->blimit, pending >> div);  | 
|---|
 | 2461 | +	if (in_serving_softirq() && unlikely(bl > 100)) {  | 
|---|
 | 2462 | +		long rrn = READ_ONCE(rcu_resched_ns);  | 
|---|
 | 2463 | +  | 
|---|
 | 2464 | +		rrn = rrn < NSEC_PER_MSEC ? NSEC_PER_MSEC : rrn > NSEC_PER_SEC ? NSEC_PER_SEC : rrn;  | 
|---|
 | 2465 | +		tlimit = local_clock() + rrn;  | 
|---|
 | 2466 | +	}  | 
|---|
 | 2467 | +	trace_rcu_batch_start(rcu_state.name,  | 
|---|
| 2613 | 2468 |  			      rcu_segcblist_n_cbs(&rdp->cblist), bl); | 
|---|
| 2614 | 2469 |  	rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl); | 
|---|
| 2615 |  | -	local_irq_restore(flags);  | 
|---|
 | 2470 | +	if (offloaded)  | 
|---|
 | 2471 | +		rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);  | 
|---|
 | 2472 | +	rcu_nocb_unlock_irqrestore(rdp, flags);  | 
|---|
| 2616 | 2473 |   | 
|---|
| 2617 | 2474 |  	/* Invoke callbacks. */ | 
|---|
 | 2475 | +	tick_dep_set_task(current, TICK_DEP_BIT_RCU);  | 
|---|
| 2618 | 2476 |  	rhp = rcu_cblist_dequeue(&rcl); | 
|---|
| 2619 | 2477 |  	for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) { | 
|---|
 | 2478 | +		rcu_callback_t f;  | 
|---|
 | 2479 | +  | 
|---|
| 2620 | 2480 |  		debug_rcu_head_unqueue(rhp); | 
|---|
| 2621 |  | -		if (__rcu_reclaim(rsp->name, rhp))  | 
|---|
| 2622 |  | -			rcu_cblist_dequeued_lazy(&rcl);  | 
|---|
 | 2481 | +  | 
|---|
 | 2482 | +		rcu_lock_acquire(&rcu_callback_map);  | 
|---|
 | 2483 | +		trace_rcu_invoke_callback(rcu_state.name, rhp);  | 
|---|
 | 2484 | +  | 
|---|
 | 2485 | +		f = rhp->func;  | 
|---|
 | 2486 | +		WRITE_ONCE(rhp->func, (rcu_callback_t)0L);  | 
|---|
 | 2487 | +		f(rhp);  | 
|---|
 | 2488 | +  | 
|---|
 | 2489 | +		rcu_lock_release(&rcu_callback_map);  | 
|---|
 | 2490 | +  | 
|---|
| 2623 | 2491 |  		/* | 
|---|
| 2624 | 2492 |  		 * Stop only if limit reached and CPU has something to do. | 
|---|
| 2625 | 2493 |  		 * Note: The rcl structure counts down from zero. | 
|---|
| 2626 | 2494 |  		 */ | 
|---|
| 2627 |  | -		if (-rcl.len >= bl &&  | 
|---|
| 2628 |  | -		    (need_resched() ||  | 
|---|
| 2629 |  | -		     (!is_idle_task(current) && !rcu_is_callbacks_kthread())))  | 
|---|
| 2630 |  | -			break;  | 
|---|
 | 2495 | +		if (in_serving_softirq()) {  | 
|---|
 | 2496 | +			if (-rcl.len >= bl && (need_resched() ||  | 
|---|
 | 2497 | +					(!is_idle_task(current) && !rcu_is_callbacks_kthread())))  | 
|---|
 | 2498 | +				break;  | 
|---|
 | 2499 | +  | 
|---|
 | 2500 | +			/*  | 
|---|
 | 2501 | +			 * Make sure we don't spend too much time here and deprive other  | 
|---|
 | 2502 | +			 * softirq vectors of CPU cycles.  | 
|---|
 | 2503 | +			 */  | 
|---|
 | 2504 | +			if (unlikely(tlimit)) {  | 
|---|
 | 2505 | +				/* only call local_clock() every 32 callbacks */  | 
|---|
 | 2506 | +				if (likely((-rcl.len & 31) || local_clock() < tlimit))  | 
|---|
 | 2507 | +					continue;  | 
|---|
 | 2508 | +				/* Exceeded the time limit, so leave. */  | 
|---|
 | 2509 | +				break;  | 
|---|
 | 2510 | +			}  | 
|---|
 | 2511 | +		} else {  | 
|---|
 | 2512 | +			local_bh_enable();  | 
|---|
 | 2513 | +			lockdep_assert_irqs_enabled();  | 
|---|
 | 2514 | +			cond_resched_tasks_rcu_qs();  | 
|---|
 | 2515 | +			lockdep_assert_irqs_enabled();  | 
|---|
 | 2516 | +			local_bh_disable();  | 
|---|
 | 2517 | +		}  | 
|---|
| 2631 | 2518 |  	} | 
|---|
| 2632 | 2519 |   | 
|---|
| 2633 | 2520 |  	local_irq_save(flags); | 
|---|
 | 2521 | +	rcu_nocb_lock(rdp);  | 
|---|
| 2634 | 2522 |  	count = -rcl.len; | 
|---|
| 2635 |  | -	trace_rcu_batch_end(rsp->name, count, !!rcl.head, need_resched(),  | 
|---|
 | 2523 | +	rdp->n_cbs_invoked += count;  | 
|---|
 | 2524 | +	trace_rcu_batch_end(rcu_state.name, count, !!rcl.head, need_resched(),  | 
|---|
| 2636 | 2525 |  			    is_idle_task(current), rcu_is_callbacks_kthread()); | 
|---|
| 2637 | 2526 |   | 
|---|
| 2638 | 2527 |  	/* Update counts and requeue any remaining callbacks. */ | 
|---|
| .. | .. | 
|---|
| 2642 | 2531 |   | 
|---|
| 2643 | 2532 |  	/* Reinstate batch limit if we have worked down the excess. */ | 
|---|
| 2644 | 2533 |  	count = rcu_segcblist_n_cbs(&rdp->cblist); | 
|---|
| 2645 |  | -	if (rdp->blimit == LONG_MAX && count <= qlowmark)  | 
|---|
 | 2534 | +	if (rdp->blimit >= DEFAULT_MAX_RCU_BLIMIT && count <= qlowmark)  | 
|---|
| 2646 | 2535 |  		rdp->blimit = blimit; | 
|---|
| 2647 | 2536 |   | 
|---|
| 2648 | 2537 |  	/* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ | 
|---|
| 2649 | 2538 |  	if (count == 0 && rdp->qlen_last_fqs_check != 0) { | 
|---|
| 2650 | 2539 |  		rdp->qlen_last_fqs_check = 0; | 
|---|
| 2651 |  | -		rdp->n_force_qs_snap = rsp->n_force_qs;  | 
|---|
 | 2540 | +		rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs);  | 
|---|
| 2652 | 2541 |  	} else if (count < rdp->qlen_last_fqs_check - qhimark) | 
|---|
| 2653 | 2542 |  		rdp->qlen_last_fqs_check = count; | 
|---|
| 2654 | 2543 |   | 
|---|
| .. | .. | 
|---|
| 2656 | 2545 |  	 * The following usually indicates a double call_rcu().  To track | 
|---|
| 2657 | 2546 |  	 * this down, try building with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y. | 
|---|
| 2658 | 2547 |  	 */ | 
|---|
| 2659 |  | -	WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) != (count == 0));  | 
|---|
 | 2548 | +	WARN_ON_ONCE(count == 0 && !rcu_segcblist_empty(&rdp->cblist));  | 
|---|
 | 2549 | +	WARN_ON_ONCE(!IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&  | 
|---|
 | 2550 | +		     count != 0 && rcu_segcblist_empty(&rdp->cblist));  | 
|---|
| 2660 | 2551 |   | 
|---|
| 2661 |  | -	local_irq_restore(flags);  | 
|---|
 | 2552 | +	rcu_nocb_unlock_irqrestore(rdp, flags);  | 
|---|
| 2662 | 2553 |   | 
|---|
| 2663 | 2554 |  	/* Re-invoke RCU core processing if there are callbacks remaining. */ | 
|---|
| 2664 |  | -	if (rcu_segcblist_ready_cbs(&rdp->cblist))  | 
|---|
 | 2555 | +	if (!offloaded && rcu_segcblist_ready_cbs(&rdp->cblist))  | 
|---|
| 2665 | 2556 |  		invoke_rcu_core(); | 
|---|
 | 2557 | +	tick_dep_clear_task(current, TICK_DEP_BIT_RCU);  | 
|---|
| 2666 | 2558 |  } | 
|---|
| 2667 | 2559 |   | 
|---|
| 2668 | 2560 |  /* | 
|---|
| 2669 |  | - * Check to see if this CPU is in a non-context-switch quiescent state  | 
|---|
| 2670 |  | - * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).  | 
|---|
| 2671 |  | - * Also schedule RCU core processing.  | 
|---|
| 2672 |  | - *  | 
|---|
| 2673 |  | - * This function must be called from hardirq context.  It is normally  | 
|---|
| 2674 |  | - * invoked from the scheduling-clock interrupt.  | 
|---|
 | 2561 | + * This function is invoked from each scheduling-clock interrupt,  | 
|---|
 | 2562 | + * and checks to see if this CPU is in a non-context-switch quiescent  | 
|---|
 | 2563 | + * state, for example, user mode or idle loop.  It also schedules RCU  | 
|---|
 | 2564 | + * core processing.  If the current grace period has gone on too long,  | 
|---|
 | 2565 | + * it will ask the scheduler to manufacture a context switch for the sole  | 
|---|
 | 2566 | + * purpose of providing the needed quiescent state.  | 
|---|
| 2675 | 2567 |   */ | 
|---|
| 2676 |  | -void rcu_check_callbacks(int user)  | 
|---|
 | 2568 | +void rcu_sched_clock_irq(int user)  | 
|---|
| 2677 | 2569 |  { | 
|---|
| 2678 | 2570 |  	trace_rcu_utilization(TPS("Start scheduler-tick")); | 
|---|
| 2679 |  | -	increment_cpu_stall_ticks();  | 
|---|
| 2680 |  | -	if (user || rcu_is_cpu_rrupt_from_idle()) {  | 
|---|
| 2681 |  | -  | 
|---|
| 2682 |  | -		/*  | 
|---|
| 2683 |  | -		 * Get here if this CPU took its interrupt from user  | 
|---|
| 2684 |  | -		 * mode or from the idle loop, and if this is not a  | 
|---|
| 2685 |  | -		 * nested interrupt.  In this case, the CPU is in  | 
|---|
| 2686 |  | -		 * a quiescent state, so note it.  | 
|---|
| 2687 |  | -		 *  | 
|---|
| 2688 |  | -		 * No memory barrier is required here because both  | 
|---|
| 2689 |  | -		 * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local  | 
|---|
| 2690 |  | -		 * variables that other CPUs neither access nor modify,  | 
|---|
| 2691 |  | -		 * at least not while the corresponding CPU is online.  | 
|---|
| 2692 |  | -		 */  | 
|---|
| 2693 |  | -  | 
|---|
| 2694 |  | -		rcu_sched_qs();  | 
|---|
| 2695 |  | -		rcu_bh_qs();  | 
|---|
| 2696 |  | -		rcu_note_voluntary_context_switch(current);  | 
|---|
| 2697 |  | -  | 
|---|
| 2698 |  | -	} else if (!in_softirq()) {  | 
|---|
| 2699 |  | -  | 
|---|
| 2700 |  | -		/*  | 
|---|
| 2701 |  | -		 * Get here if this CPU did not take its interrupt from  | 
|---|
| 2702 |  | -		 * softirq, in other words, if it is not interrupting  | 
|---|
| 2703 |  | -		 * a rcu_bh read-side critical section.  This is an _bh  | 
|---|
| 2704 |  | -		 * critical section, so note it.  | 
|---|
| 2705 |  | -		 */  | 
|---|
| 2706 |  | -  | 
|---|
| 2707 |  | -		rcu_bh_qs();  | 
|---|
| 2708 |  | -	}  | 
|---|
| 2709 |  | -	rcu_preempt_check_callbacks();  | 
|---|
 | 2571 | +	lockdep_assert_irqs_disabled();  | 
|---|
 | 2572 | +	raw_cpu_inc(rcu_data.ticks_this_gp);  | 
|---|
| 2710 | 2573 |  	/* The load-acquire pairs with the store-release setting to true. */ | 
|---|
| 2711 |  | -	if (smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) {  | 
|---|
 | 2574 | +	if (smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {  | 
|---|
| 2712 | 2575 |  		/* Idle and userspace execution already are quiescent states. */ | 
|---|
| 2713 | 2576 |  		if (!rcu_is_cpu_rrupt_from_idle() && !user) { | 
|---|
| 2714 | 2577 |  			set_tsk_need_resched(current); | 
|---|
| 2715 | 2578 |  			set_preempt_need_resched(); | 
|---|
| 2716 | 2579 |  		} | 
|---|
| 2717 |  | -		__this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);  | 
|---|
 | 2580 | +		__this_cpu_write(rcu_data.rcu_urgent_qs, false);  | 
|---|
| 2718 | 2581 |  	} | 
|---|
| 2719 |  | -	if (rcu_pending())  | 
|---|
 | 2582 | +	rcu_flavor_sched_clock_irq(user);  | 
|---|
 | 2583 | +	if (rcu_pending(user))  | 
|---|
| 2720 | 2584 |  		invoke_rcu_core(); | 
|---|
 | 2585 | +	lockdep_assert_irqs_disabled();  | 
|---|
| 2721 | 2586 |   | 
|---|
| 2722 | 2587 |  	trace_rcu_utilization(TPS("End scheduler-tick")); | 
|---|
| 2723 | 2588 |  } | 
|---|
| 2724 | 2589 |   | 
|---|
| 2725 | 2590 |  /* | 
|---|
| 2726 |  | - * Scan the leaf rcu_node structures, processing dyntick state for any that  | 
|---|
| 2727 |  | - * have not yet encountered a quiescent state, using the function specified.  | 
|---|
| 2728 |  | - * Also initiate boosting for any threads blocked on the root rcu_node.  | 
|---|
| 2729 |  | - *  | 
|---|
| 2730 |  | - * The caller must have suppressed start of new grace periods.  | 
|---|
 | 2591 | + * Scan the leaf rcu_node structures.  For each structure on which all  | 
|---|
 | 2592 | + * CPUs have reported a quiescent state and on which there are tasks  | 
|---|
 | 2593 | + * blocking the current grace period, initiate RCU priority boosting.  | 
|---|
 | 2594 | + * Otherwise, invoke the specified function to check dyntick state for  | 
|---|
 | 2595 | + * each CPU that has not yet reported a quiescent state.  | 
|---|
| 2731 | 2596 |   */ | 
|---|
| 2732 |  | -static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))  | 
|---|
 | 2597 | +static void force_qs_rnp(int (*f)(struct rcu_data *rdp))  | 
|---|
| 2733 | 2598 |  { | 
|---|
| 2734 | 2599 |  	int cpu; | 
|---|
| 2735 | 2600 |  	unsigned long flags; | 
|---|
| 2736 | 2601 |  	unsigned long mask; | 
|---|
 | 2602 | +	struct rcu_data *rdp;  | 
|---|
| 2737 | 2603 |  	struct rcu_node *rnp; | 
|---|
| 2738 | 2604 |   | 
|---|
| 2739 |  | -	rcu_for_each_leaf_node(rsp, rnp) {  | 
|---|
 | 2605 | +	rcu_state.cbovld = rcu_state.cbovldnext;  | 
|---|
 | 2606 | +	rcu_state.cbovldnext = false;  | 
|---|
 | 2607 | +	rcu_for_each_leaf_node(rnp) {  | 
|---|
| 2740 | 2608 |  		cond_resched_tasks_rcu_qs(); | 
|---|
| 2741 | 2609 |  		mask = 0; | 
|---|
| 2742 | 2610 |  		raw_spin_lock_irqsave_rcu_node(rnp, flags); | 
|---|
 | 2611 | +		rcu_state.cbovldnext |= !!rnp->cbovldmask;  | 
|---|
| 2743 | 2612 |  		if (rnp->qsmask == 0) { | 
|---|
| 2744 |  | -			if (rcu_state_p == &rcu_sched_state ||  | 
|---|
| 2745 |  | -			    rsp != rcu_state_p ||  | 
|---|
| 2746 |  | -			    rcu_preempt_blocked_readers_cgp(rnp)) {  | 
|---|
 | 2613 | +			if (rcu_preempt_blocked_readers_cgp(rnp)) {  | 
|---|
| 2747 | 2614 |  				/* | 
|---|
| 2748 | 2615 |  				 * No point in scanning bits because they | 
|---|
| 2749 | 2616 |  				 * are all zero.  But we might need to | 
|---|
| .. | .. | 
|---|
| 2756 | 2623 |  			raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | 
|---|
| 2757 | 2624 |  			continue; | 
|---|
| 2758 | 2625 |  		} | 
|---|
| 2759 |  | -		for_each_leaf_node_possible_cpu(rnp, cpu) {  | 
|---|
| 2760 |  | -			unsigned long bit = leaf_node_cpu_bit(rnp, cpu);  | 
|---|
| 2761 |  | -			if ((rnp->qsmask & bit) != 0) {  | 
|---|
| 2762 |  | -				if (f(per_cpu_ptr(rsp->rda, cpu)))  | 
|---|
| 2763 |  | -					mask |= bit;  | 
|---|
 | 2626 | +		for_each_leaf_node_cpu_mask(rnp, cpu, rnp->qsmask) {  | 
|---|
 | 2627 | +			rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
 | 2628 | +			if (f(rdp)) {  | 
|---|
 | 2629 | +				mask |= rdp->grpmask;  | 
|---|
 | 2630 | +				rcu_disable_urgency_upon_qs(rdp);  | 
|---|
| 2764 | 2631 |  			} | 
|---|
| 2765 | 2632 |  		} | 
|---|
| 2766 | 2633 |  		if (mask != 0) { | 
|---|
| 2767 | 2634 |  			/* Idle/offline CPUs, report (releases rnp->lock). */ | 
|---|
| 2768 |  | -			rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags);  | 
|---|
 | 2635 | +			rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);  | 
|---|
| 2769 | 2636 |  		} else { | 
|---|
| 2770 | 2637 |  			/* Nothing to do here, so just drop the lock. */ | 
|---|
| 2771 | 2638 |  			raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | 
|---|
| .. | .. | 
|---|
| 2777 | 2644 |   * Force quiescent states on reluctant CPUs, and also detect which | 
|---|
| 2778 | 2645 |   * CPUs are in dyntick-idle mode. | 
|---|
| 2779 | 2646 |   */ | 
|---|
| 2780 |  | -static void force_quiescent_state(struct rcu_state *rsp)  | 
|---|
 | 2647 | +void rcu_force_quiescent_state(void)  | 
|---|
| 2781 | 2648 |  { | 
|---|
| 2782 | 2649 |  	unsigned long flags; | 
|---|
| 2783 | 2650 |  	bool ret; | 
|---|
| .. | .. | 
|---|
| 2785 | 2652 |  	struct rcu_node *rnp_old = NULL; | 
|---|
| 2786 | 2653 |   | 
|---|
| 2787 | 2654 |  	/* Funnel through hierarchy to reduce memory contention. */ | 
|---|
| 2788 |  | -	rnp = __this_cpu_read(rsp->rda->mynode);  | 
|---|
 | 2655 | +	rnp = __this_cpu_read(rcu_data.mynode);  | 
|---|
| 2789 | 2656 |  	for (; rnp != NULL; rnp = rnp->parent) { | 
|---|
| 2790 |  | -		ret = (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) ||  | 
|---|
| 2791 |  | -		      !raw_spin_trylock(&rnp->fqslock);  | 
|---|
 | 2657 | +		ret = (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) ||  | 
|---|
 | 2658 | +		       !raw_spin_trylock(&rnp->fqslock);  | 
|---|
| 2792 | 2659 |  		if (rnp_old != NULL) | 
|---|
| 2793 | 2660 |  			raw_spin_unlock(&rnp_old->fqslock); | 
|---|
| 2794 | 2661 |  		if (ret) | 
|---|
| 2795 | 2662 |  			return; | 
|---|
| 2796 | 2663 |  		rnp_old = rnp; | 
|---|
| 2797 | 2664 |  	} | 
|---|
| 2798 |  | -	/* rnp_old == rcu_get_root(rsp), rnp == NULL. */  | 
|---|
 | 2665 | +	/* rnp_old == rcu_get_root(), rnp == NULL. */  | 
|---|
| 2799 | 2666 |   | 
|---|
| 2800 | 2667 |  	/* Reached the root of the rcu_node tree, acquire lock. */ | 
|---|
| 2801 | 2668 |  	raw_spin_lock_irqsave_rcu_node(rnp_old, flags); | 
|---|
| 2802 | 2669 |  	raw_spin_unlock(&rnp_old->fqslock); | 
|---|
| 2803 |  | -	if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {  | 
|---|
 | 2670 | +	if (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) {  | 
|---|
| 2804 | 2671 |  		raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags); | 
|---|
| 2805 | 2672 |  		return;  /* Someone beat us to it. */ | 
|---|
| 2806 | 2673 |  	} | 
|---|
| 2807 |  | -	WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);  | 
|---|
 | 2674 | +	WRITE_ONCE(rcu_state.gp_flags,  | 
|---|
 | 2675 | +		   READ_ONCE(rcu_state.gp_flags) | RCU_GP_FLAG_FQS);  | 
|---|
| 2808 | 2676 |  	raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags); | 
|---|
| 2809 |  | -	rcu_gp_kthread_wake(rsp);  | 
|---|
 | 2677 | +	rcu_gp_kthread_wake();  | 
|---|
 | 2678 | +}  | 
|---|
 | 2679 | +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);  | 
|---|
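
The funnel-locking loop above is the contention-avoidance trick here: each caller trylocks its leaf rcu_node's ->fqslock and works upward, releasing the previous level as it goes, and backs off as soon as RCU_GP_FLAG_FQS is already set or another caller holds a level's lock, so at most one redundant request at a time can reach the root. The sketch below is a minimal userspace model of that control flow only (hypothetical `struct node` and `funnel_to_root()` names; it omits the root rcu_node ->lock, interrupt handling, and the GP-kthread wakeup that the real function performs):

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct node {
	struct node *parent;
	atomic_flag fqslock;		/* stands in for rnp->fqslock */
};

static bool gp_flag_fqs;		/* stands in for RCU_GP_FLAG_FQS */

/* Walk leaf -> root, holding at most one fqslock at a time. */
static bool funnel_to_root(struct node *leaf)
{
	struct node *rnp, *rnp_old = NULL;

	for (rnp = leaf; rnp; rnp = rnp->parent) {
		bool lost = gp_flag_fqs ||
			    atomic_flag_test_and_set(&rnp->fqslock);
		if (rnp_old)
			atomic_flag_clear(&rnp_old->fqslock);
		if (lost)
			return false;	/* someone else will force the GP */
		rnp_old = rnp;
	}
	/* rnp_old is now the root; the real code sets RCU_GP_FLAG_FQS here. */
	gp_flag_fqs = true;
	atomic_flag_clear(&rnp_old->fqslock);
	return true;
}

int main(void)
{
	struct node root = { .parent = NULL, .fqslock = ATOMIC_FLAG_INIT };
	struct node leaf = { .parent = &root, .fqslock = ATOMIC_FLAG_INIT };

	printf("first caller wins: %d\n", funnel_to_root(&leaf));
	printf("later caller backs off: %d\n", funnel_to_root(&leaf));
	return 0;
}
```
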
 | 2680 | +  | 
|---|
 | 2681 | +// Workqueue handler for an RCU reader for kernels enforcing strict RCU  | 
|---|
 | 2682 | +// grace periods.  | 
|---|
 | 2683 | +static void strict_work_handler(struct work_struct *work)  | 
|---|
 | 2684 | +{  | 
|---|
 | 2685 | +	rcu_read_lock();  | 
|---|
 | 2686 | +	rcu_read_unlock();  | 
|---|
| 2810 | 2687 |  } | 
|---|
| 2811 | 2688 |   | 
|---|
| 2812 |  | -/*  | 
|---|
| 2813 |  | - * This function checks for grace-period requests that fail to motivate  | 
|---|
| 2814 |  | - * RCU to come out of its idle mode.  | 
|---|
| 2815 |  | - */  | 
|---|
| 2816 |  | -static void  | 
|---|
| 2817 |  | -rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,  | 
|---|
| 2818 |  | -			 struct rcu_data *rdp)  | 
|---|
| 2819 |  | -{  | 
|---|
| 2820 |  | -	const unsigned long gpssdelay = rcu_jiffies_till_stall_check() * HZ;  | 
|---|
| 2821 |  | -	unsigned long flags;  | 
|---|
| 2822 |  | -	unsigned long j;  | 
|---|
| 2823 |  | -	struct rcu_node *rnp_root = rcu_get_root(rsp);  | 
|---|
| 2824 |  | -	static atomic_t warned = ATOMIC_INIT(0);  | 
|---|
| 2825 |  | -  | 
|---|
| 2826 |  | -	if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress(rsp) ||  | 
|---|
| 2827 |  | -	    ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed))  | 
|---|
| 2828 |  | -		return;  | 
|---|
| 2829 |  | -	j = jiffies; /* Expensive access, and in common case don't get here. */  | 
|---|
| 2830 |  | -	if (time_before(j, READ_ONCE(rsp->gp_req_activity) + gpssdelay) ||  | 
|---|
| 2831 |  | -	    time_before(j, READ_ONCE(rsp->gp_activity) + gpssdelay) ||  | 
|---|
| 2832 |  | -	    atomic_read(&warned))  | 
|---|
| 2833 |  | -		return;  | 
|---|
| 2834 |  | -  | 
|---|
| 2835 |  | -	raw_spin_lock_irqsave_rcu_node(rnp, flags);  | 
|---|
| 2836 |  | -	j = jiffies;  | 
|---|
| 2837 |  | -	if (rcu_gp_in_progress(rsp) ||  | 
|---|
| 2838 |  | -	    ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||  | 
|---|
| 2839 |  | -	    time_before(j, READ_ONCE(rsp->gp_req_activity) + gpssdelay) ||  | 
|---|
| 2840 |  | -	    time_before(j, READ_ONCE(rsp->gp_activity) + gpssdelay) ||  | 
|---|
| 2841 |  | -	    atomic_read(&warned)) {  | 
|---|
| 2842 |  | -		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  | 
|---|
| 2843 |  | -		return;  | 
|---|
| 2844 |  | -	}  | 
|---|
| 2845 |  | -	/* Hold onto the leaf lock to make others see warned==1. */  | 
|---|
| 2846 |  | -  | 
|---|
| 2847 |  | -	if (rnp_root != rnp)  | 
|---|
| 2848 |  | -		raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */  | 
|---|
| 2849 |  | -	j = jiffies;  | 
|---|
| 2850 |  | -	if (rcu_gp_in_progress(rsp) ||  | 
|---|
| 2851 |  | -	    ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||  | 
|---|
| 2852 |  | -	    time_before(j, rsp->gp_req_activity + gpssdelay) ||  | 
|---|
| 2853 |  | -	    time_before(j, rsp->gp_activity + gpssdelay) ||  | 
|---|
| 2854 |  | -	    atomic_xchg(&warned, 1)) {  | 
|---|
| 2855 |  | -		raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */  | 
|---|
| 2856 |  | -		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  | 
|---|
| 2857 |  | -		return;  | 
|---|
| 2858 |  | -	}  | 
|---|
| 2859 |  | -	pr_alert("%s: g%ld->%ld gar:%lu ga:%lu f%#x gs:%d %s->state:%#lx\n",  | 
|---|
| 2860 |  | -		 __func__, (long)READ_ONCE(rsp->gp_seq),  | 
|---|
| 2861 |  | -		 (long)READ_ONCE(rnp_root->gp_seq_needed),  | 
|---|
| 2862 |  | -		 j - rsp->gp_req_activity, j - rsp->gp_activity,  | 
|---|
| 2863 |  | -		 rsp->gp_flags, rsp->gp_state, rsp->name,  | 
|---|
| 2864 |  | -		 rsp->gp_kthread ? rsp->gp_kthread->state : 0x1ffffL);  | 
|---|
| 2865 |  | -	WARN_ON(1);  | 
|---|
| 2866 |  | -	if (rnp_root != rnp)  | 
|---|
| 2867 |  | -		raw_spin_unlock_rcu_node(rnp_root);  | 
|---|
| 2868 |  | -	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  | 
|---|
| 2869 |  | -}  | 
|---|
| 2870 |  | -  | 
|---|
| 2871 |  | -/*  | 
|---|
| 2872 |  | - * This does the RCU core processing work for the specified rcu_state  | 
|---|
| 2873 |  | - * and rcu_data structures.  This may be called only from the CPU to  | 
|---|
| 2874 |  | - * whom the rdp belongs.  | 
|---|
| 2875 |  | - */  | 
|---|
| 2876 |  | -static void  | 
|---|
| 2877 |  | -__rcu_process_callbacks(struct rcu_state *rsp)  | 
|---|
 | 2689 | +/* Perform RCU core processing work for the current CPU.  */  | 
|---|
 | 2690 | +static __latent_entropy void rcu_core(void)  | 
|---|
| 2878 | 2691 |  { | 
|---|
| 2879 | 2692 |  	unsigned long flags; | 
|---|
| 2880 |  | -	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);  | 
|---|
 | 2693 | +	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);  | 
|---|
| 2881 | 2694 |  	struct rcu_node *rnp = rdp->mynode; | 
|---|
| 2882 |  | -  | 
|---|
| 2883 |  | -	WARN_ON_ONCE(!rdp->beenonline);  | 
|---|
| 2884 |  | -  | 
|---|
| 2885 |  | -	/* Update RCU state based on any recent quiescent states. */  | 
|---|
| 2886 |  | -	rcu_check_quiescent_state(rsp, rdp);  | 
|---|
| 2887 |  | -  | 
|---|
| 2888 |  | -	/* No grace period and unregistered callbacks? */  | 
|---|
| 2889 |  | -	if (!rcu_gp_in_progress(rsp) &&  | 
|---|
| 2890 |  | -	    rcu_segcblist_is_enabled(&rdp->cblist)) {  | 
|---|
| 2891 |  | -		local_irq_save(flags);  | 
|---|
| 2892 |  | -		if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))  | 
|---|
| 2893 |  | -			rcu_accelerate_cbs_unlocked(rsp, rnp, rdp);  | 
|---|
| 2894 |  | -		local_irq_restore(flags);  | 
|---|
| 2895 |  | -	}  | 
|---|
| 2896 |  | -  | 
|---|
| 2897 |  | -	rcu_check_gp_start_stall(rsp, rnp, rdp);  | 
|---|
| 2898 |  | -  | 
|---|
| 2899 |  | -	/* If there are callbacks ready, invoke them. */  | 
|---|
| 2900 |  | -	if (rcu_segcblist_ready_cbs(&rdp->cblist))  | 
|---|
| 2901 |  | -		invoke_rcu_callbacks(rsp, rdp);  | 
|---|
| 2902 |  | -  | 
|---|
| 2903 |  | -	/* Do any needed deferred wakeups of rcuo kthreads. */  | 
|---|
| 2904 |  | -	do_nocb_deferred_wakeup(rdp);  | 
|---|
| 2905 |  | -}  | 
|---|
| 2906 |  | -  | 
|---|
| 2907 |  | -/*  | 
|---|
| 2908 |  | - * Do RCU core processing for the current CPU.  | 
|---|
| 2909 |  | - */  | 
|---|
| 2910 |  | -static __latent_entropy void rcu_process_callbacks(void)  | 
|---|
| 2911 |  | -{  | 
|---|
| 2912 |  | -	struct rcu_state *rsp;  | 
|---|
 | 2695 | +	const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&  | 
|---|
 | 2696 | +			       rcu_segcblist_is_offloaded(&rdp->cblist);  | 
|---|
| 2913 | 2697 |   | 
|---|
| 2914 | 2698 |  	if (cpu_is_offline(smp_processor_id())) | 
|---|
| 2915 | 2699 |  		return; | 
|---|
| 2916 |  | -	for_each_rcu_flavor(rsp)  | 
|---|
| 2917 |  | -		__rcu_process_callbacks(rsp);  | 
|---|
 | 2700 | +	trace_rcu_utilization(TPS("Start RCU core"));  | 
|---|
 | 2701 | +	WARN_ON_ONCE(!rdp->beenonline);  | 
|---|
 | 2702 | +  | 
|---|
 | 2703 | +	/* Report any deferred quiescent states if preemption enabled. */  | 
|---|
 | 2704 | +	if (!(preempt_count() & PREEMPT_MASK)) {  | 
|---|
 | 2705 | +		rcu_preempt_deferred_qs(current);  | 
|---|
 | 2706 | +	} else if (rcu_preempt_need_deferred_qs(current)) {  | 
|---|
 | 2707 | +		set_tsk_need_resched(current);  | 
|---|
 | 2708 | +		set_preempt_need_resched();  | 
|---|
 | 2709 | +	}  | 
|---|
 | 2710 | +  | 
|---|
 | 2711 | +	/* Update RCU state based on any recent quiescent states. */  | 
|---|
 | 2712 | +	rcu_check_quiescent_state(rdp);  | 
|---|
 | 2713 | +  | 
|---|
 | 2714 | +	/* No grace period and unregistered callbacks? */  | 
|---|
 | 2715 | +	if (!rcu_gp_in_progress() &&  | 
|---|
 | 2716 | +	    rcu_segcblist_is_enabled(&rdp->cblist) && !offloaded) {  | 
|---|
 | 2717 | +		local_irq_save(flags);  | 
|---|
 | 2718 | +		if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))  | 
|---|
 | 2719 | +			rcu_accelerate_cbs_unlocked(rnp, rdp);  | 
|---|
 | 2720 | +		local_irq_restore(flags);  | 
|---|
 | 2721 | +	}  | 
|---|
 | 2722 | +  | 
|---|
 | 2723 | +	rcu_check_gp_start_stall(rnp, rdp, rcu_jiffies_till_stall_check());  | 
|---|
 | 2724 | +  | 
|---|
 | 2725 | +	/* If there are callbacks ready, invoke them. */  | 
|---|
 | 2726 | +	if (!offloaded && rcu_segcblist_ready_cbs(&rdp->cblist) &&  | 
|---|
 | 2727 | +	    likely(READ_ONCE(rcu_scheduler_fully_active)))  | 
|---|
 | 2728 | +		rcu_do_batch(rdp);  | 
|---|
 | 2729 | +  | 
|---|
 | 2730 | +	/* Do any needed deferred wakeups of rcuo kthreads. */  | 
|---|
 | 2731 | +	do_nocb_deferred_wakeup(rdp);  | 
|---|
 | 2732 | +	trace_rcu_utilization(TPS("End RCU core"));  | 
|---|
 | 2733 | +  | 
|---|
 | 2734 | +	// If strict GPs, schedule an RCU reader in a clean environment.  | 
|---|
 | 2735 | +	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))  | 
|---|
 | 2736 | +		queue_work_on(rdp->cpu, rcu_gp_wq, &rdp->strict_work);  | 
|---|
| 2918 | 2737 |  } | 
|---|
| 2919 | 2738 |   | 
|---|
| 2920 |  | -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);  | 
|---|
| 2921 |  | -/*  | 
|---|
| 2922 |  | - * Schedule RCU callback invocation.  If the specified type of RCU  | 
|---|
| 2923 |  | - * does not support RCU priority boosting, just do a direct call,  | 
|---|
| 2924 |  | - * otherwise wake up the per-CPU kernel kthread.  Note that because we  | 
|---|
| 2925 |  | - * are running on the current CPU with softirqs disabled, the  | 
|---|
| 2926 |  | - * rcu_cpu_kthread_task cannot disappear out from under us.  | 
|---|
| 2927 |  | - */  | 
|---|
| 2928 |  | -static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)  | 
|---|
 | 2739 | +static void rcu_core_si(struct softirq_action *h)  | 
|---|
| 2929 | 2740 |  { | 
|---|
| 2930 |  | -	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))  | 
|---|
| 2931 |  | -		return;  | 
|---|
| 2932 |  | -	rcu_do_batch(rsp, rdp);  | 
|---|
 | 2741 | +	rcu_core();  | 
|---|
| 2933 | 2742 |  } | 
|---|
| 2934 | 2743 |   | 
|---|
| 2935 | 2744 |  static void rcu_wake_cond(struct task_struct *t, int status) | 
|---|
| .. | .. | 
|---|
| 2942 | 2751 |  		wake_up_process(t); | 
|---|
| 2943 | 2752 |  } | 
|---|
| 2944 | 2753 |   | 
|---|
 | 2754 | +static void invoke_rcu_core_kthread(void)  | 
|---|
 | 2755 | +{  | 
|---|
 | 2756 | +	struct task_struct *t;  | 
|---|
 | 2757 | +	unsigned long flags;  | 
|---|
 | 2758 | +  | 
|---|
 | 2759 | +	local_irq_save(flags);  | 
|---|
 | 2760 | +	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);  | 
|---|
 | 2761 | +	t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);  | 
|---|
 | 2762 | +	if (t != NULL && t != current)  | 
|---|
 | 2763 | +		rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));  | 
|---|
 | 2764 | +	local_irq_restore(flags);  | 
|---|
 | 2765 | +}  | 
|---|
 | 2766 | +  | 
|---|
| 2945 | 2767 |  /* | 
|---|
| 2946 | 2768 |   * Wake up this CPU's rcuc kthread to do RCU core processing. | 
|---|
| 2947 | 2769 |   */ | 
|---|
| 2948 | 2770 |  static void invoke_rcu_core(void) | 
|---|
| 2949 | 2771 |  { | 
|---|
| 2950 |  | -	unsigned long flags;  | 
|---|
| 2951 |  | -	struct task_struct *t;  | 
|---|
| 2952 |  | -  | 
|---|
| 2953 | 2772 |  	if (!cpu_online(smp_processor_id())) | 
|---|
| 2954 | 2773 |  		return; | 
|---|
| 2955 |  | -	local_irq_save(flags);  | 
|---|
| 2956 |  | -	__this_cpu_write(rcu_cpu_has_work, 1);  | 
|---|
| 2957 |  | -	t = __this_cpu_read(rcu_cpu_kthread_task);  | 
|---|
| 2958 |  | -	if (t != NULL && current != t)  | 
|---|
| 2959 |  | -		rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status));  | 
|---|
| 2960 |  | -	local_irq_restore(flags);  | 
|---|
 | 2774 | +	if (use_softirq)  | 
|---|
 | 2775 | +		raise_softirq(RCU_SOFTIRQ);  | 
|---|
 | 2776 | +	else  | 
|---|
 | 2777 | +		invoke_rcu_core_kthread();  | 
|---|
| 2961 | 2778 |  } | 
|---|
| 2962 | 2779 |   | 
|---|
| 2963 | 2780 |  static void rcu_cpu_kthread_park(unsigned int cpu) | 
|---|
| 2964 | 2781 |  { | 
|---|
| 2965 |  | -	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;  | 
|---|
 | 2782 | +	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;  | 
|---|
| 2966 | 2783 |  } | 
|---|
| 2967 | 2784 |   | 
|---|
| 2968 | 2785 |  static int rcu_cpu_kthread_should_run(unsigned int cpu) | 
|---|
| 2969 | 2786 |  { | 
|---|
| 2970 |  | -	return __this_cpu_read(rcu_cpu_has_work);  | 
|---|
 | 2787 | +	return __this_cpu_read(rcu_data.rcu_cpu_has_work);  | 
|---|
| 2971 | 2788 |  } | 
|---|
| 2972 | 2789 |   | 
|---|
| 2973 | 2790 |  /* | 
|---|
| 2974 |  | - * Per-CPU kernel thread that invokes RCU callbacks.  This replaces the  | 
|---|
| 2975 |  | - * RCU softirq used in flavors and configurations of RCU that do not  | 
|---|
| 2976 |  | - * support RCU priority boosting.  | 
|---|
 | 2791 | + * Per-CPU kernel thread that invokes RCU callbacks.  This replaces  | 
|---|
 | 2792 | + * the RCU softirq used in configurations of RCU that do not support RCU  | 
|---|
 | 2793 | + * priority boosting.  | 
|---|
| 2977 | 2794 |   */ | 
|---|
| 2978 | 2795 |  static void rcu_cpu_kthread(unsigned int cpu) | 
|---|
| 2979 | 2796 |  { | 
|---|
| 2980 |  | -	unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);  | 
|---|
| 2981 |  | -	char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);  | 
|---|
 | 2797 | +	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);  | 
|---|
 | 2798 | +	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);  | 
|---|
| 2982 | 2799 |  	int spincnt; | 
|---|
| 2983 | 2800 |   | 
|---|
 | 2801 | +	trace_rcu_utilization(TPS("Start CPU kthread@rcu_run"));  | 
|---|
| 2984 | 2802 |  	for (spincnt = 0; spincnt < 10; spincnt++) { | 
|---|
| 2985 |  | -		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));  | 
|---|
| 2986 | 2803 |  		local_bh_disable(); | 
|---|
| 2987 | 2804 |  		*statusp = RCU_KTHREAD_RUNNING; | 
|---|
| 2988 |  | -		this_cpu_inc(rcu_cpu_kthread_loops);  | 
|---|
| 2989 | 2805 |  		local_irq_disable(); | 
|---|
| 2990 | 2806 |  		work = *workp; | 
|---|
| 2991 | 2807 |  		*workp = 0; | 
|---|
| 2992 | 2808 |  		local_irq_enable(); | 
|---|
| 2993 | 2809 |  		if (work) | 
|---|
| 2994 |  | -			rcu_process_callbacks();  | 
|---|
 | 2810 | +			rcu_core();  | 
|---|
| 2995 | 2811 |  		local_bh_enable(); | 
|---|
| 2996 | 2812 |  		if (*workp == 0) { | 
|---|
| 2997 | 2813 |  			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); | 
|---|
| .. | .. | 
|---|
| 3001 | 2817 |  	} | 
|---|
| 3002 | 2818 |  	*statusp = RCU_KTHREAD_YIELDING; | 
|---|
| 3003 | 2819 |  	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield")); | 
|---|
| 3004 |  | -	schedule_timeout_interruptible(2);  | 
|---|
 | 2820 | +	schedule_timeout_idle(2);  | 
|---|
| 3005 | 2821 |  	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); | 
|---|
| 3006 | 2822 |  	*statusp = RCU_KTHREAD_WAITING; | 
|---|
| 3007 | 2823 |  } | 
|---|
| 3008 | 2824 |   | 
|---|
| 3009 | 2825 |  static struct smp_hotplug_thread rcu_cpu_thread_spec = { | 
|---|
| 3010 |  | -	.store			= &rcu_cpu_kthread_task,  | 
|---|
 | 2826 | +	.store			= &rcu_data.rcu_cpu_kthread_task,  | 
|---|
| 3011 | 2827 |  	.thread_should_run	= rcu_cpu_kthread_should_run, | 
|---|
| 3012 | 2828 |  	.thread_fn		= rcu_cpu_kthread, | 
|---|
| 3013 | 2829 |  	.thread_comm		= "rcuc/%u", | 
|---|
| .. | .. | 
|---|
| 3023 | 2839 |  	int cpu; | 
|---|
| 3024 | 2840 |   | 
|---|
| 3025 | 2841 |  	for_each_possible_cpu(cpu) | 
|---|
| 3026 |  | -		per_cpu(rcu_cpu_has_work, cpu) = 0;  | 
|---|
| 3027 |  | -	BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));  | 
|---|
 | 2842 | +		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;  | 
|---|
 | 2843 | +	if (!IS_ENABLED(CONFIG_RCU_BOOST) && use_softirq)  | 
|---|
 | 2844 | +		return 0;  | 
|---|
 | 2845 | +	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec),  | 
|---|
 | 2846 | +		  "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);  | 
|---|
| 3028 | 2847 |  	return 0; | 
|---|
| 3029 | 2848 |  } | 
|---|
| 3030 |  | -early_initcall(rcu_spawn_core_kthreads);  | 
|---|
| 3031 | 2849 |   | 
|---|
| 3032 | 2850 |  /* | 
|---|
| 3033 | 2851 |   * Handle any core-RCU processing required by a call_rcu() invocation. | 
|---|
| 3034 | 2852 |   */ | 
|---|
| 3035 |  | -static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,  | 
|---|
| 3036 |  | -			    struct rcu_head *head, unsigned long flags)  | 
|---|
 | 2853 | +static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head,  | 
|---|
 | 2854 | +			    unsigned long flags)  | 
|---|
| 3037 | 2855 |  { | 
|---|
| 3038 | 2856 |  	/* | 
|---|
| 3039 | 2857 |  	 * If called from an extended quiescent state, invoke the RCU | 
|---|
| .. | .. | 
|---|
| 3048 | 2866 |   | 
|---|
| 3049 | 2867 |  	/* | 
|---|
| 3050 | 2868 |  	 * Force the grace period if too many callbacks or too long waiting. | 
|---|
| 3051 |  | -	 * Enforce hysteresis, and don't invoke force_quiescent_state()  | 
|---|
 | 2869 | +	 * Enforce hysteresis, and don't invoke rcu_force_quiescent_state()  | 
|---|
| 3052 | 2870 |  	 * if some other CPU has recently done so.  Also, don't bother | 
|---|
| 3053 |  | -	 * invoking force_quiescent_state() if the newly enqueued callback  | 
|---|
 | 2871 | +	 * invoking rcu_force_quiescent_state() if the newly enqueued callback  | 
|---|
| 3054 | 2872 |  	 * is the only one waiting for a grace period to complete. | 
|---|
| 3055 | 2873 |  	 */ | 
|---|
| 3056 | 2874 |  	if (unlikely(rcu_segcblist_n_cbs(&rdp->cblist) > | 
|---|
| 3057 | 2875 |  		     rdp->qlen_last_fqs_check + qhimark)) { | 
|---|
| 3058 | 2876 |   | 
|---|
| 3059 | 2877 |  		/* Are we ignoring a completed grace period? */ | 
|---|
| 3060 |  | -		note_gp_changes(rsp, rdp);  | 
|---|
 | 2878 | +		note_gp_changes(rdp);  | 
|---|
| 3061 | 2879 |   | 
|---|
| 3062 | 2880 |  		/* Start a new grace period if one not already started. */ | 
|---|
| 3063 |  | -		if (!rcu_gp_in_progress(rsp)) {  | 
|---|
| 3064 |  | -			rcu_accelerate_cbs_unlocked(rsp, rdp->mynode, rdp);  | 
|---|
 | 2881 | +		if (!rcu_gp_in_progress()) {  | 
|---|
 | 2882 | +			rcu_accelerate_cbs_unlocked(rdp->mynode, rdp);  | 
|---|
| 3065 | 2883 |  		} else { | 
|---|
| 3066 | 2884 |  			/* Give the grace period a kick. */ | 
|---|
| 3067 |  | -			rdp->blimit = LONG_MAX;  | 
|---|
| 3068 |  | -			if (rsp->n_force_qs == rdp->n_force_qs_snap &&  | 
|---|
 | 2885 | +			rdp->blimit = DEFAULT_MAX_RCU_BLIMIT;  | 
|---|
 | 2886 | +			if (READ_ONCE(rcu_state.n_force_qs) == rdp->n_force_qs_snap &&  | 
|---|
| 3069 | 2887 |  			    rcu_segcblist_first_pend_cb(&rdp->cblist) != head) | 
|---|
| 3070 |  | -				force_quiescent_state(rsp);  | 
|---|
| 3071 |  | -			rdp->n_force_qs_snap = rsp->n_force_qs;  | 
|---|
 | 2888 | +				rcu_force_quiescent_state();  | 
|---|
 | 2889 | +			rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs);  | 
|---|
| 3072 | 2890 |  			rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist); | 
|---|
| 3073 | 2891 |  		} | 
|---|
| 3074 | 2892 |  	} | 
|---|
| .. | .. | 
|---|
| 3082 | 2900 |  } | 
|---|
| 3083 | 2901 |   | 
|---|
| 3084 | 2902 |  /* | 
|---|
| 3085 |  | - * Helper function for call_rcu() and friends.  The cpu argument will  | 
|---|
| 3086 |  | - * normally be -1, indicating "currently running CPU".  It may specify  | 
|---|
| 3087 |  | - * a CPU only if that CPU is a no-CBs CPU.  Currently, only _rcu_barrier()  | 
|---|
| 3088 |  | - * is expected to specify a CPU.  | 
|---|
 | 2903 | + * Check and if necessary update the leaf rcu_node structure's  | 
|---|
 | 2904 | + * ->cbovldmask bit corresponding to the current CPU based on that CPU's  | 
|---|
 | 2905 | + * number of queued RCU callbacks.  The caller must hold the leaf rcu_node  | 
|---|
 | 2906 | + * structure's ->lock.  | 
|---|
| 3089 | 2907 |   */ | 
|---|
 | 2908 | +static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp)  | 
|---|
 | 2909 | +{  | 
|---|
 | 2910 | +	raw_lockdep_assert_held_rcu_node(rnp);  | 
|---|
 | 2911 | +	if (qovld_calc <= 0)  | 
|---|
 | 2912 | +		return; // Early boot and wildcard value set.  | 
|---|
 | 2913 | +	if (rcu_segcblist_n_cbs(&rdp->cblist) >= qovld_calc)  | 
|---|
 | 2914 | +		WRITE_ONCE(rnp->cbovldmask, rnp->cbovldmask | rdp->grpmask);  | 
|---|
 | 2915 | +	else  | 
|---|
 | 2916 | +		WRITE_ONCE(rnp->cbovldmask, rnp->cbovldmask & ~rdp->grpmask);  | 
|---|
 | 2917 | +}  | 
|---|
 | 2918 | +  | 
|---|
 | 2919 | +/*  | 
|---|
 | 2920 | + * Check and if necessary update the leaf rcu_node structure's  | 
|---|
 | 2921 | + * ->cbovldmask bit corresponding to the current CPU based on that CPU's  | 
|---|
 | 2922 | + * number of queued RCU callbacks.  No locks need be held, but the  | 
|---|
 | 2923 | + * caller must have disabled interrupts.  | 
|---|
 | 2924 | + *  | 
|---|
 | 2925 | + * Note that this function ignores the possibility that there are a lot  | 
|---|
 | 2926 | + * of callbacks all of which have already seen the end of their respective  | 
|---|
 | 2927 | + * grace periods.  This omission is due to the need for no-CBs CPUs to  | 
|---|
 | 2928 | + * be holding ->nocb_lock to do this check, which is too heavy for a  | 
|---|
 | 2929 | + * common-case operation.  | 
|---|
 | 2930 | + */  | 
|---|
 | 2931 | +static void check_cb_ovld(struct rcu_data *rdp)  | 
|---|
 | 2932 | +{  | 
|---|
 | 2933 | +	struct rcu_node *const rnp = rdp->mynode;  | 
|---|
 | 2934 | +  | 
|---|
 | 2935 | +	if (qovld_calc <= 0 ||  | 
|---|
 | 2936 | +	    ((rcu_segcblist_n_cbs(&rdp->cblist) >= qovld_calc) ==  | 
|---|
 | 2937 | +	     !!(READ_ONCE(rnp->cbovldmask) & rdp->grpmask)))  | 
|---|
 | 2938 | +		return; // Early boot wildcard value or already set correctly.  | 
|---|
 | 2939 | +	raw_spin_lock_rcu_node(rnp);  | 
|---|
 | 2940 | +	check_cb_ovld_locked(rdp, rnp);  | 
|---|
 | 2941 | +	raw_spin_unlock_rcu_node(rnp);  | 
|---|
 | 2942 | +}  | 
|---|
 | 2943 | +  | 
|---|
 | 2944 | +/* Helper function for call_rcu() and friends.  */  | 
|---|
| 3090 | 2945 |  static void | 
|---|
| 3091 |  | -__call_rcu(struct rcu_head *head, rcu_callback_t func,  | 
|---|
| 3092 |  | -	   struct rcu_state *rsp, int cpu, bool lazy)  | 
|---|
 | 2946 | +__call_rcu(struct rcu_head *head, rcu_callback_t func)  | 
|---|
| 3093 | 2947 |  { | 
|---|
| 3094 | 2948 |  	unsigned long flags; | 
|---|
| 3095 | 2949 |  	struct rcu_data *rdp; | 
|---|
 | 2950 | +	bool was_alldone;  | 
|---|
| 3096 | 2951 |   | 
|---|
| 3097 | 2952 |  	/* Misaligned rcu_head! */ | 
|---|
| 3098 | 2953 |  	WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1)); | 
|---|
| .. | .. | 
|---|
| 3103 | 2958 |  		 * Use rcu:rcu_callback trace event to find the previous | 
|---|
| 3104 | 2959 |  		 * time callback was passed to __call_rcu(). | 
|---|
| 3105 | 2960 |  		 */ | 
|---|
| 3106 |  | -		WARN_ONCE(1, "__call_rcu(): Double-freed CB %p->%pF()!!!\n",  | 
|---|
 | 2961 | +		WARN_ONCE(1, "__call_rcu(): Double-freed CB %p->%pS()!!!\n",  | 
|---|
| 3107 | 2962 |  			  head, head->func); | 
|---|
| 3108 | 2963 |  		WRITE_ONCE(head->func, rcu_leak_callback); | 
|---|
| 3109 | 2964 |  		return; | 
|---|
| .. | .. | 
|---|
| 3111 | 2966 |  	head->func = func; | 
|---|
| 3112 | 2967 |  	head->next = NULL; | 
|---|
| 3113 | 2968 |  	local_irq_save(flags); | 
|---|
| 3114 |  | -	rdp = this_cpu_ptr(rsp->rda);  | 
|---|
 | 2969 | +	kasan_record_aux_stack(head);  | 
|---|
 | 2970 | +	rdp = this_cpu_ptr(&rcu_data);  | 
|---|
| 3115 | 2971 |   | 
|---|
| 3116 | 2972 |  	/* Add the callback to our list. */ | 
|---|
| 3117 |  | -	if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist)) || cpu != -1) {  | 
|---|
| 3118 |  | -		int offline;  | 
|---|
| 3119 |  | -  | 
|---|
| 3120 |  | -		if (cpu != -1)  | 
|---|
| 3121 |  | -			rdp = per_cpu_ptr(rsp->rda, cpu);  | 
|---|
| 3122 |  | -		if (likely(rdp->mynode)) {  | 
|---|
| 3123 |  | -			/* Post-boot, so this should be for a no-CBs CPU. */  | 
|---|
| 3124 |  | -			offline = !__call_rcu_nocb(rdp, head, lazy, flags);  | 
|---|
| 3125 |  | -			WARN_ON_ONCE(offline);  | 
|---|
| 3126 |  | -			/* Offline CPU, _call_rcu() illegal, leak callback.  */  | 
|---|
| 3127 |  | -			local_irq_restore(flags);  | 
|---|
| 3128 |  | -			return;  | 
|---|
| 3129 |  | -		}  | 
|---|
| 3130 |  | -		/*  | 
|---|
| 3131 |  | -		 * Very early boot, before rcu_init().  Initialize if needed  | 
|---|
| 3132 |  | -		 * and then drop through to queue the callback.  | 
|---|
| 3133 |  | -		 */  | 
|---|
| 3134 |  | -		BUG_ON(cpu != -1);  | 
|---|
 | 2973 | +	if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist))) {  | 
|---|
 | 2974 | +		// This can trigger due to call_rcu() from offline CPU:  | 
|---|
 | 2975 | +		WARN_ON_ONCE(rcu_scheduler_active != RCU_SCHEDULER_INACTIVE);  | 
|---|
| 3135 | 2976 |  		WARN_ON_ONCE(!rcu_is_watching()); | 
|---|
 | 2977 | +		// Very early boot, before rcu_init().  Initialize if needed  | 
|---|
 | 2978 | +		// and then drop through to queue the callback.  | 
|---|
| 3136 | 2979 |  		if (rcu_segcblist_empty(&rdp->cblist)) | 
|---|
| 3137 | 2980 |  			rcu_segcblist_init(&rdp->cblist); | 
|---|
| 3138 | 2981 |  	} | 
|---|
| 3139 |  | -	rcu_segcblist_enqueue(&rdp->cblist, head, lazy);  | 
|---|
| 3140 |  | -	if (!lazy)  | 
|---|
| 3141 |  | -		rcu_idle_count_callbacks_posted();  | 
|---|
| 3142 | 2982 |   | 
|---|
| 3143 |  | -	if (__is_kfree_rcu_offset((unsigned long)func))  | 
|---|
| 3144 |  | -		trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,  | 
|---|
| 3145 |  | -					 rcu_segcblist_n_lazy_cbs(&rdp->cblist),  | 
|---|
 | 2983 | +	check_cb_ovld(rdp);  | 
|---|
 | 2984 | +	if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags))  | 
|---|
 | 2985 | +		return; // Enqueued onto ->nocb_bypass, so just leave.  | 
|---|
 | 2986 | +	// If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock.  | 
|---|
 | 2987 | +	rcu_segcblist_enqueue(&rdp->cblist, head);  | 
|---|
 | 2988 | +	if (__is_kvfree_rcu_offset((unsigned long)func))  | 
|---|
 | 2989 | +		trace_rcu_kvfree_callback(rcu_state.name, head,  | 
|---|
 | 2990 | +					 (unsigned long)func,  | 
|---|
| 3146 | 2991 |  					 rcu_segcblist_n_cbs(&rdp->cblist)); | 
|---|
| 3147 | 2992 |  	else | 
|---|
| 3148 |  | -		trace_rcu_callback(rsp->name, head,  | 
|---|
| 3149 |  | -				   rcu_segcblist_n_lazy_cbs(&rdp->cblist),  | 
|---|
 | 2993 | +		trace_rcu_callback(rcu_state.name, head,  | 
|---|
| 3150 | 2994 |  				   rcu_segcblist_n_cbs(&rdp->cblist)); | 
|---|
| 3151 | 2995 |   | 
|---|
| 3152 | 2996 |  	/* Go handle any RCU core processing required. */ | 
|---|
| 3153 |  | -	__call_rcu_core(rsp, rdp, head, flags);  | 
|---|
 | 2997 | +	if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&  | 
|---|
 | 2998 | +	    unlikely(rcu_segcblist_is_offloaded(&rdp->cblist))) {  | 
|---|
 | 2999 | +		__call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */  | 
|---|
 | 3000 | +	} else {  | 
|---|
 | 3001 | +		__call_rcu_core(rdp, head, flags);  | 
|---|
 | 3002 | +		local_irq_restore(flags);  | 
|---|
 | 3003 | +	}  | 
|---|
 | 3004 | +}  | 
|---|
 | 3005 | +  | 
|---|
 | 3006 | +/**  | 
|---|
 | 3007 | + * call_rcu() - Queue an RCU callback for invocation after a grace period.  | 
|---|
 | 3008 | + * @head: structure to be used for queueing the RCU updates.  | 
|---|
 | 3009 | + * @func: actual callback function to be invoked after the grace period  | 
|---|
 | 3010 | + *  | 
|---|
 | 3011 | + * The callback function will be invoked some time after a full grace  | 
|---|
 | 3012 | + * period elapses, in other words after all pre-existing RCU read-side  | 
|---|
 | 3013 | + * critical sections have completed.  However, the callback function  | 
|---|
 | 3014 | + * might well execute concurrently with RCU read-side critical sections  | 
|---|
 | 3015 | + * that started after call_rcu() was invoked.  RCU read-side critical  | 
|---|
 | 3016 | + * sections are delimited by rcu_read_lock() and rcu_read_unlock(), and  | 
|---|
 | 3017 | + * may be nested.  In addition, regions of code across which interrupts,  | 
|---|
 | 3018 | + * preemption, or softirqs have been disabled also serve as RCU read-side  | 
|---|
 | 3019 | + * critical sections.  This includes hardware interrupt handlers, softirq  | 
|---|
 | 3020 | + * handlers, and NMI handlers.  | 
|---|
 | 3021 | + *  | 
|---|
 | 3022 | + * Note that all CPUs must agree that the grace period extended beyond  | 
|---|
 | 3023 | + * all pre-existing RCU read-side critical sections.  On systems with more  | 
|---|
 | 3024 | + * than one CPU, this means that when "func()" is invoked, each CPU is  | 
|---|
 | 3025 | + * guaranteed to have executed a full memory barrier since the end of its  | 
|---|
 | 3026 | + * last RCU read-side critical section whose beginning preceded the call  | 
|---|
 | 3027 | + * to call_rcu().  It also means that each CPU executing an RCU read-side  | 
|---|
 | 3028 | + * critical section that continues beyond the start of "func()" must have  | 
|---|
 | 3029 | + * executed a memory barrier after the call_rcu() but before the beginning  | 
|---|
 | 3030 | + * of that RCU read-side critical section.  Note that these guarantees  | 
|---|
 | 3031 | + * include CPUs that are offline, idle, or executing in user mode, as  | 
|---|
 | 3032 | + * well as CPUs that are executing in the kernel.  | 
|---|
 | 3033 | + *  | 
|---|
 | 3034 | + * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the  | 
|---|
 | 3035 | + * resulting RCU callback function "func()", then both CPU A and CPU B are  | 
|---|
 | 3036 | + * guaranteed to execute a full memory barrier during the time interval  | 
|---|
 | 3037 | + * between the call to call_rcu() and the invocation of "func()" -- even  | 
|---|
 | 3038 | + * if CPU A and CPU B are the same CPU (but again only if the system has  | 
|---|
 | 3039 | + * more than one CPU).  | 
|---|
 | 3040 | + */  | 
|---|
 | 3041 | +void call_rcu(struct rcu_head *head, rcu_callback_t func)  | 
|---|
 | 3042 | +{  | 
|---|
 | 3043 | +	__call_rcu(head, func);  | 
|---|
 | 3044 | +}  | 
|---|
 | 3045 | +EXPORT_SYMBOL_GPL(call_rcu);  | 
|---|
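
As a usage illustration of the API documented above: the usual pattern is to embed the rcu_head in the structure being protected and have the callback free that structure once the grace period has elapsed. This is a hedged sketch with hypothetical names (`struct foo`, `foo_release()`, `free_foo_rcu()` are illustrative, not part of this file); only call_rcu(), container_of(), and kfree() are real kernel interfaces:

```c
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int data;
	struct rcu_head rcu;
};

/* Runs some time after a full grace period has elapsed. */
static void free_foo_rcu(struct rcu_head *head)
{
	struct foo *fp = container_of(head, struct foo, rcu);

	kfree(fp);
}

/* Unpublish fp first (e.g. list_del_rcu()), then defer the actual free. */
static void foo_release(struct foo *fp)
{
	call_rcu(&fp->rcu, free_foo_rcu);
}
```
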
 | 3046 | +  | 
|---|
 | 3047 | +  | 
|---|
 | 3048 | +/* Maximum number of jiffies to wait before draining a batch. */  | 
|---|
 | 3049 | +#define KFREE_DRAIN_JIFFIES (HZ / 50)  | 
|---|
 | 3050 | +#define KFREE_N_BATCHES 2  | 
|---|
 | 3051 | +#define FREE_N_CHANNELS 2  | 
|---|
 | 3052 | +  | 
|---|
 | 3053 | +/**  | 
|---|
 | 3054 | + * struct kvfree_rcu_bulk_data - single block to store kvfree_rcu() pointers  | 
|---|
 | 3055 | + * @nr_records: Number of active pointers in the array  | 
|---|
 | 3056 | + * @next: Next bulk object in the block chain  | 
|---|
 | 3057 | + * @records: Array of the kvfree_rcu() pointers  | 
|---|
 | 3058 | + */  | 
|---|
 | 3059 | +struct kvfree_rcu_bulk_data {  | 
|---|
 | 3060 | +	unsigned long nr_records;  | 
|---|
 | 3061 | +	struct kvfree_rcu_bulk_data *next;  | 
|---|
 | 3062 | +	void *records[];  | 
|---|
 | 3063 | +};  | 
|---|
 | 3064 | +  | 
|---|
 | 3065 | +/*  | 
|---|
 | 3066 | + * This macro defines how many entries the "records" array  | 
|---|
 | 3067 | + * will contain.  It is chosen so that a kvfree_rcu_bulk_data  | 
|---|
 | 3068 | + * structure, together with its records array, fills exactly one page.  | 
|---|
 | 3069 | + */  | 
|---|
 | 3070 | +#define KVFREE_BULK_MAX_ENTR \  | 
|---|
 | 3071 | +	((PAGE_SIZE - sizeof(struct kvfree_rcu_bulk_data)) / sizeof(void *))  | 
|---|
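
As a rough worked example (an assumption, not something the code above guarantees): on an LP64 build with 4 KiB pages, the header of kvfree_rcu_bulk_data is one unsigned long plus one pointer, i.e. 16 bytes, so KVFREE_BULK_MAX_ENTR works out to (4096 - 16) / 8 = 510 pointers per page-sized block. The userspace snippet below just reproduces that arithmetic with a stand-in structure:

```c
/* Userspace-only illustration of the KVFREE_BULK_MAX_ENTR arithmetic.
 * An LP64 target and a 4 KiB PAGE_SIZE are assumed, not guaranteed. */
#include <stdio.h>

struct kvfree_rcu_bulk_data_demo {
	unsigned long nr_records;
	struct kvfree_rcu_bulk_data_demo *next;
	void *records[];
};

int main(void)
{
	const unsigned long page_size = 4096;	/* assumed PAGE_SIZE */
	unsigned long max_entr =
		(page_size - sizeof(struct kvfree_rcu_bulk_data_demo)) /
		sizeof(void *);

	printf("KVFREE_BULK_MAX_ENTR ~= %lu\n", max_entr);	/* 510 on LP64 */
	return 0;
}
```
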
 | 3072 | +  | 
|---|
 | 3073 | +/**  | 
|---|
 | 3074 | + * struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests  | 
|---|
 | 3075 | + * @rcu_work: Let queue_rcu_work() invoke workqueue handler after grace period  | 
|---|
 | 3076 | + * @head_free: List of kfree_rcu() objects waiting for a grace period  | 
|---|
 | 3077 | + * @bkvhead_free: Bulk-List of kvfree_rcu() objects waiting for a grace period  | 
|---|
 | 3078 | + * @krcp: Pointer to @kfree_rcu_cpu structure  | 
|---|
 | 3079 | + */  | 
|---|
 | 3080 | +  | 
|---|
 | 3081 | +struct kfree_rcu_cpu_work {  | 
|---|
 | 3082 | +	struct rcu_work rcu_work;  | 
|---|
 | 3083 | +	struct rcu_head *head_free;  | 
|---|
 | 3084 | +	struct kvfree_rcu_bulk_data *bkvhead_free[FREE_N_CHANNELS];  | 
|---|
 | 3085 | +	struct kfree_rcu_cpu *krcp;  | 
|---|
 | 3086 | +};  | 
|---|
 | 3087 | +  | 
|---|
 | 3088 | +/**  | 
|---|
 | 3089 | + * struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period  | 
|---|
 | 3090 | + * @head: List of kfree_rcu() objects not yet waiting for a grace period  | 
|---|
 | 3091 | + * @bkvhead: Bulk-List of kvfree_rcu() objects not yet waiting for a grace period  | 
|---|
 | 3092 | + * @krw_arr: Array of batches of kfree_rcu() objects waiting for a grace period  | 
|---|
 | 3093 | + * @lock: Synchronize access to this structure  | 
|---|
 | 3094 | + * @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES  | 
|---|
 | 3095 | + * @monitor_todo: Tracks whether a @monitor_work delayed work is pending  | 
|---|
 | 3096 | + * @initialized: The @rcu_work fields have been initialized  | 
|---|
 | 3097 | + * @count: Number of objects for which GP not started  | 
|---|
 | 3098 | + * @bkvcache:  | 
|---|
 | 3099 | + *	A simple cache list that contains objects for reuse.  | 
|---|
 | 3100 | + *	In order to save some per-CPU space the list is singly linked.  | 
|---|
 | 3101 | + *	Even though the list operations are lockless, an access still  | 
|---|
 | 3102 | + *	has to be protected by the per-cpu lock.  | 
|---|
 | 3103 | + * @page_cache_work: A work to refill the cache when it is empty  | 
|---|
 | 3104 | + * @work_in_progress: Indicates that page_cache_work is running  | 
|---|
 | 3105 | + * @hrtimer: A hrtimer for scheduling a page_cache_work  | 
|---|
 | 3106 | + * @nr_bkv_objs: number of allocated objects at @bkvcache.  | 
|---|
 | 3107 | + *  | 
|---|
 | 3108 | + * This is a per-CPU structure.  The reason that it is not included in  | 
|---|
 | 3109 | + * the rcu_data structure is to permit this code to be extracted from  | 
|---|
 | 3110 | + * the RCU files.  Such extraction could allow further optimization of  | 
|---|
 | 3111 | + * the interactions with the slab allocators.  | 
|---|
 | 3112 | + */  | 
|---|
 | 3113 | +struct kfree_rcu_cpu {  | 
|---|
 | 3114 | +	struct rcu_head *head;  | 
|---|
 | 3115 | +	struct kvfree_rcu_bulk_data *bkvhead[FREE_N_CHANNELS];  | 
|---|
 | 3116 | +	struct kfree_rcu_cpu_work krw_arr[KFREE_N_BATCHES];  | 
|---|
 | 3117 | +	raw_spinlock_t lock;  | 
|---|
 | 3118 | +	struct delayed_work monitor_work;  | 
|---|
 | 3119 | +	bool monitor_todo;  | 
|---|
 | 3120 | +	bool initialized;  | 
|---|
 | 3121 | +	int count;  | 
|---|
 | 3122 | +  | 
|---|
 | 3123 | +	struct work_struct page_cache_work;  | 
|---|
 | 3124 | +	atomic_t work_in_progress;  | 
|---|
 | 3125 | +	struct hrtimer hrtimer;  | 
|---|
 | 3126 | +  | 
|---|
 | 3127 | +	struct llist_head bkvcache;  | 
|---|
 | 3128 | +	int nr_bkv_objs;  | 
|---|
 | 3129 | +};  | 
|---|
 | 3130 | +  | 
|---|
 | 3131 | +static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc) = {  | 
|---|
 | 3132 | +	.lock = __RAW_SPIN_LOCK_UNLOCKED(krc.lock),  | 
|---|
 | 3133 | +};  | 
|---|
 | 3134 | +  | 
|---|
 | 3135 | +static __always_inline void  | 
|---|
 | 3136 | +debug_rcu_bhead_unqueue(struct kvfree_rcu_bulk_data *bhead)  | 
|---|
 | 3137 | +{  | 
|---|
 | 3138 | +#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD  | 
|---|
 | 3139 | +	int i;  | 
|---|
 | 3140 | +  | 
|---|
 | 3141 | +	for (i = 0; i < bhead->nr_records; i++)  | 
|---|
 | 3142 | +		debug_rcu_head_unqueue((struct rcu_head *)(bhead->records[i]));  | 
|---|
 | 3143 | +#endif  | 
|---|
 | 3144 | +}  | 
|---|
 | 3145 | +  | 
|---|
 | 3146 | +static inline struct kfree_rcu_cpu *  | 
|---|
 | 3147 | +krc_this_cpu_lock(unsigned long *flags)  | 
|---|
 | 3148 | +{  | 
|---|
 | 3149 | +	struct kfree_rcu_cpu *krcp;  | 
|---|
 | 3150 | +  | 
|---|
 | 3151 | +	local_irq_save(*flags);	// For safely calling this_cpu_ptr().  | 
|---|
 | 3152 | +	krcp = this_cpu_ptr(&krc);  | 
|---|
 | 3153 | +	raw_spin_lock(&krcp->lock);  | 
|---|
 | 3154 | +  | 
|---|
 | 3155 | +	return krcp;  | 
|---|
 | 3156 | +}  | 
|---|
 | 3157 | +  | 
|---|
 | 3158 | +static inline void  | 
|---|
 | 3159 | +krc_this_cpu_unlock(struct kfree_rcu_cpu *krcp, unsigned long flags)  | 
|---|
 | 3160 | +{  | 
|---|
 | 3161 | +	raw_spin_unlock(&krcp->lock);  | 
|---|
| 3154 | 3162 |  	local_irq_restore(flags); | 
|---|
| 3155 | 3163 |  } | 
|---|
| 3156 | 3164 |   | 
|---|
| 3157 |  | -/**  | 
|---|
| 3158 |  | - * call_rcu_sched() - Queue an RCU for invocation after sched grace period.  | 
|---|
| 3159 |  | - * @head: structure to be used for queueing the RCU updates.  | 
|---|
| 3160 |  | - * @func: actual callback function to be invoked after the grace period  | 
|---|
| 3161 |  | - *  | 
|---|
| 3162 |  | - * The callback function will be invoked some time after a full grace  | 
|---|
| 3163 |  | - * period elapses, in other words after all currently executing RCU  | 
|---|
| 3164 |  | - * read-side critical sections have completed. call_rcu_sched() assumes  | 
|---|
| 3165 |  | - * that the read-side critical sections end on enabling of preemption  | 
|---|
| 3166 |  | - * or on voluntary preemption.  | 
|---|
| 3167 |  | - * RCU read-side critical sections are delimited by:  | 
|---|
| 3168 |  | - *  | 
|---|
| 3169 |  | - * - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR  | 
|---|
| 3170 |  | - * - anything that disables preemption.  | 
|---|
| 3171 |  | - *  | 
|---|
| 3172 |  | - *  These may be nested.  | 
|---|
| 3173 |  | - *  | 
|---|
| 3174 |  | - * See the description of call_rcu() for more detailed information on  | 
|---|
| 3175 |  | - * memory ordering guarantees.  | 
|---|
| 3176 |  | - */  | 
|---|
| 3177 |  | -void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)  | 
|---|
 | 3165 | +static inline struct kvfree_rcu_bulk_data *  | 
|---|
 | 3166 | +get_cached_bnode(struct kfree_rcu_cpu *krcp)  | 
|---|
| 3178 | 3167 |  { | 
|---|
| 3179 |  | -	__call_rcu(head, func, &rcu_sched_state, -1, 0);  | 
|---|
| 3180 |  | -}  | 
|---|
| 3181 |  | -EXPORT_SYMBOL_GPL(call_rcu_sched);  | 
|---|
 | 3168 | +	if (!krcp->nr_bkv_objs)  | 
|---|
 | 3169 | +		return NULL;  | 
|---|
| 3182 | 3170 |   | 
|---|
| 3183 |  | -#ifndef CONFIG_PREEMPT_RT_FULL  | 
|---|
| 3184 |  | -/**  | 
|---|
| 3185 |  | - * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.  | 
|---|
| 3186 |  | - * @head: structure to be used for queueing the RCU updates.  | 
|---|
| 3187 |  | - * @func: actual callback function to be invoked after the grace period  | 
|---|
| 3188 |  | - *  | 
|---|
| 3189 |  | - * The callback function will be invoked some time after a full grace  | 
|---|
| 3190 |  | - * period elapses, in other words after all currently executing RCU  | 
|---|
| 3191 |  | - * read-side critical sections have completed. call_rcu_bh() assumes  | 
|---|
| 3192 |  | - * that the read-side critical sections end on completion of a softirq  | 
|---|
| 3193 |  | - * handler. This means that read-side critical sections in process  | 
|---|
| 3194 |  | - * context must not be interrupted by softirqs. This interface is to be  | 
|---|
| 3195 |  | - * used when most of the read-side critical sections are in softirq context.  | 
|---|
| 3196 |  | - * RCU read-side critical sections are delimited by:  | 
|---|
| 3197 |  | - *  | 
|---|
| 3198 |  | - * - rcu_read_lock() and  rcu_read_unlock(), if in interrupt context, OR  | 
|---|
| 3199 |  | - * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.  | 
|---|
| 3200 |  | - *  | 
|---|
| 3201 |  | - * These may be nested.  | 
|---|
| 3202 |  | - *  | 
|---|
| 3203 |  | - * See the description of call_rcu() for more detailed information on  | 
|---|
| 3204 |  | - * memory ordering guarantees.  | 
|---|
| 3205 |  | - */  | 
|---|
| 3206 |  | -void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)  | 
|---|
| 3207 |  | -{  | 
|---|
| 3208 |  | -	__call_rcu(head, func, &rcu_bh_state, -1, 0);  | 
|---|
 | 3171 | +	krcp->nr_bkv_objs--;  | 
|---|
 | 3172 | +	return (struct kvfree_rcu_bulk_data *)  | 
|---|
 | 3173 | +		llist_del_first(&krcp->bkvcache);  | 
|---|
| 3209 | 3174 |  } | 
|---|
| 3210 |  | -EXPORT_SYMBOL_GPL(call_rcu_bh);  | 
|---|
| 3211 |  | -#endif  | 
|---|
 | 3175 | +  | 
|---|
 | 3176 | +static inline bool  | 
|---|
 | 3177 | +put_cached_bnode(struct kfree_rcu_cpu *krcp,  | 
|---|
 | 3178 | +	struct kvfree_rcu_bulk_data *bnode)  | 
|---|
 | 3179 | +{  | 
|---|
 | 3180 | +	// Check the limit.  | 
|---|
 | 3181 | +	if (krcp->nr_bkv_objs >= rcu_min_cached_objs)  | 
|---|
 | 3182 | +		return false;  | 
|---|
 | 3183 | +  | 
|---|
 | 3184 | +	llist_add((struct llist_node *) bnode, &krcp->bkvcache);  | 
|---|
 | 3185 | +	krcp->nr_bkv_objs++;  | 
|---|
 | 3186 | +	return true;  | 
|---|
 | 3187 | +  | 
|---|
 | 3188 | +}  | 
|---|
| 3212 | 3189 |   | 
|---|
| 3213 | 3190 |  /* | 
|---|
| 3214 |  | - * Queue an RCU callback for lazy invocation after a grace period.  | 
|---|
| 3215 |  | - * This will likely be later named something like "call_rcu_lazy()",  | 
|---|
| 3216 |  | - * but this change will require some way of tagging the lazy RCU  | 
|---|
| 3217 |  | - * callbacks in the list of pending callbacks. Until then, this  | 
|---|
| 3218 |  | - * function may only be called from __kfree_rcu().  | 
|---|
 | 3191 | + * This function is invoked in workqueue context after a grace period.  | 
|---|
 | 3192 | + * It frees all the objects queued on ->bkvhead_free or ->head_free.  | 
|---|
| 3219 | 3193 |   */ | 
|---|
| 3220 |  | -void kfree_call_rcu(struct rcu_head *head,  | 
|---|
| 3221 |  | -		    rcu_callback_t func)  | 
|---|
 | 3194 | +static void kfree_rcu_work(struct work_struct *work)  | 
|---|
| 3222 | 3195 |  { | 
|---|
| 3223 |  | -	__call_rcu(head, func, rcu_state_p, -1, 1);  | 
|---|
 | 3196 | +	unsigned long flags;  | 
|---|
 | 3197 | +	struct kvfree_rcu_bulk_data *bkvhead[FREE_N_CHANNELS], *bnext;  | 
|---|
 | 3198 | +	struct rcu_head *head, *next;  | 
|---|
 | 3199 | +	struct kfree_rcu_cpu *krcp;  | 
|---|
 | 3200 | +	struct kfree_rcu_cpu_work *krwp;  | 
|---|
 | 3201 | +	int i, j;  | 
|---|
 | 3202 | +  | 
|---|
 | 3203 | +	krwp = container_of(to_rcu_work(work),  | 
|---|
 | 3204 | +			    struct kfree_rcu_cpu_work, rcu_work);  | 
|---|
 | 3205 | +	krcp = krwp->krcp;  | 
|---|
 | 3206 | +  | 
|---|
 | 3207 | +	raw_spin_lock_irqsave(&krcp->lock, flags);  | 
|---|
 | 3208 | +	// Channels 1 and 2.  | 
|---|
 | 3209 | +	for (i = 0; i < FREE_N_CHANNELS; i++) {  | 
|---|
 | 3210 | +		bkvhead[i] = krwp->bkvhead_free[i];  | 
|---|
 | 3211 | +		krwp->bkvhead_free[i] = NULL;  | 
|---|
 | 3212 | +	}  | 
|---|
 | 3213 | +  | 
|---|
 | 3214 | +	// Channel 3.  | 
|---|
 | 3215 | +	head = krwp->head_free;  | 
|---|
 | 3216 | +	krwp->head_free = NULL;  | 
|---|
 | 3217 | +	raw_spin_unlock_irqrestore(&krcp->lock, flags);  | 
|---|
 | 3218 | +  | 
|---|
 | 3220 | +	// Handle the first two channels.  | 
|---|
 | 3220 | +	for (i = 0; i < FREE_N_CHANNELS; i++) {  | 
|---|
 | 3221 | +		for (; bkvhead[i]; bkvhead[i] = bnext) {  | 
|---|
 | 3222 | +			bnext = bkvhead[i]->next;  | 
|---|
 | 3223 | +			debug_rcu_bhead_unqueue(bkvhead[i]);  | 
|---|
 | 3224 | +  | 
|---|
 | 3225 | +			rcu_lock_acquire(&rcu_callback_map);  | 
|---|
 | 3226 | +			if (i == 0) { // kmalloc() / kfree().  | 
|---|
 | 3227 | +				trace_rcu_invoke_kfree_bulk_callback(  | 
|---|
 | 3228 | +					rcu_state.name, bkvhead[i]->nr_records,  | 
|---|
 | 3229 | +					bkvhead[i]->records);  | 
|---|
 | 3230 | +  | 
|---|
 | 3231 | +				kfree_bulk(bkvhead[i]->nr_records,  | 
|---|
 | 3232 | +					bkvhead[i]->records);  | 
|---|
 | 3233 | +			} else { // vmalloc() / vfree().  | 
|---|
 | 3234 | +				for (j = 0; j < bkvhead[i]->nr_records; j++) {  | 
|---|
 | 3235 | +					trace_rcu_invoke_kvfree_callback(  | 
|---|
 | 3236 | +						rcu_state.name,  | 
|---|
 | 3237 | +						bkvhead[i]->records[j], 0);  | 
|---|
 | 3238 | +  | 
|---|
 | 3239 | +					vfree(bkvhead[i]->records[j]);  | 
|---|
 | 3240 | +				}  | 
|---|
 | 3241 | +			}  | 
|---|
 | 3242 | +			rcu_lock_release(&rcu_callback_map);  | 
|---|
 | 3243 | +  | 
|---|
 | 3244 | +			raw_spin_lock_irqsave(&krcp->lock, flags);  | 
|---|
 | 3245 | +			if (put_cached_bnode(krcp, bkvhead[i]))  | 
|---|
 | 3246 | +				bkvhead[i] = NULL;  | 
|---|
 | 3247 | +			raw_spin_unlock_irqrestore(&krcp->lock, flags);  | 
|---|
 | 3248 | +  | 
|---|
 | 3249 | +			if (bkvhead[i])  | 
|---|
 | 3250 | +				free_page((unsigned long) bkvhead[i]);  | 
|---|
 | 3251 | +  | 
|---|
 | 3252 | +			cond_resched_tasks_rcu_qs();  | 
|---|
 | 3253 | +		}  | 
|---|
 | 3254 | +	}  | 
|---|
 | 3255 | +  | 
|---|
 | 3256 | +	/*  | 
|---|
 | 3257 | +	 * Emergency case only. It can happen under low-memory  | 
|---|
 | 3258 | +	 * conditions, when an allocation fails and the "bulk"  | 
|---|
 | 3259 | +	 * path therefore cannot be used temporarily.  | 
|---|
 | 3260 | +	 */  | 
|---|
 | 3261 | +	for (; head; head = next) {  | 
|---|
 | 3262 | +		unsigned long offset = (unsigned long)head->func;  | 
|---|
 | 3263 | +		void *ptr = (void *)head - offset;  | 
|---|
 | 3264 | +  | 
|---|
 | 3265 | +		next = head->next;  | 
|---|
 | 3266 | +		debug_rcu_head_unqueue((struct rcu_head *)ptr);  | 
|---|
 | 3267 | +		rcu_lock_acquire(&rcu_callback_map);  | 
|---|
 | 3268 | +		trace_rcu_invoke_kvfree_callback(rcu_state.name, head, offset);  | 
|---|
 | 3269 | +  | 
|---|
 | 3270 | +		if (!WARN_ON_ONCE(!__is_kvfree_rcu_offset(offset)))  | 
|---|
 | 3271 | +			kvfree(ptr);  | 
|---|
 | 3272 | +  | 
|---|
 | 3273 | +		rcu_lock_release(&rcu_callback_map);  | 
|---|
 | 3274 | +		cond_resched_tasks_rcu_qs();  | 
|---|
 | 3275 | +	}  | 
|---|
| 3224 | 3276 |  } | 
|---|
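Editorial aside on the emergency loop above: reconstructing ptr from head works because the kfree_rcu()/kvfree_rcu() entry points record the offset of the embedded rcu_head where a callback pointer would normally go. A minimal sketch of that idea, with an illustrative struct name that is not part of this patch:

	/* Illustrative only: how the offset encoding recovers the object. */
	struct foo {
		long payload;
		struct rcu_head rh;		/* embedded rcu_head */
	};

	/*
	 * kfree_rcu(fp, rh) effectively stores offsetof(struct foo, rh) in
	 * place of a callback function, so the freeing code can later get
	 * back to the start of the enclosing object:
	 */
	static void *recover_object(struct rcu_head *head)
	{
		unsigned long offset = (unsigned long)head->func;

		return (void *)head - offset;	/* original struct foo pointer */
	}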
| 3225 |  | -EXPORT_SYMBOL_GPL(kfree_call_rcu);  | 
|---|
| 3226 | 3277 |   | 
|---|
| 3227 | 3278 |  /* | 
|---|
| 3228 |  | - * Because a context switch is a grace period for RCU-sched and RCU-bh,  | 
|---|
| 3229 |  | - * any blocking grace-period wait automatically implies a grace period  | 
|---|
| 3230 |  | - * if there is only one CPU online at any point time during execution  | 
|---|
| 3231 |  | - * of either synchronize_sched() or synchronize_rcu_bh().  It is OK to  | 
|---|
 | 3279 | + * Schedule the kfree batch RCU work to run in workqueue context after a GP.  | 
|---|
 | 3280 | + *  | 
|---|
 | 3281 | + * This function is invoked by kfree_rcu_monitor() when the KFREE_DRAIN_JIFFIES  | 
|---|
 | 3282 | + * timeout has been reached.  | 
|---|
 | 3283 | + */  | 
|---|
 | 3284 | +static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)  | 
|---|
 | 3285 | +{  | 
|---|
 | 3286 | +	struct kfree_rcu_cpu_work *krwp;  | 
|---|
 | 3287 | +	bool repeat = false;  | 
|---|
 | 3288 | +	int i, j;  | 
|---|
 | 3289 | +  | 
|---|
 | 3290 | +	lockdep_assert_held(&krcp->lock);  | 
|---|
 | 3291 | +  | 
|---|
 | 3292 | +	for (i = 0; i < KFREE_N_BATCHES; i++) {  | 
|---|
 | 3293 | +		krwp = &(krcp->krw_arr[i]);  | 
|---|
 | 3294 | +  | 
|---|
 | 3295 | +		/*  | 
|---|
 | 3296 | +		 * Try to detach bkvhead or head and attach it to the  | 
|---|
 | 3297 | +		 * corresponding free channel, if one is available. A  | 
|---|
 | 3298 | +		 * previous RCU batch may still be in progress, in which  | 
|---|
 | 3299 | +		 * case another one cannot be queued immediately, so  | 
|---|
 | 3300 | +		 * return false to tell the caller to retry later.  | 
|---|
 | 3301 | +		 */  | 
|---|
 | 3302 | +		if ((krcp->bkvhead[0] && !krwp->bkvhead_free[0]) ||  | 
|---|
 | 3303 | +			(krcp->bkvhead[1] && !krwp->bkvhead_free[1]) ||  | 
|---|
 | 3304 | +				(krcp->head && !krwp->head_free)) {  | 
|---|
 | 3305 | +			// Channel 1 corresponds to SLAB ptrs.  | 
|---|
 | 3306 | +			// Channel 2 corresponds to vmalloc ptrs.  | 
|---|
 | 3307 | +			for (j = 0; j < FREE_N_CHANNELS; j++) {  | 
|---|
 | 3308 | +				if (!krwp->bkvhead_free[j]) {  | 
|---|
 | 3309 | +					krwp->bkvhead_free[j] = krcp->bkvhead[j];  | 
|---|
 | 3310 | +					krcp->bkvhead[j] = NULL;  | 
|---|
 | 3311 | +				}  | 
|---|
 | 3312 | +			}  | 
|---|
 | 3313 | +  | 
|---|
 | 3314 | +			// Channel 3 corresponds to emergency path.  | 
|---|
 | 3315 | +			if (!krwp->head_free) {  | 
|---|
 | 3316 | +				krwp->head_free = krcp->head;  | 
|---|
 | 3317 | +				krcp->head = NULL;  | 
|---|
 | 3318 | +			}  | 
|---|
 | 3319 | +  | 
|---|
 | 3320 | +			WRITE_ONCE(krcp->count, 0);  | 
|---|
 | 3321 | +  | 
|---|
 | 3322 | +			/*  | 
|---|
 | 3323 | +			 * There is one work item per batch, and each batch  | 
|---|
 | 3324 | +			 * handles all three "free channels". The work can  | 
|---|
 | 3325 | +			 * already be in the pending state if the channels  | 
|---|
 | 3326 | +			 * have been detached one after the  | 
|---|
 | 3327 | +			 * other.  | 
|---|
 | 3328 | +			 */  | 
|---|
 | 3329 | +			queue_rcu_work(system_wq, &krwp->rcu_work);  | 
|---|
 | 3330 | +		}  | 
|---|
 | 3331 | +  | 
|---|
 | 3332 | +		// Repeat if any corresponding "free" channel is still busy.  | 
|---|
 | 3333 | +		if (krcp->bkvhead[0] || krcp->bkvhead[1] || krcp->head)  | 
|---|
 | 3334 | +			repeat = true;  | 
|---|
 | 3335 | +	}  | 
|---|
 | 3336 | +  | 
|---|
 | 3337 | +	return !repeat;  | 
|---|
 | 3338 | +}  | 
|---|
 | 3339 | +  | 
|---|
 | 3340 | +static inline void kfree_rcu_drain_unlock(struct kfree_rcu_cpu *krcp,  | 
|---|
 | 3341 | +					  unsigned long flags)  | 
|---|
 | 3342 | +{  | 
|---|
 | 3343 | +	// Attempt to start a new batch.  | 
|---|
 | 3344 | +	krcp->monitor_todo = false;  | 
|---|
 | 3345 | +	if (queue_kfree_rcu_work(krcp)) {  | 
|---|
 | 3346 | +		// Success! Our job is done here.  | 
|---|
 | 3347 | +		raw_spin_unlock_irqrestore(&krcp->lock, flags);  | 
|---|
 | 3348 | +		return;  | 
|---|
 | 3349 | +	}  | 
|---|
 | 3350 | +  | 
|---|
 | 3351 | +	// Previous RCU batch still in progress, try again later.  | 
|---|
 | 3352 | +	krcp->monitor_todo = true;  | 
|---|
 | 3353 | +	schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);  | 
|---|
 | 3354 | +	raw_spin_unlock_irqrestore(&krcp->lock, flags);  | 
|---|
 | 3355 | +}  | 
|---|
 | 3356 | +  | 
|---|
 | 3357 | +/*  | 
|---|
 | 3358 | + * This function is invoked after the KFREE_DRAIN_JIFFIES timeout.  | 
|---|
 | 3359 | + * It invokes kfree_rcu_drain_unlock() to attempt to start another batch.  | 
|---|
 | 3360 | + */  | 
|---|
 | 3361 | +static void kfree_rcu_monitor(struct work_struct *work)  | 
|---|
 | 3362 | +{  | 
|---|
 | 3363 | +	unsigned long flags;  | 
|---|
 | 3364 | +	struct kfree_rcu_cpu *krcp = container_of(work, struct kfree_rcu_cpu,  | 
|---|
 | 3365 | +						 monitor_work.work);  | 
|---|
 | 3366 | +  | 
|---|
 | 3367 | +	raw_spin_lock_irqsave(&krcp->lock, flags);  | 
|---|
 | 3368 | +	if (krcp->monitor_todo)  | 
|---|
 | 3369 | +		kfree_rcu_drain_unlock(krcp, flags);  | 
|---|
 | 3370 | +	else  | 
|---|
 | 3371 | +		raw_spin_unlock_irqrestore(&krcp->lock, flags);  | 
|---|
 | 3372 | +}  | 
|---|
 | 3373 | +  | 
|---|
 | 3374 | +static enum hrtimer_restart  | 
|---|
 | 3375 | +schedule_page_work_fn(struct hrtimer *t)  | 
|---|
 | 3376 | +{  | 
|---|
 | 3377 | +	struct kfree_rcu_cpu *krcp =  | 
|---|
 | 3378 | +		container_of(t, struct kfree_rcu_cpu, hrtimer);  | 
|---|
 | 3379 | +  | 
|---|
 | 3380 | +	queue_work(system_highpri_wq, &krcp->page_cache_work);  | 
|---|
 | 3381 | +	return HRTIMER_NORESTART;  | 
|---|
 | 3382 | +}  | 
|---|
 | 3383 | +  | 
|---|
 | 3384 | +static void fill_page_cache_func(struct work_struct *work)  | 
|---|
 | 3385 | +{  | 
|---|
 | 3386 | +	struct kvfree_rcu_bulk_data *bnode;  | 
|---|
 | 3387 | +	struct kfree_rcu_cpu *krcp =  | 
|---|
 | 3388 | +		container_of(work, struct kfree_rcu_cpu,  | 
|---|
 | 3389 | +			page_cache_work);  | 
|---|
 | 3390 | +	unsigned long flags;  | 
|---|
 | 3391 | +	bool pushed;  | 
|---|
 | 3392 | +	int i;  | 
|---|
 | 3393 | +  | 
|---|
 | 3394 | +	for (i = 0; i < rcu_min_cached_objs; i++) {  | 
|---|
 | 3395 | +		bnode = (struct kvfree_rcu_bulk_data *)  | 
|---|
 | 3396 | +			__get_free_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);  | 
|---|
 | 3397 | +  | 
|---|
 | 3398 | +		if (!bnode)  | 
|---|
 | 3399 | +			break;  | 
|---|
 | 3400 | +  | 
|---|
 | 3401 | +		raw_spin_lock_irqsave(&krcp->lock, flags);  | 
|---|
 | 3402 | +		pushed = put_cached_bnode(krcp, bnode);  | 
|---|
 | 3403 | +		raw_spin_unlock_irqrestore(&krcp->lock, flags);  | 
|---|
 | 3404 | +  | 
|---|
 | 3405 | +		if (!pushed) {  | 
|---|
 | 3406 | +			free_page((unsigned long) bnode);  | 
|---|
 | 3407 | +			break;  | 
|---|
 | 3408 | +		}  | 
|---|
 | 3409 | +	}  | 
|---|
 | 3410 | +  | 
|---|
 | 3411 | +	atomic_set(&krcp->work_in_progress, 0);  | 
|---|
 | 3412 | +}  | 
|---|
 | 3413 | +  | 
|---|
 | 3414 | +static void  | 
|---|
 | 3415 | +run_page_cache_worker(struct kfree_rcu_cpu *krcp)  | 
|---|
 | 3416 | +{  | 
|---|
 | 3417 | +	if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&  | 
|---|
 | 3418 | +			!atomic_xchg(&krcp->work_in_progress, 1)) {  | 
|---|
 | 3419 | +		hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC,  | 
|---|
 | 3420 | +			HRTIMER_MODE_REL);  | 
|---|
 | 3421 | +		krcp->hrtimer.function = schedule_page_work_fn;  | 
|---|
 | 3422 | +		hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL);  | 
|---|
 | 3423 | +	}  | 
|---|
 | 3424 | +}  | 
|---|
 | 3425 | +  | 
|---|
 | 3426 | +static inline bool  | 
|---|
 | 3427 | +kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr)  | 
|---|
 | 3428 | +{  | 
|---|
 | 3429 | +	struct kvfree_rcu_bulk_data *bnode;  | 
|---|
 | 3430 | +	int idx;  | 
|---|
 | 3431 | +  | 
|---|
 | 3432 | +	if (unlikely(!krcp->initialized))  | 
|---|
 | 3433 | +		return false;  | 
|---|
 | 3434 | +  | 
|---|
 | 3435 | +	lockdep_assert_held(&krcp->lock);  | 
|---|
 | 3436 | +	idx = !!is_vmalloc_addr(ptr);  | 
|---|
 | 3437 | +  | 
|---|
 | 3438 | +	/* Check if a new block is required. */  | 
|---|
 | 3439 | +	if (!krcp->bkvhead[idx] ||  | 
|---|
 | 3440 | +			krcp->bkvhead[idx]->nr_records == KVFREE_BULK_MAX_ENTR) {  | 
|---|
 | 3441 | +		bnode = get_cached_bnode(krcp);  | 
|---|
 | 3442 | +		/* Switch to emergency path. */  | 
|---|
 | 3443 | +		if (!bnode)  | 
|---|
 | 3444 | +			return false;  | 
|---|
 | 3445 | +  | 
|---|
 | 3446 | +		/* Initialize the new block. */  | 
|---|
 | 3447 | +		bnode->nr_records = 0;  | 
|---|
 | 3448 | +		bnode->next = krcp->bkvhead[idx];  | 
|---|
 | 3449 | +  | 
|---|
 | 3450 | +		/* Attach it to the head. */  | 
|---|
 | 3451 | +		krcp->bkvhead[idx] = bnode;  | 
|---|
 | 3452 | +	}  | 
|---|
 | 3453 | +  | 
|---|
 | 3454 | +	/* Finally insert. */  | 
|---|
 | 3455 | +	krcp->bkvhead[idx]->records  | 
|---|
 | 3456 | +		[krcp->bkvhead[idx]->nr_records++] = ptr;  | 
|---|
 | 3457 | +  | 
|---|
 | 3458 | +	return true;  | 
|---|
 | 3459 | +}  | 
|---|
 | 3460 | +  | 
|---|
 | 3461 | +/*  | 
|---|
 | 3462 | + * Queue a request for lazy invocation of the appropriate free routine after a  | 
|---|
 | 3463 | + * grace period. Please note that three paths are maintained: two main ones that  | 
|---|
 | 3464 | + * use the array-of-pointers interface, and a third, emergency one that is used  | 
|---|
 | 3465 | + * only when the main paths temporarily cannot be maintained due to memory  | 
|---|
 | 3466 | + * pressure.  | 
|---|
 | 3467 | + *  | 
|---|
 | 3468 | + * Each kvfree_call_rcu() request is added to a batch. The batch is drained  | 
|---|
 | 3469 | + * every KFREE_DRAIN_JIFFIES jiffies. All the objects in the batch are freed  | 
|---|
 | 3470 | + * in workqueue context. This allows requests to be batched together, reducing  | 
|---|
 | 3471 | + * the number of grace periods during heavy kfree_rcu()/kvfree_rcu() load.  | 
|---|
 | 3472 | + */  | 
|---|
 | 3473 | +void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)  | 
|---|
 | 3474 | +{  | 
|---|
 | 3475 | +	unsigned long flags;  | 
|---|
 | 3476 | +	struct kfree_rcu_cpu *krcp;  | 
|---|
 | 3477 | +	bool success;  | 
|---|
 | 3478 | +	void *ptr;  | 
|---|
 | 3479 | +  | 
|---|
 | 3480 | +	if (head) {  | 
|---|
 | 3481 | +		ptr = (void *) head - (unsigned long) func;  | 
|---|
 | 3482 | +	} else {  | 
|---|
 | 3483 | +		/*  | 
|---|
 | 3484 | +		 * Please note there is a limitation for the head-less  | 
|---|
 | 3485 | +		 * variant: it may be used only from a context that can  | 
|---|
 | 3486 | +		 * sleep, hence the might_sleep() check below. From all  | 
|---|
 | 3487 | +		 * other places, please embed an rcu_head in  | 
|---|
 | 3488 | +		 * your data.  | 
|---|
 | 3489 | +		 */  | 
|---|
 | 3490 | +		might_sleep();  | 
|---|
 | 3491 | +		ptr = (unsigned long *) func;  | 
|---|
 | 3492 | +	}  | 
|---|
 | 3493 | +  | 
|---|
 | 3494 | +	krcp = krc_this_cpu_lock(&flags);  | 
|---|
 | 3495 | +  | 
|---|
 | 3496 | +	// Queue the object but don't yet schedule the batch.  | 
|---|
 | 3497 | +	if (debug_rcu_head_queue(ptr)) {  | 
|---|
 | 3498 | +		// Probable double kfree_rcu(), just leak.  | 
|---|
 | 3499 | +		WARN_ONCE(1, "%s(): Double-freed call. rcu_head %p\n",  | 
|---|
 | 3500 | +			  __func__, head);  | 
|---|
 | 3501 | +  | 
|---|
 | 3502 | +		// Mark as success and leave.  | 
|---|
 | 3503 | +		success = true;  | 
|---|
 | 3504 | +		goto unlock_return;  | 
|---|
 | 3505 | +	}  | 
|---|
 | 3506 | +  | 
|---|
 | 3507 | +	success = kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr);  | 
|---|
 | 3508 | +	if (!success) {  | 
|---|
 | 3509 | +		run_page_cache_worker(krcp);  | 
|---|
 | 3510 | +  | 
|---|
 | 3511 | +		if (head == NULL)  | 
|---|
 | 3512 | +			// Inline if kvfree_rcu(one_arg) call.  | 
|---|
 | 3513 | +			goto unlock_return;  | 
|---|
 | 3514 | +  | 
|---|
 | 3515 | +		head->func = func;  | 
|---|
 | 3516 | +		head->next = krcp->head;  | 
|---|
 | 3517 | +		krcp->head = head;  | 
|---|
 | 3518 | +		success = true;  | 
|---|
 | 3519 | +	}  | 
|---|
 | 3520 | +  | 
|---|
 | 3521 | +	WRITE_ONCE(krcp->count, krcp->count + 1);  | 
|---|
 | 3522 | +  | 
|---|
 | 3523 | +	// Set timer to drain after KFREE_DRAIN_JIFFIES.  | 
|---|
 | 3524 | +	if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&  | 
|---|
 | 3525 | +	    !krcp->monitor_todo) {  | 
|---|
 | 3526 | +		krcp->monitor_todo = true;  | 
|---|
 | 3527 | +		schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);  | 
|---|
 | 3528 | +	}  | 
|---|
 | 3529 | +  | 
|---|
 | 3530 | +unlock_return:  | 
|---|
 | 3531 | +	krc_this_cpu_unlock(krcp, flags);  | 
|---|
 | 3532 | +  | 
|---|
 | 3533 | +	/*  | 
|---|
 | 3534 | +	 * Inline kvfree() after synchronize_rcu(). This is possible  | 
|---|
 | 3535 | +	 * only from a context that can sleep, so the current  | 
|---|
 | 3536 | +	 * CPU can pass through a quiescent state.  | 
|---|
 | 3537 | +	 */  | 
|---|
 | 3538 | +	if (!success) {  | 
|---|
 | 3539 | +		debug_rcu_head_unqueue((struct rcu_head *) ptr);  | 
|---|
 | 3540 | +		synchronize_rcu();  | 
|---|
 | 3541 | +		kvfree(ptr);  | 
|---|
 | 3542 | +	}  | 
|---|
 | 3543 | +}  | 
|---|
 | 3544 | +EXPORT_SYMBOL_GPL(kvfree_call_rcu);  | 
|---|
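Usage note (not part of this diff): callers normally reach kvfree_call_rcu() through the kfree_rcu()/kvfree_rcu() wrappers rather than invoking it directly. A hedged sketch with illustrative names, assuming the single-argument kvfree_rcu() wrapper exists in this tree; as the comments above explain, the head-less form may only be used from a context that can sleep:

	struct foo {
		struct list_head list;
		struct rcu_head rh;
		long payload;
	};

	void release_foo(struct foo *fp)
	{
		list_del_rcu(&fp->list);
		kfree_rcu(fp, rh);	/* two-argument form, usable from atomic context */
	}

	void release_blob(void *p)
	{
		kvfree_rcu(p);		/* head-less form, caller must be able to sleep */
	}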
 | 3545 | +  | 
|---|
 | 3546 | +static unsigned long  | 
|---|
 | 3547 | +kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)  | 
|---|
 | 3548 | +{  | 
|---|
 | 3549 | +	int cpu;  | 
|---|
 | 3550 | +	unsigned long count = 0;  | 
|---|
 | 3551 | +  | 
|---|
 | 3552 | +	/* Snapshot count of all CPUs */  | 
|---|
 | 3553 | +	for_each_possible_cpu(cpu) {  | 
|---|
 | 3554 | +		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);  | 
|---|
 | 3555 | +  | 
|---|
 | 3556 | +		count += READ_ONCE(krcp->count);  | 
|---|
 | 3557 | +	}  | 
|---|
 | 3558 | +  | 
|---|
 | 3559 | +	return count;  | 
|---|
 | 3560 | +}  | 
|---|
 | 3561 | +  | 
|---|
 | 3562 | +static unsigned long  | 
|---|
 | 3563 | +kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)  | 
|---|
 | 3564 | +{  | 
|---|
 | 3565 | +	int cpu, freed = 0;  | 
|---|
 | 3566 | +	unsigned long flags;  | 
|---|
 | 3567 | +  | 
|---|
 | 3568 | +	for_each_possible_cpu(cpu) {  | 
|---|
 | 3569 | +		int count;  | 
|---|
 | 3570 | +		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);  | 
|---|
 | 3571 | +  | 
|---|
 | 3572 | +		count = krcp->count;  | 
|---|
 | 3573 | +		raw_spin_lock_irqsave(&krcp->lock, flags);  | 
|---|
 | 3574 | +		if (krcp->monitor_todo)  | 
|---|
 | 3575 | +			kfree_rcu_drain_unlock(krcp, flags);  | 
|---|
 | 3576 | +		else  | 
|---|
 | 3577 | +			raw_spin_unlock_irqrestore(&krcp->lock, flags);  | 
|---|
 | 3578 | +  | 
|---|
 | 3579 | +		sc->nr_to_scan -= count;  | 
|---|
 | 3580 | +		freed += count;  | 
|---|
 | 3581 | +  | 
|---|
 | 3582 | +		if (sc->nr_to_scan <= 0)  | 
|---|
 | 3583 | +			break;  | 
|---|
 | 3584 | +	}  | 
|---|
 | 3585 | +  | 
|---|
 | 3586 | +	return freed == 0 ? SHRINK_STOP : freed;  | 
|---|
 | 3587 | +}  | 
|---|
 | 3588 | +  | 
|---|
 | 3589 | +static struct shrinker kfree_rcu_shrinker = {  | 
|---|
 | 3590 | +	.count_objects = kfree_rcu_shrink_count,  | 
|---|
 | 3591 | +	.scan_objects = kfree_rcu_shrink_scan,  | 
|---|
 | 3592 | +	.batch = 0,  | 
|---|
 | 3593 | +	.seeks = DEFAULT_SEEKS,  | 
|---|
 | 3594 | +};  | 
|---|
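For context (an assumption, since the call site is outside this hunk): a shrinker declared as above only takes effect once it is registered with the reclaim subsystem, typically during the boot-time kfree_rcu() batch initialization, roughly as follows:

	/* Sketch of the expected registration during kfree_rcu() batch init. */
	if (register_shrinker(&kfree_rcu_shrinker))
		pr_err("Failed to register kfree_rcu() shrinker!\n");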
 | 3595 | +  | 
|---|
 | 3596 | +void __init kfree_rcu_scheduler_running(void)  | 
|---|
 | 3597 | +{  | 
|---|
 | 3598 | +	int cpu;  | 
|---|
 | 3599 | +	unsigned long flags;  | 
|---|
 | 3600 | +  | 
|---|
 | 3601 | +	for_each_possible_cpu(cpu) {  | 
|---|
 | 3602 | +		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);  | 
|---|
 | 3603 | +  | 
|---|
 | 3604 | +		raw_spin_lock_irqsave(&krcp->lock, flags);  | 
|---|
 | 3605 | +		if (!krcp->head || krcp->monitor_todo) {  | 
|---|
 | 3606 | +			raw_spin_unlock_irqrestore(&krcp->lock, flags);  | 
|---|
 | 3607 | +			continue;  | 
|---|
 | 3608 | +		}  | 
|---|
 | 3609 | +		krcp->monitor_todo = true;  | 
|---|
 | 3610 | +		schedule_delayed_work_on(cpu, &krcp->monitor_work,  | 
|---|
 | 3611 | +					 KFREE_DRAIN_JIFFIES);  | 
|---|
 | 3612 | +		raw_spin_unlock_irqrestore(&krcp->lock, flags);  | 
|---|
 | 3613 | +	}  | 
|---|
 | 3614 | +}  | 
|---|
 | 3615 | +  | 
|---|
 | 3616 | +/*  | 
|---|
 | 3617 | + * During early boot, any blocking grace-period wait automatically  | 
|---|
 | 3618 | + * implies a grace period.  Later on, this is never the case for PREEMPTION.  | 
|---|
 | 3619 | + *  | 
|---|
 | 3620 | + * However, because a context switch is a grace period for !PREEMPTION, any  | 
|---|
 | 3621 | + * blocking grace-period wait automatically implies a grace period if  | 
|---|
 | 3622 | + * there is only one CPU online at any point in time during execution of  | 
|---|
 | 3623 | + * either synchronize_rcu() or synchronize_rcu_expedited().  It is OK to  | 
|---|
| 3232 | 3624 |   * occasionally incorrectly indicate that there are multiple CPUs online | 
|---|
| 3233 |  | - * when there was in fact only one the whole time, as this just adds  | 
|---|
| 3234 |  | - * some overhead: RCU still operates correctly.  | 
|---|
 | 3625 | + * when there was in fact only one the whole time, as this just adds some  | 
|---|
 | 3626 | + * overhead: RCU still operates correctly.  | 
|---|
| 3235 | 3627 |   */ | 
|---|
| 3236 | 3628 |  static int rcu_blocking_is_gp(void) | 
|---|
| 3237 | 3629 |  { | 
|---|
| 3238 | 3630 |  	int ret; | 
|---|
| 3239 | 3631 |   | 
|---|
 | 3632 | +	if (IS_ENABLED(CONFIG_PREEMPTION))  | 
|---|
 | 3633 | +		return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE;  | 
|---|
| 3240 | 3634 |  	might_sleep();  /* Check for RCU read-side critical section. */ | 
|---|
| 3241 | 3635 |  	preempt_disable(); | 
|---|
| 3242 | 3636 |  	ret = num_online_cpus() <= 1; | 
|---|
| .. | .. | 
|---|
| 3245 | 3639 |  } | 
|---|
| 3246 | 3640 |   | 
|---|
| 3247 | 3641 |  /** | 
|---|
| 3248 |  | - * synchronize_sched - wait until an rcu-sched grace period has elapsed.  | 
|---|
 | 3642 | + * synchronize_rcu - wait until a grace period has elapsed.  | 
|---|
| 3249 | 3643 |   * | 
|---|
| 3250 |  | - * Control will return to the caller some time after a full rcu-sched  | 
|---|
| 3251 |  | - * grace period has elapsed, in other words after all currently executing  | 
|---|
| 3252 |  | - * rcu-sched read-side critical sections have completed.   These read-side  | 
|---|
| 3253 |  | - * critical sections are delimited by rcu_read_lock_sched() and  | 
|---|
| 3254 |  | - * rcu_read_unlock_sched(), and may be nested.  Note that preempt_disable(),  | 
|---|
| 3255 |  | - * local_irq_disable(), and so on may be used in place of  | 
|---|
| 3256 |  | - * rcu_read_lock_sched().  | 
|---|
| 3257 |  | - *  | 
|---|
| 3258 |  | - * This means that all preempt_disable code sequences, including NMI and  | 
|---|
| 3259 |  | - * non-threaded hardware-interrupt handlers, in progress on entry will  | 
|---|
| 3260 |  | - * have completed before this primitive returns.  However, this does not  | 
|---|
| 3261 |  | - * guarantee that softirq handlers will have completed, since in some  | 
|---|
| 3262 |  | - * kernels, these handlers can run in process context, and can block.  | 
|---|
 | 3644 | + * Control will return to the caller some time after a full grace  | 
|---|
 | 3645 | + * period has elapsed, in other words after all currently executing RCU  | 
|---|
 | 3646 | + * read-side critical sections have completed.  Note, however, that  | 
|---|
 | 3647 | + * upon return from synchronize_rcu(), the caller might well be executing  | 
|---|
 | 3648 | + * concurrently with new RCU read-side critical sections that began while  | 
|---|
 | 3649 | + * synchronize_rcu() was waiting.  RCU read-side critical sections are  | 
|---|
 | 3650 | + * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.  | 
|---|
 | 3651 | + * In addition, regions of code across which interrupts, preemption, or  | 
|---|
 | 3652 | + * softirqs have been disabled also serve as RCU read-side critical  | 
|---|
 | 3653 | + * sections.  This includes hardware interrupt handlers, softirq handlers,  | 
|---|
 | 3654 | + * and NMI handlers.  | 
|---|
| 3263 | 3655 |   * | 
|---|
| 3264 | 3656 |   * Note that this guarantee implies further memory-ordering guarantees. | 
|---|
| 3265 |  | - * On systems with more than one CPU, when synchronize_sched() returns,  | 
|---|
| 3266 |  | - * each CPU is guaranteed to have executed a full memory barrier since the  | 
|---|
| 3267 |  | - * end of its last RCU-sched read-side critical section whose beginning  | 
|---|
| 3268 |  | - * preceded the call to synchronize_sched().  In addition, each CPU having  | 
|---|
 | 3657 | + * On systems with more than one CPU, when synchronize_rcu() returns,  | 
|---|
 | 3658 | + * each CPU is guaranteed to have executed a full memory barrier since  | 
|---|
 | 3659 | + * the end of its last RCU read-side critical section whose beginning  | 
|---|
 | 3660 | + * preceded the call to synchronize_rcu().  In addition, each CPU having  | 
|---|
| 3269 | 3661 |   * an RCU read-side critical section that extends beyond the return from | 
|---|
| 3270 |  | - * synchronize_sched() is guaranteed to have executed a full memory barrier  | 
|---|
| 3271 |  | - * after the beginning of synchronize_sched() and before the beginning of  | 
|---|
 | 3662 | + * synchronize_rcu() is guaranteed to have executed a full memory barrier  | 
|---|
 | 3663 | + * after the beginning of synchronize_rcu() and before the beginning of  | 
|---|
| 3272 | 3664 |   * that RCU read-side critical section.  Note that these guarantees include | 
|---|
| 3273 | 3665 |   * CPUs that are offline, idle, or executing in user mode, as well as CPUs | 
|---|
| 3274 | 3666 |   * that are executing in the kernel. | 
|---|
| 3275 | 3667 |   * | 
|---|
| 3276 |  | - * Furthermore, if CPU A invoked synchronize_sched(), which returned  | 
|---|
 | 3668 | + * Furthermore, if CPU A invoked synchronize_rcu(), which returned  | 
|---|
| 3277 | 3669 |   * to its caller on CPU B, then both CPU A and CPU B are guaranteed | 
|---|
| 3278 | 3670 |   * to have executed a full memory barrier during the execution of | 
|---|
| 3279 |  | - * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but  | 
|---|
 | 3671 | + * synchronize_rcu() -- even if CPU A and CPU B are the same CPU (but  | 
|---|
| 3280 | 3672 |   * again only if the system has more than one CPU). | 
|---|
| 3281 | 3673 |   */ | 
|---|
| 3282 |  | -void synchronize_sched(void)  | 
|---|
 | 3674 | +void synchronize_rcu(void)  | 
|---|
| 3283 | 3675 |  { | 
|---|
| 3284 | 3676 |  	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || | 
|---|
| 3285 | 3677 |  			 lock_is_held(&rcu_lock_map) || | 
|---|
| 3286 | 3678 |  			 lock_is_held(&rcu_sched_lock_map), | 
|---|
| 3287 |  | -			 "Illegal synchronize_sched() in RCU-sched read-side critical section");  | 
|---|
 | 3679 | +			 "Illegal synchronize_rcu() in RCU read-side critical section");  | 
|---|
| 3288 | 3680 |  	if (rcu_blocking_is_gp()) | 
|---|
| 3289 | 3681 |  		return; | 
|---|
| 3290 | 3682 |  	if (rcu_gp_is_expedited()) | 
|---|
| 3291 |  | -		synchronize_sched_expedited();  | 
|---|
 | 3683 | +		synchronize_rcu_expedited();  | 
|---|
| 3292 | 3684 |  	else | 
|---|
| 3293 |  | -		wait_rcu_gp(call_rcu_sched);  | 
|---|
 | 3685 | +		wait_rcu_gp(call_rcu);  | 
|---|
| 3294 | 3686 |  } | 
|---|
| 3295 |  | -EXPORT_SYMBOL_GPL(synchronize_sched);  | 
|---|
| 3296 |  | -  | 
|---|
| 3297 |  | -#ifndef CONFIG_PREEMPT_RT_FULL  | 
|---|
| 3298 |  | -/**  | 
|---|
| 3299 |  | - * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.  | 
|---|
| 3300 |  | - *  | 
|---|
| 3301 |  | - * Control will return to the caller some time after a full rcu_bh grace  | 
|---|
| 3302 |  | - * period has elapsed, in other words after all currently executing rcu_bh  | 
|---|
| 3303 |  | - * read-side critical sections have completed.  RCU read-side critical  | 
|---|
| 3304 |  | - * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),  | 
|---|
| 3305 |  | - * and may be nested.  | 
|---|
| 3306 |  | - *  | 
|---|
| 3307 |  | - * See the description of synchronize_sched() for more detailed information  | 
|---|
| 3308 |  | - * on memory ordering guarantees.  | 
|---|
| 3309 |  | - */  | 
|---|
| 3310 |  | -void synchronize_rcu_bh(void)  | 
|---|
| 3311 |  | -{  | 
|---|
| 3312 |  | -	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||  | 
|---|
| 3313 |  | -			 lock_is_held(&rcu_lock_map) ||  | 
|---|
| 3314 |  | -			 lock_is_held(&rcu_sched_lock_map),  | 
|---|
| 3315 |  | -			 "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");  | 
|---|
| 3316 |  | -	if (rcu_blocking_is_gp())  | 
|---|
| 3317 |  | -		return;  | 
|---|
| 3318 |  | -	if (rcu_gp_is_expedited())  | 
|---|
| 3319 |  | -		synchronize_rcu_bh_expedited();  | 
|---|
| 3320 |  | -	else  | 
|---|
| 3321 |  | -		wait_rcu_gp(call_rcu_bh);  | 
|---|
| 3322 |  | -}  | 
|---|
| 3323 |  | -EXPORT_SYMBOL_GPL(synchronize_rcu_bh);  | 
|---|
| 3324 |  | -#endif  | 
|---|
 | 3687 | +EXPORT_SYMBOL_GPL(synchronize_rcu);  | 
|---|
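To make the guarantees documented above concrete, here is a hedged sketch of the classic updater pattern built on synchronize_rcu(); the struct, list, and lock names are illustrative only:

	/* Updater side: unlink, wait for pre-existing readers, then free. */
	void remove_foo(struct foo *fp)
	{
		spin_lock(&foo_lock);
		list_del_rcu(&fp->list);
		spin_unlock(&foo_lock);

		synchronize_rcu();	/* all readers that could see fp are done */
		kfree(fp);
	}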
| 3325 | 3688 |   | 
|---|
| 3326 | 3689 |  /** | 
|---|
| 3327 | 3690 |   * get_state_synchronize_rcu - Snapshot current RCU state | 
|---|
| .. | .. | 
|---|
| 3337 | 3700 |  	 * before the load from ->gp_seq. | 
|---|
| 3338 | 3701 |  	 */ | 
|---|
| 3339 | 3702 |  	smp_mb();  /* ^^^ */ | 
|---|
| 3340 |  | -	return rcu_seq_snap(&rcu_state_p->gp_seq);  | 
|---|
 | 3703 | +	return rcu_seq_snap(&rcu_state.gp_seq);  | 
|---|
| 3341 | 3704 |  } | 
|---|
| 3342 | 3705 |  EXPORT_SYMBOL_GPL(get_state_synchronize_rcu); | 
|---|
| 3343 | 3706 |   | 
|---|
| .. | .. | 
|---|
| 3357 | 3720 |   */ | 
|---|
| 3358 | 3721 |  void cond_synchronize_rcu(unsigned long oldstate) | 
|---|
| 3359 | 3722 |  { | 
|---|
| 3360 |  | -	if (!rcu_seq_done(&rcu_state_p->gp_seq, oldstate))  | 
|---|
 | 3723 | +	if (!rcu_seq_done(&rcu_state.gp_seq, oldstate))  | 
|---|
| 3361 | 3724 |  		synchronize_rcu(); | 
|---|
| 3362 | 3725 |  	else | 
|---|
| 3363 | 3726 |  		smp_mb(); /* Ensure GP ends before subsequent accesses. */ | 
|---|
| 3364 | 3727 |  } | 
|---|
| 3365 | 3728 |  EXPORT_SYMBOL_GPL(cond_synchronize_rcu); | 
|---|
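A hedged sketch of how the cookie from get_state_synchronize_rcu() pairs with cond_synchronize_rcu() above; the helper functions are illustrative placeholders:

	void remove_foo_polled(struct foo *fp)
	{
		unsigned long cookie;

		unpublish_foo(fp);			/* illustrative: drop all RCU-visible references */
		cookie = get_state_synchronize_rcu();	/* snapshot taken after removal */

		do_other_expensive_work();		/* illustrative: overlap work with the grace period */

		cond_synchronize_rcu(cookie);		/* waits only if no full grace period has elapsed */
		kfree(fp);
	}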
| 3366 | 3729 |   | 
|---|
| 3367 |  | -/**  | 
|---|
| 3368 |  | - * get_state_synchronize_sched - Snapshot current RCU-sched state  | 
|---|
| 3369 |  | - *  | 
|---|
| 3370 |  | - * Returns a cookie that is used by a later call to cond_synchronize_sched()  | 
|---|
| 3371 |  | - * to determine whether or not a full grace period has elapsed in the  | 
|---|
| 3372 |  | - * meantime.  | 
|---|
| 3373 |  | - */  | 
|---|
| 3374 |  | -unsigned long get_state_synchronize_sched(void)  | 
|---|
| 3375 |  | -{  | 
|---|
| 3376 |  | -	/*  | 
|---|
| 3377 |  | -	 * Any prior manipulation of RCU-protected data must happen  | 
|---|
| 3378 |  | -	 * before the load from ->gp_seq.  | 
|---|
| 3379 |  | -	 */  | 
|---|
| 3380 |  | -	smp_mb();  /* ^^^ */  | 
|---|
| 3381 |  | -	return rcu_seq_snap(&rcu_sched_state.gp_seq);  | 
|---|
| 3382 |  | -}  | 
|---|
| 3383 |  | -EXPORT_SYMBOL_GPL(get_state_synchronize_sched);  | 
|---|
| 3384 |  | -  | 
|---|
| 3385 |  | -/**  | 
|---|
| 3386 |  | - * cond_synchronize_sched - Conditionally wait for an RCU-sched grace period  | 
|---|
| 3387 |  | - *  | 
|---|
| 3388 |  | - * @oldstate: return value from earlier call to get_state_synchronize_sched()  | 
|---|
| 3389 |  | - *  | 
|---|
| 3390 |  | - * If a full RCU-sched grace period has elapsed since the earlier call to  | 
|---|
| 3391 |  | - * get_state_synchronize_sched(), just return.  Otherwise, invoke  | 
|---|
| 3392 |  | - * synchronize_sched() to wait for a full grace period.  | 
|---|
| 3393 |  | - *  | 
|---|
| 3394 |  | - * Yes, this function does not take counter wrap into account.  But  | 
|---|
| 3395 |  | - * counter wrap is harmless.  If the counter wraps, we have waited for  | 
|---|
| 3396 |  | - * more than 2 billion grace periods (and way more on a 64-bit system!),  | 
|---|
| 3397 |  | - * so waiting for one additional grace period should be just fine.  | 
|---|
| 3398 |  | - */  | 
|---|
| 3399 |  | -void cond_synchronize_sched(unsigned long oldstate)  | 
|---|
| 3400 |  | -{  | 
|---|
| 3401 |  | -	if (!rcu_seq_done(&rcu_sched_state.gp_seq, oldstate))  | 
|---|
| 3402 |  | -		synchronize_sched();  | 
|---|
| 3403 |  | -	else  | 
|---|
| 3404 |  | -		smp_mb(); /* Ensure GP ends before subsequent accesses. */  | 
|---|
| 3405 |  | -}  | 
|---|
| 3406 |  | -EXPORT_SYMBOL_GPL(cond_synchronize_sched);  | 
|---|
| 3407 |  | -  | 
|---|
| 3408 | 3730 |  /* | 
|---|
| 3409 |  | - * Check to see if there is any immediate RCU-related work to be done  | 
|---|
| 3410 |  | - * by the current CPU, for the specified type of RCU, returning 1 if so.  | 
|---|
| 3411 |  | - * The checks are in order of increasing expense: checks that can be  | 
|---|
| 3412 |  | - * carried out against CPU-local state are performed first.  However,  | 
|---|
| 3413 |  | - * we must check for CPU stalls first, else we might not get a chance.  | 
|---|
 | 3731 | + * Check to see if there is any immediate RCU-related work to be done by  | 
|---|
 | 3732 | + * the current CPU, returning 1 if so and zero otherwise.  The checks are  | 
|---|
 | 3733 | + * in order of increasing expense: checks that can be carried out against  | 
|---|
 | 3734 | + * CPU-local state are performed first.  However, we must check for CPU  | 
|---|
 | 3735 | + * stalls first, else we might not get a chance.  | 
|---|
| 3414 | 3736 |   */ | 
|---|
| 3415 |  | -static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)  | 
|---|
 | 3737 | +static int rcu_pending(int user)  | 
|---|
| 3416 | 3738 |  { | 
|---|
 | 3739 | +	bool gp_in_progress;  | 
|---|
 | 3740 | +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);  | 
|---|
| 3417 | 3741 |  	struct rcu_node *rnp = rdp->mynode; | 
|---|
| 3418 | 3742 |   | 
|---|
| 3419 |  | -	/* Check for CPU stalls, if enabled. */  | 
|---|
| 3420 |  | -	check_cpu_stall(rsp, rdp);  | 
|---|
 | 3743 | +	lockdep_assert_irqs_disabled();  | 
|---|
| 3421 | 3744 |   | 
|---|
| 3422 |  | -	/* Is this CPU a NO_HZ_FULL CPU that should ignore RCU? */  | 
|---|
| 3423 |  | -	if (rcu_nohz_full_cpu(rsp))  | 
|---|
 | 3745 | +	/* Check for CPU stalls, if enabled. */  | 
|---|
 | 3746 | +	check_cpu_stall(rdp);  | 
|---|
 | 3747 | +  | 
|---|
 | 3748 | +	/* Does this CPU need a deferred NOCB wakeup? */  | 
|---|
 | 3749 | +	if (rcu_nocb_need_deferred_wakeup(rdp))  | 
|---|
 | 3750 | +		return 1;  | 
|---|
 | 3751 | +  | 
|---|
 | 3752 | +	/* Is this a nohz_full CPU in userspace or idle?  (Ignore RCU if so.) */  | 
|---|
 | 3753 | +	if ((user || rcu_is_cpu_rrupt_from_idle()) && rcu_nohz_full_cpu())  | 
|---|
| 3424 | 3754 |  		return 0; | 
|---|
| 3425 | 3755 |   | 
|---|
| 3426 | 3756 |  	/* Is the RCU core waiting for a quiescent state from this CPU? */ | 
|---|
| 3427 |  | -	if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm)  | 
|---|
 | 3757 | +	gp_in_progress = rcu_gp_in_progress();  | 
|---|
 | 3758 | +	if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm && gp_in_progress)  | 
|---|
| 3428 | 3759 |  		return 1; | 
|---|
| 3429 | 3760 |   | 
|---|
| 3430 | 3761 |  	/* Does this CPU have callbacks ready to invoke? */ | 
|---|
| .. | .. | 
|---|
| 3432 | 3763 |  		return 1; | 
|---|
| 3433 | 3764 |   | 
|---|
| 3434 | 3765 |  	/* Has RCU gone idle with this CPU needing another grace period? */ | 
|---|
| 3435 |  | -	if (!rcu_gp_in_progress(rsp) &&  | 
|---|
| 3436 |  | -	    rcu_segcblist_is_enabled(&rdp->cblist) &&  | 
|---|
 | 3766 | +	if (!gp_in_progress && rcu_segcblist_is_enabled(&rdp->cblist) &&  | 
|---|
 | 3767 | +	    (!IS_ENABLED(CONFIG_RCU_NOCB_CPU) ||  | 
|---|
 | 3768 | +	     !rcu_segcblist_is_offloaded(&rdp->cblist)) &&  | 
|---|
| 3437 | 3769 |  	    !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) | 
|---|
| 3438 | 3770 |  		return 1; | 
|---|
| 3439 | 3771 |   | 
|---|
| .. | .. | 
|---|
| 3442 | 3774 |  	    unlikely(READ_ONCE(rdp->gpwrap))) /* outside lock */ | 
|---|
| 3443 | 3775 |  		return 1; | 
|---|
| 3444 | 3776 |   | 
|---|
| 3445 |  | -	/* Does this CPU need a deferred NOCB wakeup? */  | 
|---|
| 3446 |  | -	if (rcu_nocb_need_deferred_wakeup(rdp))  | 
|---|
| 3447 |  | -		return 1;  | 
|---|
| 3448 |  | -  | 
|---|
| 3449 | 3777 |  	/* nothing to do */ | 
|---|
| 3450 | 3778 |  	return 0; | 
|---|
| 3451 | 3779 |  } | 
|---|
| 3452 | 3780 |   | 
|---|
| 3453 | 3781 |  /* | 
|---|
| 3454 |  | - * Check to see if there is any immediate RCU-related work to be done  | 
|---|
| 3455 |  | - * by the current CPU, returning 1 if so.  This function is part of the  | 
|---|
| 3456 |  | - * RCU implementation; it is -not- an exported member of the RCU API.  | 
|---|
| 3457 |  | - */  | 
|---|
| 3458 |  | -static int rcu_pending(void)  | 
|---|
| 3459 |  | -{  | 
|---|
| 3460 |  | -	struct rcu_state *rsp;  | 
|---|
| 3461 |  | -  | 
|---|
| 3462 |  | -	for_each_rcu_flavor(rsp)  | 
|---|
| 3463 |  | -		if (__rcu_pending(rsp, this_cpu_ptr(rsp->rda)))  | 
|---|
| 3464 |  | -			return 1;  | 
|---|
| 3465 |  | -	return 0;  | 
|---|
| 3466 |  | -}  | 
|---|
| 3467 |  | -  | 
|---|
| 3468 |  | -/*  | 
|---|
| 3469 |  | - * Return true if the specified CPU has any callback.  If all_lazy is  | 
|---|
| 3470 |  | - * non-NULL, store an indication of whether all callbacks are lazy.  | 
|---|
| 3471 |  | - * (If there are no callbacks, all of them are deemed to be lazy.)  | 
|---|
| 3472 |  | - */  | 
|---|
| 3473 |  | -static bool rcu_cpu_has_callbacks(bool *all_lazy)  | 
|---|
| 3474 |  | -{  | 
|---|
| 3475 |  | -	bool al = true;  | 
|---|
| 3476 |  | -	bool hc = false;  | 
|---|
| 3477 |  | -	struct rcu_data *rdp;  | 
|---|
| 3478 |  | -	struct rcu_state *rsp;  | 
|---|
| 3479 |  | -  | 
|---|
| 3480 |  | -	for_each_rcu_flavor(rsp) {  | 
|---|
| 3481 |  | -		rdp = this_cpu_ptr(rsp->rda);  | 
|---|
| 3482 |  | -		if (rcu_segcblist_empty(&rdp->cblist))  | 
|---|
| 3483 |  | -			continue;  | 
|---|
| 3484 |  | -		hc = true;  | 
|---|
| 3485 |  | -		if (rcu_segcblist_n_nonlazy_cbs(&rdp->cblist) || !all_lazy) {  | 
|---|
| 3486 |  | -			al = false;  | 
|---|
| 3487 |  | -			break;  | 
|---|
| 3488 |  | -		}  | 
|---|
| 3489 |  | -	}  | 
|---|
| 3490 |  | -	if (all_lazy)  | 
|---|
| 3491 |  | -		*all_lazy = al;  | 
|---|
| 3492 |  | -	return hc;  | 
|---|
| 3493 |  | -}  | 
|---|
| 3494 |  | -  | 
|---|
| 3495 |  | -/*  | 
|---|
| 3496 |  | - * Helper function for _rcu_barrier() tracing.  If tracing is disabled,  | 
|---|
 | 3782 | + * Helper function for rcu_barrier() tracing.  If tracing is disabled,  | 
|---|
| 3497 | 3783 |   * the compiler is expected to optimize this away. | 
|---|
| 3498 | 3784 |   */ | 
|---|
| 3499 |  | -static void _rcu_barrier_trace(struct rcu_state *rsp, const char *s,  | 
|---|
| 3500 |  | -			       int cpu, unsigned long done)  | 
|---|
 | 3785 | +static void rcu_barrier_trace(const char *s, int cpu, unsigned long done)  | 
|---|
| 3501 | 3786 |  { | 
|---|
| 3502 |  | -	trace_rcu_barrier(rsp->name, s, cpu,  | 
|---|
| 3503 |  | -			  atomic_read(&rsp->barrier_cpu_count), done);  | 
|---|
 | 3787 | +	trace_rcu_barrier(rcu_state.name, s, cpu,  | 
|---|
 | 3788 | +			  atomic_read(&rcu_state.barrier_cpu_count), done);  | 
|---|
| 3504 | 3789 |  } | 
|---|
| 3505 | 3790 |   | 
|---|
| 3506 | 3791 |  /* | 
|---|
| 3507 |  | - * RCU callback function for _rcu_barrier().  If we are last, wake  | 
|---|
| 3508 |  | - * up the task executing _rcu_barrier().  | 
|---|
 | 3792 | + * RCU callback function for rcu_barrier().  If we are last, wake  | 
|---|
 | 3793 | + * up the task executing rcu_barrier().  | 
|---|
 | 3794 | + *  | 
|---|
 | 3795 | + * Note that the value of rcu_state.barrier_sequence must be captured  | 
|---|
 | 3796 | + * before the atomic_dec_and_test().  Otherwise, if this CPU is not last,  | 
|---|
 | 3797 | + * other CPUs might count the value down to zero before this CPU gets  | 
|---|
 | 3798 | + * around to invoking rcu_barrier_trace(), which might result in bogus  | 
|---|
 | 3799 | + * data from the next instance of rcu_barrier().  | 
|---|
| 3509 | 3800 |   */ | 
|---|
| 3510 | 3801 |  static void rcu_barrier_callback(struct rcu_head *rhp) | 
|---|
| 3511 | 3802 |  { | 
|---|
| 3512 |  | -	struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head);  | 
|---|
| 3513 |  | -	struct rcu_state *rsp = rdp->rsp;  | 
|---|
 | 3803 | +	unsigned long __maybe_unused s = rcu_state.barrier_sequence;  | 
|---|
| 3514 | 3804 |   | 
|---|
| 3515 |  | -	if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {  | 
|---|
| 3516 |  | -		_rcu_barrier_trace(rsp, TPS("LastCB"), -1,  | 
|---|
| 3517 |  | -				   rsp->barrier_sequence);  | 
|---|
| 3518 |  | -		complete(&rsp->barrier_completion);  | 
|---|
 | 3805 | +	if (atomic_dec_and_test(&rcu_state.barrier_cpu_count)) {  | 
|---|
 | 3806 | +		rcu_barrier_trace(TPS("LastCB"), -1, s);  | 
|---|
 | 3807 | +		complete(&rcu_state.barrier_completion);  | 
|---|
| 3519 | 3808 |  	} else { | 
|---|
| 3520 |  | -		_rcu_barrier_trace(rsp, TPS("CB"), -1, rsp->barrier_sequence);  | 
|---|
 | 3809 | +		rcu_barrier_trace(TPS("CB"), -1, s);  | 
|---|
| 3521 | 3810 |  	} | 
|---|
| 3522 | 3811 |  } | 
|---|
| 3523 | 3812 |   | 
|---|
| 3524 | 3813 |  /* | 
|---|
| 3525 | 3814 |   * Called with preemption disabled, and from cross-cpu IRQ context. | 
|---|
| 3526 | 3815 |   */ | 
|---|
| 3527 |  | -static void rcu_barrier_func(void *type)  | 
|---|
 | 3816 | +static void rcu_barrier_func(void *cpu_in)  | 
|---|
| 3528 | 3817 |  { | 
|---|
| 3529 |  | -	struct rcu_state *rsp = type;  | 
|---|
| 3530 |  | -	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);  | 
|---|
 | 3818 | +	uintptr_t cpu = (uintptr_t)cpu_in;  | 
|---|
 | 3819 | +	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
| 3531 | 3820 |   | 
|---|
| 3532 |  | -	_rcu_barrier_trace(rsp, TPS("IRQ"), -1, rsp->barrier_sequence);  | 
|---|
 | 3821 | +	rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence);  | 
|---|
| 3533 | 3822 |  	rdp->barrier_head.func = rcu_barrier_callback; | 
|---|
| 3534 | 3823 |  	debug_rcu_head_queue(&rdp->barrier_head); | 
|---|
| 3535 |  | -	if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) {  | 
|---|
| 3536 |  | -		atomic_inc(&rsp->barrier_cpu_count);  | 
|---|
 | 3824 | +	rcu_nocb_lock(rdp);  | 
|---|
 | 3825 | +	WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));  | 
|---|
 | 3826 | +	if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) {  | 
|---|
 | 3827 | +		atomic_inc(&rcu_state.barrier_cpu_count);  | 
|---|
| 3537 | 3828 |  	} else { | 
|---|
| 3538 | 3829 |  		debug_rcu_head_unqueue(&rdp->barrier_head); | 
|---|
| 3539 |  | -		_rcu_barrier_trace(rsp, TPS("IRQNQ"), -1,  | 
|---|
| 3540 |  | -				   rsp->barrier_sequence);  | 
|---|
 | 3830 | +		rcu_barrier_trace(TPS("IRQNQ"), -1,  | 
|---|
 | 3831 | +				  rcu_state.barrier_sequence);  | 
|---|
| 3541 | 3832 |  	} | 
|---|
 | 3833 | +	rcu_nocb_unlock(rdp);  | 
|---|
| 3542 | 3834 |  } | 
|---|
| 3543 | 3835 |   | 
|---|
| 3544 |  | -/*  | 
|---|
| 3545 |  | - * Orchestrate the specified type of RCU barrier, waiting for all  | 
|---|
| 3546 |  | - * RCU callbacks of the specified type to complete.  | 
|---|
 | 3836 | +/**  | 
|---|
 | 3837 | + * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.  | 
|---|
 | 3838 | + *  | 
|---|
 | 3839 | + * Note that this primitive does not necessarily wait for an RCU grace period  | 
|---|
 | 3840 | + * to complete.  For example, if there are no RCU callbacks queued anywhere  | 
|---|
 | 3841 | + * in the system, then rcu_barrier() is within its rights to return  | 
|---|
 | 3842 | + * immediately, without waiting for anything, much less an RCU grace period.  | 
|---|
| 3547 | 3843 |   */ | 
|---|
| 3548 |  | -static void _rcu_barrier(struct rcu_state *rsp)  | 
|---|
 | 3844 | +void rcu_barrier(void)  | 
|---|
| 3549 | 3845 |  { | 
|---|
| 3550 |  | -	int cpu;  | 
|---|
 | 3846 | +	uintptr_t cpu;  | 
|---|
| 3551 | 3847 |  	struct rcu_data *rdp; | 
|---|
| 3552 |  | -	unsigned long s = rcu_seq_snap(&rsp->barrier_sequence);  | 
|---|
 | 3848 | +	unsigned long s = rcu_seq_snap(&rcu_state.barrier_sequence);  | 
|---|
| 3553 | 3849 |   | 
|---|
| 3554 |  | -	_rcu_barrier_trace(rsp, TPS("Begin"), -1, s);  | 
|---|
 | 3850 | +	rcu_barrier_trace(TPS("Begin"), -1, s);  | 
|---|
| 3555 | 3851 |   | 
|---|
| 3556 | 3852 |  	/* Take mutex to serialize concurrent rcu_barrier() requests. */ | 
|---|
| 3557 |  | -	mutex_lock(&rsp->barrier_mutex);  | 
|---|
 | 3853 | +	mutex_lock(&rcu_state.barrier_mutex);  | 
|---|
| 3558 | 3854 |   | 
|---|
| 3559 | 3855 |  	/* Did someone else do our work for us? */ | 
|---|
| 3560 |  | -	if (rcu_seq_done(&rsp->barrier_sequence, s)) {  | 
|---|
| 3561 |  | -		_rcu_barrier_trace(rsp, TPS("EarlyExit"), -1,  | 
|---|
| 3562 |  | -				   rsp->barrier_sequence);  | 
|---|
 | 3856 | +	if (rcu_seq_done(&rcu_state.barrier_sequence, s)) {  | 
|---|
 | 3857 | +		rcu_barrier_trace(TPS("EarlyExit"), -1,  | 
|---|
 | 3858 | +				  rcu_state.barrier_sequence);  | 
|---|
| 3563 | 3859 |  		smp_mb(); /* caller's subsequent code after above check. */ | 
|---|
| 3564 |  | -		mutex_unlock(&rsp->barrier_mutex);  | 
|---|
 | 3860 | +		mutex_unlock(&rcu_state.barrier_mutex);  | 
|---|
| 3565 | 3861 |  		return; | 
|---|
| 3566 | 3862 |  	} | 
|---|
| 3567 | 3863 |   | 
|---|
| 3568 | 3864 |  	/* Mark the start of the barrier operation. */ | 
|---|
| 3569 |  | -	rcu_seq_start(&rsp->barrier_sequence);  | 
|---|
| 3570 |  | -	_rcu_barrier_trace(rsp, TPS("Inc1"), -1, rsp->barrier_sequence);  | 
|---|
 | 3865 | +	rcu_seq_start(&rcu_state.barrier_sequence);  | 
|---|
 | 3866 | +	rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence);  | 
|---|
| 3571 | 3867 |   | 
|---|
| 3572 | 3868 |  	/* | 
|---|
| 3573 |  | -	 * Initialize the count to one rather than to zero in order to  | 
|---|
| 3574 |  | -	 * avoid a too-soon return to zero in case of a short grace period  | 
|---|
| 3575 |  | -	 * (or preemption of this task).  Exclude CPU-hotplug operations  | 
|---|
| 3576 |  | -	 * to ensure that no offline CPU has callbacks queued.  | 
|---|
 | 3869 | +	 * Initialize the count to two rather than to zero in order  | 
|---|
 | 3870 | +	 * to avoid a too-soon return to zero in case of an immediate  | 
|---|
 | 3871 | +	 * invocation of the just-enqueued callback (or preemption of  | 
|---|
 | 3872 | +	 * this task).  Exclude CPU-hotplug operations to ensure that no  | 
|---|
 | 3873 | +	 * offline non-offloaded CPU has callbacks queued.  | 
|---|
| 3577 | 3874 |  	 */ | 
|---|
| 3578 |  | -	init_completion(&rsp->barrier_completion);  | 
|---|
| 3579 |  | -	atomic_set(&rsp->barrier_cpu_count, 1);  | 
|---|
 | 3875 | +	init_completion(&rcu_state.barrier_completion);  | 
|---|
 | 3876 | +	atomic_set(&rcu_state.barrier_cpu_count, 2);  | 
|---|
| 3580 | 3877 |  	get_online_cpus(); | 
|---|
| 3581 | 3878 |   | 
|---|
| 3582 | 3879 |  	/* | 
|---|
| .. | .. | 
|---|
| 3585 | 3882 |  	 * corresponding CPU's preceding callbacks have been invoked. | 
|---|
| 3586 | 3883 |  	 */ | 
|---|
| 3587 | 3884 |  	for_each_possible_cpu(cpu) { | 
|---|
| 3588 |  | -		if (!cpu_online(cpu) && !rcu_is_nocb_cpu(cpu))  | 
|---|
 | 3885 | +		rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
 | 3886 | +		if (cpu_is_offline(cpu) &&  | 
|---|
 | 3887 | +		    !rcu_segcblist_is_offloaded(&rdp->cblist))  | 
|---|
| 3589 | 3888 |  			continue; | 
|---|
| 3590 |  | -		rdp = per_cpu_ptr(rsp->rda, cpu);  | 
|---|
| 3591 |  | -		if (rcu_is_nocb_cpu(cpu)) {  | 
|---|
| 3592 |  | -			if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) {  | 
|---|
| 3593 |  | -				_rcu_barrier_trace(rsp, TPS("OfflineNoCB"), cpu,  | 
|---|
| 3594 |  | -						   rsp->barrier_sequence);  | 
|---|
| 3595 |  | -			} else {  | 
|---|
| 3596 |  | -				_rcu_barrier_trace(rsp, TPS("OnlineNoCB"), cpu,  | 
|---|
| 3597 |  | -						   rsp->barrier_sequence);  | 
|---|
| 3598 |  | -				smp_mb__before_atomic();  | 
|---|
| 3599 |  | -				atomic_inc(&rsp->barrier_cpu_count);  | 
|---|
| 3600 |  | -				__call_rcu(&rdp->barrier_head,  | 
|---|
| 3601 |  | -					   rcu_barrier_callback, rsp, cpu, 0);  | 
|---|
| 3602 |  | -			}  | 
|---|
| 3603 |  | -		} else if (rcu_segcblist_n_cbs(&rdp->cblist)) {  | 
|---|
| 3604 |  | -			_rcu_barrier_trace(rsp, TPS("OnlineQ"), cpu,  | 
|---|
| 3605 |  | -					   rsp->barrier_sequence);  | 
|---|
| 3606 |  | -			smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);  | 
|---|
 | 3889 | +		if (rcu_segcblist_n_cbs(&rdp->cblist) && cpu_online(cpu)) {  | 
|---|
 | 3890 | +			rcu_barrier_trace(TPS("OnlineQ"), cpu,  | 
|---|
 | 3891 | +					  rcu_state.barrier_sequence);  | 
|---|
 | 3892 | +			smp_call_function_single(cpu, rcu_barrier_func, (void *)cpu, 1);  | 
|---|
 | 3893 | +		} else if (rcu_segcblist_n_cbs(&rdp->cblist) &&  | 
|---|
 | 3894 | +			   cpu_is_offline(cpu)) {  | 
|---|
 | 3895 | +			rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu,  | 
|---|
 | 3896 | +					  rcu_state.barrier_sequence);  | 
|---|
 | 3897 | +			local_irq_disable();  | 
|---|
 | 3898 | +			rcu_barrier_func((void *)cpu);  | 
|---|
 | 3899 | +			local_irq_enable();  | 
|---|
 | 3900 | +		} else if (cpu_is_offline(cpu)) {  | 
|---|
 | 3901 | +			rcu_barrier_trace(TPS("OfflineNoCBNoQ"), cpu,  | 
|---|
 | 3902 | +					  rcu_state.barrier_sequence);  | 
|---|
| 3607 | 3903 |  		} else { | 
|---|
| 3608 |  | -			_rcu_barrier_trace(rsp, TPS("OnlineNQ"), cpu,  | 
|---|
| 3609 |  | -					   rsp->barrier_sequence);  | 
|---|
 | 3904 | +			rcu_barrier_trace(TPS("OnlineNQ"), cpu,  | 
|---|
 | 3905 | +					  rcu_state.barrier_sequence);  | 
|---|
| 3610 | 3906 |  		} | 
|---|
| 3611 | 3907 |  	} | 
|---|
| 3612 | 3908 |  	put_online_cpus(); | 
|---|
| .. | .. | 
|---|
| 3615 | 3911 |  	 * Now that we have an rcu_barrier_callback() callback on each | 
|---|
| 3616 | 3912 |  	 * CPU, and thus each counted, remove the initial count. | 
|---|
| 3617 | 3913 |  	 */ | 
|---|
| 3618 |  | -	if (atomic_dec_and_test(&rsp->barrier_cpu_count))  | 
|---|
| 3619 |  | -		complete(&rsp->barrier_completion);  | 
|---|
 | 3914 | +	if (atomic_sub_and_test(2, &rcu_state.barrier_cpu_count))  | 
|---|
 | 3915 | +		complete(&rcu_state.barrier_completion);  | 
|---|
| 3620 | 3916 |   | 
|---|
| 3621 | 3917 |  	/* Wait for all rcu_barrier_callback() callbacks to be invoked. */ | 
|---|
| 3622 |  | -	wait_for_completion(&rsp->barrier_completion);  | 
|---|
 | 3918 | +	wait_for_completion(&rcu_state.barrier_completion);  | 
|---|
| 3623 | 3919 |   | 
|---|
| 3624 | 3920 |  	/* Mark the end of the barrier operation. */ | 
|---|
| 3625 |  | -	_rcu_barrier_trace(rsp, TPS("Inc2"), -1, rsp->barrier_sequence);  | 
|---|
| 3626 |  | -	rcu_seq_end(&rsp->barrier_sequence);  | 
|---|
 | 3921 | +	rcu_barrier_trace(TPS("Inc2"), -1, rcu_state.barrier_sequence);  | 
|---|
 | 3922 | +	rcu_seq_end(&rcu_state.barrier_sequence);  | 
|---|
| 3627 | 3923 |   | 
|---|
| 3628 | 3924 |  	/* Other rcu_barrier() invocations can now safely proceed. */ | 
|---|
| 3629 |  | -	mutex_unlock(&rsp->barrier_mutex);  | 
|---|
 | 3925 | +	mutex_unlock(&rcu_state.barrier_mutex);  | 
|---|
| 3630 | 3926 |  } | 
|---|
| 3631 |  | -  | 
|---|
| 3632 |  | -#ifndef CONFIG_PREEMPT_RT_FULL  | 
|---|
| 3633 |  | -/**  | 
|---|
| 3634 |  | - * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.  | 
|---|
| 3635 |  | - */  | 
|---|
| 3636 |  | -void rcu_barrier_bh(void)  | 
|---|
| 3637 |  | -{  | 
|---|
| 3638 |  | -	_rcu_barrier(&rcu_bh_state);  | 
|---|
| 3639 |  | -}  | 
|---|
| 3640 |  | -EXPORT_SYMBOL_GPL(rcu_barrier_bh);  | 
|---|
| 3641 |  | -#endif  | 
|---|
| 3642 |  | -  | 
|---|
| 3643 |  | -/**  | 
|---|
| 3644 |  | - * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.  | 
|---|
| 3645 |  | - */  | 
|---|
| 3646 |  | -void rcu_barrier_sched(void)  | 
|---|
| 3647 |  | -{  | 
|---|
| 3648 |  | -	_rcu_barrier(&rcu_sched_state);  | 
|---|
| 3649 |  | -}  | 
|---|
| 3650 |  | -EXPORT_SYMBOL_GPL(rcu_barrier_sched);  | 
|---|
 | 3927 | +EXPORT_SYMBOL_GPL(rcu_barrier);  | 
|---|
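A hedged example of the typical reason to call rcu_barrier(): module unload must ensure that no callbacks queued by the module are still in flight before its code and data go away. Function and cache names are illustrative:

	static void __exit foo_exit(void)
	{
		foo_stop_queueing_callbacks();	/* illustrative: no new call_rcu()/kfree_rcu() */
		rcu_barrier();			/* wait for callbacks already queued by this module */
		kmem_cache_destroy(foo_cache);	/* illustrative cache used by those callbacks */
	}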
| 3651 | 3928 |   | 
|---|
| 3652 | 3929 |  /* | 
|---|
| 3653 | 3930 |   * Propagate ->qsinitmask bits up the rcu_node tree to account for the | 
|---|
| .. | .. | 
|---|
| 3681 | 3958 |   * Do boot-time initialization of a CPU's per-CPU RCU data. | 
|---|
| 3682 | 3959 |   */ | 
|---|
| 3683 | 3960 |  static void __init | 
|---|
| 3684 |  | -rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)  | 
|---|
 | 3961 | +rcu_boot_init_percpu_data(int cpu)  | 
|---|
| 3685 | 3962 |  { | 
|---|
| 3686 |  | -	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);  | 
|---|
 | 3963 | +	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
| 3687 | 3964 |   | 
|---|
| 3688 | 3965 |  	/* Set up local state, ensuring consistent view of global state. */ | 
|---|
| 3689 | 3966 |  	rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu); | 
|---|
| 3690 |  | -	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);  | 
|---|
| 3691 |  | -	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != 1);  | 
|---|
| 3692 |  | -	WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp->dynticks)));  | 
|---|
| 3693 |  | -	rdp->rcu_ofl_gp_seq = rsp->gp_seq;  | 
|---|
 | 3967 | +	INIT_WORK(&rdp->strict_work, strict_work_handler);  | 
|---|
 | 3968 | +	WARN_ON_ONCE(rdp->dynticks_nesting != 1);  | 
|---|
 | 3969 | +	WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));  | 
|---|
 | 3970 | +	rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;  | 
|---|
| 3694 | 3971 |  	rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED; | 
|---|
| 3695 |  | -	rdp->rcu_onl_gp_seq = rsp->gp_seq;  | 
|---|
 | 3972 | +	rdp->rcu_onl_gp_seq = rcu_state.gp_seq;  | 
|---|
| 3696 | 3973 |  	rdp->rcu_onl_gp_flags = RCU_GP_CLEANED; | 
|---|
| 3697 | 3974 |  	rdp->cpu = cpu; | 
|---|
| 3698 |  | -	rdp->rsp = rsp;  | 
|---|
| 3699 | 3975 |  	rcu_boot_init_nocb_percpu_data(rdp); | 
|---|
| 3700 | 3976 |  } | 
|---|
| 3701 | 3977 |   | 
|---|
| 3702 | 3978 |  /* | 
|---|
| 3703 |  | - * Initialize a CPU's per-CPU RCU data.  Note that only one online or  | 
|---|
 | 3979 | + * Invoked early in the CPU-online process, when pretty much all services  | 
|---|
 | 3980 | + * are available.  The incoming CPU is not present.  | 
|---|
 | 3981 | + *  | 
|---|
 | 3982 | + * Initializes a CPU's per-CPU RCU data.  Note that only one online or  | 
|---|
| 3704 | 3983 |   * offline event can be happening at a given time.  Note also that we can | 
|---|
| 3705 | 3984 |   * accept some slop in the rsp->gp_seq access due to the fact that this | 
|---|
| 3706 |  | - * CPU cannot possibly have any RCU callbacks in flight yet.  | 
|---|
 | 3985 | + * CPU cannot possibly have any non-offloaded RCU callbacks in flight yet.  | 
|---|
 | 3986 | + * And any offloaded callbacks are being numbered elsewhere.  | 
|---|
| 3707 | 3987 |   */ | 
|---|
| 3708 |  | -static void  | 
|---|
| 3709 |  | -rcu_init_percpu_data(int cpu, struct rcu_state *rsp)  | 
|---|
 | 3988 | +int rcutree_prepare_cpu(unsigned int cpu)  | 
|---|
| 3710 | 3989 |  { | 
|---|
| 3711 | 3990 |  	unsigned long flags; | 
|---|
| 3712 |  | -	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);  | 
|---|
| 3713 |  | -	struct rcu_node *rnp = rcu_get_root(rsp);  | 
|---|
 | 3991 | +	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
 | 3992 | +	struct rcu_node *rnp = rcu_get_root();  | 
|---|
| 3714 | 3993 |   | 
|---|
| 3715 | 3994 |  	/* Set up local state, ensuring consistent view of global state. */ | 
|---|
| 3716 | 3995 |  	raw_spin_lock_irqsave_rcu_node(rnp, flags); | 
|---|
| 3717 | 3996 |  	rdp->qlen_last_fqs_check = 0; | 
|---|
| 3718 |  | -	rdp->n_force_qs_snap = rsp->n_force_qs;  | 
|---|
 | 3997 | +	rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs);  | 
|---|
| 3719 | 3998 |  	rdp->blimit = blimit; | 
|---|
| 3720 | 3999 |  	if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */ | 
|---|
| 3721 |  | -	    !init_nocb_callback_list(rdp))  | 
|---|
 | 4000 | +	    !rcu_segcblist_is_offloaded(&rdp->cblist))  | 
|---|
| 3722 | 4001 |  		rcu_segcblist_init(&rdp->cblist);  /* Re-enable callbacks. */ | 
|---|
| 3723 |  | -	rdp->dynticks->dynticks_nesting = 1;	/* CPU not up, no tearing. */  | 
|---|
 | 4002 | +	rdp->dynticks_nesting = 1;	/* CPU not up, no tearing. */  | 
|---|
| 3724 | 4003 |  	rcu_dynticks_eqs_online(); | 
|---|
| 3725 | 4004 |  	raw_spin_unlock_rcu_node(rnp);		/* irqs remain disabled. */ | 
|---|
| 3726 | 4005 |   | 
|---|
| .. | .. | 
|---|
| 3732 | 4011 |  	rnp = rdp->mynode; | 
|---|
| 3733 | 4012 |  	raw_spin_lock_rcu_node(rnp);		/* irqs already disabled. */ | 
|---|
| 3734 | 4013 |  	rdp->beenonline = true;	 /* We have now been online. */ | 
|---|
| 3735 |  | -	rdp->gp_seq = rnp->gp_seq;  | 
|---|
| 3736 |  | -	rdp->gp_seq_needed = rnp->gp_seq;  | 
|---|
 | 4014 | +	rdp->gp_seq = READ_ONCE(rnp->gp_seq);  | 
|---|
 | 4015 | +	rdp->gp_seq_needed = rdp->gp_seq;  | 
|---|
| 3737 | 4016 |  	rdp->cpu_no_qs.b.norm = true; | 
|---|
| 3738 |  | -	rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu);  | 
|---|
| 3739 | 4017 |  	rdp->core_needs_qs = false; | 
|---|
| 3740 | 4018 |  	rdp->rcu_iw_pending = false; | 
|---|
| 3741 |  | -	rdp->rcu_iw_gp_seq = rnp->gp_seq - 1;  | 
|---|
| 3742 |  | -	trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("cpuonl"));  | 
|---|
 | 4019 | +	rdp->rcu_iw_gp_seq = rdp->gp_seq - 1;  | 
|---|
 | 4020 | +	trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl"));  | 
|---|
| 3743 | 4021 |  	raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | 
|---|
| 3744 |  | -}  | 
|---|
| 3745 |  | -  | 
|---|
| 3746 |  | -/*  | 
|---|
| 3747 |  | - * Invoked early in the CPU-online process, when pretty much all  | 
|---|
| 3748 |  | - * services are available.  The incoming CPU is not present.  | 
|---|
| 3749 |  | - */  | 
|---|
| 3750 |  | -int rcutree_prepare_cpu(unsigned int cpu)  | 
|---|
| 3751 |  | -{  | 
|---|
| 3752 |  | -	struct rcu_state *rsp;  | 
|---|
| 3753 |  | -  | 
|---|
| 3754 |  | -	for_each_rcu_flavor(rsp)  | 
|---|
| 3755 |  | -		rcu_init_percpu_data(cpu, rsp);  | 
|---|
| 3756 |  | -  | 
|---|
| 3757 | 4022 |  	rcu_prepare_kthreads(cpu); | 
|---|
| 3758 |  | -	rcu_spawn_all_nocb_kthreads(cpu);  | 
|---|
 | 4023 | +	rcu_spawn_cpu_nocb_kthread(cpu);  | 
|---|
| 3759 | 4024 |   | 
|---|
| 3760 | 4025 |  	return 0; | 
|---|
| 3761 | 4026 |  } | 
|---|
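As background (an assumption about wiring that is outside this hunk): rcutree_prepare_cpu() and its siblings are invoked through the CPU-hotplug state machine via dedicated CPUHP_RCUTREE_* states declared elsewhere, not called directly. The generic pattern for attaching such callbacks looks roughly like this, shown with hypothetical foo_* handlers:

	/* Generic pattern only; RCU itself uses dedicated CPUHP_RCUTREE_* states. */
	int ret;

	ret = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN, "foo:prepare",
					foo_prepare_cpu, foo_dead_cpu);
	if (ret < 0)
		pr_err("foo: failed to register CPU hotplug callbacks\n");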
| .. | .. | 
|---|
| 3765 | 4030 |   */ | 
|---|
| 3766 | 4031 |  static void rcutree_affinity_setting(unsigned int cpu, int outgoing) | 
|---|
| 3767 | 4032 |  { | 
|---|
| 3768 |  | -	struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu);  | 
|---|
 | 4033 | +	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
| 3769 | 4034 |   | 
|---|
| 3770 | 4035 |  	rcu_boost_kthread_setaffinity(rdp->mynode, outgoing); | 
|---|
| 3771 | 4036 |  } | 
|---|
| .. | .. | 
|---|
| 3779 | 4044 |  	unsigned long flags; | 
|---|
| 3780 | 4045 |  	struct rcu_data *rdp; | 
|---|
| 3781 | 4046 |  	struct rcu_node *rnp; | 
|---|
| 3782 |  | -	struct rcu_state *rsp;  | 
|---|
| 3783 | 4047 |   | 
|---|
| 3784 |  | -	for_each_rcu_flavor(rsp) {  | 
|---|
| 3785 |  | -		rdp = per_cpu_ptr(rsp->rda, cpu);  | 
|---|
| 3786 |  | -		rnp = rdp->mynode;  | 
|---|
| 3787 |  | -		raw_spin_lock_irqsave_rcu_node(rnp, flags);  | 
|---|
| 3788 |  | -		rnp->ffmask |= rdp->grpmask;  | 
|---|
| 3789 |  | -		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  | 
|---|
| 3790 |  | -	}  | 
|---|
 | 4048 | +	rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
 | 4049 | +	rnp = rdp->mynode;  | 
|---|
 | 4050 | +	raw_spin_lock_irqsave_rcu_node(rnp, flags);  | 
|---|
 | 4051 | +	rnp->ffmask |= rdp->grpmask;  | 
|---|
 | 4052 | +	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  | 
|---|
| 3791 | 4053 |  	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) | 
|---|
| 3792 | 4054 |  		return 0; /* Too early in boot for scheduler work. */ | 
|---|
| 3793 | 4055 |  	sync_sched_exp_online_cleanup(cpu); | 
|---|
| 3794 | 4056 |  	rcutree_affinity_setting(cpu, -1); | 
|---|
 | 4057 | +  | 
|---|
 | 4058 | +	// Stop-machine done, so allow nohz_full to disable tick.  | 
|---|
 | 4059 | +	tick_dep_clear(TICK_DEP_BIT_RCU);  | 
|---|
| 3795 | 4060 |  	return 0; | 
|---|
| 3796 | 4061 |  } | 
|---|
| 3797 | 4062 |   | 
|---|
| .. | .. | 
|---|
| 3804 | 4069 |  	unsigned long flags; | 
|---|
| 3805 | 4070 |  	struct rcu_data *rdp; | 
|---|
| 3806 | 4071 |  	struct rcu_node *rnp; | 
|---|
| 3807 |  | -	struct rcu_state *rsp;  | 
|---|
| 3808 | 4072 |   | 
|---|
| 3809 |  | -	for_each_rcu_flavor(rsp) {  | 
|---|
| 3810 |  | -		rdp = per_cpu_ptr(rsp->rda, cpu);  | 
|---|
| 3811 |  | -		rnp = rdp->mynode;  | 
|---|
| 3812 |  | -		raw_spin_lock_irqsave_rcu_node(rnp, flags);  | 
|---|
| 3813 |  | -		rnp->ffmask &= ~rdp->grpmask;  | 
|---|
| 3814 |  | -		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  | 
|---|
| 3815 |  | -	}  | 
|---|
 | 4073 | +	rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
 | 4074 | +	rnp = rdp->mynode;  | 
|---|
 | 4075 | +	raw_spin_lock_irqsave_rcu_node(rnp, flags);  | 
|---|
 | 4076 | +	rnp->ffmask &= ~rdp->grpmask;  | 
|---|
 | 4077 | +	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  | 
|---|
| 3816 | 4078 |   | 
|---|
| 3817 | 4079 |  	rcutree_affinity_setting(cpu, cpu); | 
|---|
 | 4080 | +  | 
|---|
 | 4081 | +	// nohz_full CPUs need the tick for stop-machine to work quickly  | 
|---|
 | 4082 | +	tick_dep_set(TICK_DEP_BIT_RCU);  | 
|---|
| 3818 | 4083 |  	return 0; | 
|---|
| 3819 | 4084 |  } | 
|---|
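
Not part of the patch: an illustrative model of the symmetric `rcutree_online_cpu()`/`rcutree_offline_cpu()` paths above. Each path toggles this CPU's bit in the leaf node's `->ffmask` under the node lock and flips the RCU tick dependency, so nohz_full CPUs keep the scheduling-clock tick only while hotplug's stop-machine phases need it. `leaf`, `rcu_tick_dep`, `cpu_online` and `cpu_offline` are made-up names; a plain bool stands in for `TICK_DEP_BIT_RCU`.

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct leaf {
	pthread_mutex_t lock;
	unsigned long ffmask;	/* CPUs that want the full-featured tick */
};

static bool rcu_tick_dep;	/* stand-in for TICK_DEP_BIT_RCU */

static void cpu_online(struct leaf *rnp, int grpnum)
{
	pthread_mutex_lock(&rnp->lock);
	rnp->ffmask |= 1UL << grpnum;
	pthread_mutex_unlock(&rnp->lock);
	rcu_tick_dep = false;	/* stop-machine done: tick may stop again */
}

static void cpu_offline(struct leaf *rnp, int grpnum)
{
	pthread_mutex_lock(&rnp->lock);
	rnp->ffmask &= ~(1UL << grpnum);
	pthread_mutex_unlock(&rnp->lock);
	rcu_tick_dep = true;	/* keep the tick for the coming stop-machine */
}

int main(void)
{
	struct leaf rnp = { .lock = PTHREAD_MUTEX_INITIALIZER };

	cpu_online(&rnp, 3);
	printf("online:  ffmask=%#lx dep=%d\n", rnp.ffmask, rcu_tick_dep);
	cpu_offline(&rnp, 3);
	printf("offline: ffmask=%#lx dep=%d\n", rnp.ffmask, rcu_tick_dep);
	return 0;
}
```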
| 3820 |  | -  | 
|---|
| 3821 |  | -/*  | 
|---|
| 3822 |  | - * Near the end of the offline process.  We do only tracing here.  | 
|---|
| 3823 |  | - */  | 
|---|
| 3824 |  | -int rcutree_dying_cpu(unsigned int cpu)  | 
|---|
| 3825 |  | -{  | 
|---|
| 3826 |  | -	struct rcu_state *rsp;  | 
|---|
| 3827 |  | -  | 
|---|
| 3828 |  | -	for_each_rcu_flavor(rsp)  | 
|---|
| 3829 |  | -		rcu_cleanup_dying_cpu(rsp);  | 
|---|
| 3830 |  | -	return 0;  | 
|---|
| 3831 |  | -}  | 
|---|
| 3832 |  | -  | 
|---|
| 3833 |  | -/*  | 
|---|
| 3834 |  | - * The outgoing CPU is gone and we are running elsewhere.  | 
|---|
| 3835 |  | - */  | 
|---|
| 3836 |  | -int rcutree_dead_cpu(unsigned int cpu)  | 
|---|
| 3837 |  | -{  | 
|---|
| 3838 |  | -	struct rcu_state *rsp;  | 
|---|
| 3839 |  | -  | 
|---|
| 3840 |  | -	for_each_rcu_flavor(rsp) {  | 
|---|
| 3841 |  | -		rcu_cleanup_dead_cpu(cpu, rsp);  | 
|---|
| 3842 |  | -		do_nocb_deferred_wakeup(per_cpu_ptr(rsp->rda, cpu));  | 
|---|
| 3843 |  | -	}  | 
|---|
| 3844 |  | -	return 0;  | 
|---|
| 3845 |  | -}  | 
|---|
| 3846 |  | -  | 
|---|
| 3847 |  | -static DEFINE_PER_CPU(int, rcu_cpu_started);  | 
|---|
| 3848 | 4085 |   | 
|---|
| 3849 | 4086 |  /* | 
|---|
| 3850 | 4087 |   * Mark the specified CPU as being online so that subsequent grace periods | 
|---|
| .. | .. | 
|---|
| 3861 | 4098 |  { | 
|---|
| 3862 | 4099 |  	unsigned long flags; | 
|---|
| 3863 | 4100 |  	unsigned long mask; | 
|---|
| 3864 |  | -	int nbits;  | 
|---|
| 3865 |  | -	unsigned long oldmask;  | 
|---|
| 3866 | 4101 |  	struct rcu_data *rdp; | 
|---|
| 3867 | 4102 |  	struct rcu_node *rnp; | 
|---|
| 3868 |  | -	struct rcu_state *rsp;  | 
|---|
 | 4103 | +	bool newcpu;  | 
|---|
| 3869 | 4104 |   | 
|---|
| 3870 |  | -	if (per_cpu(rcu_cpu_started, cpu))  | 
|---|
 | 4105 | +	rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
 | 4106 | +	if (rdp->cpu_started)  | 
|---|
| 3871 | 4107 |  		return; | 
|---|
 | 4108 | +	rdp->cpu_started = true;  | 
|---|
| 3872 | 4109 |   | 
|---|
| 3873 |  | -	per_cpu(rcu_cpu_started, cpu) = 1;  | 
|---|
| 3874 |  | -  | 
|---|
| 3875 |  | -	for_each_rcu_flavor(rsp) {  | 
|---|
| 3876 |  | -		rdp = per_cpu_ptr(rsp->rda, cpu);  | 
|---|
| 3877 |  | -		rnp = rdp->mynode;  | 
|---|
| 3878 |  | -		mask = rdp->grpmask;  | 
|---|
| 3879 |  | -		raw_spin_lock_irqsave_rcu_node(rnp, flags);  | 
|---|
| 3880 |  | -		rnp->qsmaskinitnext |= mask;  | 
|---|
| 3881 |  | -		oldmask = rnp->expmaskinitnext;  | 
|---|
| 3882 |  | -		rnp->expmaskinitnext |= mask;  | 
|---|
| 3883 |  | -		oldmask ^= rnp->expmaskinitnext;  | 
|---|
| 3884 |  | -		nbits = bitmap_weight(&oldmask, BITS_PER_LONG);  | 
|---|
| 3885 |  | -		/* Allow lockless access for expedited grace periods. */  | 
|---|
| 3886 |  | -		smp_store_release(&rsp->ncpus, rsp->ncpus + nbits); /* ^^^ */  | 
|---|
| 3887 |  | -		rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */  | 
|---|
| 3888 |  | -		rdp->rcu_onl_gp_seq = READ_ONCE(rsp->gp_seq);  | 
|---|
| 3889 |  | -		rdp->rcu_onl_gp_flags = READ_ONCE(rsp->gp_flags);  | 
|---|
| 3890 |  | -		if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */  | 
|---|
| 3891 |  | -			/* Report QS -after- changing ->qsmaskinitnext! */  | 
|---|
| 3892 |  | -			rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags);  | 
|---|
| 3893 |  | -		} else {  | 
|---|
| 3894 |  | -			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  | 
|---|
| 3895 |  | -		}  | 
|---|
 | 4110 | +	rnp = rdp->mynode;  | 
|---|
 | 4111 | +	mask = rdp->grpmask;  | 
|---|
 | 4112 | +	raw_spin_lock_irqsave_rcu_node(rnp, flags);  | 
|---|
 | 4113 | +	WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);  | 
|---|
 | 4114 | +	newcpu = !(rnp->expmaskinitnext & mask);  | 
|---|
 | 4115 | +	rnp->expmaskinitnext |= mask;  | 
|---|
 | 4116 | +	/* Allow lockless access for expedited grace periods. */  | 
|---|
 | 4117 | +	smp_store_release(&rcu_state.ncpus, rcu_state.ncpus + newcpu); /* ^^^ */  | 
|---|
 | 4118 | +	ASSERT_EXCLUSIVE_WRITER(rcu_state.ncpus);  | 
|---|
 | 4119 | +	rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */  | 
|---|
 | 4120 | +	rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);  | 
|---|
 | 4121 | +	rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);  | 
|---|
 | 4122 | +	if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */  | 
|---|
 | 4123 | +		rcu_disable_urgency_upon_qs(rdp);  | 
|---|
 | 4124 | +		/* Report QS -after- changing ->qsmaskinitnext! */  | 
|---|
 | 4125 | +		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);  | 
|---|
 | 4126 | +	} else {  | 
|---|
 | 4127 | +		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  | 
|---|
| 3896 | 4128 |  	} | 
|---|
| 3897 | 4129 |  	smp_mb(); /* Ensure RCU read-side usage follows above initialization. */ | 
|---|
| 3898 | 4130 |  } | 
|---|
| 3899 | 4131 |   | 
|---|
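
Not part of the patch: a sketch of the publication ordering in the `rcu_cpu_starting()` hunk above. Under the node lock, the CPU's bit is set in the init masks and `newcpu` records whether this CPU has ever been online before; the CPU count is then published with a release store so that lockless readers on the expedited path never see the count incremented before the mask updates. C11 release atomics stand in for `smp_store_release()`, and the names here (`leaf`, `cpu_starting`, `ncpus`) are illustrative only.

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct leaf {
	pthread_mutex_t lock;
	unsigned long qsmaskinitnext;	/* CPUs future GPs must wait on */
	unsigned long expmaskinitnext;	/* ditto, for expedited GPs */
};

static atomic_ulong ncpus;		/* read locklessly by the expedited path */

static void cpu_starting(struct leaf *rnp, int grpnum)
{
	unsigned long mask = 1UL << grpnum;
	bool newcpu;

	pthread_mutex_lock(&rnp->lock);
	rnp->qsmaskinitnext |= mask;
	newcpu = !(rnp->expmaskinitnext & mask);	/* first time online? */
	rnp->expmaskinitnext |= mask;
	/* Publish the count only after the masks are updated. */
	atomic_store_explicit(&ncpus,
		atomic_load_explicit(&ncpus, memory_order_relaxed) + newcpu,
		memory_order_release);
	pthread_mutex_unlock(&rnp->lock);
}

int main(void)
{
	struct leaf rnp = { .lock = PTHREAD_MUTEX_INITIALIZER };

	cpu_starting(&rnp, 0);
	cpu_starting(&rnp, 0);		/* second call: not a new CPU */
	printf("ncpus=%lu qsmaskinitnext=%#lx\n",
	       atomic_load_explicit(&ncpus, memory_order_acquire),
	       rnp.qsmaskinitnext);
	return 0;
}
```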
| 3900 |  | -#ifdef CONFIG_HOTPLUG_CPU  | 
|---|
| 3901 |  | -/*  | 
|---|
| 3902 |  | - * The CPU is exiting the idle loop into the arch_cpu_idle_dead()  | 
|---|
| 3903 |  | - * function.  We now remove it from the rcu_node tree's ->qsmaskinitnext  | 
|---|
| 3904 |  | - * bit masks.  | 
|---|
| 3905 |  | - */  | 
|---|
| 3906 |  | -static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)  | 
|---|
| 3907 |  | -{  | 
|---|
| 3908 |  | -	unsigned long flags;  | 
|---|
| 3909 |  | -	unsigned long mask;  | 
|---|
| 3910 |  | -	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);  | 
|---|
| 3911 |  | -	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */  | 
|---|
| 3912 |  | -  | 
|---|
| 3913 |  | -	/* Remove outgoing CPU from mask in the leaf rcu_node structure. */  | 
|---|
| 3914 |  | -	mask = rdp->grpmask;  | 
|---|
| 3915 |  | -	spin_lock(&rsp->ofl_lock);  | 
|---|
| 3916 |  | -	raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */  | 
|---|
| 3917 |  | -	rdp->rcu_ofl_gp_seq = READ_ONCE(rsp->gp_seq);  | 
|---|
| 3918 |  | -	rdp->rcu_ofl_gp_flags = READ_ONCE(rsp->gp_flags);  | 
|---|
| 3919 |  | -	if (rnp->qsmask & mask) { /* RCU waiting on outgoing CPU? */  | 
|---|
| 3920 |  | -		/* Report quiescent state -before- changing ->qsmaskinitnext! */  | 
|---|
| 3921 |  | -		rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags);  | 
|---|
| 3922 |  | -		raw_spin_lock_irqsave_rcu_node(rnp, flags);  | 
|---|
| 3923 |  | -	}  | 
|---|
| 3924 |  | -	rnp->qsmaskinitnext &= ~mask;  | 
|---|
| 3925 |  | -	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  | 
|---|
| 3926 |  | -	spin_unlock(&rsp->ofl_lock);  | 
|---|
| 3927 |  | -}  | 
|---|
| 3928 |  | -  | 
|---|
| 3929 | 4132 |  /* | 
|---|
| 3930 | 4133 |   * The outgoing function has no further need of RCU, so remove it from | 
|---|
| 3931 |  | - * the list of CPUs that RCU must track.  | 
|---|
 | 4134 | + * the rcu_node tree's ->qsmaskinitnext bit masks.  | 
|---|
| 3932 | 4135 |   * | 
|---|
| 3933 | 4136 |   * Note that this function is special in that it is invoked directly | 
|---|
| 3934 | 4137 |   * from the outgoing CPU rather than from the cpuhp_step mechanism. | 
|---|
| .. | .. | 
|---|
| 3936 | 4139 |   */ | 
|---|
| 3937 | 4140 |  void rcu_report_dead(unsigned int cpu) | 
|---|
| 3938 | 4141 |  { | 
|---|
| 3939 |  | -	struct rcu_state *rsp;  | 
|---|
 | 4142 | +	unsigned long flags;  | 
|---|
 | 4143 | +	unsigned long mask;  | 
|---|
 | 4144 | +	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
 | 4145 | +	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */  | 
|---|
| 3940 | 4146 |   | 
|---|
| 3941 |  | -	/* QS for any half-done expedited RCU-sched GP. */  | 
|---|
 | 4147 | +	/* QS for any half-done expedited grace period. */  | 
|---|
| 3942 | 4148 |  	preempt_disable(); | 
|---|
| 3943 |  | -	rcu_report_exp_rdp(&rcu_sched_state,  | 
|---|
| 3944 |  | -			   this_cpu_ptr(rcu_sched_state.rda), true);  | 
|---|
 | 4149 | +	rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));  | 
|---|
| 3945 | 4150 |  	preempt_enable(); | 
|---|
| 3946 |  | -	for_each_rcu_flavor(rsp)  | 
|---|
| 3947 |  | -		rcu_cleanup_dying_idle_cpu(cpu, rsp);  | 
|---|
 | 4151 | +	rcu_preempt_deferred_qs(current);  | 
|---|
| 3948 | 4152 |   | 
|---|
| 3949 |  | -	per_cpu(rcu_cpu_started, cpu) = 0;  | 
|---|
 | 4153 | +	/* Remove outgoing CPU from mask in the leaf rcu_node structure. */  | 
|---|
 | 4154 | +	mask = rdp->grpmask;  | 
|---|
 | 4155 | +	raw_spin_lock(&rcu_state.ofl_lock);  | 
|---|
 | 4156 | +	raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */  | 
|---|
 | 4157 | +	rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);  | 
|---|
 | 4158 | +	rdp->rcu_ofl_gp_flags = READ_ONCE(rcu_state.gp_flags);  | 
|---|
 | 4159 | +	if (rnp->qsmask & mask) { /* RCU waiting on outgoing CPU? */  | 
|---|
 | 4160 | +		/* Report quiescent state -before- changing ->qsmaskinitnext! */  | 
|---|
 | 4161 | +		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);  | 
|---|
 | 4162 | +		raw_spin_lock_irqsave_rcu_node(rnp, flags);  | 
|---|
 | 4163 | +	}  | 
|---|
 | 4164 | +	WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);  | 
|---|
 | 4165 | +	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  | 
|---|
 | 4166 | +	raw_spin_unlock(&rcu_state.ofl_lock);  | 
|---|
 | 4167 | +  | 
|---|
 | 4168 | +	rdp->cpu_started = false;  | 
|---|
| 3950 | 4169 |  } | 
|---|
| 3951 | 4170 |   | 
|---|
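
Not part of the patch: an illustrative model of the offline ordering in `rcu_report_dead()` above. Any quiescent state the current grace period is still waiting for is reported *before* the CPU's bit is cleared from `->qsmaskinitnext`, all while holding the node lock, with `ofl_lock` serializing against grace-period initialization. The `report_qs()` helper here is a trivial stand-in for `rcu_report_qs_rnp()`, which in the kernel may propagate up the tree and drops the node lock (hence the re-acquire in the patch); pthread mutexes stand in for the kernel locks.

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ofl_lock = PTHREAD_MUTEX_INITIALIZER;

struct leaf {
	pthread_mutex_t lock;
	unsigned long qsmask;		/* CPUs the current GP still waits on */
	unsigned long qsmaskinitnext;	/* CPUs future GPs will wait on */
};

static void report_qs(struct leaf *rnp, unsigned long mask)
{
	rnp->qsmask &= ~mask;		/* record the quiescent state */
}

static void cpu_report_dead(struct leaf *rnp, int grpnum)
{
	unsigned long mask = 1UL << grpnum;

	pthread_mutex_lock(&ofl_lock);		/* serialize vs. GP init */
	pthread_mutex_lock(&rnp->lock);
	if (rnp->qsmask & mask)
		report_qs(rnp, mask);		/* QS first ... */
	rnp->qsmaskinitnext &= ~mask;		/* ... then leave the set */
	pthread_mutex_unlock(&rnp->lock);
	pthread_mutex_unlock(&ofl_lock);
}

int main(void)
{
	struct leaf rnp = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.qsmask = 0x4, .qsmaskinitnext = 0x7,
	};

	cpu_report_dead(&rnp, 2);
	printf("qsmask=%#lx qsmaskinitnext=%#lx\n",
	       rnp.qsmask, rnp.qsmaskinitnext);
	return 0;
}
```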
| 3952 |  | -/* Migrate the dead CPU's callbacks to the current CPU. */  | 
|---|
| 3953 |  | -static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)  | 
|---|
 | 4171 | +#ifdef CONFIG_HOTPLUG_CPU  | 
|---|
 | 4172 | +/*  | 
|---|
 | 4173 | + * The outgoing CPU has just passed through the dying-idle state, and we  | 
|---|
 | 4174 | + * are being invoked from the CPU that was IPIed to continue the offline  | 
|---|
 | 4175 | + * operation.  Migrate the outgoing CPU's callbacks to the current CPU.  | 
|---|
 | 4176 | + */  | 
|---|
 | 4177 | +void rcutree_migrate_callbacks(int cpu)  | 
|---|
| 3954 | 4178 |  { | 
|---|
| 3955 | 4179 |  	unsigned long flags; | 
|---|
| 3956 | 4180 |  	struct rcu_data *my_rdp; | 
|---|
| 3957 |  | -	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);  | 
|---|
| 3958 |  | -	struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);  | 
|---|
 | 4181 | +	struct rcu_node *my_rnp;  | 
|---|
 | 4182 | +	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
| 3959 | 4183 |  	bool needwake; | 
|---|
| 3960 | 4184 |   | 
|---|
| 3961 |  | -	if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist))  | 
|---|
 | 4185 | +	if (rcu_segcblist_is_offloaded(&rdp->cblist) ||  | 
|---|
 | 4186 | +	    rcu_segcblist_empty(&rdp->cblist))  | 
|---|
| 3962 | 4187 |  		return;  /* No callbacks to migrate. */ | 
|---|
| 3963 | 4188 |   | 
|---|
| 3964 | 4189 |  	local_irq_save(flags); | 
|---|
| 3965 |  | -	my_rdp = this_cpu_ptr(rsp->rda);  | 
|---|
| 3966 |  | -	if (rcu_nocb_adopt_orphan_cbs(my_rdp, rdp, flags)) {  | 
|---|
| 3967 |  | -		local_irq_restore(flags);  | 
|---|
| 3968 |  | -		return;  | 
|---|
| 3969 |  | -	}  | 
|---|
| 3970 |  | -	raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */  | 
|---|
 | 4190 | +	my_rdp = this_cpu_ptr(&rcu_data);  | 
|---|
 | 4191 | +	my_rnp = my_rdp->mynode;  | 
|---|
 | 4192 | +	rcu_nocb_lock(my_rdp); /* irqs already disabled. */  | 
|---|
 | 4193 | +	WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies));  | 
|---|
 | 4194 | +	raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */  | 
|---|
| 3971 | 4195 |  	/* Leverage recent GPs and set GP for new callbacks. */ | 
|---|
| 3972 |  | -	needwake = rcu_advance_cbs(rsp, rnp_root, rdp) ||  | 
|---|
| 3973 |  | -		   rcu_advance_cbs(rsp, rnp_root, my_rdp);  | 
|---|
 | 4196 | +	needwake = rcu_advance_cbs(my_rnp, rdp) ||  | 
|---|
 | 4197 | +		   rcu_advance_cbs(my_rnp, my_rdp);  | 
|---|
| 3974 | 4198 |  	rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist); | 
|---|
 | 4199 | +	needwake = needwake || rcu_advance_cbs(my_rnp, my_rdp);  | 
|---|
 | 4200 | +	rcu_segcblist_disable(&rdp->cblist);  | 
|---|
| 3975 | 4201 |  	WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != | 
|---|
| 3976 | 4202 |  		     !rcu_segcblist_n_cbs(&my_rdp->cblist)); | 
|---|
| 3977 |  | -	raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags);  | 
|---|
 | 4203 | +	if (rcu_segcblist_is_offloaded(&my_rdp->cblist)) {  | 
|---|
 | 4204 | +		raw_spin_unlock_rcu_node(my_rnp); /* irqs remain disabled. */  | 
|---|
 | 4205 | +		__call_rcu_nocb_wake(my_rdp, true, flags);  | 
|---|
 | 4206 | +	} else {  | 
|---|
 | 4207 | +		rcu_nocb_unlock(my_rdp); /* irqs remain disabled. */  | 
|---|
 | 4208 | +		raw_spin_unlock_irqrestore_rcu_node(my_rnp, flags);  | 
|---|
 | 4209 | +	}  | 
|---|
| 3978 | 4210 |  	if (needwake) | 
|---|
| 3979 |  | -		rcu_gp_kthread_wake(rsp);  | 
|---|
 | 4211 | +		rcu_gp_kthread_wake();  | 
|---|
 | 4212 | +	lockdep_assert_irqs_enabled();  | 
|---|
| 3980 | 4213 |  	WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 || | 
|---|
| 3981 | 4214 |  		  !rcu_segcblist_empty(&rdp->cblist), | 
|---|
| 3982 | 4215 |  		  "rcutree_migrate_callbacks: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n", | 
|---|
| 3983 | 4216 |  		  cpu, rcu_segcblist_n_cbs(&rdp->cblist), | 
|---|
| 3984 | 4217 |  		  rcu_segcblist_first_cb(&rdp->cblist)); | 
|---|
| 3985 |  | -}  | 
|---|
| 3986 |  | -  | 
|---|
| 3987 |  | -/*  | 
|---|
| 3988 |  | - * The outgoing CPU has just passed through the dying-idle state,  | 
|---|
| 3989 |  | - * and we are being invoked from the CPU that was IPIed to continue the  | 
|---|
| 3990 |  | - * offline operation.  We need to migrate the outgoing CPU's callbacks.  | 
|---|
| 3991 |  | - */  | 
|---|
| 3992 |  | -void rcutree_migrate_callbacks(int cpu)  | 
|---|
| 3993 |  | -{  | 
|---|
| 3994 |  | -	struct rcu_state *rsp;  | 
|---|
| 3995 |  | -  | 
|---|
| 3996 |  | -	for_each_rcu_flavor(rsp)  | 
|---|
| 3997 |  | -		rcu_migrate_callbacks(cpu, rsp);  | 
|---|
| 3998 | 4218 |  } | 
|---|
| 3999 | 4219 |  #endif | 
|---|
| 4000 | 4220 |   | 
|---|
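
Not part of the patch: a simplified sketch of the splice at the heart of the `rcutree_migrate_callbacks()` hunk above. The outgoing CPU's callback list is appended to the surviving CPU's list and the source is left empty. This is *not* the kernel's `rcu_segcblist` API: the real `rcu_segcblist_merge()` also preserves per-grace-period segment boundaries, and the patch additionally handles offloaded (no-CBs) lists and wakes the grace-period kthread; the names `cblist_*` below are invented for the example.

```c
#include <stdio.h>
#include <stddef.h>

struct cb {
	struct cb *next;
};

struct cblist {
	struct cb *head;
	struct cb **tail;	/* points at the last ->next pointer */
	long len;
};

static void cblist_init(struct cblist *l)
{
	l->head = NULL;
	l->tail = &l->head;
	l->len = 0;
}

static void cblist_enqueue(struct cblist *l, struct cb *cb)
{
	cb->next = NULL;
	*l->tail = cb;
	l->tail = &cb->next;
	l->len++;
}

/* Append everything on @src to @dst and empty @src. */
static void cblist_merge(struct cblist *dst, struct cblist *src)
{
	if (!src->head)
		return;
	*dst->tail = src->head;
	dst->tail = src->tail;
	dst->len += src->len;
	cblist_init(src);
}

int main(void)
{
	struct cblist mine, dead;
	struct cb a, b;

	cblist_init(&mine);
	cblist_init(&dead);
	cblist_enqueue(&dead, &a);
	cblist_enqueue(&dead, &b);
	cblist_merge(&mine, &dead);
	printf("migrated %ld callbacks, source now has %ld\n",
	       mine.len, dead.len);
	return 0;
}
```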
| .. | .. | 
|---|
| 4008 | 4228 |  	switch (action) { | 
|---|
| 4009 | 4229 |  	case PM_HIBERNATION_PREPARE: | 
|---|
| 4010 | 4230 |  	case PM_SUSPEND_PREPARE: | 
|---|
| 4011 |  | -		if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */  | 
|---|
| 4012 |  | -			rcu_expedite_gp();  | 
|---|
 | 4231 | +		rcu_expedite_gp();  | 
|---|
| 4013 | 4232 |  		break; | 
|---|
| 4014 | 4233 |  	case PM_POST_HIBERNATION: | 
|---|
| 4015 | 4234 |  	case PM_POST_SUSPEND: | 
|---|
| 4016 |  | -		if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */  | 
|---|
| 4017 |  | -			rcu_unexpedite_gp();  | 
|---|
 | 4235 | +		rcu_unexpedite_gp();  | 
|---|
| 4018 | 4236 |  		break; | 
|---|
| 4019 | 4237 |  	default: | 
|---|
| 4020 | 4238 |  		break; | 
|---|
| .. | .. | 
|---|
| 4023 | 4241 |  } | 
|---|
| 4024 | 4242 |   | 
|---|
| 4025 | 4243 |  /* | 
|---|
| 4026 |  | - * Spawn the kthreads that handle each RCU flavor's grace periods.  | 
|---|
 | 4244 | + * Spawn the kthreads that handle RCU's grace periods.  | 
|---|
| 4027 | 4245 |   */ | 
|---|
| 4028 | 4246 |  static int __init rcu_spawn_gp_kthread(void) | 
|---|
| 4029 | 4247 |  { | 
|---|
| 4030 | 4248 |  	unsigned long flags; | 
|---|
| 4031 | 4249 |  	int kthread_prio_in = kthread_prio; | 
|---|
| 4032 | 4250 |  	struct rcu_node *rnp; | 
|---|
| 4033 |  | -	struct rcu_state *rsp;  | 
|---|
| 4034 | 4251 |  	struct sched_param sp; | 
|---|
| 4035 | 4252 |  	struct task_struct *t; | 
|---|
| 4036 | 4253 |   | 
|---|
| .. | .. | 
|---|
| 4050 | 4267 |  			 kthread_prio, kthread_prio_in); | 
|---|
| 4051 | 4268 |   | 
|---|
| 4052 | 4269 |  	rcu_scheduler_fully_active = 1; | 
|---|
| 4053 |  | -	for_each_rcu_flavor(rsp) {  | 
|---|
| 4054 |  | -		t = kthread_create(rcu_gp_kthread, rsp, "%s", rsp->name);  | 
|---|
| 4055 |  | -		BUG_ON(IS_ERR(t));  | 
|---|
| 4056 |  | -		rnp = rcu_get_root(rsp);  | 
|---|
| 4057 |  | -		raw_spin_lock_irqsave_rcu_node(rnp, flags);  | 
|---|
| 4058 |  | -		rsp->gp_kthread = t;  | 
|---|
| 4059 |  | -		if (kthread_prio) {  | 
|---|
| 4060 |  | -			sp.sched_priority = kthread_prio;  | 
|---|
| 4061 |  | -			sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);  | 
|---|
| 4062 |  | -		}  | 
|---|
| 4063 |  | -		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  | 
|---|
| 4064 |  | -		wake_up_process(t);  | 
|---|
 | 4270 | +	t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name);  | 
|---|
 | 4271 | +	if (WARN_ONCE(IS_ERR(t), "%s: Could not start grace-period kthread, OOM is now expected behavior\n", __func__))  | 
|---|
 | 4272 | +		return 0;  | 
|---|
 | 4273 | +	if (kthread_prio) {  | 
|---|
 | 4274 | +		sp.sched_priority = kthread_prio;  | 
|---|
 | 4275 | +		sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);  | 
|---|
| 4065 | 4276 |  	} | 
|---|
 | 4277 | +	rnp = rcu_get_root();  | 
|---|
 | 4278 | +	raw_spin_lock_irqsave_rcu_node(rnp, flags);  | 
|---|
 | 4279 | +	WRITE_ONCE(rcu_state.gp_activity, jiffies);  | 
|---|
 | 4280 | +	WRITE_ONCE(rcu_state.gp_req_activity, jiffies);  | 
|---|
 | 4281 | +	// Reset .gp_activity and .gp_req_activity before setting .gp_kthread.  | 
|---|
 | 4282 | +	smp_store_release(&rcu_state.gp_kthread, t);  /* ^^^ */  | 
|---|
 | 4283 | +	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  | 
|---|
 | 4284 | +	wake_up_process(t);  | 
|---|
| 4066 | 4285 |  	rcu_spawn_nocb_kthreads(); | 
|---|
| 4067 | 4286 |  	rcu_spawn_boost_kthreads(); | 
|---|
 | 4287 | +	rcu_spawn_core_kthreads();  | 
|---|
| 4068 | 4288 |  	return 0; | 
|---|
| 4069 | 4289 |  } | 
|---|
| 4070 | 4290 |  early_initcall(rcu_spawn_gp_kthread); | 
|---|
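
Not part of the patch: a small model of the publish ordering added in `rcu_spawn_gp_kthread()` above. The activity timestamps are written first, and only then is the kthread pointer made visible with a release store, so a lockless consumer (for example, stall-warning code checking whether the kthread exists) that uses an acquire load is guaranteed to see those timestamps initialized. C11 atomics stand in for `smp_store_release()`; `gp_state`, `task` and the function names are illustrative only.

```c
#include <stdatomic.h>
#include <stdio.h>
#include <time.h>

struct task { const char *name; };

struct gp_state {
	unsigned long gp_activity;
	unsigned long gp_req_activity;
	_Atomic(struct task *) gp_kthread;
};

static void spawn_gp_kthread(struct gp_state *st, struct task *t)
{
	unsigned long now = (unsigned long)time(NULL);

	st->gp_activity = now;
	st->gp_req_activity = now;
	/* Publish only after the activity stamps are written. */
	atomic_store_explicit(&st->gp_kthread, t, memory_order_release);
}

static void check_kthread(struct gp_state *st)
{
	struct task *t = atomic_load_explicit(&st->gp_kthread,
					      memory_order_acquire);

	if (t)	/* stamps are guaranteed visible here */
		printf("%s running, last activity %lu\n",
		       t->name, st->gp_activity);
}

int main(void)
{
	static struct gp_state st;
	static struct task gp = { .name = "rcu_gp" };

	spawn_gp_kthread(&st, &gp);
	check_kthread(&st);
	return 0;
}
```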
| .. | .. | 
|---|
| 4089 | 4309 |  } | 
|---|
| 4090 | 4310 |   | 
|---|
| 4091 | 4311 |  /* | 
|---|
| 4092 |  | - * Helper function for rcu_init() that initializes one rcu_state structure.  | 
|---|
 | 4312 | + * Helper function for rcu_init() that initializes the rcu_state structure.  | 
|---|
| 4093 | 4313 |   */ | 
|---|
| 4094 |  | -static void __init rcu_init_one(struct rcu_state *rsp)  | 
|---|
 | 4314 | +static void __init rcu_init_one(void)  | 
|---|
| 4095 | 4315 |  { | 
|---|
| 4096 | 4316 |  	static const char * const buf[] = RCU_NODE_NAME_INIT; | 
|---|
| 4097 | 4317 |  	static const char * const fqs[] = RCU_FQS_NAME_INIT; | 
|---|
| .. | .. | 
|---|
| 4113 | 4333 |  	/* Initialize the level-tracking arrays. */ | 
|---|
| 4114 | 4334 |   | 
|---|
| 4115 | 4335 |  	for (i = 1; i < rcu_num_lvls; i++) | 
|---|
| 4116 |  | -		rsp->level[i] = rsp->level[i - 1] + num_rcu_lvl[i - 1];  | 
|---|
 | 4336 | +		rcu_state.level[i] =  | 
|---|
 | 4337 | +			rcu_state.level[i - 1] + num_rcu_lvl[i - 1];  | 
|---|
| 4117 | 4338 |  	rcu_init_levelspread(levelspread, num_rcu_lvl); | 
|---|
| 4118 | 4339 |   | 
|---|
| 4119 | 4340 |  	/* Initialize the elements themselves, starting from the leaves. */ | 
|---|
| 4120 | 4341 |   | 
|---|
| 4121 | 4342 |  	for (i = rcu_num_lvls - 1; i >= 0; i--) { | 
|---|
| 4122 | 4343 |  		cpustride *= levelspread[i]; | 
|---|
| 4123 |  | -		rnp = rsp->level[i];  | 
|---|
 | 4344 | +		rnp = rcu_state.level[i];  | 
|---|
| 4124 | 4345 |  		for (j = 0; j < num_rcu_lvl[i]; j++, rnp++) { | 
|---|
| 4125 | 4346 |  			raw_spin_lock_init(&ACCESS_PRIVATE(rnp, lock)); | 
|---|
| 4126 | 4347 |  			lockdep_set_class_and_name(&ACCESS_PRIVATE(rnp, lock), | 
|---|
| .. | .. | 
|---|
| 4128 | 4349 |  			raw_spin_lock_init(&rnp->fqslock); | 
|---|
| 4129 | 4350 |  			lockdep_set_class_and_name(&rnp->fqslock, | 
|---|
| 4130 | 4351 |  						   &rcu_fqs_class[i], fqs[i]); | 
|---|
| 4131 |  | -			rnp->gp_seq = rsp->gp_seq;  | 
|---|
| 4132 |  | -			rnp->gp_seq_needed = rsp->gp_seq;  | 
|---|
| 4133 |  | -			rnp->completedqs = rsp->gp_seq;  | 
|---|
 | 4352 | +			rnp->gp_seq = rcu_state.gp_seq;  | 
|---|
 | 4353 | +			rnp->gp_seq_needed = rcu_state.gp_seq;  | 
|---|
 | 4354 | +			rnp->completedqs = rcu_state.gp_seq;  | 
|---|
| 4134 | 4355 |  			rnp->qsmask = 0; | 
|---|
| 4135 | 4356 |  			rnp->qsmaskinit = 0; | 
|---|
| 4136 | 4357 |  			rnp->grplo = j * cpustride; | 
|---|
| .. | .. | 
|---|
| 4143 | 4364 |  				rnp->parent = NULL; | 
|---|
| 4144 | 4365 |  			} else { | 
|---|
| 4145 | 4366 |  				rnp->grpnum = j % levelspread[i - 1]; | 
|---|
| 4146 |  | -				rnp->grpmask = 1UL << rnp->grpnum;  | 
|---|
| 4147 |  | -				rnp->parent = rsp->level[i - 1] +  | 
|---|
 | 4367 | +				rnp->grpmask = BIT(rnp->grpnum);  | 
|---|
 | 4368 | +				rnp->parent = rcu_state.level[i - 1] +  | 
|---|
| 4148 | 4369 |  					      j / levelspread[i - 1]; | 
|---|
| 4149 | 4370 |  			} | 
|---|
| 4150 | 4371 |  			rnp->level = i; | 
|---|
| .. | .. | 
|---|
| 4158 | 4379 |  		} | 
|---|
| 4159 | 4380 |  	} | 
|---|
| 4160 | 4381 |   | 
|---|
| 4161 |  | -	init_swait_queue_head(&rsp->gp_wq);  | 
|---|
| 4162 |  | -	init_swait_queue_head(&rsp->expedited_wq);  | 
|---|
| 4163 |  | -	rnp = rcu_first_leaf_node(rsp);  | 
|---|
 | 4382 | +	init_swait_queue_head(&rcu_state.gp_wq);  | 
|---|
 | 4383 | +	init_swait_queue_head(&rcu_state.expedited_wq);  | 
|---|
 | 4384 | +	rnp = rcu_first_leaf_node();  | 
|---|
| 4164 | 4385 |  	for_each_possible_cpu(i) { | 
|---|
| 4165 | 4386 |  		while (i > rnp->grphi) | 
|---|
| 4166 | 4387 |  			rnp++; | 
|---|
| 4167 |  | -		per_cpu_ptr(rsp->rda, i)->mynode = rnp;  | 
|---|
| 4168 |  | -		rcu_boot_init_percpu_data(i, rsp);  | 
|---|
 | 4388 | +		per_cpu_ptr(&rcu_data, i)->mynode = rnp;  | 
|---|
 | 4389 | +		rcu_boot_init_percpu_data(i);  | 
|---|
| 4169 | 4390 |  	} | 
|---|
| 4170 |  | -	list_add(&rsp->flavors, &rcu_struct_flavors);  | 
|---|
| 4171 | 4391 |  } | 
|---|
| 4172 | 4392 |   | 
|---|
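
Not part of the patch: a toy version of the tree wiring that `rcu_init_one()` performs above, reduced to two levels. Each leaf gets its bit (`grpmask`) in the parent's masks, a pointer to that parent, and the CPU range it covers. The constants `FANOUT` and `NR_LEAVES` are made-up values for the example; the kernel derives the real shape from `RCU_FANOUT`/`RCU_FANOUT_LEAF` in `rcu_init_geometry()` and builds the levels from the leaves toward the root.

```c
#include <stdio.h>

#define FANOUT		4
#define NR_LEAVES	8

struct node {
	struct node *parent;
	unsigned long grpmask;	/* this node's bit in the parent's masks */
	int grplo, grphi;	/* range of CPUs covered */
};

static struct node root;
static struct node leaves[NR_LEAVES];

static void init_tree(void)
{
	root.parent = NULL;
	root.grpmask = 0;
	root.grplo = 0;
	root.grphi = NR_LEAVES * FANOUT - 1;

	for (int j = 0; j < NR_LEAVES; j++) {
		leaves[j].grpmask = 1UL << j;	/* bit in the root's masks */
		leaves[j].parent = &root;
		leaves[j].grplo = j * FANOUT;
		leaves[j].grphi = leaves[j].grplo + FANOUT - 1;
	}
}

int main(void)
{
	init_tree();
	for (int j = 0; j < NR_LEAVES; j++)
		printf("leaf %d: cpus %d-%d grpmask %#lx\n",
		       j, leaves[j].grplo, leaves[j].grphi, leaves[j].grpmask);
	return 0;
}
```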
| 4173 | 4393 |  /* | 
|---|
| .. | .. | 
|---|
| 4175 | 4395 |   * replace the definitions in tree.h because those are needed to size | 
|---|
| 4176 | 4396 |   * the ->node array in the rcu_state structure. | 
|---|
| 4177 | 4397 |   */ | 
|---|
| 4178 |  | -static void __init rcu_init_geometry(void)  | 
|---|
 | 4398 | +void rcu_init_geometry(void)  | 
|---|
| 4179 | 4399 |  { | 
|---|
| 4180 | 4400 |  	ulong d; | 
|---|
| 4181 | 4401 |  	int i; | 
|---|
 | 4402 | +	static unsigned long old_nr_cpu_ids;  | 
|---|
| 4182 | 4403 |  	int rcu_capacity[RCU_NUM_LVLS]; | 
|---|
 | 4404 | +	static bool initialized;  | 
|---|
 | 4405 | +  | 
|---|
 | 4406 | +	if (initialized) {  | 
|---|
 | 4407 | +		/*  | 
|---|
 | 4408 | +		 * Warn if setup_nr_cpu_ids() had not yet been invoked,  | 
|---|
 | 4409 | +		 * unless nr_cpu_ids == NR_CPUS, in which case who cares?  | 
|---|
 | 4410 | +		 */  | 
|---|
 | 4411 | +		WARN_ON_ONCE(old_nr_cpu_ids != nr_cpu_ids);  | 
|---|
 | 4412 | +		return;  | 
|---|
 | 4413 | +	}  | 
|---|
 | 4414 | +  | 
|---|
 | 4415 | +	old_nr_cpu_ids = nr_cpu_ids;  | 
|---|
 | 4416 | +	initialized = true;  | 
|---|
| 4183 | 4417 |   | 
|---|
| 4184 | 4418 |  	/* | 
|---|
| 4185 | 4419 |  	 * Initialize any unspecified boot parameters. | 
|---|
| .. | .. | 
|---|
| 4193 | 4427 |  		jiffies_till_first_fqs = d; | 
|---|
| 4194 | 4428 |  	if (jiffies_till_next_fqs == ULONG_MAX) | 
|---|
| 4195 | 4429 |  		jiffies_till_next_fqs = d; | 
|---|
 | 4430 | +	adjust_jiffies_till_sched_qs();  | 
|---|
| 4196 | 4431 |   | 
|---|
| 4197 | 4432 |  	/* If the compile-time values are accurate, just leave. */ | 
|---|
| 4198 | 4433 |  	if (rcu_fanout_leaf == RCU_FANOUT_LEAF && | 
|---|
| .. | .. | 
|---|
| 4251 | 4486 |   | 
|---|
| 4252 | 4487 |  /* | 
|---|
| 4253 | 4488 |   * Dump out the structure of the rcu_node combining tree associated | 
|---|
| 4254 |  | - * with the rcu_state structure referenced by rsp.  | 
|---|
 | 4489 | + * with the rcu_state structure.  | 
|---|
| 4255 | 4490 |   */ | 
|---|
| 4256 |  | -static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp)  | 
|---|
 | 4491 | +static void __init rcu_dump_rcu_node_tree(void)  | 
|---|
| 4257 | 4492 |  { | 
|---|
| 4258 | 4493 |  	int level = 0; | 
|---|
| 4259 | 4494 |  	struct rcu_node *rnp; | 
|---|
| 4260 | 4495 |   | 
|---|
| 4261 | 4496 |  	pr_info("rcu_node tree layout dump\n"); | 
|---|
| 4262 | 4497 |  	pr_info(" "); | 
|---|
| 4263 |  | -	rcu_for_each_node_breadth_first(rsp, rnp) {  | 
|---|
 | 4498 | +	rcu_for_each_node_breadth_first(rnp) {  | 
|---|
| 4264 | 4499 |  		if (rnp->level != level) { | 
|---|
| 4265 | 4500 |  			pr_cont("\n"); | 
|---|
| 4266 | 4501 |  			pr_info(" "); | 
|---|
| .. | .. | 
|---|
| 4274 | 4509 |  struct workqueue_struct *rcu_gp_wq; | 
|---|
| 4275 | 4510 |  struct workqueue_struct *rcu_par_gp_wq; | 
|---|
| 4276 | 4511 |   | 
|---|
 | 4512 | +static void __init kfree_rcu_batch_init(void)  | 
|---|
 | 4513 | +{  | 
|---|
 | 4514 | +	int cpu;  | 
|---|
 | 4515 | +	int i;  | 
|---|
 | 4516 | +  | 
|---|
 | 4517 | +	for_each_possible_cpu(cpu) {  | 
|---|
 | 4518 | +		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);  | 
|---|
 | 4519 | +  | 
|---|
 | 4520 | +		for (i = 0; i < KFREE_N_BATCHES; i++) {  | 
|---|
 | 4521 | +			INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work);  | 
|---|
 | 4522 | +			krcp->krw_arr[i].krcp = krcp;  | 
|---|
 | 4523 | +		}  | 
|---|
 | 4524 | +  | 
|---|
 | 4525 | +		INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);  | 
|---|
 | 4526 | +		INIT_WORK(&krcp->page_cache_work, fill_page_cache_func);  | 
|---|
 | 4527 | +		krcp->initialized = true;  | 
|---|
 | 4528 | +	}  | 
|---|
 | 4529 | +	if (register_shrinker(&kfree_rcu_shrinker))  | 
|---|
 | 4530 | +		pr_err("Failed to register kfree_rcu() shrinker!\n");  | 
|---|
 | 4531 | +}  | 
|---|
 | 4532 | +  | 
|---|
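
Not part of the patch: a deliberately simplified model of the batching idea behind the per-CPU `kfree_rcu_cpu` structures initialized in `kfree_rcu_batch_init()` above: queue pointers into a batch and free the whole batch in one later pass instead of freeing each object individually. The kernel version is far richer (per-CPU state, `KFREE_N_BATCHES` work items, a page cache, a shrinker, and a grace-period wait before freeing); none of that is modeled here, and `kfree_batch`, `batch_queue` and `batch_flush` are invented names.

```c
#include <stdio.h>
#include <stdlib.h>

#define BATCH_SIZE 8

struct kfree_batch {
	void *ptrs[BATCH_SIZE];
	int nr;
};

/* Returns 0 on success, -1 if the batch is full and must be flushed. */
static int batch_queue(struct kfree_batch *b, void *p)
{
	if (b->nr >= BATCH_SIZE)
		return -1;
	b->ptrs[b->nr++] = p;
	return 0;
}

/* In the kernel the flush runs from deferred work after a grace
 * period; here it is just a direct call. */
static void batch_flush(struct kfree_batch *b)
{
	for (int i = 0; i < b->nr; i++)
		free(b->ptrs[i]);
	b->nr = 0;
}

int main(void)
{
	struct kfree_batch b = { .nr = 0 };

	for (int i = 0; i < 5; i++) {
		void *p = malloc(32);

		if (batch_queue(&b, p)) {	/* full: flush, then retry */
			batch_flush(&b);
			batch_queue(&b, p);
		}
	}
	printf("queued %d objects, flushing\n", b.nr);
	batch_flush(&b);
	return 0;
}
```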
| 4277 | 4533 |  void __init rcu_init(void) | 
|---|
| 4278 | 4534 |  { | 
|---|
| 4279 | 4535 |  	int cpu; | 
|---|
| 4280 | 4536 |   | 
|---|
| 4281 | 4537 |  	rcu_early_boot_tests(); | 
|---|
| 4282 | 4538 |   | 
|---|
 | 4539 | +	kfree_rcu_batch_init();  | 
|---|
| 4283 | 4540 |  	rcu_bootup_announce(); | 
|---|
| 4284 | 4541 |  	rcu_init_geometry(); | 
|---|
| 4285 |  | -#ifndef CONFIG_PREEMPT_RT_FULL  | 
|---|
| 4286 |  | -	rcu_init_one(&rcu_bh_state);  | 
|---|
| 4287 |  | -#endif  | 
|---|
| 4288 |  | -	rcu_init_one(&rcu_sched_state);  | 
|---|
 | 4542 | +	rcu_init_one();  | 
|---|
| 4289 | 4543 |  	if (dump_tree) | 
|---|
| 4290 |  | -		rcu_dump_rcu_node_tree(&rcu_sched_state);  | 
|---|
| 4291 |  | -	__rcu_init_preempt();  | 
|---|
 | 4544 | +		rcu_dump_rcu_node_tree();  | 
|---|
 | 4545 | +	if (use_softirq)  | 
|---|
 | 4546 | +		open_softirq(RCU_SOFTIRQ, rcu_core_si);  | 
|---|
| 4292 | 4547 |   | 
|---|
| 4293 | 4548 |  	/* | 
|---|
| 4294 | 4549 |  	 * We don't need protection against CPU-hotplug here because | 
|---|
| .. | .. | 
|---|
| 4307 | 4562 |  	WARN_ON(!rcu_gp_wq); | 
|---|
| 4308 | 4563 |  	rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0); | 
|---|
| 4309 | 4564 |  	WARN_ON(!rcu_par_gp_wq); | 
|---|
 | 4565 | +	srcu_init();  | 
|---|
 | 4566 | +  | 
|---|
 | 4567 | +	/* Fill in default value for rcutree.qovld boot parameter. */  | 
|---|
 | 4568 | +	/* -After- the rcu_node ->lock fields are initialized! */  | 
|---|
 | 4569 | +	if (qovld < 0)  | 
|---|
 | 4570 | +		qovld_calc = DEFAULT_RCU_QOVLD_MULT * qhimark;  | 
|---|
 | 4571 | +	else  | 
|---|
 | 4572 | +		qovld_calc = qovld;  | 
|---|
| 4310 | 4573 |  } | 
|---|
| 4311 | 4574 |   | 
|---|
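
Not part of the patch: a tiny sketch of the qovld default computed at the end of `rcu_init()` above: a negative boot value means "derive the overload threshold from qhimark", otherwise the given value is used as-is. The values of `DEFAULT_RCU_QOVLD_MULT` and the `qhimark` default below are assumptions for the example, not taken from this patch.

```c
#include <stdio.h>

#define DEFAULT_RCU_QOVLD_MULT 2	/* assumed multiplier */

static long qhimark = 10000;	/* callback high-water mark (assumed default) */
static long qovld = -1;		/* boot parameter; < 0 means "auto" */
static long qovld_calc;		/* value actually used at run time */

static void compute_qovld(void)
{
	if (qovld < 0)
		qovld_calc = DEFAULT_RCU_QOVLD_MULT * qhimark;
	else
		qovld_calc = qovld;
}

int main(void)
{
	compute_qovld();
	printf("qovld_calc=%ld\n", qovld_calc);
	return 0;
}
```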
 | 4575 | +#include "tree_stall.h"  | 
|---|
| 4312 | 4576 |  #include "tree_exp.h" | 
|---|
| 4313 | 4577 |  #include "tree_plugin.h" | 
|---|