| .. | .. | 
|---|
 | 1 | +/* SPDX-License-Identifier: GPL-2.0+ */  | 
|---|
| 1 | 2 |  /* | 
|---|
| 2 | 3 |   * Read-Copy Update mechanism for mutual exclusion (tree-based version) | 
|---|
| 3 | 4 |   * Internal non-public definitions that provide either classic | 
|---|
| 4 | 5 |   * or preemptible semantics. | 
|---|
| 5 | 6 |   * | 
|---|
| 6 |  | - * This program is free software; you can redistribute it and/or modify  | 
|---|
| 7 |  | - * it under the terms of the GNU General Public License as published by  | 
|---|
| 8 |  | - * the Free Software Foundation; either version 2 of the License, or  | 
|---|
| 9 |  | - * (at your option) any later version.  | 
|---|
| 10 |  | - *  | 
|---|
| 11 |  | - * This program is distributed in the hope that it will be useful,  | 
|---|
| 12 |  | - * but WITHOUT ANY WARRANTY; without even the implied warranty of  | 
|---|
| 13 |  | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  | 
|---|
| 14 |  | - * GNU General Public License for more details.  | 
|---|
| 15 |  | - *  | 
|---|
| 16 |  | - * You should have received a copy of the GNU General Public License  | 
|---|
| 17 |  | - * along with this program; if not, you can access it online at  | 
|---|
| 18 |  | - * http://www.gnu.org/licenses/gpl-2.0.html.  | 
|---|
| 19 |  | - *  | 
|---|
| 20 | 7 |   * Copyright Red Hat, 2009 | 
|---|
| 21 | 8 |   * Copyright IBM Corporation, 2009 | 
|---|
| 22 | 9 |   * | 
|---|
| 23 | 10 |   * Author: Ingo Molnar <mingo@elte.hu> | 
|---|
| 24 |  | - *	   Paul E. McKenney <paulmck@linux.vnet.ibm.com>  | 
|---|
 | 11 | + *	   Paul E. McKenney <paulmck@linux.ibm.com>  | 
|---|
| 25 | 12 |   */ | 
|---|
| 26 |  | -  | 
|---|
| 27 |  | -#include <linux/delay.h>  | 
|---|
| 28 |  | -#include <linux/gfp.h>  | 
|---|
| 29 |  | -#include <linux/oom.h>  | 
|---|
| 30 |  | -#include <linux/sched/debug.h>  | 
|---|
| 31 |  | -#include <linux/smpboot.h>  | 
|---|
| 32 |  | -#include <linux/sched/isolation.h>  | 
|---|
| 33 |  | -#include <uapi/linux/sched/types.h>  | 
|---|
| 34 |  | -#include "../time/tick-internal.h"  | 
|---|
| 35 |  | -  | 
|---|
| 36 |  | -#ifdef CONFIG_RCU_BOOST  | 
|---|
| 37 | 13 |   | 
|---|
| 38 | 14 |  #include "../locking/rtmutex_common.h" | 
|---|
| 39 |  | -  | 
|---|
| 40 |  | -/*  | 
|---|
| 41 |  | - * Control variables for per-CPU and per-rcu_node kthreads.  These  | 
|---|
| 42 |  | - * handle all flavors of RCU.  | 
|---|
| 43 |  | - */  | 
|---|
| 44 |  | -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);  | 
|---|
| 45 |  | -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);  | 
|---|
| 46 |  | -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);  | 
|---|
| 47 |  | -DEFINE_PER_CPU(char, rcu_cpu_has_work);  | 
|---|
| 48 |  | -  | 
|---|
| 49 |  | -#else /* #ifdef CONFIG_RCU_BOOST */  | 
|---|
| 50 |  | -  | 
|---|
| 51 |  | -/*  | 
|---|
| 52 |  | - * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,  | 
|---|
| 53 |  | - * all uses are in dead code.  Provide a definition to keep the compiler  | 
|---|
| 54 |  | - * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.  | 
|---|
| 55 |  | - * This probably needs to be excluded from -rt builds.  | 
|---|
| 56 |  | - */  | 
|---|
| 57 |  | -#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })  | 
|---|
| 58 |  | -#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)  | 
|---|
| 59 |  | -  | 
|---|
| 60 |  | -#endif /* #else #ifdef CONFIG_RCU_BOOST */  | 
|---|
| 61 | 15 |   | 
|---|
| 62 | 16 |  #ifdef CONFIG_RCU_NOCB_CPU | 
|---|
| 63 | 17 |  static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ | 
|---|
| .. | .. | 
|---|
| 82 | 36 |  		pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n"); | 
|---|
| 83 | 37 |  	if (IS_ENABLED(CONFIG_PROVE_RCU)) | 
|---|
| 84 | 38 |  		pr_info("\tRCU lockdep checking is enabled.\n"); | 
|---|
 | 39 | +	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))  | 
|---|
 | 40 | +		pr_info("\tRCU strict (and thus non-scalable) grace periods enabled.\n");  | 
|---|
| 85 | 41 |  	if (RCU_NUM_LVLS >= 4) | 
|---|
| 86 | 42 |  		pr_info("\tFour(or more)-level hierarchy is enabled.\n"); | 
|---|
| 87 | 43 |  	if (RCU_FANOUT_LEAF != 16) | 
|---|
| .. | .. | 
|---|
| 102 | 58 |  		pr_info("\tBoot-time adjustment of callback high-water mark to %ld.\n", qhimark); | 
|---|
| 103 | 59 |  	if (qlowmark != DEFAULT_RCU_QLOMARK) | 
|---|
| 104 | 60 |  		pr_info("\tBoot-time adjustment of callback low-water mark to %ld.\n", qlowmark); | 
|---|
 | 61 | +	if (qovld != DEFAULT_RCU_QOVLD)  | 
|---|
 | 62 | +		pr_info("\tBoot-time adjustment of callback overload level to %ld.\n", qovld);  | 
|---|
| 105 | 63 |  	if (jiffies_till_first_fqs != ULONG_MAX) | 
|---|
| 106 | 64 |  		pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs); | 
|---|
| 107 | 65 |  	if (jiffies_till_next_fqs != ULONG_MAX) | 
|---|
| 108 | 66 |  		pr_info("\tBoot-time adjustment of subsequent FQS scan delay to %ld jiffies.\n", jiffies_till_next_fqs); | 
|---|
 | 67 | +	if (jiffies_till_sched_qs != ULONG_MAX)  | 
|---|
 | 68 | +		pr_info("\tBoot-time adjustment of scheduler-enlistment delay to %ld jiffies.\n", jiffies_till_sched_qs);  | 
|---|
| 109 | 69 |  	if (rcu_kick_kthreads) | 
|---|
| 110 | 70 |  		pr_info("\tKick kthreads if too-long grace period.\n"); | 
|---|
| 111 | 71 |  	if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD)) | 
|---|
| .. | .. | 
|---|
| 116 | 76 |  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay); | 
|---|
| 117 | 77 |  	if (gp_cleanup_delay) | 
|---|
| 118 | 78 |  		pr_info("\tRCU debug GP cleanup slowdown %d jiffies.\n", gp_cleanup_delay); | 
|---|
 | 79 | +	if (!use_softirq)  | 
|---|
 | 80 | +		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");  | 
|---|
| 119 | 81 |  	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG)) | 
|---|
| 120 | 82 |  		pr_info("\tRCU debug extended QS entry/exit.\n"); | 
|---|
| 121 | 83 |  	rcupdate_announce_bootup_oddness(); | 
|---|
| .. | .. | 
|---|
| 123 | 85 |   | 
|---|
| 124 | 86 |  #ifdef CONFIG_PREEMPT_RCU | 
|---|
| 125 | 87 |   | 
|---|
| 126 |  | -RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);  | 
|---|
| 127 |  | -static struct rcu_state *const rcu_state_p = &rcu_preempt_state;  | 
|---|
| 128 |  | -static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data;  | 
|---|
| 129 |  | -  | 
|---|
| 130 |  | -static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,  | 
|---|
| 131 |  | -			       bool wake);  | 
|---|
 | 88 | +static void rcu_report_exp_rnp(struct rcu_node *rnp, bool wake);  | 
|---|
| 132 | 89 |  static void rcu_read_unlock_special(struct task_struct *t); | 
|---|
| 133 | 90 |   | 
|---|
| 134 | 91 |  /* | 
|---|
| .. | .. | 
|---|
| 271 | 228 |  		WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq); | 
|---|
| 272 | 229 |  	} | 
|---|
| 273 | 230 |  	if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD)) | 
|---|
| 274 |  | -		rnp->exp_tasks = &t->rcu_node_entry;  | 
|---|
 | 231 | +		WRITE_ONCE(rnp->exp_tasks, &t->rcu_node_entry);  | 
|---|
| 275 | 232 |  	WARN_ON_ONCE(!(blkd_state & RCU_GP_BLKD) != | 
|---|
| 276 | 233 |  		     !(rnp->qsmask & rdp->grpmask)); | 
|---|
| 277 | 234 |  	WARN_ON_ONCE(!(blkd_state & RCU_EXP_BLKD) != | 
|---|
| .. | .. | 
|---|
| 284 | 241 |  	 * no need to check for a subsequent expedited GP.  (Though we are | 
|---|
| 285 | 242 |  	 * still in a quiescent state in any case.) | 
|---|
| 286 | 243 |  	 */ | 
|---|
| 287 |  | -	if (blkd_state & RCU_EXP_BLKD &&  | 
|---|
| 288 |  | -	    t->rcu_read_unlock_special.b.exp_need_qs) {  | 
|---|
| 289 |  | -		t->rcu_read_unlock_special.b.exp_need_qs = false;  | 
|---|
| 290 |  | -		rcu_report_exp_rdp(rdp->rsp, rdp, true);  | 
|---|
| 291 |  | -	} else {  | 
|---|
| 292 |  | -		WARN_ON_ONCE(t->rcu_read_unlock_special.b.exp_need_qs);  | 
|---|
| 293 |  | -	}  | 
|---|
 | 244 | +	if (blkd_state & RCU_EXP_BLKD && rdp->exp_deferred_qs)  | 
|---|
 | 245 | +		rcu_report_exp_rdp(rdp);  | 
|---|
 | 246 | +	else  | 
|---|
 | 247 | +		WARN_ON_ONCE(rdp->exp_deferred_qs);  | 
|---|
| 294 | 248 |  } | 
|---|
| 295 | 249 |   | 
|---|
| 296 | 250 |  /* | 
|---|
| .. | .. | 
|---|
| 306 | 260 |   * | 
|---|
| 307 | 261 |   * Callers to this function must disable preemption. | 
|---|
| 308 | 262 |   */ | 
|---|
| 309 |  | -static void rcu_preempt_qs(void)  | 
|---|
 | 263 | +static void rcu_qs(void)  | 
|---|
| 310 | 264 |  { | 
|---|
| 311 |  | -	RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_qs() invoked with preemption enabled!!!\n");  | 
|---|
| 312 |  | -	if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) {  | 
|---|
 | 265 | +	RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n");  | 
|---|
 | 266 | +	if (__this_cpu_read(rcu_data.cpu_no_qs.s)) {  | 
|---|
| 313 | 267 |  		trace_rcu_grace_period(TPS("rcu_preempt"), | 
|---|
| 314 |  | -				       __this_cpu_read(rcu_data_p->gp_seq),  | 
|---|
 | 268 | +				       __this_cpu_read(rcu_data.gp_seq),  | 
|---|
| 315 | 269 |  				       TPS("cpuqs")); | 
|---|
| 316 |  | -		__this_cpu_write(rcu_data_p->cpu_no_qs.b.norm, false);  | 
|---|
| 317 |  | -		barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */  | 
|---|
| 318 |  | -		current->rcu_read_unlock_special.b.need_qs = false;  | 
|---|
 | 270 | +		__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);  | 
|---|
 | 271 | +		barrier(); /* Coordinate with rcu_flavor_sched_clock_irq(). */  | 
|---|
 | 272 | +		WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, false);  | 
|---|
| 319 | 273 |  	} | 
|---|
| 320 | 274 |  } | 
|---|
| 321 | 275 |   | 
|---|
| .. | .. | 
|---|
| 332 | 286 |   * | 
|---|
| 333 | 287 |   * Caller must disable interrupts. | 
|---|
| 334 | 288 |   */ | 
|---|
| 335 |  | -static void rcu_preempt_note_context_switch(bool preempt)  | 
|---|
 | 289 | +void rcu_note_context_switch(bool preempt)  | 
|---|
| 336 | 290 |  { | 
|---|
| 337 | 291 |  	struct task_struct *t = current; | 
|---|
| 338 |  | -	struct rcu_data *rdp;  | 
|---|
 | 292 | +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);  | 
|---|
| 339 | 293 |  	struct rcu_node *rnp; | 
|---|
| 340 | 294 |   | 
|---|
 | 295 | +	trace_rcu_utilization(TPS("Start context switch"));  | 
|---|
| 341 | 296 |  	lockdep_assert_irqs_disabled(); | 
|---|
| 342 |  | -	WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);  | 
|---|
| 343 |  | -	if (t->rcu_read_lock_nesting > 0 &&  | 
|---|
 | 297 | +	WARN_ON_ONCE(!preempt && rcu_preempt_depth() > 0);  | 
|---|
 | 298 | +	if (rcu_preempt_depth() > 0 &&  | 
|---|
| 344 | 299 |  	    !t->rcu_read_unlock_special.b.blocked) { | 
|---|
| 345 | 300 |   | 
|---|
| 346 | 301 |  		/* Possibly blocking in an RCU read-side critical section. */ | 
|---|
| 347 |  | -		rdp = this_cpu_ptr(rcu_state_p->rda);  | 
|---|
| 348 | 302 |  		rnp = rdp->mynode; | 
|---|
| 349 | 303 |  		raw_spin_lock_rcu_node(rnp); | 
|---|
| 350 | 304 |  		t->rcu_read_unlock_special.b.blocked = true; | 
|---|
| .. | .. | 
|---|
| 357 | 311 |  		 */ | 
|---|
| 358 | 312 |  		WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0); | 
|---|
| 359 | 313 |  		WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); | 
|---|
| 360 |  | -		trace_rcu_preempt_task(rdp->rsp->name,  | 
|---|
 | 314 | +		trace_rcu_preempt_task(rcu_state.name,  | 
|---|
| 361 | 315 |  				       t->pid, | 
|---|
| 362 | 316 |  				       (rnp->qsmask & rdp->grpmask) | 
|---|
| 363 | 317 |  				       ? rnp->gp_seq | 
|---|
| 364 | 318 |  				       : rcu_seq_snap(&rnp->gp_seq)); | 
|---|
| 365 | 319 |  		rcu_preempt_ctxt_queue(rnp, rdp); | 
|---|
| 366 |  | -	} else if (t->rcu_read_lock_nesting < 0 &&  | 
|---|
| 367 |  | -		   t->rcu_read_unlock_special.s) {  | 
|---|
| 368 |  | -  | 
|---|
| 369 |  | -		/*  | 
|---|
| 370 |  | -		 * Complete exit from RCU read-side critical section on  | 
|---|
| 371 |  | -		 * behalf of preempted instance of __rcu_read_unlock().  | 
|---|
| 372 |  | -		 */  | 
|---|
| 373 |  | -		rcu_read_unlock_special(t);  | 
|---|
 | 320 | +	} else {  | 
|---|
 | 321 | +		rcu_preempt_deferred_qs(t);  | 
|---|
| 374 | 322 |  	} | 
|---|
| 375 | 323 |   | 
|---|
| 376 | 324 |  	/* | 
|---|
| .. | .. | 
|---|
| 382 | 330 |  	 * grace period, then the fact that the task has been enqueued | 
|---|
| 383 | 331 |  	 * means that we continue to block the current grace period. | 
|---|
| 384 | 332 |  	 */ | 
|---|
| 385 |  | -	rcu_preempt_qs();  | 
|---|
 | 333 | +	rcu_qs();  | 
|---|
 | 334 | +	if (rdp->exp_deferred_qs)  | 
|---|
 | 335 | +		rcu_report_exp_rdp(rdp);  | 
|---|
 | 336 | +	rcu_tasks_qs(current, preempt);  | 
|---|
 | 337 | +	trace_rcu_utilization(TPS("End context switch"));  | 
|---|
| 386 | 338 |  } | 
|---|
 | 339 | +EXPORT_SYMBOL_GPL(rcu_note_context_switch);  | 
|---|
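
The queue-or-report choice made in rcu_note_context_switch() above is small enough to model outside the kernel. The sketch below is only an illustration of that choice; the names (`struct task_model`, `reader_depth`, `already_blocked`, `context_switch_blocks_gp`) are invented stand-ins for the real task and rcu_node state, not kernel APIs.

```c
/* Illustrative model of the context-switch bookkeeping; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

struct task_model {
	int  reader_depth;      /* stands in for rcu_preempt_depth() */
	bool already_blocked;   /* stands in for ->rcu_read_unlock_special.b.blocked */
};

/* Returns true if the task must be queued as blocking the grace period. */
static bool context_switch_blocks_gp(struct task_model *t)
{
	if (t->reader_depth > 0 && !t->already_blocked) {
		t->already_blocked = true;  /* kernel: queue on the leaf rcu_node list */
		return true;
	}
	/* Kernel: otherwise report any deferred quiescent state right away. */
	return false;
}

int main(void)
{
	struct task_model t = { .reader_depth = 1, .already_blocked = false };

	printf("first preemption inside a reader blocks the GP: %d\n",
	       context_switch_blocks_gp(&t));
	printf("a second preemption of the same reader does not re-queue: %d\n",
	       context_switch_blocks_gp(&t));
	return 0;
}
```
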
| 387 | 340 |   | 
|---|
| 388 | 341 |  /* | 
|---|
| 389 | 342 |   * Check for preempted RCU readers blocking the current grace period | 
|---|
| .. | .. | 
|---|
| 395 | 348 |  	return READ_ONCE(rnp->gp_tasks) != NULL; | 
|---|
| 396 | 349 |  } | 
|---|
| 397 | 350 |   | 
|---|
 | 351 | +/* limit value for ->rcu_read_lock_nesting. */  | 
|---|
 | 352 | +#define RCU_NEST_PMAX (INT_MAX / 2)  | 
|---|
 | 353 | +  | 
|---|
 | 354 | +static void rcu_preempt_read_enter(void)  | 
|---|
 | 355 | +{  | 
|---|
 | 356 | +	current->rcu_read_lock_nesting++;  | 
|---|
 | 357 | +}  | 
|---|
 | 358 | +  | 
|---|
 | 359 | +static int rcu_preempt_read_exit(void)  | 
|---|
 | 360 | +{  | 
|---|
 | 361 | +	return --current->rcu_read_lock_nesting;  | 
|---|
 | 362 | +}  | 
|---|
 | 363 | +  | 
|---|
 | 364 | +static void rcu_preempt_depth_set(int val)  | 
|---|
 | 365 | +{  | 
|---|
 | 366 | +	current->rcu_read_lock_nesting = val;  | 
|---|
 | 367 | +}  | 
|---|
 | 368 | +  | 
|---|
| 398 | 369 |  /* | 
|---|
| 399 | 370 |   * Preemptible RCU implementation for rcu_read_lock(). | 
|---|
| 400 | 371 |   * Just increment ->rcu_read_lock_nesting, shared state will be updated | 
|---|
| .. | .. | 
|---|
| 402 | 373 |   */ | 
|---|
| 403 | 374 |  void __rcu_read_lock(void) | 
|---|
| 404 | 375 |  { | 
|---|
| 405 |  | -	current->rcu_read_lock_nesting++;  | 
|---|
 | 376 | +	rcu_preempt_read_enter();  | 
|---|
 | 377 | +	if (IS_ENABLED(CONFIG_PROVE_LOCKING))  | 
|---|
 | 378 | +		WARN_ON_ONCE(rcu_preempt_depth() > RCU_NEST_PMAX);  | 
|---|
 | 379 | +	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) && rcu_state.gp_kthread)  | 
|---|
 | 380 | +		WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, true);  | 
|---|
| 406 | 381 |  	barrier();  /* critical section after entry code. */ | 
|---|
| 407 | 382 |  } | 
|---|
| 408 | 383 |  EXPORT_SYMBOL_GPL(__rcu_read_lock); | 
|---|
| .. | .. | 
|---|
| 418 | 393 |  { | 
|---|
| 419 | 394 |  	struct task_struct *t = current; | 
|---|
| 420 | 395 |   | 
|---|
| 421 |  | -	if (t->rcu_read_lock_nesting != 1) {  | 
|---|
| 422 |  | -		--t->rcu_read_lock_nesting;  | 
|---|
| 423 |  | -	} else {  | 
|---|
 | 396 | +	if (rcu_preempt_read_exit() == 0) {  | 
|---|
| 424 | 397 |  		barrier();  /* critical section before exit code. */ | 
|---|
| 425 |  | -		t->rcu_read_lock_nesting = INT_MIN;  | 
|---|
| 426 |  | -		barrier();  /* assign before ->rcu_read_unlock_special load */  | 
|---|
| 427 | 398 |  		if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s))) | 
|---|
| 428 | 399 |  			rcu_read_unlock_special(t); | 
|---|
| 429 |  | -		barrier();  /* ->rcu_read_unlock_special load before assign */  | 
|---|
| 430 |  | -		t->rcu_read_lock_nesting = 0;  | 
|---|
| 431 | 400 |  	} | 
|---|
| 432 |  | -#ifdef CONFIG_PROVE_LOCKING  | 
|---|
| 433 |  | -	{  | 
|---|
| 434 |  | -		int rrln = READ_ONCE(t->rcu_read_lock_nesting);  | 
|---|
 | 401 | +	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {  | 
|---|
 | 402 | +		int rrln = rcu_preempt_depth();  | 
|---|
| 435 | 403 |   | 
|---|
| 436 |  | -		WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);  | 
|---|
 | 404 | +		WARN_ON_ONCE(rrln < 0 || rrln > RCU_NEST_PMAX);  | 
|---|
| 437 | 405 |  	} | 
|---|
| 438 |  | -#endif /* #ifdef CONFIG_PROVE_LOCKING */  | 
|---|
| 439 | 406 |  } | 
|---|
| 440 | 407 |  EXPORT_SYMBOL_GPL(__rcu_read_unlock); | 
|---|
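
The change from the old INT_MIN handshake to a plain nesting counter is easier to follow in isolation. Below is a minimal user-space sketch of the counting scheme with made-up names (`reader_nesting`, `deferred_work`, `model_read_lock`/`model_read_unlock`): only the outermost unlock drops the count to zero, and only that unlock looks at the deferred-work flag, which is the behavior __rcu_read_unlock() now relies on.

```c
/* Minimal user-space model of the nesting scheme; not kernel code. */
#include <assert.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define NEST_PMAX (INT_MAX / 2)		/* mirrors RCU_NEST_PMAX */

static int  reader_nesting;		/* models t->rcu_read_lock_nesting */
static bool deferred_work;		/* models t->rcu_read_unlock_special.s */

static void model_read_lock(void)
{
	reader_nesting++;		/* like rcu_preempt_read_enter() */
	assert(reader_nesting <= NEST_PMAX);
}

static void model_read_unlock(void)
{
	if (--reader_nesting == 0 && deferred_work) {
		/* Only the outermost unlock handles deferred work. */
		printf("outermost unlock: handling deferred work\n");
		deferred_work = false;
	}
	assert(reader_nesting >= 0);
}

int main(void)
{
	model_read_lock();
	model_read_lock();		/* nested reader */
	deferred_work = true;		/* e.g. the task was preempted while reading */
	model_read_unlock();		/* inner unlock: nothing special to do */
	model_read_unlock();		/* outermost unlock: deferred work runs */
	return 0;
}
```
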
| 441 | 408 |   | 
|---|
| .. | .. | 
|---|
| 464 | 431 |  } | 
|---|
| 465 | 432 |   | 
|---|
| 466 | 433 |  /* | 
|---|
| 467 |  | - * Handle special cases during rcu_read_unlock(), such as needing to  | 
|---|
| 468 |  | - * notify RCU core processing or task having blocked during the RCU  | 
|---|
| 469 |  | - * read-side critical section.  | 
|---|
 | 434 | + * Report deferred quiescent states.  The deferral time can  | 
|---|
 | 435 | + * be quite short, for example, in the case of the call from  | 
|---|
 | 436 | + * rcu_read_unlock_special().  | 
|---|
| 470 | 437 |   */ | 
|---|
| 471 |  | -static void rcu_read_unlock_special(struct task_struct *t)  | 
|---|
 | 438 | +static void  | 
|---|
 | 439 | +rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)  | 
|---|
| 472 | 440 |  { | 
|---|
| 473 | 441 |  	bool empty_exp; | 
|---|
| 474 | 442 |  	bool empty_norm; | 
|---|
| 475 | 443 |  	bool empty_exp_now; | 
|---|
| 476 |  | -	unsigned long flags;  | 
|---|
| 477 | 444 |  	struct list_head *np; | 
|---|
| 478 | 445 |  	bool drop_boost_mutex = false; | 
|---|
| 479 | 446 |  	struct rcu_data *rdp; | 
|---|
| 480 | 447 |  	struct rcu_node *rnp; | 
|---|
| 481 | 448 |  	union rcu_special special; | 
|---|
| 482 |  | -  | 
|---|
| 483 |  | -	/* NMI handlers cannot block and cannot safely manipulate state. */  | 
|---|
| 484 |  | -	if (in_nmi())  | 
|---|
| 485 |  | -		return;  | 
|---|
| 486 |  | -  | 
|---|
| 487 |  | -	local_irq_save(flags);  | 
|---|
| 488 | 449 |   | 
|---|
| 489 | 450 |  	/* | 
|---|
| 490 | 451 |  	 * If RCU core is waiting for this CPU to exit its critical section, | 
|---|
| .. | .. | 
|---|
| 492 | 453 |  	 * t->rcu_read_unlock_special cannot change. | 
|---|
| 493 | 454 |  	 */ | 
|---|
| 494 | 455 |  	special = t->rcu_read_unlock_special; | 
|---|
 | 456 | +	rdp = this_cpu_ptr(&rcu_data);  | 
|---|
 | 457 | +	if (!special.s && !rdp->exp_deferred_qs) {  | 
|---|
 | 458 | +		local_irq_restore(flags);  | 
|---|
 | 459 | +		return;  | 
|---|
 | 460 | +	}  | 
|---|
 | 461 | +	t->rcu_read_unlock_special.s = 0;  | 
|---|
| 495 | 462 |  	if (special.b.need_qs) { | 
|---|
| 496 |  | -		rcu_preempt_qs();  | 
|---|
| 497 |  | -		t->rcu_read_unlock_special.b.need_qs = false;  | 
|---|
| 498 |  | -		if (!t->rcu_read_unlock_special.s) {  | 
|---|
| 499 |  | -			local_irq_restore(flags);  | 
|---|
| 500 |  | -			return;  | 
|---|
 | 463 | +		if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {  | 
|---|
 | 464 | +			rcu_report_qs_rdp(rdp);  | 
|---|
 | 465 | +			udelay(rcu_unlock_delay);  | 
|---|
 | 466 | +		} else {  | 
|---|
 | 467 | +			rcu_qs();  | 
|---|
| 501 | 468 |  		} | 
|---|
| 502 | 469 |  	} | 
|---|
| 503 | 470 |   | 
|---|
| 504 | 471 |  	/* | 
|---|
| 505 |  | -	 * Respond to a request for an expedited grace period, but only if  | 
|---|
| 506 |  | -	 * we were not preempted, meaning that we were running on the same  | 
|---|
| 507 |  | -	 * CPU throughout.  If we were preempted, the exp_need_qs flag  | 
|---|
| 508 |  | -	 * would have been cleared at the time of the first preemption,  | 
|---|
| 509 |  | -	 * and the quiescent state would be reported when we were dequeued.  | 
|---|
 | 472 | +	 * Respond to a request by an expedited grace period for a  | 
|---|
 | 473 | +	 * quiescent state from this CPU.  Note that requests from  | 
|---|
 | 474 | +	 * tasks are handled when removing the task from the  | 
|---|
 | 475 | +	 * blocked-tasks list below.  | 
|---|
| 510 | 476 |  	 */ | 
|---|
| 511 |  | -	if (special.b.exp_need_qs) {  | 
|---|
| 512 |  | -		WARN_ON_ONCE(special.b.blocked);  | 
|---|
| 513 |  | -		t->rcu_read_unlock_special.b.exp_need_qs = false;  | 
|---|
| 514 |  | -		rdp = this_cpu_ptr(rcu_state_p->rda);  | 
|---|
| 515 |  | -		rcu_report_exp_rdp(rcu_state_p, rdp, true);  | 
|---|
| 516 |  | -		if (!t->rcu_read_unlock_special.s) {  | 
|---|
| 517 |  | -			local_irq_restore(flags);  | 
|---|
| 518 |  | -			return;  | 
|---|
| 519 |  | -		}  | 
|---|
| 520 |  | -	}  | 
|---|
| 521 |  | -  | 
|---|
| 522 |  | -	/* Hardware IRQ handlers cannot block, complain if they get here. */  | 
|---|
| 523 |  | -	if (in_irq() || in_serving_softirq()) {  | 
|---|
| 524 |  | -		lockdep_rcu_suspicious(__FILE__, __LINE__,  | 
|---|
| 525 |  | -				       "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n");  | 
|---|
| 526 |  | -		pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n",  | 
|---|
| 527 |  | -			 t->rcu_read_unlock_special.s,  | 
|---|
| 528 |  | -			 t->rcu_read_unlock_special.b.blocked,  | 
|---|
| 529 |  | -			 t->rcu_read_unlock_special.b.exp_need_qs,  | 
|---|
| 530 |  | -			 t->rcu_read_unlock_special.b.need_qs);  | 
|---|
| 531 |  | -		local_irq_restore(flags);  | 
|---|
| 532 |  | -		return;  | 
|---|
| 533 |  | -	}  | 
|---|
 | 477 | +	if (rdp->exp_deferred_qs)  | 
|---|
 | 478 | +		rcu_report_exp_rdp(rdp);  | 
|---|
| 534 | 479 |   | 
|---|
| 535 | 480 |  	/* Clean up if blocked during RCU read-side critical section. */ | 
|---|
| 536 | 481 |  	if (special.b.blocked) { | 
|---|
| 537 |  | -		t->rcu_read_unlock_special.b.blocked = false;  | 
|---|
| 538 | 482 |   | 
|---|
| 539 | 483 |  		/* | 
|---|
| 540 | 484 |  		 * Remove this task from the list it blocked on.  The task | 
|---|
| .. | .. | 
|---|
| 549 | 493 |  		empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); | 
|---|
| 550 | 494 |  		WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq && | 
|---|
| 551 | 495 |  			     (!empty_norm || rnp->qsmask)); | 
|---|
| 552 |  | -		empty_exp = sync_rcu_preempt_exp_done(rnp);  | 
|---|
 | 496 | +		empty_exp = sync_rcu_exp_done(rnp);  | 
|---|
| 553 | 497 |  		smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ | 
|---|
| 554 | 498 |  		np = rcu_next_node_entry(t, rnp); | 
|---|
| 555 | 499 |  		list_del_init(&t->rcu_node_entry); | 
|---|
| .. | .. | 
|---|
| 559 | 503 |  		if (&t->rcu_node_entry == rnp->gp_tasks) | 
|---|
| 560 | 504 |  			WRITE_ONCE(rnp->gp_tasks, np); | 
|---|
| 561 | 505 |  		if (&t->rcu_node_entry == rnp->exp_tasks) | 
|---|
| 562 |  | -			rnp->exp_tasks = np;  | 
|---|
 | 506 | +			WRITE_ONCE(rnp->exp_tasks, np);  | 
|---|
| 563 | 507 |  		if (IS_ENABLED(CONFIG_RCU_BOOST)) { | 
|---|
| 564 | 508 |  			/* Snapshot ->boost_mtx ownership w/rnp->lock held. */ | 
|---|
| 565 | 509 |  			drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t; | 
|---|
| 566 | 510 |  			if (&t->rcu_node_entry == rnp->boost_tasks) | 
|---|
| 567 |  | -				rnp->boost_tasks = np;  | 
|---|
 | 511 | +				WRITE_ONCE(rnp->boost_tasks, np);  | 
|---|
| 568 | 512 |  		} | 
|---|
| 569 | 513 |   | 
|---|
| 570 | 514 |  		/* | 
|---|
| .. | .. | 
|---|
| 573 | 517 |  		 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, | 
|---|
| 574 | 518 |  		 * so we must take a snapshot of the expedited state. | 
|---|
| 575 | 519 |  		 */ | 
|---|
| 576 |  | -		empty_exp_now = sync_rcu_preempt_exp_done(rnp);  | 
|---|
 | 520 | +		empty_exp_now = sync_rcu_exp_done(rnp);  | 
|---|
| 577 | 521 |  		if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) { | 
|---|
| 578 | 522 |  			trace_rcu_quiescent_state_report(TPS("preempt_rcu"), | 
|---|
| 579 | 523 |  							 rnp->gp_seq, | 
|---|
| .. | .. | 
|---|
| 582 | 526 |  							 rnp->grplo, | 
|---|
| 583 | 527 |  							 rnp->grphi, | 
|---|
| 584 | 528 |  							 !!rnp->gp_tasks); | 
|---|
| 585 |  | -			rcu_report_unblock_qs_rnp(rcu_state_p, rnp, flags);  | 
|---|
 | 529 | +			rcu_report_unblock_qs_rnp(rnp, flags);  | 
|---|
| 586 | 530 |  		} else { | 
|---|
| 587 | 531 |  			raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | 
|---|
| 588 | 532 |  		} | 
|---|
| 589 |  | -  | 
|---|
| 590 |  | -		/* Unboost if we were boosted. */  | 
|---|
| 591 |  | -		if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)  | 
|---|
| 592 |  | -			rt_mutex_futex_unlock(&rnp->boost_mtx);  | 
|---|
| 593 | 533 |   | 
|---|
| 594 | 534 |  		/* | 
|---|
| 595 | 535 |  		 * If this was the last task on the expedited lists, | 
|---|
| 596 | 536 |  		 * then we need to report up the rcu_node hierarchy. | 
|---|
| 597 | 537 |  		 */ | 
|---|
| 598 | 538 |  		if (!empty_exp && empty_exp_now) | 
|---|
| 599 |  | -			rcu_report_exp_rnp(rcu_state_p, rnp, true);  | 
|---|
 | 539 | +			rcu_report_exp_rnp(rnp, true);  | 
|---|
 | 540 | +  | 
|---|
 | 541 | +		/* Unboost if we were boosted. */  | 
|---|
 | 542 | +		if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)  | 
|---|
 | 543 | +			rt_mutex_futex_unlock(&rnp->boost_mtx);  | 
|---|
 | 544 | +  | 
|---|
| 600 | 545 |  	} else { | 
|---|
| 601 | 546 |  		local_irq_restore(flags); | 
|---|
| 602 | 547 |  	} | 
|---|
| 603 | 548 |  } | 
|---|
| 604 | 549 |   | 
|---|
| 605 | 550 |  /* | 
|---|
| 606 |  | - * Dump detailed information for all tasks blocking the current RCU  | 
|---|
| 607 |  | - * grace period on the specified rcu_node structure.  | 
|---|
 | 551 | + * Is a deferred quiescent-state pending, and are we also not in  | 
|---|
 | 552 | + * an RCU read-side critical section?  It is the caller's responsibility  | 
|---|
 | 553 | + * to ensure it is otherwise safe to report any deferred quiescent  | 
|---|
 | 554 | + * states.  The reason for this is that it is safe to report a  | 
|---|
 | 555 | + * quiescent state during context switch even though preemption  | 
|---|
 | 556 | + * is disabled.  This function cannot be expected to understand these  | 
|---|
 | 557 | + * nuances, so the caller must handle them.  | 
|---|
| 608 | 558 |   */ | 
|---|
| 609 |  | -static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)  | 
|---|
 | 559 | +static bool rcu_preempt_need_deferred_qs(struct task_struct *t)  | 
|---|
 | 560 | +{  | 
|---|
 | 561 | +	return (__this_cpu_read(rcu_data.exp_deferred_qs) ||  | 
|---|
 | 562 | +		READ_ONCE(t->rcu_read_unlock_special.s)) &&  | 
|---|
 | 563 | +	       rcu_preempt_depth() == 0;  | 
|---|
 | 564 | +}  | 
|---|
 | 565 | +  | 
|---|
 | 566 | +/*  | 
|---|
 | 567 | + * Report a deferred quiescent state if needed and safe to do so.  | 
|---|
 | 568 | + * As with rcu_preempt_need_deferred_qs(), "safe" involves only  | 
|---|
 | 569 | + * not being in an RCU read-side critical section.  The caller must  | 
|---|
 | 570 | + * evaluate safety in terms of interrupt, softirq, and preemption  | 
|---|
 | 571 | + * disabling.  | 
|---|
 | 572 | + */  | 
|---|
 | 573 | +static void rcu_preempt_deferred_qs(struct task_struct *t)  | 
|---|
| 610 | 574 |  { | 
|---|
| 611 | 575 |  	unsigned long flags; | 
|---|
| 612 |  | -	struct task_struct *t;  | 
|---|
| 613 | 576 |   | 
|---|
| 614 |  | -	raw_spin_lock_irqsave_rcu_node(rnp, flags);  | 
|---|
| 615 |  | -	if (!rcu_preempt_blocked_readers_cgp(rnp)) {  | 
|---|
| 616 |  | -		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  | 
|---|
 | 577 | +	if (!rcu_preempt_need_deferred_qs(t))  | 
|---|
 | 578 | +		return;  | 
|---|
 | 579 | +	local_irq_save(flags);  | 
|---|
 | 580 | +	rcu_preempt_deferred_qs_irqrestore(t, flags);  | 
|---|
 | 581 | +}  | 
|---|
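
As a rough restatement of the contract above — report only when something is pending and the task is no longer inside a reader — here is a hedged sketch using stand-in flags (`exp_deferred`, `special_pending`, `reader_depth`) rather than the real per-CPU and per-task fields.

```c
/* Sketch of the "need a deferred QS?" predicate; stand-in state only. */
#include <stdbool.h>

static bool exp_deferred;	/* stands in for rcu_data.exp_deferred_qs */
static bool special_pending;	/* stands in for t->rcu_read_unlock_special.s */
static int  reader_depth;	/* stands in for rcu_preempt_depth() */

static bool need_deferred_qs(void)
{
	/* Something pending AND not inside an RCU read-side critical section. */
	return (exp_deferred || special_pending) && reader_depth == 0;
}

static void report_deferred_qs(void)
{
	if (!need_deferred_qs())
		return;
	/* The kernel saves IRQ flags here and reports with them restored. */
	exp_deferred = false;
	special_pending = false;
}

int main(void)
{
	exp_deferred = true;
	reader_depth = 1;
	report_deferred_qs();	/* still in a reader: nothing reported */
	reader_depth = 0;
	report_deferred_qs();	/* now safe: the deferred QS is reported */
	return 0;
}
```
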
 | 582 | +  | 
|---|
 | 583 | +/*  | 
|---|
 | 584 | + * Minimal handler to give the scheduler a chance to re-evaluate.  | 
|---|
 | 585 | + */  | 
|---|
 | 586 | +static void rcu_preempt_deferred_qs_handler(struct irq_work *iwp)  | 
|---|
 | 587 | +{  | 
|---|
 | 588 | +	struct rcu_data *rdp;  | 
|---|
 | 589 | +  | 
|---|
 | 590 | +	rdp = container_of(iwp, struct rcu_data, defer_qs_iw);  | 
|---|
 | 591 | +	rdp->defer_qs_iw_pending = false;  | 
|---|
 | 592 | +}  | 
|---|
 | 593 | +  | 
|---|
 | 594 | +/*  | 
|---|
 | 595 | + * Handle special cases during rcu_read_unlock(), such as needing to  | 
|---|
 | 596 | + * notify RCU core processing or task having blocked during the RCU  | 
|---|
 | 597 | + * read-side critical section.  | 
|---|
 | 598 | + */  | 
|---|
 | 599 | +static void rcu_read_unlock_special(struct task_struct *t)  | 
|---|
 | 600 | +{  | 
|---|
 | 601 | +	unsigned long flags;  | 
|---|
 | 602 | +	bool preempt_bh_were_disabled =  | 
|---|
 | 603 | +			!!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK));  | 
|---|
 | 604 | +	bool irqs_were_disabled;  | 
|---|
 | 605 | +  | 
|---|
 | 606 | +	/* NMI handlers cannot block and cannot safely manipulate state. */  | 
|---|
 | 607 | +	if (in_nmi())  | 
|---|
 | 608 | +		return;  | 
|---|
 | 609 | +  | 
|---|
 | 610 | +	local_irq_save(flags);  | 
|---|
 | 611 | +	irqs_were_disabled = irqs_disabled_flags(flags);  | 
|---|
 | 612 | +	if (preempt_bh_were_disabled || irqs_were_disabled) {  | 
|---|
 | 613 | +		bool exp;  | 
|---|
 | 614 | +		struct rcu_data *rdp = this_cpu_ptr(&rcu_data);  | 
|---|
 | 615 | +		struct rcu_node *rnp = rdp->mynode;  | 
|---|
 | 616 | +  | 
|---|
 | 617 | +		exp = (t->rcu_blocked_node &&  | 
|---|
 | 618 | +		       READ_ONCE(t->rcu_blocked_node->exp_tasks)) ||  | 
|---|
 | 619 | +		      (rdp->grpmask & READ_ONCE(rnp->expmask));  | 
|---|
 | 620 | +		// Need to defer quiescent state until everything is enabled.  | 
|---|
 | 621 | +		if (use_softirq && (in_irq() || (exp && !irqs_were_disabled))) {  | 
|---|
 | 622 | +			// Using softirq, safe to awaken, and either the  | 
|---|
 | 623 | +			// wakeup is free or there is an expedited GP.  | 
|---|
 | 624 | +			raise_softirq_irqoff(RCU_SOFTIRQ);  | 
|---|
 | 625 | +		} else {  | 
|---|
 | 626 | +			// Enabling BH or preempt does reschedule, so...  | 
|---|
 | 627 | +			// Also if no expediting, slow is OK.  | 
|---|
 | 628 | +			// Plus nohz_full CPUs eventually get tick enabled.  | 
|---|
 | 629 | +			set_tsk_need_resched(current);  | 
|---|
 | 630 | +			set_preempt_need_resched();  | 
|---|
 | 631 | +			if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled &&  | 
|---|
 | 632 | +			    !rdp->defer_qs_iw_pending && exp && cpu_online(rdp->cpu)) {  | 
|---|
 | 633 | +				// Get scheduler to re-evaluate and call hooks.  | 
|---|
 | 634 | +				// If !IRQ_WORK, FQS scan will eventually IPI.  | 
|---|
 | 635 | +				init_irq_work(&rdp->defer_qs_iw,  | 
|---|
 | 636 | +					      rcu_preempt_deferred_qs_handler);  | 
|---|
 | 637 | +				rdp->defer_qs_iw_pending = true;  | 
|---|
 | 638 | +				irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);  | 
|---|
 | 639 | +			}  | 
|---|
 | 640 | +		}  | 
|---|
 | 641 | +		local_irq_restore(flags);  | 
|---|
| 617 | 642 |  		return; | 
|---|
| 618 | 643 |  	} | 
|---|
| 619 |  | -	t = list_entry(rnp->gp_tasks->prev,  | 
|---|
| 620 |  | -		       struct task_struct, rcu_node_entry);  | 
|---|
| 621 |  | -	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {  | 
|---|
| 622 |  | -		/*  | 
|---|
| 623 |  | -		 * We could be printing a lot while holding a spinlock.  | 
|---|
| 624 |  | -		 * Avoid triggering hard lockup.  | 
|---|
| 625 |  | -		 */  | 
|---|
| 626 |  | -		touch_nmi_watchdog();  | 
|---|
| 627 |  | -		sched_show_task(t);  | 
|---|
| 628 |  | -	}  | 
|---|
| 629 |  | -	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  | 
|---|
| 630 |  | -}  | 
|---|
| 631 |  | -  | 
|---|
| 632 |  | -/*  | 
|---|
| 633 |  | - * Dump detailed information for all tasks blocking the current RCU  | 
|---|
| 634 |  | - * grace period.  | 
|---|
| 635 |  | - */  | 
|---|
| 636 |  | -static void rcu_print_detail_task_stall(struct rcu_state *rsp)  | 
|---|
| 637 |  | -{  | 
|---|
| 638 |  | -	struct rcu_node *rnp = rcu_get_root(rsp);  | 
|---|
| 639 |  | -  | 
|---|
| 640 |  | -	rcu_print_detail_task_stall_rnp(rnp);  | 
|---|
| 641 |  | -	rcu_for_each_leaf_node(rsp, rnp)  | 
|---|
| 642 |  | -		rcu_print_detail_task_stall_rnp(rnp);  | 
|---|
| 643 |  | -}  | 
|---|
| 644 |  | -  | 
|---|
| 645 |  | -static void rcu_print_task_stall_begin(struct rcu_node *rnp)  | 
|---|
| 646 |  | -{  | 
|---|
| 647 |  | -	pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",  | 
|---|
| 648 |  | -	       rnp->level, rnp->grplo, rnp->grphi);  | 
|---|
| 649 |  | -}  | 
|---|
| 650 |  | -  | 
|---|
| 651 |  | -static void rcu_print_task_stall_end(void)  | 
|---|
| 652 |  | -{  | 
|---|
| 653 |  | -	pr_cont("\n");  | 
|---|
| 654 |  | -}  | 
|---|
| 655 |  | -  | 
|---|
| 656 |  | -/*  | 
|---|
| 657 |  | - * Scan the current list of tasks blocked within RCU read-side critical  | 
|---|
| 658 |  | - * sections, printing out the tid of each.  | 
|---|
| 659 |  | - */  | 
|---|
| 660 |  | -static int rcu_print_task_stall(struct rcu_node *rnp)  | 
|---|
| 661 |  | -{  | 
|---|
| 662 |  | -	struct task_struct *t;  | 
|---|
| 663 |  | -	int ndetected = 0;  | 
|---|
| 664 |  | -  | 
|---|
| 665 |  | -	if (!rcu_preempt_blocked_readers_cgp(rnp))  | 
|---|
| 666 |  | -		return 0;  | 
|---|
| 667 |  | -	rcu_print_task_stall_begin(rnp);  | 
|---|
| 668 |  | -	t = list_entry(rnp->gp_tasks->prev,  | 
|---|
| 669 |  | -		       struct task_struct, rcu_node_entry);  | 
|---|
| 670 |  | -	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {  | 
|---|
| 671 |  | -		pr_cont(" P%d", t->pid);  | 
|---|
| 672 |  | -		ndetected++;  | 
|---|
| 673 |  | -	}  | 
|---|
| 674 |  | -	rcu_print_task_stall_end();  | 
|---|
| 675 |  | -	return ndetected;  | 
|---|
| 676 |  | -}  | 
|---|
| 677 |  | -  | 
|---|
| 678 |  | -/*  | 
|---|
| 679 |  | - * Scan the current list of tasks blocked within RCU read-side critical  | 
|---|
| 680 |  | - * sections, printing out the tid of each that is blocking the current  | 
|---|
| 681 |  | - * expedited grace period.  | 
|---|
| 682 |  | - */  | 
|---|
| 683 |  | -static int rcu_print_task_exp_stall(struct rcu_node *rnp)  | 
|---|
| 684 |  | -{  | 
|---|
| 685 |  | -	struct task_struct *t;  | 
|---|
| 686 |  | -	int ndetected = 0;  | 
|---|
| 687 |  | -  | 
|---|
| 688 |  | -	if (!rnp->exp_tasks)  | 
|---|
| 689 |  | -		return 0;  | 
|---|
| 690 |  | -	t = list_entry(rnp->exp_tasks->prev,  | 
|---|
| 691 |  | -		       struct task_struct, rcu_node_entry);  | 
|---|
| 692 |  | -	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {  | 
|---|
| 693 |  | -		pr_cont(" P%d", t->pid);  | 
|---|
| 694 |  | -		ndetected++;  | 
|---|
| 695 |  | -	}  | 
|---|
| 696 |  | -	return ndetected;  | 
|---|
 | 644 | +	rcu_preempt_deferred_qs_irqrestore(t, flags);  | 
|---|
| 697 | 645 |  } | 
|---|
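
The branch structure in rcu_read_unlock_special() above decides whether the quiescent state can be reported on the spot or must be deferred. A condensed sketch of that decision follows; the enum and parameter names are invented for illustration, and NMI context (which simply returns in the kernel) is left out.

```c
/* Condensed model of the unlock-time decision; illustrative names only. */
#include <stdbool.h>

enum unlock_action {
	REPORT_NOW,	/* nothing disabled: report the quiescent state directly */
	RAISE_SOFTIRQ,	/* defer the report to RCU_SOFTIRQ */
	ASK_SCHEDULER,	/* defer: set need_resched and maybe queue irq_work */
};

static enum unlock_action
choose_action(bool irqs_off, bool bh_or_preempt_off, bool exp_gp_pending,
	      bool use_softirq, bool in_hardirq)
{
	if (!irqs_off && !bh_or_preempt_off)
		return REPORT_NOW;
	if (use_softirq && (in_hardirq || (exp_gp_pending && !irqs_off)))
		return RAISE_SOFTIRQ;
	return ASK_SCHEDULER;
}

int main(void)
{
	/* Unlock in ordinary process context: the QS is reported immediately. */
	return choose_action(false, false, false, true, false) == REPORT_NOW ? 0 : 1;
}
```

The irq_work leg in the real code exists only to get the scheduler to run soon on a CPU whose interrupts were disabled at unlock time; absent IRQ_WORK, the FQS scan eventually sends an IPI instead, as the in-code comments note.
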
| 698 | 646 |   | 
|---|
| 699 | 647 |  /* | 
|---|
| 700 | 648 |   * Check that the list of blocked tasks for the newly completed grace | 
|---|
| 701 | 649 |   * period is in fact empty.  It is a serious bug to complete a grace | 
|---|
| 702 | 650 |   * period that still has RCU readers blocked!  This function must be | 
|---|
| 703 |  | - * invoked -before- updating this rnp's ->gp_seq, and the rnp's ->lock  | 
|---|
| 704 |  | - * must be held by the caller.  | 
|---|
 | 651 | + * invoked -before- updating this rnp's ->gp_seq.  | 
|---|
| 705 | 652 |   * | 
|---|
| 706 | 653 |   * Also, if there are blocked tasks on the list, they automatically | 
|---|
| 707 | 654 |   * block the newly created grace period, so set up ->gp_tasks accordingly. | 
|---|
| 708 | 655 |   */ | 
|---|
| 709 |  | -static void  | 
|---|
| 710 |  | -rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)  | 
|---|
 | 656 | +static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)  | 
|---|
| 711 | 657 |  { | 
|---|
| 712 | 658 |  	struct task_struct *t; | 
|---|
| 713 | 659 |   | 
|---|
| 714 | 660 |  	RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n"); | 
|---|
 | 661 | +	raw_lockdep_assert_held_rcu_node(rnp);  | 
|---|
| 715 | 662 |  	if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp))) | 
|---|
| 716 |  | -		dump_blkd_tasks(rsp, rnp, 10);  | 
|---|
 | 663 | +		dump_blkd_tasks(rnp, 10);  | 
|---|
| 717 | 664 |  	if (rcu_preempt_has_tasks(rnp) && | 
|---|
| 718 | 665 |  	    (rnp->qsmaskinit || rnp->wait_blkd_tasks)) { | 
|---|
| 719 | 666 |  		WRITE_ONCE(rnp->gp_tasks, rnp->blkd_tasks.next); | 
|---|
| .. | .. | 
|---|
| 726 | 673 |  } | 
|---|
| 727 | 674 |   | 
|---|
| 728 | 675 |  /* | 
|---|
| 729 |  | - * Check for a quiescent state from the current CPU.  When a task blocks,  | 
|---|
| 730 |  | - * the task is recorded in the corresponding CPU's rcu_node structure,  | 
|---|
| 731 |  | - * which is checked elsewhere.  | 
|---|
| 732 |  | - *  | 
|---|
| 733 |  | - * Caller must disable hard irqs.  | 
|---|
 | 676 | + * Check for a quiescent state from the current CPU, including voluntary  | 
|---|
 | 677 | + * context switches for Tasks RCU.  When a task blocks, the task is  | 
|---|
 | 678 | + * recorded in the corresponding CPU's rcu_node structure, which is checked  | 
|---|
 | 679 | + * elsewhere, hence this function need only check for quiescent states  | 
|---|
 | 680 | + * related to the current CPU, not to those related to tasks.  | 
|---|
| 734 | 681 |   */ | 
|---|
| 735 |  | -static void rcu_preempt_check_callbacks(void)  | 
|---|
 | 682 | +static void rcu_flavor_sched_clock_irq(int user)  | 
|---|
| 736 | 683 |  { | 
|---|
| 737 |  | -	struct rcu_state *rsp = &rcu_preempt_state;  | 
|---|
| 738 | 684 |  	struct task_struct *t = current; | 
|---|
| 739 | 685 |   | 
|---|
| 740 |  | -	if (t->rcu_read_lock_nesting == 0) {  | 
|---|
| 741 |  | -		rcu_preempt_qs();  | 
|---|
 | 686 | +	lockdep_assert_irqs_disabled();  | 
|---|
 | 687 | +	if (user || rcu_is_cpu_rrupt_from_idle()) {  | 
|---|
 | 688 | +		rcu_note_voluntary_context_switch(current);  | 
|---|
 | 689 | +	}  | 
|---|
 | 690 | +	if (rcu_preempt_depth() > 0 ||  | 
|---|
 | 691 | +	    (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {  | 
|---|
 | 692 | +		/* No QS, force context switch if deferred. */  | 
|---|
 | 693 | +		if (rcu_preempt_need_deferred_qs(t)) {  | 
|---|
 | 694 | +			set_tsk_need_resched(t);  | 
|---|
 | 695 | +			set_preempt_need_resched();  | 
|---|
 | 696 | +		}  | 
|---|
 | 697 | +	} else if (rcu_preempt_need_deferred_qs(t)) {  | 
|---|
 | 698 | +		rcu_preempt_deferred_qs(t); /* Report deferred QS. */  | 
|---|
 | 699 | +		return;  | 
|---|
 | 700 | +	} else if (!WARN_ON_ONCE(rcu_preempt_depth())) {  | 
|---|
 | 701 | +		rcu_qs(); /* Report immediate QS. */  | 
|---|
| 742 | 702 |  		return; | 
|---|
| 743 | 703 |  	} | 
|---|
| 744 |  | -	if (t->rcu_read_lock_nesting > 0 &&  | 
|---|
| 745 |  | -	    __this_cpu_read(rcu_data_p->core_needs_qs) &&  | 
|---|
| 746 |  | -	    __this_cpu_read(rcu_data_p->cpu_no_qs.b.norm) &&  | 
|---|
 | 704 | +  | 
|---|
 | 705 | +	/* If GP is oldish, ask for help from rcu_read_unlock_special(). */  | 
|---|
 | 706 | +	if (rcu_preempt_depth() > 0 &&  | 
|---|
 | 707 | +	    __this_cpu_read(rcu_data.core_needs_qs) &&  | 
|---|
 | 708 | +	    __this_cpu_read(rcu_data.cpu_no_qs.b.norm) &&  | 
|---|
| 747 | 709 |  	    !t->rcu_read_unlock_special.b.need_qs && | 
|---|
| 748 |  | -	    time_after(jiffies, rsp->gp_start + HZ))  | 
|---|
 | 710 | +	    time_after(jiffies, rcu_state.gp_start + HZ))  | 
|---|
| 749 | 711 |  		t->rcu_read_unlock_special.b.need_qs = true; | 
|---|
| 750 |  | -}  | 
|---|
| 751 |  | -  | 
|---|
| 752 |  | -/**  | 
|---|
| 753 |  | - * call_rcu() - Queue an RCU callback for invocation after a grace period.  | 
|---|
| 754 |  | - * @head: structure to be used for queueing the RCU updates.  | 
|---|
| 755 |  | - * @func: actual callback function to be invoked after the grace period  | 
|---|
| 756 |  | - *  | 
|---|
| 757 |  | - * The callback function will be invoked some time after a full grace  | 
|---|
| 758 |  | - * period elapses, in other words after all pre-existing RCU read-side  | 
|---|
| 759 |  | - * critical sections have completed.  However, the callback function  | 
|---|
| 760 |  | - * might well execute concurrently with RCU read-side critical sections  | 
|---|
| 761 |  | - * that started after call_rcu() was invoked.  RCU read-side critical  | 
|---|
| 762 |  | - * sections are delimited by rcu_read_lock() and rcu_read_unlock(),  | 
|---|
| 763 |  | - * and may be nested.  | 
|---|
| 764 |  | - *  | 
|---|
| 765 |  | - * Note that all CPUs must agree that the grace period extended beyond  | 
|---|
| 766 |  | - * all pre-existing RCU read-side critical section.  On systems with more  | 
|---|
| 767 |  | - * than one CPU, this means that when "func()" is invoked, each CPU is  | 
|---|
| 768 |  | - * guaranteed to have executed a full memory barrier since the end of its  | 
|---|
| 769 |  | - * last RCU read-side critical section whose beginning preceded the call  | 
|---|
| 770 |  | - * to call_rcu().  It also means that each CPU executing an RCU read-side  | 
|---|
| 771 |  | - * critical section that continues beyond the start of "func()" must have  | 
|---|
| 772 |  | - * executed a memory barrier after the call_rcu() but before the beginning  | 
|---|
| 773 |  | - * of that RCU read-side critical section.  Note that these guarantees  | 
|---|
| 774 |  | - * include CPUs that are offline, idle, or executing in user mode, as  | 
|---|
| 775 |  | - * well as CPUs that are executing in the kernel.  | 
|---|
| 776 |  | - *  | 
|---|
| 777 |  | - * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the  | 
|---|
| 778 |  | - * resulting RCU callback function "func()", then both CPU A and CPU B are  | 
|---|
| 779 |  | - * guaranteed to execute a full memory barrier during the time interval  | 
|---|
| 780 |  | - * between the call to call_rcu() and the invocation of "func()" -- even  | 
|---|
| 781 |  | - * if CPU A and CPU B are the same CPU (but again only if the system has  | 
|---|
| 782 |  | - * more than one CPU).  | 
|---|
| 783 |  | - */  | 
|---|
| 784 |  | -void call_rcu(struct rcu_head *head, rcu_callback_t func)  | 
|---|
| 785 |  | -{  | 
|---|
| 786 |  | -	__call_rcu(head, func, rcu_state_p, -1, 0);  | 
|---|
| 787 |  | -}  | 
|---|
| 788 |  | -EXPORT_SYMBOL_GPL(call_rcu);  | 
|---|
| 789 |  | -  | 
|---|
| 790 |  | -/**  | 
|---|
| 791 |  | - * synchronize_rcu - wait until a grace period has elapsed.  | 
|---|
| 792 |  | - *  | 
|---|
| 793 |  | - * Control will return to the caller some time after a full grace  | 
|---|
| 794 |  | - * period has elapsed, in other words after all currently executing RCU  | 
|---|
| 795 |  | - * read-side critical sections have completed.  Note, however, that  | 
|---|
| 796 |  | - * upon return from synchronize_rcu(), the caller might well be executing  | 
|---|
| 797 |  | - * concurrently with new RCU read-side critical sections that began while  | 
|---|
| 798 |  | - * synchronize_rcu() was waiting.  RCU read-side critical sections are  | 
|---|
| 799 |  | - * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.  | 
|---|
| 800 |  | - *  | 
|---|
| 801 |  | - * See the description of synchronize_sched() for more detailed  | 
|---|
| 802 |  | - * information on memory-ordering guarantees.  However, please note  | 
|---|
| 803 |  | - * that -only- the memory-ordering guarantees apply.  For example,  | 
|---|
| 804 |  | - * synchronize_rcu() is -not- guaranteed to wait on things like code  | 
|---|
| 805 |  | - * protected by preempt_disable(), instead, synchronize_rcu() is -only-  | 
|---|
| 806 |  | - * guaranteed to wait on RCU read-side critical sections, that is, sections  | 
|---|
| 807 |  | - * of code protected by rcu_read_lock().  | 
|---|
| 808 |  | - */  | 
|---|
| 809 |  | -void synchronize_rcu(void)  | 
|---|
| 810 |  | -{  | 
|---|
| 811 |  | -	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||  | 
|---|
| 812 |  | -			 lock_is_held(&rcu_lock_map) ||  | 
|---|
| 813 |  | -			 lock_is_held(&rcu_sched_lock_map),  | 
|---|
| 814 |  | -			 "Illegal synchronize_rcu() in RCU read-side critical section");  | 
|---|
| 815 |  | -	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)  | 
|---|
| 816 |  | -		return;  | 
|---|
| 817 |  | -	if (rcu_gp_is_expedited())  | 
|---|
| 818 |  | -		synchronize_rcu_expedited();  | 
|---|
| 819 |  | -	else  | 
|---|
| 820 |  | -		wait_rcu_gp(call_rcu);  | 
|---|
| 821 |  | -}  | 
|---|
| 822 |  | -EXPORT_SYMBOL_GPL(synchronize_rcu);  | 
|---|
| 823 |  | -  | 
|---|
| 824 |  | -/**  | 
|---|
| 825 |  | - * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.  | 
|---|
| 826 |  | - *  | 
|---|
| 827 |  | - * Note that this primitive does not necessarily wait for an RCU grace period  | 
|---|
| 828 |  | - * to complete.  For example, if there are no RCU callbacks queued anywhere  | 
|---|
| 829 |  | - * in the system, then rcu_barrier() is within its rights to return  | 
|---|
| 830 |  | - * immediately, without waiting for anything, much less an RCU grace period.  | 
|---|
| 831 |  | - */  | 
|---|
| 832 |  | -void rcu_barrier(void)  | 
|---|
| 833 |  | -{  | 
|---|
| 834 |  | -	_rcu_barrier(rcu_state_p);  | 
|---|
| 835 |  | -}  | 
|---|
| 836 |  | -EXPORT_SYMBOL_GPL(rcu_barrier);  | 
|---|
| 837 |  | -  | 
|---|
| 838 |  | -/*  | 
|---|
| 839 |  | - * Initialize preemptible RCU's state structures.  | 
|---|
| 840 |  | - */  | 
|---|
| 841 |  | -static void __init __rcu_init_preempt(void)  | 
|---|
| 842 |  | -{  | 
|---|
| 843 |  | -	rcu_init_one(rcu_state_p);  | 
|---|
| 844 | 712 |  } | 
|---|
| 845 | 713 |   | 
|---|
| 846 | 714 |  /* | 
|---|
| 847 | 715 |   * Check for a task exiting while in a preemptible-RCU read-side | 
|---|
| 848 |  | - * critical section, clean up if so.  No need to issue warnings,  | 
|---|
| 849 |  | - * as debug_check_no_locks_held() already does this if lockdep  | 
|---|
| 850 |  | - * is enabled.  | 
|---|
 | 716 | + * critical section, clean up if so.  No need to issue warnings, as  | 
|---|
 | 717 | + * debug_check_no_locks_held() already does this if lockdep is enabled.  | 
|---|
 | 718 | + * Besides, if this function does anything other than just immediately  | 
|---|
 | 719 | + * return, there was a bug of some sort.  Spewing warnings from this  | 
|---|
 | 720 | + * function is like as not to simply obscure important prior warnings.  | 
|---|
| 851 | 721 |   */ | 
|---|
| 852 | 722 |  void exit_rcu(void) | 
|---|
| 853 | 723 |  { | 
|---|
| 854 | 724 |  	struct task_struct *t = current; | 
|---|
| 855 | 725 |   | 
|---|
| 856 |  | -	if (likely(list_empty(&current->rcu_node_entry)))  | 
|---|
 | 726 | +	if (unlikely(!list_empty(&current->rcu_node_entry))) {  | 
|---|
 | 727 | +		rcu_preempt_depth_set(1);  | 
|---|
 | 728 | +		barrier();  | 
|---|
 | 729 | +		WRITE_ONCE(t->rcu_read_unlock_special.b.blocked, true);  | 
|---|
 | 730 | +	} else if (unlikely(rcu_preempt_depth())) {  | 
|---|
 | 731 | +		rcu_preempt_depth_set(1);  | 
|---|
 | 732 | +	} else {  | 
|---|
| 857 | 733 |  		return; | 
|---|
| 858 |  | -	t->rcu_read_lock_nesting = 1;  | 
|---|
| 859 |  | -	barrier();  | 
|---|
| 860 |  | -	t->rcu_read_unlock_special.b.blocked = true;  | 
|---|
 | 734 | +	}  | 
|---|
| 861 | 735 |  	__rcu_read_unlock(); | 
|---|
 | 736 | +	rcu_preempt_deferred_qs(current);  | 
|---|
| 862 | 737 |  } | 
|---|
| 863 | 738 |   | 
|---|
| 864 | 739 |  /* | 
|---|
| .. | .. | 
|---|
| 866 | 741 |   * specified number of elements. | 
|---|
| 867 | 742 |   */ | 
|---|
| 868 | 743 |  static void | 
|---|
| 869 |  | -dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp, int ncheck)  | 
|---|
 | 744 | +dump_blkd_tasks(struct rcu_node *rnp, int ncheck)  | 
|---|
| 870 | 745 |  { | 
|---|
| 871 | 746 |  	int cpu; | 
|---|
| 872 | 747 |  	int i; | 
|---|
| .. | .. | 
|---|
| 878 | 753 |  	raw_lockdep_assert_held_rcu_node(rnp); | 
|---|
| 879 | 754 |  	pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n", | 
|---|
| 880 | 755 |  		__func__, rnp->grplo, rnp->grphi, rnp->level, | 
|---|
| 881 |  | -		(long)rnp->gp_seq, (long)rnp->completedqs);  | 
|---|
 | 756 | +		(long)READ_ONCE(rnp->gp_seq), (long)rnp->completedqs);  | 
|---|
| 882 | 757 |  	for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent) | 
|---|
| 883 | 758 |  		pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n", | 
|---|
| 884 | 759 |  			__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext); | 
|---|
| 885 | 760 |  	pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n", | 
|---|
| 886 |  | -		__func__, READ_ONCE(rnp->gp_tasks), rnp->boost_tasks,  | 
|---|
| 887 |  | -		rnp->exp_tasks);  | 
|---|
 | 761 | +		__func__, READ_ONCE(rnp->gp_tasks), data_race(rnp->boost_tasks),  | 
|---|
 | 762 | +		READ_ONCE(rnp->exp_tasks));  | 
|---|
| 888 | 763 |  	pr_info("%s: ->blkd_tasks", __func__); | 
|---|
| 889 | 764 |  	i = 0; | 
|---|
| 890 | 765 |  	list_for_each(lhp, &rnp->blkd_tasks) { | 
|---|
| 891 | 766 |  		pr_cont(" %p", lhp); | 
|---|
| 892 |  | -		if (++i >= 10)  | 
|---|
 | 767 | +		if (++i >= ncheck)  | 
|---|
| 893 | 768 |  			break; | 
|---|
| 894 | 769 |  	} | 
|---|
| 895 | 770 |  	pr_cont("\n"); | 
|---|
| 896 | 771 |  	for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) { | 
|---|
| 897 |  | -		rdp = per_cpu_ptr(rsp->rda, cpu);  | 
|---|
 | 772 | +		rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
| 898 | 773 |  		onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp)); | 
|---|
| 899 | 774 |  		pr_info("\t%d: %c online: %ld(%d) offline: %ld(%d)\n", | 
|---|
| 900 | 775 |  			cpu, ".o"[onl], | 
|---|
| .. | .. | 
|---|
| 905 | 780 |   | 
|---|
| 906 | 781 |  #else /* #ifdef CONFIG_PREEMPT_RCU */ | 
|---|
| 907 | 782 |   | 
|---|
| 908 |  | -static struct rcu_state *const rcu_state_p = &rcu_sched_state;  | 
|---|
 | 783 | +/*  | 
|---|
 | 784 | + * If strict grace periods are enabled, and if the calling  | 
|---|
 | 785 | + * __rcu_read_unlock() marks the beginning of a quiescent state, immediately  | 
|---|
 | 786 | + * report that quiescent state and, if requested, spin for a bit.  | 
|---|
 | 787 | + */  | 
|---|
 | 788 | +void rcu_read_unlock_strict(void)  | 
|---|
 | 789 | +{  | 
|---|
 | 790 | +	struct rcu_data *rdp;  | 
|---|
 | 791 | +  | 
|---|
 | 792 | +	if (!IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ||  | 
|---|
 | 793 | +	   irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)  | 
|---|
 | 794 | +		return;  | 
|---|
 | 795 | +	rdp = this_cpu_ptr(&rcu_data);  | 
|---|
 | 796 | +	rcu_report_qs_rdp(rdp);  | 
|---|
 | 797 | +	udelay(rcu_unlock_delay);  | 
|---|
 | 798 | +}  | 
|---|
 | 799 | +EXPORT_SYMBOL_GPL(rcu_read_unlock_strict);  | 
|---|
| 909 | 800 |   | 
|---|
| 910 | 801 |  /* | 
|---|
| 911 | 802 |   * Tell them what RCU they are running. | 
|---|
| .. | .. | 
|---|
| 917 | 808 |  } | 
|---|
| 918 | 809 |   | 
|---|
| 919 | 810 |  /* | 
|---|
| 920 |  | - * Because preemptible RCU does not exist, we never have to check for  | 
|---|
| 921 |  | - * CPUs being in quiescent states.  | 
|---|
 | 811 | + * Note a quiescent state for PREEMPTION=n.  Because we do not need to know  | 
|---|
 | 812 | + * how many quiescent states passed, just if there was at least one since  | 
|---|
 | 813 | + * the start of the grace period, this just sets a flag.  The caller must  | 
|---|
 | 814 | + * have disabled preemption.  | 
|---|
| 922 | 815 |   */ | 
|---|
| 923 |  | -static void rcu_preempt_note_context_switch(bool preempt)  | 
|---|
 | 816 | +static void rcu_qs(void)  | 
|---|
| 924 | 817 |  { | 
|---|
 | 818 | +	RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!");  | 
|---|
 | 819 | +	if (!__this_cpu_read(rcu_data.cpu_no_qs.s))  | 
|---|
 | 820 | +		return;  | 
|---|
 | 821 | +	trace_rcu_grace_period(TPS("rcu_sched"),  | 
|---|
 | 822 | +			       __this_cpu_read(rcu_data.gp_seq), TPS("cpuqs"));  | 
|---|
 | 823 | +	__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);  | 
|---|
 | 824 | +	if (!__this_cpu_read(rcu_data.cpu_no_qs.b.exp))  | 
|---|
 | 825 | +		return;  | 
|---|
 | 826 | +	__this_cpu_write(rcu_data.cpu_no_qs.b.exp, false);  | 
|---|
 | 827 | +	rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));  | 
|---|
| 925 | 828 |  } | 
|---|
 | 829 | +  | 
|---|
 | 830 | +/*  | 
|---|
 | 831 | + * Register an urgently needed quiescent state.  If there is an  | 
|---|
 | 832 | + * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight  | 
|---|
 | 833 | + * dyntick-idle quiescent state visible to other CPUs, which will in  | 
|---|
 | 834 | + * some cases serve for expedited as well as normal grace periods.  | 
|---|
 | 835 | + * Either way, register a lightweight quiescent state.  | 
|---|
 | 836 | + */  | 
|---|
 | 837 | +void rcu_all_qs(void)  | 
|---|
 | 838 | +{  | 
|---|
 | 839 | +	unsigned long flags;  | 
|---|
 | 840 | +  | 
|---|
 | 841 | +	if (!raw_cpu_read(rcu_data.rcu_urgent_qs))  | 
|---|
 | 842 | +		return;  | 
|---|
 | 843 | +	preempt_disable();  | 
|---|
 | 844 | +	/* Load rcu_urgent_qs before other flags. */  | 
|---|
 | 845 | +	if (!smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {  | 
|---|
 | 846 | +		preempt_enable();  | 
|---|
 | 847 | +		return;  | 
|---|
 | 848 | +	}  | 
|---|
 | 849 | +	this_cpu_write(rcu_data.rcu_urgent_qs, false);  | 
|---|
 | 850 | +	if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs))) {  | 
|---|
 | 851 | +		local_irq_save(flags);  | 
|---|
 | 852 | +		rcu_momentary_dyntick_idle();  | 
|---|
 | 853 | +		local_irq_restore(flags);  | 
|---|
 | 854 | +	}  | 
|---|
 | 855 | +	rcu_qs();  | 
|---|
 | 856 | +	preempt_enable();  | 
|---|
 | 857 | +}  | 
|---|
 | 858 | +EXPORT_SYMBOL_GPL(rcu_all_qs);  | 
|---|
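
The urgent-QS handshake in rcu_all_qs() above relies on an acquire load of the per-CPU flag so that whatever the grace-period side published before setting the flag (for example the heavy-QS request) is visible before the quiescent state is acted on. Below is a small C11-atomics sketch of that publish/consume ordering; the variable and function names are invented, and it models only the ordering idea, not the kernel's per-CPU machinery.

```c
/* Sketch of the release/acquire pairing behind rcu_urgent_qs; not kernel code. */
#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool urgent_qs;		/* stands in for rcu_data.rcu_urgent_qs */
static atomic_bool need_heavy_qs;	/* stands in for rcu_data.rcu_need_heavy_qs */

/* Requesting side: publish the details, then set the flag with release. */
static void request_urgent_qs(bool heavy)
{
	atomic_store_explicit(&need_heavy_qs, heavy, memory_order_relaxed);
	atomic_store_explicit(&urgent_qs, true, memory_order_release);
}

/* CPU side: acquire-load the flag so the request details are visible. */
static bool consume_urgent_qs(void)
{
	if (!atomic_load_explicit(&urgent_qs, memory_order_acquire))
		return false;
	atomic_store_explicit(&urgent_qs, false, memory_order_relaxed);
	return atomic_load_explicit(&need_heavy_qs, memory_order_relaxed);
}

int main(void)
{
	request_urgent_qs(true);
	return consume_urgent_qs() ? 0 : 1;	/* heavy QS requested and seen */
}
```
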
 | 859 | +  | 
|---|
 | 860 | +/*  | 
|---|
 | 861 | + * Note a PREEMPTION=n context switch. The caller must have disabled interrupts.  | 
|---|
 | 862 | + */  | 
|---|
 | 863 | +void rcu_note_context_switch(bool preempt)  | 
|---|
 | 864 | +{  | 
|---|
 | 865 | +	trace_rcu_utilization(TPS("Start context switch"));  | 
|---|
 | 866 | +	rcu_qs();  | 
|---|
 | 867 | +	/* Load rcu_urgent_qs before other flags. */  | 
|---|
 | 868 | +	if (!smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs)))  | 
|---|
 | 869 | +		goto out;  | 
|---|
 | 870 | +	this_cpu_write(rcu_data.rcu_urgent_qs, false);  | 
|---|
 | 871 | +	if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs)))  | 
|---|
 | 872 | +		rcu_momentary_dyntick_idle();  | 
|---|
 | 873 | +	rcu_tasks_qs(current, preempt);  | 
|---|
 | 874 | +out:  | 
|---|
 | 875 | +	trace_rcu_utilization(TPS("End context switch"));  | 
|---|
 | 876 | +}  | 
|---|
 | 877 | +EXPORT_SYMBOL_GPL(rcu_note_context_switch);  | 
|---|
| 926 | 878 |   | 
|---|
| 927 | 879 |  /* | 
|---|
| 928 | 880 |   * Because preemptible RCU does not exist, there are never any preempted | 
|---|
| .. | .. | 
|---|
| 942 | 894 |  } | 
|---|
| 943 | 895 |   | 
|---|
| 944 | 896 |  /* | 
|---|
| 945 |  | - * Because preemptible RCU does not exist, we never have to check for  | 
|---|
| 946 |  | - * tasks blocked within RCU read-side critical sections.  | 
|---|
 | 897 | + * Because there is no preemptible RCU, there can be no deferred quiescent  | 
|---|
 | 898 | + * states.  | 
|---|
| 947 | 899 |   */ | 
|---|
| 948 |  | -static void rcu_print_detail_task_stall(struct rcu_state *rsp)  | 
|---|
 | 900 | +static bool rcu_preempt_need_deferred_qs(struct task_struct *t)  | 
|---|
| 949 | 901 |  { | 
|---|
 | 902 | +	return false;  | 
|---|
| 950 | 903 |  } | 
|---|
| 951 |  | -  | 
|---|
| 952 |  | -/*  | 
|---|
| 953 |  | - * Because preemptible RCU does not exist, we never have to check for  | 
|---|
| 954 |  | - * tasks blocked within RCU read-side critical sections.  | 
|---|
| 955 |  | - */  | 
|---|
| 956 |  | -static int rcu_print_task_stall(struct rcu_node *rnp)  | 
|---|
| 957 |  | -{  | 
|---|
| 958 |  | -	return 0;  | 
|---|
| 959 |  | -}  | 
|---|
| 960 |  | -  | 
|---|
| 961 |  | -/*  | 
|---|
| 962 |  | - * Because preemptible RCU does not exist, we never have to check for  | 
|---|
| 963 |  | - * tasks blocked within RCU read-side critical sections that are  | 
|---|
| 964 |  | - * blocking the current expedited grace period.  | 
|---|
| 965 |  | - */  | 
|---|
| 966 |  | -static int rcu_print_task_exp_stall(struct rcu_node *rnp)  | 
|---|
| 967 |  | -{  | 
|---|
| 968 |  | -	return 0;  | 
|---|
| 969 |  | -}  | 
|---|
 | 904 | +static void rcu_preempt_deferred_qs(struct task_struct *t) { }  | 
|---|
| 970 | 905 |   | 
|---|
| 971 | 906 |  /* | 
|---|
| 972 | 907 |   * Because there is no preemptible RCU, there can be no readers blocked, | 
|---|
| 973 | 908 |   * so there is no need to check for blocked tasks.  So check only for | 
|---|
| 974 | 909 |   * bogus qsmask values. | 
|---|
| 975 | 910 |   */ | 
|---|
| 976 |  | -static void  | 
|---|
| 977 |  | -rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)  | 
|---|
 | 911 | +static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)  | 
|---|
| 978 | 912 |  { | 
|---|
| 979 | 913 |  	WARN_ON_ONCE(rnp->qsmask); | 
|---|
| 980 | 914 |  } | 
|---|
| 981 | 915 |   | 
|---|
| 982 | 916 |  /* | 
|---|
| 983 |  | - * Because preemptible RCU does not exist, it never has any callbacks  | 
|---|
| 984 |  | - * to check.  | 
|---|
 | 917 | + * Check to see if this CPU is in a non-context-switch quiescent state,  | 
|---|
 | 918 | + * namely user mode and idle loop.  | 
|---|
| 985 | 919 |   */ | 
|---|
| 986 |  | -static void rcu_preempt_check_callbacks(void)  | 
|---|
 | 920 | +static void rcu_flavor_sched_clock_irq(int user)  | 
|---|
| 987 | 921 |  { | 
|---|
| 988 |  | -}  | 
|---|
 | 922 | +	if (user || rcu_is_cpu_rrupt_from_idle()) {  | 
|---|
| 989 | 923 |   | 
|---|
| 990 |  | -/*  | 
|---|
| 991 |  | - * Because preemptible RCU does not exist, rcu_barrier() is just  | 
|---|
| 992 |  | - * another name for rcu_barrier_sched().  | 
|---|
| 993 |  | - */  | 
|---|
| 994 |  | -void rcu_barrier(void)  | 
|---|
| 995 |  | -{  | 
|---|
| 996 |  | -	rcu_barrier_sched();  | 
|---|
| 997 |  | -}  | 
|---|
| 998 |  | -EXPORT_SYMBOL_GPL(rcu_barrier);  | 
|---|
 | 924 | +		/*  | 
|---|
 | 925 | +		 * Get here if this CPU took its interrupt from user  | 
|---|
 | 926 | +		 * mode or from the idle loop, and if this is not a  | 
|---|
 | 927 | +		 * nested interrupt.  In this case, the CPU is in  | 
|---|
 | 928 | +		 * a quiescent state, so note it.  | 
|---|
 | 929 | +		 *  | 
|---|
 | 930 | +		 * No memory barrier is required here because rcu_qs()  | 
|---|
 | 931 | +		 * references only CPU-local variables that other CPUs  | 
|---|
 | 932 | +		 * neither access nor modify, at least not while the  | 
|---|
 | 933 | +		 * corresponding CPU is online.  | 
|---|
 | 934 | +		 */  | 
|---|
| 999 | 935 |   | 
|---|
| 1000 |  | -/*  | 
|---|
| 1001 |  | - * Because preemptible RCU does not exist, it need not be initialized.  | 
|---|
| 1002 |  | - */  | 
|---|
| 1003 |  | -static void __init __rcu_init_preempt(void)  | 
|---|
| 1004 |  | -{  | 
|---|
 | 936 | +		rcu_qs();  | 
|---|
 | 937 | +	}  | 
|---|
| 1005 | 938 |  } | 
|---|
| 1006 | 939 |   | 
|---|
| 1007 | 940 |  /* | 
|---|
| .. | .. | 
|---|
| 1016 | 949 |   * Dump the guaranteed-empty blocked-tasks state.  Trust but verify. | 
|---|
| 1017 | 950 |   */ | 
|---|
| 1018 | 951 |  static void | 
|---|
| 1019 |  | -dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp, int ncheck)  | 
|---|
 | 952 | +dump_blkd_tasks(struct rcu_node *rnp, int ncheck)  | 
|---|
| 1020 | 953 |  { | 
|---|
| 1021 | 954 |  	WARN_ON_ONCE(!list_empty(&rnp->blkd_tasks)); | 
|---|
| 1022 | 955 |  } | 
|---|
| 1023 | 956 |   | 
|---|
| 1024 | 957 |  #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | 
|---|
| 1025 | 958 |   | 
|---|
| 1026 |  | -#ifdef CONFIG_RCU_BOOST  | 
|---|
| 1027 |  | -  | 
|---|
| 1028 |  | -static void rcu_wake_cond(struct task_struct *t, int status)  | 
|---|
 | 959 | +/*  | 
|---|
 | 960 | + * If boosting, set rcuc kthreads to realtime priority.  | 
|---|
 | 961 | + */  | 
|---|
 | 962 | +static void rcu_cpu_kthread_setup(unsigned int cpu)  | 
|---|
| 1029 | 963 |  { | 
|---|
| 1030 |  | -	/*  | 
|---|
| 1031 |  | -	 * If the thread is yielding, only wake it when this  | 
|---|
| 1032 |  | -	 * is invoked from idle  | 
|---|
| 1033 |  | -	 */  | 
|---|
| 1034 |  | -	if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))  | 
|---|
| 1035 |  | -		wake_up_process(t);  | 
|---|
 | 964 | +#ifdef CONFIG_RCU_BOOST  | 
|---|
 | 965 | +	struct sched_param sp;  | 
|---|
 | 966 | +  | 
|---|
 | 967 | +	sp.sched_priority = kthread_prio;  | 
|---|
 | 968 | +	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);  | 
|---|
 | 969 | +#endif /* #ifdef CONFIG_RCU_BOOST */  | 
|---|
| 1036 | 970 |  } | 
|---|
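The new `rcu_cpu_kthread_setup()` above does nothing more than switch the current kthread to SCHED_FIFO at `kthread_prio` when RCU_BOOST is configured. The same POSIX call exists in userspace; the sketch below is illustrative only (it needs CAP_SYS_NICE to succeed) and sets the calling thread to SCHED_FIFO priority 1:

```c
/* Minimal userspace analogue of switching a thread to SCHED_FIFO. */
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

int main(void)
{
	struct sched_param sp = { .sched_priority = 1 };

	/* pid 0 means "the calling thread"; fails without CAP_SYS_NICE. */
	if (sched_setscheduler(0, SCHED_FIFO, &sp))
		fprintf(stderr, "sched_setscheduler: %s\n", strerror(errno));
	else
		printf("now SCHED_FIFO, priority %d\n", sp.sched_priority);
	return 0;
}
```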
 | 971 | +  | 
|---|
 | 972 | +#ifdef CONFIG_RCU_BOOST  | 
|---|
| 1037 | 973 |   | 
|---|
| 1038 | 974 |  /* | 
|---|
| 1039 | 975 |   * Carry out RCU priority boosting on the task indicated by ->exp_tasks | 
|---|
| .. | .. | 
|---|
| 1113 | 1049 |   | 
|---|
| 1114 | 1050 |  	trace_rcu_utilization(TPS("Start boost kthread@init")); | 
|---|
| 1115 | 1051 |  	for (;;) { | 
|---|
| 1116 |  | -		rnp->boost_kthread_status = RCU_KTHREAD_WAITING;  | 
|---|
 | 1052 | +		WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_WAITING);  | 
|---|
| 1117 | 1053 |  		trace_rcu_utilization(TPS("End boost kthread@rcu_wait")); | 
|---|
| 1118 |  | -		rcu_wait(rnp->boost_tasks || rnp->exp_tasks);  | 
|---|
 | 1054 | +		rcu_wait(READ_ONCE(rnp->boost_tasks) ||  | 
|---|
 | 1055 | +			 READ_ONCE(rnp->exp_tasks));  | 
|---|
| 1119 | 1056 |  		trace_rcu_utilization(TPS("Start boost kthread@rcu_wait")); | 
|---|
| 1120 |  | -		rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;  | 
|---|
 | 1057 | +		WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_RUNNING);  | 
|---|
| 1121 | 1058 |  		more2boost = rcu_boost(rnp); | 
|---|
| 1122 | 1059 |  		if (more2boost) | 
|---|
| 1123 | 1060 |  			spincnt++; | 
|---|
| 1124 | 1061 |  		else | 
|---|
| 1125 | 1062 |  			spincnt = 0; | 
|---|
| 1126 | 1063 |  		if (spincnt > 10) { | 
|---|
| 1127 |  | -			rnp->boost_kthread_status = RCU_KTHREAD_YIELDING;  | 
|---|
 | 1064 | +			WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_YIELDING);  | 
|---|
| 1128 | 1065 |  			trace_rcu_utilization(TPS("End boost kthread@rcu_yield")); | 
|---|
| 1129 |  | -			schedule_timeout_interruptible(2);  | 
|---|
 | 1066 | +			schedule_timeout_idle(2);  | 
|---|
| 1130 | 1067 |  			trace_rcu_utilization(TPS("Start boost kthread@rcu_yield")); | 
|---|
| 1131 | 1068 |  			spincnt = 0; | 
|---|
| 1132 | 1069 |  		} | 
|---|
| .. | .. | 
|---|
| 1149 | 1086 |  static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | 
|---|
| 1150 | 1087 |  	__releases(rnp->lock) | 
|---|
| 1151 | 1088 |  { | 
|---|
| 1152 |  | -	struct task_struct *t;  | 
|---|
| 1153 |  | -  | 
|---|
| 1154 | 1089 |  	raw_lockdep_assert_held_rcu_node(rnp); | 
|---|
| 1155 | 1090 |  	if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) { | 
|---|
| 1156 | 1091 |  		raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | 
|---|
| .. | .. | 
|---|
| 1160 | 1095 |  	    (rnp->gp_tasks != NULL && | 
|---|
| 1161 | 1096 |  	     rnp->boost_tasks == NULL && | 
|---|
| 1162 | 1097 |  	     rnp->qsmask == 0 && | 
|---|
| 1163 |  | -	     ULONG_CMP_GE(jiffies, rnp->boost_time))) {  | 
|---|
 | 1098 | +	     (!time_after(rnp->boost_time, jiffies) || rcu_state.cbovld))) {  | 
|---|
| 1164 | 1099 |  		if (rnp->exp_tasks == NULL) | 
|---|
| 1165 |  | -			rnp->boost_tasks = rnp->gp_tasks;  | 
|---|
 | 1100 | +			WRITE_ONCE(rnp->boost_tasks, rnp->gp_tasks);  | 
|---|
| 1166 | 1101 |  		raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | 
|---|
| 1167 |  | -		t = rnp->boost_kthread_task;  | 
|---|
| 1168 |  | -		if (t)  | 
|---|
| 1169 |  | -			rcu_wake_cond(t, rnp->boost_kthread_status);  | 
|---|
 | 1102 | +		rcu_wake_cond(rnp->boost_kthread_task,  | 
|---|
 | 1103 | +			      READ_ONCE(rnp->boost_kthread_status));  | 
|---|
| 1170 | 1104 |  	} else { | 
|---|
| 1171 | 1105 |  		raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | 
|---|
| 1172 | 1106 |  	} | 
|---|
| 1173 |  | -}  | 
|---|
| 1174 |  | -  | 
|---|
| 1175 |  | -/*  | 
|---|
| 1176 |  | - * Wake up the per-CPU kthread to invoke RCU callbacks.  | 
|---|
| 1177 |  | - */  | 
|---|
| 1178 |  | -static void invoke_rcu_callbacks_kthread(void)  | 
|---|
| 1179 |  | -{  | 
|---|
| 1180 |  | -	unsigned long flags;  | 
|---|
| 1181 |  | -  | 
|---|
| 1182 |  | -	local_irq_save(flags);  | 
|---|
| 1183 |  | -	__this_cpu_write(rcu_cpu_has_work, 1);  | 
|---|
| 1184 |  | -	if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&  | 
|---|
| 1185 |  | -	    current != __this_cpu_read(rcu_cpu_kthread_task)) {  | 
|---|
| 1186 |  | -		rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),  | 
|---|
| 1187 |  | -			      __this_cpu_read(rcu_cpu_kthread_status));  | 
|---|
| 1188 |  | -	}  | 
|---|
| 1189 |  | -	local_irq_restore(flags);  | 
|---|
| 1190 | 1107 |  } | 
|---|
| 1191 | 1108 |   | 
|---|
| 1192 | 1109 |  /* | 
|---|
| .. | .. | 
|---|
| 1195 | 1112 |   */ | 
|---|
| 1196 | 1113 |  static bool rcu_is_callbacks_kthread(void) | 
|---|
| 1197 | 1114 |  { | 
|---|
| 1198 |  | -	return __this_cpu_read(rcu_cpu_kthread_task) == current;  | 
|---|
 | 1115 | +	return __this_cpu_read(rcu_data.rcu_cpu_kthread_task) == current;  | 
|---|
| 1199 | 1116 |  } | 
|---|
| 1200 | 1117 |   | 
|---|
| 1201 | 1118 |  #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) | 
|---|
| .. | .. | 
|---|
| 1213 | 1130 |   * already exist.  We only create this kthread for preemptible RCU. | 
|---|
| 1214 | 1131 |   * Returns zero if all is well, a negated errno otherwise. | 
|---|
| 1215 | 1132 |   */ | 
|---|
| 1216 |  | -static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,  | 
|---|
| 1217 |  | -				       struct rcu_node *rnp)  | 
|---|
 | 1133 | +static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)  | 
|---|
| 1218 | 1134 |  { | 
|---|
| 1219 |  | -	int rnp_index = rnp - &rsp->node[0];  | 
|---|
 | 1135 | +	int rnp_index = rnp - rcu_get_root();  | 
|---|
| 1220 | 1136 |  	unsigned long flags; | 
|---|
| 1221 | 1137 |  	struct sched_param sp; | 
|---|
| 1222 | 1138 |  	struct task_struct *t; | 
|---|
| 1223 | 1139 |   | 
|---|
| 1224 |  | -	if (rcu_state_p != rsp)  | 
|---|
| 1225 |  | -		return 0;  | 
|---|
 | 1140 | +	if (!IS_ENABLED(CONFIG_PREEMPT_RCU))  | 
|---|
 | 1141 | +		return;  | 
|---|
| 1226 | 1142 |   | 
|---|
| 1227 | 1143 |  	if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0) | 
|---|
| 1228 |  | -		return 0;  | 
|---|
 | 1144 | +		return;  | 
|---|
| 1229 | 1145 |   | 
|---|
| 1230 |  | -	rsp->boost = 1;  | 
|---|
 | 1146 | +	rcu_state.boost = 1;  | 
|---|
 | 1147 | +  | 
|---|
| 1231 | 1148 |  	if (rnp->boost_kthread_task != NULL) | 
|---|
| 1232 |  | -		return 0;  | 
|---|
 | 1149 | +		return;  | 
|---|
 | 1150 | +  | 
|---|
| 1233 | 1151 |  	t = kthread_create(rcu_boost_kthread, (void *)rnp, | 
|---|
| 1234 | 1152 |  			   "rcub/%d", rnp_index); | 
|---|
| 1235 |  | -	if (IS_ERR(t))  | 
|---|
| 1236 |  | -		return PTR_ERR(t);  | 
|---|
 | 1153 | +	if (WARN_ON_ONCE(IS_ERR(t)))  | 
|---|
 | 1154 | +		return;  | 
|---|
 | 1155 | +  | 
|---|
| 1237 | 1156 |  	raw_spin_lock_irqsave_rcu_node(rnp, flags); | 
|---|
| 1238 | 1157 |  	rnp->boost_kthread_task = t; | 
|---|
| 1239 | 1158 |  	raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | 
|---|
| 1240 | 1159 |  	sp.sched_priority = kthread_prio; | 
|---|
| 1241 | 1160 |  	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | 
|---|
| 1242 | 1161 |  	wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ | 
|---|
| 1243 |  | -	return 0;  | 
|---|
| 1244 |  | -}  | 
|---|
| 1245 |  | -  | 
|---|
| 1246 |  | -static void rcu_kthread_do_work(void)  | 
|---|
| 1247 |  | -{  | 
|---|
| 1248 |  | -	rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));  | 
|---|
| 1249 |  | -	rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));  | 
|---|
| 1250 |  | -	rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));  | 
|---|
| 1251 |  | -}  | 
|---|
| 1252 |  | -  | 
|---|
| 1253 |  | -static void rcu_cpu_kthread_setup(unsigned int cpu)  | 
|---|
| 1254 |  | -{  | 
|---|
| 1255 |  | -	struct sched_param sp;  | 
|---|
| 1256 |  | -  | 
|---|
| 1257 |  | -	sp.sched_priority = kthread_prio;  | 
|---|
| 1258 |  | -	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);  | 
|---|
| 1259 |  | -}  | 
|---|
| 1260 |  | -  | 
|---|
| 1261 |  | -static void rcu_cpu_kthread_park(unsigned int cpu)  | 
|---|
| 1262 |  | -{  | 
|---|
| 1263 |  | -	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;  | 
|---|
| 1264 |  | -}  | 
|---|
| 1265 |  | -  | 
|---|
| 1266 |  | -static int rcu_cpu_kthread_should_run(unsigned int cpu)  | 
|---|
| 1267 |  | -{  | 
|---|
| 1268 |  | -	return __this_cpu_read(rcu_cpu_has_work);  | 
|---|
| 1269 |  | -}  | 
|---|
| 1270 |  | -  | 
|---|
| 1271 |  | -/*  | 
|---|
| 1272 |  | - * Per-CPU kernel thread that invokes RCU callbacks.  This replaces the  | 
|---|
| 1273 |  | - * RCU softirq used in flavors and configurations of RCU that do not  | 
|---|
| 1274 |  | - * support RCU priority boosting.  | 
|---|
| 1275 |  | - */  | 
|---|
| 1276 |  | -static void rcu_cpu_kthread(unsigned int cpu)  | 
|---|
| 1277 |  | -{  | 
|---|
| 1278 |  | -	unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);  | 
|---|
| 1279 |  | -	char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);  | 
|---|
| 1280 |  | -	int spincnt;  | 
|---|
| 1281 |  | -  | 
|---|
| 1282 |  | -	for (spincnt = 0; spincnt < 10; spincnt++) {  | 
|---|
| 1283 |  | -		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));  | 
|---|
| 1284 |  | -		local_bh_disable();  | 
|---|
| 1285 |  | -		*statusp = RCU_KTHREAD_RUNNING;  | 
|---|
| 1286 |  | -		this_cpu_inc(rcu_cpu_kthread_loops);  | 
|---|
| 1287 |  | -		local_irq_disable();  | 
|---|
| 1288 |  | -		work = *workp;  | 
|---|
| 1289 |  | -		*workp = 0;  | 
|---|
| 1290 |  | -		local_irq_enable();  | 
|---|
| 1291 |  | -		if (work)  | 
|---|
| 1292 |  | -			rcu_kthread_do_work();  | 
|---|
| 1293 |  | -		local_bh_enable();  | 
|---|
| 1294 |  | -		if (*workp == 0) {  | 
|---|
| 1295 |  | -			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));  | 
|---|
| 1296 |  | -			*statusp = RCU_KTHREAD_WAITING;  | 
|---|
| 1297 |  | -			return;  | 
|---|
| 1298 |  | -		}  | 
|---|
| 1299 |  | -	}  | 
|---|
| 1300 |  | -	*statusp = RCU_KTHREAD_YIELDING;  | 
|---|
| 1301 |  | -	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));  | 
|---|
| 1302 |  | -	schedule_timeout_interruptible(2);  | 
|---|
| 1303 |  | -	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));  | 
|---|
| 1304 |  | -	*statusp = RCU_KTHREAD_WAITING;  | 
|---|
| 1305 | 1162 |  } | 
|---|
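`rcu_spawn_one_boost_kthread()` above creates the per-leaf "rcub/%d" kthread, publishes it under the rcu_node lock, raises it to SCHED_FIFO at `kthread_prio`, and wakes it. A loose userspace analogue is to hand the scheduling attributes to `pthread_create()` directly instead of the kernel's `kthread_create()` plus `sched_setscheduler_nocheck()`; the sketch below is an illustration under that substitution, not the kernel's mechanism, and it too needs CAP_SYS_NICE (build with `-lpthread`):

```c
/* Userspace sketch: spawn a worker thread that starts life at SCHED_FIFO. */
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static void *boost_worker(void *arg)
{
	(void)arg;
	puts("boost worker running");
	return NULL;
}

int main(void)
{
	pthread_attr_t attr;
	struct sched_param sp = { .sched_priority = 1 };
	pthread_t tid;
	int ret;

	pthread_attr_init(&attr);
	/* Without EXPLICIT_SCHED the policy/priority below would be ignored. */
	pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
	pthread_attr_setschedpolicy(&attr, SCHED_FIFO);
	pthread_attr_setschedparam(&attr, &sp);
	ret = pthread_create(&tid, &attr, boost_worker, NULL);
	if (ret) {
		fprintf(stderr, "pthread_create: %d (need CAP_SYS_NICE?)\n", ret);
		return 1;
	}
	pthread_join(tid, NULL);
	pthread_attr_destroy(&attr);
	return 0;
}
```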
| 1306 | 1163 |   | 
|---|
| 1307 | 1164 |  /* | 
|---|
| .. | .. | 
|---|
| 1334 | 1191 |  	free_cpumask_var(cm); | 
|---|
| 1335 | 1192 |  } | 
|---|
| 1336 | 1193 |   | 
|---|
| 1337 |  | -static struct smp_hotplug_thread rcu_cpu_thread_spec = {  | 
|---|
| 1338 |  | -	.store			= &rcu_cpu_kthread_task,  | 
|---|
| 1339 |  | -	.thread_should_run	= rcu_cpu_kthread_should_run,  | 
|---|
| 1340 |  | -	.thread_fn		= rcu_cpu_kthread,  | 
|---|
| 1341 |  | -	.thread_comm		= "rcuc/%u",  | 
|---|
| 1342 |  | -	.setup			= rcu_cpu_kthread_setup,  | 
|---|
| 1343 |  | -	.park			= rcu_cpu_kthread_park,  | 
|---|
| 1344 |  | -};  | 
|---|
| 1345 |  | -  | 
|---|
| 1346 | 1194 |  /* | 
|---|
| 1347 | 1195 |   * Spawn boost kthreads -- called as soon as the scheduler is running. | 
|---|
| 1348 | 1196 |   */ | 
|---|
| 1349 | 1197 |  static void __init rcu_spawn_boost_kthreads(void) | 
|---|
| 1350 | 1198 |  { | 
|---|
| 1351 | 1199 |  	struct rcu_node *rnp; | 
|---|
| 1352 |  | -	int cpu;  | 
|---|
| 1353 | 1200 |   | 
|---|
| 1354 |  | -	for_each_possible_cpu(cpu)  | 
|---|
| 1355 |  | -		per_cpu(rcu_cpu_has_work, cpu) = 0;  | 
|---|
| 1356 |  | -	BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));  | 
|---|
| 1357 |  | -	rcu_for_each_leaf_node(rcu_state_p, rnp)  | 
|---|
| 1358 |  | -		(void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);  | 
|---|
 | 1201 | +	rcu_for_each_leaf_node(rnp)  | 
|---|
 | 1202 | +		rcu_spawn_one_boost_kthread(rnp);  | 
|---|
| 1359 | 1203 |  } | 
|---|
| 1360 | 1204 |   | 
|---|
| 1361 | 1205 |  static void rcu_prepare_kthreads(int cpu) | 
|---|
| 1362 | 1206 |  { | 
|---|
| 1363 |  | -	struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu);  | 
|---|
 | 1207 | +	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
| 1364 | 1208 |  	struct rcu_node *rnp = rdp->mynode; | 
|---|
| 1365 | 1209 |   | 
|---|
| 1366 | 1210 |  	/* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ | 
|---|
| 1367 | 1211 |  	if (rcu_scheduler_fully_active) | 
|---|
| 1368 |  | -		(void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);  | 
|---|
 | 1212 | +		rcu_spawn_one_boost_kthread(rnp);  | 
|---|
| 1369 | 1213 |  } | 
|---|
| 1370 | 1214 |   | 
|---|
| 1371 | 1215 |  #else /* #ifdef CONFIG_RCU_BOOST */ | 
|---|
| .. | .. | 
|---|
| 1374 | 1218 |  	__releases(rnp->lock) | 
|---|
| 1375 | 1219 |  { | 
|---|
| 1376 | 1220 |  	raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | 
|---|
| 1377 |  | -}  | 
|---|
| 1378 |  | -  | 
|---|
| 1379 |  | -static void invoke_rcu_callbacks_kthread(void)  | 
|---|
| 1380 |  | -{  | 
|---|
| 1381 |  | -	WARN_ON_ONCE(1);  | 
|---|
| 1382 | 1221 |  } | 
|---|
| 1383 | 1222 |   | 
|---|
| 1384 | 1223 |  static bool rcu_is_callbacks_kthread(void) | 
|---|
| .. | .. | 
|---|
| 1407 | 1246 |  #if !defined(CONFIG_RCU_FAST_NO_HZ) | 
|---|
| 1408 | 1247 |   | 
|---|
| 1409 | 1248 |  /* | 
|---|
| 1410 |  | - * Check to see if any future RCU-related work will need to be done  | 
|---|
| 1411 |  | - * by the current CPU, even if none need be done immediately, returning  | 
|---|
| 1412 |  | - * 1 if so.  This function is part of the RCU implementation; it is -not-  | 
|---|
| 1413 |  | - * an exported member of the RCU API.  | 
|---|
 | 1249 | + * Check to see if any future non-offloaded RCU-related work will need  | 
|---|
 | 1250 | + * to be done by the current CPU, even if none need be done immediately,  | 
|---|
 | 1251 | + * returning 1 if so.  This function is part of the RCU implementation;  | 
|---|
 | 1252 | + * it is -not- an exported member of the RCU API.  | 
|---|
| 1414 | 1253 |   * | 
|---|
| 1415 |  | - * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs  | 
|---|
| 1416 |  | - * any flavor of RCU.  | 
|---|
 | 1254 | + * Because we do not have RCU_FAST_NO_HZ, just check whether or not this  | 
|---|
 | 1255 | + * CPU has RCU callbacks queued.  | 
|---|
| 1417 | 1256 |   */ | 
|---|
| 1418 | 1257 |  int rcu_needs_cpu(u64 basemono, u64 *nextevt) | 
|---|
| 1419 | 1258 |  { | 
|---|
| 1420 | 1259 |  	*nextevt = KTIME_MAX; | 
|---|
| 1421 |  | -	return rcu_cpu_has_callbacks(NULL);  | 
|---|
 | 1260 | +	return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&  | 
|---|
 | 1261 | +	       !rcu_segcblist_is_offloaded(&this_cpu_ptr(&rcu_data)->cblist);  | 
|---|
| 1422 | 1262 |  } | 
|---|
| 1423 | 1263 |   | 
|---|
| 1424 | 1264 |  /* | 
|---|
| .. | .. | 
|---|
| 1437 | 1277 |  { | 
|---|
| 1438 | 1278 |  } | 
|---|
| 1439 | 1279 |   | 
|---|
| 1440 |  | -/*  | 
|---|
| 1441 |  | - * Don't bother keeping a running count of the number of RCU callbacks  | 
|---|
| 1442 |  | - * posted because CONFIG_RCU_FAST_NO_HZ=n.  | 
|---|
| 1443 |  | - */  | 
|---|
| 1444 |  | -static void rcu_idle_count_callbacks_posted(void)  | 
|---|
| 1445 |  | -{  | 
|---|
| 1446 |  | -}  | 
|---|
| 1447 |  | -  | 
|---|
| 1448 | 1280 |  #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | 
|---|
| 1449 | 1281 |   | 
|---|
| 1450 | 1282 |  /* | 
|---|
| 1451 | 1283 |   * This code is invoked when a CPU goes idle, at which point we want | 
|---|
| 1452 | 1284 |   * to have the CPU do everything required for RCU so that it can enter | 
|---|
| 1453 |  | - * the energy-efficient dyntick-idle mode.  This is handled by a  | 
|---|
| 1454 |  | - * state machine implemented by rcu_prepare_for_idle() below.  | 
|---|
 | 1285 | + * the energy-efficient dyntick-idle mode.  | 
|---|
| 1455 | 1286 |   * | 
|---|
| 1456 |  | - * The following three proprocessor symbols control this state machine:  | 
|---|
 | 1287 | + * The following preprocessor symbol controls this:  | 
|---|
| 1457 | 1288 |   * | 
|---|
| 1458 | 1289 |   * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted | 
|---|
| 1459 | 1290 |   *	to sleep in dyntick-idle mode with RCU callbacks pending.  This | 
|---|
| .. | .. | 
|---|
| 1462 | 1293 |   *	number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your | 
|---|
| 1463 | 1294 |   *	system.  And if you are -that- concerned about energy efficiency, | 
|---|
| 1464 | 1295 |   *	just power the system down and be done with it! | 
|---|
| 1465 |  | - * RCU_IDLE_LAZY_GP_DELAY gives the number of jiffies that a CPU is  | 
|---|
| 1466 |  | - *	permitted to sleep in dyntick-idle mode with only lazy RCU  | 
|---|
| 1467 |  | - *	callbacks pending.  Setting this too high can OOM your system.  | 
|---|
| 1468 | 1296 |   * | 
|---|
| 1469 |  | - * The values below work well in practice.  If future workloads require  | 
|---|
 | 1297 | + * The value below works well in practice.  If future workloads require  | 
|---|
| 1470 | 1298 |   * adjustment, they can be converted into kernel config parameters, though | 
|---|
| 1471 | 1299 |   * making the state machine smarter might be a better option. | 
|---|
| 1472 | 1300 |   */ | 
|---|
| 1473 | 1301 |  #define RCU_IDLE_GP_DELAY 4		/* Roughly one grace period. */ | 
|---|
| 1474 |  | -#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ)	/* Roughly six seconds. */  | 
|---|
| 1475 | 1302 |   | 
|---|
| 1476 | 1303 |  static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY; | 
|---|
| 1477 | 1304 |  module_param(rcu_idle_gp_delay, int, 0644); | 
|---|
| 1478 |  | -static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;  | 
|---|
| 1479 |  | -module_param(rcu_idle_lazy_gp_delay, int, 0644);  | 
|---|
| 1480 | 1305 |   | 
|---|
| 1481 | 1306 |  /* | 
|---|
| 1482 |  | - * Try to advance callbacks for all flavors of RCU on the current CPU, but  | 
|---|
| 1483 |  | - * only if it has been awhile since the last time we did so.  Afterwards,  | 
|---|
| 1484 |  | - * if there are any callbacks ready for immediate invocation, return true.  | 
|---|
 | 1307 | + * Try to advance callbacks on the current CPU, but only if it has been  | 
|---|
 | 1308 | + * a while since the last time we did so.  Afterwards, if there are any  | 
|---|
 | 1309 | + * callbacks ready for immediate invocation, return true.  | 
|---|
| 1485 | 1310 |   */ | 
|---|
| 1486 | 1311 |  static bool __maybe_unused rcu_try_advance_all_cbs(void) | 
|---|
| 1487 | 1312 |  { | 
|---|
| 1488 | 1313 |  	bool cbs_ready = false; | 
|---|
| 1489 |  | -	struct rcu_data *rdp;  | 
|---|
| 1490 |  | -	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);  | 
|---|
 | 1314 | +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);  | 
|---|
| 1491 | 1315 |  	struct rcu_node *rnp; | 
|---|
| 1492 |  | -	struct rcu_state *rsp;  | 
|---|
| 1493 | 1316 |   | 
|---|
| 1494 | 1317 |  	/* Exit early if we advanced recently. */ | 
|---|
| 1495 |  | -	if (jiffies == rdtp->last_advance_all)  | 
|---|
 | 1318 | +	if (jiffies == rdp->last_advance_all)  | 
|---|
| 1496 | 1319 |  		return false; | 
|---|
| 1497 |  | -	rdtp->last_advance_all = jiffies;  | 
|---|
 | 1320 | +	rdp->last_advance_all = jiffies;  | 
|---|
| 1498 | 1321 |   | 
|---|
| 1499 |  | -	for_each_rcu_flavor(rsp) {  | 
|---|
| 1500 |  | -		rdp = this_cpu_ptr(rsp->rda);  | 
|---|
| 1501 |  | -		rnp = rdp->mynode;  | 
|---|
 | 1322 | +	rnp = rdp->mynode;  | 
|---|
| 1502 | 1323 |   | 
|---|
| 1503 |  | -		/*  | 
|---|
| 1504 |  | -		 * Don't bother checking unless a grace period has  | 
|---|
| 1505 |  | -		 * completed since we last checked and there are  | 
|---|
| 1506 |  | -		 * callbacks not yet ready to invoke.  | 
|---|
| 1507 |  | -		 */  | 
|---|
| 1508 |  | -		if ((rcu_seq_completed_gp(rdp->gp_seq,  | 
|---|
| 1509 |  | -					  rcu_seq_current(&rnp->gp_seq)) ||  | 
|---|
| 1510 |  | -		     unlikely(READ_ONCE(rdp->gpwrap))) &&  | 
|---|
| 1511 |  | -		    rcu_segcblist_pend_cbs(&rdp->cblist))  | 
|---|
| 1512 |  | -			note_gp_changes(rsp, rdp);  | 
|---|
 | 1324 | +	/*  | 
|---|
 | 1325 | +	 * Don't bother checking unless a grace period has  | 
|---|
 | 1326 | +	 * completed since we last checked and there are  | 
|---|
 | 1327 | +	 * callbacks not yet ready to invoke.  | 
|---|
 | 1328 | +	 */  | 
|---|
 | 1329 | +	if ((rcu_seq_completed_gp(rdp->gp_seq,  | 
|---|
 | 1330 | +				  rcu_seq_current(&rnp->gp_seq)) ||  | 
|---|
 | 1331 | +	     unlikely(READ_ONCE(rdp->gpwrap))) &&  | 
|---|
 | 1332 | +	    rcu_segcblist_pend_cbs(&rdp->cblist))  | 
|---|
 | 1333 | +		note_gp_changes(rdp);  | 
|---|
| 1513 | 1334 |   | 
|---|
| 1514 |  | -		if (rcu_segcblist_ready_cbs(&rdp->cblist))  | 
|---|
| 1515 |  | -			cbs_ready = true;  | 
|---|
| 1516 |  | -	}  | 
|---|
 | 1335 | +	if (rcu_segcblist_ready_cbs(&rdp->cblist))  | 
|---|
 | 1336 | +		cbs_ready = true;  | 
|---|
| 1517 | 1337 |  	return cbs_ready; | 
|---|
| 1518 | 1338 |  } | 
|---|
| 1519 | 1339 |   | 
|---|
| 1520 | 1340 |  /* | 
|---|
| 1521 | 1341 |   * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready | 
|---|
| 1522 | 1342 |   * to invoke.  If the CPU has callbacks, try to advance them.  Tell the | 
|---|
| 1523 |  | - * caller to set the timeout based on whether or not there are non-lazy  | 
|---|
| 1524 |  | - * callbacks.  | 
|---|
 | 1343 | + * caller what to set the timeout to.  | 
|---|
| 1525 | 1344 |   * | 
|---|
| 1526 | 1345 |   * The caller must have disabled interrupts. | 
|---|
| 1527 | 1346 |   */ | 
|---|
| 1528 | 1347 |  int rcu_needs_cpu(u64 basemono, u64 *nextevt) | 
|---|
| 1529 | 1348 |  { | 
|---|
| 1530 |  | -	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);  | 
|---|
 | 1349 | +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);  | 
|---|
| 1531 | 1350 |  	unsigned long dj; | 
|---|
| 1532 | 1351 |   | 
|---|
| 1533 | 1352 |  	lockdep_assert_irqs_disabled(); | 
|---|
| 1534 | 1353 |   | 
|---|
| 1535 |  | -	/* Snapshot to detect later posting of non-lazy callback. */  | 
|---|
| 1536 |  | -	rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;  | 
|---|
| 1537 |  | -  | 
|---|
| 1538 |  | -	/* If no callbacks, RCU doesn't need the CPU. */  | 
|---|
| 1539 |  | -	if (!rcu_cpu_has_callbacks(&rdtp->all_lazy)) {  | 
|---|
 | 1354 | +	/* If no non-offloaded callbacks, RCU doesn't need the CPU. */  | 
|---|
 | 1355 | +	if (rcu_segcblist_empty(&rdp->cblist) ||  | 
|---|
 | 1356 | +	    rcu_segcblist_is_offloaded(&this_cpu_ptr(&rcu_data)->cblist)) {  | 
|---|
| 1540 | 1357 |  		*nextevt = KTIME_MAX; | 
|---|
| 1541 | 1358 |  		return 0; | 
|---|
| 1542 | 1359 |  	} | 
|---|
| .. | .. | 
|---|
| 1547 | 1364 |  		invoke_rcu_core(); | 
|---|
| 1548 | 1365 |  		return 1; | 
|---|
| 1549 | 1366 |  	} | 
|---|
| 1550 |  | -	rdtp->last_accelerate = jiffies;  | 
|---|
 | 1367 | +	rdp->last_accelerate = jiffies;  | 
|---|
| 1551 | 1368 |   | 
|---|
| 1552 |  | -	/* Request timer delay depending on laziness, and round. */  | 
|---|
| 1553 |  | -	if (!rdtp->all_lazy) {  | 
|---|
| 1554 |  | -		dj = round_up(rcu_idle_gp_delay + jiffies,  | 
|---|
| 1555 |  | -			       rcu_idle_gp_delay) - jiffies;  | 
|---|
| 1556 |  | -	} else {  | 
|---|
| 1557 |  | -		dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies;  | 
|---|
| 1558 |  | -	}  | 
|---|
 | 1369 | +	/* Request timer and round. */  | 
|---|
 | 1370 | +	dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies;  | 
|---|
 | 1371 | +  | 
|---|
| 1559 | 1372 |  	*nextevt = basemono + dj * TICK_NSEC; | 
|---|
| 1560 | 1373 |  	return 0; | 
|---|
| 1561 | 1374 |  } | 
|---|
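The timeout computed above, `dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies`, always lands on a multiple of `rcu_idle_gp_delay`, so `dj` ranges from the delay up to twice the delay minus one, and CPUs going idle at nearby times pick the same absolute wakeup jiffy. The standalone demo below re-implements just the arithmetic (the kernel's `round_up()` assumes a power-of-two step, which the default value of 4 satisfies):

```c
/* Standalone demo of the idle-timeout rounding used above.  Assumes the
 * delay is a power of two, as the kernel's round_up() does. */
#include <stdio.h>

static unsigned long round_up_pow2(unsigned long x, unsigned long y)
{
	return (x + y - 1) & ~(y - 1);
}

int main(void)
{
	const unsigned long delay = 4;	/* RCU_IDLE_GP_DELAY */
	unsigned long j;

	for (j = 1000; j < 1008; j++) {
		unsigned long dj = round_up_pow2(delay + j, delay) - j;

		printf("jiffies=%lu  dj=%lu  wakeup at %lu\n", j, dj, j + dj);
	}
	return 0;
}
```

Running it shows CPUs going idle at jiffies 1001 through 1004 all choosing 1008 as their wakeup time, which is the batching effect of rounding to the delay boundary.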
| 1562 | 1375 |   | 
|---|
| 1563 | 1376 |  /* | 
|---|
| 1564 |  | - * Prepare a CPU for idle from an RCU perspective.  The first major task  | 
|---|
| 1565 |  | - * is to sense whether nohz mode has been enabled or disabled via sysfs.  | 
|---|
| 1566 |  | - * The second major task is to check to see if a non-lazy callback has  | 
|---|
| 1567 |  | - * arrived at a CPU that previously had only lazy callbacks.  The third  | 
|---|
| 1568 |  | - * major task is to accelerate (that is, assign grace-period numbers to)  | 
|---|
| 1569 |  | - * any recently arrived callbacks.  | 
|---|
 | 1377 | + * Prepare a CPU for idle from an RCU perspective.  The first major task is to  | 
|---|
 | 1378 | + * sense whether nohz mode has been enabled or disabled via sysfs.  The second  | 
|---|
 | 1379 | + * major task is to accelerate (that is, assign grace-period numbers to) any  | 
|---|
 | 1380 | + * recently arrived callbacks.  | 
|---|
| 1570 | 1381 |   * | 
|---|
| 1571 | 1382 |   * The caller must have disabled interrupts. | 
|---|
| 1572 | 1383 |   */ | 
|---|
| 1573 | 1384 |  static void rcu_prepare_for_idle(void) | 
|---|
| 1574 | 1385 |  { | 
|---|
| 1575 | 1386 |  	bool needwake; | 
|---|
| 1576 |  | -	struct rcu_data *rdp;  | 
|---|
| 1577 |  | -	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);  | 
|---|
 | 1387 | +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);  | 
|---|
| 1578 | 1388 |  	struct rcu_node *rnp; | 
|---|
| 1579 |  | -	struct rcu_state *rsp;  | 
|---|
| 1580 | 1389 |  	int tne; | 
|---|
| 1581 | 1390 |   | 
|---|
| 1582 | 1391 |  	lockdep_assert_irqs_disabled(); | 
|---|
| 1583 |  | -	if (rcu_is_nocb_cpu(smp_processor_id()))  | 
|---|
 | 1392 | +	if (rcu_segcblist_is_offloaded(&rdp->cblist))  | 
|---|
| 1584 | 1393 |  		return; | 
|---|
| 1585 | 1394 |   | 
|---|
| 1586 | 1395 |  	/* Handle nohz enablement switches conservatively. */ | 
|---|
| 1587 | 1396 |  	tne = READ_ONCE(tick_nohz_active); | 
|---|
| 1588 |  | -	if (tne != rdtp->tick_nohz_enabled_snap) {  | 
|---|
| 1589 |  | -		if (rcu_cpu_has_callbacks(NULL))  | 
|---|
 | 1397 | +	if (tne != rdp->tick_nohz_enabled_snap) {  | 
|---|
 | 1398 | +		if (!rcu_segcblist_empty(&rdp->cblist))  | 
|---|
| 1590 | 1399 |  			invoke_rcu_core(); /* force nohz to see update. */ | 
|---|
| 1591 |  | -		rdtp->tick_nohz_enabled_snap = tne;  | 
|---|
 | 1400 | +		rdp->tick_nohz_enabled_snap = tne;  | 
|---|
| 1592 | 1401 |  		return; | 
|---|
| 1593 | 1402 |  	} | 
|---|
| 1594 | 1403 |  	if (!tne) | 
|---|
| 1595 | 1404 |  		return; | 
|---|
| 1596 | 1405 |   | 
|---|
| 1597 | 1406 |  	/* | 
|---|
| 1598 |  | -	 * If a non-lazy callback arrived at a CPU having only lazy  | 
|---|
| 1599 |  | -	 * callbacks, invoke RCU core for the side-effect of recalculating  | 
|---|
| 1600 |  | -	 * idle duration on re-entry to idle.  | 
|---|
| 1601 |  | -	 */  | 
|---|
| 1602 |  | -	if (rdtp->all_lazy &&  | 
|---|
| 1603 |  | -	    rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) {  | 
|---|
| 1604 |  | -		rdtp->all_lazy = false;  | 
|---|
| 1605 |  | -		rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;  | 
|---|
| 1606 |  | -		invoke_rcu_core();  | 
|---|
| 1607 |  | -		return;  | 
|---|
| 1608 |  | -	}  | 
|---|
| 1609 |  | -  | 
|---|
| 1610 |  | -	/*  | 
|---|
| 1611 | 1407 |  	 * If we have not yet accelerated this jiffy, accelerate all | 
|---|
| 1612 | 1408 |  	 * callbacks on this CPU. | 
|---|
| 1613 | 1409 |  	 */ | 
|---|
| 1614 |  | -	if (rdtp->last_accelerate == jiffies)  | 
|---|
 | 1410 | +	if (rdp->last_accelerate == jiffies)  | 
|---|
| 1615 | 1411 |  		return; | 
|---|
| 1616 |  | -	rdtp->last_accelerate = jiffies;  | 
|---|
| 1617 |  | -	for_each_rcu_flavor(rsp) {  | 
|---|
| 1618 |  | -		rdp = this_cpu_ptr(rsp->rda);  | 
|---|
| 1619 |  | -		if (!rcu_segcblist_pend_cbs(&rdp->cblist))  | 
|---|
| 1620 |  | -			continue;  | 
|---|
 | 1412 | +	rdp->last_accelerate = jiffies;  | 
|---|
 | 1413 | +	if (rcu_segcblist_pend_cbs(&rdp->cblist)) {  | 
|---|
| 1621 | 1414 |  		rnp = rdp->mynode; | 
|---|
| 1622 | 1415 |  		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ | 
|---|
| 1623 |  | -		needwake = rcu_accelerate_cbs(rsp, rnp, rdp);  | 
|---|
 | 1416 | +		needwake = rcu_accelerate_cbs(rnp, rdp);  | 
|---|
| 1624 | 1417 |  		raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ | 
|---|
| 1625 | 1418 |  		if (needwake) | 
|---|
| 1626 |  | -			rcu_gp_kthread_wake(rsp);  | 
|---|
 | 1419 | +			rcu_gp_kthread_wake();  | 
|---|
| 1627 | 1420 |  	} | 
|---|
| 1628 | 1421 |  } | 
|---|
| 1629 | 1422 |   | 
|---|
| .. | .. | 
|---|
| 1634 | 1427 |   */ | 
|---|
| 1635 | 1428 |  static void rcu_cleanup_after_idle(void) | 
|---|
| 1636 | 1429 |  { | 
|---|
 | 1430 | +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);  | 
|---|
 | 1431 | +  | 
|---|
| 1637 | 1432 |  	lockdep_assert_irqs_disabled(); | 
|---|
| 1638 |  | -	if (rcu_is_nocb_cpu(smp_processor_id()))  | 
|---|
 | 1433 | +	if (rcu_segcblist_is_offloaded(&rdp->cblist))  | 
|---|
| 1639 | 1434 |  		return; | 
|---|
| 1640 | 1435 |  	if (rcu_try_advance_all_cbs()) | 
|---|
| 1641 | 1436 |  		invoke_rcu_core(); | 
|---|
| 1642 | 1437 |  } | 
|---|
| 1643 | 1438 |   | 
|---|
| 1644 |  | -/*  | 
|---|
| 1645 |  | - * Keep a running count of the number of non-lazy callbacks posted  | 
|---|
| 1646 |  | - * on this CPU.  This running counter (which is never decremented) allows  | 
|---|
| 1647 |  | - * rcu_prepare_for_idle() to detect when something out of the idle loop  | 
|---|
| 1648 |  | - * posts a callback, even if an equal number of callbacks are invoked.  | 
|---|
| 1649 |  | - * Of course, callbacks should only be posted from within a trace event  | 
|---|
| 1650 |  | - * designed to be called from idle or from within RCU_NONIDLE().  | 
|---|
| 1651 |  | - */  | 
|---|
| 1652 |  | -static void rcu_idle_count_callbacks_posted(void)  | 
|---|
| 1653 |  | -{  | 
|---|
| 1654 |  | -	__this_cpu_add(rcu_dynticks.nonlazy_posted, 1);  | 
|---|
| 1655 |  | -}  | 
|---|
| 1656 |  | -  | 
|---|
| 1657 |  | -/*  | 
|---|
| 1658 |  | - * Data for flushing lazy RCU callbacks at OOM time.  | 
|---|
| 1659 |  | - */  | 
|---|
| 1660 |  | -static atomic_t oom_callback_count;  | 
|---|
| 1661 |  | -static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq);  | 
|---|
| 1662 |  | -  | 
|---|
| 1663 |  | -/*  | 
|---|
| 1664 |  | - * RCU OOM callback -- decrement the outstanding count and deliver the  | 
|---|
| 1665 |  | - * wake-up if we are the last one.  | 
|---|
| 1666 |  | - */  | 
|---|
| 1667 |  | -static void rcu_oom_callback(struct rcu_head *rhp)  | 
|---|
| 1668 |  | -{  | 
|---|
| 1669 |  | -	if (atomic_dec_and_test(&oom_callback_count))  | 
|---|
| 1670 |  | -		wake_up(&oom_callback_wq);  | 
|---|
| 1671 |  | -}  | 
|---|
| 1672 |  | -  | 
|---|
| 1673 |  | -/*  | 
|---|
| 1674 |  | - * Post an rcu_oom_notify callback on the current CPU if it has at  | 
|---|
| 1675 |  | - * least one lazy callback.  This will unnecessarily post callbacks  | 
|---|
| 1676 |  | - * to CPUs that already have a non-lazy callback at the end of their  | 
|---|
| 1677 |  | - * callback list, but this is an infrequent operation, so accept some  | 
|---|
| 1678 |  | - * extra overhead to keep things simple.  | 
|---|
| 1679 |  | - */  | 
|---|
| 1680 |  | -static void rcu_oom_notify_cpu(void *unused)  | 
|---|
| 1681 |  | -{  | 
|---|
| 1682 |  | -	struct rcu_state *rsp;  | 
|---|
| 1683 |  | -	struct rcu_data *rdp;  | 
|---|
| 1684 |  | -  | 
|---|
| 1685 |  | -	for_each_rcu_flavor(rsp) {  | 
|---|
| 1686 |  | -		rdp = raw_cpu_ptr(rsp->rda);  | 
|---|
| 1687 |  | -		if (rcu_segcblist_n_lazy_cbs(&rdp->cblist)) {  | 
|---|
| 1688 |  | -			atomic_inc(&oom_callback_count);  | 
|---|
| 1689 |  | -			rsp->call(&rdp->oom_head, rcu_oom_callback);  | 
|---|
| 1690 |  | -		}  | 
|---|
| 1691 |  | -	}  | 
|---|
| 1692 |  | -}  | 
|---|
| 1693 |  | -  | 
|---|
| 1694 |  | -/*  | 
|---|
| 1695 |  | - * If low on memory, ensure that each CPU has a non-lazy callback.  | 
|---|
| 1696 |  | - * This will wake up CPUs that have only lazy callbacks, in turn  | 
|---|
| 1697 |  | - * ensuring that they free up the corresponding memory in a timely manner.  | 
|---|
| 1698 |  | - * Because an uncertain amount of memory will be freed in some uncertain  | 
|---|
| 1699 |  | - * timeframe, we do not claim to have freed anything.  | 
|---|
| 1700 |  | - */  | 
|---|
| 1701 |  | -static int rcu_oom_notify(struct notifier_block *self,  | 
|---|
| 1702 |  | -			  unsigned long notused, void *nfreed)  | 
|---|
| 1703 |  | -{  | 
|---|
| 1704 |  | -	int cpu;  | 
|---|
| 1705 |  | -  | 
|---|
| 1706 |  | -	/* Wait for callbacks from earlier instance to complete. */  | 
|---|
| 1707 |  | -	wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0);  | 
|---|
| 1708 |  | -	smp_mb(); /* Ensure callback reuse happens after callback invocation. */  | 
|---|
| 1709 |  | -  | 
|---|
| 1710 |  | -	/*  | 
|---|
| 1711 |  | -	 * Prevent premature wakeup: ensure that all increments happen  | 
|---|
| 1712 |  | -	 * before there is a chance of the counter reaching zero.  | 
|---|
| 1713 |  | -	 */  | 
|---|
| 1714 |  | -	atomic_set(&oom_callback_count, 1);  | 
|---|
| 1715 |  | -  | 
|---|
| 1716 |  | -	for_each_online_cpu(cpu) {  | 
|---|
| 1717 |  | -		smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);  | 
|---|
| 1718 |  | -		cond_resched_tasks_rcu_qs();  | 
|---|
| 1719 |  | -	}  | 
|---|
| 1720 |  | -  | 
|---|
| 1721 |  | -	/* Unconditionally decrement: no need to wake ourselves up. */  | 
|---|
| 1722 |  | -	atomic_dec(&oom_callback_count);  | 
|---|
| 1723 |  | -  | 
|---|
| 1724 |  | -	return NOTIFY_OK;  | 
|---|
| 1725 |  | -}  | 
|---|
| 1726 |  | -  | 
|---|
| 1727 |  | -static struct notifier_block rcu_oom_nb = {  | 
|---|
| 1728 |  | -	.notifier_call = rcu_oom_notify  | 
|---|
| 1729 |  | -};  | 
|---|
| 1730 |  | -  | 
|---|
| 1731 |  | -static int __init rcu_register_oom_notifier(void)  | 
|---|
| 1732 |  | -{  | 
|---|
| 1733 |  | -	register_oom_notifier(&rcu_oom_nb);  | 
|---|
| 1734 |  | -	return 0;  | 
|---|
| 1735 |  | -}  | 
|---|
| 1736 |  | -early_initcall(rcu_register_oom_notifier);  | 
|---|
| 1737 |  | -  | 
|---|
| 1738 | 1439 |  #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | 
|---|
| 1739 |  | -  | 
|---|
| 1740 |  | -#ifdef CONFIG_RCU_FAST_NO_HZ  | 
|---|
| 1741 |  | -  | 
|---|
| 1742 |  | -static void print_cpu_stall_fast_no_hz(char *cp, int cpu)  | 
|---|
| 1743 |  | -{  | 
|---|
| 1744 |  | -	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);  | 
|---|
| 1745 |  | -	unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap;  | 
|---|
| 1746 |  | -  | 
|---|
| 1747 |  | -	sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c",  | 
|---|
| 1748 |  | -		rdtp->last_accelerate & 0xffff, jiffies & 0xffff,  | 
|---|
| 1749 |  | -		ulong2long(nlpd),  | 
|---|
| 1750 |  | -		rdtp->all_lazy ? 'L' : '.',  | 
|---|
| 1751 |  | -		rdtp->tick_nohz_enabled_snap ? '.' : 'D');  | 
|---|
| 1752 |  | -}  | 
|---|
| 1753 |  | -  | 
|---|
| 1754 |  | -#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */  | 
|---|
| 1755 |  | -  | 
|---|
| 1756 |  | -static void print_cpu_stall_fast_no_hz(char *cp, int cpu)  | 
|---|
| 1757 |  | -{  | 
|---|
| 1758 |  | -	*cp = '\0';  | 
|---|
| 1759 |  | -}  | 
|---|
| 1760 |  | -  | 
|---|
| 1761 |  | -#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */  | 
|---|
| 1762 |  | -  | 
|---|
| 1763 |  | -/* Initiate the stall-info list. */  | 
|---|
| 1764 |  | -static void print_cpu_stall_info_begin(void)  | 
|---|
| 1765 |  | -{  | 
|---|
| 1766 |  | -	pr_cont("\n");  | 
|---|
| 1767 |  | -}  | 
|---|
| 1768 |  | -  | 
|---|
| 1769 |  | -/*  | 
|---|
| 1770 |  | - * Print out diagnostic information for the specified stalled CPU.  | 
|---|
| 1771 |  | - *  | 
|---|
| 1772 |  | - * If the specified CPU is aware of the current RCU grace period  | 
|---|
| 1773 |  | - * (flavor specified by rsp), then print the number of scheduling  | 
|---|
| 1774 |  | - * clock interrupts the CPU has taken during the time that it has  | 
|---|
| 1775 |  | - * been aware.  Otherwise, print the number of RCU grace periods  | 
|---|
| 1776 |  | - * that this CPU is ignorant of, for example, "1" if the CPU was  | 
|---|
| 1777 |  | - * aware of the previous grace period.  | 
|---|
| 1778 |  | - *  | 
|---|
| 1779 |  | - * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.  | 
|---|
| 1780 |  | - */  | 
|---|
| 1781 |  | -static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)  | 
|---|
| 1782 |  | -{  | 
|---|
| 1783 |  | -	unsigned long delta;  | 
|---|
| 1784 |  | -	char fast_no_hz[72];  | 
|---|
| 1785 |  | -	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);  | 
|---|
| 1786 |  | -	struct rcu_dynticks *rdtp = rdp->dynticks;  | 
|---|
| 1787 |  | -	char *ticks_title;  | 
|---|
| 1788 |  | -	unsigned long ticks_value;  | 
|---|
| 1789 |  | -  | 
|---|
| 1790 |  | -	/*  | 
|---|
| 1791 |  | -	 * We could be printing a lot while holding a spinlock.  Avoid  | 
|---|
| 1792 |  | -	 * triggering hard lockup.  | 
|---|
| 1793 |  | -	 */  | 
|---|
| 1794 |  | -	touch_nmi_watchdog();  | 
|---|
| 1795 |  | -  | 
|---|
| 1796 |  | -	ticks_value = rcu_seq_ctr(rsp->gp_seq - rdp->gp_seq);  | 
|---|
| 1797 |  | -	if (ticks_value) {  | 
|---|
| 1798 |  | -		ticks_title = "GPs behind";  | 
|---|
| 1799 |  | -	} else {  | 
|---|
| 1800 |  | -		ticks_title = "ticks this GP";  | 
|---|
| 1801 |  | -		ticks_value = rdp->ticks_this_gp;  | 
|---|
| 1802 |  | -	}  | 
|---|
| 1803 |  | -	print_cpu_stall_fast_no_hz(fast_no_hz, cpu);  | 
|---|
| 1804 |  | -	delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq);  | 
|---|
| 1805 |  | -	pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n",  | 
|---|
| 1806 |  | -	       cpu,  | 
|---|
| 1807 |  | -	       "O."[!!cpu_online(cpu)],  | 
|---|
| 1808 |  | -	       "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],  | 
|---|
| 1809 |  | -	       "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)],  | 
|---|
| 1810 |  | -	       !IS_ENABLED(CONFIG_IRQ_WORK) ? '?' :  | 
|---|
| 1811 |  | -			rdp->rcu_iw_pending ? (int)min(delta, 9UL) + '0' :  | 
|---|
| 1812 |  | -				"!."[!delta],  | 
|---|
| 1813 |  | -	       ticks_value, ticks_title,  | 
|---|
| 1814 |  | -	       rcu_dynticks_snap(rdtp) & 0xfff,  | 
|---|
| 1815 |  | -	       rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,  | 
|---|
| 1816 |  | -	       rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),  | 
|---|
| 1817 |  | -	       READ_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart,  | 
|---|
| 1818 |  | -	       fast_no_hz);  | 
|---|
| 1819 |  | -}  | 
|---|
| 1820 |  | -  | 
|---|
| 1821 |  | -/* Terminate the stall-info list. */  | 
|---|
| 1822 |  | -static void print_cpu_stall_info_end(void)  | 
|---|
| 1823 |  | -{  | 
|---|
| 1824 |  | -	pr_err("\t");  | 
|---|
| 1825 |  | -}  | 
|---|
| 1826 |  | -  | 
|---|
| 1827 |  | -/* Zero ->ticks_this_gp for all flavors of RCU. */  | 
|---|
| 1828 |  | -static void zero_cpu_stall_ticks(struct rcu_data *rdp)  | 
|---|
| 1829 |  | -{  | 
|---|
| 1830 |  | -	rdp->ticks_this_gp = 0;  | 
|---|
| 1831 |  | -	rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id());  | 
|---|
| 1832 |  | -}  | 
|---|
| 1833 |  | -  | 
|---|
| 1834 |  | -/* Increment ->ticks_this_gp for all flavors of RCU. */  | 
|---|
| 1835 |  | -static void increment_cpu_stall_ticks(void)  | 
|---|
| 1836 |  | -{  | 
|---|
| 1837 |  | -	struct rcu_state *rsp;  | 
|---|
| 1838 |  | -  | 
|---|
| 1839 |  | -	for_each_rcu_flavor(rsp)  | 
|---|
| 1840 |  | -		raw_cpu_inc(rsp->rda->ticks_this_gp);  | 
|---|
| 1841 |  | -}  | 
|---|
| 1842 | 1440 |   | 
|---|
| 1843 | 1441 |  #ifdef CONFIG_RCU_NOCB_CPU | 
|---|
| 1844 | 1442 |   | 
|---|
| 1845 | 1443 |  /* | 
|---|
| 1846 | 1444 |   * Offload callback processing from the boot-time-specified set of CPUs | 
|---|
| 1847 |  | - * specified by rcu_nocb_mask.  For each CPU in the set, there is a  | 
|---|
| 1848 |  | - * kthread created that pulls the callbacks from the corresponding CPU,  | 
|---|
| 1849 |  | - * waits for a grace period to elapse, and invokes the callbacks.  | 
|---|
| 1850 |  | - * The no-CBs CPUs do a wake_up() on their kthread when they insert  | 
|---|
| 1851 |  | - * a callback into any empty list, unless the rcu_nocb_poll boot parameter  | 
|---|
| 1852 |  | - * has been specified, in which case each kthread actively polls its  | 
|---|
| 1853 |  | - * CPU.  (Which isn't so great for energy efficiency, but which does  | 
|---|
| 1854 |  | - * reduce RCU's overhead on that CPU.)  | 
|---|
 | 1445 | + * specified by rcu_nocb_mask.  For the CPUs in the set, there are kthreads  | 
|---|
 | 1446 | + * created that pull the callbacks from the corresponding CPU, wait for  | 
|---|
 | 1447 | + * a grace period to elapse, and invoke the callbacks.  These kthreads  | 
|---|
 | 1448 | + * are organized into GP kthreads, which manage incoming callbacks, wait for  | 
|---|
 | 1449 | + * grace periods, and awaken CB kthreads, and the CB kthreads, which only  | 
|---|
 | 1450 | + * invoke callbacks.  Each GP kthread invokes its own CBs.  The no-CBs CPUs  | 
|---|
 | 1451 | + * do a wake_up() on their GP kthread when they insert a callback into any  | 
|---|
 | 1452 | + * empty list, unless the rcu_nocb_poll boot parameter has been specified,  | 
|---|
 | 1453 | + * in which case each kthread actively polls its CPU.  (Which isn't so great  | 
|---|
 | 1454 | + * for energy efficiency, but which does reduce RCU's overhead on that CPU.)  | 
|---|
| 1855 | 1455 |   * | 
|---|
| 1856 | 1456 |   * This is intended to be used in conjunction with Frederic Weisbecker's | 
|---|
| 1857 | 1457 |   * adaptive-idle work, which would seriously reduce OS jitter on CPUs | 
|---|
| 1858 | 1458 |   * running CPU-bound user-mode computations. | 
|---|
| 1859 | 1459 |   * | 
|---|
| 1860 |  | - * Offloading of callback processing could also in theory be used as  | 
|---|
| 1861 |  | - * an energy-efficiency measure because CPUs with no RCU callbacks  | 
|---|
| 1862 |  | - * queued are more aggressive about entering dyntick-idle mode.  | 
|---|
 | 1460 | + * Offloading of callbacks can also be used as an energy-efficiency  | 
|---|
 | 1461 | + * measure because CPUs with no RCU callbacks queued are more aggressive  | 
|---|
 | 1462 | + * about entering dyntick-idle mode.  | 
|---|
| 1863 | 1463 |   */ | 
|---|
| 1864 | 1464 |   | 
|---|
| 1865 | 1465 |   | 
|---|
| 1866 |  | -/* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. */  | 
|---|
 | 1466 | +/*  | 
|---|
 | 1467 | + * Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters.  | 
|---|
 | 1468 | + * The string after the "rcu_nocbs=" is either "all" for all CPUs, or a  | 
|---|
 | 1469 | + * comma-separated list of CPUs and/or CPU ranges.  If an invalid list is  | 
|---|
 | 1470 | + * given, a warning is emitted and all CPUs are offloaded.  | 
|---|
 | 1471 | + */  | 
|---|
| 1867 | 1472 |  static int __init rcu_nocb_setup(char *str) | 
|---|
| 1868 | 1473 |  { | 
|---|
| 1869 | 1474 |  	alloc_bootmem_cpumask_var(&rcu_nocb_mask); | 
|---|
| 1870 |  | -	cpulist_parse(str, rcu_nocb_mask);  | 
|---|
 | 1475 | +	if (!strcasecmp(str, "all"))  | 
|---|
 | 1476 | +		cpumask_setall(rcu_nocb_mask);  | 
|---|
 | 1477 | +	else  | 
|---|
 | 1478 | +		if (cpulist_parse(str, rcu_nocb_mask)) {  | 
|---|
 | 1479 | +			pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n");  | 
|---|
 | 1480 | +			cpumask_setall(rcu_nocb_mask);  | 
|---|
 | 1481 | +		}  | 
|---|
| 1871 | 1482 |  	return 1; | 
|---|
| 1872 | 1483 |  } | 
|---|
| 1873 | 1484 |  __setup("rcu_nocbs=", rcu_nocb_setup); | 
|---|
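The reworked `rcu_nocb_setup()` above accepts either "all" or a CPU list and falls back to offloading every CPU when the list does not parse. The sketch below mirrors that decision logic in plain C, with a 64-bit mask standing in for `cpumask_var_t` and a deliberately simplified `parse_cpulist()` that only understands "n" and "a-b" entries, so treat it as a toy stand-in for `cpulist_parse()`, not its real behavior:

```c
/* Toy stand-in for the "rcu_nocbs=" parsing logic shown above. */
#include <stdio.h>
#include <stdint.h>
#include <strings.h>	/* strcasecmp() */

#define NR_CPUS 64

/* Parse "0,3-5,..." into a bitmask; returns -1 on malformed input. */
static int parse_cpulist(const char *s, uint64_t *mask)
{
	*mask = 0;
	while (*s) {
		unsigned int a, b;
		int n;

		if (sscanf(s, "%u-%u%n", &a, &b, &n) == 2)
			;
		else if (sscanf(s, "%u%n", &a, &n) == 1)
			b = a;
		else
			return -1;
		if (a > b || b >= NR_CPUS)
			return -1;
		for (; a <= b; a++)
			*mask |= 1ULL << a;
		s += n;
		if (*s == ',')
			s++;
		else if (*s)
			return -1;
	}
	return 0;
}

static uint64_t rcu_nocb_setup_demo(const char *str)
{
	uint64_t mask;

	if (!strcasecmp(str, "all"))
		return ~0ULL;		/* cpumask_setall() analogue */
	if (parse_cpulist(str, &mask)) {
		fprintf(stderr, "bad CPU range, all CPUs set\n");
		return ~0ULL;		/* fall back, as in the hunk above */
	}
	return mask;
}

int main(void)
{
	printf("rcu_nocbs=1-3,8 -> %#llx\n",
	       (unsigned long long)rcu_nocb_setup_demo("1-3,8"));
	printf("rcu_nocbs=all   -> %#llx\n",
	       (unsigned long long)rcu_nocb_setup_demo("all"));
	printf("rcu_nocbs=oops  -> %#llx\n",
	       (unsigned long long)rcu_nocb_setup_demo("oops"));
	return 0;
}
```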
| .. | .. | 
|---|
| 1878 | 1489 |  	return 0; | 
|---|
| 1879 | 1490 |  } | 
|---|
| 1880 | 1491 |  early_param("rcu_nocb_poll", parse_rcu_nocb_poll); | 
|---|
 | 1492 | +  | 
|---|
 | 1493 | +/*  | 
|---|
 | 1494 | + * Don't bother bypassing ->cblist if the call_rcu() rate is low.  | 
|---|
 | 1495 | + * After all, the main point of bypassing is to avoid lock contention  | 
|---|
 | 1496 | + * on ->nocb_lock, which only can happen at high call_rcu() rates.  | 
|---|
 | 1497 | + */  | 
|---|
 | 1498 | +int nocb_nobypass_lim_per_jiffy = 16 * 1000 / HZ;  | 
|---|
 | 1499 | +module_param(nocb_nobypass_lim_per_jiffy, int, 0);  | 
|---|
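The default above is simply 16,000 call_rcu() invocations per second expressed per jiffy, so the threshold tracks the kernel's HZ choice. A quick check of the integer arithmetic:

```c
/* The per-jiffy bypass threshold above is just 16000 / HZ. */
#include <stdio.h>

int main(void)
{
	const int hz[] = { 100, 250, 300, 1000 };

	for (unsigned int i = 0; i < sizeof(hz) / sizeof(hz[0]); i++)
		printf("HZ=%4d -> nocb_nobypass_lim_per_jiffy = %d\n",
		       hz[i], 16 * 1000 / hz[i]);
	return 0;
}
```

So at HZ=1000 a CPU must exceed roughly 16 call_rcu() invocations within a single jiffy before the bypass list comes into play, and proportionally more per jiffy at lower HZ values.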
 | 1500 | +  | 
|---|
 | 1501 | +/*  | 
|---|
 | 1502 | + * Acquire the specified rcu_data structure's ->nocb_bypass_lock.  If the  | 
|---|
 | 1503 | + * lock isn't immediately available, increment ->nocb_lock_contended to  | 
|---|
 | 1504 | + * flag the contention.  | 
|---|
 | 1505 | + */  | 
|---|
 | 1506 | +static void rcu_nocb_bypass_lock(struct rcu_data *rdp)  | 
|---|
 | 1507 | +	__acquires(&rdp->nocb_bypass_lock)  | 
|---|
 | 1508 | +{  | 
|---|
 | 1509 | +	lockdep_assert_irqs_disabled();  | 
|---|
 | 1510 | +	if (raw_spin_trylock(&rdp->nocb_bypass_lock))  | 
|---|
 | 1511 | +		return;  | 
|---|
 | 1512 | +	atomic_inc(&rdp->nocb_lock_contended);  | 
|---|
 | 1513 | +	WARN_ON_ONCE(smp_processor_id() != rdp->cpu);  | 
|---|
 | 1514 | +	smp_mb__after_atomic(); /* atomic_inc() before lock. */  | 
|---|
 | 1515 | +	raw_spin_lock(&rdp->nocb_bypass_lock);  | 
|---|
 | 1516 | +	smp_mb__before_atomic(); /* atomic_dec() after lock. */  | 
|---|
 | 1517 | +	atomic_dec(&rdp->nocb_lock_contended);  | 
|---|
 | 1518 | +}  | 
|---|
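`rcu_nocb_bypass_lock()` above is an instance of "try the lock first; if that fails, advertise the contention in a counter while falling back to the blocking acquisition", and `rcu_nocb_wait_contended()` just below spins until that counter drains. A simplified userspace rendition with a pthread mutex and a C11 atomic follows (names invented for the sketch; the kernel's explicit `smp_mb__after_atomic()`/`smp_mb__before_atomic()` pairing has no direct counterpart here, the sketch simply leans on the mutex and seq_cst atomics; build with `-lpthread`):

```c
/* Userspace sketch of the contention-advertising lock acquisition above. */
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t bypass_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int lock_contended;

static void bypass_lock_acquire(void)
{
	if (pthread_mutex_trylock(&bypass_lock) == 0)
		return;				/* fast path: no contention */
	atomic_fetch_add(&lock_contended, 1);	/* tell others we collided */
	pthread_mutex_lock(&bypass_lock);
	atomic_fetch_sub(&lock_contended, 1);
}

static void bypass_lock_release(void)
{
	pthread_mutex_unlock(&bypass_lock);
}

/* Analogue of rcu_nocb_wait_contended(): back off until nobody is waiting. */
static void wait_until_uncontended(void)
{
	while (atomic_load(&lock_contended))
		sched_yield();			/* cpu_relax() stand-in */
}

int main(void)
{
	bypass_lock_acquire();
	printf("contended waiters: %d\n", atomic_load(&lock_contended));
	bypass_lock_release();
	wait_until_uncontended();
	puts("lock is quiet");
	return 0;
}
```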
 | 1519 | +  | 
|---|
 | 1520 | +/*  | 
|---|
 | 1521 | + * Spinwait until the specified rcu_data structure's ->nocb_lock is  | 
|---|
 | 1522 | + * not contended.  Please note that this is extremely special-purpose,  | 
|---|
 | 1523 | + * relying on the fact that at most two kthreads and one CPU contend for  | 
|---|
 | 1524 | + * this lock, and also that the two kthreads are guaranteed to have frequent  | 
|---|
 | 1525 | + * grace-period-duration time intervals between successive acquisitions  | 
|---|
 | 1526 | + * of the lock.  This allows us to use an extremely simple throttling  | 
|---|
 | 1527 | + * mechanism, and further to apply it only to the CPU doing floods of  | 
|---|
 | 1528 | + * call_rcu() invocations.  Don't try this at home!  | 
|---|
 | 1529 | + */  | 
|---|
 | 1530 | +static void rcu_nocb_wait_contended(struct rcu_data *rdp)  | 
|---|
 | 1531 | +{  | 
|---|
 | 1532 | +	WARN_ON_ONCE(smp_processor_id() != rdp->cpu);  | 
|---|
 | 1533 | +	while (WARN_ON_ONCE(atomic_read(&rdp->nocb_lock_contended)))  | 
|---|
 | 1534 | +		cpu_relax();  | 
|---|
 | 1535 | +}  | 
|---|
 | 1536 | +  | 
|---|
 | 1537 | +/*  | 
|---|
 | 1538 | + * Conditionally acquire the specified rcu_data structure's  | 
|---|
 | 1539 | + * ->nocb_bypass_lock.  | 
|---|
 | 1540 | + */  | 
|---|
 | 1541 | +static bool rcu_nocb_bypass_trylock(struct rcu_data *rdp)  | 
|---|
 | 1542 | +{  | 
|---|
 | 1543 | +	lockdep_assert_irqs_disabled();  | 
|---|
 | 1544 | +	return raw_spin_trylock(&rdp->nocb_bypass_lock);  | 
|---|
 | 1545 | +}  | 
|---|
 | 1546 | +  | 
|---|
 | 1547 | +/*  | 
|---|
 | 1548 | + * Release the specified rcu_data structure's ->nocb_bypass_lock.  | 
|---|
 | 1549 | + */  | 
|---|
 | 1550 | +static void rcu_nocb_bypass_unlock(struct rcu_data *rdp)  | 
|---|
 | 1551 | +	__releases(&rdp->nocb_bypass_lock)  | 
|---|
 | 1552 | +{  | 
|---|
 | 1553 | +	lockdep_assert_irqs_disabled();  | 
|---|
 | 1554 | +	raw_spin_unlock(&rdp->nocb_bypass_lock);  | 
|---|
 | 1555 | +}  | 
|---|
 | 1556 | +  | 
|---|
 | 1557 | +/*  | 
|---|
 | 1558 | + * Acquire the specified rcu_data structure's ->nocb_lock, but only  | 
|---|
 | 1559 | + * if it corresponds to a no-CBs CPU.  | 
|---|
 | 1560 | + */  | 
|---|
 | 1561 | +static void rcu_nocb_lock(struct rcu_data *rdp)  | 
|---|
 | 1562 | +{  | 
|---|
 | 1563 | +	lockdep_assert_irqs_disabled();  | 
|---|
 | 1564 | +	if (!rcu_segcblist_is_offloaded(&rdp->cblist))  | 
|---|
 | 1565 | +		return;  | 
|---|
 | 1566 | +	raw_spin_lock(&rdp->nocb_lock);  | 
|---|
 | 1567 | +}  | 
|---|
 | 1568 | +  | 
|---|
 | 1569 | +/*  | 
|---|
 | 1570 | + * Release the specified rcu_data structure's ->nocb_lock, but only  | 
|---|
 | 1571 | + * if it corresponds to a no-CBs CPU.  | 
|---|
 | 1572 | + */  | 
|---|
 | 1573 | +static void rcu_nocb_unlock(struct rcu_data *rdp)  | 
|---|
 | 1574 | +{  | 
|---|
 | 1575 | +	if (rcu_segcblist_is_offloaded(&rdp->cblist)) {  | 
|---|
 | 1576 | +		lockdep_assert_irqs_disabled();  | 
|---|
 | 1577 | +		raw_spin_unlock(&rdp->nocb_lock);  | 
|---|
 | 1578 | +	}  | 
|---|
 | 1579 | +}  | 
|---|
 | 1580 | +  | 
|---|
 | 1581 | +/*  | 
|---|
 | 1582 | + * Release the specified rcu_data structure's ->nocb_lock and restore  | 
|---|
 | 1583 | + * interrupts, but only if it corresponds to a no-CBs CPU.  | 
|---|
 | 1584 | + */  | 
|---|
 | 1585 | +static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,  | 
|---|
 | 1586 | +				       unsigned long flags)  | 
|---|
 | 1587 | +{  | 
|---|
 | 1588 | +	if (rcu_segcblist_is_offloaded(&rdp->cblist)) {  | 
|---|
 | 1589 | +		lockdep_assert_irqs_disabled();  | 
|---|
 | 1590 | +		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);  | 
|---|
 | 1591 | +	} else {  | 
|---|
 | 1592 | +		local_irq_restore(flags);  | 
|---|
 | 1593 | +	}  | 
|---|
 | 1594 | +}  | 
|---|
 | 1595 | +  | 
|---|
 | 1596 | +/* Lockdep check that ->cblist may be safely accessed. */  | 
|---|
 | 1597 | +static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)  | 
|---|
 | 1598 | +{  | 
|---|
 | 1599 | +	lockdep_assert_irqs_disabled();  | 
|---|
 | 1600 | +	if (rcu_segcblist_is_offloaded(&rdp->cblist))  | 
|---|
 | 1601 | +		lockdep_assert_held(&rdp->nocb_lock);  | 
|---|
 | 1602 | +}  | 
|---|
| 1881 | 1603 |   | 
|---|
| 1882 | 1604 |  /* | 
|---|
| 1883 | 1605 |   * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended | 
|---|
| .. | .. | 
|---|
| 1908 | 1630 |  } | 
|---|
| 1909 | 1631 |   | 
|---|
| 1910 | 1632 |  /* | 
|---|
| 1911 |  | - * Kick the leader kthread for this NOCB group.  Caller holds ->nocb_lock  | 
|---|
 | 1633 | + * Kick the GP kthread for this NOCB group.  Caller holds ->nocb_lock  | 
|---|
| 1912 | 1634 |   * and this function releases it. | 
|---|
| 1913 | 1635 |   */ | 
|---|
| 1914 |  | -static void __wake_nocb_leader(struct rcu_data *rdp, bool force,  | 
|---|
| 1915 |  | -			       unsigned long flags)  | 
|---|
 | 1636 | +static void wake_nocb_gp(struct rcu_data *rdp, bool force,  | 
|---|
 | 1637 | +			   unsigned long flags)  | 
|---|
| 1916 | 1638 |  	__releases(rdp->nocb_lock) | 
|---|
| 1917 | 1639 |  { | 
|---|
| 1918 |  | -	struct rcu_data *rdp_leader = rdp->nocb_leader;  | 
|---|
 | 1640 | +	bool needwake = false;  | 
|---|
 | 1641 | +	struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;  | 
|---|
| 1919 | 1642 |   | 
|---|
| 1920 | 1643 |  	lockdep_assert_held(&rdp->nocb_lock); | 
|---|
| 1921 |  | -	if (!READ_ONCE(rdp_leader->nocb_kthread)) {  | 
|---|
| 1922 |  | -		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);  | 
|---|
 | 1644 | +	if (!READ_ONCE(rdp_gp->nocb_gp_kthread)) {  | 
|---|
 | 1645 | +		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,  | 
|---|
 | 1646 | +				    TPS("AlreadyAwake"));  | 
|---|
 | 1647 | +		rcu_nocb_unlock_irqrestore(rdp, flags);  | 
|---|
| 1923 | 1648 |  		return; | 
|---|
| 1924 | 1649 |  	} | 
|---|
| 1925 |  | -	if (rdp_leader->nocb_leader_sleep || force) {  | 
|---|
| 1926 |  | -		/* Prior smp_mb__after_atomic() orders against prior enqueue. */  | 
|---|
| 1927 |  | -		WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);  | 
|---|
 | 1650 | +  | 
|---|
 | 1651 | +	if (READ_ONCE(rdp->nocb_defer_wakeup) > RCU_NOCB_WAKE_NOT) {  | 
|---|
 | 1652 | +		WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);  | 
|---|
| 1928 | 1653 |  		del_timer(&rdp->nocb_timer); | 
|---|
| 1929 |  | -		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);  | 
|---|
| 1930 |  | -		smp_mb(); /* ->nocb_leader_sleep before swake_up_one(). */  | 
|---|
| 1931 |  | -		swake_up_one(&rdp_leader->nocb_wq);  | 
|---|
| 1932 |  | -	} else {  | 
|---|
| 1933 |  | -		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);  | 
|---|
| 1934 | 1654 |  	} | 
|---|
 | 1655 | +	rcu_nocb_unlock_irqrestore(rdp, flags);  | 
|---|
 | 1656 | +	raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);  | 
|---|
 | 1657 | +	if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {  | 
|---|
 | 1658 | +		WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);  | 
|---|
 | 1659 | +		needwake = true;  | 
|---|
 | 1660 | +		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake"));  | 
|---|
 | 1661 | +	}  | 
|---|
 | 1662 | +	raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);  | 
|---|
 | 1663 | +	if (needwake)  | 
|---|
 | 1664 | +		wake_up_process(rdp_gp->nocb_gp_kthread);  | 
|---|
| 1935 | 1665 |  } | 
|---|
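The rewritten `wake_nocb_gp()` clears `->nocb_gp_sleep` under the GP kthread's own `nocb_gp_lock` and calls `wake_up_process()` only when it was forced to or actually found the flag set, so an already-running GP kthread is not poked again. A rough userspace analogue of that "clear the sleep flag under the lock, then wake" shape, using a condition variable and invented names (the kernel's swait/wake_up_process machinery is of course richer than this; build with `-lpthread`):

```c
/* Userspace sketch of "clear the sleep flag under the lock, then wake". */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t gp_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t gp_cv = PTHREAD_COND_INITIALIZER;
static bool gp_sleep = true;		/* analogous to ->nocb_gp_sleep */

static void *gp_kthread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&gp_lock);
	while (gp_sleep)		/* tolerate spurious wakeups */
		pthread_cond_wait(&gp_cv, &gp_lock);
	pthread_mutex_unlock(&gp_lock);
	puts("GP kthread: handling newly queued callbacks");
	return NULL;
}

static void wake_gp(bool force)
{
	bool needwake = false;

	pthread_mutex_lock(&gp_lock);
	if (force || gp_sleep) {	/* mirrors the force/sleep test above */
		gp_sleep = false;
		needwake = true;
	}
	pthread_mutex_unlock(&gp_lock);
	if (needwake)
		pthread_cond_signal(&gp_cv);
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, gp_kthread, NULL);
	wake_gp(false);
	pthread_join(tid, NULL);
	return 0;
}
```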
| 1936 | 1666 |   | 
|---|
| 1937 | 1667 |  /* | 
|---|
| 1938 |  | - * Kick the leader kthread for this NOCB group, but caller has not  | 
|---|
| 1939 |  | - * acquired locks.  | 
|---|
 | 1668 | + * Arrange to wake the GP kthread for this NOCB group at some future  | 
|---|
 | 1669 | + * time when it is safe to do so.  | 
|---|
| 1940 | 1670 |   */ | 
|---|
| 1941 |  | -static void wake_nocb_leader(struct rcu_data *rdp, bool force)  | 
|---|
 | 1671 | +static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,  | 
|---|
 | 1672 | +			       const char *reason)  | 
|---|
| 1942 | 1673 |  { | 
|---|
| 1943 |  | -	unsigned long flags;  | 
|---|
| 1944 |  | -  | 
|---|
| 1945 |  | -	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);  | 
|---|
| 1946 |  | -	__wake_nocb_leader(rdp, force, flags);  | 
|---|
| 1947 |  | -}  | 
|---|
| 1948 |  | -  | 
|---|
| 1949 |  | -/*  | 
|---|
| 1950 |  | - * Arrange to wake the leader kthread for this NOCB group at some  | 
|---|
| 1951 |  | - * future time when it is safe to do so.  | 
|---|
| 1952 |  | - */  | 
|---|
| 1953 |  | -static void wake_nocb_leader_defer(struct rcu_data *rdp, int waketype,  | 
|---|
| 1954 |  | -				   const char *reason)  | 
|---|
| 1955 |  | -{  | 
|---|
| 1956 |  | -	unsigned long flags;  | 
|---|
| 1957 |  | -  | 
|---|
| 1958 |  | -	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);  | 
|---|
| 1959 | 1674 |  	if (rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) | 
|---|
| 1960 | 1675 |  		mod_timer(&rdp->nocb_timer, jiffies + 1); | 
|---|
| 1961 |  | -	WRITE_ONCE(rdp->nocb_defer_wakeup, waketype);  | 
|---|
| 1962 |  | -	trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, reason);  | 
|---|
| 1963 |  | -	raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);  | 
|---|
 | 1676 | +	if (rdp->nocb_defer_wakeup < waketype)  | 
|---|
 | 1677 | +		WRITE_ONCE(rdp->nocb_defer_wakeup, waketype);  | 
|---|
 | 1678 | +	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, reason);  | 
|---|
| 1964 | 1679 |  } | 
|---|
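
The deferred-wakeup helper above only ever upgrades the recorded wake request and arms the one-jiffy timer on the first deferral; the timer handler (or a later deferred-wakeup check) then performs the real wake. Below is a minimal user-space sketch of just that bookkeeping. The enum values and the upgrade-only rule follow the patch; the `fake_rdp` structure, the `timer_armed` flag, and the printouts are invented for illustration and are not kernel code.

```c
#include <stdbool.h>
#include <stdio.h>

enum nocb_wake { RCU_NOCB_WAKE_NOT, RCU_NOCB_WAKE, RCU_NOCB_WAKE_FORCE };

struct fake_rdp {
	enum nocb_wake nocb_defer_wakeup;
	bool timer_armed;	/* stands in for mod_timer(&rdp->nocb_timer, jiffies + 1) */
};

static void wake_gp_defer(struct fake_rdp *rdp, enum nocb_wake waketype,
			  const char *reason)
{
	if (rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT)
		rdp->timer_armed = true;		/* first deferral arms the one-jiffy timer */
	if (rdp->nocb_defer_wakeup < waketype)
		rdp->nocb_defer_wakeup = waketype;	/* only ever upgrade the request */
	printf("%s: deferred wake level now %d, timer %s\n", reason,
	       rdp->nocb_defer_wakeup, rdp->timer_armed ? "armed" : "idle");
}

int main(void)
{
	struct fake_rdp rdp = { RCU_NOCB_WAKE_NOT, false };

	wake_gp_defer(&rdp, RCU_NOCB_WAKE, "WakeEmptyIsDeferred");
	wake_gp_defer(&rdp, RCU_NOCB_WAKE_FORCE, "WakeOvfIsDeferred");
	wake_gp_defer(&rdp, RCU_NOCB_WAKE, "weaker request (no downgrade)");
	return 0;
}
```
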
| 1965 | 1680 |   | 
|---|
| 1966 | 1681 |  /* | 
|---|
| 1967 |  | - * Does the specified CPU need an RCU callback for the specified flavor  | 
|---|
| 1968 |  | - * of rcu_barrier()?  | 
|---|
 | 1682 | + * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL.  | 
|---|
 | 1683 | + * However, if there is a callback to be enqueued and if ->nocb_bypass  | 
|---|
 | 1684 | + * proves to be initially empty, just return false because the no-CB GP  | 
|---|
 | 1685 | + * kthread may need to be awakened in this case.  | 
|---|
 | 1686 | + *  | 
|---|
 | 1687 | + * Note that this function always returns true if rhp is NULL.  | 
|---|
| 1969 | 1688 |   */ | 
|---|
| 1970 |  | -static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)  | 
|---|
 | 1689 | +static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,  | 
|---|
 | 1690 | +				     unsigned long j)  | 
|---|
| 1971 | 1691 |  { | 
|---|
| 1972 |  | -	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);  | 
|---|
| 1973 |  | -	unsigned long ret;  | 
|---|
| 1974 |  | -#ifdef CONFIG_PROVE_RCU  | 
|---|
| 1975 |  | -	struct rcu_head *rhp;  | 
|---|
| 1976 |  | -#endif /* #ifdef CONFIG_PROVE_RCU */  | 
|---|
 | 1692 | +	struct rcu_cblist rcl;  | 
|---|
| 1977 | 1693 |   | 
|---|
| 1978 |  | -	/*  | 
|---|
| 1979 |  | -	 * Check count of all no-CBs callbacks awaiting invocation.  | 
|---|
| 1980 |  | -	 * There needs to be a barrier before this function is called,  | 
|---|
| 1981 |  | -	 * but associated with a prior determination that no more  | 
|---|
| 1982 |  | -	 * callbacks would be posted.  In the worst case, the first  | 
|---|
| 1983 |  | -	 * barrier in _rcu_barrier() suffices (but the caller cannot  | 
|---|
| 1984 |  | -	 * necessarily rely on this, not a substitute for the caller  | 
|---|
| 1985 |  | -	 * getting the concurrency design right!).  There must also be  | 
|---|
| 1986 |  | -	 * a barrier between the following load an posting of a callback  | 
|---|
| 1987 |  | -	 * (if a callback is in fact needed).  This is associated with an  | 
|---|
| 1988 |  | -	 * atomic_inc() in the caller.  | 
|---|
| 1989 |  | -	 */  | 
|---|
| 1990 |  | -	ret = atomic_long_read(&rdp->nocb_q_count);  | 
|---|
| 1991 |  | -  | 
|---|
| 1992 |  | -#ifdef CONFIG_PROVE_RCU  | 
|---|
| 1993 |  | -	rhp = READ_ONCE(rdp->nocb_head);  | 
|---|
| 1994 |  | -	if (!rhp)  | 
|---|
| 1995 |  | -		rhp = READ_ONCE(rdp->nocb_gp_head);  | 
|---|
| 1996 |  | -	if (!rhp)  | 
|---|
| 1997 |  | -		rhp = READ_ONCE(rdp->nocb_follower_head);  | 
|---|
| 1998 |  | -  | 
|---|
| 1999 |  | -	/* Having no rcuo kthread but CBs after scheduler starts is bad! */  | 
|---|
| 2000 |  | -	if (!READ_ONCE(rdp->nocb_kthread) && rhp &&  | 
|---|
| 2001 |  | -	    rcu_scheduler_fully_active) {  | 
|---|
| 2002 |  | -		/* RCU callback enqueued before CPU first came online??? */  | 
|---|
| 2003 |  | -		pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n",  | 
|---|
| 2004 |  | -		       cpu, rhp->func);  | 
|---|
| 2005 |  | -		WARN_ON_ONCE(1);  | 
|---|
 | 1694 | +	WARN_ON_ONCE(!rcu_segcblist_is_offloaded(&rdp->cblist));  | 
|---|
 | 1695 | +	rcu_lockdep_assert_cblist_protected(rdp);  | 
|---|
 | 1696 | +	lockdep_assert_held(&rdp->nocb_bypass_lock);  | 
|---|
 | 1697 | +	if (rhp && !rcu_cblist_n_cbs(&rdp->nocb_bypass)) {  | 
|---|
 | 1698 | +		raw_spin_unlock(&rdp->nocb_bypass_lock);  | 
|---|
 | 1699 | +		return false;  | 
|---|
| 2006 | 1700 |  	} | 
|---|
| 2007 |  | -#endif /* #ifdef CONFIG_PROVE_RCU */  | 
|---|
| 2008 |  | -  | 
|---|
| 2009 |  | -	return !!ret;  | 
|---|
 | 1701 | +	/* Note: ->cblist.len already accounts for ->nocb_bypass contents. */  | 
|---|
 | 1702 | +	if (rhp)  | 
|---|
 | 1703 | +		rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */  | 
|---|
 | 1704 | +	rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp);  | 
|---|
 | 1705 | +	rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl);  | 
|---|
 | 1706 | +	WRITE_ONCE(rdp->nocb_bypass_first, j);  | 
|---|
 | 1707 | +	rcu_nocb_bypass_unlock(rdp);  | 
|---|
 | 1708 | +	return true;  | 
|---|
| 2010 | 1709 |  } | 
|---|
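
One detail worth calling out in the flush helper above is the length accounting: per the in-line note, ->cblist.len already counts the callbacks parked in ->nocb_bypass, so a flush moves callbacks without changing the count, and only a newly arriving callback bumps the count (before it is enqueued). That is what keeps rcu_barrier()'s view of the queue consistent. The toy user-space model below illustrates the invariant; the two-counter `struct model` and its fields are made up purely for illustration.

```c
#include <stdio.h>

struct model {
	long cblist_len;	/* counts main-list AND bypass callbacks */
	long bypass_n;		/* callbacks currently parked in the bypass */
	long main_n;		/* callbacks actually on the main list */
};

/* Mirrors rcu_nocb_do_flush_bypass(): returns 0 if the caller must enqueue. */
static int flush_bypass(struct model *m, int have_new_cb)
{
	if (have_new_cb && !m->bypass_n)
		return 0;			/* bypass empty: caller enqueues, may need a wake */
	if (have_new_cb)
		m->cblist_len++;		/* account for the new callback first */
	m->main_n += m->bypass_n + have_new_cb;	/* move everything onto the main list */
	m->bypass_n = 0;
	return 1;
}

int main(void)
{
	struct model m = { .cblist_len = 3, .bypass_n = 3, .main_n = 0 };

	flush_bypass(&m, 1);
	printf("len=%ld main=%ld bypass=%ld\n", m.cblist_len, m.main_n, m.bypass_n);
	/* len=4 main=4 bypass=0: count and contents stay consistent. */
	return 0;
}
```
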
| 2011 | 1710 |   | 
|---|
| 2012 | 1711 |  /* | 
|---|
| 2013 |  | - * Enqueue the specified string of rcu_head structures onto the specified  | 
|---|
| 2014 |  | - * CPU's no-CBs lists.  The CPU is specified by rdp, the head of the  | 
|---|
| 2015 |  | - * string by rhp, and the tail of the string by rhtp.  The non-lazy/lazy  | 
|---|
| 2016 |  | - * counts are supplied by rhcount and rhcount_lazy.  | 
|---|
 | 1712 | + * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL.  | 
|---|
 | 1713 | + * However, if there is a callback to be enqueued and if ->nocb_bypass  | 
|---|
 | 1714 | + * proves to be initially empty, just return false because the no-CB GP  | 
|---|
 | 1715 | + * kthread may need to be awakened in this case.  | 
|---|
 | 1716 | + *  | 
|---|
 | 1717 | + * Note that this function always returns true if rhp is NULL.  | 
|---|
 | 1718 | + */  | 
|---|
 | 1719 | +static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,  | 
|---|
 | 1720 | +				  unsigned long j)  | 
|---|
 | 1721 | +{  | 
|---|
 | 1722 | +	if (!rcu_segcblist_is_offloaded(&rdp->cblist))  | 
|---|
 | 1723 | +		return true;  | 
|---|
 | 1724 | +	rcu_lockdep_assert_cblist_protected(rdp);  | 
|---|
 | 1725 | +	rcu_nocb_bypass_lock(rdp);  | 
|---|
 | 1726 | +	return rcu_nocb_do_flush_bypass(rdp, rhp, j);  | 
|---|
 | 1727 | +}  | 
|---|
 | 1728 | +  | 
|---|
 | 1729 | +/*  | 
|---|
 | 1730 | + * If the ->nocb_bypass_lock is immediately available, flush the  | 
|---|
 | 1731 | + * ->nocb_bypass queue into ->cblist.  | 
|---|
 | 1732 | + */  | 
|---|
 | 1733 | +static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)  | 
|---|
 | 1734 | +{  | 
|---|
 | 1735 | +	rcu_lockdep_assert_cblist_protected(rdp);  | 
|---|
 | 1736 | +	if (!rcu_segcblist_is_offloaded(&rdp->cblist) ||  | 
|---|
 | 1737 | +	    !rcu_nocb_bypass_trylock(rdp))  | 
|---|
 | 1738 | +		return;  | 
|---|
 | 1739 | +	WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j));  | 
|---|
 | 1740 | +}  | 
|---|
 | 1741 | +  | 
|---|
 | 1742 | +/*  | 
|---|
 | 1743 | + * See whether it is appropriate to use the ->nocb_bypass list in order  | 
|---|
 | 1744 | + * to control contention on ->nocb_lock.  A limited number of direct  | 
|---|
 | 1745 | + * enqueues are permitted into ->cblist per jiffy.  If ->nocb_bypass  | 
|---|
 | 1746 | + * is non-empty, further callbacks must be placed into ->nocb_bypass,  | 
|---|
 | 1747 | + * otherwise rcu_barrier() breaks.  Use rcu_nocb_flush_bypass() to switch  | 
|---|
 | 1748 | + * back to direct use of ->cblist.  However, ->nocb_bypass should not be  | 
|---|
 | 1749 | + * used if ->cblist is empty, because otherwise callbacks can be stranded  | 
|---|
 | 1750 | + * on ->nocb_bypass because we cannot count on the current CPU ever again  | 
|---|
 | 1751 | + * invoking call_rcu().  The general rule is that if ->nocb_bypass is  | 
|---|
 | 1752 | + * non-empty, the corresponding no-CBs grace-period kthread must not be  | 
|---|
 | 1753 | + * in an indefinite sleep state.  | 
|---|
 | 1754 | + *  | 
|---|
 | 1755 | + * Finally, it is not permitted to use the bypass during early boot,  | 
|---|
 | 1756 | + * as doing so would confuse the auto-initialization code.  Besides  | 
|---|
 | 1757 | + * which, there is no point in worrying about lock contention while  | 
|---|
 | 1758 | + * there is only one CPU in operation.  | 
|---|
 | 1759 | + */  | 
|---|
 | 1760 | +static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,  | 
|---|
 | 1761 | +				bool *was_alldone, unsigned long flags)  | 
|---|
 | 1762 | +{  | 
|---|
 | 1763 | +	unsigned long c;  | 
|---|
 | 1764 | +	unsigned long cur_gp_seq;  | 
|---|
 | 1765 | +	unsigned long j = jiffies;  | 
|---|
 | 1766 | +	long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);  | 
|---|
 | 1767 | +  | 
|---|
 | 1768 | +	if (!rcu_segcblist_is_offloaded(&rdp->cblist)) {  | 
|---|
 | 1769 | +		*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);  | 
|---|
 | 1770 | +		return false; /* Not offloaded, no bypassing. */  | 
|---|
 | 1771 | +	}  | 
|---|
 | 1772 | +	lockdep_assert_irqs_disabled();  | 
|---|
 | 1773 | +  | 
|---|
 | 1774 | +	// Don't use ->nocb_bypass during early boot.  | 
|---|
 | 1775 | +	if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {  | 
|---|
 | 1776 | +		rcu_nocb_lock(rdp);  | 
|---|
 | 1777 | +		WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));  | 
|---|
 | 1778 | +		*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);  | 
|---|
 | 1779 | +		return false;  | 
|---|
 | 1780 | +	}  | 
|---|
 | 1781 | +  | 
|---|
 | 1782 | +	// If we have advanced to a new jiffy, reset counts to allow  | 
|---|
 | 1783 | +	// moving back from ->nocb_bypass to ->cblist.  | 
|---|
 | 1784 | +	if (j == rdp->nocb_nobypass_last) {  | 
|---|
 | 1785 | +		c = rdp->nocb_nobypass_count + 1;  | 
|---|
 | 1786 | +	} else {  | 
|---|
 | 1787 | +		WRITE_ONCE(rdp->nocb_nobypass_last, j);  | 
|---|
 | 1788 | +		c = rdp->nocb_nobypass_count - nocb_nobypass_lim_per_jiffy;  | 
|---|
 | 1789 | +		if (ULONG_CMP_LT(rdp->nocb_nobypass_count,  | 
|---|
 | 1790 | +				 nocb_nobypass_lim_per_jiffy))  | 
|---|
 | 1791 | +			c = 0;  | 
|---|
 | 1792 | +		else if (c > nocb_nobypass_lim_per_jiffy)  | 
|---|
 | 1793 | +			c = nocb_nobypass_lim_per_jiffy;  | 
|---|
 | 1794 | +	}  | 
|---|
 | 1795 | +	WRITE_ONCE(rdp->nocb_nobypass_count, c);  | 
|---|
 | 1796 | +  | 
|---|
 | 1797 | +	// If there hasn't yet been all that many ->cblist enqueues  | 
|---|
 | 1798 | +	// this jiffy, tell the caller to enqueue onto ->cblist.  But flush  | 
|---|
 | 1799 | +	// ->nocb_bypass first.  | 
|---|
 | 1800 | +	if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy) {  | 
|---|
 | 1801 | +		rcu_nocb_lock(rdp);  | 
|---|
 | 1802 | +		*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);  | 
|---|
 | 1803 | +		if (*was_alldone)  | 
|---|
 | 1804 | +			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,  | 
|---|
 | 1805 | +					    TPS("FirstQ"));  | 
|---|
 | 1806 | +		WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j));  | 
|---|
 | 1807 | +		WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));  | 
|---|
 | 1808 | +		return false; // Caller must enqueue the callback.  | 
|---|
 | 1809 | +	}  | 
|---|
 | 1810 | +  | 
|---|
 | 1811 | +	// If ->nocb_bypass has been used too long or is too full,  | 
|---|
 | 1812 | +	// flush ->nocb_bypass to ->cblist.  | 
|---|
 | 1813 | +	if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) ||  | 
|---|
 | 1814 | +	    ncbs >= qhimark) {  | 
|---|
 | 1815 | +		rcu_nocb_lock(rdp);  | 
|---|
 | 1816 | +		if (!rcu_nocb_flush_bypass(rdp, rhp, j)) {  | 
|---|
 | 1817 | +			*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);  | 
|---|
 | 1818 | +			if (*was_alldone)  | 
|---|
 | 1819 | +				trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,  | 
|---|
 | 1820 | +						    TPS("FirstQ"));  | 
|---|
 | 1821 | +			WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));  | 
|---|
 | 1822 | +			return false; // Caller must enqueue the callback.  | 
|---|
 | 1823 | +		}  | 
|---|
 | 1824 | +		if (j != rdp->nocb_gp_adv_time &&  | 
|---|
 | 1825 | +		    rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&  | 
|---|
 | 1826 | +		    rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {  | 
|---|
 | 1827 | +			rcu_advance_cbs_nowake(rdp->mynode, rdp);  | 
|---|
 | 1828 | +			rdp->nocb_gp_adv_time = j;  | 
|---|
 | 1829 | +		}  | 
|---|
 | 1830 | +		rcu_nocb_unlock_irqrestore(rdp, flags);  | 
|---|
 | 1831 | +		return true; // Callback already enqueued.  | 
|---|
 | 1832 | +	}  | 
|---|
 | 1833 | +  | 
|---|
 | 1834 | +	// We need to use the bypass.  | 
|---|
 | 1835 | +	rcu_nocb_wait_contended(rdp);  | 
|---|
 | 1836 | +	rcu_nocb_bypass_lock(rdp);  | 
|---|
 | 1837 | +	ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);  | 
|---|
 | 1838 | +	rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */  | 
|---|
 | 1839 | +	rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);  | 
|---|
 | 1840 | +	if (!ncbs) {  | 
|---|
 | 1841 | +		WRITE_ONCE(rdp->nocb_bypass_first, j);  | 
|---|
 | 1842 | +		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ"));  | 
|---|
 | 1843 | +	}  | 
|---|
 | 1844 | +	rcu_nocb_bypass_unlock(rdp);  | 
|---|
 | 1845 | +	smp_mb(); /* Order enqueue before wake. */  | 
|---|
 | 1846 | +	if (ncbs) {  | 
|---|
 | 1847 | +		local_irq_restore(flags);  | 
|---|
 | 1848 | +	} else {  | 
|---|
 | 1849 | +		// No-CBs GP kthread might be indefinitely asleep; if so, wake it.  | 
|---|
 | 1850 | +		rcu_nocb_lock(rdp); // Rare during call_rcu() flood.  | 
|---|
 | 1851 | +		if (!rcu_segcblist_pend_cbs(&rdp->cblist)) {  | 
|---|
 | 1852 | +			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,  | 
|---|
 | 1853 | +					    TPS("FirstBQwake"));  | 
|---|
 | 1854 | +			__call_rcu_nocb_wake(rdp, true, flags);  | 
|---|
 | 1855 | +		} else {  | 
|---|
 | 1856 | +			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,  | 
|---|
 | 1857 | +					    TPS("FirstBQnoWake"));  | 
|---|
 | 1858 | +			rcu_nocb_unlock_irqrestore(rdp, flags);  | 
|---|
 | 1859 | +		}  | 
|---|
 | 1860 | +	}  | 
|---|
 | 1861 | +	return true; // Callback already enqueued.  | 
|---|
 | 1862 | +}  | 
|---|
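
The heart of rcu_nocb_try_bypass() is the per-jiffy rate limit on direct ->cblist enqueues: within one jiffy the no-bypass count climbs with each direct attempt, and on each new jiffy it is paid down by one limit's worth. The sketch below models only that decision, not the age/size flush, the grace-period advancing, or the locking. The limit of 16 and the fake jiffies values are assumptions for the example, so treat it as an illustration of the decay rule rather than the real call_rcu() path.

```c
#include <stdio.h>

#define LIM_PER_JIFFY 16	/* stand-in for nocb_nobypass_lim_per_jiffy */

static unsigned long nobypass_count;
static unsigned long nobypass_last_jiffy;

/* Returns 1 if the callback should go to the bypass, 0 for a direct enqueue. */
static int use_bypass(unsigned long j)
{
	unsigned long c;

	if (j == nobypass_last_jiffy) {
		c = nobypass_count + 1;		/* same jiffy: one more direct attempt */
	} else {
		nobypass_last_jiffy = j;	/* new jiffy: pay back one limit's worth */
		c = nobypass_count < LIM_PER_JIFFY ? 0
			: nobypass_count - LIM_PER_JIFFY;
		if (c > LIM_PER_JIFFY)
			c = LIM_PER_JIFFY;
	}
	nobypass_count = c;
	return nobypass_count >= LIM_PER_JIFFY;
}

int main(void)
{
	unsigned long j = 1000;
	int i, bypassed = 0;

	for (i = 0; i < 40; i++)		/* a burst of 40 callbacks in one jiffy */
		bypassed += use_bypass(j);
	printf("jiffy %lu: %d of 40 callbacks went to the bypass\n", j, bypassed);
	printf("jiffy %lu: %s\n", j + 1,
	       use_bypass(j + 1) ? "still bypassing" : "direct again");
	printf("jiffy %lu: %s\n", j + 2,
	       use_bypass(j + 2) ? "still bypassing" : "direct again");
	return 0;
}
```

After a burst, the count decays by one limit per quiet jiffy, so sustained floods stay on the bypass (keeping ->nocb_lock contention down) while occasional callbacks return to direct enqueues within a couple of jiffies.
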
 | 1863 | +  | 
|---|
 | 1864 | +/*  | 
|---|
 | 1865 | + * Awaken the no-CBs grace-period kthread if needed, either due to it  | 
|---|
 | 1866 | + * legitimately being asleep or due to overload conditions.  | 
|---|
| 2017 | 1867 |   * | 
|---|
| 2018 | 1868 |   * If warranted, also wake up the kthread servicing this CPU's queues. | 
|---|
| 2019 | 1869 |   */ | 
|---|
| 2020 |  | -static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,  | 
|---|
| 2021 |  | -				    struct rcu_head *rhp,  | 
|---|
| 2022 |  | -				    struct rcu_head **rhtp,  | 
|---|
| 2023 |  | -				    int rhcount, int rhcount_lazy,  | 
|---|
| 2024 |  | -				    unsigned long flags)  | 
|---|
 | 1870 | +static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,  | 
|---|
 | 1871 | +				 unsigned long flags)  | 
|---|
 | 1872 | +				 __releases(rdp->nocb_lock)  | 
|---|
| 2025 | 1873 |  { | 
|---|
| 2026 |  | -	int len;  | 
|---|
| 2027 |  | -	struct rcu_head **old_rhpp;  | 
|---|
 | 1874 | +	unsigned long cur_gp_seq;  | 
|---|
 | 1875 | +	unsigned long j;  | 
|---|
 | 1876 | +	long len;  | 
|---|
| 2028 | 1877 |  	struct task_struct *t; | 
|---|
| 2029 | 1878 |   | 
|---|
| 2030 |  | -	/* Enqueue the callback on the nocb list and update counts. */  | 
|---|
| 2031 |  | -	atomic_long_add(rhcount, &rdp->nocb_q_count);  | 
|---|
| 2032 |  | -	/* rcu_barrier() relies on ->nocb_q_count add before xchg. */  | 
|---|
| 2033 |  | -	old_rhpp = xchg(&rdp->nocb_tail, rhtp);  | 
|---|
| 2034 |  | -	WRITE_ONCE(*old_rhpp, rhp);  | 
|---|
| 2035 |  | -	atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);  | 
|---|
| 2036 |  | -	smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */  | 
|---|
| 2037 |  | -  | 
|---|
| 2038 |  | -	/* If we are not being polled and there is a kthread, awaken it ... */  | 
|---|
| 2039 |  | -	t = READ_ONCE(rdp->nocb_kthread);  | 
|---|
 | 1879 | +	// If we are being polled or there is no kthread, just leave.  | 
|---|
 | 1880 | +	t = READ_ONCE(rdp->nocb_gp_kthread);  | 
|---|
| 2040 | 1881 |  	if (rcu_nocb_poll || !t) { | 
|---|
| 2041 |  | -		trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,  | 
|---|
 | 1882 | +		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,  | 
|---|
| 2042 | 1883 |  				    TPS("WakeNotPoll")); | 
|---|
 | 1884 | +		rcu_nocb_unlock_irqrestore(rdp, flags);  | 
|---|
| 2043 | 1885 |  		return; | 
|---|
| 2044 | 1886 |  	} | 
|---|
| 2045 |  | -	len = atomic_long_read(&rdp->nocb_q_count);  | 
|---|
| 2046 |  | -	if (old_rhpp == &rdp->nocb_head) {  | 
|---|
 | 1887 | +	// Need to actually do a wakeup.  | 
|---|
 | 1888 | +	len = rcu_segcblist_n_cbs(&rdp->cblist);  | 
|---|
 | 1889 | +	if (was_alldone) {  | 
|---|
 | 1890 | +		rdp->qlen_last_fqs_check = len;  | 
|---|
| 2047 | 1891 |  		if (!irqs_disabled_flags(flags)) { | 
|---|
| 2048 | 1892 |  			/* ... if queue was empty ... */ | 
|---|
| 2049 |  | -			wake_nocb_leader(rdp, false);  | 
|---|
| 2050 |  | -			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,  | 
|---|
 | 1893 | +			wake_nocb_gp(rdp, false, flags);  | 
|---|
 | 1894 | +			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,  | 
|---|
| 2051 | 1895 |  					    TPS("WakeEmpty")); | 
|---|
| 2052 | 1896 |  		} else { | 
|---|
| 2053 |  | -			wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE,  | 
|---|
| 2054 |  | -					       TPS("WakeEmptyIsDeferred"));  | 
|---|
 | 1897 | +			wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,  | 
|---|
 | 1898 | +					   TPS("WakeEmptyIsDeferred"));  | 
|---|
 | 1899 | +			rcu_nocb_unlock_irqrestore(rdp, flags);  | 
|---|
| 2055 | 1900 |  		} | 
|---|
| 2056 |  | -		rdp->qlen_last_fqs_check = 0;  | 
|---|
| 2057 | 1901 |  	} else if (len > rdp->qlen_last_fqs_check + qhimark) { | 
|---|
| 2058 | 1902 |  		/* ... or if many callbacks queued. */ | 
|---|
| 2059 |  | -		if (!irqs_disabled_flags(flags)) {  | 
|---|
| 2060 |  | -			wake_nocb_leader(rdp, true);  | 
|---|
| 2061 |  | -			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,  | 
|---|
| 2062 |  | -					    TPS("WakeOvf"));  | 
|---|
| 2063 |  | -		} else {  | 
|---|
| 2064 |  | -			wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE_FORCE,  | 
|---|
| 2065 |  | -					       TPS("WakeOvfIsDeferred"));  | 
|---|
 | 1903 | +		rdp->qlen_last_fqs_check = len;  | 
|---|
 | 1904 | +		j = jiffies;  | 
|---|
 | 1905 | +		if (j != rdp->nocb_gp_adv_time &&  | 
|---|
 | 1906 | +		    rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&  | 
|---|
 | 1907 | +		    rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {  | 
|---|
 | 1908 | +			rcu_advance_cbs_nowake(rdp->mynode, rdp);  | 
|---|
 | 1909 | +			rdp->nocb_gp_adv_time = j;  | 
|---|
| 2066 | 1910 |  		} | 
|---|
| 2067 |  | -		rdp->qlen_last_fqs_check = LONG_MAX / 2;  | 
|---|
 | 1911 | +		smp_mb(); /* Enqueue before timer_pending(). */  | 
|---|
 | 1912 | +		if ((rdp->nocb_cb_sleep ||  | 
|---|
 | 1913 | +		     !rcu_segcblist_ready_cbs(&rdp->cblist)) &&  | 
|---|
 | 1914 | +		    !timer_pending(&rdp->nocb_bypass_timer))  | 
|---|
 | 1915 | +			wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,  | 
|---|
 | 1916 | +					   TPS("WakeOvfIsDeferred"));  | 
|---|
 | 1917 | +		rcu_nocb_unlock_irqrestore(rdp, flags);  | 
|---|
| 2068 | 1918 |  	} else { | 
|---|
| 2069 |  | -		trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot"));  | 
|---|
 | 1919 | +		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));  | 
|---|
 | 1920 | +		rcu_nocb_unlock_irqrestore(rdp, flags);  | 
|---|
| 2070 | 1921 |  	} | 
|---|
| 2071 | 1922 |  	return; | 
|---|
| 2072 | 1923 |  } | 
|---|
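
Condensing __call_rcu_nocb_wake() above into a decision table makes the heuristics easier to see: a callback landing on a previously empty list wakes the GP kthread immediately (or defers the wake if interrupts are disabled), a backlog beyond qlen_last_fqs_check + qhimark forces a deferred wake when the CB side looks idle, and everything else is left for the GP kthread to notice on its own. The user-space sketch below is a simplification: the overload case collapses the sleep/ready/timer-pending checks into a single flag, and the qhimark value is an assumed example.

```c
#include <stdbool.h>
#include <stdio.h>

#define QHIMARK 10000	/* stand-in for the qhimark module parameter */

enum wake { WAKE_NONE, WAKE_NOW, WAKE_DEFERRED, WAKE_FORCE_DEFERRED };

static enum wake nocb_wake_policy(bool was_alldone, bool irqs_disabled,
				  long len, long qlen_last_fqs_check,
				  bool cb_kthread_asleep)
{
	if (was_alldone)			 /* first callback since the list emptied */
		return irqs_disabled ? WAKE_DEFERRED : WAKE_NOW;
	if (len > qlen_last_fqs_check + QHIMARK) /* the queue has grown a lot */
		return cb_kthread_asleep ? WAKE_FORCE_DEFERRED : WAKE_NONE;
	return WAKE_NONE;			 /* GP kthread will get to it anyway */
}

int main(void)
{
	printf("%d %d %d %d\n",
	       nocb_wake_policy(true,  false,     1,     0, false),  /* WAKE_NOW */
	       nocb_wake_policy(true,  true,      1,     0, false),  /* WAKE_DEFERRED */
	       nocb_wake_policy(false, false, 20001, 10000, true),   /* WAKE_FORCE_DEFERRED */
	       nocb_wake_policy(false, false,   500,   400, false)); /* WAKE_NONE */
	return 0;
}
```
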
| 2073 | 1924 |   | 
|---|
| 2074 |  | -/*  | 
|---|
| 2075 |  | - * This is a helper for __call_rcu(), which invokes this when the normal  | 
|---|
| 2076 |  | - * callback queue is inoperable.  If this is not a no-CBs CPU, this  | 
|---|
| 2077 |  | - * function returns failure back to __call_rcu(), which can complain  | 
|---|
| 2078 |  | - * appropriately.  | 
|---|
| 2079 |  | - *  | 
|---|
| 2080 |  | - * Otherwise, this function queues the callback where the corresponding  | 
|---|
| 2081 |  | - * "rcuo" kthread can find it.  | 
|---|
| 2082 |  | - */  | 
|---|
| 2083 |  | -static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,  | 
|---|
| 2084 |  | -			    bool lazy, unsigned long flags)  | 
|---|
 | 1925 | +/* Wake up the no-CBs GP kthread to flush ->nocb_bypass. */  | 
|---|
 | 1926 | +static void do_nocb_bypass_wakeup_timer(struct timer_list *t)  | 
|---|
| 2085 | 1927 |  { | 
|---|
 | 1928 | +	unsigned long flags;  | 
|---|
 | 1929 | +	struct rcu_data *rdp = from_timer(rdp, t, nocb_bypass_timer);  | 
|---|
| 2086 | 1930 |   | 
|---|
| 2087 |  | -	if (!rcu_is_nocb_cpu(rdp->cpu))  | 
|---|
| 2088 |  | -		return false;  | 
|---|
| 2089 |  | -	__call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags);  | 
|---|
| 2090 |  | -	if (__is_kfree_rcu_offset((unsigned long)rhp->func))  | 
|---|
| 2091 |  | -		trace_rcu_kfree_callback(rdp->rsp->name, rhp,  | 
|---|
| 2092 |  | -					 (unsigned long)rhp->func,  | 
|---|
| 2093 |  | -					 -atomic_long_read(&rdp->nocb_q_count_lazy),  | 
|---|
| 2094 |  | -					 -atomic_long_read(&rdp->nocb_q_count));  | 
|---|
| 2095 |  | -	else  | 
|---|
| 2096 |  | -		trace_rcu_callback(rdp->rsp->name, rhp,  | 
|---|
| 2097 |  | -				   -atomic_long_read(&rdp->nocb_q_count_lazy),  | 
|---|
| 2098 |  | -				   -atomic_long_read(&rdp->nocb_q_count));  | 
|---|
 | 1931 | +	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Timer"));  | 
|---|
 | 1932 | +	rcu_nocb_lock_irqsave(rdp, flags);  | 
|---|
 | 1933 | +	smp_mb__after_spinlock(); /* Timer expire before wakeup. */  | 
|---|
 | 1934 | +	__call_rcu_nocb_wake(rdp, true, flags);  | 
|---|
 | 1935 | +}  | 
|---|
 | 1936 | +  | 
|---|
 | 1937 | +/*  | 
|---|
 | 1938 | + * No-CBs GP kthreads come here to wait for additional callbacks to show up  | 
|---|
 | 1939 | + * or for grace periods to end.  | 
|---|
 | 1940 | + */  | 
|---|
 | 1941 | +static void nocb_gp_wait(struct rcu_data *my_rdp)  | 
|---|
 | 1942 | +{  | 
|---|
 | 1943 | +	bool bypass = false;  | 
|---|
 | 1944 | +	long bypass_ncbs;  | 
|---|
 | 1945 | +	int __maybe_unused cpu = my_rdp->cpu;  | 
|---|
 | 1946 | +	unsigned long cur_gp_seq;  | 
|---|
 | 1947 | +	unsigned long flags;  | 
|---|
 | 1948 | +	bool gotcbs = false;  | 
|---|
 | 1949 | +	unsigned long j = jiffies;  | 
|---|
 | 1950 | +	bool needwait_gp = false; // This prevents actual uninitialized use.  | 
|---|
 | 1951 | +	bool needwake;  | 
|---|
 | 1952 | +	bool needwake_gp;  | 
|---|
 | 1953 | +	struct rcu_data *rdp;  | 
|---|
 | 1954 | +	struct rcu_node *rnp;  | 
|---|
 | 1955 | +	unsigned long wait_gp_seq = 0; // Suppress "use uninitialized" warning.  | 
|---|
 | 1956 | +	bool wasempty = false;  | 
|---|
| 2099 | 1957 |   | 
|---|
| 2100 | 1958 |  	/* | 
|---|
| 2101 |  | -	 * If called from an extended quiescent state with interrupts  | 
|---|
| 2102 |  | -	 * disabled, invoke the RCU core in order to allow the idle-entry  | 
|---|
| 2103 |  | -	 * deferred-wakeup check to function.  | 
|---|
 | 1959 | +	 * Each pass through the following loop checks for CBs and for the  | 
|---|
 | 1960 | +	 * nearest grace period (if any) to wait for next.  The CB kthreads  | 
|---|
 | 1961 | +	 * and the global grace-period kthread are awakened if needed.  | 
|---|
| 2104 | 1962 |  	 */ | 
|---|
| 2105 |  | -	if (irqs_disabled_flags(flags) &&  | 
|---|
| 2106 |  | -	    !rcu_is_watching() &&  | 
|---|
| 2107 |  | -	    cpu_online(smp_processor_id()))  | 
|---|
| 2108 |  | -		invoke_rcu_core();  | 
|---|
 | 1963 | +	WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp);  | 
|---|
 | 1964 | +	for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_cb_rdp) {  | 
|---|
 | 1965 | +		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check"));  | 
|---|
 | 1966 | +		rcu_nocb_lock_irqsave(rdp, flags);  | 
|---|
 | 1967 | +		bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);  | 
|---|
 | 1968 | +		if (bypass_ncbs &&  | 
|---|
 | 1969 | +		    (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) ||  | 
|---|
 | 1970 | +		     bypass_ncbs > 2 * qhimark)) {  | 
|---|
 | 1971 | +			// Bypass full or old, so flush it.  | 
|---|
 | 1972 | +			(void)rcu_nocb_try_flush_bypass(rdp, j);  | 
|---|
 | 1973 | +			bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);  | 
|---|
 | 1974 | +		} else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) {  | 
|---|
 | 1975 | +			rcu_nocb_unlock_irqrestore(rdp, flags);  | 
|---|
 | 1976 | +			continue; /* No callbacks here, try next. */  | 
|---|
 | 1977 | +		}  | 
|---|
 | 1978 | +		if (bypass_ncbs) {  | 
|---|
 | 1979 | +			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,  | 
|---|
 | 1980 | +					    TPS("Bypass"));  | 
|---|
 | 1981 | +			bypass = true;  | 
|---|
 | 1982 | +		}  | 
|---|
 | 1983 | +		rnp = rdp->mynode;  | 
|---|
 | 1984 | +		if (bypass) {  // Avoid race with first bypass CB.  | 
|---|
 | 1985 | +			WRITE_ONCE(my_rdp->nocb_defer_wakeup,  | 
|---|
 | 1986 | +				   RCU_NOCB_WAKE_NOT);  | 
|---|
 | 1987 | +			del_timer(&my_rdp->nocb_timer);  | 
|---|
 | 1988 | +		}  | 
|---|
 | 1989 | +		// Advance callbacks if helpful and low contention.  | 
|---|
 | 1990 | +		needwake_gp = false;  | 
|---|
 | 1991 | +		if (!rcu_segcblist_restempty(&rdp->cblist,  | 
|---|
 | 1992 | +					     RCU_NEXT_READY_TAIL) ||  | 
|---|
 | 1993 | +		    (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&  | 
|---|
 | 1994 | +		     rcu_seq_done(&rnp->gp_seq, cur_gp_seq))) {  | 
|---|
 | 1995 | +			raw_spin_lock_rcu_node(rnp); /* irqs disabled. */  | 
|---|
 | 1996 | +			needwake_gp = rcu_advance_cbs(rnp, rdp);  | 
|---|
 | 1997 | +			wasempty = rcu_segcblist_restempty(&rdp->cblist,  | 
|---|
 | 1998 | +							   RCU_NEXT_READY_TAIL);  | 
|---|
 | 1999 | +			raw_spin_unlock_rcu_node(rnp); /* irqs disabled. */  | 
|---|
 | 2000 | +		}  | 
|---|
 | 2001 | +		// Need to wait on some grace period?  | 
|---|
 | 2002 | +		WARN_ON_ONCE(wasempty &&  | 
|---|
 | 2003 | +			     !rcu_segcblist_restempty(&rdp->cblist,  | 
|---|
 | 2004 | +						      RCU_NEXT_READY_TAIL));  | 
|---|
 | 2005 | +		if (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq)) {  | 
|---|
 | 2006 | +			if (!needwait_gp ||  | 
|---|
 | 2007 | +			    ULONG_CMP_LT(cur_gp_seq, wait_gp_seq))  | 
|---|
 | 2008 | +				wait_gp_seq = cur_gp_seq;  | 
|---|
 | 2009 | +			needwait_gp = true;  | 
|---|
 | 2010 | +			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,  | 
|---|
 | 2011 | +					    TPS("NeedWaitGP"));  | 
|---|
 | 2012 | +		}  | 
|---|
 | 2013 | +		if (rcu_segcblist_ready_cbs(&rdp->cblist)) {  | 
|---|
 | 2014 | +			needwake = rdp->nocb_cb_sleep;  | 
|---|
 | 2015 | +			WRITE_ONCE(rdp->nocb_cb_sleep, false);  | 
|---|
 | 2016 | +			smp_mb(); /* CB invocation -after- GP end. */  | 
|---|
 | 2017 | +		} else {  | 
|---|
 | 2018 | +			needwake = false;  | 
|---|
 | 2019 | +		}  | 
|---|
 | 2020 | +		rcu_nocb_unlock_irqrestore(rdp, flags);  | 
|---|
 | 2021 | +		if (needwake) {  | 
|---|
 | 2022 | +			swake_up_one(&rdp->nocb_cb_wq);  | 
|---|
 | 2023 | +			gotcbs = true;  | 
|---|
 | 2024 | +		}  | 
|---|
 | 2025 | +		if (needwake_gp)  | 
|---|
 | 2026 | +			rcu_gp_kthread_wake();  | 
|---|
 | 2027 | +	}  | 
|---|
| 2109 | 2028 |   | 
|---|
| 2110 |  | -	return true;  | 
|---|
 | 2029 | +	my_rdp->nocb_gp_bypass = bypass;  | 
|---|
 | 2030 | +	my_rdp->nocb_gp_gp = needwait_gp;  | 
|---|
 | 2031 | +	my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0;  | 
|---|
 | 2032 | +	if (bypass && !rcu_nocb_poll) {  | 
|---|
 | 2033 | +		// At least one child with non-empty ->nocb_bypass, so set  | 
|---|
 | 2034 | +		// timer in order to avoid stranding its callbacks.  | 
|---|
 | 2035 | +		raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);  | 
|---|
 | 2036 | +		mod_timer(&my_rdp->nocb_bypass_timer, j + 2);  | 
|---|
 | 2037 | +		raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);  | 
|---|
 | 2038 | +	}  | 
|---|
 | 2039 | +	if (rcu_nocb_poll) {  | 
|---|
 | 2040 | +		/* Polling, so trace if first poll in the series. */  | 
|---|
 | 2041 | +		if (gotcbs)  | 
|---|
 | 2042 | +			trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));  | 
|---|
 | 2043 | +		schedule_timeout_idle(1);  | 
|---|
 | 2044 | +	} else if (!needwait_gp) {  | 
|---|
 | 2045 | +		/* Wait for callbacks to appear. */  | 
|---|
 | 2046 | +		trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));  | 
|---|
 | 2047 | +		swait_event_interruptible_exclusive(my_rdp->nocb_gp_wq,  | 
|---|
 | 2048 | +				!READ_ONCE(my_rdp->nocb_gp_sleep));  | 
|---|
 | 2049 | +		trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("EndSleep"));  | 
|---|
 | 2050 | +	} else {  | 
|---|
 | 2051 | +		rnp = my_rdp->mynode;  | 
|---|
 | 2052 | +		trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("StartWait"));  | 
|---|
 | 2053 | +		swait_event_interruptible_exclusive(  | 
|---|
 | 2054 | +			rnp->nocb_gp_wq[rcu_seq_ctr(wait_gp_seq) & 0x1],  | 
|---|
 | 2055 | +			rcu_seq_done(&rnp->gp_seq, wait_gp_seq) ||  | 
|---|
 | 2056 | +			!READ_ONCE(my_rdp->nocb_gp_sleep));  | 
|---|
 | 2057 | +		trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("EndWait"));  | 
|---|
 | 2058 | +	}  | 
|---|
 | 2059 | +	if (!rcu_nocb_poll) {  | 
|---|
 | 2060 | +		raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);  | 
|---|
 | 2061 | +		if (bypass)  | 
|---|
 | 2062 | +			del_timer(&my_rdp->nocb_bypass_timer);  | 
|---|
 | 2063 | +		WRITE_ONCE(my_rdp->nocb_gp_sleep, true);  | 
|---|
 | 2064 | +		raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);  | 
|---|
 | 2065 | +	}  | 
|---|
 | 2066 | +	my_rdp->nocb_gp_seq = -1;  | 
|---|
 | 2067 | +	WARN_ON(signal_pending(current));  | 
|---|
| 2111 | 2068 |  } | 
|---|
| 2112 | 2069 |   | 
|---|
| 2113 | 2070 |  /* | 
|---|
| 2114 |  | - * Adopt orphaned callbacks on a no-CBs CPU, or return 0 if this is  | 
|---|
| 2115 |  | - * not a no-CBs CPU.  | 
|---|
 | 2071 | + * No-CBs grace-period-wait kthread.  There is one of these per group  | 
|---|
 | 2072 | + * of CPUs, but each is created only after at least one CPU in its  | 
|---|
 | 2073 | + * group has come online since boot.  This kthread checks for newly posted  | 
|---|
 | 2074 | + * callbacks from any of the CPUs it is responsible for, waits for a  | 
|---|
 | 2075 | + * grace period, then awakens all of the rcu_nocb_cb_kthread() instances  | 
|---|
 | 2076 | + * that then have callback-invocation work to do.  | 
|---|
| 2116 | 2077 |   */ | 
|---|
| 2117 |  | -static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp,  | 
|---|
| 2118 |  | -						     struct rcu_data *rdp,  | 
|---|
| 2119 |  | -						     unsigned long flags)  | 
|---|
 | 2078 | +static int rcu_nocb_gp_kthread(void *arg)  | 
|---|
| 2120 | 2079 |  { | 
|---|
| 2121 |  | -	lockdep_assert_irqs_disabled();  | 
|---|
| 2122 |  | -	if (!rcu_is_nocb_cpu(smp_processor_id()))  | 
|---|
| 2123 |  | -		return false; /* Not NOCBs CPU, caller must migrate CBs. */  | 
|---|
| 2124 |  | -	__call_rcu_nocb_enqueue(my_rdp, rcu_segcblist_head(&rdp->cblist),  | 
|---|
| 2125 |  | -				rcu_segcblist_tail(&rdp->cblist),  | 
|---|
| 2126 |  | -				rcu_segcblist_n_cbs(&rdp->cblist),  | 
|---|
| 2127 |  | -				rcu_segcblist_n_lazy_cbs(&rdp->cblist), flags);  | 
|---|
| 2128 |  | -	rcu_segcblist_init(&rdp->cblist);  | 
|---|
| 2129 |  | -	rcu_segcblist_disable(&rdp->cblist);  | 
|---|
| 2130 |  | -	return true;  | 
|---|
 | 2080 | +	struct rcu_data *rdp = arg;  | 
|---|
 | 2081 | +  | 
|---|
 | 2082 | +	for (;;) {  | 
|---|
 | 2083 | +		WRITE_ONCE(rdp->nocb_gp_loops, rdp->nocb_gp_loops + 1);  | 
|---|
 | 2084 | +		nocb_gp_wait(rdp);  | 
|---|
 | 2085 | +		cond_resched_tasks_rcu_qs();  | 
|---|
 | 2086 | +	}  | 
|---|
 | 2087 | +	return 0;  | 
|---|
| 2131 | 2088 |  } | 
|---|
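
The division of labor set up here, one rcuog grace-period kthread per group feeding per-CPU rcuo callback kthreads, can be modeled with two ordinary threads: the GP side waits out a grace period, clears the CB side's sleep flag, and wakes it; the CB side invokes the batch and goes back to sleep. The pthread sketch below mirrors only the nocb_cb_sleep handshake; the callbacks, the simulated grace period, and the mutex/condition-variable plumbing are all invented for illustration and are not how the kernel implements the wait.

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cb_wq = PTHREAD_COND_INITIALIZER;
static bool nocb_cb_sleep = true;	/* CB thread sleeps until the GP thread clears this */
static int ready_cbs;			/* callbacks whose grace period has ended */
static bool done;

static void *gp_thread(void *arg)	/* stands in for rcu_nocb_gp_kthread()/nocb_gp_wait() */
{
	for (int batch = 0; batch < 3; batch++) {
		usleep(1000);			/* pretend a grace period elapsed */
		pthread_mutex_lock(&lock);
		ready_cbs += 2;			/* two callbacks became ready to invoke */
		nocb_cb_sleep = false;		/* like WRITE_ONCE(rdp->nocb_cb_sleep, false) */
		pthread_cond_signal(&cb_wq);	/* like swake_up_one(&rdp->nocb_cb_wq) */
		pthread_mutex_unlock(&lock);
	}
	pthread_mutex_lock(&lock);
	done = true;
	pthread_cond_signal(&cb_wq);
	pthread_mutex_unlock(&lock);
	return arg;
}

static void *cb_thread(void *arg)	/* stands in for rcu_nocb_cb_kthread()/nocb_cb_wait() */
{
	pthread_mutex_lock(&lock);
	for (;;) {
		while (nocb_cb_sleep && !done)	/* like the swait on !rdp->nocb_cb_sleep */
			pthread_cond_wait(&cb_wq, &lock);
		if (done && !ready_cbs)
			break;
		printf("cb thread: invoking %d callback(s)\n", ready_cbs);
		ready_cbs = 0;			/* stands in for rcu_do_batch() */
		nocb_cb_sleep = true;		/* back to sleep until the next batch */
	}
	pthread_mutex_unlock(&lock);
	return arg;
}

int main(void)
{
	pthread_t gp, cb;

	pthread_create(&cb, NULL, cb_thread, NULL);
	pthread_create(&gp, NULL, gp_thread, NULL);
	pthread_join(gp, NULL);
	pthread_join(cb, NULL);
	return 0;
}
```
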
| 2132 | 2089 |   | 
|---|
| 2133 | 2090 |  /* | 
|---|
| 2134 |  | - * If necessary, kick off a new grace period, and either way wait  | 
|---|
| 2135 |  | - * for a subsequent grace period to complete.  | 
|---|
 | 2091 | + * Invoke any ready callbacks from the corresponding no-CBs CPU,  | 
|---|
 | 2092 | + * then, if there are no more, wait for more to appear.  | 
|---|
| 2136 | 2093 |   */ | 
|---|
| 2137 |  | -static void rcu_nocb_wait_gp(struct rcu_data *rdp)  | 
|---|
 | 2094 | +static void nocb_cb_wait(struct rcu_data *rdp)  | 
|---|
| 2138 | 2095 |  { | 
|---|
| 2139 |  | -	unsigned long c;  | 
|---|
| 2140 |  | -	bool d;  | 
|---|
 | 2096 | +	unsigned long cur_gp_seq;  | 
|---|
| 2141 | 2097 |  	unsigned long flags; | 
|---|
| 2142 |  | -	bool needwake;  | 
|---|
 | 2098 | +	bool needwake_gp = false;  | 
|---|
| 2143 | 2099 |  	struct rcu_node *rnp = rdp->mynode; | 
|---|
| 2144 | 2100 |   | 
|---|
| 2145 | 2101 |  	local_irq_save(flags); | 
|---|
| 2146 |  | -	c = rcu_seq_snap(&rdp->rsp->gp_seq);  | 
|---|
| 2147 |  | -	if (!rdp->gpwrap && ULONG_CMP_GE(rdp->gp_seq_needed, c)) {  | 
|---|
| 2148 |  | -		local_irq_restore(flags);  | 
|---|
| 2149 |  | -	} else {  | 
|---|
| 2150 |  | -		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */  | 
|---|
| 2151 |  | -		needwake = rcu_start_this_gp(rnp, rdp, c);  | 
|---|
| 2152 |  | -		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);  | 
|---|
| 2153 |  | -		if (needwake)  | 
|---|
| 2154 |  | -			rcu_gp_kthread_wake(rdp->rsp);  | 
|---|
 | 2102 | +	rcu_momentary_dyntick_idle();  | 
|---|
 | 2103 | +	local_irq_restore(flags);  | 
|---|
 | 2104 | +	local_bh_disable();  | 
|---|
 | 2105 | +	rcu_do_batch(rdp);  | 
|---|
 | 2106 | +	local_bh_enable();  | 
|---|
 | 2107 | +	lockdep_assert_irqs_enabled();  | 
|---|
 | 2108 | +	rcu_nocb_lock_irqsave(rdp, flags);  | 
|---|
 | 2109 | +	if (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&  | 
|---|
 | 2110 | +	    rcu_seq_done(&rnp->gp_seq, cur_gp_seq) &&  | 
|---|
 | 2111 | +	    raw_spin_trylock_rcu_node(rnp)) { /* irqs already disabled. */  | 
|---|
 | 2112 | +		needwake_gp = rcu_advance_cbs(rdp->mynode, rdp);  | 
|---|
 | 2113 | +		raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */  | 
|---|
 | 2114 | +	}  | 
|---|
 | 2115 | +	if (rcu_segcblist_ready_cbs(&rdp->cblist)) {  | 
|---|
 | 2116 | +		rcu_nocb_unlock_irqrestore(rdp, flags);  | 
|---|
 | 2117 | +		if (needwake_gp)  | 
|---|
 | 2118 | +			rcu_gp_kthread_wake();  | 
|---|
 | 2119 | +		return;  | 
|---|
| 2155 | 2120 |  	} | 
|---|
| 2156 | 2121 |   | 
|---|
| 2157 |  | -	/*  | 
|---|
| 2158 |  | -	 * Wait for the grace period.  Do so interruptibly to avoid messing  | 
|---|
| 2159 |  | -	 * up the load average.  | 
|---|
| 2160 |  | -	 */  | 
|---|
| 2161 |  | -	trace_rcu_this_gp(rnp, rdp, c, TPS("StartWait"));  | 
|---|
| 2162 |  | -	for (;;) {  | 
|---|
| 2163 |  | -		swait_event_interruptible_exclusive(  | 
|---|
| 2164 |  | -			rnp->nocb_gp_wq[rcu_seq_ctr(c) & 0x1],  | 
|---|
| 2165 |  | -			(d = rcu_seq_done(&rnp->gp_seq, c)));  | 
|---|
| 2166 |  | -		if (likely(d))  | 
|---|
| 2167 |  | -			break;  | 
|---|
| 2168 |  | -		WARN_ON(signal_pending(current));  | 
|---|
| 2169 |  | -		trace_rcu_this_gp(rnp, rdp, c, TPS("ResumeWait"));  | 
|---|
 | 2122 | +	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("CBSleep"));  | 
|---|
 | 2123 | +	WRITE_ONCE(rdp->nocb_cb_sleep, true);  | 
|---|
 | 2124 | +	rcu_nocb_unlock_irqrestore(rdp, flags);  | 
|---|
 | 2125 | +	if (needwake_gp)  | 
|---|
 | 2126 | +		rcu_gp_kthread_wake();  | 
|---|
 | 2127 | +	swait_event_interruptible_exclusive(rdp->nocb_cb_wq,  | 
|---|
 | 2128 | +				 !READ_ONCE(rdp->nocb_cb_sleep));  | 
|---|
 | 2129 | +	if (!smp_load_acquire(&rdp->nocb_cb_sleep)) { /* VVV */  | 
|---|
 | 2130 | +		/* ^^^ Ensure CB invocation follows _sleep test. */  | 
|---|
 | 2131 | +		return;  | 
|---|
| 2170 | 2132 |  	} | 
|---|
| 2171 |  | -	trace_rcu_this_gp(rnp, rdp, c, TPS("EndWait"));  | 
|---|
| 2172 |  | -	smp_mb(); /* Ensure that CB invocation happens after GP end. */  | 
|---|
 | 2133 | +	WARN_ON(signal_pending(current));  | 
|---|
 | 2134 | +	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));  | 
|---|
| 2173 | 2135 |  } | 
|---|
| 2174 | 2136 |   | 
|---|
| 2175 | 2137 |  /* | 
|---|
| 2176 |  | - * Leaders come here to wait for additional callbacks to show up.  | 
|---|
| 2177 |  | - * This function does not return until callbacks appear.  | 
|---|
 | 2138 | + * Per-rcu_data kthread, but only for no-CBs CPUs.  Repeatedly invoke  | 
|---|
 | 2139 | + * nocb_cb_wait() to do the dirty work.  | 
|---|
| 2178 | 2140 |   */ | 
|---|
| 2179 |  | -static void nocb_leader_wait(struct rcu_data *my_rdp)  | 
|---|
 | 2141 | +static int rcu_nocb_cb_kthread(void *arg)  | 
|---|
| 2180 | 2142 |  { | 
|---|
| 2181 |  | -	bool firsttime = true;  | 
|---|
| 2182 |  | -	unsigned long flags;  | 
|---|
| 2183 |  | -	bool gotcbs;  | 
|---|
| 2184 |  | -	struct rcu_data *rdp;  | 
|---|
| 2185 |  | -	struct rcu_head **tail;  | 
|---|
| 2186 |  | -  | 
|---|
| 2187 |  | -wait_again:  | 
|---|
| 2188 |  | -  | 
|---|
| 2189 |  | -	/* Wait for callbacks to appear. */  | 
|---|
| 2190 |  | -	if (!rcu_nocb_poll) {  | 
|---|
| 2191 |  | -		trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Sleep"));  | 
|---|
| 2192 |  | -		swait_event_interruptible_exclusive(my_rdp->nocb_wq,  | 
|---|
| 2193 |  | -				!READ_ONCE(my_rdp->nocb_leader_sleep));  | 
|---|
| 2194 |  | -		raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);  | 
|---|
| 2195 |  | -		my_rdp->nocb_leader_sleep = true;  | 
|---|
| 2196 |  | -		WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);  | 
|---|
| 2197 |  | -		del_timer(&my_rdp->nocb_timer);  | 
|---|
| 2198 |  | -		raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags);  | 
|---|
| 2199 |  | -	} else if (firsttime) {  | 
|---|
| 2200 |  | -		firsttime = false; /* Don't drown trace log with "Poll"! */  | 
|---|
| 2201 |  | -		trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Poll"));  | 
|---|
| 2202 |  | -	}  | 
|---|
| 2203 |  | -  | 
|---|
| 2204 |  | -	/*  | 
|---|
| 2205 |  | -	 * Each pass through the following loop checks a follower for CBs.  | 
|---|
| 2206 |  | -	 * We are our own first follower.  Any CBs found are moved to  | 
|---|
| 2207 |  | -	 * nocb_gp_head, where they await a grace period.  | 
|---|
| 2208 |  | -	 */  | 
|---|
| 2209 |  | -	gotcbs = false;  | 
|---|
| 2210 |  | -	smp_mb(); /* wakeup and _sleep before ->nocb_head reads. */  | 
|---|
| 2211 |  | -	for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {  | 
|---|
| 2212 |  | -		rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head);  | 
|---|
| 2213 |  | -		if (!rdp->nocb_gp_head)  | 
|---|
| 2214 |  | -			continue;  /* No CBs here, try next follower. */  | 
|---|
| 2215 |  | -  | 
|---|
| 2216 |  | -		/* Move callbacks to wait-for-GP list, which is empty. */  | 
|---|
| 2217 |  | -		WRITE_ONCE(rdp->nocb_head, NULL);  | 
|---|
| 2218 |  | -		rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head);  | 
|---|
| 2219 |  | -		gotcbs = true;  | 
|---|
| 2220 |  | -	}  | 
|---|
| 2221 |  | -  | 
|---|
| 2222 |  | -	/* No callbacks?  Sleep a bit if polling, and go retry.  */  | 
|---|
| 2223 |  | -	if (unlikely(!gotcbs)) {  | 
|---|
| 2224 |  | -		WARN_ON(signal_pending(current));  | 
|---|
| 2225 |  | -		if (rcu_nocb_poll) {  | 
|---|
| 2226 |  | -			schedule_timeout_interruptible(1);  | 
|---|
| 2227 |  | -		} else {  | 
|---|
| 2228 |  | -			trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu,  | 
|---|
| 2229 |  | -					    TPS("WokeEmpty"));  | 
|---|
| 2230 |  | -		}  | 
|---|
| 2231 |  | -		goto wait_again;  | 
|---|
| 2232 |  | -	}  | 
|---|
| 2233 |  | -  | 
|---|
| 2234 |  | -	/* Wait for one grace period. */  | 
|---|
| 2235 |  | -	rcu_nocb_wait_gp(my_rdp);  | 
|---|
| 2236 |  | -  | 
|---|
| 2237 |  | -	/* Each pass through the following loop wakes a follower, if needed. */  | 
|---|
| 2238 |  | -	for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {  | 
|---|
| 2239 |  | -		if (!rcu_nocb_poll &&  | 
|---|
| 2240 |  | -		    READ_ONCE(rdp->nocb_head) &&  | 
|---|
| 2241 |  | -		    READ_ONCE(my_rdp->nocb_leader_sleep)) {  | 
|---|
| 2242 |  | -			raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);  | 
|---|
| 2243 |  | -			my_rdp->nocb_leader_sleep = false;/* No need to sleep.*/  | 
|---|
| 2244 |  | -			raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags);  | 
|---|
| 2245 |  | -		}  | 
|---|
| 2246 |  | -		if (!rdp->nocb_gp_head)  | 
|---|
| 2247 |  | -			continue; /* No CBs, so no need to wake follower. */  | 
|---|
| 2248 |  | -  | 
|---|
| 2249 |  | -		/* Append callbacks to follower's "done" list. */  | 
|---|
| 2250 |  | -		raw_spin_lock_irqsave(&rdp->nocb_lock, flags);  | 
|---|
| 2251 |  | -		tail = rdp->nocb_follower_tail;  | 
|---|
| 2252 |  | -		rdp->nocb_follower_tail = rdp->nocb_gp_tail;  | 
|---|
| 2253 |  | -		*tail = rdp->nocb_gp_head;  | 
|---|
| 2254 |  | -		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);  | 
|---|
| 2255 |  | -		if (rdp != my_rdp && tail == &rdp->nocb_follower_head) {  | 
|---|
| 2256 |  | -			/* List was empty, so wake up the follower.  */  | 
|---|
| 2257 |  | -			swake_up_one(&rdp->nocb_wq);  | 
|---|
| 2258 |  | -		}  | 
|---|
| 2259 |  | -	}  | 
|---|
| 2260 |  | -  | 
|---|
| 2261 |  | -	/* If we (the leader) don't have CBs, go wait some more. */  | 
|---|
| 2262 |  | -	if (!my_rdp->nocb_follower_head)  | 
|---|
| 2263 |  | -		goto wait_again;  | 
|---|
| 2264 |  | -}  | 
|---|
| 2265 |  | -  | 
|---|
| 2266 |  | -/*  | 
|---|
| 2267 |  | - * Followers come here to wait for additional callbacks to show up.  | 
|---|
| 2268 |  | - * This function does not return until callbacks appear.  | 
|---|
| 2269 |  | - */  | 
|---|
| 2270 |  | -static void nocb_follower_wait(struct rcu_data *rdp)  | 
|---|
| 2271 |  | -{  | 
|---|
| 2272 |  | -	for (;;) {  | 
|---|
| 2273 |  | -		trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("FollowerSleep"));  | 
|---|
| 2274 |  | -		swait_event_interruptible_exclusive(rdp->nocb_wq,  | 
|---|
| 2275 |  | -					 READ_ONCE(rdp->nocb_follower_head));  | 
|---|
| 2276 |  | -		if (smp_load_acquire(&rdp->nocb_follower_head)) {  | 
|---|
| 2277 |  | -			/* ^^^ Ensure CB invocation follows _head test. */  | 
|---|
| 2278 |  | -			return;  | 
|---|
| 2279 |  | -		}  | 
|---|
| 2280 |  | -		WARN_ON(signal_pending(current));  | 
|---|
| 2281 |  | -		trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WokeEmpty"));  | 
|---|
| 2282 |  | -	}  | 
|---|
| 2283 |  | -}  | 
|---|
| 2284 |  | -  | 
|---|
| 2285 |  | -/*  | 
|---|
| 2286 |  | - * Per-rcu_data kthread, but only for no-CBs CPUs.  Each kthread invokes  | 
|---|
| 2287 |  | - * callbacks queued by the corresponding no-CBs CPU, however, there is  | 
|---|
| 2288 |  | - * an optional leader-follower relationship so that the grace-period  | 
|---|
| 2289 |  | - * kthreads don't have to do quite so many wakeups.  | 
|---|
| 2290 |  | - */  | 
|---|
| 2291 |  | -static int rcu_nocb_kthread(void *arg)  | 
|---|
| 2292 |  | -{  | 
|---|
| 2293 |  | -	int c, cl;  | 
|---|
| 2294 |  | -	unsigned long flags;  | 
|---|
| 2295 |  | -	struct rcu_head *list;  | 
|---|
| 2296 |  | -	struct rcu_head *next;  | 
|---|
| 2297 |  | -	struct rcu_head **tail;  | 
|---|
| 2298 | 2143 |  	struct rcu_data *rdp = arg; | 
|---|
| 2299 | 2144 |   | 
|---|
| 2300 |  | -	/* Each pass through this loop invokes one batch of callbacks */  | 
|---|
 | 2145 | +	// Each pass through this loop does one callback batch, and,  | 
|---|
 | 2146 | +	// if there are no more ready callbacks, waits for them.  | 
|---|
| 2301 | 2147 |  	for (;;) { | 
|---|
| 2302 |  | -		/* Wait for callbacks. */  | 
|---|
| 2303 |  | -		if (rdp->nocb_leader == rdp)  | 
|---|
| 2304 |  | -			nocb_leader_wait(rdp);  | 
|---|
| 2305 |  | -		else  | 
|---|
| 2306 |  | -			nocb_follower_wait(rdp);  | 
|---|
| 2307 |  | -  | 
|---|
| 2308 |  | -		/* Pull the ready-to-invoke callbacks onto local list. */  | 
|---|
| 2309 |  | -		raw_spin_lock_irqsave(&rdp->nocb_lock, flags);  | 
|---|
| 2310 |  | -		list = rdp->nocb_follower_head;  | 
|---|
| 2311 |  | -		rdp->nocb_follower_head = NULL;  | 
|---|
| 2312 |  | -		tail = rdp->nocb_follower_tail;  | 
|---|
| 2313 |  | -		rdp->nocb_follower_tail = &rdp->nocb_follower_head;  | 
|---|
| 2314 |  | -		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);  | 
|---|
| 2315 |  | -		BUG_ON(!list);  | 
|---|
| 2316 |  | -		trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WokeNonEmpty"));  | 
|---|
| 2317 |  | -  | 
|---|
| 2318 |  | -		/* Each pass through the following loop invokes a callback. */  | 
|---|
| 2319 |  | -		trace_rcu_batch_start(rdp->rsp->name,  | 
|---|
| 2320 |  | -				      atomic_long_read(&rdp->nocb_q_count_lazy),  | 
|---|
| 2321 |  | -				      atomic_long_read(&rdp->nocb_q_count), -1);  | 
|---|
| 2322 |  | -		c = cl = 0;  | 
|---|
| 2323 |  | -		while (list) {  | 
|---|
| 2324 |  | -			next = list->next;  | 
|---|
| 2325 |  | -			/* Wait for enqueuing to complete, if needed. */  | 
|---|
| 2326 |  | -			while (next == NULL && &list->next != tail) {  | 
|---|
| 2327 |  | -				trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,  | 
|---|
| 2328 |  | -						    TPS("WaitQueue"));  | 
|---|
| 2329 |  | -				schedule_timeout_interruptible(1);  | 
|---|
| 2330 |  | -				trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,  | 
|---|
| 2331 |  | -						    TPS("WokeQueue"));  | 
|---|
| 2332 |  | -				next = list->next;  | 
|---|
| 2333 |  | -			}  | 
|---|
| 2334 |  | -			debug_rcu_head_unqueue(list);  | 
|---|
| 2335 |  | -			local_bh_disable();  | 
|---|
| 2336 |  | -			if (__rcu_reclaim(rdp->rsp->name, list))  | 
|---|
| 2337 |  | -				cl++;  | 
|---|
| 2338 |  | -			c++;  | 
|---|
| 2339 |  | -			local_bh_enable();  | 
|---|
| 2340 |  | -			cond_resched_tasks_rcu_qs();  | 
|---|
| 2341 |  | -			list = next;  | 
|---|
| 2342 |  | -		}  | 
|---|
| 2343 |  | -		trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);  | 
|---|
| 2344 |  | -		smp_mb__before_atomic();  /* _add after CB invocation. */  | 
|---|
| 2345 |  | -		atomic_long_add(-c, &rdp->nocb_q_count);  | 
|---|
| 2346 |  | -		atomic_long_add(-cl, &rdp->nocb_q_count_lazy);  | 
|---|
 | 2148 | +		nocb_cb_wait(rdp);  | 
|---|
 | 2149 | +		cond_resched_tasks_rcu_qs();  | 
|---|
| 2347 | 2150 |  	} | 
|---|
| 2348 | 2151 |  	return 0; | 
|---|
| 2349 | 2152 |  } | 
|---|
| .. | .. | 
|---|
| 2360 | 2163 |  	unsigned long flags; | 
|---|
| 2361 | 2164 |  	int ndw; | 
|---|
| 2362 | 2165 |   | 
|---|
| 2363 |  | -	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);  | 
|---|
 | 2166 | +	rcu_nocb_lock_irqsave(rdp, flags);  | 
|---|
| 2364 | 2167 |  	if (!rcu_nocb_need_deferred_wakeup(rdp)) { | 
|---|
| 2365 |  | -		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);  | 
|---|
 | 2168 | +		rcu_nocb_unlock_irqrestore(rdp, flags);  | 
|---|
| 2366 | 2169 |  		return; | 
|---|
| 2367 | 2170 |  	} | 
|---|
| 2368 | 2171 |  	ndw = READ_ONCE(rdp->nocb_defer_wakeup); | 
|---|
| 2369 |  | -	WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);  | 
|---|
| 2370 |  | -	__wake_nocb_leader(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);  | 
|---|
| 2371 |  | -	trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake"));  | 
|---|
 | 2172 | +	wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);  | 
|---|
 | 2173 | +	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));  | 
|---|
| 2372 | 2174 |  } | 
|---|
| 2373 | 2175 |   | 
|---|
| 2374 | 2176 |  /* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */ | 
|---|
| .. | .. | 
|---|
| 2390 | 2192 |  		do_nocb_deferred_wakeup_common(rdp); | 
|---|
| 2391 | 2193 |  } | 
|---|
| 2392 | 2194 |   | 
|---|
 | 2195 | +void rcu_nocb_flush_deferred_wakeup(void)  | 
|---|
 | 2196 | +{  | 
|---|
 | 2197 | +	do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data));  | 
|---|
 | 2198 | +}  | 
|---|
 | 2199 | +  | 
|---|
| 2393 | 2200 |  void __init rcu_init_nohz(void) | 
|---|
| 2394 | 2201 |  { | 
|---|
| 2395 | 2202 |  	int cpu; | 
|---|
| 2396 | 2203 |  	bool need_rcu_nocb_mask = false; | 
|---|
| 2397 |  | -	struct rcu_state *rsp;  | 
|---|
 | 2204 | +	struct rcu_data *rdp;  | 
|---|
| 2398 | 2205 |   | 
|---|
| 2399 | 2206 |  #if defined(CONFIG_NO_HZ_FULL) | 
|---|
| 2400 | 2207 |  	if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask)) | 
|---|
| .. | .. | 
|---|
| 2428 | 2235 |  	if (rcu_nocb_poll) | 
|---|
| 2429 | 2236 |  		pr_info("\tPoll for callbacks from no-CBs CPUs.\n"); | 
|---|
| 2430 | 2237 |   | 
|---|
| 2431 |  | -	for_each_rcu_flavor(rsp) {  | 
|---|
| 2432 |  | -		for_each_cpu(cpu, rcu_nocb_mask)  | 
|---|
| 2433 |  | -			init_nocb_callback_list(per_cpu_ptr(rsp->rda, cpu));  | 
|---|
| 2434 |  | -		rcu_organize_nocb_kthreads(rsp);  | 
|---|
 | 2238 | +	for_each_cpu(cpu, rcu_nocb_mask) {  | 
|---|
 | 2239 | +		rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
 | 2240 | +		if (rcu_segcblist_empty(&rdp->cblist))  | 
|---|
 | 2241 | +			rcu_segcblist_init(&rdp->cblist);  | 
|---|
 | 2242 | +		rcu_segcblist_offload(&rdp->cblist);  | 
|---|
| 2435 | 2243 |  	} | 
|---|
 | 2244 | +	rcu_organize_nocb_kthreads();  | 
|---|
| 2436 | 2245 |  } | 
|---|
| 2437 | 2246 |   | 
|---|
| 2438 | 2247 |  /* Initialize per-rcu_data variables for no-CBs CPUs. */ | 
|---|
| 2439 | 2248 |  static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) | 
|---|
| 2440 | 2249 |  { | 
|---|
| 2441 |  | -	rdp->nocb_tail = &rdp->nocb_head;  | 
|---|
| 2442 |  | -	init_swait_queue_head(&rdp->nocb_wq);  | 
|---|
| 2443 |  | -	rdp->nocb_follower_tail = &rdp->nocb_follower_head;  | 
|---|
 | 2250 | +	init_swait_queue_head(&rdp->nocb_cb_wq);  | 
|---|
 | 2251 | +	init_swait_queue_head(&rdp->nocb_gp_wq);  | 
|---|
| 2444 | 2252 |  	raw_spin_lock_init(&rdp->nocb_lock); | 
|---|
 | 2253 | +	raw_spin_lock_init(&rdp->nocb_bypass_lock);  | 
|---|
 | 2254 | +	raw_spin_lock_init(&rdp->nocb_gp_lock);  | 
|---|
| 2445 | 2255 |  	timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0); | 
|---|
 | 2256 | +	timer_setup(&rdp->nocb_bypass_timer, do_nocb_bypass_wakeup_timer, 0);  | 
|---|
 | 2257 | +	rcu_cblist_init(&rdp->nocb_bypass);  | 
|---|
| 2446 | 2258 |  } | 
|---|
| 2447 | 2259 |   | 
|---|
| 2448 | 2260 |  /* | 
|---|
| 2449 | 2261 |   * If the specified CPU is a no-CBs CPU that does not already have its | 
|---|
| 2450 |  | - * rcuo kthread for the specified RCU flavor, spawn it.  If the CPUs are  | 
|---|
| 2451 |  | - * brought online out of order, this can require re-organizing the  | 
|---|
| 2452 |  | - * leader-follower relationships.  | 
|---|
 | 2262 | + * rcuo CB kthread, spawn it.  Additionally, if the rcuo GP kthread  | 
|---|
 | 2263 | + * for this CPU's group has not yet been created, spawn it as well.  | 
|---|
| 2453 | 2264 |   */ | 
|---|
| 2454 |  | -static void rcu_spawn_one_nocb_kthread(struct rcu_state *rsp, int cpu)  | 
|---|
 | 2265 | +static void rcu_spawn_one_nocb_kthread(int cpu)  | 
|---|
| 2455 | 2266 |  { | 
|---|
| 2456 |  | -	struct rcu_data *rdp;  | 
|---|
| 2457 |  | -	struct rcu_data *rdp_last;  | 
|---|
| 2458 |  | -	struct rcu_data *rdp_old_leader;  | 
|---|
| 2459 |  | -	struct rcu_data *rdp_spawn = per_cpu_ptr(rsp->rda, cpu);  | 
|---|
 | 2267 | +	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
 | 2268 | +	struct rcu_data *rdp_gp;  | 
|---|
| 2460 | 2269 |  	struct task_struct *t; | 
|---|
| 2461 | 2270 |   | 
|---|
| 2462 | 2271 |  	/* | 
|---|
| 2463 | 2272 |  	 * If this isn't a no-CBs CPU or if it already has an rcuo kthread, | 
|---|
| 2464 | 2273 |  	 * then nothing to do. | 
|---|
| 2465 | 2274 |  	 */ | 
|---|
| 2466 |  | -	if (!rcu_is_nocb_cpu(cpu) || rdp_spawn->nocb_kthread)  | 
|---|
 | 2275 | +	if (!rcu_is_nocb_cpu(cpu) || rdp->nocb_cb_kthread)  | 
|---|
| 2467 | 2276 |  		return; | 
|---|
| 2468 | 2277 |   | 
|---|
| 2469 |  | -	/* If we didn't spawn the leader first, reorganize! */  | 
|---|
| 2470 |  | -	rdp_old_leader = rdp_spawn->nocb_leader;  | 
|---|
| 2471 |  | -	if (rdp_old_leader != rdp_spawn && !rdp_old_leader->nocb_kthread) {  | 
|---|
| 2472 |  | -		rdp_last = NULL;  | 
|---|
| 2473 |  | -		rdp = rdp_old_leader;  | 
|---|
| 2474 |  | -		do {  | 
|---|
| 2475 |  | -			rdp->nocb_leader = rdp_spawn;  | 
|---|
| 2476 |  | -			if (rdp_last && rdp != rdp_spawn)  | 
|---|
| 2477 |  | -				rdp_last->nocb_next_follower = rdp;  | 
|---|
| 2478 |  | -			if (rdp == rdp_spawn) {  | 
|---|
| 2479 |  | -				rdp = rdp->nocb_next_follower;  | 
|---|
| 2480 |  | -			} else {  | 
|---|
| 2481 |  | -				rdp_last = rdp;  | 
|---|
| 2482 |  | -				rdp = rdp->nocb_next_follower;  | 
|---|
| 2483 |  | -				rdp_last->nocb_next_follower = NULL;  | 
|---|
| 2484 |  | -			}  | 
|---|
| 2485 |  | -		} while (rdp);  | 
|---|
| 2486 |  | -		rdp_spawn->nocb_next_follower = rdp_old_leader;  | 
|---|
 | 2278 | +	/* If this CPU's group does not yet have its rcuog GP kthread, spawn it first. */  | 
|---|
 | 2279 | +	rdp_gp = rdp->nocb_gp_rdp;  | 
|---|
 | 2280 | +	if (!rdp_gp->nocb_gp_kthread) {  | 
|---|
 | 2281 | +		t = kthread_run(rcu_nocb_gp_kthread, rdp_gp,  | 
|---|
 | 2282 | +				"rcuog/%d", rdp_gp->cpu);  | 
|---|
 | 2283 | +		if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__))  | 
|---|
 | 2284 | +			return;  | 
|---|
 | 2285 | +		WRITE_ONCE(rdp_gp->nocb_gp_kthread, t);  | 
|---|
| 2487 | 2286 |  	} | 
|---|
| 2488 | 2287 |   | 
|---|
| 2489 |  | -	/* Spawn the kthread for this CPU and RCU flavor. */  | 
|---|
| 2490 |  | -	t = kthread_run(rcu_nocb_kthread, rdp_spawn,  | 
|---|
| 2491 |  | -			"rcuo%c/%d", rsp->abbr, cpu);  | 
|---|
| 2492 |  | -	BUG_ON(IS_ERR(t));  | 
|---|
| 2493 |  | -	WRITE_ONCE(rdp_spawn->nocb_kthread, t);  | 
|---|
 | 2288 | +	/* Spawn the kthread for this CPU. */  | 
|---|
 | 2289 | +	t = kthread_run(rcu_nocb_cb_kthread, rdp,  | 
|---|
 | 2290 | +			"rcuo%c/%d", rcu_state.abbr, cpu);  | 
|---|
 | 2291 | +	if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__))  | 
|---|
 | 2292 | +		return;  | 
|---|
 | 2293 | +	WRITE_ONCE(rdp->nocb_cb_kthread, t);  | 
|---|
 | 2294 | +	WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread);  | 
|---|
| 2494 | 2295 |  } | 
|---|
| 2495 | 2296 |   | 
|---|
| 2496 | 2297 |  /* | 
|---|
| 2497 | 2298 |   * If the specified CPU is a no-CBs CPU that does not already have its | 
|---|
| 2498 |  | - * rcuo kthreads, spawn them.  | 
|---|
 | 2299 | + * rcuo kthread, spawn it.  | 
|---|
| 2499 | 2300 |   */ | 
|---|
| 2500 |  | -static void rcu_spawn_all_nocb_kthreads(int cpu)  | 
|---|
 | 2301 | +static void rcu_spawn_cpu_nocb_kthread(int cpu)  | 
|---|
| 2501 | 2302 |  { | 
|---|
| 2502 |  | -	struct rcu_state *rsp;  | 
|---|
| 2503 |  | -  | 
|---|
| 2504 | 2303 |  	if (rcu_scheduler_fully_active) | 
|---|
| 2505 |  | -		for_each_rcu_flavor(rsp)  | 
|---|
| 2506 |  | -			rcu_spawn_one_nocb_kthread(rsp, cpu);  | 
|---|
 | 2304 | +		rcu_spawn_one_nocb_kthread(cpu);  | 
|---|
| 2507 | 2305 |  } | 
|---|
| 2508 | 2306 |   | 
|---|
| 2509 | 2307 |  /* | 
|---|
| .. | .. | 
|---|
| 2517 | 2315 |  	int cpu; | 
|---|
| 2518 | 2316 |   | 
|---|
| 2519 | 2317 |  	for_each_online_cpu(cpu) | 
|---|
| 2520 |  | -		rcu_spawn_all_nocb_kthreads(cpu);  | 
|---|
 | 2318 | +		rcu_spawn_cpu_nocb_kthread(cpu);  | 
|---|
| 2521 | 2319 |  } | 
|---|
| 2522 | 2320 |   | 
|---|
| 2523 |  | -/* How many follower CPU IDs per leader?  Default of -1 for sqrt(nr_cpu_ids). */  | 
|---|
| 2524 |  | -static int rcu_nocb_leader_stride = -1;  | 
|---|
| 2525 |  | -module_param(rcu_nocb_leader_stride, int, 0444);  | 
|---|
 | 2321 | +/* How many CB CPU IDs per GP kthread?  Default of -1 for sqrt(nr_cpu_ids). */  | 
|---|
 | 2322 | +static int rcu_nocb_gp_stride = -1;  | 
|---|
 | 2323 | +module_param(rcu_nocb_gp_stride, int, 0444);  | 
|---|
| 2526 | 2324 |   | 
|---|
| 2527 | 2325 |  /* | 
|---|
| 2528 |  | - * Initialize leader-follower relationships for all no-CBs CPU.  | 
|---|
 | 2326 | + * Initialize GP-CB relationships for all no-CBs CPUs.  | 
|---|
| 2529 | 2327 |   */ | 
|---|
| 2530 |  | -static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp)  | 
|---|
 | 2328 | +static void __init rcu_organize_nocb_kthreads(void)  | 
|---|
| 2531 | 2329 |  { | 
|---|
| 2532 | 2330 |  	int cpu; | 
|---|
| 2533 |  | -	int ls = rcu_nocb_leader_stride;  | 
|---|
| 2534 |  | -	int nl = 0;  /* Next leader. */  | 
|---|
 | 2331 | +	bool firsttime = true;  | 
|---|
 | 2332 | +	bool gotnocbs = false;  | 
|---|
 | 2333 | +	bool gotnocbscbs = true;  | 
|---|
 | 2334 | +	int ls = rcu_nocb_gp_stride;  | 
|---|
 | 2335 | +	int nl = 0;  /* Next GP kthread. */  | 
|---|
| 2535 | 2336 |  	struct rcu_data *rdp; | 
|---|
| 2536 |  | -	struct rcu_data *rdp_leader = NULL;  /* Suppress misguided gcc warn. */  | 
|---|
 | 2337 | +	struct rcu_data *rdp_gp = NULL;  /* Suppress misguided gcc warn. */  | 
|---|
| 2537 | 2338 |  	struct rcu_data *rdp_prev = NULL; | 
|---|
| 2538 | 2339 |   | 
|---|
| 2539 | 2340 |  	if (!cpumask_available(rcu_nocb_mask)) | 
|---|
| 2540 | 2341 |  		return; | 
|---|
| 2541 | 2342 |  	if (ls == -1) { | 
|---|
| 2542 |  | -		ls = int_sqrt(nr_cpu_ids);  | 
|---|
| 2543 |  | -		rcu_nocb_leader_stride = ls;  | 
|---|
 | 2343 | +		ls = nr_cpu_ids / int_sqrt(nr_cpu_ids);  | 
|---|
 | 2344 | +		rcu_nocb_gp_stride = ls;  | 
|---|
| 2544 | 2345 |  	} | 
|---|
| 2545 | 2346 |   | 
|---|
| 2546 | 2347 |  	/* | 
|---|
| .. | .. | 
|---|
| 2549 | 2350 |  	 * we will spawn the needed set of rcu_nocb_kthread() kthreads. | 
|---|
| 2550 | 2351 |  	 */ | 
|---|
| 2551 | 2352 |  	for_each_cpu(cpu, rcu_nocb_mask) { | 
|---|
| 2552 |  | -		rdp = per_cpu_ptr(rsp->rda, cpu);  | 
|---|
 | 2353 | +		rdp = per_cpu_ptr(&rcu_data, cpu);  | 
|---|
| 2553 | 2354 |  		if (rdp->cpu >= nl) { | 
|---|
| 2554 |  | -			/* New leader, set up for followers & next leader. */  | 
|---|
 | 2355 | +			/* New GP kthread, set up for CBs & next GP. */  | 
|---|
 | 2356 | +			gotnocbs = true;  | 
|---|
| 2555 | 2357 |  			nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls; | 
|---|
| 2556 |  | -			rdp->nocb_leader = rdp;  | 
|---|
| 2557 |  | -			rdp_leader = rdp;  | 
|---|
 | 2358 | +			rdp->nocb_gp_rdp = rdp;  | 
|---|
 | 2359 | +			rdp_gp = rdp;  | 
|---|
 | 2360 | +			if (dump_tree) {  | 
|---|
 | 2361 | +				if (!firsttime)  | 
|---|
 | 2362 | +					pr_cont("%s\n", gotnocbscbs  | 
|---|
 | 2363 | +							? "" : " (self only)");  | 
|---|
 | 2364 | +				gotnocbscbs = false;  | 
|---|
 | 2365 | +				firsttime = false;  | 
|---|
 | 2366 | +				pr_alert("%s: No-CB GP kthread CPU %d:",  | 
|---|
 | 2367 | +					 __func__, cpu);  | 
|---|
 | 2368 | +			}  | 
|---|
| 2558 | 2369 |  		} else { | 
|---|
| 2559 |  | -			/* Another follower, link to previous leader. */  | 
|---|
| 2560 |  | -			rdp->nocb_leader = rdp_leader;  | 
|---|
| 2561 |  | -			rdp_prev->nocb_next_follower = rdp;  | 
|---|
 | 2370 | +			/* Another CB kthread, link to previous GP kthread. */  | 
|---|
 | 2371 | +			gotnocbscbs = true;  | 
|---|
 | 2372 | +			rdp->nocb_gp_rdp = rdp_gp;  | 
|---|
 | 2373 | +			rdp_prev->nocb_next_cb_rdp = rdp;  | 
|---|
 | 2374 | +			if (dump_tree)  | 
|---|
 | 2375 | +				pr_cont(" %d", cpu);  | 
|---|
| 2562 | 2376 |  		} | 
|---|
| 2563 | 2377 |  		rdp_prev = rdp; | 
|---|
| 2564 | 2378 |  	} | 
|---|
 | 2379 | +	if (gotnocbs && dump_tree)  | 
|---|
 | 2380 | +		pr_cont("%s\n", gotnocbscbs ? "" : " (self only)");  | 
|---|
| 2565 | 2381 |  } | 
|---|
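The stride arithmetic above groups no-CBs CPUs into blocks of ls CPU IDs, with the first no-CBs CPU of each block acting as the GP kthread for the rest. A standalone sketch of that grouping, assuming for simplicity that every CPU is a no-CBs CPU (int_sqrt_() is an illustrative stand-in for the kernel's int_sqrt(); not kernel code):

```c
#include <stdio.h>

/* Small integer square root, standing in for the kernel's int_sqrt(). */
static int int_sqrt_(int x)
{
	int r = 0;

	while ((r + 1) * (r + 1) <= x)
		r++;
	return r;
}

int main(void)
{
	int nr_cpu_ids = 16;
	int ls = nr_cpu_ids / int_sqrt_(nr_cpu_ids);	/* default stride: 16 / 4 = 4 */
	int nl = 0;					/* first CPU ID past the current block */

	for (int cpu = 0; cpu < nr_cpu_ids; cpu++) {
		if (cpu >= nl) {
			nl = ((cpu + 1 + ls - 1) / ls) * ls;	/* DIV_ROUND_UP(cpu + 1, ls) * ls */
			printf("CPU %2d: GP kthread for CPUs %d-%d\n", cpu, cpu, nl - 1);
		} else {
			printf("CPU %2d: CB kthread only\n", cpu);
		}
	}
	return 0;
}
```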
| 2566 | 2382 |   | 
|---|
| 2567 |  | -/* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */  | 
|---|
| 2568 |  | -static bool init_nocb_callback_list(struct rcu_data *rdp)  | 
|---|
 | 2383 | +/*  | 
|---|
 | 2384 | + * Bind the current task to the offloaded CPUs.  If there are no offloaded  | 
|---|
 | 2385 | + * CPUs, leave the task unbound.  Splat if the bind attempt fails.  | 
|---|
 | 2386 | + */  | 
|---|
 | 2387 | +void rcu_bind_current_to_nocb(void)  | 
|---|
| 2569 | 2388 |  { | 
|---|
| 2570 |  | -	if (!rcu_is_nocb_cpu(rdp->cpu))  | 
|---|
| 2571 |  | -		return false;  | 
|---|
 | 2389 | +	if (cpumask_available(rcu_nocb_mask) && cpumask_weight(rcu_nocb_mask))  | 
|---|
 | 2390 | +		WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask));  | 
|---|
 | 2391 | +}  | 
|---|
 | 2392 | +EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb);  | 
|---|
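rcu_bind_current_to_nocb() restricts the current task's affinity to the offloaded CPUs via the in-kernel sched_setaffinity(). A rough userspace analogue using the glibc sched_setaffinity() (which takes an extra size argument), purely for illustration:

```c
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set);	/* pretend CPU 0 is the only offloaded CPU */

	/* pid 0 means "the calling thread"; the kernel-internal API differs. */
	if (sched_setaffinity(0, sizeof(set), &set))
		perror("sched_setaffinity");
	return 0;
}
```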
| 2572 | 2393 |   | 
|---|
| 2573 |  | -	/* If there are early-boot callbacks, move them to nocb lists. */  | 
|---|
| 2574 |  | -	if (!rcu_segcblist_empty(&rdp->cblist)) {  | 
|---|
| 2575 |  | -		rdp->nocb_head = rcu_segcblist_head(&rdp->cblist);  | 
|---|
| 2576 |  | -		rdp->nocb_tail = rcu_segcblist_tail(&rdp->cblist);  | 
|---|
| 2577 |  | -		atomic_long_set(&rdp->nocb_q_count,  | 
|---|
| 2578 |  | -				rcu_segcblist_n_cbs(&rdp->cblist));  | 
|---|
| 2579 |  | -		atomic_long_set(&rdp->nocb_q_count_lazy,  | 
|---|
| 2580 |  | -				rcu_segcblist_n_lazy_cbs(&rdp->cblist));  | 
|---|
| 2581 |  | -		rcu_segcblist_init(&rdp->cblist);  | 
|---|
| 2582 |  | -	}  | 
|---|
| 2583 |  | -	rcu_segcblist_disable(&rdp->cblist);  | 
|---|
| 2584 |  | -	return true;  | 
|---|
 | 2394 | +/*  | 
|---|
 | 2395 | + * Dump out nocb grace-period kthread state for the specified rcu_data  | 
|---|
 | 2396 | + * structure.  | 
|---|
 | 2397 | + */  | 
|---|
 | 2398 | +static void show_rcu_nocb_gp_state(struct rcu_data *rdp)  | 
|---|
 | 2399 | +{  | 
|---|
 | 2400 | +	struct rcu_node *rnp = rdp->mynode;  | 
|---|
 | 2401 | +  | 
|---|
 | 2402 | +	pr_info("nocb GP %d %c%c%c%c%c%c %c[%c%c] %c%c:%ld rnp %d:%d %lu\n",  | 
|---|
 | 2403 | +		rdp->cpu,  | 
|---|
 | 2404 | +		"kK"[!!rdp->nocb_gp_kthread],  | 
|---|
 | 2405 | +		"lL"[raw_spin_is_locked(&rdp->nocb_gp_lock)],  | 
|---|
 | 2406 | +		"dD"[!!rdp->nocb_defer_wakeup],  | 
|---|
 | 2407 | +		"tT"[timer_pending(&rdp->nocb_timer)],  | 
|---|
 | 2408 | +		"bB"[timer_pending(&rdp->nocb_bypass_timer)],  | 
|---|
 | 2409 | +		"sS"[!!rdp->nocb_gp_sleep],  | 
|---|
 | 2410 | +		".W"[swait_active(&rdp->nocb_gp_wq)],  | 
|---|
 | 2411 | +		".W"[swait_active(&rnp->nocb_gp_wq[0])],  | 
|---|
 | 2412 | +		".W"[swait_active(&rnp->nocb_gp_wq[1])],  | 
|---|
 | 2413 | +		".B"[!!rdp->nocb_gp_bypass],  | 
|---|
 | 2414 | +		".G"[!!rdp->nocb_gp_gp],  | 
|---|
 | 2415 | +		(long)rdp->nocb_gp_seq,  | 
|---|
 | 2416 | +		rnp->grplo, rnp->grphi, READ_ONCE(rdp->nocb_gp_loops));  | 
|---|
 | 2417 | +}  | 
|---|
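The state dumps above and below encode each boolean as a single character by indexing a two-character string literal. A tiny standalone demonstration of that idiom (variable names are illustrative):

```c
#include <stdio.h>

int main(void)
{
	int kthread_exists = 1;		/* stand-ins for the dumped state bits */
	int timer_is_pending = 0;

	/* Upper case means "true", lower case means "false": prints "Kt". */
	printf("%c%c\n", "kK"[!!kthread_exists], "tT"[!!timer_is_pending]);
	return 0;
}
```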
 | 2418 | +  | 
|---|
 | 2419 | +/* Dump out nocb kthread state for the specified rcu_data structure. */  | 
|---|
 | 2420 | +static void show_rcu_nocb_state(struct rcu_data *rdp)  | 
|---|
 | 2421 | +{  | 
|---|
 | 2422 | +	struct rcu_segcblist *rsclp = &rdp->cblist;  | 
|---|
 | 2423 | +	bool waslocked;  | 
|---|
 | 2424 | +	bool wastimer;  | 
|---|
 | 2425 | +	bool wassleep;  | 
|---|
 | 2426 | +  | 
|---|
 | 2427 | +	if (rdp->nocb_gp_rdp == rdp)  | 
|---|
 | 2428 | +		show_rcu_nocb_gp_state(rdp);  | 
|---|
 | 2429 | +  | 
|---|
 | 2430 | +	pr_info("   CB %d->%d %c%c%c%c%c%c F%ld L%ld C%d %c%c%c%c%c q%ld\n",  | 
|---|
 | 2431 | +		rdp->cpu, rdp->nocb_gp_rdp->cpu,  | 
|---|
 | 2432 | +		"kK"[!!rdp->nocb_cb_kthread],  | 
|---|
 | 2433 | +		"bB"[raw_spin_is_locked(&rdp->nocb_bypass_lock)],  | 
|---|
 | 2434 | +		"cC"[!!atomic_read(&rdp->nocb_lock_contended)],  | 
|---|
 | 2435 | +		"lL"[raw_spin_is_locked(&rdp->nocb_lock)],  | 
|---|
 | 2436 | +		"sS"[!!rdp->nocb_cb_sleep],  | 
|---|
 | 2437 | +		".W"[swait_active(&rdp->nocb_cb_wq)],  | 
|---|
 | 2438 | +		jiffies - rdp->nocb_bypass_first,  | 
|---|
 | 2439 | +		jiffies - rdp->nocb_nobypass_last,  | 
|---|
 | 2440 | +		rdp->nocb_nobypass_count,  | 
|---|
 | 2441 | +		".D"[rcu_segcblist_ready_cbs(rsclp)],  | 
|---|
 | 2442 | +		".W"[!rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)],  | 
|---|
 | 2443 | +		".R"[!rcu_segcblist_restempty(rsclp, RCU_WAIT_TAIL)],  | 
|---|
 | 2444 | +		".N"[!rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL)],  | 
|---|
 | 2445 | +		".B"[!!rcu_cblist_n_cbs(&rdp->nocb_bypass)],  | 
|---|
 | 2446 | +		rcu_segcblist_n_cbs(&rdp->cblist));  | 
|---|
 | 2447 | +  | 
|---|
 | 2448 | +	/* It is OK for GP kthreads to have GP state. */  | 
|---|
 | 2449 | +	if (rdp->nocb_gp_rdp == rdp)  | 
|---|
 | 2450 | +		return;  | 
|---|
 | 2451 | +  | 
|---|
 | 2452 | +	waslocked = raw_spin_is_locked(&rdp->nocb_gp_lock);  | 
|---|
 | 2453 | +	wastimer = timer_pending(&rdp->nocb_bypass_timer);  | 
|---|
 | 2454 | +	wassleep = swait_active(&rdp->nocb_gp_wq);  | 
|---|
 | 2455 | +	if (!rdp->nocb_gp_sleep && !waslocked && !wastimer && !wassleep)  | 
|---|
 | 2456 | +		return;  /* Nothing untoward. */  | 
|---|
 | 2457 | +  | 
|---|
 | 2458 | +	pr_info("   nocb GP activity on CB-only CPU!!! %c%c%c%c %c\n",  | 
|---|
 | 2459 | +		"lL"[waslocked],  | 
|---|
 | 2460 | +		"dD"[!!rdp->nocb_defer_wakeup],  | 
|---|
 | 2461 | +		"tT"[wastimer],  | 
|---|
 | 2462 | +		"sS"[!!rdp->nocb_gp_sleep],  | 
|---|
 | 2463 | +		".W"[wassleep]);  | 
|---|
| 2585 | 2464 |  } | 
|---|
| 2586 | 2465 |   | 
|---|
| 2587 | 2466 |  #else /* #ifdef CONFIG_RCU_NOCB_CPU */ | 
|---|
| 2588 | 2467 |   | 
|---|
| 2589 |  | -static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)  | 
|---|
 | 2468 | +/* No ->nocb_lock to acquire.  */  | 
|---|
 | 2469 | +static void rcu_nocb_lock(struct rcu_data *rdp)  | 
|---|
| 2590 | 2470 |  { | 
|---|
| 2591 |  | -	WARN_ON_ONCE(1); /* Should be dead code. */  | 
|---|
| 2592 |  | -	return false;  | 
|---|
 | 2471 | +}  | 
|---|
 | 2472 | +  | 
|---|
 | 2473 | +/* No ->nocb_lock to release.  */  | 
|---|
 | 2474 | +static void rcu_nocb_unlock(struct rcu_data *rdp)  | 
|---|
 | 2475 | +{  | 
|---|
 | 2476 | +}  | 
|---|
 | 2477 | +  | 
|---|
 | 2478 | +/* No ->nocb_lock to release.  */  | 
|---|
 | 2479 | +static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,  | 
|---|
 | 2480 | +				       unsigned long flags)  | 
|---|
 | 2481 | +{  | 
|---|
 | 2482 | +	local_irq_restore(flags);  | 
|---|
 | 2483 | +}  | 
|---|
 | 2484 | +  | 
|---|
 | 2485 | +/* Lockdep check that ->cblist may be safely accessed. */  | 
|---|
 | 2486 | +static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)  | 
|---|
 | 2487 | +{  | 
|---|
 | 2488 | +	lockdep_assert_irqs_disabled();  | 
|---|
| 2593 | 2489 |  } | 
|---|
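These empty !CONFIG_RCU_NOCB_CPU definitions follow the usual compiled-out-feature pattern: call sites stay free of #ifdefs and the compiler discards the no-op calls. A minimal sketch of the pattern, assuming a hypothetical FEATURE_ENABLED option (not kernel code):

```c
#include <stdio.h>

/* #define FEATURE_ENABLED */	/* hypothetical option; uncomment to compile the feature in */

#ifdef FEATURE_ENABLED
static void feature_lock(void)   { puts("feature lock acquired"); }
static void feature_unlock(void) { puts("feature lock released"); }
#else
/* Compiled-out variants: empty bodies the optimizer removes entirely. */
static void feature_lock(void)   { }
static void feature_unlock(void) { }
#endif

int main(void)
{
	feature_lock();		/* call sites need no #ifdef either way */
	feature_unlock();
	return 0;
}
```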
| 2594 | 2490 |   | 
|---|
| 2595 | 2491 |  static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq) | 
|---|
| .. | .. | 
|---|
| 2605 | 2501 |  { | 
|---|
| 2606 | 2502 |  } | 
|---|
| 2607 | 2503 |   | 
|---|
| 2608 |  | -static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,  | 
|---|
| 2609 |  | -			    bool lazy, unsigned long flags)  | 
|---|
 | 2504 | +static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,  | 
|---|
 | 2505 | +				  unsigned long j)  | 
|---|
 | 2506 | +{  | 
|---|
 | 2507 | +	return true;  | 
|---|
 | 2508 | +}  | 
|---|
 | 2509 | +  | 
|---|
 | 2510 | +static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,  | 
|---|
 | 2511 | +				bool *was_alldone, unsigned long flags)  | 
|---|
| 2610 | 2512 |  { | 
|---|
| 2611 | 2513 |  	return false; | 
|---|
| 2612 | 2514 |  } | 
|---|
| 2613 | 2515 |   | 
|---|
| 2614 |  | -static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp,  | 
|---|
| 2615 |  | -						     struct rcu_data *rdp,  | 
|---|
| 2616 |  | -						     unsigned long flags)  | 
|---|
 | 2516 | +static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,  | 
|---|
 | 2517 | +				 unsigned long flags)  | 
|---|
| 2617 | 2518 |  { | 
|---|
| 2618 |  | -	return false;  | 
|---|
 | 2519 | +	WARN_ON_ONCE(1);  /* Should be dead code! */  | 
|---|
| 2619 | 2520 |  } | 
|---|
| 2620 | 2521 |   | 
|---|
| 2621 | 2522 |  static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) | 
|---|
| .. | .. | 
|---|
| 2631 | 2532 |  { | 
|---|
| 2632 | 2533 |  } | 
|---|
| 2633 | 2534 |   | 
|---|
| 2634 |  | -static void rcu_spawn_all_nocb_kthreads(int cpu)  | 
|---|
 | 2535 | +static void rcu_spawn_cpu_nocb_kthread(int cpu)  | 
|---|
| 2635 | 2536 |  { | 
|---|
| 2636 | 2537 |  } | 
|---|
| 2637 | 2538 |   | 
|---|
| .. | .. | 
|---|
| 2639 | 2540 |  { | 
|---|
| 2640 | 2541 |  } | 
|---|
| 2641 | 2542 |   | 
|---|
| 2642 |  | -static bool init_nocb_callback_list(struct rcu_data *rdp)  | 
|---|
 | 2543 | +static void show_rcu_nocb_state(struct rcu_data *rdp)  | 
|---|
| 2643 | 2544 |  { | 
|---|
| 2644 |  | -	return false;  | 
|---|
| 2645 | 2545 |  } | 
|---|
| 2646 | 2546 |   | 
|---|
| 2647 | 2547 |  #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ | 
|---|
| .. | .. | 
|---|
| 2655 | 2555 |   * This code relies on the fact that all NO_HZ_FULL CPUs are also | 
|---|
| 2656 | 2556 |   * CONFIG_RCU_NOCB_CPU CPUs. | 
|---|
| 2657 | 2557 |   */ | 
|---|
| 2658 |  | -static bool rcu_nohz_full_cpu(struct rcu_state *rsp)  | 
|---|
 | 2558 | +static bool rcu_nohz_full_cpu(void)  | 
|---|
| 2659 | 2559 |  { | 
|---|
| 2660 | 2560 |  #ifdef CONFIG_NO_HZ_FULL | 
|---|
| 2661 | 2561 |  	if (tick_nohz_full_cpu(smp_processor_id()) && | 
|---|
| 2662 |  | -	    (!rcu_gp_in_progress(rsp) ||  | 
|---|
| 2663 |  | -	     ULONG_CMP_LT(jiffies, READ_ONCE(rsp->gp_start) + HZ)))  | 
|---|
 | 2562 | +	    (!rcu_gp_in_progress() ||  | 
|---|
 | 2563 | +	     time_before(jiffies, READ_ONCE(rcu_state.gp_start) + HZ)))  | 
|---|
| 2664 | 2564 |  		return true; | 
|---|
| 2665 | 2565 |  #endif /* #ifdef CONFIG_NO_HZ_FULL */ | 
|---|
| 2666 | 2566 |  	return false; | 
|---|
| .. | .. | 
|---|
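The switch from ULONG_CMP_LT() to time_before() in rcu_nohz_full_cpu() above keeps the jiffies comparison wraparound-safe. A standalone sketch of why such comparisons work, using an illustrative my_time_before() macro rather than the kernel's:

```c
#include <stdio.h>

/* Illustrative re-implementation; the kernel's time_before() is equivalent in effect. */
#define my_time_before(a, b)	((long)((a) - (b)) < 0)

int main(void)
{
	unsigned long start = (unsigned long)-10;	/* "jiffies" just before wraparound */
	unsigned long now = 5;				/* "jiffies" shortly after wraparound */

	/* Still inside the start + 100 window despite the wrap: prints 1. */
	printf("%d\n", my_time_before(now, start + 100));
	return 0;
}
```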
| 2677 | 2577 |  } | 
|---|
| 2678 | 2578 |   | 
|---|
| 2679 | 2579 |  /* Record the current task on dyntick-idle entry. */ | 
|---|
| 2680 |  | -static void rcu_dynticks_task_enter(void)  | 
|---|
 | 2580 | +static __always_inline void rcu_dynticks_task_enter(void)  | 
|---|
| 2681 | 2581 |  { | 
|---|
| 2682 | 2582 |  #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) | 
|---|
| 2683 | 2583 |  	WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id()); | 
|---|
| .. | .. | 
|---|
| 2685 | 2585 |  } | 
|---|
| 2686 | 2586 |   | 
|---|
| 2687 | 2587 |  /* Record no current task on dyntick-idle exit. */ | 
|---|
| 2688 |  | -static void rcu_dynticks_task_exit(void)  | 
|---|
 | 2588 | +static __always_inline void rcu_dynticks_task_exit(void)  | 
|---|
| 2689 | 2589 |  { | 
|---|
| 2690 | 2590 |  #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) | 
|---|
| 2691 | 2591 |  	WRITE_ONCE(current->rcu_tasks_idle_cpu, -1); | 
|---|
| 2692 | 2592 |  #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ | 
|---|
| 2693 | 2593 |  } | 
|---|
 | 2594 | +  | 
|---|
 | 2595 | +/* Turn on heavyweight RCU tasks trace readers on idle/user entry. */  | 
|---|
 | 2596 | +static __always_inline void rcu_dynticks_task_trace_enter(void)  | 
|---|
 | 2597 | +{  | 
|---|
 | 2598 | +#ifdef CONFIG_TASKS_TRACE_RCU  | 
|---|
 | 2599 | +	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))  | 
|---|
 | 2600 | +		current->trc_reader_special.b.need_mb = true;  | 
|---|
 | 2601 | +#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */  | 
|---|
 | 2602 | +}  | 
|---|
 | 2603 | +  | 
|---|
 | 2604 | +/* Turn off heavyweight RCU tasks trace readers on idle/user exit. */  | 
|---|
 | 2605 | +static __always_inline void rcu_dynticks_task_trace_exit(void)  | 
|---|
 | 2606 | +{  | 
|---|
 | 2607 | +#ifdef CONFIG_TASKS_TRACE_RCU  | 
|---|
 | 2608 | +	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))  | 
|---|
 | 2609 | +		current->trc_reader_special.b.need_mb = false;  | 
|---|
 | 2610 | +#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */  | 
|---|
 | 2611 | +}  | 
|---|
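The IS_ENABLED() checks above let a Kconfig-dependent branch be an ordinary if that the compiler removes when the option is off, instead of wrapping the statement in #ifdef. A simplified standalone sketch (MY_IS_ENABLED and the config name are illustrative stand-ins; the kernel macro is more elaborate):

```c
#include <stdbool.h>
#include <stdio.h>

#define CONFIG_HEAVYWEIGHT_READERS	1		/* stand-in for a Kconfig symbol (0 or 1) */
#define MY_IS_ENABLED(option)		(option)	/* simplified; illustrative only */

int main(void)
{
	bool need_mb = false;

	if (MY_IS_ENABLED(CONFIG_HEAVYWEIGHT_READERS))	/* dead-code-eliminated when 0 */
		need_mb = true;
	printf("need_mb = %d\n", need_mb);
	return 0;
}
```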