2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/kernel/rcu/tree.c
@@ -964,6 +964,7 @@
 	}
 	raw_spin_unlock_rcu_node(rdp->mynode);
 }
+NOKPROBE_SYMBOL(__rcu_irq_enter_check_tick);
 #endif /* CONFIG_NO_HZ_FULL */
 
 /**
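NOKPROBE_SYMBOL() marks a function as off-limits to kprobes, which matters for entry-path code such as __rcu_irq_enter_check_tick() where a probe trap could fire while RCU is not yet watching the CPU. A minimal kernel-module style sketch of the idiom (the helper name below is hypothetical, not part of this patch):

	#include <linux/kprobes.h>

	/* Hypothetical helper that may run on an interrupt/NMI entry path
	 * where taking a kprobe breakpoint would be unsafe. */
	static void example_entry_helper(void)
	{
		/* ... early-entry work ... */
	}
	/* Keep kprobes from ever landing inside this function. */
	NOKPROBE_SYMBOL(example_entry_helper);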
@@ -1157,7 +1158,7 @@
 	preempt_disable_notrace();
 	rdp = this_cpu_ptr(&rcu_data);
 	rnp = rdp->mynode;
-	if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
+	if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || READ_ONCE(rnp->ofl_seq) & 0x1)
 		ret = true;
 	preempt_enable_notrace();
 	return ret;
@@ -1724,6 +1725,7 @@
  */
 static bool rcu_gp_init(void)
 {
+	unsigned long firstseq;
 	unsigned long flags;
 	unsigned long oldmask;
 	unsigned long mask;
@@ -1767,6 +1769,12 @@
 	 */
 	rcu_state.gp_state = RCU_GP_ONOFF;
 	rcu_for_each_leaf_node(rnp) {
+		smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
+		firstseq = READ_ONCE(rnp->ofl_seq);
+		if (firstseq & 0x1)
+			while (firstseq == READ_ONCE(rnp->ofl_seq))
+				schedule_timeout_idle(1);  // Can't wake unless RCU is watching.
+		smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
 		raw_spin_lock(&rcu_state.ofl_lock);
 		raw_spin_lock_irq_rcu_node(rnp);
 		if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
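The loop added to rcu_gp_init() is the reader side of an odd/even handshake on ->ofl_seq: an odd value means a CPU-hotplug transition is in flight on that leaf node, so grace-period initialization sleeps a jiffy at a time until the counter goes even again, with full barriers pairing against the updater. A user-space C11 sketch of the same ordering pattern (the names and the busy-wait are illustrative stand-ins, not the kernel's code):

	#include <stdatomic.h>

	static atomic_ulong ofl_seq;	/* illustrative stand-in for rnp->ofl_seq */

	static void wait_for_hotplug_to_finish(void)
	{
		unsigned long seq;

		atomic_thread_fence(memory_order_seq_cst);	/* pairs with the updater's fence after going odd */
		seq = atomic_load_explicit(&ofl_seq, memory_order_relaxed);
		if (seq & 0x1)	/* odd: a CPU-hotplug transition is in flight */
			while (seq == atomic_load_explicit(&ofl_seq, memory_order_relaxed))
				;	/* the kernel sleeps here via schedule_timeout_idle(1) */
		atomic_thread_fence(memory_order_seq_cst);	/* pairs with the updater's fence before going even */
	}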
@@ -2650,7 +2658,7 @@
 	struct rcu_node *rnp_old = NULL;
 
 	/* Funnel through hierarchy to reduce memory contention. */
-	rnp = __this_cpu_read(rcu_data.mynode);
+	rnp = raw_cpu_read(rcu_data.mynode);
 	for (; rnp != NULL; rnp = rnp->parent) {
 		ret = (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) ||
 		      !raw_spin_trylock(&rnp->fqslock);
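The switch from __this_cpu_read() to raw_cpu_read() drops the CONFIG_DEBUG_PREEMPT preemption-safety check, which is acceptable here because the value is only used as a starting point for the funnel-lock traversal and a stale ->mynode pointer is still a valid leaf. An illustrative kernel-style sketch of that distinction (the per-CPU variable and function below are hypothetical):

	#include <linux/percpu.h>

	static DEFINE_PER_CPU(int, example_counter);	/* hypothetical per-CPU variable */

	static int read_example_counter(void)
	{
		/*
		 * __this_cpu_read() would complain (under CONFIG_DEBUG_PREEMPT) if the
		 * caller were preemptible; raw_cpu_read() skips that check for callers
		 * that can tolerate being migrated and reading a possibly stale value.
		 */
		return raw_cpu_read(example_counter);
	}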
@@ -3273,6 +3281,30 @@
 	}
 }
 
+static bool
+need_offload_krc(struct kfree_rcu_cpu *krcp)
+{
+	int i;
+
+	for (i = 0; i < FREE_N_CHANNELS; i++)
+		if (krcp->bkvhead[i])
+			return true;
+
+	return !!krcp->head;
+}
+
+static bool
+need_wait_for_krwp_work(struct kfree_rcu_cpu_work *krwp)
+{
+	int i;
+
+	for (i = 0; i < FREE_N_CHANNELS; i++)
+		if (krwp->bkvhead_free[i])
+			return true;
+
+	return !!krwp->head_free;
+}
+
 /*
  * Schedule the kfree batch RCU work to run in workqueue context after a GP.
  *
@@ -3290,16 +3322,13 @@
 	for (i = 0; i < KFREE_N_BATCHES; i++) {
 		krwp = &(krcp->krw_arr[i]);
 
-		/*
-		 * Try to detach bkvhead or head and attach it over any
-		 * available corresponding free channel. It can be that
-		 * a previous RCU batch is in progress, it means that
-		 * immediately to queue another one is not possible so
-		 * return false to tell caller to retry.
-		 */
-		if ((krcp->bkvhead[0] && !krwp->bkvhead_free[0]) ||
-		    (krcp->bkvhead[1] && !krwp->bkvhead_free[1]) ||
-		    (krcp->head && !krwp->head_free)) {
+		// Try to detach bkvhead or head and attach it, but only when
+		// all channels are free. Any non-free channel means krwp still
+		// has on-going RCU work handling its free business.
+		if (need_wait_for_krwp_work(krwp))
+			continue;
+
+		if (need_offload_krc(krcp)) {
 			// Channel 1 corresponds to SLAB ptrs.
 			// Channel 2 corresponds to vmalloc ptrs.
 			for (j = 0; j < FREE_N_CHANNELS; j++) {
@@ -3326,11 +3355,11 @@
 			 */
 			queue_rcu_work(system_wq, &krwp->rcu_work);
 		}
-
-		// Repeat if any "free" corresponding channel is still busy.
-		if (krcp->bkvhead[0] || krcp->bkvhead[1] || krcp->head)
-			repeat = true;
 	}
+
+	// Repeat if any "free" corresponding channel is still busy.
+	if (need_offload_krc(krcp))
+		repeat = true;
 
 	return !repeat;
 }
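The two new helpers split the old "any pending channel with a free slot" test in two: skip a krwp that still has work in flight on any channel, and only then detach everything pending in krcp. A stripped-down user-space sketch of that gating pattern (the structures and names below are simplified stand-ins for kfree_rcu_cpu and kfree_rcu_cpu_work, not the kernel's types):

	#include <stdbool.h>
	#include <stddef.h>

	#define N_CHANNELS 2	/* stand-in for FREE_N_CHANNELS */

	struct pending { void *bkv[N_CHANNELS]; void *head; };			/* ~ kfree_rcu_cpu */
	struct batch   { void *bkv_free[N_CHANNELS]; void *head_free; };	/* ~ kfree_rcu_cpu_work */

	static bool need_offload(const struct pending *p)
	{
		for (int i = 0; i < N_CHANNELS; i++)
			if (p->bkv[i])
				return true;
		return p->head != NULL;
	}

	static bool batch_busy(const struct batch *b)
	{
		for (int i = 0; i < N_CHANNELS; i++)
			if (b->bkv_free[i])
				return true;
		return b->head_free != NULL;
	}

	/* Detach all pending work into b, but only if b is completely idle. */
	static bool try_detach(struct pending *p, struct batch *b)
	{
		if (batch_busy(b) || !need_offload(p))
			return false;
		for (int i = 0; i < N_CHANNELS; i++) {
			b->bkv_free[i] = p->bkv[i];
			p->bkv[i] = NULL;
		}
		b->head_free = p->head;
		p->head = NULL;
		return true;
	}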
@@ -4107,6 +4136,9 @@
 
 	rnp = rdp->mynode;
 	mask = rdp->grpmask;
+	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+	WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+	smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
 	newcpu = !(rnp->expmaskinitnext & mask);
@@ -4124,6 +4156,9 @@
 	} else {
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	}
+	smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
+	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+	WARN_ON_ONCE(rnp->ofl_seq & 0x1);
 	smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
 }
 
@@ -4150,6 +4185,9 @@
 
 	/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
 	mask = rdp->grpmask;
+	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+	WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+	smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
 	raw_spin_lock(&rcu_state.ofl_lock);
 	raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
 	rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
@@ -4162,6 +4200,9 @@
 	WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	raw_spin_unlock(&rcu_state.ofl_lock);
+	smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
+	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+	WARN_ON_ONCE(rnp->ofl_seq & 0x1);
 
 	rdp->cpu_started = false;
 }
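Both CPU-hotplug paths now form the updater side of the ->ofl_seq handshake: bump the counter to odd before touching the leaf's masks, do the update under the locks, then bump it back to even, with a full barrier on each side and WARN_ON_ONCE() asserting the expected parity. A matching user-space C11 sketch of the updater (illustrative names, mirroring the reader sketch after the rcu_gp_init() hunk):

	#include <assert.h>
	#include <stdatomic.h>

	static atomic_ulong ofl_seq;	/* illustrative stand-in for rnp->ofl_seq */

	static void hotplug_update(void (*update_masks)(void))
	{
		unsigned long seq;

		/* Go odd: concurrent readers now treat this leaf as "in transition". */
		seq = atomic_load_explicit(&ofl_seq, memory_order_relaxed) + 1;
		atomic_store_explicit(&ofl_seq, seq, memory_order_relaxed);
		assert(seq & 0x1);	/* kernel: WARN_ON_ONCE(!(rnp->ofl_seq & 0x1)) */
		atomic_thread_fence(memory_order_seq_cst);	/* pairs with the reader's first fence */

		update_masks();		/* the qsmaskinitnext update done under the locks */

		atomic_thread_fence(memory_order_seq_cst);	/* pairs with the reader's second fence */
		/* Go even: the transition is complete. */
		seq = atomic_load_explicit(&ofl_seq, memory_order_relaxed) + 1;
		atomic_store_explicit(&ofl_seq, seq, memory_order_relaxed);
		assert(!(seq & 0x1));	/* kernel: WARN_ON_ONCE(rnp->ofl_seq & 0x1) */
	}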