2023-11-06 e3e12f52b214121840b44c91de5b3e5af5d3eb84
kernel/mm/slub.c
@@ -1199,7 +1199,7 @@
 	unsigned long uninitialized_var(flags);
 	int ret = 0;
 
-	spin_lock_irqsave(&n->list_lock, flags);
+	raw_spin_lock_irqsave(&n->list_lock, flags);
 	slab_lock(page);
 
 	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
@@ -1234,7 +1234,7 @@
 			 bulk_cnt, cnt);
 
 	slab_unlock(page);
-	spin_unlock_irqrestore(&n->list_lock, flags);
+	raw_spin_unlock_irqrestore(&n->list_lock, flags);
 	if (!ret)
 		slab_fix(s, "Object at 0x%p not freed", object);
 	return ret;
@@ -1372,6 +1372,12 @@
 	return false;
 }
 #endif /* CONFIG_SLUB_DEBUG */
+
+struct slub_free_list {
+	raw_spinlock_t lock;
+	struct list_head list;
+};
+static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
 
 /*
  * Hooks for other subsystems that check memory allocations. In a typical
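The hunk above adds a per-CPU list of slab pages whose release has to be postponed, guarded by a raw_spinlock_t so the lock can still be taken with interrupts disabled on PREEMPT_RT. As a rough, self-contained sketch (illustration only, not part of the patch; the helper name below is made up), a DEFINE_PER_CPU structure like this is initialized once for every possible CPU and then reached through this_cpu_ptr() on hot paths, which is what the later kmem_cache_init() and free_slab() hunks do:

/* Illustrative sketch only; mirrors the declarations added above. */
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>

struct slub_free_list {
	raw_spinlock_t lock;
	struct list_head list;
};
static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);

/* Hypothetical init helper; the patch itself does this in kmem_cache_init(). */
static void __init slub_free_list_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
		INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
	}
}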
@@ -1619,10 +1625,17 @@
 	void *start, *p, *next;
 	int idx, order;
 	bool shuffle;
+	bool enableirqs = false;
 
 	flags &= gfp_allowed_mask;
 
 	if (gfpflags_allow_blocking(flags))
+		enableirqs = true;
+#ifdef CONFIG_PREEMPT_RT_FULL
+	if (system_state > SYSTEM_BOOTING)
+		enableirqs = true;
+#endif
+	if (enableirqs)
 		local_irq_enable();
 
 	flags |= s->allocflags;
@@ -1682,7 +1695,7 @@
 	page->frozen = 1;
 
 out:
-	if (gfpflags_allow_blocking(flags))
+	if (enableirqs)
 		local_irq_disable();
 	if (!page)
 		return NULL;
@@ -1740,6 +1753,16 @@
 	__free_pages(page, order);
 }
 
+static void free_delayed(struct list_head *h)
+{
+	while (!list_empty(h)) {
+		struct page *page = list_first_entry(h, struct page, lru);
+
+		list_del(&page->lru);
+		__free_slab(page->slab_cache, page);
+	}
+}
+
 static void rcu_free_slab(struct rcu_head *h)
 {
 	struct page *page = container_of(h, struct page, rcu_head);
@@ -1751,6 +1774,12 @@
 {
 	if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
 		call_rcu(&page->rcu_head, rcu_free_slab);
+	} else if (irqs_disabled()) {
+		struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
+
+		raw_spin_lock(&f->lock);
+		list_add(&page->lru, &f->list);
+		raw_spin_unlock(&f->lock);
 	} else
 		__free_slab(s, page);
 }
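Taken together, free_delayed() and the new irqs_disabled() branch in free_slab() form a small deferral scheme: a slab page that becomes free while interrupts are off is parked on the current CPU's slub_free_list instead of being handed straight back to the page allocator, presumably because __free_slab() can reach page-allocator paths that may sleep on PREEMPT_RT. A condensed, illustrative sketch of the two halves (hypothetical function names, relying on the definitions added by this patch, not the patch's exact code):

/* Sketch: decide between freeing now and parking the page per CPU. */
static void sketch_free_or_defer(struct kmem_cache *s, struct page *page)
{
	if (irqs_disabled()) {
		struct slub_free_list *f = this_cpu_ptr(&slub_free_list);

		/* Interrupts are off: queue the page for later. */
		raw_spin_lock(&f->lock);
		list_add(&page->lru, &f->list);
		raw_spin_unlock(&f->lock);
	} else {
		__free_slab(s, page);	/* safe to free right away */
	}
}

/* Sketch: drain the parked pages once interrupts may be enabled again. */
static void sketch_drain_deferred(void)
{
	struct slub_free_list *f;
	LIST_HEAD(tofree);

	local_irq_disable();
	f = this_cpu_ptr(&slub_free_list);
	raw_spin_lock(&f->lock);
	list_splice_init(&f->list, &tofree);
	raw_spin_unlock(&f->lock);
	local_irq_enable();

	free_delayed(&tofree);	/* walks the list and calls __free_slab() */
}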
@@ -1858,7 +1887,7 @@
 	if (!n || !n->nr_partial)
 		return NULL;
 
-	spin_lock(&n->list_lock);
+	raw_spin_lock(&n->list_lock);
 	list_for_each_entry_safe(page, page2, &n->partial, lru) {
 		void *t;
 
@@ -1883,7 +1912,7 @@
 			break;
 
 	}
-	spin_unlock(&n->list_lock);
+	raw_spin_unlock(&n->list_lock);
 	return object;
 }
 
@@ -2135,7 +2164,7 @@
 			 * that acquire_slab() will see a slab page that
 			 * is frozen
 			 */
-			spin_lock(&n->list_lock);
+			raw_spin_lock(&n->list_lock);
 		}
 	} else {
 		m = M_FULL;
@@ -2146,7 +2175,7 @@
 				 * slabs from diagnostic functions will not see
 				 * any frozen slabs.
 				 */
-				spin_lock(&n->list_lock);
+				raw_spin_lock(&n->list_lock);
 			}
 		}
 
@@ -2181,7 +2210,7 @@
 		goto redo;
 
 	if (lock)
-		spin_unlock(&n->list_lock);
+		raw_spin_unlock(&n->list_lock);
 
 	if (m == M_FREE) {
 		stat(s, DEACTIVATE_EMPTY);
@@ -2216,10 +2245,10 @@
 		n2 = get_node(s, page_to_nid(page));
 		if (n != n2) {
 			if (n)
-				spin_unlock(&n->list_lock);
+				raw_spin_unlock(&n->list_lock);
 
 			n = n2;
-			spin_lock(&n->list_lock);
+			raw_spin_lock(&n->list_lock);
 		}
 
 		do {
@@ -2248,7 +2277,7 @@
 	}
 
 	if (n)
-		spin_unlock(&n->list_lock);
+		raw_spin_unlock(&n->list_lock);
 
 	while (discard_page) {
 		page = discard_page;
@@ -2285,14 +2314,21 @@
 			pobjects = oldpage->pobjects;
 			pages = oldpage->pages;
 			if (drain && pobjects > s->cpu_partial) {
+				struct slub_free_list *f;
 				unsigned long flags;
+				LIST_HEAD(tofree);
 				/*
 				 * partial array is full. Move the existing
 				 * set to the per node partial list.
 				 */
 				local_irq_save(flags);
 				unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
+				f = this_cpu_ptr(&slub_free_list);
+				raw_spin_lock(&f->lock);
+				list_splice_init(&f->list, &tofree);
+				raw_spin_unlock(&f->lock);
 				local_irq_restore(flags);
+				free_delayed(&tofree);
 				oldpage = NULL;
 				pobjects = 0;
 				pages = 0;
@@ -2362,7 +2398,19 @@
 
 static void flush_all(struct kmem_cache *s)
 {
+	LIST_HEAD(tofree);
+	int cpu;
+
 	on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
+	for_each_online_cpu(cpu) {
+		struct slub_free_list *f;
+
+		f = &per_cpu(slub_free_list, cpu);
+		raw_spin_lock_irq(&f->lock);
+		list_splice_init(&f->list, &tofree);
+		raw_spin_unlock_irq(&f->lock);
+		free_delayed(&tofree);
+	}
 }
 
 /*
@@ -2417,10 +2465,10 @@
 	unsigned long x = 0;
 	struct page *page;
 
-	spin_lock_irqsave(&n->list_lock, flags);
+	raw_spin_lock_irqsave(&n->list_lock, flags);
 	list_for_each_entry(page, &n->partial, lru)
 		x += get_count(page);
-	spin_unlock_irqrestore(&n->list_lock, flags);
+	raw_spin_unlock_irqrestore(&n->list_lock, flags);
 	return x;
 }
 #endif /* CONFIG_SLUB_DEBUG || CONFIG_SLUB_SYSFS */
@@ -2560,8 +2608,10 @@
  * already disabled (which is the case for bulk allocation).
  */
 static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
-			  unsigned long addr, struct kmem_cache_cpu *c)
+			  unsigned long addr, struct kmem_cache_cpu *c,
+			  struct list_head *to_free)
 {
+	struct slub_free_list *f;
 	void *freelist;
 	struct page *page;
 
@@ -2627,6 +2677,13 @@
 	VM_BUG_ON(!c->page->frozen);
 	c->freelist = get_freepointer(s, freelist);
 	c->tid = next_tid(c->tid);
+
+out:
+	f = this_cpu_ptr(&slub_free_list);
+	raw_spin_lock(&f->lock);
+	list_splice_init(&f->list, to_free);
+	raw_spin_unlock(&f->lock);
+
 	return freelist;
 
 new_slab:
@@ -2642,7 +2699,7 @@
 
 	if (unlikely(!freelist)) {
 		slab_out_of_memory(s, gfpflags, node);
-		return NULL;
+		goto out;
 	}
 
 	page = c->page;
@@ -2655,7 +2712,7 @@
 		goto new_slab;	/* Slab failed checks. Next slab needed */
 
 	deactivate_slab(s, page, get_freepointer(s, freelist), c);
-	return freelist;
+	goto out;
 }
 
 /*
@@ -2667,6 +2724,7 @@
 {
 	void *p;
 	unsigned long flags;
+	LIST_HEAD(tofree);
 
 	local_irq_save(flags);
 #ifdef CONFIG_PREEMPT
@@ -2678,8 +2736,9 @@
 	c = this_cpu_ptr(s->cpu_slab);
 #endif
 
-	p = ___slab_alloc(s, gfpflags, node, addr, c);
+	p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree);
 	local_irq_restore(flags);
+	free_delayed(&tofree);
 	return p;
 }
 
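The two hunks above show the calling convention that the rest of the patch follows (put_cpu_partial(), __slab_alloc(), and later kmem_cache_alloc_bulk()): while interrupts are still disabled, the per-CPU list is only spliced onto a list on the caller's stack under the raw lock; the actual free_delayed() call happens after local_irq_restore()/local_irq_enable(), so the pages go back to the page allocator with interrupts on. A minimal sketch of that ordering (hypothetical wrapper name, reusing the patch's helpers in the slub.c context):

/* Sketch: why to_free is threaded through the IRQ-off section. */
static void *sketch_alloc_with_drain(struct kmem_cache *s, gfp_t gfpflags)
{
	struct kmem_cache_cpu *c;
	LIST_HEAD(tofree);
	unsigned long flags;
	void *object;

	local_irq_save(flags);
	c = this_cpu_ptr(s->cpu_slab);
	/*
	 * ___slab_alloc() runs with interrupts off; any slab page freed
	 * along the way is parked on this CPU's slub_free_list and then
	 * spliced onto tofree before it returns.
	 */
	object = ___slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_, c, &tofree);
	local_irq_restore(flags);

	/* Only now, with interrupts enabled, hand the pages back. */
	free_delayed(&tofree);

	return object;
}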
@@ -2878,7 +2937,7 @@
 
 	do {
 		if (unlikely(n)) {
-			spin_unlock_irqrestore(&n->list_lock, flags);
+			raw_spin_unlock_irqrestore(&n->list_lock, flags);
 			n = NULL;
 		}
 		prior = page->freelist;
@@ -2910,7 +2969,7 @@
 				 * Otherwise the list_lock will synchronize with
 				 * other processors updating the list of slabs.
 				 */
-				spin_lock_irqsave(&n->list_lock, flags);
+				raw_spin_lock_irqsave(&n->list_lock, flags);
 
 			}
 		}
@@ -2952,7 +3011,7 @@
 		add_partial(n, page, DEACTIVATE_TO_TAIL);
 		stat(s, FREE_ADD_PARTIAL);
 	}
-	spin_unlock_irqrestore(&n->list_lock, flags);
+	raw_spin_unlock_irqrestore(&n->list_lock, flags);
 	return;
 
 slab_empty:
@@ -2967,7 +3026,7 @@
 		remove_full(s, n, page);
 	}
 
-	spin_unlock_irqrestore(&n->list_lock, flags);
+	raw_spin_unlock_irqrestore(&n->list_lock, flags);
 	stat(s, FREE_SLAB);
 	discard_slab(s, page);
 }
@@ -3172,6 +3231,7 @@
 			  void **p)
 {
 	struct kmem_cache_cpu *c;
+	LIST_HEAD(to_free);
 	int i;
 
 	/* memcg and kmem_cache debug support */
@@ -3204,7 +3264,7 @@
 			 * of re-populating per CPU c->freelist
 			 */
 			p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
-					    _RET_IP_, c);
+					    _RET_IP_, c, &to_free);
 			if (unlikely(!p[i]))
 				goto error;
 
@@ -3219,6 +3279,7 @@
 	}
 	c->tid = next_tid(c->tid);
 	local_irq_enable();
+	free_delayed(&to_free);
 
 	/* Clear memory outside IRQ disabled fastpath loop */
 	if (unlikely(slab_want_init_on_alloc(flags, s))) {
@@ -3233,6 +3294,7 @@
 	return i;
 error:
 	local_irq_enable();
+	free_delayed(&to_free);
 	slab_post_alloc_hook(s, flags, i, p);
 	__kmem_cache_free_bulk(s, i, p);
 	return 0;
@@ -3368,7 +3430,7 @@
 init_kmem_cache_node(struct kmem_cache_node *n)
 {
 	n->nr_partial = 0;
-	spin_lock_init(&n->list_lock);
+	raw_spin_lock_init(&n->list_lock);
 	INIT_LIST_HEAD(&n->partial);
 #ifdef CONFIG_SLUB_DEBUG
 	atomic_long_set(&n->nr_slabs, 0);
@@ -3721,6 +3783,11 @@
 			      const char *text)
 {
 #ifdef CONFIG_SLUB_DEBUG
+#ifdef CONFIG_PREEMPT_RT_BASE
+	/* XXX move out of irq-off section */
+	slab_err(s, page, text, s->name);
+#else
+
 	void *addr = page_address(page);
 	void *p;
 	unsigned long *map = kcalloc(BITS_TO_LONGS(page->objects),
@@ -3742,6 +3809,7 @@
 	slab_unlock(page);
 	kfree(map);
 #endif
+#endif
 }
 
 /*
@@ -3755,7 +3823,7 @@
 	struct page *page, *h;
 
 	BUG_ON(irqs_disabled());
-	spin_lock_irq(&n->list_lock);
+	raw_spin_lock_irq(&n->list_lock);
 	list_for_each_entry_safe(page, h, &n->partial, lru) {
 		if (!page->inuse) {
 			remove_partial(n, page);
@@ -3765,7 +3833,7 @@
 			"Objects remaining in %s on __kmem_cache_shutdown()");
 		}
 	}
-	spin_unlock_irq(&n->list_lock);
+	raw_spin_unlock_irq(&n->list_lock);
 
 	list_for_each_entry_safe(page, h, &discard, lru)
 		discard_slab(s, page);
@@ -4039,7 +4107,7 @@
 		for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
 			INIT_LIST_HEAD(promote + i);
 
-		spin_lock_irqsave(&n->list_lock, flags);
+		raw_spin_lock_irqsave(&n->list_lock, flags);
 
 		/*
 		 * Build lists of slabs to discard or promote.
@@ -4070,7 +4138,7 @@
 		for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
 			list_splice(promote + i, &n->partial);
 
-		spin_unlock_irqrestore(&n->list_lock, flags);
+		raw_spin_unlock_irqrestore(&n->list_lock, flags);
 
 		/* Release empty slabs */
 		list_for_each_entry_safe(page, t, &discard, lru)
@@ -4283,6 +4351,12 @@
 {
 	static __initdata struct kmem_cache boot_kmem_cache,
 		boot_kmem_cache_node;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
+		INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
+	}
 
 	if (debug_guardpage_minorder())
 		slub_max_order = 0;
@@ -4486,7 +4560,7 @@
 	struct page *page;
 	unsigned long flags;
 
-	spin_lock_irqsave(&n->list_lock, flags);
+	raw_spin_lock_irqsave(&n->list_lock, flags);
 
 	list_for_each_entry(page, &n->partial, lru) {
 		validate_slab_slab(s, page, map);
@@ -4508,7 +4582,7 @@
 		       s->name, count, atomic_long_read(&n->nr_slabs));
 
 out:
-	spin_unlock_irqrestore(&n->list_lock, flags);
+	raw_spin_unlock_irqrestore(&n->list_lock, flags);
 	return count;
 }
 
@@ -4564,6 +4638,9 @@
 {
 	struct location *l;
 	int order;
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && flags == GFP_ATOMIC)
+		return 0;
 
 	order = get_order(sizeof(struct location) * max);
 
@@ -4698,12 +4775,12 @@
 		if (!atomic_long_read(&n->nr_slabs))
 			continue;
 
-		spin_lock_irqsave(&n->list_lock, flags);
+		raw_spin_lock_irqsave(&n->list_lock, flags);
 		list_for_each_entry(page, &n->partial, lru)
 			process_slab(&t, s, page, alloc, map);
 		list_for_each_entry(page, &n->full, lru)
 			process_slab(&t, s, page, alloc, map);
-		spin_unlock_irqrestore(&n->list_lock, flags);
+		raw_spin_unlock_irqrestore(&n->list_lock, flags);
 	}
 
 	for (i = 0; i < t.count; i++) {