2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/mm/page_alloc.c
@@ -61,7 +61,6 @@
 #include <linux/hugetlb.h>
 #include <linux/sched/rt.h>
 #include <linux/sched/mm.h>
-#include <linux/local_lock.h>
 #include <linux/page_owner.h>
 #include <linux/page_pinner.h>
 #include <linux/kthread.h>
@@ -385,13 +384,6 @@
 EXPORT_SYMBOL(nr_node_ids);
 EXPORT_SYMBOL(nr_online_nodes);
 #endif
-
-struct pa_lock {
-	local_lock_t l;
-};
-static DEFINE_PER_CPU(struct pa_lock, pa_lock) = {
-	.l = INIT_LOCAL_LOCK(l),
-};
 
 int page_group_by_mobility_disabled __read_mostly;
 
@@ -1430,7 +1422,7 @@
 }
 
 /*
- * Frees a number of pages which have been collected from the pcp lists.
+ * Frees a number of pages from the PCP lists
  * Assumes all pages on list are in same zone, and of same order.
  * count is the number of pages to free.
  *
@@ -1440,56 +1432,15 @@
  * And clear the zone's pages_scanned counter, to hold off the "all pages are
  * pinned" detection logic.
  */
-static void free_pcppages_bulk(struct zone *zone, struct list_head *head,
-			       bool zone_retry)
-{
-	bool isolated_pageblocks;
-	struct page *page, *tmp;
-	unsigned long flags;
-
-	spin_lock_irqsave(&zone->lock, flags);
-	isolated_pageblocks = has_isolate_pageblock(zone);
-
-	/*
-	 * Use safe version since after __free_one_page(),
-	 * page->lru.next will not point to original list.
-	 */
-	list_for_each_entry_safe(page, tmp, head, lru) {
-		int mt = get_pcppage_migratetype(page);
-
-		if (page_zone(page) != zone) {
-			/*
-			 * free_unref_page_list() sorts pages by zone. If we end
-			 * up with pages from a different NUMA nodes belonging
-			 * to the same ZONE index then we need to redo with the
-			 * correct ZONE pointer. Skip the page for now, redo it
-			 * on the next iteration.
-			 */
-			WARN_ON_ONCE(zone_retry == false);
-			if (zone_retry)
-				continue;
-		}
-
-		/* MIGRATE_ISOLATE page should not go to pcplists */
-		VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
-		/* Pageblock could have been isolated meanwhile */
-		if (unlikely(isolated_pageblocks))
-			mt = get_pageblock_migratetype(page);
-
-		list_del(&page->lru);
-		__free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE);
-		trace_mm_page_pcpu_drain(page, 0, mt);
-	}
-	spin_unlock_irqrestore(&zone->lock, flags);
-}
-
-static void isolate_pcp_pages(int count, struct per_cpu_pages *pcp,
-			      struct list_head *dst)
+static void free_pcppages_bulk(struct zone *zone, int count,
+					struct per_cpu_pages *pcp)
 {
 	int migratetype = 0;
 	int batch_free = 0;
 	int prefetch_nr = 0;
-	struct page *page;
+	bool isolated_pageblocks;
+	struct page *page, *tmp;
+	LIST_HEAD(head);
 
 	/*
 	 * Ensure proper count is passed which otherwise would stuck in the
@@ -1526,7 +1477,7 @@
 			if (bulkfree_pcp_prepare(page))
 				continue;
 
-			list_add_tail(&page->lru, dst);
+			list_add_tail(&page->lru, &head);
 
 			/*
 			 * We are going to put the page back to the global
@@ -1541,6 +1492,26 @@
 				prefetch_buddy(page);
 		} while (--count && --batch_free && !list_empty(list));
 	}
+
+	spin_lock(&zone->lock);
+	isolated_pageblocks = has_isolate_pageblock(zone);
+
+	/*
+	 * Use safe version since after __free_one_page(),
+	 * page->lru.next will not point to original list.
+	 */
+	list_for_each_entry_safe(page, tmp, &head, lru) {
+		int mt = get_pcppage_migratetype(page);
+		/* MIGRATE_ISOLATE page should not go to pcplists */
+		VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
+		/* Pageblock could have been isolated meanwhile */
+		if (unlikely(isolated_pageblocks))
+			mt = get_pageblock_migratetype(page);
+
+		__free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE);
+		trace_mm_page_pcpu_drain(page, 0, mt);
+	}
+	spin_unlock(&zone->lock);
 }
 
 static void free_one_page(struct zone *zone,
@@ -1643,16 +1614,21 @@
 	unsigned long flags;
 	int migratetype;
 	unsigned long pfn = page_to_pfn(page);
+	bool skip_free_unref_page = false;
 
 	if (!free_pages_prepare(page, order, true, fpi_flags))
 		return;
 
 	migratetype = get_pfnblock_migratetype(page, pfn);
-	local_lock_irqsave(&pa_lock.l, flags);
+	trace_android_vh_free_unref_page_bypass(page, order, migratetype, &skip_free_unref_page);
+	if (skip_free_unref_page)
+		return;
+
+	local_irq_save(flags);
 	__count_vm_events(PGFREE, 1 << order);
 	free_one_page(page_zone(page), page, pfn, order, migratetype,
 		      fpi_flags);
-	local_unlock_irqrestore(&pa_lock.l, flags);
+	local_irq_restore(flags);
 }
 
 void __free_pages_core(struct page *page, unsigned int order)
@@ -2826,6 +2802,7 @@
 	struct page *page;
 	int order;
 	bool ret;
+	bool skip_unreserve_highatomic = false;
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx,
 						ac->nodemask) {
@@ -2835,6 +2812,11 @@
 		 */
 		if (!force && zone->nr_reserved_highatomic <=
 					pageblock_nr_pages)
+			continue;
+
+		trace_android_vh_unreserve_highatomic_bypass(force, zone,
+				&skip_unreserve_highatomic);
+		if (skip_unreserve_highatomic)
 			continue;
 
 		spin_lock_irqsave(&zone->lock, flags);
@@ -3082,6 +3064,10 @@
 	struct list_head *list = &pcp->lists[migratetype];
 
 	if (list_empty(list)) {
+		trace_android_vh_rmqueue_bulk_bypass(order, pcp, migratetype, list);
+		if (!list_empty(list))
+			return list;
+
 		pcp->count += rmqueue_bulk(zone, order,
 				pcp->batch, list,
 				migratetype, alloc_flags);
@@ -3105,18 +3091,13 @@
 {
 	unsigned long flags;
 	int to_drain, batch;
-	LIST_HEAD(dst);
 
-	local_lock_irqsave(&pa_lock.l, flags);
+	local_irq_save(flags);
 	batch = READ_ONCE(pcp->batch);
 	to_drain = min(pcp->count, batch);
 	if (to_drain > 0)
-		isolate_pcp_pages(to_drain, pcp, &dst);
-
-	local_unlock_irqrestore(&pa_lock.l, flags);
-
-	if (to_drain > 0)
-		free_pcppages_bulk(zone, &dst, false);
+		free_pcppages_bulk(zone, to_drain, pcp);
+	local_irq_restore(flags);
 }
 #endif
 
@@ -3132,21 +3113,14 @@
 	unsigned long flags;
 	struct per_cpu_pageset *pset;
 	struct per_cpu_pages *pcp;
-	LIST_HEAD(dst);
-	int count;
 
-	local_lock_irqsave(&pa_lock.l, flags);
+	local_irq_save(flags);
 	pset = per_cpu_ptr(zone->pageset, cpu);
 
 	pcp = &pset->pcp;
-	count = pcp->count;
-	if (count)
-		isolate_pcp_pages(count, pcp, &dst);
-
-	local_unlock_irqrestore(&pa_lock.l, flags);
-
-	if (count)
-		free_pcppages_bulk(zone, &dst, false);
+	if (pcp->count)
+		free_pcppages_bulk(zone, pcp->count, pcp);
+	local_irq_restore(flags);
 }
 
 /*
@@ -3194,9 +3168,9 @@
 	 * cpu which is allright but we also have to make sure to not move to
 	 * a different one.
 	 */
-	migrate_disable();
+	preempt_disable();
 	drain_local_pages(drain->zone);
-	migrate_enable();
+	preempt_enable();
 }
 
 /*
@@ -3345,8 +3319,7 @@
 	return true;
 }
 
-static void free_unref_page_commit(struct page *page, unsigned long pfn,
-				   struct list_head *dst)
+static void free_unref_page_commit(struct page *page, unsigned long pfn)
 {
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
@@ -3380,8 +3353,7 @@
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
 		unsigned long batch = READ_ONCE(pcp->batch);
-
-		isolate_pcp_pages(batch, pcp, dst);
+		free_pcppages_bulk(zone, batch, pcp);
 	}
 }
 
@@ -3392,17 +3364,20 @@
 {
 	unsigned long flags;
 	unsigned long pfn = page_to_pfn(page);
-	struct zone *zone = page_zone(page);
-	LIST_HEAD(dst);
+	int migratetype;
+	bool skip_free_unref_page = false;
 
 	if (!free_unref_page_prepare(page, pfn))
 		return;
 
-	local_lock_irqsave(&pa_lock.l, flags);
-	free_unref_page_commit(page, pfn, &dst);
-	local_unlock_irqrestore(&pa_lock.l, flags);
-	if (!list_empty(&dst))
-		free_pcppages_bulk(zone, &dst, false);
+	migratetype = get_pfnblock_migratetype(page, pfn);
+	trace_android_vh_free_unref_page_bypass(page, 0, migratetype, &skip_free_unref_page);
+	if (skip_free_unref_page)
+		return;
+
+	local_irq_save(flags);
+	free_unref_page_commit(page, pfn);
+	local_irq_restore(flags);
 }
 
 /*
@@ -3413,11 +3388,6 @@
 	struct page *page, *next;
 	unsigned long flags, pfn;
 	int batch_count = 0;
-	struct list_head dsts[__MAX_NR_ZONES];
-	int i;
-
-	for (i = 0; i < __MAX_NR_ZONES; i++)
-		INIT_LIST_HEAD(&dsts[i]);
 
 	/* Prepare pages for freeing */
 	list_for_each_entry_safe(page, next, list, lru) {
@@ -3427,42 +3397,25 @@
 		set_page_private(page, pfn);
 	}
 
-	local_lock_irqsave(&pa_lock.l, flags);
+	local_irq_save(flags);
 	list_for_each_entry_safe(page, next, list, lru) {
 		unsigned long pfn = page_private(page);
-		enum zone_type type;
 
 		set_page_private(page, 0);
 		trace_mm_page_free_batched(page);
-		type = page_zonenum(page);
-		free_unref_page_commit(page, pfn, &dsts[type]);
+		free_unref_page_commit(page, pfn);
 
 		/*
 		 * Guard against excessive IRQ disabled times when we get
 		 * a large list of pages to free.
 		 */
 		if (++batch_count == SWAP_CLUSTER_MAX) {
-			local_unlock_irqrestore(&pa_lock.l, flags);
+			local_irq_restore(flags);
 			batch_count = 0;
-			local_lock_irqsave(&pa_lock.l, flags);
+			local_irq_save(flags);
 		}
 	}
-	local_unlock_irqrestore(&pa_lock.l, flags);
-
-	for (i = 0; i < __MAX_NR_ZONES; ) {
-		struct page *page;
-		struct zone *zone;
-
-		if (list_empty(&dsts[i])) {
-			i++;
-			continue;
-		}
-
-		page = list_first_entry(&dsts[i], struct page, lru);
-		zone = page_zone(page);
-
-		free_pcppages_bulk(zone, &dsts[i], true);
-	}
+	local_irq_restore(flags);
 }
 
 /*
@@ -3629,7 +3582,7 @@
 	struct page *page;
 	unsigned long flags;
 
-	local_lock_irqsave(&pa_lock.l, flags);
+	local_irq_save(flags);
 	pcp = &this_cpu_ptr(zone->pageset)->pcp;
 	page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp,
 				 gfp_flags);
@@ -3637,7 +3590,7 @@
 		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1);
 		zone_statistics(preferred_zone, zone);
 	}
-	local_unlock_irqrestore(&pa_lock.l, flags);
+	local_irq_restore(flags);
 	return page;
 }
 
@@ -3664,8 +3617,7 @@
 	 * allocate greater than order-1 page units with __GFP_NOFAIL.
 	 */
 	WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
-	local_lock_irqsave(&pa_lock.l, flags);
-	spin_lock(&zone->lock);
+	spin_lock_irqsave(&zone->lock, flags);
 
 	do {
 		page = NULL;
@@ -3700,7 +3652,7 @@
 	zone_statistics(preferred_zone, zone);
 	trace_android_vh_rmqueue(preferred_zone, zone, order,
 			gfp_flags, alloc_flags, migratetype);
-	local_unlock_irqrestore(&pa_lock.l, flags);
+	local_irq_restore(flags);
 
 out:
 	/* Separate test+clear to avoid unnecessary atomics */
@@ -3713,7 +3665,7 @@
 	return page;
 
 failed:
-	local_unlock_irqrestore(&pa_lock.l, flags);
+	local_irq_restore(flags);
 	return NULL;
 }
 
@@ -4898,6 +4850,7 @@
 	unsigned int zonelist_iter_cookie;
 	int reserve_flags;
 	unsigned long vh_record;
+	bool should_alloc_retry = false;
 
 	trace_android_vh_alloc_pages_slowpath_begin(gfp_mask, order, &vh_record);
 	/*
@@ -5037,6 +4990,12 @@
 
 	if (page)
 		goto got_pg;
+
+	trace_android_vh_should_alloc_pages_retry(gfp_mask, order,
+		&alloc_flags, ac->migratetype, ac->preferred_zoneref->zone,
+		&page, &should_alloc_retry);
+	if (should_alloc_retry)
+		goto retry;
 
 	/* Try direct reclaim and then allocating */
 	page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
@@ -5310,10 +5269,13 @@
 
 void __free_pages(struct page *page, unsigned int order)
 {
+	/* get PageHead before we drop reference */
+	int head = PageHead(page);
+
 	trace_android_vh_free_pages(page, order);
 	if (put_page_testzero(page))
 		free_the_page(page, order);
-	else if (!PageHead(page))
+	else if (!head)
 		while (order-- > 0)
 			free_the_page(page + (1 << order), order);
 }
@@ -6228,7 +6190,21 @@
 	int nid;
 	int __maybe_unused cpu;
 	pg_data_t *self = data;
+	unsigned long flags;
 
+	/*
+	 * Explicitly disable this CPU's interrupts before taking seqlock
+	 * to prevent any IRQ handler from calling into the page allocator
+	 * (e.g. GFP_ATOMIC) that could hit zonelist_iter_begin and livelock.
+	 */
+	local_irq_save(flags);
+	/*
+	 * Explicitly disable this CPU's synchronous printk() before taking
+	 * seqlock to prevent any printk() from trying to hold port->lock, for
+	 * tty_insert_flip_string_and_push_buffer() on other CPU might be
+	 * calling kmalloc(GFP_ATOMIC | __GFP_NOWARN) with port->lock held.
+	 */
+	printk_deferred_enter();
 	write_seqlock(&zonelist_update_seq);
 
 #ifdef CONFIG_NUMA
@@ -6263,6 +6239,8 @@
 	}
 
 	write_sequnlock(&zonelist_update_seq);
+	printk_deferred_exit();
+	local_irq_restore(flags);
 }
 
 static noinline void __init
@@ -6682,6 +6660,7 @@
 static void pageset_update(struct per_cpu_pages *pcp, unsigned long high,
 		unsigned long batch)
 {
+	trace_android_vh_pageset_update(&high, &batch);
 	/* start with a fail safe value for batch */
 	pcp->batch = 1;
 	smp_wmb();
@@ -9141,7 +9120,7 @@
 	struct per_cpu_pageset *pset;
 
 	/* avoid races with drain_pages() */
-	local_lock_irqsave(&pa_lock.l, flags);
+	local_irq_save(flags);
 	if (zone->pageset != &boot_pageset) {
 		for_each_online_cpu(cpu) {
 			pset = per_cpu_ptr(zone->pageset, cpu);
@@ -9150,7 +9129,7 @@
 		free_percpu(zone->pageset);
 		zone->pageset = &boot_pageset;
 	}
-	local_unlock_irqrestore(&pa_lock.l, flags);
+	local_irq_restore(flags);
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE