From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 20 Feb 2024 01:20:52 +0000
Subject: [PATCH] add new system file

---
 kernel/mm/page_alloc.c | 231 ++++++++++++++++++++++-------------------------
 1 files changed, 105 insertions(+), 126 deletions(-)

diff --git a/kernel/mm/page_alloc.c b/kernel/mm/page_alloc.c
index 4a15674..3bcee27 100644
--- a/kernel/mm/page_alloc.c
+++ b/kernel/mm/page_alloc.c
@@ -61,7 +61,6 @@
 #include <linux/hugetlb.h>
 #include <linux/sched/rt.h>
 #include <linux/sched/mm.h>
-#include <linux/local_lock.h>
 #include <linux/page_owner.h>
 #include <linux/page_pinner.h>
 #include <linux/kthread.h>
@@ -385,13 +384,6 @@
 EXPORT_SYMBOL(nr_node_ids);
 EXPORT_SYMBOL(nr_online_nodes);
 #endif
-
-struct pa_lock {
-	local_lock_t l;
-};
-static DEFINE_PER_CPU(struct pa_lock, pa_lock) = {
-	.l = INIT_LOCAL_LOCK(l),
-};
 
 int page_group_by_mobility_disabled __read_mostly;
 
@@ -1430,7 +1422,7 @@
 }
 
 /*
- * Frees a number of pages which have been collected from the pcp lists.
+ * Frees a number of pages from the PCP lists
  * Assumes all pages on list are in same zone, and of same order.
  * count is the number of pages to free.
  *
@@ -1440,56 +1432,15 @@
  * And clear the zone's pages_scanned counter, to hold off the "all pages are
  * pinned" detection logic.
  */
-static void free_pcppages_bulk(struct zone *zone, struct list_head *head,
-			       bool zone_retry)
-{
-	bool isolated_pageblocks;
-	struct page *page, *tmp;
-	unsigned long flags;
-
-	spin_lock_irqsave(&zone->lock, flags);
-	isolated_pageblocks = has_isolate_pageblock(zone);
-
-	/*
-	 * Use safe version since after __free_one_page(),
-	 * page->lru.next will not point to original list.
-	 */
-	list_for_each_entry_safe(page, tmp, head, lru) {
-		int mt = get_pcppage_migratetype(page);
-
-		if (page_zone(page) != zone) {
-			/*
-			 * free_unref_page_list() sorts pages by zone. If we end
-			 * up with pages from a different NUMA nodes belonging
-			 * to the same ZONE index then we need to redo with the
-			 * correct ZONE pointer. Skip the page for now, redo it
-			 * on the next iteration.
-			 */
-			WARN_ON_ONCE(zone_retry == false);
-			if (zone_retry)
-				continue;
-		}
-
-		/* MIGRATE_ISOLATE page should not go to pcplists */
-		VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
-		/* Pageblock could have been isolated meanwhile */
-		if (unlikely(isolated_pageblocks))
-			mt = get_pageblock_migratetype(page);
-
-		list_del(&page->lru);
-		__free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE);
-		trace_mm_page_pcpu_drain(page, 0, mt);
-	}
-	spin_unlock_irqrestore(&zone->lock, flags);
-}
-
-static void isolate_pcp_pages(int count, struct per_cpu_pages *pcp,
-			      struct list_head *dst)
+static void free_pcppages_bulk(struct zone *zone, int count,
+					struct per_cpu_pages *pcp)
 {
 	int migratetype = 0;
 	int batch_free = 0;
 	int prefetch_nr = 0;
-	struct page *page;
+	bool isolated_pageblocks;
+	struct page *page, *tmp;
+	LIST_HEAD(head);
 
 	/*
 	 * Ensure proper count is passed which otherwise would stuck in the
@@ -1526,7 +1477,7 @@
 		if (bulkfree_pcp_prepare(page))
 			continue;
 
-		list_add_tail(&page->lru, dst);
+		list_add_tail(&page->lru, &head);
 
 		/*
 		 * We are going to put the page back to the global
@@ -1541,6 +1492,26 @@
 			prefetch_buddy(page);
 	} while (--count && --batch_free && !list_empty(list));
+
+	spin_lock(&zone->lock);
+	isolated_pageblocks = has_isolate_pageblock(zone);
+
+	/*
+	 * Use safe version since after __free_one_page(),
+	 * page->lru.next will not point to original list.
+	 */
+	list_for_each_entry_safe(page, tmp, &head, lru) {
+		int mt = get_pcppage_migratetype(page);
+
+		/* MIGRATE_ISOLATE page should not go to pcplists */
+		VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
+		/* Pageblock could have been isolated meanwhile */
+		if (unlikely(isolated_pageblocks))
+			mt = get_pageblock_migratetype(page);
+
+		__free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE);
+		trace_mm_page_pcpu_drain(page, 0, mt);
+	}
+	spin_unlock(&zone->lock);
 }
 
 static void free_one_page(struct zone *zone,
 				struct page *page, unsigned long pfn,
@@ -1643,16 +1614,21 @@
 	unsigned long flags;
 	int migratetype;
 	unsigned long pfn = page_to_pfn(page);
+	bool skip_free_unref_page = false;
 
 	if (!free_pages_prepare(page, order, true, fpi_flags))
 		return;
 
 	migratetype = get_pfnblock_migratetype(page, pfn);
-	local_lock_irqsave(&pa_lock.l, flags);
+	trace_android_vh_free_unref_page_bypass(page, order, migratetype, &skip_free_unref_page);
+	if (skip_free_unref_page)
+		return;
+
+	local_irq_save(flags);
 	__count_vm_events(PGFREE, 1 << order);
 	free_one_page(page_zone(page), page, pfn, order, migratetype,
 		      fpi_flags);
-	local_unlock_irqrestore(&pa_lock.l, flags);
+	local_irq_restore(flags);
 }
 
 void __free_pages_core(struct page *page, unsigned int order)
@@ -2826,6 +2802,7 @@
 	struct page *page;
 	int order;
 	bool ret;
+	bool skip_unreserve_highatomic = false;
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx,
 								ac->nodemask) {
@@ -2835,6 +2812,11 @@
 		 */
 		if (!force && zone->nr_reserved_highatomic <=
 					pageblock_nr_pages)
 			continue;
+
+		trace_android_vh_unreserve_highatomic_bypass(force, zone,
+			&skip_unreserve_highatomic);
+		if (skip_unreserve_highatomic)
+			continue;
 
 		spin_lock_irqsave(&zone->lock, flags);
@@ -3082,6 +3064,10 @@
 	struct list_head *list = &pcp->lists[migratetype];
 
 	if (list_empty(list)) {
+		trace_android_vh_rmqueue_bulk_bypass(order, pcp, migratetype, list);
+		if (!list_empty(list))
+			return list;
+
 		pcp->count += rmqueue_bulk(zone, order,
 				pcp->batch, list,
 				migratetype, alloc_flags);
@@ -3105,18 +3091,13 @@
 {
 	unsigned long flags;
 	int to_drain, batch;
-	LIST_HEAD(dst);
 
-	local_lock_irqsave(&pa_lock.l, flags);
+	local_irq_save(flags);
 	batch = READ_ONCE(pcp->batch);
 	to_drain = min(pcp->count, batch);
 	if (to_drain > 0)
-		isolate_pcp_pages(to_drain, pcp, &dst);
-
-	local_unlock_irqrestore(&pa_lock.l, flags);
-
-	if (to_drain > 0)
-		free_pcppages_bulk(zone, &dst, false);
+		free_pcppages_bulk(zone, to_drain, pcp);
+	local_irq_restore(flags);
 }
 #endif
 
@@ -3132,21 +3113,14 @@
 	unsigned long flags;
 	struct per_cpu_pageset *pset;
 	struct per_cpu_pages *pcp;
-	LIST_HEAD(dst);
-	int count;
 
-	local_lock_irqsave(&pa_lock.l, flags);
+	local_irq_save(flags);
 	pset = per_cpu_ptr(zone->pageset, cpu);
 
 	pcp = &pset->pcp;
-	count = pcp->count;
-	if (count)
-		isolate_pcp_pages(count, pcp, &dst);
-
-	local_unlock_irqrestore(&pa_lock.l, flags);
-
-	if (count)
-		free_pcppages_bulk(zone, &dst, false);
+	if (pcp->count)
+		free_pcppages_bulk(zone, pcp->count, pcp);
+	local_irq_restore(flags);
 }
 
 /*
@@ -3194,9 +3168,9 @@
 	 * cpu which is allright but we also have to make sure to not move to
 	 * a different one.
 	 */
-	migrate_disable();
+	preempt_disable();
 	drain_local_pages(drain->zone);
-	migrate_enable();
+	preempt_enable();
 }
 
 /*
@@ -3345,8 +3319,7 @@
 	return true;
 }
 
-static void free_unref_page_commit(struct page *page, unsigned long pfn,
-				   struct list_head *dst)
+static void free_unref_page_commit(struct page *page, unsigned long pfn)
 {
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
@@ -3380,8 +3353,7 @@
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
 		unsigned long batch = READ_ONCE(pcp->batch);
-
-		isolate_pcp_pages(batch, pcp, dst);
+		free_pcppages_bulk(zone, batch, pcp);
 	}
 }
 
@@ -3392,17 +3364,20 @@
 {
 	unsigned long flags;
 	unsigned long pfn = page_to_pfn(page);
-	struct zone *zone = page_zone(page);
-	LIST_HEAD(dst);
+	int migratetype;
+	bool skip_free_unref_page = false;
 
 	if (!free_unref_page_prepare(page, pfn))
 		return;
 
-	local_lock_irqsave(&pa_lock.l, flags);
-	free_unref_page_commit(page, pfn, &dst);
-	local_unlock_irqrestore(&pa_lock.l, flags);
-	if (!list_empty(&dst))
-		free_pcppages_bulk(zone, &dst, false);
+	migratetype = get_pfnblock_migratetype(page, pfn);
+	trace_android_vh_free_unref_page_bypass(page, 0, migratetype, &skip_free_unref_page);
+	if (skip_free_unref_page)
+		return;
+
+	local_irq_save(flags);
+	free_unref_page_commit(page, pfn);
+	local_irq_restore(flags);
 }
 
 /*
@@ -3413,11 +3388,6 @@
 	struct page *page, *next;
 	unsigned long flags, pfn;
 	int batch_count = 0;
-	struct list_head dsts[__MAX_NR_ZONES];
-	int i;
-
-	for (i = 0; i < __MAX_NR_ZONES; i++)
-		INIT_LIST_HEAD(&dsts[i]);
 
 	/* Prepare pages for freeing */
 	list_for_each_entry_safe(page, next, list, lru) {
@@ -3427,42 +3397,25 @@
 		set_page_private(page, pfn);
 	}
 
-	local_lock_irqsave(&pa_lock.l, flags);
+	local_irq_save(flags);
 	list_for_each_entry_safe(page, next, list, lru) {
 		unsigned long pfn = page_private(page);
-		enum zone_type type;
 
 		set_page_private(page, 0);
 		trace_mm_page_free_batched(page);
-		type = page_zonenum(page);
-		free_unref_page_commit(page, pfn, &dsts[type]);
+		free_unref_page_commit(page, pfn);
 
 		/*
 		 * Guard against excessive IRQ disabled times when we get
 		 * a large list of pages to free.
 		 */
 		if (++batch_count == SWAP_CLUSTER_MAX) {
-			local_unlock_irqrestore(&pa_lock.l, flags);
+			local_irq_restore(flags);
 			batch_count = 0;
-			local_lock_irqsave(&pa_lock.l, flags);
+			local_irq_save(flags);
 		}
 	}
-	local_unlock_irqrestore(&pa_lock.l, flags);
-
-	for (i = 0; i < __MAX_NR_ZONES; ) {
-		struct page *page;
-		struct zone *zone;
-
-		if (list_empty(&dsts[i])) {
-			i++;
-			continue;
-		}
-
-		page = list_first_entry(&dsts[i], struct page, lru);
-		zone = page_zone(page);
-
-		free_pcppages_bulk(zone, &dsts[i], true);
-	}
+	local_irq_restore(flags);
 }
 
 /*
@@ -3629,7 +3582,7 @@
 	struct page *page;
 	unsigned long flags;
 
-	local_lock_irqsave(&pa_lock.l, flags);
+	local_irq_save(flags);
 	pcp = &this_cpu_ptr(zone->pageset)->pcp;
 	page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp,
 				 gfp_flags);
@@ -3637,7 +3590,7 @@
 		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1);
 		zone_statistics(preferred_zone, zone);
 	}
-	local_unlock_irqrestore(&pa_lock.l, flags);
+	local_irq_restore(flags);
 	return page;
 }
 
@@ -3664,8 +3617,7 @@
 	 * allocate greater than order-1 page units with __GFP_NOFAIL.
 	 */
 	WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
-	local_lock_irqsave(&pa_lock.l, flags);
-	spin_lock(&zone->lock);
+	spin_lock_irqsave(&zone->lock, flags);
 
 	do {
 		page = NULL;
@@ -3700,7 +3652,7 @@
 		zone_statistics(preferred_zone, zone);
 	trace_android_vh_rmqueue(preferred_zone, zone, order,
 			gfp_flags, alloc_flags, migratetype);
-	local_unlock_irqrestore(&pa_lock.l, flags);
+	local_irq_restore(flags);
 
 out:
 	/* Separate test+clear to avoid unnecessary atomics */
@@ -3713,7 +3665,7 @@
 	return page;
 
 failed:
-	local_unlock_irqrestore(&pa_lock.l, flags);
+	local_irq_restore(flags);
 	return NULL;
 }
 
@@ -4898,6 +4850,7 @@
 	unsigned int zonelist_iter_cookie;
 	int reserve_flags;
 	unsigned long vh_record;
+	bool should_alloc_retry = false;
 
 	trace_android_vh_alloc_pages_slowpath_begin(gfp_mask, order, &vh_record);
 	/*
@@ -5037,6 +4990,12 @@
 	if (page)
 		goto got_pg;
+
+	trace_android_vh_should_alloc_pages_retry(gfp_mask, order,
+		&alloc_flags, ac->migratetype, ac->preferred_zoneref->zone,
+		&page, &should_alloc_retry);
+	if (should_alloc_retry)
+		goto retry;
 
 	/* Try direct reclaim and then allocating */
 	page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
							&did_some_progress);
@@ -5310,10 +5269,13 @@
 
 void __free_pages(struct page *page, unsigned int order)
 {
+	/* get PageHead before we drop reference */
+	int head = PageHead(page);
+
 	trace_android_vh_free_pages(page, order);
 	if (put_page_testzero(page))
 		free_the_page(page, order);
-	else if (!PageHead(page))
+	else if (!head)
 		while (order-- > 0)
 			free_the_page(page + (1 << order), order);
 }
@@ -6228,7 +6190,21 @@
 	int nid;
 	int __maybe_unused cpu;
 	pg_data_t *self = data;
+	unsigned long flags;
 
+	/*
+	 * Explicitly disable this CPU's interrupts before taking seqlock
+	 * to prevent any IRQ handler from calling into the page allocator
+	 * (e.g. GFP_ATOMIC) that could hit zonelist_iter_begin and livelock.
+	 */
+	local_irq_save(flags);
+	/*
+	 * Explicitly disable this CPU's synchronous printk() before taking
+	 * seqlock to prevent any printk() from trying to hold port->lock, for
+	 * tty_insert_flip_string_and_push_buffer() on other CPU might be
+	 * calling kmalloc(GFP_ATOMIC | __GFP_NOWARN) with port->lock held.
+	 */
+	printk_deferred_enter();
 	write_seqlock(&zonelist_update_seq);
 
 #ifdef CONFIG_NUMA
@@ -6263,6 +6239,8 @@
 	}
 
 	write_sequnlock(&zonelist_update_seq);
+	printk_deferred_exit();
+	local_irq_restore(flags);
 }
 
 static noinline void __init
@@ -6682,6 +6660,7 @@
 static void pageset_update(struct per_cpu_pages *pcp, unsigned long high,
 		unsigned long batch)
 {
+	trace_android_vh_pageset_update(&high, &batch);
 	/* start with a fail safe value for batch */
 	pcp->batch = 1;
 	smp_wmb();
@@ -9141,7 +9120,7 @@
 	struct per_cpu_pageset *pset;
 
 	/* avoid races with drain_pages()  */
-	local_lock_irqsave(&pa_lock.l, flags);
+	local_irq_save(flags);
 	if (zone->pageset != &boot_pageset) {
 		for_each_online_cpu(cpu) {
 			pset = per_cpu_ptr(zone->pageset, cpu);
@@ -9150,7 +9129,7 @@
 		free_percpu(zone->pageset);
 		zone->pageset = &boot_pageset;
 	}
-	local_unlock_irqrestore(&pa_lock.l, flags);
+	local_irq_restore(flags);
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
-- 
Gitblit v1.6.2
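
[Reviewer note, not part of the patch] The core of this change is a locking-model swap: every path that previously took the per-CPU pa_lock local lock now disables interrupts directly. The sketch below shows the two idioms side by side in plain kernel C. The struct pa_lock definition and the lock/unlock calls are copied from the hunks above; the pcp_op_*() wrappers and the "per-cpu state" placeholder are invented purely for illustration.

/* Illustrative sketch only -- not part of the patch above. */
#include <linux/local_lock.h>
#include <linux/irqflags.h>
#include <linux/percpu.h>

/*
 * Idiom removed by the patch: a named per-CPU local lock guards the
 * per-cpu-pages state. On !PREEMPT_RT this compiles down to disabling
 * interrupts; on PREEMPT_RT it becomes a per-CPU sleeping lock, so the
 * critical section stays preemptible and is visible to lockdep.
 */
struct pa_lock {
	local_lock_t l;
};
static DEFINE_PER_CPU(struct pa_lock, pa_lock) = {
	.l = INIT_LOCAL_LOCK(l),
};

void pcp_op_local_lock(void)
{
	unsigned long flags;

	local_lock_irqsave(&pa_lock.l, flags);
	/* ... touch this CPU's per-cpu-pages state ... */
	local_unlock_irqrestore(&pa_lock.l, flags);
}

/*
 * Idiom used after the patch: disable interrupts on this CPU directly.
 * This also serializes against allocations from IRQ context, but it is
 * not PREEMPT_RT-friendly and is invisible to lockdep.
 */
void pcp_op_local_irq(void)
{
	unsigned long flags;

	local_irq_save(flags);
	/* ... touch this CPU's per-cpu-pages state ... */
	local_irq_restore(flags);
}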
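
[Reviewer note, not part of the patch] The patch also wires several Android vendor hooks into the allocator (trace_android_vh_free_unref_page_bypass, _unreserve_highatomic_bypass, _rmqueue_bulk_bypass, _should_alloc_pages_retry, _pageset_update). The sketch below shows how a vendor module might attach to the free_unref_page bypass hook. It assumes the register_trace_android_vh_*() helper and the trace/hooks/mm.h header that the Android common kernel generates for these hooks, and it infers the handler prototype from the call site in the patch; treat both as assumptions rather than a guaranteed API.

/*
 * Illustrative sketch only -- assumes an Android common kernel that declares
 * android_vh_free_unref_page_bypass in trace/hooks/mm.h. Handler prototype is
 * inferred from the call site:
 *   trace_android_vh_free_unref_page_bypass(page, order, migratetype, &skip);
 */
#include <linux/module.h>
#include <linux/mm_types.h>
#include <trace/hooks/mm.h>

static void vendor_free_unref_page_bypass(void *unused, struct page *page,
					  int order, int migratetype,
					  bool *bypass)
{
	/*
	 * Setting *bypass = true makes free_unref_page()/__free_pages_ok()
	 * return before touching the pcp lists, leaving the page to the
	 * vendor module. Leave it false to keep the default behaviour.
	 */
	*bypass = false;
}

static int __init vendor_pcp_hook_init(void)
{
	return register_trace_android_vh_free_unref_page_bypass(
			vendor_free_unref_page_bypass, NULL);
}
module_init(vendor_pcp_hook_init);
MODULE_LICENSE("GPL");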