From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 20 Feb 2024 01:20:52 +0000
Subject: [PATCH] mm/page_alloc: drop the pa_lock local_lock and add Android vendor hooks

Replace the per-CPU pa_lock (local_lock) protection of the pcp lists with
plain local_irq_save()/local_irq_restore(), switch drain_local_pages_wq()
from migrate_disable() to preempt_disable(), and fold isolate_pcp_pages()
back into free_pcppages_bulk(). Add trace_android_vh_* vendor hooks that
let modules bypass free_unref_page()/__free_pages_ok(),
unreserve_highatomic_pageblock(), rmqueue_bulk() and allocation retries,
and tune pageset high/batch values. Also snapshot PageHead() before the
reference drop in __free_pages(), and take the zonelist_update_seq write
lock with interrupts disabled and printk deferred to avoid deadlocking
against GFP_ATOMIC allocations and console drivers.

---
 kernel/mm/page_alloc.c |  231 ++++++++++++++++++++++++++-------------------------------
 1 file changed, 105 insertions(+), 126 deletions(-)
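
Note (editorial, sits between the diffstat and the diff so git-am ignores it):
the trace_android_vh_*_bypass hooks added below follow the usual Android
vendor-hook pattern -- the core code passes a bool out-parameter and skips its
default path when a registered handler sets it. Below is a minimal sketch of a
vendor module attaching to android_vh_free_unref_page_bypass. It assumes the
hook is declared via DECLARE_HOOK with the argument list used at the call
sites in this patch (page, order, migratetype, bool *bypass), e.g. in
include/trace/hooks/mm.h; the handler, helper and module names here are
hypothetical, not part of this patch.

#include <linux/module.h>
#include <linux/mm.h>
#include <trace/hooks/mm.h>	/* assumed location of the android_vh declarations */

/*
 * Hypothetical placeholder for a vendor-managed page pool; always declines
 * in this sketch.
 */
static bool example_pool_try_claim(struct page *page, int order, int migratetype)
{
	return false;
}

/*
 * Probe signature follows the tracepoint convention: a private data pointer
 * first, then the TP_PROTO arguments used at the call sites in this patch.
 */
static void example_free_unref_page_bypass(void *unused, struct page *page,
					   int order, int migratetype,
					   bool *bypass)
{
	if (example_pool_try_claim(page, order, migratetype))
		*bypass = true;	/* caller returns without touching pcp/buddy lists */
}

static int __init example_hook_init(void)
{
	/* register_/unregister_ wrappers are generated by DECLARE_HOOK. */
	return register_trace_android_vh_free_unref_page_bypass(
			example_free_unref_page_bypass, NULL);
}

static void __exit example_hook_exit(void)
{
	unregister_trace_android_vh_free_unref_page_bypass(
			example_free_unref_page_bypass, NULL);
}

module_init(example_hook_init);
module_exit(example_hook_exit);
MODULE_LICENSE("GPL");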

diff --git a/kernel/mm/page_alloc.c b/kernel/mm/page_alloc.c
index 4a15674..3bcee27 100644
--- a/kernel/mm/page_alloc.c
+++ b/kernel/mm/page_alloc.c
@@ -61,7 +61,6 @@
 #include <linux/hugetlb.h>
 #include <linux/sched/rt.h>
 #include <linux/sched/mm.h>
-#include <linux/local_lock.h>
 #include <linux/page_owner.h>
 #include <linux/page_pinner.h>
 #include <linux/kthread.h>
@@ -385,13 +384,6 @@
 EXPORT_SYMBOL(nr_node_ids);
 EXPORT_SYMBOL(nr_online_nodes);
 #endif
-
-struct pa_lock {
-	local_lock_t l;
-};
-static DEFINE_PER_CPU(struct pa_lock, pa_lock) = {
-	.l	= INIT_LOCAL_LOCK(l),
-};
 
 int page_group_by_mobility_disabled __read_mostly;
 
@@ -1430,7 +1422,7 @@
 }
 
 /*
- * Frees a number of pages which have been collected from the pcp lists.
+ * Frees a number of pages from the PCP lists
  * Assumes all pages on list are in same zone, and of same order.
  * count is the number of pages to free.
  *
@@ -1440,56 +1432,15 @@
  * And clear the zone's pages_scanned counter, to hold off the "all pages are
  * pinned" detection logic.
  */
-static void free_pcppages_bulk(struct zone *zone, struct list_head *head,
-			       bool zone_retry)
-{
-	bool isolated_pageblocks;
-	struct page *page, *tmp;
-	unsigned long flags;
-
-	spin_lock_irqsave(&zone->lock, flags);
-	isolated_pageblocks = has_isolate_pageblock(zone);
-
-	/*
-	 * Use safe version since after __free_one_page(),
-	 * page->lru.next will not point to original list.
-	 */
-	list_for_each_entry_safe(page, tmp, head, lru) {
-		int mt = get_pcppage_migratetype(page);
-
-		if (page_zone(page) != zone) {
-			/*
-			 * free_unref_page_list() sorts pages by zone. If we end
-			 * up with pages from a different NUMA nodes belonging
-			 * to the same ZONE index then we need to redo with the
-			 * correct ZONE pointer. Skip the page for now, redo it
-			 * on the next iteration.
-			 */
-			WARN_ON_ONCE(zone_retry == false);
-			if (zone_retry)
-				continue;
-		}
-
-		/* MIGRATE_ISOLATE page should not go to pcplists */
-		VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
-		/* Pageblock could have been isolated meanwhile */
-		if (unlikely(isolated_pageblocks))
-			mt = get_pageblock_migratetype(page);
-
-		list_del(&page->lru);
-		__free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE);
-		trace_mm_page_pcpu_drain(page, 0, mt);
-	}
-	spin_unlock_irqrestore(&zone->lock, flags);
-}
-
-static void isolate_pcp_pages(int count, struct per_cpu_pages *pcp,
-			      struct list_head *dst)
+static void free_pcppages_bulk(struct zone *zone, int count,
+					struct per_cpu_pages *pcp)
 {
 	int migratetype = 0;
 	int batch_free = 0;
 	int prefetch_nr = 0;
-	struct page *page;
+	bool isolated_pageblocks;
+	struct page *page, *tmp;
+	LIST_HEAD(head);
 
 	/*
 	 * Ensure proper count is passed which otherwise would stuck in the
@@ -1526,7 +1477,7 @@
 			if (bulkfree_pcp_prepare(page))
 				continue;
 
-			list_add_tail(&page->lru, dst);
+			list_add_tail(&page->lru, &head);
 
 			/*
 			 * We are going to put the page back to the global
@@ -1541,6 +1492,26 @@
 				prefetch_buddy(page);
 		} while (--count && --batch_free && !list_empty(list));
 	}
+
+	spin_lock(&zone->lock);
+	isolated_pageblocks = has_isolate_pageblock(zone);
+
+	/*
+	 * Use safe version since after __free_one_page(),
+	 * page->lru.next will not point to original list.
+	 */
+	list_for_each_entry_safe(page, tmp, &head, lru) {
+		int mt = get_pcppage_migratetype(page);
+		/* MIGRATE_ISOLATE page should not go to pcplists */
+		VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
+		/* Pageblock could have been isolated meanwhile */
+		if (unlikely(isolated_pageblocks))
+			mt = get_pageblock_migratetype(page);
+
+		__free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE);
+		trace_mm_page_pcpu_drain(page, 0, mt);
+	}
+	spin_unlock(&zone->lock);
 }
 
 static void free_one_page(struct zone *zone,
@@ -1643,16 +1614,21 @@
 	unsigned long flags;
 	int migratetype;
 	unsigned long pfn = page_to_pfn(page);
+	bool skip_free_unref_page = false;
 
 	if (!free_pages_prepare(page, order, true, fpi_flags))
 		return;
 
 	migratetype = get_pfnblock_migratetype(page, pfn);
-	local_lock_irqsave(&pa_lock.l, flags);
+	trace_android_vh_free_unref_page_bypass(page, order, migratetype, &skip_free_unref_page);
+	if (skip_free_unref_page)
+		return;
+
+	local_irq_save(flags);
 	__count_vm_events(PGFREE, 1 << order);
 	free_one_page(page_zone(page), page, pfn, order, migratetype,
 		      fpi_flags);
-	local_unlock_irqrestore(&pa_lock.l, flags);
+	local_irq_restore(flags);
 }
 
 void __free_pages_core(struct page *page, unsigned int order)
@@ -2826,6 +2802,7 @@
 	struct page *page;
 	int order;
 	bool ret;
+	bool skip_unreserve_highatomic = false;
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx,
 								ac->nodemask) {
@@ -2835,6 +2812,11 @@
 		 */
 		if (!force && zone->nr_reserved_highatomic <=
 					pageblock_nr_pages)
+			continue;
+
+		trace_android_vh_unreserve_highatomic_bypass(force, zone,
+				&skip_unreserve_highatomic);
+		if (skip_unreserve_highatomic)
 			continue;
 
 		spin_lock_irqsave(&zone->lock, flags);
@@ -3082,6 +3064,10 @@
 	struct list_head *list = &pcp->lists[migratetype];
 
 	if (list_empty(list)) {
+		trace_android_vh_rmqueue_bulk_bypass(order, pcp, migratetype, list);
+		if (!list_empty(list))
+			return list;
+
 		pcp->count += rmqueue_bulk(zone, order,
 				pcp->batch, list,
 				migratetype, alloc_flags);
@@ -3105,18 +3091,13 @@
 {
 	unsigned long flags;
 	int to_drain, batch;
-	LIST_HEAD(dst);
 
-	local_lock_irqsave(&pa_lock.l, flags);
+	local_irq_save(flags);
 	batch = READ_ONCE(pcp->batch);
 	to_drain = min(pcp->count, batch);
 	if (to_drain > 0)
-		isolate_pcp_pages(to_drain, pcp, &dst);
-
-	local_unlock_irqrestore(&pa_lock.l, flags);
-
-	if (to_drain > 0)
-		free_pcppages_bulk(zone, &dst, false);
+		free_pcppages_bulk(zone, to_drain, pcp);
+	local_irq_restore(flags);
 }
 #endif
 
@@ -3132,21 +3113,14 @@
 	unsigned long flags;
 	struct per_cpu_pageset *pset;
 	struct per_cpu_pages *pcp;
-	LIST_HEAD(dst);
-	int count;
 
-	local_lock_irqsave(&pa_lock.l, flags);
+	local_irq_save(flags);
 	pset = per_cpu_ptr(zone->pageset, cpu);
 
 	pcp = &pset->pcp;
-	count = pcp->count;
-	if (count)
-		isolate_pcp_pages(count, pcp, &dst);
-
-	local_unlock_irqrestore(&pa_lock.l, flags);
-
-	if (count)
-		free_pcppages_bulk(zone, &dst, false);
+	if (pcp->count)
+		free_pcppages_bulk(zone, pcp->count, pcp);
+	local_irq_restore(flags);
 }
 
 /*
@@ -3194,9 +3168,9 @@
 	 * cpu which is allright but we also have to make sure to not move to
 	 * a different one.
 	 */
-	migrate_disable();
+	preempt_disable();
 	drain_local_pages(drain->zone);
-	migrate_enable();
+	preempt_enable();
 }
 
 /*
@@ -3345,8 +3319,7 @@
 	return true;
 }
 
-static void free_unref_page_commit(struct page *page, unsigned long pfn,
-				   struct list_head *dst)
+static void free_unref_page_commit(struct page *page, unsigned long pfn)
 {
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
@@ -3380,8 +3353,7 @@
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
 		unsigned long batch = READ_ONCE(pcp->batch);
-
-		isolate_pcp_pages(batch, pcp, dst);
+		free_pcppages_bulk(zone, batch, pcp);
 	}
 }
 
@@ -3392,17 +3364,20 @@
 {
 	unsigned long flags;
 	unsigned long pfn = page_to_pfn(page);
-	struct zone *zone = page_zone(page);
-	LIST_HEAD(dst);
+	int migratetype;
+	bool skip_free_unref_page = false;
 
 	if (!free_unref_page_prepare(page, pfn))
 		return;
 
-	local_lock_irqsave(&pa_lock.l, flags);
-	free_unref_page_commit(page, pfn, &dst);
-	local_unlock_irqrestore(&pa_lock.l, flags);
-	if (!list_empty(&dst))
-		free_pcppages_bulk(zone, &dst, false);
+	migratetype = get_pfnblock_migratetype(page, pfn);
+	trace_android_vh_free_unref_page_bypass(page, 0, migratetype, &skip_free_unref_page);
+	if (skip_free_unref_page)
+		return;
+
+	local_irq_save(flags);
+	free_unref_page_commit(page, pfn);
+	local_irq_restore(flags);
 }
 
 /*
@@ -3413,11 +3388,6 @@
 	struct page *page, *next;
 	unsigned long flags, pfn;
 	int batch_count = 0;
-	struct list_head dsts[__MAX_NR_ZONES];
-	int i;
-
-	for (i = 0; i < __MAX_NR_ZONES; i++)
-		INIT_LIST_HEAD(&dsts[i]);
 
 	/* Prepare pages for freeing */
 	list_for_each_entry_safe(page, next, list, lru) {
@@ -3427,42 +3397,25 @@
 		set_page_private(page, pfn);
 	}
 
-	local_lock_irqsave(&pa_lock.l, flags);
+	local_irq_save(flags);
 	list_for_each_entry_safe(page, next, list, lru) {
 		unsigned long pfn = page_private(page);
-		enum zone_type type;
 
 		set_page_private(page, 0);
 		trace_mm_page_free_batched(page);
-		type = page_zonenum(page);
-		free_unref_page_commit(page, pfn, &dsts[type]);
+		free_unref_page_commit(page, pfn);
 
 		/*
 		 * Guard against excessive IRQ disabled times when we get
 		 * a large list of pages to free.
 		 */
 		if (++batch_count == SWAP_CLUSTER_MAX) {
-			local_unlock_irqrestore(&pa_lock.l, flags);
+			local_irq_restore(flags);
 			batch_count = 0;
-			local_lock_irqsave(&pa_lock.l, flags);
+			local_irq_save(flags);
 		}
 	}
-	local_unlock_irqrestore(&pa_lock.l, flags);
-
-	for (i = 0; i < __MAX_NR_ZONES; ) {
-		struct page *page;
-		struct zone *zone;
-
-		if (list_empty(&dsts[i])) {
-			i++;
-			continue;
-		}
-
-		page = list_first_entry(&dsts[i], struct page, lru);
-		zone = page_zone(page);
-
-		free_pcppages_bulk(zone, &dsts[i], true);
-	}
+	local_irq_restore(flags);
 }
 
 /*
@@ -3629,7 +3582,7 @@
 	struct page *page;
 	unsigned long flags;
 
-	local_lock_irqsave(&pa_lock.l, flags);
+	local_irq_save(flags);
 	pcp = &this_cpu_ptr(zone->pageset)->pcp;
 	page = __rmqueue_pcplist(zone,  migratetype, alloc_flags, pcp,
 				 gfp_flags);
@@ -3637,7 +3590,7 @@
 		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1);
 		zone_statistics(preferred_zone, zone);
 	}
-	local_unlock_irqrestore(&pa_lock.l, flags);
+	local_irq_restore(flags);
 	return page;
 }
 
@@ -3664,8 +3617,7 @@
 	 * allocate greater than order-1 page units with __GFP_NOFAIL.
 	 */
 	WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
-	local_lock_irqsave(&pa_lock.l, flags);
-	spin_lock(&zone->lock);
+	spin_lock_irqsave(&zone->lock, flags);
 
 	do {
 		page = NULL;
@@ -3700,7 +3652,7 @@
 	zone_statistics(preferred_zone, zone);
 	trace_android_vh_rmqueue(preferred_zone, zone, order,
 			gfp_flags, alloc_flags, migratetype);
-	local_unlock_irqrestore(&pa_lock.l, flags);
+	local_irq_restore(flags);
 
 out:
 	/* Separate test+clear to avoid unnecessary atomics */
@@ -3713,7 +3665,7 @@
 	return page;
 
 failed:
-	local_unlock_irqrestore(&pa_lock.l, flags);
+	local_irq_restore(flags);
 	return NULL;
 }
 
@@ -4898,6 +4850,7 @@
 	unsigned int zonelist_iter_cookie;
 	int reserve_flags;
 	unsigned long vh_record;
+	bool should_alloc_retry = false;
 
 	trace_android_vh_alloc_pages_slowpath_begin(gfp_mask, order, &vh_record);
 	/*
@@ -5037,6 +4990,12 @@
 
 	if (page)
 		goto got_pg;
+
+	trace_android_vh_should_alloc_pages_retry(gfp_mask, order,
+		&alloc_flags, ac->migratetype, ac->preferred_zoneref->zone,
+		&page, &should_alloc_retry);
+	if (should_alloc_retry)
+		goto retry;
 
 	/* Try direct reclaim and then allocating */
 	page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
@@ -5310,10 +5269,13 @@
 
 void __free_pages(struct page *page, unsigned int order)
 {
+	/* get PageHead before we drop reference */
+	int head = PageHead(page);
+
 	trace_android_vh_free_pages(page, order);
 	if (put_page_testzero(page))
 		free_the_page(page, order);
-	else if (!PageHead(page))
+	else if (!head)
 		while (order-- > 0)
 			free_the_page(page + (1 << order), order);
 }
@@ -6228,7 +6190,21 @@
 	int nid;
 	int __maybe_unused cpu;
 	pg_data_t *self = data;
+	unsigned long flags;
 
+	/*
+	 * Explicitly disable this CPU's interrupts before taking seqlock
+	 * to prevent any IRQ handler from calling into the page allocator
+	 * (e.g. GFP_ATOMIC) that could hit zonelist_iter_begin and livelock.
+	 */
+	local_irq_save(flags);
+	/*
+	 * Explicitly disable this CPU's synchronous printk() before taking
+	 * seqlock to prevent any printk() from trying to hold port->lock, for
+	 * tty_insert_flip_string_and_push_buffer() on other CPU might be
+	 * calling kmalloc(GFP_ATOMIC | __GFP_NOWARN) with port->lock held.
+	 */
+	printk_deferred_enter();
 	write_seqlock(&zonelist_update_seq);
 
 #ifdef CONFIG_NUMA
@@ -6263,6 +6239,8 @@
 	}
 
 	write_sequnlock(&zonelist_update_seq);
+	printk_deferred_exit();
+	local_irq_restore(flags);
 }
 
 static noinline void __init
@@ -6682,6 +6660,7 @@
 static void pageset_update(struct per_cpu_pages *pcp, unsigned long high,
 		unsigned long batch)
 {
+	trace_android_vh_pageset_update(&high, &batch);
        /* start with a fail safe value for batch */
 	pcp->batch = 1;
 	smp_wmb();
@@ -9141,7 +9120,7 @@
 	struct per_cpu_pageset *pset;
 
 	/* avoid races with drain_pages()  */
-	local_lock_irqsave(&pa_lock.l, flags);
+	local_irq_save(flags);
 	if (zone->pageset != &boot_pageset) {
 		for_each_online_cpu(cpu) {
 			pset = per_cpu_ptr(zone->pageset, cpu);
@@ -9150,7 +9129,7 @@
 		free_percpu(zone->pageset);
 		zone->pageset = &boot_pageset;
 	}
-	local_unlock_irqrestore(&pa_lock.l, flags);
+	local_irq_restore(flags);
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE

--
Gitblit v1.6.2