From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/mm/page_isolation.c | 179 +++++++++++++++++++++++++++++++---------------------------- 1 files changed, 93 insertions(+), 86 deletions(-) diff --git a/kernel/mm/page_isolation.c b/kernel/mm/page_isolation.c index 658eec1..48ff142 100644 --- a/kernel/mm/page_isolation.c +++ b/kernel/mm/page_isolation.c @@ -15,62 +15,30 @@ #define CREATE_TRACE_POINTS #include <trace/events/page_isolation.h> -static int set_migratetype_isolate(struct page *page, int migratetype, - bool skip_hwpoisoned_pages) +static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags) { - struct zone *zone; - unsigned long flags, pfn; - struct memory_isolate_notify arg; - int notifier_ret; - int ret = -EBUSY; - - zone = page_zone(page); + struct zone *zone = page_zone(page); + struct page *unmovable; + unsigned long flags; spin_lock_irqsave(&zone->lock, flags); /* * We assume the caller intended to SET migrate type to isolate. * If it is already set, then someone else must have raced and - * set it before us. Return -EBUSY + * set it before us. */ - if (is_migrate_isolate_page(page)) - goto out; + if (is_migrate_isolate_page(page)) { + spin_unlock_irqrestore(&zone->lock, flags); + return -EBUSY; + } - pfn = page_to_pfn(page); - arg.start_pfn = pfn; - arg.nr_pages = pageblock_nr_pages; - arg.pages_found = 0; - - /* - * It may be possible to isolate a pageblock even if the - * migratetype is not MIGRATE_MOVABLE. The memory isolation - * notifier chain is used by balloon drivers to return the - * number of pages in a range that are held by the balloon - * driver to shrink memory. If all the pages are accounted for - * by balloons, are free, or on the LRU, isolation can continue. - * Later, for example, when memory hotplug notifier runs, these - * pages reported as "can be isolated" should be isolated(freed) - * by the balloon driver through the memory notifier chain. - */ - notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg); - notifier_ret = notifier_to_errno(notifier_ret); - if (notifier_ret) - goto out; /* * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself. * We just check MOVABLE pages. */ - if (!has_unmovable_pages(zone, page, arg.pages_found, migratetype, - skip_hwpoisoned_pages)) - ret = 0; - - /* - * immobile means "not-on-lru" pages. If immobile is larger than - * removable-by-driver pages reported by notifier, we'll fail. - */ - -out: - if (!ret) { + unmovable = has_unmovable_pages(zone, page, migratetype, isol_flags); + if (!unmovable) { unsigned long nr_pages; int mt = get_pageblock_migratetype(page); @@ -80,12 +48,20 @@ NULL); __mod_zone_freepage_state(zone, -nr_pages, mt); + spin_unlock_irqrestore(&zone->lock, flags); + return 0; } spin_unlock_irqrestore(&zone->lock, flags); - if (!ret) - drain_all_pages(zone); - return ret; + if (isol_flags & REPORT_FAILURE) { + /* + * printk() with zone->lock held will likely trigger a + * lockdep splat, so defer it here. + */ + dump_page(unmovable, "unmovable page"); + } + + return -EBUSY; } static void unset_migratetype_isolate(struct page *page, unsigned migratetype) @@ -111,7 +87,7 @@ * these pages to be merged. */ if (PageBuddy(page)) { - order = page_order(page); + order = buddy_order(page); if (order >= pageblock_order) { pfn = page_to_pfn(page); buddy_pfn = __find_buddy_pfn(pfn, order); @@ -129,19 +105,22 @@ * If we isolate freepage with more than pageblock_order, there * should be no freepage in the range, so we could avoid costly * pageblock scanning for freepage moving. + * + * We didn't actually touch any of the isolated pages, so place them + * to the tail of the freelist. This is an optimization for memory + * onlining - just onlined memory won't immediately be considered for + * allocation. */ if (!isolated_page) { nr_pages = move_freepages_block(zone, page, migratetype, NULL); __mod_zone_freepage_state(zone, nr_pages, migratetype); } set_pageblock_migratetype(page, migratetype); + if (isolated_page) + __putback_isolated_page(page, order, migratetype); zone->nr_isolate_pageblock--; out: spin_unlock_irqrestore(&zone->lock, flags); - if (isolated_page) { - post_alloc_hook(page, order, __GFP_MOVABLE); - __free_pages(page, order); - } } static inline struct page * @@ -152,8 +131,6 @@ for (i = 0; i < nr_pages; i++) { struct page *page; - if (!pfn_valid_within(pfn + i)) - continue; page = pfn_to_online_page(pfn + i); if (!page) continue; @@ -162,30 +139,50 @@ return NULL; } -/* - * start_isolate_page_range() -- make page-allocation-type of range of pages - * to be MIGRATE_ISOLATE. - * @start_pfn: The lower PFN of the range to be isolated. - * @end_pfn: The upper PFN of the range to be isolated. - * @migratetype: migrate type to set in error recovery. +/** + * start_isolate_page_range() - make page-allocation-type of range of pages to + * be MIGRATE_ISOLATE. + * @start_pfn: The lower PFN of the range to be isolated. + * @end_pfn: The upper PFN of the range to be isolated. + * start_pfn/end_pfn must be aligned to pageblock_order. + * @migratetype: Migrate type to set in error recovery. + * @flags: The following flags are allowed (they can be combined in + * a bit mask) + * MEMORY_OFFLINE - isolate to offline (!allocate) memory + * e.g., skip over PageHWPoison() pages + * and PageOffline() pages. + * REPORT_FAILURE - report details about the failure to + * isolate the range * * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in * the range will never be allocated. Any free pages and pages freed in the - * future will not be allocated again. - * - * start_pfn/end_pfn must be aligned to pageblock_order. - * Return 0 on success and -EBUSY if any part of range cannot be isolated. + * future will not be allocated again. If specified range includes migrate types + * other than MOVABLE or CMA, this will fail with -EBUSY. For isolating all + * pages in the range finally, the caller have to free all pages in the range. + * test_page_isolated() can be used for test it. * * There is no high level synchronization mechanism that prevents two threads - * from trying to isolate overlapping ranges. If this happens, one thread + * from trying to isolate overlapping ranges. If this happens, one thread * will notice pageblocks in the overlapping range already set to isolate. * This happens in set_migratetype_isolate, and set_migratetype_isolate - * returns an error. We then clean up by restoring the migration type on - * pageblocks we may have modified and return -EBUSY to caller. This + * returns an error. We then clean up by restoring the migration type on + * pageblocks we may have modified and return -EBUSY to caller. This * prevents two threads from simultaneously working on overlapping ranges. + * + * Please note that there is no strong synchronization with the page allocator + * either. Pages might be freed while their page blocks are marked ISOLATED. + * A call to drain_all_pages() after isolation can flush most of them. However + * in some cases pages might still end up on pcp lists and that would allow + * for their allocation even when they are in fact isolated already. Depending + * on how strong of a guarantee the caller needs, further drain_all_pages() + * might be needed (e.g. __offline_pages will need to call it after check for + * isolated range for a next retry). + * + * Return: 0 on success and -EBUSY if any part of range cannot be isolated. */ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, - unsigned migratetype, bool skip_hwpoisoned_pages) + unsigned migratetype, int flags, + unsigned long *failed_pfn) { unsigned long pfn; unsigned long undo_pfn; @@ -198,10 +195,13 @@ pfn < end_pfn; pfn += pageblock_nr_pages) { page = __first_valid_page(pfn, pageblock_nr_pages); - if (page && - set_migratetype_isolate(page, migratetype, skip_hwpoisoned_pages)) { - undo_pfn = pfn; - goto undo; + if (page) { + if (set_migratetype_isolate(page, migratetype, flags)) { + undo_pfn = pfn; + if (failed_pfn) + *failed_pfn = page_to_pfn(page); + goto undo; + } } } return 0; @@ -221,7 +221,7 @@ /* * Make isolated pages available again. */ -int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, +void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, unsigned migratetype) { unsigned long pfn; @@ -238,7 +238,6 @@ continue; unset_migratetype_isolate(page, migratetype); } - return 0; } /* * Test all pages in the range is free(means isolated) or not. @@ -249,7 +248,7 @@ */ static unsigned long __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn, - bool skip_hwpoisoned_pages) + int flags) { struct page *page; @@ -265,14 +264,22 @@ * the correct MIGRATE_ISOLATE freelist. There is no * simple way to verify that as VM_BUG_ON(), though. */ - pfn += 1 << page_order(page); - else if (skip_hwpoisoned_pages && PageHWPoison(page)) + pfn += 1 << buddy_order(page); + else if ((flags & MEMORY_OFFLINE) && PageHWPoison(page)) /* A HWPoisoned page cannot be also PageBuddy */ + pfn++; + else if ((flags & MEMORY_OFFLINE) && PageOffline(page) && + !page_count(page)) + /* + * The responsible driver agreed to skip PageOffline() + * pages when offlining memory by dropping its + * reference in MEM_GOING_OFFLINE. + */ pfn++; else break; } -#ifdef CONFIG_ARCH_ROCKCHIP +#ifdef CONFIG_NO_GKI if (pfn < end_pfn) dump_page_owner(page); #endif @@ -282,7 +289,7 @@ /* Caller should ensure that requested range is in a single zone */ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, - bool skip_hwpoisoned_pages) + int isol_flags, unsigned long *failed_pfn) { unsigned long pfn, flags; struct page *page; @@ -304,16 +311,16 @@ /* Check all pages are free or marked as ISOLATED */ zone = page_zone(page); spin_lock_irqsave(&zone->lock, flags); - pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn, - skip_hwpoisoned_pages); + pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn, isol_flags); spin_unlock_irqrestore(&zone->lock, flags); trace_test_pages_isolated(start_pfn, end_pfn, pfn); + if (pfn < end_pfn) { + page_pinner_failure_detect(pfn_to_page(pfn)); + if (failed_pfn) + *failed_pfn = pfn; + return -EBUSY; + } - return pfn < end_pfn ? -EBUSY : 0; -} - -struct page *alloc_migrate_target(struct page *page, unsigned long private) -{ - return new_page_nodemask(page, numa_node_id(), &node_states[N_MEMORY]); + return 0; } -- Gitblit v1.6.2