From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 20 Feb 2024 01:20:52 +0000
Subject: [PATCH] add new system file

---
 kernel/mm/page_ext.c | 159 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 129 insertions(+), 30 deletions(-)

diff --git a/kernel/mm/page_ext.c b/kernel/mm/page_ext.c
index aad1201..7cd4e37 100644
--- a/kernel/mm/page_ext.c
+++ b/kernel/mm/page_ext.c
@@ -1,14 +1,14 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/mm.h>
 #include <linux/mmzone.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/page_ext.h>
 #include <linux/memory.h>
 #include <linux/vmalloc.h>
 #include <linux/kmemleak.h>
 #include <linux/page_owner.h>
 #include <linux/page_idle.h>
-
+#include <linux/rcupdate.h>
 /*
  * struct page extension
  *
@@ -58,20 +58,35 @@
  * can utilize this callback to initialize the state of it correctly.
  */
 
-static struct page_ext_operations *page_ext_ops[] = {
-#ifdef CONFIG_DEBUG_PAGEALLOC
-	&debug_guardpage_ops,
+#ifdef CONFIG_SPARSEMEM
+#define PAGE_EXT_INVALID	(0x1)
 #endif
+
+#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
+static bool need_page_idle(void)
+{
+	return true;
+}
+struct page_ext_operations page_idle_ops = {
+	.need = need_page_idle,
+};
+#endif
+
+static struct page_ext_operations *page_ext_ops[] = {
 #ifdef CONFIG_PAGE_OWNER
 	&page_owner_ops,
 #endif
-#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
+#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
 	&page_idle_ops,
+#endif
+#ifdef CONFIG_PAGE_PINNER
+	&page_pinner_ops,
 #endif
 };
 
+unsigned long page_ext_size = sizeof(struct page_ext);
+
 static unsigned long total_usage;
-static unsigned long extra_mem;
 
 static bool __init invoke_need_callbacks(void)
 {
@@ -81,9 +96,8 @@
 
 	for (i = 0; i < entries; i++) {
 		if (page_ext_ops[i]->need && page_ext_ops[i]->need()) {
-			page_ext_ops[i]->offset = sizeof(struct page_ext) +
-						extra_mem;
-			extra_mem += page_ext_ops[i]->size;
+			page_ext_ops[i]->offset = page_ext_size;
+			page_ext_size += page_ext_ops[i]->size;
 			need = true;
 		}
 	}
@@ -102,17 +116,61 @@
 	}
 }
 
-static unsigned long get_entry_size(void)
+#ifndef CONFIG_SPARSEMEM
+void __init page_ext_init_flatmem_late(void)
 {
-	return sizeof(struct page_ext) + extra_mem;
+	invoke_init_callbacks();
 }
+#endif
 
 static inline struct page_ext *get_entry(void *base, unsigned long index)
 {
-	return base + get_entry_size() * index;
+	return base + page_ext_size * index;
 }
 
-#if !defined(CONFIG_SPARSEMEM)
+/**
+ * page_ext_get() - Get the extended information for a page.
+ * @page: The page we're interested in.
+ *
+ * Ensures that the page_ext will remain valid until page_ext_put()
+ * is called.
+ *
+ * Return: NULL if no page_ext exists for this page.
+ * Context: Any context. Caller may not sleep until they have called
+ * page_ext_put().
+ */
+struct page_ext *page_ext_get(struct page *page)
+{
+	struct page_ext *page_ext;
+
+	rcu_read_lock();
+	page_ext = lookup_page_ext(page);
+	if (!page_ext) {
+		rcu_read_unlock();
+		return NULL;
+	}
+
+	return page_ext;
+}
+
+/**
+ * page_ext_put() - Working with page extended information is done.
+ * @page_ext: Page extended information received from page_ext_get().
+ *
+ * The page extended information of the page may not be valid after this
+ * function is called.
+ *
+ * Return: None.
+ * Context: Any context with corresponding page_ext_get() is called.
+ */
+void page_ext_put(struct page_ext *page_ext)
+{
+	if (unlikely(!page_ext))
+		return;
+
+	rcu_read_unlock();
+}
+#ifndef CONFIG_SPARSEMEM
 
 void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
 {
@@ -126,6 +184,7 @@
 	unsigned long index;
 	struct page_ext *base;
 
+	WARN_ON_ONCE(!rcu_read_lock_held());
 	base = NODE_DATA(page_to_nid(page))->node_page_ext;
 	/*
 	 * The sanity checks the page allocator does upon freeing a
@@ -139,6 +198,7 @@
 					MAX_ORDER_NR_PAGES);
 	return get_entry(base, index);
 }
+EXPORT_SYMBOL_GPL(lookup_page_ext);
 
 static int __init alloc_node_page_ext(int nid)
 {
@@ -159,11 +219,11 @@
 		!IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES))
 		nr_pages += MAX_ORDER_NR_PAGES;
 
-	table_size = get_entry_size() * nr_pages;
-	base = memblock_virt_alloc_try_nid_nopanic(
+	table_size = page_ext_size * nr_pages;
+	base = memblock_alloc_try_nid(
 			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
-			BOOTMEM_ALLOC_ACCESSIBLE, nid);
+			MEMBLOCK_ALLOC_ACCESSIBLE, nid);
 
 	if (!base)
 		return -ENOMEM;
 	NODE_DATA(nid)->node_page_ext = base;
@@ -185,7 +245,6 @@
 			goto fail;
 	}
 	pr_info("allocated %ld bytes of page_ext\n", total_usage);
-	invoke_init_callbacks();
 	return;
 
 fail:
@@ -194,21 +253,29 @@
 }
 
 #else /* CONFIG_FLAT_NODE_MEM_MAP */
+static bool page_ext_invalid(struct page_ext *page_ext)
+{
+	return !page_ext || (((unsigned long)page_ext & PAGE_EXT_INVALID) == PAGE_EXT_INVALID);
+}
 
 struct page_ext *lookup_page_ext(const struct page *page)
 {
 	unsigned long pfn = page_to_pfn(page);
 	struct mem_section *section = __pfn_to_section(pfn);
+	struct page_ext *page_ext = READ_ONCE(section->page_ext);
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
 	/*
 	 * The sanity checks the page allocator does upon freeing a
 	 * page can reach here before the page_ext arrays are
 	 * allocated when feeding a range of pages to the allocator
 	 * for the first time during bootup or memory hotplug.
	 */
-	if (!section->page_ext)
+	if (page_ext_invalid(page_ext))
 		return NULL;
-	return get_entry(section->page_ext, pfn);
+	return get_entry(page_ext, pfn);
 }
+EXPORT_SYMBOL_GPL(lookup_page_ext);
 
 static void *__meminit alloc_page_ext(size_t size, int nid)
 {
@@ -237,7 +304,7 @@
 	if (section->page_ext)
 		return 0;
 
-	table_size = get_entry_size() * PAGES_PER_SECTION;
+	table_size = page_ext_size * PAGES_PER_SECTION;
 	base = alloc_page_ext(table_size, nid);
 
 	/*
@@ -257,7 +324,7 @@
 	 * we need to apply a mask.
 	 */
 	pfn &= PAGE_SECTION_MASK;
-	section->page_ext = (void *)base - get_entry_size() * pfn;
+	section->page_ext = (void *)base - page_ext_size * pfn;
 	total_usage += table_size;
 	return 0;
 }
@@ -270,7 +337,7 @@
 		struct page *page = virt_to_page(addr);
 		size_t table_size;
 
-		table_size = get_entry_size() * PAGES_PER_SECTION;
+		table_size = page_ext_size * PAGES_PER_SECTION;
 
 		BUG_ON(PageReserved(page));
 		kmemleak_free(addr);
@@ -286,9 +353,30 @@
 	ms = __pfn_to_section(pfn);
 	if (!ms || !ms->page_ext)
 		return;
-	base = get_entry(ms->page_ext, pfn);
+
+	base = READ_ONCE(ms->page_ext);
+	/*
+	 * page_ext here can be valid while doing the roll back
+	 * operation in online_page_ext().
+	 */
+	if (page_ext_invalid(base))
+		base = (void *)base - PAGE_EXT_INVALID;
+	WRITE_ONCE(ms->page_ext, NULL);
+
+	base = get_entry(base, pfn);
 	free_page_ext(base);
-	ms->page_ext = NULL;
+}
+
+static void __invalidate_page_ext(unsigned long pfn)
+{
+	struct mem_section *ms;
+	void *val;
+
+	ms = __pfn_to_section(pfn);
+	if (!ms || !ms->page_ext)
+		return;
+	val = (void *)ms->page_ext + PAGE_EXT_INVALID;
+	WRITE_ONCE(ms->page_ext, val);
 }
 
 static int __meminit online_page_ext(unsigned long start_pfn,
@@ -301,7 +389,7 @@
 	start = SECTION_ALIGN_DOWN(start_pfn);
 	end = SECTION_ALIGN_UP(start_pfn + nr_pages);
 
-	if (nid == -1) {
+	if (nid == NUMA_NO_NODE) {
 		/*
 		 * In this case, "nid" already exists and contains valid memory.
 		 * "start_pfn" passed to us is a pfn which is an arg for
@@ -311,11 +399,8 @@
 		VM_BUG_ON(!node_state(nid, N_ONLINE));
 	}
 
-	for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) {
-		if (!pfn_present(pfn))
-			continue;
+	for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION)
 		fail = init_section_page_ext(pfn, nid);
-	}
 
 	if (!fail)
 		return 0;
@@ -334,6 +419,20 @@
 	start = SECTION_ALIGN_DOWN(start_pfn);
 	end = SECTION_ALIGN_UP(start_pfn + nr_pages);
 
+	/*
+	 * Freeing of page_ext is done in 3 steps to avoid
+	 * use-after-free of it:
+	 * 1) Traverse all the sections and mark their page_ext
+	 *    as invalid.
+	 * 2) Wait for all the existing users of page_ext who
+	 *    started before invalidation to finish.
+	 * 3) Free the page_ext.
+	 */
+	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
+		__invalidate_page_ext(pfn);
+
+	synchronize_rcu();
+
 	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
 		__free_page_ext(pfn);
 	return 0;
-- 
Gitblit v1.6.2
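
Editor's note (not part of the patch): the page_ext_get()/page_ext_put() pair added above must bracket every access to a page's extension data, because page_ext_get() takes the RCU read lock that the memory-offline path synchronizes against. Below is a minimal sketch of a caller; "my_ops", "struct my_data" and my_record() are hypothetical names for illustration, not symbols from this patch, but the get/put bracketing and the ops->offset arithmetic follow the code above (clients find their data at page_ext plus the offset assigned by invoke_need_callbacks() at boot).

	#include <linux/page_ext.h>

	/* Hypothetical page_ext client; .offset is filled in by
	 * invoke_need_callbacks(), as shown in the patch. */
	static struct page_ext_operations my_ops;

	struct my_data {
		unsigned long handle;
	};

	static void my_record(struct page *page, unsigned long handle)
	{
		struct page_ext *page_ext = page_ext_get(page);
		struct my_data *data;

		if (!page_ext)
			return;	/* no page_ext table for this page yet */

		/* Each client's data lives at its registered offset. */
		data = (void *)page_ext + my_ops.offset;
		data->handle = handle;

		/* Drops the rcu_read_lock() taken by page_ext_get(). */
		page_ext_put(page_ext);
	}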
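Editor's note (not part of the patch): on SPARSEMEM, __invalidate_page_ext() marks a section's page_ext pointer dead by setting its low bit (PAGE_EXT_INVALID), which is free because the table is at least word-aligned; page_ext_invalid() tests the bit and __free_page_ext() strips it before freeing. The same round trip, as a self-contained userspace C illustration (it assumes only an aligned buffer, no kernel APIs):

	#include <assert.h>
	#include <stdint.h>

	#define PAGE_EXT_INVALID (0x1)

	static int ext_invalid(const void *p)
	{
		/* NULL and tagged pointers both read as "no page_ext". */
		return !p || ((uintptr_t)p & PAGE_EXT_INVALID);
	}

	int main(void)
	{
		static long table[4];	/* stands in for a section's page_ext table */
		void *base = table;	/* word-aligned, so bit 0 is free for tagging */
		void *tagged = (void *)((uintptr_t)base + PAGE_EXT_INVALID);

		assert(!ext_invalid(base));	/* live pointer: lookups proceed */
		assert(ext_invalid(tagged));	/* invalidated: lookups return NULL */

		/* Stripping the tag recovers the pointer to free, as in
		 * __free_page_ext(). */
		assert((void *)((uintptr_t)tagged - PAGE_EXT_INVALID) == base);
		return 0;
	}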
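Editor's note (not part of the patch): the three-step teardown in offline_page_ext() is safe only because every reader goes through page_ext_get()/page_ext_put(). Condensed from the code above (no new API), the two sides line up as:

	/*
	 *   reader                          memory-offline path
	 *   ------                          -------------------
	 *   page_ext_get()
	 *     rcu_read_lock()               1) __invalidate_page_ext()
	 *     lookup_page_ext()                  tags section->page_ext, so
	 *       tagged/NULL -> returns NULL      new readers back off
	 *   ... use page_ext ...            2) synchronize_rcu()
	 *   page_ext_put()                       waits for readers already
	 *     rcu_read_unlock()                  inside the RCU section
	 *                                   3) __free_page_ext()
	 *                                        nobody can still hold it
	 */

A reader that called page_ext_get() before step 1 keeps a valid pointer until its page_ext_put(); step 2 cannot return before that unlock, so step 3 never frees memory still in use.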