From 244b2c5ca8b14627e4a17755e5922221e121c771 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Wed, 09 Oct 2024 06:15:07 +0000
Subject: [PATCH] change system file
---
kernel/mm/page_owner.c | 331 ++++++++++++++++++++++++++++++++++--------------------
1 files changed, 206 insertions(+), 125 deletions(-)
diff --git a/kernel/mm/page_owner.c b/kernel/mm/page_owner.c
index 631282e..b3e5f98 100644
--- a/kernel/mm/page_owner.c
+++ b/kernel/mm/page_owner.c
@@ -3,13 +3,14 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/stacktrace.h>
#include <linux/page_owner.h>
#include <linux/jump_label.h>
#include <linux/migrate.h>
#include <linux/stackdepot.h>
#include <linux/seq_file.h>
+#include <linux/sched/clock.h>
#include "internal.h"
@@ -24,10 +25,15 @@
short last_migrate_reason;
gfp_t gfp_mask;
depot_stack_handle_t handle;
+ depot_stack_handle_t free_handle;
+ u64 ts_nsec;
+ u64 free_ts_nsec;
+ pid_t pid;
};
-static bool page_owner_disabled = true;
+bool page_owner_enabled;
DEFINE_STATIC_KEY_FALSE(page_owner_inited);
+EXPORT_SYMBOL_GPL(page_owner_inited);
static depot_stack_handle_t dummy_handle;
static depot_stack_handle_t failure_handle;
@@ -41,7 +47,7 @@
return -EINVAL;
if (strcmp(buf, "on") == 0)
- page_owner_disabled = false;
+ page_owner_enabled = true;
return 0;
}
@@ -49,24 +55,16 @@
static bool need_page_owner(void)
{
- if (page_owner_disabled)
- return false;
-
- return true;
+ return page_owner_enabled;
}
static __always_inline depot_stack_handle_t create_dummy_stack(void)
{
unsigned long entries[4];
- struct stack_trace dummy;
+ unsigned int nr_entries;
- dummy.nr_entries = 0;
- dummy.max_entries = ARRAY_SIZE(entries);
- dummy.entries = &entries[0];
- dummy.skip = 0;
-
- save_stack_trace(&dummy);
- return depot_save_stack(&dummy, GFP_KERNEL);
+ nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
+ return stack_depot_save(entries, nr_entries, GFP_KERNEL);
}
static noinline void register_dummy_stack(void)
@@ -86,7 +84,7 @@
static void init_page_owner(void)
{
- if (page_owner_disabled)
+ if (!page_owner_enabled)
return;
register_dummy_stack();
@@ -102,103 +100,134 @@
.init = init_page_owner,
};
-static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
+struct page_owner *get_page_owner(struct page_ext *page_ext)
{
return (void *)page_ext + page_owner_ops.offset;
}
+EXPORT_SYMBOL_GPL(get_page_owner);
-void __reset_page_owner(struct page *page, unsigned int order)
+depot_stack_handle_t get_page_owner_handle(struct page_ext *page_ext, unsigned long pfn)
{
- int i;
- struct page_ext *page_ext;
+ struct page_owner *page_owner;
+ depot_stack_handle_t handle;
- for (i = 0; i < (1 << order); i++) {
- page_ext = lookup_page_ext(page + i);
- if (unlikely(!page_ext))
- continue;
- __clear_bit(PAGE_EXT_OWNER, &page_ext->flags);
- }
+ if (!page_owner_enabled)
+ return 0;
+
+ page_owner = get_page_owner(page_ext);
+
+ /* skip handle for tail pages of higher order allocations */
+ if (!IS_ALIGNED(pfn, 1 << page_owner->order))
+ return 0;
+
+ handle = READ_ONCE(page_owner->handle);
+ return handle;
}
+EXPORT_SYMBOL_GPL(get_page_owner_handle);
-static inline bool check_recursive_alloc(struct stack_trace *trace,
- unsigned long ip)
+static inline bool check_recursive_alloc(unsigned long *entries,
+ unsigned int nr_entries,
+ unsigned long ip)
{
- int i;
+ unsigned int i;
- if (!trace->nr_entries)
- return false;
-
- for (i = 0; i < trace->nr_entries; i++) {
- if (trace->entries[i] == ip)
+ for (i = 0; i < nr_entries; i++) {
+ if (entries[i] == ip)
return true;
}
-
return false;
}
static noinline depot_stack_handle_t save_stack(gfp_t flags)
{
unsigned long entries[PAGE_OWNER_STACK_DEPTH];
- struct stack_trace trace = {
- .nr_entries = 0,
- .entries = entries,
- .max_entries = PAGE_OWNER_STACK_DEPTH,
- .skip = 2
- };
depot_stack_handle_t handle;
+ unsigned int nr_entries;
- save_stack_trace(&trace);
- if (trace.nr_entries != 0 &&
- trace.entries[trace.nr_entries-1] == ULONG_MAX)
- trace.nr_entries--;
+ nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
/*
- * We need to check recursion here because our request to stackdepot
- * could trigger memory allocation to save new entry. New memory
- * allocation would reach here and call depot_save_stack() again
- * if we don't catch it. There is still not enough memory in stackdepot
- * so it would try to allocate memory again and loop forever.
+ * We need to check recursion here because our request to
+ * stackdepot could trigger memory allocation to save new
+ * entry. New memory allocation would reach here and call
+ * stack_depot_save_entries() again if we don't catch it. There is
+ * still not enough memory in stackdepot so it would try to
+ * allocate memory again and loop forever.
*/
- if (check_recursive_alloc(&trace, _RET_IP_))
+ if (check_recursive_alloc(entries, nr_entries, _RET_IP_))
return dummy_handle;
- handle = depot_save_stack(&trace, flags);
+ handle = stack_depot_save(entries, nr_entries, flags);
if (!handle)
handle = failure_handle;
return handle;
}
-static inline void __set_page_owner_handle(struct page_ext *page_ext,
- depot_stack_handle_t handle, unsigned int order, gfp_t gfp_mask)
+void __reset_page_owner(struct page *page, unsigned int order)
+{
+ int i;
+ struct page_ext *page_ext;
+ depot_stack_handle_t handle = 0;
+ struct page_owner *page_owner;
+ u64 free_ts_nsec = local_clock();
+
+ handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
+
+ page_ext = page_ext_get(page);
+ if (unlikely(!page_ext))
+ return;
+ for (i = 0; i < (1 << order); i++) {
+ __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
+ page_owner = get_page_owner(page_ext);
+ page_owner->free_handle = handle;
+ page_owner->free_ts_nsec = free_ts_nsec;
+ page_ext = page_ext_next(page_ext);
+ }
+ page_ext_put(page_ext);
+}
+
+static inline void __set_page_owner_handle(struct page *page,
+ struct page_ext *page_ext, depot_stack_handle_t handle,
+ unsigned int order, gfp_t gfp_mask)
{
struct page_owner *page_owner;
+ int i;
- page_owner = get_page_owner(page_ext);
- page_owner->handle = handle;
- page_owner->order = order;
- page_owner->gfp_mask = gfp_mask;
- page_owner->last_migrate_reason = -1;
+ for (i = 0; i < (1 << order); i++) {
+ page_owner = get_page_owner(page_ext);
+ page_owner->handle = handle;
+ page_owner->order = order;
+ page_owner->gfp_mask = gfp_mask;
+ page_owner->last_migrate_reason = -1;
+ page_owner->pid = current->pid;
+ page_owner->ts_nsec = local_clock();
+ __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
+ __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
- __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
+ page_ext = page_ext_next(page_ext);
+ }
}
noinline void __set_page_owner(struct page *page, unsigned int order,
gfp_t gfp_mask)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext;
depot_stack_handle_t handle;
+ handle = save_stack(gfp_mask);
+
+ page_ext = page_ext_get(page);
if (unlikely(!page_ext))
return;
-
- handle = save_stack(gfp_mask);
- __set_page_owner_handle(page_ext, handle, order, gfp_mask);
+ __set_page_owner_handle(page, page_ext, handle, order, gfp_mask);
+ page_ext_put(page_ext);
}
+EXPORT_SYMBOL_GPL(__set_page_owner);
void __set_page_owner_migrate_reason(struct page *page, int reason)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = page_ext_get(page);
struct page_owner *page_owner;
if (unlikely(!page_ext))
@@ -206,31 +235,41 @@
page_owner = get_page_owner(page_ext);
page_owner->last_migrate_reason = reason;
+ page_ext_put(page_ext);
}
-void __split_page_owner(struct page *page, unsigned int order)
+void __split_page_owner(struct page *page, unsigned int nr)
{
int i;
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = page_ext_get(page);
struct page_owner *page_owner;
if (unlikely(!page_ext))
return;
- page_owner = get_page_owner(page_ext);
- page_owner->order = 0;
- for (i = 1; i < (1 << order); i++)
- __copy_page_owner(page, page + i);
+ for (i = 0; i < nr; i++) {
+ page_owner = get_page_owner(page_ext);
+ page_owner->order = 0;
+ page_ext = page_ext_next(page_ext);
+ }
+ page_ext_put(page_ext);
}
void __copy_page_owner(struct page *oldpage, struct page *newpage)
{
- struct page_ext *old_ext = lookup_page_ext(oldpage);
- struct page_ext *new_ext = lookup_page_ext(newpage);
+ struct page_ext *old_ext;
+ struct page_ext *new_ext;
struct page_owner *old_page_owner, *new_page_owner;
- if (unlikely(!old_ext || !new_ext))
+ old_ext = page_ext_get(oldpage);
+ if (unlikely(!old_ext))
return;
+
+ new_ext = page_ext_get(newpage);
+ if (unlikely(!new_ext)) {
+ page_ext_put(old_ext);
+ return;
+ }
old_page_owner = get_page_owner(old_ext);
new_page_owner = get_page_owner(new_ext);
@@ -239,6 +278,9 @@
new_page_owner->last_migrate_reason =
old_page_owner->last_migrate_reason;
new_page_owner->handle = old_page_owner->handle;
+ new_page_owner->pid = old_page_owner->pid;
+ new_page_owner->ts_nsec = old_page_owner->ts_nsec;
+ new_page_owner->free_ts_nsec = old_page_owner->ts_nsec;
/*
* We don't clear the bit on the oldpage as it's going to be freed
@@ -250,6 +292,9 @@
* the new page, which will be freed.
*/
__set_bit(PAGE_EXT_OWNER, &new_ext->flags);
+ __set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
+ page_ext_put(new_ext);
+ page_ext_put(old_ext);
}
void pagetypeinfo_showmixedcount_print(struct seq_file *m,
@@ -297,7 +342,7 @@
if (PageBuddy(page)) {
unsigned long freepage_order;
- freepage_order = page_order_unsafe(page);
+ freepage_order = buddy_order_unsafe(page);
if (freepage_order < MAX_ORDER)
pfn += (1UL << freepage_order) - 1;
continue;
@@ -306,16 +351,15 @@
if (PageReserved(page))
continue;
- page_ext = lookup_page_ext(page);
+ page_ext = page_ext_get(page);
if (unlikely(!page_ext))
continue;
- if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
- continue;
+ if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
+ goto ext_put_continue;
page_owner = get_page_owner(page_ext);
- page_mt = gfpflags_to_migratetype(
- page_owner->gfp_mask);
+ page_mt = gfp_migratetype(page_owner->gfp_mask);
if (pageblock_mt != page_mt) {
if (is_migrate_cma(pageblock_mt))
count[MIGRATE_MOVABLE]++;
@@ -323,9 +367,12 @@
count[pageblock_mt]++;
pfn = block_end_pfn;
+ page_ext_put(page_ext);
break;
}
pfn += (1UL << page_owner->order) - 1;
+ext_put_continue:
+ page_ext_put(page_ext);
}
}
@@ -341,32 +388,28 @@
struct page *page, struct page_owner *page_owner,
depot_stack_handle_t handle)
{
- int ret;
- int pageblock_mt, page_mt;
+ int ret, pageblock_mt, page_mt;
+ unsigned long *entries;
+ unsigned int nr_entries;
char *kbuf;
- unsigned long entries[PAGE_OWNER_STACK_DEPTH];
- struct stack_trace trace = {
- .nr_entries = 0,
- .entries = entries,
- .max_entries = PAGE_OWNER_STACK_DEPTH,
- .skip = 0
- };
+ count = min_t(size_t, count, PAGE_SIZE);
kbuf = kmalloc(count, GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
ret = snprintf(kbuf, count,
- "Page allocated via order %u, mask %#x(%pGg)\n",
+ "Page allocated via order %u, mask %#x(%pGg), pid %d, ts %llu ns, free_ts %llu ns\n",
page_owner->order, page_owner->gfp_mask,
- &page_owner->gfp_mask);
+ &page_owner->gfp_mask, page_owner->pid,
+ page_owner->ts_nsec, page_owner->free_ts_nsec);
if (ret >= count)
goto err;
/* Print information relevant to grouping pages by mobility */
pageblock_mt = get_pageblock_migratetype(page);
- page_mt = gfpflags_to_migratetype(page_owner->gfp_mask);
+ page_mt = gfp_migratetype(page_owner->gfp_mask);
ret += snprintf(kbuf + ret, count - ret,
"PFN %lu type %s Block %lu type %s Flags %#lx(%pGp)\n",
pfn,
@@ -378,8 +421,8 @@
if (ret >= count)
goto err;
- depot_fetch_stack(handle, &trace);
- ret += snprint_stack_trace(kbuf + ret, count - ret, &trace, 0);
+ nr_entries = stack_depot_fetch(handle, &entries);
+ ret += stack_trace_snprint(kbuf + ret, count - ret, entries, nr_entries, 0);
if (ret >= count)
goto err;
@@ -408,16 +451,11 @@
void __dump_page_owner(struct page *page)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = page_ext_get((void *)page);
struct page_owner *page_owner;
- unsigned long entries[PAGE_OWNER_STACK_DEPTH];
- struct stack_trace trace = {
- .nr_entries = 0,
- .entries = entries,
- .max_entries = PAGE_OWNER_STACK_DEPTH,
- .skip = 0
- };
depot_stack_handle_t handle;
+ unsigned long *entries;
+ unsigned int nr_entries;
gfp_t gfp_mask;
int mt;
@@ -428,28 +466,44 @@
page_owner = get_page_owner(page_ext);
gfp_mask = page_owner->gfp_mask;
- mt = gfpflags_to_migratetype(gfp_mask);
+ mt = gfp_migratetype(gfp_mask);
if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
- pr_alert("page_owner info is not active (free page?)\n");
+ pr_alert("page_owner info is not present (never set?)\n");
+ page_ext_put(page_ext);
return;
}
+
+ if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
+ pr_alert("page_owner tracks the page as allocated\n");
+ else
+ pr_alert("page_owner tracks the page as freed\n");
+
+ pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d, ts %llu, free_ts %llu\n",
+ page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask,
+ page_owner->pid, page_owner->ts_nsec, page_owner->free_ts_nsec);
handle = READ_ONCE(page_owner->handle);
if (!handle) {
- pr_alert("page_owner info is not active (free page?)\n");
- return;
+ pr_alert("page_owner allocation stack trace missing\n");
+ } else {
+ nr_entries = stack_depot_fetch(handle, &entries);
+ stack_trace_print(entries, nr_entries, 0);
}
- depot_fetch_stack(handle, &trace);
- pr_alert("PFN 0x%lx allocated via order %u, migratetype %s, gfp_mask %#x(%pGg)\n",
- page_to_pfn(page),
- page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask);
- print_stack_trace(&trace, 0);
+ handle = READ_ONCE(page_owner->free_handle);
+ if (!handle) {
+ pr_alert("page_owner free stack trace missing\n");
+ } else {
+ nr_entries = stack_depot_fetch(handle, &entries);
+ pr_alert("page last free stack trace:\n");
+ stack_trace_print(entries, nr_entries, 0);
+ }
if (page_owner->last_migrate_reason != -1)
pr_alert("page has been migrated, last migrate reason: %s\n",
migrate_reason_names[page_owner->last_migrate_reason]);
+ page_ext_put(page_ext);
}
static ssize_t
@@ -476,6 +530,14 @@
/* Find an allocated page */
for (; pfn < max_pfn; pfn++) {
/*
+ * This temporary page_owner is required so
+ * that we can avoid the context switches while holding
+ * the rcu lock and copying the page owner information to
+ * user through copy_to_user() or GFP_KERNEL allocations.
+ */
+ struct page_owner page_owner_tmp;
+
+ /*
* If the new page is in a new MAX_ORDER_NR_PAGES area,
* validate the area as existing, skip it if not
*/
@@ -490,14 +552,14 @@
page = pfn_to_page(pfn);
if (PageBuddy(page)) {
- unsigned long freepage_order = page_order_unsafe(page);
+ unsigned long freepage_order = buddy_order_unsafe(page);
if (freepage_order < MAX_ORDER)
pfn += (1UL << freepage_order) - 1;
continue;
}
- page_ext = lookup_page_ext(page);
+ page_ext = page_ext_get(page);
if (unlikely(!page_ext))
continue;
@@ -506,9 +568,23 @@
* because we don't hold the zone lock.
*/
if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
- continue;
+ goto ext_put_continue;
+
+ /*
+ * Although we do have the info about past allocation of free
+ * pages, it's not relevant for current memory usage.
+ */
+ if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
+ goto ext_put_continue;
page_owner = get_page_owner(page_ext);
+
+ /*
+ * Don't print "tail" pages of high-order allocations as that
+ * would inflate the stats.
+ */
+ if (!IS_ALIGNED(pfn, 1 << page_owner->order))
+ goto ext_put_continue;
/*
* Access to page_ext->handle isn't synchronous so we should
@@ -516,13 +592,17 @@
*/
handle = READ_ONCE(page_owner->handle);
if (!handle)
- continue;
+ goto ext_put_continue;
/* Record the next PFN to read in the file offset */
*ppos = (pfn - min_low_pfn) + 1;
+ page_owner_tmp = *page_owner;
+ page_ext_put(page_ext);
return print_page_owner(buf, count, pfn, page,
- page_owner, handle);
+ &page_owner_tmp, handle);
+ext_put_continue:
+ page_ext_put(page_ext);
}
return 0;
@@ -570,7 +650,7 @@
* heavy lock contention.
*/
if (PageBuddy(page)) {
- unsigned long order = page_order_unsafe(page);
+ unsigned long order = buddy_order_unsafe(page);
if (order > 0 && order < MAX_ORDER)
pfn += (1UL << order) - 1;
@@ -580,17 +660,20 @@
if (PageReserved(page))
continue;
- page_ext = lookup_page_ext(page);
+ page_ext = page_ext_get(page);
if (unlikely(!page_ext))
continue;
/* Maybe overlapping zone */
if (test_bit(PAGE_EXT_OWNER, &page_ext->flags))
- continue;
+ goto ext_put_continue;
/* Found early allocated page */
- __set_page_owner_handle(page_ext, early_handle, 0, 0);
+ __set_page_owner_handle(page, page_ext, early_handle,
+ 0, 0);
count++;
+ext_put_continue:
+ page_ext_put(page_ext);
}
cond_resched();
}
@@ -626,16 +709,14 @@
static int __init pageowner_init(void)
{
- struct dentry *dentry;
-
if (!static_branch_unlikely(&page_owner_inited)) {
pr_info("page_owner is disabled\n");
return 0;
}
- dentry = debugfs_create_file("page_owner", 0400, NULL,
- NULL, &proc_page_owner_operations);
+ debugfs_create_file("page_owner", 0400, NULL, NULL,
+ &proc_page_owner_operations);
- return PTR_ERR_OR_ZERO(dentry);
+ return 0;
}
late_initcall(pageowner_init)
--
Gitblit v1.6.2