From 244b2c5ca8b14627e4a17755e5922221e121c771 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Wed, 09 Oct 2024 06:15:07 +0000
Subject: [PATCH] mm/vmstat: backport upstream vmstat updates
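
Backport a batch of upstream mm/vmstat.c changes:

- Take a kernel pointer (void *buffer) instead of void __user * in the
  sysctl handlers.
- Use the zone_managed_pages() accessor instead of reading
  zone->managed_pages directly, and report cma pages in /proc/zoneinfo.
- Drop the out-of-tree preempt_disable_rt()/preempt_enable_rt() pairs
  from the per-cpu counter fast paths and fold the two-sided threshold
  checks into abs().
- Support byte-granular node stat items (vmstat_item_in_bytes()) and
  add node_page_state_pages() for reading them in pages.
- Add extfrag_for_order() and refresh vmstat_text[]: split the
  workingset counters by anon/file, add FOLL_PIN, THP migration and CMA
  counters, drop the out-of-tree ION/GPU heap counters, and keep a
  dummy "nr_unstable 0" line for userspace compatibility.
- Cap /proc/pagetypeinfo free-list walks at 100000 entries, switch the
  extfrag debugfs files to DEFINE_SEQ_ATTRIBUTE(), and stop checking
  debugfs_create_file() return values.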
---
kernel/mm/vmstat.c | 271 +++++++++++++++++++++++++++++------------------------
 1 file changed, 148 insertions(+), 123 deletions(-)
diff --git a/kernel/mm/vmstat.c b/kernel/mm/vmstat.c
index 444915b..604a993 100644
--- a/kernel/mm/vmstat.c
+++ b/kernel/mm/vmstat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/mm/vmstat.c
*
@@ -75,7 +76,7 @@
static DEFINE_MUTEX(vm_numa_stat_lock);
int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *length, loff_t *ppos)
+ void *buffer, size_t *length, loff_t *ppos)
{
int ret, oldval;
@@ -227,7 +228,7 @@
* 125 1024 10 16-32 GB 9
*/
- mem = zone->managed_pages >> (27 - PAGE_SHIFT);
+ mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);
threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
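For reference, the heuristic above is easy to exercise outside the kernel. A minimal userspace sketch, assuming a hypothetical 16 GB zone with 4 KiB pages and 16 online CPUs; fls_approx() stands in for the kernel's fls(), and the 125 cap comes from the table above:

#include <stdio.h>

/* Userspace stand-in for the kernel's fls(): 1-based index of the highest set bit. */
static int fls_approx(unsigned long x)
{
        int r = 0;

        while (x) {
                r++;
                x >>= 1;
        }
        return r;
}

int main(void)
{
        unsigned long managed_pages = (16UL << 30) >> 12;  /* 16 GB of 4 KiB pages */
        unsigned long mem = managed_pages >> (27 - 12);    /* zone size in 128 MB units */
        int threshold = 2 * fls_approx(16) * (1 + fls_approx(mem));

        if (threshold > 125)    /* vmstat caps the threshold, per the table */
                threshold = 125;
        printf("threshold = %d\n", threshold);  /* 2 * 5 * (1 + 8) = 90 */
        return 0;
}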
@@ -320,17 +321,15 @@
long x;
long t;
- preempt_disable_rt();
x = delta + __this_cpu_read(*p);
t = __this_cpu_read(pcp->stat_threshold);
- if (unlikely(x > t || x < -t)) {
+ if (unlikely(abs(x) > t)) {
zone_page_state_add(x, zone, item);
x = 0;
}
__this_cpu_write(*p, x);
- preempt_enable_rt();
}
EXPORT_SYMBOL(__mod_zone_page_state);
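The abs() form is equivalent to the old two-sided comparison; the interesting part is the fold itself, which batches updates in a per-cpu diff and only touches the shared counter once the diff crosses the threshold. A single-threaded model of that contract, with fold_delta(), global_count and cpu_diff as hypothetical stand-ins:

#include <stdio.h>
#include <stdlib.h>

static long global_count;  /* models the shared atomic zone counter */
static long cpu_diff;      /* models this CPU's vm_stat_diff slot */

/* Accumulate delta locally; spill to the shared counter once |diff| > threshold. */
static void fold_delta(long delta, long threshold)
{
        long x = delta + cpu_diff;

        if (labs(x) > threshold) {
                global_count += x;
                x = 0;
        }
        cpu_diff = x;
}

int main(void)
{
        for (int i = 0; i < 10; i++)
                fold_delta(3, 8);
        printf("global=%ld diff=%ld\n", global_count, cpu_diff);  /* global=27 diff=3 */
        return 0;
}

The __inc/__dec variants below refine this by overstepping: they spill v plus t/2 and park the diff at -overstep, buying headroom before the next spill.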
@@ -342,17 +341,20 @@
long x;
long t;
- preempt_disable_rt();
+ if (vmstat_item_in_bytes(item)) {
+ VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
+ delta >>= PAGE_SHIFT;
+ }
+
x = delta + __this_cpu_read(*p);
t = __this_cpu_read(pcp->stat_threshold);
- if (unlikely(x > t || x < -t)) {
+ if (unlikely(abs(x) > t)) {
node_page_state_add(x, pgdat, item);
x = 0;
}
__this_cpu_write(*p, x);
- preempt_enable_rt();
}
EXPORT_SYMBOL(__mod_node_page_state);
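Byte-counted node items (vmstat_item_in_bytes()) are still stored in pages internally, so the function only accepts page-aligned byte deltas. A tiny model of that conversion, where bytes_to_pages() is a hypothetical helper, PAGE_SIZE is assumed 4096, and negative deltas rely on an arithmetic right shift as the kernel does:

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE  4096L
#define PAGE_SHIFT 12

/* Byte deltas must be whole pages; assert() models VM_WARN_ON_ONCE(). */
static long bytes_to_pages(long delta)
{
        assert((delta & (PAGE_SIZE - 1)) == 0);
        return delta >> PAGE_SHIFT;
}

int main(void)
{
        printf("%ld %ld\n", bytes_to_pages(8 * 4096L), bytes_to_pages(-4096L));  /* 8 -1 */
        return 0;
}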
@@ -385,7 +387,6 @@
s8 __percpu *p = pcp->vm_stat_diff + item;
s8 v, t;
- preempt_disable_rt();
v = __this_cpu_inc_return(*p);
t = __this_cpu_read(pcp->stat_threshold);
if (unlikely(v > t)) {
@@ -394,7 +395,6 @@
zone_page_state_add(v + overstep, zone, item);
__this_cpu_write(*p, -overstep);
}
- preempt_enable_rt();
}
void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
@@ -403,7 +403,8 @@
s8 __percpu *p = pcp->vm_node_stat_diff + item;
s8 v, t;
- preempt_disable_rt();
+ VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
+
v = __this_cpu_inc_return(*p);
t = __this_cpu_read(pcp->stat_threshold);
if (unlikely(v > t)) {
@@ -412,7 +413,6 @@
node_page_state_add(v + overstep, pgdat, item);
__this_cpu_write(*p, -overstep);
}
- preempt_enable_rt();
}
void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
@@ -433,7 +433,6 @@
s8 __percpu *p = pcp->vm_stat_diff + item;
s8 v, t;
- preempt_disable_rt();
v = __this_cpu_dec_return(*p);
t = __this_cpu_read(pcp->stat_threshold);
if (unlikely(v < - t)) {
@@ -442,7 +441,6 @@
zone_page_state_add(v - overstep, zone, item);
__this_cpu_write(*p, overstep);
}
- preempt_enable_rt();
}
void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
@@ -451,7 +449,8 @@
s8 __percpu *p = pcp->vm_node_stat_diff + item;
s8 v, t;
- preempt_disable_rt();
+ VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
+
v = __this_cpu_dec_return(*p);
t = __this_cpu_read(pcp->stat_threshold);
if (unlikely(v < - t)) {
@@ -460,7 +459,6 @@
node_page_state_add(v - overstep, pgdat, item);
__this_cpu_write(*p, overstep);
}
- preempt_enable_rt();
}
void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
@@ -513,7 +511,7 @@
o = this_cpu_read(*p);
n = delta + o;
- if (n > t || n < -t) {
+ if (abs(n) > t) {
int os = overstep_mode * (t >> 1) ;
/* Overflow must be added to zone counters */
@@ -552,6 +550,11 @@
s8 __percpu *p = pcp->vm_node_stat_diff + item;
long o, n, t, z;
+ if (vmstat_item_in_bytes(item)) {
+ VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
+ delta >>= PAGE_SHIFT;
+ }
+
do {
z = 0; /* overflow to node counters */
@@ -570,7 +573,7 @@
o = this_cpu_read(*p);
n = delta + o;
- if (n > t || n < -t) {
+ if (abs(n) > t) {
int os = overstep_mode * (t >> 1) ;
/* Overflow must be added to node counters */
@@ -1000,8 +1003,8 @@
/*
* Determine the per node value of a stat item.
*/
-unsigned long node_page_state(struct pglist_data *pgdat,
- enum node_stat_item item)
+unsigned long node_page_state_pages(struct pglist_data *pgdat,
+ enum node_stat_item item)
{
long x = atomic_long_read(&pgdat->vm_stat[item]);
#ifdef CONFIG_SMP
@@ -1009,6 +1012,14 @@
x = 0;
#endif
return x;
+}
+
+unsigned long node_page_state(struct pglist_data *pgdat,
+ enum node_stat_item item)
+{
+ VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
+
+ return node_page_state_pages(pgdat, item);
}
#endif
@@ -1085,6 +1096,24 @@
return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total);
}
+/*
+ * Calculates external fragmentation within a zone wrt the given order.
+ * It is defined as the percentage of pages found in blocks of size
+ * less than 1 << order. It returns values in range [0, 100].
+ */
+unsigned int extfrag_for_order(struct zone *zone, unsigned int order)
+{
+ struct contig_page_info info;
+
+ fill_contig_page_info(zone, order, &info);
+ if (info.free_pages == 0)
+ return 0;
+
+ return div_u64((info.free_pages -
+ (info.free_blocks_suitable << order)) * 100,
+ info.free_pages);
+}
+
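A worked example of the formula: in a hypothetical zone with 1000 free pages of which 100 free blocks are at least order 3 (covering 100 << 3 = 800 pages), extfrag is (1000 - 800) * 100 / 1000 = 20, i.e. 20% of free memory sits in blocks too small to serve an order-3 request:

#include <stdio.h>

int main(void)
{
        /* Made-up numbers standing in for struct contig_page_info. */
        unsigned long free_pages = 1000, free_blocks_suitable = 100, order = 3;
        unsigned int extfrag = (free_pages - (free_blocks_suitable << order)) * 100
                               / free_pages;

        printf("extfrag_for_order(order=%lu) = %u\n", order, extfrag);  /* 20 */
        return 0;
}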
/* Same as __fragmentation index but allocs contig_page_info on stack */
int fragmentation_index(struct zone *zone, unsigned int order)
{
@@ -1095,7 +1124,8 @@
}
#endif
-#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
+#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \
+ defined(CONFIG_NUMA) || defined(CONFIG_MEMCG)
#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
@@ -1118,7 +1148,7 @@
TEXT_FOR_HIGHMEM(xx) xx "_movable",
const char * const vmstat_text[] = {
- /* enum zone_stat_item countes */
+ /* enum zone_stat_item counters */
"nr_free_pages",
"nr_zone_inactive_anon",
"nr_zone_active_anon",
@@ -1128,14 +1158,8 @@
"nr_zone_write_pending",
"nr_mlock",
"nr_page_table_pages",
- "nr_kernel_stack",
-#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
- "nr_shadow_call_stack_bytes",
-#endif
"nr_bounce",
-#if IS_ENABLED(CONFIG_ZSMALLOC)
"nr_zspages",
-#endif
"nr_free_cma",
/* enum numa_stat_item counters */
@@ -1148,7 +1172,7 @@
"numa_other",
#endif
- /* Node-based counters */
+ /* enum node_stat_item counters */
"nr_inactive_anon",
"nr_active_anon",
"nr_inactive_file",
@@ -1158,9 +1182,13 @@
"nr_slab_unreclaimable",
"nr_isolated_anon",
"nr_isolated_file",
- "workingset_refault",
- "workingset_activate",
- "workingset_restore",
+ "workingset_nodes",
+ "workingset_refault_anon",
+ "workingset_refault_file",
+ "workingset_activate_anon",
+ "workingset_activate_file",
+ "workingset_restore_anon",
+ "workingset_restore_file",
"workingset_nodereclaim",
"nr_anon_pages",
"nr_mapped",
@@ -1171,28 +1199,29 @@
"nr_shmem",
"nr_shmem_hugepages",
"nr_shmem_pmdmapped",
+ "nr_file_hugepages",
+ "nr_file_pmdmapped",
"nr_anon_transparent_hugepages",
- "nr_unstable",
"nr_vmscan_write",
"nr_vmscan_immediate_reclaim",
"nr_dirtied",
"nr_written",
"nr_kernel_misc_reclaimable",
- "nr_unreclaimable_pages",
+ "nr_foll_pin_acquired",
+ "nr_foll_pin_released",
+ "nr_kernel_stack",
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ "nr_shadow_call_stack",
+#endif
-
- "nr_ion_heap",
- "nr_ion_heap_pool",
- "nr_gpu_heap",
/* enum writeback_stat_item counters */
"nr_dirty_threshold",
"nr_dirty_background_threshold",
-#ifdef CONFIG_VM_EVENT_COUNTERS
+#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
/* enum vm_event_item counters */
"pgpgin",
"pgpgout",
- "pgpgoutclean",
"pswpin",
"pswpout",
@@ -1210,11 +1239,16 @@
"pglazyfreed",
"pgrefill",
+ "pgreuse",
"pgsteal_kswapd",
"pgsteal_direct",
"pgscan_kswapd",
"pgscan_direct",
"pgscan_direct_throttle",
+ "pgscan_anon",
+ "pgscan_file",
+ "pgsteal_anon",
+ "pgsteal_file",
#ifdef CONFIG_NUMA
"zone_reclaim_failed",
@@ -1242,6 +1276,9 @@
#ifdef CONFIG_MIGRATION
"pgmigrate_success",
"pgmigrate_fail",
+ "thp_migration_success",
+ "thp_migration_fail",
+ "thp_migration_split",
#endif
#ifdef CONFIG_COMPACTION
"compact_migrate_scanned",
@@ -1259,6 +1296,10 @@
"htlb_buddy_alloc_success",
"htlb_buddy_alloc_fail",
#endif
+#ifdef CONFIG_CMA
+ "cma_alloc_success",
+ "cma_alloc_fail",
+#endif
"unevictable_pgs_culled",
"unevictable_pgs_scanned",
"unevictable_pgs_rescued",
@@ -1270,9 +1311,12 @@
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
"thp_fault_alloc",
"thp_fault_fallback",
+ "thp_fault_fallback_charge",
"thp_collapse_alloc",
"thp_collapse_alloc_failed",
"thp_file_alloc",
+ "thp_file_fallback",
+ "thp_file_fallback_charge",
"thp_file_mapped",
"thp_split_page",
"thp_split_page_failed",
@@ -1308,9 +1352,13 @@
"swap_ra",
"swap_ra_hit",
#endif
-#endif /* CONFIG_VM_EVENTS_COUNTERS */
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+ "speculative_pgfault",
+ "speculative_pgfault_file"
+#endif
+#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
};
-#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
+#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */
#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
defined(CONFIG_PROC_FS)
@@ -1400,12 +1448,26 @@
unsigned long freecount = 0;
struct free_area *area;
struct list_head *curr;
+ bool overflow = false;
area = &(zone->free_area[order]);
- list_for_each(curr, &area->free_list[mtype])
- freecount++;
- seq_printf(m, "%6lu ", freecount);
+ list_for_each(curr, &area->free_list[mtype]) {
+ /*
+ * Cap the free_list iteration because it might
+ * be really large and we are under a spinlock
+ * so a long time spent here could trigger a
+ * hard lockup detector. Anyway this is a
+ * debugging tool so knowing there is a handful
+ * of pages of this order should be more than
+ * sufficient.
+ */
+ if (++freecount >= 100000) {
+ overflow = true;
+ break;
+ }
+ }
+ seq_printf(m, "%s%6lu ", overflow ? ">" : "", freecount);
spin_unlock_irq(&zone->lock);
cond_resched();
spin_lock_irq(&zone->lock);
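The same bounded-walk pattern in miniature, with a plain singly linked list standing in for the buddy free list; count_capped() is a hypothetical helper mirroring the capped loop and the ">" marker above:

#include <stdio.h>

struct node { struct node *next; };

/* Count entries but give up after 'cap', so time spent under a lock stays bounded. */
static unsigned long count_capped(struct node *head, unsigned long cap, int *overflow)
{
        unsigned long n = 0;

        *overflow = 0;
        for (struct node *p = head; p; p = p->next) {
                if (++n >= cap) {
                        *overflow = 1;
                        break;
                }
        }
        return n;
}

int main(void)
{
        struct node c = { NULL }, b = { &c }, a = { &b };
        int overflow;
        unsigned long n = count_capped(&a, 100000, &overflow);

        printf("%s%lu\n", overflow ? ">" : "", n);  /* "3" */
        return 0;
}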
@@ -1445,10 +1507,6 @@
page = pfn_to_online_page(pfn);
if (!page)
- continue;
-
- /* Watch for unexpected holes punched in the memmap */
- if (!memmap_valid_within(pfn, page, zone))
continue;
if (page_zone(page) != zone)
@@ -1567,14 +1625,8 @@
if (is_zone_first_populated(pgdat, zone)) {
seq_printf(m, "\n per-node stats");
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
- /* Skip hidden vmstat items. */
- if (*vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
- NR_VM_NUMA_STAT_ITEMS] == '\0')
- continue;
- seq_printf(m, "\n %-12s %lu",
- vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
- NR_VM_NUMA_STAT_ITEMS],
- node_page_state(pgdat, i));
+ seq_printf(m, "\n %-12s %lu", node_stat_name(i),
+ node_page_state_pages(pgdat, i));
}
}
seq_printf(m,
@@ -1584,14 +1636,16 @@
"\n high %lu"
"\n spanned %lu"
"\n present %lu"
- "\n managed %lu",
+ "\n managed %lu"
+ "\n cma %lu",
zone_page_state(zone, NR_FREE_PAGES),
min_wmark_pages(zone),
low_wmark_pages(zone),
high_wmark_pages(zone),
zone->spanned_pages,
zone->present_pages,
- zone->managed_pages);
+ zone_managed_pages(zone),
+ zone_cma_pages(zone));
seq_printf(m,
"\n protection: (%ld",
@@ -1607,14 +1661,13 @@
}
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
- seq_printf(m, "\n %-12s %lu", vmstat_text[i],
- zone_page_state(zone, i));
+ seq_printf(m, "\n %-12s %lu", zone_stat_name(i),
+ zone_page_state(zone, i));
#ifdef CONFIG_NUMA
for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
- seq_printf(m, "\n %-12s %lu",
- vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
- zone_numa_state_snapshot(zone, i));
+ seq_printf(m, "\n %-12s %lu", numa_stat_name(i),
+ zone_numa_state_snapshot(zone, i));
#endif
seq_printf(m, "\n pagesets");
@@ -1665,29 +1718,23 @@
.show = zoneinfo_show,
};
-enum writeback_stat_item {
- NR_DIRTY_THRESHOLD,
- NR_DIRTY_BG_THRESHOLD,
- NR_VM_WRITEBACK_STAT_ITEMS,
-};
+#define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
+ NR_VM_NUMA_STAT_ITEMS + \
+ NR_VM_NODE_STAT_ITEMS + \
+ NR_VM_WRITEBACK_STAT_ITEMS + \
+ (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
+ NR_VM_EVENT_ITEMS : 0))
static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
unsigned long *v;
- int i, stat_items_size;
+ int i;
- if (*pos >= ARRAY_SIZE(vmstat_text))
+ if (*pos >= NR_VMSTAT_ITEMS)
return NULL;
- stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
- NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) +
- NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) +
- NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);
-#ifdef CONFIG_VM_EVENT_COUNTERS
- stat_items_size += sizeof(struct vm_event_state);
-#endif
-
- v = kmalloc(stat_items_size, GFP_KERNEL);
+ BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
+ v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
m->private = v;
if (!v)
return ERR_PTR(-ENOMEM);
@@ -1702,7 +1749,7 @@
#endif
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
- v[i] = global_node_page_state(i);
+ v[i] = global_node_page_state_pages(i);
v += NR_VM_NODE_STAT_ITEMS;
global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
@@ -1720,10 +1767,7 @@
static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
(*pos)++;
- //nr_gpu_heap is out-of-tree now so we don't want to export it.
- if (*pos == NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_STAT_ITEMS + NR_GPU_HEAP)
- (*pos)++;
- if (*pos >= ARRAY_SIZE(vmstat_text))
+ if (*pos >= NR_VMSTAT_ITEMS)
return NULL;
return (unsigned long *)m->private + *pos;
}
@@ -1736,6 +1780,14 @@
seq_puts(m, vmstat_text[off]);
seq_put_decimal_ull(m, " ", *l);
seq_putc(m, '\n');
+
+ if (off == NR_VMSTAT_ITEMS - 1) {
+ /*
+ * We've come to the end - add any deprecated counters to avoid
+ * breaking userspace which might depend on them being present.
+ */
+ seq_puts(m, "nr_unstable 0\n");
+ }
return 0;
}
@@ -1764,7 +1816,7 @@
}
int vmstat_refresh(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
long val;
int err;
@@ -1789,7 +1841,7 @@
val = atomic_long_read(&vm_zone_stat[i]);
if (val < 0) {
pr_warn("%s: %s %ld\n",
- __func__, vmstat_text[i], val);
+ __func__, zone_stat_name(i), val);
err = -EINVAL;
}
}
@@ -1798,7 +1850,7 @@
val = atomic_long_read(&vm_numa_stat[i]);
if (val < 0) {
pr_warn("%s: %s %ld\n",
- __func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val);
+ __func__, numa_stat_name(i), val);
err = -EINVAL;
}
}
@@ -2068,24 +2120,14 @@
return 0;
}
-static const struct seq_operations unusable_op = {
+static const struct seq_operations unusable_sops = {
.start = frag_start,
.next = frag_next,
.stop = frag_stop,
.show = unusable_show,
};
-static int unusable_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &unusable_op);
-}
-
-static const struct file_operations unusable_file_ops = {
- .open = unusable_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
+DEFINE_SEQ_ATTRIBUTE(unusable);
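DEFINE_SEQ_ATTRIBUTE() generates the open helper and file_operations that the removed lines spelled out by hand; it expects a <name>_sops seq_operations (hence the _op to _sops rename) and emits <name>_fops. Roughly, and simplified, DEFINE_SEQ_ATTRIBUTE(unusable) expands to:

static int unusable_open(struct inode *inode, struct file *file)
{
        int ret = seq_open(file, &unusable_sops);

        if (!ret && inode->i_private) {
                struct seq_file *seq_file = file->private_data;

                seq_file->private = inode->i_private;
        }
        return ret;
}

static const struct file_operations unusable_fops = {
        .owner   = THIS_MODULE,
        .open    = unusable_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};

The extfrag_fops used in extfrag_debug_init() below come from the matching DEFINE_SEQ_ATTRIBUTE(extfrag).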
static void extfrag_show_print(struct seq_file *m,
pg_data_t *pgdat, struct zone *zone)
@@ -2120,45 +2162,28 @@
return 0;
}
-static const struct seq_operations extfrag_op = {
+static const struct seq_operations extfrag_sops = {
.start = frag_start,
.next = frag_next,
.stop = frag_stop,
.show = extfrag_show,
};
-static int extfrag_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &extfrag_op);
-}
-
-static const struct file_operations extfrag_file_ops = {
- .open = extfrag_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
+DEFINE_SEQ_ATTRIBUTE(extfrag);
static int __init extfrag_debug_init(void)
{
struct dentry *extfrag_debug_root;
extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
- if (!extfrag_debug_root)
- return -ENOMEM;
- if (!debugfs_create_file("unusable_index", 0444,
- extfrag_debug_root, NULL, &unusable_file_ops))
- goto fail;
+ debugfs_create_file("unusable_index", 0444, extfrag_debug_root, NULL,
+ &unusable_fops);
- if (!debugfs_create_file("extfrag_index", 0444,
- extfrag_debug_root, NULL, &extfrag_file_ops))
- goto fail;
+ debugfs_create_file("extfrag_index", 0444, extfrag_debug_root, NULL,
+ &extfrag_fops);
return 0;
-fail:
- debugfs_remove_recursive(extfrag_debug_root);
- return -ENOMEM;
}
module_init(extfrag_debug_init);
--
Gitblit v1.6.2