From 23fa18eaa71266feff7ba8d83022d9e1cc83c65a Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Fri, 10 May 2024 07:42:03 +0000
Subject: [PATCH] disable pwm7

---
 kernel/include/linux/mmzone.h | 547 +++++++++++++++++++++++++++++++++---------------------
 1 files changed, 331 insertions(+), 216 deletions(-)

diff --git a/kernel/include/linux/mmzone.h b/kernel/include/linux/mmzone.h
index 96d614a..f611a06 100644
--- a/kernel/include/linux/mmzone.h
+++ b/kernel/include/linux/mmzone.h
@@ -18,6 +18,8 @@
 #include <linux/pageblock-flags.h>
 #include <linux/page-flags-layout.h>
 #include <linux/atomic.h>
+#include <linux/mm_types.h>
+#include <linux/page-flags.h>
 #include <linux/android_kabi.h>
 #include <asm/page.h>
 
@@ -36,6 +38,8 @@
  * will not.
  */
 #define PAGE_ALLOC_COSTLY_ORDER 3
+
+#define MAX_KSWAPD_THREADS 16
 
 enum migratetype {
 	MIGRATE_UNMOVABLE,
@@ -66,7 +70,7 @@
 };
 
 /* In mm/page_alloc.c; keep in sync also with show_migration_types() there */
-extern char * const migratetype_names[MIGRATE_TYPES];
+extern const char * const migratetype_names[MIGRATE_TYPES];
 
 #ifdef CONFIG_CMA
 # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
@@ -89,17 +93,27 @@
 
 extern int page_group_by_mobility_disabled;
 
-#define NR_MIGRATETYPE_BITS (PB_migrate_end - PB_migrate + 1)
-#define MIGRATETYPE_MASK ((1UL << NR_MIGRATETYPE_BITS) - 1)
+#define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1)
 
 #define get_pageblock_migratetype(page)					\
-	get_pfnblock_flags_mask(page, page_to_pfn(page),		\
-			PB_migrate_end, MIGRATETYPE_MASK)
+	get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK)
 
 struct free_area {
 	struct list_head	free_list[MIGRATE_TYPES];
 	unsigned long		nr_free;
 };
+
+static inline struct page *get_page_from_free_area(struct free_area *area,
+					    int migratetype)
+{
+	return list_first_entry_or_null(&area->free_list[migratetype],
+					struct page, lru);
+}
+
+static inline bool free_area_empty(struct free_area *area, int migratetype)
+{
+	return list_empty(&area->free_list[migratetype]);
+}
 
 struct pglist_data;
 
@@ -144,15 +158,9 @@
 	NR_ZONE_WRITE_PENDING,	/* Count of dirty, writeback and unstable pages */
 	NR_MLOCK,		/* mlock()ed pages found and moved off LRU */
 	NR_PAGETABLE,		/* used for pagetables */
-	NR_KERNEL_STACK_KB,	/* measured in KiB */
-#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
-	NR_KERNEL_SCS_BYTES,	/* measured in bytes */
-#endif
 	/* Second 128 byte cacheline */
 	NR_BOUNCE,
-#if IS_ENABLED(CONFIG_ZSMALLOC)
 	NR_ZSPAGES,		/* allocated in zsmalloc */
-#endif
 	NR_FREE_CMA_PAGES,
 	NR_VM_ZONE_STAT_ITEMS };
 
@@ -163,13 +171,20 @@
 	NR_INACTIVE_FILE,	/* " " " " " */
 	NR_ACTIVE_FILE,		/* " " " " " */
 	NR_UNEVICTABLE,		/* " " " " " */
-	NR_SLAB_RECLAIMABLE,
-	NR_SLAB_UNRECLAIMABLE,
+	NR_SLAB_RECLAIMABLE_B,
+	NR_SLAB_UNRECLAIMABLE_B,
 	NR_ISOLATED_ANON,	/* Temporary isolated pages from anon lru */
 	NR_ISOLATED_FILE,	/* Temporary isolated pages from file lru */
-	WORKINGSET_REFAULT,
-	WORKINGSET_ACTIVATE,
-	WORKINGSET_RESTORE,
+	WORKINGSET_NODES,
+	WORKINGSET_REFAULT_BASE,
+	WORKINGSET_REFAULT_ANON = WORKINGSET_REFAULT_BASE,
+	WORKINGSET_REFAULT_FILE,
+	WORKINGSET_ACTIVATE_BASE,
+	WORKINGSET_ACTIVATE_ANON = WORKINGSET_ACTIVATE_BASE,
+	WORKINGSET_ACTIVATE_FILE,
+	WORKINGSET_RESTORE_BASE,
+	WORKINGSET_RESTORE_ANON = WORKINGSET_RESTORE_BASE,
+	WORKINGSET_RESTORE_FILE,
 	WORKINGSET_NODERECLAIM,
 	NR_ANON_MAPPED,		/* Mapped anonymous pages */
 	NR_FILE_MAPPED,		/* pagecache pages mapped into pagetables.
@@ -181,19 +196,42 @@
 	NR_SHMEM,		/* shmem pages (included tmpfs/GEM pages) */
 	NR_SHMEM_THPS,
 	NR_SHMEM_PMDMAPPED,
+	NR_FILE_THPS,
+	NR_FILE_PMDMAPPED,
 	NR_ANON_THPS,
-	NR_UNSTABLE_NFS,	/* NFS unstable pages */
 	NR_VMSCAN_WRITE,
 	NR_VMSCAN_IMMEDIATE,	/* Prioritise for reclaim when writeback ends */
 	NR_DIRTIED,		/* page dirtyings since bootup */
 	NR_WRITTEN,		/* page writings since bootup */
 	NR_KERNEL_MISC_RECLAIMABLE,	/* reclaimable non-slab kernel pages */
-	NR_UNRECLAIMABLE_PAGES,
-	NR_ION_HEAP,
-	NR_ION_HEAP_POOL,
-	NR_GPU_HEAP,
+	NR_FOLL_PIN_ACQUIRED,	/* via: pin_user_page(), gup flag: FOLL_PIN */
+	NR_FOLL_PIN_RELEASED,	/* pages returned via unpin_user_page() */
+	NR_KERNEL_STACK_KB,	/* measured in KiB */
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+	NR_KERNEL_SCS_KB,	/* measured in KiB */
+#endif
 	NR_VM_NODE_STAT_ITEMS
 };
+
+/*
+ * Returns true if the value is measured in bytes (most vmstat values are
+ * measured in pages). This defines the API part, the internal representation
+ * might be different.
+ */
+static __always_inline bool vmstat_item_in_bytes(int idx)
+{
+	/*
+	 * Global and per-node slab counters track slab pages.
+	 * It's expected that changes are multiples of PAGE_SIZE.
+	 * Internally values are stored in pages.
+	 *
+	 * Per-memcg and per-lruvec counters track memory, consumed
+	 * by individual slab objects. These counters are actually
+	 * byte-precise.
+	 */
+	return (idx == NR_SLAB_RECLAIMABLE_B ||
+		idx == NR_SLAB_UNRECLAIMABLE_B);
+}
 
 /*
  * We do arithmetic on the LRU lists in various places in the code,
@@ -221,47 +259,45 @@
 
 #define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++)
 
-static inline int is_file_lru(enum lru_list lru)
+static inline bool is_file_lru(enum lru_list lru)
 {
 	return (lru == LRU_INACTIVE_FILE || lru == LRU_ACTIVE_FILE);
 }
 
-static inline int is_active_lru(enum lru_list lru)
+static inline bool is_active_lru(enum lru_list lru)
 {
 	return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE);
 }
 
-struct zone_reclaim_stat {
-	/*
-	 * The pageout code in vmscan.c keeps track of how many of the
-	 * mem/swap backed and file backed pages are referenced.
-	 * The higher the rotated/scanned ratio, the more valuable
-	 * that cache is.
-	 *
-	 * The anon LRU stats live in [0], file LRU stats in [1]
-	 */
-	unsigned long		recent_rotated[2];
-	unsigned long		recent_scanned[2];
+#define ANON_AND_FILE 2
+
+enum lruvec_flags {
+	LRUVEC_CONGESTED,		/* lruvec has many dirty pages
+					 * backed by a congested BDI
+					 */
 };
 
 struct lruvec {
 	struct list_head		lists[NR_LRU_LISTS];
-	struct zone_reclaim_stat	reclaim_stat;
-	/* Evictions & activations on the inactive file list */
-	atomic_long_t			inactive_age;
+	/*
+	 * These track the cost of reclaiming one LRU - file or anon -
+	 * over the other. As the observed cost of reclaiming one LRU
+	 * increases, the reclaim scan balance tips toward the other.
+	 */
+	unsigned long			anon_cost;
+	unsigned long			file_cost;
+	/* Non-resident age, driven by LRU movement */
+	atomic_long_t			nonresident_age;
 	/* Refaults at the time of last reclaim cycle */
-	unsigned long			refaults;
+	unsigned long			refaults[ANON_AND_FILE];
+	/* Various lruvec state flags (enum lruvec_flags) */
+	unsigned long			flags;
 #ifdef CONFIG_MEMCG
 	struct pglist_data *pgdat;
 #endif
 };
 
-/* Mask used at gathering information at once (see memcontrol.c) */
-#define LRU_ALL_FILE (BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE))
-#define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON))
-#define LRU_ALL ((1 << NR_LRU_LISTS) - 1)
-
-/* Isolate unmapped file */
+/* Isolate unmapped pages */
 #define ISOLATE_UNMAPPED	((__force isolate_mode_t)0x2)
 /* Isolate for asynchronous migration */
 #define ISOLATE_ASYNC_MIGRATE	((__force isolate_mode_t)0x4)
@@ -278,9 +314,10 @@
 	NR_WMARK
 };
 
-#define min_wmark_pages(z) (z->watermark[WMARK_MIN])
-#define low_wmark_pages(z) (z->watermark[WMARK_LOW])
-#define high_wmark_pages(z) (z->watermark[WMARK_HIGH])
+#define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
+#define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
+#define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
+#define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost)
 
 struct per_cpu_pages {
 	int count;		/* number of pages in the list */
@@ -311,33 +348,20 @@
 #endif /* !__GENERATING_BOUNDS.H */
 
 enum zone_type {
-#ifdef CONFIG_ZONE_DMA
 	/*
-	 * ZONE_DMA is used when there are devices that are not able
-	 * to do DMA to all of addressable memory (ZONE_NORMAL). Then we
-	 * carve out the portion of memory that is needed for these devices.
-	 * The range is arch specific.
-	 *
-	 * Some examples
-	 *
-	 * Architecture		Limit
-	 * ---------------------------
-	 * parisc, ia64, sparc	<4G
-	 * s390			<2G
-	 * arm			Various
-	 * alpha		Unlimited or 0-16MB.
-	 *
-	 * i386, x86_64 and multiple other arches
-	 *			<16M.
+	 * ZONE_DMA and ZONE_DMA32 are used when there are peripherals not able
+	 * to DMA to all of the addressable memory (ZONE_NORMAL).
+	 * On architectures where this area covers the whole 32 bit address
+	 * space ZONE_DMA32 is used. ZONE_DMA is left for the ones with smaller
+	 * DMA addressing constraints. This distinction is important as a 32bit
+	 * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit
+	 * platforms may need both zones as they support peripherals with
+	 * different DMA addressing limitations.
 	 */
+#ifdef CONFIG_ZONE_DMA
 	ZONE_DMA,
 #endif
 #ifdef CONFIG_ZONE_DMA32
-	/*
-	 * x86_64 needs two ZONE_DMAs because it supports devices that are
-	 * only able to do DMA to the lower 16M but also 32 bit devices that
-	 * can only do DMA areas below 4G.
-	 */
 	ZONE_DMA32,
 #endif
 	/*
@@ -357,6 +381,41 @@
 	 */
 	ZONE_HIGHMEM,
 #endif
+	/*
+	 * ZONE_MOVABLE is similar to ZONE_NORMAL, except that it contains
+	 * movable pages with few exceptional cases described below. Main use
+	 * cases for ZONE_MOVABLE are to make memory offlining/unplug more
+	 * likely to succeed, and to locally limit unmovable allocations - e.g.,
+	 * to increase the number of THP/huge pages. Notable special cases are:
+	 *
+	 * 1. Pinned pages: (long-term) pinning of movable pages might
+	 *    essentially turn such pages unmovable. Memory offlining might
+	 *    retry a long time.
+	 * 2. memblock allocations: kernelcore/movablecore setups might create
+	 *    situations where ZONE_MOVABLE contains unmovable allocations
+	 *    after boot. Memory offlining and allocations fail early.
+	 * 3. Memory holes: kernelcore/movablecore setups might create very rare
+	 *    situations where ZONE_MOVABLE contains memory holes after boot,
+	 *    for example, if we have sections that are only partially
+	 *    populated. Memory offlining and allocations fail early.
+	 * 4. PG_hwpoison pages: while poisoned pages can be skipped during
+	 *    memory offlining, such pages cannot be allocated.
+	 * 5. Unmovable PG_offline pages: in paravirtualized environments,
+	 *    hotplugged memory blocks might only partially be managed by the
+	 *    buddy (e.g., via XEN-balloon, Hyper-V balloon, virtio-mem). The
+	 *    parts not manged by the buddy are unmovable PG_offline pages. In
+	 *    some cases (virtio-mem), such pages can be skipped during
+	 *    memory offlining, however, cannot be moved/allocated. These
+	 *    techniques might use alloc_contig_range() to hide previously
+	 *    exposed pages from the buddy again (e.g., to implement some sort
+	 *    of memory unplug in virtio-mem).
+	 *
+	 * In general, no unmovable allocations that degrade memory offlining
+	 * should end up in ZONE_MOVABLE. Allocators (like alloc_contig_range())
+	 * have to expect that migrating pages in ZONE_MOVABLE can fail (even
+	 * if has_unmovable_pages() states that there are no unmovable pages,
+	 * there can be false negatives).
+	 */
 	ZONE_MOVABLE,
 #ifdef CONFIG_ZONE_DEVICE
 	ZONE_DEVICE,
@@ -367,11 +426,14 @@
 
 #ifndef __GENERATING_BOUNDS_H
 
+#define ASYNC_AND_SYNC 2
+
 struct zone {
 	/* Read-mostly fields */
 
 	/* zone watermarks, access with *_wmark_pages(zone) macros */
-	unsigned long watermark[NR_WMARK];
+	unsigned long _watermark[NR_WMARK];
+	unsigned long watermark_boost;
 
 	unsigned long nr_reserved_highatomic;
 
@@ -386,15 +448,11 @@
 	 */
 	long lowmem_reserve[MAX_NR_ZONES];
 
-#ifdef CONFIG_NUMA
+#ifdef CONFIG_NEED_MULTIPLE_NODES
 	int node;
 #endif
 	struct pglist_data	*zone_pgdat;
 	struct per_cpu_pageset __percpu *pageset;
-
-#ifdef CONFIG_CMA
-	bool			cma_alloc;
-#endif
 
 #ifndef CONFIG_SPARSEMEM
 	/*
@@ -421,6 +479,9 @@
 	 * bootmem allocator):
 	 *	managed_pages = present_pages - reserved_pages;
 	 *
+	 * cma pages is present pages that are assigned for CMA use
+	 * (MIGRATE_CMA).
+	 *
 	 * So present_pages may be used by memory hotplug or memory power
 	 * management logic to figure out unmanaged pages by checking
 	 * (present_pages - managed_pages). And managed_pages should be used
@@ -441,16 +502,13 @@
 	 * Write access to present_pages at runtime should be protected by
 	 * mem_hotplug_begin/end(). Any reader who can't tolerant drift of
	 * present_pages should get_online_mems() to get a stable value.
-	 *
-	 * Read access to managed_pages should be safe because it's unsigned
-	 * long. Write access to zone->managed_pages and totalram_pages are
-	 * protected by managed_page_count_lock at runtime. Idealy only
-	 * adjust_managed_page_count() should be used instead of directly
-	 * touching zone->managed_pages and totalram_pages.
	 */
-	unsigned long		managed_pages;
+	atomic_long_t		managed_pages;
 	unsigned long		spanned_pages;
 	unsigned long		present_pages;
+#ifdef CONFIG_CMA
+	unsigned long		cma_pages;
+#endif
 
 	const char		*name;
 
@@ -495,8 +553,10 @@
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 	/* pfn where compaction free scanner should start */
 	unsigned long		compact_cached_free_pfn;
-	/* pfn where async and sync compaction migration scanner should start */
-	unsigned long		compact_cached_migrate_pfn[2];
+	/* pfn where compaction migration scanner should start */
+	unsigned long		compact_cached_migrate_pfn[ASYNC_AND_SYNC];
+	unsigned long		compact_init_migrate_pfn;
+	unsigned long		compact_init_free_pfn;
 #endif
 
 #ifdef CONFIG_COMPACTION
@@ -504,6 +564,7 @@
 	 * On compaction failure, 1<<compact_defer_shift compactions
 	 * are skipped before trying again. The number attempted since
 	 * last failure is tracked with compact_considered.
+	 * compact_order_failed is the minimum compaction failed order.
 	 */
 	unsigned int		compact_considered;
 	unsigned int		compact_defer_shift;
@@ -529,9 +590,6 @@
 } ____cacheline_internodealigned_in_smp;
 
 enum pgdat_flags {
-	PGDAT_CONGESTED,		/* pgdat has many dirty pages backed by
-					 * a congested BDI
-					 */
 	PGDAT_DIRTY,			/* reclaim scanning has recently found
 					 * many dirty file pages at the tail
 					 * of the LRU.
@@ -541,6 +599,26 @@
 					 */
 	PGDAT_RECLAIM_LOCKED,		/* prevents concurrent reclaim */
 };
+
+enum zone_flags {
+	ZONE_BOOSTED_WATERMARK,		/* zone recently boosted watermarks.
+					 * Cleared when kswapd is woken.
+					 */
+};
+
+static inline unsigned long zone_managed_pages(struct zone *zone)
+{
+	return (unsigned long)atomic_long_read(&zone->managed_pages);
+}
+
+static inline unsigned long zone_cma_pages(struct zone *zone)
+{
+#ifdef CONFIG_CMA
+	return zone->cma_pages;
+#else
+	return 0;
+#endif
+}
 
 static inline unsigned long zone_end_pfn(const struct zone *zone)
 {
@@ -632,6 +710,14 @@
 extern struct page *mem_map;
 #endif
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+struct deferred_split {
+	spinlock_t split_queue_lock;
+	struct list_head split_queue;
+	unsigned long split_queue_len;
+};
+#endif
+
 /*
  * On NUMA machines, each NUMA node would have a pg_data_t to describe
  * it's memory layout. On UMA machines there is a single pglist_data which
@@ -640,24 +726,32 @@
  * Memory statistics and page replacement data structures are maintained on a
  * per-zone basis.
  */
-struct bootmem_data;
 typedef struct pglist_data {
+	/*
+	 * node_zones contains just the zones for THIS node. Not all of the
+	 * zones may be populated, but it is the full list. It is referenced by
+	 * this node's node_zonelists as well as other node's node_zonelists.
+	 */
 	struct zone node_zones[MAX_NR_ZONES];
+
+	/*
+	 * node_zonelists contains references to all zones in all nodes.
+	 * Generally the first zones will be references to this node's
+	 * node_zones.
+	 */
 	struct zonelist node_zonelists[MAX_ZONELISTS];
-	int nr_zones;
+
+	int nr_zones; /* number of populated zones in this node */
 #ifdef CONFIG_FLAT_NODE_MEM_MAP	/* means !SPARSEMEM */
 	struct page *node_mem_map;
 #ifdef CONFIG_PAGE_EXTENSION
 	struct page_ext *node_page_ext;
 #endif
 #endif
-#ifndef CONFIG_NO_BOOTMEM
-	struct bootmem_data *bdata;
-#endif
 #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
 	/*
-	 * Must be held any time you expect node_start_pfn, node_present_pages
-	 * or node_spanned_pages stay constant.
+	 * Must be held any time you expect node_start_pfn,
+	 * node_present_pages, node_spanned_pages or nr_zones to stay constant.
	 * Also synchronizes pgdat->first_deferred_pfn during deferred page
	 * init.
	 *
@@ -678,16 +772,19 @@
 	wait_queue_head_t pfmemalloc_wait;
 	struct task_struct *kswapd;	/* Protected by mem_hotplug_begin/end() */
+	struct task_struct *mkswapd[MAX_KSWAPD_THREADS];
 	int kswapd_order;
-	enum zone_type kswapd_classzone_idx;
+	enum zone_type kswapd_highest_zoneidx;
 
 	int kswapd_failures;		/* Number of 'reclaimed == 0' runs */
 
+	ANDROID_OEM_DATA(1);
 #ifdef CONFIG_COMPACTION
 	int kcompactd_max_order;
-	enum zone_type kcompactd_classzone_idx;
+	enum zone_type kcompactd_highest_zoneidx;
 	wait_queue_head_t kcompactd_wait;
 	struct task_struct *kcompactd;
+	bool proactive_compact_trigger;
 #endif
 	/*
 	 * This is a per-node reserve of pages that are not available
@@ -697,7 +794,7 @@
 
 #ifdef CONFIG_NUMA
 	/*
-	 * zone reclaim becomes active if more unmapped pages exist.
+	 * node reclaim becomes active if more unmapped pages exist.
 	 */
 	unsigned long		min_unmapped_pages;
 	unsigned long		min_slab_pages;
@@ -713,18 +810,20 @@
 	 * is the first PFN that needs to be initialised.
 	 */
 	unsigned long first_deferred_pfn;
-	/* Number of non-deferred pages */
-	unsigned long static_init_pgcnt;
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	spinlock_t split_queue_lock;
-	struct list_head split_queue;
-	unsigned long split_queue_len;
+	struct deferred_split deferred_split_queue;
 #endif
 
 	/* Fields commonly accessed by the page reclaim scanner */
-	struct lruvec		lruvec;
+
+	/*
+	 * NOTE: THIS IS UNUSED IF MEMCG IS ENABLED.
+	 *
+	 * Use mem_cgroup_lruvec() to look up lruvecs.
+	 */
+	struct lruvec		__lruvec;
 
 	unsigned long		flags;
 
@@ -746,15 +845,6 @@
 #define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
 #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))
 
-static inline spinlock_t *zone_lru_lock(struct zone *zone)
-{
-	return &zone->zone_pgdat->lru_lock;
-}
-
-static inline struct lruvec *node_lruvec(struct pglist_data *pgdat)
-{
-	return &pgdat->lruvec;
-}
 
 static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
 {
@@ -770,15 +860,15 @@
 
 void build_all_zonelists(pg_data_t *pgdat);
 void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order,
-		   enum zone_type classzone_idx);
+		   enum zone_type highest_zoneidx);
 bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
-		int classzone_idx, unsigned int alloc_flags,
+		int highest_zoneidx, unsigned int alloc_flags,
 		long free_pages);
 bool zone_watermark_ok(struct zone *z, unsigned int order,
-		unsigned long mark, int classzone_idx,
+		unsigned long mark, int highest_zoneidx,
 		unsigned int alloc_flags);
 bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
-		unsigned long mark, int classzone_idx);
+		unsigned long mark, int highest_zoneidx);
 /*
  * Memory initialization context, use to differentiate memory added by
  * the platform statically or via memory hotplug interface.
@@ -798,17 +888,11 @@
 #ifdef CONFIG_MEMCG
 	return lruvec->pgdat;
 #else
-	return container_of(lruvec, struct pglist_data, lruvec);
+	return container_of(lruvec, struct pglist_data, __lruvec);
 #endif
 }
 
 extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx);
-
-#ifdef CONFIG_HAVE_MEMORY_PRESENT
-void memory_present(int nid, unsigned long start, unsigned long end);
-#else
-static inline void memory_present(int nid, unsigned long start, unsigned long end) {}
-#endif
 
 #ifdef CONFIG_HAVE_MEMORYLESS_NODES
 int local_memory_node(int node_id);
@@ -821,18 +905,6 @@
  */
 #define zone_idx(zone)		((zone) - (zone)->zone_pgdat->node_zones)
 
-#ifdef CONFIG_ZONE_DEVICE
-static inline bool is_dev_zone(const struct zone *zone)
-{
-	return zone_idx(zone) == ZONE_DEVICE;
-}
-#else
-static inline bool is_dev_zone(const struct zone *zone)
-{
-	return false;
-}
-#endif
-
 /*
  * Returns true if a zone has pages managed by the buddy allocator.
  * All the reclaim decisions have to use this function rather than
@@ -841,7 +913,7 @@
  */
 static inline bool managed_zone(struct zone *zone)
 {
-	return zone->managed_pages;
+	return zone_managed_pages(zone);
 }
 
 /* Returns true if a zone has memory */
@@ -850,7 +922,7 @@
 	return zone->present_pages;
 }
 
-#ifdef CONFIG_NUMA
+#ifdef CONFIG_NEED_MULTIPLE_NODES
 static inline int zone_to_nid(struct zone *zone)
 {
 	return zone->node;
@@ -874,7 +946,7 @@
 #ifdef CONFIG_HIGHMEM
 static inline int zone_movable_is_highmem(void)
 {
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+#ifdef CONFIG_NEED_MULTIPLE_NODES
 	return movable_zone == ZONE_HIGHMEM;
 #else
 	return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
@@ -892,8 +964,17 @@
 #endif
 }
 
+#ifdef CONFIG_ZONE_DMA
+bool has_managed_dma(void);
+#else
+static inline bool has_managed_dma(void)
+{
+	return false;
+}
+#endif
+
 /**
- * is_highmem - helper function to quickly check if a struct zone is a
+ * is_highmem - helper function to quickly check if a struct zone is a
  *              highmem zone or not.  This is an attempt to keep references
  *              to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum.
 * @zone - pointer to struct zone variable
@@ -909,22 +990,23 @@
 
 /* These two functions are used to setup the per zone pages min values */
 struct ctl_table;
-int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
-int watermark_scale_factor_sysctl_handler(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
-extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
-int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
-int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
-int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
-int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
-extern int numa_zonelist_order_handler(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
+
+int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void *, size_t *,
+		loff_t *);
+int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *,
+		size_t *, loff_t *);
+extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
+int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *,
+		size_t *, loff_t *);
+int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
+		void *, size_t *, loff_t *);
+int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
+		void *, size_t *, loff_t *);
+int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
+		void *, size_t *, loff_t *);
+int numa_zonelist_order_handler(struct ctl_table *, int,
+		void *, size_t *, loff_t *);
+extern int percpu_pagelist_fraction;
 extern char numa_zonelist_order[];
 #define NUMA_ZONELIST_ORDER_LEN	16
 
@@ -943,6 +1025,7 @@
 extern struct pglist_data *first_online_pgdat(void);
 extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat);
 extern struct zone *next_zone(struct zone *zone);
+extern int isolate_anon_lru_page(struct page *page);
 
 /**
  * for_each_online_pgdat - helper macro to iterate over all online nodes
@@ -1039,7 +1122,7 @@
 /**
  * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask
  * @zone - The current zone in the iterator
- * @z - The current pointer within zonelist->zones being iterated
+ * @z - The current pointer within zonelist->_zonerefs being iterated
 * @zlist - The zonelist being iterated
 * @highidx - The zone index of the highest zone to return
 * @nodemask - Nodemask allowed by the allocator
@@ -1053,7 +1136,7 @@
 		z = next_zones_zonelist(++z, highidx, nodemask),	\
 			zone = zonelist_zone(z))
 
-#define for_next_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
+#define for_next_zone_zonelist_nodemask(zone, z, highidx, nodemask) \
 	for (zone = z->zone;	\
 		zone;							\
 		z = next_zones_zonelist(++z, highidx, nodemask),	\
@@ -1074,15 +1157,6 @@
 
 #ifdef CONFIG_SPARSEMEM
 #include <asm/sparsemem.h>
-#endif
-
-#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
-	!defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
-static inline unsigned long early_pfn_to_nid(unsigned long pfn)
-{
-	BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA));
-	return 0;
-}
 #endif
 
 #ifdef CONFIG_FLATMEM
@@ -1124,6 +1198,32 @@
 #define SECTION_ALIGN_UP(pfn)	(((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)
 #define SECTION_ALIGN_DOWN(pfn)	((pfn) & PAGE_SECTION_MASK)
 
+#define SUBSECTION_SHIFT 21
+#define SUBSECTION_SIZE (1UL << SUBSECTION_SHIFT)
+
+#define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT)
+#define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT)
+#define PAGE_SUBSECTION_MASK (~(PAGES_PER_SUBSECTION-1))
+
+#if SUBSECTION_SHIFT > SECTION_SIZE_BITS
+#error Subsection size exceeds section size
+#else
+#define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - SUBSECTION_SHIFT))
+#endif
+
+#define SUBSECTION_ALIGN_UP(pfn) ALIGN((pfn), PAGES_PER_SUBSECTION)
+#define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
+
+struct mem_section_usage {
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
+#endif
+	/* See declaration of similar field in struct zone */
+	unsigned long pageblock_flags[0];
+};
+
+void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
+
 struct page;
 struct page_ext;
 struct mem_section {
@@ -1141,8 +1241,7 @@
 	 */
 	unsigned long section_mem_map;
 
-	/* See declaration of similar field in struct zone */
-	unsigned long *pageblock_flags;
+	struct mem_section_usage *usage;
 #ifdef CONFIG_PAGE_EXTENSION
 	/*
 	 * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use
@@ -1173,18 +1272,26 @@
 extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
 #endif
 
+static inline unsigned long *section_to_usemap(struct mem_section *ms)
+{
+	return ms->usage->pageblock_flags;
+}
+
 static inline struct mem_section *__nr_to_section(unsigned long nr)
 {
+	unsigned long root = SECTION_NR_TO_ROOT(nr);
+
+	if (unlikely(root >= NR_SECTION_ROOTS))
+		return NULL;
+
 #ifdef CONFIG_SPARSEMEM_EXTREME
-	if (!mem_section)
+	if (!mem_section || !mem_section[root])
 		return NULL;
 #endif
-	if (!mem_section[SECTION_NR_TO_ROOT(nr)])
-		return NULL;
-	return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
+	return &mem_section[root][nr & SECTION_ROOT_MASK];
 }
-extern int __section_nr(struct mem_section* ms);
-extern unsigned long usemap_size(void);
+extern unsigned long __section_nr(struct mem_section *ms);
+extern size_t mem_section_usage_size(void);
 
 /*
  * We use the lower bits of the mem_map pointer to store
@@ -1202,7 +1309,8 @@
 #define	SECTION_MARKED_PRESENT	(1UL<<0)
 #define SECTION_HAS_MEM_MAP	(1UL<<1)
 #define SECTION_IS_ONLINE	(1UL<<2)
-#define SECTION_MAP_LAST_BIT	(1UL<<3)
+#define SECTION_IS_EARLY	(1UL<<3)
+#define SECTION_MAP_LAST_BIT	(1UL<<4)
 #define SECTION_MAP_MASK	(~(SECTION_MAP_LAST_BIT-1))
 #define SECTION_NID_SHIFT	3
 
@@ -1226,6 +1334,11 @@
 static inline int valid_section(struct mem_section *section)
 {
 	return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP));
+}
+
+static inline int early_section(struct mem_section *section)
+{
+	return (section && (section->section_mem_map & SECTION_IS_EARLY));
 }
 
 static inline int valid_section_nr(unsigned long nr)
@@ -1255,22 +1368,60 @@
 	return __nr_to_section(pfn_to_section_nr(pfn));
 }
 
-extern int __highest_present_section_nr;
+extern unsigned long __highest_present_section_nr;
+
+static inline int subsection_map_index(unsigned long pfn)
+{
+	return (pfn & ~(PAGE_SECTION_MASK)) / PAGES_PER_SUBSECTION;
+}
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
+{
+	int idx = subsection_map_index(pfn);
+
+	return test_bit(idx, ms->usage->subsection_map);
+}
+#else
+static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
+{
+	return 1;
+}
+#endif
 
 #ifndef CONFIG_HAVE_ARCH_PFN_VALID
 static inline int pfn_valid(unsigned long pfn)
 {
+	struct mem_section *ms;
+
 	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
 		return 0;
-	return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
+	ms = __nr_to_section(pfn_to_section_nr(pfn));
+	if (!valid_section(ms))
+		return 0;
+	/*
+	 * Traditionally early sections always returned pfn_valid() for
+	 * the entire section-sized span.
+	 */
+	return early_section(ms) || pfn_section_valid(ms, pfn);
 }
 #endif
 
-static inline int pfn_present(unsigned long pfn)
+static inline int pfn_in_present_section(unsigned long pfn)
 {
 	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
 		return 0;
 	return present_section(__nr_to_section(pfn_to_section_nr(pfn)));
+}
+
+static inline unsigned long next_present_section_nr(unsigned long section_nr)
+{
+	while (++section_nr <= __highest_present_section_nr) {
+		if (present_section_nr(section_nr))
+			return section_nr;
+	}
+
+	return -1;
 }
 
 /*
@@ -1288,11 +1439,12 @@
 #define pfn_to_nid(pfn)		(0)
 #endif
 
-#define early_pfn_valid(pfn)	pfn_valid(pfn)
 void sparse_init(void);
 #else
 #define sparse_init()	do {} while (0)
 #define sparse_index_init(_sec, _nid)  do {} while (0)
+#define pfn_in_present_section pfn_valid
+#define subsection_map_init(_pfn, _nr_pages) do {} while (0)
 #endif /* CONFIG_SPARSEMEM */
 
 /*
@@ -1306,15 +1458,9 @@
 	int last_nid;
 };
 
-#ifndef early_pfn_valid
-#define early_pfn_valid(pfn)	(1)
-#endif
-
-void memory_present(int nid, unsigned long start, unsigned long end);
-
 /*
  * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we
- * need to check pfn validility within that MAX_ORDER_NR_PAGES block.
+ * need to check pfn validity within that MAX_ORDER_NR_PAGES block.
 * pfn_valid_within() should be used in this case; we optimise this away
 * when we have no holes within a MAX_ORDER_NR_PAGES block.
 */
@@ -1323,37 +1469,6 @@
 #else
 #define pfn_valid_within(pfn)	(1)
 #endif
-
-#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL
-/*
- * pfn_valid() is meant to be able to tell if a given PFN has valid memmap
- * associated with it or not. This means that a struct page exists for this
- * pfn. The caller cannot assume the page is fully initialized in general.
- * Hotplugable pages might not have been onlined yet. pfn_to_online_page()
- * will ensure the struct page is fully online and initialized. Special pages
- * (e.g. ZONE_DEVICE) are never onlined and should be treated accordingly.
- *
- * In FLATMEM, it is expected that holes always have valid memmap as long as
- * there is valid PFNs either side of the hole. In SPARSEMEM, it is assumed
- * that a valid section has a memmap for the entire section.
- *
- * However, an ARM, and maybe other embedded architectures in the future
- * free memmap backing holes to save memory on the assumption the memmap is
- * never used. The page_zone linkages are then broken even though pfn_valid()
- * returns true. A walker of the full memmap must then do this additional
- * check to ensure the memmap they are looking at is sane by making sure
- * the zone and PFN linkages are still valid. This is expensive, but walkers
- * of the full memmap are extremely rare.
- */
-bool memmap_valid_within(unsigned long pfn,
-			struct page *page, struct zone *zone);
-#else
-static inline bool memmap_valid_within(unsigned long pfn,
-			struct page *page, struct zone *zone)
-{
-	return true;
-}
-#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
 
 #endif /* !__GENERATING_BOUNDS.H */
 #endif /* !__ASSEMBLY__ */
-- 
Gitblit v1.6.2
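
Reviewer note (not part of the change itself): this patch turns zone->watermark into _watermark[] plus watermark_boost and converts managed_pages to an atomic_long_t, so out-of-tree callers have to go through the accessors added above instead of touching the fields directly. The snippet below is a minimal, hypothetical sketch of the intended usage; the function name and the headroom check are illustrations only and do not come from this series.

#include <linux/mmzone.h>

/* Hypothetical caller: checks whether a zone still has pages above its
 * high watermark for an allocation of the given order. */
static bool example_zone_has_headroom(struct zone *zone, unsigned int order)
{
	/* managed_pages is now atomic; read it through zone_managed_pages() */
	unsigned long managed = zone_managed_pages(zone);

	/* watermarks must be read via the *_wmark_pages() macros, which add
	 * the transient watermark_boost on top of _watermark[] */
	unsigned long high = high_wmark_pages(zone);

	if (!managed_zone(zone))
		return false;

	return managed > high + (1UL << order);
}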