| .. | .. |
|---|
| 18 | 18 | #include <linux/pageblock-flags.h> |
|---|
| 19 | 19 | #include <linux/page-flags-layout.h> |
|---|
| 20 | 20 | #include <linux/atomic.h> |
|---|
| 21 | +#include <linux/mm_types.h> |
|---|
| 22 | +#include <linux/page-flags.h> |
|---|
| 21 | 23 | #include <linux/android_kabi.h> |
|---|
| 22 | 24 | #include <asm/page.h> |
|---|
| 23 | 25 | |
|---|
| .. | .. |
|---|
| 36 | 38 | * will not. |
|---|
| 37 | 39 | */ |
|---|
| 38 | 40 | #define PAGE_ALLOC_COSTLY_ORDER 3 |
|---|
| 41 | + |
|---|
| 42 | +#define MAX_KSWAPD_THREADS 16 |
|---|
| 39 | 43 | |
|---|
| 40 | 44 | enum migratetype { |
|---|
| 41 | 45 | MIGRATE_UNMOVABLE, |
|---|
| .. | .. |
|---|
| 66 | 70 | }; |
|---|
| 67 | 71 | |
|---|
| 68 | 72 | /* In mm/page_alloc.c; keep in sync also with show_migration_types() there */ |
|---|
| 69 | | -extern char * const migratetype_names[MIGRATE_TYPES]; |
|---|
| 73 | +extern const char * const migratetype_names[MIGRATE_TYPES]; |
|---|
| 70 | 74 | |
|---|
| 71 | 75 | #ifdef CONFIG_CMA |
|---|
| 72 | 76 | # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) |
|---|
| .. | .. |
|---|
| 89 | 93 | |
|---|
| 90 | 94 | extern int page_group_by_mobility_disabled; |
|---|
| 91 | 95 | |
|---|
| 92 | | -#define NR_MIGRATETYPE_BITS (PB_migrate_end - PB_migrate + 1) |
|---|
| 93 | | -#define MIGRATETYPE_MASK ((1UL << NR_MIGRATETYPE_BITS) - 1) |
|---|
| 96 | +#define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1) |
|---|
| 94 | 97 | |
|---|
| 95 | 98 | #define get_pageblock_migratetype(page) \ |
|---|
| 96 | | - get_pfnblock_flags_mask(page, page_to_pfn(page), \ |
|---|
| 97 | | - PB_migrate_end, MIGRATETYPE_MASK) |
|---|
| 99 | + get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK) |
|---|
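With this change the mask is derived directly from PB_migratetype_bits and get_pfnblock_flags_mask() no longer takes an end-bit argument. A minimal userspace sketch of the same bit math; the value 3 for PB_migratetype_bits is an assumption matching its usual definition in <linux/pageblock-flags.h>, not something this hunk states:

```c
/*
 * Userspace model of the migratetype mask math above, not kernel code.
 * PB_migratetype_bits == 3 is an assumed value.
 */
#include <stdio.h>

#define PB_migratetype_bits	3
#define MIGRATETYPE_MASK	((1UL << PB_migratetype_bits) - 1)

int main(void)
{
	unsigned long flags_word = 0x2b;	/* pretend pageblock flags word */

	/* Only the low PB_migratetype_bits encode the migratetype. */
	printf("mask=%#lx migratetype=%lu\n",
	       MIGRATETYPE_MASK, flags_word & MIGRATETYPE_MASK);
	return 0;
}
```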
| 98 | 100 | |
|---|
| 99 | 101 | struct free_area { |
|---|
| 100 | 102 | struct list_head free_list[MIGRATE_TYPES]; |
|---|
| 101 | 103 | unsigned long nr_free; |
|---|
| 102 | 104 | }; |
|---|
| 105 | + |
|---|
| 106 | +static inline struct page *get_page_from_free_area(struct free_area *area, |
|---|
| 107 | + int migratetype) |
|---|
| 108 | +{ |
|---|
| 109 | + return list_first_entry_or_null(&area->free_list[migratetype], |
|---|
| 110 | + struct page, lru); |
|---|
| 111 | +} |
|---|
| 112 | + |
|---|
| 113 | +static inline bool free_area_empty(struct free_area *area, int migratetype) |
|---|
| 114 | +{ |
|---|
| 115 | + return list_empty(&area->free_list[migratetype]); |
|---|
| 116 | +} |
|---|
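The two new helpers wrap the common "is this free list empty / give me its first page" patterns. A hedged kernel-side sketch of how a caller might combine them; peek_free_page() is a made-up name, and zone->free_area[] is assumed from the rest of this header rather than shown in this hunk:

```c
/*
 * Hedged sketch, not part of this patch: probe one order's free list
 * for a given migratetype using the helpers above.
 */
static struct page *peek_free_page(struct zone *zone, unsigned int order,
				   int migratetype)
{
	struct free_area *area = &zone->free_area[order];

	if (free_area_empty(area, migratetype))
		return NULL;

	return get_page_from_free_area(area, migratetype);
}
```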
| 103 | 117 | |
|---|
| 104 | 118 | struct pglist_data; |
|---|
| 105 | 119 | |
|---|
| .. | .. |
|---|
| 144 | 158 | NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ |
|---|
| 145 | 159 | NR_MLOCK, /* mlock()ed pages found and moved off LRU */ |
|---|
| 146 | 160 | NR_PAGETABLE, /* used for pagetables */ |
|---|
| 147 | | - NR_KERNEL_STACK_KB, /* measured in KiB */ |
|---|
| 148 | | -#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) |
|---|
| 149 | | - NR_KERNEL_SCS_BYTES, /* measured in bytes */ |
|---|
| 150 | | -#endif |
|---|
| 151 | 161 | /* Second 128 byte cacheline */ |
|---|
| 152 | 162 | NR_BOUNCE, |
|---|
| 153 | | -#if IS_ENABLED(CONFIG_ZSMALLOC) |
|---|
| 154 | 163 | NR_ZSPAGES, /* allocated in zsmalloc */ |
|---|
| 155 | | -#endif |
|---|
| 156 | 164 | NR_FREE_CMA_PAGES, |
|---|
| 157 | 165 | NR_VM_ZONE_STAT_ITEMS }; |
|---|
| 158 | 166 | |
|---|
| .. | .. |
|---|
| 163 | 171 | NR_INACTIVE_FILE, /* " " " " " */ |
|---|
| 164 | 172 | NR_ACTIVE_FILE, /* " " " " " */ |
|---|
| 165 | 173 | NR_UNEVICTABLE, /* " " " " " */ |
|---|
| 166 | | - NR_SLAB_RECLAIMABLE, |
|---|
| 167 | | - NR_SLAB_UNRECLAIMABLE, |
|---|
| 174 | + NR_SLAB_RECLAIMABLE_B, |
|---|
| 175 | + NR_SLAB_UNRECLAIMABLE_B, |
|---|
| 168 | 176 | NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ |
|---|
| 169 | 177 | NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ |
|---|
| 170 | | - WORKINGSET_REFAULT, |
|---|
| 171 | | - WORKINGSET_ACTIVATE, |
|---|
| 172 | | - WORKINGSET_RESTORE, |
|---|
| 178 | + WORKINGSET_NODES, |
|---|
| 179 | + WORKINGSET_REFAULT_BASE, |
|---|
| 180 | + WORKINGSET_REFAULT_ANON = WORKINGSET_REFAULT_BASE, |
|---|
| 181 | + WORKINGSET_REFAULT_FILE, |
|---|
| 182 | + WORKINGSET_ACTIVATE_BASE, |
|---|
| 183 | + WORKINGSET_ACTIVATE_ANON = WORKINGSET_ACTIVATE_BASE, |
|---|
| 184 | + WORKINGSET_ACTIVATE_FILE, |
|---|
| 185 | + WORKINGSET_RESTORE_BASE, |
|---|
| 186 | + WORKINGSET_RESTORE_ANON = WORKINGSET_RESTORE_BASE, |
|---|
| 187 | + WORKINGSET_RESTORE_FILE, |
|---|
| 173 | 188 | WORKINGSET_NODERECLAIM, |
|---|
| 174 | 189 | NR_ANON_MAPPED, /* Mapped anonymous pages */ |
|---|
| 175 | 190 | NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. |
|---|
| .. | .. |
|---|
| 181 | 196 | NR_SHMEM, /* shmem pages (includes tmpfs/GEM pages) */ |
| 181 | 196 | NR_SHMEM, /* shmem pages (includes tmpfs/GEM pages) */ |
|---|
| 182 | 197 | NR_SHMEM_THPS, |
|---|
| 183 | 198 | NR_SHMEM_PMDMAPPED, |
|---|
| 199 | + NR_FILE_THPS, |
|---|
| 200 | + NR_FILE_PMDMAPPED, |
|---|
| 184 | 201 | NR_ANON_THPS, |
|---|
| 185 | | - NR_UNSTABLE_NFS, /* NFS unstable pages */ |
|---|
| 186 | 202 | NR_VMSCAN_WRITE, |
|---|
| 187 | 203 | NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ |
|---|
| 188 | 204 | NR_DIRTIED, /* page dirtyings since bootup */ |
|---|
| 189 | 205 | NR_WRITTEN, /* page writings since bootup */ |
|---|
| 190 | 206 | NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */ |
|---|
| 191 | | - NR_UNRECLAIMABLE_PAGES, |
|---|
| 192 | | - NR_ION_HEAP, |
|---|
| 193 | | - NR_ION_HEAP_POOL, |
|---|
| 194 | | - NR_GPU_HEAP, |
|---|
| 207 | + NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */ |
|---|
| 208 | + NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */ |
|---|
| 209 | + NR_KERNEL_STACK_KB, /* measured in KiB */ |
|---|
| 210 | +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) |
|---|
| 211 | + NR_KERNEL_SCS_KB, /* measured in KiB */ |
|---|
| 212 | +#endif |
|---|
| 195 | 213 | NR_VM_NODE_STAT_ITEMS |
|---|
| 196 | 214 | }; |
|---|
| 215 | + |
|---|
| 216 | +/* |
|---|
| 217 | + * Returns true if the value is measured in bytes (most vmstat values are |
|---|
| 218 | + * measured in pages). This defines the API part; the internal representation |
|---|
| 219 | + * might be different. |
|---|
| 220 | + */ |
|---|
| 221 | +static __always_inline bool vmstat_item_in_bytes(int idx) |
|---|
| 222 | +{ |
|---|
| 223 | + /* |
|---|
| 224 | + * Global and per-node slab counters track slab pages. |
|---|
| 225 | + * It's expected that changes are multiples of PAGE_SIZE. |
|---|
| 226 | + * Internally values are stored in pages. |
|---|
| 227 | + * |
|---|
| 228 | + * Per-memcg and per-lruvec counters track memory consumed |
|---|
| 229 | + * by individual slab objects. These counters are actually |
|---|
| 230 | + * byte-precise. |
|---|
| 231 | + */ |
|---|
| 232 | + return (idx == NR_SLAB_RECLAIMABLE_B || |
|---|
| 233 | + idx == NR_SLAB_UNRECLAIMABLE_B); |
|---|
| 234 | +} |
|---|
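A short, hedged illustration of how the predicate is meant to be used when reporting counters. Only vmstat_item_in_bytes() comes from this patch; the printing helper and its name are invented, and enum node_stat_item is the enum these _B items belong to:

```c
/* Illustrative only: pick the right unit when printing a node stat. */
static void show_node_stat(enum node_stat_item item, unsigned long value)
{
	if (vmstat_item_in_bytes(item))
		pr_info("item %d: %lu bytes\n", item, value);
	else
		pr_info("item %d: %lu pages\n", item, value);
}
```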
| 197 | 235 | |
|---|
| 198 | 236 | /* |
|---|
| 199 | 237 | * We do arithmetic on the LRU lists in various places in the code, |
|---|
| .. | .. |
|---|
| 221 | 259 | |
|---|
| 222 | 260 | #define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++) |
|---|
| 223 | 261 | |
|---|
| 224 | | -static inline int is_file_lru(enum lru_list lru) |
|---|
| 262 | +static inline bool is_file_lru(enum lru_list lru) |
|---|
| 225 | 263 | { |
|---|
| 226 | 264 | return (lru == LRU_INACTIVE_FILE || lru == LRU_ACTIVE_FILE); |
|---|
| 227 | 265 | } |
|---|
| 228 | 266 | |
|---|
| 229 | | -static inline int is_active_lru(enum lru_list lru) |
|---|
| 267 | +static inline bool is_active_lru(enum lru_list lru) |
|---|
| 230 | 268 | { |
|---|
| 231 | 269 | return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE); |
|---|
| 232 | 270 | } |
|---|
| 233 | 271 | |
|---|
| 234 | | -struct zone_reclaim_stat { |
|---|
| 235 | | - /* |
|---|
| 236 | | - * The pageout code in vmscan.c keeps track of how many of the |
|---|
| 237 | | - * mem/swap backed and file backed pages are referenced. |
|---|
| 238 | | - * The higher the rotated/scanned ratio, the more valuable |
|---|
| 239 | | - * that cache is. |
|---|
| 240 | | - * |
|---|
| 241 | | - * The anon LRU stats live in [0], file LRU stats in [1] |
|---|
| 242 | | - */ |
|---|
| 243 | | - unsigned long recent_rotated[2]; |
|---|
| 244 | | - unsigned long recent_scanned[2]; |
|---|
| 272 | +#define ANON_AND_FILE 2 |
|---|
| 273 | + |
|---|
| 274 | +enum lruvec_flags { |
|---|
| 275 | + LRUVEC_CONGESTED, /* lruvec has many dirty pages |
|---|
| 276 | + * backed by a congested BDI |
|---|
| 277 | + */ |
|---|
| 245 | 278 | }; |
|---|
| 246 | 279 | |
|---|
| 247 | 280 | struct lruvec { |
|---|
| 248 | 281 | struct list_head lists[NR_LRU_LISTS]; |
|---|
| 249 | | - struct zone_reclaim_stat reclaim_stat; |
|---|
| 250 | | - /* Evictions & activations on the inactive file list */ |
|---|
| 251 | | - atomic_long_t inactive_age; |
|---|
| 282 | + /* |
|---|
| 283 | + * These track the cost of reclaiming one LRU - file or anon - |
|---|
| 284 | + * over the other. As the observed cost of reclaiming one LRU |
|---|
| 285 | + * increases, the reclaim scan balance tips toward the other. |
|---|
| 286 | + */ |
|---|
| 287 | + unsigned long anon_cost; |
|---|
| 288 | + unsigned long file_cost; |
|---|
| 289 | + /* Non-resident age, driven by LRU movement */ |
|---|
| 290 | + atomic_long_t nonresident_age; |
|---|
| 252 | 291 | /* Refaults at the time of last reclaim cycle */ |
|---|
| 253 | | - unsigned long refaults; |
|---|
| 292 | + unsigned long refaults[ANON_AND_FILE]; |
|---|
| 293 | + /* Various lruvec state flags (enum lruvec_flags) */ |
|---|
| 294 | + unsigned long flags; |
|---|
| 254 | 295 | #ifdef CONFIG_MEMCG |
|---|
| 255 | 296 | struct pglist_data *pgdat; |
|---|
| 256 | 297 | #endif |
|---|
| 257 | 298 | }; |
|---|
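Replacing zone_reclaim_stat with anon_cost/file_cost changes the balancing model: reclaim cost observed on one LRU pushes scanning toward the other. A simplified, hedged model of that proportionality; this is not the vmscan.c algorithm, just the idea the two fields encode:

```c
/*
 * Hedged model: fraction (in permille) of scan pressure that would go
 * to the file LRU, growing as the observed anon cost grows.
 */
static unsigned long file_scan_permille(const struct lruvec *lruvec)
{
	unsigned long total = lruvec->anon_cost + lruvec->file_cost;

	if (!total)
		return 500;	/* no history yet: split evenly */

	return lruvec->anon_cost * 1000 / total;
}
```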
| 258 | 299 | |
|---|
| 259 | | -/* Mask used at gathering information at once (see memcontrol.c) */ |
|---|
| 260 | | -#define LRU_ALL_FILE (BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE)) |
|---|
| 261 | | -#define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON)) |
|---|
| 262 | | -#define LRU_ALL ((1 << NR_LRU_LISTS) - 1) |
|---|
| 263 | | - |
|---|
| 264 | | -/* Isolate unmapped file */ |
|---|
| 300 | +/* Isolate unmapped pages */ |
|---|
| 265 | 301 | #define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2) |
|---|
| 266 | 302 | /* Isolate for asynchronous migration */ |
|---|
| 267 | 303 | #define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4) |
|---|
| .. | .. |
|---|
| 278 | 314 | NR_WMARK |
|---|
| 279 | 315 | }; |
|---|
| 280 | 316 | |
|---|
| 281 | | -#define min_wmark_pages(z) (z->watermark[WMARK_MIN]) |
|---|
| 282 | | -#define low_wmark_pages(z) (z->watermark[WMARK_LOW]) |
|---|
| 283 | | -#define high_wmark_pages(z) (z->watermark[WMARK_HIGH]) |
|---|
| 317 | +#define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost) |
|---|
| 318 | +#define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost) |
|---|
| 319 | +#define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost) |
|---|
| 320 | +#define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost) |
|---|
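All four macros now add watermark_boost on top of the stored _watermark[] value, so a temporarily boosted zone raises every watermark check by the same amount. A tiny userspace model of the arithmetic with invented numbers:

```c
#include <stdio.h>

enum { WMARK_MIN, WMARK_LOW, WMARK_HIGH, NR_WMARK };

int main(void)
{
	/* Invented values, in pages. */
	unsigned long _watermark[NR_WMARK] = { 1024, 1280, 1536 };
	unsigned long watermark_boost = 256;

	for (int i = WMARK_MIN; i < NR_WMARK; i++)
		printf("wmark[%d] = %lu pages after boost\n",
		       i, _watermark[i] + watermark_boost);
	return 0;
}
```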
| 284 | 321 | |
|---|
| 285 | 322 | struct per_cpu_pages { |
|---|
| 286 | 323 | int count; /* number of pages in the list */ |
|---|
| .. | .. |
|---|
| 311 | 348 | #endif /* !__GENERATING_BOUNDS.H */ |
|---|
| 312 | 349 | |
|---|
| 313 | 350 | enum zone_type { |
|---|
| 314 | | -#ifdef CONFIG_ZONE_DMA |
|---|
| 315 | 351 | /* |
|---|
| 316 | | - * ZONE_DMA is used when there are devices that are not able |
|---|
| 317 | | - * to do DMA to all of addressable memory (ZONE_NORMAL). Then we |
|---|
| 318 | | - * carve out the portion of memory that is needed for these devices. |
|---|
| 319 | | - * The range is arch specific. |
|---|
| 320 | | - * |
|---|
| 321 | | - * Some examples |
|---|
| 322 | | - * |
|---|
| 323 | | - * Architecture Limit |
|---|
| 324 | | - * --------------------------- |
|---|
| 325 | | - * parisc, ia64, sparc <4G |
|---|
| 326 | | - * s390 <2G |
|---|
| 327 | | - * arm Various |
|---|
| 328 | | - * alpha Unlimited or 0-16MB. |
|---|
| 329 | | - * |
|---|
| 330 | | - * i386, x86_64 and multiple other arches |
|---|
| 331 | | - * <16M. |
|---|
| 352 | + * ZONE_DMA and ZONE_DMA32 are used when there are peripherals not able |
|---|
| 353 | + * to DMA to all of the addressable memory (ZONE_NORMAL). |
|---|
| 354 | + * On architectures where this area covers the whole 32 bit address |
|---|
| 355 | + * space ZONE_DMA32 is used. ZONE_DMA is left for the ones with smaller |
|---|
| 356 | + * DMA addressing constraints. This distinction is important as a 32bit |
|---|
| 357 | + * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit |
|---|
| 358 | + * platforms may need both zones as they support peripherals with |
|---|
| 359 | + * different DMA addressing limitations. |
|---|
| 332 | 360 | */ |
|---|
| 361 | +#ifdef CONFIG_ZONE_DMA |
|---|
| 333 | 362 | ZONE_DMA, |
|---|
| 334 | 363 | #endif |
|---|
| 335 | 364 | #ifdef CONFIG_ZONE_DMA32 |
|---|
| 336 | | - /* |
|---|
| 337 | | - * x86_64 needs two ZONE_DMAs because it supports devices that are |
|---|
| 338 | | - * only able to do DMA to the lower 16M but also 32 bit devices that |
|---|
| 339 | | - * can only do DMA areas below 4G. |
|---|
| 340 | | - */ |
|---|
| 341 | 365 | ZONE_DMA32, |
|---|
| 342 | 366 | #endif |
|---|
| 343 | 367 | /* |
|---|
| .. | .. |
|---|
| 357 | 381 | */ |
|---|
| 358 | 382 | ZONE_HIGHMEM, |
|---|
| 359 | 383 | #endif |
|---|
| 384 | + /* |
|---|
| 385 | + * ZONE_MOVABLE is similar to ZONE_NORMAL, except that it contains |
|---|
| 386 | + * movable pages with few exceptional cases described below. Main use |
|---|
| 387 | + * cases for ZONE_MOVABLE are to make memory offlining/unplug more |
|---|
| 388 | + * likely to succeed, and to locally limit unmovable allocations - e.g., |
|---|
| 389 | + * to increase the number of THP/huge pages. Notable special cases are: |
|---|
| 390 | + * |
|---|
| 391 | + * 1. Pinned pages: (long-term) pinning of movable pages might |
|---|
| 392 | + * essentially turn such pages unmovable. Memory offlining might |
|---|
| 393 | + * retry a long time. |
|---|
| 394 | + * 2. memblock allocations: kernelcore/movablecore setups might create |
|---|
| 395 | + * situations where ZONE_MOVABLE contains unmovable allocations |
|---|
| 396 | + * after boot. Memory offlining and allocations fail early. |
|---|
| 397 | + * 3. Memory holes: kernelcore/movablecore setups might create very rare |
|---|
| 398 | + * situations where ZONE_MOVABLE contains memory holes after boot, |
|---|
| 399 | + * for example, if we have sections that are only partially |
|---|
| 400 | + * populated. Memory offlining and allocations fail early. |
|---|
| 401 | + * 4. PG_hwpoison pages: while poisoned pages can be skipped during |
|---|
| 402 | + * memory offlining, such pages cannot be allocated. |
|---|
| 403 | + * 5. Unmovable PG_offline pages: in paravirtualized environments, |
|---|
| 404 | + * hotplugged memory blocks might only partially be managed by the |
|---|
| 405 | + * buddy (e.g., via XEN-balloon, Hyper-V balloon, virtio-mem). The |
|---|
| 406 | + * parts not managed by the buddy are unmovable PG_offline pages. In |
|---|
| 407 | + * some cases (virtio-mem), such pages can be skipped during |
|---|
| 408 | + * memory offlining, however, cannot be moved/allocated. These |
|---|
| 409 | + * techniques might use alloc_contig_range() to hide previously |
|---|
| 410 | + * exposed pages from the buddy again (e.g., to implement some sort |
|---|
| 411 | + * of memory unplug in virtio-mem). |
|---|
| 412 | + * |
|---|
| 413 | + * In general, no unmovable allocations that degrade memory offlining |
|---|
| 414 | + * should end up in ZONE_MOVABLE. Allocators (like alloc_contig_range()) |
|---|
| 415 | + * have to expect that migrating pages in ZONE_MOVABLE can fail (even |
|---|
| 416 | + * if has_unmovable_pages() states that there are no unmovable pages, |
|---|
| 417 | + * there can be false negatives). |
|---|
| 418 | + */ |
|---|
| 360 | 419 | ZONE_MOVABLE, |
|---|
| 361 | 420 | #ifdef CONFIG_ZONE_DEVICE |
|---|
| 362 | 421 | ZONE_DEVICE, |
|---|
| .. | .. |
|---|
| 367 | 426 | |
|---|
| 368 | 427 | #ifndef __GENERATING_BOUNDS_H |
|---|
| 369 | 428 | |
|---|
| 429 | +#define ASYNC_AND_SYNC 2 |
|---|
| 430 | + |
|---|
| 370 | 431 | struct zone { |
|---|
| 371 | 432 | /* Read-mostly fields */ |
|---|
| 372 | 433 | |
|---|
| 373 | 434 | /* zone watermarks, access with *_wmark_pages(zone) macros */ |
|---|
| 374 | | - unsigned long watermark[NR_WMARK]; |
|---|
| 435 | + unsigned long _watermark[NR_WMARK]; |
|---|
| 436 | + unsigned long watermark_boost; |
|---|
| 375 | 437 | |
|---|
| 376 | 438 | unsigned long nr_reserved_highatomic; |
|---|
| 377 | 439 | |
|---|
| .. | .. |
|---|
| 386 | 448 | */ |
|---|
| 387 | 449 | long lowmem_reserve[MAX_NR_ZONES]; |
|---|
| 388 | 450 | |
|---|
| 389 | | -#ifdef CONFIG_NUMA |
|---|
| 451 | +#ifdef CONFIG_NEED_MULTIPLE_NODES |
|---|
| 390 | 452 | int node; |
|---|
| 391 | 453 | #endif |
|---|
| 392 | 454 | struct pglist_data *zone_pgdat; |
|---|
| 393 | 455 | struct per_cpu_pageset __percpu *pageset; |
|---|
| 394 | | - |
|---|
| 395 | | -#ifdef CONFIG_CMA |
|---|
| 396 | | - bool cma_alloc; |
|---|
| 397 | | -#endif |
|---|
| 398 | 456 | |
|---|
| 399 | 457 | #ifndef CONFIG_SPARSEMEM |
|---|
| 400 | 458 | /* |
|---|
| .. | .. |
|---|
| 421 | 479 | * bootmem allocator): |
|---|
| 422 | 480 | * managed_pages = present_pages - reserved_pages; |
|---|
| 423 | 481 | * |
|---|
| 482 | + * cma pages are present pages that are assigned for CMA use |
|---|
| 483 | + * (MIGRATE_CMA). |
|---|
| 484 | + * |
|---|
| 424 | 485 | * So present_pages may be used by memory hotplug or memory power |
|---|
| 425 | 486 | * management logic to figure out unmanaged pages by checking |
|---|
| 426 | 487 | * (present_pages - managed_pages). And managed_pages should be used |
|---|
| .. | .. |
|---|
| 441 | 502 | * Write access to present_pages at runtime should be protected by |
|---|
| 442 | 503 | * mem_hotplug_begin/end(). Any reader who can't tolerate drift of |
|---|
| 443 | 504 | * present_pages should get_online_mems() to get a stable value. |
|---|
| 444 | | - * |
|---|
| 445 | | - * Read access to managed_pages should be safe because it's unsigned |
|---|
| 446 | | - * long. Write access to zone->managed_pages and totalram_pages are |
|---|
| 447 | | - * protected by managed_page_count_lock at runtime. Idealy only |
|---|
| 448 | | - * adjust_managed_page_count() should be used instead of directly |
|---|
| 449 | | - * touching zone->managed_pages and totalram_pages. |
|---|
| 450 | 505 | */ |
|---|
| 451 | | - unsigned long managed_pages; |
|---|
| 506 | + atomic_long_t managed_pages; |
|---|
| 452 | 507 | unsigned long spanned_pages; |
|---|
| 453 | 508 | unsigned long present_pages; |
|---|
| 509 | +#ifdef CONFIG_CMA |
|---|
| 510 | + unsigned long cma_pages; |
|---|
| 511 | +#endif |
|---|
| 454 | 512 | |
|---|
| 455 | 513 | const char *name; |
|---|
| 456 | 514 | |
|---|
| .. | .. |
|---|
| 495 | 553 | #if defined CONFIG_COMPACTION || defined CONFIG_CMA |
|---|
| 496 | 554 | /* pfn where compaction free scanner should start */ |
|---|
| 497 | 555 | unsigned long compact_cached_free_pfn; |
|---|
| 498 | | - /* pfn where async and sync compaction migration scanner should start */ |
|---|
| 499 | | - unsigned long compact_cached_migrate_pfn[2]; |
|---|
| 556 | + /* pfn where compaction migration scanner should start */ |
|---|
| 557 | + unsigned long compact_cached_migrate_pfn[ASYNC_AND_SYNC]; |
|---|
| 558 | + unsigned long compact_init_migrate_pfn; |
|---|
| 559 | + unsigned long compact_init_free_pfn; |
|---|
| 500 | 560 | #endif |
|---|
| 501 | 561 | |
|---|
| 502 | 562 | #ifdef CONFIG_COMPACTION |
|---|
| .. | .. |
|---|
| 504 | 564 | * On compaction failure, 1<<compact_defer_shift compactions |
|---|
| 505 | 565 | * are skipped before trying again. The number attempted since |
|---|
| 506 | 566 | * last failure is tracked with compact_considered. |
|---|
| 567 | + * compact_order_failed is the minimum compaction failed order. |
|---|
| 507 | 568 | */ |
|---|
| 508 | 569 | unsigned int compact_considered; |
|---|
| 509 | 570 | unsigned int compact_defer_shift; |
|---|
| .. | .. |
|---|
| 529 | 590 | } ____cacheline_internodealigned_in_smp; |
|---|
| 530 | 591 | |
|---|
| 531 | 592 | enum pgdat_flags { |
|---|
| 532 | | - PGDAT_CONGESTED, /* pgdat has many dirty pages backed by |
|---|
| 533 | | - * a congested BDI |
|---|
| 534 | | - */ |
|---|
| 535 | 593 | PGDAT_DIRTY, /* reclaim scanning has recently found |
|---|
| 536 | 594 | * many dirty file pages at the tail |
|---|
| 537 | 595 | * of the LRU. |
|---|
| .. | .. |
|---|
| 541 | 599 | */ |
|---|
| 542 | 600 | PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */ |
|---|
| 543 | 601 | }; |
|---|
| 602 | + |
|---|
| 603 | +enum zone_flags { |
|---|
| 604 | + ZONE_BOOSTED_WATERMARK, /* zone recently boosted watermarks. |
|---|
| 605 | + * Cleared when kswapd is woken. |
|---|
| 606 | + */ |
|---|
| 607 | +}; |
|---|
| 608 | + |
|---|
| 609 | +static inline unsigned long zone_managed_pages(struct zone *zone) |
|---|
| 610 | +{ |
|---|
| 611 | + return (unsigned long)atomic_long_read(&zone->managed_pages); |
|---|
| 612 | +} |
|---|
| 613 | + |
|---|
| 614 | +static inline unsigned long zone_cma_pages(struct zone *zone) |
|---|
| 615 | +{ |
|---|
| 616 | +#ifdef CONFIG_CMA |
|---|
| 617 | + return zone->cma_pages; |
|---|
| 618 | +#else |
|---|
| 619 | + return 0; |
|---|
| 620 | +#endif |
|---|
| 621 | +} |
|---|
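Because managed_pages is now an atomic_long_t, readers are expected to go through zone_managed_pages() rather than touching the field, and zone_cma_pages() gives the CMA share with a zero fallback when CONFIG_CMA is off. A hedged read-side example built only on those accessors; the helper name and the "more than half" threshold are invented:

```c
/* Illustrative only: is more than half of the managed memory CMA? */
static bool zone_mostly_cma(struct zone *zone)
{
	unsigned long managed = zone_managed_pages(zone);

	return managed && zone_cma_pages(zone) > managed / 2;
}
```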
| 544 | 622 | |
|---|
| 545 | 623 | static inline unsigned long zone_end_pfn(const struct zone *zone) |
|---|
| 546 | 624 | { |
|---|
| .. | .. |
|---|
| 632 | 710 | extern struct page *mem_map; |
|---|
| 633 | 711 | #endif |
|---|
| 634 | 712 | |
|---|
| 713 | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE |
|---|
| 714 | +struct deferred_split { |
|---|
| 715 | + spinlock_t split_queue_lock; |
|---|
| 716 | + struct list_head split_queue; |
|---|
| 717 | + unsigned long split_queue_len; |
|---|
| 718 | +}; |
|---|
| 719 | +#endif |
|---|
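struct deferred_split pulls the THP deferred-split queue fields out of pg_data_t; the node now embeds one as deferred_split_queue, as the pglist_data hunk below shows. A hedged sketch of how such a queue would be brought to an empty state; the real initialisation lives in mm/, this only shows the shape of the struct:

```c
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* Illustrative only: initialise a deferred_split queue as empty. */
static void deferred_split_init(struct deferred_split *ds)
{
	spin_lock_init(&ds->split_queue_lock);
	INIT_LIST_HEAD(&ds->split_queue);
	ds->split_queue_len = 0;
}
#endif
```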
| 720 | + |
|---|
| 635 | 721 | /* |
|---|
| 636 | 722 | * On NUMA machines, each NUMA node would have a pg_data_t to describe |
|---|
| 637 | 723 | * its memory layout. On UMA machines there is a single pglist_data which |
|---|
| .. | .. |
|---|
| 640 | 726 | * Memory statistics and page replacement data structures are maintained on a |
|---|
| 641 | 727 | * per-zone basis. |
|---|
| 642 | 728 | */ |
|---|
| 643 | | -struct bootmem_data; |
|---|
| 644 | 729 | typedef struct pglist_data { |
|---|
| 730 | + /* |
|---|
| 731 | + * node_zones contains just the zones for THIS node. Not all of the |
|---|
| 732 | + * zones may be populated, but it is the full list. It is referenced by |
|---|
| 733 | + * this node's node_zonelists as well as other node's node_zonelists. |
|---|
| 734 | + */ |
|---|
| 645 | 735 | struct zone node_zones[MAX_NR_ZONES]; |
|---|
| 736 | + |
|---|
| 737 | + /* |
|---|
| 738 | + * node_zonelists contains references to all zones in all nodes. |
|---|
| 739 | + * Generally the first zones will be references to this node's |
|---|
| 740 | + * node_zones. |
|---|
| 741 | + */ |
|---|
| 646 | 742 | struct zonelist node_zonelists[MAX_ZONELISTS]; |
|---|
| 647 | | - int nr_zones; |
|---|
| 743 | + |
|---|
| 744 | + int nr_zones; /* number of populated zones in this node */ |
|---|
| 648 | 745 | #ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */ |
|---|
| 649 | 746 | struct page *node_mem_map; |
|---|
| 650 | 747 | #ifdef CONFIG_PAGE_EXTENSION |
|---|
| 651 | 748 | struct page_ext *node_page_ext; |
|---|
| 652 | 749 | #endif |
|---|
| 653 | 750 | #endif |
|---|
| 654 | | -#ifndef CONFIG_NO_BOOTMEM |
|---|
| 655 | | - struct bootmem_data *bdata; |
|---|
| 656 | | -#endif |
|---|
| 657 | 751 | #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) |
|---|
| 658 | 752 | /* |
|---|
| 659 | | - * Must be held any time you expect node_start_pfn, node_present_pages |
|---|
| 660 | | - * or node_spanned_pages stay constant. |
|---|
| 753 | + * Must be held any time you expect node_start_pfn, |
|---|
| 754 | + * node_present_pages, node_spanned_pages or nr_zones to stay constant. |
|---|
| 661 | 755 | * Also synchronizes pgdat->first_deferred_pfn during deferred page |
|---|
| 662 | 756 | * init. |
|---|
| 663 | 757 | * |
|---|
| .. | .. |
|---|
| 678 | 772 | wait_queue_head_t pfmemalloc_wait; |
|---|
| 679 | 773 | struct task_struct *kswapd; /* Protected by |
|---|
| 680 | 774 | mem_hotplug_begin/end() */ |
|---|
| 775 | + struct task_struct *mkswapd[MAX_KSWAPD_THREADS]; |
|---|
| 681 | 776 | int kswapd_order; |
|---|
| 682 | | - enum zone_type kswapd_classzone_idx; |
|---|
| 777 | + enum zone_type kswapd_highest_zoneidx; |
|---|
| 683 | 778 | |
|---|
| 684 | 779 | int kswapd_failures; /* Number of 'reclaimed == 0' runs */ |
|---|
| 685 | 780 | |
|---|
| 781 | + ANDROID_OEM_DATA(1); |
|---|
| 686 | 782 | #ifdef CONFIG_COMPACTION |
|---|
| 687 | 783 | int kcompactd_max_order; |
|---|
| 688 | | - enum zone_type kcompactd_classzone_idx; |
|---|
| 784 | + enum zone_type kcompactd_highest_zoneidx; |
|---|
| 689 | 785 | wait_queue_head_t kcompactd_wait; |
|---|
| 690 | 786 | struct task_struct *kcompactd; |
|---|
| 787 | + bool proactive_compact_trigger; |
|---|
| 691 | 788 | #endif |
|---|
| 692 | 789 | /* |
|---|
| 693 | 790 | * This is a per-node reserve of pages that are not available |
|---|
| .. | .. |
|---|
| 697 | 794 | |
|---|
| 698 | 795 | #ifdef CONFIG_NUMA |
|---|
| 699 | 796 | /* |
|---|
| 700 | | - * zone reclaim becomes active if more unmapped pages exist. |
|---|
| 797 | + * node reclaim becomes active if more unmapped pages exist. |
|---|
| 701 | 798 | */ |
|---|
| 702 | 799 | unsigned long min_unmapped_pages; |
|---|
| 703 | 800 | unsigned long min_slab_pages; |
|---|
| .. | .. |
|---|
| 713 | 810 | * is the first PFN that needs to be initialised. |
|---|
| 714 | 811 | */ |
|---|
| 715 | 812 | unsigned long first_deferred_pfn; |
|---|
| 716 | | - /* Number of non-deferred pages */ |
|---|
| 717 | | - unsigned long static_init_pgcnt; |
|---|
| 718 | 813 | #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ |
|---|
| 719 | 814 | |
|---|
| 720 | 815 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
|---|
| 721 | | - spinlock_t split_queue_lock; |
|---|
| 722 | | - struct list_head split_queue; |
|---|
| 723 | | - unsigned long split_queue_len; |
|---|
| 816 | + struct deferred_split deferred_split_queue; |
|---|
| 724 | 817 | #endif |
|---|
| 725 | 818 | |
|---|
| 726 | 819 | /* Fields commonly accessed by the page reclaim scanner */ |
|---|
| 727 | | - struct lruvec lruvec; |
|---|
| 820 | + |
|---|
| 821 | + /* |
|---|
| 822 | + * NOTE: THIS IS UNUSED IF MEMCG IS ENABLED. |
|---|
| 823 | + * |
|---|
| 824 | + * Use mem_cgroup_lruvec() to look up lruvecs. |
|---|
| 825 | + */ |
|---|
| 826 | + struct lruvec __lruvec; |
|---|
| 728 | 827 | |
|---|
| 729 | 828 | unsigned long flags; |
|---|
| 730 | 829 | |
|---|
| .. | .. |
|---|
| 746 | 845 | |
|---|
| 747 | 846 | #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) |
|---|
| 748 | 847 | #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid)) |
|---|
| 749 | | -static inline spinlock_t *zone_lru_lock(struct zone *zone) |
|---|
| 750 | | -{ |
|---|
| 751 | | - return &zone->zone_pgdat->lru_lock; |
|---|
| 752 | | -} |
|---|
| 753 | | - |
|---|
| 754 | | -static inline struct lruvec *node_lruvec(struct pglist_data *pgdat) |
|---|
| 755 | | -{ |
|---|
| 756 | | - return &pgdat->lruvec; |
|---|
| 757 | | -} |
|---|
| 758 | 848 | |
|---|
| 759 | 849 | static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat) |
|---|
| 760 | 850 | { |
|---|
| .. | .. |
|---|
| 770 | 860 | |
|---|
| 771 | 861 | void build_all_zonelists(pg_data_t *pgdat); |
|---|
| 772 | 862 | void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order, |
|---|
| 773 | | - enum zone_type classzone_idx); |
|---|
| 863 | + enum zone_type highest_zoneidx); |
|---|
| 774 | 864 | bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, |
|---|
| 775 | | - int classzone_idx, unsigned int alloc_flags, |
|---|
| 865 | + int highest_zoneidx, unsigned int alloc_flags, |
|---|
| 776 | 866 | long free_pages); |
|---|
| 777 | 867 | bool zone_watermark_ok(struct zone *z, unsigned int order, |
|---|
| 778 | | - unsigned long mark, int classzone_idx, |
|---|
| 868 | + unsigned long mark, int highest_zoneidx, |
|---|
| 779 | 869 | unsigned int alloc_flags); |
|---|
| 780 | 870 | bool zone_watermark_ok_safe(struct zone *z, unsigned int order, |
|---|
| 781 | | - unsigned long mark, int classzone_idx); |
|---|
| 871 | + unsigned long mark, int highest_zoneidx); |
|---|
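The classzone_idx parameters are renamed to highest_zoneidx throughout these declarations. A hedged example of a caller-side check that combines them with the watermark macros and zone_idx() from this header; the wrapper name is invented, and alloc_flags of 0 keeps the check plain:

```c
/* Illustrative only: does this zone clear its boosted high watermark? */
static bool zone_above_high_wmark(struct zone *z)
{
	return zone_watermark_ok(z, 0, high_wmark_pages(z), zone_idx(z), 0);
}
```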
| 782 | 872 | /* |
|---|
| 783 | 873 | * Memory initialization context, use to differentiate memory added by |
|---|
| 784 | 874 | * the platform statically or via memory hotplug interface. |
|---|
| .. | .. |
|---|
| 798 | 888 | #ifdef CONFIG_MEMCG |
|---|
| 799 | 889 | return lruvec->pgdat; |
|---|
| 800 | 890 | #else |
|---|
| 801 | | - return container_of(lruvec, struct pglist_data, lruvec); |
|---|
| 891 | + return container_of(lruvec, struct pglist_data, __lruvec); |
|---|
| 802 | 892 | #endif |
|---|
| 803 | 893 | } |
|---|
| 804 | 894 | |
|---|
| 805 | 895 | extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx); |
|---|
| 806 | | - |
|---|
| 807 | | -#ifdef CONFIG_HAVE_MEMORY_PRESENT |
|---|
| 808 | | -void memory_present(int nid, unsigned long start, unsigned long end); |
|---|
| 809 | | -#else |
|---|
| 810 | | -static inline void memory_present(int nid, unsigned long start, unsigned long end) {} |
|---|
| 811 | | -#endif |
|---|
| 812 | 896 | |
|---|
| 813 | 897 | #ifdef CONFIG_HAVE_MEMORYLESS_NODES |
|---|
| 814 | 898 | int local_memory_node(int node_id); |
|---|
| .. | .. |
|---|
| 821 | 905 | */ |
|---|
| 822 | 906 | #define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones) |
|---|
| 823 | 907 | |
|---|
| 824 | | -#ifdef CONFIG_ZONE_DEVICE |
|---|
| 825 | | -static inline bool is_dev_zone(const struct zone *zone) |
|---|
| 826 | | -{ |
|---|
| 827 | | - return zone_idx(zone) == ZONE_DEVICE; |
|---|
| 828 | | -} |
|---|
| 829 | | -#else |
|---|
| 830 | | -static inline bool is_dev_zone(const struct zone *zone) |
|---|
| 831 | | -{ |
|---|
| 832 | | - return false; |
|---|
| 833 | | -} |
|---|
| 834 | | -#endif |
|---|
| 835 | | - |
|---|
| 836 | 908 | /* |
|---|
| 837 | 909 | * Returns true if a zone has pages managed by the buddy allocator. |
|---|
| 838 | 910 | * All the reclaim decisions have to use this function rather than |
|---|
| .. | .. |
|---|
| 841 | 913 | */ |
|---|
| 842 | 914 | static inline bool managed_zone(struct zone *zone) |
|---|
| 843 | 915 | { |
|---|
| 844 | | - return zone->managed_pages; |
|---|
| 916 | + return zone_managed_pages(zone); |
|---|
| 845 | 917 | } |
|---|
| 846 | 918 | |
|---|
| 847 | 919 | /* Returns true if a zone has memory */ |
|---|
| .. | .. |
|---|
| 850 | 922 | return zone->present_pages; |
|---|
| 851 | 923 | } |
|---|
| 852 | 924 | |
|---|
| 853 | | -#ifdef CONFIG_NUMA |
|---|
| 925 | +#ifdef CONFIG_NEED_MULTIPLE_NODES |
|---|
| 854 | 926 | static inline int zone_to_nid(struct zone *zone) |
|---|
| 855 | 927 | { |
|---|
| 856 | 928 | return zone->node; |
|---|
| .. | .. |
|---|
| 874 | 946 | #ifdef CONFIG_HIGHMEM |
|---|
| 875 | 947 | static inline int zone_movable_is_highmem(void) |
|---|
| 876 | 948 | { |
|---|
| 877 | | -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
|---|
| 949 | +#ifdef CONFIG_NEED_MULTIPLE_NODES |
|---|
| 878 | 950 | return movable_zone == ZONE_HIGHMEM; |
|---|
| 879 | 951 | #else |
|---|
| 880 | 952 | return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM; |
|---|
| .. | .. |
|---|
| 892 | 964 | #endif |
|---|
| 893 | 965 | } |
|---|
| 894 | 966 | |
|---|
| 967 | +#ifdef CONFIG_ZONE_DMA |
|---|
| 968 | +bool has_managed_dma(void); |
|---|
| 969 | +#else |
|---|
| 970 | +static inline bool has_managed_dma(void) |
|---|
| 971 | +{ |
|---|
| 972 | + return false; |
|---|
| 973 | +} |
|---|
| 974 | +#endif |
|---|
| 975 | + |
|---|
| 895 | 976 | /** |
|---|
| 896 | | - * is_highmem - helper function to quickly check if a struct zone is a |
|---|
| 977 | + * is_highmem - helper function to quickly check if a struct zone is a |
|---|
| 897 | 978 | * highmem zone or not. This is an attempt to keep references |
|---|
| 898 | 979 | * to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum. |
|---|
| 899 | 980 | * @zone - pointer to struct zone variable |
|---|
| .. | .. |
|---|
| 909 | 990 | |
|---|
| 910 | 991 | /* These two functions are used to setup the per zone pages min values */ |
|---|
| 911 | 992 | struct ctl_table; |
|---|
| 912 | | -int min_free_kbytes_sysctl_handler(struct ctl_table *, int, |
|---|
| 913 | | - void __user *, size_t *, loff_t *); |
|---|
| 914 | | -int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, |
|---|
| 915 | | - void __user *, size_t *, loff_t *); |
|---|
| 916 | | -extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES]; |
|---|
| 917 | | -int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, |
|---|
| 918 | | - void __user *, size_t *, loff_t *); |
|---|
| 919 | | -int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, |
|---|
| 920 | | - void __user *, size_t *, loff_t *); |
|---|
| 921 | | -int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, |
|---|
| 922 | | - void __user *, size_t *, loff_t *); |
|---|
| 923 | | -int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, |
|---|
| 924 | | - void __user *, size_t *, loff_t *); |
|---|
| 925 | 993 | |
|---|
| 926 | | -extern int numa_zonelist_order_handler(struct ctl_table *, int, |
|---|
| 927 | | - void __user *, size_t *, loff_t *); |
|---|
| 994 | +int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void *, size_t *, |
|---|
| 995 | + loff_t *); |
|---|
| 996 | +int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *, |
|---|
| 997 | + size_t *, loff_t *); |
|---|
| 998 | +extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES]; |
|---|
| 999 | +int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *, |
|---|
| 1000 | + size_t *, loff_t *); |
|---|
| 1001 | +int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, |
|---|
| 1002 | + void *, size_t *, loff_t *); |
|---|
| 1003 | +int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, |
|---|
| 1004 | + void *, size_t *, loff_t *); |
|---|
| 1005 | +int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, |
|---|
| 1006 | + void *, size_t *, loff_t *); |
|---|
| 1007 | +int numa_zonelist_order_handler(struct ctl_table *, int, |
|---|
| 1008 | + void *, size_t *, loff_t *); |
|---|
| 1009 | +extern int percpu_pagelist_fraction; |
|---|
| 928 | 1010 | extern char numa_zonelist_order[]; |
|---|
| 929 | 1011 | #define NUMA_ZONELIST_ORDER_LEN 16 |
|---|
| 930 | 1012 | |
|---|
| .. | .. |
|---|
| 943 | 1025 | extern struct pglist_data *first_online_pgdat(void); |
|---|
| 944 | 1026 | extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat); |
|---|
| 945 | 1027 | extern struct zone *next_zone(struct zone *zone); |
|---|
| 1028 | +extern int isolate_anon_lru_page(struct page *page); |
|---|
| 946 | 1029 | |
|---|
| 947 | 1030 | /** |
|---|
| 948 | 1031 | * for_each_online_pgdat - helper macro to iterate over all online nodes |
|---|
| .. | .. |
|---|
| 1039 | 1122 | /** |
|---|
| 1040 | 1123 | * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask |
|---|
| 1041 | 1124 | * @zone - The current zone in the iterator |
|---|
| 1042 | | - * @z - The current pointer within zonelist->zones being iterated |
|---|
| 1125 | + * @z - The current pointer within zonelist->_zonerefs being iterated |
|---|
| 1043 | 1126 | * @zlist - The zonelist being iterated |
|---|
| 1044 | 1127 | * @highidx - The zone index of the highest zone to return |
|---|
| 1045 | 1128 | * @nodemask - Nodemask allowed by the allocator |
|---|
| .. | .. |
|---|
| 1053 | 1136 | z = next_zones_zonelist(++z, highidx, nodemask), \ |
|---|
| 1054 | 1137 | zone = zonelist_zone(z)) |
|---|
| 1055 | 1138 | |
|---|
| 1056 | | -#define for_next_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \ |
|---|
| 1139 | +#define for_next_zone_zonelist_nodemask(zone, z, highidx, nodemask) \ |
|---|
| 1057 | 1140 | for (zone = z->zone; \ |
|---|
| 1058 | 1141 | zone; \ |
|---|
| 1059 | 1142 | z = next_zones_zonelist(++z, highidx, nodemask), \ |
|---|
| .. | .. |
|---|
| 1074 | 1157 | |
|---|
| 1075 | 1158 | #ifdef CONFIG_SPARSEMEM |
|---|
| 1076 | 1159 | #include <asm/sparsemem.h> |
|---|
| 1077 | | -#endif |
|---|
| 1078 | | - |
|---|
| 1079 | | -#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \ |
|---|
| 1080 | | - !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) |
|---|
| 1081 | | -static inline unsigned long early_pfn_to_nid(unsigned long pfn) |
|---|
| 1082 | | -{ |
|---|
| 1083 | | - BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA)); |
|---|
| 1084 | | - return 0; |
|---|
| 1085 | | -} |
|---|
| 1086 | 1160 | #endif |
|---|
| 1087 | 1161 | |
|---|
| 1088 | 1162 | #ifdef CONFIG_FLATMEM |
|---|
| .. | .. |
|---|
| 1124 | 1198 | #define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK) |
|---|
| 1125 | 1199 | #define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK) |
|---|
| 1126 | 1200 | |
|---|
| 1201 | +#define SUBSECTION_SHIFT 21 |
|---|
| 1202 | +#define SUBSECTION_SIZE (1UL << SUBSECTION_SHIFT) |
|---|
| 1203 | + |
|---|
| 1204 | +#define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT) |
|---|
| 1205 | +#define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT) |
|---|
| 1206 | +#define PAGE_SUBSECTION_MASK (~(PAGES_PER_SUBSECTION-1)) |
|---|
| 1207 | + |
|---|
| 1208 | +#if SUBSECTION_SHIFT > SECTION_SIZE_BITS |
|---|
| 1209 | +#error Subsection size exceeds section size |
|---|
| 1210 | +#else |
|---|
| 1211 | +#define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - SUBSECTION_SHIFT)) |
|---|
| 1212 | +#endif |
|---|
| 1213 | + |
|---|
| 1214 | +#define SUBSECTION_ALIGN_UP(pfn) ALIGN((pfn), PAGES_PER_SUBSECTION) |
|---|
| 1215 | +#define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK) |
|---|
| 1216 | + |
|---|
| 1217 | +struct mem_section_usage { |
|---|
| 1218 | +#ifdef CONFIG_SPARSEMEM_VMEMMAP |
|---|
| 1219 | + DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION); |
|---|
| 1220 | +#endif |
|---|
| 1221 | + /* See declaration of similar field in struct zone */ |
|---|
| 1222 | + unsigned long pageblock_flags[0]; |
|---|
| 1223 | +}; |
|---|
| 1224 | + |
|---|
| 1225 | +void subsection_map_init(unsigned long pfn, unsigned long nr_pages); |
|---|
| 1226 | + |
|---|
| 1127 | 1227 | struct page; |
|---|
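The new subsection constants split each memory section into 2 MiB pieces tracked by subsection_map in mem_section_usage. A userspace model of the geometry; PAGE_SHIFT of 12 and SECTION_SIZE_BITS of 27 (x86_64's values) are assumptions, the kernel takes them from its asm headers:

```c
#include <stdio.h>

#define PAGE_SHIFT		12	/* assumed: 4 KiB pages */
#define SECTION_SIZE_BITS	27	/* assumed: 128 MiB sections */
#define SUBSECTION_SHIFT	21	/* 2 MiB subsections, as above */

int main(void)
{
	unsigned long pages_per_subsection =
		1UL << (SUBSECTION_SHIFT - PAGE_SHIFT);
	unsigned long subsections_per_section =
		1UL << (SECTION_SIZE_BITS - SUBSECTION_SHIFT);

	printf("%lu pages per subsection, %lu subsections per section\n",
	       pages_per_subsection, subsections_per_section);
	return 0;
}
```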
| 1128 | 1228 | struct page_ext; |
|---|
| 1129 | 1229 | struct mem_section { |
|---|
| .. | .. |
|---|
| 1141 | 1241 | */ |
|---|
| 1142 | 1242 | unsigned long section_mem_map; |
|---|
| 1143 | 1243 | |
|---|
| 1144 | | - /* See declaration of similar field in struct zone */ |
|---|
| 1145 | | - unsigned long *pageblock_flags; |
|---|
| 1244 | + struct mem_section_usage *usage; |
|---|
| 1146 | 1245 | #ifdef CONFIG_PAGE_EXTENSION |
|---|
| 1147 | 1246 | /* |
|---|
| 1148 | 1247 | * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use |
|---|
| .. | .. |
|---|
| 1173 | 1272 | extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]; |
|---|
| 1174 | 1273 | #endif |
|---|
| 1175 | 1274 | |
|---|
| 1275 | +static inline unsigned long *section_to_usemap(struct mem_section *ms) |
|---|
| 1276 | +{ |
|---|
| 1277 | + return ms->usage->pageblock_flags; |
|---|
| 1278 | +} |
|---|
| 1279 | + |
|---|
| 1176 | 1280 | static inline struct mem_section *__nr_to_section(unsigned long nr) |
|---|
| 1177 | 1281 | { |
|---|
| 1282 | + unsigned long root = SECTION_NR_TO_ROOT(nr); |
|---|
| 1283 | + |
|---|
| 1284 | + if (unlikely(root >= NR_SECTION_ROOTS)) |
|---|
| 1285 | + return NULL; |
|---|
| 1286 | + |
|---|
| 1178 | 1287 | #ifdef CONFIG_SPARSEMEM_EXTREME |
|---|
| 1179 | | - if (!mem_section) |
|---|
| 1288 | + if (!mem_section || !mem_section[root]) |
|---|
| 1180 | 1289 | return NULL; |
|---|
| 1181 | 1290 | #endif |
|---|
| 1182 | | - if (!mem_section[SECTION_NR_TO_ROOT(nr)]) |
|---|
| 1183 | | - return NULL; |
|---|
| 1184 | | - return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; |
|---|
| 1291 | + return &mem_section[root][nr & SECTION_ROOT_MASK]; |
|---|
| 1185 | 1292 | } |
|---|
| 1186 | | -extern int __section_nr(struct mem_section* ms); |
|---|
| 1187 | | -extern unsigned long usemap_size(void); |
|---|
| 1293 | +extern unsigned long __section_nr(struct mem_section *ms); |
|---|
| 1294 | +extern size_t mem_section_usage_size(void); |
|---|
| 1188 | 1295 | |
|---|
| 1189 | 1296 | /* |
|---|
| 1190 | 1297 | * We use the lower bits of the mem_map pointer to store |
|---|
| .. | .. |
|---|
| 1202 | 1309 | #define SECTION_MARKED_PRESENT (1UL<<0) |
|---|
| 1203 | 1310 | #define SECTION_HAS_MEM_MAP (1UL<<1) |
|---|
| 1204 | 1311 | #define SECTION_IS_ONLINE (1UL<<2) |
|---|
| 1205 | | -#define SECTION_MAP_LAST_BIT (1UL<<3) |
|---|
| 1312 | +#define SECTION_IS_EARLY (1UL<<3) |
|---|
| 1313 | +#define SECTION_MAP_LAST_BIT (1UL<<4) |
|---|
| 1206 | 1314 | #define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1)) |
|---|
| 1207 | 1315 | #define SECTION_NID_SHIFT 3 |
|---|
| 1208 | 1316 | |
|---|
| .. | .. |
|---|
| 1226 | 1334 | static inline int valid_section(struct mem_section *section) |
|---|
| 1227 | 1335 | { |
|---|
| 1228 | 1336 | return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP)); |
|---|
| 1337 | +} |
|---|
| 1338 | + |
|---|
| 1339 | +static inline int early_section(struct mem_section *section) |
|---|
| 1340 | +{ |
|---|
| 1341 | + return (section && (section->section_mem_map & SECTION_IS_EARLY)); |
|---|
| 1229 | 1342 | } |
|---|
| 1230 | 1343 | |
|---|
| 1231 | 1344 | static inline int valid_section_nr(unsigned long nr) |
|---|
| .. | .. |
|---|
| 1255 | 1368 | return __nr_to_section(pfn_to_section_nr(pfn)); |
|---|
| 1256 | 1369 | } |
|---|
| 1257 | 1370 | |
|---|
| 1258 | | -extern int __highest_present_section_nr; |
|---|
| 1371 | +extern unsigned long __highest_present_section_nr; |
|---|
| 1372 | + |
|---|
| 1373 | +static inline int subsection_map_index(unsigned long pfn) |
|---|
| 1374 | +{ |
|---|
| 1375 | + return (pfn & ~(PAGE_SECTION_MASK)) / PAGES_PER_SUBSECTION; |
|---|
| 1376 | +} |
|---|
| 1377 | + |
|---|
| 1378 | +#ifdef CONFIG_SPARSEMEM_VMEMMAP |
|---|
| 1379 | +static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) |
|---|
| 1380 | +{ |
|---|
| 1381 | + int idx = subsection_map_index(pfn); |
|---|
| 1382 | + |
|---|
| 1383 | + return test_bit(idx, ms->usage->subsection_map); |
|---|
| 1384 | +} |
|---|
| 1385 | +#else |
|---|
| 1386 | +static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) |
|---|
| 1387 | +{ |
|---|
| 1388 | + return 1; |
|---|
| 1389 | +} |
|---|
| 1390 | +#endif |
|---|
| 1259 | 1391 | |
|---|
| 1260 | 1392 | #ifndef CONFIG_HAVE_ARCH_PFN_VALID |
|---|
| 1261 | 1393 | static inline int pfn_valid(unsigned long pfn) |
|---|
| 1262 | 1394 | { |
|---|
| 1395 | + struct mem_section *ms; |
|---|
| 1396 | + |
|---|
| 1263 | 1397 | if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) |
|---|
| 1264 | 1398 | return 0; |
|---|
| 1265 | | - return valid_section(__nr_to_section(pfn_to_section_nr(pfn))); |
|---|
| 1399 | + ms = __nr_to_section(pfn_to_section_nr(pfn)); |
|---|
| 1400 | + if (!valid_section(ms)) |
|---|
| 1401 | + return 0; |
|---|
| 1402 | + /* |
|---|
| 1403 | + * Traditionally early sections always returned pfn_valid() for |
|---|
| 1404 | + * the entire section-sized span. |
|---|
| 1405 | + */ |
|---|
| 1406 | + return early_section(ms) || pfn_section_valid(ms, pfn); |
|---|
| 1266 | 1407 | } |
|---|
| 1267 | 1408 | #endif |
|---|
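pfn_valid() now accepts a pfn only if its section is an early (boot-time) section or the pfn's subsection bit is set, instead of treating a whole section as uniformly valid. A hedged kernel-side sketch of a range walk built on it; purely illustrative, and real walkers usually also go through pfn_to_online_page():

```c
/* Illustrative only: count pfns in [start, end) that have a memmap. */
static unsigned long count_valid_pfns(unsigned long start, unsigned long end)
{
	unsigned long pfn, nr = 0;

	for (pfn = start; pfn < end; pfn++)
		if (pfn_valid(pfn))
			nr++;
	return nr;
}
```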
| 1268 | 1409 | |
|---|
| 1269 | | -static inline int pfn_present(unsigned long pfn) |
|---|
| 1410 | +static inline int pfn_in_present_section(unsigned long pfn) |
|---|
| 1270 | 1411 | { |
|---|
| 1271 | 1412 | if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) |
|---|
| 1272 | 1413 | return 0; |
|---|
| 1273 | 1414 | return present_section(__nr_to_section(pfn_to_section_nr(pfn))); |
|---|
| 1415 | +} |
|---|
| 1416 | + |
|---|
| 1417 | +static inline unsigned long next_present_section_nr(unsigned long section_nr) |
|---|
| 1418 | +{ |
|---|
| 1419 | + while (++section_nr <= __highest_present_section_nr) { |
|---|
| 1420 | + if (present_section_nr(section_nr)) |
|---|
| 1421 | + return section_nr; |
|---|
| 1422 | + } |
|---|
| 1423 | + |
|---|
| 1424 | + return -1; |
|---|
| 1274 | 1425 | } |
|---|
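next_present_section_nr() advances to the next present section and returns -1 (as an unsigned long) once it passes __highest_present_section_nr. A hedged sketch of iterating every present section with it; the walker itself is made up, and starting from -1UL makes the first increment land on section 0:

```c
/* Illustrative only: visit every present memory section. */
static void walk_present_sections(void)
{
	unsigned long nr;

	for (nr = next_present_section_nr(-1UL); nr != -1UL;
	     nr = next_present_section_nr(nr))
		pr_debug("section %lu is present\n", nr);
}
```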
| 1275 | 1426 | |
|---|
| 1276 | 1427 | /* |
|---|
| .. | .. |
|---|
| 1288 | 1439 | #define pfn_to_nid(pfn) (0) |
|---|
| 1289 | 1440 | #endif |
|---|
| 1290 | 1441 | |
|---|
| 1291 | | -#define early_pfn_valid(pfn) pfn_valid(pfn) |
|---|
| 1292 | 1442 | void sparse_init(void); |
|---|
| 1293 | 1443 | #else |
|---|
| 1294 | 1444 | #define sparse_init() do {} while (0) |
|---|
| 1295 | 1445 | #define sparse_index_init(_sec, _nid) do {} while (0) |
|---|
| 1446 | +#define pfn_in_present_section pfn_valid |
|---|
| 1447 | +#define subsection_map_init(_pfn, _nr_pages) do {} while (0) |
|---|
| 1296 | 1448 | #endif /* CONFIG_SPARSEMEM */ |
|---|
| 1297 | 1449 | |
|---|
| 1298 | 1450 | /* |
|---|
| .. | .. |
|---|
| 1306 | 1458 | int last_nid; |
|---|
| 1307 | 1459 | }; |
|---|
| 1308 | 1460 | |
|---|
| 1309 | | -#ifndef early_pfn_valid |
|---|
| 1310 | | -#define early_pfn_valid(pfn) (1) |
|---|
| 1311 | | -#endif |
|---|
| 1312 | | - |
|---|
| 1313 | | -void memory_present(int nid, unsigned long start, unsigned long end); |
|---|
| 1314 | | - |
|---|
| 1315 | 1461 | /* |
|---|
| 1316 | 1462 | * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we |
|---|
| 1317 | | - * need to check pfn validility within that MAX_ORDER_NR_PAGES block. |
|---|
| 1463 | + * need to check pfn validity within that MAX_ORDER_NR_PAGES block. |
|---|
| 1318 | 1464 | * pfn_valid_within() should be used in this case; we optimise this away |
|---|
| 1319 | 1465 | * when we have no holes within a MAX_ORDER_NR_PAGES block. |
|---|
| 1320 | 1466 | */ |
|---|
| .. | .. |
|---|
| 1323 | 1469 | #else |
|---|
| 1324 | 1470 | #define pfn_valid_within(pfn) (1) |
|---|
| 1325 | 1471 | #endif |
|---|
| 1326 | | - |
|---|
| 1327 | | -#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL |
|---|
| 1328 | | -/* |
|---|
| 1329 | | - * pfn_valid() is meant to be able to tell if a given PFN has valid memmap |
|---|
| 1330 | | - * associated with it or not. This means that a struct page exists for this |
|---|
| 1331 | | - * pfn. The caller cannot assume the page is fully initialized in general. |
|---|
| 1332 | | - * Hotplugable pages might not have been onlined yet. pfn_to_online_page() |
|---|
| 1333 | | - * will ensure the struct page is fully online and initialized. Special pages |
|---|
| 1334 | | - * (e.g. ZONE_DEVICE) are never onlined and should be treated accordingly. |
|---|
| 1335 | | - * |
|---|
| 1336 | | - * In FLATMEM, it is expected that holes always have valid memmap as long as |
|---|
| 1337 | | - * there is valid PFNs either side of the hole. In SPARSEMEM, it is assumed |
|---|
| 1338 | | - * that a valid section has a memmap for the entire section. |
|---|
| 1339 | | - * |
|---|
| 1340 | | - * However, an ARM, and maybe other embedded architectures in the future |
|---|
| 1341 | | - * free memmap backing holes to save memory on the assumption the memmap is |
|---|
| 1342 | | - * never used. The page_zone linkages are then broken even though pfn_valid() |
|---|
| 1343 | | - * returns true. A walker of the full memmap must then do this additional |
|---|
| 1344 | | - * check to ensure the memmap they are looking at is sane by making sure |
|---|
| 1345 | | - * the zone and PFN linkages are still valid. This is expensive, but walkers |
|---|
| 1346 | | - * of the full memmap are extremely rare. |
|---|
| 1347 | | - */ |
|---|
| 1348 | | -bool memmap_valid_within(unsigned long pfn, |
|---|
| 1349 | | - struct page *page, struct zone *zone); |
|---|
| 1350 | | -#else |
|---|
| 1351 | | -static inline bool memmap_valid_within(unsigned long pfn, |
|---|
| 1352 | | - struct page *page, struct zone *zone) |
|---|
| 1353 | | -{ |
|---|
| 1354 | | - return true; |
|---|
| 1355 | | -} |
|---|
| 1356 | | -#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ |
|---|
| 1357 | 1472 | |
|---|
| 1358 | 1473 | #endif /* !__GENERATING_BOUNDS.H */ |
|---|
| 1359 | 1474 | #endif /* !__ASSEMBLY__ */ |
|---|