...
 #include <linux/pageblock-flags.h>
 #include <linux/page-flags-layout.h>
 #include <linux/atomic.h>
+#include <linux/mm_types.h>
+#include <linux/page-flags.h>
 #include <linux/android_kabi.h>
 #include <asm/page.h>

...
  * will not.
  */
 #define PAGE_ALLOC_COSTLY_ORDER 3
+
+#define MAX_KSWAPD_THREADS 16

 enum migratetype {
         MIGRATE_UNMOVABLE,
...
 };

 /* In mm/page_alloc.c; keep in sync also with show_migration_types() there */
-extern char * const migratetype_names[MIGRATE_TYPES];
+extern const char * const migratetype_names[MIGRATE_TYPES];

 #ifdef CONFIG_CMA
 # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
...

 extern int page_group_by_mobility_disabled;

-#define NR_MIGRATETYPE_BITS (PB_migrate_end - PB_migrate + 1)
-#define MIGRATETYPE_MASK ((1UL << NR_MIGRATETYPE_BITS) - 1)
+#define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1)

 #define get_pageblock_migratetype(page) \
-        get_pfnblock_flags_mask(page, page_to_pfn(page), \
-                        PB_migrate_end, MIGRATETYPE_MASK)
+        get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK)

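
Illustrative sketch, not part of the patch: with the single-mask form above, reading a
pageblock's migratetype and testing it against the CMA type could look like the helper
below; the function name is made up.

static bool page_is_in_cma_block(struct page *page)
{
        int mt = get_pageblock_migratetype(page);

        return is_migrate_cma(mt);
}
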
 struct free_area {
         struct list_head free_list[MIGRATE_TYPES];
         unsigned long nr_free;
 };
+
+static inline struct page *get_page_from_free_area(struct free_area *area,
+                                                   int migratetype)
+{
+        return list_first_entry_or_null(&area->free_list[migratetype],
+                                        struct page, lru);
+}
+
+static inline bool free_area_empty(struct free_area *area, int migratetype)
+{
+        return list_empty(&area->free_list[migratetype]);
+}

 struct pglist_data;

...
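
Illustrative sketch, not part of the patch: an allocator-side caller could use the two
new helpers roughly as below, assuming mainline's free_area[] array in struct zone and
that zone->lock is held; the function name is made up.

/* Caller is assumed to hold zone->lock. */
static struct page *first_free_page(struct zone *zone, unsigned int order,
                                    int migratetype)
{
        struct free_area *area = &zone->free_area[order];

        if (free_area_empty(area, migratetype))
                return NULL;

        return get_page_from_free_area(area, migratetype);
}
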
         NR_ZONE_WRITE_PENDING,  /* Count of dirty, writeback and unstable pages */
         NR_MLOCK,               /* mlock()ed pages found and moved off LRU */
         NR_PAGETABLE,           /* used for pagetables */
-        NR_KERNEL_STACK_KB,     /* measured in KiB */
-#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
-        NR_KERNEL_SCS_BYTES,    /* measured in bytes */
-#endif
         /* Second 128 byte cacheline */
         NR_BOUNCE,
-#if IS_ENABLED(CONFIG_ZSMALLOC)
         NR_ZSPAGES,             /* allocated in zsmalloc */
-#endif
         NR_FREE_CMA_PAGES,
         NR_VM_ZONE_STAT_ITEMS };

...
         NR_INACTIVE_FILE,       /*  "     "     "   "       "         */
         NR_ACTIVE_FILE,         /*  "     "     "   "       "         */
         NR_UNEVICTABLE,         /*  "     "     "   "       "         */
-        NR_SLAB_RECLAIMABLE,
-        NR_SLAB_UNRECLAIMABLE,
+        NR_SLAB_RECLAIMABLE_B,
+        NR_SLAB_UNRECLAIMABLE_B,
         NR_ISOLATED_ANON,       /* Temporary isolated pages from anon lru */
         NR_ISOLATED_FILE,       /* Temporary isolated pages from file lru */
-        WORKINGSET_REFAULT,
-        WORKINGSET_ACTIVATE,
-        WORKINGSET_RESTORE,
+        WORKINGSET_NODES,
+        WORKINGSET_REFAULT_BASE,
+        WORKINGSET_REFAULT_ANON = WORKINGSET_REFAULT_BASE,
+        WORKINGSET_REFAULT_FILE,
+        WORKINGSET_ACTIVATE_BASE,
+        WORKINGSET_ACTIVATE_ANON = WORKINGSET_ACTIVATE_BASE,
+        WORKINGSET_ACTIVATE_FILE,
+        WORKINGSET_RESTORE_BASE,
+        WORKINGSET_RESTORE_ANON = WORKINGSET_RESTORE_BASE,
+        WORKINGSET_RESTORE_FILE,
         WORKINGSET_NODERECLAIM,
         NR_ANON_MAPPED,         /* Mapped anonymous pages */
         NR_FILE_MAPPED,         /* pagecache pages mapped into pagetables.
...
         NR_SHMEM,               /* shmem pages (included tmpfs/GEM pages) */
         NR_SHMEM_THPS,
         NR_SHMEM_PMDMAPPED,
+        NR_FILE_THPS,
+        NR_FILE_PMDMAPPED,
         NR_ANON_THPS,
-        NR_UNSTABLE_NFS,        /* NFS unstable pages */
         NR_VMSCAN_WRITE,
         NR_VMSCAN_IMMEDIATE,    /* Prioritise for reclaim when writeback ends */
         NR_DIRTIED,             /* page dirtyings since bootup */
         NR_WRITTEN,             /* page writings since bootup */
         NR_KERNEL_MISC_RECLAIMABLE,     /* reclaimable non-slab kernel pages */
-        NR_UNRECLAIMABLE_PAGES,
-        NR_ION_HEAP,
-        NR_ION_HEAP_POOL,
-        NR_GPU_HEAP,
+        NR_FOLL_PIN_ACQUIRED,   /* via: pin_user_page(), gup flag: FOLL_PIN */
+        NR_FOLL_PIN_RELEASED,   /* pages returned via unpin_user_page() */
+        NR_KERNEL_STACK_KB,     /* measured in KiB */
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+        NR_KERNEL_SCS_KB,       /* measured in KiB */
+#endif
         NR_VM_NODE_STAT_ITEMS
 };
+
+/*
+ * Returns true if the value is measured in bytes (most vmstat values are
+ * measured in pages). This defines the API part, the internal representation
+ * might be different.
+ */
+static __always_inline bool vmstat_item_in_bytes(int idx)
+{
+        /*
+         * Global and per-node slab counters track slab pages.
+         * It's expected that changes are multiples of PAGE_SIZE.
+         * Internally values are stored in pages.
+         *
+         * Per-memcg and per-lruvec counters track memory, consumed
+         * by individual slab objects. These counters are actually
+         * byte-precise.
+         */
+        return (idx == NR_SLAB_RECLAIMABLE_B ||
+                idx == NR_SLAB_UNRECLAIMABLE_B);
+}

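
Illustrative sketch, not part of the patch: a consumer of these counters could
normalise byte-based items (currently only the two slab counters) back to pages
before reporting them; the helper name is made up.

static unsigned long stat_value_in_pages(int idx, long value)
{
        if (vmstat_item_in_bytes(idx))
                return (unsigned long)value >> PAGE_SHIFT;      /* bytes -> pages */

        return (unsigned long)value;                            /* already in pages */
}
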
 /*
  * We do arithmetic on the LRU lists in various places in the code,
...

 #define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++)

-static inline int is_file_lru(enum lru_list lru)
+static inline bool is_file_lru(enum lru_list lru)
 {
         return (lru == LRU_INACTIVE_FILE || lru == LRU_ACTIVE_FILE);
 }

-static inline int is_active_lru(enum lru_list lru)
+static inline bool is_active_lru(enum lru_list lru)
 {
         return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE);
 }

-struct zone_reclaim_stat {
-        /*
-         * The pageout code in vmscan.c keeps track of how many of the
-         * mem/swap backed and file backed pages are referenced.
-         * The higher the rotated/scanned ratio, the more valuable
-         * that cache is.
-         *
-         * The anon LRU stats live in [0], file LRU stats in [1]
-         */
-        unsigned long recent_rotated[2];
-        unsigned long recent_scanned[2];
+#define ANON_AND_FILE 2
+
+enum lruvec_flags {
+        LRUVEC_CONGESTED,       /* lruvec has many dirty pages
+                                 * backed by a congested BDI
+                                 */
 };

 struct lruvec {
         struct list_head        lists[NR_LRU_LISTS];
-        struct zone_reclaim_stat reclaim_stat;
-        /* Evictions & activations on the inactive file list */
-        atomic_long_t           inactive_age;
+        /*
+         * These track the cost of reclaiming one LRU - file or anon -
+         * over the other. As the observed cost of reclaiming one LRU
+         * increases, the reclaim scan balance tips toward the other.
+         */
+        unsigned long           anon_cost;
+        unsigned long           file_cost;
+        /* Non-resident age, driven by LRU movement */
+        atomic_long_t           nonresident_age;
         /* Refaults at the time of last reclaim cycle */
-        unsigned long           refaults;
+        unsigned long           refaults[ANON_AND_FILE];
+        /* Various lruvec state flags (enum lruvec_flags) */
+        unsigned long           flags;
 #ifdef CONFIG_MEMCG
         struct pglist_data *pgdat;
 #endif
 };

-/* Mask used at gathering information at once (see memcontrol.c) */
-#define LRU_ALL_FILE (BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE))
-#define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON))
-#define LRU_ALL      ((1 << NR_LRU_LISTS) - 1)
-
-/* Isolate unmapped file */
+/* Isolate unmapped pages */
 #define ISOLATE_UNMAPPED        ((__force isolate_mode_t)0x2)
 /* Isolate for asynchronous migration */
 #define ISOLATE_ASYNC_MIGRATE   ((__force isolate_mode_t)0x4)
...
         NR_WMARK
 };

-#define min_wmark_pages(z) (z->watermark[WMARK_MIN])
-#define low_wmark_pages(z) (z->watermark[WMARK_LOW])
-#define high_wmark_pages(z) (z->watermark[WMARK_HIGH])
+#define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
+#define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
+#define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
+#define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost)

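
Illustrative sketch, not part of the patch: wmark_pages() folds the boost into any
watermark, so a generic check can be written once. zone_page_state() and NR_FREE_PAGES
come from linux/vmstat.h and the zone stat enum rather than this hunk, the enum holding
WMARK_MIN/LOW/HIGH is assumed to be enum zone_watermarks, and the helper name is made up.

static bool zone_above_wmark(struct zone *z, enum zone_watermarks w)
{
        return zone_page_state(z, NR_FREE_PAGES) > wmark_pages(z, w);
}
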
 struct per_cpu_pages {
         int count;              /* number of pages in the list */
...
 #endif /* !__GENERATING_BOUNDS.H */

 enum zone_type {
-#ifdef CONFIG_ZONE_DMA
         /*
-         * ZONE_DMA is used when there are devices that are not able
-         * to do DMA to all of addressable memory (ZONE_NORMAL). Then we
-         * carve out the portion of memory that is needed for these devices.
-         * The range is arch specific.
-         *
-         * Some examples
-         *
-         * Architecture         Limit
-         * ---------------------------
-         * parisc, ia64, sparc  <4G
-         * s390                 <2G
-         * arm                  Various
-         * alpha                Unlimited or 0-16MB.
-         *
-         * i386, x86_64 and multiple other arches
-         *                      <16M.
+         * ZONE_DMA and ZONE_DMA32 are used when there are peripherals not able
+         * to DMA to all of the addressable memory (ZONE_NORMAL).
+         * On architectures where this area covers the whole 32 bit address
+         * space ZONE_DMA32 is used. ZONE_DMA is left for the ones with smaller
+         * DMA addressing constraints. This distinction is important as a 32bit
+         * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit
+         * platforms may need both zones as they support peripherals with
+         * different DMA addressing limitations.
          */
+#ifdef CONFIG_ZONE_DMA
         ZONE_DMA,
 #endif
 #ifdef CONFIG_ZONE_DMA32
-        /*
-         * x86_64 needs two ZONE_DMAs because it supports devices that are
-         * only able to do DMA to the lower 16M but also 32 bit devices that
-         * can only do DMA areas below 4G.
-         */
         ZONE_DMA32,
 #endif
         /*
...
          */
         ZONE_HIGHMEM,
 #endif
+        /*
+         * ZONE_MOVABLE is similar to ZONE_NORMAL, except that it contains
+         * movable pages with few exceptional cases described below. Main use
+         * cases for ZONE_MOVABLE are to make memory offlining/unplug more
+         * likely to succeed, and to locally limit unmovable allocations - e.g.,
+         * to increase the number of THP/huge pages. Notable special cases are:
+         *
+         * 1. Pinned pages: (long-term) pinning of movable pages might
+         *    essentially turn such pages unmovable. Memory offlining might
+         *    retry a long time.
+         * 2. memblock allocations: kernelcore/movablecore setups might create
+         *    situations where ZONE_MOVABLE contains unmovable allocations
+         *    after boot. Memory offlining and allocations fail early.
+         * 3. Memory holes: kernelcore/movablecore setups might create very rare
+         *    situations where ZONE_MOVABLE contains memory holes after boot,
+         *    for example, if we have sections that are only partially
+         *    populated. Memory offlining and allocations fail early.
+         * 4. PG_hwpoison pages: while poisoned pages can be skipped during
+         *    memory offlining, such pages cannot be allocated.
+         * 5. Unmovable PG_offline pages: in paravirtualized environments,
+         *    hotplugged memory blocks might only partially be managed by the
+         *    buddy (e.g., via XEN-balloon, Hyper-V balloon, virtio-mem). The
+         *    parts not managed by the buddy are unmovable PG_offline pages. In
+         *    some cases (virtio-mem), such pages can be skipped during
+         *    memory offlining but cannot be moved/allocated. These
+         *    techniques might use alloc_contig_range() to hide previously
+         *    exposed pages from the buddy again (e.g., to implement some sort
+         *    of memory unplug in virtio-mem).
+         *
+         * In general, no unmovable allocations that degrade memory offlining
+         * should end up in ZONE_MOVABLE. Allocators (like alloc_contig_range())
+         * have to expect that migrating pages in ZONE_MOVABLE can fail (even
+         * if has_unmovable_pages() states that there are no unmovable pages,
+         * there can be false negatives).
+         */
         ZONE_MOVABLE,
 #ifdef CONFIG_ZONE_DEVICE
         ZONE_DEVICE,
...

 #ifndef __GENERATING_BOUNDS_H

+#define ASYNC_AND_SYNC 2
+
 struct zone {
         /* Read-mostly fields */

         /* zone watermarks, access with *_wmark_pages(zone) macros */
-        unsigned long watermark[NR_WMARK];
+        unsigned long _watermark[NR_WMARK];
+        unsigned long watermark_boost;

         unsigned long nr_reserved_highatomic;

...
          */
         long lowmem_reserve[MAX_NR_ZONES];

-#ifdef CONFIG_NUMA
+#ifdef CONFIG_NEED_MULTIPLE_NODES
         int node;
 #endif
         struct pglist_data      *zone_pgdat;
         struct per_cpu_pageset __percpu *pageset;
-
-#ifdef CONFIG_CMA
-        bool cma_alloc;
-#endif

 #ifndef CONFIG_SPARSEMEM
         /*
...
          * bootmem allocator):
          *     managed_pages = present_pages - reserved_pages;
          *
+         * cma_pages is the number of present pages assigned for CMA use
+         * (MIGRATE_CMA).
+         *
          * So present_pages may be used by memory hotplug or memory power
          * management logic to figure out unmanaged pages by checking
          * (present_pages - managed_pages). And managed_pages should be used
...
          * Write access to present_pages at runtime should be protected by
          * mem_hotplug_begin/end(). Any reader who can't tolerate drift of
          * present_pages should get_online_mems() to get a stable value.
-         *
-         * Read access to managed_pages should be safe because it's unsigned
-         * long. Write access to zone->managed_pages and totalram_pages are
-         * protected by managed_page_count_lock at runtime. Idealy only
-         * adjust_managed_page_count() should be used instead of directly
-         * touching zone->managed_pages and totalram_pages.
          */
-        unsigned long           managed_pages;
+        atomic_long_t           managed_pages;
         unsigned long           spanned_pages;
         unsigned long           present_pages;
+#ifdef CONFIG_CMA
+        unsigned long           cma_pages;
+#endif

         const char              *name;

...
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
         /* pfn where compaction free scanner should start */
         unsigned long           compact_cached_free_pfn;
-        /* pfn where async and sync compaction migration scanner should start */
-        unsigned long           compact_cached_migrate_pfn[2];
+        /* pfn where compaction migration scanner should start */
+        unsigned long           compact_cached_migrate_pfn[ASYNC_AND_SYNC];
+        unsigned long           compact_init_migrate_pfn;
+        unsigned long           compact_init_free_pfn;
 #endif

 #ifdef CONFIG_COMPACTION
...
          * On compaction failure, 1<<compact_defer_shift compactions
          * are skipped before trying again. The number attempted since
          * last failure is tracked with compact_considered.
+         * compact_order_failed is the minimum compaction failed order.
          */
         unsigned int            compact_considered;
         unsigned int            compact_defer_shift;
...
 } ____cacheline_internodealigned_in_smp;

 enum pgdat_flags {
-        PGDAT_CONGESTED,                /* pgdat has many dirty pages backed by
-                                         * a congested BDI
-                                         */
         PGDAT_DIRTY,                    /* reclaim scanning has recently found
                                          * many dirty file pages at the tail
                                          * of the LRU.
...
                                          */
         PGDAT_RECLAIM_LOCKED,           /* prevents concurrent reclaim */
 };
+
+enum zone_flags {
+        ZONE_BOOSTED_WATERMARK,         /* zone recently boosted watermarks.
+                                         * Cleared when kswapd is woken.
+                                         */
+};
+
+static inline unsigned long zone_managed_pages(struct zone *zone)
+{
+        return (unsigned long)atomic_long_read(&zone->managed_pages);
+}
+
+static inline unsigned long zone_cma_pages(struct zone *zone)
+{
+#ifdef CONFIG_CMA
+        return zone->cma_pages;
+#else
+        return 0;
+#endif
+}

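
Illustrative sketch, not part of the patch: with managed_pages now an atomic_long_t,
readers go through zone_managed_pages(); for example, the non-CMA share of a zone's
managed memory could be computed as below (the helper name is made up).

static unsigned long zone_non_cma_managed_pages(struct zone *zone)
{
        unsigned long managed = zone_managed_pages(zone);
        unsigned long cma = zone_cma_pages(zone);

        return managed > cma ? managed - cma : 0;
}
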
 static inline unsigned long zone_end_pfn(const struct zone *zone)
 {
...
 extern struct page *mem_map;
 #endif

+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+struct deferred_split {
+        spinlock_t split_queue_lock;
+        struct list_head split_queue;
+        unsigned long split_queue_len;
+};
+#endif
+
 /*
  * On NUMA machines, each NUMA node would have a pg_data_t to describe
  * its memory layout. On UMA machines there is a single pglist_data which
...
  * Memory statistics and page replacement data structures are maintained on a
  * per-zone basis.
  */
-struct bootmem_data;
 typedef struct pglist_data {
+        /*
+         * node_zones contains just the zones for THIS node. Not all of the
+         * zones may be populated, but it is the full list. It is referenced by
+         * this node's node_zonelists as well as other node's node_zonelists.
+         */
         struct zone node_zones[MAX_NR_ZONES];
+
+        /*
+         * node_zonelists contains references to all zones in all nodes.
+         * Generally the first zones will be references to this node's
+         * node_zones.
+         */
         struct zonelist node_zonelists[MAX_ZONELISTS];
-        int nr_zones;
+
+        int nr_zones; /* number of populated zones in this node */
 #ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
         struct page *node_mem_map;
 #ifdef CONFIG_PAGE_EXTENSION
         struct page_ext *node_page_ext;
 #endif
 #endif
-#ifndef CONFIG_NO_BOOTMEM
-        struct bootmem_data *bdata;
-#endif
 #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
         /*
-         * Must be held any time you expect node_start_pfn, node_present_pages
-         * or node_spanned_pages stay constant.
+         * Must be held any time you expect node_start_pfn,
+         * node_present_pages, node_spanned_pages or nr_zones to stay constant.
          * Also synchronizes pgdat->first_deferred_pfn during deferred page
          * init.
          *
...
         wait_queue_head_t pfmemalloc_wait;
         struct task_struct *kswapd;     /* Protected by
                                            mem_hotplug_begin/end() */
+        struct task_struct *mkswapd[MAX_KSWAPD_THREADS];
         int kswapd_order;
-        enum zone_type kswapd_classzone_idx;
+        enum zone_type kswapd_highest_zoneidx;

         int kswapd_failures;            /* Number of 'reclaimed == 0' runs */

+        ANDROID_OEM_DATA(1);
 #ifdef CONFIG_COMPACTION
         int kcompactd_max_order;
-        enum zone_type kcompactd_classzone_idx;
+        enum zone_type kcompactd_highest_zoneidx;
         wait_queue_head_t kcompactd_wait;
         struct task_struct *kcompactd;
+        bool proactive_compact_trigger;
 #endif
         /*
          * This is a per-node reserve of pages that are not available
...

 #ifdef CONFIG_NUMA
         /*
-         * zone reclaim becomes active if more unmapped pages exist.
+         * node reclaim becomes active if more unmapped pages exist.
          */
         unsigned long           min_unmapped_pages;
         unsigned long           min_slab_pages;
...
          * is the first PFN that needs to be initialised.
          */
         unsigned long first_deferred_pfn;
-        /* Number of non-deferred pages */
-        unsigned long static_init_pgcnt;
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */

 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-        spinlock_t split_queue_lock;
-        struct list_head split_queue;
-        unsigned long split_queue_len;
+        struct deferred_split deferred_split_queue;
 #endif

         /* Fields commonly accessed by the page reclaim scanner */
-        struct lruvec lruvec;
+
+        /*
+         * NOTE: THIS IS UNUSED IF MEMCG IS ENABLED.
+         *
+         * Use mem_cgroup_lruvec() to look up lruvecs.
+         */
+        struct lruvec           __lruvec;

         unsigned long           flags;

...

 #define node_start_pfn(nid)     (NODE_DATA(nid)->node_start_pfn)
 #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))
-static inline spinlock_t *zone_lru_lock(struct zone *zone)
-{
-        return &zone->zone_pgdat->lru_lock;
-}
-
-static inline struct lruvec *node_lruvec(struct pglist_data *pgdat)
-{
-        return &pgdat->lruvec;
-}

 static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
 {
...

 void build_all_zonelists(pg_data_t *pgdat);
 void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order,
-                   enum zone_type classzone_idx);
+                   enum zone_type highest_zoneidx);
 bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
-                int classzone_idx, unsigned int alloc_flags,
+                int highest_zoneidx, unsigned int alloc_flags,
                 long free_pages);
 bool zone_watermark_ok(struct zone *z, unsigned int order,
-                unsigned long mark, int classzone_idx,
+                unsigned long mark, int highest_zoneidx,
                 unsigned int alloc_flags);
 bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
-                unsigned long mark, int classzone_idx);
+                unsigned long mark, int highest_zoneidx);
 /*
  * Memory initialization context, use to differentiate memory added by
  * the platform statically or via memory hotplug interface.
...
 #ifdef CONFIG_MEMCG
         return lruvec->pgdat;
 #else
-        return container_of(lruvec, struct pglist_data, lruvec);
+        return container_of(lruvec, struct pglist_data, __lruvec);
 #endif
 }

 extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx);
-
-#ifdef CONFIG_HAVE_MEMORY_PRESENT
-void memory_present(int nid, unsigned long start, unsigned long end);
-#else
-static inline void memory_present(int nid, unsigned long start, unsigned long end) {}
-#endif

 #ifdef CONFIG_HAVE_MEMORYLESS_NODES
 int local_memory_node(int node_id);
...
  */
 #define zone_idx(zone)          ((zone) - (zone)->zone_pgdat->node_zones)

-#ifdef CONFIG_ZONE_DEVICE
-static inline bool is_dev_zone(const struct zone *zone)
-{
-        return zone_idx(zone) == ZONE_DEVICE;
-}
-#else
-static inline bool is_dev_zone(const struct zone *zone)
-{
-        return false;
-}
-#endif
-
 /*
  * Returns true if a zone has pages managed by the buddy allocator.
  * All the reclaim decisions have to use this function rather than
...
  */
 static inline bool managed_zone(struct zone *zone)
 {
-        return zone->managed_pages;
+        return zone_managed_pages(zone);
 }

 /* Returns true if a zone has memory */
...
         return zone->present_pages;
 }

-#ifdef CONFIG_NUMA
+#ifdef CONFIG_NEED_MULTIPLE_NODES
 static inline int zone_to_nid(struct zone *zone)
 {
         return zone->node;
...
 #ifdef CONFIG_HIGHMEM
 static inline int zone_movable_is_highmem(void)
 {
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+#ifdef CONFIG_NEED_MULTIPLE_NODES
         return movable_zone == ZONE_HIGHMEM;
 #else
         return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
...
 #endif
 }

+#ifdef CONFIG_ZONE_DMA
+bool has_managed_dma(void);
+#else
+static inline bool has_managed_dma(void)
+{
+        return false;
+}
+#endif
+
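
Illustrative sketch, not part of the patch: a caller could consult has_managed_dma()
to avoid requesting GFP_DMA memory when no ZONE_DMA pages are managed by the buddy.
alloc_page() and GFP_DMA come from linux/gfp.h, not from this hunk, and the helper
name is made up.

static struct page *alloc_page_prefer_dma(gfp_t gfp)
{
        if (has_managed_dma())
                return alloc_page(gfp | GFP_DMA);

        /* No usable DMA zone, fall back to a normal allocation. */
        return alloc_page(gfp);
}
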
 /**
-  * is_highmem - helper function to quickly check if a struct zone is a
+ * is_highmem - helper function to quickly check if a struct zone is a
  *              highmem zone or not.  This is an attempt to keep references
  * to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum.
  * @zone - pointer to struct zone variable
...

 /* These two functions are used to setup the per zone pages min values */
 struct ctl_table;
-int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
-                                        void __user *, size_t *, loff_t *);
-int watermark_scale_factor_sysctl_handler(struct ctl_table *, int,
-                                        void __user *, size_t *, loff_t *);
-extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
-int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int,
-                                        void __user *, size_t *, loff_t *);
-int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
-                                        void __user *, size_t *, loff_t *);
-int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
-                                        void __user *, size_t *, loff_t *);
-int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
-                                        void __user *, size_t *, loff_t *);

-extern int numa_zonelist_order_handler(struct ctl_table *, int,
-                                        void __user *, size_t *, loff_t *);
+int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void *, size_t *,
+                loff_t *);
+int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *,
+                size_t *, loff_t *);
+extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
+int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *,
+                size_t *, loff_t *);
+int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
+                void *, size_t *, loff_t *);
+int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
+                void *, size_t *, loff_t *);
+int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
+                void *, size_t *, loff_t *);
+int numa_zonelist_order_handler(struct ctl_table *, int,
+                void *, size_t *, loff_t *);
+extern int percpu_pagelist_fraction;
 extern char numa_zonelist_order[];
 #define NUMA_ZONELIST_ORDER_LEN 16

...
 extern struct pglist_data *first_online_pgdat(void);
 extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat);
 extern struct zone *next_zone(struct zone *zone);
+extern int isolate_anon_lru_page(struct page *page);

 /**
  * for_each_online_pgdat - helper macro to iterate over all online nodes
...
 /**
  * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask
  * @zone - The current zone in the iterator
- * @z - The current pointer within zonelist->zones being iterated
+ * @z - The current pointer within zonelist->_zonerefs being iterated
  * @zlist - The zonelist being iterated
  * @highidx - The zone index of the highest zone to return
  * @nodemask - Nodemask allowed by the allocator
...
                 z = next_zones_zonelist(++z, highidx, nodemask),        \
                         zone = zonelist_zone(z))

-#define for_next_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
+#define for_next_zone_zonelist_nodemask(zone, z, highidx, nodemask) \
         for (zone = z->zone;    \
                 zone;   \
                 z = next_zones_zonelist(++z, highidx, nodemask),        \
...

 #ifdef CONFIG_SPARSEMEM
 #include <asm/sparsemem.h>
-#endif
-
-#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
-        !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
-static inline unsigned long early_pfn_to_nid(unsigned long pfn)
-{
-        BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA));
-        return 0;
-}
 #endif

 #ifdef CONFIG_FLATMEM
...
 #define SECTION_ALIGN_UP(pfn)   (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)
 #define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK)

+#define SUBSECTION_SHIFT 21
+#define SUBSECTION_SIZE (1UL << SUBSECTION_SHIFT)
+
+#define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT)
+#define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT)
+#define PAGE_SUBSECTION_MASK (~(PAGES_PER_SUBSECTION-1))
+
+#if SUBSECTION_SHIFT > SECTION_SIZE_BITS
+#error Subsection size exceeds section size
+#else
+#define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - SUBSECTION_SHIFT))
+#endif
+
+#define SUBSECTION_ALIGN_UP(pfn) ALIGN((pfn), PAGES_PER_SUBSECTION)
+#define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
+
+struct mem_section_usage {
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+        DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
+#endif
+        /* See declaration of similar field in struct zone */
+        unsigned long pageblock_flags[0];
+};
+
+void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
+
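
Illustrative sketch, not part of the patch: with SUBSECTION_SHIFT of 21 and a 4 KiB
page (PAGE_SHIFT of 12), a subsection spans 2^(21-12) = 512 pages, i.e. 2 MiB. The
helper below (name made up) shows how a pfn maps onto its subsection boundaries using
the macros above.

static void subsection_bounds(unsigned long pfn,
                              unsigned long *start_pfn, unsigned long *end_pfn)
{
        *start_pfn = SUBSECTION_ALIGN_DOWN(pfn);        /* first pfn of the subsection */
        *end_pfn = SUBSECTION_ALIGN_UP(pfn + 1);        /* one past the last pfn */
}
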
 struct page;
 struct page_ext;
 struct mem_section {
...
          */
         unsigned long section_mem_map;

-        /* See declaration of similar field in struct zone */
-        unsigned long *pageblock_flags;
+        struct mem_section_usage *usage;
 #ifdef CONFIG_PAGE_EXTENSION
         /*
          * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use
...
 extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
 #endif

+static inline unsigned long *section_to_usemap(struct mem_section *ms)
+{
+        return ms->usage->pageblock_flags;
+}
+
 static inline struct mem_section *__nr_to_section(unsigned long nr)
 {
+        unsigned long root = SECTION_NR_TO_ROOT(nr);
+
+        if (unlikely(root >= NR_SECTION_ROOTS))
+                return NULL;
+
 #ifdef CONFIG_SPARSEMEM_EXTREME
-        if (!mem_section)
+        if (!mem_section || !mem_section[root])
                 return NULL;
 #endif
-        if (!mem_section[SECTION_NR_TO_ROOT(nr)])
-                return NULL;
-        return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
+        return &mem_section[root][nr & SECTION_ROOT_MASK];
 }
-extern int __section_nr(struct mem_section* ms);
-extern unsigned long usemap_size(void);
+extern unsigned long __section_nr(struct mem_section *ms);
+extern size_t mem_section_usage_size(void);

 /*
  * We use the lower bits of the mem_map pointer to store
...
 #define SECTION_MARKED_PRESENT  (1UL<<0)
 #define SECTION_HAS_MEM_MAP     (1UL<<1)
 #define SECTION_IS_ONLINE       (1UL<<2)
-#define SECTION_MAP_LAST_BIT    (1UL<<3)
+#define SECTION_IS_EARLY        (1UL<<3)
+#define SECTION_MAP_LAST_BIT    (1UL<<4)
 #define SECTION_MAP_MASK        (~(SECTION_MAP_LAST_BIT-1))
 #define SECTION_NID_SHIFT       3

...
 static inline int valid_section(struct mem_section *section)
 {
         return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP));
+}
+
+static inline int early_section(struct mem_section *section)
+{
+        return (section && (section->section_mem_map & SECTION_IS_EARLY));
 }

 static inline int valid_section_nr(unsigned long nr)
...
         return __nr_to_section(pfn_to_section_nr(pfn));
 }

-extern int __highest_present_section_nr;
+extern unsigned long __highest_present_section_nr;
+
+static inline int subsection_map_index(unsigned long pfn)
+{
+        return (pfn & ~(PAGE_SECTION_MASK)) / PAGES_PER_SUBSECTION;
+}
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
+{
+        int idx = subsection_map_index(pfn);
+
+        return test_bit(idx, ms->usage->subsection_map);
+}
+#else
+static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
+{
+        return 1;
+}
+#endif

 #ifndef CONFIG_HAVE_ARCH_PFN_VALID
 static inline int pfn_valid(unsigned long pfn)
 {
+        struct mem_section *ms;
+
         if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
                 return 0;
-        return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
+        ms = __nr_to_section(pfn_to_section_nr(pfn));
+        if (!valid_section(ms))
+                return 0;
+        /*
+         * Traditionally early sections always returned pfn_valid() for
+         * the entire section-sized span.
+         */
+        return early_section(ms) || pfn_section_valid(ms, pfn);
 }
 #endif

-static inline int pfn_present(unsigned long pfn)
+static inline int pfn_in_present_section(unsigned long pfn)
 {
         if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
                 return 0;
         return present_section(__nr_to_section(pfn_to_section_nr(pfn)));
+}
+
+static inline unsigned long next_present_section_nr(unsigned long section_nr)
+{
+        while (++section_nr <= __highest_present_section_nr) {
+                if (present_section_nr(section_nr))
+                        return section_nr;
+        }
+
+        return -1;
 }
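
Illustrative sketch, not part of the patch: next_present_section_nr() returns -1 (as an
unsigned long) once it runs past the highest present section, which gives a natural loop
over all present sections; seeding with (unsigned long)-1 makes the first increment wrap
to section 0. The function name is made up.

static void walk_present_sections(void)
{
        unsigned long nr;

        for (nr = next_present_section_nr((unsigned long)-1);
             nr != (unsigned long)-1;
             nr = next_present_section_nr(nr))
                pr_info("section %lu is present\n", nr);
}
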

 /*
...
 #define pfn_to_nid(pfn)         (0)
 #endif

-#define early_pfn_valid(pfn)    pfn_valid(pfn)
 void sparse_init(void);
 #else
 #define sparse_init()   do {} while (0)
 #define sparse_index_init(_sec, _nid)  do {} while (0)
+#define pfn_in_present_section pfn_valid
+#define subsection_map_init(_pfn, _nr_pages) do {} while (0)
 #endif /* CONFIG_SPARSEMEM */

 /*
...
         int last_nid;
 };

-#ifndef early_pfn_valid
-#define early_pfn_valid(pfn)    (1)
-#endif
-
-void memory_present(int nid, unsigned long start, unsigned long end);
-
 /*
  * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we
-  * need to check pfn validility within that MAX_ORDER_NR_PAGES block.
+ * need to check pfn validity within that MAX_ORDER_NR_PAGES block.
  * pfn_valid_within() should be used in this case; we optimise this away
  * when we have no holes within a MAX_ORDER_NR_PAGES block.
  */
...
 #else
 #define pfn_valid_within(pfn)   (1)
 #endif
-
-#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL
-/*
- * pfn_valid() is meant to be able to tell if a given PFN has valid memmap
- * associated with it or not. This means that a struct page exists for this
- * pfn. The caller cannot assume the page is fully initialized in general.
- * Hotplugable pages might not have been onlined yet. pfn_to_online_page()
- * will ensure the struct page is fully online and initialized. Special pages
- * (e.g. ZONE_DEVICE) are never onlined and should be treated accordingly.
- *
- * In FLATMEM, it is expected that holes always have valid memmap as long as
- * there is valid PFNs either side of the hole. In SPARSEMEM, it is assumed
- * that a valid section has a memmap for the entire section.
- *
- * However, an ARM, and maybe other embedded architectures in the future
- * free memmap backing holes to save memory on the assumption the memmap is
- * never used. The page_zone linkages are then broken even though pfn_valid()
- * returns true. A walker of the full memmap must then do this additional
- * check to ensure the memmap they are looking at is sane by making sure
- * the zone and PFN linkages are still valid. This is expensive, but walkers
- * of the full memmap are extremely rare.
- */
-bool memmap_valid_within(unsigned long pfn,
-                                struct page *page, struct zone *zone);
-#else
-static inline bool memmap_valid_within(unsigned long pfn,
-                                struct page *page, struct zone *zone)
-{
-        return true;
-}
-#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */

 #endif /* !__GENERATING_BOUNDS.H */
 #endif /* !__ASSEMBLY__ */