--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /* internal.h: mm/ internal definitions
  *
  * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 #ifndef __MM_INTERNAL_H
 #define __MM_INTERNAL_H
@@ -40,10 +36,30 @@
 
 vm_fault_t do_swap_page(struct vm_fault *vmf);
 
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+extern struct vm_area_struct *get_vma(struct mm_struct *mm,
+				      unsigned long addr);
+extern void put_vma(struct vm_area_struct *vma);
+
+static inline bool vma_has_changed(struct vm_fault *vmf)
+{
+	int ret = RB_EMPTY_NODE(&vmf->vma->vm_rb);
+	unsigned int seq = READ_ONCE(vmf->vma->vm_sequence.sequence);
+
+	/*
+	 * Matches both the wmb in write_seqlock_{begin,end}() and
+	 * the wmb in vma_rb_erase().
+	 */
+	smp_rmb();
+
+	return ret || seq != vmf->sequence;
+}
+#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
+
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
 
-static inline bool can_madv_dontneed_vma(struct vm_area_struct *vma)
+static inline bool can_madv_lru_vma(struct vm_area_struct *vma)
 {
 	return !(vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP));
 }
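/*
 * Illustrative sketch, not part of this patch: a speculative fault path
 * would be expected to take a reference with get_vma(), snapshot the VMA
 * sequence count into vmf->sequence, service the fault without mmap_lock,
 * and use vma_has_changed() to detect a concurrent modification. Everything
 * here other than get_vma()/put_vma()/vma_has_changed() is an assumption.
 */
static vm_fault_t spf_example(struct mm_struct *mm, unsigned long addr,
			      struct vm_fault *vmf)
{
	vm_fault_t ret = VM_FAULT_RETRY;

	vmf->vma = get_vma(mm, addr);		/* takes a reference */
	if (!vmf->vma)
		return VM_FAULT_RETRY;

	vmf->sequence = raw_read_seqcount(&vmf->vma->vm_sequence);

	/* ... walk the page tables and try to service the fault ... */

	if (!vma_has_changed(vmf))
		ret = 0;			/* speculative path succeeded */

	put_vma(vmf->vma);			/* drop the reference */
	return ret;
}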
@@ -53,18 +69,41 @@
 			     unsigned long addr, unsigned long end,
 			     struct zap_details *details);
 
-extern unsigned int __do_page_cache_readahead(struct address_space *mapping,
-		struct file *filp, pgoff_t offset, unsigned long nr_to_read,
+void do_page_cache_ra(struct readahead_control *, unsigned long nr_to_read,
 		unsigned long lookahead_size);
-
-/*
- * Submit IO for the read-ahead request in file_ra_state.
- */
-static inline unsigned long ra_submit(struct file_ra_state *ra,
-		struct address_space *mapping, struct file *filp)
+void force_page_cache_ra(struct readahead_control *, struct file_ra_state *,
+		unsigned long nr);
+static inline void force_page_cache_readahead(struct address_space *mapping,
+		struct file *file, pgoff_t index, unsigned long nr_to_read)
 {
-	return __do_page_cache_readahead(mapping, filp,
-					ra->start, ra->size, ra->async_size);
+	DEFINE_READAHEAD(ractl, file, mapping, index);
+	force_page_cache_ra(&ractl, &file->f_ra, nr_to_read);
+}
+
+struct page *find_get_entry(struct address_space *mapping, pgoff_t index);
+struct page *find_lock_entry(struct address_space *mapping, pgoff_t index);
+
+/**
+ * page_evictable - test whether a page is evictable
+ * @page: the page to test
+ *
+ * Test whether page is evictable--i.e., should be placed on active/inactive
+ * lists vs unevictable list.
+ *
+ * Reasons page might not be evictable:
+ * (1) page's mapping marked unevictable
+ * (2) page is part of an mlocked VMA
+ *
+ */
+static inline bool page_evictable(struct page *page)
+{
+	bool ret;
+
+	/* Prevent address_space of inode and swap cache from being freed */
+	rcu_read_lock();
+	ret = !mapping_unevictable(page_mapping(page)) && !PageMlocked(page);
+	rcu_read_unlock();
+	return ret;
 }
 
 /*
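/*
 * Illustrative sketch, not part of this patch: with the readahead_control
 * conversion above, an in-kernel caller that wants to prime the page cache
 * around @index just uses the new wrapper. prime_cache_around() is a
 * made-up name for illustration only.
 */
static void prime_cache_around(struct file *file, pgoff_t index)
{
	struct address_space *mapping = file->f_mapping;

	/* start read-ahead of 32 pages at @index, honouring file->f_ra */
	force_page_cache_readahead(mapping, file, index, 32);
}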
@@ -106,12 +145,12 @@
  * between functions involved in allocations, including the alloc_pages*
  * family of functions.
  *
- * nodemask, migratetype and high_zoneidx are initialized only once in
+ * nodemask, migratetype and highest_zoneidx are initialized only once in
  * __alloc_pages_nodemask() and then never change.
  *
- * zonelist, preferred_zone and classzone_idx are set first in
+ * zonelist, preferred_zone and highest_zoneidx are set first in
  * __alloc_pages_nodemask() for the fast path, and might be later changed
- * in __alloc_pages_slowpath(). All other functions pass the whole strucure
+ * in __alloc_pages_slowpath(). All other functions pass the whole structure
  * by a const pointer.
  */
 struct alloc_context {
@@ -119,11 +158,20 @@
 	nodemask_t *nodemask;
 	struct zoneref *preferred_zoneref;
 	int migratetype;
-	enum zone_type high_zoneidx;
+
+	/*
+	 * highest_zoneidx represents highest usable zone index of
+	 * the allocation request. Due to the nature of the zone,
+	 * memory on lower zone than the highest_zoneidx will be
+	 * protected by lowmem_reserve[highest_zoneidx].
+	 *
+	 * highest_zoneidx is also used by reclaim/compaction to limit
+	 * the target zone since higher zone than this index cannot be
+	 * usable for this allocation request.
+	 */
+	enum zone_type highest_zoneidx;
 	bool spread_dirty_pages;
 };
-
-#define ac_classzone_idx(ac) zonelist_zone_idx(ac->preferred_zoneref)
 
 /*
  * Locate the struct page for both the matching buddy in our
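/*
 * Illustrative sketch, not from this patch: conceptually, the allocator
 * fast path seeds an alloc_context roughly like this before walking the
 * zonelist (the real code lives in the __alloc_pages_nodemask() path);
 * fill_alloc_context() is a made-up name for illustration only.
 */
static void fill_alloc_context(struct alloc_context *ac, gfp_t gfp_mask,
			       int preferred_nid, nodemask_t *nodemask)
{
	ac->highest_zoneidx = gfp_zone(gfp_mask);	/* highest usable zone */
	ac->zonelist = node_zonelist(preferred_nid, gfp_mask);
	ac->nodemask = nodemask;
	ac->migratetype = gfp_migratetype(gfp_mask);
	ac->spread_dirty_pages = gfp_mask & __GFP_WRITE;
}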
@@ -161,12 +209,18 @@
 }
 
 extern int __isolate_free_page(struct page *page, unsigned int order);
-extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
+extern void __putback_isolated_page(struct page *page, unsigned int order,
+					int mt);
+extern void memblock_free_pages(struct page *page, unsigned long pfn,
 					unsigned int order);
+extern void __free_pages_core(struct page *page, unsigned int order);
 extern void prep_compound_page(struct page *page, unsigned int order);
 extern void post_alloc_hook(struct page *page, unsigned int order,
 					gfp_t gfp_flags);
 extern int user_min_free_kbytes;
+
+extern void zone_pcp_update(struct zone *zone);
+extern void zone_pcp_reset(struct zone *zone);
 
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 
@@ -183,27 +237,40 @@
 struct compact_control {
 	struct list_head freepages;	/* List of free pages to migrate to */
 	struct list_head migratepages;	/* List of pages being migrated */
-	struct zone *zone;
-	unsigned long nr_freepages;	/* Number of isolated free pages */
-	unsigned long nr_migratepages;	/* Number of pages to migrate */
-	unsigned long total_migrate_scanned;
-	unsigned long total_free_scanned;
+	unsigned int nr_freepages;	/* Number of isolated free pages */
+	unsigned int nr_migratepages;	/* Number of pages to migrate */
 	unsigned long free_pfn;		/* isolate_freepages search base */
 	unsigned long migrate_pfn;	/* isolate_migratepages search base */
-	unsigned long last_migrated_pfn;/* Not yet flushed page being freed */
+	unsigned long fast_start_pfn;	/* a pfn to start linear scan from */
+	struct zone *zone;
+	unsigned long total_migrate_scanned;
+	unsigned long total_free_scanned;
+	unsigned short fast_search_fail;/* failures to use free list searches */
+	short search_order;		/* order to start a fast search at */
 	const gfp_t gfp_mask;		/* gfp mask of a direct compactor */
 	int order;			/* order a direct compactor needs */
 	int migratetype;		/* migratetype of direct compactor */
 	const unsigned int alloc_flags;	/* alloc flags of a direct compactor */
-	const int classzone_idx;	/* zone index of a direct compactor */
+	const int highest_zoneidx;	/* zone index of a direct compactor */
 	enum migrate_mode mode;		/* Async or sync migration mode */
 	bool ignore_skip_hint;		/* Scan blocks even if marked skip */
 	bool no_set_skip_hint;		/* Don't mark blocks for skipping */
 	bool ignore_block_suitable;	/* Scan blocks considered unsuitable */
 	bool direct_compaction;		/* False from kcompactd or /proc/... */
+	bool proactive_compaction;	/* kcompactd proactive compaction */
 	bool whole_zone;		/* Whole zone should/has been scanned */
 	bool contended;			/* Signal lock or sched contention */
-	bool finishing_block;		/* Finishing current pageblock */
+	bool rescan;			/* Rescanning the same pageblock */
+	bool alloc_contig;		/* alloc_contig_range allocation */
+};
+
+/*
+ * Used in direct compaction when a page should be taken from the freelists
+ * immediately when one is created during the free path.
+ */
+struct capture_control {
+	struct compact_control *cc;
+	struct page *page;
+};
 
 unsigned long
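/*
 * Illustrative sketch, not part of this patch: direct compaction publishes
 * a capture_control through current->capture_control so the page free path
 * can hand back a suitably sized page as soon as one appears;
 * run_compaction_and_capture() is a made-up name for illustration.
 */
static struct page *run_compaction_and_capture(struct compact_control *cc)
{
	struct capture_control capc = {
		.cc = cc,
		.page = NULL,
	};

	WRITE_ONCE(current->capture_control, &capc);

	/* ... run the compaction scanners; the free path may set capc.page ... */

	WRITE_ONCE(current->capture_control, NULL);
	return capc.page;	/* captured page, or NULL if none was freed */
}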
@@ -223,16 +290,16 @@
  * page from being allocated in parallel and returning garbage as the order.
  * If a caller does not hold page_zone(page)->lock, it must guarantee that the
  * page cannot be allocated or merged in parallel. Alternatively, it must
- * handle invalid values gracefully, and use page_order_unsafe() below.
+ * handle invalid values gracefully, and use buddy_order_unsafe() below.
  */
-static inline unsigned int page_order(struct page *page)
+static inline unsigned int buddy_order(struct page *page)
 {
 	/* PageBuddy() must be checked by the caller */
 	return page_private(page);
 }
 
 /*
- * Like page_order(), but for callers who cannot afford to hold the zone lock.
+ * Like buddy_order(), but for callers who cannot afford to hold the zone lock.
  * PageBuddy() should be checked first by the caller to minimize race window,
  * and invalid values must be handled gracefully.
  *
@@ -242,7 +309,7 @@
  * times, potentially observing different values in the tests and the actual
  * use of the result.
  */
-#define page_order_unsafe(page)		READ_ONCE(page_private(page))
+#define buddy_order_unsafe(page)	READ_ONCE(page_private(page))
 
 static inline bool is_cow_mapping(vm_flags_t flags)
 {
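/*
 * Illustrative sketch, not part of this patch, of the locking rule spelled
 * out above: read the buddy order under zone->lock, or treat the unlocked
 * value from buddy_order_unsafe() as untrusted and range-check it;
 * example_read_order() is a made-up name for illustration.
 */
static unsigned int example_read_order(struct zone *zone, struct page *page)
{
	unsigned int order = MAX_ORDER;
	unsigned long flags;

	spin_lock_irqsave(&zone->lock, flags);
	if (PageBuddy(page))
		order = buddy_order(page);	/* stable while zone->lock is held */
	spin_unlock_irqrestore(&zone->lock, flags);

	/* Without the lock, use buddy_order_unsafe() and clamp the result. */
	return order < MAX_ORDER ? order : 0;
}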
@@ -282,7 +349,8 @@
 
 /* mm/util.c */
 void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
-		struct vm_area_struct *prev, struct rb_node *rb_parent);
+		struct vm_area_struct *prev);
+void __vma_unlink_list(struct mm_struct *mm, struct vm_area_struct *vma);
 
 #ifdef CONFIG_MMU
 extern long populate_vma_page_range(struct vm_area_struct *vma,
@@ -295,7 +363,7 @@
 }
 
 /*
- * must be called with vma's mmap_sem held for read or write, and page locked.
+ * must be called with vma's mmap_lock held for read or write, and page locked.
  */
 extern void mlock_vma_page(struct page *page);
 extern unsigned int munlock_vma_page(struct page *page);
@@ -319,7 +387,7 @@
 static inline void mlock_migrate_page(struct page *newpage, struct page *page)
 {
 	if (TestClearPageMlocked(page)) {
-		int nr_pages = hpage_nr_pages(page);
+		int nr_pages = thp_nr_pages(page);
 
 		/* Holding pmd lock, no change in irq context: __mod is safe */
 		__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
@@ -350,7 +418,7 @@
 		if (address < vma->vm_start || address >= vma->vm_end)
 			address = -EFAULT;
 	} else if (PageHead(page) &&
-		   pgoff + (1UL << compound_order(page)) - 1 >= vma->vm_pgoff) {
+		   pgoff + compound_nr(page) - 1 >= vma->vm_pgoff) {
 		/* Test above avoids possibility of wrap to 0 on 32-bit */
 		address = vma->vm_start;
 	} else {
@@ -371,12 +439,33 @@
 	unsigned long address;
 
 	VM_BUG_ON_PAGE(PageKsm(page), page);	/* KSM page->index unusable */
-	pgoff = page_to_pgoff(page) + (1UL << compound_order(page));
+	pgoff = page_to_pgoff(page) + compound_nr(page);
 	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
 	/* Check for address beyond vma (or wrapped through 0?) */
 	if (address < vma->vm_start || address > vma->vm_end)
 		address = vma->vm_end;
 	return address;
+}
+
+static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
+						    struct file *fpin)
+{
+	int flags = vmf->flags;
+
+	if (fpin)
+		return fpin;
+
+	/*
+	 * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or
+	 * anything, so we only pin the file and drop the mmap_lock if only
+	 * FAULT_FLAG_ALLOW_RETRY is set, while this is the first attempt.
+	 */
+	if (fault_flag_allow_retry_first(flags) &&
+	    !(flags & FAULT_FLAG_RETRY_NOWAIT)) {
+		fpin = get_file(vmf->vma->vm_file);
+		mmap_read_unlock(vmf->vma->vm_mm);
+	}
+	return fpin;
 }
 
 #else /* !CONFIG_MMU */
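/*
 * Illustrative sketch, not part of this patch: a file fault path that may
 * sleep for I/O pins the file and drops mmap_lock via the helper above,
 * then asks the caller to retry; example_fault_wait() is a made-up name.
 */
static vm_fault_t example_fault_wait(struct vm_fault *vmf, struct page *page)
{
	struct file *fpin = NULL;

	fpin = maybe_unlock_mmap_for_io(vmf, fpin);
	wait_on_page_locked(page);	/* may sleep; mmap_lock may be dropped */
	if (fpin) {
		fput(fpin);		/* drop the pin taken by the helper */
		return VM_FAULT_RETRY;	/* caller re-takes mmap_lock and retries */
	}
	return 0;
}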
@@ -493,7 +582,7 @@
 			unsigned long, unsigned long);
 
 extern void set_pageblock_order(void);
-unsigned long reclaim_clean_pages_from_list(struct zone *zone,
+unsigned int reclaim_clean_pages_from_list(struct zone *zone,
 					    struct list_head *page_list);
 /* The ALLOC_WMARK bits are used as an index to zone->watermark */
 #define ALLOC_WMARK_MIN		WMARK_MIN
@@ -515,10 +604,16 @@
 #define ALLOC_OOM		ALLOC_NO_WATERMARKS
 #endif
 
-#define ALLOC_HARDER		0x10 /* try to alloc harder */
-#define ALLOC_HIGH		0x20 /* __GFP_HIGH set */
-#define ALLOC_CPUSET		0x40 /* check for correct cpuset */
-#define ALLOC_CMA		0x80 /* allow allocations from CMA areas */
+#define ALLOC_HARDER		 0x10 /* try to alloc harder */
+#define ALLOC_HIGH		 0x20 /* __GFP_HIGH set */
+#define ALLOC_CPUSET		 0x40 /* check for correct cpuset */
+#define ALLOC_CMA		 0x80 /* allow allocations from CMA areas */
+#ifdef CONFIG_ZONE_DMA32
+#define ALLOC_NOFRAGMENT	0x100 /* avoid mixing pageblock types */
+#else
+#define ALLOC_NOFRAGMENT	  0x0
+#endif
+#define ALLOC_KSWAPD		0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
 
 enum ttu_flags;
 struct tlbflush_unmap_batch;
@@ -561,5 +656,11 @@
 }
 
 void setup_zone_pageset(struct zone *zone);
-extern struct page *alloc_new_node_page(struct page *page, unsigned long node);
+
+struct migration_target_control {
+	int nid;		/* preferred node id */
+	nodemask_t *nmask;
+	gfp_t gfp_mask;
+};
+
 #endif /* __MM_INTERNAL_H */
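/*
 * Illustrative sketch, not part of this patch: migration callers typically
 * pass this control structure, cast to unsigned long, as the private
 * argument of a new-page callback such as alloc_migration_target();
 * example_migrate_to_node() and the gfp choice are illustrative assumptions.
 */
static int example_migrate_to_node(struct list_head *pagelist, int nid)
{
	struct migration_target_control mtc = {
		.nid = nid,
		.nmask = NULL,		/* no nodemask restriction */
		.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
	};

	return migrate_pages(pagelist, alloc_migration_target, NULL,
			     (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL);
}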