| .. | .. |
|---|
| 15 | 15 | #include <linux/atomic.h> |
|---|
| 16 | 16 | #include <linux/debug_locks.h> |
|---|
| 17 | 17 | #include <linux/mm_types.h> |
|---|
| 18 | +#include <linux/mmap_lock.h> |
|---|
| 18 | 19 | #include <linux/range.h> |
|---|
| 19 | 20 | #include <linux/pfn.h> |
|---|
| 20 | 21 | #include <linux/percpu-refcount.h> |
|---|
| .. | .. |
|---|
| 23 | 24 | #include <linux/resource.h> |
|---|
| 24 | 25 | #include <linux/page_ext.h> |
|---|
| 25 | 26 | #include <linux/err.h> |
|---|
| 27 | +#include <linux/page-flags.h> |
|---|
| 26 | 28 | #include <linux/page_ref.h> |
|---|
| 27 | 29 | #include <linux/memremap.h> |
|---|
| 28 | 30 | #include <linux/overflow.h> |
|---|
| 31 | +#include <linux/sizes.h> |
|---|
| 32 | +#include <linux/sched.h> |
|---|
| 33 | +#include <linux/pgtable.h> |
|---|
| 34 | +#include <linux/kasan.h> |
|---|
| 35 | +#include <linux/page_pinner.h> |
|---|
| 29 | 36 | #include <linux/android_kabi.h> |
|---|
| 30 | 37 | |
|---|
| 31 | 38 | struct mempolicy; |
|---|
| .. | .. |
|---|
| 35 | 42 | struct user_struct; |
|---|
| 36 | 43 | struct writeback_control; |
|---|
| 37 | 44 | struct bdi_writeback; |
|---|
| 45 | +struct pt_regs; |
|---|
| 46 | + |
|---|
| 47 | +extern int sysctl_page_lock_unfairness; |
|---|
| 38 | 48 | |
|---|
| 39 | 49 | void init_mm_internals(void); |
|---|
| 40 | 50 | |
|---|
| .. | .. |
|---|
| 49 | 59 | static inline void set_max_mapnr(unsigned long limit) { } |
|---|
| 50 | 60 | #endif |
|---|
| 51 | 61 | |
|---|
| 52 | | -extern unsigned long totalram_pages; |
|---|
| 62 | +extern atomic_long_t _totalram_pages; |
|---|
| 63 | +static inline unsigned long totalram_pages(void) |
|---|
| 64 | +{ |
|---|
| 65 | + return (unsigned long)atomic_long_read(&_totalram_pages); |
|---|
| 66 | +} |
|---|
| 67 | + |
|---|
| 68 | +static inline void totalram_pages_inc(void) |
|---|
| 69 | +{ |
|---|
| 70 | + atomic_long_inc(&_totalram_pages); |
|---|
| 71 | +} |
|---|
| 72 | + |
|---|
| 73 | +static inline void totalram_pages_dec(void) |
|---|
| 74 | +{ |
|---|
| 75 | + atomic_long_dec(&_totalram_pages); |
|---|
| 76 | +} |
|---|
| 77 | + |
|---|
| 78 | +static inline void totalram_pages_add(long count) |
|---|
| 79 | +{ |
|---|
| 80 | + atomic_long_add(count, &_totalram_pages); |
|---|
| 81 | +} |
|---|
| 82 | + |
|---|
| 53 | 83 | extern void * high_memory; |
|---|
| 54 | 84 | extern int page_cluster; |
|---|
| 55 | 85 | |
|---|
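The conversion above turns `totalram_pages` from an exported `unsigned long` into an `atomic_long_t` hidden behind accessors, so readers go through `totalram_pages()` and writers through the `_inc`/`_dec`/`_add` helpers. A minimal sketch of the new read side (the cache-sizing heuristic and the function name are illustrative, not part of this patch):

```c
#include <linux/mm.h>

/*
 * Illustrative only: size a hypothetical cache as a fraction of RAM.
 * Pre-patch code read the exported variable directly; post-patch code
 * must call totalram_pages(), which wraps atomic_long_read().
 */
static unsigned long my_cache_target_pages(void)
{
	return totalram_pages() / 64;
}
```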
| .. | .. |
|---|
| 71 | 101 | #endif |
|---|
| 72 | 102 | |
|---|
| 73 | 103 | #include <asm/page.h> |
|---|
| 74 | | -#include <asm/pgtable.h> |
|---|
| 75 | 104 | #include <asm/processor.h> |
|---|
| 76 | 105 | |
|---|
| 77 | 106 | /* |
|---|
| .. | .. |
|---|
| 87 | 116 | |
|---|
| 88 | 117 | #ifndef __pa_symbol |
|---|
| 89 | 118 | #define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0)) |
|---|
| 119 | +#endif |
|---|
| 120 | + |
|---|
| 121 | +#ifndef __va_function |
|---|
| 122 | +#define __va_function(x) (x) |
|---|
| 123 | +#endif |
|---|
| 124 | + |
|---|
| 125 | +#ifndef __pa_function |
|---|
| 126 | +#define __pa_function(x) __pa_symbol(x) |
|---|
| 90 | 127 | #endif |
|---|
| 91 | 128 | |
|---|
| 92 | 129 | #ifndef page_to_virt |
|---|
| .. | .. |
|---|
| 110 | 147 | |
|---|
| 111 | 148 | /* |
|---|
| 112 | 149 | * On some architectures it is expensive to call memset() for small sizes. |
|---|
| 113 | | - * Those architectures should provide their own implementation of "struct page" |
|---|
| 114 | | - * zeroing by defining this macro in <asm/pgtable.h>. |
|---|
| 150 | + * If an architecture decides to implement its own version of |
|---|
| 151 | + * mm_zero_struct_page it should wrap the defines below in an #ifndef and |
|---|
| 152 | + * define its own version of this macro in <asm/pgtable.h> |
|---|
| 115 | 153 | */ |
|---|
| 116 | | -#ifndef mm_zero_struct_page |
|---|
| 154 | +#if BITS_PER_LONG == 64 |
|---|
| 155 | +/* This function must be updated when the size of struct page grows above 80 |
|---|
| 156 | + * or reduces below 56. The idea is that the compiler optimizes out the switch() |
|---|
| 157 | + * statement and leaves only move/store instructions. Also the compiler can |
|---|
| 158 | + * combine write statements if they are both assignments and can be reordered, |
|---|
| 159 | + * this can result in several of the writes here being dropped. |
|---|
| 160 | + */ |
|---|
| 161 | +#define mm_zero_struct_page(pp) __mm_zero_struct_page(pp) |
|---|
| 162 | +static inline void __mm_zero_struct_page(struct page *page) |
|---|
| 163 | +{ |
|---|
| 164 | + unsigned long *_pp = (void *)page; |
|---|
| 165 | + |
|---|
| 166 | + /* Check that struct page is either 56, 64, 72, or 80 bytes */ |
|---|
| 167 | + BUILD_BUG_ON(sizeof(struct page) & 7); |
|---|
| 168 | + BUILD_BUG_ON(sizeof(struct page) < 56); |
|---|
| 169 | + BUILD_BUG_ON(sizeof(struct page) > 80); |
|---|
| 170 | + |
|---|
| 171 | + switch (sizeof(struct page)) { |
|---|
| 172 | + case 80: |
|---|
| 173 | + _pp[9] = 0; |
|---|
| 174 | + fallthrough; |
|---|
| 175 | + case 72: |
|---|
| 176 | + _pp[8] = 0; |
|---|
| 177 | + fallthrough; |
|---|
| 178 | + case 64: |
|---|
| 179 | + _pp[7] = 0; |
|---|
| 180 | + fallthrough; |
|---|
| 181 | + case 56: |
|---|
| 182 | + _pp[6] = 0; |
|---|
| 183 | + _pp[5] = 0; |
|---|
| 184 | + _pp[4] = 0; |
|---|
| 185 | + _pp[3] = 0; |
|---|
| 186 | + _pp[2] = 0; |
|---|
| 187 | + _pp[1] = 0; |
|---|
| 188 | + _pp[0] = 0; |
|---|
| 189 | + } |
|---|
| 190 | +} |
|---|
| 191 | +#else |
|---|
| 117 | 192 | #define mm_zero_struct_page(pp) ((void)memset((pp), 0, sizeof(struct page))) |
|---|
| 118 | 193 | #endif |
|---|
| 119 | 194 | |
|---|
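The open-coded `__mm_zero_struct_page()` above depends on the compiler collapsing the `switch` on `sizeof(struct page)` into a straight run of stores. A hedged sanity-check sketch of the equivalence the helper is meant to preserve (illustrative only, not part of the patch):

```c
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/bug.h>

/*
 * Illustrative check: zeroing via mm_zero_struct_page() must be
 * indistinguishable from a plain memset() over the whole struct page.
 */
static void check_zero_struct_page(struct page *scratch)
{
	struct page ref;

	memset(&ref, 0, sizeof(ref));
	mm_zero_struct_page(scratch);
	WARN_ON(memcmp(scratch, &ref, sizeof(ref)) != 0);
}
```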
| .. | .. |
|---|
| 145 | 220 | extern int sysctl_overcommit_ratio; |
|---|
| 146 | 221 | extern unsigned long sysctl_overcommit_kbytes; |
|---|
| 147 | 222 | |
|---|
| 148 | | -extern int overcommit_ratio_handler(struct ctl_table *, int, void __user *, |
|---|
| 149 | | - size_t *, loff_t *); |
|---|
| 150 | | -extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *, |
|---|
| 151 | | - size_t *, loff_t *); |
|---|
| 223 | +int overcommit_ratio_handler(struct ctl_table *, int, void *, size_t *, |
|---|
| 224 | + loff_t *); |
|---|
| 225 | +int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *, |
|---|
| 226 | + loff_t *); |
|---|
| 227 | +int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, |
|---|
| 228 | + loff_t *); |
|---|
| 152 | 229 | |
|---|
| 153 | 230 | #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) |
|---|
| 154 | 231 | |
|---|
| .. | .. |
|---|
| 157 | 234 | |
|---|
| 158 | 235 | /* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */ |
|---|
| 159 | 236 | #define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE) |
|---|
| 237 | + |
|---|
| 238 | +#define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) |
|---|
| 160 | 239 | |
|---|
| 161 | 240 | /* |
|---|
| 162 | 241 | * Linux kernel virtual memory manager primitives. |
|---|
| .. | .. |
|---|
| 267 | 346 | #elif defined(CONFIG_SPARC64) |
|---|
| 268 | 347 | # define VM_SPARC_ADI VM_ARCH_1 /* Uses ADI tag for access control */ |
|---|
| 269 | 348 | # define VM_ARCH_CLEAR VM_SPARC_ADI |
|---|
| 349 | +#elif defined(CONFIG_ARM64) |
|---|
| 350 | +# define VM_ARM64_BTI VM_ARCH_1 /* BTI guarded page, a.k.a. GP bit */ |
|---|
| 351 | +# define VM_ARCH_CLEAR VM_ARM64_BTI |
|---|
| 270 | 352 | #elif !defined(CONFIG_MMU) |
|---|
| 271 | 353 | # define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */ |
|---|
| 272 | 354 | #endif |
|---|
| 273 | 355 | |
|---|
| 274 | | -#if defined(CONFIG_X86_INTEL_MPX) |
|---|
| 275 | | -/* MPX specific bounds table or bounds directory */ |
|---|
| 276 | | -# define VM_MPX VM_HIGH_ARCH_4 |
|---|
| 356 | +#if defined(CONFIG_ARM64_MTE) |
|---|
| 357 | +# define VM_MTE VM_HIGH_ARCH_0 /* Use Tagged memory for access control */ |
|---|
| 358 | +# define VM_MTE_ALLOWED VM_HIGH_ARCH_1 /* Tagged memory permitted */ |
|---|
| 277 | 359 | #else |
|---|
| 278 | | -# define VM_MPX VM_NONE |
|---|
| 360 | +# define VM_MTE VM_NONE |
|---|
| 361 | +# define VM_MTE_ALLOWED VM_NONE |
|---|
| 279 | 362 | #endif |
|---|
| 280 | 363 | |
|---|
| 281 | 364 | #ifndef VM_GROWSUP |
|---|
| 282 | 365 | # define VM_GROWSUP VM_NONE |
|---|
| 283 | 366 | #endif |
|---|
| 284 | 367 | |
|---|
| 368 | +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR |
|---|
| 369 | +# define VM_UFFD_MINOR_BIT 37 |
|---|
| 370 | +# define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */ |
|---|
| 371 | +#else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ |
|---|
| 372 | +# define VM_UFFD_MINOR VM_NONE |
|---|
| 373 | +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ |
|---|
| 374 | + |
|---|
| 285 | 375 | /* Bits set in the VMA until the stack is in its final location */ |
|---|
| 286 | 376 | #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ) |
|---|
| 377 | + |
|---|
| 378 | +#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) |
|---|
| 379 | + |
|---|
| 380 | +/* Common data flag combinations */ |
|---|
| 381 | +#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \ |
|---|
| 382 | + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) |
|---|
| 383 | +#define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \ |
|---|
| 384 | + VM_MAYWRITE | VM_MAYEXEC) |
|---|
| 385 | +#define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \ |
|---|
| 386 | + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) |
|---|
| 387 | + |
|---|
| 388 | +#ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */ |
|---|
| 389 | +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC |
|---|
| 390 | +#endif |
|---|
| 287 | 391 | |
|---|
| 288 | 392 | #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ |
|---|
| 289 | 393 | #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS |
|---|
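The `VM_DATA_FLAGS_*` combinations introduced above let each architecture express its default data-segment protections in one line rather than repeating the full flag set. A hedged sketch of what an arch override could look like (purely illustrative; consult the architecture's real `<asm/page.h>`):

```c
/*
 * Illustrative arch overrides (not taken from any real <asm/page.h>):
 * an arch honouring READ_IMPLIES_EXEC picks the TASK_EXEC variant, one
 * that never maps data executable picks NON_EXEC. Without an override,
 * <linux/mm.h> falls back to VM_DATA_FLAGS_EXEC.
 */
#define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_TSK_EXEC
/* #define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_NON_EXEC */
```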
| .. | .. |
|---|
| 297 | 401 | |
|---|
| 298 | 402 | #define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) |
|---|
| 299 | 403 | |
|---|
| 404 | +/* VMA basic access permission flags */ |
|---|
| 405 | +#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) |
|---|
| 406 | + |
|---|
| 407 | + |
|---|
| 300 | 408 | /* |
|---|
| 301 | 409 | * Special vmas that are non-mergable, non-mlock()able. |
|---|
| 302 | | - * Note: mm/huge_memory.c VM_NO_THP depends on this definition. |
|---|
| 303 | 410 | */ |
|---|
| 304 | 411 | #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) |
|---|
| 412 | + |
|---|
| 413 | +/* This mask prevents VMA from being scanned with khugepaged */ |
|---|
| 414 | +#define VM_NO_KHUGEPAGED (VM_SPECIAL | VM_HUGETLB) |
|---|
| 305 | 415 | |
|---|
| 306 | 416 | /* This mask defines which mm->def_flags a process can inherit its parent */ |
|---|
| 307 | 417 | #define VM_INIT_DEF_MASK VM_NOHUGEPAGE |
|---|
| .. | .. |
|---|
| 321 | 431 | */ |
|---|
| 322 | 432 | extern pgprot_t protection_map[16]; |
|---|
| 323 | 433 | |
|---|
| 324 | | -#define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ |
|---|
| 325 | | -#define FAULT_FLAG_MKWRITE 0x02 /* Fault was mkwrite of existing pte */ |
|---|
| 326 | | -#define FAULT_FLAG_ALLOW_RETRY 0x04 /* Retry fault if blocking */ |
|---|
| 327 | | -#define FAULT_FLAG_RETRY_NOWAIT 0x08 /* Don't drop mmap_sem and wait when retrying */ |
|---|
| 328 | | -#define FAULT_FLAG_KILLABLE 0x10 /* The fault task is in SIGKILL killable region */ |
|---|
| 329 | | -#define FAULT_FLAG_TRIED 0x20 /* Second try */ |
|---|
| 330 | | -#define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */ |
|---|
| 331 | | -#define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */ |
|---|
| 332 | | -#define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */ |
|---|
| 434 | +/** |
|---|
| 435 | + * Fault flag definitions. |
|---|
| 436 | + * |
|---|
| 437 | + * @FAULT_FLAG_WRITE: Fault was a write fault. |
|---|
| 438 | + * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE. |
|---|
| 439 | + * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked. |
|---|
| 440 | + * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying. |
|---|
| 441 | + * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region. |
|---|
| 442 | + * @FAULT_FLAG_TRIED: The fault has been tried once. |
|---|
| 443 | + * @FAULT_FLAG_USER: The fault originated in userspace. |
|---|
| 444 | + * @FAULT_FLAG_REMOTE: The fault is not for current task/mm. |
|---|
| 445 | + * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch. |
|---|
| 446 | + * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals. |
|---|
| 447 | + * |
|---|
| 448 | + * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we control |
|---|
| 449 | + * whether page faults are allowed to retry by setting these two |
|---|
| 450 | + * fault flags correctly. Currently there can be three legal combinations: |
|---|
| 451 | + * |
|---|
| 452 | + * (a) ALLOW_RETRY and !TRIED: this means the page fault allows retry, and |
|---|
| 453 | + * this is the first try |
|---|
| 454 | + * |
|---|
| 455 | + * (b) ALLOW_RETRY and TRIED: this means the page fault allows retry, and |
|---|
| 456 | + * we've already tried at least once |
|---|
| 457 | + * |
|---|
| 458 | + * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry |
|---|
| 459 | + * |
|---|
| 460 | + * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never |
|---|
| 461 | + * be used. Note that page faults can be allowed to retry multiple times, |
|---|
| 462 | + * in which case we'll have an initial fault with flags (a) then later on |
|---|
| 463 | + * continuous faults with flags (b). We should always try to detect pending |
|---|
| 464 | + * signals before a retry to make sure the continuous page faults can still be |
|---|
| 465 | + * interrupted if necessary. |
|---|
| 466 | + */ |
|---|
| 467 | +#define FAULT_FLAG_WRITE 0x01 |
|---|
| 468 | +#define FAULT_FLAG_MKWRITE 0x02 |
|---|
| 469 | +#define FAULT_FLAG_ALLOW_RETRY 0x04 |
|---|
| 470 | +#define FAULT_FLAG_RETRY_NOWAIT 0x08 |
|---|
| 471 | +#define FAULT_FLAG_KILLABLE 0x10 |
|---|
| 472 | +#define FAULT_FLAG_TRIED 0x20 |
|---|
| 473 | +#define FAULT_FLAG_USER 0x40 |
|---|
| 474 | +#define FAULT_FLAG_REMOTE 0x80 |
|---|
| 475 | +#define FAULT_FLAG_INSTRUCTION 0x100 |
|---|
| 476 | +#define FAULT_FLAG_INTERRUPTIBLE 0x200 |
|---|
| 477 | +/* Speculative fault, not holding mmap_lock */ |
|---|
| 478 | +#define FAULT_FLAG_SPECULATIVE 0x400 |
|---|
| 479 | + |
|---|
| 480 | +/* |
|---|
| 481 | + * The default fault flags that should be used by most of the |
|---|
| 482 | + * arch-specific page fault handlers. |
|---|
| 483 | + */ |
|---|
| 484 | +#define FAULT_FLAG_DEFAULT (FAULT_FLAG_ALLOW_RETRY | \ |
|---|
| 485 | + FAULT_FLAG_KILLABLE | \ |
|---|
| 486 | + FAULT_FLAG_INTERRUPTIBLE) |
|---|
| 487 | + |
|---|
| 488 | +/** |
|---|
| 489 | + * fault_flag_allow_retry_first - check ALLOW_RETRY the first time |
|---|
| 490 | + * |
|---|
| 491 | + * This is mostly used for places where we want to try to avoid taking |
|---|
| 492 | + * the mmap_lock for too long a time when waiting for another condition |
|---|
| 493 | + * to change, in which case we can try to be polite to release the |
|---|
| 494 | + * mmap_lock in the first round to avoid potential starvation of other |
|---|
| 495 | + * processes that would also want the mmap_lock. |
|---|
| 496 | + * |
|---|
| 497 | + * Return: true if the page fault allows retry and this is the first |
|---|
| 498 | + * attempt of the fault handling; false otherwise. |
|---|
| 499 | + */ |
|---|
| 500 | +static inline bool fault_flag_allow_retry_first(unsigned int flags) |
|---|
| 501 | +{ |
|---|
| 502 | + return (flags & FAULT_FLAG_ALLOW_RETRY) && |
|---|
| 503 | + (!(flags & FAULT_FLAG_TRIED)); |
|---|
| 504 | +} |
|---|
| 333 | 505 | |
|---|
| 334 | 506 | #define FAULT_FLAG_TRACE \ |
|---|
| 335 | 507 | { FAULT_FLAG_WRITE, "WRITE" }, \ |
|---|
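The ALLOW_RETRY/TRIED rules documented above turn into a retry loop in every architecture's fault handler. A hedged, heavily trimmed sketch of that pattern (real handlers also check signals, VMA access permissions, and stack growth; the function name is invented):

```c
#include <linux/mm.h>
#include <linux/mmap_lock.h>

static vm_fault_t fault_with_retry(struct mm_struct *mm, unsigned long addr,
				   struct pt_regs *regs)
{
	/* (a) first attempt: ALLOW_RETRY set, TRIED clear */
	unsigned int flags = FAULT_FLAG_DEFAULT;
	struct vm_area_struct *vma;
	vm_fault_t fault;

	mmap_read_lock(mm);
retry:
	vma = find_vma(mm, addr);
	if (!vma || vma->vm_start > addr) {
		mmap_read_unlock(mm);
		return VM_FAULT_SIGSEGV;
	}

	fault = handle_mm_fault(vma, addr, flags, regs);

	if (fault & VM_FAULT_RETRY) {
		/* handle_mm_fault() dropped mmap_lock before returning */
		mmap_read_lock(mm);
		/* (b) later attempts: ALLOW_RETRY and TRIED both set */
		flags |= FAULT_FLAG_TRIED;
		goto retry;
	}

	mmap_read_unlock(mm);
	return fault;
}
```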
| .. | .. |
|---|
| 340 | 512 | { FAULT_FLAG_TRIED, "TRIED" }, \ |
|---|
| 341 | 513 | { FAULT_FLAG_USER, "USER" }, \ |
|---|
| 342 | 514 | { FAULT_FLAG_REMOTE, "REMOTE" }, \ |
|---|
| 343 | | - { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" } |
|---|
| 515 | + { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }, \ |
|---|
| 516 | + { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" } |
|---|
| 344 | 517 | |
|---|
| 345 | 518 | /* |
|---|
| 346 | | - * vm_fault is filled by the the pagefault handler and passed to the vma's |
|---|
| 519 | + * vm_fault is filled by the pagefault handler and passed to the vma's |
|---|
| 347 | 520 | * ->fault function. The vma's ->fault is responsible for returning a bitmask |
|---|
| 348 | 521 | * of VM_FAULT_xxx flags that give details about how the fault was handled. |
|---|
| 349 | 522 | * |
|---|
| .. | .. |
|---|
| 353 | 526 | * pgoff should be used in favour of virtual_address, if possible. |
|---|
| 354 | 527 | */ |
|---|
| 355 | 528 | struct vm_fault { |
|---|
| 356 | | - struct vm_area_struct *vma; /* Target VMA */ |
|---|
| 357 | | - unsigned int flags; /* FAULT_FLAG_xxx flags */ |
|---|
| 358 | | - gfp_t gfp_mask; /* gfp mask to be used for allocations */ |
|---|
| 359 | | - pgoff_t pgoff; /* Logical page offset based on vma */ |
|---|
| 360 | | - unsigned long address; /* Faulting virtual address */ |
|---|
| 529 | +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT |
|---|
| 530 | + unsigned int sequence; |
|---|
| 531 | + pmd_t orig_pmd; /* value of PMD at the time of fault */ |
|---|
| 532 | +#endif |
|---|
| 533 | + const struct { |
|---|
| 534 | + struct vm_area_struct *vma; /* Target VMA */ |
|---|
| 535 | + gfp_t gfp_mask; /* gfp mask to be used for allocations */ |
|---|
| 536 | + pgoff_t pgoff; /* Logical page offset based on vma */ |
|---|
| 537 | + unsigned long address; /* Faulting virtual address */ |
|---|
| 538 | + }; |
|---|
| 539 | + unsigned int flags; /* FAULT_FLAG_xxx flags |
|---|
| 540 | + * XXX: should really be 'const' */ |
|---|
| 361 | 541 | pmd_t *pmd; /* Pointer to pmd entry matching |
|---|
| 362 | 542 | * the 'address' */ |
|---|
| 363 | 543 | pud_t *pud; /* Pointer to pud entry matching |
|---|
| .. | .. |
|---|
| 366 | 546 | pte_t orig_pte; /* Value of PTE at the time of fault */ |
|---|
| 367 | 547 | |
|---|
| 368 | 548 | struct page *cow_page; /* Page handler may use for COW fault */ |
|---|
| 369 | | - struct mem_cgroup *memcg; /* Cgroup cow_page belongs to */ |
|---|
| 370 | 549 | struct page *page; /* ->fault handlers should return a |
|---|
| 371 | 550 | * page here, unless VM_FAULT_NOPAGE |
|---|
| 372 | 551 | * is set (which is also implied by |
|---|
| .. | .. |
|---|
| 382 | 561 | * is not NULL, otherwise pmd. |
|---|
| 383 | 562 | */ |
|---|
| 384 | 563 | pgtable_t prealloc_pte; /* Pre-allocated pte page table. |
|---|
| 385 | | - * vm_ops->map_pages() calls |
|---|
| 386 | | - * alloc_set_pte() from atomic context. |
|---|
| 564 | + * vm_ops->map_pages() sets up a page |
|---|
| 565 | + * table from atomic context. |
|---|
| 387 | 566 | * do_fault_around() pre-allocates |
|---|
| 388 | 567 | * page table to avoid allocation from |
|---|
| 389 | 568 | * atomic context. |
|---|
| 390 | 569 | */ |
|---|
| 570 | + /* |
|---|
| 571 | + * These entries are required when handling speculative page fault. |
|---|
| 572 | + * This way the page handling is done using consistent field values. |
|---|
| 573 | + */ |
|---|
| 574 | + unsigned long vma_flags; |
|---|
| 575 | + pgprot_t vma_page_prot; |
|---|
| 576 | + ANDROID_OEM_DATA_ARRAY(1, 2); |
|---|
| 391 | 577 | }; |
|---|
| 392 | 578 | |
|---|
| 393 | 579 | /* page entry size for vm->huge_fault() */ |
|---|
| .. | .. |
|---|
| 410 | 596 | vm_fault_t (*fault)(struct vm_fault *vmf); |
|---|
| 411 | 597 | vm_fault_t (*huge_fault)(struct vm_fault *vmf, |
|---|
| 412 | 598 | enum page_entry_size pe_size); |
|---|
| 413 | | - void (*map_pages)(struct vm_fault *vmf, |
|---|
| 599 | + vm_fault_t (*map_pages)(struct vm_fault *vmf, |
|---|
| 414 | 600 | pgoff_t start_pgoff, pgoff_t end_pgoff); |
|---|
| 415 | 601 | unsigned long (*pagesize)(struct vm_area_struct * area); |
|---|
| 416 | 602 | |
|---|
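Note that `->map_pages()` now returns a `vm_fault_t`, matching `->fault()`. For context, a hedged sketch of a minimal driver `vm_operations_struct` that satisfies a fault from a page it already owns (the `mydrv_*` names and the private-data layout are invented for illustration):

```c
#include <linux/mm.h>

/* Hypothetical driver state: a single page backing the whole mapping. */
struct mydrv_buf {
	struct page *page;
};

static vm_fault_t mydrv_vm_fault(struct vm_fault *vmf)
{
	struct mydrv_buf *buf = vmf->vma->vm_private_data;

	/* Return the page with an extra reference; the core maps it. */
	get_page(buf->page);
	vmf->page = buf->page;
	return 0;
}

static const struct vm_operations_struct mydrv_vm_ops = {
	.fault = mydrv_vm_fault,
};
```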
| .. | .. |
|---|
| 447 | 633 | * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure |
|---|
| 448 | 634 | * in mm/mempolicy.c will do this automatically. |
|---|
| 449 | 635 | * get_policy() must NOT add a ref if the policy at (vma,addr) is not |
|---|
| 450 | | - * marked as MPOL_SHARED. vma policies are protected by the mmap_sem. |
|---|
| 636 | + * marked as MPOL_SHARED. vma policies are protected by the mmap_lock. |
|---|
| 451 | 637 | * If no [shared/vma] mempolicy exists at the addr, get_policy() op |
|---|
| 452 | 638 | * must return NULL--i.e., do not "fallback" to task or system default |
|---|
| 453 | 639 | * policy. |
|---|
| .. | .. |
|---|
| 463 | 649 | struct page *(*find_special_page)(struct vm_area_struct *vma, |
|---|
| 464 | 650 | unsigned long addr); |
|---|
| 465 | 651 | |
|---|
| 652 | +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT |
|---|
| 653 | + bool (*allow_speculation)(void); |
|---|
| 654 | +#endif |
|---|
| 655 | + |
|---|
| 466 | 656 | ANDROID_KABI_RESERVE(1); |
|---|
| 467 | 657 | ANDROID_KABI_RESERVE(2); |
|---|
| 468 | 658 | ANDROID_KABI_RESERVE(3); |
|---|
| 469 | 659 | ANDROID_KABI_RESERVE(4); |
|---|
| 470 | 660 | }; |
|---|
| 661 | + |
|---|
| 662 | +static inline void INIT_VMA(struct vm_area_struct *vma) |
|---|
| 663 | +{ |
|---|
| 664 | + INIT_LIST_HEAD(&vma->anon_vma_chain); |
|---|
| 665 | +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT |
|---|
| 666 | + seqcount_init(&vma->vm_sequence); |
|---|
| 667 | + atomic_set(&vma->vm_ref_count, 1); |
|---|
| 668 | +#endif |
|---|
| 669 | +} |
|---|
| 471 | 670 | |
|---|
| 472 | 671 | static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) |
|---|
| 473 | 672 | { |
|---|
| .. | .. |
|---|
| 476 | 675 | memset(vma, 0, sizeof(*vma)); |
|---|
| 477 | 676 | vma->vm_mm = mm; |
|---|
| 478 | 677 | vma->vm_ops = &dummy_vm_ops; |
|---|
| 479 | | - INIT_LIST_HEAD(&vma->anon_vma_chain); |
|---|
| 678 | + INIT_VMA(vma); |
|---|
| 480 | 679 | } |
|---|
| 481 | 680 | |
|---|
| 482 | 681 | static inline void vma_set_anonymous(struct vm_area_struct *vma) |
|---|
| .. | .. |
|---|
| 484 | 683 | vma->vm_ops = NULL; |
|---|
| 485 | 684 | } |
|---|
| 486 | 685 | |
|---|
| 686 | +static inline bool vma_is_anonymous(struct vm_area_struct *vma) |
|---|
| 687 | +{ |
|---|
| 688 | + return !vma->vm_ops; |
|---|
| 689 | +} |
|---|
| 690 | + |
|---|
| 691 | +static inline bool vma_is_temporary_stack(struct vm_area_struct *vma) |
|---|
| 692 | +{ |
|---|
| 693 | + int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); |
|---|
| 694 | + |
|---|
| 695 | + if (!maybe_stack) |
|---|
| 696 | + return false; |
|---|
| 697 | + |
|---|
| 698 | + if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) == |
|---|
| 699 | + VM_STACK_INCOMPLETE_SETUP) |
|---|
| 700 | + return true; |
|---|
| 701 | + |
|---|
| 702 | + return false; |
|---|
| 703 | +} |
|---|
| 704 | + |
|---|
| 705 | +static inline bool vma_is_foreign(struct vm_area_struct *vma) |
|---|
| 706 | +{ |
|---|
| 707 | + if (!current->mm) |
|---|
| 708 | + return true; |
|---|
| 709 | + |
|---|
| 710 | + if (current->mm != vma->vm_mm) |
|---|
| 711 | + return true; |
|---|
| 712 | + |
|---|
| 713 | + return false; |
|---|
| 714 | +} |
|---|
| 715 | + |
|---|
| 716 | +static inline bool vma_is_accessible(struct vm_area_struct *vma) |
|---|
| 717 | +{ |
|---|
| 718 | + return vma->vm_flags & VM_ACCESS_FLAGS; |
|---|
| 719 | +} |
|---|
| 720 | + |
|---|
| 721 | +#ifdef CONFIG_SHMEM |
|---|
| 722 | +/* |
|---|
| 723 | + * The vma_is_shmem is not inline because it is used only by slow |
|---|
| 724 | + * paths in userfault. |
|---|
| 725 | + */ |
|---|
| 726 | +bool vma_is_shmem(struct vm_area_struct *vma); |
|---|
| 727 | +#else |
|---|
| 728 | +static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; } |
|---|
| 729 | +#endif |
|---|
| 730 | + |
|---|
| 731 | +int vma_is_stack_for_current(struct vm_area_struct *vma); |
|---|
| 732 | + |
|---|
| 487 | 733 | /* flush_tlb_range() takes a vma, not a mm, and can care about flags */ |
|---|
| 488 | 734 | #define TLB_FLUSH_VMA(mm,flags) { .vm_mm = (mm), .vm_flags = (flags) } |
|---|
| 489 | 735 | |
|---|
| 490 | 736 | struct mmu_gather; |
|---|
| 491 | 737 | struct inode; |
|---|
| 492 | 738 | |
|---|
| 493 | | -#define page_private(page) ((page)->private) |
|---|
| 494 | | -#define set_page_private(page, v) ((page)->private = (v)) |
|---|
| 495 | | - |
|---|
| 496 | | -#if !defined(__HAVE_ARCH_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE) |
|---|
| 497 | | -static inline int pmd_devmap(pmd_t pmd) |
|---|
| 498 | | -{ |
|---|
| 499 | | - return 0; |
|---|
| 500 | | -} |
|---|
| 501 | | -static inline int pud_devmap(pud_t pud) |
|---|
| 502 | | -{ |
|---|
| 503 | | - return 0; |
|---|
| 504 | | -} |
|---|
| 505 | | -static inline int pgd_devmap(pgd_t pgd) |
|---|
| 506 | | -{ |
|---|
| 507 | | - return 0; |
|---|
| 508 | | -} |
|---|
| 509 | | -#endif |
|---|
| 510 | | - |
|---|
| 511 | | -/* |
|---|
| 512 | | - * FIXME: take this include out, include page-flags.h in |
|---|
| 513 | | - * files which need it (119 of them) |
|---|
| 514 | | - */ |
|---|
| 515 | | -#include <linux/page-flags.h> |
|---|
| 516 | 739 | #include <linux/huge_mm.h> |
|---|
| 517 | 740 | |
|---|
| 518 | 741 | /* |
|---|
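The new `vma_is_*()` helpers above replace open-coded flag tests in the fault and GUP paths. A hedged sketch of how a caller might combine them (the function name is illustrative):

```c
#include <linux/mm.h>

/*
 * Illustrative pre-checks on @vma before trying to service a fault:
 * PROT_NONE-style mappings fail vma_is_accessible() (no VM_READ,
 * VM_WRITE or VM_EXEC), and anonymous VMAs have no ->fault handler.
 */
static bool mydrv_can_service_fault(struct vm_area_struct *vma)
{
	if (!vma_is_accessible(vma))
		return false;
	if (vma_is_anonymous(vma))
		return false;
	return true;
}
```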
| .. | .. |
|---|
| 533 | 756 | */ |
|---|
| 534 | 757 | static inline int put_page_testzero(struct page *page) |
|---|
| 535 | 758 | { |
|---|
| 759 | + int ret; |
|---|
| 760 | + |
|---|
| 536 | 761 | VM_BUG_ON_PAGE(page_ref_count(page) == 0, page); |
|---|
| 537 | | - return page_ref_dec_and_test(page); |
|---|
| 762 | + ret = page_ref_dec_and_test(page); |
|---|
| 763 | + page_pinner_put_page(page); |
|---|
| 764 | + |
|---|
| 765 | + return ret; |
|---|
| 538 | 766 | } |
|---|
| 539 | 767 | |
|---|
| 540 | 768 | /* |
|---|
| .. | .. |
|---|
| 569 | 797 | * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there |
|---|
| 570 | 798 | * is no special casing required. |
|---|
| 571 | 799 | */ |
|---|
| 572 | | -static inline bool is_vmalloc_addr(const void *x) |
|---|
| 573 | | -{ |
|---|
| 574 | | -#ifdef CONFIG_MMU |
|---|
| 575 | | - unsigned long addr = (unsigned long)x; |
|---|
| 576 | 800 | |
|---|
| 577 | | - return addr >= VMALLOC_START && addr < VMALLOC_END; |
|---|
| 578 | | -#else |
|---|
| 579 | | - return false; |
|---|
| 801 | +#ifndef is_ioremap_addr |
|---|
| 802 | +#define is_ioremap_addr(x) is_vmalloc_addr(x) |
|---|
| 580 | 803 | #endif |
|---|
| 581 | | -} |
|---|
| 804 | + |
|---|
| 582 | 805 | #ifdef CONFIG_MMU |
|---|
| 806 | +extern bool is_vmalloc_addr(const void *x); |
|---|
| 583 | 807 | extern int is_vmalloc_or_module_addr(const void *x); |
|---|
| 584 | 808 | #else |
|---|
| 809 | +static inline bool is_vmalloc_addr(const void *x) |
|---|
| 810 | +{ |
|---|
| 811 | + return false; |
|---|
| 812 | +} |
|---|
| 585 | 813 | static inline int is_vmalloc_or_module_addr(const void *x) |
|---|
| 586 | 814 | { |
|---|
| 587 | 815 | return 0; |
|---|
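`is_vmalloc_addr()` becomes out-of-line on MMU kernels, but its contract is unchanged: report whether a pointer falls in the vmalloc range. A hedged sketch of the classic caller pattern, which is essentially what `kvfree()` does internally:

```c
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

/*
 * Free a buffer that may have come from either kmalloc() or vmalloc().
 * Real callers should simply use kvfree(); this spells out the check.
 */
static void free_kmalloc_or_vmalloc(void *buf)
{
	if (is_vmalloc_addr(buf))
		vfree(buf);
	else
		kfree(buf);
}
```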
| .. | .. |
|---|
| 617 | 845 | return kvmalloc_array(n, size, flags | __GFP_ZERO); |
|---|
| 618 | 846 | } |
|---|
| 619 | 847 | |
|---|
| 848 | +extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, |
|---|
| 849 | + gfp_t flags); |
|---|
| 620 | 850 | extern void kvfree(const void *addr); |
|---|
| 621 | 851 | extern void kvfree_sensitive(const void *addr, size_t len); |
|---|
| 852 | + |
|---|
| 853 | +static inline int head_compound_mapcount(struct page *head) |
|---|
| 854 | +{ |
|---|
| 855 | + return atomic_read(compound_mapcount_ptr(head)) + 1; |
|---|
| 856 | +} |
|---|
| 622 | 857 | |
|---|
| 623 | 858 | /* |
|---|
| 624 | 859 | * Mapcount of compound page as a whole, does not include mapped sub-pages. |
|---|
| .. | .. |
|---|
| 629 | 864 | { |
|---|
| 630 | 865 | VM_BUG_ON_PAGE(!PageCompound(page), page); |
|---|
| 631 | 866 | page = compound_head(page); |
|---|
| 632 | | - return atomic_read(compound_mapcount_ptr(page)) + 1; |
|---|
| 867 | + return head_compound_mapcount(page); |
|---|
| 633 | 868 | } |
|---|
| 634 | 869 | |
|---|
| 635 | 870 | /* |
|---|
| .. | .. |
|---|
| 709 | 944 | #endif |
|---|
| 710 | 945 | NR_COMPOUND_DTORS, |
|---|
| 711 | 946 | }; |
|---|
| 712 | | -extern compound_page_dtor * const compound_page_dtors[]; |
|---|
| 947 | +extern compound_page_dtor * const compound_page_dtors[NR_COMPOUND_DTORS]; |
|---|
| 713 | 948 | |
|---|
| 714 | 949 | static inline void set_compound_page_dtor(struct page *page, |
|---|
| 715 | 950 | enum compound_dtor_id compound_dtor) |
|---|
| .. | .. |
|---|
| 718 | 953 | page[1].compound_dtor = compound_dtor; |
|---|
| 719 | 954 | } |
|---|
| 720 | 955 | |
|---|
| 721 | | -static inline compound_page_dtor *get_compound_page_dtor(struct page *page) |
|---|
| 956 | +static inline void destroy_compound_page(struct page *page) |
|---|
| 722 | 957 | { |
|---|
| 723 | 958 | VM_BUG_ON_PAGE(page[1].compound_dtor >= NR_COMPOUND_DTORS, page); |
|---|
| 724 | | - return compound_page_dtors[page[1].compound_dtor]; |
|---|
| 959 | + compound_page_dtors[page[1].compound_dtor](page); |
|---|
| 725 | 960 | } |
|---|
| 726 | 961 | |
|---|
| 727 | 962 | static inline unsigned int compound_order(struct page *page) |
|---|
| .. | .. |
|---|
| 731 | 966 | return page[1].compound_order; |
|---|
| 732 | 967 | } |
|---|
| 733 | 968 | |
|---|
| 969 | +static inline bool hpage_pincount_available(struct page *page) |
|---|
| 970 | +{ |
|---|
| 971 | + /* |
|---|
| 972 | + * Can the page->hpage_pinned_refcount field be used? That field is in |
|---|
| 973 | + * the 3rd page of the compound page, so the smallest (2-page) compound |
|---|
| 974 | + * pages cannot support it. |
|---|
| 975 | + */ |
|---|
| 976 | + page = compound_head(page); |
|---|
| 977 | + return PageCompound(page) && compound_order(page) > 1; |
|---|
| 978 | +} |
|---|
| 979 | + |
|---|
| 980 | +static inline int head_compound_pincount(struct page *head) |
|---|
| 981 | +{ |
|---|
| 982 | + return atomic_read(compound_pincount_ptr(head)); |
|---|
| 983 | +} |
|---|
| 984 | + |
|---|
| 985 | +static inline int compound_pincount(struct page *page) |
|---|
| 986 | +{ |
|---|
| 987 | + VM_BUG_ON_PAGE(!hpage_pincount_available(page), page); |
|---|
| 988 | + page = compound_head(page); |
|---|
| 989 | + return head_compound_pincount(page); |
|---|
| 990 | +} |
|---|
| 991 | + |
|---|
| 734 | 992 | static inline void set_compound_order(struct page *page, unsigned int order) |
|---|
| 735 | 993 | { |
|---|
| 736 | 994 | page[1].compound_order = order; |
|---|
| 995 | + page[1].compound_nr = 1U << order; |
|---|
| 996 | +} |
|---|
| 997 | + |
|---|
| 998 | +/* Returns the number of pages in this potentially compound page. */ |
|---|
| 999 | +static inline unsigned long compound_nr(struct page *page) |
|---|
| 1000 | +{ |
|---|
| 1001 | + if (!PageHead(page)) |
|---|
| 1002 | + return 1; |
|---|
| 1003 | + return page[1].compound_nr; |
|---|
| 737 | 1004 | } |
|---|
| 738 | 1005 | |
|---|
| 739 | 1006 | /* Returns the number of bytes in this potentially compound page. */ |
|---|
| 740 | 1007 | static inline unsigned long page_size(struct page *page) |
|---|
| 741 | 1008 | { |
|---|
| 742 | 1009 | return PAGE_SIZE << compound_order(page); |
|---|
| 1010 | +} |
|---|
| 1011 | + |
|---|
| 1012 | +/* Returns the number of bits needed for the number of bytes in a page */ |
|---|
| 1013 | +static inline unsigned int page_shift(struct page *page) |
|---|
| 1014 | +{ |
|---|
| 1015 | + return PAGE_SHIFT + compound_order(page); |
|---|
| 743 | 1016 | } |
|---|
| 744 | 1017 | |
|---|
| 745 | 1018 | void free_compound_page(struct page *page); |
|---|
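With `compound_nr` now cached in the first tail page, the geometry of a possibly compound page can be read without recomputing `1 << order` at every call site. A hedged sketch tying the new helpers together (the reporting function is illustrative):

```c
#include <linux/mm.h>
#include <linux/printk.h>

/* Illustrative only: dump the geometry of a base or compound page. */
static void report_page_geometry(struct page *page)
{
	pr_info("order=%u pages=%lu bytes=%lu shift=%u\n",
		compound_order(page),	/* 0 for a base page		  */
		compound_nr(page),	/* 1 << order, cached in page[1]  */
		page_size(page),	/* PAGE_SIZE << order		  */
		page_shift(page));	/* PAGE_SHIFT + order		  */
}
```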
| .. | .. |
|---|
| 751 | 1024 | * pte_mkwrite. But get_user_pages can cause write faults for mappings |
|---|
| 752 | 1025 | * that do not have writing enabled, when used by access_process_vm. |
|---|
| 753 | 1026 | */ |
|---|
| 754 | | -static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) |
|---|
| 1027 | +static inline pte_t maybe_mkwrite(pte_t pte, unsigned long vma_flags) |
|---|
| 755 | 1028 | { |
|---|
| 756 | | - if (likely(vma->vm_flags & VM_WRITE)) |
|---|
| 1029 | + if (likely(vma_flags & VM_WRITE)) |
|---|
| 757 | 1030 | pte = pte_mkwrite(pte); |
|---|
| 758 | 1031 | return pte; |
|---|
| 759 | 1032 | } |
|---|
| 760 | 1033 | |
|---|
| 761 | | -vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, |
|---|
| 762 | | - struct page *page); |
|---|
| 1034 | +vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page); |
|---|
| 1035 | +void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr); |
|---|
| 1036 | + |
|---|
| 763 | 1037 | vm_fault_t finish_fault(struct vm_fault *vmf); |
|---|
| 764 | 1038 | vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); |
|---|
| 765 | 1039 | #endif |
|---|
| .. | .. |
|---|
| 860 | 1134 | |
|---|
| 861 | 1135 | #define ZONEID_PGSHIFT (ZONEID_PGOFF * (ZONEID_SHIFT != 0)) |
|---|
| 862 | 1136 | |
|---|
| 863 | | -#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS |
|---|
| 864 | | -#error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS |
|---|
| 865 | | -#endif |
|---|
| 866 | | - |
|---|
| 867 | 1137 | #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) |
|---|
| 868 | 1138 | #define NODES_MASK ((1UL << NODES_WIDTH) - 1) |
|---|
| 869 | 1139 | #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) |
|---|
| .. | .. |
|---|
| 873 | 1143 | |
|---|
| 874 | 1144 | static inline enum zone_type page_zonenum(const struct page *page) |
|---|
| 875 | 1145 | { |
|---|
| 1146 | + ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT); |
|---|
| 876 | 1147 | return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; |
|---|
| 877 | 1148 | } |
|---|
| 878 | 1149 | |
|---|
| .. | .. |
|---|
| 881 | 1152 | { |
|---|
| 882 | 1153 | return page_zonenum(page) == ZONE_DEVICE; |
|---|
| 883 | 1154 | } |
|---|
| 1155 | +extern void memmap_init_zone_device(struct zone *, unsigned long, |
|---|
| 1156 | + unsigned long, struct dev_pagemap *); |
|---|
| 884 | 1157 | #else |
|---|
| 885 | 1158 | static inline bool is_zone_device_page(const struct page *page) |
|---|
| 886 | 1159 | { |
|---|
| .. | .. |
|---|
| 889 | 1162 | #endif |
|---|
| 890 | 1163 | |
|---|
| 891 | 1164 | #ifdef CONFIG_DEV_PAGEMAP_OPS |
|---|
| 892 | | -void dev_pagemap_get_ops(void); |
|---|
| 893 | | -void dev_pagemap_put_ops(void); |
|---|
| 894 | | -void __put_devmap_managed_page(struct page *page); |
|---|
| 1165 | +void free_devmap_managed_page(struct page *page); |
|---|
| 895 | 1166 | DECLARE_STATIC_KEY_FALSE(devmap_managed_key); |
|---|
| 896 | | -static inline bool put_devmap_managed_page(struct page *page) |
|---|
| 1167 | + |
|---|
| 1168 | +static inline bool page_is_devmap_managed(struct page *page) |
|---|
| 897 | 1169 | { |
|---|
| 898 | 1170 | if (!static_branch_unlikely(&devmap_managed_key)) |
|---|
| 899 | 1171 | return false; |
|---|
| .. | .. |
|---|
| 901 | 1173 | return false; |
|---|
| 902 | 1174 | switch (page->pgmap->type) { |
|---|
| 903 | 1175 | case MEMORY_DEVICE_PRIVATE: |
|---|
| 904 | | - case MEMORY_DEVICE_PUBLIC: |
|---|
| 905 | 1176 | case MEMORY_DEVICE_FS_DAX: |
|---|
| 906 | | - __put_devmap_managed_page(page); |
|---|
| 907 | 1177 | return true; |
|---|
| 908 | 1178 | default: |
|---|
| 909 | 1179 | break; |
|---|
| .. | .. |
|---|
| 911 | 1181 | return false; |
|---|
| 912 | 1182 | } |
|---|
| 913 | 1183 | |
|---|
| 1184 | +void put_devmap_managed_page(struct page *page); |
|---|
| 1185 | + |
|---|
| 1186 | +#else /* CONFIG_DEV_PAGEMAP_OPS */ |
|---|
| 1187 | +static inline bool page_is_devmap_managed(struct page *page) |
|---|
| 1188 | +{ |
|---|
| 1189 | + return false; |
|---|
| 1190 | +} |
|---|
| 1191 | + |
|---|
| 1192 | +static inline void put_devmap_managed_page(struct page *page) |
|---|
| 1193 | +{ |
|---|
| 1194 | +} |
|---|
| 1195 | +#endif /* CONFIG_DEV_PAGEMAP_OPS */ |
|---|
| 1196 | + |
|---|
| 914 | 1197 | static inline bool is_device_private_page(const struct page *page) |
|---|
| 915 | 1198 | { |
|---|
| 916 | | - return is_zone_device_page(page) && |
|---|
| 1199 | + return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) && |
|---|
| 1200 | + IS_ENABLED(CONFIG_DEVICE_PRIVATE) && |
|---|
| 1201 | + is_zone_device_page(page) && |
|---|
| 917 | 1202 | page->pgmap->type == MEMORY_DEVICE_PRIVATE; |
|---|
| 918 | 1203 | } |
|---|
| 919 | 1204 | |
|---|
| 920 | | -static inline bool is_device_public_page(const struct page *page) |
|---|
| 1205 | +static inline bool is_pci_p2pdma_page(const struct page *page) |
|---|
| 921 | 1206 | { |
|---|
| 922 | | - return is_zone_device_page(page) && |
|---|
| 923 | | - page->pgmap->type == MEMORY_DEVICE_PUBLIC; |
|---|
| 1207 | + return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) && |
|---|
| 1208 | + IS_ENABLED(CONFIG_PCI_P2PDMA) && |
|---|
| 1209 | + is_zone_device_page(page) && |
|---|
| 1210 | + page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA; |
|---|
| 924 | 1211 | } |
|---|
| 925 | | - |
|---|
| 926 | | -#else /* CONFIG_DEV_PAGEMAP_OPS */ |
|---|
| 927 | | -static inline void dev_pagemap_get_ops(void) |
|---|
| 928 | | -{ |
|---|
| 929 | | -} |
|---|
| 930 | | - |
|---|
| 931 | | -static inline void dev_pagemap_put_ops(void) |
|---|
| 932 | | -{ |
|---|
| 933 | | -} |
|---|
| 934 | | - |
|---|
| 935 | | -static inline bool put_devmap_managed_page(struct page *page) |
|---|
| 936 | | -{ |
|---|
| 937 | | - return false; |
|---|
| 938 | | -} |
|---|
| 939 | | - |
|---|
| 940 | | -static inline bool is_device_private_page(const struct page *page) |
|---|
| 941 | | -{ |
|---|
| 942 | | - return false; |
|---|
| 943 | | -} |
|---|
| 944 | | - |
|---|
| 945 | | -static inline bool is_device_public_page(const struct page *page) |
|---|
| 946 | | -{ |
|---|
| 947 | | - return false; |
|---|
| 948 | | -} |
|---|
| 949 | | -#endif /* CONFIG_DEV_PAGEMAP_OPS */ |
|---|
| 950 | 1212 | |
|---|
| 951 | 1213 | /* 127: arbitrary random number, small enough to assemble well */ |
|---|
| 952 | 1214 | #define page_ref_zero_or_close_to_overflow(page) \ |
|---|
| .. | .. |
|---|
| 962 | 1224 | VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page); |
|---|
| 963 | 1225 | page_ref_inc(page); |
|---|
| 964 | 1226 | } |
|---|
| 1227 | + |
|---|
| 1228 | +bool __must_check try_grab_page(struct page *page, unsigned int flags); |
|---|
| 965 | 1229 | |
|---|
| 966 | 1230 | static inline __must_check bool try_get_page(struct page *page) |
|---|
| 967 | 1231 | { |
|---|
| .. | .. |
|---|
| 982 | 1246 | * need to inform the device driver through callback. See |
|---|
| 983 | 1247 | * include/linux/memremap.h and HMM for details. |
|---|
| 984 | 1248 | */ |
|---|
| 985 | | - if (put_devmap_managed_page(page)) |
|---|
| 1249 | + if (page_is_devmap_managed(page)) { |
|---|
| 1250 | + put_devmap_managed_page(page); |
|---|
| 986 | 1251 | return; |
|---|
| 1252 | + } |
|---|
| 987 | 1253 | |
|---|
| 988 | 1254 | if (put_page_testzero(page)) |
|---|
| 989 | 1255 | __put_page(page); |
|---|
| 1256 | +} |
|---|
| 1257 | + |
|---|
| 1258 | +/* |
|---|
| 1259 | + * GUP_PIN_COUNTING_BIAS, and the associated functions that use it, overload |
|---|
| 1260 | + * the page's refcount so that two separate items are tracked: the original page |
|---|
| 1261 | + * reference count, and also a new count of how many pin_user_pages() calls were |
|---|
| 1262 | + * made against the page. ("gup-pinned" is another term for the latter). |
|---|
| 1263 | + * |
|---|
| 1264 | + * With this scheme, pin_user_pages() becomes special: such pages are marked as |
|---|
| 1265 | + * distinct from normal pages. As such, the unpin_user_page() call (and its |
|---|
| 1266 | + * variants) must be used in order to release gup-pinned pages. |
|---|
| 1267 | + * |
|---|
| 1268 | + * Choice of value: |
|---|
| 1269 | + * |
|---|
| 1270 | + * By making GUP_PIN_COUNTING_BIAS a power of two, debugging of page reference |
|---|
| 1271 | + * counts with respect to pin_user_pages() and unpin_user_page() becomes |
|---|
| 1272 | + * simpler, due to the fact that adding an even power of two to the page |
|---|
| 1273 | + * refcount has the effect of using only the upper N bits, for the code that |
|---|
| 1274 | + * counts up using the bias value. This means that the lower bits are left for |
|---|
| 1275 | + * the exclusive use of the original code that increments and decrements by one |
|---|
| 1276 | + * (or at least, by much smaller values than the bias value). |
|---|
| 1277 | + * |
|---|
| 1278 | + * Of course, once the lower bits overflow into the upper bits (and this is |
|---|
| 1279 | + * OK, because subtraction recovers the original values), then visual inspection |
|---|
| 1280 | + * no longer suffices to directly view the separate counts. However, for normal |
|---|
| 1281 | + * applications that don't have huge page reference counts, this won't be an |
|---|
| 1282 | + * issue. |
|---|
| 1283 | + * |
|---|
| 1284 | + * Locking: the lockless algorithm described in page_cache_get_speculative() |
|---|
| 1285 | + * and page_cache_gup_pin_speculative() provides safe operation for |
|---|
| 1286 | + * get_user_pages and page_mkclean and other calls that race to set up page |
|---|
| 1287 | + * table entries. |
|---|
| 1288 | + */ |
|---|
| 1289 | +#define GUP_PIN_COUNTING_BIAS (1U << 10) |
|---|
| 1290 | + |
|---|
| 1291 | +void put_user_page(struct page *page); |
|---|
| 1292 | +void unpin_user_page(struct page *page); |
|---|
| 1293 | +void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, |
|---|
| 1294 | + bool make_dirty); |
|---|
| 1295 | +void unpin_user_pages(struct page **pages, unsigned long npages); |
|---|
| 1296 | + |
|---|
| 1297 | +/** |
|---|
| 1298 | + * page_maybe_dma_pinned() - report if a page is pinned for DMA. |
|---|
| 1299 | + * |
|---|
| 1300 | + * This function checks if a page has been pinned via a call to |
|---|
| 1301 | + * pin_user_pages*(). |
|---|
| 1302 | + * |
|---|
| 1303 | + * For non-huge pages, the return value is partially fuzzy: false is not fuzzy, |
|---|
| 1304 | + * because it means "definitely not pinned for DMA", but true means "probably |
|---|
| 1305 | + * pinned for DMA, but possibly a false positive due to having at least |
|---|
| 1306 | + * GUP_PIN_COUNTING_BIAS worth of normal page references". |
|---|
| 1307 | + * |
|---|
| 1308 | + * False positives are OK, because: a) it's unlikely for a page to get that many |
|---|
| 1309 | + * refcounts, and b) all the callers of this routine are expected to be able to |
|---|
| 1310 | + * deal gracefully with a false positive. |
|---|
| 1311 | + * |
|---|
| 1312 | + * For huge pages, the result will be exactly correct. That's because we have |
|---|
| 1313 | + * more tracking data available: the 3rd struct page in the compound page is |
|---|
| 1314 | + * used to track the pincount (instead of using the GUP_PIN_COUNTING_BIAS |
|---|
| 1315 | + * scheme). |
|---|
| 1316 | + * |
|---|
| 1317 | + * For more information, please see Documentation/core-api/pin_user_pages.rst. |
|---|
| 1318 | + * |
|---|
| 1319 | + * @page: pointer to page to be queried. |
|---|
| 1320 | + * @Return: True, if it is likely that the page has been "dma-pinned". |
|---|
| 1321 | + * False, if the page is definitely not dma-pinned. |
|---|
| 1322 | + */ |
|---|
| 1323 | +static inline bool page_maybe_dma_pinned(struct page *page) |
|---|
| 1324 | +{ |
|---|
| 1325 | + if (hpage_pincount_available(page)) |
|---|
| 1326 | + return compound_pincount(page) > 0; |
|---|
| 1327 | + |
|---|
| 1328 | + /* |
|---|
| 1329 | + * page_ref_count() is signed. If that refcount overflows, then |
|---|
| 1330 | + * page_ref_count() returns a negative value, and callers will avoid |
|---|
| 1331 | + * further incrementing the refcount. |
|---|
| 1332 | + * |
|---|
| 1333 | + * Here, for that overflow case, use the signed bit to count a little |
|---|
| 1334 | + * bit higher via unsigned math, and thus still get an accurate result. |
|---|
| 1335 | + */ |
|---|
| 1336 | + return ((unsigned int)page_ref_count(compound_head(page))) >= |
|---|
| 1337 | + GUP_PIN_COUNTING_BIAS; |
|---|
| 990 | 1338 | } |
|---|
| 991 | 1339 | |
|---|
| 992 | 1340 | #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) |
|---|
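The `GUP_PIN_COUNTING_BIAS` scheme above is what backs the `pin_user_pages*()`/`unpin_user_page*()` API declared here. A hedged sketch of the intended calling pattern for a driver doing DMA into user memory (the flag choice and the trimmed error handling are illustrative):

```c
#include <linux/mm.h>
#include <linux/errno.h>

/*
 * Pin user pages for device DMA, then drop the *pin* (not just the
 * refcount) with the unpin API once the transfer is done.
 */
static int dma_into_user_buffer(unsigned long uaddr, int nr_pages,
				struct page **pages)
{
	int pinned;

	/* The _fast variant does not require the caller to hold mmap_lock. */
	pinned = pin_user_pages_fast(uaddr, nr_pages,
				     FOLL_WRITE | FOLL_LONGTERM, pages);
	if (pinned <= 0)
		return pinned ? pinned : -EFAULT;

	/* ... program the DMA engine with the pinned pages ... */

	/* The device wrote the pages: mark them dirty while unpinning. */
	unpin_user_pages_dirty_lock(pages, pinned, true);
	return 0;
}
```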
| .. | .. |
|---|
| 1114 | 1462 | |
|---|
| 1115 | 1463 | static inline bool cpupid_pid_unset(int cpupid) |
|---|
| 1116 | 1464 | { |
|---|
| 1117 | | - return 1; |
|---|
| 1465 | + return true; |
|---|
| 1118 | 1466 | } |
|---|
| 1119 | 1467 | |
|---|
| 1120 | 1468 | static inline void page_cpupid_reset_last(struct page *page) |
|---|
| .. | .. |
|---|
| 1127 | 1475 | } |
|---|
| 1128 | 1476 | #endif /* CONFIG_NUMA_BALANCING */ |
|---|
| 1129 | 1477 | |
|---|
| 1130 | | -#ifdef CONFIG_KASAN_SW_TAGS |
|---|
| 1478 | +#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) |
|---|
| 1479 | + |
|---|
| 1480 | +/* |
|---|
| 1481 | + * KASAN per-page tags are stored xor'ed with 0xff. This allows to avoid |
|---|
| 1482 | + * setting tags for all pages to native kernel tag value 0xff, as the default |
|---|
| 1483 | + * value 0x00 maps to 0xff. |
|---|
| 1484 | + */ |
|---|
| 1485 | + |
|---|
| 1131 | 1486 | static inline u8 page_kasan_tag(const struct page *page) |
|---|
| 1132 | 1487 | { |
|---|
| 1133 | | - return (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; |
|---|
| 1488 | + u8 tag = 0xff; |
|---|
| 1489 | + |
|---|
| 1490 | + if (kasan_enabled()) { |
|---|
| 1491 | + tag = (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; |
|---|
| 1492 | + tag ^= 0xff; |
|---|
| 1493 | + } |
|---|
| 1494 | + |
|---|
| 1495 | + return tag; |
|---|
| 1134 | 1496 | } |
|---|
| 1135 | 1497 | |
|---|
| 1136 | 1498 | static inline void page_kasan_tag_set(struct page *page, u8 tag) |
|---|
| 1137 | 1499 | { |
|---|
| 1138 | | - page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); |
|---|
| 1139 | | - page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; |
|---|
| 1500 | + if (kasan_enabled()) { |
|---|
| 1501 | + tag ^= 0xff; |
|---|
| 1502 | + page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); |
|---|
| 1503 | + page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; |
|---|
| 1504 | + } |
|---|
| 1140 | 1505 | } |
|---|
| 1141 | 1506 | |
|---|
| 1142 | 1507 | static inline void page_kasan_tag_reset(struct page *page) |
|---|
| 1143 | 1508 | { |
|---|
| 1144 | | - page_kasan_tag_set(page, 0xff); |
|---|
| 1509 | + if (kasan_enabled()) |
|---|
| 1510 | + page_kasan_tag_set(page, 0xff); |
|---|
| 1145 | 1511 | } |
|---|
| 1146 | | -#else |
|---|
| 1512 | + |
|---|
| 1513 | +#else /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ |
|---|
| 1514 | + |
|---|
| 1147 | 1515 | static inline u8 page_kasan_tag(const struct page *page) |
|---|
| 1148 | 1516 | { |
|---|
| 1149 | 1517 | return 0xff; |
|---|
| .. | .. |
|---|
| 1151 | 1519 | |
|---|
| 1152 | 1520 | static inline void page_kasan_tag_set(struct page *page, u8 tag) { } |
|---|
| 1153 | 1521 | static inline void page_kasan_tag_reset(struct page *page) { } |
|---|
| 1154 | | -#endif |
|---|
| 1522 | + |
|---|
| 1523 | +#endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ |
|---|
| 1155 | 1524 | |
|---|
| 1156 | 1525 | static inline struct zone *page_zone(const struct page *page) |
|---|
| 1157 | 1526 | { |
|---|
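Because the per-page KASAN tag is now stored xor'ed with 0xff, a `page->flags` word whose tag bits are still zero reads back as the native tag 0xff. A hedged sketch making the round-trip explicit (only meaningful on kernels running a KASAN tag-based mode):

```c
#include <linux/mm.h>
#include <linux/bug.h>

/* Illustrative round-trip through the xor-with-0xff tag storage. */
static void kasan_page_tag_roundtrip(struct page *page)
{
	/* Freshly cleared tag bits decode to the native tag 0xff. */
	WARN_ON(page_kasan_tag(page) != 0xff);

	page_kasan_tag_set(page, 0xab);		/* stored as 0xab ^ 0xff */
	WARN_ON(page_kasan_tag(page) != 0xab);

	page_kasan_tag_reset(page);		/* back to 0xff */
}
```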
| .. | .. |
|---|
| 1319 | 1688 | } |
|---|
| 1320 | 1689 | |
|---|
| 1321 | 1690 | /* |
|---|
| 1322 | | - * Different kinds of faults, as returned by handle_mm_fault(). |
|---|
| 1323 | | - * Used to decide whether a process gets delivered SIGBUS or |
|---|
| 1324 | | - * just gets major/minor fault counters bumped up. |
|---|
| 1325 | | - */ |
|---|
| 1326 | | - |
|---|
| 1327 | | -#define VM_FAULT_OOM 0x0001 |
|---|
| 1328 | | -#define VM_FAULT_SIGBUS 0x0002 |
|---|
| 1329 | | -#define VM_FAULT_MAJOR 0x0004 |
|---|
| 1330 | | -#define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ |
|---|
| 1331 | | -#define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned small page */ |
|---|
| 1332 | | -#define VM_FAULT_HWPOISON_LARGE 0x0020 /* Hit poisoned large page. Index encoded in upper bits */ |
|---|
| 1333 | | -#define VM_FAULT_SIGSEGV 0x0040 |
|---|
| 1334 | | - |
|---|
| 1335 | | -#define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ |
|---|
| 1336 | | -#define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ |
|---|
| 1337 | | -#define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */ |
|---|
| 1338 | | -#define VM_FAULT_FALLBACK 0x0800 /* huge page fault failed, fall back to small */ |
|---|
| 1339 | | -#define VM_FAULT_DONE_COW 0x1000 /* ->fault has fully handled COW */ |
|---|
| 1340 | | -#define VM_FAULT_NEEDDSYNC 0x2000 /* ->fault did not modify page tables |
|---|
| 1341 | | - * and needs fsync() to complete (for |
|---|
| 1342 | | - * synchronous page faults in DAX) */ |
|---|
| 1343 | | - |
|---|
| 1344 | | -#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | \ |
|---|
| 1345 | | - VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \ |
|---|
| 1346 | | - VM_FAULT_FALLBACK) |
|---|
| 1347 | | - |
|---|
| 1348 | | -#define VM_FAULT_RESULT_TRACE \ |
|---|
| 1349 | | - { VM_FAULT_OOM, "OOM" }, \ |
|---|
| 1350 | | - { VM_FAULT_SIGBUS, "SIGBUS" }, \ |
|---|
| 1351 | | - { VM_FAULT_MAJOR, "MAJOR" }, \ |
|---|
| 1352 | | - { VM_FAULT_WRITE, "WRITE" }, \ |
|---|
| 1353 | | - { VM_FAULT_HWPOISON, "HWPOISON" }, \ |
|---|
| 1354 | | - { VM_FAULT_HWPOISON_LARGE, "HWPOISON_LARGE" }, \ |
|---|
| 1355 | | - { VM_FAULT_SIGSEGV, "SIGSEGV" }, \ |
|---|
| 1356 | | - { VM_FAULT_NOPAGE, "NOPAGE" }, \ |
|---|
| 1357 | | - { VM_FAULT_LOCKED, "LOCKED" }, \ |
|---|
| 1358 | | - { VM_FAULT_RETRY, "RETRY" }, \ |
|---|
| 1359 | | - { VM_FAULT_FALLBACK, "FALLBACK" }, \ |
|---|
| 1360 | | - { VM_FAULT_DONE_COW, "DONE_COW" }, \ |
|---|
| 1361 | | - { VM_FAULT_NEEDDSYNC, "NEEDDSYNC" } |
|---|
| 1362 | | - |
|---|
| 1363 | | -/* Encode hstate index for a hwpoisoned large page */ |
|---|
| 1364 | | -#define VM_FAULT_SET_HINDEX(x) ((x) << 12) |
|---|
| 1365 | | -#define VM_FAULT_GET_HINDEX(x) (((x) >> 12) & 0xf) |
|---|
| 1366 | | - |
|---|
| 1367 | | -/* |
|---|
| 1368 | 1691 | * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. |
|---|
| 1369 | 1692 | */ |
|---|
| 1370 | 1693 | extern void pagefault_out_of_memory(void); |
|---|
| 1371 | 1694 | |
|---|
| 1372 | 1695 | #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) |
|---|
| 1696 | +#define offset_in_thp(page, p) ((unsigned long)(p) & (thp_size(page) - 1)) |
|---|
| 1373 | 1697 | |
|---|
| 1374 | 1698 | /* |
|---|
| 1375 | 1699 | * Flags passed to show_mem() and show_free_areas() to suppress output in |
|---|
| .. | .. |
|---|
| 1379 | 1703 | |
|---|
| 1380 | 1704 | extern void show_free_areas(unsigned int flags, nodemask_t *nodemask); |
|---|
| 1381 | 1705 | |
|---|
| 1706 | +#ifdef CONFIG_MMU |
|---|
| 1382 | 1707 | extern bool can_do_mlock(void); |
|---|
| 1708 | +#else |
|---|
| 1709 | +static inline bool can_do_mlock(void) { return false; } |
|---|
| 1710 | +#endif |
|---|
| 1383 | 1711 | extern int user_shm_lock(size_t, struct user_struct *); |
|---|
| 1384 | 1712 | extern void user_shm_unlock(size_t, struct user_struct *); |
|---|
| 1385 | 1713 | |
|---|
| .. | .. |
|---|
| 1394 | 1722 | }; |
|---|
| 1395 | 1723 | |
|---|
| 1396 | 1724 | struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr, |
|---|
| 1397 | | - pte_t pte, bool with_public_device); |
|---|
| 1398 | | -#define vm_normal_page(vma, addr, pte) _vm_normal_page(vma, addr, pte, false) |
|---|
| 1725 | + pte_t pte, unsigned long vma_flags); |
|---|
| 1726 | +static inline struct page *vm_normal_page(struct vm_area_struct *vma, |
|---|
| 1727 | + unsigned long addr, pte_t pte) |
|---|
| 1728 | +{ |
|---|
| 1729 | + return _vm_normal_page(vma, addr, pte, vma->vm_flags); |
|---|
| 1730 | +} |
|---|
| 1399 | 1731 | |
|---|
| 1400 | 1732 | struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, |
|---|
| 1401 | 1733 | pmd_t pmd); |
|---|
| .. | .. |
|---|
| 1407 | 1739 | void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, |
|---|
| 1408 | 1740 | unsigned long start, unsigned long end); |
|---|
| 1409 | 1741 | |
|---|
| 1410 | | -/** |
|---|
| 1411 | | - * mm_walk - callbacks for walk_page_range |
|---|
| 1412 | | - * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry |
|---|
| 1413 | | - * this handler should only handle pud_trans_huge() puds. |
|---|
| 1414 | | - * the pmd_entry or pte_entry callbacks will be used for |
|---|
| 1415 | | - * regular PUDs. |
|---|
| 1416 | | - * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry |
|---|
| 1417 | | - * this handler is required to be able to handle |
|---|
| 1418 | | - * pmd_trans_huge() pmds. They may simply choose to |
|---|
| 1419 | | - * split_huge_page() instead of handling it explicitly. |
|---|
| 1420 | | - * @pte_entry: if set, called for each non-empty PTE (4th-level) entry |
|---|
| 1421 | | - * @pte_hole: if set, called for each hole at all levels |
|---|
| 1422 | | - * @hugetlb_entry: if set, called for each hugetlb entry |
|---|
| 1423 | | - * @test_walk: caller specific callback function to determine whether |
|---|
| 1424 | | - * we walk over the current vma or not. Returning 0 |
|---|
| 1425 | | - * value means "do page table walk over the current vma," |
|---|
| 1426 | | - * and a negative one means "abort current page table walk |
|---|
| 1427 | | - * right now." 1 means "skip the current vma." |
|---|
| 1428 | | - * @mm: mm_struct representing the target process of page table walk |
|---|
| 1429 | | - * @vma: vma currently walked (NULL if walking outside vmas) |
|---|
| 1430 | | - * @private: private data for callbacks' usage |
|---|
| 1431 | | - * |
|---|
| 1432 | | - * (see the comment on walk_page_range() for more details) |
|---|
| 1433 | | - */ |
|---|
| 1434 | | -struct mm_walk { |
|---|
| 1435 | | - int (*pud_entry)(pud_t *pud, unsigned long addr, |
|---|
| 1436 | | - unsigned long next, struct mm_walk *walk); |
|---|
| 1437 | | - int (*pmd_entry)(pmd_t *pmd, unsigned long addr, |
|---|
| 1438 | | - unsigned long next, struct mm_walk *walk); |
|---|
| 1439 | | - int (*pte_entry)(pte_t *pte, unsigned long addr, |
|---|
| 1440 | | - unsigned long next, struct mm_walk *walk); |
|---|
| 1441 | | - int (*pte_hole)(unsigned long addr, unsigned long next, |
|---|
| 1442 | | - struct mm_walk *walk); |
|---|
| 1443 | | - int (*hugetlb_entry)(pte_t *pte, unsigned long hmask, |
|---|
| 1444 | | - unsigned long addr, unsigned long next, |
|---|
| 1445 | | - struct mm_walk *walk); |
|---|
| 1446 | | - int (*test_walk)(unsigned long addr, unsigned long next, |
|---|
| 1447 | | - struct mm_walk *walk); |
|---|
| 1448 | | - struct mm_struct *mm; |
|---|
| 1449 | | - struct vm_area_struct *vma; |
|---|
| 1450 | | - void *private; |
|---|
| 1451 | | -}; |
|---|
| 1742 | +struct mmu_notifier_range; |
|---|
| 1452 | 1743 | |
|---|
| 1453 | | -int walk_page_range(unsigned long addr, unsigned long end, |
|---|
| 1454 | | - struct mm_walk *walk); |
|---|
| 1455 | | -int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk); |
|---|
| 1456 | 1744 | void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, |
|---|
| 1457 | 1745 | unsigned long end, unsigned long floor, unsigned long ceiling); |
|---|
| 1458 | | -int copy_page_range(struct mm_struct *dst, struct mm_struct *src, |
|---|
| 1459 | | - struct vm_area_struct *vma); |
|---|
| 1460 | | -int follow_pte_pmd(struct mm_struct *mm, unsigned long address, |
|---|
| 1461 | | - unsigned long *start, unsigned long *end, |
|---|
| 1462 | | - pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp); |
|---|
| 1746 | +int |
|---|
| 1747 | +copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); |
|---|
| 1748 | +int follow_invalidate_pte(struct mm_struct *mm, unsigned long address, |
|---|
| 1749 | + struct mmu_notifier_range *range, pte_t **ptepp, |
|---|
| 1750 | + pmd_t **pmdpp, spinlock_t **ptlp); |
|---|
| 1751 | +int follow_pte(struct mm_struct *mm, unsigned long address, |
|---|
| 1752 | + pte_t **ptepp, spinlock_t **ptlp); |
|---|
| 1463 | 1753 | int follow_pfn(struct vm_area_struct *vma, unsigned long address, |
|---|
| 1464 | 1754 | unsigned long *pfn); |
|---|
| 1465 | 1755 | int follow_phys(struct vm_area_struct *vma, unsigned long address, |
|---|
| 1466 | 1756 | unsigned int flags, unsigned long *prot, resource_size_t *phys); |
|---|
| 1467 | 1757 | int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, |
|---|
| 1468 | 1758 | void *buf, int len, int write); |
|---|
| 1759 | + |
|---|
| 1760 | +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT |
|---|
| 1761 | +static inline void vm_write_begin(struct vm_area_struct *vma) |
|---|
| 1762 | +{ |
|---|
| 1763 | + /* |
|---|
| 1764 | + * An isolated vma might be freed without the exclusive mmap_lock, but
|---|
| 1765 | + * the speculative page fault handler still needs to know it was changed.
|---|
| 1766 | + */ |
|---|
| 1767 | + if (!RB_EMPTY_NODE(&vma->vm_rb)) |
|---|
| 1768 | + mmap_assert_write_locked(vma->vm_mm); |
|---|
| 1769 | + /* |
|---|
| 1770 | + * The reads never spin and preemption
|---|
| 1771 | + * disablement is not required. |
|---|
| 1772 | + */ |
|---|
| 1773 | + raw_write_seqcount_begin(&vma->vm_sequence); |
|---|
| 1774 | +} |
|---|
| 1775 | +static inline void vm_write_end(struct vm_area_struct *vma) |
|---|
| 1776 | +{ |
|---|
| 1777 | + raw_write_seqcount_end(&vma->vm_sequence); |
|---|
| 1778 | +} |
|---|
| 1779 | +#else |
|---|
| 1780 | +static inline void vm_write_begin(struct vm_area_struct *vma) |
|---|
| 1781 | +{ |
|---|
| 1782 | +} |
|---|
| 1783 | +static inline void vm_write_end(struct vm_area_struct *vma) |
|---|
| 1784 | +{ |
|---|
| 1785 | +} |
|---|
| 1786 | +#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */ |
|---|
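For context (not part of the diff itself): a minimal sketch of how a writer is expected to bracket a VMA modification with the helpers added above so a concurrent speculative fault can detect the change. `update_vma_flags()` and `new_flags` are hypothetical names; real callers hold the mmap lock for write.

```c
/*
 * Sketch only: writers bump vma->vm_sequence around any modification so a
 * concurrent speculative fault that sampled the count can notice the change
 * and fall back to the locked path. With !CONFIG_SPECULATIVE_PAGE_FAULT the
 * helpers compile away to nothing.
 */
static void update_vma_flags(struct vm_area_struct *vma, unsigned long new_flags)
{
	vm_write_begin(vma);		/* raw_write_seqcount_begin(&vma->vm_sequence) */
	vma->vm_flags = new_flags;	/* the change being published */
	vm_write_end(vma);		/* raw_write_seqcount_end(&vma->vm_sequence) */
}
```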
| 1469 | 1787 | |
|---|
| 1470 | 1788 | extern void truncate_pagecache(struct inode *inode, loff_t new); |
|---|
| 1471 | 1789 | extern void truncate_setsize(struct inode *inode, loff_t newsize); |
|---|
| .. | .. |
|---|
| 1477 | 1795 | |
|---|
| 1478 | 1796 | #ifdef CONFIG_MMU |
|---|
| 1479 | 1797 | extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma, |
|---|
| 1480 | | - unsigned long address, unsigned int flags); |
|---|
| 1481 | | -extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, |
|---|
| 1798 | + unsigned long address, unsigned int flags, |
|---|
| 1799 | + struct pt_regs *regs); |
|---|
| 1800 | +extern int fixup_user_fault(struct mm_struct *mm, |
|---|
| 1482 | 1801 | unsigned long address, unsigned int fault_flags, |
|---|
| 1483 | 1802 | bool *unlocked); |
|---|
| 1803 | + |
|---|
| 1804 | +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT |
|---|
| 1805 | +extern vm_fault_t __handle_speculative_fault(struct mm_struct *mm, |
|---|
| 1806 | + unsigned long address, |
|---|
| 1807 | + unsigned int flags, |
|---|
| 1808 | + struct vm_area_struct **vma, |
|---|
| 1809 | + struct pt_regs *regs); |
|---|
| 1810 | +static inline vm_fault_t handle_speculative_fault(struct mm_struct *mm, |
|---|
| 1811 | + unsigned long address, |
|---|
| 1812 | + unsigned int flags, |
|---|
| 1813 | + struct vm_area_struct **vma, |
|---|
| 1814 | + struct pt_regs *regs) |
|---|
| 1815 | +{ |
|---|
| 1816 | + /* |
|---|
| 1817 | + * Try speculative page fault for multithreaded user space task only. |
|---|
| 1818 | + */ |
|---|
| 1819 | + if (!(flags & FAULT_FLAG_USER) || atomic_read(&mm->mm_users) == 1) { |
|---|
| 1820 | + *vma = NULL; |
|---|
| 1821 | + return VM_FAULT_RETRY; |
|---|
| 1822 | + } |
|---|
| 1823 | + return __handle_speculative_fault(mm, address, flags, vma, regs); |
|---|
| 1824 | +} |
|---|
| 1825 | +extern bool can_reuse_spf_vma(struct vm_area_struct *vma, |
|---|
| 1826 | + unsigned long address); |
|---|
| 1827 | +#else |
|---|
| 1828 | +static inline vm_fault_t handle_speculative_fault(struct mm_struct *mm, |
|---|
| 1829 | + unsigned long address, |
|---|
| 1830 | + unsigned int flags, |
|---|
| 1831 | + struct vm_area_struct **vma, |
|---|
| 1832 | + struct pt_regs *regs) |
|---|
| 1833 | +{ |
|---|
| 1834 | + return VM_FAULT_RETRY; |
|---|
| 1835 | +} |
|---|
| 1836 | +static inline bool can_reuse_spf_vma(struct vm_area_struct *vma, |
|---|
| 1837 | + unsigned long address) |
|---|
| 1838 | +{ |
|---|
| 1839 | + return false; |
|---|
| 1840 | +} |
|---|
| 1841 | +#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */ |
|---|
| 1842 | + |
|---|
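To show how the two fault entry points above are meant to compose, here is a hedged sketch of the shape of an architecture fault handler. The function name is hypothetical, access/permission checks and stack expansion are omitted, and real arch code does considerably more validation.

```c
/*
 * Sketch only: try the lockless speculative path first; VM_FAULT_RETRY means
 * "could not be handled speculatively", so fall back to the mmap-lock path.
 */
static vm_fault_t example_do_page_fault(struct mm_struct *mm, unsigned long addr,
					unsigned int flags, struct pt_regs *regs)
{
	struct vm_area_struct *vma = NULL;
	vm_fault_t fault;

	fault = handle_speculative_fault(mm, addr, flags, &vma, regs);
	if (fault != VM_FAULT_RETRY)
		return fault;

	mmap_read_lock(mm);
	/* Reuse the VMA found by the speculative walk only if it is still valid. */
	if (!vma || !can_reuse_spf_vma(vma, addr))
		vma = find_vma(mm, addr);
	if (!vma || vma->vm_start > addr) {
		mmap_read_unlock(mm);
		return VM_FAULT_SIGSEGV;
	}

	fault = handle_mm_fault(vma, addr, flags, regs);
	/*
	 * Assuming FAULT_FLAG_ALLOW_RETRY semantics: on VM_FAULT_RETRY the
	 * mmap lock has already been dropped by handle_mm_fault().
	 */
	if (!(fault & VM_FAULT_RETRY))
		mmap_read_unlock(mm);

	return fault;
}
```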
| 1484 | 1843 | void unmap_mapping_page(struct page *page); |
|---|
| 1485 | 1844 | void unmap_mapping_pages(struct address_space *mapping, |
|---|
| 1486 | 1845 | pgoff_t start, pgoff_t nr, bool even_cows); |
|---|
| .. | .. |
|---|
| 1488 | 1847 | loff_t const holebegin, loff_t const holelen, int even_cows); |
|---|
| 1489 | 1848 | #else |
|---|
| 1490 | 1849 | static inline vm_fault_t handle_mm_fault(struct vm_area_struct *vma, |
|---|
| 1491 | | - unsigned long address, unsigned int flags) |
|---|
| 1850 | + unsigned long address, unsigned int flags, |
|---|
| 1851 | + struct pt_regs *regs) |
|---|
| 1492 | 1852 | { |
|---|
| 1493 | 1853 | /* should never happen if there's no MMU */ |
|---|
| 1494 | 1854 | BUG(); |
|---|
| 1495 | 1855 | return VM_FAULT_SIGBUS; |
|---|
| 1496 | 1856 | } |
|---|
| 1497 | | -static inline int fixup_user_fault(struct task_struct *tsk, |
|---|
| 1498 | | - struct mm_struct *mm, unsigned long address, |
|---|
| 1857 | +static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address, |
|---|
| 1499 | 1858 | unsigned int fault_flags, bool *unlocked) |
|---|
| 1500 | 1859 | { |
|---|
| 1501 | 1860 | /* should never happen if there's no MMU */ |
|---|
| .. | .. |
|---|
| 1522 | 1881 | extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, |
|---|
| 1523 | 1882 | unsigned long addr, void *buf, int len, unsigned int gup_flags); |
|---|
| 1524 | 1883 | |
|---|
| 1525 | | -long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, |
|---|
| 1884 | +long get_user_pages_remote(struct mm_struct *mm, |
|---|
| 1526 | 1885 | unsigned long start, unsigned long nr_pages, |
|---|
| 1527 | 1886 | unsigned int gup_flags, struct page **pages, |
|---|
| 1528 | 1887 | struct vm_area_struct **vmas, int *locked); |
|---|
| 1888 | +long pin_user_pages_remote(struct mm_struct *mm, |
|---|
| 1889 | + unsigned long start, unsigned long nr_pages, |
|---|
| 1890 | + unsigned int gup_flags, struct page **pages, |
|---|
| 1891 | + struct vm_area_struct **vmas, int *locked); |
|---|
| 1529 | 1892 | long get_user_pages(unsigned long start, unsigned long nr_pages, |
|---|
| 1530 | 1893 | unsigned int gup_flags, struct page **pages, |
|---|
| 1531 | 1894 | struct vm_area_struct **vmas); |
|---|
| 1895 | +long pin_user_pages(unsigned long start, unsigned long nr_pages, |
|---|
| 1896 | + unsigned int gup_flags, struct page **pages, |
|---|
| 1897 | + struct vm_area_struct **vmas); |
|---|
| 1532 | 1898 | long get_user_pages_locked(unsigned long start, unsigned long nr_pages, |
|---|
| 1899 | + unsigned int gup_flags, struct page **pages, int *locked); |
|---|
| 1900 | +long pin_user_pages_locked(unsigned long start, unsigned long nr_pages, |
|---|
| 1533 | 1901 | unsigned int gup_flags, struct page **pages, int *locked); |
|---|
| 1534 | 1902 | long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, |
|---|
| 1535 | 1903 | struct page **pages, unsigned int gup_flags); |
|---|
| 1536 | | -#ifdef CONFIG_FS_DAX |
|---|
| 1537 | | -long get_user_pages_longterm(unsigned long start, unsigned long nr_pages, |
|---|
| 1538 | | - unsigned int gup_flags, struct page **pages, |
|---|
| 1539 | | - struct vm_area_struct **vmas); |
|---|
| 1540 | | -#else |
|---|
| 1541 | | -static inline long get_user_pages_longterm(unsigned long start, |
|---|
| 1542 | | - unsigned long nr_pages, unsigned int gup_flags, |
|---|
| 1543 | | - struct page **pages, struct vm_area_struct **vmas) |
|---|
| 1544 | | -{ |
|---|
| 1545 | | - return get_user_pages(start, nr_pages, gup_flags, pages, vmas); |
|---|
| 1546 | | -} |
|---|
| 1547 | | -#endif /* CONFIG_FS_DAX */ |
|---|
| 1904 | +long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, |
|---|
| 1905 | + struct page **pages, unsigned int gup_flags); |
|---|
| 1548 | 1906 | |
|---|
| 1549 | | -int get_user_pages_fast(unsigned long start, int nr_pages, int write, |
|---|
| 1550 | | - struct page **pages); |
|---|
| 1907 | +int get_user_pages_fast(unsigned long start, int nr_pages, |
|---|
| 1908 | + unsigned int gup_flags, struct page **pages); |
|---|
| 1909 | +int pin_user_pages_fast(unsigned long start, int nr_pages, |
|---|
| 1910 | + unsigned int gup_flags, struct page **pages); |
|---|
| 1911 | + |
|---|
| 1912 | +int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc); |
|---|
| 1913 | +int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc, |
|---|
| 1914 | + struct task_struct *task, bool bypass_rlim); |
|---|
| 1551 | 1915 | |
|---|
| 1552 | 1916 | /* Container for pinned pfns / pages */ |
|---|
| 1553 | 1917 | struct frame_vector { |
|---|
| .. | .. |
|---|
| 1555 | 1919 | unsigned int nr_frames; /* Number of frames stored in ptrs array */ |
|---|
| 1556 | 1920 | bool got_ref; /* Did we pin pages by getting page ref? */ |
|---|
| 1557 | 1921 | bool is_pfns; /* Does array contain pages or pfns? */ |
|---|
| 1558 | | - void *ptrs[0]; /* Array of pinned pfns / pages. Use |
|---|
| 1922 | + void *ptrs[]; /* Array of pinned pfns / pages. Use |
|---|
| 1559 | 1923 | * pfns_vector_pages() or pfns_vector_pfns() |
|---|
| 1560 | 1924 | * for access */ |
|---|
| 1561 | 1925 | }; |
|---|
| .. | .. |
|---|
| 1622 | 1986 | |
|---|
| 1623 | 1987 | int get_cmdline(struct task_struct *task, char *buffer, int buflen); |
|---|
| 1624 | 1988 | |
|---|
| 1625 | | -static inline bool vma_is_anonymous(struct vm_area_struct *vma) |
|---|
| 1626 | | -{ |
|---|
| 1627 | | - return !vma->vm_ops; |
|---|
| 1628 | | -} |
|---|
| 1629 | | - |
|---|
| 1630 | | -#ifdef CONFIG_SHMEM |
|---|
| 1631 | | -/* |
|---|
| 1632 | | - * The vma_is_shmem is not inline because it is used only by slow |
|---|
| 1633 | | - * paths in userfault. |
|---|
| 1634 | | - */ |
|---|
| 1635 | | -bool vma_is_shmem(struct vm_area_struct *vma); |
|---|
| 1636 | | -#else |
|---|
| 1637 | | -static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; } |
|---|
| 1638 | | -#endif |
|---|
| 1639 | | - |
|---|
| 1640 | | -int vma_is_stack_for_current(struct vm_area_struct *vma); |
|---|
| 1641 | | - |
|---|
| 1642 | 1989 | extern unsigned long move_page_tables(struct vm_area_struct *vma, |
|---|
| 1643 | 1990 | unsigned long old_addr, struct vm_area_struct *new_vma, |
|---|
| 1644 | 1991 | unsigned long new_addr, unsigned long len, |
|---|
| 1645 | 1992 | bool need_rmap_locks); |
|---|
| 1993 | + |
|---|
| 1994 | +/* |
|---|
| 1995 | + * Flags used by change_protection(). For now we make it a bitmap so |
|---|
| 1996 | + * that we can pass in multiple flags just like parameters. However |
|---|
| 1997 | + * for now all the callers only use one of the flags at a
|---|
| 1998 | + * time.
|---|
| 1999 | + */ |
|---|
| 2000 | +/* Whether we should allow dirty bit accounting */ |
|---|
| 2001 | +#define MM_CP_DIRTY_ACCT (1UL << 0) |
|---|
| 2002 | +/* Whether this protection change is for NUMA hints */ |
|---|
| 2003 | +#define MM_CP_PROT_NUMA (1UL << 1) |
|---|
| 2004 | +/* Whether this change is for write protecting */ |
|---|
| 2005 | +#define MM_CP_UFFD_WP (1UL << 2) /* do wp */ |
|---|
| 2006 | +#define MM_CP_UFFD_WP_RESOLVE (1UL << 3) /* Resolve wp */ |
|---|
| 2007 | +#define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \ |
|---|
| 2008 | + MM_CP_UFFD_WP_RESOLVE) |
|---|
| 2009 | + |
|---|
| 1646 | 2010 | extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, |
|---|
| 1647 | 2011 | unsigned long end, pgprot_t newprot, |
|---|
| 1648 | | - int dirty_accountable, int prot_numa); |
|---|
| 2012 | + unsigned long cp_flags); |
|---|
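As a concrete illustration of the new `cp_flags` argument, a sketch loosely modeled on a userfaultfd-style write-protect caller. `wp_range()` is hypothetical; real callers also handle THP/hugetlb details and hold the mmap lock.

```c
/* Sketch only: pass exactly one MM_CP_* flag per call, as the comment notes. */
static void wp_range(struct vm_area_struct *vma, unsigned long start,
		     unsigned long end, bool enable_wp)
{
	unsigned long cp_flags = enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE;
	pgprot_t newprot = enable_wp ?
			vm_get_page_prot(vma->vm_flags & ~VM_WRITE) :
			vm_get_page_prot(vma->vm_flags);

	change_protection(vma, start, end, newprot, cp_flags);
}
```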
| 1649 | 2013 | extern int mprotect_fixup(struct vm_area_struct *vma, |
|---|
| 1650 | 2014 | struct vm_area_struct **pprev, unsigned long start, |
|---|
| 1651 | 2015 | unsigned long end, unsigned long newflags); |
|---|
| .. | .. |
|---|
| 1653 | 2017 | /* |
|---|
| 1654 | 2018 | * doesn't attempt to fault and will return short. |
|---|
| 1655 | 2019 | */ |
|---|
| 1656 | | -int __get_user_pages_fast(unsigned long start, int nr_pages, int write, |
|---|
| 1657 | | - struct page **pages); |
|---|
| 2020 | +int get_user_pages_fast_only(unsigned long start, int nr_pages, |
|---|
| 2021 | + unsigned int gup_flags, struct page **pages); |
|---|
| 2022 | +int pin_user_pages_fast_only(unsigned long start, int nr_pages, |
|---|
| 2023 | + unsigned int gup_flags, struct page **pages); |
|---|
| 2024 | + |
|---|
| 2025 | +static inline bool get_user_page_fast_only(unsigned long addr, |
|---|
| 2026 | + unsigned int gup_flags, struct page **pagep) |
|---|
| 2027 | +{ |
|---|
| 2028 | + return get_user_pages_fast_only(addr, 1, gup_flags, pagep) == 1; |
|---|
| 2029 | +} |
|---|
| 1658 | 2030 | /* |
|---|
| 1659 | 2031 | * per-process(per-mm_struct) statistics. |
|---|
| 1660 | 2032 | */ |
|---|
| .. | .. |
|---|
| 1765 | 2137 | } |
|---|
| 1766 | 2138 | #endif |
|---|
| 1767 | 2139 | |
|---|
| 1768 | | -#ifndef __HAVE_ARCH_PTE_DEVMAP |
|---|
| 2140 | +#ifndef CONFIG_ARCH_HAS_PTE_SPECIAL |
|---|
| 2141 | +static inline int pte_special(pte_t pte) |
|---|
| 2142 | +{ |
|---|
| 2143 | + return 0; |
|---|
| 2144 | +} |
|---|
| 2145 | + |
|---|
| 2146 | +static inline pte_t pte_mkspecial(pte_t pte) |
|---|
| 2147 | +{ |
|---|
| 2148 | + return pte; |
|---|
| 2149 | +} |
|---|
| 2150 | +#endif |
|---|
| 2151 | + |
|---|
| 2152 | +#ifndef CONFIG_ARCH_HAS_PTE_DEVMAP |
|---|
| 1769 | 2153 | static inline int pte_devmap(pte_t pte) |
|---|
| 1770 | 2154 | { |
|---|
| 1771 | 2155 | return 0; |
|---|
| .. | .. |
|---|
| 1881 | 2265 | static inline void mm_dec_nr_ptes(struct mm_struct *mm) {} |
|---|
| 1882 | 2266 | #endif |
|---|
| 1883 | 2267 | |
|---|
| 1884 | | -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address); |
|---|
| 1885 | | -int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); |
|---|
| 2268 | +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd); |
|---|
| 2269 | +int __pte_alloc_kernel(pmd_t *pmd); |
|---|
| 1886 | 2270 | |
|---|
| 1887 | | -/* |
|---|
| 1888 | | - * The following ifdef needed to get the 4level-fixup.h header to work. |
|---|
| 1889 | | - * Remove it when 4level-fixup.h has been removed. |
|---|
| 1890 | | - */ |
|---|
| 1891 | | -#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) |
|---|
| 2271 | +#if defined(CONFIG_MMU) |
|---|
| 1892 | 2272 | |
|---|
| 1893 | | -#ifndef __ARCH_HAS_5LEVEL_HACK |
|---|
| 1894 | 2273 | static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd, |
|---|
| 1895 | 2274 | unsigned long address) |
|---|
| 1896 | 2275 | { |
|---|
| .. | .. |
|---|
| 1904 | 2283 | return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ? |
|---|
| 1905 | 2284 | NULL : pud_offset(p4d, address); |
|---|
| 1906 | 2285 | } |
|---|
| 1907 | | -#endif /* !__ARCH_HAS_5LEVEL_HACK */ |
|---|
| 1908 | 2286 | |
|---|
| 1909 | 2287 | static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) |
|---|
| 1910 | 2288 | { |
|---|
| 1911 | 2289 | return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))? |
|---|
| 1912 | 2290 | NULL: pmd_offset(pud, address); |
|---|
| 1913 | 2291 | } |
|---|
| 1914 | | -#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ |
|---|
| 2292 | +#endif /* CONFIG_MMU */ |
|---|
| 1915 | 2293 | |
|---|
| 1916 | 2294 | #if USE_SPLIT_PTE_PTLOCKS |
|---|
| 1917 | 2295 | #if ALLOC_SPLIT_PTLOCKS |
|---|
| .. | .. |
|---|
| 1964 | 2342 | return true; |
|---|
| 1965 | 2343 | } |
|---|
| 1966 | 2344 | |
|---|
| 1967 | | -/* Reset page->mapping so free_pages_check won't complain. */ |
|---|
| 1968 | | -static inline void pte_lock_deinit(struct page *page) |
|---|
| 1969 | | -{ |
|---|
| 1970 | | - page->mapping = NULL; |
|---|
| 1971 | | - ptlock_free(page); |
|---|
| 1972 | | -} |
|---|
| 1973 | | - |
|---|
| 1974 | 2345 | #else /* !USE_SPLIT_PTE_PTLOCKS */ |
|---|
| 1975 | 2346 | /* |
|---|
| 1976 | 2347 | * We use mm->page_table_lock to guard all pagetable pages of the mm. |
|---|
| .. | .. |
|---|
| 1981 | 2352 | } |
|---|
| 1982 | 2353 | static inline void ptlock_cache_init(void) {} |
|---|
| 1983 | 2354 | static inline bool ptlock_init(struct page *page) { return true; } |
|---|
| 1984 | | -static inline void pte_lock_deinit(struct page *page) {} |
|---|
| 2355 | +static inline void ptlock_free(struct page *page) {} |
|---|
| 1985 | 2356 | #endif /* USE_SPLIT_PTE_PTLOCKS */ |
|---|
| 1986 | 2357 | |
|---|
| 1987 | 2358 | static inline void pgtable_init(void) |
|---|
| .. | .. |
|---|
| 1990 | 2361 | pgtable_cache_init(); |
|---|
| 1991 | 2362 | } |
|---|
| 1992 | 2363 | |
|---|
| 1993 | | -static inline bool pgtable_page_ctor(struct page *page) |
|---|
| 2364 | +static inline bool pgtable_pte_page_ctor(struct page *page) |
|---|
| 1994 | 2365 | { |
|---|
| 1995 | 2366 | if (!ptlock_init(page)) |
|---|
| 1996 | 2367 | return false; |
|---|
| .. | .. |
|---|
| 1999 | 2370 | return true; |
|---|
| 2000 | 2371 | } |
|---|
| 2001 | 2372 | |
|---|
| 2002 | | -static inline void pgtable_page_dtor(struct page *page) |
|---|
| 2373 | +static inline void pgtable_pte_page_dtor(struct page *page) |
|---|
| 2003 | 2374 | { |
|---|
| 2004 | | - pte_lock_deinit(page); |
|---|
| 2375 | + ptlock_free(page); |
|---|
| 2005 | 2376 | __ClearPageTable(page); |
|---|
| 2006 | 2377 | dec_zone_page_state(page, NR_PAGETABLE); |
|---|
| 2007 | 2378 | } |
|---|
| .. | .. |
|---|
| 2020 | 2391 | pte_unmap(pte); \ |
|---|
| 2021 | 2392 | } while (0) |
|---|
| 2022 | 2393 | |
|---|
| 2023 | | -#define pte_alloc(mm, pmd, address) \ |
|---|
| 2024 | | - (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address)) |
|---|
| 2394 | +#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd)) |
|---|
| 2025 | 2395 | |
|---|
| 2026 | 2396 | #define pte_alloc_map(mm, pmd, address) \ |
|---|
| 2027 | | - (pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address)) |
|---|
| 2397 | + (pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address)) |
|---|
| 2028 | 2398 | |
|---|
| 2029 | 2399 | #define pte_alloc_map_lock(mm, pmd, address, ptlp) \ |
|---|
| 2030 | | - (pte_alloc(mm, pmd, address) ? \ |
|---|
| 2400 | + (pte_alloc(mm, pmd) ? \ |
|---|
| 2031 | 2401 | NULL : pte_offset_map_lock(mm, pmd, address, ptlp)) |
|---|
| 2032 | 2402 | |
|---|
| 2033 | 2403 | #define pte_alloc_kernel(pmd, address) \ |
|---|
| 2034 | | - ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \ |
|---|
| 2404 | + ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \ |
|---|
| 2035 | 2405 | NULL: pte_offset_kernel(pmd, address)) |
|---|
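The `pte_alloc_map_lock()` helper above is typically used in the pattern below; a minimal sketch, with `install_one_pte()` as a hypothetical name and the `entry` value supplied by the caller.

```c
/*
 * Sketch only: allocate the PTE page if needed, take the (possibly split)
 * PTE lock, install an entry into an empty slot, then unlock and unmap.
 */
static int install_one_pte(struct mm_struct *mm, pmd_t *pmd,
			   unsigned long addr, pte_t entry)
{
	spinlock_t *ptl;
	pte_t *pte;

	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);	/* allocates a PTE page if needed */
	if (!pte)
		return -ENOMEM;
	if (pte_none(*pte))
		set_pte_at(mm, addr, pte, entry);	/* only install into an empty slot */
	pte_unmap_unlock(pte, ptl);			/* unmaps the PTE and drops ptl */
	return 0;
}
```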
| 2036 | 2406 | |
|---|
| 2037 | 2407 | #if USE_SPLIT_PMD_PTLOCKS |
|---|
| .. | .. |
|---|
| 2047 | 2417 | return ptlock_ptr(pmd_to_page(pmd)); |
|---|
| 2048 | 2418 | } |
|---|
| 2049 | 2419 | |
|---|
| 2050 | | -static inline bool pgtable_pmd_page_ctor(struct page *page) |
|---|
| 2420 | +static inline bool pmd_ptlock_init(struct page *page) |
|---|
| 2051 | 2421 | { |
|---|
| 2052 | 2422 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
|---|
| 2053 | 2423 | page->pmd_huge_pte = NULL; |
|---|
| .. | .. |
|---|
| 2055 | 2425 | return ptlock_init(page); |
|---|
| 2056 | 2426 | } |
|---|
| 2057 | 2427 | |
|---|
| 2058 | | -static inline void pgtable_pmd_page_dtor(struct page *page) |
|---|
| 2428 | +static inline void pmd_ptlock_free(struct page *page) |
|---|
| 2059 | 2429 | { |
|---|
| 2060 | 2430 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
|---|
| 2061 | 2431 | VM_BUG_ON_PAGE(page->pmd_huge_pte, page); |
|---|
| .. | .. |
|---|
| 2072 | 2442 | return &mm->page_table_lock; |
|---|
| 2073 | 2443 | } |
|---|
| 2074 | 2444 | |
|---|
| 2075 | | -static inline bool pgtable_pmd_page_ctor(struct page *page) { return true; } |
|---|
| 2076 | | -static inline void pgtable_pmd_page_dtor(struct page *page) {} |
|---|
| 2445 | +static inline bool pmd_ptlock_init(struct page *page) { return true; } |
|---|
| 2446 | +static inline void pmd_ptlock_free(struct page *page) {} |
|---|
| 2077 | 2447 | |
|---|
| 2078 | 2448 | #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) |
|---|
| 2079 | 2449 | |
|---|
| .. | .. |
|---|
| 2084 | 2454 | spinlock_t *ptl = pmd_lockptr(mm, pmd); |
|---|
| 2085 | 2455 | spin_lock(ptl); |
|---|
| 2086 | 2456 | return ptl; |
|---|
| 2457 | +} |
|---|
| 2458 | + |
|---|
| 2459 | +static inline bool pgtable_pmd_page_ctor(struct page *page) |
|---|
| 2460 | +{ |
|---|
| 2461 | + if (!pmd_ptlock_init(page)) |
|---|
| 2462 | + return false; |
|---|
| 2463 | + __SetPageTable(page); |
|---|
| 2464 | + inc_zone_page_state(page, NR_PAGETABLE); |
|---|
| 2465 | + return true; |
|---|
| 2466 | +} |
|---|
| 2467 | + |
|---|
| 2468 | +static inline void pgtable_pmd_page_dtor(struct page *page) |
|---|
| 2469 | +{ |
|---|
| 2470 | + pmd_ptlock_free(page); |
|---|
| 2471 | + __ClearPageTable(page); |
|---|
| 2472 | + dec_zone_page_state(page, NR_PAGETABLE); |
|---|
| 2087 | 2473 | } |
|---|
| 2088 | 2474 | |
|---|
| 2089 | 2475 | /* |
|---|
| .. | .. |
|---|
| 2106 | 2492 | } |
|---|
| 2107 | 2493 | |
|---|
| 2108 | 2494 | extern void __init pagecache_init(void); |
|---|
| 2109 | | -extern void free_area_init(unsigned long * zones_size); |
|---|
| 2110 | | -extern void __init free_area_init_node(int nid, unsigned long * zones_size, |
|---|
| 2111 | | - unsigned long zone_start_pfn, unsigned long *zholes_size); |
|---|
| 2495 | +extern void __init free_area_init_memoryless_node(int nid); |
|---|
| 2112 | 2496 | extern void free_initmem(void); |
|---|
| 2113 | 2497 | |
|---|
| 2114 | 2498 | /* |
|---|
| .. | .. |
|---|
| 2118 | 2502 | * Return pages freed into the buddy system. |
|---|
| 2119 | 2503 | */ |
|---|
| 2120 | 2504 | extern unsigned long free_reserved_area(void *start, void *end, |
|---|
| 2121 | | - int poison, char *s); |
|---|
| 2505 | + int poison, const char *s); |
|---|
| 2122 | 2506 | |
|---|
| 2123 | 2507 | #ifdef CONFIG_HIGHMEM |
|---|
| 2124 | 2508 | /* |
|---|
| .. | .. |
|---|
| 2178 | 2562 | return phys_pages; |
|---|
| 2179 | 2563 | } |
|---|
| 2180 | 2564 | |
|---|
| 2181 | | -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
|---|
| 2182 | 2565 | /* |
|---|
| 2183 | | - * With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its |
|---|
| 2184 | | - * zones, allocate the backing mem_map and account for memory holes in a more |
|---|
| 2185 | | - * architecture independent manner. This is a substitute for creating the |
|---|
| 2186 | | - * zone_sizes[] and zholes_size[] arrays and passing them to |
|---|
| 2187 | | - * free_area_init_node() |
|---|
| 2566 | + * Using memblock node mappings, an architecture may initialise its |
|---|
| 2567 | + * zones, allocate the backing mem_map and account for memory holes in an |
|---|
| 2568 | + * architecture independent manner. |
|---|
| 2188 | 2569 | * |
|---|
| 2189 | 2570 | * An architecture is expected to register ranges of page frames backed by
|---|
| 2190 | 2571 | * physical memory with memblock_add[_node]() before calling |
|---|
| 2191 | | - * free_area_init_nodes() passing in the PFN each zone ends at. At a basic |
|---|
| 2572 | + * free_area_init() passing in the PFN each zone ends at. At a basic |
|---|
| 2192 | 2573 | * usage, an architecture is expected to do something like |
|---|
| 2193 | 2574 | * |
|---|
| 2194 | 2575 | * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn, |
|---|
| 2195 | 2576 | * max_highmem_pfn}; |
|---|
| 2196 | 2577 | * for_each_valid_physical_page_range() |
|---|
| 2197 | 2578 | * memblock_add_node(base, size, nid) |
|---|
| 2198 | | - * free_area_init_nodes(max_zone_pfns); |
|---|
| 2199 | | - * |
|---|
| 2200 | | - * free_bootmem_with_active_regions() calls free_bootmem_node() for each |
|---|
| 2201 | | - * registered physical page range. Similarly |
|---|
| 2202 | | - * sparse_memory_present_with_active_regions() calls memory_present() for |
|---|
| 2203 | | - * each range when SPARSEMEM is enabled. |
|---|
| 2204 | | - * |
|---|
| 2205 | | - * See mm/page_alloc.c for more information on each function exposed by |
|---|
| 2206 | | - * CONFIG_HAVE_MEMBLOCK_NODE_MAP. |
|---|
| 2579 | + * free_area_init(max_zone_pfns); |
|---|
| 2207 | 2580 | */ |
|---|
| 2208 | | -extern void free_area_init_nodes(unsigned long *max_zone_pfn); |
|---|
| 2581 | +void free_area_init(unsigned long *max_zone_pfn); |
|---|
| 2209 | 2582 | unsigned long node_map_pfn_alignment(void); |
|---|
| 2210 | 2583 | unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, |
|---|
| 2211 | 2584 | unsigned long end_pfn); |
|---|
| .. | .. |
|---|
| 2214 | 2587 | extern void get_pfn_range_for_nid(unsigned int nid, |
|---|
| 2215 | 2588 | unsigned long *start_pfn, unsigned long *end_pfn); |
|---|
| 2216 | 2589 | extern unsigned long find_min_pfn_with_active_regions(void); |
|---|
| 2217 | | -extern void free_bootmem_with_active_regions(int nid, |
|---|
| 2218 | | - unsigned long max_low_pfn); |
|---|
| 2219 | | -extern void sparse_memory_present_with_active_regions(int nid); |
|---|
| 2220 | 2590 | |
|---|
| 2221 | | -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
|---|
| 2222 | | - |
|---|
| 2223 | | -#if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \ |
|---|
| 2224 | | - !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) |
|---|
| 2225 | | -static inline int __early_pfn_to_nid(unsigned long pfn, |
|---|
| 2226 | | - struct mminit_pfnnid_cache *state) |
|---|
| 2591 | +#ifndef CONFIG_NEED_MULTIPLE_NODES |
|---|
| 2592 | +static inline int early_pfn_to_nid(unsigned long pfn) |
|---|
| 2227 | 2593 | { |
|---|
| 2228 | 2594 | return 0; |
|---|
| 2229 | 2595 | } |
|---|
| .. | .. |
|---|
| 2235 | 2601 | struct mminit_pfnnid_cache *state); |
|---|
| 2236 | 2602 | #endif |
|---|
| 2237 | 2603 | |
|---|
| 2238 | | -#if defined(CONFIG_HAVE_MEMBLOCK) && !defined(CONFIG_FLAT_NODE_MEM_MAP) |
|---|
| 2239 | | -void zero_resv_unavail(void); |
|---|
| 2240 | | -#else |
|---|
| 2241 | | -static inline void zero_resv_unavail(void) {} |
|---|
| 2242 | | -#endif |
|---|
| 2243 | | - |
|---|
| 2244 | 2604 | extern void set_dma_reserve(unsigned long new_dma_reserve); |
|---|
| 2245 | | -extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, |
|---|
| 2246 | | - enum meminit_context, struct vmem_altmap *); |
|---|
| 2605 | +extern void memmap_init_zone(unsigned long, int, unsigned long, |
|---|
| 2606 | + unsigned long, unsigned long, enum meminit_context, |
|---|
| 2607 | + struct vmem_altmap *, int migratetype); |
|---|
| 2247 | 2608 | extern void setup_per_zone_wmarks(void); |
|---|
| 2248 | 2609 | extern int __meminit init_per_zone_wmark_min(void); |
|---|
| 2249 | 2610 | extern void mem_init(void); |
|---|
| .. | .. |
|---|
| 2261 | 2622 | |
|---|
| 2262 | 2623 | extern void setup_per_cpu_pageset(void); |
|---|
| 2263 | 2624 | |
|---|
| 2264 | | -extern void zone_pcp_update(struct zone *zone); |
|---|
| 2265 | | -extern void zone_pcp_reset(struct zone *zone); |
|---|
| 2266 | | - |
|---|
| 2267 | 2625 | /* page_alloc.c */ |
|---|
| 2268 | 2626 | extern int min_free_kbytes; |
|---|
| 2627 | +extern int watermark_boost_factor; |
|---|
| 2269 | 2628 | extern int watermark_scale_factor; |
|---|
| 2629 | +extern bool arch_has_descending_max_zone_pfns(void); |
|---|
| 2270 | 2630 | |
|---|
| 2271 | 2631 | /* nommu.c */ |
|---|
| 2272 | 2632 | extern atomic_long_t mmap_pages_allocated; |
|---|
| .. | .. |
|---|
| 2310 | 2670 | extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); |
|---|
| 2311 | 2671 | extern int __vma_adjust(struct vm_area_struct *vma, unsigned long start, |
|---|
| 2312 | 2672 | unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert, |
|---|
| 2313 | | - struct vm_area_struct *expand); |
|---|
| 2673 | + struct vm_area_struct *expand, bool keep_locked); |
|---|
| 2314 | 2674 | static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start, |
|---|
| 2315 | 2675 | unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert) |
|---|
| 2316 | 2676 | { |
|---|
| 2317 | | - return __vma_adjust(vma, start, end, pgoff, insert, NULL); |
|---|
| 2677 | + return __vma_adjust(vma, start, end, pgoff, insert, NULL, false); |
|---|
| 2318 | 2678 | } |
|---|
| 2319 | | -extern struct vm_area_struct *vma_merge(struct mm_struct *, |
|---|
| 2679 | + |
|---|
| 2680 | +extern struct vm_area_struct *__vma_merge(struct mm_struct *mm, |
|---|
| 2320 | 2681 | struct vm_area_struct *prev, unsigned long addr, unsigned long end, |
|---|
| 2321 | | - unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, |
|---|
| 2322 | | - struct mempolicy *, struct vm_userfaultfd_ctx, const char __user *); |
|---|
| 2682 | + unsigned long vm_flags, struct anon_vma *anon, struct file *file, |
|---|
| 2683 | + pgoff_t pgoff, struct mempolicy *mpol, struct vm_userfaultfd_ctx uff, |
|---|
| 2684 | + const char __user *user, bool keep_locked); |
|---|
| 2685 | + |
|---|
| 2686 | +static inline struct vm_area_struct *vma_merge(struct mm_struct *mm, |
|---|
| 2687 | + struct vm_area_struct *prev, unsigned long addr, unsigned long end, |
|---|
| 2688 | + unsigned long vm_flags, struct anon_vma *anon, struct file *file, |
|---|
| 2689 | + pgoff_t off, struct mempolicy *pol, struct vm_userfaultfd_ctx uff, |
|---|
| 2690 | + const char __user *user) |
|---|
| 2691 | +{ |
|---|
| 2692 | + return __vma_merge(mm, prev, addr, end, vm_flags, anon, file, off, |
|---|
| 2693 | + pol, uff, user, false); |
|---|
| 2694 | +} |
|---|
| 2695 | + |
|---|
| 2323 | 2696 | extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); |
|---|
| 2324 | 2697 | extern int __split_vma(struct mm_struct *, struct vm_area_struct *, |
|---|
| 2325 | 2698 | unsigned long addr, int new_below); |
|---|
| .. | .. |
|---|
| 2369 | 2742 | unsigned long addr, unsigned long len, |
|---|
| 2370 | 2743 | unsigned long flags, struct page **pages); |
|---|
| 2371 | 2744 | |
|---|
| 2745 | +unsigned long randomize_stack_top(unsigned long stack_top); |
|---|
| 2746 | +unsigned long randomize_page(unsigned long start, unsigned long range); |
|---|
| 2747 | + |
|---|
| 2372 | 2748 | extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); |
|---|
| 2373 | 2749 | |
|---|
| 2374 | 2750 | extern unsigned long mmap_region(struct file *file, unsigned long addr, |
|---|
| .. | .. |
|---|
| 2376 | 2752 | struct list_head *uf); |
|---|
| 2377 | 2753 | extern unsigned long do_mmap(struct file *file, unsigned long addr, |
|---|
| 2378 | 2754 | unsigned long len, unsigned long prot, unsigned long flags, |
|---|
| 2379 | | - vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate, |
|---|
| 2380 | | - struct list_head *uf); |
|---|
| 2755 | + unsigned long pgoff, unsigned long *populate, struct list_head *uf); |
|---|
| 2756 | +extern int __do_munmap(struct mm_struct *, unsigned long, size_t, |
|---|
| 2757 | + struct list_head *uf, bool downgrade); |
|---|
| 2381 | 2758 | extern int do_munmap(struct mm_struct *, unsigned long, size_t, |
|---|
| 2382 | 2759 | struct list_head *uf); |
|---|
| 2383 | | - |
|---|
| 2384 | | -static inline unsigned long |
|---|
| 2385 | | -do_mmap_pgoff(struct file *file, unsigned long addr, |
|---|
| 2386 | | - unsigned long len, unsigned long prot, unsigned long flags, |
|---|
| 2387 | | - unsigned long pgoff, unsigned long *populate, |
|---|
| 2388 | | - struct list_head *uf) |
|---|
| 2389 | | -{ |
|---|
| 2390 | | - return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate, uf); |
|---|
| 2391 | | -} |
|---|
| 2760 | +extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior); |
|---|
| 2392 | 2761 | |
|---|
| 2393 | 2762 | #ifdef CONFIG_MMU |
|---|
| 2394 | 2763 | extern int __mm_populate(unsigned long addr, unsigned long len, |
|---|
| .. | .. |
|---|
| 2420 | 2789 | unsigned long align_offset; |
|---|
| 2421 | 2790 | }; |
|---|
| 2422 | 2791 | |
|---|
| 2423 | | -extern unsigned long unmapped_area(struct vm_unmapped_area_info *info); |
|---|
| 2424 | | -extern unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info); |
|---|
| 2425 | | - |
|---|
| 2426 | | -/* |
|---|
| 2427 | | - * Search for an unmapped address range. |
|---|
| 2428 | | - * |
|---|
| 2429 | | - * We are looking for a range that: |
|---|
| 2430 | | - * - does not intersect with any VMA; |
|---|
| 2431 | | - * - is contained within the [low_limit, high_limit) interval; |
|---|
| 2432 | | - * - is at least the desired size. |
|---|
| 2433 | | - * - satisfies (begin_addr & align_mask) == (align_offset & align_mask) |
|---|
| 2434 | | - */ |
|---|
| 2435 | | -static inline unsigned long |
|---|
| 2436 | | -vm_unmapped_area(struct vm_unmapped_area_info *info) |
|---|
| 2437 | | -{ |
|---|
| 2438 | | - if (info->flags & VM_UNMAPPED_AREA_TOPDOWN) |
|---|
| 2439 | | - return unmapped_area_topdown(info); |
|---|
| 2440 | | - else |
|---|
| 2441 | | - return unmapped_area(info); |
|---|
| 2442 | | -} |
|---|
| 2792 | +extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info); |
|---|
| 2443 | 2793 | |
|---|
| 2444 | 2794 | /* truncate.c */ |
|---|
| 2445 | 2795 | extern void truncate_inode_pages(struct address_space *, loff_t); |
|---|
| .. | .. |
|---|
| 2449 | 2799 | |
|---|
| 2450 | 2800 | /* generic vm_area_ops exported for stackable file systems */ |
|---|
| 2451 | 2801 | extern vm_fault_t filemap_fault(struct vm_fault *vmf); |
|---|
| 2452 | | -extern void filemap_map_pages(struct vm_fault *vmf, |
|---|
| 2802 | +extern vm_fault_t filemap_map_pages(struct vm_fault *vmf, |
|---|
| 2453 | 2803 | pgoff_t start_pgoff, pgoff_t end_pgoff); |
|---|
| 2454 | 2804 | extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf); |
|---|
| 2805 | +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT |
|---|
| 2806 | +extern bool filemap_allow_speculation(void); |
|---|
| 2807 | +#endif |
|---|
| 2455 | 2808 | |
|---|
| 2456 | 2809 | /* mm/page-writeback.c */ |
|---|
| 2457 | 2810 | int __must_check write_one_page(struct page *page); |
|---|
| 2458 | 2811 | void task_dirty_inc(struct task_struct *tsk); |
|---|
| 2459 | 2812 | |
|---|
| 2460 | | -/* readahead.c */ |
|---|
| 2461 | | -#define VM_MAX_READAHEAD 128 /* kbytes */ |
|---|
| 2462 | | -#define VM_MIN_READAHEAD 16 /* kbytes (includes current page) */ |
|---|
| 2463 | | - |
|---|
| 2464 | | -int force_page_cache_readahead(struct address_space *mapping, struct file *filp, |
|---|
| 2465 | | - pgoff_t offset, unsigned long nr_to_read); |
|---|
| 2466 | | - |
|---|
| 2467 | | -void page_cache_sync_readahead(struct address_space *mapping, |
|---|
| 2468 | | - struct file_ra_state *ra, |
|---|
| 2469 | | - struct file *filp, |
|---|
| 2470 | | - pgoff_t offset, |
|---|
| 2471 | | - unsigned long size); |
|---|
| 2472 | | - |
|---|
| 2473 | | -void page_cache_async_readahead(struct address_space *mapping, |
|---|
| 2474 | | - struct file_ra_state *ra, |
|---|
| 2475 | | - struct file *filp, |
|---|
| 2476 | | - struct page *pg, |
|---|
| 2477 | | - pgoff_t offset, |
|---|
| 2478 | | - unsigned long size); |
|---|
| 2479 | | - |
|---|
| 2480 | 2813 | extern unsigned long stack_guard_gap; |
|---|
| 2481 | 2814 | /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ |
|---|
| 2482 | 2815 | extern int expand_stack(struct vm_area_struct *vma, unsigned long address); |
|---|
| 2483 | 2816 | |
|---|
| 2484 | | -/* CONFIG_STACK_GROWSUP still needs to to grow downwards at some places */ |
|---|
| 2817 | +/* CONFIG_STACK_GROWSUP still needs to grow downwards at some places */ |
|---|
| 2485 | 2818 | extern int expand_downwards(struct vm_area_struct *vma, |
|---|
| 2486 | 2819 | unsigned long address); |
|---|
| 2487 | 2820 | #if VM_GROWSUP |
|---|
| .. | .. |
|---|
| 2576 | 2909 | int remap_pfn_range(struct vm_area_struct *, unsigned long addr, |
|---|
| 2577 | 2910 | unsigned long pfn, unsigned long size, pgprot_t); |
|---|
| 2578 | 2911 | int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); |
|---|
| 2579 | | -int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, |
|---|
| 2912 | +int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr, |
|---|
| 2913 | + struct page **pages, unsigned long *num); |
|---|
| 2914 | +int vm_map_pages(struct vm_area_struct *vma, struct page **pages, |
|---|
| 2915 | + unsigned long num); |
|---|
| 2916 | +int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages, |
|---|
| 2917 | + unsigned long num); |
|---|
| 2918 | +vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr, |
|---|
| 2580 | 2919 | unsigned long pfn); |
|---|
| 2581 | | -int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, |
|---|
| 2920 | +vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, |
|---|
| 2582 | 2921 | unsigned long pfn, pgprot_t pgprot); |
|---|
| 2583 | | -int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, |
|---|
| 2922 | +vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr, |
|---|
| 2584 | 2923 | pfn_t pfn); |
|---|
| 2924 | +vm_fault_t vmf_insert_mixed_prot(struct vm_area_struct *vma, unsigned long addr, |
|---|
| 2925 | + pfn_t pfn, pgprot_t pgprot); |
|---|
| 2585 | 2926 | vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma, |
|---|
| 2586 | 2927 | unsigned long addr, pfn_t pfn); |
|---|
| 2587 | 2928 | int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); |
|---|
| .. | .. |
|---|
| 2590 | 2931 | unsigned long addr, struct page *page) |
|---|
| 2591 | 2932 | { |
|---|
| 2592 | 2933 | int err = vm_insert_page(vma, addr, page); |
|---|
| 2593 | | - |
|---|
| 2594 | | - if (err == -ENOMEM) |
|---|
| 2595 | | - return VM_FAULT_OOM; |
|---|
| 2596 | | - if (err < 0 && err != -EBUSY) |
|---|
| 2597 | | - return VM_FAULT_SIGBUS; |
|---|
| 2598 | | - |
|---|
| 2599 | | - return VM_FAULT_NOPAGE; |
|---|
| 2600 | | -} |
|---|
| 2601 | | - |
|---|
| 2602 | | -static inline vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, |
|---|
| 2603 | | - unsigned long addr, pfn_t pfn) |
|---|
| 2604 | | -{ |
|---|
| 2605 | | - int err = vm_insert_mixed(vma, addr, pfn); |
|---|
| 2606 | | - |
|---|
| 2607 | | - if (err == -ENOMEM) |
|---|
| 2608 | | - return VM_FAULT_OOM; |
|---|
| 2609 | | - if (err < 0 && err != -EBUSY) |
|---|
| 2610 | | - return VM_FAULT_SIGBUS; |
|---|
| 2611 | | - |
|---|
| 2612 | | - return VM_FAULT_NOPAGE; |
|---|
| 2613 | | -} |
|---|
| 2614 | | - |
|---|
| 2615 | | -static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, |
|---|
| 2616 | | - unsigned long addr, unsigned long pfn) |
|---|
| 2617 | | -{ |
|---|
| 2618 | | - int err = vm_insert_pfn(vma, addr, pfn); |
|---|
| 2619 | 2934 | |
|---|
| 2620 | 2935 | if (err == -ENOMEM) |
|---|
| 2621 | 2936 | return VM_FAULT_OOM; |
|---|
| .. | .. |
|---|
| 2641 | 2956 | return VM_FAULT_SIGBUS; |
|---|
| 2642 | 2957 | } |
|---|
| 2643 | 2958 | |
|---|
| 2644 | | -struct page *follow_page_mask(struct vm_area_struct *vma, |
|---|
| 2645 | | - unsigned long address, unsigned int foll_flags, |
|---|
| 2646 | | - unsigned int *page_mask); |
|---|
| 2647 | | - |
|---|
| 2648 | | -static inline struct page *follow_page(struct vm_area_struct *vma, |
|---|
| 2649 | | - unsigned long address, unsigned int foll_flags) |
|---|
| 2650 | | -{ |
|---|
| 2651 | | - unsigned int unused_page_mask; |
|---|
| 2652 | | - return follow_page_mask(vma, address, foll_flags, &unused_page_mask); |
|---|
| 2653 | | -} |
|---|
| 2959 | +struct page *follow_page(struct vm_area_struct *vma, unsigned long address, |
|---|
| 2960 | + unsigned int foll_flags); |
|---|
| 2654 | 2961 | |
|---|
| 2655 | 2962 | #define FOLL_WRITE 0x01 /* check pte is writable */ |
|---|
| 2656 | 2963 | #define FOLL_TOUCH 0x02 /* mark page accessed */ |
|---|
| .. | .. |
|---|
| 2669 | 2976 | #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ |
|---|
| 2670 | 2977 | #define FOLL_COW 0x4000 /* internal GUP flag */ |
|---|
| 2671 | 2978 | #define FOLL_ANON 0x8000 /* don't do file mappings */ |
|---|
| 2979 | +#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */ |
|---|
| 2980 | +#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */ |
|---|
| 2981 | +#define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */ |
|---|
| 2982 | +#define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */ |
|---|
| 2983 | + |
|---|
| 2984 | +/* |
|---|
| 2985 | + * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each |
|---|
| 2986 | + * other. Here is what they mean, and how to use them: |
|---|
| 2987 | + * |
|---|
| 2988 | + * FOLL_LONGTERM indicates that the page will be held for an indefinite time |
|---|
| 2989 | + * period _often_ under userspace control. This is in contrast to |
|---|
| 2990 | + * iov_iter_get_pages(), whose usages are transient. |
|---|
| 2991 | + * |
|---|
| 2992 | + * FIXME: For pages which are part of a filesystem, mappings are subject to the |
|---|
| 2993 | + * lifetime enforced by the filesystem and we need guarantees that longterm |
|---|
| 2994 | + * users like RDMA and V4L2 only establish mappings which coordinate usage with |
|---|
| 2995 | + * the filesystem. Ideas for this coordination include revoking the longterm |
|---|
| 2996 | + * pin, delaying writeback, bounce buffer page writeback, etc. As FS DAX was |
|---|
| 2997 | + * added after the problem with filesystems was found, FS DAX VMAs are
|---|
| 2998 | + * specifically failed. Filesystem pages are still subject to bugs and use of |
|---|
| 2999 | + * FOLL_LONGTERM should be avoided on those pages. |
|---|
| 3000 | + * |
|---|
| 3001 | + * FIXME: Also NOTE that FOLL_LONGTERM is not supported in every GUP call. |
|---|
| 3002 | + * Currently only get_user_pages() and get_user_pages_fast() support this flag |
|---|
| 3003 | + * and calls to get_user_pages_[un]locked are specifically not allowed. This |
|---|
| 3004 | + * is due to an incompatibility with the FS DAX check and |
|---|
| 3005 | + * FAULT_FLAG_ALLOW_RETRY. |
|---|
| 3006 | + * |
|---|
| 3007 | + * In the CMA case: long term pins in a CMA region would unnecessarily fragment |
|---|
| 3008 | + * that region. And so, CMA attempts to migrate the page before pinning, when |
|---|
| 3009 | + * FOLL_LONGTERM is specified. |
|---|
| 3010 | + * |
|---|
| 3011 | + * FOLL_PIN indicates that a special kind of tracking (not just page->_refcount, |
|---|
| 3012 | + * but an additional pin counting system) will be invoked. This is intended for |
|---|
| 3013 | + * anything that gets a page reference and then touches page data (for example, |
|---|
| 3014 | + * Direct IO). This lets the filesystem know that some non-file-system entity is |
|---|
| 3015 | + * potentially changing the pages' data. In contrast to FOLL_GET (whose pages |
|---|
| 3016 | + * are released via put_page()), FOLL_PIN pages must be released, ultimately, by |
|---|
| 3017 | + * a call to unpin_user_page(). |
|---|
| 3018 | + * |
|---|
| 3019 | + * FOLL_PIN is similar to FOLL_GET: both of these pin pages. They use different |
|---|
| 3020 | + * and separate refcounting mechanisms, however, and that means that each has |
|---|
| 3021 | + * its own acquire and release mechanisms: |
|---|
| 3022 | + * |
|---|
| 3023 | + * FOLL_GET: get_user_pages*() to acquire, and put_page() to release. |
|---|
| 3024 | + * |
|---|
| 3025 | + * FOLL_PIN: pin_user_pages*() to acquire, and unpin_user_pages to release. |
|---|
| 3026 | + * |
|---|
| 3027 | + * FOLL_PIN and FOLL_GET are mutually exclusive for a given function call. |
|---|
| 3028 | + * (The underlying pages may experience both FOLL_GET-based and FOLL_PIN-based |
|---|
| 3029 | + * calls applied to them, and that's perfectly OK. This is a constraint on the |
|---|
| 3030 | + * callers, not on the pages.) |
|---|
| 3031 | + * |
|---|
| 3032 | + * FOLL_PIN should be set internally by the pin_user_pages*() APIs, never |
|---|
| 3033 | + * directly by the caller. That's in order to help avoid mismatches when |
|---|
| 3034 | + * releasing pages: get_user_pages*() pages must be released via put_page(), |
|---|
| 3035 | + * while pin_user_pages*() pages must be released via unpin_user_page(). |
|---|
| 3036 | + * |
|---|
| 3037 | + * Please see Documentation/core-api/pin_user_pages.rst for more information. |
|---|
| 3038 | + */ |
|---|
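A short sketch of the FOLL_PIN contract described above: acquire with one of the `pin_user_pages*()` variants and release with `unpin_user_page()`, never `put_page()`. The function name is hypothetical and error handling is minimal.

```c
/* Sketch only: pin a single user page for data access, then unpin it. */
static int pin_one_user_page(unsigned long uaddr, bool write)
{
	struct page *page;
	int ret;

	ret = pin_user_pages_fast(uaddr, 1, write ? FOLL_WRITE : 0, &page);
	if (ret != 1)
		return ret < 0 ? ret : -EFAULT;

	/* ... read or write the page contents here ... */

	unpin_user_page(page);	/* NOT put_page(): this was a FOLL_PIN reference */
	return 0;
}
```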
| 2672 | 3039 | |
|---|
| 2673 | 3040 | static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) |
|---|
| 2674 | 3041 | { |
|---|
| .. | .. |
|---|
| 2681 | 3048 | return 0; |
|---|
| 2682 | 3049 | } |
|---|
| 2683 | 3050 | |
|---|
| 2684 | | -typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, |
|---|
| 2685 | | - void *data); |
|---|
| 3051 | +typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data); |
|---|
| 2686 | 3052 | extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, |
|---|
| 2687 | 3053 | unsigned long size, pte_fn_t fn, void *data); |
|---|
| 3054 | +extern int apply_to_existing_page_range(struct mm_struct *mm, |
|---|
| 3055 | + unsigned long address, unsigned long size, |
|---|
| 3056 | + pte_fn_t fn, void *data); |
|---|
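The `pte_fn_t` callback shape can be illustrated with a small sketch. `apply_to_existing_page_range()` only visits PTEs in page tables that already exist (unlike `apply_to_page_range()`, it does not allocate missing tables); `count_present_pte()` and `count_present()` are hypothetical helpers.

```c
/* Sketch only: count how many mapped PTEs in a range are currently present. */
static int count_present_pte(pte_t *pte, unsigned long addr, void *data)
{
	unsigned long *count = data;

	if (pte_present(*pte))
		(*count)++;
	return 0;	/* returning non-zero would abort the walk */
}

static unsigned long count_present(struct mm_struct *mm, unsigned long addr,
				   unsigned long size)
{
	unsigned long count = 0;

	/* Return value (first non-zero from the callback) ignored for brevity. */
	apply_to_existing_page_range(mm, addr, size, count_present_pte, &count);
	return count;
}
```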
| 2688 | 3057 | |
|---|
| 2689 | | - |
|---|
| 3058 | +extern void init_mem_debugging_and_hardening(void); |
|---|
| 2690 | 3059 | #ifdef CONFIG_PAGE_POISONING |
|---|
| 2691 | | -extern bool page_poisoning_enabled(void); |
|---|
| 2692 | | -extern void kernel_poison_pages(struct page *page, int numpages, int enable); |
|---|
| 3060 | +extern void __kernel_poison_pages(struct page *page, int numpages); |
|---|
| 3061 | +extern void __kernel_unpoison_pages(struct page *page, int numpages); |
|---|
| 3062 | +extern bool _page_poisoning_enabled_early; |
|---|
| 3063 | +DECLARE_STATIC_KEY_FALSE(_page_poisoning_enabled); |
|---|
| 3064 | +static inline bool page_poisoning_enabled(void) |
|---|
| 3065 | +{ |
|---|
| 3066 | + return _page_poisoning_enabled_early; |
|---|
| 3067 | +} |
|---|
| 3068 | +/* |
|---|
| 3069 | + * For use in fast paths after init_mem_debugging_and_hardening() has run,
|---|
| 3070 | + * or when a false negative result is not harmful when called too early.
|---|
| 3071 | + */ |
|---|
| 3072 | +static inline bool page_poisoning_enabled_static(void) |
|---|
| 3073 | +{ |
|---|
| 3074 | + return static_branch_unlikely(&_page_poisoning_enabled); |
|---|
| 3075 | +} |
|---|
| 3076 | +static inline void kernel_poison_pages(struct page *page, int numpages) |
|---|
| 3077 | +{ |
|---|
| 3078 | + if (page_poisoning_enabled_static()) |
|---|
| 3079 | + __kernel_poison_pages(page, numpages); |
|---|
| 3080 | +} |
|---|
| 3081 | +static inline void kernel_unpoison_pages(struct page *page, int numpages) |
|---|
| 3082 | +{ |
|---|
| 3083 | + if (page_poisoning_enabled_static()) |
|---|
| 3084 | + __kernel_unpoison_pages(page, numpages); |
|---|
| 3085 | +} |
|---|
| 2693 | 3086 | #else |
|---|
| 2694 | 3087 | static inline bool page_poisoning_enabled(void) { return false; } |
|---|
| 2695 | | -static inline void kernel_poison_pages(struct page *page, int numpages, |
|---|
| 2696 | | - int enable) { } |
|---|
| 3088 | +static inline bool page_poisoning_enabled_static(void) { return false; } |
|---|
| 3089 | +static inline void __kernel_poison_pages(struct page *page, int numpages) { }
|---|
| 3090 | +static inline void kernel_poison_pages(struct page *page, int numpages) { } |
|---|
| 3091 | +static inline void kernel_unpoison_pages(struct page *page, int numpages) { } |
|---|
| 2697 | 3092 | #endif |
|---|
| 2698 | 3093 | |
|---|
| 2699 | | -#ifdef CONFIG_INIT_ON_ALLOC_DEFAULT_ON |
|---|
| 2700 | | -DECLARE_STATIC_KEY_TRUE(init_on_alloc); |
|---|
| 2701 | | -#else |
|---|
| 2702 | 3094 | DECLARE_STATIC_KEY_FALSE(init_on_alloc); |
|---|
| 2703 | | -#endif |
|---|
| 2704 | 3095 | static inline bool want_init_on_alloc(gfp_t flags) |
|---|
| 2705 | 3096 | { |
|---|
| 2706 | | - if (static_branch_unlikely(&init_on_alloc) && |
|---|
| 2707 | | - !page_poisoning_enabled()) |
|---|
| 3097 | + if (static_branch_unlikely(&init_on_alloc)) |
|---|
| 2708 | 3098 | return true; |
|---|
| 2709 | 3099 | return flags & __GFP_ZERO; |
|---|
| 2710 | 3100 | } |
|---|
| 2711 | 3101 | |
|---|
| 2712 | | -#ifdef CONFIG_INIT_ON_FREE_DEFAULT_ON |
|---|
| 2713 | | -DECLARE_STATIC_KEY_TRUE(init_on_free); |
|---|
| 2714 | | -#else |
|---|
| 2715 | 3102 | DECLARE_STATIC_KEY_FALSE(init_on_free); |
|---|
| 2716 | | -#endif |
|---|
| 2717 | 3103 | static inline bool want_init_on_free(void) |
|---|
| 2718 | 3104 | { |
|---|
| 2719 | | - return static_branch_unlikely(&init_on_free) && |
|---|
| 2720 | | - !page_poisoning_enabled(); |
|---|
| 3105 | + return static_branch_unlikely(&init_on_free); |
|---|
| 2721 | 3106 | } |
|---|
| 2722 | 3107 | |
|---|
| 2723 | | -#ifdef CONFIG_DEBUG_PAGEALLOC |
|---|
| 2724 | | -extern bool _debug_pagealloc_enabled; |
|---|
| 2725 | | -extern void __kernel_map_pages(struct page *page, int numpages, int enable); |
|---|
| 3108 | +extern bool _debug_pagealloc_enabled_early; |
|---|
| 3109 | +DECLARE_STATIC_KEY_FALSE(_debug_pagealloc_enabled); |
|---|
| 2726 | 3110 | |
|---|
| 2727 | 3111 | static inline bool debug_pagealloc_enabled(void) |
|---|
| 2728 | 3112 | { |
|---|
| 2729 | | - return _debug_pagealloc_enabled; |
|---|
| 3113 | + return IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) && |
|---|
| 3114 | + _debug_pagealloc_enabled_early; |
|---|
| 2730 | 3115 | } |
|---|
| 2731 | 3116 | |
|---|
| 3117 | +/* |
|---|
| 3118 | + * For use in fast paths after init_debug_pagealloc() has run, or when a |
|---|
| 3119 | + * false negative result is not harmful when called too early. |
|---|
| 3120 | + */ |
|---|
| 3121 | +static inline bool debug_pagealloc_enabled_static(void) |
|---|
| 3122 | +{ |
|---|
| 3123 | + if (!IS_ENABLED(CONFIG_DEBUG_PAGEALLOC)) |
|---|
| 3124 | + return false; |
|---|
| 3125 | + |
|---|
| 3126 | + return static_branch_unlikely(&_debug_pagealloc_enabled); |
|---|
| 3127 | +} |
|---|
| 3128 | + |
|---|
| 3129 | +#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP) |
|---|
| 3130 | +extern void __kernel_map_pages(struct page *page, int numpages, int enable); |
|---|
| 3131 | + |
|---|
| 3132 | +/* |
|---|
| 3133 | + * When called in DEBUG_PAGEALLOC context, the call should most likely be |
|---|
| 3134 | + * guarded by debug_pagealloc_enabled() or debug_pagealloc_enabled_static() |
|---|
| 3135 | + */ |
|---|
| 2732 | 3136 | static inline void |
|---|
| 2733 | 3137 | kernel_map_pages(struct page *page, int numpages, int enable) |
|---|
| 2734 | 3138 | { |
|---|
| 2735 | | - if (!debug_pagealloc_enabled()) |
|---|
| 2736 | | - return; |
|---|
| 2737 | | - |
|---|
| 2738 | 3139 | __kernel_map_pages(page, numpages, enable); |
|---|
| 2739 | 3140 | } |
|---|
| 3141 | + |
|---|
| 3142 | +static inline void debug_pagealloc_map_pages(struct page *page, int numpages) |
|---|
| 3143 | +{ |
|---|
| 3144 | + if (debug_pagealloc_enabled_static()) |
|---|
| 3145 | + __kernel_map_pages(page, numpages, 1); |
|---|
| 3146 | +} |
|---|
| 3147 | + |
|---|
| 3148 | +static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) |
|---|
| 3149 | +{ |
|---|
| 3150 | + if (debug_pagealloc_enabled_static()) |
|---|
| 3151 | + __kernel_map_pages(page, numpages, 0); |
|---|
| 3152 | +} |
|---|
| 3153 | + |
|---|
| 2740 | 3154 | #ifdef CONFIG_HIBERNATION |
|---|
| 2741 | 3155 | extern bool kernel_page_present(struct page *page); |
|---|
| 2742 | 3156 | #endif /* CONFIG_HIBERNATION */ |
|---|
| 2743 | | -#else /* CONFIG_DEBUG_PAGEALLOC */ |
|---|
| 3157 | +#else /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */ |
|---|
| 2744 | 3158 | static inline void |
|---|
| 2745 | 3159 | kernel_map_pages(struct page *page, int numpages, int enable) {} |
|---|
| 3160 | +static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {} |
|---|
| 3161 | +static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {} |
|---|
| 2746 | 3162 | #ifdef CONFIG_HIBERNATION |
|---|
| 2747 | 3163 | static inline bool kernel_page_present(struct page *page) { return true; } |
|---|
| 2748 | 3164 | #endif /* CONFIG_HIBERNATION */ |
|---|
| 2749 | | -static inline bool debug_pagealloc_enabled(void) |
|---|
| 2750 | | -{ |
|---|
| 2751 | | - return false; |
|---|
| 2752 | | -} |
|---|
| 2753 | | -#endif /* CONFIG_DEBUG_PAGEALLOC */ |
|---|
| 3165 | +#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */ |
|---|
| 2754 | 3166 | |
|---|
| 2755 | 3167 | #ifdef __HAVE_ARCH_GATE_AREA |
|---|
| 2756 | 3168 | extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); |
|---|
| .. | .. |
|---|
| 2772 | 3184 | |
|---|
| 2773 | 3185 | #ifdef CONFIG_SYSCTL |
|---|
| 2774 | 3186 | extern int sysctl_drop_caches; |
|---|
| 2775 | | -int drop_caches_sysctl_handler(struct ctl_table *, int, |
|---|
| 2776 | | - void __user *, size_t *, loff_t *); |
|---|
| 3187 | +int drop_caches_sysctl_handler(struct ctl_table *, int, void *, size_t *, |
|---|
| 3188 | + loff_t *); |
|---|
| 2777 | 3189 | #endif |
|---|
| 2778 | 3190 | |
|---|
| 2779 | 3191 | void drop_slab(void); |
|---|
| .. | .. |
|---|
| 2786 | 3198 | #endif |
|---|
| 2787 | 3199 | |
|---|
| 2788 | 3200 | const char * arch_vma_name(struct vm_area_struct *vma); |
|---|
| 3201 | +#ifdef CONFIG_MMU |
|---|
| 2789 | 3202 | void print_vma_addr(char *prefix, unsigned long rip); |
|---|
| 3203 | +#else |
|---|
| 3204 | +static inline void print_vma_addr(char *prefix, unsigned long rip) |
|---|
| 3205 | +{ |
|---|
| 3206 | +} |
|---|
| 3207 | +#endif |
|---|
| 2790 | 3208 | |
|---|
| 2791 | 3209 | void *sparse_buffer_alloc(unsigned long size); |
|---|
| 2792 | | -struct page *sparse_mem_map_populate(unsigned long pnum, int nid, |
|---|
| 2793 | | - struct vmem_altmap *altmap); |
|---|
| 3210 | +struct page * __populate_section_memmap(unsigned long pfn, |
|---|
| 3211 | + unsigned long nr_pages, int nid, struct vmem_altmap *altmap); |
|---|
| 2794 | 3212 | pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); |
|---|
| 2795 | 3213 | p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); |
|---|
| 2796 | 3214 | pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); |
|---|
| 2797 | 3215 | pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); |
|---|
| 2798 | | -pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); |
|---|
| 3216 | +pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, |
|---|
| 3217 | + struct vmem_altmap *altmap); |
|---|
| 2799 | 3218 | void *vmemmap_alloc_block(unsigned long size, int node); |
|---|
| 2800 | 3219 | struct vmem_altmap; |
|---|
| 2801 | | -void *vmemmap_alloc_block_buf(unsigned long size, int node); |
|---|
| 2802 | | -void *altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap); |
|---|
| 3220 | +void *vmemmap_alloc_block_buf(unsigned long size, int node, |
|---|
| 3221 | + struct vmem_altmap *altmap); |
|---|
| 2803 | 3222 | void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); |
|---|
| 2804 | 3223 | int vmemmap_populate_basepages(unsigned long start, unsigned long end, |
|---|
| 2805 | | - int node); |
|---|
| 3224 | + int node, struct vmem_altmap *altmap); |
|---|
| 2806 | 3225 | int vmemmap_populate(unsigned long start, unsigned long end, int node, |
|---|
| 2807 | 3226 | struct vmem_altmap *altmap); |
|---|
| 2808 | 3227 | void vmemmap_populate_print_last(void); |
|---|
| .. | .. |
|---|
| 2821 | 3240 | }; |
|---|
| 2822 | 3241 | extern int memory_failure(unsigned long pfn, int flags); |
|---|
| 2823 | 3242 | extern void memory_failure_queue(unsigned long pfn, int flags); |
|---|
| 3243 | +extern void memory_failure_queue_kick(int cpu); |
|---|
| 2824 | 3244 | extern int unpoison_memory(unsigned long pfn); |
|---|
| 2825 | | -extern int get_hwpoison_page(struct page *page); |
|---|
| 2826 | | -#define put_hwpoison_page(page) put_page(page) |
|---|
| 2827 | 3245 | extern int sysctl_memory_failure_early_kill; |
|---|
| 2828 | 3246 | extern int sysctl_memory_failure_recovery; |
|---|
| 2829 | 3247 | extern void shake_page(struct page *p, int access); |
|---|
| 2830 | 3248 | extern atomic_long_t num_poisoned_pages __read_mostly; |
|---|
| 2831 | | -extern int soft_offline_page(struct page *page, int flags); |
|---|
| 3249 | +extern int soft_offline_page(unsigned long pfn, int flags); |
|---|
| 2832 | 3250 | |
|---|
| 2833 | 3251 | |
|---|
| 2834 | 3252 | /* |
|---|
| .. | .. |
|---|
| 2863 | 3281 | MF_MSG_BUDDY, |
|---|
| 2864 | 3282 | MF_MSG_BUDDY_2ND, |
|---|
| 2865 | 3283 | MF_MSG_DAX, |
|---|
| 3284 | + MF_MSG_UNSPLIT_THP, |
|---|
| 2866 | 3285 | MF_MSG_UNKNOWN, |
|---|
| 2867 | 3286 | }; |
|---|
| 2868 | 3287 | |
|---|
| .. | .. |
|---|
| 2878 | 3297 | const void __user *usr_src, |
|---|
| 2879 | 3298 | unsigned int pages_per_huge_page, |
|---|
| 2880 | 3299 | bool allow_pagefault); |
|---|
| 2881 | | -#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ |
|---|
| 2882 | 3300 | |
|---|
| 2883 | | -extern struct page_ext_operations debug_guardpage_ops; |
|---|
| 3301 | +/** |
|---|
| 3302 | + * vma_is_special_huge - Are transhuge page-table entries considered special? |
|---|
| 3303 | + * @vma: Pointer to the struct vm_area_struct to consider |
|---|
| 3304 | + * |
|---|
| 3305 | + * Whether transhuge page-table entries are considered "special" following |
|---|
| 3306 | + * the definition in vm_normal_page(). |
|---|
| 3307 | + * |
|---|
| 3308 | + * Return: true if transhuge page-table entries should be considered special, |
|---|
| 3309 | + * false otherwise. |
|---|
| 3310 | + */ |
|---|
| 3311 | +static inline bool vma_is_special_huge(const struct vm_area_struct *vma) |
|---|
| 3312 | +{ |
|---|
| 3313 | + return vma_is_dax(vma) || (vma->vm_file && |
|---|
| 3314 | + (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))); |
|---|
| 3315 | +} |
|---|
| 3316 | + |
|---|
| 3317 | +#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ |
|---|
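As the kernel-doc above describes, `vma_is_special_huge()` reports whether huge page-table entries in the VMA lack normal `struct page` backing (DAX, or a file-backed `VM_PFNMAP`/`VM_MIXEDMAP` mapping). A hypothetical caller sketch, not the in-tree user, showing the kind of decision it gates:

```c
/* Hypothetical sketch: a huge-entry handling path bails out to the
 * single-page path when the entries are "special" and therefore have
 * no struct page to operate on.
 */
static vm_fault_t handle_huge_entry(struct vm_fault *vmf)
{
	if (vma_is_special_huge(vmf->vma))
		return VM_FAULT_FALLBACK;	/* let the PTE-level path handle it */

	/* ... normal transhuge handling would go here ... */
	return VM_FAULT_NOPAGE;
}
```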
| 2884 | 3318 | |
|---|
| 2885 | 3319 | #ifdef CONFIG_DEBUG_PAGEALLOC |
|---|
| 2886 | 3320 | extern unsigned int _debug_guardpage_minorder; |
|---|
| 2887 | | -extern bool _debug_guardpage_enabled; |
|---|
| 3321 | +DECLARE_STATIC_KEY_FALSE(_debug_guardpage_enabled); |
|---|
| 2888 | 3322 | |
|---|
| 2889 | 3323 | static inline unsigned int debug_guardpage_minorder(void) |
|---|
| 2890 | 3324 | { |
|---|
| .. | .. |
|---|
| 2893 | 3327 | |
|---|
| 2894 | 3328 | static inline bool debug_guardpage_enabled(void) |
|---|
| 2895 | 3329 | { |
|---|
| 2896 | | - return _debug_guardpage_enabled; |
|---|
| 3330 | + return static_branch_unlikely(&_debug_guardpage_enabled); |
|---|
| 2897 | 3331 | } |
|---|
| 2898 | 3332 | |
|---|
| 2899 | 3333 | static inline bool page_is_guard(struct page *page) |
|---|
| 2900 | 3334 | { |
|---|
| 2901 | | - struct page_ext *page_ext; |
|---|
| 2902 | | - |
|---|
| 2903 | 3335 | if (!debug_guardpage_enabled()) |
|---|
| 2904 | 3336 | return false; |
|---|
| 2905 | 3337 | |
|---|
| 2906 | | - page_ext = lookup_page_ext(page); |
|---|
| 2907 | | - if (unlikely(!page_ext)) |
|---|
| 2908 | | - return false; |
|---|
| 2909 | | - |
|---|
| 2910 | | - return test_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags); |
|---|
| 3338 | + return PageGuard(page); |
|---|
| 2911 | 3339 | } |
|---|
| 2912 | 3340 | #else |
|---|
| 2913 | 3341 | static inline unsigned int debug_guardpage_minorder(void) { return 0; } |
|---|
| .. | .. |
|---|
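In the guard-page hunk above, the enable flag becomes a static key and `page_is_guard()` becomes a page-type test (`PageGuard()`), so the disabled case costs a patched-out branch rather than a `page_ext` lookup. A minimal caller sketch (hypothetical helper name), assuming the usual boot-time enablement via `debug_guardpage_minorder=`:

```c
/* Hypothetical helper: with the static key off, the first check is
 * patched to fall through and page_is_guard() is never reached.
 */
static bool my_page_needs_guard_handling(struct page *page)
{
	if (!debug_guardpage_enabled())
		return false;
	return page_is_guard(page);
}
```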
| 2921 | 3349 | static inline void setup_nr_node_ids(void) {} |
|---|
| 2922 | 3350 | #endif |
|---|
| 2923 | 3351 | |
|---|
| 3352 | +extern int memcmp_pages(struct page *page1, struct page *page2); |
|---|
| 3353 | + |
|---|
| 3354 | +static inline int pages_identical(struct page *page1, struct page *page2) |
|---|
| 3355 | +{ |
|---|
| 3356 | + return !memcmp_pages(page1, page2); |
|---|
| 3357 | +} |
|---|
| 3358 | + |
|---|
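`memcmp_pages()` returns zero when the two pages have identical contents, and `pages_identical()` is the boolean wrapper added above. A hypothetical dedup-style check using it:

```c
/* Hypothetical sketch: decide whether two pages could be merged by content. */
static bool pages_could_merge(struct page *a, struct page *b)
{
	return pages_identical(a, b);	/* true when memcmp_pages() == 0 */
}
```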
| 3359 | +#ifdef CONFIG_MAPPING_DIRTY_HELPERS |
|---|
| 3360 | +unsigned long clean_record_shared_mapping_range(struct address_space *mapping, |
|---|
| 3361 | + pgoff_t first_index, pgoff_t nr, |
|---|
| 3362 | + pgoff_t bitmap_pgoff, |
|---|
| 3363 | + unsigned long *bitmap, |
|---|
| 3364 | + pgoff_t *start, |
|---|
| 3365 | + pgoff_t *end); |
|---|
| 3366 | + |
|---|
| 3367 | +unsigned long wp_shared_mapping_range(struct address_space *mapping, |
|---|
| 3368 | + pgoff_t first_index, pgoff_t nr); |
|---|
| 3369 | +#endif |
|---|
| 3370 | + |
|---|
| 3371 | +extern int sysctl_nr_trim_pages; |
|---|
| 3372 | +extern bool pte_map_lock_addr(struct vm_fault *vmf, unsigned long addr); |
|---|
| 3373 | +extern int reclaim_shmem_address_space(struct address_space *mapping); |
|---|
| 3374 | + |
|---|
| 3375 | +/** |
|---|
| 3376 | + * seal_check_future_write - Check for F_SEAL_FUTURE_WRITE flag and handle it |
|---|
| 3377 | + * @seals: the seals to check |
|---|
| 3378 | + * @vma: the vma to operate on |
|---|
| 3379 | + * |
|---|
| 3380 | + * Check whether F_SEAL_FUTURE_WRITE is set; if so, do proper check/handling on |
|---|
| 3381 | + * the vma flags. Return 0 if the check passes, or <0 for errors. |
|---|
| 3382 | + */ |
|---|
| 3383 | +static inline int seal_check_future_write(int seals, struct vm_area_struct *vma) |
|---|
| 3384 | +{ |
|---|
| 3385 | + if (seals & F_SEAL_FUTURE_WRITE) { |
|---|
| 3386 | + /* |
|---|
| 3387 | + * New PROT_WRITE and MAP_SHARED mmaps are not allowed when |
|---|
| 3388 | + * "future write" seal active. |
|---|
| 3389 | + */ |
|---|
| 3390 | + if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) |
|---|
| 3391 | + return -EPERM; |
|---|
| 3392 | + |
|---|
| 3393 | + /* |
|---|
| 3394 | + * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as |
|---|
| 3395 | + * MAP_SHARED and read-only, take care to not allow mprotect to |
|---|
| 3396 | + * revert protections on such mappings. Do this only for shared |
|---|
| 3397 | + * mappings. For private mappings, we don't need to mask |
|---|
| 3398 | + * VM_MAYWRITE as we still want them to be COW-writable. |
|---|
| 3399 | + */ |
|---|
| 3400 | + if (vma->vm_flags & VM_SHARED) |
|---|
| 3401 | + vma->vm_flags &= ~(VM_MAYWRITE); |
|---|
| 3402 | + } |
|---|
| 3403 | + |
|---|
| 3404 | + return 0; |
|---|
| 3405 | +} |
|---|
| 3406 | + |
|---|
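The helper above is intended to be called from the mmap handler of a memfd-style backing store. A hedged sketch of such a caller; `my_seals()` is a stand-in for however the filesystem stores its seals, not an API from this patch:

```c
/* Sketch of an ->mmap implementation honouring F_SEAL_FUTURE_WRITE.
 * my_seals() is hypothetical; a real filesystem reads the seals from
 * its inode-private data.
 */
static int my_mmap(struct file *file, struct vm_area_struct *vma)
{
	int ret = seal_check_future_write(my_seals(file), vma);

	if (ret)
		return ret;

	/* ... set vma->vm_ops and finish the mapping as usual ... */
	return 0;
}
```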
| 2924 | 3407 | #endif /* __KERNEL__ */ |
|---|
| 2925 | 3408 | #endif /* _LINUX_MM_H */ |
|---|