| .. | .. |
|---|
| 21 | 21 | * Lock ordering in mm: |
|---|
| 22 | 22 | * |
|---|
| 23 | 23 | * inode->i_mutex (while writing or truncating, not reading or faulting) |
|---|
| 24 | | - * mm->mmap_sem |
|---|
| 25 | | - * page->flags PG_locked (lock_page) |
|---|
| 24 | + * mm->mmap_lock |
|---|
| 25 | + * page->flags PG_locked (lock_page) * (see hugetlbfs below) |
|---|
| 26 | 26 | * hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share) |
|---|
| 27 | 27 | * mapping->i_mmap_rwsem |
|---|
| 28 | + * hugetlb_fault_mutex (hugetlbfs specific page fault mutex) |
|---|
| 28 | 29 | * anon_vma->rwsem |
|---|
| 29 | 30 | * mm->page_table_lock or pte_lock |
|---|
| 30 | | - * zone_lru_lock (in mark_page_accessed, isolate_lru_page) |
|---|
| 31 | + * pgdat->lru_lock (in mark_page_accessed, isolate_lru_page) |
|---|
| 31 | 32 | * swap_lock (in swap_duplicate, swap_info_get) |
|---|
| 32 | 33 | * mmlist_lock (in mmput, drain_mmlist and others) |
|---|
| 33 | 34 | * mapping->private_lock (in __set_page_dirty_buffers) |
|---|
| .. | .. |
|---|
| 43 | 44 | * anon_vma->rwsem,mapping->i_mutex (memory_failure, collect_procs_anon) |
|---|
| 44 | 45 | * ->tasklist_lock |
|---|
| 45 | 46 | * pte map lock |
|---|
| 47 | + * |
|---|
| 48 | + * * hugetlbfs PageHuge() pages take locks in this order: |
|---|
| 49 | + * mapping->i_mmap_rwsem |
|---|
| 50 | + * hugetlb_fault_mutex (hugetlbfs specific page fault mutex) |
|---|
| 51 | + * page->flags PG_locked (lock_page) |
|---|
| 46 | 52 | */ |
|---|
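The list above is the contract the rest of this file relies on; nothing enforces it at compile time. As a rough userspace analogy (plain C with pthreads, not kernel code, and the mutex names are invented), the rule simply means that every path needing more than one of these locks takes them top-down in the same documented order:

```c
/*
 * Userspace analogy of the ordering rule above: both threads take "outer"
 * (think mm->mmap_lock) before "inner" (think page lock), so they can never
 * deadlock by acquiring the two locks in opposite orders.
 * Build with: cc -pthread lock_order.c
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t outer = PTHREAD_MUTEX_INITIALIZER;	/* e.g. mmap_lock */
static pthread_mutex_t inner = PTHREAD_MUTEX_INITIALIZER;	/* e.g. PG_locked */
static int shared_state;

static void *worker(void *arg)
{
	(void)arg;
	/* Same order in every thread: outer, then inner. */
	pthread_mutex_lock(&outer);
	pthread_mutex_lock(&inner);
	shared_state++;
	pthread_mutex_unlock(&inner);
	pthread_mutex_unlock(&outer);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, worker, NULL);
	pthread_create(&b, NULL, worker, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	printf("shared_state = %d\n", shared_state);
	return 0;
}
```

In the kernel the same discipline applies to mmap_lock, PG_locked, i_mmap_rwsem, hugetlb_fault_mutex and the rest: a path that took an inner lock and then waited for an outer one could deadlock against a thread following the documented order.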
| 47 | 53 | |
|---|
| 48 | 54 | #include <linux/mm.h> |
|---|
| .. | .. |
|---|
| 61 | 67 | #include <linux/mmu_notifier.h> |
|---|
| 62 | 68 | #include <linux/migrate.h> |
|---|
| 63 | 69 | #include <linux/hugetlb.h> |
|---|
| 70 | +#include <linux/huge_mm.h> |
|---|
| 64 | 71 | #include <linux/backing-dev.h> |
|---|
| 65 | 72 | #include <linux/page_idle.h> |
|---|
| 66 | 73 | #include <linux/memremap.h> |
|---|
| .. | .. |
|---|
| 69 | 76 | #include <asm/tlbflush.h> |
|---|
| 70 | 77 | |
|---|
| 71 | 78 | #include <trace/events/tlb.h> |
|---|
| 79 | + |
|---|
| 80 | +#include <trace/hooks/mm.h> |
|---|
| 72 | 81 | |
|---|
| 73 | 82 | #include "internal.h" |
|---|
| 74 | 83 | |
|---|
| .. | .. |
|---|
| 170 | 179 | * to do any locking for the common case of already having |
|---|
| 171 | 180 | * an anon_vma. |
|---|
| 172 | 181 | * |
|---|
| 173 | | - * This must be called with the mmap_sem held for reading. |
|---|
| 182 | + * This must be called with the mmap_lock held for reading. |
|---|
| 174 | 183 | */ |
|---|
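The rule above ("must be called with the mmap_lock held for reading") is easy to miss from a new call site. A minimal sketch of a caller that honours it, assuming the usual anon_vma_prepare() wrapper (which only drops into __anon_vma_prepare() when vma->anon_vma is still NULL); the function name is invented, and in real fault paths the mmap_lock is normally already held by the time this runs:

```c
#include <linux/mm.h>
#include <linux/rmap.h>

/* Illustrative only: hold the mmap_lock for reading across the prepare. */
static int prepare_vma_for_anon_fault(struct mm_struct *mm,
				      struct vm_area_struct *vma)
{
	int ret;

	mmap_read_lock(mm);		/* the requirement documented above */
	ret = anon_vma_prepare(vma);	/* 0 on success, -ENOMEM on failure */
	mmap_read_unlock(mm);

	return ret;
}
```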
| 175 | 184 | int __anon_vma_prepare(struct vm_area_struct *vma) |
|---|
| 176 | 185 | { |
|---|
| .. | .. |
|---|
| 250 | 259 | * Attach the anon_vmas from src to dst. |
|---|
| 251 | 260 | * Returns 0 on success, -ENOMEM on failure. |
|---|
| 252 | 261 | * |
|---|
| 253 | | - * If dst->anon_vma is NULL this function tries to find and reuse existing |
|---|
| 254 | | - * anon_vma which has no vmas and only one child anon_vma. This prevents |
|---|
| 255 | | - * degradation of anon_vma hierarchy to endless linear chain in case of |
|---|
| 256 | | - * constantly forking task. On the other hand, an anon_vma with more than one |
|---|
| 257 | | - * child isn't reused even if there was no alive vma, thus rmap walker has a |
|---|
| 258 | | - * good chance of avoiding scanning the whole hierarchy when it searches where |
|---|
| 259 | | - * page is mapped. |
|---|
| 262 | + * anon_vma_clone() is called by __vma_adjust(), __split_vma(), copy_vma() and |
|---|
| 263 | + * anon_vma_fork(). The first three want an exact copy of src, while the last |
|---|
| 264 | + * one, anon_vma_fork(), may try to reuse an existing anon_vma to prevent |
|---|
| 265 | + * endless growth of anon_vma. Since dst->anon_vma is set to NULL before call, |
|---|
| 266 | + * we can identify this case by checking (!dst->anon_vma && src->anon_vma). |
|---|
| 267 | + * |
|---|
| 268 | + * If (!dst->anon_vma && src->anon_vma) is true, this function tries to find |
|---|
| 269 | + * and reuse existing anon_vma which has no vmas and only one child anon_vma. |
|---|
| 270 | + * This prevents degradation of anon_vma hierarchy to endless linear chain in |
|---|
| 271 | + * case of constantly forking task. On the other hand, an anon_vma with more |
|---|
| 272 | + * than one child isn't reused even if there was no alive vma, thus rmap |
|---|
| 273 | + * walker has a good chance of avoiding scanning the whole hierarchy when it |
|---|
| 274 | + * searches where page is mapped. |
|---|
| 260 | 275 | */ |
|---|
| 261 | 276 | int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) |
|---|
| 262 | 277 | { |
|---|
| .. | .. |
|---|
| 286 | 301 | * will always reuse it. Root anon_vma is never reused: |
|---|
| 287 | 302 | * it has self-parent reference and at least one child. |
|---|
| 288 | 303 | */ |
|---|
| 289 | | - if (!dst->anon_vma && anon_vma != src->anon_vma && |
|---|
| 290 | | - anon_vma->degree < 2) |
|---|
| 304 | + if (!dst->anon_vma && src->anon_vma && |
|---|
| 305 | + anon_vma != src->anon_vma && anon_vma->degree < 2) |
|---|
| 291 | 306 | dst->anon_vma = anon_vma; |
|---|
| 292 | 307 | } |
|---|
| 293 | 308 | if (dst->anon_vma) |
|---|
| .. | .. |
|---|
| 457 | 472 | * chain and verify that the page in question is indeed mapped in it |
|---|
| 458 | 473 | * [ something equivalent to page_mapped_in_vma() ]. |
|---|
| 459 | 474 | * |
|---|
| 460 | | - * Since anon_vma's slab is DESTROY_BY_RCU and we know from page_remove_rmap() |
|---|
| 461 | | - * that the anon_vma pointer from page->mapping is valid if there is a |
|---|
| 462 | | - * mapcount, we can dereference the anon_vma after observing those. |
|---|
| 475 | + * Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from |
|---|
| 476 | + * page_remove_rmap() that the anon_vma pointer from page->mapping is valid |
|---|
| 477 | + * if there is a mapcount, we can dereference the anon_vma after observing |
|---|
| 478 | + * those. |
|---|
| 463 | 479 | */ |
|---|
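The guarantee the comment above leans on comes from how the anon_vma slab cache is created. A sketch mirroring anon_vma_init() in this same file (constructor and flag combination as in mainline; treat the exact flags as an assumption for this tree):

```c
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/rmap.h>

static struct kmem_cache *anon_vma_cachep;

static void anon_vma_ctor(void *data)
{
	struct anon_vma *anon_vma = data;

	init_rwsem(&anon_vma->rwsem);
	atomic_set(&anon_vma->refcount, 0);
	anon_vma->rb_root = RB_ROOT_CACHED;
}

static void __init my_anon_vma_init(void)	/* name invented; mirrors anon_vma_init() */
{
	/*
	 * SLAB_TYPESAFE_BY_RCU: a freed object may be reused at once, but
	 * only as another anon_vma, and its memory is not handed back to the
	 * page allocator while RCU readers may still hold a stale pointer.
	 * Readers therefore re-validate (mapping, mapcount) after taking a
	 * reference or the lock, as the functions below do.
	 */
	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
			0, SLAB_TYPESAFE_BY_RCU | SLAB_PANIC | SLAB_ACCOUNT,
			anon_vma_ctor);
}
```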
| 464 | 480 | struct anon_vma *page_get_anon_vma(struct page *page) |
|---|
| 465 | 481 | { |
|---|
| .. | .. |
|---|
| 502 | 518 | * |
|---|
| 503 | 519 | * It's a little more complex as it tries to keep the fast path to a single |
|---|
| 504 | 520 | * atomic op -- the trylock. If we fail the trylock, we fall back to getting a |
|---|
| 505 | | - * reference like with page_get_anon_vma() and then block on the mutex. |
|---|
| 521 | + * reference like with page_get_anon_vma() and then block on the mutex |
|---|
| 522 | + * on !rwc->try_lock case. |
|---|
| 506 | 523 | */ |
|---|
| 507 | | -struct anon_vma *page_lock_anon_vma_read(struct page *page) |
|---|
| 524 | +struct anon_vma *page_lock_anon_vma_read(struct page *page, |
|---|
| 525 | + struct rmap_walk_control *rwc) |
|---|
| 508 | 526 | { |
|---|
| 509 | 527 | struct anon_vma *anon_vma = NULL; |
|---|
| 510 | 528 | struct anon_vma *root_anon_vma; |
|---|
| 511 | 529 | unsigned long anon_mapping; |
|---|
| 530 | + bool success = false; |
|---|
| 512 | 531 | |
|---|
| 513 | 532 | rcu_read_lock(); |
|---|
| 514 | 533 | anon_mapping = (unsigned long)READ_ONCE(page->mapping); |
|---|
| .. | .. |
|---|
| 529 | 548 | up_read(&root_anon_vma->rwsem); |
|---|
| 530 | 549 | anon_vma = NULL; |
|---|
| 531 | 550 | } |
|---|
| 551 | + goto out; |
|---|
| 552 | + } |
|---|
| 553 | + trace_android_vh_do_page_trylock(page, NULL, NULL, &success); |
|---|
| 554 | + if (success) { |
|---|
| 555 | + anon_vma = NULL; |
|---|
| 556 | + goto out; |
|---|
| 557 | + } |
|---|
| 558 | + |
|---|
| 559 | + if (rwc && rwc->try_lock) { |
|---|
| 560 | + anon_vma = NULL; |
|---|
| 561 | + rwc->contended = true; |
|---|
| 532 | 562 | goto out; |
|---|
| 533 | 563 | } |
|---|
| 534 | 564 | |
|---|
| .. | .. |
|---|
| 658 | 688 | */ |
|---|
| 659 | 689 | void flush_tlb_batched_pending(struct mm_struct *mm) |
|---|
| 660 | 690 | { |
|---|
| 661 | | - if (mm->tlb_flush_batched) { |
|---|
| 691 | + if (data_race(mm->tlb_flush_batched)) { |
|---|
| 662 | 692 | flush_tlb_mm(mm); |
|---|
| 663 | 693 | |
|---|
| 664 | 694 | /* |
|---|
| .. | .. |
|---|
| 768 | 798 | } |
|---|
| 769 | 799 | |
|---|
| 770 | 800 | if (pvmw.pte) { |
|---|
| 801 | + trace_android_vh_look_around(&pvmw, page, vma, &referenced); |
|---|
| 771 | 802 | if (ptep_clear_flush_young_notify(vma, address, |
|---|
| 772 | 803 | pvmw.pte)) { |
|---|
| 773 | 804 | /* |
|---|
| .. | .. |
|---|
| 803 | 834 | pra->vm_flags |= vma->vm_flags; |
|---|
| 804 | 835 | } |
|---|
| 805 | 836 | |
|---|
| 837 | + trace_android_vh_page_referenced_one_end(vma, page, referenced); |
|---|
| 806 | 838 | if (!pra->mapcount) |
|---|
| 807 | 839 | return false; /* To break the loop */ |
|---|
| 808 | 840 | |
|---|
| .. | .. |
|---|
| 827 | 859 | * @memcg: target memory cgroup |
|---|
| 828 | 860 | * @vm_flags: collect encountered vma->vm_flags who actually referenced the page |
|---|
| 829 | 861 | * |
|---|
| 830 | | - * Quick test_and_clear_referenced for all mappings to a page, |
|---|
| 831 | | - * returns the number of ptes which referenced the page. |
|---|
| 862 | + * Quick test_and_clear_referenced for all mappings of a page, |
|---|
| 863 | + * |
|---|
| 864 | + * Return: The number of mappings which referenced the page. Return -1 if |
|---|
| 865 | + * the function bailed out due to rmap lock contention. |
|---|
| 832 | 866 | */ |
|---|
| 833 | 867 | int page_referenced(struct page *page, |
|---|
| 834 | 868 | int is_locked, |
|---|
| .. | .. |
|---|
| 844 | 878 | .rmap_one = page_referenced_one, |
|---|
| 845 | 879 | .arg = (void *)&pra, |
|---|
| 846 | 880 | .anon_lock = page_lock_anon_vma_read, |
|---|
| 881 | + .try_lock = true, |
|---|
| 847 | 882 | }; |
|---|
| 848 | 883 | |
|---|
| 849 | 884 | *vm_flags = 0; |
|---|
| 850 | | - if (!page_mapped(page)) |
|---|
| 885 | + if (!pra.mapcount) |
|---|
| 851 | 886 | return 0; |
|---|
| 852 | 887 | |
|---|
| 853 | 888 | if (!page_rmapping(page)) |
|---|
| .. | .. |
|---|
| 874 | 909 | if (we_locked) |
|---|
| 875 | 910 | unlock_page(page); |
|---|
| 876 | 911 | |
|---|
| 877 | | - return pra.referenced; |
|---|
| 912 | + return rwc.contended ? -1 : pra.referenced; |
|---|
| 878 | 913 | } |
|---|
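With rwc.try_lock set, page_referenced() above no longer sleeps on a contended anon_vma or i_mmap rwsem; contention is reported back to the caller as -1. A hedged sketch of how a reclaim-style caller might react, loosely modelled on the reference checks in vmscan (the helper name and its "keep the page" policy are illustrative, not the actual vmscan code):

```c
#include <linux/rmap.h>
#include <linux/memcontrol.h>

/*
 * Hedged sketch of a caller (name invented): treat "could not take the rmap
 * lock" like "recently referenced" so the page is kept and retried later,
 * instead of blocking the reclaim path on the lock.
 */
static int classify_for_reclaim(struct page *page, struct mem_cgroup *memcg)
{
	unsigned long vm_flags;
	int referenced;

	referenced = page_referenced(page, 0, memcg, &vm_flags);
	if (referenced == -1)
		return 1;	/* rmap lock contended: pretend it was referenced */

	return referenced;	/* >= 0: number of mappings that referenced it */
}
```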
| 879 | 914 | |
|---|
| 880 | 915 | static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, |
|---|
| .. | .. |
|---|
| 886 | 921 | .address = address, |
|---|
| 887 | 922 | .flags = PVMW_SYNC, |
|---|
| 888 | 923 | }; |
|---|
| 889 | | - unsigned long start = address, end; |
|---|
| 924 | + struct mmu_notifier_range range; |
|---|
| 890 | 925 | int *cleaned = arg; |
|---|
| 891 | 926 | |
|---|
| 892 | 927 | /* |
|---|
| 893 | 928 | * We have to assume the worst case, i.e. pmd, for invalidation. Note that |
|---|
| 894 | 929 | * the page cannot be freed from this function. |
|---|
| 895 | 930 | */ |
|---|
| 896 | | - end = vma_address_end(page, vma); |
|---|
| 897 | | - mmu_notifier_invalidate_range_start(vma->vm_mm, start, end); |
|---|
| 931 | + mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, |
|---|
| 932 | + 0, vma, vma->vm_mm, address, |
|---|
| 933 | + vma_address_end(page, vma)); |
|---|
| 934 | + mmu_notifier_invalidate_range_start(&range); |
|---|
| 898 | 935 | |
|---|
| 899 | 936 | while (page_vma_mapped_walk(&pvmw)) { |
|---|
| 900 | | - unsigned long cstart; |
|---|
| 901 | 937 | int ret = 0; |
|---|
| 902 | 938 | |
|---|
| 903 | | - cstart = address = pvmw.address; |
|---|
| 939 | + address = pvmw.address; |
|---|
| 904 | 940 | if (pvmw.pte) { |
|---|
| 905 | 941 | pte_t entry; |
|---|
| 906 | 942 | pte_t *pte = pvmw.pte; |
|---|
| .. | .. |
|---|
| 915 | 951 | set_pte_at(vma->vm_mm, address, pte, entry); |
|---|
| 916 | 952 | ret = 1; |
|---|
| 917 | 953 | } else { |
|---|
| 918 | | -#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE |
|---|
| 954 | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE |
|---|
| 919 | 955 | pmd_t *pmd = pvmw.pmd; |
|---|
| 920 | 956 | pmd_t entry; |
|---|
| 921 | 957 | |
|---|
| .. | .. |
|---|
| 927 | 963 | entry = pmd_wrprotect(entry); |
|---|
| 928 | 964 | entry = pmd_mkclean(entry); |
|---|
| 929 | 965 | set_pmd_at(vma->vm_mm, address, pmd, entry); |
|---|
| 930 | | - cstart &= PMD_MASK; |
|---|
| 931 | 966 | ret = 1; |
|---|
| 932 | 967 | #else |
|---|
| 933 | 968 | /* unexpected pmd-mapped page? */ |
|---|
| .. | .. |
|---|
| 946 | 981 | (*cleaned)++; |
|---|
| 947 | 982 | } |
|---|
| 948 | 983 | |
|---|
| 949 | | - mmu_notifier_invalidate_range_end(vma->vm_mm, start, end); |
|---|
| 984 | + mmu_notifier_invalidate_range_end(&range); |
|---|
| 950 | 985 | |
|---|
| 951 | 986 | return true; |
|---|
| 952 | 987 | } |
|---|
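The conversion above replaces the bare (mm, start, end) triple with a struct mmu_notifier_range that also names the event (MMU_NOTIFY_PROTECTION_PAGE here), so secondary MMUs such as KVM or IOMMU drivers can tell a write-protect from a full unmap. A minimal sketch of the same bracket pattern, assuming the mmu_notifier API this kernel uses (the function and its placeholder body are invented):

```c
#include <linux/mmu_notifier.h>

static void demote_writable_mappings(struct vm_area_struct *vma,
				     unsigned long start, unsigned long end)
{
	struct mmu_notifier_range range;

	/* Announce range and reason before touching any PTE/PMD. */
	mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, 0,
				vma, vma->vm_mm, start, end);
	mmu_notifier_invalidate_range_start(&range);

	/* ... walk and write-protect the mappings in [start, end) ... */

	mmu_notifier_invalidate_range_end(&range);
}
```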
| .. | .. |
|---|
| 1014 | 1049 | |
|---|
| 1015 | 1050 | /** |
|---|
| 1016 | 1051 | * __page_set_anon_rmap - set up new anonymous rmap |
|---|
| 1017 | | - * @page: Page to add to rmap |
|---|
| 1052 | + * @page: Page or Hugepage to add to rmap |
|---|
| 1018 | 1053 | * @vma: VM area to add page to. |
|---|
| 1019 | 1054 | * @address: User virtual address of the mapping |
|---|
| 1020 | 1055 | * @exclusive: the page is exclusively owned by the current process |
|---|
| .. | .. |
|---|
| 1051 | 1086 | static void __page_check_anon_rmap(struct page *page, |
|---|
| 1052 | 1087 | struct vm_area_struct *vma, unsigned long address) |
|---|
| 1053 | 1088 | { |
|---|
| 1054 | | -#ifdef CONFIG_DEBUG_VM |
|---|
| 1055 | 1089 | /* |
|---|
| 1056 | 1090 | * The page's anon-rmap details (mapping and index) are guaranteed to |
|---|
| 1057 | 1091 | * be set up correctly at this point. |
|---|
| .. | .. |
|---|
| 1064 | 1098 | * are initially only visible via the pagetables, and the pte is locked |
|---|
| 1065 | 1099 | * over the call to page_add_new_anon_rmap. |
|---|
| 1066 | 1100 | */ |
|---|
| 1067 | | - BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root); |
|---|
| 1068 | | - BUG_ON(page_to_pgoff(page) != linear_page_index(vma, address)); |
|---|
| 1069 | | -#endif |
|---|
| 1101 | + VM_BUG_ON_PAGE(page_anon_vma(page)->root != vma->anon_vma->root, page); |
|---|
| 1102 | + VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address), |
|---|
| 1103 | + page); |
|---|
| 1070 | 1104 | } |
|---|
| 1071 | 1105 | |
|---|
| 1072 | 1106 | /** |
|---|
| .. | .. |
|---|
| 1097 | 1131 | { |
|---|
| 1098 | 1132 | bool compound = flags & RMAP_COMPOUND; |
|---|
| 1099 | 1133 | bool first; |
|---|
| 1134 | + bool success = false; |
|---|
| 1135 | + |
|---|
| 1136 | + if (unlikely(PageKsm(page))) |
|---|
| 1137 | + lock_page_memcg(page); |
|---|
| 1138 | + else |
|---|
| 1139 | + VM_BUG_ON_PAGE(!PageLocked(page), page); |
|---|
| 1100 | 1140 | |
|---|
| 1101 | 1141 | if (compound) { |
|---|
| 1102 | 1142 | atomic_t *mapcount; |
|---|
| .. | .. |
|---|
| 1105 | 1145 | mapcount = compound_mapcount_ptr(page); |
|---|
| 1106 | 1146 | first = atomic_inc_and_test(mapcount); |
|---|
| 1107 | 1147 | } else { |
|---|
| 1108 | | - first = atomic_inc_and_test(&page->_mapcount); |
|---|
| 1148 | + trace_android_vh_update_page_mapcount(page, true, compound, |
|---|
| 1149 | + &first, &success); |
|---|
| 1150 | + if (!success) |
|---|
| 1151 | + first = atomic_inc_and_test(&page->_mapcount); |
|---|
| 1109 | 1152 | } |
|---|
| 1110 | 1153 | |
|---|
| 1111 | 1154 | if (first) { |
|---|
| 1112 | | - int nr = compound ? hpage_nr_pages(page) : 1; |
|---|
| 1155 | + int nr = compound ? thp_nr_pages(page) : 1; |
|---|
| 1113 | 1156 | /* |
|---|
| 1114 | 1157 | * We use the irq-unsafe __{inc|mod}_zone_page_stat because |
|---|
| 1115 | 1158 | * these counters are not modified in interrupt context, and |
|---|
| .. | .. |
|---|
| 1117 | 1160 | * disabled. |
|---|
| 1118 | 1161 | */ |
|---|
| 1119 | 1162 | if (compound) |
|---|
| 1120 | | - __inc_node_page_state(page, NR_ANON_THPS); |
|---|
| 1121 | | - __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, nr); |
|---|
| 1163 | + __inc_lruvec_page_state(page, NR_ANON_THPS); |
|---|
| 1164 | + __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr); |
|---|
| 1122 | 1165 | } |
|---|
| 1123 | | - if (unlikely(PageKsm(page))) |
|---|
| 1124 | | - return; |
|---|
| 1125 | 1166 | |
|---|
| 1126 | | - VM_BUG_ON_PAGE(!PageLocked(page), page); |
|---|
| 1167 | + if (unlikely(PageKsm(page))) { |
|---|
| 1168 | + unlock_page_memcg(page); |
|---|
| 1169 | + return; |
|---|
| 1170 | + } |
|---|
| 1127 | 1171 | |
|---|
| 1128 | 1172 | /* address might be in next vma when migration races vma_adjust */ |
|---|
| 1129 | 1173 | if (first) |
|---|
| .. | .. |
|---|
| 1134 | 1178 | } |
|---|
| 1135 | 1179 | |
|---|
| 1136 | 1180 | /** |
|---|
| 1137 | | - * page_add_new_anon_rmap - add pte mapping to a new anonymous page |
|---|
| 1181 | + * __page_add_new_anon_rmap - add pte mapping to a new anonymous page |
|---|
| 1138 | 1182 | * @page: the page to add the mapping to |
|---|
| 1139 | 1183 | * @vma: the vm area in which the mapping is added |
|---|
| 1140 | 1184 | * @address: the user virtual address mapped |
|---|
| .. | .. |
|---|
| 1144 | 1188 | * This means the inc-and-test can be bypassed. |
|---|
| 1145 | 1189 | * Page does not have to be locked. |
|---|
| 1146 | 1190 | */ |
|---|
| 1147 | | -void page_add_new_anon_rmap(struct page *page, |
|---|
| 1191 | +void __page_add_new_anon_rmap(struct page *page, |
|---|
| 1148 | 1192 | struct vm_area_struct *vma, unsigned long address, bool compound) |
|---|
| 1149 | 1193 | { |
|---|
| 1150 | | - int nr = compound ? hpage_nr_pages(page) : 1; |
|---|
| 1194 | + int nr = compound ? thp_nr_pages(page) : 1; |
|---|
| 1151 | 1195 | |
|---|
| 1152 | | - VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma); |
|---|
| 1153 | 1196 | __SetPageSwapBacked(page); |
|---|
| 1154 | 1197 | if (compound) { |
|---|
| 1155 | 1198 | VM_BUG_ON_PAGE(!PageTransHuge(page), page); |
|---|
| 1156 | 1199 | /* increment count (starts at -1) */ |
|---|
| 1157 | 1200 | atomic_set(compound_mapcount_ptr(page), 0); |
|---|
| 1158 | | - __inc_node_page_state(page, NR_ANON_THPS); |
|---|
| 1201 | + if (hpage_pincount_available(page)) |
|---|
| 1202 | + atomic_set(compound_pincount_ptr(page), 0); |
|---|
| 1203 | + |
|---|
| 1204 | + __inc_lruvec_page_state(page, NR_ANON_THPS); |
|---|
| 1159 | 1205 | } else { |
|---|
| 1160 | 1206 | /* Anon THP always mapped first with PMD */ |
|---|
| 1161 | 1207 | VM_BUG_ON_PAGE(PageTransCompound(page), page); |
|---|
| 1162 | 1208 | /* increment count (starts at -1) */ |
|---|
| 1163 | 1209 | atomic_set(&page->_mapcount, 0); |
|---|
| 1164 | 1210 | } |
|---|
| 1165 | | - __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, nr); |
|---|
| 1211 | + __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr); |
|---|
| 1166 | 1212 | __page_set_anon_rmap(page, vma, address, 1); |
|---|
| 1167 | 1213 | } |
|---|
| 1168 | 1214 | |
|---|
| .. | .. |
|---|
| 1176 | 1222 | void page_add_file_rmap(struct page *page, bool compound) |
|---|
| 1177 | 1223 | { |
|---|
| 1178 | 1224 | int i, nr = 1; |
|---|
| 1225 | + bool first_mapping; |
|---|
| 1226 | + bool success = false; |
|---|
| 1179 | 1227 | |
|---|
| 1180 | 1228 | VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page); |
|---|
| 1181 | 1229 | lock_page_memcg(page); |
|---|
| 1182 | 1230 | if (compound && PageTransHuge(page)) { |
|---|
| 1183 | | - for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) { |
|---|
| 1184 | | - if (atomic_inc_and_test(&page[i]._mapcount)) |
|---|
| 1185 | | - nr++; |
|---|
| 1231 | + for (i = 0, nr = 0; i < thp_nr_pages(page); i++) { |
|---|
| 1232 | + trace_android_vh_update_page_mapcount(&page[i], true, |
|---|
| 1233 | + compound, &first_mapping, &success); |
|---|
| 1234 | + if (success) { |
|---|
| 1235 | + if (first_mapping) |
|---|
| 1236 | + nr++; |
|---|
| 1237 | + } else { |
|---|
| 1238 | + if (atomic_inc_and_test(&page[i]._mapcount)) |
|---|
| 1239 | + nr++; |
|---|
| 1240 | + } |
|---|
| 1186 | 1241 | } |
|---|
| 1187 | 1242 | if (!atomic_inc_and_test(compound_mapcount_ptr(page))) |
|---|
| 1188 | 1243 | goto out; |
|---|
| 1189 | | - VM_BUG_ON_PAGE(!PageSwapBacked(page), page); |
|---|
| 1190 | | - __inc_node_page_state(page, NR_SHMEM_PMDMAPPED); |
|---|
| 1244 | + if (PageSwapBacked(page)) |
|---|
| 1245 | + __inc_node_page_state(page, NR_SHMEM_PMDMAPPED); |
|---|
| 1246 | + else |
|---|
| 1247 | + __inc_node_page_state(page, NR_FILE_PMDMAPPED); |
|---|
| 1191 | 1248 | } else { |
|---|
| 1192 | 1249 | if (PageTransCompound(page) && page_mapping(page)) { |
|---|
| 1193 | 1250 | VM_WARN_ON_ONCE(!PageLocked(page)); |
|---|
| .. | .. |
|---|
| 1196 | 1253 | if (PageMlocked(page)) |
|---|
| 1197 | 1254 | clear_page_mlock(compound_head(page)); |
|---|
| 1198 | 1255 | } |
|---|
| 1199 | | - if (!atomic_inc_and_test(&page->_mapcount)) |
|---|
| 1200 | | - goto out; |
|---|
| 1256 | + trace_android_vh_update_page_mapcount(page, true, |
|---|
| 1257 | + compound, &first_mapping, &success); |
|---|
| 1258 | + if (success) { |
|---|
| 1259 | + if (!first_mapping) |
|---|
| 1260 | + goto out; |
|---|
| 1261 | + } else { |
|---|
| 1262 | + if (!atomic_inc_and_test(&page->_mapcount)) |
|---|
| 1263 | + goto out; |
|---|
| 1264 | + } |
|---|
| 1201 | 1265 | } |
|---|
| 1202 | 1266 | __mod_lruvec_page_state(page, NR_FILE_MAPPED, nr); |
|---|
| 1203 | 1267 | out: |
|---|
| .. | .. |
|---|
| 1207 | 1271 | static void page_remove_file_rmap(struct page *page, bool compound) |
|---|
| 1208 | 1272 | { |
|---|
| 1209 | 1273 | int i, nr = 1; |
|---|
| 1274 | + bool first_mapping; |
|---|
| 1275 | + bool success = false; |
|---|
| 1210 | 1276 | |
|---|
| 1211 | 1277 | VM_BUG_ON_PAGE(compound && !PageHead(page), page); |
|---|
| 1212 | | - lock_page_memcg(page); |
|---|
| 1213 | 1278 | |
|---|
| 1214 | 1279 | /* Hugepages are not counted in NR_FILE_MAPPED for now. */ |
|---|
| 1215 | 1280 | if (unlikely(PageHuge(page))) { |
|---|
| 1216 | 1281 | /* hugetlb pages are always mapped with pmds */ |
|---|
| 1217 | 1282 | atomic_dec(compound_mapcount_ptr(page)); |
|---|
| 1218 | | - goto out; |
|---|
| 1283 | + return; |
|---|
| 1219 | 1284 | } |
|---|
| 1220 | 1285 | |
|---|
| 1221 | 1286 | /* page still mapped by someone else? */ |
|---|
| 1222 | 1287 | if (compound && PageTransHuge(page)) { |
|---|
| 1223 | | - for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) { |
|---|
| 1224 | | - if (atomic_add_negative(-1, &page[i]._mapcount)) |
|---|
| 1225 | | - nr++; |
|---|
| 1288 | + for (i = 0, nr = 0; i < thp_nr_pages(page); i++) { |
|---|
| 1289 | + trace_android_vh_update_page_mapcount(&page[i], false, |
|---|
| 1290 | + compound, &first_mapping, &success); |
|---|
| 1291 | + if (success) { |
|---|
| 1292 | + if (first_mapping) |
|---|
| 1293 | + nr++; |
|---|
| 1294 | + } else { |
|---|
| 1295 | + if (atomic_add_negative(-1, &page[i]._mapcount)) |
|---|
| 1296 | + nr++; |
|---|
| 1297 | + } |
|---|
| 1226 | 1298 | } |
|---|
| 1227 | 1299 | if (!atomic_add_negative(-1, compound_mapcount_ptr(page))) |
|---|
| 1228 | | - goto out; |
|---|
| 1229 | | - VM_BUG_ON_PAGE(!PageSwapBacked(page), page); |
|---|
| 1230 | | - __dec_node_page_state(page, NR_SHMEM_PMDMAPPED); |
|---|
| 1300 | + return; |
|---|
| 1301 | + if (PageSwapBacked(page)) |
|---|
| 1302 | + __dec_node_page_state(page, NR_SHMEM_PMDMAPPED); |
|---|
| 1303 | + else |
|---|
| 1304 | + __dec_node_page_state(page, NR_FILE_PMDMAPPED); |
|---|
| 1231 | 1305 | } else { |
|---|
| 1232 | | - if (!atomic_add_negative(-1, &page->_mapcount)) |
|---|
| 1233 | | - goto out; |
|---|
| 1306 | + trace_android_vh_update_page_mapcount(page, false, |
|---|
| 1307 | + compound, &first_mapping, &success); |
|---|
| 1308 | + if (success) { |
|---|
| 1309 | + if (!first_mapping) |
|---|
| 1310 | + return; |
|---|
| 1311 | + } else { |
|---|
| 1312 | + if (!atomic_add_negative(-1, &page->_mapcount)) |
|---|
| 1313 | + return; |
|---|
| 1314 | + } |
|---|
| 1234 | 1315 | } |
|---|
| 1235 | 1316 | |
|---|
| 1236 | 1317 | /* |
|---|
| .. | .. |
|---|
| 1242 | 1323 | |
|---|
| 1243 | 1324 | if (unlikely(PageMlocked(page))) |
|---|
| 1244 | 1325 | clear_page_mlock(page); |
|---|
| 1245 | | -out: |
|---|
| 1246 | | - unlock_page_memcg(page); |
|---|
| 1247 | 1326 | } |
|---|
| 1248 | 1327 | |
|---|
| 1249 | 1328 | static void page_remove_anon_compound_rmap(struct page *page) |
|---|
| 1250 | 1329 | { |
|---|
| 1251 | 1330 | int i, nr; |
|---|
| 1331 | + bool first_mapping; |
|---|
| 1332 | + bool success = false; |
|---|
| 1252 | 1333 | |
|---|
| 1253 | 1334 | if (!atomic_add_negative(-1, compound_mapcount_ptr(page))) |
|---|
| 1254 | 1335 | return; |
|---|
| .. | .. |
|---|
| 1260 | 1341 | if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) |
|---|
| 1261 | 1342 | return; |
|---|
| 1262 | 1343 | |
|---|
| 1263 | | - __dec_node_page_state(page, NR_ANON_THPS); |
|---|
| 1344 | + __dec_lruvec_page_state(page, NR_ANON_THPS); |
|---|
| 1264 | 1345 | |
|---|
| 1265 | 1346 | if (TestClearPageDoubleMap(page)) { |
|---|
| 1266 | 1347 | /* |
|---|
| 1267 | 1348 | * Subpages can be mapped with PTEs too. Check how many of |
|---|
| 1268 | | - * themi are still mapped. |
|---|
| 1349 | + * them are still mapped. |
|---|
| 1269 | 1350 | */ |
|---|
| 1270 | | - for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) { |
|---|
| 1271 | | - if (atomic_add_negative(-1, &page[i]._mapcount)) |
|---|
| 1272 | | - nr++; |
|---|
| 1351 | + for (i = 0, nr = 0; i < thp_nr_pages(page); i++) { |
|---|
| 1352 | + trace_android_vh_update_page_mapcount(&page[i], false, |
|---|
| 1353 | + false, &first_mapping, &success); |
|---|
| 1354 | + if (success) { |
|---|
| 1355 | + if (first_mapping) |
|---|
| 1356 | + nr++; |
|---|
| 1357 | + } else { |
|---|
| 1358 | + if (atomic_add_negative(-1, &page[i]._mapcount)) |
|---|
| 1359 | + nr++; |
|---|
| 1360 | + } |
|---|
| 1273 | 1361 | } |
|---|
| 1362 | + |
|---|
| 1363 | + /* |
|---|
| 1364 | + * Queue the page for deferred split if at least one small |
|---|
| 1365 | + * page of the compound page is unmapped, but at least one |
|---|
| 1366 | + * small page is still mapped. |
|---|
| 1367 | + */ |
|---|
| 1368 | + if (nr && nr < thp_nr_pages(page)) |
|---|
| 1369 | + deferred_split_huge_page(page); |
|---|
| 1274 | 1370 | } else { |
|---|
| 1275 | | - nr = HPAGE_PMD_NR; |
|---|
| 1371 | + nr = thp_nr_pages(page); |
|---|
| 1276 | 1372 | } |
|---|
| 1277 | 1373 | |
|---|
| 1278 | 1374 | if (unlikely(PageMlocked(page))) |
|---|
| 1279 | 1375 | clear_page_mlock(page); |
|---|
| 1280 | 1376 | |
|---|
| 1281 | | - if (nr) { |
|---|
| 1282 | | - __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, -nr); |
|---|
| 1283 | | - deferred_split_huge_page(page); |
|---|
| 1284 | | - } |
|---|
| 1377 | + if (nr) |
|---|
| 1378 | + __mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr); |
|---|
| 1285 | 1379 | } |
|---|
| 1286 | 1380 | |
|---|
| 1287 | 1381 | /** |
|---|
| .. | .. |
|---|
| 1293 | 1387 | */ |
|---|
| 1294 | 1388 | void page_remove_rmap(struct page *page, bool compound) |
|---|
| 1295 | 1389 | { |
|---|
| 1296 | | - if (!PageAnon(page)) |
|---|
| 1297 | | - return page_remove_file_rmap(page, compound); |
|---|
| 1390 | + bool first_mapping; |
|---|
| 1391 | + bool success = false; |
|---|
| 1392 | + lock_page_memcg(page); |
|---|
| 1298 | 1393 | |
|---|
| 1299 | | - if (compound) |
|---|
| 1300 | | - return page_remove_anon_compound_rmap(page); |
|---|
| 1394 | + if (!PageAnon(page)) { |
|---|
| 1395 | + page_remove_file_rmap(page, compound); |
|---|
| 1396 | + goto out; |
|---|
| 1397 | + } |
|---|
| 1301 | 1398 | |
|---|
| 1302 | | - /* page still mapped by someone else? */ |
|---|
| 1303 | | - if (!atomic_add_negative(-1, &page->_mapcount)) |
|---|
| 1304 | | - return; |
|---|
| 1399 | + if (compound) { |
|---|
| 1400 | + page_remove_anon_compound_rmap(page); |
|---|
| 1401 | + goto out; |
|---|
| 1402 | + } |
|---|
| 1305 | 1403 | |
|---|
| 1404 | + trace_android_vh_update_page_mapcount(page, false, |
|---|
| 1405 | + compound, &first_mapping, &success); |
|---|
| 1406 | + if (success) { |
|---|
| 1407 | + if (!first_mapping) |
|---|
| 1408 | + goto out; |
|---|
| 1409 | + } else { |
|---|
| 1410 | + /* page still mapped by someone else? */ |
|---|
| 1411 | + if (!atomic_add_negative(-1, &page->_mapcount)) |
|---|
| 1412 | + goto out; |
|---|
| 1413 | + } |
|---|
| 1306 | 1414 | /* |
|---|
| 1307 | 1415 | * We use the irq-unsafe __{inc|mod}_zone_page_stat because |
|---|
| 1308 | 1416 | * these counters are not modified in interrupt context, and |
|---|
| 1309 | 1417 | * pte lock(a spinlock) is held, which implies preemption disabled. |
|---|
| 1310 | 1418 | */ |
|---|
| 1311 | | - __dec_node_page_state(page, NR_ANON_MAPPED); |
|---|
| 1419 | + __dec_lruvec_page_state(page, NR_ANON_MAPPED); |
|---|
| 1312 | 1420 | |
|---|
| 1313 | 1421 | if (unlikely(PageMlocked(page))) |
|---|
| 1314 | 1422 | clear_page_mlock(page); |
|---|
| .. | .. |
|---|
| 1325 | 1433 | * Leaving it set also helps swapoff to reinstate ptes |
|---|
| 1326 | 1434 | * faster for those pages still in swapcache. |
|---|
| 1327 | 1435 | */ |
|---|
| 1436 | +out: |
|---|
| 1437 | + unlock_page_memcg(page); |
|---|
| 1328 | 1438 | } |
|---|
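The same three-step pattern around _mapcount recurs in page_add_anon_rmap(), page_add_file_rmap(), page_remove_file_rmap(), page_remove_anon_compound_rmap() and page_remove_rmap() above: offer the update to the trace_android_vh_update_page_mapcount() vendor hook first, and only fall back to the stock atomic op when the hook declined (success stays false). Spelled out as a helper purely for illustration (the helper is invented and not part of this patch; the hook is the one declared in trace/hooks/mm.h, included earlier in this diff):

```c
/* Increment side of the pattern; "first" is the unmapped -> mapped transition. */
static bool rmap_inc_mapcount(struct page *page, bool compound)
{
	bool first = false;
	bool success = false;

	/* Vendor hook may do the accounting itself and report the result. */
	trace_android_vh_update_page_mapcount(page, true, compound,
					      &first, &success);
	if (!success)
		first = atomic_inc_and_test(&page->_mapcount);

	return first;
}
```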
| 1329 | 1439 | |
|---|
| 1330 | 1440 | /* |
|---|
| .. | .. |
|---|
| 1342 | 1452 | pte_t pteval; |
|---|
| 1343 | 1453 | struct page *subpage; |
|---|
| 1344 | 1454 | bool ret = true; |
|---|
| 1345 | | - unsigned long start = address, end; |
|---|
| 1346 | | - enum ttu_flags flags = (enum ttu_flags)arg; |
|---|
| 1455 | + struct mmu_notifier_range range; |
|---|
| 1456 | + enum ttu_flags flags = (enum ttu_flags)(long)arg; |
|---|
| 1347 | 1457 | |
|---|
| 1348 | 1458 | /* |
|---|
| 1349 | 1459 | * When racing against e.g. zap_pte_range() on another cpu, |
|---|
| .. | .. |
|---|
| 1375 | 1485 | * Note that the page can not be free in this function as call of |
|---|
| 1376 | 1486 | * try_to_unmap() must hold a reference on the page. |
|---|
| 1377 | 1487 | */ |
|---|
| 1378 | | - end = PageKsm(page) ? |
|---|
| 1488 | + range.end = PageKsm(page) ? |
|---|
| 1379 | 1489 | address + PAGE_SIZE : vma_address_end(page, vma); |
|---|
| 1490 | + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, |
|---|
| 1491 | + address, range.end); |
|---|
| 1380 | 1492 | if (PageHuge(page)) { |
|---|
| 1381 | 1493 | /* |
|---|
| 1382 | 1494 | * If sharing is possible, start and end will be adjusted |
|---|
| 1383 | 1495 | * accordingly. |
|---|
| 1384 | 1496 | */ |
|---|
| 1385 | | - adjust_range_if_pmd_sharing_possible(vma, &start, &end); |
|---|
| 1497 | + adjust_range_if_pmd_sharing_possible(vma, &range.start, |
|---|
| 1498 | + &range.end); |
|---|
| 1386 | 1499 | } |
|---|
| 1387 | | - mmu_notifier_invalidate_range_start(vma->vm_mm, start, end); |
|---|
| 1500 | + mmu_notifier_invalidate_range_start(&range); |
|---|
| 1388 | 1501 | |
|---|
| 1389 | 1502 | while (page_vma_mapped_walk(&pvmw)) { |
|---|
| 1390 | 1503 | #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION |
|---|
| .. | .. |
|---|
| 1408 | 1521 | if (!PageTransCompound(page)) { |
|---|
| 1409 | 1522 | /* |
|---|
| 1410 | 1523 | * Holding pte lock, we do *not* need |
|---|
| 1411 | | - * mmap_sem here |
|---|
| 1524 | + * mmap_lock here |
|---|
| 1412 | 1525 | */ |
|---|
| 1413 | 1526 | mlock_vma_page(page); |
|---|
| 1414 | 1527 | } |
|---|
| .. | .. |
|---|
| 1426 | 1539 | subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); |
|---|
| 1427 | 1540 | address = pvmw.address; |
|---|
| 1428 | 1541 | |
|---|
| 1429 | | - if (PageHuge(page)) { |
|---|
| 1430 | | - if (huge_pmd_unshare(mm, &address, pvmw.pte)) { |
|---|
| 1542 | + if (PageHuge(page) && !PageAnon(page)) { |
|---|
| 1543 | + /* |
|---|
| 1544 | + * To call huge_pmd_unshare, i_mmap_rwsem must be |
|---|
| 1545 | + * held in write mode. Caller needs to explicitly |
|---|
| 1546 | + * do this outside rmap routines. |
|---|
| 1547 | + */ |
|---|
| 1548 | + VM_BUG_ON(!(flags & TTU_RMAP_LOCKED)); |
|---|
| 1549 | + if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) { |
|---|
| 1431 | 1550 | /* |
|---|
| 1432 | 1551 | * huge_pmd_unshare unmapped an entire PMD |
|---|
| 1433 | 1552 | * page. There is no way of knowing exactly |
|---|
| .. | .. |
|---|
| 1435 | 1554 | * we must flush them all. start/end were |
|---|
| 1436 | 1555 | * already adjusted above to cover this range. |
|---|
| 1437 | 1556 | */ |
|---|
| 1438 | | - flush_cache_range(vma, start, end); |
|---|
| 1439 | | - flush_tlb_range(vma, start, end); |
|---|
| 1440 | | - mmu_notifier_invalidate_range(mm, start, end); |
|---|
| 1557 | + flush_cache_range(vma, range.start, range.end); |
|---|
| 1558 | + flush_tlb_range(vma, range.start, range.end); |
|---|
| 1559 | + mmu_notifier_invalidate_range(mm, range.start, |
|---|
| 1560 | + range.end); |
|---|
| 1441 | 1561 | |
|---|
| 1442 | 1562 | /* |
|---|
| 1443 | 1563 | * The ref count of the PMD page was dropped |
|---|
| .. | .. |
|---|
| 1468 | 1588 | */ |
|---|
| 1469 | 1589 | entry = make_migration_entry(page, 0); |
|---|
| 1470 | 1590 | swp_pte = swp_entry_to_pte(entry); |
|---|
| 1471 | | - if (pte_soft_dirty(pteval)) |
|---|
| 1591 | + |
|---|
| 1592 | + /* |
|---|
| 1593 | + * pteval maps a zone device page and is therefore |
|---|
| 1594 | + * a swap pte. |
|---|
| 1595 | + */ |
|---|
| 1596 | + if (pte_swp_soft_dirty(pteval)) |
|---|
| 1472 | 1597 | swp_pte = pte_swp_mksoft_dirty(swp_pte); |
|---|
| 1598 | + if (pte_swp_uffd_wp(pteval)) |
|---|
| 1599 | + swp_pte = pte_swp_mkuffd_wp(swp_pte); |
|---|
| 1473 | 1600 | set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte); |
|---|
| 1474 | 1601 | /* |
|---|
| 1475 | 1602 | * No need to invalidate here it will synchronize on |
|---|
| .. | .. |
|---|
| 1484 | 1611 | */ |
|---|
| 1485 | 1612 | subpage = page; |
|---|
| 1486 | 1613 | goto discard; |
|---|
| 1487 | | - } |
|---|
| 1488 | | - |
|---|
| 1489 | | - if (!(flags & TTU_IGNORE_ACCESS)) { |
|---|
| 1490 | | - if (ptep_clear_flush_young_notify(vma, address, |
|---|
| 1491 | | - pvmw.pte)) { |
|---|
| 1492 | | - ret = false; |
|---|
| 1493 | | - page_vma_mapped_walk_done(&pvmw); |
|---|
| 1494 | | - break; |
|---|
| 1495 | | - } |
|---|
| 1496 | 1614 | } |
|---|
| 1497 | 1615 | |
|---|
| 1498 | 1616 | /* Nuke the page table entry. */ |
|---|
| .. | .. |
|---|
| 1523 | 1641 | if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) { |
|---|
| 1524 | 1642 | pteval = swp_entry_to_pte(make_hwpoison_entry(subpage)); |
|---|
| 1525 | 1643 | if (PageHuge(page)) { |
|---|
| 1526 | | - int nr = 1 << compound_order(page); |
|---|
| 1527 | | - hugetlb_count_sub(nr, mm); |
|---|
| 1644 | + hugetlb_count_sub(compound_nr(page), mm); |
|---|
| 1528 | 1645 | set_huge_swap_pte_at(mm, address, |
|---|
| 1529 | 1646 | pvmw.pte, pteval, |
|---|
| 1530 | 1647 | vma_mmu_pagesize(vma)); |
|---|
| .. | .. |
|---|
| 1570 | 1687 | swp_pte = swp_entry_to_pte(entry); |
|---|
| 1571 | 1688 | if (pte_soft_dirty(pteval)) |
|---|
| 1572 | 1689 | swp_pte = pte_swp_mksoft_dirty(swp_pte); |
|---|
| 1690 | + if (pte_uffd_wp(pteval)) |
|---|
| 1691 | + swp_pte = pte_swp_mkuffd_wp(swp_pte); |
|---|
| 1573 | 1692 | set_pte_at(mm, address, pvmw.pte, swp_pte); |
|---|
| 1574 | 1693 | /* |
|---|
| 1575 | 1694 | * No need to invalidate here it will synchronize on |
|---|
| .. | .. |
|---|
| 1594 | 1713 | |
|---|
| 1595 | 1714 | /* MADV_FREE page check */ |
|---|
| 1596 | 1715 | if (!PageSwapBacked(page)) { |
|---|
| 1597 | | - if (!PageDirty(page)) { |
|---|
| 1716 | + int ref_count, map_count; |
|---|
| 1717 | + |
|---|
| 1718 | + /* |
|---|
| 1719 | + * Synchronize with gup_pte_range(): |
|---|
| 1720 | + * - clear PTE; barrier; read refcount |
|---|
| 1721 | + * - inc refcount; barrier; read PTE |
|---|
| 1722 | + */ |
|---|
| 1723 | + smp_mb(); |
|---|
| 1724 | + |
|---|
| 1725 | + ref_count = page_ref_count(page); |
|---|
| 1726 | + map_count = page_mapcount(page); |
|---|
| 1727 | + |
|---|
| 1728 | + /* |
|---|
| 1729 | + * Order reads for page refcount and dirty flag |
|---|
| 1730 | + * (see comments in __remove_mapping()). |
|---|
| 1731 | + */ |
|---|
| 1732 | + smp_rmb(); |
|---|
| 1733 | + |
|---|
| 1734 | + /* |
|---|
| 1735 | + * The only page refs must be one from isolation |
|---|
| 1736 | + * plus the rmap(s) (dropped by discard:). |
|---|
| 1737 | + */ |
|---|
| 1738 | + if (ref_count == 1 + map_count && |
|---|
| 1739 | + !PageDirty(page)) { |
|---|
| 1598 | 1740 | /* Invalidate as we cleared the pte */ |
|---|
| 1599 | 1741 | mmu_notifier_invalidate_range(mm, |
|---|
| 1600 | 1742 | address, address + PAGE_SIZE); |
|---|
| .. | .. |
|---|
| 1636 | 1778 | swp_pte = swp_entry_to_pte(entry); |
|---|
| 1637 | 1779 | if (pte_soft_dirty(pteval)) |
|---|
| 1638 | 1780 | swp_pte = pte_swp_mksoft_dirty(swp_pte); |
|---|
| 1781 | + if (pte_uffd_wp(pteval)) |
|---|
| 1782 | + swp_pte = pte_swp_mkuffd_wp(swp_pte); |
|---|
| 1639 | 1783 | set_pte_at(mm, address, pvmw.pte, swp_pte); |
|---|
| 1640 | 1784 | /* Invalidate as we cleared the pte */ |
|---|
| 1641 | 1785 | mmu_notifier_invalidate_range(mm, address, |
|---|
| .. | .. |
|---|
| 1665 | 1809 | put_page(page); |
|---|
| 1666 | 1810 | } |
|---|
| 1667 | 1811 | |
|---|
| 1668 | | - mmu_notifier_invalidate_range_end(vma->vm_mm, start, end); |
|---|
| 1812 | + mmu_notifier_invalidate_range_end(&range); |
|---|
| 1813 | + trace_android_vh_try_to_unmap_one(vma, page, address, ret); |
|---|
| 1669 | 1814 | |
|---|
| 1670 | 1815 | return ret; |
|---|
| 1671 | 1816 | } |
|---|
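A worked example for the new ref_count == 1 + map_count test in the MADV_FREE branch of try_to_unmap_one() above (numbers illustrative): a clean lazyfree page mapped by two PTEs and isolated for reclaim holds one reference from the isolation plus one per mapping, so page_ref_count() is 3, page_mapcount() is 2, and 3 == 1 + 2 lets the page be discarded. If a concurrent fast GUP has taken an extra reference, the refcount is 4, the equality fails, the PTE is restored and the page is marked swap-backed again, so it is not freed underneath the GUP user; the smp_mb()/smp_rmb() barriers pair with the "inc refcount; barrier; read PTE" sequence in gup_pte_range() so the unmap side and the GUP side cannot both miss each other.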
| 1672 | 1817 | |
|---|
| 1673 | | -bool is_vma_temporary_stack(struct vm_area_struct *vma) |
|---|
| 1674 | | -{ |
|---|
| 1675 | | - int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); |
|---|
| 1676 | | - |
|---|
| 1677 | | - if (!maybe_stack) |
|---|
| 1678 | | - return false; |
|---|
| 1679 | | - |
|---|
| 1680 | | - if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) == |
|---|
| 1681 | | - VM_STACK_INCOMPLETE_SETUP) |
|---|
| 1682 | | - return true; |
|---|
| 1683 | | - |
|---|
| 1684 | | - return false; |
|---|
| 1685 | | -} |
|---|
| 1686 | | - |
|---|
| 1687 | 1818 | static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg) |
|---|
| 1688 | 1819 | { |
|---|
| 1689 | | - return is_vma_temporary_stack(vma); |
|---|
| 1820 | + return vma_is_temporary_stack(vma); |
|---|
| 1690 | 1821 | } |
|---|
| 1691 | 1822 | |
|---|
| 1692 | 1823 | static int page_not_mapped(struct page *page) |
|---|
| .. | .. |
|---|
| 1779 | 1910 | struct anon_vma *anon_vma; |
|---|
| 1780 | 1911 | |
|---|
| 1781 | 1912 | if (rwc->anon_lock) |
|---|
| 1782 | | - return rwc->anon_lock(page); |
|---|
| 1913 | + return rwc->anon_lock(page, rwc); |
|---|
| 1783 | 1914 | |
|---|
| 1784 | 1915 | /* |
|---|
| 1785 | 1916 | * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read() |
|---|
| 1786 | 1917 | * because that depends on page_mapped(); but not all its usages |
|---|
| 1787 | | - * are holding mmap_sem. Users without mmap_sem are required to |
|---|
| 1918 | + * are holding mmap_lock. Users without mmap_lock are required to |
|---|
| 1788 | 1919 | * take a reference count to prevent the anon_vma disappearing |
|---|
| 1789 | 1920 | */ |
|---|
| 1790 | 1921 | anon_vma = page_anon_vma(page); |
|---|
| 1791 | 1922 | if (!anon_vma) |
|---|
| 1792 | 1923 | return NULL; |
|---|
| 1793 | 1924 | |
|---|
| 1925 | + if (anon_vma_trylock_read(anon_vma)) |
|---|
| 1926 | + goto out; |
|---|
| 1927 | + |
|---|
| 1928 | + if (rwc->try_lock) { |
|---|
| 1929 | + anon_vma = NULL; |
|---|
| 1930 | + rwc->contended = true; |
|---|
| 1931 | + goto out; |
|---|
| 1932 | + } |
|---|
| 1933 | + |
|---|
| 1794 | 1934 | anon_vma_lock_read(anon_vma); |
|---|
| 1935 | +out: |
|---|
| 1795 | 1936 | return anon_vma; |
|---|
| 1796 | 1937 | } |
|---|
| 1797 | 1938 | |
|---|
| .. | .. |
|---|
| 1804 | 1945 | * Find all the mappings of a page using the mapping pointer and the vma chains |
|---|
| 1805 | 1946 | * contained in the anon_vma struct it points to. |
|---|
| 1806 | 1947 | * |
|---|
| 1807 | | - * When called from try_to_munlock(), the mmap_sem of the mm containing the vma |
|---|
| 1948 | + * When called from try_to_munlock(), the mmap_lock of the mm containing the vma |
|---|
| 1808 | 1949 | * where the page was found will be held for write. So, we won't recheck |
|---|
| 1809 | 1950 | * vm_flags for that VMA. That should be OK, because that vma shouldn't be |
|---|
| 1810 | 1951 | * LOCKED. |
|---|
| .. | .. |
|---|
| 1827 | 1968 | return; |
|---|
| 1828 | 1969 | |
|---|
| 1829 | 1970 | pgoff_start = page_to_pgoff(page); |
|---|
| 1830 | | - pgoff_end = pgoff_start + hpage_nr_pages(page) - 1; |
|---|
| 1971 | + pgoff_end = pgoff_start + thp_nr_pages(page) - 1; |
|---|
| 1831 | 1972 | anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, |
|---|
| 1832 | 1973 | pgoff_start, pgoff_end) { |
|---|
| 1833 | 1974 | struct vm_area_struct *vma = avc->vma; |
|---|
| .. | .. |
|---|
| 1857 | 1998 | * Find all the mappings of a page using the mapping pointer and the vma chains |
|---|
| 1858 | 1999 | * contained in the address_space struct it points to. |
|---|
| 1859 | 2000 | * |
|---|
| 1860 | | - * When called from try_to_munlock(), the mmap_sem of the mm containing the vma |
|---|
| 2001 | + * When called from try_to_munlock(), the mmap_lock of the mm containing the vma |
|---|
| 1861 | 2002 | * where the page was found will be held for write. So, we won't recheck |
|---|
| 1862 | 2003 | * vm_flags for that VMA. That should be OK, because that vma shouldn't be |
|---|
| 1863 | 2004 | * LOCKED. |
|---|
| .. | .. |
|---|
| 1868 | 2009 | struct address_space *mapping = page_mapping(page); |
|---|
| 1869 | 2010 | pgoff_t pgoff_start, pgoff_end; |
|---|
| 1870 | 2011 | struct vm_area_struct *vma; |
|---|
| 2012 | + bool got_lock = false, success = false; |
|---|
| 1871 | 2013 | |
|---|
| 1872 | 2014 | /* |
|---|
| 1873 | 2015 | * The page lock not only makes sure that page->mapping cannot |
|---|
| .. | .. |
|---|
| 1881 | 2023 | return; |
|---|
| 1882 | 2024 | |
|---|
| 1883 | 2025 | pgoff_start = page_to_pgoff(page); |
|---|
| 1884 | | - pgoff_end = pgoff_start + hpage_nr_pages(page) - 1; |
|---|
| 1885 | | - if (!locked) |
|---|
| 1886 | | - i_mmap_lock_read(mapping); |
|---|
| 2026 | + pgoff_end = pgoff_start + thp_nr_pages(page) - 1; |
|---|
| 2027 | + if (!locked) { |
|---|
| 2028 | + trace_android_vh_do_page_trylock(page, |
|---|
| 2029 | + &mapping->i_mmap_rwsem, &got_lock, &success); |
|---|
| 2030 | + if (success) { |
|---|
| 2031 | + if (!got_lock) |
|---|
| 2032 | + return; |
|---|
| 2033 | + } else { |
|---|
| 2034 | + if (i_mmap_trylock_read(mapping)) |
|---|
| 2035 | + goto lookup; |
|---|
| 2036 | + |
|---|
| 2037 | + if (rwc->try_lock) { |
|---|
| 2038 | + rwc->contended = true; |
|---|
| 2039 | + return; |
|---|
| 2040 | + } |
|---|
| 2041 | + |
|---|
| 2042 | + i_mmap_lock_read(mapping); |
|---|
| 2043 | + } |
|---|
| 2044 | + } |
|---|
| 2045 | +lookup: |
|---|
| 1887 | 2046 | vma_interval_tree_foreach(vma, &mapping->i_mmap, |
|---|
| 1888 | 2047 | pgoff_start, pgoff_end) { |
|---|
| 1889 | 2048 | unsigned long address = vma_address(page, vma); |
|---|
| .. | .. |
|---|
| 1928 | 2087 | |
|---|
| 1929 | 2088 | #ifdef CONFIG_HUGETLB_PAGE |
|---|
| 1930 | 2089 | /* |
|---|
| 1931 | | - * The following three functions are for anonymous (private mapped) hugepages. |
|---|
| 2090 | + * The following two functions are for anonymous (private mapped) hugepages. |
|---|
| 1932 | 2091 | * Unlike common anonymous pages, anonymous hugepages have no accounting code |
|---|
| 1933 | 2092 | * and no lru code, because we handle hugepages differently from common pages. |
|---|
| 1934 | 2093 | */ |
|---|
| 1935 | | -static void __hugepage_set_anon_rmap(struct page *page, |
|---|
| 1936 | | - struct vm_area_struct *vma, unsigned long address, int exclusive) |
|---|
| 1937 | | -{ |
|---|
| 1938 | | - struct anon_vma *anon_vma = vma->anon_vma; |
|---|
| 1939 | | - |
|---|
| 1940 | | - BUG_ON(!anon_vma); |
|---|
| 1941 | | - |
|---|
| 1942 | | - if (PageAnon(page)) |
|---|
| 1943 | | - return; |
|---|
| 1944 | | - if (!exclusive) |
|---|
| 1945 | | - anon_vma = anon_vma->root; |
|---|
| 1946 | | - |
|---|
| 1947 | | - anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; |
|---|
| 1948 | | - page->mapping = (struct address_space *) anon_vma; |
|---|
| 1949 | | - page->index = linear_page_index(vma, address); |
|---|
| 1950 | | -} |
|---|
| 1951 | | - |
|---|
| 1952 | 2094 | void hugepage_add_anon_rmap(struct page *page, |
|---|
| 1953 | 2095 | struct vm_area_struct *vma, unsigned long address) |
|---|
| 1954 | 2096 | { |
|---|
| .. | .. |
|---|
| 1960 | 2102 | /* address might be in next vma when migration races vma_adjust */ |
|---|
| 1961 | 2103 | first = atomic_inc_and_test(compound_mapcount_ptr(page)); |
|---|
| 1962 | 2104 | if (first) |
|---|
| 1963 | | - __hugepage_set_anon_rmap(page, vma, address, 0); |
|---|
| 2105 | + __page_set_anon_rmap(page, vma, address, 0); |
|---|
| 1964 | 2106 | } |
|---|
| 1965 | 2107 | |
|---|
| 1966 | 2108 | void hugepage_add_new_anon_rmap(struct page *page, |
|---|
| .. | .. |
|---|
| 1968 | 2110 | { |
|---|
| 1969 | 2111 | BUG_ON(address < vma->vm_start || address >= vma->vm_end); |
|---|
| 1970 | 2112 | atomic_set(compound_mapcount_ptr(page), 0); |
|---|
| 1971 | | - __hugepage_set_anon_rmap(page, vma, address, 1); |
|---|
| 2113 | + if (hpage_pincount_available(page)) |
|---|
| 2114 | + atomic_set(compound_pincount_ptr(page), 0); |
|---|
| 2115 | + |
|---|
| 2116 | + __page_set_anon_rmap(page, vma, address, 1); |
|---|
| 1972 | 2117 | } |
|---|
| 1973 | 2118 | #endif /* CONFIG_HUGETLB_PAGE */ |
|---|