...
 * Lock ordering in mm:
 *
 * inode->i_mutex (while writing or truncating, not reading or faulting)
- * mm->mmap_sem
- * page->flags PG_locked (lock_page)
+ * mm->mmap_lock
+ * page->flags PG_locked (lock_page) * (see huegtlbfs below)
 * hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
 * mapping->i_mmap_rwsem
+ * hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
 * anon_vma->rwsem
 * mm->page_table_lock or pte_lock
- * zone_lru_lock (in mark_page_accessed, isolate_lru_page)
+ * pgdat->lru_lock (in mark_page_accessed, isolate_lru_page)
 * swap_lock (in swap_duplicate, swap_info_get)
 * mmlist_lock (in mmput, drain_mmlist and others)
 * mapping->private_lock (in __set_page_dirty_buffers)
...
 * anon_vma->rwsem,mapping->i_mutex (memory_failure, collect_procs_anon)
 * ->tasklist_lock
 * pte map lock
+ *
+ * * hugetlbfs PageHuge() pages take locks in this order:
+ * mapping->i_mmap_rwsem
+ * hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
+ * page->flags PG_locked (lock_page)
 */

 #include <linux/mm.h>
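
For context, the new hugetlbfs ordering documented above corresponds to a fault-style path that nests the three locks in exactly that order. The sketch below is illustrative only and assumes the usual hugetlbfs helpers (hugetlb_fault_mutex_hash(), hugetlb_fault_mutex_table); it is not a copy of hugetlb_fault(), and error handling is omitted.

/*
 * Illustrative sketch (not the real hugetlb_fault()): take the locks in
 * the order documented above for hugetlbfs PageHuge() pages.
 */
static void hugetlb_fault_lock_order_sketch(struct address_space *mapping,
                                            pgoff_t idx, struct page *page)
{
        u32 hash;

        i_mmap_lock_read(mapping);                      /* mapping->i_mmap_rwsem */
        hash = hugetlb_fault_mutex_hash(mapping, idx);
        mutex_lock(&hugetlb_fault_mutex_table[hash]);   /* hugetlb_fault_mutex */
        lock_page(page);                                /* page->flags PG_locked */

        /* ... handle the fault ... */

        unlock_page(page);
        mutex_unlock(&hugetlb_fault_mutex_table[hash]);
        i_mmap_unlock_read(mapping);
}
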
...
 #include <linux/mmu_notifier.h>
 #include <linux/migrate.h>
 #include <linux/hugetlb.h>
+#include <linux/huge_mm.h>
 #include <linux/backing-dev.h>
 #include <linux/page_idle.h>
 #include <linux/memremap.h>
...
 #include <asm/tlbflush.h>

 #include <trace/events/tlb.h>
+
+#include <trace/hooks/mm.h>

 #include "internal.h"

...
 * to do any locking for the common case of already having
 * an anon_vma.
 *
- * This must be called with the mmap_sem held for reading.
+ * This must be called with the mmap_lock held for reading.
 */
 int __anon_vma_prepare(struct vm_area_struct *vma)
 {
...
 * Attach the anon_vmas from src to dst.
 * Returns 0 on success, -ENOMEM on failure.
 *
- * If dst->anon_vma is NULL this function tries to find and reuse existing
- * anon_vma which has no vmas and only one child anon_vma. This prevents
- * degradation of anon_vma hierarchy to endless linear chain in case of
- * constantly forking task. On the other hand, an anon_vma with more than one
- * child isn't reused even if there was no alive vma, thus rmap walker has a
- * good chance of avoiding scanning the whole hierarchy when it searches where
- * page is mapped.
+ * anon_vma_clone() is called by __vma_split(), __split_vma(), copy_vma() and
+ * anon_vma_fork(). The first three want an exact copy of src, while the last
+ * one, anon_vma_fork(), may try to reuse an existing anon_vma to prevent
+ * endless growth of anon_vma. Since dst->anon_vma is set to NULL before call,
+ * we can identify this case by checking (!dst->anon_vma && src->anon_vma).
+ *
+ * If (!dst->anon_vma && src->anon_vma) is true, this function tries to find
+ * and reuse existing anon_vma which has no vmas and only one child anon_vma.
+ * This prevents degradation of anon_vma hierarchy to endless linear chain in
+ * case of constantly forking task. On the other hand, an anon_vma with more
+ * than one child isn't reused even if there was no alive vma, thus rmap
+ * walker has a good chance of avoiding scanning the whole hierarchy when it
+ * searches where page is mapped.
 */
 int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 {
...
 * will always reuse it. Root anon_vma is never reused:
 * it has self-parent reference and at least one child.
 */
- if (!dst->anon_vma && anon_vma != src->anon_vma &&
- anon_vma->degree < 2)
+ if (!dst->anon_vma && src->anon_vma &&
+ anon_vma != src->anon_vma && anon_vma->degree < 2)
 dst->anon_vma = anon_vma;
 }
 if (dst->anon_vma)
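
The rewritten comment above hinges on the caller clearing dst->anon_vma before the call. A condensed, hypothetical sketch of the fork-side caller (not the real anon_vma_fork() body) makes the convention explicit:

/*
 * Condensed sketch, not the real anon_vma_fork(): clearing dst->anon_vma
 * before the call is what lets anon_vma_clone() detect the fork case and
 * possibly install a reusable anon_vma into dst.
 */
static int anon_vma_fork_sketch(struct vm_area_struct *dst,
                                struct vm_area_struct *src)
{
        dst->anon_vma = NULL;                   /* marks "reuse allowed" */
        return anon_vma_clone(dst, src);        /* may set dst->anon_vma */
}
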
...
 * chain and verify that the page in question is indeed mapped in it
 * [ something equivalent to page_mapped_in_vma() ].
 *
- * Since anon_vma's slab is DESTROY_BY_RCU and we know from page_remove_rmap()
- * that the anon_vma pointer from page->mapping is valid if there is a
- * mapcount, we can dereference the anon_vma after observing those.
+ * Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from
+ * page_remove_rmap() that the anon_vma pointer from page->mapping is valid
+ * if there is a mapcount, we can dereference the anon_vma after observing
+ * those.
 */
 struct anon_vma *page_get_anon_vma(struct page *page)
 {
...
 *
 * Its a little more complex as it tries to keep the fast path to a single
 * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
- * reference like with page_get_anon_vma() and then block on the mutex.
+ * reference like with page_get_anon_vma() and then block on the mutex
+ * on !rwc->try_lock case.
 */
-struct anon_vma *page_lock_anon_vma_read(struct page *page)
+struct anon_vma *page_lock_anon_vma_read(struct page *page,
+ struct rmap_walk_control *rwc)
 {
 struct anon_vma *anon_vma = NULL;
 struct anon_vma *root_anon_vma;
 unsigned long anon_mapping;
+ bool success = false;

 rcu_read_lock();
 anon_mapping = (unsigned long)READ_ONCE(page->mapping);
...
 up_read(&root_anon_vma->rwsem);
 anon_vma = NULL;
 }
+ goto out;
+ }
+ trace_android_vh_do_page_trylock(page, NULL, NULL, &success);
+ if (success) {
+ anon_vma = NULL;
+ goto out;
+ }
+
+ if (rwc && rwc->try_lock) {
+ anon_vma = NULL;
+ rwc->contended = true;
 goto out;
 }

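
With the extra rwc parameter, page_lock_anon_vma_read() can now refuse to sleep when rwc->try_lock is set and report that through rwc->contended. A hedged sketch of a walker opting into that behaviour is shown below; the callback is a placeholder, not an existing kernel function.

/*
 * Hypothetical walker sketch: bail out instead of sleeping on a contended
 * anon_vma lock. noop_rmap_one() is a placeholder callback.
 */
static bool noop_rmap_one(struct page *page, struct vm_area_struct *vma,
                          unsigned long addr, void *arg)
{
        return true;    /* keep walking */
}

static void rmap_walk_nowait_sketch(struct page *page)
{
        struct rmap_walk_control rwc = {
                .rmap_one  = noop_rmap_one,
                .anon_lock = page_lock_anon_vma_read,
                .try_lock  = true,
        };

        rmap_walk(page, &rwc);
        if (rwc.contended)
                return;         /* lock was contended; retry later if needed */
}
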
...
 */
 void flush_tlb_batched_pending(struct mm_struct *mm)
 {
- if (mm->tlb_flush_batched) {
+ if (data_race(mm->tlb_flush_batched)) {
 flush_tlb_mm(mm);

 /*
...
 }

 if (pvmw.pte) {
+ trace_android_vh_look_around(&pvmw, page, vma, &referenced);
 if (ptep_clear_flush_young_notify(vma, address,
 pvmw.pte)) {
 /*
...
 pra->vm_flags |= vma->vm_flags;
 }

+ trace_android_vh_page_referenced_one_end(vma, page, referenced);
 if (!pra->mapcount)
 return false; /* To break the loop */

...
 * @memcg: target memory cgroup
 * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
 *
- * Quick test_and_clear_referenced for all mappings to a page,
- * returns the number of ptes which referenced the page.
+ * Quick test_and_clear_referenced for all mappings of a page,
+ *
+ * Return: The number of mappings which referenced the page. Return -1 if
+ * the function bailed out due to rmap lock contention.
 */
 int page_referenced(struct page *page,
 int is_locked,
...
 .rmap_one = page_referenced_one,
 .arg = (void *)&pra,
 .anon_lock = page_lock_anon_vma_read,
+ .try_lock = true,
 };

 *vm_flags = 0;
- if (!page_mapped(page))
+ if (!pra.mapcount)
 return 0;

 if (!page_rmapping(page))
...
 if (we_locked)
 unlock_page(page);

- return pra.referenced;
+ return rwc.contended ? -1 : pra.referenced;
 }

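Because page_referenced() can now return -1, callers have to distinguish "no references" from "could not take the rmap lock". A minimal, hypothetical caller is sketched below; treating -1 conservatively is what a reclaim-side user would typically want.

/*
 * Hypothetical caller sketch: treat a contended rmap lock (-1) as
 * "assume referenced" rather than as zero references.
 */
static bool page_was_referenced(struct page *page, struct mem_cgroup *memcg)
{
        unsigned long vm_flags;
        int refs = page_referenced(page, 0, memcg, &vm_flags);

        if (refs == -1)         /* rmap lock contended; be conservative */
                return true;
        return refs != 0;
}
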
 static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
...
 .address = address,
 .flags = PVMW_SYNC,
 };
- unsigned long start = address, end;
+ struct mmu_notifier_range range;
 int *cleaned = arg;

 /*
 * We have to assume the worse case ie pmd for invalidation. Note that
 * the page can not be free from this function.
 */
- end = vma_address_end(page, vma);
- mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
+ mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
+ 0, vma, vma->vm_mm, address,
+ vma_address_end(page, vma));
+ mmu_notifier_invalidate_range_start(&range);

 while (page_vma_mapped_walk(&pvmw)) {
- unsigned long cstart;
 int ret = 0;

- cstart = address = pvmw.address;
+ address = pvmw.address;
 if (pvmw.pte) {
 pte_t entry;
 pte_t *pte = pvmw.pte;
...
 set_pte_at(vma->vm_mm, address, pte, entry);
 ret = 1;
 } else {
-#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 pmd_t *pmd = pvmw.pmd;
 pmd_t entry;

...
 entry = pmd_wrprotect(entry);
 entry = pmd_mkclean(entry);
 set_pmd_at(vma->vm_mm, address, pmd, entry);
- cstart &= PMD_MASK;
 ret = 1;
 #else
 /* unexpected pmd-mapped page? */
...
 (*cleaned)++;
 }

- mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
+ mmu_notifier_invalidate_range_end(&range);

 return true;
 }
...

 /**
 * __page_set_anon_rmap - set up new anonymous rmap
- * @page: Page to add to rmap
+ * @page: Page or Hugepage to add to rmap
 * @vma: VM area to add page to.
 * @address: User virtual address of the mapping
 * @exclusive: the page is exclusively owned by the current process
...
 static void __page_check_anon_rmap(struct page *page,
 struct vm_area_struct *vma, unsigned long address)
 {
-#ifdef CONFIG_DEBUG_VM
 /*
 * The page's anon-rmap details (mapping and index) are guaranteed to
 * be set up correctly at this point.
...
 * are initially only visible via the pagetables, and the pte is locked
 * over the call to page_add_new_anon_rmap.
 */
- BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root);
- BUG_ON(page_to_pgoff(page) != linear_page_index(vma, address));
-#endif
+ VM_BUG_ON_PAGE(page_anon_vma(page)->root != vma->anon_vma->root, page);
+ VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address),
+ page);
 }

 /**
...
 {
 bool compound = flags & RMAP_COMPOUND;
 bool first;
+ bool success = false;
+
+ if (unlikely(PageKsm(page)))
+ lock_page_memcg(page);
+ else
+ VM_BUG_ON_PAGE(!PageLocked(page), page);

 if (compound) {
 atomic_t *mapcount;
...
 mapcount = compound_mapcount_ptr(page);
 first = atomic_inc_and_test(mapcount);
 } else {
- first = atomic_inc_and_test(&page->_mapcount);
+ trace_android_vh_update_page_mapcount(page, true, compound,
+ &first, &success);
+ if (!success)
+ first = atomic_inc_and_test(&page->_mapcount);
 }

 if (first) {
- int nr = compound ? hpage_nr_pages(page) : 1;
+ int nr = compound ? thp_nr_pages(page) : 1;
 /*
 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
 * these counters are not modified in interrupt context, and
...
 * disabled.
 */
 if (compound)
- __inc_node_page_state(page, NR_ANON_THPS);
- __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, nr);
+ __inc_lruvec_page_state(page, NR_ANON_THPS);
+ __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
 }
- if (unlikely(PageKsm(page)))
- return;

- VM_BUG_ON_PAGE(!PageLocked(page), page);
+ if (unlikely(PageKsm(page))) {
+ unlock_page_memcg(page);
+ return;
+ }

 /* address might be in next vma when migration races vma_adjust */
 if (first)
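
The trace_android_vh_update_page_mapcount() calls threaded through the mapcount paths follow the common Android vendor-hook shape: a registered handler may take over the update and report back through the *first/*success out-parameters, otherwise the stock atomic path runs. The handler below is a hedged sketch; the parameter names and the register_trace_android_vh_*() registration helper are assumed from that convention rather than quoted from the tree.

/*
 * Hedged sketch of a vendor-hook handler; parameter order mirrors the call
 * sites above, names are illustrative.
 */
static void sketch_update_page_mapcount(void *data, struct page *page,
                bool inc_size, bool compound, bool *first_mapping,
                bool *success)
{
        *success = false;       /* leave the default mapcount path in charge */
}

/*
 * Registration (illustrative, assuming the usual convention):
 *      register_trace_android_vh_update_page_mapcount(
 *                      sketch_update_page_mapcount, NULL);
 */
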
...
 }

 /**
- * page_add_new_anon_rmap - add pte mapping to a new anonymous page
+ * __page_add_new_anon_rmap - add pte mapping to a new anonymous page
 * @page: the page to add the mapping to
 * @vma: the vm area in which the mapping is added
 * @address: the user virtual address mapped
...
 * This means the inc-and-test can be bypassed.
 * Page does not have to be locked.
 */
-void page_add_new_anon_rmap(struct page *page,
+void __page_add_new_anon_rmap(struct page *page,
 struct vm_area_struct *vma, unsigned long address, bool compound)
 {
- int nr = compound ? hpage_nr_pages(page) : 1;
+ int nr = compound ? thp_nr_pages(page) : 1;

- VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
 __SetPageSwapBacked(page);
 if (compound) {
 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 /* increment count (starts at -1) */
 atomic_set(compound_mapcount_ptr(page), 0);
- __inc_node_page_state(page, NR_ANON_THPS);
+ if (hpage_pincount_available(page))
+ atomic_set(compound_pincount_ptr(page), 0);
+
+ __inc_lruvec_page_state(page, NR_ANON_THPS);
 } else {
 /* Anon THP always mapped first with PMD */
 VM_BUG_ON_PAGE(PageTransCompound(page), page);
 /* increment count (starts at -1) */
 atomic_set(&page->_mapcount, 0);
 }
- __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, nr);
+ __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
 __page_set_anon_rmap(page, vma, address, 1);
 }

...
 void page_add_file_rmap(struct page *page, bool compound)
 {
 int i, nr = 1;
+ bool first_mapping;
+ bool success = false;

 VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
 lock_page_memcg(page);
 if (compound && PageTransHuge(page)) {
- for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
- if (atomic_inc_and_test(&page[i]._mapcount))
- nr++;
+ for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
+ trace_android_vh_update_page_mapcount(&page[i], true,
+ compound, &first_mapping, &success);
+ if ((success)) {
+ if (first_mapping)
+ nr++;
+ } else {
+ if (atomic_inc_and_test(&page[i]._mapcount))
+ nr++;
+ }
 }
 if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
 goto out;
- VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
- __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
+ if (PageSwapBacked(page))
+ __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
+ else
+ __inc_node_page_state(page, NR_FILE_PMDMAPPED);
 } else {
 if (PageTransCompound(page) && page_mapping(page)) {
 VM_WARN_ON_ONCE(!PageLocked(page));
...
 if (PageMlocked(page))
 clear_page_mlock(compound_head(page));
 }
- if (!atomic_inc_and_test(&page->_mapcount))
- goto out;
+ trace_android_vh_update_page_mapcount(page, true,
+ compound, &first_mapping, &success);
+ if (success) {
+ if (!first_mapping)
+ goto out;
+ } else {
+ if (!atomic_inc_and_test(&page->_mapcount))
+ goto out;
+ }
 }
 __mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
 out:
...
 static void page_remove_file_rmap(struct page *page, bool compound)
 {
 int i, nr = 1;
+ bool first_mapping;
+ bool success = false;

 VM_BUG_ON_PAGE(compound && !PageHead(page), page);
- lock_page_memcg(page);

 /* Hugepages are not counted in NR_FILE_MAPPED for now. */
 if (unlikely(PageHuge(page))) {
 /* hugetlb pages are always mapped with pmds */
 atomic_dec(compound_mapcount_ptr(page));
- goto out;
+ return;
 }

 /* page still mapped by someone else? */
 if (compound && PageTransHuge(page)) {
- for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
- if (atomic_add_negative(-1, &page[i]._mapcount))
- nr++;
+ for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
+ trace_android_vh_update_page_mapcount(&page[i], false,
+ compound, &first_mapping, &success);
+ if (success) {
+ if (first_mapping)
+ nr++;
+ } else {
+ if (atomic_add_negative(-1, &page[i]._mapcount))
+ nr++;
+ }
 }
 if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
- goto out;
- VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
- __dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
+ return;
+ if (PageSwapBacked(page))
+ __dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
+ else
+ __dec_node_page_state(page, NR_FILE_PMDMAPPED);
 } else {
- if (!atomic_add_negative(-1, &page->_mapcount))
- goto out;
+ trace_android_vh_update_page_mapcount(page, false,
+ compound, &first_mapping, &success);
+ if (success) {
+ if (!first_mapping)
+ return;
+ } else {
+ if (!atomic_add_negative(-1, &page->_mapcount))
+ return;
+ }
 }

 /*
...

 if (unlikely(PageMlocked(page)))
 clear_page_mlock(page);
-out:
- unlock_page_memcg(page);
 }

 static void page_remove_anon_compound_rmap(struct page *page)
 {
 int i, nr;
+ bool first_mapping;
+ bool success = false;

 if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
 return;
...
 if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
 return;

- __dec_node_page_state(page, NR_ANON_THPS);
+ __dec_lruvec_page_state(page, NR_ANON_THPS);

 if (TestClearPageDoubleMap(page)) {
 /*
 * Subpages can be mapped with PTEs too. Check how many of
- * themi are still mapped.
+ * them are still mapped.
 */
- for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
- if (atomic_add_negative(-1, &page[i]._mapcount))
- nr++;
+ for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
+ trace_android_vh_update_page_mapcount(&page[i], false,
+ false, &first_mapping, &success);
+ if (success) {
+ if (first_mapping)
+ nr++;
+ } else {
+ if (atomic_add_negative(-1, &page[i]._mapcount))
+ nr++;
+ }
 }
+
+ /*
+ * Queue the page for deferred split if at least one small
+ * page of the compound page is unmapped, but at least one
+ * small page is still mapped.
+ */
+ if (nr && nr < thp_nr_pages(page))
+ deferred_split_huge_page(page);
 } else {
- nr = HPAGE_PMD_NR;
+ nr = thp_nr_pages(page);
 }

 if (unlikely(PageMlocked(page)))
 clear_page_mlock(page);

- if (nr) {
- __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, -nr);
- deferred_split_huge_page(page);
- }
+ if (nr)
+ __mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr);
 }

 /**
...
 */
 void page_remove_rmap(struct page *page, bool compound)
 {
- if (!PageAnon(page))
- return page_remove_file_rmap(page, compound);
+ bool first_mapping;
+ bool success = false;
+ lock_page_memcg(page);

- if (compound)
- return page_remove_anon_compound_rmap(page);
+ if (!PageAnon(page)) {
+ page_remove_file_rmap(page, compound);
+ goto out;
+ }

- /* page still mapped by someone else? */
- if (!atomic_add_negative(-1, &page->_mapcount))
- return;
+ if (compound) {
+ page_remove_anon_compound_rmap(page);
+ goto out;
+ }

+ trace_android_vh_update_page_mapcount(page, false,
+ compound, &first_mapping, &success);
+ if (success) {
+ if (!first_mapping)
+ goto out;
+ } else {
+ /* page still mapped by someone else? */
+ if (!atomic_add_negative(-1, &page->_mapcount))
+ goto out;
+ }
 /*
 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
 * these counters are not modified in interrupt context, and
 * pte lock(a spinlock) is held, which implies preemption disabled.
 */
- __dec_node_page_state(page, NR_ANON_MAPPED);
+ __dec_lruvec_page_state(page, NR_ANON_MAPPED);

 if (unlikely(PageMlocked(page)))
 clear_page_mlock(page);
...
 * Leaving it set also helps swapoff to reinstate ptes
 * faster for those pages still in swapcache.
 */
+out:
+ unlock_page_memcg(page);
 }

 /*
...
 pte_t pteval;
 struct page *subpage;
 bool ret = true;
- unsigned long start = address, end;
- enum ttu_flags flags = (enum ttu_flags)arg;
+ struct mmu_notifier_range range;
+ enum ttu_flags flags = (enum ttu_flags)(long)arg;

 /*
 * When racing against e.g. zap_pte_range() on another cpu,
...
 * Note that the page can not be free in this function as call of
 * try_to_unmap() must hold a reference on the page.
 */
- end = PageKsm(page) ?
+ range.end = PageKsm(page) ?
 address + PAGE_SIZE : vma_address_end(page, vma);
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+ address, range.end);
 if (PageHuge(page)) {
 /*
 * If sharing is possible, start and end will be adjusted
 * accordingly.
 */
- adjust_range_if_pmd_sharing_possible(vma, &start, &end);
+ adjust_range_if_pmd_sharing_possible(vma, &range.start,
+ &range.end);
 }
- mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
+ mmu_notifier_invalidate_range_start(&range);

 while (page_vma_mapped_walk(&pvmw)) {
 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
...
 if (!PageTransCompound(page)) {
 /*
 * Holding pte lock, we do *not* need
- * mmap_sem here
+ * mmap_lock here
 */
 mlock_vma_page(page);
 }
...
 subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
 address = pvmw.address;

- if (PageHuge(page)) {
- if (huge_pmd_unshare(mm, &address, pvmw.pte)) {
+ if (PageHuge(page) && !PageAnon(page)) {
+ /*
+ * To call huge_pmd_unshare, i_mmap_rwsem must be
+ * held in write mode. Caller needs to explicitly
+ * do this outside rmap routines.
+ */
+ VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
+ if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
 /*
 * huge_pmd_unshare unmapped an entire PMD
 * page. There is no way of knowing exactly
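
The new comment above spells out a locking contract: for shared hugetlbfs pages the caller must already hold i_mmap_rwsem for write and pass TTU_RMAP_LOCKED. A hedged caller sketch (not copied from the memory-failure or migration code) looks like this:

/*
 * Illustrative caller sketch: take i_mmap_rwsem for write and tell the
 * rmap walk about it so huge_pmd_unshare() is safe to call.
 */
static bool unmap_shared_hugetlb_sketch(struct page *page,
                                        struct address_space *mapping)
{
        bool unmapped;

        i_mmap_lock_write(mapping);     /* required for huge_pmd_unshare() */
        unmapped = try_to_unmap(page, TTU_MIGRATION | TTU_RMAP_LOCKED);
        i_mmap_unlock_write(mapping);

        return unmapped;
}
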
...
 * we must flush them all. start/end were
 * already adjusted above to cover this range.
 */
- flush_cache_range(vma, start, end);
- flush_tlb_range(vma, start, end);
- mmu_notifier_invalidate_range(mm, start, end);
+ flush_cache_range(vma, range.start, range.end);
+ flush_tlb_range(vma, range.start, range.end);
+ mmu_notifier_invalidate_range(mm, range.start,
+ range.end);

 /*
 * The ref count of the PMD page was dropped
...
 */
 entry = make_migration_entry(page, 0);
 swp_pte = swp_entry_to_pte(entry);
- if (pte_soft_dirty(pteval))
+
+ /*
+ * pteval maps a zone device page and is therefore
+ * a swap pte.
+ */
+ if (pte_swp_soft_dirty(pteval))
 swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_swp_uffd_wp(pteval))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
 set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
 /*
 * No need to invalidate here it will synchronize on
...
 */
 subpage = page;
 goto discard;
- }
-
- if (!(flags & TTU_IGNORE_ACCESS)) {
- if (ptep_clear_flush_young_notify(vma, address,
- pvmw.pte)) {
- ret = false;
- page_vma_mapped_walk_done(&pvmw);
- break;
- }
 }

 /* Nuke the page table entry. */
...
 if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
 pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
 if (PageHuge(page)) {
- int nr = 1 << compound_order(page);
- hugetlb_count_sub(nr, mm);
+ hugetlb_count_sub(compound_nr(page), mm);
 set_huge_swap_pte_at(mm, address,
 pvmw.pte, pteval,
 vma_mmu_pagesize(vma));
...
 swp_pte = swp_entry_to_pte(entry);
 if (pte_soft_dirty(pteval))
 swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_uffd_wp(pteval))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
 set_pte_at(mm, address, pvmw.pte, swp_pte);
 /*
 * No need to invalidate here it will synchronize on
...

 /* MADV_FREE page check */
 if (!PageSwapBacked(page)) {
- if (!PageDirty(page)) {
+ int ref_count, map_count;
+
+ /*
+ * Synchronize with gup_pte_range():
+ * - clear PTE; barrier; read refcount
+ * - inc refcount; barrier; read PTE
+ */
+ smp_mb();
+
+ ref_count = page_ref_count(page);
+ map_count = page_mapcount(page);
+
+ /*
+ * Order reads for page refcount and dirty flag
+ * (see comments in __remove_mapping()).
+ */
+ smp_rmb();
+
+ /*
+ * The only page refs must be one from isolation
+ * plus the rmap(s) (dropped by discard:).
+ */
+ if (ref_count == 1 + map_count &&
+ !PageDirty(page)) {
 /* Invalidate as we cleared the pte */
 mmu_notifier_invalidate_range(mm,
 address, address + PAGE_SIZE);
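
The barrier comments above only make sense together with the lockless GUP side they pair with. The sketch below shows that counterpart ordering in simplified form; it is not a verbatim copy of gup_pte_range().

/*
 * Simplified sketch of the GUP-fast side of the ordering described above:
 * take the reference first, then re-check the PTE. Not verbatim kernel code.
 */
static bool gup_fast_counterpart_sketch(pte_t *ptep, struct page *page)
{
        pte_t pte = READ_ONCE(*ptep);           /* read PTE */

        if (!try_get_page(page))                /* inc refcount */
                return false;

        smp_mb();                               /* pairs with the smp_mb() above */

        if (!pte_same(pte, READ_ONCE(*ptep))) { /* PTE changed: back off */
                put_page(page);
                return false;
        }
        return true;
}
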
...
 swp_pte = swp_entry_to_pte(entry);
 if (pte_soft_dirty(pteval))
 swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_uffd_wp(pteval))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
 set_pte_at(mm, address, pvmw.pte, swp_pte);
 /* Invalidate as we cleared the pte */
 mmu_notifier_invalidate_range(mm, address,
...
 put_page(page);
 }

- mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
+ mmu_notifier_invalidate_range_end(&range);
+ trace_android_vh_try_to_unmap_one(vma, page, address, ret);

 return ret;
 }

-bool is_vma_temporary_stack(struct vm_area_struct *vma)
-{
- int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
-
- if (!maybe_stack)
- return false;
-
- if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
- VM_STACK_INCOMPLETE_SETUP)
- return true;
-
- return false;
-}
-
 static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
 {
- return is_vma_temporary_stack(vma);
+ return vma_is_temporary_stack(vma);
 }

 static int page_not_mapped(struct page *page)
...
 struct anon_vma *anon_vma;

 if (rwc->anon_lock)
- return rwc->anon_lock(page);
+ return rwc->anon_lock(page, rwc);

 /*
 * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
 * because that depends on page_mapped(); but not all its usages
- * are holding mmap_sem. Users without mmap_sem are required to
+ * are holding mmap_lock. Users without mmap_lock are required to
 * take a reference count to prevent the anon_vma disappearing
 */
 anon_vma = page_anon_vma(page);
 if (!anon_vma)
 return NULL;

+ if (anon_vma_trylock_read(anon_vma))
+ goto out;
+
+ if (rwc->try_lock) {
+ anon_vma = NULL;
+ rwc->contended = true;
+ goto out;
+ }
+
 anon_vma_lock_read(anon_vma);
+out:
 return anon_vma;
 }

...
 * Find all the mappings of a page using the mapping pointer and the vma chains
 * contained in the anon_vma struct it points to.
 *
- * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
 * where the page was found will be held for write. So, we won't recheck
 * vm_flags for that VMA. That should be OK, because that vma shouldn't be
 * LOCKED.
...
 return;

 pgoff_start = page_to_pgoff(page);
- pgoff_end = pgoff_start + hpage_nr_pages(page) - 1;
+ pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
 anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
 pgoff_start, pgoff_end) {
 struct vm_area_struct *vma = avc->vma;
...
 * Find all the mappings of a page using the mapping pointer and the vma chains
 * contained in the address_space struct it points to.
 *
- * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
 * where the page was found will be held for write. So, we won't recheck
 * vm_flags for that VMA. That should be OK, because that vma shouldn't be
 * LOCKED.
...
 struct address_space *mapping = page_mapping(page);
 pgoff_t pgoff_start, pgoff_end;
 struct vm_area_struct *vma;
+ bool got_lock = false, success = false;

 /*
 * The page lock not only makes sure that page->mapping cannot
...
 return;

 pgoff_start = page_to_pgoff(page);
- pgoff_end = pgoff_start + hpage_nr_pages(page) - 1;
- if (!locked)
- i_mmap_lock_read(mapping);
+ pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
+ if (!locked) {
+ trace_android_vh_do_page_trylock(page,
+ &mapping->i_mmap_rwsem, &got_lock, &success);
+ if (success) {
+ if (!got_lock)
+ return;
+ } else {
+ if (i_mmap_trylock_read(mapping))
+ goto lookup;
+
+ if (rwc->try_lock) {
+ rwc->contended = true;
+ return;
+ }
+
+ i_mmap_lock_read(mapping);
+ }
+ }
+lookup:
 vma_interval_tree_foreach(vma, &mapping->i_mmap,
 pgoff_start, pgoff_end) {
 unsigned long address = vma_address(page, vma);
...

 #ifdef CONFIG_HUGETLB_PAGE
 /*
- * The following three functions are for anonymous (private mapped) hugepages.
+ * The following two functions are for anonymous (private mapped) hugepages.
 * Unlike common anonymous pages, anonymous hugepages have no accounting code
 * and no lru code, because we handle hugepages differently from common pages.
 */
-static void __hugepage_set_anon_rmap(struct page *page,
- struct vm_area_struct *vma, unsigned long address, int exclusive)
-{
- struct anon_vma *anon_vma = vma->anon_vma;
-
- BUG_ON(!anon_vma);
-
- if (PageAnon(page))
- return;
- if (!exclusive)
- anon_vma = anon_vma->root;
-
- anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
- page->mapping = (struct address_space *) anon_vma;
- page->index = linear_page_index(vma, address);
-}
-
 void hugepage_add_anon_rmap(struct page *page,
 struct vm_area_struct *vma, unsigned long address)
 {
...
 /* address might be in next vma when migration races vma_adjust */
 first = atomic_inc_and_test(compound_mapcount_ptr(page));
 if (first)
- __hugepage_set_anon_rmap(page, vma, address, 0);
+ __page_set_anon_rmap(page, vma, address, 0);
 }

 void hugepage_add_new_anon_rmap(struct page *page,
...
 {
 BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 atomic_set(compound_mapcount_ptr(page), 0);
- __hugepage_set_anon_rmap(page, vma, address, 1);
+ if (hpage_pincount_available(page))
+ atomic_set(compound_pincount_ptr(page), 0);
+
+ __page_set_anon_rmap(page, vma, address, 1);
 }
 #endif /* CONFIG_HUGETLB_PAGE */
---|