2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/mm/rmap.c
....@@ -21,13 +21,14 @@
2121 * Lock ordering in mm:
2222 *
2323 * inode->i_mutex (while writing or truncating, not reading or faulting)
24
- * mm->mmap_sem
25
- * page->flags PG_locked (lock_page)
24
+ * mm->mmap_lock
25
+ * page->flags PG_locked (lock_page) * (see hugetlbfs below)
2626 * hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
2727 * mapping->i_mmap_rwsem
28
+ * hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
2829 * anon_vma->rwsem
2930 * mm->page_table_lock or pte_lock
30
- * zone_lru_lock (in mark_page_accessed, isolate_lru_page)
31
+ * pgdat->lru_lock (in mark_page_accessed, isolate_lru_page)
3132 * swap_lock (in swap_duplicate, swap_info_get)
3233 * mmlist_lock (in mmput, drain_mmlist and others)
3334 * mapping->private_lock (in __set_page_dirty_buffers)
....@@ -43,6 +44,11 @@
4344 * anon_vma->rwsem,mapping->i_mutex (memory_failure, collect_procs_anon)
4445 * ->tasklist_lock
4546 * pte map lock
47
+ *
48
+ * * hugetlbfs PageHuge() pages take locks in this order:
49
+ * mapping->i_mmap_rwsem
50
+ * hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
51
+ * page->flags PG_locked (lock_page)
4652 */
4753
4854 #include <linux/mm.h>
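
Side note on the hugetlbfs ordering documented above (i_mmap_rwsem, then hugetlb_fault_mutex, then the page lock): the rule only prevents deadlock if every hugetlb path acquires the locks in that order and releases them in reverse. A minimal userspace sketch of the same discipline, with pthread primitives standing in for the kernel locks (all names below are illustrative, not kernel APIs):

    /* Build with: cc -std=c11 -pthread lock_order.c */
    #include <pthread.h>
    #include <stdio.h>

    /* Stand-ins for mapping->i_mmap_rwsem, hugetlb_fault_mutex and PG_locked. */
    static pthread_rwlock_t i_mmap_rwsem = PTHREAD_RWLOCK_INITIALIZER;
    static pthread_mutex_t  fault_mutex  = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t  page_lock    = PTHREAD_MUTEX_INITIALIZER;

    /* Every path takes the locks in the documented order, releases in reverse. */
    static void hugetlb_fault_path(void)
    {
        pthread_rwlock_rdlock(&i_mmap_rwsem);
        pthread_mutex_lock(&fault_mutex);
        pthread_mutex_lock(&page_lock);

        puts("all three locks held in the documented order");

        pthread_mutex_unlock(&page_lock);
        pthread_mutex_unlock(&fault_mutex);
        pthread_rwlock_unlock(&i_mmap_rwsem);
    }

    int main(void)
    {
        hugetlb_fault_path();
        return 0;
    }
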
....@@ -61,6 +67,7 @@
6167 #include <linux/mmu_notifier.h>
6268 #include <linux/migrate.h>
6369 #include <linux/hugetlb.h>
70
+#include <linux/huge_mm.h>
6471 #include <linux/backing-dev.h>
6572 #include <linux/page_idle.h>
6673 #include <linux/memremap.h>
....@@ -69,6 +76,8 @@
6976 #include <asm/tlbflush.h>
7077
7178 #include <trace/events/tlb.h>
79
+
80
+#include <trace/hooks/mm.h>
7281
7382 #include "internal.h"
7483
....@@ -82,7 +91,8 @@
8291 anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
8392 if (anon_vma) {
8493 atomic_set(&anon_vma->refcount, 1);
85
- anon_vma->degree = 1; /* Reference for first vma */
94
+ anon_vma->num_children = 0;
95
+ anon_vma->num_active_vmas = 0;
8696 anon_vma->parent = anon_vma;
8797 /*
8898 * Initialise the anon_vma root to point to itself. If called
....@@ -170,7 +180,7 @@
170180 * to do any locking for the common case of already having
171181 * an anon_vma.
172182 *
173
- * This must be called with the mmap_sem held for reading.
183
+ * This must be called with the mmap_lock held for reading.
174184 */
175185 int __anon_vma_prepare(struct vm_area_struct *vma)
176186 {
....@@ -190,6 +200,7 @@
190200 anon_vma = anon_vma_alloc();
191201 if (unlikely(!anon_vma))
192202 goto out_enomem_free_avc;
203
+ anon_vma->num_children++; /* self-parent link for new root */
193204 allocated = anon_vma;
194205 }
195206
....@@ -199,8 +210,7 @@
199210 if (likely(!vma->anon_vma)) {
200211 vma->anon_vma = anon_vma;
201212 anon_vma_chain_link(vma, avc, anon_vma);
202
- /* vma reference or self-parent link for new root */
203
- anon_vma->degree++;
213
+ anon_vma->num_active_vmas++;
204214 allocated = NULL;
205215 avc = NULL;
206216 }
....@@ -250,13 +260,19 @@
250260 * Attach the anon_vmas from src to dst.
251261 * Returns 0 on success, -ENOMEM on failure.
252262 *
253
- * If dst->anon_vma is NULL this function tries to find and reuse existing
254
- * anon_vma which has no vmas and only one child anon_vma. This prevents
255
- * degradation of anon_vma hierarchy to endless linear chain in case of
256
- * constantly forking task. On the other hand, an anon_vma with more than one
257
- * child isn't reused even if there was no alive vma, thus rmap walker has a
258
- * good chance of avoiding scanning the whole hierarchy when it searches where
259
- * page is mapped.
263
+ * anon_vma_clone() is called by __vma_adjust(), __split_vma(), copy_vma() and
264
+ * anon_vma_fork(). The first three want an exact copy of src, while the last
265
+ * one, anon_vma_fork(), may try to reuse an existing anon_vma to prevent
266
+ * endless growth of anon_vma. Since dst->anon_vma is set to NULL before call,
267
+ * we can identify this case by checking (!dst->anon_vma && src->anon_vma).
268
+ *
269
+ * If (!dst->anon_vma && src->anon_vma) is true, this function tries to find
270
+ * and reuse existing anon_vma which has no vmas and only one child anon_vma.
271
+ * This prevents degradation of anon_vma hierarchy to endless linear chain in
272
+ * case of constantly forking task. On the other hand, an anon_vma with more
273
+ * than one child isn't reused even if there was no alive vma, thus rmap
274
+ * walker has a good chance of avoiding scanning the whole hierarchy when it
275
+ * searches where page is mapped.
260276 */
261277 int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
262278 {
....@@ -279,19 +295,19 @@
279295 anon_vma_chain_link(dst, avc, anon_vma);
280296
281297 /*
282
- * Reuse existing anon_vma if its degree lower than two,
283
- * that means it has no vma and only one anon_vma child.
298
+ * Reuse existing anon_vma if it has no vma and only one
299
+ * anon_vma child.
284300 *
285
- * Do not chose parent anon_vma, otherwise first child
286
- * will always reuse it. Root anon_vma is never reused:
301
+ * Root anon_vma is never reused:
287302 * it has self-parent reference and at least one child.
288303 */
289
- if (!dst->anon_vma && anon_vma != src->anon_vma &&
290
- anon_vma->degree < 2)
304
+ if (!dst->anon_vma && src->anon_vma &&
305
+ anon_vma->num_children < 2 &&
306
+ anon_vma->num_active_vmas == 0)
291307 dst->anon_vma = anon_vma;
292308 }
293309 if (dst->anon_vma)
294
- dst->anon_vma->degree++;
310
+ dst->anon_vma->num_active_vmas++;
295311 unlock_anon_vma_root(root);
296312 return 0;
297313
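
For readers following the degree to num_children/num_active_vmas split in this hunk: the reuse check now reads directly as "we are on the fork path (dst->anon_vma was NULL while src has one), no live VMA uses the candidate anon_vma, and it has at most one child". A minimal sketch of that predicate, using a hypothetical struct that mirrors only the two counters:

    #include <assert.h>
    #include <stdbool.h>

    /* Hypothetical mirror of the two counters that replace anon_vma->degree. */
    struct anon_vma_model {
        int num_children;     /* child anon_vmas created by forks */
        int num_active_vmas;  /* VMAs currently backed by this anon_vma */
    };

    /*
     * Reuse is only attempted on the anon_vma_fork() path, identified by
     * dst->anon_vma == NULL while src->anon_vma is set, and only for an
     * anon_vma that no live VMA uses and that has at most one child.
     */
    static bool can_reuse(const struct anon_vma_model *av,
                          bool dst_has_anon_vma, bool src_has_anon_vma)
    {
        return !dst_has_anon_vma && src_has_anon_vma &&
               av->num_children < 2 && av->num_active_vmas == 0;
    }

    int main(void)
    {
        struct anon_vma_model idle = { .num_children = 1, .num_active_vmas = 0 };
        struct anon_vma_model busy = { .num_children = 1, .num_active_vmas = 1 };
        struct anon_vma_model wide = { .num_children = 3, .num_active_vmas = 0 };

        assert(can_reuse(&idle, false, true));   /* fork path, idle, one child */
        assert(!can_reuse(&busy, false, true));  /* still used by a live VMA */
        assert(!can_reuse(&wide, false, true));  /* too many children */
        assert(!can_reuse(&idle, true, true));   /* exact-copy path: never reuse */
        return 0;
    }
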
....@@ -341,6 +357,7 @@
341357 anon_vma = anon_vma_alloc();
342358 if (!anon_vma)
343359 goto out_error;
360
+ anon_vma->num_active_vmas++;
344361 avc = anon_vma_chain_alloc(GFP_KERNEL);
345362 if (!avc)
346363 goto out_error_free_anon_vma;
....@@ -361,7 +378,7 @@
361378 vma->anon_vma = anon_vma;
362379 anon_vma_lock_write(anon_vma);
363380 anon_vma_chain_link(vma, avc, anon_vma);
364
- anon_vma->parent->degree++;
381
+ anon_vma->parent->num_children++;
365382 anon_vma_unlock_write(anon_vma);
366383
367384 return 0;
....@@ -393,7 +410,7 @@
393410 * to free them outside the lock.
394411 */
395412 if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) {
396
- anon_vma->parent->degree--;
413
+ anon_vma->parent->num_children--;
397414 continue;
398415 }
399416
....@@ -401,7 +418,8 @@
401418 anon_vma_chain_free(avc);
402419 }
403420 if (vma->anon_vma)
404
- vma->anon_vma->degree--;
421
+ vma->anon_vma->num_active_vmas--;
422
+
405423 unlock_anon_vma_root(root);
406424
407425 /*
....@@ -412,7 +430,8 @@
412430 list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
413431 struct anon_vma *anon_vma = avc->anon_vma;
414432
415
- VM_WARN_ON(anon_vma->degree);
433
+ VM_WARN_ON(anon_vma->num_children);
434
+ VM_WARN_ON(anon_vma->num_active_vmas);
416435 put_anon_vma(anon_vma);
417436
418437 list_del(&avc->same_vma);
....@@ -457,9 +476,10 @@
457476 * chain and verify that the page in question is indeed mapped in it
458477 * [ something equivalent to page_mapped_in_vma() ].
459478 *
460
- * Since anon_vma's slab is DESTROY_BY_RCU and we know from page_remove_rmap()
461
- * that the anon_vma pointer from page->mapping is valid if there is a
462
- * mapcount, we can dereference the anon_vma after observing those.
479
+ * Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from
480
+ * page_remove_rmap() that the anon_vma pointer from page->mapping is valid
481
+ * if there is a mapcount, we can dereference the anon_vma after observing
482
+ * those.
463483 */
464484 struct anon_vma *page_get_anon_vma(struct page *page)
465485 {
....@@ -502,13 +522,16 @@
502522 *
503523 * Its a little more complex as it tries to keep the fast path to a single
504524 * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
505
- * reference like with page_get_anon_vma() and then block on the mutex.
525
+ * reference like with page_get_anon_vma() and then block on the mutex
526
+ * on !rwc->try_lock case.
506527 */
507
-struct anon_vma *page_lock_anon_vma_read(struct page *page)
528
+struct anon_vma *page_lock_anon_vma_read(struct page *page,
529
+ struct rmap_walk_control *rwc)
508530 {
509531 struct anon_vma *anon_vma = NULL;
510532 struct anon_vma *root_anon_vma;
511533 unsigned long anon_mapping;
534
+ bool success = false;
512535
513536 rcu_read_lock();
514537 anon_mapping = (unsigned long)READ_ONCE(page->mapping);
....@@ -529,6 +552,17 @@
529552 up_read(&root_anon_vma->rwsem);
530553 anon_vma = NULL;
531554 }
555
+ goto out;
556
+ }
557
+ trace_android_vh_do_page_trylock(page, NULL, NULL, &success);
558
+ if (success) {
559
+ anon_vma = NULL;
560
+ goto out;
561
+ }
562
+
563
+ if (rwc && rwc->try_lock) {
564
+ anon_vma = NULL;
565
+ rwc->contended = true;
532566 goto out;
533567 }
534568
....@@ -658,7 +692,7 @@
658692 */
659693 void flush_tlb_batched_pending(struct mm_struct *mm)
660694 {
661
- if (mm->tlb_flush_batched) {
695
+ if (data_race(mm->tlb_flush_batched)) {
662696 flush_tlb_mm(mm);
663697
664698 /*
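
On the data_race() wrapper added above: it only marks the lockless read of mm->tlb_flush_batched as intentional so KCSAN does not report it; it adds no ordering or atomicity. The closest userspace analogue is a relaxed atomic load, sketched below (the names are stand-ins, not the kernel's types):

    /* Build with: cc -std=c11 relaxed_flag.c */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-in for mm->tlb_flush_batched: set by one path, peeked by another. */
    static atomic_bool tlb_flush_batched;

    static void note_batched_flush(void)
    {
        atomic_store_explicit(&tlb_flush_batched, true, memory_order_relaxed);
    }

    static void flush_pending(void)
    {
        /*
         * Deliberately unsynchronized peek, the analogue of
         * data_race(mm->tlb_flush_batched): an intentional lockless read
         * that the race checker should not report.
         */
        if (atomic_load_explicit(&tlb_flush_batched, memory_order_relaxed))
            puts("would call flush_tlb_mm() here");
    }

    int main(void)
    {
        note_batched_flush();
        flush_pending();
        return 0;
    }
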
....@@ -768,6 +802,7 @@
768802 }
769803
770804 if (pvmw.pte) {
805
+ trace_android_vh_look_around(&pvmw, page, vma, &referenced);
771806 if (ptep_clear_flush_young_notify(vma, address,
772807 pvmw.pte)) {
773808 /*
....@@ -803,6 +838,7 @@
803838 pra->vm_flags |= vma->vm_flags;
804839 }
805840
841
+ trace_android_vh_page_referenced_one_end(vma, page, referenced);
806842 if (!pra->mapcount)
807843 return false; /* To break the loop */
808844
....@@ -827,8 +863,10 @@
827863 * @memcg: target memory cgroup
828864 * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
829865 *
830
- * Quick test_and_clear_referenced for all mappings to a page,
831
- * returns the number of ptes which referenced the page.
866
+ * Quick test_and_clear_referenced for all mappings of a page,
867
+ *
868
+ * Return: The number of mappings which referenced the page. Return -1 if
869
+ * the function bailed out due to rmap lock contention.
832870 */
833871 int page_referenced(struct page *page,
834872 int is_locked,
....@@ -844,10 +882,11 @@
844882 .rmap_one = page_referenced_one,
845883 .arg = (void *)&pra,
846884 .anon_lock = page_lock_anon_vma_read,
885
+ .try_lock = true,
847886 };
848887
849888 *vm_flags = 0;
850
- if (!page_mapped(page))
889
+ if (!pra.mapcount)
851890 return 0;
852891
853892 if (!page_rmapping(page))
....@@ -874,8 +913,9 @@
874913 if (we_locked)
875914 unlock_page(page);
876915
877
- return pra.referenced;
916
+ return rwc.contended ? -1 : pra.referenced;
878917 }
918
+EXPORT_SYMBOL_GPL(page_referenced);
879919
880920 static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
881921 unsigned long address, void *arg)
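
With rwc.try_lock set above, page_referenced() can now return -1 when the rmap lock was contended and the walk bailed out, as the updated kernel-doc says. A hypothetical caller-side sketch (not the actual vmscan code) of how that sentinel might be treated:

    #include <stdio.h>

    /*
     * Hypothetical stand-in for page_referenced(): >= 0 is the number of
     * referencing mappings, -1 means the walk bailed out on lock contention.
     */
    static int fake_page_referenced(int contended, int refs)
    {
        return contended ? -1 : refs;
    }

    static const char *reclaim_decision(int ret)
    {
        if (ret < 0)
            return "rmap lock contended: keep the page and retry later";
        if (ret > 0)
            return "recently referenced: keep the page";
        return "not referenced: candidate for reclaim";
    }

    int main(void)
    {
        printf("%s\n", reclaim_decision(fake_page_referenced(0, 0)));
        printf("%s\n", reclaim_decision(fake_page_referenced(0, 3)));
        printf("%s\n", reclaim_decision(fake_page_referenced(1, 0)));
        return 0;
    }
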
....@@ -886,21 +926,22 @@
886926 .address = address,
887927 .flags = PVMW_SYNC,
888928 };
889
- unsigned long start = address, end;
929
+ struct mmu_notifier_range range;
890930 int *cleaned = arg;
891931
892932 /*
893933 * We have to assume the worse case ie pmd for invalidation. Note that
894934 * the page can not be free from this function.
895935 */
896
- end = vma_address_end(page, vma);
897
- mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
936
+ mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
937
+ 0, vma, vma->vm_mm, address,
938
+ vma_address_end(page, vma));
939
+ mmu_notifier_invalidate_range_start(&range);
898940
899941 while (page_vma_mapped_walk(&pvmw)) {
900
- unsigned long cstart;
901942 int ret = 0;
902943
903
- cstart = address = pvmw.address;
944
+ address = pvmw.address;
904945 if (pvmw.pte) {
905946 pte_t entry;
906947 pte_t *pte = pvmw.pte;
....@@ -915,7 +956,7 @@
915956 set_pte_at(vma->vm_mm, address, pte, entry);
916957 ret = 1;
917958 } else {
918
-#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
959
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
919960 pmd_t *pmd = pvmw.pmd;
920961 pmd_t entry;
921962
....@@ -927,7 +968,6 @@
927968 entry = pmd_wrprotect(entry);
928969 entry = pmd_mkclean(entry);
929970 set_pmd_at(vma->vm_mm, address, pmd, entry);
930
- cstart &= PMD_MASK;
931971 ret = 1;
932972 #else
933973 /* unexpected pmd-mapped page? */
....@@ -946,7 +986,7 @@
946986 (*cleaned)++;
947987 }
948988
949
- mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
989
+ mmu_notifier_invalidate_range_end(&range);
950990
951991 return true;
952992 }
....@@ -1014,7 +1054,7 @@
10141054
10151055 /**
10161056 * __page_set_anon_rmap - set up new anonymous rmap
1017
- * @page: Page to add to rmap
1057
+ * @page: Page or Hugepage to add to rmap
10181058 * @vma: VM area to add page to.
10191059 * @address: User virtual address of the mapping
10201060 * @exclusive: the page is exclusively owned by the current process
....@@ -1051,7 +1091,6 @@
10511091 static void __page_check_anon_rmap(struct page *page,
10521092 struct vm_area_struct *vma, unsigned long address)
10531093 {
1054
-#ifdef CONFIG_DEBUG_VM
10551094 /*
10561095 * The page's anon-rmap details (mapping and index) are guaranteed to
10571096 * be set up correctly at this point.
....@@ -1064,9 +1103,9 @@
10641103 * are initially only visible via the pagetables, and the pte is locked
10651104 * over the call to page_add_new_anon_rmap.
10661105 */
1067
- BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root);
1068
- BUG_ON(page_to_pgoff(page) != linear_page_index(vma, address));
1069
-#endif
1106
+ VM_BUG_ON_PAGE(page_anon_vma(page)->root != vma->anon_vma->root, page);
1107
+ VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address),
1108
+ page);
10701109 }
10711110
10721111 /**
....@@ -1097,6 +1136,12 @@
10971136 {
10981137 bool compound = flags & RMAP_COMPOUND;
10991138 bool first;
1139
+ bool success = false;
1140
+
1141
+ if (unlikely(PageKsm(page)))
1142
+ lock_page_memcg(page);
1143
+ else
1144
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
11001145
11011146 if (compound) {
11021147 atomic_t *mapcount;
....@@ -1105,11 +1150,14 @@
11051150 mapcount = compound_mapcount_ptr(page);
11061151 first = atomic_inc_and_test(mapcount);
11071152 } else {
1108
- first = atomic_inc_and_test(&page->_mapcount);
1153
+ trace_android_vh_update_page_mapcount(page, true, compound,
1154
+ &first, &success);
1155
+ if (!success)
1156
+ first = atomic_inc_and_test(&page->_mapcount);
11091157 }
11101158
11111159 if (first) {
1112
- int nr = compound ? hpage_nr_pages(page) : 1;
1160
+ int nr = compound ? thp_nr_pages(page) : 1;
11131161 /*
11141162 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
11151163 * these counters are not modified in interrupt context, and
....@@ -1117,13 +1165,14 @@
11171165 * disabled.
11181166 */
11191167 if (compound)
1120
- __inc_node_page_state(page, NR_ANON_THPS);
1121
- __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, nr);
1168
+ __inc_lruvec_page_state(page, NR_ANON_THPS);
1169
+ __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
11221170 }
1123
- if (unlikely(PageKsm(page)))
1124
- return;
11251171
1126
- VM_BUG_ON_PAGE(!PageLocked(page), page);
1172
+ if (unlikely(PageKsm(page))) {
1173
+ unlock_page_memcg(page);
1174
+ return;
1175
+ }
11271176
11281177 /* address might be in next vma when migration races vma_adjust */
11291178 if (first)
....@@ -1134,7 +1183,7 @@
11341183 }
11351184
11361185 /**
1137
- * page_add_new_anon_rmap - add pte mapping to a new anonymous page
1186
+ * __page_add_new_anon_rmap - add pte mapping to a new anonymous page
11381187 * @page: the page to add the mapping to
11391188 * @vma: the vm area in which the mapping is added
11401189 * @address: the user virtual address mapped
....@@ -1144,25 +1193,27 @@
11441193 * This means the inc-and-test can be bypassed.
11451194 * Page does not have to be locked.
11461195 */
1147
-void page_add_new_anon_rmap(struct page *page,
1196
+void __page_add_new_anon_rmap(struct page *page,
11481197 struct vm_area_struct *vma, unsigned long address, bool compound)
11491198 {
1150
- int nr = compound ? hpage_nr_pages(page) : 1;
1199
+ int nr = compound ? thp_nr_pages(page) : 1;
11511200
1152
- VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
11531201 __SetPageSwapBacked(page);
11541202 if (compound) {
11551203 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
11561204 /* increment count (starts at -1) */
11571205 atomic_set(compound_mapcount_ptr(page), 0);
1158
- __inc_node_page_state(page, NR_ANON_THPS);
1206
+ if (hpage_pincount_available(page))
1207
+ atomic_set(compound_pincount_ptr(page), 0);
1208
+
1209
+ __inc_lruvec_page_state(page, NR_ANON_THPS);
11591210 } else {
11601211 /* Anon THP always mapped first with PMD */
11611212 VM_BUG_ON_PAGE(PageTransCompound(page), page);
11621213 /* increment count (starts at -1) */
11631214 atomic_set(&page->_mapcount, 0);
11641215 }
1165
- __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, nr);
1216
+ __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
11661217 __page_set_anon_rmap(page, vma, address, 1);
11671218 }
11681219
....@@ -1176,18 +1227,29 @@
11761227 void page_add_file_rmap(struct page *page, bool compound)
11771228 {
11781229 int i, nr = 1;
1230
+ bool first_mapping;
1231
+ bool success = false;
11791232
11801233 VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
11811234 lock_page_memcg(page);
11821235 if (compound && PageTransHuge(page)) {
1183
- for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
1184
- if (atomic_inc_and_test(&page[i]._mapcount))
1185
- nr++;
1236
+ for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
1237
+ trace_android_vh_update_page_mapcount(&page[i], true,
1238
+ compound, &first_mapping, &success);
1239
+ if (success) {
1240
+ if (first_mapping)
1241
+ nr++;
1242
+ } else {
1243
+ if (atomic_inc_and_test(&page[i]._mapcount))
1244
+ nr++;
1245
+ }
11861246 }
11871247 if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
11881248 goto out;
1189
- VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
1190
- __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
1249
+ if (PageSwapBacked(page))
1250
+ __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
1251
+ else
1252
+ __inc_node_page_state(page, NR_FILE_PMDMAPPED);
11911253 } else {
11921254 if (PageTransCompound(page) && page_mapping(page)) {
11931255 VM_WARN_ON_ONCE(!PageLocked(page));
....@@ -1196,8 +1258,15 @@
11961258 if (PageMlocked(page))
11971259 clear_page_mlock(compound_head(page));
11981260 }
1199
- if (!atomic_inc_and_test(&page->_mapcount))
1200
- goto out;
1261
+ trace_android_vh_update_page_mapcount(page, true,
1262
+ compound, &first_mapping, &success);
1263
+ if (success) {
1264
+ if (!first_mapping)
1265
+ goto out;
1266
+ } else {
1267
+ if (!atomic_inc_and_test(&page->_mapcount))
1268
+ goto out;
1269
+ }
12011270 }
12021271 __mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
12031272 out:
....@@ -1207,30 +1276,47 @@
12071276 static void page_remove_file_rmap(struct page *page, bool compound)
12081277 {
12091278 int i, nr = 1;
1279
+ bool first_mapping;
1280
+ bool success = false;
12101281
12111282 VM_BUG_ON_PAGE(compound && !PageHead(page), page);
1212
- lock_page_memcg(page);
12131283
12141284 /* Hugepages are not counted in NR_FILE_MAPPED for now. */
12151285 if (unlikely(PageHuge(page))) {
12161286 /* hugetlb pages are always mapped with pmds */
12171287 atomic_dec(compound_mapcount_ptr(page));
1218
- goto out;
1288
+ return;
12191289 }
12201290
12211291 /* page still mapped by someone else? */
12221292 if (compound && PageTransHuge(page)) {
1223
- for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
1224
- if (atomic_add_negative(-1, &page[i]._mapcount))
1225
- nr++;
1293
+ for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
1294
+ trace_android_vh_update_page_mapcount(&page[i], false,
1295
+ compound, &first_mapping, &success);
1296
+ if (success) {
1297
+ if (first_mapping)
1298
+ nr++;
1299
+ } else {
1300
+ if (atomic_add_negative(-1, &page[i]._mapcount))
1301
+ nr++;
1302
+ }
12261303 }
12271304 if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
1228
- goto out;
1229
- VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
1230
- __dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
1305
+ return;
1306
+ if (PageSwapBacked(page))
1307
+ __dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
1308
+ else
1309
+ __dec_node_page_state(page, NR_FILE_PMDMAPPED);
12311310 } else {
1232
- if (!atomic_add_negative(-1, &page->_mapcount))
1233
- goto out;
1311
+ trace_android_vh_update_page_mapcount(page, false,
1312
+ compound, &first_mapping, &success);
1313
+ if (success) {
1314
+ if (!first_mapping)
1315
+ return;
1316
+ } else {
1317
+ if (!atomic_add_negative(-1, &page->_mapcount))
1318
+ return;
1319
+ }
12341320 }
12351321
12361322 /*
....@@ -1242,13 +1328,13 @@
12421328
12431329 if (unlikely(PageMlocked(page)))
12441330 clear_page_mlock(page);
1245
-out:
1246
- unlock_page_memcg(page);
12471331 }
12481332
12491333 static void page_remove_anon_compound_rmap(struct page *page)
12501334 {
12511335 int i, nr;
1336
+ bool first_mapping;
1337
+ bool success = false;
12521338
12531339 if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
12541340 return;
....@@ -1260,28 +1346,41 @@
12601346 if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
12611347 return;
12621348
1263
- __dec_node_page_state(page, NR_ANON_THPS);
1349
+ __dec_lruvec_page_state(page, NR_ANON_THPS);
12641350
12651351 if (TestClearPageDoubleMap(page)) {
12661352 /*
12671353 * Subpages can be mapped with PTEs too. Check how many of
1268
- * themi are still mapped.
1354
+ * them are still mapped.
12691355 */
1270
- for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
1271
- if (atomic_add_negative(-1, &page[i]._mapcount))
1272
- nr++;
1356
+ for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
1357
+ trace_android_vh_update_page_mapcount(&page[i], false,
1358
+ false, &first_mapping, &success);
1359
+ if (success) {
1360
+ if (first_mapping)
1361
+ nr++;
1362
+ } else {
1363
+ if (atomic_add_negative(-1, &page[i]._mapcount))
1364
+ nr++;
1365
+ }
12731366 }
1367
+
1368
+ /*
1369
+ * Queue the page for deferred split if at least one small
1370
+ * page of the compound page is unmapped, but at least one
1371
+ * small page is still mapped.
1372
+ */
1373
+ if (nr && nr < thp_nr_pages(page))
1374
+ deferred_split_huge_page(page);
12741375 } else {
1275
- nr = HPAGE_PMD_NR;
1376
+ nr = thp_nr_pages(page);
12761377 }
12771378
12781379 if (unlikely(PageMlocked(page)))
12791380 clear_page_mlock(page);
12801381
1281
- if (nr) {
1282
- __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, -nr);
1283
- deferred_split_huge_page(page);
1284
- }
1382
+ if (nr)
1383
+ __mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr);
12851384 }
12861385
12871386 /**
....@@ -1293,22 +1392,36 @@
12931392 */
12941393 void page_remove_rmap(struct page *page, bool compound)
12951394 {
1296
- if (!PageAnon(page))
1297
- return page_remove_file_rmap(page, compound);
1395
+ bool first_mapping;
1396
+ bool success = false;
1397
+ lock_page_memcg(page);
12981398
1299
- if (compound)
1300
- return page_remove_anon_compound_rmap(page);
1399
+ if (!PageAnon(page)) {
1400
+ page_remove_file_rmap(page, compound);
1401
+ goto out;
1402
+ }
13011403
1302
- /* page still mapped by someone else? */
1303
- if (!atomic_add_negative(-1, &page->_mapcount))
1304
- return;
1404
+ if (compound) {
1405
+ page_remove_anon_compound_rmap(page);
1406
+ goto out;
1407
+ }
13051408
1409
+ trace_android_vh_update_page_mapcount(page, false,
1410
+ compound, &first_mapping, &success);
1411
+ if (success) {
1412
+ if (!first_mapping)
1413
+ goto out;
1414
+ } else {
1415
+ /* page still mapped by someone else? */
1416
+ if (!atomic_add_negative(-1, &page->_mapcount))
1417
+ goto out;
1418
+ }
13061419 /*
13071420 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
13081421 * these counters are not modified in interrupt context, and
13091422 * pte lock(a spinlock) is held, which implies preemption disabled.
13101423 */
1311
- __dec_node_page_state(page, NR_ANON_MAPPED);
1424
+ __dec_lruvec_page_state(page, NR_ANON_MAPPED);
13121425
13131426 if (unlikely(PageMlocked(page)))
13141427 clear_page_mlock(page);
....@@ -1325,6 +1438,8 @@
13251438 * Leaving it set also helps swapoff to reinstate ptes
13261439 * faster for those pages still in swapcache.
13271440 */
1441
+out:
1442
+ unlock_page_memcg(page);
13281443 }
13291444
13301445 /*
....@@ -1342,8 +1457,8 @@
13421457 pte_t pteval;
13431458 struct page *subpage;
13441459 bool ret = true;
1345
- unsigned long start = address, end;
1346
- enum ttu_flags flags = (enum ttu_flags)arg;
1460
+ struct mmu_notifier_range range;
1461
+ enum ttu_flags flags = (enum ttu_flags)(long)arg;
13471462
13481463 /*
13491464 * When racing against e.g. zap_pte_range() on another cpu,
....@@ -1375,16 +1490,19 @@
13751490 * Note that the page can not be free in this function as call of
13761491 * try_to_unmap() must hold a reference on the page.
13771492 */
1378
- end = PageKsm(page) ?
1493
+ range.end = PageKsm(page) ?
13791494 address + PAGE_SIZE : vma_address_end(page, vma);
1495
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
1496
+ address, range.end);
13801497 if (PageHuge(page)) {
13811498 /*
13821499 * If sharing is possible, start and end will be adjusted
13831500 * accordingly.
13841501 */
1385
- adjust_range_if_pmd_sharing_possible(vma, &start, &end);
1502
+ adjust_range_if_pmd_sharing_possible(vma, &range.start,
1503
+ &range.end);
13861504 }
1387
- mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
1505
+ mmu_notifier_invalidate_range_start(&range);
13881506
13891507 while (page_vma_mapped_walk(&pvmw)) {
13901508 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
....@@ -1408,7 +1526,7 @@
14081526 if (!PageTransCompound(page)) {
14091527 /*
14101528 * Holding pte lock, we do *not* need
1411
- * mmap_sem here
1529
+ * mmap_lock here
14121530 */
14131531 mlock_vma_page(page);
14141532 }
....@@ -1426,8 +1544,14 @@
14261544 subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
14271545 address = pvmw.address;
14281546
1429
- if (PageHuge(page)) {
1430
- if (huge_pmd_unshare(mm, &address, pvmw.pte)) {
1547
+ if (PageHuge(page) && !PageAnon(page)) {
1548
+ /*
1549
+ * To call huge_pmd_unshare, i_mmap_rwsem must be
1550
+ * held in write mode. Caller needs to explicitly
1551
+ * do this outside rmap routines.
1552
+ */
1553
+ VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
1554
+ if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
14311555 /*
14321556 * huge_pmd_unshare unmapped an entire PMD
14331557 * page. There is no way of knowing exactly
....@@ -1435,9 +1559,10 @@
14351559 * we must flush them all. start/end were
14361560 * already adjusted above to cover this range.
14371561 */
1438
- flush_cache_range(vma, start, end);
1439
- flush_tlb_range(vma, start, end);
1440
- mmu_notifier_invalidate_range(mm, start, end);
1562
+ flush_cache_range(vma, range.start, range.end);
1563
+ flush_tlb_range(vma, range.start, range.end);
1564
+ mmu_notifier_invalidate_range(mm, range.start,
1565
+ range.end);
14411566
14421567 /*
14431568 * The ref count of the PMD page was dropped
....@@ -1468,8 +1593,15 @@
14681593 */
14691594 entry = make_migration_entry(page, 0);
14701595 swp_pte = swp_entry_to_pte(entry);
1471
- if (pte_soft_dirty(pteval))
1596
+
1597
+ /*
1598
+ * pteval maps a zone device page and is therefore
1599
+ * a swap pte.
1600
+ */
1601
+ if (pte_swp_soft_dirty(pteval))
14721602 swp_pte = pte_swp_mksoft_dirty(swp_pte);
1603
+ if (pte_swp_uffd_wp(pteval))
1604
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
14731605 set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
14741606 /*
14751607 * No need to invalidate here it will synchronize on
....@@ -1484,15 +1616,6 @@
14841616 */
14851617 subpage = page;
14861618 goto discard;
1487
- }
1488
-
1489
- if (!(flags & TTU_IGNORE_ACCESS)) {
1490
- if (ptep_clear_flush_young_notify(vma, address,
1491
- pvmw.pte)) {
1492
- ret = false;
1493
- page_vma_mapped_walk_done(&pvmw);
1494
- break;
1495
- }
14961619 }
14971620
14981621 /* Nuke the page table entry. */
....@@ -1523,8 +1646,7 @@
15231646 if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
15241647 pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
15251648 if (PageHuge(page)) {
1526
- int nr = 1 << compound_order(page);
1527
- hugetlb_count_sub(nr, mm);
1649
+ hugetlb_count_sub(compound_nr(page), mm);
15281650 set_huge_swap_pte_at(mm, address,
15291651 pvmw.pte, pteval,
15301652 vma_mmu_pagesize(vma));
....@@ -1570,6 +1692,8 @@
15701692 swp_pte = swp_entry_to_pte(entry);
15711693 if (pte_soft_dirty(pteval))
15721694 swp_pte = pte_swp_mksoft_dirty(swp_pte);
1695
+ if (pte_uffd_wp(pteval))
1696
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
15731697 set_pte_at(mm, address, pvmw.pte, swp_pte);
15741698 /*
15751699 * No need to invalidate here it will synchronize on
....@@ -1594,7 +1718,30 @@
15941718
15951719 /* MADV_FREE page check */
15961720 if (!PageSwapBacked(page)) {
1597
- if (!PageDirty(page)) {
1721
+ int ref_count, map_count;
1722
+
1723
+ /*
1724
+ * Synchronize with gup_pte_range():
1725
+ * - clear PTE; barrier; read refcount
1726
+ * - inc refcount; barrier; read PTE
1727
+ */
1728
+ smp_mb();
1729
+
1730
+ ref_count = page_ref_count(page);
1731
+ map_count = page_mapcount(page);
1732
+
1733
+ /*
1734
+ * Order reads for page refcount and dirty flag
1735
+ * (see comments in __remove_mapping()).
1736
+ */
1737
+ smp_rmb();
1738
+
1739
+ /*
1740
+ * The only page refs must be one from isolation
1741
+ * plus the rmap(s) (dropped by discard:).
1742
+ */
1743
+ if (ref_count == 1 + map_count &&
1744
+ !PageDirty(page)) {
15981745 /* Invalidate as we cleared the pte */
15991746 mmu_notifier_invalidate_range(mm,
16001747 address, address + PAGE_SIZE);
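
The barrier comments added in this hunk describe a store-buffering handshake with gup_pte_range(): each side stores first (clear the PTE, or raise the refcount), issues a full barrier, then reads what the other side stores, so at least one of the two is guaranteed to notice the other and an MADV_FREE page cannot be freed behind a concurrent GUP. A userspace sketch of the same pattern with C11 atomics (all names are stand-ins, not kernel symbols):

    /* Build with: cc -std=c11 madv_free_handshake.c */
    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_int pte_present = 1;  /* stand-in for the PTE */
    static atomic_int refcount    = 1;  /* stand-in for page_ref_count() */

    /* try_to_unmap_one() side: clear the PTE, full barrier, read the refcount. */
    static bool unmap_side_may_free(int map_count)
    {
        atomic_store_explicit(&pte_present, 0, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);          /* smp_mb() */
        int refs = atomic_load_explicit(&refcount, memory_order_relaxed);

        /* Free only if nothing but the isolation ref and the rmaps remain. */
        return refs == 1 + map_count;
    }

    /* gup_pte_range() side: take a reference, full barrier, re-check the PTE. */
    static bool gup_side_keeps_page(void)
    {
        atomic_fetch_add_explicit(&refcount, 1, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);          /* smp_mb() */
        return atomic_load_explicit(&pte_present, memory_order_relaxed) != 0;
    }

    int main(void)
    {
        bool freed  = unmap_side_may_free(0);
        bool pinned = gup_side_keeps_page();
        return (freed && pinned) ? 1 : 0;
    }

With the seq_cst fences in place, the outcome where unmap_side_may_free() returns true while gup_side_keeps_page() also returns true is impossible under any interleaving, which is the property the ref_count == 1 + map_count check above relies on.
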
....@@ -1636,6 +1783,8 @@
16361783 swp_pte = swp_entry_to_pte(entry);
16371784 if (pte_soft_dirty(pteval))
16381785 swp_pte = pte_swp_mksoft_dirty(swp_pte);
1786
+ if (pte_uffd_wp(pteval))
1787
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
16391788 set_pte_at(mm, address, pvmw.pte, swp_pte);
16401789 /* Invalidate as we cleared the pte */
16411790 mmu_notifier_invalidate_range(mm, address,
....@@ -1665,28 +1814,15 @@
16651814 put_page(page);
16661815 }
16671816
1668
- mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
1817
+ mmu_notifier_invalidate_range_end(&range);
1818
+ trace_android_vh_try_to_unmap_one(vma, page, address, ret);
16691819
16701820 return ret;
16711821 }
16721822
1673
-bool is_vma_temporary_stack(struct vm_area_struct *vma)
1674
-{
1675
- int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
1676
-
1677
- if (!maybe_stack)
1678
- return false;
1679
-
1680
- if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
1681
- VM_STACK_INCOMPLETE_SETUP)
1682
- return true;
1683
-
1684
- return false;
1685
-}
1686
-
16871823 static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
16881824 {
1689
- return is_vma_temporary_stack(vma);
1825
+ return vma_is_temporary_stack(vma);
16901826 }
16911827
16921828 static int page_not_mapped(struct page *page)
....@@ -1779,19 +1915,29 @@
17791915 struct anon_vma *anon_vma;
17801916
17811917 if (rwc->anon_lock)
1782
- return rwc->anon_lock(page);
1918
+ return rwc->anon_lock(page, rwc);
17831919
17841920 /*
17851921 * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
17861922 * because that depends on page_mapped(); but not all its usages
1787
- * are holding mmap_sem. Users without mmap_sem are required to
1923
+ * are holding mmap_lock. Users without mmap_lock are required to
17881924 * take a reference count to prevent the anon_vma disappearing
17891925 */
17901926 anon_vma = page_anon_vma(page);
17911927 if (!anon_vma)
17921928 return NULL;
17931929
1930
+ if (anon_vma_trylock_read(anon_vma))
1931
+ goto out;
1932
+
1933
+ if (rwc->try_lock) {
1934
+ anon_vma = NULL;
1935
+ rwc->contended = true;
1936
+ goto out;
1937
+ }
1938
+
17941939 anon_vma_lock_read(anon_vma);
1940
+out:
17951941 return anon_vma;
17961942 }
17971943
....@@ -1804,7 +1950,7 @@
18041950 * Find all the mappings of a page using the mapping pointer and the vma chains
18051951 * contained in the anon_vma struct it points to.
18061952 *
1807
- * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
1953
+ * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
18081954 * where the page was found will be held for write. So, we won't recheck
18091955 * vm_flags for that VMA. That should be OK, because that vma shouldn't be
18101956 * LOCKED.
....@@ -1827,7 +1973,7 @@
18271973 return;
18281974
18291975 pgoff_start = page_to_pgoff(page);
1830
- pgoff_end = pgoff_start + hpage_nr_pages(page) - 1;
1976
+ pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
18311977 anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
18321978 pgoff_start, pgoff_end) {
18331979 struct vm_area_struct *vma = avc->vma;
....@@ -1857,7 +2003,7 @@
18572003 * Find all the mappings of a page using the mapping pointer and the vma chains
18582004 * contained in the address_space struct it points to.
18592005 *
1860
- * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
2006
+ * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
18612007 * where the page was found will be held for write. So, we won't recheck
18622008 * vm_flags for that VMA. That should be OK, because that vma shouldn't be
18632009 * LOCKED.
....@@ -1868,6 +2014,7 @@
18682014 struct address_space *mapping = page_mapping(page);
18692015 pgoff_t pgoff_start, pgoff_end;
18702016 struct vm_area_struct *vma;
2017
+ bool got_lock = false, success = false;
18712018
18722019 /*
18732020 * The page lock not only makes sure that page->mapping cannot
....@@ -1881,9 +2028,26 @@
18812028 return;
18822029
18832030 pgoff_start = page_to_pgoff(page);
1884
- pgoff_end = pgoff_start + hpage_nr_pages(page) - 1;
1885
- if (!locked)
1886
- i_mmap_lock_read(mapping);
2031
+ pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
2032
+ if (!locked) {
2033
+ trace_android_vh_do_page_trylock(page,
2034
+ &mapping->i_mmap_rwsem, &got_lock, &success);
2035
+ if (success) {
2036
+ if (!got_lock)
2037
+ return;
2038
+ } else {
2039
+ if (i_mmap_trylock_read(mapping))
2040
+ goto lookup;
2041
+
2042
+ if (rwc->try_lock) {
2043
+ rwc->contended = true;
2044
+ return;
2045
+ }
2046
+
2047
+ i_mmap_lock_read(mapping);
2048
+ }
2049
+ }
2050
+lookup:
18872051 vma_interval_tree_foreach(vma, &mapping->i_mmap,
18882052 pgoff_start, pgoff_end) {
18892053 unsigned long address = vma_address(page, vma);
....@@ -1928,27 +2092,10 @@
19282092
19292093 #ifdef CONFIG_HUGETLB_PAGE
19302094 /*
1931
- * The following three functions are for anonymous (private mapped) hugepages.
2095
+ * The following two functions are for anonymous (private mapped) hugepages.
19322096 * Unlike common anonymous pages, anonymous hugepages have no accounting code
19332097 * and no lru code, because we handle hugepages differently from common pages.
19342098 */
1935
-static void __hugepage_set_anon_rmap(struct page *page,
1936
- struct vm_area_struct *vma, unsigned long address, int exclusive)
1937
-{
1938
- struct anon_vma *anon_vma = vma->anon_vma;
1939
-
1940
- BUG_ON(!anon_vma);
1941
-
1942
- if (PageAnon(page))
1943
- return;
1944
- if (!exclusive)
1945
- anon_vma = anon_vma->root;
1946
-
1947
- anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
1948
- page->mapping = (struct address_space *) anon_vma;
1949
- page->index = linear_page_index(vma, address);
1950
-}
1951
-
19522099 void hugepage_add_anon_rmap(struct page *page,
19532100 struct vm_area_struct *vma, unsigned long address)
19542101 {
....@@ -1960,7 +2107,7 @@
19602107 /* address might be in next vma when migration races vma_adjust */
19612108 first = atomic_inc_and_test(compound_mapcount_ptr(page));
19622109 if (first)
1963
- __hugepage_set_anon_rmap(page, vma, address, 0);
2110
+ __page_set_anon_rmap(page, vma, address, 0);
19642111 }
19652112
19662113 void hugepage_add_new_anon_rmap(struct page *page,
....@@ -1968,6 +2115,9 @@
19682115 {
19692116 BUG_ON(address < vma->vm_start || address >= vma->vm_end);
19702117 atomic_set(compound_mapcount_ptr(page), 0);
1971
- __hugepage_set_anon_rmap(page, vma, address, 1);
2118
+ if (hpage_pincount_available(page))
2119
+ atomic_set(compound_pincount_ptr(page), 0);
2120
+
2121
+ __page_set_anon_rmap(page, vma, address, 1);
19722122 }
19732123 #endif /* CONFIG_HUGETLB_PAGE */