2023-12-08 01573e231f18eb2d99162747186f59511f56b64d
kernel/mm/rmap.c
@@ -21,13 +21,14 @@
  * Lock ordering in mm:
  *
  * inode->i_mutex (while writing or truncating, not reading or faulting)
- * mm->mmap_sem
- * page->flags PG_locked (lock_page)
+ * mm->mmap_lock
+ * page->flags PG_locked (lock_page) * (see hugetlbfs below)
  * hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
  * mapping->i_mmap_rwsem
+ * hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
  * anon_vma->rwsem
  * mm->page_table_lock or pte_lock
- * zone_lru_lock (in mark_page_accessed, isolate_lru_page)
+ * pgdat->lru_lock (in mark_page_accessed, isolate_lru_page)
  * swap_lock (in swap_duplicate, swap_info_get)
  * mmlist_lock (in mmput, drain_mmlist and others)
  * mapping->private_lock (in __set_page_dirty_buffers)
@@ -43,6 +44,11 @@
  * anon_vma->rwsem,mapping->i_mutex (memory_failure, collect_procs_anon)
  * ->tasklist_lock
  * pte map lock
+ *
+ * * hugetlbfs PageHuge() pages take locks in this order:
+ * mapping->i_mmap_rwsem
+ * hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
+ * page->flags PG_locked (lock_page)
  */

 #include <linux/mm.h>
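The added comment spells out the hugetlbfs ordering: mapping->i_mmap_rwsem, then hugetlb_fault_mutex, then the page lock. Below is a minimal sketch of a caller honoring that order; it assumes the hugetlb_fault_mutex_table / hugetlb_fault_mutex_hash() helpers of this kernel series and is illustrative only, not part of the patch.

/*
 * Illustrative only (not from this patch): take the three hugetlbfs
 * locks in the order documented in the comment above.
 */
static void hugetlb_lock_order_sketch(struct address_space *mapping,
				      pgoff_t idx, struct page *page)
{
	u32 hash;

	i_mmap_lock_read(mapping);			/* 1. mapping->i_mmap_rwsem */
	hash = hugetlb_fault_mutex_hash(mapping, idx);
	mutex_lock(&hugetlb_fault_mutex_table[hash]);	/* 2. hugetlb_fault_mutex */
	lock_page(page);				/* 3. page PG_locked last */

	/* ... hugetlbfs fault or unmap work ... */

	unlock_page(page);
	mutex_unlock(&hugetlb_fault_mutex_table[hash]);
	i_mmap_unlock_read(mapping);
}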
@@ -61,6 +67,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/migrate.h>
 #include <linux/hugetlb.h>
+#include <linux/huge_mm.h>
 #include <linux/backing-dev.h>
 #include <linux/page_idle.h>
 #include <linux/memremap.h>
@@ -69,6 +76,8 @@
 #include <asm/tlbflush.h>

 #include <trace/events/tlb.h>
+
+#include <trace/hooks/mm.h>

 #include "internal.h"

@@ -170,7 +179,7 @@
  * to do any locking for the common case of already having
  * an anon_vma.
  *
- * This must be called with the mmap_sem held for reading.
+ * This must be called with the mmap_lock held for reading.
  */
 int __anon_vma_prepare(struct vm_area_struct *vma)
 {
@@ -250,13 +259,19 @@
  * Attach the anon_vmas from src to dst.
  * Returns 0 on success, -ENOMEM on failure.
  *
- * If dst->anon_vma is NULL this function tries to find and reuse existing
- * anon_vma which has no vmas and only one child anon_vma. This prevents
- * degradation of anon_vma hierarchy to endless linear chain in case of
- * constantly forking task. On the other hand, an anon_vma with more than one
- * child isn't reused even if there was no alive vma, thus rmap walker has a
- * good chance of avoiding scanning the whole hierarchy when it searches where
- * page is mapped.
+ * anon_vma_clone() is called by __vma_split(), __split_vma(), copy_vma() and
+ * anon_vma_fork(). The first three want an exact copy of src, while the last
+ * one, anon_vma_fork(), may try to reuse an existing anon_vma to prevent
+ * endless growth of anon_vma. Since dst->anon_vma is set to NULL before call,
+ * we can identify this case by checking (!dst->anon_vma && src->anon_vma).
+ *
+ * If (!dst->anon_vma && src->anon_vma) is true, this function tries to find
+ * and reuse existing anon_vma which has no vmas and only one child anon_vma.
+ * This prevents degradation of anon_vma hierarchy to endless linear chain in
+ * case of constantly forking task. On the other hand, an anon_vma with more
+ * than one child isn't reused even if there was no alive vma, thus rmap
+ * walker has a good chance of avoiding scanning the whole hierarchy when it
+ * searches where page is mapped.
  */
 int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 {
@@ -286,8 +301,8 @@
  * will always reuse it. Root anon_vma is never reused:
  * it has self-parent reference and at least one child.
  */
- if (!dst->anon_vma && anon_vma != src->anon_vma &&
- anon_vma->degree < 2)
+ if (!dst->anon_vma && src->anon_vma &&
+ anon_vma != src->anon_vma && anon_vma->degree < 2)
 dst->anon_vma = anon_vma;
 }
 if (dst->anon_vma)
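The extra src->anon_vma test works because only the fork path clears dst->anon_vma before calling anon_vma_clone(); the other callers pass a dst that either already has an anon_vma or never wants reuse. A simplified sketch of that calling convention, loosely modeled on anon_vma_fork() with allocation and error unwinding omitted:

/* Simplified sketch of the fork-side caller; the real anon_vma_fork()
 * in mm/rmap.c also allocates a fresh anon_vma when nothing was reused. */
static int fork_side_clone_sketch(struct vm_area_struct *vma,
				  struct vm_area_struct *pvma)
{
	/* Nothing to do if the parent has no anon_vma here. */
	if (!pvma->anon_vma)
		return 0;

	/* Clearing dst->anon_vma is what enables the reuse path above. */
	vma->anon_vma = NULL;

	if (anon_vma_clone(vma, pvma))
		return -ENOMEM;

	/* An existing anon_vma was reused; otherwise allocate a new one. */
	if (vma->anon_vma)
		return 0;

	/* ... allocate and attach a fresh anon_vma ... */
	return 0;
}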
@@ -457,9 +472,10 @@
  * chain and verify that the page in question is indeed mapped in it
  * [ something equivalent to page_mapped_in_vma() ].
  *
- * Since anon_vma's slab is DESTROY_BY_RCU and we know from page_remove_rmap()
- * that the anon_vma pointer from page->mapping is valid if there is a
- * mapcount, we can dereference the anon_vma after observing those.
+ * Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from
+ * page_remove_rmap() that the anon_vma pointer from page->mapping is valid
+ * if there is a mapcount, we can dereference the anon_vma after observing
+ * those.
  */
 struct anon_vma *page_get_anon_vma(struct page *page)
 {
@@ -502,13 +518,16 @@
  *
  * Its a little more complex as it tries to keep the fast path to a single
  * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
- * reference like with page_get_anon_vma() and then block on the mutex.
+ * reference like with page_get_anon_vma() and then block on the mutex
+ * on !rwc->try_lock case.
  */
-struct anon_vma *page_lock_anon_vma_read(struct page *page)
+struct anon_vma *page_lock_anon_vma_read(struct page *page,
+ struct rmap_walk_control *rwc)
 {
 struct anon_vma *anon_vma = NULL;
 struct anon_vma *root_anon_vma;
 unsigned long anon_mapping;
+ bool success = false;

 rcu_read_lock();
 anon_mapping = (unsigned long)READ_ONCE(page->mapping);
@@ -529,6 +548,17 @@
 up_read(&root_anon_vma->rwsem);
 anon_vma = NULL;
 }
+ goto out;
+ }
+ trace_android_vh_do_page_trylock(page, NULL, NULL, &success);
+ if (success) {
+ anon_vma = NULL;
+ goto out;
+ }
+
+ if (rwc && rwc->try_lock) {
+ anon_vma = NULL;
+ rwc->contended = true;
 goto out;
 }

@@ -658,7 +688,7 @@
  */
 void flush_tlb_batched_pending(struct mm_struct *mm)
 {
- if (mm->tlb_flush_batched) {
+ if (data_race(mm->tlb_flush_batched)) {
 flush_tlb_mm(mm);

 /*
@@ -768,6 +798,7 @@
 }

 if (pvmw.pte) {
+ trace_android_vh_look_around(&pvmw, page, vma, &referenced);
 if (ptep_clear_flush_young_notify(vma, address,
 pvmw.pte)) {
 /*
@@ -803,6 +834,7 @@
 pra->vm_flags |= vma->vm_flags;
 }

+ trace_android_vh_page_referenced_one_end(vma, page, referenced);
 if (!pra->mapcount)
 return false; /* To break the loop */

@@ -827,8 +859,10 @@
  * @memcg: target memory cgroup
  * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
  *
- * Quick test_and_clear_referenced for all mappings to a page,
- * returns the number of ptes which referenced the page.
+ * Quick test_and_clear_referenced for all mappings of a page,
+ *
+ * Return: The number of mappings which referenced the page. Return -1 if
+ * the function bailed out due to rmap lock contention.
  */
 int page_referenced(struct page *page,
 int is_locked,
@@ -844,10 +878,11 @@
 .rmap_one = page_referenced_one,
 .arg = (void *)&pra,
 .anon_lock = page_lock_anon_vma_read,
+ .try_lock = true,
 };

 *vm_flags = 0;
- if (!page_mapped(page))
+ if (!pra.mapcount)
 return 0;

 if (!page_rmapping(page))
@@ -874,7 +909,7 @@
 if (we_locked)
 unlock_page(page);

- return pra.referenced;
+ return rwc.contended ? -1 : pra.referenced;
 }

 static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
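page_referenced() now sets rwc.try_lock and reports rmap lock contention as -1 instead of a reference count. A hedged sketch of how a caller might consume that tri-state result follows; the helper name is hypothetical and not part of this patch, and reclaim-style callers typically err on the side of keeping the page when the lock could not be taken.

/* Hypothetical caller: fold the new -1 ("rmap lock contended") result
 * into a simple referenced/not-referenced decision. */
static bool page_was_referenced(struct page *page, struct mem_cgroup *memcg)
{
	unsigned long vm_flags;
	int referenced = page_referenced(page, 0, memcg, &vm_flags);

	if (referenced == -1) {
		/* Lock was contended: assume referenced rather than stall. */
		return true;
	}
	return referenced > 0;
}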
@@ -886,21 +921,22 @@
 .address = address,
 .flags = PVMW_SYNC,
 };
- unsigned long start = address, end;
+ struct mmu_notifier_range range;
 int *cleaned = arg;

 /*
 * We have to assume the worse case ie pmd for invalidation. Note that
 * the page can not be free from this function.
 */
- end = vma_address_end(page, vma);
- mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
+ mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
+ 0, vma, vma->vm_mm, address,
+ vma_address_end(page, vma));
+ mmu_notifier_invalidate_range_start(&range);

 while (page_vma_mapped_walk(&pvmw)) {
- unsigned long cstart;
 int ret = 0;

- cstart = address = pvmw.address;
+ address = pvmw.address;
 if (pvmw.pte) {
 pte_t entry;
 pte_t *pte = pvmw.pte;
@@ -915,7 +951,7 @@
 set_pte_at(vma->vm_mm, address, pte, entry);
 ret = 1;
 } else {
-#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 pmd_t *pmd = pvmw.pmd;
 pmd_t entry;

@@ -927,7 +963,6 @@
 entry = pmd_wrprotect(entry);
 entry = pmd_mkclean(entry);
 set_pmd_at(vma->vm_mm, address, pmd, entry);
- cstart &= PMD_MASK;
 ret = 1;
 #else
 /* unexpected pmd-mapped page? */
@@ -946,7 +981,7 @@
 (*cleaned)++;
 }

- mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
+ mmu_notifier_invalidate_range_end(&range);

 return true;
 }
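page_mkclean_one() now drives the invalidation through an mmu_notifier_range instead of raw start/end arguments. A minimal sketch of that pattern, under the same MMU_NOTIFY_PROTECTION_PAGE event used above, with the actual PTE walk elided (illustrative only):

/* Minimal sketch of the mmu_notifier_range pattern (illustrative only). */
static void mkclean_range_sketch(struct vm_area_struct *vma,
				 unsigned long start, unsigned long end)
{
	struct mmu_notifier_range range;

	/* Tell secondary MMUs (KVM, etc.) what is about to change. */
	mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, 0,
				vma, vma->vm_mm, start, end);
	mmu_notifier_invalidate_range_start(&range);

	/* ... walk and write-protect/clean PTEs in [start, end) ... */

	mmu_notifier_invalidate_range_end(&range);
}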
@@ -1014,7 +1049,7 @@

 /**
 * __page_set_anon_rmap - set up new anonymous rmap
- * @page: Page to add to rmap
+ * @page: Page or Hugepage to add to rmap
 * @vma: VM area to add page to.
 * @address: User virtual address of the mapping
 * @exclusive: the page is exclusively owned by the current process
@@ -1051,7 +1086,6 @@
 static void __page_check_anon_rmap(struct page *page,
 struct vm_area_struct *vma, unsigned long address)
 {
-#ifdef CONFIG_DEBUG_VM
 /*
 * The page's anon-rmap details (mapping and index) are guaranteed to
 * be set up correctly at this point.
@@ -1064,9 +1098,9 @@
 * are initially only visible via the pagetables, and the pte is locked
 * over the call to page_add_new_anon_rmap.
 */
- BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root);
- BUG_ON(page_to_pgoff(page) != linear_page_index(vma, address));
-#endif
+ VM_BUG_ON_PAGE(page_anon_vma(page)->root != vma->anon_vma->root, page);
+ VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address),
+ page);
 }

 /**
@@ -1097,6 +1131,12 @@
 {
 bool compound = flags & RMAP_COMPOUND;
 bool first;
+ bool success = false;
+
+ if (unlikely(PageKsm(page)))
+ lock_page_memcg(page);
+ else
+ VM_BUG_ON_PAGE(!PageLocked(page), page);

 if (compound) {
 atomic_t *mapcount;
@@ -1105,11 +1145,14 @@
 mapcount = compound_mapcount_ptr(page);
 first = atomic_inc_and_test(mapcount);
 } else {
- first = atomic_inc_and_test(&page->_mapcount);
+ trace_android_vh_update_page_mapcount(page, true, compound,
+ &first, &success);
+ if (!success)
+ first = atomic_inc_and_test(&page->_mapcount);
 }

 if (first) {
- int nr = compound ? hpage_nr_pages(page) : 1;
+ int nr = compound ? thp_nr_pages(page) : 1;
 /*
 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
 * these counters are not modified in interrupt context, and
@@ -1117,13 +1160,14 @@
 * disabled.
 */
 if (compound)
- __inc_node_page_state(page, NR_ANON_THPS);
- __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, nr);
+ __inc_lruvec_page_state(page, NR_ANON_THPS);
+ __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
 }
- if (unlikely(PageKsm(page)))
- return;

- VM_BUG_ON_PAGE(!PageLocked(page), page);
+ if (unlikely(PageKsm(page))) {
+ unlock_page_memcg(page);
+ return;
+ }

 /* address might be in next vma when migration races vma_adjust */
 if (first)
@@ -1134,7 +1178,7 @@
 }

 /**
- * page_add_new_anon_rmap - add pte mapping to a new anonymous page
+ * __page_add_new_anon_rmap - add pte mapping to a new anonymous page
 * @page: the page to add the mapping to
 * @vma: the vm area in which the mapping is added
 * @address: the user virtual address mapped
@@ -1144,25 +1188,27 @@
 * This means the inc-and-test can be bypassed.
 * Page does not have to be locked.
 */
-void page_add_new_anon_rmap(struct page *page,
+void __page_add_new_anon_rmap(struct page *page,
 struct vm_area_struct *vma, unsigned long address, bool compound)
 {
- int nr = compound ? hpage_nr_pages(page) : 1;
+ int nr = compound ? thp_nr_pages(page) : 1;

- VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
 __SetPageSwapBacked(page);
 if (compound) {
 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 /* increment count (starts at -1) */
 atomic_set(compound_mapcount_ptr(page), 0);
- __inc_node_page_state(page, NR_ANON_THPS);
+ if (hpage_pincount_available(page))
+ atomic_set(compound_pincount_ptr(page), 0);
+
+ __inc_lruvec_page_state(page, NR_ANON_THPS);
 } else {
 /* Anon THP always mapped first with PMD */
 VM_BUG_ON_PAGE(PageTransCompound(page), page);
 /* increment count (starts at -1) */
 atomic_set(&page->_mapcount, 0);
 }
- __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, nr);
+ __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
 __page_set_anon_rmap(page, vma, address, 1);
 }

@@ -1176,18 +1222,29 @@
 void page_add_file_rmap(struct page *page, bool compound)
 {
 int i, nr = 1;
+ bool first_mapping;
+ bool success = false;

 VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
 lock_page_memcg(page);
 if (compound && PageTransHuge(page)) {
- for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
- if (atomic_inc_and_test(&page[i]._mapcount))
- nr++;
+ for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
+ trace_android_vh_update_page_mapcount(&page[i], true,
+ compound, &first_mapping, &success);
+ if ((success)) {
+ if (first_mapping)
+ nr++;
+ } else {
+ if (atomic_inc_and_test(&page[i]._mapcount))
+ nr++;
+ }
 }
 if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
 goto out;
- VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
- __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
+ if (PageSwapBacked(page))
+ __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
+ else
+ __inc_node_page_state(page, NR_FILE_PMDMAPPED);
 } else {
 if (PageTransCompound(page) && page_mapping(page)) {
 VM_WARN_ON_ONCE(!PageLocked(page));
@@ -1196,8 +1253,15 @@
 if (PageMlocked(page))
 clear_page_mlock(compound_head(page));
 }
- if (!atomic_inc_and_test(&page->_mapcount))
- goto out;
+ trace_android_vh_update_page_mapcount(page, true,
+ compound, &first_mapping, &success);
+ if (success) {
+ if (!first_mapping)
+ goto out;
+ } else {
+ if (!atomic_inc_and_test(&page->_mapcount))
+ goto out;
+ }
 }
 __mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
 out:
@@ -1207,30 +1271,47 @@
 static void page_remove_file_rmap(struct page *page, bool compound)
 {
 int i, nr = 1;
+ bool first_mapping;
+ bool success = false;

 VM_BUG_ON_PAGE(compound && !PageHead(page), page);
- lock_page_memcg(page);

 /* Hugepages are not counted in NR_FILE_MAPPED for now. */
 if (unlikely(PageHuge(page))) {
 /* hugetlb pages are always mapped with pmds */
 atomic_dec(compound_mapcount_ptr(page));
- goto out;
+ return;
 }

 /* page still mapped by someone else? */
 if (compound && PageTransHuge(page)) {
- for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
- if (atomic_add_negative(-1, &page[i]._mapcount))
- nr++;
+ for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
+ trace_android_vh_update_page_mapcount(&page[i], false,
+ compound, &first_mapping, &success);
+ if (success) {
+ if (first_mapping)
+ nr++;
+ } else {
+ if (atomic_add_negative(-1, &page[i]._mapcount))
+ nr++;
+ }
 }
 if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
- goto out;
- VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
- __dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
+ return;
+ if (PageSwapBacked(page))
+ __dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
+ else
+ __dec_node_page_state(page, NR_FILE_PMDMAPPED);
 } else {
- if (!atomic_add_negative(-1, &page->_mapcount))
- goto out;
+ trace_android_vh_update_page_mapcount(page, false,
+ compound, &first_mapping, &success);
+ if (success) {
+ if (!first_mapping)
+ return;
+ } else {
+ if (!atomic_add_negative(-1, &page->_mapcount))
+ return;
+ }
 }

 /*
@@ -1242,13 +1323,13 @@

 if (unlikely(PageMlocked(page)))
 clear_page_mlock(page);
-out:
- unlock_page_memcg(page);
 }

 static void page_remove_anon_compound_rmap(struct page *page)
 {
 int i, nr;
+ bool first_mapping;
+ bool success = false;

 if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
 return;
@@ -1260,28 +1341,41 @@
 if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
 return;

- __dec_node_page_state(page, NR_ANON_THPS);
+ __dec_lruvec_page_state(page, NR_ANON_THPS);

 if (TestClearPageDoubleMap(page)) {
 /*
 * Subpages can be mapped with PTEs too. Check how many of
- * themi are still mapped.
+ * them are still mapped.
 */
- for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
- if (atomic_add_negative(-1, &page[i]._mapcount))
- nr++;
+ for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
+ trace_android_vh_update_page_mapcount(&page[i], false,
+ false, &first_mapping, &success);
+ if (success) {
+ if (first_mapping)
+ nr++;
+ } else {
+ if (atomic_add_negative(-1, &page[i]._mapcount))
+ nr++;
+ }
 }
+
+ /*
+ * Queue the page for deferred split if at least one small
+ * page of the compound page is unmapped, but at least one
+ * small page is still mapped.
+ */
+ if (nr && nr < thp_nr_pages(page))
+ deferred_split_huge_page(page);
 } else {
- nr = HPAGE_PMD_NR;
+ nr = thp_nr_pages(page);
 }

 if (unlikely(PageMlocked(page)))
 clear_page_mlock(page);

- if (nr) {
- __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, -nr);
- deferred_split_huge_page(page);
- }
+ if (nr)
+ __mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr);
 }

 /**
@@ -1293,22 +1387,36 @@
 */
 void page_remove_rmap(struct page *page, bool compound)
 {
- if (!PageAnon(page))
- return page_remove_file_rmap(page, compound);
+ bool first_mapping;
+ bool success = false;
+ lock_page_memcg(page);

- if (compound)
- return page_remove_anon_compound_rmap(page);
+ if (!PageAnon(page)) {
+ page_remove_file_rmap(page, compound);
+ goto out;
+ }

- /* page still mapped by someone else? */
- if (!atomic_add_negative(-1, &page->_mapcount))
- return;
+ if (compound) {
+ page_remove_anon_compound_rmap(page);
+ goto out;
+ }

+ trace_android_vh_update_page_mapcount(page, false,
+ compound, &first_mapping, &success);
+ if (success) {
+ if (!first_mapping)
+ goto out;
+ } else {
+ /* page still mapped by someone else? */
+ if (!atomic_add_negative(-1, &page->_mapcount))
+ goto out;
+ }
 /*
 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
 * these counters are not modified in interrupt context, and
 * pte lock(a spinlock) is held, which implies preemption disabled.
 */
- __dec_node_page_state(page, NR_ANON_MAPPED);
+ __dec_lruvec_page_state(page, NR_ANON_MAPPED);

 if (unlikely(PageMlocked(page)))
 clear_page_mlock(page);
@@ -1325,6 +1433,8 @@
 * Leaving it set also helps swapoff to reinstate ptes
 * faster for those pages still in swapcache.
 */
+out:
+ unlock_page_memcg(page);
 }

 /*
@@ -1342,8 +1452,8 @@
 pte_t pteval;
 struct page *subpage;
 bool ret = true;
- unsigned long start = address, end;
- enum ttu_flags flags = (enum ttu_flags)arg;
+ struct mmu_notifier_range range;
+ enum ttu_flags flags = (enum ttu_flags)(long)arg;

 /*
 * When racing against e.g. zap_pte_range() on another cpu,
@@ -1375,16 +1485,19 @@
 * Note that the page can not be free in this function as call of
 * try_to_unmap() must hold a reference on the page.
 */
- end = PageKsm(page) ?
+ range.end = PageKsm(page) ?
 address + PAGE_SIZE : vma_address_end(page, vma);
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+ address, range.end);
 if (PageHuge(page)) {
 /*
 * If sharing is possible, start and end will be adjusted
 * accordingly.
 */
- adjust_range_if_pmd_sharing_possible(vma, &start, &end);
+ adjust_range_if_pmd_sharing_possible(vma, &range.start,
+ &range.end);
 }
- mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
+ mmu_notifier_invalidate_range_start(&range);

 while (page_vma_mapped_walk(&pvmw)) {
 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
@@ -1408,7 +1521,7 @@
 if (!PageTransCompound(page)) {
 /*
 * Holding pte lock, we do *not* need
- * mmap_sem here
+ * mmap_lock here
 */
 mlock_vma_page(page);
 }
@@ -1426,8 +1539,14 @@
 subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
 address = pvmw.address;

- if (PageHuge(page)) {
- if (huge_pmd_unshare(mm, &address, pvmw.pte)) {
+ if (PageHuge(page) && !PageAnon(page)) {
+ /*
+ * To call huge_pmd_unshare, i_mmap_rwsem must be
+ * held in write mode. Caller needs to explicitly
+ * do this outside rmap routines.
+ */
+ VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
+ if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
 /*
 * huge_pmd_unshare unmapped an entire PMD
 * page. There is no way of knowing exactly
@@ -1435,9 +1554,10 @@
 * we must flush them all. start/end were
 * already adjusted above to cover this range.
 */
- flush_cache_range(vma, start, end);
- flush_tlb_range(vma, start, end);
- mmu_notifier_invalidate_range(mm, start, end);
+ flush_cache_range(vma, range.start, range.end);
+ flush_tlb_range(vma, range.start, range.end);
+ mmu_notifier_invalidate_range(mm, range.start,
+ range.end);

 /*
 * The ref count of the PMD page was dropped
@@ -1468,8 +1588,15 @@
 */
 entry = make_migration_entry(page, 0);
 swp_pte = swp_entry_to_pte(entry);
- if (pte_soft_dirty(pteval))
+
+ /*
+ * pteval maps a zone device page and is therefore
+ * a swap pte.
+ */
+ if (pte_swp_soft_dirty(pteval))
 swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_swp_uffd_wp(pteval))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
 set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
 /*
 * No need to invalidate here it will synchronize on
@@ -1484,15 +1611,6 @@
 */
 subpage = page;
 goto discard;
- }
-
- if (!(flags & TTU_IGNORE_ACCESS)) {
- if (ptep_clear_flush_young_notify(vma, address,
- pvmw.pte)) {
- ret = false;
- page_vma_mapped_walk_done(&pvmw);
- break;
- }
 }

 /* Nuke the page table entry. */
@@ -1523,8 +1641,7 @@
 if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
 pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
 if (PageHuge(page)) {
- int nr = 1 << compound_order(page);
- hugetlb_count_sub(nr, mm);
+ hugetlb_count_sub(compound_nr(page), mm);
 set_huge_swap_pte_at(mm, address,
 pvmw.pte, pteval,
 vma_mmu_pagesize(vma));
@@ -1570,6 +1687,8 @@
 swp_pte = swp_entry_to_pte(entry);
 if (pte_soft_dirty(pteval))
 swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_uffd_wp(pteval))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
 set_pte_at(mm, address, pvmw.pte, swp_pte);
 /*
 * No need to invalidate here it will synchronize on
@@ -1594,7 +1713,30 @@

 /* MADV_FREE page check */
 if (!PageSwapBacked(page)) {
- if (!PageDirty(page)) {
+ int ref_count, map_count;
+
+ /*
+ * Synchronize with gup_pte_range():
+ * - clear PTE; barrier; read refcount
+ * - inc refcount; barrier; read PTE
+ */
+ smp_mb();
+
+ ref_count = page_ref_count(page);
+ map_count = page_mapcount(page);
+
+ /*
+ * Order reads for page refcount and dirty flag
+ * (see comments in __remove_mapping()).
+ */
+ smp_rmb();
+
+ /*
+ * The only page refs must be one from isolation
+ * plus the rmap(s) (dropped by discard:).
+ */
+ if (ref_count == 1 + map_count &&
+ !PageDirty(page)) {
 /* Invalidate as we cleared the pte */
 mmu_notifier_invalidate_range(mm,
 address, address + PAGE_SIZE);
@@ -1636,6 +1778,8 @@
 swp_pte = swp_entry_to_pte(entry);
 if (pte_soft_dirty(pteval))
 swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_uffd_wp(pteval))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
 set_pte_at(mm, address, pvmw.pte, swp_pte);
 /* Invalidate as we cleared the pte */
 mmu_notifier_invalidate_range(mm, address,
@@ -1665,28 +1809,15 @@
 put_page(page);
 }

- mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
+ mmu_notifier_invalidate_range_end(&range);
+ trace_android_vh_try_to_unmap_one(vma, page, address, ret);

 return ret;
 }

-bool is_vma_temporary_stack(struct vm_area_struct *vma)
-{
- int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
-
- if (!maybe_stack)
- return false;
-
- if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
- VM_STACK_INCOMPLETE_SETUP)
- return true;
-
- return false;
-}
-
 static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
 {
- return is_vma_temporary_stack(vma);
+ return vma_is_temporary_stack(vma);
 }

 static int page_not_mapped(struct page *page)
@@ -1779,19 +1910,29 @@
 struct anon_vma *anon_vma;

 if (rwc->anon_lock)
- return rwc->anon_lock(page);
+ return rwc->anon_lock(page, rwc);

 /*
 * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
 * because that depends on page_mapped(); but not all its usages
- * are holding mmap_sem. Users without mmap_sem are required to
+ * are holding mmap_lock. Users without mmap_lock are required to
 * take a reference count to prevent the anon_vma disappearing
 */
 anon_vma = page_anon_vma(page);
 if (!anon_vma)
 return NULL;

+ if (anon_vma_trylock_read(anon_vma))
+ goto out;
+
+ if (rwc->try_lock) {
+ anon_vma = NULL;
+ rwc->contended = true;
+ goto out;
+ }
+
 anon_vma_lock_read(anon_vma);
+out:
 return anon_vma;
 }

@@ -1804,7 +1945,7 @@
 * Find all the mappings of a page using the mapping pointer and the vma chains
 * contained in the anon_vma struct it points to.
 *
- * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
 * where the page was found will be held for write. So, we won't recheck
 * vm_flags for that VMA. That should be OK, because that vma shouldn't be
 * LOCKED.
@@ -1827,7 +1968,7 @@
 return;

 pgoff_start = page_to_pgoff(page);
- pgoff_end = pgoff_start + hpage_nr_pages(page) - 1;
+ pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
 anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
 pgoff_start, pgoff_end) {
 struct vm_area_struct *vma = avc->vma;
@@ -1857,7 +1998,7 @@
 * Find all the mappings of a page using the mapping pointer and the vma chains
 * contained in the address_space struct it points to.
 *
- * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
 * where the page was found will be held for write. So, we won't recheck
 * vm_flags for that VMA. That should be OK, because that vma shouldn't be
 * LOCKED.
@@ -1868,6 +2009,7 @@
 struct address_space *mapping = page_mapping(page);
 pgoff_t pgoff_start, pgoff_end;
 struct vm_area_struct *vma;
+ bool got_lock = false, success = false;

 /*
 * The page lock not only makes sure that page->mapping cannot
@@ -1881,9 +2023,26 @@
 return;

 pgoff_start = page_to_pgoff(page);
- pgoff_end = pgoff_start + hpage_nr_pages(page) - 1;
- if (!locked)
- i_mmap_lock_read(mapping);
+ pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
+ if (!locked) {
+ trace_android_vh_do_page_trylock(page,
+ &mapping->i_mmap_rwsem, &got_lock, &success);
+ if (success) {
+ if (!got_lock)
+ return;
+ } else {
+ if (i_mmap_trylock_read(mapping))
+ goto lookup;
+
+ if (rwc->try_lock) {
+ rwc->contended = true;
+ return;
+ }
+
+ i_mmap_lock_read(mapping);
+ }
+ }
+lookup:
 vma_interval_tree_foreach(vma, &mapping->i_mmap,
 pgoff_start, pgoff_end) {
 unsigned long address = vma_address(page, vma);
@@ -1928,27 +2087,10 @@

 #ifdef CONFIG_HUGETLB_PAGE
 /*
- * The following three functions are for anonymous (private mapped) hugepages.
+ * The following two functions are for anonymous (private mapped) hugepages.
 * Unlike common anonymous pages, anonymous hugepages have no accounting code
 * and no lru code, because we handle hugepages differently from common pages.
 */
-static void __hugepage_set_anon_rmap(struct page *page,
- struct vm_area_struct *vma, unsigned long address, int exclusive)
-{
- struct anon_vma *anon_vma = vma->anon_vma;
-
- BUG_ON(!anon_vma);
-
- if (PageAnon(page))
- return;
- if (!exclusive)
- anon_vma = anon_vma->root;
-
- anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
- page->mapping = (struct address_space *) anon_vma;
- page->index = linear_page_index(vma, address);
-}
-
 void hugepage_add_anon_rmap(struct page *page,
 struct vm_area_struct *vma, unsigned long address)
 {
@@ -1960,7 +2102,7 @@
 /* address might be in next vma when migration races vma_adjust */
 first = atomic_inc_and_test(compound_mapcount_ptr(page));
 if (first)
- __hugepage_set_anon_rmap(page, vma, address, 0);
+ __page_set_anon_rmap(page, vma, address, 0);
 }

 void hugepage_add_new_anon_rmap(struct page *page,
@@ -1968,6 +2110,9 @@
 {
 BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 atomic_set(compound_mapcount_ptr(page), 0);
- __hugepage_set_anon_rmap(page, vma, address, 1);
+ if (hpage_pincount_available(page))
+ atomic_set(compound_pincount_ptr(page), 0);
+
+ __page_set_anon_rmap(page, vma, address, 1);
 }
 #endif /* CONFIG_HUGETLB_PAGE */