.. | .. |
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
1 | 2 | /* |
2 | 3 | * Memory merging support. |
3 | 4 | * |
.. | .. |
10 | 11 | * Andrea Arcangeli |
11 | 12 | * Chris Wright |
12 | 13 | * Hugh Dickins |
13 | | - * |
14 | | - * This work is licensed under the terms of the GNU GPL, version 2. |
15 | 14 | */ |
16 | 15 | |
17 | 16 | #include <linux/errno.h> |
.. | .. |
25 | 24 | #include <linux/pagemap.h> |
26 | 25 | #include <linux/rmap.h> |
27 | 26 | #include <linux/spinlock.h> |
28 | | -#include <linux/jhash.h> |
| 27 | +#include <linux/xxhash.h> |
29 | 28 | #include <linux/delay.h> |
30 | 29 | #include <linux/kthread.h> |
31 | 30 | #include <linux/wait.h> |
.. | .. |
82 | 81 | * different KSM page copy of that content |
83 | 82 | * |
84 | 83 | * Internally, the regular nodes, "dups" and "chains" are represented |
85 | | - * using the same :c:type:`struct stable_node` structure. |
| 84 | + * using the same struct stable_node structure. |
86 | 85 | * |
87 | 86 | * In addition to the stable tree, KSM uses a second data structure called the |
88 | 87 | * unstable tree: this tree holds pointers to pages which have been found to |
.. | .. |
296 | 295 | static void wait_while_offlining(void); |
297 | 296 | |
298 | 297 | static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait); |
| 298 | +static DECLARE_WAIT_QUEUE_HEAD(ksm_iter_wait); |
299 | 299 | static DEFINE_MUTEX(ksm_thread_mutex); |
300 | 300 | static DEFINE_SPINLOCK(ksm_mmlist_lock); |
301 | 301 | |
.. | .. |
442 | 442 | /* |
443 | 443 | * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's |
444 | 444 | * page tables after it has passed through ksm_exit() - which, if necessary, |
445 | | - * takes mmap_sem briefly to serialize against them. ksm_exit() does not set |
| 445 | + * takes mmap_lock briefly to serialize against them. ksm_exit() does not set |
446 | 446 | * a special flag: they can just back out as soon as mm_users goes to zero. |
447 | 447 | * ksm_test_exit() is used throughout to make this test for exit: in some |
448 | 448 | * places for correctness, in some places just to avoid unnecessary work. |
.. | .. |
455 | 455 | /* |
456 | 456 | * We use break_ksm to break COW on a ksm page: it's a stripped down |
457 | 457 | * |
458 | | - * if (get_user_pages(addr, 1, 1, 1, &page, NULL) == 1) |
| 458 | + * if (get_user_pages(addr, 1, FOLL_WRITE, &page, NULL) == 1) |
459 | 459 | * put_page(page); |
460 | 460 | * |
461 | 461 | * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma, |
.. | .. |
480 | 480 | break; |
481 | 481 | if (PageKsm(page)) |
482 | 482 | ret = handle_mm_fault(vma, addr, |
483 | | - FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE); |
| 483 | + FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE, |
| 484 | + NULL); |
484 | 485 | else |
485 | 486 | ret = VM_FAULT_WRITE; |
486 | | - put_page(page); |
| 487 | + put_user_page(page); |
487 | 488 | } while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM))); |
488 | 489 | /* |
489 | 490 | * We must loop because handle_mm_fault() may back out if there's |
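
The extra NULL argument passed to handle_mm_fault() above matches a prototype that also takes the faulting register state; a hedged sketch of the assumed declaration (regs is NULL here because break_ksm() faults the page in on behalf of the process, not from a hardware exception):

    vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
                               unsigned int flags, struct pt_regs *regs);
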
.. | .. |
542 | 543 | */ |
543 | 544 | put_anon_vma(rmap_item->anon_vma); |
544 | 545 | |
545 | | - down_read(&mm->mmap_sem); |
| 546 | + mmap_read_lock(mm); |
546 | 547 | vma = find_mergeable_vma(mm, addr); |
547 | 548 | if (vma) |
548 | 549 | break_ksm(vma, addr); |
549 | | - up_read(&mm->mmap_sem); |
| 550 | + mmap_read_unlock(mm); |
550 | 551 | } |
551 | 552 | |
552 | 553 | static struct page *get_mergeable_page(struct rmap_item *rmap_item) |
.. | .. |
556 | 557 | struct vm_area_struct *vma; |
557 | 558 | struct page *page; |
558 | 559 | |
559 | | - down_read(&mm->mmap_sem); |
| 560 | + mmap_read_lock(mm); |
560 | 561 | vma = find_mergeable_vma(mm, addr); |
561 | 562 | if (!vma) |
562 | 563 | goto out; |
.. | .. |
568 | 569 | flush_anon_page(vma, page, addr); |
569 | 570 | flush_dcache_page(page); |
570 | 571 | } else { |
571 | | - put_page(page); |
| 572 | + put_user_page(page); |
572 | 573 | out: |
573 | 574 | page = NULL; |
574 | 575 | } |
575 | | - up_read(&mm->mmap_sem); |
| 576 | + mmap_read_unlock(mm); |
576 | 577 | return page; |
577 | 578 | } |
578 | 579 | |
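
The down_read(&mm->mmap_sem)/up_read() pairs throughout this patch become the mmap locking API. A minimal sketch of what the read-side wrappers are assumed to expand to (the real helpers live in a header and may also emit lock tracepoints):

    static inline void mmap_read_lock(struct mm_struct *mm)
    {
            down_read(&mm->mmap_lock);      /* mmap_sem renamed to mmap_lock */
    }

    static inline void mmap_read_unlock(struct mm_struct *mm)
    {
            up_read(&mm->mmap_lock);
    }
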
.. | .. |
597 | 598 | chain->chain_prune_time = jiffies; |
598 | 599 | chain->rmap_hlist_len = STABLE_NODE_CHAIN; |
599 | 600 | #if defined (CONFIG_DEBUG_VM) && defined(CONFIG_NUMA) |
600 | | - chain->nid = -1; /* debug */ |
| 601 | + chain->nid = NUMA_NO_NODE; /* debug */ |
601 | 602 | #endif |
602 | 603 | ksm_stable_node_chains++; |
603 | 604 | |
.. | .. |
612 | 613 | * Move the old stable node to the second dimension |
613 | 614 | * queued in the hlist_dup. The invariant is that all |
614 | 615 | * dup stable_nodes in the chain->hlist point to pages |
615 | | - * that are wrprotected and have the exact same |
| 616 | + * that are write protected and have the exact same |
616 | 617 | * content. |
617 | 618 | */ |
618 | 619 | stable_node_chain_add_dup(dup, chain); |
.. | .. |
666 | 667 | free_stable_node(stable_node); |
667 | 668 | } |
668 | 669 | |
| 670 | +enum get_ksm_page_flags { |
| 671 | + GET_KSM_PAGE_NOLOCK, |
| 672 | + GET_KSM_PAGE_LOCK, |
| 673 | + GET_KSM_PAGE_TRYLOCK |
| 674 | +}; |
| 675 | + |
669 | 676 | /* |
670 | 677 | * get_ksm_page: checks if the page indicated by the stable node |
671 | 678 | * is still its ksm page, despite having held no reference to it. |
.. | .. |
685 | 692 | * a page to put something that might look like our key in page->mapping. |
686 | 693 | * is on its way to being freed; but it is an anomaly to bear in mind. |
687 | 694 | */ |
688 | | -static struct page *get_ksm_page(struct stable_node *stable_node, bool lock_it) |
| 695 | +static struct page *get_ksm_page(struct stable_node *stable_node, |
| 696 | + enum get_ksm_page_flags flags) |
689 | 697 | { |
690 | 698 | struct page *page; |
691 | 699 | void *expected_mapping; |
.. | .. |
705 | 713 | * case this node is no longer referenced, and should be freed; |
706 | 714 | * however, it might mean that the page is under page_ref_freeze(). |
707 | 715 | * The __remove_mapping() case is easy, again the node is now stale; |
708 | | - * but if page is swapcache in migrate_page_move_mapping(), it might |
709 | | - * still be our page, in which case it's essential to keep the node. |
| 716 | + * the same is in reuse_ksm_page() case; but if page is swapcache |
| 717 | + * in migrate_page_move_mapping(), it might still be our page, |
| 718 | + * in which case it's essential to keep the node. |
710 | 719 | */ |
711 | 720 | while (!get_page_unless_zero(page)) { |
712 | 721 | /* |
.. | .. |
727 | 736 | goto stale; |
728 | 737 | } |
729 | 738 | |
730 | | - if (lock_it) { |
| 739 | + if (flags == GET_KSM_PAGE_TRYLOCK) { |
| 740 | + if (!trylock_page(page)) { |
| 741 | + put_page(page); |
| 742 | + return ERR_PTR(-EBUSY); |
| 743 | + } |
| 744 | + } else if (flags == GET_KSM_PAGE_LOCK) |
731 | 745 | lock_page(page); |
| 746 | + |
| 747 | + if (flags != GET_KSM_PAGE_NOLOCK) { |
732 | 748 | if (READ_ONCE(page->mapping) != expected_mapping) { |
733 | 749 | unlock_page(page); |
734 | 750 | put_page(page); |
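
get_ksm_page() now takes a tri-state flag instead of a bool: GET_KSM_PAGE_NOLOCK returns the page unlocked, GET_KSM_PAGE_LOCK sleeps on the page lock, and GET_KSM_PAGE_TRYLOCK only attempts trylock_page() and reports contention with ERR_PTR(-EBUSY). A sketch of the caller-side pattern the trylock mode implies, mirroring how stable_tree_search() uses it further down:

    struct page *page;

    page = get_ksm_page(stable_node, GET_KSM_PAGE_TRYLOCK);
    if (PTR_ERR(page) == -EBUSY)
            return ERR_PTR(-EBUSY);   /* page lock contended: let the caller retry */
    if (!page)
            return NULL;              /* stable node was stale and has been removed */
    /* ... inspect the locked ksm page ... */
    unlock_page(page);
    put_page(page);
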
.. | .. |
762 | 778 | struct page *page; |
763 | 779 | |
764 | 780 | stable_node = rmap_item->head; |
765 | | - page = get_ksm_page(stable_node, true); |
| 781 | + page = get_ksm_page(stable_node, GET_KSM_PAGE_LOCK); |
766 | 782 | if (!page) |
767 | 783 | goto out; |
768 | 784 | |
.. | .. |
817 | 833 | * Though it's very tempting to unmerge rmap_items from stable tree rather |
818 | 834 | * than check every pte of a given vma, the locking doesn't quite work for |
819 | 835 | * that - an rmap_item is assigned to the stable tree after inserting ksm |
820 | | - * page and upping mmap_sem. Nor does it fit with the way we skip dup'ing |
| 836 | + * page and upping mmap_lock. Nor does it fit with the way we skip dup'ing |
821 | 837 | * rmap_items from parent to child at fork time (so as not to waste time |
822 | 838 | * if exit comes before the next scan reaches it). |
823 | 839 | * |
.. | .. |
863 | 879 | struct page *page; |
864 | 880 | int err; |
865 | 881 | |
866 | | - page = get_ksm_page(stable_node, true); |
| 882 | + page = get_ksm_page(stable_node, GET_KSM_PAGE_LOCK); |
867 | 883 | if (!page) { |
868 | 884 | /* |
869 | 885 | * get_ksm_page did remove_node_from_stable_tree itself. |
.. | .. |
962 | 978 | for (mm_slot = ksm_scan.mm_slot; |
963 | 979 | mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) { |
964 | 980 | mm = mm_slot->mm; |
965 | | - down_read(&mm->mmap_sem); |
| 981 | + mmap_read_lock(mm); |
966 | 982 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
967 | 983 | if (ksm_test_exit(mm)) |
968 | 984 | break; |
.. | .. |
975 | 991 | } |
976 | 992 | |
977 | 993 | remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list); |
978 | | - up_read(&mm->mmap_sem); |
| 994 | + mmap_read_unlock(mm); |
979 | 995 | |
980 | 996 | spin_lock(&ksm_mmlist_lock); |
981 | 997 | ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next, |
.. | .. |
998 | 1014 | return 0; |
999 | 1015 | |
1000 | 1016 | error: |
1001 | | - up_read(&mm->mmap_sem); |
| 1017 | + mmap_read_unlock(mm); |
1002 | 1018 | spin_lock(&ksm_mmlist_lock); |
1003 | 1019 | ksm_scan.mm_slot = &ksm_mm_head; |
1004 | 1020 | spin_unlock(&ksm_mmlist_lock); |
.. | .. |
1010 | 1026 | { |
1011 | 1027 | u32 checksum; |
1012 | 1028 | void *addr = kmap_atomic(page); |
1013 | | - checksum = jhash2(addr, PAGE_SIZE / 4, 17); |
| 1029 | + checksum = xxhash(addr, PAGE_SIZE, 0); |
1014 | 1030 | kunmap_atomic(addr); |
1015 | 1031 | return checksum; |
1016 | | -} |
1017 | | - |
1018 | | -static int memcmp_pages(struct page *page1, struct page *page2) |
1019 | | -{ |
1020 | | - char *addr1, *addr2; |
1021 | | - int ret; |
1022 | | - |
1023 | | - addr1 = kmap_atomic(page1); |
1024 | | - addr2 = kmap_atomic(page2); |
1025 | | - ret = memcmp(addr1, addr2, PAGE_SIZE); |
1026 | | - kunmap_atomic(addr2); |
1027 | | - kunmap_atomic(addr1); |
1028 | | - return ret; |
1029 | | -} |
1030 | | - |
1031 | | -static inline int pages_identical(struct page *page1, struct page *page2) |
1032 | | -{ |
1033 | | - return !memcmp_pages(page1, page2); |
1034 | 1032 | } |
1035 | 1033 | |
1036 | 1034 | static int write_protect_page(struct vm_area_struct *vma, struct page *page, |
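
calc_checksum() switches from jhash2(), which hashes an array of u32s, to xxhash(), which hashes the page as a plain byte buffer with a zero seed. The xxhash() helper from <linux/xxhash.h> is assumed to reduce to roughly the following, picking the faster variant for the machine's word size:

    static inline unsigned long xxhash(const void *input, size_t length,
                                       uint64_t seed)
    {
    #if BITS_PER_LONG == 64
            return xxh64(input, length, seed);
    #else
            return xxh32(input, length, seed);
    #endif
    }
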
.. | .. |
1043 | 1041 | }; |
1044 | 1042 | int swapped; |
1045 | 1043 | int err = -EFAULT; |
1046 | | - unsigned long mmun_start; /* For mmu_notifiers */ |
1047 | | - unsigned long mmun_end; /* For mmu_notifiers */ |
| 1044 | + struct mmu_notifier_range range; |
1048 | 1045 | |
1049 | 1046 | pvmw.address = page_address_in_vma(page, vma); |
1050 | 1047 | if (pvmw.address == -EFAULT) |
.. | .. |
1052 | 1049 | |
1053 | 1050 | BUG_ON(PageTransCompound(page)); |
1054 | 1051 | |
1055 | | - mmun_start = pvmw.address; |
1056 | | - mmun_end = pvmw.address + PAGE_SIZE; |
1057 | | - mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); |
| 1052 | + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, |
| 1053 | + pvmw.address, |
| 1054 | + pvmw.address + PAGE_SIZE); |
| 1055 | + mmu_notifier_invalidate_range_start(&range); |
1058 | 1056 | |
1059 | 1057 | if (!page_vma_mapped_walk(&pvmw)) |
1060 | 1058 | goto out_mn; |
.. | .. |
1106 | 1104 | out_unlock: |
1107 | 1105 | page_vma_mapped_walk_done(&pvmw); |
1108 | 1106 | out_mn: |
1109 | | - mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); |
| 1107 | + mmu_notifier_invalidate_range_end(&range); |
1110 | 1108 | out: |
1111 | 1109 | return err; |
1112 | 1110 | } |
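
write_protect_page() above (and replace_page() below) drop the open-coded mmun_start/mmun_end pair in favour of a struct mmu_notifier_range that is initialised once and handed to both notifier calls. The single-page invalidation pattern used in both places looks like this sketch, with MMU_NOTIFY_CLEAR describing why the mapping is being changed:

    struct mmu_notifier_range range;

    mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
                            addr, addr + PAGE_SIZE);
    mmu_notifier_invalidate_range_start(&range);

    /* ... change the pte under the page table lock ... */

    mmu_notifier_invalidate_range_end(&range);
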
.. | .. |
1130 | 1128 | spinlock_t *ptl; |
1131 | 1129 | unsigned long addr; |
1132 | 1130 | int err = -EFAULT; |
1133 | | - unsigned long mmun_start; /* For mmu_notifiers */ |
1134 | | - unsigned long mmun_end; /* For mmu_notifiers */ |
| 1131 | + struct mmu_notifier_range range; |
1135 | 1132 | |
1136 | 1133 | addr = page_address_in_vma(page, vma); |
1137 | 1134 | if (addr == -EFAULT) |
.. | .. |
1141 | 1138 | if (!pmd) |
1142 | 1139 | goto out; |
1143 | 1140 | |
1144 | | - mmun_start = addr; |
1145 | | - mmun_end = addr + PAGE_SIZE; |
1146 | | - mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); |
| 1141 | + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr, |
| 1142 | + addr + PAGE_SIZE); |
| 1143 | + mmu_notifier_invalidate_range_start(&range); |
1147 | 1144 | |
1148 | 1145 | ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); |
1149 | 1146 | if (!pte_same(*ptep, orig_pte)) { |
.. | .. |
1153 | 1150 | |
1154 | 1151 | /* |
1155 | 1152 | * No need to check ksm_use_zero_pages here: we can only have a |
1156 | | - * zero_page here if ksm_use_zero_pages was enabled alreaady. |
| 1153 | + * zero_page here if ksm_use_zero_pages was enabled already. |
1157 | 1154 | */ |
1158 | 1155 | if (!is_zero_pfn(page_to_pfn(kpage))) { |
1159 | 1156 | get_page(kpage); |
.. | .. |
1189 | 1186 | pte_unmap_unlock(ptep, ptl); |
1190 | 1187 | err = 0; |
1191 | 1188 | out_mn: |
1192 | | - mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); |
| 1189 | + mmu_notifier_invalidate_range_end(&range); |
1193 | 1190 | out: |
1194 | 1191 | return err; |
1195 | 1192 | } |
.. | .. |
1285 | 1282 | struct vm_area_struct *vma; |
1286 | 1283 | int err = -EFAULT; |
1287 | 1284 | |
1288 | | - down_read(&mm->mmap_sem); |
| 1285 | + mmap_read_lock(mm); |
1289 | 1286 | vma = find_mergeable_vma(mm, rmap_item->address); |
1290 | 1287 | if (!vma) |
1291 | 1288 | goto out; |
.. | .. |
1297 | 1294 | /* Unstable nid is in union with stable anon_vma: remove first */ |
1298 | 1295 | remove_rmap_item_from_tree(rmap_item); |
1299 | 1296 | |
1300 | | - /* Must get reference to anon_vma while still holding mmap_sem */ |
| 1297 | + /* Must get reference to anon_vma while still holding mmap_lock */ |
1301 | 1298 | rmap_item->anon_vma = vma->anon_vma; |
1302 | 1299 | get_anon_vma(vma->anon_vma); |
1303 | 1300 | out: |
1304 | | - up_read(&mm->mmap_sem); |
| 1301 | + mmap_read_unlock(mm); |
1305 | 1302 | return err; |
1306 | 1303 | } |
1307 | 1304 | |
.. | .. |
1388 | 1385 | * stable_node parameter itself will be freed from |
1389 | 1386 | * under us if it returns NULL. |
1390 | 1387 | */ |
1391 | | - _tree_page = get_ksm_page(dup, false); |
| 1388 | + _tree_page = get_ksm_page(dup, GET_KSM_PAGE_NOLOCK); |
1392 | 1389 | if (!_tree_page) |
1393 | 1390 | continue; |
1394 | 1391 | nr += 1; |
.. | .. |
1511 | 1508 | if (!is_stable_node_chain(stable_node)) { |
1512 | 1509 | if (is_page_sharing_candidate(stable_node)) { |
1513 | 1510 | *_stable_node_dup = stable_node; |
1514 | | - return get_ksm_page(stable_node, false); |
| 1511 | + return get_ksm_page(stable_node, GET_KSM_PAGE_NOLOCK); |
1515 | 1512 | } |
1516 | 1513 | /* |
1517 | 1514 | * _stable_node_dup set to NULL means the stable_node |
.. | .. |
1613 | 1610 | * continue. All KSM pages belonging to the |
1614 | 1611 | * stable_node dups in a stable_node chain |
1615 | 1612 | * have the same content and they're |
1616 | | - * wrprotected at all times. Any will work |
| 1613 | + * write protected at all times. Any will work |
1617 | 1614 | * fine to continue the walk. |
1618 | 1615 | */ |
1619 | | - tree_page = get_ksm_page(stable_node_any, false); |
| 1616 | + tree_page = get_ksm_page(stable_node_any, |
| 1617 | + GET_KSM_PAGE_NOLOCK); |
1620 | 1618 | } |
1621 | 1619 | VM_BUG_ON(!stable_node_dup ^ !!stable_node_any); |
1622 | 1620 | if (!tree_page) { |
.. | .. |
1676 | 1674 | * It would be more elegant to return stable_node |
1677 | 1675 | * than kpage, but that involves more changes. |
1678 | 1676 | */ |
1679 | | - tree_page = get_ksm_page(stable_node_dup, true); |
| 1677 | + tree_page = get_ksm_page(stable_node_dup, |
| 1678 | + GET_KSM_PAGE_TRYLOCK); |
| 1679 | + |
| 1680 | + if (PTR_ERR(tree_page) == -EBUSY) |
| 1681 | + return ERR_PTR(-EBUSY); |
| 1682 | + |
1680 | 1683 | if (unlikely(!tree_page)) |
1681 | 1684 | /* |
1682 | 1685 | * The tree may have been rebalanced, |
.. | .. |
1842 | 1845 | * continue. All KSM pages belonging to the |
1843 | 1846 | * stable_node dups in a stable_node chain |
1844 | 1847 | * have the same content and they're |
1845 | | - * wrprotected at all times. Any will work |
| 1848 | + * write protected at all times. Any will work |
1846 | 1849 | * fine to continue the walk. |
1847 | 1850 | */ |
1848 | | - tree_page = get_ksm_page(stable_node_any, false); |
| 1851 | + tree_page = get_ksm_page(stable_node_any, |
| 1852 | + GET_KSM_PAGE_NOLOCK); |
1849 | 1853 | } |
1850 | 1854 | VM_BUG_ON(!stable_node_dup ^ !!stable_node_any); |
1851 | 1855 | if (!tree_page) { |
.. | .. |
1946 | 1950 | * Don't substitute a ksm page for a forked page. |
1947 | 1951 | */ |
1948 | 1952 | if (page == tree_page) { |
1949 | | - put_page(tree_page); |
| 1953 | + put_user_page(tree_page); |
1950 | 1954 | return NULL; |
1951 | 1955 | } |
1952 | 1956 | |
.. | .. |
1954 | 1958 | |
1955 | 1959 | parent = *new; |
1956 | 1960 | if (ret < 0) { |
1957 | | - put_page(tree_page); |
| 1961 | + put_user_page(tree_page); |
1958 | 1962 | new = &parent->rb_left; |
1959 | 1963 | } else if (ret > 0) { |
1960 | | - put_page(tree_page); |
| 1964 | + put_user_page(tree_page); |
1961 | 1965 | new = &parent->rb_right; |
1962 | 1966 | } else if (!ksm_merge_across_nodes && |
1963 | 1967 | page_to_nid(tree_page) != nid) { |
.. | .. |
1966 | 1970 | * it will be flushed out and put in the right unstable |
1967 | 1971 | * tree next time: only merge with it when across_nodes. |
1968 | 1972 | */ |
1969 | | - put_page(tree_page); |
| 1973 | + put_user_page(tree_page); |
1970 | 1974 | return NULL; |
1971 | 1975 | } else { |
1972 | 1976 | *tree_pagep = tree_page; |
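
Several put_page() calls on pages obtained via follow_page()/get_user_pages() become put_user_page() in this diff, which looks like part of the get_user_pages() reference-tracking work. A hypothetical minimal form of such a helper is sketched below; the real one in that series may additionally account for the page having been a GUP target:

    /* hypothetical sketch, not the series' actual definition */
    static inline void put_user_page(struct page *page)
    {
            put_page(page);
    }
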
.. | .. |
1999 | 2003 | * duplicate. page_migration could break later if rmap breaks, |
2000 | 2004 | * so we can as well crash here. We really need to check for |
2001 | 2005 | * rmap_hlist_len == STABLE_NODE_CHAIN, but we can as well check |
2002 | | - * for other negative values as an undeflow if detected here |
| 2006 | + * for other negative values as an underflow if detected here |
2003 | 2007 | * for the first time (and not when decreasing rmap_hlist_len) |
2004 | 2008 | * would be sign of memory corruption in the stable_node. |
2005 | 2009 | */ |
.. | .. |
2071 | 2075 | remove_rmap_item_from_tree(rmap_item); |
2072 | 2076 | |
2073 | 2077 | if (kpage) { |
| 2078 | + if (PTR_ERR(kpage) == -EBUSY) |
| 2079 | + return; |
| 2080 | + |
2074 | 2081 | err = try_to_merge_with_ksm_page(rmap_item, page, kpage); |
2075 | 2082 | if (!err) { |
2076 | 2083 | /* |
.. | .. |
2105 | 2112 | if (ksm_use_zero_pages && (checksum == zero_checksum)) { |
2106 | 2113 | struct vm_area_struct *vma; |
2107 | 2114 | |
2108 | | - down_read(&mm->mmap_sem); |
| 2115 | + mmap_read_lock(mm); |
2109 | 2116 | vma = find_mergeable_vma(mm, rmap_item->address); |
2110 | 2117 | if (vma) { |
2111 | 2118 | err = try_to_merge_one_page(vma, page, |
.. | .. |
2117 | 2124 | */ |
2118 | 2125 | err = 0; |
2119 | 2126 | } |
2120 | | - up_read(&mm->mmap_sem); |
| 2127 | + mmap_read_unlock(mm); |
2121 | 2128 | /* |
2122 | 2129 | * In case of failure, the page was not really empty, so we |
2123 | 2130 | * need to continue. Otherwise we're done. |
.. | .. |
2144 | 2151 | */ |
2145 | 2152 | split = PageTransCompound(page) |
2146 | 2153 | && compound_head(page) == compound_head(tree_page); |
2147 | | - put_page(tree_page); |
| 2154 | + put_user_page(tree_page); |
2148 | 2155 | if (kpage) { |
2149 | 2156 | /* |
2150 | 2157 | * The pages were successfully merged: insert new |
.. | .. |
2253 | 2260 | |
2254 | 2261 | list_for_each_entry_safe(stable_node, next, |
2255 | 2262 | &migrate_nodes, list) { |
2256 | | - page = get_ksm_page(stable_node, false); |
| 2263 | + page = get_ksm_page(stable_node, |
| 2264 | + GET_KSM_PAGE_NOLOCK); |
2257 | 2265 | if (page) |
2258 | 2266 | put_page(page); |
2259 | 2267 | cond_resched(); |
.. | .. |
2279 | 2287 | } |
2280 | 2288 | |
2281 | 2289 | mm = slot->mm; |
2282 | | - down_read(&mm->mmap_sem); |
| 2290 | + mmap_read_lock(mm); |
2283 | 2291 | if (ksm_test_exit(mm)) |
2284 | 2292 | vma = NULL; |
2285 | 2293 | else |
.. | .. |
2312 | 2320 | &rmap_item->rmap_list; |
2313 | 2321 | ksm_scan.address += PAGE_SIZE; |
2314 | 2322 | } else |
2315 | | - put_page(*page); |
2316 | | - up_read(&mm->mmap_sem); |
| 2323 | + put_user_page(*page); |
| 2324 | + mmap_read_unlock(mm); |
2317 | 2325 | return rmap_item; |
2318 | 2326 | } |
2319 | | - put_page(*page); |
| 2327 | + put_user_page(*page); |
2320 | 2328 | ksm_scan.address += PAGE_SIZE; |
2321 | 2329 | cond_resched(); |
2322 | 2330 | } |
.. | .. |
2337 | 2345 | struct mm_slot, mm_list); |
2338 | 2346 | if (ksm_scan.address == 0) { |
2339 | 2347 | /* |
2340 | | - * We've completed a full scan of all vmas, holding mmap_sem |
| 2348 | + * We've completed a full scan of all vmas, holding mmap_lock |
2341 | 2349 | * throughout, and found no VM_MERGEABLE: so do the same as |
2342 | 2350 | * __ksm_exit does to remove this mm from all our lists now. |
2343 | 2351 | * This applies either when cleaning up after __ksm_exit |
2344 | 2352 | * (but beware: we can reach here even before __ksm_exit), |
2345 | 2353 | * or when all VM_MERGEABLE areas have been unmapped (and |
2346 | | - * mmap_sem then protects against race with MADV_MERGEABLE). |
| 2354 | + * mmap_lock then protects against race with MADV_MERGEABLE). |
2347 | 2355 | */ |
2348 | 2356 | hash_del(&slot->link); |
2349 | 2357 | list_del(&slot->mm_list); |
.. | .. |
2351 | 2359 | |
2352 | 2360 | free_mm_slot(slot); |
2353 | 2361 | clear_bit(MMF_VM_MERGEABLE, &mm->flags); |
2354 | | - up_read(&mm->mmap_sem); |
| 2362 | + mmap_read_unlock(mm); |
2355 | 2363 | mmdrop(mm); |
2356 | 2364 | } else { |
2357 | | - up_read(&mm->mmap_sem); |
| 2365 | + mmap_read_unlock(mm); |
2358 | 2366 | /* |
2359 | | - * up_read(&mm->mmap_sem) first because after |
| 2367 | + * mmap_read_unlock(mm) first because after |
2360 | 2368 | * spin_unlock(&ksm_mmlist_lock) run, the "mm" may |
2361 | 2369 | * already have been freed under us by __ksm_exit() |
2362 | 2370 | * because the "mm_slot" is still hashed and |
.. | .. |
2381 | 2389 | static void ksm_do_scan(unsigned int scan_npages) |
2382 | 2390 | { |
2383 | 2391 | struct rmap_item *rmap_item; |
2384 | | - struct page *uninitialized_var(page); |
| 2392 | + struct page *page; |
2385 | 2393 | |
2386 | 2394 | while (scan_npages-- && likely(!freezing(current))) { |
2387 | 2395 | cond_resched(); |
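
The uninitialized_var() annotation disappears from ksm_do_scan(): the macro existed only to silence false-positive "may be used uninitialized" warnings and is assumed to have been defined roughly as below, so dropping it does not change the generated code in any meaningful way:

    #define uninitialized_var(x) x = x      /* old warning-suppression idiom */
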
.. | .. |
2400 | 2408 | |
2401 | 2409 | static int ksm_scan_thread(void *nothing) |
2402 | 2410 | { |
| 2411 | + unsigned int sleep_ms; |
| 2412 | + |
2403 | 2413 | set_freezable(); |
2404 | 2414 | set_user_nice(current, 5); |
2405 | 2415 | |
.. | .. |
2413 | 2423 | try_to_freeze(); |
2414 | 2424 | |
2415 | 2425 | if (ksmd_should_run()) { |
2416 | | - schedule_timeout_interruptible( |
2417 | | - msecs_to_jiffies(ksm_thread_sleep_millisecs)); |
| 2426 | + sleep_ms = READ_ONCE(ksm_thread_sleep_millisecs); |
| 2427 | + wait_event_interruptible_timeout(ksm_iter_wait, |
| 2428 | + sleep_ms != READ_ONCE(ksm_thread_sleep_millisecs), |
| 2429 | + msecs_to_jiffies(sleep_ms)); |
2418 | 2430 | } else { |
2419 | 2431 | wait_event_freezable(ksm_thread_wait, |
2420 | 2432 | ksmd_should_run() || kthread_should_stop()); |
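
Instead of an unconditional schedule_timeout_interruptible(), ksmd now sleeps on ksm_iter_wait and wakes early if the sleep interval changes; the sysfs store in the final hunk provides the matching wake-up, so a newly written, shorter interval takes effect immediately. The two halves of that handshake, using the names from this diff:

    /* scanner side: time out, or wake as soon as the interval is retuned */
    sleep_ms = READ_ONCE(ksm_thread_sleep_millisecs);
    wait_event_interruptible_timeout(ksm_iter_wait,
            sleep_ms != READ_ONCE(ksm_thread_sleep_millisecs),
            msecs_to_jiffies(sleep_ms));

    /* sysfs store side: publish the new value, then kick the waiter */
    ksm_thread_sleep_millisecs = msecs;
    wake_up_interruptible(&ksm_iter_wait);
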
.. | .. |
2476 | 2488 | |
2477 | 2489 | return 0; |
2478 | 2490 | } |
| 2491 | +EXPORT_SYMBOL_GPL(ksm_madvise); |
2479 | 2492 | |
2480 | 2493 | int __ksm_enter(struct mm_struct *mm) |
2481 | 2494 | { |
.. | .. |
2525 | 2538 | * This process is exiting: if it's straightforward (as is the |
2526 | 2539 | * case when ksmd was never running), free mm_slot immediately. |
2527 | 2540 | * But if it's at the cursor or has rmap_items linked to it, use |
2528 | | - * mmap_sem to synchronize with any break_cows before pagetables |
| 2541 | + * mmap_lock to synchronize with any break_cows before pagetables |
2529 | 2542 | * are freed, and leave the mm_slot on the list for ksmd to free. |
2530 | 2543 | * Beware: ksm may already have noticed it exiting and freed the slot. |
2531 | 2544 | */ |
.. | .. |
2549 | 2562 | clear_bit(MMF_VM_MERGEABLE, &mm->flags); |
2550 | 2563 | mmdrop(mm); |
2551 | 2564 | } else if (mm_slot) { |
2552 | | - down_write(&mm->mmap_sem); |
2553 | | - up_write(&mm->mmap_sem); |
| 2565 | + mmap_write_lock(mm); |
| 2566 | + mmap_write_unlock(mm); |
2554 | 2567 | } |
2555 | 2568 | } |
2556 | 2569 | |
.. | .. |
2574 | 2587 | return page; /* let do_swap_page report the error */ |
2575 | 2588 | |
2576 | 2589 | new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); |
| 2590 | + if (new_page && mem_cgroup_charge(new_page, vma->vm_mm, GFP_KERNEL)) { |
| 2591 | + put_page(new_page); |
| 2592 | + new_page = NULL; |
| 2593 | + } |
2577 | 2594 | if (new_page) { |
2578 | 2595 | copy_user_highpage(new_page, page, address, vma); |
2579 | 2596 | |
.. | .. |
2609 | 2626 | struct vm_area_struct *vma; |
2610 | 2627 | |
2611 | 2628 | cond_resched(); |
2612 | | - anon_vma_lock_read(anon_vma); |
| 2629 | + if (!anon_vma_trylock_read(anon_vma)) { |
| 2630 | + if (rwc->try_lock) { |
| 2631 | + rwc->contended = true; |
| 2632 | + return; |
| 2633 | + } |
| 2634 | + anon_vma_lock_read(anon_vma); |
| 2635 | + } |
2613 | 2636 | anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, |
2614 | 2637 | 0, ULONG_MAX) { |
2615 | 2638 | unsigned long addr; |
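
rmap_walk_ksm() gains a non-blocking mode: when the walk control has try_lock set, a contended anon_vma lock marks the walk as contended and returns instead of sleeping. A hedged sketch of how a caller could use those two fields (field names are taken from this hunk; the callback name is hypothetical):

    struct rmap_walk_control rwc = {
            .rmap_one = my_rmap_one,        /* hypothetical per-mapping callback */
            .try_lock = true,               /* don't sleep on anon_vma locks */
    };

    rmap_walk(page, &rwc);
    if (rwc.contended) {
            /* a lock was busy: retry later or repeat with a blocking walk */
    }
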
.. | .. |
2785 | 2808 | */ |
2786 | 2809 | ksm_check_stable_tree(mn->start_pfn, |
2787 | 2810 | mn->start_pfn + mn->nr_pages); |
2788 | | - /* fallthrough */ |
2789 | | - |
| 2811 | + fallthrough; |
2790 | 2812 | case MEM_CANCEL_OFFLINE: |
2791 | 2813 | mutex_lock(&ksm_thread_mutex); |
2792 | 2814 | ksm_run &= ~KSM_RUN_OFFLINE; |
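
The /* fallthrough */ comment becomes the fallthrough pseudo-keyword, which the compiler can verify where the attribute is supported. It is assumed to be defined along these lines in the compiler headers:

    #if __has_attribute(__fallthrough__)
    # define fallthrough    __attribute__((__fallthrough__))
    #else
    # define fallthrough    do {} while (0) /* fallthrough */
    #endif
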
.. | .. |
2833 | 2855 | return -EINVAL; |
2834 | 2856 | |
2835 | 2857 | ksm_thread_sleep_millisecs = msecs; |
| 2858 | + wake_up_interruptible(&ksm_iter_wait); |
2836 | 2859 | |
2837 | 2860 | return count; |
2838 | 2861 | } |