2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
--- a/kernel/mm/ksm.c
+++ b/kernel/mm/ksm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Memory merging support.
  *
@@ -10,8 +11,6 @@
  *	Andrea Arcangeli
  *	Chris Wright
  *	Hugh Dickins
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
  */
 
 #include <linux/errno.h>
@@ -25,7 +24,7 @@
 #include <linux/pagemap.h>
 #include <linux/rmap.h>
 #include <linux/spinlock.h>
-#include <linux/jhash.h>
+#include <linux/xxhash.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/wait.h>
@@ -82,7 +81,7 @@
  *    different KSM page copy of that content
  *
  * Internally, the regular nodes, "dups" and "chains" are represented
- * using the same :c:type:`struct stable_node` structure.
+ * using the same struct stable_node structure.
  *
  * In addition to the stable tree, KSM uses a second data structure called the
  * unstable tree: this tree holds pointers to pages which have been found to
@@ -296,6 +295,7 @@
 static void wait_while_offlining(void);
 
 static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
+static DECLARE_WAIT_QUEUE_HEAD(ksm_iter_wait);
 static DEFINE_MUTEX(ksm_thread_mutex);
 static DEFINE_SPINLOCK(ksm_mmlist_lock);
 
@@ -442,7 +442,7 @@
 /*
  * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
  * page tables after it has passed through ksm_exit() - which, if necessary,
- * takes mmap_sem briefly to serialize against them. ksm_exit() does not set
+ * takes mmap_lock briefly to serialize against them. ksm_exit() does not set
  * a special flag: they can just back out as soon as mm_users goes to zero.
 * ksm_test_exit() is used throughout to make this test for exit: in some
 * places for correctness, in some places just to avoid unnecessary work.
@@ -455,7 +455,7 @@
 /*
  * We use break_ksm to break COW on a ksm page: it's a stripped down
  *
- *	if (get_user_pages(addr, 1, 1, 1, &page, NULL) == 1)
+ *	if (get_user_pages(addr, 1, FOLL_WRITE, &page, NULL) == 1)
 *		put_page(page);
 *
 * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma,
@@ -480,10 +480,11 @@
 			break;
 		if (PageKsm(page))
 			ret = handle_mm_fault(vma, addr,
-					FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE);
+					FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE,
+					NULL);
 		else
 			ret = VM_FAULT_WRITE;
-		put_page(page);
+		put_user_page(page);
 	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM)));
 	/*
 	 * We must loop because handle_mm_fault() may back out if there's
@@ -542,11 +543,11 @@
 	 */
 	put_anon_vma(rmap_item->anon_vma);
 
-	down_read(&mm->mmap_sem);
+	mmap_read_lock(mm);
 	vma = find_mergeable_vma(mm, addr);
 	if (vma)
 		break_ksm(vma, addr);
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
 }
 
 static struct page *get_mergeable_page(struct rmap_item *rmap_item)
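The down_read(&mm->mmap_sem)/up_read() pairs converted above (and in the hunks below) become the mmap_read_lock()/mmap_read_unlock() wrappers. As a rough sketch of what those wrappers amount to, assuming the upstream mmap_sem -> mmap_lock field rename (the real definitions live in include/linux/mmap_lock.h and later gained lock instrumentation):

#include <linux/mm_types.h>	/* struct mm_struct */
#include <linux/rwsem.h>	/* down_read()/up_read() */

/* Sketch only, not the patch itself: same rw_semaphore, new name and wrapper. */
static inline void mmap_read_lock_sketch(struct mm_struct *mm)
{
	down_read(&mm->mmap_lock);
}

static inline void mmap_read_unlock_sketch(struct mm_struct *mm)
{
	up_read(&mm->mmap_lock);
}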
@@ -556,7 +557,7 @@
 	struct vm_area_struct *vma;
 	struct page *page;
 
-	down_read(&mm->mmap_sem);
+	mmap_read_lock(mm);
 	vma = find_mergeable_vma(mm, addr);
 	if (!vma)
 		goto out;
@@ -568,11 +569,11 @@
 		flush_anon_page(vma, page, addr);
 		flush_dcache_page(page);
 	} else {
-		put_page(page);
+		put_user_page(page);
 out:
 		page = NULL;
 	}
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
 	return page;
 }
 
@@ -597,7 +598,7 @@
 	chain->chain_prune_time = jiffies;
 	chain->rmap_hlist_len = STABLE_NODE_CHAIN;
 #if defined (CONFIG_DEBUG_VM) && defined(CONFIG_NUMA)
-	chain->nid = -1; /* debug */
+	chain->nid = NUMA_NO_NODE; /* debug */
 #endif
 	ksm_stable_node_chains++;
 
@@ -612,7 +613,7 @@
 	 * Move the old stable node to the second dimension
 	 * queued in the hlist_dup. The invariant is that all
 	 * dup stable_nodes in the chain->hlist point to pages
-	 * that are wrprotected and have the exact same
+	 * that are write protected and have the exact same
 	 * content.
 	 */
 	stable_node_chain_add_dup(dup, chain);
@@ -666,6 +667,12 @@
 	free_stable_node(stable_node);
 }
 
+enum get_ksm_page_flags {
+	GET_KSM_PAGE_NOLOCK,
+	GET_KSM_PAGE_LOCK,
+	GET_KSM_PAGE_TRYLOCK
+};
+
 /*
  * get_ksm_page: checks if the page indicated by the stable node
 * is still its ksm page, despite having held no reference to it.
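The three get_ksm_page_flags values replace the old bool lock_it argument: NOLOCK and LOCK keep the old false/true behaviour, while TRYLOCK is new. A fragment sketching the caller pattern the new mode enables (it mirrors the stable_tree_search() and cmp_and_merge_page() hunks further down, not a complete function):

	struct page *tree_page;

	/* Look the page up without sleeping on its page lock. */
	tree_page = get_ksm_page(stable_node_dup, GET_KSM_PAGE_TRYLOCK);
	if (PTR_ERR(tree_page) == -EBUSY)
		return ERR_PTR(-EBUSY);	/* lock contended: caller retries later */
	if (unlikely(!tree_page))
		return NULL;	/* stale node; the real caller restarts its tree walk here */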
@@ -685,7 +692,8 @@
  * a page to put something that might look like our key in page->mapping.
  * is on its way to being freed; but it is an anomaly to bear in mind.
  */
-static struct page *get_ksm_page(struct stable_node *stable_node, bool lock_it)
+static struct page *get_ksm_page(struct stable_node *stable_node,
+				 enum get_ksm_page_flags flags)
 {
 	struct page *page;
 	void *expected_mapping;
@@ -705,8 +713,9 @@
 	 * case this node is no longer referenced, and should be freed;
 	 * however, it might mean that the page is under page_ref_freeze().
 	 * The __remove_mapping() case is easy, again the node is now stale;
-	 * but if page is swapcache in migrate_page_move_mapping(), it might
-	 * still be our page, in which case it's essential to keep the node.
+	 * the same is in reuse_ksm_page() case; but if page is swapcache
+	 * in migrate_page_move_mapping(), it might still be our page,
+	 * in which case it's essential to keep the node.
 	 */
 	while (!get_page_unless_zero(page)) {
 		/*
@@ -727,8 +736,15 @@
 		goto stale;
 	}
 
-	if (lock_it) {
+	if (flags == GET_KSM_PAGE_TRYLOCK) {
+		if (!trylock_page(page)) {
+			put_page(page);
+			return ERR_PTR(-EBUSY);
+		}
+	} else if (flags == GET_KSM_PAGE_LOCK)
 		lock_page(page);
+
+	if (flags != GET_KSM_PAGE_NOLOCK) {
 		if (READ_ONCE(page->mapping) != expected_mapping) {
 			unlock_page(page);
 			put_page(page);
@@ -762,7 +778,7 @@
 	struct page *page;
 
 	stable_node = rmap_item->head;
-	page = get_ksm_page(stable_node, true);
+	page = get_ksm_page(stable_node, GET_KSM_PAGE_LOCK);
 	if (!page)
 		goto out;
 
@@ -817,7 +833,7 @@
  * Though it's very tempting to unmerge rmap_items from stable tree rather
  * than check every pte of a given vma, the locking doesn't quite work for
  * that - an rmap_item is assigned to the stable tree after inserting ksm
- * page and upping mmap_sem. Nor does it fit with the way we skip dup'ing
+ * page and upping mmap_lock. Nor does it fit with the way we skip dup'ing
 * rmap_items from parent to child at fork time (so as not to waste time
 * if exit comes before the next scan reaches it).
 *
@@ -863,7 +879,7 @@
 	struct page *page;
 	int err;
 
-	page = get_ksm_page(stable_node, true);
+	page = get_ksm_page(stable_node, GET_KSM_PAGE_LOCK);
 	if (!page) {
 		/*
 		 * get_ksm_page did remove_node_from_stable_tree itself.
@@ -962,7 +978,7 @@
 	for (mm_slot = ksm_scan.mm_slot;
 			mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) {
 		mm = mm_slot->mm;
-		down_read(&mm->mmap_sem);
+		mmap_read_lock(mm);
 		for (vma = mm->mmap; vma; vma = vma->vm_next) {
 			if (ksm_test_exit(mm))
 				break;
@@ -975,7 +991,7 @@
 		}
 
 		remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);
-		up_read(&mm->mmap_sem);
+		mmap_read_unlock(mm);
 
 		spin_lock(&ksm_mmlist_lock);
 		ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
@@ -998,7 +1014,7 @@
 	return 0;
 
 error:
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
 	spin_lock(&ksm_mmlist_lock);
 	ksm_scan.mm_slot = &ksm_mm_head;
 	spin_unlock(&ksm_mmlist_lock);
@@ -1010,27 +1026,9 @@
 {
 	u32 checksum;
 	void *addr = kmap_atomic(page);
-	checksum = jhash2(addr, PAGE_SIZE / 4, 17);
+	checksum = xxhash(addr, PAGE_SIZE, 0);
 	kunmap_atomic(addr);
 	return checksum;
-}
-
-static int memcmp_pages(struct page *page1, struct page *page2)
-{
-	char *addr1, *addr2;
-	int ret;
-
-	addr1 = kmap_atomic(page1);
-	addr2 = kmap_atomic(page2);
-	ret = memcmp(addr1, addr2, PAGE_SIZE);
-	kunmap_atomic(addr2);
-	kunmap_atomic(addr1);
-	return ret;
-}
-
-static inline int pages_identical(struct page *page1, struct page *page2)
-{
-	return !memcmp_pages(page1, page2);
 }
 
 static int write_protect_page(struct vm_area_struct *vma, struct page *page,
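Illustration (not part of the patch): the per-page checksum switches from jhash2() over PAGE_SIZE/4 32-bit words to xxhash() over the whole page. The same idea in a small userspace program, assuming the reference xxHash library and its XXH64() entry point rather than the kernel's <linux/xxhash.h>:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <xxhash.h>		/* link with -lxxhash */

#define PAGE_SIZE 4096

static uint32_t calc_checksum(const void *page)
{
	/* KSM only needs a fast, non-cryptographic fingerprint to notice
	 * that a page changed between scans; truncating XXH64 to 32 bits
	 * is enough for that purpose. */
	return (uint32_t)XXH64(page, PAGE_SIZE, 0);
}

int main(void)
{
	static char a[PAGE_SIZE], b[PAGE_SIZE];

	memset(a, 0x5a, sizeof(a));
	memcpy(b, a, sizeof(b));
	b[123] ^= 1;	/* flip one bit: the checksums should differ */

	printf("a=%08x b=%08x\n", (unsigned)calc_checksum(a), (unsigned)calc_checksum(b));
	return 0;
}

The memcmp_pages()/pages_identical() helpers deleted in the same hunk are unrelated to the hash change: the checksum is only a "did it change" filter, and a full memcmp() of the pages still decides actual merging; in mainline those helpers were moved out of ksm.c (to mm/util.c) rather than dropped.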
@@ -1043,8 +1041,7 @@
 	};
 	int swapped;
 	int err = -EFAULT;
-	unsigned long mmun_start;	/* For mmu_notifiers */
-	unsigned long mmun_end;	/* For mmu_notifiers */
+	struct mmu_notifier_range range;
 
 	pvmw.address = page_address_in_vma(page, vma);
 	if (pvmw.address == -EFAULT)
@@ -1052,9 +1049,10 @@
 
 	BUG_ON(PageTransCompound(page));
 
-	mmun_start = pvmw.address;
-	mmun_end = pvmw.address + PAGE_SIZE;
-	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
+				pvmw.address,
+				pvmw.address + PAGE_SIZE);
+	mmu_notifier_invalidate_range_start(&range);
 
 	if (!page_vma_mapped_walk(&pvmw))
 		goto out_mn;
@@ -1106,7 +1104,7 @@
 out_unlock:
 	page_vma_mapped_walk_done(&pvmw);
 out_mn:
-	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+	mmu_notifier_invalidate_range_end(&range);
 out:
 	return err;
 }
@@ -1130,8 +1128,7 @@
 	spinlock_t *ptl;
 	unsigned long addr;
 	int err = -EFAULT;
-	unsigned long mmun_start;	/* For mmu_notifiers */
-	unsigned long mmun_end;	/* For mmu_notifiers */
+	struct mmu_notifier_range range;
 
 	addr = page_address_in_vma(page, vma);
 	if (addr == -EFAULT)
@@ -1141,9 +1138,9 @@
 	if (!pmd)
 		goto out;
 
-	mmun_start = addr;
-	mmun_end = addr + PAGE_SIZE;
-	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
+				addr + PAGE_SIZE);
+	mmu_notifier_invalidate_range_start(&range);
 
 	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	if (!pte_same(*ptep, orig_pte)) {
@@ -1153,7 +1150,7 @@
 
 	/*
 	 * No need to check ksm_use_zero_pages here: we can only have a
-	 * zero_page here if ksm_use_zero_pages was enabled alreaady.
+	 * zero_page here if ksm_use_zero_pages was enabled already.
 	 */
 	if (!is_zero_pfn(page_to_pfn(kpage))) {
 		get_page(kpage);
@@ -1189,7 +1186,7 @@
 	pte_unmap_unlock(ptep, ptl);
 	err = 0;
 out_mn:
-	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+	mmu_notifier_invalidate_range_end(&range);
 out:
 	return err;
 }
@@ -1285,7 +1282,7 @@
 	struct vm_area_struct *vma;
 	int err = -EFAULT;
 
-	down_read(&mm->mmap_sem);
+	mmap_read_lock(mm);
 	vma = find_mergeable_vma(mm, rmap_item->address);
 	if (!vma)
 		goto out;
@@ -1297,11 +1294,11 @@
 	/* Unstable nid is in union with stable anon_vma: remove first */
 	remove_rmap_item_from_tree(rmap_item);
 
-	/* Must get reference to anon_vma while still holding mmap_sem */
+	/* Must get reference to anon_vma while still holding mmap_lock */
 	rmap_item->anon_vma = vma->anon_vma;
 	get_anon_vma(vma->anon_vma);
 out:
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
 	return err;
 }
 
@@ -1388,7 +1385,7 @@
 			 * stable_node parameter itself will be freed from
 			 * under us if it returns NULL.
 			 */
-			_tree_page = get_ksm_page(dup, false);
+			_tree_page = get_ksm_page(dup, GET_KSM_PAGE_NOLOCK);
 			if (!_tree_page)
 				continue;
 			nr += 1;
@@ -1511,7 +1508,7 @@
 	if (!is_stable_node_chain(stable_node)) {
 		if (is_page_sharing_candidate(stable_node)) {
 			*_stable_node_dup = stable_node;
-			return get_ksm_page(stable_node, false);
+			return get_ksm_page(stable_node, GET_KSM_PAGE_NOLOCK);
 		}
 		/*
 		 * _stable_node_dup set to NULL means the stable_node
@@ -1613,10 +1610,11 @@
 			 * continue. All KSM pages belonging to the
 			 * stable_node dups in a stable_node chain
 			 * have the same content and they're
-			 * wrprotected at all times. Any will work
+			 * write protected at all times. Any will work
 			 * fine to continue the walk.
 			 */
-			tree_page = get_ksm_page(stable_node_any, false);
+			tree_page = get_ksm_page(stable_node_any,
+						 GET_KSM_PAGE_NOLOCK);
 		}
 		VM_BUG_ON(!stable_node_dup ^ !!stable_node_any);
 		if (!tree_page) {
@@ -1676,7 +1674,12 @@
 		 * It would be more elegant to return stable_node
 		 * than kpage, but that involves more changes.
 		 */
-		tree_page = get_ksm_page(stable_node_dup, true);
+		tree_page = get_ksm_page(stable_node_dup,
+					 GET_KSM_PAGE_TRYLOCK);
+
+		if (PTR_ERR(tree_page) == -EBUSY)
+			return ERR_PTR(-EBUSY);
+
 		if (unlikely(!tree_page))
 			/*
 			 * The tree may have been rebalanced,
@@ -1842,10 +1845,11 @@
 			 * continue. All KSM pages belonging to the
 			 * stable_node dups in a stable_node chain
 			 * have the same content and they're
-			 * wrprotected at all times. Any will work
+			 * write protected at all times. Any will work
 			 * fine to continue the walk.
 			 */
-			tree_page = get_ksm_page(stable_node_any, false);
+			tree_page = get_ksm_page(stable_node_any,
+						 GET_KSM_PAGE_NOLOCK);
 		}
 		VM_BUG_ON(!stable_node_dup ^ !!stable_node_any);
 		if (!tree_page) {
@@ -1946,7 +1950,7 @@
 		 * Don't substitute a ksm page for a forked page.
 		 */
 		if (page == tree_page) {
-			put_page(tree_page);
+			put_user_page(tree_page);
 			return NULL;
 		}
 
@@ -1954,10 +1958,10 @@
 
 		parent = *new;
 		if (ret < 0) {
-			put_page(tree_page);
+			put_user_page(tree_page);
 			new = &parent->rb_left;
 		} else if (ret > 0) {
-			put_page(tree_page);
+			put_user_page(tree_page);
 			new = &parent->rb_right;
 		} else if (!ksm_merge_across_nodes &&
 			   page_to_nid(tree_page) != nid) {
@@ -1966,7 +1970,7 @@
 			 * it will be flushed out and put in the right unstable
 			 * tree next time: only merge with it when across_nodes.
 			 */
-			put_page(tree_page);
+			put_user_page(tree_page);
 			return NULL;
 		} else {
 			*tree_pagep = tree_page;
@@ -1999,7 +2003,7 @@
 	 * duplicate. page_migration could break later if rmap breaks,
 	 * so we can as well crash here. We really need to check for
 	 * rmap_hlist_len == STABLE_NODE_CHAIN, but we can as well check
-	 * for other negative values as an undeflow if detected here
+	 * for other negative values as an underflow if detected here
 	 * for the first time (and not when decreasing rmap_hlist_len)
 	 * would be sign of memory corruption in the stable_node.
 	 */
@@ -2071,6 +2075,9 @@
 	remove_rmap_item_from_tree(rmap_item);
 
 	if (kpage) {
+		if (PTR_ERR(kpage) == -EBUSY)
+			return;
+
 		err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
 		if (!err) {
 			/*
@@ -2105,7 +2112,7 @@
 	if (ksm_use_zero_pages && (checksum == zero_checksum)) {
 		struct vm_area_struct *vma;
 
-		down_read(&mm->mmap_sem);
+		mmap_read_lock(mm);
 		vma = find_mergeable_vma(mm, rmap_item->address);
 		if (vma) {
 			err = try_to_merge_one_page(vma, page,
@@ -2117,7 +2124,7 @@
 			 */
 			err = 0;
 		}
-		up_read(&mm->mmap_sem);
+		mmap_read_unlock(mm);
 		/*
 		 * In case of failure, the page was not really empty, so we
 		 * need to continue. Otherwise we're done.
@@ -2144,7 +2151,7 @@
 		 */
 		split = PageTransCompound(page)
 			&& compound_head(page) == compound_head(tree_page);
-		put_page(tree_page);
+		put_user_page(tree_page);
 		if (kpage) {
 			/*
 			 * The pages were successfully merged: insert new
@@ -2253,7 +2260,8 @@
 
 		list_for_each_entry_safe(stable_node, next,
 					 &migrate_nodes, list) {
-			page = get_ksm_page(stable_node, false);
+			page = get_ksm_page(stable_node,
+					    GET_KSM_PAGE_NOLOCK);
 			if (page)
 				put_page(page);
 			cond_resched();
@@ -2279,7 +2287,7 @@
 	}
 
 	mm = slot->mm;
-	down_read(&mm->mmap_sem);
+	mmap_read_lock(mm);
 	if (ksm_test_exit(mm))
 		vma = NULL;
 	else
@@ -2312,11 +2320,11 @@
 					&rmap_item->rmap_list;
 				ksm_scan.address += PAGE_SIZE;
 			} else
-				put_page(*page);
-			up_read(&mm->mmap_sem);
+				put_user_page(*page);
+			mmap_read_unlock(mm);
 			return rmap_item;
 		}
-		put_page(*page);
+		put_user_page(*page);
 		ksm_scan.address += PAGE_SIZE;
 		cond_resched();
 	}
@@ -2337,13 +2345,13 @@
 						struct mm_slot, mm_list);
 	if (ksm_scan.address == 0) {
 		/*
-		 * We've completed a full scan of all vmas, holding mmap_sem
+		 * We've completed a full scan of all vmas, holding mmap_lock
 		 * throughout, and found no VM_MERGEABLE: so do the same as
 		 * __ksm_exit does to remove this mm from all our lists now.
 		 * This applies either when cleaning up after __ksm_exit
 		 * (but beware: we can reach here even before __ksm_exit),
 		 * or when all VM_MERGEABLE areas have been unmapped (and
-		 * mmap_sem then protects against race with MADV_MERGEABLE).
+		 * mmap_lock then protects against race with MADV_MERGEABLE).
 		 */
 		hash_del(&slot->link);
 		list_del(&slot->mm_list);
@@ -2351,12 +2359,12 @@
 
 		free_mm_slot(slot);
 		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
-		up_read(&mm->mmap_sem);
+		mmap_read_unlock(mm);
 		mmdrop(mm);
 	} else {
-		up_read(&mm->mmap_sem);
+		mmap_read_unlock(mm);
 		/*
-		 * up_read(&mm->mmap_sem) first because after
+		 * mmap_read_unlock(mm) first because after
 		 * spin_unlock(&ksm_mmlist_lock) run, the "mm" may
 		 * already have been freed under us by __ksm_exit()
 		 * because the "mm_slot" is still hashed and
23812389 static void ksm_do_scan(unsigned int scan_npages)
23822390 {
23832391 struct rmap_item *rmap_item;
2384
- struct page *uninitialized_var(page);
2392
+ struct page *page;
23852393
23862394 while (scan_npages-- && likely(!freezing(current))) {
23872395 cond_resched();
....@@ -2400,6 +2408,8 @@
24002408
24012409 static int ksm_scan_thread(void *nothing)
24022410 {
2411
+ unsigned int sleep_ms;
2412
+
24032413 set_freezable();
24042414 set_user_nice(current, 5);
24052415
....@@ -2413,8 +2423,10 @@
24132423 try_to_freeze();
24142424
24152425 if (ksmd_should_run()) {
2416
- schedule_timeout_interruptible(
2417
- msecs_to_jiffies(ksm_thread_sleep_millisecs));
2426
+ sleep_ms = READ_ONCE(ksm_thread_sleep_millisecs);
2427
+ wait_event_interruptible_timeout(ksm_iter_wait,
2428
+ sleep_ms != READ_ONCE(ksm_thread_sleep_millisecs),
2429
+ msecs_to_jiffies(sleep_ms));
24182430 } else {
24192431 wait_event_freezable(ksm_thread_wait,
24202432 ksmd_should_run() || kthread_should_stop());
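The new sleep logic pairs with the wake_up_interruptible(&ksm_iter_wait) added to the sysfs sleep_millisecs store in the final hunk: ksmd no longer sleeps out a long stale interval after the tunable is lowered. A stripped-down sketch of that pattern (illustrative module-style fragment, not the patch itself):

#include <linux/wait.h>
#include <linux/jiffies.h>

static DECLARE_WAIT_QUEUE_HEAD(iter_wait);
static unsigned int sleep_millisecs = 20;	/* the tunable */

static void scan_loop_sleep(void)
{
	unsigned int sleep_ms = READ_ONCE(sleep_millisecs);

	/* Returns early if the tunable changes, instead of finishing the
	 * full old timeout. */
	wait_event_interruptible_timeout(iter_wait,
			sleep_ms != READ_ONCE(sleep_millisecs),
			msecs_to_jiffies(sleep_ms));
}

static void set_sleep_millisecs(unsigned int msecs)	/* sysfs store side */
{
	WRITE_ONCE(sleep_millisecs, msecs);
	wake_up_interruptible(&iter_wait);	/* kick the sleeping scanner */
}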
@@ -2476,6 +2488,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ksm_madvise);
 
 int __ksm_enter(struct mm_struct *mm)
 {
@@ -2525,7 +2538,7 @@
 	 * This process is exiting: if it's straightforward (as is the
 	 * case when ksmd was never running), free mm_slot immediately.
 	 * But if it's at the cursor or has rmap_items linked to it, use
-	 * mmap_sem to synchronize with any break_cows before pagetables
+	 * mmap_lock to synchronize with any break_cows before pagetables
 	 * are freed, and leave the mm_slot on the list for ksmd to free.
 	 * Beware: ksm may already have noticed it exiting and freed the slot.
 	 */
@@ -2549,8 +2562,8 @@
 		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
 		mmdrop(mm);
 	} else if (mm_slot) {
-		down_write(&mm->mmap_sem);
-		up_write(&mm->mmap_sem);
+		mmap_write_lock(mm);
+		mmap_write_unlock(mm);
 	}
 }
 
@@ -2574,6 +2587,10 @@
 		return page;	/* let do_swap_page report the error */
 
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+	if (new_page && mem_cgroup_charge(new_page, vma->vm_mm, GFP_KERNEL)) {
+		put_page(new_page);
+		new_page = NULL;
+	}
 	if (new_page) {
 		copy_user_highpage(new_page, page, address, vma);
 
@@ -2609,7 +2626,13 @@
 		struct vm_area_struct *vma;
 
 		cond_resched();
-		anon_vma_lock_read(anon_vma);
+		if (!anon_vma_trylock_read(anon_vma)) {
+			if (rwc->try_lock) {
+				rwc->contended = true;
+				return;
+			}
+			anon_vma_lock_read(anon_vma);
+		}
 		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
 					       0, ULONG_MAX) {
 			unsigned long addr;
@@ -2785,8 +2808,7 @@
 		 */
 		ksm_check_stable_tree(mn->start_pfn,
 				      mn->start_pfn + mn->nr_pages);
-		/* fallthrough */
-
+		fallthrough;
 	case MEM_CANCEL_OFFLINE:
 		mutex_lock(&ksm_thread_mutex);
 		ksm_run &= ~KSM_RUN_OFFLINE;
@@ -2833,6 +2855,7 @@
 		return -EINVAL;
 
 	ksm_thread_sleep_millisecs = msecs;
+	wake_up_interruptible(&ksm_iter_wait);
 
 	return count;
 }