hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/mm/userfaultfd.c
....@@ -1,10 +1,8 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * mm/userfaultfd.c
34 *
45 * Copyright (C) 2015 Red Hat, Inc.
5
- *
6
- * This work is licensed under the terms of the GNU GPL, version 2. See
7
- * the COPYING file in the top-level directory.
86 */
97
108 #include <linux/mm.h>
....@@ -20,21 +18,119 @@
2018 #include <asm/tlbflush.h>
2119 #include "internal.h"
2220
21
+static __always_inline
22
+struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm,
23
+ unsigned long dst_start,
24
+ unsigned long len)
25
+{
26
+ /*
27
+ * Make sure that the dst range is both valid and fully within a
28
+ * single existing vma.
29
+ */
30
+ struct vm_area_struct *dst_vma;
31
+
32
+ dst_vma = find_vma(dst_mm, dst_start);
33
+ if (!dst_vma)
34
+ return NULL;
35
+
36
+ if (dst_start < dst_vma->vm_start ||
37
+ dst_start + len > dst_vma->vm_end)
38
+ return NULL;
39
+
40
+ /*
41
+ * Check the vma is registered in uffd, this is required to
42
+ * enforce the VM_MAYWRITE check done at uffd registration
43
+ * time.
44
+ */
45
+ if (!dst_vma->vm_userfaultfd_ctx.ctx)
46
+ return NULL;
47
+
48
+ return dst_vma;
49
+}
50
+
51
+/*
52
+ * Install PTEs, to map dst_addr (within dst_vma) to page.
53
+ *
54
+ * This function handles both MCOPY_ATOMIC_NORMAL and _CONTINUE for both shmem
55
+ * and anon, and for both shared and private VMAs.
56
+ */
57
+int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
58
+ struct vm_area_struct *dst_vma,
59
+ unsigned long dst_addr, struct page *page,
60
+ bool newly_allocated, bool wp_copy)
61
+{
62
+ int ret;
63
+ pte_t _dst_pte, *dst_pte;
64
+ bool writable = dst_vma->vm_flags & VM_WRITE;
65
+ bool vm_shared = dst_vma->vm_flags & VM_SHARED;
66
+ bool page_in_cache = page_mapping(page);
67
+ spinlock_t *ptl;
68
+ struct inode *inode;
69
+ pgoff_t offset, max_off;
70
+
71
+ _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
72
+ if (page_in_cache && !vm_shared)
73
+ writable = false;
74
+ if (writable || !page_in_cache)
75
+ _dst_pte = pte_mkdirty(_dst_pte);
76
+ if (writable) {
77
+ if (wp_copy)
78
+ _dst_pte = pte_mkuffd_wp(_dst_pte);
79
+ else
80
+ _dst_pte = pte_mkwrite(_dst_pte);
81
+ }
82
+
83
+ dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
84
+
85
+ if (vma_is_shmem(dst_vma)) {
86
+ /* serialize against truncate with the page table lock */
87
+ inode = dst_vma->vm_file->f_inode;
88
+ offset = linear_page_index(dst_vma, dst_addr);
89
+ max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
90
+ ret = -EFAULT;
91
+ if (unlikely(offset >= max_off))
92
+ goto out_unlock;
93
+ }
94
+
95
+ ret = -EEXIST;
96
+ if (!pte_none(*dst_pte))
97
+ goto out_unlock;
98
+
99
+ if (page_in_cache)
100
+ page_add_file_rmap(page, false);
101
+ else
102
+ page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
103
+
104
+ /*
105
+ * Must happen after rmap, as mm_counter() checks mapping (via
106
+ * PageAnon()), which is set by __page_set_anon_rmap().
107
+ */
108
+ inc_mm_counter(dst_mm, mm_counter(page));
109
+
110
+ if (newly_allocated)
111
+ lru_cache_add_inactive_or_unevictable(page, dst_vma);
112
+
113
+ set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
114
+
115
+ /* No need to invalidate - it was non-present before */
116
+ update_mmu_cache(dst_vma, dst_addr, dst_pte);
117
+ ret = 0;
118
+out_unlock:
119
+ pte_unmap_unlock(dst_pte, ptl);
120
+ return ret;
121
+}
122
+
23123 static int mcopy_atomic_pte(struct mm_struct *dst_mm,
24124 pmd_t *dst_pmd,
25125 struct vm_area_struct *dst_vma,
26126 unsigned long dst_addr,
27127 unsigned long src_addr,
28
- struct page **pagep)
128
+ struct page **pagep,
129
+ bool wp_copy)
29130 {
30
- struct mem_cgroup *memcg;
31
- pte_t _dst_pte, *dst_pte;
32
- spinlock_t *ptl;
33131 void *page_kaddr;
34132 int ret;
35133 struct page *page;
36
- pgoff_t offset, max_off;
37
- struct inode *inode;
38134
39135 if (!*pagep) {
40136 ret = -ENOMEM;
....@@ -48,13 +144,15 @@
48144 PAGE_SIZE);
49145 kunmap_atomic(page_kaddr);
50146
51
- /* fallback to copy_from_user outside mmap_sem */
147
+ /* fallback to copy_from_user outside mmap_lock */
52148 if (unlikely(ret)) {
53149 ret = -ENOENT;
54150 *pagep = page;
55151 /* don't free the page */
56152 goto out;
57153 }
154
+
155
+ flush_dcache_page(page);
58156 } else {
59157 page = *pagep;
60158 *pagep = NULL;
....@@ -62,50 +160,21 @@
62160
63161 /*
64162 * The memory barrier inside __SetPageUptodate makes sure that
65
- * preceeding stores to the page contents become visible before
163
+ * preceding stores to the page contents become visible before
66164 * the set_pte_at() write.
67165 */
68166 __SetPageUptodate(page);
69167
70168 ret = -ENOMEM;
71
- if (mem_cgroup_try_charge(page, dst_mm, GFP_KERNEL, &memcg, false))
169
+ if (mem_cgroup_charge(page, dst_mm, GFP_KERNEL))
72170 goto out_release;
73171
74
- _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
75
- if (dst_vma->vm_flags & VM_WRITE)
76
- _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
77
-
78
- dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
79
- if (dst_vma->vm_file) {
80
- /* the shmem MAP_PRIVATE case requires checking the i_size */
81
- inode = dst_vma->vm_file->f_inode;
82
- offset = linear_page_index(dst_vma, dst_addr);
83
- max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
84
- ret = -EFAULT;
85
- if (unlikely(offset >= max_off))
86
- goto out_release_uncharge_unlock;
87
- }
88
- ret = -EEXIST;
89
- if (!pte_none(*dst_pte))
90
- goto out_release_uncharge_unlock;
91
-
92
- inc_mm_counter(dst_mm, MM_ANONPAGES);
93
- page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
94
- mem_cgroup_commit_charge(page, memcg, false, false);
95
- lru_cache_add_active_or_unevictable(page, dst_vma);
96
-
97
- set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
98
-
99
- /* No need to invalidate - it was non-present before */
100
- update_mmu_cache(dst_vma, dst_addr, dst_pte);
101
-
102
- pte_unmap_unlock(dst_pte, ptl);
103
- ret = 0;
172
+ ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
173
+ page, true, wp_copy);
174
+ if (ret)
175
+ goto out_release;
104176 out:
105177 return ret;
106
-out_release_uncharge_unlock:
107
- pte_unmap_unlock(dst_pte, ptl);
108
- mem_cgroup_cancel_charge(page, memcg, false);
109178 out_release:
110179 put_page(page);
111180 goto out;
....@@ -146,6 +215,41 @@
146215 return ret;
147216 }
148217
218
+/* Handles UFFDIO_CONTINUE for all shmem VMAs (shared or private). */
219
+static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
220
+ pmd_t *dst_pmd,
221
+ struct vm_area_struct *dst_vma,
222
+ unsigned long dst_addr,
223
+ bool wp_copy)
224
+{
225
+ struct inode *inode = file_inode(dst_vma->vm_file);
226
+ pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
227
+ struct page *page;
228
+ int ret;
229
+
230
+ ret = shmem_getpage(inode, pgoff, &page, SGP_READ);
231
+ if (ret)
232
+ goto out;
233
+ if (!page) {
234
+ ret = -EFAULT;
235
+ goto out;
236
+ }
237
+
238
+ ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
239
+ page, false, wp_copy);
240
+ if (ret)
241
+ goto out_release;
242
+
243
+ unlock_page(page);
244
+ ret = 0;
245
+out:
246
+ return ret;
247
+out_release:
248
+ unlock_page(page);
249
+ put_page(page);
250
+ goto out;
251
+}
252
+
149253 static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
150254 {
151255 pgd_t *pgd;
....@@ -170,14 +274,14 @@
170274 #ifdef CONFIG_HUGETLB_PAGE
171275 /*
172276 * __mcopy_atomic processing for HUGETLB vmas. Note that this routine is
173
- * called with mmap_sem held, it will release mmap_sem before returning.
277
+ * called with mmap_lock held, it will release mmap_lock before returning.
174278 */
175279 static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
176280 struct vm_area_struct *dst_vma,
177281 unsigned long dst_start,
178282 unsigned long src_start,
179283 unsigned long len,
180
- bool zeropage)
284
+ enum mcopy_atomic_mode mode)
181285 {
182286 int vm_alloc_shared = dst_vma->vm_flags & VM_SHARED;
183287 int vm_shared = dst_vma->vm_flags & VM_SHARED;
....@@ -186,7 +290,6 @@
186290 unsigned long src_addr, dst_addr;
187291 long copied;
188292 struct page *page;
189
- struct hstate *h;
190293 unsigned long vma_hpagesize;
191294 pgoff_t idx;
192295 u32 hash;
....@@ -198,8 +301,8 @@
198301 * by THP. Since we can not reliably insert a zero page, this
199302 * feature is not supported.
200303 */
201
- if (zeropage) {
202
- up_read(&dst_mm->mmap_sem);
304
+ if (mode == MCOPY_ATOMIC_ZEROPAGE) {
305
+ mmap_read_unlock(dst_mm);
203306 return -EINVAL;
204307 }
205308
....@@ -218,24 +321,13 @@
218321
219322 retry:
220323 /*
221
- * On routine entry dst_vma is set. If we had to drop mmap_sem and
324
+ * On routine entry dst_vma is set. If we had to drop mmap_lock and
222325 * retry, dst_vma will be set to NULL and we must lookup again.
223326 */
224327 if (!dst_vma) {
225328 err = -ENOENT;
226
- dst_vma = find_vma(dst_mm, dst_start);
329
+ dst_vma = find_dst_vma(dst_mm, dst_start, len);
227330 if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
228
- goto out_unlock;
229
- /*
230
- * Check the vma is registered in uffd, this is
231
- * required to enforce the VM_MAYWRITE check done at
232
- * uffd registration time.
233
- */
234
- if (!dst_vma->vm_userfaultfd_ctx.ctx)
235
- goto out_unlock;
236
-
237
- if (dst_start < dst_vma->vm_start ||
238
- dst_start + len > dst_vma->vm_end)
239331 goto out_unlock;
240332
241333 err = -EINVAL;
....@@ -244,10 +336,6 @@
244336
245337 vm_shared = dst_vma->vm_flags & VM_SHARED;
246338 }
247
-
248
- if (WARN_ON(dst_addr & (vma_hpagesize - 1) ||
249
- (len - copied) & (vma_hpagesize - 1)))
250
- goto out_unlock;
251339
252340 /*
253341 * If not shared, ensure the dst_vma has a anon_vma.
....@@ -258,56 +346,59 @@
258346 goto out_unlock;
259347 }
260348
261
- h = hstate_vma(dst_vma);
262
-
263349 while (src_addr < src_start + len) {
264
- pte_t dst_pteval;
265
-
266350 BUG_ON(dst_addr >= dst_start + len);
267
- VM_BUG_ON(dst_addr & ~huge_page_mask(h));
268351
269352 /*
270
- * Serialize via hugetlb_fault_mutex
353
+ * Serialize via i_mmap_rwsem and hugetlb_fault_mutex.
354
+ * i_mmap_rwsem ensures the dst_pte remains valid even
355
+ * in the case of shared pmds. fault mutex prevents
356
+ * races with other faulting threads.
271357 */
272
- idx = linear_page_index(dst_vma, dst_addr);
273358 mapping = dst_vma->vm_file->f_mapping;
274
- hash = hugetlb_fault_mutex_hash(h, mapping, idx);
359
+ i_mmap_lock_read(mapping);
360
+ idx = linear_page_index(dst_vma, dst_addr);
361
+ hash = hugetlb_fault_mutex_hash(mapping, idx);
275362 mutex_lock(&hugetlb_fault_mutex_table[hash]);
276363
277364 err = -ENOMEM;
278
- dst_pte = huge_pte_alloc(dst_mm, dst_addr, huge_page_size(h));
365
+ dst_pte = huge_pte_alloc(dst_mm, dst_vma, dst_addr, vma_hpagesize);
279366 if (!dst_pte) {
280367 mutex_unlock(&hugetlb_fault_mutex_table[hash]);
368
+ i_mmap_unlock_read(mapping);
281369 goto out_unlock;
282370 }
283371
284
- err = -EEXIST;
285
- dst_pteval = huge_ptep_get(dst_pte);
286
- if (!huge_pte_none(dst_pteval)) {
372
+ if (mode != MCOPY_ATOMIC_CONTINUE &&
373
+ !huge_pte_none(huge_ptep_get(dst_pte))) {
374
+ err = -EEXIST;
287375 mutex_unlock(&hugetlb_fault_mutex_table[hash]);
376
+ i_mmap_unlock_read(mapping);
288377 goto out_unlock;
289378 }
290379
291380 err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
292
- dst_addr, src_addr, &page);
381
+ dst_addr, src_addr, mode, &page);
293382
294383 mutex_unlock(&hugetlb_fault_mutex_table[hash]);
384
+ i_mmap_unlock_read(mapping);
295385 vm_alloc_shared = vm_shared;
296386
297387 cond_resched();
298388
299389 if (unlikely(err == -ENOENT)) {
300
- up_read(&dst_mm->mmap_sem);
390
+ mmap_read_unlock(dst_mm);
301391 BUG_ON(!page);
302392
303393 err = copy_huge_page_from_user(page,
304394 (const void __user *)src_addr,
305
- pages_per_huge_page(h), true);
395
+ vma_hpagesize / PAGE_SIZE,
396
+ true);
306397 if (unlikely(err)) {
307398 err = -EFAULT;
308399 goto out;
309400 }
310
- down_read(&dst_mm->mmap_sem);
401
+ mmap_read_lock(dst_mm);
311402
312403 dst_vma = NULL;
313404 goto retry;
....@@ -327,7 +418,7 @@
327418 }
328419
329420 out_unlock:
330
- up_read(&dst_mm->mmap_sem);
421
+ mmap_read_unlock(dst_mm);
331422 out:
332423 if (page) {
333424 /*
....@@ -338,7 +429,7 @@
338429 * private and shared mappings. See the routine
339430 * restore_reserve_on_error for details. Unfortunately, we
340431 * can not call restore_reserve_on_error now as it would
341
- * require holding mmap_sem.
432
+ * require holding mmap_lock.
342433 *
343434 * If a reservation for the page existed in the reservation
344435 * map of a private mapping, the map was modified to indicate
....@@ -389,7 +480,7 @@
389480 unsigned long dst_start,
390481 unsigned long src_start,
391482 unsigned long len,
392
- bool zeropage);
483
+ enum mcopy_atomic_mode mode);
393484 #endif /* CONFIG_HUGETLB_PAGE */
394485
395486 static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
....@@ -398,9 +489,15 @@
398489 unsigned long dst_addr,
399490 unsigned long src_addr,
400491 struct page **page,
401
- bool zeropage)
492
+ enum mcopy_atomic_mode mode,
493
+ bool wp_copy)
402494 {
403495 ssize_t err;
496
+
497
+ if (mode == MCOPY_ATOMIC_CONTINUE) {
498
+ return mcontinue_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
499
+ wp_copy);
500
+ }
404501
405502 /*
406503 * The normal page fault path for a shmem will invoke the
....@@ -413,20 +510,19 @@
413510 * and not in the radix tree.
414511 */
415512 if (!(dst_vma->vm_flags & VM_SHARED)) {
416
- if (!zeropage)
513
+ if (mode == MCOPY_ATOMIC_NORMAL)
417514 err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
418
- dst_addr, src_addr, page);
515
+ dst_addr, src_addr, page,
516
+ wp_copy);
419517 else
420518 err = mfill_zeropage_pte(dst_mm, dst_pmd,
421519 dst_vma, dst_addr);
422520 } else {
423
- if (!zeropage)
424
- err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd,
425
- dst_vma, dst_addr,
426
- src_addr, page);
427
- else
428
- err = shmem_mfill_zeropage_pte(dst_mm, dst_pmd,
429
- dst_vma, dst_addr);
521
+ VM_WARN_ON_ONCE(wp_copy);
522
+ err = shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
523
+ dst_addr, src_addr,
524
+ mode != MCOPY_ATOMIC_NORMAL,
525
+ page);
430526 }
431527
432528 return err;
....@@ -436,8 +532,9 @@
436532 unsigned long dst_start,
437533 unsigned long src_start,
438534 unsigned long len,
439
- bool zeropage,
440
- bool *mmap_changing)
535
+ enum mcopy_atomic_mode mcopy_mode,
536
+ bool *mmap_changing,
537
+ __u64 mode)
441538 {
442539 struct vm_area_struct *dst_vma;
443540 ssize_t err;
....@@ -445,6 +542,7 @@
445542 unsigned long src_addr, dst_addr;
446543 long copied;
447544 struct page *page;
545
+ bool wp_copy;
448546
449547 /*
450548 * Sanitize the command parameters:
....@@ -461,7 +559,7 @@
461559 copied = 0;
462560 page = NULL;
463561 retry:
464
- down_read(&dst_mm->mmap_sem);
562
+ mmap_read_lock(dst_mm);
465563
466564 /*
467565 * If memory mappings are changing because of non-cooperative
....@@ -477,19 +575,8 @@
477575 * both valid and fully within a single existing vma.
478576 */
479577 err = -ENOENT;
480
- dst_vma = find_vma(dst_mm, dst_start);
578
+ dst_vma = find_dst_vma(dst_mm, dst_start, len);
481579 if (!dst_vma)
482
- goto out_unlock;
483
- /*
484
- * Check the vma is registered in uffd, this is required to
485
- * enforce the VM_MAYWRITE check done at uffd registration
486
- * time.
487
- */
488
- if (!dst_vma->vm_userfaultfd_ctx.ctx)
489
- goto out_unlock;
490
-
491
- if (dst_start < dst_vma->vm_start ||
492
- dst_start + len > dst_vma->vm_end)
493580 goto out_unlock;
494581
495582 err = -EINVAL;
....@@ -502,13 +589,23 @@
502589 goto out_unlock;
503590
504591 /*
592
+ * validate 'mode' now that we know the dst_vma: don't allow
593
+ * a wrprotect copy if the userfaultfd didn't register as WP.
594
+ */
595
+ wp_copy = mode & UFFDIO_COPY_MODE_WP;
596
+ if (wp_copy && !(dst_vma->vm_flags & VM_UFFD_WP))
597
+ goto out_unlock;
598
+
599
+ /*
505600 * If this is a HUGETLB vma, pass off to appropriate routine
506601 */
507602 if (is_vm_hugetlb_page(dst_vma))
508603 return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
509
- src_start, len, zeropage);
604
+ src_start, len, mcopy_mode);
510605
511606 if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
607
+ goto out_unlock;
608
+ if (!vma_is_shmem(dst_vma) && mcopy_mode == MCOPY_ATOMIC_CONTINUE)
512609 goto out_unlock;
513610
514611 /*
....@@ -542,7 +639,7 @@
542639 break;
543640 }
544641 if (unlikely(pmd_none(dst_pmdval)) &&
545
- unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) {
642
+ unlikely(__pte_alloc(dst_mm, dst_pmd))) {
546643 err = -ENOMEM;
547644 break;
548645 }
....@@ -556,13 +653,13 @@
556653 BUG_ON(pmd_trans_huge(*dst_pmd));
557654
558655 err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
559
- src_addr, &page, zeropage);
656
+ src_addr, &page, mcopy_mode, wp_copy);
560657 cond_resched();
561658
562659 if (unlikely(err == -ENOENT)) {
563660 void *page_kaddr;
564661
565
- up_read(&dst_mm->mmap_sem);
662
+ mmap_read_unlock(dst_mm);
566663 BUG_ON(!page);
567664
568665 page_kaddr = kmap(page);
....@@ -574,6 +671,7 @@
574671 err = -EFAULT;
575672 goto out;
576673 }
674
+ flush_dcache_page(page);
577675 goto retry;
578676 } else
579677 BUG_ON(page);
....@@ -591,7 +689,7 @@
591689 }
592690
593691 out_unlock:
594
- up_read(&dst_mm->mmap_sem);
692
+ mmap_read_unlock(dst_mm);
595693 out:
596694 if (page)
597695 put_page(page);
....@@ -603,14 +701,76 @@
603701
604702 ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
605703 unsigned long src_start, unsigned long len,
606
- bool *mmap_changing)
704
+ bool *mmap_changing, __u64 mode)
607705 {
608
- return __mcopy_atomic(dst_mm, dst_start, src_start, len, false,
609
- mmap_changing);
706
+ return __mcopy_atomic(dst_mm, dst_start, src_start, len,
707
+ MCOPY_ATOMIC_NORMAL, mmap_changing, mode);
610708 }
611709
612710 ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
613711 unsigned long len, bool *mmap_changing)
614712 {
615
- return __mcopy_atomic(dst_mm, start, 0, len, true, mmap_changing);
713
+ return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE,
714
+ mmap_changing, 0);
715
+}
716
+
717
+ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
718
+ unsigned long len, bool *mmap_changing)
719
+{
720
+ return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_CONTINUE,
721
+ mmap_changing, 0);
722
+}
723
+
724
+int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
725
+ unsigned long len, bool enable_wp, bool *mmap_changing)
726
+{
727
+ struct vm_area_struct *dst_vma;
728
+ pgprot_t newprot;
729
+ int err;
730
+
731
+ /*
732
+ * Sanitize the command parameters:
733
+ */
734
+ BUG_ON(start & ~PAGE_MASK);
735
+ BUG_ON(len & ~PAGE_MASK);
736
+
737
+ /* Does the address range wrap, or is the span zero-sized? */
738
+ BUG_ON(start + len <= start);
739
+
740
+ mmap_read_lock(dst_mm);
741
+
742
+ /*
743
+ * If memory mappings are changing because of non-cooperative
744
+ * operation (e.g. mremap) running in parallel, bail out and
745
+ * request the user to retry later
746
+ */
747
+ err = -EAGAIN;
748
+ if (mmap_changing && READ_ONCE(*mmap_changing))
749
+ goto out_unlock;
750
+
751
+ err = -ENOENT;
752
+ dst_vma = find_dst_vma(dst_mm, start, len);
753
+ /*
754
+ * Make sure the vma is not shared, that the dst range is
755
+ * both valid and fully within a single existing vma.
756
+ */
757
+ if (!dst_vma || (dst_vma->vm_flags & VM_SHARED))
758
+ goto out_unlock;
759
+ if (!userfaultfd_wp(dst_vma))
760
+ goto out_unlock;
761
+ if (!vma_is_anonymous(dst_vma))
762
+ goto out_unlock;
763
+
764
+ if (enable_wp)
765
+ newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
766
+ else
767
+ newprot = vm_get_page_prot(dst_vma->vm_flags);
768
+
769
+ change_protection(dst_vma, start, start + len, newprot,
770
+ enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);
771
+
772
+ err = 0;
773
+out_unlock:
774
+ mmap_read_unlock(dst_mm);
775
+ return err;
616776 }