2024-05-10 10ebd8556b7990499c896a550e3d416b444211e6
kernel/include/linux/userfaultfd_k.h
@@ -14,6 +14,11 @@
 #include <linux/userfaultfd.h> /* linux/include/uapi/linux/userfaultfd.h */
 
 #include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <asm-generic/pgtable_uffd.h>
+
+/* The set of all possible UFFD-related VM flags. */
+#define __VM_UFFD_FLAGS (VM_UFFD_MISSING | VM_UFFD_WP | VM_UFFD_MINOR)
 
 /*
  * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
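
Folding the three registration modes into one mask means call sites no longer
enumerate flags by hand: userfaultfd_armed() below collapses to a single test,
and teardown paths can strip every uffd mode in one step. A minimal sketch of
the teardown pattern (the function itself is hypothetical, not part of this
patch):

    /* Sketch: drop all uffd state from a VMA at once (hypothetical helper). */
    static void vma_drop_uffd_state(struct vm_area_struct *vma)
    {
            vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
            vma->vm_flags &= ~__VM_UFFD_FLAGS; /* clears MISSING, WP and MINOR */
    }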
@@ -28,15 +33,43 @@
 #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
 #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
 
+extern int sysctl_unprivileged_userfaultfd;
+
 extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
+
+/*
+ * The mode of operation for __mcopy_atomic and its helpers.
+ *
+ * This is almost an implementation detail (mcopy_atomic below doesn't take this
+ * as a parameter), but it's exposed here because memory-type-specific
+ * implementations (e.g. hugetlbfs) need to know the mode of operation.
+ */
+enum mcopy_atomic_mode {
+        /* A normal copy_from_user into the destination range. */
+        MCOPY_ATOMIC_NORMAL,
+        /* Don't copy; map the destination range to the zero page. */
+        MCOPY_ATOMIC_ZEROPAGE,
+        /* Just install pte(s) with the existing page(s) in the page cache. */
+        MCOPY_ATOMIC_CONTINUE,
+};
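
The mode is exposed because a memory-type-specific backend has to behave
differently per mode: CONTINUE must find the page already present in the page
cache rather than allocate one, and ZEROPAGE skips the copy entirely. A rough
sketch of the dispatch, patterned after the hugetlbfs handler (alloc_dst_page()
and copy_src_page() are hypothetical stand-ins):

    /* Sketch: how a backend branches on enum mcopy_atomic_mode. */
    if (mode == MCOPY_ATOMIC_CONTINUE) {
            /* Minor fault: the data already exists; just look it up. */
            page = find_lock_page(mapping, idx);
            if (!page)
                    return -EFAULT;
    } else {
            page = alloc_dst_page(dst_vma, dst_addr);       /* hypothetical */
            if (mode == MCOPY_ATOMIC_NORMAL)
                    copy_src_page(page, src_addr);          /* hypothetical */
    }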
+
+extern int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
+                                    struct vm_area_struct *dst_vma,
+                                    unsigned long dst_addr, struct page *page,
+                                    bool newly_allocated, bool wp_copy);
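
mfill_atomic_install_pte() is the shared tail of the copy, zeropage and
continue paths: given a page that is already prepared, it installs the pte,
write-protected when wp_copy is set. A sketch of a CONTINUE-style caller (the
page comes straight from the page cache, so newly_allocated is false; error
handling trimmed):

    /* Sketch: install an existing page-cache page for a minor fault. */
    page = find_lock_page(mapping, pgoff);
    if (!page)
            return -EFAULT;
    ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
                                   page, false /* newly_allocated */, wp_copy);
    unlock_page(page);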
 
 extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
                             unsigned long src_start, unsigned long len,
-                            bool *mmap_changing);
+                            bool *mmap_changing, __u64 mode);
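
The new __u64 mode argument carries the UFFDIO_COPY mode bits down from the
ioctl, so a copy can be installed write-protected in one step instead of
copy-then-writeprotect. A minimal userspace sketch (assumes uffd was
registered over dst_addr with UFFDIO_REGISTER_MODE_WP):

    /* Userspace sketch: UFFDIO_COPY that lands write-protected. */
    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>

    struct uffdio_copy copy = {
            .dst  = dst_addr,               /* page-aligned destination */
            .src  = (unsigned long)src_buf, /* source buffer to copy from */
            .len  = page_size,
            .mode = UFFDIO_COPY_MODE_WP,    /* install the pte write-protected */
    };
    if (ioctl(uffd, UFFDIO_COPY, &copy) == -1)
            perror("UFFDIO_COPY");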
 extern ssize_t mfill_zeropage(struct mm_struct *dst_mm,
                               unsigned long dst_start,
                               unsigned long len,
                               bool *mmap_changing);
+extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start,
+                              unsigned long len, bool *mmap_changing);
+extern int mwriteprotect_range(struct mm_struct *dst_mm,
+                               unsigned long start, unsigned long len,
+                               bool enable_wp, bool *mmap_changing);
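
mcopy_continue() backs the UFFDIO_CONTINUE ioctl, which resolves a minor fault
by mapping the page that already sits in the page cache, while
mwriteprotect_range() backs UFFDIO_WRITEPROTECT. A userspace sketch of
resolving a minor fault and then lifting write protection (assumes uffd was
registered with the matching modes):

    /* Resolve a minor fault: map the existing page-cache contents. */
    struct uffdio_continue cont = {
            .range = { .start = addr, .len = page_size },
            .mode  = 0,
    };
    if (ioctl(uffd, UFFDIO_CONTINUE, &cont) == -1)
            perror("UFFDIO_CONTINUE");

    /* Lift write protection from a previously protected range. */
    struct uffdio_writeprotect wp = {
            .range = { .start = addr, .len = page_size },
            .mode  = 0,     /* 0 = clear; UFFDIO_WRITEPROTECT_MODE_WP sets it */
    };
    if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp) == -1)
            perror("UFFDIO_WRITEPROTECT");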
 
 /* mm helpers */
 static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
@@ -45,14 +78,52 @@
         return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx;
 }
 
+/*
+ * Never enable huge pmd sharing on some uffd registered vmas:
+ *
+ * - VM_UFFD_WP VMAs, because write protect information is per pgtable entry.
+ *
+ * - VM_UFFD_MINOR VMAs, because otherwise we would never get minor faults for
+ *   VMAs which share huge pmds.  (If you have two mappings to the same
+ *   underlying pages, and fault in the non-UFFD-registered one with a write,
+ *   with huge pmd sharing this would *also* setup the second UFFD-registered
+ *   mapping, and we'd not get minor faults.)
+ */
+static inline bool uffd_disable_huge_pmd_share(struct vm_area_struct *vma)
+{
+        return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR);
+}
+
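
On the hugetlb side the pmd-sharing decision point only needs to consult this
helper before opting in. A sketch of the call-site shape, modeled on
want_pmd_share() in mm/hugetlb.c (the alignment check is a hypothetical
stand-in):

    /* Sketch: refuse pmd sharing outright for uffd-wp/minor VMAs. */
    static bool may_share_pmd(struct vm_area_struct *vma, unsigned long addr)
    {
            if (uffd_disable_huge_pmd_share(vma))
                    return false;
            return pmd_range_shareable(vma, addr); /* hypothetical */
    }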
 static inline bool userfaultfd_missing(struct vm_area_struct *vma)
 {
         return vma->vm_flags & VM_UFFD_MISSING;
 }
 
+static inline bool userfaultfd_wp(struct vm_area_struct *vma)
+{
+        return vma->vm_flags & VM_UFFD_WP;
+}
+
+static inline bool userfaultfd_minor(struct vm_area_struct *vma)
+{
+        return vma->vm_flags & VM_UFFD_MINOR;
+}
+
+static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma,
+                                      pte_t pte)
+{
+        return userfaultfd_wp(vma) && pte_uffd_wp(pte);
+}
+
+static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma,
+                                           pmd_t pmd)
+{
+        return userfaultfd_wp(vma) && pmd_uffd_wp(pmd);
+}
+
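
These predicates pair the per-VMA registration bit with the per-entry software
bit, which lets the write-fault path tell a uffd-wp fault from an ordinary CoW
fault. Roughly the shape of the check in a do_wp_page()-style handler (locking
and pte revalidation omitted):

    /* Sketch: route write faults on uffd-wp ptes to userspace. */
    if (userfaultfd_pte_wp(vma, *vmf->pte)) {
            pte_unmap_unlock(vmf->pte, vmf->ptl);
            return handle_userfault(vmf, VM_UFFD_WP);
    }
    /* ...otherwise fall through to normal CoW handling... */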
 static inline bool userfaultfd_armed(struct vm_area_struct *vma)
 {
-        return vma->vm_flags & (VM_UFFD_MISSING | VM_UFFD_WP);
+        return vma->vm_flags & __VM_UFFD_FLAGS;
 }
 
 extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
@@ -94,6 +165,29 @@
         return false;
 }
 
+static inline bool userfaultfd_wp(struct vm_area_struct *vma)
+{
+        return false;
+}
+
+static inline bool userfaultfd_minor(struct vm_area_struct *vma)
+{
+        return false;
+}
+
+static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma,
+                                      pte_t pte)
+{
+        return false;
+}
+
+static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma,
+                                           pmd_t pmd)
+{
+        return false;
+}
+
+
 static inline bool userfaultfd_armed(struct vm_area_struct *vma)
 {
         return false;