2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/arch/x86/entry/vdso/vma.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2007 Andi Kleen, SUSE Labs.
- * Subject to the GPL, v.2
  *
  * This contains most of the x86 vDSO kernel-side code.
  */
@@ -14,15 +14,31 @@
 #include <linux/elf.h>
 #include <linux/cpu.h>
 #include <linux/ptrace.h>
+#include <linux/time_namespace.h>
+
 #include <asm/pvclock.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
 #include <asm/vdso.h>
 #include <asm/vvar.h>
+#include <asm/tlb.h>
 #include <asm/page.h>
 #include <asm/desc.h>
 #include <asm/cpufeature.h>
-#include <asm/mshyperv.h>
+#include <clocksource/hyperv_timer.h>
+
+#undef _ASM_X86_VVAR_H
+#define EMIT_VVAR(name, offset)	\
+	const size_t name ## _offset = offset;
+#include <asm/vvar.h>
+
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+	return (struct vdso_data *)(vvar_page + _vdso_data_offset);
+}
+#undef EMIT_VVAR
+
+unsigned int vclocks_used __read_mostly;
 
 #if defined(CONFIG_X86_64)
 unsigned int __read_mostly vdso64_enabled = 1;
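
The #undef _ASM_X86_VVAR_H block above is an X-macro trick: asm/vvar.h declares every vvar variable through EMIT_VVAR(name, offset), so re-including the header with EMIT_VVAR redefined materializes one "const size_t <name>_offset" per entry, which is where the _vdso_data_offset consumed by arch_get_vdso_data() comes from. A minimal standalone sketch of the same pattern; the table, names, and offsets below are invented for illustration and are not the kernel's:

#include <stddef.h>
#include <stdio.h>

/* Stand-in for the EMIT_VVAR() declarations in asm/vvar.h: a table that
 * names each variable and its byte offset inside the shared page. */
#define VVAR_TABLE(EMIT)	\
	EMIT(vdso_data, 128)	\
	EMIT(pvti_data, 640)

/* Expand the table into "const size_t <name>_offset = <offset>;", the
 * same move the kernel makes by redefining EMIT_VVAR and re-including
 * <asm/vvar.h>. */
#define EMIT_OFFSET(name, offset) static const size_t name##_offset = offset;
VVAR_TABLE(EMIT_OFFSET)
#undef EMIT_OFFSET

int main(void)
{
	char page[4096] = { 0 };

	/* A consumer can now locate structures inside an opaque page. */
	printf("vdso_data at page+%zu (%p)\n",
	       vdso_data_offset, (void *)(page + vdso_data_offset));
	printf("pvti_data at page+%zu\n", pvti_data_offset);
	return 0;
}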
@@ -37,9 +53,10 @@
 						image->alt_len));
 }
 
+static const struct vm_special_mapping vvar_mapping;
 struct linux_binprm;
 
-static int vdso_fault(const struct vm_special_mapping *sm,
+static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
 		      struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
@@ -84,12 +101,74 @@
 	return 0;
 }
 
-static int vvar_fault(const struct vm_special_mapping *sm,
+static int vvar_mremap(const struct vm_special_mapping *sm,
+			struct vm_area_struct *new_vma)
+{
+	const struct vdso_image *image = new_vma->vm_mm->context.vdso_image;
+	unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+
+	if (new_size != -image->sym_vvar_start)
+		return -EINVAL;
+
+	return 0;
+}
+
+#ifdef CONFIG_TIME_NS
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	if (likely(vma->vm_mm == current->mm))
+		return current->nsproxy->time_ns->vvar_page;
+
+	/*
+	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
+	 * through interfaces like /proc/$pid/mem or
+	 * process_vm_{readv,writev}() as long as there's no .access()
+	 * in special_mapping_vmops().
+	 * For more details check_vma_flags() and __access_remote_vm()
+	 */
+
+	WARN(1, "vvar_page accessed remotely");
+
+	return NULL;
+}
+
+/*
+ * The vvar page layout depends on whether a task belongs to the root or
+ * non-root time namespace. Whenever a task changes its namespace, the VVAR
+ * page tables are cleared, and then they will be re-faulted with a
+ * corresponding layout.
+ * See also the comment near timens_setup_vdso_data() for details.
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+	struct mm_struct *mm = task->mm;
+	struct vm_area_struct *vma;
+
+	mmap_read_lock(mm);
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		unsigned long size = vma->vm_end - vma->vm_start;
+
+		if (vma_is_special_mapping(vma, &vvar_mapping))
+			zap_page_range(vma, vma->vm_start, size);
+	}
+
+	mmap_read_unlock(mm);
+	return 0;
+}
+#else
+static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	return NULL;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
 		      struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+	unsigned long pfn;
 	long sym_offset;
-	int ret = -EFAULT;
 
 	if (!image)
 		return VM_FAULT_SIGBUS;
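
vvar_mremap() only has to validate the size: moving the special mapping is allowed (checkpoint/restore tooling relies on that), but the moved VMA must still span exactly the vvar area, and -image->sym_vvar_start is that size because the symbol is a negative offset from the vDSO text start. vdso_join_timens() then leans on demand faulting: zapping the PTEs under mmap_read_lock() suffices, since the next access re-enters vvar_fault(), which picks the new namespace's page. A hedged userspace sketch of the size rule follows; the /proc parsing is deliberately naive and the exact errno may vary by kernel version:

#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	FILE *f = fopen("/proc/self/maps", "r");
	char line[256];
	unsigned long start = 0, end = 0;

	/* Locate this process's [vvar] mapping. */
	while (f && fgets(line, sizeof(line), f)) {
		if (strstr(line, "[vvar]") &&
		    sscanf(line, "%lx-%lx", &start, &end) == 2)
			break;
	}
	if (f)
		fclose(f);
	if (!start)
		return 1;

	size_t size = end - start;

	/* Reserve a destination range, then free it for mremap to claim. */
	void *dst = mmap(NULL, size, PROT_NONE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (dst == MAP_FAILED)
		return 1;
	munmap(dst, size);

	/* Moving [vvar] at half its size should be refused by the
	 * vvar_mremap() size check. Destructive to our own mappings,
	 * so the program exits immediately afterwards. */
	void *p = mremap((void *)start, size, size / 2,
			 MREMAP_MAYMOVE | MREMAP_FIXED, dst);
	printf("size-changing move of [vvar]: %s (errno=%d, %s)\n",
	       p == MAP_FAILED ? "rejected" : "unexpectedly allowed",
	       errno, strerror(errno));
	return 0;
}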
@@ -108,28 +187,59 @@
 		return VM_FAULT_SIGBUS;
 
 	if (sym_offset == image->sym_vvar_page) {
-		ret = vm_insert_pfn(vma, vmf->address,
-				    __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
+		struct page *timens_page = find_timens_vvar_page(vma);
+
+		pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;
+
+		/*
+		 * If a task belongs to a time namespace then a namespace
+		 * specific VVAR is mapped with the sym_vvar_page offset and
+		 * the real VVAR page is mapped with the sym_timens_page
+		 * offset.
+		 * See also the comment near timens_setup_vdso_data().
+		 */
+		if (timens_page) {
+			unsigned long addr;
+			vm_fault_t err;
+
+			/*
+			 * Optimization: inside time namespace pre-fault
+			 * VVAR page too. As on timens page there are only
+			 * offsets for clocks on VVAR, it'll be faulted
+			 * shortly by VDSO code.
+			 */
+			addr = vmf->address + (image->sym_timens_page - sym_offset);
+			err = vmf_insert_pfn(vma, addr, pfn);
+			if (unlikely(err & VM_FAULT_ERROR))
+				return err;
+
+			pfn = page_to_pfn(timens_page);
+		}
+
+		return vmf_insert_pfn(vma, vmf->address, pfn);
 	} else if (sym_offset == image->sym_pvclock_page) {
 		struct pvclock_vsyscall_time_info *pvti =
 			pvclock_get_pvti_cpu0_va();
-		if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
-			ret = vm_insert_pfn_prot(
-				vma,
-				vmf->address,
-				__pa(pvti) >> PAGE_SHIFT,
-				pgprot_decrypted(vma->vm_page_prot));
+		if (pvti && vclock_was_used(VDSO_CLOCKMODE_PVCLOCK)) {
+			return vmf_insert_pfn_prot(vma, vmf->address,
+					__pa(pvti) >> PAGE_SHIFT,
+					pgprot_decrypted(vma->vm_page_prot));
 		}
 	} else if (sym_offset == image->sym_hvclock_page) {
 		struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page();
 
-		if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
-			ret = vm_insert_pfn(vma, vmf->address,
-					    vmalloc_to_pfn(tsc_pg));
-	}
+		if (tsc_pg && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK))
+			return vmf_insert_pfn(vma, vmf->address,
+					virt_to_phys(tsc_pg) >> PAGE_SHIFT);
+	} else if (sym_offset == image->sym_timens_page) {
+		struct page *timens_page = find_timens_vvar_page(vma);
 
-	if (ret == 0 || ret == -EBUSY)
-		return VM_FAULT_NOPAGE;
+		if (!timens_page)
+			return VM_FAULT_SIGBUS;
+
+		pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;
+		return vmf_insert_pfn(vma, vmf->address, pfn);
+	}
 
 	return VM_FAULT_SIGBUS;
 }
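
Two things happen in this hunk. First, the errno plumbing disappears: vmf_insert_pfn() returns a vm_fault_t itself, so the old tail translating ret == 0 || ret == -EBUSY into VM_FAULT_NOPAGE is no longer needed, and the Hyper-V page switches from vmalloc_to_pfn() to virt_to_phys() because the TSC page is now a statically allocated page in the hyperv_timer clocksource rather than vmalloc memory. Second, the time-namespace branch pre-faults the sibling page with pure offset arithmetic: the sym_* values are fixed negative offsets within the vDSO image, so the delta between two symbols locates one page from the other at any load address. A worked sketch with illustrative numbers for a 4-page vvar area:

#include <stdio.h>

#define PAGE 4096L

int main(void)
{
	/* Illustrative layout numbers (a 4-page vvar area below the
	 * vDSO text); the real values come from the vdso image symbols. */
	long sym_vvar_page   = -4 * PAGE;	/* offset that faulted */
	long sym_timens_page = -1 * PAGE;	/* sibling page's offset */
	unsigned long fault_address = 0x7ffff7fbd000UL;

	/* Same arithmetic as vvar_fault(): the symbol delta is
	 * layout-relative, so the mapping's base address never matters. */
	unsigned long addr =
		fault_address + (sym_timens_page - sym_vvar_page);

	printf("pre-fault timens page at %#lx (3 pages above)\n", addr);
	return 0;
}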
@@ -142,6 +252,7 @@
 static const struct vm_special_mapping vvar_mapping = {
 	.name = "[vvar]",
 	.fault = vvar_fault,
+	.mremap = vvar_mremap,
 };
 
 /*
@@ -156,7 +267,7 @@
 	unsigned long text_start;
 	int ret = 0;
 
-	if (down_write_killable(&mm->mmap_sem))
+	if (mmap_write_lock_killable(mm))
 		return -EINTR;
 
 	addr = get_unmapped_area(NULL, addr,
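
This hunk and the lock changes below are the mechanical mmap_sem to mmap_lock API conversion: callers go through named wrappers instead of open-coding rwsem operations on the mm. The wrappers are thin; approximately as follows (quoted from memory of include/linux/mmap_lock.h, where the field was also renamed from mmap_sem to mmap_lock), which is why the conversion is behavior-preserving:

/* Approximate shape of the wrappers; see include/linux/mmap_lock.h. */
static inline void mmap_write_lock(struct mm_struct *mm)
{
	down_write(&mm->mmap_lock);
}

static inline int mmap_write_lock_killable(struct mm_struct *mm)
{
	/* Returns -EINTR if a fatal signal arrives while sleeping. */
	return down_write_killable(&mm->mmap_lock);
}

static inline void mmap_write_unlock(struct mm_struct *mm)
{
	up_write(&mm->mmap_lock);
}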
@@ -199,7 +310,7 @@
 	}
 
 up_fail:
-	up_write(&mm->mmap_sem);
+	mmap_write_unlock(mm);
 	return ret;
 }
 
@@ -228,8 +339,8 @@
 
 	/* Round the lowest possible end address up to a PMD boundary. */
 	end = (start + len + PMD_SIZE - 1) & PMD_MASK;
-	if (end >= TASK_SIZE_MAX)
-		end = TASK_SIZE_MAX;
+	if (end >= DEFAULT_MAP_WINDOW)
+		end = DEFAULT_MAP_WINDOW;
 	end -= len;
 
 	if (end > start) {
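
TASK_SIZE_MAX tracks the paging mode: with 5-level page tables it reaches into the 56-bit range, but addresses above the classic 47-bit window are only handed out when userspace passes a high mmap() hint, so the randomized vDSO has to stay below DEFAULT_MAP_WINDOW. Roughly, simplified from arch/x86/include/asm/page_64_types.h (shown for orientation, not verbatim):

/* Orientation only, simplified; _AC() applies the UL suffix in C code. */
#define __VIRTUAL_MASK_SHIFT	(pgtable_l5_enabled() ? 56 : 47)
#define TASK_SIZE_MAX	((_AC(1, UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
#define DEFAULT_MAP_WINDOW	((1UL << 47) - PAGE_SIZE)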
@@ -261,22 +372,22 @@
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 
-	down_write(&mm->mmap_sem);
+	mmap_write_lock(mm);
 	/*
 	 * Check if we have already mapped vdso blob - fail to prevent
 	 * abusing from userspace install_speciall_mapping, which may
 	 * not do accounting and rlimit right.
 	 * We could search vma near context.vdso, but it's a slowpath,
-	 * so let's explicitely check all VMAs to be completely sure.
+	 * so let's explicitly check all VMAs to be completely sure.
 	 */
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		if (vma_is_special_mapping(vma, &vdso_mapping) ||
 		    vma_is_special_mapping(vma, &vvar_mapping)) {
-			up_write(&mm->mmap_sem);
+			mmap_write_unlock(mm);
 			return -EEXIST;
 		}
 	}
-	up_write(&mm->mmap_sem);
+	mmap_write_unlock(mm);
 
 	return map_vdso(image, addr);
 }
@@ -329,12 +440,14 @@
 static __init int vdso_setup(char *s)
 {
 	vdso64_enabled = simple_strtoul(s, NULL, 0);
-	return 0;
+	return 1;
 }
 __setup("vdso=", vdso_setup);
 
 static int __init init_vdso(void)
 {
+	BUILD_BUG_ON(VDSO_CLOCKMODE_MAX >= 32);
+
 	init_vdso_image(&vdso_image_64);
 
 #ifdef CONFIG_X86_X32_ABI
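
Two side fixes land in this last hunk. vdso_setup() now returns 1 because a __setup() handler returns nonzero to mark the parameter as consumed; returning 0 let "vdso=" leak through to init as a bogus argument or environment entry. The BUILD_BUG_ON pins down the invariant behind the new vclocks_used bitmask: VDSO_CLOCKMODE_* values index bits of a 32-bit word. The helper is approximately the following sketch (see the x86 vDSO headers for the real one):

/* Approximate shape of the helper; the real one lives in the x86
 * vDSO headers alongside the vclocks_used declaration above. */
extern unsigned int vclocks_used;

static inline bool vclock_was_used(int vclock)
{
	/* A VDSO_CLOCKMODE_* value >= 32 would shift past the mask,
	 * which is exactly what the BUILD_BUG_ON rules out. */
	return vclocks_used & (1U << vclock);
}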