.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * Copyright 2007 Andi Kleen, SUSE Labs. |
---|
3 | | - * Subject to the GPL, v.2 |
---|
4 | 4 | * |
---|
5 | 5 | * This contains most of the x86 vDSO kernel-side code. |
---|
6 | 6 | */ |
---|
.. | .. |
---|
14 | 14 | #include <linux/elf.h> |
---|
15 | 15 | #include <linux/cpu.h> |
---|
16 | 16 | #include <linux/ptrace.h> |
---|
| 17 | +#include <linux/time_namespace.h> |
---|
| 18 | + |
---|
17 | 19 | #include <asm/pvclock.h> |
---|
18 | 20 | #include <asm/vgtod.h> |
---|
19 | 21 | #include <asm/proto.h> |
---|
20 | 22 | #include <asm/vdso.h> |
---|
21 | 23 | #include <asm/vvar.h> |
---|
| 24 | +#include <asm/tlb.h> |
---|
22 | 25 | #include <asm/page.h> |
---|
23 | 26 | #include <asm/desc.h> |
---|
24 | 27 | #include <asm/cpufeature.h> |
---|
25 | | -#include <asm/mshyperv.h> |
---|
| 28 | +#include <clocksource/hyperv_timer.h> |
---|
| 29 | + |
---|
| 30 | +#undef _ASM_X86_VVAR_H |
---|
| 31 | +#define EMIT_VVAR(name, offset) \ |
---|
| 32 | + const size_t name ## _offset = offset; |
---|
| 33 | +#include <asm/vvar.h> |
---|
| 34 | + |
---|
| 35 | +struct vdso_data *arch_get_vdso_data(void *vvar_page) |
---|
| 36 | +{ |
---|
| 37 | + return (struct vdso_data *)(vvar_page + _vdso_data_offset); |
---|
| 38 | +} |
---|
| 39 | +#undef EMIT_VVAR |
---|
| 40 | + |
---|
| 41 | +unsigned int vclocks_used __read_mostly; |
---|
26 | 42 | |
---|
27 | 43 | #if defined(CONFIG_X86_64) |
---|
28 | 44 | unsigned int __read_mostly vdso64_enabled = 1; |
---|
.. | .. |
---|
37 | 53 | image->alt_len)); |
---|
38 | 54 | } |
---|
39 | 55 | |
---|
| 56 | +static const struct vm_special_mapping vvar_mapping; |
---|
40 | 57 | struct linux_binprm; |
---|
41 | 58 | |
---|
42 | | -static int vdso_fault(const struct vm_special_mapping *sm, |
---|
| 59 | +static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, |
---|
43 | 60 | struct vm_area_struct *vma, struct vm_fault *vmf) |
---|
44 | 61 | { |
---|
45 | 62 | const struct vdso_image *image = vma->vm_mm->context.vdso_image; |
---|
.. | .. |
---|
84 | 101 | return 0; |
---|
85 | 102 | } |
---|
86 | 103 | |
---|
87 | | -static int vvar_fault(const struct vm_special_mapping *sm, |
---|
| 104 | +static int vvar_mremap(const struct vm_special_mapping *sm, |
---|
| 105 | + struct vm_area_struct *new_vma) |
---|
| 106 | +{ |
---|
| 107 | + const struct vdso_image *image = new_vma->vm_mm->context.vdso_image; |
---|
| 108 | + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; |
---|
| 109 | + |
---|
| 110 | + if (new_size != -image->sym_vvar_start) |
---|
| 111 | + return -EINVAL; |
---|
| 112 | + |
---|
| 113 | + return 0; |
---|
| 114 | +} |
---|
| 115 | + |
---|
| 116 | +#ifdef CONFIG_TIME_NS |
---|
| 117 | +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) |
---|
| 118 | +{ |
---|
| 119 | + if (likely(vma->vm_mm == current->mm)) |
---|
| 120 | + return current->nsproxy->time_ns->vvar_page; |
---|
| 121 | + |
---|
| 122 | + /* |
---|
| 123 | + * VM_PFNMAP | VM_IO protect .fault() handler from being called |
---|
| 124 | + * through interfaces like /proc/$pid/mem or |
---|
| 125 | + * process_vm_{readv,writev}() as long as there's no .access() |
---|
| 126 | + * in special_mapping_vmops(). |
---|
| 127 | + * For more details check_vma_flags() and __access_remote_vm() |
---|
| 128 | + */ |
---|
| 129 | + |
---|
| 130 | + WARN(1, "vvar_page accessed remotely"); |
---|
| 131 | + |
---|
| 132 | + return NULL; |
---|
| 133 | +} |
---|
| 134 | + |
---|
| 135 | +/* |
---|
| 136 | + * The vvar page layout depends on whether a task belongs to the root or |
---|
| 137 | + * non-root time namespace. Whenever a task changes its namespace, the VVAR |
---|
| 138 | + * page tables are cleared and then they will re-faulted with a |
---|
| 139 | + * corresponding layout. |
---|
| 140 | + * See also the comment near timens_setup_vdso_data() for details. |
---|
| 141 | + */ |
---|
| 142 | +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) |
---|
| 143 | +{ |
---|
| 144 | + struct mm_struct *mm = task->mm; |
---|
| 145 | + struct vm_area_struct *vma; |
---|
| 146 | + |
---|
| 147 | + mmap_read_lock(mm); |
---|
| 148 | + |
---|
| 149 | + for (vma = mm->mmap; vma; vma = vma->vm_next) { |
---|
| 150 | + unsigned long size = vma->vm_end - vma->vm_start; |
---|
| 151 | + |
---|
| 152 | + if (vma_is_special_mapping(vma, &vvar_mapping)) |
---|
| 153 | + zap_page_range(vma, vma->vm_start, size); |
---|
| 154 | + } |
---|
| 155 | + |
---|
| 156 | + mmap_read_unlock(mm); |
---|
| 157 | + return 0; |
---|
| 158 | +} |
---|
| 159 | +#else |
---|
| 160 | +static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma) |
---|
| 161 | +{ |
---|
| 162 | + return NULL; |
---|
| 163 | +} |
---|
| 164 | +#endif |
---|
| 165 | + |
---|
| 166 | +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, |
---|
88 | 167 | struct vm_area_struct *vma, struct vm_fault *vmf) |
---|
89 | 168 | { |
---|
90 | 169 | const struct vdso_image *image = vma->vm_mm->context.vdso_image; |
---|
| 170 | + unsigned long pfn; |
---|
91 | 171 | long sym_offset; |
---|
92 | | - int ret = -EFAULT; |
---|
93 | 172 | |
---|
94 | 173 | if (!image) |
---|
95 | 174 | return VM_FAULT_SIGBUS; |
---|
.. | .. |
---|
108 | 187 | return VM_FAULT_SIGBUS; |
---|
109 | 188 | |
---|
110 | 189 | if (sym_offset == image->sym_vvar_page) { |
---|
111 | | - ret = vm_insert_pfn(vma, vmf->address, |
---|
112 | | - __pa_symbol(&__vvar_page) >> PAGE_SHIFT); |
---|
| 190 | + struct page *timens_page = find_timens_vvar_page(vma); |
---|
| 191 | + |
---|
| 192 | + pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT; |
---|
| 193 | + |
---|
| 194 | + /* |
---|
| 195 | + * If a task belongs to a time namespace then a namespace |
---|
| 196 | + * specific VVAR is mapped with the sym_vvar_page offset and |
---|
| 197 | + * the real VVAR page is mapped with the sym_timens_page |
---|
| 198 | + * offset. |
---|
| 199 | + * See also the comment near timens_setup_vdso_data(). |
---|
| 200 | + */ |
---|
| 201 | + if (timens_page) { |
---|
| 202 | + unsigned long addr; |
---|
| 203 | + vm_fault_t err; |
---|
| 204 | + |
---|
| 205 | + /* |
---|
| 206 | + * Optimization: inside time namespace pre-fault |
---|
| 207 | + * VVAR page too. As on timens page there are only |
---|
| 208 | + * offsets for clocks on VVAR, it'll be faulted |
---|
| 209 | + * shortly by VDSO code. |
---|
| 210 | + */ |
---|
| 211 | + addr = vmf->address + (image->sym_timens_page - sym_offset); |
---|
| 212 | + err = vmf_insert_pfn(vma, addr, pfn); |
---|
| 213 | + if (unlikely(err & VM_FAULT_ERROR)) |
---|
| 214 | + return err; |
---|
| 215 | + |
---|
| 216 | + pfn = page_to_pfn(timens_page); |
---|
| 217 | + } |
---|
| 218 | + |
---|
| 219 | + return vmf_insert_pfn(vma, vmf->address, pfn); |
---|
113 | 220 | } else if (sym_offset == image->sym_pvclock_page) { |
---|
114 | 221 | struct pvclock_vsyscall_time_info *pvti = |
---|
115 | 222 | pvclock_get_pvti_cpu0_va(); |
---|
116 | | - if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) { |
---|
117 | | - ret = vm_insert_pfn_prot( |
---|
118 | | - vma, |
---|
119 | | - vmf->address, |
---|
120 | | - __pa(pvti) >> PAGE_SHIFT, |
---|
121 | | - pgprot_decrypted(vma->vm_page_prot)); |
---|
| 223 | + if (pvti && vclock_was_used(VDSO_CLOCKMODE_PVCLOCK)) { |
---|
| 224 | + return vmf_insert_pfn_prot(vma, vmf->address, |
---|
| 225 | + __pa(pvti) >> PAGE_SHIFT, |
---|
| 226 | + pgprot_decrypted(vma->vm_page_prot)); |
---|
122 | 227 | } |
---|
123 | 228 | } else if (sym_offset == image->sym_hvclock_page) { |
---|
124 | 229 | struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page(); |
---|
125 | 230 | |
---|
126 | | - if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK)) |
---|
127 | | - ret = vm_insert_pfn(vma, vmf->address, |
---|
128 | | - vmalloc_to_pfn(tsc_pg)); |
---|
129 | | - } |
---|
| 231 | + if (tsc_pg && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK)) |
---|
| 232 | + return vmf_insert_pfn(vma, vmf->address, |
---|
| 233 | + virt_to_phys(tsc_pg) >> PAGE_SHIFT); |
---|
| 234 | + } else if (sym_offset == image->sym_timens_page) { |
---|
| 235 | + struct page *timens_page = find_timens_vvar_page(vma); |
---|
130 | 236 | |
---|
131 | | - if (ret == 0 || ret == -EBUSY) |
---|
132 | | - return VM_FAULT_NOPAGE; |
---|
| 237 | + if (!timens_page) |
---|
| 238 | + return VM_FAULT_SIGBUS; |
---|
| 239 | + |
---|
| 240 | + pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT; |
---|
| 241 | + return vmf_insert_pfn(vma, vmf->address, pfn); |
---|
| 242 | + } |
---|
133 | 243 | |
---|
134 | 244 | return VM_FAULT_SIGBUS; |
---|
135 | 245 | } |
---|
.. | .. |
---|
142 | 252 | static const struct vm_special_mapping vvar_mapping = { |
---|
143 | 253 | .name = "[vvar]", |
---|
144 | 254 | .fault = vvar_fault, |
---|
| 255 | + .mremap = vvar_mremap, |
---|
145 | 256 | }; |
---|
146 | 257 | |
---|
147 | 258 | /* |
---|
.. | .. |
---|
156 | 267 | unsigned long text_start; |
---|
157 | 268 | int ret = 0; |
---|
158 | 269 | |
---|
159 | | - if (down_write_killable(&mm->mmap_sem)) |
---|
| 270 | + if (mmap_write_lock_killable(mm)) |
---|
160 | 271 | return -EINTR; |
---|
161 | 272 | |
---|
162 | 273 | addr = get_unmapped_area(NULL, addr, |
---|
.. | .. |
---|
199 | 310 | } |
---|
200 | 311 | |
---|
201 | 312 | up_fail: |
---|
202 | | - up_write(&mm->mmap_sem); |
---|
| 313 | + mmap_write_unlock(mm); |
---|
203 | 314 | return ret; |
---|
204 | 315 | } |
---|
205 | 316 | |
---|
.. | .. |
---|
228 | 339 | |
---|
229 | 340 | /* Round the lowest possible end address up to a PMD boundary. */ |
---|
230 | 341 | end = (start + len + PMD_SIZE - 1) & PMD_MASK; |
---|
231 | | - if (end >= TASK_SIZE_MAX) |
---|
232 | | - end = TASK_SIZE_MAX; |
---|
| 342 | + if (end >= DEFAULT_MAP_WINDOW) |
---|
| 343 | + end = DEFAULT_MAP_WINDOW; |
---|
233 | 344 | end -= len; |
---|
234 | 345 | |
---|
235 | 346 | if (end > start) { |
---|
.. | .. |
---|
261 | 372 | struct mm_struct *mm = current->mm; |
---|
262 | 373 | struct vm_area_struct *vma; |
---|
263 | 374 | |
---|
264 | | - down_write(&mm->mmap_sem); |
---|
| 375 | + mmap_write_lock(mm); |
---|
265 | 376 | /* |
---|
266 | 377 | * Check if we have already mapped vdso blob - fail to prevent |
---|
267 | 378 | * abusing from userspace install_special_mapping, which may |
---|
268 | 379 | * not do accounting and rlimit right. |
---|
269 | 380 | * We could search vma near context.vdso, but it's a slowpath, |
---|
270 | | - * so let's explicitely check all VMAs to be completely sure. |
---|
| 381 | + * so let's explicitly check all VMAs to be completely sure. |
---|
271 | 382 | */ |
---|
272 | 383 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
---|
273 | 384 | if (vma_is_special_mapping(vma, &vdso_mapping) || |
---|
274 | 385 | vma_is_special_mapping(vma, &vvar_mapping)) { |
---|
275 | | - up_write(&mm->mmap_sem); |
---|
| 386 | + mmap_write_unlock(mm); |
---|
276 | 387 | return -EEXIST; |
---|
277 | 388 | } |
---|
278 | 389 | } |
---|
279 | | - up_write(&mm->mmap_sem); |
---|
| 390 | + mmap_write_unlock(mm); |
---|
280 | 391 | |
---|
281 | 392 | return map_vdso(image, addr); |
---|
282 | 393 | } |
---|
.. | .. |
---|
329 | 440 | static __init int vdso_setup(char *s) |
---|
330 | 441 | { |
---|
331 | 442 | vdso64_enabled = simple_strtoul(s, NULL, 0); |
---|
332 | | - return 0; |
---|
| 443 | + return 1; |
---|
333 | 444 | } |
---|
334 | 445 | __setup("vdso=", vdso_setup); |
---|
335 | 446 | |
---|
336 | 447 | static int __init init_vdso(void) |
---|
337 | 448 | { |
---|
| 449 | + BUILD_BUG_ON(VDSO_CLOCKMODE_MAX >= 32); |
---|
| 450 | + |
---|
338 | 451 | init_vdso_image(&vdso_image_64); |
---|
339 | 452 | |
---|
340 | 453 | #ifdef CONFIG_X86_X32_ABI |
---|