.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * Copyright (C) 1995 Linus Torvalds |
---|
3 | 4 | * |
---|
.. | .. |
---|
39 | 40 | #include <linux/ftrace.h> |
---|
40 | 41 | #include <linux/syscalls.h> |
---|
41 | 42 | |
---|
42 | | -#include <asm/pgtable.h> |
---|
43 | 43 | #include <asm/processor.h> |
---|
44 | 44 | #include <asm/fpu/internal.h> |
---|
45 | 45 | #include <asm/mmu_context.h> |
---|
.. | .. |
---|
47 | 47 | #include <asm/desc.h> |
---|
48 | 48 | #include <asm/proto.h> |
---|
49 | 49 | #include <asm/ia32.h> |
---|
50 | | -#include <asm/syscalls.h> |
---|
51 | 50 | #include <asm/debugreg.h> |
---|
52 | 51 | #include <asm/switch_to.h> |
---|
53 | 52 | #include <asm/xen/hypervisor.h> |
---|
54 | 53 | #include <asm/vdso.h> |
---|
55 | | -#include <asm/intel_rdt_sched.h> |
---|
| 54 | +#include <asm/resctrl.h> |
---|
56 | 55 | #include <asm/unistd.h> |
---|
| 56 | +#include <asm/fsgsbase.h> |
---|
57 | 57 | #ifdef CONFIG_IA32_EMULATION |
---|
58 | 58 | /* Not included via unistd.h */ |
---|
59 | 59 | #include <asm/unistd_32_ia32.h> |
---|
.. | .. |
---|
61 | 61 | |
---|
62 | 62 | #include "process.h" |
---|
63 | 63 | |
---|
64 | | -__visible DEFINE_PER_CPU(unsigned long, rsp_scratch); |
---|
65 | | - |
---|
66 | 64 | /* Prints also some state that isn't saved in the pt_regs */ |
---|
67 | | -void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) |
---|
| 65 | +void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, |
---|
| 66 | + const char *log_lvl) |
---|
68 | 67 | { |
---|
69 | 68 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; |
---|
70 | 69 | unsigned long d0, d1, d2, d3, d6, d7; |
---|
71 | 70 | unsigned int fsindex, gsindex; |
---|
72 | | - unsigned int ds, cs, es; |
---|
| 71 | + unsigned int ds, es; |
---|
73 | 72 | |
---|
74 | | - show_iret_regs(regs); |
---|
| 73 | + show_iret_regs(regs, log_lvl); |
---|
75 | 74 | |
---|
76 | 75 | if (regs->orig_ax != -1) |
---|
77 | 76 | pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax); |
---|
78 | 77 | else |
---|
79 | 78 | pr_cont("\n"); |
---|
80 | 79 | |
---|
81 | | - printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n", |
---|
82 | | - regs->ax, regs->bx, regs->cx); |
---|
83 | | - printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n", |
---|
84 | | - regs->dx, regs->si, regs->di); |
---|
85 | | - printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n", |
---|
86 | | - regs->bp, regs->r8, regs->r9); |
---|
87 | | - printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n", |
---|
88 | | - regs->r10, regs->r11, regs->r12); |
---|
89 | | - printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", |
---|
90 | | - regs->r13, regs->r14, regs->r15); |
---|
| 80 | + printk("%sRAX: %016lx RBX: %016lx RCX: %016lx\n", |
---|
| 81 | + log_lvl, regs->ax, regs->bx, regs->cx); |
---|
| 82 | + printk("%sRDX: %016lx RSI: %016lx RDI: %016lx\n", |
---|
| 83 | + log_lvl, regs->dx, regs->si, regs->di); |
---|
| 84 | + printk("%sRBP: %016lx R08: %016lx R09: %016lx\n", |
---|
| 85 | + log_lvl, regs->bp, regs->r8, regs->r9); |
---|
| 86 | + printk("%sR10: %016lx R11: %016lx R12: %016lx\n", |
---|
| 87 | + log_lvl, regs->r10, regs->r11, regs->r12); |
---|
| 88 | + printk("%sR13: %016lx R14: %016lx R15: %016lx\n", |
---|
| 89 | + log_lvl, regs->r13, regs->r14, regs->r15); |
---|
91 | 90 | |
---|
92 | 91 | if (mode == SHOW_REGS_SHORT) |
---|
93 | 92 | return; |
---|
.. | .. |
---|
95 | 94 | if (mode == SHOW_REGS_USER) { |
---|
96 | 95 | rdmsrl(MSR_FS_BASE, fs); |
---|
97 | 96 | rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); |
---|
98 | | - printk(KERN_DEFAULT "FS: %016lx GS: %016lx\n", |
---|
99 | | - fs, shadowgs); |
---|
| 97 | + printk("%sFS: %016lx GS: %016lx\n", |
---|
| 98 | + log_lvl, fs, shadowgs); |
---|
100 | 99 | return; |
---|
101 | 100 | } |
---|
102 | 101 | |
---|
103 | 102 | asm("movl %%ds,%0" : "=r" (ds)); |
---|
104 | | - asm("movl %%cs,%0" : "=r" (cs)); |
---|
105 | 103 | asm("movl %%es,%0" : "=r" (es)); |
---|
106 | 104 | asm("movl %%fs,%0" : "=r" (fsindex)); |
---|
107 | 105 | asm("movl %%gs,%0" : "=r" (gsindex)); |
---|
.. | .. |
---|
115 | 113 | cr3 = __read_cr3(); |
---|
116 | 114 | cr4 = __read_cr4(); |
---|
117 | 115 | |
---|
118 | | - printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", |
---|
119 | | - fs, fsindex, gs, gsindex, shadowgs); |
---|
120 | | - printk(KERN_DEFAULT "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, |
---|
121 | | - es, cr0); |
---|
122 | | - printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, |
---|
123 | | - cr4); |
---|
| 116 | + printk("%sFS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", |
---|
| 117 | + log_lvl, fs, fsindex, gs, gsindex, shadowgs); |
---|
| 118 | + printk("%sCS: %04lx DS: %04x ES: %04x CR0: %016lx\n", |
---|
| 119 | + log_lvl, regs->cs, ds, es, cr0); |
---|
| 120 | + printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n", |
---|
| 121 | + log_lvl, cr2, cr3, cr4); |
---|
124 | 122 | |
---|
125 | 123 | get_debugreg(d0, 0); |
---|
126 | 124 | get_debugreg(d1, 1); |
---|
.. | .. |
---|
132 | 130 | /* Only print out debug registers if they are in their non-default state. */ |
---|
133 | 131 | if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) && |
---|
134 | 132 | (d6 == DR6_RESERVED) && (d7 == 0x400))) { |
---|
135 | | - printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", |
---|
136 | | - d0, d1, d2); |
---|
137 | | - printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", |
---|
138 | | - d3, d6, d7); |
---|
| 133 | + printk("%sDR0: %016lx DR1: %016lx DR2: %016lx\n", |
---|
| 134 | + log_lvl, d0, d1, d2); |
---|
| 135 | + printk("%sDR3: %016lx DR6: %016lx DR7: %016lx\n", |
---|
| 136 | + log_lvl, d3, d6, d7); |
---|
139 | 137 | } |
---|
140 | 138 | |
---|
141 | 139 | if (boot_cpu_has(X86_FEATURE_OSPKE)) |
---|
142 | | - printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru()); |
---|
| 140 | + printk("%sPKRU: %08x\n", log_lvl, read_pkru()); |
---|
143 | 141 | } |
---|
144 | 142 | |
---|
145 | 143 | void release_thread(struct task_struct *dead_task) |
---|
146 | 144 | { |
---|
147 | | - if (dead_task->mm) { |
---|
148 | | -#ifdef CONFIG_MODIFY_LDT_SYSCALL |
---|
149 | | - if (dead_task->mm->context.ldt) { |
---|
150 | | - pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", |
---|
151 | | - dead_task->comm, |
---|
152 | | - dead_task->mm->context.ldt->entries, |
---|
153 | | - dead_task->mm->context.ldt->nr_entries); |
---|
154 | | - BUG(); |
---|
155 | | - } |
---|
156 | | -#endif |
---|
157 | | - } |
---|
| 145 | + WARN_ON(dead_task->mm); |
---|
158 | 146 | } |
---|
159 | 147 | |
---|
160 | 148 | enum which_selector { |
---|
161 | 149 | FS, |
---|
162 | 150 | GS |
---|
163 | 151 | }; |
---|
| 152 | + |
---|
| 153 | +/* |
---|
| 154 | + * Out of line to be protected from kprobes and tracing. If this would be |
---|
| 155 | + * traced or probed then any access to a per CPU variable happens with |
---|
| 156 | + * the wrong GS. |
---|
| 157 | + * |
---|
| 158 | + * It is not used on Xen paravirt. When paravirt support is needed, it |
---|
| 159 | + * needs to be renamed with native_ prefix. |
---|
| 160 | + */ |
---|
| 161 | +static noinstr unsigned long __rdgsbase_inactive(void) |
---|
| 162 | +{ |
---|
| 163 | + unsigned long gsbase; |
---|
| 164 | + |
---|
| 165 | + lockdep_assert_irqs_disabled(); |
---|
| 166 | + |
---|
| 167 | + if (!static_cpu_has(X86_FEATURE_XENPV)) { |
---|
| 168 | + native_swapgs(); |
---|
| 169 | + gsbase = rdgsbase(); |
---|
| 170 | + native_swapgs(); |
---|
| 171 | + } else { |
---|
| 172 | + instrumentation_begin(); |
---|
| 173 | + rdmsrl(MSR_KERNEL_GS_BASE, gsbase); |
---|
| 174 | + instrumentation_end(); |
---|
| 175 | + } |
---|
| 176 | + |
---|
| 177 | + return gsbase; |
---|
| 178 | +} |
---|
| 179 | + |
---|
| 180 | +/* |
---|
| 181 | + * Out of line to be protected from kprobes and tracing. If this would be |
---|
| 182 | + * traced or probed then any access to a per CPU variable happens with |
---|
| 183 | + * the wrong GS. |
---|
| 184 | + * |
---|
| 185 | + * It is not used on Xen paravirt. When paravirt support is needed, it |
---|
| 186 | + * needs to be renamed with native_ prefix. |
---|
| 187 | + */ |
---|
| 188 | +static noinstr void __wrgsbase_inactive(unsigned long gsbase) |
---|
| 189 | +{ |
---|
| 190 | + lockdep_assert_irqs_disabled(); |
---|
| 191 | + |
---|
| 192 | + if (!static_cpu_has(X86_FEATURE_XENPV)) { |
---|
| 193 | + native_swapgs(); |
---|
| 194 | + wrgsbase(gsbase); |
---|
| 195 | + native_swapgs(); |
---|
| 196 | + } else { |
---|
| 197 | + instrumentation_begin(); |
---|
| 198 | + wrmsrl(MSR_KERNEL_GS_BASE, gsbase); |
---|
| 199 | + instrumentation_end(); |
---|
| 200 | + } |
---|
| 201 | +} |
---|
164 | 202 | |
---|
165 | 203 | /* |
---|
166 | 204 | * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are |
---|
.. | .. |
---|
211 | 249 | { |
---|
212 | 250 | savesegment(fs, task->thread.fsindex); |
---|
213 | 251 | savesegment(gs, task->thread.gsindex); |
---|
214 | | - save_base_legacy(task, task->thread.fsindex, FS); |
---|
215 | | - save_base_legacy(task, task->thread.gsindex, GS); |
---|
| 252 | + if (static_cpu_has(X86_FEATURE_FSGSBASE)) { |
---|
| 253 | + /* |
---|
| 254 | + * If FSGSBASE is enabled, we can't make any useful guesses |
---|
| 255 | + * about the base, and user code expects us to save the current |
---|
| 256 | + * value. Fortunately, reading the base directly is efficient. |
---|
| 257 | + */ |
---|
| 258 | + task->thread.fsbase = rdfsbase(); |
---|
| 259 | + task->thread.gsbase = __rdgsbase_inactive(); |
---|
| 260 | + } else { |
---|
| 261 | + save_base_legacy(task, task->thread.fsindex, FS); |
---|
| 262 | + save_base_legacy(task, task->thread.gsindex, GS); |
---|
| 263 | + } |
---|
216 | 264 | } |
---|
217 | 265 | |
---|
218 | | -#if IS_ENABLED(CONFIG_KVM) |
---|
219 | 266 | /* |
---|
220 | 267 | * While a process is running,current->thread.fsbase and current->thread.gsbase |
---|
221 | | - * may not match the corresponding CPU registers (see save_base_legacy()). KVM |
---|
222 | | - * wants an efficient way to save and restore FSBASE and GSBASE. |
---|
223 | | - * When FSGSBASE extensions are enabled, this will have to use RD{FS,GS}BASE. |
---|
| 268 | + * may not match the corresponding CPU registers (see save_base_legacy()). |
---|
224 | 269 | */ |
---|
225 | | -void save_fsgs_for_kvm(void) |
---|
| 270 | +void current_save_fsgs(void) |
---|
226 | 271 | { |
---|
| 272 | + unsigned long flags; |
---|
| 273 | + |
---|
| 274 | + /* Interrupts need to be off for FSGSBASE */ |
---|
| 275 | + local_irq_save(flags); |
---|
227 | 276 | save_fsgs(current); |
---|
| 277 | + local_irq_restore(flags); |
---|
228 | 278 | } |
---|
229 | | -EXPORT_SYMBOL_GPL(save_fsgs_for_kvm); |
---|
| 279 | +#if IS_ENABLED(CONFIG_KVM) |
---|
| 280 | +EXPORT_SYMBOL_GPL(current_save_fsgs); |
---|
230 | 281 | #endif |
---|
231 | 282 | |
---|
232 | 283 | static __always_inline void loadseg(enum which_selector which, |
---|
.. | .. |
---|
288 | 339 | } |
---|
289 | 340 | } |
---|
290 | 341 | |
---|
291 | | -int copy_thread_tls(unsigned long clone_flags, unsigned long sp, |
---|
292 | | - unsigned long arg, struct task_struct *p, unsigned long tls) |
---|
| 342 | +static __always_inline void x86_fsgsbase_load(struct thread_struct *prev, |
---|
| 343 | + struct thread_struct *next) |
---|
293 | 344 | { |
---|
294 | | - int err; |
---|
295 | | - struct pt_regs *childregs; |
---|
296 | | - struct fork_frame *fork_frame; |
---|
297 | | - struct inactive_task_frame *frame; |
---|
298 | | - struct task_struct *me = current; |
---|
| 345 | + if (static_cpu_has(X86_FEATURE_FSGSBASE)) { |
---|
| 346 | + /* Update the FS and GS selectors if they could have changed. */ |
---|
| 347 | + if (unlikely(prev->fsindex || next->fsindex)) |
---|
| 348 | + loadseg(FS, next->fsindex); |
---|
| 349 | + if (unlikely(prev->gsindex || next->gsindex)) |
---|
| 350 | + loadseg(GS, next->gsindex); |
---|
299 | 351 | |
---|
300 | | - childregs = task_pt_regs(p); |
---|
301 | | - fork_frame = container_of(childregs, struct fork_frame, regs); |
---|
302 | | - frame = &fork_frame->frame; |
---|
303 | | - |
---|
304 | | - /* |
---|
305 | | - * For a new task use the RESET flags value since there is no before. |
---|
306 | | - * All the status flags are zero; DF and all the system flags must also |
---|
307 | | - * be 0, specifically IF must be 0 because we context switch to the new |
---|
308 | | - * task with interrupts disabled. |
---|
309 | | - */ |
---|
310 | | - frame->flags = X86_EFLAGS_FIXED; |
---|
311 | | - frame->bp = 0; |
---|
312 | | - frame->ret_addr = (unsigned long) ret_from_fork; |
---|
313 | | - p->thread.sp = (unsigned long) fork_frame; |
---|
314 | | - p->thread.io_bitmap_ptr = NULL; |
---|
315 | | - |
---|
316 | | - savesegment(gs, p->thread.gsindex); |
---|
317 | | - p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase; |
---|
318 | | - savesegment(fs, p->thread.fsindex); |
---|
319 | | - p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase; |
---|
320 | | - savesegment(es, p->thread.es); |
---|
321 | | - savesegment(ds, p->thread.ds); |
---|
322 | | - memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); |
---|
323 | | - |
---|
324 | | - if (unlikely(p->flags & PF_KTHREAD)) { |
---|
325 | | - /* kernel thread */ |
---|
326 | | - memset(childregs, 0, sizeof(struct pt_regs)); |
---|
327 | | - frame->bx = sp; /* function */ |
---|
328 | | - frame->r12 = arg; |
---|
329 | | - return 0; |
---|
| 352 | + /* Update the bases. */ |
---|
| 353 | + wrfsbase(next->fsbase); |
---|
| 354 | + __wrgsbase_inactive(next->gsbase); |
---|
| 355 | + } else { |
---|
| 356 | + load_seg_legacy(prev->fsindex, prev->fsbase, |
---|
| 357 | + next->fsindex, next->fsbase, FS); |
---|
| 358 | + load_seg_legacy(prev->gsindex, prev->gsbase, |
---|
| 359 | + next->gsindex, next->gsbase, GS); |
---|
330 | 360 | } |
---|
331 | | - frame->bx = 0; |
---|
332 | | - *childregs = *current_pt_regs(); |
---|
| 361 | +} |
---|
333 | 362 | |
---|
334 | | - childregs->ax = 0; |
---|
335 | | - if (sp) |
---|
336 | | - childregs->sp = sp; |
---|
| 363 | +unsigned long x86_fsgsbase_read_task(struct task_struct *task, |
---|
| 364 | + unsigned short selector) |
---|
| 365 | +{ |
---|
| 366 | + unsigned short idx = selector >> 3; |
---|
| 367 | + unsigned long base; |
---|
337 | 368 | |
---|
338 | | - err = -ENOMEM; |
---|
339 | | - if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { |
---|
340 | | - p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr, |
---|
341 | | - IO_BITMAP_BYTES, GFP_KERNEL); |
---|
342 | | - if (!p->thread.io_bitmap_ptr) { |
---|
343 | | - p->thread.io_bitmap_max = 0; |
---|
344 | | - return -ENOMEM; |
---|
345 | | - } |
---|
346 | | - set_tsk_thread_flag(p, TIF_IO_BITMAP); |
---|
347 | | - } |
---|
| 369 | + if (likely((selector & SEGMENT_TI_MASK) == 0)) { |
---|
| 370 | + if (unlikely(idx >= GDT_ENTRIES)) |
---|
| 371 | + return 0; |
---|
348 | 372 | |
---|
349 | | - /* |
---|
350 | | - * Set a new TLS for the child thread? |
---|
351 | | - */ |
---|
352 | | - if (clone_flags & CLONE_SETTLS) { |
---|
353 | | -#ifdef CONFIG_IA32_EMULATION |
---|
354 | | - if (in_ia32_syscall()) |
---|
355 | | - err = do_set_thread_area(p, -1, |
---|
356 | | - (struct user_desc __user *)tls, 0); |
---|
| 373 | + /* |
---|
| 374 | + * There are no user segments in the GDT with nonzero bases |
---|
| 375 | + * other than the TLS segments. |
---|
| 376 | + */ |
---|
| 377 | + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) |
---|
| 378 | + return 0; |
---|
| 379 | + |
---|
| 380 | + idx -= GDT_ENTRY_TLS_MIN; |
---|
| 381 | + base = get_desc_base(&task->thread.tls_array[idx]); |
---|
| 382 | + } else { |
---|
| 383 | +#ifdef CONFIG_MODIFY_LDT_SYSCALL |
---|
| 384 | + struct ldt_struct *ldt; |
---|
| 385 | + |
---|
| 386 | + /* |
---|
| 387 | + * If performance here mattered, we could protect the LDT |
---|
| 388 | + * with RCU. This is a slow path, though, so we can just |
---|
| 389 | + * take the mutex. |
---|
| 390 | + */ |
---|
| 391 | + mutex_lock(&task->mm->context.lock); |
---|
| 392 | + ldt = task->mm->context.ldt; |
---|
| 393 | + if (unlikely(!ldt || idx >= ldt->nr_entries)) |
---|
| 394 | + base = 0; |
---|
357 | 395 | else |
---|
| 396 | + base = get_desc_base(ldt->entries + idx); |
---|
| 397 | + mutex_unlock(&task->mm->context.lock); |
---|
| 398 | +#else |
---|
| 399 | + base = 0; |
---|
358 | 400 | #endif |
---|
359 | | - err = do_arch_prctl_64(p, ARCH_SET_FS, tls); |
---|
360 | | - if (err) |
---|
361 | | - goto out; |
---|
362 | | - } |
---|
363 | | - err = 0; |
---|
364 | | -out: |
---|
365 | | - if (err && p->thread.io_bitmap_ptr) { |
---|
366 | | - kfree(p->thread.io_bitmap_ptr); |
---|
367 | | - p->thread.io_bitmap_max = 0; |
---|
368 | 401 | } |
---|
369 | 402 | |
---|
370 | | - return err; |
---|
| 403 | + return base; |
---|
| 404 | +} |
---|
| 405 | + |
---|
| 406 | +unsigned long x86_gsbase_read_cpu_inactive(void) |
---|
| 407 | +{ |
---|
| 408 | + unsigned long gsbase; |
---|
| 409 | + |
---|
| 410 | + if (boot_cpu_has(X86_FEATURE_FSGSBASE)) { |
---|
| 411 | + unsigned long flags; |
---|
| 412 | + |
---|
| 413 | + local_irq_save(flags); |
---|
| 414 | + gsbase = __rdgsbase_inactive(); |
---|
| 415 | + local_irq_restore(flags); |
---|
| 416 | + } else { |
---|
| 417 | + rdmsrl(MSR_KERNEL_GS_BASE, gsbase); |
---|
| 418 | + } |
---|
| 419 | + |
---|
| 420 | + return gsbase; |
---|
| 421 | +} |
---|
| 422 | + |
---|
| 423 | +void x86_gsbase_write_cpu_inactive(unsigned long gsbase) |
---|
| 424 | +{ |
---|
| 425 | + if (boot_cpu_has(X86_FEATURE_FSGSBASE)) { |
---|
| 426 | + unsigned long flags; |
---|
| 427 | + |
---|
| 428 | + local_irq_save(flags); |
---|
| 429 | + __wrgsbase_inactive(gsbase); |
---|
| 430 | + local_irq_restore(flags); |
---|
| 431 | + } else { |
---|
| 432 | + wrmsrl(MSR_KERNEL_GS_BASE, gsbase); |
---|
| 433 | + } |
---|
| 434 | +} |
---|
| 435 | + |
---|
| 436 | +unsigned long x86_fsbase_read_task(struct task_struct *task) |
---|
| 437 | +{ |
---|
| 438 | + unsigned long fsbase; |
---|
| 439 | + |
---|
| 440 | + if (task == current) |
---|
| 441 | + fsbase = x86_fsbase_read_cpu(); |
---|
| 442 | + else if (boot_cpu_has(X86_FEATURE_FSGSBASE) || |
---|
| 443 | + (task->thread.fsindex == 0)) |
---|
| 444 | + fsbase = task->thread.fsbase; |
---|
| 445 | + else |
---|
| 446 | + fsbase = x86_fsgsbase_read_task(task, task->thread.fsindex); |
---|
| 447 | + |
---|
| 448 | + return fsbase; |
---|
| 449 | +} |
---|
| 450 | + |
---|
| 451 | +unsigned long x86_gsbase_read_task(struct task_struct *task) |
---|
| 452 | +{ |
---|
| 453 | + unsigned long gsbase; |
---|
| 454 | + |
---|
| 455 | + if (task == current) |
---|
| 456 | + gsbase = x86_gsbase_read_cpu_inactive(); |
---|
| 457 | + else if (boot_cpu_has(X86_FEATURE_FSGSBASE) || |
---|
| 458 | + (task->thread.gsindex == 0)) |
---|
| 459 | + gsbase = task->thread.gsbase; |
---|
| 460 | + else |
---|
| 461 | + gsbase = x86_fsgsbase_read_task(task, task->thread.gsindex); |
---|
| 462 | + |
---|
| 463 | + return gsbase; |
---|
| 464 | +} |
---|
| 465 | + |
---|
| 466 | +void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase) |
---|
| 467 | +{ |
---|
| 468 | + WARN_ON_ONCE(task == current); |
---|
| 469 | + |
---|
| 470 | + task->thread.fsbase = fsbase; |
---|
| 471 | +} |
---|
| 472 | + |
---|
| 473 | +void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase) |
---|
| 474 | +{ |
---|
| 475 | + WARN_ON_ONCE(task == current); |
---|
| 476 | + |
---|
| 477 | + task->thread.gsbase = gsbase; |
---|
371 | 478 | } |
---|
372 | 479 | |
---|
373 | 480 | static void |
---|
.. | .. |
---|
393 | 500 | regs->cs = _cs; |
---|
394 | 501 | regs->ss = _ss; |
---|
395 | 502 | regs->flags = X86_EFLAGS_IF; |
---|
396 | | - force_iret(); |
---|
397 | 503 | } |
---|
398 | 504 | |
---|
399 | 505 | void |
---|
.. | .. |
---|
429 | 535 | { |
---|
430 | 536 | struct thread_struct *prev = &prev_p->thread; |
---|
431 | 537 | struct thread_struct *next = &next_p->thread; |
---|
432 | | - struct fpu *prev_fpu = &prev->fpu; |
---|
433 | | - struct fpu *next_fpu = &next->fpu; |
---|
434 | 538 | int cpu = smp_processor_id(); |
---|
435 | 539 | |
---|
436 | 540 | WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && |
---|
437 | 541 | this_cpu_read(irq_count) != -1); |
---|
438 | 542 | |
---|
439 | | - switch_fpu_prepare(prev_fpu, cpu); |
---|
| 543 | + if (!test_thread_flag(TIF_NEED_FPU_LOAD)) |
---|
| 544 | + switch_fpu_prepare(prev_p, cpu); |
---|
440 | 545 | |
---|
441 | 546 | /* We must save %fs and %gs before load_TLS() because |
---|
442 | 547 | * %fs and %gs may be cleared by load_TLS(). |
---|
.. | .. |
---|
454 | 559 | /* |
---|
455 | 560 | * Leave lazy mode, flushing any hypercalls made here. This |
---|
456 | 561 | * must be done after loading TLS entries in the GDT but before |
---|
457 | | - * loading segments that might reference them, and and it must |
---|
458 | | - * be done before fpu__restore(), so the TS bit is up to |
---|
459 | | - * date. |
---|
| 562 | + * loading segments that might reference them. |
---|
460 | 563 | */ |
---|
461 | 564 | arch_end_context_switch(next_p); |
---|
462 | 565 | |
---|
.. | .. |
---|
482 | 585 | if (unlikely(next->ds | prev->ds)) |
---|
483 | 586 | loadsegment(ds, next->ds); |
---|
484 | 587 | |
---|
485 | | - load_seg_legacy(prev->fsindex, prev->fsbase, |
---|
486 | | - next->fsindex, next->fsbase, FS); |
---|
487 | | - load_seg_legacy(prev->gsindex, prev->gsbase, |
---|
488 | | - next->gsindex, next->gsbase, GS); |
---|
489 | | - |
---|
490 | | - switch_fpu_finish(next_fpu, cpu); |
---|
| 588 | + x86_fsgsbase_load(prev, next); |
---|
491 | 589 | |
---|
492 | 590 | /* |
---|
493 | 591 | * Switch the PDA and FPU contexts. |
---|
.. | .. |
---|
495 | 593 | this_cpu_write(current_task, next_p); |
---|
496 | 594 | this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); |
---|
497 | 595 | |
---|
| 596 | + switch_fpu_finish(next_p); |
---|
| 597 | + |
---|
498 | 598 | /* Reload sp0. */ |
---|
499 | 599 | update_task_stack(next_p); |
---|
500 | 600 | |
---|
501 | 601 | switch_to_extra(prev_p, next_p); |
---|
502 | | - |
---|
503 | | -#ifdef CONFIG_XEN_PV |
---|
504 | | - /* |
---|
505 | | - * On Xen PV, IOPL bits in pt_regs->flags have no effect, and |
---|
506 | | - * current_pt_regs()->flags may not match the current task's |
---|
507 | | - * intended IOPL. We need to switch it manually. |
---|
508 | | - */ |
---|
509 | | - if (unlikely(static_cpu_has(X86_FEATURE_XENPV) && |
---|
510 | | - prev->iopl != next->iopl)) |
---|
511 | | - xen_set_iopl_mask(next->iopl); |
---|
512 | | -#endif |
---|
513 | 602 | |
---|
514 | 603 | if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) { |
---|
515 | 604 | /* |
---|
.. | .. |
---|
540 | 629 | } |
---|
541 | 630 | |
---|
542 | 631 | /* Load the Intel cache allocation PQR MSR. */ |
---|
543 | | - intel_rdt_sched_in(); |
---|
| 632 | + resctrl_sched_in(next_p); |
---|
544 | 633 | |
---|
545 | 634 | return prev_p; |
---|
546 | 635 | } |
---|
.. | .. |
---|
564 | 653 | /* TBD: overwrites user setup. Should have two bits. |
---|
565 | 654 | But 64bit processes have always behaved this way, |
---|
566 | 655 | so it's not too bad. The main problem is just that |
---|
567 | | - 32bit childs are affected again. */ |
---|
| 656 | + 32bit children are affected again. */ |
---|
568 | 657 | current->personality &= ~READ_IMPLIES_EXEC; |
---|
569 | 658 | } |
---|
570 | 659 | |
---|
.. | .. |
---|
577 | 666 | current->mm->context.ia32_compat = TIF_X32; |
---|
578 | 667 | current->personality &= ~READ_IMPLIES_EXEC; |
---|
579 | 668 | /* |
---|
580 | | - * in_compat_syscall() uses the presence of the x32 syscall bit |
---|
| 669 | + * in_32bit_syscall() uses the presence of the x32 syscall bit |
---|
581 | 670 | * flag to determine compat status. The x86 mmap() code relies on |
---|
582 | 671 | * the syscall bitness so set x32 syscall bit right here to make |
---|
583 | | - * in_compat_syscall() work during exec(). |
---|
| 672 | + * in_32bit_syscall() work during exec(). |
---|
584 | 673 | * |
---|
585 | 674 | * Pretend to come from a x32 execve. |
---|
586 | 675 | */ |
---|
.. | .. |
---|
631 | 720 | long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) |
---|
632 | 721 | { |
---|
633 | 722 | int ret = 0; |
---|
634 | | - int doit = task == current; |
---|
635 | | - int cpu; |
---|
636 | 723 | |
---|
637 | 724 | switch (option) { |
---|
638 | | - case ARCH_SET_GS: |
---|
639 | | - if (arg2 >= TASK_SIZE_MAX) |
---|
| 725 | + case ARCH_SET_GS: { |
---|
| 726 | + if (unlikely(arg2 >= TASK_SIZE_MAX)) |
---|
640 | 727 | return -EPERM; |
---|
641 | | - cpu = get_cpu(); |
---|
642 | | - task->thread.gsindex = 0; |
---|
643 | | - task->thread.gsbase = arg2; |
---|
644 | | - if (doit) { |
---|
645 | | - load_gs_index(0); |
---|
646 | | - ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2); |
---|
647 | | - } |
---|
648 | | - put_cpu(); |
---|
649 | | - break; |
---|
650 | | - case ARCH_SET_FS: |
---|
651 | | - /* Not strictly needed for fs, but do it for symmetry |
---|
652 | | - with gs */ |
---|
653 | | - if (arg2 >= TASK_SIZE_MAX) |
---|
654 | | - return -EPERM; |
---|
655 | | - cpu = get_cpu(); |
---|
656 | | - task->thread.fsindex = 0; |
---|
657 | | - task->thread.fsbase = arg2; |
---|
658 | | - if (doit) { |
---|
659 | | - /* set the selector to 0 to not confuse __switch_to */ |
---|
660 | | - loadsegment(fs, 0); |
---|
661 | | - ret = wrmsrl_safe(MSR_FS_BASE, arg2); |
---|
662 | | - } |
---|
663 | | - put_cpu(); |
---|
664 | | - break; |
---|
665 | | - case ARCH_GET_FS: { |
---|
666 | | - unsigned long base; |
---|
667 | 728 | |
---|
668 | | - if (doit) |
---|
669 | | - rdmsrl(MSR_FS_BASE, base); |
---|
670 | | - else |
---|
671 | | - base = task->thread.fsbase; |
---|
| 729 | + preempt_disable(); |
---|
| 730 | + /* |
---|
| 731 | + * ARCH_SET_GS has always overwritten the index |
---|
| 732 | + * and the base. Zero is the most sensible value |
---|
| 733 | + * to put in the index, and is the only value that |
---|
| 734 | + * makes any sense if FSGSBASE is unavailable. |
---|
| 735 | + */ |
---|
| 736 | + if (task == current) { |
---|
| 737 | + loadseg(GS, 0); |
---|
| 738 | + x86_gsbase_write_cpu_inactive(arg2); |
---|
| 739 | + |
---|
| 740 | + /* |
---|
| 741 | + * On non-FSGSBASE systems, save_base_legacy() expects |
---|
| 742 | + * that we also fill in thread.gsbase. |
---|
| 743 | + */ |
---|
| 744 | + task->thread.gsbase = arg2; |
---|
| 745 | + |
---|
| 746 | + } else { |
---|
| 747 | + task->thread.gsindex = 0; |
---|
| 748 | + x86_gsbase_write_task(task, arg2); |
---|
| 749 | + } |
---|
| 750 | + preempt_enable(); |
---|
| 751 | + break; |
---|
| 752 | + } |
---|
| 753 | + case ARCH_SET_FS: { |
---|
| 754 | + /* |
---|
| 755 | + * Not strictly needed for %fs, but do it for symmetry |
---|
| 756 | + * with %gs |
---|
| 757 | + */ |
---|
| 758 | + if (unlikely(arg2 >= TASK_SIZE_MAX)) |
---|
| 759 | + return -EPERM; |
---|
| 760 | + |
---|
| 761 | + preempt_disable(); |
---|
| 762 | + /* |
---|
| 763 | + * Set the selector to 0 for the same reason |
---|
| 764 | + * as %gs above. |
---|
| 765 | + */ |
---|
| 766 | + if (task == current) { |
---|
| 767 | + loadseg(FS, 0); |
---|
| 768 | + x86_fsbase_write_cpu(arg2); |
---|
| 769 | + |
---|
| 770 | + /* |
---|
| 771 | + * On non-FSGSBASE systems, save_base_legacy() expects |
---|
| 772 | + * that we also fill in thread.fsbase. |
---|
| 773 | + */ |
---|
| 774 | + task->thread.fsbase = arg2; |
---|
| 775 | + } else { |
---|
| 776 | + task->thread.fsindex = 0; |
---|
| 777 | + x86_fsbase_write_task(task, arg2); |
---|
| 778 | + } |
---|
| 779 | + preempt_enable(); |
---|
| 780 | + break; |
---|
| 781 | + } |
---|
| 782 | + case ARCH_GET_FS: { |
---|
| 783 | + unsigned long base = x86_fsbase_read_task(task); |
---|
| 784 | + |
---|
672 | 785 | ret = put_user(base, (unsigned long __user *)arg2); |
---|
673 | 786 | break; |
---|
674 | 787 | } |
---|
675 | 788 | case ARCH_GET_GS: { |
---|
676 | | - unsigned long base; |
---|
| 789 | + unsigned long base = x86_gsbase_read_task(task); |
---|
677 | 790 | |
---|
678 | | - if (doit) |
---|
679 | | - rdmsrl(MSR_KERNEL_GS_BASE, base); |
---|
680 | | - else |
---|
681 | | - base = task->thread.gsbase; |
---|
682 | 791 | ret = put_user(base, (unsigned long __user *)arg2); |
---|
683 | 792 | break; |
---|
684 | 793 | } |
---|