| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * Copyright (C) 1995 Linus Torvalds |
|---|
| 3 | 4 | * |
|---|
| .. | .. |
|---|
| 39 | 40 | #include <linux/ftrace.h> |
|---|
| 40 | 41 | #include <linux/syscalls.h> |
|---|
| 41 | 42 | |
|---|
| 42 | | -#include <asm/pgtable.h> |
|---|
| 43 | 43 | #include <asm/processor.h> |
|---|
| 44 | 44 | #include <asm/fpu/internal.h> |
|---|
| 45 | 45 | #include <asm/mmu_context.h> |
|---|
| .. | .. |
|---|
| 47 | 47 | #include <asm/desc.h> |
|---|
| 48 | 48 | #include <asm/proto.h> |
|---|
| 49 | 49 | #include <asm/ia32.h> |
|---|
| 50 | | -#include <asm/syscalls.h> |
|---|
| 51 | 50 | #include <asm/debugreg.h> |
|---|
| 52 | 51 | #include <asm/switch_to.h> |
|---|
| 53 | 52 | #include <asm/xen/hypervisor.h> |
|---|
| 54 | 53 | #include <asm/vdso.h> |
|---|
| 55 | | -#include <asm/intel_rdt_sched.h> |
|---|
| 54 | +#include <asm/resctrl.h> |
|---|
| 56 | 55 | #include <asm/unistd.h> |
|---|
| 56 | +#include <asm/fsgsbase.h> |
|---|
| 57 | 57 | #ifdef CONFIG_IA32_EMULATION |
|---|
| 58 | 58 | /* Not included via unistd.h */ |
|---|
| 59 | 59 | #include <asm/unistd_32_ia32.h> |
|---|
| .. | .. |
|---|
| 61 | 61 | |
|---|
| 62 | 62 | #include "process.h" |
|---|
| 63 | 63 | |
|---|
| 64 | | -__visible DEFINE_PER_CPU(unsigned long, rsp_scratch); |
|---|
| 65 | | - |
|---|
| 66 | 64 | /* Prints also some state that isn't saved in the pt_regs */ |
|---|
| 67 | | -void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) |
|---|
| 65 | +void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, |
|---|
| 66 | + const char *log_lvl) |
|---|
| 68 | 67 | { |
|---|
| 69 | 68 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; |
|---|
| 70 | 69 | unsigned long d0, d1, d2, d3, d6, d7; |
|---|
| 71 | 70 | unsigned int fsindex, gsindex; |
|---|
| 72 | | - unsigned int ds, cs, es; |
|---|
| 71 | + unsigned int ds, es; |
|---|
| 73 | 72 | |
|---|
| 74 | | - show_iret_regs(regs); |
|---|
| 73 | + show_iret_regs(regs, log_lvl); |
|---|
| 75 | 74 | |
|---|
| 76 | 75 | if (regs->orig_ax != -1) |
|---|
| 77 | 76 | pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax); |
|---|
| 78 | 77 | else |
|---|
| 79 | 78 | pr_cont("\n"); |
|---|
| 80 | 79 | |
|---|
| 81 | | - printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n", |
|---|
| 82 | | - regs->ax, regs->bx, regs->cx); |
|---|
| 83 | | - printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n", |
|---|
| 84 | | - regs->dx, regs->si, regs->di); |
|---|
| 85 | | - printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n", |
|---|
| 86 | | - regs->bp, regs->r8, regs->r9); |
|---|
| 87 | | - printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n", |
|---|
| 88 | | - regs->r10, regs->r11, regs->r12); |
|---|
| 89 | | - printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", |
|---|
| 90 | | - regs->r13, regs->r14, regs->r15); |
|---|
| 80 | + printk("%sRAX: %016lx RBX: %016lx RCX: %016lx\n", |
|---|
| 81 | + log_lvl, regs->ax, regs->bx, regs->cx); |
|---|
| 82 | + printk("%sRDX: %016lx RSI: %016lx RDI: %016lx\n", |
|---|
| 83 | + log_lvl, regs->dx, regs->si, regs->di); |
|---|
| 84 | + printk("%sRBP: %016lx R08: %016lx R09: %016lx\n", |
|---|
| 85 | + log_lvl, regs->bp, regs->r8, regs->r9); |
|---|
| 86 | + printk("%sR10: %016lx R11: %016lx R12: %016lx\n", |
|---|
| 87 | + log_lvl, regs->r10, regs->r11, regs->r12); |
|---|
| 88 | + printk("%sR13: %016lx R14: %016lx R15: %016lx\n", |
|---|
| 89 | + log_lvl, regs->r13, regs->r14, regs->r15); |
|---|
| 91 | 90 | |
|---|
| 92 | 91 | if (mode == SHOW_REGS_SHORT) |
|---|
| 93 | 92 | return; |
|---|
| .. | .. |
|---|
| 95 | 94 | if (mode == SHOW_REGS_USER) { |
|---|
| 96 | 95 | rdmsrl(MSR_FS_BASE, fs); |
|---|
| 97 | 96 | rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); |
|---|
| 98 | | - printk(KERN_DEFAULT "FS: %016lx GS: %016lx\n", |
|---|
| 99 | | - fs, shadowgs); |
|---|
| 97 | + printk("%sFS: %016lx GS: %016lx\n", |
|---|
| 98 | + log_lvl, fs, shadowgs); |
|---|
| 100 | 99 | return; |
|---|
| 101 | 100 | } |
|---|
| 102 | 101 | |
|---|
| 103 | 102 | asm("movl %%ds,%0" : "=r" (ds)); |
|---|
| 104 | | - asm("movl %%cs,%0" : "=r" (cs)); |
|---|
| 105 | 103 | asm("movl %%es,%0" : "=r" (es)); |
|---|
| 106 | 104 | asm("movl %%fs,%0" : "=r" (fsindex)); |
|---|
| 107 | 105 | asm("movl %%gs,%0" : "=r" (gsindex)); |
|---|
| .. | .. |
|---|
| 115 | 113 | cr3 = __read_cr3(); |
|---|
| 116 | 114 | cr4 = __read_cr4(); |
|---|
| 117 | 115 | |
|---|
| 118 | | - printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", |
|---|
| 119 | | - fs, fsindex, gs, gsindex, shadowgs); |
|---|
| 120 | | - printk(KERN_DEFAULT "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, |
|---|
| 121 | | - es, cr0); |
|---|
| 122 | | - printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, |
|---|
| 123 | | - cr4); |
|---|
| 116 | + printk("%sFS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", |
|---|
| 117 | + log_lvl, fs, fsindex, gs, gsindex, shadowgs); |
|---|
| 118 | + printk("%sCS: %04lx DS: %04x ES: %04x CR0: %016lx\n", |
|---|
| 119 | + log_lvl, regs->cs, ds, es, cr0); |
|---|
| 120 | + printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n", |
|---|
| 121 | + log_lvl, cr2, cr3, cr4); |
|---|
| 124 | 122 | |
|---|
| 125 | 123 | get_debugreg(d0, 0); |
|---|
| 126 | 124 | get_debugreg(d1, 1); |
|---|
| .. | .. |
|---|
| 132 | 130 | /* Only print out debug registers if they are in their non-default state. */ |
|---|
| 133 | 131 | if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) && |
|---|
| 134 | 132 | (d6 == DR6_RESERVED) && (d7 == 0x400))) { |
|---|
| 135 | | - printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", |
|---|
| 136 | | - d0, d1, d2); |
|---|
| 137 | | - printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", |
|---|
| 138 | | - d3, d6, d7); |
|---|
| 133 | + printk("%sDR0: %016lx DR1: %016lx DR2: %016lx\n", |
|---|
| 134 | + log_lvl, d0, d1, d2); |
|---|
| 135 | + printk("%sDR3: %016lx DR6: %016lx DR7: %016lx\n", |
|---|
| 136 | + log_lvl, d3, d6, d7); |
|---|
| 139 | 137 | } |
|---|
| 140 | 138 | |
|---|
| 141 | 139 | if (boot_cpu_has(X86_FEATURE_OSPKE)) |
|---|
| 142 | | - printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru()); |
|---|
| 140 | + printk("%sPKRU: %08x\n", log_lvl, read_pkru()); |
|---|
| 143 | 141 | } |
|---|
| 144 | 142 | |
|---|
| 145 | 143 | void release_thread(struct task_struct *dead_task) |
|---|
| 146 | 144 | { |
|---|
| 147 | | - if (dead_task->mm) { |
|---|
| 148 | | -#ifdef CONFIG_MODIFY_LDT_SYSCALL |
|---|
| 149 | | - if (dead_task->mm->context.ldt) { |
|---|
| 150 | | - pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", |
|---|
| 151 | | - dead_task->comm, |
|---|
| 152 | | - dead_task->mm->context.ldt->entries, |
|---|
| 153 | | - dead_task->mm->context.ldt->nr_entries); |
|---|
| 154 | | - BUG(); |
|---|
| 155 | | - } |
|---|
| 156 | | -#endif |
|---|
| 157 | | - } |
|---|
| 145 | + WARN_ON(dead_task->mm); |
|---|
| 158 | 146 | } |
|---|
| 159 | 147 | |
|---|
| 160 | 148 | enum which_selector { |
|---|
| 161 | 149 | FS, |
|---|
| 162 | 150 | GS |
|---|
| 163 | 151 | }; |
|---|
| 152 | + |
|---|
| 153 | +/* |
|---|
| 154 | + * Out of line to be protected from kprobes and tracing. If this would be |
|---|
| 155 | + * traced or probed then any access to a per CPU variable happens with |
|---|
| 156 | + * the wrong GS. |
|---|
| 157 | + * |
|---|
| 158 | + * It is not used on Xen paravirt. When paravirt support is needed, it |
|---|
| 159 | + * needs to be renamed with native_ prefix. |
|---|
| 160 | + */ |
|---|
| 161 | +static noinstr unsigned long __rdgsbase_inactive(void) |
|---|
| 162 | +{ |
|---|
| 163 | + unsigned long gsbase; |
|---|
| 164 | + |
|---|
| 165 | + lockdep_assert_irqs_disabled(); |
|---|
| 166 | + |
|---|
| 167 | + if (!static_cpu_has(X86_FEATURE_XENPV)) { |
|---|
| 168 | + native_swapgs(); |
|---|
| 169 | + gsbase = rdgsbase(); |
|---|
| 170 | + native_swapgs(); |
|---|
| 171 | + } else { |
|---|
| 172 | + instrumentation_begin(); |
|---|
| 173 | + rdmsrl(MSR_KERNEL_GS_BASE, gsbase); |
|---|
| 174 | + instrumentation_end(); |
|---|
| 175 | + } |
|---|
| 176 | + |
|---|
| 177 | + return gsbase; |
|---|
| 178 | +} |
|---|
| 179 | + |
|---|
| 180 | +/* |
|---|
| 181 | + * Out of line to be protected from kprobes and tracing. If this would be |
|---|
| 182 | + * traced or probed then any access to a per CPU variable happens with |
|---|
| 183 | + * the wrong GS. |
|---|
| 184 | + * |
|---|
| 185 | + * It is not used on Xen paravirt. When paravirt support is needed, it |
|---|
| 186 | + * needs to be renamed with native_ prefix. |
|---|
| 187 | + */ |
|---|
| 188 | +static noinstr void __wrgsbase_inactive(unsigned long gsbase) |
|---|
| 189 | +{ |
|---|
| 190 | + lockdep_assert_irqs_disabled(); |
|---|
| 191 | + |
|---|
| 192 | + if (!static_cpu_has(X86_FEATURE_XENPV)) { |
|---|
| 193 | + native_swapgs(); |
|---|
| 194 | + wrgsbase(gsbase); |
|---|
| 195 | + native_swapgs(); |
|---|
| 196 | + } else { |
|---|
| 197 | + instrumentation_begin(); |
|---|
| 198 | + wrmsrl(MSR_KERNEL_GS_BASE, gsbase); |
|---|
| 199 | + instrumentation_end(); |
|---|
| 200 | + } |
|---|
| 201 | +} |
|---|
| 164 | 202 | |
|---|
| 165 | 203 | /* |
|---|
| 166 | 204 | * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are |
|---|
| .. | .. |
|---|
| 211 | 249 | { |
|---|
| 212 | 250 | savesegment(fs, task->thread.fsindex); |
|---|
| 213 | 251 | savesegment(gs, task->thread.gsindex); |
|---|
| 214 | | - save_base_legacy(task, task->thread.fsindex, FS); |
|---|
| 215 | | - save_base_legacy(task, task->thread.gsindex, GS); |
|---|
| 252 | + if (static_cpu_has(X86_FEATURE_FSGSBASE)) { |
|---|
| 253 | + /* |
|---|
| 254 | + * If FSGSBASE is enabled, we can't make any useful guesses |
|---|
| 255 | + * about the base, and user code expects us to save the current |
|---|
| 256 | + * value. Fortunately, reading the base directly is efficient. |
|---|
| 257 | + */ |
|---|
| 258 | + task->thread.fsbase = rdfsbase(); |
|---|
| 259 | + task->thread.gsbase = __rdgsbase_inactive(); |
|---|
| 260 | + } else { |
|---|
| 261 | + save_base_legacy(task, task->thread.fsindex, FS); |
|---|
| 262 | + save_base_legacy(task, task->thread.gsindex, GS); |
|---|
| 263 | + } |
|---|
| 216 | 264 | } |
|---|
| 217 | 265 | |
|---|
| 218 | | -#if IS_ENABLED(CONFIG_KVM) |
|---|
| 219 | 266 | /* |
|---|
| 220 | 267 | * While a process is running,current->thread.fsbase and current->thread.gsbase |
|---|
| 221 | | - * may not match the corresponding CPU registers (see save_base_legacy()). KVM |
|---|
| 222 | | - * wants an efficient way to save and restore FSBASE and GSBASE. |
|---|
| 223 | | - * When FSGSBASE extensions are enabled, this will have to use RD{FS,GS}BASE. |
|---|
| 268 | + * may not match the corresponding CPU registers (see save_base_legacy()). |
|---|
| 224 | 269 | */ |
|---|
| 225 | | -void save_fsgs_for_kvm(void) |
|---|
| 270 | +void current_save_fsgs(void) |
|---|
| 226 | 271 | { |
|---|
| 272 | + unsigned long flags; |
|---|
| 273 | + |
|---|
| 274 | + /* Interrupts need to be off for FSGSBASE */ |
|---|
| 275 | + local_irq_save(flags); |
|---|
| 227 | 276 | save_fsgs(current); |
|---|
| 277 | + local_irq_restore(flags); |
|---|
| 228 | 278 | } |
|---|
| 229 | | -EXPORT_SYMBOL_GPL(save_fsgs_for_kvm); |
|---|
| 279 | +#if IS_ENABLED(CONFIG_KVM) |
|---|
| 280 | +EXPORT_SYMBOL_GPL(current_save_fsgs); |
|---|
| 230 | 281 | #endif |
|---|
| 231 | 282 | |
|---|
| 232 | 283 | static __always_inline void loadseg(enum which_selector which, |
|---|
| .. | .. |
|---|
| 288 | 339 | } |
|---|
| 289 | 340 | } |
|---|
| 290 | 341 | |
|---|
| 291 | | -int copy_thread_tls(unsigned long clone_flags, unsigned long sp, |
|---|
| 292 | | - unsigned long arg, struct task_struct *p, unsigned long tls) |
|---|
| 342 | +static __always_inline void x86_fsgsbase_load(struct thread_struct *prev, |
|---|
| 343 | + struct thread_struct *next) |
|---|
| 293 | 344 | { |
|---|
| 294 | | - int err; |
|---|
| 295 | | - struct pt_regs *childregs; |
|---|
| 296 | | - struct fork_frame *fork_frame; |
|---|
| 297 | | - struct inactive_task_frame *frame; |
|---|
| 298 | | - struct task_struct *me = current; |
|---|
| 345 | + if (static_cpu_has(X86_FEATURE_FSGSBASE)) { |
|---|
| 346 | + /* Update the FS and GS selectors if they could have changed. */ |
|---|
| 347 | + if (unlikely(prev->fsindex || next->fsindex)) |
|---|
| 348 | + loadseg(FS, next->fsindex); |
|---|
| 349 | + if (unlikely(prev->gsindex || next->gsindex)) |
|---|
| 350 | + loadseg(GS, next->gsindex); |
|---|
| 299 | 351 | |
|---|
| 300 | | - childregs = task_pt_regs(p); |
|---|
| 301 | | - fork_frame = container_of(childregs, struct fork_frame, regs); |
|---|
| 302 | | - frame = &fork_frame->frame; |
|---|
| 303 | | - |
|---|
| 304 | | - /* |
|---|
| 305 | | - * For a new task use the RESET flags value since there is no before. |
|---|
| 306 | | - * All the status flags are zero; DF and all the system flags must also |
|---|
| 307 | | - * be 0, specifically IF must be 0 because we context switch to the new |
|---|
| 308 | | - * task with interrupts disabled. |
|---|
| 309 | | - */ |
|---|
| 310 | | - frame->flags = X86_EFLAGS_FIXED; |
|---|
| 311 | | - frame->bp = 0; |
|---|
| 312 | | - frame->ret_addr = (unsigned long) ret_from_fork; |
|---|
| 313 | | - p->thread.sp = (unsigned long) fork_frame; |
|---|
| 314 | | - p->thread.io_bitmap_ptr = NULL; |
|---|
| 315 | | - |
|---|
| 316 | | - savesegment(gs, p->thread.gsindex); |
|---|
| 317 | | - p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase; |
|---|
| 318 | | - savesegment(fs, p->thread.fsindex); |
|---|
| 319 | | - p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase; |
|---|
| 320 | | - savesegment(es, p->thread.es); |
|---|
| 321 | | - savesegment(ds, p->thread.ds); |
|---|
| 322 | | - memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); |
|---|
| 323 | | - |
|---|
| 324 | | - if (unlikely(p->flags & PF_KTHREAD)) { |
|---|
| 325 | | - /* kernel thread */ |
|---|
| 326 | | - memset(childregs, 0, sizeof(struct pt_regs)); |
|---|
| 327 | | - frame->bx = sp; /* function */ |
|---|
| 328 | | - frame->r12 = arg; |
|---|
| 329 | | - return 0; |
|---|
| 352 | + /* Update the bases. */ |
|---|
| 353 | + wrfsbase(next->fsbase); |
|---|
| 354 | + __wrgsbase_inactive(next->gsbase); |
|---|
| 355 | + } else { |
|---|
| 356 | + load_seg_legacy(prev->fsindex, prev->fsbase, |
|---|
| 357 | + next->fsindex, next->fsbase, FS); |
|---|
| 358 | + load_seg_legacy(prev->gsindex, prev->gsbase, |
|---|
| 359 | + next->gsindex, next->gsbase, GS); |
|---|
| 330 | 360 | } |
|---|
| 331 | | - frame->bx = 0; |
|---|
| 332 | | - *childregs = *current_pt_regs(); |
|---|
| 361 | +} |
|---|
| 333 | 362 | |
|---|
| 334 | | - childregs->ax = 0; |
|---|
| 335 | | - if (sp) |
|---|
| 336 | | - childregs->sp = sp; |
|---|
| 363 | +unsigned long x86_fsgsbase_read_task(struct task_struct *task, |
|---|
| 364 | + unsigned short selector) |
|---|
| 365 | +{ |
|---|
| 366 | + unsigned short idx = selector >> 3; |
|---|
| 367 | + unsigned long base; |
|---|
| 337 | 368 | |
|---|
| 338 | | - err = -ENOMEM; |
|---|
| 339 | | - if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { |
|---|
| 340 | | - p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr, |
|---|
| 341 | | - IO_BITMAP_BYTES, GFP_KERNEL); |
|---|
| 342 | | - if (!p->thread.io_bitmap_ptr) { |
|---|
| 343 | | - p->thread.io_bitmap_max = 0; |
|---|
| 344 | | - return -ENOMEM; |
|---|
| 345 | | - } |
|---|
| 346 | | - set_tsk_thread_flag(p, TIF_IO_BITMAP); |
|---|
| 347 | | - } |
|---|
| 369 | + if (likely((selector & SEGMENT_TI_MASK) == 0)) { |
|---|
| 370 | + if (unlikely(idx >= GDT_ENTRIES)) |
|---|
| 371 | + return 0; |
|---|
| 348 | 372 | |
|---|
| 349 | | - /* |
|---|
| 350 | | - * Set a new TLS for the child thread? |
|---|
| 351 | | - */ |
|---|
| 352 | | - if (clone_flags & CLONE_SETTLS) { |
|---|
| 353 | | -#ifdef CONFIG_IA32_EMULATION |
|---|
| 354 | | - if (in_ia32_syscall()) |
|---|
| 355 | | - err = do_set_thread_area(p, -1, |
|---|
| 356 | | - (struct user_desc __user *)tls, 0); |
|---|
| 373 | + /* |
|---|
| 374 | + * There are no user segments in the GDT with nonzero bases |
|---|
| 375 | + * other than the TLS segments. |
|---|
| 376 | + */ |
|---|
| 377 | + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) |
|---|
| 378 | + return 0; |
|---|
| 379 | + |
|---|
| 380 | + idx -= GDT_ENTRY_TLS_MIN; |
|---|
| 381 | + base = get_desc_base(&task->thread.tls_array[idx]); |
|---|
| 382 | + } else { |
|---|
| 383 | +#ifdef CONFIG_MODIFY_LDT_SYSCALL |
|---|
| 384 | + struct ldt_struct *ldt; |
|---|
| 385 | + |
|---|
| 386 | + /* |
|---|
| 387 | + * If performance here mattered, we could protect the LDT |
|---|
| 388 | + * with RCU. This is a slow path, though, so we can just |
|---|
| 389 | + * take the mutex. |
|---|
| 390 | + */ |
|---|
| 391 | + mutex_lock(&task->mm->context.lock); |
|---|
| 392 | + ldt = task->mm->context.ldt; |
|---|
| 393 | + if (unlikely(!ldt || idx >= ldt->nr_entries)) |
|---|
| 394 | + base = 0; |
|---|
| 357 | 395 | else |
|---|
| 396 | + base = get_desc_base(ldt->entries + idx); |
|---|
| 397 | + mutex_unlock(&task->mm->context.lock); |
|---|
| 398 | +#else |
|---|
| 399 | + base = 0; |
|---|
| 358 | 400 | #endif |
|---|
| 359 | | - err = do_arch_prctl_64(p, ARCH_SET_FS, tls); |
|---|
| 360 | | - if (err) |
|---|
| 361 | | - goto out; |
|---|
| 362 | | - } |
|---|
| 363 | | - err = 0; |
|---|
| 364 | | -out: |
|---|
| 365 | | - if (err && p->thread.io_bitmap_ptr) { |
|---|
| 366 | | - kfree(p->thread.io_bitmap_ptr); |
|---|
| 367 | | - p->thread.io_bitmap_max = 0; |
|---|
| 368 | 401 | } |
|---|
| 369 | 402 | |
|---|
| 370 | | - return err; |
|---|
| 403 | + return base; |
|---|
| 404 | +} |
|---|
| 405 | + |
|---|
| 406 | +unsigned long x86_gsbase_read_cpu_inactive(void) |
|---|
| 407 | +{ |
|---|
| 408 | + unsigned long gsbase; |
|---|
| 409 | + |
|---|
| 410 | + if (boot_cpu_has(X86_FEATURE_FSGSBASE)) { |
|---|
| 411 | + unsigned long flags; |
|---|
| 412 | + |
|---|
| 413 | + local_irq_save(flags); |
|---|
| 414 | + gsbase = __rdgsbase_inactive(); |
|---|
| 415 | + local_irq_restore(flags); |
|---|
| 416 | + } else { |
|---|
| 417 | + rdmsrl(MSR_KERNEL_GS_BASE, gsbase); |
|---|
| 418 | + } |
|---|
| 419 | + |
|---|
| 420 | + return gsbase; |
|---|
| 421 | +} |
|---|
| 422 | + |
|---|
| 423 | +void x86_gsbase_write_cpu_inactive(unsigned long gsbase) |
|---|
| 424 | +{ |
|---|
| 425 | + if (boot_cpu_has(X86_FEATURE_FSGSBASE)) { |
|---|
| 426 | + unsigned long flags; |
|---|
| 427 | + |
|---|
| 428 | + local_irq_save(flags); |
|---|
| 429 | + __wrgsbase_inactive(gsbase); |
|---|
| 430 | + local_irq_restore(flags); |
|---|
| 431 | + } else { |
|---|
| 432 | + wrmsrl(MSR_KERNEL_GS_BASE, gsbase); |
|---|
| 433 | + } |
|---|
| 434 | +} |
|---|
| 435 | + |
|---|
| 436 | +unsigned long x86_fsbase_read_task(struct task_struct *task) |
|---|
| 437 | +{ |
|---|
| 438 | + unsigned long fsbase; |
|---|
| 439 | + |
|---|
| 440 | + if (task == current) |
|---|
| 441 | + fsbase = x86_fsbase_read_cpu(); |
|---|
| 442 | + else if (boot_cpu_has(X86_FEATURE_FSGSBASE) || |
|---|
| 443 | + (task->thread.fsindex == 0)) |
|---|
| 444 | + fsbase = task->thread.fsbase; |
|---|
| 445 | + else |
|---|
| 446 | + fsbase = x86_fsgsbase_read_task(task, task->thread.fsindex); |
|---|
| 447 | + |
|---|
| 448 | + return fsbase; |
|---|
| 449 | +} |
|---|
| 450 | + |
|---|
| 451 | +unsigned long x86_gsbase_read_task(struct task_struct *task) |
|---|
| 452 | +{ |
|---|
| 453 | + unsigned long gsbase; |
|---|
| 454 | + |
|---|
| 455 | + if (task == current) |
|---|
| 456 | + gsbase = x86_gsbase_read_cpu_inactive(); |
|---|
| 457 | + else if (boot_cpu_has(X86_FEATURE_FSGSBASE) || |
|---|
| 458 | + (task->thread.gsindex == 0)) |
|---|
| 459 | + gsbase = task->thread.gsbase; |
|---|
| 460 | + else |
|---|
| 461 | + gsbase = x86_fsgsbase_read_task(task, task->thread.gsindex); |
|---|
| 462 | + |
|---|
| 463 | + return gsbase; |
|---|
| 464 | +} |
|---|
| 465 | + |
|---|
| 466 | +void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase) |
|---|
| 467 | +{ |
|---|
| 468 | + WARN_ON_ONCE(task == current); |
|---|
| 469 | + |
|---|
| 470 | + task->thread.fsbase = fsbase; |
|---|
| 471 | +} |
|---|
| 472 | + |
|---|
| 473 | +void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase) |
|---|
| 474 | +{ |
|---|
| 475 | + WARN_ON_ONCE(task == current); |
|---|
| 476 | + |
|---|
| 477 | + task->thread.gsbase = gsbase; |
|---|
| 371 | 478 | } |
|---|
| 372 | 479 | |
|---|
| 373 | 480 | static void |
|---|
| .. | .. |
|---|
| 393 | 500 | regs->cs = _cs; |
|---|
| 394 | 501 | regs->ss = _ss; |
|---|
| 395 | 502 | regs->flags = X86_EFLAGS_IF; |
|---|
| 396 | | - force_iret(); |
|---|
| 397 | 503 | } |
|---|
| 398 | 504 | |
|---|
| 399 | 505 | void |
|---|
| .. | .. |
|---|
| 429 | 535 | { |
|---|
| 430 | 536 | struct thread_struct *prev = &prev_p->thread; |
|---|
| 431 | 537 | struct thread_struct *next = &next_p->thread; |
|---|
| 432 | | - struct fpu *prev_fpu = &prev->fpu; |
|---|
| 433 | | - struct fpu *next_fpu = &next->fpu; |
|---|
| 434 | 538 | int cpu = smp_processor_id(); |
|---|
| 435 | 539 | |
|---|
| 436 | 540 | WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && |
|---|
| 437 | 541 | this_cpu_read(irq_count) != -1); |
|---|
| 438 | 542 | |
|---|
| 439 | | - switch_fpu_prepare(prev_fpu, cpu); |
|---|
| 543 | + if (!test_thread_flag(TIF_NEED_FPU_LOAD)) |
|---|
| 544 | + switch_fpu_prepare(prev_p, cpu); |
|---|
| 440 | 545 | |
|---|
| 441 | 546 | /* We must save %fs and %gs before load_TLS() because |
|---|
| 442 | 547 | * %fs and %gs may be cleared by load_TLS(). |
|---|
| .. | .. |
|---|
| 454 | 559 | /* |
|---|
| 455 | 560 | * Leave lazy mode, flushing any hypercalls made here. This |
|---|
| 456 | 561 | * must be done after loading TLS entries in the GDT but before |
|---|
| 457 | | - * loading segments that might reference them, and and it must |
|---|
| 458 | | - * be done before fpu__restore(), so the TS bit is up to |
|---|
| 459 | | - * date. |
|---|
| 562 | + * loading segments that might reference them. |
|---|
| 460 | 563 | */ |
|---|
| 461 | 564 | arch_end_context_switch(next_p); |
|---|
| 462 | 565 | |
|---|
| .. | .. |
|---|
| 482 | 585 | if (unlikely(next->ds | prev->ds)) |
|---|
| 483 | 586 | loadsegment(ds, next->ds); |
|---|
| 484 | 587 | |
|---|
| 485 | | - load_seg_legacy(prev->fsindex, prev->fsbase, |
|---|
| 486 | | - next->fsindex, next->fsbase, FS); |
|---|
| 487 | | - load_seg_legacy(prev->gsindex, prev->gsbase, |
|---|
| 488 | | - next->gsindex, next->gsbase, GS); |
|---|
| 489 | | - |
|---|
| 490 | | - switch_fpu_finish(next_fpu, cpu); |
|---|
| 588 | + x86_fsgsbase_load(prev, next); |
|---|
| 491 | 589 | |
|---|
| 492 | 590 | /* |
|---|
| 493 | 591 | * Switch the PDA and FPU contexts. |
|---|
| .. | .. |
|---|
| 495 | 593 | this_cpu_write(current_task, next_p); |
|---|
| 496 | 594 | this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); |
|---|
| 497 | 595 | |
|---|
| 596 | + switch_fpu_finish(next_p); |
|---|
| 597 | + |
|---|
| 498 | 598 | /* Reload sp0. */ |
|---|
| 499 | 599 | update_task_stack(next_p); |
|---|
| 500 | 600 | |
|---|
| 501 | 601 | switch_to_extra(prev_p, next_p); |
|---|
| 502 | | - |
|---|
| 503 | | -#ifdef CONFIG_XEN_PV |
|---|
| 504 | | - /* |
|---|
| 505 | | - * On Xen PV, IOPL bits in pt_regs->flags have no effect, and |
|---|
| 506 | | - * current_pt_regs()->flags may not match the current task's |
|---|
| 507 | | - * intended IOPL. We need to switch it manually. |
|---|
| 508 | | - */ |
|---|
| 509 | | - if (unlikely(static_cpu_has(X86_FEATURE_XENPV) && |
|---|
| 510 | | - prev->iopl != next->iopl)) |
|---|
| 511 | | - xen_set_iopl_mask(next->iopl); |
|---|
| 512 | | -#endif |
|---|
| 513 | 602 | |
|---|
| 514 | 603 | if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) { |
|---|
| 515 | 604 | /* |
|---|
| .. | .. |
|---|
| 540 | 629 | } |
|---|
| 541 | 630 | |
|---|
| 542 | 631 | /* Load the Intel cache allocation PQR MSR. */ |
|---|
| 543 | | - intel_rdt_sched_in(); |
|---|
| 632 | + resctrl_sched_in(next_p); |
|---|
| 544 | 633 | |
|---|
| 545 | 634 | return prev_p; |
|---|
| 546 | 635 | } |
|---|
| .. | .. |
|---|
| 564 | 653 | /* TBD: overwrites user setup. Should have two bits. |
|---|
| 565 | 654 | But 64bit processes have always behaved this way, |
|---|
| 566 | 655 | so it's not too bad. The main problem is just that |
|---|
| 567 | | - 32bit childs are affected again. */ |
|---|
| 656 | + 32bit children are affected again. */ |
|---|
| 568 | 657 | current->personality &= ~READ_IMPLIES_EXEC; |
|---|
| 569 | 658 | } |
|---|
| 570 | 659 | |
|---|
| .. | .. |
|---|
| 577 | 666 | current->mm->context.ia32_compat = TIF_X32; |
|---|
| 578 | 667 | current->personality &= ~READ_IMPLIES_EXEC; |
|---|
| 579 | 668 | /* |
|---|
| 580 | | - * in_compat_syscall() uses the presence of the x32 syscall bit |
|---|
| 669 | + * in_32bit_syscall() uses the presence of the x32 syscall bit |
|---|
| 581 | 670 | * flag to determine compat status. The x86 mmap() code relies on |
|---|
| 582 | 671 | * the syscall bitness so set x32 syscall bit right here to make |
|---|
| 583 | | - * in_compat_syscall() work during exec(). |
|---|
| 672 | + * in_32bit_syscall() work during exec(). |
|---|
| 584 | 673 | * |
|---|
| 585 | 674 | * Pretend to come from a x32 execve. |
|---|
| 586 | 675 | */ |
|---|
| .. | .. |
|---|
| 631 | 720 | long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) |
|---|
| 632 | 721 | { |
|---|
| 633 | 722 | int ret = 0; |
|---|
| 634 | | - int doit = task == current; |
|---|
| 635 | | - int cpu; |
|---|
| 636 | 723 | |
|---|
| 637 | 724 | switch (option) { |
|---|
| 638 | | - case ARCH_SET_GS: |
|---|
| 639 | | - if (arg2 >= TASK_SIZE_MAX) |
|---|
| 725 | + case ARCH_SET_GS: { |
|---|
| 726 | + if (unlikely(arg2 >= TASK_SIZE_MAX)) |
|---|
| 640 | 727 | return -EPERM; |
|---|
| 641 | | - cpu = get_cpu(); |
|---|
| 642 | | - task->thread.gsindex = 0; |
|---|
| 643 | | - task->thread.gsbase = arg2; |
|---|
| 644 | | - if (doit) { |
|---|
| 645 | | - load_gs_index(0); |
|---|
| 646 | | - ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2); |
|---|
| 647 | | - } |
|---|
| 648 | | - put_cpu(); |
|---|
| 649 | | - break; |
|---|
| 650 | | - case ARCH_SET_FS: |
|---|
| 651 | | - /* Not strictly needed for fs, but do it for symmetry |
|---|
| 652 | | - with gs */ |
|---|
| 653 | | - if (arg2 >= TASK_SIZE_MAX) |
|---|
| 654 | | - return -EPERM; |
|---|
| 655 | | - cpu = get_cpu(); |
|---|
| 656 | | - task->thread.fsindex = 0; |
|---|
| 657 | | - task->thread.fsbase = arg2; |
|---|
| 658 | | - if (doit) { |
|---|
| 659 | | - /* set the selector to 0 to not confuse __switch_to */ |
|---|
| 660 | | - loadsegment(fs, 0); |
|---|
| 661 | | - ret = wrmsrl_safe(MSR_FS_BASE, arg2); |
|---|
| 662 | | - } |
|---|
| 663 | | - put_cpu(); |
|---|
| 664 | | - break; |
|---|
| 665 | | - case ARCH_GET_FS: { |
|---|
| 666 | | - unsigned long base; |
|---|
| 667 | 728 | |
|---|
| 668 | | - if (doit) |
|---|
| 669 | | - rdmsrl(MSR_FS_BASE, base); |
|---|
| 670 | | - else |
|---|
| 671 | | - base = task->thread.fsbase; |
|---|
| 729 | + preempt_disable(); |
|---|
| 730 | + /* |
|---|
| 731 | + * ARCH_SET_GS has always overwritten the index |
|---|
| 732 | + * and the base. Zero is the most sensible value |
|---|
| 733 | + * to put in the index, and is the only value that |
|---|
| 734 | + * makes any sense if FSGSBASE is unavailable. |
|---|
| 735 | + */ |
|---|
| 736 | + if (task == current) { |
|---|
| 737 | + loadseg(GS, 0); |
|---|
| 738 | + x86_gsbase_write_cpu_inactive(arg2); |
|---|
| 739 | + |
|---|
| 740 | + /* |
|---|
| 741 | + * On non-FSGSBASE systems, save_base_legacy() expects |
|---|
| 742 | + * that we also fill in thread.gsbase. |
|---|
| 743 | + */ |
|---|
| 744 | + task->thread.gsbase = arg2; |
|---|
| 745 | + |
|---|
| 746 | + } else { |
|---|
| 747 | + task->thread.gsindex = 0; |
|---|
| 748 | + x86_gsbase_write_task(task, arg2); |
|---|
| 749 | + } |
|---|
| 750 | + preempt_enable(); |
|---|
| 751 | + break; |
|---|
| 752 | + } |
|---|
| 753 | + case ARCH_SET_FS: { |
|---|
| 754 | + /* |
|---|
| 755 | + * Not strictly needed for %fs, but do it for symmetry |
|---|
| 756 | + * with %gs |
|---|
| 757 | + */ |
|---|
| 758 | + if (unlikely(arg2 >= TASK_SIZE_MAX)) |
|---|
| 759 | + return -EPERM; |
|---|
| 760 | + |
|---|
| 761 | + preempt_disable(); |
|---|
| 762 | + /* |
|---|
| 763 | + * Set the selector to 0 for the same reason |
|---|
| 764 | + * as %gs above. |
|---|
| 765 | + */ |
|---|
| 766 | + if (task == current) { |
|---|
| 767 | + loadseg(FS, 0); |
|---|
| 768 | + x86_fsbase_write_cpu(arg2); |
|---|
| 769 | + |
|---|
| 770 | + /* |
|---|
| 771 | + * On non-FSGSBASE systems, save_base_legacy() expects |
|---|
| 772 | + * that we also fill in thread.fsbase. |
|---|
| 773 | + */ |
|---|
| 774 | + task->thread.fsbase = arg2; |
|---|
| 775 | + } else { |
|---|
| 776 | + task->thread.fsindex = 0; |
|---|
| 777 | + x86_fsbase_write_task(task, arg2); |
|---|
| 778 | + } |
|---|
| 779 | + preempt_enable(); |
|---|
| 780 | + break; |
|---|
| 781 | + } |
|---|
| 782 | + case ARCH_GET_FS: { |
|---|
| 783 | + unsigned long base = x86_fsbase_read_task(task); |
|---|
| 784 | + |
|---|
| 672 | 785 | ret = put_user(base, (unsigned long __user *)arg2); |
|---|
| 673 | 786 | break; |
|---|
| 674 | 787 | } |
|---|
| 675 | 788 | case ARCH_GET_GS: { |
|---|
| 676 | | - unsigned long base; |
|---|
| 789 | + unsigned long base = x86_gsbase_read_task(task); |
|---|
| 677 | 790 | |
|---|
| 678 | | - if (doit) |
|---|
| 679 | | - rdmsrl(MSR_KERNEL_GS_BASE, base); |
|---|
| 680 | | - else |
|---|
| 681 | | - base = task->thread.gsbase; |
|---|
| 682 | 791 | ret = put_user(base, (unsigned long __user *)arg2); |
|---|
| 683 | 792 | break; |
|---|
| 684 | 793 | } |
|---|