hc
2024-05-11 297b60346df8beafee954a0fd7c2d64f33f3b9bc
kernel/arch/x86/kernel/process_64.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Copyright (C) 1995 Linus Torvalds
34 *
....@@ -39,7 +40,6 @@
3940 #include <linux/ftrace.h>
4041 #include <linux/syscalls.h>
4142
42
-#include <asm/pgtable.h>
4343 #include <asm/processor.h>
4444 #include <asm/fpu/internal.h>
4545 #include <asm/mmu_context.h>
....@@ -47,13 +47,13 @@
4747 #include <asm/desc.h>
4848 #include <asm/proto.h>
4949 #include <asm/ia32.h>
50
-#include <asm/syscalls.h>
5150 #include <asm/debugreg.h>
5251 #include <asm/switch_to.h>
5352 #include <asm/xen/hypervisor.h>
5453 #include <asm/vdso.h>
55
-#include <asm/intel_rdt_sched.h>
54
+#include <asm/resctrl.h>
5655 #include <asm/unistd.h>
56
+#include <asm/fsgsbase.h>
5757 #ifdef CONFIG_IA32_EMULATION
5858 /* Not included via unistd.h */
5959 #include <asm/unistd_32_ia32.h>
....@@ -61,33 +61,32 @@
6161
6262 #include "process.h"
6363
64
-__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
65
-
6664 /* Prints also some state that isn't saved in the pt_regs */
67
-void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
65
+void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
66
+ const char *log_lvl)
6867 {
6968 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
7069 unsigned long d0, d1, d2, d3, d6, d7;
7170 unsigned int fsindex, gsindex;
72
- unsigned int ds, cs, es;
71
+ unsigned int ds, es;
7372
74
- show_iret_regs(regs);
73
+ show_iret_regs(regs, log_lvl);
7574
7675 if (regs->orig_ax != -1)
7776 pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
7877 else
7978 pr_cont("\n");
8079
81
- printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
82
- regs->ax, regs->bx, regs->cx);
83
- printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
84
- regs->dx, regs->si, regs->di);
85
- printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
86
- regs->bp, regs->r8, regs->r9);
87
- printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
88
- regs->r10, regs->r11, regs->r12);
89
- printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
90
- regs->r13, regs->r14, regs->r15);
80
+ printk("%sRAX: %016lx RBX: %016lx RCX: %016lx\n",
81
+ log_lvl, regs->ax, regs->bx, regs->cx);
82
+ printk("%sRDX: %016lx RSI: %016lx RDI: %016lx\n",
83
+ log_lvl, regs->dx, regs->si, regs->di);
84
+ printk("%sRBP: %016lx R08: %016lx R09: %016lx\n",
85
+ log_lvl, regs->bp, regs->r8, regs->r9);
86
+ printk("%sR10: %016lx R11: %016lx R12: %016lx\n",
87
+ log_lvl, regs->r10, regs->r11, regs->r12);
88
+ printk("%sR13: %016lx R14: %016lx R15: %016lx\n",
89
+ log_lvl, regs->r13, regs->r14, regs->r15);
9190
9291 if (mode == SHOW_REGS_SHORT)
9392 return;
....@@ -95,13 +94,12 @@
9594 if (mode == SHOW_REGS_USER) {
9695 rdmsrl(MSR_FS_BASE, fs);
9796 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
98
- printk(KERN_DEFAULT "FS: %016lx GS: %016lx\n",
99
- fs, shadowgs);
97
+ printk("%sFS: %016lx GS: %016lx\n",
98
+ log_lvl, fs, shadowgs);
10099 return;
101100 }
102101
103102 asm("movl %%ds,%0" : "=r" (ds));
104
- asm("movl %%cs,%0" : "=r" (cs));
105103 asm("movl %%es,%0" : "=r" (es));
106104 asm("movl %%fs,%0" : "=r" (fsindex));
107105 asm("movl %%gs,%0" : "=r" (gsindex));
....@@ -115,12 +113,12 @@
115113 cr3 = __read_cr3();
116114 cr4 = __read_cr4();
117115
118
- printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
119
- fs, fsindex, gs, gsindex, shadowgs);
120
- printk(KERN_DEFAULT "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
121
- es, cr0);
122
- printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
123
- cr4);
116
+ printk("%sFS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
117
+ log_lvl, fs, fsindex, gs, gsindex, shadowgs);
118
+ printk("%sCS: %04lx DS: %04x ES: %04x CR0: %016lx\n",
119
+ log_lvl, regs->cs, ds, es, cr0);
120
+ printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n",
121
+ log_lvl, cr2, cr3, cr4);
124122
125123 get_debugreg(d0, 0);
126124 get_debugreg(d1, 1);
....@@ -132,35 +130,75 @@
132130 /* Only print out debug registers if they are in their non-default state. */
133131 if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
134132 (d6 == DR6_RESERVED) && (d7 == 0x400))) {
135
- printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n",
136
- d0, d1, d2);
137
- printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n",
138
- d3, d6, d7);
133
+ printk("%sDR0: %016lx DR1: %016lx DR2: %016lx\n",
134
+ log_lvl, d0, d1, d2);
135
+ printk("%sDR3: %016lx DR6: %016lx DR7: %016lx\n",
136
+ log_lvl, d3, d6, d7);
139137 }
140138
141139 if (boot_cpu_has(X86_FEATURE_OSPKE))
142
- printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
140
+ printk("%sPKRU: %08x\n", log_lvl, read_pkru());
143141 }
144142
145143 void release_thread(struct task_struct *dead_task)
146144 {
147
- if (dead_task->mm) {
148
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
149
- if (dead_task->mm->context.ldt) {
150
- pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
151
- dead_task->comm,
152
- dead_task->mm->context.ldt->entries,
153
- dead_task->mm->context.ldt->nr_entries);
154
- BUG();
155
- }
156
-#endif
157
- }
145
+ WARN_ON(dead_task->mm);
158146 }
159147
160148 enum which_selector {
161149 FS,
162150 GS
163151 };
152
+
153
+/*
154
+ * Out of line to be protected from kprobes and tracing. If this would be
155
+ * traced or probed then any access to a per CPU variable happens with
156
+ * the wrong GS.
157
+ *
158
+ * It is not used on Xen paravirt. When paravirt support is needed, it
159
+ * needs to be renamed with native_ prefix.
160
+ */
161
+static noinstr unsigned long __rdgsbase_inactive(void)
162
+{
163
+ unsigned long gsbase;
164
+
165
+ lockdep_assert_irqs_disabled();
166
+
167
+ if (!static_cpu_has(X86_FEATURE_XENPV)) {
168
+ native_swapgs();
169
+ gsbase = rdgsbase();
170
+ native_swapgs();
171
+ } else {
172
+ instrumentation_begin();
173
+ rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
174
+ instrumentation_end();
175
+ }
176
+
177
+ return gsbase;
178
+}
179
+
180
+/*
181
+ * Out of line to be protected from kprobes and tracing. If this would be
182
+ * traced or probed then any access to a per CPU variable happens with
183
+ * the wrong GS.
184
+ *
185
+ * It is not used on Xen paravirt. When paravirt support is needed, it
186
+ * needs to be renamed with native_ prefix.
187
+ */
188
+static noinstr void __wrgsbase_inactive(unsigned long gsbase)
189
+{
190
+ lockdep_assert_irqs_disabled();
191
+
192
+ if (!static_cpu_has(X86_FEATURE_XENPV)) {
193
+ native_swapgs();
194
+ wrgsbase(gsbase);
195
+ native_swapgs();
196
+ } else {
197
+ instrumentation_begin();
198
+ wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
199
+ instrumentation_end();
200
+ }
201
+}
164202
165203 /*
166204 * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are
....@@ -211,22 +249,35 @@
211249 {
212250 savesegment(fs, task->thread.fsindex);
213251 savesegment(gs, task->thread.gsindex);
214
- save_base_legacy(task, task->thread.fsindex, FS);
215
- save_base_legacy(task, task->thread.gsindex, GS);
252
+ if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
253
+ /*
254
+ * If FSGSBASE is enabled, we can't make any useful guesses
255
+ * about the base, and user code expects us to save the current
256
+ * value. Fortunately, reading the base directly is efficient.
257
+ */
258
+ task->thread.fsbase = rdfsbase();
259
+ task->thread.gsbase = __rdgsbase_inactive();
260
+ } else {
261
+ save_base_legacy(task, task->thread.fsindex, FS);
262
+ save_base_legacy(task, task->thread.gsindex, GS);
263
+ }
216264 }
217265
218
-#if IS_ENABLED(CONFIG_KVM)
219266 /*
220267 * While a process is running, current->thread.fsbase and current->thread.gsbase
221
- * may not match the corresponding CPU registers (see save_base_legacy()). KVM
222
- * wants an efficient way to save and restore FSBASE and GSBASE.
223
- * When FSGSBASE extensions are enabled, this will have to use RD{FS,GS}BASE.
268
+ * may not match the corresponding CPU registers (see save_base_legacy()).
224269 */
225
-void save_fsgs_for_kvm(void)
270
+void current_save_fsgs(void)
226271 {
272
+ unsigned long flags;
273
+
274
+ /* Interrupts need to be off for FSGSBASE */
275
+ local_irq_save(flags);
227276 save_fsgs(current);
277
+ local_irq_restore(flags);
228278 }
229
-EXPORT_SYMBOL_GPL(save_fsgs_for_kvm);
279
+#if IS_ENABLED(CONFIG_KVM)
280
+EXPORT_SYMBOL_GPL(current_save_fsgs);
230281 #endif
231282
232283 static __always_inline void loadseg(enum which_selector which,
....@@ -288,86 +339,142 @@
288339 }
289340 }
290341
291
-int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
292
- unsigned long arg, struct task_struct *p, unsigned long tls)
342
+static __always_inline void x86_fsgsbase_load(struct thread_struct *prev,
343
+ struct thread_struct *next)
293344 {
294
- int err;
295
- struct pt_regs *childregs;
296
- struct fork_frame *fork_frame;
297
- struct inactive_task_frame *frame;
298
- struct task_struct *me = current;
345
+ if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
346
+ /* Update the FS and GS selectors if they could have changed. */
347
+ if (unlikely(prev->fsindex || next->fsindex))
348
+ loadseg(FS, next->fsindex);
349
+ if (unlikely(prev->gsindex || next->gsindex))
350
+ loadseg(GS, next->gsindex);
299351
300
- childregs = task_pt_regs(p);
301
- fork_frame = container_of(childregs, struct fork_frame, regs);
302
- frame = &fork_frame->frame;
303
-
304
- /*
305
- * For a new task use the RESET flags value since there is no before.
306
- * All the status flags are zero; DF and all the system flags must also
307
- * be 0, specifically IF must be 0 because we context switch to the new
308
- * task with interrupts disabled.
309
- */
310
- frame->flags = X86_EFLAGS_FIXED;
311
- frame->bp = 0;
312
- frame->ret_addr = (unsigned long) ret_from_fork;
313
- p->thread.sp = (unsigned long) fork_frame;
314
- p->thread.io_bitmap_ptr = NULL;
315
-
316
- savesegment(gs, p->thread.gsindex);
317
- p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
318
- savesegment(fs, p->thread.fsindex);
319
- p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
320
- savesegment(es, p->thread.es);
321
- savesegment(ds, p->thread.ds);
322
- memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
323
-
324
- if (unlikely(p->flags & PF_KTHREAD)) {
325
- /* kernel thread */
326
- memset(childregs, 0, sizeof(struct pt_regs));
327
- frame->bx = sp; /* function */
328
- frame->r12 = arg;
329
- return 0;
352
+ /* Update the bases. */
353
+ wrfsbase(next->fsbase);
354
+ __wrgsbase_inactive(next->gsbase);
355
+ } else {
356
+ load_seg_legacy(prev->fsindex, prev->fsbase,
357
+ next->fsindex, next->fsbase, FS);
358
+ load_seg_legacy(prev->gsindex, prev->gsbase,
359
+ next->gsindex, next->gsbase, GS);
330360 }
331
- frame->bx = 0;
332
- *childregs = *current_pt_regs();
361
+}
333362
334
- childregs->ax = 0;
335
- if (sp)
336
- childregs->sp = sp;
363
+unsigned long x86_fsgsbase_read_task(struct task_struct *task,
364
+ unsigned short selector)
365
+{
366
+ unsigned short idx = selector >> 3;
367
+ unsigned long base;
337368
338
- err = -ENOMEM;
339
- if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
340
- p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
341
- IO_BITMAP_BYTES, GFP_KERNEL);
342
- if (!p->thread.io_bitmap_ptr) {
343
- p->thread.io_bitmap_max = 0;
344
- return -ENOMEM;
345
- }
346
- set_tsk_thread_flag(p, TIF_IO_BITMAP);
347
- }
369
+ if (likely((selector & SEGMENT_TI_MASK) == 0)) {
370
+ if (unlikely(idx >= GDT_ENTRIES))
371
+ return 0;
348372
349
- /*
350
- * Set a new TLS for the child thread?
351
- */
352
- if (clone_flags & CLONE_SETTLS) {
353
-#ifdef CONFIG_IA32_EMULATION
354
- if (in_ia32_syscall())
355
- err = do_set_thread_area(p, -1,
356
- (struct user_desc __user *)tls, 0);
373
+ /*
374
+ * There are no user segments in the GDT with nonzero bases
375
+ * other than the TLS segments.
376
+ */
377
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
378
+ return 0;
379
+
380
+ idx -= GDT_ENTRY_TLS_MIN;
381
+ base = get_desc_base(&task->thread.tls_array[idx]);
382
+ } else {
383
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
384
+ struct ldt_struct *ldt;
385
+
386
+ /*
387
+ * If performance here mattered, we could protect the LDT
388
+ * with RCU. This is a slow path, though, so we can just
389
+ * take the mutex.
390
+ */
391
+ mutex_lock(&task->mm->context.lock);
392
+ ldt = task->mm->context.ldt;
393
+ if (unlikely(!ldt || idx >= ldt->nr_entries))
394
+ base = 0;
357395 else
396
+ base = get_desc_base(ldt->entries + idx);
397
+ mutex_unlock(&task->mm->context.lock);
398
+#else
399
+ base = 0;
358400 #endif
359
- err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
360
- if (err)
361
- goto out;
362
- }
363
- err = 0;
364
-out:
365
- if (err && p->thread.io_bitmap_ptr) {
366
- kfree(p->thread.io_bitmap_ptr);
367
- p->thread.io_bitmap_max = 0;
368401 }
369402
370
- return err;
403
+ return base;
404
+}
405
+
406
+unsigned long x86_gsbase_read_cpu_inactive(void)
407
+{
408
+ unsigned long gsbase;
409
+
410
+ if (boot_cpu_has(X86_FEATURE_FSGSBASE)) {
411
+ unsigned long flags;
412
+
413
+ local_irq_save(flags);
414
+ gsbase = __rdgsbase_inactive();
415
+ local_irq_restore(flags);
416
+ } else {
417
+ rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
418
+ }
419
+
420
+ return gsbase;
421
+}
422
+
423
+void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
424
+{
425
+ if (boot_cpu_has(X86_FEATURE_FSGSBASE)) {
426
+ unsigned long flags;
427
+
428
+ local_irq_save(flags);
429
+ __wrgsbase_inactive(gsbase);
430
+ local_irq_restore(flags);
431
+ } else {
432
+ wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
433
+ }
434
+}
435
+
436
+unsigned long x86_fsbase_read_task(struct task_struct *task)
437
+{
438
+ unsigned long fsbase;
439
+
440
+ if (task == current)
441
+ fsbase = x86_fsbase_read_cpu();
442
+ else if (boot_cpu_has(X86_FEATURE_FSGSBASE) ||
443
+ (task->thread.fsindex == 0))
444
+ fsbase = task->thread.fsbase;
445
+ else
446
+ fsbase = x86_fsgsbase_read_task(task, task->thread.fsindex);
447
+
448
+ return fsbase;
449
+}
450
+
451
+unsigned long x86_gsbase_read_task(struct task_struct *task)
452
+{
453
+ unsigned long gsbase;
454
+
455
+ if (task == current)
456
+ gsbase = x86_gsbase_read_cpu_inactive();
457
+ else if (boot_cpu_has(X86_FEATURE_FSGSBASE) ||
458
+ (task->thread.gsindex == 0))
459
+ gsbase = task->thread.gsbase;
460
+ else
461
+ gsbase = x86_fsgsbase_read_task(task, task->thread.gsindex);
462
+
463
+ return gsbase;
464
+}
465
+
466
+void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase)
467
+{
468
+ WARN_ON_ONCE(task == current);
469
+
470
+ task->thread.fsbase = fsbase;
471
+}
472
+
473
+void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
474
+{
475
+ WARN_ON_ONCE(task == current);
476
+
477
+ task->thread.gsbase = gsbase;
371478 }
372479
373480 static void
....@@ -393,7 +500,6 @@
393500 regs->cs = _cs;
394501 regs->ss = _ss;
395502 regs->flags = X86_EFLAGS_IF;
396
- force_iret();
397503 }
398504
399505 void
....@@ -429,14 +535,13 @@
429535 {
430536 struct thread_struct *prev = &prev_p->thread;
431537 struct thread_struct *next = &next_p->thread;
432
- struct fpu *prev_fpu = &prev->fpu;
433
- struct fpu *next_fpu = &next->fpu;
434538 int cpu = smp_processor_id();
435539
436540 WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
437541 this_cpu_read(irq_count) != -1);
438542
439
- switch_fpu_prepare(prev_fpu, cpu);
543
+ if (!test_thread_flag(TIF_NEED_FPU_LOAD))
544
+ switch_fpu_prepare(prev_p, cpu);
440545
441546 /* We must save %fs and %gs before load_TLS() because
442547 * %fs and %gs may be cleared by load_TLS().
....@@ -454,9 +559,7 @@
454559 /*
455560 * Leave lazy mode, flushing any hypercalls made here. This
456561 * must be done after loading TLS entries in the GDT but before
457
- * loading segments that might reference them, and and it must
458
- * be done before fpu__restore(), so the TS bit is up to
459
- * date.
562
+ * loading segments that might reference them.
460563 */
461564 arch_end_context_switch(next_p);
462565
....@@ -482,12 +585,7 @@
482585 if (unlikely(next->ds | prev->ds))
483586 loadsegment(ds, next->ds);
484587
485
- load_seg_legacy(prev->fsindex, prev->fsbase,
486
- next->fsindex, next->fsbase, FS);
487
- load_seg_legacy(prev->gsindex, prev->gsbase,
488
- next->gsindex, next->gsbase, GS);
489
-
490
- switch_fpu_finish(next_fpu, cpu);
588
+ x86_fsgsbase_load(prev, next);
491589
492590 /*
493591 * Switch the PDA and FPU contexts.
....@@ -495,21 +593,12 @@
495593 this_cpu_write(current_task, next_p);
496594 this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
497595
596
+ switch_fpu_finish(next_p);
597
+
498598 /* Reload sp0. */
499599 update_task_stack(next_p);
500600
501601 switch_to_extra(prev_p, next_p);
502
-
503
-#ifdef CONFIG_XEN_PV
504
- /*
505
- * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
506
- * current_pt_regs()->flags may not match the current task's
507
- * intended IOPL. We need to switch it manually.
508
- */
509
- if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
510
- prev->iopl != next->iopl))
511
- xen_set_iopl_mask(next->iopl);
512
-#endif
513602
514603 if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
515604 /*
....@@ -540,7 +629,7 @@
540629 }
541630
542631 /* Load the Intel cache allocation PQR MSR. */
543
- intel_rdt_sched_in();
632
+ resctrl_sched_in(next_p);
544633
545634 return prev_p;
546635 }
....@@ -564,7 +653,7 @@
564653 /* TBD: overwrites user setup. Should have two bits.
565654 But 64bit processes have always behaved this way,
566655 so it's not too bad. The main problem is just that
567
- 32bit childs are affected again. */
656
+ 32bit children are affected again. */
568657 current->personality &= ~READ_IMPLIES_EXEC;
569658 }
570659
....@@ -577,10 +666,10 @@
577666 current->mm->context.ia32_compat = TIF_X32;
578667 current->personality &= ~READ_IMPLIES_EXEC;
579668 /*
580
- * in_compat_syscall() uses the presence of the x32 syscall bit
669
+ * in_32bit_syscall() uses the presence of the x32 syscall bit
581670 * flag to determine compat status. The x86 mmap() code relies on
582671 * the syscall bitness so set x32 syscall bit right here to make
583
- * in_compat_syscall() work during exec().
672
+ * in_32bit_syscall() work during exec().
584673 *
585674 * Pretend to come from a x32 execve.
586675 */
....@@ -631,54 +720,74 @@
631720 long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
632721 {
633722 int ret = 0;
634
- int doit = task == current;
635
- int cpu;
636723
637724 switch (option) {
638
- case ARCH_SET_GS:
639
- if (arg2 >= TASK_SIZE_MAX)
725
+ case ARCH_SET_GS: {
726
+ if (unlikely(arg2 >= TASK_SIZE_MAX))
640727 return -EPERM;
641
- cpu = get_cpu();
642
- task->thread.gsindex = 0;
643
- task->thread.gsbase = arg2;
644
- if (doit) {
645
- load_gs_index(0);
646
- ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
647
- }
648
- put_cpu();
649
- break;
650
- case ARCH_SET_FS:
651
- /* Not strictly needed for fs, but do it for symmetry
652
- with gs */
653
- if (arg2 >= TASK_SIZE_MAX)
654
- return -EPERM;
655
- cpu = get_cpu();
656
- task->thread.fsindex = 0;
657
- task->thread.fsbase = arg2;
658
- if (doit) {
659
- /* set the selector to 0 to not confuse __switch_to */
660
- loadsegment(fs, 0);
661
- ret = wrmsrl_safe(MSR_FS_BASE, arg2);
662
- }
663
- put_cpu();
664
- break;
665
- case ARCH_GET_FS: {
666
- unsigned long base;
667728
668
- if (doit)
669
- rdmsrl(MSR_FS_BASE, base);
670
- else
671
- base = task->thread.fsbase;
729
+ preempt_disable();
730
+ /*
731
+ * ARCH_SET_GS has always overwritten the index
732
+ * and the base. Zero is the most sensible value
733
+ * to put in the index, and is the only value that
734
+ * makes any sense if FSGSBASE is unavailable.
735
+ */
736
+ if (task == current) {
737
+ loadseg(GS, 0);
738
+ x86_gsbase_write_cpu_inactive(arg2);
739
+
740
+ /*
741
+ * On non-FSGSBASE systems, save_base_legacy() expects
742
+ * that we also fill in thread.gsbase.
743
+ */
744
+ task->thread.gsbase = arg2;
745
+
746
+ } else {
747
+ task->thread.gsindex = 0;
748
+ x86_gsbase_write_task(task, arg2);
749
+ }
750
+ preempt_enable();
751
+ break;
752
+ }
753
+ case ARCH_SET_FS: {
754
+ /*
755
+ * Not strictly needed for %fs, but do it for symmetry
756
+ * with %gs
757
+ */
758
+ if (unlikely(arg2 >= TASK_SIZE_MAX))
759
+ return -EPERM;
760
+
761
+ preempt_disable();
762
+ /*
763
+ * Set the selector to 0 for the same reason
764
+ * as %gs above.
765
+ */
766
+ if (task == current) {
767
+ loadseg(FS, 0);
768
+ x86_fsbase_write_cpu(arg2);
769
+
770
+ /*
771
+ * On non-FSGSBASE systems, save_base_legacy() expects
772
+ * that we also fill in thread.fsbase.
773
+ */
774
+ task->thread.fsbase = arg2;
775
+ } else {
776
+ task->thread.fsindex = 0;
777
+ x86_fsbase_write_task(task, arg2);
778
+ }
779
+ preempt_enable();
780
+ break;
781
+ }
782
+ case ARCH_GET_FS: {
783
+ unsigned long base = x86_fsbase_read_task(task);
784
+
672785 ret = put_user(base, (unsigned long __user *)arg2);
673786 break;
674787 }
675788 case ARCH_GET_GS: {
676
- unsigned long base;
789
+ unsigned long base = x86_gsbase_read_task(task);
677790
678
- if (doit)
679
- rdmsrl(MSR_KERNEL_GS_BASE, base);
680
- else
681
- base = task->thread.gsbase;
682791 ret = put_user(base, (unsigned long __user *)arg2);
683792 break;
684793 }