2024-10-12 a5969cabbb4660eab42b6ef0412cbbd1200cf14d
kernel/arch/x86/include/asm/fpu/internal.h
@@ -14,27 +14,27 @@
 #include <linux/compat.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 
 #include <asm/user.h>
 #include <asm/fpu/api.h>
 #include <asm/fpu/xstate.h>
+#include <asm/fpu/xcr.h>
 #include <asm/cpufeature.h>
 #include <asm/trace/fpu.h>
 
 /*
  * High level FPU state handling functions:
  */
-extern void fpu__initialize(struct fpu *fpu);
 extern void fpu__prepare_read(struct fpu *fpu);
 extern void fpu__prepare_write(struct fpu *fpu);
 extern void fpu__save(struct fpu *fpu);
-extern void fpu__restore(struct fpu *fpu);
 extern int fpu__restore_sig(void __user *buf, int ia32_frame);
 extern void fpu__drop(struct fpu *fpu);
-extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu);
-extern void fpu__clear(struct fpu *fpu);
+extern int fpu__copy(struct task_struct *dst, struct task_struct *src);
+extern void fpu__clear_user_states(struct fpu *fpu);
+extern void fpu__clear_all(struct fpu *fpu);
 extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
-extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate);
 
 /*
  * Boot time FPU initialization functions:
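The fpu__clear() split above replaces one entry point with two, and fpu__copy() now takes task_struct pointers so it can consult per-task flags. A sketch of the intended division of labour, inferred from the new names (the call sites shown here are assumptions, not part of this header):

    /* sketch: assumed usage of the new helpers */
    fpu__clear_user_states(&tsk->thread.fpu); /* e.g. bad sigreturn frame: reset user-visible xstate only */
    fpu__clear_all(&tsk->thread.fpu);         /* e.g. exec: reset user and supervisor xstate */

    /* fork: copy the FPU context from the parent task to the child */
    err = fpu__copy(dst_task, src_task);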
@@ -42,7 +42,7 @@
 extern void fpu__init_cpu(void);
 extern void fpu__init_system_xstate(void);
 extern void fpu__init_cpu_xstate(void);
-extern void fpu__init_system(struct cpuinfo_x86 *c);
+extern void fpu__init_system(void);
 extern void fpu__init_check_bugs(void);
 extern void fpu__resume_cpu(void);
 extern u64 fpu__get_supported_xfeatures_mask(void);
@@ -93,7 +93,7 @@
         * XRSTORS requires these bits set in xcomp_bv, or it will
         * trigger #GP:
         */
-        xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask;
+        xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all;
 }
 
 static inline void fpstate_init_fxstate(struct fxregs_state *fx)
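In the compacted XSAVES format, xcomp_bv bit 63 (XCOMP_BV_COMPACTED_FORMAT) selects the compacted encoding and the low bits name the state components laid out in the buffer, so the header must advertise every enabled feature. A worked example with an assumed feature mask:

    /* XCOMP_BV_COMPACTED_FORMAT is bit 63 of xcomp_bv */
    /* assume xfeatures_mask_all == 0x7: FP (bit 0), SSE (bit 1), YMM (bit 2) */
    xsave->header.xcomp_bv = (1ULL << 63) | 0x7;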
@@ -123,6 +123,21 @@
        err;                                                            \
 })
 
+#define kernel_insn_err(insn, output, input...)                         \
+({                                                                      \
+        int err;                                                        \
+        asm volatile("1:" #insn "\n\t"                                  \
+                     "2:\n"                                             \
+                     ".section .fixup,\"ax\"\n"                         \
+                     "3:  movl $-1,%[err]\n"                            \
+                     "    jmp  2b\n"                                    \
+                     ".previous\n"                                      \
+                     _ASM_EXTABLE(1b, 3b)                               \
+                     : [err] "=r" (err), output                         \
+                     : "0"(0), input);                                  \
+        err;                                                            \
+})
+
 #define kernel_insn(insn, output, input...)                             \
        asm volatile("1:" #insn "\n\t"                                   \
                     "2:\n"                                              \
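For reference, a sketch of what kernel_insn_err() expands to for one concrete instruction: a fault in the instruction at label 1 is routed through the exception table to label 3, which stores -1 in err and resumes at label 2. This is an approximate expansion, for illustration only:

    /* roughly what kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)) becomes: */
    int err;
    asm volatile("1: frstor %[fx]\n\t"
                 "2:\n"
                 ".section .fixup,\"ax\"\n"
                 "3:  movl $-1,%[err]\n"
                 "    jmp  2b\n"
                 ".previous\n"
                 _ASM_EXTABLE(1b, 3b)
                 : [err] "=r" (err), "=m" (*fx)
                 : "0"(0), [fx] "m" (*fx));
    /* err == 0 on success (the "0"(0) constraint preloads it), -1 if FRSTOR faulted */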
@@ -138,42 +153,43 @@
 {
        if (IS_ENABLED(CONFIG_X86_32))
                return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
-       else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
+       else
                return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
 
-       /* See comment in copy_fxregs_to_kernel() below. */
-       return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
 }
 
 static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
 {
-       if (IS_ENABLED(CONFIG_X86_32)) {
+       if (IS_ENABLED(CONFIG_X86_32))
                kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-       } else {
-               if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
-                       kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
-               } else {
-                       /* See comment in copy_fxregs_to_kernel() below. */
-                       kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
-               }
-       }
+       else
+               kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+}
+
+static inline int copy_kernel_to_fxregs_err(struct fxregs_state *fx)
+{
+       if (IS_ENABLED(CONFIG_X86_32))
+               return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+       else
+               return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
 }
 
 static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
 {
        if (IS_ENABLED(CONFIG_X86_32))
                return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-       else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
+       else
                return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
-
-       /* See comment in copy_fxregs_to_kernel() below. */
-       return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
-                         "m" (*fx));
 }
 
 static inline void copy_kernel_to_fregs(struct fregs_state *fx)
 {
        kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+}
+
+static inline int copy_kernel_to_fregs_err(struct fregs_state *fx)
+{
+       return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
 }
 
 static inline int copy_user_to_fregs(struct fregs_state __user *fx)
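The new *_err restore variants return -1 on a faulting restore instead of taking the kernel down through the exception path, which matters when the saved buffer contents cannot be trusted. A hypothetical caller (the function name and fallback policy here are illustrative, not from this patch):

    /* sketch: try to restore saved FX state, reinitialize on failure */
    static int restore_fxregs_or_init(struct fxregs_state *fx)
    {
            if (copy_kernel_to_fxregs_err(fx)) {
                    fpstate_init_fxstate(fx);   /* declared earlier in this header */
                    copy_kernel_to_fxregs(fx);
                    return -EINVAL;
            }
            return 0;
    }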
@@ -185,34 +201,8 @@
 {
        if (IS_ENABLED(CONFIG_X86_32))
                asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
-       else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
+       else
                asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
-       else {
-               /* Using "rex64; fxsave %0" is broken because, if the memory
-                * operand uses any extended registers for addressing, a second
-                * REX prefix will be generated (to the assembler, rex64
-                * followed by semicolon is a separate instruction), and hence
-                * the 64-bitness is lost.
-                *
-                * Using "fxsaveq %0" would be the ideal choice, but is only
-                * supported starting with gas 2.16.
-                *
-                * Using, as a workaround, the properly prefixed form below
-                * isn't accepted by any binutils version so far released,
-                * complaining that the same type of prefix is used twice if
-                * an extended register is needed for addressing (fix submitted
-                * to mainline 2005-11-21).
-                *
-                *  asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave));
-                *
-                * This, however, we can work around by forcing the compiler to
-                * select an addressing mode that doesn't require extended
-                * registers.
-                */
-               asm volatile( "rex64/fxsave (%[fx])"
-                            : "=m" (fpu->state.fxsave)
-                            : [fx] "R" (&fpu->state.fxsave));
-       }
 }
 
 static inline void fxsave(struct fxregs_state *fx)
@@ -304,7 +294,7 @@
 
        WARN_ON(system_state != SYSTEM_BOOTING);
 
-       if (static_cpu_has(X86_FEATURE_XSAVES))
+       if (boot_cpu_has(X86_FEATURE_XSAVES))
                XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
        else
                XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
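boot_cpu_has() is the right choice on this path because it runs while the kernel is still booting (note the WARN_ON above): static_cpu_has() relies on alternatives patching that may not have happened yet, whereas boot_cpu_has() tests the capability bits directly. For context, XSTATE_OP elsewhere in this header follows the same exception-fixup pattern as kernel_insn_err(); approximately (reproduced from memory, check the header for the exact form):

    #define XSTATE_OP(op, st, lmask, hmask, err)                        \
            asm volatile("1:" op "\n\t"                                 \
                         "xor %[err], %[err]\n"                         \
                         "2:\n\t"                                       \
                         ".pushsection .fixup,\"ax\"\n\t"               \
                         "3: movl $-2,%[err]\n\t"                       \
                         "jmp 2b\n\t"                                   \
                         ".popsection\n\t"                              \
                         _ASM_EXTABLE(1b, 3b)                           \
                         : [err] "=r" (err)                             \
                         : "D" (st), "m" (*st), "a" (lmask), "d" (hmask)\
                         : "memory")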
@@ -321,7 +311,7 @@
  */
 static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
 {
-       u64 mask = -1;
+       u64 mask = xfeatures_mask_all;
        u32 lmask = mask;
        u32 hmask = mask >> 32;
        int err;
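The XSAVE family takes its requested-feature bitmap in EDX:EAX, which is why the 64-bit mask is split into lmask and hmask; switching from -1 to xfeatures_mask_all requests exactly the components the kernel enabled rather than all 64 bits. A small worked example with an assumed mask value:

    u64 mask  = 0x207;        /* assumed: FP | SSE | YMM | PKRU (bits 0, 1, 2, 9) */
    u32 lmask = mask;         /* low 32 bits  -> EAX: 0x00000207 */
    u32 hmask = mask >> 32;   /* high 32 bits -> EDX: 0x00000000 */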
@@ -357,6 +347,9 @@
  */
 static inline int copy_xregs_to_user(struct xregs_state __user *buf)
 {
+       u64 mask = xfeatures_mask_user();
+       u32 lmask = mask;
+       u32 hmask = mask >> 32;
        int err;
 
        /*
@@ -368,7 +361,7 @@
                return -EFAULT;
 
        stac();
-       XSTATE_OP(XSAVE, buf, -1, -1, err);
+       XSTATE_OP(XSAVE, buf, lmask, hmask, err);
        clac();
 
        return err;
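Saving with xfeatures_mask_user() instead of -1 keeps supervisor xstate components out of the user-space signal frame. The mask itself is defined in the xstate code; its assumed shape, for illustration only:

    /* assumed relationship between the masks: */
    static inline u64 xfeatures_mask_user(void)
    {
            /* only user-visible components; supervisor bits are masked out */
            return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED;
    }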
@@ -392,35 +385,24 @@
 }
 
 /*
- * These must be called with preempt disabled. Returns
- * 'true' if the FPU state is still intact and we can
- * keep registers active.
- *
- * The legacy FNSAVE instruction cleared all FPU state
- * unconditionally, so registers are essentially destroyed.
- * Modern FPU state can be kept in registers, if there are
- * no pending FP exceptions.
+ * Restore xstate from kernel space xsave area, return an error code instead of
+ * an exception.
  */
-static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
+static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask)
 {
-       if (likely(use_xsave())) {
-               copy_xregs_to_kernel(&fpu->state.xsave);
-               return 1;
-       }
+       u32 lmask = mask;
+       u32 hmask = mask >> 32;
+       int err;
 
-       if (likely(use_fxsr())) {
-               copy_fxregs_to_kernel(fpu);
-               return 1;
-       }
+       if (static_cpu_has(X86_FEATURE_XSAVES))
+               XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
+       else
+               XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
 
-       /*
-        * Legacy FPU register saving, FNSAVE always clears FPU registers,
-        * so we have to mark them inactive:
-        */
-       asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
-
-       return 0;
+       return err;
 }
+
+extern int copy_fpregs_to_fpstate(struct fpu *fpu);
 
 static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask)
 {
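copy_fpregs_to_fpstate() keeps its old contract (non-zero when the register contents remain valid, 0 when legacy FNSAVE destroyed them) but is now out of line. A sketch of the moved definition, assuming it mirrors the deleted inline body:

    /* sketch of the out-of-line version, presumably in fpu/core.c: */
    int copy_fpregs_to_fpstate(struct fpu *fpu)
    {
            if (likely(use_xsave())) {
                    copy_xregs_to_kernel(&fpu->state.xsave);
                    return 1;
            }
            if (likely(use_fxsr())) {
                    copy_fxregs_to_kernel(fpu);
                    return 1;
            }
            /* FNSAVE clears the registers; the in-register state is gone */
            asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
            return 0;
    }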
@@ -489,7 +471,7 @@
 
 static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
 {
-       return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
+       return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
 }
 
 /*
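this_cpu_read_stable() lets the compiler cache the per-CPU value, which was fine when fpu_fpregs_owner_ctx could only change at a context switch. With deferred loading the owner can change underneath a preemptible caller, so the plain this_cpu_read() (re-read on every call) is required, and the check must be acted on with preemption off. A sketch of the assumed usage pattern (fpregs_lock()/fpregs_unlock() are taken from the companion fpu/api.h):

    /* sketch: validate and, if needed, reload with preemption disabled */
    fpregs_lock();
    if (!fpregs_state_valid(fpu, smp_processor_id()))
            copy_kernel_to_fpregs(&fpu->state);
    fpregs_unlock();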
@@ -509,6 +491,25 @@
 }
 
 /*
+ * Internal helper, do not use directly. Use switch_fpu_return() instead.
+ */
+static inline void __fpregs_load_activate(void)
+{
+       struct fpu *fpu = &current->thread.fpu;
+       int cpu = smp_processor_id();
+
+       if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
+               return;
+
+       if (!fpregs_state_valid(fpu, cpu)) {
+               copy_kernel_to_fpregs(&fpu->state);
+               fpregs_activate(fpu);
+               fpu->last_cpu = cpu;
+       }
+       clear_thread_flag(TIF_NEED_FPU_LOAD);
+}
+
+/*
  * FPU state switching for scheduling.
  *
  * This is a two-stage process:
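__fpregs_load_activate() is the body behind switch_fpu_return(), which the kernel calls before returning to user space whenever TIF_NEED_FPU_LOAD is set. A sketch of the assumed wrapper (presumably in fpu/core.c; not shown in this diff):

    void switch_fpu_return(void)
    {
            if (!static_cpu_has(X86_FEATURE_FPU))
                    return;
            __fpregs_load_activate();
    }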
@@ -516,13 +517,25 @@
  *  - switch_fpu_prepare() saves the old state.
  *    This is done within the context of the old process.
  *
- *  - switch_fpu_finish() restores the new state as
- *    necessary.
+ *  - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state
+ *    will get loaded on return to userspace, or when the kernel needs it.
+ *
+ * If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers
+ * are saved in the current thread's FPU register state.
+ *
+ * If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not
+ * hold current()'s FPU registers. It is required to load the
+ * registers before returning to userland or using the content
+ * otherwise.
+ *
+ * The FPU context is only stored/restored for a user task and
+ * PF_KTHREAD is used to distinguish between kernel and user threads.
  */
-static inline void
-switch_fpu_prepare(struct fpu *old_fpu, int cpu)
+static inline void switch_fpu_prepare(struct task_struct *prev, int cpu)
 {
-       if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) {
+       struct fpu *old_fpu = &prev->thread.fpu;
+
+       if (static_cpu_has(X86_FEATURE_FPU) && !(prev->flags & PF_KTHREAD)) {
                if (!copy_fpregs_to_fpstate(old_fpu))
                        old_fpu->last_cpu = -1;
                else
@@ -530,8 +543,7 @@
 
                /* But leave fpu_fpregs_owner_ctx! */
                trace_x86_fpu_regs_deactivated(old_fpu);
-       } else
-               old_fpu->last_cpu = -1;
+       }
 }
 
 /*
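Taken together, the two stages are driven from the context-switch path; a sketch of the shape of the assumed call site in __switch_to() (illustrative, not the literal code from process_32.c/process_64.c):

    switch_fpu_prepare(prev_p, cpu);    /* save the outgoing task's registers */
    /* ... switch stacks, segments, TLS, ... */
    switch_fpu_finish(next_p);          /* set TIF_NEED_FPU_LOAD, load PKRU eagerly */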
@@ -539,63 +551,40 @@
  */
 
 /*
- * Set up the userspace FPU context for the new task, if the task
- * has used the FPU.
+ * Load PKRU from the FPU context if available. Delay loading of the
+ * complete FPU state until the return to userland.
  */
-static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
+static inline void switch_fpu_finish(struct task_struct *next)
 {
-       bool preload = static_cpu_has(X86_FEATURE_FPU) &&
-                      new_fpu->initialized;
+       u32 pkru_val = init_pkru_value;
+       struct pkru_state *pk;
+       struct fpu *next_fpu = &next->thread.fpu;
 
-       if (preload) {
-               if (!fpregs_state_valid(new_fpu, cpu))
-                       copy_kernel_to_fpregs(&new_fpu->state);
-               fpregs_activate(new_fpu);
+       if (!static_cpu_has(X86_FEATURE_FPU))
+               return;
+
+       set_thread_flag(TIF_NEED_FPU_LOAD);
+
+       if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
+               return;
+
+       /*
+        * PKRU state is switched eagerly because it needs to be valid before we
+        * return to userland e.g. for a copy_to_user() operation.
+        */
+       if (!(next->flags & PF_KTHREAD)) {
+               /*
+                * If the PKRU bit in xsave.header.xfeatures is not set,
+                * then the PKRU component was in init state, which means
+                * XRSTOR will set PKRU to 0. If the bit is not set then
+                * get_xsave_addr() will return NULL because the PKRU value
+                * in memory is not valid. This means pkru_val has to be
+                * set to 0 and not to init_pkru_value.
+                */
+               pk = get_xsave_addr(&next_fpu->state.xsave, XFEATURE_PKRU);
+               pkru_val = pk ? pk->pkru : 0;
        }
-}
-
-/*
- * Needs to be preemption-safe.
- *
- * NOTE! user_fpu_begin() must be used only immediately before restoring
- * the save state. It does not do any saving/restoring on its own. In
- * lazy FPU mode, it is just an optimization to avoid a #NM exception,
- * the task can lose the FPU right after preempt_enable().
- */
-static inline void user_fpu_begin(void)
-{
-       struct fpu *fpu = &current->thread.fpu;
-
-       preempt_disable();
-       fpregs_activate(fpu);
-       preempt_enable();
-}
-
-/*
- * MXCSR and XCR definitions:
- */
-
-extern unsigned int mxcsr_feature_mask;
-
-#define XCR_XFEATURE_ENABLED_MASK      0x00000000
-
-static inline u64 xgetbv(u32 index)
-{
-       u32 eax, edx;
-
-       asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
-                    : "=a" (eax), "=d" (edx)
-                    : "c" (index));
-       return eax + ((u64)edx << 32);
-}
-
-static inline void xsetbv(u32 index, u64 value)
-{
-       u32 eax = value;
-       u32 edx = value >> 32;
-
-       asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
-                    : : "a" (eax), "d" (edx), "c" (index));
+       __write_pkru(pkru_val);
 }
 
 #endif /* _ASM_X86_FPU_INTERNAL_H */
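The deleted xgetbv()/xsetbv() helpers and the XCR definitions are not gone: the new #include <asm/fpu/xcr.h> at the top of this header indicates they moved there. A sketch of the relocated helpers, using the mnemonics instead of the old .byte encodings (assumed contents of asm/fpu/xcr.h, derived from the removed code above):

    #define XCR_XFEATURE_ENABLED_MASK      0x00000000

    static inline u64 xgetbv(u32 index)
    {
            u32 eax, edx;

            asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index));
            return eax + ((u64)edx << 32);
    }

    static inline void xsetbv(u32 index, u64 value)
    {
            u32 eax = value, edx = value >> 32;

            asm volatile("xsetbv" : : "a" (eax), "d" (edx), "c" (index));
    }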