forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-08 01573e231f18eb2d99162747186f59511f56b64d
kernel/arch/x86/kernel/fpu/core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 1994 Linus Torvalds
  *
@@ -42,18 +43,6 @@
  */
 DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);

-static void kernel_fpu_disable(void)
-{
-	WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
-	this_cpu_write(in_kernel_fpu, true);
-}
-
-static void kernel_fpu_enable(void)
-{
-	WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
-	this_cpu_write(in_kernel_fpu, false);
-}
-
 static bool kernel_fpu_disabled(void)
 {
 	return this_cpu_read(in_kernel_fpu);
@@ -93,45 +82,79 @@
 }
 EXPORT_SYMBOL(irq_fpu_usable);

-static void __kernel_fpu_begin(void)
+/*
+ * These must be called with preempt disabled. Returns
+ * 'true' if the FPU state is still intact and we can
+ * keep registers active.
+ *
+ * The legacy FNSAVE instruction cleared all FPU state
+ * unconditionally, so registers are essentially destroyed.
+ * Modern FPU state can be kept in registers, if there are
+ * no pending FP exceptions.
+ */
+int copy_fpregs_to_fpstate(struct fpu *fpu)
 {
-	struct fpu *fpu = &current->thread.fpu;
+	if (likely(use_xsave())) {
+		copy_xregs_to_kernel(&fpu->state.xsave);
+
+		/*
+		 * AVX512 state is tracked here because its use is
+		 * known to slow the max clock speed of the core.
+		 */
+		if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
+			fpu->avx512_timestamp = jiffies;
+		return 1;
+	}
+
+	if (likely(use_fxsr())) {
+		copy_fxregs_to_kernel(fpu);
+		return 1;
+	}
+
+	/*
+	 * Legacy FPU register saving, FNSAVE always clears FPU registers,
+	 * so we have to mark them inactive:
+	 */
+	asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
+
+	return 0;
+}
+EXPORT_SYMBOL(copy_fpregs_to_fpstate);
+
+void kernel_fpu_begin_mask(unsigned int kfpu_mask)
+{
+	preempt_disable();

 	WARN_ON_FPU(!irq_fpu_usable());
+	WARN_ON_FPU(this_cpu_read(in_kernel_fpu));

-	kernel_fpu_disable();
+	this_cpu_write(in_kernel_fpu, true);

-	if (fpu->initialized) {
+	if (!(current->flags & PF_KTHREAD) &&
+	    !test_thread_flag(TIF_NEED_FPU_LOAD)) {
+		set_thread_flag(TIF_NEED_FPU_LOAD);
 		/*
 		 * Ignore return value -- we don't care if reg state
 		 * is clobbered.
 		 */
-		copy_fpregs_to_fpstate(fpu);
-	} else {
-		__cpu_invalidate_fpregs_state();
+		copy_fpregs_to_fpstate(&current->thread.fpu);
 	}
+	__cpu_invalidate_fpregs_state();
+
+	/* Put sane initial values into the control registers. */
+	if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM))
+		ldmxcsr(MXCSR_DEFAULT);
+
+	if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU))
+		asm volatile ("fninit");
 }
-
-static void __kernel_fpu_end(void)
-{
-	struct fpu *fpu = &current->thread.fpu;
-
-	if (fpu->initialized)
-		copy_kernel_to_fpregs(&fpu->state);
-
-	kernel_fpu_enable();
-}
-
-void kernel_fpu_begin(void)
-{
-	preempt_disable();
-	__kernel_fpu_begin();
-}
-EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask);

 void kernel_fpu_end(void)
 {
-	__kernel_fpu_end();
+	WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
+
+	this_cpu_write(in_kernel_fpu, false);
 	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(kernel_fpu_end);
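
For reference, callers outside this file are expected to use kernel_fpu_begin()/kernel_fpu_end() rather than the _mask variant; in trees carrying this patch, kernel_fpu_begin() is usually a thin header wrapper that passes KFPU_MXCSR (plus KFPU_387 on 32-bit) to kernel_fpu_begin_mask(). A minimal, hypothetical caller-side sketch of the usage pattern (the helper name and body are illustrative, not part of this change):

#include <linux/kernel.h>
#include <asm/fpu/api.h>	/* kernel_fpu_begin(), kernel_fpu_end() */

/* Hypothetical helper: any SSE/AVX use in kernel code must be bracketed. */
static void example_simd_fill(void *dst, size_t len)
{
	/*
	 * Claim the FPU for kernel use. Preemption stays disabled until
	 * kernel_fpu_end(); the interrupted user context is saved lazily
	 * via TIF_NEED_FPU_LOAD instead of being restored eagerly.
	 */
	kernel_fpu_begin();

	/* ... SIMD instructions may touch dst/len here ... */

	kernel_fpu_end();
}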
@@ -157,17 +180,18 @@
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu);

-	preempt_disable();
+	fpregs_lock();
 	trace_x86_fpu_before_save(fpu);
-	if (fpu->initialized) {
+
+	if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
 		if (!copy_fpregs_to_fpstate(fpu)) {
 			copy_kernel_to_fpregs(&fpu->state);
 		}
 	}
+
 	trace_x86_fpu_after_save(fpu);
-	preempt_enable();
+	fpregs_unlock();
 }
-EXPORT_SYMBOL_GPL(fpu__save);

 /*
  * Legacy x87 fpstate state init:
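
The fpu__save() hunk above shows the locking rule the rest of this patch follows: current's FPU registers and current->thread.fpu.state may only be examined or changed between fpregs_lock() and fpregs_unlock(), and TIF_NEED_FPU_LOAD decides which of the two copies is authoritative. A minimal sketch of that rule, assuming the usual header locations for these helpers (the function itself is hypothetical):

#include <linux/sched.h>	/* current */
#include <linux/thread_info.h>	/* test_thread_flag(), TIF_NEED_FPU_LOAD */
#include <asm/fpu/api.h>	/* fpregs_lock(), fpregs_unlock() */
#include <asm/fpu/internal.h>	/* copy_fpregs_to_fpstate() */

static void example_snapshot_current_fpstate(void)
{
	fpregs_lock();
	if (!test_thread_flag(TIF_NEED_FPU_LOAD))
		copy_fpregs_to_fpstate(&current->thread.fpu); /* registers are live */
	/* else: current->thread.fpu.state already holds the latest user state */
	fpregs_unlock();
}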
@@ -198,11 +222,14 @@
 }
 EXPORT_SYMBOL_GPL(fpstate_init);

-int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
+int fpu__copy(struct task_struct *dst, struct task_struct *src)
 {
+	struct fpu *dst_fpu = &dst->thread.fpu;
+	struct fpu *src_fpu = &src->thread.fpu;
+
 	dst_fpu->last_cpu = -1;

-	if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU))
+	if (!static_cpu_has(X86_FEATURE_FPU))
 		return 0;

 	WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -214,16 +241,23 @@
 	memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);

 	/*
-	 * Save current FPU registers directly into the child
-	 * FPU context, without any memory-to-memory copying.
+	 * If the FPU registers are not current just memcpy() the state.
+	 * Otherwise save current FPU registers directly into the child's FPU
+	 * context, without any memory-to-memory copying.
 	 *
 	 * ( The function 'fails' in the FNSAVE case, which destroys
-	 *   register contents so we have to copy them back. )
+	 *   register contents so we have to load them back. )
 	 */
-	if (!copy_fpregs_to_fpstate(dst_fpu)) {
-		memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
-		copy_kernel_to_fpregs(&src_fpu->state);
-	}
+	fpregs_lock();
+	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+		memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size);
+
+	else if (!copy_fpregs_to_fpstate(dst_fpu))
+		copy_kernel_to_fpregs(&dst_fpu->state);
+
+	fpregs_unlock();
+
+	set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);

 	trace_x86_fpu_copy_src(src_fpu);
 	trace_x86_fpu_copy_dst(dst_fpu);
@@ -235,20 +269,14 @@
  * Activate the current task's in-memory FPU context,
  * if it has not been used before:
  */
-void fpu__initialize(struct fpu *fpu)
+static void fpu__initialize(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu);

-	if (!fpu->initialized) {
-		fpstate_init(&fpu->state);
-		trace_x86_fpu_init_state(fpu);
-
-		trace_x86_fpu_activate_state(fpu);
-		/* Safe to do for the current task: */
-		fpu->initialized = 1;
-	}
+	set_thread_flag(TIF_NEED_FPU_LOAD);
+	fpstate_init(&fpu->state);
+	trace_x86_fpu_init_state(fpu);
 }
-EXPORT_SYMBOL_GPL(fpu__initialize);

 /*
  * This function must be called before we read a task's fpstate.
@@ -260,32 +288,20 @@
  *
  * - or it's called for stopped tasks (ptrace), in which case the
  *   registers were already saved by the context-switch code when
- *   the task scheduled out - we only have to initialize the registers
- *   if they've never been initialized.
+ *   the task scheduled out.
  *
  * If the task has used the FPU before then save it.
  */
 void fpu__prepare_read(struct fpu *fpu)
 {
-	if (fpu == &current->thread.fpu) {
+	if (fpu == &current->thread.fpu)
 		fpu__save(fpu);
-	} else {
-		if (!fpu->initialized) {
-			fpstate_init(&fpu->state);
-			trace_x86_fpu_init_state(fpu);
-
-			trace_x86_fpu_activate_state(fpu);
-			/* Safe to do for current and for stopped child tasks: */
-			fpu->initialized = 1;
-		}
-	}
 }

 /*
  * This function must be called before we write a task's fpstate.
  *
- * If the task has used the FPU before then invalidate any cached FPU registers.
- * If the task has not used the FPU before then initialize its fpstate.
+ * Invalidate any cached FPU registers.
  *
  * After this function call, after registers in the fpstate are
  * modified and the child task has woken up, the child task will
@@ -302,42 +318,9 @@
 	 */
 	WARN_ON_FPU(fpu == &current->thread.fpu);

-	if (fpu->initialized) {
-		/* Invalidate any cached state: */
-		__fpu_invalidate_fpregs_state(fpu);
-	} else {
-		fpstate_init(&fpu->state);
-		trace_x86_fpu_init_state(fpu);
-
-		trace_x86_fpu_activate_state(fpu);
-		/* Safe to do for stopped child tasks: */
-		fpu->initialized = 1;
-	}
+	/* Invalidate any cached state: */
+	__fpu_invalidate_fpregs_state(fpu);
 }
-
-/*
- * 'fpu__restore()' is called to copy FPU registers from
- * the FPU fpstate to the live hw registers and to activate
- * access to the hardware registers, so that FPU instructions
- * can be used afterwards.
- *
- * Must be called with kernel preemption disabled (for example
- * with local interrupts disabled, as it is in the case of
- * do_device_not_available()).
- */
-void fpu__restore(struct fpu *fpu)
-{
-	fpu__initialize(fpu);
-
-	/* Avoid __kernel_fpu_begin() right after fpregs_activate() */
-	kernel_fpu_disable();
-	trace_x86_fpu_before_restore(fpu);
-	fpregs_activate(fpu);
-	copy_kernel_to_fpregs(&fpu->state);
-	trace_x86_fpu_after_restore(fpu);
-	kernel_fpu_enable();
-}
-EXPORT_SYMBOL_GPL(fpu__restore);

 /*
  * Drops current FPU state: deactivates the fpregs and
@@ -353,16 +336,12 @@
 	preempt_disable();

 	if (fpu == &current->thread.fpu) {
-		if (fpu->initialized) {
-			/* Ignore delayed exceptions from user space */
-			asm volatile("1: fwait\n"
-				     "2:\n"
-				     _ASM_EXTABLE(1b, 2b));
-			fpregs_deactivate(fpu);
-		}
+		/* Ignore delayed exceptions from user space */
+		asm volatile("1: fwait\n"
+			     "2:\n"
+			     _ASM_EXTABLE(1b, 2b));
+		fpregs_deactivate(fpu);
 	}
-
-	fpu->initialized = 0;

 	trace_x86_fpu_dropped(fpu);

@@ -370,13 +349,13 @@
 }

 /*
- * Clear FPU registers by setting them up from
- * the init fpstate:
+ * Clear FPU registers by setting them up from the init fpstate.
+ * Caller must do fpregs_[un]lock() around it.
  */
-static inline void copy_init_fpstate_to_fpregs(void)
+static inline void copy_init_fpstate_to_fpregs(u64 features_mask)
 {
 	if (use_xsave())
-		copy_kernel_to_xregs(&init_fpstate.xsave, -1);
+		copy_kernel_to_xregs(&init_fpstate.xsave, features_mask);
 	else if (static_cpu_has(X86_FEATURE_FXSR))
 		copy_kernel_to_fxregs(&init_fpstate.fxsave);
 	else
@@ -392,24 +371,82 @@
  * Called by sys_execve(), by the signal handler code and by various
  * error paths.
  */
-void fpu__clear(struct fpu *fpu)
+static void fpu__clear(struct fpu *fpu, bool user_only)
 {
-	WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
+	WARN_ON_FPU(fpu != &current->thread.fpu);

-	fpu__drop(fpu);
-
-	/*
-	 * Make sure fpstate is cleared and initialized.
-	 */
-	if (static_cpu_has(X86_FEATURE_FPU)) {
-		preempt_disable();
+	if (!static_cpu_has(X86_FEATURE_FPU)) {
+		fpu__drop(fpu);
 		fpu__initialize(fpu);
-		user_fpu_begin();
-		copy_init_fpstate_to_fpregs();
-		preempt_enable();
+		return;
 	}
+
+	fpregs_lock();
+
+	if (user_only) {
+		if (!fpregs_state_valid(fpu, smp_processor_id()) &&
+		    xfeatures_mask_supervisor())
+			copy_kernel_to_xregs(&fpu->state.xsave,
+					     xfeatures_mask_supervisor());
+		copy_init_fpstate_to_fpregs(xfeatures_mask_user());
+	} else {
+		copy_init_fpstate_to_fpregs(xfeatures_mask_all);
+	}
+
+	fpregs_mark_activate();
+	fpregs_unlock();
 }

+void fpu__clear_user_states(struct fpu *fpu)
+{
+	fpu__clear(fpu, true);
+}
+
+void fpu__clear_all(struct fpu *fpu)
+{
+	fpu__clear(fpu, false);
+}
+
+/*
+ * Load FPU context before returning to userspace.
+ */
+void switch_fpu_return(void)
+{
+	if (!static_cpu_has(X86_FEATURE_FPU))
+		return;
+
+	__fpregs_load_activate();
+}
+EXPORT_SYMBOL_GPL(switch_fpu_return);
+
+#ifdef CONFIG_X86_DEBUG_FPU
+/*
+ * If current FPU state according to its tracking (loaded FPU context on this
+ * CPU) is not valid then we must have TIF_NEED_FPU_LOAD set so the context is
+ * loaded on return to userland.
+ */
+void fpregs_assert_state_consistent(void)
+{
+	struct fpu *fpu = &current->thread.fpu;
+
+	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+		return;
+
+	WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id()));
+}
+EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent);
+#endif
+
+void fpregs_mark_activate(void)
+{
+	struct fpu *fpu = &current->thread.fpu;
+
+	fpregs_activate(fpu);
+	fpu->last_cpu = smp_processor_id();
+	clear_thread_flag(TIF_NEED_FPU_LOAD);
+}
+EXPORT_SYMBOL_GPL(fpregs_mark_activate);
+
 /*
  * x87 math exception handling:
  */
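
The newly added switch_fpu_return() and fpregs_assert_state_consistent() are meant to be driven from the architecture's return-to-userspace path, which reloads the registers only when TIF_NEED_FPU_LOAD is set. A rough sketch of that consumer, assuming entry-code plumbing that is not part of this diff (the function name and ti_work handling are illustrative):

#include <linux/compiler.h>	/* unlikely() */
#include <asm/thread_info.h>	/* _TIF_NEED_FPU_LOAD */
#include <asm/fpu/api.h>	/* switch_fpu_return(), fpregs_assert_state_consistent() */

static void example_exit_to_user_mode_prepare(unsigned long ti_work)
{
	/* With CONFIG_X86_DEBUG_FPU this catches stale-register bugs early. */
	fpregs_assert_state_consistent();

	/* Reload the task's FPU registers if a context switch or a
	 * kernel_fpu_begin() section left the CPU's copy invalid. */
	if (unlikely(ti_work & _TIF_NEED_FPU_LOAD))
		switch_fpu_return();
}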