forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-11 072de836f53be56a70cecf70b43ae43b7ce17376
kernel/arch/x86/kernel/fpu/core.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Copyright (C) 1994 Linus Torvalds
34 *
....@@ -42,18 +43,6 @@
4243 */
4344 DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
4445
45
-static void kernel_fpu_disable(void)
46
-{
47
- WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
48
- this_cpu_write(in_kernel_fpu, true);
49
-}
50
-
51
-static void kernel_fpu_enable(void)
52
-{
53
- WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
54
- this_cpu_write(in_kernel_fpu, false);
55
-}
56
-
5746 static bool kernel_fpu_disabled(void)
5847 {
5948 return this_cpu_read(in_kernel_fpu);
....@@ -93,60 +82,82 @@
9382 }
9483 EXPORT_SYMBOL(irq_fpu_usable);
9584
96
-static void __kernel_fpu_begin(void)
85
+/*
86
+ * These must be called with preempt disabled. Returns
87
+ * 'true' if the FPU state is still intact and we can
88
+ * keep registers active.
89
+ *
90
+ * The legacy FNSAVE instruction cleared all FPU state
91
+ * unconditionally, so registers are essentially destroyed.
92
+ * Modern FPU state can be kept in registers, if there are
93
+ * no pending FP exceptions.
94
+ */
95
+int copy_fpregs_to_fpstate(struct fpu *fpu)
9796 {
98
- struct fpu *fpu = &current->thread.fpu;
97
+ if (likely(use_xsave())) {
98
+ copy_xregs_to_kernel(&fpu->state.xsave);
99
+
100
+ /*
101
+ * AVX512 state is tracked here because its use is
102
+ * known to slow the max clock speed of the core.
103
+ */
104
+ if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
105
+ fpu->avx512_timestamp = jiffies;
106
+ return 1;
107
+ }
108
+
109
+ if (likely(use_fxsr())) {
110
+ copy_fxregs_to_kernel(fpu);
111
+ return 1;
112
+ }
113
+
114
+ /*
115
+ * Legacy FPU register saving, FNSAVE always clears FPU registers,
116
+ * so we have to mark them inactive:
117
+ */
118
+ asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
119
+
120
+ return 0;
121
+}
122
+EXPORT_SYMBOL(copy_fpregs_to_fpstate);
123
+
124
+void kernel_fpu_begin_mask(unsigned int kfpu_mask)
125
+{
126
+ preempt_disable();
99127
100128 WARN_ON_FPU(!irq_fpu_usable());
129
+ WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
101130
102
- kernel_fpu_disable();
131
+ this_cpu_write(in_kernel_fpu, true);
103132
104
- if (fpu->initialized) {
133
+ if (!(current->flags & PF_KTHREAD) &&
134
+ !test_thread_flag(TIF_NEED_FPU_LOAD)) {
135
+ set_thread_flag(TIF_NEED_FPU_LOAD);
105136 /*
106137 * Ignore return value -- we don't care if reg state
107138 * is clobbered.
108139 */
109
- copy_fpregs_to_fpstate(fpu);
110
- } else {
111
- __cpu_invalidate_fpregs_state();
140
+ copy_fpregs_to_fpstate(&current->thread.fpu);
112141 }
142
+ __cpu_invalidate_fpregs_state();
143
+
144
+ /* Put sane initial values into the control registers. */
145
+ if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM))
146
+ ldmxcsr(MXCSR_DEFAULT);
147
+
148
+ if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU))
149
+ asm volatile ("fninit");
113150 }
114
-
115
-static void __kernel_fpu_end(void)
116
-{
117
- struct fpu *fpu = &current->thread.fpu;
118
-
119
- if (fpu->initialized)
120
- copy_kernel_to_fpregs(&fpu->state);
121
-
122
- kernel_fpu_enable();
123
-}
124
-
125
-void kernel_fpu_begin(void)
126
-{
127
- preempt_disable();
128
- __kernel_fpu_begin();
129
-}
130
-EXPORT_SYMBOL_GPL(kernel_fpu_begin);
151
+EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask);
131152
132153 void kernel_fpu_end(void)
133154 {
134
- __kernel_fpu_end();
155
+ WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
156
+
157
+ this_cpu_write(in_kernel_fpu, false);
135158 preempt_enable();
136159 }
137160 EXPORT_SYMBOL_GPL(kernel_fpu_end);
138
-
139
-void kernel_fpu_resched(void)
140
-{
141
- WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
142
-
143
- if (should_resched(PREEMPT_OFFSET)) {
144
- kernel_fpu_end();
145
- cond_resched();
146
- kernel_fpu_begin();
147
- }
148
-}
149
-EXPORT_SYMBOL_GPL(kernel_fpu_resched);
150161
151162 /*
152163 * Save the FPU state (mark it for reload if necessary):
....@@ -157,17 +168,18 @@
157168 {
158169 WARN_ON_FPU(fpu != &current->thread.fpu);
159170
160
- preempt_disable();
171
+ fpregs_lock();
161172 trace_x86_fpu_before_save(fpu);
162
- if (fpu->initialized) {
173
+
174
+ if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
163175 if (!copy_fpregs_to_fpstate(fpu)) {
164176 copy_kernel_to_fpregs(&fpu->state);
165177 }
166178 }
179
+
167180 trace_x86_fpu_after_save(fpu);
168
- preempt_enable();
181
+ fpregs_unlock();
169182 }
170
-EXPORT_SYMBOL_GPL(fpu__save);
171183
172184 /*
173185 * Legacy x87 fpstate state init:
....@@ -198,11 +210,14 @@
198210 }
199211 EXPORT_SYMBOL_GPL(fpstate_init);
200212
201
-int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
213
+int fpu__copy(struct task_struct *dst, struct task_struct *src)
202214 {
215
+ struct fpu *dst_fpu = &dst->thread.fpu;
216
+ struct fpu *src_fpu = &src->thread.fpu;
217
+
203218 dst_fpu->last_cpu = -1;
204219
205
- if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU))
220
+ if (!static_cpu_has(X86_FEATURE_FPU))
206221 return 0;
207222
208223 WARN_ON_FPU(src_fpu != &current->thread.fpu);
....@@ -214,16 +229,23 @@
214229 memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
215230
216231 /*
217
- * Save current FPU registers directly into the child
218
- * FPU context, without any memory-to-memory copying.
232
+ * If the FPU registers are not current just memcpy() the state.
233
+ * Otherwise save current FPU registers directly into the child's FPU
234
+ * context, without any memory-to-memory copying.
219235 *
220236 * ( The function 'fails' in the FNSAVE case, which destroys
221
- * register contents so we have to copy them back. )
237
+ * register contents so we have to load them back. )
222238 */
223
- if (!copy_fpregs_to_fpstate(dst_fpu)) {
224
- memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
225
- copy_kernel_to_fpregs(&src_fpu->state);
226
- }
239
+ fpregs_lock();
240
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
241
+ memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size);
242
+
243
+ else if (!copy_fpregs_to_fpstate(dst_fpu))
244
+ copy_kernel_to_fpregs(&dst_fpu->state);
245
+
246
+ fpregs_unlock();
247
+
248
+ set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);
227249
228250 trace_x86_fpu_copy_src(src_fpu);
229251 trace_x86_fpu_copy_dst(dst_fpu);
....@@ -235,20 +257,14 @@
235257 * Activate the current task's in-memory FPU context,
236258 * if it has not been used before:
237259 */
238
-void fpu__initialize(struct fpu *fpu)
260
+static void fpu__initialize(struct fpu *fpu)
239261 {
240262 WARN_ON_FPU(fpu != &current->thread.fpu);
241263
242
- if (!fpu->initialized) {
243
- fpstate_init(&fpu->state);
244
- trace_x86_fpu_init_state(fpu);
245
-
246
- trace_x86_fpu_activate_state(fpu);
247
- /* Safe to do for the current task: */
248
- fpu->initialized = 1;
249
- }
264
+ set_thread_flag(TIF_NEED_FPU_LOAD);
265
+ fpstate_init(&fpu->state);
266
+ trace_x86_fpu_init_state(fpu);
250267 }
251
-EXPORT_SYMBOL_GPL(fpu__initialize);
252268
253269 /*
254270 * This function must be called before we read a task's fpstate.
....@@ -260,32 +276,20 @@
260276 *
261277 * - or it's called for stopped tasks (ptrace), in which case the
262278 * registers were already saved by the context-switch code when
263
- * the task scheduled out - we only have to initialize the registers
264
- * if they've never been initialized.
279
+ * the task scheduled out.
265280 *
266281 * If the task has used the FPU before then save it.
267282 */
268283 void fpu__prepare_read(struct fpu *fpu)
269284 {
270
- if (fpu == &current->thread.fpu) {
285
+ if (fpu == &current->thread.fpu)
271286 fpu__save(fpu);
272
- } else {
273
- if (!fpu->initialized) {
274
- fpstate_init(&fpu->state);
275
- trace_x86_fpu_init_state(fpu);
276
-
277
- trace_x86_fpu_activate_state(fpu);
278
- /* Safe to do for current and for stopped child tasks: */
279
- fpu->initialized = 1;
280
- }
281
- }
282287 }
283288
284289 /*
285290 * This function must be called before we write a task's fpstate.
286291 *
287
- * If the task has used the FPU before then invalidate any cached FPU registers.
288
- * If the task has not used the FPU before then initialize its fpstate.
292
+ * Invalidate any cached FPU registers.
289293 *
290294 * After this function call, after registers in the fpstate are
291295 * modified and the child task has woken up, the child task will
....@@ -302,42 +306,9 @@
302306 */
303307 WARN_ON_FPU(fpu == &current->thread.fpu);
304308
305
- if (fpu->initialized) {
306
- /* Invalidate any cached state: */
307
- __fpu_invalidate_fpregs_state(fpu);
308
- } else {
309
- fpstate_init(&fpu->state);
310
- trace_x86_fpu_init_state(fpu);
311
-
312
- trace_x86_fpu_activate_state(fpu);
313
- /* Safe to do for stopped child tasks: */
314
- fpu->initialized = 1;
315
- }
309
+ /* Invalidate any cached state: */
310
+ __fpu_invalidate_fpregs_state(fpu);
316311 }
317
-
318
-/*
319
- * 'fpu__restore()' is called to copy FPU registers from
320
- * the FPU fpstate to the live hw registers and to activate
321
- * access to the hardware registers, so that FPU instructions
322
- * can be used afterwards.
323
- *
324
- * Must be called with kernel preemption disabled (for example
325
- * with local interrupts disabled, as it is in the case of
326
- * do_device_not_available()).
327
- */
328
-void fpu__restore(struct fpu *fpu)
329
-{
330
- fpu__initialize(fpu);
331
-
332
- /* Avoid __kernel_fpu_begin() right after fpregs_activate() */
333
- kernel_fpu_disable();
334
- trace_x86_fpu_before_restore(fpu);
335
- fpregs_activate(fpu);
336
- copy_kernel_to_fpregs(&fpu->state);
337
- trace_x86_fpu_after_restore(fpu);
338
- kernel_fpu_enable();
339
-}
340
-EXPORT_SYMBOL_GPL(fpu__restore);
341312
342313 /*
343314 * Drops current FPU state: deactivates the fpregs and
....@@ -353,16 +324,12 @@
353324 preempt_disable();
354325
355326 if (fpu == &current->thread.fpu) {
356
- if (fpu->initialized) {
357
- /* Ignore delayed exceptions from user space */
358
- asm volatile("1: fwait\n"
359
- "2:\n"
360
- _ASM_EXTABLE(1b, 2b));
361
- fpregs_deactivate(fpu);
362
- }
327
+ /* Ignore delayed exceptions from user space */
328
+ asm volatile("1: fwait\n"
329
+ "2:\n"
330
+ _ASM_EXTABLE(1b, 2b));
331
+ fpregs_deactivate(fpu);
363332 }
364
-
365
- fpu->initialized = 0;
366333
367334 trace_x86_fpu_dropped(fpu);
368335
....@@ -370,13 +337,13 @@
370337 }
371338
372339 /*
373
- * Clear FPU registers by setting them up from
374
- * the init fpstate:
340
+ * Clear FPU registers by setting them up from the init fpstate.
341
+ * Caller must do fpregs_[un]lock() around it.
375342 */
376
-static inline void copy_init_fpstate_to_fpregs(void)
343
+static inline void copy_init_fpstate_to_fpregs(u64 features_mask)
377344 {
378345 if (use_xsave())
379
- copy_kernel_to_xregs(&init_fpstate.xsave, -1);
346
+ copy_kernel_to_xregs(&init_fpstate.xsave, features_mask);
380347 else if (static_cpu_has(X86_FEATURE_FXSR))
381348 copy_kernel_to_fxregs(&init_fpstate.fxsave);
382349 else
....@@ -392,24 +359,82 @@
392359 * Called by sys_execve(), by the signal handler code and by various
393360 * error paths.
394361 */
395
-void fpu__clear(struct fpu *fpu)
362
+static void fpu__clear(struct fpu *fpu, bool user_only)
396363 {
397
- WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
364
+ WARN_ON_FPU(fpu != &current->thread.fpu);
398365
399
- fpu__drop(fpu);
400
-
401
- /*
402
- * Make sure fpstate is cleared and initialized.
403
- */
404
- if (static_cpu_has(X86_FEATURE_FPU)) {
405
- preempt_disable();
366
+ if (!static_cpu_has(X86_FEATURE_FPU)) {
367
+ fpu__drop(fpu);
406368 fpu__initialize(fpu);
407
- user_fpu_begin();
408
- copy_init_fpstate_to_fpregs();
409
- preempt_enable();
369
+ return;
410370 }
371
+
372
+ fpregs_lock();
373
+
374
+ if (user_only) {
375
+ if (!fpregs_state_valid(fpu, smp_processor_id()) &&
376
+ xfeatures_mask_supervisor())
377
+ copy_kernel_to_xregs(&fpu->state.xsave,
378
+ xfeatures_mask_supervisor());
379
+ copy_init_fpstate_to_fpregs(xfeatures_mask_user());
380
+ } else {
381
+ copy_init_fpstate_to_fpregs(xfeatures_mask_all);
382
+ }
383
+
384
+ fpregs_mark_activate();
385
+ fpregs_unlock();
411386 }
412387
388
+void fpu__clear_user_states(struct fpu *fpu)
389
+{
390
+ fpu__clear(fpu, true);
391
+}
392
+
393
+void fpu__clear_all(struct fpu *fpu)
394
+{
395
+ fpu__clear(fpu, false);
396
+}
397
+
398
+/*
399
+ * Load FPU context before returning to userspace.
400
+ */
401
+void switch_fpu_return(void)
402
+{
403
+ if (!static_cpu_has(X86_FEATURE_FPU))
404
+ return;
405
+
406
+ __fpregs_load_activate();
407
+}
408
+EXPORT_SYMBOL_GPL(switch_fpu_return);
409
+
410
+#ifdef CONFIG_X86_DEBUG_FPU
411
+/*
412
+ * If current FPU state according to its tracking (loaded FPU context on this
413
+ * CPU) is not valid then we must have TIF_NEED_FPU_LOAD set so the context is
414
+ * loaded on return to userland.
415
+ */
416
+void fpregs_assert_state_consistent(void)
417
+{
418
+ struct fpu *fpu = &current->thread.fpu;
419
+
420
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
421
+ return;
422
+
423
+ WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id()));
424
+}
425
+EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent);
426
+#endif
427
+
428
+void fpregs_mark_activate(void)
429
+{
430
+ struct fpu *fpu = &current->thread.fpu;
431
+
432
+ fpregs_activate(fpu);
433
+ fpu->last_cpu = smp_processor_id();
434
+ clear_thread_flag(TIF_NEED_FPU_LOAD);
435
+}
436
+EXPORT_SYMBOL_GPL(fpregs_mark_activate);
437
+
413438 /*
414439 * x87 math exception handling:
415440 */