forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-08 01573e231f18eb2d99162747186f59511f56b64d
kernel/arch/x86/kernel/fpu/core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 1994 Linus Torvalds
  *
@@ -42,18 +43,6 @@
  */
 DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
 
-static void kernel_fpu_disable(void)
-{
-	WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
-	this_cpu_write(in_kernel_fpu, true);
-}
-
-static void kernel_fpu_enable(void)
-{
-	WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
-	this_cpu_write(in_kernel_fpu, false);
-}
-
 static bool kernel_fpu_disabled(void)
 {
 	return this_cpu_read(in_kernel_fpu);
@@ -93,48 +82,94 @@
 }
 EXPORT_SYMBOL(irq_fpu_usable);
 
-static void __kernel_fpu_begin(void)
+/*
+ * These must be called with preempt disabled. Returns
+ * 'true' if the FPU state is still intact and we can
+ * keep registers active.
+ *
+ * The legacy FNSAVE instruction cleared all FPU state
+ * unconditionally, so registers are essentially destroyed.
+ * Modern FPU state can be kept in registers, if there are
+ * no pending FP exceptions.
+ */
+int copy_fpregs_to_fpstate(struct fpu *fpu)
 {
-	struct fpu *fpu = &current->thread.fpu;
+	if (likely(use_xsave())) {
+		copy_xregs_to_kernel(&fpu->state.xsave);
+
+		/*
+		 * AVX512 state is tracked here because its use is
+		 * known to slow the max clock speed of the core.
+		 */
+		if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
+			fpu->avx512_timestamp = jiffies;
+		return 1;
+	}
+
+	if (likely(use_fxsr())) {
+		copy_fxregs_to_kernel(fpu);
+		return 1;
+	}
+
+	/*
+	 * Legacy FPU register saving, FNSAVE always clears FPU registers,
+	 * so we have to mark them inactive:
+	 */
+	asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
+
+	return 0;
+}
+EXPORT_SYMBOL(copy_fpregs_to_fpstate);
+
+void kernel_fpu_begin_mask(unsigned int kfpu_mask)
+{
+	preempt_disable();
 
 	WARN_ON_FPU(!irq_fpu_usable());
+	WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
 
-	kernel_fpu_disable();
+	this_cpu_write(in_kernel_fpu, true);
 
-	if (fpu->initialized) {
+	if (!(current->flags & PF_KTHREAD) &&
+	    !test_thread_flag(TIF_NEED_FPU_LOAD)) {
+		set_thread_flag(TIF_NEED_FPU_LOAD);
 		/*
 		 * Ignore return value -- we don't care if reg state
 		 * is clobbered.
 		 */
-		copy_fpregs_to_fpstate(fpu);
-	} else {
-		__cpu_invalidate_fpregs_state();
+		copy_fpregs_to_fpstate(&current->thread.fpu);
 	}
+	__cpu_invalidate_fpregs_state();
+
+	/* Put sane initial values into the control registers. */
+	if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM))
+		ldmxcsr(MXCSR_DEFAULT);
+
+	if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU))
+		asm volatile ("fninit");
 }
-
-static void __kernel_fpu_end(void)
-{
-	struct fpu *fpu = &current->thread.fpu;
-
-	if (fpu->initialized)
-		copy_kernel_to_fpregs(&fpu->state);
-
-	kernel_fpu_enable();
-}
-
-void kernel_fpu_begin(void)
-{
-	preempt_disable();
-	__kernel_fpu_begin();
-}
-EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask);
 
 void kernel_fpu_end(void)
 {
-	__kernel_fpu_end();
+	WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
+
+	this_cpu_write(in_kernel_fpu, false);
 	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(kernel_fpu_end);
+
+void kernel_fpu_resched(void)
+{
+	WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
+
+	if (should_resched(PREEMPT_OFFSET)) {
+		kernel_fpu_end();
+		cond_resched();
+		kernel_fpu_begin();
+	}
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_resched);
 
 /*
  * Save the FPU state (mark it for reload if necessary):
@@ -145,17 +180,18 @@
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
-	preempt_disable();
+	fpregs_lock();
 	trace_x86_fpu_before_save(fpu);
-	if (fpu->initialized) {
+
+	if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
 		if (!copy_fpregs_to_fpstate(fpu)) {
 			copy_kernel_to_fpregs(&fpu->state);
 		}
 	}
+
 	trace_x86_fpu_after_save(fpu);
-	preempt_enable();
+	fpregs_unlock();
 }
-EXPORT_SYMBOL_GPL(fpu__save);
 
 /*
  * Legacy x87 fpstate state init:
@@ -186,11 +222,14 @@
 }
 EXPORT_SYMBOL_GPL(fpstate_init);
 
-int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
+int fpu__copy(struct task_struct *dst, struct task_struct *src)
 {
+	struct fpu *dst_fpu = &dst->thread.fpu;
+	struct fpu *src_fpu = &src->thread.fpu;
+
 	dst_fpu->last_cpu = -1;
 
-	if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU))
+	if (!static_cpu_has(X86_FEATURE_FPU))
 		return 0;
 
 	WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -202,16 +241,23 @@
 	memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
 
 	/*
-	 * Save current FPU registers directly into the child
-	 * FPU context, without any memory-to-memory copying.
+	 * If the FPU registers are not current just memcpy() the state.
+	 * Otherwise save current FPU registers directly into the child's FPU
+	 * context, without any memory-to-memory copying.
 	 *
 	 * ( The function 'fails' in the FNSAVE case, which destroys
-	 *   register contents so we have to copy them back. )
+	 *   register contents so we have to load them back. )
 	 */
-	if (!copy_fpregs_to_fpstate(dst_fpu)) {
-		memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
-		copy_kernel_to_fpregs(&src_fpu->state);
-	}
+	fpregs_lock();
+	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+		memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size);
+
+	else if (!copy_fpregs_to_fpstate(dst_fpu))
+		copy_kernel_to_fpregs(&dst_fpu->state);
+
+	fpregs_unlock();
+
+	set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);
 
 	trace_x86_fpu_copy_src(src_fpu);
 	trace_x86_fpu_copy_dst(dst_fpu);
@@ -223,20 +269,14 @@
  * Activate the current task's in-memory FPU context,
  * if it has not been used before:
  */
-void fpu__initialize(struct fpu *fpu)
+static void fpu__initialize(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
-	if (!fpu->initialized) {
-		fpstate_init(&fpu->state);
-		trace_x86_fpu_init_state(fpu);
-
-		trace_x86_fpu_activate_state(fpu);
-		/* Safe to do for the current task: */
-		fpu->initialized = 1;
-	}
+	set_thread_flag(TIF_NEED_FPU_LOAD);
+	fpstate_init(&fpu->state);
+	trace_x86_fpu_init_state(fpu);
 }
-EXPORT_SYMBOL_GPL(fpu__initialize);
 
 /*
  * This function must be called before we read a task's fpstate.
@@ -248,32 +288,20 @@
  *
  * - or it's called for stopped tasks (ptrace), in which case the
  *   registers were already saved by the context-switch code when
- *   the task scheduled out - we only have to initialize the registers
- *   if they've never been initialized.
+ *   the task scheduled out.
  *
  * If the task has used the FPU before then save it.
  */
 void fpu__prepare_read(struct fpu *fpu)
 {
-	if (fpu == &current->thread.fpu) {
+	if (fpu == &current->thread.fpu)
 		fpu__save(fpu);
-	} else {
-		if (!fpu->initialized) {
-			fpstate_init(&fpu->state);
-			trace_x86_fpu_init_state(fpu);
-
-			trace_x86_fpu_activate_state(fpu);
-			/* Safe to do for current and for stopped child tasks: */
-			fpu->initialized = 1;
-		}
-	}
 }
 
 /*
  * This function must be called before we write a task's fpstate.
  *
- * If the task has used the FPU before then invalidate any cached FPU registers.
- * If the task has not used the FPU before then initialize its fpstate.
+ * Invalidate any cached FPU registers.
 *
 * After this function call, after registers in the fpstate are
 * modified and the child task has woken up, the child task will
@@ -290,42 +318,9 @@
 	 */
 	WARN_ON_FPU(fpu == &current->thread.fpu);
 
-	if (fpu->initialized) {
-		/* Invalidate any cached state: */
-		__fpu_invalidate_fpregs_state(fpu);
-	} else {
-		fpstate_init(&fpu->state);
-		trace_x86_fpu_init_state(fpu);
-
-		trace_x86_fpu_activate_state(fpu);
-		/* Safe to do for stopped child tasks: */
-		fpu->initialized = 1;
-	}
+	/* Invalidate any cached state: */
+	__fpu_invalidate_fpregs_state(fpu);
 }
-
-/*
- * 'fpu__restore()' is called to copy FPU registers from
- * the FPU fpstate to the live hw registers and to activate
- * access to the hardware registers, so that FPU instructions
- * can be used afterwards.
- *
- * Must be called with kernel preemption disabled (for example
- * with local interrupts disabled, as it is in the case of
- * do_device_not_available()).
- */
-void fpu__restore(struct fpu *fpu)
-{
-	fpu__initialize(fpu);
-
-	/* Avoid __kernel_fpu_begin() right after fpregs_activate() */
-	kernel_fpu_disable();
-	trace_x86_fpu_before_restore(fpu);
-	fpregs_activate(fpu);
-	copy_kernel_to_fpregs(&fpu->state);
-	trace_x86_fpu_after_restore(fpu);
-	kernel_fpu_enable();
-}
-EXPORT_SYMBOL_GPL(fpu__restore);
 
 /*
  * Drops current FPU state: deactivates the fpregs and
@@ -341,16 +336,12 @@
 	preempt_disable();
 
 	if (fpu == &current->thread.fpu) {
-		if (fpu->initialized) {
-			/* Ignore delayed exceptions from user space */
-			asm volatile("1: fwait\n"
-				     "2:\n"
-				     _ASM_EXTABLE(1b, 2b));
-			fpregs_deactivate(fpu);
-		}
+		/* Ignore delayed exceptions from user space */
+		asm volatile("1: fwait\n"
+			     "2:\n"
+			     _ASM_EXTABLE(1b, 2b));
+		fpregs_deactivate(fpu);
 	}
-
-	fpu->initialized = 0;
 
 	trace_x86_fpu_dropped(fpu);
 
@@ -358,13 +349,13 @@
 }
 
 /*
- * Clear FPU registers by setting them up from
- * the init fpstate:
+ * Clear FPU registers by setting them up from the init fpstate.
+ * Caller must do fpregs_[un]lock() around it.
  */
-static inline void copy_init_fpstate_to_fpregs(void)
+static inline void copy_init_fpstate_to_fpregs(u64 features_mask)
 {
 	if (use_xsave())
-		copy_kernel_to_xregs(&init_fpstate.xsave, -1);
+		copy_kernel_to_xregs(&init_fpstate.xsave, features_mask);
 	else if (static_cpu_has(X86_FEATURE_FXSR))
 		copy_kernel_to_fxregs(&init_fpstate.fxsave);
 	else
@@ -380,24 +371,82 @@
  * Called by sys_execve(), by the signal handler code and by various
  * error paths.
  */
-void fpu__clear(struct fpu *fpu)
+static void fpu__clear(struct fpu *fpu, bool user_only)
 {
-	WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
+	WARN_ON_FPU(fpu != &current->thread.fpu);
 
-	fpu__drop(fpu);
-
-	/*
-	 * Make sure fpstate is cleared and initialized.
-	 */
-	if (static_cpu_has(X86_FEATURE_FPU)) {
-		preempt_disable();
+	if (!static_cpu_has(X86_FEATURE_FPU)) {
+		fpu__drop(fpu);
 		fpu__initialize(fpu);
-		user_fpu_begin();
-		copy_init_fpstate_to_fpregs();
-		preempt_enable();
+		return;
 	}
+
+	fpregs_lock();
+
+	if (user_only) {
+		if (!fpregs_state_valid(fpu, smp_processor_id()) &&
+		    xfeatures_mask_supervisor())
+			copy_kernel_to_xregs(&fpu->state.xsave,
+					     xfeatures_mask_supervisor());
+		copy_init_fpstate_to_fpregs(xfeatures_mask_user());
+	} else {
+		copy_init_fpstate_to_fpregs(xfeatures_mask_all);
+	}
+
+	fpregs_mark_activate();
+	fpregs_unlock();
 }
 
+void fpu__clear_user_states(struct fpu *fpu)
+{
+	fpu__clear(fpu, true);
+}
+
+void fpu__clear_all(struct fpu *fpu)
+{
+	fpu__clear(fpu, false);
+}
+
+/*
+ * Load FPU context before returning to userspace.
+ */
+void switch_fpu_return(void)
+{
+	if (!static_cpu_has(X86_FEATURE_FPU))
+		return;
+
+	__fpregs_load_activate();
+}
+EXPORT_SYMBOL_GPL(switch_fpu_return);
+
+#ifdef CONFIG_X86_DEBUG_FPU
+/*
+ * If current FPU state according to its tracking (loaded FPU context on this
+ * CPU) is not valid then we must have TIF_NEED_FPU_LOAD set so the context is
+ * loaded on return to userland.
+ */
+void fpregs_assert_state_consistent(void)
+{
+	struct fpu *fpu = &current->thread.fpu;
+
+	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+		return;
+
+	WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id()));
+}
+EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent);
+#endif
+
+void fpregs_mark_activate(void)
+{
+	struct fpu *fpu = &current->thread.fpu;
+
+	fpregs_activate(fpu);
+	fpu->last_cpu = smp_processor_id();
+	clear_thread_flag(TIF_NEED_FPU_LOAD);
+}
+EXPORT_SYMBOL_GPL(fpregs_mark_activate);
+
 /*
  * x87 math exception handling:
  */
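
Usage note (not part of this commit): kernel code that touches FPU/SIMD registers still brackets the work with kernel_fpu_begin()/kernel_fpu_end(); kernel_fpu_begin() is assumed to remain a thin wrapper around the new kernel_fpu_begin_mask() in the matching header, as it is in mainline, which this diff does not show. The sketch below is illustrative only -- example_xor_blocks() and its parameters are hypothetical -- and shows how the kernel_fpu_resched() helper added above can be used to break up a long FPU section.

#include <linux/kernel.h>
#include <asm/fpu/api.h>	/* kernel_fpu_* and irq_fpu_usable(); exact header layout assumed */

/*
 * Hypothetical helper: process 'blocks' buffers with SIMD instructions.
 * Preemption is disabled between kernel_fpu_begin() and kernel_fpu_end(),
 * so a long loop periodically drops and re-takes the FPU section via
 * kernel_fpu_resched() to give the scheduler a chance to run.
 */
static void example_xor_blocks(void *dst, const void *src, unsigned int blocks)
{
	unsigned int i;

	if (!irq_fpu_usable())
		return;			/* real code would take a scalar fallback path */

	kernel_fpu_begin();		/* preemption disabled from here on */

	for (i = 0; i < blocks; i++) {
		/* ... SIMD work on one block would go here ... */

		kernel_fpu_resched();	/* ends, cond_resched()s and re-enters if needed */
	}

	kernel_fpu_end();		/* re-enables preemption */
}

Callers that only need SSE state can use kernel_fpu_begin_mask(KFPU_MXCSR) directly and skip the "fninit"; that choice is exactly what the kfpu_mask argument introduced above is for.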