forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-11 072de836f53be56a70cecf70b43ae43b7ce17376
kernel/arch/x86/kernel/fpu/core.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Copyright (C) 1994 Linus Torvalds
34 *
....@@ -42,18 +43,6 @@
4243 */
4344 DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
4445
45
-static void kernel_fpu_disable(void)
46
-{
47
- WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
48
- this_cpu_write(in_kernel_fpu, true);
49
-}
50
-
51
-static void kernel_fpu_enable(void)
52
-{
53
- WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
54
- this_cpu_write(in_kernel_fpu, false);
55
-}
56
-
5746 static bool kernel_fpu_disabled(void)
5847 {
5948 return this_cpu_read(in_kernel_fpu);
....@@ -93,45 +82,79 @@
9382 }
9483 EXPORT_SYMBOL(irq_fpu_usable);
9584
96
-static void __kernel_fpu_begin(void)
85
+/*
86
+ * These must be called with preempt disabled. Returns
87
+ * 'true' if the FPU state is still intact and we can
88
+ * keep registers active.
89
+ *
90
+ * The legacy FNSAVE instruction cleared all FPU state
91
+ * unconditionally, so registers are essentially destroyed.
92
+ * Modern FPU state can be kept in registers, if there are
93
+ * no pending FP exceptions.
94
+ */
95
+int copy_fpregs_to_fpstate(struct fpu *fpu)
9796 {
98
- struct fpu *fpu = &current->thread.fpu;
97
+ if (likely(use_xsave())) {
98
+ copy_xregs_to_kernel(&fpu->state.xsave);
99
+
100
+ /*
101
+ * AVX512 state is tracked here because its use is
102
+ * known to slow the max clock speed of the core.
103
+ */
104
+ if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
105
+ fpu->avx512_timestamp = jiffies;
106
+ return 1;
107
+ }
108
+
109
+ if (likely(use_fxsr())) {
110
+ copy_fxregs_to_kernel(fpu);
111
+ return 1;
112
+ }
113
+
114
+ /*
115
+ * Legacy FPU register saving, FNSAVE always clears FPU registers,
116
+ * so we have to mark them inactive:
117
+ */
118
+ asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
119
+
120
+ return 0;
121
+}
122
+EXPORT_SYMBOL(copy_fpregs_to_fpstate);
123
+
124
+void kernel_fpu_begin_mask(unsigned int kfpu_mask)
125
+{
126
+ preempt_disable();
99127
100128 WARN_ON_FPU(!irq_fpu_usable());
129
+ WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
101130
102
- kernel_fpu_disable();
131
+ this_cpu_write(in_kernel_fpu, true);
103132
104
- if (fpu->initialized) {
133
+ if (!(current->flags & PF_KTHREAD) &&
134
+ !test_thread_flag(TIF_NEED_FPU_LOAD)) {
135
+ set_thread_flag(TIF_NEED_FPU_LOAD);
105136 /*
106137 * Ignore return value -- we don't care if reg state
107138 * is clobbered.
108139 */
109
- copy_fpregs_to_fpstate(fpu);
110
- } else {
111
- __cpu_invalidate_fpregs_state();
140
+ copy_fpregs_to_fpstate(&current->thread.fpu);
112141 }
142
+ __cpu_invalidate_fpregs_state();
143
+
144
+ /* Put sane initial values into the control registers. */
145
+ if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM))
146
+ ldmxcsr(MXCSR_DEFAULT);
147
+
148
+ if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU))
149
+ asm volatile ("fninit");
113150 }
114
-
115
-static void __kernel_fpu_end(void)
116
-{
117
- struct fpu *fpu = &current->thread.fpu;
118
-
119
- if (fpu->initialized)
120
- copy_kernel_to_fpregs(&fpu->state);
121
-
122
- kernel_fpu_enable();
123
-}
124
-
125
-void kernel_fpu_begin(void)
126
-{
127
- preempt_disable();
128
- __kernel_fpu_begin();
129
-}
130
-EXPORT_SYMBOL_GPL(kernel_fpu_begin);
151
+EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask);
131152
132153 void kernel_fpu_end(void)
133154 {
134
- __kernel_fpu_end();
155
+ WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
156
+
157
+ this_cpu_write(in_kernel_fpu, false);
135158 preempt_enable();
136159 }
137160 EXPORT_SYMBOL_GPL(kernel_fpu_end);
....@@ -145,17 +168,18 @@
145168 {
146169 WARN_ON_FPU(fpu != &current->thread.fpu);
147170
148
- preempt_disable();
171
+ fpregs_lock();
149172 trace_x86_fpu_before_save(fpu);
150
- if (fpu->initialized) {
173
+
174
+ if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
151175 if (!copy_fpregs_to_fpstate(fpu)) {
152176 copy_kernel_to_fpregs(&fpu->state);
153177 }
154178 }
179
+
155180 trace_x86_fpu_after_save(fpu);
156
- preempt_enable();
181
+ fpregs_unlock();
157182 }
158
-EXPORT_SYMBOL_GPL(fpu__save);
159183
160184 /*
161185 * Legacy x87 fpstate state init:
....@@ -186,11 +210,14 @@
186210 }
187211 EXPORT_SYMBOL_GPL(fpstate_init);
188212
189
-int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
213
+int fpu__copy(struct task_struct *dst, struct task_struct *src)
190214 {
215
+ struct fpu *dst_fpu = &dst->thread.fpu;
216
+ struct fpu *src_fpu = &src->thread.fpu;
217
+
191218 dst_fpu->last_cpu = -1;
192219
193
- if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU))
220
+ if (!static_cpu_has(X86_FEATURE_FPU))
194221 return 0;
195222
196223 WARN_ON_FPU(src_fpu != &current->thread.fpu);
....@@ -202,16 +229,23 @@
202229 memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
203230
204231 /*
205
- * Save current FPU registers directly into the child
206
- * FPU context, without any memory-to-memory copying.
232
+ * If the FPU registers are not current just memcpy() the state.
233
+ * Otherwise save current FPU registers directly into the child's FPU
234
+ * context, without any memory-to-memory copying.
207235 *
208236 * ( The function 'fails' in the FNSAVE case, which destroys
209
- * register contents so we have to copy them back. )
237
+ * register contents so we have to load them back. )
210238 */
211
- if (!copy_fpregs_to_fpstate(dst_fpu)) {
212
- memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
213
- copy_kernel_to_fpregs(&src_fpu->state);
214
- }
239
+ fpregs_lock();
240
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
241
+ memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size);
242
+
243
+ else if (!copy_fpregs_to_fpstate(dst_fpu))
244
+ copy_kernel_to_fpregs(&dst_fpu->state);
245
+
246
+ fpregs_unlock();
247
+
248
+ set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);
215249
216250 trace_x86_fpu_copy_src(src_fpu);
217251 trace_x86_fpu_copy_dst(dst_fpu);
....@@ -223,20 +257,14 @@
223257 * Activate the current task's in-memory FPU context,
224258 * if it has not been used before:
225259 */
226
-void fpu__initialize(struct fpu *fpu)
260
+static void fpu__initialize(struct fpu *fpu)
227261 {
228262 WARN_ON_FPU(fpu != &current->thread.fpu);
229263
230
- if (!fpu->initialized) {
231
- fpstate_init(&fpu->state);
232
- trace_x86_fpu_init_state(fpu);
233
-
234
- trace_x86_fpu_activate_state(fpu);
235
- /* Safe to do for the current task: */
236
- fpu->initialized = 1;
237
- }
264
+ set_thread_flag(TIF_NEED_FPU_LOAD);
265
+ fpstate_init(&fpu->state);
266
+ trace_x86_fpu_init_state(fpu);
238267 }
239
-EXPORT_SYMBOL_GPL(fpu__initialize);
240268
241269 /*
242270 * This function must be called before we read a task's fpstate.
....@@ -248,32 +276,20 @@
248276 *
249277 * - or it's called for stopped tasks (ptrace), in which case the
250278 * registers were already saved by the context-switch code when
251
- * the task scheduled out - we only have to initialize the registers
252
- * if they've never been initialized.
279
+ * the task scheduled out.
253280 *
254281 * If the task has used the FPU before then save it.
255282 */
256283 void fpu__prepare_read(struct fpu *fpu)
257284 {
258
- if (fpu == &current->thread.fpu) {
285
+ if (fpu == &current->thread.fpu)
259286 fpu__save(fpu);
260
- } else {
261
- if (!fpu->initialized) {
262
- fpstate_init(&fpu->state);
263
- trace_x86_fpu_init_state(fpu);
264
-
265
- trace_x86_fpu_activate_state(fpu);
266
- /* Safe to do for current and for stopped child tasks: */
267
- fpu->initialized = 1;
268
- }
269
- }
270287 }
271288
272289 /*
273290 * This function must be called before we write a task's fpstate.
274291 *
275
- * If the task has used the FPU before then invalidate any cached FPU registers.
276
- * If the task has not used the FPU before then initialize its fpstate.
292
+ * Invalidate any cached FPU registers.
277293 *
278294 * After this function call, after registers in the fpstate are
279295 * modified and the child task has woken up, the child task will
....@@ -290,42 +306,9 @@
290306 */
291307 WARN_ON_FPU(fpu == &current->thread.fpu);
292308
293
- if (fpu->initialized) {
294
- /* Invalidate any cached state: */
295
- __fpu_invalidate_fpregs_state(fpu);
296
- } else {
297
- fpstate_init(&fpu->state);
298
- trace_x86_fpu_init_state(fpu);
299
-
300
- trace_x86_fpu_activate_state(fpu);
301
- /* Safe to do for stopped child tasks: */
302
- fpu->initialized = 1;
303
- }
309
+ /* Invalidate any cached state: */
310
+ __fpu_invalidate_fpregs_state(fpu);
304311 }
305
-
306
-/*
307
- * 'fpu__restore()' is called to copy FPU registers from
308
- * the FPU fpstate to the live hw registers and to activate
309
- * access to the hardware registers, so that FPU instructions
310
- * can be used afterwards.
311
- *
312
- * Must be called with kernel preemption disabled (for example
313
- * with local interrupts disabled, as it is in the case of
314
- * do_device_not_available()).
315
- */
316
-void fpu__restore(struct fpu *fpu)
317
-{
318
- fpu__initialize(fpu);
319
-
320
- /* Avoid __kernel_fpu_begin() right after fpregs_activate() */
321
- kernel_fpu_disable();
322
- trace_x86_fpu_before_restore(fpu);
323
- fpregs_activate(fpu);
324
- copy_kernel_to_fpregs(&fpu->state);
325
- trace_x86_fpu_after_restore(fpu);
326
- kernel_fpu_enable();
327
-}
328
-EXPORT_SYMBOL_GPL(fpu__restore);
329312
330313 /*
331314 * Drops current FPU state: deactivates the fpregs and
....@@ -341,16 +324,12 @@
341324 preempt_disable();
342325
343326 if (fpu == &current->thread.fpu) {
344
- if (fpu->initialized) {
345
- /* Ignore delayed exceptions from user space */
346
- asm volatile("1: fwait\n"
347
- "2:\n"
348
- _ASM_EXTABLE(1b, 2b));
349
- fpregs_deactivate(fpu);
350
- }
327
+ /* Ignore delayed exceptions from user space */
328
+ asm volatile("1: fwait\n"
329
+ "2:\n"
330
+ _ASM_EXTABLE(1b, 2b));
331
+ fpregs_deactivate(fpu);
351332 }
352
-
353
- fpu->initialized = 0;
354333
355334 trace_x86_fpu_dropped(fpu);
356335
....@@ -358,13 +337,13 @@
358337 }
359338
360339 /*
361
- * Clear FPU registers by setting them up from
362
- * the init fpstate:
340
+ * Clear FPU registers by setting them up from the init fpstate.
341
+ * Caller must do fpregs_[un]lock() around it.
363342 */
364
-static inline void copy_init_fpstate_to_fpregs(void)
343
+static inline void copy_init_fpstate_to_fpregs(u64 features_mask)
365344 {
366345 if (use_xsave())
367
- copy_kernel_to_xregs(&init_fpstate.xsave, -1);
346
+ copy_kernel_to_xregs(&init_fpstate.xsave, features_mask);
368347 else if (static_cpu_has(X86_FEATURE_FXSR))
369348 copy_kernel_to_fxregs(&init_fpstate.fxsave);
370349 else
....@@ -380,24 +359,82 @@
380359 * Called by sys_execve(), by the signal handler code and by various
381360 * error paths.
382361 */
383
-void fpu__clear(struct fpu *fpu)
362
+static void fpu__clear(struct fpu *fpu, bool user_only)
384363 {
385
- WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
364
+ WARN_ON_FPU(fpu != &current->thread.fpu);
386365
387
- fpu__drop(fpu);
388
-
389
- /*
390
- * Make sure fpstate is cleared and initialized.
391
- */
392
- if (static_cpu_has(X86_FEATURE_FPU)) {
393
- preempt_disable();
366
+ if (!static_cpu_has(X86_FEATURE_FPU)) {
367
+ fpu__drop(fpu);
394368 fpu__initialize(fpu);
395
- user_fpu_begin();
396
- copy_init_fpstate_to_fpregs();
397
- preempt_enable();
369
+ return;
398370 }
371
+
372
+ fpregs_lock();
373
+
374
+ if (user_only) {
375
+ if (!fpregs_state_valid(fpu, smp_processor_id()) &&
376
+ xfeatures_mask_supervisor())
377
+ copy_kernel_to_xregs(&fpu->state.xsave,
378
+ xfeatures_mask_supervisor());
379
+ copy_init_fpstate_to_fpregs(xfeatures_mask_user());
380
+ } else {
381
+ copy_init_fpstate_to_fpregs(xfeatures_mask_all);
382
+ }
383
+
384
+ fpregs_mark_activate();
385
+ fpregs_unlock();
399386 }
400387
388
+void fpu__clear_user_states(struct fpu *fpu)
389
+{
390
+ fpu__clear(fpu, true);
391
+}
392
+
393
+void fpu__clear_all(struct fpu *fpu)
394
+{
395
+ fpu__clear(fpu, false);
396
+}
397
+
398
+/*
399
+ * Load FPU context before returning to userspace.
400
+ */
401
+void switch_fpu_return(void)
402
+{
403
+ if (!static_cpu_has(X86_FEATURE_FPU))
404
+ return;
405
+
406
+ __fpregs_load_activate();
407
+}
408
+EXPORT_SYMBOL_GPL(switch_fpu_return);
409
+
410
+#ifdef CONFIG_X86_DEBUG_FPU
411
+/*
412
+ * If current FPU state according to its tracking (loaded FPU context on this
413
+ * CPU) is not valid then we must have TIF_NEED_FPU_LOAD set so the context is
414
+ * loaded on return to userland.
415
+ */
416
+void fpregs_assert_state_consistent(void)
417
+{
418
+ struct fpu *fpu = &current->thread.fpu;
419
+
420
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
421
+ return;
422
+
423
+ WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id()));
424
+}
425
+EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent);
426
+#endif
427
+
428
+void fpregs_mark_activate(void)
429
+{
430
+ struct fpu *fpu = &current->thread.fpu;
431
+
432
+ fpregs_activate(fpu);
433
+ fpu->last_cpu = smp_processor_id();
434
+ clear_thread_flag(TIF_NEED_FPU_LOAD);
435
+}
436
+EXPORT_SYMBOL_GPL(fpregs_mark_activate);
437
+
401438 /*
402439 * x87 math exception handling:
403440 */