hc
2024-05-10 37f49e37ab4cb5d0bc4c60eb5c6d4dd57db767bb
kernel/arch/powerpc/platforms/powernv/idle.c
....@@ -1,12 +1,8 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * PowerNV cpuidle code
34 *
45 * Copyright 2015 IBM Corp.
5
- *
6
- * This program is free software; you can redistribute it and/or
7
- * modify it under the terms of the GNU General Public License
8
- * as published by the Free Software Foundation; either version
9
- * 2 of the License, or (at your option) any later version.
106 */
117
128 #include <linux/types.h>
....@@ -16,6 +12,7 @@
1612 #include <linux/device.h>
1713 #include <linux/cpu.h>
1814
15
+#include <asm/asm-prototypes.h>
1916 #include <asm/firmware.h>
2017 #include <asm/machdep.h>
2118 #include <asm/opal.h>
....@@ -48,10 +45,10 @@
4845 static bool default_stop_found;
4946
5047 /*
51
- * First deep stop state. Used to figure out when to save/restore
52
- * hypervisor context.
48
+ * First stop state levels when SPR and TB loss can occur.
5349 */
54
-u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
50
+static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
51
+static u64 deep_spr_loss_state = MAX_STOP_STATE + 1;
5552
5653 /*
5754 * psscr value and mask of the deepest stop idle state.
....@@ -61,6 +58,8 @@
6158 static u64 pnv_deepest_stop_psscr_mask;
6259 static u64 pnv_deepest_stop_flag;
6360 static bool deepest_stop_found;
61
+
62
+static unsigned long power7_offline_type;
6463
6564 static int pnv_save_sprs_for_deep_states(void)
6665 {
....@@ -72,12 +71,9 @@
7271 * all cpus at boot. Get these reg values of current cpu and use the
7372 * same across all cpus.
7473 */
75
- uint64_t lpcr_val = mfspr(SPRN_LPCR);
76
- uint64_t hid0_val = mfspr(SPRN_HID0);
77
- uint64_t hid1_val = mfspr(SPRN_HID1);
78
- uint64_t hid4_val = mfspr(SPRN_HID4);
79
- uint64_t hid5_val = mfspr(SPRN_HID5);
80
- uint64_t hmeer_val = mfspr(SPRN_HMEER);
74
+ uint64_t lpcr_val = mfspr(SPRN_LPCR);
75
+ uint64_t hid0_val = mfspr(SPRN_HID0);
76
+ uint64_t hmeer_val = mfspr(SPRN_HMEER);
8177 uint64_t msr_val = MSR_IDLE;
8278 uint64_t psscr_val = pnv_deepest_stop_psscr_val;
8379
....@@ -118,6 +114,9 @@
118114
119115 /* Only p8 needs to set extra HID registers */
120116 if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
117
+ uint64_t hid1_val = mfspr(SPRN_HID1);
118
+ uint64_t hid4_val = mfspr(SPRN_HID4);
119
+ uint64_t hid5_val = mfspr(SPRN_HID5);
121120
122121 rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
123122 if (rc != 0)
....@@ -137,89 +136,6 @@
137136 return 0;
138137 }
139138
140
-static void pnv_alloc_idle_core_states(void)
141
-{
142
- int i, j;
143
- int nr_cores = cpu_nr_cores();
144
- u32 *core_idle_state;
145
-
146
- /*
147
- * core_idle_state - The lower 8 bits track the idle state of
148
- * each thread of the core.
149
- *
150
- * The most significant bit is the lock bit.
151
- *
152
- * Initially all the bits corresponding to threads_per_core
153
- * are set. They are cleared when the thread enters deep idle
154
- * state like sleep and winkle/stop.
155
- *
156
- * Initially the lock bit is cleared. The lock bit has 2
157
- * purposes:
158
- * a. While the first thread in the core waking up from
159
- * idle is restoring core state, it prevents other
160
- * threads in the core from switching to process
161
- * context.
162
- * b. While the last thread in the core is saving the
163
- * core state, it prevents a different thread from
164
- * waking up.
165
- */
166
- for (i = 0; i < nr_cores; i++) {
167
- int first_cpu = i * threads_per_core;
168
- int node = cpu_to_node(first_cpu);
169
- size_t paca_ptr_array_size;
170
-
171
- core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
172
- *core_idle_state = (1 << threads_per_core) - 1;
173
- paca_ptr_array_size = (threads_per_core *
174
- sizeof(struct paca_struct *));
175
-
176
- for (j = 0; j < threads_per_core; j++) {
177
- int cpu = first_cpu + j;
178
-
179
- paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state;
180
- paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING;
181
- paca_ptrs[cpu]->thread_mask = 1 << j;
182
- }
183
- }
184
-
185
- update_subcore_sibling_mask();
186
-
187
- if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
188
- int rc = pnv_save_sprs_for_deep_states();
189
-
190
- if (likely(!rc))
191
- return;
192
-
193
- /*
194
- * The stop-api is unable to restore hypervisor
195
- * resources on wakeup from platform idle states which
196
- * lose full context. So disable such states.
197
- */
198
- supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
199
- pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
200
- pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
201
-
202
- if (cpu_has_feature(CPU_FTR_ARCH_300) &&
203
- (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
204
- /*
205
- * Use the default stop state for CPU-Hotplug
206
- * if available.
207
- */
208
- if (default_stop_found) {
209
- pnv_deepest_stop_psscr_val =
210
- pnv_default_stop_val;
211
- pnv_deepest_stop_psscr_mask =
212
- pnv_default_stop_mask;
213
- pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
214
- pnv_deepest_stop_psscr_val);
215
- } else { /* Fallback to snooze loop for CPU-Hotplug */
216
- deepest_stop_found = false;
217
- pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
218
- }
219
- }
220
- }
221
-}
222
-
223139 u32 pnv_get_supported_cpuidle_states(void)
224140 {
225141 return supported_cpuidle_states;
....@@ -237,6 +153,9 @@
237153 if (rc)
238154 *err = 1;
239155 }
156
+
157
+static bool power7_fastsleep_workaround_entry = true;
158
+static bool power7_fastsleep_workaround_exit = true;
240159
241160 /*
242161 * Used to store fastsleep workaround state
....@@ -269,21 +188,15 @@
269188 * fastsleep_workaround_applyonce = 1 implies
270189 * fastsleep workaround needs to be left in 'applied' state on all
271190 * the cores. Do this by-
272
- * 1. Patching out the call to 'undo' workaround in fastsleep exit path
273
- * 2. Sending ipi to all the cores which have at least one online thread
274
- * 3. Patching out the call to 'apply' workaround in fastsleep entry
275
- * path
191
+ * 1. Disable the 'undo' workaround in fastsleep exit path
192
+ * 2. Send IPIs to all the cores which have at least one online thread
193
+ * 3. Disable the 'apply' workaround in fastsleep entry path
194
+ *
276195 * There is no need to send ipi to cores which have all threads
277196 * offlined, as last thread of the core entering fastsleep or deeper
278197 * state would have applied workaround.
279198 */
280
- err = patch_instruction(
281
- (unsigned int *)pnv_fastsleep_workaround_at_exit,
282
- PPC_INST_NOP);
283
- if (err) {
284
- pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit");
285
- goto fail;
286
- }
199
+ power7_fastsleep_workaround_exit = false;
287200
288201 get_online_cpus();
289202 primary_thread_mask = cpu_online_cores_map();
....@@ -296,13 +209,7 @@
296209 goto fail;
297210 }
298211
299
- err = patch_instruction(
300
- (unsigned int *)pnv_fastsleep_workaround_at_entry,
301
- PPC_INST_NOP);
302
- if (err) {
303
- pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry");
304
- goto fail;
305
- }
212
+ power7_fastsleep_workaround_entry = false;
306213
307214 fastsleep_workaround_applyonce = 1;
308215
....@@ -315,31 +222,350 @@
315222 show_fastsleep_workaround_applyonce,
316223 store_fastsleep_workaround_applyonce);
317224
318
-static unsigned long __power7_idle_type(unsigned long type)
225
+static inline void atomic_start_thread_idle(void)
319226 {
227
+ int cpu = raw_smp_processor_id();
228
+ int first = cpu_first_thread_sibling(cpu);
229
+ int thread_nr = cpu_thread_in_core(cpu);
230
+ unsigned long *state = &paca_ptrs[first]->idle_state;
231
+
232
+ clear_bit(thread_nr, state);
233
+}
234
+
235
+static inline void atomic_stop_thread_idle(void)
236
+{
237
+ int cpu = raw_smp_processor_id();
238
+ int first = cpu_first_thread_sibling(cpu);
239
+ int thread_nr = cpu_thread_in_core(cpu);
240
+ unsigned long *state = &paca_ptrs[first]->idle_state;
241
+
242
+ set_bit(thread_nr, state);
243
+}
244
+
245
+static inline void atomic_lock_thread_idle(void)
246
+{
247
+ int cpu = raw_smp_processor_id();
248
+ int first = cpu_first_thread_sibling(cpu);
249
+ unsigned long *state = &paca_ptrs[first]->idle_state;
250
+
251
+ while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state)))
252
+ barrier();
253
+}
254
+
255
+static inline void atomic_unlock_and_stop_thread_idle(void)
256
+{
257
+ int cpu = raw_smp_processor_id();
258
+ int first = cpu_first_thread_sibling(cpu);
259
+ unsigned long thread = 1UL << cpu_thread_in_core(cpu);
260
+ unsigned long *state = &paca_ptrs[first]->idle_state;
261
+ u64 s = READ_ONCE(*state);
262
+ u64 new, tmp;
263
+
264
+ BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT));
265
+ BUG_ON(s & thread);
266
+
267
+again:
268
+ new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT;
269
+ tmp = cmpxchg(state, s, new);
270
+ if (unlikely(tmp != s)) {
271
+ s = tmp;
272
+ goto again;
273
+ }
274
+}
275
+
276
+static inline void atomic_unlock_thread_idle(void)
277
+{
278
+ int cpu = raw_smp_processor_id();
279
+ int first = cpu_first_thread_sibling(cpu);
280
+ unsigned long *state = &paca_ptrs[first]->idle_state;
281
+
282
+ BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state));
283
+ clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state);
284
+}
285
+
286
+/* P7 and P8 */
287
+struct p7_sprs {
288
+ /* per core */
289
+ u64 tscr;
290
+ u64 worc;
291
+
292
+ /* per subcore */
293
+ u64 sdr1;
294
+ u64 rpr;
295
+
296
+ /* per thread */
297
+ u64 lpcr;
298
+ u64 hfscr;
299
+ u64 fscr;
300
+ u64 purr;
301
+ u64 spurr;
302
+ u64 dscr;
303
+ u64 wort;
304
+
305
+ /* per thread SPRs that get lost in shallow states */
306
+ u64 amr;
307
+ u64 iamr;
308
+ u64 amor;
309
+ u64 uamor;
310
+};
311
+
312
+static unsigned long power7_idle_insn(unsigned long type)
313
+{
314
+ int cpu = raw_smp_processor_id();
315
+ int first = cpu_first_thread_sibling(cpu);
316
+ unsigned long *state = &paca_ptrs[first]->idle_state;
317
+ unsigned long thread = 1UL << cpu_thread_in_core(cpu);
318
+ unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
320319 unsigned long srr1;
320
+ bool full_winkle;
321
+ struct p7_sprs sprs = {}; /* avoid false use-uninitialised */
322
+ bool sprs_saved = false;
323
+ int rc;
321324
322
- if (!prep_irq_for_idle_irqsoff())
323
- return 0;
325
+ if (unlikely(type != PNV_THREAD_NAP)) {
326
+ atomic_lock_thread_idle();
324327
325
- __ppc64_runlatch_off();
326
- srr1 = power7_idle_insn(type);
327
- __ppc64_runlatch_on();
328
+ BUG_ON(!(*state & thread));
329
+ *state &= ~thread;
328330
329
- fini_irq_for_idle_irqsoff();
331
+ if (power7_fastsleep_workaround_entry) {
332
+ if ((*state & core_thread_mask) == 0) {
333
+ rc = opal_config_cpu_idle_state(
334
+ OPAL_CONFIG_IDLE_FASTSLEEP,
335
+ OPAL_CONFIG_IDLE_APPLY);
336
+ BUG_ON(rc);
337
+ }
338
+ }
339
+
340
+ if (type == PNV_THREAD_WINKLE) {
341
+ sprs.tscr = mfspr(SPRN_TSCR);
342
+ sprs.worc = mfspr(SPRN_WORC);
343
+
344
+ sprs.sdr1 = mfspr(SPRN_SDR1);
345
+ sprs.rpr = mfspr(SPRN_RPR);
346
+
347
+ sprs.lpcr = mfspr(SPRN_LPCR);
348
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
349
+ sprs.hfscr = mfspr(SPRN_HFSCR);
350
+ sprs.fscr = mfspr(SPRN_FSCR);
351
+ }
352
+ sprs.purr = mfspr(SPRN_PURR);
353
+ sprs.spurr = mfspr(SPRN_SPURR);
354
+ sprs.dscr = mfspr(SPRN_DSCR);
355
+ sprs.wort = mfspr(SPRN_WORT);
356
+
357
+ sprs_saved = true;
358
+
359
+ /*
360
+ * Increment winkle counter and set all winkle bits if
361
+ * all threads are winkling. This allows wakeup side to
362
+ * distinguish between fast sleep and winkle state
363
+ * loss. Fast sleep still has to resync the timebase so
364
+ * this may not be a really big win.
365
+ */
366
+ *state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
367
+ if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS)
368
+ >> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT
369
+ == threads_per_core)
370
+ *state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS;
371
+ WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
372
+ }
373
+
374
+ atomic_unlock_thread_idle();
375
+ }
376
+
377
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
378
+ sprs.amr = mfspr(SPRN_AMR);
379
+ sprs.iamr = mfspr(SPRN_IAMR);
380
+ sprs.amor = mfspr(SPRN_AMOR);
381
+ sprs.uamor = mfspr(SPRN_UAMOR);
382
+ }
383
+
384
+ local_paca->thread_idle_state = type;
385
+ srr1 = isa206_idle_insn_mayloss(type); /* go idle */
386
+ local_paca->thread_idle_state = PNV_THREAD_RUNNING;
387
+
388
+ WARN_ON_ONCE(!srr1);
389
+ WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
390
+
391
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
392
+ if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
393
+ /*
394
+ * We don't need an isync after the mtsprs here because
395
+ * the upcoming mtmsrd is execution synchronizing.
396
+ */
397
+ mtspr(SPRN_AMR, sprs.amr);
398
+ mtspr(SPRN_IAMR, sprs.iamr);
399
+ mtspr(SPRN_AMOR, sprs.amor);
400
+ mtspr(SPRN_UAMOR, sprs.uamor);
401
+ }
402
+ }
403
+
404
+ if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
405
+ hmi_exception_realmode(NULL);
406
+
407
+ if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) {
408
+ if (unlikely(type != PNV_THREAD_NAP)) {
409
+ atomic_lock_thread_idle();
410
+ if (type == PNV_THREAD_WINKLE) {
411
+ WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
412
+ *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
413
+ *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
414
+ }
415
+ atomic_unlock_and_stop_thread_idle();
416
+ }
417
+ return srr1;
418
+ }
419
+
420
+ /* HV state loss */
421
+ BUG_ON(type == PNV_THREAD_NAP);
422
+
423
+ atomic_lock_thread_idle();
424
+
425
+ full_winkle = false;
426
+ if (type == PNV_THREAD_WINKLE) {
427
+ WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
428
+ *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
429
+ if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) {
430
+ *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
431
+ full_winkle = true;
432
+ BUG_ON(!sprs_saved);
433
+ }
434
+ }
435
+
436
+ WARN_ON(*state & thread);
437
+
438
+ if ((*state & core_thread_mask) != 0)
439
+ goto core_woken;
440
+
441
+ /* Per-core SPRs */
442
+ if (full_winkle) {
443
+ mtspr(SPRN_TSCR, sprs.tscr);
444
+ mtspr(SPRN_WORC, sprs.worc);
445
+ }
446
+
447
+ if (power7_fastsleep_workaround_exit) {
448
+ rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
449
+ OPAL_CONFIG_IDLE_UNDO);
450
+ BUG_ON(rc);
451
+ }
452
+
453
+ /* TB */
454
+ if (opal_resync_timebase() != OPAL_SUCCESS)
455
+ BUG();
456
+
457
+core_woken:
458
+ if (!full_winkle)
459
+ goto subcore_woken;
460
+
461
+ if ((*state & local_paca->subcore_sibling_mask) != 0)
462
+ goto subcore_woken;
463
+
464
+ /* Per-subcore SPRs */
465
+ mtspr(SPRN_SDR1, sprs.sdr1);
466
+ mtspr(SPRN_RPR, sprs.rpr);
467
+
468
+subcore_woken:
469
+ /*
470
+ * isync after restoring shared SPRs and before unlocking. Unlock
471
+ * only contains hwsync which does not necessarily do the right
472
+ * thing for SPRs.
473
+ */
474
+ isync();
475
+ atomic_unlock_and_stop_thread_idle();
476
+
477
+ /* Fast sleep does not lose SPRs */
478
+ if (!full_winkle)
479
+ return srr1;
480
+
481
+ /* Per-thread SPRs */
482
+ mtspr(SPRN_LPCR, sprs.lpcr);
483
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
484
+ mtspr(SPRN_HFSCR, sprs.hfscr);
485
+ mtspr(SPRN_FSCR, sprs.fscr);
486
+ }
487
+ mtspr(SPRN_PURR, sprs.purr);
488
+ mtspr(SPRN_SPURR, sprs.spurr);
489
+ mtspr(SPRN_DSCR, sprs.dscr);
490
+ mtspr(SPRN_WORT, sprs.wort);
491
+
492
+ mtspr(SPRN_SPRG3, local_paca->sprg_vdso);
493
+
494
+ /*
495
+ * The SLB has to be restored here, but it sometimes still
496
+ * contains entries, so the __ variant must be used to prevent
497
+ * multi hits.
498
+ */
499
+ __slb_restore_bolted_realmode();
330500
331501 return srr1;
332502 }
503
+
504
+extern unsigned long idle_kvm_start_guest(unsigned long srr1);
505
+
506
+#ifdef CONFIG_HOTPLUG_CPU
507
+static unsigned long power7_offline(void)
508
+{
509
+ unsigned long srr1;
510
+
511
+ mtmsr(MSR_IDLE);
512
+
513
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
514
+ /* Tell KVM we're entering idle. */
515
+ /******************************************************/
516
+ /* N O T E W E L L ! ! ! N O T E W E L L */
517
+ /* The following store to HSTATE_HWTHREAD_STATE(r13) */
518
+ /* MUST occur in real mode, i.e. with the MMU off, */
519
+ /* and the MMU must stay off until we clear this flag */
520
+ /* and test HSTATE_HWTHREAD_REQ(r13) in */
521
+ /* pnv_powersave_wakeup in this file. */
522
+ /* The reason is that another thread can switch the */
523
+ /* MMU to a guest context whenever this flag is set */
524
+ /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */
525
+ /* that would potentially cause this thread to start */
526
+ /* executing instructions from guest memory in */
527
+ /* hypervisor mode, leading to a host crash or data */
528
+ /* corruption, or worse. */
529
+ /******************************************************/
530
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
531
+#endif
532
+
533
+ __ppc64_runlatch_off();
534
+ srr1 = power7_idle_insn(power7_offline_type);
535
+ __ppc64_runlatch_on();
536
+
537
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
538
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
539
+ /* Order setting hwthread_state vs. testing hwthread_req */
540
+ smp_mb();
541
+ if (local_paca->kvm_hstate.hwthread_req)
542
+ srr1 = idle_kvm_start_guest(srr1);
543
+#endif
544
+
545
+ mtmsr(MSR_KERNEL);
546
+
547
+ return srr1;
548
+}
549
+#endif
333550
334551 void power7_idle_type(unsigned long type)
335552 {
336553 unsigned long srr1;
337554
338
- srr1 = __power7_idle_type(type);
555
+ if (!prep_irq_for_idle_irqsoff())
556
+ return;
557
+
558
+ mtmsr(MSR_IDLE);
559
+ __ppc64_runlatch_off();
560
+ srr1 = power7_idle_insn(type);
561
+ __ppc64_runlatch_on();
562
+ mtmsr(MSR_KERNEL);
563
+
564
+ fini_irq_for_idle_irqsoff();
339565 irq_set_pending_from_srr1(srr1);
340566 }
341567
342
-void power7_idle(void)
568
+static void power7_idle(void)
343569 {
344570 if (!powersave_nap)
345571 return;
....@@ -347,42 +573,236 @@
347573 power7_idle_type(PNV_THREAD_NAP);
348574 }
349575
350
-static unsigned long __power9_idle_type(unsigned long stop_psscr_val,
351
- unsigned long stop_psscr_mask)
352
-{
353
- unsigned long psscr;
354
- unsigned long srr1;
576
+struct p9_sprs {
577
+ /* per core */
578
+ u64 ptcr;
579
+ u64 rpr;
580
+ u64 tscr;
581
+ u64 ldbar;
355582
356
- if (!prep_irq_for_idle_irqsoff())
357
- return 0;
583
+ /* per thread */
584
+ u64 lpcr;
585
+ u64 hfscr;
586
+ u64 fscr;
587
+ u64 pid;
588
+ u64 purr;
589
+ u64 spurr;
590
+ u64 dscr;
591
+ u64 wort;
592
+
593
+ u64 mmcra;
594
+ u32 mmcr0;
595
+ u32 mmcr1;
596
+ u64 mmcr2;
597
+
598
+ /* per thread SPRs that get lost in shallow states */
599
+ u64 amr;
600
+ u64 iamr;
601
+ u64 amor;
602
+ u64 uamor;
603
+};
604
+
605
+static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
606
+{
607
+ int cpu = raw_smp_processor_id();
608
+ int first = cpu_first_thread_sibling(cpu);
609
+ unsigned long *state = &paca_ptrs[first]->idle_state;
610
+ unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
611
+ unsigned long srr1;
612
+ unsigned long pls;
613
+ unsigned long mmcr0 = 0;
614
+ unsigned long mmcra = 0;
615
+ struct p9_sprs sprs = {}; /* avoid false used-uninitialised */
616
+ bool sprs_saved = false;
617
+
618
+ if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
619
+ /* EC=ESL=0 case */
620
+
621
+ BUG_ON(!mmu_on);
622
+
623
+ /*
624
+ * Wake synchronously. SRESET via xscom may still cause
625
+ * a 0x100 powersave wakeup with SRR1 reason!
626
+ */
627
+ srr1 = isa300_idle_stop_noloss(psscr); /* go idle */
628
+ if (likely(!srr1))
629
+ return 0;
630
+
631
+ /*
632
+ * Registers not saved, can't recover!
633
+ * This would be a hardware bug
634
+ */
635
+ BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
636
+
637
+ goto out;
638
+ }
639
+
640
+ /* EC=ESL=1 case */
641
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
642
+ if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) {
643
+ local_paca->requested_psscr = psscr;
644
+ /* order setting requested_psscr vs testing dont_stop */
645
+ smp_mb();
646
+ if (atomic_read(&local_paca->dont_stop)) {
647
+ local_paca->requested_psscr = 0;
648
+ return 0;
649
+ }
650
+ }
651
+#endif
652
+
653
+ if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
654
+ /*
655
+ * POWER9 DD2 can incorrectly set PMAO when waking up
656
+ * after a state-loss idle. Saving and restoring MMCR0
657
+ * over idle is a workaround.
658
+ */
659
+ mmcr0 = mfspr(SPRN_MMCR0);
660
+ }
661
+
662
+ if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
663
+ sprs.lpcr = mfspr(SPRN_LPCR);
664
+ sprs.hfscr = mfspr(SPRN_HFSCR);
665
+ sprs.fscr = mfspr(SPRN_FSCR);
666
+ sprs.pid = mfspr(SPRN_PID);
667
+ sprs.purr = mfspr(SPRN_PURR);
668
+ sprs.spurr = mfspr(SPRN_SPURR);
669
+ sprs.dscr = mfspr(SPRN_DSCR);
670
+ sprs.wort = mfspr(SPRN_WORT);
671
+
672
+ sprs.mmcra = mfspr(SPRN_MMCRA);
673
+ sprs.mmcr0 = mfspr(SPRN_MMCR0);
674
+ sprs.mmcr1 = mfspr(SPRN_MMCR1);
675
+ sprs.mmcr2 = mfspr(SPRN_MMCR2);
676
+
677
+ sprs.ptcr = mfspr(SPRN_PTCR);
678
+ sprs.rpr = mfspr(SPRN_RPR);
679
+ sprs.tscr = mfspr(SPRN_TSCR);
680
+ if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
681
+ sprs.ldbar = mfspr(SPRN_LDBAR);
682
+
683
+ sprs_saved = true;
684
+
685
+ atomic_start_thread_idle();
686
+ }
687
+
688
+ sprs.amr = mfspr(SPRN_AMR);
689
+ sprs.iamr = mfspr(SPRN_IAMR);
690
+ sprs.amor = mfspr(SPRN_AMOR);
691
+ sprs.uamor = mfspr(SPRN_UAMOR);
692
+
693
+ srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */
694
+
695
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
696
+ local_paca->requested_psscr = 0;
697
+#endif
358698
359699 psscr = mfspr(SPRN_PSSCR);
360
- psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
361700
362
- __ppc64_runlatch_off();
363
- srr1 = power9_idle_stop(psscr);
364
- __ppc64_runlatch_on();
701
+ WARN_ON_ONCE(!srr1);
702
+ WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
365703
366
- fini_irq_for_idle_irqsoff();
704
+ if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
705
+ /*
706
+ * We don't need an isync after the mtsprs here because the
707
+ * upcoming mtmsrd is execution synchronizing.
708
+ */
709
+ mtspr(SPRN_AMR, sprs.amr);
710
+ mtspr(SPRN_IAMR, sprs.iamr);
711
+ mtspr(SPRN_AMOR, sprs.amor);
712
+ mtspr(SPRN_UAMOR, sprs.uamor);
713
+
714
+ /*
715
+ * Workaround for POWER9 DD2.0, if we lost resources, the ERAT
716
+ * might have been corrupted and needs flushing. We also need
717
+ * to reload MMCR0 (see mmcr0 comment above).
718
+ */
719
+ if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
720
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT);
721
+ mtspr(SPRN_MMCR0, mmcr0);
722
+ }
723
+
724
+ /*
725
+ * DD2.2 and earlier need to set then clear bit 60 in MMCRA
726
+ * to ensure the PMU starts running.
727
+ */
728
+ mmcra = mfspr(SPRN_MMCRA);
729
+ mmcra |= PPC_BIT(60);
730
+ mtspr(SPRN_MMCRA, mmcra);
731
+ mmcra &= ~PPC_BIT(60);
732
+ mtspr(SPRN_MMCRA, mmcra);
733
+ }
734
+
735
+ if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
736
+ hmi_exception_realmode(NULL);
737
+
738
+ /*
739
+ * On POWER9, SRR1 bits do not match exactly as expected.
740
+ * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
741
+ * just always test PSSCR for SPR/TB state loss.
742
+ */
743
+ pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
744
+ if (likely(pls < deep_spr_loss_state)) {
745
+ if (sprs_saved)
746
+ atomic_stop_thread_idle();
747
+ goto out;
748
+ }
749
+
750
+ /* HV state loss */
751
+ BUG_ON(!sprs_saved);
752
+
753
+ atomic_lock_thread_idle();
754
+
755
+ if ((*state & core_thread_mask) != 0)
756
+ goto core_woken;
757
+
758
+ /* Per-core SPRs */
759
+ mtspr(SPRN_PTCR, sprs.ptcr);
760
+ mtspr(SPRN_RPR, sprs.rpr);
761
+ mtspr(SPRN_TSCR, sprs.tscr);
762
+
763
+ if (pls >= pnv_first_tb_loss_level) {
764
+ /* TB loss */
765
+ if (opal_resync_timebase() != OPAL_SUCCESS)
766
+ BUG();
767
+ }
768
+
769
+ /*
770
+ * isync after restoring shared SPRs and before unlocking. Unlock
771
+ * only contains hwsync which does not necessarily do the right
772
+ * thing for SPRs.
773
+ */
774
+ isync();
775
+
776
+core_woken:
777
+ atomic_unlock_and_stop_thread_idle();
778
+
779
+ /* Per-thread SPRs */
780
+ mtspr(SPRN_LPCR, sprs.lpcr);
781
+ mtspr(SPRN_HFSCR, sprs.hfscr);
782
+ mtspr(SPRN_FSCR, sprs.fscr);
783
+ mtspr(SPRN_PID, sprs.pid);
784
+ mtspr(SPRN_PURR, sprs.purr);
785
+ mtspr(SPRN_SPURR, sprs.spurr);
786
+ mtspr(SPRN_DSCR, sprs.dscr);
787
+ mtspr(SPRN_WORT, sprs.wort);
788
+
789
+ mtspr(SPRN_MMCRA, sprs.mmcra);
790
+ mtspr(SPRN_MMCR0, sprs.mmcr0);
791
+ mtspr(SPRN_MMCR1, sprs.mmcr1);
792
+ mtspr(SPRN_MMCR2, sprs.mmcr2);
793
+ if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
794
+ mtspr(SPRN_LDBAR, sprs.ldbar);
795
+
796
+ mtspr(SPRN_SPRG3, local_paca->sprg_vdso);
797
+
798
+ if (!radix_enabled())
799
+ __slb_restore_bolted_realmode();
800
+
801
+out:
802
+ if (mmu_on)
803
+ mtmsr(MSR_KERNEL);
367804
368805 return srr1;
369
-}
370
-
371
-void power9_idle_type(unsigned long stop_psscr_val,
372
- unsigned long stop_psscr_mask)
373
-{
374
- unsigned long srr1;
375
-
376
- srr1 = __power9_idle_type(stop_psscr_val, stop_psscr_mask);
377
- irq_set_pending_from_srr1(srr1);
378
-}
379
-
380
-/*
381
- * Used for ppc_md.power_save which needs a function with no parameters
382
- */
383
-void power9_idle(void)
384
-{
385
- power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
386806 }
387807
388808 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
....@@ -409,7 +829,7 @@
409829 atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
410830 }
411831 /* order setting dont_stop vs testing requested_psscr */
412
- mb();
832
+ smp_mb();
413833 for (thr = 0; thr < threads_per_core; ++thr) {
414834 if (!paca_ptrs[cpu0+thr]->requested_psscr)
415835 ++awake_threads;
....@@ -457,6 +877,198 @@
457877 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
458878 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
459879
880
+struct p10_sprs {
881
+ /*
882
+ * SPRs that get lost in shallow states:
883
+ *
884
+ * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1
885
+ * isa300 idle routines restore CR, LR.
886
+ * CTR is volatile
887
+ * idle thread doesn't use FP or VEC
888
+ * kernel doesn't use TAR
889
+ * HSPRG1 is only live in HV interrupt entry
890
+ * SPRG2 is only live in KVM guests, KVM handles it.
891
+ */
892
+};
893
+
894
+static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
895
+{
896
+ int cpu = raw_smp_processor_id();
897
+ int first = cpu_first_thread_sibling(cpu);
898
+ unsigned long *state = &paca_ptrs[first]->idle_state;
899
+ unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
900
+ unsigned long srr1;
901
+ unsigned long pls;
902
+// struct p10_sprs sprs = {}; /* avoid false used-uninitialised */
903
+ bool sprs_saved = false;
904
+
905
+ if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
906
+ /* EC=ESL=0 case */
907
+
908
+ BUG_ON(!mmu_on);
909
+
910
+ /*
911
+ * Wake synchronously. SRESET via xscom may still cause
912
+ * a 0x100 powersave wakeup with SRR1 reason!
913
+ */
914
+ srr1 = isa300_idle_stop_noloss(psscr); /* go idle */
915
+ if (likely(!srr1))
916
+ return 0;
917
+
918
+ /*
919
+ * Registers not saved, can't recover!
920
+ * This would be a hardware bug
921
+ */
922
+ BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
923
+
924
+ goto out;
925
+ }
926
+
927
+ /* EC=ESL=1 case */
928
+ if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
929
+ /* XXX: save SPRs for deep state loss here. */
930
+
931
+ sprs_saved = true;
932
+
933
+ atomic_start_thread_idle();
934
+ }
935
+
936
+ srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */
937
+
938
+ psscr = mfspr(SPRN_PSSCR);
939
+
940
+ WARN_ON_ONCE(!srr1);
941
+ WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
942
+
943
+ if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
944
+ hmi_exception_realmode(NULL);
945
+
946
+ /*
947
+ * On POWER10, SRR1 bits do not match exactly as expected.
948
+ * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
949
+ * just always test PSSCR for SPR/TB state loss.
950
+ */
951
+ pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
952
+ if (likely(pls < deep_spr_loss_state)) {
953
+ if (sprs_saved)
954
+ atomic_stop_thread_idle();
955
+ goto out;
956
+ }
957
+
958
+ /* HV state loss */
959
+ BUG_ON(!sprs_saved);
960
+
961
+ atomic_lock_thread_idle();
962
+
963
+ if ((*state & core_thread_mask) != 0)
964
+ goto core_woken;
965
+
966
+ /* XXX: restore per-core SPRs here */
967
+
968
+ if (pls >= pnv_first_tb_loss_level) {
969
+ /* TB loss */
970
+ if (opal_resync_timebase() != OPAL_SUCCESS)
971
+ BUG();
972
+ }
973
+
974
+ /*
975
+ * isync after restoring shared SPRs and before unlocking. Unlock
976
+ * only contains hwsync which does not necessarily do the right
977
+ * thing for SPRs.
978
+ */
979
+ isync();
980
+
981
+core_woken:
982
+ atomic_unlock_and_stop_thread_idle();
983
+
984
+ /* XXX: restore per-thread SPRs here */
985
+
986
+ if (!radix_enabled())
987
+ __slb_restore_bolted_realmode();
988
+
989
+out:
990
+ if (mmu_on)
991
+ mtmsr(MSR_KERNEL);
992
+
993
+ return srr1;
994
+}
995
+
996
+#ifdef CONFIG_HOTPLUG_CPU
997
+static unsigned long arch300_offline_stop(unsigned long psscr)
998
+{
999
+ unsigned long srr1;
1000
+
1001
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
1002
+ __ppc64_runlatch_off();
1003
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
1004
+ srr1 = power10_idle_stop(psscr, true);
1005
+ else
1006
+ srr1 = power9_idle_stop(psscr, true);
1007
+ __ppc64_runlatch_on();
1008
+#else
1009
+ /*
1010
+ * Tell KVM we're entering idle.
1011
+ * This does not have to be done in real mode because the P9 MMU
1012
+ * is independent per-thread. Some steppings share radix/hash mode
1013
+ * between threads, but in that case KVM has a barrier sync in real
1014
+ * mode before and after switching between radix and hash.
1015
+ *
1016
+ * kvm_start_guest must still be called in real mode though, hence
1017
+ * the false argument.
1018
+ */
1019
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
1020
+
1021
+ __ppc64_runlatch_off();
1022
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
1023
+ srr1 = power10_idle_stop(psscr, false);
1024
+ else
1025
+ srr1 = power9_idle_stop(psscr, false);
1026
+ __ppc64_runlatch_on();
1027
+
1028
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
1029
+ /* Order setting hwthread_state vs. testing hwthread_req */
1030
+ smp_mb();
1031
+ if (local_paca->kvm_hstate.hwthread_req)
1032
+ srr1 = idle_kvm_start_guest(srr1);
1033
+ mtmsr(MSR_KERNEL);
1034
+#endif
1035
+
1036
+ return srr1;
1037
+}
1038
+#endif
1039
+
1040
+void arch300_idle_type(unsigned long stop_psscr_val,
1041
+ unsigned long stop_psscr_mask)
1042
+{
1043
+ unsigned long psscr;
1044
+ unsigned long srr1;
1045
+
1046
+ if (!prep_irq_for_idle_irqsoff())
1047
+ return;
1048
+
1049
+ psscr = mfspr(SPRN_PSSCR);
1050
+ psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
1051
+
1052
+ __ppc64_runlatch_off();
1053
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
1054
+ srr1 = power10_idle_stop(psscr, true);
1055
+ else
1056
+ srr1 = power9_idle_stop(psscr, true);
1057
+ __ppc64_runlatch_on();
1058
+
1059
+ fini_irq_for_idle_irqsoff();
1060
+
1061
+ irq_set_pending_from_srr1(srr1);
1062
+}
1063
+
1064
+/*
1065
+ * Used for ppc_md.power_save which needs a function with no parameters
1066
+ */
1067
+static void arch300_idle(void)
1068
+{
1069
+ arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
1070
+}
1071
+
4601072 #ifdef CONFIG_HOTPLUG_CPU
4611073
4621074 void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
....@@ -481,7 +1093,6 @@
4811093 unsigned long pnv_cpu_offline(unsigned int cpu)
4821094 {
4831095 unsigned long srr1;
484
- u32 idle_states = pnv_get_supported_cpuidle_states();
4851096
4861097 __ppc64_runlatch_off();
4871098
....@@ -491,16 +1102,9 @@
4911102 psscr = mfspr(SPRN_PSSCR);
4921103 psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
4931104 pnv_deepest_stop_psscr_val;
494
- srr1 = power9_offline_stop(psscr);
495
-
496
- } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
497
- (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
498
- srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
499
- } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
500
- (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
501
- srr1 = power7_idle_insn(PNV_THREAD_SLEEP);
502
- } else if (idle_states & OPAL_PM_NAP_ENABLED) {
503
- srr1 = power7_idle_insn(PNV_THREAD_NAP);
1105
+ srr1 = arch300_offline_stop(psscr);
1106
+ } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
1107
+ srr1 = power7_offline();
5041108 } else {
5051109 /* This is the fallback method. We emulate snooze */
5061110 while (!generic_check_cpu_restart(cpu)) {
....@@ -596,33 +1200,53 @@
5961200 * @dt_idle_states: Number of idle state entries
5971201 * Returns 0 on success
5981202 */
599
-static int __init pnv_power9_idle_init(void)
1203
+static void __init pnv_arch300_idle_init(void)
6001204 {
6011205 u64 max_residency_ns = 0;
6021206 int i;
6031207
1208
+ /* stop is not really architected, we only have p9,p10 drivers */
1209
+ if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9))
1210
+ return;
1211
+
6041212 /*
605
- * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask},
606
- * and the pnv_default_stop_{val,mask}.
607
- *
608
- * pnv_first_deep_stop_state should be set to the first stop
609
- * level to cause hypervisor state loss.
610
- *
6111213 * pnv_deepest_stop_{val,mask} should be set to values corresponding to
6121214 * the deepest stop state.
6131215 *
6141216 * pnv_default_stop_{val,mask} should be set to values corresponding to
615
- * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state.
1217
+ * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state.
6161218 */
617
- pnv_first_deep_stop_state = MAX_STOP_STATE;
1219
+ pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
1220
+ deep_spr_loss_state = MAX_STOP_STATE + 1;
6181221 for (i = 0; i < nr_pnv_idle_states; i++) {
6191222 int err;
6201223 struct pnv_idle_states_t *state = &pnv_idle_states[i];
6211224 u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;
6221225
1226
+ /* No deep loss driver implemented for POWER10 yet */
1227
+ if (pvr_version_is(PVR_POWER10) &&
1228
+ state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT))
1229
+ continue;
1230
+
1231
+ if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
1232
+ (pnv_first_tb_loss_level > psscr_rl))
1233
+ pnv_first_tb_loss_level = psscr_rl;
1234
+
6231235 if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
624
- pnv_first_deep_stop_state > psscr_rl)
625
- pnv_first_deep_stop_state = psscr_rl;
1236
+ (deep_spr_loss_state > psscr_rl))
1237
+ deep_spr_loss_state = psscr_rl;
1238
+
1239
+ /*
1240
+ * The idle code does not deal with TB loss occurring
1241
+ * in a shallower state than SPR loss, so force it to
1242
+ * behave like SPRs are lost if TB is lost. POWER9 would
1243
+ * never encounter this, but a POWER8 core would if it
1244
+ * implemented the stop instruction. So this is for forward
1245
+ * compatibility.
1246
+ */
1247
+ if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
1248
+ (deep_spr_loss_state > psscr_rl))
1249
+ deep_spr_loss_state = psscr_rl;
6261250
6271251 err = validate_psscr_val_mask(&state->psscr_val,
6281252 &state->psscr_mask,
....@@ -647,13 +1271,14 @@
6471271 pnv_default_stop_val = state->psscr_val;
6481272 pnv_default_stop_mask = state->psscr_mask;
6491273 default_stop_found = true;
1274
+ WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT);
6501275 }
6511276 }
6521277
6531278 if (unlikely(!default_stop_found)) {
6541279 pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
6551280 } else {
656
- ppc_md.power_save = power9_idle;
1281
+ ppc_md.power_save = arch300_idle;
6571282 pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
6581283 pnv_default_stop_val, pnv_default_stop_mask);
6591284 }
....@@ -666,10 +1291,40 @@
6661291 pnv_deepest_stop_psscr_mask);
6671292 }
6681293
669
- pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n",
670
- pnv_first_deep_stop_state);
1294
+ pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n",
1295
+ deep_spr_loss_state);
6711296
672
- return 0;
1297
+ pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n",
1298
+ pnv_first_tb_loss_level);
1299
+}
1300
+
1301
+static void __init pnv_disable_deep_states(void)
1302
+{
1303
+ /*
1304
+ * The stop-api is unable to restore hypervisor
1305
+ * resources on wakeup from platform idle states which
1306
+ * lose full context. So disable such states.
1307
+ */
1308
+ supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
1309
+ pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
1310
+ pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
1311
+
1312
+ if (cpu_has_feature(CPU_FTR_ARCH_300) &&
1313
+ (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
1314
+ /*
1315
+ * Use the default stop state for CPU-Hotplug
1316
+ * if available.
1317
+ */
1318
+ if (default_stop_found) {
1319
+ pnv_deepest_stop_psscr_val = pnv_default_stop_val;
1320
+ pnv_deepest_stop_psscr_mask = pnv_default_stop_mask;
1321
+ pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
1322
+ pnv_deepest_stop_psscr_val);
1323
+ } else { /* Fallback to snooze loop for CPU-Hotplug */
1324
+ deepest_stop_found = false;
1325
+ pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
1326
+ }
1327
+ }
6731328 }
6741329
6751330 /*
....@@ -684,10 +1339,8 @@
6841339 return;
6851340 }
6861341
687
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
688
- if (pnv_power9_idle_init())
689
- return;
690
- }
1342
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
1343
+ pnv_arch300_idle_init();
6911344
6921345 for (i = 0; i < nr_pnv_idle_states; i++)
6931346 supported_cpuidle_states |= pnv_idle_states[i].flags;
....@@ -751,14 +1404,14 @@
7511404 /* Read residencies */
7521405 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns",
7531406 temp_u32, nr_idle_states)) {
754
- pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n");
1407
+ pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
7551408 rc = -EINVAL;
7561409 goto out;
7571410 }
7581411 for (i = 0; i < nr_idle_states; i++)
7591412 pnv_idle_states[i].residency_ns = temp_u32[i];
7601413
761
- /* For power9 */
1414
+ /* For power9 and later */
7621415 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
7631416 /* Read pm_crtl_val */
7641417 if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr",
....@@ -807,11 +1460,33 @@
8071460
8081461 static int __init pnv_init_idle_states(void)
8091462 {
1463
+ int cpu;
8101464 int rc = 0;
811
- supported_cpuidle_states = 0;
1465
+
1466
+ /* Set up PACA fields */
1467
+ for_each_present_cpu(cpu) {
1468
+ struct paca_struct *p = paca_ptrs[cpu];
1469
+
1470
+ p->idle_state = 0;
1471
+ if (cpu == cpu_first_thread_sibling(cpu))
1472
+ p->idle_state = (1 << threads_per_core) - 1;
1473
+
1474
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
1475
+ /* P7/P8 nap */
1476
+ p->thread_idle_state = PNV_THREAD_RUNNING;
1477
+ } else if (pvr_version_is(PVR_POWER9)) {
1478
+ /* P9 stop workarounds */
1479
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
1480
+ p->requested_psscr = 0;
1481
+ atomic_set(&p->dont_stop, 0);
1482
+#endif
1483
+ }
1484
+ }
8121485
8131486 /* In case we error out nr_pnv_idle_states will be zero */
8141487 nr_pnv_idle_states = 0;
1488
+ supported_cpuidle_states = 0;
1489
+
8151490 if (cpuidle_disable != IDLE_NO_OVERRIDE)
8161491 goto out;
8171492 rc = pnv_parse_cpuidle_dt();
....@@ -819,27 +1494,40 @@
8191494 return rc;
8201495 pnv_probe_idle_states();
8211496
822
- if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
823
- patch_instruction(
824
- (unsigned int *)pnv_fastsleep_workaround_at_entry,
825
- PPC_INST_NOP);
826
- patch_instruction(
827
- (unsigned int *)pnv_fastsleep_workaround_at_exit,
828
- PPC_INST_NOP);
829
- } else {
830
- /*
831
- * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
832
- * workaround is needed to use fastsleep. Provide sysfs
833
- * control to choose how this workaround has to be applied.
834
- */
835
- device_create_file(cpu_subsys.dev_root,
1497
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
1498
+ if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
1499
+ power7_fastsleep_workaround_entry = false;
1500
+ power7_fastsleep_workaround_exit = false;
1501
+ } else {
1502
+ /*
1503
+ * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
1504
+ * workaround is needed to use fastsleep. Provide sysfs
1505
+ * control to choose how this workaround has to be
1506
+ * applied.
1507
+ */
1508
+ device_create_file(cpu_subsys.dev_root,
8361509 &dev_attr_fastsleep_workaround_applyonce);
1510
+ }
1511
+
1512
+ update_subcore_sibling_mask();
1513
+
1514
+ if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) {
1515
+ ppc_md.power_save = power7_idle;
1516
+ power7_offline_type = PNV_THREAD_NAP;
1517
+ }
1518
+
1519
+ if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) &&
1520
+ (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT))
1521
+ power7_offline_type = PNV_THREAD_WINKLE;
1522
+ else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) ||
1523
+ (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1))
1524
+ power7_offline_type = PNV_THREAD_SLEEP;
8371525 }
8381526
839
- pnv_alloc_idle_core_states();
840
-
841
- if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
842
- ppc_md.power_save = power7_idle;
1527
+ if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
1528
+ if (pnv_save_sprs_for_deep_states())
1529
+ pnv_disable_deep_states();
1530
+ }
8431531
8441532 out:
8451533 return 0;