forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-01-31 f70575805708cabdedea7498aaa3f710fde4d920
kernel/drivers/idle/intel_idle.c
....@@ -1,25 +1,14 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * intel_idle.c - native hardware idle loop for modern Intel processors
34 *
4
- * Copyright (c) 2013, Intel Corporation.
5
+ * Copyright (c) 2013 - 2020, Intel Corporation.
56 * Len Brown <len.brown@intel.com>
6
- *
7
- * This program is free software; you can redistribute it and/or modify it
8
- * under the terms and conditions of the GNU General Public License,
9
- * version 2, as published by the Free Software Foundation.
10
- *
11
- * This program is distributed in the hope it will be useful, but WITHOUT
12
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14
- * more details.
15
- *
16
- * You should have received a copy of the GNU General Public License along with
17
- * this program; if not, write to the Free Software Foundation, Inc.,
18
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
7
+ * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
198 */
209
2110 /*
22
- * intel_idle is a cpuidle driver that loads on specific Intel processors
11
+ * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
2312 * in lieu of the legacy ACPI processor_idle driver. The intent is to
2413 * make Linux more efficient on these processors, as intel_idle knows
2514 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
....@@ -31,16 +20,15 @@
3120 * All CPUs have same idle states as boot CPU
3221 *
3322 * Chipset BM_STS (bus master status) bit is a NOP
34
- * for preventing entry into deep C-stats
23
+ * for preventing entry into deep C-states
24
+ *
25
+ * CPU will flush caches as needed when entering a C-state via MWAIT
26
+ * (in contrast to entering ACPI C3, in which case the WBINVD
27
+ * instruction needs to be executed to flush the caches)
3528 */
3629
3730 /*
3831 * Known limitations
39
- *
40
- * The driver currently initializes for_each_online_cpu() upon modprobe.
41
- * It it unaware of subsequent processors hot-added to the system.
42
- * This means that if you boot with maxcpus=n and later online
43
- * processors above n, those processors will use C1 only.
4432 *
4533 * ACPI has a .suspend hack to turn off deep c-states during suspend
4634 * to avoid complications with the lapic timer workaround.
....@@ -53,20 +41,23 @@
5341
5442 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
5543
44
+#include <linux/acpi.h>
5645 #include <linux/kernel.h>
5746 #include <linux/cpuidle.h>
5847 #include <linux/tick.h>
5948 #include <trace/events/power.h>
6049 #include <linux/sched.h>
50
+#include <linux/sched/smt.h>
6151 #include <linux/notifier.h>
6252 #include <linux/cpu.h>
6353 #include <linux/moduleparam.h>
6454 #include <asm/cpu_device_id.h>
6555 #include <asm/intel-family.h>
56
+#include <asm/nospec-branch.h>
6657 #include <asm/mwait.h>
6758 #include <asm/msr.h>
6859
69
-#define INTEL_IDLE_VERSION "0.4.1"
60
+#define INTEL_IDLE_VERSION "0.5.1"
7061
7162 static struct cpuidle_driver intel_idle_driver = {
7263 .name = "intel_idle",
....@@ -74,12 +65,12 @@
7465 };
7566 /* intel_idle.max_cstate=0 disables driver */
7667 static int max_cstate = CPUIDLE_STATE_MAX - 1;
68
+static unsigned int disabled_states_mask;
7769
78
-static unsigned int mwait_substates;
70
+static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
7971
80
-#define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
81
-/* Reliable LAPIC Timer States, bit 1 for C1 etc. */
82
-static unsigned int lapic_timer_reliable_states = (1 << 1); /* Default to only C1 */
72
+static unsigned long auto_demotion_disable_flags;
73
+static bool disable_promotion_to_c1e;
8374
8475 struct idle_cpu {
8576 struct cpuidle_state *state_table;
....@@ -91,23 +82,24 @@
9182 unsigned long auto_demotion_disable_flags;
9283 bool byt_auto_demotion_disable_flag;
9384 bool disable_promotion_to_c1e;
85
+ bool use_acpi;
9486 };
9587
96
-static const struct idle_cpu *icpu;
97
-static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
98
-static int intel_idle(struct cpuidle_device *dev,
99
- struct cpuidle_driver *drv, int index);
100
-static void intel_idle_s2idle(struct cpuidle_device *dev,
101
- struct cpuidle_driver *drv, int index);
102
-static struct cpuidle_state *cpuidle_state_table;
88
+static const struct idle_cpu *icpu __initdata;
89
+static struct cpuidle_state *cpuidle_state_table __initdata;
90
+
91
+static unsigned int mwait_substates __initdata;
10392
10493 /*
105
- * Set this flag for states where the HW flushes the TLB for us
106
- * and so we don't need cross-calls to keep it consistent.
107
- * If this flag is set, SW flushes the TLB, so even if the
108
- * HW doesn't do the flushing, this flag is safe to use.
94
+ * Enable this state by default even if the ACPI _CST does not list it.
10995 */
110
-#define CPUIDLE_FLAG_TLB_FLUSHED 0x10000
96
+#define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15)
97
+
98
+/*
99
+ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
100
+ * above.
101
+ */
102
+#define CPUIDLE_FLAG_IBRS BIT(16)
111103
112104 /*
113105 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
....@@ -119,12 +111,82 @@
119111 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
120112 #define MWAIT2flg(eax) ((eax & 0xFF) << 24)
121113
114
+/**
115
+ * intel_idle - Ask the processor to enter the given idle state.
116
+ * @dev: cpuidle device of the target CPU.
117
+ * @drv: cpuidle driver (assumed to point to intel_idle_driver).
118
+ * @index: Target idle state index.
119
+ *
120
+ * Use the MWAIT instruction to notify the processor that the CPU represented by
121
+ * @dev is idle and it can try to enter the idle state corresponding to @index.
122
+ *
123
+ * If the local APIC timer is not known to be reliable in the target idle state,
124
+ * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
125
+ *
126
+ * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to
127
+ * flushing user TLBs.
128
+ *
129
+ * Must be called under local_irq_disable().
130
+ */
131
+static __cpuidle int intel_idle(struct cpuidle_device *dev,
132
+ struct cpuidle_driver *drv, int index)
133
+{
134
+ struct cpuidle_state *state = &drv->states[index];
135
+ unsigned long eax = flg2MWAIT(state->flags);
136
+ unsigned long ecx = 1; /* break on interrupt flag */
137
+
138
+ mwait_idle_with_hints(eax, ecx);
139
+
140
+ return index;
141
+}
142
+
143
+static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
144
+ struct cpuidle_driver *drv, int index)
145
+{
146
+ bool smt_active = sched_smt_active();
147
+ u64 spec_ctrl = spec_ctrl_current();
148
+ int ret;
149
+
150
+ if (smt_active)
151
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
152
+
153
+ ret = intel_idle(dev, drv, index);
154
+
155
+ if (smt_active)
156
+ wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
157
+
158
+ return ret;
159
+}
160
+
161
+/**
162
+ * intel_idle_s2idle - Ask the processor to enter the given idle state.
163
+ * @dev: cpuidle device of the target CPU.
164
+ * @drv: cpuidle driver (assumed to point to intel_idle_driver).
165
+ * @index: Target idle state index.
166
+ *
167
+ * Use the MWAIT instruction to notify the processor that the CPU represented by
168
+ * @dev is idle and it can try to enter the idle state corresponding to @index.
169
+ *
170
+ * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
171
+ * scheduler tick and suspended scheduler clock on the target CPU.
172
+ */
173
+static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
174
+ struct cpuidle_driver *drv, int index)
175
+{
176
+ unsigned long eax = flg2MWAIT(drv->states[index].flags);
177
+ unsigned long ecx = 1; /* break on interrupt flag */
178
+
179
+ mwait_idle_with_hints(eax, ecx);
180
+
181
+ return 0;
182
+}
183
+
122184 /*
123185 * States are indexed by the cstate number,
124186 * which is also the index into the MWAIT hint array.
125187 * Thus C0 is a dummy.
126188 */
127
-static struct cpuidle_state nehalem_cstates[] = {
189
+static struct cpuidle_state nehalem_cstates[] __initdata = {
128190 {
129191 .name = "C1",
130192 .desc = "MWAIT 0x00",
....@@ -136,7 +198,7 @@
136198 {
137199 .name = "C1E",
138200 .desc = "MWAIT 0x01",
139
- .flags = MWAIT2flg(0x01),
201
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
140202 .exit_latency = 10,
141203 .target_residency = 20,
142204 .enter = &intel_idle,
....@@ -161,7 +223,7 @@
161223 .enter = NULL }
162224 };
163225
164
-static struct cpuidle_state snb_cstates[] = {
226
+static struct cpuidle_state snb_cstates[] __initdata = {
165227 {
166228 .name = "C1",
167229 .desc = "MWAIT 0x00",
....@@ -173,7 +235,7 @@
173235 {
174236 .name = "C1E",
175237 .desc = "MWAIT 0x01",
176
- .flags = MWAIT2flg(0x01),
238
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
177239 .exit_latency = 10,
178240 .target_residency = 20,
179241 .enter = &intel_idle,
....@@ -206,7 +268,7 @@
206268 .enter = NULL }
207269 };
208270
209
-static struct cpuidle_state byt_cstates[] = {
271
+static struct cpuidle_state byt_cstates[] __initdata = {
210272 {
211273 .name = "C1",
212274 .desc = "MWAIT 0x00",
....@@ -251,7 +313,7 @@
251313 .enter = NULL }
252314 };
253315
254
-static struct cpuidle_state cht_cstates[] = {
316
+static struct cpuidle_state cht_cstates[] __initdata = {
255317 {
256318 .name = "C1",
257319 .desc = "MWAIT 0x00",
....@@ -296,7 +358,7 @@
296358 .enter = NULL }
297359 };
298360
299
-static struct cpuidle_state ivb_cstates[] = {
361
+static struct cpuidle_state ivb_cstates[] __initdata = {
300362 {
301363 .name = "C1",
302364 .desc = "MWAIT 0x00",
....@@ -308,7 +370,7 @@
308370 {
309371 .name = "C1E",
310372 .desc = "MWAIT 0x01",
311
- .flags = MWAIT2flg(0x01),
373
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
312374 .exit_latency = 10,
313375 .target_residency = 20,
314376 .enter = &intel_idle,
....@@ -341,7 +403,7 @@
341403 .enter = NULL }
342404 };
343405
344
-static struct cpuidle_state ivt_cstates[] = {
406
+static struct cpuidle_state ivt_cstates[] __initdata = {
345407 {
346408 .name = "C1",
347409 .desc = "MWAIT 0x00",
....@@ -353,7 +415,7 @@
353415 {
354416 .name = "C1E",
355417 .desc = "MWAIT 0x01",
356
- .flags = MWAIT2flg(0x01),
418
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
357419 .exit_latency = 10,
358420 .target_residency = 80,
359421 .enter = &intel_idle,
....@@ -378,7 +440,7 @@
378440 .enter = NULL }
379441 };
380442
381
-static struct cpuidle_state ivt_cstates_4s[] = {
443
+static struct cpuidle_state ivt_cstates_4s[] __initdata = {
382444 {
383445 .name = "C1",
384446 .desc = "MWAIT 0x00",
....@@ -390,7 +452,7 @@
390452 {
391453 .name = "C1E",
392454 .desc = "MWAIT 0x01",
393
- .flags = MWAIT2flg(0x01),
455
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
394456 .exit_latency = 10,
395457 .target_residency = 250,
396458 .enter = &intel_idle,
....@@ -415,7 +477,7 @@
415477 .enter = NULL }
416478 };
417479
418
-static struct cpuidle_state ivt_cstates_8s[] = {
480
+static struct cpuidle_state ivt_cstates_8s[] __initdata = {
419481 {
420482 .name = "C1",
421483 .desc = "MWAIT 0x00",
....@@ -427,7 +489,7 @@
427489 {
428490 .name = "C1E",
429491 .desc = "MWAIT 0x01",
430
- .flags = MWAIT2flg(0x01),
492
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
431493 .exit_latency = 10,
432494 .target_residency = 500,
433495 .enter = &intel_idle,
....@@ -452,7 +514,7 @@
452514 .enter = NULL }
453515 };
454516
455
-static struct cpuidle_state hsw_cstates[] = {
517
+static struct cpuidle_state hsw_cstates[] __initdata = {
456518 {
457519 .name = "C1",
458520 .desc = "MWAIT 0x00",
....@@ -464,7 +526,7 @@
464526 {
465527 .name = "C1E",
466528 .desc = "MWAIT 0x01",
467
- .flags = MWAIT2flg(0x01),
529
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
468530 .exit_latency = 10,
469531 .target_residency = 20,
470532 .enter = &intel_idle,
....@@ -520,7 +582,7 @@
520582 {
521583 .enter = NULL }
522584 };
523
-static struct cpuidle_state bdw_cstates[] = {
585
+static struct cpuidle_state bdw_cstates[] __initdata = {
524586 {
525587 .name = "C1",
526588 .desc = "MWAIT 0x00",
....@@ -532,7 +594,7 @@
532594 {
533595 .name = "C1E",
534596 .desc = "MWAIT 0x01",
535
- .flags = MWAIT2flg(0x01),
597
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
536598 .exit_latency = 10,
537599 .target_residency = 20,
538600 .enter = &intel_idle,
....@@ -589,7 +651,7 @@
589651 .enter = NULL }
590652 };
591653
592
-static struct cpuidle_state skl_cstates[] = {
654
+static struct cpuidle_state skl_cstates[] __initdata = {
593655 {
594656 .name = "C1",
595657 .desc = "MWAIT 0x00",
....@@ -601,7 +663,7 @@
601663 {
602664 .name = "C1E",
603665 .desc = "MWAIT 0x01",
604
- .flags = MWAIT2flg(0x01),
666
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
605667 .exit_latency = 10,
606668 .target_residency = 20,
607669 .enter = &intel_idle,
....@@ -617,7 +679,7 @@
617679 {
618680 .name = "C6",
619681 .desc = "MWAIT 0x20",
620
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
682
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
621683 .exit_latency = 85,
622684 .target_residency = 200,
623685 .enter = &intel_idle,
....@@ -625,7 +687,7 @@
625687 {
626688 .name = "C7s",
627689 .desc = "MWAIT 0x33",
628
- .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
690
+ .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
629691 .exit_latency = 124,
630692 .target_residency = 800,
631693 .enter = &intel_idle,
....@@ -633,7 +695,7 @@
633695 {
634696 .name = "C8",
635697 .desc = "MWAIT 0x40",
636
- .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
698
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
637699 .exit_latency = 200,
638700 .target_residency = 800,
639701 .enter = &intel_idle,
....@@ -641,7 +703,7 @@
641703 {
642704 .name = "C9",
643705 .desc = "MWAIT 0x50",
644
- .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
706
+ .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
645707 .exit_latency = 480,
646708 .target_residency = 5000,
647709 .enter = &intel_idle,
....@@ -649,7 +711,7 @@
649711 {
650712 .name = "C10",
651713 .desc = "MWAIT 0x60",
652
- .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
714
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
653715 .exit_latency = 890,
654716 .target_residency = 5000,
655717 .enter = &intel_idle,
....@@ -658,7 +720,7 @@
658720 .enter = NULL }
659721 };
660722
661
-static struct cpuidle_state skx_cstates[] = {
723
+static struct cpuidle_state skx_cstates[] __initdata = {
662724 {
663725 .name = "C1",
664726 .desc = "MWAIT 0x00",
....@@ -670,7 +732,7 @@
670732 {
671733 .name = "C1E",
672734 .desc = "MWAIT 0x01",
673
- .flags = MWAIT2flg(0x01),
735
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
674736 .exit_latency = 10,
675737 .target_residency = 20,
676738 .enter = &intel_idle,
....@@ -678,7 +740,7 @@
678740 {
679741 .name = "C6",
680742 .desc = "MWAIT 0x20",
681
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
743
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
682744 .exit_latency = 133,
683745 .target_residency = 600,
684746 .enter = &intel_idle,
....@@ -687,7 +749,36 @@
687749 .enter = NULL }
688750 };
689751
690
-static struct cpuidle_state atom_cstates[] = {
752
+static struct cpuidle_state icx_cstates[] __initdata = {
753
+ {
754
+ .name = "C1",
755
+ .desc = "MWAIT 0x00",
756
+ .flags = MWAIT2flg(0x00),
757
+ .exit_latency = 1,
758
+ .target_residency = 1,
759
+ .enter = &intel_idle,
760
+ .enter_s2idle = intel_idle_s2idle, },
761
+ {
762
+ .name = "C1E",
763
+ .desc = "MWAIT 0x01",
764
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
765
+ .exit_latency = 4,
766
+ .target_residency = 4,
767
+ .enter = &intel_idle,
768
+ .enter_s2idle = intel_idle_s2idle, },
769
+ {
770
+ .name = "C6",
771
+ .desc = "MWAIT 0x20",
772
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
773
+ .exit_latency = 128,
774
+ .target_residency = 384,
775
+ .enter = &intel_idle,
776
+ .enter_s2idle = intel_idle_s2idle, },
777
+ {
778
+ .enter = NULL }
779
+};
780
+
781
+static struct cpuidle_state atom_cstates[] __initdata = {
691782 {
692783 .name = "C1E",
693784 .desc = "MWAIT 0x00",
....@@ -723,7 +814,7 @@
723814 {
724815 .enter = NULL }
725816 };
726
-static struct cpuidle_state tangier_cstates[] = {
817
+static struct cpuidle_state tangier_cstates[] __initdata = {
727818 {
728819 .name = "C1",
729820 .desc = "MWAIT 0x00",
....@@ -767,7 +858,7 @@
767858 {
768859 .enter = NULL }
769860 };
770
-static struct cpuidle_state avn_cstates[] = {
861
+static struct cpuidle_state avn_cstates[] __initdata = {
771862 {
772863 .name = "C1",
773864 .desc = "MWAIT 0x00",
....@@ -787,7 +878,7 @@
787878 {
788879 .enter = NULL }
789880 };
790
-static struct cpuidle_state knl_cstates[] = {
881
+static struct cpuidle_state knl_cstates[] __initdata = {
791882 {
792883 .name = "C1",
793884 .desc = "MWAIT 0x00",
....@@ -808,7 +899,7 @@
808899 .enter = NULL }
809900 };
810901
811
-static struct cpuidle_state bxt_cstates[] = {
902
+static struct cpuidle_state bxt_cstates[] __initdata = {
812903 {
813904 .name = "C1",
814905 .desc = "MWAIT 0x00",
....@@ -820,7 +911,7 @@
820911 {
821912 .name = "C1E",
822913 .desc = "MWAIT 0x01",
823
- .flags = MWAIT2flg(0x01),
914
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
824915 .exit_latency = 10,
825916 .target_residency = 20,
826917 .enter = &intel_idle,
....@@ -869,7 +960,7 @@
869960 .enter = NULL }
870961 };
871962
872
-static struct cpuidle_state dnv_cstates[] = {
963
+static struct cpuidle_state dnv_cstates[] __initdata = {
873964 {
874965 .name = "C1",
875966 .desc = "MWAIT 0x00",
....@@ -881,7 +972,7 @@
881972 {
882973 .name = "C1E",
883974 .desc = "MWAIT 0x01",
884
- .flags = MWAIT2flg(0x01),
975
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
885976 .exit_latency = 10,
886977 .target_residency = 20,
887978 .enter = &intel_idle,
....@@ -898,287 +989,354 @@
898989 .enter = NULL }
899990 };
900991
901
-/**
902
- * intel_idle
903
- * @dev: cpuidle_device
904
- * @drv: cpuidle driver
905
- * @index: index of cpuidle state
906
- *
907
- * Must be called under local_irq_disable().
908
- */
909
-static __cpuidle int intel_idle(struct cpuidle_device *dev,
910
- struct cpuidle_driver *drv, int index)
911
-{
912
- unsigned long ecx = 1; /* break on interrupt flag */
913
- struct cpuidle_state *state = &drv->states[index];
914
- unsigned long eax = flg2MWAIT(state->flags);
915
- unsigned int cstate;
916
- bool uninitialized_var(tick);
917
- int cpu = smp_processor_id();
918
-
919
- /*
920
- * leave_mm() to avoid costly and often unnecessary wakeups
921
- * for flushing the user TLB's associated with the active mm.
922
- */
923
- if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
924
- leave_mm(cpu);
925
-
926
- if (!static_cpu_has(X86_FEATURE_ARAT)) {
927
- cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) &
928
- MWAIT_CSTATE_MASK) + 1;
929
- tick = false;
930
- if (!(lapic_timer_reliable_states & (1 << (cstate)))) {
931
- tick = true;
932
- tick_broadcast_enter();
933
- }
934
- }
935
-
936
- mwait_idle_with_hints(eax, ecx);
937
-
938
- if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
939
- tick_broadcast_exit();
940
-
941
- return index;
942
-}
943
-
944
-/**
945
- * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle
946
- * @dev: cpuidle_device
947
- * @drv: cpuidle driver
948
- * @index: state index
949
- */
950
-static void intel_idle_s2idle(struct cpuidle_device *dev,
951
- struct cpuidle_driver *drv, int index)
952
-{
953
- unsigned long ecx = 1; /* break on interrupt flag */
954
- unsigned long eax = flg2MWAIT(drv->states[index].flags);
955
-
956
- mwait_idle_with_hints(eax, ecx);
957
-}
958
-
959
-static void __setup_broadcast_timer(bool on)
960
-{
961
- if (on)
962
- tick_broadcast_enable();
963
- else
964
- tick_broadcast_disable();
965
-}
966
-
967
-static void auto_demotion_disable(void)
968
-{
969
- unsigned long long msr_bits;
970
-
971
- rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
972
- msr_bits &= ~(icpu->auto_demotion_disable_flags);
973
- wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
974
-}
975
-static void c1e_promotion_disable(void)
976
-{
977
- unsigned long long msr_bits;
978
-
979
- rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
980
- msr_bits &= ~0x2;
981
- wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
982
-}
983
-
984
-static const struct idle_cpu idle_cpu_nehalem = {
992
+static const struct idle_cpu idle_cpu_nehalem __initconst = {
985993 .state_table = nehalem_cstates,
986994 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
987995 .disable_promotion_to_c1e = true,
988996 };
989997
990
-static const struct idle_cpu idle_cpu_atom = {
998
+static const struct idle_cpu idle_cpu_nhx __initconst = {
999
+ .state_table = nehalem_cstates,
1000
+ .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1001
+ .disable_promotion_to_c1e = true,
1002
+ .use_acpi = true,
1003
+};
1004
+
1005
+static const struct idle_cpu idle_cpu_atom __initconst = {
9911006 .state_table = atom_cstates,
9921007 };
9931008
994
-static const struct idle_cpu idle_cpu_tangier = {
1009
+static const struct idle_cpu idle_cpu_tangier __initconst = {
9951010 .state_table = tangier_cstates,
9961011 };
9971012
998
-static const struct idle_cpu idle_cpu_lincroft = {
1013
+static const struct idle_cpu idle_cpu_lincroft __initconst = {
9991014 .state_table = atom_cstates,
10001015 .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
10011016 };
10021017
1003
-static const struct idle_cpu idle_cpu_snb = {
1018
+static const struct idle_cpu idle_cpu_snb __initconst = {
10041019 .state_table = snb_cstates,
10051020 .disable_promotion_to_c1e = true,
10061021 };
10071022
1008
-static const struct idle_cpu idle_cpu_byt = {
1023
+static const struct idle_cpu idle_cpu_snx __initconst = {
1024
+ .state_table = snb_cstates,
1025
+ .disable_promotion_to_c1e = true,
1026
+ .use_acpi = true,
1027
+};
1028
+
1029
+static const struct idle_cpu idle_cpu_byt __initconst = {
10091030 .state_table = byt_cstates,
10101031 .disable_promotion_to_c1e = true,
10111032 .byt_auto_demotion_disable_flag = true,
10121033 };
10131034
1014
-static const struct idle_cpu idle_cpu_cht = {
1035
+static const struct idle_cpu idle_cpu_cht __initconst = {
10151036 .state_table = cht_cstates,
10161037 .disable_promotion_to_c1e = true,
10171038 .byt_auto_demotion_disable_flag = true,
10181039 };
10191040
1020
-static const struct idle_cpu idle_cpu_ivb = {
1041
+static const struct idle_cpu idle_cpu_ivb __initconst = {
10211042 .state_table = ivb_cstates,
10221043 .disable_promotion_to_c1e = true,
10231044 };
10241045
1025
-static const struct idle_cpu idle_cpu_ivt = {
1046
+static const struct idle_cpu idle_cpu_ivt __initconst = {
10261047 .state_table = ivt_cstates,
10271048 .disable_promotion_to_c1e = true,
1049
+ .use_acpi = true,
10281050 };
10291051
1030
-static const struct idle_cpu idle_cpu_hsw = {
1052
+static const struct idle_cpu idle_cpu_hsw __initconst = {
10311053 .state_table = hsw_cstates,
10321054 .disable_promotion_to_c1e = true,
10331055 };
10341056
1035
-static const struct idle_cpu idle_cpu_bdw = {
1057
+static const struct idle_cpu idle_cpu_hsx __initconst = {
1058
+ .state_table = hsw_cstates,
1059
+ .disable_promotion_to_c1e = true,
1060
+ .use_acpi = true,
1061
+};
1062
+
1063
+static const struct idle_cpu idle_cpu_bdw __initconst = {
10361064 .state_table = bdw_cstates,
10371065 .disable_promotion_to_c1e = true,
10381066 };
10391067
1040
-static const struct idle_cpu idle_cpu_skl = {
1068
+static const struct idle_cpu idle_cpu_bdx __initconst = {
1069
+ .state_table = bdw_cstates,
1070
+ .disable_promotion_to_c1e = true,
1071
+ .use_acpi = true,
1072
+};
1073
+
1074
+static const struct idle_cpu idle_cpu_skl __initconst = {
10411075 .state_table = skl_cstates,
10421076 .disable_promotion_to_c1e = true,
10431077 };
10441078
1045
-static const struct idle_cpu idle_cpu_skx = {
1079
+static const struct idle_cpu idle_cpu_skx __initconst = {
10461080 .state_table = skx_cstates,
10471081 .disable_promotion_to_c1e = true,
1082
+ .use_acpi = true,
10481083 };
10491084
1050
-static const struct idle_cpu idle_cpu_avn = {
1085
+static const struct idle_cpu idle_cpu_icx __initconst = {
1086
+ .state_table = icx_cstates,
1087
+ .disable_promotion_to_c1e = true,
1088
+ .use_acpi = true,
1089
+};
1090
+
1091
+static const struct idle_cpu idle_cpu_avn __initconst = {
10511092 .state_table = avn_cstates,
10521093 .disable_promotion_to_c1e = true,
1094
+ .use_acpi = true,
10531095 };
10541096
1055
-static const struct idle_cpu idle_cpu_knl = {
1097
+static const struct idle_cpu idle_cpu_knl __initconst = {
10561098 .state_table = knl_cstates,
1099
+ .use_acpi = true,
10571100 };
10581101
1059
-static const struct idle_cpu idle_cpu_bxt = {
1102
+static const struct idle_cpu idle_cpu_bxt __initconst = {
10601103 .state_table = bxt_cstates,
10611104 .disable_promotion_to_c1e = true,
10621105 };
10631106
1064
-static const struct idle_cpu idle_cpu_dnv = {
1107
+static const struct idle_cpu idle_cpu_dnv __initconst = {
10651108 .state_table = dnv_cstates,
10661109 .disable_promotion_to_c1e = true,
1110
+ .use_acpi = true,
10671111 };
10681112
1069
-#define ICPU(model, cpu) \
1070
- { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&cpu }
1071
-
10721113 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1073
- ICPU(INTEL_FAM6_NEHALEM_EP, idle_cpu_nehalem),
1074
- ICPU(INTEL_FAM6_NEHALEM, idle_cpu_nehalem),
1075
- ICPU(INTEL_FAM6_NEHALEM_G, idle_cpu_nehalem),
1076
- ICPU(INTEL_FAM6_WESTMERE, idle_cpu_nehalem),
1077
- ICPU(INTEL_FAM6_WESTMERE_EP, idle_cpu_nehalem),
1078
- ICPU(INTEL_FAM6_NEHALEM_EX, idle_cpu_nehalem),
1079
- ICPU(INTEL_FAM6_ATOM_BONNELL, idle_cpu_atom),
1080
- ICPU(INTEL_FAM6_ATOM_BONNELL_MID, idle_cpu_lincroft),
1081
- ICPU(INTEL_FAM6_WESTMERE_EX, idle_cpu_nehalem),
1082
- ICPU(INTEL_FAM6_SANDYBRIDGE, idle_cpu_snb),
1083
- ICPU(INTEL_FAM6_SANDYBRIDGE_X, idle_cpu_snb),
1084
- ICPU(INTEL_FAM6_ATOM_SALTWELL, idle_cpu_atom),
1085
- ICPU(INTEL_FAM6_ATOM_SILVERMONT, idle_cpu_byt),
1086
- ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, idle_cpu_tangier),
1087
- ICPU(INTEL_FAM6_ATOM_AIRMONT, idle_cpu_cht),
1088
- ICPU(INTEL_FAM6_IVYBRIDGE, idle_cpu_ivb),
1089
- ICPU(INTEL_FAM6_IVYBRIDGE_X, idle_cpu_ivt),
1090
- ICPU(INTEL_FAM6_HASWELL_CORE, idle_cpu_hsw),
1091
- ICPU(INTEL_FAM6_HASWELL_X, idle_cpu_hsw),
1092
- ICPU(INTEL_FAM6_HASWELL_ULT, idle_cpu_hsw),
1093
- ICPU(INTEL_FAM6_HASWELL_GT3E, idle_cpu_hsw),
1094
- ICPU(INTEL_FAM6_ATOM_SILVERMONT_X, idle_cpu_avn),
1095
- ICPU(INTEL_FAM6_BROADWELL_CORE, idle_cpu_bdw),
1096
- ICPU(INTEL_FAM6_BROADWELL_GT3E, idle_cpu_bdw),
1097
- ICPU(INTEL_FAM6_BROADWELL_X, idle_cpu_bdw),
1098
- ICPU(INTEL_FAM6_BROADWELL_XEON_D, idle_cpu_bdw),
1099
- ICPU(INTEL_FAM6_SKYLAKE_MOBILE, idle_cpu_skl),
1100
- ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, idle_cpu_skl),
1101
- ICPU(INTEL_FAM6_KABYLAKE_MOBILE, idle_cpu_skl),
1102
- ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, idle_cpu_skl),
1103
- ICPU(INTEL_FAM6_SKYLAKE_X, idle_cpu_skx),
1104
- ICPU(INTEL_FAM6_XEON_PHI_KNL, idle_cpu_knl),
1105
- ICPU(INTEL_FAM6_XEON_PHI_KNM, idle_cpu_knl),
1106
- ICPU(INTEL_FAM6_ATOM_GOLDMONT, idle_cpu_bxt),
1107
- ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, idle_cpu_bxt),
1108
- ICPU(INTEL_FAM6_ATOM_GOLDMONT_X, idle_cpu_dnv),
1114
+ X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx),
1115
+ X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem),
1116
+ X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G, &idle_cpu_nehalem),
1117
+ X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &idle_cpu_nehalem),
1118
+ X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &idle_cpu_nhx),
1119
+ X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &idle_cpu_nhx),
1120
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL, &idle_cpu_atom),
1121
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID, &idle_cpu_lincroft),
1122
+ X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &idle_cpu_nhx),
1123
+ X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &idle_cpu_snb),
1124
+ X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &idle_cpu_snx),
1125
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL, &idle_cpu_atom),
1126
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &idle_cpu_byt),
1127
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier),
1128
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &idle_cpu_cht),
1129
+ X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &idle_cpu_ivb),
1130
+ X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &idle_cpu_ivt),
1131
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &idle_cpu_hsw),
1132
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &idle_cpu_hsx),
1133
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &idle_cpu_hsw),
1134
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &idle_cpu_hsw),
1135
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &idle_cpu_avn),
1136
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &idle_cpu_bdw),
1137
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &idle_cpu_bdw),
1138
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &idle_cpu_bdx),
1139
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &idle_cpu_bdx),
1140
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &idle_cpu_skl),
1141
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &idle_cpu_skl),
1142
+ X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &idle_cpu_skl),
1143
+ X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl),
1144
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx),
1145
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx),
1146
+ X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl),
1147
+ X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl),
1148
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt),
1149
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt),
1150
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv),
1151
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_dnv),
11091152 {}
11101153 };
11111154
1112
-/*
1113
- * intel_idle_probe()
1114
- */
1115
-static int __init intel_idle_probe(void)
1155
+static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
1156
+ X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
1157
+ {}
1158
+};
1159
+
1160
+static bool __init intel_idle_max_cstate_reached(int cstate)
11161161 {
1117
- unsigned int eax, ebx, ecx;
1118
- const struct x86_cpu_id *id;
1119
-
1120
- if (max_cstate == 0) {
1121
- pr_debug("disabled\n");
1122
- return -EPERM;
1162
+ if (cstate + 1 > max_cstate) {
1163
+ pr_info("max_cstate %d reached\n", max_cstate);
1164
+ return true;
11231165 }
1124
-
1125
- id = x86_match_cpu(intel_idle_ids);
1126
- if (!id) {
1127
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
1128
- boot_cpu_data.x86 == 6)
1129
- pr_debug("does not run on family %d model %d\n",
1130
- boot_cpu_data.x86, boot_cpu_data.x86_model);
1131
- return -ENODEV;
1132
- }
1133
-
1134
- if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1135
- pr_debug("Please enable MWAIT in BIOS SETUP\n");
1136
- return -ENODEV;
1137
- }
1138
-
1139
- if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1140
- return -ENODEV;
1141
-
1142
- cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1143
-
1144
- if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1145
- !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1146
- !mwait_substates)
1147
- return -ENODEV;
1148
-
1149
- pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
1150
-
1151
- icpu = (const struct idle_cpu *)id->driver_data;
1152
- cpuidle_state_table = icpu->state_table;
1153
-
1154
- pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
1155
- boot_cpu_data.x86_model);
1156
-
1157
- return 0;
1166
+ return false;
11581167 }
11591168
1160
-/*
1161
- * intel_idle_cpuidle_devices_uninit()
1162
- * Unregisters the cpuidle devices.
1163
- */
1164
-static void intel_idle_cpuidle_devices_uninit(void)
1169
+static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
11651170 {
1166
- int i;
1167
- struct cpuidle_device *dev;
1171
+ unsigned long eax = flg2MWAIT(state->flags);
11681172
1169
- for_each_online_cpu(i) {
1170
- dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
1171
- cpuidle_unregister_device(dev);
1172
- }
1173
+ if (boot_cpu_has(X86_FEATURE_ARAT))
1174
+ return false;
1175
+
1176
+ /*
1177
+ * Switch over to one-shot tick broadcast if the target C-state
1178
+ * is deeper than C1.
1179
+ */
1180
+ return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
11731181 }
11741182
1175
-/*
1176
- * ivt_idle_state_table_update(void)
1183
+#ifdef CONFIG_ACPI_PROCESSOR_CSTATE
1184
+#include <acpi/processor.h>
1185
+
1186
+static bool no_acpi __read_mostly;
1187
+module_param(no_acpi, bool, 0444);
1188
+MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");
1189
+
1190
+static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
1191
+module_param_named(use_acpi, force_use_acpi, bool, 0444);
1192
+MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");
1193
+
1194
+static struct acpi_processor_power acpi_state_table __initdata;
1195
+
1196
+/**
1197
+ * intel_idle_cst_usable - Check if the _CST information can be used.
11771198 *
1178
- * Tune IVT multi-socket targets
1179
- * Assumption: num_sockets == (max_package_num + 1)
1199
+ * Check if all of the C-states listed by _CST in the max_cstate range are
1200
+ * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
11801201 */
1181
-static void ivt_idle_state_table_update(void)
1202
+static bool __init intel_idle_cst_usable(void)
1203
+{
1204
+ int cstate, limit;
1205
+
1206
+ limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
1207
+ acpi_state_table.count);
1208
+
1209
+ for (cstate = 1; cstate < limit; cstate++) {
1210
+ struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
1211
+
1212
+ if (cx->entry_method != ACPI_CSTATE_FFH)
1213
+ return false;
1214
+ }
1215
+
1216
+ return true;
1217
+}
1218
+
1219
+static bool __init intel_idle_acpi_cst_extract(void)
1220
+{
1221
+ unsigned int cpu;
1222
+
1223
+ if (no_acpi) {
1224
+ pr_debug("Not allowed to use ACPI _CST\n");
1225
+ return false;
1226
+ }
1227
+
1228
+ for_each_possible_cpu(cpu) {
1229
+ struct acpi_processor *pr = per_cpu(processors, cpu);
1230
+
1231
+ if (!pr)
1232
+ continue;
1233
+
1234
+ if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
1235
+ continue;
1236
+
1237
+ acpi_state_table.count++;
1238
+
1239
+ if (!intel_idle_cst_usable())
1240
+ continue;
1241
+
1242
+ if (!acpi_processor_claim_cst_control())
1243
+ break;
1244
+
1245
+ return true;
1246
+ }
1247
+
1248
+ acpi_state_table.count = 0;
1249
+ pr_debug("ACPI _CST not found or not usable\n");
1250
+ return false;
1251
+}
1252
+
1253
+static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
1254
+{
1255
+ int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1256
+
1257
+ /*
1258
+ * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1259
+ * the interesting states are ACPI_CSTATE_FFH.
1260
+ */
1261
+ for (cstate = 1; cstate < limit; cstate++) {
1262
+ struct acpi_processor_cx *cx;
1263
+ struct cpuidle_state *state;
1264
+
1265
+ if (intel_idle_max_cstate_reached(cstate - 1))
1266
+ break;
1267
+
1268
+ cx = &acpi_state_table.states[cstate];
1269
+
1270
+ state = &drv->states[drv->state_count++];
1271
+
1272
+ snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
1273
+ strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
1274
+ state->exit_latency = cx->latency;
1275
+ /*
1276
+ * For C1-type C-states use the same number for both the exit
1277
+ * latency and target residency, because that is the case for
1278
+ * C1 in the majority of the static C-states tables above.
1279
+ * For the other types of C-states, however, set the target
1280
+ * residency to 3 times the exit latency which should lead to
1281
+ * a reasonable balance between energy-efficiency and
1282
+ * performance in the majority of interesting cases.
1283
+ */
1284
+ state->target_residency = cx->latency;
1285
+ if (cx->type > ACPI_STATE_C1)
1286
+ state->target_residency *= 3;
1287
+
1288
+ state->flags = MWAIT2flg(cx->address);
1289
+ if (cx->type > ACPI_STATE_C2)
1290
+ state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
1291
+
1292
+ if (disabled_states_mask & BIT(cstate))
1293
+ state->flags |= CPUIDLE_FLAG_OFF;
1294
+
1295
+ if (intel_idle_state_needs_timer_stop(state))
1296
+ state->flags |= CPUIDLE_FLAG_TIMER_STOP;
1297
+
1298
+ state->enter = intel_idle;
1299
+ state->enter_s2idle = intel_idle_s2idle;
1300
+ }
1301
+}
1302
+
1303
+static bool __init intel_idle_off_by_default(u32 mwait_hint)
1304
+{
1305
+ int cstate, limit;
1306
+
1307
+ /*
1308
+ * If there are no _CST C-states, do not disable any C-states by
1309
+ * default.
1310
+ */
1311
+ if (!acpi_state_table.count)
1312
+ return false;
1313
+
1314
+ limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1315
+ /*
1316
+ * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1317
+ * the interesting states are ACPI_CSTATE_FFH.
1318
+ */
1319
+ for (cstate = 1; cstate < limit; cstate++) {
1320
+ if (acpi_state_table.states[cstate].address == mwait_hint)
1321
+ return false;
1322
+ }
1323
+ return true;
1324
+}
1325
+#else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1326
+#define force_use_acpi (false)
1327
+
1328
+static inline bool intel_idle_acpi_cst_extract(void) { return false; }
1329
+static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
1330
+static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
1331
+#endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1332
+
1333
+/**
1334
+ * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
1335
+ *
1336
+ * Tune IVT multi-socket targets.
1337
+ * Assumption: num_sockets == (max_package_num + 1).
1338
+ */
1339
+static void __init ivt_idle_state_table_update(void)
11821340 {
11831341 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
11841342 int cpu, package_num, num_sockets = 1;
....@@ -1201,15 +1359,17 @@
12011359 /* else, 1 and 2 socket systems use default ivt_cstates */
12021360 }
12031361
1204
-/*
1205
- * Translate IRTL (Interrupt Response Time Limit) MSR to usec
1362
+/**
1363
+ * irtl_2_usec - IRTL to microseconds conversion.
1364
+ * @irtl: IRTL MSR value.
1365
+ *
1366
+ * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
12061367 */
1207
-
1208
-static unsigned int irtl_ns_units[] = {
1209
- 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
1210
-
1211
-static unsigned long long irtl_2_usec(unsigned long long irtl)
1368
+static unsigned long long __init irtl_2_usec(unsigned long long irtl)
12121369 {
1370
+ static const unsigned int irtl_ns_units[] __initconst = {
1371
+ 1, 32, 1024, 32768, 1048576, 33554432, 0, 0
1372
+ };
12131373 unsigned long long ns;
12141374
12151375 if (!irtl)
....@@ -1217,15 +1377,16 @@
12171377
12181378 ns = irtl_ns_units[(irtl >> 10) & 0x7];
12191379
1220
- return div64_u64((irtl & 0x3FF) * ns, 1000);
1380
+ return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
12211381 }
1222
-/*
1223
- * bxt_idle_state_table_update(void)
1382
+
1383
+/**
1384
+ * bxt_idle_state_table_update - Fix up the Broxton idle states table.
12241385 *
1225
- * On BXT, we trust the IRTL to show the definitive maximum latency
1226
- * We use the same value for target_residency.
1386
+ * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
1387
+ * definitive maximum latency and use the same value for target_residency.
12271388 */
1228
-static void bxt_idle_state_table_update(void)
1389
+static void __init bxt_idle_state_table_update(void)
12291390 {
12301391 unsigned long long msr;
12311392 unsigned int usec;
....@@ -1266,13 +1427,13 @@
12661427 }
12671428
12681429 }
1269
-/*
1270
- * sklh_idle_state_table_update(void)
1430
+
1431
+/**
1432
+ * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
12711433 *
1272
- * On SKL-H (model 0x5e) disable C8 and C9 if:
1273
- * C10 is enabled and SGX disabled
1434
+ * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
12741435 */
1275
-static void sklh_idle_state_table_update(void)
1436
+static void __init sklh_idle_state_table_update(void)
12761437 {
12771438 unsigned long long msr;
12781439 unsigned int eax, ebx, ecx, edx;
....@@ -1298,26 +1459,38 @@
12981459 /* if SGX is present */
12991460 if (ebx & (1 << 2)) {
13001461
1301
- rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
1462
+ rdmsrl(MSR_IA32_FEAT_CTL, msr);
13021463
13031464 /* if SGX is enabled */
13041465 if (msr & (1 << 18))
13051466 return;
13061467 }
13071468
1308
- skl_cstates[5].disabled = 1; /* C8-SKL */
1309
- skl_cstates[6].disabled = 1; /* C9-SKL */
1469
+ skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE; /* C8-SKL */
1470
+ skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */
13101471 }
1311
-/*
1312
- * intel_idle_state_table_update()
1313
- *
1314
- * Update the default state_table for this CPU-id
1315
- */
13161472
1317
-static void intel_idle_state_table_update(void)
1473
+static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
13181474 {
1319
- switch (boot_cpu_data.x86_model) {
1475
+ unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
1476
+ unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
1477
+ MWAIT_SUBSTATE_MASK;
13201478
1479
+ /* Ignore the C-state if there are NO sub-states in CPUID for it. */
1480
+ if (num_substates == 0)
1481
+ return false;
1482
+
1483
+ if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1484
+ mark_tsc_unstable("TSC halts in idle states deeper than C2");
1485
+
1486
+ return true;
1487
+}
1488
+
1489
+static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
1490
+{
1491
+ int cstate;
1492
+
1493
+ switch (boot_cpu_data.x86_model) {
13211494 case INTEL_FAM6_IVYBRIDGE_X:
13221495 ivt_idle_state_table_update();
13231496 break;
....@@ -1325,66 +1498,50 @@
13251498 case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
13261499 bxt_idle_state_table_update();
13271500 break;
1328
- case INTEL_FAM6_SKYLAKE_DESKTOP:
1501
+ case INTEL_FAM6_SKYLAKE:
13291502 sklh_idle_state_table_update();
13301503 break;
13311504 }
1332
-}
1333
-
1334
-/*
1335
- * intel_idle_cpuidle_driver_init()
1336
- * allocate, initialize cpuidle_states
1337
- */
1338
-static void __init intel_idle_cpuidle_driver_init(void)
1339
-{
1340
- int cstate;
1341
- struct cpuidle_driver *drv = &intel_idle_driver;
1342
-
1343
- intel_idle_state_table_update();
1344
-
1345
- cpuidle_poll_state_init(drv);
1346
- drv->state_count = 1;
13471505
13481506 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1349
- int num_substates, mwait_hint, mwait_cstate;
1507
+ unsigned int mwait_hint;
13501508
1351
- if ((cpuidle_state_table[cstate].enter == NULL) &&
1352
- (cpuidle_state_table[cstate].enter_s2idle == NULL))
1509
+ if (intel_idle_max_cstate_reached(cstate))
13531510 break;
13541511
1355
- if (cstate + 1 > max_cstate) {
1356
- pr_info("max_cstate %d reached\n", max_cstate);
1512
+ if (!cpuidle_state_table[cstate].enter &&
1513
+ !cpuidle_state_table[cstate].enter_s2idle)
13571514 break;
1358
- }
13591515
1360
- mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1361
- mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
1362
-
1363
- /* number of sub-states for this state in CPUID.MWAIT */
1364
- num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
1365
- & MWAIT_SUBSTATE_MASK;
1366
-
1367
- /* if NO sub-states for this state in CPUID, skip it */
1368
- if (num_substates == 0)
1369
- continue;
1370
-
1371
- /* if state marked as disabled, skip it */
1372
- if (cpuidle_state_table[cstate].disabled != 0) {
1516
+ /* If marked as unusable, skip this state. */
1517
+ if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
13731518 pr_debug("state %s is disabled\n",
13741519 cpuidle_state_table[cstate].name);
13751520 continue;
13761521 }
13771522
1523
+ mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1524
+ if (!intel_idle_verify_cstate(mwait_hint))
1525
+ continue;
13781526
1379
- if (((mwait_cstate + 1) > 2) &&
1380
- !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1381
- mark_tsc_unstable("TSC halts in idle"
1382
- " states deeper than C2");
1527
+ /* Structure copy. */
1528
+ drv->states[drv->state_count] = cpuidle_state_table[cstate];
13831529
1384
- drv->states[drv->state_count] = /* structure copy */
1385
- cpuidle_state_table[cstate];
1530
+ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
1531
+ cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
1532
+ drv->states[drv->state_count].enter = intel_idle_ibrs;
1533
+ }
13861534
1387
- drv->state_count += 1;
1535
+ if ((disabled_states_mask & BIT(drv->state_count)) ||
1536
+ ((icpu->use_acpi || force_use_acpi) &&
1537
+ intel_idle_off_by_default(mwait_hint) &&
1538
+ !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
1539
+ drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
1540
+
1541
+ if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
1542
+ drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
1543
+
1544
+ drv->state_count++;
13881545 }
13891546
13901547 if (icpu->byt_auto_demotion_disable_flag) {
....@@ -1393,11 +1550,49 @@
13931550 }
13941551 }
13951552
1553
+/**
1554
+ * intel_idle_cpuidle_driver_init - Create the list of available idle states.
1555
+ * @drv: cpuidle driver structure to initialize.
1556
+ */
1557
+static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
1558
+{
1559
+ cpuidle_poll_state_init(drv);
13961560
1397
-/*
1398
- * intel_idle_cpu_init()
1399
- * allocate, initialize, register cpuidle_devices
1400
- * @cpu: cpu/core to initialize
1561
+ if (disabled_states_mask & BIT(0))
1562
+ drv->states[0].flags |= CPUIDLE_FLAG_OFF;
1563
+
1564
+ drv->state_count = 1;
1565
+
1566
+ if (icpu)
1567
+ intel_idle_init_cstates_icpu(drv);
1568
+ else
1569
+ intel_idle_init_cstates_acpi(drv);
1570
+}
1571
+
1572
+static void auto_demotion_disable(void)
1573
+{
1574
+ unsigned long long msr_bits;
1575
+
1576
+ rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1577
+ msr_bits &= ~auto_demotion_disable_flags;
1578
+ wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1579
+}
1580
+
1581
+static void c1e_promotion_disable(void)
1582
+{
1583
+ unsigned long long msr_bits;
1584
+
1585
+ rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1586
+ msr_bits &= ~0x2;
1587
+ wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1588
+}
1589
+
1590
+/**
1591
+ * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
1592
+ * @cpu: CPU to initialize.
1593
+ *
1594
+ * Register a cpuidle device object for @cpu and update its MSRs in accordance
1595
+ * with the processor model flags.
14011596 */
14021597 static int intel_idle_cpu_init(unsigned int cpu)
14031598 {
....@@ -1411,10 +1606,10 @@
14111606 return -EIO;
14121607 }
14131608
1414
- if (icpu->auto_demotion_disable_flags)
1609
+ if (auto_demotion_disable_flags)
14151610 auto_demotion_disable();
14161611
1417
- if (icpu->disable_promotion_to_c1e)
1612
+ if (disable_promotion_to_c1e)
14181613 c1e_promotion_disable();
14191614
14201615 return 0;
....@@ -1424,8 +1619,8 @@
14241619 {
14251620 struct cpuidle_device *dev;
14261621
1427
- if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
1428
- __setup_broadcast_timer(true);
1622
+ if (!boot_cpu_has(X86_FEATURE_ARAT))
1623
+ tick_broadcast_enable();
14291624
14301625 /*
14311626 * Some systems can hotplug a cpu at runtime after
....@@ -1439,23 +1634,76 @@
14391634 return 0;
14401635 }
14411636
1637
+/**
1638
+ * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
1639
+ */
1640
+static void __init intel_idle_cpuidle_devices_uninit(void)
1641
+{
1642
+ int i;
1643
+
1644
+ for_each_online_cpu(i)
1645
+ cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
1646
+}
1647
+
14421648 static int __init intel_idle_init(void)
14431649 {
1650
+ const struct x86_cpu_id *id;
1651
+ unsigned int eax, ebx, ecx;
14441652 int retval;
14451653
14461654 /* Do not load intel_idle at all for now if idle= is passed */
14471655 if (boot_option_idle_override != IDLE_NO_OVERRIDE)
14481656 return -ENODEV;
14491657
1450
- retval = intel_idle_probe();
1451
- if (retval)
1452
- return retval;
1658
+ if (max_cstate == 0) {
1659
+ pr_debug("disabled\n");
1660
+ return -EPERM;
1661
+ }
1662
+
1663
+ id = x86_match_cpu(intel_idle_ids);
1664
+ if (id) {
1665
+ if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1666
+ pr_debug("Please enable MWAIT in BIOS SETUP\n");
1667
+ return -ENODEV;
1668
+ }
1669
+ } else {
1670
+ id = x86_match_cpu(intel_mwait_ids);
1671
+ if (!id)
1672
+ return -ENODEV;
1673
+ }
1674
+
1675
+ if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1676
+ return -ENODEV;
1677
+
1678
+ cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1679
+
1680
+ if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1681
+ !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1682
+ !mwait_substates)
1683
+ return -ENODEV;
1684
+
1685
+ pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
1686
+
1687
+ icpu = (const struct idle_cpu *)id->driver_data;
1688
+ if (icpu) {
1689
+ cpuidle_state_table = icpu->state_table;
1690
+ auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
1691
+ disable_promotion_to_c1e = icpu->disable_promotion_to_c1e;
1692
+ if (icpu->use_acpi || force_use_acpi)
1693
+ intel_idle_acpi_cst_extract();
1694
+ } else if (!intel_idle_acpi_cst_extract()) {
1695
+ return -ENODEV;
1696
+ }
1697
+
1698
+ pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
1699
+ boot_cpu_data.x86_model);
14531700
14541701 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
1455
- if (intel_idle_cpuidle_devices == NULL)
1702
+ if (!intel_idle_cpuidle_devices)
14561703 return -ENOMEM;
14571704
1458
- intel_idle_cpuidle_driver_init();
1705
+ intel_idle_cpuidle_driver_init(&intel_idle_driver);
1706
+
14591707 retval = cpuidle_register_driver(&intel_idle_driver);
14601708 if (retval) {
14611709 struct cpuidle_driver *drv = cpuidle_get_driver();
....@@ -1464,16 +1712,13 @@
14641712 goto init_driver_fail;
14651713 }
14661714
1467
- if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */
1468
- lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
1469
-
14701715 retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
14711716 intel_idle_cpu_online, NULL);
14721717 if (retval < 0)
14731718 goto hp_setup_fail;
14741719
1475
- pr_debug("lapic_timer_reliable_states 0x%x\n",
1476
- lapic_timer_reliable_states);
1720
+ pr_debug("Local APIC timer is reliable in %s\n",
1721
+ boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");
14771722
14781723 return 0;
14791724
....@@ -1494,3 +1739,11 @@
14941739 * is the easiest way (currently) to continue doing that.
14951740 */
14961741 module_param(max_cstate, int, 0444);
1742
+/*
1743
+ * The positions of the bits that are set in this number are the indices of the
1744
+ * idle states to be disabled by default (as reflected by the names of the
1745
+ * corresponding idle state directories in sysfs, "state0", "state1" ...
1746
+ * "state<i>" ..., where <i> is the index of the given state).
1747
+ */
1748
+module_param_named(states_off, disabled_states_mask, uint, 0444);
1749
+MODULE_PARM_DESC(states_off, "Mask of disabled idle states");