.. | .. |
---|
19 | 19 | #include <asm/machdep.h> |
---|
20 | 20 | #include <asm/firmware.h> |
---|
21 | 21 | #include <asm/runlatch.h> |
---|
| 22 | +#include <asm/idle.h> |
---|
22 | 23 | #include <asm/plpar_wrappers.h> |
---|
| 24 | +#include <asm/rtas.h> |
---|
23 | 25 | |
---|
24 | | -struct cpuidle_driver pseries_idle_driver = { |
---|
| 26 | +static struct cpuidle_driver pseries_idle_driver = { |
---|
25 | 27 | .name = "pseries_idle", |
---|
26 | 28 | .owner = THIS_MODULE, |
---|
27 | 29 | }; |
---|
.. | .. |
---|
31 | 33 | static u64 snooze_timeout __read_mostly; |
---|
32 | 34 | static bool snooze_timeout_en __read_mostly; |
---|
33 | 35 | |
---|
34 | | -static inline void idle_loop_prolog(unsigned long *in_purr) |
---|
35 | | -{ |
---|
36 | | - ppc64_runlatch_off(); |
---|
37 | | - *in_purr = mfspr(SPRN_PURR); |
---|
38 | | - /* |
---|
39 | | - * Indicate to the HV that we are idle. Now would be |
---|
40 | | - * a good time to find other work to dispatch. |
---|
41 | | - */ |
---|
42 | | - get_lppaca()->idle = 1; |
---|
43 | | -} |
---|
44 | | - |
---|
45 | | -static inline void idle_loop_epilog(unsigned long in_purr) |
---|
46 | | -{ |
---|
47 | | - u64 wait_cycles; |
---|
48 | | - |
---|
49 | | - wait_cycles = be64_to_cpu(get_lppaca()->wait_state_cycles); |
---|
50 | | - wait_cycles += mfspr(SPRN_PURR) - in_purr; |
---|
51 | | - get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles); |
---|
52 | | - get_lppaca()->idle = 0; |
---|
53 | | - |
---|
54 | | - ppc64_runlatch_on(); |
---|
55 | | -} |
---|
56 | | - |
---|
57 | 36 | static int snooze_loop(struct cpuidle_device *dev, |
---|
58 | 37 | struct cpuidle_driver *drv, |
---|
59 | 38 | int index) |
---|
60 | 39 | { |
---|
61 | | - unsigned long in_purr; |
---|
62 | 40 | u64 snooze_exit_time; |
---|
63 | 41 | |
---|
64 | 42 | set_thread_flag(TIF_POLLING_NRFLAG); |
---|
65 | 43 | |
---|
66 | | - idle_loop_prolog(&in_purr); |
---|
| 44 | + pseries_idle_prolog(); |
---|
67 | 45 | local_irq_enable(); |
---|
68 | 46 | snooze_exit_time = get_tb() + snooze_timeout; |
---|
69 | 47 | |
---|
.. | .. |
---|
87 | 65 | |
---|
88 | 66 | local_irq_disable(); |
---|
89 | 67 | |
---|
90 | | - idle_loop_epilog(in_purr); |
---|
| 68 | + pseries_idle_epilog(); |
---|
91 | 69 | |
---|
92 | 70 | return index; |
---|
93 | 71 | } |
---|
.. | .. |
---|
109 | 87 | } |
---|
110 | 88 | } |
---|
111 | 89 | |
---|
| 90 | +/* |
---|
| 91 | + * XCEDE: Extended CEDE states discovered through the |
---|
| 92 | + * "ibm,get-system-parameter" RTAS call with the token |
---|
| 93 | + * CEDE_LATENCY_TOKEN |
---|
| 94 | + */ |
---|
| 95 | + |
---|
| 96 | +/* |
---|
| 97 | + * Section 7.3.16 System Parameters Option of PAPR version 2.8.1 has a |
---|
| 98 | + * table with all the parameters to ibm,get-system-parameter. |
---|
| 99 | + * CEDE_LATENCY_TOKEN corresponds to the token value for Cede Latency |
---|
| 100 | + * Settings Information. |
---|
| 101 | + */ |
---|
| 102 | +#define CEDE_LATENCY_TOKEN 45 |
---|
| 103 | + |
---|
| 104 | +/* |
---|
| 105 | + * If the platform supports the cede latency settings information system |
---|
| 106 | + * parameter it must provide the following information in the NULL terminated |
---|
| 107 | + * parameter string: |
---|
| 108 | + * |
---|
| 109 | + * a. The first byte is the length "N" of each cede latency setting record minus |
---|
| 110 | + * one (zero indicates a length of 1 byte). |
---|
| 111 | + * |
---|
| 112 | + * b. For each supported cede latency setting a cede latency setting record |
---|
| 113 | + * consisting of the first "N" bytes as per the following table. |
---|
| 114 | + * |
---|
| 115 | + * ----------------------------- |
---|
| 116 | + * | Field | Field | |
---|
| 117 | + * | Name | Length | |
---|
| 118 | + * ----------------------------- |
---|
| 119 | + * | Cede Latency | 1 Byte | |
---|
| 120 | + * | Specifier Value | | |
---|
| 121 | + * ----------------------------- |
---|
| 122 | + * | Maximum wakeup | | |
---|
| 123 | + * | latency in | 8 Bytes | |
---|
| 124 | + * | tb-ticks | | |
---|
| 125 | + * ----------------------------- |
---|
| 126 | + * | Responsive to | | |
---|
| 127 | + * | external | 1 Byte | |
---|
| 128 | + * | interrupts | | |
---|
| 129 | + * ----------------------------- |
---|
| 130 | + * |
---|
| 131 | + * This version has cede latency record size = 10. |
---|
| 132 | + * |
---|
| 133 | + * The structure xcede_latency_payload represents a) and b) with |
---|
| 134 | + * xcede_latency_record representing the table in b). |
---|
| 135 | + * |
---|
| 136 | + * xcede_latency_parameter is what gets returned by |
---|
| 137 | + * ibm,get-system-parameter RTAS call when made with |
---|
| 138 | + * CEDE_LATENCY_TOKEN. |
---|
| 139 | + * |
---|
| 140 | + * These structures are only used to represent the data obtained by the RTAS |
---|
| 141 | + * call. The data is in big-endian. |
---|
| 142 | + */ |
---|
| 143 | +struct xcede_latency_record { |
---|
| 144 | + u8 hint; |
---|
| 145 | + __be64 latency_ticks; |
---|
| 146 | + u8 wake_on_irqs; |
---|
| 147 | +} __packed; |
---|
| 148 | + |
---|
| 149 | +// Make space for 16 records, which "should be enough". |
---|
| 150 | +struct xcede_latency_payload { |
---|
| 151 | + u8 record_size; |
---|
| 152 | + struct xcede_latency_record records[16]; |
---|
| 153 | +} __packed; |
---|
| 154 | + |
---|
| 155 | +struct xcede_latency_parameter { |
---|
| 156 | + __be16 payload_size; |
---|
| 157 | + struct xcede_latency_payload payload; |
---|
| 158 | + u8 null_char; |
---|
| 159 | +} __packed; |
---|
| 160 | + |
---|
| 161 | +static unsigned int nr_xcede_records; |
---|
| 162 | +static struct xcede_latency_parameter xcede_latency_parameter __initdata; |
---|
| 163 | + |
---|
| 164 | +static int __init parse_cede_parameters(void) |
---|
| 165 | +{ |
---|
| 166 | + struct xcede_latency_payload *payload; |
---|
| 167 | + u32 total_xcede_records_size; |
---|
| 168 | + u8 xcede_record_size; |
---|
| 169 | + u16 payload_size; |
---|
| 170 | + int ret, i; |
---|
| 171 | + |
---|
| 172 | + ret = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, |
---|
| 173 | + NULL, CEDE_LATENCY_TOKEN, __pa(&xcede_latency_parameter), |
---|
| 174 | + sizeof(xcede_latency_parameter)); |
---|
| 175 | + if (ret) { |
---|
| 176 | + pr_err("xcede: Error parsing CEDE_LATENCY_TOKEN\n"); |
---|
| 177 | + return ret; |
---|
| 178 | + } |
---|
| 179 | + |
---|
| 180 | + payload_size = be16_to_cpu(xcede_latency_parameter.payload_size); |
---|
| 181 | + payload = &xcede_latency_parameter.payload; |
---|
| 182 | + |
---|
| 183 | + xcede_record_size = payload->record_size + 1; |
---|
| 184 | + |
---|
| 185 | + if (xcede_record_size != sizeof(struct xcede_latency_record)) { |
---|
| 186 | + pr_err("xcede: Expected record-size %lu. Observed size %u.\n", |
---|
| 187 | + sizeof(struct xcede_latency_record), xcede_record_size); |
---|
| 188 | + return -EINVAL; |
---|
| 189 | + } |
---|
| 190 | + |
---|
| 191 | + pr_info("xcede: xcede_record_size = %d\n", xcede_record_size); |
---|
| 192 | + |
---|
| 193 | + /* |
---|
| 194 | + * Since the payload_size includes the last NULL byte and the |
---|
| 195 | + * xcede_record_size, the remaining bytes correspond to the array of all |
---|
| 196 | + * cede_latency settings. |
---|
| 197 | + */ |
---|
| 198 | + total_xcede_records_size = payload_size - 2; |
---|
| 199 | + nr_xcede_records = total_xcede_records_size / xcede_record_size; |
---|
| 200 | + |
---|
| 201 | + for (i = 0; i < nr_xcede_records; i++) { |
---|
| 202 | + struct xcede_latency_record *record = &payload->records[i]; |
---|
| 203 | + u64 latency_ticks = be64_to_cpu(record->latency_ticks); |
---|
| 204 | + u8 wake_on_irqs = record->wake_on_irqs; |
---|
| 205 | + u8 hint = record->hint; |
---|
| 206 | + |
---|
| 207 | + pr_info("xcede: Record %d : hint = %u, latency = 0x%llx tb ticks, Wake-on-irq = %u\n", |
---|
| 208 | + i, hint, latency_ticks, wake_on_irqs); |
---|
| 209 | + } |
---|
| 210 | + |
---|
| 211 | + return 0; |
---|
| 212 | +} |
---|
| 213 | + |
---|
| 214 | +#define NR_DEDICATED_STATES 2 /* snooze, CEDE */ |
---|
| 215 | +static u8 cede_latency_hint[NR_DEDICATED_STATES]; |
---|
| 216 | + |
---|
112 | 217 | static int dedicated_cede_loop(struct cpuidle_device *dev, |
---|
113 | 218 | struct cpuidle_driver *drv, |
---|
114 | 219 | int index) |
---|
115 | 220 | { |
---|
116 | | - unsigned long in_purr; |
---|
| 221 | + u8 old_latency_hint; |
---|
117 | 222 | |
---|
118 | | - idle_loop_prolog(&in_purr); |
---|
| 223 | + pseries_idle_prolog(); |
---|
119 | 224 | get_lppaca()->donate_dedicated_cpu = 1; |
---|
| 225 | + old_latency_hint = get_lppaca()->cede_latency_hint; |
---|
| 226 | + get_lppaca()->cede_latency_hint = cede_latency_hint[index]; |
---|
120 | 227 | |
---|
121 | 228 | HMT_medium(); |
---|
122 | 229 | check_and_cede_processor(); |
---|
123 | 230 | |
---|
124 | 231 | local_irq_disable(); |
---|
125 | 232 | get_lppaca()->donate_dedicated_cpu = 0; |
---|
| 233 | + get_lppaca()->cede_latency_hint = old_latency_hint; |
---|
126 | 234 | |
---|
127 | | - idle_loop_epilog(in_purr); |
---|
| 235 | + pseries_idle_epilog(); |
---|
128 | 236 | |
---|
129 | 237 | return index; |
---|
130 | 238 | } |
---|
.. | .. |
---|
133 | 241 | struct cpuidle_driver *drv, |
---|
134 | 242 | int index) |
---|
135 | 243 | { |
---|
136 | | - unsigned long in_purr; |
---|
137 | 244 | |
---|
138 | | - idle_loop_prolog(&in_purr); |
---|
| 245 | + pseries_idle_prolog(); |
---|
139 | 246 | |
---|
140 | 247 | /* |
---|
141 | 248 | * Yield the processor to the hypervisor. We return if |
---|
.. | .. |
---|
147 | 254 | check_and_cede_processor(); |
---|
148 | 255 | |
---|
149 | 256 | local_irq_disable(); |
---|
150 | | - idle_loop_epilog(in_purr); |
---|
| 257 | + pseries_idle_epilog(); |
---|
151 | 258 | |
---|
152 | 259 | return index; |
---|
153 | 260 | } |
---|
.. | .. |
---|
155 | 262 | /* |
---|
156 | 263 | * States for dedicated partition case. |
---|
157 | 264 | */ |
---|
158 | | -static struct cpuidle_state dedicated_states[] = { |
---|
| 265 | +static struct cpuidle_state dedicated_states[NR_DEDICATED_STATES] = { |
---|
159 | 266 | { /* Snooze */ |
---|
160 | 267 | .name = "snooze", |
---|
161 | 268 | .desc = "snooze", |
---|
.. | .. |
---|
236 | 343 | return 0; |
---|
237 | 344 | } |
---|
238 | 345 | |
---|
| 346 | +static void __init fixup_cede0_latency(void) |
---|
| 347 | +{ |
---|
| 348 | + struct xcede_latency_payload *payload; |
---|
| 349 | + u64 min_latency_us; |
---|
| 350 | + int i; |
---|
| 351 | + |
---|
| 352 | + min_latency_us = dedicated_states[1].exit_latency; // CEDE latency |
---|
| 353 | + |
---|
| 354 | + if (parse_cede_parameters()) |
---|
| 355 | + return; |
---|
| 356 | + |
---|
| 357 | + pr_info("cpuidle: Skipping the %d Extended CEDE idle states\n", |
---|
| 358 | + nr_xcede_records); |
---|
| 359 | + |
---|
| 360 | + payload = &xcede_latency_parameter.payload; |
---|
| 361 | + for (i = 0; i < nr_xcede_records; i++) { |
---|
| 362 | + struct xcede_latency_record *record = &payload->records[i]; |
---|
| 363 | + u64 latency_tb = be64_to_cpu(record->latency_ticks); |
---|
| 364 | + u64 latency_us = DIV_ROUND_UP_ULL(tb_to_ns(latency_tb), NSEC_PER_USEC); |
---|
| 365 | + |
---|
| 366 | + if (latency_us == 0) |
---|
| 367 | + pr_warn("cpuidle: xcede record %d has an unrealistic latency of 0us.\n", i); |
---|
| 368 | + |
---|
| 369 | + if (latency_us < min_latency_us) |
---|
| 370 | + min_latency_us = latency_us; |
---|
| 371 | + } |
---|
| 372 | + |
---|
| 373 | + /* |
---|
| 374 | + * By default, we assume that CEDE(0) has exit latency 10us, |
---|
| 375 | + * since there is no way for us to query from the platform. |
---|
| 376 | + * |
---|
| 377 | + * However, if the wakeup latency of an Extended CEDE state is |
---|
| 378 | + * smaller than 10us, then we can be sure that CEDE(0) |
---|
| 379 | + * requires no more than that. |
---|
| 380 | + * |
---|
| 381 | + * Perform the fix-up. |
---|
| 382 | + */ |
---|
| 383 | + if (min_latency_us < dedicated_states[1].exit_latency) { |
---|
| 384 | + /* |
---|
| 385 | + * We set a minimum of 1us wakeup latency for cede0 to |
---|
| 386 | + * distinguish it from snooze |
---|
| 387 | + */ |
---|
| 388 | + u64 cede0_latency = 1; |
---|
| 389 | + |
---|
| 390 | + if (min_latency_us > cede0_latency) |
---|
| 391 | + cede0_latency = min_latency_us - 1; |
---|
| 392 | + |
---|
| 393 | + dedicated_states[1].exit_latency = cede0_latency; |
---|
| 394 | + dedicated_states[1].target_residency = 10 * (cede0_latency); |
---|
| 395 | + pr_info("cpuidle: Fixed up CEDE exit latency to %llu us\n", |
---|
| 396 | + cede0_latency); |
---|
| 397 | + } |
---|
| 398 | + |
---|
| 399 | +} |
---|
| 400 | + |
---|
239 | 401 | /* |
---|
240 | 402 | * pseries_idle_probe() |
---|
241 | 403 | * Choose state table for shared versus dedicated partition |
---|
242 | 404 | */ |
---|
243 | | -static int pseries_idle_probe(void) |
---|
| 405 | +static int __init pseries_idle_probe(void) |
---|
244 | 406 | { |
---|
245 | 407 | |
---|
246 | 408 | if (cpuidle_disable != IDLE_NO_OVERRIDE) |
---|
247 | 409 | return -ENODEV; |
---|
248 | 410 | |
---|
249 | 411 | if (firmware_has_feature(FW_FEATURE_SPLPAR)) { |
---|
250 | | - /* |
---|
251 | | - * Use local_paca instead of get_lppaca() since |
---|
252 | | - * preemption is not disabled, and it is not required in |
---|
253 | | - * fact, since lppaca_ptr does not need to be the value |
---|
254 | | - * associated to the current CPU, it can be from any CPU. |
---|
255 | | - */ |
---|
256 | | - if (lppaca_shared_proc(local_paca->lppaca_ptr)) { |
---|
| 412 | + if (lppaca_shared_proc()) { |
---|
257 | 413 | cpuidle_state_table = shared_states; |
---|
258 | 414 | max_idle_state = ARRAY_SIZE(shared_states); |
---|
259 | 415 | } else { |
---|
| 416 | + /* |
---|
| 417 | + * Use firmware provided latency values |
---|
| 418 | + * starting with POWER10 platforms. In the |
---|
| 419 | + * case that we are running on a POWER10 |
---|
| 420 | + * platform but in an earlier compat mode, we |
---|
| 421 | + * can still use the firmware provided values. |
---|
| 422 | + * |
---|
| 423 | + * However, on platforms prior to POWER10, we |
---|
| 424 | + * cannot rely on the accuracy of the firmware |
---|
| 425 | + * provided latency values. On such platforms, |
---|
| 426 | + * go with the conservative default estimate |
---|
| 427 | + * of 10us. |
---|
| 428 | + */ |
---|
| 429 | + if (cpu_has_feature(CPU_FTR_ARCH_31) || pvr_version_is(PVR_POWER10)) |
---|
| 430 | + fixup_cede0_latency(); |
---|
260 | 431 | cpuidle_state_table = dedicated_states; |
---|
261 | | - max_idle_state = ARRAY_SIZE(dedicated_states); |
---|
| 432 | + max_idle_state = NR_DEDICATED_STATES; |
---|
262 | 433 | } |
---|
263 | 434 | } else |
---|
264 | 435 | return -ENODEV; |
---|