| .. | .. |
|---|
| 19 | 19 | #include <asm/machdep.h> |
|---|
| 20 | 20 | #include <asm/firmware.h> |
|---|
| 21 | 21 | #include <asm/runlatch.h> |
|---|
| 22 | +#include <asm/idle.h> |
|---|
| 22 | 23 | #include <asm/plpar_wrappers.h> |
|---|
| 24 | +#include <asm/rtas.h> |
|---|
| 23 | 25 | |
|---|
| 24 | | -struct cpuidle_driver pseries_idle_driver = { |
|---|
| 26 | +static struct cpuidle_driver pseries_idle_driver = { |
|---|
| 25 | 27 | .name = "pseries_idle", |
|---|
| 26 | 28 | .owner = THIS_MODULE, |
|---|
| 27 | 29 | }; |
|---|
| .. | .. |
|---|
| 31 | 33 | static u64 snooze_timeout __read_mostly; |
|---|
| 32 | 34 | static bool snooze_timeout_en __read_mostly; |
|---|
| 33 | 35 | |
|---|
| 34 | | -static inline void idle_loop_prolog(unsigned long *in_purr) |
|---|
| 35 | | -{ |
|---|
| 36 | | - ppc64_runlatch_off(); |
|---|
| 37 | | - *in_purr = mfspr(SPRN_PURR); |
|---|
| 38 | | - /* |
|---|
| 39 | | - * Indicate to the HV that we are idle. Now would be |
|---|
| 40 | | - * a good time to find other work to dispatch. |
|---|
| 41 | | - */ |
|---|
| 42 | | - get_lppaca()->idle = 1; |
|---|
| 43 | | -} |
|---|
| 44 | | - |
|---|
| 45 | | -static inline void idle_loop_epilog(unsigned long in_purr) |
|---|
| 46 | | -{ |
|---|
| 47 | | - u64 wait_cycles; |
|---|
| 48 | | - |
|---|
| 49 | | - wait_cycles = be64_to_cpu(get_lppaca()->wait_state_cycles); |
|---|
| 50 | | - wait_cycles += mfspr(SPRN_PURR) - in_purr; |
|---|
| 51 | | - get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles); |
|---|
| 52 | | - get_lppaca()->idle = 0; |
|---|
| 53 | | - |
|---|
| 54 | | - ppc64_runlatch_on(); |
|---|
| 55 | | -} |
|---|
| 56 | | - |
|---|
| 57 | 36 | static int snooze_loop(struct cpuidle_device *dev, |
|---|
| 58 | 37 | struct cpuidle_driver *drv, |
|---|
| 59 | 38 | int index) |
|---|
| 60 | 39 | { |
|---|
| 61 | | - unsigned long in_purr; |
|---|
| 62 | 40 | u64 snooze_exit_time; |
|---|
| 63 | 41 | |
|---|
| 64 | 42 | set_thread_flag(TIF_POLLING_NRFLAG); |
|---|
| 65 | 43 | |
|---|
| 66 | | - idle_loop_prolog(&in_purr); |
|---|
| 44 | + pseries_idle_prolog(); |
|---|
| 67 | 45 | local_irq_enable(); |
|---|
| 68 | 46 | snooze_exit_time = get_tb() + snooze_timeout; |
|---|
| 69 | 47 | |
|---|
| .. | .. |
|---|
| 87 | 65 | |
|---|
| 88 | 66 | local_irq_disable(); |
|---|
| 89 | 67 | |
|---|
| 90 | | - idle_loop_epilog(in_purr); |
|---|
| 68 | + pseries_idle_epilog(); |
|---|
| 91 | 69 | |
|---|
| 92 | 70 | return index; |
|---|
| 93 | 71 | } |
|---|
| .. | .. |
|---|
| 109 | 87 | } |
|---|
| 110 | 88 | } |
|---|
| 111 | 89 | |
|---|
| 90 | +/* |
|---|
| 91 | + * XCEDE: Extended CEDE states discovered through the |
|---|
| 92 | + * "ibm,get-system-parameter" RTAS call with the token |
|---|
| 93 | + * CEDE_LATENCY_TOKEN |
|---|
| 94 | + */ |
|---|
| 95 | + |
|---|
| 96 | +/* |
|---|
| 97 | + * Section 7.3.16 System Parameters Option of PAPR version 2.8.1 has a |
|---|
| 98 | + * table with all the parameters to ibm,get-system-parameter. |
|---|
| 99 | + * CEDE_LATENCY_TOKEN corresponds to the token value for Cede Latency |
|---|
| 100 | + * Settings Information. |
|---|
| 101 | + */ |
|---|
| 102 | +#define CEDE_LATENCY_TOKEN 45 |
|---|
| 103 | + |
|---|
| 104 | +/* |
|---|
| 105 | + * If the platform supports the cede latency settings information system |
|---|
| 106 | + * parameter it must provide the following information in the NULL terminated |
|---|
| 107 | + * parameter string: |
|---|
| 108 | + * |
|---|
| 109 | + * a. The first byte is the length "N" of each cede latency setting record minus |
|---|
| 110 | + * one (zero indicates a length of 1 byte). |
|---|
| 111 | + * |
|---|
| 112 | + * b. For each supported cede latency setting a cede latency setting record |
|---|
| 113 | + * consisting of the first "N" bytes as per the following table. |
|---|
| 114 | + * |
|---|
| 115 | + * ----------------------------- |
|---|
| 116 | + * | Field | Field | |
|---|
| 117 | + * | Name | Length | |
|---|
| 118 | + * ----------------------------- |
|---|
| 119 | + * | Cede Latency | 1 Byte | |
|---|
| 120 | + * | Specifier Value | | |
|---|
| 121 | + * ----------------------------- |
|---|
| 122 | + * | Maximum wakeup | | |
|---|
| 123 | + * | latency in | 8 Bytes | |
|---|
| 124 | + * | tb-ticks | | |
|---|
| 125 | + * ----------------------------- |
|---|
| 126 | + * | Responsive to | | |
|---|
| 127 | + * | external | 1 Byte | |
|---|
| 128 | + * | interrupts | | |
|---|
| 129 | + * ----------------------------- |
|---|
| 130 | + * |
|---|
| 131 | + * This version has cede latency record size = 10. |
|---|
| 132 | + * |
|---|
| 133 | + * The structure xcede_latency_payload represents a) and b) with |
|---|
| 134 | + * xcede_latency_record representing the table in b). |
|---|
| 135 | + * |
|---|
| 136 | + * xcede_latency_parameter is what gets returned by |
|---|
| 137 | + * ibm,get-system-parameter RTAS call when made with |
|---|
| 138 | + * CEDE_LATENCY_TOKEN. |
|---|
| 139 | + * |
|---|
| 140 | + * These structures are only used to represent the data obtained by the RTAS |
|---|
| 141 | + * call. The data is in big-endian. |
|---|
| 142 | + */ |
|---|
| 143 | +struct xcede_latency_record { |
|---|
| 144 | + u8 hint; |
|---|
| 145 | + __be64 latency_ticks; |
|---|
| 146 | + u8 wake_on_irqs; |
|---|
| 147 | +} __packed; |
|---|
| 148 | + |
|---|
| 149 | +// Make space for 16 records, which "should be enough". |
|---|
| 150 | +struct xcede_latency_payload { |
|---|
| 151 | + u8 record_size; |
|---|
| 152 | + struct xcede_latency_record records[16]; |
|---|
| 153 | +} __packed; |
|---|
| 154 | + |
|---|
| 155 | +struct xcede_latency_parameter { |
|---|
| 156 | + __be16 payload_size; |
|---|
| 157 | + struct xcede_latency_payload payload; |
|---|
| 158 | + u8 null_char; |
|---|
| 159 | +} __packed; |
|---|
| 160 | + |
|---|
| 161 | +static unsigned int nr_xcede_records; |
|---|
| 162 | +static struct xcede_latency_parameter xcede_latency_parameter __initdata; |
|---|
| 163 | + |
|---|
| 164 | +static int __init parse_cede_parameters(void) |
|---|
| 165 | +{ |
|---|
| 166 | + struct xcede_latency_payload *payload; |
|---|
| 167 | + u32 total_xcede_records_size; |
|---|
| 168 | + u8 xcede_record_size; |
|---|
| 169 | + u16 payload_size; |
|---|
| 170 | + int ret, i; |
|---|
| 171 | + |
|---|
| 172 | + ret = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, |
|---|
| 173 | + NULL, CEDE_LATENCY_TOKEN, __pa(&xcede_latency_parameter), |
|---|
| 174 | + sizeof(xcede_latency_parameter)); |
|---|
| 175 | + if (ret) { |
|---|
| 176 | + pr_err("xcede: Error parsing CEDE_LATENCY_TOKEN\n"); |
|---|
| 177 | + return ret; |
|---|
| 178 | + } |
|---|
| 179 | + |
|---|
| 180 | + payload_size = be16_to_cpu(xcede_latency_parameter.payload_size); |
|---|
| 181 | + payload = &xcede_latency_parameter.payload; |
|---|
| 182 | + |
|---|
| 183 | + xcede_record_size = payload->record_size + 1; |
|---|
| 184 | + |
|---|
| 185 | + if (xcede_record_size != sizeof(struct xcede_latency_record)) { |
|---|
| 186 | + pr_err("xcede: Expected record-size %lu. Observed size %u.\n", |
|---|
| 187 | + sizeof(struct xcede_latency_record), xcede_record_size); |
|---|
| 188 | + return -EINVAL; |
|---|
| 189 | + } |
|---|
| 190 | + |
|---|
| 191 | + pr_info("xcede: xcede_record_size = %d\n", xcede_record_size); |
|---|
| 192 | + |
|---|
| 193 | + /* |
|---|
| 194 | + * Since the payload_size includes the last NULL byte and the |
|---|
| 195 | + * xcede_record_size, the remaining bytes correspond to array of all |
|---|
| 196 | + * cede_latency settings. |
|---|
| 197 | + */ |
|---|
| 198 | + total_xcede_records_size = payload_size - 2; |
|---|
| 199 | + nr_xcede_records = total_xcede_records_size / xcede_record_size; |
|---|
| 200 | + |
|---|
| 201 | + for (i = 0; i < nr_xcede_records; i++) { |
|---|
| 202 | + struct xcede_latency_record *record = &payload->records[i]; |
|---|
| 203 | + u64 latency_ticks = be64_to_cpu(record->latency_ticks); |
|---|
| 204 | + u8 wake_on_irqs = record->wake_on_irqs; |
|---|
| 205 | + u8 hint = record->hint; |
|---|
| 206 | + |
|---|
| 207 | + pr_info("xcede: Record %d : hint = %u, latency = 0x%llx tb ticks, Wake-on-irq = %u\n", |
|---|
| 208 | + i, hint, latency_ticks, wake_on_irqs); |
|---|
| 209 | + } |
|---|
| 210 | + |
|---|
| 211 | + return 0; |
|---|
| 212 | +} |
|---|
| 213 | + |
|---|
| 214 | +#define NR_DEDICATED_STATES 2 /* snooze, CEDE */ |
|---|
| 215 | +static u8 cede_latency_hint[NR_DEDICATED_STATES]; |
|---|
| 216 | + |
|---|
| 112 | 217 | static int dedicated_cede_loop(struct cpuidle_device *dev, |
|---|
| 113 | 218 | struct cpuidle_driver *drv, |
|---|
| 114 | 219 | int index) |
|---|
| 115 | 220 | { |
|---|
| 116 | | - unsigned long in_purr; |
|---|
| 221 | + u8 old_latency_hint; |
|---|
| 117 | 222 | |
|---|
| 118 | | - idle_loop_prolog(&in_purr); |
|---|
| 223 | + pseries_idle_prolog(); |
|---|
| 119 | 224 | get_lppaca()->donate_dedicated_cpu = 1; |
|---|
| 225 | + old_latency_hint = get_lppaca()->cede_latency_hint; |
|---|
| 226 | + get_lppaca()->cede_latency_hint = cede_latency_hint[index]; |
|---|
| 120 | 227 | |
|---|
| 121 | 228 | HMT_medium(); |
|---|
| 122 | 229 | check_and_cede_processor(); |
|---|
| 123 | 230 | |
|---|
| 124 | 231 | local_irq_disable(); |
|---|
| 125 | 232 | get_lppaca()->donate_dedicated_cpu = 0; |
|---|
| 233 | + get_lppaca()->cede_latency_hint = old_latency_hint; |
|---|
| 126 | 234 | |
|---|
| 127 | | - idle_loop_epilog(in_purr); |
|---|
| 235 | + pseries_idle_epilog(); |
|---|
| 128 | 236 | |
|---|
| 129 | 237 | return index; |
|---|
| 130 | 238 | } |
|---|
| .. | .. |
|---|
| 133 | 241 | struct cpuidle_driver *drv, |
|---|
| 134 | 242 | int index) |
|---|
| 135 | 243 | { |
|---|
| 136 | | - unsigned long in_purr; |
|---|
| 137 | 244 | |
|---|
| 138 | | - idle_loop_prolog(&in_purr); |
|---|
| 245 | + pseries_idle_prolog(); |
|---|
| 139 | 246 | |
|---|
| 140 | 247 | /* |
|---|
| 141 | 248 | * Yield the processor to the hypervisor. We return if |
|---|
| .. | .. |
|---|
| 147 | 254 | check_and_cede_processor(); |
|---|
| 148 | 255 | |
|---|
| 149 | 256 | local_irq_disable(); |
|---|
| 150 | | - idle_loop_epilog(in_purr); |
|---|
| 257 | + pseries_idle_epilog(); |
|---|
| 151 | 258 | |
|---|
| 152 | 259 | return index; |
|---|
| 153 | 260 | } |
|---|
| .. | .. |
|---|
| 155 | 262 | /* |
|---|
| 156 | 263 | * States for dedicated partition case. |
|---|
| 157 | 264 | */ |
|---|
| 158 | | -static struct cpuidle_state dedicated_states[] = { |
|---|
| 265 | +static struct cpuidle_state dedicated_states[NR_DEDICATED_STATES] = { |
|---|
| 159 | 266 | { /* Snooze */ |
|---|
| 160 | 267 | .name = "snooze", |
|---|
| 161 | 268 | .desc = "snooze", |
|---|
| .. | .. |
|---|
| 236 | 343 | return 0; |
|---|
| 237 | 344 | } |
|---|
| 238 | 345 | |
|---|
| 346 | +static void __init fixup_cede0_latency(void) |
|---|
| 347 | +{ |
|---|
| 348 | + struct xcede_latency_payload *payload; |
|---|
| 349 | + u64 min_latency_us; |
|---|
| 350 | + int i; |
|---|
| 351 | + |
|---|
| 352 | + min_latency_us = dedicated_states[1].exit_latency; // CEDE latency |
|---|
| 353 | + |
|---|
| 354 | + if (parse_cede_parameters()) |
|---|
| 355 | + return; |
|---|
| 356 | + |
|---|
| 357 | + pr_info("cpuidle: Skipping the %d Extended CEDE idle states\n", |
|---|
| 358 | + nr_xcede_records); |
|---|
| 359 | + |
|---|
| 360 | + payload = &xcede_latency_parameter.payload; |
|---|
| 361 | + for (i = 0; i < nr_xcede_records; i++) { |
|---|
| 362 | + struct xcede_latency_record *record = &payload->records[i]; |
|---|
| 363 | + u64 latency_tb = be64_to_cpu(record->latency_ticks); |
|---|
| 364 | + u64 latency_us = DIV_ROUND_UP_ULL(tb_to_ns(latency_tb), NSEC_PER_USEC); |
|---|
| 365 | + |
|---|
| 366 | + if (latency_us == 0) |
|---|
| 367 | + pr_warn("cpuidle: xcede record %d has an unrealistic latency of 0us.\n", i); |
|---|
| 368 | + |
|---|
| 369 | + if (latency_us < min_latency_us) |
|---|
| 370 | + min_latency_us = latency_us; |
|---|
| 371 | + } |
|---|
| 372 | + |
|---|
| 373 | + /* |
|---|
| 374 | + * By default, we assume that CEDE(0) has exit latency 10us, |
|---|
| 375 | + * since there is no way for us to query from the platform. |
|---|
| 376 | + * |
|---|
| 377 | + * However, if the wakeup latency of an Extended CEDE state is |
|---|
| 378 | + * smaller than 10us, then we can be sure that CEDE(0) |
|---|
| 379 | + * requires no more than that. |
|---|
| 380 | + * |
|---|
| 381 | + * Perform the fix-up. |
|---|
| 382 | + */ |
|---|
| 383 | + if (min_latency_us < dedicated_states[1].exit_latency) { |
|---|
| 384 | + /* |
|---|
| 385 | + * We set a minimum of 1us wakeup latency for cede0 to |
|---|
| 386 | + * distinguish it from snooze |
|---|
| 387 | + */ |
|---|
| 388 | + u64 cede0_latency = 1; |
|---|
| 389 | + |
|---|
| 390 | + if (min_latency_us > cede0_latency) |
|---|
| 391 | + cede0_latency = min_latency_us - 1; |
|---|
| 392 | + |
|---|
| 393 | + dedicated_states[1].exit_latency = cede0_latency; |
|---|
| 394 | + dedicated_states[1].target_residency = 10 * (cede0_latency); |
|---|
| 395 | + pr_info("cpuidle: Fixed up CEDE exit latency to %llu us\n", |
|---|
| 396 | + cede0_latency); |
|---|
| 397 | + } |
|---|
| 398 | + |
|---|
| 399 | +} |
|---|
| 400 | + |
|---|
| 239 | 401 | /* |
|---|
| 240 | 402 | * pseries_idle_probe() |
|---|
| 241 | 403 | * Choose state table for shared versus dedicated partition |
|---|
| 242 | 404 | */ |
|---|
| 243 | | -static int pseries_idle_probe(void) |
|---|
| 405 | +static int __init pseries_idle_probe(void) |
|---|
| 244 | 406 | { |
|---|
| 245 | 407 | |
|---|
| 246 | 408 | if (cpuidle_disable != IDLE_NO_OVERRIDE) |
|---|
| .. | .. |
|---|
| 257 | 419 | cpuidle_state_table = shared_states; |
|---|
| 258 | 420 | max_idle_state = ARRAY_SIZE(shared_states); |
|---|
| 259 | 421 | } else { |
|---|
| 422 | + /* |
|---|
| 423 | + * Use firmware provided latency values |
|---|
| 424 | + * starting with POWER10 platforms. In the |
|---|
| 425 | + * case that we are running on a POWER10 |
|---|
| 426 | + * platform but in an earlier compat mode, we |
|---|
| 427 | + * can still use the firmware provided values. |
|---|
| 428 | + * |
|---|
| 429 | + * However, on platforms prior to POWER10, we |
|---|
| 430 | + * cannot rely on the accuracy of the firmware |
|---|
| 431 | + * provided latency values. On such platforms, |
|---|
| 432 | + * go with the conservative default estimate |
|---|
| 433 | + * of 10us. |
|---|
| 434 | + */ |
|---|
| 435 | + if (cpu_has_feature(CPU_FTR_ARCH_31) || pvr_version_is(PVR_POWER10)) |
|---|
| 436 | + fixup_cede0_latency(); |
|---|
| 260 | 437 | cpuidle_state_table = dedicated_states; |
|---|
| 261 | | - max_idle_state = ARRAY_SIZE(dedicated_states); |
|---|
| 438 | + max_idle_state = NR_DEDICATED_STATES; |
|---|
| 262 | 439 | } |
|---|
| 263 | 440 | } else |
|---|
| 264 | 441 | return -ENODEV; |
|---|