| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
|---|
| 2 | 3 | |
|---|
| 3 | 4 | #include <linux/kernel.h> |
|---|
| .. | .. |
|---|
| 40 | 41 | * TSC can be unstable due to cpufreq or due to unsynced TSCs |
|---|
| 41 | 42 | */ |
|---|
| 42 | 43 | static int __read_mostly tsc_unstable; |
|---|
| 44 | +static unsigned int __initdata tsc_early_khz; |
|---|
| 43 | 45 | |
|---|
| 44 | 46 | static DEFINE_STATIC_KEY_FALSE(__use_tsc); |
|---|
| 45 | 47 | |
|---|
| .. | .. |
|---|
| 52 | 54 | |
|---|
| 53 | 55 | struct cyc2ns { |
|---|
| 54 | 56 | struct cyc2ns_data data[2]; /* 0 + 2*16 = 32 */ |
|---|
| 55 | | - seqcount_t seq; /* 32 + 4 = 36 */ |
|---|
| 57 | + seqcount_latch_t seq; /* 32 + 4 = 36 */ |
|---|
| 56 | 58 | |
|---|
| 57 | 59 | }; /* fits one cacheline */ |
|---|
| 58 | 60 | |
|---|
| 59 | 61 | static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); |
|---|
| 60 | 62 | |
|---|
| 61 | | -void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data) |
|---|
| 63 | +static int __init tsc_early_khz_setup(char *buf) |
|---|
| 64 | +{ |
|---|
| 65 | + return kstrtouint(buf, 0, &tsc_early_khz); |
|---|
| 66 | +} |
|---|
| 67 | +early_param("tsc_early_khz", tsc_early_khz_setup); |
|---|
| 68 | + |
|---|
| 69 | +__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data) |
|---|
| 62 | 70 | { |
|---|
| 63 | 71 | int seq, idx; |
|---|
| 64 | 72 | |
|---|
| 65 | 73 | preempt_disable_notrace(); |
|---|
| 66 | 74 | |
|---|
| 67 | 75 | do { |
|---|
| 68 | | - seq = this_cpu_read(cyc2ns.seq.sequence); |
|---|
| 76 | + seq = this_cpu_read(cyc2ns.seq.seqcount.sequence); |
|---|
| 69 | 77 | idx = seq & 1; |
|---|
| 70 | 78 | |
|---|
| 71 | 79 | data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset); |
|---|
| 72 | 80 | data->cyc2ns_mul = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul); |
|---|
| 73 | 81 | data->cyc2ns_shift = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift); |
|---|
| 74 | 82 | |
|---|
| 75 | | - } while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence))); |
|---|
| 83 | + } while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence))); |
|---|
| 76 | 84 | } |
|---|
| 77 | 85 | |
|---|
| 78 | | -void __always_inline cyc2ns_read_end(void) |
|---|
| 86 | +__always_inline void cyc2ns_read_end(void) |
|---|
| 79 | 87 | { |
|---|
| 80 | 88 | preempt_enable_notrace(); |
|---|
| 81 | 89 | } |
|---|
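The hunk above does two things: it adds the `tsc_early_khz` early boot parameter (consumed later in `tsc_init()` to skip early TSC calibration), and it converts the per-CPU `cyc2ns` data from a plain `seqcount_t` to a `seqcount_latch_t`. With the latch scheme the writer keeps two copies of the mul/shift/offset data and bumps the sequence around each copy's update, so `cyc2ns_read_begin()` can always pick a stable copy (`idx = seq & 1`) without blocking, and only retries if the sequence moved underneath it. Below is a minimal userspace sketch of that latch idiom, not the kernel implementation: `latch_demo`, `latch_write` and `latch_read` are made-up names, C11 atomics stand in for the kernel's `seqcount_latch_t` helpers, and the memory ordering is simplified.

```c
/* Minimal userspace sketch of the latch read/write idiom used by cyc2ns.
 * Illustrative only: names are made up and memory ordering is simplified. */
#include <stdatomic.h>
#include <stdio.h>

struct scale { unsigned int mul, shift; long long offset; };

struct latch_demo {
	atomic_uint seq;	/* low bit selects the stable data slot        */
	struct scale data[2];	/* two copies, the writer updates them in turn */
};

static void latch_write(struct latch_demo *d, struct scale new_scale)
{
	unsigned int seq = atomic_load_explicit(&d->seq, memory_order_relaxed);

	/* seq becomes odd: readers use data[1] while data[0] is rewritten */
	atomic_store_explicit(&d->seq, seq + 1, memory_order_release);
	d->data[0] = new_scale;
	/* seq becomes even again: readers switch to the fresh data[0] */
	atomic_store_explicit(&d->seq, seq + 2, memory_order_release);
	d->data[1] = new_scale;
}

static struct scale latch_read(struct latch_demo *d)
{
	struct scale s;
	unsigned int seq;

	do {
		seq = atomic_load_explicit(&d->seq, memory_order_acquire);
		s = d->data[seq & 1];	/* like cyc2ns_read_begin(): idx = seq & 1 */
	} while (seq != atomic_load_explicit(&d->seq, memory_order_acquire));

	return s;
}

int main(void)
{
	struct latch_demo d = { .seq = 0, .data = { { 1, 0, 0 }, { 1, 0, 0 } } };

	latch_write(&d, (struct scale){ .mul = 838, .shift = 21, .offset = 0 });
	printf("mul=%u shift=%u\n", latch_read(&d).mul, latch_read(&d).shift);
	return 0;
}
```

A reader that observes an odd sequence simply uses the second slot, which the writer is guaranteed not to be touching at that moment; that is why `cyc2ns_read_begin()` never has to spin on a writer the way a conventional seqcount reader does.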
| .. | .. |
|---|
| 178 | 186 | { |
|---|
| 179 | 187 | struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns); |
|---|
| 180 | 188 | |
|---|
| 181 | | - seqcount_init(&c2n->seq); |
|---|
| 189 | + seqcount_latch_init(&c2n->seq); |
|---|
| 182 | 190 | __set_cyc2ns_scale(tsc_khz, smp_processor_id(), rdtsc()); |
|---|
| 183 | 191 | } |
|---|
| 184 | 192 | |
|---|
| 185 | 193 | /* |
|---|
| 186 | 194 | * Secondary CPUs do not run through tsc_init(), so set up |
|---|
| 187 | 195 | * all the scale factors for all CPUs, assuming the same |
|---|
| 188 | | - * speed as the bootup CPU. (cpufreq notifiers will fix this |
|---|
| 189 | | - * up if their speed diverges) |
|---|
| 196 | + * speed as the bootup CPU. |
|---|
| 190 | 197 | */ |
|---|
| 191 | 198 | static void __init cyc2ns_init_secondary_cpus(void) |
|---|
| 192 | 199 | { |
|---|
| .. | .. |
|---|
| 196 | 203 | |
|---|
| 197 | 204 | for_each_possible_cpu(cpu) { |
|---|
| 198 | 205 | if (cpu != this_cpu) { |
|---|
| 199 | | - seqcount_init(&c2n->seq); |
|---|
| 206 | + seqcount_latch_init(&c2n->seq); |
|---|
| 200 | 207 | c2n = per_cpu_ptr(&cyc2ns, cpu); |
|---|
| 201 | 208 | c2n->data[0] = data[0]; |
|---|
| 202 | 209 | c2n->data[1] = data[1]; |
|---|
| .. | .. |
|---|
| 247 | 254 | |
|---|
| 248 | 255 | bool using_native_sched_clock(void) |
|---|
| 249 | 256 | { |
|---|
| 250 | | - return pv_time_ops.sched_clock == native_sched_clock; |
|---|
| 257 | + return pv_ops.time.sched_clock == native_sched_clock; |
|---|
| 251 | 258 | } |
|---|
| 252 | 259 | #else |
|---|
| 253 | 260 | unsigned long long |
|---|
| .. | .. |
|---|
| 283 | 290 | __setup("notsc", notsc_setup); |
|---|
| 284 | 291 | |
|---|
| 285 | 292 | static int no_sched_irq_time; |
|---|
| 293 | +static int no_tsc_watchdog; |
|---|
| 286 | 294 | |
|---|
| 287 | 295 | static int __init tsc_setup(char *str) |
|---|
| 288 | 296 | { |
|---|
| .. | .. |
|---|
| 292 | 300 | no_sched_irq_time = 1; |
|---|
| 293 | 301 | if (!strcmp(str, "unstable")) |
|---|
| 294 | 302 | mark_tsc_unstable("boot parameter"); |
|---|
| 303 | + if (!strcmp(str, "nowatchdog")) |
|---|
| 304 | + no_tsc_watchdog = 1; |
|---|
| 295 | 305 | return 1; |
|---|
| 296 | 306 | } |
|---|
| 297 | 307 | |
|---|
| 298 | 308 | __setup("tsc=", tsc_setup); |
|---|
| 299 | 309 | |
|---|
| 300 | | -#define MAX_RETRIES 5 |
|---|
| 301 | | -#define SMI_TRESHOLD 50000 |
|---|
| 310 | +#define MAX_RETRIES 5 |
|---|
| 311 | +#define TSC_DEFAULT_THRESHOLD 0x20000 |
|---|
| 302 | 312 | |
|---|
| 303 | 313 | /* |
|---|
| 304 | | - * Read TSC and the reference counters. Take care of SMI disturbance |
|---|
| 314 | + * Read TSC and the reference counters. Take care of any disturbances |
|---|
| 305 | 315 | */ |
|---|
| 306 | 316 | static u64 tsc_read_refs(u64 *p, int hpet) |
|---|
| 307 | 317 | { |
|---|
| 308 | 318 | u64 t1, t2; |
|---|
| 319 | + u64 thresh = tsc_khz ? tsc_khz >> 5 : TSC_DEFAULT_THRESHOLD; |
|---|
| 309 | 320 | int i; |
|---|
| 310 | 321 | |
|---|
| 311 | 322 | for (i = 0; i < MAX_RETRIES; i++) { |
|---|
| .. | .. |
|---|
| 315 | 326 | else |
|---|
| 316 | 327 | *p = acpi_pm_read_early(); |
|---|
| 317 | 328 | t2 = get_cycles(); |
|---|
| 318 | | - if ((t2 - t1) < SMI_TRESHOLD) |
|---|
| 329 | + if ((t2 - t1) < thresh) |
|---|
| 319 | 330 | return t2; |
|---|
| 320 | 331 | } |
|---|
| 321 | 332 | return ULLONG_MAX; |
|---|
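In `tsc_read_refs()` above, the fixed `SMI_TRESHOLD` of 50,000 cycles is replaced by a threshold derived from the TSC frequency once it is known: `tsc_khz >> 5` cycles, i.e. the number of cycles in 1/32 of a millisecond (about 31 µs), with `0x20000` cycles as the fallback before `tsc_khz` has been calibrated. A tiny standalone calculation makes the scaling concrete; the frequencies below are arbitrary example inputs, not values taken from the patch.

```c
/* Worked example of the tsc_read_refs() retry threshold shown above.
 * The TSC frequencies are arbitrary illustrative inputs. */
#include <stdio.h>

int main(void)
{
	unsigned long freqs_khz[] = { 0, 1000000, 2100000, 3500000 }; /* 0 => not yet calibrated */
	unsigned long tsc_default_threshold = 0x20000;                /* fallback from the patch */

	for (unsigned int i = 0; i < sizeof(freqs_khz) / sizeof(freqs_khz[0]); i++) {
		unsigned long khz = freqs_khz[i];
		unsigned long thresh = khz ? khz >> 5 : tsc_default_threshold;

		if (khz)
			/* khz cycles elapse per millisecond, so thresh cycles ~ thresh * 1000 / khz us */
			printf("tsc_khz=%7lu -> thresh=%6lu cycles (~%.1f us)\n",
			       khz, thresh, (double)thresh * 1000.0 / khz);
		else
			printf("tsc_khz unknown -> thresh=%lu cycles (default)\n", thresh);
	}
	return 0;
}
```

Whatever the calibrated frequency, the allowed window between the two `get_cycles()` reads works out to 1/32 ms of TSC time, so the retry logic now scales with CPU speed instead of relying on one hard-coded cycle count.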
| .. | .. |
|---|
| 473 | 484 | * transition from one expected value to another with a fairly |
|---|
| 474 | 485 | * high accuracy, and we didn't miss any events. We can thus |
|---|
| 475 | 486 | * use the TSC value at the transitions to calculate a pretty |
|---|
| 476 | | - * good value for the TSC frequencty. |
|---|
| 487 | + * good value for the TSC frequency. |
|---|
| 477 | 488 | */ |
|---|
| 478 | 489 | static inline int pit_verify_msb(unsigned char val) |
|---|
| 479 | 490 | { |
|---|
| .. | .. |
|---|
| 628 | 639 | |
|---|
| 629 | 640 | crystal_khz = ecx_hz / 1000; |
|---|
| 630 | 641 | |
|---|
| 631 | | - if (crystal_khz == 0) { |
|---|
| 632 | | - switch (boot_cpu_data.x86_model) { |
|---|
| 633 | | - case INTEL_FAM6_SKYLAKE_MOBILE: |
|---|
| 634 | | - case INTEL_FAM6_SKYLAKE_DESKTOP: |
|---|
| 635 | | - case INTEL_FAM6_KABYLAKE_MOBILE: |
|---|
| 636 | | - case INTEL_FAM6_KABYLAKE_DESKTOP: |
|---|
| 637 | | - crystal_khz = 24000; /* 24.0 MHz */ |
|---|
| 638 | | - break; |
|---|
| 639 | | - case INTEL_FAM6_ATOM_GOLDMONT_X: |
|---|
| 640 | | - crystal_khz = 25000; /* 25.0 MHz */ |
|---|
| 641 | | - break; |
|---|
| 642 | | - case INTEL_FAM6_ATOM_GOLDMONT: |
|---|
| 643 | | - crystal_khz = 19200; /* 19.2 MHz */ |
|---|
| 644 | | - break; |
|---|
| 645 | | - } |
|---|
| 642 | + /* |
|---|
| 643 | + * Denverton SoCs don't report crystal clock, and also don't support |
|---|
| 644 | + * CPUID.0x16 for the calculation below, so hardcode the 25MHz crystal |
|---|
| 645 | + * clock. |
|---|
| 646 | + */ |
|---|
| 647 | + if (crystal_khz == 0 && |
|---|
| 648 | + boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_D) |
|---|
| 649 | + crystal_khz = 25000; |
|---|
| 650 | + |
|---|
| 651 | + /* |
|---|
| 652 | + * TSC frequency reported directly by CPUID is a "hardware reported" |
|---|
| 653 | + * frequency and is the most accurate one so far we have. This |
|---|
| 654 | + * is considered a known frequency. |
|---|
| 655 | + */ |
|---|
| 656 | + if (crystal_khz != 0) |
|---|
| 657 | + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); |
|---|
| 658 | + |
|---|
| 659 | + /* |
|---|
| 660 | + * Some Intel SoCs like Skylake and Kabylake don't report the crystal |
|---|
| 661 | + * clock, but we can easily calculate it to a high degree of accuracy |
|---|
| 662 | + * by considering the crystal ratio and the CPU speed. |
|---|
| 663 | + */ |
|---|
| 664 | + if (crystal_khz == 0 && boot_cpu_data.cpuid_level >= 0x16) { |
|---|
| 665 | + unsigned int eax_base_mhz, ebx, ecx, edx; |
|---|
| 666 | + |
|---|
| 667 | + cpuid(0x16, &eax_base_mhz, &ebx, &ecx, &edx); |
|---|
| 668 | + crystal_khz = eax_base_mhz * 1000 * |
|---|
| 669 | + eax_denominator / ebx_numerator; |
|---|
| 646 | 670 | } |
|---|
| 647 | 671 | |
|---|
| 648 | 672 | if (crystal_khz == 0) |
|---|
| 649 | 673 | return 0; |
|---|
| 650 | | - /* |
|---|
| 651 | | - * TSC frequency determined by CPUID is a "hardware reported" |
|---|
| 652 | | - * frequency and is the most accurate one so far we have. This |
|---|
| 653 | | - * is considered a known frequency. |
|---|
| 654 | | - */ |
|---|
| 655 | | - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); |
|---|
| 656 | 674 | |
|---|
| 657 | 675 | /* |
|---|
| 658 | 676 | * For Atom SoCs TSC is the only reliable clocksource. |
|---|
| .. | .. |
|---|
| 660 | 678 | */ |
|---|
| 661 | 679 | if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT) |
|---|
| 662 | 680 | setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); |
|---|
| 681 | + |
|---|
| 682 | +#ifdef CONFIG_X86_LOCAL_APIC |
|---|
| 683 | + /* |
|---|
| 684 | + * The local APIC appears to be fed by the core crystal clock |
|---|
| 685 | + * (which sounds entirely sensible). We can set the global |
|---|
| 686 | + * lapic_timer_period here to avoid having to calibrate the APIC |
|---|
| 687 | + * timer later. |
|---|
| 688 | + */ |
|---|
| 689 | + lapic_timer_period = crystal_khz * 1000 / HZ; |
|---|
| 690 | +#endif |
|---|
| 663 | 691 | |
|---|
| 664 | 692 | return crystal_khz * ebx_numerator / eax_denominator; |
|---|
| 665 | 693 | } |
|---|
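In `native_calibrate_tsc()` above, when CPUID leaf 0x15 reports the numerator/denominator of the TSC/crystal ratio but not the crystal frequency itself (ECX = 0), the reworked code derives the crystal from the base frequency in leaf 0x16 and then converts back: `crystal_khz = eax_base_mhz * 1000 * eax_denominator / ebx_numerator`, and the function returns `crystal_khz * ebx_numerator / eax_denominator`. The sketch below runs that arithmetic on made-up leaf values (the CPUID numbers are hypothetical, not taken from any particular CPU), including the `lapic_timer_period` precomputation the patch adds; `HZ` is assumed to be 250 here, while the real value depends on the kernel configuration.

```c
/* Worked example of the CPUID 0x15/0x16 fallback above.
 * All CPUID leaf values are hypothetical; HZ is assumed to be 250. */
#include <stdio.h>

int main(void)
{
	unsigned int eax_denominator = 2;	/* hypothetical CPUID.0x15 EAX    */
	unsigned int ebx_numerator   = 176;	/* hypothetical CPUID.0x15 EBX    */
	unsigned int ecx_hz          = 0;	/* crystal frequency not reported */
	unsigned int eax_base_mhz    = 2100;	/* hypothetical CPUID.0x16 EAX    */
	unsigned int hz              = 250;	/* assumed CONFIG_HZ              */

	unsigned int crystal_khz = ecx_hz / 1000;

	/* Fall back to deriving the crystal from the base frequency (leaf 0x16) */
	if (crystal_khz == 0)
		crystal_khz = eax_base_mhz * 1000 * eax_denominator / ebx_numerator;

	unsigned int tsc_khz = crystal_khz * ebx_numerator / eax_denominator;
	unsigned int lapic_timer_period = crystal_khz * 1000 / hz;

	printf("crystal_khz           = %u\n", crystal_khz);		/* 23863   */
	printf("tsc_khz               = %u\n", tsc_khz);		/* 2099944 */
	printf("lapic ticks per jiffy = %u\n", lapic_timer_period);	/* 95452   */
	return 0;
}
```

Rounding in the integer divisions is why the fallback result here (2,099,944 kHz) is only approximately the 2,100,000 kHz base frequency fed in; the crystal-reported path, when available, avoids that round trip.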
| .. | .. |
|---|
| 703 | 731 | * zero. In each wait loop iteration we read the TSC and check |
|---|
| 704 | 732 | * the delta to the previous read. We keep track of the min |
|---|
| 705 | 733 | * and max values of that delta. The delta is mostly defined |
|---|
| 706 | | - * by the IO time of the PIT access, so we can detect when a |
|---|
| 707 | | - * SMI/SMM disturbance happened between the two reads. If the |
|---|
| 734 | + * by the IO time of the PIT access, so we can detect when |
|---|
| 735 | + * any disturbance happened between the two reads. If the |
|---|
| 708 | 736 | * maximum time is significantly larger than the minimum time, |
|---|
| 709 | 737 | * then we discard the result and have another try. |
|---|
| 710 | 738 | * |
|---|
| 711 | 739 | * 2) Reference counter. If available we use the HPET or the |
|---|
| 712 | 740 | * PMTIMER as a reference to check the sanity of that value. |
|---|
| 713 | 741 | * We use separate TSC readouts and check inside of the |
|---|
| 714 | | - * reference read for a SMI/SMM disturbance. We dicard |
|---|
| 742 | + * reference read for any possible disturbance. We dicard |
|---|
| 715 | 743 | * disturbed values here as well. We do that around the PIT |
|---|
| 716 | 744 | * calibration delay loop as we have to wait for a certain |
|---|
| 717 | 745 | * amount of time anyway. |
|---|
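The reference-counter method described in point 2 above brackets the PIT delay loop with two TSC reads and two HPET or ACPI PM timer reads, then derives the TSC frequency from how much reference time elapsed. The standalone sketch below redoes that arithmetic with the ACPI PM timer's fixed 3.579545 MHz rate; the readings are invented numbers chosen to represent roughly 50 ms at roughly 2.1 GHz, and the computation is a simplified version of what the kernel's `calc_pmtimer_ref()` helper does, not a copy of it.

```c
/* Worked example of the reference-counter calibration described above.
 * The TSC and PM-timer readings are invented; the math is simplified
 * relative to the kernel's calc_pmtimer_ref() helper. */
#include <stdio.h>

#define PMTMR_TICKS_PER_SEC 3579545ULL	/* ACPI PM timer rate */

int main(void)
{
	unsigned long long tsc1 = 1000000ULL, tsc2 = 1000000ULL + 105000000ULL;
	unsigned long long pm1  = 50000ULL,   pm2  = 50000ULL  + 178977ULL;

	unsigned long long delta_tsc = tsc2 - tsc1;	/* TSC cycles over the delay loop    */
	unsigned long long delta_pm  = pm2 - pm1;	/* PM timer ticks over the same span */

	/* Elapsed wall time in nanoseconds, from the fixed PM timer rate */
	unsigned long long elapsed_ns = delta_pm * 1000000000ULL / PMTMR_TICKS_PER_SEC;

	/* cycles * 1e6 / ns  ==  (cycles per second) / 1000  ==  kHz */
	unsigned long long tsc_khz = delta_tsc * 1000000ULL / elapsed_ns;

	printf("elapsed ~%llu ns, tsc_khz ~%llu\n", elapsed_ns, tsc_khz); /* ~2100000 kHz */
	return 0;
}
```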
| .. | .. |
|---|
| 744 | 772 | if (ref1 == ref2) |
|---|
| 745 | 773 | continue; |
|---|
| 746 | 774 | |
|---|
| 747 | | - /* Check, whether the sampling was disturbed by an SMI */ |
|---|
| 775 | + /* Check, whether the sampling was disturbed */ |
|---|
| 748 | 776 | if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) |
|---|
| 749 | 777 | continue; |
|---|
| 750 | 778 | |
|---|
| .. | .. |
|---|
| 936 | 964 | } |
|---|
| 937 | 965 | |
|---|
| 938 | 966 | #ifdef CONFIG_CPU_FREQ |
|---|
| 939 | | -/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency |
|---|
| 967 | +/* |
|---|
| 968 | + * Frequency scaling support. Adjust the TSC based timer when the CPU frequency |
|---|
| 940 | 969 | * changes. |
|---|
| 941 | 970 | * |
|---|
| 942 | | - * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's |
|---|
| 943 | | - * not that important because current Opteron setups do not support |
|---|
| 944 | | - * scaling on SMP anyroads. |
|---|
| 971 | + * NOTE: On SMP the situation is not fixable in general, so simply mark the TSC |
|---|
| 972 | + * as unstable and give up in those cases. |
|---|
| 945 | 973 | * |
|---|
| 946 | 974 | * Should fix up last_tsc too. Currently gettimeofday in the |
|---|
| 947 | 975 | * first tick after the change will be slightly wrong. |
|---|
| .. | .. |
|---|
| 955 | 983 | void *data) |
|---|
| 956 | 984 | { |
|---|
| 957 | 985 | struct cpufreq_freqs *freq = data; |
|---|
| 958 | | - unsigned long *lpj; |
|---|
| 959 | 986 | |
|---|
| 960 | | - lpj = &boot_cpu_data.loops_per_jiffy; |
|---|
| 961 | | -#ifdef CONFIG_SMP |
|---|
| 962 | | - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) |
|---|
| 963 | | - lpj = &cpu_data(freq->cpu).loops_per_jiffy; |
|---|
| 964 | | -#endif |
|---|
| 987 | + if (num_online_cpus() > 1) { |
|---|
| 988 | + mark_tsc_unstable("cpufreq changes on SMP"); |
|---|
| 989 | + return 0; |
|---|
| 990 | + } |
|---|
| 965 | 991 | |
|---|
| 966 | 992 | if (!ref_freq) { |
|---|
| 967 | 993 | ref_freq = freq->old; |
|---|
| 968 | | - loops_per_jiffy_ref = *lpj; |
|---|
| 994 | + loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy; |
|---|
| 969 | 995 | tsc_khz_ref = tsc_khz; |
|---|
| 970 | 996 | } |
|---|
| 997 | + |
|---|
| 971 | 998 | if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || |
|---|
| 972 | | - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { |
|---|
| 973 | | - *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); |
|---|
| 999 | + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { |
|---|
| 1000 | + boot_cpu_data.loops_per_jiffy = |
|---|
| 1001 | + cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); |
|---|
| 974 | 1002 | |
|---|
| 975 | 1003 | tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); |
|---|
| 976 | 1004 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) |
|---|
| 977 | 1005 | mark_tsc_unstable("cpufreq changes"); |
|---|
| 978 | 1006 | |
|---|
| 979 | | - set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc()); |
|---|
| 1007 | + set_cyc2ns_scale(tsc_khz, freq->policy->cpu, rdtsc()); |
|---|
| 980 | 1008 | } |
|---|
| 981 | 1009 | |
|---|
| 982 | 1010 | return 0; |
|---|
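On an SMP system the reworked notifier above simply marks the TSC unstable, since per-CPU frequency changes cannot be compensated; on UP it rescales `boot_cpu_data.loops_per_jiffy` and `tsc_khz` in proportion to the new frequency with `cpufreq_scale()`, which boils down to `ref * new_freq / ref_freq`. Here is a standalone illustration of that scaling; `cpufreq_scale()` is re-implemented in simplified form and the frequencies and reference values are made up.

```c
/* Worked example of the rescaling done by the cpufreq notifier above.
 * cpufreq_scale() is a simplified stand-in for the kernel helper of the
 * same name; input frequencies and loops_per_jiffy are illustrative. */
#include <stdio.h>

/* Simplified version of the kernel's cpufreq_scale(old, div, mult) */
static unsigned long cpufreq_scale(unsigned long old, unsigned int div, unsigned int mult)
{
	unsigned long long v = (unsigned long long)old * mult;

	return (unsigned long)(v / div);
}

int main(void)
{
	unsigned int ref_freq = 800000;			/* kHz at the first notification  */
	unsigned long loops_per_jiffy_ref = 4000000UL;
	unsigned long tsc_khz_ref = 800000UL;

	unsigned int new_freq = 2400000;		/* kHz after the frequency change */

	unsigned long lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, new_freq);
	unsigned long khz = cpufreq_scale(tsc_khz_ref, ref_freq, new_freq);

	printf("loops_per_jiffy: %lu -> %lu\n", loops_per_jiffy_ref, lpj);	/* 12000000 */
	printf("tsc_khz:         %lu -> %lu\n", tsc_khz_ref, khz);		/* 2400000  */
	return 0;
}
```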
| .. | .. |
|---|
| 1087 | 1115 | sched_clock_tick_stable(); |
|---|
| 1088 | 1116 | } |
|---|
| 1089 | 1117 | |
|---|
| 1118 | +static int tsc_cs_enable(struct clocksource *cs) |
|---|
| 1119 | +{ |
|---|
| 1120 | + vclocks_set_used(VDSO_CLOCKMODE_TSC); |
|---|
| 1121 | + return 0; |
|---|
| 1122 | +} |
|---|
| 1123 | + |
|---|
| 1090 | 1124 | /* |
|---|
| 1091 | 1125 | * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc() |
|---|
| 1092 | 1126 | */ |
|---|
| 1093 | 1127 | static struct clocksource clocksource_tsc_early = { |
|---|
| 1094 | | - .name = "tsc-early", |
|---|
| 1095 | | - .rating = 299, |
|---|
| 1096 | | - .read = read_tsc, |
|---|
| 1097 | | - .mask = CLOCKSOURCE_MASK(64), |
|---|
| 1098 | | - .flags = CLOCK_SOURCE_IS_CONTINUOUS | |
|---|
| 1128 | + .name = "tsc-early", |
|---|
| 1129 | + .rating = 299, |
|---|
| 1130 | + .read = read_tsc, |
|---|
| 1131 | + .mask = CLOCKSOURCE_MASK(64), |
|---|
| 1132 | + .flags = CLOCK_SOURCE_IS_CONTINUOUS | |
|---|
| 1099 | 1133 | CLOCK_SOURCE_MUST_VERIFY, |
|---|
| 1100 | | - .archdata = { .vclock_mode = VCLOCK_TSC }, |
|---|
| 1134 | + .vdso_clock_mode = VDSO_CLOCKMODE_TSC, |
|---|
| 1135 | + .enable = tsc_cs_enable, |
|---|
| 1101 | 1136 | .resume = tsc_resume, |
|---|
| 1102 | 1137 | .mark_unstable = tsc_cs_mark_unstable, |
|---|
| 1103 | 1138 | .tick_stable = tsc_cs_tick_stable, |
|---|
| .. | .. |
|---|
| 1110 | 1145 | * been found good. |
|---|
| 1111 | 1146 | */ |
|---|
| 1112 | 1147 | static struct clocksource clocksource_tsc = { |
|---|
| 1113 | | - .name = "tsc", |
|---|
| 1114 | | - .rating = 300, |
|---|
| 1115 | | - .read = read_tsc, |
|---|
| 1116 | | - .mask = CLOCKSOURCE_MASK(64), |
|---|
| 1117 | | - .flags = CLOCK_SOURCE_IS_CONTINUOUS | |
|---|
| 1148 | + .name = "tsc", |
|---|
| 1149 | + .rating = 300, |
|---|
| 1150 | + .read = read_tsc, |
|---|
| 1151 | + .mask = CLOCKSOURCE_MASK(64), |
|---|
| 1152 | + .flags = CLOCK_SOURCE_IS_CONTINUOUS | |
|---|
| 1118 | 1153 | CLOCK_SOURCE_VALID_FOR_HRES | |
|---|
| 1119 | | - CLOCK_SOURCE_MUST_VERIFY, |
|---|
| 1120 | | - .archdata = { .vclock_mode = VCLOCK_TSC }, |
|---|
| 1154 | + CLOCK_SOURCE_MUST_VERIFY | |
|---|
| 1155 | + CLOCK_SOURCE_VERIFY_PERCPU, |
|---|
| 1156 | + .vdso_clock_mode = VDSO_CLOCKMODE_TSC, |
|---|
| 1157 | + .enable = tsc_cs_enable, |
|---|
| 1121 | 1158 | .resume = tsc_resume, |
|---|
| 1122 | 1159 | .mark_unstable = tsc_cs_mark_unstable, |
|---|
| 1123 | 1160 | .tick_stable = tsc_cs_tick_stable, |
|---|
| .. | .. |
|---|
| 1141 | 1178 | |
|---|
| 1142 | 1179 | EXPORT_SYMBOL_GPL(mark_tsc_unstable); |
|---|
| 1143 | 1180 | |
|---|
| 1181 | +static void __init tsc_disable_clocksource_watchdog(void) |
|---|
| 1182 | +{ |
|---|
| 1183 | + clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY; |
|---|
| 1184 | + clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; |
|---|
| 1185 | +} |
|---|
| 1186 | + |
|---|
| 1144 | 1187 | static void __init check_system_tsc_reliable(void) |
|---|
| 1145 | 1188 | { |
|---|
| 1146 | 1189 | #if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) |
|---|
| .. | .. |
|---|
| 1157 | 1200 | #endif |
|---|
| 1158 | 1201 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) |
|---|
| 1159 | 1202 | tsc_clocksource_reliable = 1; |
|---|
| 1203 | + |
|---|
| 1204 | + /* |
|---|
| 1205 | + * Disable the clocksource watchdog when the system has: |
|---|
| 1206 | + * - TSC running at constant frequency |
|---|
| 1207 | + * - TSC which does not stop in C-States |
|---|
| 1208 | + * - the TSC_ADJUST register which allows to detect even minimal |
|---|
| 1209 | + * modifications |
|---|
| 1210 | + * - not more than two sockets. As the number of sockets cannot be |
|---|
| 1211 | + * evaluated at the early boot stage where this has to be |
|---|
| 1212 | + * invoked, check the number of online memory nodes as a |
|---|
| 1213 | + * fallback solution which is a reasonable estimate. |
|---|
| 1214 | + */ |
|---|
| 1215 | + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && |
|---|
| 1216 | + boot_cpu_has(X86_FEATURE_NONSTOP_TSC) && |
|---|
| 1217 | + boot_cpu_has(X86_FEATURE_TSC_ADJUST) && |
|---|
| 1218 | + nr_online_nodes <= 2) |
|---|
| 1219 | + tsc_disable_clocksource_watchdog(); |
|---|
| 1160 | 1220 | } |
|---|
| 1161 | 1221 | |
|---|
| 1162 | 1222 | /* |
|---|
| .. | .. |
|---|
| 1268 | 1328 | */ |
|---|
| 1269 | 1329 | static void tsc_refine_calibration_work(struct work_struct *work) |
|---|
| 1270 | 1330 | { |
|---|
| 1271 | | - static u64 tsc_start = -1, ref_start; |
|---|
| 1331 | + static u64 tsc_start = ULLONG_MAX, ref_start; |
|---|
| 1272 | 1332 | static int hpet; |
|---|
| 1273 | 1333 | u64 tsc_stop, ref_stop, delta; |
|---|
| 1274 | 1334 | unsigned long freq; |
|---|
| .. | .. |
|---|
| 1283 | 1343 | * delayed the first time we expire. So set the workqueue |
|---|
| 1284 | 1344 | * again once we know timers are working. |
|---|
| 1285 | 1345 | */ |
|---|
| 1286 | | - if (tsc_start == -1) { |
|---|
| 1346 | + if (tsc_start == ULLONG_MAX) { |
|---|
| 1347 | +restart: |
|---|
| 1287 | 1348 | /* |
|---|
| 1288 | 1349 | * Only set hpet once, to avoid mixing hardware |
|---|
| 1289 | 1350 | * if the hpet becomes enabled later. |
|---|
| 1290 | 1351 | */ |
|---|
| 1291 | 1352 | hpet = is_hpet_enabled(); |
|---|
| 1292 | | - schedule_delayed_work(&tsc_irqwork, HZ); |
|---|
| 1293 | 1353 | tsc_start = tsc_read_refs(&ref_start, hpet); |
|---|
| 1354 | + schedule_delayed_work(&tsc_irqwork, HZ); |
|---|
| 1294 | 1355 | return; |
|---|
| 1295 | 1356 | } |
|---|
| 1296 | 1357 | |
|---|
| .. | .. |
|---|
| 1300 | 1361 | if (ref_start == ref_stop) |
|---|
| 1301 | 1362 | goto out; |
|---|
| 1302 | 1363 | |
|---|
| 1303 | | - /* Check, whether the sampling was disturbed by an SMI */ |
|---|
| 1304 | | - if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX) |
|---|
| 1305 | | - goto out; |
|---|
| 1364 | + /* Check, whether the sampling was disturbed */ |
|---|
| 1365 | + if (tsc_stop == ULLONG_MAX) |
|---|
| 1366 | + goto restart; |
|---|
| 1306 | 1367 | |
|---|
| 1307 | 1368 | delta = tsc_stop - tsc_start; |
|---|
| 1308 | 1369 | delta *= 1000000LL; |
|---|
| .. | .. |
|---|
| 1347 | 1408 | if (tsc_unstable) |
|---|
| 1348 | 1409 | goto unreg; |
|---|
| 1349 | 1410 | |
|---|
| 1350 | | - if (tsc_clocksource_reliable) |
|---|
| 1351 | | - clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; |
|---|
| 1352 | | - |
|---|
| 1353 | 1411 | if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) |
|---|
| 1354 | 1412 | clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; |
|---|
| 1355 | 1413 | |
|---|
| .. | .. |
|---|
| 1382 | 1440 | |
|---|
| 1383 | 1441 | if (early) { |
|---|
| 1384 | 1442 | cpu_khz = x86_platform.calibrate_cpu(); |
|---|
| 1385 | | - tsc_khz = x86_platform.calibrate_tsc(); |
|---|
| 1443 | + if (tsc_early_khz) |
|---|
| 1444 | + tsc_khz = tsc_early_khz; |
|---|
| 1445 | + else |
|---|
| 1446 | + tsc_khz = x86_platform.calibrate_tsc(); |
|---|
| 1386 | 1447 | } else { |
|---|
| 1387 | 1448 | /* We should not be here with non-native cpu calibration */ |
|---|
| 1388 | 1449 | WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu); |
|---|
| .. | .. |
|---|
| 1483 | 1544 | return; |
|---|
| 1484 | 1545 | } |
|---|
| 1485 | 1546 | |
|---|
| 1547 | + if (tsc_clocksource_reliable || no_tsc_watchdog) |
|---|
| 1548 | + tsc_disable_clocksource_watchdog(); |
|---|
| 1549 | + |
|---|
| 1486 | 1550 | clocksource_register_khz(&clocksource_tsc_early, tsc_khz); |
|---|
| 1487 | 1551 | detect_art(); |
|---|
| 1488 | 1552 | } |
|---|