.. | .. |
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later
1 | 2 | /*
2 | 3 |  * pSeries_lpar.c
3 | 4 |  * Copyright (C) 2001 Todd Inglett, IBM Corporation
4 | 5 |  *
5 | 6 |  * pSeries LPAR support.
6 | | - *
7 | | - * This program is free software; you can redistribute it and/or modify
8 | | - * it under the terms of the GNU General Public License as published by
9 | | - * the Free Software Foundation; either version 2 of the License, or
10 | | - * (at your option) any later version.
11 | | - *
12 | | - * This program is distributed in the hope that it will be useful,
13 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 | | - * GNU General Public License for more details.
16 | | - *
17 | | - * You should have received a copy of the GNU General Public License
18 | | - * along with this program; if not, write to the Free Software
19 | | - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 | 7 |  */
21 | 8 |
22 | 9 | /* Enables debugging of low-level hash table routines - careful! */
.. | .. |
30 | 17 | #include <linux/jump_label.h>
31 | 18 | #include <linux/delay.h>
32 | 19 | #include <linux/stop_machine.h>
| 20 | +#include <linux/spinlock.h>
| 21 | +#include <linux/cpuhotplug.h>
| 22 | +#include <linux/workqueue.h>
| 23 | +#include <linux/proc_fs.h>
| 24 | +#include <linux/pgtable.h>
33 | 25 | #include <asm/processor.h>
34 | 26 | #include <asm/mmu.h>
35 | 27 | #include <asm/page.h>
36 | | -#include <asm/pgtable.h>
37 | 28 | #include <asm/machdep.h>
38 | 29 | #include <asm/mmu_context.h>
39 | 30 | #include <asm/iommu.h>
.. | .. |
49 | 40 | #include <asm/fadump.h>
50 | 41 | #include <asm/asm-prototypes.h>
51 | 42 | #include <asm/debugfs.h>
| 43 | +#include <asm/dtl.h>
52 | 44 |
53 | 45 | #include "pseries.h"
54 | 46 |
.. | .. |
65 | 57 | EXPORT_SYMBOL(plpar_hcall9);
66 | 58 | EXPORT_SYMBOL(plpar_hcall_norets);
67 | 59 |
| 60 | +/*
| 61 | + * H_BLOCK_REMOVE supported block size for this page size in segments whose
| 62 | + * base page size is that page size.
| 63 | + *
| 64 | + * The first index is the segment base page size, the second one is the actual
| 65 | + * page size.
| 66 | + */
| 67 | +static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init;
| 68 | +
| 69 | +/*
| 70 | + * Due to the involved complexity, and because the current hypervisor only
| 71 | + * returns this value or 0, we limit the supported H_BLOCK_REMOVE buffer
| 72 | + * size to blocks of 8 entries.
| 73 | + */
| 74 | +#define HBLKRM_SUPPORTED_BLOCK_SIZE 8
| 75 | +
| 76 | +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
| 77 | +static u8 dtl_mask = DTL_LOG_PREEMPT;
| 78 | +#else
| 79 | +static u8 dtl_mask;
| 80 | +#endif
| 81 | +
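For reference, the DTL_LOG_* values used for dtl_mask come from asm/lppaca.h; a minimal sketch of the relevant definitions, as recalled for this era of the tree (assumptions — verify against your copy of lppaca.h):

	/* Dispatch trace log event mask bits (from asm/lppaca.h, assumed) */
	#define DTL_LOG_CEDE	0x1
	#define DTL_LOG_PREEMPT	0x2
	#define DTL_LOG_FAULT	0x4
	#define DTL_LOG_ALL	(DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT)

With CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, only preemption events are logged by default; the vcpudispatch_stats code below temporarily widens the mask to DTL_LOG_ALL via set_global_dtl_mask().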
| 82 | +void alloc_dtl_buffers(unsigned long *time_limit)
| 83 | +{
| 84 | +	int cpu;
| 85 | +	struct paca_struct *pp;
| 86 | +	struct dtl_entry *dtl;
| 87 | +
| 88 | +	for_each_possible_cpu(cpu) {
| 89 | +		pp = paca_ptrs[cpu];
| 90 | +		if (pp->dispatch_log)
| 91 | +			continue;
| 92 | +		dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
| 93 | +		if (!dtl) {
| 94 | +			pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
| 95 | +				cpu);
| 96 | +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
| 97 | +			pr_warn("Stolen time statistics will be unreliable\n");
| 98 | +#endif
| 99 | +			break;
| 100 | +		}
| 101 | +
| 102 | +		pp->dtl_ridx = 0;
| 103 | +		pp->dispatch_log = dtl;
| 104 | +		pp->dispatch_log_end = dtl + N_DISPATCH_LOG;
| 105 | +		pp->dtl_curr = dtl;
| 106 | +
| 107 | +		if (time_limit && time_after(jiffies, *time_limit)) {
| 108 | +			cond_resched();
| 109 | +			*time_limit = jiffies + HZ;
| 110 | +		}
| 111 | +	}
| 112 | +}
| 113 | +
| 114 | +void register_dtl_buffer(int cpu)
| 115 | +{
| 116 | +	long ret;
| 117 | +	struct paca_struct *pp;
| 118 | +	struct dtl_entry *dtl;
| 119 | +	int hwcpu = get_hard_smp_processor_id(cpu);
| 120 | +
| 121 | +	pp = paca_ptrs[cpu];
| 122 | +	dtl = pp->dispatch_log;
| 123 | +	if (dtl && dtl_mask) {
| 124 | +		pp->dtl_ridx = 0;
| 125 | +		pp->dtl_curr = dtl;
| 126 | +		lppaca_of(cpu).dtl_idx = 0;
| 127 | +
| 128 | +		/* hypervisor reads buffer length from this field */
| 129 | +		dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
| 130 | +		ret = register_dtl(hwcpu, __pa(dtl));
| 131 | +		if (ret)
| 132 | +			pr_err("WARNING: DTL registration of cpu %d (hw %d) failed with %ld\n",
| 133 | +			       cpu, hwcpu, ret);
| 134 | +
| 135 | +		lppaca_of(cpu).dtl_enable_mask = dtl_mask;
| 136 | +	}
| 137 | +}
| 138 | +
| 139 | +#ifdef CONFIG_PPC_SPLPAR
| 140 | +struct dtl_worker {
| 141 | +	struct delayed_work work;
| 142 | +	int cpu;
| 143 | +};
| 144 | +
| 145 | +struct vcpu_dispatch_data {
| 146 | +	int last_disp_cpu;
| 147 | +
| 148 | +	int total_disp;
| 149 | +
| 150 | +	int same_cpu_disp;
| 151 | +	int same_chip_disp;
| 152 | +	int diff_chip_disp;
| 153 | +	int far_chip_disp;
| 154 | +
| 155 | +	int numa_home_disp;
| 156 | +	int numa_remote_disp;
| 157 | +	int numa_far_disp;
| 158 | +};
| 159 | +
| 160 | +/*
| 161 | + * This represents the number of cpus in the hypervisor. Since there is no
| 162 | + * architected way to discover the number of processors in the host, we
| 163 | + * provision for dealing with NR_CPUS. This is currently 2048 by default, and
| 164 | + * is sufficient for our purposes. This will need to be tweaked if
| 165 | + * CONFIG_NR_CPUS is changed.
| 166 | + */
| 167 | +#define NR_CPUS_H	NR_CPUS
| 168 | +
| 169 | +DEFINE_RWLOCK(dtl_access_lock);
| 170 | +static DEFINE_PER_CPU(struct vcpu_dispatch_data, vcpu_disp_data);
| 171 | +static DEFINE_PER_CPU(u64, dtl_entry_ridx);
| 172 | +static DEFINE_PER_CPU(struct dtl_worker, dtl_workers);
| 173 | +static enum cpuhp_state dtl_worker_state;
| 174 | +static DEFINE_MUTEX(dtl_enable_mutex);
| 175 | +static int vcpudispatch_stats_on __read_mostly;
| 176 | +static int vcpudispatch_stats_freq = 50;
| 177 | +static __be32 *vcpu_associativity, *pcpu_associativity;
| 178 | +
| 179 | +
| 180 | +static void free_dtl_buffers(unsigned long *time_limit)
| 181 | +{
| 182 | +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
| 183 | +	int cpu;
| 184 | +	struct paca_struct *pp;
| 185 | +
| 186 | +	for_each_possible_cpu(cpu) {
| 187 | +		pp = paca_ptrs[cpu];
| 188 | +		if (!pp->dispatch_log)
| 189 | +			continue;
| 190 | +		kmem_cache_free(dtl_cache, pp->dispatch_log);
| 191 | +		pp->dtl_ridx = 0;
| 192 | +		pp->dispatch_log = 0;
| 193 | +		pp->dispatch_log_end = 0;
| 194 | +		pp->dtl_curr = 0;
| 195 | +
| 196 | +		if (time_limit && time_after(jiffies, *time_limit)) {
| 197 | +			cond_resched();
| 198 | +			*time_limit = jiffies + HZ;
| 199 | +		}
| 200 | +	}
| 201 | +#endif
| 202 | +}
| 203 | +
| 204 | +static int init_cpu_associativity(void)
| 205 | +{
| 206 | +	vcpu_associativity = kcalloc(num_possible_cpus() / threads_per_core,
| 207 | +			VPHN_ASSOC_BUFSIZE * sizeof(__be32), GFP_KERNEL);
| 208 | +	pcpu_associativity = kcalloc(NR_CPUS_H / threads_per_core,
| 209 | +			VPHN_ASSOC_BUFSIZE * sizeof(__be32), GFP_KERNEL);
| 210 | +
| 211 | +	if (!vcpu_associativity || !pcpu_associativity) {
| 212 | +		pr_err("error allocating memory for associativity information\n");
| 213 | +		return -ENOMEM;
| 214 | +	}
| 215 | +
| 216 | +	return 0;
| 217 | +}
| 218 | +
| 219 | +static void destroy_cpu_associativity(void)
| 220 | +{
| 221 | +	kfree(vcpu_associativity);
| 222 | +	kfree(pcpu_associativity);
| 223 | +	vcpu_associativity = pcpu_associativity = 0;
| 224 | +}
| 225 | +
| 226 | +static __be32 *__get_cpu_associativity(int cpu, __be32 *cpu_assoc, int flag)
| 227 | +{
| 228 | +	__be32 *assoc;
| 229 | +	int rc = 0;
| 230 | +
| 231 | +	assoc = &cpu_assoc[(int)(cpu / threads_per_core) * VPHN_ASSOC_BUFSIZE];
| 232 | +	if (!assoc[0]) {
| 233 | +		rc = hcall_vphn(cpu, flag, &assoc[0]);
| 234 | +		if (rc)
| 235 | +			return NULL;
| 236 | +	}
| 237 | +
| 238 | +	return assoc;
| 239 | +}
| 240 | +
| 241 | +static __be32 *get_pcpu_associativity(int cpu)
| 242 | +{
| 243 | +	return __get_cpu_associativity(cpu, pcpu_associativity, VPHN_FLAG_PCPU);
| 244 | +}
| 245 | +
| 246 | +static __be32 *get_vcpu_associativity(int cpu)
| 247 | +{
| 248 | +	return __get_cpu_associativity(cpu, vcpu_associativity, VPHN_FLAG_VCPU);
| 249 | +}
| 250 | +
| 251 | +static int cpu_relative_dispatch_distance(int last_disp_cpu, int cur_disp_cpu)
| 252 | +{
| 253 | +	__be32 *last_disp_cpu_assoc, *cur_disp_cpu_assoc;
| 254 | +
| 255 | +	if (last_disp_cpu >= NR_CPUS_H || cur_disp_cpu >= NR_CPUS_H)
| 256 | +		return -EINVAL;
| 257 | +
| 258 | +	last_disp_cpu_assoc = get_pcpu_associativity(last_disp_cpu);
| 259 | +	cur_disp_cpu_assoc = get_pcpu_associativity(cur_disp_cpu);
| 260 | +
| 261 | +	if (!last_disp_cpu_assoc || !cur_disp_cpu_assoc)
| 262 | +		return -EIO;
| 263 | +
| 264 | +	return cpu_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc);
| 265 | +}
| 266 | +
| 267 | +static int cpu_home_node_dispatch_distance(int disp_cpu)
| 268 | +{
| 269 | +	__be32 *disp_cpu_assoc, *vcpu_assoc;
| 270 | +	int vcpu_id = smp_processor_id();
| 271 | +
| 272 | +	if (disp_cpu >= NR_CPUS_H) {
| 273 | +		pr_debug_ratelimited("vcpu dispatch cpu %d > %d\n",
| 274 | +				     disp_cpu, NR_CPUS_H);
| 275 | +		return -EINVAL;
| 276 | +	}
| 277 | +
| 278 | +	disp_cpu_assoc = get_pcpu_associativity(disp_cpu);
| 279 | +	vcpu_assoc = get_vcpu_associativity(vcpu_id);
| 280 | +
| 281 | +	if (!disp_cpu_assoc || !vcpu_assoc)
| 282 | +		return -EIO;
| 283 | +
| 284 | +	return cpu_distance(disp_cpu_assoc, vcpu_assoc);
| 285 | +}
| 286 | +
| 287 | +static void update_vcpu_disp_stat(int disp_cpu)
| 288 | +{
| 289 | +	struct vcpu_dispatch_data *disp;
| 290 | +	int distance;
| 291 | +
| 292 | +	disp = this_cpu_ptr(&vcpu_disp_data);
| 293 | +	if (disp->last_disp_cpu == -1) {
| 294 | +		disp->last_disp_cpu = disp_cpu;
| 295 | +		return;
| 296 | +	}
| 297 | +
| 298 | +	disp->total_disp++;
| 299 | +
| 300 | +	if (disp->last_disp_cpu == disp_cpu ||
| 301 | +		(cpu_first_thread_sibling(disp->last_disp_cpu) ==
| 302 | +					cpu_first_thread_sibling(disp_cpu)))
| 303 | +		disp->same_cpu_disp++;
| 304 | +	else {
| 305 | +		distance = cpu_relative_dispatch_distance(disp->last_disp_cpu,
| 306 | +								disp_cpu);
| 307 | +		if (distance < 0)
| 308 | +			pr_debug_ratelimited("vcpudispatch_stats: cpu %d: error determining associativity\n",
| 309 | +					smp_processor_id());
| 310 | +		else {
| 311 | +			switch (distance) {
| 312 | +			case 0:
| 313 | +				disp->same_chip_disp++;
| 314 | +				break;
| 315 | +			case 1:
| 316 | +				disp->diff_chip_disp++;
| 317 | +				break;
| 318 | +			case 2:
| 319 | +				disp->far_chip_disp++;
| 320 | +				break;
| 321 | +			default:
| 322 | +				pr_debug_ratelimited("vcpudispatch_stats: cpu %d (%d -> %d): unexpected relative dispatch distance %d\n",
| 323 | +							smp_processor_id(),
| 324 | +							disp->last_disp_cpu,
| 325 | +							disp_cpu,
| 326 | +							distance);
| 327 | +			}
| 328 | +		}
| 329 | +	}
| 330 | +
| 331 | +	distance = cpu_home_node_dispatch_distance(disp_cpu);
| 332 | +	if (distance < 0)
| 333 | +		pr_debug_ratelimited("vcpudispatch_stats: cpu %d: error determining associativity\n",
| 334 | +				smp_processor_id());
| 335 | +	else {
| 336 | +		switch (distance) {
| 337 | +		case 0:
| 338 | +			disp->numa_home_disp++;
| 339 | +			break;
| 340 | +		case 1:
| 341 | +			disp->numa_remote_disp++;
| 342 | +			break;
| 343 | +		case 2:
| 344 | +			disp->numa_far_disp++;
| 345 | +			break;
| 346 | +		default:
| 347 | +			pr_debug_ratelimited("vcpudispatch_stats: cpu %d on %d: unexpected numa dispatch distance %d\n",
| 348 | +						smp_processor_id(),
| 349 | +						disp_cpu,
| 350 | +						distance);
| 351 | +		}
| 352 | +	}
| 353 | +
| 354 | +	disp->last_disp_cpu = disp_cpu;
| 355 | +}
| 356 | +
| 357 | +static void process_dtl_buffer(struct work_struct *work)
| 358 | +{
| 359 | +	struct dtl_entry dtle;
| 360 | +	u64 i = __this_cpu_read(dtl_entry_ridx);
| 361 | +	struct dtl_entry *dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
| 362 | +	struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
| 363 | +	struct lppaca *vpa = local_paca->lppaca_ptr;
| 364 | +	struct dtl_worker *d = container_of(work, struct dtl_worker, work.work);
| 365 | +
| 366 | +	if (!local_paca->dispatch_log)
| 367 | +		return;
| 368 | +
| 369 | +	/* if we have been migrated away, we cancel ourselves */
| 370 | +	if (d->cpu != smp_processor_id()) {
| 371 | +		pr_debug("vcpudispatch_stats: cpu %d worker migrated -- canceling worker\n",
| 372 | +						smp_processor_id());
| 373 | +		return;
| 374 | +	}
| 375 | +
| 376 | +	if (i == be64_to_cpu(vpa->dtl_idx))
| 377 | +		goto out;
| 378 | +
| 379 | +	while (i < be64_to_cpu(vpa->dtl_idx)) {
| 380 | +		dtle = *dtl;
| 381 | +		barrier();
| 382 | +		if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
| 383 | +			/* buffer has overflowed */
| 384 | +			pr_debug_ratelimited("vcpudispatch_stats: cpu %d lost %lld DTL samples\n",
| 385 | +				d->cpu,
| 386 | +				be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG - i);
| 387 | +			i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
| 388 | +			dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
| 389 | +			continue;
| 390 | +		}
| 391 | +		update_vcpu_disp_stat(be16_to_cpu(dtle.processor_id));
| 392 | +		++i;
| 393 | +		++dtl;
| 394 | +		if (dtl == dtl_end)
| 395 | +			dtl = local_paca->dispatch_log;
| 396 | +	}
| 397 | +
| 398 | +	__this_cpu_write(dtl_entry_ridx, i);
| 399 | +
| 400 | +out:
| 401 | +	schedule_delayed_work_on(d->cpu, to_delayed_work(work),
| 402 | +					HZ / vcpudispatch_stats_freq);
| 403 | +}
| 404 | +
| 405 | +static int dtl_worker_online(unsigned int cpu)
| 406 | +{
| 407 | +	struct dtl_worker *d = &per_cpu(dtl_workers, cpu);
| 408 | +
| 409 | +	memset(d, 0, sizeof(*d));
| 410 | +	INIT_DELAYED_WORK(&d->work, process_dtl_buffer);
| 411 | +	d->cpu = cpu;
| 412 | +
| 413 | +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
| 414 | +	per_cpu(dtl_entry_ridx, cpu) = 0;
| 415 | +	register_dtl_buffer(cpu);
| 416 | +#else
| 417 | +	per_cpu(dtl_entry_ridx, cpu) = be64_to_cpu(lppaca_of(cpu).dtl_idx);
| 418 | +#endif
| 419 | +
| 420 | +	schedule_delayed_work_on(cpu, &d->work, HZ / vcpudispatch_stats_freq);
| 421 | +	return 0;
| 422 | +}
| 423 | +
| 424 | +static int dtl_worker_offline(unsigned int cpu)
| 425 | +{
| 426 | +	struct dtl_worker *d = &per_cpu(dtl_workers, cpu);
| 427 | +
| 428 | +	cancel_delayed_work_sync(&d->work);
| 429 | +
| 430 | +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
| 431 | +	unregister_dtl(get_hard_smp_processor_id(cpu));
| 432 | +#endif
| 433 | +
| 434 | +	return 0;
| 435 | +}
| 436 | +
| 437 | +static void set_global_dtl_mask(u8 mask)
| 438 | +{
| 439 | +	int cpu;
| 440 | +
| 441 | +	dtl_mask = mask;
| 442 | +	for_each_present_cpu(cpu)
| 443 | +		lppaca_of(cpu).dtl_enable_mask = dtl_mask;
| 444 | +}
| 445 | +
| 446 | +static void reset_global_dtl_mask(void)
| 447 | +{
| 448 | +	int cpu;
| 449 | +
| 450 | +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
| 451 | +	dtl_mask = DTL_LOG_PREEMPT;
| 452 | +#else
| 453 | +	dtl_mask = 0;
| 454 | +#endif
| 455 | +	for_each_present_cpu(cpu)
| 456 | +		lppaca_of(cpu).dtl_enable_mask = dtl_mask;
| 457 | +}
| 458 | +
| 459 | +static int dtl_worker_enable(unsigned long *time_limit)
| 460 | +{
| 461 | +	int rc = 0, state;
| 462 | +
| 463 | +	if (!write_trylock(&dtl_access_lock)) {
| 464 | +		rc = -EBUSY;
| 465 | +		goto out;
| 466 | +	}
| 467 | +
| 468 | +	set_global_dtl_mask(DTL_LOG_ALL);
| 469 | +
| 470 | +	/* Set up the DTL buffers and register them */
| 471 | +	alloc_dtl_buffers(time_limit);
| 472 | +
| 473 | +	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/dtl:online",
| 474 | +					dtl_worker_online, dtl_worker_offline);
| 475 | +	if (state < 0) {
| 476 | +		pr_err("vcpudispatch_stats: unable to setup workqueue for DTL processing\n");
| 477 | +		free_dtl_buffers(time_limit);
| 478 | +		reset_global_dtl_mask();
| 479 | +		write_unlock(&dtl_access_lock);
| 480 | +		rc = -EINVAL;
| 481 | +		goto out;
| 482 | +	}
| 483 | +	dtl_worker_state = state;
| 484 | +
| 485 | +out:
| 486 | +	return rc;
| 487 | +}
| 488 | +
| 489 | +static void dtl_worker_disable(unsigned long *time_limit)
| 490 | +{
| 491 | +	cpuhp_remove_state(dtl_worker_state);
| 492 | +	free_dtl_buffers(time_limit);
| 493 | +	reset_global_dtl_mask();
| 494 | +	write_unlock(&dtl_access_lock);
| 495 | +}
| 496 | +
| 497 | +static ssize_t vcpudispatch_stats_write(struct file *file, const char __user *p,
| 498 | +		size_t count, loff_t *ppos)
| 499 | +{
| 500 | +	unsigned long time_limit = jiffies + HZ;
| 501 | +	struct vcpu_dispatch_data *disp;
| 502 | +	int rc, cmd, cpu;
| 503 | +	char buf[16];
| 504 | +
| 505 | +	if (count > 15)
| 506 | +		return -EINVAL;
| 507 | +
| 508 | +	if (copy_from_user(buf, p, count))
| 509 | +		return -EFAULT;
| 510 | +
| 511 | +	buf[count] = 0;
| 512 | +	rc = kstrtoint(buf, 0, &cmd);
| 513 | +	if (rc || cmd < 0 || cmd > 1) {
| 514 | +		pr_err("vcpudispatch_stats: please use 0 to disable or 1 to enable dispatch statistics\n");
| 515 | +		return rc ? rc : -EINVAL;
| 516 | +	}
| 517 | +
| 518 | +	mutex_lock(&dtl_enable_mutex);
| 519 | +
| 520 | +	if ((cmd == 0 && !vcpudispatch_stats_on) ||
| 521 | +			(cmd == 1 && vcpudispatch_stats_on))
| 522 | +		goto out;
| 523 | +
| 524 | +	if (cmd) {
| 525 | +		rc = init_cpu_associativity();
| 526 | +		if (rc)
| 527 | +			goto out;
| 528 | +
| 529 | +		for_each_possible_cpu(cpu) {
| 530 | +			disp = per_cpu_ptr(&vcpu_disp_data, cpu);
| 531 | +			memset(disp, 0, sizeof(*disp));
| 532 | +			disp->last_disp_cpu = -1;
| 533 | +		}
| 534 | +
| 535 | +		rc = dtl_worker_enable(&time_limit);
| 536 | +		if (rc) {
| 537 | +			destroy_cpu_associativity();
| 538 | +			goto out;
| 539 | +		}
| 540 | +	} else {
| 541 | +		dtl_worker_disable(&time_limit);
| 542 | +		destroy_cpu_associativity();
| 543 | +	}
| 544 | +
| 545 | +	vcpudispatch_stats_on = cmd;
| 546 | +
| 547 | +out:
| 548 | +	mutex_unlock(&dtl_enable_mutex);
| 549 | +	if (rc)
| 550 | +		return rc;
| 551 | +	return count;
| 552 | +}
| 553 | +
| 554 | +static int vcpudispatch_stats_display(struct seq_file *p, void *v)
| 555 | +{
| 556 | +	int cpu;
| 557 | +	struct vcpu_dispatch_data *disp;
| 558 | +
| 559 | +	if (!vcpudispatch_stats_on) {
| 560 | +		seq_puts(p, "off\n");
| 561 | +		return 0;
| 562 | +	}
| 563 | +
| 564 | +	for_each_online_cpu(cpu) {
| 565 | +		disp = per_cpu_ptr(&vcpu_disp_data, cpu);
| 566 | +		seq_printf(p, "cpu%d", cpu);
| 567 | +		seq_put_decimal_ull(p, " ", disp->total_disp);
| 568 | +		seq_put_decimal_ull(p, " ", disp->same_cpu_disp);
| 569 | +		seq_put_decimal_ull(p, " ", disp->same_chip_disp);
| 570 | +		seq_put_decimal_ull(p, " ", disp->diff_chip_disp);
| 571 | +		seq_put_decimal_ull(p, " ", disp->far_chip_disp);
| 572 | +		seq_put_decimal_ull(p, " ", disp->numa_home_disp);
| 573 | +		seq_put_decimal_ull(p, " ", disp->numa_remote_disp);
| 574 | +		seq_put_decimal_ull(p, " ", disp->numa_far_disp);
| 575 | +		seq_puts(p, "\n");
| 576 | +	}
| 577 | +
| 578 | +	return 0;
| 579 | +}
| 580 | +
| 581 | +static int vcpudispatch_stats_open(struct inode *inode, struct file *file)
| 582 | +{
| 583 | +	return single_open(file, vcpudispatch_stats_display, NULL);
| 584 | +}
| 585 | +
| 586 | +static const struct proc_ops vcpudispatch_stats_proc_ops = {
| 587 | +	.proc_open	= vcpudispatch_stats_open,
| 588 | +	.proc_read	= seq_read,
| 589 | +	.proc_write	= vcpudispatch_stats_write,
| 590 | +	.proc_lseek	= seq_lseek,
| 591 | +	.proc_release	= single_release,
| 592 | +};
| 593 | +
| 594 | +static ssize_t vcpudispatch_stats_freq_write(struct file *file,
| 595 | +		const char __user *p, size_t count, loff_t *ppos)
| 596 | +{
| 597 | +	int rc, freq;
| 598 | +	char buf[16];
| 599 | +
| 600 | +	if (count > 15)
| 601 | +		return -EINVAL;
| 602 | +
| 603 | +	if (copy_from_user(buf, p, count))
| 604 | +		return -EFAULT;
| 605 | +
| 606 | +	buf[count] = 0;
| 607 | +	rc = kstrtoint(buf, 0, &freq);
| 608 | +	if (rc || freq < 1 || freq > HZ) {
| 609 | +		pr_err("vcpudispatch_stats_freq: please specify a frequency between 1 and %d\n",
| 610 | +				HZ);
| 611 | +		return rc ? rc : -EINVAL;
| 612 | +	}
| 613 | +
| 614 | +	vcpudispatch_stats_freq = freq;
| 615 | +
| 616 | +	return count;
| 617 | +}
| 618 | +
| 619 | +static int vcpudispatch_stats_freq_display(struct seq_file *p, void *v)
| 620 | +{
| 621 | +	seq_printf(p, "%d\n", vcpudispatch_stats_freq);
| 622 | +	return 0;
| 623 | +}
| 624 | +
| 625 | +static int vcpudispatch_stats_freq_open(struct inode *inode, struct file *file)
| 626 | +{
| 627 | +	return single_open(file, vcpudispatch_stats_freq_display, NULL);
| 628 | +}
| 629 | +
| 630 | +static const struct proc_ops vcpudispatch_stats_freq_proc_ops = {
| 631 | +	.proc_open	= vcpudispatch_stats_freq_open,
| 632 | +	.proc_read	= seq_read,
| 633 | +	.proc_write	= vcpudispatch_stats_freq_write,
| 634 | +	.proc_lseek	= seq_lseek,
| 635 | +	.proc_release	= single_release,
| 636 | +};
| 637 | +
| 638 | +static int __init vcpudispatch_stats_procfs_init(void)
| 639 | +{
| 640 | +	/*
| 641 | +	 * Avoid smp_processor_id while preemptible. All CPUs should have
| 642 | +	 * the same value for lppaca_shared_proc.
| 643 | +	 */
| 644 | +	preempt_disable();
| 645 | +	if (!lppaca_shared_proc(get_lppaca())) {
| 646 | +		preempt_enable();
| 647 | +		return 0;
| 648 | +	}
| 649 | +	preempt_enable();
| 650 | +
| 651 | +	if (!proc_create("powerpc/vcpudispatch_stats", 0600, NULL,
| 652 | +					&vcpudispatch_stats_proc_ops))
| 653 | +		pr_err("vcpudispatch_stats: error creating procfs file\n");
| 654 | +	else if (!proc_create("powerpc/vcpudispatch_stats_freq", 0600, NULL,
| 655 | +					&vcpudispatch_stats_freq_proc_ops))
| 656 | +		pr_err("vcpudispatch_stats_freq: error creating procfs file\n");
| 657 | +
| 658 | +	return 0;
| 659 | +}
| 660 | +
| 661 | +machine_device_initcall(pseries, vcpudispatch_stats_procfs_init);
| 662 | +#endif /* CONFIG_PPC_SPLPAR */
| 663 | +
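Putting the pieces above together, the interface is driven entirely through procfs. An illustrative session on a shared-processor LPAR (the counter values here are made up):

	# echo 1 > /proc/powerpc/vcpudispatch_stats
	# cat /proc/powerpc/vcpudispatch_stats
	cpu0 4002 4000 1 1 0 4000 0 2
	cpu1 4005 4001 2 2 0 4003 1 1

Per vcpudispatch_stats_display(), each line reports, for one online cpu: total dispatches; dispatches on the same cpu/core, same chip, different chip and far chip relative to the previous dispatch; then the home-node distances numa_home, numa_remote and numa_far. Writing a value between 1 and HZ to /proc/powerpc/vcpudispatch_stats_freq tunes how many times per second each worker drains its DTL buffer.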
68 | 664 | void vpa_init(int cpu)
69 | 665 | {
70 | 666 | 	int hwcpu = get_hard_smp_processor_id(cpu);
71 | 667 | 	unsigned long addr;
72 | 668 | 	long ret;
73 | | -	struct paca_struct *pp;
74 | | -	struct dtl_entry *dtl;
75 | 669 |
76 | 670 | 	/*
77 | 671 | 	 * The spec says it "may be problematic" if CPU x registers the VPA of
.. | .. |
112 | 706 | 	/*
113 | 707 | 	 * Register dispatch trace log, if one has been allocated.
114 | 708 | 	 */
115 | | -	pp = paca_ptrs[cpu];
116 | | -	dtl = pp->dispatch_log;
117 | | -	if (dtl) {
118 | | -		pp->dtl_ridx = 0;
119 | | -		pp->dtl_curr = dtl;
120 | | -		lppaca_of(cpu).dtl_idx = 0;
121 | | -
122 | | -		/* hypervisor reads buffer length from this field */
123 | | -		dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
124 | | -		ret = register_dtl(hwcpu, __pa(dtl));
125 | | -		if (ret)
126 | | -			pr_err("WARNING: DTL registration of cpu %d (hw %d) "
127 | | -				"failed with %ld\n", smp_processor_id(),
128 | | -				hwcpu, ret);
129 | | -		lppaca_of(cpu).dtl_enable_mask = 2;
130 | | -	}
| 709 | +	register_dtl_buffer(cpu);
131 | 710 | }
132 | 711 |
133 | 712 | #ifdef CONFIG_PPC_BOOK3S_64
.. | .. |
204 | 783 |
205 | 784 | 	/* don't remove a bolted entry */
206 | 785 | 	lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
207 | | -				   (0x1UL << 4), &dummy1, &dummy2);
| 786 | +				   HPTE_V_BOLTED, &dummy1, &dummy2);
208 | 787 | 	if (lpar_rc == H_SUCCESS)
209 | 788 | 		return i;
210 | 789 |
.. | .. |
368 | 947 | 	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
369 | 948 | 	want_v = hpte_encode_avpn(vpn, psize, ssize);
370 | 949 |
371 | | -	/* Bolted entries are always in the primary group */
| 950 | +	/*
| 951 | +	 * We try to keep bolted entries always in the primary hash, but in
| 952 | +	 * some cases we can find them in the secondary too.
| 953 | +	 */
372 | 954 | 	hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
373 | 955 | 	slot = __pSeries_lpar_hpte_find(want_v, hpte_group);
374 | | -	if (slot < 0)
375 | | -		return -1;
| 956 | +	if (slot < 0) {
| 957 | +		/* Try in secondary */
| 958 | +		hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
| 959 | +		slot = __pSeries_lpar_hpte_find(want_v, hpte_group);
| 960 | +		if (slot < 0)
| 961 | +			return -1;
| 962 | +	}
376 | 963 | 	return hpte_group + slot;
377 | 964 | }
378 | 965 |
.. | .. |
418 | 1005 | 	BUG_ON(lpar_rc != H_SUCCESS);
419 | 1006 | }
420 | 1007 |
| 1008 | +
| 1009 | +/*
| 1010 | + * As defined in PAPR section 14.5.4.1.8, the control mask doesn't
| 1011 | + * include the returned reference and change bits of the processed
| 1012 | + * PTE.
| 1013 | + */
| 1014 | +#define HBLKR_AVPN		0x0100000000000000UL
| 1015 | +#define HBLKR_CTRL_MASK		0xf800000000000000UL
| 1016 | +#define HBLKR_CTRL_SUCCESS	0x8000000000000000UL
| 1017 | +#define HBLKR_CTRL_ERRNOTFOUND	0x8800000000000000UL
| 1018 | +#define HBLKR_CTRL_ERRBUSY	0xa000000000000000UL
| 1019 | +
| 1020 | +/*
| 1021 | + * Returns true if this block size is supported for the specified segment
| 1022 | + * base page size and actual page size.
| 1023 | + *
| 1024 | + * Currently, only a block size of 8 is supported.
| 1025 | + */
| 1026 | +static inline bool is_supported_hlbkrm(int bpsize, int psize)
| 1027 | +{
| 1028 | +	return (hblkrm_size[bpsize][psize] == HBLKRM_SUPPORTED_BLOCK_SIZE);
| 1029 | +}
| 1030 | +
| 1031 | +/**
| 1032 | + * H_BLOCK_REMOVE caller.
| 1033 | + * @idx should point to the latest @param entry set with a PTEX.
| 1034 | + * If a PTE cannot be processed because another CPU has already locked that
| 1035 | + * group, those entries are put back in @param starting at index 1.
| 1036 | + * If entries have to be retried and @retry_busy is set to true, these entries
| 1037 | + * are retried until success. If @retry_busy is set to false, the return value
| 1038 | + * is the number of entries yet to be processed.
| 1039 | + */
| 1040 | +static unsigned long call_block_remove(unsigned long idx, unsigned long *param,
| 1041 | +					bool retry_busy)
| 1042 | +{
| 1043 | +	unsigned long i, rc, new_idx;
| 1044 | +	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
| 1045 | +
| 1046 | +	if (idx < 2) {
| 1047 | +		pr_warn("Unexpected empty call to H_BLOCK_REMOVE");
| 1048 | +		return 0;
| 1049 | +	}
| 1050 | +again:
| 1051 | +	new_idx = 0;
| 1052 | +	if (idx > PLPAR_HCALL9_BUFSIZE) {
| 1053 | +		pr_err("Too many PTEs (%lu) for H_BLOCK_REMOVE", idx);
| 1054 | +		idx = PLPAR_HCALL9_BUFSIZE;
| 1055 | +	} else if (idx < PLPAR_HCALL9_BUFSIZE)
| 1056 | +		param[idx] = HBR_END;
| 1057 | +
| 1058 | +	rc = plpar_hcall9(H_BLOCK_REMOVE, retbuf,
| 1059 | +			  param[0], /* AVA */
| 1060 | +			  param[1], param[2], param[3], param[4], /* TS0-7 */
| 1061 | +			  param[5], param[6], param[7], param[8]);
| 1062 | +	if (rc == H_SUCCESS)
| 1063 | +		return 0;
| 1064 | +
| 1065 | +	BUG_ON(rc != H_PARTIAL);
| 1066 | +
| 1067 | +	/* Check that the unprocessed entries were 'not found' or 'busy' */
| 1068 | +	for (i = 0; i < idx-1; i++) {
| 1069 | +		unsigned long ctrl = retbuf[i] & HBLKR_CTRL_MASK;
| 1070 | +
| 1071 | +		if (ctrl == HBLKR_CTRL_ERRBUSY) {
| 1072 | +			param[++new_idx] = param[i+1];
| 1073 | +			continue;
| 1074 | +		}
| 1075 | +
| 1076 | +		BUG_ON(ctrl != HBLKR_CTRL_SUCCESS
| 1077 | +		       && ctrl != HBLKR_CTRL_ERRNOTFOUND);
| 1078 | +	}
| 1079 | +
| 1080 | +	/*
| 1081 | +	 * If there were entries found busy, retry these entries if requested,
| 1082 | +	 * or if all the entries have to be retried.
| 1083 | +	 */
| 1084 | +	if (new_idx && (retry_busy || new_idx == (PLPAR_HCALL9_BUFSIZE-1))) {
| 1085 | +		idx = new_idx + 1;
| 1086 | +		goto again;
| 1087 | +	}
| 1088 | +
| 1089 | +	return new_idx;
| 1090 | +}
| 1091 | +
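To make the @param calling convention concrete, here is a minimal sketch of how the callers further down (hugepage_block_invalidate() and do_block_remove()) fill the buffer; vpn, slot, psize and ssize are assumed to be in hand:

	unsigned long param[PLPAR_HCALL9_BUFSIZE];
	unsigned long pix;

	/* param[0] carries the AVA of the naturally aligned 8-page block */
	param[0] = hpte_encode_avpn(vpn, psize, ssize);
	pix = 1;

	/* param[1..8] each carry one PTEX, tagged as an AVPN-qualified request */
	param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot;

	/* with fewer than 8 entries, call_block_remove() appends HBR_END itself */
	(void)call_block_remove(pix, param, true);

Since plpar_hcall9() takes nine arguments after the return buffer, one hcall can carry the AVA plus at most PLPAR_HCALL9_BUFSIZE - 1 PTEX entries, which is why both callers flush and restart a block whenever pix reaches PLPAR_HCALL9_BUFSIZE.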
421 | 1092 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE
422 | 1093 | /*
423 | 1094 |  * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need
.. | .. |
425 | 1096 |  */
426 | 1097 | #define PPC64_HUGE_HPTE_BATCH 12
427 | 1098 |
428 | | -static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
429 | | -					       unsigned long *vpn, int count,
430 | | -					       int psize, int ssize)
| 1099 | +static void hugepage_block_invalidate(unsigned long *slot, unsigned long *vpn,
| 1100 | +				      int count, int psize, int ssize)
| 1101 | +{
| 1102 | +	unsigned long param[PLPAR_HCALL9_BUFSIZE];
| 1103 | +	unsigned long shift, current_vpgb, vpgb;
| 1104 | +	int i, pix = 0;
| 1105 | +
| 1106 | +	shift = mmu_psize_defs[psize].shift;
| 1107 | +
| 1108 | +	for (i = 0; i < count; i++) {
| 1109 | +		/*
| 1110 | +		 * Shift 3 more bits to the right to get an
| 1111 | +		 * 8-page aligned virtual address.
| 1112 | +		 */
| 1113 | +		vpgb = (vpn[i] >> (shift - VPN_SHIFT + 3));
| 1114 | +		if (!pix || vpgb != current_vpgb) {
| 1115 | +			/*
| 1116 | +			 * Need to start a new 8-page block; flush
| 1117 | +			 * the current one if needed.
| 1118 | +			 */
| 1119 | +			if (pix)
| 1120 | +				(void)call_block_remove(pix, param, true);
| 1121 | +			current_vpgb = vpgb;
| 1122 | +			param[0] = hpte_encode_avpn(vpn[i], psize, ssize);
| 1123 | +			pix = 1;
| 1124 | +		}
| 1125 | +
| 1126 | +		param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot[i];
| 1127 | +		if (pix == PLPAR_HCALL9_BUFSIZE) {
| 1128 | +			pix = call_block_remove(pix, param, false);
| 1129 | +			/*
| 1130 | +			 * pix = 0 means that all the entries were
| 1131 | +			 * removed; we can start a new block.
| 1132 | +			 * Otherwise, there are entries to retry, and
| 1133 | +			 * pix points to the latest one, so we should
| 1134 | +			 * increment it and try to continue the same
| 1135 | +			 * block.
| 1136 | +			 */
| 1137 | +			if (pix)
| 1138 | +				pix++;
| 1139 | +		}
| 1140 | +	}
| 1141 | +	if (pix)
| 1142 | +		(void)call_block_remove(pix, param, true);
| 1143 | +}
| 1144 | +
| 1145 | +static void hugepage_bulk_invalidate(unsigned long *slot, unsigned long *vpn,
| 1146 | +				     int count, int psize, int ssize)
431 | 1147 | {
432 | 1148 | 	unsigned long param[PLPAR_HCALL9_BUFSIZE];
433 | 1149 | 	int i = 0, pix = 0, rc;
434 | | -	unsigned long flags = 0;
435 | | -	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
436 | | -
437 | | -	if (lock_tlbie)
438 | | -		spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
439 | 1150 |
440 | 1151 | 	for (i = 0; i < count; i++) {
441 | 1152 |
.. | .. |
463 | 1174 | 				  param[6], param[7]);
464 | 1175 | 		BUG_ON(rc != H_SUCCESS);
465 | 1176 | 	}
| 1177 | +}
| 1178 | +
| 1179 | +static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
| 1180 | +						      unsigned long *vpn,
| 1181 | +						      int count, int psize,
| 1182 | +						      int ssize)
| 1183 | +{
| 1184 | +	unsigned long flags = 0;
| 1185 | +	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
| 1186 | +
| 1187 | +	if (lock_tlbie)
| 1188 | +		spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
| 1189 | +
| 1190 | +	/* Assuming THP size is 16M */
| 1191 | +	if (is_supported_hlbkrm(psize, MMU_PAGE_16M))
| 1192 | +		hugepage_block_invalidate(slot, vpn, count, psize, ssize);
| 1193 | +	else
| 1194 | +		hugepage_bulk_invalidate(slot, vpn, count, psize, ssize);
466 | 1195 |
467 | 1196 | 	if (lock_tlbie)
468 | 1197 | 		spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
.. | .. |
547 | 1276 | 	return 0;
548 | 1277 | }
549 | 1278 |
| 1279 | +
| 1280 | +static inline unsigned long compute_slot(real_pte_t pte,
| 1281 | +					 unsigned long vpn,
| 1282 | +					 unsigned long index,
| 1283 | +					 unsigned long shift,
| 1284 | +					 int ssize)
| 1285 | +{
| 1286 | +	unsigned long slot, hash, hidx;
| 1287 | +
| 1288 | +	hash = hpt_hash(vpn, shift, ssize);
| 1289 | +	hidx = __rpte_to_hidx(pte, index);
| 1290 | +	if (hidx & _PTEIDX_SECONDARY)
| 1291 | +		hash = ~hash;
| 1292 | +	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
| 1293 | +	slot += hidx & _PTEIDX_GROUP_IX;
| 1294 | +	return slot;
| 1295 | +}
| 1296 | +
| 1297 | +/**
| 1298 | + * The hcall H_BLOCK_REMOVE implies that the virtual pages to be processed are
| 1299 | + * "all within the same naturally aligned 8 page virtual address block".
| 1300 | + */
| 1301 | +static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
| 1302 | +			    unsigned long *param)
| 1303 | +{
| 1304 | +	unsigned long vpn;
| 1305 | +	unsigned long i, pix = 0;
| 1306 | +	unsigned long index, shift, slot, current_vpgb, vpgb;
| 1307 | +	real_pte_t pte;
| 1308 | +	int psize, ssize;
| 1309 | +
| 1310 | +	psize = batch->psize;
| 1311 | +	ssize = batch->ssize;
| 1312 | +
| 1313 | +	for (i = 0; i < number; i++) {
| 1314 | +		vpn = batch->vpn[i];
| 1315 | +		pte = batch->pte[i];
| 1316 | +		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
| 1317 | +			/*
| 1318 | +			 * Shift 3 more bits to the right to get an
| 1319 | +			 * 8-page aligned virtual address.
| 1320 | +			 */
| 1321 | +			vpgb = (vpn >> (shift - VPN_SHIFT + 3));
| 1322 | +			if (!pix || vpgb != current_vpgb) {
| 1323 | +				/*
| 1324 | +				 * Need to start a new 8-page block; flush
| 1325 | +				 * the current one if needed.
| 1326 | +				 */
| 1327 | +				if (pix)
| 1328 | +					(void)call_block_remove(pix, param,
| 1329 | +								true);
| 1330 | +				current_vpgb = vpgb;
| 1331 | +				param[0] = hpte_encode_avpn(vpn, psize,
| 1332 | +							    ssize);
| 1333 | +				pix = 1;
| 1334 | +			}
| 1335 | +
| 1336 | +			slot = compute_slot(pte, vpn, index, shift, ssize);
| 1337 | +			param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot;
| 1338 | +
| 1339 | +			if (pix == PLPAR_HCALL9_BUFSIZE) {
| 1340 | +				pix = call_block_remove(pix, param, false);
| 1341 | +				/*
| 1342 | +				 * pix = 0 means that all the entries were
| 1343 | +				 * removed; we can start a new block.
| 1344 | +				 * Otherwise, there are entries to retry, and
| 1345 | +				 * pix points to the latest one, so we should
| 1346 | +				 * increment it and try to continue the same
| 1347 | +				 * block.
| 1348 | +				 */
| 1349 | +				if (pix)
| 1350 | +					pix++;
| 1351 | +			}
| 1352 | +		} pte_iterate_hashed_end();
| 1353 | +	}
| 1354 | +
| 1355 | +	if (pix)
| 1356 | +		(void)call_block_remove(pix, param, true);
| 1357 | +}
| 1358 | +
| 1359 | +/*
| 1360 | + * TLB Block Invalidate Characteristics
| 1361 | + *
| 1362 | + * These characteristics define the size of the block the hcall H_BLOCK_REMOVE
| 1363 | + * is able to process for each (segment base page size, actual page size) couple.
| 1364 | + *
| 1365 | + * The ibm,get-system-parameter RTAS call returns a buffer with the
| 1366 | + * following layout:
| 1367 | + *
| 1368 | + * [ 2 bytes size of the RTAS buffer (excluding these 2 bytes) ]
| 1369 | + * -----------------
| 1370 | + * TLB Block Invalidate Specifiers:
| 1371 | + * [ 1 byte LOG base 2 of the TLB invalidate block size being specified ]
| 1372 | + * [ 1 byte Number of page sizes (N) that are supported for the specified
| 1373 | + *          TLB invalidate block size ]
| 1374 | + * [ 1 byte Encoded segment base page size and actual page size
| 1375 | + *          MSB=0 means 4k segment base page size and actual page size
| 1376 | + *          MSB=1 the penc value in mmu_psize_def ]
| 1377 | + * ...
| 1378 | + * -----------------
| 1379 | + * Next TLB Block Invalidate Specifiers...
| 1380 | + * -----------------
| 1381 | + * [ 0 ]
| 1382 | + */
| 1383 | +static inline void set_hblkrm_bloc_size(int bpsize, int psize,
| 1384 | +					unsigned int block_size)
| 1385 | +{
| 1386 | +	if (block_size > hblkrm_size[bpsize][psize])
| 1387 | +		hblkrm_size[bpsize][psize] = block_size;
| 1388 | +}
| 1389 | +
| 1390 | +/*
| 1391 | + * Decode the Encoded segment base page size and actual page size.
| 1392 | + * PAPR specifies:
| 1393 | + *   - bit 7 is the L bit
| 1394 | + *   - bits 0-5 are the penc value
| 1395 | + * If the L bit is 0, this means 4K segment base page size and actual page size;
| 1396 | + * otherwise the penc value should be read.
| 1397 | + */
| 1398 | +#define HBLKRM_L_MASK		0x80
| 1399 | +#define HBLKRM_PENC_MASK	0x3f
| 1400 | +static inline void __init check_lp_set_hblkrm(unsigned int lp,
| 1401 | +					      unsigned int block_size)
| 1402 | +{
| 1403 | +	unsigned int bpsize, psize;
| 1404 | +
| 1405 | +	/* First, check the L bit; if not set, this means 4K */
| 1406 | +	if ((lp & HBLKRM_L_MASK) == 0) {
| 1407 | +		set_hblkrm_bloc_size(MMU_PAGE_4K, MMU_PAGE_4K, block_size);
| 1408 | +		return;
| 1409 | +	}
| 1410 | +
| 1411 | +	lp &= HBLKRM_PENC_MASK;
| 1412 | +	for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++) {
| 1413 | +		struct mmu_psize_def *def = &mmu_psize_defs[bpsize];
| 1414 | +
| 1415 | +		for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
| 1416 | +			if (def->penc[psize] == lp) {
| 1417 | +				set_hblkrm_bloc_size(bpsize, psize, block_size);
| 1418 | +				return;
| 1419 | +			}
| 1420 | +		}
| 1421 | +	}
| 1422 | +}
| 1423 | +
| 1424 | +#define SPLPAR_TLB_BIC_TOKEN		50
| 1425 | +
| 1426 | +/*
| 1427 | + * The size of the TLB Block Invalidate Characteristics is variable, but at
| 1428 | + * most it will be the number of possible page sizes * 2 + 10 bytes.
| 1429 | + * Currently MMU_PAGE_COUNT is 16, which means 42 bytes. Use a cache line size
| 1430 | + * (128 bytes) for the buffer to get plenty of space.
| 1431 | + */
| 1432 | +#define SPLPAR_TLB_BIC_MAXLENGTH	128
| 1433 | +
| 1434 | +void __init pseries_lpar_read_hblkrm_characteristics(void)
| 1435 | +{
| 1436 | +	unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH];
| 1437 | +	int call_status, len, idx, bpsize;
| 1438 | +
| 1439 | +	if (!firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
| 1440 | +		return;
| 1441 | +
| 1442 | +	spin_lock(&rtas_data_buf_lock);
| 1443 | +	memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
| 1444 | +	call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
| 1445 | +				NULL,
| 1446 | +				SPLPAR_TLB_BIC_TOKEN,
| 1447 | +				__pa(rtas_data_buf),
| 1448 | +				RTAS_DATA_BUF_SIZE);
| 1449 | +	memcpy(local_buffer, rtas_data_buf, SPLPAR_TLB_BIC_MAXLENGTH);
| 1450 | +	local_buffer[SPLPAR_TLB_BIC_MAXLENGTH - 1] = '\0';
| 1451 | +	spin_unlock(&rtas_data_buf_lock);
| 1452 | +
| 1453 | +	if (call_status != 0) {
| 1454 | +		pr_warn("%s %s Error calling get-system-parameter (0x%x)\n",
| 1455 | +			__FILE__, __func__, call_status);
| 1456 | +		return;
| 1457 | +	}
| 1458 | +
| 1459 | +	/*
| 1460 | +	 * The first two (2) bytes of the data in the buffer are the length of
| 1461 | +	 * the returned data, not counting these first two (2) bytes.
| 1462 | +	 */
| 1463 | +	len = be16_to_cpu(*((u16 *)local_buffer)) + 2;
| 1464 | +	if (len > SPLPAR_TLB_BIC_MAXLENGTH) {
| 1465 | +		pr_warn("%s too large returned buffer %d", __func__, len);
| 1466 | +		return;
| 1467 | +	}
| 1468 | +
| 1469 | +	idx = 2;
| 1470 | +	while (idx < len) {
| 1471 | +		u8 block_shift = local_buffer[idx++];
| 1472 | +		u32 block_size;
| 1473 | +		unsigned int npsize;
| 1474 | +
| 1475 | +		if (!block_shift)
| 1476 | +			break;
| 1477 | +
| 1478 | +		block_size = 1 << block_shift;
| 1479 | +
| 1480 | +		for (npsize = local_buffer[idx++];
| 1481 | +		     npsize > 0 && idx < len; npsize--)
| 1482 | +			check_lp_set_hblkrm((unsigned int) local_buffer[idx++],
| 1483 | +					    block_size);
| 1484 | +	}
| 1485 | +
| 1486 | +	for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
| 1487 | +		for (idx = 0; idx < MMU_PAGE_COUNT; idx++)
| 1488 | +			if (hblkrm_size[bpsize][idx])
| 1489 | +				pr_info("H_BLOCK_REMOVE supports base psize:%d psize:%d block size:%d",
| 1490 | +					bpsize, idx, hblkrm_size[bpsize][idx]);
| 1491 | +}
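As a worked (entirely hypothetical) example of the parsing above, a buffer beginning with the bytes

	00 05  03 02  00 8b  00

decodes as: length 0x0005, so len = 7 counting the length field itself; one specifier with block_shift = 3 (block_size = 8) covering npsize = 2 encodings, namely 0x00 (L bit clear, hence 4K base and actual page size) and 0x8b (L bit set, penc = 0x0b, matched against mmu_psize_defs[]); then a zero block_shift terminates the walk. Real penc values are implementation dependent, so 0x8b is illustrative only.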
| 1492 | +
550 | 1493 | /*
551 | 1494 |  * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
552 | 1495 |  * lock.
.. | .. |
559 | 1502 | 	struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
560 | 1503 | 	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
561 | 1504 | 	unsigned long param[PLPAR_HCALL9_BUFSIZE];
562 | | -	unsigned long hash, index, shift, hidx, slot;
| 1505 | +	unsigned long index, shift, slot;
563 | 1506 | 	real_pte_t pte;
564 | 1507 | 	int psize, ssize;
565 | 1508 |
566 | 1509 | 	if (lock_tlbie)
567 | 1510 | 		spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
| 1511 | +
| 1512 | +	if (is_supported_hlbkrm(batch->psize, batch->psize)) {
| 1513 | +		do_block_remove(number, batch, param);
| 1514 | +		goto out;
| 1515 | +	}
568 | 1516 |
569 | 1517 | 	psize = batch->psize;
570 | 1518 | 	ssize = batch->ssize;
.. | .. |
573 | 1521 | 		vpn = batch->vpn[i];
574 | 1522 | 		pte = batch->pte[i];
575 | 1523 | 		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
576 | | -			hash = hpt_hash(vpn, shift, ssize);
577 | | -			hidx = __rpte_to_hidx(pte, index);
578 | | -			if (hidx & _PTEIDX_SECONDARY)
579 | | -				hash = ~hash;
580 | | -			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
581 | | -			slot += hidx & _PTEIDX_GROUP_IX;
| 1524 | +			slot = compute_slot(pte, vpn, index, shift, ssize);
582 | 1525 | 			if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
583 | 1526 | 				/*
584 | 1527 | 				 * lpar doesn't use the passed actual page size
.. | .. |
609 | 1552 | 		BUG_ON(rc != H_SUCCESS);
610 | 1553 | 	}
611 | 1554 |
| 1555 | +out:
612 | 1556 | 	if (lock_tlbie)
613 | 1557 | 		spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
614 | 1558 | }
.. | .. |
693 | 1637 | 		break;
694 | 1638 |
695 | 1639 | 	case H_PARAMETER:
| 1640 | +		pr_warn("Invalid argument from H_RESIZE_HPT_PREPARE\n");
696 | 1641 | 		return -EINVAL;
697 | 1642 | 	case H_RESOURCE:
| 1643 | +		pr_warn("Operation not permitted from H_RESIZE_HPT_PREPARE\n");
698 | 1644 | 		return -EPERM;
699 | 1645 | 	default:
700 | 1646 | 		pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc);
.. | .. |
711 | 1657 | 	if (rc != 0) {
712 | 1658 | 		switch (state.commit_rc) {
713 | 1659 | 		case H_PTEG_FULL:
714 | | -			pr_warn("Hash collision while resizing HPT\n");
715 | 1660 | 			return -ENOSPC;
716 | 1661 |
717 | 1662 | 		default:
736 | 1681 | |
---|
737 | 1682 | if (table_size) |
---|
738 | 1683 | flags |= PROC_TABLE_NEW; |
---|
739 | | - if (radix_enabled()) |
---|
740 | | - flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE; |
---|
741 | | - else |
---|
| 1684 | + if (radix_enabled()) { |
---|
| 1685 | + flags |= PROC_TABLE_RADIX; |
---|
| 1686 | + if (mmu_has_feature(MMU_FTR_GTSE)) |
---|
| 1687 | + flags |= PROC_TABLE_GTSE; |
---|
| 1688 | + } else |
---|
742 | 1689 | flags |= PROC_TABLE_HPT_SLB; |
---|
743 | 1690 | for (;;) { |
---|
744 | 1691 | rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base, |
---|
.. | .. |
---|
765 | 1712 | mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; |
---|
766 | 1713 | mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all; |
---|
767 | 1714 | mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; |
---|
768 | | - register_process_table = pseries_lpar_register_process_table; |
---|
769 | 1715 | |
---|
770 | 1716 | if (firmware_has_feature(FW_FEATURE_HPT_RESIZE)) |
---|
771 | 1717 | mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt; |
---|
| 1718 | + |
---|
| 1719 | + /* |
---|
| 1720 | + * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall |
---|
| 1721 | + * to inform the hypervisor that we wish to use the HPT. |
---|
| 1722 | + */ |
---|
| 1723 | + if (cpu_has_feature(CPU_FTR_ARCH_300)) |
---|
| 1724 | + pseries_lpar_register_process_table(0, 0, 0); |
---|
772 | 1725 | } |
---|
773 | 1726 | |
---|
| 1727 | +#ifdef CONFIG_PPC_RADIX_MMU |
---|
774 | 1728 | void radix_init_pseries(void) |
---|
775 | 1729 | { |
---|
776 | 1730 | pr_info("Using radix MMU under hypervisor\n"); |
---|
777 | | - register_process_table = pseries_lpar_register_process_table; |
---|
| 1731 | + |
---|
| 1732 | + pseries_lpar_register_process_table(__pa(process_tb), |
---|
| 1733 | + 0, PRTB_SIZE_SHIFT - 12); |
---|
778 | 1734 | } |
---|
| 1735 | +#endif |
---|
779 | 1736 | |
---|
780 | 1737 | #ifdef CONFIG_PPC_SMLPAR |
---|
781 | 1738 | #define CMO_FREE_HINT_DEFAULT 1 |
---|
.. | .. |
---|
870 | 1827 | |
---|
871 | 1828 | /* |
---|
872 | 1829 | * Since the tracing code might execute hcalls we need to guard against |
---|
873 | | - * recursion. One example of this are spinlocks calling H_YIELD on |
---|
874 | | - * shared processor partitions. |
---|
| 1830 | + * recursion. |
---|
875 | 1831 | */ |
---|
876 | 1832 | static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); |
---|
877 | 1833 | |
---|
.. | .. |
---|
1062 | 2018 | return 0; |
---|
1063 | 2019 | |
---|
1064 | 2020 | vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root); |
---|
1065 | | - if (!vpa_dir) { |
---|
1066 | | - pr_warn("%s: can't create vpa root dir\n", __func__); |
---|
1067 | | - return -ENOMEM; |
---|
1068 | | - } |
---|
1069 | 2021 | |
---|
1070 | 2022 | /* set up the per-cpu vpa file*/ |
---|
1071 | 2023 | for_each_possible_cpu(i) { |
---|
1072 | | - struct dentry *d; |
---|
1073 | | - |
---|
1074 | 2024 | sprintf(name, "cpu-%ld", i); |
---|
1075 | | - |
---|
1076 | | - d = debugfs_create_file(name, 0400, vpa_dir, (void *)i, |
---|
1077 | | - &vpa_fops); |
---|
1078 | | - if (!d) { |
---|
1079 | | - pr_warn("%s: can't create per-cpu vpa file\n", |
---|
1080 | | - __func__); |
---|
1081 | | - return -ENOMEM; |
---|
1082 | | - } |
---|
| 2025 | + debugfs_create_file(name, 0400, vpa_dir, (void *)i, &vpa_fops); |
---|
1083 | 2026 | } |
---|
1084 | 2027 | |
---|
1085 | 2028 | return 0; |
---|