| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * This file implements the DMA operations for NVLink devices. The NPU |
|---|
| 3 | 4 | * devices all point to the same iommu table as the parent PCI device. |
|---|
| 4 | 5 | * |
|---|
| 5 | 6 | * Copyright Alistair Popple, IBM Corporation 2015. |
|---|
| 6 | | - * |
|---|
| 7 | | - * This program is free software; you can redistribute it and/or |
|---|
| 8 | | - * modify it under the terms of version 2 of the GNU General Public |
|---|
| 9 | | - * License as published by the Free Software Foundation. |
|---|
| 10 | 7 | */ |
|---|
| 11 | 8 | |
|---|
| 12 | | -#include <linux/slab.h> |
|---|
| 13 | 9 | #include <linux/mmu_notifier.h> |
|---|
| 14 | 10 | #include <linux/mmu_context.h> |
|---|
| 15 | 11 | #include <linux/of.h> |
|---|
| 16 | | -#include <linux/export.h> |
|---|
| 17 | 12 | #include <linux/pci.h> |
|---|
| 18 | 13 | #include <linux/memblock.h> |
|---|
| 19 | | -#include <linux/iommu.h> |
|---|
| 20 | | -#include <linux/debugfs.h> |
|---|
| 14 | +#include <linux/sizes.h> |
|---|
| 21 | 15 | |
|---|
| 22 | 16 | #include <asm/debugfs.h> |
|---|
| 23 | | -#include <asm/tlb.h> |
|---|
| 24 | 17 | #include <asm/powernv.h> |
|---|
| 25 | | -#include <asm/reg.h> |
|---|
| 26 | | -#include <asm/opal.h> |
|---|
| 27 | | -#include <asm/io.h> |
|---|
| 28 | | -#include <asm/iommu.h> |
|---|
| 29 | | -#include <asm/pnv-pci.h> |
|---|
| 30 | | -#include <asm/msi_bitmap.h> |
|---|
| 18 | +#include <asm/ppc-pci.h> |
|---|
| 31 | 19 | #include <asm/opal.h> |
|---|
| 32 | 20 | |
|---|
| 33 | | -#include "powernv.h" |
|---|
| 34 | 21 | #include "pci.h" |
|---|
| 35 | 22 | |
|---|
| 36 | | -#define npu_to_phb(x) container_of(x, struct pnv_phb, npu) |
|---|
| 37 | | - |
|---|
| 38 | | -/* |
|---|
| 39 | | - * spinlock to protect initialisation of an npu_context for a particular |
|---|
| 40 | | - * mm_struct. |
|---|
| 41 | | - */ |
|---|
| 42 | | -static DEFINE_SPINLOCK(npu_context_lock); |
|---|
| 43 | | - |
|---|
| 44 | | -/* |
|---|
| 45 | | - * When an address shootdown range exceeds this threshold we invalidate the |
|---|
| 46 | | - * entire TLB on the GPU for the given PID rather than each specific address in |
|---|
| 47 | | - * the range. |
|---|
| 48 | | - */ |
|---|
| 49 | | -static uint64_t atsd_threshold = 2 * 1024 * 1024; |
|---|
| 50 | | -static struct dentry *atsd_threshold_dentry; |
|---|
| 51 | | - |
|---|
| 52 | | -/* |
|---|
| 53 | | - * Other types of TCE cache invalidation are not functional in the |
|---|
| 54 | | - * hardware. |
|---|
| 55 | | - */ |
|---|
| 56 | 23 | static struct pci_dev *get_pci_dev(struct device_node *dn) |
|---|
| 57 | 24 | { |
|---|
| 58 | 25 | struct pci_dn *pdn = PCI_DN(dn); |
|---|
| .. | .. |
|---|
| 123 | 90 | } |
|---|
| 124 | 91 | EXPORT_SYMBOL(pnv_pci_get_npu_dev); |
|---|
| 125 | 92 | |
|---|
| 126 | | -#define NPU_DMA_OP_UNSUPPORTED() \ |
|---|
| 127 | | - dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \ |
|---|
| 128 | | - __func__) |
|---|
| 129 | | - |
|---|
| 130 | | -static void *dma_npu_alloc(struct device *dev, size_t size, |
|---|
| 131 | | - dma_addr_t *dma_handle, gfp_t flag, |
|---|
| 132 | | - unsigned long attrs) |
|---|
| 133 | | -{ |
|---|
| 134 | | - NPU_DMA_OP_UNSUPPORTED(); |
|---|
| 135 | | - return NULL; |
|---|
| 136 | | -} |
|---|
| 137 | | - |
|---|
| 138 | | -static void dma_npu_free(struct device *dev, size_t size, |
|---|
| 139 | | - void *vaddr, dma_addr_t dma_handle, |
|---|
| 140 | | - unsigned long attrs) |
|---|
| 141 | | -{ |
|---|
| 142 | | - NPU_DMA_OP_UNSUPPORTED(); |
|---|
| 143 | | -} |
|---|
| 144 | | - |
|---|
| 145 | | -static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page, |
|---|
| 146 | | - unsigned long offset, size_t size, |
|---|
| 147 | | - enum dma_data_direction direction, |
|---|
| 148 | | - unsigned long attrs) |
|---|
| 149 | | -{ |
|---|
| 150 | | - NPU_DMA_OP_UNSUPPORTED(); |
|---|
| 151 | | - return 0; |
|---|
| 152 | | -} |
|---|
| 153 | | - |
|---|
| 154 | | -static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist, |
|---|
| 155 | | - int nelems, enum dma_data_direction direction, |
|---|
| 156 | | - unsigned long attrs) |
|---|
| 157 | | -{ |
|---|
| 158 | | - NPU_DMA_OP_UNSUPPORTED(); |
|---|
| 159 | | - return 0; |
|---|
| 160 | | -} |
|---|
| 161 | | - |
|---|
| 162 | | -static int dma_npu_dma_supported(struct device *dev, u64 mask) |
|---|
| 163 | | -{ |
|---|
| 164 | | - NPU_DMA_OP_UNSUPPORTED(); |
|---|
| 165 | | - return 0; |
|---|
| 166 | | -} |
|---|
| 167 | | - |
|---|
| 168 | | -static u64 dma_npu_get_required_mask(struct device *dev) |
|---|
| 169 | | -{ |
|---|
| 170 | | - NPU_DMA_OP_UNSUPPORTED(); |
|---|
| 171 | | - return 0; |
|---|
| 172 | | -} |
|---|
| 173 | | - |
|---|
| 174 | | -static const struct dma_map_ops dma_npu_ops = { |
|---|
| 175 | | - .map_page = dma_npu_map_page, |
|---|
| 176 | | - .map_sg = dma_npu_map_sg, |
|---|
| 177 | | - .alloc = dma_npu_alloc, |
|---|
| 178 | | - .free = dma_npu_free, |
|---|
| 179 | | - .dma_supported = dma_npu_dma_supported, |
|---|
| 180 | | - .get_required_mask = dma_npu_get_required_mask, |
|---|
| 181 | | -}; |
|---|
| 182 | | - |
|---|
| 93 | +#ifdef CONFIG_IOMMU_API |
|---|
| 183 | 94 | /* |
|---|
| 184 | 95 | * Returns the PE associated with the PCI device of the given |
|---|
| 185 | 96 | * NPU. Returns the linked pci device if pci_dev != NULL. |
|---|
| .. | .. |
|---|
| 211 | 122 | return pe; |
|---|
| 212 | 123 | } |
|---|
| 213 | 124 | |
|---|
| 214 | | -long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num, |
|---|
| 125 | +static long pnv_npu_unset_window(struct iommu_table_group *table_group, |
|---|
| 126 | + int num); |
|---|
| 127 | + |
|---|
| 128 | +static long pnv_npu_set_window(struct iommu_table_group *table_group, int num, |
|---|
| 215 | 129 | struct iommu_table *tbl) |
|---|
| 216 | 130 | { |
|---|
| 131 | + struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe, |
|---|
| 132 | + table_group); |
|---|
| 217 | 133 | struct pnv_phb *phb = npe->phb; |
|---|
| 218 | 134 | int64_t rc; |
|---|
| 219 | 135 | const unsigned long size = tbl->it_indirect_levels ? |
|---|
| 220 | 136 | tbl->it_level_size : tbl->it_size; |
|---|
| 221 | 137 | const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; |
|---|
| 222 | 138 | const __u64 win_size = tbl->it_size << tbl->it_page_shift; |
|---|
| 139 | + int num2 = (num == 0) ? 1 : 0; |
|---|
| 140 | + |
|---|
| 141 | + /* NPU has just one TVE so if there is another table, remove it first */ |
|---|
| 142 | + if (npe->table_group.tables[num2]) |
|---|
| 143 | + pnv_npu_unset_window(&npe->table_group, num2); |
|---|
| 223 | 144 | |
|---|
| 224 | 145 | pe_info(npe, "Setting up window %llx..%llx pg=%lx\n", |
|---|
| 225 | 146 | start_addr, start_addr + win_size - 1, |
|---|
| .. | .. |
|---|
| 245 | 166 | return 0; |
|---|
| 246 | 167 | } |
|---|
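
The reworked callbacks above now take an `iommu_table_group *` and recover the owning PE with `container_of()`, rather than being passed a `pnv_ioda_pe *` directly; this is what lets one ops table serve any structure that embeds the group. A minimal user-space sketch of that pattern (all names below are illustrative stand-ins, not the kernel API):

```c
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct table_group {
	int windows;
};

struct pe {
	int pe_number;
	struct table_group table_group;	/* embedded, as in pnv_ioda_pe */
};

/* The callback receives only the embedded member and walks back
 * to the enclosing structure, like pnv_npu_set_window() does. */
static long set_window(struct table_group *table_group, int num)
{
	struct pe *pe = container_of(table_group, struct pe, table_group);

	printf("PE#%d: set window %d\n", pe->pe_number, num);
	return 0;
}

int main(void)
{
	struct pe pe = { .pe_number = 4 };

	return (int)set_window(&pe.table_group, 0);
}
```
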
| 247 | 168 | |
|---|
| 248 | | -long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num) |
|---|
| 169 | +static long pnv_npu_unset_window(struct iommu_table_group *table_group, int num) |
|---|
| 249 | 170 | { |
|---|
| 171 | + struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe, |
|---|
| 172 | + table_group); |
|---|
| 250 | 173 | struct pnv_phb *phb = npe->phb; |
|---|
| 251 | 174 | int64_t rc; |
|---|
| 175 | + |
|---|
| 176 | + if (!npe->table_group.tables[num]) |
|---|
| 177 | + return 0; |
|---|
| 252 | 178 | |
|---|
| 253 | 179 | pe_info(npe, "Removing DMA window\n"); |
|---|
| 254 | 180 | |
|---|
| .. | .. |
|---|
| 268 | 194 | return 0; |
|---|
| 269 | 195 | } |
|---|
| 270 | 196 | |
|---|
| 271 | | -/* |
|---|
| 272 | | - * Enables 32 bit DMA on NPU. |
|---|
| 273 | | - */ |
|---|
| 274 | | -static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe) |
|---|
| 275 | | -{ |
|---|
| 276 | | - struct pci_dev *gpdev; |
|---|
| 277 | | - struct pnv_ioda_pe *gpe; |
|---|
| 278 | | - int64_t rc; |
|---|
| 279 | | - |
|---|
| 280 | | - /* |
|---|
| 281 | | - * Find the associated PCI devices and get the dma window |
|---|
| 282 | | - * information from there. |
|---|
| 283 | | - */ |
|---|
| 284 | | - if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV)) |
|---|
| 285 | | - return; |
|---|
| 286 | | - |
|---|
| 287 | | - gpe = get_gpu_pci_dev_and_pe(npe, &gpdev); |
|---|
| 288 | | - if (!gpe) |
|---|
| 289 | | - return; |
|---|
| 290 | | - |
|---|
| 291 | | - rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]); |
|---|
| 292 | | - |
|---|
| 293 | | - /* |
|---|
| 294 | | - * We don't initialise npu_pe->tce32_table as we always use |
|---|
| 295 | | - * dma_npu_ops which are nops. |
|---|
| 296 | | - */ |
|---|
| 297 | | - set_dma_ops(&npe->pdev->dev, &dma_npu_ops); |
|---|
| 298 | | -} |
|---|
| 299 | | - |
|---|
| 300 | | -/* |
|---|
| 301 | | - * Enables bypass mode on the NPU. The NPU only supports one |
|---|
| 302 | | - * window per link, so bypass needs to be explicitly enabled or |
|---|
| 303 | | - * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be |
|---|
| 304 | | - * active at the same time. |
|---|
| 305 | | - */ |
|---|
| 306 | | -static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe) |
|---|
| 307 | | -{ |
|---|
| 308 | | - struct pnv_phb *phb = npe->phb; |
|---|
| 309 | | - int64_t rc = 0; |
|---|
| 310 | | - phys_addr_t top = memblock_end_of_DRAM(); |
|---|
| 311 | | - |
|---|
| 312 | | - if (phb->type != PNV_PHB_NPU_NVLINK || !npe->pdev) |
|---|
| 313 | | - return -EINVAL; |
|---|
| 314 | | - |
|---|
| 315 | | - rc = pnv_npu_unset_window(npe, 0); |
|---|
| 316 | | - if (rc != OPAL_SUCCESS) |
|---|
| 317 | | - return rc; |
|---|
| 318 | | - |
|---|
| 319 | | - /* Enable the bypass window */ |
|---|
| 320 | | - |
|---|
| 321 | | - top = roundup_pow_of_two(top); |
|---|
| 322 | | - dev_info(&npe->pdev->dev, "Enabling bypass for PE %x\n", |
|---|
| 323 | | - npe->pe_number); |
|---|
| 324 | | - rc = opal_pci_map_pe_dma_window_real(phb->opal_id, |
|---|
| 325 | | - npe->pe_number, npe->pe_number, |
|---|
| 326 | | - 0 /* bypass base */, top); |
|---|
| 327 | | - |
|---|
| 328 | | - if (rc == OPAL_SUCCESS) |
|---|
| 329 | | - pnv_pci_ioda2_tce_invalidate_entire(phb, false); |
|---|
| 330 | | - |
|---|
| 331 | | - return rc; |
|---|
| 332 | | -} |
|---|
| 333 | | - |
|---|
| 334 | | -void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass) |
|---|
| 335 | | -{ |
|---|
| 336 | | - int i; |
|---|
| 337 | | - struct pnv_phb *phb; |
|---|
| 338 | | - struct pci_dn *pdn; |
|---|
| 339 | | - struct pnv_ioda_pe *npe; |
|---|
| 340 | | - struct pci_dev *npdev; |
|---|
| 341 | | - |
|---|
| 342 | | - for (i = 0; ; ++i) { |
|---|
| 343 | | - npdev = pnv_pci_get_npu_dev(gpdev, i); |
|---|
| 344 | | - |
|---|
| 345 | | - if (!npdev) |
|---|
| 346 | | - break; |
|---|
| 347 | | - |
|---|
| 348 | | - pdn = pci_get_pdn(npdev); |
|---|
| 349 | | - if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) |
|---|
| 350 | | - return; |
|---|
| 351 | | - |
|---|
| 352 | | - phb = pci_bus_to_host(npdev->bus)->private_data; |
|---|
| 353 | | - |
|---|
| 354 | | - /* We only do bypass if it's enabled on the linked device */ |
|---|
| 355 | | - npe = &phb->ioda.pe_array[pdn->pe_number]; |
|---|
| 356 | | - |
|---|
| 357 | | - if (bypass) { |
|---|
| 358 | | - dev_info(&npdev->dev, |
|---|
| 359 | | - "Using 64-bit DMA iommu bypass\n"); |
|---|
| 360 | | - pnv_npu_dma_set_bypass(npe); |
|---|
| 361 | | - } else { |
|---|
| 362 | | - dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n"); |
|---|
| 363 | | - pnv_npu_dma_set_32(npe); |
|---|
| 364 | | - } |
|---|
| 365 | | - } |
|---|
| 366 | | -} |
|---|
| 367 | | - |
|---|
| 368 | 197 | /* Switch ownership from platform code to external user (e.g. VFIO) */ |
|---|
| 369 | | -void pnv_npu_take_ownership(struct pnv_ioda_pe *npe) |
|---|
| 198 | +static void pnv_npu_take_ownership(struct iommu_table_group *table_group) |
|---|
| 370 | 199 | { |
|---|
| 200 | + struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe, |
|---|
| 201 | + table_group); |
|---|
| 371 | 202 | struct pnv_phb *phb = npe->phb; |
|---|
| 372 | 203 | int64_t rc; |
|---|
| 204 | + struct pci_dev *gpdev = NULL; |
|---|
| 373 | 205 | |
|---|
| 374 | 206 | /* |
|---|
| 375 | 207 | * Note: NPU has just a single TVE in the hardware which means that |
|---|
| .. | .. |
|---|
| 378 | 210 | * if it was enabled at the moment of ownership change. |
|---|
| 379 | 211 | */ |
|---|
| 380 | 212 | if (npe->table_group.tables[0]) { |
|---|
| 381 | | - pnv_npu_unset_window(npe, 0); |
|---|
| 213 | + pnv_npu_unset_window(&npe->table_group, 0); |
|---|
| 382 | 214 | return; |
|---|
| 383 | 215 | } |
|---|
| 384 | 216 | |
|---|
| .. | .. |
|---|
| 391 | 223 | return; |
|---|
| 392 | 224 | } |
|---|
| 393 | 225 | pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false); |
|---|
| 226 | + |
|---|
| 227 | + get_gpu_pci_dev_and_pe(npe, &gpdev); |
|---|
| 228 | + if (gpdev) |
|---|
| 229 | + pnv_npu2_unmap_lpar_dev(gpdev); |
|---|
| 394 | 230 | } |
|---|
| 395 | 231 | |
|---|
| 396 | | -struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe) |
|---|
| 232 | +static void pnv_npu_release_ownership(struct iommu_table_group *table_group) |
|---|
| 397 | 233 | { |
|---|
| 398 | | - struct pnv_phb *phb = npe->phb; |
|---|
| 399 | | - struct pci_bus *pbus = phb->hose->bus; |
|---|
| 400 | | - struct pci_dev *npdev, *gpdev = NULL, *gptmp; |
|---|
| 401 | | - struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev); |
|---|
| 234 | + struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe, |
|---|
| 235 | + table_group); |
|---|
| 236 | + struct pci_dev *gpdev = NULL; |
|---|
| 402 | 237 | |
|---|
| 403 | | - if (!gpe || !gpdev) |
|---|
| 404 | | - return NULL; |
|---|
| 405 | | - |
|---|
| 406 | | - list_for_each_entry(npdev, &pbus->devices, bus_list) { |
|---|
| 407 | | - gptmp = pnv_pci_get_gpu_dev(npdev); |
|---|
| 408 | | - |
|---|
| 409 | | - if (gptmp != gpdev) |
|---|
| 410 | | - continue; |
|---|
| 411 | | - |
|---|
| 412 | | - pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev)); |
|---|
| 413 | | - iommu_group_add_device(gpe->table_group.group, &npdev->dev); |
|---|
| 414 | | - } |
|---|
| 415 | | - |
|---|
| 416 | | - return gpe; |
|---|
| 238 | + get_gpu_pci_dev_and_pe(npe, &gpdev); |
|---|
| 239 | + if (gpdev) |
|---|
| 240 | + pnv_npu2_map_lpar_dev(gpdev, 0, MSR_DR | MSR_PR | MSR_HV); |
|---|
| 417 | 241 | } |
|---|
| 418 | 242 | |
|---|
| 419 | | -/* Maximum number of nvlinks per npu */ |
|---|
| 420 | | -#define NV_MAX_LINKS 6 |
|---|
| 421 | | - |
|---|
| 422 | | -/* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */ |
|---|
| 423 | | -static int max_npu2_index; |
|---|
| 424 | | - |
|---|
| 425 | | -struct npu_context { |
|---|
| 426 | | - struct mm_struct *mm; |
|---|
| 427 | | - struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS]; |
|---|
| 428 | | - struct mmu_notifier mn; |
|---|
| 429 | | - struct kref kref; |
|---|
| 430 | | - bool nmmu_flush; |
|---|
| 431 | | - |
|---|
| 432 | | - /* Callback to stop translation requests on a given GPU */ |
|---|
| 433 | | - void (*release_cb)(struct npu_context *context, void *priv); |
|---|
| 434 | | - |
|---|
| 435 | | - /* |
|---|
| 436 | | - * Private pointer passed to the above callback for usage by |
|---|
| 437 | | - * device drivers. |
|---|
| 438 | | - */ |
|---|
| 439 | | - void *priv; |
|---|
| 243 | +static struct iommu_table_group_ops pnv_pci_npu_ops = { |
|---|
| 244 | + .set_window = pnv_npu_set_window, |
|---|
| 245 | + .unset_window = pnv_npu_unset_window, |
|---|
| 246 | + .take_ownership = pnv_npu_take_ownership, |
|---|
| 247 | + .release_ownership = pnv_npu_release_ownership, |
|---|
| 440 | 248 | }; |
|---|
| 441 | | - |
|---|
| 442 | | -struct mmio_atsd_reg { |
|---|
| 443 | | - struct npu *npu; |
|---|
| 444 | | - int reg; |
|---|
| 445 | | -}; |
|---|
| 249 | +#endif /* !CONFIG_IOMMU_API */ |
|---|
| 446 | 250 | |
|---|
| 447 | 251 | /* |
|---|
| 448 | | - * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC |
|---|
| 449 | | - * if none are available. |
|---|
| 252 | + * NPU2 ATS |
|---|
| 450 | 253 | */ |
|---|
| 451 | | -static int get_mmio_atsd_reg(struct npu *npu) |
|---|
| 452 | | -{ |
|---|
| 453 | | - int i; |
|---|
| 454 | | - |
|---|
| 455 | | - for (i = 0; i < npu->mmio_atsd_count; i++) { |
|---|
| 456 | | - if (!test_bit(i, &npu->mmio_atsd_usage)) |
|---|
| 457 | | - if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage)) |
|---|
| 458 | | - return i; |
|---|
| 459 | | - } |
|---|
| 460 | | - |
|---|
| 461 | | - return -ENOSPC; |
|---|
| 462 | | -} |
|---|
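
For reference, the allocator being removed here claimed a free ATSD register by doing a cheap unlocked test of a usage bitmap followed by an atomic test-and-set; releasing simply cleared the bit. A user-space sketch of the same idea using C11 atomics (a hypothetical stand-in for `test_and_set_bit_lock()`/`clear_bit_unlock()`, not the kernel primitives):

```c
#include <stdatomic.h>
#include <stdio.h>

#define NREGS 8

static atomic_ulong usage;		/* one bit per ATSD register */

static int get_reg(void)
{
	for (int i = 0; i < NREGS; i++) {
		unsigned long bit = 1UL << i;

		/* unlocked peek first, then the atomic claim: fetch_or
		 * returns the old mask, so if our bit was clear we are
		 * the thread that set it and we own slot i */
		if (!(atomic_load(&usage) & bit) &&
		    !(atomic_fetch_or(&usage, bit) & bit))
			return i;
	}
	return -1;			/* like -ENOSPC */
}

static void put_reg(int i)
{
	atomic_fetch_and(&usage, ~(1UL << i));
}

int main(void)
{
	int reg = get_reg();

	printf("claimed reg %d\n", reg);
	if (reg >= 0)
		put_reg(reg);
	return 0;
}
```
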
| 463 | | - |
|---|
| 464 | | -static void put_mmio_atsd_reg(struct npu *npu, int reg) |
|---|
| 465 | | -{ |
|---|
| 466 | | - clear_bit_unlock(reg, &npu->mmio_atsd_usage); |
|---|
| 467 | | -} |
|---|
| 468 | | - |
|---|
| 469 | | -/* MMIO ATSD register offsets */ |
|---|
| 470 | | -#define XTS_ATSD_AVA 1 |
|---|
| 471 | | -#define XTS_ATSD_STAT 2 |
|---|
| 472 | | - |
|---|
| 473 | | -static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg, |
|---|
| 474 | | - unsigned long launch, unsigned long va) |
|---|
| 475 | | -{ |
|---|
| 476 | | - struct npu *npu = mmio_atsd_reg->npu; |
|---|
| 477 | | - int reg = mmio_atsd_reg->reg; |
|---|
| 478 | | - |
|---|
| 479 | | - __raw_writeq_be(va, npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA); |
|---|
| 480 | | - eieio(); |
|---|
| 481 | | - __raw_writeq_be(launch, npu->mmio_atsd_regs[reg]); |
|---|
| 482 | | -} |
|---|
| 483 | | - |
|---|
| 484 | | -static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], |
|---|
| 485 | | - unsigned long pid, bool flush) |
|---|
| 486 | | -{ |
|---|
| 487 | | - int i; |
|---|
| 488 | | - unsigned long launch; |
|---|
| 489 | | - |
|---|
| 490 | | - for (i = 0; i <= max_npu2_index; i++) { |
|---|
| 491 | | - if (mmio_atsd_reg[i].reg < 0) |
|---|
| 492 | | - continue; |
|---|
| 493 | | - |
|---|
| 494 | | - /* IS set to invalidate matching PID */ |
|---|
| 495 | | - launch = PPC_BIT(12); |
|---|
| 496 | | - |
|---|
| 497 | | - /* PRS set to process-scoped */ |
|---|
| 498 | | - launch |= PPC_BIT(13); |
|---|
| 499 | | - |
|---|
| 500 | | - /* AP */ |
|---|
| 501 | | - launch |= (u64) |
|---|
| 502 | | - mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); |
|---|
| 503 | | - |
|---|
| 504 | | - /* PID */ |
|---|
| 505 | | - launch |= pid << PPC_BITLSHIFT(38); |
|---|
| 506 | | - |
|---|
| 507 | | - /* No flush */ |
|---|
| 508 | | - launch |= !flush << PPC_BITLSHIFT(39); |
|---|
| 509 | | - |
|---|
| 510 | | - /* Invalidating the entire process doesn't use a va */ |
|---|
| 511 | | - mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0); |
|---|
| 512 | | - } |
|---|
| 513 | | -} |
|---|
| 514 | | - |
|---|
| 515 | | -static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], |
|---|
| 516 | | - unsigned long va, unsigned long pid, bool flush) |
|---|
| 517 | | -{ |
|---|
| 518 | | - int i; |
|---|
| 519 | | - unsigned long launch; |
|---|
| 520 | | - |
|---|
| 521 | | - for (i = 0; i <= max_npu2_index; i++) { |
|---|
| 522 | | - if (mmio_atsd_reg[i].reg < 0) |
|---|
| 523 | | - continue; |
|---|
| 524 | | - |
|---|
| 525 | | - /* IS set to invalidate target VA */ |
|---|
| 526 | | - launch = 0; |
|---|
| 527 | | - |
|---|
| 528 | | - /* PRS set to process scoped */ |
|---|
| 529 | | - launch |= PPC_BIT(13); |
|---|
| 530 | | - |
|---|
| 531 | | - /* AP */ |
|---|
| 532 | | - launch |= (u64) |
|---|
| 533 | | - mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); |
|---|
| 534 | | - |
|---|
| 535 | | - /* PID */ |
|---|
| 536 | | - launch |= pid << PPC_BITLSHIFT(38); |
|---|
| 537 | | - |
|---|
| 538 | | - /* No flush */ |
|---|
| 539 | | - launch |= !flush << PPC_BITLSHIFT(39); |
|---|
| 540 | | - |
|---|
| 541 | | - mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va); |
|---|
| 542 | | - } |
|---|
| 543 | | -} |
|---|
| 544 | | - |
|---|
| 545 | | -#define mn_to_npu_context(x) container_of(x, struct npu_context, mn) |
|---|
| 546 | | - |
|---|
| 547 | | -static void mmio_invalidate_wait( |
|---|
| 548 | | - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) |
|---|
| 549 | | -{ |
|---|
| 550 | | - struct npu *npu; |
|---|
| 551 | | - int i, reg; |
|---|
| 552 | | - |
|---|
| 553 | | - /* Wait for all invalidations to complete */ |
|---|
| 554 | | - for (i = 0; i <= max_npu2_index; i++) { |
|---|
| 555 | | - if (mmio_atsd_reg[i].reg < 0) |
|---|
| 556 | | - continue; |
|---|
| 557 | | - |
|---|
| 558 | | - /* Wait for completion */ |
|---|
| 559 | | - npu = mmio_atsd_reg[i].npu; |
|---|
| 560 | | - reg = mmio_atsd_reg[i].reg; |
|---|
| 561 | | - while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) |
|---|
| 562 | | - cpu_relax(); |
|---|
| 563 | | - } |
|---|
| 564 | | -} |
|---|
| 254 | +/* Maximum possible number of ATSD MMIO registers per NPU */ |
|---|
| 255 | +#define NV_NMMU_ATSD_REGS 8 |
|---|
| 256 | +#define NV_NPU_MAX_PE_NUM 16 |
|---|
| 565 | 257 | |
|---|
| 566 | 258 | /* |
|---|
| 567 | | - * Acquires all the address translation shootdown (ATSD) registers required to |
|---|
| 568 | | - * launch an ATSD on all links this npu_context is active on. |
|---|
| 259 | + * A compound NPU IOMMU group which might consist of 1 GPU + 2xNPUs (POWER8) or |
|---|
| 260 | + * up to 3 x (GPU + 2xNPUs) (POWER9). |
|---|
| 569 | 261 | */ |
|---|
| 570 | | -static void acquire_atsd_reg(struct npu_context *npu_context, |
|---|
| 571 | | - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) |
|---|
| 262 | +struct npu_comp { |
|---|
| 263 | + struct iommu_table_group table_group; |
|---|
| 264 | + int pe_num; |
|---|
| 265 | + struct pnv_ioda_pe *pe[NV_NPU_MAX_PE_NUM]; |
|---|
| 266 | +}; |
|---|
| 267 | + |
|---|
| 268 | +/* An NPU descriptor, valid for POWER9 only */ |
|---|
| 269 | +struct npu { |
|---|
| 270 | + int index; |
|---|
| 271 | + struct npu_comp npucomp; |
|---|
| 272 | +}; |
|---|
| 273 | + |
|---|
| 274 | +#ifdef CONFIG_IOMMU_API |
|---|
| 275 | +static long pnv_npu_peers_create_table_userspace( |
|---|
| 276 | + struct iommu_table_group *table_group, |
|---|
| 277 | + int num, __u32 page_shift, __u64 window_size, __u32 levels, |
|---|
| 278 | + struct iommu_table **ptbl) |
|---|
| 279 | +{ |
|---|
| 280 | + struct npu_comp *npucomp = container_of(table_group, struct npu_comp, |
|---|
| 281 | + table_group); |
|---|
| 282 | + |
|---|
| 283 | + if (!npucomp->pe_num || !npucomp->pe[0] || |
|---|
| 284 | + !npucomp->pe[0]->table_group.ops || |
|---|
| 285 | + !npucomp->pe[0]->table_group.ops->create_table) |
|---|
| 286 | + return -EFAULT; |
|---|
| 287 | + |
|---|
| 288 | + return npucomp->pe[0]->table_group.ops->create_table( |
|---|
| 289 | + &npucomp->pe[0]->table_group, num, page_shift, |
|---|
| 290 | + window_size, levels, ptbl); |
|---|
| 291 | +} |
|---|
| 292 | + |
|---|
| 293 | +static long pnv_npu_peers_set_window(struct iommu_table_group *table_group, |
|---|
| 294 | + int num, struct iommu_table *tbl) |
|---|
| 572 | 295 | { |
|---|
| 573 | 296 | int i, j; |
|---|
| 574 | | - struct npu *npu; |
|---|
| 575 | | - struct pci_dev *npdev; |
|---|
| 576 | | - struct pnv_phb *nphb; |
|---|
| 297 | + long ret = 0; |
|---|
| 298 | + struct npu_comp *npucomp = container_of(table_group, struct npu_comp, |
|---|
| 299 | + table_group); |
|---|
| 577 | 300 | |
|---|
| 578 | | - for (i = 0; i <= max_npu2_index; i++) { |
|---|
| 579 | | - mmio_atsd_reg[i].reg = -1; |
|---|
| 580 | | - for (j = 0; j < NV_MAX_LINKS; j++) { |
|---|
| 581 | | - /* |
|---|
| 582 | | - * There are no ordering requirements with respect to |
|---|
| 583 | | - * the setup of struct npu_context, but to ensure |
|---|
| 584 | | - * consistent behaviour we need to ensure npdev[][] is |
|---|
| 585 | | - * only read once. |
|---|
| 586 | | - */ |
|---|
| 587 | | - npdev = READ_ONCE(npu_context->npdev[i][j]); |
|---|
| 588 | | - if (!npdev) |
|---|
| 301 | + for (i = 0; i < npucomp->pe_num; ++i) { |
|---|
| 302 | + struct pnv_ioda_pe *pe = npucomp->pe[i]; |
|---|
| 303 | + |
|---|
| 304 | + if (!pe->table_group.ops->set_window) |
|---|
| 305 | + continue; |
|---|
| 306 | + |
|---|
| 307 | + ret = pe->table_group.ops->set_window(&pe->table_group, |
|---|
| 308 | + num, tbl); |
|---|
| 309 | + if (ret) |
|---|
| 310 | + break; |
|---|
| 311 | + } |
|---|
| 312 | + |
|---|
| 313 | + if (ret) { |
|---|
| 314 | + for (j = 0; j < i; ++j) { |
|---|
| 315 | + struct pnv_ioda_pe *pe = npucomp->pe[j]; |
|---|
| 316 | + |
|---|
| 317 | + if (!pe->table_group.ops->unset_window) |
|---|
| 589 | 318 | continue; |
|---|
| 590 | 319 | |
|---|
| 591 | | - nphb = pci_bus_to_host(npdev->bus)->private_data; |
|---|
| 592 | | - npu = &nphb->npu; |
|---|
| 593 | | - mmio_atsd_reg[i].npu = npu; |
|---|
| 594 | | - mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); |
|---|
| 595 | | - while (mmio_atsd_reg[i].reg < 0) { |
|---|
| 596 | | - mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); |
|---|
| 597 | | - cpu_relax(); |
|---|
| 598 | | - } |
|---|
| 599 | | - break; |
|---|
| 320 | + ret = pe->table_group.ops->unset_window( |
|---|
| 321 | + &pe->table_group, num); |
|---|
| 322 | + if (ret) |
|---|
| 323 | + break; |
|---|
| 600 | 324 | } |
|---|
| 325 | + } else { |
|---|
| 326 | + table_group->tables[num] = iommu_tce_table_get(tbl); |
|---|
| 601 | 327 | } |
|---|
| 328 | + |
|---|
| 329 | + return ret; |
|---|
| 602 | 330 | } |
|---|
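
`pnv_npu_peers_set_window()` above is an all-or-nothing operation: it applies the window to every peer PE and, if any peer fails, unwinds the peers that already succeeded before returning the error. A standalone sketch of that loop shape (the peer numbering and injected failure are illustrative):

```c
#include <stdio.h>

#define NPEERS 3

/* pretend peer 2 rejects the window so the unwind path runs */
static int set_window(int peer)
{
	return (peer == 2) ? -1 : 0;
}

static void unset_window(int peer)
{
	printf("peer %d: window rolled back\n", peer);
}

static int set_all_windows(void)
{
	int i, j, ret = 0;

	for (i = 0; i < NPEERS; ++i) {
		ret = set_window(i);
		if (ret)
			break;
	}

	if (ret) {
		/* unwind only the peers that already succeeded */
		for (j = 0; j < i; ++j)
			unset_window(j);
	}

	return ret;
}

int main(void)
{
	printf("set_all_windows() = %d\n", set_all_windows());
	return 0;
}
```

`pnv_npu_peers_unset_window()` below is the mirror image: it removes the window from each peer and, on failure, re-installs it on the peers it had already cleared.
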
| 603 | 331 | |
|---|
| 604 | | -/* |
|---|
| 605 | | - * Release previously acquired ATSD registers. To avoid deadlocks the registers |
|---|
| 606 | | - * must be released in the same order they were acquired above in |
|---|
| 607 | | - * acquire_atsd_reg. |
|---|
| 608 | | - */ |
|---|
| 609 | | -static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) |
|---|
| 332 | +static long pnv_npu_peers_unset_window(struct iommu_table_group *table_group, |
|---|
| 333 | + int num) |
|---|
| 334 | +{ |
|---|
| 335 | + int i, j; |
|---|
| 336 | + long ret = 0; |
|---|
| 337 | + struct npu_comp *npucomp = container_of(table_group, struct npu_comp, |
|---|
| 338 | + table_group); |
|---|
| 339 | + |
|---|
| 340 | + for (i = 0; i < npucomp->pe_num; ++i) { |
|---|
| 341 | + struct pnv_ioda_pe *pe = npucomp->pe[i]; |
|---|
| 342 | + |
|---|
| 343 | + WARN_ON(npucomp->table_group.tables[num] != |
|---|
| 344 | + table_group->tables[num]); |
|---|
| 345 | + if (!npucomp->table_group.tables[num]) |
|---|
| 346 | + continue; |
|---|
| 347 | + |
|---|
| 348 | + if (!pe->table_group.ops->unset_window) |
|---|
| 349 | + continue; |
|---|
| 350 | + |
|---|
| 351 | + ret = pe->table_group.ops->unset_window(&pe->table_group, num); |
|---|
| 352 | + if (ret) |
|---|
| 353 | + break; |
|---|
| 354 | + } |
|---|
| 355 | + |
|---|
| 356 | + if (ret) { |
|---|
| 357 | + for (j = 0; j < i; ++j) { |
|---|
| 358 | + struct pnv_ioda_pe *pe = npucomp->pe[j]; |
|---|
| 359 | + |
|---|
| 360 | + if (!npucomp->table_group.tables[num]) |
|---|
| 361 | + continue; |
|---|
| 362 | + |
|---|
| 363 | + if (!pe->table_group.ops->set_window) |
|---|
| 364 | + continue; |
|---|
| 365 | + |
|---|
| 366 | + ret = pe->table_group.ops->set_window(&pe->table_group, |
|---|
| 367 | + num, table_group->tables[num]); |
|---|
| 368 | + if (ret) |
|---|
| 369 | + break; |
|---|
| 370 | + } |
|---|
| 371 | + } else if (table_group->tables[num]) { |
|---|
| 372 | + iommu_tce_table_put(table_group->tables[num]); |
|---|
| 373 | + table_group->tables[num] = NULL; |
|---|
| 374 | + } |
|---|
| 375 | + |
|---|
| 376 | + return ret; |
|---|
| 377 | +} |
|---|
| 378 | + |
|---|
| 379 | +static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group) |
|---|
| 610 | 380 | { |
|---|
| 611 | 381 | int i; |
|---|
| 382 | + struct npu_comp *npucomp = container_of(table_group, struct npu_comp, |
|---|
| 383 | + table_group); |
|---|
| 612 | 384 | |
|---|
| 613 | | - for (i = 0; i <= max_npu2_index; i++) { |
|---|
| 614 | | - /* |
|---|
| 615 | | - * We can't rely on npu_context->npdev[][] being the same here |
|---|
| 616 | | - * as when acquire_atsd_reg() was called, hence we use the |
|---|
| 617 | | - * values stored in mmio_atsd_reg during the acquire phase |
|---|
| 618 | | - * rather than re-reading npdev[][]. |
|---|
| 619 | | - */ |
|---|
| 620 | | - if (mmio_atsd_reg[i].reg < 0) |
|---|
| 385 | + for (i = 0; i < npucomp->pe_num; ++i) { |
|---|
| 386 | + struct pnv_ioda_pe *pe = npucomp->pe[i]; |
|---|
| 387 | + |
|---|
| 388 | + if (!pe->table_group.ops || |
|---|
| 389 | + !pe->table_group.ops->take_ownership) |
|---|
| 621 | 390 | continue; |
|---|
| 622 | | - |
|---|
| 623 | | - put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg); |
|---|
| 391 | + pe->table_group.ops->take_ownership(&pe->table_group); |
|---|
| 624 | 392 | } |
|---|
| 625 | 393 | } |
|---|
| 626 | 394 | |
|---|
| 627 | | -/* |
|---|
| 628 | | - * Invalidate either a single address or an entire PID depending on |
|---|
| 629 | | - * the value of va. |
|---|
| 630 | | - */ |
|---|
| 631 | | -static void mmio_invalidate(struct npu_context *npu_context, int va, |
|---|
| 632 | | - unsigned long address, bool flush) |
|---|
| 395 | +static void pnv_npu_peers_release_ownership( |
|---|
| 396 | + struct iommu_table_group *table_group) |
|---|
| 633 | 397 | { |
|---|
| 634 | | - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; |
|---|
| 635 | | - unsigned long pid = npu_context->mm->context.id; |
|---|
| 398 | + int i; |
|---|
| 399 | + struct npu_comp *npucomp = container_of(table_group, struct npu_comp, |
|---|
| 400 | + table_group); |
|---|
| 636 | 401 | |
|---|
| 637 | | - if (npu_context->nmmu_flush) |
|---|
| 638 | | - /* |
|---|
| 639 | | - * Unfortunately the nest mmu does not support flushing specific |
|---|
| 640 | | - * addresses so we have to flush the whole mm once before |
|---|
| 641 | | - * shooting down the GPU translation. |
|---|
| 642 | | - */ |
|---|
| 643 | | - flush_all_mm(npu_context->mm); |
|---|
| 402 | + for (i = 0; i < npucomp->pe_num; ++i) { |
|---|
| 403 | + struct pnv_ioda_pe *pe = npucomp->pe[i]; |
|---|
| 644 | 404 | |
|---|
| 645 | | - /* |
|---|
| 646 | | - * Loop over all the NPUs this process is active on and launch |
|---|
| 647 | | - * an invalidate. |
|---|
| 648 | | - */ |
|---|
| 649 | | - acquire_atsd_reg(npu_context, mmio_atsd_reg); |
|---|
| 650 | | - if (va) |
|---|
| 651 | | - mmio_invalidate_va(mmio_atsd_reg, address, pid, flush); |
|---|
| 652 | | - else |
|---|
| 653 | | - mmio_invalidate_pid(mmio_atsd_reg, pid, flush); |
|---|
| 654 | | - |
|---|
| 655 | | - mmio_invalidate_wait(mmio_atsd_reg); |
|---|
| 656 | | - if (flush) { |
|---|
| 657 | | - /* |
|---|
| 658 | | - * The GPU requires two flush ATSDs to ensure all entries have |
|---|
| 659 | | - * been flushed. We use PID 0 as it will never be used for a |
|---|
| 660 | | - * process on the GPU. |
|---|
| 661 | | - */ |
|---|
| 662 | | - mmio_invalidate_pid(mmio_atsd_reg, 0, true); |
|---|
| 663 | | - mmio_invalidate_wait(mmio_atsd_reg); |
|---|
| 664 | | - mmio_invalidate_pid(mmio_atsd_reg, 0, true); |
|---|
| 665 | | - mmio_invalidate_wait(mmio_atsd_reg); |
|---|
| 666 | | - } |
|---|
| 667 | | - release_atsd_reg(mmio_atsd_reg); |
|---|
| 668 | | -} |
|---|
| 669 | | - |
|---|
| 670 | | -static void pnv_npu2_mn_release(struct mmu_notifier *mn, |
|---|
| 671 | | - struct mm_struct *mm) |
|---|
| 672 | | -{ |
|---|
| 673 | | - struct npu_context *npu_context = mn_to_npu_context(mn); |
|---|
| 674 | | - |
|---|
| 675 | | - /* Call into device driver to stop requests to the NMMU */ |
|---|
| 676 | | - if (npu_context->release_cb) |
|---|
| 677 | | - npu_context->release_cb(npu_context, npu_context->priv); |
|---|
| 678 | | - |
|---|
| 679 | | - /* |
|---|
| 680 | | - * There should be no more translation requests for this PID, but we |
|---|
| 681 | | - * need to ensure any entries for it are removed from the TLB. |
|---|
| 682 | | - */ |
|---|
| 683 | | - mmio_invalidate(npu_context, 0, 0, true); |
|---|
| 684 | | -} |
|---|
| 685 | | - |
|---|
| 686 | | -static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, |
|---|
| 687 | | - struct mm_struct *mm, |
|---|
| 688 | | - unsigned long address, |
|---|
| 689 | | - pte_t pte) |
|---|
| 690 | | -{ |
|---|
| 691 | | - struct npu_context *npu_context = mn_to_npu_context(mn); |
|---|
| 692 | | - |
|---|
| 693 | | - mmio_invalidate(npu_context, 1, address, true); |
|---|
| 694 | | -} |
|---|
| 695 | | - |
|---|
| 696 | | -static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, |
|---|
| 697 | | - struct mm_struct *mm, |
|---|
| 698 | | - unsigned long start, unsigned long end) |
|---|
| 699 | | -{ |
|---|
| 700 | | - struct npu_context *npu_context = mn_to_npu_context(mn); |
|---|
| 701 | | - unsigned long address; |
|---|
| 702 | | - |
|---|
| 703 | | - if (end - start > atsd_threshold) { |
|---|
| 704 | | - /* |
|---|
| 705 | | - * Just invalidate the entire PID if the address range is too |
|---|
| 706 | | - * large. |
|---|
| 707 | | - */ |
|---|
| 708 | | - mmio_invalidate(npu_context, 0, 0, true); |
|---|
| 709 | | - } else { |
|---|
| 710 | | - for (address = start; address < end; address += PAGE_SIZE) |
|---|
| 711 | | - mmio_invalidate(npu_context, 1, address, false); |
|---|
| 712 | | - |
|---|
| 713 | | - /* Do the flush only on the final address == end */ |
|---|
| 714 | | - mmio_invalidate(npu_context, 1, address, true); |
|---|
| 405 | + if (!pe->table_group.ops || |
|---|
| 406 | + !pe->table_group.ops->release_ownership) |
|---|
| 407 | + continue; |
|---|
| 408 | + pe->table_group.ops->release_ownership(&pe->table_group); |
|---|
| 715 | 409 | } |
|---|
| 716 | 410 | } |
|---|
| 717 | 411 | |
|---|
| 718 | | -static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { |
|---|
| 719 | | - .release = pnv_npu2_mn_release, |
|---|
| 720 | | - .change_pte = pnv_npu2_mn_change_pte, |
|---|
| 721 | | - .invalidate_range = pnv_npu2_mn_invalidate_range, |
|---|
| 412 | +static struct iommu_table_group_ops pnv_npu_peers_ops = { |
|---|
| 413 | + .get_table_size = pnv_pci_ioda2_get_table_size, |
|---|
| 414 | + .create_table = pnv_npu_peers_create_table_userspace, |
|---|
| 415 | + .set_window = pnv_npu_peers_set_window, |
|---|
| 416 | + .unset_window = pnv_npu_peers_unset_window, |
|---|
| 417 | + .take_ownership = pnv_npu_peers_take_ownership, |
|---|
| 418 | + .release_ownership = pnv_npu_peers_release_ownership, |
|---|
| 722 | 419 | }; |
|---|
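
These callbacks are published through an ops table, so callers such as VFIO dispatch through `table_group->ops` without caring whether the group is a single PE or a compound of peers. A minimal sketch of that dispatch pattern (names are illustrative, not the kernel types):

```c
#include <stdio.h>

struct group;

struct group_ops {
	long (*set_window)(struct group *group, int num);
	long (*unset_window)(struct group *group, int num);
};

struct group {
	const struct group_ops *ops;
	const char *name;
};

static long peers_set_window(struct group *group, int num)
{
	printf("%s: set window %d\n", group->name, num);
	return 0;
}

static long peers_unset_window(struct group *group, int num)
{
	printf("%s: unset window %d\n", group->name, num);
	return 0;
}

static const struct group_ops peers_ops = {
	.set_window	= peers_set_window,
	.unset_window	= peers_unset_window,
};

int main(void)
{
	struct group group = { .ops = &peers_ops, .name = "npucomp" };

	/* test each hook before calling it, as the kernel code does */
	if (group.ops->set_window)
		group.ops->set_window(&group, 0);
	if (group.ops->unset_window)
		group.ops->unset_window(&group, 0);
	return 0;
}
```
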
| 723 | 420 | |
|---|
| 724 | | -/* |
|---|
| 725 | | - * Call into OPAL to setup the nmmu context for the current task in |
|---|
| 726 | | - * the NPU. This must be called to setup the context tables before the |
|---|
| 727 | | - * GPU issues ATRs. pdev should be a pointed to PCIe GPU device. |
|---|
| 728 | | - * |
|---|
| 729 | | - * A release callback should be registered to allow a device driver to |
|---|
| 730 | | - * be notified that it should not launch any new translation requests |
|---|
| 731 | | - * as the final TLB invalidate is about to occur. |
|---|
| 732 | | - * |
|---|
| 733 | | - * Returns an error if there no contexts are currently available or a |
|---|
| 734 | | - * npu_context which should be passed to pnv_npu2_handle_fault(). |
|---|
| 735 | | - * |
|---|
| 736 | | - * mmap_sem must be held in write mode and must not be called from interrupt |
|---|
| 737 | | - * context. |
|---|
| 738 | | - */ |
|---|
| 739 | | -struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, |
|---|
| 740 | | - unsigned long flags, |
|---|
| 741 | | - void (*cb)(struct npu_context *, void *), |
|---|
| 742 | | - void *priv) |
|---|
| 421 | +static void pnv_comp_attach_table_group(struct npu_comp *npucomp, |
|---|
| 422 | + struct pnv_ioda_pe *pe) |
|---|
| 743 | 423 | { |
|---|
| 744 | | - int rc; |
|---|
| 745 | | - u32 nvlink_index; |
|---|
| 746 | | - struct device_node *nvlink_dn; |
|---|
| 747 | | - struct mm_struct *mm = current->mm; |
|---|
| 748 | | - struct pnv_phb *nphb; |
|---|
| 749 | | - struct npu *npu; |
|---|
| 750 | | - struct npu_context *npu_context; |
|---|
| 424 | + if (WARN_ON(npucomp->pe_num == NV_NPU_MAX_PE_NUM)) |
|---|
| 425 | + return; |
|---|
| 751 | 426 | |
|---|
| 752 | | - /* |
|---|
| 753 | | - * At present we don't support GPUs connected to multiple NPUs and I'm |
|---|
| 754 | | - * not sure the hardware does either. |
|---|
| 755 | | - */ |
|---|
| 756 | | - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); |
|---|
| 427 | + npucomp->pe[npucomp->pe_num] = pe; |
|---|
| 428 | + ++npucomp->pe_num; |
|---|
| 429 | +} |
|---|
| 757 | 430 | |
|---|
| 758 | | - if (!firmware_has_feature(FW_FEATURE_OPAL)) |
|---|
| 759 | | - return ERR_PTR(-ENODEV); |
|---|
| 431 | +static struct iommu_table_group * |
|---|
| 432 | + pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe) |
|---|
| 433 | +{ |
|---|
| 434 | + struct iommu_table_group *compound_group; |
|---|
| 435 | + struct npu_comp *npucomp; |
|---|
| 436 | + struct pci_dev *gpdev = NULL; |
|---|
| 437 | + struct pci_controller *hose; |
|---|
| 438 | + struct pci_dev *npdev = NULL; |
|---|
| 439 | + |
|---|
| 440 | + list_for_each_entry(gpdev, &pe->pbus->devices, bus_list) { |
|---|
| 441 | + npdev = pnv_pci_get_npu_dev(gpdev, 0); |
|---|
| 442 | + if (npdev) |
|---|
| 443 | + break; |
|---|
| 444 | + } |
|---|
| 760 | 445 | |
|---|
| 761 | 446 | if (!npdev) |
|---|
| 762 | | - /* No nvlink associated with this GPU device */ |
|---|
| 763 | | - return ERR_PTR(-ENODEV); |
|---|
| 447 | + /* It is not an NPU attached device, skip */ |
|---|
| 448 | + return NULL; |
|---|
| 764 | 449 | |
|---|
| 765 | | - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); |
|---|
| 766 | | - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", |
|---|
| 767 | | - &nvlink_index))) |
|---|
| 768 | | - return ERR_PTR(-ENODEV); |
|---|
| 450 | + hose = pci_bus_to_host(npdev->bus); |
|---|
| 769 | 451 | |
|---|
| 770 | | - if (!mm || mm->context.id == 0) { |
|---|
| 771 | | - /* |
|---|
| 772 | | - * Kernel thread contexts are not supported and context id 0 is |
|---|
| 773 | | - * reserved on the GPU. |
|---|
| 774 | | - */ |
|---|
| 775 | | - return ERR_PTR(-EINVAL); |
|---|
| 452 | + if (hose->npu) { |
|---|
| 453 | + /* P9 case: compound group is per-NPU (all gpus, all links) */ |
|---|
| 454 | + npucomp = &hose->npu->npucomp; |
|---|
| 455 | + } else { |
|---|
| 456 | + /* P8 case: compound group is per-GPU (1 gpu, 2 links) */ |
|---|
| 457 | + npucomp = pe->npucomp = kzalloc(sizeof(*npucomp), GFP_KERNEL); |
|---|
| 776 | 458 | } |
|---|
| 777 | 459 | |
|---|
| 778 | | - nphb = pci_bus_to_host(npdev->bus)->private_data; |
|---|
| 779 | | - npu = &nphb->npu; |
|---|
| 460 | + compound_group = &npucomp->table_group; |
|---|
| 461 | + if (!compound_group->group) { |
|---|
| 462 | + compound_group->ops = &pnv_npu_peers_ops; |
|---|
| 463 | + iommu_register_group(compound_group, hose->global_number, |
|---|
| 464 | + pe->pe_number); |
|---|
| 780 | 465 | |
|---|
| 781 | | - /* |
|---|
| 782 | | - * Setup the NPU context table for a particular GPU. These need to be |
|---|
| 783 | | - * per-GPU as we need the tables to filter ATSDs when there are no |
|---|
| 784 | | - * active contexts on a particular GPU. It is safe for these to be |
|---|
| 785 | | - * called concurrently with destroy as the OPAL call takes appropriate |
|---|
| 786 | | - * locks and refcounts on init/destroy. |
|---|
| 787 | | - */ |
|---|
| 788 | | - rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags, |
|---|
| 789 | | - PCI_DEVID(gpdev->bus->number, gpdev->devfn)); |
|---|
| 790 | | - if (rc < 0) |
|---|
| 791 | | - return ERR_PTR(-ENOSPC); |
|---|
| 792 | | - |
|---|
| 793 | | - /* |
|---|
| 794 | | - * We store the npu pci device so we can more easily get at the |
|---|
| 795 | | - * associated npus. |
|---|
| 796 | | - */ |
|---|
| 797 | | - spin_lock(&npu_context_lock); |
|---|
| 798 | | - npu_context = mm->context.npu_context; |
|---|
| 799 | | - if (npu_context) { |
|---|
| 800 | | - if (npu_context->release_cb != cb || |
|---|
| 801 | | - npu_context->priv != priv) { |
|---|
| 802 | | - spin_unlock(&npu_context_lock); |
|---|
| 803 | | - opal_npu_destroy_context(nphb->opal_id, mm->context.id, |
|---|
| 804 | | - PCI_DEVID(gpdev->bus->number, |
|---|
| 805 | | - gpdev->devfn)); |
|---|
| 806 | | - return ERR_PTR(-EINVAL); |
|---|
| 807 | | - } |
|---|
| 808 | | - |
|---|
| 809 | | - WARN_ON(!kref_get_unless_zero(&npu_context->kref)); |
|---|
| 810 | | - } |
|---|
| 811 | | - spin_unlock(&npu_context_lock); |
|---|
| 812 | | - |
|---|
| 813 | | - if (!npu_context) { |
|---|
| 814 | | - /* |
|---|
| 815 | | - * We can set up these fields without holding the |
|---|
| 816 | | - * npu_context_lock as the npu_context hasn't been returned to |
|---|
| 817 | | - * the caller meaning it can't be destroyed. Parallel allocation |
|---|
| 818 | | - * is protected against by mmap_sem. |
|---|
| 819 | | - */ |
|---|
| 820 | | - rc = -ENOMEM; |
|---|
| 821 | | - npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL); |
|---|
| 822 | | - if (npu_context) { |
|---|
| 823 | | - kref_init(&npu_context->kref); |
|---|
| 824 | | - npu_context->mm = mm; |
|---|
| 825 | | - npu_context->mn.ops = &nv_nmmu_notifier_ops; |
|---|
| 826 | | - rc = __mmu_notifier_register(&npu_context->mn, mm); |
|---|
| 827 | | - } |
|---|
| 828 | | - |
|---|
| 829 | | - if (rc) { |
|---|
| 830 | | - kfree(npu_context); |
|---|
| 831 | | - opal_npu_destroy_context(nphb->opal_id, mm->context.id, |
|---|
| 832 | | - PCI_DEVID(gpdev->bus->number, |
|---|
| 833 | | - gpdev->devfn)); |
|---|
| 834 | | - return ERR_PTR(rc); |
|---|
| 835 | | - } |
|---|
| 836 | | - |
|---|
| 837 | | - mm->context.npu_context = npu_context; |
|---|
| 466 | + /* Steal capabilities from a GPU PE */ |
|---|
| 467 | + compound_group->max_dynamic_windows_supported = |
|---|
| 468 | + pe->table_group.max_dynamic_windows_supported; |
|---|
| 469 | + compound_group->tce32_start = pe->table_group.tce32_start; |
|---|
| 470 | + compound_group->tce32_size = pe->table_group.tce32_size; |
|---|
| 471 | + compound_group->max_levels = pe->table_group.max_levels; |
|---|
| 472 | + if (!compound_group->pgsizes) |
|---|
| 473 | + compound_group->pgsizes = pe->table_group.pgsizes; |
|---|
| 838 | 474 | } |
|---|
| 839 | 475 | |
|---|
| 840 | | - npu_context->release_cb = cb; |
|---|
| 841 | | - npu_context->priv = priv; |
|---|
| 842 | | - |
|---|
| 843 | 476 | /* |
|---|
| 844 | | - * npdev is a pci_dev pointer setup by the PCI code. We assign it to |
|---|
| 845 | | - * npdev[][] to indicate to the mmu notifiers that an invalidation |
|---|
| 846 | | - * should also be sent over this nvlink. The notifiers don't use any |
|---|
| 847 | | - * other fields in npu_context, so we just need to ensure that when they |
|---|
| 849 | | - * dereference npu_context->npdev[][] it is either a valid pointer or |
|---|
| 849 | | - * NULL. |
|---|
| 477 | + * The gpu would have been added to the iommu group that's created |
|---|
| 478 | + * for the PE. Pull it out now. |
|---|
| 850 | 479 | */ |
|---|
| 851 | | - WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev); |
|---|
| 480 | + iommu_del_device(&gpdev->dev); |
|---|
| 852 | 481 | |
|---|
| 853 | | - if (!nphb->npu.nmmu_flush) { |
|---|
| 854 | | - /* |
|---|
| 855 | | - * If we're not explicitly flushing ourselves we need to mark |
|---|
| 856 | | - * the thread for global flushes |
|---|
| 857 | | - */ |
|---|
| 858 | | - npu_context->nmmu_flush = false; |
|---|
| 859 | | - mm_context_add_copro(mm); |
|---|
| 860 | | - } else |
|---|
| 861 | | - npu_context->nmmu_flush = true; |
|---|
| 482 | + /* |
|---|
| 483 | + * I'm not sure this is strictly required, but it's probably a good idea |
|---|
| 484 | + * since the table_group for the PE is going to be attached to the |
|---|
| 485 | + * compound table group. If we leave the PE's iommu group active then |
|---|
| 486 | + * we might have the same table_group being modifiable via two separate |
|---|
| 487 | + * iommu groups. |
|---|
| 488 | + */ |
|---|
| 489 | + iommu_group_put(pe->table_group.group); |
|---|
| 862 | 490 | |
|---|
| 863 | | - return npu_context; |
|---|
| 864 | | -} |
|---|
| 865 | | -EXPORT_SYMBOL(pnv_npu2_init_context); |
|---|
| 491 | + /* now put the GPU into the compound group */ |
|---|
| 492 | + pnv_comp_attach_table_group(npucomp, pe); |
|---|
| 493 | + iommu_add_device(compound_group, &gpdev->dev); |
|---|
| 866 | 494 | |
|---|
| 867 | | -static void pnv_npu2_release_context(struct kref *kref) |
|---|
| 868 | | -{ |
|---|
| 869 | | - struct npu_context *npu_context = |
|---|
| 870 | | - container_of(kref, struct npu_context, kref); |
|---|
| 871 | | - |
|---|
| 872 | | - if (!npu_context->nmmu_flush) |
|---|
| 873 | | - mm_context_remove_copro(npu_context->mm); |
|---|
| 874 | | - |
|---|
| 875 | | - npu_context->mm->context.npu_context = NULL; |
|---|
| 495 | + return compound_group; |
|---|
| 876 | 496 | } |
|---|
| 877 | 497 | |
|---|
| 878 | | -/* |
|---|
| 879 | | - * Destroy a context on the given GPU. May free the npu_context if it is no |
|---|
| 880 | | - * longer active on any GPUs. Must not be called from interrupt context. |
|---|
| 881 | | - */ |
|---|
| 882 | | -void pnv_npu2_destroy_context(struct npu_context *npu_context, |
|---|
| 883 | | - struct pci_dev *gpdev) |
|---|
| 498 | +static struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe) |
|---|
| 884 | 499 | { |
|---|
| 885 | | - int removed; |
|---|
| 886 | | - struct pnv_phb *nphb; |
|---|
| 500 | + struct iommu_table_group *table_group; |
|---|
| 501 | + struct npu_comp *npucomp; |
|---|
| 502 | + struct pci_dev *gpdev = NULL; |
|---|
| 503 | + struct pci_dev *npdev; |
|---|
| 504 | + struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(pe, &gpdev); |
|---|
| 505 | + |
|---|
| 506 | + WARN_ON(!(pe->flags & PNV_IODA_PE_DEV)); |
|---|
| 507 | + if (!gpe) |
|---|
| 508 | + return NULL; |
|---|
| 509 | + |
|---|
| 510 | + /* |
|---|
| 511 | + * IODA2 bridges get this set up from pci_controller_ops::setup_bridge |
|---|
| 512 | + * but NPU bridges do not have this hook defined so we do it here. |
|---|
| 513 | + * We do not set up other table group parameters as they won't be used |
|---|
| 514 | + * anyway - NVLink bridges are subordinate PEs. |
|---|
| 515 | + */ |
|---|
| 516 | + pe->table_group.ops = &pnv_pci_npu_ops; |
|---|
| 517 | + |
|---|
| 518 | + table_group = iommu_group_get_iommudata( |
|---|
| 519 | + iommu_group_get(&gpdev->dev)); |
|---|
| 520 | + |
|---|
| 521 | + /* |
|---|
| 522 | + * On P9 the NPU PHB and the PCI PHB support different page sizes, |
|---|
| 523 | + * so keep only the matching ones. We expect the NVLink bridge PE |
|---|
| 524 | + * pgsizes to be initialized by the caller. |
|---|
| 525 | + */ |
|---|
| 526 | + table_group->pgsizes &= pe->table_group.pgsizes; |
|---|
| 527 | + npucomp = container_of(table_group, struct npu_comp, table_group); |
|---|
| 528 | + pnv_comp_attach_table_group(npucomp, pe); |
|---|
| 529 | + |
|---|
| 530 | + list_for_each_entry(npdev, &pe->phb->hose->bus->devices, bus_list) { |
|---|
| 531 | + struct pci_dev *gpdevtmp = pnv_pci_get_gpu_dev(npdev); |
|---|
| 532 | + |
|---|
| 533 | + if (gpdevtmp != gpdev) |
|---|
| 534 | + continue; |
|---|
| 535 | + |
|---|
| 536 | + iommu_add_device(table_group, &npdev->dev); |
|---|
| 537 | + } |
|---|
| 538 | + |
|---|
| 539 | + return table_group; |
|---|
| 540 | +} |
|---|
| 541 | + |
|---|
| 542 | +void pnv_pci_npu_setup_iommu_groups(void) |
|---|
| 543 | +{ |
|---|
| 544 | + struct pci_controller *hose; |
|---|
| 545 | + struct pnv_phb *phb; |
|---|
| 546 | + struct pnv_ioda_pe *pe; |
|---|
| 547 | + |
|---|
| 548 | + /* |
|---|
| 549 | + * For non-nvlink devices the IOMMU group is registered when the PE is |
|---|
| 550 | + * configured and devices are added to the group when the per-device |
|---|
| 551 | + * DMA setup is run. That's done in hose->ops.dma_dev_setup() which is |
|---|
| 552 | + * only initialised for "normal" IODA PHBs. |
|---|
| 553 | + * |
|---|
| 554 | + * For NVLink devices we need to ensure the NVLinks and the GPU end up |
|---|
| 555 | + * in the same IOMMU group, so that's handled here. |
|---|
| 556 | + */ |
|---|
| 557 | + list_for_each_entry(hose, &hose_list, list_node) { |
|---|
| 558 | + phb = hose->private_data; |
|---|
| 559 | + |
|---|
| 560 | + if (phb->type == PNV_PHB_IODA2) |
|---|
| 561 | + list_for_each_entry(pe, &phb->ioda.pe_list, list) |
|---|
| 562 | + pnv_try_setup_npu_table_group(pe); |
|---|
| 563 | + } |
|---|
| 564 | + |
|---|
| 565 | + /* |
|---|
| 566 | + * Now we have all PHBs discovered, time to add NPU devices to |
|---|
| 567 | + * the corresponding IOMMU groups. |
|---|
| 568 | + */ |
|---|
| 569 | + list_for_each_entry(hose, &hose_list, list_node) { |
|---|
| 570 | + unsigned long pgsizes; |
|---|
| 571 | + |
|---|
| 572 | + phb = hose->private_data; |
|---|
| 573 | + |
|---|
| 574 | + if (phb->type != PNV_PHB_NPU_NVLINK) |
|---|
| 575 | + continue; |
|---|
| 576 | + |
|---|
| 577 | + pgsizes = pnv_ioda_parse_tce_sizes(phb); |
|---|
| 578 | + list_for_each_entry(pe, &phb->ioda.pe_list, list) { |
|---|
| 579 | + /* |
|---|
| 580 | + * IODA2 bridges get this set up from |
|---|
| 581 | + * pci_controller_ops::setup_bridge but NPU bridges |
|---|
| 582 | + * do not have this hook defined so we do it here. |
|---|
| 583 | + */ |
|---|
| 584 | + pe->table_group.pgsizes = pgsizes; |
|---|
| 585 | + pnv_npu_compound_attach(pe); |
|---|
| 586 | + } |
|---|
| 587 | + } |
|---|
| 588 | +} |
|---|
| 589 | +#endif /* CONFIG_IOMMU_API */ |
|---|
| 590 | + |
|---|
| 591 | +int pnv_npu2_init(struct pci_controller *hose) |
|---|
| 592 | +{ |
|---|
| 593 | + static int npu_index; |
|---|
| 887 | 594 | struct npu *npu; |
|---|
| 888 | | - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); |
|---|
| 889 | | - struct device_node *nvlink_dn; |
|---|
| 890 | | - u32 nvlink_index; |
|---|
| 595 | + int ret; |
|---|
| 891 | 596 | |
|---|
| 892 | | - if (WARN_ON(!npdev)) |
|---|
| 893 | | - return; |
|---|
| 597 | + npu = kzalloc(sizeof(*npu), GFP_KERNEL); |
|---|
| 598 | + if (!npu) |
|---|
| 599 | + return -ENOMEM; |
|---|
| 894 | 600 | |
|---|
| 895 | | - if (!firmware_has_feature(FW_FEATURE_OPAL)) |
|---|
| 896 | | - return; |
|---|
| 897 | | - |
|---|
| 898 | | - nphb = pci_bus_to_host(npdev->bus)->private_data; |
|---|
| 899 | | - npu = &nphb->npu; |
|---|
| 900 | | - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); |
|---|
| 901 | | - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", |
|---|
| 902 | | - &nvlink_index))) |
|---|
| 903 | | - return; |
|---|
| 904 | | - WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL); |
|---|
| 905 | | - opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id, |
|---|
| 906 | | - PCI_DEVID(gpdev->bus->number, gpdev->devfn)); |
|---|
| 907 | | - spin_lock(&npu_context_lock); |
|---|
| 908 | | - removed = kref_put(&npu_context->kref, pnv_npu2_release_context); |
|---|
| 909 | | - spin_unlock(&npu_context_lock); |
|---|
| 910 | | - |
|---|
| 911 | | - /* |
|---|
| 912 | | - * We need to do this outside of pnv_npu2_release_context so that it is |
|---|
| 913 | | - * outside the spinlock as mmu_notifier_destroy uses SRCU. |
|---|
| 914 | | - */ |
|---|
| 915 | | - if (removed) { |
|---|
| 916 | | - mmu_notifier_unregister(&npu_context->mn, |
|---|
| 917 | | - npu_context->mm); |
|---|
| 918 | | - |
|---|
| 919 | | - kfree(npu_context); |
|---|
| 601 | + npu_index++; |
|---|
| 602 | + if (WARN_ON(npu_index >= NV_MAX_NPUS)) { |
|---|
| 603 | + ret = -ENOSPC; |
|---|
| 604 | + goto fail_exit; |
|---|
| 920 | 605 | } |
|---|
| 606 | + npu->index = npu_index; |
|---|
| 607 | + hose->npu = npu; |
|---|
| 921 | 608 | |
|---|
| 609 | + return 0; |
|---|
| 610 | + |
|---|
| 611 | +fail_exit: |
|---|
| 612 | + kfree(npu); |
|---|
| 613 | + return ret; |
|---|
| 922 | 614 | } |
|---|
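
The new `pnv_npu2_init()` follows the usual kernel error-unwinding shape: allocate, validate, publish, and release everything through a single exit label on the failure path. A compact user-space sketch of the same structure (the constants and error values are stand-ins for NV_MAX_NPUS, -ENOMEM and -ENOSPC):

```c
#include <stdio.h>
#include <stdlib.h>

#define MAX_NPUS 32			/* stand-in for NV_MAX_NPUS */

struct npu {
	int index;
};

static int npu_init(struct npu **out)
{
	static int npu_index;		/* one shared counter, as above */
	struct npu *npu;
	int ret;

	npu = calloc(1, sizeof(*npu));
	if (!npu)
		return -1;		/* like -ENOMEM */

	npu_index++;
	if (npu_index >= MAX_NPUS) {
		ret = -2;		/* like -ENOSPC */
		goto fail_exit;
	}
	npu->index = npu_index;
	*out = npu;

	return 0;

fail_exit:
	free(npu);
	return ret;
}

int main(void)
{
	struct npu *npu = NULL;

	printf("npu_init() = %d\n", npu_init(&npu));
	free(npu);
	return 0;
}
```
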
| 923 | | -EXPORT_SYMBOL(pnv_npu2_destroy_context); |
|---|
| 924 | 615 | |
|---|
| 925 | | -/* |
|---|
| 926 | | - * Assumes mmap_sem is held for the contexts associated mm. |
|---|
| 927 | | - */ |
|---|
| 928 | | -int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea, |
|---|
| 929 | | - unsigned long *flags, unsigned long *status, int count) |
|---|
| 616 | +int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid, |
|---|
| 617 | + unsigned long msr) |
|---|
| 930 | 618 | { |
|---|
| 931 | | - u64 rc = 0, result = 0; |
|---|
| 932 | | - int i, is_write; |
|---|
| 933 | | - struct page *page[1]; |
|---|
| 619 | + int ret; |
|---|
| 620 | + struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); |
|---|
| 621 | + struct pci_controller *hose; |
|---|
| 622 | + struct pnv_phb *nphb; |
|---|
| 934 | 623 | |
|---|
| 935 | | - /* mmap_sem should be held so the struct_mm must be present */ |
|---|
| 936 | | - struct mm_struct *mm = context->mm; |
|---|
| 937 | | - |
|---|
| 938 | | - if (!firmware_has_feature(FW_FEATURE_OPAL)) |
|---|
| 624 | + if (!npdev) |
|---|
| 939 | 625 | return -ENODEV; |
|---|
| 940 | 626 | |
|---|
| 941 | | - WARN_ON(!rwsem_is_locked(&mm->mmap_sem)); |
|---|
| 942 | | - |
|---|
| 943 | | - for (i = 0; i < count; i++) { |
|---|
| 944 | | - is_write = flags[i] & NPU2_WRITE; |
|---|
| 945 | | - rc = get_user_pages_remote(NULL, mm, ea[i], 1, |
|---|
| 946 | | - is_write ? FOLL_WRITE : 0, |
|---|
| 947 | | - page, NULL, NULL); |
|---|
| 948 | | - |
|---|
| 949 | | - /* |
|---|
| 950 | | - * To support virtualised environments we will have to do an |
|---|
| 951 | | - * access to the page to ensure it gets faulted into the |
|---|
| 952 | | - * hypervisor. For the moment virtualisation is not supported in |
|---|
| 953 | | - * other areas so leave the access out. |
|---|
| 954 | | - */ |
|---|
| 955 | | - if (rc != 1) { |
|---|
| 956 | | - status[i] = rc; |
|---|
| 957 | | - result = -EFAULT; |
|---|
| 958 | | - continue; |
|---|
| 959 | | - } |
|---|
| 960 | | - |
|---|
| 961 | | - status[i] = 0; |
|---|
| 962 | | - put_page(page[0]); |
|---|
| 627 | + hose = pci_bus_to_host(npdev->bus); |
|---|
| 628 | + if (hose->npu == NULL) { |
|---|
| 629 | + dev_info_once(&npdev->dev, "Nvlink1 does not support contexts\n"); |
|---|
| 630 | + return 0; |
|---|
| 963 | 631 | } |
|---|
| 964 | 632 | |
|---|
| 965 | | - return result; |
|---|
| 966 | | -} |
|---|
| 967 | | -EXPORT_SYMBOL(pnv_npu2_handle_fault); |
|---|
| 633 | + nphb = hose->private_data; |
|---|
| 968 | 634 | |
|---|
| 969 | | -int pnv_npu2_init(struct pnv_phb *phb) |
|---|
| 970 | | -{ |
|---|
| 971 | | - unsigned int i; |
|---|
| 972 | | - u64 mmio_atsd; |
|---|
| 973 | | - struct device_node *dn; |
|---|
| 974 | | - struct pci_dev *gpdev; |
|---|
| 975 | | - static int npu_index; |
|---|
| 976 | | - uint64_t rc = 0; |
|---|
| 977 | | - |
|---|
| 978 | | - if (!atsd_threshold_dentry) { |
|---|
| 979 | | - atsd_threshold_dentry = debugfs_create_x64("atsd_threshold", |
|---|
| 980 | | - 0600, powerpc_debugfs_root, &atsd_threshold); |
|---|
| 635 | + dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n", |
|---|
| 636 | + nphb->opal_id, lparid); |
|---|
| 637 | + /* |
|---|
| 638 | + * Currently we only support radix and non-zero LPCR only makes sense |
|---|
| 639 | + * for hash tables so skiboot expects the LPCR parameter to be a zero. |
|---|
| 640 | + */ |
|---|
| 641 | + ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), lparid, |
|---|
| 642 | + 0 /* LPCR bits */); |
|---|
| 643 | + if (ret) { |
|---|
| 644 | + dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret); |
|---|
| 645 | + return ret; |
|---|
| 981 | 646 | } |
|---|
| 982 | 647 | |
|---|
| 983 | | - phb->npu.nmmu_flush = |
|---|
| 984 | | - of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush"); |
|---|
| 985 | | - for_each_child_of_node(phb->hose->dn, dn) { |
|---|
| 986 | | - gpdev = pnv_pci_get_gpu_dev(get_pci_dev(dn)); |
|---|
| 987 | | - if (gpdev) { |
|---|
| 988 | | - rc = opal_npu_map_lpar(phb->opal_id, |
|---|
| 989 | | - PCI_DEVID(gpdev->bus->number, gpdev->devfn), |
|---|
| 990 | | - 0, 0); |
|---|
| 991 | | - if (rc) |
|---|
| 992 | | - dev_err(&gpdev->dev, |
|---|
| 993 | | - "Error %lld mapping device to LPAR\n", |
|---|
| 994 | | - rc); |
|---|
| 995 | | - } |
|---|
| 996 | | - } |
|---|
| 997 | | - |
|---|
| 998 | | - for (i = 0; !of_property_read_u64_index(phb->hose->dn, "ibm,mmio-atsd", |
|---|
| 999 | | - i, &mmio_atsd); i++) |
|---|
| 1000 | | - phb->npu.mmio_atsd_regs[i] = ioremap(mmio_atsd, 32); |
|---|
| 1001 | | - |
|---|
| 1002 | | - pr_info("NPU%lld: Found %d MMIO ATSD registers", phb->opal_id, i); |
|---|
| 1003 | | - phb->npu.mmio_atsd_count = i; |
|---|
| 1004 | | - phb->npu.mmio_atsd_usage = 0; |
|---|
| 1005 | | - npu_index++; |
|---|
| 1006 | | - if (WARN_ON(npu_index >= NV_MAX_NPUS)) |
|---|
| 1007 | | - return -ENOSPC; |
|---|
| 1008 | | - max_npu2_index = npu_index; |
|---|
| 1009 | | - phb->npu.index = npu_index; |
|---|
| 648 | + dev_dbg(&gpdev->dev, "init context opalid=%llu msr=%lx\n", |
|---|
| 649 | + nphb->opal_id, msr); |
|---|
| 650 | + ret = opal_npu_init_context(nphb->opal_id, 0/*__unused*/, msr, |
|---|
| 651 | + pci_dev_id(gpdev)); |
|---|
| 652 | + if (ret < 0) |
|---|
| 653 | + dev_err(&gpdev->dev, "Failed to init context: %d\n", ret); |
|---|
| 654 | + else |
|---|
| 655 | + ret = 0; |
|---|
| 1010 | 656 | |
|---|
| 1011 | 657 | return 0; |
|---|
| 1012 | 658 | } |
|---|
| 659 | +EXPORT_SYMBOL_GPL(pnv_npu2_map_lpar_dev); |
|---|
| 660 | + |
|---|
| 661 | +void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr) |
|---|
| 662 | +{ |
|---|
| 663 | + struct pci_dev *gpdev; |
|---|
| 664 | + |
|---|
| 665 | + list_for_each_entry(gpdev, &gpe->pbus->devices, bus_list) |
|---|
| 666 | + pnv_npu2_map_lpar_dev(gpdev, 0, msr); |
|---|
| 667 | +} |
|---|
| 668 | + |
|---|
| 669 | +int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev) |
|---|
| 670 | +{ |
|---|
| 671 | + int ret; |
|---|
| 672 | + struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); |
|---|
| 673 | + struct pci_controller *hose; |
|---|
| 674 | + struct pnv_phb *nphb; |
|---|
| 675 | + |
|---|
| 676 | + if (!npdev) |
|---|
| 677 | + return -ENODEV; |
|---|
| 678 | + |
|---|
| 679 | + hose = pci_bus_to_host(npdev->bus); |
|---|
| 680 | + if (hose->npu == NULL) { |
|---|
| 681 | + dev_info_once(&npdev->dev, "Nvlink1 does not support contexts\n"); |
|---|
| 682 | + return 0; |
|---|
| 683 | + } |
|---|
| 684 | + |
|---|
| 685 | + nphb = hose->private_data; |
|---|
| 686 | + |
|---|
| 687 | + dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n", |
|---|
| 688 | + nphb->opal_id); |
|---|
| 689 | + ret = opal_npu_destroy_context(nphb->opal_id, 0/*__unused*/, |
|---|
| 690 | + pci_dev_id(gpdev)); |
|---|
| 691 | + if (ret < 0) { |
|---|
| 692 | + dev_err(&gpdev->dev, "Failed to destroy context: %d\n", ret); |
|---|
| 693 | + return ret; |
|---|
| 694 | + } |
|---|
| 695 | + |
|---|
| 696 | + /* Set LPID to 0 anyway, just to be safe */ |
|---|
| 697 | + dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=0\n", nphb->opal_id); |
|---|
| 698 | + ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), 0 /*LPID*/, |
|---|
| 699 | + 0 /* LPCR bits */); |
|---|
| 700 | + if (ret) |
|---|
| 701 | + dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret); |
|---|
| 702 | + |
|---|
| 703 | + return ret; |
|---|
| 704 | +} |
|---|
| 705 | +EXPORT_SYMBOL_GPL(pnv_npu2_unmap_lpar_dev); |
|---|