hc
2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/arch/powerpc/platforms/powernv/npu-dma.c
....@@ -1,58 +1,25 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * This file implements the DMA operations for NVLink devices. The NPU
34 * devices all point to the same iommu table as the parent PCI device.
45 *
56 * Copyright Alistair Popple, IBM Corporation 2015.
6
- *
7
- * This program is free software; you can redistribute it and/or
8
- * modify it under the terms of version 2 of the GNU General Public
9
- * License as published by the Free Software Foundation.
107 */
118
12
-#include <linux/slab.h>
139 #include <linux/mmu_notifier.h>
1410 #include <linux/mmu_context.h>
1511 #include <linux/of.h>
16
-#include <linux/export.h>
1712 #include <linux/pci.h>
1813 #include <linux/memblock.h>
19
-#include <linux/iommu.h>
20
-#include <linux/debugfs.h>
14
+#include <linux/sizes.h>
2115
2216 #include <asm/debugfs.h>
23
-#include <asm/tlb.h>
2417 #include <asm/powernv.h>
25
-#include <asm/reg.h>
26
-#include <asm/opal.h>
27
-#include <asm/io.h>
28
-#include <asm/iommu.h>
29
-#include <asm/pnv-pci.h>
30
-#include <asm/msi_bitmap.h>
18
+#include <asm/ppc-pci.h>
3119 #include <asm/opal.h>
3220
33
-#include "powernv.h"
3421 #include "pci.h"
3522
36
-#define npu_to_phb(x) container_of(x, struct pnv_phb, npu)
37
-
38
-/*
39
- * spinlock to protect initialisation of an npu_context for a particular
40
- * mm_struct.
41
- */
42
-static DEFINE_SPINLOCK(npu_context_lock);
43
-
44
-/*
45
- * When an address shootdown range exceeds this threshold we invalidate the
46
- * entire TLB on the GPU for the given PID rather than each specific address in
47
- * the range.
48
- */
49
-static uint64_t atsd_threshold = 2 * 1024 * 1024;
50
-static struct dentry *atsd_threshold_dentry;
51
-
52
-/*
53
- * Other types of TCE cache invalidation are not functional in the
54
- * hardware.
55
- */
5623 static struct pci_dev *get_pci_dev(struct device_node *dn)
5724 {
5825 struct pci_dn *pdn = PCI_DN(dn);
....@@ -123,63 +90,7 @@
12390 }
12491 EXPORT_SYMBOL(pnv_pci_get_npu_dev);
12592
126
-#define NPU_DMA_OP_UNSUPPORTED() \
127
- dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \
128
- __func__)
129
-
130
-static void *dma_npu_alloc(struct device *dev, size_t size,
131
- dma_addr_t *dma_handle, gfp_t flag,
132
- unsigned long attrs)
133
-{
134
- NPU_DMA_OP_UNSUPPORTED();
135
- return NULL;
136
-}
137
-
138
-static void dma_npu_free(struct device *dev, size_t size,
139
- void *vaddr, dma_addr_t dma_handle,
140
- unsigned long attrs)
141
-{
142
- NPU_DMA_OP_UNSUPPORTED();
143
-}
144
-
145
-static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page,
146
- unsigned long offset, size_t size,
147
- enum dma_data_direction direction,
148
- unsigned long attrs)
149
-{
150
- NPU_DMA_OP_UNSUPPORTED();
151
- return 0;
152
-}
153
-
154
-static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist,
155
- int nelems, enum dma_data_direction direction,
156
- unsigned long attrs)
157
-{
158
- NPU_DMA_OP_UNSUPPORTED();
159
- return 0;
160
-}
161
-
162
-static int dma_npu_dma_supported(struct device *dev, u64 mask)
163
-{
164
- NPU_DMA_OP_UNSUPPORTED();
165
- return 0;
166
-}
167
-
168
-static u64 dma_npu_get_required_mask(struct device *dev)
169
-{
170
- NPU_DMA_OP_UNSUPPORTED();
171
- return 0;
172
-}
173
-
174
-static const struct dma_map_ops dma_npu_ops = {
175
- .map_page = dma_npu_map_page,
176
- .map_sg = dma_npu_map_sg,
177
- .alloc = dma_npu_alloc,
178
- .free = dma_npu_free,
179
- .dma_supported = dma_npu_dma_supported,
180
- .get_required_mask = dma_npu_get_required_mask,
181
-};
182
-
93
+#ifdef CONFIG_IOMMU_API
18394 /*
18495 * Returns the PE associated with the PCI device of the given
18596 * NPU. Returns the linked pci device if pci_dev != NULL.
....@@ -211,15 +122,25 @@
211122 return pe;
212123 }
213124
214
-long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num,
125
+static long pnv_npu_unset_window(struct iommu_table_group *table_group,
126
+ int num);
127
+
128
+static long pnv_npu_set_window(struct iommu_table_group *table_group, int num,
215129 struct iommu_table *tbl)
216130 {
131
+ struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
132
+ table_group);
217133 struct pnv_phb *phb = npe->phb;
218134 int64_t rc;
219135 const unsigned long size = tbl->it_indirect_levels ?
220136 tbl->it_level_size : tbl->it_size;
221137 const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
222138 const __u64 win_size = tbl->it_size << tbl->it_page_shift;
139
+ int num2 = (num == 0) ? 1 : 0;
140
+
141
+ /* NPU has just one TVE so if there is another table, remove it first */
142
+ if (npe->table_group.tables[num2])
143
+ pnv_npu_unset_window(&npe->table_group, num2);
223144
224145 pe_info(npe, "Setting up window %llx..%llx pg=%lx\n",
225146 start_addr, start_addr + win_size - 1,
....@@ -245,10 +166,15 @@
245166 return 0;
246167 }
247168
248
-long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num)
169
+static long pnv_npu_unset_window(struct iommu_table_group *table_group, int num)
249170 {
171
+ struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
172
+ table_group);
250173 struct pnv_phb *phb = npe->phb;
251174 int64_t rc;
175
+
176
+ if (!npe->table_group.tables[num])
177
+ return 0;
252178
253179 pe_info(npe, "Removing DMA window\n");
254180
....@@ -268,108 +194,14 @@
268194 return 0;
269195 }
270196
271
-/*
272
- * Enables 32 bit DMA on NPU.
273
- */
274
-static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe)
275
-{
276
- struct pci_dev *gpdev;
277
- struct pnv_ioda_pe *gpe;
278
- int64_t rc;
279
-
280
- /*
281
- * Find the assoicated PCI devices and get the dma window
282
- * information from there.
283
- */
284
- if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV))
285
- return;
286
-
287
- gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
288
- if (!gpe)
289
- return;
290
-
291
- rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]);
292
-
293
- /*
294
- * We don't initialise npu_pe->tce32_table as we always use
295
- * dma_npu_ops which are nops.
296
- */
297
- set_dma_ops(&npe->pdev->dev, &dma_npu_ops);
298
-}
299
-
300
-/*
301
- * Enables bypass mode on the NPU. The NPU only supports one
302
- * window per link, so bypass needs to be explicitly enabled or
303
- * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be
304
- * active at the same time.
305
- */
306
-static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe)
307
-{
308
- struct pnv_phb *phb = npe->phb;
309
- int64_t rc = 0;
310
- phys_addr_t top = memblock_end_of_DRAM();
311
-
312
- if (phb->type != PNV_PHB_NPU_NVLINK || !npe->pdev)
313
- return -EINVAL;
314
-
315
- rc = pnv_npu_unset_window(npe, 0);
316
- if (rc != OPAL_SUCCESS)
317
- return rc;
318
-
319
- /* Enable the bypass window */
320
-
321
- top = roundup_pow_of_two(top);
322
- dev_info(&npe->pdev->dev, "Enabling bypass for PE %x\n",
323
- npe->pe_number);
324
- rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
325
- npe->pe_number, npe->pe_number,
326
- 0 /* bypass base */, top);
327
-
328
- if (rc == OPAL_SUCCESS)
329
- pnv_pci_ioda2_tce_invalidate_entire(phb, false);
330
-
331
- return rc;
332
-}
333
-
334
-void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass)
335
-{
336
- int i;
337
- struct pnv_phb *phb;
338
- struct pci_dn *pdn;
339
- struct pnv_ioda_pe *npe;
340
- struct pci_dev *npdev;
341
-
342
- for (i = 0; ; ++i) {
343
- npdev = pnv_pci_get_npu_dev(gpdev, i);
344
-
345
- if (!npdev)
346
- break;
347
-
348
- pdn = pci_get_pdn(npdev);
349
- if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
350
- return;
351
-
352
- phb = pci_bus_to_host(npdev->bus)->private_data;
353
-
354
- /* We only do bypass if it's enabled on the linked device */
355
- npe = &phb->ioda.pe_array[pdn->pe_number];
356
-
357
- if (bypass) {
358
- dev_info(&npdev->dev,
359
- "Using 64-bit DMA iommu bypass\n");
360
- pnv_npu_dma_set_bypass(npe);
361
- } else {
362
- dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n");
363
- pnv_npu_dma_set_32(npe);
364
- }
365
- }
366
-}
367
-
368197 /* Switch ownership from platform code to external user (e.g. VFIO) */
369
-void pnv_npu_take_ownership(struct pnv_ioda_pe *npe)
198
+static void pnv_npu_take_ownership(struct iommu_table_group *table_group)
370199 {
200
+ struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
201
+ table_group);
371202 struct pnv_phb *phb = npe->phb;
372203 int64_t rc;
204
+ struct pci_dev *gpdev = NULL;
373205
374206 /*
375207 * Note: NPU has just a single TVE in the hardware which means that
....@@ -378,7 +210,7 @@
378210 * if it was enabled at the moment of ownership change.
379211 */
380212 if (npe->table_group.tables[0]) {
381
- pnv_npu_unset_window(npe, 0);
213
+ pnv_npu_unset_window(&npe->table_group, 0);
382214 return;
383215 }
384216
....@@ -391,622 +223,483 @@
391223 return;
392224 }
393225 pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false);
226
+
227
+ get_gpu_pci_dev_and_pe(npe, &gpdev);
228
+ if (gpdev)
229
+ pnv_npu2_unmap_lpar_dev(gpdev);
394230 }
395231
396
-struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe)
232
+static void pnv_npu_release_ownership(struct iommu_table_group *table_group)
397233 {
398
- struct pnv_phb *phb = npe->phb;
399
- struct pci_bus *pbus = phb->hose->bus;
400
- struct pci_dev *npdev, *gpdev = NULL, *gptmp;
401
- struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
234
+ struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
235
+ table_group);
236
+ struct pci_dev *gpdev = NULL;
402237
403
- if (!gpe || !gpdev)
404
- return NULL;
405
-
406
- list_for_each_entry(npdev, &pbus->devices, bus_list) {
407
- gptmp = pnv_pci_get_gpu_dev(npdev);
408
-
409
- if (gptmp != gpdev)
410
- continue;
411
-
412
- pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev));
413
- iommu_group_add_device(gpe->table_group.group, &npdev->dev);
414
- }
415
-
416
- return gpe;
238
+ get_gpu_pci_dev_and_pe(npe, &gpdev);
239
+ if (gpdev)
240
+ pnv_npu2_map_lpar_dev(gpdev, 0, MSR_DR | MSR_PR | MSR_HV);
417241 }
418242
419
-/* Maximum number of nvlinks per npu */
420
-#define NV_MAX_LINKS 6
421
-
422
-/* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */
423
-static int max_npu2_index;
424
-
425
-struct npu_context {
426
- struct mm_struct *mm;
427
- struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS];
428
- struct mmu_notifier mn;
429
- struct kref kref;
430
- bool nmmu_flush;
431
-
432
- /* Callback to stop translation requests on a given GPU */
433
- void (*release_cb)(struct npu_context *context, void *priv);
434
-
435
- /*
436
- * Private pointer passed to the above callback for usage by
437
- * device drivers.
438
- */
439
- void *priv;
243
+static struct iommu_table_group_ops pnv_pci_npu_ops = {
244
+ .set_window = pnv_npu_set_window,
245
+ .unset_window = pnv_npu_unset_window,
246
+ .take_ownership = pnv_npu_take_ownership,
247
+ .release_ownership = pnv_npu_release_ownership,
440248 };
441
-
442
-struct mmio_atsd_reg {
443
- struct npu *npu;
444
- int reg;
445
-};
249
+#endif /* CONFIG_IOMMU_API */
446250
447251 /*
448
- * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC
449
- * if none are available.
252
+ * NPU2 ATS
450253 */
451
-static int get_mmio_atsd_reg(struct npu *npu)
452
-{
453
- int i;
454
-
455
- for (i = 0; i < npu->mmio_atsd_count; i++) {
456
- if (!test_bit(i, &npu->mmio_atsd_usage))
457
- if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage))
458
- return i;
459
- }
460
-
461
- return -ENOSPC;
462
-}
463
-
464
-static void put_mmio_atsd_reg(struct npu *npu, int reg)
465
-{
466
- clear_bit_unlock(reg, &npu->mmio_atsd_usage);
467
-}
468
-
469
-/* MMIO ATSD register offsets */
470
-#define XTS_ATSD_AVA 1
471
-#define XTS_ATSD_STAT 2
472
-
473
-static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg,
474
- unsigned long launch, unsigned long va)
475
-{
476
- struct npu *npu = mmio_atsd_reg->npu;
477
- int reg = mmio_atsd_reg->reg;
478
-
479
- __raw_writeq_be(va, npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA);
480
- eieio();
481
- __raw_writeq_be(launch, npu->mmio_atsd_regs[reg]);
482
-}
483
-
484
-static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
485
- unsigned long pid, bool flush)
486
-{
487
- int i;
488
- unsigned long launch;
489
-
490
- for (i = 0; i <= max_npu2_index; i++) {
491
- if (mmio_atsd_reg[i].reg < 0)
492
- continue;
493
-
494
- /* IS set to invalidate matching PID */
495
- launch = PPC_BIT(12);
496
-
497
- /* PRS set to process-scoped */
498
- launch |= PPC_BIT(13);
499
-
500
- /* AP */
501
- launch |= (u64)
502
- mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
503
-
504
- /* PID */
505
- launch |= pid << PPC_BITLSHIFT(38);
506
-
507
- /* No flush */
508
- launch |= !flush << PPC_BITLSHIFT(39);
509
-
510
- /* Invalidating the entire process doesn't use a va */
511
- mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0);
512
- }
513
-}
514
-
515
-static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
516
- unsigned long va, unsigned long pid, bool flush)
517
-{
518
- int i;
519
- unsigned long launch;
520
-
521
- for (i = 0; i <= max_npu2_index; i++) {
522
- if (mmio_atsd_reg[i].reg < 0)
523
- continue;
524
-
525
- /* IS set to invalidate target VA */
526
- launch = 0;
527
-
528
- /* PRS set to process scoped */
529
- launch |= PPC_BIT(13);
530
-
531
- /* AP */
532
- launch |= (u64)
533
- mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
534
-
535
- /* PID */
536
- launch |= pid << PPC_BITLSHIFT(38);
537
-
538
- /* No flush */
539
- launch |= !flush << PPC_BITLSHIFT(39);
540
-
541
- mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va);
542
- }
543
-}
544
-
545
-#define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
546
-
547
-static void mmio_invalidate_wait(
548
- struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
549
-{
550
- struct npu *npu;
551
- int i, reg;
552
-
553
- /* Wait for all invalidations to complete */
554
- for (i = 0; i <= max_npu2_index; i++) {
555
- if (mmio_atsd_reg[i].reg < 0)
556
- continue;
557
-
558
- /* Wait for completion */
559
- npu = mmio_atsd_reg[i].npu;
560
- reg = mmio_atsd_reg[i].reg;
561
- while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
562
- cpu_relax();
563
- }
564
-}
254
+/* Maximum possible number of ATSD MMIO registers per NPU */
255
+#define NV_NMMU_ATSD_REGS 8
256
+#define NV_NPU_MAX_PE_NUM 16
565257
566258 /*
567
- * Acquires all the address translation shootdown (ATSD) registers required to
568
- * launch an ATSD on all links this npu_context is active on.
259
+ * A compound NPU IOMMU group which might consist of 1 GPU + 2xNPUs (POWER8) or
260
+ * up to 3 x (GPU + 2xNPUs) (POWER9).
569261 */
570
-static void acquire_atsd_reg(struct npu_context *npu_context,
571
- struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
262
+struct npu_comp {
263
+ struct iommu_table_group table_group;
264
+ int pe_num;
265
+ struct pnv_ioda_pe *pe[NV_NPU_MAX_PE_NUM];
266
+};
267
+
268
+/* An NPU descriptor, valid for POWER9 only */
269
+struct npu {
270
+ int index;
271
+ struct npu_comp npucomp;
272
+};
273
+
274
+#ifdef CONFIG_IOMMU_API
275
+static long pnv_npu_peers_create_table_userspace(
276
+ struct iommu_table_group *table_group,
277
+ int num, __u32 page_shift, __u64 window_size, __u32 levels,
278
+ struct iommu_table **ptbl)
279
+{
280
+ struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
281
+ table_group);
282
+
283
+ if (!npucomp->pe_num || !npucomp->pe[0] ||
284
+ !npucomp->pe[0]->table_group.ops ||
285
+ !npucomp->pe[0]->table_group.ops->create_table)
286
+ return -EFAULT;
287
+
288
+ return npucomp->pe[0]->table_group.ops->create_table(
289
+ &npucomp->pe[0]->table_group, num, page_shift,
290
+ window_size, levels, ptbl);
291
+}
292
+
293
+static long pnv_npu_peers_set_window(struct iommu_table_group *table_group,
294
+ int num, struct iommu_table *tbl)
572295 {
573296 int i, j;
574
- struct npu *npu;
575
- struct pci_dev *npdev;
576
- struct pnv_phb *nphb;
297
+ long ret = 0;
298
+ struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
299
+ table_group);
577300
578
- for (i = 0; i <= max_npu2_index; i++) {
579
- mmio_atsd_reg[i].reg = -1;
580
- for (j = 0; j < NV_MAX_LINKS; j++) {
581
- /*
582
- * There are no ordering requirements with respect to
583
- * the setup of struct npu_context, but to ensure
584
- * consistent behaviour we need to ensure npdev[][] is
585
- * only read once.
586
- */
587
- npdev = READ_ONCE(npu_context->npdev[i][j]);
588
- if (!npdev)
301
+ for (i = 0; i < npucomp->pe_num; ++i) {
302
+ struct pnv_ioda_pe *pe = npucomp->pe[i];
303
+
304
+ if (!pe->table_group.ops->set_window)
305
+ continue;
306
+
307
+ ret = pe->table_group.ops->set_window(&pe->table_group,
308
+ num, tbl);
309
+ if (ret)
310
+ break;
311
+ }
312
+
313
+ if (ret) {
314
+ for (j = 0; j < i; ++j) {
315
+ struct pnv_ioda_pe *pe = npucomp->pe[j];
316
+
317
+ if (!pe->table_group.ops->unset_window)
589318 continue;
590319
591
- nphb = pci_bus_to_host(npdev->bus)->private_data;
592
- npu = &nphb->npu;
593
- mmio_atsd_reg[i].npu = npu;
594
- mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
595
- while (mmio_atsd_reg[i].reg < 0) {
596
- mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
597
- cpu_relax();
598
- }
599
- break;
320
+ ret = pe->table_group.ops->unset_window(
321
+ &pe->table_group, num);
322
+ if (ret)
323
+ break;
600324 }
325
+ } else {
326
+ table_group->tables[num] = iommu_tce_table_get(tbl);
601327 }
328
+
329
+ return ret;
602330 }
603331
604
-/*
605
- * Release previously acquired ATSD registers. To avoid deadlocks the registers
606
- * must be released in the same order they were acquired above in
607
- * acquire_atsd_reg.
608
- */
609
-static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
332
+static long pnv_npu_peers_unset_window(struct iommu_table_group *table_group,
333
+ int num)
334
+{
335
+ int i, j;
336
+ long ret = 0;
337
+ struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
338
+ table_group);
339
+
340
+ for (i = 0; i < npucomp->pe_num; ++i) {
341
+ struct pnv_ioda_pe *pe = npucomp->pe[i];
342
+
343
+ WARN_ON(npucomp->table_group.tables[num] !=
344
+ table_group->tables[num]);
345
+ if (!npucomp->table_group.tables[num])
346
+ continue;
347
+
348
+ if (!pe->table_group.ops->unset_window)
349
+ continue;
350
+
351
+ ret = pe->table_group.ops->unset_window(&pe->table_group, num);
352
+ if (ret)
353
+ break;
354
+ }
355
+
356
+ if (ret) {
357
+ for (j = 0; j < i; ++j) {
358
+ struct pnv_ioda_pe *pe = npucomp->pe[j];
359
+
360
+ if (!npucomp->table_group.tables[num])
361
+ continue;
362
+
363
+ if (!pe->table_group.ops->set_window)
364
+ continue;
365
+
366
+ ret = pe->table_group.ops->set_window(&pe->table_group,
367
+ num, table_group->tables[num]);
368
+ if (ret)
369
+ break;
370
+ }
371
+ } else if (table_group->tables[num]) {
372
+ iommu_tce_table_put(table_group->tables[num]);
373
+ table_group->tables[num] = NULL;
374
+ }
375
+
376
+ return ret;
377
+}
378
+
379
+static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group)
610380 {
611381 int i;
382
+ struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
383
+ table_group);
612384
613
- for (i = 0; i <= max_npu2_index; i++) {
614
- /*
615
- * We can't rely on npu_context->npdev[][] being the same here
616
- * as when acquire_atsd_reg() was called, hence we use the
617
- * values stored in mmio_atsd_reg during the acquire phase
618
- * rather than re-reading npdev[][].
619
- */
620
- if (mmio_atsd_reg[i].reg < 0)
385
+ for (i = 0; i < npucomp->pe_num; ++i) {
386
+ struct pnv_ioda_pe *pe = npucomp->pe[i];
387
+
388
+ if (!pe->table_group.ops ||
389
+ !pe->table_group.ops->take_ownership)
621390 continue;
622
-
623
- put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg);
391
+ pe->table_group.ops->take_ownership(&pe->table_group);
624392 }
625393 }
626394
627
-/*
628
- * Invalidate either a single address or an entire PID depending on
629
- * the value of va.
630
- */
631
-static void mmio_invalidate(struct npu_context *npu_context, int va,
632
- unsigned long address, bool flush)
395
+static void pnv_npu_peers_release_ownership(
396
+ struct iommu_table_group *table_group)
633397 {
634
- struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
635
- unsigned long pid = npu_context->mm->context.id;
398
+ int i;
399
+ struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
400
+ table_group);
636401
637
- if (npu_context->nmmu_flush)
638
- /*
639
- * Unfortunately the nest mmu does not support flushing specific
640
- * addresses so we have to flush the whole mm once before
641
- * shooting down the GPU translation.
642
- */
643
- flush_all_mm(npu_context->mm);
402
+ for (i = 0; i < npucomp->pe_num; ++i) {
403
+ struct pnv_ioda_pe *pe = npucomp->pe[i];
644404
645
- /*
646
- * Loop over all the NPUs this process is active on and launch
647
- * an invalidate.
648
- */
649
- acquire_atsd_reg(npu_context, mmio_atsd_reg);
650
- if (va)
651
- mmio_invalidate_va(mmio_atsd_reg, address, pid, flush);
652
- else
653
- mmio_invalidate_pid(mmio_atsd_reg, pid, flush);
654
-
655
- mmio_invalidate_wait(mmio_atsd_reg);
656
- if (flush) {
657
- /*
658
- * The GPU requires two flush ATSDs to ensure all entries have
659
- * been flushed. We use PID 0 as it will never be used for a
660
- * process on the GPU.
661
- */
662
- mmio_invalidate_pid(mmio_atsd_reg, 0, true);
663
- mmio_invalidate_wait(mmio_atsd_reg);
664
- mmio_invalidate_pid(mmio_atsd_reg, 0, true);
665
- mmio_invalidate_wait(mmio_atsd_reg);
666
- }
667
- release_atsd_reg(mmio_atsd_reg);
668
-}
669
-
670
-static void pnv_npu2_mn_release(struct mmu_notifier *mn,
671
- struct mm_struct *mm)
672
-{
673
- struct npu_context *npu_context = mn_to_npu_context(mn);
674
-
675
- /* Call into device driver to stop requests to the NMMU */
676
- if (npu_context->release_cb)
677
- npu_context->release_cb(npu_context, npu_context->priv);
678
-
679
- /*
680
- * There should be no more translation requests for this PID, but we
681
- * need to ensure any entries for it are removed from the TLB.
682
- */
683
- mmio_invalidate(npu_context, 0, 0, true);
684
-}
685
-
686
-static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
687
- struct mm_struct *mm,
688
- unsigned long address,
689
- pte_t pte)
690
-{
691
- struct npu_context *npu_context = mn_to_npu_context(mn);
692
-
693
- mmio_invalidate(npu_context, 1, address, true);
694
-}
695
-
696
-static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
697
- struct mm_struct *mm,
698
- unsigned long start, unsigned long end)
699
-{
700
- struct npu_context *npu_context = mn_to_npu_context(mn);
701
- unsigned long address;
702
-
703
- if (end - start > atsd_threshold) {
704
- /*
705
- * Just invalidate the entire PID if the address range is too
706
- * large.
707
- */
708
- mmio_invalidate(npu_context, 0, 0, true);
709
- } else {
710
- for (address = start; address < end; address += PAGE_SIZE)
711
- mmio_invalidate(npu_context, 1, address, false);
712
-
713
- /* Do the flush only on the final addess == end */
714
- mmio_invalidate(npu_context, 1, address, true);
405
+ if (!pe->table_group.ops ||
406
+ !pe->table_group.ops->release_ownership)
407
+ continue;
408
+ pe->table_group.ops->release_ownership(&pe->table_group);
715409 }
716410 }
717411
718
-static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
719
- .release = pnv_npu2_mn_release,
720
- .change_pte = pnv_npu2_mn_change_pte,
721
- .invalidate_range = pnv_npu2_mn_invalidate_range,
412
+static struct iommu_table_group_ops pnv_npu_peers_ops = {
413
+ .get_table_size = pnv_pci_ioda2_get_table_size,
414
+ .create_table = pnv_npu_peers_create_table_userspace,
415
+ .set_window = pnv_npu_peers_set_window,
416
+ .unset_window = pnv_npu_peers_unset_window,
417
+ .take_ownership = pnv_npu_peers_take_ownership,
418
+ .release_ownership = pnv_npu_peers_release_ownership,
722419 };
723420
724
-/*
725
- * Call into OPAL to setup the nmmu context for the current task in
726
- * the NPU. This must be called to setup the context tables before the
727
- * GPU issues ATRs. pdev should be a pointed to PCIe GPU device.
728
- *
729
- * A release callback should be registered to allow a device driver to
730
- * be notified that it should not launch any new translation requests
731
- * as the final TLB invalidate is about to occur.
732
- *
733
- * Returns an error if there no contexts are currently available or a
734
- * npu_context which should be passed to pnv_npu2_handle_fault().
735
- *
736
- * mmap_sem must be held in write mode and must not be called from interrupt
737
- * context.
738
- */
739
-struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
740
- unsigned long flags,
741
- void (*cb)(struct npu_context *, void *),
742
- void *priv)
421
+static void pnv_comp_attach_table_group(struct npu_comp *npucomp,
422
+ struct pnv_ioda_pe *pe)
743423 {
744
- int rc;
745
- u32 nvlink_index;
746
- struct device_node *nvlink_dn;
747
- struct mm_struct *mm = current->mm;
748
- struct pnv_phb *nphb;
749
- struct npu *npu;
750
- struct npu_context *npu_context;
424
+ if (WARN_ON(npucomp->pe_num == NV_NPU_MAX_PE_NUM))
425
+ return;
751426
752
- /*
753
- * At present we don't support GPUs connected to multiple NPUs and I'm
754
- * not sure the hardware does either.
755
- */
756
- struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
427
+ npucomp->pe[npucomp->pe_num] = pe;
428
+ ++npucomp->pe_num;
429
+}
757430
758
- if (!firmware_has_feature(FW_FEATURE_OPAL))
759
- return ERR_PTR(-ENODEV);
431
+static struct iommu_table_group *
432
+ pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
433
+{
434
+ struct iommu_table_group *compound_group;
435
+ struct npu_comp *npucomp;
436
+ struct pci_dev *gpdev = NULL;
437
+ struct pci_controller *hose;
438
+ struct pci_dev *npdev = NULL;
439
+
440
+ list_for_each_entry(gpdev, &pe->pbus->devices, bus_list) {
441
+ npdev = pnv_pci_get_npu_dev(gpdev, 0);
442
+ if (npdev)
443
+ break;
444
+ }
760445
761446 if (!npdev)
762
- /* No nvlink associated with this GPU device */
763
- return ERR_PTR(-ENODEV);
447
+ /* It is not an NPU attached device, skip */
448
+ return NULL;
764449
765
- nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
766
- if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
767
- &nvlink_index)))
768
- return ERR_PTR(-ENODEV);
450
+ hose = pci_bus_to_host(npdev->bus);
769451
770
- if (!mm || mm->context.id == 0) {
771
- /*
772
- * Kernel thread contexts are not supported and context id 0 is
773
- * reserved on the GPU.
774
- */
775
- return ERR_PTR(-EINVAL);
452
+ if (hose->npu) {
453
+ /* P9 case: compound group is per-NPU (all gpus, all links) */
454
+ npucomp = &hose->npu->npucomp;
455
+ } else {
456
+ /* P8 case: Compound group is per-GPU (1 gpu, 2 links) */
457
+ npucomp = pe->npucomp = kzalloc(sizeof(*npucomp), GFP_KERNEL);
776458 }
777459
778
- nphb = pci_bus_to_host(npdev->bus)->private_data;
779
- npu = &nphb->npu;
460
+ compound_group = &npucomp->table_group;
461
+ if (!compound_group->group) {
462
+ compound_group->ops = &pnv_npu_peers_ops;
463
+ iommu_register_group(compound_group, hose->global_number,
464
+ pe->pe_number);
780465
781
- /*
782
- * Setup the NPU context table for a particular GPU. These need to be
783
- * per-GPU as we need the tables to filter ATSDs when there are no
784
- * active contexts on a particular GPU. It is safe for these to be
785
- * called concurrently with destroy as the OPAL call takes appropriate
786
- * locks and refcounts on init/destroy.
787
- */
788
- rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags,
789
- PCI_DEVID(gpdev->bus->number, gpdev->devfn));
790
- if (rc < 0)
791
- return ERR_PTR(-ENOSPC);
792
-
793
- /*
794
- * We store the npu pci device so we can more easily get at the
795
- * associated npus.
796
- */
797
- spin_lock(&npu_context_lock);
798
- npu_context = mm->context.npu_context;
799
- if (npu_context) {
800
- if (npu_context->release_cb != cb ||
801
- npu_context->priv != priv) {
802
- spin_unlock(&npu_context_lock);
803
- opal_npu_destroy_context(nphb->opal_id, mm->context.id,
804
- PCI_DEVID(gpdev->bus->number,
805
- gpdev->devfn));
806
- return ERR_PTR(-EINVAL);
807
- }
808
-
809
- WARN_ON(!kref_get_unless_zero(&npu_context->kref));
810
- }
811
- spin_unlock(&npu_context_lock);
812
-
813
- if (!npu_context) {
814
- /*
815
- * We can set up these fields without holding the
816
- * npu_context_lock as the npu_context hasn't been returned to
817
- * the caller meaning it can't be destroyed. Parallel allocation
818
- * is protected against by mmap_sem.
819
- */
820
- rc = -ENOMEM;
821
- npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
822
- if (npu_context) {
823
- kref_init(&npu_context->kref);
824
- npu_context->mm = mm;
825
- npu_context->mn.ops = &nv_nmmu_notifier_ops;
826
- rc = __mmu_notifier_register(&npu_context->mn, mm);
827
- }
828
-
829
- if (rc) {
830
- kfree(npu_context);
831
- opal_npu_destroy_context(nphb->opal_id, mm->context.id,
832
- PCI_DEVID(gpdev->bus->number,
833
- gpdev->devfn));
834
- return ERR_PTR(rc);
835
- }
836
-
837
- mm->context.npu_context = npu_context;
466
+ /* Steal capabilities from a GPU PE */
467
+ compound_group->max_dynamic_windows_supported =
468
+ pe->table_group.max_dynamic_windows_supported;
469
+ compound_group->tce32_start = pe->table_group.tce32_start;
470
+ compound_group->tce32_size = pe->table_group.tce32_size;
471
+ compound_group->max_levels = pe->table_group.max_levels;
472
+ if (!compound_group->pgsizes)
473
+ compound_group->pgsizes = pe->table_group.pgsizes;
838474 }
839475
840
- npu_context->release_cb = cb;
841
- npu_context->priv = priv;
842
-
843476 /*
844
- * npdev is a pci_dev pointer setup by the PCI code. We assign it to
845
- * npdev[][] to indicate to the mmu notifiers that an invalidation
846
- * should also be sent over this nvlink. The notifiers don't use any
847
- * other fields in npu_context, so we just need to ensure that when they
848
- * deference npu_context->npdev[][] it is either a valid pointer or
849
- * NULL.
477
+ * The gpu would have been added to the iommu group that's created
478
+ * for the PE. Pull it out now.
850479 */
851
- WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev);
480
+ iommu_del_device(&gpdev->dev);
852481
853
- if (!nphb->npu.nmmu_flush) {
854
- /*
855
- * If we're not explicitly flushing ourselves we need to mark
856
- * the thread for global flushes
857
- */
858
- npu_context->nmmu_flush = false;
859
- mm_context_add_copro(mm);
860
- } else
861
- npu_context->nmmu_flush = true;
482
+ /*
483
+ * I'm not sure this is strictly required, but it's probably a good idea
484
+ * since the table_group for the PE is going to be attached to the
485
+ * compound table group. If we leave the PE's iommu group active then
486
+ * we might have the same table_group being modifiable via two separate
487
+ * iommu groups.
488
+ */
489
+ iommu_group_put(pe->table_group.group);
862490
863
- return npu_context;
864
-}
865
-EXPORT_SYMBOL(pnv_npu2_init_context);
491
+ /* now put the GPU into the compound group */
492
+ pnv_comp_attach_table_group(npucomp, pe);
493
+ iommu_add_device(compound_group, &gpdev->dev);
866494
867
-static void pnv_npu2_release_context(struct kref *kref)
868
-{
869
- struct npu_context *npu_context =
870
- container_of(kref, struct npu_context, kref);
871
-
872
- if (!npu_context->nmmu_flush)
873
- mm_context_remove_copro(npu_context->mm);
874
-
875
- npu_context->mm->context.npu_context = NULL;
495
+ return compound_group;
876496 }
877497
878
-/*
879
- * Destroy a context on the given GPU. May free the npu_context if it is no
880
- * longer active on any GPUs. Must not be called from interrupt context.
881
- */
882
-void pnv_npu2_destroy_context(struct npu_context *npu_context,
883
- struct pci_dev *gpdev)
498
+static struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
884499 {
885
- int removed;
886
- struct pnv_phb *nphb;
500
+ struct iommu_table_group *table_group;
501
+ struct npu_comp *npucomp;
502
+ struct pci_dev *gpdev = NULL;
503
+ struct pci_dev *npdev;
504
+ struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(pe, &gpdev);
505
+
506
+ WARN_ON(!(pe->flags & PNV_IODA_PE_DEV));
507
+ if (!gpe)
508
+ return NULL;
509
+
510
+ /*
511
+ * IODA2 bridges get this set up from pci_controller_ops::setup_bridge
512
+ * but NPU bridges do not have this hook defined so we do it here.
513
+ * We do not setup other table group parameters as they won't be used
514
+ * anyway - NVLink bridges are subordinate PEs.
515
+ */
516
+ pe->table_group.ops = &pnv_pci_npu_ops;
517
+
518
+ table_group = iommu_group_get_iommudata(
519
+ iommu_group_get(&gpdev->dev));
520
+
521
+ /*
522
+ * On P9 NPU PHB and PCI PHB support different page sizes,
523
+ * keep only matching. We expect here that NVLink bridge PE pgsizes is
524
+ * initialized by the caller.
525
+ */
526
+ table_group->pgsizes &= pe->table_group.pgsizes;
527
+ npucomp = container_of(table_group, struct npu_comp, table_group);
528
+ pnv_comp_attach_table_group(npucomp, pe);
529
+
530
+ list_for_each_entry(npdev, &pe->phb->hose->bus->devices, bus_list) {
531
+ struct pci_dev *gpdevtmp = pnv_pci_get_gpu_dev(npdev);
532
+
533
+ if (gpdevtmp != gpdev)
534
+ continue;
535
+
536
+ iommu_add_device(table_group, &npdev->dev);
537
+ }
538
+
539
+ return table_group;
540
+}
541
+
542
+void pnv_pci_npu_setup_iommu_groups(void)
543
+{
544
+ struct pci_controller *hose;
545
+ struct pnv_phb *phb;
546
+ struct pnv_ioda_pe *pe;
547
+
548
+ /*
549
+ * For non-nvlink devices the IOMMU group is registered when the PE is
550
+ * configured and devices are added to the group when the per-device
551
+ * DMA setup is run. That's done in hose->ops.dma_dev_setup() which is
552
+ * only initialise for "normal" IODA PHBs.
553
+ *
554
+ * For NVLink devices we need to ensure the NVLinks and the GPU end up
555
+ * in the same IOMMU group, so that's handled here.
556
+ */
557
+ list_for_each_entry(hose, &hose_list, list_node) {
558
+ phb = hose->private_data;
559
+
560
+ if (phb->type == PNV_PHB_IODA2)
561
+ list_for_each_entry(pe, &phb->ioda.pe_list, list)
562
+ pnv_try_setup_npu_table_group(pe);
563
+ }
564
+
565
+ /*
566
+ * Now we have all PHBs discovered, time to add NPU devices to
567
+ * the corresponding IOMMU groups.
568
+ */
569
+ list_for_each_entry(hose, &hose_list, list_node) {
570
+ unsigned long pgsizes;
571
+
572
+ phb = hose->private_data;
573
+
574
+ if (phb->type != PNV_PHB_NPU_NVLINK)
575
+ continue;
576
+
577
+ pgsizes = pnv_ioda_parse_tce_sizes(phb);
578
+ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
579
+ /*
580
+ * IODA2 bridges get this set up from
581
+ * pci_controller_ops::setup_bridge but NPU bridges
582
+ * do not have this hook defined so we do it here.
583
+ */
584
+ pe->table_group.pgsizes = pgsizes;
585
+ pnv_npu_compound_attach(pe);
586
+ }
587
+ }
588
+}
589
+#endif /* CONFIG_IOMMU_API */
590
+
591
+int pnv_npu2_init(struct pci_controller *hose)
592
+{
593
+ static int npu_index;
887594 struct npu *npu;
888
- struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
889
- struct device_node *nvlink_dn;
890
- u32 nvlink_index;
595
+ int ret;
891596
892
- if (WARN_ON(!npdev))
893
- return;
597
+ npu = kzalloc(sizeof(*npu), GFP_KERNEL);
598
+ if (!npu)
599
+ return -ENOMEM;
894600
895
- if (!firmware_has_feature(FW_FEATURE_OPAL))
896
- return;
897
-
898
- nphb = pci_bus_to_host(npdev->bus)->private_data;
899
- npu = &nphb->npu;
900
- nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
901
- if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
902
- &nvlink_index)))
903
- return;
904
- WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
905
- opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
906
- PCI_DEVID(gpdev->bus->number, gpdev->devfn));
907
- spin_lock(&npu_context_lock);
908
- removed = kref_put(&npu_context->kref, pnv_npu2_release_context);
909
- spin_unlock(&npu_context_lock);
910
-
911
- /*
912
- * We need to do this outside of pnv_npu2_release_context so that it is
913
- * outside the spinlock as mmu_notifier_destroy uses SRCU.
914
- */
915
- if (removed) {
916
- mmu_notifier_unregister(&npu_context->mn,
917
- npu_context->mm);
918
-
919
- kfree(npu_context);
601
+ npu_index++;
602
+ if (WARN_ON(npu_index >= NV_MAX_NPUS)) {
603
+ ret = -ENOSPC;
604
+ goto fail_exit;
920605 }
606
+ npu->index = npu_index;
607
+ hose->npu = npu;
921608
609
+ return 0;
610
+
611
+fail_exit:
612
+ kfree(npu);
613
+ return ret;
922614 }
923
-EXPORT_SYMBOL(pnv_npu2_destroy_context);
924615
925
-/*
926
- * Assumes mmap_sem is held for the contexts associated mm.
927
- */
928
-int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
929
- unsigned long *flags, unsigned long *status, int count)
616
+int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
617
+ unsigned long msr)
930618 {
931
- u64 rc = 0, result = 0;
932
- int i, is_write;
933
- struct page *page[1];
619
+ int ret;
620
+ struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
621
+ struct pci_controller *hose;
622
+ struct pnv_phb *nphb;
934623
935
- /* mmap_sem should be held so the struct_mm must be present */
936
- struct mm_struct *mm = context->mm;
937
-
938
- if (!firmware_has_feature(FW_FEATURE_OPAL))
624
+ if (!npdev)
939625 return -ENODEV;
940626
941
- WARN_ON(!rwsem_is_locked(&mm->mmap_sem));
942
-
943
- for (i = 0; i < count; i++) {
944
- is_write = flags[i] & NPU2_WRITE;
945
- rc = get_user_pages_remote(NULL, mm, ea[i], 1,
946
- is_write ? FOLL_WRITE : 0,
947
- page, NULL, NULL);
948
-
949
- /*
950
- * To support virtualised environments we will have to do an
951
- * access to the page to ensure it gets faulted into the
952
- * hypervisor. For the moment virtualisation is not supported in
953
- * other areas so leave the access out.
954
- */
955
- if (rc != 1) {
956
- status[i] = rc;
957
- result = -EFAULT;
958
- continue;
959
- }
960
-
961
- status[i] = 0;
962
- put_page(page[0]);
627
+ hose = pci_bus_to_host(npdev->bus);
628
+ if (hose->npu == NULL) {
629
+ dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
630
+ return 0;
963631 }
964632
965
- return result;
966
-}
967
-EXPORT_SYMBOL(pnv_npu2_handle_fault);
633
+ nphb = hose->private_data;
968634
969
-int pnv_npu2_init(struct pnv_phb *phb)
970
-{
971
- unsigned int i;
972
- u64 mmio_atsd;
973
- struct device_node *dn;
974
- struct pci_dev *gpdev;
975
- static int npu_index;
976
- uint64_t rc = 0;
977
-
978
- if (!atsd_threshold_dentry) {
979
- atsd_threshold_dentry = debugfs_create_x64("atsd_threshold",
980
- 0600, powerpc_debugfs_root, &atsd_threshold);
635
+ dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n",
636
+ nphb->opal_id, lparid);
637
+ /*
638
+ * Currently we only support radix, and a non-zero LPCR only makes sense
639
+ * for hash tables, so skiboot expects the LPCR parameter to be zero.
640
+ */
641
+ ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), lparid,
642
+ 0 /* LPCR bits */);
643
+ if (ret) {
644
+ dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
645
+ return ret;
981646 }
982647
983
- phb->npu.nmmu_flush =
984
- of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush");
985
- for_each_child_of_node(phb->hose->dn, dn) {
986
- gpdev = pnv_pci_get_gpu_dev(get_pci_dev(dn));
987
- if (gpdev) {
988
- rc = opal_npu_map_lpar(phb->opal_id,
989
- PCI_DEVID(gpdev->bus->number, gpdev->devfn),
990
- 0, 0);
991
- if (rc)
992
- dev_err(&gpdev->dev,
993
- "Error %lld mapping device to LPAR\n",
994
- rc);
995
- }
996
- }
997
-
998
- for (i = 0; !of_property_read_u64_index(phb->hose->dn, "ibm,mmio-atsd",
999
- i, &mmio_atsd); i++)
1000
- phb->npu.mmio_atsd_regs[i] = ioremap(mmio_atsd, 32);
1001
-
1002
- pr_info("NPU%lld: Found %d MMIO ATSD registers", phb->opal_id, i);
1003
- phb->npu.mmio_atsd_count = i;
1004
- phb->npu.mmio_atsd_usage = 0;
1005
- npu_index++;
1006
- if (WARN_ON(npu_index >= NV_MAX_NPUS))
1007
- return -ENOSPC;
1008
- max_npu2_index = npu_index;
1009
- phb->npu.index = npu_index;
648
+ dev_dbg(&gpdev->dev, "init context opalid=%llu msr=%lx\n",
649
+ nphb->opal_id, msr);
650
+ ret = opal_npu_init_context(nphb->opal_id, 0/*__unused*/, msr,
651
+ pci_dev_id(gpdev));
652
+ if (ret < 0)
653
+ dev_err(&gpdev->dev, "Failed to init context: %d\n", ret);
654
+ else
655
+ ret = 0;
1010656
1011657 return 0;
1012658 }
659
+EXPORT_SYMBOL_GPL(pnv_npu2_map_lpar_dev);
660
+
661
+void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr)
662
+{
663
+ struct pci_dev *gpdev;
664
+
665
+ list_for_each_entry(gpdev, &gpe->pbus->devices, bus_list)
666
+ pnv_npu2_map_lpar_dev(gpdev, 0, msr);
667
+}
668
+
669
+int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev)
670
+{
671
+ int ret;
672
+ struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
673
+ struct pci_controller *hose;
674
+ struct pnv_phb *nphb;
675
+
676
+ if (!npdev)
677
+ return -ENODEV;
678
+
679
+ hose = pci_bus_to_host(npdev->bus);
680
+ if (hose->npu == NULL) {
681
+ dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
682
+ return 0;
683
+ }
684
+
685
+ nphb = hose->private_data;
686
+
687
+ dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n",
688
+ nphb->opal_id);
689
+ ret = opal_npu_destroy_context(nphb->opal_id, 0/*__unused*/,
690
+ pci_dev_id(gpdev));
691
+ if (ret < 0) {
692
+ dev_err(&gpdev->dev, "Failed to destroy context: %d\n", ret);
693
+ return ret;
694
+ }
695
+
696
+ /* Set LPID to 0 anyway, just to be safe */
697
+ dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=0\n", nphb->opal_id);
698
+ ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), 0 /*LPID*/,
699
+ 0 /* LPCR bits */);
700
+ if (ret)
701
+ dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
702
+
703
+ return ret;
704
+}
705
+EXPORT_SYMBOL_GPL(pnv_npu2_unmap_lpar_dev);
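
For illustration only (not part of the patch): pnv_npu2_map_lpar_dev() and pnv_npu2_unmap_lpar_dev(), exported above, are the external interface for binding a GPU's NPU context to a partition. Below is a minimal, hedged usage sketch; the example_* wrappers and their call sites are assumptions, while the prototypes and the MSR_DR | MSR_PR | MSR_HV value are taken directly from the patch (see pnv_npu_take_ownership()/pnv_npu_release_ownership()).

/* Hedged sketch: a hypothetical caller of the exported NPU2 context helpers.
 * Prototypes copied from this file; in the real tree they come from a
 * powernv header (assumption about the exact location). */
#include <linux/pci.h>
#include <asm/reg.h>	/* MSR_DR, MSR_PR, MSR_HV */

int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
			  unsigned long msr);
int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev);

/* Bind the GPU's NPU context to the host partition (lparid 0), as the
 * ownership-release path in this patch does. Returns 0 or a negative errno. */
static int example_npu2_attach_to_host(struct pci_dev *gpdev)
{
	return pnv_npu2_map_lpar_dev(gpdev, 0 /* lparid */,
				     MSR_DR | MSR_PR | MSR_HV);
}

/* Drop the context again, e.g. before handing the GPU to an external user,
 * mirroring the ownership-take path in this patch. */
static void example_npu2_detach(struct pci_dev *gpdev)
{
	pnv_npu2_unmap_lpar_dev(gpdev);
}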