.. | .. |
| 1 | +// SPDX-License-Identifier: GPL-2.0-only
1 | 2 | /*
2 | 3 | * This file implements the DMA operations for NVLink devices. The NPU
3 | 4 | * devices all point to the same iommu table as the parent PCI device.
4 | 5 | *
5 | 6 | * Copyright Alistair Popple, IBM Corporation 2015.
6 | | - *
7 | | - * This program is free software; you can redistribute it and/or
8 | | - * modify it under the terms of version 2 of the GNU General Public
9 | | - * License as published by the Free Software Foundation.
10 | 7 | */
11 | 8 |
12 | | -#include <linux/slab.h>
13 | 9 | #include <linux/mmu_notifier.h>
14 | 10 | #include <linux/mmu_context.h>
15 | 11 | #include <linux/of.h>
16 | | -#include <linux/export.h>
17 | 12 | #include <linux/pci.h>
18 | 13 | #include <linux/memblock.h>
19 | | -#include <linux/iommu.h>
20 | | -#include <linux/debugfs.h>
| 14 | +#include <linux/sizes.h>
21 | 15 |
22 | 16 | #include <asm/debugfs.h>
23 | | -#include <asm/tlb.h>
24 | 17 | #include <asm/powernv.h>
25 | | -#include <asm/reg.h>
26 | | -#include <asm/opal.h>
27 | | -#include <asm/io.h>
28 | | -#include <asm/iommu.h>
29 | | -#include <asm/pnv-pci.h>
30 | | -#include <asm/msi_bitmap.h>
| 18 | +#include <asm/ppc-pci.h>
31 | 19 | #include <asm/opal.h>
32 | 20 |
33 | | -#include "powernv.h"
34 | 21 | #include "pci.h"
35 | 22 |
36 | | -#define npu_to_phb(x) container_of(x, struct pnv_phb, npu)
37 | | -
38 | | -/*
39 | | - * spinlock to protect initialisation of an npu_context for a particular
40 | | - * mm_struct.
41 | | - */
42 | | -static DEFINE_SPINLOCK(npu_context_lock);
43 | | -
44 | | -/*
45 | | - * When an address shootdown range exceeds this threshold we invalidate the
46 | | - * entire TLB on the GPU for the given PID rather than each specific address in
47 | | - * the range.
48 | | - */
49 | | -static uint64_t atsd_threshold = 2 * 1024 * 1024;
50 | | -static struct dentry *atsd_threshold_dentry;
51 | | -
52 | | -/*
53 | | - * Other types of TCE cache invalidation are not functional in the
54 | | - * hardware.
55 | | - */
56 | 23 | static struct pci_dev *get_pci_dev(struct device_node *dn)
57 | 24 | {
58 | 25 | struct pci_dn *pdn = PCI_DN(dn);
.. | .. |
123 | 90 | }
124 | 91 | EXPORT_SYMBOL(pnv_pci_get_npu_dev);
125 | 92 |
126 | | -#define NPU_DMA_OP_UNSUPPORTED() \
127 | | - dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \
128 | | - __func__)
129 | | -
130 | | -static void *dma_npu_alloc(struct device *dev, size_t size,
131 | | - dma_addr_t *dma_handle, gfp_t flag,
132 | | - unsigned long attrs)
133 | | -{
134 | | - NPU_DMA_OP_UNSUPPORTED();
135 | | - return NULL;
136 | | -}
137 | | -
138 | | -static void dma_npu_free(struct device *dev, size_t size,
139 | | - void *vaddr, dma_addr_t dma_handle,
140 | | - unsigned long attrs)
141 | | -{
142 | | - NPU_DMA_OP_UNSUPPORTED();
143 | | -}
144 | | -
145 | | -static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page,
146 | | - unsigned long offset, size_t size,
147 | | - enum dma_data_direction direction,
148 | | - unsigned long attrs)
149 | | -{
150 | | - NPU_DMA_OP_UNSUPPORTED();
151 | | - return 0;
152 | | -}
153 | | -
154 | | -static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist,
155 | | - int nelems, enum dma_data_direction direction,
156 | | - unsigned long attrs)
157 | | -{
158 | | - NPU_DMA_OP_UNSUPPORTED();
159 | | - return 0;
160 | | -}
161 | | -
162 | | -static int dma_npu_dma_supported(struct device *dev, u64 mask)
163 | | -{
164 | | - NPU_DMA_OP_UNSUPPORTED();
165 | | - return 0;
166 | | -}
167 | | -
168 | | -static u64 dma_npu_get_required_mask(struct device *dev)
169 | | -{
170 | | - NPU_DMA_OP_UNSUPPORTED();
171 | | - return 0;
172 | | -}
173 | | -
174 | | -static const struct dma_map_ops dma_npu_ops = {
175 | | - .map_page = dma_npu_map_page,
176 | | - .map_sg = dma_npu_map_sg,
177 | | - .alloc = dma_npu_alloc,
178 | | - .free = dma_npu_free,
179 | | - .dma_supported = dma_npu_dma_supported,
180 | | - .get_required_mask = dma_npu_get_required_mask,
181 | | -};
182 | | -
| 93 | +#ifdef CONFIG_IOMMU_API
183 | 94 | /*
184 | 95 | * Returns the PE assoicated with the PCI device of the given
185 | 96 | * NPU. Returns the linked pci device if pci_dev != NULL.
.. | .. |
211 | 122 | return pe;
212 | 123 | }
213 | 124 |
214 | | -long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num,
| 125 | +static long pnv_npu_unset_window(struct iommu_table_group *table_group,
| 126 | + int num);
| 127 | +
| 128 | +static long pnv_npu_set_window(struct iommu_table_group *table_group, int num,
215 | 129 | struct iommu_table *tbl)
216 | 130 | {
| 131 | + struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
| 132 | + table_group);
217 | 133 | struct pnv_phb *phb = npe->phb;
218 | 134 | int64_t rc;
219 | 135 | const unsigned long size = tbl->it_indirect_levels ?
220 | 136 | tbl->it_level_size : tbl->it_size;
221 | 137 | const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
222 | 138 | const __u64 win_size = tbl->it_size << tbl->it_page_shift;
| 139 | + int num2 = (num == 0) ? 1 : 0;
| 140 | +
| 141 | + /* NPU has just one TVE so if there is another table, remove it first */
| 142 | + if (npe->table_group.tables[num2])
| 143 | + pnv_npu_unset_window(&npe->table_group, num2);
223 | 144 |
224 | 145 | pe_info(npe, "Setting up window %llx..%llx pg=%lx\n",
225 | 146 | start_addr, start_addr + win_size - 1,
.. | .. |
245 | 166 | return 0;
246 | 167 | }
247 | 168 |
248 | | -long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num)
| 169 | +static long pnv_npu_unset_window(struct iommu_table_group *table_group, int num)
249 | 170 | {
| 171 | + struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
| 172 | + table_group);
250 | 173 | struct pnv_phb *phb = npe->phb;
251 | 174 | int64_t rc;
| 175 | +
| 176 | + if (!npe->table_group.tables[num])
| 177 | + return 0;
252 | 178 |
253 | 179 | pe_info(npe, "Removing DMA window\n");
254 | 180 |
.. | .. |
268 | 194 | return 0;
269 | 195 | }
270 | 196 |
271 | | -/*
272 | | - * Enables 32 bit DMA on NPU.
273 | | - */
274 | | -static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe)
275 | | -{
276 | | - struct pci_dev *gpdev;
277 | | - struct pnv_ioda_pe *gpe;
278 | | - int64_t rc;
279 | | -
280 | | - /*
281 | | - * Find the assoicated PCI devices and get the dma window
282 | | - * information from there.
283 | | - */
284 | | - if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV))
285 | | - return;
286 | | -
287 | | - gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
288 | | - if (!gpe)
289 | | - return;
290 | | -
291 | | - rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]);
292 | | -
293 | | - /*
294 | | - * We don't initialise npu_pe->tce32_table as we always use
295 | | - * dma_npu_ops which are nops.
296 | | - */
297 | | - set_dma_ops(&npe->pdev->dev, &dma_npu_ops);
298 | | -}
299 | | -
300 | | -/*
301 | | - * Enables bypass mode on the NPU. The NPU only supports one
302 | | - * window per link, so bypass needs to be explicitly enabled or
303 | | - * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be
304 | | - * active at the same time.
305 | | - */
306 | | -static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe)
307 | | -{
308 | | - struct pnv_phb *phb = npe->phb;
309 | | - int64_t rc = 0;
310 | | - phys_addr_t top = memblock_end_of_DRAM();
311 | | -
312 | | - if (phb->type != PNV_PHB_NPU_NVLINK || !npe->pdev)
313 | | - return -EINVAL;
314 | | -
315 | | - rc = pnv_npu_unset_window(npe, 0);
316 | | - if (rc != OPAL_SUCCESS)
317 | | - return rc;
318 | | -
319 | | - /* Enable the bypass window */
320 | | -
321 | | - top = roundup_pow_of_two(top);
322 | | - dev_info(&npe->pdev->dev, "Enabling bypass for PE %x\n",
323 | | - npe->pe_number);
324 | | - rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
325 | | - npe->pe_number, npe->pe_number,
326 | | - 0 /* bypass base */, top);
327 | | -
328 | | - if (rc == OPAL_SUCCESS)
329 | | - pnv_pci_ioda2_tce_invalidate_entire(phb, false);
330 | | -
331 | | - return rc;
332 | | -}
333 | | -
334 | | -void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass)
335 | | -{
336 | | - int i;
337 | | - struct pnv_phb *phb;
338 | | - struct pci_dn *pdn;
339 | | - struct pnv_ioda_pe *npe;
340 | | - struct pci_dev *npdev;
341 | | -
342 | | - for (i = 0; ; ++i) {
343 | | - npdev = pnv_pci_get_npu_dev(gpdev, i);
344 | | -
345 | | - if (!npdev)
346 | | - break;
347 | | -
348 | | - pdn = pci_get_pdn(npdev);
349 | | - if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
350 | | - return;
351 | | -
352 | | - phb = pci_bus_to_host(npdev->bus)->private_data;
353 | | -
354 | | - /* We only do bypass if it's enabled on the linked device */
355 | | - npe = &phb->ioda.pe_array[pdn->pe_number];
356 | | -
357 | | - if (bypass) {
358 | | - dev_info(&npdev->dev,
359 | | - "Using 64-bit DMA iommu bypass\n");
360 | | - pnv_npu_dma_set_bypass(npe);
361 | | - } else {
362 | | - dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n");
363 | | - pnv_npu_dma_set_32(npe);
364 | | - }
365 | | - }
366 | | -}
367 | | -
368 | 197 | /* Switch ownership from platform code to external user (e.g. VFIO) */
369 | | -void pnv_npu_take_ownership(struct pnv_ioda_pe *npe)
| 198 | +static void pnv_npu_take_ownership(struct iommu_table_group *table_group)
370 | 199 | {
| 200 | + struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
| 201 | + table_group);
371 | 202 | struct pnv_phb *phb = npe->phb;
372 | 203 | int64_t rc;
| 204 | + struct pci_dev *gpdev = NULL;
373 | 205 |
374 | 206 | /*
375 | 207 | * Note: NPU has just a single TVE in the hardware which means that
.. | .. |
378 | 210 | * if it was enabled at the moment of ownership change.
379 | 211 | */
380 | 212 | if (npe->table_group.tables[0]) {
381 | | - pnv_npu_unset_window(npe, 0);
| 213 | + pnv_npu_unset_window(&npe->table_group, 0);
382 | 214 | return;
383 | 215 | }
384 | 216 |
.. | .. |
391 | 223 | return;
392 | 224 | }
393 | 225 | pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false);
| 226 | +
| 227 | + get_gpu_pci_dev_and_pe(npe, &gpdev);
| 228 | + if (gpdev)
| 229 | + pnv_npu2_unmap_lpar_dev(gpdev);
394 | 230 | }
395 | 231 |
396 | | -struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe)
| 232 | +static void pnv_npu_release_ownership(struct iommu_table_group *table_group)
397 | 233 | {
398 | | - struct pnv_phb *phb = npe->phb;
399 | | - struct pci_bus *pbus = phb->hose->bus;
400 | | - struct pci_dev *npdev, *gpdev = NULL, *gptmp;
401 | | - struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
| 234 | + struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
| 235 | + table_group);
| 236 | + struct pci_dev *gpdev = NULL;
402 | 237 |
403 | | - if (!gpe || !gpdev)
404 | | - return NULL;
405 | | -
406 | | - list_for_each_entry(npdev, &pbus->devices, bus_list) {
407 | | - gptmp = pnv_pci_get_gpu_dev(npdev);
408 | | -
409 | | - if (gptmp != gpdev)
410 | | - continue;
411 | | -
412 | | - pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev));
413 | | - iommu_group_add_device(gpe->table_group.group, &npdev->dev);
414 | | - }
415 | | -
416 | | - return gpe;
| 238 | + get_gpu_pci_dev_and_pe(npe, &gpdev);
| 239 | + if (gpdev)
| 240 | + pnv_npu2_map_lpar_dev(gpdev, 0, MSR_DR | MSR_PR | MSR_HV);
417 | 241 | }
418 | 242 |
419 | | -/* Maximum number of nvlinks per npu */
420 | | -#define NV_MAX_LINKS 6
421 | | -
422 | | -/* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */
423 | | -static int max_npu2_index;
424 | | -
425 | | -struct npu_context {
426 | | - struct mm_struct *mm;
427 | | - struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS];
428 | | - struct mmu_notifier mn;
429 | | - struct kref kref;
430 | | - bool nmmu_flush;
431 | | -
432 | | - /* Callback to stop translation requests on a given GPU */
433 | | - void (*release_cb)(struct npu_context *context, void *priv);
434 | | -
435 | | - /*
436 | | - * Private pointer passed to the above callback for usage by
437 | | - * device drivers.
438 | | - */
439 | | - void *priv;
| 243 | +static struct iommu_table_group_ops pnv_pci_npu_ops = {
| 244 | + .set_window = pnv_npu_set_window,
| 245 | + .unset_window = pnv_npu_unset_window,
| 246 | + .take_ownership = pnv_npu_take_ownership,
| 247 | + .release_ownership = pnv_npu_release_ownership,
440 | 248 | };
441 | | -
442 | | -struct mmio_atsd_reg {
443 | | - struct npu *npu;
444 | | - int reg;
445 | | -};
| 249 | +#endif /* !CONFIG_IOMMU_API */
446 | 250 |
447 | 251 | /*
448 | | - * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC
449 | | - * if none are available.
| 252 | + * NPU2 ATS
450 | 253 | */
451 | | -static int get_mmio_atsd_reg(struct npu *npu)
452 | | -{
453 | | - int i;
454 | | -
455 | | - for (i = 0; i < npu->mmio_atsd_count; i++) {
456 | | - if (!test_bit(i, &npu->mmio_atsd_usage))
457 | | - if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage))
458 | | - return i;
459 | | - }
460 | | -
461 | | - return -ENOSPC;
462 | | -}
463 | | -
464 | | -static void put_mmio_atsd_reg(struct npu *npu, int reg)
465 | | -{
466 | | - clear_bit_unlock(reg, &npu->mmio_atsd_usage);
467 | | -}
468 | | -
469 | | -/* MMIO ATSD register offsets */
470 | | -#define XTS_ATSD_AVA 1
471 | | -#define XTS_ATSD_STAT 2
472 | | -
473 | | -static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg,
474 | | - unsigned long launch, unsigned long va)
475 | | -{
476 | | - struct npu *npu = mmio_atsd_reg->npu;
477 | | - int reg = mmio_atsd_reg->reg;
478 | | -
479 | | - __raw_writeq_be(va, npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA);
480 | | - eieio();
481 | | - __raw_writeq_be(launch, npu->mmio_atsd_regs[reg]);
482 | | -}
483 | | -
484 | | -static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
485 | | - unsigned long pid, bool flush)
486 | | -{
487 | | - int i;
488 | | - unsigned long launch;
489 | | -
490 | | - for (i = 0; i <= max_npu2_index; i++) {
491 | | - if (mmio_atsd_reg[i].reg < 0)
492 | | - continue;
493 | | -
494 | | - /* IS set to invalidate matching PID */
495 | | - launch = PPC_BIT(12);
496 | | -
497 | | - /* PRS set to process-scoped */
498 | | - launch |= PPC_BIT(13);
499 | | -
500 | | - /* AP */
501 | | - launch |= (u64)
502 | | - mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
503 | | -
504 | | - /* PID */
505 | | - launch |= pid << PPC_BITLSHIFT(38);
506 | | -
507 | | - /* No flush */
508 | | - launch |= !flush << PPC_BITLSHIFT(39);
509 | | -
510 | | - /* Invalidating the entire process doesn't use a va */
511 | | - mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0);
512 | | - }
513 | | -}
514 | | -
515 | | -static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
516 | | - unsigned long va, unsigned long pid, bool flush)
517 | | -{
518 | | - int i;
519 | | - unsigned long launch;
520 | | -
521 | | - for (i = 0; i <= max_npu2_index; i++) {
522 | | - if (mmio_atsd_reg[i].reg < 0)
523 | | - continue;
524 | | -
525 | | - /* IS set to invalidate target VA */
526 | | - launch = 0;
527 | | -
528 | | - /* PRS set to process scoped */
529 | | - launch |= PPC_BIT(13);
530 | | -
531 | | - /* AP */
532 | | - launch |= (u64)
533 | | - mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
534 | | -
535 | | - /* PID */
536 | | - launch |= pid << PPC_BITLSHIFT(38);
537 | | -
538 | | - /* No flush */
539 | | - launch |= !flush << PPC_BITLSHIFT(39);
540 | | -
541 | | - mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va);
542 | | - }
543 | | -}
544 | | -
545 | | -#define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
546 | | -
547 | | -static void mmio_invalidate_wait(
548 | | - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
549 | | -{
550 | | - struct npu *npu;
551 | | - int i, reg;
552 | | -
553 | | - /* Wait for all invalidations to complete */
554 | | - for (i = 0; i <= max_npu2_index; i++) {
555 | | - if (mmio_atsd_reg[i].reg < 0)
556 | | - continue;
557 | | -
558 | | - /* Wait for completion */
559 | | - npu = mmio_atsd_reg[i].npu;
560 | | - reg = mmio_atsd_reg[i].reg;
561 | | - while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
562 | | - cpu_relax();
563 | | - }
564 | | -}
| 254 | +/* Maximum possible number of ATSD MMIO registers per NPU */
| 255 | +#define NV_NMMU_ATSD_REGS 8
| 256 | +#define NV_NPU_MAX_PE_NUM 16
565 | 257 |
566 | 258 | /*
567 | | - * Acquires all the address translation shootdown (ATSD) registers required to
568 | | - * launch an ATSD on all links this npu_context is active on.
| 259 | + * A compound NPU IOMMU group which might consist of 1 GPU + 2xNPUs (POWER8) or
| 260 | + * up to 3 x (GPU + 2xNPUs) (POWER9).
569 | 261 | */
570 | | -static void acquire_atsd_reg(struct npu_context *npu_context,
571 | | - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
| 262 | +struct npu_comp {
| 263 | + struct iommu_table_group table_group;
| 264 | + int pe_num;
| 265 | + struct pnv_ioda_pe *pe[NV_NPU_MAX_PE_NUM];
| 266 | +};
| 267 | +
| 268 | +/* An NPU descriptor, valid for POWER9 only */
| 269 | +struct npu {
| 270 | + int index;
| 271 | + struct npu_comp npucomp;
| 272 | +};
| 273 | +
| 274 | +#ifdef CONFIG_IOMMU_API
| 275 | +static long pnv_npu_peers_create_table_userspace(
| 276 | + struct iommu_table_group *table_group,
| 277 | + int num, __u32 page_shift, __u64 window_size, __u32 levels,
| 278 | + struct iommu_table **ptbl)
| 279 | +{
| 280 | + struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
| 281 | + table_group);
| 282 | +
| 283 | + if (!npucomp->pe_num || !npucomp->pe[0] ||
| 284 | + !npucomp->pe[0]->table_group.ops ||
| 285 | + !npucomp->pe[0]->table_group.ops->create_table)
| 286 | + return -EFAULT;
| 287 | +
| 288 | + return npucomp->pe[0]->table_group.ops->create_table(
| 289 | + &npucomp->pe[0]->table_group, num, page_shift,
| 290 | + window_size, levels, ptbl);
| 291 | +}
| 292 | +
| 293 | +static long pnv_npu_peers_set_window(struct iommu_table_group *table_group,
| 294 | + int num, struct iommu_table *tbl)
572 | 295 | {
573 | 296 | int i, j;
574 | | - struct npu *npu;
575 | | - struct pci_dev *npdev;
576 | | - struct pnv_phb *nphb;
| 297 | + long ret = 0;
| 298 | + struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
| 299 | + table_group);
577 | 300 |
578 | | - for (i = 0; i <= max_npu2_index; i++) {
579 | | - mmio_atsd_reg[i].reg = -1;
580 | | - for (j = 0; j < NV_MAX_LINKS; j++) {
581 | | - /*
582 | | - * There are no ordering requirements with respect to
583 | | - * the setup of struct npu_context, but to ensure
584 | | - * consistent behaviour we need to ensure npdev[][] is
585 | | - * only read once.
586 | | - */
587 | | - npdev = READ_ONCE(npu_context->npdev[i][j]);
588 | | - if (!npdev)
| 301 | + for (i = 0; i < npucomp->pe_num; ++i) {
| 302 | + struct pnv_ioda_pe *pe = npucomp->pe[i];
| 303 | +
| 304 | + if (!pe->table_group.ops->set_window)
| 305 | + continue;
| 306 | +
| 307 | + ret = pe->table_group.ops->set_window(&pe->table_group,
| 308 | + num, tbl);
| 309 | + if (ret)
| 310 | + break;
| 311 | + }
| 312 | +
| 313 | + if (ret) {
| 314 | + for (j = 0; j < i; ++j) {
| 315 | + struct pnv_ioda_pe *pe = npucomp->pe[j];
| 316 | +
| 317 | + if (!pe->table_group.ops->unset_window)
589 | 318 | continue;
590 | 319 |
591 | | - nphb = pci_bus_to_host(npdev->bus)->private_data;
592 | | - npu = &nphb->npu;
593 | | - mmio_atsd_reg[i].npu = npu;
594 | | - mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
595 | | - while (mmio_atsd_reg[i].reg < 0) {
596 | | - mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
597 | | - cpu_relax();
598 | | - }
599 | | - break;
| 320 | + ret = pe->table_group.ops->unset_window(
| 321 | + &pe->table_group, num);
| 322 | + if (ret)
| 323 | + break;
600 | 324 | }
| 325 | + } else {
| 326 | + table_group->tables[num] = iommu_tce_table_get(tbl);
601 | 327 | }
| 328 | +
| 329 | + return ret;
602 | 330 | }
603 | 331 |
604 | | -/*
605 | | - * Release previously acquired ATSD registers. To avoid deadlocks the registers
606 | | - * must be released in the same order they were acquired above in
607 | | - * acquire_atsd_reg.
608 | | - */
609 | | -static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
| 332 | +static long pnv_npu_peers_unset_window(struct iommu_table_group *table_group,
| 333 | + int num)
| 334 | +{
| 335 | + int i, j;
| 336 | + long ret = 0;
| 337 | + struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
| 338 | + table_group);
| 339 | +
| 340 | + for (i = 0; i < npucomp->pe_num; ++i) {
| 341 | + struct pnv_ioda_pe *pe = npucomp->pe[i];
| 342 | +
| 343 | + WARN_ON(npucomp->table_group.tables[num] !=
| 344 | + table_group->tables[num]);
| 345 | + if (!npucomp->table_group.tables[num])
| 346 | + continue;
| 347 | +
| 348 | + if (!pe->table_group.ops->unset_window)
| 349 | + continue;
| 350 | +
| 351 | + ret = pe->table_group.ops->unset_window(&pe->table_group, num);
| 352 | + if (ret)
| 353 | + break;
| 354 | + }
| 355 | +
| 356 | + if (ret) {
| 357 | + for (j = 0; j < i; ++j) {
| 358 | + struct pnv_ioda_pe *pe = npucomp->pe[j];
| 359 | +
| 360 | + if (!npucomp->table_group.tables[num])
| 361 | + continue;
| 362 | +
| 363 | + if (!pe->table_group.ops->set_window)
| 364 | + continue;
| 365 | +
| 366 | + ret = pe->table_group.ops->set_window(&pe->table_group,
| 367 | + num, table_group->tables[num]);
| 368 | + if (ret)
| 369 | + break;
| 370 | + }
| 371 | + } else if (table_group->tables[num]) {
| 372 | + iommu_tce_table_put(table_group->tables[num]);
| 373 | + table_group->tables[num] = NULL;
| 374 | + }
| 375 | +
| 376 | + return ret;
| 377 | +}
| 378 | +
| 379 | +static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group)
610 | 380 | {
611 | 381 | int i;
| 382 | + struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
| 383 | + table_group);
612 | 384 |
613 | | - for (i = 0; i <= max_npu2_index; i++) {
614 | | - /*
615 | | - * We can't rely on npu_context->npdev[][] being the same here
616 | | - * as when acquire_atsd_reg() was called, hence we use the
617 | | - * values stored in mmio_atsd_reg during the acquire phase
618 | | - * rather than re-reading npdev[][].
619 | | - */
620 | | - if (mmio_atsd_reg[i].reg < 0)
| 385 | + for (i = 0; i < npucomp->pe_num; ++i) {
| 386 | + struct pnv_ioda_pe *pe = npucomp->pe[i];
| 387 | +
| 388 | + if (!pe->table_group.ops ||
| 389 | + !pe->table_group.ops->take_ownership)
621 | 390 | continue;
622 | | -
623 | | - put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg);
| 391 | + pe->table_group.ops->take_ownership(&pe->table_group);
624 | 392 | }
625 | 393 | }
626 | 394 |
627 | | -/*
628 | | - * Invalidate either a single address or an entire PID depending on
629 | | - * the value of va.
630 | | - */
631 | | -static void mmio_invalidate(struct npu_context *npu_context, int va,
632 | | - unsigned long address, bool flush)
| 395 | +static void pnv_npu_peers_release_ownership(
| 396 | + struct iommu_table_group *table_group)
633 | 397 | {
634 | | - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
635 | | - unsigned long pid = npu_context->mm->context.id;
| 398 | + int i;
| 399 | + struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
| 400 | + table_group);
636 | 401 |
637 | | - if (npu_context->nmmu_flush)
638 | | - /*
639 | | - * Unfortunately the nest mmu does not support flushing specific
640 | | - * addresses so we have to flush the whole mm once before
641 | | - * shooting down the GPU translation.
642 | | - */
643 | | - flush_all_mm(npu_context->mm);
| 402 | + for (i = 0; i < npucomp->pe_num; ++i) {
| 403 | + struct pnv_ioda_pe *pe = npucomp->pe[i];
644 | 404 |
645 | | - /*
646 | | - * Loop over all the NPUs this process is active on and launch
647 | | - * an invalidate.
648 | | - */
649 | | - acquire_atsd_reg(npu_context, mmio_atsd_reg);
650 | | - if (va)
651 | | - mmio_invalidate_va(mmio_atsd_reg, address, pid, flush);
652 | | - else
653 | | - mmio_invalidate_pid(mmio_atsd_reg, pid, flush);
654 | | -
655 | | - mmio_invalidate_wait(mmio_atsd_reg);
656 | | - if (flush) {
657 | | - /*
658 | | - * The GPU requires two flush ATSDs to ensure all entries have
659 | | - * been flushed. We use PID 0 as it will never be used for a
660 | | - * process on the GPU.
661 | | - */
662 | | - mmio_invalidate_pid(mmio_atsd_reg, 0, true);
663 | | - mmio_invalidate_wait(mmio_atsd_reg);
664 | | - mmio_invalidate_pid(mmio_atsd_reg, 0, true);
665 | | - mmio_invalidate_wait(mmio_atsd_reg);
666 | | - }
667 | | - release_atsd_reg(mmio_atsd_reg);
668 | | -}
669 | | -
670 | | -static void pnv_npu2_mn_release(struct mmu_notifier *mn,
671 | | - struct mm_struct *mm)
672 | | -{
673 | | - struct npu_context *npu_context = mn_to_npu_context(mn);
674 | | -
675 | | - /* Call into device driver to stop requests to the NMMU */
676 | | - if (npu_context->release_cb)
677 | | - npu_context->release_cb(npu_context, npu_context->priv);
678 | | -
679 | | - /*
680 | | - * There should be no more translation requests for this PID, but we
681 | | - * need to ensure any entries for it are removed from the TLB.
682 | | - */
683 | | - mmio_invalidate(npu_context, 0, 0, true);
684 | | -}
685 | | -
686 | | -static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
687 | | - struct mm_struct *mm,
688 | | - unsigned long address,
689 | | - pte_t pte)
690 | | -{
691 | | - struct npu_context *npu_context = mn_to_npu_context(mn);
692 | | -
693 | | - mmio_invalidate(npu_context, 1, address, true);
694 | | -}
695 | | -
696 | | -static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
697 | | - struct mm_struct *mm,
698 | | - unsigned long start, unsigned long end)
699 | | -{
700 | | - struct npu_context *npu_context = mn_to_npu_context(mn);
701 | | - unsigned long address;
702 | | -
703 | | - if (end - start > atsd_threshold) {
704 | | - /*
705 | | - * Just invalidate the entire PID if the address range is too
706 | | - * large.
707 | | - */
708 | | - mmio_invalidate(npu_context, 0, 0, true);
709 | | - } else {
710 | | - for (address = start; address < end; address += PAGE_SIZE)
711 | | - mmio_invalidate(npu_context, 1, address, false);
712 | | -
713 | | - /* Do the flush only on the final addess == end */
714 | | - mmio_invalidate(npu_context, 1, address, true);
| 405 | + if (!pe->table_group.ops ||
| 406 | + !pe->table_group.ops->release_ownership)
| 407 | + continue;
| 408 | + pe->table_group.ops->release_ownership(&pe->table_group);
715 | 409 | }
716 | 410 | }
717 | 411 |
718 | | -static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
719 | | - .release = pnv_npu2_mn_release,
720 | | - .change_pte = pnv_npu2_mn_change_pte,
721 | | - .invalidate_range = pnv_npu2_mn_invalidate_range,
| 412 | +static struct iommu_table_group_ops pnv_npu_peers_ops = {
| 413 | + .get_table_size = pnv_pci_ioda2_get_table_size,
| 414 | + .create_table = pnv_npu_peers_create_table_userspace,
| 415 | + .set_window = pnv_npu_peers_set_window,
| 416 | + .unset_window = pnv_npu_peers_unset_window,
| 417 | + .take_ownership = pnv_npu_peers_take_ownership,
| 418 | + .release_ownership = pnv_npu_peers_release_ownership,
722 | 419 | };
723 | 420 |
724 | | -/*
725 | | - * Call into OPAL to setup the nmmu context for the current task in
726 | | - * the NPU. This must be called to setup the context tables before the
727 | | - * GPU issues ATRs. pdev should be a pointed to PCIe GPU device.
728 | | - *
729 | | - * A release callback should be registered to allow a device driver to
730 | | - * be notified that it should not launch any new translation requests
731 | | - * as the final TLB invalidate is about to occur.
732 | | - *
733 | | - * Returns an error if there no contexts are currently available or a
734 | | - * npu_context which should be passed to pnv_npu2_handle_fault().
735 | | - *
736 | | - * mmap_sem must be held in write mode and must not be called from interrupt
737 | | - * context.
738 | | - */
739 | | -struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
740 | | - unsigned long flags,
741 | | - void (*cb)(struct npu_context *, void *),
742 | | - void *priv)
| 421 | +static void pnv_comp_attach_table_group(struct npu_comp *npucomp,
| 422 | + struct pnv_ioda_pe *pe)
743 | 423 | {
744 | | - int rc;
745 | | - u32 nvlink_index;
746 | | - struct device_node *nvlink_dn;
747 | | - struct mm_struct *mm = current->mm;
748 | | - struct pnv_phb *nphb;
749 | | - struct npu *npu;
750 | | - struct npu_context *npu_context;
| 424 | + if (WARN_ON(npucomp->pe_num == NV_NPU_MAX_PE_NUM))
| 425 | + return;
751 | 426 |
752 | | - /*
753 | | - * At present we don't support GPUs connected to multiple NPUs and I'm
754 | | - * not sure the hardware does either.
755 | | - */
756 | | - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
| 427 | + npucomp->pe[npucomp->pe_num] = pe;
| 428 | + ++npucomp->pe_num;
| 429 | +}
757 | 430 |
758 | | - if (!firmware_has_feature(FW_FEATURE_OPAL))
759 | | - return ERR_PTR(-ENODEV);
| 431 | +static struct iommu_table_group *
| 432 | + pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
| 433 | +{
| 434 | + struct iommu_table_group *compound_group;
| 435 | + struct npu_comp *npucomp;
| 436 | + struct pci_dev *gpdev = NULL;
| 437 | + struct pci_controller *hose;
| 438 | + struct pci_dev *npdev = NULL;
| 439 | +
| 440 | + list_for_each_entry(gpdev, &pe->pbus->devices, bus_list) {
| 441 | + npdev = pnv_pci_get_npu_dev(gpdev, 0);
| 442 | + if (npdev)
| 443 | + break;
| 444 | + }
760 | 445 |
761 | 446 | if (!npdev)
762 | | - /* No nvlink associated with this GPU device */
763 | | - return ERR_PTR(-ENODEV);
| 447 | + /* It is not an NPU attached device, skip */
| 448 | + return NULL;
764 | 449 |
765 | | - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
766 | | - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
767 | | - &nvlink_index)))
768 | | - return ERR_PTR(-ENODEV);
| 450 | + hose = pci_bus_to_host(npdev->bus);
769 | 451 |
770 | | - if (!mm || mm->context.id == 0) {
771 | | - /*
772 | | - * Kernel thread contexts are not supported and context id 0 is
773 | | - * reserved on the GPU.
774 | | - */
775 | | - return ERR_PTR(-EINVAL);
| 452 | + if (hose->npu) {
| 453 | + /* P9 case: compound group is per-NPU (all gpus, all links) */
| 454 | + npucomp = &hose->npu->npucomp;
| 455 | + } else {
| 456 | + /* P8 case: Compound group is per-GPU (1 gpu, 2 links) */
| 457 | + npucomp = pe->npucomp = kzalloc(sizeof(*npucomp), GFP_KERNEL);
776 | 458 | }
777 | 459 |
778 | | - nphb = pci_bus_to_host(npdev->bus)->private_data;
779 | | - npu = &nphb->npu;
| 460 | + compound_group = &npucomp->table_group;
| 461 | + if (!compound_group->group) {
| 462 | + compound_group->ops = &pnv_npu_peers_ops;
| 463 | + iommu_register_group(compound_group, hose->global_number,
| 464 | + pe->pe_number);
780 | 465 |
781 | | - /*
782 | | - * Setup the NPU context table for a particular GPU. These need to be
783 | | - * per-GPU as we need the tables to filter ATSDs when there are no
784 | | - * active contexts on a particular GPU. It is safe for these to be
785 | | - * called concurrently with destroy as the OPAL call takes appropriate
786 | | - * locks and refcounts on init/destroy.
787 | | - */
788 | | - rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags,
789 | | - PCI_DEVID(gpdev->bus->number, gpdev->devfn));
790 | | - if (rc < 0)
791 | | - return ERR_PTR(-ENOSPC);
792 | | -
793 | | - /*
794 | | - * We store the npu pci device so we can more easily get at the
795 | | - * associated npus.
796 | | - */
797 | | - spin_lock(&npu_context_lock);
798 | | - npu_context = mm->context.npu_context;
799 | | - if (npu_context) {
800 | | - if (npu_context->release_cb != cb ||
801 | | - npu_context->priv != priv) {
802 | | - spin_unlock(&npu_context_lock);
803 | | - opal_npu_destroy_context(nphb->opal_id, mm->context.id,
804 | | - PCI_DEVID(gpdev->bus->number,
805 | | - gpdev->devfn));
806 | | - return ERR_PTR(-EINVAL);
807 | | - }
808 | | -
809 | | - WARN_ON(!kref_get_unless_zero(&npu_context->kref));
810 | | - }
811 | | - spin_unlock(&npu_context_lock);
812 | | -
813 | | - if (!npu_context) {
814 | | - /*
815 | | - * We can set up these fields without holding the
816 | | - * npu_context_lock as the npu_context hasn't been returned to
817 | | - * the caller meaning it can't be destroyed. Parallel allocation
818 | | - * is protected against by mmap_sem.
819 | | - */
820 | | - rc = -ENOMEM;
821 | | - npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
822 | | - if (npu_context) {
823 | | - kref_init(&npu_context->kref);
824 | | - npu_context->mm = mm;
825 | | - npu_context->mn.ops = &nv_nmmu_notifier_ops;
826 | | - rc = __mmu_notifier_register(&npu_context->mn, mm);
827 | | - }
828 | | -
829 | | - if (rc) {
830 | | - kfree(npu_context);
831 | | - opal_npu_destroy_context(nphb->opal_id, mm->context.id,
832 | | - PCI_DEVID(gpdev->bus->number,
833 | | - gpdev->devfn));
834 | | - return ERR_PTR(rc);
835 | | - }
836 | | -
837 | | - mm->context.npu_context = npu_context;
| 466 | + /* Steal capabilities from a GPU PE */
| 467 | + compound_group->max_dynamic_windows_supported =
| 468 | + pe->table_group.max_dynamic_windows_supported;
| 469 | + compound_group->tce32_start = pe->table_group.tce32_start;
| 470 | + compound_group->tce32_size = pe->table_group.tce32_size;
| 471 | + compound_group->max_levels = pe->table_group.max_levels;
| 472 | + if (!compound_group->pgsizes)
| 473 | + compound_group->pgsizes = pe->table_group.pgsizes;
838 | 474 | }
839 | 475 |
840 | | - npu_context->release_cb = cb;
841 | | - npu_context->priv = priv;
842 | | -
843 | 476 | /*
844 | | - * npdev is a pci_dev pointer setup by the PCI code. We assign it to
845 | | - * npdev[][] to indicate to the mmu notifiers that an invalidation
846 | | - * should also be sent over this nvlink. The notifiers don't use any
847 | | - * other fields in npu_context, so we just need to ensure that when they
848 | | - * deference npu_context->npdev[][] it is either a valid pointer or
849 | | - * NULL.
| 477 | + * The gpu would have been added to the iommu group that's created
| 478 | + * for the PE. Pull it out now.
850 | 479 | */
851 | | - WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev);
| 480 | + iommu_del_device(&gpdev->dev);
852 | 481 |
853 | | - if (!nphb->npu.nmmu_flush) {
854 | | - /*
855 | | - * If we're not explicitly flushing ourselves we need to mark
856 | | - * the thread for global flushes
857 | | - */
858 | | - npu_context->nmmu_flush = false;
859 | | - mm_context_add_copro(mm);
860 | | - } else
861 | | - npu_context->nmmu_flush = true;
| 482 | + /*
| 483 | + * I'm not sure this is strictly required, but it's probably a good idea
| 484 | + * since the table_group for the PE is going to be attached to the
| 485 | + * compound table group. If we leave the PE's iommu group active then
| 486 | + * we might have the same table_group being modifiable via two sepeate
| 487 | + * iommu groups.
| 488 | + */
| 489 | + iommu_group_put(pe->table_group.group);
862 | 490 |
863 | | - return npu_context;
864 | | -}
865 | | -EXPORT_SYMBOL(pnv_npu2_init_context);
| 491 | + /* now put the GPU into the compound group */
| 492 | + pnv_comp_attach_table_group(npucomp, pe);
| 493 | + iommu_add_device(compound_group, &gpdev->dev);
866 | 494 |
867 | | -static void pnv_npu2_release_context(struct kref *kref)
868 | | -{
869 | | - struct npu_context *npu_context =
870 | | - container_of(kref, struct npu_context, kref);
871 | | -
872 | | - if (!npu_context->nmmu_flush)
873 | | - mm_context_remove_copro(npu_context->mm);
874 | | -
875 | | - npu_context->mm->context.npu_context = NULL;
| 495 | + return compound_group;
876 | 496 | }
877 | 497 |
878 | | -/*
879 | | - * Destroy a context on the given GPU. May free the npu_context if it is no
880 | | - * longer active on any GPUs. Must not be called from interrupt context.
881 | | - */
882 | | -void pnv_npu2_destroy_context(struct npu_context *npu_context,
883 | | - struct pci_dev *gpdev)
| 498 | +static struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
884 | 499 | {
885 | | - int removed;
886 | | - struct pnv_phb *nphb;
| 500 | + struct iommu_table_group *table_group;
| 501 | + struct npu_comp *npucomp;
| 502 | + struct pci_dev *gpdev = NULL;
| 503 | + struct pci_dev *npdev;
| 504 | + struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(pe, &gpdev);
| 505 | +
| 506 | + WARN_ON(!(pe->flags & PNV_IODA_PE_DEV));
| 507 | + if (!gpe)
| 508 | + return NULL;
| 509 | +
| 510 | + /*
| 511 | + * IODA2 bridges get this set up from pci_controller_ops::setup_bridge
| 512 | + * but NPU bridges do not have this hook defined so we do it here.
| 513 | + * We do not setup other table group parameters as they won't be used
| 514 | + * anyway - NVLink bridges are subordinate PEs.
| 515 | + */
| 516 | + pe->table_group.ops = &pnv_pci_npu_ops;
| 517 | +
| 518 | + table_group = iommu_group_get_iommudata(
| 519 | + iommu_group_get(&gpdev->dev));
| 520 | +
| 521 | + /*
| 522 | + * On P9 NPU PHB and PCI PHB support different page sizes,
| 523 | + * keep only matching. We expect here that NVLink bridge PE pgsizes is
| 524 | + * initialized by the caller.
| 525 | + */
| 526 | + table_group->pgsizes &= pe->table_group.pgsizes;
| 527 | + npucomp = container_of(table_group, struct npu_comp, table_group);
| 528 | + pnv_comp_attach_table_group(npucomp, pe);
| 529 | +
| 530 | + list_for_each_entry(npdev, &pe->phb->hose->bus->devices, bus_list) {
| 531 | + struct pci_dev *gpdevtmp = pnv_pci_get_gpu_dev(npdev);
| 532 | +
| 533 | + if (gpdevtmp != gpdev)
| 534 | + continue;
| 535 | +
| 536 | + iommu_add_device(table_group, &npdev->dev);
| 537 | + }
| 538 | +
| 539 | + return table_group;
| 540 | +}
| 541 | +
| 542 | +void pnv_pci_npu_setup_iommu_groups(void)
| 543 | +{
| 544 | + struct pci_controller *hose;
| 545 | + struct pnv_phb *phb;
| 546 | + struct pnv_ioda_pe *pe;
| 547 | +
| 548 | + /*
| 549 | + * For non-nvlink devices the IOMMU group is registered when the PE is
| 550 | + * configured and devices are added to the group when the per-device
| 551 | + * DMA setup is run. That's done in hose->ops.dma_dev_setup() which is
| 552 | + * only initialise for "normal" IODA PHBs.
| 553 | + *
| 554 | + * For NVLink devices we need to ensure the NVLinks and the GPU end up
| 555 | + * in the same IOMMU group, so that's handled here.
| 556 | + */
| 557 | + list_for_each_entry(hose, &hose_list, list_node) {
| 558 | + phb = hose->private_data;
| 559 | +
| 560 | + if (phb->type == PNV_PHB_IODA2)
| 561 | + list_for_each_entry(pe, &phb->ioda.pe_list, list)
| 562 | + pnv_try_setup_npu_table_group(pe);
| 563 | + }
| 564 | +
| 565 | + /*
| 566 | + * Now we have all PHBs discovered, time to add NPU devices to
| 567 | + * the corresponding IOMMU groups.
| 568 | + */
| 569 | + list_for_each_entry(hose, &hose_list, list_node) {
| 570 | + unsigned long pgsizes;
| 571 | +
| 572 | + phb = hose->private_data;
| 573 | +
| 574 | + if (phb->type != PNV_PHB_NPU_NVLINK)
| 575 | + continue;
| 576 | +
| 577 | + pgsizes = pnv_ioda_parse_tce_sizes(phb);
| 578 | + list_for_each_entry(pe, &phb->ioda.pe_list, list) {
| 579 | + /*
| 580 | + * IODA2 bridges get this set up from
| 581 | + * pci_controller_ops::setup_bridge but NPU bridges
| 582 | + * do not have this hook defined so we do it here.
| 583 | + */
| 584 | + pe->table_group.pgsizes = pgsizes;
| 585 | + pnv_npu_compound_attach(pe);
| 586 | + }
| 587 | + }
| 588 | +}
| 589 | +#endif /* CONFIG_IOMMU_API */
| 590 | +
| 591 | +int pnv_npu2_init(struct pci_controller *hose)
| 592 | +{
| 593 | + static int npu_index;
887 | 594 | struct npu *npu;
888 | | - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
889 | | - struct device_node *nvlink_dn;
890 | | - u32 nvlink_index;
| 595 | + int ret;
891 | 596 |
892 | | - if (WARN_ON(!npdev))
893 | | - return;
| 597 | + npu = kzalloc(sizeof(*npu), GFP_KERNEL);
| 598 | + if (!npu)
| 599 | + return -ENOMEM;
894 | 600 |
895 | | - if (!firmware_has_feature(FW_FEATURE_OPAL))
896 | | - return;
897 | | -
898 | | - nphb = pci_bus_to_host(npdev->bus)->private_data;
899 | | - npu = &nphb->npu;
900 | | - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
901 | | - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
902 | | - &nvlink_index)))
903 | | - return;
904 | | - WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
905 | | - opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
906 | | - PCI_DEVID(gpdev->bus->number, gpdev->devfn));
907 | | - spin_lock(&npu_context_lock);
908 | | - removed = kref_put(&npu_context->kref, pnv_npu2_release_context);
909 | | - spin_unlock(&npu_context_lock);
910 | | -
911 | | - /*
912 | | - * We need to do this outside of pnv_npu2_release_context so that it is
913 | | - * outside the spinlock as mmu_notifier_destroy uses SRCU.
914 | | - */
915 | | - if (removed) {
916 | | - mmu_notifier_unregister(&npu_context->mn,
917 | | - npu_context->mm);
918 | | -
919 | | - kfree(npu_context);
| 601 | + npu_index++;
| 602 | + if (WARN_ON(npu_index >= NV_MAX_NPUS)) {
| 603 | + ret = -ENOSPC;
| 604 | + goto fail_exit;
920 | 605 | }
| 606 | + npu->index = npu_index;
| 607 | + hose->npu = npu;
921 | 608 |
| 609 | + return 0;
| 610 | +
| 611 | +fail_exit:
| 612 | + kfree(npu);
| 613 | + return ret;
922 | 614 | }
923 | | -EXPORT_SYMBOL(pnv_npu2_destroy_context);
924 | 615 |
925 | | -/*
926 | | - * Assumes mmap_sem is held for the contexts associated mm.
927 | | - */
928 | | -int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
929 | | - unsigned long *flags, unsigned long *status, int count)
| 616 | +int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
| 617 | + unsigned long msr)
930 | 618 | {
931 | | - u64 rc = 0, result = 0;
932 | | - int i, is_write;
933 | | - struct page *page[1];
| 619 | + int ret;
| 620 | + struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
| 621 | + struct pci_controller *hose;
| 622 | + struct pnv_phb *nphb;
934 | 623 |
935 | | - /* mmap_sem should be held so the struct_mm must be present */
936 | | - struct mm_struct *mm = context->mm;
937 | | -
938 | | - if (!firmware_has_feature(FW_FEATURE_OPAL))
| 624 | + if (!npdev)
939 | 625 | return -ENODEV;
940 | 626 |
941 | | - WARN_ON(!rwsem_is_locked(&mm->mmap_sem));
942 | | -
943 | | - for (i = 0; i < count; i++) {
944 | | - is_write = flags[i] & NPU2_WRITE;
945 | | - rc = get_user_pages_remote(NULL, mm, ea[i], 1,
946 | | - is_write ? FOLL_WRITE : 0,
947 | | - page, NULL, NULL);
948 | | -
949 | | - /*
950 | | - * To support virtualised environments we will have to do an
951 | | - * access to the page to ensure it gets faulted into the
952 | | - * hypervisor. For the moment virtualisation is not supported in
953 | | - * other areas so leave the access out.
954 | | - */
955 | | - if (rc != 1) {
956 | | - status[i] = rc;
957 | | - result = -EFAULT;
958 | | - continue;
959 | | - }
960 | | -
961 | | - status[i] = 0;
962 | | - put_page(page[0]);
| 627 | + hose = pci_bus_to_host(npdev->bus);
| 628 | + if (hose->npu == NULL) {
| 629 | + dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
| 630 | + return 0;
963 | 631 | }
964 | 632 |
965 | | - return result;
966 | | -}
967 | | -EXPORT_SYMBOL(pnv_npu2_handle_fault);
| 633 | + nphb = hose->private_data;
968 | 634 |
969 | | -int pnv_npu2_init(struct pnv_phb *phb)
970 | | -{
971 | | - unsigned int i;
972 | | - u64 mmio_atsd;
973 | | - struct device_node *dn;
974 | | - struct pci_dev *gpdev;
975 | | - static int npu_index;
976 | | - uint64_t rc = 0;
977 | | -
978 | | - if (!atsd_threshold_dentry) {
979 | | - atsd_threshold_dentry = debugfs_create_x64("atsd_threshold",
980 | | - 0600, powerpc_debugfs_root, &atsd_threshold);
| 635 | + dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n",
| 636 | + nphb->opal_id, lparid);
| 637 | + /*
| 638 | + * Currently we only support radix and non-zero LPCR only makes sense
| 639 | + * for hash tables so skiboot expects the LPCR parameter to be a zero.
| 640 | + */
| 641 | + ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), lparid,
| 642 | + 0 /* LPCR bits */);
| 643 | + if (ret) {
| 644 | + dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
| 645 | + return ret;
981 | 646 | }
982 | 647 |
983 | | - phb->npu.nmmu_flush =
984 | | - of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush");
985 | | - for_each_child_of_node(phb->hose->dn, dn) {
986 | | - gpdev = pnv_pci_get_gpu_dev(get_pci_dev(dn));
987 | | - if (gpdev) {
988 | | - rc = opal_npu_map_lpar(phb->opal_id,
989 | | - PCI_DEVID(gpdev->bus->number, gpdev->devfn),
990 | | - 0, 0);
991 | | - if (rc)
992 | | - dev_err(&gpdev->dev,
993 | | - "Error %lld mapping device to LPAR\n",
994 | | - rc);
995 | | - }
996 | | - }
997 | | -
998 | | - for (i = 0; !of_property_read_u64_index(phb->hose->dn, "ibm,mmio-atsd",
999 | | - i, &mmio_atsd); i++)
1000 | | - phb->npu.mmio_atsd_regs[i] = ioremap(mmio_atsd, 32);
1001 | | -
1002 | | - pr_info("NPU%lld: Found %d MMIO ATSD registers", phb->opal_id, i);
1003 | | - phb->npu.mmio_atsd_count = i;
1004 | | - phb->npu.mmio_atsd_usage = 0;
1005 | | - npu_index++;
1006 | | - if (WARN_ON(npu_index >= NV_MAX_NPUS))
1007 | | - return -ENOSPC;
1008 | | - max_npu2_index = npu_index;
1009 | | - phb->npu.index = npu_index;
| 648 | + dev_dbg(&gpdev->dev, "init context opalid=%llu msr=%lx\n",
| 649 | + nphb->opal_id, msr);
| 650 | + ret = opal_npu_init_context(nphb->opal_id, 0/*__unused*/, msr,
| 651 | + pci_dev_id(gpdev));
| 652 | + if (ret < 0)
| 653 | + dev_err(&gpdev->dev, "Failed to init context: %d\n", ret);
| 654 | + else
| 655 | + ret = 0;
1010 | 656 |
1011 | 657 | return 0;
1012 | 658 | }
| 659 | +EXPORT_SYMBOL_GPL(pnv_npu2_map_lpar_dev);
| 660 | +
| 661 | +void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr)
| 662 | +{
| 663 | + struct pci_dev *gpdev;
| 664 | +
| 665 | + list_for_each_entry(gpdev, &gpe->pbus->devices, bus_list)
| 666 | + pnv_npu2_map_lpar_dev(gpdev, 0, msr);
| 667 | +}
| 668 | +
| 669 | +int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev)
| 670 | +{
| 671 | + int ret;
| 672 | + struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
| 673 | + struct pci_controller *hose;
| 674 | + struct pnv_phb *nphb;
| 675 | +
| 676 | + if (!npdev)
| 677 | + return -ENODEV;
| 678 | +
| 679 | + hose = pci_bus_to_host(npdev->bus);
| 680 | + if (hose->npu == NULL) {
| 681 | + dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
| 682 | + return 0;
| 683 | + }
| 684 | +
| 685 | + nphb = hose->private_data;
| 686 | +
| 687 | + dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n",
| 688 | + nphb->opal_id);
| 689 | + ret = opal_npu_destroy_context(nphb->opal_id, 0/*__unused*/,
| 690 | + pci_dev_id(gpdev));
| 691 | + if (ret < 0) {
| 692 | + dev_err(&gpdev->dev, "Failed to destroy context: %d\n", ret);
| 693 | + return ret;
| 694 | + }
| 695 | +
| 696 | + /* Set LPID to 0 anyway, just to be safe */
| 697 | + dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=0\n", nphb->opal_id);
| 698 | + ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), 0 /*LPID*/,
| 699 | + 0 /* LPCR bits */);
| 700 | + if (ret)
| 701 | + dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
| 702 | +
| 703 | + return ret;
| 704 | +}
| 705 | +EXPORT_SYMBOL_GPL(pnv_npu2_unmap_lpar_dev);
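
The new table_group callbacks introduced above (pnv_npu_set_window(), pnv_npu_unset_window(), pnv_npu_take_ownership() and the pnv_npu_peers_*() helpers) all recover their owning pnv_ioda_pe or npu_comp from the iommu_table_group pointer they are handed, using container_of() on the embedded table_group member. Below is a minimal, self-contained user-space sketch of that pattern; the structure names are simplified stand-ins for illustration only, not the real pnv_ioda_pe or iommu_table_group definitions.

#include <stddef.h>
#include <stdio.h>

/*
 * container_of(): recover a pointer to the enclosing structure from a
 * pointer to one of its members, as the table_group callbacks in the
 * patch do for pnv_ioda_pe and npu_comp.
 */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct table_group {			/* stand-in for iommu_table_group */
	int id;
};

struct pe {				/* stand-in for pnv_ioda_pe */
	int pe_number;
	struct table_group table_group;	/* embedded, not a pointer */
};

/* A callback that is only handed the embedded member... */
static void take_ownership(struct table_group *table_group)
{
	/* ...and gets the owning PE back without any global lookup. */
	struct pe *pe = container_of(table_group, struct pe, table_group);

	printf("taking ownership of PE %d\n", pe->pe_number);
}

int main(void)
{
	struct pe pe = { .pe_number = 4 };

	take_ownership(&pe.table_group);	/* prints "taking ownership of PE 4" */
	return 0;
}

Keeping the ops signatures in terms of iommu_table_group alone (rather than pnv_ioda_pe) is what lets the same iommu_table_group_ops interface serve both the per-NVLink-bridge PEs and the compound npu_comp group added by this patch.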