.. | .. |
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
1 | 2 | /* |
2 | 3 | * Support PCI/PCIe on PowerNV platforms |
3 | 4 | * |
4 | 5 | * Copyright 2011 Benjamin Herrenschmidt, IBM Corp. |
5 | | - * |
6 | | - * This program is free software; you can redistribute it and/or |
7 | | - * modify it under the terms of the GNU General Public License |
8 | | - * as published by the Free Software Foundation; either version |
9 | | - * 2 of the License, or (at your option) any later version. |
10 | 6 | */ |
11 | 7 | |
12 | 8 | #undef DEBUG |
.. | .. |
17 | 13 | #include <linux/delay.h> |
18 | 14 | #include <linux/string.h> |
19 | 15 | #include <linux/init.h> |
20 | | -#include <linux/bootmem.h> |
| 16 | +#include <linux/memblock.h> |
21 | 17 | #include <linux/irq.h> |
22 | 18 | #include <linux/io.h> |
23 | 19 | #include <linux/msi.h> |
24 | | -#include <linux/memblock.h> |
25 | 20 | #include <linux/iommu.h> |
26 | 21 | #include <linux/rculist.h> |
27 | 22 | #include <linux/sizes.h> |
.. | .. |
54 | 49 | |
55 | 50 | static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK", |
56 | 51 | "NPU_OCAPI" }; |
| 52 | + |
| 53 | +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); |
| 54 | +static void pnv_pci_configure_bus(struct pci_bus *bus); |
57 | 55 | |
58 | 56 | void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, |
59 | 57 | const char *fmt, ...) |
.. | .. |
117 | 115 | |
118 | 116 | early_param("ppc_pci_reset_phbs", pci_reset_phbs_setup); |
119 | 117 | |
120 | | -static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r) |
121 | | -{ |
122 | | - /* |
123 | | - * WARNING: We cannot rely on the resource flags. The Linux PCI |
124 | | - * allocation code sometimes decides to put a 64-bit prefetchable |
125 | | - * BAR in the 32-bit window, so we have to compare the addresses. |
126 | | - * |
127 | | - * For simplicity we only test resource start. |
128 | | - */ |
129 | | - return (r->start >= phb->ioda.m64_base && |
130 | | - r->start < (phb->ioda.m64_base + phb->ioda.m64_size)); |
131 | | -} |
132 | | - |
133 | | -static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags) |
134 | | -{ |
135 | | - unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); |
136 | | - |
137 | | - return (resource_flags & flags) == flags; |
138 | | -} |
139 | | - |
140 | 118 | static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) |
141 | 119 | { |
142 | 120 | s64 rc; |
143 | 121 | |
144 | 122 | phb->ioda.pe_array[pe_no].phb = phb; |
145 | 123 | phb->ioda.pe_array[pe_no].pe_number = pe_no; |
| 124 | + phb->ioda.pe_array[pe_no].dma_setup_done = false; |
146 | 125 | |
147 | 126 | /* |
148 | 127 | * Clear the PE frozen state as it might be put into frozen state |
.. | .. |
166 | 145 | return; |
167 | 146 | } |
168 | 147 | |
| 148 | + mutex_lock(&phb->ioda.pe_alloc_mutex); |
169 | 149 | if (test_and_set_bit(pe_no, phb->ioda.pe_alloc)) |
170 | 150 | pr_debug("%s: PE %x was reserved on PHB#%x\n", |
171 | 151 | __func__, pe_no, phb->hose->global_number); |
| 152 | + mutex_unlock(&phb->ioda.pe_alloc_mutex); |
172 | 153 | |
173 | 154 | pnv_ioda_init_pe(phb, pe_no); |
174 | 155 | } |
175 | 156 | |
176 | | -static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb) |
| 157 | +struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count) |
177 | 158 | { |
178 | | - long pe; |
| 159 | + struct pnv_ioda_pe *ret = NULL; |
| 160 | + int run = 0, pe, i; |
179 | 161 | |
| 162 | + mutex_lock(&phb->ioda.pe_alloc_mutex); |
| 163 | + |
| 164 | + /* scan backwards for a run of @count cleared bits */ |
180 | 165 | for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) { |
181 | | - if (!test_and_set_bit(pe, phb->ioda.pe_alloc)) |
182 | | - return pnv_ioda_init_pe(phb, pe); |
183 | | - } |
| 166 | + if (test_bit(pe, phb->ioda.pe_alloc)) { |
| 167 | + run = 0; |
| 168 | + continue; |
| 169 | + } |
184 | 170 | |
185 | | - return NULL; |
| 171 | + run++; |
| 172 | + if (run == count) |
| 173 | + break; |
| 174 | + } |
| 175 | + if (run != count) |
| 176 | + goto out; |
| 177 | + |
| 178 | + for (i = pe; i < pe + count; i++) { |
| 179 | + set_bit(i, phb->ioda.pe_alloc); |
| 180 | + pnv_ioda_init_pe(phb, i); |
| 181 | + } |
| 182 | + ret = &phb->ioda.pe_array[pe]; |
| 183 | + |
| 184 | +out: |
| 185 | + mutex_unlock(&phb->ioda.pe_alloc_mutex); |
| 186 | + return ret; |
186 | 187 | } |
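
The reworked `pnv_ioda_alloc_pe()` above now takes a `count` and scans the PE bitmap backwards, under `pe_alloc_mutex`, for a run of `count` consecutive free bits, so callers that need a block of PEs get adjacent numbers. A minimal standalone sketch of the same run scan (hypothetical helper, not the kernel code):

```c
#include <stdbool.h>

/* Test bit n in a bitmap of unsigned longs (sketch of test_bit()). */
static bool bit_is_set(const unsigned long *map, int n)
{
	return (map[n / (8 * (int)sizeof(long))] >> (n % (8 * (int)sizeof(long)))) & 1;
}

/* Returns the first PE of the highest free run of @count bits, or -1. */
static int find_free_run(const unsigned long *pe_alloc, int total, int count)
{
	int run = 0, pe;

	for (pe = total - 1; pe >= 0; pe--) {
		if (bit_is_set(pe_alloc, pe)) {
			run = 0;	/* run broken, start counting again */
			continue;
		}
		if (++run == count)
			return pe;	/* pe .. pe + count - 1 are all free */
	}
	return -1;
}
```

Because the scan runs downwards, `run` counts free bits ending at the higher indices, which is why the kernel function returns `&phb->ioda.pe_array[pe]` and marks `pe .. pe + count - 1` allocated.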
187 | 188 | |
188 | | -static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) |
| 189 | +void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) |
189 | 190 | { |
190 | 191 | struct pnv_phb *phb = pe->phb; |
191 | 192 | unsigned int pe_num = pe->pe_number; |
192 | 193 | |
193 | 194 | WARN_ON(pe->pdev); |
194 | | - |
| 195 | + WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */ |
| 196 | + kfree(pe->npucomp); |
195 | 197 | memset(pe, 0, sizeof(struct pnv_ioda_pe)); |
| 198 | + |
| 199 | + mutex_lock(&phb->ioda.pe_alloc_mutex); |
196 | 200 | clear_bit(pe_num, phb->ioda.pe_alloc); |
| 201 | + mutex_unlock(&phb->ioda.pe_alloc_mutex); |
197 | 202 | } |
198 | 203 | |
199 | 204 | /* The default M64 BAR is shared by all PEs */ |
.. | .. |
253 | 258 | static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev, |
254 | 259 | unsigned long *pe_bitmap) |
255 | 260 | { |
256 | | - struct pci_controller *hose = pci_bus_to_host(pdev->bus); |
257 | | - struct pnv_phb *phb = hose->private_data; |
| 261 | + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); |
258 | 262 | struct resource *r; |
259 | 263 | resource_size_t base, sgsz, start, end; |
260 | 264 | int segno, i; |
.. | .. |
266 | 270 | if (!r->parent || !pnv_pci_is_m64(phb, r)) |
267 | 271 | continue; |
268 | 272 | |
269 | | - start = _ALIGN_DOWN(r->start - base, sgsz); |
270 | | - end = _ALIGN_UP(r->end - base, sgsz); |
| 273 | + start = ALIGN_DOWN(r->start - base, sgsz); |
| 274 | + end = ALIGN(r->end - base, sgsz); |
271 | 275 | for (segno = start / sgsz; segno < end / sgsz; segno++) { |
272 | 276 | if (pe_bitmap) |
273 | 277 | set_bit(segno, pe_bitmap); |
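
The hunk above swaps the powerpc-private `_ALIGN_DOWN`/`_ALIGN_UP` for the generic `ALIGN_DOWN`/`ALIGN` macros; for the power-of-two segment sizes used here the result is identical. Rounding the BAR offsets outward to segment boundaries makes the loop mark every M64 segment the BAR touches. A standalone worked example with made-up numbers (256MB segments, BAR at offsets 0x18000000..0x27ffffff from the M64 base):

```c
#include <stdio.h>

/* Power-of-two rounding, mirroring the kernel's ALIGN_DOWN()/ALIGN(). */
#define SEG_ALIGN_DOWN(x, a)	((x) & ~((a) - 1))
#define SEG_ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long long base = 0, sgsz = 0x10000000ULL;	/* 256MB segments */
	unsigned long long r_start = 0x18000000ULL, r_end = 0x27ffffffULL;

	unsigned long long start = SEG_ALIGN_DOWN(r_start - base, sgsz);	/* 0x10000000 */
	unsigned long long end = SEG_ALIGN_UP(r_end - base, sgsz);		/* 0x30000000 */

	/* Prints segments 1 and 2: the BAR straddles both. */
	for (unsigned long long segno = start / sgsz; segno < end / sgsz; segno++)
		printf("segment %llu\n", segno);
	return 0;
}
```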
.. | .. |
312 | 316 | } |
313 | 317 | } |
314 | 318 | |
| 319 | + for (index = 0; index < phb->ioda.total_pe_num; index++) { |
| 320 | + int64_t rc; |
| 321 | + |
| 322 | + /* |
| 323 | + * P7IOC supports M64DT, which helps mapping M64 segment |
| 324 | + * to one particular PE#. However, PHB3 has fixed mapping |
| 325 | + * between M64 segment and PE#. In order to have same logic |
| 326 | + * for P7IOC and PHB3, we enforce fixed mapping between M64 |
| 327 | + * segment and PE# on P7IOC. |
| 328 | + */ |
| 329 | + rc = opal_pci_map_pe_mmio_window(phb->opal_id, |
| 330 | + index, OPAL_M64_WINDOW_TYPE, |
| 331 | + index / PNV_IODA1_M64_SEGS, |
| 332 | + index % PNV_IODA1_M64_SEGS); |
| 333 | + if (rc != OPAL_SUCCESS) { |
| 334 | + pr_warn("%s: Error %lld mapping M64 for PHB#%x-PE#%x\n", |
| 335 | + __func__, rc, phb->hose->global_number, |
| 336 | + index); |
| 337 | + goto fail; |
| 338 | + } |
| 339 | + } |
| 340 | + |
315 | 341 | /* |
316 | 342 | * Exclude the segments for reserved and root bus PE, which |
317 | 343 | * are first or last two PEs. |
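
With the loop above, every IODA1 M64 segment is bound to its same-numbered PE once at init time, instead of per-PE when an M64 PE is picked (the code removed further down). The arithmetic packs `PNV_IODA1_M64_SEGS` segments into each M64 window. A quick illustration of the index split (8 segments per window is assumed here purely for the example):

```c
#include <stdio.h>

#define PNV_IODA1_M64_SEGS	8	/* assumption for this example */

int main(void)
{
	/* PE# n lands in M64 window n / 8, segment n % 8. */
	for (int index = 0; index < 20; index++)
		printf("PE#%-2d -> M64 window %d, segment %d\n",
		       index, index / PNV_IODA1_M64_SEGS,
		       index % PNV_IODA1_M64_SEGS);
	return 0;
}
```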
.. | .. |
352 | 378 | |
353 | 379 | static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) |
354 | 380 | { |
355 | | - struct pci_controller *hose = pci_bus_to_host(bus); |
356 | | - struct pnv_phb *phb = hose->private_data; |
| 381 | + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); |
357 | 382 | struct pnv_ioda_pe *master_pe, *pe; |
358 | 383 | unsigned long size, *pe_alloc; |
359 | 384 | int i; |
.. | .. |
363 | 388 | return NULL; |
364 | 389 | |
365 | 390 | /* Allocate bitmap */ |
366 | | - size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long)); |
| 391 | + size = ALIGN(phb->ioda.total_pe_num / 8, sizeof(unsigned long)); |
367 | 392 | pe_alloc = kzalloc(size, GFP_KERNEL); |
368 | 393 | if (!pe_alloc) { |
369 | 394 | pr_warn("%s: Out of memory !\n", |
.. | .. |
403 | 428 | pe->flags |= PNV_IODA_PE_SLAVE; |
404 | 429 | pe->master = master_pe; |
405 | 430 | list_add_tail(&pe->list, &master_pe->slaves); |
406 | | - } |
407 | | - |
408 | | - /* |
409 | | - * P7IOC supports M64DT, which helps mapping M64 segment |
410 | | - * to one particular PE#. However, PHB3 has fixed mapping |
411 | | - * between M64 segment and PE#. In order to have same logic |
412 | | - * for P7IOC and PHB3, we enforce fixed mapping between M64 |
413 | | - * segment and PE# on P7IOC. |
414 | | - */ |
415 | | - if (phb->type == PNV_PHB_IODA1) { |
416 | | - int64_t rc; |
417 | | - |
418 | | - rc = opal_pci_map_pe_mmio_window(phb->opal_id, |
419 | | - pe->pe_number, OPAL_M64_WINDOW_TYPE, |
420 | | - pe->pe_number / PNV_IODA1_M64_SEGS, |
421 | | - pe->pe_number % PNV_IODA1_M64_SEGS); |
422 | | - if (rc != OPAL_SUCCESS) |
423 | | - pr_warn("%s: Error %lld mapping M64 for PHB#%x-PE#%x\n", |
424 | | - __func__, rc, phb->hose->global_number, |
425 | | - pe->pe_number); |
426 | 431 | } |
427 | 432 | } |
.. | .. |
518 | 523 | phb->init_m64 = pnv_ioda1_init_m64; |
519 | 524 | else |
520 | 525 | phb->init_m64 = pnv_ioda2_init_m64; |
521 | | - phb->reserve_m64_pe = pnv_ioda_reserve_m64_pe; |
522 | | - phb->pick_m64_pe = pnv_ioda_pick_m64_pe; |
523 | 526 | } |
524 | 527 | |
525 | 528 | static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no) |
.. | .. |
664 | 667 | return state; |
665 | 668 | } |
666 | 669 | |
667 | | -/* Currently those 2 are only used when MSIs are enabled, this will change |
668 | | - * but in the meantime, we need to protect them to avoid warnings |
669 | | - */ |
670 | | -#ifdef CONFIG_PCI_MSI |
| 670 | +struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn) |
| 671 | +{ |
| 672 | + int pe_number = phb->ioda.pe_rmap[bdfn]; |
| 673 | + |
| 674 | + if (pe_number == IODA_INVALID_PE) |
| 675 | + return NULL; |
| 676 | + |
| 677 | + return &phb->ioda.pe_array[pe_number]; |
| 678 | +} |
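
`pnv_pci_bdfn_to_pe()` is a new reverse lookup from a 16-bit RID (bus number in bits 15:8, devfn in bits 7:0) to its PE via the `pe_rmap[]` array, matching the `bus->number << 8 | devfn` values this file already stores into `pe->rid`. A hypothetical standalone sketch of the RID layout it indexes by:

```c
#include <stdint.h>
#include <stdio.h>

/* RID/bdfn layout: bus[15:8] device[7:3] function[2:0]. */
static uint16_t bdfn(uint8_t bus, uint8_t devfn)
{
	return (uint16_t)(bus << 8 | devfn);
}

int main(void)
{
	/* e.g. 0000:02:1f.3 -> devfn = (slot << 3) | fn = 0xfb */
	printf("rid = 0x%04x\n", bdfn(0x02, (0x1f << 3) | 0x3));	/* 0x02fb */
	return 0;
}
```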
| 679 | + |
671 | 680 | struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) |
672 | 681 | { |
673 | | - struct pci_controller *hose = pci_bus_to_host(dev->bus); |
674 | | - struct pnv_phb *phb = hose->private_data; |
| 682 | + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); |
675 | 683 | struct pci_dn *pdn = pci_get_pdn(dev); |
676 | 684 | |
677 | 685 | if (!pdn) |
.. | .. |
680 | 688 | return NULL; |
681 | 689 | return &phb->ioda.pe_array[pdn->pe_number]; |
682 | 690 | } |
683 | | -#endif /* CONFIG_PCI_MSI */ |
684 | 691 | |
685 | 692 | static int pnv_ioda_set_one_peltv(struct pnv_phb *phb, |
686 | 693 | struct pnv_ioda_pe *parent, |
.. | .. |
786 | 793 | return 0; |
787 | 794 | } |
788 | 795 | |
789 | | -static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) |
| 796 | +static void pnv_ioda_unset_peltv(struct pnv_phb *phb, |
| 797 | + struct pnv_ioda_pe *pe, |
| 798 | + struct pci_dev *parent) |
| 799 | +{ |
| 800 | + int64_t rc; |
| 801 | + |
| 802 | + while (parent) { |
| 803 | + struct pci_dn *pdn = pci_get_pdn(parent); |
| 804 | + |
| 805 | + if (pdn && pdn->pe_number != IODA_INVALID_PE) { |
| 806 | + rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, |
| 807 | + pe->pe_number, |
| 808 | + OPAL_REMOVE_PE_FROM_DOMAIN); |
| 809 | + /* XXX What to do in case of error ? */ |
| 810 | + } |
| 811 | + parent = parent->bus->self; |
| 812 | + } |
| 813 | + |
| 814 | + opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, |
| 815 | + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); |
| 816 | + |
| 817 | + /* Disassociate PE in PELT */ |
| 818 | + rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, |
| 819 | + pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); |
| 820 | + if (rc) |
| 821 | + pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc); |
| 822 | +} |
| 823 | + |
| 824 | +int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) |
790 | 825 | { |
791 | 826 | struct pci_dev *parent; |
792 | 827 | uint8_t bcomp, dcomp, fcomp; |
.. | .. |
801 | 836 | fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; |
802 | 837 | parent = pe->pbus->self; |
803 | 838 | if (pe->flags & PNV_IODA_PE_BUS_ALL) |
804 | | - count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1; |
| 839 | + count = resource_size(&pe->pbus->busn_res); |
805 | 840 | else |
806 | 841 | count = 1; |
807 | 842 | |
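
`resource_size()` is the stock helper for the inclusive-range `end - start + 1` that this hunk replaces, so the bus-number count is unchanged. For reference, its definition in include/linux/ioport.h:

```c
static inline resource_size_t resource_size(const struct resource *res)
{
	return res->end - res->start + 1;
}
```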
.. | .. |
836 | 871 | for (rid = pe->rid; rid < rid_end; rid++) |
837 | 872 | phb->ioda.pe_rmap[rid] = IODA_INVALID_PE; |
838 | 873 | |
839 | | - /* Release from all parents PELT-V */ |
840 | | - while (parent) { |
841 | | - struct pci_dn *pdn = pci_get_pdn(parent); |
842 | | - if (pdn && pdn->pe_number != IODA_INVALID_PE) { |
843 | | - rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, |
844 | | - pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); |
845 | | - /* XXX What to do in case of error ? */ |
846 | | - } |
847 | | - parent = parent->bus->self; |
848 | | - } |
| 874 | + /* |
| 875 | + * Release from all parents PELT-V. NPUs don't have a PELTV |
| 876 | + * table |
| 877 | + */ |
| 878 | + if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI) |
| 879 | + pnv_ioda_unset_peltv(phb, pe, parent); |
849 | 880 | |
850 | | - opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, |
851 | | - OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); |
852 | | - |
853 | | - /* Disassociate PE in PELT */ |
854 | | - rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, |
855 | | - pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); |
856 | | - if (rc) |
857 | | - pe_warn(pe, "OPAL error %ld remove self from PELTV\n", rc); |
858 | 881 | rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, |
859 | 882 | bcomp, dcomp, fcomp, OPAL_UNMAP_PE); |
860 | 883 | if (rc) |
861 | | - pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc); |
| 884 | + pe_err(pe, "OPAL error %lld trying to setup PELT table\n", rc); |
862 | 885 | |
863 | 886 | pe->pbus = NULL; |
864 | 887 | pe->pdev = NULL; |
.. | .. |
869 | 892 | return 0; |
870 | 893 | } |
871 | 894 | |
872 | | -static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) |
| 895 | +int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) |
873 | 896 | { |
874 | | - struct pci_dev *parent; |
875 | 897 | uint8_t bcomp, dcomp, fcomp; |
876 | 898 | long rc, rid_end, rid; |
877 | 899 | |
.. | .. |
881 | 903 | |
882 | 904 | dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER; |
883 | 905 | fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; |
884 | | - parent = pe->pbus->self; |
885 | 906 | if (pe->flags & PNV_IODA_PE_BUS_ALL) |
886 | | - count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1; |
| 907 | + count = resource_size(&pe->pbus->busn_res); |
887 | 908 | else |
888 | 909 | count = 1; |
889 | 910 | |
.. | .. |
902 | 923 | } |
903 | 924 | rid_end = pe->rid + (count << 8); |
904 | 925 | } else { |
905 | | -#ifdef CONFIG_PCI_IOV |
906 | | - if (pe->flags & PNV_IODA_PE_VF) |
907 | | - parent = pe->parent_dev; |
908 | | - else |
909 | | -#endif /* CONFIG_PCI_IOV */ |
910 | | - parent = pe->pdev->bus->self; |
911 | 926 | bcomp = OpalPciBusAll; |
912 | 927 | dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; |
913 | 928 | fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; |
.. | .. |
964 | 979 | return 0; |
965 | 980 | } |
966 | 981 | |
967 | | -#ifdef CONFIG_PCI_IOV |
968 | | -static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) |
969 | | -{ |
970 | | - struct pci_dn *pdn = pci_get_pdn(dev); |
971 | | - int i; |
972 | | - struct resource *res, res2; |
973 | | - resource_size_t size; |
974 | | - u16 num_vfs; |
975 | | - |
976 | | - if (!dev->is_physfn) |
977 | | - return -EINVAL; |
978 | | - |
979 | | - /* |
980 | | - * "offset" is in VFs. The M64 windows are sized so that when they |
981 | | - * are segmented, each segment is the same size as the IOV BAR. |
982 | | - * Each segment is in a separate PE, and the high order bits of the |
983 | | - * address are the PE number. Therefore, each VF's BAR is in a |
984 | | - * separate PE, and changing the IOV BAR start address changes the |
985 | | - * range of PEs the VFs are in. |
986 | | - */ |
987 | | - num_vfs = pdn->num_vfs; |
988 | | - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { |
989 | | - res = &dev->resource[i + PCI_IOV_RESOURCES]; |
990 | | - if (!res->flags || !res->parent) |
991 | | - continue; |
992 | | - |
993 | | - /* |
994 | | - * The actual IOV BAR range is determined by the start address |
995 | | - * and the actual size for num_vfs VFs BAR. This check is to |
996 | | - * make sure that after shifting, the range will not overlap |
997 | | - * with another device. |
998 | | - */ |
999 | | - size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); |
1000 | | - res2.flags = res->flags; |
1001 | | - res2.start = res->start + (size * offset); |
1002 | | - res2.end = res2.start + (size * num_vfs) - 1; |
1003 | | - |
1004 | | - if (res2.end > res->end) { |
1005 | | - dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n", |
1006 | | - i, &res2, res, num_vfs, offset); |
1007 | | - return -EBUSY; |
1008 | | - } |
1009 | | - } |
1010 | | - |
1011 | | - /* |
1012 | | - * Since M64 BAR shares segments among all possible 256 PEs, |
1013 | | - * we have to shift the beginning of PF IOV BAR to make it start from |
1014 | | - * the segment which belongs to the PE number assigned to the first VF. |
1015 | | - * This creates a "hole" in the /proc/iomem which could be used for |
1016 | | - * allocating other resources so we reserve this area below and |
1017 | | - * release when IOV is released. |
1018 | | - */ |
1019 | | - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { |
1020 | | - res = &dev->resource[i + PCI_IOV_RESOURCES]; |
1021 | | - if (!res->flags || !res->parent) |
1022 | | - continue; |
1023 | | - |
1024 | | - size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); |
1025 | | - res2 = *res; |
1026 | | - res->start += size * offset; |
1027 | | - |
1028 | | - dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n", |
1029 | | - i, &res2, res, (offset > 0) ? "En" : "Dis", |
1030 | | - num_vfs, offset); |
1031 | | - |
1032 | | - if (offset < 0) { |
1033 | | - devm_release_resource(&dev->dev, &pdn->holes[i]); |
1034 | | - memset(&pdn->holes[i], 0, sizeof(pdn->holes[i])); |
1035 | | - } |
1036 | | - |
1037 | | - pci_update_resource(dev, i + PCI_IOV_RESOURCES); |
1038 | | - |
1039 | | - if (offset > 0) { |
1040 | | - pdn->holes[i].start = res2.start; |
1041 | | - pdn->holes[i].end = res2.start + size * offset - 1; |
1042 | | - pdn->holes[i].flags = IORESOURCE_BUS; |
1043 | | - pdn->holes[i].name = "pnv_iov_reserved"; |
1044 | | - devm_request_resource(&dev->dev, res->parent, |
1045 | | - &pdn->holes[i]); |
1046 | | - } |
1047 | | - } |
1048 | | - return 0; |
1049 | | -} |
1050 | | -#endif /* CONFIG_PCI_IOV */ |
1051 | | - |
1052 | 982 | static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) |
1053 | 983 | { |
1054 | | - struct pci_controller *hose = pci_bus_to_host(dev->bus); |
1055 | | - struct pnv_phb *phb = hose->private_data; |
| 984 | + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); |
1056 | 985 | struct pci_dn *pdn = pci_get_pdn(dev); |
1057 | 986 | struct pnv_ioda_pe *pe; |
1058 | 987 | |
.. | .. |
1064 | 993 | if (pdn->pe_number != IODA_INVALID_PE) |
1065 | 994 | return NULL; |
1066 | 995 | |
1067 | | - pe = pnv_ioda_alloc_pe(phb); |
| 996 | + pe = pnv_ioda_alloc_pe(phb, 1); |
1068 | 997 | if (!pe) { |
1069 | 998 | pr_warn("%s: Not enough PE# available, disabling device\n", |
1070 | 999 | pci_name(dev)); |
1071 | 1000 | return NULL; |
1072 | 1001 | } |
1073 | 1002 | |
1074 | | - /* NOTE: We get only one ref to the pci_dev for the pdn, not for the |
1075 | | - * pointer in the PE data structure, both should be destroyed at the |
1076 | | - * same time. However, this needs to be looked at more closely again |
1077 | | - * once we actually start removing things (Hotplug, SR-IOV, ...) |
| 1003 | + /* NOTE: We don't get a reference for the pointer in the PE |
| 1004 | + * data structure, both the device and PE structures should be |
| 1005 | + * destroyed at the same time. However, removing nvlink |
| 1006 | + * devices will need some work. |
1078 | 1007 | * |
1079 | 1008 | * At some point we want to remove the PDN completely anyways |
1080 | 1009 | */ |
1081 | | - pci_dev_get(dev); |
1082 | 1010 | pdn->pe_number = pe->pe_number; |
1083 | 1011 | pe->flags = PNV_IODA_PE_DEV; |
1084 | 1012 | pe->pdev = dev; |
1085 | 1013 | pe->pbus = NULL; |
1086 | 1014 | pe->mve_number = -1; |
1087 | 1015 | pe->rid = dev->bus->number << 8 | pdn->devfn; |
| 1016 | + pe->device_count++; |
1088 | 1017 | |
1089 | 1018 | pe_info(pe, "Associated device to PE\n"); |
1090 | 1019 | |
.. | .. |
1093 | 1022 | pnv_ioda_free_pe(pe); |
1094 | 1023 | pdn->pe_number = IODA_INVALID_PE; |
1095 | 1024 | pe->pdev = NULL; |
1096 | | - pci_dev_put(dev); |
1097 | 1025 | return NULL; |
1098 | 1026 | } |
1099 | 1027 | |
1100 | 1028 | /* Put PE to the list */ |
| 1029 | + mutex_lock(&phb->ioda.pe_list_mutex); |
1101 | 1030 | list_add_tail(&pe->list, &phb->ioda.pe_list); |
1102 | | - |
| 1031 | + mutex_unlock(&phb->ioda.pe_list_mutex); |
1103 | 1032 | return pe; |
1104 | | -} |
1105 | | - |
1106 | | -static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) |
1107 | | -{ |
1108 | | - struct pci_dev *dev; |
1109 | | - |
1110 | | - list_for_each_entry(dev, &bus->devices, bus_list) { |
1111 | | - struct pci_dn *pdn = pci_get_pdn(dev); |
1112 | | - |
1113 | | - if (pdn == NULL) { |
1114 | | - pr_warn("%s: No device node associated with device !\n", |
1115 | | - pci_name(dev)); |
1116 | | - continue; |
1117 | | - } |
1118 | | - |
1119 | | - /* |
1120 | | - * In partial hotplug case, the PCI device might be still |
1121 | | - * associated with the PE and needn't attach it to the PE |
1122 | | - * again. |
1123 | | - */ |
1124 | | - if (pdn->pe_number != IODA_INVALID_PE) |
1125 | | - continue; |
1126 | | - |
1127 | | - pe->device_count++; |
1128 | | - pdn->pe_number = pe->pe_number; |
1129 | | - if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) |
1130 | | - pnv_ioda_setup_same_PE(dev->subordinate, pe); |
1131 | | - } |
1132 | 1033 | } |
1133 | 1034 | |
.. | .. |
1139 | 1040 | */ |
1140 | 1041 | static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) |
1141 | 1042 | { |
1142 | | - struct pci_controller *hose = pci_bus_to_host(bus); |
1143 | | - struct pnv_phb *phb = hose->private_data; |
| 1043 | + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); |
1144 | 1044 | struct pnv_ioda_pe *pe = NULL; |
1145 | 1045 | unsigned int pe_num; |
1146 | 1046 | |
.. | .. |
1149 | 1049 | * We should reuse it instead of allocating a new one. |
1150 | 1050 | */ |
1151 | 1051 | pe_num = phb->ioda.pe_rmap[bus->number << 8]; |
1152 | | - if (pe_num != IODA_INVALID_PE) { |
| 1052 | + if (WARN_ON(pe_num != IODA_INVALID_PE)) { |
1153 | 1053 | pe = &phb->ioda.pe_array[pe_num]; |
1154 | | - pnv_ioda_setup_same_PE(bus, pe); |
1155 | 1054 | return NULL; |
1156 | 1055 | } |
1157 | 1056 | |
1158 | 1057 | /* PE number for root bus should have been reserved */ |
1159 | | - if (pci_is_root_bus(bus) && |
1160 | | - phb->ioda.root_pe_idx != IODA_INVALID_PE) |
| 1058 | + if (pci_is_root_bus(bus)) |
1161 | 1059 | pe = &phb->ioda.pe_array[phb->ioda.root_pe_idx]; |
1162 | 1060 | |
1163 | 1061 | /* Check if PE is determined by M64 */ |
1164 | | - if (!pe && phb->pick_m64_pe) |
1165 | | - pe = phb->pick_m64_pe(bus, all); |
| 1062 | + if (!pe) |
| 1063 | + pe = pnv_ioda_pick_m64_pe(bus, all); |
1166 | 1064 | |
1167 | 1065 | /* The PE number isn't pinned by M64 */ |
1168 | 1066 | if (!pe) |
1169 | | - pe = pnv_ioda_alloc_pe(phb); |
| 1067 | + pe = pnv_ioda_alloc_pe(phb, 1); |
1170 | 1068 | |
1171 | 1069 | if (!pe) { |
1172 | 1070 | pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n", |
.. | .. |
1181 | 1079 | pe->rid = bus->busn_res.start << 8; |
1182 | 1080 | |
1183 | 1081 | if (all) |
1184 | | - pe_info(pe, "Secondary bus %d..%d associated with PE#%x\n", |
1185 | | - bus->busn_res.start, bus->busn_res.end, pe->pe_number); |
| 1082 | + pe_info(pe, "Secondary bus %pad..%pad associated with PE#%x\n", |
| 1083 | + &bus->busn_res.start, &bus->busn_res.end, |
| 1084 | + pe->pe_number); |
1186 | 1085 | else |
1187 | | - pe_info(pe, "Secondary bus %d associated with PE#%x\n", |
1188 | | - bus->busn_res.start, pe->pe_number); |
| 1086 | + pe_info(pe, "Secondary bus %pad associated with PE#%x\n", |
| 1087 | + &bus->busn_res.start, pe->pe_number); |
1189 | 1088 | |
1190 | 1089 | if (pnv_ioda_configure_pe(phb, pe)) { |
1191 | 1090 | /* XXX What do we do here ? */ |
.. | .. |
1193 | 1092 | pe->pbus = NULL; |
1194 | 1093 | return NULL; |
1195 | 1094 | } |
1196 | | - |
1197 | | - /* Associate it with all child devices */ |
1198 | | - pnv_ioda_setup_same_PE(bus, pe); |
1199 | 1095 | |
1200 | 1096 | /* Put PE to the list */ |
1201 | 1097 | list_add_tail(&pe->list, &phb->ioda.pe_list); |
.. | .. |
1210 | 1106 | struct pnv_ioda_pe *pe; |
1211 | 1107 | struct pci_dev *gpu_pdev; |
1212 | 1108 | struct pci_dn *npu_pdn; |
1213 | | - struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus); |
1214 | | - struct pnv_phb *phb = hose->private_data; |
| 1109 | + struct pnv_phb *phb = pci_bus_to_pnvhb(npu_pdev->bus); |
| 1110 | + |
| 1111 | + /* |
| 1112 | + * Intentionally leak a reference on the npu device (for |
| 1113 | + * nvlink only; this is not an opencapi path) to make sure it |
| 1114 | + * never goes away, as it's been the case all along and some |
| 1115 | + * work is needed otherwise. |
| 1116 | + */ |
| 1117 | + pci_dev_get(npu_pdev); |
1215 | 1118 | |
1216 | 1119 | /* |
1217 | 1120 | * Due to a hardware errata PE#0 on the NPU is reserved for |
.. | .. |
1236 | 1139 | */ |
1237 | 1140 | dev_info(&npu_pdev->dev, |
1238 | 1141 | "Associating to existing PE %x\n", pe_num); |
1239 | | - pci_dev_get(npu_pdev); |
1240 | 1142 | npu_pdn = pci_get_pdn(npu_pdev); |
1241 | 1143 | rid = npu_pdev->bus->number << 8 | npu_pdn->devfn; |
1242 | 1144 | npu_pdn->pe_number = pe_num; |
1243 | 1145 | phb->ioda.pe_rmap[rid] = pe->pe_number; |
| 1146 | + pe->device_count++; |
1244 | 1147 | |
1245 | 1148 | /* Map the PE to this link */ |
1246 | 1149 | rc = opal_pci_set_pe(phb->opal_id, pe_num, rid, |
.. | .. |
1272 | 1175 | pnv_ioda_setup_npu_PE(pdev); |
1273 | 1176 | } |
1274 | 1177 | |
1275 | | -static void pnv_pci_ioda_setup_PEs(void) |
| 1178 | +static void pnv_pci_ioda_setup_nvlink(void) |
1276 | 1179 | { |
1277 | | - struct pci_controller *hose, *tmp; |
| 1180 | + struct pci_controller *hose; |
1278 | 1181 | struct pnv_phb *phb; |
1279 | | - struct pci_bus *bus; |
1280 | | - struct pci_dev *pdev; |
| 1182 | + struct pnv_ioda_pe *pe; |
1281 | 1183 | |
1282 | | - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { |
| 1184 | + list_for_each_entry(hose, &hose_list, list_node) { |
1283 | 1185 | phb = hose->private_data; |
1284 | 1186 | if (phb->type == PNV_PHB_NPU_NVLINK) { |
1285 | 1187 | /* PE#0 is needed for error reporting */ |
1286 | 1188 | pnv_ioda_reserve_pe(phb, 0); |
1287 | 1189 | pnv_ioda_setup_npu_PEs(hose->bus); |
1288 | 1190 | if (phb->model == PNV_PHB_MODEL_NPU2) |
1289 | | - pnv_npu2_init(phb); |
1290 | | - } |
1291 | | - if (phb->type == PNV_PHB_NPU_OCAPI) { |
1292 | | - bus = hose->bus; |
1293 | | - list_for_each_entry(pdev, &bus->devices, bus_list) |
1294 | | - pnv_ioda_setup_dev_PE(pdev); |
| 1191 | + WARN_ON_ONCE(pnv_npu2_init(hose)); |
1295 | 1192 | } |
1296 | 1193 | } |
1297 | | -} |
1298 | | - |
1299 | | -#ifdef CONFIG_PCI_IOV |
1300 | | -static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs) |
1301 | | -{ |
1302 | | - struct pci_bus *bus; |
1303 | | - struct pci_controller *hose; |
1304 | | - struct pnv_phb *phb; |
1305 | | - struct pci_dn *pdn; |
1306 | | - int i, j; |
1307 | | - int m64_bars; |
1308 | | - |
1309 | | - bus = pdev->bus; |
1310 | | - hose = pci_bus_to_host(bus); |
1311 | | - phb = hose->private_data; |
1312 | | - pdn = pci_get_pdn(pdev); |
1313 | | - |
1314 | | - if (pdn->m64_single_mode) |
1315 | | - m64_bars = num_vfs; |
1316 | | - else |
1317 | | - m64_bars = 1; |
1318 | | - |
1319 | | - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) |
1320 | | - for (j = 0; j < m64_bars; j++) { |
1321 | | - if (pdn->m64_map[j][i] == IODA_INVALID_M64) |
1322 | | - continue; |
1323 | | - opal_pci_phb_mmio_enable(phb->opal_id, |
1324 | | - OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 0); |
1325 | | - clear_bit(pdn->m64_map[j][i], &phb->ioda.m64_bar_alloc); |
1326 | | - pdn->m64_map[j][i] = IODA_INVALID_M64; |
1327 | | - } |
1328 | | - |
1329 | | - kfree(pdn->m64_map); |
1330 | | - return 0; |
1331 | | -} |
1332 | | - |
1333 | | -static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) |
1334 | | -{ |
1335 | | - struct pci_bus *bus; |
1336 | | - struct pci_controller *hose; |
1337 | | - struct pnv_phb *phb; |
1338 | | - struct pci_dn *pdn; |
1339 | | - unsigned int win; |
1340 | | - struct resource *res; |
1341 | | - int i, j; |
1342 | | - int64_t rc; |
1343 | | - int total_vfs; |
1344 | | - resource_size_t size, start; |
1345 | | - int pe_num; |
1346 | | - int m64_bars; |
1347 | | - |
1348 | | - bus = pdev->bus; |
1349 | | - hose = pci_bus_to_host(bus); |
1350 | | - phb = hose->private_data; |
1351 | | - pdn = pci_get_pdn(pdev); |
1352 | | - total_vfs = pci_sriov_get_totalvfs(pdev); |
1353 | | - |
1354 | | - if (pdn->m64_single_mode) |
1355 | | - m64_bars = num_vfs; |
1356 | | - else |
1357 | | - m64_bars = 1; |
1358 | | - |
1359 | | - pdn->m64_map = kmalloc_array(m64_bars, |
1360 | | - sizeof(*pdn->m64_map), |
1361 | | - GFP_KERNEL); |
1362 | | - if (!pdn->m64_map) |
1363 | | - return -ENOMEM; |
1364 | | - /* Initialize the m64_map to IODA_INVALID_M64 */ |
1365 | | - for (i = 0; i < m64_bars ; i++) |
1366 | | - for (j = 0; j < PCI_SRIOV_NUM_BARS; j++) |
1367 | | - pdn->m64_map[i][j] = IODA_INVALID_M64; |
1368 | | - |
1369 | | - |
1370 | | - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { |
1371 | | - res = &pdev->resource[i + PCI_IOV_RESOURCES]; |
1372 | | - if (!res->flags || !res->parent) |
| 1194 | + list_for_each_entry(hose, &hose_list, list_node) { |
| 1195 | + phb = hose->private_data; |
| 1196 | + if (phb->type != PNV_PHB_IODA2) |
1373 | 1197 | continue; |
1374 | 1198 | |
1375 | | - for (j = 0; j < m64_bars; j++) { |
1376 | | - do { |
1377 | | - win = find_next_zero_bit(&phb->ioda.m64_bar_alloc, |
1378 | | - phb->ioda.m64_bar_idx + 1, 0); |
1379 | | - |
1380 | | - if (win >= phb->ioda.m64_bar_idx + 1) |
1381 | | - goto m64_failed; |
1382 | | - } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc)); |
1383 | | - |
1384 | | - pdn->m64_map[j][i] = win; |
1385 | | - |
1386 | | - if (pdn->m64_single_mode) { |
1387 | | - size = pci_iov_resource_size(pdev, |
1388 | | - PCI_IOV_RESOURCES + i); |
1389 | | - start = res->start + size * j; |
1390 | | - } else { |
1391 | | - size = resource_size(res); |
1392 | | - start = res->start; |
1393 | | - } |
1394 | | - |
1395 | | - /* Map the M64 here */ |
1396 | | - if (pdn->m64_single_mode) { |
1397 | | - pe_num = pdn->pe_num_map[j]; |
1398 | | - rc = opal_pci_map_pe_mmio_window(phb->opal_id, |
1399 | | - pe_num, OPAL_M64_WINDOW_TYPE, |
1400 | | - pdn->m64_map[j][i], 0); |
1401 | | - } |
1402 | | - |
1403 | | - rc = opal_pci_set_phb_mem_window(phb->opal_id, |
1404 | | - OPAL_M64_WINDOW_TYPE, |
1405 | | - pdn->m64_map[j][i], |
1406 | | - start, |
1407 | | - 0, /* unused */ |
1408 | | - size); |
1409 | | - |
1410 | | - |
1411 | | - if (rc != OPAL_SUCCESS) { |
1412 | | - dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n", |
1413 | | - win, rc); |
1414 | | - goto m64_failed; |
1415 | | - } |
1416 | | - |
1417 | | - if (pdn->m64_single_mode) |
1418 | | - rc = opal_pci_phb_mmio_enable(phb->opal_id, |
1419 | | - OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 2); |
1420 | | - else |
1421 | | - rc = opal_pci_phb_mmio_enable(phb->opal_id, |
1422 | | - OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 1); |
1423 | | - |
1424 | | - if (rc != OPAL_SUCCESS) { |
1425 | | - dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n", |
1426 | | - win, rc); |
1427 | | - goto m64_failed; |
1428 | | - } |
1429 | | - } |
| 1199 | + list_for_each_entry(pe, &phb->ioda.pe_list, list) |
| 1200 | + pnv_npu2_map_lpar(pe, MSR_DR | MSR_PR | MSR_HV); |
1430 | 1201 | } |
1431 | | - return 0; |
1432 | 1202 | |
1433 | | -m64_failed: |
1434 | | - pnv_pci_vf_release_m64(pdev, num_vfs); |
1435 | | - return -EBUSY; |
| 1203 | +#ifdef CONFIG_IOMMU_API |
| 1204 | + /* setup iommu groups so we can do nvlink pass-thru */ |
| 1205 | + pnv_pci_npu_setup_iommu_groups(); |
| 1206 | +#endif |
1436 | 1207 | } |
1437 | 1208 | |
1438 | | -static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, |
1439 | | - int num); |
1440 | | - |
1441 | | -static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe) |
1442 | | -{ |
1443 | | - struct iommu_table *tbl; |
1444 | | - int64_t rc; |
1445 | | - |
1446 | | - tbl = pe->table_group.tables[0]; |
1447 | | - rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); |
1448 | | - if (rc) |
1449 | | - pe_warn(pe, "OPAL error %ld release DMA window\n", rc); |
1450 | | - |
1451 | | - pnv_pci_ioda2_set_bypass(pe, false); |
1452 | | - if (pe->table_group.group) { |
1453 | | - iommu_group_put(pe->table_group.group); |
1454 | | - BUG_ON(pe->table_group.group); |
1455 | | - } |
1456 | | - iommu_tce_table_put(tbl); |
1457 | | -} |
1458 | | - |
1459 | | -static void pnv_ioda_release_vf_PE(struct pci_dev *pdev) |
1460 | | -{ |
1461 | | - struct pci_bus *bus; |
1462 | | - struct pci_controller *hose; |
1463 | | - struct pnv_phb *phb; |
1464 | | - struct pnv_ioda_pe *pe, *pe_n; |
1465 | | - struct pci_dn *pdn; |
1466 | | - |
1467 | | - bus = pdev->bus; |
1468 | | - hose = pci_bus_to_host(bus); |
1469 | | - phb = hose->private_data; |
1470 | | - pdn = pci_get_pdn(pdev); |
1471 | | - |
1472 | | - if (!pdev->is_physfn) |
1473 | | - return; |
1474 | | - |
1475 | | - list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) { |
1476 | | - if (pe->parent_dev != pdev) |
1477 | | - continue; |
1478 | | - |
1479 | | - pnv_pci_ioda2_release_dma_pe(pdev, pe); |
1480 | | - |
1481 | | - /* Remove from list */ |
1482 | | - mutex_lock(&phb->ioda.pe_list_mutex); |
1483 | | - list_del(&pe->list); |
1484 | | - mutex_unlock(&phb->ioda.pe_list_mutex); |
1485 | | - |
1486 | | - pnv_ioda_deconfigure_pe(phb, pe); |
1487 | | - |
1488 | | - pnv_ioda_free_pe(pe); |
1489 | | - } |
1490 | | -} |
1491 | | - |
1492 | | -void pnv_pci_sriov_disable(struct pci_dev *pdev) |
1493 | | -{ |
1494 | | - struct pci_bus *bus; |
1495 | | - struct pci_controller *hose; |
1496 | | - struct pnv_phb *phb; |
1497 | | - struct pnv_ioda_pe *pe; |
1498 | | - struct pci_dn *pdn; |
1499 | | - u16 num_vfs, i; |
1500 | | - |
1501 | | - bus = pdev->bus; |
1502 | | - hose = pci_bus_to_host(bus); |
1503 | | - phb = hose->private_data; |
1504 | | - pdn = pci_get_pdn(pdev); |
1505 | | - num_vfs = pdn->num_vfs; |
1506 | | - |
1507 | | - /* Release VF PEs */ |
1508 | | - pnv_ioda_release_vf_PE(pdev); |
1509 | | - |
1510 | | - if (phb->type == PNV_PHB_IODA2) { |
1511 | | - if (!pdn->m64_single_mode) |
1512 | | - pnv_pci_vf_resource_shift(pdev, -*pdn->pe_num_map); |
1513 | | - |
1514 | | - /* Release M64 windows */ |
1515 | | - pnv_pci_vf_release_m64(pdev, num_vfs); |
1516 | | - |
1517 | | - /* Release PE numbers */ |
1518 | | - if (pdn->m64_single_mode) { |
1519 | | - for (i = 0; i < num_vfs; i++) { |
1520 | | - if (pdn->pe_num_map[i] == IODA_INVALID_PE) |
1521 | | - continue; |
1522 | | - |
1523 | | - pe = &phb->ioda.pe_array[pdn->pe_num_map[i]]; |
1524 | | - pnv_ioda_free_pe(pe); |
1525 | | - } |
1526 | | - } else |
1527 | | - bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); |
1528 | | - /* Releasing pe_num_map */ |
1529 | | - kfree(pdn->pe_num_map); |
1530 | | - } |
1531 | | -} |
1532 | | - |
1533 | | -static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, |
| 1209 | +static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, |
1534 | 1210 | struct pnv_ioda_pe *pe); |
1535 | | -static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) |
| 1211 | + |
| 1212 | +static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev) |
1536 | 1213 | { |
1537 | | - struct pci_bus *bus; |
1538 | | - struct pci_controller *hose; |
1539 | | - struct pnv_phb *phb; |
1540 | | - struct pnv_ioda_pe *pe; |
1541 | | - int pe_num; |
1542 | | - u16 vf_index; |
1543 | | - struct pci_dn *pdn; |
1544 | | - |
1545 | | - bus = pdev->bus; |
1546 | | - hose = pci_bus_to_host(bus); |
1547 | | - phb = hose->private_data; |
1548 | | - pdn = pci_get_pdn(pdev); |
1549 | | - |
1550 | | - if (!pdev->is_physfn) |
1551 | | - return; |
1552 | | - |
1553 | | - /* Reserve PE for each VF */ |
1554 | | - for (vf_index = 0; vf_index < num_vfs; vf_index++) { |
1555 | | - int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index); |
1556 | | - int vf_bus = pci_iov_virtfn_bus(pdev, vf_index); |
1557 | | - struct pci_dn *vf_pdn; |
1558 | | - |
1559 | | - if (pdn->m64_single_mode) |
1560 | | - pe_num = pdn->pe_num_map[vf_index]; |
1561 | | - else |
1562 | | - pe_num = *pdn->pe_num_map + vf_index; |
1563 | | - |
1564 | | - pe = &phb->ioda.pe_array[pe_num]; |
1565 | | - pe->pe_number = pe_num; |
1566 | | - pe->phb = phb; |
1567 | | - pe->flags = PNV_IODA_PE_VF; |
1568 | | - pe->pbus = NULL; |
1569 | | - pe->parent_dev = pdev; |
1570 | | - pe->mve_number = -1; |
1571 | | - pe->rid = (vf_bus << 8) | vf_devfn; |
1572 | | - |
1573 | | - pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n", |
1574 | | - hose->global_number, pdev->bus->number, |
1575 | | - PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num); |
1576 | | - |
1577 | | - if (pnv_ioda_configure_pe(phb, pe)) { |
1578 | | - /* XXX What do we do here ? */ |
1579 | | - pnv_ioda_free_pe(pe); |
1580 | | - pe->pdev = NULL; |
1581 | | - continue; |
1582 | | - } |
1583 | | - |
1584 | | - /* Put PE to the list */ |
1585 | | - mutex_lock(&phb->ioda.pe_list_mutex); |
1586 | | - list_add_tail(&pe->list, &phb->ioda.pe_list); |
1587 | | - mutex_unlock(&phb->ioda.pe_list_mutex); |
1588 | | - |
1589 | | - /* associate this pe to it's pdn */ |
1590 | | - list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) { |
1591 | | - if (vf_pdn->busno == vf_bus && |
1592 | | - vf_pdn->devfn == vf_devfn) { |
1593 | | - vf_pdn->pe_number = pe_num; |
1594 | | - break; |
1595 | | - } |
1596 | | - } |
1597 | | - |
1598 | | - pnv_pci_ioda2_setup_dma_pe(phb, pe); |
1599 | | - } |
1600 | | -} |
1601 | | - |
1602 | | -int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) |
1603 | | -{ |
1604 | | - struct pci_bus *bus; |
1605 | | - struct pci_controller *hose; |
1606 | | - struct pnv_phb *phb; |
1607 | | - struct pnv_ioda_pe *pe; |
1608 | | - struct pci_dn *pdn; |
1609 | | - int ret; |
1610 | | - u16 i; |
1611 | | - |
1612 | | - bus = pdev->bus; |
1613 | | - hose = pci_bus_to_host(bus); |
1614 | | - phb = hose->private_data; |
1615 | | - pdn = pci_get_pdn(pdev); |
1616 | | - |
1617 | | - if (phb->type == PNV_PHB_IODA2) { |
1618 | | - if (!pdn->vfs_expanded) { |
1619 | | - dev_info(&pdev->dev, "don't support this SRIOV device" |
1620 | | - " with non 64bit-prefetchable IOV BAR\n"); |
1621 | | - return -ENOSPC; |
1622 | | - } |
1623 | | - |
1624 | | - /* |
1625 | | - * When M64 BARs functions in Single PE mode, the number of VFs |
1626 | | - * could be enabled must be less than the number of M64 BARs. |
1627 | | - */ |
1628 | | - if (pdn->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) { |
1629 | | - dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n"); |
1630 | | - return -EBUSY; |
1631 | | - } |
1632 | | - |
1633 | | - /* Allocating pe_num_map */ |
1634 | | - if (pdn->m64_single_mode) |
1635 | | - pdn->pe_num_map = kmalloc_array(num_vfs, |
1636 | | - sizeof(*pdn->pe_num_map), |
1637 | | - GFP_KERNEL); |
1638 | | - else |
1639 | | - pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL); |
1640 | | - |
1641 | | - if (!pdn->pe_num_map) |
1642 | | - return -ENOMEM; |
1643 | | - |
1644 | | - if (pdn->m64_single_mode) |
1645 | | - for (i = 0; i < num_vfs; i++) |
1646 | | - pdn->pe_num_map[i] = IODA_INVALID_PE; |
1647 | | - |
1648 | | - /* Calculate available PE for required VFs */ |
1649 | | - if (pdn->m64_single_mode) { |
1650 | | - for (i = 0; i < num_vfs; i++) { |
1651 | | - pe = pnv_ioda_alloc_pe(phb); |
1652 | | - if (!pe) { |
1653 | | - ret = -EBUSY; |
1654 | | - goto m64_failed; |
1655 | | - } |
1656 | | - |
1657 | | - pdn->pe_num_map[i] = pe->pe_number; |
1658 | | - } |
1659 | | - } else { |
1660 | | - mutex_lock(&phb->ioda.pe_alloc_mutex); |
1661 | | - *pdn->pe_num_map = bitmap_find_next_zero_area( |
1662 | | - phb->ioda.pe_alloc, phb->ioda.total_pe_num, |
1663 | | - 0, num_vfs, 0); |
1664 | | - if (*pdn->pe_num_map >= phb->ioda.total_pe_num) { |
1665 | | - mutex_unlock(&phb->ioda.pe_alloc_mutex); |
1666 | | - dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs); |
1667 | | - kfree(pdn->pe_num_map); |
1668 | | - return -EBUSY; |
1669 | | - } |
1670 | | - bitmap_set(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); |
1671 | | - mutex_unlock(&phb->ioda.pe_alloc_mutex); |
1672 | | - } |
1673 | | - pdn->num_vfs = num_vfs; |
1674 | | - |
1675 | | - /* Assign M64 window accordingly */ |
1676 | | - ret = pnv_pci_vf_assign_m64(pdev, num_vfs); |
1677 | | - if (ret) { |
1678 | | - dev_info(&pdev->dev, "Not enough M64 window resources\n"); |
1679 | | - goto m64_failed; |
1680 | | - } |
1681 | | - |
1682 | | - /* |
1683 | | - * When using one M64 BAR to map one IOV BAR, we need to shift |
1684 | | - * the IOV BAR according to the PE# allocated to the VFs. |
1685 | | - * Otherwise, the PE# for the VF will conflict with others. |
1686 | | - */ |
1687 | | - if (!pdn->m64_single_mode) { |
1688 | | - ret = pnv_pci_vf_resource_shift(pdev, *pdn->pe_num_map); |
1689 | | - if (ret) |
1690 | | - goto m64_failed; |
1691 | | - } |
1692 | | - } |
1693 | | - |
1694 | | - /* Setup VF PEs */ |
1695 | | - pnv_ioda_setup_vf_PE(pdev, num_vfs); |
1696 | | - |
1697 | | - return 0; |
1698 | | - |
1699 | | -m64_failed: |
1700 | | - if (pdn->m64_single_mode) { |
1701 | | - for (i = 0; i < num_vfs; i++) { |
1702 | | - if (pdn->pe_num_map[i] == IODA_INVALID_PE) |
1703 | | - continue; |
1704 | | - |
1705 | | - pe = &phb->ioda.pe_array[pdn->pe_num_map[i]]; |
1706 | | - pnv_ioda_free_pe(pe); |
1707 | | - } |
1708 | | - } else |
1709 | | - bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); |
1710 | | - |
1711 | | - /* Releasing pe_num_map */ |
1712 | | - kfree(pdn->pe_num_map); |
1713 | | - |
1714 | | - return ret; |
1715 | | -} |
1716 | | - |
1717 | | -int pnv_pcibios_sriov_disable(struct pci_dev *pdev) |
1718 | | -{ |
1719 | | - pnv_pci_sriov_disable(pdev); |
1720 | | - |
1721 | | - /* Release PCI data */ |
1722 | | - remove_dev_pci_data(pdev); |
1723 | | - return 0; |
1724 | | -} |
1725 | | - |
1726 | | -int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) |
1727 | | -{ |
1728 | | - /* Allocate PCI data */ |
1729 | | - add_dev_pci_data(pdev); |
1730 | | - |
1731 | | - return pnv_pci_sriov_enable(pdev, num_vfs); |
1732 | | -} |
1733 | | -#endif /* CONFIG_PCI_IOV */ |
1734 | | - |
1735 | | -static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev) |
1736 | | -{ |
| 1214 | + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); |
1737 | 1215 | struct pci_dn *pdn = pci_get_pdn(pdev); |
1738 | 1216 | struct pnv_ioda_pe *pe; |
1739 | 1217 | |
1740 | | - /* |
1741 | | - * The function can be called while the PE# |
1742 | | - * hasn't been assigned. Do nothing for the |
1743 | | - * case. |
1744 | | - */ |
1745 | | - if (!pdn || pdn->pe_number == IODA_INVALID_PE) |
1746 | | - return; |
| 1218 | + /* Check if the BDFN for this device is associated with a PE yet */ |
| 1219 | + pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8)); |
| 1220 | + if (!pe) { |
| 1221 | + /* VF PEs should be pre-configured in pnv_pci_sriov_enable() */ |
| 1222 | + if (WARN_ON(pdev->is_virtfn)) |
| 1223 | + return; |
1747 | 1224 | |
1748 | | - pe = &phb->ioda.pe_array[pdn->pe_number]; |
1749 | | - WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); |
1750 | | - set_dma_offset(&pdev->dev, pe->tce_bypass_base); |
1751 | | - set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]); |
1752 | | - /* |
1753 | | - * Note: iommu_add_device() will fail here as |
1754 | | - * for physical PE: the device is already added by now; |
1755 | | - * for virtual PE: sysfs entries are not ready yet and |
1756 | | - * tce_iommu_bus_notifier will add the device to a group later. |
1757 | | - */ |
1758 | | -} |
| 1225 | + pnv_pci_configure_bus(pdev->bus); |
| 1226 | + pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8)); |
| 1227 | + pci_info(pdev, "Configured PE#%x\n", pe ? pe->pe_number : 0xfffff); |
1759 | 1228 | |
1760 | | -static bool pnv_pci_ioda_pe_single_vendor(struct pnv_ioda_pe *pe) |
1761 | | -{ |
1762 | | - unsigned short vendor = 0; |
1763 | | - struct pci_dev *pdev; |
1764 | 1229 | |
1765 | | - if (pe->device_count == 1) |
1766 | | - return true; |
1767 | | - |
1768 | | - /* pe->pdev should be set if it's a single device, pe->pbus if not */ |
1769 | | - if (!pe->pbus) |
1770 | | - return true; |
1771 | | - |
1772 | | - list_for_each_entry(pdev, &pe->pbus->devices, bus_list) { |
1773 | | - if (!vendor) { |
1774 | | - vendor = pdev->vendor; |
1775 | | - continue; |
1776 | | - } |
1777 | | - |
1778 | | - if (pdev->vendor != vendor) |
1779 | | - return false; |
| 1230 | + /* |
| 1231 | + * If we can't setup the IODA PE something has gone horribly |
| 1232 | + * wrong and we can't enable DMA for the device. |
| 1233 | + */ |
| 1234 | + if (WARN_ON(!pe)) |
| 1235 | + return; |
| 1236 | + } else { |
| 1237 | + pci_info(pdev, "Added to existing PE#%x\n", pe->pe_number); |
1780 | 1238 | } |
1781 | 1239 | |
1782 | | - return true; |
| 1240 | + /* |
| 1241 | + * We assume that bridges *probably* don't need to do any DMA so we can |
| 1242 | + * skip allocating a TCE table, etc unless we get a non-bridge device. |
| 1243 | + */ |
| 1244 | + if (!pe->dma_setup_done && !pci_is_bridge(pdev)) { |
| 1245 | + switch (phb->type) { |
| 1246 | + case PNV_PHB_IODA1: |
| 1247 | + pnv_pci_ioda1_setup_dma_pe(phb, pe); |
| 1248 | + break; |
| 1249 | + case PNV_PHB_IODA2: |
| 1250 | + pnv_pci_ioda2_setup_dma_pe(phb, pe); |
| 1251 | + break; |
| 1252 | + default: |
| 1253 | + pr_warn("%s: No DMA for PHB#%x (type %d)\n", |
| 1254 | + __func__, phb->hose->global_number, phb->type); |
| 1255 | + } |
| 1256 | + } |
| 1257 | + |
| 1258 | + if (pdn) |
| 1259 | + pdn->pe_number = pe->pe_number; |
| 1260 | + pe->device_count++; |
| 1261 | + |
| 1262 | + WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); |
| 1263 | + pdev->dev.archdata.dma_offset = pe->tce_bypass_base; |
| 1264 | + set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]); |
| 1265 | + |
| 1266 | + /* PEs with a DMA weight of zero won't have a group */ |
| 1267 | + if (pe->table_group.group) |
| 1268 | + iommu_add_device(&pe->table_group, &pdev->dev); |
1783 | 1269 | } |
1784 | 1270 | |
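The new `pnv_pci_ioda_dma_dev_setup()` above turns DMA setup into a lazy, per-device path: the first non-bridge device that appears in a PE pays the TCE-table allocation cost, gated by the `dma_setup_done` flag that `pnv_ioda_init_pe()` clears and that is presumably set at the end of the IODA1/IODA2 setup paths (not shown in this hunk). A condensed restatement of that guard (the per-device hook that calls this function is likewise assumed and not shown here):

```c
	/* Allocate DMA windows once per PE, and never for pure bridges. */
	if (!pe->dma_setup_done && !pci_is_bridge(pdev)) {
		switch (phb->type) {
		case PNV_PHB_IODA1:
			pnv_pci_ioda1_setup_dma_pe(phb, pe);	/* 32-bit TCE table */
			break;
		case PNV_PHB_IODA2:
			pnv_pci_ioda2_setup_dma_pe(phb, pe);	/* TCE table + bypass window */
			break;
		default:
			break;	/* NPU PHBs get no DMA setup here */
		}
	}
```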
1785 | 1271 | /* |
.. | .. |
1851 | 1337 | return -EIO; |
1852 | 1338 | } |
1853 | 1339 | |
1854 | | -static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) |
| 1340 | +static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev, |
| 1341 | + u64 dma_mask) |
1855 | 1342 | { |
1856 | | - struct pci_controller *hose = pci_bus_to_host(pdev->bus); |
1857 | | - struct pnv_phb *phb = hose->private_data; |
| 1343 | + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); |
1858 | 1344 | struct pci_dn *pdn = pci_get_pdn(pdev); |
1859 | 1345 | struct pnv_ioda_pe *pe; |
1860 | | - uint64_t top; |
1861 | | - bool bypass = false; |
1862 | | - s64 rc; |
1863 | 1346 | |
1864 | 1347 | if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) |
1865 | | - return -ENODEV; |
| 1348 | + return false; |
1866 | 1349 | |
1867 | 1350 | pe = &phb->ioda.pe_array[pdn->pe_number]; |
1868 | 1351 | if (pe->tce_bypass_enabled) { |
1869 | | - top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1; |
1870 | | - bypass = (dma_mask >= top); |
| 1352 | + u64 top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1; |
| 1353 | + if (dma_mask >= top) |
| 1354 | + return true; |
1871 | 1355 | } |
1872 | 1356 | |
1873 | | - if (bypass) { |
1874 | | - dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n"); |
1875 | | - set_dma_ops(&pdev->dev, &dma_nommu_ops); |
1876 | | - } else { |
1877 | | - /* |
1878 | | - * If the device can't set the TCE bypass bit but still wants |
1879 | | - * to access 4GB or more, on PHB3 we can reconfigure TVE#0 to |
1880 | | - * bypass the 32-bit region and be usable for 64-bit DMAs. |
1881 | | - * The device needs to be able to address all of this space. |
1882 | | - */ |
1883 | | - if (dma_mask >> 32 && |
1884 | | - dma_mask > (memory_hotplug_max() + (1ULL << 32)) && |
1885 | | - pnv_pci_ioda_pe_single_vendor(pe) && |
1886 | | - phb->model == PNV_PHB_MODEL_PHB3) { |
1887 | | - /* Configure the bypass mode */ |
1888 | | - rc = pnv_pci_ioda_dma_64bit_bypass(pe); |
1889 | | - if (rc) |
1890 | | - return rc; |
1891 | | - /* 4GB offset bypasses 32-bit space */ |
1892 | | - set_dma_offset(&pdev->dev, (1ULL << 32)); |
1893 | | - set_dma_ops(&pdev->dev, &dma_nommu_ops); |
1894 | | - } else if (dma_mask >> 32 && dma_mask != DMA_BIT_MASK(64)) { |
1895 | | - /* |
1896 | | - * Fail the request if a DMA mask between 32 and 64 bits |
1897 | | - * was requested but couldn't be fulfilled. Ideally we |
1898 | | - * would do this for 64-bits but historically we have |
1899 | | - * always fallen back to 32-bits. |
1900 | | - */ |
1901 | | - return -ENOMEM; |
1902 | | - } else { |
1903 | | - dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n"); |
1904 | | - set_dma_ops(&pdev->dev, &dma_iommu_ops); |
1905 | | - } |
| 1357 | + /* |
| 1358 | + * If the device can't set the TCE bypass bit but still wants |
| 1359 | + * to access 4GB or more, on PHB3 we can reconfigure TVE#0 to |
| 1360 | + * bypass the 32-bit region and be usable for 64-bit DMAs. |
| 1361 | + * The device needs to be able to address all of this space. |
| 1362 | + */ |
| 1363 | + if (dma_mask >> 32 && |
| 1364 | + dma_mask > (memory_hotplug_max() + (1ULL << 32)) && |
| 1365 | + /* pe->pdev should be set if it's a single device, pe->pbus if not */ |
| 1366 | + (pe->device_count == 1 || !pe->pbus) && |
| 1367 | + phb->model == PNV_PHB_MODEL_PHB3) { |
| 1368 | + /* Configure the bypass mode */ |
| 1369 | + s64 rc = pnv_pci_ioda_dma_64bit_bypass(pe); |
| 1370 | + if (rc) |
| 1371 | + return false; |
| 1372 | + /* 4GB offset bypasses 32-bit space */ |
| 1373 | + pdev->dev.archdata.dma_offset = (1ULL << 32); |
| 1374 | + return true; |
1906 | 1375 | } |
1907 | | - *pdev->dev.dma_mask = dma_mask; |
1908 | 1376 | |
1909 | | - /* Update peer npu devices */ |
1910 | | - pnv_npu_try_dma_set_bypass(pdev, bypass); |
1911 | | - |
1912 | | - return 0; |
1913 | | -} |
1914 | | - |
1915 | | -static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev) |
1916 | | -{ |
1917 | | - struct pci_controller *hose = pci_bus_to_host(pdev->bus); |
1918 | | - struct pnv_phb *phb = hose->private_data; |
1919 | | - struct pci_dn *pdn = pci_get_pdn(pdev); |
1920 | | - struct pnv_ioda_pe *pe; |
1921 | | - u64 end, mask; |
1922 | | - |
1923 | | - if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) |
1924 | | - return 0; |
1925 | | - |
1926 | | - pe = &phb->ioda.pe_array[pdn->pe_number]; |
1927 | | - if (!pe->tce_bypass_enabled) |
1928 | | - return __dma_get_required_mask(&pdev->dev); |
1929 | | - |
1930 | | - |
1931 | | - end = pe->tce_bypass_base + memblock_end_of_DRAM(); |
1932 | | - mask = 1ULL << (fls64(end) - 1); |
1933 | | - mask += mask - 1; |
1934 | | - |
1935 | | - return mask; |
1936 | | -} |
1937 | | - |
1938 | | -static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, |
1939 | | - struct pci_bus *bus, |
1940 | | - bool add_to_group) |
1941 | | -{ |
1942 | | - struct pci_dev *dev; |
1943 | | - |
1944 | | - list_for_each_entry(dev, &bus->devices, bus_list) { |
1945 | | - set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); |
1946 | | - set_dma_offset(&dev->dev, pe->tce_bypass_base); |
1947 | | - if (add_to_group) |
1948 | | - iommu_add_device(&dev->dev); |
1949 | | - |
1950 | | - if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) |
1951 | | - pnv_ioda_setup_bus_dma(pe, dev->subordinate, |
1952 | | - add_to_group); |
1953 | | - } |
| 1377 | + return false; |
1954 | 1378 | } |
---|
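The rework above replaces the old dma_set_mask hook with a plain predicate: can this device's DMA mask skip the IOMMU entirely? The core test is that the mask must cover the highest host address as seen through the bypass window. A minimal standalone sketch of that test follows; the bypass base and RAM size are illustrative values, not read from real hardware.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* The mask must reach bypass_base + the last byte of RAM, mirroring
	 * the "dma_mask >= top" check in the function above. */
	static bool mask_allows_bypass(uint64_t dma_mask, uint64_t bypass_base,
				       uint64_t dram_top)
	{
		return dma_mask >= bypass_base + dram_top - 1;
	}

	int main(void)
	{
		const uint64_t bypass_base = 1ULL << 59; /* TVE#1, PCI address bit 59 */
		const uint64_t dram_top = 64ULL << 30;	 /* pretend 64GB of RAM */

		printf("64-bit mask bypasses: %d\n",
		       mask_allows_bypass(UINT64_MAX, bypass_base, dram_top));
		printf("48-bit mask bypasses: %d\n",
		       mask_allows_bypass((1ULL << 48) - 1, bypass_base, dram_top));
		return 0;
	}

A 48-bit mask fails because it cannot even express the 2^59 bypass base, which is exactly why the PHB3-only TVE#0 trick above exists: it gives such devices direct DMA at a mere 4GB offset instead.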
1955 | 1379 | |
---|
1956 | 1380 | static inline __be64 __iomem *pnv_ioda_get_inval_reg(struct pnv_phb *phb, |
---|
.. | .. |
---|
2012 | 1436 | } |
---|
2013 | 1437 | |
---|
2014 | 1438 | #ifdef CONFIG_IOMMU_API |
---|
2015 | | -static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index, |
---|
2016 | | - unsigned long *hpa, enum dma_data_direction *direction) |
---|
| 1439 | +/* Common for IODA1 and IODA2 */ |
---|
| 1440 | +static int pnv_ioda_tce_xchg_no_kill(struct iommu_table *tbl, long index, |
---|
| 1441 | + unsigned long *hpa, enum dma_data_direction *direction, |
---|
| 1442 | + bool realmode) |
---|
2017 | 1443 | { |
---|
2018 | | - long ret = pnv_tce_xchg(tbl, index, hpa, direction, true); |
---|
2019 | | - |
---|
2020 | | - if (!ret) |
---|
2021 | | - pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, false); |
---|
2022 | | - |
---|
2023 | | - return ret; |
---|
2024 | | -} |
---|
2025 | | - |
---|
2026 | | -static int pnv_ioda1_tce_xchg_rm(struct iommu_table *tbl, long index, |
---|
2027 | | - unsigned long *hpa, enum dma_data_direction *direction) |
---|
2028 | | -{ |
---|
2029 | | - long ret = pnv_tce_xchg(tbl, index, hpa, direction, false); |
---|
2030 | | - |
---|
2031 | | - if (!ret) |
---|
2032 | | - pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, true); |
---|
2033 | | - |
---|
2034 | | - return ret; |
---|
| 1444 | + return pnv_tce_xchg(tbl, index, hpa, direction, !realmode); |
---|
2035 | 1445 | } |
---|
2036 | 1446 | #endif |
---|
2037 | 1447 | |
---|
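Both PHB types' paired exchange callbacks (normal and real-mode variants; the IODA2 pair is removed a little further down) collapse into the single xchg_no_kill hook above plus a per-backend tce_kill hook. The point of the split is batching: a caller can exchange many TCEs and pay for one invalidation. Below is a hedged kernel-context sketch of how the two hooks compose; the wrapper name is invented, and the real batching caller lives outside this diff.

	/* Exchange one TCE, then flush the hardware TCE cache once. With the
	 * split hooks, a caller may equally do N exchanges per single kill. */
	static int exchange_then_kill(struct iommu_table *tbl, long index,
			unsigned long *hpa, enum dma_data_direction *dir)
	{
		int ret = tbl->it_ops->xchg_no_kill(tbl, index, hpa, dir, false);

		if (!ret)
			tbl->it_ops->tce_kill(tbl, index, 1, false);
		return ret;
	}

The !realmode argument forwarded to pnv_tce_xchg() preserves the old behaviour where real-mode callers never allocate TCE table levels on demand.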
.. | .. |
---|
2046 | 1456 | static struct iommu_table_ops pnv_ioda1_iommu_ops = { |
---|
2047 | 1457 | .set = pnv_ioda1_tce_build, |
---|
2048 | 1458 | #ifdef CONFIG_IOMMU_API |
---|
2049 | | - .exchange = pnv_ioda1_tce_xchg, |
---|
2050 | | - .exchange_rm = pnv_ioda1_tce_xchg_rm, |
---|
| 1459 | + .xchg_no_kill = pnv_ioda_tce_xchg_no_kill, |
---|
| 1460 | + .tce_kill = pnv_pci_p7ioc_tce_invalidate, |
---|
2051 | 1461 | .useraddrptr = pnv_tce_useraddrptr, |
---|
2052 | 1462 | #endif |
---|
2053 | 1463 | .clear = pnv_ioda1_tce_free, |
---|
.. | .. |
---|
2176 | 1586 | return ret; |
---|
2177 | 1587 | } |
---|
2178 | 1588 | |
---|
2179 | | -#ifdef CONFIG_IOMMU_API |
---|
2180 | | -static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index, |
---|
2181 | | - unsigned long *hpa, enum dma_data_direction *direction) |
---|
2182 | | -{ |
---|
2183 | | - long ret = pnv_tce_xchg(tbl, index, hpa, direction, true); |
---|
2184 | | - |
---|
2185 | | - if (!ret) |
---|
2186 | | - pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false); |
---|
2187 | | - |
---|
2188 | | - return ret; |
---|
2189 | | -} |
---|
2190 | | - |
---|
2191 | | -static int pnv_ioda2_tce_xchg_rm(struct iommu_table *tbl, long index, |
---|
2192 | | - unsigned long *hpa, enum dma_data_direction *direction) |
---|
2193 | | -{ |
---|
2194 | | - long ret = pnv_tce_xchg(tbl, index, hpa, direction, false); |
---|
2195 | | - |
---|
2196 | | - if (!ret) |
---|
2197 | | - pnv_pci_ioda2_tce_invalidate(tbl, index, 1, true); |
---|
2198 | | - |
---|
2199 | | - return ret; |
---|
2200 | | -} |
---|
2201 | | -#endif |
---|
2202 | | - |
---|
2203 | 1589 | static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, |
---|
2204 | 1590 | long npages) |
---|
2205 | 1591 | { |
---|
.. | .. |
---|
2211 | 1597 | static struct iommu_table_ops pnv_ioda2_iommu_ops = { |
---|
2212 | 1598 | .set = pnv_ioda2_tce_build, |
---|
2213 | 1599 | #ifdef CONFIG_IOMMU_API |
---|
2214 | | - .exchange = pnv_ioda2_tce_xchg, |
---|
2215 | | - .exchange_rm = pnv_ioda2_tce_xchg_rm, |
---|
| 1600 | + .xchg_no_kill = pnv_ioda_tce_xchg_no_kill, |
---|
| 1601 | + .tce_kill = pnv_pci_ioda2_tce_invalidate, |
---|
2216 | 1602 | .useraddrptr = pnv_tce_useraddrptr, |
---|
2217 | 1603 | #endif |
---|
2218 | 1604 | .clear = pnv_ioda2_tce_free, |
---|
.. | .. |
---|
2358 | 1744 | __pa(addr) + tce32_segsz * i, |
---|
2359 | 1745 | tce32_segsz, IOMMU_PAGE_SIZE_4K); |
---|
2360 | 1746 | if (rc) { |
---|
2361 | | - pe_err(pe, " Failed to configure 32-bit TCE table," |
---|
2362 | | - " err %ld\n", rc); |
---|
| 1747 | + pe_err(pe, " Failed to configure 32-bit TCE table, err %lld\n", |
---|
| 1748 | + rc); |
---|
2363 | 1749 | goto fail; |
---|
2364 | 1750 | } |
---|
2365 | 1751 | } |
---|
.. | .. |
---|
2376 | 1762 | tbl->it_ops = &pnv_ioda1_iommu_ops; |
---|
2377 | 1763 | pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift; |
---|
2378 | 1764 | pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift; |
---|
2379 | | - iommu_init_table(tbl, phb->hose->node); |
---|
| 1765 | + iommu_init_table(tbl, phb->hose->node, 0, 0); |
---|
2380 | 1766 | |
---|
2381 | | - if (pe->flags & PNV_IODA_PE_DEV) { |
---|
2382 | | - /* |
---|
2383 | | - * Setting table base here only for carrying iommu_group |
---|
2384 | | - * further down to let iommu_add_device() do the job. |
---|
2385 | | - * pnv_pci_ioda_dma_dev_setup will override it later anyway. |
---|
2386 | | - */ |
---|
2387 | | - set_iommu_table_base(&pe->pdev->dev, tbl); |
---|
2388 | | - iommu_add_device(&pe->pdev->dev); |
---|
2389 | | - } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) |
---|
2390 | | - pnv_ioda_setup_bus_dma(pe, pe->pbus, true); |
---|
2391 | | - |
---|
| 1767 | + pe->dma_setup_done = true; |
---|
2392 | 1768 | return; |
---|
2393 | 1769 | fail: |
---|
2394 | 1770 | /* XXX Failure: Try to fallback to 64-bit only ? */ |
---|
.. | .. |
---|
2412 | 1788 | const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; |
---|
2413 | 1789 | const __u64 win_size = tbl->it_size << tbl->it_page_shift; |
---|
2414 | 1790 | |
---|
2415 | | - pe_info(pe, "Setting up window#%d %llx..%llx pg=%x\n", num, |
---|
2416 | | - start_addr, start_addr + win_size - 1, |
---|
2417 | | - IOMMU_PAGE_SIZE(tbl)); |
---|
| 1791 | + pe_info(pe, "Setting up window#%d %llx..%llx pg=%lx\n", |
---|
| 1792 | + num, start_addr, start_addr + win_size - 1, |
---|
| 1793 | + IOMMU_PAGE_SIZE(tbl)); |
---|
2418 | 1794 | |
---|
2419 | 1795 | /* |
---|
2420 | 1796 | * Map TCE table through TVT. The TVE index is the PE number |
---|
.. | .. |
---|
2428 | 1804 | size << 3, |
---|
2429 | 1805 | IOMMU_PAGE_SIZE(tbl)); |
---|
2430 | 1806 | if (rc) { |
---|
2431 | | - pe_err(pe, "Failed to configure TCE table, err %ld\n", rc); |
---|
| 1807 | + pe_err(pe, "Failed to configure TCE table, err %lld\n", rc); |
---|
2432 | 1808 | return rc; |
---|
2433 | 1809 | } |
---|
2434 | 1810 | |
---|
.. | .. |
---|
2439 | 1815 | return 0; |
---|
2440 | 1816 | } |
---|
2441 | 1817 | |
---|
2442 | | -void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) |
---|
| 1818 | +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) |
---|
2443 | 1819 | { |
---|
2444 | 1820 | 	uint16_t window_id = (pe->pe_number << 1) + 1;
---|
2445 | 1821 | int64_t rc; |
---|
.. | .. |
---|
2501 | 1877 | { |
---|
2502 | 1878 | struct iommu_table *tbl = NULL; |
---|
2503 | 1879 | long rc; |
---|
| 1880 | + unsigned long res_start, res_end; |
---|
2504 | 1881 | |
---|
2505 | 1882 | /* |
---|
2506 | 1883 | * crashkernel= specifies the kdump kernel's maximum memory at |
---|
.. | .. |
---|
2514 | 1891 | * DMA window can be larger than available memory, which will |
---|
2515 | 1892 | * cause errors later. |
---|
2516 | 1893 | */ |
---|
2517 | | - const u64 window_size = min((u64)pe->table_group.tce32_size, max_memory); |
---|
| 1894 | + const u64 maxblock = 1UL << (PAGE_SHIFT + MAX_ORDER - 1); |
---|
2518 | 1895 | |
---|
2519 | | - rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, |
---|
2520 | | - IOMMU_PAGE_SHIFT_4K, |
---|
2521 | | - window_size, |
---|
2522 | | - POWERNV_IOMMU_DEFAULT_LEVELS, false, &tbl); |
---|
| 1896 | + /* |
---|
| 1897 | + * We create the default window as big as we can. The constraint is |
---|
| 1898 | + * the max order of allocation possible. The TCE table is likely to |
---|
| 1899 | + * end up being multilevel and with on-demand allocation in place, |
---|
| 1900 | + * the initial use is not going to be huge as the default window aims |
---|
| 1901 | +	 * to support crippled devices (i.e. not fully 64-bit DMA-capable) only.
---|
| 1902 | + */ |
---|
| 1903 | + /* iommu_table::it_map uses 1 bit per IOMMU page, hence 8 */ |
---|
| 1904 | + const u64 window_size = min((maxblock * 8) << PAGE_SHIFT, max_memory); |
---|
| 1905 | + /* Each TCE level cannot exceed maxblock so go multilevel if needed */ |
---|
| 1906 | + unsigned long tces_order = ilog2(window_size >> PAGE_SHIFT); |
---|
| 1907 | + unsigned long tcelevel_order = ilog2(maxblock >> 3); |
---|
| 1908 | + unsigned int levels = tces_order / tcelevel_order; |
---|
| 1909 | + |
---|
| 1910 | + if (tces_order % tcelevel_order) |
---|
| 1911 | + levels += 1; |
---|
| 1912 | + /* |
---|
| 1913 | + * We try to stick to default levels (which is >1 at the moment) in |
---|
| 1914 | +	 * order to save memory by relying on on-demand TCE level allocation.
---|
| 1915 | + */ |
---|
| 1916 | + levels = max_t(unsigned int, levels, POWERNV_IOMMU_DEFAULT_LEVELS); |
---|
| 1917 | + |
---|
| 1918 | + rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, PAGE_SHIFT, |
---|
| 1919 | + window_size, levels, false, &tbl); |
---|
2523 | 1920 | if (rc) { |
---|
2524 | 1921 | pe_err(pe, "Failed to create 32-bit TCE table, err %ld", |
---|
2525 | 1922 | rc); |
---|
2526 | 1923 | return rc; |
---|
2527 | 1924 | } |
---|
2528 | 1925 | |
---|
2529 | | - iommu_init_table(tbl, pe->phb->hose->node); |
---|
| 1926 | +	/* We use the top of the 32-bit space for MMIO, so exclude it from DMA */
---|
| 1927 | + res_start = 0; |
---|
| 1928 | + res_end = 0; |
---|
| 1929 | + if (window_size > pe->phb->ioda.m32_pci_base) { |
---|
| 1930 | + res_start = pe->phb->ioda.m32_pci_base >> tbl->it_page_shift; |
---|
| 1931 | + res_end = min(window_size, SZ_4G) >> tbl->it_page_shift; |
---|
| 1932 | + } |
---|
| 1933 | + iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end); |
---|
2530 | 1934 | |
---|
2531 | 1935 | rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl); |
---|
2532 | 1936 | if (rc) { |
---|
.. | .. |
---|
2540 | 1944 | pnv_pci_ioda2_set_bypass(pe, true); |
---|
2541 | 1945 | |
---|
2542 | 1946 | /* |
---|
2543 | | - * Setting table base here only for carrying iommu_group |
---|
2544 | | - * further down to let iommu_add_device() do the job. |
---|
2545 | | - * pnv_pci_ioda_dma_dev_setup will override it later anyway. |
---|
| 1947 | + * Set table base for the case of IOMMU DMA use. Usually this is done |
---|
| 1948 | + * from dma_dev_setup() which is not called when a device is returned |
---|
| 1949 | + * from VFIO so do it here. |
---|
2546 | 1950 | */ |
---|
2547 | | - if (pe->flags & PNV_IODA_PE_DEV) |
---|
| 1951 | + if (pe->pdev) |
---|
2548 | 1952 | set_iommu_table_base(&pe->pdev->dev, tbl); |
---|
2549 | 1953 | |
---|
2550 | 1954 | return 0; |
---|
2551 | 1955 | } |
---|
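To make the window sizing in pnv_pci_ioda2_setup_default_config() concrete, here is the same arithmetic as a standalone program under stated assumptions: 64K pages (PAGE_SHIFT = 16), MAX_ORDER = 11, POWERNV_IOMMU_DEFAULT_LEVELS = 2 and 2TB of RAM, all illustrative choices rather than values taken from this diff.

	#include <stdint.h>
	#include <stdio.h>

	static unsigned int ilog2_u64(uint64_t v)
	{
		unsigned int r = 0;

		while (v >>= 1)
			r++;
		return r;
	}

	int main(void)
	{
		const unsigned int page_shift = 16, max_order = 11, def_levels = 2;
		const uint64_t max_memory = 2ULL << 40;		/* 2TB of RAM */
		const uint64_t maxblock = 1ULL << (page_shift + max_order - 1);

		/* it_map needs 1 bit per IOMMU page, hence the factor of 8 */
		uint64_t window = (maxblock * 8) << page_shift;	/* 32TB */
		if (window > max_memory)
			window = max_memory;			/* capped: 2TB */

		unsigned int tces_order = ilog2_u64(window >> page_shift); /* 25 */
		unsigned int tcelevel_order = ilog2_u64(maxblock >> 3);    /* 23 */
		unsigned int levels = tces_order / tcelevel_order;
		if (tces_order % tcelevel_order)
			levels++;		/* 25 = 1*23 + 2, so 2 levels */
		if (levels < def_levels)
			levels = def_levels;

		printf("window = %llu GiB, levels = %u\n",
		       (unsigned long long)(window >> 30), levels);
		return 0;
	}

This prints "window = 2048 GiB, levels = 2". The MMIO exclusion then works in the same page units: with m32_pci_base at, say, 2GB, res_start = 0x80000000 >> 16 = 0x8000 and res_end = SZ_4G >> 16 = 0x10000, so iommu_init_table() reserves exactly the IOMMU pages shadowed by 32-bit MMIO.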
2552 | 1956 | |
---|
2553 | | -#if defined(CONFIG_IOMMU_API) || defined(CONFIG_PCI_IOV) |
---|
2554 | 1957 | static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, |
---|
2555 | 1958 | int num) |
---|
2556 | 1959 | { |
---|
.. | .. |
---|
2574 | 1977 | |
---|
2575 | 1978 | return ret; |
---|
2576 | 1979 | } |
---|
2577 | | -#endif |
---|
2578 | 1980 | |
---|
2579 | 1981 | #ifdef CONFIG_IOMMU_API |
---|
2580 | | -static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, |
---|
| 1982 | +unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, |
---|
2581 | 1983 | __u64 window_size, __u32 levels) |
---|
2582 | 1984 | { |
---|
2583 | 1985 | unsigned long bytes = 0; |
---|
.. | .. |
---|
2598 | 2000 | direct_table_size = 1UL << table_shift; |
---|
2599 | 2001 | |
---|
2600 | 2002 | for ( ; levels; --levels) { |
---|
2601 | | - bytes += _ALIGN_UP(tce_table_size, direct_table_size); |
---|
| 2003 | + bytes += ALIGN(tce_table_size, direct_table_size); |
---|
2602 | 2004 | |
---|
2603 | 2005 | tce_table_size /= direct_table_size; |
---|
2604 | 2006 | tce_table_size <<= 3; |
---|
.. | .. |
---|
2623 | 2025 | return ret; |
---|
2624 | 2026 | } |
---|
2625 | 2027 | |
---|
| 2028 | +static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) |
---|
| 2029 | +{ |
---|
| 2030 | + struct pci_dev *dev; |
---|
| 2031 | + |
---|
| 2032 | + list_for_each_entry(dev, &bus->devices, bus_list) { |
---|
| 2033 | + set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); |
---|
| 2034 | + dev->dev.archdata.dma_offset = pe->tce_bypass_base; |
---|
| 2035 | + |
---|
| 2036 | + if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) |
---|
| 2037 | + pnv_ioda_setup_bus_dma(pe, dev->subordinate); |
---|
| 2038 | + } |
---|
| 2039 | +} |
---|
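Compared with the removed variant earlier in this diff, this walk no longer takes an add_to_group flag or calls iommu_add_device(); it only rewires each device's table base and DMA offset, with IOMMU group membership handled elsewhere after the refactor. The recursion into dev->subordinate fires only for PNV_IODA_PE_BUS_ALL PEs, i.e. PEs that span every bus below their bridge.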
| 2040 | + |
---|
2626 | 2041 | static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) |
---|
2627 | 2042 | { |
---|
2628 | 2043 | struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, |
---|
.. | .. |
---|
2633 | 2048 | pnv_pci_ioda2_set_bypass(pe, false); |
---|
2634 | 2049 | pnv_pci_ioda2_unset_window(&pe->table_group, 0); |
---|
2635 | 2050 | if (pe->pbus) |
---|
2636 | | - pnv_ioda_setup_bus_dma(pe, pe->pbus, false); |
---|
| 2051 | + pnv_ioda_setup_bus_dma(pe, pe->pbus); |
---|
| 2052 | + else if (pe->pdev) |
---|
| 2053 | + set_iommu_table_base(&pe->pdev->dev, NULL); |
---|
2637 | 2054 | iommu_tce_table_put(tbl); |
---|
2638 | 2055 | } |
---|
2639 | 2056 | |
---|
.. | .. |
---|
2644 | 2061 | |
---|
2645 | 2062 | pnv_pci_ioda2_setup_default_config(pe); |
---|
2646 | 2063 | if (pe->pbus) |
---|
2647 | | - pnv_ioda_setup_bus_dma(pe, pe->pbus, false); |
---|
| 2064 | + pnv_ioda_setup_bus_dma(pe, pe->pbus); |
---|
2648 | 2065 | } |
---|
2649 | 2066 | |
---|
2650 | 2067 | static struct iommu_table_group_ops pnv_pci_ioda2_ops = { |
---|
.. | .. |
---|
2655 | 2072 | .take_ownership = pnv_ioda2_take_ownership, |
---|
2656 | 2073 | .release_ownership = pnv_ioda2_release_ownership, |
---|
2657 | 2074 | }; |
---|
2658 | | - |
---|
2659 | | -static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque) |
---|
2660 | | -{ |
---|
2661 | | - struct pci_controller *hose; |
---|
2662 | | - struct pnv_phb *phb; |
---|
2663 | | - struct pnv_ioda_pe **ptmppe = opaque; |
---|
2664 | | - struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); |
---|
2665 | | - struct pci_dn *pdn = pci_get_pdn(pdev); |
---|
2666 | | - |
---|
2667 | | - if (!pdn || pdn->pe_number == IODA_INVALID_PE) |
---|
2668 | | - return 0; |
---|
2669 | | - |
---|
2670 | | - hose = pci_bus_to_host(pdev->bus); |
---|
2671 | | - phb = hose->private_data; |
---|
2672 | | - if (phb->type != PNV_PHB_NPU_NVLINK) |
---|
2673 | | - return 0; |
---|
2674 | | - |
---|
2675 | | - *ptmppe = &phb->ioda.pe_array[pdn->pe_number]; |
---|
2676 | | - |
---|
2677 | | - return 1; |
---|
2678 | | -} |
---|
2679 | | - |
---|
2680 | | -/* |
---|
2681 | | - * This returns PE of associated NPU. |
---|
2682 | | - * This assumes that NPU is in the same IOMMU group with GPU and there is |
---|
2683 | | - * no other PEs. |
---|
2684 | | - */ |
---|
2685 | | -static struct pnv_ioda_pe *gpe_table_group_to_npe( |
---|
2686 | | - struct iommu_table_group *table_group) |
---|
2687 | | -{ |
---|
2688 | | - struct pnv_ioda_pe *npe = NULL; |
---|
2689 | | - int ret = iommu_group_for_each_dev(table_group->group, &npe, |
---|
2690 | | - gpe_table_group_to_npe_cb); |
---|
2691 | | - |
---|
2692 | | - BUG_ON(!ret || !npe); |
---|
2693 | | - |
---|
2694 | | - return npe; |
---|
2695 | | -} |
---|
2696 | | - |
---|
2697 | | -static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group, |
---|
2698 | | - int num, struct iommu_table *tbl) |
---|
2699 | | -{ |
---|
2700 | | - struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group); |
---|
2701 | | - int num2 = (num == 0) ? 1 : 0; |
---|
2702 | | - long ret = pnv_pci_ioda2_set_window(table_group, num, tbl); |
---|
2703 | | - |
---|
2704 | | - if (ret) |
---|
2705 | | - return ret; |
---|
2706 | | - |
---|
2707 | | - if (table_group->tables[num2]) |
---|
2708 | | - pnv_npu_unset_window(npe, num2); |
---|
2709 | | - |
---|
2710 | | - ret = pnv_npu_set_window(npe, num, tbl); |
---|
2711 | | - if (ret) { |
---|
2712 | | - pnv_pci_ioda2_unset_window(table_group, num); |
---|
2713 | | - if (table_group->tables[num2]) |
---|
2714 | | - pnv_npu_set_window(npe, num2, |
---|
2715 | | - table_group->tables[num2]); |
---|
2716 | | - } |
---|
2717 | | - |
---|
2718 | | - return ret; |
---|
2719 | | -} |
---|
2720 | | - |
---|
2721 | | -static long pnv_pci_ioda2_npu_unset_window( |
---|
2722 | | - struct iommu_table_group *table_group, |
---|
2723 | | - int num) |
---|
2724 | | -{ |
---|
2725 | | - struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group); |
---|
2726 | | - int num2 = (num == 0) ? 1 : 0; |
---|
2727 | | - long ret = pnv_pci_ioda2_unset_window(table_group, num); |
---|
2728 | | - |
---|
2729 | | - if (ret) |
---|
2730 | | - return ret; |
---|
2731 | | - |
---|
2732 | | - if (!npe->table_group.tables[num]) |
---|
2733 | | - return 0; |
---|
2734 | | - |
---|
2735 | | - ret = pnv_npu_unset_window(npe, num); |
---|
2736 | | - if (ret) |
---|
2737 | | - return ret; |
---|
2738 | | - |
---|
2739 | | - if (table_group->tables[num2]) |
---|
2740 | | - ret = pnv_npu_set_window(npe, num2, table_group->tables[num2]); |
---|
2741 | | - |
---|
2742 | | - return ret; |
---|
2743 | | -} |
---|
2744 | | - |
---|
2745 | | -static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group) |
---|
2746 | | -{ |
---|
2747 | | - /* |
---|
2748 | | - * Detach NPU first as pnv_ioda2_take_ownership() will destroy |
---|
2749 | | - * the iommu_table if 32bit DMA is enabled. |
---|
2750 | | - */ |
---|
2751 | | - pnv_npu_take_ownership(gpe_table_group_to_npe(table_group)); |
---|
2752 | | - pnv_ioda2_take_ownership(table_group); |
---|
2753 | | -} |
---|
2754 | | - |
---|
2755 | | -static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = { |
---|
2756 | | - .get_table_size = pnv_pci_ioda2_get_table_size, |
---|
2757 | | - .create_table = pnv_pci_ioda2_create_table_userspace, |
---|
2758 | | - .set_window = pnv_pci_ioda2_npu_set_window, |
---|
2759 | | - .unset_window = pnv_pci_ioda2_npu_unset_window, |
---|
2760 | | - .take_ownership = pnv_ioda2_npu_take_ownership, |
---|
2761 | | - .release_ownership = pnv_ioda2_release_ownership, |
---|
2762 | | -}; |
---|
2763 | | - |
---|
2764 | | -static void pnv_pci_ioda_setup_iommu_api(void) |
---|
2765 | | -{ |
---|
2766 | | - struct pci_controller *hose, *tmp; |
---|
2767 | | - struct pnv_phb *phb; |
---|
2768 | | - struct pnv_ioda_pe *pe, *gpe; |
---|
2769 | | - |
---|
2770 | | - /* |
---|
2771 | | - * Now we have all PHBs discovered, time to add NPU devices to |
---|
2772 | | - * the corresponding IOMMU groups. |
---|
2773 | | - */ |
---|
2774 | | - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { |
---|
2775 | | - phb = hose->private_data; |
---|
2776 | | - |
---|
2777 | | - if (phb->type != PNV_PHB_NPU_NVLINK) |
---|
2778 | | - continue; |
---|
2779 | | - |
---|
2780 | | - list_for_each_entry(pe, &phb->ioda.pe_list, list) { |
---|
2781 | | - gpe = pnv_pci_npu_setup_iommu(pe); |
---|
2782 | | - if (gpe) |
---|
2783 | | - gpe->table_group.ops = &pnv_pci_ioda2_npu_ops; |
---|
2784 | | - } |
---|
2785 | | - } |
---|
2786 | | -} |
---|
2787 | | -#else /* !CONFIG_IOMMU_API */ |
---|
2788 | | -static void pnv_pci_ioda_setup_iommu_api(void) { }; |
---|
2789 | 2075 | #endif |
---|
2790 | 2076 | |
---|
2791 | | -static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb) |
---|
2792 | | -{ |
---|
2793 | | - struct pci_controller *hose = phb->hose; |
---|
2794 | | - struct device_node *dn = hose->dn; |
---|
2795 | | - unsigned long mask = 0; |
---|
2796 | | - int i, rc, count; |
---|
2797 | | - u32 val; |
---|
2798 | | - |
---|
2799 | | - count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes"); |
---|
2800 | | - if (count <= 0) { |
---|
2801 | | - mask = SZ_4K | SZ_64K; |
---|
2802 | | - /* Add 16M for POWER8 by default */ |
---|
2803 | | - if (cpu_has_feature(CPU_FTR_ARCH_207S) && |
---|
2804 | | - !cpu_has_feature(CPU_FTR_ARCH_300)) |
---|
2805 | | - mask |= SZ_16M | SZ_256M; |
---|
2806 | | - return mask; |
---|
2807 | | - } |
---|
2808 | | - |
---|
2809 | | - for (i = 0; i < count; i++) { |
---|
2810 | | - rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes", |
---|
2811 | | - i, &val); |
---|
2812 | | - if (rc == 0) |
---|
2813 | | - mask |= 1ULL << val; |
---|
2814 | | - } |
---|
2815 | | - |
---|
2816 | | - return mask; |
---|
2817 | | -} |
---|
2818 | | - |
---|
2819 | | -static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, |
---|
2820 | | - struct pnv_ioda_pe *pe) |
---|
| 2077 | +void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, |
---|
| 2078 | + struct pnv_ioda_pe *pe) |
---|
2821 | 2079 | { |
---|
2822 | 2080 | int64_t rc; |
---|
2823 | 2081 | |
---|
2824 | | - if (!pnv_pci_ioda_pe_dma_weight(pe)) |
---|
2825 | | - return; |
---|
2826 | | - |
---|
2827 | 2082 | /* TVE #1 is selected by PCI address bit 59 */ |
---|
2828 | 2083 | pe->tce_bypass_base = 1ull << 59; |
---|
2829 | | - |
---|
2830 | | - iommu_register_group(&pe->table_group, phb->hose->global_number, |
---|
2831 | | - pe->pe_number); |
---|
2832 | 2084 | |
---|
2833 | 2085 | /* The PE will reserve all possible 32-bits space */ |
---|
2834 | 2086 | pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n", |
---|
.. | .. |
---|
2841 | 2093 | IOMMU_TABLE_GROUP_MAX_TABLES; |
---|
2842 | 2094 | pe->table_group.max_levels = POWERNV_IOMMU_MAX_LEVELS; |
---|
2843 | 2095 | pe->table_group.pgsizes = pnv_ioda_parse_tce_sizes(phb); |
---|
2844 | | -#ifdef CONFIG_IOMMU_API |
---|
2845 | | - pe->table_group.ops = &pnv_pci_ioda2_ops; |
---|
2846 | | -#endif |
---|
2847 | 2096 | |
---|
2848 | 2097 | rc = pnv_pci_ioda2_setup_default_config(pe); |
---|
2849 | 2098 | if (rc) |
---|
2850 | 2099 | return; |
---|
2851 | 2100 | |
---|
2852 | | - if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) |
---|
2853 | | - pnv_ioda_setup_bus_dma(pe, pe->pbus, true); |
---|
| 2101 | +#ifdef CONFIG_IOMMU_API |
---|
| 2102 | + pe->table_group.ops = &pnv_pci_ioda2_ops; |
---|
| 2103 | + iommu_register_group(&pe->table_group, phb->hose->global_number, |
---|
| 2104 | + pe->pe_number); |
---|
| 2105 | +#endif |
---|
| 2106 | + pe->dma_setup_done = true; |
---|
2854 | 2107 | } |
---|
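tce_bypass_base encodes the routing rule named in the comment above: TVE#1 is selected by PCI address bit 59, so a device in bypass mode reaches host memory by adding 2^59 to each physical address. A tiny standalone illustration (the physical address is invented):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		const uint64_t tce_bypass_base = 1ULL << 59;
		uint64_t phys = 0x2000beef000ULL;	/* example host address */
		uint64_t dma = tce_bypass_base + phys;	/* what the device emits */

		/* Bit 59 of the bus address picks the TVE:
		 * 0 -> 32-bit TCE table, 1 -> bypass window. */
		printf("bus address 0x%016llx -> TVE#%d\n",
		       (unsigned long long)dma, (int)((dma >> 59) & 1));
		return 0;
	}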
2855 | 2108 | |
---|
2856 | | -#ifdef CONFIG_PCI_MSI |
---|
2857 | 2109 | int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq) |
---|
2858 | 2110 | { |
---|
2859 | 2111 | struct pnv_phb *phb = container_of(chip, struct pnv_phb, |
---|
.. | .. |
---|
2999 | 2251 | pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", |
---|
3000 | 2252 | count, phb->msi_base); |
---|
3001 | 2253 | } |
---|
3002 | | -#else |
---|
3003 | | -static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { } |
---|
3004 | | -#endif /* CONFIG_PCI_MSI */ |
---|
3005 | | - |
---|
3006 | | -#ifdef CONFIG_PCI_IOV |
---|
3007 | | -static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) |
---|
3008 | | -{ |
---|
3009 | | - struct pci_controller *hose = pci_bus_to_host(pdev->bus); |
---|
3010 | | - struct pnv_phb *phb = hose->private_data; |
---|
3011 | | - const resource_size_t gate = phb->ioda.m64_segsize >> 2; |
---|
3012 | | - struct resource *res; |
---|
3013 | | - int i; |
---|
3014 | | - resource_size_t size, total_vf_bar_sz; |
---|
3015 | | - struct pci_dn *pdn; |
---|
3016 | | - int mul, total_vfs; |
---|
3017 | | - |
---|
3018 | | - pdn = pci_get_pdn(pdev); |
---|
3019 | | - pdn->vfs_expanded = 0; |
---|
3020 | | - pdn->m64_single_mode = false; |
---|
3021 | | - |
---|
3022 | | - total_vfs = pci_sriov_get_totalvfs(pdev); |
---|
3023 | | - mul = phb->ioda.total_pe_num; |
---|
3024 | | - total_vf_bar_sz = 0; |
---|
3025 | | - |
---|
3026 | | - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { |
---|
3027 | | - res = &pdev->resource[i + PCI_IOV_RESOURCES]; |
---|
3028 | | - if (!res->flags || res->parent) |
---|
3029 | | - continue; |
---|
3030 | | - if (!pnv_pci_is_m64_flags(res->flags)) { |
---|
3031 | | - dev_warn(&pdev->dev, "Don't support SR-IOV with" |
---|
3032 | | - " non M64 VF BAR%d: %pR. \n", |
---|
3033 | | - i, res); |
---|
3034 | | - goto truncate_iov; |
---|
3035 | | - } |
---|
3036 | | - |
---|
3037 | | - total_vf_bar_sz += pci_iov_resource_size(pdev, |
---|
3038 | | - i + PCI_IOV_RESOURCES); |
---|
3039 | | - |
---|
3040 | | - /* |
---|
3041 | | - * If bigger than quarter of M64 segment size, just round up |
---|
3042 | | - * power of two. |
---|
3043 | | - * |
---|
3044 | | - * Generally, one M64 BAR maps one IOV BAR. To avoid conflict |
---|
3045 | | - * with other devices, IOV BAR size is expanded to be |
---|
3046 | | - * (total_pe * VF_BAR_size). When VF_BAR_size is half of M64 |
---|
3047 | | - * segment size , the expanded size would equal to half of the |
---|
3048 | | - * whole M64 space size, which will exhaust the M64 Space and |
---|
3049 | | - * limit the system flexibility. This is a design decision to |
---|
3050 | | - * set the boundary to quarter of the M64 segment size. |
---|
3051 | | - */ |
---|
3052 | | - if (total_vf_bar_sz > gate) { |
---|
3053 | | - mul = roundup_pow_of_two(total_vfs); |
---|
3054 | | - dev_info(&pdev->dev, |
---|
3055 | | - "VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n", |
---|
3056 | | - total_vf_bar_sz, gate, mul); |
---|
3057 | | - pdn->m64_single_mode = true; |
---|
3058 | | - break; |
---|
3059 | | - } |
---|
3060 | | - } |
---|
3061 | | - |
---|
3062 | | - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { |
---|
3063 | | - res = &pdev->resource[i + PCI_IOV_RESOURCES]; |
---|
3064 | | - if (!res->flags || res->parent) |
---|
3065 | | - continue; |
---|
3066 | | - |
---|
3067 | | - size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); |
---|
3068 | | - /* |
---|
3069 | | - * On PHB3, the minimum size alignment of M64 BAR in single |
---|
3070 | | - * mode is 32MB. |
---|
3071 | | - */ |
---|
3072 | | - if (pdn->m64_single_mode && (size < SZ_32M)) |
---|
3073 | | - goto truncate_iov; |
---|
3074 | | - dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res); |
---|
3075 | | - res->end = res->start + size * mul - 1; |
---|
3076 | | - dev_dbg(&pdev->dev, " %pR\n", res); |
---|
3077 | | - dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)", |
---|
3078 | | - i, res, mul); |
---|
3079 | | - } |
---|
3080 | | - pdn->vfs_expanded = mul; |
---|
3081 | | - |
---|
3082 | | - return; |
---|
3083 | | - |
---|
3084 | | -truncate_iov: |
---|
3085 | | - /* To save MMIO space, IOV BAR is truncated. */ |
---|
3086 | | - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { |
---|
3087 | | - res = &pdev->resource[i + PCI_IOV_RESOURCES]; |
---|
3088 | | - res->flags = 0; |
---|
3089 | | - res->end = res->start - 1; |
---|
3090 | | - } |
---|
3091 | | -} |
---|
3092 | | - |
---|
3093 | | -static void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev) |
---|
3094 | | -{ |
---|
3095 | | - if (WARN_ON(pci_dev_is_added(pdev))) |
---|
3096 | | - return; |
---|
3097 | | - |
---|
3098 | | - if (pdev->is_virtfn) { |
---|
3099 | | - struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev); |
---|
3100 | | - |
---|
3101 | | - /* |
---|
3102 | | - * VF PEs are single-device PEs so their pdev pointer needs to |
---|
3103 | | - * be set. The pdev doesn't exist when the PE is allocated (in |
---|
3104 | | - * (pcibios_sriov_enable()) so we fix it up here. |
---|
3105 | | - */ |
---|
3106 | | - pe->pdev = pdev; |
---|
3107 | | - WARN_ON(!(pe->flags & PNV_IODA_PE_VF)); |
---|
3108 | | - } else if (pdev->is_physfn) { |
---|
3109 | | - /* |
---|
3110 | | - * For PFs adjust their allocated IOV resources to match what |
---|
3111 | | - * the PHB can support using it's M64 BAR table. |
---|
3112 | | - */ |
---|
3113 | | - pnv_pci_ioda_fixup_iov_resources(pdev); |
---|
3114 | | - } |
---|
3115 | | -} |
---|
3116 | | -#endif /* CONFIG_PCI_IOV */ |
---|
3117 | 2254 | |
---|
3118 | 2255 | static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, |
---|
3119 | 2256 | struct resource *res) |
---|
.. | .. |
---|
3123 | 2260 | int index; |
---|
3124 | 2261 | int64_t rc; |
---|
3125 | 2262 | |
---|
3126 | | - if (!res || !res->flags || res->start > res->end) |
---|
| 2263 | + if (!res || !res->flags || res->start > res->end || |
---|
| 2264 | + res->flags & IORESOURCE_UNSET) |
---|
3127 | 2265 | return; |
---|
3128 | 2266 | |
---|
3129 | 2267 | if (res->flags & IORESOURCE_IO) { |
---|
.. | .. |
---|
3209 | 2347 | #ifdef CONFIG_DEBUG_FS |
---|
3210 | 2348 | static int pnv_pci_diag_data_set(void *data, u64 val) |
---|
3211 | 2349 | { |
---|
3212 | | - struct pci_controller *hose; |
---|
3213 | | - struct pnv_phb *phb; |
---|
| 2350 | + struct pnv_phb *phb = data; |
---|
3214 | 2351 | s64 ret; |
---|
3215 | | - |
---|
3216 | | - if (val != 1ULL) |
---|
3217 | | - return -EINVAL; |
---|
3218 | | - |
---|
3219 | | - hose = (struct pci_controller *)data; |
---|
3220 | | - if (!hose || !hose->private_data) |
---|
3221 | | - return -ENODEV; |
---|
3222 | | - |
---|
3223 | | - phb = hose->private_data; |
---|
3224 | 2352 | |
---|
3225 | 2353 | /* Retrieve the diag data from firmware */ |
---|
3226 | 2354 | ret = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data, |
---|
.. | .. |
---|
3233 | 2361 | return 0; |
---|
3234 | 2362 | } |
---|
3235 | 2363 | |
---|
3236 | | -DEFINE_SIMPLE_ATTRIBUTE(pnv_pci_diag_data_fops, NULL, |
---|
3237 | | - pnv_pci_diag_data_set, "%llu\n"); |
---|
| 2364 | +DEFINE_DEBUGFS_ATTRIBUTE(pnv_pci_diag_data_fops, NULL, pnv_pci_diag_data_set, |
---|
| 2365 | + "%llu\n"); |
---|
| 2366 | + |
---|
| 2367 | +static int pnv_pci_ioda_pe_dump(void *data, u64 val) |
---|
| 2368 | +{ |
---|
| 2369 | + struct pnv_phb *phb = data; |
---|
| 2370 | + int pe_num; |
---|
| 2371 | + |
---|
| 2372 | + for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) { |
---|
| 2373 | + struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_num]; |
---|
| 2374 | + |
---|
| 2375 | + if (!test_bit(pe_num, phb->ioda.pe_alloc)) |
---|
| 2376 | + continue; |
---|
| 2377 | + |
---|
| 2378 | + pe_warn(pe, "rid: %04x dev count: %2d flags: %s%s%s%s%s%s\n", |
---|
| 2379 | + pe->rid, pe->device_count, |
---|
| 2380 | + (pe->flags & PNV_IODA_PE_DEV) ? "dev " : "", |
---|
| 2381 | + (pe->flags & PNV_IODA_PE_BUS) ? "bus " : "", |
---|
| 2382 | + (pe->flags & PNV_IODA_PE_BUS_ALL) ? "all " : "", |
---|
| 2383 | + (pe->flags & PNV_IODA_PE_MASTER) ? "master " : "", |
---|
| 2384 | + (pe->flags & PNV_IODA_PE_SLAVE) ? "slave " : "", |
---|
| 2385 | + (pe->flags & PNV_IODA_PE_VF) ? "vf " : ""); |
---|
| 2386 | + } |
---|
| 2387 | + |
---|
| 2388 | + return 0; |
---|
| 2389 | +} |
---|
| 2390 | + |
---|
| 2391 | +DEFINE_DEBUGFS_ATTRIBUTE(pnv_pci_ioda_pe_dump_fops, NULL, |
---|
| 2392 | + pnv_pci_ioda_pe_dump, "%llu\n"); |
---|
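The new attribute gives a PE inventory from userspace: with debugfs mounted in the usual place, writing any value to /sys/kernel/debug/powerpc/PCI<nnnn>/dump_ioda_pe_state (the directory name comes from the sprintf("PCI%04x", ...) below) logs each allocated PE's RID, device count and flags through pe_warn(). DEFINE_DEBUGFS_ATTRIBUTE pairs with debugfs_create_file_unsafe() further down so the attribute manages its own file lifetime instead of going through the debugfs proxy ops.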
3238 | 2393 | |
---|
3239 | 2394 | #endif /* CONFIG_DEBUG_FS */ |
---|
3240 | 2395 | |
---|
.. | .. |
---|
3253 | 2408 | |
---|
3254 | 2409 | sprintf(name, "PCI%04x", hose->global_number); |
---|
3255 | 2410 | phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); |
---|
3256 | | - if (!phb->dbgfs) { |
---|
3257 | | - pr_warn("%s: Error on creating debugfs on PHB#%x\n", |
---|
3258 | | - __func__, hose->global_number); |
---|
3259 | | - continue; |
---|
3260 | | - } |
---|
3261 | 2411 | |
---|
3262 | | - debugfs_create_file("dump_diag_regs", 0200, phb->dbgfs, hose, |
---|
3263 | | - &pnv_pci_diag_data_fops); |
---|
| 2412 | + debugfs_create_file_unsafe("dump_diag_regs", 0200, phb->dbgfs, |
---|
| 2413 | + phb, &pnv_pci_diag_data_fops); |
---|
| 2414 | + debugfs_create_file_unsafe("dump_ioda_pe_state", 0200, phb->dbgfs, |
---|
| 2415 | + phb, &pnv_pci_ioda_pe_dump_fops); |
---|
3264 | 2416 | } |
---|
3265 | 2417 | #endif /* CONFIG_DEBUG_FS */ |
---|
3266 | 2418 | } |
---|
.. | .. |
---|
3302 | 2454 | |
---|
3303 | 2455 | static void pnv_pci_ioda_fixup(void) |
---|
3304 | 2456 | { |
---|
3305 | | - pnv_pci_ioda_setup_PEs(); |
---|
3306 | | - pnv_pci_ioda_setup_iommu_api(); |
---|
| 2457 | + pnv_pci_ioda_setup_nvlink(); |
---|
3307 | 2458 | pnv_pci_ioda_create_dbgfs(); |
---|
3308 | 2459 | |
---|
3309 | 2460 | pnv_pci_enable_bridges(); |
---|
.. | .. |
---|
3328 | 2479 | static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, |
---|
3329 | 2480 | unsigned long type) |
---|
3330 | 2481 | { |
---|
3331 | | - struct pci_dev *bridge; |
---|
3332 | | - struct pci_controller *hose = pci_bus_to_host(bus); |
---|
3333 | | - struct pnv_phb *phb = hose->private_data; |
---|
| 2482 | + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); |
---|
3334 | 2483 | int num_pci_bridges = 0; |
---|
| 2484 | + struct pci_dev *bridge; |
---|
3335 | 2485 | |
---|
3336 | 2486 | bridge = bus->self; |
---|
3337 | 2487 | while (bridge) { |
---|
.. | .. |
---|
3415 | 2565 | } |
---|
3416 | 2566 | } |
---|
3417 | 2567 | |
---|
3418 | | -static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type) |
---|
| 2568 | +static void pnv_pci_configure_bus(struct pci_bus *bus) |
---|
3419 | 2569 | { |
---|
3420 | | - struct pci_controller *hose = pci_bus_to_host(bus); |
---|
3421 | | - struct pnv_phb *phb = hose->private_data; |
---|
3422 | 2570 | struct pci_dev *bridge = bus->self; |
---|
3423 | 2571 | struct pnv_ioda_pe *pe; |
---|
3424 | | - bool all = (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE); |
---|
| 2572 | + bool all = (bridge && pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE); |
---|
3425 | 2573 | |
---|
3426 | | - /* Extend bridge's windows if necessary */ |
---|
3427 | | - pnv_pci_fixup_bridge_resources(bus, type); |
---|
3428 | | - |
---|
3429 | | - /* The PE for root bus should be realized before any one else */ |
---|
3430 | | - if (!phb->ioda.root_pe_populated) { |
---|
3431 | | - pe = pnv_ioda_setup_bus_PE(phb->hose->bus, false); |
---|
3432 | | - if (pe) { |
---|
3433 | | - phb->ioda.root_pe_idx = pe->pe_number; |
---|
3434 | | - phb->ioda.root_pe_populated = true; |
---|
3435 | | - } |
---|
3436 | | - } |
---|
| 2574 | + dev_info(&bus->dev, "Configuring PE for bus\n"); |
---|
3437 | 2575 | |
---|
3438 | 2576 | /* Don't assign PE to PCI bus, which doesn't have subordinate devices */ |
---|
3439 | | - if (list_empty(&bus->devices)) |
---|
| 2577 | + if (WARN_ON(list_empty(&bus->devices))) |
---|
3440 | 2578 | return; |
---|
3441 | 2579 | |
---|
3442 | 2580 | /* Reserve PEs according to used M64 resources */ |
---|
3443 | | - if (phb->reserve_m64_pe) |
---|
3444 | | - phb->reserve_m64_pe(bus, NULL, all); |
---|
| 2581 | + pnv_ioda_reserve_m64_pe(bus, NULL, all); |
---|
3445 | 2582 | |
---|
3446 | 2583 | /* |
---|
3447 | 2584 | * Assign PE. We might run here because of partial hotplug. |
---|
.. | .. |
---|
3453 | 2590 | return; |
---|
3454 | 2591 | |
---|
3455 | 2592 | pnv_ioda_setup_pe_seg(pe); |
---|
3456 | | - switch (phb->type) { |
---|
3457 | | - case PNV_PHB_IODA1: |
---|
3458 | | - pnv_pci_ioda1_setup_dma_pe(phb, pe); |
---|
3459 | | - break; |
---|
3460 | | - case PNV_PHB_IODA2: |
---|
3461 | | - pnv_pci_ioda2_setup_dma_pe(phb, pe); |
---|
3462 | | - break; |
---|
3463 | | - default: |
---|
3464 | | - pr_warn("%s: No DMA for PHB#%x (type %d)\n", |
---|
3465 | | - __func__, phb->hose->global_number, phb->type); |
---|
3466 | | - } |
---|
3467 | 2593 | } |
---|
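The old pnv_pci_setup_bridge() bundled three jobs: extending bridge windows, lazily creating the root PE, and kicking off per-PHB-type DMA setup. After this split the setup_bridge hook (see the controller ops below) only fixes up bridge resources, the root PE is allocated unconditionally at PHB init time (see the pnv_pci_init_ioda_phb() hunk near the end of this diff), and DMA setup moves to the per-device path behind each PE's dma_setup_done flag, so pnv_pci_configure_bus() shrinks to reserving M64 PEs and assigning the bus PE.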
3468 | 2594 | |
---|
3469 | 2595 | static resource_size_t pnv_pci_default_alignment(void) |
---|
.. | .. |
---|
3471 | 2597 | return PAGE_SIZE; |
---|
3472 | 2598 | } |
---|
3473 | 2599 | |
---|
3474 | | -#ifdef CONFIG_PCI_IOV |
---|
3475 | | -static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, |
---|
3476 | | - int resno) |
---|
3477 | | -{ |
---|
3478 | | - struct pci_controller *hose = pci_bus_to_host(pdev->bus); |
---|
3479 | | - struct pnv_phb *phb = hose->private_data; |
---|
3480 | | - struct pci_dn *pdn = pci_get_pdn(pdev); |
---|
3481 | | - resource_size_t align; |
---|
3482 | | - |
---|
3483 | | - /* |
---|
3484 | | - * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the |
---|
3485 | | - * SR-IOV. While from hardware perspective, the range mapped by M64 |
---|
3486 | | - * BAR should be size aligned. |
---|
3487 | | - * |
---|
3488 | | - * When IOV BAR is mapped with M64 BAR in Single PE mode, the extra |
---|
3489 | | - * powernv-specific hardware restriction is gone. But if just use the |
---|
3490 | | - * VF BAR size as the alignment, PF BAR / VF BAR may be allocated with |
---|
3491 | | - * in one segment of M64 #15, which introduces the PE conflict between |
---|
3492 | | - * PF and VF. Based on this, the minimum alignment of an IOV BAR is |
---|
3493 | | - * m64_segsize. |
---|
3494 | | - * |
---|
3495 | | - * This function returns the total IOV BAR size if M64 BAR is in |
---|
3496 | | - * Shared PE mode or just VF BAR size if not. |
---|
3497 | | - * If the M64 BAR is in Single PE mode, return the VF BAR size or |
---|
3498 | | - * M64 segment size if IOV BAR size is less. |
---|
3499 | | - */ |
---|
3500 | | - align = pci_iov_resource_size(pdev, resno); |
---|
3501 | | - if (!pdn->vfs_expanded) |
---|
3502 | | - return align; |
---|
3503 | | - if (pdn->m64_single_mode) |
---|
3504 | | - return max(align, (resource_size_t)phb->ioda.m64_segsize); |
---|
3505 | | - |
---|
3506 | | - return pdn->vfs_expanded * align; |
---|
3507 | | -} |
---|
3508 | | -#endif /* CONFIG_PCI_IOV */ |
---|
3509 | | - |
---|
3510 | 2600 | /* Prevent enabling devices for which we couldn't properly |
---|
3511 | 2601 | * assign a PE |
---|
3512 | 2602 | */ |
---|
3513 | 2603 | static bool pnv_pci_enable_device_hook(struct pci_dev *dev) |
---|
3514 | 2604 | { |
---|
3515 | | - struct pci_controller *hose = pci_bus_to_host(dev->bus); |
---|
3516 | | - struct pnv_phb *phb = hose->private_data; |
---|
| 2605 | + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); |
---|
3517 | 2606 | struct pci_dn *pdn; |
---|
3518 | 2607 | |
---|
3519 | 2608 | /* The function is probably called while the PEs have |
---|
.. | .. |
---|
3528 | 2617 | if (!pdn || pdn->pe_number == IODA_INVALID_PE) |
---|
3529 | 2618 | return false; |
---|
3530 | 2619 | |
---|
| 2620 | + return true; |
---|
| 2621 | +} |
---|
| 2622 | + |
---|
| 2623 | +static bool pnv_ocapi_enable_device_hook(struct pci_dev *dev) |
---|
| 2624 | +{ |
---|
| 2625 | + struct pci_controller *hose = pci_bus_to_host(dev->bus); |
---|
| 2626 | + struct pnv_phb *phb = hose->private_data; |
---|
| 2627 | + struct pci_dn *pdn; |
---|
| 2628 | + struct pnv_ioda_pe *pe; |
---|
| 2629 | + |
---|
| 2630 | + if (!phb->initialized) |
---|
| 2631 | + return true; |
---|
| 2632 | + |
---|
| 2633 | + pdn = pci_get_pdn(dev); |
---|
| 2634 | + if (!pdn) |
---|
| 2635 | + return false; |
---|
| 2636 | + |
---|
| 2637 | + if (pdn->pe_number == IODA_INVALID_PE) { |
---|
| 2638 | + pe = pnv_ioda_setup_dev_PE(dev); |
---|
| 2639 | + if (!pe) |
---|
| 2640 | + return false; |
---|
| 2641 | + } |
---|
3531 | 2642 | return true; |
---|
3532 | 2643 | } |
---|
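The OpenCAPI hook differs from the plain PCI one just above in a single respect: where pnv_pci_enable_device_hook() refuses a device that has no PE, pnv_ocapi_enable_device_hook() creates a device PE on first enable via pnv_ioda_setup_dev_PE(). OpenCAPI devices thus get their PE lazily, at first enable, rather than during the boot-time fixup pass.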
3533 | 2644 | |
---|
.. | .. |
---|
3562 | 2673 | |
---|
3563 | 2674 | static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe) |
---|
3564 | 2675 | { |
---|
3565 | | - unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe); |
---|
3566 | 2676 | struct iommu_table *tbl = pe->table_group.tables[0]; |
---|
3567 | 2677 | int64_t rc; |
---|
3568 | 2678 | |
---|
3569 | | - if (!weight) |
---|
| 2679 | + if (!pe->dma_setup_done) |
---|
3570 | 2680 | return; |
---|
3571 | 2681 | |
---|
3572 | 2682 | rc = pnv_pci_ioda1_unset_window(&pe->table_group, 0); |
---|
.. | .. |
---|
3583 | 2693 | iommu_tce_table_put(tbl); |
---|
3584 | 2694 | } |
---|
3585 | 2695 | |
---|
3586 | | -static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) |
---|
| 2696 | +void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) |
---|
3587 | 2697 | { |
---|
3588 | 2698 | struct iommu_table *tbl = pe->table_group.tables[0]; |
---|
3589 | | - unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe); |
---|
3590 | | -#ifdef CONFIG_IOMMU_API |
---|
3591 | 2699 | int64_t rc; |
---|
3592 | | -#endif |
---|
3593 | 2700 | |
---|
3594 | | - if (!weight) |
---|
| 2701 | + if (!pe->dma_setup_done) |
---|
3595 | 2702 | return; |
---|
3596 | 2703 | |
---|
3597 | | -#ifdef CONFIG_IOMMU_API |
---|
3598 | 2704 | rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); |
---|
3599 | 2705 | if (rc) |
---|
3600 | | - pe_warn(pe, "OPAL error %ld release DMA window\n", rc); |
---|
3601 | | -#endif |
---|
| 2706 | + pe_warn(pe, "OPAL error %lld release DMA window\n", rc); |
---|
3602 | 2707 | |
---|
3603 | 2708 | pnv_pci_ioda2_set_bypass(pe, false); |
---|
3604 | 2709 | if (pe->table_group.group) { |
---|
.. | .. |
---|
3621 | 2726 | if (map[idx] != pe->pe_number) |
---|
3622 | 2727 | continue; |
---|
3623 | 2728 | |
---|
3624 | | - if (win == OPAL_M64_WINDOW_TYPE) |
---|
3625 | | - rc = opal_pci_map_pe_mmio_window(phb->opal_id, |
---|
3626 | | - phb->ioda.reserved_pe_idx, win, |
---|
3627 | | - idx / PNV_IODA1_M64_SEGS, |
---|
3628 | | - idx % PNV_IODA1_M64_SEGS); |
---|
3629 | | - else |
---|
3630 | | - rc = opal_pci_map_pe_mmio_window(phb->opal_id, |
---|
3631 | | - phb->ioda.reserved_pe_idx, win, 0, idx); |
---|
| 2729 | + rc = opal_pci_map_pe_mmio_window(phb->opal_id, |
---|
| 2730 | + phb->ioda.reserved_pe_idx, win, 0, idx); |
---|
3632 | 2731 | |
---|
3633 | 2732 | if (rc != OPAL_SUCCESS) |
---|
3634 | | - pe_warn(pe, "Error %ld unmapping (%d) segment#%d\n", |
---|
| 2733 | + pe_warn(pe, "Error %lld unmapping (%d) segment#%d\n", |
---|
3635 | 2734 | rc, win, idx); |
---|
3636 | 2735 | |
---|
3637 | 2736 | map[idx] = IODA_INVALID_PE; |
---|
.. | .. |
---|
3647 | 2746 | phb->ioda.io_segmap); |
---|
3648 | 2747 | pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE, |
---|
3649 | 2748 | phb->ioda.m32_segmap); |
---|
3650 | | - pnv_ioda_free_pe_seg(pe, OPAL_M64_WINDOW_TYPE, |
---|
3651 | | - phb->ioda.m64_segmap); |
---|
| 2749 | + /* M64 is pre-configured by pnv_ioda1_init_m64() */ |
---|
3652 | 2750 | } else if (phb->type == PNV_PHB_IODA2) { |
---|
3653 | 2751 | pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE, |
---|
3654 | 2752 | phb->ioda.m32_segmap); |
---|
.. | .. |
---|
3660 | 2758 | struct pnv_phb *phb = pe->phb; |
---|
3661 | 2759 | struct pnv_ioda_pe *slave, *tmp; |
---|
3662 | 2760 | |
---|
| 2761 | + pe_info(pe, "Releasing PE\n"); |
---|
| 2762 | + |
---|
| 2763 | + mutex_lock(&phb->ioda.pe_list_mutex); |
---|
3663 | 2764 | list_del(&pe->list); |
---|
| 2765 | + mutex_unlock(&phb->ioda.pe_list_mutex); |
---|
| 2766 | + |
---|
3664 | 2767 | switch (phb->type) { |
---|
3665 | 2768 | case PNV_PHB_IODA1: |
---|
3666 | 2769 | pnv_pci_ioda1_release_pe_dma(pe); |
---|
3667 | 2770 | break; |
---|
3668 | 2771 | case PNV_PHB_IODA2: |
---|
3669 | 2772 | pnv_pci_ioda2_release_pe_dma(pe); |
---|
| 2773 | + break; |
---|
| 2774 | + case PNV_PHB_NPU_OCAPI: |
---|
3670 | 2775 | break; |
---|
3671 | 2776 | default: |
---|
3672 | 2777 | WARN_ON(1); |
---|
.. | .. |
---|
3689 | 2794 | * that it can be populated again in PCI hot add path. The PE |
---|
3690 | 2795 | * shouldn't be destroyed as it's the global reserved resource. |
---|
3691 | 2796 | */ |
---|
3692 | | - if (phb->ioda.root_pe_populated && |
---|
3693 | | - phb->ioda.root_pe_idx == pe->pe_number) |
---|
3694 | | - phb->ioda.root_pe_populated = false; |
---|
3695 | | - else |
---|
3696 | | - pnv_ioda_free_pe(pe); |
---|
| 2797 | + if (phb->ioda.root_pe_idx == pe->pe_number) |
---|
| 2798 | + return; |
---|
| 2799 | + |
---|
| 2800 | + pnv_ioda_free_pe(pe); |
---|
3697 | 2801 | } |
---|
3698 | 2802 | |
---|
3699 | 2803 | static void pnv_pci_release_device(struct pci_dev *pdev) |
---|
3700 | 2804 | { |
---|
3701 | | - struct pci_controller *hose = pci_bus_to_host(pdev->bus); |
---|
3702 | | - struct pnv_phb *phb = hose->private_data; |
---|
| 2805 | + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); |
---|
3703 | 2806 | struct pci_dn *pdn = pci_get_pdn(pdev); |
---|
3704 | 2807 | struct pnv_ioda_pe *pe; |
---|
3705 | 2808 | |
---|
| 2809 | + /* The VF PE state is torn down when sriov_disable() is called */ |
---|
3706 | 2810 | if (pdev->is_virtfn) |
---|
3707 | 2811 | return; |
---|
3708 | 2812 | |
---|
3709 | 2813 | if (!pdn || pdn->pe_number == IODA_INVALID_PE) |
---|
3710 | 2814 | return; |
---|
| 2815 | + |
---|
| 2816 | +#ifdef CONFIG_PCI_IOV |
---|
| 2817 | + /* |
---|
| 2818 | + * FIXME: Try move this to sriov_disable(). It's here since we allocate |
---|
| 2819 | + * the iov state at probe time since we need to fiddle with the IOV |
---|
| 2820 | + * resources. |
---|
| 2821 | + */ |
---|
| 2822 | + if (pdev->is_physfn) |
---|
| 2823 | + kfree(pdev->dev.archdata.iov_data); |
---|
| 2824 | +#endif |
---|
3711 | 2825 | |
---|
3712 | 2826 | /* |
---|
3713 | 2827 | * PCI hotplug can happen as part of EEH error recovery. The @pdn |
---|
.. | .. |
---|
3725 | 2839 | pnv_ioda_release_pe(pe); |
---|
3726 | 2840 | } |
---|
3727 | 2841 | |
---|
| 2842 | +static void pnv_npu_disable_device(struct pci_dev *pdev) |
---|
| 2843 | +{ |
---|
| 2844 | + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); |
---|
| 2845 | + struct eeh_pe *eehpe = edev ? edev->pe : NULL; |
---|
| 2846 | + |
---|
| 2847 | + if (eehpe && eeh_ops && eeh_ops->reset) |
---|
| 2848 | + eeh_ops->reset(eehpe, EEH_RESET_HOT); |
---|
| 2849 | +} |
---|
| 2850 | + |
---|
3728 | 2851 | static void pnv_pci_ioda_shutdown(struct pci_controller *hose) |
---|
3729 | 2852 | { |
---|
3730 | 2853 | struct pnv_phb *phb = hose->private_data; |
---|
.. | .. |
---|
3733 | 2856 | OPAL_ASSERT_RESET); |
---|
3734 | 2857 | } |
---|
3735 | 2858 | |
---|
| 2859 | +static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus) |
---|
| 2860 | +{ |
---|
| 2861 | + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); |
---|
| 2862 | + struct pnv_ioda_pe *pe; |
---|
| 2863 | + |
---|
| 2864 | + list_for_each_entry(pe, &phb->ioda.pe_list, list) { |
---|
| 2865 | + if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))) |
---|
| 2866 | + continue; |
---|
| 2867 | + |
---|
| 2868 | + if (!pe->pbus) |
---|
| 2869 | + continue; |
---|
| 2870 | + |
---|
| 2871 | + if (bus->number == ((pe->rid >> 8) & 0xFF)) { |
---|
| 2872 | + pe->pbus = bus; |
---|
| 2873 | + break; |
---|
| 2874 | + } |
---|
| 2875 | + } |
---|
| 2876 | +} |
---|
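The bus match above relies on the RID layout: a PCI routing ID packs the bus number into bits 15:8 and devfn into bits 7:0, so (pe->rid >> 8) & 0xFF recovers the bus of the PE's primary device. A standalone illustration with an invented RID:

	#include <stdint.h>
	#include <stdio.h>

	/* Bus number lives in bits 15:8 of a routing ID (bus << 8 | devfn). */
	static uint8_t rid_to_bus(uint16_t rid)
	{
		return (rid >> 8) & 0xFF;
	}

	int main(void)
	{
		uint16_t rid = 0x0120;	/* bus 0x01, devfn 0x20, example only */

		printf("RID 0x%04x -> bus 0x%02x\n", rid, rid_to_bus(rid));
		return 0;
	}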
| 2877 | + |
---|
3736 | 2878 | static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { |
---|
3737 | | - .dma_dev_setup = pnv_pci_dma_dev_setup, |
---|
3738 | | - .dma_bus_setup = pnv_pci_dma_bus_setup, |
---|
3739 | | -#ifdef CONFIG_PCI_MSI |
---|
| 2879 | + .dma_dev_setup = pnv_pci_ioda_dma_dev_setup, |
---|
| 2880 | + .dma_bus_setup = pnv_pci_ioda_dma_bus_setup, |
---|
| 2881 | + .iommu_bypass_supported = pnv_pci_ioda_iommu_bypass_supported, |
---|
3740 | 2882 | .setup_msi_irqs = pnv_setup_msi_irqs, |
---|
3741 | 2883 | .teardown_msi_irqs = pnv_teardown_msi_irqs, |
---|
3742 | | -#endif |
---|
3743 | 2884 | .enable_device_hook = pnv_pci_enable_device_hook, |
---|
3744 | 2885 | .release_device = pnv_pci_release_device, |
---|
3745 | 2886 | .window_alignment = pnv_pci_window_alignment, |
---|
3746 | | - .setup_bridge = pnv_pci_setup_bridge, |
---|
| 2887 | + .setup_bridge = pnv_pci_fixup_bridge_resources, |
---|
3747 | 2888 | .reset_secondary_bus = pnv_pci_reset_secondary_bus, |
---|
3748 | | - .dma_set_mask = pnv_pci_ioda_dma_set_mask, |
---|
3749 | | - .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask, |
---|
3750 | 2889 | .shutdown = pnv_pci_ioda_shutdown, |
---|
3751 | 2890 | }; |
---|
3752 | 2891 | |
---|
3753 | | -static int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask) |
---|
3754 | | -{ |
---|
3755 | | - dev_err_once(&npdev->dev, |
---|
3756 | | - "%s operation unsupported for NVLink devices\n", |
---|
3757 | | - __func__); |
---|
3758 | | - return -EPERM; |
---|
3759 | | -} |
---|
3760 | | - |
---|
3761 | 2892 | static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { |
---|
3762 | | - .dma_dev_setup = pnv_pci_dma_dev_setup, |
---|
3763 | | -#ifdef CONFIG_PCI_MSI |
---|
3764 | 2893 | .setup_msi_irqs = pnv_setup_msi_irqs, |
---|
3765 | 2894 | .teardown_msi_irqs = pnv_teardown_msi_irqs, |
---|
3766 | | -#endif |
---|
3767 | 2895 | .enable_device_hook = pnv_pci_enable_device_hook, |
---|
3768 | 2896 | .window_alignment = pnv_pci_window_alignment, |
---|
3769 | 2897 | .reset_secondary_bus = pnv_pci_reset_secondary_bus, |
---|
3770 | | - .dma_set_mask = pnv_npu_dma_set_mask, |
---|
3771 | 2898 | .shutdown = pnv_pci_ioda_shutdown, |
---|
| 2899 | + .disable_device = pnv_npu_disable_device, |
---|
3772 | 2900 | }; |
---|
3773 | 2901 | |
---|
3774 | 2902 | static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = { |
---|
3775 | | - .enable_device_hook = pnv_pci_enable_device_hook, |
---|
| 2903 | + .enable_device_hook = pnv_ocapi_enable_device_hook, |
---|
| 2904 | + .release_device = pnv_pci_release_device, |
---|
3776 | 2905 | .window_alignment = pnv_pci_window_alignment, |
---|
3777 | 2906 | .reset_secondary_bus = pnv_pci_reset_secondary_bus, |
---|
3778 | 2907 | .shutdown = pnv_pci_ioda_shutdown, |
---|
.. | .. |
---|
3785 | 2914 | struct pnv_phb *phb; |
---|
3786 | 2915 | unsigned long size, m64map_off, m32map_off, pemap_off; |
---|
3787 | 2916 | unsigned long iomap_off = 0, dma32map_off = 0; |
---|
| 2917 | + struct pnv_ioda_pe *root_pe; |
---|
3788 | 2918 | struct resource r; |
---|
3789 | 2919 | const __be64 *prop64; |
---|
3790 | 2920 | const __be32 *prop32; |
---|
.. | .. |
---|
3807 | 2937 | phb_id = be64_to_cpup(prop64); |
---|
3808 | 2938 | pr_debug(" PHB-ID : 0x%016llx\n", phb_id); |
---|
3809 | 2939 | |
---|
3810 | | - phb = memblock_virt_alloc(sizeof(*phb), 0); |
---|
| 2940 | + phb = memblock_alloc(sizeof(*phb), SMP_CACHE_BYTES); |
---|
| 2941 | + if (!phb) |
---|
| 2942 | + panic("%s: Failed to allocate %zu bytes\n", __func__, |
---|
| 2943 | + sizeof(*phb)); |
---|
3811 | 2944 | |
---|
3812 | 2945 | /* Allocate PCI controller */ |
---|
3813 | 2946 | phb->hose = hose = pcibios_alloc_controller(np); |
---|
.. | .. |
---|
3853 | 2986 | else |
---|
3854 | 2987 | phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE; |
---|
3855 | 2988 | |
---|
3856 | | - phb->diag_data = memblock_virt_alloc(phb->diag_data_size, 0); |
---|
| 2989 | + phb->diag_data = memblock_alloc(phb->diag_data_size, SMP_CACHE_BYTES); |
---|
| 2990 | + if (!phb->diag_data) |
---|
| 2991 | + panic("%s: Failed to allocate %u bytes\n", __func__, |
---|
| 2992 | + phb->diag_data_size); |
---|
3857 | 2993 | |
---|
3858 | 2994 | /* Parse 32-bit and IO ranges (if any) */ |
---|
3859 | 2995 | pci_process_bridge_OF_ranges(hose, np, !hose->global_number); |
---|
.. | .. |
---|
3897 | 3033 | PNV_IODA1_DMA32_SEGSIZE; |
---|
3898 | 3034 | |
---|
3899 | 3035 | /* Allocate aux data & arrays. We don't have IO ports on PHB3 */ |
---|
3900 | | - size = _ALIGN_UP(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8, |
---|
| 3036 | + size = ALIGN(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8, |
---|
3901 | 3037 | sizeof(unsigned long)); |
---|
3902 | 3038 | m64map_off = size; |
---|
3903 | 3039 | size += phb->ioda.total_pe_num * sizeof(phb->ioda.m64_segmap[0]); |
---|
.. | .. |
---|
3912 | 3048 | } |
---|
3913 | 3049 | pemap_off = size; |
---|
3914 | 3050 | size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe); |
---|
3915 | | - aux = memblock_virt_alloc(size, 0); |
---|
| 3051 | + aux = memblock_alloc(size, SMP_CACHE_BYTES); |
---|
| 3052 | + if (!aux) |
---|
| 3053 | + panic("%s: Failed to allocate %lu bytes\n", __func__, size); |
---|
3916 | 3054 | phb->ioda.pe_alloc = aux; |
---|
3917 | 3055 | phb->ioda.m64_segmap = aux + m64map_off; |
---|
3918 | 3056 | phb->ioda.m32_segmap = aux + m32map_off; |
---|
.. | .. |
---|
3944 | 3082 | phb->ioda.root_pe_idx = phb->ioda.reserved_pe_idx - 1; |
---|
3945 | 3083 | pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx); |
---|
3946 | 3084 | } else { |
---|
3947 | | - phb->ioda.root_pe_idx = IODA_INVALID_PE; |
---|
| 3085 | + /* otherwise just allocate one */ |
---|
| 3086 | + root_pe = pnv_ioda_alloc_pe(phb, 1); |
---|
| 3087 | + phb->ioda.root_pe_idx = root_pe->pe_number; |
---|
3948 | 3088 | } |
---|
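With this branch the PHB always ends up with a usable root_pe_idx, either the reserved PE when the M64 layout dictates it or a freshly allocated one. That invariant is what let pnv_ioda_release_pe() earlier in this diff drop the root_pe_populated flag and simply compare PE numbers before freeing.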
3949 | 3089 | |
---|
3950 | 3090 | INIT_LIST_HEAD(&phb->ioda.pe_list); |
---|
.. | .. |
---|
3999 | 3139 | hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops; |
---|
4000 | 3140 | break; |
---|
4001 | 3141 | default: |
---|
4002 | | - phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; |
---|
4003 | 3142 | hose->controller_ops = pnv_pci_ioda_controller_ops; |
---|
4004 | 3143 | } |
---|
4005 | 3144 | |
---|
.. | .. |
---|
4024 | 3163 | * shutdown PCI devices correctly. We already got IODA table |
---|
4025 | 3164 | * cleaned out. So we have to issue PHB reset to stop all PCI |
---|
4026 | 3165 | * transactions from previous kernel. The ppc_pci_reset_phbs |
---|
4027 | | - * kernel parameter will force this reset too. |
---|
| 3166 | + * kernel parameter will force this reset too. Additionally, |
---|
| 3167 | + * if the IODA reset above failed then use a bigger hammer. |
---|
| 3168 | + * This can happen if we get a PHB fatal error in very early |
---|
| 3169 | + * boot. |
---|
4028 | 3170 | */ |
---|
4029 | | - if (is_kdump_kernel() || pci_reset_phbs) { |
---|
| 3171 | + if (is_kdump_kernel() || pci_reset_phbs || rc) { |
---|
4030 | 3172 | pr_info(" Issue PHB reset ...\n"); |
---|
4031 | 3173 | pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL); |
---|
4032 | 3174 | pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE); |
---|
.. | .. |
---|
4054 | 3196 | |
---|
4055 | 3197 | static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev) |
---|
4056 | 3198 | { |
---|
4057 | | - struct pci_controller *hose = pci_bus_to_host(dev->bus); |
---|
4058 | | - struct pnv_phb *phb = hose->private_data; |
---|
| 3199 | + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); |
---|
4059 | 3200 | |
---|
4060 | 3201 | if (!machine_is(powernv)) |
---|
4061 | 3202 | return; |
---|