From 2f7c68cb55ecb7331f2381deb497c27155f32faf Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Wed, 03 Jan 2024 09:43:39 +0000 Subject: [PATCH] update kernel to 5.10.198 --- kernel/drivers/pci/controller/vmd.c | 481 ++++++++++++++++++++++++----------------------------- 1 files changed, 216 insertions(+), 265 deletions(-) diff --git a/kernel/drivers/pci/controller/vmd.c b/kernel/drivers/pci/controller/vmd.c index 3d1b004..f375c21 100644 --- a/kernel/drivers/pci/controller/vmd.c +++ b/kernel/drivers/pci/controller/vmd.c @@ -40,13 +40,19 @@ * membars, in order to allow proper address translation during * resource assignment to enable guest virtualization */ - VMD_FEAT_HAS_MEMBAR_SHADOW = (1 << 0), + VMD_FEAT_HAS_MEMBAR_SHADOW = (1 << 0), /* * Device may provide root port configuration information which limits * bus numbering */ - VMD_FEAT_HAS_BUS_RESTRICTIONS = (1 << 1), + VMD_FEAT_HAS_BUS_RESTRICTIONS = (1 << 1), + + /* + * Device contains physical location shadow registers in + * vendor-specific capability space + */ + VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP = (1 << 2), }; /* @@ -98,11 +104,6 @@ struct irq_domain *irq_domain; struct pci_bus *bus; u8 busn_start; - -#ifdef CONFIG_X86_DEV_DMA_OPS - struct dma_map_ops dma_ops; - struct dma_domain dma_domain; -#endif }; static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus) @@ -297,173 +298,32 @@ .chip = &vmd_msi_controller, }; -#ifdef CONFIG_X86_DEV_DMA_OPS -/* - * VMD replaces the requester ID with its own. DMA mappings for devices in a - * VMD domain need to be mapped for the VMD, not the device requiring - * the mapping. - */ -static struct device *to_vmd_dev(struct device *dev) +static int vmd_create_irq_domain(struct vmd_dev *vmd) { - struct pci_dev *pdev = to_pci_dev(dev); - struct vmd_dev *vmd = vmd_from_bus(pdev->bus); + struct fwnode_handle *fn; - return &vmd->dev->dev; + fn = irq_domain_alloc_named_id_fwnode("VMD-MSI", vmd->sysdata.domain); + if (!fn) + return -ENODEV; + + vmd->irq_domain = pci_msi_create_irq_domain(fn, &vmd_msi_domain_info, NULL); + if (!vmd->irq_domain) { + irq_domain_free_fwnode(fn); + return -ENODEV; + } + + return 0; } -static const struct dma_map_ops *vmd_dma_ops(struct device *dev) +static void vmd_remove_irq_domain(struct vmd_dev *vmd) { - return get_dma_ops(to_vmd_dev(dev)); + if (vmd->irq_domain) { + struct fwnode_handle *fn = vmd->irq_domain->fwnode; + + irq_domain_remove(vmd->irq_domain); + irq_domain_free_fwnode(fn); + } } - -static void *vmd_alloc(struct device *dev, size_t size, dma_addr_t *addr, - gfp_t flag, unsigned long attrs) -{ - return vmd_dma_ops(dev)->alloc(to_vmd_dev(dev), size, addr, flag, - attrs); -} - -static void vmd_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t addr, unsigned long attrs) -{ - return vmd_dma_ops(dev)->free(to_vmd_dev(dev), size, vaddr, addr, - attrs); -} - -static int vmd_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t addr, size_t size, - unsigned long attrs) -{ - return vmd_dma_ops(dev)->mmap(to_vmd_dev(dev), vma, cpu_addr, addr, - size, attrs); -} - -static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t addr, size_t size, - unsigned long attrs) -{ - return vmd_dma_ops(dev)->get_sgtable(to_vmd_dev(dev), sgt, cpu_addr, - addr, size, attrs); -} - -static dma_addr_t vmd_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - return vmd_dma_ops(dev)->map_page(to_vmd_dev(dev), page, offset, size, - dir, attrs); -} - -static void vmd_unmap_page(struct device *dev, dma_addr_t addr, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - vmd_dma_ops(dev)->unmap_page(to_vmd_dev(dev), addr, size, dir, attrs); -} - -static int vmd_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, unsigned long attrs) -{ - return vmd_dma_ops(dev)->map_sg(to_vmd_dev(dev), sg, nents, dir, attrs); -} - -static void vmd_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, unsigned long attrs) -{ - vmd_dma_ops(dev)->unmap_sg(to_vmd_dev(dev), sg, nents, dir, attrs); -} - -static void vmd_sync_single_for_cpu(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ - vmd_dma_ops(dev)->sync_single_for_cpu(to_vmd_dev(dev), addr, size, dir); -} - -static void vmd_sync_single_for_device(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ - vmd_dma_ops(dev)->sync_single_for_device(to_vmd_dev(dev), addr, size, - dir); -} - -static void vmd_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir) -{ - vmd_dma_ops(dev)->sync_sg_for_cpu(to_vmd_dev(dev), sg, nents, dir); -} - -static void vmd_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir) -{ - vmd_dma_ops(dev)->sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir); -} - -static int vmd_mapping_error(struct device *dev, dma_addr_t addr) -{ - return vmd_dma_ops(dev)->mapping_error(to_vmd_dev(dev), addr); -} - -static int vmd_dma_supported(struct device *dev, u64 mask) -{ - return vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask); -} - -#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK -static u64 vmd_get_required_mask(struct device *dev) -{ - return vmd_dma_ops(dev)->get_required_mask(to_vmd_dev(dev)); -} -#endif - -static void vmd_teardown_dma_ops(struct vmd_dev *vmd) -{ - struct dma_domain *domain = &vmd->dma_domain; - - if (get_dma_ops(&vmd->dev->dev)) - del_dma_domain(domain); -} - -#define ASSIGN_VMD_DMA_OPS(source, dest, fn) \ - do { \ - if (source->fn) \ - dest->fn = vmd_##fn; \ - } while (0) - -static void vmd_setup_dma_ops(struct vmd_dev *vmd) -{ - const struct dma_map_ops *source = get_dma_ops(&vmd->dev->dev); - struct dma_map_ops *dest = &vmd->dma_ops; - struct dma_domain *domain = &vmd->dma_domain; - - domain->domain_nr = vmd->sysdata.domain; - domain->dma_ops = dest; - - if (!source) - return; - ASSIGN_VMD_DMA_OPS(source, dest, alloc); - ASSIGN_VMD_DMA_OPS(source, dest, free); - ASSIGN_VMD_DMA_OPS(source, dest, mmap); - ASSIGN_VMD_DMA_OPS(source, dest, get_sgtable); - ASSIGN_VMD_DMA_OPS(source, dest, map_page); - ASSIGN_VMD_DMA_OPS(source, dest, unmap_page); - ASSIGN_VMD_DMA_OPS(source, dest, map_sg); - ASSIGN_VMD_DMA_OPS(source, dest, unmap_sg); - ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_cpu); - ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_device); - ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_cpu); - ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device); - ASSIGN_VMD_DMA_OPS(source, dest, mapping_error); - ASSIGN_VMD_DMA_OPS(source, dest, dma_supported); -#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK - ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask); -#endif - add_dma_domain(domain); -} -#undef ASSIGN_VMD_DMA_OPS -#else -static void vmd_teardown_dma_ops(struct vmd_dev *vmd) {} -static void vmd_setup_dma_ops(struct vmd_dev *vmd) {} -#endif static char __iomem *vmd_cfg_addr(struct vmd_dev *vmd, struct pci_bus *bus, unsigned int devfn, int reg, int len) @@ -584,16 +444,149 @@ return domain + 1; } +static int vmd_get_phys_offsets(struct vmd_dev *vmd, bool native_hint, + resource_size_t *offset1, + resource_size_t *offset2) +{ + struct pci_dev *dev = vmd->dev; + u64 phys1, phys2; + + if (native_hint) { + u32 vmlock; + int ret; + + ret = pci_read_config_dword(dev, PCI_REG_VMLOCK, &vmlock); + if (ret || vmlock == ~0) + return -ENODEV; + + if (MB2_SHADOW_EN(vmlock)) { + void __iomem *membar2; + + membar2 = pci_iomap(dev, VMD_MEMBAR2, 0); + if (!membar2) + return -ENOMEM; + phys1 = readq(membar2 + MB2_SHADOW_OFFSET); + phys2 = readq(membar2 + MB2_SHADOW_OFFSET + 8); + pci_iounmap(dev, membar2); + } else + return 0; + } else { + /* Hypervisor-Emulated Vendor-Specific Capability */ + int pos = pci_find_capability(dev, PCI_CAP_ID_VNDR); + u32 reg, regu; + + pci_read_config_dword(dev, pos + 4, ®); + + /* "SHDW" */ + if (pos && reg == 0x53484457) { + pci_read_config_dword(dev, pos + 8, ®); + pci_read_config_dword(dev, pos + 12, ®u); + phys1 = (u64) regu << 32 | reg; + + pci_read_config_dword(dev, pos + 16, ®); + pci_read_config_dword(dev, pos + 20, ®u); + phys2 = (u64) regu << 32 | reg; + } else + return 0; + } + + *offset1 = dev->resource[VMD_MEMBAR1].start - + (phys1 & PCI_BASE_ADDRESS_MEM_MASK); + *offset2 = dev->resource[VMD_MEMBAR2].start - + (phys2 & PCI_BASE_ADDRESS_MEM_MASK); + + return 0; +} + +static int vmd_get_bus_number_start(struct vmd_dev *vmd) +{ + struct pci_dev *dev = vmd->dev; + u16 reg; + + pci_read_config_word(dev, PCI_REG_VMCAP, ®); + if (BUS_RESTRICT_CAP(reg)) { + pci_read_config_word(dev, PCI_REG_VMCONFIG, ®); + + switch (BUS_RESTRICT_CFG(reg)) { + case 0: + vmd->busn_start = 0; + break; + case 1: + vmd->busn_start = 128; + break; + case 2: + vmd->busn_start = 224; + break; + default: + pci_err(dev, "Unknown Bus Offset Setting (%d)\n", + BUS_RESTRICT_CFG(reg)); + return -ENODEV; + } + } + + return 0; +} + +static irqreturn_t vmd_irq(int irq, void *data) +{ + struct vmd_irq_list *irqs = data; + struct vmd_irq *vmdirq; + int idx; + + idx = srcu_read_lock(&irqs->srcu); + list_for_each_entry_rcu(vmdirq, &irqs->irq_list, node) + generic_handle_irq(vmdirq->virq); + srcu_read_unlock(&irqs->srcu, idx); + + return IRQ_HANDLED; +} + +static int vmd_alloc_irqs(struct vmd_dev *vmd) +{ + struct pci_dev *dev = vmd->dev; + int i, err; + + vmd->msix_count = pci_msix_vec_count(dev); + if (vmd->msix_count < 0) + return -ENODEV; + + vmd->msix_count = pci_alloc_irq_vectors(dev, 1, vmd->msix_count, + PCI_IRQ_MSIX); + if (vmd->msix_count < 0) + return vmd->msix_count; + + vmd->irqs = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->irqs), + GFP_KERNEL); + if (!vmd->irqs) + return -ENOMEM; + + for (i = 0; i < vmd->msix_count; i++) { + err = init_srcu_struct(&vmd->irqs[i].srcu); + if (err) + return err; + + INIT_LIST_HEAD(&vmd->irqs[i].irq_list); + err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i), + vmd_irq, IRQF_NO_THREAD, + "vmd", &vmd->irqs[i]); + if (err) + return err; + } + + return 0; +} + static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features) { struct pci_sysdata *sd = &vmd->sysdata; - struct fwnode_handle *fn; struct resource *res; u32 upper_bits; unsigned long flags; LIST_HEAD(resources); resource_size_t offset[2] = {0}; resource_size_t membar2_offset = 0x2000; + struct pci_bus *child; + int ret; /* * Shadow registers may exist in certain VMD device ids which allow @@ -602,42 +595,24 @@ * or 0, depending on an enable bit in the VMD device. */ if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) { - u32 vmlock; - int ret; - membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE; - ret = pci_read_config_dword(vmd->dev, PCI_REG_VMLOCK, &vmlock); - if (ret || vmlock == ~0) - return -ENODEV; - - if (MB2_SHADOW_EN(vmlock)) { - void __iomem *membar2; - - membar2 = pci_iomap(vmd->dev, VMD_MEMBAR2, 0); - if (!membar2) - return -ENOMEM; - offset[0] = vmd->dev->resource[VMD_MEMBAR1].start - - (readq(membar2 + MB2_SHADOW_OFFSET) & - PCI_BASE_ADDRESS_MEM_MASK); - offset[1] = vmd->dev->resource[VMD_MEMBAR2].start - - (readq(membar2 + MB2_SHADOW_OFFSET + 8) & - PCI_BASE_ADDRESS_MEM_MASK); - pci_iounmap(vmd->dev, membar2); - } + ret = vmd_get_phys_offsets(vmd, true, &offset[0], &offset[1]); + if (ret) + return ret; + } else if (features & VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP) { + ret = vmd_get_phys_offsets(vmd, false, &offset[0], &offset[1]); + if (ret) + return ret; } /* * Certain VMD devices may have a root port configuration option which - * limits the bus range to between 0-127 or 128-255 + * limits the bus range to between 0-127, 128-255, or 224-255 */ if (features & VMD_FEAT_HAS_BUS_RESTRICTIONS) { - u32 vmcap, vmconfig; - - pci_read_config_dword(vmd->dev, PCI_REG_VMCAP, &vmcap); - pci_read_config_dword(vmd->dev, PCI_REG_VMCONFIG, &vmconfig); - if (BUS_RESTRICT_CAP(vmcap) && - (BUS_RESTRICT_CFG(vmconfig) == 0x1)) - vmd->busn_start = 128; + ret = vmd_get_bus_number_start(vmd); + if (ret) + return ret; } res = &vmd->dev->resource[VMD_CFGBAR]; @@ -658,7 +633,7 @@ * 32-bit resources. __pci_assign_resource() enforces that * artificial restriction to make sure everything will fit. * - * The only way we could use a 64-bit non-prefechable MEMBAR is + * The only way we could use a 64-bit non-prefetchable MEMBAR is * if its address is <4GB so that we can convert it to a 32-bit * resource. To be visible to the host OS, all VMD endpoints must * be initially configured by platform BIOS, which includes setting @@ -691,65 +666,61 @@ .parent = res, }; - sd->vmd_domain = true; + sd->vmd_dev = vmd->dev; sd->domain = vmd_find_free_domain(); if (sd->domain < 0) return sd->domain; sd->node = pcibus_to_node(vmd->dev->bus); - fn = irq_domain_alloc_named_id_fwnode("VMD-MSI", vmd->sysdata.domain); - if (!fn) - return -ENODEV; + ret = vmd_create_irq_domain(vmd); + if (ret) + return ret; - vmd->irq_domain = pci_msi_create_irq_domain(fn, &vmd_msi_domain_info, - x86_vector_domain); - if (!vmd->irq_domain) { - irq_domain_free_fwnode(fn); - return -ENODEV; - } + /* + * Override the irq domain bus token so the domain can be distinguished + * from a regular PCI/MSI domain. + */ + irq_domain_update_bus_token(vmd->irq_domain, DOMAIN_BUS_VMD_MSI); pci_add_resource(&resources, &vmd->resources[0]); pci_add_resource_offset(&resources, &vmd->resources[1], offset[0]); pci_add_resource_offset(&resources, &vmd->resources[2], offset[1]); vmd->bus = pci_create_root_bus(&vmd->dev->dev, vmd->busn_start, - &vmd_ops, sd, &resources); + &vmd_ops, sd, &resources); if (!vmd->bus) { pci_free_resource_list(&resources); - irq_domain_remove(vmd->irq_domain); - irq_domain_free_fwnode(fn); + vmd_remove_irq_domain(vmd); return -ENODEV; } vmd_attach_resources(vmd); - vmd_setup_dma_ops(vmd); - dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain); - pci_rescan_bus(vmd->bus); + if (vmd->irq_domain) + dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain); + + pci_scan_child_bus(vmd->bus); + pci_assign_unassigned_bus_resources(vmd->bus); + + /* + * VMD root buses are virtual and don't return true on pci_is_pcie() + * and will fail pcie_bus_configure_settings() early. It can instead be + * run on each of the real root ports. + */ + list_for_each_entry(child, &vmd->bus->children, node) + pcie_bus_configure_settings(child); + + pci_bus_add_devices(vmd->bus); WARN(sysfs_create_link(&vmd->dev->dev.kobj, &vmd->bus->dev.kobj, "domain"), "Can't create symlink to domain\n"); return 0; } -static irqreturn_t vmd_irq(int irq, void *data) -{ - struct vmd_irq_list *irqs = data; - struct vmd_irq *vmdirq; - int idx; - - idx = srcu_read_lock(&irqs->srcu); - list_for_each_entry_rcu(vmdirq, &irqs->irq_list, node) - generic_handle_irq(vmdirq->virq); - srcu_read_unlock(&irqs->srcu, idx); - - return IRQ_HANDLED; -} - static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id) { struct vmd_dev *vmd; - int i, err; + int err; if (resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20)) return -ENOMEM; @@ -772,32 +743,9 @@ dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32))) return -ENODEV; - vmd->msix_count = pci_msix_vec_count(dev); - if (vmd->msix_count < 0) - return -ENODEV; - - vmd->msix_count = pci_alloc_irq_vectors(dev, 1, vmd->msix_count, - PCI_IRQ_MSIX); - if (vmd->msix_count < 0) - return vmd->msix_count; - - vmd->irqs = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->irqs), - GFP_KERNEL); - if (!vmd->irqs) - return -ENOMEM; - - for (i = 0; i < vmd->msix_count; i++) { - err = init_srcu_struct(&vmd->irqs[i].srcu); - if (err) - return err; - - INIT_LIST_HEAD(&vmd->irqs[i].irq_list); - err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i), - vmd_irq, IRQF_NO_THREAD, - "vmd", &vmd->irqs[i]); - if (err) - return err; - } + err = vmd_alloc_irqs(vmd); + if (err) + return err; spin_lock_init(&vmd->cfg_lock); pci_set_drvdata(dev, vmd); @@ -821,16 +769,13 @@ static void vmd_remove(struct pci_dev *dev) { struct vmd_dev *vmd = pci_get_drvdata(dev); - struct fwnode_handle *fn = vmd->irq_domain->fwnode; sysfs_remove_link(&vmd->dev->dev.kobj, "domain"); pci_stop_root_bus(vmd->bus); pci_remove_root_bus(vmd->bus); vmd_cleanup_srcu(vmd); - vmd_teardown_dma_ops(vmd); vmd_detach_resources(vmd); - irq_domain_remove(vmd->irq_domain); - irq_domain_free_fwnode(fn); + vmd_remove_irq_domain(vmd); } #ifdef CONFIG_PM_SLEEP @@ -841,9 +786,8 @@ int i; for (i = 0; i < vmd->msix_count; i++) - devm_free_irq(dev, pci_irq_vector(pdev, i), &vmd->irqs[i]); + devm_free_irq(dev, pci_irq_vector(pdev, i), &vmd->irqs[i]); - pci_save_state(pdev); return 0; } @@ -861,19 +805,26 @@ return err; } - pci_restore_state(pdev); return 0; } #endif static SIMPLE_DEV_PM_OPS(vmd_dev_pm_ops, vmd_suspend, vmd_resume); static const struct pci_device_id vmd_ids[] = { - {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_201D),}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_201D), + .driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP,}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_28C0), .driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW | VMD_FEAT_HAS_BUS_RESTRICTIONS,}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x467f), + .driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP | + VMD_FEAT_HAS_BUS_RESTRICTIONS,}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4c3d), + .driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP | + VMD_FEAT_HAS_BUS_RESTRICTIONS,}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_9A0B), - .driver_data = VMD_FEAT_HAS_BUS_RESTRICTIONS,}, + .driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP | + VMD_FEAT_HAS_BUS_RESTRICTIONS,}, {0,} }; MODULE_DEVICE_TABLE(pci, vmd_ids); -- Gitblit v1.6.2