From 6778948f9de86c3cfaf36725a7c87dcff9ba247f Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Mon, 11 Dec 2023 08:20:59 +0000 Subject: [PATCH] kernel_5.10 no rt --- kernel/drivers/gpu/drm/i915/gvt/kvmgt.c | 599 +++++++++++++++++++++++++++++++++++++++++------------------ 1 files changed, 417 insertions(+), 182 deletions(-) diff --git a/kernel/drivers/gpu/drm/i915/gvt/kvmgt.c b/kernel/drivers/gpu/drm/i915/gvt/kvmgt.c index 66abe06..778eb8c 100644 --- a/kernel/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/kernel/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -31,7 +31,7 @@ #include <linux/init.h> #include <linux/device.h> #include <linux/mm.h> -#include <linux/mmu_context.h> +#include <linux/kthread.h> #include <linux/sched/mm.h> #include <linux/types.h> #include <linux/list.h> @@ -57,6 +57,8 @@ #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) #define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) +#define EDID_BLOB_OFFSET (PAGE_SIZE/2) + #define OPREGION_SIGNATURE "IntelGraphicsMem" struct vfio_region; @@ -74,6 +76,11 @@ u32 flags; const struct intel_vgpu_regops *ops; void *data; +}; + +struct vfio_edid_region { + struct vfio_region_gfx_edid vfio_edid_regs; + void *edid_blob; }; struct kvmgt_pgfn { @@ -101,6 +108,37 @@ struct kref ref; }; +struct kvmgt_vdev { + struct intel_vgpu *vgpu; + struct mdev_device *mdev; + struct vfio_region *region; + int num_regions; + struct eventfd_ctx *intx_trigger; + struct eventfd_ctx *msi_trigger; + + /* + * Two caches are used to avoid mapping duplicated pages (eg. + * scratch pages). This help to reduce dma setup overhead. + */ + struct rb_root gfn_cache; + struct rb_root dma_addr_cache; + unsigned long nr_cache_entries; + struct mutex cache_lock; + + struct notifier_block iommu_notifier; + struct notifier_block group_notifier; + struct kvm *kvm; + struct work_struct release_work; + atomic_t released; + struct vfio_device *vfio_device; + struct vfio_group *vfio_group; +}; + +static inline struct kvmgt_vdev *kvmgt_vdev(struct intel_vgpu *vgpu) +{ + return intel_vgpu_vdev(vgpu); +} + static inline bool handle_valid(unsigned long handle) { return !!(handle & ~0xff); @@ -113,6 +151,8 @@ static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size) { + struct drm_i915_private *i915 = vgpu->gvt->gt->i915; + struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); int total_pages; int npage; int ret; @@ -122,8 +162,8 @@ for (npage = 0; npage < total_pages; npage++) { unsigned long cur_gfn = gfn + npage; - ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1); - WARN_ON(ret != 1); + ret = vfio_group_unpin_pages(vdev->vfio_group, &cur_gfn, 1); + drm_WARN_ON(&i915->drm, ret != 1); } } @@ -131,6 +171,7 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size, struct page **page) { + struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); unsigned long base_pfn = 0; int total_pages; int npage; @@ -145,8 +186,8 @@ unsigned long cur_gfn = gfn + npage; unsigned long pfn; - ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1, - IOMMU_READ | IOMMU_WRITE, &pfn); + ret = vfio_group_pin_pages(vdev->vfio_group, &cur_gfn, 1, + IOMMU_READ | IOMMU_WRITE, &pfn); if (ret != 1) { gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n", cur_gfn, ret); @@ -180,7 +221,7 @@ static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn, dma_addr_t *dma_addr, unsigned long size) { - struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; + struct device *dev = 
&vgpu->gvt->gt->i915->drm.pdev->dev; struct page *page = NULL; int ret; @@ -203,7 +244,7 @@ static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn, dma_addr_t dma_addr, unsigned long size) { - struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; + struct device *dev = &vgpu->gvt->gt->i915->drm.pdev->dev; dma_unmap_page(dev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); gvt_unpin_guest_page(vgpu, gfn, size); @@ -212,7 +253,7 @@ static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu, dma_addr_t dma_addr) { - struct rb_node *node = vgpu->vdev.dma_addr_cache.rb_node; + struct rb_node *node = kvmgt_vdev(vgpu)->dma_addr_cache.rb_node; struct gvt_dma *itr; while (node) { @@ -230,7 +271,7 @@ static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn) { - struct rb_node *node = vgpu->vdev.gfn_cache.rb_node; + struct rb_node *node = kvmgt_vdev(vgpu)->gfn_cache.rb_node; struct gvt_dma *itr; while (node) { @@ -251,6 +292,7 @@ { struct gvt_dma *new, *itr; struct rb_node **link, *parent = NULL; + struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL); if (!new) @@ -263,7 +305,7 @@ kref_init(&new->ref); /* gfn_cache maps gfn to struct gvt_dma. */ - link = &vgpu->vdev.gfn_cache.rb_node; + link = &vdev->gfn_cache.rb_node; while (*link) { parent = *link; itr = rb_entry(parent, struct gvt_dma, gfn_node); @@ -274,11 +316,11 @@ link = &parent->rb_right; } rb_link_node(&new->gfn_node, parent, link); - rb_insert_color(&new->gfn_node, &vgpu->vdev.gfn_cache); + rb_insert_color(&new->gfn_node, &vdev->gfn_cache); /* dma_addr_cache maps dma addr to struct gvt_dma. */ parent = NULL; - link = &vgpu->vdev.dma_addr_cache.rb_node; + link = &vdev->dma_addr_cache.rb_node; while (*link) { parent = *link; itr = rb_entry(parent, struct gvt_dma, dma_addr_node); @@ -289,46 +331,51 @@ link = &parent->rb_right; } rb_link_node(&new->dma_addr_node, parent, link); - rb_insert_color(&new->dma_addr_node, &vgpu->vdev.dma_addr_cache); + rb_insert_color(&new->dma_addr_node, &vdev->dma_addr_cache); - vgpu->vdev.nr_cache_entries++; + vdev->nr_cache_entries++; return 0; } static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu, struct gvt_dma *entry) { - rb_erase(&entry->gfn_node, &vgpu->vdev.gfn_cache); - rb_erase(&entry->dma_addr_node, &vgpu->vdev.dma_addr_cache); + struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); + + rb_erase(&entry->gfn_node, &vdev->gfn_cache); + rb_erase(&entry->dma_addr_node, &vdev->dma_addr_cache); kfree(entry); - vgpu->vdev.nr_cache_entries--; + vdev->nr_cache_entries--; } static void gvt_cache_destroy(struct intel_vgpu *vgpu) { struct gvt_dma *dma; struct rb_node *node = NULL; + struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); for (;;) { - mutex_lock(&vgpu->vdev.cache_lock); - node = rb_first(&vgpu->vdev.gfn_cache); + mutex_lock(&vdev->cache_lock); + node = rb_first(&vdev->gfn_cache); if (!node) { - mutex_unlock(&vgpu->vdev.cache_lock); + mutex_unlock(&vdev->cache_lock); break; } dma = rb_entry(node, struct gvt_dma, gfn_node); gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size); __gvt_cache_remove_entry(vgpu, dma); - mutex_unlock(&vgpu->vdev.cache_lock); + mutex_unlock(&vdev->cache_lock); } } static void gvt_cache_init(struct intel_vgpu *vgpu) { - vgpu->vdev.gfn_cache = RB_ROOT; - vgpu->vdev.dma_addr_cache = RB_ROOT; - vgpu->vdev.nr_cache_entries = 0; - mutex_init(&vgpu->vdev.cache_lock); + struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); + + vdev->gfn_cache = RB_ROOT; + vdev->dma_addr_cache = RB_ROOT; + 
vdev->nr_cache_entries = 0; + mutex_init(&vdev->cache_lock); } static void kvmgt_protect_table_init(struct kvmgt_guest_info *info) @@ -402,16 +449,18 @@ static size_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf, size_t count, loff_t *ppos, bool iswrite) { + struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS; - void *base = vgpu->vdev.region[i].data; + void *base = vdev->region[i].data; loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; - if (pos >= vgpu->vdev.region[i].size || iswrite) { + + if (pos >= vdev->region[i].size || iswrite) { gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n"); return -EINVAL; } - count = min(count, (size_t)(vgpu->vdev.region[i].size - pos)); + count = min(count, (size_t)(vdev->region[i].size - pos)); memcpy(buf, base + pos, count); return count; @@ -427,37 +476,144 @@ .release = intel_vgpu_reg_release_opregion, }; +static int handle_edid_regs(struct intel_vgpu *vgpu, + struct vfio_edid_region *region, char *buf, + size_t count, u16 offset, bool is_write) +{ + struct vfio_region_gfx_edid *regs = ®ion->vfio_edid_regs; + unsigned int data; + + if (offset + count > sizeof(*regs)) + return -EINVAL; + + if (count != 4) + return -EINVAL; + + if (is_write) { + data = *((unsigned int *)buf); + switch (offset) { + case offsetof(struct vfio_region_gfx_edid, link_state): + if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) { + if (!drm_edid_block_valid( + (u8 *)region->edid_blob, + 0, + true, + NULL)) { + gvt_vgpu_err("invalid EDID blob\n"); + return -EINVAL; + } + intel_gvt_ops->emulate_hotplug(vgpu, true); + } else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN) + intel_gvt_ops->emulate_hotplug(vgpu, false); + else { + gvt_vgpu_err("invalid EDID link state %d\n", + regs->link_state); + return -EINVAL; + } + regs->link_state = data; + break; + case offsetof(struct vfio_region_gfx_edid, edid_size): + if (data > regs->edid_max_size) { + gvt_vgpu_err("EDID size is bigger than %d!\n", + regs->edid_max_size); + return -EINVAL; + } + regs->edid_size = data; + break; + default: + /* read-only regs */ + gvt_vgpu_err("write read-only EDID region at offset %d\n", + offset); + return -EPERM; + } + } else { + memcpy(buf, (char *)regs + offset, count); + } + + return count; +} + +static int handle_edid_blob(struct vfio_edid_region *region, char *buf, + size_t count, u16 offset, bool is_write) +{ + if (offset + count > region->vfio_edid_regs.edid_size) + return -EINVAL; + + if (is_write) + memcpy(region->edid_blob + offset, buf, count); + else + memcpy(buf, region->edid_blob + offset, count); + + return count; +} + +static size_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf, + size_t count, loff_t *ppos, bool iswrite) +{ + int ret; + unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - + VFIO_PCI_NUM_REGIONS; + struct vfio_edid_region *region = + (struct vfio_edid_region *)kvmgt_vdev(vgpu)->region[i].data; + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + + if (pos < region->vfio_edid_regs.edid_offset) { + ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite); + } else { + pos -= EDID_BLOB_OFFSET; + ret = handle_edid_blob(region, buf, count, pos, iswrite); + } + + if (ret < 0) + gvt_vgpu_err("failed to access EDID region\n"); + + return ret; +} + +static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu, + struct vfio_region *region) +{ + kfree(region->data); +} + +static const struct intel_vgpu_regops intel_vgpu_regops_edid = { + .rw = intel_vgpu_reg_rw_edid, + .release = 
intel_vgpu_reg_release_edid, +}; + static int intel_vgpu_register_reg(struct intel_vgpu *vgpu, unsigned int type, unsigned int subtype, const struct intel_vgpu_regops *ops, size_t size, u32 flags, void *data) { + struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); struct vfio_region *region; - region = krealloc(vgpu->vdev.region, - (vgpu->vdev.num_regions + 1) * sizeof(*region), + region = krealloc(vdev->region, + (vdev->num_regions + 1) * sizeof(*region), GFP_KERNEL); if (!region) return -ENOMEM; - vgpu->vdev.region = region; - vgpu->vdev.region[vgpu->vdev.num_regions].type = type; - vgpu->vdev.region[vgpu->vdev.num_regions].subtype = subtype; - vgpu->vdev.region[vgpu->vdev.num_regions].ops = ops; - vgpu->vdev.region[vgpu->vdev.num_regions].size = size; - vgpu->vdev.region[vgpu->vdev.num_regions].flags = flags; - vgpu->vdev.region[vgpu->vdev.num_regions].data = data; - vgpu->vdev.num_regions++; + vdev->region = region; + vdev->region[vdev->num_regions].type = type; + vdev->region[vdev->num_regions].subtype = subtype; + vdev->region[vdev->num_regions].ops = ops; + vdev->region[vdev->num_regions].size = size; + vdev->region[vdev->num_regions].flags = flags; + vdev->region[vdev->num_regions].data = data; + vdev->num_regions++; return 0; } static int kvmgt_get_vfio_device(void *p_vgpu) { struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; + struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); - vgpu->vdev.vfio_device = vfio_device_get_from_dev( - mdev_dev(vgpu->vdev.mdev)); - if (!vgpu->vdev.vfio_device) { + vdev->vfio_device = vfio_device_get_from_dev( + mdev_dev(vdev->mdev)); + if (!vdev->vfio_device) { gvt_vgpu_err("failed to get vfio device\n"); return -ENODEV; } @@ -493,12 +649,44 @@ return ret; } +static int kvmgt_set_edid(void *p_vgpu, int port_num) +{ + struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; + struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); + struct vfio_edid_region *base; + int ret; + + base = kzalloc(sizeof(*base), GFP_KERNEL); + if (!base) + return -ENOMEM; + + /* TODO: Add multi-port and EDID extension block support */ + base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET; + base->vfio_edid_regs.edid_max_size = EDID_SIZE; + base->vfio_edid_regs.edid_size = EDID_SIZE; + base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id); + base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id); + base->edid_blob = port->edid->edid_block; + + ret = intel_vgpu_register_reg(vgpu, + VFIO_REGION_TYPE_GFX, + VFIO_REGION_SUBTYPE_GFX_EDID, + &intel_vgpu_regops_edid, EDID_SIZE, + VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE | + VFIO_REGION_INFO_FLAG_CAPS, base); + + return ret; +} + static void kvmgt_put_vfio_device(void *vgpu) { - if (WARN_ON(!((struct intel_vgpu *)vgpu)->vdev.vfio_device)) + struct kvmgt_vdev *vdev = kvmgt_vdev((struct intel_vgpu *)vgpu); + + if (WARN_ON(!vdev->vfio_device)) return; - vfio_device_put(((struct intel_vgpu *)vgpu)->vdev.vfio_device); + vfio_device_put(vdev->vfio_device); } static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) @@ -527,9 +715,9 @@ goto out; } - INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work); + INIT_WORK(&kvmgt_vdev(vgpu)->release_work, intel_vgpu_release_work); - vgpu->vdev.mdev = mdev; + kvmgt_vdev(vgpu)->mdev = mdev; mdev_set_drvdata(mdev, vgpu); gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n", @@ -554,9 +742,10 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb, unsigned long action, void *data) { - struct intel_vgpu *vgpu = container_of(nb, - struct 
intel_vgpu, - vdev.iommu_notifier); + struct kvmgt_vdev *vdev = container_of(nb, + struct kvmgt_vdev, + iommu_notifier); + struct intel_vgpu *vgpu = vdev->vgpu; if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { struct vfio_iommu_type1_dma_unmap *unmap = data; @@ -566,7 +755,7 @@ iov_pfn = unmap->iova >> PAGE_SHIFT; end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE; - mutex_lock(&vgpu->vdev.cache_lock); + mutex_lock(&vdev->cache_lock); for (; iov_pfn < end_iov_pfn; iov_pfn++) { entry = __gvt_cache_find_gfn(vgpu, iov_pfn); if (!entry) @@ -576,7 +765,7 @@ entry->size); __gvt_cache_remove_entry(vgpu, entry); } - mutex_unlock(&vgpu->vdev.cache_lock); + mutex_unlock(&vdev->cache_lock); } return NOTIFY_OK; @@ -585,16 +774,16 @@ static int intel_vgpu_group_notifier(struct notifier_block *nb, unsigned long action, void *data) { - struct intel_vgpu *vgpu = container_of(nb, - struct intel_vgpu, - vdev.group_notifier); + struct kvmgt_vdev *vdev = container_of(nb, + struct kvmgt_vdev, + group_notifier); /* the only action we care about */ if (action == VFIO_GROUP_NOTIFY_SET_KVM) { - vgpu->vdev.kvm = data; + vdev->kvm = data; if (!data) - schedule_work(&vgpu->vdev.release_work); + schedule_work(&vdev->release_work); } return NOTIFY_OK; @@ -603,15 +792,17 @@ static int intel_vgpu_open(struct mdev_device *mdev) { struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); + struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); unsigned long events; int ret; + struct vfio_group *vfio_group; - vgpu->vdev.iommu_notifier.notifier_call = intel_vgpu_iommu_notifier; - vgpu->vdev.group_notifier.notifier_call = intel_vgpu_group_notifier; + vdev->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier; + vdev->group_notifier.notifier_call = intel_vgpu_group_notifier; events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events, - &vgpu->vdev.iommu_notifier); + &vdev->iommu_notifier); if (ret != 0) { gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n", ret); @@ -620,11 +811,27 @@ events = VFIO_GROUP_NOTIFY_SET_KVM; ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events, - &vgpu->vdev.group_notifier); + &vdev->group_notifier); if (ret != 0) { gvt_vgpu_err("vfio_register_notifier for group failed: %d\n", ret); goto undo_iommu; + } + + vfio_group = vfio_group_get_external_user_from_dev(mdev_dev(mdev)); + if (IS_ERR_OR_NULL(vfio_group)) { + ret = !vfio_group ? -EFAULT : PTR_ERR(vfio_group); + gvt_vgpu_err("vfio_group_get_external_user_from_dev failed\n"); + goto undo_register; + } + vdev->vfio_group = vfio_group; + + /* Take a module reference as mdev core doesn't take + * a reference for vendor driver. 
+ */ + if (!try_module_get(THIS_MODULE)) { + ret = -ENODEV; + goto undo_group; } ret = kvmgt_guest_init(mdev); @@ -633,58 +840,71 @@ intel_gvt_ops->vgpu_activate(vgpu); - atomic_set(&vgpu->vdev.released, 0); + atomic_set(&vdev->released, 0); return ret; undo_group: + vfio_group_put_external_user(vdev->vfio_group); + vdev->vfio_group = NULL; + +undo_register: vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, - &vgpu->vdev.group_notifier); + &vdev->group_notifier); undo_iommu: vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, - &vgpu->vdev.iommu_notifier); + &vdev->iommu_notifier); out: return ret; } static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu) { + struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); struct eventfd_ctx *trigger; - trigger = vgpu->vdev.msi_trigger; + trigger = vdev->msi_trigger; if (trigger) { eventfd_ctx_put(trigger); - vgpu->vdev.msi_trigger = NULL; + vdev->msi_trigger = NULL; } } static void __intel_vgpu_release(struct intel_vgpu *vgpu) { + struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); + struct drm_i915_private *i915 = vgpu->gvt->gt->i915; struct kvmgt_guest_info *info; int ret; if (!handle_valid(vgpu->handle)) return; - if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1)) + if (atomic_cmpxchg(&vdev->released, 0, 1)) return; intel_gvt_ops->vgpu_release(vgpu); - ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY, - &vgpu->vdev.iommu_notifier); - WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret); + ret = vfio_unregister_notifier(mdev_dev(vdev->mdev), VFIO_IOMMU_NOTIFY, + &vdev->iommu_notifier); + drm_WARN(&i915->drm, ret, + "vfio_unregister_notifier for iommu failed: %d\n", ret); - ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_GROUP_NOTIFY, - &vgpu->vdev.group_notifier); - WARN(ret, "vfio_unregister_notifier for group failed: %d\n", ret); + ret = vfio_unregister_notifier(mdev_dev(vdev->mdev), VFIO_GROUP_NOTIFY, + &vdev->group_notifier); + drm_WARN(&i915->drm, ret, + "vfio_unregister_notifier for group failed: %d\n", ret); + + /* dereference module reference taken at open */ + module_put(THIS_MODULE); info = (struct kvmgt_guest_info *)vgpu->handle; kvmgt_guest_exit(info); intel_vgpu_release_msi_eventfd_ctx(vgpu); + vfio_group_put_external_user(vdev->vfio_group); - vgpu->vdev.kvm = NULL; + vdev->kvm = NULL; vgpu->handle = 0; } @@ -697,13 +917,13 @@ static void intel_vgpu_release_work(struct work_struct *work) { - struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu, - vdev.release_work); + struct kvmgt_vdev *vdev = container_of(work, struct kvmgt_vdev, + release_work); - __intel_vgpu_release(vgpu); + __intel_vgpu_release(vdev->vgpu); } -static uint64_t intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) +static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) { u32 start_lo, start_hi; u32 mem_type; @@ -730,10 +950,10 @@ return ((u64)start_hi << 32) | start_lo; } -static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, uint64_t off, +static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, u64 off, void *buf, unsigned int count, bool is_write) { - uint64_t bar_start = intel_vgpu_get_bar_addr(vgpu, bar); + u64 bar_start = intel_vgpu_get_bar_addr(vgpu, bar); int ret; if (is_write) @@ -745,16 +965,16 @@ return ret; } -static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, uint64_t off) +static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, u64 off) { return off >= vgpu_aperture_offset(vgpu) && off < vgpu_aperture_offset(vgpu) + 
vgpu_aperture_sz(vgpu); } -static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, uint64_t off, +static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off, void *buf, unsigned long count, bool is_write) { - void *aperture_va; + void __iomem *aperture_va; if (!intel_vgpu_in_aperture(vgpu, off) || !intel_vgpu_in_aperture(vgpu, off + count)) { @@ -762,16 +982,16 @@ return -EINVAL; } - aperture_va = io_mapping_map_wc(&vgpu->gvt->dev_priv->ggtt.iomap, + aperture_va = io_mapping_map_wc(&vgpu->gvt->gt->ggtt->iomap, ALIGN_DOWN(off, PAGE_SIZE), count + offset_in_page(off)); if (!aperture_va) return -EIO; if (is_write) - memcpy(aperture_va + offset_in_page(off), buf, count); + memcpy_toio(aperture_va + offset_in_page(off), buf, count); else - memcpy(buf, aperture_va + offset_in_page(off), count); + memcpy_fromio(buf, aperture_va + offset_in_page(off), count); io_mapping_unmap(aperture_va); @@ -782,12 +1002,13 @@ size_t count, loff_t *ppos, bool is_write) { struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); + struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); - uint64_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + u64 pos = *ppos & VFIO_PCI_OFFSET_MASK; int ret = -EINVAL; - if (index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions) { + if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) { gvt_vgpu_err("invalid index: %u\n", index); return -EINVAL; } @@ -816,11 +1037,11 @@ case VFIO_PCI_ROM_REGION_INDEX: break; default: - if (index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions) + if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) return -EINVAL; index -= VFIO_PCI_NUM_REGIONS; - return vgpu->vdev.region[index].ops->rw(vgpu, buf, count, + return vdev->region[index].ops->rw(vgpu, buf, count, ppos, is_write); } @@ -1039,7 +1260,7 @@ static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu, unsigned int index, unsigned int start, - unsigned int count, uint32_t flags, + unsigned int count, u32 flags, void *data) { return 0; @@ -1047,21 +1268,21 @@ static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu, unsigned int index, unsigned int start, - unsigned int count, uint32_t flags, void *data) + unsigned int count, u32 flags, void *data) { return 0; } static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu, unsigned int index, unsigned int start, unsigned int count, - uint32_t flags, void *data) + u32 flags, void *data) { return 0; } static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu, unsigned int index, unsigned int start, unsigned int count, - uint32_t flags, void *data) + u32 flags, void *data) { struct eventfd_ctx *trigger; @@ -1073,19 +1294,19 @@ gvt_vgpu_err("eventfd_ctx_fdget failed\n"); return PTR_ERR(trigger); } - vgpu->vdev.msi_trigger = trigger; + kvmgt_vdev(vgpu)->msi_trigger = trigger; } else if ((flags & VFIO_IRQ_SET_DATA_NONE) && !count) intel_vgpu_release_msi_eventfd_ctx(vgpu); return 0; } -static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, uint32_t flags, +static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, u32 flags, unsigned int index, unsigned int start, unsigned int count, void *data) { int (*func)(struct intel_vgpu *vgpu, unsigned int index, - unsigned int start, unsigned int count, uint32_t flags, + unsigned int start, unsigned int count, u32 flags, void *data) = NULL; switch (index) { @@ -1125,6 +1346,7 @@ unsigned long arg) { struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); + struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); unsigned long minsz; gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", 
vgpu->id, cmd); @@ -1143,7 +1365,7 @@ info.flags = VFIO_DEVICE_FLAGS_PCI; info.flags |= VFIO_DEVICE_FLAGS_RESET; info.num_regions = VFIO_PCI_NUM_REGIONS + - vgpu->vdev.num_regions; + vdev->num_regions; info.num_irqs = VFIO_PCI_NUM_IRQS; return copy_to_user((void __user *)arg, &info, minsz) ? @@ -1155,7 +1377,6 @@ unsigned int i; int ret; struct vfio_region_info_cap_sparse_mmap *sparse = NULL; - size_t size; int nr_areas = 1; int cap_type_id; @@ -1198,9 +1419,8 @@ VFIO_REGION_INFO_FLAG_WRITE; info.size = gvt_aperture_sz(vgpu->gvt); - size = sizeof(*sparse) + - (nr_areas * sizeof(*sparse->areas)); - sparse = kzalloc(size, GFP_KERNEL); + sparse = kzalloc(struct_size(sparse, areas, nr_areas), + GFP_KERNEL); if (!sparse) return -ENOMEM; @@ -1236,22 +1456,22 @@ .header.version = 1 }; if (info.index >= VFIO_PCI_NUM_REGIONS + - vgpu->vdev.num_regions) + vdev->num_regions) return -EINVAL; info.index = array_index_nospec(info.index, VFIO_PCI_NUM_REGIONS + - vgpu->vdev.num_regions); + vdev->num_regions); i = info.index - VFIO_PCI_NUM_REGIONS; info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); - info.size = vgpu->vdev.region[i].size; - info.flags = vgpu->vdev.region[i].flags; + info.size = vdev->region[i].size; + info.flags = vdev->region[i].flags; - cap_type.type = vgpu->vdev.region[i].type; - cap_type.subtype = vgpu->vdev.region[i].subtype; + cap_type.type = vdev->region[i].type; + cap_type.subtype = vdev->region[i].subtype; ret = vfio_info_add_capability(&caps, &cap_type.header, @@ -1265,9 +1485,9 @@ switch (cap_type_id) { case VFIO_REGION_INFO_CAP_SPARSE_MMAP: ret = vfio_info_add_capability(&caps, - &sparse->header, sizeof(*sparse) + - (sparse->nr_areas * - sizeof(*sparse->areas))); + &sparse->header, + struct_size(sparse, areas, + sparse->nr_areas)); if (ret) { kfree(sparse); return ret; @@ -1415,27 +1635,10 @@ return sprintf(buf, "\n"); } -static ssize_t -hw_id_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct mdev_device *mdev = mdev_from_dev(dev); - - if (mdev) { - struct intel_vgpu *vgpu = (struct intel_vgpu *) - mdev_get_drvdata(mdev); - return sprintf(buf, "%u\n", - vgpu->submission.shadow_ctx->hw_id); - } - return sprintf(buf, "\n"); -} - static DEVICE_ATTR_RO(vgpu_id); -static DEVICE_ATTR_RO(hw_id); static struct attribute *intel_vgpu_attrs[] = { &dev_attr_vgpu_id.attr, - &dev_attr_hw_id.attr, NULL }; @@ -1465,19 +1668,17 @@ static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops) { - struct attribute **kvm_type_attrs; struct attribute_group **kvm_vgpu_type_groups; intel_gvt_ops = ops; - if (!intel_gvt_ops->get_gvt_attrs(&kvm_type_attrs, - &kvm_vgpu_type_groups)) + if (!intel_gvt_ops->get_gvt_attrs(&kvm_vgpu_type_groups)) return -EFAULT; intel_vgpu_ops.supported_type_groups = kvm_vgpu_type_groups; return mdev_register_device(dev, &intel_vgpu_ops); } -static void kvmgt_host_exit(struct device *dev, void *gvt) +static void kvmgt_host_exit(struct device *dev) { mdev_unregister_device(dev); } @@ -1610,13 +1811,15 @@ { struct kvmgt_guest_info *info; struct intel_vgpu *vgpu; + struct kvmgt_vdev *vdev; struct kvm *kvm; vgpu = mdev_get_drvdata(mdev); if (handle_valid(vgpu->handle)) return -EEXIST; - kvm = vgpu->vdev.kvm; + vdev = kvmgt_vdev(vgpu); + kvm = vdev->kvm; if (!kvm || kvm->mm != current->mm) { gvt_vgpu_err("KVM is required to use Intel vGPU\n"); return -ESRCH; @@ -1637,8 +1840,6 @@ kvmgt_protect_table_init(info); gvt_cache_init(vgpu); - init_completion(&vgpu->vblank_done); - info->track_node.track_write = kvmgt_page_track_write; 
info->track_node.track_flush_slot = kvmgt_page_track_flush_slot; kvm_page_track_register_notifier(kvm, &info->track_node); @@ -1646,10 +1847,7 @@ info->debugfs_cache_entries = debugfs_create_ulong( "kvmgt_nr_cache_entries", 0444, vgpu->debugfs, - &vgpu->vdev.nr_cache_entries); - if (!info->debugfs_cache_entries) - gvt_vgpu_err("Cannot create kvmgt debugfs entry\n"); - + &vdev->nr_cache_entries); return 0; } @@ -1666,27 +1864,52 @@ return true; } -static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle) +static int kvmgt_attach_vgpu(void *p_vgpu, unsigned long *handle) { - /* nothing to do here */ + struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; + + vgpu->vdev = kzalloc(sizeof(struct kvmgt_vdev), GFP_KERNEL); + + if (!vgpu->vdev) + return -ENOMEM; + + kvmgt_vdev(vgpu)->vgpu = vgpu; + return 0; } -static void kvmgt_detach_vgpu(unsigned long handle) +static void kvmgt_detach_vgpu(void *p_vgpu) { - /* nothing to do here */ + int i; + struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; + struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); + + if (!vdev->region) + return; + + for (i = 0; i < vdev->num_regions; i++) + if (vdev->region[i].ops->release) + vdev->region[i].ops->release(vgpu, + &vdev->region[i]); + vdev->num_regions = 0; + kfree(vdev->region); + vdev->region = NULL; + + kfree(vdev); } static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data) { struct kvmgt_guest_info *info; struct intel_vgpu *vgpu; + struct kvmgt_vdev *vdev; if (!handle_valid(handle)) return -ESRCH; info = (struct kvmgt_guest_info *)handle; vgpu = info->vgpu; + vdev = kvmgt_vdev(vgpu); /* * When guest is poweroff, msi_trigger is set to NULL, but vgpu's @@ -1697,10 +1920,10 @@ * enabled by guest. so if msi_trigger is null, success is still * returned and don't inject interrupt into guest. 
*/ - if (vgpu->vdev.msi_trigger == NULL) + if (vdev->msi_trigger == NULL) return 0; - if (eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1) + if (eventfd_signal(vdev->msi_trigger, 1) == 1) return 0; return -EFAULT; @@ -1723,29 +1946,29 @@ return pfn; } -int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, +static int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, unsigned long size, dma_addr_t *dma_addr) { - struct kvmgt_guest_info *info; struct intel_vgpu *vgpu; + struct kvmgt_vdev *vdev; struct gvt_dma *entry; int ret; if (!handle_valid(handle)) return -EINVAL; - info = (struct kvmgt_guest_info *)handle; - vgpu = info->vgpu; + vgpu = ((struct kvmgt_guest_info *)handle)->vgpu; + vdev = kvmgt_vdev(vgpu); - mutex_lock(&info->vgpu->vdev.cache_lock); + mutex_lock(&vdev->cache_lock); - entry = __gvt_cache_find_gfn(info->vgpu, gfn); + entry = __gvt_cache_find_gfn(vgpu, gfn); if (!entry) { ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size); if (ret) goto err_unlock; - ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr, size); + ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size); if (ret) goto err_unmap; } else if (entry->size != size) { @@ -1757,7 +1980,7 @@ if (ret) goto err_unlock; - ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr, size); + ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size); if (ret) goto err_unmap; } else { @@ -1765,13 +1988,37 @@ *dma_addr = entry->dma_addr; } - mutex_unlock(&info->vgpu->vdev.cache_lock); + mutex_unlock(&vdev->cache_lock); return 0; err_unmap: gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size); err_unlock: - mutex_unlock(&info->vgpu->vdev.cache_lock); + mutex_unlock(&vdev->cache_lock); + return ret; +} + +static int kvmgt_dma_pin_guest_page(unsigned long handle, dma_addr_t dma_addr) +{ + struct kvmgt_guest_info *info; + struct kvmgt_vdev *vdev; + struct gvt_dma *entry; + int ret = 0; + + if (!handle_valid(handle)) + return -ENODEV; + + info = (struct kvmgt_guest_info *)handle; + vdev = kvmgt_vdev(info->vgpu); + + mutex_lock(&vdev->cache_lock); + entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr); + if (entry) + kref_get(&entry->ref); + else + ret = -ENOMEM; + mutex_unlock(&vdev->cache_lock); + return ret; } @@ -1784,54 +2031,37 @@ __gvt_cache_remove_entry(entry->vgpu, entry); } -void kvmgt_dma_unmap_guest_page(unsigned long handle, dma_addr_t dma_addr) +static void kvmgt_dma_unmap_guest_page(unsigned long handle, dma_addr_t dma_addr) { - struct kvmgt_guest_info *info; + struct intel_vgpu *vgpu; + struct kvmgt_vdev *vdev; struct gvt_dma *entry; if (!handle_valid(handle)) return; - info = (struct kvmgt_guest_info *)handle; + vgpu = ((struct kvmgt_guest_info *)handle)->vgpu; + vdev = kvmgt_vdev(vgpu); - mutex_lock(&info->vgpu->vdev.cache_lock); - entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr); + mutex_lock(&vdev->cache_lock); + entry = __gvt_cache_find_dma_addr(vgpu, dma_addr); if (entry) kref_put(&entry->ref, __gvt_dma_release); - mutex_unlock(&info->vgpu->vdev.cache_lock); + mutex_unlock(&vdev->cache_lock); } static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa, void *buf, unsigned long len, bool write) { struct kvmgt_guest_info *info; - struct kvm *kvm; - int idx, ret; - bool kthread = current->mm == NULL; if (!handle_valid(handle)) return -ESRCH; info = (struct kvmgt_guest_info *)handle; - kvm = info->kvm; - if (kthread) { - if (!mmget_not_zero(kvm->mm)) - return -EFAULT; - use_mm(kvm->mm); - } - - idx = srcu_read_lock(&kvm->srcu); - ret = write ? 
kvm_write_guest(kvm, gpa, buf, len) : - kvm_read_guest(kvm, gpa, buf, len); - srcu_read_unlock(&kvm->srcu, idx); - - if (kthread) { - unuse_mm(kvm->mm); - mmput(kvm->mm); - } - - return ret; + return vfio_dma_rw(kvmgt_vdev(info->vgpu)->vfio_group, + gpa, buf, len, write); } static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa, @@ -1871,7 +2101,8 @@ return ret; } -struct intel_gvt_mpt kvmgt_mpt = { +static struct intel_gvt_mpt kvmgt_mpt = { + .type = INTEL_GVT_HYPERVISOR_KVM, .host_init = kvmgt_host_init, .host_exit = kvmgt_host_exit, .attach_vgpu = kvmgt_attach_vgpu, @@ -1885,20 +2116,24 @@ .gfn_to_mfn = kvmgt_gfn_to_pfn, .dma_map_guest_page = kvmgt_dma_map_guest_page, .dma_unmap_guest_page = kvmgt_dma_unmap_guest_page, + .dma_pin_guest_page = kvmgt_dma_pin_guest_page, .set_opregion = kvmgt_set_opregion, + .set_edid = kvmgt_set_edid, .get_vfio_device = kvmgt_get_vfio_device, .put_vfio_device = kvmgt_put_vfio_device, .is_valid_gfn = kvmgt_is_valid_gfn, }; -EXPORT_SYMBOL_GPL(kvmgt_mpt); static int __init kvmgt_init(void) { + if (intel_gvt_register_hypervisor(&kvmgt_mpt) < 0) + return -ENODEV; return 0; } static void __exit kvmgt_exit(void) { + intel_gvt_unregister_hypervisor(); } module_init(kvmgt_init); -- Gitblit v1.6.2