| .. | .. |
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
| 1 | 2 | /* |
| 2 | 3 | * VFIO core |
| 3 | 4 | * |
| 4 | 5 | * Copyright (C) 2012 Red Hat, Inc. All rights reserved. |
| 5 | 6 | * Author: Alex Williamson <alex.williamson@redhat.com> |
| 6 | | - * |
| 7 | | - * This program is free software; you can redistribute it and/or modify |
| 8 | | - * it under the terms of the GNU General Public License version 2 as |
| 9 | | - * published by the Free Software Foundation. |
| 10 | 7 | * |
| 11 | 8 | * Derived from original vfio: |
| 12 | 9 | * Copyright 2010 Cisco Systems, Inc. All rights reserved. |
| .. | .. |
| 49 | 46 | struct mutex group_lock; |
| 50 | 47 | struct cdev group_cdev; |
| 51 | 48 | dev_t group_devt; |
| 52 | | - wait_queue_head_t release_q; |
| 53 | 49 | } vfio; |
| 54 | 50 | |
| 55 | 51 | struct vfio_iommu_driver { |
| .. | .. |
| 88 | 84 | atomic_t opened; |
| 89 | 85 | wait_queue_head_t container_q; |
| 90 | 86 | bool noiommu; |
| 87 | + unsigned int dev_counter; |
| 91 | 88 | struct kvm *kvm; |
| 92 | 89 | struct blocking_notifier_head notifier; |
| 93 | | -}; |
| 94 | | - |
| 95 | | -struct vfio_device { |
| 96 | | - struct kref kref; |
| 97 | | - struct device *dev; |
| 98 | | - const struct vfio_device_ops *ops; |
| 99 | | - struct vfio_group *group; |
| 100 | | - struct list_head group_next; |
| 101 | | - void *device_data; |
| 102 | 90 | }; |
| 103 | 91 | |
| 104 | 92 | #ifdef CONFIG_VFIO_NOIOMMU |
| .. | .. |
| 534 | 522 | /** |
| 535 | 523 | * Device objects - create, release, get, put, search |
| 536 | 524 | */ |
| 537 | | -static |
| 538 | | -struct vfio_device *vfio_group_create_device(struct vfio_group *group, |
| 539 | | - struct device *dev, |
| 540 | | - const struct vfio_device_ops *ops, |
| 541 | | - void *device_data) |
| 542 | | -{ |
| 543 | | - struct vfio_device *device; |
| 544 | | - |
| 545 | | - device = kzalloc(sizeof(*device), GFP_KERNEL); |
| 546 | | - if (!device) |
| 547 | | - return ERR_PTR(-ENOMEM); |
| 548 | | - |
| 549 | | - kref_init(&device->kref); |
| 550 | | - device->dev = dev; |
| 551 | | - device->group = group; |
| 552 | | - device->ops = ops; |
| 553 | | - device->device_data = device_data; |
| 554 | | - dev_set_drvdata(dev, device); |
| 555 | | - |
| 556 | | - /* No need to get group_lock, caller has group reference */ |
| 557 | | - vfio_group_get(group); |
| 558 | | - |
| 559 | | - mutex_lock(&group->device_lock); |
| 560 | | - list_add(&device->group_next, &group->device_list); |
| 561 | | - mutex_unlock(&group->device_lock); |
| 562 | | - |
| 563 | | - return device; |
| 564 | | -} |
| 565 | | - |
| 566 | | -static void vfio_device_release(struct kref *kref) |
| 567 | | -{ |
| 568 | | - struct vfio_device *device = container_of(kref, |
| 569 | | - struct vfio_device, kref); |
| 570 | | - struct vfio_group *group = device->group; |
| 571 | | - |
| 572 | | - list_del(&device->group_next); |
| 573 | | - mutex_unlock(&group->device_lock); |
| 574 | | - |
| 575 | | - dev_set_drvdata(device->dev, NULL); |
| 576 | | - |
| 577 | | - kfree(device); |
| 578 | | - |
| 579 | | - /* vfio_del_group_dev may be waiting for this device */ |
| 580 | | - wake_up(&vfio.release_q); |
| 581 | | -} |
| 582 | | - |
| 583 | 525 | /* Device reference always implies a group reference */ |
| 584 | 526 | void vfio_device_put(struct vfio_device *device) |
| 585 | 527 | { |
| 586 | | - struct vfio_group *group = device->group; |
| 587 | | - kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock); |
| 588 | | - vfio_group_put(group); |
| 528 | + if (refcount_dec_and_test(&device->refcount)) |
| 529 | + complete(&device->comp); |
| 589 | 530 | } |
| 590 | 531 | EXPORT_SYMBOL_GPL(vfio_device_put); |
| 591 | 532 | |
| 592 | | -static void vfio_device_get(struct vfio_device *device) |
| 533 | +static bool vfio_device_try_get(struct vfio_device *device) |
| 593 | 534 | { |
| 594 | | - vfio_group_get(device->group); |
| 595 | | - kref_get(&device->kref); |
| 535 | + return refcount_inc_not_zero(&device->refcount); |
| 596 | 536 | } |
| 597 | 537 | |
| 598 | 538 | static struct vfio_device *vfio_group_get_device(struct vfio_group *group, |
| .. | .. |
| 602 | 542 | |
| 603 | 543 | mutex_lock(&group->device_lock); |
| 604 | 544 | list_for_each_entry(device, &group->device_list, group_next) { |
| 605 | | - if (device->dev == dev) { |
| 606 | | - vfio_device_get(device); |
| 545 | + if (device->dev == dev && vfio_device_try_get(device)) { |
| 607 | 546 | mutex_unlock(&group->device_lock); |
| 608 | 547 | return device; |
| 609 | 548 | } |
| .. | .. |
| 627 | 566 | * that error notification via MSI can be affected for platforms that handle |
| 628 | 567 | * MSI within the same IOVA space as DMA. |
| 629 | 568 | */ |
| 630 | | -static const char * const vfio_driver_whitelist[] = { "pci-stub" }; |
| 569 | +static const char * const vfio_driver_allowed[] = { "pci-stub" }; |
| 631 | 570 | |
| 632 | | -static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv) |
| 571 | +static bool vfio_dev_driver_allowed(struct device *dev, |
| 572 | + struct device_driver *drv) |
| 633 | 573 | { |
| 634 | 574 | if (dev_is_pci(dev)) { |
| 635 | 575 | struct pci_dev *pdev = to_pci_dev(dev); |
| .. | .. |
| 638 | 578 | return true; |
| 639 | 579 | } |
| 640 | 580 | |
| 641 | | - return match_string(vfio_driver_whitelist, |
| 642 | | - ARRAY_SIZE(vfio_driver_whitelist), |
| 581 | + return match_string(vfio_driver_allowed, |
| 582 | + ARRAY_SIZE(vfio_driver_allowed), |
| 643 | 583 | drv->name) >= 0; |
| 644 | 584 | } |
| 645 | 585 | |
| .. | .. |
| 648 | 588 | * one of the following states: |
| 649 | 589 | * - driver-less |
| 650 | 590 | * - bound to a vfio driver |
| 651 | | - * - bound to a whitelisted driver |
| 591 | + * - bound to an otherwise allowed driver |
| 652 | 592 | * - a PCI interconnect device |
| 653 | 593 | * |
| 654 | 594 | * We use two methods to determine whether a device is bound to a vfio |
| .. | .. |
| 674 | 614 | } |
| 675 | 615 | mutex_unlock(&group->unbound_lock); |
| 676 | 616 | |
| 677 | | - if (!ret || !drv || vfio_dev_whitelisted(dev, drv)) |
| 617 | + if (!ret || !drv || vfio_dev_driver_allowed(dev, drv)) |
| 678 | 618 | return 0; |
| 679 | 619 | |
| 680 | 620 | device = vfio_group_get_device(group, dev); |
| .. | .. |
| 705 | 645 | return 0; |
| 706 | 646 | |
| 707 | 647 | /* TODO Prevent device auto probing */ |
| 708 | | - WARN(1, "Device %s added to live group %d!\n", dev_name(dev), |
| 709 | | - iommu_group_id(group->iommu_group)); |
| 648 | + dev_WARN(dev, "Device added to live group %d!\n", |
| 649 | + iommu_group_id(group->iommu_group)); |
| 710 | 650 | |
| 711 | 651 | return 0; |
| 712 | 652 | } |
| .. | .. |
| 749 | 689 | */ |
| 750 | 690 | break; |
| 751 | 691 | case IOMMU_GROUP_NOTIFY_BIND_DRIVER: |
| 752 | | - pr_debug("%s: Device %s, group %d binding to driver\n", |
| 753 | | - __func__, dev_name(dev), |
| 754 | | - iommu_group_id(group->iommu_group)); |
| 692 | + dev_dbg(dev, "%s: group %d binding to driver\n", __func__, |
| 693 | + iommu_group_id(group->iommu_group)); |
| 755 | 694 | break; |
| 756 | 695 | case IOMMU_GROUP_NOTIFY_BOUND_DRIVER: |
| 757 | | - pr_debug("%s: Device %s, group %d bound to driver %s\n", |
| 758 | | - __func__, dev_name(dev), |
| 759 | | - iommu_group_id(group->iommu_group), dev->driver->name); |
| 696 | + dev_dbg(dev, "%s: group %d bound to driver %s\n", __func__, |
| 697 | + iommu_group_id(group->iommu_group), dev->driver->name); |
| 760 | 698 | BUG_ON(vfio_group_nb_verify(group, dev)); |
| 761 | 699 | break; |
| 762 | 700 | case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER: |
| 763 | | - pr_debug("%s: Device %s, group %d unbinding from driver %s\n", |
| 764 | | - __func__, dev_name(dev), |
| 765 | | - iommu_group_id(group->iommu_group), dev->driver->name); |
| 701 | + dev_dbg(dev, "%s: group %d unbinding from driver %s\n", |
| 702 | + __func__, iommu_group_id(group->iommu_group), |
| 703 | + dev->driver->name); |
| 766 | 704 | break; |
| 767 | 705 | case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER: |
| 768 | | - pr_debug("%s: Device %s, group %d unbound from driver\n", |
| 769 | | - __func__, dev_name(dev), |
| 770 | | - iommu_group_id(group->iommu_group)); |
| 706 | + dev_dbg(dev, "%s: group %d unbound from driver\n", __func__, |
| 707 | + iommu_group_id(group->iommu_group)); |
| 771 | 708 | /* |
| 772 | 709 | * XXX An unbound device in a live group is ok, but we'd |
| 773 | 710 | * really like to avoid the above BUG_ON by preventing other |
| .. | .. |
| 803 | 740 | /** |
| 804 | 741 | * VFIO driver API |
| 805 | 742 | */ |
| 806 | | -int vfio_add_group_dev(struct device *dev, |
| 807 | | - const struct vfio_device_ops *ops, void *device_data) |
| 743 | +void vfio_init_group_dev(struct vfio_device *device, struct device *dev, |
| 744 | + const struct vfio_device_ops *ops, void *device_data) |
| 808 | 745 | { |
| 746 | + init_completion(&device->comp); |
| 747 | + device->dev = dev; |
| 748 | + device->ops = ops; |
| 749 | + device->device_data = device_data; |
| 750 | +} |
| 751 | +EXPORT_SYMBOL_GPL(vfio_init_group_dev); |
| 752 | + |
| 753 | +int vfio_register_group_dev(struct vfio_device *device) |
| 754 | +{ |
| 755 | + struct vfio_device *existing_device; |
| 809 | 756 | struct iommu_group *iommu_group; |
| 810 | 757 | struct vfio_group *group; |
| 811 | | - struct vfio_device *device; |
| 812 | 758 | |
| 813 | | - iommu_group = iommu_group_get(dev); |
| 759 | + iommu_group = iommu_group_get(device->dev); |
| 814 | 760 | if (!iommu_group) |
| 815 | 761 | return -EINVAL; |
| 816 | 762 | |
| .. | .. |
| 829 | 775 | iommu_group_put(iommu_group); |
| 830 | 776 | } |
| 831 | 777 | |
| 832 | | - device = vfio_group_get_device(group, dev); |
| 833 | | - if (device) { |
| 834 | | - WARN(1, "Device %s already exists on group %d\n", |
| 835 | | - dev_name(dev), iommu_group_id(iommu_group)); |
| 836 | | - vfio_device_put(device); |
| 778 | + existing_device = vfio_group_get_device(group, device->dev); |
| 779 | + if (existing_device) { |
| 780 | + dev_WARN(device->dev, "Device already exists on group %d\n", |
| 781 | + iommu_group_id(iommu_group)); |
| 782 | + vfio_device_put(existing_device); |
| 837 | 783 | vfio_group_put(group); |
| 838 | 784 | return -EBUSY; |
| 839 | 785 | } |
| 840 | 786 | |
| 841 | | - device = vfio_group_create_device(group, dev, ops, device_data); |
| 842 | | - if (IS_ERR(device)) { |
| 843 | | - vfio_group_put(group); |
| 844 | | - return PTR_ERR(device); |
| 845 | | - } |
| 787 | + /* Our reference on group is moved to the device */ |
| 788 | + device->group = group; |
| 846 | 789 | |
| 847 | | - /* |
| 848 | | - * Drop all but the vfio_device reference. The vfio_device holds |
| 849 | | - * a reference to the vfio_group, which holds a reference to the |
| 850 | | - * iommu_group. |
| 851 | | - */ |
| 852 | | - vfio_group_put(group); |
| 790 | + /* Refcounting can't start until the driver calls register */ |
| 791 | + refcount_set(&device->refcount, 1); |
| 792 | + |
| 793 | + mutex_lock(&group->device_lock); |
| 794 | + list_add(&device->group_next, &group->device_list); |
| 795 | + group->dev_counter++; |
| 796 | + mutex_unlock(&group->device_lock); |
| 853 | 797 | |
| 854 | 798 | return 0; |
| 799 | +} |
| 800 | +EXPORT_SYMBOL_GPL(vfio_register_group_dev); |
| 801 | + |
| 802 | +int vfio_add_group_dev(struct device *dev, const struct vfio_device_ops *ops, |
| 803 | + void *device_data) |
| 804 | +{ |
| 805 | + struct vfio_device *device; |
| 806 | + int ret; |
| 807 | + |
| 808 | + device = kzalloc(sizeof(*device), GFP_KERNEL); |
| 809 | + if (!device) |
| 810 | + return -ENOMEM; |
| 811 | + |
| 812 | + vfio_init_group_dev(device, dev, ops, device_data); |
| 813 | + ret = vfio_register_group_dev(device); |
| 814 | + if (ret) |
| 815 | + goto err_kfree; |
| 816 | + dev_set_drvdata(dev, device); |
| 817 | + return 0; |
| 818 | + |
| 819 | +err_kfree: |
| 820 | + kfree(device); |
| 821 | + return ret; |
| 855 | 822 | } |
| 856 | 823 | EXPORT_SYMBOL_GPL(vfio_add_group_dev); |
| 857 | 824 | |
| .. | .. |
| 881 | 848 | static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, |
| 882 | 849 | char *buf) |
| 883 | 850 | { |
| 884 | | - struct vfio_device *it, *device = NULL; |
| 851 | + struct vfio_device *it, *device = ERR_PTR(-ENODEV); |
| 885 | 852 | |
| 886 | 853 | mutex_lock(&group->device_lock); |
| 887 | 854 | list_for_each_entry(it, &group->device_list, group_next) { |
| 888 | | - if (!strcmp(dev_name(it->dev), buf)) { |
| 855 | + int ret; |
| 856 | + |
| 857 | + if (it->ops->match) { |
| 858 | + ret = it->ops->match(it->device_data, buf); |
| 859 | + if (ret < 0) { |
| 860 | + device = ERR_PTR(ret); |
| 861 | + break; |
| 862 | + } |
| 863 | + } else { |
| 864 | + ret = !strcmp(dev_name(it->dev), buf); |
| 865 | + } |
| 866 | + |
| 867 | + if (ret && vfio_device_try_get(it)) { |
| 889 | 868 | device = it; |
| 890 | | - vfio_device_get(device); |
| 891 | 869 | break; |
| 892 | 870 | } |
| 893 | 871 | } |
| .. | .. |
| 908 | 886 | /* |
| 909 | 887 | * Decrement the device reference count and wait for the device to be |
| 910 | 888 | * removed. Open file descriptors for the device... */ |
| 911 | | -void *vfio_del_group_dev(struct device *dev) |
| 889 | +void vfio_unregister_group_dev(struct vfio_device *device) |
| 912 | 890 | { |
| 913 | | - DEFINE_WAIT_FUNC(wait, woken_wake_function); |
| 914 | | - struct vfio_device *device = dev_get_drvdata(dev); |
| 915 | 891 | struct vfio_group *group = device->group; |
| 916 | | - void *device_data = device->device_data; |
| 917 | 892 | struct vfio_unbound_dev *unbound; |
| 918 | 893 | unsigned int i = 0; |
| 919 | 894 | bool interrupted = false; |
| 920 | | - |
| 921 | | - /* |
| 922 | | - * The group exists so long as we have a device reference. Get |
| 923 | | - * a group reference and use it to scan for the device going away. |
| 924 | | - */ |
| 925 | | - vfio_group_get(group); |
| 895 | + long rc; |
| 926 | 896 | |
| 927 | 897 | /* |
| 928 | 898 | * When the device is removed from the group, the group suddenly |
| .. | .. |
| 935 | 905 | */ |
| 936 | 906 | unbound = kzalloc(sizeof(*unbound), GFP_KERNEL); |
| 937 | 907 | if (unbound) { |
| 938 | | - unbound->dev = dev; |
| 908 | + unbound->dev = device->dev; |
| 939 | 909 | mutex_lock(&group->unbound_lock); |
| 940 | 910 | list_add(&unbound->unbound_next, &group->unbound_list); |
| 941 | 911 | mutex_unlock(&group->unbound_lock); |
| .. | .. |
| 943 | 913 | WARN_ON(!unbound); |
| 944 | 914 | |
| 945 | 915 | vfio_device_put(device); |
| 946 | | - |
| 947 | | - /* |
| 948 | | - * If the device is still present in the group after the above |
| 949 | | - * 'put', then it is in use and we need to request it from the |
| 950 | | - * bus driver. The driver may in turn need to request the |
| 951 | | - * device from the user. We send the request on an arbitrary |
| 952 | | - * interval with counter to allow the driver to take escalating |
| 953 | | - * measures to release the device if it has the ability to do so. |
| 954 | | - */ |
| 955 | | - add_wait_queue(&vfio.release_q, &wait); |
| 956 | | - |
| 957 | | - do { |
| 958 | | - device = vfio_group_get_device(group, dev); |
| 959 | | - if (!device) |
| 960 | | - break; |
| 961 | | - |
| 916 | + rc = try_wait_for_completion(&device->comp); |
| 917 | + while (rc <= 0) { |
| 962 | 918 | if (device->ops->request) |
| 963 | | - device->ops->request(device_data, i++); |
| 964 | | - |
| 965 | | - vfio_device_put(device); |
| 919 | + device->ops->request(device->device_data, i++); |
| 966 | 920 | |
| 967 | 921 | if (interrupted) { |
| 968 | | - wait_woken(&wait, TASK_UNINTERRUPTIBLE, HZ * 10); |
| 922 | + rc = wait_for_completion_timeout(&device->comp, |
| 923 | + HZ * 10); |
| 969 | 924 | } else { |
| 970 | | - wait_woken(&wait, TASK_INTERRUPTIBLE, HZ * 10); |
| 971 | | - if (signal_pending(current)) { |
| 925 | + rc = wait_for_completion_interruptible_timeout( |
| 926 | + &device->comp, HZ * 10); |
| 927 | + if (rc < 0) { |
| 972 | 928 | interrupted = true; |
| 973 | | - dev_warn(dev, |
| 929 | + dev_warn(device->dev, |
| 974 | 930 | "Device is currently in use, task" |
| 975 | 931 | " \"%s\" (%d) " |
| 976 | 932 | "blocked until device is released", |
| 977 | 933 | current->comm, task_pid_nr(current)); |
| 978 | 934 | } |
| 979 | 935 | } |
| 936 | + } |
| 980 | 937 | |
| 981 | | - } while (1); |
| 938 | + mutex_lock(&group->device_lock); |
| 939 | + list_del(&device->group_next); |
| 940 | + group->dev_counter--; |
| 941 | + mutex_unlock(&group->device_lock); |
| 982 | 942 | |
| 983 | | - remove_wait_queue(&vfio.release_q, &wait); |
| 984 | 943 | /* |
| 985 | 944 | * In order to support multiple devices per group, devices can be |
| 986 | 945 | * plucked from the group while other devices in the group are still |
| .. | .. |
| 998 | 957 | if (list_empty(&group->device_list)) |
| 999 | 958 | wait_event(group->container_q, !group->container); |
| 1000 | 959 | |
| 960 | + /* Matches the get in vfio_register_group_dev() */ |
| 1001 | 961 | vfio_group_put(group); |
| 962 | +} |
| 963 | +EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); |
| 1002 | 964 | |
| 965 | +void *vfio_del_group_dev(struct device *dev) |
| 966 | +{ |
| 967 | + struct vfio_device *device = dev_get_drvdata(dev); |
| 968 | + void *device_data = device->device_data; |
| 969 | + |
| 970 | + vfio_unregister_group_dev(device); |
| 971 | + dev_set_drvdata(dev, NULL); |
| 972 | + kfree(device); |
| 1003 | 973 | return device_data; |
| 1004 | 974 | } |
| 1005 | 975 | EXPORT_SYMBOL_GPL(vfio_del_group_dev); |
| .. | .. |
| 1190 | 1160 | return ret; |
| 1191 | 1161 | } |
| 1192 | 1162 | |
| 1193 | | -#ifdef CONFIG_COMPAT |
| 1194 | | -static long vfio_fops_compat_ioctl(struct file *filep, |
| 1195 | | - unsigned int cmd, unsigned long arg) |
| 1196 | | -{ |
| 1197 | | - arg = (unsigned long)compat_ptr(arg); |
| 1198 | | - return vfio_fops_unl_ioctl(filep, cmd, arg); |
| 1199 | | -} |
| 1200 | | -#endif /* CONFIG_COMPAT */ |
| 1201 | | - |
| 1202 | 1163 | static int vfio_fops_open(struct inode *inode, struct file *filep) |
| 1203 | 1164 | { |
| 1204 | 1165 | struct vfio_container *container; |
| .. | .. |
| 1281 | 1242 | .read = vfio_fops_read, |
| 1282 | 1243 | .write = vfio_fops_write, |
| 1283 | 1244 | .unlocked_ioctl = vfio_fops_unl_ioctl, |
| 1284 | | -#ifdef CONFIG_COMPAT |
| 1285 | | - .compat_ioctl = vfio_fops_compat_ioctl, |
| 1286 | | -#endif |
| 1245 | + .compat_ioctl = compat_ptr_ioctl, |
| 1287 | 1246 | .mmap = vfio_fops_mmap, |
| 1288 | 1247 | }; |
| 1289 | 1248 | |
| .. | .. |
| 1447 | 1406 | return -EPERM; |
| 1448 | 1407 | |
| 1449 | 1408 | device = vfio_device_get_from_name(group, buf); |
| 1450 | | - if (!device) |
| 1451 | | - return -ENODEV; |
| 1409 | + if (IS_ERR(device)) |
| 1410 | + return PTR_ERR(device); |
| 1452 | 1411 | |
| 1453 | 1412 | ret = device->ops->open(device->device_data); |
| 1454 | 1413 | if (ret) { |
| .. | .. |
| 1562 | 1521 | return ret; |
| 1563 | 1522 | } |
| 1564 | 1523 | |
| 1565 | | -#ifdef CONFIG_COMPAT |
| 1566 | | -static long vfio_group_fops_compat_ioctl(struct file *filep, |
| 1567 | | - unsigned int cmd, unsigned long arg) |
| 1568 | | -{ |
| 1569 | | - arg = (unsigned long)compat_ptr(arg); |
| 1570 | | - return vfio_group_fops_unl_ioctl(filep, cmd, arg); |
| 1571 | | -} |
| 1572 | | -#endif /* CONFIG_COMPAT */ |
| 1573 | | - |
| 1574 | 1524 | static int vfio_group_fops_open(struct inode *inode, struct file *filep) |
| 1575 | 1525 | { |
| 1576 | 1526 | struct vfio_group *group; |
| .. | .. |
| 1626 | 1576 | static const struct file_operations vfio_group_fops = { |
| 1627 | 1577 | .owner = THIS_MODULE, |
| 1628 | 1578 | .unlocked_ioctl = vfio_group_fops_unl_ioctl, |
| 1629 | | -#ifdef CONFIG_COMPAT |
| 1630 | | - .compat_ioctl = vfio_group_fops_compat_ioctl, |
| 1631 | | -#endif |
| 1579 | + .compat_ioctl = compat_ptr_ioctl, |
| 1632 | 1580 | .open = vfio_group_fops_open, |
| 1633 | 1581 | .release = vfio_group_fops_release, |
| 1634 | 1582 | }; |
| .. | .. |
| 1693 | 1641 | return device->ops->mmap(device->device_data, vma); |
| 1694 | 1642 | } |
| 1695 | 1643 | |
| 1696 | | -#ifdef CONFIG_COMPAT |
| 1697 | | -static long vfio_device_fops_compat_ioctl(struct file *filep, |
| 1698 | | - unsigned int cmd, unsigned long arg) |
| 1699 | | -{ |
| 1700 | | - arg = (unsigned long)compat_ptr(arg); |
| 1701 | | - return vfio_device_fops_unl_ioctl(filep, cmd, arg); |
| 1702 | | -} |
| 1703 | | -#endif /* CONFIG_COMPAT */ |
| 1704 | | - |
| 1705 | 1644 | static const struct file_operations vfio_device_fops = { |
| 1706 | 1645 | .owner = THIS_MODULE, |
| 1707 | 1646 | .release = vfio_device_fops_release, |
| 1708 | 1647 | .read = vfio_device_fops_read, |
| 1709 | 1648 | .write = vfio_device_fops_write, |
| 1710 | 1649 | .unlocked_ioctl = vfio_device_fops_unl_ioctl, |
| 1711 | | -#ifdef CONFIG_COMPAT |
| 1712 | | - .compat_ioctl = vfio_device_fops_compat_ioctl, |
| 1713 | | -#endif |
| 1650 | + .compat_ioctl = compat_ptr_ioctl, |
| 1714 | 1651 | .mmap = vfio_device_fops_mmap, |
| 1715 | 1652 | }; |
| 1716 | 1653 | |
| .. | .. |
| 1758 | 1695 | return group; |
| 1759 | 1696 | } |
| 1760 | 1697 | EXPORT_SYMBOL_GPL(vfio_group_get_external_user); |
| 1698 | + |
| 1699 | +/** |
| 1700 | + * External user API, exported by symbols to be linked dynamically. |
| 1701 | + * The external user passes in a device pointer |
| 1702 | + * to verify that: |
| 1703 | + * - A VFIO group is associated with the device; |
| 1704 | + * - IOMMU is set for the group. |
| 1705 | + * If both checks passed, vfio_group_get_external_user_from_dev() |
| 1706 | + * increments the container user counter to prevent the VFIO group |
| 1707 | + * from disposal before external user exits and returns the pointer |
| 1708 | + * to the VFIO group. |
| 1709 | + * |
| 1710 | + * When the external user finishes using the VFIO group, it calls |
| 1711 | + * vfio_group_put_external_user() to release the VFIO group and |
| 1712 | + * decrement the container user counter. |
| 1713 | + * |
| 1714 | + * @dev [in] : device |
| 1715 | + * Return error PTR or pointer to VFIO group. |
| 1716 | + */ |
| 1717 | + |
| 1718 | +struct vfio_group *vfio_group_get_external_user_from_dev(struct device *dev) |
| 1719 | +{ |
| 1720 | + struct vfio_group *group; |
| 1721 | + int ret; |
| 1722 | + |
| 1723 | + group = vfio_group_get_from_dev(dev); |
| 1724 | + if (!group) |
| 1725 | + return ERR_PTR(-ENODEV); |
| 1726 | + |
| 1727 | + ret = vfio_group_add_container_user(group); |
| 1728 | + if (ret) { |
| 1729 | + vfio_group_put(group); |
| 1730 | + return ERR_PTR(ret); |
| 1731 | + } |
| 1732 | + |
| 1733 | + return group; |
| 1734 | +} |
| 1735 | +EXPORT_SYMBOL_GPL(vfio_group_get_external_user_from_dev); |
| 1761 | 1736 | |
| 1762 | 1737 | void vfio_group_put_external_user(struct vfio_group *group) |
| 1763 | 1738 | { |
| .. | .. |
| 1808 | 1783 | buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); |
| 1809 | 1784 | if (!buf) { |
| 1810 | 1785 | kfree(caps->buf); |
| 1786 | + caps->buf = NULL; |
| 1811 | 1787 | caps->size = 0; |
| 1812 | 1788 | return ERR_PTR(-ENOMEM); |
| 1813 | 1789 | } |
| .. | .. |
| 1934 | 1910 | if (!group) |
| 1935 | 1911 | return -ENODEV; |
| 1936 | 1912 | |
| 1913 | + if (group->dev_counter > 1) { |
| 1914 | + ret = -EINVAL; |
| 1915 | + goto err_pin_pages; |
| 1916 | + } |
| 1917 | + |
| 1937 | 1918 | ret = vfio_group_add_container_user(group); |
| 1938 | 1919 | if (ret) |
| 1939 | 1920 | goto err_pin_pages; |
| .. | .. |
| 1941 | 1922 | container = group->container; |
| 1942 | 1923 | driver = container->iommu_driver; |
| 1943 | 1924 | if (likely(driver && driver->ops->pin_pages)) |
| 1944 | | - ret = driver->ops->pin_pages(container->iommu_data, user_pfn, |
| 1925 | + ret = driver->ops->pin_pages(container->iommu_data, |
| 1926 | + group->iommu_group, user_pfn, |
| 1945 | 1927 | npage, prot, phys_pfn); |
| 1946 | 1928 | else |
| 1947 | 1929 | ret = -ENOTTY; |
| .. | .. |
| 1999 | 1981 | return ret; |
| 2000 | 1982 | } |
| 2001 | 1983 | EXPORT_SYMBOL(vfio_unpin_pages); |
| 1984 | + |
| 1985 | +/* |
| 1986 | + * Pin a set of guest IOVA PFNs and return their associated host PFNs for a |
| 1987 | + * VFIO group. |
| 1988 | + * |
| 1989 | + * The caller needs to call vfio_group_get_external_user() or |
| 1990 | + * vfio_group_get_external_user_from_dev() prior to calling this interface, |
| 1991 | + * so as to prevent the VFIO group from disposal in the middle of the call. |
| 1992 | + * But it can keep the reference to the VFIO group for several calls into |
| 1993 | + * this interface. |
| 1994 | + * After finishing using of the VFIO group, the caller needs to release the |
| 1995 | + * VFIO group by calling vfio_group_put_external_user(). |
| 1996 | + * |
| 1997 | + * @group [in] : VFIO group |
| 1998 | + * @user_iova_pfn [in] : array of user/guest IOVA PFNs to be pinned. |
| 1999 | + * @npage [in] : count of elements in user_iova_pfn array. |
| 2000 | + * This count should not be greater than |
| 2001 | + * VFIO_PIN_PAGES_MAX_ENTRIES. |
| 2002 | + * @prot [in] : protection flags |
| 2003 | + * @phys_pfn [out] : array of host PFNs |
| 2004 | + * Return error or number of pages pinned. |
| 2005 | + */ |
| 2006 | +int vfio_group_pin_pages(struct vfio_group *group, |
| 2007 | + unsigned long *user_iova_pfn, int npage, |
| 2008 | + int prot, unsigned long *phys_pfn) |
| 2009 | +{ |
| 2010 | + struct vfio_container *container; |
| 2011 | + struct vfio_iommu_driver *driver; |
| 2012 | + int ret; |
| 2013 | + |
| 2014 | + if (!group || !user_iova_pfn || !phys_pfn || !npage) |
| 2015 | + return -EINVAL; |
| 2016 | + |
| 2017 | + if (group->dev_counter > 1) |
| 2018 | + return -EINVAL; |
| 2019 | + |
| 2020 | + if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) |
| 2021 | + return -E2BIG; |
| 2022 | + |
| 2023 | + container = group->container; |
| 2024 | + driver = container->iommu_driver; |
| 2025 | + if (likely(driver && driver->ops->pin_pages)) |
| 2026 | + ret = driver->ops->pin_pages(container->iommu_data, |
| 2027 | + group->iommu_group, user_iova_pfn, |
| 2028 | + npage, prot, phys_pfn); |
| 2029 | + else |
| 2030 | + ret = -ENOTTY; |
| 2031 | + |
| 2032 | + return ret; |
| 2033 | +} |
| 2034 | +EXPORT_SYMBOL(vfio_group_pin_pages); |
| 2035 | + |
| 2036 | +/* |
| 2037 | + * Unpin a set of guest IOVA PFNs for a VFIO group. |
| 2038 | + * |
| 2039 | + * The caller needs to call vfio_group_get_external_user() or |
| 2040 | + * vfio_group_get_external_user_from_dev() prior to calling this interface, |
| 2041 | + * so as to prevent the VFIO group from disposal in the middle of the call. |
| 2042 | + * But it can keep the reference to the VFIO group for several calls into |
| 2043 | + * this interface. |
| 2044 | + * After finishing using of the VFIO group, the caller needs to release the |
| 2045 | + * VFIO group by calling vfio_group_put_external_user(). |
| 2046 | + * |
| 2047 | + * @group [in] : vfio group |
| 2048 | + * @user_iova_pfn [in] : array of user/guest IOVA PFNs to be unpinned. |
| 2049 | + * @npage [in] : count of elements in user_iova_pfn array. |
| 2050 | + * This count should not be greater than |
| 2051 | + * VFIO_PIN_PAGES_MAX_ENTRIES. |
| 2052 | + * Return error or number of pages unpinned. |
| 2053 | + */ |
| 2054 | +int vfio_group_unpin_pages(struct vfio_group *group, |
| 2055 | + unsigned long *user_iova_pfn, int npage) |
| 2056 | +{ |
| 2057 | + struct vfio_container *container; |
| 2058 | + struct vfio_iommu_driver *driver; |
| 2059 | + int ret; |
| 2060 | + |
| 2061 | + if (!group || !user_iova_pfn || !npage) |
| 2062 | + return -EINVAL; |
| 2063 | + |
| 2064 | + if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) |
| 2065 | + return -E2BIG; |
| 2066 | + |
| 2067 | + container = group->container; |
| 2068 | + driver = container->iommu_driver; |
| 2069 | + if (likely(driver && driver->ops->unpin_pages)) |
| 2070 | + ret = driver->ops->unpin_pages(container->iommu_data, |
| 2071 | + user_iova_pfn, npage); |
| 2072 | + else |
| 2073 | + ret = -ENOTTY; |
| 2074 | + |
| 2075 | + return ret; |
| 2076 | +} |
| 2077 | +EXPORT_SYMBOL(vfio_group_unpin_pages); |
| 2078 | + |
| 2079 | + |
| 2080 | +/* |
| 2081 | + * This interface allows the CPUs to perform some sort of virtual DMA on |
| 2082 | + * behalf of the device. |
| 2083 | + * |
| 2084 | + * CPUs read/write from/into a range of IOVAs pointing to user space memory |
| 2085 | + * into/from a kernel buffer. |
| 2086 | + * |
| 2087 | + * As the read/write of user space memory is conducted via the CPUs and is |
| 2088 | + * not a real device DMA, it is not necessary to pin the user space memory. |
| 2089 | + * |
| 2090 | + * The caller needs to call vfio_group_get_external_user() or |
| 2091 | + * vfio_group_get_external_user_from_dev() prior to calling this interface, |
| 2092 | + * so as to prevent the VFIO group from disposal in the middle of the call. |
| 2093 | + * But it can keep the reference to the VFIO group for several calls into |
| 2094 | + * this interface. |
| 2095 | + * After finishing using of the VFIO group, the caller needs to release the |
| 2096 | + * VFIO group by calling vfio_group_put_external_user(). |
| 2097 | + * |
| 2098 | + * @group [in] : VFIO group |
| 2099 | + * @user_iova [in] : base IOVA of a user space buffer |
| 2100 | + * @data [in] : pointer to kernel buffer |
| 2101 | + * @len [in] : kernel buffer length |
| 2102 | + * @write : indicate read or write |
| 2103 | + * Return error code on failure or 0 on success. |
| 2104 | + */ |
| 2105 | +int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova, |
| 2106 | + void *data, size_t len, bool write) |
| 2107 | +{ |
| 2108 | + struct vfio_container *container; |
| 2109 | + struct vfio_iommu_driver *driver; |
| 2110 | + int ret = 0; |
| 2111 | + |
| 2112 | + if (!group || !data || len <= 0) |
| 2113 | + return -EINVAL; |
| 2114 | + |
| 2115 | + container = group->container; |
| 2116 | + driver = container->iommu_driver; |
| 2117 | + |
| 2118 | + if (likely(driver && driver->ops->dma_rw)) |
| 2119 | + ret = driver->ops->dma_rw(container->iommu_data, |
| 2120 | + user_iova, data, len, write); |
| 2121 | + else |
| 2122 | + ret = -ENOTTY; |
| 2123 | + |
| 2124 | + return ret; |
| 2125 | +} |
| 2126 | +EXPORT_SYMBOL(vfio_dma_rw); |
| 2002 | 2127 | |
| 2003 | 2128 | static int vfio_register_iommu_notifier(struct vfio_group *group, |
| 2004 | 2129 | unsigned long *events, |
| .. | .. |
| 2192 | 2317 | mutex_init(&vfio.iommu_drivers_lock); |
| 2193 | 2318 | INIT_LIST_HEAD(&vfio.group_list); |
| 2194 | 2319 | INIT_LIST_HEAD(&vfio.iommu_drivers_list); |
| 2195 | | - init_waitqueue_head(&vfio.release_q); |
| 2196 | 2320 | |
| 2197 | 2321 | ret = misc_register(&vfio_dev); |
| 2198 | 2322 | if (ret) { |
| .. | .. |
| 2209 | 2333 | |
| 2210 | 2334 | vfio.class->devnode = vfio_devnode; |
| 2211 | 2335 | |
| 2212 | | - ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK, "vfio"); |
| 2336 | + ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio"); |
| 2213 | 2337 | if (ret) |
| 2214 | 2338 | goto err_alloc_chrdev; |
| 2215 | 2339 | |
| 2216 | 2340 | cdev_init(&vfio.group_cdev, &vfio_group_fops); |
| 2217 | | - ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK); |
| 2341 | + ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK + 1); |
| 2218 | 2342 | if (ret) |
| 2219 | 2343 | goto err_cdev_add; |
| 2220 | 2344 | |
| .. | .. |
| 2226 | 2350 | return 0; |
| 2227 | 2351 | |
| 2228 | 2352 | err_cdev_add: |
| 2229 | | - unregister_chrdev_region(vfio.group_devt, MINORMASK); |
| 2353 | + unregister_chrdev_region(vfio.group_devt, MINORMASK + 1); |
| 2230 | 2354 | err_alloc_chrdev: |
| 2231 | 2355 | class_destroy(vfio.class); |
| 2232 | 2356 | vfio.class = NULL; |
| .. | .. |
| 2244 | 2368 | #endif |
| 2245 | 2369 | idr_destroy(&vfio.group_idr); |
| 2246 | 2370 | cdev_del(&vfio.group_cdev); |
| 2247 | | - unregister_chrdev_region(vfio.group_devt, MINORMASK); |
| 2371 | + unregister_chrdev_region(vfio.group_devt, MINORMASK + 1); |
| 2248 | 2372 | class_destroy(vfio.class); |
| 2249 | 2373 | vfio.class = NULL; |
| 2250 | 2374 | misc_deregister(&vfio_dev); |
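
The hunks above split the old allocate-and-register vfio_add_group_dev()/vfio_del_group_dev() pair into vfio_init_group_dev(), vfio_register_group_dev() and vfio_unregister_group_dev(), built around a caller-embedded struct vfio_device whose lifetime is tracked with a refcount and a completion instead of a kref and the global release_q. Below is a minimal sketch of how a bus driver might adopt the new flow; my_device, my_vfio_dev_ops, my_probe and my_remove are hypothetical names introduced for illustration, and only the vfio_* calls come from the diff itself.

```c
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/vfio.h>

struct my_device {
	struct vfio_device vdev;	/* embedded, no separate allocation */
	void __iomem *regs;
};

static const struct vfio_device_ops my_vfio_dev_ops; /* open/release/ioctl/... */

static int my_probe(struct device *dev)
{
	struct my_device *mdev;
	int ret;

	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
	if (!mdev)
		return -ENOMEM;

	/* At this stage of the API, device_data is still carried separately */
	vfio_init_group_dev(&mdev->vdev, dev, &my_vfio_dev_ops, mdev);

	ret = vfio_register_group_dev(&mdev->vdev);
	if (ret) {
		kfree(mdev);
		return ret;
	}
	dev_set_drvdata(dev, mdev);
	return 0;
}

static void my_remove(struct device *dev)
{
	struct my_device *mdev = dev_get_drvdata(dev);

	/* Blocks until every outstanding vfio_device reference is dropped */
	vfio_unregister_group_dev(&mdev->vdev);
	kfree(mdev);
}
```

The design point the sketch illustrates: with the structure embedded in the driver's own object, vfio_add_group_dev()'s kzalloc/kfree of struct vfio_device becomes a thin compatibility wrapper, and unregistration waits on device->comp rather than rescanning the group list from a wait queue.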
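
The diff also adds external-user helpers (vfio_group_get_external_user_from_dev(), vfio_group_pin_pages(), vfio_group_unpin_pages(), vfio_dma_rw()) for in-kernel consumers such as vendor mdev drivers. A hedged sketch of one read path follows; my_read_guest_buffer() is a hypothetical helper, and only the vfio_* functions and their signatures are taken from the patch.

```c
#include <linux/err.h>
#include <linux/vfio.h>

/*
 * Hypothetical helper: copy 'len' bytes from a guest IOVA range into a
 * kernel buffer using the CPU-based vfio_dma_rw() added above.
 */
static int my_read_guest_buffer(struct device *dev, dma_addr_t iova,
				void *buf, size_t len)
{
	struct vfio_group *group;
	int ret;

	/* Takes a container user reference so the group cannot go away mid-call */
	group = vfio_group_get_external_user_from_dev(dev);
	if (IS_ERR(group))
		return PTR_ERR(group);

	/* write=false: read from the IOVA range into the kernel buffer */
	ret = vfio_dma_rw(group, iova, buf, len, false);

	vfio_group_put_external_user(group);
	return ret;
}
```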