| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* Copyright (C) 2009 Red Hat, Inc. |
|---|
| 2 | 3 | * Copyright (C) 2006 Rusty Russell IBM Corporation |
|---|
| 3 | 4 | * |
|---|
| .. | .. |
|---|
| 6 | 7 | * Inspiration, some code, and most witty comments come from |
|---|
| 7 | 8 | * Documentation/virtual/lguest/lguest.c, by Rusty Russell |
|---|
| 8 | 9 | * |
|---|
| 9 | | - * This work is licensed under the terms of the GNU GPL, version 2. |
|---|
| 10 | | - * |
|---|
| 11 | 10 | * Generic code for virtio server in host kernel. |
|---|
| 12 | 11 | */ |
|---|
| 13 | 12 | |
|---|
| .. | .. |
|---|
| 15 | 14 | #include <linux/vhost.h> |
|---|
| 16 | 15 | #include <linux/uio.h> |
|---|
| 17 | 16 | #include <linux/mm.h> |
|---|
| 18 | | -#include <linux/mmu_context.h> |
|---|
| 19 | 17 | #include <linux/miscdevice.h> |
|---|
| 20 | 18 | #include <linux/mutex.h> |
|---|
| 21 | 19 | #include <linux/poll.h> |
|---|
| .. | .. |
|---|
| 50 | 48 | |
|---|
| 51 | 49 | #define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num]) |
|---|
| 52 | 50 | #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num]) |
|---|
| 53 | | - |
|---|
| 54 | | -INTERVAL_TREE_DEFINE(struct vhost_umem_node, |
|---|
| 55 | | - rb, __u64, __subtree_last, |
|---|
| 56 | | - START, LAST, static inline, vhost_umem_interval_tree); |
|---|
| 57 | 51 | |
|---|
| 58 | 52 | #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY |
|---|
| 59 | 53 | static void vhost_disable_cross_endian(struct vhost_virtqueue *vq) |
|---|
| .. | .. |
|---|
| 171 | 165 | void *key) |
|---|
| 172 | 166 | { |
|---|
| 173 | 167 | struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait); |
|---|
| 168 | + struct vhost_work *work = &poll->work; |
|---|
| 174 | 169 | |
|---|
| 175 | 170 | if (!(key_to_poll(key) & poll->mask)) |
|---|
| 176 | 171 | return 0; |
|---|
| 177 | 172 | |
|---|
| 178 | | - vhost_poll_queue(poll); |
|---|
| 173 | + if (!poll->dev->use_worker) |
|---|
| 174 | + work->fn(work); |
|---|
| 175 | + else |
|---|
| 176 | + vhost_poll_queue(poll); |
|---|
| 177 | + |
|---|
| 179 | 178 | return 0; |
|---|
| 180 | 179 | } |
|---|
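One behavioral consequence of the hunk above is easy to miss, so it is spelled out here. This is a reading note on the patch, not code it adds:

```c
/* With use_worker == false (the vDPA-style case this series
 * introduces), a poll wakeup runs the work item synchronously in the
 * waker's context instead of queueing it to the vhost kthread. The
 * wait-queue callback is invoked with the waitqueue lock held, so a
 * work->fn dispatched this way must be safe to run in atomic context
 * and must not sleep. */
```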
| 181 | 180 | |
|---|
| .. | .. |
|---|
| 205 | 204 | int vhost_poll_start(struct vhost_poll *poll, struct file *file) |
|---|
| 206 | 205 | { |
|---|
| 207 | 206 | __poll_t mask; |
|---|
| 208 | | - int ret = 0; |
|---|
| 209 | 207 | |
|---|
| 210 | 208 | if (poll->wqh) |
|---|
| 211 | 209 | return 0; |
|---|
| .. | .. |
|---|
| 215 | 213 | vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask)); |
|---|
| 216 | 214 | if (mask & EPOLLERR) { |
|---|
| 217 | 215 | vhost_poll_stop(poll); |
|---|
| 218 | | - ret = -EINVAL; |
|---|
| 216 | + return -EINVAL; |
|---|
| 219 | 217 | } |
|---|
| 220 | 218 | |
|---|
| 221 | | - return ret; |
|---|
| 219 | + return 0; |
|---|
| 222 | 220 | } |
|---|
| 223 | 221 | EXPORT_SYMBOL_GPL(vhost_poll_start); |
|---|
| 224 | 222 | |
|---|
| .. | .. |
|---|
| 300 | 298 | __vhost_vq_meta_reset(d->vqs[i]); |
|---|
| 301 | 299 | } |
|---|
| 302 | 300 | |
|---|
| 301 | +static void vhost_vring_call_reset(struct vhost_vring_call *call_ctx) |
|---|
| 302 | +{ |
|---|
| 303 | + call_ctx->ctx = NULL; |
|---|
| 304 | + memset(&call_ctx->producer, 0x0, sizeof(struct irq_bypass_producer)); |
|---|
| 305 | +} |
|---|
| 306 | + |
|---|
| 307 | +bool vhost_vq_is_setup(struct vhost_virtqueue *vq) |
|---|
| 308 | +{ |
|---|
| 309 | + return vq->avail && vq->desc && vq->used && vhost_vq_access_ok(vq); |
|---|
| 310 | +} |
|---|
| 311 | +EXPORT_SYMBOL_GPL(vhost_vq_is_setup); |
|---|
| 312 | + |
|---|
| 303 | 313 | static void vhost_vq_reset(struct vhost_dev *dev, |
|---|
| 304 | 314 | struct vhost_virtqueue *vq) |
|---|
| 305 | 315 | { |
|---|
| .. | .. |
|---|
| 321 | 331 | vq->log_base = NULL; |
|---|
| 322 | 332 | vq->error_ctx = NULL; |
|---|
| 323 | 333 | vq->kick = NULL; |
|---|
| 324 | | - vq->call_ctx = NULL; |
|---|
| 325 | 334 | vq->log_ctx = NULL; |
|---|
| 326 | 335 | vhost_disable_cross_endian(vq); |
|---|
| 327 | 336 | vhost_reset_is_le(vq); |
|---|
| 328 | 337 | vq->busyloop_timeout = 0; |
|---|
| 329 | 338 | vq->umem = NULL; |
|---|
| 330 | 339 | vq->iotlb = NULL; |
|---|
| 340 | + vhost_vring_call_reset(&vq->call_ctx); |
|---|
| 331 | 341 | __vhost_vq_meta_reset(vq); |
|---|
| 332 | 342 | } |
|---|
| 333 | 343 | |
|---|
| .. | .. |
|---|
| 336 | 346 | struct vhost_dev *dev = data; |
|---|
| 337 | 347 | struct vhost_work *work, *work_next; |
|---|
| 338 | 348 | struct llist_node *node; |
|---|
| 339 | | - mm_segment_t oldfs = get_fs(); |
|---|
| 340 | 349 | |
|---|
| 341 | | - set_fs(USER_DS); |
|---|
| 342 | | - use_mm(dev->mm); |
|---|
| 350 | + kthread_use_mm(dev->mm); |
|---|
| 343 | 351 | |
|---|
| 344 | 352 | for (;;) { |
|---|
| 345 | 353 | /* mb paired w/ kthread_stop */ |
|---|
| .. | .. |
|---|
| 367 | 375 | schedule(); |
|---|
| 368 | 376 | } |
|---|
| 369 | 377 | } |
|---|
| 370 | | - unuse_mm(dev->mm); |
|---|
| 371 | | - set_fs(oldfs); |
|---|
| 378 | + kthread_unuse_mm(dev->mm); |
|---|
| 372 | 379 | return 0; |
|---|
| 373 | 380 | } |
|---|
| 374 | 381 | |
|---|
| .. | .. |
|---|
| 431 | 438 | } |
|---|
| 432 | 439 | EXPORT_SYMBOL_GPL(vhost_exceeds_weight); |
|---|
| 433 | 440 | |
|---|
| 441 | +static size_t vhost_get_avail_size(struct vhost_virtqueue *vq, |
|---|
| 442 | + unsigned int num) |
|---|
| 443 | +{ |
|---|
| 444 | + size_t event __maybe_unused = |
|---|
| 445 | + vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; |
|---|
| 446 | + |
|---|
| 447 | + return sizeof(*vq->avail) + |
|---|
| 448 | + sizeof(*vq->avail->ring) * num + event; |
|---|
| 449 | +} |
|---|
| 450 | + |
|---|
| 451 | +static size_t vhost_get_used_size(struct vhost_virtqueue *vq, |
|---|
| 452 | + unsigned int num) |
|---|
| 453 | +{ |
|---|
| 454 | + size_t event __maybe_unused = |
|---|
| 455 | + vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; |
|---|
| 456 | + |
|---|
| 457 | + return sizeof(*vq->used) + |
|---|
| 458 | + sizeof(*vq->used->ring) * num + event; |
|---|
| 459 | +} |
|---|
| 460 | + |
|---|
| 461 | +static size_t vhost_get_desc_size(struct vhost_virtqueue *vq, |
|---|
| 462 | + unsigned int num) |
|---|
| 463 | +{ |
|---|
| 464 | + return sizeof(*vq->desc) * num; |
|---|
| 465 | +} |
|---|
| 466 | + |
|---|
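A quick sanity check on the three helpers above, not part of the patch itself: with the split-ring layouts from include/uapi/linux/virtio_ring.h (16-byte descriptors, 2-byte __virtio16 fields), a queue of num = 256 with VIRTIO_RING_F_EVENT_IDX negotiated works out as:

```c
/* Worked example for num = 256 with VIRTIO_RING_F_EVENT_IDX:
 *
 *   vhost_get_desc_size(vq, 256)  = 16 * 256           = 4096 bytes
 *   vhost_get_avail_size(vq, 256) = 4 + 2 * 256 + 2    =  518 bytes
 *   vhost_get_used_size(vq, 256)  = 4 + 8 * 256 + 2    = 2054 bytes
 *
 * The trailing 2 bytes in avail/used are the used_event/avail_event
 * slots, which exist only when VIRTIO_RING_F_EVENT_IDX is negotiated;
 * without it the `event` term above is 0.
 */
```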
| 434 | 467 | void vhost_dev_init(struct vhost_dev *dev, |
|---|
| 435 | 468 | struct vhost_virtqueue **vqs, int nvqs, |
|---|
| 436 | | - int iov_limit, int weight, int byte_weight) |
|---|
| 469 | + int iov_limit, int weight, int byte_weight, |
|---|
| 470 | + bool use_worker, |
|---|
| 471 | + int (*msg_handler)(struct vhost_dev *dev, |
|---|
| 472 | + struct vhost_iotlb_msg *msg)) |
|---|
| 437 | 473 | { |
|---|
| 438 | 474 | struct vhost_virtqueue *vq; |
|---|
| 439 | 475 | int i; |
|---|
| .. | .. |
|---|
| 449 | 485 | dev->iov_limit = iov_limit; |
|---|
| 450 | 486 | dev->weight = weight; |
|---|
| 451 | 487 | dev->byte_weight = byte_weight; |
|---|
| 488 | + dev->use_worker = use_worker; |
|---|
| 489 | + dev->msg_handler = msg_handler; |
|---|
| 452 | 490 | init_llist_head(&dev->work_list); |
|---|
| 453 | 491 | init_waitqueue_head(&dev->wait); |
|---|
| 454 | 492 | INIT_LIST_HEAD(&dev->read_list); |
|---|
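For orientation, here is a sketch of the two caller styles the widened vhost_dev_init() signature serves. The argument values are patterned on the in-tree vhost-net and vhost-vDPA callers from kernels around this patch and may differ by version:

```c
/* net-style backend: kernel worker thread, default IOTLB handling */
vhost_dev_init(&n->dev, vqs, VHOST_NET_VQ_MAX,
	       UIO_MAXIOV + VHOST_NET_BATCH,
	       VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT,
	       true, NULL);

/* vDPA-style backend: no worker thread, so no weights, and a
 * backend-supplied handler for IOTLB messages */
vhost_dev_init(&v->vdev, vqs, nvqs, 0, 0, 0,
	       false, vhost_vdpa_process_iotlb_msg);
```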
| .. | .. |
|---|
| 511 | 549 | } |
|---|
| 512 | 550 | EXPORT_SYMBOL_GPL(vhost_dev_has_owner); |
|---|
| 513 | 551 | |
|---|
| 552 | +static void vhost_attach_mm(struct vhost_dev *dev) |
|---|
| 553 | +{ |
|---|
| 554 | + /* No owner, become one */ |
|---|
| 555 | + if (dev->use_worker) { |
|---|
| 556 | + dev->mm = get_task_mm(current); |
|---|
| 557 | + } else { |
|---|
| 558 | + /* vDPA device does not use a worker thread, so there's |
|---|
| 559 | + * no need to hold the address space for mm. This helps |
|---|
| 560 | + * to avoid deadlock in the case of mmap(), which may |
|---|
| 561 | + * hold the refcnt of the file and depends on the release |
|---|
| 562 | + * method to remove the vma. |
|---|
| 563 | + */ |
|---|
| 564 | + dev->mm = current->mm; |
|---|
| 565 | + mmgrab(dev->mm); |
|---|
| 566 | + } |
|---|
| 567 | +} |
|---|
| 568 | + |
|---|
| 569 | +static void vhost_detach_mm(struct vhost_dev *dev) |
|---|
| 570 | +{ |
|---|
| 571 | + if (!dev->mm) |
|---|
| 572 | + return; |
|---|
| 573 | + |
|---|
| 574 | + if (dev->use_worker) |
|---|
| 575 | + mmput(dev->mm); |
|---|
| 576 | + else |
|---|
| 577 | + mmdrop(dev->mm); |
|---|
| 578 | + |
|---|
| 579 | + dev->mm = NULL; |
|---|
| 580 | +} |
|---|
| 581 | + |
|---|
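The two branches above rely on two different mm reference counts. A summary of the pairing rules (core mm API behavior, stated here for orientation rather than introduced by this patch):

```c
/* get_task_mm()/mmput()  take and drop mm_users: they keep the whole
 *                        address space alive. Required when use_worker
 *                        is true, because the worker kthread calls
 *                        kthread_use_mm(dev->mm).
 *
 * mmgrab()/mmdrop()      take and drop mm_count only: they keep
 *                        struct mm_struct itself valid while allowing
 *                        the address space to be torn down, which is
 *                        what breaks the mmap() deadlock described in
 *                        the comment above.
 */
```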
| 514 | 582 | /* Caller should have device mutex */ |
|---|
| 515 | 583 | long vhost_dev_set_owner(struct vhost_dev *dev) |
|---|
| 516 | 584 | { |
|---|
| .. | .. |
|---|
| 523 | 591 | goto err_mm; |
|---|
| 524 | 592 | } |
|---|
| 525 | 593 | |
|---|
| 526 | | - /* No owner, become one */ |
|---|
| 527 | | - dev->mm = get_task_mm(current); |
|---|
| 594 | + vhost_attach_mm(dev); |
|---|
| 595 | + |
|---|
| 528 | 596 | dev->kcov_handle = kcov_common_handle(); |
|---|
| 529 | | - worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid); |
|---|
| 530 | | - if (IS_ERR(worker)) { |
|---|
| 531 | | - err = PTR_ERR(worker); |
|---|
| 532 | | - goto err_worker; |
|---|
| 597 | + if (dev->use_worker) { |
|---|
| 598 | + worker = kthread_create(vhost_worker, dev, |
|---|
| 599 | + "vhost-%d", current->pid); |
|---|
| 600 | + if (IS_ERR(worker)) { |
|---|
| 601 | + err = PTR_ERR(worker); |
|---|
| 602 | + goto err_worker; |
|---|
| 603 | + } |
|---|
| 604 | + |
|---|
| 605 | + dev->worker = worker; |
|---|
| 606 | + wake_up_process(worker); /* avoid contributing to loadavg */ |
|---|
| 607 | + |
|---|
| 608 | + err = vhost_attach_cgroups(dev); |
|---|
| 609 | + if (err) |
|---|
| 610 | + goto err_cgroup; |
|---|
| 533 | 611 | } |
|---|
| 534 | | - |
|---|
| 535 | | - dev->worker = worker; |
|---|
| 536 | | - wake_up_process(worker); /* avoid contributing to loadavg */ |
|---|
| 537 | | - |
|---|
| 538 | | - err = vhost_attach_cgroups(dev); |
|---|
| 539 | | - if (err) |
|---|
| 540 | | - goto err_cgroup; |
|---|
| 541 | 612 | |
|---|
| 542 | 613 | err = vhost_dev_alloc_iovecs(dev); |
|---|
| 543 | 614 | if (err) |
|---|
| .. | .. |
|---|
| 545 | 616 | |
|---|
| 546 | 617 | return 0; |
|---|
| 547 | 618 | err_cgroup: |
|---|
| 548 | | - kthread_stop(worker); |
|---|
| 549 | | - dev->worker = NULL; |
|---|
| 619 | + if (dev->worker) { |
|---|
| 620 | + kthread_stop(dev->worker); |
|---|
| 621 | + dev->worker = NULL; |
|---|
| 622 | + } |
|---|
| 550 | 623 | err_worker: |
|---|
| 551 | | - if (dev->mm) |
|---|
| 552 | | - mmput(dev->mm); |
|---|
| 553 | | - dev->mm = NULL; |
|---|
| 624 | + vhost_detach_mm(dev); |
|---|
| 554 | 625 | dev->kcov_handle = 0; |
|---|
| 555 | 626 | err_mm: |
|---|
| 556 | 627 | return err; |
|---|
| 557 | 628 | } |
|---|
| 558 | 629 | EXPORT_SYMBOL_GPL(vhost_dev_set_owner); |
|---|
| 559 | 630 | |
|---|
| 560 | | -struct vhost_umem *vhost_dev_reset_owner_prepare(void) |
|---|
| 631 | +static struct vhost_iotlb *iotlb_alloc(void) |
|---|
| 561 | 632 | { |
|---|
| 562 | | - return kvzalloc(sizeof(struct vhost_umem), GFP_KERNEL); |
|---|
| 633 | + return vhost_iotlb_alloc(max_iotlb_entries, |
|---|
| 634 | + VHOST_IOTLB_FLAG_RETIRE); |
|---|
| 635 | +} |
|---|
| 636 | + |
|---|
| 637 | +struct vhost_iotlb *vhost_dev_reset_owner_prepare(void) |
|---|
| 638 | +{ |
|---|
| 639 | + return iotlb_alloc(); |
|---|
| 563 | 640 | } |
|---|
| 564 | 641 | EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare); |
|---|
| 565 | 642 | |
|---|
| 566 | 643 | /* Caller should have device mutex */ |
|---|
| 567 | | -void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_umem *umem) |
|---|
| 644 | +void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_iotlb *umem) |
|---|
| 568 | 645 | { |
|---|
| 569 | 646 | int i; |
|---|
| 570 | 647 | |
|---|
| 571 | 648 | vhost_dev_cleanup(dev); |
|---|
| 572 | 649 | |
|---|
| 573 | | - /* Restore memory to default empty mapping. */ |
|---|
| 574 | | - INIT_LIST_HEAD(&umem->umem_list); |
|---|
| 575 | 650 | dev->umem = umem; |
|---|
| 576 | 651 | /* We don't need VQ locks below since vhost_dev_cleanup makes sure |
|---|
| 577 | 652 | * VQs aren't running. |
|---|
| .. | .. |
|---|
| 593 | 668 | } |
|---|
| 594 | 669 | } |
|---|
| 595 | 670 | EXPORT_SYMBOL_GPL(vhost_dev_stop); |
|---|
| 596 | | - |
|---|
| 597 | | -static void vhost_umem_free(struct vhost_umem *umem, |
|---|
| 598 | | - struct vhost_umem_node *node) |
|---|
| 599 | | -{ |
|---|
| 600 | | - vhost_umem_interval_tree_remove(node, &umem->umem_tree); |
|---|
| 601 | | - list_del(&node->link); |
|---|
| 602 | | - kfree(node); |
|---|
| 603 | | - umem->numem--; |
|---|
| 604 | | -} |
|---|
| 605 | | - |
|---|
| 606 | | -static void vhost_umem_clean(struct vhost_umem *umem) |
|---|
| 607 | | -{ |
|---|
| 608 | | - struct vhost_umem_node *node, *tmp; |
|---|
| 609 | | - |
|---|
| 610 | | - if (!umem) |
|---|
| 611 | | - return; |
|---|
| 612 | | - |
|---|
| 613 | | - list_for_each_entry_safe(node, tmp, &umem->umem_list, link) |
|---|
| 614 | | - vhost_umem_free(umem, node); |
|---|
| 615 | | - |
|---|
| 616 | | - kvfree(umem); |
|---|
| 617 | | -} |
|---|
| 618 | 671 | |
|---|
| 619 | 672 | static void vhost_clear_msg(struct vhost_dev *dev) |
|---|
| 620 | 673 | { |
|---|
| .. | .. |
|---|
| 644 | 697 | eventfd_ctx_put(dev->vqs[i]->error_ctx); |
|---|
| 645 | 698 | if (dev->vqs[i]->kick) |
|---|
| 646 | 699 | fput(dev->vqs[i]->kick); |
|---|
| 647 | | - if (dev->vqs[i]->call_ctx) |
|---|
| 648 | | - eventfd_ctx_put(dev->vqs[i]->call_ctx); |
|---|
| 700 | + if (dev->vqs[i]->call_ctx.ctx) |
|---|
| 701 | + eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx); |
|---|
| 649 | 702 | vhost_vq_reset(dev, dev->vqs[i]); |
|---|
| 650 | 703 | } |
|---|
| 651 | 704 | vhost_dev_free_iovecs(dev); |
|---|
| .. | .. |
|---|
| 653 | 706 | eventfd_ctx_put(dev->log_ctx); |
|---|
| 654 | 707 | dev->log_ctx = NULL; |
|---|
| 655 | 708 | /* No one will access memory at this point */ |
|---|
| 656 | | - vhost_umem_clean(dev->umem); |
|---|
| 709 | + vhost_iotlb_free(dev->umem); |
|---|
| 657 | 710 | dev->umem = NULL; |
|---|
| 658 | | - vhost_umem_clean(dev->iotlb); |
|---|
| 711 | + vhost_iotlb_free(dev->iotlb); |
|---|
| 659 | 712 | dev->iotlb = NULL; |
|---|
| 660 | 713 | vhost_clear_msg(dev); |
|---|
| 661 | 714 | wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM); |
|---|
| .. | .. |
|---|
| 665 | 718 | dev->worker = NULL; |
|---|
| 666 | 719 | dev->kcov_handle = 0; |
|---|
| 667 | 720 | } |
|---|
| 668 | | - if (dev->mm) |
|---|
| 669 | | - mmput(dev->mm); |
|---|
| 670 | | - dev->mm = NULL; |
|---|
| 721 | + vhost_detach_mm(dev); |
|---|
| 671 | 722 | } |
|---|
| 672 | 723 | EXPORT_SYMBOL_GPL(vhost_dev_cleanup); |
|---|
| 673 | 724 | |
|---|
| .. | .. |
|---|
| 680 | 731 | a + (unsigned long)log_base > ULONG_MAX) |
|---|
| 681 | 732 | return false; |
|---|
| 682 | 733 | |
|---|
| 683 | | - return access_ok(VERIFY_WRITE, log_base + a, |
|---|
| 734 | + return access_ok(log_base + a, |
|---|
| 684 | 735 | (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8); |
|---|
| 685 | 736 | } |
|---|
| 686 | 737 | |
|---|
| .. | .. |
|---|
| 697 | 748 | } |
|---|
| 698 | 749 | |
|---|
| 699 | 750 | /* Caller should have vq mutex and device mutex. */ |
|---|
| 700 | | -static bool vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem, |
|---|
| 751 | +static bool vq_memory_access_ok(void __user *log_base, struct vhost_iotlb *umem, |
|---|
| 701 | 752 | int log_all) |
|---|
| 702 | 753 | { |
|---|
| 703 | | - struct vhost_umem_node *node; |
|---|
| 754 | + struct vhost_iotlb_map *map; |
|---|
| 704 | 755 | |
|---|
| 705 | 756 | if (!umem) |
|---|
| 706 | 757 | return false; |
|---|
| 707 | 758 | |
|---|
| 708 | | - list_for_each_entry(node, &umem->umem_list, link) { |
|---|
| 709 | | - unsigned long a = node->userspace_addr; |
|---|
| 759 | + list_for_each_entry(map, &umem->list, link) { |
|---|
| 760 | + unsigned long a = map->addr; |
|---|
| 710 | 761 | |
|---|
| 711 | | - if (vhost_overflow(node->userspace_addr, node->size)) |
|---|
| 762 | + if (vhost_overflow(map->addr, map->size)) |
|---|
| 712 | 763 | return false; |
|---|
| 713 | 764 | |
|---|
| 714 | 765 | |
|---|
| 715 | | - if (!access_ok(VERIFY_WRITE, (void __user *)a, |
|---|
| 716 | | - node->size)) |
|---|
| 766 | + if (!access_ok((void __user *)a, map->size)) |
|---|
| 717 | 767 | return false; |
|---|
| 718 | 768 | else if (log_all && !log_access_ok(log_base, |
|---|
| 719 | | - node->start, |
|---|
| 720 | | - node->size)) |
|---|
| 769 | + map->start, |
|---|
| 770 | + map->size)) |
|---|
| 721 | 771 | return false; |
|---|
| 722 | 772 | } |
|---|
| 723 | 773 | return true; |
|---|
| .. | .. |
|---|
| 727 | 777 | u64 addr, unsigned int size, |
|---|
| 728 | 778 | int type) |
|---|
| 729 | 779 | { |
|---|
| 730 | | - const struct vhost_umem_node *node = vq->meta_iotlb[type]; |
|---|
| 780 | + const struct vhost_iotlb_map *map = vq->meta_iotlb[type]; |
|---|
| 731 | 781 | |
|---|
| 732 | | - if (!node) |
|---|
| 782 | + if (!map) |
|---|
| 733 | 783 | return NULL; |
|---|
| 734 | 784 | |
|---|
| 735 | | - return (void *)(uintptr_t)(node->userspace_addr + addr - node->start); |
|---|
| 785 | + return (void __user *)(uintptr_t)(map->addr + addr - map->start); |
|---|
| 736 | 786 | } |
|---|
| 737 | 787 | |
|---|
| 738 | 788 | /* Can we switch to this memory table? */ |
|---|
| 739 | 789 | /* Caller should have device mutex but not vq mutex */ |
|---|
| 740 | | -static bool memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem, |
|---|
| 790 | +static bool memory_access_ok(struct vhost_dev *d, struct vhost_iotlb *umem, |
|---|
| 741 | 791 | int log_all) |
|---|
| 742 | 792 | { |
|---|
| 743 | 793 | int i; |
|---|
| .. | .. |
|---|
| 871 | 921 | * not happen in this case. |
|---|
| 872 | 922 | */ |
|---|
| 873 | 923 | static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, |
|---|
| 874 | | - void *addr, unsigned int size, |
|---|
| 924 | + void __user *addr, unsigned int size, |
|---|
| 875 | 925 | int type) |
|---|
| 876 | 926 | { |
|---|
| 877 | 927 | void __user *uaddr = vhost_vq_meta_fetch(vq, |
|---|
| .. | .. |
|---|
| 884 | 934 | |
|---|
| 885 | 935 | #define vhost_put_user(vq, x, ptr) \ |
|---|
| 886 | 936 | ({ \ |
|---|
| 887 | | - int ret = -EFAULT; \ |
|---|
| 937 | + int ret; \ |
|---|
| 888 | 938 | if (!vq->iotlb) { \ |
|---|
| 889 | 939 | ret = __put_user(x, ptr); \ |
|---|
| 890 | 940 | } else { \ |
|---|
| .. | .. |
|---|
| 898 | 948 | } \ |
|---|
| 899 | 949 | ret; \ |
|---|
| 900 | 950 | }) |
|---|
| 951 | + |
|---|
| 952 | +static inline int vhost_put_avail_event(struct vhost_virtqueue *vq) |
|---|
| 953 | +{ |
|---|
| 954 | + return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx), |
|---|
| 955 | + vhost_avail_event(vq)); |
|---|
| 956 | +} |
|---|
| 957 | + |
|---|
| 958 | +static inline int vhost_put_used(struct vhost_virtqueue *vq, |
|---|
| 959 | + struct vring_used_elem *head, int idx, |
|---|
| 960 | + int count) |
|---|
| 961 | +{ |
|---|
| 962 | + return vhost_copy_to_user(vq, vq->used->ring + idx, head, |
|---|
| 963 | + count * sizeof(*head)); |
|---|
| 964 | +} |
|---|
| 965 | + |
|---|
| 966 | +static inline int vhost_put_used_flags(struct vhost_virtqueue *vq) |
|---|
| 968 | +{ |
|---|
| 969 | + return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags), |
|---|
| 970 | + &vq->used->flags); |
|---|
| 971 | +} |
|---|
| 972 | + |
|---|
| 973 | +static inline int vhost_put_used_idx(struct vhost_virtqueue *vq) |
|---|
| 975 | +{ |
|---|
| 976 | + return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx), |
|---|
| 977 | + &vq->used->idx); |
|---|
| 978 | +} |
|---|
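The value of these wrappers shows up at the call sites converted later in this patch; the byte-order conversion moves into one place per field:

```c
/* before (open-coded at each call site): */
vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
	       &vq->used->idx);

/* after: */
vhost_put_used_idx(vq);
```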
| 901 | 979 | |
|---|
| 902 | 980 | #define vhost_get_user(vq, x, ptr, type) \ |
|---|
| 903 | 981 | ({ \ |
|---|
| .. | .. |
|---|
| 937 | 1015 | mutex_unlock(&d->vqs[i]->mutex); |
|---|
| 938 | 1016 | } |
|---|
| 939 | 1017 | |
|---|
| 940 | | -static int vhost_new_umem_range(struct vhost_umem *umem, |
|---|
| 941 | | - u64 start, u64 size, u64 end, |
|---|
| 942 | | - u64 userspace_addr, int perm) |
|---|
| 1018 | +static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq, |
|---|
| 1019 | + __virtio16 *idx) |
|---|
| 943 | 1020 | { |
|---|
| 944 | | - struct vhost_umem_node *tmp, *node; |
|---|
| 945 | | - |
|---|
| 946 | | - if (!size) |
|---|
| 947 | | - return -EFAULT; |
|---|
| 948 | | - |
|---|
| 949 | | - node = kmalloc(sizeof(*node), GFP_ATOMIC); |
|---|
| 950 | | - if (!node) |
|---|
| 951 | | - return -ENOMEM; |
|---|
| 952 | | - |
|---|
| 953 | | - if (umem->numem == max_iotlb_entries) { |
|---|
| 954 | | - tmp = list_first_entry(&umem->umem_list, typeof(*tmp), link); |
|---|
| 955 | | - vhost_umem_free(umem, tmp); |
|---|
| 956 | | - } |
|---|
| 957 | | - |
|---|
| 958 | | - node->start = start; |
|---|
| 959 | | - node->size = size; |
|---|
| 960 | | - node->last = end; |
|---|
| 961 | | - node->userspace_addr = userspace_addr; |
|---|
| 962 | | - node->perm = perm; |
|---|
| 963 | | - INIT_LIST_HEAD(&node->link); |
|---|
| 964 | | - list_add_tail(&node->link, &umem->umem_list); |
|---|
| 965 | | - vhost_umem_interval_tree_insert(node, &umem->umem_tree); |
|---|
| 966 | | - umem->numem++; |
|---|
| 967 | | - |
|---|
| 968 | | - return 0; |
|---|
| 1021 | + return vhost_get_avail(vq, *idx, &vq->avail->idx); |
|---|
| 969 | 1022 | } |
|---|
| 970 | 1023 | |
|---|
| 971 | | -static void vhost_del_umem_range(struct vhost_umem *umem, |
|---|
| 972 | | - u64 start, u64 end) |
|---|
| 1024 | +static inline int vhost_get_avail_head(struct vhost_virtqueue *vq, |
|---|
| 1025 | + __virtio16 *head, int idx) |
|---|
| 973 | 1026 | { |
|---|
| 974 | | - struct vhost_umem_node *node; |
|---|
| 1027 | + return vhost_get_avail(vq, *head, |
|---|
| 1028 | + &vq->avail->ring[idx & (vq->num - 1)]); |
|---|
| 1029 | +} |
|---|
| 975 | 1030 | |
|---|
| 976 | | - while ((node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, |
|---|
| 977 | | - start, end))) |
|---|
| 978 | | - vhost_umem_free(umem, node); |
|---|
| 1031 | +static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq, |
|---|
| 1032 | + __virtio16 *flags) |
|---|
| 1033 | +{ |
|---|
| 1034 | + return vhost_get_avail(vq, *flags, &vq->avail->flags); |
|---|
| 1035 | +} |
|---|
| 1036 | + |
|---|
| 1037 | +static inline int vhost_get_used_event(struct vhost_virtqueue *vq, |
|---|
| 1038 | + __virtio16 *event) |
|---|
| 1039 | +{ |
|---|
| 1040 | + return vhost_get_avail(vq, *event, vhost_used_event(vq)); |
|---|
| 1041 | +} |
|---|
| 1042 | + |
|---|
| 1043 | +static inline int vhost_get_used_idx(struct vhost_virtqueue *vq, |
|---|
| 1044 | + __virtio16 *idx) |
|---|
| 1045 | +{ |
|---|
| 1046 | + return vhost_get_used(vq, *idx, &vq->used->idx); |
|---|
| 1047 | +} |
|---|
| 1048 | + |
|---|
| 1049 | +static inline int vhost_get_desc(struct vhost_virtqueue *vq, |
|---|
| 1050 | + struct vring_desc *desc, int idx) |
|---|
| 1051 | +{ |
|---|
| 1052 | + return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc)); |
|---|
| 979 | 1053 | } |
|---|
| 980 | 1054 | |
|---|
| 981 | 1055 | static void vhost_iotlb_notify_vq(struct vhost_dev *d, |
|---|
| .. | .. |
|---|
| 1008 | 1082 | return false; |
|---|
| 1009 | 1083 | |
|---|
| 1010 | 1084 | if ((access & VHOST_ACCESS_RO) && |
|---|
| 1011 | | - !access_ok(VERIFY_READ, (void __user *)a, size)) |
|---|
| 1085 | + !access_ok((void __user *)a, size)) |
|---|
| 1012 | 1086 | return false; |
|---|
| 1013 | 1087 | if ((access & VHOST_ACCESS_WO) && |
|---|
| 1014 | | - !access_ok(VERIFY_WRITE, (void __user *)a, size)) |
|---|
| 1088 | + !access_ok((void __user *)a, size)) |
|---|
| 1015 | 1089 | return false; |
|---|
| 1016 | 1090 | return true; |
|---|
| 1017 | 1091 | } |
|---|
| .. | .. |
|---|
| 1034 | 1108 | break; |
|---|
| 1035 | 1109 | } |
|---|
| 1036 | 1110 | vhost_vq_meta_reset(dev); |
|---|
| 1037 | | - if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size, |
|---|
| 1038 | | - msg->iova + msg->size - 1, |
|---|
| 1039 | | - msg->uaddr, msg->perm)) { |
|---|
| 1111 | + if (vhost_iotlb_add_range(dev->iotlb, msg->iova, |
|---|
| 1112 | + msg->iova + msg->size - 1, |
|---|
| 1113 | + msg->uaddr, msg->perm)) { |
|---|
| 1040 | 1114 | ret = -ENOMEM; |
|---|
| 1041 | 1115 | break; |
|---|
| 1042 | 1116 | } |
|---|
| .. | .. |
|---|
| 1048 | 1122 | break; |
|---|
| 1049 | 1123 | } |
|---|
| 1050 | 1124 | vhost_vq_meta_reset(dev); |
|---|
| 1051 | | - vhost_del_umem_range(dev->iotlb, msg->iova, |
|---|
| 1052 | | - msg->iova + msg->size - 1); |
|---|
| 1125 | + vhost_iotlb_del_range(dev->iotlb, msg->iova, |
|---|
| 1126 | + msg->iova + msg->size - 1); |
|---|
| 1053 | 1127 | break; |
|---|
| 1054 | 1128 | default: |
|---|
| 1055 | 1129 | ret = -EINVAL; |
|---|
| .. | .. |
|---|
| 1095 | 1169 | ret = -EINVAL; |
|---|
| 1096 | 1170 | goto done; |
|---|
| 1097 | 1171 | } |
|---|
| 1098 | | - if (vhost_process_iotlb_msg(dev, &msg)) { |
|---|
| 1172 | + |
|---|
| 1173 | + if (dev->msg_handler) |
|---|
| 1174 | + ret = dev->msg_handler(dev, &msg); |
|---|
| 1175 | + else |
|---|
| 1176 | + ret = vhost_process_iotlb_msg(dev, &msg); |
|---|
| 1177 | + if (ret) { |
|---|
| 1099 | 1178 | ret = -EFAULT; |
|---|
| 1100 | 1179 | goto done; |
|---|
| 1101 | 1180 | } |
|---|
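A sketch of what a backend-supplied msg_handler can look like, using only the vhost_iotlb calls this patch switches to. The vhost_dummy_ name is illustrative; functionally this just mirrors the default vhost_process_iotlb_msg() path, and a real override (e.g. vDPA) would add its own mapping work:

```c
static int vhost_dummy_process_iotlb_msg(struct vhost_dev *dev,
					 struct vhost_iotlb_msg *msg)
{
	switch (msg->type) {
	case VHOST_IOTLB_UPDATE:
		/* last = iova + size - 1, matching the dispatch above */
		return vhost_iotlb_add_range(dev->iotlb, msg->iova,
					     msg->iova + msg->size - 1,
					     msg->uaddr, msg->perm);
	case VHOST_IOTLB_INVALIDATE:
		vhost_iotlb_del_range(dev->iotlb, msg->iova,
				      msg->iova + msg->size - 1);
		return 0;
	default:
		return -EINVAL;
	}
}
```

A non-zero return propagates out of the chardev write path as -EFAULT, exactly as with the built-in handler.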
| .. | .. |
|---|
| 1217 | 1296 | } |
|---|
| 1218 | 1297 | |
|---|
| 1219 | 1298 | static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, |
|---|
| 1220 | | - struct vring_desc __user *desc, |
|---|
| 1221 | | - struct vring_avail __user *avail, |
|---|
| 1222 | | - struct vring_used __user *used) |
|---|
| 1299 | + vring_desc_t __user *desc, |
|---|
| 1300 | + vring_avail_t __user *avail, |
|---|
| 1301 | + vring_used_t __user *used) |
|---|
| 1223 | 1302 | |
|---|
| 1224 | 1303 | { |
|---|
| 1225 | | - size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; |
|---|
| 1304 | + /* If an IOTLB device is present, the vring addresses are |
|---|
| 1305 | + * GIOVAs. Access validation occurs at prefetch time. */ |
|---|
| 1306 | + if (vq->iotlb) |
|---|
| 1307 | + return true; |
|---|
| 1226 | 1308 | |
|---|
| 1227 | | - return access_ok(VERIFY_READ, desc, num * sizeof *desc) && |
|---|
| 1228 | | - access_ok(VERIFY_READ, avail, |
|---|
| 1229 | | - sizeof *avail + num * sizeof *avail->ring + s) && |
|---|
| 1230 | | - access_ok(VERIFY_WRITE, used, |
|---|
| 1231 | | - sizeof *used + num * sizeof *used->ring + s); |
|---|
| 1309 | + return access_ok(desc, vhost_get_desc_size(vq, num)) && |
|---|
| 1310 | + access_ok(avail, vhost_get_avail_size(vq, num)) && |
|---|
| 1311 | + access_ok(used, vhost_get_used_size(vq, num)); |
|---|
| 1232 | 1312 | } |
|---|
| 1233 | 1313 | |
|---|
| 1234 | 1314 | static void vhost_vq_meta_update(struct vhost_virtqueue *vq, |
|---|
| 1235 | | - const struct vhost_umem_node *node, |
|---|
| 1315 | + const struct vhost_iotlb_map *map, |
|---|
| 1236 | 1316 | int type) |
|---|
| 1237 | 1317 | { |
|---|
| 1238 | 1318 | int access = (type == VHOST_ADDR_USED) ? |
|---|
| 1239 | 1319 | VHOST_ACCESS_WO : VHOST_ACCESS_RO; |
|---|
| 1240 | 1320 | |
|---|
| 1241 | | - if (likely(node->perm & access)) |
|---|
| 1242 | | - vq->meta_iotlb[type] = node; |
|---|
| 1321 | + if (likely(map->perm & access)) |
|---|
| 1322 | + vq->meta_iotlb[type] = map; |
|---|
| 1243 | 1323 | } |
|---|
| 1244 | 1324 | |
|---|
| 1245 | 1325 | static bool iotlb_access_ok(struct vhost_virtqueue *vq, |
|---|
| 1246 | 1326 | int access, u64 addr, u64 len, int type) |
|---|
| 1247 | 1327 | { |
|---|
| 1248 | | - const struct vhost_umem_node *node; |
|---|
| 1249 | | - struct vhost_umem *umem = vq->iotlb; |
|---|
| 1328 | + const struct vhost_iotlb_map *map; |
|---|
| 1329 | + struct vhost_iotlb *umem = vq->iotlb; |
|---|
| 1250 | 1330 | u64 s = 0, size, orig_addr = addr, last = addr + len - 1; |
|---|
| 1251 | 1331 | |
|---|
| 1252 | 1332 | if (vhost_vq_meta_fetch(vq, addr, len, type)) |
|---|
| 1253 | 1333 | return true; |
|---|
| 1254 | 1334 | |
|---|
| 1255 | 1335 | while (len > s) { |
|---|
| 1256 | | - node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, |
|---|
| 1257 | | - addr, |
|---|
| 1258 | | - last); |
|---|
| 1259 | | - if (node == NULL || node->start > addr) { |
|---|
| 1336 | + map = vhost_iotlb_itree_first(umem, addr, last); |
|---|
| 1337 | + if (map == NULL || map->start > addr) { |
|---|
| 1260 | 1338 | vhost_iotlb_miss(vq, addr, access); |
|---|
| 1261 | 1339 | return false; |
|---|
| 1262 | | - } else if (!(node->perm & access)) { |
|---|
| 1340 | + } else if (!(map->perm & access)) { |
|---|
| 1263 | 1341 | /* Report the possible access violation by |
|---|
| 1264 | 1342 | * requesting another translation from userspace. |
|---|
| 1265 | 1343 | */ |
|---|
| 1266 | 1344 | return false; |
|---|
| 1267 | 1345 | } |
|---|
| 1268 | 1346 | |
|---|
| 1269 | | - size = node->size - addr + node->start; |
|---|
| 1347 | + size = map->size - addr + map->start; |
|---|
| 1270 | 1348 | |
|---|
| 1271 | 1349 | if (orig_addr == addr && size >= len) |
|---|
| 1272 | | - vhost_vq_meta_update(vq, node, type); |
|---|
| 1350 | + vhost_vq_meta_update(vq, map, type); |
|---|
| 1273 | 1351 | |
|---|
| 1274 | 1352 | s += size; |
|---|
| 1275 | 1353 | addr += size; |
|---|
| .. | .. |
|---|
| 1278 | 1356 | return true; |
|---|
| 1279 | 1357 | } |
|---|
| 1280 | 1358 | |
|---|
| 1281 | | -int vq_iotlb_prefetch(struct vhost_virtqueue *vq) |
|---|
| 1359 | +int vq_meta_prefetch(struct vhost_virtqueue *vq) |
|---|
| 1282 | 1360 | { |
|---|
| 1283 | | - size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; |
|---|
| 1284 | 1361 | unsigned int num = vq->num; |
|---|
| 1285 | 1362 | |
|---|
| 1286 | 1363 | if (!vq->iotlb) |
|---|
| 1287 | 1364 | return 1; |
|---|
| 1288 | 1365 | |
|---|
| 1289 | | - return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc, |
|---|
| 1290 | | - num * sizeof(*vq->desc), VHOST_ADDR_DESC) && |
|---|
| 1291 | | - iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail, |
|---|
| 1292 | | - sizeof *vq->avail + |
|---|
| 1293 | | - num * sizeof(*vq->avail->ring) + s, |
|---|
| 1366 | + return iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->desc, |
|---|
| 1367 | + vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) && |
|---|
| 1368 | + iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->avail, |
|---|
| 1369 | + vhost_get_avail_size(vq, num), |
|---|
| 1294 | 1370 | VHOST_ADDR_AVAIL) && |
|---|
| 1295 | | - iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used, |
|---|
| 1296 | | - sizeof *vq->used + |
|---|
| 1297 | | - num * sizeof(*vq->used->ring) + s, |
|---|
| 1298 | | - VHOST_ADDR_USED); |
|---|
| 1371 | + iotlb_access_ok(vq, VHOST_MAP_WO, (u64)(uintptr_t)vq->used, |
|---|
| 1372 | + vhost_get_used_size(vq, num), VHOST_ADDR_USED); |
|---|
| 1299 | 1373 | } |
|---|
| 1300 | | -EXPORT_SYMBOL_GPL(vq_iotlb_prefetch); |
|---|
| 1374 | +EXPORT_SYMBOL_GPL(vq_meta_prefetch); |
|---|
| 1301 | 1375 | |
|---|
| 1302 | 1376 | /* Can we log writes? */ |
|---|
| 1303 | 1377 | /* Caller should have device mutex but not vq mutex */ |
|---|
| .. | .. |
|---|
| 1307 | 1381 | } |
|---|
| 1308 | 1382 | EXPORT_SYMBOL_GPL(vhost_log_access_ok); |
|---|
| 1309 | 1383 | |
|---|
| 1384 | +static bool vq_log_used_access_ok(struct vhost_virtqueue *vq, |
|---|
| 1385 | + void __user *log_base, |
|---|
| 1386 | + bool log_used, |
|---|
| 1387 | + u64 log_addr) |
|---|
| 1388 | +{ |
|---|
| 1389 | + /* If an IOTLB device is present, log_addr is a GIOVA that |
|---|
| 1390 | + * will never be logged by log_used(). */ |
|---|
| 1391 | + if (vq->iotlb) |
|---|
| 1392 | + return true; |
|---|
| 1393 | + |
|---|
| 1394 | + return !log_used || log_access_ok(log_base, log_addr, |
|---|
| 1395 | + vhost_get_used_size(vq, vq->num)); |
|---|
| 1396 | +} |
|---|
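The new helper bounds the dirty log with vhost_get_used_size(). For a concrete feel of the log_access_ok() arithmetic it feeds (one dirty bit per VHOST_PAGE_SIZE page, rounded up to whole bytes; VHOST_PAGE_SIZE is assumed to be 0x1000 here):

```c
/* Logging a used ring of vhost_get_used_size() = 2054 bytes:
 *
 *   (2054 + 4096 * 8 - 1) / 4096 / 8 = 1
 *
 * i.e. the bitmap region starting at log_addr must be writable for
 * exactly 1 byte. */
```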
| 1397 | + |
|---|
| 1310 | 1398 | /* Verify access for write logging. */ |
|---|
| 1311 | 1399 | /* Caller should have vq mutex and device mutex */ |
|---|
| 1312 | 1400 | static bool vq_log_access_ok(struct vhost_virtqueue *vq, |
|---|
| 1313 | 1401 | void __user *log_base) |
|---|
| 1314 | 1402 | { |
|---|
| 1315 | | - size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; |
|---|
| 1316 | | - |
|---|
| 1317 | 1403 | return vq_memory_access_ok(log_base, vq->umem, |
|---|
| 1318 | 1404 | vhost_has_feature(vq, VHOST_F_LOG_ALL)) && |
|---|
| 1319 | | - (!vq->log_used || log_access_ok(log_base, vq->log_addr, |
|---|
| 1320 | | - sizeof *vq->used + |
|---|
| 1321 | | - vq->num * sizeof *vq->used->ring + s)); |
|---|
| 1405 | + vq_log_used_access_ok(vq, log_base, vq->log_used, vq->log_addr); |
|---|
| 1322 | 1406 | } |
|---|
| 1323 | 1407 | |
|---|
| 1324 | 1408 | /* Can we start vq? */ |
|---|
| .. | .. |
|---|
| 1328 | 1412 | if (!vq_log_access_ok(vq, vq->log_base)) |
|---|
| 1329 | 1413 | return false; |
|---|
| 1330 | 1414 | |
|---|
| 1331 | | - /* Access validation occurs at prefetch time with IOTLB */ |
|---|
| 1332 | | - if (vq->iotlb) |
|---|
| 1333 | | - return true; |
|---|
| 1334 | | - |
|---|
| 1335 | 1415 | return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used); |
|---|
| 1336 | 1416 | } |
|---|
| 1337 | 1417 | EXPORT_SYMBOL_GPL(vhost_vq_access_ok); |
|---|
| 1338 | | - |
|---|
| 1339 | | -static struct vhost_umem *vhost_umem_alloc(void) |
|---|
| 1340 | | -{ |
|---|
| 1341 | | - struct vhost_umem *umem = kvzalloc(sizeof(*umem), GFP_KERNEL); |
|---|
| 1342 | | - |
|---|
| 1343 | | - if (!umem) |
|---|
| 1344 | | - return NULL; |
|---|
| 1345 | | - |
|---|
| 1346 | | - umem->umem_tree = RB_ROOT_CACHED; |
|---|
| 1347 | | - umem->numem = 0; |
|---|
| 1348 | | - INIT_LIST_HEAD(&umem->umem_list); |
|---|
| 1349 | | - |
|---|
| 1350 | | - return umem; |
|---|
| 1351 | | -} |
|---|
| 1352 | 1418 | |
|---|
| 1353 | 1419 | static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) |
|---|
| 1354 | 1420 | { |
|---|
| 1355 | 1421 | struct vhost_memory mem, *newmem; |
|---|
| 1356 | 1422 | struct vhost_memory_region *region; |
|---|
| 1357 | | - struct vhost_umem *newumem, *oldumem; |
|---|
| 1423 | + struct vhost_iotlb *newumem, *oldumem; |
|---|
| 1358 | 1424 | unsigned long size = offsetof(struct vhost_memory, regions); |
|---|
| 1359 | 1425 | int i; |
|---|
| 1360 | 1426 | |
|---|
| .. | .. |
|---|
| 1371 | 1437 | |
|---|
| 1372 | 1438 | memcpy(newmem, &mem, size); |
|---|
| 1373 | 1439 | if (copy_from_user(newmem->regions, m->regions, |
|---|
| 1374 | | - mem.nregions * sizeof *m->regions)) { |
|---|
| 1440 | + flex_array_size(newmem, regions, mem.nregions))) { |
|---|
| 1375 | 1441 | kvfree(newmem); |
|---|
| 1376 | 1442 | return -EFAULT; |
|---|
| 1377 | 1443 | } |
|---|
| 1378 | 1444 | |
|---|
| 1379 | | - newumem = vhost_umem_alloc(); |
|---|
| 1445 | + newumem = iotlb_alloc(); |
|---|
| 1380 | 1446 | if (!newumem) { |
|---|
| 1381 | 1447 | kvfree(newmem); |
|---|
| 1382 | 1448 | return -ENOMEM; |
|---|
| .. | .. |
|---|
| 1385 | 1451 | for (region = newmem->regions; |
|---|
| 1386 | 1452 | region < newmem->regions + mem.nregions; |
|---|
| 1387 | 1453 | region++) { |
|---|
| 1388 | | - if (vhost_new_umem_range(newumem, |
|---|
| 1389 | | - region->guest_phys_addr, |
|---|
| 1390 | | - region->memory_size, |
|---|
| 1391 | | - region->guest_phys_addr + |
|---|
| 1392 | | - region->memory_size - 1, |
|---|
| 1393 | | - region->userspace_addr, |
|---|
| 1394 | | - VHOST_ACCESS_RW)) |
|---|
| 1454 | + if (vhost_iotlb_add_range(newumem, |
|---|
| 1455 | + region->guest_phys_addr, |
|---|
| 1456 | + region->guest_phys_addr + |
|---|
| 1457 | + region->memory_size - 1, |
|---|
| 1458 | + region->userspace_addr, |
|---|
| 1459 | + VHOST_MAP_RW)) |
|---|
| 1395 | 1460 | goto err; |
|---|
| 1396 | 1461 | } |
|---|
| 1397 | 1462 | |
|---|
| .. | .. |
|---|
| 1409 | 1474 | } |
|---|
| 1410 | 1475 | |
|---|
| 1411 | 1476 | kvfree(newmem); |
|---|
| 1412 | | - vhost_umem_clean(oldumem); |
|---|
| 1477 | + vhost_iotlb_free(oldumem); |
|---|
| 1413 | 1478 | return 0; |
|---|
| 1414 | 1479 | |
|---|
| 1415 | 1480 | err: |
|---|
| 1416 | | - vhost_umem_clean(newumem); |
|---|
| 1481 | + vhost_iotlb_free(newumem); |
|---|
| 1417 | 1482 | kvfree(newmem); |
|---|
| 1418 | 1483 | return -EFAULT; |
|---|
| 1419 | 1484 | } |
|---|
| 1420 | 1485 | |
|---|
| 1486 | +static long vhost_vring_set_num(struct vhost_dev *d, |
|---|
| 1487 | + struct vhost_virtqueue *vq, |
|---|
| 1488 | + void __user *argp) |
|---|
| 1489 | +{ |
|---|
| 1490 | + struct vhost_vring_state s; |
|---|
| 1491 | + |
|---|
| 1492 | + /* Resizing ring with an active backend? |
|---|
| 1493 | + * You don't want to do that. */ |
|---|
| 1494 | + if (vq->private_data) |
|---|
| 1495 | + return -EBUSY; |
|---|
| 1496 | + |
|---|
| 1497 | + if (copy_from_user(&s, argp, sizeof s)) |
|---|
| 1498 | + return -EFAULT; |
|---|
| 1499 | + |
|---|
| 1500 | + if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) |
|---|
| 1501 | + return -EINVAL; |
|---|
| 1502 | + vq->num = s.num; |
|---|
| 1503 | + |
|---|
| 1504 | + return 0; |
|---|
| 1505 | +} |
|---|
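From userspace, the checks in vhost_vring_set_num() look like this. A minimal sketch, assuming vhost_fd is an already-opened /dev/vhost-* descriptor whose owner has been set:

```c
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

static int set_ring_size(int vhost_fd, unsigned int index, unsigned int num)
{
	struct vhost_vring_state s = { .index = index, .num = num };

	/* The kernel rejects 0, anything above 0xffff, and sizes that
	 * fail the power-of-two test (s.num & (s.num - 1)) with EINVAL,
	 * and returns EBUSY if a backend is already attached. */
	if (ioctl(vhost_fd, VHOST_SET_VRING_NUM, &s) < 0) {
		perror("VHOST_SET_VRING_NUM");
		return -1;
	}
	return 0;
}
```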
| 1506 | + |
|---|
| 1507 | +static long vhost_vring_set_addr(struct vhost_dev *d, |
|---|
| 1508 | + struct vhost_virtqueue *vq, |
|---|
| 1509 | + void __user *argp) |
|---|
| 1510 | +{ |
|---|
| 1511 | + struct vhost_vring_addr a; |
|---|
| 1512 | + |
|---|
| 1513 | + if (copy_from_user(&a, argp, sizeof a)) |
|---|
| 1514 | + return -EFAULT; |
|---|
| 1515 | + if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) |
|---|
| 1516 | + return -EOPNOTSUPP; |
|---|
| 1517 | + |
|---|
| 1518 | + /* For 32bit, verify that the top 32bits of the user |
|---|
| 1519 | + data are set to zero. */ |
|---|
| 1520 | + if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr || |
|---|
| 1521 | + (u64)(unsigned long)a.used_user_addr != a.used_user_addr || |
|---|
| 1522 | + (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) |
|---|
| 1523 | + return -EFAULT; |
|---|
| 1524 | + |
|---|
| 1525 | + /* Make sure it's safe to cast pointers to vring types. */ |
|---|
| 1526 | + BUILD_BUG_ON(__alignof__ *vq->avail > VRING_AVAIL_ALIGN_SIZE); |
|---|
| 1527 | + BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE); |
|---|
| 1528 | + if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) || |
|---|
| 1529 | + (a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) || |
|---|
| 1530 | + (a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1))) |
|---|
| 1531 | + return -EINVAL; |
|---|
| 1532 | + |
|---|
| 1533 | + /* We only verify access here if backend is configured. |
|---|
| 1534 | + * If it is not, we don't as size might not have been setup. |
|---|
| 1535 | + * We will verify when backend is configured. */ |
|---|
| 1536 | + if (vq->private_data) { |
|---|
| 1537 | + if (!vq_access_ok(vq, vq->num, |
|---|
| 1538 | + (void __user *)(unsigned long)a.desc_user_addr, |
|---|
| 1539 | + (void __user *)(unsigned long)a.avail_user_addr, |
|---|
| 1540 | + (void __user *)(unsigned long)a.used_user_addr)) |
|---|
| 1541 | + return -EINVAL; |
|---|
| 1542 | + |
|---|
| 1543 | + /* Also validate log access for used ring if enabled. */ |
|---|
| 1544 | + if (!vq_log_used_access_ok(vq, vq->log_base, |
|---|
| 1545 | + a.flags & (0x1 << VHOST_VRING_F_LOG), |
|---|
| 1546 | + a.log_guest_addr)) |
|---|
| 1547 | + return -EINVAL; |
|---|
| 1548 | + } |
|---|
| 1549 | + |
|---|
| 1550 | + vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG)); |
|---|
| 1551 | + vq->desc = (void __user *)(unsigned long)a.desc_user_addr; |
|---|
| 1552 | + vq->avail = (void __user *)(unsigned long)a.avail_user_addr; |
|---|
| 1553 | + vq->log_addr = a.log_guest_addr; |
|---|
| 1554 | + vq->used = (void __user *)(unsigned long)a.used_user_addr; |
|---|
| 1555 | + |
|---|
| 1556 | + return 0; |
|---|
| 1557 | +} |
|---|
| 1558 | + |
|---|
| 1559 | +static long vhost_vring_set_num_addr(struct vhost_dev *d, |
|---|
| 1560 | + struct vhost_virtqueue *vq, |
|---|
| 1561 | + unsigned int ioctl, |
|---|
| 1562 | + void __user *argp) |
|---|
| 1563 | +{ |
|---|
| 1564 | + long r; |
|---|
| 1565 | + |
|---|
| 1566 | + mutex_lock(&vq->mutex); |
|---|
| 1567 | + |
|---|
| 1568 | + switch (ioctl) { |
|---|
| 1569 | + case VHOST_SET_VRING_NUM: |
|---|
| 1570 | + r = vhost_vring_set_num(d, vq, argp); |
|---|
| 1571 | + break; |
|---|
| 1572 | + case VHOST_SET_VRING_ADDR: |
|---|
| 1573 | + r = vhost_vring_set_addr(d, vq, argp); |
|---|
| 1574 | + break; |
|---|
| 1575 | + default: |
|---|
| 1576 | + BUG(); |
|---|
| 1577 | + } |
|---|
| 1578 | + |
|---|
| 1579 | + mutex_unlock(&vq->mutex); |
|---|
| 1580 | + |
|---|
| 1581 | + return r; |
|---|
| 1582 | +} |
|---|
| 1421 | 1583 | long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) |
|---|
| 1422 | 1584 | { |
|---|
| 1423 | 1585 | struct file *eventfp, *filep = NULL; |
|---|
| .. | .. |
|---|
| 1427 | 1589 | struct vhost_virtqueue *vq; |
|---|
| 1428 | 1590 | struct vhost_vring_state s; |
|---|
| 1429 | 1591 | struct vhost_vring_file f; |
|---|
| 1430 | | - struct vhost_vring_addr a; |
|---|
| 1431 | 1592 | u32 idx; |
|---|
| 1432 | 1593 | long r; |
|---|
| 1433 | 1594 | |
|---|
| .. | .. |
|---|
| 1440 | 1601 | idx = array_index_nospec(idx, d->nvqs); |
|---|
| 1441 | 1602 | vq = d->vqs[idx]; |
|---|
| 1442 | 1603 | |
|---|
| 1604 | + if (ioctl == VHOST_SET_VRING_NUM || |
|---|
| 1605 | + ioctl == VHOST_SET_VRING_ADDR) { |
|---|
| 1606 | + return vhost_vring_set_num_addr(d, vq, ioctl, argp); |
|---|
| 1607 | + } |
|---|
| 1608 | + |
|---|
| 1443 | 1609 | mutex_lock(&vq->mutex); |
|---|
| 1444 | 1610 | |
|---|
| 1445 | 1611 | switch (ioctl) { |
|---|
| 1446 | | - case VHOST_SET_VRING_NUM: |
|---|
| 1447 | | - /* Resizing ring with an active backend? |
|---|
| 1448 | | - * You don't want to do that. */ |
|---|
| 1449 | | - if (vq->private_data) { |
|---|
| 1450 | | - r = -EBUSY; |
|---|
| 1451 | | - break; |
|---|
| 1452 | | - } |
|---|
| 1453 | | - if (copy_from_user(&s, argp, sizeof s)) { |
|---|
| 1454 | | - r = -EFAULT; |
|---|
| 1455 | | - break; |
|---|
| 1456 | | - } |
|---|
| 1457 | | - if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) { |
|---|
| 1458 | | - r = -EINVAL; |
|---|
| 1459 | | - break; |
|---|
| 1460 | | - } |
|---|
| 1461 | | - vq->num = s.num; |
|---|
| 1462 | | - break; |
|---|
| 1463 | 1612 | case VHOST_SET_VRING_BASE: |
|---|
| 1464 | 1613 | /* Moving base with an active backend? |
|---|
| 1465 | 1614 | * You don't want to do that. */ |
|---|
| .. | .. |
|---|
| 1485 | 1634 | if (copy_to_user(argp, &s, sizeof s)) |
|---|
| 1486 | 1635 | r = -EFAULT; |
|---|
| 1487 | 1636 | break; |
|---|
| 1488 | | - case VHOST_SET_VRING_ADDR: |
|---|
| 1489 | | - if (copy_from_user(&a, argp, sizeof a)) { |
|---|
| 1490 | | - r = -EFAULT; |
|---|
| 1491 | | - break; |
|---|
| 1492 | | - } |
|---|
| 1493 | | - if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) { |
|---|
| 1494 | | - r = -EOPNOTSUPP; |
|---|
| 1495 | | - break; |
|---|
| 1496 | | - } |
|---|
| 1497 | | - /* For 32bit, verify that the top 32bits of the user |
|---|
| 1498 | | - data are set to zero. */ |
|---|
| 1499 | | - if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr || |
|---|
| 1500 | | - (u64)(unsigned long)a.used_user_addr != a.used_user_addr || |
|---|
| 1501 | | - (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) { |
|---|
| 1502 | | - r = -EFAULT; |
|---|
| 1503 | | - break; |
|---|
| 1504 | | - } |
|---|
| 1505 | | - |
|---|
| 1506 | | - /* Make sure it's safe to cast pointers to vring types. */ |
|---|
| 1507 | | - BUILD_BUG_ON(__alignof__ *vq->avail > VRING_AVAIL_ALIGN_SIZE); |
|---|
| 1508 | | - BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE); |
|---|
| 1509 | | - if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) || |
|---|
| 1510 | | - (a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) || |
|---|
| 1511 | | - (a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1))) { |
|---|
| 1512 | | - r = -EINVAL; |
|---|
| 1513 | | - break; |
|---|
| 1514 | | - } |
|---|
| 1515 | | - |
|---|
| 1516 | | - /* We only verify access here if backend is configured. |
|---|
| 1517 | | - * If it is not, we don't as size might not have been setup. |
|---|
| 1518 | | - * We will verify when backend is configured. */ |
|---|
| 1519 | | - if (vq->private_data) { |
|---|
| 1520 | | - if (!vq_access_ok(vq, vq->num, |
|---|
| 1521 | | - (void __user *)(unsigned long)a.desc_user_addr, |
|---|
| 1522 | | - (void __user *)(unsigned long)a.avail_user_addr, |
|---|
| 1523 | | - (void __user *)(unsigned long)a.used_user_addr)) { |
|---|
| 1524 | | - r = -EINVAL; |
|---|
| 1525 | | - break; |
|---|
| 1526 | | - } |
|---|
| 1527 | | - |
|---|
| 1528 | | - /* Also validate log access for used ring if enabled. */ |
|---|
| 1529 | | - if ((a.flags & (0x1 << VHOST_VRING_F_LOG)) && |
|---|
| 1530 | | - !log_access_ok(vq->log_base, a.log_guest_addr, |
|---|
| 1531 | | - sizeof *vq->used + |
|---|
| 1532 | | - vq->num * sizeof *vq->used->ring)) { |
|---|
| 1533 | | - r = -EINVAL; |
|---|
| 1534 | | - break; |
|---|
| 1535 | | - } |
|---|
| 1536 | | - } |
|---|
| 1537 | | - |
|---|
| 1538 | | - vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG)); |
|---|
| 1539 | | - vq->desc = (void __user *)(unsigned long)a.desc_user_addr; |
|---|
| 1540 | | - vq->avail = (void __user *)(unsigned long)a.avail_user_addr; |
|---|
| 1541 | | - vq->log_addr = a.log_guest_addr; |
|---|
| 1542 | | - vq->used = (void __user *)(unsigned long)a.used_user_addr; |
|---|
| 1543 | | - break; |
|---|
| 1544 | 1637 | case VHOST_SET_VRING_KICK: |
|---|
| 1545 | 1638 | if (copy_from_user(&f, argp, sizeof f)) { |
|---|
| 1546 | 1639 | r = -EFAULT; |
|---|
| 1547 | 1640 | break; |
|---|
| 1548 | 1641 | } |
|---|
| 1549 | | - eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); |
|---|
| 1642 | + eventfp = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_fget(f.fd); |
|---|
| 1550 | 1643 | if (IS_ERR(eventfp)) { |
|---|
| 1551 | 1644 | r = PTR_ERR(eventfp); |
|---|
| 1552 | 1645 | break; |
|---|
| .. | .. |
|---|
| 1562 | 1655 | r = -EFAULT; |
|---|
| 1563 | 1656 | break; |
|---|
| 1564 | 1657 | } |
|---|
| 1565 | | - ctx = f.fd == -1 ? NULL : eventfd_ctx_fdget(f.fd); |
|---|
| 1658 | + ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); |
|---|
| 1566 | 1659 | if (IS_ERR(ctx)) { |
|---|
| 1567 | 1660 | r = PTR_ERR(ctx); |
|---|
| 1568 | 1661 | break; |
|---|
| 1569 | 1662 | } |
|---|
| 1570 | | - swap(ctx, vq->call_ctx); |
|---|
| 1663 | + |
|---|
| 1664 | + swap(ctx, vq->call_ctx.ctx); |
|---|
| 1571 | 1665 | break; |
|---|
| 1572 | 1666 | case VHOST_SET_VRING_ERR: |
|---|
| 1573 | 1667 | if (copy_from_user(&f, argp, sizeof f)) { |
|---|
| 1574 | 1668 | r = -EFAULT; |
|---|
| 1575 | 1669 | break; |
|---|
| 1576 | 1670 | } |
|---|
| 1577 | | - ctx = f.fd == -1 ? NULL : eventfd_ctx_fdget(f.fd); |
|---|
| 1671 | + ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); |
|---|
| 1578 | 1672 | if (IS_ERR(ctx)) { |
|---|
| 1579 | 1673 | r = PTR_ERR(ctx); |
|---|
| 1580 | 1674 | break; |
|---|
| .. | .. |
|---|
| 1625 | 1719 | |
|---|
| 1626 | 1720 | int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled) |
|---|
| 1627 | 1721 | { |
|---|
| 1628 | | - struct vhost_umem *niotlb, *oiotlb; |
|---|
| 1722 | + struct vhost_iotlb *niotlb, *oiotlb; |
|---|
| 1629 | 1723 | int i; |
|---|
| 1630 | 1724 | |
|---|
| 1631 | | - niotlb = vhost_umem_alloc(); |
|---|
| 1725 | + niotlb = iotlb_alloc(); |
|---|
| 1632 | 1726 | if (!niotlb) |
|---|
| 1633 | 1727 | return -ENOMEM; |
|---|
| 1634 | 1728 | |
|---|
| .. | .. |
|---|
| 1644 | 1738 | mutex_unlock(&vq->mutex); |
|---|
| 1645 | 1739 | } |
|---|
| 1646 | 1740 | |
|---|
| 1647 | | - vhost_umem_clean(oiotlb); |
|---|
| 1741 | + vhost_iotlb_free(oiotlb); |
|---|
| 1648 | 1742 | |
|---|
| 1649 | 1743 | return 0; |
|---|
| 1650 | 1744 | } |
|---|
| .. | .. |
|---|
| 1699 | 1793 | r = get_user(fd, (int __user *)argp); |
|---|
| 1700 | 1794 | if (r < 0) |
|---|
| 1701 | 1795 | break; |
|---|
| 1702 | | - ctx = fd == -1 ? NULL : eventfd_ctx_fdget(fd); |
|---|
| 1796 | + ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd); |
|---|
| 1703 | 1797 | if (IS_ERR(ctx)) { |
|---|
| 1704 | 1798 | r = PTR_ERR(ctx); |
|---|
| 1705 | 1799 | break; |
|---|
| .. | .. |
|---|
| 1724 | 1818 | |
|---|
| 1725 | 1819 | /* TODO: This is really inefficient. We need something like get_user() |
|---|
| 1726 | 1820 | * (instruction directly accesses the data, with an exception table entry |
|---|
| 1727 | | - * returning -EFAULT). See Documentation/x86/exception-tables.txt. |
|---|
| 1821 | + * returning -EFAULT). See Documentation/x86/exception-tables.rst. |
|---|
| 1728 | 1822 | */ |
|---|
| 1729 | 1823 | static int set_bit_to_user(int nr, void __user *addr) |
|---|
| 1730 | 1824 | { |
|---|
| .. | .. |
|---|
| 1734 | 1828 | int bit = nr + (log % PAGE_SIZE) * 8; |
|---|
| 1735 | 1829 | int r; |
|---|
| 1736 | 1830 | |
|---|
| 1737 | | - r = get_user_pages_fast(log, 1, 1, &page); |
|---|
| 1831 | + r = pin_user_pages_fast(log, 1, FOLL_WRITE, &page); |
|---|
| 1738 | 1832 | if (r < 0) |
|---|
| 1739 | 1833 | return r; |
|---|
| 1740 | 1834 | BUG_ON(r != 1); |
|---|
| 1741 | 1835 | base = kmap_atomic(page); |
|---|
| 1742 | 1836 | set_bit(bit, base); |
|---|
| 1743 | 1837 | kunmap_atomic(base); |
|---|
| 1744 | | - set_page_dirty_lock(page); |
|---|
| 1745 | | - put_page(page); |
|---|
| 1838 | + unpin_user_pages_dirty_lock(&page, 1, true); |
|---|
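Why the substitution above is more than a spelling change, briefly:

```c
/* pin_user_pages_fast(..., FOLL_WRITE, ...) takes a FOLL_PIN
 * reference, the correct kind for pages whose contents the kernel is
 * about to write through a kernel mapping, where the old code took a
 * plain get_user_pages_fast() reference (the third argument also
 * changes meaning: it was a numeric write flag, now a gup_flags word).
 * unpin_user_pages_dirty_lock(&page, 1, true) then replaces the
 * set_page_dirty_lock() + put_page() pair, marking the page dirty and
 * dropping the pin in one call. */
```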
| 1746 | 1839 | return 0; |
|---|
| 1747 | 1840 | } |
|---|
| 1748 | 1841 | |
|---|
| .. | .. |
|---|
| 1774 | 1867 | |
|---|
| 1775 | 1868 | static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len) |
|---|
| 1776 | 1869 | { |
|---|
| 1777 | | - struct vhost_umem *umem = vq->umem; |
|---|
| 1778 | | - struct vhost_umem_node *u; |
|---|
| 1870 | + struct vhost_iotlb *umem = vq->umem; |
|---|
| 1871 | + struct vhost_iotlb_map *u; |
|---|
| 1779 | 1872 | u64 start, end, l, min; |
|---|
| 1780 | 1873 | int r; |
|---|
| 1781 | 1874 | bool hit = false; |
|---|
| .. | .. |
|---|
| 1785 | 1878 | /* More than one GPAs can be mapped into a single HVA. So |
|---|
| 1786 | 1879 | * iterate all possible umems here to be safe. |
|---|
| 1787 | 1880 | */ |
|---|
| 1788 | | - list_for_each_entry(u, &umem->umem_list, link) { |
|---|
| 1789 | | - if (u->userspace_addr > hva - 1 + len || |
|---|
| 1790 | | - u->userspace_addr - 1 + u->size < hva) |
|---|
| 1881 | + list_for_each_entry(u, &umem->list, link) { |
|---|
| 1882 | + if (u->addr > hva - 1 + len || |
|---|
| 1883 | + u->addr - 1 + u->size < hva) |
|---|
| 1791 | 1884 | continue; |
|---|
| 1792 | | - start = max(u->userspace_addr, hva); |
|---|
| 1793 | | - end = min(u->userspace_addr - 1 + u->size, |
|---|
| 1794 | | - hva - 1 + len); |
|---|
| 1885 | + start = max(u->addr, hva); |
|---|
| 1886 | + end = min(u->addr - 1 + u->size, hva - 1 + len); |
|---|
| 1795 | 1887 | l = end - start + 1; |
|---|
| 1796 | 1888 | r = log_write(vq->log_base, |
|---|
| 1797 | | - u->start + start - u->userspace_addr, |
|---|
| 1889 | + u->start + start - u->addr, |
|---|
| 1798 | 1890 | l); |
|---|
| 1799 | 1891 | if (r < 0) |
|---|
| 1800 | 1892 | return r; |
|---|
| .. | .. |
|---|
| 1814 | 1906 | |
|---|
| 1815 | 1907 | static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len) |
|---|
| 1816 | 1908 | { |
|---|
| 1817 | | - struct iovec iov[64]; |
|---|
| 1909 | + struct iovec *iov = vq->log_iov; |
|---|
| 1818 | 1910 | int i, ret; |
|---|
| 1819 | 1911 | |
|---|
| 1820 | 1912 | if (!vq->iotlb) |
|---|
| .. | .. |
|---|
| 1874 | 1966 | static int vhost_update_used_flags(struct vhost_virtqueue *vq) |
|---|
| 1875 | 1967 | { |
|---|
| 1876 | 1968 | void __user *used; |
|---|
| 1877 | | - if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags), |
|---|
| 1878 | | - &vq->used->flags) < 0) |
|---|
| 1969 | + if (vhost_put_used_flags(vq)) |
|---|
| 1879 | 1970 | return -EFAULT; |
|---|
| 1880 | 1971 | if (unlikely(vq->log_used)) { |
|---|
| 1881 | 1972 | /* Make sure the flag is seen before log. */ |
|---|
| .. | .. |
|---|
| 1892 | 1983 | |
|---|
| 1893 | 1984 | static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) |
|---|
| 1894 | 1985 | { |
|---|
| 1895 | | - if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx), |
|---|
| 1896 | | - vhost_avail_event(vq))) |
|---|
| 1986 | + if (vhost_put_avail_event(vq)) |
|---|
| 1897 | 1987 | return -EFAULT; |
|---|
| 1898 | 1988 | if (unlikely(vq->log_used)) { |
|---|
| 1899 | 1989 | void __user *used; |
|---|
| .. | .. |
|---|
| 1925 | 2015 | goto err; |
|---|
| 1926 | 2016 | vq->signalled_used_valid = false; |
|---|
| 1927 | 2017 | if (!vq->iotlb && |
|---|
| 1928 | | - !access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx)) { |
|---|
| 2018 | + !access_ok(&vq->used->idx, sizeof vq->used->idx)) { |
|---|
| 1929 | 2019 | r = -EFAULT; |
|---|
| 1930 | 2020 | goto err; |
|---|
| 1931 | 2021 | } |
|---|
| 1932 | | - r = vhost_get_used(vq, last_used_idx, &vq->used->idx); |
|---|
| 2022 | + r = vhost_get_used_idx(vq, &last_used_idx); |
|---|
| 1933 | 2023 | if (r) { |
|---|
| 1934 | 2024 | vq_err(vq, "Can't access used idx at %p\n", |
|---|
| 1935 | 2025 | &vq->used->idx); |
|---|
| .. | .. |
|---|
| 1947 | 2037 | static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, |
|---|
| 1948 | 2038 | struct iovec iov[], int iov_size, int access) |
|---|
| 1949 | 2039 | { |
|---|
| 1950 | | - const struct vhost_umem_node *node; |
|---|
| 2040 | + const struct vhost_iotlb_map *map; |
|---|
| 1951 | 2041 | struct vhost_dev *dev = vq->dev; |
|---|
| 1952 | | - struct vhost_umem *umem = dev->iotlb ? dev->iotlb : dev->umem; |
|---|
| 2042 | + struct vhost_iotlb *umem = dev->iotlb ? dev->iotlb : dev->umem; |
|---|
| 1953 | 2043 | struct iovec *_iov; |
|---|
| 1954 | 2044 | u64 s = 0; |
|---|
| 1955 | 2045 | int ret = 0; |
|---|
| .. | .. |
|---|
| 1961 | 2051 | break; |
|---|
| 1962 | 2052 | } |
|---|
| 1963 | 2053 | |
|---|
| 1964 | | - node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, |
|---|
| 1965 | | - addr, addr + len - 1); |
|---|
| 1966 | | - if (node == NULL || node->start > addr) { |
|---|
| 2054 | + map = vhost_iotlb_itree_first(umem, addr, addr + len - 1); |
|---|
| 2055 | + if (map == NULL || map->start > addr) { |
|---|
| 1967 | 2056 | if (umem != dev->iotlb) { |
|---|
| 1968 | 2057 | ret = -EFAULT; |
|---|
| 1969 | 2058 | break; |
|---|
| 1970 | 2059 | } |
|---|
| 1971 | 2060 | ret = -EAGAIN; |
|---|
| 1972 | 2061 | break; |
|---|
| 1973 | | - } else if (!(node->perm & access)) { |
|---|
| 2062 | + } else if (!(map->perm & access)) { |
|---|
| 1974 | 2063 | ret = -EPERM; |
|---|
| 1975 | 2064 | break; |
|---|
| 1976 | 2065 | } |
|---|
| 1977 | 2066 | |
|---|
| 1978 | 2067 | _iov = iov + ret; |
|---|
| 1979 | | - size = node->size - addr + node->start; |
|---|
| 2068 | + size = map->size - addr + map->start; |
|---|
| 1980 | 2069 | _iov->iov_len = min((u64)len - s, size); |
|---|
| 1981 | 2070 | _iov->iov_base = (void __user *)(unsigned long) |
|---|
| 1982 | | - (node->userspace_addr + addr - node->start); |
|---|
| 2071 | + (map->addr + addr - map->start); |
|---|
| 1983 | 2072 | s += size; |
|---|
| 1984 | 2073 | addr += size; |
|---|
| 1985 | 2074 | ++ret; |
|---|
| .. | .. |
|---|
| 2035 | 2124 | return ret; |
|---|
| 2036 | 2125 | } |
|---|
| 2037 | 2126 | iov_iter_init(&from, READ, vq->indirect, ret, len); |
|---|
| 2038 | | - |
|---|
| 2039 | | - /* We will use the result as an address to read from, so most |
|---|
| 2040 | | - * architectures only need a compiler barrier here. */ |
|---|
| 2041 | | - read_barrier_depends(); |
|---|
| 2042 | | - |
|---|
| 2043 | 2127 | count = len / sizeof desc; |
|---|
| 2044 | 2128 | /* Buffers are chained via a 16 bit next field, so |
|---|
| 2045 | 2129 | * we can have at most 2^16 of these. */ |
|---|
| .. | .. |
|---|
| 2128 | 2212 | last_avail_idx = vq->last_avail_idx; |
|---|
| 2129 | 2213 | |
|---|
| 2130 | 2214 | if (vq->avail_idx == vq->last_avail_idx) { |
|---|
| 2131 | | - if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) { |
|---|
| 2215 | + if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) { |
|---|
| 2132 | 2216 | vq_err(vq, "Failed to access avail idx at %p\n", |
|---|
| 2133 | 2217 | &vq->avail->idx); |
|---|
| 2134 | 2218 | return -EFAULT; |
|---|
| .. | .. |
|---|
| 2155 | 2239 | |
|---|
| 2156 | 2240 | /* Grab the next descriptor number they're advertising, and increment |
|---|
| 2157 | 2241 | * the index we've seen. */ |
|---|
| 2158 | | - if (unlikely(vhost_get_avail(vq, ring_head, |
|---|
| 2159 | | - &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) { |
|---|
| 2242 | + if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) { |
|---|
| 2160 | 2243 | vq_err(vq, "Failed to read head: idx %d address %p\n", |
|---|
| 2161 | 2244 | last_avail_idx, |
|---|
| 2162 | 2245 | &vq->avail->ring[last_avail_idx % vq->num]); |
|---|
| .. | .. |
|---|
| 2191 | 2274 | i, vq->num, head); |
|---|
| 2192 | 2275 | return -EINVAL; |
|---|
| 2193 | 2276 | } |
|---|
| 2194 | | - ret = vhost_copy_from_user(vq, &desc, vq->desc + i, |
|---|
| 2195 | | - sizeof desc); |
|---|
| 2277 | + ret = vhost_get_desc(vq, &desc, i); |
|---|
| 2196 | 2278 | if (unlikely(ret)) { |
|---|
| 2197 | 2279 | vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", |
|---|
| 2198 | 2280 | i, vq->desc + i); |
|---|
| .. | .. |
|---|
| 2279 | 2361 | struct vring_used_elem *heads, |
|---|
| 2280 | 2362 | unsigned count) |
|---|
| 2281 | 2363 | { |
|---|
| 2282 | | - struct vring_used_elem __user *used; |
|---|
| 2364 | + vring_used_elem_t __user *used; |
|---|
| 2283 | 2365 | u16 old, new; |
|---|
| 2284 | 2366 | int start; |
|---|
| 2285 | 2367 | |
|---|
| 2286 | 2368 | start = vq->last_used_idx & (vq->num - 1); |
|---|
| 2287 | 2369 | used = vq->used->ring + start; |
|---|
| 2288 | | - if (count == 1) { |
|---|
| 2289 | | - if (vhost_put_user(vq, heads[0].id, &used->id)) { |
|---|
| 2290 | | - vq_err(vq, "Failed to write used id"); |
|---|
| 2291 | | - return -EFAULT; |
|---|
| 2292 | | - } |
|---|
| 2293 | | - if (vhost_put_user(vq, heads[0].len, &used->len)) { |
|---|
| 2294 | | - vq_err(vq, "Failed to write used len"); |
|---|
| 2295 | | - return -EFAULT; |
|---|
| 2296 | | - } |
|---|
| 2297 | | - } else if (vhost_copy_to_user(vq, used, heads, count * sizeof *used)) { |
|---|
| 2370 | + if (vhost_put_used(vq, heads, start, count)) { |
|---|
| 2298 | 2371 | vq_err(vq, "Failed to write used"); |
|---|
| 2299 | 2372 | return -EFAULT; |
|---|
| 2300 | 2373 | } |
|---|
| .. | .. |
|---|
| 2336 | 2409 | |
|---|
| 2337 | 2410 | /* Make sure buffer is written before we update index. */ |
|---|
| 2338 | 2411 | smp_wmb(); |
|---|
| 2339 | | - if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx), |
|---|
| 2340 | | - &vq->used->idx)) { |
|---|
| 2412 | + if (vhost_put_used_idx(vq)) { |
|---|
| 2341 | 2413 | vq_err(vq, "Failed to increment used idx"); |
|---|
| 2342 | 2414 | return -EFAULT; |
|---|
| 2343 | 2415 | } |
|---|
| .. | .. |
|---|
| 2370 | 2442 | |
|---|
| 2371 | 2443 | if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { |
|---|
| 2372 | 2444 | __virtio16 flags; |
|---|
| 2373 | | - if (vhost_get_avail(vq, flags, &vq->avail->flags)) { |
|---|
| 2445 | + if (vhost_get_avail_flags(vq, &flags)) { |
|---|
| 2374 | 2446 | vq_err(vq, "Failed to get flags"); |
|---|
| 2375 | 2447 | return true; |
|---|
| 2376 | 2448 | } |
|---|
| .. | .. |
|---|
| 2384 | 2456 | if (unlikely(!v)) |
|---|
| 2385 | 2457 | return true; |
|---|
| 2386 | 2458 | |
|---|
| 2387 | | - if (vhost_get_avail(vq, event, vhost_used_event(vq))) { |
|---|
| 2459 | + if (vhost_get_used_event(vq, &event)) { |
|---|
| 2388 | 2460 | vq_err(vq, "Failed to get used event idx"); |
|---|
| 2389 | 2461 | return true; |
|---|
| 2390 | 2462 | } |
|---|
| .. | .. |
|---|
| 2395 | 2467 | void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) |
|---|
| 2396 | 2468 | { |
|---|
| 2397 | 2469 | /* Signal the Guest tell them we used something up. */ |
|---|
| 2398 | | - if (vq->call_ctx && vhost_notify(dev, vq)) |
|---|
| 2399 | | - eventfd_signal(vq->call_ctx, 1); |
|---|
| 2470 | + if (vq->call_ctx.ctx && vhost_notify(dev, vq)) |
|---|
| 2471 | + eventfd_signal(vq->call_ctx.ctx, 1); |
|---|
| 2400 | 2472 | } |
|---|
| 2401 | 2473 | EXPORT_SYMBOL_GPL(vhost_signal); |
|---|
| 2402 | 2474 | |
|---|
| .. | .. |
|---|
| 2429 | 2501 | if (vq->avail_idx != vq->last_avail_idx) |
|---|
| 2430 | 2502 | return false; |
|---|
| 2431 | 2503 | |
|---|
| 2432 | | - r = vhost_get_avail(vq, avail_idx, &vq->avail->idx); |
|---|
| 2504 | + r = vhost_get_avail_idx(vq, &avail_idx); |
|---|
| 2433 | 2505 | if (unlikely(r)) |
|---|
| 2434 | 2506 | return false; |
|---|
| 2435 | 2507 | vq->avail_idx = vhost16_to_cpu(vq, avail_idx); |
|---|
| .. | .. |
|---|
| 2465 | 2537 | /* They could have slipped one in as we were doing that: make |
|---|
| 2466 | 2538 | * sure it's written, then check again. */ |
|---|
| 2467 | 2539 | smp_mb(); |
|---|
| 2468 | | - r = vhost_get_avail(vq, avail_idx, &vq->avail->idx); |
|---|
| 2540 | + r = vhost_get_avail_idx(vq, &avail_idx); |
|---|
| 2469 | 2541 | if (r) { |
|---|
| 2470 | 2542 | vq_err(vq, "Failed to check avail idx at %p: %d\n", |
|---|
| 2471 | 2543 | &vq->avail->idx, r); |
|---|
| .. | .. |
|---|
| 2487 | 2559 | if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { |
|---|
| 2488 | 2560 | r = vhost_update_used_flags(vq); |
|---|
| 2489 | 2561 | if (r) |
|---|
| 2490 | | - vq_err(vq, "Failed to enable notification at %p: %d\n", |
|---|
| 2562 | + vq_err(vq, "Failed to disable notification at %p: %d\n", |
|---|
| 2491 | 2563 | &vq->used->flags, r); |
|---|
| 2492 | 2564 | } |
|---|
| 2493 | 2565 | } |
|---|
| .. | .. |
|---|
| 2536 | 2608 | } |
|---|
| 2537 | 2609 | EXPORT_SYMBOL_GPL(vhost_dequeue_msg); |
|---|
| 2538 | 2610 | |
|---|
| 2611 | +void vhost_set_backend_features(struct vhost_dev *dev, u64 features) |
|---|
| 2612 | +{ |
|---|
| 2613 | + struct vhost_virtqueue *vq; |
|---|
| 2614 | + int i; |
|---|
| 2615 | + |
|---|
| 2616 | + mutex_lock(&dev->mutex); |
|---|
| 2617 | + for (i = 0; i < dev->nvqs; ++i) { |
|---|
| 2618 | + vq = dev->vqs[i]; |
|---|
| 2619 | + mutex_lock(&vq->mutex); |
|---|
| 2620 | + vq->acked_backend_features = features; |
|---|
| 2621 | + mutex_unlock(&vq->mutex); |
|---|
| 2622 | + } |
|---|
| 2623 | + mutex_unlock(&dev->mutex); |
|---|
| 2624 | +} |
|---|
| 2625 | +EXPORT_SYMBOL_GPL(vhost_set_backend_features); |
|---|
| 2539 | 2626 | |
|---|
| 2540 | 2627 | static int __init vhost_init(void) |
|---|
| 2541 | 2628 | { |
|---|