.. | ..
 | 1 | +// SPDX-License-Identifier: GPL-2.0-only
1 | 2 | /* Copyright (C) 2009 Red Hat, Inc.
2 | 3 | * Copyright (C) 2006 Rusty Russell IBM Corporation
3 | 4 | *
.. | ..
6 | 7 | * Inspiration, some code, and most witty comments come from
7 | 8 | * Documentation/virtual/lguest/lguest.c, by Rusty Russell
8 | 9 | *
9 | | - * This work is licensed under the terms of the GNU GPL, version 2.
10 | | - *
11 | 10 | * Generic code for virtio server in host kernel.
12 | 11 | */
13 | 12 |
.. | ..
15 | 14 | #include <linux/vhost.h>
16 | 15 | #include <linux/uio.h>
17 | 16 | #include <linux/mm.h>
18 | | -#include <linux/mmu_context.h>
19 | 17 | #include <linux/miscdevice.h>
20 | 18 | #include <linux/mutex.h>
21 | 19 | #include <linux/poll.h>
.. | ..
50 | 48 |
51 | 49 | #define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num])
52 | 50 | #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num])
53 | | -
54 | | -INTERVAL_TREE_DEFINE(struct vhost_umem_node,
55 | | - rb, __u64, __subtree_last,
56 | | - START, LAST, static inline, vhost_umem_interval_tree);
57 | 51 |
58 | 52 | #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY
59 | 53 | static void vhost_disable_cross_endian(struct vhost_virtqueue *vq)
.. | ..
171 | 165 | void *key)
172 | 166 | {
173 | 167 | struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);
 | 168 | + struct vhost_work *work = &poll->work;
174 | 169 |
175 | 170 | if (!(key_to_poll(key) & poll->mask))
176 | 171 | return 0;
177 | 172 |
178 | | - vhost_poll_queue(poll);
 | 173 | + if (!poll->dev->use_worker)
 | 174 | + work->fn(work);
 | 175 | + else
 | 176 | + vhost_poll_queue(poll);
 | 177 | +
179 | 178 | return 0;
180 | 179 | }
181 | 180 |
.. | ..
205 | 204 | int vhost_poll_start(struct vhost_poll *poll, struct file *file)
206 | 205 | {
207 | 206 | __poll_t mask;
208 | | - int ret = 0;
209 | 207 |
210 | 208 | if (poll->wqh)
211 | 209 | return 0;
.. | ..
215 | 213 | vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask));
216 | 214 | if (mask & EPOLLERR) {
217 | 215 | vhost_poll_stop(poll);
218 | | - ret = -EINVAL;
 | 216 | + return -EINVAL;
219 | 217 | }
220 | 218 |
221 | | - return ret;
 | 219 | + return 0;
222 | 220 | }
223 | 221 | EXPORT_SYMBOL_GPL(vhost_poll_start);
224 | 222 |
.. | ..
300 | 298 | __vhost_vq_meta_reset(d->vqs[i]);
301 | 299 | }
302 | 300 |
 | 301 | +static void vhost_vring_call_reset(struct vhost_vring_call *call_ctx)
 | 302 | +{
 | 303 | + call_ctx->ctx = NULL;
 | 304 | + memset(&call_ctx->producer, 0x0, sizeof(struct irq_bypass_producer));
 | 305 | +}
 | 306 | +
 | 307 | +bool vhost_vq_is_setup(struct vhost_virtqueue *vq)
 | 308 | +{
 | 309 | + return vq->avail && vq->desc && vq->used && vhost_vq_access_ok(vq);
 | 310 | +}
 | 311 | +EXPORT_SYMBOL_GPL(vhost_vq_is_setup);
 | 312 | +
303 | 313 | static void vhost_vq_reset(struct vhost_dev *dev,
304 | 314 | struct vhost_virtqueue *vq)
305 | 315 | {
.. | ..
321 | 331 | vq->log_base = NULL;
322 | 332 | vq->error_ctx = NULL;
323 | 333 | vq->kick = NULL;
324 | | - vq->call_ctx = NULL;
325 | 334 | vq->log_ctx = NULL;
326 | 335 | vhost_disable_cross_endian(vq);
327 | 336 | vhost_reset_is_le(vq);
328 | 337 | vq->busyloop_timeout = 0;
329 | 338 | vq->umem = NULL;
330 | 339 | vq->iotlb = NULL;
 | 340 | + vhost_vring_call_reset(&vq->call_ctx);
331 | 341 | __vhost_vq_meta_reset(vq);
332 | 342 | }
333 | 343 |
.. | ..
336 | 346 | struct vhost_dev *dev = data;
337 | 347 | struct vhost_work *work, *work_next;
338 | 348 | struct llist_node *node;
339 | | - mm_segment_t oldfs = get_fs();
340 | 349 |
341 | | - set_fs(USER_DS);
342 | | - use_mm(dev->mm);
 | 350 | + kthread_use_mm(dev->mm);
343 | 351 |
344 | 352 | for (;;) {
345 | 353 | /* mb paired w/ kthread_stop */
.. | ..
367 | 375 | schedule();
368 | 376 | }
369 | 377 | }
370 | | - unuse_mm(dev->mm);
371 | | - set_fs(oldfs);
 | 378 | + kthread_unuse_mm(dev->mm);
372 | 379 | return 0;
373 | 380 | }
374 | 381 |
.. | ..
431 | 438 | }
432 | 439 | EXPORT_SYMBOL_GPL(vhost_exceeds_weight);
433 | 440 |
 | 441 | +static size_t vhost_get_avail_size(struct vhost_virtqueue *vq,
 | 442 | + unsigned int num)
 | 443 | +{
 | 444 | + size_t event __maybe_unused =
 | 445 | + vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 | 446 | +
 | 447 | + return sizeof(*vq->avail) +
 | 448 | + sizeof(*vq->avail->ring) * num + event;
 | 449 | +}
 | 450 | +
 | 451 | +static size_t vhost_get_used_size(struct vhost_virtqueue *vq,
 | 452 | + unsigned int num)
 | 453 | +{
 | 454 | + size_t event __maybe_unused =
 | 455 | + vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 | 456 | +
 | 457 | + return sizeof(*vq->used) +
 | 458 | + sizeof(*vq->used->ring) * num + event;
 | 459 | +}
 | 460 | +
 | 461 | +static size_t vhost_get_desc_size(struct vhost_virtqueue *vq,
 | 462 | + unsigned int num)
 | 463 | +{
 | 464 | + return sizeof(*vq->desc) * num;
 | 465 | +}
 | 466 | +
434 | 467 | void vhost_dev_init(struct vhost_dev *dev,
435 | 468 | struct vhost_virtqueue **vqs, int nvqs,
436 | | - int iov_limit, int weight, int byte_weight)
 | 469 | + int iov_limit, int weight, int byte_weight,
 | 470 | + bool use_worker,
 | 471 | + int (*msg_handler)(struct vhost_dev *dev,
 | 472 | + struct vhost_iotlb_msg *msg))
437 | 473 | {
438 | 474 | struct vhost_virtqueue *vq;
439 | 475 | int i;
.. | ..
449 | 485 | dev->iov_limit = iov_limit;
450 | 486 | dev->weight = weight;
451 | 487 | dev->byte_weight = byte_weight;
 | 488 | + dev->use_worker = use_worker;
 | 489 | + dev->msg_handler = msg_handler;
452 | 490 | init_llist_head(&dev->work_list);
453 | 491 | init_waitqueue_head(&dev->wait);
454 | 492 | INIT_LIST_HEAD(&dev->read_list);
.. | ..
511 | 549 | }
512 | 550 | EXPORT_SYMBOL_GPL(vhost_dev_has_owner);
513 | 551 |
 | 552 | +static void vhost_attach_mm(struct vhost_dev *dev)
 | 553 | +{
 | 554 | + /* No owner, become one */
 | 555 | + if (dev->use_worker) {
 | 556 | + dev->mm = get_task_mm(current);
 | 557 | + } else {
 | 558 | + /* A vDPA device does not use a worker thread, so there's
 | 559 | + * no need to hold the address space for mm. This helps
 | 560 | + * to avoid a deadlock in the case of mmap(), which may
 | 561 | + * hold the refcnt of the file and depend on the release
 | 562 | + * method to remove the vma.
 | 563 | + */
 | 564 | + dev->mm = current->mm;
 | 565 | + mmgrab(dev->mm);
 | 566 | + }
 | 567 | +}
 | 568 | +
 | 569 | +static void vhost_detach_mm(struct vhost_dev *dev)
 | 570 | +{
 | 571 | + if (!dev->mm)
 | 572 | + return;
 | 573 | +
 | 574 | + if (dev->use_worker)
 | 575 | + mmput(dev->mm);
 | 576 | + else
 | 577 | + mmdrop(dev->mm);
 | 578 | +
 | 579 | + dev->mm = NULL;
 | 580 | +}
 | 581 | +
514 | 582 | /* Caller should have device mutex */
515 | 583 | long vhost_dev_set_owner(struct vhost_dev *dev)
516 | 584 | {
.. | ..
523 | 591 | goto err_mm;
524 | 592 | }
525 | 593 |
526 | | - /* No owner, become one */
527 | | - dev->mm = get_task_mm(current);
 | 594 | + vhost_attach_mm(dev);
 | 595 | +
528 | 596 | dev->kcov_handle = kcov_common_handle();
529 | | - worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
530 | | - if (IS_ERR(worker)) {
531 | | - err = PTR_ERR(worker);
532 | | - goto err_worker;
 | 597 | + if (dev->use_worker) {
 | 598 | + worker = kthread_create(vhost_worker, dev,
 | 599 | + "vhost-%d", current->pid);
 | 600 | + if (IS_ERR(worker)) {
 | 601 | + err = PTR_ERR(worker);
 | 602 | + goto err_worker;
 | 603 | + }
 | 604 | +
 | 605 | + dev->worker = worker;
 | 606 | + wake_up_process(worker); /* avoid contributing to loadavg */
 | 607 | +
 | 608 | + err = vhost_attach_cgroups(dev);
 | 609 | + if (err)
 | 610 | + goto err_cgroup;
533 | 611 | }
534 | | -
535 | | - dev->worker = worker;
536 | | - wake_up_process(worker); /* avoid contributing to loadavg */
537 | | -
538 | | - err = vhost_attach_cgroups(dev);
539 | | - if (err)
540 | | - goto err_cgroup;
541 | 612 |
542 | 613 | err = vhost_dev_alloc_iovecs(dev);
543 | 614 | if (err)
.. | ..
545 | 616 |
546 | 617 | return 0;
547 | 618 | err_cgroup:
548 | | - kthread_stop(worker);
549 | | - dev->worker = NULL;
 | 619 | + if (dev->worker) {
 | 620 | + kthread_stop(dev->worker);
 | 621 | + dev->worker = NULL;
 | 622 | + }
550 | 623 | err_worker:
551 | | - if (dev->mm)
552 | | - mmput(dev->mm);
553 | | - dev->mm = NULL;
 | 624 | + vhost_detach_mm(dev);
554 | 625 | dev->kcov_handle = 0;
555 | 626 | err_mm:
556 | 627 | return err;
557 | 628 | }
558 | 629 | EXPORT_SYMBOL_GPL(vhost_dev_set_owner);
559 | 630 |
560 | | -struct vhost_umem *vhost_dev_reset_owner_prepare(void)
 | 631 | +static struct vhost_iotlb *iotlb_alloc(void)
561 | 632 | {
562 | | - return kvzalloc(sizeof(struct vhost_umem), GFP_KERNEL);
 | 633 | + return vhost_iotlb_alloc(max_iotlb_entries,
 | 634 | + VHOST_IOTLB_FLAG_RETIRE);
 | 635 | +}
 | 636 | +
 | 637 | +struct vhost_iotlb *vhost_dev_reset_owner_prepare(void)
 | 638 | +{
 | 639 | + return iotlb_alloc();
563 | 640 | }
564 | 641 | EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare);
565 | 642 |
566 | 643 | /* Caller should have device mutex */
567 | | -void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_umem *umem)
 | 644 | +void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_iotlb *umem)
568 | 645 | {
569 | 646 | int i;
570 | 647 |
571 | 648 | vhost_dev_cleanup(dev);
572 | 649 |
573 | | - /* Restore memory to default empty mapping. */
574 | | - INIT_LIST_HEAD(&umem->umem_list);
575 | 650 | dev->umem = umem;
576 | 651 | /* We don't need VQ locks below since vhost_dev_cleanup makes sure
577 | 652 | * VQs aren't running.
.. | ..
594 | 669 | }
595 | 670 | EXPORT_SYMBOL_GPL(vhost_dev_stop);
596 | 671 |
597 | | -static void vhost_umem_free(struct vhost_umem *umem,
598 | | - struct vhost_umem_node *node)
599 | | -{
600 | | - vhost_umem_interval_tree_remove(node, &umem->umem_tree);
601 | | - list_del(&node->link);
602 | | - kfree(node);
603 | | - umem->numem--;
604 | | -}
605 | | -
606 | | -static void vhost_umem_clean(struct vhost_umem *umem)
607 | | -{
608 | | - struct vhost_umem_node *node, *tmp;
609 | | -
610 | | - if (!umem)
611 | | - return;
612 | | -
613 | | - list_for_each_entry_safe(node, tmp, &umem->umem_list, link)
614 | | - vhost_umem_free(umem, node);
615 | | -
616 | | - kvfree(umem);
617 | | -}
618 | | -
619 | | -static void vhost_clear_msg(struct vhost_dev *dev)
 | 672 | +void vhost_clear_msg(struct vhost_dev *dev)
620 | 673 | {
621 | 674 | struct vhost_msg_node *node, *n;
622 | 675 |
.. | ..
634 | 687 |
635 | 688 | spin_unlock(&dev->iotlb_lock);
636 | 689 | }
 | 690 | +EXPORT_SYMBOL_GPL(vhost_clear_msg);
637 | 691 |
638 | 692 | void vhost_dev_cleanup(struct vhost_dev *dev)
639 | 693 | {
.. | .. |
---|
644 | 698 | eventfd_ctx_put(dev->vqs[i]->error_ctx); |
---|
645 | 699 | if (dev->vqs[i]->kick) |
---|
646 | 700 | fput(dev->vqs[i]->kick); |
---|
647 | | - if (dev->vqs[i]->call_ctx) |
---|
648 | | - eventfd_ctx_put(dev->vqs[i]->call_ctx); |
---|
| 701 | + if (dev->vqs[i]->call_ctx.ctx) |
---|
| 702 | + eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx); |
---|
649 | 703 | vhost_vq_reset(dev, dev->vqs[i]); |
---|
650 | 704 | } |
---|
651 | 705 | vhost_dev_free_iovecs(dev); |
---|
.. | .. |
---|
653 | 707 | eventfd_ctx_put(dev->log_ctx); |
---|
654 | 708 | dev->log_ctx = NULL; |
---|
655 | 709 | /* No one will access memory at this point */ |
---|
656 | | - vhost_umem_clean(dev->umem); |
---|
| 710 | + vhost_iotlb_free(dev->umem); |
---|
657 | 711 | dev->umem = NULL; |
---|
658 | | - vhost_umem_clean(dev->iotlb); |
---|
| 712 | + vhost_iotlb_free(dev->iotlb); |
---|
659 | 713 | dev->iotlb = NULL; |
---|
660 | 714 | vhost_clear_msg(dev); |
---|
661 | 715 | wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM); |
---|
.. | .. |
---|
665 | 719 | dev->worker = NULL; |
---|
666 | 720 | dev->kcov_handle = 0; |
---|
667 | 721 | } |
---|
668 | | - if (dev->mm) |
---|
669 | | - mmput(dev->mm); |
---|
670 | | - dev->mm = NULL; |
---|
| 722 | + vhost_detach_mm(dev); |
---|
671 | 723 | } |
---|
672 | 724 | EXPORT_SYMBOL_GPL(vhost_dev_cleanup); |
---|
673 | 725 | |
---|
.. | .. |
---|
680 | 732 | a + (unsigned long)log_base > ULONG_MAX) |
---|
681 | 733 | return false; |
---|
682 | 734 | |
---|
683 | | - return access_ok(VERIFY_WRITE, log_base + a, |
---|
| 735 | + return access_ok(log_base + a, |
---|
684 | 736 | (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8); |
---|
685 | 737 | } |
---|
686 | 738 | |
---|
.. | .. |
---|
697 | 749 | } |
---|
698 | 750 | |
---|
699 | 751 | /* Caller should have vq mutex and device mutex. */ |
---|
700 | | -static bool vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem, |
---|
| 752 | +static bool vq_memory_access_ok(void __user *log_base, struct vhost_iotlb *umem, |
---|
701 | 753 | int log_all) |
---|
702 | 754 | { |
---|
703 | | - struct vhost_umem_node *node; |
---|
| 755 | + struct vhost_iotlb_map *map; |
---|
704 | 756 | |
---|
705 | 757 | if (!umem) |
---|
706 | 758 | return false; |
---|
707 | 759 | |
---|
708 | | - list_for_each_entry(node, &umem->umem_list, link) { |
---|
709 | | - unsigned long a = node->userspace_addr; |
---|
| 760 | + list_for_each_entry(map, &umem->list, link) { |
---|
| 761 | + unsigned long a = map->addr; |
---|
710 | 762 | |
---|
711 | | - if (vhost_overflow(node->userspace_addr, node->size)) |
---|
| 763 | + if (vhost_overflow(map->addr, map->size)) |
---|
712 | 764 | return false; |
---|
713 | 765 | |
---|
714 | 766 | |
---|
715 | | - if (!access_ok(VERIFY_WRITE, (void __user *)a, |
---|
716 | | - node->size)) |
---|
| 767 | + if (!access_ok((void __user *)a, map->size)) |
---|
717 | 768 | return false; |
---|
718 | 769 | else if (log_all && !log_access_ok(log_base, |
---|
719 | | - node->start, |
---|
720 | | - node->size)) |
---|
| 770 | + map->start, |
---|
| 771 | + map->size)) |
---|
721 | 772 | return false; |
---|
722 | 773 | } |
---|
723 | 774 | return true; |
---|
.. | .. |
---|
727 | 778 | u64 addr, unsigned int size, |
---|
728 | 779 | int type) |
---|
729 | 780 | { |
---|
730 | | - const struct vhost_umem_node *node = vq->meta_iotlb[type]; |
---|
| 781 | + const struct vhost_iotlb_map *map = vq->meta_iotlb[type]; |
---|
731 | 782 | |
---|
732 | | - if (!node) |
---|
| 783 | + if (!map) |
---|
733 | 784 | return NULL; |
---|
734 | 785 | |
---|
735 | | - return (void *)(uintptr_t)(node->userspace_addr + addr - node->start); |
---|
| 786 | + return (void __user *)(uintptr_t)(map->addr + addr - map->start); |
---|
736 | 787 | } |
---|
737 | 788 | |
---|
738 | 789 | /* Can we switch to this memory table? */ |
---|
739 | 790 | /* Caller should have device mutex but not vq mutex */ |
---|
740 | | -static bool memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem, |
---|
| 791 | +static bool memory_access_ok(struct vhost_dev *d, struct vhost_iotlb *umem, |
---|
741 | 792 | int log_all) |
---|
742 | 793 | { |
---|
743 | 794 | int i; |
---|
.. | .. |
---|
871 | 922 | * not happen in this case. |
---|
872 | 923 | */ |
---|
873 | 924 | static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, |
---|
874 | | - void *addr, unsigned int size, |
---|
| 925 | + void __user *addr, unsigned int size, |
---|
875 | 926 | int type) |
---|
876 | 927 | { |
---|
877 | 928 | void __user *uaddr = vhost_vq_meta_fetch(vq, |
---|
.. | .. |
---|
884 | 935 | |
---|
885 | 936 | #define vhost_put_user(vq, x, ptr) \ |
---|
886 | 937 | ({ \ |
---|
887 | | - int ret = -EFAULT; \ |
---|
| 938 | + int ret; \ |
---|
888 | 939 | if (!vq->iotlb) { \ |
---|
889 | 940 | ret = __put_user(x, ptr); \ |
---|
890 | 941 | } else { \ |
---|
.. | .. |
---|
898 | 949 | } \ |
---|
899 | 950 | ret; \ |
---|
900 | 951 | }) |
---|
| 952 | + |
---|
| 953 | +static inline int vhost_put_avail_event(struct vhost_virtqueue *vq) |
---|
| 954 | +{ |
---|
| 955 | + return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx), |
---|
| 956 | + vhost_avail_event(vq)); |
---|
| 957 | +} |
---|
| 958 | + |
---|
| 959 | +static inline int vhost_put_used(struct vhost_virtqueue *vq, |
---|
| 960 | + struct vring_used_elem *head, int idx, |
---|
| 961 | + int count) |
---|
| 962 | +{ |
---|
| 963 | + return vhost_copy_to_user(vq, vq->used->ring + idx, head, |
---|
| 964 | + count * sizeof(*head)); |
---|
| 965 | +} |
---|
| 966 | + |
---|
| 967 | +static inline int vhost_put_used_flags(struct vhost_virtqueue *vq) |
---|
| 968 | + |
---|
| 969 | +{ |
---|
| 970 | + return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags), |
---|
| 971 | + &vq->used->flags); |
---|
| 972 | +} |
---|
| 973 | + |
---|
| 974 | +static inline int vhost_put_used_idx(struct vhost_virtqueue *vq) |
---|
| 975 | + |
---|
| 976 | +{ |
---|
| 977 | + return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx), |
---|
| 978 | + &vq->used->idx); |
---|
| 979 | +} |
---|
901 | 980 | |
---|
902 | 981 | #define vhost_get_user(vq, x, ptr, type) \ |
---|
903 | 982 | ({ \ |
---|
.. | .. |
---|
937 | 1016 | mutex_unlock(&d->vqs[i]->mutex); |
---|
938 | 1017 | } |
---|
939 | 1018 | |
---|
940 | | -static int vhost_new_umem_range(struct vhost_umem *umem, |
---|
941 | | - u64 start, u64 size, u64 end, |
---|
942 | | - u64 userspace_addr, int perm) |
---|
| 1019 | +static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq, |
---|
| 1020 | + __virtio16 *idx) |
---|
943 | 1021 | { |
---|
944 | | - struct vhost_umem_node *tmp, *node; |
---|
945 | | - |
---|
946 | | - if (!size) |
---|
947 | | - return -EFAULT; |
---|
948 | | - |
---|
949 | | - node = kmalloc(sizeof(*node), GFP_ATOMIC); |
---|
950 | | - if (!node) |
---|
951 | | - return -ENOMEM; |
---|
952 | | - |
---|
953 | | - if (umem->numem == max_iotlb_entries) { |
---|
954 | | - tmp = list_first_entry(&umem->umem_list, typeof(*tmp), link); |
---|
955 | | - vhost_umem_free(umem, tmp); |
---|
956 | | - } |
---|
957 | | - |
---|
958 | | - node->start = start; |
---|
959 | | - node->size = size; |
---|
960 | | - node->last = end; |
---|
961 | | - node->userspace_addr = userspace_addr; |
---|
962 | | - node->perm = perm; |
---|
963 | | - INIT_LIST_HEAD(&node->link); |
---|
964 | | - list_add_tail(&node->link, &umem->umem_list); |
---|
965 | | - vhost_umem_interval_tree_insert(node, &umem->umem_tree); |
---|
966 | | - umem->numem++; |
---|
967 | | - |
---|
968 | | - return 0; |
---|
| 1022 | + return vhost_get_avail(vq, *idx, &vq->avail->idx); |
---|
969 | 1023 | } |
---|
970 | 1024 | |
---|
971 | | -static void vhost_del_umem_range(struct vhost_umem *umem, |
---|
972 | | - u64 start, u64 end) |
---|
| 1025 | +static inline int vhost_get_avail_head(struct vhost_virtqueue *vq, |
---|
| 1026 | + __virtio16 *head, int idx) |
---|
973 | 1027 | { |
---|
974 | | - struct vhost_umem_node *node; |
---|
| 1028 | + return vhost_get_avail(vq, *head, |
---|
| 1029 | + &vq->avail->ring[idx & (vq->num - 1)]); |
---|
| 1030 | +} |
---|
975 | 1031 | |
---|
976 | | - while ((node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, |
---|
977 | | - start, end))) |
---|
978 | | - vhost_umem_free(umem, node); |
---|
| 1032 | +static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq, |
---|
| 1033 | + __virtio16 *flags) |
---|
| 1034 | +{ |
---|
| 1035 | + return vhost_get_avail(vq, *flags, &vq->avail->flags); |
---|
| 1036 | +} |
---|
| 1037 | + |
---|
| 1038 | +static inline int vhost_get_used_event(struct vhost_virtqueue *vq, |
---|
| 1039 | + __virtio16 *event) |
---|
| 1040 | +{ |
---|
| 1041 | + return vhost_get_avail(vq, *event, vhost_used_event(vq)); |
---|
| 1042 | +} |
---|
| 1043 | + |
---|
| 1044 | +static inline int vhost_get_used_idx(struct vhost_virtqueue *vq, |
---|
| 1045 | + __virtio16 *idx) |
---|
| 1046 | +{ |
---|
| 1047 | + return vhost_get_used(vq, *idx, &vq->used->idx); |
---|
| 1048 | +} |
---|
| 1049 | + |
---|
| 1050 | +static inline int vhost_get_desc(struct vhost_virtqueue *vq, |
---|
| 1051 | + struct vring_desc *desc, int idx) |
---|
| 1052 | +{ |
---|
| 1053 | + return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc)); |
---|
979 | 1054 | } |
---|
980 | 1055 | |
---|
981 | 1056 | static void vhost_iotlb_notify_vq(struct vhost_dev *d, |
---|
.. | .. |
---|
1008 | 1083 | return false; |
---|
1009 | 1084 | |
---|
1010 | 1085 | if ((access & VHOST_ACCESS_RO) && |
---|
1011 | | - !access_ok(VERIFY_READ, (void __user *)a, size)) |
---|
| 1086 | + !access_ok((void __user *)a, size)) |
---|
1012 | 1087 | return false; |
---|
1013 | 1088 | if ((access & VHOST_ACCESS_WO) && |
---|
1014 | | - !access_ok(VERIFY_WRITE, (void __user *)a, size)) |
---|
| 1089 | + !access_ok((void __user *)a, size)) |
---|
1015 | 1090 | return false; |
---|
1016 | 1091 | return true; |
---|
1017 | 1092 | } |
---|
.. | .. |
---|
1034 | 1109 | break; |
---|
1035 | 1110 | } |
---|
1036 | 1111 | vhost_vq_meta_reset(dev); |
---|
1037 | | - if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size, |
---|
1038 | | - msg->iova + msg->size - 1, |
---|
1039 | | - msg->uaddr, msg->perm)) { |
---|
| 1112 | + if (vhost_iotlb_add_range(dev->iotlb, msg->iova, |
---|
| 1113 | + msg->iova + msg->size - 1, |
---|
| 1114 | + msg->uaddr, msg->perm)) { |
---|
1040 | 1115 | ret = -ENOMEM; |
---|
1041 | 1116 | break; |
---|
1042 | 1117 | } |
---|
.. | .. |
---|
1048 | 1123 | break; |
---|
1049 | 1124 | } |
---|
1050 | 1125 | vhost_vq_meta_reset(dev); |
---|
1051 | | - vhost_del_umem_range(dev->iotlb, msg->iova, |
---|
1052 | | - msg->iova + msg->size - 1); |
---|
| 1126 | + vhost_iotlb_del_range(dev->iotlb, msg->iova, |
---|
| 1127 | + msg->iova + msg->size - 1); |
---|
1053 | 1128 | break; |
---|
1054 | 1129 | default: |
---|
1055 | 1130 | ret = -EINVAL; |
---|
.. | .. |
---|
1095 | 1170 | ret = -EINVAL; |
---|
1096 | 1171 | goto done; |
---|
1097 | 1172 | } |
---|
1098 | | - if (vhost_process_iotlb_msg(dev, &msg)) { |
---|
| 1173 | + |
---|
| 1174 | + if (dev->msg_handler) |
---|
| 1175 | + ret = dev->msg_handler(dev, &msg); |
---|
| 1176 | + else |
---|
| 1177 | + ret = vhost_process_iotlb_msg(dev, &msg); |
---|
| 1178 | + if (ret) { |
---|
1099 | 1179 | ret = -EFAULT; |
---|
1100 | 1180 | goto done; |
---|
1101 | 1181 | } |
---|
.. | .. |
---|
1217 | 1297 | } |
---|
1218 | 1298 | |
---|
1219 | 1299 | static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, |
---|
1220 | | - struct vring_desc __user *desc, |
---|
1221 | | - struct vring_avail __user *avail, |
---|
1222 | | - struct vring_used __user *used) |
---|
| 1300 | + vring_desc_t __user *desc, |
---|
| 1301 | + vring_avail_t __user *avail, |
---|
| 1302 | + vring_used_t __user *used) |
---|
1223 | 1303 | |
---|
1224 | 1304 | { |
---|
1225 | | - size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; |
---|
| 1305 | + /* If an IOTLB device is present, the vring addresses are |
---|
| 1306 | + * GIOVAs. Access validation occurs at prefetch time. */ |
---|
| 1307 | + if (vq->iotlb) |
---|
| 1308 | + return true; |
---|
1226 | 1309 | |
---|
1227 | | - return access_ok(VERIFY_READ, desc, num * sizeof *desc) && |
---|
1228 | | - access_ok(VERIFY_READ, avail, |
---|
1229 | | - sizeof *avail + num * sizeof *avail->ring + s) && |
---|
1230 | | - access_ok(VERIFY_WRITE, used, |
---|
1231 | | - sizeof *used + num * sizeof *used->ring + s); |
---|
| 1310 | + return access_ok(desc, vhost_get_desc_size(vq, num)) && |
---|
| 1311 | + access_ok(avail, vhost_get_avail_size(vq, num)) && |
---|
| 1312 | + access_ok(used, vhost_get_used_size(vq, num)); |
---|
1232 | 1313 | } |
---|
1233 | 1314 | |
---|
1234 | 1315 | static void vhost_vq_meta_update(struct vhost_virtqueue *vq, |
---|
1235 | | - const struct vhost_umem_node *node, |
---|
| 1316 | + const struct vhost_iotlb_map *map, |
---|
1236 | 1317 | int type) |
---|
1237 | 1318 | { |
---|
1238 | 1319 | int access = (type == VHOST_ADDR_USED) ? |
---|
1239 | 1320 | VHOST_ACCESS_WO : VHOST_ACCESS_RO; |
---|
1240 | 1321 | |
---|
1241 | | - if (likely(node->perm & access)) |
---|
1242 | | - vq->meta_iotlb[type] = node; |
---|
| 1322 | + if (likely(map->perm & access)) |
---|
| 1323 | + vq->meta_iotlb[type] = map; |
---|
1243 | 1324 | } |
---|
1244 | 1325 | |
---|
1245 | 1326 | static bool iotlb_access_ok(struct vhost_virtqueue *vq, |
---|
1246 | 1327 | int access, u64 addr, u64 len, int type) |
---|
1247 | 1328 | { |
---|
1248 | | - const struct vhost_umem_node *node; |
---|
1249 | | - struct vhost_umem *umem = vq->iotlb; |
---|
| 1329 | + const struct vhost_iotlb_map *map; |
---|
| 1330 | + struct vhost_iotlb *umem = vq->iotlb; |
---|
1250 | 1331 | u64 s = 0, size, orig_addr = addr, last = addr + len - 1; |
---|
1251 | 1332 | |
---|
1252 | 1333 | if (vhost_vq_meta_fetch(vq, addr, len, type)) |
---|
1253 | 1334 | return true; |
---|
1254 | 1335 | |
---|
1255 | 1336 | while (len > s) { |
---|
1256 | | - node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, |
---|
1257 | | - addr, |
---|
1258 | | - last); |
---|
1259 | | - if (node == NULL || node->start > addr) { |
---|
| 1337 | + map = vhost_iotlb_itree_first(umem, addr, last); |
---|
| 1338 | + if (map == NULL || map->start > addr) { |
---|
1260 | 1339 | vhost_iotlb_miss(vq, addr, access); |
---|
1261 | 1340 | return false; |
---|
1262 | | - } else if (!(node->perm & access)) { |
---|
| 1341 | + } else if (!(map->perm & access)) { |
---|
1263 | 1342 | /* Report the possible access violation by requesting |
---|
1264 | 1343 | * another translation from userspace. |
---|
1265 | 1344 | */ |
---|
1266 | 1345 | return false; |
---|
1267 | 1346 | } |
---|
1268 | 1347 | |
---|
1269 | | - size = node->size - addr + node->start; |
---|
| 1348 | + size = map->size - addr + map->start; |
---|
1270 | 1349 | |
---|
1271 | 1350 | if (orig_addr == addr && size >= len) |
---|
1272 | | - vhost_vq_meta_update(vq, node, type); |
---|
| 1351 | + vhost_vq_meta_update(vq, map, type); |
---|
1273 | 1352 | |
---|
1274 | 1353 | s += size; |
---|
1275 | 1354 | addr += size; |
---|
.. | .. |
---|
1278 | 1357 | return true; |
---|
1279 | 1358 | } |
---|
1280 | 1359 | |
---|
1281 | | -int vq_iotlb_prefetch(struct vhost_virtqueue *vq) |
---|
| 1360 | +int vq_meta_prefetch(struct vhost_virtqueue *vq) |
---|
1282 | 1361 | { |
---|
1283 | | - size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; |
---|
1284 | 1362 | unsigned int num = vq->num; |
---|
1285 | 1363 | |
---|
1286 | 1364 | if (!vq->iotlb) |
---|
1287 | 1365 | return 1; |
---|
1288 | 1366 | |
---|
1289 | | - return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc, |
---|
1290 | | - num * sizeof(*vq->desc), VHOST_ADDR_DESC) && |
---|
1291 | | - iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail, |
---|
1292 | | - sizeof *vq->avail + |
---|
1293 | | - num * sizeof(*vq->avail->ring) + s, |
---|
| 1367 | + return iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->desc, |
---|
| 1368 | + vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) && |
---|
| 1369 | + iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->avail, |
---|
| 1370 | + vhost_get_avail_size(vq, num), |
---|
1294 | 1371 | VHOST_ADDR_AVAIL) && |
---|
1295 | | - iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used, |
---|
1296 | | - sizeof *vq->used + |
---|
1297 | | - num * sizeof(*vq->used->ring) + s, |
---|
1298 | | - VHOST_ADDR_USED); |
---|
| 1372 | + iotlb_access_ok(vq, VHOST_MAP_WO, (u64)(uintptr_t)vq->used, |
---|
| 1373 | + vhost_get_used_size(vq, num), VHOST_ADDR_USED); |
---|
1299 | 1374 | } |
---|
1300 | | -EXPORT_SYMBOL_GPL(vq_iotlb_prefetch); |
---|
| 1375 | +EXPORT_SYMBOL_GPL(vq_meta_prefetch); |
---|
1301 | 1376 | |
---|
1302 | 1377 | /* Can we log writes? */ |
---|
1303 | 1378 | /* Caller should have device mutex but not vq mutex */ |
---|
.. | .. |
---|
1307 | 1382 | } |
---|
1308 | 1383 | EXPORT_SYMBOL_GPL(vhost_log_access_ok); |
---|
1309 | 1384 | |
---|
| 1385 | +static bool vq_log_used_access_ok(struct vhost_virtqueue *vq, |
---|
| 1386 | + void __user *log_base, |
---|
| 1387 | + bool log_used, |
---|
| 1388 | + u64 log_addr) |
---|
| 1389 | +{ |
---|
| 1390 | + /* If an IOTLB device is present, log_addr is a GIOVA that |
---|
| 1391 | + * will never be logged by log_used(). */ |
---|
| 1392 | + if (vq->iotlb) |
---|
| 1393 | + return true; |
---|
| 1394 | + |
---|
| 1395 | + return !log_used || log_access_ok(log_base, log_addr, |
---|
| 1396 | + vhost_get_used_size(vq, vq->num)); |
---|
| 1397 | +} |
---|
| 1398 | + |
---|
1310 | 1399 | /* Verify access for write logging. */ |
---|
1311 | 1400 | /* Caller should have vq mutex and device mutex */ |
---|
1312 | 1401 | static bool vq_log_access_ok(struct vhost_virtqueue *vq, |
---|
1313 | 1402 | void __user *log_base) |
---|
1314 | 1403 | { |
---|
1315 | | - size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; |
---|
1316 | | - |
---|
1317 | 1404 | return vq_memory_access_ok(log_base, vq->umem, |
---|
1318 | 1405 | vhost_has_feature(vq, VHOST_F_LOG_ALL)) && |
---|
1319 | | - (!vq->log_used || log_access_ok(log_base, vq->log_addr, |
---|
1320 | | - sizeof *vq->used + |
---|
1321 | | - vq->num * sizeof *vq->used->ring + s)); |
---|
| 1406 | + vq_log_used_access_ok(vq, log_base, vq->log_used, vq->log_addr); |
---|
1322 | 1407 | } |
---|
1323 | 1408 | |
---|
1324 | 1409 | /* Can we start vq? */ |
---|
.. | .. |
---|
1328 | 1413 | if (!vq_log_access_ok(vq, vq->log_base)) |
---|
1329 | 1414 | return false; |
---|
1330 | 1415 | |
---|
1331 | | - /* Access validation occurs at prefetch time with IOTLB */ |
---|
1332 | | - if (vq->iotlb) |
---|
1333 | | - return true; |
---|
1334 | | - |
---|
1335 | 1416 | return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used); |
---|
1336 | 1417 | } |
---|
1337 | 1418 | EXPORT_SYMBOL_GPL(vhost_vq_access_ok); |
---|
1338 | | - |
---|
1339 | | -static struct vhost_umem *vhost_umem_alloc(void) |
---|
1340 | | -{ |
---|
1341 | | - struct vhost_umem *umem = kvzalloc(sizeof(*umem), GFP_KERNEL); |
---|
1342 | | - |
---|
1343 | | - if (!umem) |
---|
1344 | | - return NULL; |
---|
1345 | | - |
---|
1346 | | - umem->umem_tree = RB_ROOT_CACHED; |
---|
1347 | | - umem->numem = 0; |
---|
1348 | | - INIT_LIST_HEAD(&umem->umem_list); |
---|
1349 | | - |
---|
1350 | | - return umem; |
---|
1351 | | -} |
---|
1352 | 1419 | |
---|
1353 | 1420 | static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) |
---|
1354 | 1421 | { |
---|
1355 | 1422 | struct vhost_memory mem, *newmem; |
---|
1356 | 1423 | struct vhost_memory_region *region; |
---|
1357 | | - struct vhost_umem *newumem, *oldumem; |
---|
| 1424 | + struct vhost_iotlb *newumem, *oldumem; |
---|
1358 | 1425 | unsigned long size = offsetof(struct vhost_memory, regions); |
---|
1359 | 1426 | int i; |
---|
1360 | 1427 | |
---|
.. | .. |
---|
1371 | 1438 | |
---|
1372 | 1439 | memcpy(newmem, &mem, size); |
---|
1373 | 1440 | if (copy_from_user(newmem->regions, m->regions, |
---|
1374 | | - mem.nregions * sizeof *m->regions)) { |
---|
| 1441 | + flex_array_size(newmem, regions, mem.nregions))) { |
---|
1375 | 1442 | kvfree(newmem); |
---|
1376 | 1443 | return -EFAULT; |
---|
1377 | 1444 | } |
---|
1378 | 1445 | |
---|
1379 | | - newumem = vhost_umem_alloc(); |
---|
| 1446 | + newumem = iotlb_alloc(); |
---|
1380 | 1447 | if (!newumem) { |
---|
1381 | 1448 | kvfree(newmem); |
---|
1382 | 1449 | return -ENOMEM; |
---|
.. | .. |
---|
1385 | 1452 | for (region = newmem->regions; |
---|
1386 | 1453 | region < newmem->regions + mem.nregions; |
---|
1387 | 1454 | region++) { |
---|
1388 | | - if (vhost_new_umem_range(newumem, |
---|
1389 | | - region->guest_phys_addr, |
---|
1390 | | - region->memory_size, |
---|
1391 | | - region->guest_phys_addr + |
---|
1392 | | - region->memory_size - 1, |
---|
1393 | | - region->userspace_addr, |
---|
1394 | | - VHOST_ACCESS_RW)) |
---|
| 1455 | + if (vhost_iotlb_add_range(newumem, |
---|
| 1456 | + region->guest_phys_addr, |
---|
| 1457 | + region->guest_phys_addr + |
---|
| 1458 | + region->memory_size - 1, |
---|
| 1459 | + region->userspace_addr, |
---|
| 1460 | + VHOST_MAP_RW)) |
---|
1395 | 1461 | goto err; |
---|
1396 | 1462 | } |
---|
1397 | 1463 | |
---|
.. | .. |
---|
1409 | 1475 | } |
---|
1410 | 1476 | |
---|
1411 | 1477 | kvfree(newmem); |
---|
1412 | | - vhost_umem_clean(oldumem); |
---|
| 1478 | + vhost_iotlb_free(oldumem); |
---|
1413 | 1479 | return 0; |
---|
1414 | 1480 | |
---|
1415 | 1481 | err: |
---|
1416 | | - vhost_umem_clean(newumem); |
---|
| 1482 | + vhost_iotlb_free(newumem); |
---|
1417 | 1483 | kvfree(newmem); |
---|
1418 | 1484 | return -EFAULT; |
---|
1419 | 1485 | } |
---|
1420 | 1486 | |
---|
| 1487 | +static long vhost_vring_set_num(struct vhost_dev *d, |
---|
| 1488 | + struct vhost_virtqueue *vq, |
---|
| 1489 | + void __user *argp) |
---|
| 1490 | +{ |
---|
| 1491 | + struct vhost_vring_state s; |
---|
| 1492 | + |
---|
| 1493 | + /* Resizing ring with an active backend? |
---|
| 1494 | + * You don't want to do that. */ |
---|
| 1495 | + if (vq->private_data) |
---|
| 1496 | + return -EBUSY; |
---|
| 1497 | + |
---|
| 1498 | + if (copy_from_user(&s, argp, sizeof s)) |
---|
| 1499 | + return -EFAULT; |
---|
| 1500 | + |
---|
| 1501 | + if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) |
---|
| 1502 | + return -EINVAL; |
---|
| 1503 | + vq->num = s.num; |
---|
| 1504 | + |
---|
| 1505 | + return 0; |
---|
| 1506 | +} |
---|
| 1507 | + |
---|
| 1508 | +static long vhost_vring_set_addr(struct vhost_dev *d, |
---|
| 1509 | + struct vhost_virtqueue *vq, |
---|
| 1510 | + void __user *argp) |
---|
| 1511 | +{ |
---|
| 1512 | + struct vhost_vring_addr a; |
---|
| 1513 | + |
---|
| 1514 | + if (copy_from_user(&a, argp, sizeof a)) |
---|
| 1515 | + return -EFAULT; |
---|
| 1516 | + if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) |
---|
| 1517 | + return -EOPNOTSUPP; |
---|
| 1518 | + |
---|
| 1519 | + /* For 32bit, verify that the top 32bits of the user |
---|
| 1520 | + data are set to zero. */ |
---|
| 1521 | + if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr || |
---|
| 1522 | + (u64)(unsigned long)a.used_user_addr != a.used_user_addr || |
---|
| 1523 | + (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) |
---|
| 1524 | + return -EFAULT; |
---|
| 1525 | + |
---|
| 1526 | + /* Make sure it's safe to cast pointers to vring types. */ |
---|
| 1527 | + BUILD_BUG_ON(__alignof__ *vq->avail > VRING_AVAIL_ALIGN_SIZE); |
---|
| 1528 | + BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE); |
---|
| 1529 | + if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) || |
---|
| 1530 | + (a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) || |
---|
| 1531 | + (a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1))) |
---|
| 1532 | + return -EINVAL; |
---|
| 1533 | + |
---|
| 1534 | + /* We only verify access here if backend is configured. |
---|
| 1535 | + * If it is not, we don't as size might not have been setup. |
---|
| 1536 | + * We will verify when backend is configured. */ |
---|
| 1537 | + if (vq->private_data) { |
---|
| 1538 | + if (!vq_access_ok(vq, vq->num, |
---|
| 1539 | + (void __user *)(unsigned long)a.desc_user_addr, |
---|
| 1540 | + (void __user *)(unsigned long)a.avail_user_addr, |
---|
| 1541 | + (void __user *)(unsigned long)a.used_user_addr)) |
---|
| 1542 | + return -EINVAL; |
---|
| 1543 | + |
---|
| 1544 | + /* Also validate log access for used ring if enabled. */ |
---|
| 1545 | + if (!vq_log_used_access_ok(vq, vq->log_base, |
---|
| 1546 | + a.flags & (0x1 << VHOST_VRING_F_LOG), |
---|
| 1547 | + a.log_guest_addr)) |
---|
| 1548 | + return -EINVAL; |
---|
| 1549 | + } |
---|
| 1550 | + |
---|
| 1551 | + vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG)); |
---|
| 1552 | + vq->desc = (void __user *)(unsigned long)a.desc_user_addr; |
---|
| 1553 | + vq->avail = (void __user *)(unsigned long)a.avail_user_addr; |
---|
| 1554 | + vq->log_addr = a.log_guest_addr; |
---|
| 1555 | + vq->used = (void __user *)(unsigned long)a.used_user_addr; |
---|
| 1556 | + |
---|
| 1557 | + return 0; |
---|
| 1558 | +} |
---|
| 1559 | + |
---|
| 1560 | +static long vhost_vring_set_num_addr(struct vhost_dev *d, |
---|
| 1561 | + struct vhost_virtqueue *vq, |
---|
| 1562 | + unsigned int ioctl, |
---|
| 1563 | + void __user *argp) |
---|
| 1564 | +{ |
---|
| 1565 | + long r; |
---|
| 1566 | + |
---|
| 1567 | + mutex_lock(&vq->mutex); |
---|
| 1568 | + |
---|
| 1569 | + switch (ioctl) { |
---|
| 1570 | + case VHOST_SET_VRING_NUM: |
---|
| 1571 | + r = vhost_vring_set_num(d, vq, argp); |
---|
| 1572 | + break; |
---|
| 1573 | + case VHOST_SET_VRING_ADDR: |
---|
| 1574 | + r = vhost_vring_set_addr(d, vq, argp); |
---|
| 1575 | + break; |
---|
| 1576 | + default: |
---|
| 1577 | + BUG(); |
---|
| 1578 | + } |
---|
| 1579 | + |
---|
| 1580 | + mutex_unlock(&vq->mutex); |
---|
| 1581 | + |
---|
| 1582 | + return r; |
---|
| 1583 | +} |
---|
1421 | 1584 | long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) |
---|
1422 | 1585 | { |
---|
1423 | 1586 | struct file *eventfp, *filep = NULL; |
---|
.. | .. |
---|
1427 | 1590 | struct vhost_virtqueue *vq; |
---|
1428 | 1591 | struct vhost_vring_state s; |
---|
1429 | 1592 | struct vhost_vring_file f; |
---|
1430 | | - struct vhost_vring_addr a; |
---|
1431 | 1593 | u32 idx; |
---|
1432 | 1594 | long r; |
---|
1433 | 1595 | |
---|
.. | .. |
---|
1440 | 1602 | idx = array_index_nospec(idx, d->nvqs); |
---|
1441 | 1603 | vq = d->vqs[idx]; |
---|
1442 | 1604 | |
---|
| 1605 | + if (ioctl == VHOST_SET_VRING_NUM || |
---|
| 1606 | + ioctl == VHOST_SET_VRING_ADDR) { |
---|
| 1607 | + return vhost_vring_set_num_addr(d, vq, ioctl, argp); |
---|
| 1608 | + } |
---|
| 1609 | + |
---|
1443 | 1610 | mutex_lock(&vq->mutex); |
---|
1444 | 1611 | |
---|
1445 | 1612 | switch (ioctl) { |
---|
1446 | | - case VHOST_SET_VRING_NUM: |
---|
1447 | | - /* Resizing ring with an active backend? |
---|
1448 | | - * You don't want to do that. */ |
---|
1449 | | - if (vq->private_data) { |
---|
1450 | | - r = -EBUSY; |
---|
1451 | | - break; |
---|
1452 | | - } |
---|
1453 | | - if (copy_from_user(&s, argp, sizeof s)) { |
---|
1454 | | - r = -EFAULT; |
---|
1455 | | - break; |
---|
1456 | | - } |
---|
1457 | | - if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) { |
---|
1458 | | - r = -EINVAL; |
---|
1459 | | - break; |
---|
1460 | | - } |
---|
1461 | | - vq->num = s.num; |
---|
1462 | | - break; |
---|
1463 | 1613 | case VHOST_SET_VRING_BASE: |
---|
1464 | 1614 | /* Moving base with an active backend? |
---|
1465 | 1615 | * You don't want to do that. */ |
---|
.. | .. |
---|
1471 | 1621 | r = -EFAULT; |
---|
1472 | 1622 | break; |
---|
1473 | 1623 | } |
---|
1474 | | - if (s.num > 0xffff) { |
---|
1475 | | - r = -EINVAL; |
---|
1476 | | - break; |
---|
| 1624 | + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { |
---|
| 1625 | + vq->last_avail_idx = s.num & 0xffff; |
---|
| 1626 | + vq->last_used_idx = (s.num >> 16) & 0xffff; |
---|
| 1627 | + } else { |
---|
| 1628 | + if (s.num > 0xffff) { |
---|
| 1629 | + r = -EINVAL; |
---|
| 1630 | + break; |
---|
| 1631 | + } |
---|
| 1632 | + vq->last_avail_idx = s.num; |
---|
1477 | 1633 | } |
---|
1478 | | - vq->last_avail_idx = s.num; |
---|
1479 | 1634 | /* Forget the cached index value. */ |
---|
1480 | 1635 | vq->avail_idx = vq->last_avail_idx; |
---|
1481 | 1636 | break; |
---|
1482 | 1637 | case VHOST_GET_VRING_BASE: |
---|
1483 | 1638 | s.index = idx; |
---|
1484 | | - s.num = vq->last_avail_idx; |
---|
| 1639 | + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) |
---|
| 1640 | + s.num = (u32)vq->last_avail_idx | ((u32)vq->last_used_idx << 16); |
---|
| 1641 | + else |
---|
| 1642 | + s.num = vq->last_avail_idx; |
---|
1485 | 1643 | if (copy_to_user(argp, &s, sizeof s)) |
---|
1486 | 1644 | r = -EFAULT; |
---|
1487 | | - break; |
---|
1488 | | - case VHOST_SET_VRING_ADDR: |
---|
1489 | | - if (copy_from_user(&a, argp, sizeof a)) { |
---|
1490 | | - r = -EFAULT; |
---|
1491 | | - break; |
---|
1492 | | - } |
---|
1493 | | - if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) { |
---|
1494 | | - r = -EOPNOTSUPP; |
---|
1495 | | - break; |
---|
1496 | | - } |
---|
1497 | | - /* For 32bit, verify that the top 32bits of the user |
---|
1498 | | - data are set to zero. */ |
---|
1499 | | - if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr || |
---|
1500 | | - (u64)(unsigned long)a.used_user_addr != a.used_user_addr || |
---|
1501 | | - (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) { |
---|
1502 | | - r = -EFAULT; |
---|
1503 | | - break; |
---|
1504 | | - } |
---|
1505 | | - |
---|
1506 | | - /* Make sure it's safe to cast pointers to vring types. */ |
---|
1507 | | - BUILD_BUG_ON(__alignof__ *vq->avail > VRING_AVAIL_ALIGN_SIZE); |
---|
1508 | | - BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE); |
---|
1509 | | - if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) || |
---|
1510 | | - (a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) || |
---|
1511 | | - (a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1))) { |
---|
1512 | | - r = -EINVAL; |
---|
1513 | | - break; |
---|
1514 | | - } |
---|
1515 | | - |
---|
1516 | | - /* We only verify access here if backend is configured. |
---|
1517 | | - * If it is not, we don't as size might not have been setup. |
---|
1518 | | - * We will verify when backend is configured. */ |
---|
1519 | | - if (vq->private_data) { |
---|
1520 | | - if (!vq_access_ok(vq, vq->num, |
---|
1521 | | - (void __user *)(unsigned long)a.desc_user_addr, |
---|
1522 | | - (void __user *)(unsigned long)a.avail_user_addr, |
---|
1523 | | - (void __user *)(unsigned long)a.used_user_addr)) { |
---|
1524 | | - r = -EINVAL; |
---|
1525 | | - break; |
---|
1526 | | - } |
---|
1527 | | - |
---|
1528 | | - /* Also validate log access for used ring if enabled. */ |
---|
1529 | | - if ((a.flags & (0x1 << VHOST_VRING_F_LOG)) && |
---|
1530 | | - !log_access_ok(vq->log_base, a.log_guest_addr, |
---|
1531 | | - sizeof *vq->used + |
---|
1532 | | - vq->num * sizeof *vq->used->ring)) { |
---|
1533 | | - r = -EINVAL; |
---|
1534 | | - break; |
---|
1535 | | - } |
---|
1536 | | - } |
---|
1537 | | - |
---|
1538 | | - vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG)); |
---|
1539 | | - vq->desc = (void __user *)(unsigned long)a.desc_user_addr; |
---|
1540 | | - vq->avail = (void __user *)(unsigned long)a.avail_user_addr; |
---|
1541 | | - vq->log_addr = a.log_guest_addr; |
---|
1542 | | - vq->used = (void __user *)(unsigned long)a.used_user_addr; |
---|
1543 | 1645 | break; |
---|
1544 | 1646 | case VHOST_SET_VRING_KICK: |
---|
1545 | 1647 | if (copy_from_user(&f, argp, sizeof f)) { |
---|
1546 | 1648 | r = -EFAULT; |
---|
1547 | 1649 | break; |
---|
1548 | 1650 | } |
---|
1549 | | - eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); |
---|
| 1651 | + eventfp = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_fget(f.fd); |
---|
1550 | 1652 | if (IS_ERR(eventfp)) { |
---|
1551 | 1653 | r = PTR_ERR(eventfp); |
---|
1552 | 1654 | break; |
---|
.. | .. |
---|
1562 | 1664 | r = -EFAULT; |
---|
1563 | 1665 | break; |
---|
1564 | 1666 | } |
---|
1565 | | - ctx = f.fd == -1 ? NULL : eventfd_ctx_fdget(f.fd); |
---|
| 1667 | + ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); |
---|
1566 | 1668 | if (IS_ERR(ctx)) { |
---|
1567 | 1669 | r = PTR_ERR(ctx); |
---|
1568 | 1670 | break; |
---|
1569 | 1671 | } |
---|
1570 | | - swap(ctx, vq->call_ctx); |
---|
| 1672 | + |
---|
| 1673 | + swap(ctx, vq->call_ctx.ctx); |
---|
1571 | 1674 | break; |
---|
1572 | 1675 | case VHOST_SET_VRING_ERR: |
---|
1573 | 1676 | if (copy_from_user(&f, argp, sizeof f)) { |
---|
1574 | 1677 | r = -EFAULT; |
---|
1575 | 1678 | break; |
---|
1576 | 1679 | } |
---|
1577 | | - ctx = f.fd == -1 ? NULL : eventfd_ctx_fdget(f.fd); |
---|
| 1680 | + ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); |
---|
1578 | 1681 | if (IS_ERR(ctx)) { |
---|
1579 | 1682 | r = PTR_ERR(ctx); |
---|
1580 | 1683 | break; |
---|
.. | .. |
---|
1625 | 1728 | |
---|
1626 | 1729 | int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled) |
---|
1627 | 1730 | { |
---|
1628 | | - struct vhost_umem *niotlb, *oiotlb; |
---|
| 1731 | + struct vhost_iotlb *niotlb, *oiotlb; |
---|
1629 | 1732 | int i; |
---|
1630 | 1733 | |
---|
1631 | | - niotlb = vhost_umem_alloc(); |
---|
| 1734 | + niotlb = iotlb_alloc(); |
---|
1632 | 1735 | if (!niotlb) |
---|
1633 | 1736 | return -ENOMEM; |
---|
1634 | 1737 | |
---|
.. | .. |
---|
1644 | 1747 | mutex_unlock(&vq->mutex); |
---|
1645 | 1748 | } |
---|
1646 | 1749 | |
---|
1647 | | - vhost_umem_clean(oiotlb); |
---|
| 1750 | + vhost_iotlb_free(oiotlb); |
---|
1648 | 1751 | |
---|
1649 | 1752 | return 0; |
---|
1650 | 1753 | } |
---|
.. | .. |
---|
1699 | 1802 | r = get_user(fd, (int __user *)argp); |
---|
1700 | 1803 | if (r < 0) |
---|
1701 | 1804 | break; |
---|
1702 | | - ctx = fd == -1 ? NULL : eventfd_ctx_fdget(fd); |
---|
| 1805 | + ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd); |
---|
1703 | 1806 | if (IS_ERR(ctx)) { |
---|
1704 | 1807 | r = PTR_ERR(ctx); |
---|
1705 | 1808 | break; |
---|
.. | .. |
---|
1724 | 1827 | |
---|
1725 | 1828 | /* TODO: This is really inefficient. We need something like get_user() |
---|
1726 | 1829 | * (instruction directly accesses the data, with an exception table entry |
---|
1727 | | - * returning -EFAULT). See Documentation/x86/exception-tables.txt. |
---|
| 1830 | + * returning -EFAULT). See Documentation/x86/exception-tables.rst. |
---|
1728 | 1831 | */ |
---|
1729 | 1832 | static int set_bit_to_user(int nr, void __user *addr) |
---|
1730 | 1833 | { |
---|
.. | .. |
---|
1734 | 1837 | int bit = nr + (log % PAGE_SIZE) * 8; |
---|
1735 | 1838 | int r; |
---|
1736 | 1839 | |
---|
1737 | | - r = get_user_pages_fast(log, 1, 1, &page); |
---|
| 1840 | + r = pin_user_pages_fast(log, 1, FOLL_WRITE, &page); |
---|
1738 | 1841 | if (r < 0) |
---|
1739 | 1842 | return r; |
---|
1740 | 1843 | BUG_ON(r != 1); |
---|
1741 | 1844 | base = kmap_atomic(page); |
---|
1742 | 1845 | set_bit(bit, base); |
---|
1743 | 1846 | kunmap_atomic(base); |
---|
1744 | | - set_page_dirty_lock(page); |
---|
1745 | | - put_page(page); |
---|
| 1847 | + unpin_user_pages_dirty_lock(&page, 1, true); |
---|
1746 | 1848 | return 0; |
---|
1747 | 1849 | } |
---|
1748 | 1850 | |
---|
.. | .. |
---|
1774 | 1876 | |
---|
1775 | 1877 | static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len) |
---|
1776 | 1878 | { |
---|
1777 | | - struct vhost_umem *umem = vq->umem; |
---|
1778 | | - struct vhost_umem_node *u; |
---|
| 1879 | + struct vhost_iotlb *umem = vq->umem; |
---|
| 1880 | + struct vhost_iotlb_map *u; |
---|
1779 | 1881 | u64 start, end, l, min; |
---|
1780 | 1882 | int r; |
---|
1781 | 1883 | bool hit = false; |
---|
.. | .. |
---|
1785 | 1887 | /* More than one GPA can be mapped into a single HVA. So |
---|
1786 | 1888 | * iterate all possible umems here to be safe. |
---|
1787 | 1889 | */ |
---|
1788 | | - list_for_each_entry(u, &umem->umem_list, link) { |
---|
1789 | | - if (u->userspace_addr > hva - 1 + len || |
---|
1790 | | - u->userspace_addr - 1 + u->size < hva) |
---|
| 1890 | + list_for_each_entry(u, &umem->list, link) { |
---|
| 1891 | + if (u->addr > hva - 1 + len || |
---|
| 1892 | + u->addr - 1 + u->size < hva) |
---|
1791 | 1893 | continue; |
---|
1792 | | - start = max(u->userspace_addr, hva); |
---|
1793 | | - end = min(u->userspace_addr - 1 + u->size, |
---|
1794 | | - hva - 1 + len); |
---|
| 1894 | + start = max(u->addr, hva); |
---|
| 1895 | + end = min(u->addr - 1 + u->size, hva - 1 + len); |
---|
1795 | 1896 | l = end - start + 1; |
---|
1796 | 1897 | r = log_write(vq->log_base, |
---|
1797 | | - u->start + start - u->userspace_addr, |
---|
| 1898 | + u->start + start - u->addr, |
---|
1798 | 1899 | l); |
---|
1799 | 1900 | if (r < 0) |
---|
1800 | 1901 | return r; |
---|
.. | .. |
---|
1814 | 1915 | |
---|
1815 | 1916 | static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len) |
---|
1816 | 1917 | { |
---|
1817 | | - struct iovec iov[64]; |
---|
| 1918 | + struct iovec *iov = vq->log_iov; |
---|
1818 | 1919 | int i, ret; |
---|
1819 | 1920 | |
---|
1820 | 1921 | if (!vq->iotlb) |
---|
.. | .. |
---|
1874 | 1975 | static int vhost_update_used_flags(struct vhost_virtqueue *vq) |
---|
1875 | 1976 | { |
---|
1876 | 1977 | void __user *used; |
---|
1877 | | - if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags), |
---|
1878 | | - &vq->used->flags) < 0) |
---|
| 1978 | + if (vhost_put_used_flags(vq)) |
---|
1879 | 1979 | return -EFAULT; |
---|
1880 | 1980 | if (unlikely(vq->log_used)) { |
---|
1881 | 1981 | /* Make sure the flag is seen before log. */ |
---|
.. | .. |
---|
1892 | 1992 | |
---|
1893 | 1993 | static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) |
---|
1894 | 1994 | { |
---|
1895 | | - if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx), |
---|
1896 | | - vhost_avail_event(vq))) |
---|
| 1995 | + if (vhost_put_avail_event(vq)) |
---|
1897 | 1996 | return -EFAULT; |
---|
1898 | 1997 | if (unlikely(vq->log_used)) { |
---|
1899 | 1998 | void __user *used; |
---|
.. | .. |
---|
1925 | 2024 | goto err; |
---|
1926 | 2025 | vq->signalled_used_valid = false; |
---|
1927 | 2026 | if (!vq->iotlb && |
---|
1928 | | - !access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx)) { |
---|
| 2027 | + !access_ok(&vq->used->idx, sizeof vq->used->idx)) { |
---|
1929 | 2028 | r = -EFAULT; |
---|
1930 | 2029 | goto err; |
---|
1931 | 2030 | } |
---|
1932 | | - r = vhost_get_used(vq, last_used_idx, &vq->used->idx); |
---|
| 2031 | + r = vhost_get_used_idx(vq, &last_used_idx); |
---|
1933 | 2032 | if (r) { |
---|
1934 | 2033 | vq_err(vq, "Can't access used idx at %p\n", |
---|
1935 | 2034 | &vq->used->idx); |
---|
.. | .. |
---|
1947 | 2046 | static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, |
---|
1948 | 2047 | struct iovec iov[], int iov_size, int access) |
---|
1949 | 2048 | { |
---|
1950 | | - const struct vhost_umem_node *node; |
---|
| 2049 | + const struct vhost_iotlb_map *map; |
---|
1951 | 2050 | struct vhost_dev *dev = vq->dev; |
---|
1952 | | - struct vhost_umem *umem = dev->iotlb ? dev->iotlb : dev->umem; |
---|
| 2051 | + struct vhost_iotlb *umem = dev->iotlb ? dev->iotlb : dev->umem; |
---|
1953 | 2052 | struct iovec *_iov; |
---|
1954 | | - u64 s = 0; |
---|
| 2053 | + u64 s = 0, last = addr + len - 1; |
---|
1955 | 2054 | int ret = 0; |
---|
1956 | 2055 | |
---|
1957 | 2056 | while ((u64)len > s) { |
---|
.. | .. |
---|
1961 | 2060 | break; |
---|
1962 | 2061 | } |
---|
1963 | 2062 | |
---|
1964 | | - node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, |
---|
1965 | | - addr, addr + len - 1); |
---|
1966 | | - if (node == NULL || node->start > addr) { |
---|
| 2063 | + map = vhost_iotlb_itree_first(umem, addr, last); |
---|
| 2064 | + if (map == NULL || map->start > addr) { |
---|
1967 | 2065 | if (umem != dev->iotlb) { |
---|
1968 | 2066 | ret = -EFAULT; |
---|
1969 | 2067 | break; |
---|
1970 | 2068 | } |
---|
1971 | 2069 | ret = -EAGAIN; |
---|
1972 | 2070 | break; |
---|
1973 | | - } else if (!(node->perm & access)) { |
---|
| 2071 | + } else if (!(map->perm & access)) { |
---|
1974 | 2072 | ret = -EPERM; |
---|
1975 | 2073 | break; |
---|
1976 | 2074 | } |
---|
1977 | 2075 | |
---|
1978 | 2076 | _iov = iov + ret; |
---|
1979 | | - size = node->size - addr + node->start; |
---|
| 2077 | + size = map->size - addr + map->start; |
---|
1980 | 2078 | _iov->iov_len = min((u64)len - s, size); |
---|
1981 | 2079 | _iov->iov_base = (void __user *)(unsigned long) |
---|
1982 | | - (node->userspace_addr + addr - node->start); |
---|
| 2080 | + (map->addr + addr - map->start); |
---|
1983 | 2081 | s += size; |
---|
1984 | 2082 | addr += size; |
---|
1985 | 2083 | ++ret; |
---|
.. | .. |
---|
2035 | 2133 | return ret; |
---|
2036 | 2134 | } |
---|
2037 | 2135 | iov_iter_init(&from, READ, vq->indirect, ret, len); |
---|
2038 | | - |
---|
2039 | | - /* We will use the result as an address to read from, so most |
---|
2040 | | - * architectures only need a compiler barrier here. */ |
---|
2041 | | - read_barrier_depends(); |
---|
2042 | | - |
---|
2043 | 2136 | count = len / sizeof desc; |
---|
2044 | 2137 | /* Buffers are chained via a 16 bit next field, so |
---|
2045 | 2138 | * we can have at most 2^16 of these. */ |
---|
.. | .. |
---|
2128 | 2221 | last_avail_idx = vq->last_avail_idx; |
---|
2129 | 2222 | |
---|
2130 | 2223 | if (vq->avail_idx == vq->last_avail_idx) { |
---|
2131 | | - if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) { |
---|
| 2224 | + if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) { |
---|
2132 | 2225 | vq_err(vq, "Failed to access avail idx at %p\n", |
---|
2133 | 2226 | &vq->avail->idx); |
---|
2134 | 2227 | return -EFAULT; |
---|
.. | .. |
---|
2155 | 2248 | |
---|
2156 | 2249 | /* Grab the next descriptor number they're advertising, and increment |
---|
2157 | 2250 | * the index we've seen. */ |
---|
2158 | | - if (unlikely(vhost_get_avail(vq, ring_head, |
---|
2159 | | - &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) { |
---|
| 2251 | + if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) { |
---|
2160 | 2252 | vq_err(vq, "Failed to read head: idx %d address %p\n", |
---|
2161 | 2253 | last_avail_idx, |
---|
2162 | 2254 | &vq->avail->ring[last_avail_idx % vq->num]); |
---|
.. | .. |
---|
2191 | 2283 | i, vq->num, head); |
---|
2192 | 2284 | return -EINVAL; |
---|
2193 | 2285 | } |
---|
2194 | | - ret = vhost_copy_from_user(vq, &desc, vq->desc + i, |
---|
2195 | | - sizeof desc); |
---|
| 2286 | + ret = vhost_get_desc(vq, &desc, i); |
---|
2196 | 2287 | if (unlikely(ret)) { |
---|
2197 | 2288 | vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", |
---|
2198 | 2289 | i, vq->desc + i); |
---|
.. | .. |
---|
2279 | 2370 | struct vring_used_elem *heads, |
---|
2280 | 2371 | unsigned count) |
---|
2281 | 2372 | { |
---|
2282 | | - struct vring_used_elem __user *used; |
---|
| 2373 | + vring_used_elem_t __user *used; |
---|
2283 | 2374 | u16 old, new; |
---|
2284 | 2375 | int start; |
---|
2285 | 2376 | |
---|
2286 | 2377 | start = vq->last_used_idx & (vq->num - 1); |
---|
2287 | 2378 | used = vq->used->ring + start; |
---|
2288 | | - if (count == 1) { |
---|
2289 | | - if (vhost_put_user(vq, heads[0].id, &used->id)) { |
---|
2290 | | - vq_err(vq, "Failed to write used id"); |
---|
2291 | | - return -EFAULT; |
---|
2292 | | - } |
---|
2293 | | - if (vhost_put_user(vq, heads[0].len, &used->len)) { |
---|
2294 | | - vq_err(vq, "Failed to write used len"); |
---|
2295 | | - return -EFAULT; |
---|
2296 | | - } |
---|
2297 | | - } else if (vhost_copy_to_user(vq, used, heads, count * sizeof *used)) { |
---|
| 2379 | + if (vhost_put_used(vq, heads, start, count)) { |
---|
2298 | 2380 | vq_err(vq, "Failed to write used"); |
---|
2299 | 2381 | return -EFAULT; |
---|
2300 | 2382 | } |
---|
.. | .. |
---|
2336 | 2418 | |
---|
2337 | 2419 | /* Make sure buffer is written before we update index. */ |
---|
2338 | 2420 | smp_wmb(); |
---|
2339 | | - if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx), |
---|
2340 | | - &vq->used->idx)) { |
---|
| 2421 | + if (vhost_put_used_idx(vq)) { |
---|
2341 | 2422 | vq_err(vq, "Failed to increment used idx"); |
---|
2342 | 2423 | return -EFAULT; |
---|
2343 | 2424 | } |
---|
.. | .. |
---|
2370 | 2451 | |
---|
2371 | 2452 | if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { |
---|
2372 | 2453 | __virtio16 flags; |
---|
2373 | | - if (vhost_get_avail(vq, flags, &vq->avail->flags)) { |
---|
| 2454 | + if (vhost_get_avail_flags(vq, &flags)) { |
---|
2374 | 2455 | vq_err(vq, "Failed to get flags"); |
---|
2375 | 2456 | return true; |
---|
2376 | 2457 | } |
---|
.. | .. |
---|
2384 | 2465 | if (unlikely(!v)) |
---|
2385 | 2466 | return true; |
---|
2386 | 2467 | |
---|
2387 | | - if (vhost_get_avail(vq, event, vhost_used_event(vq))) { |
---|
| 2468 | + if (vhost_get_used_event(vq, &event)) { |
---|
2388 | 2469 | vq_err(vq, "Failed to get used event idx"); |
---|
2389 | 2470 | return true; |
---|
2390 | 2471 | } |
---|
.. | .. |
---|
2395 | 2476 | void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) |
---|
2396 | 2477 | { |
---|
2397 | 2478 | /* Signal the Guest tell them we used something up. */ |
---|
2398 | | - if (vq->call_ctx && vhost_notify(dev, vq)) |
---|
2399 | | - eventfd_signal(vq->call_ctx, 1); |
---|
| 2479 | + if (vq->call_ctx.ctx && vhost_notify(dev, vq)) |
---|
| 2480 | + eventfd_signal(vq->call_ctx.ctx, 1); |
---|
2400 | 2481 | } |
---|
2401 | 2482 | EXPORT_SYMBOL_GPL(vhost_signal); |
---|
2402 | 2483 | |
---|
.. | .. |
---|
2429 | 2510 | if (vq->avail_idx != vq->last_avail_idx) |
---|
2430 | 2511 | return false; |
---|
2431 | 2512 | |
---|
2432 | | - r = vhost_get_avail(vq, avail_idx, &vq->avail->idx); |
---|
| 2513 | + r = vhost_get_avail_idx(vq, &avail_idx); |
---|
2433 | 2514 | if (unlikely(r)) |
---|
2434 | 2515 | return false; |
---|
2435 | 2516 | vq->avail_idx = vhost16_to_cpu(vq, avail_idx); |
---|
.. | .. |
---|
2465 | 2546 | /* They could have slipped one in as we were doing that: make |
---|
2466 | 2547 | * sure it's written, then check again. */ |
---|
2467 | 2548 | smp_mb(); |
---|
2468 | | - r = vhost_get_avail(vq, avail_idx, &vq->avail->idx); |
---|
| 2549 | + r = vhost_get_avail_idx(vq, &avail_idx); |
---|
2469 | 2550 | if (r) { |
---|
2470 | 2551 | vq_err(vq, "Failed to check avail idx at %p: %d\n", |
---|
2471 | 2552 | &vq->avail->idx, r); |
---|
.. | .. |
---|
2487 | 2568 | if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { |
---|
2488 | 2569 | r = vhost_update_used_flags(vq); |
---|
2489 | 2570 | if (r) |
---|
2490 | | - vq_err(vq, "Failed to enable notification at %p: %d\n", |
---|
| 2571 | + vq_err(vq, "Failed to disable notification at %p: %d\n", |
---|
2491 | 2572 | &vq->used->flags, r); |
---|
2492 | 2573 | } |
---|
2493 | 2574 | } |
---|
.. | .. |
---|
2536 | 2617 | } |
---|
2537 | 2618 | EXPORT_SYMBOL_GPL(vhost_dequeue_msg); |
---|
2538 | 2619 | |
---|
| 2620 | +void vhost_set_backend_features(struct vhost_dev *dev, u64 features) |
---|
| 2621 | +{ |
---|
| 2622 | + struct vhost_virtqueue *vq; |
---|
| 2623 | + int i; |
---|
| 2624 | + |
---|
| 2625 | + mutex_lock(&dev->mutex); |
---|
| 2626 | + for (i = 0; i < dev->nvqs; ++i) { |
---|
| 2627 | + vq = dev->vqs[i]; |
---|
| 2628 | + mutex_lock(&vq->mutex); |
---|
| 2629 | + vq->acked_backend_features = features; |
---|
| 2630 | + mutex_unlock(&vq->mutex); |
---|
| 2631 | + } |
---|
| 2632 | + mutex_unlock(&dev->mutex); |
---|
| 2633 | +} |
---|
| 2634 | +EXPORT_SYMBOL_GPL(vhost_set_backend_features); |
---|
2539 | 2635 | |
---|
2540 | 2636 | static int __init vhost_init(void) |
---|
2541 | 2637 | { |
---|