From bedbef8ad3e75a304af6361af235302bcc61d06b Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 14 May 2024 06:39:01 +0000
Subject: [PATCH] Modify kernel path

---
 kernel/drivers/vhost/net.c | 487 +++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 353 insertions(+), 134 deletions(-)

diff --git a/kernel/drivers/vhost/net.c b/kernel/drivers/vhost/net.c
index 0c7bbc9..b9c8e40 100644
--- a/kernel/drivers/vhost/net.c
+++ b/kernel/drivers/vhost/net.c
@@ -1,7 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (C) 2009 Red Hat, Inc.
  * Author: Michael S. Tsirkin <mst@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
  *
  * virtio-net server in host kernel.
  */
@@ -74,7 +73,7 @@
 	VHOST_NET_FEATURES = VHOST_FEATURES |
 			     (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
 			     (1ULL << VIRTIO_NET_F_MRG_RXBUF) |
-			     (1ULL << VIRTIO_F_IOMMU_PLATFORM)
+			     (1ULL << VIRTIO_F_ACCESS_PLATFORM)
 };
 
 enum {
@@ -116,6 +115,8 @@
 	 * For RX, number of batched heads
 	 */
 	int done_idx;
+	/* Number of XDP frames batched */
+	int batched_xdp;
 	/* an array of userspace buffers info */
 	struct ubuf_info *ubuf_info;
 	/* Reference counting for outstanding ubufs.
@@ -123,6 +124,8 @@
 	struct vhost_net_ubuf_ref *ubufs;
 	struct ptr_ring *rx_ring;
 	struct vhost_net_buf rxq;
+	/* Batched XDP buffs */
+	struct xdp_buff *xdp;
 };
 
 struct vhost_net {
@@ -137,6 +140,10 @@
 	unsigned tx_zcopy_err;
 	/* Flush in progress. Protected by tx vq lock. */
 	bool tx_flush;
+	/* Private page frag */
+	struct page_frag page_frag;
+	/* Refcount bias of page frag */
+	int refcnt_bias;
 };
 
 static unsigned vhost_net_zcopy_mask __read_mostly;
@@ -338,6 +345,11 @@
 	       sock_flag(sock->sk, SOCK_ZEROCOPY);
 }
 
+static bool vhost_sock_xdp(struct socket *sock)
+{
+	return sock_flag(sock->sk, SOCK_XDP);
+}
+
 /* In case of DMA done not in order in lower device driver for some reason.
  * upend_idx is used to track end of used idx, done_idx is used to track head
  * of used idx. Once lower device DMA done contiguously, we will signal KVM
@@ -412,7 +424,7 @@
 	struct vhost_net_virtqueue *nvq =
 		container_of(vq, struct vhost_net_virtqueue, vq);
 	struct vhost_poll *poll = n->poll + (nvq - n->vqs);
-	if (!vq->private_data)
+	if (!vhost_vq_get_backend(vq))
 		return;
 	vhost_poll_stop(poll);
 }
@@ -425,7 +437,7 @@
 	struct vhost_poll *poll = n->poll + (nvq - n->vqs);
 	struct socket *sock;
 
-	sock = vq->private_data;
+	sock = vhost_vq_get_backend(vq);
 	if (!sock)
 		return 0;
 
@@ -444,32 +456,138 @@
 	nvq->done_idx = 0;
 }
 
-static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
-				    struct vhost_net_virtqueue *nvq,
-				    unsigned int *out_num, unsigned int *in_num,
-				    bool *busyloop_intr)
+static void vhost_tx_batch(struct vhost_net *net,
+			   struct vhost_net_virtqueue *nvq,
+			   struct socket *sock,
+			   struct msghdr *msghdr)
 {
-	struct vhost_virtqueue *vq = &nvq->vq;
-	unsigned long uninitialized_var(endtime);
-	int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+	struct tun_msg_ctl ctl = {
+		.type = TUN_MSG_PTR,
+		.num = nvq->batched_xdp,
+		.ptr = nvq->xdp,
+	};
+	int i, err;
+
+	if (nvq->batched_xdp == 0)
+		goto signal_used;
+
+	msghdr->msg_control = &ctl;
+	msghdr->msg_controllen = sizeof(ctl);
+	err = sock->ops->sendmsg(sock, msghdr, 0);
+	if (unlikely(err < 0)) {
+		vq_err(&nvq->vq, "Fail to batch sending packets\n");
+
+		/* free pages owned by XDP; since this is an unlikely error path,
+		 * keep it simple and avoid more complex bulk update for the
+		 * used pages
+		 */
+		for (i = 0; i < nvq->batched_xdp; ++i)
+			put_page(virt_to_head_page(nvq->xdp[i].data));
+		nvq->batched_xdp = 0;
+		nvq->done_idx = 0;
+		return;
+	}
+
+signal_used:
+	vhost_net_signal_used(nvq);
+	nvq->batched_xdp = 0;
+}
+
+static int sock_has_rx_data(struct socket *sock)
+{
+	if (unlikely(!sock))
+		return 0;
+
+	if (sock->ops->peek_len)
+		return sock->ops->peek_len(sock);
+
+	return skb_queue_empty(&sock->sk->sk_receive_queue);
+}
+
+static void vhost_net_busy_poll_try_queue(struct vhost_net *net,
+					  struct vhost_virtqueue *vq)
+{
+	if (!vhost_vq_avail_empty(&net->dev, vq)) {
+		vhost_poll_queue(&vq->poll);
+	} else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+		vhost_disable_notify(&net->dev, vq);
+		vhost_poll_queue(&vq->poll);
+	}
+}
+
+static void vhost_net_busy_poll(struct vhost_net *net,
+				struct vhost_virtqueue *rvq,
+				struct vhost_virtqueue *tvq,
+				bool *busyloop_intr,
+				bool poll_rx)
+{
+	unsigned long busyloop_timeout;
+	unsigned long endtime;
+	struct socket *sock;
+	struct vhost_virtqueue *vq = poll_rx ? tvq : rvq;
+
+	/* Try to hold the vq mutex of the paired virtqueue. We can't
+	 * use mutex_lock() here since we could not guarantee a
+	 * consistent lock ordering.
+	 */
+	if (!mutex_trylock(&vq->mutex))
+		return;
+
+	vhost_disable_notify(&net->dev, vq);
+	sock = vhost_vq_get_backend(rvq);
+
+	busyloop_timeout = poll_rx ? rvq->busyloop_timeout :
+				     tvq->busyloop_timeout;
+
+	preempt_disable();
+	endtime = busy_clock() + busyloop_timeout;
+
+	while (vhost_can_busy_poll(endtime)) {
+		if (vhost_has_work(&net->dev)) {
+			*busyloop_intr = true;
+			break;
+		}
+
+		if ((sock_has_rx_data(sock) &&
+		     !vhost_vq_avail_empty(&net->dev, rvq)) ||
+		    !vhost_vq_avail_empty(&net->dev, tvq))
+			break;
+
+		cpu_relax();
+	}
+
+	preempt_enable();
+
+	if (poll_rx || sock_has_rx_data(sock))
+		vhost_net_busy_poll_try_queue(net, vq);
+	else if (!poll_rx) /* On tx here, sock has no rx data. */
+		vhost_enable_notify(&net->dev, rvq);
+
+	mutex_unlock(&vq->mutex);
+}
+
+static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
+				    struct vhost_net_virtqueue *tnvq,
+				    unsigned int *out_num, unsigned int *in_num,
+				    struct msghdr *msghdr, bool *busyloop_intr)
+{
+	struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
+	struct vhost_virtqueue *rvq = &rnvq->vq;
+	struct vhost_virtqueue *tvq = &tnvq->vq;
+
+	int r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
 				  out_num, in_num, NULL, NULL);
 
-	if (r == vq->num && vq->busyloop_timeout) {
-		if (!vhost_sock_zcopy(vq->private_data))
-			vhost_net_signal_used(nvq);
-		preempt_disable();
-		endtime = busy_clock() + vq->busyloop_timeout;
-		while (vhost_can_busy_poll(endtime)) {
-			if (vhost_has_work(vq->dev)) {
-				*busyloop_intr = true;
-				break;
-			}
-			if (!vhost_vq_avail_empty(vq->dev, vq))
-				break;
-			cpu_relax();
-		}
-		preempt_enable();
-		r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+	if (r == tvq->num && tvq->busyloop_timeout) {
+		/* Flush batched packets first */
+		if (!vhost_sock_zcopy(vhost_vq_get_backend(tvq)))
+			vhost_tx_batch(net, tnvq,
+				       vhost_vq_get_backend(tvq),
+				       msghdr);
+
+		vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, false);
+
+		r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
 				      out_num, in_num, NULL, NULL);
 	}
@@ -506,7 +624,7 @@
 	struct vhost_virtqueue *vq = &nvq->vq;
 	int ret;
 
-	ret = vhost_net_tx_get_vq_desc(net, nvq, out, in, busyloop_intr);
+	ret = vhost_net_tx_get_vq_desc(net, nvq, out, in, msg, busyloop_intr);
 
 	if (ret < 0 || ret == vq->num)
 		return ret;
@@ -534,6 +652,121 @@
 	       !vhost_vq_avail_empty(vq->dev, vq);
 }
 
+#define SKB_FRAG_PAGE_ORDER	get_order(32768)
+
+static bool vhost_net_page_frag_refill(struct vhost_net *net, unsigned int sz,
+				       struct page_frag *pfrag, gfp_t gfp)
+{
+	if (pfrag->page) {
+		if (pfrag->offset + sz <= pfrag->size)
+			return true;
+		__page_frag_cache_drain(pfrag->page, net->refcnt_bias);
+	}
+
+	pfrag->offset = 0;
+	net->refcnt_bias = 0;
+	if (SKB_FRAG_PAGE_ORDER) {
+		/* Avoid direct reclaim but allow kswapd to wake */
+		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
+					  __GFP_COMP | __GFP_NOWARN |
+					  __GFP_NORETRY,
+					  SKB_FRAG_PAGE_ORDER);
+		if (likely(pfrag->page)) {
+			pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
+			goto done;
+		}
+	}
+	pfrag->page = alloc_page(gfp);
+	if (likely(pfrag->page)) {
+		pfrag->size = PAGE_SIZE;
+		goto done;
+	}
+	return false;
+
+done:
+	net->refcnt_bias = USHRT_MAX;
+	page_ref_add(pfrag->page, USHRT_MAX - 1);
+	return true;
+}
+
+#define VHOST_NET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
+
+static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
+			       struct iov_iter *from)
+{
+	struct vhost_virtqueue *vq = &nvq->vq;
+	struct vhost_net *net = container_of(vq->dev, struct vhost_net,
+					     dev);
+	struct socket *sock = vhost_vq_get_backend(vq);
+	struct page_frag *alloc_frag = &net->page_frag;
+	struct virtio_net_hdr *gso;
+	struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp];
+	struct tun_xdp_hdr *hdr;
+	size_t len = iov_iter_count(from);
+	int headroom = vhost_sock_xdp(sock) ? XDP_PACKET_HEADROOM : 0;
+	int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	int pad = SKB_DATA_ALIGN(VHOST_NET_RX_PAD + headroom + nvq->sock_hlen);
+	int sock_hlen = nvq->sock_hlen;
+	void *buf;
+	int copied;
+
+	if (unlikely(len < nvq->sock_hlen))
+		return -EFAULT;
+
+	if (SKB_DATA_ALIGN(len + pad) +
+	    SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
+		return -ENOSPC;
+
+	buflen += SKB_DATA_ALIGN(len + pad);
+	alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES);
+	if (unlikely(!vhost_net_page_frag_refill(net, buflen,
+						 alloc_frag, GFP_KERNEL)))
+		return -ENOMEM;
+
+	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+	copied = copy_page_from_iter(alloc_frag->page,
+				     alloc_frag->offset +
+				     offsetof(struct tun_xdp_hdr, gso),
+				     sock_hlen, from);
+	if (copied != sock_hlen)
+		return -EFAULT;
+
+	hdr = buf;
+	gso = &hdr->gso;
+
+	if ((gso->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
+	    vhost16_to_cpu(vq, gso->csum_start) +
+	    vhost16_to_cpu(vq, gso->csum_offset) + 2 >
+	    vhost16_to_cpu(vq, gso->hdr_len)) {
+		gso->hdr_len = cpu_to_vhost16(vq,
+					      vhost16_to_cpu(vq, gso->csum_start) +
+					      vhost16_to_cpu(vq, gso->csum_offset) + 2);
+
+		if (vhost16_to_cpu(vq, gso->hdr_len) > len)
+			return -EINVAL;
+	}
+
+	len -= sock_hlen;
+	copied = copy_page_from_iter(alloc_frag->page,
+				     alloc_frag->offset + pad,
+				     len, from);
+	if (copied != len)
+		return -EFAULT;
+
+	xdp->data_hard_start = buf;
+	xdp->data = buf + pad;
+	xdp->data_end = xdp->data + len;
+	hdr->buflen = buflen;
+	xdp->frame_sz = buflen;
+
+	--net->refcnt_bias;
+	alloc_frag->offset += buflen;
+
+	++nvq->batched_xdp;
+
+	return 0;
+}
+
 static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
 {
 	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
@@ -550,9 +783,13 @@
 	size_t len, total_len = 0;
 	int err;
 	int sent_pkts = 0;
+	bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
 
 	do {
 		bool busyloop_intr = false;
+
+		if (nvq->done_idx == VHOST_NET_BATCH)
+			vhost_tx_batch(net, nvq, sock, &msg);
 
 		head = get_tx_bufs(net, nvq, &msg, &out, &in, &len,
 				   &busyloop_intr);
@@ -571,14 +808,34 @@
 			break;
 		}
 
-		vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
-		vq->heads[nvq->done_idx].len = 0;
-
 		total_len += len;
-		if (tx_can_batch(vq, total_len))
-			msg.msg_flags |= MSG_MORE;
-		else
-			msg.msg_flags &= ~MSG_MORE;
+
+		/* For simplicity, TX batching is only enabled if
+		 * sndbuf is unlimited.
+		 */
+		if (sock_can_batch) {
+			err = vhost_net_build_xdp(nvq, &msg.msg_iter);
+			if (!err) {
+				goto done;
+			} else if (unlikely(err != -ENOSPC)) {
+				vhost_tx_batch(net, nvq, sock, &msg);
+				vhost_discard_vq_desc(vq, 1);
+				vhost_net_enable_vq(net, vq);
+				break;
+			}
+
+			/* We can't build XDP buff, go for single
+			 * packet path but let's flush batched
+			 * packets.
+			 */
+			vhost_tx_batch(net, nvq, sock, &msg);
+			msg.msg_control = NULL;
+		} else {
+			if (tx_can_batch(vq, total_len))
+				msg.msg_flags |= MSG_MORE;
+			else
+				msg.msg_flags &= ~MSG_MORE;
+		}
 
 		/* TODO: Check specific error and bomb out unless ENOBUFS? */
 		err = sock->ops->sendmsg(sock, &msg, len);
@@ -590,11 +847,13 @@
 			break;
 		}
 		if (err != len)
 			pr_debug("Truncated TX packet: len %d != %zd\n",
 				 err, len);
-		if (++nvq->done_idx >= VHOST_NET_BATCH)
-			vhost_net_signal_used(nvq);
+done:
+		vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
+		vq->heads[nvq->done_idx].len = 0;
+		++nvq->done_idx;
 	} while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
 
-	vhost_net_signal_used(nvq);
+	vhost_tx_batch(net, nvq, sock, &msg);
 }
 
 static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
@@ -610,9 +869,10 @@
 		.msg_controllen = 0,
 		.msg_flags = MSG_DONTWAIT,
 	};
+	struct tun_msg_ctl ctl;
 	size_t len, total_len = 0;
 	int err;
-	struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
+	struct vhost_net_ubuf_ref *ubufs;
 	struct ubuf_info *ubuf;
 	bool zcopy_used;
 	int sent_pkts = 0;
@@ -653,8 +913,10 @@
 			ubuf->ctx = nvq->ubufs;
 			ubuf->desc = nvq->upend_idx;
 			refcount_set(&ubuf->refcnt, 1);
-			msg.msg_control = ubuf;
-			msg.msg_controllen = sizeof(ubuf);
+			msg.msg_control = &ctl;
+			ctl.type = TUN_MSG_UBUF;
+			ctl.ptr = ubuf;
+			msg.msg_controllen = sizeof(ctl);
 			ubufs = nvq->ubufs;
 			atomic_inc(&ubufs->refcount);
 			nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
@@ -702,12 +964,12 @@
 	struct vhost_virtqueue *vq = &nvq->vq;
 	struct socket *sock;
 
-	mutex_lock(&vq->mutex);
-	sock = vq->private_data;
+	mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
+	sock = vhost_vq_get_backend(vq);
 	if (!sock)
 		goto out;
 
-	if (!vq_iotlb_prefetch(vq))
+	if (!vq_meta_prefetch(vq))
 		goto out;
 
 	vhost_disable_notify(&net->dev, vq);
@@ -743,16 +1005,6 @@
 	return len;
 }
 
-static int sk_has_rx_data(struct sock *sk)
-{
-	struct socket *sock = sk->sk_socket;
-
-	if (sock->ops->peek_len)
-		return sock->ops->peek_len(sock);
-
-	return skb_queue_empty(&sk->sk_receive_queue);
-}
-
 static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
 				      bool *busyloop_intr)
 {
@@ -760,41 +1012,13 @@
 	struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
 	struct vhost_virtqueue *rvq = &rnvq->vq;
 	struct vhost_virtqueue *tvq = &tnvq->vq;
-	unsigned long uninitialized_var(endtime);
 	int len = peek_head_len(rnvq, sk);
 
-	if (!len && tvq->busyloop_timeout) {
+	if (!len && rvq->busyloop_timeout) {
 		/* Flush batched heads first */
 		vhost_net_signal_used(rnvq);
 		/* Both tx vq and rx socket were polled here */
-		mutex_lock_nested(&tvq->mutex, 1);
-		vhost_disable_notify(&net->dev, tvq);
-
-		preempt_disable();
-		endtime = busy_clock() + tvq->busyloop_timeout;
-
-		while (vhost_can_busy_poll(endtime)) {
-			if (vhost_has_work(&net->dev)) {
-				*busyloop_intr = true;
-				break;
-			}
-			if ((sk_has_rx_data(sk) &&
-			     !vhost_vq_avail_empty(&net->dev, rvq)) ||
-			    !vhost_vq_avail_empty(&net->dev, tvq))
-				break;
-			cpu_relax();
-		}
-
-		preempt_enable();
-
-		if (!vhost_vq_avail_empty(&net->dev, tvq)) {
-			vhost_poll_queue(&tvq->poll);
-		} else if (unlikely(vhost_enable_notify(&net->dev, tvq))) {
-			vhost_disable_notify(&net->dev, tvq);
-			vhost_poll_queue(&tvq->poll);
-		}
-
-		mutex_unlock(&tvq->mutex);
+		vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true);
 
 		len = peek_head_len(rnvq, sk);
 	}
@@ -828,7 +1052,7 @@
 	/* len is always initialized before use since we are always called with
 	 * datalen > 0.
 	 */
-	u32 uninitialized_var(len);
+	u32 len;
 
 	while (datalen > 0 && headcount < quota) {
 		if (unlikely(seg >= UIO_MAXIOV)) {
@@ -885,7 +1109,7 @@
 {
 	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
 	struct vhost_virtqueue *vq = &nvq->vq;
-	unsigned uninitialized_var(in), log;
+	unsigned in, log;
 	struct vhost_log *vq_log;
 	struct msghdr msg = {
 		.msg_name = NULL,
@@ -909,12 +1133,12 @@
 	__virtio16 num_buffers;
 	int recv_pkts = 0;
 
-	mutex_lock_nested(&vq->mutex, 0);
-	sock = vq->private_data;
+	mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_RX);
+	sock = vhost_vq_get_backend(vq);
 	if (!sock)
 		goto out;
 
-	if (!vq_iotlb_prefetch(vq))
+	if (!vq_meta_prefetch(vq))
 		goto out;
 
 	vhost_disable_notify(&net->dev, vq);
@@ -1065,6 +1289,7 @@
 	struct vhost_dev *dev;
 	struct vhost_virtqueue **vqs;
 	void **queue;
+	struct xdp_buff *xdp;
 	int i;
 
 	n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
@@ -1085,6 +1310,15 @@
 	}
 	n->vqs[VHOST_NET_VQ_RX].rxq.queue = queue;
 
+	xdp = kmalloc_array(VHOST_NET_BATCH, sizeof(*xdp), GFP_KERNEL);
+	if (!xdp) {
+		kfree(vqs);
+		kvfree(n);
+		kfree(queue);
+		return -ENOMEM;
+	}
+	n->vqs[VHOST_NET_VQ_TX].xdp = xdp;
+
 	dev = &n->dev;
 	vqs[VHOST_NET_VQ_TX] = &n->vqs[VHOST_NET_VQ_TX].vq;
 	vqs[VHOST_NET_VQ_RX] = &n->vqs[VHOST_NET_VQ_RX].vq;
@@ -1095,6 +1329,7 @@
 		n->vqs[i].ubuf_info = NULL;
 		n->vqs[i].upend_idx = 0;
 		n->vqs[i].done_idx = 0;
+		n->vqs[i].batched_xdp = 0;
 		n->vqs[i].vhost_hlen = 0;
 		n->vqs[i].sock_hlen = 0;
 		n->vqs[i].rx_ring = NULL;
@@ -1102,12 +1337,15 @@
 	}
 	vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
 		       UIO_MAXIOV + VHOST_NET_BATCH,
-		       VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT);
+		       VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, true,
+		       NULL);
 
 	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev);
 	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev);
 
 	f->private_data = n;
+	n->page_frag.page = NULL;
+	n->refcnt_bias = 0;
 
 	return 0;
 }
@@ -1120,9 +1358,9 @@
 		container_of(vq, struct vhost_net_virtqueue, vq);
 
 	mutex_lock(&vq->mutex);
-	sock = vq->private_data;
+	sock = vhost_vq_get_backend(vq);
 	vhost_net_disable_vq(n, vq);
-	vq->private_data = NULL;
+	vhost_vq_set_backend(vq, NULL);
 	vhost_net_buf_unproduce(nvq);
 	nvq->rx_ring = NULL;
 	mutex_unlock(&vq->mutex);
@@ -1175,12 +1413,15 @@
 	if (rx_sock)
 		sockfd_put(rx_sock);
 	/* Make sure no callbacks are outstanding */
-	synchronize_rcu_bh();
+	synchronize_rcu();
 	/* We do an extra flush before freeing memory,
 	 * since jobs can re-queue themselves. */
 	vhost_net_flush(n);
 	kfree(n->vqs[VHOST_NET_VQ_RX].rxq.queue);
+	kfree(n->vqs[VHOST_NET_VQ_TX].xdp);
 	kfree(n->dev.vqs);
+	if (n->page_frag.page)
+		__page_frag_cache_drain(n->page_frag.page, n->refcnt_bias);
 	kvfree(n);
 	return 0;
 }
@@ -1209,13 +1450,9 @@
 	return ERR_PTR(r);
 }
 
-static struct ptr_ring *get_tap_ptr_ring(int fd)
+static struct ptr_ring *get_tap_ptr_ring(struct file *file)
 {
 	struct ptr_ring *ring;
-	struct file *file = fget(fd);
-
-	if (!file)
-		return NULL;
 	ring = tun_get_tx_ring(file);
 	if (!IS_ERR(ring))
 		goto out;
@@ -1224,7 +1461,6 @@
 		goto out;
 	ring = NULL;
 out:
-	fput(file);
 	return ring;
 }
 
@@ -1281,6 +1517,9 @@
 	nvq = &n->vqs[index];
 	mutex_lock(&vq->mutex);
 
+	if (fd == -1)
+		vhost_clear_msg(&n->dev);
+
 	/* Verify that ring has been setup correctly. */
 	if (!vhost_vq_access_ok(vq)) {
 		r = -EFAULT;
@@ -1293,7 +1532,7 @@
 	}
 
 	/* start polling new socket */
-	oldsock = vq->private_data;
+	oldsock = vhost_vq_get_backend(vq);
 	if (sock != oldsock) {
 		ubufs = vhost_net_ubuf_alloc(vq,
 					     sock && vhost_sock_zcopy(sock));
@@ -1303,7 +1542,7 @@
 		}
 
 		vhost_net_disable_vq(n, vq);
-		vq->private_data = sock;
+		vhost_vq_set_backend(vq, sock);
 		vhost_net_buf_unproduce(nvq);
 		r = vhost_vq_init_access(vq);
 		if (r)
@@ -1311,8 +1550,12 @@
 		r = vhost_net_enable_vq(n, vq);
 		if (r)
 			goto err_used;
-		if (index == VHOST_NET_VQ_RX)
-			nvq->rx_ring = get_tap_ptr_ring(fd);
+		if (index == VHOST_NET_VQ_RX) {
+			if (sock)
+				nvq->rx_ring = get_tap_ptr_ring(sock->file);
+			else
+				nvq->rx_ring = NULL;
+		}
 
 		oldubufs = nvq->ubufs;
 		nvq->ubufs = ubufs;
@@ -1340,7 +1583,7 @@
 	return 0;
 
 err_used:
-	vq->private_data = oldsock;
+	vhost_vq_set_backend(vq, oldsock);
 	vhost_net_enable_vq(n, vq);
 	if (ubufs)
 		vhost_net_ubuf_put_wait_and_free(ubufs);
@@ -1359,7 +1602,7 @@
 	struct socket *tx_sock = NULL;
 	struct socket *rx_sock = NULL;
 	long err;
-	struct vhost_umem *umem;
+	struct vhost_iotlb *umem;
 
 	mutex_lock(&n->dev.mutex);
 	err = vhost_dev_check_owner(&n->dev);
@@ -1382,21 +1625,6 @@
 	if (rx_sock)
 		sockfd_put(rx_sock);
 	return err;
}
 
-static int vhost_net_set_backend_features(struct vhost_net *n, u64 features)
-{
-	int i;
-
-	mutex_lock(&n->dev.mutex);
-	for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
-		mutex_lock(&n->vqs[i].vq.mutex);
-		n->vqs[i].vq.acked_backend_features = features;
-		mutex_unlock(&n->vqs[i].vq.mutex);
-	}
-	mutex_unlock(&n->dev.mutex);
-
-	return 0;
-}
-
 static int vhost_net_set_features(struct vhost_net *n, u64 features)
@@ -1422,7 +1650,7 @@
 	    !vhost_log_access_ok(&n->dev))
 		goto out_unlock;
 
-	if ((features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) {
+	if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) {
 		if (vhost_init_device_iotlb(&n->dev, true))
 			goto out_unlock;
 	}
@@ -1499,7 +1727,8 @@
 			return -EFAULT;
 		if (features & ~VHOST_NET_BACKEND_FEATURES)
 			return -EOPNOTSUPP;
-		return vhost_net_set_backend_features(n, features);
+		vhost_set_backend_features(&n->dev, features);
+		return 0;
 	case VHOST_RESET_OWNER:
 		return vhost_net_reset_owner(n);
 	case VHOST_SET_OWNER:
@@ -1515,14 +1744,6 @@
 		return r;
 	}
 }
 
-#ifdef CONFIG_COMPAT
-static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl,
-				   unsigned long arg)
-{
-	return vhost_net_ioctl(f, ioctl, (unsigned long)compat_ptr(arg));
-}
-#endif
-
 static ssize_t vhost_net_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
@@ -1559,9 +1780,7 @@
 	.write_iter     = vhost_net_chr_write_iter,
 	.poll           = vhost_net_chr_poll,
 	.unlocked_ioctl = vhost_net_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl   = vhost_net_compat_ioctl,
-#endif
+	.compat_ioctl   = compat_ptr_ioctl,
 	.open           = vhost_net_open,
 	.llseek		= noop_llseek,
 };
-- 
Gitblit v1.6.2
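
Editor's note: the sketch below is not part of the patch. It is a hedged
userspace illustration of VHOST_NET_SET_BACKEND, the ioctl whose handling
the patch reworks (the RX ptr_ring is now taken from sock->file, and
fd == -1 additionally clears pending IOTLB messages via vhost_clear_msg()).
The interface name "vhost-test0" is invented, and the memory-table and
vring setup (VHOST_SET_MEM_TABLE, VHOST_SET_VRING_*) that a real caller
must perform before attaching a backend is elided, so this program alone
would get -EFAULT from the backend ioctl.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <linux/vhost.h>

int main(void)
{
	struct ifreq ifr;
	struct vhost_vring_file backend;
	unsigned long long features;
	int tap, vhost;

	/* Create a TAP device; vhost-net reaches its ptr_ring via sock->file. */
	tap = open("/dev/net/tun", O_RDWR);
	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
	strncpy(ifr.ifr_name, "vhost-test0", IFNAMSIZ - 1);	/* invented name */
	if (tap < 0 || ioctl(tap, TUNSETIFF, &ifr) < 0) {
		perror("tap setup");
		return 1;
	}

	vhost = open("/dev/vhost-net", O_RDWR);
	if (vhost < 0 || ioctl(vhost, VHOST_SET_OWNER) < 0) {
		perror("vhost setup");
		return 1;
	}

	/* Acknowledge a feature set; a real caller would mask this down. */
	ioctl(vhost, VHOST_GET_FEATURES, &features);
	ioctl(vhost, VHOST_SET_FEATURES, &features);

	/* ... VHOST_SET_MEM_TABLE / VHOST_SET_VRING_* setup elided ... */

	/* Attach the TAP fd to the RX (index 0) and TX (index 1) queues.
	 * fd = -1 would detach the backend; with this patch that also
	 * drops any pending IOTLB messages for the device.
	 */
	backend.index = 0;
	backend.fd = tap;
	if (ioctl(vhost, VHOST_NET_SET_BACKEND, &backend) < 0)
		perror("set rx backend");
	backend.index = 1;
	if (ioctl(vhost, VHOST_NET_SET_BACKEND, &backend) < 0)
		perror("set tx backend");

	return 0;
}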
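Editor's note: a second hedged sketch, this time a toy userspace model of
the refcount-bias trick used by vhost_net_page_frag_refill() above. The
struct frag_cache and its helpers are invented for illustration: the page
is pre-charged with USHRT_MAX references in one atomic operation, each XDP
buffer carved from it costs only a non-atomic bias decrement, and the
unused remainder is returned in a single drain, mirroring page_ref_add()
and __page_frag_cache_drain() in the patch.

#include <stdatomic.h>
#include <stdio.h>

/* "refcnt" plays the page refcount, "bias" plays net->refcnt_bias. */
struct frag_cache {
	atomic_int refcnt;
	int bias;
};

/* One bulk atomic charge instead of an atomic inc per buffer:
 * mirrors page_ref_add(pfrag->page, USHRT_MAX - 1).
 */
static void cache_refill(struct frag_cache *c)
{
	atomic_fetch_add(&c->refcnt, 65535 - 1);
	c->bias = 65535;	/* net->refcnt_bias = USHRT_MAX */
}

/* Carving out a buffer costs a plain decrement, as --net->refcnt_bias
 * does in vhost_net_build_xdp().
 */
static void cache_alloc(struct frag_cache *c)
{
	--c->bias;
}

/* Return the unused pre-charged references in one shot, as
 * __page_frag_cache_drain(pfrag->page, net->refcnt_bias) does.
 */
static void cache_drain(struct frag_cache *c)
{
	atomic_fetch_sub(&c->refcnt, c->bias);
}

int main(void)
{
	struct frag_cache c = { 1, 0 };	/* a fresh page starts with one ref */

	cache_refill(&c);
	for (int i = 0; i < 3; i++)
		cache_alloc(&c);	/* three XDP buffers share the page */
	cache_drain(&c);

	/* 1 + 65534 - 65532 = 3: one reference per outstanding buffer,
	 * each dropped later by put_page() when the buffer is consumed.
	 */
	printf("refcnt = %d\n", atomic_load(&c.refcnt));
	return 0;
}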