From 244b2c5ca8b14627e4a17755e5922221e121c771 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Wed, 09 Oct 2024 06:15:07 +0000
Subject: [PATCH] change system file
---
kernel/drivers/net/virtio_net.c | 454 +++++++++++++++++++++++++++++++++++---------------------
1 files changed, 283 insertions(+), 171 deletions(-)
diff --git a/kernel/drivers/net/virtio_net.c b/kernel/drivers/net/virtio_net.c
index 1a8fe5b..61d83d5 100644
--- a/kernel/drivers/net/virtio_net.c
+++ b/kernel/drivers/net/virtio_net.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* A network driver using virtio.
*
* Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
//#define DEBUG
#include <linux/netdevice.h>
@@ -31,7 +19,6 @@
#include <linux/average.h>
#include <linux/filter.h>
#include <linux/kernel.h>
-#include <linux/pci.h>
#include <net/route.h>
#include <net/xdp.h>
#include <net/net_failover.h>
@@ -39,7 +26,7 @@
static int napi_weight = NAPI_POLL_WEIGHT;
module_param(napi_weight, int, 0444);
-static bool csum = true, gso = true, napi_tx;
+static bool csum = true, gso = true, napi_tx = true;
module_param(csum, bool, 0444);
module_param(gso, bool, 0444);
module_param(napi_tx, bool, 0644);
@@ -75,6 +62,11 @@
VIRTIO_NET_F_GUEST_UFO,
VIRTIO_NET_F_GUEST_CSUM
};
+
+#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
+ (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
+ (1ULL << VIRTIO_NET_F_GUEST_ECN) | \
+ (1ULL << VIRTIO_NET_F_GUEST_UFO))
struct virtnet_stat_desc {
char desc[ETH_GSTRING_LEN];
@@ -203,6 +195,9 @@
/* # of XDP queue pairs currently used by the driver */
u16 xdp_queue_pairs;
+ /* xdp_queue_pairs may be 0 while XDP is loaded, so track it here. */
+ bool xdp_enabled;
+
/* I like... big packets and I cannot lie! */
bool big_packets;
@@ -218,8 +213,14 @@
/* Packet virtio header size */
u8 hdr_len;
- /* Work struct for refilling if we run low on memory. */
+ /* Work struct for delayed refilling if we run low on memory. */
struct delayed_work refill;
+
+ /* Is delayed refill enabled? */
+ bool refill_enabled;
+
+ /* The lock to synchronize the access to refill_enabled */
+ spinlock_t refill_lock;
/* Work struct for config space updates */
struct work_struct config_work;
@@ -238,6 +239,7 @@
u32 speed;
unsigned long guest_offloads;
+ unsigned long guest_offloads_capable;
/* failover when STANDBY feature enabled */
struct failover *failover;
@@ -321,6 +323,20 @@
} else
p = alloc_page(gfp_mask);
return p;
+}
+
+static void enable_delayed_refill(struct virtnet_info *vi)
+{
+ spin_lock_bh(&vi->refill_lock);
+ vi->refill_enabled = true;
+ spin_unlock_bh(&vi->refill_lock);
+}
+
+static void disable_delayed_refill(struct virtnet_info *vi)
+{
+ spin_lock_bh(&vi->refill_lock);
+ vi->refill_enabled = false;
+ spin_unlock_bh(&vi->refill_lock);
}
static void virtqueue_napi_schedule(struct napi_struct *napi,
@@ -492,12 +508,41 @@
return 0;
}
-static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi)
-{
- unsigned int qp;
+/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
+ * the current cpu, so it does not need to be locked.
+ *
+ * Here we use macros instead of inline functions because we have to deal with
+ * three issues at the same time: 1. choosing the sq, 2. deciding whether to
+ * lock/unlock the txq and doing so, 3. keeping sparse happy. It is difficult
+ * for two inline functions to solve all three problems at the same time.
+ */
+#define virtnet_xdp_get_sq(vi) ({ \
+ struct netdev_queue *txq; \
+ typeof(vi) v = (vi); \
+ unsigned int qp; \
+ \
+ if (v->curr_queue_pairs > nr_cpu_ids) { \
+ qp = v->curr_queue_pairs - v->xdp_queue_pairs; \
+ qp += smp_processor_id(); \
+ txq = netdev_get_tx_queue(v->dev, qp); \
+ __netif_tx_acquire(txq); \
+ } else { \
+ qp = smp_processor_id() % v->curr_queue_pairs; \
+ txq = netdev_get_tx_queue(v->dev, qp); \
+ __netif_tx_lock(txq, raw_smp_processor_id()); \
+ } \
+ v->sq + qp; \
+})
- qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
- return &vi->sq[qp];
+#define virtnet_xdp_put_sq(vi, q) { \
+ struct netdev_queue *txq; \
+ typeof(vi) v = (vi); \
+ \
+ txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \
+ if (v->curr_queue_pairs > nr_cpu_ids) \
+ __netif_tx_release(txq); \
+ else \
+ __netif_tx_unlock(txq); \
}
static int virtnet_xdp_xmit(struct net_device *dev,
@@ -519,11 +564,11 @@
/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
* indicate XDP resources have been successfully allocated.
*/
- xdp_prog = rcu_dereference(rq->xdp_prog);
+ xdp_prog = rcu_access_pointer(rq->xdp_prog);
if (!xdp_prog)
return -ENXIO;
- sq = virtnet_xdp_sq(vi);
+ sq = virtnet_xdp_get_sq(vi);
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
ret = -EINVAL;
@@ -571,12 +616,13 @@
sq->stats.kicks += kicks;
u64_stats_update_end(&sq->stats.syncp);
+ virtnet_xdp_put_sq(vi, sq);
return ret;
}
static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
{
- return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
+ return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0;
}
/* We copy the packet for XDP in the following cases:
@@ -600,8 +646,13 @@
int page_off,
unsigned int *len)
{
- struct page *page = alloc_page(GFP_ATOMIC);
+ int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ struct page *page;
+ if (page_off + *len + tailroom > PAGE_SIZE)
+ return NULL;
+
+ page = alloc_page(GFP_ATOMIC);
if (!page)
return NULL;
@@ -609,7 +660,6 @@
page_off += *len;
while (--*num_buf) {
- int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
unsigned int buflen;
void *buf;
int off;
@@ -667,6 +717,12 @@
len -= vi->hdr_len;
stats->bytes += len;
+ if (unlikely(len > GOOD_PACKET_LEN)) {
+ pr_debug("%s: rx error: len %u exceeds max size %d\n",
+ dev->name, len, GOOD_PACKET_LEN);
+ dev->stats.rx_length_errors++;
+ goto err_len;
+ }
rcu_read_lock();
xdp_prog = rcu_dereference(rq->xdp_prog);
if (xdp_prog) {
@@ -705,6 +761,7 @@
xdp.data_end = xdp.data + len;
xdp.data_meta = xdp.data;
xdp.rxq = &rq->xdp_rxq;
+ xdp.frame_sz = buflen;
orig_data = xdp.data;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
stats->xdp_packets++;
@@ -718,7 +775,7 @@
break;
case XDP_TX:
stats->xdp_tx++;
- xdpf = convert_to_xdp_frame(&xdp);
+ xdpf = xdp_convert_buff_to_frame(&xdp);
if (unlikely(!xdpf))
goto err_xdp;
err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
@@ -739,7 +796,7 @@
goto xdp_xmit;
default:
bpf_warn_invalid_xdp_action(act);
- /* fall through */
+ fallthrough;
case XDP_ABORTED:
trace_xdp_exception(vi->dev, xdp_prog, act);
case XDP_DROP:
@@ -755,10 +812,10 @@
}
skb_reserve(skb, headroom - delta);
skb_put(skb, len);
- if (!delta) {
+ if (!xdp_prog) {
buf += header_offset;
memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
- } /* keep zeroed vnet hdr since packet was changed by bpf */
+ } /* keep zeroed vnet hdr since XDP is loaded */
if (metasize)
skb_metadata_set(skb, metasize);
@@ -769,6 +826,7 @@
err_xdp:
rcu_read_unlock();
stats->xdp_drops++;
+err_len:
stats->drops++;
put_page(page);
xdp_xmit:
@@ -813,14 +871,21 @@
int offset = buf - page_address(page);
struct sk_buff *head_skb, *curr_skb;
struct bpf_prog *xdp_prog;
- unsigned int truesize;
+ unsigned int truesize = mergeable_ctx_to_truesize(ctx);
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
- int err;
unsigned int metasize = 0;
+ unsigned int frame_sz;
+ int err;
head_skb = NULL;
stats->bytes += len - vi->hdr_len;
+ if (unlikely(len > truesize)) {
+ pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
+ dev->name, len, (unsigned long)ctx);
+ dev->stats.rx_length_errors++;
+ goto err_skb;
+ }
rcu_read_lock();
xdp_prog = rcu_dereference(rq->xdp_prog);
if (xdp_prog) {
@@ -837,6 +902,11 @@
if (unlikely(hdr->hdr.gso_type))
goto err_xdp;
+ /* Buffers with headroom use PAGE_SIZE as alloc size,
+ * see add_recvbuf_mergeable() + get_mergeable_buf_len()
+ */
+ frame_sz = headroom ? PAGE_SIZE : truesize;
+
/* This happens when rx buffer size is underestimated
* or headroom is not enough because of the buffer
* was refilled before XDP is set. This should only
@@ -850,6 +920,8 @@
page, offset,
VIRTIO_XDP_HEADROOM,
&len);
+ frame_sz = PAGE_SIZE;
+
if (!xdp_page)
goto err_xdp;
offset = VIRTIO_XDP_HEADROOM;
@@ -866,6 +938,7 @@
xdp.data_end = xdp.data + (len - vi->hdr_len);
xdp.data_meta = xdp.data;
xdp.rxq = &rq->xdp_rxq;
+ xdp.frame_sz = frame_sz - vi->hdr_len;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
stats->xdp_packets++;
@@ -898,9 +971,12 @@
break;
case XDP_TX:
stats->xdp_tx++;
- xdpf = convert_to_xdp_frame(&xdp);
- if (unlikely(!xdpf))
+ xdpf = xdp_convert_buff_to_frame(&xdp);
+ if (unlikely(!xdpf)) {
+ if (unlikely(xdp_page != page))
+ put_page(xdp_page);
goto err_xdp;
+ }
err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
if (unlikely(err < 0)) {
trace_xdp_exception(vi->dev, xdp_prog, act);
@@ -928,10 +1004,10 @@
goto xdp_xmit;
default:
bpf_warn_invalid_xdp_action(act);
- /* fall through */
+ fallthrough;
case XDP_ABORTED:
trace_xdp_exception(vi->dev, xdp_prog, act);
- /* fall through */
+ fallthrough;
case XDP_DROP:
if (unlikely(xdp_page != page))
__free_pages(xdp_page, 0);
@@ -939,14 +1015,6 @@
}
}
rcu_read_unlock();
-
- truesize = mergeable_ctx_to_truesize(ctx);
- if (unlikely(len > truesize)) {
- pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
- dev->name, len, (unsigned long)ctx);
- dev->stats.rx_length_errors++;
- goto err_skb;
- }
head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
metasize);
@@ -1081,6 +1149,7 @@
goto frame_err;
}
+ skb_record_rx_queue(skb, vq2rxq(rq->vq));
skb->protocol = eth_type_trans(skb, dev);
pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
ntohs(skb->protocol), skb->len, skb->pkt_type);
@@ -1360,9 +1429,13 @@
}
}
- if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
- if (!try_fill_recv(vi, rq, GFP_ATOMIC))
- schedule_delayed_work(&vi->refill, 0);
+ if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
+ if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
+ spin_lock(&vi->refill_lock);
+ if (vi->refill_enabled)
+ schedule_delayed_work(&vi->refill, 0);
+ spin_unlock(&vi->refill_lock);
+ }
}
u64_stats_update_begin(&rq->stats.syncp);
@@ -1456,20 +1529,21 @@
received = virtnet_receive(rq, budget, &xdp_xmit);
+ if (xdp_xmit & VIRTIO_XDP_REDIR)
+ xdp_do_flush();
+
/* Out of packets? */
if (received < budget)
virtqueue_napi_complete(napi, rq->vq, received);
- if (xdp_xmit & VIRTIO_XDP_REDIR)
- xdp_do_flush_map();
-
if (xdp_xmit & VIRTIO_XDP_TX) {
- sq = virtnet_xdp_sq(vi);
+ sq = virtnet_xdp_get_sq(vi);
if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
u64_stats_update_begin(&sq->stats.syncp);
sq->stats.kicks++;
u64_stats_update_end(&sq->stats.syncp);
}
+ virtnet_xdp_put_sq(vi, sq);
}
return received;
@@ -1479,6 +1553,8 @@
{
struct virtnet_info *vi = netdev_priv(dev);
int i, err;
+
+ enable_delayed_refill(vi);
for (i = 0; i < vi->max_queue_pairs; i++) {
if (i < vi->curr_queue_pairs)
@@ -1604,7 +1680,7 @@
struct send_queue *sq = &vi->sq[qnum];
int err;
struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
- bool kick = !skb->xmit_more;
+ bool kick = !netdev_xmit_more();
bool use_napi = sq->napi.weight;
/* Free up any pending old buffers before queueing new ones. */
@@ -1624,7 +1700,8 @@
dev->stats.tx_fifo_errors++;
if (net_ratelimit())
dev_warn(&dev->dev,
- "Unexpected TXQ (%d) queue failure: %d\n", qnum, err);
+ "Unexpected TXQ (%d) queue failure: %d\n",
+ qnum, err);
dev->stats.tx_dropped++;
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
@@ -1633,7 +1710,7 @@
/* Don't wait up for transmitted skbs to be freed. */
if (!use_napi) {
skb_orphan(skb);
- nf_reset(skb);
+ nf_reset_ct(skb);
}
/* If running out of space, stop queue to avoid getting packets that we
@@ -1849,12 +1926,14 @@
struct virtnet_info *vi = netdev_priv(dev);
int i;
+ /* Make sure NAPI doesn't schedule refill work */
+ disable_delayed_refill(vi);
/* Make sure refill_work doesn't re-enable napi! */
cancel_delayed_work_sync(&vi->refill);
for (i = 0; i < vi->max_queue_pairs; i++) {
- xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
napi_disable(&vi->rq[i].napi);
+ xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
virtnet_napi_tx_disable(&vi->sq[i].napi);
}
@@ -1961,7 +2040,7 @@
return 0;
}
-static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
+static void virtnet_clean_affinity(struct virtnet_info *vi)
{
int i;
@@ -1985,7 +2064,7 @@
int stride;
if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
- virtnet_clean_affinity(vi, -1);
+ virtnet_clean_affinity(vi);
return;
}
@@ -2035,7 +2114,7 @@
struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
node);
- virtnet_clean_affinity(vi, cpu);
+ virtnet_clean_affinity(vi);
return 0;
}
@@ -2216,48 +2295,13 @@
channels->other_count = 0;
}
-/* Check if the user is trying to change anything besides speed/duplex */
-static bool
-virtnet_validate_ethtool_cmd(const struct ethtool_link_ksettings *cmd)
-{
- struct ethtool_link_ksettings diff1 = *cmd;
- struct ethtool_link_ksettings diff2 = {};
-
- /* cmd is always set so we need to clear it, validate the port type
- * and also without autonegotiation we can ignore advertising
- */
- diff1.base.speed = 0;
- diff2.base.port = PORT_OTHER;
- ethtool_link_ksettings_zero_link_mode(&diff1, advertising);
- diff1.base.duplex = 0;
- diff1.base.cmd = 0;
- diff1.base.link_mode_masks_nwords = 0;
-
- return !memcmp(&diff1.base, &diff2.base, sizeof(diff1.base)) &&
- bitmap_empty(diff1.link_modes.supported,
- __ETHTOOL_LINK_MODE_MASK_NBITS) &&
- bitmap_empty(diff1.link_modes.advertising,
- __ETHTOOL_LINK_MODE_MASK_NBITS) &&
- bitmap_empty(diff1.link_modes.lp_advertising,
- __ETHTOOL_LINK_MODE_MASK_NBITS);
-}
-
static int virtnet_set_link_ksettings(struct net_device *dev,
const struct ethtool_link_ksettings *cmd)
{
struct virtnet_info *vi = netdev_priv(dev);
- u32 speed;
- speed = cmd->base.speed;
- /* don't allow custom speed and duplex */
- if (!ethtool_validate_speed(speed) ||
- !ethtool_validate_duplex(cmd->base.duplex) ||
- !virtnet_validate_ethtool_cmd(cmd))
- return -EINVAL;
- vi->speed = speed;
- vi->duplex = cmd->base.duplex;
-
- return 0;
+ return ethtool_virtdev_set_link_ksettings(dev, cmd,
+ &vi->speed, &vi->duplex);
}
static int virtnet_get_link_ksettings(struct net_device *dev,
@@ -2268,6 +2312,44 @@
cmd->base.speed = vi->speed;
cmd->base.duplex = vi->duplex;
cmd->base.port = PORT_OTHER;
+
+ return 0;
+}
+
+static int virtnet_set_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ec)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ int i, napi_weight;
+
+ if (ec->tx_max_coalesced_frames > 1 ||
+ ec->rx_max_coalesced_frames != 1)
+ return -EINVAL;
+
+ napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
+ if (napi_weight ^ vi->sq[0].napi.weight) {
+ if (dev->flags & IFF_UP)
+ return -EBUSY;
+ for (i = 0; i < vi->max_queue_pairs; i++)
+ vi->sq[i].napi.weight = napi_weight;
+ }
+
+ return 0;
+}
+
+static int virtnet_get_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ec)
+{
+ struct ethtool_coalesce ec_default = {
+ .cmd = ETHTOOL_GCOALESCE,
+ .rx_max_coalesced_frames = 1,
+ };
+ struct virtnet_info *vi = netdev_priv(dev);
+
+ memcpy(ec, &ec_default, sizeof(ec_default));
+
+ if (vi->sq[0].napi.weight)
+ ec->tx_max_coalesced_frames = 1;
return 0;
}
@@ -2288,17 +2370,19 @@
if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX))
return;
- speed = virtio_cread32(vi->vdev, offsetof(struct virtio_net_config,
- speed));
+ virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed);
+
if (ethtool_validate_speed(speed))
vi->speed = speed;
- duplex = virtio_cread8(vi->vdev, offsetof(struct virtio_net_config,
- duplex));
+
+ virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex);
+
if (ethtool_validate_duplex(duplex))
vi->duplex = duplex;
}
static const struct ethtool_ops virtnet_ethtool_ops = {
+ .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
.get_drvinfo = virtnet_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_ringparam = virtnet_get_ringparam,
@@ -2310,12 +2394,13 @@
.get_ts_info = ethtool_op_get_ts_info,
.get_link_ksettings = virtnet_get_link_ksettings,
.set_link_ksettings = virtnet_set_link_ksettings,
+ .set_coalesce = virtnet_set_coalesce,
+ .get_coalesce = virtnet_get_coalesce,
};
static void virtnet_freeze_down(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
- int i;
/* Make sure no work handler is accessing the device */
flush_work(&vi->config_work);
@@ -2323,14 +2408,8 @@
netif_tx_lock_bh(vi->dev);
netif_device_detach(vi->dev);
netif_tx_unlock_bh(vi->dev);
- cancel_delayed_work_sync(&vi->refill);
-
- if (netif_running(vi->dev)) {
- for (i = 0; i < vi->max_queue_pairs; i++) {
- napi_disable(&vi->rq[i].napi);
- virtnet_napi_tx_disable(&vi->sq[i].napi);
- }
- }
+ if (netif_running(vi->dev))
+ virtnet_close(vi->dev);
}
static int init_vqs(struct virtnet_info *vi);
@@ -2338,7 +2417,7 @@
static int virtnet_restore_up(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
- int err, i;
+ int err;
err = init_vqs(vi);
if (err)
@@ -2346,16 +2425,12 @@
virtio_device_ready(vdev);
- if (netif_running(vi->dev)) {
- for (i = 0; i < vi->curr_queue_pairs; i++)
- if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
- schedule_delayed_work(&vi->refill, 0);
+ enable_delayed_refill(vi);
- for (i = 0; i < vi->max_queue_pairs; i++) {
- virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
- virtnet_napi_tx_enable(vi, vi->sq[i].vq,
- &vi->sq[i].napi);
- }
+ if (netif_running(vi->dev)) {
+ err = virtnet_open(vi->dev);
+ if (err)
+ return err;
}
netif_tx_lock_bh(vi->dev);
@@ -2373,7 +2448,7 @@
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
- dev_warn(&vi->dev->dev, "Fail to set guest offload. \n");
+ dev_warn(&vi->dev->dev, "Fail to set guest offload.\n");
return -EINVAL;
}
@@ -2415,7 +2490,7 @@
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) {
- NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first");
+ NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first");
return -EOPNOTSUPP;
}
@@ -2436,21 +2511,17 @@
/* XDP requires extra queues for XDP_TX */
if (curr_qp + xdp_qp > vi->max_queue_pairs) {
- NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available");
- netdev_warn(dev, "request %i queues but max is %i\n",
+ netdev_warn(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
curr_qp + xdp_qp, vi->max_queue_pairs);
- return -ENOMEM;
+ xdp_qp = 0;
}
old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
if (!prog && !old_prog)
return 0;
- if (prog) {
- prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
- }
+ if (prog)
+ bpf_prog_add(prog, vi->max_queue_pairs - 1);
/* Make sure NAPI is not using any XDP TX queues for RX. */
if (netif_running(dev)) {
@@ -2476,11 +2547,14 @@
vi->xdp_queue_pairs = xdp_qp;
if (prog) {
+ vi->xdp_enabled = true;
for (i = 0; i < vi->max_queue_pairs; i++) {
rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
if (i == 0 && !old_prog)
virtnet_clear_guest_offloads(vi);
}
+ } else {
+ vi->xdp_enabled = false;
}
for (i = 0; i < vi->max_queue_pairs; i++) {
@@ -2514,28 +2588,11 @@
return err;
}
-static u32 virtnet_xdp_query(struct net_device *dev)
-{
- struct virtnet_info *vi = netdev_priv(dev);
- const struct bpf_prog *xdp_prog;
- int i;
-
- for (i = 0; i < vi->max_queue_pairs; i++) {
- xdp_prog = rtnl_dereference(vi->rq[i].xdp_prog);
- if (xdp_prog)
- return xdp_prog->aux->id;
- }
- return 0;
-}
-
static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
switch (xdp->command) {
case XDP_SETUP_PROG:
return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
- case XDP_QUERY_PROG:
- xdp->prog_id = virtnet_xdp_query(dev);
- return 0;
default:
return -EINVAL;
}
@@ -2557,6 +2614,35 @@
return 0;
}
+static int virtnet_set_features(struct net_device *dev,
+ netdev_features_t features)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ u64 offloads;
+ int err;
+
+ if (!vi->has_cvq)
+ return 0;
+
+ if ((dev->features ^ features) & NETIF_F_GRO_HW) {
+ if (vi->xdp_enabled)
+ return -EBUSY;
+
+ if (features & NETIF_F_GRO_HW)
+ offloads = vi->guest_offloads_capable;
+ else
+ offloads = vi->guest_offloads_capable &
+ ~GUEST_OFFLOAD_GRO_HW_MASK;
+
+ err = virtnet_set_guest_offloads(vi, offloads);
+ if (err)
+ return err;
+ vi->guest_offloads = offloads;
+ }
+
+ return 0;
+}
+
static const struct net_device_ops virtnet_netdev = {
.ndo_open = virtnet_open,
.ndo_stop = virtnet_close,
@@ -2571,6 +2657,7 @@
.ndo_xdp_xmit = virtnet_xdp_xmit,
.ndo_features_check = passthru_features_check,
.ndo_get_phys_port_name = virtnet_get_phys_port_name,
+ .ndo_set_features = virtnet_set_features,
};
static void virtnet_config_changed_work(struct work_struct *work)
@@ -2618,12 +2705,11 @@
int i;
for (i = 0; i < vi->max_queue_pairs; i++) {
- napi_hash_del(&vi->rq[i].napi);
- netif_napi_del(&vi->rq[i].napi);
- netif_napi_del(&vi->sq[i].napi);
+ __netif_napi_del(&vi->rq[i].napi);
+ __netif_napi_del(&vi->sq[i].napi);
}
- /* We called napi_hash_del() before netif_napi_del(),
+ /* We called __netif_napi_del(),
* we need to respect an RCU grace period before freeing vi->rq
*/
synchronize_net();
@@ -2664,6 +2750,27 @@
put_page(vi->rq[i].alloc_frag.page);
}
+static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
+{
+ if (!is_xdp_frame(buf))
+ dev_kfree_skb(buf);
+ else
+ xdp_return_frame(ptr_to_xdp(buf));
+}
+
+static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf)
+{
+ struct virtnet_info *vi = vq->vdev->priv;
+ int i = vq2rxq(vq);
+
+ if (vi->mergeable_rx_bufs)
+ put_page(virt_to_head_page(buf));
+ else if (vi->big_packets)
+ give_pages(&vi->rq[i], buf);
+ else
+ put_page(virt_to_head_page(buf));
+}
+
static void free_unused_bufs(struct virtnet_info *vi)
{
void *buf;
@@ -2671,26 +2778,16 @@
for (i = 0; i < vi->max_queue_pairs; i++) {
struct virtqueue *vq = vi->sq[i].vq;
- while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
- if (!is_xdp_frame(buf))
- dev_kfree_skb(buf);
- else
- xdp_return_frame(ptr_to_xdp(buf));
- }
+ while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
+ virtnet_sq_free_unused_buf(vq, buf);
+ cond_resched();
}
for (i = 0; i < vi->max_queue_pairs; i++) {
struct virtqueue *vq = vi->rq[i].vq;
-
- while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
- if (vi->mergeable_rx_bufs) {
- put_page(virt_to_head_page(buf));
- } else if (vi->big_packets) {
- give_pages(&vi->rq[i], buf);
- } else {
- put_page(virt_to_head_page(buf));
- }
- }
+ while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
+ virtnet_rq_free_unused_buf(vq, buf);
+ cond_resched();
}
}
@@ -2698,7 +2795,7 @@
{
struct virtio_device *vdev = vi->vdev;
- virtnet_clean_affinity(vi, -1);
+ virtnet_clean_affinity(vi);
vdev->config->del_vqs(vdev);
@@ -3019,6 +3116,11 @@
}
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
dev->features |= NETIF_F_RXCSUM;
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+ virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
+ dev->features |= NETIF_F_GRO_HW;
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
+ dev->hw_features |= NETIF_F_GRO_HW;
dev->vlan_features = dev->features;
@@ -3041,6 +3143,7 @@
vdev->priv = vi;
INIT_WORK(&vi->config_work, virtnet_config_changed_work);
+ spin_lock_init(&vi->refill_lock);
/* If we can receive ANY GSO packets, we must allocate large ones. */
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
@@ -3073,8 +3176,10 @@
/* Should never trigger: MTU was previously validated
* in virtnet_validate.
*/
- dev_err(&vdev->dev, "device MTU appears to have changed "
- "it is now %d < %d", mtu, dev->min_mtu);
+ dev_err(&vdev->dev,
+ "device MTU appears to have changed it is now %d < %d",
+ mtu, dev->min_mtu);
+ err = -EINVAL;
goto free;
}
@@ -3118,21 +3223,27 @@
}
}
- err = register_netdev(dev);
+ /* serialize netdev register + virtio_device_ready() with ndo_open() */
+ rtnl_lock();
+
+ err = register_netdevice(dev);
if (err) {
pr_debug("virtio_net: registering device failed\n");
+ rtnl_unlock();
goto free_failover;
}
virtio_device_ready(vdev);
+
+ _virtnet_set_queues(vi, vi->curr_queue_pairs);
+
+ rtnl_unlock();
err = virtnet_cpu_notif_add(vi);
if (err) {
pr_debug("virtio_net: registering cpu notifier failed\n");
goto free_unregister_netdev;
}
-
- virtnet_set_queues(vi, vi->curr_queue_pairs);
/* Assume link up if device can't report link status,
otherwise get link status from config. */
@@ -3148,6 +3259,7 @@
for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
if (virtio_has_feature(vi->vdev, guest_offloads[i]))
set_bit(guest_offloads[i], &vi->guest_offloads);
+ vi->guest_offloads_capable = vi->guest_offloads;
pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
dev->name, max_queue_pairs);
--
Gitblit v1.6.2