2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/net/veth.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * drivers/net/veth.c
  *
@@ -24,6 +25,7 @@
 #include <linux/filter.h>
 #include <linux/ptr_ring.h>
 #include <linux/bpf_trace.h>
+#include <linux/net_tstamp.h>
 
 #define DRV_NAME "veth"
 #define DRV_VERSION "1.0"
@@ -32,13 +34,23 @@
 #define VETH_RING_SIZE 256
 #define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN)
 
-/* Separating two types of XDP xmit */
-#define VETH_XDP_TX BIT(0)
-#define VETH_XDP_REDIR BIT(1)
+#define VETH_XDP_TX_BULK_SIZE 16
 
-struct pcpu_vstats {
-	u64 packets;
-	u64 bytes;
+struct veth_stats {
+	u64 rx_drops;
+	/* xdp */
+	u64 xdp_packets;
+	u64 xdp_bytes;
+	u64 xdp_redirect;
+	u64 xdp_drops;
+	u64 xdp_tx;
+	u64 xdp_tx_err;
+	u64 peer_tq_xdp_xmit;
+	u64 peer_tq_xdp_xmit_err;
+};
+
+struct veth_rq_stats {
+	struct veth_stats vs;
 	struct u64_stats_sync syncp;
 };
 
@@ -47,6 +59,7 @@
 	struct net_device *dev;
 	struct bpf_prog __rcu *xdp_prog;
 	struct xdp_mem_info xdp_mem;
+	struct veth_rq_stats stats;
 	bool rx_notify_masked;
 	struct ptr_ring xdp_ring;
 	struct xdp_rxq_info xdp_rxq;
@@ -60,9 +73,40 @@
 	unsigned int requested_headroom;
 };
 
+struct veth_xdp_tx_bq {
+	struct xdp_frame *q[VETH_XDP_TX_BULK_SIZE];
+	unsigned int count;
+};
+
 /*
  * ethtool interface
  */
+
+struct veth_q_stat_desc {
+	char desc[ETH_GSTRING_LEN];
+	size_t offset;
+};
+
+#define VETH_RQ_STAT(m) offsetof(struct veth_stats, m)
+
+static const struct veth_q_stat_desc veth_rq_stats_desc[] = {
+	{ "xdp_packets", VETH_RQ_STAT(xdp_packets) },
+	{ "xdp_bytes", VETH_RQ_STAT(xdp_bytes) },
+	{ "drops", VETH_RQ_STAT(rx_drops) },
+	{ "xdp_redirect", VETH_RQ_STAT(xdp_redirect) },
+	{ "xdp_drops", VETH_RQ_STAT(xdp_drops) },
+	{ "xdp_tx", VETH_RQ_STAT(xdp_tx) },
+	{ "xdp_tx_errors", VETH_RQ_STAT(xdp_tx_err) },
+};
+
+#define VETH_RQ_STATS_LEN ARRAY_SIZE(veth_rq_stats_desc)
+
+static const struct veth_q_stat_desc veth_tq_stats_desc[] = {
+	{ "xdp_xmit", VETH_RQ_STAT(peer_tq_xdp_xmit) },
+	{ "xdp_xmit_errors", VETH_RQ_STAT(peer_tq_xdp_xmit_err) },
+};
+
+#define VETH_TQ_STATS_LEN ARRAY_SIZE(veth_tq_stats_desc)
 
 static struct {
 	const char string[ETH_GSTRING_LEN];
@@ -88,9 +132,29 @@
 
 static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 {
+	char *p = (char *)buf;
+	int i, j;
+
 	switch(stringset) {
 	case ETH_SS_STATS:
-		memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
+		memcpy(p, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
+		p += sizeof(ethtool_stats_keys);
+		for (i = 0; i < dev->real_num_rx_queues; i++) {
+			for (j = 0; j < VETH_RQ_STATS_LEN; j++) {
+				snprintf(p, ETH_GSTRING_LEN,
+					 "rx_queue_%u_%.18s",
+					 i, veth_rq_stats_desc[j].desc);
+				p += ETH_GSTRING_LEN;
+			}
+		}
+		for (i = 0; i < dev->real_num_tx_queues; i++) {
+			for (j = 0; j < VETH_TQ_STATS_LEN; j++) {
+				snprintf(p, ETH_GSTRING_LEN,
+					 "tx_queue_%u_%.18s",
+					 i, veth_tq_stats_desc[j].desc);
+				p += ETH_GSTRING_LEN;
+			}
+		}
 		break;
 	}
 }
@@ -99,7 +163,9 @@
 {
 	switch (sset) {
 	case ETH_SS_STATS:
-		return ARRAY_SIZE(ethtool_stats_keys);
+		return ARRAY_SIZE(ethtool_stats_keys) +
+		       VETH_RQ_STATS_LEN * dev->real_num_rx_queues +
+		       VETH_TQ_STATS_LEN * dev->real_num_tx_queues;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -108,10 +174,47 @@
 static void veth_get_ethtool_stats(struct net_device *dev,
 				   struct ethtool_stats *stats, u64 *data)
 {
-	struct veth_priv *priv = netdev_priv(dev);
+	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
 	struct net_device *peer = rtnl_dereference(priv->peer);
+	int i, j, idx;
 
 	data[0] = peer ? peer->ifindex : 0;
+	idx = 1;
+	for (i = 0; i < dev->real_num_rx_queues; i++) {
+		const struct veth_rq_stats *rq_stats = &priv->rq[i].stats;
+		const void *stats_base = (void *)&rq_stats->vs;
+		unsigned int start;
+		size_t offset;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&rq_stats->syncp);
+			for (j = 0; j < VETH_RQ_STATS_LEN; j++) {
+				offset = veth_rq_stats_desc[j].offset;
+				data[idx + j] = *(u64 *)(stats_base + offset);
+			}
+		} while (u64_stats_fetch_retry_irq(&rq_stats->syncp, start));
+		idx += VETH_RQ_STATS_LEN;
+	}
+
+	if (!peer)
+		return;
+
+	rcv_priv = netdev_priv(peer);
+	for (i = 0; i < peer->real_num_rx_queues; i++) {
+		const struct veth_rq_stats *rq_stats = &rcv_priv->rq[i].stats;
+		const void *base = (void *)&rq_stats->vs;
+		unsigned int start, tx_idx = idx;
+		size_t offset;
+
+		tx_idx += (i % dev->real_num_tx_queues) * VETH_TQ_STATS_LEN;
+		do {
+			start = u64_stats_fetch_begin_irq(&rq_stats->syncp);
+			for (j = 0; j < VETH_TQ_STATS_LEN; j++) {
+				offset = veth_tq_stats_desc[j].offset;
+				data[tx_idx + j] += *(u64 *)(base + offset);
+			}
+		} while (u64_stats_fetch_retry_irq(&rq_stats->syncp, start));
+	}
 }
 
 static const struct ethtool_ops veth_ethtool_ops = {
@@ -121,6 +224,7 @@
 	.get_sset_count = veth_get_sset_count,
 	.get_ethtool_stats = veth_get_ethtool_stats,
 	.get_link_ksettings = veth_get_link_ksettings,
+	.get_ts_info = ethtool_op_get_ts_info,
 };
 
 /* general routines */
@@ -130,14 +234,14 @@
 	return (unsigned long)ptr & VETH_XDP_FLAG;
 }
 
-static void *veth_ptr_to_xdp(void *ptr)
+static struct xdp_frame *veth_ptr_to_xdp(void *ptr)
 {
 	return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG);
 }
 
-static void *veth_xdp_to_ptr(void *ptr)
+static void *veth_xdp_to_ptr(struct xdp_frame *xdp)
 {
-	return (void *)((unsigned long)ptr | VETH_XDP_FLAG);
+	return (void *)((unsigned long)xdp | VETH_XDP_FLAG);
 }
 
 static void veth_ptr_free(void *ptr)
@@ -181,6 +285,7 @@
 {
 	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
 	struct veth_rq *rq = NULL;
+	int ret = NETDEV_TX_OK;
 	struct net_device *rcv;
 	int length = skb->len;
 	bool rcv_xdp = false;
@@ -188,7 +293,7 @@
 
 	rcu_read_lock();
 	rcv = rcu_dereference(priv->peer);
-	if (unlikely(!rcv)) {
+	if (unlikely(!rcv) || !pskb_may_pull(skb, ETH_HLEN)) {
 		kfree_skb(skb);
 		goto drop;
 	}
@@ -200,16 +305,14 @@
 		rcv_xdp = rcu_access_pointer(rq->xdp_prog);
 	}
 
+	skb_tx_timestamp(skb);
 	if (likely(veth_forward_skb(rcv, skb, rq, rcv_xdp) == NET_RX_SUCCESS)) {
-		struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
-
-		u64_stats_update_begin(&stats->syncp);
-		stats->bytes += length;
-		stats->packets++;
-		u64_stats_update_end(&stats->syncp);
+		if (!rcv_xdp)
+			dev_lstats_add(dev, length);
 	} else {
 drop:
 		atomic64_inc(&priv->dropped);
+		ret = NET_XMIT_DROP;
 	}
 
 	if (rcv_xdp)
@@ -217,30 +320,46 @@
 
 	rcu_read_unlock();
 
-	return NETDEV_TX_OK;
+	return ret;
 }
 
-static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev)
+static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
 {
 	struct veth_priv *priv = netdev_priv(dev);
-	int cpu;
 
-	result->packets = 0;
-	result->bytes = 0;
-	for_each_possible_cpu(cpu) {
-		struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu);
-		u64 packets, bytes;
+	dev_lstats_read(dev, packets, bytes);
+	return atomic64_read(&priv->dropped);
+}
+
+static void veth_stats_rx(struct veth_stats *result, struct net_device *dev)
+{
+	struct veth_priv *priv = netdev_priv(dev);
+	int i;
+
+	result->peer_tq_xdp_xmit_err = 0;
+	result->xdp_packets = 0;
+	result->xdp_tx_err = 0;
+	result->xdp_bytes = 0;
+	result->rx_drops = 0;
+	for (i = 0; i < dev->num_rx_queues; i++) {
+		u64 packets, bytes, drops, xdp_tx_err, peer_tq_xdp_xmit_err;
+		struct veth_rq_stats *stats = &priv->rq[i].stats;
 		unsigned int start;
 
 		do {
 			start = u64_stats_fetch_begin_irq(&stats->syncp);
-			packets = stats->packets;
-			bytes = stats->bytes;
+			peer_tq_xdp_xmit_err = stats->vs.peer_tq_xdp_xmit_err;
+			xdp_tx_err = stats->vs.xdp_tx_err;
+			packets = stats->vs.xdp_packets;
+			bytes = stats->vs.xdp_bytes;
+			drops = stats->vs.rx_drops;
		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
-		result->packets += packets;
-		result->bytes += bytes;
+		result->peer_tq_xdp_xmit_err += peer_tq_xdp_xmit_err;
+		result->xdp_tx_err += xdp_tx_err;
+		result->xdp_packets += packets;
+		result->xdp_bytes += bytes;
+		result->rx_drops += drops;
 	}
-	return atomic64_read(&priv->dropped);
 }
 
 static void veth_get_stats64(struct net_device *dev,
@@ -248,18 +367,31 @@
 {
 	struct veth_priv *priv = netdev_priv(dev);
 	struct net_device *peer;
-	struct pcpu_vstats one;
+	struct veth_stats rx;
+	u64 packets, bytes;
 
-	tot->tx_dropped = veth_stats_one(&one, dev);
-	tot->tx_bytes = one.bytes;
-	tot->tx_packets = one.packets;
+	tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes);
+	tot->tx_bytes = bytes;
+	tot->tx_packets = packets;
+
+	veth_stats_rx(&rx, dev);
+	tot->tx_dropped += rx.xdp_tx_err;
+	tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err;
+	tot->rx_bytes = rx.xdp_bytes;
+	tot->rx_packets = rx.xdp_packets;
 
 	rcu_read_lock();
 	peer = rcu_dereference(priv->peer);
 	if (peer) {
-		tot->rx_dropped = veth_stats_one(&one, peer);
-		tot->rx_bytes = one.bytes;
-		tot->rx_packets = one.packets;
+		veth_stats_tx(peer, &packets, &bytes);
+		tot->rx_bytes += bytes;
+		tot->rx_packets += packets;
+
+		veth_stats_rx(&rx, peer);
+		tot->tx_dropped += rx.peer_tq_xdp_xmit_err;
+		tot->rx_dropped += rx.xdp_tx_err;
+		tot->tx_bytes += rx.xdp_bytes;
+		tot->tx_packets += rx.xdp_packets;
 	}
 	rcu_read_unlock();
 }
@@ -274,10 +406,6 @@
 {
 	struct sk_buff *skb;
 
-	if (!buflen) {
-		buflen = SKB_DATA_ALIGN(headroom + len) +
-			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	}
 	skb = build_skb(head, buflen);
 	if (!skb)
 		return NULL;
@@ -293,21 +421,31 @@
 	return smp_processor_id() % dev->real_num_rx_queues;
 }
 
+static struct net_device *veth_peer_dev(struct net_device *dev)
+{
+	struct veth_priv *priv = netdev_priv(dev);
+
+	/* Callers must be under RCU read side. */
+	return rcu_dereference(priv->peer);
+}
+
 static int veth_xdp_xmit(struct net_device *dev, int n,
-			 struct xdp_frame **frames, u32 flags)
+			 struct xdp_frame **frames,
+			 u32 flags, bool ndo_xmit)
 {
 	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
+	int i, ret = -ENXIO, drops = 0;
 	struct net_device *rcv;
 	unsigned int max_len;
 	struct veth_rq *rq;
-	int i, drops = 0;
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
 		return -EINVAL;
 
+	rcu_read_lock();
 	rcv = rcu_dereference(priv->peer);
 	if (unlikely(!rcv))
-		return -ENXIO;
+		goto out;
 
 	rcv_priv = netdev_priv(rcv);
 	rq = &rcv_priv->rq[veth_select_rxq(rcv)];
@@ -316,7 +454,7 @@
 	 * device is up.
 	 */
 	if (!rcu_access_pointer(rq->xdp_prog))
-		return -ENXIO;
+		goto out;
 
 	max_len = rcv->mtu + rcv->hard_header_len + VLAN_HLEN;
 
@@ -336,44 +474,99 @@
 	if (flags & XDP_XMIT_FLUSH)
 		__veth_xdp_flush(rq);
 
-	return n - drops;
+	ret = n - drops;
+	if (ndo_xmit) {
+		u64_stats_update_begin(&rq->stats.syncp);
+		rq->stats.vs.peer_tq_xdp_xmit += n - drops;
+		rq->stats.vs.peer_tq_xdp_xmit_err += drops;
+		u64_stats_update_end(&rq->stats.syncp);
+	}
+
+out:
+	rcu_read_unlock();
+
+	return ret;
 }
 
-static void veth_xdp_flush(struct net_device *dev)
+static int veth_ndo_xdp_xmit(struct net_device *dev, int n,
+			     struct xdp_frame **frames, u32 flags)
 {
-	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
+	int err;
+
+	err = veth_xdp_xmit(dev, n, frames, flags, true);
+	if (err < 0) {
+		struct veth_priv *priv = netdev_priv(dev);
+
+		atomic64_add(n, &priv->dropped);
+	}
+
+	return err;
+}
+
+static void veth_xdp_flush_bq(struct veth_rq *rq, struct veth_xdp_tx_bq *bq)
+{
+	int sent, i, err = 0;
+
+	sent = veth_xdp_xmit(rq->dev, bq->count, bq->q, 0, false);
+	if (sent < 0) {
+		err = sent;
+		sent = 0;
+		for (i = 0; i < bq->count; i++)
+			xdp_return_frame(bq->q[i]);
+	}
+	trace_xdp_bulk_tx(rq->dev, sent, bq->count - sent, err);
+
+	u64_stats_update_begin(&rq->stats.syncp);
+	rq->stats.vs.xdp_tx += sent;
+	rq->stats.vs.xdp_tx_err += bq->count - sent;
+	u64_stats_update_end(&rq->stats.syncp);
+
+	bq->count = 0;
+}
+
+static void veth_xdp_flush(struct veth_rq *rq, struct veth_xdp_tx_bq *bq)
+{
+	struct veth_priv *rcv_priv, *priv = netdev_priv(rq->dev);
 	struct net_device *rcv;
-	struct veth_rq *rq;
+	struct veth_rq *rcv_rq;
 
 	rcu_read_lock();
+	veth_xdp_flush_bq(rq, bq);
 	rcv = rcu_dereference(priv->peer);
 	if (unlikely(!rcv))
 		goto out;
 
 	rcv_priv = netdev_priv(rcv);
-	rq = &rcv_priv->rq[veth_select_rxq(rcv)];
+	rcv_rq = &rcv_priv->rq[veth_select_rxq(rcv)];
 	/* xdp_ring is initialized on receive side? */
-	if (unlikely(!rcu_access_pointer(rq->xdp_prog)))
+	if (unlikely(!rcu_access_pointer(rcv_rq->xdp_prog)))
 		goto out;
 
-	__veth_xdp_flush(rq);
+	__veth_xdp_flush(rcv_rq);
 out:
 	rcu_read_unlock();
 }
 
-static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
+static int veth_xdp_tx(struct veth_rq *rq, struct xdp_buff *xdp,
+		       struct veth_xdp_tx_bq *bq)
 {
-	struct xdp_frame *frame = convert_to_xdp_frame(xdp);
+	struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp);
 
 	if (unlikely(!frame))
 		return -EOVERFLOW;
 
-	return veth_xdp_xmit(dev, 1, &frame, 0);
+	if (unlikely(bq->count == VETH_XDP_TX_BULK_SIZE))
+		veth_xdp_flush_bq(rq, bq);
+
+	bq->q[bq->count++] = frame;
+
+	return 0;
 }
 
 static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
 					struct xdp_frame *frame,
-					unsigned int *xdp_xmit)
+					struct veth_xdp_tx_bq *bq,
+					struct veth_stats *stats)
 {
 	void *hard_start = frame->data - frame->headroom;
 	int len = frame->len, delta = 0;
@@ -391,10 +584,7 @@
 		struct xdp_buff xdp;
 		u32 act;
 
-		xdp.data_hard_start = hard_start;
-		xdp.data = frame->data;
-		xdp.data_end = frame->data + frame->len;
-		xdp.data_meta = frame->data - frame->metasize;
+		xdp_convert_frame_to_buff(frame, &xdp);
 		xdp.rxq = &rq->xdp_rxq;
 
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -407,12 +597,13 @@
 		case XDP_TX:
 			orig_frame = *frame;
 			xdp.rxq->mem = frame->mem;
-			if (unlikely(veth_xdp_tx(rq->dev, &xdp) < 0)) {
+			if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) {
 				trace_xdp_exception(rq->dev, xdp_prog, act);
 				frame = &orig_frame;
+				stats->rx_drops++;
 				goto err_xdp;
 			}
-			*xdp_xmit |= VETH_XDP_TX;
+			stats->xdp_tx++;
 			rcu_read_unlock();
 			goto xdp_xmit;
 		case XDP_REDIRECT:
@@ -420,28 +611,34 @@
 			xdp.rxq->mem = frame->mem;
 			if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
 				frame = &orig_frame;
+				stats->rx_drops++;
 				goto err_xdp;
 			}
-			*xdp_xmit |= VETH_XDP_REDIR;
+			stats->xdp_redirect++;
 			rcu_read_unlock();
 			goto xdp_xmit;
 		default:
 			bpf_warn_invalid_xdp_action(act);
+			fallthrough;
 		case XDP_ABORTED:
 			trace_xdp_exception(rq->dev, xdp_prog, act);
+			fallthrough;
 		case XDP_DROP:
+			stats->xdp_drops++;
 			goto err_xdp;
 		}
 	}
 	rcu_read_unlock();
 
 	headroom = sizeof(struct xdp_frame) + frame->headroom - delta;
-	skb = veth_build_skb(hard_start, headroom, len, 0);
+	skb = veth_build_skb(hard_start, headroom, len, frame->frame_sz);
 	if (!skb) {
 		xdp_return_frame(frame);
+		stats->rx_drops++;
 		goto err;
 	}
 
+	xdp_release_frame(frame);
 	xdp_scrub_frame(frame);
 	skb->protocol = eth_type_trans(skb, rq->dev);
 err:
@@ -453,8 +650,10 @@
 	return NULL;
 }
 
-static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb,
-					unsigned int *xdp_xmit)
+static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
+					struct sk_buff *skb,
+					struct veth_xdp_tx_bq *bq,
+					struct veth_stats *stats)
 {
 	u32 pktlen, headroom, act, metalen;
 	void *orig_data, *orig_data_end;
@@ -498,9 +697,8 @@
 			goto drop;
 		}
 
-		nskb = veth_build_skb(head,
-				      VETH_XDP_HEADROOM + mac_len, skb->len,
-				      PAGE_SIZE);
+		nskb = veth_build_skb(head, VETH_XDP_HEADROOM + mac_len,
				      skb->len, PAGE_SIZE);
 		if (!nskb) {
 			page_frag_free(head);
 			goto drop;
@@ -518,6 +716,11 @@
 	xdp.data_end = xdp.data + pktlen;
 	xdp.data_meta = xdp.data;
 	xdp.rxq = &rq->xdp_rxq;
+
+	/* SKB "head" area always have tailroom for skb_shared_info */
+	xdp.frame_sz = (void *)skb_end_pointer(skb) - xdp.data_hard_start;
+	xdp.frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
 	orig_data = xdp.data;
 	orig_data_end = xdp.data_end;
 
@@ -530,31 +733,38 @@
 		get_page(virt_to_page(xdp.data));
 		consume_skb(skb);
 		xdp.rxq->mem = rq->xdp_mem;
-		if (unlikely(veth_xdp_tx(rq->dev, &xdp) < 0)) {
+		if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) {
 			trace_xdp_exception(rq->dev, xdp_prog, act);
+			stats->rx_drops++;
 			goto err_xdp;
 		}
-		*xdp_xmit |= VETH_XDP_TX;
+		stats->xdp_tx++;
 		rcu_read_unlock();
 		goto xdp_xmit;
 	case XDP_REDIRECT:
 		get_page(virt_to_page(xdp.data));
 		consume_skb(skb);
 		xdp.rxq->mem = rq->xdp_mem;
-		if (xdp_do_redirect(rq->dev, &xdp, xdp_prog))
+		if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
+			stats->rx_drops++;
 			goto err_xdp;
-		*xdp_xmit |= VETH_XDP_REDIR;
+		}
+		stats->xdp_redirect++;
 		rcu_read_unlock();
 		goto xdp_xmit;
 	default:
 		bpf_warn_invalid_xdp_action(act);
+		fallthrough;
 	case XDP_ABORTED:
 		trace_xdp_exception(rq->dev, xdp_prog, act);
+		fallthrough;
 	case XDP_DROP:
-		goto drop;
+		stats->xdp_drops++;
+		goto xdp_drop;
 	}
 	rcu_read_unlock();
 
+	/* check if bpf_xdp_adjust_head was used */
 	delta = orig_data - xdp.data;
 	off = mac_len + delta;
 	if (off > 0)
@@ -562,9 +772,11 @@
 	else if (off < 0)
 		__skb_pull(skb, -off);
 	skb->mac_header -= delta;
+
+	/* check if bpf_xdp_adjust_tail was used */
 	off = xdp.data_end - orig_data_end;
 	if (off != 0)
-		__skb_put(skb, off);
+		__skb_put(skb, off); /* positive on grow, negative on shrink */
 	skb->protocol = eth_type_trans(skb, rq->dev);
 
 	metalen = xdp.data - xdp.data_meta;
@@ -573,6 +785,8 @@
 out:
 	return skb;
 drop:
+	stats->rx_drops++;
+xdp_drop:
 	rcu_read_unlock();
 	kfree_skb(skb);
 	return NULL;
@@ -583,7 +797,9 @@
 	return NULL;
 }
 
-static int veth_xdp_rcv(struct veth_rq *rq, int budget, unsigned int *xdp_xmit)
+static int veth_xdp_rcv(struct veth_rq *rq, int budget,
+			struct veth_xdp_tx_bq *bq,
+			struct veth_stats *stats)
 {
 	int i, done = 0;
 
@@ -595,10 +811,14 @@
 			break;
 
 		if (veth_is_xdp_frame(ptr)) {
-			skb = veth_xdp_rcv_one(rq, veth_ptr_to_xdp(ptr),
-					       xdp_xmit);
+			struct xdp_frame *frame = veth_ptr_to_xdp(ptr);
+
+			stats->xdp_bytes += frame->len;
+			skb = veth_xdp_rcv_one(rq, frame, bq, stats);
 		} else {
-			skb = veth_xdp_rcv_skb(rq, ptr, xdp_xmit);
+			skb = ptr;
+			stats->xdp_bytes += skb->len;
+			skb = veth_xdp_rcv_skb(rq, skb, bq, stats);
 		}
 
 		if (skb)
@@ -607,6 +827,14 @@
 		done++;
 	}
 
+	u64_stats_update_begin(&rq->stats.syncp);
+	rq->stats.vs.xdp_redirect += stats->xdp_redirect;
+	rq->stats.vs.xdp_bytes += stats->xdp_bytes;
+	rq->stats.vs.xdp_drops += stats->xdp_drops;
+	rq->stats.vs.rx_drops += stats->rx_drops;
+	rq->stats.vs.xdp_packets += done;
+	u64_stats_update_end(&rq->stats.syncp);
+
 	return done;
 }
 
@@ -614,11 +842,17 @@
 {
 	struct veth_rq *rq =
 		container_of(napi, struct veth_rq, xdp_napi);
-	unsigned int xdp_xmit = 0;
+	struct veth_stats stats = {};
+	struct veth_xdp_tx_bq bq;
 	int done;
 
+	bq.count = 0;
+
 	xdp_set_return_frame_no_direct();
-	done = veth_xdp_rcv(rq, budget, &xdp_xmit);
+	done = veth_xdp_rcv(rq, budget, &bq, &stats);
+
+	if (stats.xdp_redirect > 0)
+		xdp_do_flush();
 
 	if (done < budget && napi_complete_done(napi, done)) {
 		/* Write rx_notify_masked before reading ptr_ring */
@@ -631,10 +865,8 @@
 		}
 	}
 
-	if (xdp_xmit & VETH_XDP_TX)
-		veth_xdp_flush(rq->dev);
-	if (xdp_xmit & VETH_XDP_REDIR)
-		xdp_do_flush_map();
+	if (stats.xdp_tx > 0)
+		veth_xdp_flush(rq, &bq);
 	xdp_clear_return_frame_no_direct();
 
 	return done;
@@ -677,14 +909,13 @@
 		struct veth_rq *rq = &priv->rq[i];
 
 		napi_disable(&rq->xdp_napi);
-		napi_hash_del(&rq->xdp_napi);
+		__netif_napi_del(&rq->xdp_napi);
 	}
 	synchronize_net();
 
 	for (i = 0; i < dev->real_num_rx_queues; i++) {
 		struct veth_rq *rq = &priv->rq[i];
 
-		netif_napi_del(&rq->xdp_napi);
 		rq->rx_notify_masked = false;
 		ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
 	}
@@ -799,8 +1030,10 @@
 	if (!priv->rq)
 		return -ENOMEM;
 
-	for (i = 0; i < dev->num_rx_queues; i++)
+	for (i = 0; i < dev->num_rx_queues; i++) {
 		priv->rq[i].dev = dev;
+		u64_stats_init(&priv->rq[i].stats.syncp);
+	}
 
 	return 0;
 }
@@ -816,13 +1049,13 @@
 {
 	int err;
 
-	dev->vstats = netdev_alloc_pcpu_stats(struct pcpu_vstats);
-	if (!dev->vstats)
+	dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
+	if (!dev->lstats)
 		return -ENOMEM;
 
 	err = veth_alloc_queues(dev);
 	if (err) {
-		free_percpu(dev->vstats);
+		free_percpu(dev->lstats);
 		return err;
 	}
 
@@ -832,7 +1065,7 @@
 static void veth_dev_free(struct net_device *dev)
 {
 	veth_free_queues(dev);
-	free_percpu(dev->vstats);
+	free_percpu(dev->lstats);
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -976,26 +1209,11 @@
 	return err;
 }
 
-static u32 veth_xdp_query(struct net_device *dev)
-{
-	struct veth_priv *priv = netdev_priv(dev);
-	const struct bpf_prog *xdp_prog;
-
-	xdp_prog = priv->_xdp_prog;
-	if (xdp_prog)
-		return xdp_prog->aux->id;
-
-	return 0;
-}
-
 static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 {
 	switch (xdp->command) {
 	case XDP_SETUP_PROG:
 		return veth_xdp_set(dev, xdp->prog, xdp->extack);
-	case XDP_QUERY_PROG:
-		xdp->prog_id = veth_xdp_query(dev);
-		return 0;
 	default:
 		return -EINVAL;
 	}
@@ -1017,7 +1235,8 @@
 	.ndo_features_check = passthru_features_check,
 	.ndo_set_rx_headroom = veth_set_rx_headroom,
 	.ndo_bpf = veth_xdp,
-	.ndo_xdp_xmit = veth_xdp_xmit,
+	.ndo_xdp_xmit = veth_ndo_xdp_xmit,
+	.ndo_get_peer_dev = veth_peer_dev,
 };
 
 #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
@@ -1096,10 +1315,7 @@
 
 	nla_peer = data[VETH_INFO_PEER];
 	ifmp = nla_data(nla_peer);
-	err = rtnl_nla_parse_ifla(peer_tb,
-				  nla_data(nla_peer) + sizeof(struct ifinfomsg),
-				  nla_len(nla_peer) - sizeof(struct ifinfomsg),
-				  NULL);
+	err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack);
 	if (err < 0)
 		return err;
 
@@ -1126,7 +1342,7 @@
 		return PTR_ERR(net);
 
 	peer = rtnl_create_link(net, ifname, name_assign_type,
-				&veth_link_ops, tbp);
+				&veth_link_ops, tbp, extack);
 	if (IS_ERR(peer)) {
 		put_net(net);
 		return PTR_ERR(peer);