forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-11 072de836f53be56a70cecf70b43ae43b7ce17376
kernel/drivers/net/veth.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * drivers/net/veth.c
  *
@@ -24,6 +25,7 @@
 #include <linux/filter.h>
 #include <linux/ptr_ring.h>
 #include <linux/bpf_trace.h>
+#include <linux/net_tstamp.h>
 
 #define DRV_NAME	"veth"
 #define DRV_VERSION	"1.0"
@@ -32,13 +34,23 @@
 #define VETH_RING_SIZE		256
 #define VETH_XDP_HEADROOM	(XDP_PACKET_HEADROOM + NET_IP_ALIGN)
 
-/* Separating two types of XDP xmit */
-#define VETH_XDP_TX		BIT(0)
-#define VETH_XDP_REDIR		BIT(1)
+#define VETH_XDP_TX_BULK_SIZE	16
 
-struct pcpu_vstats {
-	u64	packets;
-	u64	bytes;
+struct veth_stats {
+	u64	rx_drops;
+	/* xdp */
+	u64	xdp_packets;
+	u64	xdp_bytes;
+	u64	xdp_redirect;
+	u64	xdp_drops;
+	u64	xdp_tx;
+	u64	xdp_tx_err;
+	u64	peer_tq_xdp_xmit;
+	u64	peer_tq_xdp_xmit_err;
+};
+
+struct veth_rq_stats {
+	struct veth_stats	vs;
 	struct u64_stats_sync	syncp;
 };
 
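The new per-queue counters rely on the kernel's u64_stats seqcount pattern: the single writer (each queue's NAPI handler) brackets updates with u64_stats_update_begin()/u64_stats_update_end(), and readers retry with the fetch_begin/fetch_retry pair so 64-bit values are read consistently even on 32-bit machines. A minimal sketch of the pattern, assuming a struct veth_rq *rq with the stats member added in the next hunk:

	/* writer side, NAPI context -- at most one writer per queue */
	u64_stats_update_begin(&rq->stats.syncp);
	rq->stats.vs.xdp_packets += done;
	u64_stats_update_end(&rq->stats.syncp);

	/* reader side, any context -- retries if a writer raced */
	unsigned int start;
	u64 packets;

	do {
		start = u64_stats_fetch_begin_irq(&rq->stats.syncp);
		packets = rq->stats.vs.xdp_packets;
	} while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start));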
@@ -47,6 +59,7 @@
 	struct net_device	*dev;
 	struct bpf_prog __rcu	*xdp_prog;
 	struct xdp_mem_info	xdp_mem;
+	struct veth_rq_stats	stats;
 	bool			rx_notify_masked;
 	struct ptr_ring		xdp_ring;
 	struct xdp_rxq_info	xdp_rxq;
@@ -60,9 +73,40 @@
 	unsigned int		requested_headroom;
 };
 
+struct veth_xdp_tx_bq {
+	struct xdp_frame *q[VETH_XDP_TX_BULK_SIZE];
+	unsigned int count;
+};
+
 /*
  * ethtool interface
  */
+
+struct veth_q_stat_desc {
+	char	desc[ETH_GSTRING_LEN];
+	size_t	offset;
+};
+
+#define VETH_RQ_STAT(m)	offsetof(struct veth_stats, m)
+
+static const struct veth_q_stat_desc veth_rq_stats_desc[] = {
+	{ "xdp_packets",	VETH_RQ_STAT(xdp_packets) },
+	{ "xdp_bytes",		VETH_RQ_STAT(xdp_bytes) },
+	{ "drops",		VETH_RQ_STAT(rx_drops) },
+	{ "xdp_redirect",	VETH_RQ_STAT(xdp_redirect) },
+	{ "xdp_drops",		VETH_RQ_STAT(xdp_drops) },
+	{ "xdp_tx",		VETH_RQ_STAT(xdp_tx) },
+	{ "xdp_tx_errors",	VETH_RQ_STAT(xdp_tx_err) },
+};
+
+#define VETH_RQ_STATS_LEN	ARRAY_SIZE(veth_rq_stats_desc)
+
+static const struct veth_q_stat_desc veth_tq_stats_desc[] = {
+	{ "xdp_xmit",		VETH_RQ_STAT(peer_tq_xdp_xmit) },
+	{ "xdp_xmit_errors",	VETH_RQ_STAT(peer_tq_xdp_xmit_err) },
+};
+
+#define VETH_TQ_STATS_LEN	ARRAY_SIZE(veth_tq_stats_desc)
 
 static struct {
 	const char string[ETH_GSTRING_LEN];
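Each veth_q_stat_desc entry pairs an ethtool string with the offsetof() of the matching u64 inside struct veth_stats, so dump code can walk the tables generically instead of naming every field. A hypothetical helper (not part of the patch) showing the access pattern the offset enables:

	static u64 veth_stat_read(const struct veth_stats *vs,
				  const struct veth_q_stat_desc *desc)
	{
		/* desc->offset was produced by VETH_RQ_STAT()/offsetof() */
		return *(const u64 *)((const char *)vs + desc->offset);
	}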
@@ -88,9 +132,29 @@
 
 static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 {
+	char *p = (char *)buf;
+	int i, j;
+
 	switch(stringset) {
 	case ETH_SS_STATS:
-		memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
+		memcpy(p, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
+		p += sizeof(ethtool_stats_keys);
+		for (i = 0; i < dev->real_num_rx_queues; i++) {
+			for (j = 0; j < VETH_RQ_STATS_LEN; j++) {
+				snprintf(p, ETH_GSTRING_LEN,
+					 "rx_queue_%u_%.18s",
+					 i, veth_rq_stats_desc[j].desc);
+				p += ETH_GSTRING_LEN;
+			}
+		}
+		for (i = 0; i < dev->real_num_tx_queues; i++) {
+			for (j = 0; j < VETH_TQ_STATS_LEN; j++) {
+				snprintf(p, ETH_GSTRING_LEN,
+					 "tx_queue_%u_%.18s",
+					 i, veth_tq_stats_desc[j].desc);
+				p += ETH_GSTRING_LEN;
+			}
+		}
 		break;
 	}
 }
@@ -99,7 +163,9 @@
 {
 	switch (sset) {
 	case ETH_SS_STATS:
-		return ARRAY_SIZE(ethtool_stats_keys);
+		return ARRAY_SIZE(ethtool_stats_keys) +
+		       VETH_RQ_STATS_LEN * dev->real_num_rx_queues +
+		       VETH_TQ_STATS_LEN * dev->real_num_tx_queues;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -108,10 +174,47 @@
 static void veth_get_ethtool_stats(struct net_device *dev,
 				   struct ethtool_stats *stats, u64 *data)
 {
-	struct veth_priv *priv = netdev_priv(dev);
+	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
 	struct net_device *peer = rtnl_dereference(priv->peer);
+	int i, j, idx;
 
 	data[0] = peer ? peer->ifindex : 0;
+	idx = 1;
+	for (i = 0; i < dev->real_num_rx_queues; i++) {
+		const struct veth_rq_stats *rq_stats = &priv->rq[i].stats;
+		const void *stats_base = (void *)&rq_stats->vs;
+		unsigned int start;
+		size_t offset;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&rq_stats->syncp);
+			for (j = 0; j < VETH_RQ_STATS_LEN; j++) {
+				offset = veth_rq_stats_desc[j].offset;
+				data[idx + j] = *(u64 *)(stats_base + offset);
+			}
+		} while (u64_stats_fetch_retry_irq(&rq_stats->syncp, start));
+		idx += VETH_RQ_STATS_LEN;
+	}
+
+	if (!peer)
+		return;
+
+	rcv_priv = netdev_priv(peer);
+	for (i = 0; i < peer->real_num_rx_queues; i++) {
+		const struct veth_rq_stats *rq_stats = &rcv_priv->rq[i].stats;
+		const void *base = (void *)&rq_stats->vs;
+		unsigned int start, tx_idx = idx;
+		size_t offset;
+
+		tx_idx += (i % dev->real_num_tx_queues) * VETH_TQ_STATS_LEN;
+		do {
+			start = u64_stats_fetch_begin_irq(&rq_stats->syncp);
+			for (j = 0; j < VETH_TQ_STATS_LEN; j++) {
+				offset = veth_tq_stats_desc[j].offset;
+				data[tx_idx + j] += *(u64 *)(base + offset);
+			}
+		} while (u64_stats_fetch_retry_irq(&rq_stats->syncp, start));
+	}
 }
 
 static const struct ethtool_ops veth_ethtool_ops = {
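veth has no transmit rings of its own, so the per-queue TX numbers reported here are derived from the peer's RX queues: each peer rq folds its peer_tq_xdp_xmit counters into a local TX slot chosen by i % dev->real_num_tx_queues, which is why the inner loop accumulates with += rather than assigning. An illustrative mapping with assumed queue counts (not from the patch):

	/* peer->real_num_rx_queues == 4, dev->real_num_tx_queues == 2:
	 *   peer rq0, rq2 -> tx_queue_0_xdp_xmit / tx_queue_0_xdp_xmit_errors
	 *   peer rq1, rq3 -> tx_queue_1_xdp_xmit / tx_queue_1_xdp_xmit_errors
	 */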
@@ -121,6 +224,7 @@
 	.get_sset_count		= veth_get_sset_count,
 	.get_ethtool_stats	= veth_get_ethtool_stats,
 	.get_link_ksettings	= veth_get_link_ksettings,
+	.get_ts_info		= ethtool_op_get_ts_info,
 };
 
 /* general routines */
@@ -130,14 +234,14 @@
 	return (unsigned long)ptr & VETH_XDP_FLAG;
 }
 
-static void *veth_ptr_to_xdp(void *ptr)
+static struct xdp_frame *veth_ptr_to_xdp(void *ptr)
 {
 	return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG);
 }
 
-static void *veth_xdp_to_ptr(void *ptr)
+static void *veth_xdp_to_ptr(struct xdp_frame *xdp)
 {
-	return (void *)((unsigned long)ptr | VETH_XDP_FLAG);
+	return (void *)((unsigned long)xdp | VETH_XDP_FLAG);
 }
 
 static void veth_ptr_free(void *ptr)
@@ -188,7 +292,7 @@
 
 	rcu_read_lock();
 	rcv = rcu_dereference(priv->peer);
-	if (unlikely(!rcv)) {
+	if (unlikely(!rcv) || !pskb_may_pull(skb, ETH_HLEN)) {
 		kfree_skb(skb);
 		goto drop;
 	}
@@ -200,13 +304,10 @@
 		rcv_xdp = rcu_access_pointer(rq->xdp_prog);
 	}
 
+	skb_tx_timestamp(skb);
 	if (likely(veth_forward_skb(rcv, skb, rq, rcv_xdp) == NET_RX_SUCCESS)) {
-		struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
-
-		u64_stats_update_begin(&stats->syncp);
-		stats->bytes += length;
-		stats->packets++;
-		u64_stats_update_end(&stats->syncp);
+		if (!rcv_xdp)
+			dev_lstats_add(dev, length);
 	} else {
 drop:
 		atomic64_inc(&priv->dropped);
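dev_lstats_add() replaces the driver-private pcpu_vstats accounting with the generic pcpu_lstats helpers (allocated in the veth_dev_init() hunk further down). For reference, the helper is roughly the following per-CPU update, as in include/linux/netdevice.h of kernels in this range (field types vary across versions):

	static inline void dev_lstats_add(struct net_device *dev, unsigned int len)
	{
		struct pcpu_lstats *lstats = this_cpu_ptr(dev->lstats);

		u64_stats_update_begin(&lstats->syncp);
		lstats->bytes += len;
		lstats->packets++;
		u64_stats_update_end(&lstats->syncp);
	}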
@@ -220,27 +321,43 @@
 	return NETDEV_TX_OK;
 }
 
-static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev)
+static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
 {
 	struct veth_priv *priv = netdev_priv(dev);
-	int cpu;
 
-	result->packets = 0;
-	result->bytes = 0;
-	for_each_possible_cpu(cpu) {
-		struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu);
-		u64 packets, bytes;
+	dev_lstats_read(dev, packets, bytes);
+	return atomic64_read(&priv->dropped);
+}
+
+static void veth_stats_rx(struct veth_stats *result, struct net_device *dev)
+{
+	struct veth_priv *priv = netdev_priv(dev);
+	int i;
+
+	result->peer_tq_xdp_xmit_err = 0;
+	result->xdp_packets = 0;
+	result->xdp_tx_err = 0;
+	result->xdp_bytes = 0;
+	result->rx_drops = 0;
+	for (i = 0; i < dev->num_rx_queues; i++) {
+		u64 packets, bytes, drops, xdp_tx_err, peer_tq_xdp_xmit_err;
+		struct veth_rq_stats *stats = &priv->rq[i].stats;
 		unsigned int start;
 
 		do {
 			start = u64_stats_fetch_begin_irq(&stats->syncp);
-			packets = stats->packets;
-			bytes = stats->bytes;
+			peer_tq_xdp_xmit_err = stats->vs.peer_tq_xdp_xmit_err;
+			xdp_tx_err = stats->vs.xdp_tx_err;
+			packets = stats->vs.xdp_packets;
+			bytes = stats->vs.xdp_bytes;
+			drops = stats->vs.rx_drops;
		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
-		result->packets += packets;
-		result->bytes += bytes;
+		result->peer_tq_xdp_xmit_err += peer_tq_xdp_xmit_err;
+		result->xdp_tx_err += xdp_tx_err;
+		result->xdp_packets += packets;
+		result->xdp_bytes += bytes;
+		result->rx_drops += drops;
 	}
-	return atomic64_read(&priv->dropped);
 }
 
 static void veth_get_stats64(struct net_device *dev,
@@ -248,18 +365,31 @@
 {
 	struct veth_priv *priv = netdev_priv(dev);
 	struct net_device *peer;
-	struct pcpu_vstats one;
+	struct veth_stats rx;
+	u64 packets, bytes;
 
-	tot->tx_dropped = veth_stats_one(&one, dev);
-	tot->tx_bytes = one.bytes;
-	tot->tx_packets = one.packets;
+	tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes);
+	tot->tx_bytes = bytes;
+	tot->tx_packets = packets;
+
+	veth_stats_rx(&rx, dev);
+	tot->tx_dropped += rx.xdp_tx_err;
+	tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err;
+	tot->rx_bytes = rx.xdp_bytes;
+	tot->rx_packets = rx.xdp_packets;
 
 	rcu_read_lock();
 	peer = rcu_dereference(priv->peer);
 	if (peer) {
-		tot->rx_dropped = veth_stats_one(&one, peer);
-		tot->rx_bytes = one.bytes;
-		tot->rx_packets = one.packets;
+		veth_stats_tx(peer, &packets, &bytes);
+		tot->rx_bytes += bytes;
+		tot->rx_packets += packets;
+
+		veth_stats_rx(&rx, peer);
+		tot->tx_dropped += rx.peer_tq_xdp_xmit_err;
+		tot->rx_dropped += rx.xdp_tx_err;
+		tot->tx_bytes += rx.xdp_bytes;
+		tot->tx_packets += rx.xdp_packets;
 	}
 	rcu_read_unlock();
 }
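The resulting totals combine four sources; a summary derived from the code above (dev = this device, peer = the other end):

	/* tx_bytes/tx_packets: dev's lstats (skb path) + peer's rx xdp_bytes/xdp_packets
	 * tx_dropped:          dev->dropped + dev's xdp_tx_err + peer's peer_tq_xdp_xmit_err
	 * rx_bytes/rx_packets: dev's rx xdp_bytes/xdp_packets + peer's lstats
	 * rx_dropped:          dev's rx_drops + dev's peer_tq_xdp_xmit_err + peer's xdp_tx_err
	 */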
@@ -274,10 +404,6 @@
 {
 	struct sk_buff *skb;
 
-	if (!buflen) {
-		buflen = SKB_DATA_ALIGN(headroom + len) +
-			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	}
 	skb = build_skb(head, buflen);
 	if (!skb)
 		return NULL;
@@ -293,21 +419,31 @@
 	return smp_processor_id() % dev->real_num_rx_queues;
 }
 
+static struct net_device *veth_peer_dev(struct net_device *dev)
+{
+	struct veth_priv *priv = netdev_priv(dev);
+
+	/* Callers must be under RCU read side. */
+	return rcu_dereference(priv->peer);
+}
+
 static int veth_xdp_xmit(struct net_device *dev, int n,
-			 struct xdp_frame **frames, u32 flags)
+			 struct xdp_frame **frames,
+			 u32 flags, bool ndo_xmit)
 {
 	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
+	int i, ret = -ENXIO, drops = 0;
 	struct net_device *rcv;
 	unsigned int max_len;
 	struct veth_rq *rq;
-	int i, drops = 0;
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
 		return -EINVAL;
 
+	rcu_read_lock();
 	rcv = rcu_dereference(priv->peer);
 	if (unlikely(!rcv))
-		return -ENXIO;
+		goto out;
 
 	rcv_priv = netdev_priv(rcv);
 	rq = &rcv_priv->rq[veth_select_rxq(rcv)];
@@ -316,7 +452,7 @@
 	 * device is up.
 	 */
 	if (!rcu_access_pointer(rq->xdp_prog))
-		return -ENXIO;
+		goto out;
 
 	max_len = rcv->mtu + rcv->hard_header_len + VLAN_HLEN;
 
@@ -336,44 +472,99 @@
 	if (flags & XDP_XMIT_FLUSH)
 		__veth_xdp_flush(rq);
 
-	return n - drops;
+	ret = n - drops;
+	if (ndo_xmit) {
+		u64_stats_update_begin(&rq->stats.syncp);
+		rq->stats.vs.peer_tq_xdp_xmit += n - drops;
+		rq->stats.vs.peer_tq_xdp_xmit_err += drops;
+		u64_stats_update_end(&rq->stats.syncp);
+	}
+
+out:
+	rcu_read_unlock();
+
+	return ret;
 }
 
-static void veth_xdp_flush(struct net_device *dev)
+static int veth_ndo_xdp_xmit(struct net_device *dev, int n,
+			     struct xdp_frame **frames, u32 flags)
 {
-	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
+	int err;
+
+	err = veth_xdp_xmit(dev, n, frames, flags, true);
+	if (err < 0) {
+		struct veth_priv *priv = netdev_priv(dev);
+
+		atomic64_add(n, &priv->dropped);
+	}
+
+	return err;
+}
+
+static void veth_xdp_flush_bq(struct veth_rq *rq, struct veth_xdp_tx_bq *bq)
+{
+	int sent, i, err = 0;
+
+	sent = veth_xdp_xmit(rq->dev, bq->count, bq->q, 0, false);
+	if (sent < 0) {
+		err = sent;
+		sent = 0;
+		for (i = 0; i < bq->count; i++)
+			xdp_return_frame(bq->q[i]);
+	}
+	trace_xdp_bulk_tx(rq->dev, sent, bq->count - sent, err);
+
+	u64_stats_update_begin(&rq->stats.syncp);
+	rq->stats.vs.xdp_tx += sent;
+	rq->stats.vs.xdp_tx_err += bq->count - sent;
+	u64_stats_update_end(&rq->stats.syncp);
+
+	bq->count = 0;
+}
+
+static void veth_xdp_flush(struct veth_rq *rq, struct veth_xdp_tx_bq *bq)
+{
+	struct veth_priv *rcv_priv, *priv = netdev_priv(rq->dev);
 	struct net_device *rcv;
-	struct veth_rq *rq;
+	struct veth_rq *rcv_rq;
 
 	rcu_read_lock();
+	veth_xdp_flush_bq(rq, bq);
 	rcv = rcu_dereference(priv->peer);
 	if (unlikely(!rcv))
 		goto out;
 
 	rcv_priv = netdev_priv(rcv);
-	rq = &rcv_priv->rq[veth_select_rxq(rcv)];
+	rcv_rq = &rcv_priv->rq[veth_select_rxq(rcv)];
 	/* xdp_ring is initialized on receive side? */
-	if (unlikely(!rcu_access_pointer(rq->xdp_prog)))
+	if (unlikely(!rcu_access_pointer(rcv_rq->xdp_prog)))
 		goto out;
 
-	__veth_xdp_flush(rq);
+	__veth_xdp_flush(rcv_rq);
 out:
 	rcu_read_unlock();
 }
 
-static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
+static int veth_xdp_tx(struct veth_rq *rq, struct xdp_buff *xdp,
+		       struct veth_xdp_tx_bq *bq)
 {
-	struct xdp_frame *frame = convert_to_xdp_frame(xdp);
+	struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp);
 
 	if (unlikely(!frame))
 		return -EOVERFLOW;
 
-	return veth_xdp_xmit(dev, 1, &frame, 0);
+	if (unlikely(bq->count == VETH_XDP_TX_BULK_SIZE))
+		veth_xdp_flush_bq(rq, bq);
+
+	bq->q[bq->count++] = frame;
+
+	return 0;
 }
 
 static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
 					struct xdp_frame *frame,
-					unsigned int *xdp_xmit)
+					struct veth_xdp_tx_bq *bq,
+					struct veth_stats *stats)
 {
 	void *hard_start = frame->data - frame->headroom;
 	int len = frame->len, delta = 0;
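With this rework, XDP_TX frames are no longer transmitted one at a time: they are staged in the on-stack veth_xdp_tx_bq and pushed to the peer's ptr_ring at most once per VETH_XDP_TX_BULK_SIZE frames, plus one final veth_xdp_flush() per NAPI poll, amortizing the ring locking. A self-contained sketch of just the bulking rule (hypothetical types and names, not kernel code):

	#define BULK_SIZE 16			/* mirrors VETH_XDP_TX_BULK_SIZE */

	struct tx_bulk_queue {
		void *q[BULK_SIZE];
		unsigned int count;
	};

	/* push everything staged so far in one ring operation */
	static void bulk_flush(struct tx_bulk_queue *bq)
	{
		/* e.g. produce bq->q[0..count) to the peer ring in one go */
		bq->count = 0;
	}

	/* stage one frame, flushing first only if the queue is full */
	static void bulk_enqueue(struct tx_bulk_queue *bq, void *frame)
	{
		if (bq->count == BULK_SIZE)
			bulk_flush(bq);
		bq->q[bq->count++] = frame;
	}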
@@ -391,10 +582,7 @@
 		struct xdp_buff xdp;
 		u32 act;
 
-		xdp.data_hard_start = hard_start;
-		xdp.data = frame->data;
-		xdp.data_end = frame->data + frame->len;
-		xdp.data_meta = frame->data - frame->metasize;
+		xdp_convert_frame_to_buff(frame, &xdp);
 		xdp.rxq = &rq->xdp_rxq;
 
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -407,12 +595,13 @@
 		case XDP_TX:
 			orig_frame = *frame;
 			xdp.rxq->mem = frame->mem;
-			if (unlikely(veth_xdp_tx(rq->dev, &xdp) < 0)) {
+			if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) {
 				trace_xdp_exception(rq->dev, xdp_prog, act);
 				frame = &orig_frame;
+				stats->rx_drops++;
 				goto err_xdp;
 			}
-			*xdp_xmit |= VETH_XDP_TX;
+			stats->xdp_tx++;
 			rcu_read_unlock();
 			goto xdp_xmit;
 		case XDP_REDIRECT:
@@ -420,28 +609,34 @@
 			xdp.rxq->mem = frame->mem;
 			if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
 				frame = &orig_frame;
+				stats->rx_drops++;
 				goto err_xdp;
 			}
-			*xdp_xmit |= VETH_XDP_REDIR;
+			stats->xdp_redirect++;
 			rcu_read_unlock();
 			goto xdp_xmit;
 		default:
 			bpf_warn_invalid_xdp_action(act);
+			fallthrough;
 		case XDP_ABORTED:
 			trace_xdp_exception(rq->dev, xdp_prog, act);
+			fallthrough;
 		case XDP_DROP:
+			stats->xdp_drops++;
 			goto err_xdp;
 		}
 	}
 	rcu_read_unlock();
 
 	headroom = sizeof(struct xdp_frame) + frame->headroom - delta;
-	skb = veth_build_skb(hard_start, headroom, len, 0);
+	skb = veth_build_skb(hard_start, headroom, len, frame->frame_sz);
 	if (!skb) {
 		xdp_return_frame(frame);
+		stats->rx_drops++;
 		goto err;
 	}
 
+	xdp_release_frame(frame);
 	xdp_scrub_frame(frame);
 	skb->protocol = eth_type_trans(skb, rq->dev);
 err:
@@ -453,8 +648,10 @@
 	return NULL;
 }
 
-static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb,
-					unsigned int *xdp_xmit)
+static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
+					struct sk_buff *skb,
+					struct veth_xdp_tx_bq *bq,
+					struct veth_stats *stats)
 {
 	u32 pktlen, headroom, act, metalen;
 	void *orig_data, *orig_data_end;
@@ -498,9 +695,8 @@
 		goto drop;
 	}
 
-	nskb = veth_build_skb(head,
-			      VETH_XDP_HEADROOM + mac_len, skb->len,
-			      PAGE_SIZE);
+	nskb = veth_build_skb(head, VETH_XDP_HEADROOM + mac_len,
			      skb->len, PAGE_SIZE);
 	if (!nskb) {
 		page_frag_free(head);
 		goto drop;
@@ -518,6 +714,11 @@
 	xdp.data_end = xdp.data + pktlen;
 	xdp.data_meta = xdp.data;
 	xdp.rxq = &rq->xdp_rxq;
+
+	/* SKB "head" area always have tailroom for skb_shared_info */
+	xdp.frame_sz = (void *)skb_end_pointer(skb) - xdp.data_hard_start;
+	xdp.frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
 	orig_data = xdp.data;
 	orig_data_end = xdp.data_end;
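frame_sz tells the XDP core how large the whole buffer is, which bpf_xdp_adjust_tail() needs; for an skb the usable area runs from data_hard_start to skb_end_pointer(), plus the tailroom always reserved for skb_shared_info. Worked with illustrative x86-64 numbers (sizes vary by config and architecture):

	/* skb_end_pointer(skb) - xdp.data_hard_start        == e.g. 1728
	 * SKB_DATA_ALIGN(sizeof(struct skb_shared_info))    == e.g.  320
	 * xdp.frame_sz                                       ==      2048
	 */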
@@ -530,31 +731,38 @@
 		get_page(virt_to_page(xdp.data));
 		consume_skb(skb);
 		xdp.rxq->mem = rq->xdp_mem;
-		if (unlikely(veth_xdp_tx(rq->dev, &xdp) < 0)) {
+		if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) {
 			trace_xdp_exception(rq->dev, xdp_prog, act);
+			stats->rx_drops++;
 			goto err_xdp;
 		}
-		*xdp_xmit |= VETH_XDP_TX;
+		stats->xdp_tx++;
 		rcu_read_unlock();
 		goto xdp_xmit;
 	case XDP_REDIRECT:
 		get_page(virt_to_page(xdp.data));
 		consume_skb(skb);
 		xdp.rxq->mem = rq->xdp_mem;
-		if (xdp_do_redirect(rq->dev, &xdp, xdp_prog))
+		if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
+			stats->rx_drops++;
 			goto err_xdp;
-		*xdp_xmit |= VETH_XDP_REDIR;
+		}
+		stats->xdp_redirect++;
 		rcu_read_unlock();
 		goto xdp_xmit;
 	default:
 		bpf_warn_invalid_xdp_action(act);
+		fallthrough;
 	case XDP_ABORTED:
 		trace_xdp_exception(rq->dev, xdp_prog, act);
+		fallthrough;
 	case XDP_DROP:
-		goto drop;
+		stats->xdp_drops++;
+		goto xdp_drop;
 	}
 	rcu_read_unlock();
 
+	/* check if bpf_xdp_adjust_head was used */
 	delta = orig_data - xdp.data;
 	off = mac_len + delta;
 	if (off > 0)
@@ -562,9 +770,11 @@
 	else if (off < 0)
 		__skb_pull(skb, -off);
 	skb->mac_header -= delta;
+
+	/* check if bpf_xdp_adjust_tail was used */
 	off = xdp.data_end - orig_data_end;
 	if (off != 0)
-		__skb_put(skb, off);
+		__skb_put(skb, off); /* positive on grow, negative on shrink */
 	skb->protocol = eth_type_trans(skb, rq->dev);
 
 	metalen = xdp.data - xdp.data_meta;
@@ -573,6 +783,8 @@
 out:
 	return skb;
 drop:
+	stats->rx_drops++;
+xdp_drop:
 	rcu_read_unlock();
 	kfree_skb(skb);
 	return NULL;
@@ -583,7 +795,9 @@
 	return NULL;
 }
 
-static int veth_xdp_rcv(struct veth_rq *rq, int budget, unsigned int *xdp_xmit)
+static int veth_xdp_rcv(struct veth_rq *rq, int budget,
+			struct veth_xdp_tx_bq *bq,
+			struct veth_stats *stats)
 {
 	int i, done = 0;
 
@@ -595,10 +809,14 @@
 			break;
 
 		if (veth_is_xdp_frame(ptr)) {
-			skb = veth_xdp_rcv_one(rq, veth_ptr_to_xdp(ptr),
-					       xdp_xmit);
+			struct xdp_frame *frame = veth_ptr_to_xdp(ptr);
+
+			stats->xdp_bytes += frame->len;
+			skb = veth_xdp_rcv_one(rq, frame, bq, stats);
 		} else {
-			skb = veth_xdp_rcv_skb(rq, ptr, xdp_xmit);
+			skb = ptr;
+			stats->xdp_bytes += skb->len;
+			skb = veth_xdp_rcv_skb(rq, skb, bq, stats);
 		}
 
 		if (skb)
@@ -607,6 +825,14 @@
 		done++;
 	}
 
+	u64_stats_update_begin(&rq->stats.syncp);
+	rq->stats.vs.xdp_redirect += stats->xdp_redirect;
+	rq->stats.vs.xdp_bytes += stats->xdp_bytes;
+	rq->stats.vs.xdp_drops += stats->xdp_drops;
+	rq->stats.vs.rx_drops += stats->rx_drops;
+	rq->stats.vs.xdp_packets += done;
+	u64_stats_update_end(&rq->stats.syncp);
+
 	return done;
 }
 
@@ -614,11 +840,14 @@
 {
 	struct veth_rq *rq =
 		container_of(napi, struct veth_rq, xdp_napi);
-	unsigned int xdp_xmit = 0;
+	struct veth_stats stats = {};
+	struct veth_xdp_tx_bq bq;
 	int done;
 
+	bq.count = 0;
+
 	xdp_set_return_frame_no_direct();
-	done = veth_xdp_rcv(rq, budget, &xdp_xmit);
+	done = veth_xdp_rcv(rq, budget, &bq, &stats);
 
 	if (done < budget && napi_complete_done(napi, done)) {
 		/* Write rx_notify_masked before reading ptr_ring */
@@ -631,10 +860,10 @@
 		}
 	}
 
-	if (xdp_xmit & VETH_XDP_TX)
-		veth_xdp_flush(rq->dev);
-	if (xdp_xmit & VETH_XDP_REDIR)
-		xdp_do_flush_map();
+	if (stats.xdp_tx > 0)
+		veth_xdp_flush(rq, &bq);
+	if (stats.xdp_redirect > 0)
+		xdp_do_flush();
 	xdp_clear_return_frame_no_direct();
 
 	return done;
@@ -677,14 +906,13 @@
 		struct veth_rq *rq = &priv->rq[i];
 
 		napi_disable(&rq->xdp_napi);
-		napi_hash_del(&rq->xdp_napi);
+		__netif_napi_del(&rq->xdp_napi);
 	}
 	synchronize_net();
 
 	for (i = 0; i < dev->real_num_rx_queues; i++) {
 		struct veth_rq *rq = &priv->rq[i];
 
-		netif_napi_del(&rq->xdp_napi);
 		rq->rx_notify_masked = false;
 		ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
 	}
@@ -799,8 +1027,10 @@
 	if (!priv->rq)
 		return -ENOMEM;
 
-	for (i = 0; i < dev->num_rx_queues; i++)
+	for (i = 0; i < dev->num_rx_queues; i++) {
 		priv->rq[i].dev = dev;
+		u64_stats_init(&priv->rq[i].stats.syncp);
+	}
 
 	return 0;
 }
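Initializing each queue's syncp here matters on 32-bit SMP builds, where u64_stats_sync wraps a sequence counter; on 64-bit kernels the call compiles away. A paraphrase of the behavior (see include/linux/u64_stats_sync.h):

	/* 64-bit: struct u64_stats_sync is empty, u64_stats_init() is a no-op.
	 * 32-bit SMP: u64_stats_init() seeds the seqcount that the
	 * fetch_begin/fetch_retry loops above rely on, so each per-queue
	 * syncp must be initialized exactly once, as done here.
	 */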
@@ -816,13 +1046,13 @@
 {
 	int err;
 
-	dev->vstats = netdev_alloc_pcpu_stats(struct pcpu_vstats);
-	if (!dev->vstats)
+	dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
+	if (!dev->lstats)
 		return -ENOMEM;
 
 	err = veth_alloc_queues(dev);
 	if (err) {
-		free_percpu(dev->vstats);
+		free_percpu(dev->lstats);
 		return err;
 	}
 
@@ -832,7 +1062,7 @@
 static void veth_dev_free(struct net_device *dev)
 {
 	veth_free_queues(dev);
-	free_percpu(dev->vstats);
+	free_percpu(dev->lstats);
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -976,26 +1206,11 @@
 	return err;
 }
 
-static u32 veth_xdp_query(struct net_device *dev)
-{
-	struct veth_priv *priv = netdev_priv(dev);
-	const struct bpf_prog *xdp_prog;
-
-	xdp_prog = priv->_xdp_prog;
-	if (xdp_prog)
-		return xdp_prog->aux->id;
-
-	return 0;
-}
-
 static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 {
 	switch (xdp->command) {
 	case XDP_SETUP_PROG:
 		return veth_xdp_set(dev, xdp->prog, xdp->extack);
-	case XDP_QUERY_PROG:
-		xdp->prog_id = veth_xdp_query(dev);
-		return 0;
 	default:
 		return -EINVAL;
 	}
@@ -1017,7 +1232,8 @@
 	.ndo_features_check	= passthru_features_check,
 	.ndo_set_rx_headroom	= veth_set_rx_headroom,
 	.ndo_bpf		= veth_xdp,
-	.ndo_xdp_xmit		= veth_xdp_xmit,
+	.ndo_xdp_xmit		= veth_ndo_xdp_xmit,
+	.ndo_get_peer_dev	= veth_peer_dev,
 };
 
 #define VETH_FEATURES	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
@@ -1126,7 +1342,7 @@
 		return PTR_ERR(net);
 
 	peer = rtnl_create_link(net, ifname, name_assign_type,
-				&veth_link_ops, tbp);
+				&veth_link_ops, tbp, extack);
 	if (IS_ERR(peer)) {
 		put_net(net);
 		return PTR_ERR(peer);