2023-12-09 b22da3d8526a935aa31e086e63f60ff3246cb61c
kernel/drivers/net/xen-netback/netback.c
@@ -96,6 +96,13 @@
 module_param_named(hash_cache_size, xenvif_hash_cache_size, uint, 0644);
 MODULE_PARM_DESC(hash_cache_size, "Number of flows in the hash cache");

+/* The module parameter tells that we have to put data
+ * for xen-netfront with the XDP_PACKET_HEADROOM offset
+ * needed for XDP processing
+ */
+bool provides_xdp_headroom = true;
+module_param(provides_xdp_headroom, bool, 0644);
+
 static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
                                u8 status);

@@ -104,6 +111,8 @@
                              unsigned int extra_count,
                              s8 st);
 static void push_tx_responses(struct xenvif_queue *queue);
+
+static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx);

 static inline int tx_work_todo(struct xenvif_queue *queue);

@@ -136,12 +145,12 @@

 static u16 frag_get_pending_idx(skb_frag_t *frag)
 {
-        return (u16)frag->page_offset;
+        return (u16)skb_frag_off(frag);
 }

 static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
 {
-        frag->page_offset = pending_idx;
+        skb_frag_off_set(frag, pending_idx);
 }

 static inline pending_ring_idx_t pending_index(unsigned i)
@@ -323,10 +332,13 @@


 struct xenvif_tx_cb {
-        u16 pending_idx;
+        u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1];
+        u8 copy_count;
 };

 #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
+#define copy_pending_idx(skb, i) (XENVIF_TX_CB(skb)->copy_pending_idx[i])
+#define copy_count(skb) (XENVIF_TX_CB(skb)->copy_count)

 static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
                                            u16 pending_idx,
@@ -361,31 +373,93 @@
         return skb;
 }

-static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue,
-                                                        struct sk_buff *skb,
-                                                        struct xen_netif_tx_request *txp,
-                                                        struct gnttab_map_grant_ref *gop,
-                                                        unsigned int frag_overflow,
-                                                        struct sk_buff *nskb)
+static void xenvif_get_requests(struct xenvif_queue *queue,
+                                struct sk_buff *skb,
+                                struct xen_netif_tx_request *first,
+                                struct xen_netif_tx_request *txfrags,
+                                unsigned *copy_ops,
+                                unsigned *map_ops,
+                                unsigned int frag_overflow,
+                                struct sk_buff *nskb,
+                                unsigned int extra_count,
+                                unsigned int data_len)
 {
         struct skb_shared_info *shinfo = skb_shinfo(skb);
         skb_frag_t *frags = shinfo->frags;
-        u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
-        int start;
+        u16 pending_idx;
         pending_ring_idx_t index;
         unsigned int nr_slots;
+        struct gnttab_copy *cop = queue->tx_copy_ops + *copy_ops;
+        struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops;
+        struct xen_netif_tx_request *txp = first;

-        nr_slots = shinfo->nr_frags;
+        nr_slots = shinfo->nr_frags + 1;

-        /* Skip first skb fragment if it is on same page as header fragment. */
-        start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
+        copy_count(skb) = 0;

-        for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
-             shinfo->nr_frags++, txp++, gop++) {
+        /* Create copy ops for exactly data_len bytes into the skb head. */
+        __skb_put(skb, data_len);
+        while (data_len > 0) {
+                int amount = data_len > txp->size ? txp->size : data_len;
+
+                cop->source.u.ref = txp->gref;
+                cop->source.domid = queue->vif->domid;
+                cop->source.offset = txp->offset;
+
+                cop->dest.domid = DOMID_SELF;
+                cop->dest.offset = (offset_in_page(skb->data +
+                                                   skb_headlen(skb) -
+                                                   data_len)) & ~XEN_PAGE_MASK;
+                cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb)
+                                               - data_len);
+
+                cop->len = amount;
+                cop->flags = GNTCOPY_source_gref;
+
+                index = pending_index(queue->pending_cons);
+                pending_idx = queue->pending_ring[index];
+                callback_param(queue, pending_idx).ctx = NULL;
+                copy_pending_idx(skb, copy_count(skb)) = pending_idx;
+                copy_count(skb)++;
+
+                cop++;
+                data_len -= amount;
+
+                if (amount == txp->size) {
+                        /* The copy op covered the full tx_request */
+
+                        memcpy(&queue->pending_tx_info[pending_idx].req,
+                               txp, sizeof(*txp));
+                        queue->pending_tx_info[pending_idx].extra_count =
+                                (txp == first) ? extra_count : 0;
+
+                        if (txp == first)
+                                txp = txfrags;
+                        else
+                                txp++;
+                        queue->pending_cons++;
+                        nr_slots--;
+                } else {
+                        /* The copy op partially covered the tx_request.
+                         * The remainder will be mapped.
+                         */
+                        txp->offset += amount;
+                        txp->size -= amount;
+                }
+        }
+
+        for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots;
+             shinfo->nr_frags++, gop++) {
                 index = pending_index(queue->pending_cons++);
                 pending_idx = queue->pending_ring[index];
-                xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop);
+                xenvif_tx_create_map_op(queue, pending_idx, txp,
                                         txp == first ? extra_count : 0, gop);
                 frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
+
+                if (txp == first)
+                        txp = txfrags;
+                else
+                        txp++;
         }

         if (frag_overflow) {
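A minimal userspace sketch (illustrative only, not part of the patch; the slot sizes and data_len below are made-up example values) of the slot accounting done by the new head-copy loop above: whole slots consumed by the copy advance to the next tx request, while a partially consumed slot is trimmed so its remainder can still be grant-mapped as a frag.

/* Illustrative only: models the accounting of the head-copy loop in the
 * new xenvif_get_requests(), with plain integers in place of grant-copy
 * operations. All values are examples.
 */
#include <stdio.h>

struct slot { unsigned int offset, size; };

int main(void)
{
        struct slot slots[] = { { 0, 64 }, { 0, 200 }, { 0, 1000 } };
        unsigned int nr_slots = 3, i = 0;
        unsigned int data_len = 128;    /* bytes destined for the skb head */

        while (data_len > 0) {
                unsigned int amount = data_len > slots[i].size ?
                                      slots[i].size : data_len;

                printf("copy op: slot %u, offset %u, len %u\n",
                       i, slots[i].offset, amount);
                data_len -= amount;

                if (amount == slots[i].size) {
                        i++;            /* slot fully copied, move on */
                } else {
                        /* slot only partially copied: the remainder stays
                         * behind and would be grant-mapped as a frag
                         */
                        slots[i].offset += amount;
                        slots[i].size -= amount;
                }
        }
        printf("slots left to map: %u (next at offset %u, size %u)\n",
               nr_slots - i, slots[i].offset, slots[i].size);
        return 0;
}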
@@ -406,7 +480,8 @@
                 skb_shinfo(skb)->frag_list = nskb;
         }

-        return gop;
+        (*copy_ops) = cop - queue->tx_copy_ops;
+        (*map_ops) = gop - queue->tx_map_ops;
 }

 static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
@@ -442,7 +517,7 @@
                                struct gnttab_copy **gopp_copy)
 {
         struct gnttab_map_grant_ref *gop_map = *gopp_map;
-        u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+        u16 pending_idx;
         /* This always points to the shinfo of the skb being checked, which
          * could be either the first or the one on the frag_list
          */
@@ -453,24 +528,37 @@
         struct skb_shared_info *first_shinfo = NULL;
         int nr_frags = shinfo->nr_frags;
         const bool sharedslot = nr_frags &&
-                frag_get_pending_idx(&shinfo->frags[0]) == pending_idx;
-        int i, err;
+                frag_get_pending_idx(&shinfo->frags[0]) ==
+                copy_pending_idx(skb, copy_count(skb) - 1);
+        int i, err = 0;

-        /* Check status of header. */
-        err = (*gopp_copy)->status;
-        if (unlikely(err)) {
-                if (net_ratelimit())
-                        netdev_dbg(queue->vif->dev,
-                                   "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
-                                   (*gopp_copy)->status,
-                                   pending_idx,
-                                   (*gopp_copy)->source.u.ref);
-                /* The first frag might still have this slot mapped */
-                if (!sharedslot)
-                        xenvif_idx_release(queue, pending_idx,
-                                           XEN_NETIF_RSP_ERROR);
+        for (i = 0; i < copy_count(skb); i++) {
+                int newerr;
+
+                /* Check status of header. */
+                pending_idx = copy_pending_idx(skb, i);
+
+                newerr = (*gopp_copy)->status;
+                if (likely(!newerr)) {
+                        /* The first frag might still have this slot mapped */
+                        if (i < copy_count(skb) - 1 || !sharedslot)
+                                xenvif_idx_release(queue, pending_idx,
+                                                   XEN_NETIF_RSP_OKAY);
+                } else {
+                        err = newerr;
+                        if (net_ratelimit())
+                                netdev_dbg(queue->vif->dev,
+                                           "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
+                                           (*gopp_copy)->status,
+                                           pending_idx,
+                                           (*gopp_copy)->source.u.ref);
+                        /* The first frag might still have this slot mapped */
+                        if (i < copy_count(skb) - 1 || !sharedslot)
+                                xenvif_idx_release(queue, pending_idx,
+                                                   XEN_NETIF_RSP_ERROR);
+                }
+                (*gopp_copy)++;
         }
-        (*gopp_copy)++;

 check_frags:
         for (i = 0; i < nr_frags; i++, gop_map++) {
@@ -516,14 +604,6 @@
                 /* Not the first error? Preceding frags already invalidated. */
                 if (err)
                         continue;
-
-                /* First error: if the header haven't shared a slot with the
-                 * first frag, release it as well.
-                 */
-                if (!sharedslot)
-                        xenvif_idx_release(queue,
-                                           XENVIF_TX_CB(skb)->pending_idx,
-                                           XEN_NETIF_RSP_OKAY);

                 /* Invalidate preceding fragments of this skb. */
                 for (j = 0; j < i; j++) {
@@ -794,7 +874,6 @@
                                  unsigned *copy_ops,
                                  unsigned *map_ops)
 {
-        struct gnttab_map_grant_ref *gop = queue->tx_map_ops;
         struct sk_buff *skb, *nskb;
         int ret;
         unsigned int frag_overflow;
@@ -876,8 +955,12 @@
                         continue;
                 }

+                data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN) ?
+                        XEN_NETBACK_TX_COPY_LEN : txreq.size;
+
                 ret = xenvif_count_requests(queue, &txreq, extra_count,
                                             txfrags, work_to_do);
+
                 if (unlikely(ret < 0))
                         break;

@@ -903,9 +986,8 @@
                 index = pending_index(queue->pending_cons);
                 pending_idx = queue->pending_ring[index];

-                data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN &&
-                            ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
-                        XEN_NETBACK_TX_COPY_LEN : txreq.size;
+                if (ret >= XEN_NETBK_LEGACY_SLOTS_MAX - 1 && data_len < txreq.size)
+                        data_len = txreq.size;

                 skb = xenvif_alloc_skb(data_len);
                 if (unlikely(skb == NULL)) {
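Taken together with the hunk above, the reworked data_len selection can be summarised by this small standalone sketch (illustrative only; extra_slots stands in for the ret value returned by xenvif_count_requests(), and the two constants are given placeholder values here): the linear head copy is capped at XEN_NETBACK_TX_COPY_LEN up front, then widened to the whole first request when the packet already needs close to the maximum number of slots.

/* Illustrative only: placeholder values for the kernel constants. */
#include <stdio.h>

#define XEN_NETBACK_TX_COPY_LEN         128     /* placeholder */
#define XEN_NETBK_LEGACY_SLOTS_MAX      18      /* placeholder */

static unsigned int pick_data_len(unsigned int first_size, int extra_slots)
{
        unsigned int data_len = first_size > XEN_NETBACK_TX_COPY_LEN ?
                                XEN_NETBACK_TX_COPY_LEN : first_size;

        /* Packet already needs (almost) all slots: copy the whole
         * first request instead of splitting it.
         */
        if (extra_slots >= XEN_NETBK_LEGACY_SLOTS_MAX - 1 && data_len < first_size)
                data_len = first_size;

        return data_len;
}

int main(void)
{
        printf("%u\n", pick_data_len(1000, 2));         /* 128: capped head copy */
        printf("%u\n", pick_data_len(64, 2));           /* 64: request smaller than cap */
        printf("%u\n", pick_data_len(1000, 17));        /* 1000: near slot limit, copy it all */
        return 0;
}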
@@ -916,8 +998,6 @@
                 }

                 skb_shinfo(skb)->nr_frags = ret;
-                if (data_len < txreq.size)
-                        skb_shinfo(skb)->nr_frags++;
                 /* At this point shinfo->nr_frags is in fact the number of
                  * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
                  */
@@ -979,54 +1059,19 @@
                                              type);
                 }

-                XENVIF_TX_CB(skb)->pending_idx = pending_idx;
-
-                __skb_put(skb, data_len);
-                queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
-                queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid;
-                queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset;
-
-                queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
-                        virt_to_gfn(skb->data);
-                queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
-                queue->tx_copy_ops[*copy_ops].dest.offset =
-                        offset_in_page(skb->data) & ~XEN_PAGE_MASK;
-
-                queue->tx_copy_ops[*copy_ops].len = data_len;
-                queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
-
-                (*copy_ops)++;
-
-                if (data_len < txreq.size) {
-                        frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
-                                             pending_idx);
-                        xenvif_tx_create_map_op(queue, pending_idx, &txreq,
-                                                extra_count, gop);
-                        gop++;
-                } else {
-                        frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
-                                             INVALID_PENDING_IDX);
-                        memcpy(&queue->pending_tx_info[pending_idx].req,
-                               &txreq, sizeof(txreq));
-                        queue->pending_tx_info[pending_idx].extra_count =
-                                extra_count;
-                }
-
-                queue->pending_cons++;
-
-                gop = xenvif_get_requests(queue, skb, txfrags, gop,
-                                          frag_overflow, nskb);
+                xenvif_get_requests(queue, skb, &txreq, txfrags, copy_ops,
+                                    map_ops, frag_overflow, nskb, extra_count,
+                                    data_len);

                 __skb_queue_tail(&queue->tx_queue, skb);

                 queue->tx.req_cons = idx;

-                if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) ||
+                if ((*map_ops >= ARRAY_SIZE(queue->tx_map_ops)) ||
                     (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops)))
                         break;
         }

-        (*map_ops) = gop - queue->tx_map_ops;
         return;
 }

@@ -1061,7 +1106,7 @@
                         int j;
                         skb->truesize += skb->data_len;
                         for (j = 0; j < i; j++)
-                                put_page(frags[j].page.p);
+                                put_page(skb_frag_page(&frags[j]));
                         return -ENOMEM;
                 }

@@ -1073,8 +1118,8 @@
                         BUG();

                 offset += len;
-                frags[i].page.p = page;
-                frags[i].page_offset = 0;
+                __skb_frag_set_page(&frags[i], page);
+                skb_frag_off_set(&frags[i], 0);
                 skb_frag_size_set(&frags[i], len);
         }

@@ -1105,9 +1150,8 @@
         while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) {
                 struct xen_netif_tx_request *txp;
                 u16 pending_idx;
-                unsigned data_len;

-                pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+                pending_idx = copy_pending_idx(skb, 0);
                 txp = &queue->pending_tx_info[pending_idx].req;

                 /* Check the remap error code. */
@@ -1124,18 +1168,6 @@
                         }
                         kfree_skb(skb);
                         continue;
-                }
-
-                data_len = skb->len;
-                callback_param(queue, pending_idx).ctx = NULL;
-                if (data_len < txp->size) {
-                        /* Append the packet payload as a fragment. */
-                        txp->offset += data_len;
-                        txp->size -= data_len;
-                } else {
-                        /* Schedule a response immediately. */
-                        xenvif_idx_release(queue, pending_idx,
-                                           XEN_NETIF_RSP_OKAY);
                 }

                 if (txp->flags & XEN_NETTXF_csum_blank)
@@ -1175,15 +1207,24 @@
                         continue;
                 }

-                skb_probe_transport_header(skb, 0);
+                skb_probe_transport_header(skb);

                 /* If the packet is GSO then we will have just set up the
                  * transport header offset in checksum_setup so it's now
                  * straightforward to calculate gso_segs.
                  */
                 if (skb_is_gso(skb)) {
-                        int mss = skb_shinfo(skb)->gso_size;
-                        int hdrlen = skb_transport_header(skb) -
+                        int mss, hdrlen;
+
+                        /* GSO implies having the L4 header. */
+                        WARN_ON_ONCE(!skb_transport_header_was_set(skb));
+                        if (unlikely(!skb_transport_header_was_set(skb))) {
+                                kfree_skb(skb);
+                                continue;
+                        }
+
+                        mss = skb_shinfo(skb)->gso_size;
+                        hdrlen = skb_transport_header(skb) -
                                 skb_mac_header(skb) +
                                 tcp_hdrlen(skb);

@@ -1314,7 +1355,7 @@
 /* Called after netfront has transmitted */
 int xenvif_tx_action(struct xenvif_queue *queue, int budget)
 {
-        unsigned nr_mops, nr_cops = 0;
+        unsigned nr_mops = 0, nr_cops = 0;
         int work_done, ret;

         if (unlikely(!tx_work_todo(queue)))
@@ -1401,7 +1442,7 @@
                 notify_remote_via_irq(queue->tx_irq);
 }

-void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
+static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
 {
         int ret;
         struct gnttab_unmap_grant_ref tx_unmap_op;
@@ -1456,7 +1497,7 @@
         void *addr;
         struct xen_netif_tx_sring *txs;
         struct xen_netif_rx_sring *rxs;
-
+        RING_IDX rsp_prod, req_prod;
         int err = -ENOMEM;

         err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
@@ -1465,7 +1506,14 @@
                 goto err;

         txs = (struct xen_netif_tx_sring *)addr;
-        BACK_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE);
+        rsp_prod = READ_ONCE(txs->rsp_prod);
+        req_prod = READ_ONCE(txs->req_prod);
+
+        BACK_RING_ATTACH(&queue->tx, txs, rsp_prod, XEN_PAGE_SIZE);
+
+        err = -EIO;
+        if (req_prod - rsp_prod > RING_SIZE(&queue->tx))
+                goto err;

         err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
                                      &rx_ring_ref, 1, &addr);
@@ -1473,7 +1521,14 @@
                 goto err;

         rxs = (struct xen_netif_rx_sring *)addr;
-        BACK_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
+        rsp_prod = READ_ONCE(rxs->rsp_prod);
+        req_prod = READ_ONCE(rxs->req_prod);
+
+        BACK_RING_ATTACH(&queue->rx, rxs, rsp_prod, XEN_PAGE_SIZE);
+
+        err = -EIO;
+        if (req_prod - rsp_prod > RING_SIZE(&queue->rx))
+                goto err;

         return 0;

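A minimal userspace sketch (illustrative only; the ring size and index values below are examples) of the consistency check added after BACK_RING_ATTACH in the two hunks above: the request and response producer indices are free-running unsigned counters, so the subtraction is wraparound-safe, and a gap larger than the ring size indicates that the frontend published inconsistent shared indices.

/* Illustrative only: the index sanity-check pattern used above,
 * with a made-up ring size and example index values.
 */
#include <stdio.h>

#define RING_SZ 256u    /* example; the real value comes from RING_SIZE(&queue->tx) */

static int ring_indices_sane(unsigned int req_prod, unsigned int rsp_prod)
{
        /* Unsigned subtraction handles wraparound of the free-running
         * counters; a gap larger than the ring means corrupt state.
         */
        return (req_prod - rsp_prod) <= RING_SZ;
}

int main(void)
{
        printf("%d\n", ring_indices_sane(10, 5));               /* 1: 5 outstanding requests */
        printf("%d\n", ring_indices_sane(3, 0xfffffffe));       /* 1: 5 outstanding, across the wrap */
        printf("%d\n", ring_indices_sane(1000, 5));             /* 0: gap exceeds the ring size */
        return 0;
}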
@@ -1663,9 +1718,6 @@

 #ifdef CONFIG_DEBUG_FS
         xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
-        if (IS_ERR_OR_NULL(xen_netback_dbg_root))
-                pr_warn("Init of debugfs returned %ld!\n",
-                        PTR_ERR(xen_netback_dbg_root));
 #endif /* CONFIG_DEBUG_FS */

         return 0;
@@ -1679,8 +1731,7 @@
 static void __exit netback_fini(void)
 {
 #ifdef CONFIG_DEBUG_FS
-        if (!IS_ERR_OR_NULL(xen_netback_dbg_root))
-                debugfs_remove_recursive(xen_netback_dbg_root);
+        debugfs_remove_recursive(xen_netback_dbg_root);
 #endif /* CONFIG_DEBUG_FS */
         xenvif_xenbus_fini();
 }