2023-12-11 6778948f9de86c3cfaf36725a7c87dcff9ba247f
kernel/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2,22 +2,13 @@
 /* Copyright(c) 2013 - 2018 Intel Corporation. */
 
 #include <linux/prefetch.h>
-#include <net/busy_poll.h>
 #include <linux/bpf_trace.h>
 #include <net/xdp.h>
 #include "i40e.h"
 #include "i40e_trace.h"
 #include "i40e_prototype.h"
-
-static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
-                                u32 td_tag)
-{
-        return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
-                           ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
-                           ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
-                           ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
-                           ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
-}
+#include "i40e_txrx_common.h"
+#include "i40e_xsk.h"
 
 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
 /**
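Note: build_ctob() is not dropped here, it is only removed from this file; the newly included i40e_txrx_common.h presumably carries the shared helper. A minimal standalone sketch of the same "pack cmd/offset/size/tag into one 64-bit descriptor qword" pattern follows; the DEMO_* shift values are illustrative placeholders, not the i40e hardware layout (the real values are the I40E_TXD_QW1_*_SHIFT macros in the driver headers).

/*
 * Standalone sketch of the descriptor-qword packing done by build_ctob().
 * All DEMO_* constants are placeholders for illustration only.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_DTYPE_DATA   0x0ULL /* placeholder for I40E_TX_DESC_DTYPE_DATA */
#define DEMO_CMD_SHIFT    4      /* placeholder for I40E_TXD_QW1_CMD_SHIFT */
#define DEMO_OFFSET_SHIFT 16     /* placeholder for I40E_TXD_QW1_OFFSET_SHIFT */
#define DEMO_BUF_SZ_SHIFT 34     /* placeholder for I40E_TXD_QW1_TX_BUF_SZ_SHIFT */
#define DEMO_L2TAG1_SHIFT 48     /* placeholder for I40E_TXD_QW1_L2TAG1_SHIFT */

static uint64_t demo_build_ctob(uint32_t cmd, uint32_t off, uint32_t size, uint32_t tag)
{
        return DEMO_DTYPE_DATA |
               ((uint64_t)cmd  << DEMO_CMD_SHIFT) |
               ((uint64_t)off  << DEMO_OFFSET_SHIFT) |
               ((uint64_t)size << DEMO_BUF_SZ_SHIFT) |
               ((uint64_t)tag  << DEMO_L2TAG1_SHIFT);
}

int main(void)
{
        /* e.g. EOP|RS command bits, no offsets, 1514-byte buffer, no VLAN tag */
        printf("qword1 = 0x%016llx\n",
               (unsigned long long)demo_build_ctob(0x3, 0, 1514, 0));
        return 0;
}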
@@ -530,28 +521,29 @@
 /**
  * i40e_fd_handle_status - check the Programming Status for FD
  * @rx_ring: the Rx ring for this descriptor
- * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
+ * @qword0_raw: qword0
+ * @qword1: qword1 after le_to_cpu
  * @prog_id: the id originally used for programming
  *
  * This is used to verify if the FD programming or invalidation
  * requested by SW to the HW is successful or not and take actions accordingly.
  **/
-static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
-                                  union i40e_rx_desc *rx_desc, u8 prog_id)
+static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u64 qword0_raw,
+                                  u64 qword1, u8 prog_id)
 {
         struct i40e_pf *pf = rx_ring->vsi->back;
         struct pci_dev *pdev = pf->pdev;
+        struct i40e_16b_rx_wb_qw0 *qw0;
         u32 fcnt_prog, fcnt_avail;
         u32 error;
-        u64 qw;
 
-        qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
-        error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
+        qw0 = (struct i40e_16b_rx_wb_qw0 *)&qword0_raw;
+        error = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
                 I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
 
         if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
-                pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
-                if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
+                pf->fd_inv = le32_to_cpu(qw0->hi_dword.fd_id);
+                if (qw0->hi_dword.fd_id != 0 ||
                     (I40E_DEBUG_FD & pf->hw.debug_mask))
                         dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
                                  pf->fd_inv);
@@ -569,7 +561,7 @@
         /* store the current atr filter count */
         pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
 
-        if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
+        if (qw0->hi_dword.fd_id == 0 &&
             test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) {
                 /* These set_bit() calls aren't atomic with the
                  * test_bit() here, but worse case we potentially
@@ -598,7 +590,7 @@
         } else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
                 if (I40E_DEBUG_FD & pf->hw.debug_mask)
                         dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
                                  qw0->hi_dword.fd_id);
         }
 }
 
@@ -644,13 +636,18 @@
         unsigned long bi_size;
         u16 i;
 
-        /* ring already cleared, nothing to do */
-        if (!tx_ring->tx_bi)
-                return;
+        if (ring_is_xdp(tx_ring) && tx_ring->xsk_pool) {
+                i40e_xsk_clean_tx_ring(tx_ring);
+        } else {
+                /* ring already cleared, nothing to do */
+                if (!tx_ring->tx_bi)
+                        return;
 
-        /* Free all the Tx ring sk_buffs */
-        for (i = 0; i < tx_ring->count; i++)
-                i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
+                /* Free all the Tx ring sk_buffs */
+                for (i = 0; i < tx_ring->count; i++)
+                        i40e_unmap_and_free_tx_resource(tx_ring,
+                                                        &tx_ring->tx_bi[i]);
+        }
 
         bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
         memset(tx_ring->tx_bi, 0, bi_size);
@@ -767,8 +764,6 @@
         }
 }
 
-#define WB_STRIDE 4
-
 /**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
  * @vsi: the VSI we care about
@@ -780,7 +775,7 @@
 static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
                               struct i40e_ring *tx_ring, int napi_budget)
 {
-        u16 i = tx_ring->next_to_clean;
+        int i = tx_ring->next_to_clean;
         struct i40e_tx_buffer *tx_buf;
         struct i40e_tx_desc *tx_head;
         struct i40e_tx_desc *tx_desc;
@@ -873,27 +868,8 @@
 
         i += tx_ring->count;
         tx_ring->next_to_clean = i;
-        u64_stats_update_begin(&tx_ring->syncp);
-        tx_ring->stats.bytes += total_bytes;
-        tx_ring->stats.packets += total_packets;
-        u64_stats_update_end(&tx_ring->syncp);
-        tx_ring->q_vector->tx.total_bytes += total_bytes;
-        tx_ring->q_vector->tx.total_packets += total_packets;
-
-        if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
-                /* check to see if there are < 4 descriptors
-                 * waiting to be written back, then kick the hardware to force
-                 * them to be written back in case we stay in NAPI.
-                 * In this mode on X722 we do not enable Interrupt.
-                 */
-                unsigned int j = i40e_get_tx_pending(tx_ring, false);
-
-                if (budget &&
-                    ((j / WB_STRIDE) == 0) && (j > 0) &&
-                    !test_bit(__I40E_VSI_DOWN, vsi->state) &&
-                    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
-                        tx_ring->arm_wb = true;
-        }
+        i40e_update_tx_stats(tx_ring, total_packets, total_bytes);
+        i40e_arm_wb(tx_ring, vsi, budget);
 
         if (ring_is_xdp(tx_ring))
                 return !!budget;
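Note: the per-ring stats update and the write-back arming logic removed above move behind i40e_update_tx_stats() and i40e_arm_wb(), presumably declared in i40e_txrx_common.h. A tiny standalone check of the arming predicate the removed inline code used, with WB_STRIDE taken from the #define this patch also removes:

/*
 * With WB_STRIDE == 4, "(j / WB_STRIDE) == 0 && j > 0" is true exactly
 * when 1 <= j <= 3 descriptors are still waiting to be written back,
 * i.e. the hardware is kicked only for a small trailing batch.
 */
#include <stdio.h>

#define WB_STRIDE 4     /* value from the #define removed by this patch */

int main(void)
{
        for (unsigned int j = 0; j <= 6; j++)
                printf("pending=%u -> arm_wb=%s\n", j,
                       ((j / WB_STRIDE) == 0 && j > 0) ? "yes" : "no");
        return 0;
}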
....@@ -1220,6 +1196,11 @@
12201196 rc->total_packets = 0;
12211197 }
12221198
1199
+static struct i40e_rx_buffer *i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
1200
+{
1201
+ return &rx_ring->rx_bi[idx];
1202
+}
1203
+
12231204 /**
12241205 * i40e_reuse_rx_page - page flip buffer and store it back on the ring
12251206 * @rx_ring: rx descriptor ring to store buffers on
....@@ -1233,7 +1214,7 @@
12331214 struct i40e_rx_buffer *new_buff;
12341215 u16 nta = rx_ring->next_to_alloc;
12351216
1236
- new_buff = &rx_ring->rx_bi[nta];
1217
+ new_buff = i40e_rx_bi(rx_ring, nta);
12371218
12381219 /* update, and store next to alloc */
12391220 nta++;
....@@ -1244,65 +1225,35 @@
12441225 new_buff->page = old_buff->page;
12451226 new_buff->page_offset = old_buff->page_offset;
12461227 new_buff->pagecnt_bias = old_buff->pagecnt_bias;
1247
-}
12481228
1249
-/**
1250
- * i40e_rx_is_programming_status - check for programming status descriptor
1251
- * @qw: qword representing status_error_len in CPU ordering
1252
- *
1253
- * The value of in the descriptor length field indicate if this
1254
- * is a programming status descriptor for flow director or FCoE
1255
- * by the value of I40E_RX_PROG_STATUS_DESC_LENGTH, otherwise
1256
- * it is a packet descriptor.
1257
- **/
1258
-static inline bool i40e_rx_is_programming_status(u64 qw)
1259
-{
1260
- /* The Rx filter programming status and SPH bit occupy the same
1261
- * spot in the descriptor. Since we don't support packet split we
1262
- * can just reuse the bit as an indication that this is a
1263
- * programming status descriptor.
1264
- */
1265
- return qw & I40E_RXD_QW1_LENGTH_SPH_MASK;
1229
+ rx_ring->rx_stats.page_reuse_count++;
1230
+
1231
+ /* clear contents of buffer_info */
1232
+ old_buff->page = NULL;
12661233 }
12671234
12681235 /**
12691236 * i40e_clean_programming_status - clean the programming status descriptor
12701237 * @rx_ring: the rx ring that has this descriptor
1271
- * @rx_desc: the rx descriptor written back by HW
1272
- * @qw: qword representing status_error_len in CPU ordering
1238
+ * @qword0_raw: qword0
1239
+ * @qword1: qword1 representing status_error_len in CPU ordering
12731240 *
12741241 * Flow director should handle FD_FILTER_STATUS to check its filter programming
12751242 * status being successful or not and take actions accordingly. FCoE should
12761243 * handle its context/filter programming/invalidation status and take actions.
12771244 *
1245
+ * Returns an i40e_rx_buffer to reuse if the cleanup occurred, otherwise NULL.
12781246 **/
1279
-static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
1280
- union i40e_rx_desc *rx_desc,
1281
- u64 qw)
1247
+void i40e_clean_programming_status(struct i40e_ring *rx_ring, u64 qword0_raw,
1248
+ u64 qword1)
12821249 {
1283
- struct i40e_rx_buffer *rx_buffer;
1284
- u32 ntc = rx_ring->next_to_clean;
12851250 u8 id;
12861251
1287
- /* fetch, update, and store next to clean */
1288
- rx_buffer = &rx_ring->rx_bi[ntc++];
1289
- ntc = (ntc < rx_ring->count) ? ntc : 0;
1290
- rx_ring->next_to_clean = ntc;
1291
-
1292
- prefetch(I40E_RX_DESC(rx_ring, ntc));
1293
-
1294
- /* place unused page back on the ring */
1295
- i40e_reuse_rx_page(rx_ring, rx_buffer);
1296
- rx_ring->rx_stats.page_reuse_count++;
1297
-
1298
- /* clear contents of buffer_info */
1299
- rx_buffer->page = NULL;
1300
-
1301
- id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
1252
+ id = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
13021253 I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
13031254
13041255 if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
1305
- i40e_fd_handle_status(rx_ring, rx_desc, id);
1256
+ i40e_fd_handle_status(rx_ring, qword0_raw, qword1, id);
13061257 }
13071258
13081259 /**
....@@ -1354,13 +1305,17 @@
13541305 return -ENOMEM;
13551306 }
13561307
1308
+static void i40e_clear_rx_bi(struct i40e_ring *rx_ring)
1309
+{
1310
+ memset(rx_ring->rx_bi, 0, sizeof(*rx_ring->rx_bi) * rx_ring->count);
1311
+}
1312
+
13571313 /**
13581314 * i40e_clean_rx_ring - Free Rx buffers
13591315 * @rx_ring: ring to be cleaned
13601316 **/
13611317 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
13621318 {
1363
- unsigned long bi_size;
13641319 u16 i;
13651320
13661321 /* ring already cleared, nothing to do */
....@@ -1372,9 +1327,14 @@
13721327 rx_ring->skb = NULL;
13731328 }
13741329
1330
+ if (rx_ring->xsk_pool) {
1331
+ i40e_xsk_clean_rx_ring(rx_ring);
1332
+ goto skip_free;
1333
+ }
1334
+
13751335 /* Free all the Rx ring sk_buffs */
13761336 for (i = 0; i < rx_ring->count; i++) {
1377
- struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];
1337
+ struct i40e_rx_buffer *rx_bi = i40e_rx_bi(rx_ring, i);
13781338
13791339 if (!rx_bi->page)
13801340 continue;
....@@ -1400,8 +1360,11 @@
14001360 rx_bi->page_offset = 0;
14011361 }
14021362
1403
- bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1404
- memset(rx_ring->rx_bi, 0, bi_size);
1363
+skip_free:
1364
+ if (rx_ring->xsk_pool)
1365
+ i40e_clear_rx_bi_zc(rx_ring);
1366
+ else
1367
+ i40e_clear_rx_bi(rx_ring);
14051368
14061369 /* Zero out the descriptor ring */
14071370 memset(rx_ring->desc, 0, rx_ring->size);
....@@ -1442,20 +1405,12 @@
14421405 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
14431406 {
14441407 struct device *dev = rx_ring->dev;
1445
- int err = -ENOMEM;
1446
- int bi_size;
1447
-
1448
- /* warn if we are about to overwrite the pointer */
1449
- WARN_ON(rx_ring->rx_bi);
1450
- bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1451
- rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
1452
- if (!rx_ring->rx_bi)
1453
- goto err;
1408
+ int err;
14541409
14551410 u64_stats_init(&rx_ring->syncp);
14561411
14571412 /* Round up to nearest 4K */
1458
- rx_ring->size = rx_ring->count * sizeof(union i40e_32byte_rx_desc);
1413
+ rx_ring->size = rx_ring->count * sizeof(union i40e_rx_desc);
14591414 rx_ring->size = ALIGN(rx_ring->size, 4096);
14601415 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
14611416 &rx_ring->dma, GFP_KERNEL);
....@@ -1463,7 +1418,7 @@
14631418 if (!rx_ring->desc) {
14641419 dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
14651420 rx_ring->size);
1466
- goto err;
1421
+ return -ENOMEM;
14671422 }
14681423
14691424 rx_ring->next_to_alloc = 0;
....@@ -1475,16 +1430,17 @@
14751430 err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
14761431 rx_ring->queue_index);
14771432 if (err < 0)
1478
- goto err;
1433
+ return err;
14791434 }
14801435
14811436 rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
14821437
1438
+ rx_ring->rx_bi =
1439
+ kcalloc(rx_ring->count, sizeof(*rx_ring->rx_bi), GFP_KERNEL);
1440
+ if (!rx_ring->rx_bi)
1441
+ return -ENOMEM;
1442
+
14831443 return 0;
1484
-err:
1485
- kfree(rx_ring->rx_bi);
1486
- rx_ring->rx_bi = NULL;
1487
- return err;
14881444 }
14891445
14901446 /**
....@@ -1492,7 +1448,7 @@
14921448 * @rx_ring: ring to bump
14931449 * @val: new head index
14941450 **/
1495
-static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
1451
+void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
14961452 {
14971453 rx_ring->next_to_use = val;
14981454
@@ -1517,6 +1473,22 @@
 static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
 {
         return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
+}
+
+static unsigned int i40e_rx_frame_truesize(struct i40e_ring *rx_ring,
+                                           unsigned int size)
+{
+        unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+        truesize = i40e_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
+#else
+        truesize = i40e_rx_offset(rx_ring) ?
+                SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring)) +
+                SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
+                SKB_DATA_ALIGN(size);
+#endif
+        return truesize;
 }
 
 /**
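Note: a standalone illustration of the truesize math in the new i40e_rx_frame_truesize() helper for the PAGE_SIZE >= 8192 branch. The 64-byte cache line used for SKB_DATA_ALIGN() and the 320-byte skb_shared_info size are assumptions for the demo, and the headroom argument is just an example value; on PAGE_SIZE < 8192 systems the driver simply uses half an Rx page.

/* Standalone sketch; DEMO_* values are assumptions, not driver constants. */
#include <stdio.h>

#define DEMO_CACHE_LINE 64                      /* assumed SMP_CACHE_BYTES */
#define DEMO_SHINFO_SZ  320                     /* assumed sizeof(struct skb_shared_info) */
#define DEMO_ALIGN(x)   (((x) + DEMO_CACHE_LINE - 1) & ~(DEMO_CACHE_LINE - 1))

static unsigned int demo_truesize(unsigned int size, unsigned int rx_offset)
{
        /* mirrors the PAGE_SIZE >= 8192 branch of i40e_rx_frame_truesize() */
        return rx_offset ? DEMO_ALIGN(size + rx_offset) + DEMO_ALIGN(DEMO_SHINFO_SZ)
                         : DEMO_ALIGN(size);
}

int main(void)
{
        printf("1500B frame, 256B headroom: truesize=%u\n", demo_truesize(1500, 256));
        printf("1500B frame, legacy-rx (no headroom): truesize=%u\n", demo_truesize(1500, 0));
        return 0;
}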
....@@ -1571,24 +1543,6 @@
15711543 }
15721544
15731545 /**
1574
- * i40e_receive_skb - Send a completed packet up the stack
1575
- * @rx_ring: rx ring in play
1576
- * @skb: packet to send up
1577
- * @vlan_tag: vlan tag for packet
1578
- **/
1579
-static void i40e_receive_skb(struct i40e_ring *rx_ring,
1580
- struct sk_buff *skb, u16 vlan_tag)
1581
-{
1582
- struct i40e_q_vector *q_vector = rx_ring->q_vector;
1583
-
1584
- if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
1585
- (vlan_tag & VLAN_VID_MASK))
1586
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
1587
-
1588
- napi_gro_receive(&q_vector->napi, skb);
1589
-}
1590
-
1591
-/**
15921546 * i40e_alloc_rx_buffers - Replace used receive buffers
15931547 * @rx_ring: ring to place buffers on
15941548 * @cleaned_count: number of buffers to replace
....@@ -1606,7 +1560,7 @@
16061560 return false;
16071561
16081562 rx_desc = I40E_RX_DESC(rx_ring, ntu);
1609
- bi = &rx_ring->rx_bi[ntu];
1563
+ bi = i40e_rx_bi(rx_ring, ntu);
16101564
16111565 do {
16121566 if (!i40e_alloc_mapped_page(rx_ring, bi))
....@@ -1628,7 +1582,7 @@
16281582 ntu++;
16291583 if (unlikely(ntu == rx_ring->count)) {
16301584 rx_desc = I40E_RX_DESC(rx_ring, 0);
1631
- bi = rx_ring->rx_bi;
1585
+ bi = i40e_rx_bi(rx_ring, 0);
16321586 ntu = 0;
16331587 }
16341588
....@@ -1733,7 +1687,7 @@
17331687 case I40E_RX_PTYPE_INNER_PROT_UDP:
17341688 case I40E_RX_PTYPE_INNER_PROT_SCTP:
17351689 skb->ip_summed = CHECKSUM_UNNECESSARY;
1736
- /* fall though */
1690
+ fallthrough;
17371691 default:
17381692 break;
17391693 }
....@@ -1798,16 +1752,13 @@
17981752 * @rx_ring: rx descriptor ring packet is being transacted on
17991753 * @rx_desc: pointer to the EOP Rx descriptor
18001754 * @skb: pointer to current skb being populated
1801
- * @rx_ptype: the packet type decoded by hardware
18021755 *
18031756 * This function checks the ring, descriptor, and packet information in
18041757 * order to populate the hash, checksum, VLAN, protocol, and
18051758 * other fields within the skb.
18061759 **/
1807
-static inline
18081760 void i40e_process_skb_fields(struct i40e_ring *rx_ring,
1809
- union i40e_rx_desc *rx_desc, struct sk_buff *skb,
1810
- u8 rx_ptype)
1761
+ union i40e_rx_desc *rx_desc, struct sk_buff *skb)
18111762 {
18121763 u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
18131764 u32 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
....@@ -1815,6 +1766,8 @@
18151766 u32 tsynvalid = rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK;
18161767 u32 tsyn = (rx_status & I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
18171768 I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT;
1769
+ u8 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1770
+ I40E_RXD_QW1_PTYPE_SHIFT;
18181771
18191772 if (unlikely(tsynvalid))
18201773 i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, tsyn);
....@@ -1824,6 +1777,13 @@
18241777 i40e_rx_checksum(rx_ring->vsi, skb, rx_desc);
18251778
18261779 skb_record_rx_queue(skb, rx_ring->queue_index);
1780
+
1781
+ if (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
1782
+ __le16 vlan_tag = rx_desc->wb.qword0.lo_dword.l2tag1;
1783
+
1784
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
1785
+ le16_to_cpu(vlan_tag));
1786
+ }
18271787
18281788 /* modifies the skb - consumes the enet header */
18291789 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
....@@ -1847,10 +1807,6 @@
18471807 union i40e_rx_desc *rx_desc)
18481808
18491809 {
1850
- /* XDP packets use error pointer so abort at this point */
1851
- if (IS_ERR(skb))
1852
- return true;
1853
-
18541810 /* ERR_MASK will only have valid bits if EOP set, and
18551811 * what we are doing here is actually checking
18561812 * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
....@@ -1887,6 +1843,7 @@
18871843 * the adapter for another receive
18881844 *
18891845 * @rx_buffer: buffer containing the page
1846
+ * @rx_buffer_pgcnt: buffer page refcount pre xdp_do_redirect() call
18901847 *
18911848 * If page is reusable, rx_buffer->page_offset is adjusted to point to
18921849 * an unused region in the page.
....@@ -1909,7 +1866,8 @@
19091866 *
19101867 * In either case, if the page is reusable its refcount is increased.
19111868 **/
1912
-static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
1869
+static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
1870
+ int rx_buffer_pgcnt)
19131871 {
19141872 unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
19151873 struct page *page = rx_buffer->page;
....@@ -1920,7 +1878,7 @@
19201878
19211879 #if (PAGE_SIZE < 8192)
19221880 /* if we are only owner of page we can reuse it */
1923
- if (unlikely((page_count(page) - pagecnt_bias) > 1))
1881
+ if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
19241882 return false;
19251883 #else
19261884 #define I40E_LAST_OFFSET \
....@@ -1979,17 +1937,25 @@
19791937 * i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use
19801938 * @rx_ring: rx descriptor ring to transact packets on
19811939 * @size: size of buffer to add to skb
1940
+ * @rx_buffer_pgcnt: buffer page refcount
19821941 *
19831942 * This function will pull an Rx buffer from the ring and synchronize it
19841943 * for use by the CPU.
19851944 */
19861945 static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
1987
- const unsigned int size)
1946
+ const unsigned int size,
1947
+ int *rx_buffer_pgcnt)
19881948 {
19891949 struct i40e_rx_buffer *rx_buffer;
19901950
1991
- rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
1992
- prefetchw(rx_buffer->page);
1951
+ rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
1952
+ *rx_buffer_pgcnt =
1953
+#if (PAGE_SIZE < 8192)
1954
+ page_count(rx_buffer->page);
1955
+#else
1956
+ 0;
1957
+#endif
1958
+ prefetch_page_address(rx_buffer->page);
19931959
19941960 /* we are reusing so sync this buffer for CPU use */
19951961 dma_sync_single_range_for_cpu(rx_ring->dev,
....@@ -2028,10 +1994,8 @@
20281994 struct sk_buff *skb;
20291995
20301996 /* prefetch first cache line of first page */
2031
- prefetch(xdp->data);
2032
-#if L1_CACHE_BYTES < 128
2033
- prefetch(xdp->data + L1_CACHE_BYTES);
2034
-#endif
1997
+ net_prefetch(xdp->data);
1998
+
20351999 /* Note, we get here by enabling legacy-rx via:
20362000 *
20372001 * ethtool --set-priv-flags <dev> legacy-rx on
....@@ -2058,7 +2022,8 @@
20582022 /* Determine available headroom for copy */
20592023 headlen = size;
20602024 if (headlen > I40E_RX_HDR_SIZE)
2061
- headlen = eth_get_headlen(xdp->data, I40E_RX_HDR_SIZE);
2025
+ headlen = eth_get_headlen(skb->dev, xdp->data,
2026
+ I40E_RX_HDR_SIZE);
20622027
20632028 /* align pull length to size of long to optimize memcpy performance */
20642029 memcpy(__skb_put(skb, headlen), xdp->data,
....@@ -2113,10 +2078,8 @@
21132078 * likely have a consumer accessing first few bytes of meta
21142079 * data, and then actual data.
21152080 */
2116
- prefetch(xdp->data_meta);
2117
-#if L1_CACHE_BYTES < 128
2118
- prefetch(xdp->data_meta + L1_CACHE_BYTES);
2119
-#endif
2081
+ net_prefetch(xdp->data_meta);
2082
+
21202083 /* build an skb around the page buffer */
21212084 skb = build_skb(xdp->data_hard_start, truesize);
21222085 if (unlikely(!skb))
....@@ -2142,17 +2105,18 @@
21422105 * i40e_put_rx_buffer - Clean up used buffer and either recycle or free
21432106 * @rx_ring: rx descriptor ring to transact packets on
21442107 * @rx_buffer: rx buffer to pull data from
2108
+ * @rx_buffer_pgcnt: rx buffer page refcount pre xdp_do_redirect() call
21452109 *
21462110 * This function will clean up the contents of the rx_buffer. It will
21472111 * either recycle the buffer or unmap it and free the associated resources.
21482112 */
21492113 static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
2150
- struct i40e_rx_buffer *rx_buffer)
2114
+ struct i40e_rx_buffer *rx_buffer,
2115
+ int rx_buffer_pgcnt)
21512116 {
2152
- if (i40e_can_reuse_rx_page(rx_buffer)) {
2117
+ if (i40e_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
21532118 /* hand second half of page back to the ring */
21542119 i40e_reuse_rx_page(rx_ring, rx_buffer);
2155
- rx_ring->rx_stats.page_reuse_count++;
21562120 } else {
21572121 /* we are not reusing the buffer so unmap it */
21582122 dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
....@@ -2160,10 +2124,9 @@
21602124 DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
21612125 __page_frag_cache_drain(rx_buffer->page,
21622126 rx_buffer->pagecnt_bias);
2127
+ /* clear contents of buffer_info */
2128
+ rx_buffer->page = NULL;
21632129 }
2164
-
2165
- /* clear contents of buffer_info */
2166
- rx_buffer->page = NULL;
21672130 }
21682131
21692132 /**
....@@ -2199,18 +2162,12 @@
21992162 return true;
22002163 }
22012164
2202
-#define I40E_XDP_PASS 0
2203
-#define I40E_XDP_CONSUMED BIT(0)
2204
-#define I40E_XDP_TX BIT(1)
2205
-#define I40E_XDP_REDIR BIT(2)
2206
-
22072165 static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
22082166 struct i40e_ring *xdp_ring);
22092167
2210
-static int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp,
2211
- struct i40e_ring *xdp_ring)
2168
+int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring)
22122169 {
2213
- struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
2170
+ struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
22142171
22152172 if (unlikely(!xdpf))
22162173 return I40E_XDP_CONSUMED;
....@@ -2223,8 +2180,7 @@
22232180 * @rx_ring: Rx ring being processed
22242181 * @xdp: XDP buffer containing the frame
22252182 **/
2226
-static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
2227
- struct xdp_buff *xdp)
2183
+static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
22282184 {
22292185 int err, result = I40E_XDP_PASS;
22302186 struct i40e_ring *xdp_ring;
@@ -2246,24 +2202,29 @@
         case XDP_TX:
                 xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
                 result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
+                if (result == I40E_XDP_CONSUMED)
+                        goto out_failure;
                 break;
         case XDP_REDIRECT:
                 err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-                result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
+                if (err)
+                        goto out_failure;
+                result = I40E_XDP_REDIR;
                 break;
         default:
                 bpf_warn_invalid_xdp_action(act);
-                /* fall through */
+                fallthrough;
         case XDP_ABORTED:
+out_failure:
                 trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
-                /* fall through -- handle aborts by dropping packet */
+                fallthrough; /* handle aborts by dropping packet */
         case XDP_DROP:
                 result = I40E_XDP_CONSUMED;
                 break;
         }
 xdp_out:
         rcu_read_unlock();
-        return ERR_PTR(-result);
+        return result;
 }
 
 /**
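Note: i40e_run_xdp() now returns the raw verdict bitmask instead of an ERR_PTR-encoded skb. A standalone sketch of how those verdicts are accumulated per packet and acted on once per NAPI batch; the bit values match the defines this patch removes from this file (PASS=0, CONSUMED=BIT(0), TX=BIT(1), REDIR=BIT(2)), which presumably now come from a shared header.

#include <stdio.h>

#define I40E_XDP_PASS           0
#define I40E_XDP_CONSUMED       (1u << 0)
#define I40E_XDP_TX             (1u << 1)
#define I40E_XDP_REDIR          (1u << 2)

int main(void)
{
        unsigned int verdicts[] = { I40E_XDP_PASS, I40E_XDP_TX,
                                    I40E_XDP_REDIR, I40E_XDP_CONSUMED };
        unsigned int xdp_xmit = 0;

        /* per-packet loop: only TX/REDIR verdicts are remembered */
        for (unsigned int i = 0; i < 4; i++)
                if (verdicts[i] & (I40E_XDP_TX | I40E_XDP_REDIR))
                        xdp_xmit |= verdicts[i];

        /* once per batch: flush the redirect map and/or bump the XDP Tx tail */
        if (xdp_xmit & I40E_XDP_REDIR)
                printf("xdp_do_flush_map()\n");
        if (xdp_xmit & I40E_XDP_TX)
                printf("i40e_xdp_ring_update_tail(xdp_ring)\n");
        return 0;
}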
....@@ -2276,24 +2237,83 @@
22762237 struct i40e_rx_buffer *rx_buffer,
22772238 unsigned int size)
22782239 {
2279
-#if (PAGE_SIZE < 8192)
2280
- unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
2240
+ unsigned int truesize = i40e_rx_frame_truesize(rx_ring, size);
22812241
2242
+#if (PAGE_SIZE < 8192)
22822243 rx_buffer->page_offset ^= truesize;
22832244 #else
2284
- unsigned int truesize = SKB_DATA_ALIGN(i40e_rx_offset(rx_ring) + size);
2285
-
22862245 rx_buffer->page_offset += truesize;
22872246 #endif
22882247 }
22892248
2290
-static inline void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring)
2249
+/**
2250
+ * i40e_xdp_ring_update_tail - Updates the XDP Tx ring tail register
2251
+ * @xdp_ring: XDP Tx ring
2252
+ *
2253
+ * This function updates the XDP Tx ring tail register.
2254
+ **/
2255
+void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring)
22912256 {
22922257 /* Force memory writes to complete before letting h/w
22932258 * know there are new descriptors to fetch.
22942259 */
22952260 wmb();
22962261 writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
2262
+}
2263
+
2264
+/**
2265
+ * i40e_update_rx_stats - Update Rx ring statistics
2266
+ * @rx_ring: rx descriptor ring
2267
+ * @total_rx_bytes: number of bytes received
2268
+ * @total_rx_packets: number of packets received
2269
+ *
2270
+ * This function updates the Rx ring statistics.
2271
+ **/
2272
+void i40e_update_rx_stats(struct i40e_ring *rx_ring,
2273
+ unsigned int total_rx_bytes,
2274
+ unsigned int total_rx_packets)
2275
+{
2276
+ u64_stats_update_begin(&rx_ring->syncp);
2277
+ rx_ring->stats.packets += total_rx_packets;
2278
+ rx_ring->stats.bytes += total_rx_bytes;
2279
+ u64_stats_update_end(&rx_ring->syncp);
2280
+ rx_ring->q_vector->rx.total_packets += total_rx_packets;
2281
+ rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
2282
+}
2283
+
2284
+/**
2285
+ * i40e_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map
2286
+ * @rx_ring: Rx ring
2287
+ * @xdp_res: Result of the receive batch
2288
+ *
2289
+ * This function bumps XDP Tx tail and/or flush redirect map, and
2290
+ * should be called when a batch of packets has been processed in the
2291
+ * napi loop.
2292
+ **/
2293
+void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res)
2294
+{
2295
+ if (xdp_res & I40E_XDP_REDIR)
2296
+ xdp_do_flush_map();
2297
+
2298
+ if (xdp_res & I40E_XDP_TX) {
2299
+ struct i40e_ring *xdp_ring =
2300
+ rx_ring->vsi->xdp_rings[rx_ring->queue_index];
2301
+
2302
+ i40e_xdp_ring_update_tail(xdp_ring);
2303
+ }
2304
+}
2305
+
2306
+/**
2307
+ * i40e_inc_ntc: Advance the next_to_clean index
2308
+ * @rx_ring: Rx ring
2309
+ **/
2310
+static void i40e_inc_ntc(struct i40e_ring *rx_ring)
2311
+{
2312
+ u32 ntc = rx_ring->next_to_clean + 1;
2313
+
2314
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
2315
+ rx_ring->next_to_clean = ntc;
2316
+ prefetch(I40E_RX_DESC(rx_ring, ntc));
22972317 }
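Note: the i40e_inc_ntc() helper added above advances the Rx ring's next_to_clean index by one and wraps it to 0 at rx_ring->count. A trivial standalone check of that arithmetic:

#include <stdio.h>

static unsigned int demo_inc_ntc(unsigned int ntc, unsigned int count)
{
        ntc++;
        return (ntc < count) ? ntc : 0;
}

int main(void)
{
        unsigned int count = 4, ntc = 0;

        for (int i = 0; i < 6; i++) {
                ntc = demo_inc_ntc(ntc, count);
                printf("next_to_clean=%u\n", ntc);
        }
        return 0;       /* prints 1 2 3 0 1 2 */
}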
22982318
22992319 /**
....@@ -2316,15 +2336,18 @@
23162336 unsigned int xdp_xmit = 0;
23172337 bool failure = false;
23182338 struct xdp_buff xdp;
2339
+ int xdp_res = 0;
23192340
2341
+#if (PAGE_SIZE < 8192)
2342
+ xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, 0);
2343
+#endif
23202344 xdp.rxq = &rx_ring->xdp_rxq;
23212345
23222346 while (likely(total_rx_packets < (unsigned int)budget)) {
23232347 struct i40e_rx_buffer *rx_buffer;
23242348 union i40e_rx_desc *rx_desc;
2349
+ int rx_buffer_pgcnt;
23252350 unsigned int size;
2326
- u16 vlan_tag;
2327
- u8 rx_ptype;
23282351 u64 qword;
23292352
23302353 /* return some buffers to hardware, one at a time is too slow */
....@@ -2349,18 +2372,24 @@
23492372 */
23502373 dma_rmb();
23512374
2352
- if (unlikely(i40e_rx_is_programming_status(qword))) {
2353
- i40e_clean_programming_status(rx_ring, rx_desc, qword);
2375
+ if (i40e_rx_is_programming_status(qword)) {
2376
+ i40e_clean_programming_status(rx_ring,
2377
+ rx_desc->raw.qword[0],
2378
+ qword);
2379
+ rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
2380
+ i40e_inc_ntc(rx_ring);
2381
+ i40e_reuse_rx_page(rx_ring, rx_buffer);
23542382 cleaned_count++;
23552383 continue;
23562384 }
2385
+
23572386 size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
23582387 I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
23592388 if (!size)
23602389 break;
23612390
23622391 i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb);
2363
- rx_buffer = i40e_get_rx_buffer(rx_ring, size);
2392
+ rx_buffer = i40e_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt);
23642393
23652394 /* retrieve a buffer from the ring */
23662395 if (!skb) {
....@@ -2370,13 +2399,14 @@
23702399 xdp.data_hard_start = xdp.data -
23712400 i40e_rx_offset(rx_ring);
23722401 xdp.data_end = xdp.data + size;
2373
-
2374
- skb = i40e_run_xdp(rx_ring, &xdp);
2402
+#if (PAGE_SIZE > 4096)
2403
+ /* At larger PAGE_SIZE, frame_sz depend on len size */
2404
+ xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, size);
2405
+#endif
2406
+ xdp_res = i40e_run_xdp(rx_ring, &xdp);
23752407 }
23762408
2377
- if (IS_ERR(skb)) {
2378
- unsigned int xdp_res = -PTR_ERR(skb);
2379
-
2409
+ if (xdp_res) {
23802410 if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) {
23812411 xdp_xmit |= xdp_res;
23822412 i40e_rx_buffer_flip(rx_ring, rx_buffer, size);
....@@ -2394,19 +2424,19 @@
23942424 }
23952425
23962426 /* exit if we failed to retrieve a buffer */
2397
- if (!skb) {
2427
+ if (!xdp_res && !skb) {
23982428 rx_ring->rx_stats.alloc_buff_failed++;
23992429 rx_buffer->pagecnt_bias++;
24002430 break;
24012431 }
24022432
2403
- i40e_put_rx_buffer(rx_ring, rx_buffer);
2433
+ i40e_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt);
24042434 cleaned_count++;
24052435
24062436 if (i40e_is_non_eop(rx_ring, rx_desc, skb))
24072437 continue;
24082438
2409
- if (i40e_cleanup_headers(rx_ring, skb, rx_desc)) {
2439
+ if (xdp_res || i40e_cleanup_headers(rx_ring, skb, rx_desc)) {
24102440 skb = NULL;
24112441 continue;
24122442 }
....@@ -2414,42 +2444,21 @@
24142444 /* probably a little skewed due to removing CRC */
24152445 total_rx_bytes += skb->len;
24162446
2417
- qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
2418
- rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
2419
- I40E_RXD_QW1_PTYPE_SHIFT;
2420
-
24212447 /* populate checksum, VLAN, and protocol */
2422
- i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
2423
-
2424
- vlan_tag = (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) ?
2425
- le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0;
2448
+ i40e_process_skb_fields(rx_ring, rx_desc, skb);
24262449
24272450 i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb);
2428
- i40e_receive_skb(rx_ring, skb, vlan_tag);
2451
+ napi_gro_receive(&rx_ring->q_vector->napi, skb);
24292452 skb = NULL;
24302453
24312454 /* update budget accounting */
24322455 total_rx_packets++;
24332456 }
24342457
2435
- if (xdp_xmit & I40E_XDP_REDIR)
2436
- xdp_do_flush_map();
2437
-
2438
- if (xdp_xmit & I40E_XDP_TX) {
2439
- struct i40e_ring *xdp_ring =
2440
- rx_ring->vsi->xdp_rings[rx_ring->queue_index];
2441
-
2442
- i40e_xdp_ring_update_tail(xdp_ring);
2443
- }
2444
-
2458
+ i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
24452459 rx_ring->skb = skb;
24462460
2447
- u64_stats_update_begin(&rx_ring->syncp);
2448
- rx_ring->stats.packets += total_rx_packets;
2449
- rx_ring->stats.bytes += total_rx_bytes;
2450
- u64_stats_update_end(&rx_ring->syncp);
2451
- rx_ring->q_vector->rx.total_packets += total_rx_packets;
2452
- rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
2461
+ i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets);
24532462
24542463 /* guarantee a trip back through this routine if there was a failure */
24552464 return failure ? budget : (int)total_rx_packets;
....@@ -2587,7 +2596,11 @@
25872596 * budget and be more aggressive about cleaning up the Tx descriptors.
25882597 */
25892598 i40e_for_each_ring(ring, q_vector->tx) {
2590
- if (!i40e_clean_tx_irq(vsi, ring, budget)) {
2599
+ bool wd = ring->xsk_pool ?
2600
+ i40e_clean_xdp_tx_irq(vsi, ring) :
2601
+ i40e_clean_tx_irq(vsi, ring, budget);
2602
+
2603
+ if (!wd) {
25912604 clean_complete = false;
25922605 continue;
25932606 }
@@ -2599,13 +2612,21 @@
         if (budget <= 0)
                 goto tx_only;
 
-        /* We attempt to distribute budget to each Rx queue fairly, but don't
-         * allow the budget to go below 1 because that would exit polling early.
-         */
-        budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
+        /* normally we have 1 Rx ring per q_vector */
+        if (unlikely(q_vector->num_ringpairs > 1))
+                /* We attempt to distribute budget to each Rx queue fairly, but
+                 * don't allow the budget to go below 1 because that would exit
+                 * polling early.
+                 */
+                budget_per_ring = max_t(int, budget / q_vector->num_ringpairs, 1);
+        else
+                /* Max of 1 Rx ring in this q_vector so give it the budget */
+                budget_per_ring = budget;
 
         i40e_for_each_ring(ring, q_vector->rx) {
-                int cleaned = i40e_clean_rx_irq(ring, budget_per_ring);
+                int cleaned = ring->xsk_pool ?
+                              i40e_clean_rx_irq_zc(ring, budget_per_ring) :
+                              i40e_clean_rx_irq(ring, budget_per_ring);
 
                 work_done += cleaned;
                 /* if we clean as many as budgeted, we must not be done */
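Note: a standalone illustration of the Rx budget split introduced above; the common single-ringpair case hands the whole NAPI budget to the one Rx ring, while multiple ringpairs each get at least 1. The demo function name is illustrative only.

#include <stdio.h>

static int demo_budget_per_ring(int budget, int num_ringpairs)
{
        if (num_ringpairs > 1) {
                int per_ring = budget / num_ringpairs;

                return per_ring > 1 ? per_ring : 1;
        }
        return budget;  /* single Rx ring gets the full budget */
}

int main(void)
{
        printf("budget 64, 1 ringpair  -> %d\n", demo_budget_per_ring(64, 1)); /* 64 */
        printf("budget 64, 4 ringpairs -> %d\n", demo_budget_per_ring(64, 4)); /* 16 */
        printf("budget 2,  4 ringpairs -> %d\n", demo_budget_per_ring(2, 4));  /* 1  */
        return 0;
}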
....@@ -2645,10 +2666,11 @@
26452666 if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
26462667 q_vector->arm_wb_state = false;
26472668
2648
- /* Work is done so exit the polling mode and re-enable the interrupt */
2649
- napi_complete_done(napi, work_done);
2650
-
2651
- i40e_update_enable_itr(vsi, q_vector);
2669
+ /* Exit the polling mode, but don't re-enable interrupts if stack might
2670
+ * poll us due to busy-polling
2671
+ */
2672
+ if (likely(napi_complete_done(napi, work_done)))
2673
+ i40e_update_enable_itr(vsi, q_vector);
26522674
26532675 return min(work_done, budget - 1);
26542676 }
@@ -2955,10 +2977,16 @@
 
         /* remove payload length from inner checksum */
         paylen = skb->len - l4_offset;
-        csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
 
-        /* compute length of segmentation header */
-        *hdr_len = (l4.tcp->doff * 4) + l4_offset;
+        if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+                csum_replace_by_diff(&l4.udp->check, (__force __wsum)htonl(paylen));
+                /* compute length of segmentation header */
+                *hdr_len = sizeof(*l4.udp) + l4_offset;
+        } else {
+                csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
+                /* compute length of segmentation header */
+                *hdr_len = (l4.tcp->doff * 4) + l4_offset;
+        }
 
         /* pull values out of skb_shinfo */
         gso_size = skb_shinfo(skb)->gso_size;
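Note: a standalone illustration of the segmentation-header length math for the new UDP GSO branch versus the existing TCP path. The UDP header is a fixed 8 bytes, a TCP header is doff 32-bit words; the l4_offset used here is just an example Ethernet + IPv4 header length.

#include <stdio.h>

int main(void)
{
        unsigned int l4_offset = 14 + 20;       /* Ethernet + IPv4, no options */
        unsigned int udp_hdr_sz = 8;            /* sizeof(struct udphdr) */
        unsigned int tcp_doff = 5;              /* 20-byte TCP header */

        printf("UDP GSO hdr_len = %u\n", udp_hdr_sz + l4_offset);       /* 42 */
        printf("TCP GSO hdr_len = %u\n", tcp_doff * 4 + l4_offset);     /* 54 */
        return 0;
}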
....@@ -3260,7 +3288,7 @@
32603288 **/
32613289 bool __i40e_chk_linearize(struct sk_buff *skb)
32623290 {
3263
- const struct skb_frag_struct *frag, *stale;
3291
+ const skb_frag_t *frag, *stale;
32643292 int nr_frags, sum;
32653293
32663294 /* no need to check if number of frags is less than 7 */
....@@ -3304,7 +3332,7 @@
33043332 * descriptor associated with the fragment.
33053333 */
33063334 if (stale_size > I40E_MAX_DATA_PER_TXD) {
3307
- int align_pad = -(stale->page_offset) &
3335
+ int align_pad = -(skb_frag_off(stale)) &
33083336 (I40E_MAX_READ_REQ_SIZE - 1);
33093337
33103338 sum -= align_pad;
....@@ -3347,7 +3375,7 @@
33473375 {
33483376 unsigned int data_len = skb->data_len;
33493377 unsigned int size = skb_headlen(skb);
3350
- struct skb_frag_struct *frag;
3378
+ skb_frag_t *frag;
33513379 struct i40e_tx_buffer *tx_bi;
33523380 struct i40e_tx_desc *tx_desc;
33533381 u16 i = tx_ring->next_to_use;
....@@ -3454,6 +3482,8 @@
34543482 tx_desc->cmd_type_offset_bsz =
34553483 build_ctob(td_cmd, td_offset, size, td_tag);
34563484
3485
+ skb_tx_timestamp(skb);
3486
+
34573487 /* Force memory writes to complete before letting h/w know there
34583488 * are new descriptors to fetch.
34593489 *
....@@ -3466,13 +3496,8 @@
34663496 first->next_to_watch = tx_desc;
34673497
34683498 /* notify HW of packet */
3469
- if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
3499
+ if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
34703500 writel(i, tx_ring->tail);
3471
-
3472
- /* we need this if more than one processor can write to our tail
3473
- * at a time, it synchronizes IO on IA64/Altix systems
3474
- */
3475
- mmiowb();
34763501 }
34773502
34783503 return 0;
....@@ -3496,9 +3521,58 @@
34963521 return -1;
34973522 }
34983523
3524
+static u16 i40e_swdcb_skb_tx_hash(struct net_device *dev,
3525
+ const struct sk_buff *skb,
3526
+ u16 num_tx_queues)
3527
+{
3528
+ u32 jhash_initval_salt = 0xd631614b;
3529
+ u32 hash;
3530
+
3531
+ if (skb->sk && skb->sk->sk_hash)
3532
+ hash = skb->sk->sk_hash;
3533
+ else
3534
+ hash = (__force u16)skb->protocol ^ skb->hash;
3535
+
3536
+ hash = jhash_1word(hash, jhash_initval_salt);
3537
+
3538
+ return (u16)(((u64)hash * num_tx_queues) >> 32);
3539
+}
3540
+
3541
+u16 i40e_lan_select_queue(struct net_device *netdev,
3542
+ struct sk_buff *skb,
3543
+ struct net_device __always_unused *sb_dev)
3544
+{
3545
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
3546
+ struct i40e_vsi *vsi = np->vsi;
3547
+ struct i40e_hw *hw;
3548
+ u16 qoffset;
3549
+ u16 qcount;
3550
+ u8 tclass;
3551
+ u16 hash;
3552
+ u8 prio;
3553
+
3554
+ /* is DCB enabled at all? */
3555
+ if (vsi->tc_config.numtc == 1)
3556
+ return netdev_pick_tx(netdev, skb, sb_dev);
3557
+
3558
+ prio = skb->priority;
3559
+ hw = &vsi->back->hw;
3560
+ tclass = hw->local_dcbx_config.etscfg.prioritytable[prio];
3561
+ /* sanity check */
3562
+ if (unlikely(!(vsi->tc_config.enabled_tc & BIT(tclass))))
3563
+ tclass = 0;
3564
+
3565
+ /* select a queue assigned for the given TC */
3566
+ qcount = vsi->tc_config.tc_info[tclass].qcount;
3567
+ hash = i40e_swdcb_skb_tx_hash(netdev, skb, qcount);
3568
+
3569
+ qoffset = vsi->tc_config.tc_info[tclass].qoffset;
3570
+ return qoffset + hash;
3571
+}
3572
+
34993573 /**
35003574 * i40e_xmit_xdp_ring - transmits an XDP buffer to an XDP Tx ring
3501
- * @xdp: data to transmit
3575
+ * @xdpf: data to transmit
35023576 * @xdp_ring: XDP Tx ring
35033577 **/
35043578 static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
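Note: the i40e_swdcb_skb_tx_hash() helper added in the hunk above maps a 32-bit hash into [0, num_tx_queues) with a multiply-and-shift. A standalone check of that mapping; the kernel-only jhash_1word() step is replaced here by fixed example hash values.

#include <stdint.h>
#include <stdio.h>

static uint16_t demo_pick_queue(uint32_t hash, uint16_t num_tx_queues)
{
        /* same scaling as the helper: ((u64)hash * num_tx_queues) >> 32 */
        return (uint16_t)(((uint64_t)hash * num_tx_queues) >> 32);
}

int main(void)
{
        uint32_t hashes[] = { 0x00000000u, 0x40000000u, 0xc0000000u, 0xffffffffu };

        for (int i = 0; i < 4; i++)
                printf("hash=0x%08x -> queue %u of 8\n",
                       hashes[i], demo_pick_queue(hashes[i], 8));
        /* prints queues 0, 2, 6, 7 -- always within [0, num_tx_queues) */
        return 0;
}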
....@@ -3507,6 +3581,7 @@
35073581 u16 i = xdp_ring->next_to_use;
35083582 struct i40e_tx_buffer *tx_bi;
35093583 struct i40e_tx_desc *tx_desc;
3584
+ void *data = xdpf->data;
35103585 u32 size = xdpf->len;
35113586 dma_addr_t dma;
35123587
....@@ -3514,8 +3589,7 @@
35143589 xdp_ring->tx_stats.tx_busy++;
35153590 return I40E_XDP_CONSUMED;
35163591 }
3517
-
3518
- dma = dma_map_single(xdp_ring->dev, xdpf->data, size, DMA_TO_DEVICE);
3592
+ dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE);
35193593 if (dma_mapping_error(xdp_ring->dev, dma))
35203594 return I40E_XDP_CONSUMED;
35213595
....@@ -3539,6 +3613,7 @@
35393613 */
35403614 smp_wmb();
35413615
3616
+ xdp_ring->xdp_tx_active++;
35423617 i++;
35433618 if (i == xdp_ring->count)
35443619 i = 0;
....@@ -3633,8 +3708,6 @@
36333708 if (tsyn)
36343709 tx_flags |= I40E_TX_FLAGS_TSYN;
36353710
3636
- skb_tx_timestamp(skb);
3637
-
36383711 /* always enable CRC insertion offload */
36393712 td_cmd |= I40E_TX_DESC_CMD_ICRC;
36403713
....@@ -3694,7 +3767,9 @@
36943767 /**
36953768 * i40e_xdp_xmit - Implements ndo_xdp_xmit
36963769 * @dev: netdev
3697
- * @xdp: XDP buffer
3770
+ * @n: number of frames
3771
+ * @frames: array of XDP buffer pointers
3772
+ * @flags: XDP extra info
36983773 *
36993774 * Returns number of frames successfully sent. Frames that fail are
37003775 * free'ed via XDP return API.
....@@ -3708,6 +3783,7 @@
37083783 struct i40e_netdev_priv *np = netdev_priv(dev);
37093784 unsigned int queue_index = smp_processor_id();
37103785 struct i40e_vsi *vsi = np->vsi;
3786
+ struct i40e_pf *pf = vsi->back;
37113787 struct i40e_ring *xdp_ring;
37123788 int drops = 0;
37133789 int i;
....@@ -3715,7 +3791,8 @@
37153791 if (test_bit(__I40E_VSI_DOWN, vsi->state))
37163792 return -ENETDOWN;
37173793
3718
- if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
3794
+ if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs ||
3795
+ test_bit(__I40E_CONFIG_BUSY, pf->state))
37193796 return -ENXIO;
37203797
37213798 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))