2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -1,34 +1,9 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
 /* QLogic qede NIC Driver
  * Copyright (c) 2015-2017 QLogic Corporation
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and /or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2019-2020 Marvell International Ltd.
  */
+
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
@@ -327,49 +302,92 @@
 	wmb();
 }
 
-static int qede_xdp_xmit(struct qede_dev *edev, struct qede_fastpath *fp,
-			 struct sw_rx_data *metadata, u16 padding, u16 length)
+static int qede_xdp_xmit(struct qede_tx_queue *txq, dma_addr_t dma, u16 pad,
+			 u16 len, struct page *page, struct xdp_frame *xdpf)
 {
-	struct qede_tx_queue *txq = fp->xdp_tx;
-	struct eth_tx_1st_bd *first_bd;
-	u16 idx = txq->sw_tx_prod;
+	struct eth_tx_1st_bd *bd;
+	struct sw_tx_xdp *xdp;
 	u16 val;
 
-	if (!qed_chain_get_elem_left(&txq->tx_pbl)) {
+	if (unlikely(qed_chain_get_elem_used(&txq->tx_pbl) >=
+		     txq->num_tx_buffers)) {
 		txq->stopped_cnt++;
 		return -ENOMEM;
 	}
 
-	first_bd = (struct eth_tx_1st_bd *)qed_chain_produce(&txq->tx_pbl);
+	bd = qed_chain_produce(&txq->tx_pbl);
+	bd->data.nbds = 1;
+	bd->data.bd_flags.bitfields = BIT(ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT);
 
-	memset(first_bd, 0, sizeof(*first_bd));
-	first_bd->data.bd_flags.bitfields =
-	    BIT(ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT);
-
-	val = (length & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK) <<
+	val = (len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK) <<
 	       ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
 
-	first_bd->data.bitfields |= cpu_to_le16(val);
-	first_bd->data.nbds = 1;
+	bd->data.bitfields = cpu_to_le16(val);
 
 	/* We can safely ignore the offset, as it's 0 for XDP */
-	BD_SET_UNMAP_ADDR_LEN(first_bd, metadata->mapping + padding, length);
+	BD_SET_UNMAP_ADDR_LEN(bd, dma + pad, len);
 
-	/* Synchronize the buffer back to device, as program [probably]
-	 * has changed it.
-	 */
-	dma_sync_single_for_device(&edev->pdev->dev,
-				   metadata->mapping + padding,
-				   length, PCI_DMA_TODEVICE);
+	xdp = txq->sw_tx_ring.xdp + txq->sw_tx_prod;
+	xdp->mapping = dma;
+	xdp->page = page;
+	xdp->xdpf = xdpf;
 
-	txq->sw_tx_ring.xdp[idx].page = metadata->data;
-	txq->sw_tx_ring.xdp[idx].mapping = metadata->mapping;
 	txq->sw_tx_prod = (txq->sw_tx_prod + 1) % txq->num_tx_buffers;
 
-	/* Mark the fastpath for future XDP doorbell */
-	fp->xdp_xmit = 1;
-
 	return 0;
+}
+
+int qede_xdp_transmit(struct net_device *dev, int n_frames,
+		      struct xdp_frame **frames, u32 flags)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+	struct device *dmadev = &edev->pdev->dev;
+	struct qede_tx_queue *xdp_tx;
+	struct xdp_frame *xdpf;
+	dma_addr_t mapping;
+	int i, drops = 0;
+	u16 xdp_prod;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	if (unlikely(!netif_running(dev)))
+		return -ENETDOWN;
+
+	i = smp_processor_id() % edev->total_xdp_queues;
+	xdp_tx = edev->fp_array[i].xdp_tx;
+
+	spin_lock(&xdp_tx->xdp_tx_lock);
+
+	for (i = 0; i < n_frames; i++) {
+		xdpf = frames[i];
+
+		mapping = dma_map_single(dmadev, xdpf->data, xdpf->len,
+					 DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(dmadev, mapping))) {
+			xdp_return_frame_rx_napi(xdpf);
+			drops++;
+
+			continue;
+		}
+
+		if (unlikely(qede_xdp_xmit(xdp_tx, mapping, 0, xdpf->len,
+					   NULL, xdpf))) {
+			xdp_return_frame_rx_napi(xdpf);
+			drops++;
+		}
+	}
+
+	if (flags & XDP_XMIT_FLUSH) {
+		xdp_prod = qed_chain_get_prod_idx(&xdp_tx->tx_pbl);
+
+		xdp_tx->tx_db.data.bd_prod = cpu_to_le16(xdp_prod);
+		qede_update_tx_producer(xdp_tx);
+	}
+
+	spin_unlock(&xdp_tx->xdp_tx_lock);
+
+	return n_frames - drops;
 }
 
 int qede_txq_has_work(struct qede_tx_queue *txq)
@@ -387,20 +405,31 @@
 
 static void qede_xdp_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
-	u16 hw_bd_cons, idx;
+	struct sw_tx_xdp *xdp_info, *xdp_arr = txq->sw_tx_ring.xdp;
+	struct device *dev = &edev->pdev->dev;
+	struct xdp_frame *xdpf;
+	u16 hw_bd_cons;
 
 	hw_bd_cons = le16_to_cpu(*txq->hw_cons_ptr);
 	barrier();
 
 	while (hw_bd_cons != qed_chain_get_cons_idx(&txq->tx_pbl)) {
+		xdp_info = xdp_arr + txq->sw_tx_cons;
+		xdpf = xdp_info->xdpf;
+
+		if (xdpf) {
+			dma_unmap_single(dev, xdp_info->mapping, xdpf->len,
+					 DMA_TO_DEVICE);
+			xdp_return_frame(xdpf);
+
+			xdp_info->xdpf = NULL;
+		} else {
+			dma_unmap_page(dev, xdp_info->mapping, PAGE_SIZE,
+				       DMA_BIDIRECTIONAL);
+			__free_page(xdp_info->page);
+		}
+
 		qed_chain_consume(&txq->tx_pbl);
-		idx = txq->sw_tx_cons;
-
-		dma_unmap_page(&edev->pdev->dev,
-			       txq->sw_tx_ring.xdp[idx].mapping,
-			       PAGE_SIZE, DMA_BIDIRECTIONAL);
-		__free_page(txq->sw_tx_ring.xdp[idx].page);
-
 		txq->sw_tx_cons = (txq->sw_tx_cons + 1) % txq->num_tx_buffers;
 		txq->xmit_pkts++;
 	}
@@ -580,14 +609,6 @@
 
 	internal_ram_wr(rxq->hw_rxq_prod_addr, sizeof(rx_prods),
 			(u32 *)&rx_prods);
-
-	/* mmiowb is needed to synchronize doorbell writes from more than one
-	 * processor. It guarantees that the write arrives to the device before
-	 * the napi lock is released and another qede_poll is called (possibly
-	 * on another CPU). Without this barrier, the next doorbell can bypass
-	 * this doorbell. This is applicable to IA64/Altix systems.
-	 */
-	mmiowb();
 }
 
 static void qede_get_rxhash(struct sk_buff *skb, u8 bitfields, __le32 rss_hash)
@@ -731,6 +752,9 @@
 	buf = page_address(bd->data) + bd->page_offset;
 	skb = build_skb(buf, rxq->rx_buf_seg_size);
 
+	if (unlikely(!skb))
+		return NULL;
+
 	skb_reserve(skb, pad);
 	skb_put(skb, len);
 
@@ -787,8 +811,7 @@
 		return NULL;
 
 	skb_reserve(skb, pad);
-	memcpy(skb_put(skb, len),
-	       page_address(bd->data) + offset, len);
+	skb_put_data(skb, page_address(bd->data) + offset, len);
 	qede_reuse_page(rxq, bd);
 	goto out;
 }
@@ -857,13 +880,13 @@
 	qede_set_gro_params(edev, tpa_info->skb, cqe);
 
 cons_buf: /* We still need to handle bd_len_list to consume buffers */
-	if (likely(cqe->ext_bd_len_list[0]))
+	if (likely(cqe->bw_ext_bd_len_list[0]))
 		qede_fill_frag_skb(edev, rxq, cqe->tpa_agg_index,
-				   le16_to_cpu(cqe->ext_bd_len_list[0]));
+				   le16_to_cpu(cqe->bw_ext_bd_len_list[0]));
 
-	if (unlikely(cqe->ext_bd_len_list[1])) {
+	if (unlikely(cqe->bw_ext_bd_len_list[1])) {
 		DP_ERR(edev,
-		       "Unlikely - got a TPA aggregation with more than one ext_bd_len_list entry in the TPA start\n");
+		       "Unlikely - got a TPA aggregation with more than one bw_ext_bd_len_list entry in the TPA start\n");
 		tpa_info->state = QEDE_AGG_STATE_ERROR;
 	}
 }
@@ -1075,6 +1098,7 @@
 	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = xdp.data + *len;
 	xdp.rxq = &rxq->xdp_rxq;
+	xdp.frame_sz = rxq->rx_buf_seg_size; /* PAGE_SIZE when XDP enabled */
 
 	/* Queues always have a full reset currently, so for the time
 	 * being until there's atomic program replace just mark read
@@ -1097,32 +1121,59 @@
 	switch (act) {
 	case XDP_TX:
 		/* We need the replacement buffer before transmit. */
-		if (qede_alloc_rx_buffer(rxq, true)) {
+		if (unlikely(qede_alloc_rx_buffer(rxq, true))) {
 			qede_recycle_rx_bd_ring(rxq, 1);
+
 			trace_xdp_exception(edev->ndev, prog, act);
-			return false;
+			break;
 		}
 
 		/* Now if there's a transmission problem, we'd still have to
 		 * throw current buffer, as replacement was already allocated.
 		 */
-		if (qede_xdp_xmit(edev, fp, bd, *data_offset, *len)) {
-			dma_unmap_page(rxq->dev, bd->mapping,
-				       PAGE_SIZE, DMA_BIDIRECTIONAL);
+		if (unlikely(qede_xdp_xmit(fp->xdp_tx, bd->mapping,
+					   *data_offset, *len, bd->data,
+					   NULL))) {
+			dma_unmap_page(rxq->dev, bd->mapping, PAGE_SIZE,
+				       rxq->data_direction);
 			__free_page(bd->data);
+
 			trace_xdp_exception(edev->ndev, prog, act);
+		} else {
+			dma_sync_single_for_device(rxq->dev,
+						   bd->mapping + *data_offset,
+						   *len, rxq->data_direction);
+			fp->xdp_xmit |= QEDE_XDP_TX;
 		}
 
 		/* Regardless, we've consumed an Rx BD */
 		qede_rx_bd_ring_consume(rxq);
-		return false;
+		break;
+	case XDP_REDIRECT:
+		/* We need the replacement buffer before transmit. */
+		if (unlikely(qede_alloc_rx_buffer(rxq, true))) {
+			qede_recycle_rx_bd_ring(rxq, 1);
 
+			trace_xdp_exception(edev->ndev, prog, act);
+			break;
+		}
+
+		dma_unmap_page(rxq->dev, bd->mapping, PAGE_SIZE,
+			       rxq->data_direction);
+
+		if (unlikely(xdp_do_redirect(edev->ndev, &xdp, prog)))
+			DP_NOTICE(edev, "Failed to redirect the packet\n");
+		else
+			fp->xdp_xmit |= QEDE_XDP_REDIRECT;
+
+		qede_rx_bd_ring_consume(rxq);
+		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
-		/* Fall through */
+		fallthrough;
 	case XDP_ABORTED:
 		trace_xdp_exception(edev->ndev, prog, act);
-		/* Fall through */
+		fallthrough;
 	case XDP_DROP:
 		qede_recycle_rx_bd_ring(rxq, cqe->bd_num);
 	}
@@ -1386,6 +1437,9 @@
 			    napi);
 	struct qede_dev *edev = fp->edev;
 	int rx_work_done = 0;
+	u16 xdp_prod;
+
+	fp->xdp_xmit = 0;
 
 	if (likely(fp->type & QEDE_FASTPATH_TX)) {
 		int cos;
@@ -1402,7 +1456,12 @@
 	rx_work_done = (likely(fp->type & QEDE_FASTPATH_RX) &&
 			qede_has_rx_work(fp->rxq)) ?
 			qede_rx_int(fp, budget) : 0;
-	if (rx_work_done < budget) {
+
+	if (fp->xdp_xmit & QEDE_XDP_REDIRECT)
+		xdp_do_flush();
+
+	/* Handle case where we are called by netpoll with a budget of 0 */
+	if (rx_work_done < budget || !budget) {
 		if (!qede_poll_is_more_work(fp)) {
 			napi_complete_done(napi, rx_work_done);
 
@@ -1413,10 +1472,9 @@
 		}
 	}
 
-	if (fp->xdp_xmit) {
-		u16 xdp_prod = qed_chain_get_prod_idx(&fp->xdp_tx->tx_pbl);
+	if (fp->xdp_xmit & QEDE_XDP_TX) {
+		xdp_prod = qed_chain_get_prod_idx(&fp->xdp_tx->tx_pbl);
 
-		fp->xdp_xmit = 0;
 		fp->xdp_tx->tx_db.data.bd_prod = cpu_to_le16(xdp_prod);
 		qede_update_tx_producer(fp->xdp_tx);
 	}
@@ -1466,8 +1524,8 @@
 #if ((MAX_SKB_FRAGS + 2) > ETH_TX_MAX_BDS_PER_NON_LSO_PACKET)
 	if (qede_pkt_req_lin(skb, xmit_type)) {
 		if (skb_linearize(skb)) {
-			DP_NOTICE(edev,
-				  "SKB linearization failed - silently dropping this SKB\n");
+			txq->tx_mem_alloc_err++;
+
 			dev_kfree_skb_any(skb);
 			return NETDEV_TX_OK;
 		}
@@ -1672,12 +1730,12 @@
 	txq->tx_db.data.bd_prod =
 		cpu_to_le16(qed_chain_get_prod_idx(&txq->tx_pbl));
 
-	if (!skb->xmit_more || netif_xmit_stopped(netdev_txq))
+	if (!netdev_xmit_more() || netif_xmit_stopped(netdev_txq))
 		qede_update_tx_producer(txq);
 
 	if (unlikely(qed_chain_get_elem_left(&txq->tx_pbl)
 		      < (MAX_SKB_FRAGS + 1))) {
-		if (skb->xmit_more)
+		if (netdev_xmit_more())
 			qede_update_tx_producer(txq);
 
 		netif_tx_stop_queue(netdev_txq);
@@ -1703,8 +1761,7 @@
 }
 
 u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb,
-		      struct net_device *sb_dev,
-		      select_queue_fallback_t fallback)
+		      struct net_device *sb_dev)
 {
 	struct qede_dev *edev = netdev_priv(dev);
 	int total_txq;
@@ -1712,7 +1769,7 @@
 	total_txq = QEDE_TSS_COUNT(edev) * edev->dev_info.num_tc;
 
 	return QEDE_TSS_COUNT(edev) ?
-		fallback(dev, skb, NULL) % total_txq : 0;
+		netdev_pick_tx(dev, skb, NULL) % total_txq : 0;
 }
 
 /* 8B udp header + 8B base tunnel header + 32B option length */