.. | .. |
---|
5 | 5 | |
---|
6 | 6 | #include <linux/prefetch.h> |
---|
7 | 7 | #include <linux/mm.h> |
---|
| 8 | +#include <linux/bpf_trace.h> |
---|
| 9 | +#include <net/xdp.h> |
---|
| 10 | +#include "ice_txrx_lib.h" |
---|
| 11 | +#include "ice_lib.h" |
---|
8 | 12 | #include "ice.h" |
---|
| 13 | +#include "ice_dcb_lib.h" |
---|
| 14 | +#include "ice_xsk.h" |
---|
9 | 15 | |
---|
10 | 16 | #define ICE_RX_HDR_SIZE 256 |
---|
| 17 | + |
---|
| 18 | +#define FDIR_DESC_RXDID 0x40 |
---|
| 19 | +#define ICE_FDIR_CLEAN_DELAY 10 |
---|
| 20 | + |
---|
| 21 | +/** |
---|
| 22 | + * ice_prgm_fdir_fltr - Program a Flow Director filter |
---|
| 23 | + * @vsi: VSI to send dummy packet |
---|
| 24 | + * @fdir_desc: flow director descriptor |
---|
| 25 | + * @raw_packet: allocated buffer for flow director |
---|
| 26 | + */ |
---|
| 27 | +int |
---|
| 28 | +ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc, |
---|
| 29 | + u8 *raw_packet) |
---|
| 30 | +{ |
---|
| 31 | + struct ice_tx_buf *tx_buf, *first; |
---|
| 32 | + struct ice_fltr_desc *f_desc; |
---|
| 33 | + struct ice_tx_desc *tx_desc; |
---|
| 34 | + struct ice_ring *tx_ring; |
---|
| 35 | + struct device *dev; |
---|
| 36 | + dma_addr_t dma; |
---|
| 37 | + u32 td_cmd; |
---|
| 38 | + u16 i; |
---|
| 39 | + |
---|
| 40 | + /* VSI and Tx ring */ |
---|
| 41 | + if (!vsi) |
---|
| 42 | + return -ENOENT; |
---|
| 43 | + tx_ring = vsi->tx_rings[0]; |
---|
| 44 | + if (!tx_ring || !tx_ring->desc) |
---|
| 45 | + return -ENOENT; |
---|
| 46 | + dev = tx_ring->dev; |
---|
| 47 | + |
---|
| 48 | + /* we are using two descriptors to add/del a filter and we can wait */ |
---|
| 49 | + for (i = ICE_FDIR_CLEAN_DELAY; ICE_DESC_UNUSED(tx_ring) < 2; i--) { |
---|
| 50 | + if (!i) |
---|
| 51 | + return -EAGAIN; |
---|
| 52 | + msleep_interruptible(1); |
---|
| 53 | + } |
---|
| 54 | + |
---|
| 55 | + dma = dma_map_single(dev, raw_packet, ICE_FDIR_MAX_RAW_PKT_SIZE, |
---|
| 56 | + DMA_TO_DEVICE); |
---|
| 57 | + |
---|
| 58 | + if (dma_mapping_error(dev, dma)) |
---|
| 59 | + return -EINVAL; |
---|
| 60 | + |
---|
| 61 | + /* grab the next descriptor */ |
---|
| 62 | + i = tx_ring->next_to_use; |
---|
| 63 | + first = &tx_ring->tx_buf[i]; |
---|
| 64 | + f_desc = ICE_TX_FDIRDESC(tx_ring, i); |
---|
| 65 | + memcpy(f_desc, fdir_desc, sizeof(*f_desc)); |
---|
| 66 | + |
---|
| 67 | + i++; |
---|
| 68 | + i = (i < tx_ring->count) ? i : 0; |
---|
| 69 | + tx_desc = ICE_TX_DESC(tx_ring, i); |
---|
| 70 | + tx_buf = &tx_ring->tx_buf[i]; |
---|
| 71 | + |
---|
| 72 | + i++; |
---|
| 73 | + tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; |
---|
| 74 | + |
---|
| 75 | + memset(tx_buf, 0, sizeof(*tx_buf)); |
---|
| 76 | + dma_unmap_len_set(tx_buf, len, ICE_FDIR_MAX_RAW_PKT_SIZE); |
---|
| 77 | + dma_unmap_addr_set(tx_buf, dma, dma); |
---|
| 78 | + |
---|
| 79 | + tx_desc->buf_addr = cpu_to_le64(dma); |
---|
| 80 | + td_cmd = ICE_TXD_LAST_DESC_CMD | ICE_TX_DESC_CMD_DUMMY | |
---|
| 81 | + ICE_TX_DESC_CMD_RE; |
---|
| 82 | + |
---|
| 83 | + tx_buf->tx_flags = ICE_TX_FLAGS_DUMMY_PKT; |
---|
| 84 | + tx_buf->raw_buf = raw_packet; |
---|
| 85 | + |
---|
| 86 | + tx_desc->cmd_type_offset_bsz = |
---|
| 87 | + ice_build_ctob(td_cmd, 0, ICE_FDIR_MAX_RAW_PKT_SIZE, 0); |
---|
| 88 | + |
---|
| 89 | + /* Force memory write to complete before letting h/w know |
---|
| 90 | + * there are new descriptors to fetch. |
---|
| 91 | + */ |
---|
| 92 | + wmb(); |
---|
| 93 | + |
---|
| 94 | + /* mark the data descriptor to be watched */ |
---|
| 95 | + first->next_to_watch = tx_desc; |
---|
| 96 | + |
---|
| 97 | + writel(tx_ring->next_to_use, tx_ring->tail); |
---|
| 98 | + |
---|
| 99 | + return 0; |
---|
| 100 | +} |
---|
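For illustration, the descriptor bookkeeping used above (wait for two free slots, consume them, wrap next_to_use at the end of the ring) can be sketched in plain user-space C. struct ring, ring_unused() and ring_advance() below are simplified stand-ins invented for the example, not the driver's struct ice_ring or ICE_DESC_UNUSED().

```c
#include <stdio.h>

/* simplified stand-in for the driver's Tx ring indices (not struct ice_ring) */
struct ring {
	unsigned int count;         /* descriptors in the ring */
	unsigned int next_to_use;   /* producer index */
	unsigned int next_to_clean; /* consumer index */
};

/* free descriptors, mirroring the ICE_DESC_UNUSED() idea */
static unsigned int ring_unused(const struct ring *r)
{
	return ((r->next_to_clean > r->next_to_use) ? 0 : r->count) +
	       r->next_to_clean - r->next_to_use - 1;
}

/* advance the producer index with the same wrap the filter path uses */
static void ring_advance(struct ring *r)
{
	unsigned int i = r->next_to_use + 1;

	r->next_to_use = (i < r->count) ? i : 0;
}

int main(void)
{
	struct ring r = { .count = 8, .next_to_use = 6, .next_to_clean = 1 };

	printf("unused before: %u\n", ring_unused(&r));  /* 2: enough for a filter */
	ring_advance(&r);                                /* filter descriptor */
	ring_advance(&r);                                /* dummy packet, wraps to 0 */
	printf("next_to_use %u, unused %u\n", r.next_to_use, ring_unused(&r));
	return 0;
}
```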
11 | 101 | |
---|
12 | 102 | /** |
---|
13 | 103 | * ice_unmap_and_free_tx_buf - Release a Tx buffer |
---|
.. | .. |
---|
18 | 108 | ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf) |
---|
19 | 109 | { |
---|
20 | 110 | if (tx_buf->skb) { |
---|
21 | | - dev_kfree_skb_any(tx_buf->skb); |
---|
| 111 | + if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT) |
---|
| 112 | + devm_kfree(ring->dev, tx_buf->raw_buf); |
---|
| 113 | + else if (ice_ring_is_xdp(ring)) |
---|
| 114 | + page_frag_free(tx_buf->raw_buf); |
---|
| 115 | + else |
---|
| 116 | + dev_kfree_skb_any(tx_buf->skb); |
---|
22 | 117 | if (dma_unmap_len(tx_buf, len)) |
---|
23 | 118 | dma_unmap_single(ring->dev, |
---|
24 | 119 | dma_unmap_addr(tx_buf, dma), |
---|
.. | .. |
---|
48 | 143 | */ |
---|
49 | 144 | void ice_clean_tx_ring(struct ice_ring *tx_ring) |
---|
50 | 145 | { |
---|
51 | | - unsigned long size; |
---|
52 | 146 | u16 i; |
---|
| 147 | + |
---|
| 148 | + if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_pool) { |
---|
| 149 | + ice_xsk_clean_xdp_ring(tx_ring); |
---|
| 150 | + goto tx_skip_free; |
---|
| 151 | + } |
---|
53 | 152 | |
---|
54 | 153 | /* ring already cleared, nothing to do */ |
---|
55 | 154 | if (!tx_ring->tx_buf) |
---|
56 | 155 | return; |
---|
57 | 156 | |
---|
58 | | - /* Free all the Tx ring sk_bufss */ |
---|
| 157 | + /* Free all the Tx ring sk_buffs */ |
---|
59 | 158 | for (i = 0; i < tx_ring->count; i++) |
---|
60 | 159 | ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]); |
---|
61 | 160 | |
---|
62 | | - size = sizeof(struct ice_tx_buf) * tx_ring->count; |
---|
63 | | - memset(tx_ring->tx_buf, 0, size); |
---|
| 161 | +tx_skip_free: |
---|
| 162 | + memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count); |
---|
64 | 163 | |
---|
65 | 164 | /* Zero out the descriptor ring */ |
---|
66 | 165 | memset(tx_ring->desc, 0, tx_ring->size); |
---|
.. | .. |
---|
96 | 195 | |
---|
97 | 196 | /** |
---|
98 | 197 | * ice_clean_tx_irq - Reclaim resources after transmit completes |
---|
99 | | - * @vsi: the VSI we care about |
---|
100 | 198 | * @tx_ring: Tx ring to clean |
---|
101 | 199 | * @napi_budget: Used to determine if we are in netpoll |
---|
102 | 200 | * |
---|
103 | 201 | * Returns true if there's any budget left (e.g. the clean is finished) |
---|
104 | 202 | */ |
---|
105 | | -static bool ice_clean_tx_irq(struct ice_vsi *vsi, struct ice_ring *tx_ring, |
---|
106 | | - int napi_budget) |
---|
| 203 | +static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget) |
---|
107 | 204 | { |
---|
108 | 205 | unsigned int total_bytes = 0, total_pkts = 0; |
---|
109 | | - unsigned int budget = vsi->work_lmt; |
---|
| 206 | + unsigned int budget = ICE_DFLT_IRQ_WORK; |
---|
| 207 | + struct ice_vsi *vsi = tx_ring->vsi; |
---|
110 | 208 | s16 i = tx_ring->next_to_clean; |
---|
111 | 209 | struct ice_tx_desc *tx_desc; |
---|
112 | 210 | struct ice_tx_buf *tx_buf; |
---|
.. | .. |
---|
114 | 212 | tx_buf = &tx_ring->tx_buf[i]; |
---|
115 | 213 | tx_desc = ICE_TX_DESC(tx_ring, i); |
---|
116 | 214 | i -= tx_ring->count; |
---|
| 215 | + |
---|
| 216 | + prefetch(&vsi->state); |
---|
117 | 217 | |
---|
118 | 218 | do { |
---|
119 | 219 | struct ice_tx_desc *eop_desc = tx_buf->next_to_watch; |
---|
.. | .. |
---|
136 | 236 | total_bytes += tx_buf->bytecount; |
---|
137 | 237 | total_pkts += tx_buf->gso_segs; |
---|
138 | 238 | |
---|
139 | | - /* free the skb */ |
---|
140 | | - napi_consume_skb(tx_buf->skb, napi_budget); |
---|
| 239 | + if (ice_ring_is_xdp(tx_ring)) |
---|
| 240 | + page_frag_free(tx_buf->raw_buf); |
---|
| 241 | + else |
---|
| 242 | + /* free the skb */ |
---|
| 243 | + napi_consume_skb(tx_buf->skb, napi_budget); |
---|
141 | 244 | |
---|
142 | 245 | /* unmap skb header data */ |
---|
143 | 246 | dma_unmap_single(tx_ring->dev, |
---|
.. | .. |
---|
188 | 291 | |
---|
189 | 292 | i += tx_ring->count; |
---|
190 | 293 | tx_ring->next_to_clean = i; |
---|
191 | | - u64_stats_update_begin(&tx_ring->syncp); |
---|
192 | | - tx_ring->stats.bytes += total_bytes; |
---|
193 | | - tx_ring->stats.pkts += total_pkts; |
---|
194 | | - u64_stats_update_end(&tx_ring->syncp); |
---|
195 | | - tx_ring->q_vector->tx.total_bytes += total_bytes; |
---|
196 | | - tx_ring->q_vector->tx.total_pkts += total_pkts; |
---|
| 294 | + |
---|
| 295 | + ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes); |
---|
| 296 | + |
---|
| 297 | + if (ice_ring_is_xdp(tx_ring)) |
---|
| 298 | + return !!budget; |
---|
197 | 299 | |
---|
198 | 300 | netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts, |
---|
199 | 301 | total_bytes); |
---|
.. | .. |
---|
207 | 309 | smp_mb(); |
---|
208 | 310 | if (__netif_subqueue_stopped(tx_ring->netdev, |
---|
209 | 311 | tx_ring->q_index) && |
---|
210 | | - !test_bit(__ICE_DOWN, vsi->state)) { |
---|
| 312 | + !test_bit(__ICE_DOWN, vsi->state)) { |
---|
211 | 313 | netif_wake_subqueue(tx_ring->netdev, |
---|
212 | 314 | tx_ring->q_index); |
---|
213 | 315 | ++tx_ring->tx_stats.restart_q; |
---|
.. | .. |
---|
219 | 321 | |
---|
220 | 322 | /** |
---|
221 | 323 | * ice_setup_tx_ring - Allocate the Tx descriptors |
---|
222 | | - * @tx_ring: the tx ring to set up |
---|
| 324 | + * @tx_ring: the Tx ring to set up |
---|
223 | 325 | * |
---|
224 | 326 | * Return 0 on success, negative on error |
---|
225 | 327 | */ |
---|
226 | 328 | int ice_setup_tx_ring(struct ice_ring *tx_ring) |
---|
227 | 329 | { |
---|
228 | 330 | struct device *dev = tx_ring->dev; |
---|
229 | | - int bi_size; |
---|
230 | 331 | |
---|
231 | 332 | if (!dev) |
---|
232 | 333 | return -ENOMEM; |
---|
233 | 334 | |
---|
234 | 335 | /* warn if we are about to overwrite the pointer */ |
---|
235 | 336 | WARN_ON(tx_ring->tx_buf); |
---|
236 | | - bi_size = sizeof(struct ice_tx_buf) * tx_ring->count; |
---|
237 | | - tx_ring->tx_buf = devm_kzalloc(dev, bi_size, GFP_KERNEL); |
---|
| 337 | + tx_ring->tx_buf = |
---|
| 338 | + devm_kzalloc(dev, sizeof(*tx_ring->tx_buf) * tx_ring->count, |
---|
| 339 | + GFP_KERNEL); |
---|
238 | 340 | if (!tx_ring->tx_buf) |
---|
239 | 341 | return -ENOMEM; |
---|
240 | 342 | |
---|
241 | | - /* round up to nearest 4K */ |
---|
242 | | - tx_ring->size = tx_ring->count * sizeof(struct ice_tx_desc); |
---|
243 | | - tx_ring->size = ALIGN(tx_ring->size, 4096); |
---|
| 343 | + /* round up to nearest page */ |
---|
| 344 | + tx_ring->size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc), |
---|
| 345 | + PAGE_SIZE); |
---|
244 | 346 | tx_ring->desc = dmam_alloc_coherent(dev, tx_ring->size, &tx_ring->dma, |
---|
245 | 347 | GFP_KERNEL); |
---|
246 | 348 | if (!tx_ring->desc) { |
---|
.. | .. |
---|
251 | 353 | |
---|
252 | 354 | tx_ring->next_to_use = 0; |
---|
253 | 355 | tx_ring->next_to_clean = 0; |
---|
| 356 | + tx_ring->tx_stats.prev_pkt = -1; |
---|
254 | 357 | return 0; |
---|
255 | 358 | |
---|
256 | 359 | err: |
---|
.. | .. |
---|
266 | 369 | void ice_clean_rx_ring(struct ice_ring *rx_ring) |
---|
267 | 370 | { |
---|
268 | 371 | struct device *dev = rx_ring->dev; |
---|
269 | | - unsigned long size; |
---|
270 | 372 | u16 i; |
---|
271 | 373 | |
---|
272 | 374 | /* ring already cleared, nothing to do */ |
---|
273 | 375 | if (!rx_ring->rx_buf) |
---|
274 | 376 | return; |
---|
| 377 | + |
---|
| 378 | + if (rx_ring->xsk_pool) { |
---|
| 379 | + ice_xsk_clean_rx_ring(rx_ring); |
---|
| 380 | + goto rx_skip_free; |
---|
| 381 | + } |
---|
275 | 382 | |
---|
276 | 383 | /* Free all the Rx ring sk_buffs */ |
---|
277 | 384 | for (i = 0; i < rx_ring->count; i++) { |
---|
.. | .. |
---|
284 | 391 | if (!rx_buf->page) |
---|
285 | 392 | continue; |
---|
286 | 393 | |
---|
287 | | - dma_unmap_page(dev, rx_buf->dma, PAGE_SIZE, DMA_FROM_DEVICE); |
---|
288 | | - __free_pages(rx_buf->page, 0); |
---|
| 394 | + /* Invalidate cache lines that may have been written to by |
---|
| 395 | + * device so that we avoid corrupting memory. |
---|
| 396 | + */ |
---|
| 397 | + dma_sync_single_range_for_cpu(dev, rx_buf->dma, |
---|
| 398 | + rx_buf->page_offset, |
---|
| 399 | + rx_ring->rx_buf_len, |
---|
| 400 | + DMA_FROM_DEVICE); |
---|
| 401 | + |
---|
| 402 | + /* free resources associated with mapping */ |
---|
| 403 | + dma_unmap_page_attrs(dev, rx_buf->dma, ice_rx_pg_size(rx_ring), |
---|
| 404 | + DMA_FROM_DEVICE, ICE_RX_DMA_ATTR); |
---|
| 405 | + __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias); |
---|
289 | 406 | |
---|
290 | 407 | rx_buf->page = NULL; |
---|
291 | 408 | rx_buf->page_offset = 0; |
---|
292 | 409 | } |
---|
293 | 410 | |
---|
294 | | - size = sizeof(struct ice_rx_buf) * rx_ring->count; |
---|
295 | | - memset(rx_ring->rx_buf, 0, size); |
---|
| 411 | +rx_skip_free: |
---|
| 412 | + memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count); |
---|
296 | 413 | |
---|
297 | 414 | /* Zero out the descriptor ring */ |
---|
298 | 415 | memset(rx_ring->desc, 0, rx_ring->size); |
---|
.. | .. |
---|
311 | 428 | void ice_free_rx_ring(struct ice_ring *rx_ring) |
---|
312 | 429 | { |
---|
313 | 430 | ice_clean_rx_ring(rx_ring); |
---|
| 431 | + if (rx_ring->vsi->type == ICE_VSI_PF) |
---|
| 432 | + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) |
---|
| 433 | + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); |
---|
| 434 | + rx_ring->xdp_prog = NULL; |
---|
314 | 435 | devm_kfree(rx_ring->dev, rx_ring->rx_buf); |
---|
315 | 436 | rx_ring->rx_buf = NULL; |
---|
316 | 437 | |
---|
.. | .. |
---|
323 | 444 | |
---|
324 | 445 | /** |
---|
325 | 446 | * ice_setup_rx_ring - Allocate the Rx descriptors |
---|
326 | | - * @rx_ring: the rx ring to set up |
---|
| 447 | + * @rx_ring: the Rx ring to set up |
---|
327 | 448 | * |
---|
328 | 449 | * Return 0 on success, negative on error |
---|
329 | 450 | */ |
---|
330 | 451 | int ice_setup_rx_ring(struct ice_ring *rx_ring) |
---|
331 | 452 | { |
---|
332 | 453 | struct device *dev = rx_ring->dev; |
---|
333 | | - int bi_size; |
---|
334 | 454 | |
---|
335 | 455 | if (!dev) |
---|
336 | 456 | return -ENOMEM; |
---|
337 | 457 | |
---|
338 | 458 | /* warn if we are about to overwrite the pointer */ |
---|
339 | 459 | WARN_ON(rx_ring->rx_buf); |
---|
340 | | - bi_size = sizeof(struct ice_rx_buf) * rx_ring->count; |
---|
341 | | - rx_ring->rx_buf = devm_kzalloc(dev, bi_size, GFP_KERNEL); |
---|
| 460 | + rx_ring->rx_buf = |
---|
| 461 | + devm_kzalloc(dev, sizeof(*rx_ring->rx_buf) * rx_ring->count, |
---|
| 462 | + GFP_KERNEL); |
---|
342 | 463 | if (!rx_ring->rx_buf) |
---|
343 | 464 | return -ENOMEM; |
---|
344 | 465 | |
---|
345 | | - /* round up to nearest 4K */ |
---|
346 | | - rx_ring->size = rx_ring->count * sizeof(union ice_32byte_rx_desc); |
---|
347 | | - rx_ring->size = ALIGN(rx_ring->size, 4096); |
---|
| 466 | + /* round up to nearest page */ |
---|
| 467 | + rx_ring->size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), |
---|
| 468 | + PAGE_SIZE); |
---|
348 | 469 | rx_ring->desc = dmam_alloc_coherent(dev, rx_ring->size, &rx_ring->dma, |
---|
349 | 470 | GFP_KERNEL); |
---|
350 | 471 | if (!rx_ring->desc) { |
---|
.. | .. |
---|
355 | 476 | |
---|
356 | 477 | rx_ring->next_to_use = 0; |
---|
357 | 478 | rx_ring->next_to_clean = 0; |
---|
| 479 | + |
---|
| 480 | + if (ice_is_xdp_ena_vsi(rx_ring->vsi)) |
---|
| 481 | + WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog); |
---|
| 482 | + |
---|
| 483 | + if (rx_ring->vsi->type == ICE_VSI_PF && |
---|
| 484 | + !xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) |
---|
| 485 | + if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, |
---|
| 486 | + rx_ring->q_index)) |
---|
| 487 | + goto err; |
---|
358 | 488 | return 0; |
---|
359 | 489 | |
---|
360 | 490 | err: |
---|
.. | .. |
---|
364 | 494 | } |
---|
365 | 495 | |
---|
366 | 496 | /** |
---|
367 | | - * ice_release_rx_desc - Store the new tail and head values |
---|
368 | | - * @rx_ring: ring to bump |
---|
369 | | - * @val: new head index |
---|
| 497 | + * ice_rx_offset - Return expected offset into page to access data |
---|
| 498 | + * @rx_ring: Ring we are requesting offset of |
---|
| 499 | + * |
---|
| 500 | + * Returns the offset into the data buffer for this ring. |
---|
370 | 501 | */ |
---|
371 | | -static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val) |
---|
| 502 | +static unsigned int ice_rx_offset(struct ice_ring *rx_ring) |
---|
372 | 503 | { |
---|
373 | | - rx_ring->next_to_use = val; |
---|
| 504 | + if (ice_ring_uses_build_skb(rx_ring)) |
---|
| 505 | + return ICE_SKB_PAD; |
---|
| 506 | + else if (ice_is_xdp_ena_vsi(rx_ring->vsi)) |
---|
| 507 | + return XDP_PACKET_HEADROOM; |
---|
374 | 508 | |
---|
375 | | - /* update next to alloc since we have filled the ring */ |
---|
376 | | - rx_ring->next_to_alloc = val; |
---|
| 509 | + return 0; |
---|
| 510 | +} |
---|
377 | 511 | |
---|
378 | | - /* Force memory writes to complete before letting h/w |
---|
379 | | - * know there are new descriptors to fetch. (Only |
---|
380 | | - * applicable for weak-ordered memory model archs, |
---|
381 | | - * such as IA-64). |
---|
382 | | - */ |
---|
383 | | - wmb(); |
---|
384 | | - writel(val, rx_ring->tail); |
---|
| 512 | +static unsigned int |
---|
| 513 | +ice_rx_frame_truesize(struct ice_ring *rx_ring, unsigned int __maybe_unused size) |
---|
| 514 | +{ |
---|
| 515 | + unsigned int truesize; |
---|
| 516 | + |
---|
| 517 | +#if (PAGE_SIZE < 8192) |
---|
| 518 | + truesize = ice_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ |
---|
| 519 | +#else |
---|
| 520 | + truesize = ice_rx_offset(rx_ring) ? |
---|
| 521 | + SKB_DATA_ALIGN(ice_rx_offset(rx_ring) + size) + |
---|
| 522 | + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : |
---|
| 523 | + SKB_DATA_ALIGN(size); |
---|
| 524 | +#endif |
---|
| 525 | + return truesize; |
---|
| 526 | +} |
---|
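A rough worked example of the truesize arithmetic above, as a user-space sketch: the constants (4096-byte page, 64-byte alignment, 320-byte skb_shared_info, 192 bytes of headroom) are assumptions standing in for ice_rx_pg_size(), SKB_DATA_ALIGN(), ice_rx_offset() and the real struct size.

```c
#include <stdio.h>

#define MOCK_PAGE_SIZE   4096u
#define MOCK_CACHE_BYTES   64u
#define MOCK_SHINFO_SIZE  320u   /* assumed sizeof(struct skb_shared_info) */

/* stand-in for SKB_DATA_ALIGN(): round up to the cache-line size */
static unsigned int align_up(unsigned int len)
{
	return (len + MOCK_CACHE_BYTES - 1) & ~(MOCK_CACHE_BYTES - 1);
}

int main(void)
{
	unsigned int size = 1514;     /* received frame length */
	unsigned int headroom = 192;  /* assumed Rx offset */

	/* PAGE_SIZE < 8192: the page is split into two fixed halves */
	printf("small-page truesize: %u\n", MOCK_PAGE_SIZE / 2);

	/* PAGE_SIZE >= 8192 with headroom: align headroom+data, reserve shinfo */
	printf("large-page truesize: %u\n",
	       align_up(headroom + size) + align_up(MOCK_SHINFO_SIZE));
	return 0;
}
```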
| 527 | + |
---|
| 528 | +/** |
---|
| 529 | + * ice_run_xdp - Executes an XDP program on initialized xdp_buff |
---|
| 530 | + * @rx_ring: Rx ring |
---|
| 531 | + * @xdp: xdp_buff used as input to the XDP program |
---|
| 532 | + * @xdp_prog: XDP program to run |
---|
| 533 | + * |
---|
| 534 | + * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} |
---|
| 535 | + */ |
---|
| 536 | +static int |
---|
| 537 | +ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp, |
---|
| 538 | + struct bpf_prog *xdp_prog) |
---|
| 539 | +{ |
---|
| 540 | + struct ice_ring *xdp_ring; |
---|
| 541 | + int err, result; |
---|
| 542 | + u32 act; |
---|
| 543 | + |
---|
| 544 | + act = bpf_prog_run_xdp(xdp_prog, xdp); |
---|
| 545 | + switch (act) { |
---|
| 546 | + case XDP_PASS: |
---|
| 547 | + return ICE_XDP_PASS; |
---|
| 548 | + case XDP_TX: |
---|
| 549 | + xdp_ring = rx_ring->vsi->xdp_rings[smp_processor_id()]; |
---|
| 550 | + result = ice_xmit_xdp_buff(xdp, xdp_ring); |
---|
| 551 | + if (result == ICE_XDP_CONSUMED) |
---|
| 552 | + goto out_failure; |
---|
| 553 | + return result; |
---|
| 554 | + case XDP_REDIRECT: |
---|
| 555 | + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); |
---|
| 556 | + if (err) |
---|
| 557 | + goto out_failure; |
---|
| 558 | + return ICE_XDP_REDIR; |
---|
| 559 | + default: |
---|
| 560 | + bpf_warn_invalid_xdp_action(act); |
---|
| 561 | + fallthrough; |
---|
| 562 | + case XDP_ABORTED: |
---|
| 563 | +out_failure: |
---|
| 564 | + trace_xdp_exception(rx_ring->netdev, xdp_prog, act); |
---|
| 565 | + fallthrough; |
---|
| 566 | + case XDP_DROP: |
---|
| 567 | + return ICE_XDP_CONSUMED; |
---|
| 568 | + } |
---|
| 569 | +} |
---|
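The verdict handling above follows the common XDP pattern: XDP_PASS goes up the stack, XDP_TX is sent out the CPU's paired XDP Tx ring, XDP_REDIRECT goes through xdp_do_redirect(), and a failed TX or redirect, an unknown action, XDP_ABORTED and XDP_DROP all end up consumed. A compressed user-space sketch of that mapping follows; the mock_ enums and handle_verdict() are invented for the example and are not the kernel's definitions.

```c
#include <stdbool.h>
#include <stdio.h>

/* mock action and result codes, invented for the example */
enum mock_act { M_ABORTED, M_DROP, M_PASS, M_TX, M_REDIRECT };
enum mock_res { M_RES_CONSUMED, M_RES_PASS, M_RES_TX, M_RES_REDIR };

static enum mock_res handle_verdict(enum mock_act act, bool tx_ok, bool redir_ok)
{
	switch (act) {
	case M_PASS:
		return M_RES_PASS;
	case M_TX:
		/* a transmit that cannot be queued is treated as an exception */
		return tx_ok ? M_RES_TX : M_RES_CONSUMED;
	case M_REDIRECT:
		return redir_ok ? M_RES_REDIR : M_RES_CONSUMED;
	default:
		/* unknown actions, XDP_ABORTED and XDP_DROP are all consumed */
		return M_RES_CONSUMED;
	}
}

int main(void)
{
	printf("%d %d %d\n",
	       handle_verdict(M_PASS, true, true),      /* 1: pass to stack */
	       handle_verdict(M_TX, false, true),       /* 0: failed TX -> consumed */
	       handle_verdict(M_REDIRECT, true, true)); /* 3: redirected */
	return 0;
}
```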
| 570 | + |
---|
| 571 | +/** |
---|
| 572 | + * ice_xdp_xmit - submit packets to XDP ring for transmission |
---|
| 573 | + * @dev: netdev |
---|
| 574 | + * @n: number of XDP frames to be transmitted |
---|
| 575 | + * @frames: XDP frames to be transmitted |
---|
| 576 | + * @flags: transmit flags |
---|
| 577 | + * |
---|
| 578 | + * Returns number of frames successfully sent. Frames that fail are |
---|

| 579 | + * freed via the XDP return API. |
---|
| 580 | + * For error cases, a negative errno code is returned and no frames |
---|
| 581 | + * are transmitted (caller must handle freeing frames). |
---|
| 582 | + */ |
---|
| 583 | +int |
---|
| 584 | +ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, |
---|
| 585 | + u32 flags) |
---|
| 586 | +{ |
---|
| 587 | + struct ice_netdev_priv *np = netdev_priv(dev); |
---|
| 588 | + unsigned int queue_index = smp_processor_id(); |
---|
| 589 | + struct ice_vsi *vsi = np->vsi; |
---|
| 590 | + struct ice_ring *xdp_ring; |
---|
| 591 | + int drops = 0, i; |
---|
| 592 | + |
---|
| 593 | + if (test_bit(__ICE_DOWN, vsi->state)) |
---|
| 594 | + return -ENETDOWN; |
---|
| 595 | + |
---|
| 596 | + if (!ice_is_xdp_ena_vsi(vsi) || queue_index >= vsi->num_xdp_txq) |
---|
| 597 | + return -ENXIO; |
---|
| 598 | + |
---|
| 599 | + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) |
---|
| 600 | + return -EINVAL; |
---|
| 601 | + |
---|
| 602 | + xdp_ring = vsi->xdp_rings[queue_index]; |
---|
| 603 | + for (i = 0; i < n; i++) { |
---|
| 604 | + struct xdp_frame *xdpf = frames[i]; |
---|
| 605 | + int err; |
---|
| 606 | + |
---|
| 607 | + err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring); |
---|
| 608 | + if (err != ICE_XDP_TX) { |
---|
| 609 | + xdp_return_frame_rx_napi(xdpf); |
---|
| 610 | + drops++; |
---|
| 611 | + } |
---|
| 612 | + } |
---|
| 613 | + |
---|
| 614 | + if (unlikely(flags & XDP_XMIT_FLUSH)) |
---|
| 615 | + ice_xdp_ring_update_tail(xdp_ring); |
---|
| 616 | + |
---|
| 617 | + return n - drops; |
---|
385 | 618 | } |
---|
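The contract of this .ndo_xdp_xmit handler is worth spelling out: it returns the number of frames it accepted, hands failed frames back through the XDP return API, and bumps the hardware tail only once, when XDP_XMIT_FLUSH is set. A minimal sketch of that accounting; mock_xmit() and mock_xdp_xmit() are invented helpers, not driver code.

```c
#include <stdbool.h>
#include <stdio.h>

/* pretend transmit that fails once the mock ring runs out of slots */
static bool mock_xmit(int *slots_left)
{
	if (*slots_left <= 0)
		return false;
	(*slots_left)--;
	return true;
}

/* returns the number of frames accepted, like an ndo_xdp_xmit handler */
static int mock_xdp_xmit(int n, bool flush)
{
	int slots_left = 3, drops = 0, i;

	for (i = 0; i < n; i++)
		if (!mock_xmit(&slots_left))
			drops++;  /* frame handed back to the allocator */

	if (flush)
		printf("tail bumped once for the whole batch\n");

	return n - drops;
}

int main(void)
{
	printf("accepted %d of 5\n", mock_xdp_xmit(5, true));  /* accepted 3 of 5 */
	return 0;
}
```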
386 | 619 | |
---|
387 | 620 | /** |
---|
.. | .. |
---|
392 | 625 | * Returns true if the page was successfully allocated or |
---|
393 | 626 | * reused. |
---|
394 | 627 | */ |
---|
395 | | -static bool ice_alloc_mapped_page(struct ice_ring *rx_ring, |
---|
396 | | - struct ice_rx_buf *bi) |
---|
| 628 | +static bool |
---|
| 629 | +ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi) |
---|
397 | 630 | { |
---|
398 | 631 | struct page *page = bi->page; |
---|
399 | 632 | dma_addr_t dma; |
---|
400 | 633 | |
---|
401 | 634 | /* since we are recycling buffers we should seldom need to alloc */ |
---|
402 | | - if (likely(page)) { |
---|
403 | | - rx_ring->rx_stats.page_reuse_count++; |
---|
| 635 | + if (likely(page)) |
---|
404 | 636 | return true; |
---|
405 | | - } |
---|
406 | 637 | |
---|
407 | 638 | /* alloc new page for storage */ |
---|
408 | | - page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); |
---|
| 639 | + page = dev_alloc_pages(ice_rx_pg_order(rx_ring)); |
---|
409 | 640 | if (unlikely(!page)) { |
---|
410 | 641 | rx_ring->rx_stats.alloc_page_failed++; |
---|
411 | 642 | return false; |
---|
412 | 643 | } |
---|
413 | 644 | |
---|
414 | 645 | /* map page for use */ |
---|
415 | | - dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); |
---|
| 646 | + dma = dma_map_page_attrs(rx_ring->dev, page, 0, ice_rx_pg_size(rx_ring), |
---|
| 647 | + DMA_FROM_DEVICE, ICE_RX_DMA_ATTR); |
---|
416 | 648 | |
---|
417 | 649 | /* if mapping failed free memory back to system since |
---|
418 | 650 | * there isn't much point in holding memory we can't use |
---|
419 | 651 | */ |
---|
420 | 652 | if (dma_mapping_error(rx_ring->dev, dma)) { |
---|
421 | | - __free_pages(page, 0); |
---|
| 653 | + __free_pages(page, ice_rx_pg_order(rx_ring)); |
---|
422 | 654 | rx_ring->rx_stats.alloc_page_failed++; |
---|
423 | 655 | return false; |
---|
424 | 656 | } |
---|
425 | 657 | |
---|
426 | 658 | bi->dma = dma; |
---|
427 | 659 | bi->page = page; |
---|
428 | | - bi->page_offset = 0; |
---|
| 660 | + bi->page_offset = ice_rx_offset(rx_ring); |
---|
| 661 | + page_ref_add(page, USHRT_MAX - 1); |
---|
| 662 | + bi->pagecnt_bias = USHRT_MAX; |
---|
429 | 663 | |
---|
430 | 664 | return true; |
---|
431 | 665 | } |
---|
.. | .. |
---|
435 | 669 | * @rx_ring: ring to place buffers on |
---|
436 | 670 | * @cleaned_count: number of buffers to replace |
---|
437 | 671 | * |
---|
438 | | - * Returns false if all allocations were successful, true if any fail |
---|
| 672 | + * Returns false if all allocations were successful, true if any fail. Returning |
---|
| 673 | + * true signals to the caller that we didn't replace cleaned_count buffers and |
---|
| 674 | + * there is more work to do. |
---|
| 675 | + * |
---|
| 676 | + * First, try to clean "cleaned_count" Rx buffers. Then refill the cleaned Rx |
---|
| 677 | + * buffers. Then bump tail at most one time. Grouping like this lets us avoid |
---|
| 678 | + * multiple tail writes per call. |
---|
439 | 679 | */ |
---|
440 | 680 | bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count) |
---|
441 | 681 | { |
---|
.. | .. |
---|
444 | 684 | struct ice_rx_buf *bi; |
---|
445 | 685 | |
---|
446 | 686 | /* do nothing if no valid netdev defined */ |
---|
447 | | - if (!rx_ring->netdev || !cleaned_count) |
---|
| 687 | + if ((!rx_ring->netdev && rx_ring->vsi->type != ICE_VSI_CTRL) || |
---|
| 688 | + !cleaned_count) |
---|
448 | 689 | return false; |
---|
449 | 690 | |
---|
450 | | - /* get the RX descriptor and buffer based on next_to_use */ |
---|
| 691 | + /* get the Rx descriptor and buffer based on next_to_use */ |
---|
451 | 692 | rx_desc = ICE_RX_DESC(rx_ring, ntu); |
---|
452 | 693 | bi = &rx_ring->rx_buf[ntu]; |
---|
453 | 694 | |
---|
454 | 695 | do { |
---|
| 696 | + /* if we fail here, we have work remaining */ |
---|
455 | 697 | if (!ice_alloc_mapped_page(rx_ring, bi)) |
---|
456 | | - goto no_bufs; |
---|
| 698 | + break; |
---|
| 699 | + |
---|
| 700 | + /* sync the buffer for use by the device */ |
---|
| 701 | + dma_sync_single_range_for_device(rx_ring->dev, bi->dma, |
---|
| 702 | + bi->page_offset, |
---|
| 703 | + rx_ring->rx_buf_len, |
---|
| 704 | + DMA_FROM_DEVICE); |
---|
457 | 705 | |
---|
458 | 706 | /* Refresh the desc even if buffer_addrs didn't change |
---|
459 | 707 | * because each write-back erases this info. |
---|
.. | .. |
---|
478 | 726 | if (rx_ring->next_to_use != ntu) |
---|
479 | 727 | ice_release_rx_desc(rx_ring, ntu); |
---|
480 | 728 | |
---|
481 | | - return false; |
---|
482 | | - |
---|
483 | | -no_bufs: |
---|
484 | | - if (rx_ring->next_to_use != ntu) |
---|
485 | | - ice_release_rx_desc(rx_ring, ntu); |
---|
486 | | - |
---|
487 | | - /* make sure to come back via polling to try again after |
---|
488 | | - * allocation failure |
---|
489 | | - */ |
---|
490 | | - return true; |
---|
| 729 | + return !!cleaned_count; |
---|
491 | 730 | } |
---|
492 | 731 | |
---|
493 | 732 | /** |
---|
.. | .. |
---|
500 | 739 | } |
---|
501 | 740 | |
---|
502 | 741 | /** |
---|
503 | | - * ice_add_rx_frag - Add contents of Rx buffer to sk_buff |
---|
504 | | - * @rx_buf: buffer containing page to add |
---|
505 | | - * @rx_desc: descriptor containing length of buffer written by hardware |
---|
506 | | - * @skb: sk_buf to place the data into |
---|
| 742 | + * ice_rx_buf_adjust_pg_offset - Prepare Rx buffer for reuse |
---|
| 743 | + * @rx_buf: Rx buffer to adjust |
---|
| 744 | + * @size: Size of adjustment |
---|
507 | 745 | * |
---|
508 | | - * This function will add the data contained in rx_buf->page to the skb. |
---|
509 | | - * This is done either through a direct copy if the data in the buffer is |
---|
510 | | - * less than the skb header size, otherwise it will just attach the page as |
---|
511 | | - * a frag to the skb. |
---|
512 | | - * |
---|
513 | | - * The function will then update the page offset if necessary and return |
---|
514 | | - * true if the buffer can be reused by the adapter. |
---|
| 746 | + * Update the offset within page so that Rx buf will be ready to be reused. |
---|
| 747 | + * For systems with PAGE_SIZE < 8192 this function will flip the page offset |
---|
| 748 | + * so the second half of page assigned to Rx buffer will be used, otherwise |
---|
| 749 | + * the offset is moved by "size" bytes |
---|
515 | 750 | */ |
---|
516 | | -static bool ice_add_rx_frag(struct ice_rx_buf *rx_buf, |
---|
517 | | - union ice_32b_rx_flex_desc *rx_desc, |
---|
518 | | - struct sk_buff *skb) |
---|
| 751 | +static void |
---|
| 752 | +ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size) |
---|
519 | 753 | { |
---|
520 | 754 | #if (PAGE_SIZE < 8192) |
---|
521 | | - unsigned int truesize = ICE_RXBUF_2048; |
---|
| 755 | + /* flip page offset to other buffer */ |
---|
| 756 | + rx_buf->page_offset ^= size; |
---|
522 | 757 | #else |
---|
523 | | - unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048; |
---|
524 | | - unsigned int truesize; |
---|
525 | | -#endif /* PAGE_SIZE < 8192) */ |
---|
| 758 | + /* move offset up to the next cache line */ |
---|
| 759 | + rx_buf->page_offset += size; |
---|
| 760 | +#endif |
---|
| 761 | +} |
---|
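On systems with 4 KiB pages each Rx page is split into two 2 KiB buffers, so reuse simply XORs the offset between the two halves; on larger pages the offset walks forward instead. A tiny sketch of the flip, assuming a 2048-byte truesize and 192 bytes of headroom (both values are assumptions for the example):

```c
#include <stdio.h>

int main(void)
{
	unsigned int headroom = 192;   /* assumed Rx headroom */
	unsigned int truesize = 2048;  /* half of a 4 KiB page */
	unsigned int offset = headroom;
	int i;

	/* each reuse flips between the lower and upper half of the page */
	for (i = 0; i < 4; i++) {
		printf("buffer %d starts at offset %u\n", i, offset);
		offset ^= truesize;  /* 192, 2240, 192, 2240, ... */
	}
	return 0;
}
```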
526 | 762 | |
---|
527 | | - struct page *page; |
---|
528 | | - unsigned int size; |
---|
529 | | - |
---|
530 | | - size = le16_to_cpu(rx_desc->wb.pkt_len) & |
---|
531 | | - ICE_RX_FLX_DESC_PKT_LEN_M; |
---|
532 | | - |
---|
533 | | - page = rx_buf->page; |
---|
534 | | - |
---|
535 | | -#if (PAGE_SIZE >= 8192) |
---|
536 | | - truesize = ALIGN(size, L1_CACHE_BYTES); |
---|
537 | | -#endif /* PAGE_SIZE >= 8192) */ |
---|
538 | | - |
---|
539 | | - /* will the data fit in the skb we allocated? if so, just |
---|
540 | | - * copy it as it is pretty small anyway |
---|
541 | | - */ |
---|
542 | | - if (size <= ICE_RX_HDR_SIZE && !skb_is_nonlinear(skb)) { |
---|
543 | | - unsigned char *va = page_address(page) + rx_buf->page_offset; |
---|
544 | | - |
---|
545 | | - memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); |
---|
546 | | - |
---|
547 | | - /* page is not reserved, we can reuse buffer as-is */ |
---|
548 | | - if (likely(!ice_page_is_reserved(page))) |
---|
549 | | - return true; |
---|
550 | | - |
---|
551 | | - /* this page cannot be reused so discard it */ |
---|
552 | | - __free_pages(page, 0); |
---|
553 | | - return false; |
---|
554 | | - } |
---|
555 | | - |
---|
556 | | - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, |
---|
557 | | - rx_buf->page_offset, size, truesize); |
---|
| 763 | +/** |
---|
| 764 | + * ice_can_reuse_rx_page - Determine if page can be reused for another Rx |
---|
| 765 | + * @rx_buf: buffer containing the page |
---|
| 766 | + * @rx_buf_pgcnt: rx_buf page refcount pre xdp_do_redirect() call |
---|
| 767 | + * |
---|
| 768 | + * If page is reusable, we have a green light for calling ice_reuse_rx_page, |
---|
| 769 | + * which will assign the current buffer to the buffer that next_to_alloc is |
---|
| 770 | + * pointing to; otherwise, the DMA mapping needs to be destroyed and |
---|
| 771 | + * page freed |
---|
| 772 | + */ |
---|
| 773 | +static bool |
---|
| 774 | +ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt) |
---|
| 775 | +{ |
---|
| 776 | + unsigned int pagecnt_bias = rx_buf->pagecnt_bias; |
---|
| 777 | + struct page *page = rx_buf->page; |
---|
558 | 778 | |
---|
559 | 779 | /* avoid re-using remote pages */ |
---|
560 | 780 | if (unlikely(ice_page_is_reserved(page))) |
---|
.. | .. |
---|
562 | 782 | |
---|
563 | 783 | #if (PAGE_SIZE < 8192) |
---|
564 | 784 | /* if we are only owner of page we can reuse it */ |
---|
565 | | - if (unlikely(page_count(page) != 1)) |
---|
| 785 | + if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1)) |
---|
566 | 786 | return false; |
---|
567 | | - |
---|
568 | | - /* flip page offset to other buffer */ |
---|
569 | | - rx_buf->page_offset ^= truesize; |
---|
570 | 787 | #else |
---|
571 | | - /* move offset up to the next cache line */ |
---|
572 | | - rx_buf->page_offset += truesize; |
---|
573 | | - |
---|
574 | | - if (rx_buf->page_offset > last_offset) |
---|
| 788 | +#define ICE_LAST_OFFSET \ |
---|
| 789 | + (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048) |
---|
| 790 | + if (rx_buf->page_offset > ICE_LAST_OFFSET) |
---|
575 | 791 | return false; |
---|
576 | 792 | #endif /* PAGE_SIZE < 8192) */ |
---|
577 | 793 | |
---|
578 | | - /* Even if we own the page, we are not allowed to use atomic_set() |
---|
579 | | - * This would break get_page_unless_zero() users. |
---|
| 794 | + /* If we have drained the page fragment pool we need to update |
---|
| 795 | + * the pagecnt_bias and page count so that we fully restock the |
---|
| 796 | + * number of references the driver holds. |
---|
580 | 797 | */ |
---|
581 | | - get_page(rx_buf->page); |
---|
| 798 | + if (unlikely(pagecnt_bias == 1)) { |
---|
| 799 | + page_ref_add(page, USHRT_MAX - 1); |
---|
| 800 | + rx_buf->pagecnt_bias = USHRT_MAX; |
---|
| 801 | + } |
---|
582 | 802 | |
---|
583 | 803 | return true; |
---|
584 | 804 | } |
---|
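The reuse test works because the allocation path takes USHRT_MAX references on the page up front and tracks how many the driver has handed out in pagecnt_bias; the page can go back on the ring as long as at most one reference beyond the bias is outstanding. A user-space simulation of that accounting follows; struct mock_buf and its helpers are invented for the example, with plain integers standing in for struct page.

```c
#include <stdio.h>

#define MOCK_BIAS_MAX 65535u  /* stands in for USHRT_MAX */

struct mock_buf {
	unsigned int page_refs;     /* stands in for page_count(page) */
	unsigned int pagecnt_bias;  /* references still owned by the driver */
};

static void mock_alloc(struct mock_buf *b)
{
	b->page_refs = 1 + (MOCK_BIAS_MAX - 1); /* page_ref_add(page, USHRT_MAX - 1) */
	b->pagecnt_bias = MOCK_BIAS_MAX;
}

/* the driver hands one half-page buffer to the stack: one reference spent */
static void mock_use(struct mock_buf *b)
{
	b->pagecnt_bias--;
}

/* reusable iff at most one reference beyond the driver's bias is in flight */
static int mock_can_reuse(const struct mock_buf *b)
{
	return (b->page_refs - b->pagecnt_bias) <= 1;
}

int main(void)
{
	struct mock_buf b;

	mock_alloc(&b);
	mock_use(&b);
	printf("after one in-flight buffer: %d\n", mock_can_reuse(&b)); /* 1 */

	b.page_refs++;  /* e.g. another holder still references the page */
	printf("with an extra reference:    %d\n", mock_can_reuse(&b)); /* 0 */
	return 0;
}
```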
585 | 805 | |
---|
586 | 806 | /** |
---|
| 807 | + * ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag |
---|
| 808 | + * @rx_ring: Rx descriptor ring to transact packets on |
---|
| 809 | + * @rx_buf: buffer containing page to add |
---|
| 810 | + * @skb: sk_buff to place the data into |
---|
| 811 | + * @size: packet length from rx_desc |
---|
| 812 | + * |
---|
| 813 | + * This function will add the data contained in rx_buf->page to the skb. |
---|
| 814 | + * It will just attach the page as a frag to the skb. |
---|
| 815 | + * The function will then update the page offset. |
---|
| 816 | + */ |
---|
| 817 | +static void |
---|
| 818 | +ice_add_rx_frag(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, |
---|
| 819 | + struct sk_buff *skb, unsigned int size) |
---|
| 820 | +{ |
---|
| 821 | +#if (PAGE_SIZE >= 8192) |
---|
| 822 | + unsigned int truesize = SKB_DATA_ALIGN(size + ice_rx_offset(rx_ring)); |
---|
| 823 | +#else |
---|
| 824 | + unsigned int truesize = ice_rx_pg_size(rx_ring) / 2; |
---|
| 825 | +#endif |
---|
| 826 | + |
---|
| 827 | + if (!size) |
---|
| 828 | + return; |
---|
| 829 | + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page, |
---|
| 830 | + rx_buf->page_offset, size, truesize); |
---|
| 831 | + |
---|
| 832 | + /* page is being used so we must update the page offset */ |
---|
| 833 | + ice_rx_buf_adjust_pg_offset(rx_buf, truesize); |
---|
| 834 | +} |
---|
| 835 | + |
---|
| 836 | +/** |
---|
587 | 837 | * ice_reuse_rx_page - page flip buffer and store it back on the ring |
---|
588 | | - * @rx_ring: rx descriptor ring to store buffers on |
---|
| 838 | + * @rx_ring: Rx descriptor ring to store buffers on |
---|
589 | 839 | * @old_buf: donor buffer to have page reused |
---|
590 | 840 | * |
---|
591 | 841 | * Synchronizes page for reuse by the adapter |
---|
592 | 842 | */ |
---|
593 | | -static void ice_reuse_rx_page(struct ice_ring *rx_ring, |
---|
594 | | - struct ice_rx_buf *old_buf) |
---|
| 843 | +static void |
---|
| 844 | +ice_reuse_rx_page(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf) |
---|
595 | 845 | { |
---|
596 | 846 | u16 nta = rx_ring->next_to_alloc; |
---|
597 | 847 | struct ice_rx_buf *new_buf; |
---|
.. | .. |
---|
602 | 852 | nta++; |
---|
603 | 853 | rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; |
---|
604 | 854 | |
---|
605 | | - /* transfer page from old buffer to new buffer */ |
---|
606 | | - *new_buf = *old_buf; |
---|
| 855 | + /* Transfer page from old buffer to new buffer. |
---|
| 856 | + * Move each member individually to avoid possible store |
---|
| 857 | + * forwarding stalls and unnecessary copy of skb. |
---|
| 858 | + */ |
---|
| 859 | + new_buf->dma = old_buf->dma; |
---|
| 860 | + new_buf->page = old_buf->page; |
---|
| 861 | + new_buf->page_offset = old_buf->page_offset; |
---|
| 862 | + new_buf->pagecnt_bias = old_buf->pagecnt_bias; |
---|
607 | 863 | } |
---|
608 | 864 | |
---|
609 | 865 | /** |
---|
610 | | - * ice_fetch_rx_buf - Allocate skb and populate it |
---|
611 | | - * @rx_ring: rx descriptor ring to transact packets on |
---|
612 | | - * @rx_desc: descriptor containing info written by hardware |
---|
| 866 | + * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use |
---|
| 867 | + * @rx_ring: Rx descriptor ring to transact packets on |
---|
| 868 | + * @skb: skb to be used |
---|
| 869 | + * @size: size of buffer to add to skb |
---|
| 870 | + * @rx_buf_pgcnt: rx_buf page refcount |
---|
613 | 871 | * |
---|
614 | | - * This function allocates an skb on the fly, and populates it with the page |
---|
615 | | - * data from the current receive descriptor, taking care to set up the skb |
---|
616 | | - * correctly, as well as handling calling the page recycle function if |
---|
617 | | - * necessary. |
---|
| 872 | + * This function will pull an Rx buffer from the ring and synchronize it |
---|
| 873 | + * for use by the CPU. |
---|
618 | 874 | */ |
---|
619 | | -static struct sk_buff *ice_fetch_rx_buf(struct ice_ring *rx_ring, |
---|
620 | | - union ice_32b_rx_flex_desc *rx_desc) |
---|
| 875 | +static struct ice_rx_buf * |
---|
| 876 | +ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb, |
---|
| 877 | + const unsigned int size, int *rx_buf_pgcnt) |
---|
621 | 878 | { |
---|
622 | 879 | struct ice_rx_buf *rx_buf; |
---|
623 | | - struct sk_buff *skb; |
---|
624 | | - struct page *page; |
---|
625 | 880 | |
---|
626 | 881 | rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean]; |
---|
627 | | - page = rx_buf->page; |
---|
628 | | - prefetchw(page); |
---|
| 882 | + *rx_buf_pgcnt = |
---|
| 883 | +#if (PAGE_SIZE < 8192) |
---|
| 884 | + page_count(rx_buf->page); |
---|
| 885 | +#else |
---|
| 886 | + 0; |
---|
| 887 | +#endif |
---|
| 888 | + prefetchw(rx_buf->page); |
---|
| 889 | + *skb = rx_buf->skb; |
---|
629 | 890 | |
---|
630 | | - skb = rx_buf->skb; |
---|
| 891 | + if (!size) |
---|
| 892 | + return rx_buf; |
---|
| 893 | + /* we are reusing so sync this buffer for CPU use */ |
---|
| 894 | + dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, |
---|
| 895 | + rx_buf->page_offset, size, |
---|
| 896 | + DMA_FROM_DEVICE); |
---|
631 | 897 | |
---|
632 | | - if (likely(!skb)) { |
---|
633 | | - u8 *page_addr = page_address(page) + rx_buf->page_offset; |
---|
| 898 | + /* We have pulled a buffer for use, so decrement pagecnt_bias */ |
---|
| 899 | + rx_buf->pagecnt_bias--; |
---|
634 | 900 | |
---|
635 | | - /* prefetch first cache line of first page */ |
---|
636 | | - prefetch(page_addr); |
---|
637 | | -#if L1_CACHE_BYTES < 128 |
---|
638 | | - prefetch((void *)(page_addr + L1_CACHE_BYTES)); |
---|
639 | | -#endif /* L1_CACHE_BYTES */ |
---|
| 901 | + return rx_buf; |
---|
| 902 | +} |
---|
640 | 903 | |
---|
641 | | - /* allocate a skb to store the frags */ |
---|
642 | | - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, |
---|
643 | | - ICE_RX_HDR_SIZE, |
---|
644 | | - GFP_ATOMIC | __GFP_NOWARN); |
---|
645 | | - if (unlikely(!skb)) { |
---|
646 | | - rx_ring->rx_stats.alloc_buf_failed++; |
---|
647 | | - return NULL; |
---|
648 | | - } |
---|
| 904 | +/** |
---|
| 905 | + * ice_build_skb - Build skb around an existing buffer |
---|
| 906 | + * @rx_ring: Rx descriptor ring to transact packets on |
---|
| 907 | + * @rx_buf: Rx buffer to pull data from |
---|
| 908 | + * @xdp: xdp_buff pointing to the data |
---|
| 909 | + * |
---|
| 910 | + * This function builds an skb around an existing Rx buffer, taking care |
---|
| 911 | + * to set up the skb correctly and avoid any memcpy overhead. |
---|
| 912 | + */ |
---|
| 913 | +static struct sk_buff * |
---|
| 914 | +ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, |
---|
| 915 | + struct xdp_buff *xdp) |
---|
| 916 | +{ |
---|
| 917 | + u8 metasize = xdp->data - xdp->data_meta; |
---|
| 918 | +#if (PAGE_SIZE < 8192) |
---|
| 919 | + unsigned int truesize = ice_rx_pg_size(rx_ring) / 2; |
---|
| 920 | +#else |
---|
| 921 | + unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + |
---|
| 922 | + SKB_DATA_ALIGN(xdp->data_end - |
---|
| 923 | + xdp->data_hard_start); |
---|
| 924 | +#endif |
---|
| 925 | + struct sk_buff *skb; |
---|
649 | 926 | |
---|
650 | | - /* we will be copying header into skb->data in |
---|
651 | | - * pskb_may_pull so it is in our interest to prefetch |
---|
652 | | - * it now to avoid a possible cache miss |
---|
653 | | - */ |
---|
654 | | - prefetchw(skb->data); |
---|
| 927 | + /* Prefetch first cache line of first page. If xdp->data_meta |
---|
| 928 | + * is unused, this points exactly to xdp->data, otherwise we |
---|
| 929 | + * likely have a consumer accessing first few bytes of meta |
---|
| 930 | + * data, and then actual data. |
---|
| 931 | + */ |
---|
| 932 | + net_prefetch(xdp->data_meta); |
---|
| 933 | + /* build an skb around the page buffer */ |
---|
| 934 | + skb = build_skb(xdp->data_hard_start, truesize); |
---|
| 935 | + if (unlikely(!skb)) |
---|
| 936 | + return NULL; |
---|
655 | 937 | |
---|
656 | | - skb_record_rx_queue(skb, rx_ring->q_index); |
---|
657 | | - } else { |
---|
658 | | - /* we are reusing so sync this buffer for CPU use */ |
---|
659 | | - dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, |
---|
660 | | - rx_buf->page_offset, |
---|
661 | | - ICE_RXBUF_2048, |
---|
662 | | - DMA_FROM_DEVICE); |
---|
| 938 | + /* must record the Rx queue, otherwise OS features such as |
---|
| 939 | + * symmetric queue won't work |
---|
| 940 | + */ |
---|
| 941 | + skb_record_rx_queue(skb, rx_ring->q_index); |
---|
663 | 942 | |
---|
664 | | - rx_buf->skb = NULL; |
---|
665 | | - } |
---|
| 943 | + /* update pointers within the skb to store the data */ |
---|
| 944 | + skb_reserve(skb, xdp->data - xdp->data_hard_start); |
---|
| 945 | + __skb_put(skb, xdp->data_end - xdp->data); |
---|
| 946 | + if (metasize) |
---|
| 947 | + skb_metadata_set(skb, metasize); |
---|
666 | 948 | |
---|
667 | | - /* pull page into skb */ |
---|
668 | | - if (ice_add_rx_frag(rx_buf, rx_desc, skb)) { |
---|
669 | | - /* hand second half of page back to the ring */ |
---|
670 | | - ice_reuse_rx_page(rx_ring, rx_buf); |
---|
671 | | - rx_ring->rx_stats.page_reuse_count++; |
---|
672 | | - } else { |
---|
673 | | - /* we are not reusing the buffer so unmap it */ |
---|
674 | | - dma_unmap_page(rx_ring->dev, rx_buf->dma, PAGE_SIZE, |
---|
675 | | - DMA_FROM_DEVICE); |
---|
676 | | - } |
---|
677 | | - |
---|
678 | | - /* clear contents of buffer_info */ |
---|
679 | | - rx_buf->page = NULL; |
---|
| 949 | + /* buffer is used by skb, update page_offset */ |
---|
| 950 | + ice_rx_buf_adjust_pg_offset(rx_buf, truesize); |
---|
680 | 951 | |
---|
681 | 952 | return skb; |
---|
682 | 953 | } |
---|
683 | 954 | |
---|
684 | 955 | /** |
---|
685 | | - * ice_pull_tail - ice specific version of skb_pull_tail |
---|
686 | | - * @skb: pointer to current skb being adjusted |
---|
| 956 | + * ice_construct_skb - Allocate skb and populate it |
---|
| 957 | + * @rx_ring: Rx descriptor ring to transact packets on |
---|
| 958 | + * @rx_buf: Rx buffer to pull data from |
---|
| 959 | + * @xdp: xdp_buff pointing to the data |
---|
687 | 960 | * |
---|
688 | | - * This function is an ice specific version of __pskb_pull_tail. The |
---|
689 | | - * main difference between this version and the original function is that |
---|
690 | | - * this function can make several assumptions about the state of things |
---|
691 | | - * that allow for significant optimizations versus the standard function. |
---|
692 | | - * As a result we can do things like drop a frag and maintain an accurate |
---|
693 | | - * truesize for the skb. |
---|
| 961 | + * This function allocates an skb. It then populates it with the page |
---|
| 962 | + * data from the current receive descriptor, taking care to set up the |
---|
| 963 | + * skb correctly. |
---|
694 | 964 | */ |
---|
695 | | -static void ice_pull_tail(struct sk_buff *skb) |
---|
| 965 | +static struct sk_buff * |
---|
| 966 | +ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, |
---|
| 967 | + struct xdp_buff *xdp) |
---|
696 | 968 | { |
---|
697 | | - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; |
---|
698 | | - unsigned int pull_len; |
---|
699 | | - unsigned char *va; |
---|
| 969 | + unsigned int size = xdp->data_end - xdp->data; |
---|
| 970 | + unsigned int headlen; |
---|
| 971 | + struct sk_buff *skb; |
---|
700 | 972 | |
---|
701 | | - /* it is valid to use page_address instead of kmap since we are |
---|
702 | | - * working with pages allocated out of the lomem pool per |
---|
703 | | - * alloc_page(GFP_ATOMIC) |
---|
704 | | - */ |
---|
705 | | - va = skb_frag_address(frag); |
---|
| 973 | + /* prefetch first cache line of first page */ |
---|
| 974 | + net_prefetch(xdp->data); |
---|
706 | 975 | |
---|
707 | | - /* we need the header to contain the greater of either ETH_HLEN or |
---|
708 | | - * 60 bytes if the skb->len is less than 60 for skb_pad. |
---|
709 | | - */ |
---|
710 | | - pull_len = eth_get_headlen(va, ICE_RX_HDR_SIZE); |
---|
| 976 | + /* allocate a skb to store the frags */ |
---|
| 977 | + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE, |
---|
| 978 | + GFP_ATOMIC | __GFP_NOWARN); |
---|
| 979 | + if (unlikely(!skb)) |
---|
| 980 | + return NULL; |
---|
| 981 | + |
---|
| 982 | + skb_record_rx_queue(skb, rx_ring->q_index); |
---|
| 983 | + /* Determine available headroom for copy */ |
---|
| 984 | + headlen = size; |
---|
| 985 | + if (headlen > ICE_RX_HDR_SIZE) |
---|
| 986 | + headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE); |
---|
711 | 987 | |
---|
712 | 988 | /* align pull length to size of long to optimize memcpy performance */ |
---|
713 | | - skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); |
---|
| 989 | + memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, |
---|
| 990 | + sizeof(long))); |
---|
714 | 991 | |
---|
715 | | - /* update all of the pointers */ |
---|
716 | | - skb_frag_size_sub(frag, pull_len); |
---|
717 | | - frag->page_offset += pull_len; |
---|
718 | | - skb->data_len -= pull_len; |
---|
719 | | - skb->tail += pull_len; |
---|
| 992 | + /* if we exhaust the linear part then add what is left as a frag */ |
---|
| 993 | + size -= headlen; |
---|
| 994 | + if (size) { |
---|
| 995 | +#if (PAGE_SIZE >= 8192) |
---|
| 996 | + unsigned int truesize = SKB_DATA_ALIGN(size); |
---|
| 997 | +#else |
---|
| 998 | + unsigned int truesize = ice_rx_pg_size(rx_ring) / 2; |
---|
| 999 | +#endif |
---|
| 1000 | + skb_add_rx_frag(skb, 0, rx_buf->page, |
---|
| 1001 | + rx_buf->page_offset + headlen, size, truesize); |
---|
| 1002 | + /* buffer is used by skb, update page_offset */ |
---|
| 1003 | + ice_rx_buf_adjust_pg_offset(rx_buf, truesize); |
---|
| 1004 | + } else { |
---|
| 1005 | + /* buffer is unused, reset bias back to rx_buf; data was copied |
---|
| 1006 | + * onto skb's linear part so there's no need for adjusting |
---|
| 1007 | + * page offset and we can reuse this buffer as-is |
---|
| 1008 | + */ |
---|
| 1009 | + rx_buf->pagecnt_bias++; |
---|
| 1010 | + } |
---|
| 1011 | + |
---|
| 1012 | + return skb; |
---|
720 | 1013 | } |
---|
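ice_construct_skb() copies at most ICE_RX_HDR_SIZE bytes of headers into the skb's linear area, trimmed to the real header length by eth_get_headlen(), and attaches whatever remains as a page fragment. A short sketch of just that split arithmetic, with made-up packet and header sizes; mock_get_headlen() is an invented stand-in.

```c
#include <stdio.h>

#define MOCK_RX_HDR_SIZE 256u  /* mirrors ICE_RX_HDR_SIZE above */

/* pretend header parser: report the L2..L4 header length, capped by the caller */
static unsigned int mock_get_headlen(unsigned int pkt_len, unsigned int cap)
{
	unsigned int hdrs = 54;  /* assumed Ethernet + IPv4 + TCP */

	if (hdrs > pkt_len)
		hdrs = pkt_len;
	return hdrs > cap ? cap : hdrs;
}

int main(void)
{
	unsigned int size = 1514;  /* frame length taken from the Rx descriptor */
	unsigned int headlen = size;

	if (headlen > MOCK_RX_HDR_SIZE)
		headlen = mock_get_headlen(size, MOCK_RX_HDR_SIZE);

	printf("copied into the linear area: %u bytes\n", headlen);        /* 54 */
	printf("attached as a page fragment: %u bytes\n", size - headlen); /* 1460 */
	return 0;
}
```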
721 | 1014 | |
---|
722 | 1015 | /** |
---|
723 | | - * ice_cleanup_headers - Correct empty headers |
---|
724 | | - * @skb: pointer to current skb being fixed |
---|
| 1016 | + * ice_put_rx_buf - Clean up used buffer and either recycle or free |
---|
| 1017 | + * @rx_ring: Rx descriptor ring to transact packets on |
---|
| 1018 | + * @rx_buf: Rx buffer to pull data from |
---|
| 1019 | + * @rx_buf_pgcnt: Rx buffer page count pre xdp_do_redirect() |
---|
725 | 1020 | * |
---|
726 | | - * Also address the case where we are pulling data in on pages only |
---|
727 | | - * and as such no data is present in the skb header. |
---|
728 | | - * |
---|
729 | | - * In addition if skb is not at least 60 bytes we need to pad it so that |
---|
730 | | - * it is large enough to qualify as a valid Ethernet frame. |
---|
731 | | - * |
---|
732 | | - * Returns true if an error was encountered and skb was freed. |
---|
| 1021 | + * This function will update next_to_clean and then clean up the contents |
---|
| 1022 | + * of the rx_buf. It will either recycle the buffer or unmap it and free |
---|
| 1023 | + * the associated resources. |
---|
733 | 1024 | */ |
---|
734 | | -static bool ice_cleanup_headers(struct sk_buff *skb) |
---|
| 1025 | +static void |
---|
| 1026 | +ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, |
---|
| 1027 | + int rx_buf_pgcnt) |
---|
735 | 1028 | { |
---|
736 | | - /* place header in linear portion of buffer */ |
---|
737 | | - if (skb_is_nonlinear(skb)) |
---|
738 | | - ice_pull_tail(skb); |
---|
| 1029 | + u16 ntc = rx_ring->next_to_clean + 1; |
---|
739 | 1030 | |
---|
740 | | - /* if eth_skb_pad returns an error the skb was freed */ |
---|
741 | | - if (eth_skb_pad(skb)) |
---|
742 | | - return true; |
---|
| 1031 | + /* fetch, update, and store next to clean */ |
---|
| 1032 | + ntc = (ntc < rx_ring->count) ? ntc : 0; |
---|
| 1033 | + rx_ring->next_to_clean = ntc; |
---|
743 | 1034 | |
---|
744 | | - return false; |
---|
745 | | -} |
---|
| 1035 | + if (!rx_buf) |
---|
| 1036 | + return; |
---|
746 | 1037 | |
---|
747 | | -/** |
---|
748 | | - * ice_test_staterr - tests bits in Rx descriptor status and error fields |
---|
749 | | - * @rx_desc: pointer to receive descriptor (in le64 format) |
---|
750 | | - * @stat_err_bits: value to mask |
---|
751 | | - * |
---|
752 | | - * This function does some fast chicanery in order to return the |
---|
753 | | - * value of the mask which is really only used for boolean tests. |
---|
754 | | - * The status_error_len doesn't need to be shifted because it begins |
---|
755 | | - * at offset zero. |
---|
756 | | - */ |
---|
757 | | -static bool ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, |
---|
758 | | - const u16 stat_err_bits) |
---|
759 | | -{ |
---|
760 | | - return !!(rx_desc->wb.status_error0 & |
---|
761 | | - cpu_to_le16(stat_err_bits)); |
---|
| 1038 | + if (ice_can_reuse_rx_page(rx_buf, rx_buf_pgcnt)) { |
---|
| 1039 | + /* hand second half of page back to the ring */ |
---|
| 1040 | + ice_reuse_rx_page(rx_ring, rx_buf); |
---|
| 1041 | + } else { |
---|
| 1042 | + /* we are not reusing the buffer so unmap it */ |
---|
| 1043 | + dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma, |
---|
| 1044 | + ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE, |
---|
| 1045 | + ICE_RX_DMA_ATTR); |
---|
| 1046 | + __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias); |
---|
| 1047 | + } |
---|
| 1048 | + |
---|
| 1049 | + /* clear contents of buffer_info */ |
---|
| 1050 | + rx_buf->page = NULL; |
---|
| 1051 | + rx_buf->skb = NULL; |
---|
762 | 1052 | } |
---|
763 | 1053 | |
---|
764 | 1054 | /** |
---|
.. | .. |
---|
767 | 1057 | * @rx_desc: Rx descriptor for current buffer |
---|
768 | 1058 | * @skb: Current socket buffer containing buffer in progress |
---|
769 | 1059 | * |
---|
770 | | - * This function updates next to clean. If the buffer is an EOP buffer |
---|
771 | | - * this function exits returning false, otherwise it will place the |
---|
772 | | - * sk_buff in the next buffer to be chained and return true indicating |
---|
773 | | - * that this is in fact a non-EOP buffer. |
---|
| 1060 | + * If the buffer is an EOP buffer, this function exits returning false, |
---|
| 1061 | + * otherwise return true indicating that this is in fact a non-EOP buffer. |
---|
774 | 1062 | */ |
---|
775 | | -static bool ice_is_non_eop(struct ice_ring *rx_ring, |
---|
776 | | - union ice_32b_rx_flex_desc *rx_desc, |
---|
777 | | - struct sk_buff *skb) |
---|
| 1063 | +static bool |
---|
| 1064 | +ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, |
---|
| 1065 | + struct sk_buff *skb) |
---|
778 | 1066 | { |
---|
779 | | - u32 ntc = rx_ring->next_to_clean + 1; |
---|
780 | | - |
---|
781 | | - /* fetch, update, and store next to clean */ |
---|
782 | | - ntc = (ntc < rx_ring->count) ? ntc : 0; |
---|
783 | | - rx_ring->next_to_clean = ntc; |
---|
784 | | - |
---|
785 | | - prefetch(ICE_RX_DESC(rx_ring, ntc)); |
---|
786 | | - |
---|
787 | 1067 | /* if we are the last buffer then there is nothing else to do */ |
---|
788 | 1068 | #define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S) |
---|
789 | 1069 | if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF))) |
---|
790 | 1070 | return false; |
---|
791 | 1071 | |
---|
792 | 1072 | /* place skb in next buffer to be received */ |
---|
793 | | - rx_ring->rx_buf[ntc].skb = skb; |
---|
| 1073 | + rx_ring->rx_buf[rx_ring->next_to_clean].skb = skb; |
---|
794 | 1074 | rx_ring->rx_stats.non_eop_descs++; |
---|
795 | 1075 | |
---|
796 | 1076 | return true; |
---|
797 | 1077 | } |
---|
798 | 1078 | |
---|
799 | 1079 | /** |
---|
800 | | - * ice_ptype_to_htype - get a hash type |
---|
801 | | - * @ptype: the ptype value from the descriptor |
---|
802 | | - * |
---|
803 | | - * Returns a hash type to be used by skb_set_hash |
---|
804 | | - */ |
---|
805 | | -static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype) |
---|
806 | | -{ |
---|
807 | | - return PKT_HASH_TYPE_NONE; |
---|
808 | | -} |
---|
809 | | - |
---|
810 | | -/** |
---|
811 | | - * ice_rx_hash - set the hash value in the skb |
---|
812 | | - * @rx_ring: descriptor ring |
---|
813 | | - * @rx_desc: specific descriptor |
---|
814 | | - * @skb: pointer to current skb |
---|
815 | | - * @rx_ptype: the ptype value from the descriptor |
---|
816 | | - */ |
---|
817 | | -static void |
---|
818 | | -ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, |
---|
819 | | - struct sk_buff *skb, u8 rx_ptype) |
---|
820 | | -{ |
---|
821 | | - struct ice_32b_rx_flex_desc_nic *nic_mdid; |
---|
822 | | - u32 hash; |
---|
823 | | - |
---|
824 | | - if (!(rx_ring->netdev->features & NETIF_F_RXHASH)) |
---|
825 | | - return; |
---|
826 | | - |
---|
827 | | - if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC) |
---|
828 | | - return; |
---|
829 | | - |
---|
830 | | - nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc; |
---|
831 | | - hash = le32_to_cpu(nic_mdid->rss_hash); |
---|
832 | | - skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype)); |
---|
833 | | -} |
---|
834 | | - |
---|
835 | | -/** |
---|
836 | | - * ice_rx_csum - Indicate in skb if checksum is good |
---|
837 | | - * @vsi: the VSI we care about |
---|
838 | | - * @skb: skb currently being received and modified |
---|
839 | | - * @rx_desc: the receive descriptor |
---|
840 | | - * @ptype: the packet type decoded by hardware |
---|
841 | | - * |
---|
842 | | - * skb->protocol must be set before this function is called |
---|
843 | | - */ |
---|
844 | | -static void ice_rx_csum(struct ice_vsi *vsi, struct sk_buff *skb, |
---|
845 | | - union ice_32b_rx_flex_desc *rx_desc, u8 ptype) |
---|
846 | | -{ |
---|
847 | | - struct ice_rx_ptype_decoded decoded; |
---|
848 | | - u32 rx_error, rx_status; |
---|
849 | | - bool ipv4, ipv6; |
---|
850 | | - |
---|
851 | | - rx_status = le16_to_cpu(rx_desc->wb.status_error0); |
---|
852 | | - rx_error = rx_status; |
---|
853 | | - |
---|
854 | | - decoded = ice_decode_rx_desc_ptype(ptype); |
---|
855 | | - |
---|
856 | | - /* Start with CHECKSUM_NONE and by default csum_level = 0 */ |
---|
857 | | - skb->ip_summed = CHECKSUM_NONE; |
---|
858 | | - skb_checksum_none_assert(skb); |
---|
859 | | - |
---|
860 | | - /* check if Rx checksum is enabled */ |
---|
861 | | - if (!(vsi->netdev->features & NETIF_F_RXCSUM)) |
---|
862 | | - return; |
---|
863 | | - |
---|
864 | | - /* check if HW has decoded the packet and checksum */ |
---|
865 | | - if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) |
---|
866 | | - return; |
---|
867 | | - |
---|
868 | | - if (!(decoded.known && decoded.outer_ip)) |
---|
869 | | - return; |
---|
870 | | - |
---|
871 | | - ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && |
---|
872 | | - (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4); |
---|
873 | | - ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && |
---|
874 | | - (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6); |
---|
875 | | - |
---|
876 | | - if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | |
---|
877 | | - BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))) |
---|
878 | | - goto checksum_fail; |
---|
879 | | - else if (ipv6 && (rx_status & |
---|
880 | | - (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S)))) |
---|
881 | | - goto checksum_fail; |
---|
882 | | - |
---|
883 | | - /* check for L4 errors and handle packets that were not able to be |
---|
884 | | - * checksummed due to arrival speed |
---|
885 | | - */ |
---|
886 | | - if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)) |
---|
887 | | - goto checksum_fail; |
---|
888 | | - |
---|
889 | | - /* Only report checksum unnecessary for TCP, UDP, or SCTP */ |
---|
890 | | - switch (decoded.inner_prot) { |
---|
891 | | - case ICE_RX_PTYPE_INNER_PROT_TCP: |
---|
892 | | - case ICE_RX_PTYPE_INNER_PROT_UDP: |
---|
893 | | - case ICE_RX_PTYPE_INNER_PROT_SCTP: |
---|
894 | | - skb->ip_summed = CHECKSUM_UNNECESSARY; |
---|
895 | | - default: |
---|
896 | | - break; |
---|
897 | | - } |
---|
898 | | - return; |
---|
899 | | - |
---|
900 | | -checksum_fail: |
---|
901 | | - vsi->back->hw_csum_rx_error++; |
---|
902 | | -} |
---|
903 | | - |
---|
904 | | -/** |
---|
905 | | - * ice_process_skb_fields - Populate skb header fields from Rx descriptor |
---|
906 | | - * @rx_ring: rx descriptor ring packet is being transacted on |
---|
907 | | - * @rx_desc: pointer to the EOP Rx descriptor |
---|
908 | | - * @skb: pointer to current skb being populated |
---|
909 | | - * @ptype: the packet type decoded by hardware |
---|
910 | | - * |
---|
911 | | - * This function checks the ring, descriptor, and packet information in |
---|
912 | | - * order to populate the hash, checksum, VLAN, protocol, and |
---|
913 | | - * other fields within the skb. |
---|
914 | | - */ |
---|
915 | | -static void ice_process_skb_fields(struct ice_ring *rx_ring, |
---|
916 | | - union ice_32b_rx_flex_desc *rx_desc, |
---|
917 | | - struct sk_buff *skb, u8 ptype) |
---|
918 | | -{ |
---|
919 | | - ice_rx_hash(rx_ring, rx_desc, skb, ptype); |
---|
920 | | - |
---|
921 | | - /* modifies the skb - consumes the enet header */ |
---|
922 | | - skb->protocol = eth_type_trans(skb, rx_ring->netdev); |
---|
923 | | - |
---|
924 | | - ice_rx_csum(rx_ring->vsi, skb, rx_desc, ptype); |
---|
925 | | -} |
---|
926 | | - |
---|
927 | | -/** |
---|
928 | | - * ice_receive_skb - Send a completed packet up the stack |
---|
929 | | - * @rx_ring: rx ring in play |
---|
930 | | - * @skb: packet to send up |
---|
931 | | - * @vlan_tag: vlan tag for packet |
---|
932 | | - * |
---|
933 | | - * This function sends the completed packet (via. skb) up the stack using |
---|
934 | | - * gro receive functions (with/without vlan tag) |
---|
935 | | - */ |
---|
936 | | -static void ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, |
---|
937 | | - u16 vlan_tag) |
---|
938 | | -{ |
---|
939 | | - if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && |
---|
940 | | - (vlan_tag & VLAN_VID_MASK)) { |
---|
941 | | - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); |
---|
942 | | - } |
---|
943 | | - napi_gro_receive(&rx_ring->q_vector->napi, skb); |
---|
944 | | -} |
---|
945 | | - |
---|
946 | | -/** |
---|
947 | 1080 | * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf |
---|
948 | | - * @rx_ring: rx descriptor ring to transact packets on |
---|
| 1081 | + * @rx_ring: Rx descriptor ring to transact packets on |
---|
949 | 1082 | * @budget: Total limit on number of packets to process |
---|
950 | 1083 | * |
---|
951 | 1084 | * This function provides a "bounce buffer" approach to Rx interrupt |
---|
952 | | - * processing. The advantage to this is that on systems that have |
---|
| 1085 | + * processing. The advantage to this is that on systems that have |
---|
953 | 1086 | * expensive overhead for IOMMU access this provides a means of avoiding |
---|
954 | 1087 | * it by maintaining the mapping of the page to the system. |
---|
955 | 1088 | * |
---|
956 | 1089 | * Returns amount of work completed |
---|
957 | 1090 | */ |
---|
958 | | -static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) |
---|
| 1091 | +int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) |
---|
959 | 1092 | { |
---|
960 | 1093 | unsigned int total_rx_bytes = 0, total_rx_pkts = 0; |
---|
961 | 1094 | u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); |
---|
962 | | - bool failure = false; |
---|
| 1095 | + unsigned int xdp_res, xdp_xmit = 0; |
---|
| 1096 | + struct bpf_prog *xdp_prog = NULL; |
---|
| 1097 | + struct xdp_buff xdp; |
---|
| 1098 | + bool failure; |
---|
963 | 1099 | |
---|
964 | | - /* start the loop to process RX packets bounded by 'budget' */ |
---|
| 1100 | + xdp.rxq = &rx_ring->xdp_rxq; |
---|
| 1101 | + /* Frame size depends on rx_ring setup when PAGE_SIZE=4K */
---|
| 1102 | +#if (PAGE_SIZE < 8192) |
---|
| 1103 | + xdp.frame_sz = ice_rx_frame_truesize(rx_ring, 0); |
---|
| 1104 | +#endif |
---|
| 1105 | + |
---|
| 1106 | + /* start the loop to process Rx packets bounded by 'budget' */ |
---|
965 | 1107 | while (likely(total_rx_pkts < (unsigned int)budget)) { |
---|
966 | 1108 | union ice_32b_rx_flex_desc *rx_desc; |
---|
| 1109 | + struct ice_rx_buf *rx_buf; |
---|
967 | 1110 | struct sk_buff *skb; |
---|
| 1111 | + unsigned int size; |
---|
968 | 1112 | u16 stat_err_bits; |
---|
| 1113 | + int rx_buf_pgcnt; |
---|
969 | 1114 | u16 vlan_tag = 0; |
---|
970 | 1115 | u8 rx_ptype; |
---|
971 | 1116 | |
---|
972 | | - /* return some buffers to hardware, one at a time is too slow */ |
---|
973 | | - if (cleaned_count >= ICE_RX_BUF_WRITE) { |
---|
974 | | - failure = failure || |
---|
975 | | - ice_alloc_rx_bufs(rx_ring, cleaned_count); |
---|
976 | | - cleaned_count = 0; |
---|
977 | | - } |
---|
978 | | - |
---|
979 | | - /* get the RX desc from RX ring based on 'next_to_clean' */ |
---|
| 1117 | + /* get the Rx desc from Rx ring based on 'next_to_clean' */ |
---|
980 | 1118 | rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean); |
---|
981 | 1119 | |
---|
982 | 1120 | /* status_error_len will always be zero for unused descriptors |
---|
.. | .. |
---|
994 | 1132 | */ |
---|
995 | 1133 | dma_rmb(); |
---|
996 | 1134 | |
---|
997 | | - /* allocate (if needed) and populate skb */ |
---|
998 | | - skb = ice_fetch_rx_buf(rx_ring, rx_desc); |
---|
999 | | - if (!skb) |
---|
1000 | | - break; |
---|
| 1135 | + if (rx_desc->wb.rxdid == FDIR_DESC_RXDID || !rx_ring->netdev) { |
---|
| 1136 | + ice_put_rx_buf(rx_ring, NULL, 0); |
---|
| 1137 | + cleaned_count++; |
---|
| 1138 | + continue; |
---|
| 1139 | + } |
---|
1001 | 1140 | |
---|
| 1141 | + size = le16_to_cpu(rx_desc->wb.pkt_len) & |
---|
| 1142 | + ICE_RX_FLX_DESC_PKT_LEN_M; |
---|
| 1143 | + |
---|
| 1144 | + /* retrieve a buffer from the ring */ |
---|
| 1145 | + rx_buf = ice_get_rx_buf(rx_ring, &skb, size, &rx_buf_pgcnt); |
---|
| 1146 | + |
---|
| 1147 | + if (!size) { |
---|
| 1148 | + xdp.data = NULL; |
---|
| 1149 | + xdp.data_end = NULL; |
---|
| 1150 | + xdp.data_hard_start = NULL; |
---|
| 1151 | + xdp.data_meta = NULL; |
---|
| 1152 | + goto construct_skb; |
---|
| 1153 | + } |
---|
| 1154 | + |
---|
| 1155 | + xdp.data = page_address(rx_buf->page) + rx_buf->page_offset; |
---|
| 1156 | + xdp.data_hard_start = xdp.data - ice_rx_offset(rx_ring); |
---|
| 1157 | + xdp.data_meta = xdp.data; |
---|
| 1158 | + xdp.data_end = xdp.data + size; |
---|
| 1159 | +#if (PAGE_SIZE > 4096) |
---|
| 1160 | + /* At larger PAGE_SIZE, frame_sz depends on the data length */
---|
| 1161 | + xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size); |
---|
| 1162 | +#endif |
---|
| 1163 | + |
---|
| 1164 | + rcu_read_lock(); |
---|
| 1165 | + xdp_prog = READ_ONCE(rx_ring->xdp_prog); |
---|
| 1166 | + if (!xdp_prog) { |
---|
| 1167 | + rcu_read_unlock(); |
---|
| 1168 | + goto construct_skb; |
---|
| 1169 | + } |
---|
| 1170 | + |
---|
| 1171 | + xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog); |
---|
| 1172 | + rcu_read_unlock(); |
---|
| 1173 | + if (!xdp_res) |
---|
| 1174 | + goto construct_skb; |
---|
| 1175 | + if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) { |
---|
| 1176 | + xdp_xmit |= xdp_res; |
---|
| 1177 | + ice_rx_buf_adjust_pg_offset(rx_buf, xdp.frame_sz); |
---|
| 1178 | + } else { |
---|
| 1179 | + rx_buf->pagecnt_bias++; |
---|
| 1180 | + } |
---|
| 1181 | + total_rx_bytes += size; |
---|
| 1182 | + total_rx_pkts++; |
---|
| 1183 | + |
---|
| 1184 | + cleaned_count++; |
---|
| 1185 | + ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt); |
---|
| 1186 | + continue; |
---|
| 1187 | +construct_skb: |
---|
| 1188 | + if (skb) { |
---|
| 1189 | + ice_add_rx_frag(rx_ring, rx_buf, skb, size); |
---|
| 1190 | + } else if (likely(xdp.data)) { |
---|
| 1191 | + if (ice_ring_uses_build_skb(rx_ring)) |
---|
| 1192 | + skb = ice_build_skb(rx_ring, rx_buf, &xdp); |
---|
| 1193 | + else |
---|
| 1194 | + skb = ice_construct_skb(rx_ring, rx_buf, &xdp); |
---|
| 1195 | + } |
---|
| 1196 | + /* exit if we failed to retrieve a buffer */ |
---|
| 1197 | + if (!skb) { |
---|
| 1198 | + rx_ring->rx_stats.alloc_buf_failed++; |
---|
| 1199 | + if (rx_buf) |
---|
| 1200 | + rx_buf->pagecnt_bias++; |
---|
| 1201 | + break; |
---|
| 1202 | + } |
---|
| 1203 | + |
---|
| 1204 | + ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt); |
---|
1002 | 1205 | cleaned_count++; |
---|
1003 | 1206 | |
---|
1004 | 1207 | /* skip if it is NOP desc */ |
---|
.. | .. |
---|
1011 | 1214 | continue; |
---|
1012 | 1215 | } |
---|
1013 | 1216 | |
---|
1014 | | - rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) & |
---|
1015 | | - ICE_RX_FLEX_DESC_PTYPE_M; |
---|
1016 | | - |
---|
1017 | 1217 | stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S); |
---|
1018 | 1218 | if (ice_test_staterr(rx_desc, stat_err_bits)) |
---|
1019 | 1219 | vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1); |
---|
1020 | 1220 | |
---|
1021 | | - /* correct empty headers and pad skb if needed (to make valid |
---|
1022 | | - * ethernet frame |
---|
1023 | | - */ |
---|
1024 | | - if (ice_cleanup_headers(skb)) { |
---|
| 1221 | + /* pad the skb if needed, to make a valid ethernet frame */ |
---|
| 1222 | + if (eth_skb_pad(skb)) { |
---|
1025 | 1223 | skb = NULL; |
---|
1026 | 1224 | continue; |
---|
1027 | 1225 | } |
---|
.. | .. |
---|
1030 | 1228 | total_rx_bytes += skb->len; |
---|
1031 | 1229 | |
---|
1032 | 1230 | /* populate checksum, VLAN, and protocol */ |
---|
| 1231 | + rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) & |
---|
| 1232 | + ICE_RX_FLEX_DESC_PTYPE_M; |
---|
| 1233 | + |
---|
1033 | 1234 | ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); |
---|
1034 | 1235 | |
---|
1035 | 1236 | /* send completed skb up the stack */ |
---|
.. | .. |
---|
1039 | 1240 | total_rx_pkts++; |
---|
1040 | 1241 | } |
---|
1041 | 1242 | |
---|
1042 | | - /* update queue and vector specific stats */ |
---|
1043 | | - u64_stats_update_begin(&rx_ring->syncp); |
---|
1044 | | - rx_ring->stats.pkts += total_rx_pkts; |
---|
1045 | | - rx_ring->stats.bytes += total_rx_bytes; |
---|
1046 | | - u64_stats_update_end(&rx_ring->syncp); |
---|
1047 | | - rx_ring->q_vector->rx.total_pkts += total_rx_pkts; |
---|
1048 | | - rx_ring->q_vector->rx.total_bytes += total_rx_bytes; |
---|
| 1243 | + /* return up to cleaned_count buffers to hardware */ |
---|
| 1244 | + failure = ice_alloc_rx_bufs(rx_ring, cleaned_count); |
---|
| 1245 | + |
---|
| 1246 | + if (xdp_prog) |
---|
| 1247 | + ice_finalize_xdp_rx(rx_ring, xdp_xmit); |
---|
| 1248 | + |
---|
| 1249 | + ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes); |
---|
1049 | 1250 | |
---|
1050 | 1251 | /* guarantee a trip back through this routine if there was a failure */ |
---|
1051 | 1252 | return failure ? budget : (int)total_rx_pkts; |
---|
| 1253 | +} |
---|
| 1254 | + |
---|
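Two small pieces of bookkeeping in the loop above decide whether a half-page Rx buffer can be reused: when the XDP program transmits or redirects the frame (ICE_XDP_TX / ICE_XDP_REDIR) the page offset is advanced past the in-flight data, while a drop simply hands the reference back by bumping pagecnt_bias. The sketch below is an illustrative model only, with invented struct and function names, assuming the half-page (2048-byte) truesize that applies when PAGE_SIZE is 4K.

    /* Illustrative model of the Rx page-reuse bookkeeping; none of these
     * names exist in the driver, and a 2048-byte truesize is assumed.
     */
    struct rx_half_page {
            unsigned int page_offset;       /* where the next buffer starts */
            unsigned int pagecnt_bias;      /* references the driver still holds */
    };

    #define MODEL_TRUESIZE  2048u           /* assumed: half of a 4K page */

    /* XDP_TX / XDP_REDIRECT: the data is in flight, move to the other half. */
    static void model_buf_given_away(struct rx_half_page *buf)
    {
            buf->page_offset ^= MODEL_TRUESIZE;
    }

    /* XDP_DROP (or aborted): nothing left the driver, so take the reference
     * back and let the same half be handed out again.
     */
    static void model_buf_recycled(struct rx_half_page *buf)
    {
            buf->pagecnt_bias++;
    }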
| 1255 | +/** |
---|
| 1256 | + * ice_adjust_itr_by_size_and_speed - Adjust ITR based on current traffic |
---|
| 1257 | + * @port_info: port_info structure containing the current link speed |
---|
| 1258 | + * @avg_pkt_size: average size of Tx or Rx packets based on clean routine |
---|
| 1259 | + * @itr: ITR value to update |
---|
| 1260 | + * |
---|
| 1261 | + * Calculate how big of an increment should be applied to the ITR value passed |
---|
| 1262 | + * in based on wmem_default, SKB overhead, ethernet overhead, and the current |
---|
| 1263 | + * link speed. |
---|
| 1264 | + * |
---|
| 1265 | + * The following is a calculation derived from: |
---|
| 1266 | + * wmem_default / (size + overhead) = desired_pkts_per_int |
---|
| 1267 | + * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate |
---|
| 1268 | + * (desired_pkts_per_int / pkt_rate) * usecs_per_sec = ITR value
---|
| 1269 | + * |
---|
| 1270 | + * Assuming wmem_default is 212992 and overhead is 640 bytes per |
---|
| 1271 | + * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the |
---|
| 1272 | + * formula down to: |
---|
| 1273 | + * |
---|
| 1274 | + *       wmem_default * bits_per_byte * usecs_per_sec    pkt_size + 24
---|
| 1275 | + * ITR = --------------------------------------------- * --------------
---|
| 1276 | + *                          rate                          pkt_size + 640
---|
| 1277 | + */ |
---|
| 1278 | +static unsigned int |
---|
| 1279 | +ice_adjust_itr_by_size_and_speed(struct ice_port_info *port_info, |
---|
| 1280 | + unsigned int avg_pkt_size, |
---|
| 1281 | + unsigned int itr) |
---|
| 1282 | +{ |
---|
| 1283 | + switch (port_info->phy.link_info.link_speed) { |
---|
| 1284 | + case ICE_AQ_LINK_SPEED_100GB: |
---|
| 1285 | + itr += DIV_ROUND_UP(17 * (avg_pkt_size + 24), |
---|
| 1286 | + avg_pkt_size + 640); |
---|
| 1287 | + break; |
---|
| 1288 | + case ICE_AQ_LINK_SPEED_50GB: |
---|
| 1289 | + itr += DIV_ROUND_UP(34 * (avg_pkt_size + 24), |
---|
| 1290 | + avg_pkt_size + 640); |
---|
| 1291 | + break; |
---|
| 1292 | + case ICE_AQ_LINK_SPEED_40GB: |
---|
| 1293 | + itr += DIV_ROUND_UP(43 * (avg_pkt_size + 24), |
---|
| 1294 | + avg_pkt_size + 640); |
---|
| 1295 | + break; |
---|
| 1296 | + case ICE_AQ_LINK_SPEED_25GB: |
---|
| 1297 | + itr += DIV_ROUND_UP(68 * (avg_pkt_size + 24), |
---|
| 1298 | + avg_pkt_size + 640); |
---|
| 1299 | + break; |
---|
| 1300 | + case ICE_AQ_LINK_SPEED_20GB: |
---|
| 1301 | + itr += DIV_ROUND_UP(85 * (avg_pkt_size + 24), |
---|
| 1302 | + avg_pkt_size + 640); |
---|
| 1303 | + break; |
---|
| 1304 | + case ICE_AQ_LINK_SPEED_10GB: |
---|
| 1305 | + default: |
---|
| 1306 | + itr += DIV_ROUND_UP(170 * (avg_pkt_size + 24), |
---|
| 1307 | + avg_pkt_size + 640); |
---|
| 1308 | + break; |
---|
| 1309 | + } |
---|
| 1310 | + |
---|
| 1311 | + if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) { |
---|
| 1312 | + itr &= ICE_ITR_ADAPTIVE_LATENCY; |
---|
| 1313 | + itr += ICE_ITR_ADAPTIVE_MAX_USECS; |
---|
| 1314 | + } |
---|
| 1315 | + |
---|
| 1316 | + return itr; |
---|
| 1317 | +} |
---|
| 1318 | + |
---|
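The per-speed constants in the switch above (17, 34, 43, 68, 85, 170) are just the left-hand factor of the formula in the preceding comment, wmem_default * bits_per_byte * usecs_per_sec / rate, evaluated with wmem_default = 212992 and rounded. A minimal standalone sketch (not driver code) that reproduces them:

    #include <stdio.h>

    int main(void)
    {
            /* Link rates in Gbit/s matching the ICE_AQ_LINK_SPEED_* cases. */
            static const double gbps[] = { 100, 50, 40, 25, 20, 10 };
            const double wmem_default = 212992;     /* bytes, from the comment */
            unsigned int i;

            for (i = 0; i < sizeof(gbps) / sizeof(gbps[0]); i++) {
                    double factor = wmem_default * 8 * 1e6 / (gbps[i] * 1e9);

                    /* prints ~17.0, 34.1, 42.6, 68.2, 85.2, 170.4 */
                    printf("%3.0fG -> %.1f\n", gbps[i], factor);
            }
            return 0;
    }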
| 1319 | +/** |
---|
| 1320 | + * ice_update_itr - update the adaptive ITR value based on statistics |
---|
| 1321 | + * @q_vector: structure containing interrupt and ring information |
---|
| 1322 | + * @rc: structure containing ring performance data |
---|
| 1323 | + * |
---|
| 1324 | + * Stores a new ITR value based on packets and byte |
---|
| 1325 | + * counts during the last interrupt. The advantage of per interrupt |
---|
| 1326 | + * computation is faster updates and more accurate ITR for the current |
---|
| 1327 | + * traffic pattern. Constants in this function were computed |
---|
| 1328 | + * based on theoretical maximum wire speed and thresholds were set based |
---|
| 1329 | + * on testing data as well as attempting to minimize response time |
---|
| 1330 | + * while increasing bulk throughput. |
---|
| 1331 | + */ |
---|
| 1332 | +static void |
---|
| 1333 | +ice_update_itr(struct ice_q_vector *q_vector, struct ice_ring_container *rc) |
---|
| 1334 | +{ |
---|
| 1335 | + unsigned long next_update = jiffies; |
---|
| 1336 | + unsigned int packets, bytes, itr; |
---|
| 1337 | + bool container_is_rx; |
---|
| 1338 | + |
---|
| 1339 | + if (!rc->ring || !ITR_IS_DYNAMIC(rc->itr_setting)) |
---|
| 1340 | + return; |
---|
| 1341 | + |
---|
| 1342 | + /* If itr_countdown is set it means we programmed an ITR within |
---|
| 1343 | + * the last 4 interrupt cycles. This has a side effect of us |
---|
| 1344 | + * potentially firing an early interrupt. In order to work around |
---|
| 1345 | + * this we need to throw out any data received for a few |
---|
| 1346 | + * interrupts following the update. |
---|
| 1347 | + */ |
---|
| 1348 | + if (q_vector->itr_countdown) { |
---|
| 1349 | + itr = rc->target_itr; |
---|
| 1350 | + goto clear_counts; |
---|
| 1351 | + } |
---|
| 1352 | + |
---|
| 1353 | + container_is_rx = (&q_vector->rx == rc); |
---|
| 1354 | + /* For Rx we want to push the delay up and default to low latency. |
---|
| 1355 | + * For Tx we want to pull the delay down and default to high latency.
---|
| 1356 | + */ |
---|
| 1357 | + itr = container_is_rx ? |
---|
| 1358 | + ICE_ITR_ADAPTIVE_MIN_USECS | ICE_ITR_ADAPTIVE_LATENCY : |
---|
| 1359 | + ICE_ITR_ADAPTIVE_MAX_USECS | ICE_ITR_ADAPTIVE_LATENCY; |
---|
| 1360 | + |
---|
| 1361 | + /* If we didn't update within up to 1 - 2 jiffies we can assume |
---|
| 1362 | + * that either packets are coming in so slow there hasn't been |
---|
| 1363 | + * any work, or that there is so much work that NAPI is dealing |
---|
| 1364 | + * with interrupt moderation and we don't need to do anything. |
---|
| 1365 | + */ |
---|
| 1366 | + if (time_after(next_update, rc->next_update)) |
---|
| 1367 | + goto clear_counts; |
---|
| 1368 | + |
---|
| 1369 | + prefetch(q_vector->vsi->port_info); |
---|
| 1370 | + |
---|
| 1371 | + packets = rc->total_pkts; |
---|
| 1372 | + bytes = rc->total_bytes; |
---|
| 1373 | + |
---|
| 1374 | + if (container_is_rx) { |
---|
| 1375 | + /* If this is Rx and there are 1 to 4 packets totaling fewer
---|
| 1376 | + * than 9000 bytes, assume there is insufficient data to use the
---|
| 1377 | + * bulk rate limiting approach unless Tx is already in bulk rate
---|
| 1378 | + * limiting. We are likely latency driven.
---|
| 1379 | + */
---|
| 1380 | + if (packets && packets < 4 && bytes < 9000 && |
---|
| 1381 | + (q_vector->tx.target_itr & ICE_ITR_ADAPTIVE_LATENCY)) { |
---|
| 1382 | + itr = ICE_ITR_ADAPTIVE_LATENCY; |
---|
| 1383 | + goto adjust_by_size_and_speed; |
---|
| 1384 | + } |
---|
| 1385 | + } else if (packets < 4) { |
---|
| 1386 | + /* If we have Tx and Rx ITR maxed and Tx ITR is running in |
---|
| 1387 | + * bulk mode and we are receiving 4 or fewer packets just |
---|
| 1388 | + * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so |
---|
| 1389 | + * that the Rx can relax. |
---|
| 1390 | + */ |
---|
| 1391 | + if (rc->target_itr == ICE_ITR_ADAPTIVE_MAX_USECS && |
---|
| 1392 | + (q_vector->rx.target_itr & ICE_ITR_MASK) == |
---|
| 1393 | + ICE_ITR_ADAPTIVE_MAX_USECS) |
---|
| 1394 | + goto clear_counts; |
---|
| 1395 | + } else if (packets > 32) { |
---|
| 1396 | + /* If we have processed over 32 packets in a single interrupt |
---|
| 1397 | + * for Tx assume we need to switch over to "bulk" mode. |
---|
| 1398 | + */ |
---|
| 1399 | + rc->target_itr &= ~ICE_ITR_ADAPTIVE_LATENCY; |
---|
| 1400 | + } |
---|
| 1401 | + |
---|
| 1402 | + /* We have no packets to actually measure against. This means |
---|
| 1403 | + * either one of the other queues on this vector is active or |
---|
| 1404 | + * we are a Tx queue doing TSO with too high of an interrupt rate. |
---|
| 1405 | + * |
---|
| 1406 | + * Between 4 and 56 we can assume that our current interrupt delay |
---|
| 1407 | + * is only slightly too low. As such we should increase it by a small |
---|
| 1408 | + * fixed amount. |
---|
| 1409 | + */ |
---|
| 1410 | + if (packets < 56) { |
---|
| 1411 | + itr = rc->target_itr + ICE_ITR_ADAPTIVE_MIN_INC; |
---|
| 1412 | + if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) { |
---|
| 1413 | + itr &= ICE_ITR_ADAPTIVE_LATENCY; |
---|
| 1414 | + itr += ICE_ITR_ADAPTIVE_MAX_USECS; |
---|
| 1415 | + } |
---|
| 1416 | + goto clear_counts; |
---|
| 1417 | + } |
---|
| 1418 | + |
---|
| 1419 | + if (packets <= 256) { |
---|
| 1420 | + itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr); |
---|
| 1421 | + itr &= ICE_ITR_MASK; |
---|
| 1422 | + |
---|
| 1423 | + /* Between 56 and 112 is our "goldilocks" zone where we are |
---|
| 1424 | + * working out "just right". Just report that our current |
---|
| 1425 | + * ITR is good for us. |
---|
| 1426 | + */ |
---|
| 1427 | + if (packets <= 112) |
---|
| 1428 | + goto clear_counts; |
---|
| 1429 | + |
---|
| 1430 | + /* If the packet count is above 112 we are likely looking
---|
| 1431 | + * at a slight overrun of the delay we want. Try halving |
---|
| 1432 | + * our delay to see if that will cut the number of packets |
---|
| 1433 | + * in half per interrupt. |
---|
| 1434 | + */ |
---|
| 1435 | + itr >>= 1; |
---|
| 1436 | + itr &= ICE_ITR_MASK; |
---|
| 1437 | + if (itr < ICE_ITR_ADAPTIVE_MIN_USECS) |
---|
| 1438 | + itr = ICE_ITR_ADAPTIVE_MIN_USECS; |
---|
| 1439 | + |
---|
| 1440 | + goto clear_counts; |
---|
| 1441 | + } |
---|
| 1442 | + |
---|
| 1443 | + /* The paths below assume we are dealing with a bulk ITR since |
---|
| 1444 | + * number of packets is greater than 256. We are just going to have |
---|
| 1445 | + * to compute a value and try to bring the count under control, |
---|
| 1446 | + * though for smaller packet sizes there isn't much we can do as |
---|
| 1447 | + * NAPI polling will likely be kicking in sooner rather than later. |
---|
| 1448 | + */ |
---|
| 1449 | + itr = ICE_ITR_ADAPTIVE_BULK; |
---|
| 1450 | + |
---|
| 1451 | +adjust_by_size_and_speed: |
---|
| 1452 | + |
---|
| 1453 | + /* based on checks above packets cannot be 0 so division is safe */ |
---|
| 1454 | + itr = ice_adjust_itr_by_size_and_speed(q_vector->vsi->port_info, |
---|
| 1455 | + bytes / packets, itr); |
---|
| 1456 | + |
---|
| 1457 | +clear_counts: |
---|
| 1458 | + /* write back value */ |
---|
| 1459 | + rc->target_itr = itr; |
---|
| 1460 | + |
---|
| 1461 | + /* next update should occur within next jiffy */ |
---|
| 1462 | + rc->next_update = next_update + 1; |
---|
| 1463 | + |
---|
| 1464 | + rc->total_bytes = 0; |
---|
| 1465 | + rc->total_pkts = 0; |
---|
| 1466 | +} |
---|
| 1467 | + |
---|
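Setting aside the Rx/Tx special cases for fewer than 4 packets, ice_update_itr buckets the per-interrupt packet count into a few bands before deciding what to do with the ITR: a small fixed increase below 56, hold between 56 and 112, halve up to 256, and recompute from packet size and link speed above that. The helper below is a hedged model of that banding only; the enum and function names are invented for illustration.

    /* Invented names; models only the packet-count thresholds above. */
    enum itr_band {
            ITR_BAND_SPECIAL,       /* < 4 packets: per-direction special cases */
            ITR_BAND_SMALL_BUMP,    /* 4-55: delay is only slightly too low */
            ITR_BAND_HOLD,          /* 56-112: current ITR is "just right" */
            ITR_BAND_HALVE,         /* 113-256: halve the delay */
            ITR_BAND_BULK,          /* > 256: recompute from size and speed */
    };

    static enum itr_band itr_band_for(unsigned int packets)
    {
            if (packets < 4)
                    return ITR_BAND_SPECIAL;
            if (packets < 56)
                    return ITR_BAND_SMALL_BUMP;
            if (packets <= 112)
                    return ITR_BAND_HOLD;
            if (packets <= 256)
                    return ITR_BAND_HALVE;
            return ITR_BAND_BULK;
    }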
| 1468 | +/** |
---|
| 1469 | + * ice_buildreg_itr - build value for writing to the GLINT_DYN_CTL register |
---|
| 1470 | + * @itr_idx: interrupt throttling index |
---|
| 1471 | + * @itr: interrupt throttling value in usecs |
---|
| 1472 | + */ |
---|
| 1473 | +static u32 ice_buildreg_itr(u16 itr_idx, u16 itr) |
---|
| 1474 | +{ |
---|
| 1475 | + /* The ITR value is reported in microseconds, and the register value is |
---|
| 1476 | + * recorded in 2 microsecond units. For this reason we only need to |
---|
| 1477 | + * shift by the GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S to apply this |
---|
| 1478 | + * granularity as a shift instead of division. The mask makes sure the |
---|
| 1479 | + * ITR value is never odd so we don't accidentally write into the field |
---|
| 1480 | + * prior to the ITR field. |
---|
| 1481 | + */ |
---|
| 1482 | + itr &= ICE_ITR_MASK; |
---|
| 1483 | + |
---|
| 1484 | + return GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M | |
---|
| 1485 | + (itr_idx << GLINT_DYN_CTL_ITR_INDX_S) | |
---|
| 1486 | + (itr << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S)); |
---|
| 1487 | +} |
---|
| 1488 | + |
---|
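ice_buildreg_itr combines the interrupt-enable and clear-PBA bits, the ITR index, and the interval (in 2-microsecond hardware units) into one register value; shifting the masked microsecond count left by INTERVAL_S - ITR_GRAN_S both converts it to 2 usec units and lands it in the interval field. The sketch below mirrors that packing with placeholder bit positions; the shift and mask values here are assumptions for illustration, not the driver's GLINT_DYN_CTL_* definitions.

    #include <stdint.h>

    /* Assumed layout, for illustration only. */
    #define DYN_CTL_INTENA_M        (1u << 0)
    #define DYN_CTL_CLEARPBA_M      (1u << 1)
    #define DYN_CTL_ITR_INDX_S      3
    #define DYN_CTL_INTERVAL_S      5
    #define ITR_GRAN_S              1       /* 2 usec hardware granularity */
    #define ITR_MASK                0x1ffe  /* assumed: forces an even usec value */

    static uint32_t model_buildreg_itr(uint16_t itr_idx, uint16_t itr_usecs)
    {
            /* The mask keeps the value even so the combined shift (convert to
             * 2 usec units and place in the interval field) cannot spill into
             * the adjacent ITR index field.
             */
            itr_usecs &= ITR_MASK;

            return DYN_CTL_INTENA_M | DYN_CTL_CLEARPBA_M |
                   ((uint32_t)itr_idx << DYN_CTL_ITR_INDX_S) |
                   ((uint32_t)itr_usecs << (DYN_CTL_INTERVAL_S - ITR_GRAN_S));
    }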
| 1489 | +/* The act of updating the ITR will cause it to immediately trigger. In order |
---|
| 1490 | + * to prevent this from throwing off adaptive update statistics we defer the |
---|
| 1491 | + * update so that it can only happen so often. So after either Tx or Rx are |
---|
| 1492 | + * updated we make the adaptive scheme wait until either the ITR completely |
---|
| 1493 | + * expires via the next_update expiration or we have been through at least |
---|
| 1494 | + * 3 interrupts. |
---|
| 1495 | + */ |
---|
| 1496 | +#define ITR_COUNTDOWN_START 3 |
---|
| 1497 | + |
---|
| 1498 | +/** |
---|
| 1499 | + * ice_update_ena_itr - Update ITR and re-enable MSIX interrupt |
---|
| 1500 | + * @q_vector: q_vector for which ITR is being updated and interrupt enabled |
---|
| 1501 | + */ |
---|
| 1502 | +static void ice_update_ena_itr(struct ice_q_vector *q_vector) |
---|
| 1503 | +{ |
---|
| 1504 | + struct ice_ring_container *tx = &q_vector->tx; |
---|
| 1505 | + struct ice_ring_container *rx = &q_vector->rx; |
---|
| 1506 | + struct ice_vsi *vsi = q_vector->vsi; |
---|
| 1507 | + u32 itr_val; |
---|
| 1508 | + |
---|
| 1509 | + /* when exiting WB_ON_ITR, let's set a low ITR value and trigger
---|
| 1510 | + * interrupts to expire right away in case we have more work ready to go |
---|
| 1511 | + * already |
---|
| 1512 | + */ |
---|
| 1513 | + if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) { |
---|
| 1514 | + itr_val = ice_buildreg_itr(rx->itr_idx, ICE_WB_ON_ITR_USECS); |
---|
| 1515 | + wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val); |
---|
| 1516 | + /* set target back to last user set value */ |
---|
| 1517 | + rx->target_itr = rx->itr_setting; |
---|
| 1518 | + /* set current to what we just wrote and dynamic if needed */ |
---|
| 1519 | + rx->current_itr = ICE_WB_ON_ITR_USECS | |
---|
| 1520 | + (rx->itr_setting & ICE_ITR_DYNAMIC); |
---|
| 1521 | + /* allow normal interrupt flow to start */ |
---|
| 1522 | + q_vector->itr_countdown = 0; |
---|
| 1523 | + return; |
---|
| 1524 | + } |
---|
| 1525 | + |
---|
| 1526 | + /* This will do nothing if dynamic updates are not enabled */ |
---|
| 1527 | + ice_update_itr(q_vector, tx); |
---|
| 1528 | + ice_update_itr(q_vector, rx); |
---|
| 1529 | + |
---|
| 1530 | + /* This block of logic allows us to get away with only updating |
---|
| 1531 | + * one ITR value with each interrupt. The idea is to perform a |
---|
| 1532 | + * pseudo-lazy update with the following criteria. |
---|
| 1533 | + * |
---|
| 1534 | + * 1. Rx is given higher priority than Tx if both are in same state |
---|
| 1535 | + * 2. If we must reduce an ITR, that reduction is given highest priority.
---|
| 1536 | + * 3. We then give priority to increasing ITR based on amount. |
---|
| 1537 | + */ |
---|
| 1538 | + if (rx->target_itr < rx->current_itr) { |
---|
| 1539 | + /* Rx ITR needs to be reduced, this is highest priority */ |
---|
| 1540 | + itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr); |
---|
| 1541 | + rx->current_itr = rx->target_itr; |
---|
| 1542 | + q_vector->itr_countdown = ITR_COUNTDOWN_START; |
---|
| 1543 | + } else if ((tx->target_itr < tx->current_itr) || |
---|
| 1544 | + ((rx->target_itr - rx->current_itr) < |
---|
| 1545 | + (tx->target_itr - tx->current_itr))) { |
---|
| 1546 | + /* Tx ITR needs to be reduced, this is second priority |
---|
| 1547 | + * Tx ITR needs to be increased more than Rx, fourth priority |
---|
| 1548 | + */ |
---|
| 1549 | + itr_val = ice_buildreg_itr(tx->itr_idx, tx->target_itr); |
---|
| 1550 | + tx->current_itr = tx->target_itr; |
---|
| 1551 | + q_vector->itr_countdown = ITR_COUNTDOWN_START; |
---|
| 1552 | + } else if (rx->current_itr != rx->target_itr) { |
---|
| 1553 | + /* Rx ITR needs to be increased, third priority */ |
---|
| 1554 | + itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr); |
---|
| 1555 | + rx->current_itr = rx->target_itr; |
---|
| 1556 | + q_vector->itr_countdown = ITR_COUNTDOWN_START; |
---|
| 1557 | + } else { |
---|
| 1558 | + /* Still have to re-enable the interrupts */ |
---|
| 1559 | + itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); |
---|
| 1560 | + if (q_vector->itr_countdown) |
---|
| 1561 | + q_vector->itr_countdown--; |
---|
| 1562 | + } |
---|
| 1563 | + |
---|
| 1564 | + if (!test_bit(__ICE_DOWN, q_vector->vsi->state)) |
---|
| 1565 | + wr32(&q_vector->vsi->back->hw, |
---|
| 1566 | + GLINT_DYN_CTL(q_vector->reg_idx), |
---|
| 1567 | + itr_val); |
---|
| 1568 | +} |
---|
| 1569 | + |
---|
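The branch ladder above implements the comment's priority order for the single ITR write each interrupt gets: an Rx reduction wins, then a Tx reduction or a Tx increase larger than the pending Rx increase, then an Rx increase, and otherwise the interrupt is simply re-enabled. A compact model of that decision, using invented names rather than the driver's structures:

    /* Invented names; mirrors only the priority ordering of the ITR update. */
    enum itr_write { WRITE_RX_ITR, WRITE_TX_ITR, WRITE_NOTHING };

    static enum itr_write pick_itr_write(int rx_target, int rx_current,
                                         int tx_target, int tx_current)
    {
            if (rx_target < rx_current)
                    return WRITE_RX_ITR;    /* Rx reduction, highest priority */
            if (tx_target < tx_current ||
                (rx_target - rx_current) < (tx_target - tx_current))
                    return WRITE_TX_ITR;    /* Tx reduction, or bigger Tx increase */
            if (rx_current != rx_target)
                    return WRITE_RX_ITR;    /* Rx increase */
            return WRITE_NOTHING;           /* just re-enable the interrupt */
    }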
| 1570 | +/** |
---|
| 1571 | + * ice_set_wb_on_itr - set WB_ON_ITR for this q_vector |
---|
| 1572 | + * @q_vector: q_vector to set WB_ON_ITR on |
---|
| 1573 | + * |
---|
| 1574 | + * We need to tell hardware to write-back completed descriptors even when |
---|
| 1575 | + * interrupts are disabled. Descriptors are written back on cache line
---|
| 1576 | + * boundaries without WB_ON_ITR enabled, so if we don't enable WB_ON_ITR,
---|
| 1577 | + * descriptors that don't fill a cache line may not be written back until
---|
| 1578 | + * the next interrupt.
---|
| 1579 | + * |
---|
| 1580 | + * This sets the write-back frequency to 2 microseconds as that is the minimum |
---|
| 1581 | + * value that's not 0 due to ITR granularity. Also, set the INTENA_MSK bit to |
---|
| 1582 | + * make sure hardware knows we aren't meddling with the INTENA_M bit. |
---|
| 1583 | + */ |
---|
| 1584 | +static void ice_set_wb_on_itr(struct ice_q_vector *q_vector) |
---|
| 1585 | +{ |
---|
| 1586 | + struct ice_vsi *vsi = q_vector->vsi; |
---|
| 1587 | + |
---|
| 1588 | + /* already in WB_ON_ITR mode no need to change it */ |
---|
| 1589 | + if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) |
---|
| 1590 | + return; |
---|
| 1591 | + |
---|
| 1592 | + if (q_vector->num_ring_rx) |
---|
| 1593 | + wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), |
---|
| 1594 | + ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS, |
---|
| 1595 | + ICE_RX_ITR)); |
---|
| 1596 | + |
---|
| 1597 | + if (q_vector->num_ring_tx) |
---|
| 1598 | + wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), |
---|
| 1599 | + ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS, |
---|
| 1600 | + ICE_TX_ITR)); |
---|
| 1601 | + |
---|
| 1602 | + q_vector->itr_countdown = ICE_IN_WB_ON_ITR_MODE; |
---|
1052 | 1603 | } |
---|
1053 | 1604 | |
---|
1054 | 1605 | /** |
---|
.. | .. |
---|
1064 | 1615 | { |
---|
1065 | 1616 | struct ice_q_vector *q_vector = |
---|
1066 | 1617 | container_of(napi, struct ice_q_vector, napi); |
---|
1067 | | - struct ice_vsi *vsi = q_vector->vsi; |
---|
1068 | | - struct ice_pf *pf = vsi->back; |
---|
1069 | 1618 | bool clean_complete = true; |
---|
1070 | | - int budget_per_ring = 0; |
---|
1071 | 1619 | struct ice_ring *ring; |
---|
| 1620 | + int budget_per_ring; |
---|
1072 | 1621 | int work_done = 0; |
---|
1073 | 1622 | |
---|
1074 | 1623 | /* Since the actual Tx work is minimal, we can give the Tx a larger |
---|
1075 | 1624 | * budget and be more aggressive about cleaning up the Tx descriptors. |
---|
1076 | 1625 | */ |
---|
1077 | | - ice_for_each_ring(ring, q_vector->tx) |
---|
1078 | | - if (!ice_clean_tx_irq(vsi, ring, budget)) |
---|
| 1626 | + ice_for_each_ring(ring, q_vector->tx) { |
---|
| 1627 | + bool wd = ring->xsk_pool ? |
---|
| 1628 | + ice_clean_tx_irq_zc(ring, budget) : |
---|
| 1629 | + ice_clean_tx_irq(ring, budget); |
---|
| 1630 | + |
---|
| 1631 | + if (!wd) |
---|
1079 | 1632 | clean_complete = false; |
---|
| 1633 | + } |
---|
1080 | 1634 | |
---|
1081 | 1635 | /* Handle case where we are called by netpoll with a budget of 0 */ |
---|
1082 | | - if (budget <= 0) |
---|
| 1636 | + if (unlikely(budget <= 0)) |
---|
1083 | 1637 | return budget; |
---|
1084 | 1638 | |
---|
1085 | | - /* We attempt to distribute budget to each Rx queue fairly, but don't |
---|
1086 | | - * allow the budget to go below 1 because that would exit polling early. |
---|
1087 | | - */ |
---|
1088 | | - if (q_vector->num_ring_rx) |
---|
1089 | | - budget_per_ring = max(budget / q_vector->num_ring_rx, 1); |
---|
| 1639 | + /* normally we have 1 Rx ring per q_vector */ |
---|
| 1640 | + if (unlikely(q_vector->num_ring_rx > 1)) |
---|
| 1641 | + /* We attempt to distribute budget to each Rx queue fairly, but |
---|
| 1642 | + * don't allow the budget to go below 1 because that would exit |
---|
| 1643 | + * polling early. |
---|
| 1644 | + */ |
---|
| 1645 | + budget_per_ring = max_t(int, budget / q_vector->num_ring_rx, 1); |
---|
| 1646 | + else |
---|
| 1647 | + /* Max of 1 Rx ring in this q_vector so give it the budget */ |
---|
| 1648 | + budget_per_ring = budget; |
---|
1090 | 1649 | |
---|
1091 | 1650 | ice_for_each_ring(ring, q_vector->rx) { |
---|
1092 | 1651 | int cleaned; |
---|
1093 | 1652 | |
---|
1094 | | - cleaned = ice_clean_rx_irq(ring, budget_per_ring); |
---|
| 1653 | + /* A dedicated path for zero-copy allows making a single |
---|
| 1654 | + * comparison in the irq context instead of many inside the |
---|
| 1655 | + * ice_clean_rx_irq function and makes the codebase cleaner. |
---|
| 1656 | + */ |
---|
| 1657 | + cleaned = ring->xsk_pool ? |
---|
| 1658 | + ice_clean_rx_irq_zc(ring, budget_per_ring) : |
---|
| 1659 | + ice_clean_rx_irq(ring, budget_per_ring); |
---|
1095 | 1660 | work_done += cleaned; |
---|
1096 | 1661 | /* if we clean as many as budgeted, we must not be done */ |
---|
1097 | 1662 | if (cleaned >= budget_per_ring) |
---|
.. | .. |
---|
1102 | 1667 | if (!clean_complete) |
---|
1103 | 1668 | return budget; |
---|
1104 | 1669 | |
---|
1105 | | - /* Work is done so exit the polling mode and re-enable the interrupt */ |
---|
1106 | | - napi_complete_done(napi, work_done); |
---|
1107 | | - if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) |
---|
1108 | | - ice_irq_dynamic_ena(&vsi->back->hw, vsi, q_vector); |
---|
| 1670 | + /* Exit the polling mode, but don't re-enable interrupts if stack might |
---|
| 1671 | + * poll us due to busy-polling |
---|
| 1672 | + */ |
---|
| 1673 | + if (likely(napi_complete_done(napi, work_done))) |
---|
| 1674 | + ice_update_ena_itr(q_vector); |
---|
| 1675 | + else |
---|
| 1676 | + ice_set_wb_on_itr(q_vector); |
---|
1109 | 1677 | |
---|
1110 | | - return min(work_done, budget - 1); |
---|
1111 | | -} |
---|
1112 | | - |
---|
1113 | | -/* helper function for building cmd/type/offset */ |
---|
1114 | | -static __le64 |
---|
1115 | | -build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag) |
---|
1116 | | -{ |
---|
1117 | | - return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA | |
---|
1118 | | - (td_cmd << ICE_TXD_QW1_CMD_S) | |
---|
1119 | | - (td_offset << ICE_TXD_QW1_OFFSET_S) | |
---|
1120 | | - ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) | |
---|
1121 | | - (td_tag << ICE_TXD_QW1_L2TAG1_S)); |
---|
| 1678 | + return min_t(int, work_done, budget - 1); |
---|
1122 | 1679 | } |
---|
1123 | 1680 | |
---|
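The Rx budget split above is plain integer division clamped to a minimum of 1, with a fast path that hands the whole budget to a lone Rx ring. As a quick standalone sketch of the arithmetic: a NAPI budget of 64 over 3 rings gives each ring 21 descriptors per poll, and a share that would round down to 0 is clamped to 1 so polling cannot exit early.

    /* Minimal sketch of the per-ring Rx budget split. */
    static int model_budget_per_ring(int budget, int num_ring_rx)
    {
            int share;

            if (num_ring_rx <= 1)
                    return budget;          /* single ring gets everything */

            share = budget / num_ring_rx;
            return share ? share : 1;       /* never drop below 1 */
    }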
1124 | 1681 | /** |
---|
1125 | | - * __ice_maybe_stop_tx - 2nd level check for tx stop conditions |
---|
| 1682 | + * __ice_maybe_stop_tx - 2nd level check for Tx stop conditions |
---|
1126 | 1683 | * @tx_ring: the ring to be checked |
---|
1127 | 1684 | * @size: the size buffer we want to assure is available |
---|
1128 | 1685 | * |
---|
.. | .. |
---|
1145 | 1702 | } |
---|
1146 | 1703 | |
---|
1147 | 1704 | /** |
---|
1148 | | - * ice_maybe_stop_tx - 1st level check for tx stop conditions |
---|
| 1705 | + * ice_maybe_stop_tx - 1st level check for Tx stop conditions |
---|
1149 | 1706 | * @tx_ring: the ring to be checked |
---|
1150 | 1707 | * @size: the size buffer we want to assure is available |
---|
1151 | 1708 | * |
---|
.. | .. |
---|
1155 | 1712 | { |
---|
1156 | 1713 | if (likely(ICE_DESC_UNUSED(tx_ring) >= size)) |
---|
1157 | 1714 | return 0; |
---|
| 1715 | + |
---|
1158 | 1716 | return __ice_maybe_stop_tx(tx_ring, size); |
---|
1159 | 1717 | } |
---|
1160 | 1718 | |
---|
.. | .. |
---|
1174 | 1732 | { |
---|
1175 | 1733 | u64 td_offset, td_tag, td_cmd; |
---|
1176 | 1734 | u16 i = tx_ring->next_to_use; |
---|
1177 | | - struct skb_frag_struct *frag; |
---|
1178 | 1735 | unsigned int data_len, size; |
---|
1179 | 1736 | struct ice_tx_desc *tx_desc; |
---|
1180 | 1737 | struct ice_tx_buf *tx_buf; |
---|
1181 | 1738 | struct sk_buff *skb; |
---|
| 1739 | + skb_frag_t *frag; |
---|
1182 | 1740 | dma_addr_t dma; |
---|
1183 | 1741 | |
---|
1184 | 1742 | td_tag = off->td_l2tag1; |
---|
.. | .. |
---|
1220 | 1778 | */ |
---|
1221 | 1779 | while (unlikely(size > ICE_MAX_DATA_PER_TXD)) { |
---|
1222 | 1780 | tx_desc->cmd_type_offset_bsz = |
---|
1223 | | - build_ctob(td_cmd, td_offset, max_data, td_tag); |
---|
| 1781 | + ice_build_ctob(td_cmd, td_offset, max_data, |
---|
| 1782 | + td_tag); |
---|
1224 | 1783 | |
---|
1225 | 1784 | tx_desc++; |
---|
1226 | 1785 | i++; |
---|
.. | .. |
---|
1240 | 1799 | if (likely(!data_len)) |
---|
1241 | 1800 | break; |
---|
1242 | 1801 | |
---|
1243 | | - tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, |
---|
1244 | | - size, td_tag); |
---|
| 1802 | + tx_desc->cmd_type_offset_bsz = ice_build_ctob(td_cmd, td_offset, |
---|
| 1803 | + size, td_tag); |
---|
1245 | 1804 | |
---|
1246 | 1805 | tx_desc++; |
---|
1247 | 1806 | i++; |
---|
.. | .. |
---|
1271 | 1830 | i = 0; |
---|
1272 | 1831 | |
---|
1273 | 1832 | /* write last descriptor with RS and EOP bits */ |
---|
1274 | | - td_cmd |= (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS); |
---|
| 1833 | + td_cmd |= (u64)ICE_TXD_LAST_DESC_CMD; |
---|
1275 | 1834 | tx_desc->cmd_type_offset_bsz = |
---|
1276 | | - build_ctob(td_cmd, td_offset, size, td_tag); |
---|
| 1835 | + ice_build_ctob(td_cmd, td_offset, size, td_tag); |
---|
1277 | 1836 | |
---|
1278 | 1837 | /* Force memory writes to complete before letting h/w know there |
---|
1279 | 1838 | * are new descriptors to fetch. |
---|
.. | .. |
---|
1291 | 1850 | ice_maybe_stop_tx(tx_ring, DESC_NEEDED); |
---|
1292 | 1851 | |
---|
1293 | 1852 | /* notify HW of packet */ |
---|
1294 | | - if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { |
---|
| 1853 | + if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) |
---|
1295 | 1854 | writel(i, tx_ring->tail); |
---|
1296 | | - |
---|
1297 | | - /* we need this if more than one processor can write to our tail |
---|
1298 | | - * at a time, it synchronizes IO on IA64/Altix systems |
---|
1299 | | - */ |
---|
1300 | | - mmiowb(); |
---|
1301 | | - } |
---|
1302 | 1855 | |
---|
1303 | 1856 | return; |
---|
1304 | 1857 | |
---|
1305 | 1858 | dma_error: |
---|
1306 | | - /* clear dma mappings for failed tx_buf map */ |
---|
| 1859 | + /* clear DMA mappings for failed tx_buf map */ |
---|
1307 | 1860 | for (;;) { |
---|
1308 | 1861 | tx_buf = &tx_ring->tx_buf[i]; |
---|
1309 | 1862 | ice_unmap_and_free_tx_buf(tx_ring, tx_buf); |
---|
.. | .. |
---|
1353 | 1906 | l2_len = ip.hdr - skb->data; |
---|
1354 | 1907 | offset = (l2_len / 2) << ICE_TX_DESC_LEN_MACLEN_S; |
---|
1355 | 1908 | |
---|
1356 | | - if (skb->encapsulation) |
---|
1357 | | - return -1; |
---|
| 1909 | + protocol = vlan_get_protocol(skb); |
---|
| 1910 | + |
---|
| 1911 | + if (protocol == htons(ETH_P_IP)) |
---|
| 1912 | + first->tx_flags |= ICE_TX_FLAGS_IPV4; |
---|
| 1913 | + else if (protocol == htons(ETH_P_IPV6)) |
---|
| 1914 | + first->tx_flags |= ICE_TX_FLAGS_IPV6; |
---|
| 1915 | + |
---|
| 1916 | + if (skb->encapsulation) { |
---|
| 1917 | + bool gso_ena = false; |
---|
| 1918 | + u32 tunnel = 0; |
---|
| 1919 | + |
---|
| 1920 | + /* define outer network header type */ |
---|
| 1921 | + if (first->tx_flags & ICE_TX_FLAGS_IPV4) { |
---|
| 1922 | + tunnel |= (first->tx_flags & ICE_TX_FLAGS_TSO) ? |
---|
| 1923 | + ICE_TX_CTX_EIPT_IPV4 : |
---|
| 1924 | + ICE_TX_CTX_EIPT_IPV4_NO_CSUM; |
---|
| 1925 | + l4_proto = ip.v4->protocol; |
---|
| 1926 | + } else if (first->tx_flags & ICE_TX_FLAGS_IPV6) { |
---|
| 1927 | + int ret; |
---|
| 1928 | + |
---|
| 1929 | + tunnel |= ICE_TX_CTX_EIPT_IPV6; |
---|
| 1930 | + exthdr = ip.hdr + sizeof(*ip.v6); |
---|
| 1931 | + l4_proto = ip.v6->nexthdr; |
---|
| 1932 | + ret = ipv6_skip_exthdr(skb, exthdr - skb->data, |
---|
| 1933 | + &l4_proto, &frag_off); |
---|
| 1934 | + if (ret < 0) |
---|
| 1935 | + return -1; |
---|
| 1936 | + } |
---|
| 1937 | + |
---|
| 1938 | + /* define outer transport */ |
---|
| 1939 | + switch (l4_proto) { |
---|
| 1940 | + case IPPROTO_UDP: |
---|
| 1941 | + tunnel |= ICE_TXD_CTX_UDP_TUNNELING; |
---|
| 1942 | + first->tx_flags |= ICE_TX_FLAGS_TUNNEL; |
---|
| 1943 | + break; |
---|
| 1944 | + case IPPROTO_GRE: |
---|
| 1945 | + tunnel |= ICE_TXD_CTX_GRE_TUNNELING; |
---|
| 1946 | + first->tx_flags |= ICE_TX_FLAGS_TUNNEL; |
---|
| 1947 | + break; |
---|
| 1948 | + case IPPROTO_IPIP: |
---|
| 1949 | + case IPPROTO_IPV6: |
---|
| 1950 | + first->tx_flags |= ICE_TX_FLAGS_TUNNEL; |
---|
| 1951 | + l4.hdr = skb_inner_network_header(skb); |
---|
| 1952 | + break; |
---|
| 1953 | + default: |
---|
| 1954 | + if (first->tx_flags & ICE_TX_FLAGS_TSO) |
---|
| 1955 | + return -1; |
---|
| 1956 | + |
---|
| 1957 | + skb_checksum_help(skb); |
---|
| 1958 | + return 0; |
---|
| 1959 | + } |
---|
| 1960 | + |
---|
| 1961 | + /* compute outer L3 header size */ |
---|
| 1962 | + tunnel |= ((l4.hdr - ip.hdr) / 4) << |
---|
| 1963 | + ICE_TXD_CTX_QW0_EIPLEN_S; |
---|
| 1964 | + |
---|
| 1965 | + /* switch IP header pointer from outer to inner header */ |
---|
| 1966 | + ip.hdr = skb_inner_network_header(skb); |
---|
| 1967 | + |
---|
| 1968 | + /* compute tunnel header size */ |
---|
| 1969 | + tunnel |= ((ip.hdr - l4.hdr) / 2) << |
---|
| 1970 | + ICE_TXD_CTX_QW0_NATLEN_S; |
---|
| 1971 | + |
---|
| 1972 | + gso_ena = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL; |
---|
| 1973 | + /* indicate if we need to offload outer UDP header */ |
---|
| 1974 | + if ((first->tx_flags & ICE_TX_FLAGS_TSO) && !gso_ena && |
---|
| 1975 | + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) |
---|
| 1976 | + tunnel |= ICE_TXD_CTX_QW0_L4T_CS_M; |
---|
| 1977 | + |
---|
| 1978 | + /* record tunnel offload values */ |
---|
| 1979 | + off->cd_tunnel_params |= tunnel; |
---|
| 1980 | + |
---|
| 1981 | + /* set DTYP=1 to indicate that it's a Tx context descriptor
---|
| 1982 | + * in IPsec tunnel mode with Tx offloads in Quad word 1 |
---|
| 1983 | + */ |
---|
| 1984 | + off->cd_qw1 |= (u64)ICE_TX_DESC_DTYPE_CTX; |
---|
| 1985 | + |
---|
| 1986 | + /* switch L4 header pointer from outer to inner */ |
---|
| 1987 | + l4.hdr = skb_inner_transport_header(skb); |
---|
| 1988 | + l4_proto = 0; |
---|
| 1989 | + |
---|
| 1990 | + /* reset type as we transition from outer to inner headers */ |
---|
| 1991 | + first->tx_flags &= ~(ICE_TX_FLAGS_IPV4 | ICE_TX_FLAGS_IPV6); |
---|
| 1992 | + if (ip.v4->version == 4) |
---|
| 1993 | + first->tx_flags |= ICE_TX_FLAGS_IPV4; |
---|
| 1994 | + if (ip.v6->version == 6) |
---|
| 1995 | + first->tx_flags |= ICE_TX_FLAGS_IPV6; |
---|
| 1996 | + } |
---|
1358 | 1997 | |
---|
1359 | 1998 | /* Enable IP checksum offloads */ |
---|
1360 | | - protocol = vlan_get_protocol(skb); |
---|
1361 | | - if (protocol == htons(ETH_P_IP)) { |
---|
| 1999 | + if (first->tx_flags & ICE_TX_FLAGS_IPV4) { |
---|
1362 | 2000 | l4_proto = ip.v4->protocol; |
---|
1363 | 2001 | /* the stack computes the IP header already, the only time we |
---|
1364 | 2002 | * need the hardware to recompute it is in the case of TSO. |
---|
.. | .. |
---|
1368 | 2006 | else |
---|
1369 | 2007 | cmd |= ICE_TX_DESC_CMD_IIPT_IPV4; |
---|
1370 | 2008 | |
---|
1371 | | - } else if (protocol == htons(ETH_P_IPV6)) { |
---|
| 2009 | + } else if (first->tx_flags & ICE_TX_FLAGS_IPV6) { |
---|
1372 | 2010 | cmd |= ICE_TX_DESC_CMD_IIPT_IPV6; |
---|
1373 | 2011 | exthdr = ip.hdr + sizeof(*ip.v6); |
---|
1374 | 2012 | l4_proto = ip.v6->nexthdr; |
---|
.. | .. |
---|
1398 | 2036 | offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S; |
---|
1399 | 2037 | break; |
---|
1400 | 2038 | case IPPROTO_SCTP: |
---|
| 2039 | + /* enable SCTP checksum offload */ |
---|
| 2040 | + cmd |= ICE_TX_DESC_CMD_L4T_EOFT_SCTP; |
---|
| 2041 | + l4_len = sizeof(struct sctphdr) >> 2; |
---|
| 2042 | + offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S; |
---|
| 2043 | + break; |
---|
| 2044 | + |
---|
1401 | 2045 | default: |
---|
1402 | 2046 | if (first->tx_flags & ICE_TX_FLAGS_TSO) |
---|
1403 | 2047 | return -1; |
---|
.. | .. |
---|
1411 | 2055 | } |
---|
1412 | 2056 | |
---|
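In the tunnel branch of ice_tx_csum the outer L3 header length is programmed in 4-byte units (the EIPLEN field) and the span from the outer transport header to the inner IP header in 2-byte units (the NATLEN field). A worked example for a hypothetical VXLAN-in-IPv4 frame; the layout below is an assumption for illustration, not something taken from this patch.

    #include <stdio.h>

    int main(void)
    {
            /* Assumed VXLAN-in-IPv4 layout: outer IPv4 with no options, then
             * outer UDP + VXLAN + inner Ethernet before the inner IP header.
             */
            unsigned int outer_ipv4 = 20;
            unsigned int outer_udp = 8, vxlan = 8, inner_eth = 14;

            unsigned int eiplen = outer_ipv4 / 4;                      /* 5  */
            unsigned int natlen = (outer_udp + vxlan + inner_eth) / 2; /* 15 */

            printf("EIPLEN field = %u, NATLEN field = %u\n", eiplen, natlen);
            return 0;
    }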
1413 | 2057 | /** |
---|
1414 | | - * ice_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW |
---|
| 2058 | + * ice_tx_prepare_vlan_flags - prepare generic Tx VLAN tagging flags for HW |
---|
1415 | 2059 | * @tx_ring: ring to send buffer on |
---|
1416 | 2060 | * @first: pointer to struct ice_tx_buf |
---|
1417 | 2061 | * |
---|
1418 | 2062 | * Checks the skb and set up correspondingly several generic transmit flags |
---|
1419 | 2063 | * related to VLAN tagging for the HW, such as VLAN, DCB, etc. |
---|
1420 | | - * |
---|
1421 | | - * Returns error code indicate the frame should be dropped upon error and the |
---|
1422 | | - * otherwise returns 0 to indicate the flags has been set properly. |
---|
1423 | 2064 | */ |
---|
1424 | | -static int |
---|
| 2065 | +static void |
---|
1425 | 2066 | ice_tx_prepare_vlan_flags(struct ice_ring *tx_ring, struct ice_tx_buf *first) |
---|
1426 | 2067 | { |
---|
1427 | 2068 | struct sk_buff *skb = first->skb; |
---|
1428 | | - __be16 protocol = skb->protocol; |
---|
1429 | 2069 | |
---|
1430 | | - if (protocol == htons(ETH_P_8021Q) && |
---|
1431 | | - !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) { |
---|
1432 | | - /* when HW VLAN acceleration is turned off by the user the |
---|
1433 | | - * stack sets the protocol to 8021q so that the driver |
---|
1434 | | - * can take any steps required to support the SW only |
---|
1435 | | - * VLAN handling. In our case the driver doesn't need |
---|
1436 | | - * to take any further steps so just set the protocol |
---|
1437 | | - * to the encapsulated ethertype. |
---|
1438 | | - */ |
---|
1439 | | - skb->protocol = vlan_get_protocol(skb); |
---|
1440 | | - goto out; |
---|
1441 | | - } |
---|
| 2070 | + /* nothing left to do, software offloaded VLAN */ |
---|
| 2071 | + if (!skb_vlan_tag_present(skb) && eth_type_vlan(skb->protocol)) |
---|
| 2072 | + return; |
---|
1442 | 2073 | |
---|
1443 | | - /* if we have a HW VLAN tag being added, default to the HW one */ |
---|
| 2074 | + /* currently, we always assume 802.1Q for VLAN insertion as VLAN |
---|
| 2075 | + * insertion for 802.1AD is not supported |
---|
| 2076 | + */ |
---|
1444 | 2077 | if (skb_vlan_tag_present(skb)) { |
---|
1445 | 2078 | first->tx_flags |= skb_vlan_tag_get(skb) << ICE_TX_FLAGS_VLAN_S; |
---|
1446 | 2079 | first->tx_flags |= ICE_TX_FLAGS_HW_VLAN; |
---|
1447 | | - } else if (protocol == htons(ETH_P_8021Q)) { |
---|
1448 | | - struct vlan_hdr *vhdr, _vhdr; |
---|
1449 | | - |
---|
1450 | | - /* for SW VLAN, check the next protocol and store the tag */ |
---|
1451 | | - vhdr = (struct vlan_hdr *)skb_header_pointer(skb, ETH_HLEN, |
---|
1452 | | - sizeof(_vhdr), |
---|
1453 | | - &_vhdr); |
---|
1454 | | - if (!vhdr) |
---|
1455 | | - return -EINVAL; |
---|
1456 | | - |
---|
1457 | | - first->tx_flags |= ntohs(vhdr->h_vlan_TCI) << |
---|
1458 | | - ICE_TX_FLAGS_VLAN_S; |
---|
1459 | | - first->tx_flags |= ICE_TX_FLAGS_SW_VLAN; |
---|
1460 | 2080 | } |
---|
1461 | 2081 | |
---|
1462 | | -out: |
---|
1463 | | - return 0; |
---|
| 2082 | + ice_tx_prepare_vlan_flags_dcb(tx_ring, first); |
---|
1464 | 2083 | } |
---|
1465 | 2084 | |
---|
1466 | 2085 | /** |
---|
.. | .. |
---|
1481 | 2100 | } ip; |
---|
1482 | 2101 | union { |
---|
1483 | 2102 | struct tcphdr *tcp; |
---|
| 2103 | + struct udphdr *udp; |
---|
1484 | 2104 | unsigned char *hdr; |
---|
1485 | 2105 | } l4; |
---|
1486 | 2106 | u64 cd_mss, cd_tso_len; |
---|
1487 | | - u32 paylen, l4_start; |
---|
| 2107 | + u32 paylen; |
---|
| 2108 | + u8 l4_start; |
---|
1488 | 2109 | int err; |
---|
1489 | 2110 | |
---|
1490 | 2111 | if (skb->ip_summed != CHECKSUM_PARTIAL) |
---|
.. | .. |
---|
1497 | 2118 | if (err < 0) |
---|
1498 | 2119 | return err; |
---|
1499 | 2120 | |
---|
| 2121 | + /* cppcheck-suppress unreadVariable */ |
---|
1500 | 2122 | ip.hdr = skb_network_header(skb); |
---|
1501 | 2123 | l4.hdr = skb_transport_header(skb); |
---|
1502 | 2124 | |
---|
.. | .. |
---|
1508 | 2130 | ip.v6->payload_len = 0; |
---|
1509 | 2131 | } |
---|
1510 | 2132 | |
---|
| 2133 | + if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | |
---|
| 2134 | + SKB_GSO_GRE_CSUM | |
---|
| 2135 | + SKB_GSO_IPXIP4 | |
---|
| 2136 | + SKB_GSO_IPXIP6 | |
---|
| 2137 | + SKB_GSO_UDP_TUNNEL | |
---|
| 2138 | + SKB_GSO_UDP_TUNNEL_CSUM)) { |
---|
| 2139 | + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && |
---|
| 2140 | + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) { |
---|
| 2141 | + l4.udp->len = 0; |
---|
| 2142 | + |
---|
| 2143 | + /* determine offset of outer transport header */ |
---|
| 2144 | + l4_start = (u8)(l4.hdr - skb->data); |
---|
| 2145 | + |
---|
| 2146 | + /* remove payload length from outer checksum */ |
---|
| 2147 | + paylen = skb->len - l4_start; |
---|
| 2148 | + csum_replace_by_diff(&l4.udp->check, |
---|
| 2149 | + (__force __wsum)htonl(paylen)); |
---|
| 2150 | + } |
---|
| 2151 | + |
---|
| 2152 | + /* reset pointers to inner headers */ |
---|
| 2153 | + |
---|
| 2154 | + /* cppcheck-suppress unreadVariable */ |
---|
| 2155 | + ip.hdr = skb_inner_network_header(skb); |
---|
| 2156 | + l4.hdr = skb_inner_transport_header(skb); |
---|
| 2157 | + |
---|
| 2158 | + /* initialize inner IP header fields */ |
---|
| 2159 | + if (ip.v4->version == 4) { |
---|
| 2160 | + ip.v4->tot_len = 0; |
---|
| 2161 | + ip.v4->check = 0; |
---|
| 2162 | + } else { |
---|
| 2163 | + ip.v6->payload_len = 0; |
---|
| 2164 | + } |
---|
| 2165 | + } |
---|
| 2166 | + |
---|
1511 | 2167 | /* determine offset of transport header */ |
---|
1512 | | - l4_start = l4.hdr - skb->data; |
---|
| 2168 | + l4_start = (u8)(l4.hdr - skb->data); |
---|
1513 | 2169 | |
---|
1514 | 2170 | /* remove payload length from checksum */ |
---|
1515 | 2171 | paylen = skb->len - l4_start; |
---|
1516 | | - csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); |
---|
1517 | 2172 | |
---|
1518 | | - /* compute length of segmentation header */ |
---|
1519 | | - off->header_len = (l4.tcp->doff * 4) + l4_start; |
---|
| 2173 | + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { |
---|
| 2174 | + csum_replace_by_diff(&l4.udp->check, |
---|
| 2175 | + (__force __wsum)htonl(paylen)); |
---|
| 2176 | + /* compute length of UDP segmentation header */ |
---|
| 2177 | + off->header_len = (u8)sizeof(l4.udp) + l4_start; |
---|
| 2178 | + } else { |
---|
| 2179 | + csum_replace_by_diff(&l4.tcp->check, |
---|
| 2180 | + (__force __wsum)htonl(paylen)); |
---|
| 2181 | + /* compute length of TCP segmentation header */ |
---|
| 2182 | + off->header_len = (u8)((l4.tcp->doff * 4) + l4_start); |
---|
| 2183 | + } |
---|
1520 | 2184 | |
---|
1521 | 2185 | /* update gso_segs and bytecount */ |
---|
1522 | 2186 | first->gso_segs = skb_shinfo(skb)->gso_segs; |
---|
.. | .. |
---|
1526 | 2190 | cd_mss = skb_shinfo(skb)->gso_size; |
---|
1527 | 2191 | |
---|
1528 | 2192 | /* record cdesc_qw1 with TSO parameters */ |
---|
1529 | | - off->cd_qw1 |= ICE_TX_DESC_DTYPE_CTX | |
---|
1530 | | - (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) | |
---|
1531 | | - (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) | |
---|
1532 | | - (cd_mss << ICE_TXD_CTX_QW1_MSS_S); |
---|
| 2193 | + off->cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX | |
---|
| 2194 | + (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) | |
---|
| 2195 | + (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) | |
---|
| 2196 | + (cd_mss << ICE_TXD_CTX_QW1_MSS_S)); |
---|
1533 | 2197 | first->tx_flags |= ICE_TX_FLAGS_TSO; |
---|
1534 | 2198 | return 1; |
---|
1535 | 2199 | } |
---|
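For a plain TCP GSO skb, ice_tso computes the transport header offset, subtracts it from skb->len to get the payload length it strips back out of the pseudo-header checksum, and reports header_len (doff * 4 plus that offset), the length of the headers that precede each segment. A worked example with an assumed untagged IPv4/TCP frame (14-byte Ethernet, 20-byte IP, 20-byte TCP, 64000-byte skb):

    #include <stdio.h>

    int main(void)
    {
            /* Assumed layout: no VLAN tag, no IP options, no TCP options. */
            unsigned int eth_hlen = 14, ip_hlen = 20, tcp_doff = 5;
            unsigned int skb_len = 64000;           /* large GSO skb */

            unsigned int l4_start = eth_hlen + ip_hlen;             /* 34    */
            unsigned int paylen = skb_len - l4_start;               /* 63966 */
            unsigned int header_len = tcp_doff * 4 + l4_start;      /* 54    */

            printf("l4_start=%u paylen=%u header_len=%u\n",
                   l4_start, paylen, header_len);
            return 0;
    }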
.. | .. |
---|
1552 | 2216 | * Finally, we add one to round up. Because 256 isn't an exact multiple of |
---|
1553 | 2217 | * 3, we'll underestimate near each multiple of 12K. This is actually more |
---|
1554 | 2218 | * accurate as we have 4K - 1 of wiggle room that we can fit into the last |
---|
1555 | | - * segment. For our purposes this is accurate out to 1M which is orders of |
---|
| 2219 | + * segment. For our purposes this is accurate out to 1M which is orders of |
---|
1556 | 2220 | * magnitude greater than our largest possible GSO size. |
---|
1557 | 2221 | * |
---|
1558 | 2222 | * This would then be implemented as: |
---|
1559 | | - * return (((size >> 12) * 85) >> 8) + 1; |
---|
| 2223 | + * return (((size >> 12) * 85) >> 8) + ICE_DESCS_FOR_SKB_DATA_PTR; |
---|
1560 | 2224 | * |
---|
1561 | 2225 | * Since multiplication and division are commutative, we can reorder |
---|
1562 | 2226 | * operations into: |
---|
1563 | | - * return ((size * 85) >> 20) + 1; |
---|
| 2227 | + * return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR; |
---|
1564 | 2228 | */ |
---|
1565 | 2229 | static unsigned int ice_txd_use_count(unsigned int size) |
---|
1566 | 2230 | { |
---|
1567 | | - return ((size * 85) >> 20) + 1; |
---|
| 2231 | + return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR; |
---|
1568 | 2232 | } |
---|
1569 | 2233 | |
---|
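The kdoc above explains that multiplying by 85 and shifting right by 20 approximates division by 12K, with the added constant (written as ICE_DESCS_FOR_SKB_DATA_PTR in the diff) providing the round-up, and that the result deliberately underestimates just past each multiple of 12K where the 16K - 1 descriptor limit leaves slack. A quick standalone check of that behaviour, assuming the added constant is 1:

    #include <stdio.h>

    /* Approximation from the kdoc above, assuming the added constant is 1. */
    static unsigned int approx_use_count(unsigned int size)
    {
            return ((size * 85) >> 20) + 1;
    }

    int main(void)
    {
            unsigned int sizes[] = { 1500, 4096, 12288, 12289, 16384, 65536,
                                     1u << 20 };
            unsigned int i;

            for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
                    unsigned int size = sizes[i];
                    unsigned int round_up = (size + 12288 - 1) / 12288;

                    /* size=12289 shows the intentional underestimate by one */
                    printf("size=%7u approx=%3u round-up-by-12K=%3u\n",
                           size, approx_use_count(size), round_up);
            }
            return 0;
    }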
1570 | 2234 | /** |
---|
1571 | | - * ice_xmit_desc_count - calculate number of tx descriptors needed |
---|
| 2235 | + * ice_xmit_desc_count - calculate number of Tx descriptors needed |
---|
1572 | 2236 | * @skb: send buffer |
---|
1573 | 2237 | * |
---|
1574 | 2238 | * Returns number of data descriptors needed for this skb. |
---|
1575 | 2239 | */ |
---|
1576 | 2240 | static unsigned int ice_xmit_desc_count(struct sk_buff *skb) |
---|
1577 | 2241 | { |
---|
1578 | | - const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; |
---|
| 2242 | + const skb_frag_t *frag = &skb_shinfo(skb)->frags[0]; |
---|
1579 | 2243 | unsigned int nr_frags = skb_shinfo(skb)->nr_frags; |
---|
1580 | 2244 | unsigned int count = 0, size = skb_headlen(skb); |
---|
1581 | 2245 | |
---|
.. | .. |
---|
1606 | 2270 | */ |
---|
1607 | 2271 | static bool __ice_chk_linearize(struct sk_buff *skb) |
---|
1608 | 2272 | { |
---|
1609 | | - const struct skb_frag_struct *frag, *stale; |
---|
| 2273 | + const skb_frag_t *frag, *stale; |
---|
1610 | 2274 | int nr_frags, sum; |
---|
1611 | 2275 | |
---|
1612 | 2276 | /* no need to check if number of frags is less than 7 */ |
---|
.. | .. |
---|
1620 | 2284 | nr_frags -= ICE_MAX_BUF_TXD - 2; |
---|
1621 | 2285 | frag = &skb_shinfo(skb)->frags[0]; |
---|
1622 | 2286 | |
---|
1623 | | - /* Initialize size to the negative value of gso_size minus 1. We |
---|
1624 | | - * use this as the worst case scenerio in which the frag ahead |
---|
| 2287 | + /* Initialize size to the negative value of gso_size minus 1. We |
---|
| 2288 | + * use this as the worst case scenario in which the frag ahead |
---|
1625 | 2289 | * of us only provides one byte which is why we are limited to 6 |
---|
1626 | 2290 | * descriptors for a single transmit as the header and previous |
---|
1627 | 2291 | * fragment are already consuming 2 descriptors. |
---|
.. | .. |
---|
1638 | 2302 | /* Walk through fragments adding latest fragment, testing it, and |
---|
1639 | 2303 | * then removing stale fragments from the sum. |
---|
1640 | 2304 | */ |
---|
1641 | | - stale = &skb_shinfo(skb)->frags[0]; |
---|
1642 | | - for (;;) { |
---|
| 2305 | + for (stale = &skb_shinfo(skb)->frags[0];; stale++) { |
---|
| 2306 | + int stale_size = skb_frag_size(stale); |
---|
| 2307 | + |
---|
1643 | 2308 | sum += skb_frag_size(frag++); |
---|
| 2309 | + |
---|
| 2310 | + /* The stale fragment may present us with a smaller |
---|
| 2311 | + * descriptor than the actual fragment size. To account |
---|
| 2312 | + * for that we need to remove all the data on the front and |
---|
| 2313 | + * figure out what the remainder would be in the last |
---|
| 2314 | + * descriptor associated with the fragment. |
---|
| 2315 | + */ |
---|
| 2316 | + if (stale_size > ICE_MAX_DATA_PER_TXD) { |
---|
| 2317 | + int align_pad = -(skb_frag_off(stale)) & |
---|
| 2318 | + (ICE_MAX_READ_REQ_SIZE - 1); |
---|
| 2319 | + |
---|
| 2320 | + sum -= align_pad; |
---|
| 2321 | + stale_size -= align_pad; |
---|
| 2322 | + |
---|
| 2323 | + do { |
---|
| 2324 | + sum -= ICE_MAX_DATA_PER_TXD_ALIGNED; |
---|
| 2325 | + stale_size -= ICE_MAX_DATA_PER_TXD_ALIGNED; |
---|
| 2326 | + } while (stale_size > ICE_MAX_DATA_PER_TXD); |
---|
| 2327 | + } |
---|
1644 | 2328 | |
---|
1645 | 2329 | /* if sum is negative we failed to make sufficient progress */ |
---|
1646 | 2330 | if (sum < 0) |
---|
.. | .. |
---|
1649 | 2333 | if (!nr_frags--) |
---|
1650 | 2334 | break; |
---|
1651 | 2335 | |
---|
1652 | | - sum -= skb_frag_size(stale++); |
---|
| 2336 | + sum -= stale_size; |
---|
1653 | 2337 | } |
---|
1654 | 2338 | |
---|
1655 | 2339 | return false; |
---|
.. | .. |
---|
1688 | 2372 | ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) |
---|
1689 | 2373 | { |
---|
1690 | 2374 | struct ice_tx_offload_params offload = { 0 }; |
---|
| 2375 | + struct ice_vsi *vsi = tx_ring->vsi; |
---|
1691 | 2376 | struct ice_tx_buf *first; |
---|
| 2377 | + struct ethhdr *eth; |
---|
1692 | 2378 | unsigned int count; |
---|
1693 | 2379 | int tso, csum; |
---|
1694 | 2380 | |
---|
.. | .. |
---|
1706 | 2392 | * + 1 desc for context descriptor, |
---|
1707 | 2393 | * otherwise try next time |
---|
1708 | 2394 | */ |
---|
1709 | | - if (ice_maybe_stop_tx(tx_ring, count + 4 + 1)) { |
---|
| 2395 | + if (ice_maybe_stop_tx(tx_ring, count + ICE_DESCS_PER_CACHE_LINE + |
---|
| 2396 | + ICE_DESCS_FOR_CTX_DESC)) { |
---|
1710 | 2397 | tx_ring->tx_stats.tx_busy++; |
---|
1711 | 2398 | return NETDEV_TX_BUSY; |
---|
1712 | 2399 | } |
---|
.. | .. |
---|
1721 | 2408 | first->tx_flags = 0; |
---|
1722 | 2409 | |
---|
1723 | 2410 | /* prepare the VLAN tagging flags for Tx */ |
---|
1724 | | - if (ice_tx_prepare_vlan_flags(tx_ring, first)) |
---|
1725 | | - goto out_drop; |
---|
| 2411 | + ice_tx_prepare_vlan_flags(tx_ring, first); |
---|
1726 | 2412 | |
---|
1727 | 2413 | /* set up TSO offload */ |
---|
1728 | 2414 | tso = ice_tso(first, &offload); |
---|
.. | .. |
---|
1734 | 2420 | if (csum < 0) |
---|
1735 | 2421 | goto out_drop; |
---|
1736 | 2422 | |
---|
1737 | | - if (tso || offload.cd_tunnel_params) { |
---|
| 2423 | + /* allow CONTROL frames egress from main VSI if FW LLDP disabled */ |
---|
| 2424 | + eth = (struct ethhdr *)skb_mac_header(skb); |
---|
| 2425 | + if (unlikely((skb->priority == TC_PRIO_CONTROL || |
---|
| 2426 | + eth->h_proto == htons(ETH_P_LLDP)) && |
---|
| 2427 | + vsi->type == ICE_VSI_PF && |
---|
| 2428 | + vsi->port_info->qos_cfg.is_sw_lldp)) |
---|
| 2429 | + offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX | |
---|
| 2430 | + ICE_TX_CTX_DESC_SWTCH_UPLINK << |
---|
| 2431 | + ICE_TXD_CTX_QW1_CMD_S); |
---|
| 2432 | + |
---|
| 2433 | + if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) { |
---|
1738 | 2434 | struct ice_tx_ctx_desc *cdesc; |
---|
1739 | | - int i = tx_ring->next_to_use; |
---|
| 2435 | + u16 i = tx_ring->next_to_use; |
---|
1740 | 2436 | |
---|
1741 | 2437 | /* grab the next descriptor */ |
---|
1742 | 2438 | cdesc = ICE_TX_CTX_DESC(tx_ring, i); |
---|
.. | .. |
---|
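The new branch above lets LLDP and other control frames leave the PF VSI when the firmware LLDP agent is disabled, by requesting an uplink switch action in the Tx context descriptor. A hedged illustration of just the classification step is shown below; apart from the LLDP ethertype (0x88CC) and the notion of a "control" priority, every name and bit position is made up for the sketch.

/* Sketch only: decide whether a frame should be steered straight to the
 * uplink instead of through the internal switch. Mirrors the intent of the
 * check above; the command bit layout below is invented for illustration.
 */
#include <stdbool.h>
#include <stdint.h>

#define ETHERTYPE_LLDP	0x88CC	/* IEEE 802.1AB LLDP ethertype */
#define PRIO_CONTROL	7	/* assumed "control" priority class */

/* hypothetical context-descriptor command bits */
#define CTX_DESC_VALID		(1ULL << 0)
#define CTX_CMD_SWTCH_UPLINK	(1ULL << 1)

uint64_t ctx_cmd_for_frame(uint16_t ethertype, int priority,
			   bool is_pf_vsi, bool sw_lldp)
{
	if ((ethertype == ETHERTYPE_LLDP || priority == PRIO_CONTROL) &&
	    is_pf_vsi && sw_lldp)
		return CTX_DESC_VALID | CTX_CMD_SWTCH_UPLINK;

	return 0;
}

If the returned command is non-zero, the caller would merge it into the context descriptor it is about to write, which is exactly why the hunk above folds the check into offload.cd_qw1 before testing whether a context descriptor is needed at all.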
1781 | 2477 | |
---|
1782 | 2478 | return ice_xmit_frame_ring(skb, tx_ring); |
---|
1783 | 2479 | } |
---|
| 2480 | + |
---|
| 2481 | +/** |
---|
| 2482 | + * ice_clean_ctrl_tx_irq - interrupt handler for flow director Tx queue |
---|
| 2483 | + * @tx_ring: tx_ring to clean |
---|
| 2484 | + */ |
---|
| 2485 | +void ice_clean_ctrl_tx_irq(struct ice_ring *tx_ring) |
---|
| 2486 | +{ |
---|
| 2487 | + struct ice_vsi *vsi = tx_ring->vsi; |
---|
| 2488 | + s16 i = tx_ring->next_to_clean; |
---|
| 2489 | + int budget = ICE_DFLT_IRQ_WORK; |
---|
| 2490 | + struct ice_tx_desc *tx_desc; |
---|
| 2491 | + struct ice_tx_buf *tx_buf; |
---|
| 2492 | + |
---|
| 2493 | + tx_buf = &tx_ring->tx_buf[i]; |
---|
| 2494 | + tx_desc = ICE_TX_DESC(tx_ring, i); |
---|
| 2495 | + i -= tx_ring->count; |
---|
| 2496 | + |
---|
| 2497 | + do { |
---|
| 2498 | + struct ice_tx_desc *eop_desc = tx_buf->next_to_watch; |
---|
| 2499 | + |
---|
| 2500 | + /* if next_to_watch is not set then there is no pending work */ |
---|
| 2501 | + if (!eop_desc) |
---|
| 2502 | + break; |
---|
| 2503 | + |
---|
| 2504 | + /* prevent any other reads prior to eop_desc */ |
---|
| 2505 | + smp_rmb(); |
---|
| 2506 | + |
---|
| 2507 | + /* if the descriptor isn't done, no work to do */ |
---|
| 2508 | + if (!(eop_desc->cmd_type_offset_bsz & |
---|
| 2509 | + cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) |
---|
| 2510 | + break; |
---|
| 2511 | + |
---|
| 2512 | + /* clear next_to_watch to prevent false hangs */ |
---|
| 2513 | + tx_buf->next_to_watch = NULL; |
---|
| 2514 | + tx_desc->buf_addr = 0; |
---|
| 2515 | + tx_desc->cmd_type_offset_bsz = 0; |
---|
| 2516 | + |
---|
| 2517 | + /* move past filter desc */ |
---|
| 2518 | + tx_buf++; |
---|
| 2519 | + tx_desc++; |
---|
| 2520 | + i++; |
---|
| 2521 | + if (unlikely(!i)) { |
---|
| 2522 | + i -= tx_ring->count; |
---|
| 2523 | + tx_buf = tx_ring->tx_buf; |
---|
| 2524 | + tx_desc = ICE_TX_DESC(tx_ring, 0); |
---|
| 2525 | + } |
---|
| 2526 | + |
---|
| 2527 | + /* unmap the data header */ |
---|
| 2528 | + if (dma_unmap_len(tx_buf, len)) |
---|
| 2529 | + dma_unmap_single(tx_ring->dev, |
---|
| 2530 | + dma_unmap_addr(tx_buf, dma), |
---|
| 2531 | + dma_unmap_len(tx_buf, len), |
---|
| 2532 | + DMA_TO_DEVICE); |
---|
| 2533 | + if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT) |
---|
| 2534 | + devm_kfree(tx_ring->dev, tx_buf->raw_buf); |
---|
| 2535 | + |
---|
| 2536 | + /* reset the buffer and clear next_to_watch to prevent false hangs */
---|
| 2537 | + tx_buf->raw_buf = NULL; |
---|
| 2538 | + tx_buf->tx_flags = 0; |
---|
| 2539 | + tx_buf->next_to_watch = NULL; |
---|
| 2540 | + dma_unmap_len_set(tx_buf, len, 0); |
---|
| 2541 | + tx_desc->buf_addr = 0; |
---|
| 2542 | + tx_desc->cmd_type_offset_bsz = 0; |
---|
| 2543 | + |
---|
| 2544 | + /* move past eop_desc for start of next FD desc */ |
---|
| 2545 | + tx_buf++; |
---|
| 2546 | + tx_desc++; |
---|
| 2547 | + i++; |
---|
| 2548 | + if (unlikely(!i)) { |
---|
| 2549 | + i -= tx_ring->count; |
---|
| 2550 | + tx_buf = tx_ring->tx_buf; |
---|
| 2551 | + tx_desc = ICE_TX_DESC(tx_ring, 0); |
---|
| 2552 | + } |
---|
| 2553 | + |
---|
| 2554 | + budget--; |
---|
| 2555 | + } while (likely(budget)); |
---|
| 2556 | + |
---|
| 2557 | + i += tx_ring->count; |
---|
| 2558 | + tx_ring->next_to_clean = i; |
---|
| 2559 | + |
---|
| 2560 | + /* re-enable interrupt if needed */ |
---|
| 2561 | + ice_irq_dynamic_ena(&vsi->back->hw, vsi, vsi->q_vectors[0]); |
---|
| 2562 | +} |
---|
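ice_clean_ctrl_tx_irq consumes the control ring two entries at a time, one for the programmed filter descriptor and one for the dummy-packet data descriptor, and it keeps its ring index biased by the ring size so the wrap test is a simple compare against zero rather than a modulo per step. A small self-contained sketch of just that index pattern follows; the ring size and the per-entry work are invented for illustration.

/* Sketch of the negative-index wrap pattern used in the cleanup loop above.
 * Only the index handling matters here; the entry handling is made up.
 */
#include <stdio.h>

#define RING_COUNT 8

static void walk_ring(int start, int steps)
{
	int i = start;

	/* bias the index so the wrap test is a cheap "did it reach zero" */
	i -= RING_COUNT;

	while (steps--) {
		/* the real ring index is recoverable as i + RING_COUNT */
		printf("entry %d\n", i + RING_COUNT);

		i++;
		if (!i)			/* wrapped past the last entry */
			i -= RING_COUNT;
	}

	/* convert back to a normal ring index, as the driver does */
	i += RING_COUNT;
	printf("next_to_clean = %d\n", i);
}

int main(void)
{
	walk_ring(6, 5);	/* wraps from entry 7 back to entry 0 */
	return 0;
}

Running walk_ring(6, 5) on the 8-entry ring visits entries 6, 7, 0, 1, 2 and leaves next_to_clean at 3, matching how the function above converts the biased index back with i += tx_ring->count before storing it.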