2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/drivers/net/ethernet/sfc/rx.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2005-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/socket.h>
@@ -20,8 +17,11 @@
 #include <linux/iommu.h>
 #include <net/ip.h>
 #include <net/checksum.h>
+#include <net/xdp.h>
+#include <linux/bpf_trace.h>
 #include "net_driver.h"
 #include "efx.h"
+#include "rx_common.h"
 #include "filter.h"
 #include "nic.h"
 #include "selftest.h"
@@ -30,360 +30,15 @@
 /* Preferred number of descriptors to fill at once */
 #define EFX_RX_PREFERRED_BATCH 8U
 
-/* Number of RX buffers to recycle pages for. When creating the RX page recycle
- * ring, this number is divided by the number of buffers per page to calculate
- * the number of pages to store in the RX page recycle ring.
- */
-#define EFX_RECYCLE_RING_SIZE_IOMMU 4096
-#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH)
+/* Maximum rx prefix used by any architecture. */
+#define EFX_MAX_RX_PREFIX_SIZE 16
 
 /* Size of buffer allocated for skb header area. */
 #define EFX_SKB_HEADERS 128u
 
-/* This is the percentage fill level below which new RX descriptors
- * will be added to the RX descriptor ring.
- */
-static unsigned int rx_refill_threshold;
-
 /* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */
 #define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \
 				      EFX_RX_USR_BUF_SIZE)
-
-/*
- * RX maximum head room required.
- *
- * This must be at least 1 to prevent overflow, plus one packet-worth
- * to allow pipelined receives.
- */
-#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS)
-
-static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf)
-{
-	return page_address(buf->page) + buf->page_offset;
-}
-
-static inline u32 efx_rx_buf_hash(struct efx_nic *efx, const u8 *eh)
-{
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
-	return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_hash_offset));
-#else
-	const u8 *data = eh + efx->rx_packet_hash_offset;
-	return (u32)data[0] |
-	       (u32)data[1] << 8 |
-	       (u32)data[2] << 16 |
-	       (u32)data[3] << 24;
-#endif
-}
-
-static inline struct efx_rx_buffer *
-efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf)
-{
-	if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask)))
-		return efx_rx_buffer(rx_queue, 0);
-	else
-		return rx_buf + 1;
-}
-
-static inline void efx_sync_rx_buffer(struct efx_nic *efx,
-				      struct efx_rx_buffer *rx_buf,
-				      unsigned int len)
-{
-	dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr, len,
-				DMA_FROM_DEVICE);
-}
-
-void efx_rx_config_page_split(struct efx_nic *efx)
-{
-	efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align,
-				      EFX_RX_BUF_ALIGNMENT);
-	efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
-		((PAGE_SIZE - sizeof(struct efx_rx_page_state)) /
-		 efx->rx_page_buf_step);
-	efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
-		efx->rx_bufs_per_page;
-	efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH,
-					       efx->rx_bufs_per_page);
-}
-
-/* Check the RX page recycle ring for a page that can be reused. */
-static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
-{
-	struct efx_nic *efx = rx_queue->efx;
-	struct page *page;
-	struct efx_rx_page_state *state;
-	unsigned index;
-
-	index = rx_queue->page_remove & rx_queue->page_ptr_mask;
-	page = rx_queue->page_ring[index];
-	if (page == NULL)
-		return NULL;
-
-	rx_queue->page_ring[index] = NULL;
-	/* page_remove cannot exceed page_add. */
-	if (rx_queue->page_remove != rx_queue->page_add)
-		++rx_queue->page_remove;
-
-	/* If page_count is 1 then we hold the only reference to this page. */
-	if (page_count(page) == 1) {
-		++rx_queue->page_recycle_count;
-		return page;
-	} else {
-		state = page_address(page);
-		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
-			       PAGE_SIZE << efx->rx_buffer_order,
-			       DMA_FROM_DEVICE);
-		put_page(page);
-		++rx_queue->page_recycle_failed;
-	}
-
-	return NULL;
-}
-
-/**
- * efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers
- *
- * @rx_queue: Efx RX queue
- *
- * This allocates a batch of pages, maps them for DMA, and populates
- * struct efx_rx_buffers for each one. Return a negative error code or
- * 0 on success. If a single page can be used for multiple buffers,
- * then the page will either be inserted fully, or not at all.
- */
-static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
-{
-	struct efx_nic *efx = rx_queue->efx;
-	struct efx_rx_buffer *rx_buf;
-	struct page *page;
-	unsigned int page_offset;
-	struct efx_rx_page_state *state;
-	dma_addr_t dma_addr;
-	unsigned index, count;
-
-	count = 0;
-	do {
-		page = efx_reuse_page(rx_queue);
-		if (page == NULL) {
-			page = alloc_pages(__GFP_COMP |
-					   (atomic ? GFP_ATOMIC : GFP_KERNEL),
-					   efx->rx_buffer_order);
-			if (unlikely(page == NULL))
-				return -ENOMEM;
-			dma_addr =
-				dma_map_page(&efx->pci_dev->dev, page, 0,
-					     PAGE_SIZE << efx->rx_buffer_order,
-					     DMA_FROM_DEVICE);
-			if (unlikely(dma_mapping_error(&efx->pci_dev->dev,
-						       dma_addr))) {
-				__free_pages(page, efx->rx_buffer_order);
-				return -EIO;
-			}
-			state = page_address(page);
-			state->dma_addr = dma_addr;
-		} else {
-			state = page_address(page);
-			dma_addr = state->dma_addr;
-		}
-
-		dma_addr += sizeof(struct efx_rx_page_state);
-		page_offset = sizeof(struct efx_rx_page_state);
-
-		do {
-			index = rx_queue->added_count & rx_queue->ptr_mask;
-			rx_buf = efx_rx_buffer(rx_queue, index);
-			rx_buf->dma_addr = dma_addr + efx->rx_ip_align;
-			rx_buf->page = page;
-			rx_buf->page_offset = page_offset + efx->rx_ip_align;
-			rx_buf->len = efx->rx_dma_len;
-			rx_buf->flags = 0;
-			++rx_queue->added_count;
-			get_page(page);
-			dma_addr += efx->rx_page_buf_step;
-			page_offset += efx->rx_page_buf_step;
-		} while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE);
-
-		rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE;
-	} while (++count < efx->rx_pages_per_batch);
-
-	return 0;
-}
-
-/* Unmap a DMA-mapped page. This function is only called for the final RX
- * buffer in a page.
- */
-static void efx_unmap_rx_buffer(struct efx_nic *efx,
-				struct efx_rx_buffer *rx_buf)
-{
-	struct page *page = rx_buf->page;
-
-	if (page) {
-		struct efx_rx_page_state *state = page_address(page);
-		dma_unmap_page(&efx->pci_dev->dev,
-			       state->dma_addr,
-			       PAGE_SIZE << efx->rx_buffer_order,
-			       DMA_FROM_DEVICE);
-	}
-}
-
-static void efx_free_rx_buffers(struct efx_rx_queue *rx_queue,
-				struct efx_rx_buffer *rx_buf,
-				unsigned int num_bufs)
-{
-	do {
-		if (rx_buf->page) {
-			put_page(rx_buf->page);
-			rx_buf->page = NULL;
-		}
-		rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
-	} while (--num_bufs);
-}
-
-/* Attempt to recycle the page if there is an RX recycle ring; the page can
- * only be added if this is the final RX buffer, to prevent pages being used in
- * the descriptor ring and appearing in the recycle ring simultaneously.
- */
-static void efx_recycle_rx_page(struct efx_channel *channel,
-				struct efx_rx_buffer *rx_buf)
-{
-	struct page *page = rx_buf->page;
-	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
-	struct efx_nic *efx = rx_queue->efx;
-	unsigned index;
-
-	/* Only recycle the page after processing the final buffer. */
-	if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE))
-		return;
-
-	index = rx_queue->page_add & rx_queue->page_ptr_mask;
-	if (rx_queue->page_ring[index] == NULL) {
-		unsigned read_index = rx_queue->page_remove &
-			rx_queue->page_ptr_mask;
-
-		/* The next slot in the recycle ring is available, but
-		 * increment page_remove if the read pointer currently
-		 * points here.
-		 */
-		if (read_index == index)
-			++rx_queue->page_remove;
-		rx_queue->page_ring[index] = page;
-		++rx_queue->page_add;
-		return;
-	}
-	++rx_queue->page_recycle_full;
-	efx_unmap_rx_buffer(efx, rx_buf);
-	put_page(rx_buf->page);
-}
-
-static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
-			       struct efx_rx_buffer *rx_buf)
-{
-	/* Release the page reference we hold for the buffer. */
-	if (rx_buf->page)
-		put_page(rx_buf->page);
-
-	/* If this is the last buffer in a page, unmap and free it. */
-	if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) {
-		efx_unmap_rx_buffer(rx_queue->efx, rx_buf);
-		efx_free_rx_buffers(rx_queue, rx_buf, 1);
-	}
-	rx_buf->page = NULL;
-}
-
-/* Recycle the pages that are used by buffers that have just been received. */
-static void efx_recycle_rx_pages(struct efx_channel *channel,
-				 struct efx_rx_buffer *rx_buf,
-				 unsigned int n_frags)
-{
-	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
-
-	do {
-		efx_recycle_rx_page(channel, rx_buf);
-		rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
-	} while (--n_frags);
-}
-
-static void efx_discard_rx_packet(struct efx_channel *channel,
-				  struct efx_rx_buffer *rx_buf,
-				  unsigned int n_frags)
-{
-	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
-
-	efx_recycle_rx_pages(channel, rx_buf, n_frags);
-
-	efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
-}
-
-/**
- * efx_fast_push_rx_descriptors - push new RX descriptors quickly
- * @rx_queue: RX descriptor queue
- *
- * This will aim to fill the RX descriptor queue up to
- * @rx_queue->@max_fill. If there is insufficient atomic
- * memory to do so, a slow fill will be scheduled.
- *
- * The caller must provide serialisation (none is used here). In practise,
- * this means this function must run from the NAPI handler, or be called
- * when NAPI is disabled.
- */
-void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic)
-{
-	struct efx_nic *efx = rx_queue->efx;
-	unsigned int fill_level, batch_size;
-	int space, rc = 0;
-
-	if (!rx_queue->refill_enabled)
-		return;
-
-	/* Calculate current fill level, and exit if we don't need to fill */
-	fill_level = (rx_queue->added_count - rx_queue->removed_count);
-	EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries);
-	if (fill_level >= rx_queue->fast_fill_trigger)
-		goto out;
-
-	/* Record minimum fill level */
-	if (unlikely(fill_level < rx_queue->min_fill)) {
-		if (fill_level)
-			rx_queue->min_fill = fill_level;
-	}
-
-	batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page;
-	space = rx_queue->max_fill - fill_level;
-	EFX_WARN_ON_ONCE_PARANOID(space < batch_size);
-
-	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
-		   "RX queue %d fast-filling descriptor ring from"
-		   " level %d to level %d\n",
-		   efx_rx_queue_index(rx_queue), fill_level,
-		   rx_queue->max_fill);
-
-
-	do {
-		rc = efx_init_rx_buffers(rx_queue, atomic);
-		if (unlikely(rc)) {
-			/* Ensure that we don't leave the rx queue empty */
-			if (rx_queue->added_count == rx_queue->removed_count)
-				efx_schedule_slow_fill(rx_queue);
-			goto out;
-		}
-	} while ((space -= batch_size) >= batch_size);
-
-	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
-		   "RX queue %d fast-filled descriptor ring "
-		   "to level %d\n", efx_rx_queue_index(rx_queue),
-		   rx_queue->added_count - rx_queue->removed_count);
-
- out:
-	if (rx_queue->notified_count != rx_queue->added_count)
-		efx_nic_notify_rx_desc(rx_queue);
-}
-
-void efx_rx_slow_fill(struct timer_list *t)
-{
-	struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill);
-
-	/* Post an event to cause NAPI to run and refill the queue */
-	efx_nic_generate_fill_event(rx_queue);
-	++rx_queue->slow_fill_count;
-}
 
 static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue,
 				     struct efx_rx_buffer *rx_buf,
@@ -406,56 +61,6 @@
 			  efx_rx_queue_index(rx_queue), len, max_len);
 
 	efx_rx_queue_channel(rx_queue)->n_rx_overlength++;
-}
-
-/* Pass a received packet up through GRO. GRO can handle pages
- * regardless of checksum state and skbs with a good checksum.
- */
-static void
-efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
-		  unsigned int n_frags, u8 *eh)
-{
-	struct napi_struct *napi = &channel->napi_str;
-	gro_result_t gro_result;
-	struct efx_nic *efx = channel->efx;
-	struct sk_buff *skb;
-
-	skb = napi_get_frags(napi);
-	if (unlikely(!skb)) {
-		struct efx_rx_queue *rx_queue;
-
-		rx_queue = efx_channel_get_rx_queue(channel);
-		efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
-		return;
-	}
-
-	if (efx->net_dev->features & NETIF_F_RXHASH)
-		skb_set_hash(skb, efx_rx_buf_hash(efx, eh),
-			     PKT_HASH_TYPE_L3);
-	skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
-			  CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
-	skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL);
-
-	for (;;) {
-		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
-				   rx_buf->page, rx_buf->page_offset,
-				   rx_buf->len);
-		rx_buf->page = NULL;
-		skb->len += rx_buf->len;
-		if (skb_shinfo(skb)->nr_frags == n_frags)
-			break;
-
-		rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
-	}
-
-	skb->data_len = skb->len;
-	skb->truesize += n_frags * efx->rx_buffer_truesize;
-
-	skb_record_rx_queue(skb, channel->rx_queue.core_index);
-
-	gro_result = napi_gro_frags(napi);
-	if (gro_result != GRO_DROP)
-		channel->irq_mod_score += 2;
 }
 
 /* Allocate and construct an SKB around page fragments */
@@ -642,6 +247,127 @@
 	netif_receive_skb(skb);
 }
 
+/** efx_do_xdp: perform XDP processing on a received packet
+ *
+ * Returns true if packet should still be delivered.
+ */
+static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel,
+		       struct efx_rx_buffer *rx_buf, u8 **ehp)
+{
+	u8 rx_prefix[EFX_MAX_RX_PREFIX_SIZE];
+	struct efx_rx_queue *rx_queue;
+	struct bpf_prog *xdp_prog;
+	struct xdp_frame *xdpf;
+	struct xdp_buff xdp;
+	u32 xdp_act;
+	s16 offset;
+	int err;
+
+	rcu_read_lock();
+	xdp_prog = rcu_dereference(efx->xdp_prog);
+	if (!xdp_prog) {
+		rcu_read_unlock();
+		return true;
+	}
+
+	rx_queue = efx_channel_get_rx_queue(channel);
+
+	if (unlikely(channel->rx_pkt_n_frags > 1)) {
+		/* We can't do XDP on fragmented packets - drop. */
+		rcu_read_unlock();
+		efx_free_rx_buffers(rx_queue, rx_buf,
+				    channel->rx_pkt_n_frags);
+		if (net_ratelimit())
+			netif_err(efx, rx_err, efx->net_dev,
+				  "XDP is not possible with multiple receive fragments (%d)\n",
+				  channel->rx_pkt_n_frags);
+		channel->n_rx_xdp_bad_drops++;
+		return false;
+	}
+
+	dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr,
+				rx_buf->len, DMA_FROM_DEVICE);
+
+	/* Save the rx prefix. */
+	EFX_WARN_ON_PARANOID(efx->rx_prefix_size > EFX_MAX_RX_PREFIX_SIZE);
+	memcpy(rx_prefix, *ehp - efx->rx_prefix_size,
+	       efx->rx_prefix_size);
+
+	xdp.data = *ehp;
+	xdp.data_hard_start = xdp.data - EFX_XDP_HEADROOM;
+
+	/* No support yet for XDP metadata */
+	xdp_set_data_meta_invalid(&xdp);
+	xdp.data_end = xdp.data + rx_buf->len;
+	xdp.rxq = &rx_queue->xdp_rxq_info;
+	xdp.frame_sz = efx->rx_page_buf_step;
+
+	xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);
+	rcu_read_unlock();
+
+	offset = (u8 *)xdp.data - *ehp;
+
+	switch (xdp_act) {
+	case XDP_PASS:
+		/* Fix up rx prefix. */
+		if (offset) {
+			*ehp += offset;
+			rx_buf->page_offset += offset;
+			rx_buf->len -= offset;
+			memcpy(*ehp - efx->rx_prefix_size, rx_prefix,
+			       efx->rx_prefix_size);
+		}
+		break;
+
+	case XDP_TX:
+		/* Buffer ownership passes to tx on success. */
+		xdpf = xdp_convert_buff_to_frame(&xdp);
+		err = efx_xdp_tx_buffers(efx, 1, &xdpf, true);
+		if (unlikely(err != 1)) {
+			efx_free_rx_buffers(rx_queue, rx_buf, 1);
+			if (net_ratelimit())
+				netif_err(efx, rx_err, efx->net_dev,
+					  "XDP TX failed (%d)\n", err);
+			channel->n_rx_xdp_bad_drops++;
+			trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
+		} else {
+			channel->n_rx_xdp_tx++;
+		}
+		break;
+
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(efx->net_dev, &xdp, xdp_prog);
+		if (unlikely(err)) {
+			efx_free_rx_buffers(rx_queue, rx_buf, 1);
+			if (net_ratelimit())
+				netif_err(efx, rx_err, efx->net_dev,
+					  "XDP redirect failed (%d)\n", err);
+			channel->n_rx_xdp_bad_drops++;
+			trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
+		} else {
+			channel->n_rx_xdp_redirect++;
+		}
+		break;
+
+	default:
+		bpf_warn_invalid_xdp_action(xdp_act);
+		efx_free_rx_buffers(rx_queue, rx_buf, 1);
+		channel->n_rx_xdp_bad_drops++;
+		trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
+		break;
+
+	case XDP_ABORTED:
+		trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
+		fallthrough;
+	case XDP_DROP:
+		efx_free_rx_buffers(rx_queue, rx_buf, 1);
+		channel->n_rx_xdp_drops++;
+		break;
+	}
+
+	return xdp_act == XDP_PASS;
+}
+
 /* Handle a received packet. Second half: Touches packet payload. */
 void __efx_rx_packet(struct efx_channel *channel)
 {
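
The hunk above is the heart of the change: efx_do_xdp() runs the channel's attached XDP program on each single-fragment packet before it reaches GRO or the normal skb path, fixing up the hardware RX prefix when the program moves xdp.data. For orientation only (not part of this patch), below is a minimal sketch of the kind of program that hook would execute; the file and function names are illustrative. It drops IPv4/UDP frames and passes everything else, exercising the XDP_DROP and XDP_PASS arms of the switch statement added above.

/* Illustrative sketch only -- not from this patch. Build with:
 *   clang -O2 -g -target bpf -c xdp_drop_udp.c -o xdp_drop_udp.o
 * and attach with iproute2:
 *   ip link set dev <sfc-interface> xdp obj xdp_drop_udp.o sec xdp
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("xdp")
int xdp_drop_udp(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct ethhdr *eth = data;
	struct iphdr *iph;

	/* Bounds checks keep the BPF verifier happy. */
	if ((void *)(eth + 1) > data_end)
		return XDP_PASS;
	if (eth->h_proto != bpf_htons(ETH_P_IP))
		return XDP_PASS;

	iph = (void *)(eth + 1);
	if ((void *)(iph + 1) > data_end)
		return XDP_PASS;

	/* Drop UDP, pass everything else (TCP still reaches GRO below). */
	return iph->protocol == IPPROTO_UDP ? XDP_DROP : XDP_PASS;
}

char _license[] SEC("license") = "GPL";

The per-channel n_rx_xdp_* counters incremented in the hunk above make the effect of such a program observable, presumably through the driver's ethtool statistics.
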
@@ -670,400 +396,16 @@
 		goto out;
 	}
 
+	if (!efx_do_xdp(efx, channel, rx_buf, &eh))
+		goto out;
+
 	if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
 		rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;
 
 	if ((rx_buf->flags & EFX_RX_PKT_TCP) && !channel->type->receive_skb)
-		efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh);
+		efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh, 0);
 	else
 		efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags);
 out:
 	channel->rx_pkt_n_frags = 0;
-}
-
-int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
-{
-	struct efx_nic *efx = rx_queue->efx;
-	unsigned int entries;
-	int rc;
-
-	/* Create the smallest power-of-two aligned ring */
-	entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE);
-	EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
-	rx_queue->ptr_mask = entries - 1;
-
-	netif_dbg(efx, probe, efx->net_dev,
-		  "creating RX queue %d size %#x mask %#x\n",
-		  efx_rx_queue_index(rx_queue), efx->rxq_entries,
-		  rx_queue->ptr_mask);
-
-	/* Allocate RX buffers */
-	rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer),
-				   GFP_KERNEL);
-	if (!rx_queue->buffer)
-		return -ENOMEM;
-
-	rc = efx_nic_probe_rx(rx_queue);
-	if (rc) {
-		kfree(rx_queue->buffer);
-		rx_queue->buffer = NULL;
-	}
-
-	return rc;
-}
-
-static void efx_init_rx_recycle_ring(struct efx_nic *efx,
-				     struct efx_rx_queue *rx_queue)
-{
-	unsigned int bufs_in_recycle_ring, page_ring_size;
-
-	/* Set the RX recycle ring size */
-#ifdef CONFIG_PPC64
-	bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
-#else
-	if (iommu_present(&pci_bus_type))
-		bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
-	else
-		bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU;
-#endif /* CONFIG_PPC64 */
-
-	page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
-					    efx->rx_bufs_per_page);
-	rx_queue->page_ring = kcalloc(page_ring_size,
-				      sizeof(*rx_queue->page_ring), GFP_KERNEL);
-	rx_queue->page_ptr_mask = page_ring_size - 1;
-}
-
-void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
-{
-	struct efx_nic *efx = rx_queue->efx;
-	unsigned int max_fill, trigger, max_trigger;
-
-	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
-		  "initialising RX queue %d\n", efx_rx_queue_index(rx_queue));
-
-	/* Initialise ptr fields */
-	rx_queue->added_count = 0;
-	rx_queue->notified_count = 0;
-	rx_queue->removed_count = 0;
-	rx_queue->min_fill = -1U;
-	efx_init_rx_recycle_ring(efx, rx_queue);
-
-	rx_queue->page_remove = 0;
-	rx_queue->page_add = rx_queue->page_ptr_mask + 1;
-	rx_queue->page_recycle_count = 0;
-	rx_queue->page_recycle_failed = 0;
-	rx_queue->page_recycle_full = 0;
-
-	/* Initialise limit fields */
-	max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM;
-	max_trigger =
-		max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page;
-	if (rx_refill_threshold != 0) {
-		trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
-		if (trigger > max_trigger)
-			trigger = max_trigger;
-	} else {
-		trigger = max_trigger;
-	}
-
-	rx_queue->max_fill = max_fill;
-	rx_queue->fast_fill_trigger = trigger;
-	rx_queue->refill_enabled = true;
-
-	/* Set up RX descriptor ring */
-	efx_nic_init_rx(rx_queue);
-}
-
-void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
-{
-	int i;
-	struct efx_nic *efx = rx_queue->efx;
-	struct efx_rx_buffer *rx_buf;
-
-	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
-		  "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue));
-
-	del_timer_sync(&rx_queue->slow_fill);
-
-	/* Release RX buffers from the current read ptr to the write ptr */
-	if (rx_queue->buffer) {
-		for (i = rx_queue->removed_count; i < rx_queue->added_count;
-		     i++) {
-			unsigned index = i & rx_queue->ptr_mask;
-			rx_buf = efx_rx_buffer(rx_queue, index);
-			efx_fini_rx_buffer(rx_queue, rx_buf);
-		}
-	}
-
-	/* Unmap and release the pages in the recycle ring. Remove the ring. */
-	for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
-		struct page *page = rx_queue->page_ring[i];
-		struct efx_rx_page_state *state;
-
-		if (page == NULL)
-			continue;
-
-		state = page_address(page);
-		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
-			       PAGE_SIZE << efx->rx_buffer_order,
-			       DMA_FROM_DEVICE);
-		put_page(page);
-	}
-	kfree(rx_queue->page_ring);
-	rx_queue->page_ring = NULL;
-}
-
-void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)
-{
-	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
-		  "destroying RX queue %d\n", efx_rx_queue_index(rx_queue));
-
-	efx_nic_remove_rx(rx_queue);
-
-	kfree(rx_queue->buffer);
-	rx_queue->buffer = NULL;
-}
-
-
-module_param(rx_refill_threshold, uint, 0444);
-MODULE_PARM_DESC(rx_refill_threshold,
-		 "RX descriptor ring refill threshold (%)");
-
-#ifdef CONFIG_RFS_ACCEL
-
-static void efx_filter_rfs_work(struct work_struct *data)
-{
-	struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion,
-							      work);
-	struct efx_nic *efx = netdev_priv(req->net_dev);
-	struct efx_channel *channel = efx_get_channel(efx, req->rxq_index);
-	int slot_idx = req - efx->rps_slot;
-	struct efx_arfs_rule *rule;
-	u16 arfs_id = 0;
-	int rc;
-
-	rc = efx->type->filter_insert(efx, &req->spec, true);
-	if (rc >= 0)
-		rc %= efx->type->max_rx_ip_filters;
-	if (efx->rps_hash_table) {
-		spin_lock_bh(&efx->rps_hash_lock);
-		rule = efx_rps_hash_find(efx, &req->spec);
-		/* The rule might have already gone, if someone else's request
-		 * for the same spec was already worked and then expired before
-		 * we got around to our work. In that case we have nothing
-		 * tying us to an arfs_id, meaning that as soon as the filter
-		 * is considered for expiry it will be removed.
-		 */
-		if (rule) {
-			if (rc < 0)
-				rule->filter_id = EFX_ARFS_FILTER_ID_ERROR;
-			else
-				rule->filter_id = rc;
-			arfs_id = rule->arfs_id;
-		}
-		spin_unlock_bh(&efx->rps_hash_lock);
-	}
-	if (rc >= 0) {
-		/* Remember this so we can check whether to expire the filter
-		 * later.
-		 */
-		mutex_lock(&efx->rps_mutex);
-		channel->rps_flow_id[rc] = req->flow_id;
-		++channel->rfs_filters_added;
-		mutex_unlock(&efx->rps_mutex);
-
-		if (req->spec.ether_type == htons(ETH_P_IP))
-			netif_info(efx, rx_status, efx->net_dev,
-				   "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d id %u]\n",
-				   (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
-				   req->spec.rem_host, ntohs(req->spec.rem_port),
-				   req->spec.loc_host, ntohs(req->spec.loc_port),
-				   req->rxq_index, req->flow_id, rc, arfs_id);
-		else
-			netif_info(efx, rx_status, efx->net_dev,
-				   "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d id %u]\n",
-				   (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
-				   req->spec.rem_host, ntohs(req->spec.rem_port),
-				   req->spec.loc_host, ntohs(req->spec.loc_port),
-				   req->rxq_index, req->flow_id, rc, arfs_id);
-	}
-
-	/* Release references */
-	clear_bit(slot_idx, &efx->rps_slot_map);
-	dev_put(req->net_dev);
-}
-
-int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
-		   u16 rxq_index, u32 flow_id)
-{
-	struct efx_nic *efx = netdev_priv(net_dev);
-	struct efx_async_filter_insertion *req;
-	struct efx_arfs_rule *rule;
-	struct flow_keys fk;
-	int slot_idx;
-	bool new;
-	int rc;
-
-	/* find a free slot */
-	for (slot_idx = 0; slot_idx < EFX_RPS_MAX_IN_FLIGHT; slot_idx++)
-		if (!test_and_set_bit(slot_idx, &efx->rps_slot_map))
-			break;
-	if (slot_idx >= EFX_RPS_MAX_IN_FLIGHT)
-		return -EBUSY;
-
-	if (flow_id == RPS_FLOW_ID_INVALID) {
-		rc = -EINVAL;
-		goto out_clear;
-	}
-
-	if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) {
-		rc = -EPROTONOSUPPORT;
-		goto out_clear;
-	}
-
-	if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6)) {
-		rc = -EPROTONOSUPPORT;
-		goto out_clear;
-	}
-	if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) {
-		rc = -EPROTONOSUPPORT;
-		goto out_clear;
-	}
-
-	req = efx->rps_slot + slot_idx;
-	efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT,
-			   efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
-			   rxq_index);
-	req->spec.match_flags =
-		EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
-		EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT |
-		EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT;
-	req->spec.ether_type = fk.basic.n_proto;
-	req->spec.ip_proto = fk.basic.ip_proto;
-
-	if (fk.basic.n_proto == htons(ETH_P_IP)) {
-		req->spec.rem_host[0] = fk.addrs.v4addrs.src;
-		req->spec.loc_host[0] = fk.addrs.v4addrs.dst;
-	} else {
-		memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src,
-		       sizeof(struct in6_addr));
-		memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst,
-		       sizeof(struct in6_addr));
-	}
-
-	req->spec.rem_port = fk.ports.src;
-	req->spec.loc_port = fk.ports.dst;
-
-	if (efx->rps_hash_table) {
-		/* Add it to ARFS hash table */
-		spin_lock(&efx->rps_hash_lock);
-		rule = efx_rps_hash_add(efx, &req->spec, &new);
-		if (!rule) {
-			rc = -ENOMEM;
-			goto out_unlock;
-		}
-		if (new)
-			rule->arfs_id = efx->rps_next_id++ % RPS_NO_FILTER;
-		rc = rule->arfs_id;
-		/* Skip if existing or pending filter already does the right thing */
-		if (!new && rule->rxq_index == rxq_index &&
-		    rule->filter_id >= EFX_ARFS_FILTER_ID_PENDING)
-			goto out_unlock;
-		rule->rxq_index = rxq_index;
-		rule->filter_id = EFX_ARFS_FILTER_ID_PENDING;
-		spin_unlock(&efx->rps_hash_lock);
-	} else {
-		/* Without an ARFS hash table, we just use arfs_id 0 for all
-		 * filters. This means if multiple flows hash to the same
-		 * flow_id, all but the most recently touched will be eligible
-		 * for expiry.
-		 */
-		rc = 0;
-	}
-
-	/* Queue the request */
-	dev_hold(req->net_dev = net_dev);
-	INIT_WORK(&req->work, efx_filter_rfs_work);
-	req->rxq_index = rxq_index;
-	req->flow_id = flow_id;
-	schedule_work(&req->work);
-	return rc;
-out_unlock:
-	spin_unlock(&efx->rps_hash_lock);
-out_clear:
-	clear_bit(slot_idx, &efx->rps_slot_map);
-	return rc;
-}
-
-bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota)
-{
-	bool (*expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index);
-	unsigned int channel_idx, index, size;
-	u32 flow_id;
-
-	if (!mutex_trylock(&efx->rps_mutex))
-		return false;
-	expire_one = efx->type->filter_rfs_expire_one;
-	channel_idx = efx->rps_expire_channel;
-	index = efx->rps_expire_index;
-	size = efx->type->max_rx_ip_filters;
-	while (quota--) {
-		struct efx_channel *channel = efx_get_channel(efx, channel_idx);
-		flow_id = channel->rps_flow_id[index];
-
-		if (flow_id != RPS_FLOW_ID_INVALID &&
-		    expire_one(efx, flow_id, index)) {
-			netif_info(efx, rx_status, efx->net_dev,
-				   "expired filter %d [queue %u flow %u]\n",
-				   index, channel_idx, flow_id);
-			channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID;
-		}
-		if (++index == size) {
-			if (++channel_idx == efx->n_channels)
-				channel_idx = 0;
-			index = 0;
-		}
-	}
-	efx->rps_expire_channel = channel_idx;
-	efx->rps_expire_index = index;
-
-	mutex_unlock(&efx->rps_mutex);
-	return true;
-}
-
-#endif /* CONFIG_RFS_ACCEL */
-
-/**
- * efx_filter_is_mc_recipient - test whether spec is a multicast recipient
- * @spec: Specification to test
- *
- * Return: %true if the specification is a non-drop RX filter that
- * matches a local MAC address I/G bit value of 1 or matches a local
- * IPv4 or IPv6 address value in the respective multicast address
- * range. Otherwise %false.
- */
-bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec)
-{
-	if (!(spec->flags & EFX_FILTER_FLAG_RX) ||
-	    spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP)
-		return false;
-
-	if (spec->match_flags &
-	    (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) &&
-	    is_multicast_ether_addr(spec->loc_mac))
-		return true;
-
-	if ((spec->match_flags &
-	     (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) ==
-	    (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) {
-		if (spec->ether_type == htons(ETH_P_IP) &&
-		    ipv4_is_multicast(spec->loc_host[0]))
-			return true;
-		if (spec->ether_type == htons(ETH_P_IPV6) &&
-		    ((const u8 *)spec->loc_host)[0] == 0xff)
-			return true;
-	}
-
-	return false;
 }
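
Most of what this patch removes is not deleted outright: the refill, page-recycle, queue setup/teardown, ARFS and filter helpers above move out of rx.c, and the new #include "rx_common.h" in the second hunk points at the shared home they get (note that the new efx_do_xdp() still calls efx_free_rx_buffers(), whose definition is removed here, so at minimum that helper must now be exported by the common code). As a hedged sketch only, reconstructed from the signatures visible in this diff rather than copied from the real header, the counterpart rx_common.h would be expected to carry declarations along these lines; the __wsum type of the extra efx_rx_packet_gro() argument is an assumption inferred from the call site now passing a trailing 0.

/* Hypothetical sketch of rx_common.h declarations -- inferred from this diff,
 * not taken from the actual header.
 */
void efx_rx_config_page_split(struct efx_nic *efx);
void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic);
void efx_rx_slow_fill(struct timer_list *t);
void efx_free_rx_buffers(struct efx_rx_queue *rx_queue,
			 struct efx_rx_buffer *rx_buf,
			 unsigned int num_bufs);
void efx_rx_packet_gro(struct efx_channel *channel,
		       struct efx_rx_buffer *rx_buf,
		       unsigned int n_frags, u8 *eh, __wsum csum);
int efx_probe_rx_queue(struct efx_rx_queue *rx_queue);
void efx_init_rx_queue(struct efx_rx_queue *rx_queue);
void efx_fini_rx_queue(struct efx_rx_queue *rx_queue);
void efx_remove_rx_queue(struct efx_rx_queue *rx_queue);
#ifdef CONFIG_RFS_ACCEL
int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
		   u16 rxq_index, u32 flow_id);
bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota);
#endif
bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec);
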