2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/drivers/net/ethernet/sfc/rx.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
  * Copyright 2005-2006 Fen Systems Ltd.
  * Copyright 2005-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
  */
 
 #include <linux/socket.h>
@@ -20,8 +17,11 @@
 #include <linux/iommu.h>
 #include <net/ip.h>
 #include <net/checksum.h>
+#include <net/xdp.h>
+#include <linux/bpf_trace.h>
 #include "net_driver.h"
 #include "efx.h"
+#include "rx_common.h"
 #include "filter.h"
 #include "nic.h"
 #include "selftest.h"
@@ -30,360 +30,15 @@
 /* Preferred number of descriptors to fill at once */
 #define EFX_RX_PREFERRED_BATCH 8U
 
-/* Number of RX buffers to recycle pages for. When creating the RX page recycle
- * ring, this number is divided by the number of buffers per page to calculate
- * the number of pages to store in the RX page recycle ring.
- */
-#define EFX_RECYCLE_RING_SIZE_IOMMU 4096
-#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH)
+/* Maximum rx prefix used by any architecture. */
+#define EFX_MAX_RX_PREFIX_SIZE 16
 
 /* Size of buffer allocated for skb header area. */
 #define EFX_SKB_HEADERS 128u
 
-/* This is the percentage fill level below which new RX descriptors
- * will be added to the RX descriptor ring.
- */
-static unsigned int rx_refill_threshold;
-
 /* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */
 #define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \
 				      EFX_RX_USR_BUF_SIZE)
-
-/*
- * RX maximum head room required.
- *
- * This must be at least 1 to prevent overflow, plus one packet-worth
- * to allow pipelined receives.
- */
-#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS)
-
-static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf)
-{
-	return page_address(buf->page) + buf->page_offset;
-}
-
-static inline u32 efx_rx_buf_hash(struct efx_nic *efx, const u8 *eh)
-{
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
-	return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_hash_offset));
-#else
-	const u8 *data = eh + efx->rx_packet_hash_offset;
-	return (u32)data[0] |
-	       (u32)data[1] << 8 |
-	       (u32)data[2] << 16 |
-	       (u32)data[3] << 24;
-#endif
-}
-
-static inline struct efx_rx_buffer *
-efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf)
-{
-	if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask)))
-		return efx_rx_buffer(rx_queue, 0);
-	else
-		return rx_buf + 1;
-}
-
-static inline void efx_sync_rx_buffer(struct efx_nic *efx,
-				      struct efx_rx_buffer *rx_buf,
-				      unsigned int len)
-{
-	dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr, len,
-				DMA_FROM_DEVICE);
-}
-
-void efx_rx_config_page_split(struct efx_nic *efx)
-{
-	efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align,
-				      EFX_RX_BUF_ALIGNMENT);
-	efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
-		((PAGE_SIZE - sizeof(struct efx_rx_page_state)) /
-		 efx->rx_page_buf_step);
-	efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
-		efx->rx_bufs_per_page;
-	efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH,
-					       efx->rx_bufs_per_page);
-}
-
-/* Check the RX page recycle ring for a page that can be reused. */
-static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
-{
-	struct efx_nic *efx = rx_queue->efx;
-	struct page *page;
-	struct efx_rx_page_state *state;
-	unsigned index;
-
-	index = rx_queue->page_remove & rx_queue->page_ptr_mask;
-	page = rx_queue->page_ring[index];
-	if (page == NULL)
-		return NULL;
-
-	rx_queue->page_ring[index] = NULL;
-	/* page_remove cannot exceed page_add. */
-	if (rx_queue->page_remove != rx_queue->page_add)
-		++rx_queue->page_remove;
-
-	/* If page_count is 1 then we hold the only reference to this page. */
-	if (page_count(page) == 1) {
-		++rx_queue->page_recycle_count;
-		return page;
-	} else {
-		state = page_address(page);
-		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
-			       PAGE_SIZE << efx->rx_buffer_order,
-			       DMA_FROM_DEVICE);
-		put_page(page);
-		++rx_queue->page_recycle_failed;
-	}
-
-	return NULL;
-}
-
-/**
- * efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers
- *
- * @rx_queue: Efx RX queue
- *
- * This allocates a batch of pages, maps them for DMA, and populates
- * struct efx_rx_buffers for each one. Return a negative error code or
- * 0 on success. If a single page can be used for multiple buffers,
- * then the page will either be inserted fully, or not at all.
- */
-static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
-{
-	struct efx_nic *efx = rx_queue->efx;
-	struct efx_rx_buffer *rx_buf;
-	struct page *page;
-	unsigned int page_offset;
-	struct efx_rx_page_state *state;
-	dma_addr_t dma_addr;
-	unsigned index, count;
-
-	count = 0;
-	do {
-		page = efx_reuse_page(rx_queue);
-		if (page == NULL) {
-			page = alloc_pages(__GFP_COMP |
-					   (atomic ? GFP_ATOMIC : GFP_KERNEL),
-					   efx->rx_buffer_order);
-			if (unlikely(page == NULL))
-				return -ENOMEM;
-			dma_addr =
-				dma_map_page(&efx->pci_dev->dev, page, 0,
-					     PAGE_SIZE << efx->rx_buffer_order,
-					     DMA_FROM_DEVICE);
-			if (unlikely(dma_mapping_error(&efx->pci_dev->dev,
-						       dma_addr))) {
-				__free_pages(page, efx->rx_buffer_order);
-				return -EIO;
-			}
-			state = page_address(page);
-			state->dma_addr = dma_addr;
-		} else {
-			state = page_address(page);
-			dma_addr = state->dma_addr;
-		}
-
-		dma_addr += sizeof(struct efx_rx_page_state);
-		page_offset = sizeof(struct efx_rx_page_state);
-
-		do {
-			index = rx_queue->added_count & rx_queue->ptr_mask;
-			rx_buf = efx_rx_buffer(rx_queue, index);
-			rx_buf->dma_addr = dma_addr + efx->rx_ip_align;
-			rx_buf->page = page;
-			rx_buf->page_offset = page_offset + efx->rx_ip_align;
-			rx_buf->len = efx->rx_dma_len;
-			rx_buf->flags = 0;
-			++rx_queue->added_count;
-			get_page(page);
-			dma_addr += efx->rx_page_buf_step;
-			page_offset += efx->rx_page_buf_step;
-		} while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE);
-
-		rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE;
-	} while (++count < efx->rx_pages_per_batch);
-
-	return 0;
-}
-
-/* Unmap a DMA-mapped page. This function is only called for the final RX
- * buffer in a page.
- */
-static void efx_unmap_rx_buffer(struct efx_nic *efx,
-				struct efx_rx_buffer *rx_buf)
-{
-	struct page *page = rx_buf->page;
-
-	if (page) {
-		struct efx_rx_page_state *state = page_address(page);
-		dma_unmap_page(&efx->pci_dev->dev,
-			       state->dma_addr,
-			       PAGE_SIZE << efx->rx_buffer_order,
-			       DMA_FROM_DEVICE);
-	}
-}
-
-static void efx_free_rx_buffers(struct efx_rx_queue *rx_queue,
-				struct efx_rx_buffer *rx_buf,
-				unsigned int num_bufs)
-{
-	do {
-		if (rx_buf->page) {
-			put_page(rx_buf->page);
-			rx_buf->page = NULL;
-		}
-		rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
-	} while (--num_bufs);
-}
-
-/* Attempt to recycle the page if there is an RX recycle ring; the page can
- * only be added if this is the final RX buffer, to prevent pages being used in
- * the descriptor ring and appearing in the recycle ring simultaneously.
- */
-static void efx_recycle_rx_page(struct efx_channel *channel,
-				struct efx_rx_buffer *rx_buf)
-{
-	struct page *page = rx_buf->page;
-	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
-	struct efx_nic *efx = rx_queue->efx;
-	unsigned index;
-
-	/* Only recycle the page after processing the final buffer. */
-	if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE))
-		return;
-
-	index = rx_queue->page_add & rx_queue->page_ptr_mask;
-	if (rx_queue->page_ring[index] == NULL) {
-		unsigned read_index = rx_queue->page_remove &
-			rx_queue->page_ptr_mask;
-
-		/* The next slot in the recycle ring is available, but
-		 * increment page_remove if the read pointer currently
-		 * points here.
-		 */
-		if (read_index == index)
-			++rx_queue->page_remove;
-		rx_queue->page_ring[index] = page;
-		++rx_queue->page_add;
-		return;
-	}
-	++rx_queue->page_recycle_full;
-	efx_unmap_rx_buffer(efx, rx_buf);
-	put_page(rx_buf->page);
-}
-
-static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
-			       struct efx_rx_buffer *rx_buf)
-{
-	/* Release the page reference we hold for the buffer. */
-	if (rx_buf->page)
-		put_page(rx_buf->page);
-
-	/* If this is the last buffer in a page, unmap and free it. */
-	if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) {
-		efx_unmap_rx_buffer(rx_queue->efx, rx_buf);
-		efx_free_rx_buffers(rx_queue, rx_buf, 1);
-	}
-	rx_buf->page = NULL;
-}
-
-/* Recycle the pages that are used by buffers that have just been received. */
-static void efx_recycle_rx_pages(struct efx_channel *channel,
-				 struct efx_rx_buffer *rx_buf,
-				 unsigned int n_frags)
-{
-	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
-
-	do {
-		efx_recycle_rx_page(channel, rx_buf);
-		rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
-	} while (--n_frags);
-}
-
-static void efx_discard_rx_packet(struct efx_channel *channel,
-				  struct efx_rx_buffer *rx_buf,
-				  unsigned int n_frags)
-{
-	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
-
-	efx_recycle_rx_pages(channel, rx_buf, n_frags);
-
-	efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
-}
-
-/**
- * efx_fast_push_rx_descriptors - push new RX descriptors quickly
- * @rx_queue: RX descriptor queue
- *
- * This will aim to fill the RX descriptor queue up to
- * @rx_queue->@max_fill. If there is insufficient atomic
- * memory to do so, a slow fill will be scheduled.
- *
- * The caller must provide serialisation (none is used here). In practise,
- * this means this function must run from the NAPI handler, or be called
- * when NAPI is disabled.
- */
-void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic)
-{
-	struct efx_nic *efx = rx_queue->efx;
-	unsigned int fill_level, batch_size;
-	int space, rc = 0;
-
-	if (!rx_queue->refill_enabled)
-		return;
-
-	/* Calculate current fill level, and exit if we don't need to fill */
-	fill_level = (rx_queue->added_count - rx_queue->removed_count);
-	EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries);
-	if (fill_level >= rx_queue->fast_fill_trigger)
-		goto out;
-
-	/* Record minimum fill level */
-	if (unlikely(fill_level < rx_queue->min_fill)) {
-		if (fill_level)
-			rx_queue->min_fill = fill_level;
-	}
-
-	batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page;
-	space = rx_queue->max_fill - fill_level;
-	EFX_WARN_ON_ONCE_PARANOID(space < batch_size);
-
-	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
-		   "RX queue %d fast-filling descriptor ring from"
-		   " level %d to level %d\n",
-		   efx_rx_queue_index(rx_queue), fill_level,
-		   rx_queue->max_fill);
-
-
-	do {
-		rc = efx_init_rx_buffers(rx_queue, atomic);
-		if (unlikely(rc)) {
-			/* Ensure that we don't leave the rx queue empty */
-			if (rx_queue->added_count == rx_queue->removed_count)
-				efx_schedule_slow_fill(rx_queue);
-			goto out;
-		}
-	} while ((space -= batch_size) >= batch_size);
-
-	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
-		   "RX queue %d fast-filled descriptor ring "
-		   "to level %d\n", efx_rx_queue_index(rx_queue),
-		   rx_queue->added_count - rx_queue->removed_count);
-
- out:
-	if (rx_queue->notified_count != rx_queue->added_count)
-		efx_nic_notify_rx_desc(rx_queue);
-}
-
-void efx_rx_slow_fill(struct timer_list *t)
-{
-	struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill);
-
-	/* Post an event to cause NAPI to run and refill the queue */
-	efx_nic_generate_fill_event(rx_queue);
-	++rx_queue->slow_fill_count;
-}
 
 static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue,
 				     struct efx_rx_buffer *rx_buf,
@@ -406,56 +61,6 @@
 			  efx_rx_queue_index(rx_queue), len, max_len);
 
 	efx_rx_queue_channel(rx_queue)->n_rx_overlength++;
-}
-
-/* Pass a received packet up through GRO. GRO can handle pages
- * regardless of checksum state and skbs with a good checksum.
- */
-static void
-efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
-		  unsigned int n_frags, u8 *eh)
-{
-	struct napi_struct *napi = &channel->napi_str;
-	gro_result_t gro_result;
-	struct efx_nic *efx = channel->efx;
-	struct sk_buff *skb;
-
-	skb = napi_get_frags(napi);
-	if (unlikely(!skb)) {
-		struct efx_rx_queue *rx_queue;
-
-		rx_queue = efx_channel_get_rx_queue(channel);
-		efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
-		return;
-	}
-
-	if (efx->net_dev->features & NETIF_F_RXHASH)
-		skb_set_hash(skb, efx_rx_buf_hash(efx, eh),
-			     PKT_HASH_TYPE_L3);
-	skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
-			  CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
-	skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL);
-
-	for (;;) {
-		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
-				   rx_buf->page, rx_buf->page_offset,
-				   rx_buf->len);
-		rx_buf->page = NULL;
-		skb->len += rx_buf->len;
-		if (skb_shinfo(skb)->nr_frags == n_frags)
-			break;
-
-		rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
-	}
-
-	skb->data_len = skb->len;
-	skb->truesize += n_frags * efx->rx_buffer_truesize;
-
-	skb_record_rx_queue(skb, channel->rx_queue.core_index);
-
-	gro_result = napi_gro_frags(napi);
-	if (gro_result != GRO_DROP)
-		channel->irq_mod_score += 2;
 }
 
 /* Allocate and construct an SKB around page fragments */
@@ -642,6 +247,127 @@
 	netif_receive_skb(skb);
 }
 
+/** efx_do_xdp: perform XDP processing on a received packet
+ *
+ * Returns true if packet should still be delivered.
+ */
+static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel,
+		       struct efx_rx_buffer *rx_buf, u8 **ehp)
+{
+	u8 rx_prefix[EFX_MAX_RX_PREFIX_SIZE];
+	struct efx_rx_queue *rx_queue;
+	struct bpf_prog *xdp_prog;
+	struct xdp_frame *xdpf;
+	struct xdp_buff xdp;
+	u32 xdp_act;
+	s16 offset;
+	int err;
+
+	rcu_read_lock();
+	xdp_prog = rcu_dereference(efx->xdp_prog);
+	if (!xdp_prog) {
+		rcu_read_unlock();
+		return true;
+	}
+
+	rx_queue = efx_channel_get_rx_queue(channel);
+
+	if (unlikely(channel->rx_pkt_n_frags > 1)) {
+		/* We can't do XDP on fragmented packets - drop. */
+		rcu_read_unlock();
+		efx_free_rx_buffers(rx_queue, rx_buf,
+				    channel->rx_pkt_n_frags);
+		if (net_ratelimit())
+			netif_err(efx, rx_err, efx->net_dev,
+				  "XDP is not possible with multiple receive fragments (%d)\n",
+				  channel->rx_pkt_n_frags);
+		channel->n_rx_xdp_bad_drops++;
+		return false;
+	}
+
+	dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr,
+				rx_buf->len, DMA_FROM_DEVICE);
+
+	/* Save the rx prefix. */
+	EFX_WARN_ON_PARANOID(efx->rx_prefix_size > EFX_MAX_RX_PREFIX_SIZE);
+	memcpy(rx_prefix, *ehp - efx->rx_prefix_size,
+	       efx->rx_prefix_size);
+
+	xdp.data = *ehp;
+	xdp.data_hard_start = xdp.data - EFX_XDP_HEADROOM;
+
+	/* No support yet for XDP metadata */
+	xdp_set_data_meta_invalid(&xdp);
+	xdp.data_end = xdp.data + rx_buf->len;
+	xdp.rxq = &rx_queue->xdp_rxq_info;
+	xdp.frame_sz = efx->rx_page_buf_step;
+
+	xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);
+	rcu_read_unlock();
+
+	offset = (u8 *)xdp.data - *ehp;
+
+	switch (xdp_act) {
+	case XDP_PASS:
+		/* Fix up rx prefix. */
+		if (offset) {
+			*ehp += offset;
+			rx_buf->page_offset += offset;
+			rx_buf->len -= offset;
+			memcpy(*ehp - efx->rx_prefix_size, rx_prefix,
+			       efx->rx_prefix_size);
+		}
+		break;
+
+	case XDP_TX:
+		/* Buffer ownership passes to tx on success. */
+		xdpf = xdp_convert_buff_to_frame(&xdp);
+		err = efx_xdp_tx_buffers(efx, 1, &xdpf, true);
+		if (unlikely(err != 1)) {
+			efx_free_rx_buffers(rx_queue, rx_buf, 1);
+			if (net_ratelimit())
+				netif_err(efx, rx_err, efx->net_dev,
+					  "XDP TX failed (%d)\n", err);
+			channel->n_rx_xdp_bad_drops++;
+			trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
+		} else {
+			channel->n_rx_xdp_tx++;
+		}
+		break;
+
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(efx->net_dev, &xdp, xdp_prog);
+		if (unlikely(err)) {
+			efx_free_rx_buffers(rx_queue, rx_buf, 1);
+			if (net_ratelimit())
+				netif_err(efx, rx_err, efx->net_dev,
+					  "XDP redirect failed (%d)\n", err);
+			channel->n_rx_xdp_bad_drops++;
+			trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
+		} else {
+			channel->n_rx_xdp_redirect++;
+		}
+		break;
+
+	default:
+		bpf_warn_invalid_xdp_action(xdp_act);
+		efx_free_rx_buffers(rx_queue, rx_buf, 1);
+		channel->n_rx_xdp_bad_drops++;
+		trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
+		break;
+
+	case XDP_ABORTED:
+		trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
+		fallthrough;
+	case XDP_DROP:
+		efx_free_rx_buffers(rx_queue, rx_buf, 1);
+		channel->n_rx_xdp_drops++;
+		break;
+	}
+
+	return xdp_act == XDP_PASS;
+}
+
 /* Handle a received packet. Second half: Touches packet payload. */
 void __efx_rx_packet(struct efx_channel *channel)
 {
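
The hunk above is the heart of the change: efx_do_xdp() runs the channel's attached XDP program on each single-fragment packet before it reaches GRO or the normal skb path, fixing up the hardware RX prefix when the program moves xdp.data. For orientation only (not part of this patch), below is a minimal sketch of the kind of program that hook would execute; the file and function names are illustrative. It drops IPv4/UDP frames and passes everything else, exercising the XDP_DROP and XDP_PASS arms of the switch statement added above.

/* Illustrative sketch only -- not from this patch. Build with:
 *   clang -O2 -g -target bpf -c xdp_drop_udp.c -o xdp_drop_udp.o
 * and attach with iproute2:
 *   ip link set dev <sfc-interface> xdp obj xdp_drop_udp.o sec xdp
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("xdp")
int xdp_drop_udp(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct ethhdr *eth = data;
	struct iphdr *iph;

	/* Bounds checks keep the BPF verifier happy. */
	if ((void *)(eth + 1) > data_end)
		return XDP_PASS;
	if (eth->h_proto != bpf_htons(ETH_P_IP))
		return XDP_PASS;

	iph = (void *)(eth + 1);
	if ((void *)(iph + 1) > data_end)
		return XDP_PASS;

	/* Drop UDP, pass everything else (TCP still reaches GRO below). */
	return iph->protocol == IPPROTO_UDP ? XDP_DROP : XDP_PASS;
}

char _license[] SEC("license") = "GPL";

The per-channel n_rx_xdp_* counters incremented in the hunk above make the effect of such a program observable, presumably through the driver's ethtool statistics.
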
@@ -670,400 +396,16 @@
 		goto out;
 	}
 
+	if (!efx_do_xdp(efx, channel, rx_buf, &eh))
+		goto out;
+
 	if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
 		rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;
 
 	if ((rx_buf->flags & EFX_RX_PKT_TCP) && !channel->type->receive_skb)
-		efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh);
+		efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh, 0);
 	else
 		efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags);
 out:
 	channel->rx_pkt_n_frags = 0;
-}
-
-int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
-{
-	struct efx_nic *efx = rx_queue->efx;
-	unsigned int entries;
-	int rc;
-
-	/* Create the smallest power-of-two aligned ring */
-	entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE);
-	EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
-	rx_queue->ptr_mask = entries - 1;
-
-	netif_dbg(efx, probe, efx->net_dev,
-		  "creating RX queue %d size %#x mask %#x\n",
-		  efx_rx_queue_index(rx_queue), efx->rxq_entries,
-		  rx_queue->ptr_mask);
-
-	/* Allocate RX buffers */
-	rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer),
-				   GFP_KERNEL);
-	if (!rx_queue->buffer)
-		return -ENOMEM;
-
-	rc = efx_nic_probe_rx(rx_queue);
-	if (rc) {
-		kfree(rx_queue->buffer);
-		rx_queue->buffer = NULL;
-	}
-
-	return rc;
-}
-
-static void efx_init_rx_recycle_ring(struct efx_nic *efx,
-				     struct efx_rx_queue *rx_queue)
-{
-	unsigned int bufs_in_recycle_ring, page_ring_size;
-
-	/* Set the RX recycle ring size */
-#ifdef CONFIG_PPC64
-	bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
-#else
-	if (iommu_present(&pci_bus_type))
-		bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
-	else
-		bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU;
-#endif /* CONFIG_PPC64 */
-
-	page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
-					    efx->rx_bufs_per_page);
-	rx_queue->page_ring = kcalloc(page_ring_size,
-				      sizeof(*rx_queue->page_ring), GFP_KERNEL);
-	rx_queue->page_ptr_mask = page_ring_size - 1;
-}
-
-void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
-{
-	struct efx_nic *efx = rx_queue->efx;
-	unsigned int max_fill, trigger, max_trigger;
-
-	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
-		  "initialising RX queue %d\n", efx_rx_queue_index(rx_queue));
-
-	/* Initialise ptr fields */
-	rx_queue->added_count = 0;
-	rx_queue->notified_count = 0;
-	rx_queue->removed_count = 0;
-	rx_queue->min_fill = -1U;
-	efx_init_rx_recycle_ring(efx, rx_queue);
-
-	rx_queue->page_remove = 0;
-	rx_queue->page_add = rx_queue->page_ptr_mask + 1;
-	rx_queue->page_recycle_count = 0;
-	rx_queue->page_recycle_failed = 0;
-	rx_queue->page_recycle_full = 0;
-
-	/* Initialise limit fields */
-	max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM;
-	max_trigger =
-		max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page;
-	if (rx_refill_threshold != 0) {
-		trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
-		if (trigger > max_trigger)
-			trigger = max_trigger;
-	} else {
-		trigger = max_trigger;
-	}
-
-	rx_queue->max_fill = max_fill;
-	rx_queue->fast_fill_trigger = trigger;
-	rx_queue->refill_enabled = true;
-
-	/* Set up RX descriptor ring */
-	efx_nic_init_rx(rx_queue);
-}
-
-void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
-{
-	int i;
-	struct efx_nic *efx = rx_queue->efx;
-	struct efx_rx_buffer *rx_buf;
-
-	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
-		  "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue));
-
-	del_timer_sync(&rx_queue->slow_fill);
-
-	/* Release RX buffers from the current read ptr to the write ptr */
-	if (rx_queue->buffer) {
-		for (i = rx_queue->removed_count; i < rx_queue->added_count;
-		     i++) {
-			unsigned index = i & rx_queue->ptr_mask;
-			rx_buf = efx_rx_buffer(rx_queue, index);
-			efx_fini_rx_buffer(rx_queue, rx_buf);
-		}
-	}
-
-	/* Unmap and release the pages in the recycle ring. Remove the ring. */
-	for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
-		struct page *page = rx_queue->page_ring[i];
-		struct efx_rx_page_state *state;
-
-		if (page == NULL)
-			continue;
-
-		state = page_address(page);
-		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
-			       PAGE_SIZE << efx->rx_buffer_order,
-			       DMA_FROM_DEVICE);
-		put_page(page);
-	}
-	kfree(rx_queue->page_ring);
-	rx_queue->page_ring = NULL;
-}
-
-void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)
-{
-	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
-		  "destroying RX queue %d\n", efx_rx_queue_index(rx_queue));
-
-	efx_nic_remove_rx(rx_queue);
-
-	kfree(rx_queue->buffer);
-	rx_queue->buffer = NULL;
-}
-
-
-module_param(rx_refill_threshold, uint, 0444);
-MODULE_PARM_DESC(rx_refill_threshold,
-		 "RX descriptor ring refill threshold (%)");
-
-#ifdef CONFIG_RFS_ACCEL
-
-static void efx_filter_rfs_work(struct work_struct *data)
-{
-	struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion,
-							      work);
-	struct efx_nic *efx = netdev_priv(req->net_dev);
-	struct efx_channel *channel = efx_get_channel(efx, req->rxq_index);
-	int slot_idx = req - efx->rps_slot;
-	struct efx_arfs_rule *rule;
-	u16 arfs_id = 0;
-	int rc;
-
-	rc = efx->type->filter_insert(efx, &req->spec, true);
-	if (rc >= 0)
-		rc %= efx->type->max_rx_ip_filters;
-	if (efx->rps_hash_table) {
-		spin_lock_bh(&efx->rps_hash_lock);
-		rule = efx_rps_hash_find(efx, &req->spec);
-		/* The rule might have already gone, if someone else's request
-		 * for the same spec was already worked and then expired before
-		 * we got around to our work. In that case we have nothing
-		 * tying us to an arfs_id, meaning that as soon as the filter
-		 * is considered for expiry it will be removed.
-		 */
-		if (rule) {
-			if (rc < 0)
-				rule->filter_id = EFX_ARFS_FILTER_ID_ERROR;
-			else
-				rule->filter_id = rc;
-			arfs_id = rule->arfs_id;
-		}
-		spin_unlock_bh(&efx->rps_hash_lock);
-	}
-	if (rc >= 0) {
-		/* Remember this so we can check whether to expire the filter
-		 * later.
-		 */
-		mutex_lock(&efx->rps_mutex);
-		channel->rps_flow_id[rc] = req->flow_id;
-		++channel->rfs_filters_added;
-		mutex_unlock(&efx->rps_mutex);
-
-		if (req->spec.ether_type == htons(ETH_P_IP))
-			netif_info(efx, rx_status, efx->net_dev,
-				   "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d id %u]\n",
-				   (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
-				   req->spec.rem_host, ntohs(req->spec.rem_port),
-				   req->spec.loc_host, ntohs(req->spec.loc_port),
-				   req->rxq_index, req->flow_id, rc, arfs_id);
-		else
-			netif_info(efx, rx_status, efx->net_dev,
-				   "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d id %u]\n",
-				   (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
-				   req->spec.rem_host, ntohs(req->spec.rem_port),
-				   req->spec.loc_host, ntohs(req->spec.loc_port),
-				   req->rxq_index, req->flow_id, rc, arfs_id);
-	}
-
-	/* Release references */
-	clear_bit(slot_idx, &efx->rps_slot_map);
-	dev_put(req->net_dev);
-}
-
-int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
-		   u16 rxq_index, u32 flow_id)
-{
-	struct efx_nic *efx = netdev_priv(net_dev);
-	struct efx_async_filter_insertion *req;
-	struct efx_arfs_rule *rule;
-	struct flow_keys fk;
-	int slot_idx;
-	bool new;
-	int rc;
-
-	/* find a free slot */
-	for (slot_idx = 0; slot_idx < EFX_RPS_MAX_IN_FLIGHT; slot_idx++)
-		if (!test_and_set_bit(slot_idx, &efx->rps_slot_map))
-			break;
-	if (slot_idx >= EFX_RPS_MAX_IN_FLIGHT)
-		return -EBUSY;
-
-	if (flow_id == RPS_FLOW_ID_INVALID) {
-		rc = -EINVAL;
-		goto out_clear;
-	}
-
-	if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) {
-		rc = -EPROTONOSUPPORT;
-		goto out_clear;
-	}
-
-	if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6)) {
-		rc = -EPROTONOSUPPORT;
-		goto out_clear;
-	}
-	if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) {
-		rc = -EPROTONOSUPPORT;
-		goto out_clear;
-	}
-
-	req = efx->rps_slot + slot_idx;
-	efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT,
-			   efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
-			   rxq_index);
-	req->spec.match_flags =
-		EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
-		EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT |
-		EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT;
-	req->spec.ether_type = fk.basic.n_proto;
-	req->spec.ip_proto = fk.basic.ip_proto;
-
-	if (fk.basic.n_proto == htons(ETH_P_IP)) {
-		req->spec.rem_host[0] = fk.addrs.v4addrs.src;
-		req->spec.loc_host[0] = fk.addrs.v4addrs.dst;
-	} else {
-		memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src,
-		       sizeof(struct in6_addr));
-		memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst,
-		       sizeof(struct in6_addr));
-	}
-
-	req->spec.rem_port = fk.ports.src;
-	req->spec.loc_port = fk.ports.dst;
-
-	if (efx->rps_hash_table) {
-		/* Add it to ARFS hash table */
-		spin_lock(&efx->rps_hash_lock);
-		rule = efx_rps_hash_add(efx, &req->spec, &new);
-		if (!rule) {
-			rc = -ENOMEM;
-			goto out_unlock;
-		}
-		if (new)
-			rule->arfs_id = efx->rps_next_id++ % RPS_NO_FILTER;
-		rc = rule->arfs_id;
-		/* Skip if existing or pending filter already does the right thing */
-		if (!new && rule->rxq_index == rxq_index &&
-		    rule->filter_id >= EFX_ARFS_FILTER_ID_PENDING)
-			goto out_unlock;
-		rule->rxq_index = rxq_index;
-		rule->filter_id = EFX_ARFS_FILTER_ID_PENDING;
-		spin_unlock(&efx->rps_hash_lock);
-	} else {
-		/* Without an ARFS hash table, we just use arfs_id 0 for all
-		 * filters. This means if multiple flows hash to the same
-		 * flow_id, all but the most recently touched will be eligible
-		 * for expiry.
-		 */
-		rc = 0;
-	}
-
-	/* Queue the request */
-	dev_hold(req->net_dev = net_dev);
-	INIT_WORK(&req->work, efx_filter_rfs_work);
-	req->rxq_index = rxq_index;
-	req->flow_id = flow_id;
-	schedule_work(&req->work);
-	return rc;
-out_unlock:
-	spin_unlock(&efx->rps_hash_lock);
-out_clear:
-	clear_bit(slot_idx, &efx->rps_slot_map);
-	return rc;
-}
-
-bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota)
-{
-	bool (*expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index);
-	unsigned int channel_idx, index, size;
-	u32 flow_id;
-
-	if (!mutex_trylock(&efx->rps_mutex))
-		return false;
-	expire_one = efx->type->filter_rfs_expire_one;
-	channel_idx = efx->rps_expire_channel;
-	index = efx->rps_expire_index;
-	size = efx->type->max_rx_ip_filters;
-	while (quota--) {
-		struct efx_channel *channel = efx_get_channel(efx, channel_idx);
-		flow_id = channel->rps_flow_id[index];
-
-		if (flow_id != RPS_FLOW_ID_INVALID &&
-		    expire_one(efx, flow_id, index)) {
-			netif_info(efx, rx_status, efx->net_dev,
-				   "expired filter %d [queue %u flow %u]\n",
-				   index, channel_idx, flow_id);
-			channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID;
-		}
-		if (++index == size) {
-			if (++channel_idx == efx->n_channels)
-				channel_idx = 0;
-			index = 0;
-		}
-	}
-	efx->rps_expire_channel = channel_idx;
-	efx->rps_expire_index = index;
-
-	mutex_unlock(&efx->rps_mutex);
-	return true;
-}
-
-#endif /* CONFIG_RFS_ACCEL */
-
-/**
- * efx_filter_is_mc_recipient - test whether spec is a multicast recipient
- * @spec: Specification to test
- *
- * Return: %true if the specification is a non-drop RX filter that
- * matches a local MAC address I/G bit value of 1 or matches a local
- * IPv4 or IPv6 address value in the respective multicast address
- * range. Otherwise %false.
- */
-bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec)
-{
-	if (!(spec->flags & EFX_FILTER_FLAG_RX) ||
-	    spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP)
-		return false;
-
-	if (spec->match_flags &
-	    (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) &&
-	    is_multicast_ether_addr(spec->loc_mac))
-		return true;
-
-	if ((spec->match_flags &
-	     (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) ==
-	    (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) {
-		if (spec->ether_type == htons(ETH_P_IP) &&
-		    ipv4_is_multicast(spec->loc_host[0]))
-			return true;
-		if (spec->ether_type == htons(ETH_P_IPV6) &&
-		    ((const u8 *)spec->loc_host)[0] == 0xff)
-			return true;
-	}
-
-	return false;
 }
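
Most of what this patch removes is not deleted outright: the refill, page-recycle, queue setup/teardown, ARFS and filter helpers above move out of rx.c, and the new #include "rx_common.h" in the second hunk points at the shared home they get (note that the new efx_do_xdp() still calls efx_free_rx_buffers(), whose definition is removed here, so at minimum that helper must now be exported by the common code). As a hedged sketch only, reconstructed from the signatures visible in this diff rather than copied from the real header, the counterpart rx_common.h would be expected to carry declarations along these lines; the __wsum type of the extra efx_rx_packet_gro() argument is an assumption inferred from the call site now passing a trailing 0.

/* Hypothetical sketch of rx_common.h declarations -- inferred from this diff,
 * not taken from the actual header.
 */
void efx_rx_config_page_split(struct efx_nic *efx);
void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic);
void efx_rx_slow_fill(struct timer_list *t);
void efx_free_rx_buffers(struct efx_rx_queue *rx_queue,
			 struct efx_rx_buffer *rx_buf,
			 unsigned int num_bufs);
void efx_rx_packet_gro(struct efx_channel *channel,
		       struct efx_rx_buffer *rx_buf,
		       unsigned int n_frags, u8 *eh, __wsum csum);
int efx_probe_rx_queue(struct efx_rx_queue *rx_queue);
void efx_init_rx_queue(struct efx_rx_queue *rx_queue);
void efx_fini_rx_queue(struct efx_rx_queue *rx_queue);
void efx_remove_rx_queue(struct efx_rx_queue *rx_queue);
#ifdef CONFIG_RFS_ACCEL
int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
		   u16 rxq_index, u32 flow_id);
bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota);
#endif
bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec);
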