2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -1,33 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
- * Copyright 2015 Amazon.com, Inc. or its affiliates.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
 */

 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -36,10 +9,8 @@
 #include <linux/cpu_rmap.h>
 #endif /* CONFIG_RFS_ACCEL */
 #include <linux/ethtool.h>
-#include <linux/if_vlan.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/moduleparam.h>
 #include <linux/numa.h>
 #include <linux/pci.h>
 #include <linux/utsname.h>
@@ -48,14 +19,12 @@
 #include <net/ip.h>

 #include "ena_netdev.h"
+#include <linux/bpf_trace.h>
 #include "ena_pci_id_tbl.h"
-
-static char version[] = DEVICE_NAME " v" DRV_MODULE_VERSION "\n";

 MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
 MODULE_DESCRIPTION(DEVICE_NAME);
 MODULE_LICENSE("GPL");
-MODULE_VERSION(DRV_MODULE_VERSION);

 /* Time in jiffies before concluding the transmitter is hung. */
 #define TX_TIMEOUT (5 * HZ)
@@ -79,7 +48,37 @@
 static void ena_destroy_device(struct ena_adapter *adapter, bool graceful);
 static int ena_restore_device(struct ena_adapter *adapter);

-static void ena_tx_timeout(struct net_device *dev)
+static void ena_init_io_rings(struct ena_adapter *adapter,
+ int first_index, int count);
+static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index,
+ int count);
+static void ena_del_napi_in_range(struct ena_adapter *adapter, int first_index,
+ int count);
+static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid);
+static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
+ int first_index,
+ int count);
+static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid);
+static void ena_free_tx_resources(struct ena_adapter *adapter, int qid);
+static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget);
+static void ena_destroy_all_tx_queues(struct ena_adapter *adapter);
+static void ena_free_all_io_tx_resources(struct ena_adapter *adapter);
+static void ena_napi_disable_in_range(struct ena_adapter *adapter,
+ int first_index, int count);
+static void ena_napi_enable_in_range(struct ena_adapter *adapter,
+ int first_index, int count);
+static int ena_up(struct ena_adapter *adapter);
+static void ena_down(struct ena_adapter *adapter);
+static void ena_unmask_interrupt(struct ena_ring *tx_ring,
+ struct ena_ring *rx_ring);
+static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
+ struct ena_ring *rx_ring);
+static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
+ struct ena_tx_buffer *tx_info);
+static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
+ int first_index, int count);
+
+static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue)
 {
 struct ena_adapter *adapter = netdev_priv(dev);

@@ -102,7 +101,7 @@
 {
 int i;

- for (i = 0; i < adapter->num_queues; i++)
+ for (i = 0; i < adapter->num_io_queues; i++)
 adapter->rx_ring[i].mtu = mtu;
 }

@@ -113,7 +112,7 @@

 ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
 if (!ret) {
- netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu);
+ netif_dbg(adapter, drv, dev, "Set MTU to %d\n", new_mtu);
 update_rx_ring_mtu(adapter, new_mtu);
 dev->mtu = new_mtu;
 } else {
....@@ -124,16 +123,466 @@
124123 return ret;
125124 }
126125
126
+static int ena_xmit_common(struct net_device *dev,
127
+ struct ena_ring *ring,
128
+ struct ena_tx_buffer *tx_info,
129
+ struct ena_com_tx_ctx *ena_tx_ctx,
130
+ u16 next_to_use,
131
+ u32 bytes)
132
+{
133
+ struct ena_adapter *adapter = netdev_priv(dev);
134
+ int rc, nb_hw_desc;
135
+
136
+ if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq,
137
+ ena_tx_ctx))) {
138
+ netif_dbg(adapter, tx_queued, dev,
139
+ "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
140
+ ring->qid);
141
+ ena_com_write_sq_doorbell(ring->ena_com_io_sq);
142
+ }
143
+
144
+ /* prepare the packet's descriptors to dma engine */
145
+ rc = ena_com_prepare_tx(ring->ena_com_io_sq, ena_tx_ctx,
146
+ &nb_hw_desc);
147
+
148
+ /* In case there isn't enough space in the queue for the packet,
149
+ * we simply drop it. All other failure reasons of
150
+ * ena_com_prepare_tx() are fatal and therefore require a device reset.
151
+ */
152
+ if (unlikely(rc)) {
153
+ netif_err(adapter, tx_queued, dev,
154
+ "Failed to prepare tx bufs\n");
155
+ u64_stats_update_begin(&ring->syncp);
156
+ ring->tx_stats.prepare_ctx_err++;
157
+ u64_stats_update_end(&ring->syncp);
158
+ if (rc != -ENOMEM) {
159
+ adapter->reset_reason =
160
+ ENA_REGS_RESET_DRIVER_INVALID_STATE;
161
+ set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
162
+ }
163
+ return rc;
164
+ }
165
+
166
+ u64_stats_update_begin(&ring->syncp);
167
+ ring->tx_stats.cnt++;
168
+ ring->tx_stats.bytes += bytes;
169
+ u64_stats_update_end(&ring->syncp);
170
+
171
+ tx_info->tx_descs = nb_hw_desc;
172
+ tx_info->last_jiffies = jiffies;
173
+ tx_info->print_once = 0;
174
+
175
+ ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
176
+ ring->ring_size);
177
+ return 0;
178
+}
179
+
180
+/* This is the XDP napi callback. XDP queues use a separate napi callback
181
+ * than Rx/Tx queues.
182
+ */
183
+static int ena_xdp_io_poll(struct napi_struct *napi, int budget)
184
+{
185
+ struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
186
+ u32 xdp_work_done, xdp_budget;
187
+ struct ena_ring *xdp_ring;
188
+ int napi_comp_call = 0;
189
+ int ret;
190
+
191
+ xdp_ring = ena_napi->xdp_ring;
192
+ xdp_ring->first_interrupt = ena_napi->first_interrupt;
193
+
194
+ xdp_budget = budget;
195
+
196
+ if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) ||
197
+ test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) {
198
+ napi_complete_done(napi, 0);
199
+ return 0;
200
+ }
201
+
202
+ xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget);
203
+
204
+ /* If the device is about to reset or down, avoid unmask
205
+ * the interrupt and return 0 so NAPI won't reschedule
206
+ */
207
+ if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) {
208
+ napi_complete_done(napi, 0);
209
+ ret = 0;
210
+ } else if (xdp_budget > xdp_work_done) {
211
+ napi_comp_call = 1;
212
+ if (napi_complete_done(napi, xdp_work_done))
213
+ ena_unmask_interrupt(xdp_ring, NULL);
214
+ ena_update_ring_numa_node(xdp_ring, NULL);
215
+ ret = xdp_work_done;
216
+ } else {
217
+ ret = xdp_budget;
218
+ }
219
+
220
+ u64_stats_update_begin(&xdp_ring->syncp);
221
+ xdp_ring->tx_stats.napi_comp += napi_comp_call;
222
+ xdp_ring->tx_stats.tx_poll++;
223
+ u64_stats_update_end(&xdp_ring->syncp);
224
+
225
+ return ret;
226
+}
227
+
228
+static int ena_xdp_tx_map_buff(struct ena_ring *xdp_ring,
229
+ struct ena_tx_buffer *tx_info,
230
+ struct xdp_buff *xdp,
231
+ void **push_hdr,
232
+ u32 *push_len)
233
+{
234
+ struct ena_adapter *adapter = xdp_ring->adapter;
235
+ struct ena_com_buf *ena_buf;
236
+ dma_addr_t dma = 0;
237
+ u32 size;
238
+
239
+ tx_info->xdpf = xdp_convert_buff_to_frame(xdp);
240
+ size = tx_info->xdpf->len;
241
+ ena_buf = tx_info->bufs;
242
+
243
+ /* llq push buffer */
244
+ *push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
245
+ *push_hdr = tx_info->xdpf->data;
246
+
247
+ if (size - *push_len > 0) {
248
+ dma = dma_map_single(xdp_ring->dev,
249
+ *push_hdr + *push_len,
250
+ size - *push_len,
251
+ DMA_TO_DEVICE);
252
+ if (unlikely(dma_mapping_error(xdp_ring->dev, dma)))
253
+ goto error_report_dma_error;
254
+
255
+ tx_info->map_linear_data = 1;
256
+ tx_info->num_of_bufs = 1;
257
+ }
258
+
259
+ ena_buf->paddr = dma;
260
+ ena_buf->len = size;
261
+
262
+ return 0;
263
+
264
+error_report_dma_error:
265
+ u64_stats_update_begin(&xdp_ring->syncp);
266
+ xdp_ring->tx_stats.dma_mapping_err++;
267
+ u64_stats_update_end(&xdp_ring->syncp);
268
+ netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n");
269
+
270
+ xdp_return_frame_rx_napi(tx_info->xdpf);
271
+ tx_info->xdpf = NULL;
272
+ tx_info->num_of_bufs = 0;
273
+
274
+ return -EINVAL;
275
+}
276
+
277
+static int ena_xdp_xmit_buff(struct net_device *dev,
278
+ struct xdp_buff *xdp,
279
+ int qid,
280
+ struct ena_rx_buffer *rx_info)
281
+{
282
+ struct ena_adapter *adapter = netdev_priv(dev);
283
+ struct ena_com_tx_ctx ena_tx_ctx = {};
284
+ struct ena_tx_buffer *tx_info;
285
+ struct ena_ring *xdp_ring;
286
+ u16 next_to_use, req_id;
287
+ int rc;
288
+ void *push_hdr;
289
+ u32 push_len;
290
+
291
+ xdp_ring = &adapter->tx_ring[qid];
292
+ next_to_use = xdp_ring->next_to_use;
293
+ req_id = xdp_ring->free_ids[next_to_use];
294
+ tx_info = &xdp_ring->tx_buffer_info[req_id];
295
+ tx_info->num_of_bufs = 0;
296
+ page_ref_inc(rx_info->page);
297
+ tx_info->xdp_rx_page = rx_info->page;
298
+
299
+ rc = ena_xdp_tx_map_buff(xdp_ring, tx_info, xdp, &push_hdr, &push_len);
300
+ if (unlikely(rc))
301
+ goto error_drop_packet;
302
+
303
+ ena_tx_ctx.ena_bufs = tx_info->bufs;
304
+ ena_tx_ctx.push_header = push_hdr;
305
+ ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
306
+ ena_tx_ctx.req_id = req_id;
307
+ ena_tx_ctx.header_len = push_len;
308
+
309
+ rc = ena_xmit_common(dev,
310
+ xdp_ring,
311
+ tx_info,
312
+ &ena_tx_ctx,
313
+ next_to_use,
314
+ xdp->data_end - xdp->data);
315
+ if (rc)
316
+ goto error_unmap_dma;
317
+ /* trigger the dma engine. ena_com_write_sq_doorbell()
318
+ * has a mb
319
+ */
320
+ ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq);
321
+ u64_stats_update_begin(&xdp_ring->syncp);
322
+ xdp_ring->tx_stats.doorbells++;
323
+ u64_stats_update_end(&xdp_ring->syncp);
324
+
325
+ return NETDEV_TX_OK;
326
+
327
+error_unmap_dma:
328
+ ena_unmap_tx_buff(xdp_ring, tx_info);
329
+ tx_info->xdpf = NULL;
330
+error_drop_packet:
331
+ __free_page(tx_info->xdp_rx_page);
332
+ return NETDEV_TX_OK;
333
+}
334
+
335
+static int ena_xdp_execute(struct ena_ring *rx_ring,
336
+ struct xdp_buff *xdp,
337
+ struct ena_rx_buffer *rx_info)
338
+{
339
+ struct bpf_prog *xdp_prog;
340
+ u32 verdict = XDP_PASS;
341
+ u64 *xdp_stat;
342
+
343
+ rcu_read_lock();
344
+ xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog);
345
+
346
+ if (!xdp_prog)
347
+ goto out;
348
+
349
+ verdict = bpf_prog_run_xdp(xdp_prog, xdp);
350
+
351
+ if (verdict == XDP_TX) {
352
+ ena_xdp_xmit_buff(rx_ring->netdev,
353
+ xdp,
354
+ rx_ring->qid + rx_ring->adapter->num_io_queues,
355
+ rx_info);
356
+
357
+ xdp_stat = &rx_ring->rx_stats.xdp_tx;
358
+ } else if (unlikely(verdict == XDP_ABORTED)) {
359
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
360
+ xdp_stat = &rx_ring->rx_stats.xdp_aborted;
361
+ } else if (unlikely(verdict == XDP_DROP)) {
362
+ xdp_stat = &rx_ring->rx_stats.xdp_drop;
363
+ } else if (unlikely(verdict == XDP_PASS)) {
364
+ xdp_stat = &rx_ring->rx_stats.xdp_pass;
365
+ } else {
366
+ bpf_warn_invalid_xdp_action(verdict);
367
+ xdp_stat = &rx_ring->rx_stats.xdp_invalid;
368
+ }
369
+
370
+ u64_stats_update_begin(&rx_ring->syncp);
371
+ (*xdp_stat)++;
372
+ u64_stats_update_end(&rx_ring->syncp);
373
+out:
374
+ rcu_read_unlock();
375
+
376
+ return verdict;
377
+}
378
+
379
+static void ena_init_all_xdp_queues(struct ena_adapter *adapter)
380
+{
381
+ adapter->xdp_first_ring = adapter->num_io_queues;
382
+ adapter->xdp_num_queues = adapter->num_io_queues;
383
+
384
+ ena_init_io_rings(adapter,
385
+ adapter->xdp_first_ring,
386
+ adapter->xdp_num_queues);
387
+}
388
+
389
+static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter)
390
+{
391
+ int rc = 0;
392
+
393
+ rc = ena_setup_tx_resources_in_range(adapter, adapter->xdp_first_ring,
394
+ adapter->xdp_num_queues);
395
+ if (rc)
396
+ goto setup_err;
397
+
398
+ rc = ena_create_io_tx_queues_in_range(adapter,
399
+ adapter->xdp_first_ring,
400
+ adapter->xdp_num_queues);
401
+ if (rc)
402
+ goto create_err;
403
+
404
+ return 0;
405
+
406
+create_err:
407
+ ena_free_all_io_tx_resources(adapter);
408
+setup_err:
409
+ return rc;
410
+}
411
+
412
+/* Provides a way for both kernel and bpf-prog to know
413
+ * more about the RX-queue a given XDP frame arrived on.
414
+ */
415
+static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring)
416
+{
417
+ int rc;
418
+
419
+ rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid);
420
+
421
+ if (rc) {
422
+ netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
423
+ "Failed to register xdp rx queue info. RX queue num %d rc: %d\n",
424
+ rx_ring->qid, rc);
425
+ goto err;
426
+ }
427
+
428
+ rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED,
429
+ NULL);
430
+
431
+ if (rc) {
432
+ netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
433
+ "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n",
434
+ rx_ring->qid, rc);
435
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
436
+ }
437
+
438
+err:
439
+ return rc;
440
+}
441
+
442
+static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring)
443
+{
444
+ xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq);
445
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
446
+}
447
+
448
+static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
449
+ struct bpf_prog *prog,
450
+ int first, int count)
451
+{
452
+ struct ena_ring *rx_ring;
453
+ int i = 0;
454
+
455
+ for (i = first; i < count; i++) {
456
+ rx_ring = &adapter->rx_ring[i];
457
+ xchg(&rx_ring->xdp_bpf_prog, prog);
458
+ if (prog) {
459
+ ena_xdp_register_rxq_info(rx_ring);
460
+ rx_ring->rx_headroom = XDP_PACKET_HEADROOM;
461
+ } else {
462
+ ena_xdp_unregister_rxq_info(rx_ring);
463
+ rx_ring->rx_headroom = 0;
464
+ }
465
+ }
466
+}
467
+
468
+static void ena_xdp_exchange_program(struct ena_adapter *adapter,
469
+ struct bpf_prog *prog)
470
+{
471
+ struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog);
472
+
473
+ ena_xdp_exchange_program_rx_in_range(adapter,
474
+ prog,
475
+ 0,
476
+ adapter->num_io_queues);
477
+
478
+ if (old_bpf_prog)
479
+ bpf_prog_put(old_bpf_prog);
480
+}
481
+
482
+static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter)
483
+{
484
+ bool was_up;
485
+ int rc;
486
+
487
+ was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
488
+
489
+ if (was_up)
490
+ ena_down(adapter);
491
+
492
+ adapter->xdp_first_ring = 0;
493
+ adapter->xdp_num_queues = 0;
494
+ ena_xdp_exchange_program(adapter, NULL);
495
+ if (was_up) {
496
+ rc = ena_up(adapter);
497
+ if (rc)
498
+ return rc;
499
+ }
500
+ return 0;
501
+}
502
+
503
+static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf)
504
+{
505
+ struct ena_adapter *adapter = netdev_priv(netdev);
506
+ struct bpf_prog *prog = bpf->prog;
507
+ struct bpf_prog *old_bpf_prog;
508
+ int rc, prev_mtu;
509
+ bool is_up;
510
+
511
+ is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
512
+ rc = ena_xdp_allowed(adapter);
513
+ if (rc == ENA_XDP_ALLOWED) {
514
+ old_bpf_prog = adapter->xdp_bpf_prog;
515
+ if (prog) {
516
+ if (!is_up) {
517
+ ena_init_all_xdp_queues(adapter);
518
+ } else if (!old_bpf_prog) {
519
+ ena_down(adapter);
520
+ ena_init_all_xdp_queues(adapter);
521
+ }
522
+ ena_xdp_exchange_program(adapter, prog);
523
+
524
+ if (is_up && !old_bpf_prog) {
525
+ rc = ena_up(adapter);
526
+ if (rc)
527
+ return rc;
528
+ }
529
+ } else if (old_bpf_prog) {
530
+ rc = ena_destroy_and_free_all_xdp_queues(adapter);
531
+ if (rc)
532
+ return rc;
533
+ }
534
+
535
+ prev_mtu = netdev->max_mtu;
536
+ netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu;
537
+
538
+ if (!old_bpf_prog)
539
+ netif_info(adapter, drv, adapter->netdev,
540
+ "XDP program is set, changing the max_mtu from %d to %d",
541
+ prev_mtu, netdev->max_mtu);
542
+
543
+ } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) {
544
+ netif_err(adapter, drv, adapter->netdev,
545
+ "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on",
546
+ netdev->mtu, ENA_XDP_MAX_MTU);
547
+ NL_SET_ERR_MSG_MOD(bpf->extack,
548
+ "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info");
549
+ return -EINVAL;
550
+ } else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) {
551
+ netif_err(adapter, drv, adapter->netdev,
552
+ "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. The current queue count (%d), the maximal queue count (%d)\n",
553
+ adapter->num_io_queues, adapter->max_num_io_queues);
554
+ NL_SET_ERR_MSG_MOD(bpf->extack,
555
+ "Failed to set xdp program, there is no enough space for allocating XDP queues, Check the dmesg for more info");
556
+ return -EINVAL;
557
+ }
558
+
559
+ return 0;
560
+}
561
+
562
+/* This is the main xdp callback, it's used by the kernel to set/unset the xdp
563
+ * program as well as to query the current xdp program id.
564
+ */
565
+static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
566
+{
567
+ switch (bpf->command) {
568
+ case XDP_SETUP_PROG:
569
+ return ena_xdp_set(netdev, bpf);
570
+ default:
571
+ return -EINVAL;
572
+ }
573
+ return 0;
574
+}
575
+
127576 static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
128577 {
129578 #ifdef CONFIG_RFS_ACCEL
130579 u32 i;
131580 int rc;
132581
133
- adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_queues);
582
+ adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_io_queues);
134583 if (!adapter->netdev->rx_cpu_rmap)
135584 return -ENOMEM;
136
- for (i = 0; i < adapter->num_queues; i++) {
585
+ for (i = 0; i < adapter->num_io_queues; i++) {
137586 int irq_idx = ENA_IO_IRQ_IDX(i);
138587
139588 rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
@@ -159,14 +608,14 @@
 ring->adapter = adapter;
 ring->ena_dev = adapter->ena_dev;
 ring->per_napi_packets = 0;
- ring->per_napi_bytes = 0;
 ring->cpu = 0;
 ring->first_interrupt = false;
 ring->no_interrupt_event_cnt = 0;
 u64_stats_init(&ring->syncp);
 }

-static void ena_init_io_rings(struct ena_adapter *adapter)
+static void ena_init_io_rings(struct ena_adapter *adapter,
+ int first_index, int count)
 {
 struct ena_com_dev *ena_dev;
 struct ena_ring *txr, *rxr;
@@ -174,29 +623,36 @@

 ena_dev = adapter->ena_dev;

- for (i = 0; i < adapter->num_queues; i++) {
+ for (i = first_index; i < first_index + count; i++) {
 txr = &adapter->tx_ring[i];
 rxr = &adapter->rx_ring[i];

- /* TX/RX common ring state */
+ /* TX common ring state */
 ena_init_io_rings_common(adapter, txr, i);
- ena_init_io_rings_common(adapter, rxr, i);

 /* TX specific ring state */
- txr->ring_size = adapter->tx_ring_size;
+ txr->ring_size = adapter->requested_tx_ring_size;
 txr->tx_max_header_size = ena_dev->tx_max_header_size;
 txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
 txr->sgl_size = adapter->max_tx_sgl_size;
 txr->smoothed_interval =
 ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
+ txr->disable_meta_caching = adapter->disable_meta_caching;

- /* RX specific ring state */
- rxr->ring_size = adapter->rx_ring_size;
- rxr->rx_copybreak = adapter->rx_copybreak;
- rxr->sgl_size = adapter->max_rx_sgl_size;
- rxr->smoothed_interval =
- ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
- rxr->empty_rx_queue = 0;
+ /* Don't init RX queues for xdp queues */
+ if (!ENA_IS_XDP_INDEX(adapter, i)) {
+ /* RX common ring state */
+ ena_init_io_rings_common(adapter, rxr, i);
+
+ /* RX specific ring state */
+ rxr->ring_size = adapter->requested_rx_ring_size;
+ rxr->rx_copybreak = adapter->rx_copybreak;
+ rxr->sgl_size = adapter->max_rx_sgl_size;
+ rxr->smoothed_interval =
+ ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
+ rxr->empty_rx_queue = 0;
+ adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ }
 }
 }

@@ -225,22 +681,28 @@
 if (!tx_ring->tx_buffer_info) {
 tx_ring->tx_buffer_info = vzalloc(size);
 if (!tx_ring->tx_buffer_info)
- return -ENOMEM;
+ goto err_tx_buffer_info;
 }

 size = sizeof(u16) * tx_ring->ring_size;
- tx_ring->free_tx_ids = vzalloc_node(size, node);
- if (!tx_ring->free_tx_ids) {
- tx_ring->free_tx_ids = vzalloc(size);
- if (!tx_ring->free_tx_ids) {
- vfree(tx_ring->tx_buffer_info);
- return -ENOMEM;
- }
+ tx_ring->free_ids = vzalloc_node(size, node);
+ if (!tx_ring->free_ids) {
+ tx_ring->free_ids = vzalloc(size);
+ if (!tx_ring->free_ids)
+ goto err_tx_free_ids;
+ }
+
+ size = tx_ring->tx_max_header_size;
+ tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node);
+ if (!tx_ring->push_buf_intermediate_buf) {
+ tx_ring->push_buf_intermediate_buf = vzalloc(size);
+ if (!tx_ring->push_buf_intermediate_buf)
+ goto err_push_buf_intermediate_buf;
 }

 /* Req id ring for TX out of order completions */
 for (i = 0; i < tx_ring->ring_size; i++)
- tx_ring->free_tx_ids[i] = i;
+ tx_ring->free_ids[i] = i;

 /* Reset tx statistics */
 memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats));
@@ -249,6 +711,15 @@
 tx_ring->next_to_clean = 0;
 tx_ring->cpu = ena_irq->cpu;
 return 0;
+
+err_push_buf_intermediate_buf:
+ vfree(tx_ring->free_ids);
+ tx_ring->free_ids = NULL;
+err_tx_free_ids:
+ vfree(tx_ring->tx_buffer_info);
+ tx_ring->tx_buffer_info = NULL;
+err_tx_buffer_info:
+ return -ENOMEM;
 }

 /* ena_free_tx_resources - Free I/O Tx Resources per Queue
@@ -264,20 +735,20 @@
 vfree(tx_ring->tx_buffer_info);
 tx_ring->tx_buffer_info = NULL;

- vfree(tx_ring->free_tx_ids);
- tx_ring->free_tx_ids = NULL;
+ vfree(tx_ring->free_ids);
+ tx_ring->free_ids = NULL;
+
+ vfree(tx_ring->push_buf_intermediate_buf);
+ tx_ring->push_buf_intermediate_buf = NULL;
 }

-/* ena_setup_all_tx_resources - allocate I/O Tx queues resources for All queues
- * @adapter: private structure
- *
- * Return 0 on success, negative on failure
- */
-static int ena_setup_all_tx_resources(struct ena_adapter *adapter)
+static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
+ int first_index,
+ int count)
 {
 int i, rc = 0;

- for (i = 0; i < adapter->num_queues; i++) {
+ for (i = first_index; i < first_index + count; i++) {
 rc = ena_setup_tx_resources(adapter, i);
 if (rc)
 goto err_setup_tx;
@@ -291,9 +762,18 @@
 "Tx queue %d: allocation failed\n", i);

 /* rewind the index freeing the rings as we go */
- while (i--)
+ while (first_index < i--)
 ena_free_tx_resources(adapter, i);
 return rc;
+}
+
+static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
+ int first_index, int count)
+{
+ int i;
+
+ for (i = first_index; i < first_index + count; i++)
+ ena_free_tx_resources(adapter, i);
 }

 /* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues
@@ -303,28 +783,10 @@
 */
 static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
 {
- int i;
-
- for (i = 0; i < adapter->num_queues; i++)
- ena_free_tx_resources(adapter, i);
-}
-
-static inline int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id)
-{
- if (likely(req_id < rx_ring->ring_size))
- return 0;
-
- netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
- "Invalid rx req_id: %hu\n", req_id);
-
- u64_stats_update_begin(&rx_ring->syncp);
- rx_ring->rx_stats.bad_req_id++;
- u64_stats_update_end(&rx_ring->syncp);
-
- /* Trigger device reset */
- rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
- set_bit(ENA_FLAG_TRIGGER_RESET, &rx_ring->adapter->flags);
- return -EFAULT;
+ ena_free_all_io_tx_resources_in_range(adapter,
+ 0,
+ adapter->xdp_num_queues +
+ adapter->num_io_queues);
 }

 /* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors)
....@@ -360,18 +822,19 @@
360822 }
361823
362824 size = sizeof(u16) * rx_ring->ring_size;
363
- rx_ring->free_rx_ids = vzalloc_node(size, node);
364
- if (!rx_ring->free_rx_ids) {
365
- rx_ring->free_rx_ids = vzalloc(size);
366
- if (!rx_ring->free_rx_ids) {
825
+ rx_ring->free_ids = vzalloc_node(size, node);
826
+ if (!rx_ring->free_ids) {
827
+ rx_ring->free_ids = vzalloc(size);
828
+ if (!rx_ring->free_ids) {
367829 vfree(rx_ring->rx_buffer_info);
830
+ rx_ring->rx_buffer_info = NULL;
368831 return -ENOMEM;
369832 }
370833 }
371834
372835 /* Req id ring for receiving RX pkts out of order */
373836 for (i = 0; i < rx_ring->ring_size; i++)
374
- rx_ring->free_rx_ids[i] = i;
837
+ rx_ring->free_ids[i] = i;
375838
376839 /* Reset rx statistics */
377840 memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats));
....@@ -397,8 +860,8 @@
397860 vfree(rx_ring->rx_buffer_info);
398861 rx_ring->rx_buffer_info = NULL;
399862
400
- vfree(rx_ring->free_rx_ids);
401
- rx_ring->free_rx_ids = NULL;
863
+ vfree(rx_ring->free_ids);
864
+ rx_ring->free_ids = NULL;
402865 }
403866
404867 /* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues
....@@ -410,7 +873,7 @@
410873 {
411874 int i, rc = 0;
412875
413
- for (i = 0; i < adapter->num_queues; i++) {
876
+ for (i = 0; i < adapter->num_io_queues; i++) {
414877 rc = ena_setup_rx_resources(adapter, i);
415878 if (rc)
416879 goto err_setup_rx;
....@@ -438,16 +901,20 @@
438901 {
439902 int i;
440903
441
- for (i = 0; i < adapter->num_queues; i++)
904
+ for (i = 0; i < adapter->num_io_queues; i++)
442905 ena_free_rx_resources(adapter, i);
443906 }
444907
445
-static inline int ena_alloc_rx_page(struct ena_ring *rx_ring,
908
+static int ena_alloc_rx_page(struct ena_ring *rx_ring,
446909 struct ena_rx_buffer *rx_info, gfp_t gfp)
447910 {
911
+ int headroom = rx_ring->rx_headroom;
448912 struct ena_com_buf *ena_buf;
449913 struct page *page;
450914 dma_addr_t dma;
915
+
916
+ /* restore page offset value in case it has been changed by device */
917
+ rx_info->page_offset = headroom;
451918
452919 /* if previous allocated page is not used */
453920 if (unlikely(rx_info->page))
....@@ -461,8 +928,11 @@
461928 return -ENOMEM;
462929 }
463930
931
+ /* To enable NIC-side port-mirroring, AKA SPAN port,
932
+ * we make the buffer readable from the nic as well
933
+ */
464934 dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE,
465
- DMA_FROM_DEVICE);
935
+ DMA_BIDIRECTIONAL);
466936 if (unlikely(dma_mapping_error(rx_ring->dev, dma))) {
467937 u64_stats_update_begin(&rx_ring->syncp);
468938 rx_ring->rx_stats.dma_mapping_err++;
....@@ -472,13 +942,12 @@
472942 return -EIO;
473943 }
474944 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
475
- "alloc page %p, rx_info %p\n", page, rx_info);
945
+ "Allocate page %p, rx_info %p\n", page, rx_info);
476946
477947 rx_info->page = page;
478
- rx_info->page_offset = 0;
479948 ena_buf = &rx_info->ena_buf;
480
- ena_buf->paddr = dma;
481
- ena_buf->len = ENA_PAGE_SIZE;
949
+ ena_buf->paddr = dma + headroom;
950
+ ena_buf->len = ENA_PAGE_SIZE - headroom;
482951
483952 return 0;
484953 }
....@@ -495,8 +964,9 @@
495964 return;
496965 }
497966
498
- dma_unmap_page(rx_ring->dev, ena_buf->paddr, ENA_PAGE_SIZE,
499
- DMA_FROM_DEVICE);
967
+ dma_unmap_page(rx_ring->dev, ena_buf->paddr - rx_ring->rx_headroom,
968
+ ENA_PAGE_SIZE,
969
+ DMA_BIDIRECTIONAL);
500970
501971 __free_page(page);
502972 rx_info->page = NULL;
....@@ -513,19 +983,15 @@
513983 for (i = 0; i < num; i++) {
514984 struct ena_rx_buffer *rx_info;
515985
516
- req_id = rx_ring->free_rx_ids[next_to_use];
517
- rc = validate_rx_req_id(rx_ring, req_id);
518
- if (unlikely(rc < 0))
519
- break;
986
+ req_id = rx_ring->free_ids[next_to_use];
520987
521988 rx_info = &rx_ring->rx_buffer_info[req_id];
522
-
523989
524990 rc = ena_alloc_rx_page(rx_ring, rx_info,
525991 GFP_ATOMIC | __GFP_COMP);
526992 if (unlikely(rc < 0)) {
527993 netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
528
- "failed to alloc buffer for rx queue %d\n",
994
+ "Failed to allocate buffer for rx queue %d\n",
529995 rx_ring->qid);
530996 break;
531997 }
....@@ -534,7 +1000,7 @@
5341000 req_id);
5351001 if (unlikely(rc)) {
5361002 netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
537
- "failed to add buffer for rx queue %d\n",
1003
+ "Failed to add buffer for rx queue %d\n",
5381004 rx_ring->qid);
5391005 break;
5401006 }
....@@ -546,9 +1012,9 @@
5461012 u64_stats_update_begin(&rx_ring->syncp);
5471013 rx_ring->rx_stats.refil_partial++;
5481014 u64_stats_update_end(&rx_ring->syncp);
549
- netdev_warn(rx_ring->netdev,
550
- "refilled rx qid %d with only %d buffers (from %d)\n",
551
- rx_ring->qid, i, num);
1015
+ netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
1016
+ "Refilled rx qid %d with only %d buffers (from %d)\n",
1017
+ rx_ring->qid, i, num);
5521018 }
5531019
5541020 /* ena_com_write_sq_doorbell issues a wmb() */
....@@ -576,21 +1042,20 @@
5761042
5771043 /* ena_refill_all_rx_bufs - allocate all queues Rx buffers
5781044 * @adapter: board private structure
579
- *
5801045 */
5811046 static void ena_refill_all_rx_bufs(struct ena_adapter *adapter)
5821047 {
5831048 struct ena_ring *rx_ring;
5841049 int i, rc, bufs_num;
5851050
586
- for (i = 0; i < adapter->num_queues; i++) {
1051
+ for (i = 0; i < adapter->num_io_queues; i++) {
5871052 rx_ring = &adapter->rx_ring[i];
5881053 bufs_num = rx_ring->ring_size - 1;
5891054 rc = ena_refill_rx_bufs(rx_ring, bufs_num);
5901055
5911056 if (unlikely(rc != bufs_num))
5921057 netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
593
- "refilling Queue %d failed. allocated %d buffers from: %d\n",
1058
+ "Refilling Queue %d failed. allocated %d buffers from: %d\n",
5941059 i, rc, bufs_num);
5951060 }
5961061 }
....@@ -599,8 +1064,38 @@
5991064 {
6001065 int i;
6011066
602
- for (i = 0; i < adapter->num_queues; i++)
1067
+ for (i = 0; i < adapter->num_io_queues; i++)
6031068 ena_free_rx_bufs(adapter, i);
1069
+}
1070
+
1071
+static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
1072
+ struct ena_tx_buffer *tx_info)
1073
+{
1074
+ struct ena_com_buf *ena_buf;
1075
+ u32 cnt;
1076
+ int i;
1077
+
1078
+ ena_buf = tx_info->bufs;
1079
+ cnt = tx_info->num_of_bufs;
1080
+
1081
+ if (unlikely(!cnt))
1082
+ return;
1083
+
1084
+ if (tx_info->map_linear_data) {
1085
+ dma_unmap_single(tx_ring->dev,
1086
+ dma_unmap_addr(ena_buf, paddr),
1087
+ dma_unmap_len(ena_buf, len),
1088
+ DMA_TO_DEVICE);
1089
+ ena_buf++;
1090
+ cnt--;
1091
+ }
1092
+
1093
+ /* unmap remaining mapped pages */
1094
+ for (i = 0; i < cnt; i++) {
1095
+ dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
1096
+ dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
1097
+ ena_buf++;
1098
+ }
6041099 }
6051100
6061101 /* ena_free_tx_bufs - Free Tx Buffers per Queue
....@@ -613,39 +1108,22 @@
6131108
6141109 for (i = 0; i < tx_ring->ring_size; i++) {
6151110 struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
616
- struct ena_com_buf *ena_buf;
617
- int nr_frags;
618
- int j;
6191111
6201112 if (!tx_info->skb)
6211113 continue;
6221114
6231115 if (print_once) {
624
- netdev_notice(tx_ring->netdev,
625
- "free uncompleted tx skb qid %d idx 0x%x\n",
626
- tx_ring->qid, i);
1116
+ netif_notice(tx_ring->adapter, ifdown, tx_ring->netdev,
1117
+ "Free uncompleted tx skb qid %d idx 0x%x\n",
1118
+ tx_ring->qid, i);
6271119 print_once = false;
6281120 } else {
629
- netdev_dbg(tx_ring->netdev,
630
- "free uncompleted tx skb qid %d idx 0x%x\n",
631
- tx_ring->qid, i);
1121
+ netif_dbg(tx_ring->adapter, ifdown, tx_ring->netdev,
1122
+ "Free uncompleted tx skb qid %d idx 0x%x\n",
1123
+ tx_ring->qid, i);
6321124 }
6331125
634
- ena_buf = tx_info->bufs;
635
- dma_unmap_single(tx_ring->dev,
636
- ena_buf->paddr,
637
- ena_buf->len,
638
- DMA_TO_DEVICE);
639
-
640
- /* unmap remaining mapped pages */
641
- nr_frags = tx_info->num_of_bufs - 1;
642
- for (j = 0; j < nr_frags; j++) {
643
- ena_buf++;
644
- dma_unmap_page(tx_ring->dev,
645
- ena_buf->paddr,
646
- ena_buf->len,
647
- DMA_TO_DEVICE);
648
- }
1126
+ ena_unmap_tx_buff(tx_ring, tx_info);
6491127
6501128 dev_kfree_skb_any(tx_info->skb);
6511129 }
....@@ -658,7 +1136,7 @@
6581136 struct ena_ring *tx_ring;
6591137 int i;
6601138
661
- for (i = 0; i < adapter->num_queues; i++) {
1139
+ for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
6621140 tx_ring = &adapter->tx_ring[i];
6631141 ena_free_tx_bufs(tx_ring);
6641142 }
....@@ -669,7 +1147,7 @@
6691147 u16 ena_qid;
6701148 int i;
6711149
672
- for (i = 0; i < adapter->num_queues; i++) {
1150
+ for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
6731151 ena_qid = ENA_IO_TXQ_IDX(i);
6741152 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
6751153 }
....@@ -680,8 +1158,9 @@
6801158 u16 ena_qid;
6811159 int i;
6821160
683
- for (i = 0; i < adapter->num_queues; i++) {
1161
+ for (i = 0; i < adapter->num_io_queues; i++) {
6841162 ena_qid = ENA_IO_RXQ_IDX(i);
1163
+ cancel_work_sync(&adapter->ena_napi[i].dim.work);
6851164 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
6861165 }
6871166 }
....@@ -692,31 +1171,52 @@
6921171 ena_destroy_all_rx_queues(adapter);
6931172 }
6941173
695
-static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
1174
+static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
1175
+ struct ena_tx_buffer *tx_info, bool is_xdp)
6961176 {
697
- struct ena_tx_buffer *tx_info = NULL;
698
-
699
- if (likely(req_id < tx_ring->ring_size)) {
700
- tx_info = &tx_ring->tx_buffer_info[req_id];
701
- if (likely(tx_info->skb))
702
- return 0;
703
- }
704
-
7051177 if (tx_info)
706
- netif_err(tx_ring->adapter, tx_done, tx_ring->netdev,
707
- "tx_info doesn't have valid skb\n");
1178
+ netif_err(ring->adapter,
1179
+ tx_done,
1180
+ ring->netdev,
1181
+ "tx_info doesn't have valid %s",
1182
+ is_xdp ? "xdp frame" : "skb");
7081183 else
709
- netif_err(tx_ring->adapter, tx_done, tx_ring->netdev,
710
- "Invalid req_id: %hu\n", req_id);
1184
+ netif_err(ring->adapter,
1185
+ tx_done,
1186
+ ring->netdev,
1187
+ "Invalid req_id: %hu\n",
1188
+ req_id);
7111189
712
- u64_stats_update_begin(&tx_ring->syncp);
713
- tx_ring->tx_stats.bad_req_id++;
714
- u64_stats_update_end(&tx_ring->syncp);
1190
+ u64_stats_update_begin(&ring->syncp);
1191
+ ring->tx_stats.bad_req_id++;
1192
+ u64_stats_update_end(&ring->syncp);
7151193
7161194 /* Trigger device reset */
717
- tx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID;
718
- set_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags);
1195
+ ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID;
1196
+ set_bit(ENA_FLAG_TRIGGER_RESET, &ring->adapter->flags);
7191197 return -EFAULT;
1198
+}
1199
+
1200
+static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
1201
+{
1202
+ struct ena_tx_buffer *tx_info;
1203
+
1204
+ tx_info = &tx_ring->tx_buffer_info[req_id];
1205
+ if (likely(tx_info->skb))
1206
+ return 0;
1207
+
1208
+ return handle_invalid_req_id(tx_ring, req_id, tx_info, false);
1209
+}
1210
+
1211
+static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id)
1212
+{
1213
+ struct ena_tx_buffer *tx_info;
1214
+
1215
+ tx_info = &xdp_ring->tx_buffer_info[req_id];
1216
+ if (likely(tx_info->xdpf))
1217
+ return 0;
1218
+
1219
+ return handle_invalid_req_id(xdp_ring, req_id, tx_info, true);
7201220 }
7211221
7221222 static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
....@@ -736,14 +1236,17 @@
7361236 while (tx_pkts < budget) {
7371237 struct ena_tx_buffer *tx_info;
7381238 struct sk_buff *skb;
739
- struct ena_com_buf *ena_buf;
740
- int i, nr_frags;
7411239
7421240 rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
7431241 &req_id);
744
- if (rc)
1242
+ if (rc) {
1243
+ if (unlikely(rc == -EINVAL))
1244
+ handle_invalid_req_id(tx_ring, req_id, NULL,
1245
+ false);
7451246 break;
1247
+ }
7461248
1249
+ /* validate that the request id points to a valid skb */
7471250 rc = validate_tx_req_id(tx_ring, req_id);
7481251 if (rc)
7491252 break;
....@@ -757,24 +1260,7 @@
7571260 tx_info->skb = NULL;
7581261 tx_info->last_jiffies = 0;
7591262
760
- if (likely(tx_info->num_of_bufs != 0)) {
761
- ena_buf = tx_info->bufs;
762
-
763
- dma_unmap_single(tx_ring->dev,
764
- dma_unmap_addr(ena_buf, paddr),
765
- dma_unmap_len(ena_buf, len),
766
- DMA_TO_DEVICE);
767
-
768
- /* unmap remaining mapped pages */
769
- nr_frags = tx_info->num_of_bufs - 1;
770
- for (i = 0; i < nr_frags; i++) {
771
- ena_buf++;
772
- dma_unmap_page(tx_ring->dev,
773
- dma_unmap_addr(ena_buf, paddr),
774
- dma_unmap_len(ena_buf, len),
775
- DMA_TO_DEVICE);
776
- }
777
- }
1263
+ ena_unmap_tx_buff(tx_ring, tx_info);
7781264
7791265 netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
7801266 "tx_poll: q %d skb %p completed\n", tx_ring->qid,
....@@ -785,7 +1271,7 @@
7851271 tx_pkts++;
7861272 total_done += tx_info->tx_descs;
7871273
788
- tx_ring->free_tx_ids[next_to_clean] = req_id;
1274
+ tx_ring->free_ids[next_to_clean] = req_id;
7891275 next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
7901276 tx_ring->ring_size);
7911277 }
....@@ -805,13 +1291,15 @@
8051291 */
8061292 smp_mb();
8071293
808
- above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) >
809
- ENA_TX_WAKEUP_THRESH;
1294
+ above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1295
+ ENA_TX_WAKEUP_THRESH);
8101296 if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) {
8111297 __netif_tx_lock(txq, smp_processor_id());
812
- above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) >
813
- ENA_TX_WAKEUP_THRESH;
814
- if (netif_tx_queue_stopped(txq) && above_thresh) {
1298
+ above_thresh =
1299
+ ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1300
+ ENA_TX_WAKEUP_THRESH);
1301
+ if (netif_tx_queue_stopped(txq) && above_thresh &&
1302
+ test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) {
8151303 netif_tx_wake_queue(txq);
8161304 u64_stats_update_begin(&tx_ring->syncp);
8171305 tx_ring->tx_stats.queue_wakeup++;
....@@ -819,9 +1307,6 @@
8191307 }
8201308 __netif_tx_unlock(txq);
8211309 }
822
-
823
- tx_ring->per_napi_bytes += tx_bytes;
824
- tx_ring->per_napi_packets += tx_pkts;
8251310
8261311 return tx_pkts;
8271312 }
....@@ -860,6 +1345,7 @@
8601345
8611346 len = ena_bufs[buf].len;
8621347 req_id = ena_bufs[buf].req_id;
1348
+
8631349 rx_info = &rx_ring->rx_buffer_info[req_id];
8641350
8651351 if (unlikely(!rx_info->page)) {
....@@ -874,7 +1360,8 @@
8741360
8751361 /* save virt address of first buffer */
8761362 va = page_address(rx_info->page) + rx_info->page_offset;
877
- prefetch(va + NET_IP_ALIGN);
1363
+
1364
+ prefetch(va);
8781365
8791366 if (len <= rx_ring->rx_copybreak) {
8801367 skb = ena_alloc_skb(rx_ring, false);
....@@ -882,7 +1369,7 @@
8821369 return NULL;
8831370
8841371 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
885
- "rx allocated small packet. len %d. data_len %d\n",
1372
+ "RX allocated small packet. len %d. data_len %d\n",
8861373 skb->len, skb->data_len);
8871374
8881375 /* sync this buffer for CPU use */
....@@ -898,7 +1385,7 @@
8981385
8991386 skb_put(skb, len);
9001387 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
901
- rx_ring->free_rx_ids[*next_to_clean] = req_id;
1388
+ rx_ring->free_ids[*next_to_clean] = req_id;
9021389 *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs,
9031390 rx_ring->ring_size);
9041391 return skb;
....@@ -911,18 +1398,18 @@
9111398 do {
9121399 dma_unmap_page(rx_ring->dev,
9131400 dma_unmap_addr(&rx_info->ena_buf, paddr),
914
- ENA_PAGE_SIZE, DMA_FROM_DEVICE);
1401
+ ENA_PAGE_SIZE, DMA_BIDIRECTIONAL);
9151402
9161403 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
9171404 rx_info->page_offset, len, ENA_PAGE_SIZE);
9181405
9191406 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
920
- "rx skb updated. len %d. data_len %d\n",
1407
+ "RX skb updated. len %d. data_len %d\n",
9211408 skb->len, skb->data_len);
9221409
9231410 rx_info->page = NULL;
9241411
925
- rx_ring->free_rx_ids[*next_to_clean] = req_id;
1412
+ rx_ring->free_ids[*next_to_clean] = req_id;
9261413 *next_to_clean =
9271414 ENA_RX_RING_IDX_NEXT(*next_to_clean,
9281415 rx_ring->ring_size);
....@@ -932,6 +1419,7 @@
9321419 buf++;
9331420 len = ena_bufs[buf].len;
9341421 req_id = ena_bufs[buf].req_id;
1422
+
9351423 rx_info = &rx_ring->rx_buffer_info[req_id];
9361424 } while (1);
9371425
....@@ -943,7 +1431,7 @@
9431431 * @ena_rx_ctx: received packet context/metadata
9441432 * @skb: skb currently being received and modified
9451433 */
946
-static inline void ena_rx_checksum(struct ena_ring *rx_ring,
1434
+static void ena_rx_checksum(struct ena_ring *rx_ring,
9471435 struct ena_com_rx_ctx *ena_rx_ctx,
9481436 struct sk_buff *skb)
9491437 {
....@@ -986,8 +1474,22 @@
9861474 return;
9871475 }
9881476
989
- skb->ip_summed = CHECKSUM_UNNECESSARY;
1477
+ if (likely(ena_rx_ctx->l4_csum_checked)) {
1478
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
1479
+ u64_stats_update_begin(&rx_ring->syncp);
1480
+ rx_ring->rx_stats.csum_good++;
1481
+ u64_stats_update_end(&rx_ring->syncp);
1482
+ } else {
1483
+ u64_stats_update_begin(&rx_ring->syncp);
1484
+ rx_ring->rx_stats.csum_unchecked++;
1485
+ u64_stats_update_end(&rx_ring->syncp);
1486
+ skb->ip_summed = CHECKSUM_NONE;
1487
+ }
1488
+ } else {
1489
+ skb->ip_summed = CHECKSUM_NONE;
1490
+ return;
9901491 }
1492
+
9911493 }
9921494
9931495 static void ena_set_rx_hash(struct ena_ring *rx_ring,
....@@ -1012,6 +1514,32 @@
10121514 }
10131515 }
10141516
1517
+static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp)
1518
+{
1519
+ struct ena_rx_buffer *rx_info;
1520
+ int ret;
1521
+
1522
+ rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
1523
+ xdp->data = page_address(rx_info->page) + rx_info->page_offset;
1524
+ xdp_set_data_meta_invalid(xdp);
1525
+ xdp->data_hard_start = page_address(rx_info->page);
1526
+ xdp->data_end = xdp->data + rx_ring->ena_bufs[0].len;
1527
+ /* If for some reason we received a bigger packet than
1528
+ * we expect, then we simply drop it
1529
+ */
1530
+ if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU))
1531
+ return XDP_DROP;
1532
+
1533
+ ret = ena_xdp_execute(rx_ring, xdp, rx_info);
1534
+
1535
+ /* The xdp program might expand the headers */
1536
+ if (ret == XDP_PASS) {
1537
+ rx_info->page_offset = xdp->data - xdp->data_hard_start;
1538
+ rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data;
1539
+ }
1540
+
1541
+ return ret;
1542
+}
10151543 /* ena_clean_rx_irq - Cleanup RX irq
10161544 * @rx_ring: RX ring to clean
10171545 * @napi: napi handler
....@@ -1023,26 +1551,33 @@
10231551 u32 budget)
10241552 {
10251553 u16 next_to_clean = rx_ring->next_to_clean;
1026
- u32 res_budget, work_done;
1027
-
10281554 struct ena_com_rx_ctx ena_rx_ctx;
1555
+ struct ena_rx_buffer *rx_info;
10291556 struct ena_adapter *adapter;
1557
+ u32 res_budget, work_done;
1558
+ int rx_copybreak_pkt = 0;
1559
+ int refill_threshold;
10301560 struct sk_buff *skb;
10311561 int refill_required;
1032
- int refill_threshold;
1033
- int rc = 0;
1562
+ struct xdp_buff xdp;
10341563 int total_len = 0;
1035
- int rx_copybreak_pkt = 0;
1564
+ int xdp_verdict;
1565
+ int rc = 0;
10361566 int i;
10371567
10381568 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
10391569 "%s qid %d\n", __func__, rx_ring->qid);
10401570 res_budget = budget;
1571
+ xdp.rxq = &rx_ring->xdp_rxq;
1572
+ xdp.frame_sz = ENA_PAGE_SIZE;
10411573
10421574 do {
1575
+ xdp_verdict = XDP_PASS;
1576
+ skb = NULL;
10431577 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
10441578 ena_rx_ctx.max_bufs = rx_ring->sgl_size;
10451579 ena_rx_ctx.descs = 0;
1580
+ ena_rx_ctx.pkt_offset = 0;
10461581 rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq,
10471582 rx_ring->ena_com_io_sq,
10481583 &ena_rx_ctx);
....@@ -1052,23 +1587,44 @@
10521587 if (unlikely(ena_rx_ctx.descs == 0))
10531588 break;
10541589
1590
+ /* First descriptor might have an offset set by the device */
1591
+ rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
1592
+ rx_info->page_offset += ena_rx_ctx.pkt_offset;
1593
+
10551594 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
10561595 "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
10571596 rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
10581597 ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
10591598
1060
- /* allocate skb and fill it */
1061
- skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs,
1062
- &next_to_clean);
1599
+ if (ena_xdp_present_ring(rx_ring))
1600
+ xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp);
10631601
1064
- /* exit if we failed to retrieve a buffer */
1602
+ /* allocate skb and fill it */
1603
+ if (xdp_verdict == XDP_PASS)
1604
+ skb = ena_rx_skb(rx_ring,
1605
+ rx_ring->ena_bufs,
1606
+ ena_rx_ctx.descs,
1607
+ &next_to_clean);
1608
+
10651609 if (unlikely(!skb)) {
1610
+ /* The page might not actually be freed here since the
1611
+ * page reference count is incremented in
1612
+ * ena_xdp_xmit_buff(), and it will be decreased only
1613
+ * when send completion was received from the device
1614
+ */
1615
+ if (xdp_verdict == XDP_TX)
1616
+ ena_free_rx_page(rx_ring,
1617
+ &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]);
10661618 for (i = 0; i < ena_rx_ctx.descs; i++) {
1067
- rx_ring->free_tx_ids[next_to_clean] =
1619
+ rx_ring->free_ids[next_to_clean] =
10681620 rx_ring->ena_bufs[i].req_id;
10691621 next_to_clean =
10701622 ENA_RX_RING_IDX_NEXT(next_to_clean,
10711623 rx_ring->ring_size);
1624
+ }
1625
+ if (xdp_verdict != XDP_PASS) {
1626
+ res_budget--;
1627
+ continue;
10721628 }
10731629 break;
10741630 }
....@@ -1092,7 +1648,6 @@
10921648 } while (likely(res_budget));
10931649
10941650 work_done = budget - res_budget;
1095
- rx_ring->per_napi_bytes += total_len;
10961651 rx_ring->per_napi_packets += work_done;
10971652 u64_stats_update_begin(&rx_ring->syncp);
10981653 rx_ring->rx_stats.bytes += total_len;
....@@ -1102,8 +1657,10 @@
11021657
11031658 rx_ring->next_to_clean = next_to_clean;
11041659
1105
- refill_required = ena_com_sq_empty_space(rx_ring->ena_com_io_sq);
1106
- refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER;
1660
+ refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
1661
+ refill_threshold =
1662
+ min_t(int, rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
1663
+ ENA_RX_REFILL_THRESH_PACKET);
11071664
11081665 /* Optimization, try to batch new rx buffers */
11091666 if (refill_required > refill_threshold) {
....@@ -1116,57 +1673,88 @@
11161673 error:
11171674 adapter = netdev_priv(rx_ring->netdev);
11181675
1119
- u64_stats_update_begin(&rx_ring->syncp);
1120
- rx_ring->rx_stats.bad_desc_num++;
1121
- u64_stats_update_end(&rx_ring->syncp);
1676
+ if (rc == -ENOSPC) {
1677
+ u64_stats_update_begin(&rx_ring->syncp);
1678
+ rx_ring->rx_stats.bad_desc_num++;
1679
+ u64_stats_update_end(&rx_ring->syncp);
1680
+ adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
1681
+ } else {
1682
+ u64_stats_update_begin(&rx_ring->syncp);
1683
+ rx_ring->rx_stats.bad_req_id++;
1684
+ u64_stats_update_end(&rx_ring->syncp);
1685
+ adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
1686
+ }
11221687
1123
- /* Too many desc from the device. Trigger reset */
1124
- adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
11251688 set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
11261689
11271690 return 0;
11281691 }
11291692
1130
-inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring,
1131
- struct ena_ring *tx_ring)
1693
+static void ena_dim_work(struct work_struct *w)
11321694 {
1133
- /* We apply adaptive moderation on Rx path only.
1134
- * Tx uses static interrupt moderation.
1135
- */
1136
- ena_com_calculate_interrupt_delay(rx_ring->ena_dev,
1137
- rx_ring->per_napi_packets,
1138
- rx_ring->per_napi_bytes,
1139
- &rx_ring->smoothed_interval,
1140
- &rx_ring->moder_tbl_idx);
1695
+ struct dim *dim = container_of(w, struct dim, work);
1696
+ struct dim_cq_moder cur_moder =
1697
+ net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
1698
+ struct ena_napi *ena_napi = container_of(dim, struct ena_napi, dim);
11411699
1142
- /* Reset per napi packets/bytes */
1143
- tx_ring->per_napi_packets = 0;
1144
- tx_ring->per_napi_bytes = 0;
1145
- rx_ring->per_napi_packets = 0;
1146
- rx_ring->per_napi_bytes = 0;
1700
+ ena_napi->rx_ring->smoothed_interval = cur_moder.usec;
1701
+ dim->state = DIM_START_MEASURE;
11471702 }
11481703
1149
-static inline void ena_unmask_interrupt(struct ena_ring *tx_ring,
1704
+static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi)
1705
+{
1706
+ struct dim_sample dim_sample;
1707
+ struct ena_ring *rx_ring = ena_napi->rx_ring;
1708
+
1709
+ if (!rx_ring->per_napi_packets)
1710
+ return;
1711
+
1712
+ rx_ring->non_empty_napi_events++;
1713
+
1714
+ dim_update_sample(rx_ring->non_empty_napi_events,
1715
+ rx_ring->rx_stats.cnt,
1716
+ rx_ring->rx_stats.bytes,
1717
+ &dim_sample);
1718
+
1719
+ net_dim(&ena_napi->dim, dim_sample);
1720
+
1721
+ rx_ring->per_napi_packets = 0;
1722
+}
1723
+
1724
+static void ena_unmask_interrupt(struct ena_ring *tx_ring,
11501725 struct ena_ring *rx_ring)
11511726 {
11521727 struct ena_eth_io_intr_reg intr_reg;
1728
+ u32 rx_interval = 0;
1729
+ /* Rx ring can be NULL when for XDP tx queues which don't have an
1730
+ * accompanying rx_ring pair.
1731
+ */
1732
+ if (rx_ring)
1733
+ rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ?
1734
+ rx_ring->smoothed_interval :
1735
+ ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev);
11531736
11541737 /* Update intr register: rx intr delay,
11551738 * tx intr delay and interrupt unmask
11561739 */
11571740 ena_com_update_intr_reg(&intr_reg,
1158
- rx_ring->smoothed_interval,
1741
+ rx_interval,
11591742 tx_ring->smoothed_interval,
11601743 true);
1744
+
1745
+ u64_stats_update_begin(&tx_ring->syncp);
1746
+ tx_ring->tx_stats.unmask_interrupt++;
1747
+ u64_stats_update_end(&tx_ring->syncp);
11611748
11621749 /* It is a shared MSI-X.
11631750 * Tx and Rx CQ have pointer to it.
11641751 * So we use one of them to reach the intr reg
1752
+ * The Tx ring is used because the rx_ring is NULL for XDP queues
11651753 */
1166
- ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
1754
+ ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg);
11671755 }
11681756
1169
-static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring,
1757
+static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
11701758 struct ena_ring *rx_ring)
11711759 {
11721760 int cpu = get_cpu();
....@@ -1181,22 +1769,87 @@
11811769
11821770 if (numa_node != NUMA_NO_NODE) {
11831771 ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
1184
- ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node);
1772
+ if (rx_ring)
1773
+ ena_com_update_numa_node(rx_ring->ena_com_io_cq,
1774
+ numa_node);
11851775 }
11861776
11871777 tx_ring->cpu = cpu;
1188
- rx_ring->cpu = cpu;
1778
+ if (rx_ring)
1779
+ rx_ring->cpu = cpu;
11891780
11901781 return;
11911782 out:
11921783 put_cpu();
11931784 }
11941785
1786
+static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget)
1787
+{
1788
+ u32 total_done = 0;
1789
+ u16 next_to_clean;
1790
+ u32 tx_bytes = 0;
1791
+ int tx_pkts = 0;
1792
+ u16 req_id;
1793
+ int rc;
1794
+
1795
+ if (unlikely(!xdp_ring))
1796
+ return 0;
1797
+ next_to_clean = xdp_ring->next_to_clean;
1798
+
1799
+ while (tx_pkts < budget) {
1800
+ struct ena_tx_buffer *tx_info;
1801
+ struct xdp_frame *xdpf;
1802
+
1803
+ rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq,
1804
+ &req_id);
1805
+ if (rc) {
1806
+ if (unlikely(rc == -EINVAL))
1807
+ handle_invalid_req_id(xdp_ring, req_id, NULL,
1808
+ true);
1809
+ break;
1810
+ }
1811
+
1812
+ /* validate that the request id points to a valid xdp_frame */
1813
+ rc = validate_xdp_req_id(xdp_ring, req_id);
1814
+ if (rc)
1815
+ break;
1816
+
1817
+ tx_info = &xdp_ring->tx_buffer_info[req_id];
1818
+ xdpf = tx_info->xdpf;
1819
+
1820
+ tx_info->xdpf = NULL;
1821
+ tx_info->last_jiffies = 0;
1822
+ ena_unmap_tx_buff(xdp_ring, tx_info);
1823
+
1824
+ netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
1825
+ "tx_poll: q %d skb %p completed\n", xdp_ring->qid,
1826
+ xdpf);
1827
+
1828
+ tx_bytes += xdpf->len;
1829
+ tx_pkts++;
1830
+ total_done += tx_info->tx_descs;
1831
+
1832
+ __free_page(tx_info->xdp_rx_page);
1833
+ xdp_ring->free_ids[next_to_clean] = req_id;
1834
+ next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
1835
+ xdp_ring->ring_size);
1836
+ }
1837
+
1838
+ xdp_ring->next_to_clean = next_to_clean;
1839
+ ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done);
1840
+ ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq);
1841
+
1842
+ netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
1843
+ "tx_poll: q %d done. total pkts: %d\n",
1844
+ xdp_ring->qid, tx_pkts);
1845
+
1846
+ return tx_pkts;
1847
+}
1848
+
11951849 static int ena_io_poll(struct napi_struct *napi, int budget)
11961850 {
11971851 struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
11981852 struct ena_ring *tx_ring, *rx_ring;
1199
-
12001853 int tx_work_done;
12011854 int rx_work_done = 0;
12021855 int tx_budget;
....@@ -1205,6 +1858,9 @@
12051858
12061859 tx_ring = ena_napi->tx_ring;
12071860 rx_ring = ena_napi->rx_ring;
1861
+
1862
+ tx_ring->first_interrupt = ena_napi->first_interrupt;
1863
+ rx_ring->first_interrupt = ena_napi->first_interrupt;
12081864
12091865 tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
12101866
....@@ -1235,10 +1891,15 @@
12351891 /* Update numa and unmask the interrupt only when schedule
12361892 * from the interrupt context (vs from sk_busy_loop)
12371893 */
1238
- if (napi_complete_done(napi, rx_work_done)) {
1239
- /* Tx and Rx share the same interrupt vector */
1894
+ if (napi_complete_done(napi, rx_work_done) &&
1895
+ READ_ONCE(ena_napi->interrupts_masked)) {
1896
+ smp_rmb(); /* make sure interrupts_masked is read */
1897
+ WRITE_ONCE(ena_napi->interrupts_masked, false);
1898
+ /* We apply adaptive moderation on Rx path only.
1899
+ * Tx uses static interrupt moderation.
1900
+ */
12401901 if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
1241
- ena_adjust_intr_moderation(rx_ring, tx_ring);
1902
+ ena_adjust_adaptive_rx_intr_moderation(ena_napi);
12421903
12431904 ena_unmask_interrupt(tx_ring, rx_ring);
12441905 }
....@@ -1279,8 +1940,10 @@
12791940 {
12801941 struct ena_napi *ena_napi = data;
12811942
1282
- ena_napi->tx_ring->first_interrupt = true;
1283
- ena_napi->rx_ring->first_interrupt = true;
1943
+ ena_napi->first_interrupt = true;
1944
+
1945
+ WRITE_ONCE(ena_napi->interrupts_masked, true);
1946
+ smp_wmb(); /* write interrupts_masked before calling napi */
12841947
12851948 napi_schedule_irqoff(&ena_napi->napi);
12861949
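A user-space analogue of the interrupts_masked handshake above, assuming C11 release/acquire ordering stands in for the driver's smp_wmb()/smp_rmb() pair; the names and the work_pending field are illustrative only:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool interrupts_masked;
static atomic_int  work_pending;

static void irq_handler(void)   /* producer: runs in hard-irq context */
{
	atomic_store_explicit(&work_pending, 1, memory_order_relaxed);
	/* release: publish work_pending before the flag becomes visible */
	atomic_store_explicit(&interrupts_masked, true, memory_order_release);
}

static void napi_poll(void)     /* consumer: runs later in softirq context */
{
	/* acquire: pairs with the release store in irq_handler() */
	if (atomic_load_explicit(&interrupts_masked, memory_order_acquire)) {
		atomic_store_explicit(&interrupts_masked, false,
				      memory_order_relaxed);
		printf("unmask, pending=%d\n",
		       atomic_load_explicit(&work_pending, memory_order_relaxed));
	}
}

int main(void)
{
	irq_handler();
	napi_poll();
	return 0;
}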
....@@ -1292,7 +1955,7 @@
12921955 * the number of potential io queues is the minimum of what the device
12931956 * supports and the number of vCPUs.
12941957 */
1295
-static int ena_enable_msix(struct ena_adapter *adapter, int num_queues)
1958
+static int ena_enable_msix(struct ena_adapter *adapter)
12961959 {
12971960 int msix_vecs, irq_cnt;
12981961
....@@ -1303,10 +1966,9 @@
13031966 }
13041967
13051968 /* Reserve the max msix vectors we might need */
1306
- msix_vecs = ENA_MAX_MSIX_VEC(num_queues);
1307
-
1969
+ msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
13081970 netif_dbg(adapter, probe, adapter->netdev,
1309
- "trying to enable MSI-X, vectors %d\n", msix_vecs);
1971
+ "Trying to enable MSI-X, vectors %d\n", msix_vecs);
13101972
13111973 irq_cnt = pci_alloc_irq_vectors(adapter->pdev, ENA_MIN_MSIX_VEC,
13121974 msix_vecs, PCI_IRQ_MSIX);
....@@ -1319,9 +1981,9 @@
13191981
13201982 if (irq_cnt != msix_vecs) {
13211983 netif_notice(adapter, probe, adapter->netdev,
1322
- "enable only %d MSI-X (out of %d), reduce the number of queues\n",
1984
+ "Enable only %d MSI-X (out of %d), reduce the number of queues\n",
13231985 irq_cnt, msix_vecs);
1324
- adapter->num_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
1986
+ adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
13251987 }
13261988
13271989 if (ena_init_rx_cpu_rmap(adapter))
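A small sketch of the vector accounting above: pci_alloc_irq_vectors() may grant fewer vectors than requested, and since one vector is kept for the admin interrupt the remainder caps num_io_queues. The helper and the numbers below are illustrative, not driver code:

#include <stdio.h>

#define ADMIN_MSIX_VEC 1   /* one vector reserved for the management interrupt */

static int io_queues_after_msix(int requested_io_queues, int granted_vectors)
{
	int usable = granted_vectors - ADMIN_MSIX_VEC;

	return usable < requested_io_queues ? usable : requested_io_queues;
}

int main(void)
{
	/* asked for 8 IO queues (9 vectors total), only 5 vectors were granted */
	printf("num_io_queues = %d\n", io_queues_after_msix(8, 5));   /* 4 */
	return 0;
}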
....@@ -1356,10 +2018,12 @@
13562018 {
13572019 struct net_device *netdev;
13582020 int irq_idx, i, cpu;
2021
+ int io_queue_count;
13592022
13602023 netdev = adapter->netdev;
2024
+ io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
13612025
1362
- for (i = 0; i < adapter->num_queues; i++) {
2026
+ for (i = 0; i < io_queue_count; i++) {
13632027 irq_idx = ENA_IO_IRQ_IDX(i);
13642028 cpu = i % num_online_cpus();
13652029
....@@ -1387,12 +2051,12 @@
13872051 irq->data);
13882052 if (rc) {
13892053 netif_err(adapter, probe, adapter->netdev,
1390
- "failed to request admin irq\n");
2054
+ "Failed to request admin irq\n");
13912055 return rc;
13922056 }
13932057
13942058 netif_dbg(adapter, probe, adapter->netdev,
1395
- "set affinity hint of mgmnt irq.to 0x%lx (irq vector: %d)\n",
2059
+ "Set affinity hint of mgmnt irq.to 0x%lx (irq vector: %d)\n",
13962060 irq->affinity_hint_mask.bits[0], irq->vector);
13972061
13982062 irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
....@@ -1402,6 +2066,7 @@
14022066
14032067 static int ena_request_io_irq(struct ena_adapter *adapter)
14042068 {
2069
+ u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
14052070 unsigned long flags = 0;
14062071 struct ena_irq *irq;
14072072 int rc = 0, i, k;
....@@ -1412,7 +2077,7 @@
14122077 return -EINVAL;
14132078 }
14142079
1415
- for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
2080
+ for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
14162081 irq = &adapter->irq_tbl[i];
14172082 rc = request_irq(irq->vector, irq->handler, flags, irq->name,
14182083 irq->data);
....@@ -1424,7 +2089,7 @@
14242089 }
14252090
14262091 netif_dbg(adapter, ifup, adapter->netdev,
1427
- "set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n",
2092
+ "Set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n",
14282093 i, irq->affinity_hint_mask.bits[0], irq->vector);
14292094
14302095 irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
....@@ -1453,6 +2118,7 @@
14532118
14542119 static void ena_free_io_irq(struct ena_adapter *adapter)
14552120 {
2121
+ u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
14562122 struct ena_irq *irq;
14572123 int i;
14582124
....@@ -1463,7 +2129,7 @@
14632129 }
14642130 #endif /* CONFIG_RFS_ACCEL */
14652131
1466
- for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
2132
+ for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
14672133 irq = &adapter->irq_tbl[i];
14682134 irq_set_affinity_hint(irq->vector, NULL);
14692135 free_irq(irq->vector, irq->data);
....@@ -1478,63 +2144,71 @@
14782144
14792145 static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
14802146 {
2147
+ u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
14812148 int i;
14822149
14832150 if (!netif_running(adapter->netdev))
14842151 return;
14852152
1486
- for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++)
2153
+ for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++)
14872154 synchronize_irq(adapter->irq_tbl[i].vector);
14882155 }
14892156
1490
-static void ena_del_napi(struct ena_adapter *adapter)
2157
+static void ena_del_napi_in_range(struct ena_adapter *adapter,
2158
+ int first_index,
2159
+ int count)
14912160 {
14922161 int i;
14932162
1494
- for (i = 0; i < adapter->num_queues; i++)
2163
+ for (i = first_index; i < first_index + count; i++) {
14952164 netif_napi_del(&adapter->ena_napi[i].napi);
2165
+
2166
+ WARN_ON(!ENA_IS_XDP_INDEX(adapter, i) &&
2167
+ adapter->ena_napi[i].xdp_ring);
2168
+ }
14962169 }
14972170
1498
-static void ena_init_napi(struct ena_adapter *adapter)
2171
+static void ena_init_napi_in_range(struct ena_adapter *adapter,
2172
+ int first_index, int count)
14992173 {
1500
- struct ena_napi *napi;
15012174 int i;
15022175
1503
- for (i = 0; i < adapter->num_queues; i++) {
1504
- napi = &adapter->ena_napi[i];
2176
+ for (i = first_index; i < first_index + count; i++) {
2177
+ struct ena_napi *napi = &adapter->ena_napi[i];
15052178
15062179 netif_napi_add(adapter->netdev,
1507
- &adapter->ena_napi[i].napi,
1508
- ena_io_poll,
2180
+ &napi->napi,
2181
+ ENA_IS_XDP_INDEX(adapter, i) ? ena_xdp_io_poll : ena_io_poll,
15092182 ENA_NAPI_BUDGET);
1510
- napi->rx_ring = &adapter->rx_ring[i];
1511
- napi->tx_ring = &adapter->tx_ring[i];
2183
+
2184
+ if (!ENA_IS_XDP_INDEX(adapter, i)) {
2185
+ napi->rx_ring = &adapter->rx_ring[i];
2186
+ napi->tx_ring = &adapter->tx_ring[i];
2187
+ } else {
2188
+ napi->xdp_ring = &adapter->tx_ring[i];
2189
+ }
15122190 napi->qid = i;
15132191 }
15142192 }
15152193
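The poll-function selection above depends on where a ring index falls in the adapter's ring array; a sketch of that layout, assuming (as the ENA_IS_XDP_INDEX() macro in ena_netdev.h does) that XDP TX rings are appended after the regular IO rings:

#include <stdbool.h>
#include <stdio.h>

struct ring_layout {
	int num_io_queues;
	int xdp_first_ring;   /* typically == num_io_queues while XDP is attached */
	int xdp_num_queues;
};

static bool is_xdp_index(const struct ring_layout *l, int index)
{
	return index >= l->xdp_first_ring &&
	       index <  l->xdp_first_ring + l->xdp_num_queues;
}

int main(void)
{
	struct ring_layout l = {
		.num_io_queues = 4, .xdp_first_ring = 4, .xdp_num_queues = 4,
	};

	printf("ring 2: %s\n", is_xdp_index(&l, 2) ? "xdp" : "regular");  /* regular */
	printf("ring 5: %s\n", is_xdp_index(&l, 5) ? "xdp" : "regular");  /* xdp */
	return 0;
}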
1516
-static void ena_napi_disable_all(struct ena_adapter *adapter)
2194
+static void ena_napi_disable_in_range(struct ena_adapter *adapter,
2195
+ int first_index,
2196
+ int count)
15172197 {
15182198 int i;
15192199
1520
- for (i = 0; i < adapter->num_queues; i++)
2200
+ for (i = first_index; i < first_index + count; i++)
15212201 napi_disable(&adapter->ena_napi[i].napi);
15222202 }
15232203
1524
-static void ena_napi_enable_all(struct ena_adapter *adapter)
2204
+static void ena_napi_enable_in_range(struct ena_adapter *adapter,
2205
+ int first_index,
2206
+ int count)
15252207 {
15262208 int i;
15272209
1528
- for (i = 0; i < adapter->num_queues; i++)
2210
+ for (i = first_index; i < first_index + count; i++)
15292211 napi_enable(&adapter->ena_napi[i].napi);
1530
-}
1531
-
1532
-static void ena_restore_ethtool_params(struct ena_adapter *adapter)
1533
-{
1534
- adapter->tx_usecs = 0;
1535
- adapter->rx_usecs = 0;
1536
- adapter->tx_frames = 1;
1537
- adapter->rx_frames = 1;
15382212 }
15392213
15402214 /* Configure the Rx forwarding */
....@@ -1586,16 +2260,16 @@
15862260 /* enable transmits */
15872261 netif_tx_start_all_queues(adapter->netdev);
15882262
1589
- ena_restore_ethtool_params(adapter);
1590
-
1591
- ena_napi_enable_all(adapter);
2263
+ ena_napi_enable_in_range(adapter,
2264
+ 0,
2265
+ adapter->xdp_num_queues + adapter->num_io_queues);
15922266
15932267 return 0;
15942268 }
15952269
15962270 static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
15972271 {
1598
- struct ena_com_create_io_ctx ctx = { 0 };
2272
+ struct ena_com_create_io_ctx ctx;
15992273 struct ena_com_dev *ena_dev;
16002274 struct ena_ring *tx_ring;
16012275 u32 msix_vector;
....@@ -1608,11 +2282,13 @@
16082282 msix_vector = ENA_IO_IRQ_IDX(qid);
16092283 ena_qid = ENA_IO_TXQ_IDX(qid);
16102284
2285
+ memset(&ctx, 0x0, sizeof(ctx));
2286
+
16112287 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
16122288 ctx.qid = ena_qid;
16132289 ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
16142290 ctx.msix_vector = msix_vector;
1615
- ctx.queue_size = adapter->tx_ring_size;
2291
+ ctx.queue_size = tx_ring->ring_size;
16162292 ctx.numa_node = cpu_to_node(tx_ring->cpu);
16172293
16182294 rc = ena_com_create_io_queue(ena_dev, &ctx);
....@@ -1638,12 +2314,13 @@
16382314 return rc;
16392315 }
16402316
1641
-static int ena_create_all_io_tx_queues(struct ena_adapter *adapter)
2317
+static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
2318
+ int first_index, int count)
16422319 {
16432320 struct ena_com_dev *ena_dev = adapter->ena_dev;
16442321 int rc, i;
16452322
1646
- for (i = 0; i < adapter->num_queues; i++) {
2323
+ for (i = first_index; i < first_index + count; i++) {
16472324 rc = ena_create_io_tx_queue(adapter, i);
16482325 if (rc)
16492326 goto create_err;
....@@ -1652,7 +2329,7 @@
16522329 return 0;
16532330
16542331 create_err:
1655
- while (i--)
2332
+ while (i-- > first_index)
16562333 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
16572334
16582335 return rc;
....@@ -1661,7 +2338,7 @@
16612338 static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
16622339 {
16632340 struct ena_com_dev *ena_dev;
1664
- struct ena_com_create_io_ctx ctx = { 0 };
2341
+ struct ena_com_create_io_ctx ctx;
16652342 struct ena_ring *rx_ring;
16662343 u32 msix_vector;
16672344 u16 ena_qid;
....@@ -1673,11 +2350,13 @@
16732350 msix_vector = ENA_IO_IRQ_IDX(qid);
16742351 ena_qid = ENA_IO_RXQ_IDX(qid);
16752352
2353
+ memset(&ctx, 0x0, sizeof(ctx));
2354
+
16762355 ctx.qid = ena_qid;
16772356 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
16782357 ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
16792358 ctx.msix_vector = msix_vector;
1680
- ctx.queue_size = adapter->rx_ring_size;
2359
+ ctx.queue_size = rx_ring->ring_size;
16812360 ctx.numa_node = cpu_to_node(rx_ring->cpu);
16822361
16832362 rc = ena_com_create_io_queue(ena_dev, &ctx);
....@@ -1695,12 +2374,14 @@
16952374 netif_err(adapter, ifup, adapter->netdev,
16962375 "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
16972376 qid, rc);
1698
- ena_com_destroy_io_queue(ena_dev, ena_qid);
1699
- return rc;
2377
+ goto err;
17002378 }
17012379
17022380 ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
17032381
2382
+ return rc;
2383
+err:
2384
+ ena_com_destroy_io_queue(ena_dev, ena_qid);
17042385 return rc;
17052386 }
17062387
....@@ -1709,27 +2390,148 @@
17092390 struct ena_com_dev *ena_dev = adapter->ena_dev;
17102391 int rc, i;
17112392
1712
- for (i = 0; i < adapter->num_queues; i++) {
2393
+ for (i = 0; i < adapter->num_io_queues; i++) {
17132394 rc = ena_create_io_rx_queue(adapter, i);
17142395 if (rc)
17152396 goto create_err;
2397
+ INIT_WORK(&adapter->ena_napi[i].dim.work, ena_dim_work);
17162398 }
17172399
17182400 return 0;
17192401
17202402 create_err:
1721
- while (i--)
2403
+ while (i--) {
2404
+ cancel_work_sync(&adapter->ena_napi[i].dim.work);
17222405 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
2406
+ }
17232407
17242408 return rc;
17252409 }
17262410
2411
+static void set_io_rings_size(struct ena_adapter *adapter,
2412
+ int new_tx_size,
2413
+ int new_rx_size)
2414
+{
2415
+ int i;
2416
+
2417
+ for (i = 0; i < adapter->num_io_queues; i++) {
2418
+ adapter->tx_ring[i].ring_size = new_tx_size;
2419
+ adapter->rx_ring[i].ring_size = new_rx_size;
2420
+ }
2421
+}
2422
+
2423
+/* This function allows queue allocation to back off when the system is
2424
+ * low on memory. If there is not enough memory to allocate io queues
2425
+ * the driver will try to allocate smaller queues.
2426
+ *
2427
+ * The backoff algorithm is as follows:
2428
+ * 1. Try to allocate TX and RX; if successful,
2429
+ * 1.1. return success
2430
+ *
2431
+ * 2. Divide by 2 the size of the larger of RX and TX queues (or both if their size is the same).
2432
+ *
2433
+ * 3. If TX or RX is smaller than 256
2434
+ * 3.1. return failure.
2435
+ * 4. else
2436
+ * 4.1. go back to 1.
2437
+ */
2438
+static int create_queues_with_size_backoff(struct ena_adapter *adapter)
2439
+{
2440
+ int rc, cur_rx_ring_size, cur_tx_ring_size;
2441
+ int new_rx_ring_size, new_tx_ring_size;
2442
+
2443
+ /* current queue sizes might be set to smaller than the requested
2444
+ * ones due to past queue allocation failures.
2445
+ */
2446
+ set_io_rings_size(adapter, adapter->requested_tx_ring_size,
2447
+ adapter->requested_rx_ring_size);
2448
+
2449
+ while (1) {
2450
+ if (ena_xdp_present(adapter)) {
2451
+ rc = ena_setup_and_create_all_xdp_queues(adapter);
2452
+
2453
+ if (rc)
2454
+ goto err_setup_tx;
2455
+ }
2456
+ rc = ena_setup_tx_resources_in_range(adapter,
2457
+ 0,
2458
+ adapter->num_io_queues);
2459
+ if (rc)
2460
+ goto err_setup_tx;
2461
+
2462
+ rc = ena_create_io_tx_queues_in_range(adapter,
2463
+ 0,
2464
+ adapter->num_io_queues);
2465
+ if (rc)
2466
+ goto err_create_tx_queues;
2467
+
2468
+ rc = ena_setup_all_rx_resources(adapter);
2469
+ if (rc)
2470
+ goto err_setup_rx;
2471
+
2472
+ rc = ena_create_all_io_rx_queues(adapter);
2473
+ if (rc)
2474
+ goto err_create_rx_queues;
2475
+
2476
+ return 0;
2477
+
2478
+err_create_rx_queues:
2479
+ ena_free_all_io_rx_resources(adapter);
2480
+err_setup_rx:
2481
+ ena_destroy_all_tx_queues(adapter);
2482
+err_create_tx_queues:
2483
+ ena_free_all_io_tx_resources(adapter);
2484
+err_setup_tx:
2485
+ if (rc != -ENOMEM) {
2486
+ netif_err(adapter, ifup, adapter->netdev,
2487
+ "Queue creation failed with error code %d\n",
2488
+ rc);
2489
+ return rc;
2490
+ }
2491
+
2492
+ cur_tx_ring_size = adapter->tx_ring[0].ring_size;
2493
+ cur_rx_ring_size = adapter->rx_ring[0].ring_size;
2494
+
2495
+ netif_err(adapter, ifup, adapter->netdev,
2496
+ "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
2497
+ cur_tx_ring_size, cur_rx_ring_size);
2498
+
2499
+ new_tx_ring_size = cur_tx_ring_size;
2500
+ new_rx_ring_size = cur_rx_ring_size;
2501
+
2502
+ /* Decrease the size of the larger queue, or
2503
+ * decrease both if they are the same size.
2504
+ */
2505
+ if (cur_rx_ring_size <= cur_tx_ring_size)
2506
+ new_tx_ring_size = cur_tx_ring_size / 2;
2507
+ if (cur_rx_ring_size >= cur_tx_ring_size)
2508
+ new_rx_ring_size = cur_rx_ring_size / 2;
2509
+
2510
+ if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
2511
+ new_rx_ring_size < ENA_MIN_RING_SIZE) {
2512
+ netif_err(adapter, ifup, adapter->netdev,
2513
+ "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n",
2514
+ ENA_MIN_RING_SIZE);
2515
+ return rc;
2516
+ }
2517
+
2518
+ netif_err(adapter, ifup, adapter->netdev,
2519
+ "Retrying queue creation with sizes TX=%d, RX=%d\n",
2520
+ new_tx_ring_size,
2521
+ new_rx_ring_size);
2522
+
2523
+ set_io_rings_size(adapter, new_tx_ring_size,
2524
+ new_rx_ring_size);
2525
+ }
2526
+}
2527
+
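A stand-alone sketch of the backoff described in the comment above: halve the larger ring (both when they are equal) until allocation succeeds or either ring would drop below the minimum. The 256 floor mirrors the minimum used here; the failing allocator is a stand-in:

#include <stdbool.h>
#include <stdio.h>

#define MIN_RING_SIZE 256

static bool try_alloc(int tx, int rx)
{
	return tx + rx <= 3072;   /* pretend memory runs out above this budget */
}

static bool create_with_backoff(int tx, int rx)
{
	while (1) {
		if (try_alloc(tx, rx)) {
			printf("created TX=%d RX=%d\n", tx, rx);
			return true;
		}

		int new_tx = tx, new_rx = rx;

		/* shrink the larger ring, or both when they are equal */
		if (rx <= tx)
			new_tx = tx / 2;
		if (rx >= tx)
			new_rx = rx / 2;

		if (new_tx < MIN_RING_SIZE || new_rx < MIN_RING_SIZE) {
			printf("giving up at TX=%d RX=%d\n", tx, rx);
			return false;
		}
		tx = new_tx;
		rx = new_rx;
	}
}

int main(void)
{
	/* 4096/4096 -> 2048/2048 -> 1024/1024, which fits */
	return create_with_backoff(4096, 4096) ? 0 : 1;
}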
17272528 static int ena_up(struct ena_adapter *adapter)
17282529 {
1729
- int rc, i;
2530
+ int io_queue_count, rc, i;
17302531
1731
- netdev_dbg(adapter->netdev, "%s\n", __func__);
2532
+ netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__);
17322533
2534
+ io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
17332535 ena_setup_io_intr(adapter);
17342536
17352537 /* napi poll functions should be initialized before running
....@@ -1737,31 +2539,15 @@
17372539 * interrupt, causing the ISR to fire immediately while the poll
17382540 * function wasn't set yet, causing a null dereference
17392541 */
1740
- ena_init_napi(adapter);
2542
+ ena_init_napi_in_range(adapter, 0, io_queue_count);
17412543
17422544 rc = ena_request_io_irq(adapter);
17432545 if (rc)
17442546 goto err_req_irq;
17452547
1746
- /* allocate transmit descriptors */
1747
- rc = ena_setup_all_tx_resources(adapter);
2548
+ rc = create_queues_with_size_backoff(adapter);
17482549 if (rc)
1749
- goto err_setup_tx;
1750
-
1751
- /* allocate receive descriptors */
1752
- rc = ena_setup_all_rx_resources(adapter);
1753
- if (rc)
1754
- goto err_setup_rx;
1755
-
1756
- /* Create TX queues */
1757
- rc = ena_create_all_io_tx_queues(adapter);
1758
- if (rc)
1759
- goto err_create_tx_queues;
1760
-
1761
- /* Create RX queues */
1762
- rc = ena_create_all_io_rx_queues(adapter);
1763
- if (rc)
1764
- goto err_create_rx_queues;
2550
+ goto err_create_queues_with_backoff;
17652551
17662552 rc = ena_up_complete(adapter);
17672553 if (rc)
....@@ -1777,36 +2563,35 @@
17772563 set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
17782564
17792565 /* Enable completion queues interrupt */
1780
- for (i = 0; i < adapter->num_queues; i++)
2566
+ for (i = 0; i < adapter->num_io_queues; i++)
17812567 ena_unmask_interrupt(&adapter->tx_ring[i],
17822568 &adapter->rx_ring[i]);
17832569
17842570 /* schedule napi in case we had pending packets
17852571 * from the last time we disabled napi
17862572 */
1787
- for (i = 0; i < adapter->num_queues; i++)
2573
+ for (i = 0; i < io_queue_count; i++)
17882574 napi_schedule(&adapter->ena_napi[i].napi);
17892575
17902576 return rc;
17912577
17922578 err_up:
1793
- ena_destroy_all_rx_queues(adapter);
1794
-err_create_rx_queues:
17952579 ena_destroy_all_tx_queues(adapter);
1796
-err_create_tx_queues:
1797
- ena_free_all_io_rx_resources(adapter);
1798
-err_setup_rx:
17992580 ena_free_all_io_tx_resources(adapter);
1800
-err_setup_tx:
2581
+ ena_destroy_all_rx_queues(adapter);
2582
+ ena_free_all_io_rx_resources(adapter);
2583
+err_create_queues_with_backoff:
18012584 ena_free_io_irq(adapter);
18022585 err_req_irq:
1803
- ena_del_napi(adapter);
2586
+ ena_del_napi_in_range(adapter, 0, io_queue_count);
18042587
18052588 return rc;
18062589 }
18072590
18082591 static void ena_down(struct ena_adapter *adapter)
18092592 {
2593
+ int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2594
+
18102595 netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__);
18112596
18122597 clear_bit(ENA_FLAG_DEV_UP, &adapter->flags);
....@@ -1819,7 +2604,7 @@
18192604 netif_tx_disable(adapter->netdev);
18202605
18212606 /* After this point the napi handler won't enable the tx queue */
1822
- ena_napi_disable_all(adapter);
2607
+ ena_napi_disable_in_range(adapter, 0, io_queue_count);
18232608
18242609 /* After destroy the queue there won't be any new interrupts */
18252610
....@@ -1828,14 +2613,17 @@
18282613
18292614 rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
18302615 if (rc)
1831
- dev_err(&adapter->pdev->dev, "Device reset failed\n");
2616
+ netif_err(adapter, ifdown, adapter->netdev,
2617
+ "Device reset failed\n");
2618
+ /* stop submitting admin commands on a device that was reset */
2619
+ ena_com_set_admin_running_state(adapter->ena_dev, false);
18322620 }
18332621
18342622 ena_destroy_all_io_queues(adapter);
18352623
18362624 ena_disable_io_intr_sync(adapter);
18372625 ena_free_io_irq(adapter);
1838
- ena_del_napi(adapter);
2626
+ ena_del_napi_in_range(adapter, 0, io_queue_count);
18392627
18402628 ena_free_all_tx_bufs(adapter);
18412629 ena_free_all_rx_bufs(adapter);
....@@ -1860,13 +2648,13 @@
18602648 int rc;
18612649
18622650 /* Notify the stack of the actual queue counts. */
1863
- rc = netif_set_real_num_tx_queues(netdev, adapter->num_queues);
2651
+ rc = netif_set_real_num_tx_queues(netdev, adapter->num_io_queues);
18642652 if (rc) {
18652653 netif_err(adapter, ifup, netdev, "Can't set num tx queues\n");
18662654 return rc;
18672655 }
18682656
1869
- rc = netif_set_real_num_rx_queues(netdev, adapter->num_queues);
2657
+ rc = netif_set_real_num_rx_queues(netdev, adapter->num_io_queues);
18702658 if (rc) {
18712659 netif_err(adapter, ifup, netdev, "Can't set num rx queues\n");
18722660 return rc;
....@@ -1895,6 +2683,9 @@
18952683
18962684 netif_dbg(adapter, ifdown, netdev, "%s\n", __func__);
18972685
2686
+ if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
2687
+ return 0;
2688
+
18982689 if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
18992690 ena_down(adapter);
19002691
....@@ -1912,7 +2703,63 @@
19122703 return 0;
19132704 }
19142705
1915
-static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb)
2706
+int ena_update_queue_sizes(struct ena_adapter *adapter,
2707
+ u32 new_tx_size,
2708
+ u32 new_rx_size)
2709
+{
2710
+ bool dev_was_up;
2711
+
2712
+ dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2713
+ ena_close(adapter->netdev);
2714
+ adapter->requested_tx_ring_size = new_tx_size;
2715
+ adapter->requested_rx_ring_size = new_rx_size;
2716
+ ena_init_io_rings(adapter,
2717
+ 0,
2718
+ adapter->xdp_num_queues +
2719
+ adapter->num_io_queues);
2720
+ return dev_was_up ? ena_up(adapter) : 0;
2721
+}
2722
+
2723
+int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count)
2724
+{
2725
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
2726
+ int prev_channel_count;
2727
+ bool dev_was_up;
2728
+
2729
+ dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2730
+ ena_close(adapter->netdev);
2731
+ prev_channel_count = adapter->num_io_queues;
2732
+ adapter->num_io_queues = new_channel_count;
2733
+ if (ena_xdp_present(adapter) &&
2734
+ ena_xdp_allowed(adapter) == ENA_XDP_ALLOWED) {
2735
+ adapter->xdp_first_ring = new_channel_count;
2736
+ adapter->xdp_num_queues = new_channel_count;
2737
+ if (prev_channel_count > new_channel_count)
2738
+ ena_xdp_exchange_program_rx_in_range(adapter,
2739
+ NULL,
2740
+ new_channel_count,
2741
+ prev_channel_count);
2742
+ else
2743
+ ena_xdp_exchange_program_rx_in_range(adapter,
2744
+ adapter->xdp_bpf_prog,
2745
+ prev_channel_count,
2746
+ new_channel_count);
2747
+ }
2748
+
2749
+ /* We need to destroy the rss table so that the indirection
2750
+ * table will be reinitialized by ena_up()
2751
+ */
2752
+ ena_com_rss_destroy(ena_dev);
2753
+ ena_init_io_rings(adapter,
2754
+ 0,
2755
+ adapter->xdp_num_queues +
2756
+ adapter->num_io_queues);
2757
+ return dev_was_up ? ena_open(adapter->netdev) : 0;
2758
+}
2759
+
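Both update helpers above follow the same shape: remember whether the interface was up, close it, apply the new parameters, and reopen only if it was up. A minimal sketch of that pattern with illustrative stubs:

#include <stdbool.h>
#include <stdio.h>

struct adapter {
	bool dev_up;
	int requested_tx_ring_size;
	int requested_rx_ring_size;
};

static void dev_close(struct adapter *a) { a->dev_up = false; puts("close"); }
static int  dev_open(struct adapter *a)  { a->dev_up = true;  puts("open"); return 0; }

static int update_queue_sizes(struct adapter *a, int new_tx, int new_rx)
{
	bool dev_was_up = a->dev_up;

	dev_close(a);
	a->requested_tx_ring_size = new_tx;
	a->requested_rx_ring_size = new_rx;
	return dev_was_up ? dev_open(a) : 0;   /* stay down if it was down */
}

int main(void)
{
	struct adapter a = { .dev_up = true };

	return update_queue_sizes(&a, 2048, 2048);
}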
2760
+static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx,
2761
+ struct sk_buff *skb,
2762
+ bool disable_meta_caching)
19162763 {
19172764 u32 mss = skb_shinfo(skb)->gso_size;
19182765 struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
....@@ -1956,7 +2803,9 @@
19562803 ena_meta->l3_hdr_len = skb_network_header_len(skb);
19572804 ena_meta->l3_hdr_offset = skb_network_offset(skb);
19582805 ena_tx_ctx->meta_valid = 1;
1959
-
2806
+ } else if (disable_meta_caching) {
2807
+ memset(ena_meta, 0, sizeof(*ena_meta));
2808
+ ena_tx_ctx->meta_valid = 1;
19602809 } else {
19612810 ena_tx_ctx->meta_valid = 0;
19622811 }
....@@ -1991,6 +2840,112 @@
19912840 return rc;
19922841 }
19932842
2843
+static int ena_tx_map_skb(struct ena_ring *tx_ring,
2844
+ struct ena_tx_buffer *tx_info,
2845
+ struct sk_buff *skb,
2846
+ void **push_hdr,
2847
+ u16 *header_len)
2848
+{
2849
+ struct ena_adapter *adapter = tx_ring->adapter;
2850
+ struct ena_com_buf *ena_buf;
2851
+ dma_addr_t dma;
2852
+ u32 skb_head_len, frag_len, last_frag;
2853
+ u16 push_len = 0;
2854
+ u16 delta = 0;
2855
+ int i = 0;
2856
+
2857
+ skb_head_len = skb_headlen(skb);
2858
+ tx_info->skb = skb;
2859
+ ena_buf = tx_info->bufs;
2860
+
2861
+ if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2862
+ /* When the device is in LLQ mode, the driver will copy
2863
+ * the header into the device memory space.
2864
+ * The ena_com layer assumes the header is in a linear
2865
+ * memory space.
2866
+ * This assumption might be wrong since part of the header
2867
+ * can be in the fragmented buffers.
2868
+ * Use skb_header_pointer to make sure the header is in a
2869
+ * linear memory space.
2870
+ */
2871
+
2872
+ push_len = min_t(u32, skb->len, tx_ring->tx_max_header_size);
2873
+ *push_hdr = skb_header_pointer(skb, 0, push_len,
2874
+ tx_ring->push_buf_intermediate_buf);
2875
+ *header_len = push_len;
2876
+ if (unlikely(skb->data != *push_hdr)) {
2877
+ u64_stats_update_begin(&tx_ring->syncp);
2878
+ tx_ring->tx_stats.llq_buffer_copy++;
2879
+ u64_stats_update_end(&tx_ring->syncp);
2880
+
2881
+ delta = push_len - skb_head_len;
2882
+ }
2883
+ } else {
2884
+ *push_hdr = NULL;
2885
+ *header_len = min_t(u32, skb_head_len,
2886
+ tx_ring->tx_max_header_size);
2887
+ }
2888
+
2889
+ netif_dbg(adapter, tx_queued, adapter->netdev,
2890
+ "skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
2891
+ *push_hdr, push_len);
2892
+
2893
+ if (skb_head_len > push_len) {
2894
+ dma = dma_map_single(tx_ring->dev, skb->data + push_len,
2895
+ skb_head_len - push_len, DMA_TO_DEVICE);
2896
+ if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
2897
+ goto error_report_dma_error;
2898
+
2899
+ ena_buf->paddr = dma;
2900
+ ena_buf->len = skb_head_len - push_len;
2901
+
2902
+ ena_buf++;
2903
+ tx_info->num_of_bufs++;
2904
+ tx_info->map_linear_data = 1;
2905
+ } else {
2906
+ tx_info->map_linear_data = 0;
2907
+ }
2908
+
2909
+ last_frag = skb_shinfo(skb)->nr_frags;
2910
+
2911
+ for (i = 0; i < last_frag; i++) {
2912
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2913
+
2914
+ frag_len = skb_frag_size(frag);
2915
+
2916
+ if (unlikely(delta >= frag_len)) {
2917
+ delta -= frag_len;
2918
+ continue;
2919
+ }
2920
+
2921
+ dma = skb_frag_dma_map(tx_ring->dev, frag, delta,
2922
+ frag_len - delta, DMA_TO_DEVICE);
2923
+ if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
2924
+ goto error_report_dma_error;
2925
+
2926
+ ena_buf->paddr = dma;
2927
+ ena_buf->len = frag_len - delta;
2928
+ ena_buf++;
2929
+ tx_info->num_of_bufs++;
2930
+ delta = 0;
2931
+ }
2932
+
2933
+ return 0;
2934
+
2935
+error_report_dma_error:
2936
+ u64_stats_update_begin(&tx_ring->syncp);
2937
+ tx_ring->tx_stats.dma_mapping_err++;
2938
+ u64_stats_update_end(&tx_ring->syncp);
2939
+ netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map skb\n");
2940
+
2941
+ tx_info->skb = NULL;
2942
+
2943
+ tx_info->num_of_bufs += i;
2944
+ ena_unmap_tx_buff(tx_ring, tx_info);
2945
+
2946
+ return -EINVAL;
2947
+}
2948
+
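A sketch of the push-header bookkeeping above: push_len bytes of the packet are copied into device memory, and when the linear head is shorter than push_len the shortfall ("delta") has already been consumed from the first fragment, so fragment DMA mapping starts at that offset. The sizes are made up:

#include <stdio.h>

static unsigned int min_u32(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned int skb_len = 300, head_len = 64, tx_max_header_size = 96;

	unsigned int push_len = min_u32(skb_len, tx_max_header_size);        /* 96 */
	unsigned int delta = push_len > head_len ? push_len - head_len : 0;  /* 32 */

	printf("copy %u header bytes to device memory\n", push_len);
	printf("map linear data past the header: %u bytes\n",
	       head_len > push_len ? head_len - push_len : 0);
	printf("map first fragment starting at offset %u\n", delta);
	return 0;
}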
19942949 /* Called with netif_tx_lock. */
19952950 static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
19962951 {
....@@ -1999,16 +2954,9 @@
19992954 struct ena_com_tx_ctx ena_tx_ctx;
20002955 struct ena_ring *tx_ring;
20012956 struct netdev_queue *txq;
2002
- struct ena_com_buf *ena_buf;
20032957 void *push_hdr;
2004
- u32 len, last_frag;
2005
- u16 next_to_use;
2006
- u16 req_id;
2007
- u16 push_len;
2008
- u16 header_len;
2009
- dma_addr_t dma;
2010
- int qid, rc, nb_hw_desc;
2011
- int i = -1;
2958
+ u16 next_to_use, req_id, header_len;
2959
+ int qid, rc;
20122960
20132961 netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
20142962 /* Determine which tx ring we will be placed on */
....@@ -2021,62 +2969,17 @@
20212969 goto error_drop_packet;
20222970
20232971 skb_tx_timestamp(skb);
2024
- len = skb_headlen(skb);
20252972
20262973 next_to_use = tx_ring->next_to_use;
2027
- req_id = tx_ring->free_tx_ids[next_to_use];
2974
+ req_id = tx_ring->free_ids[next_to_use];
20282975 tx_info = &tx_ring->tx_buffer_info[req_id];
20292976 tx_info->num_of_bufs = 0;
20302977
20312978 WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id);
2032
- ena_buf = tx_info->bufs;
2033
- tx_info->skb = skb;
20342979
2035
- if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2036
- /* prepared the push buffer */
2037
- push_len = min_t(u32, len, tx_ring->tx_max_header_size);
2038
- header_len = push_len;
2039
- push_hdr = skb->data;
2040
- } else {
2041
- push_len = 0;
2042
- header_len = min_t(u32, len, tx_ring->tx_max_header_size);
2043
- push_hdr = NULL;
2044
- }
2045
-
2046
- netif_dbg(adapter, tx_queued, dev,
2047
- "skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
2048
- push_hdr, push_len);
2049
-
2050
- if (len > push_len) {
2051
- dma = dma_map_single(tx_ring->dev, skb->data + push_len,
2052
- len - push_len, DMA_TO_DEVICE);
2053
- if (dma_mapping_error(tx_ring->dev, dma))
2054
- goto error_report_dma_error;
2055
-
2056
- ena_buf->paddr = dma;
2057
- ena_buf->len = len - push_len;
2058
-
2059
- ena_buf++;
2060
- tx_info->num_of_bufs++;
2061
- }
2062
-
2063
- last_frag = skb_shinfo(skb)->nr_frags;
2064
-
2065
- for (i = 0; i < last_frag; i++) {
2066
- const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2067
-
2068
- len = skb_frag_size(frag);
2069
- dma = skb_frag_dma_map(tx_ring->dev, frag, 0, len,
2070
- DMA_TO_DEVICE);
2071
- if (dma_mapping_error(tx_ring->dev, dma))
2072
- goto error_report_dma_error;
2073
-
2074
- ena_buf->paddr = dma;
2075
- ena_buf->len = len;
2076
- ena_buf++;
2077
- }
2078
-
2079
- tx_info->num_of_bufs += last_frag;
2980
+ rc = ena_tx_map_skb(tx_ring, tx_info, skb, &push_hdr, &header_len);
2981
+ if (unlikely(rc))
2982
+ goto error_drop_packet;
20802983
20812984 memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
20822985 ena_tx_ctx.ena_bufs = tx_info->bufs;
....@@ -2086,43 +2989,25 @@
20862989 ena_tx_ctx.header_len = header_len;
20872990
20882991 /* set flags and meta data */
2089
- ena_tx_csum(&ena_tx_ctx, skb);
2992
+ ena_tx_csum(&ena_tx_ctx, skb, tx_ring->disable_meta_caching);
20902993
2091
- /* prepare the packet's descriptors to dma engine */
2092
- rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx,
2093
- &nb_hw_desc);
2094
-
2095
- if (unlikely(rc)) {
2096
- netif_err(adapter, tx_queued, dev,
2097
- "failed to prepare tx bufs\n");
2098
- u64_stats_update_begin(&tx_ring->syncp);
2099
- tx_ring->tx_stats.queue_stop++;
2100
- tx_ring->tx_stats.prepare_ctx_err++;
2101
- u64_stats_update_end(&tx_ring->syncp);
2102
- netif_tx_stop_queue(txq);
2994
+ rc = ena_xmit_common(dev,
2995
+ tx_ring,
2996
+ tx_info,
2997
+ &ena_tx_ctx,
2998
+ next_to_use,
2999
+ skb->len);
3000
+ if (rc)
21033001 goto error_unmap_dma;
2104
- }
21053002
21063003 netdev_tx_sent_queue(txq, skb->len);
2107
-
2108
- u64_stats_update_begin(&tx_ring->syncp);
2109
- tx_ring->tx_stats.cnt++;
2110
- tx_ring->tx_stats.bytes += skb->len;
2111
- u64_stats_update_end(&tx_ring->syncp);
2112
-
2113
- tx_info->tx_descs = nb_hw_desc;
2114
- tx_info->last_jiffies = jiffies;
2115
- tx_info->print_once = 0;
2116
-
2117
- tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
2118
- tx_ring->ring_size);
21193004
21203005 /* stop the queue when no more space available, the packet can have up
21213006 * to sgl_size + 2. one for the meta descriptor and one for header
21223007 * (if the header is larger than tx_max_header_size).
21233008 */
2124
- if (unlikely(ena_com_sq_empty_space(tx_ring->ena_com_io_sq) <
2125
- (tx_ring->sgl_size + 2))) {
3009
+ if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
3010
+ tx_ring->sgl_size + 2))) {
21263011 netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n",
21273012 __func__, qid);
21283013
....@@ -2141,8 +3026,8 @@
21413026 */
21423027 smp_mb();
21433028
2144
- if (ena_com_sq_empty_space(tx_ring->ena_com_io_sq)
2145
- > ENA_TX_WAKEUP_THRESH) {
3029
+ if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
3030
+ ENA_TX_WAKEUP_THRESH)) {
21463031 netif_tx_wake_queue(txq);
21473032 u64_stats_update_begin(&tx_ring->syncp);
21483033 tx_ring->tx_stats.queue_wakeup++;
....@@ -2150,7 +3035,7 @@
21503035 }
21513036 }
21523037
2153
- if (netif_xmit_stopped(txq) || !skb->xmit_more) {
3038
+ if (netif_xmit_stopped(txq) || !netdev_xmit_more()) {
21543039 /* trigger the dma engine. ena_com_write_sq_doorbell()
21553040 * has a mb
21563041 */
....@@ -2162,42 +3047,17 @@
21623047
21633048 return NETDEV_TX_OK;
21643049
2165
-error_report_dma_error:
2166
- u64_stats_update_begin(&tx_ring->syncp);
2167
- tx_ring->tx_stats.dma_mapping_err++;
2168
- u64_stats_update_end(&tx_ring->syncp);
2169
- netdev_warn(adapter->netdev, "failed to map skb\n");
2170
-
3050
+error_unmap_dma:
3051
+ ena_unmap_tx_buff(tx_ring, tx_info);
21713052 tx_info->skb = NULL;
21723053
2173
-error_unmap_dma:
2174
- if (i >= 0) {
2175
- /* save value of frag that failed */
2176
- last_frag = i;
2177
-
2178
- /* start back at beginning and unmap skb */
2179
- tx_info->skb = NULL;
2180
- ena_buf = tx_info->bufs;
2181
- dma_unmap_single(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
2182
- dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
2183
-
2184
- /* unmap remaining mapped pages */
2185
- for (i = 0; i < last_frag; i++) {
2186
- ena_buf++;
2187
- dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
2188
- dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
2189
- }
2190
- }
2191
-
21923054 error_drop_packet:
2193
-
21943055 dev_kfree_skb(skb);
21953056 return NETDEV_TX_OK;
21963057 }
21973058
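A sketch of the stop/wake checks in ena_start_xmit() above, assuming ena_com_sq_have_enough_space(sq, n) means "at least n free submission-queue entries" (the old code compared ena_com_sq_empty_space() against the same bounds). The thresholds below are illustrative:

#include <stdbool.h>
#include <stdio.h>

static bool have_enough_space(int free_entries, int needed)
{
	return free_entries >= needed;
}

int main(void)
{
	const int sgl_size = 16, wakeup_thresh = 32;   /* made-up values */
	int free_entries = 10;

	/* worst case one packet needs sgl_size + 2 descriptors */
	if (!have_enough_space(free_entries, sgl_size + 2))
		puts("stop queue");

	free_entries = 100;                            /* completions freed space */
	if (have_enough_space(free_entries, wakeup_thresh))
		puts("wake queue");
	return 0;
}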
21983059 static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
2199
- struct net_device *sb_dev,
2200
- select_queue_fallback_t fallback)
3060
+ struct net_device *sb_dev)
22013061 {
22023062 u16 qid;
22033063 /* we suspect that this is good for in-kernel network services that
....@@ -2207,25 +3067,27 @@
22073067 if (skb_rx_queue_recorded(skb))
22083068 qid = skb_get_rx_queue(skb);
22093069 else
2210
- qid = fallback(dev, skb, NULL);
3070
+ qid = netdev_pick_tx(dev, skb, NULL);
22113071
22123072 return qid;
22133073 }
22143074
2215
-static void ena_config_host_info(struct ena_com_dev *ena_dev)
3075
+static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
22163076 {
3077
+ struct device *dev = &pdev->dev;
22173078 struct ena_admin_host_info *host_info;
22183079 int rc;
22193080
22203081 /* Allocate only the host info */
22213082 rc = ena_com_allocate_host_info(ena_dev);
22223083 if (rc) {
2223
- pr_err("Cannot allocate host info\n");
3084
+ dev_err(dev, "Cannot allocate host info\n");
22243085 return;
22253086 }
22263087
22273088 host_info = ena_dev->host_attr.host_info;
22283089
3090
+ host_info->bdf = (pdev->bus->number << 8) | pdev->devfn;
22293091 host_info->os_type = ENA_ADMIN_OS_LINUX;
22303092 host_info->kernel_ver = LINUX_VERSION_CODE;
22313093 strlcpy(host_info->kernel_ver_str, utsname()->version,
....@@ -2234,16 +3096,24 @@
22343096 strncpy(host_info->os_dist_str, utsname()->release,
22353097 sizeof(host_info->os_dist_str) - 1);
22363098 host_info->driver_version =
2237
- (DRV_MODULE_VER_MAJOR) |
2238
- (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
2239
- (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
3099
+ (DRV_MODULE_GEN_MAJOR) |
3100
+ (DRV_MODULE_GEN_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
3101
+ (DRV_MODULE_GEN_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) |
3102
+ ("K"[0] << ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT);
3103
+ host_info->num_cpus = num_online_cpus();
3104
+
3105
+ host_info->driver_supported_features =
3106
+ ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
3107
+ ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK |
3108
+ ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK |
3109
+ ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK;
22403110
22413111 rc = ena_com_set_host_attributes(ena_dev);
22423112 if (rc) {
22433113 if (rc == -EOPNOTSUPP)
2244
- pr_warn("Cannot set host attributes\n");
3114
+ dev_warn(dev, "Cannot set host attributes\n");
22453115 else
2246
- pr_err("Cannot set host attributes\n");
3116
+ dev_err(dev, "Cannot set host attributes\n");
22473117
22483118 goto err;
22493119 }
....@@ -2271,7 +3141,8 @@
22713141
22723142 rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size);
22733143 if (rc) {
2274
- pr_err("Cannot allocate debug area\n");
3144
+ netif_err(adapter, drv, adapter->netdev,
3145
+ "Cannot allocate debug area\n");
22753146 return;
22763147 }
22773148
....@@ -2291,6 +3162,19 @@
22913162 ena_com_delete_debug_area(adapter->ena_dev);
22923163 }
22933164
3165
+int ena_update_hw_stats(struct ena_adapter *adapter)
3166
+{
3167
+ int rc = 0;
3168
+
3169
+ rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_stats);
3170
+ if (rc) {
3171
+ dev_info_once(&adapter->pdev->dev, "Failed to get ENI stats\n");
3172
+ return rc;
3173
+ }
3174
+
3175
+ return 0;
3176
+}
3177
+
22943178 static void ena_get_stats64(struct net_device *netdev,
22953179 struct rtnl_link_stats64 *stats)
22963180 {
....@@ -2298,12 +3182,13 @@
22983182 struct ena_ring *rx_ring, *tx_ring;
22993183 unsigned int start;
23003184 u64 rx_drops;
3185
+ u64 tx_drops;
23013186 int i;
23023187
23033188 if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
23043189 return;
23053190
2306
- for (i = 0; i < adapter->num_queues; i++) {
3191
+ for (i = 0; i < adapter->num_io_queues; i++) {
23073192 u64 bytes, packets;
23083193
23093194 tx_ring = &adapter->tx_ring[i];
....@@ -2332,9 +3217,11 @@
23323217 do {
23333218 start = u64_stats_fetch_begin_irq(&adapter->syncp);
23343219 rx_drops = adapter->dev_stats.rx_drops;
3220
+ tx_drops = adapter->dev_stats.tx_drops;
23353221 } while (u64_stats_fetch_retry_irq(&adapter->syncp, start));
23363222
23373223 stats->rx_dropped = rx_drops;
3224
+ stats->tx_dropped = tx_drops;
23383225
23393226 stats->multicast = 0;
23403227 stats->collisions = 0;
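A user-space analogue of the u64_stats fetch/retry loop used above to read rx_drops/tx_drops consistently: the reader snapshots a sequence counter and retries if a writer ran in between. The kernel helpers add the required barriers and the 32-bit handling; this only models the idea:

#include <stdint.h>
#include <stdio.h>

struct dev_stats {
	unsigned int seq;     /* even = stable, odd = write in progress */
	uint64_t rx_drops;
	uint64_t tx_drops;
};

static unsigned int fetch_begin(const struct dev_stats *s)
{
	unsigned int seq;

	do {
		seq = s->seq;
	} while (seq & 1);    /* wait out an in-flight writer */
	return seq;
}

static int fetch_retry(const struct dev_stats *s, unsigned int start)
{
	return s->seq != start;
}

int main(void)
{
	struct dev_stats s = { .seq = 2, .rx_drops = 5, .tx_drops = 1 };
	uint64_t rx, tx;
	unsigned int start;

	do {
		start = fetch_begin(&s);
		rx = s.rx_drops;
		tx = s.tx_drops;
	} while (fetch_retry(&s, start));

	printf("rx_dropped=%llu tx_dropped=%llu\n",
	       (unsigned long long)rx, (unsigned long long)tx);
	return 0;
}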
....@@ -2360,6 +3247,7 @@
23603247 .ndo_change_mtu = ena_change_mtu,
23613248 .ndo_set_mac_address = NULL,
23623249 .ndo_validate_addr = eth_validate_addr,
3250
+ .ndo_bpf = ena_xdp,
23633251 };
23643252
23653253 static int ena_device_validate_params(struct ena_adapter *adapter,
....@@ -2376,13 +3264,6 @@
23763264 return -EINVAL;
23773265 }
23783266
2379
- if ((get_feat_ctx->max_queues.max_cq_num < adapter->num_queues) ||
2380
- (get_feat_ctx->max_queues.max_sq_num < adapter->num_queues)) {
2381
- netif_err(adapter, drv, netdev,
2382
- "Error, device doesn't support enough queues\n");
2383
- return -EINVAL;
2384
- }
2385
-
23863267 if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) {
23873268 netif_err(adapter, drv, netdev,
23883269 "Error, device max mtu is smaller than netdev MTU\n");
....@@ -2392,10 +3273,71 @@
23923273 return 0;
23933274 }
23943275
3276
+static void set_default_llq_configurations(struct ena_llq_configurations *llq_config)
3277
+{
3278
+ llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
3279
+ llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
3280
+ llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
3281
+ llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
3282
+ llq_config->llq_ring_entry_size_value = 128;
3283
+}
3284
+
3285
+static int ena_set_queues_placement_policy(struct pci_dev *pdev,
3286
+ struct ena_com_dev *ena_dev,
3287
+ struct ena_admin_feature_llq_desc *llq,
3288
+ struct ena_llq_configurations *llq_default_configurations)
3289
+{
3290
+ int rc;
3291
+ u32 llq_feature_mask;
3292
+
3293
+ llq_feature_mask = 1 << ENA_ADMIN_LLQ;
3294
+ if (!(ena_dev->supported_features & llq_feature_mask)) {
3295
+ dev_err(&pdev->dev,
3296
+ "LLQ is not supported Fallback to host mode policy.\n");
3297
+ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3298
+ return 0;
3299
+ }
3300
+
3301
+ rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
3302
+ if (unlikely(rc)) {
3303
+ dev_err(&pdev->dev,
3304
+ "Failed to configure the device mode. Fallback to host mode policy.\n");
3305
+ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3306
+ }
3307
+
3308
+ return 0;
3309
+}
3310
+
3311
+static int ena_map_llq_mem_bar(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
3312
+ int bars)
3313
+{
3314
+ bool has_mem_bar = !!(bars & BIT(ENA_MEM_BAR));
3315
+
3316
+ if (!has_mem_bar) {
3317
+ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
3318
+ dev_err(&pdev->dev,
3319
+ "ENA device does not expose LLQ bar. Fallback to host mode policy.\n");
3320
+ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3321
+ }
3322
+
3323
+ return 0;
3324
+ }
3325
+
3326
+ ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
3327
+ pci_resource_start(pdev, ENA_MEM_BAR),
3328
+ pci_resource_len(pdev, ENA_MEM_BAR));
3329
+
3330
+ if (!ena_dev->mem_bar)
3331
+ return -EFAULT;
3332
+
3333
+ return 0;
3334
+}
3335
+
23953336 static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
23963337 struct ena_com_dev_get_features_ctx *get_feat_ctx,
23973338 bool *wd_state)
23983339 {
3340
+ struct ena_llq_configurations llq_config;
23993341 struct device *dev = &pdev->dev;
24003342 bool readless_supported;
24013343 u32 aenq_groups;
....@@ -2404,7 +3346,7 @@
24043346
24053347 rc = ena_com_mmio_reg_read_request_init(ena_dev);
24063348 if (rc) {
2407
- dev_err(dev, "failed to init mmio read less\n");
3349
+ dev_err(dev, "Failed to init mmio read less\n");
24083350 return rc;
24093351 }
24103352
....@@ -2422,7 +3364,7 @@
24223364
24233365 rc = ena_com_validate_version(ena_dev);
24243366 if (rc) {
2425
- dev_err(dev, "device version is too low\n");
3367
+ dev_err(dev, "Device version is too low\n");
24263368 goto err_mmio_read_less;
24273369 }
24283370
....@@ -2440,7 +3382,7 @@
24403382 }
24413383
24423384 /* ENA admin level init */
2443
- rc = ena_com_admin_init(ena_dev, &aenq_handlers, true);
3385
+ rc = ena_com_admin_init(ena_dev, &aenq_handlers);
24443386 if (rc) {
24453387 dev_err(dev,
24463388 "Can not initialize ena admin queue with device\n");
....@@ -2453,7 +3395,7 @@
24533395 */
24543396 ena_com_set_admin_polling_mode(ena_dev, true);
24553397
2456
- ena_config_host_info(ena_dev);
3398
+ ena_config_host_info(ena_dev, pdev);
24573399
24583400 /* Get Device Attributes*/
24593401 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
....@@ -2479,6 +3421,15 @@
24793421
24803422 *wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
24813423
3424
+ set_default_llq_configurations(&llq_config);
3425
+
3426
+ rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq,
3427
+ &llq_config);
3428
+ if (rc) {
3429
+ dev_err(dev, "ENA device init failed\n");
3430
+ goto err_admin_init;
3431
+ }
3432
+
24823433 return 0;
24833434
24843435 err_admin_init:
....@@ -2490,14 +3441,13 @@
24903441 return rc;
24913442 }
24923443
2493
-static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter,
2494
- int io_vectors)
3444
+static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
24953445 {
24963446 struct ena_com_dev *ena_dev = adapter->ena_dev;
24973447 struct device *dev = &adapter->pdev->dev;
24983448 int rc;
24993449
2500
- rc = ena_enable_msix(adapter, io_vectors);
3450
+ rc = ena_enable_msix(adapter);
25013451 if (rc) {
25023452 dev_err(dev, "Can not reserve msix vectors\n");
25033453 return rc;
....@@ -2538,17 +3488,14 @@
25383488
25393489 dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
25403490 adapter->dev_up_before_reset = dev_up;
2541
-
25423491 if (!graceful)
25433492 ena_com_set_admin_running_state(ena_dev, false);
25443493
25453494 if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
25463495 ena_down(adapter);
25473496
2548
- /* Before releasing the ENA resources, a device reset is required.
2549
- * (to prevent the device from accessing them).
2550
- * In case the reset flag is set and the device is up, ena_down()
2551
- * already perform the reset, so it can be skipped.
3497
+ /* Stop the device from sending AENQ events (in case reset flag is set
3498
+ * and device is up, ena_down() already reset the device).
25523499 */
25533500 if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
25543501 ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
....@@ -2565,6 +3512,7 @@
25653512
25663513 ena_com_mmio_reg_read_request_destroy(ena_dev);
25673514
3515
+ /* return reset reason to default value */
25683516 adapter->reset_reason = ENA_REGS_RESET_NORMAL;
25693517
25703518 clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
....@@ -2593,8 +3541,7 @@
25933541 goto err_device_destroy;
25943542 }
25953543
2596
- rc = ena_enable_msix_and_set_admin_interrupts(adapter,
2597
- adapter->num_queues);
3544
+ rc = ena_enable_msix_and_set_admin_interrupts(adapter);
25983545 if (rc) {
25993546 dev_err(&pdev->dev, "Enable MSI-X failed\n");
26003547 goto err_device_destroy;
....@@ -2615,6 +3562,8 @@
26153562 netif_carrier_on(adapter->netdev);
26163563
26173564 mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
3565
+ adapter->last_keep_alive_jiffies = jiffies;
3566
+
26183567 dev_err(&pdev->dev, "Device reset completed successfully\n");
26193568
26203569 return rc;
....@@ -2740,7 +3689,9 @@
27403689 struct ena_ring *tx_ring;
27413690 struct ena_ring *rx_ring;
27423691 int i, budget, rc;
3692
+ int io_queue_count;
27433693
3694
+ io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
27443695 /* Make sure the driver doesn't turn the device in other process */
27453696 smp_rmb();
27463697
....@@ -2755,7 +3706,7 @@
27553706
27563707 budget = ENA_MONITORED_TX_QUEUES;
27573708
2758
- for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
3709
+ for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) {
27593710 tx_ring = &adapter->tx_ring[i];
27603711 rx_ring = &adapter->rx_ring[i];
27613712
....@@ -2763,7 +3714,8 @@
27633714 if (unlikely(rc))
27643715 return;
27653716
2766
- rc = check_for_rx_interrupt_queue(adapter, rx_ring);
3717
+ rc = !ENA_IS_XDP_INDEX(adapter, i) ?
3718
+ check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
27673719 if (unlikely(rc))
27683720 return;
27693721
....@@ -2772,7 +3724,7 @@
27723724 break;
27733725 }
27743726
2775
- adapter->last_monitored_tx_qid = i % adapter->num_queues;
3727
+ adapter->last_monitored_tx_qid = i % io_queue_count;
27763728 }
27773729
27783730 /* trigger napi schedule after 2 consecutive detections */
....@@ -2802,11 +3754,10 @@
28023754 if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
28033755 return;
28043756
2805
- for (i = 0; i < adapter->num_queues; i++) {
3757
+ for (i = 0; i < adapter->num_io_queues; i++) {
28063758 rx_ring = &adapter->rx_ring[i];
28073759
2808
- refill_required =
2809
- ena_com_sq_empty_space(rx_ring->ena_com_io_sq);
3760
+ refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
28103761 if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
28113762 rx_ring->empty_rx_queue++;
28123763
....@@ -2816,7 +3767,7 @@
28163767 u64_stats_update_end(&rx_ring->syncp);
28173768
28183769 netif_err(adapter, drv, adapter->netdev,
2819
- "trigger refill for ring %d\n", i);
3770
+ "Trigger refill for ring %d\n", i);
28203771
28213772 napi_schedule(rx_ring->napi);
28223773 rx_ring->empty_rx_queue = 0;
....@@ -2944,54 +3895,40 @@
29443895 mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
29453896 }
29463897
2947
-static int ena_calc_io_queue_num(struct pci_dev *pdev,
2948
- struct ena_com_dev *ena_dev,
2949
- struct ena_com_dev_get_features_ctx *get_feat_ctx)
3898
+static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev,
3899
+ struct ena_com_dev *ena_dev,
3900
+ struct ena_com_dev_get_features_ctx *get_feat_ctx)
29503901 {
2951
- int io_sq_num, io_queue_num;
3902
+ u32 io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
29523903
2953
- /* In case of LLQ use the llq number in the get feature cmd */
2954
- if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2955
- io_sq_num = get_feat_ctx->max_queues.max_llq_num;
3904
+ if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
3905
+ struct ena_admin_queue_ext_feature_fields *max_queue_ext =
3906
+ &get_feat_ctx->max_queue_ext.max_queue_ext;
3907
+ io_rx_num = min_t(u32, max_queue_ext->max_rx_sq_num,
3908
+ max_queue_ext->max_rx_cq_num);
29563909
2957
- if (io_sq_num == 0) {
2958
- dev_err(&pdev->dev,
2959
- "Trying to use LLQ but llq_num is 0. Fall back into regular queues\n");
2960
-
2961
- ena_dev->tx_mem_queue_type =
2962
- ENA_ADMIN_PLACEMENT_POLICY_HOST;
2963
- io_sq_num = get_feat_ctx->max_queues.max_sq_num;
2964
- }
3910
+ io_tx_sq_num = max_queue_ext->max_tx_sq_num;
3911
+ io_tx_cq_num = max_queue_ext->max_tx_cq_num;
29653912 } else {
2966
- io_sq_num = get_feat_ctx->max_queues.max_sq_num;
3913
+ struct ena_admin_queue_feature_desc *max_queues =
3914
+ &get_feat_ctx->max_queues;
3915
+ io_tx_sq_num = max_queues->max_sq_num;
3916
+ io_tx_cq_num = max_queues->max_cq_num;
3917
+ io_rx_num = min_t(u32, io_tx_sq_num, io_tx_cq_num);
29673918 }
29683919
2969
- io_queue_num = min_t(int, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
2970
- io_queue_num = min_t(int, io_queue_num, io_sq_num);
2971
- io_queue_num = min_t(int, io_queue_num,
2972
- get_feat_ctx->max_queues.max_cq_num);
3920
+ /* In case of LLQ use the llq fields for the tx SQ/CQ */
3921
+ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3922
+ io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
3923
+
3924
+ max_num_io_queues = min_t(u32, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
3925
+ max_num_io_queues = min_t(u32, max_num_io_queues, io_rx_num);
3926
+ max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_sq_num);
3927
+ max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num);
29733928 /* 1 IRQ for mgmnt and 1 IRQ for each IO direction */
2974
- io_queue_num = min_t(int, io_queue_num, pci_msix_vec_count(pdev) - 1);
2975
- if (unlikely(!io_queue_num)) {
2976
- dev_err(&pdev->dev, "The device doesn't have io queues\n");
2977
- return -EFAULT;
2978
- }
3929
+ max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1);
29793930
2980
- return io_queue_num;
2981
-}
2982
-
2983
-static void ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
2984
- struct ena_com_dev_get_features_ctx *get_feat_ctx)
2985
-{
2986
- bool has_mem_bar;
2987
-
2988
- has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR);
2989
-
2990
- /* Enable push mode if device supports LLQ */
2991
- if (has_mem_bar && (get_feat_ctx->max_queues.max_llq_num > 0))
2992
- ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV;
2993
- else
2994
- ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3931
+ return max_num_io_queues;
29953932 }
29963933
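A sketch of the calculation above: the usable IO queue count is the minimum of the online CPU count, the device's RX and TX SQ/CQ limits, and the MSI-X vectors left once the management vector is set aside. The inputs are made up:

#include <stdio.h>

static unsigned int min_u32(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned int online_cpus = 16, io_rx_num = 32;
	unsigned int io_tx_sq_num = 32, io_tx_cq_num = 32;
	unsigned int msix_vec_count = 9;               /* 1 admin + 8 IO vectors */
	unsigned int max_io_queues;

	max_io_queues = min_u32(online_cpus, io_rx_num);
	max_io_queues = min_u32(max_io_queues, io_tx_sq_num);
	max_io_queues = min_u32(max_io_queues, io_tx_cq_num);
	max_io_queues = min_u32(max_io_queues, msix_vec_count - 1);

	printf("max_num_io_queues = %u\n", max_io_queues);   /* 8 */
	return 0;
}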
29973934 static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
....@@ -3071,7 +4008,7 @@
30714008 }
30724009
30734010 for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
3074
- val = ethtool_rxfh_indir_default(i, adapter->num_queues);
4011
+ val = ethtool_rxfh_indir_default(i, adapter->num_io_queues);
30754012 rc = ena_com_indirect_table_fill_entry(ena_dev, i,
30764013 ENA_IO_RXQ_IDX(val));
30774014 if (unlikely(rc && (rc != -EOPNOTSUPP))) {
....@@ -3080,7 +4017,7 @@
30804017 }
30814018 }
30824019
3083
- rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL,
4020
+ rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL,
30844021 ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
30854022 if (unlikely(rc && (rc != -EOPNOTSUPP))) {
30864023 dev_err(dev, "Cannot fill hash function\n");
....@@ -3109,36 +4046,71 @@
31094046 pci_release_selected_regions(pdev, release_bars);
31104047 }
31114048
3112
-static int ena_calc_queue_size(struct pci_dev *pdev,
3113
- struct ena_com_dev *ena_dev,
3114
- u16 *max_tx_sgl_size,
3115
- u16 *max_rx_sgl_size,
3116
- struct ena_com_dev_get_features_ctx *get_feat_ctx)
4049
+
4050
+static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
31174051 {
3118
- u32 queue_size = ENA_DEFAULT_RING_SIZE;
4052
+ struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
4053
+ struct ena_com_dev *ena_dev = ctx->ena_dev;
4054
+ u32 tx_queue_size = ENA_DEFAULT_RING_SIZE;
4055
+ u32 rx_queue_size = ENA_DEFAULT_RING_SIZE;
4056
+ u32 max_tx_queue_size;
4057
+ u32 max_rx_queue_size;
31194058
3120
- queue_size = min_t(u32, queue_size,
3121
- get_feat_ctx->max_queues.max_cq_depth);
3122
- queue_size = min_t(u32, queue_size,
3123
- get_feat_ctx->max_queues.max_sq_depth);
4059
+ if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
4060
+ struct ena_admin_queue_ext_feature_fields *max_queue_ext =
4061
+ &ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
4062
+ max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth,
4063
+ max_queue_ext->max_rx_sq_depth);
4064
+ max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
31244065
3125
- if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3126
- queue_size = min_t(u32, queue_size,
3127
- get_feat_ctx->max_queues.max_llq_depth);
4066
+ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
4067
+ max_tx_queue_size = min_t(u32, max_tx_queue_size,
4068
+ llq->max_llq_depth);
4069
+ else
4070
+ max_tx_queue_size = min_t(u32, max_tx_queue_size,
4071
+ max_queue_ext->max_tx_sq_depth);
31284072
3129
- queue_size = rounddown_pow_of_two(queue_size);
4073
+ ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4074
+ max_queue_ext->max_per_packet_tx_descs);
4075
+ ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4076
+ max_queue_ext->max_per_packet_rx_descs);
4077
+ } else {
4078
+ struct ena_admin_queue_feature_desc *max_queues =
4079
+ &ctx->get_feat_ctx->max_queues;
4080
+ max_rx_queue_size = min_t(u32, max_queues->max_cq_depth,
4081
+ max_queues->max_sq_depth);
4082
+ max_tx_queue_size = max_queues->max_cq_depth;
31304083
3131
- if (unlikely(!queue_size)) {
3132
- dev_err(&pdev->dev, "Invalid queue size\n");
3133
- return -EFAULT;
4084
+ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
4085
+ max_tx_queue_size = min_t(u32, max_tx_queue_size,
4086
+ llq->max_llq_depth);
4087
+ else
4088
+ max_tx_queue_size = min_t(u32, max_tx_queue_size,
4089
+ max_queues->max_sq_depth);
4090
+
4091
+ ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4092
+ max_queues->max_packet_tx_descs);
4093
+ ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4094
+ max_queues->max_packet_rx_descs);
31344095 }
31354096
3136
- *max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
3137
- get_feat_ctx->max_queues.max_packet_tx_descs);
3138
- *max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
3139
- get_feat_ctx->max_queues.max_packet_rx_descs);
4097
+ max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size);
4098
+ max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size);
31404099
3141
- return queue_size;
4100
+ tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
4101
+ max_tx_queue_size);
4102
+ rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
4103
+ max_rx_queue_size);
4104
+
4105
+ tx_queue_size = rounddown_pow_of_two(tx_queue_size);
4106
+ rx_queue_size = rounddown_pow_of_two(rx_queue_size);
4107
+
4108
+ ctx->max_tx_queue_size = max_tx_queue_size;
4109
+ ctx->max_rx_queue_size = max_rx_queue_size;
4110
+ ctx->tx_queue_size = tx_queue_size;
4111
+ ctx->rx_queue_size = rx_queue_size;
4112
+
4113
+ return 0;
31424114 }
31434115
31444116 /* ena_probe - Device Initialization Routine
....@@ -3153,22 +4125,17 @@
31534125 */
31544126 static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
31554127 {
4128
+ struct ena_calc_queue_size_ctx calc_queue_ctx = {};
31564129 struct ena_com_dev_get_features_ctx get_feat_ctx;
3157
- static int version_printed;
3158
- struct net_device *netdev;
3159
- struct ena_adapter *adapter;
31604130 struct ena_com_dev *ena_dev = NULL;
4131
+ struct ena_adapter *adapter;
4132
+ struct net_device *netdev;
31614133 static int adapters_found;
3162
- int io_queue_num, bars, rc;
3163
- int queue_size;
3164
- u16 tx_sgl_size = 0;
3165
- u16 rx_sgl_size = 0;
4134
+ u32 max_num_io_queues;
31664135 bool wd_state;
4136
+ int bars, rc;
31674137
31684138 dev_dbg(&pdev->dev, "%s\n", __func__);
3169
-
3170
- if (version_printed++ == 0)
3171
- dev_info(&pdev->dev, "%s", version);
31724139
31734140 rc = pci_enable_device_mem(pdev);
31744141 if (rc) {
....@@ -3202,50 +4169,48 @@
32024169 pci_resource_start(pdev, ENA_REG_BAR),
32034170 pci_resource_len(pdev, ENA_REG_BAR));
32044171 if (!ena_dev->reg_bar) {
3205
- dev_err(&pdev->dev, "failed to remap regs bar\n");
4172
+ dev_err(&pdev->dev, "Failed to remap regs bar\n");
32064173 rc = -EFAULT;
32074174 goto err_free_region;
32084175 }
4176
+
4177
+ ena_dev->ena_min_poll_delay_us = ENA_ADMIN_POLL_DELAY_US;
32094178
32104179 ena_dev->dmadev = &pdev->dev;
32114180
32124181 rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state);
32134182 if (rc) {
3214
- dev_err(&pdev->dev, "ena device init failed\n");
4183
+ dev_err(&pdev->dev, "ENA device init failed\n");
32154184 if (rc == -ETIME)
32164185 rc = -EPROBE_DEFER;
32174186 goto err_free_region;
32184187 }
32194188
3220
- ena_set_push_mode(pdev, ena_dev, &get_feat_ctx);
3221
-
3222
- if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
3223
- ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
3224
- pci_resource_start(pdev, ENA_MEM_BAR),
3225
- pci_resource_len(pdev, ENA_MEM_BAR));
3226
- if (!ena_dev->mem_bar) {
3227
- rc = -EFAULT;
3228
- goto err_device_destroy;
3229
- }
4189
+ rc = ena_map_llq_mem_bar(pdev, ena_dev, bars);
4190
+ if (rc) {
4191
+ dev_err(&pdev->dev, "ENA llq bar mapping failed\n");
4192
+ goto err_free_ena_dev;
32304193 }
32314194
3232
- /* initial Tx interrupt delay, Assumes 1 usec granularity.
3233
- * Updated during device initialization with the real granularity
3234
- */
4195
+ calc_queue_ctx.ena_dev = ena_dev;
4196
+ calc_queue_ctx.get_feat_ctx = &get_feat_ctx;
4197
+ calc_queue_ctx.pdev = pdev;
4198
+
4199
+ /* Initial TX and RX interrupt delay. Assumes 1 usec granularity.
4200
+ * Updated during device initialization with the real granularity
4201
+ */
32354202 ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
3236
- io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx);
3237
- queue_size = ena_calc_queue_size(pdev, ena_dev, &tx_sgl_size,
3238
- &rx_sgl_size, &get_feat_ctx);
3239
- if ((queue_size <= 0) || (io_queue_num <= 0)) {
4203
+ ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS;
4204
+ ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION;
4205
+ max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, &get_feat_ctx);
4206
+ rc = ena_calc_io_queue_size(&calc_queue_ctx);
4207
+ if (rc || !max_num_io_queues) {
32404208 rc = -EFAULT;
32414209 goto err_device_destroy;
32424210 }
32434211
3244
- dev_info(&pdev->dev, "creating %d io queues. queue size: %d\n",
3245
- io_queue_num, queue_size);
3246
-
32474212 /* dev zeroed in init_etherdev */
3248
- netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), io_queue_num);
4213
+ netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), max_num_io_queues);
32494214 if (!netdev) {
32504215 dev_err(&pdev->dev, "alloc_etherdev_mq failed\n");
32514216 rc = -ENOMEM;
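The hunk above allocates the netdev for the maximum queue count the device reports rather than the number currently in use, so the active count can be reduced or raised later without reallocating. Roughly, with names invented for the sketch:

#include <linux/etherdevice.h>
#include <linux/netdevice.h>

struct example_priv {
        u32 num_io_queues;      /* queues in use, <= the allocation size */
};

/* Sketch only: size the multiqueue netdev for the device maximum up front. */
static struct net_device *example_alloc_netdev(u32 max_num_io_queues)
{
        return alloc_etherdev_mq(sizeof(struct example_priv),
                                 max_num_io_queues);
}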
....@@ -3266,16 +4231,26 @@
32664231 adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
32674232 adapter->reset_reason = ENA_REGS_RESET_NORMAL;
32684233
3269
- adapter->tx_ring_size = queue_size;
3270
- adapter->rx_ring_size = queue_size;
4234
+ adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size;
4235
+ adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size;
4236
+ adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size;
4237
+ adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size;
4238
+ adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
4239
+ adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
32714240
3272
- adapter->max_tx_sgl_size = tx_sgl_size;
3273
- adapter->max_rx_sgl_size = rx_sgl_size;
3274
-
3275
- adapter->num_queues = io_queue_num;
4241
+ adapter->num_io_queues = max_num_io_queues;
4242
+ adapter->max_num_io_queues = max_num_io_queues;
32764243 adapter->last_monitored_tx_qid = 0;
32774244
4245
+ adapter->xdp_first_ring = 0;
4246
+ adapter->xdp_num_queues = 0;
4247
+
32784248 adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
4249
+ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
4250
+ adapter->disable_meta_caching =
4251
+ !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags &
4252
+ BIT(ENA_ADMIN_DISABLE_META_CACHING));
4253
+
32794254 adapter->wd_state = wd_state;
32804255
32814256 snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found);
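The disable_meta_caching assignment above uses the usual !!(flags & BIT(n)) idiom to collapse a masked capability bit into 0 or 1 before storing it. A detached sketch of the same test, with names invented here:

#include <linux/bits.h>
#include <linux/types.h>

/* Sketch only: the double negation turns any non-zero masked value into
 * exactly 1, so the stored field is a clean boolean regardless of which
 * bit position the capability occupies.
 */
static inline bool example_cap_is_set(u32 supported_flags, unsigned int cap_bit)
{
        return !!(supported_flags & BIT(cap_bit));
}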
....@@ -3286,7 +4261,10 @@
32864261 "Failed to query interrupt moderation feature\n");
32874262 goto err_netdev_destroy;
32884263 }
3289
- ena_init_io_rings(adapter);
4264
+ ena_init_io_rings(adapter,
4265
+ 0,
4266
+ adapter->xdp_num_queues +
4267
+ adapter->num_io_queues);
32904268
32914269 netdev->netdev_ops = &ena_netdev_ops;
32924270 netdev->watchdog_timeo = TX_TIMEOUT;
....@@ -3296,7 +4274,7 @@
32964274
32974275 u64_stats_init(&adapter->syncp);
32984276
3299
- rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num);
4277
+ rc = ena_enable_msix_and_set_admin_interrupts(adapter);
33004278 if (rc) {
33014279 dev_err(&pdev->dev,
33024280 "Failed to enable and set the admin interrupts\n");
....@@ -3309,6 +4287,11 @@
33094287 }
33104288
33114289 ena_config_debug_area(adapter);
4290
+
4291
+ if (!ena_update_hw_stats(adapter))
4292
+ adapter->eni_stats_supported = true;
4293
+ else
4294
+ adapter->eni_stats_supported = false;
33124295
33134296 memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
33144297
....@@ -3332,9 +4315,10 @@
33324315 timer_setup(&adapter->timer_service, ena_timer_service, 0);
33334316 mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
33344317
3335
- dev_info(&pdev->dev, "%s found at mem %lx, mac addr %pM Queues %d\n",
4318
+ dev_info(&pdev->dev,
4319
+ "%s found at mem %lx, mac addr %pM\n",
33364320 DEVICE_NAME, (long)pci_resource_start(pdev, 0),
3337
- netdev->dev_addr, io_queue_num);
4321
+ netdev->dev_addr);
33384322
33394323 set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
33404324
....@@ -3347,10 +4331,11 @@
33474331 ena_com_rss_destroy(ena_dev);
33484332 err_free_msix:
33494333 ena_com_dev_reset(ena_dev, ENA_REGS_RESET_INIT_ERR);
4334
+ /* stop submitting admin commands on a device that was reset */
4335
+ ena_com_set_admin_running_state(ena_dev, false);
33504336 ena_free_mgmnt_irq(adapter);
33514337 ena_disable_msix(adapter);
33524338 err_worker_destroy:
3353
- ena_com_destroy_interrupt_moderation(ena_dev);
33544339 del_timer(&adapter->timer_service);
33554340 err_netdev_destroy:
33564341 free_netdev(netdev);
....@@ -3368,13 +4353,15 @@
33684353
33694354 /*****************************************************************************/
33704355
3371
-/* ena_remove - Device Removal Routine
4356
+/* __ena_shutoff - Helper used in both PCI remove/shutdown routines
33724357 * @pdev: PCI device information struct
4358
+ * @shutdown: Is it a shutdown operation? If false, it is a removal operation
33734359 *
3374
- * ena_remove is called by the PCI subsystem to alert the driver
3375
- * that it should release a PCI device.
4360
+ * __ena_shutoff is a helper routine that does the real work on shutdown and
4361
+ * removal paths; the difference between those paths is whether to
4362
+ * detach or unregister the netdevice.
33764363 */
3377
-static void ena_remove(struct pci_dev *pdev)
4364
+static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
33784365 {
33794366 struct ena_adapter *adapter = pci_get_drvdata(pdev);
33804367 struct ena_com_dev *ena_dev;
....@@ -3396,19 +4383,18 @@
33964383 del_timer_sync(&adapter->timer_service);
33974384 cancel_work_sync(&adapter->reset_task);
33984385
3399
- unregister_netdev(netdev);
3400
-
3401
- /* If the device is running then we want to make sure the device will be
3402
- * reset to make sure no more events will be issued by the device.
3403
- */
3404
- if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
3405
- set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3406
-
3407
- rtnl_lock();
4386
+ rtnl_lock(); /* lock released inside the if-else block below */
4387
+ adapter->reset_reason = ENA_REGS_RESET_SHUTDOWN;
34084388 ena_destroy_device(adapter, true);
3409
- rtnl_unlock();
3410
-
3411
- free_netdev(netdev);
4389
+ if (shutdown) {
4390
+ netif_device_detach(netdev);
4391
+ dev_close(netdev);
4392
+ rtnl_unlock();
4393
+ } else {
4394
+ rtnl_unlock();
4395
+ unregister_netdev(netdev);
4396
+ free_netdev(netdev);
4397
+ }
34124398
34134399 ena_com_rss_destroy(ena_dev);
34144400
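__ena_shutoff() above is the single teardown path for both the remove and shutdown callbacks; only the netdev handling differs, because unregister_netdev() takes the rtnl lock itself while detach/close must run under it. A simplified sketch of that split, with a driver-data layout invented for illustration:

#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/rtnetlink.h>

/* Sketch only: assumes drvdata is the netdev itself, unlike the real driver. */
static void example_shutoff(struct pci_dev *pdev, bool shutdown)
{
        struct net_device *netdev = pci_get_drvdata(pdev);

        rtnl_lock();
        /* ... device-level teardown under rtnl ... */
        if (shutdown) {
                /* shutdown/kexec: keep the netdev registered, just stop it */
                netif_device_detach(netdev);
                dev_close(netdev);
                rtnl_unlock();
        } else {
                /* removal: unregister_netdev() must not be called under rtnl */
                rtnl_unlock();
                unregister_netdev(netdev);
                free_netdev(netdev);
        }
}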
....@@ -3420,18 +4406,39 @@
34204406
34214407 pci_disable_device(pdev);
34224408
3423
- ena_com_destroy_interrupt_moderation(ena_dev);
3424
-
34254409 vfree(ena_dev);
34264410 }
34274411
3428
-#ifdef CONFIG_PM
3429
-/* ena_suspend - PM suspend callback
4412
+/* ena_remove - Device Removal Routine
34304413 * @pdev: PCI device information struct
3431
- * @state:power state
4414
+ *
4415
+ * ena_remove is called by the PCI subsystem to alert the driver
4416
+ * that it should release a PCI device.
34324417 */
3433
-static int ena_suspend(struct pci_dev *pdev, pm_message_t state)
4418
+
4419
+static void ena_remove(struct pci_dev *pdev)
34344420 {
4421
+ __ena_shutoff(pdev, false);
4422
+}
4423
+
4424
+/* ena_shutdown - Device Shutdown Routine
4425
+ * @pdev: PCI device information struct
4426
+ *
4427
+ * ena_shutdown is called by the PCI subsystem to alert the driver that
4428
+ * a shutdown/reboot (or kexec) is happening and the device must be disabled.
4429
+ */
4430
+
4431
+static void ena_shutdown(struct pci_dev *pdev)
4432
+{
4433
+ __ena_shutoff(pdev, true);
4434
+}
4435
+
4436
+/* ena_suspend - PM suspend callback
4437
+ * @dev_d: Device information struct
4438
+ */
4439
+static int __maybe_unused ena_suspend(struct device *dev_d)
4440
+{
4441
+ struct pci_dev *pdev = to_pci_dev(dev_d);
34354442 struct ena_adapter *adapter = pci_get_drvdata(pdev);
34364443
34374444 u64_stats_update_begin(&adapter->syncp);
....@@ -3441,7 +4448,7 @@
34414448 rtnl_lock();
34424449 if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
34434450 dev_err(&pdev->dev,
3444
- "ignoring device reset request as the device is being suspended\n");
4451
+ "Ignoring device reset request as the device is being suspended\n");
34454452 clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
34464453 }
34474454 ena_destroy_device(adapter, true);
....@@ -3450,12 +4457,11 @@
34504457 }
34514458
34524459 /* ena_resume - PM resume callback
3453
- * @pdev: PCI device information struct
3454
- *
4460
+ * @dev_d: Device information struct
34554461 */
3456
-static int ena_resume(struct pci_dev *pdev)
4462
+static int __maybe_unused ena_resume(struct device *dev_d)
34574463 {
3458
- struct ena_adapter *adapter = pci_get_drvdata(pdev);
4464
+ struct ena_adapter *adapter = dev_get_drvdata(dev_d);
34594465 int rc;
34604466
34614467 u64_stats_update_begin(&adapter->syncp);
....@@ -3467,23 +4473,22 @@
34674473 rtnl_unlock();
34684474 return rc;
34694475 }
3470
-#endif
4476
+
4477
+static SIMPLE_DEV_PM_OPS(ena_pm_ops, ena_suspend, ena_resume);
34714478
34724479 static struct pci_driver ena_pci_driver = {
34734480 .name = DRV_MODULE_NAME,
34744481 .id_table = ena_pci_tbl,
34754482 .probe = ena_probe,
34764483 .remove = ena_remove,
3477
-#ifdef CONFIG_PM
3478
- .suspend = ena_suspend,
3479
- .resume = ena_resume,
3480
-#endif
4484
+ .shutdown = ena_shutdown,
4485
+ .driver.pm = &ena_pm_ops,
34814486 .sriov_configure = pci_sriov_configure_simple,
34824487 };
34834488
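The suspend/resume conversion above moves from the legacy CONFIG_PM-guarded pci_driver hooks to dev_pm_ops: the callbacks now take a struct device, are marked __maybe_unused (SIMPLE_DEV_PM_OPS only references them when CONFIG_PM_SLEEP is set), and are attached through .driver.pm. A bare-bones sketch of that wiring with placeholder names:

#include <linux/pci.h>
#include <linux/pm.h>

static int __maybe_unused example_suspend(struct device *dev_d)
{
        /* quiesce the device; dev_get_drvdata(dev_d) yields the driver state */
        return 0;
}

static int __maybe_unused example_resume(struct device *dev_d)
{
        /* bring the device back up */
        return 0;
}

static SIMPLE_DEV_PM_OPS(example_pm_ops, example_suspend, example_resume);

static struct pci_driver example_pci_driver = {
        .name = "example",
        .driver.pm = &example_pm_ops,
        /* .probe/.remove/.shutdown omitted in this sketch */
};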
34844489 static int __init ena_init(void)
34854490 {
3486
- pr_info("%s", version);
4491
+ int ret;
34874492
34884493 ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME);
34894494 if (!ena_wq) {
....@@ -3491,7 +4496,11 @@
34914496 return -ENOMEM;
34924497 }
34934498
3494
- return pci_register_driver(&ena_pci_driver);
4499
+ ret = pci_register_driver(&ena_pci_driver);
4500
+ if (ret)
4501
+ destroy_workqueue(ena_wq);
4502
+
4503
+ return ret;
34954504 }
34964505
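ena_init() above now destroys the workqueue when pci_register_driver() fails, so a failed module load no longer leaks it. The general shape of that init/unwind pairing, sketched with placeholder names:

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;
static struct pci_driver example_pci_driver;

static int __init example_init(void)
{
        int ret;

        example_wq = create_singlethread_workqueue("example");
        if (!example_wq)
                return -ENOMEM;

        ret = pci_register_driver(&example_pci_driver);
        if (ret)
                destroy_workqueue(example_wq);  /* undo on registration failure */

        return ret;
}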
34974506 static void __exit ena_cleanup(void)
....@@ -3520,7 +4529,7 @@
35204529 ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
35214530
35224531 if (status) {
3523
- netdev_dbg(adapter->netdev, "%s\n", __func__);
4532
+ netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__);
35244533 set_bit(ENA_FLAG_LINK_UP, &adapter->flags);
35254534 if (!test_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags))
35264535 netif_carrier_on(adapter->netdev);
....@@ -3536,17 +4545,20 @@
35364545 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
35374546 struct ena_admin_aenq_keep_alive_desc *desc;
35384547 u64 rx_drops;
4548
+ u64 tx_drops;
35394549
35404550 desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
35414551 adapter->last_keep_alive_jiffies = jiffies;
35424552
35434553 rx_drops = ((u64)desc->rx_drops_high << 32) | desc->rx_drops_low;
4554
+ tx_drops = ((u64)desc->tx_drops_high << 32) | desc->tx_drops_low;
35444555
35454556 u64_stats_update_begin(&adapter->syncp);
35464557 /* These stats are accumulated by the device, so the counters indicate
35474558 * all drops since last reset.
35484559 */
35494560 adapter->dev_stats.rx_drops = rx_drops;
4561
+ adapter->dev_stats.tx_drops = tx_drops;
35504562 u64_stats_update_end(&adapter->syncp);
35514563 }
35524564
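The keep-alive handler above now also folds the TX drop count out of the descriptor: each counter arrives as 32-bit high/low halves, is reassembled into a u64, and is published inside a u64_stats_update section so 32-bit readers never observe a torn value. A detached sketch of that pattern, with a struct layout invented here:

#include <linux/types.h>
#include <linux/u64_stats_sync.h>

struct example_dev_stats {
        struct u64_stats_sync syncp;
        u64 rx_drops;
        u64 tx_drops;
};

/* Sketch only: combine the halves and publish atomically w.r.t. readers. */
static void example_store_drops(struct example_dev_stats *stats,
                                u32 rx_hi, u32 rx_lo, u32 tx_hi, u32 tx_lo)
{
        u64 rx_drops = ((u64)rx_hi << 32) | rx_lo;
        u64 tx_drops = ((u64)tx_hi << 32) | tx_lo;

        u64_stats_update_begin(&stats->syncp);
        stats->rx_drops = rx_drops;
        stats->tx_drops = tx_drops;
        u64_stats_update_end(&stats->syncp);
}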
....@@ -3561,7 +4573,7 @@
35614573 aenq_e->aenq_common_desc.group,
35624574 ENA_ADMIN_NOTIFICATION);
35634575
3564
- switch (aenq_e->aenq_common_desc.syndrom) {
4576
+ switch (aenq_e->aenq_common_desc.syndrome) {
35654577 case ENA_ADMIN_UPDATE_HINTS:
35664578 hints = (struct ena_admin_ena_hw_hints *)
35674579 (&aenq_e->inline_data_w4);
....@@ -3570,7 +4582,7 @@
35704582 default:
35714583 netif_err(adapter, drv, adapter->netdev,
35724584 "Invalid aenq notification link state %d\n",
3573
- aenq_e->aenq_common_desc.syndrom);
4585
+ aenq_e->aenq_common_desc.syndrome);
35744586 }
35754587 }
35764588