2023-12-09 b22da3d8526a935aa31e086e63f60ff3246cb61c
kernel/drivers/infiniband/hw/hfi1/init.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license. When using or
  * redistributing this file, you may do so under either license.
@@ -49,11 +49,12 @@
 #include <linux/netdevice.h>
 #include <linux/vmalloc.h>
 #include <linux/delay.h>
-#include <linux/idr.h>
+#include <linux/xarray.h>
 #include <linux/module.h>
 #include <linux/printk.h>
 #include <linux/hrtimer.h>
 #include <linux/bitmap.h>
+#include <linux/numa.h>
 #include <rdma/rdma_vt.h>
 
 #include "hfi.h"
@@ -68,20 +69,20 @@
 #include "affinity.h"
 #include "vnic.h"
 #include "exp_rcv.h"
+#include "netdev.h"
 
 #undef pr_fmt
 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
 
-#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
 /*
  * min buffers we want to have per context, after driver
  */
 #define HFI1_MIN_USER_CTXT_BUFCNT 7
 
-#define HFI1_MIN_HDRQ_EGRBUF_CNT 2
-#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352
 #define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
 #define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
+
+#define NUM_IB_PORTS 1
 
 /*
  * Number of user receive contexts we are configured to use (to allow for more
@@ -120,9 +121,7 @@
 module_param(user_credit_return_threshold, uint, S_IRUGO);
 MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)");
 
-static inline u64 encode_rcv_header_entry_size(u16 size);
-
-static struct idr hfi1_unit_table;
+DEFINE_XARRAY_FLAGS(hfi1_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
 
 static int hfi1_create_kctxt(struct hfi1_devdata *dd,
			     struct hfi1_pportdata *ppd)
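
Note: this hunk replaces the driver-wide IDR (guarded externally by hfi1_devs_lock) with a single allocating XArray. XA_FLAGS_ALLOC enables the xa_alloc() family of ID-allocating stores, and XA_FLAGS_LOCK_IRQ makes the array's internal spinlock IRQ-safe, which the xa_*_irq() calls in later hunks rely on. A minimal sketch of the same pattern outside this driver (struct my_dev and the helper names are hypothetical):

	#include <linux/xarray.h>

	static DEFINE_XARRAY_FLAGS(dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);

	struct my_dev {
		u32 unit;	/* ID assigned on insertion */
	};

	/* Store d and write the allocated ID into d->unit; 0 or -errno. */
	static int dev_table_add(struct my_dev *d)
	{
		return xa_alloc_irq(&dev_table, &d->unit, d, xa_limit_32b,
				    GFP_KERNEL);
	}

	/* Lookup needs no external lock; xa_load() is RCU-safe. */
	static struct my_dev *dev_table_find(u32 unit)
	{
		return xa_load(&dev_table, unit);
	}
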
@@ -152,7 +151,11 @@
 	/* Control context must use DMA_RTAIL */
 	if (rcd->ctxt == HFI1_CTRL_CTXT)
 		rcd->flags |= HFI1_CAP_DMA_RTAIL;
-	rcd->seq_cnt = 1;
+	rcd->fast_handler = get_dma_rtail_setting(rcd) ?
+				handle_receive_interrupt_dma_rtail :
+				handle_receive_interrupt_nodma_rtail;
+
+	hfi1_set_seq_cnt(rcd, 1);
 
 	rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node);
 	if (!rcd->sc) {
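
Note: get_dma_rtail_setting() is consulted once at context creation and the result cached as a function pointer, so the receive hot path no longer branches on the DMA_RTAIL capability for every interrupt. A sketch of the dispatch pattern, with hypothetical names:

	struct rx_ctx {
		/* resolved once at init; callers just invoke it */
		int (*fast_handler)(struct rx_ctx *c, int budget);
	};

	static int rx_dma_rtail(struct rx_ctx *c, int budget)   { return 0; }
	static int rx_nodma_rtail(struct rx_ctx *c, int budget) { return 0; }

	static void rx_ctx_init(struct rx_ctx *c, bool dma_rtail)
	{
		c->fast_handler = dma_rtail ? rx_dma_rtail : rx_nodma_rtail;
	}
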
@@ -371,8 +374,14 @@
 		rcd->numa_id = numa;
 		rcd->rcv_array_groups = dd->rcv_entries.ngroups;
 		rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
+		rcd->slow_handler = handle_receive_interrupt;
+		rcd->do_interrupt = rcd->slow_handler;
+		rcd->msix_intr = CCE_NUM_MSIX_VECTORS;
 
 		mutex_init(&rcd->exp_mutex);
+		spin_lock_init(&rcd->exp_lock);
+		INIT_LIST_HEAD(&rcd->flow_queue.queue_head);
+		INIT_LIST_HEAD(&rcd->rarr_queue.queue_head);
 
 		hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt);
 
@@ -464,7 +473,7 @@
 		if (rcd->egrbufs.size < hfi1_max_mtu) {
 			rcd->egrbufs.size = __roundup_pow_of_two(hfi1_max_mtu);
 			hfi1_cdbg(PROC,
-				  "ctxt%u: eager bufs size too small. Adjusting to %zu\n",
+				  "ctxt%u: eager bufs size too small. Adjusting to %u\n",
				  rcd->ctxt, rcd->egrbufs.size);
 		}
 		rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE;
@@ -475,6 +484,9 @@
					    GFP_KERNEL, numa);
 		if (!rcd->opstats)
 			goto bail;
+
+		/* Initialize TID flow generations for the context */
+		hfi1_kern_init_ctxt_generations(rcd);
 	}
 
 	*context = rcd;
@@ -503,23 +515,6 @@
 }
 
 /*
- * Convert a receive header entry size that to the encoding used in the CSR.
- *
- * Return a zero if the given size is invalid.
- */
-static inline u64 encode_rcv_header_entry_size(u16 size)
-{
-	/* there are only 3 valid receive header entry sizes */
-	if (size == 2)
-		return 1;
-	if (size == 16)
-		return 2;
-	else if (size == 32)
-		return 4;
-	return 0; /* invalid */
-}
-
-/*
  * Select the largest ccti value over all SLs to determine the intra-
  * packet gap for the link.
  *
@@ -535,7 +530,7 @@
 	u16 shift, mult;
 	u64 src;
 	u32 current_egress_rate; /* Mbits /sec */
-	u32 max_pkt_time;
+	u64 max_pkt_time;
 	/*
	 * max_pkt_time is the maximum packet egress time in units
	 * of the fabric clock period 1/(805 MHz).
@@ -656,13 +651,7 @@
 
 	ppd->pkeys[default_pkey_idx] = DEFAULT_P_KEY;
 	ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
-
-	if (loopback) {
-		hfi1_early_err(&pdev->dev,
-			       "Faking data partition 0x8001 in idx %u\n",
-			       !default_pkey_idx);
-		ppd->pkeys[!default_pkey_idx] = 0x8001;
-	}
+	ppd->pkeys[0] = 0x8001;
 
 	INIT_WORK(&ppd->link_vc_work, handle_verify_cap);
 	INIT_WORK(&ppd->link_up_work, handle_link_up);
@@ -706,9 +695,7 @@
 		return;
 
 bail:
-
-	hfi1_early_err(&pdev->dev,
-		       "Congestion Control Agent disabled for port %d\n", port);
+	dd_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port);
 }
 
 /*
@@ -777,6 +764,8 @@
 			rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
 		if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_EGR_FULL))
 			rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
+		if (HFI1_CAP_IS_KSET(TID_RDMA))
+			rcvmask |= HFI1_RCVCTRL_TIDFLOW_ENB;
 		hfi1_rcvctrl(dd, rcvmask, rcd);
 		sc_enable(rcd->sc);
 		hfi1_rcd_put(rcd);
@@ -838,6 +827,46 @@
 }
 
 /**
+ * destroy_workqueues - destroy per port workqueues
+ * @dd: the hfi1_ib device
+ */
+static void destroy_workqueues(struct hfi1_devdata *dd)
+{
+	int pidx;
+	struct hfi1_pportdata *ppd;
+
+	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+		ppd = dd->pport + pidx;
+
+		if (ppd->hfi1_wq) {
+			destroy_workqueue(ppd->hfi1_wq);
+			ppd->hfi1_wq = NULL;
+		}
+		if (ppd->link_wq) {
+			destroy_workqueue(ppd->link_wq);
+			ppd->link_wq = NULL;
+		}
+	}
+}
+
+/**
+ * enable_general_intr() - Enable the IRQs that will be handled by the
+ * general interrupt handler.
+ * @dd: valid devdata
+ *
+ */
+static void enable_general_intr(struct hfi1_devdata *dd)
+{
+	set_intr_bits(dd, CCE_ERR_INT, MISC_ERR_INT, true);
+	set_intr_bits(dd, PIO_ERR_INT, TXE_ERR_INT, true);
+	set_intr_bits(dd, IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, true);
+	set_intr_bits(dd, PBC_INT, GPIO_ASSERT_INT, true);
+	set_intr_bits(dd, TCRIT_INT, TCRIT_INT, true);
+	set_intr_bits(dd, IS_DC_START, IS_DC_END, true);
+	set_intr_bits(dd, IS_SENDCREDIT_START, IS_SENDCREDIT_END, true);
+}
+
+/**
  * hfi1_init - do the actual initialization sequence on the chip
  * @dd: the hfi1_ib device
  * @reinit: re-initializing, so don't allocate new memory
@@ -868,10 +897,10 @@
 
 	if (is_ax(dd)) {
 		atomic_set(&dd->drop_packet, DROP_PACKET_ON);
-		dd->do_drop = 1;
+		dd->do_drop = true;
 	} else {
 		atomic_set(&dd->drop_packet, DROP_PACKET_OFF);
-		dd->do_drop = 0;
+		dd->do_drop = false;
 	}
 
 	/* make sure the link is not "up" */
@@ -887,18 +916,6 @@
 	if (ret)
 		goto done;
 
-	/* allocate dummy tail memory for all receive contexts */
-	dd->rcvhdrtail_dummy_kvaddr = dma_zalloc_coherent(
-		&dd->pcidev->dev, sizeof(u64),
-		&dd->rcvhdrtail_dummy_dma,
-		GFP_KERNEL);
-
-	if (!dd->rcvhdrtail_dummy_kvaddr) {
-		dd_dev_err(dd, "cannot allocate dummy tail memory\n");
-		ret = -ENOMEM;
-		goto done;
-	}
-
 	/* dd->rcd can be NULL if early initialization failed */
 	for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) {
 		/*
@@ -911,16 +928,17 @@
 		if (!rcd)
 			continue;
 
-		rcd->do_interrupt = &handle_receive_interrupt;
-
 		lastfail = hfi1_create_rcvhdrq(dd, rcd);
 		if (!lastfail)
 			lastfail = hfi1_setup_eagerbufs(rcd);
+		if (!lastfail)
+			lastfail = hfi1_kern_exp_rcv_init(rcd, reinit);
 		if (lastfail) {
 			dd_dev_err(dd,
				   "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
 			ret = lastfail;
 		}
+		/* enable IRQ */
 		hfi1_rcd_put(rcd);
 	}
 
@@ -959,7 +977,8 @@
 			HFI1_STATUS_INITTED;
 	if (!ret) {
 		/* enable all interrupts from the chip */
-		set_intr_state(dd, 1);
+		enable_general_intr(dd);
+		init_qsfp_int(dd);
 
 		/* chip is OK for user apps; mark it as initialized */
 		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
@@ -991,21 +1010,9 @@
 	return ret;
 }
 
-static inline struct hfi1_devdata *__hfi1_lookup(int unit)
-{
-	return idr_find(&hfi1_unit_table, unit);
-}
-
 struct hfi1_devdata *hfi1_lookup(int unit)
 {
-	struct hfi1_devdata *dd;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hfi1_devs_lock, flags);
-	dd = __hfi1_lookup(unit);
-	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
-
-	return dd;
+	return xa_load(&hfi1_dev_table, unit);
 }
 
 /*
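
Note: the locked idr_find() wrapper collapses to a bare xa_load(), which takes rcu_read_lock() internally; as before, the caller must guarantee the returned devdata stays alive while it is used. The matching iteration idiom, should a walk of all units ever be needed (not part of this diff), would be:

	struct hfi1_devdata *dd;
	unsigned long index;

	xa_for_each(&hfi1_dev_table, index, dd) {
		/* visit each registered unit */
	}
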
@@ -1056,9 +1063,9 @@
 	}
 	dd->flags &= ~HFI1_INITTED;
 
-	/* mask and clean up interrupts, but not errors */
-	set_intr_state(dd, 0);
-	hfi1_clean_up_interrupts(dd);
+	/* mask and clean up interrupts */
+	set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
+	msix_clean_up_interrupts(dd);
 
 	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
 		ppd = dd->pport + pidx;
@@ -1101,15 +1108,10 @@
		 * We can't count on interrupts since we are stopping.
		 */
 		hfi1_quiet_serdes(ppd);
-
-		if (ppd->hfi1_wq) {
-			destroy_workqueue(ppd->hfi1_wq);
-			ppd->hfi1_wq = NULL;
-		}
-		if (ppd->link_wq) {
-			destroy_workqueue(ppd->link_wq);
-			ppd->link_wq = NULL;
-		}
+		if (ppd->hfi1_wq)
+			flush_workqueue(ppd->hfi1_wq);
+		if (ppd->link_wq)
+			flush_workqueue(ppd->link_wq);
 	}
 	sdma_exit(dd);
 }
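
Note: shutdown_device() now only flushes the per-port workqueues so already-queued link and SDMA work completes; freeing them moves to the new destroy_workqueues(), which remove_one() calls after shutdown_device() in a later hunk. The distinction, in general terms:

	/* quiesce: wait for every work item queued so far to finish */
	flush_workqueue(wq);

	/* teardown: drain and free the workqueue itself; nothing may
	 * queue to wq afterwards */
	destroy_workqueue(wq);
	wq = NULL;
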
@@ -1133,9 +1135,9 @@
 	dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd),
			  rcd->rcvhdrq, rcd->rcvhdrq_dma);
 	rcd->rcvhdrq = NULL;
-	if (rcd->rcvhdrtail_kvaddr) {
+	if (hfi1_rcvhdrtail_kvaddr(rcd)) {
 		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-				  (void *)rcd->rcvhdrtail_kvaddr,
+				  (void *)hfi1_rcvhdrtail_kvaddr(rcd),
				  rcd->rcvhdrqtailaddr_dma);
 		rcd->rcvhdrtail_kvaddr = NULL;
 	}
@@ -1173,7 +1175,7 @@
 /*
  * Release our hold on the shared asic data. If we are the last one,
  * return the structure to be finalized outside the lock. Must be
- * holding hfi1_devs_lock.
+ * holding hfi1_dev_table lock.
  */
 static struct hfi1_asic_data *release_asic_data(struct hfi1_devdata *dd)
 {
@@ -1198,24 +1200,21 @@
 }
 
 /**
- * hfi1_clean_devdata - cleans up per-unit data structure
+ * hfi1_free_devdata - cleans up and frees per-unit data structure
  * @dd: pointer to a valid devdata structure
  *
- * It cleans up all data structures set up by
+ * It cleans up and frees all data structures set up by
  * by hfi1_alloc_devdata().
  */
-static void hfi1_clean_devdata(struct hfi1_devdata *dd)
+void hfi1_free_devdata(struct hfi1_devdata *dd)
 {
 	struct hfi1_asic_data *ad;
 	unsigned long flags;
 
-	spin_lock_irqsave(&hfi1_devs_lock, flags);
-	if (!list_empty(&dd->list)) {
-		idr_remove(&hfi1_unit_table, dd->unit);
-		list_del_init(&dd->list);
-	}
+	xa_lock_irqsave(&hfi1_dev_table, flags);
+	__xa_erase(&hfi1_dev_table, dd->unit);
 	ad = release_asic_data(dd);
-	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+	xa_unlock_irqrestore(&hfi1_dev_table, flags);
 
 	finalize_asic_data(dd, ad);
 	free_platform_config(dd);
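
Note: the erase and release_asic_data() must happen atomically, so the code takes the XArray's own spinlock explicitly and uses __xa_erase(), which expects that lock to be held. The general shape (table and id are placeholders):

	unsigned long flags;

	xa_lock_irqsave(&table, flags);
	__xa_erase(&table, id);		/* lock already held */
	/* ...other work that must be atomic with the erase... */
	xa_unlock_irqrestore(&table, flags);

	/* with no extra work under the lock, the self-locking form suffices: */
	xa_erase_irq(&table, id);
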
@@ -1230,38 +1229,27 @@
 	dd->tx_opstats = NULL;
 	kfree(dd->comp_vect);
 	dd->comp_vect = NULL;
+	if (dd->rcvhdrtail_dummy_kvaddr)
+		dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
+				  (void *)dd->rcvhdrtail_dummy_kvaddr,
+				  dd->rcvhdrtail_dummy_dma);
+	dd->rcvhdrtail_dummy_kvaddr = NULL;
 	sdma_clean(dd, dd->num_sdma);
 	rvt_dealloc_device(&dd->verbs_dev.rdi);
 }
 
-static void __hfi1_free_devdata(struct kobject *kobj)
-{
-	struct hfi1_devdata *dd =
-		container_of(kobj, struct hfi1_devdata, kobj);
-
-	hfi1_clean_devdata(dd);
-}
-
-static struct kobj_type hfi1_devdata_type = {
-	.release = __hfi1_free_devdata,
-};
-
-void hfi1_free_devdata(struct hfi1_devdata *dd)
-{
-	kobject_put(&dd->kobj);
-}
-
-/*
- * Allocate our primary per-unit data structure. Must be done via verbs
- * allocator, because the verbs cleanup process both does cleanup and
- * free of the data structure.
- * "extra" is for chip-specific data.
+/**
+ * hfi1_alloc_devdata - Allocate our primary per-unit data structure.
+ * @pdev: Valid PCI device
+ * @extra: How many bytes to alloc past the default
  *
- * Use the idr mechanism to get a unit number for this unit.
+ * Must be done via verbs allocator, because the verbs cleanup process
+ * both does cleanup and free of the data structure.
+ * "extra" is for chip-specific data.
  */
-struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
+static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
+					       size_t extra)
 {
-	unsigned long flags;
 	struct hfi1_devdata *dd;
 	int ret, nports;
 
@@ -1277,26 +1265,23 @@
 	dd->pcidev = pdev;
 	pci_set_drvdata(pdev, dd);
 
-	INIT_LIST_HEAD(&dd->list);
-	idr_preload(GFP_KERNEL);
-	spin_lock_irqsave(&hfi1_devs_lock, flags);
-
-	ret = idr_alloc(&hfi1_unit_table, dd, 0, 0, GFP_NOWAIT);
-	if (ret >= 0) {
-		dd->unit = ret;
-		list_add(&dd->list, &hfi1_dev_list);
-	}
-	dd->node = -1;
-
-	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
-	idr_preload_end();
-
+	ret = xa_alloc_irq(&hfi1_dev_table, &dd->unit, dd, xa_limit_32b,
+			   GFP_KERNEL);
 	if (ret < 0) {
-		hfi1_early_err(&pdev->dev,
-			       "Could not allocate unit ID: error %d\n", -ret);
+		dev_err(&pdev->dev,
+			"Could not allocate unit ID: error %d\n", -ret);
 		goto bail;
 	}
 	rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
+	/*
+	 * If the BIOS does not have the NUMA node information set, select
+	 * NUMA 0 so we get consistent performance.
+	 */
+	dd->node = pcibus_to_node(pdev->bus);
+	if (dd->node == NUMA_NO_NODE) {
+		dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
+		dd->node = 0;
+	}
 
 	/*
	 * Initialize all locks for the device. This needs to be as early as
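
Note: pcibus_to_node() returns NUMA_NO_NODE (-1) when the firmware tables carry no locality information for the bus; the new <linux/numa.h> include earlier in this patch provides the constant. Falling back to node 0 keeps allocations deterministic instead of leaving dd->node at -1. Standalone shape of the check (pdev is any struct pci_dev *):

	#include <linux/numa.h>

	int node = pcibus_to_node(pdev->bus);

	if (node == NUMA_NO_NODE)
		node = 0;	/* no firmware affinity info; pick a fixed node */
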
@@ -1314,6 +1299,7 @@
 	spin_lock_init(&dd->pio_map_lock);
 	mutex_init(&dd->dc8051_lock);
 	init_waitqueue_head(&dd->event_queue);
+	spin_lock_init(&dd->irq_src_lock);
 
 	dd->int_counter = alloc_percpu(u64);
 	if (!dd->int_counter) {
@@ -1345,11 +1331,20 @@
 		goto bail;
 	}
 
-	kobject_init(&dd->kobj, &hfi1_devdata_type);
+	/* allocate dummy tail memory for all receive contexts */
+	dd->rcvhdrtail_dummy_kvaddr =
+		dma_alloc_coherent(&dd->pcidev->dev, sizeof(u64),
+				   &dd->rcvhdrtail_dummy_dma, GFP_KERNEL);
+	if (!dd->rcvhdrtail_dummy_kvaddr) {
+		ret = -ENOMEM;
+		goto bail;
+	}
+
+	atomic_set(&dd->ipoib_rsm_usr_num, 0);
 	return dd;
 
 bail:
-	hfi1_clean_devdata(dd);
+	hfi1_free_devdata(dd);
 	return ERR_PTR(ret);
 }
 
@@ -1479,16 +1474,17 @@
 	/* sanitize link CRC options */
 	link_crc_mask &= SUPPORTED_CRCS;
 
+	ret = opfn_init();
+	if (ret < 0) {
+		pr_err("Failed to allocate opfn_wq");
+		goto bail_dev;
+	}
+
 	/*
	 * These must be called before the driver is registered with
	 * the PCI subsystem.
	 */
-	idr_init(&hfi1_unit_table);
-
 	hfi1_dbg_init();
-	ret = hfi1_wss_init();
-	if (ret < 0)
-		goto bail_wss;
 	ret = pci_register_driver(&hfi1_pci_driver);
 	if (ret < 0) {
 		pr_err("Unable to register driver: error %d\n", -ret);
@@ -1497,10 +1493,7 @@
 	goto bail; /* all OK */
 
 bail_dev:
-	hfi1_wss_exit();
-bail_wss:
 	hfi1_dbg_exit();
-	idr_destroy(&hfi1_unit_table);
 	dev_cleanup();
 bail:
 	return ret;
@@ -1514,11 +1507,11 @@
 static void __exit hfi1_mod_cleanup(void)
 {
 	pci_unregister_driver(&hfi1_pci_driver);
+	opfn_exit();
 	node_affinity_destroy_all();
-	hfi1_wss_exit();
 	hfi1_dbg_exit();
 
-	idr_destroy(&hfi1_unit_table);
+	WARN_ON(!xa_empty(&hfi1_dev_table));
 	dispose_firmware(); /* asymmetric with obtain_firmware() */
 	dev_cleanup();
 }
@@ -1554,13 +1547,6 @@
 
 	free_credit_return(dd);
 
-	if (dd->rcvhdrtail_dummy_kvaddr) {
-		dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
-				  (void *)dd->rcvhdrtail_dummy_kvaddr,
-				  dd->rcvhdrtail_dummy_dma);
-		dd->rcvhdrtail_dummy_kvaddr = NULL;
-	}
-
 	/*
	 * Free any resources still in use (usually just kernel contexts)
	 * at unload; we do for ctxtcnt, because that's what we allocate.
@@ -1569,7 +1555,7 @@
 		struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
 
 		if (rcd) {
-			hfi1_clear_tids(rcd);
+			hfi1_free_ctxt_rcv_groups(rcd);
 			hfi1_free_ctxt(rcd);
 		}
 	}
@@ -1609,29 +1595,6 @@
 	hfi1_free_devdata(dd);
 }
 
-static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt)
-{
-	if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
-		hfi1_early_err(dev, "Receive header queue count too small\n");
-		return -EINVAL;
-	}
-
-	if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
-		hfi1_early_err(dev,
-			       "Receive header queue count cannot be greater than %u\n",
-			       HFI1_MAX_HDRQ_EGRBUF_CNT);
-		return -EINVAL;
-	}
-
-	if (thecnt % HDRQ_INCREMENT) {
-		hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n",
-			       thecnt, HDRQ_INCREMENT);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	int ret = 0, j, pidx, initfail;
@@ -1644,22 +1607,29 @@
 	/* Validate dev ids */
 	if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
	      ent->device == PCI_DEVICE_ID_INTEL1)) {
-		hfi1_early_err(&pdev->dev,
-			       "Failing on unknown Intel deviceid 0x%x\n",
-			       ent->device);
+		dev_err(&pdev->dev, "Failing on unknown Intel deviceid 0x%x\n",
+			ent->device);
 		ret = -ENODEV;
 		goto bail;
 	}
 
+	/* Allocate the dd so we can get to work */
+	dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
+				sizeof(struct hfi1_pportdata));
+	if (IS_ERR(dd)) {
+		ret = PTR_ERR(dd);
+		goto bail;
+	}
+
 	/* Validate some global module parameters */
-	ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
+	ret = hfi1_validate_rcvhdrcnt(dd, rcvhdrcnt);
 	if (ret)
 		goto bail;
 
 	/* use the encoding function as a sanitization check */
 	if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
-		hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n",
-			       hfi1_hdrq_entsize);
+		dd_dev_err(dd, "Invalid HdrQ Entry size %u\n",
+			   hfi1_hdrq_entsize);
 		ret = -EINVAL;
 		goto bail;
 	}
@@ -1681,10 +1651,10 @@
 			clamp_val(eager_buffer_size,
				  MIN_EAGER_BUFFER * 8,
				  MAX_EAGER_BUFFER_TOTAL);
-		hfi1_early_info(&pdev->dev, "Eager buffer size %u\n",
-				eager_buffer_size);
+		dd_dev_info(dd, "Eager buffer size %u\n",
+			    eager_buffer_size);
 	} else {
-		hfi1_early_err(&pdev->dev, "Invalid Eager buffer size of 0\n");
+		dd_dev_err(dd, "Invalid Eager buffer size of 0\n");
 		ret = -EINVAL;
 		goto bail;
 	}
@@ -1692,7 +1662,7 @@
 	/* restrict value of hfi1_rcvarr_split */
 	hfi1_rcvarr_split = clamp_val(hfi1_rcvarr_split, 0, 100);
 
-	ret = hfi1_pcie_init(pdev, ent);
+	ret = hfi1_pcie_init(dd);
 	if (ret)
 		goto bail;
 
@@ -1700,12 +1670,9 @@
	 * Do device-specific initialization, function table setup, dd
	 * allocation, etc.
	 */
-	dd = hfi1_init_dd(pdev, ent);
-
-	if (IS_ERR(dd)) {
-		ret = PTR_ERR(dd);
+	ret = hfi1_init_dd(dd);
+	if (ret)
 		goto clean_bail; /* error already printed */
-	}
 
 	ret = create_workqueues(dd);
 	if (ret)
@@ -1713,9 +1680,6 @@
 
 	/* do the generic initialization */
 	initfail = hfi1_init(dd, 0);
-
-	/* setup vnic */
-	hfi1_vnic_setup(dd);
 
 	ret = hfi1_register_ib_device(dd);
 
@@ -1736,7 +1700,7 @@
 		dd_dev_err(dd, "Failed to create /dev devices: %d\n", -j);
 
 	if (initfail || ret) {
-		hfi1_clean_up_interrupts(dd);
+		msix_clean_up_interrupts(dd);
 		stop_timers(dd);
 		flush_workqueue(ib_wq);
 		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
@@ -1755,7 +1719,6 @@
 	hfi1_device_remove(dd);
 	if (!ret)
 		hfi1_unregister_ib_device(dd);
-	hfi1_vnic_cleanup(dd);
 	postinit_cleanup(dd);
 	if (initfail)
 		ret = initfail;
@@ -1800,14 +1763,15 @@
 	/* unregister from IB core */
 	hfi1_unregister_ib_device(dd);
 
-	/* cleanup vnic */
-	hfi1_vnic_cleanup(dd);
+	/* free netdev data */
+	hfi1_netdev_free(dd);
 
 	/*
	 * Disable the IB link, disable interrupts on the device,
	 * clear dma engines, etc.
	 */
 	shutdown_device(dd);
+	destroy_workqueues(dd);
 
 	stop_timers(dd);
 
@@ -1836,7 +1800,6 @@
 int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 {
 	unsigned amt;
-	u64 reg;
 
 	if (!rcd->rcvhdrq) {
 		gfp_t gfp_flags;
@@ -1847,9 +1810,9 @@
 			gfp_flags = GFP_KERNEL;
 		else
 			gfp_flags = GFP_USER;
-		rcd->rcvhdrq = dma_zalloc_coherent(
-			&dd->pcidev->dev, amt, &rcd->rcvhdrq_dma,
-			gfp_flags | __GFP_COMP);
+		rcd->rcvhdrq = dma_alloc_coherent(&dd->pcidev->dev, amt,
+						  &rcd->rcvhdrq_dma,
+						  gfp_flags | __GFP_COMP);
 
 		if (!rcd->rcvhdrq) {
 			dd_dev_err(dd,
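
Note: dma_alloc_coherent() has returned zeroed memory since v5.0, and dma_zalloc_coherent() was removed from the tree, so this and the following dma_zalloc_coherent() conversions are mechanical; no explicit memset is needed. Minimal usage shape:

	void *buf;
	dma_addr_t dma;

	buf = dma_alloc_coherent(&pdev->dev, size, &dma, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	/* buf arrives already zeroed */
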
@@ -1860,37 +1823,17 @@
 
 	if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
	    HFI1_CAP_UGET_MASK(rcd->flags, DMA_RTAIL)) {
-		rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent(
-			&dd->pcidev->dev, PAGE_SIZE,
-			&rcd->rcvhdrqtailaddr_dma, gfp_flags);
+		rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(&dd->pcidev->dev,
+							    PAGE_SIZE,
+							    &rcd->rcvhdrqtailaddr_dma,
+							    gfp_flags);
 		if (!rcd->rcvhdrtail_kvaddr)
 			goto bail_free;
 	}
 }
-	/*
-	 * These values are per-context:
-	 *	RcvHdrCnt
-	 *	RcvHdrEntSize
-	 *	RcvHdrSize
-	 */
-	reg = ((u64)(rcd->rcvhdrq_cnt >> HDRQ_SIZE_SHIFT)
-		& RCV_HDR_CNT_CNT_MASK)
-		<< RCV_HDR_CNT_CNT_SHIFT;
-	write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_CNT, reg);
-	reg = (encode_rcv_header_entry_size(rcd->rcvhdrqentsize)
-		& RCV_HDR_ENT_SIZE_ENT_SIZE_MASK)
-		<< RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT;
-	write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_ENT_SIZE, reg);
-	reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK)
-		<< RCV_HDR_SIZE_HDR_SIZE_SHIFT;
-	write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_SIZE, reg);
 
-	/*
-	 * Program dummy tail address for every receive context
-	 * before enabling any receive context
-	 */
-	write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR,
-			dd->rcvhdrtail_dummy_dma);
+	set_hdrq_regs(rcd->dd, rcd->ctxt, rcd->rcvhdrqentsize,
+		      rcd->rcvhdrq_cnt);
 
 	return 0;
 
@@ -1958,10 +1901,10 @@
 	while (alloced_bytes < rcd->egrbufs.size &&
	       rcd->egrbufs.alloced < rcd->egrbufs.count) {
 		rcd->egrbufs.buffers[idx].addr =
-			dma_zalloc_coherent(&dd->pcidev->dev,
-					    rcd->egrbufs.rcvtid_size,
-					    &rcd->egrbufs.buffers[idx].dma,
-					    gfp_flags);
+			dma_alloc_coherent(&dd->pcidev->dev,
+					   rcd->egrbufs.rcvtid_size,
+					   &rcd->egrbufs.buffers[idx].dma,
+					   gfp_flags);
 		if (rcd->egrbufs.buffers[idx].addr) {
 			rcd->egrbufs.buffers[idx].len =
 				rcd->egrbufs.rcvtid_size;
@@ -2032,7 +1975,7 @@
 	rcd->egrbufs.size = alloced_bytes;
 
 	hfi1_cdbg(PROC,
-		  "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %zuKB\n",
+		  "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %uKB\n",
		  rcd->ctxt, rcd->egrbufs.alloced,
		  rcd->egrbufs.rcvtid_size / 1024, rcd->egrbufs.size / 1024);
 