2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/infiniband/hw/hfi1/chip.c
....@@ -1,5 +1,5 @@
11 /*
2
- * Copyright(c) 2015 - 2018 Intel Corporation.
2
+ * Copyright(c) 2015 - 2020 Intel Corporation.
33 *
44 * This file is provided under a dual BSD/GPLv2 license. When using or
55 * redistributing this file, you may do so under either license.
....@@ -66,12 +66,7 @@
6666 #include "affinity.h"
6767 #include "debugfs.h"
6868 #include "fault.h"
69
-
70
-#define NUM_IB_PORTS 1
71
-
72
-uint kdeth_qp;
73
-module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
74
-MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
69
+#include "netdev.h"
7570
7671 uint num_vls = HFI1_MAX_VLS_SUPPORTED;
7772 module_param(num_vls, uint, S_IRUGO);
....@@ -130,13 +125,15 @@
130125
131126 /*
132127 * RSM instance allocation
133
- * 0 - Verbs
134
- * 1 - User Fecn Handling
135
- * 2 - Vnic
128
+ * 0 - User Fecn Handling
129
+ * 1 - Vnic
130
+ * 2 - AIP
131
+ * 3 - Verbs
136132 */
137
-#define RSM_INS_VERBS 0
138
-#define RSM_INS_FECN 1
139
-#define RSM_INS_VNIC 2
133
+#define RSM_INS_FECN 0
134
+#define RSM_INS_VNIC 1
135
+#define RSM_INS_AIP 2
136
+#define RSM_INS_VERBS 3
140137
141138 /* Bit offset into the GUID which carries HFI id information */
142139 #define GUID_HFI_INDEX_SHIFT 39
....@@ -176,6 +173,25 @@
176173
177174 /* QPN[m+n:1] QW 1, OFFSET 1 */
178175 #define QPN_SELECT_OFFSET ((1ull << QW_SHIFT) | (1ull))
176
+
177
+/* RSM fields for AIP */
178
+/* LRH.BTH above is reused for this rule */
179
+
180
+/* BTH.DESTQP: QW 1, OFFSET 16 for match */
181
+#define BTH_DESTQP_QW 1ull
182
+#define BTH_DESTQP_BIT_OFFSET 16ull
183
+#define BTH_DESTQP_OFFSET(off) ((BTH_DESTQP_QW << QW_SHIFT) | (off))
184
+#define BTH_DESTQP_MATCH_OFFSET BTH_DESTQP_OFFSET(BTH_DESTQP_BIT_OFFSET)
185
+#define BTH_DESTQP_MASK 0xFFull
186
+#define BTH_DESTQP_VALUE 0x81ull
187
+
188
+/* DETH.SQPN: QW 1 Offset 56 for select */
189
+/* We use the 8 most significant source QPN bits as entropy for AIP */
190
+#define DETH_AIP_SQPN_QW 3ull
191
+#define DETH_AIP_SQPN_BIT_OFFSET 56ull
192
+#define DETH_AIP_SQPN_OFFSET(off) ((DETH_AIP_SQPN_QW << QW_SHIFT) | (off))
193
+#define DETH_AIP_SQPN_SELECT_OFFSET \
194
+ DETH_AIP_SQPN_OFFSET(DETH_AIP_SQPN_BIT_OFFSET)
179195
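Reviewer note on the encoding used by the new AIP match/select macros: each value packs a quad-word index above QW_SHIFT (defined earlier in this file) and a bit offset in the low bits, mirroring the existing LRH/QPN macros. A minimal decode sketch, assuming the bit offset always fits below QW_SHIFT (illustrative only, not part of the patch):

	u64 qw  = BTH_DESTQP_MATCH_OFFSET >> QW_SHIFT;               /* 1: BTH.DestQP sits in QW 1 */
	u64 bit = BTH_DESTQP_MATCH_OFFSET & (BIT_ULL(QW_SHIFT) - 1); /* 16: bit offset within that QW */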
180196 /* RSM fields for Vnic */
181197 /* L2_TYPE: QW 0, OFFSET 61 - for match */
....@@ -1102,9 +1118,9 @@
11021118 const char *desc;
11031119 };
11041120
1105
-#define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START)
1106
-#define NUM_DC_ERRS (IS_DC_END - IS_DC_START)
1107
-#define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START)
1121
+#define NUM_MISC_ERRS (IS_GENERAL_ERR_END + 1 - IS_GENERAL_ERR_START)
1122
+#define NUM_DC_ERRS (IS_DC_END + 1 - IS_DC_START)
1123
+#define NUM_VARIOUS (IS_VARIOUS_END + 1 - IS_VARIOUS_START)
11081124
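Reviewer note on the "+ 1" above: this patch treats the IS_*_END interrupt-source macros as inclusive bounds (see the matching "source <= entry->end" change in the is_table walk further down), so the element counts gain one. A quick illustrative check: for a range running from source 8 through source 15 inclusive, the count is 15 + 1 - 8 = 8 entries.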
11091125 /*
11101126 * Helpers for building HFI and DC error interrupt table entries. Different
....@@ -4111,9 +4127,14 @@
41114127 def_access_ibp_counter(rdma_seq);
41124128 def_access_ibp_counter(unaligned);
41134129 def_access_ibp_counter(seq_naks);
4130
+def_access_ibp_counter(rc_crwaits);
41144131
41154132 static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
41164133 [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
4134
+[C_RX_LEN_ERR] = RXE32_DEV_CNTR_ELEM(RxLenErr, RCV_LENGTH_ERR_CNT, CNTR_SYNTH),
4135
+[C_RX_SHORT_ERR] = RXE32_DEV_CNTR_ELEM(RxShrErr, RCV_SHORT_ERR_CNT, CNTR_SYNTH),
4136
+[C_RX_ICRC_ERR] = RXE32_DEV_CNTR_ELEM(RxICrcErr, RCV_ICRC_ERR_CNT, CNTR_SYNTH),
4137
+[C_RX_EBP] = RXE32_DEV_CNTR_ELEM(RxEbpCnt, RCV_EBP_CNT, CNTR_SYNTH),
41174138 [C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT,
41184139 CNTR_NORMAL),
41194140 [C_RX_TID_INVALID] = RXE32_DEV_CNTR_ELEM(RxTIDInvalid, RCV_TID_VALID_ERR_CNT,
....@@ -4265,6 +4286,8 @@
42654286 access_sw_pio_drain),
42664287 [C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
42674288 access_sw_kmem_wait),
4289
+[C_SW_TID_WAIT] = CNTR_ELEM("TidWait", 0, 0, CNTR_NORMAL,
4290
+ hfi1_access_sw_tid_wait),
42684291 [C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
42694292 access_sw_send_schedule),
42704293 [C_SDMA_DESC_FETCHED_CNT] = CNTR_ELEM("SDEDscFdCn",
....@@ -5126,6 +5149,7 @@
51265149 [C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq),
51275150 [C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned),
51285151 [C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks),
5152
+[C_SW_IBP_RC_CRWAITS] = SW_IBP_CNTR(RcCrWait, rc_crwaits),
51295153 [C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
51305154 access_sw_cpu_rc_acks),
51315155 [C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
....@@ -5232,6 +5256,17 @@
52325256 dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT
52335257 & CCE_REVISION_CHIP_REV_MINOR_MASK;
52345258 return (chip_rev_minor & 0xF0) == 0x10;
5259
+}
5260
+
5261
+/* return true if the kernel urgent interrupt is disabled for rcd */
5262
+bool is_urg_masked(struct hfi1_ctxtdata *rcd)
5263
+{
5264
+ u64 mask;
5265
+ u32 is = IS_RCVURGENT_START + rcd->ctxt;
5266
+ u8 bit = is % 64;
5267
+
5268
+ mask = read_csr(rcd->dd, CCE_INT_MASK + (8 * (is / 64)));
5269
+ return !(mask & BIT_ULL(bit));
52355270 }
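Reviewer note: the new is_urg_masked() helper also documents the CCE_INT_MASK layout - the logical source number divided by 64 selects the 64-bit mask CSR, and the remainder selects the bit within it. A minimal sketch of the same lookup for an arbitrary source, reusing only names visible in this file (illustrative, not part of the patch):

	/* true if logical IRQ source 'src' is currently masked off */
	static bool hfi1_source_masked(struct hfi1_devdata *dd, u32 src)
	{
		u64 mask = read_csr(dd, CCE_INT_MASK + (8 * (src / 64)));

		return !(mask & BIT_ULL(src % 64));
	}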
52365271
52375272 /*
....@@ -6856,7 +6891,7 @@
68566891 }
68576892 rcvmask = HFI1_RCVCTRL_CTXT_ENB;
68586893 /* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
6859
- rcvmask |= rcd->rcvhdrtail_kvaddr ?
6894
+ rcvmask |= hfi1_rcvhdrtail_kvaddr(rcd) ?
68606895 HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
68616896 hfi1_rcvctrl(dd, rcvmask, rcd);
68626897 hfi1_rcd_put(rcd);
....@@ -7282,11 +7317,11 @@
72827317 case 1: return OPA_LINK_WIDTH_1X;
72837318 case 2: return OPA_LINK_WIDTH_2X;
72847319 case 3: return OPA_LINK_WIDTH_3X;
7320
+ case 4: return OPA_LINK_WIDTH_4X;
72857321 default:
72867322 dd_dev_info(dd, "%s: invalid width %d, using 4\n",
72877323 __func__, width);
7288
- /* fall through */
7289
- case 4: return OPA_LINK_WIDTH_4X;
7324
+ return OPA_LINK_WIDTH_4X;
72907325 }
72917326 }
72927327
....@@ -7341,12 +7376,13 @@
73417376 case 0:
73427377 dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G;
73437378 break;
7379
+ case 1:
7380
+ dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
7381
+ break;
73447382 default:
73457383 dd_dev_err(dd,
73467384 "%s: unexpected max rate %d, using 25Gb\n",
73477385 __func__, (int)max_rate);
7348
- /* fall through */
7349
- case 1:
73507386 dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
73517387 break;
73527388 }
....@@ -8193,7 +8229,7 @@
81938229 /**
81948230 * is_rcv_urgent_int() - User receive context urgent IRQ handler
81958231 * @dd: valid dd
8196
- * @source: logical IRQ source (ofse from IS_RCVURGENT_START)
8232
+ * @source: logical IRQ source (offset from IS_RCVURGENT_START)
81978233 *
81988234 * RX block receive urgent interrupt. Source is < 160.
81998235 *
....@@ -8243,7 +8279,7 @@
82438279 is_sdma_eng_err_name, is_sdma_eng_err_int },
82448280 { IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
82458281 is_sendctxt_err_name, is_sendctxt_err_int },
8246
-{ IS_SDMA_START, IS_SDMA_END,
8282
+{ IS_SDMA_START, IS_SDMA_IDLE_END,
82478283 is_sdma_eng_name, is_sdma_eng_int },
82488284 { IS_VARIOUS_START, IS_VARIOUS_END,
82498285 is_various_name, is_various_int },
....@@ -8269,7 +8305,7 @@
82698305
82708306 /* avoids a double compare by walking the table in-order */
82718307 for (entry = &is_table[0]; entry->is_name; entry++) {
8272
- if (source < entry->end) {
8308
+ if (source <= entry->end) {
82738309 trace_hfi1_interrupt(dd, entry, source);
82748310 entry->is_int(dd, source - entry->start);
82758311 return;
....@@ -8288,7 +8324,7 @@
82888324 * context DATA IRQs are threaded and are not supported by this handler.
82898325 *
82908326 */
8291
-static irqreturn_t general_interrupt(int irq, void *data)
8327
+irqreturn_t general_interrupt(int irq, void *data)
82928328 {
82938329 struct hfi1_devdata *dd = data;
82948330 u64 regs[CCE_NUM_INT_CSRS];
....@@ -8321,7 +8357,7 @@
83218357 return handled;
83228358 }
83238359
8324
-static irqreturn_t sdma_interrupt(int irq, void *data)
8360
+irqreturn_t sdma_interrupt(int irq, void *data)
83258361 {
83268362 struct sdma_engine *sde = data;
83278363 struct hfi1_devdata *dd = sde->dd;
....@@ -8364,7 +8400,6 @@
83648400 struct hfi1_devdata *dd = rcd->dd;
83658401 u32 addr = CCE_INT_CLEAR + (8 * rcd->ireg);
83668402
8367
- mmiowb(); /* make sure everything before is written */
83688403 write_csr(dd, addr, rcd->imask);
83698404 /* force the above write on the chip and get a value back */
83708405 (void)read_csr(dd, addr);
....@@ -8389,20 +8424,107 @@
83898424 static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
83908425 {
83918426 u32 tail;
8392
- int present;
83938427
8394
- if (!rcd->rcvhdrtail_kvaddr)
8395
- present = (rcd->seq_cnt ==
8396
- rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
8397
- else /* is RDMA rtail */
8398
- present = (rcd->head != get_rcvhdrtail(rcd));
8399
-
8400
- if (present)
8428
+ if (hfi1_packet_present(rcd))
84018429 return 1;
84028430
84038431 	/* fall back to a CSR read, correct independent of DMA_RTAIL */
84048432 tail = (u32)read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
8405
- return rcd->head != tail;
8433
+ return hfi1_rcd_head(rcd) != tail;
8434
+}
8435
+
8436
+/**
8437
+ * Common code for receive context interrupt handlers.
8438
+ * Update traces, increment kernel IRQ counter and
8439
+ * set up ASPM when needed.
8440
+ */
8441
+static void receive_interrupt_common(struct hfi1_ctxtdata *rcd)
8442
+{
8443
+ struct hfi1_devdata *dd = rcd->dd;
8444
+
8445
+ trace_hfi1_receive_interrupt(dd, rcd);
8446
+ this_cpu_inc(*dd->int_counter);
8447
+ aspm_ctx_disable(rcd);
8448
+}
8449
+
8450
+/**
8451
+ * __hfi1_rcd_eoi_intr() - Make HW issue receive interrupt
8452
+ * when there are packets present in the queue. When calling
8453
+ * with interrupts enabled please use hfi1_rcd_eoi_intr.
8454
+ *
8455
+ * @rcd: valid receive context
8456
+ */
8457
+static void __hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
8458
+{
8459
+ if (!rcd->rcvhdrq)
8460
+ return;
8461
+ clear_recv_intr(rcd);
8462
+ if (check_packet_present(rcd))
8463
+ force_recv_intr(rcd);
8464
+}
8465
+
8466
+/**
8467
+ * hfi1_rcd_eoi_intr() - End of Interrupt processing action
8468
+ *
8469
+ * @rcd: Ptr to hfi1_ctxtdata of receive context
8470
+ *
8471
+ * Hold IRQs so we can safely clear the interrupt and
8472
+ * recheck for a packet that may have arrived after the previous
8473
+ * check and the interrupt clear. If a packet arrived, force another
8474
+ * interrupt. This routine can be called at the end of receive packet
8475
+ * processing in interrupt service routines, interrupt service threads,
8476
+ * and softirqs.
8477
+ */
8478
+static void hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
8479
+{
8480
+ unsigned long flags;
8481
+
8482
+ local_irq_save(flags);
8483
+ __hfi1_rcd_eoi_intr(rcd);
8484
+ local_irq_restore(flags);
8485
+}
8486
+
8487
+/**
8488
+ * hfi1_netdev_rx_napi - napi poll function to move eoi inline
8489
+ * @napi: pointer to napi object
8490
+ * @budget: netdev budget
8491
+ */
8492
+int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget)
8493
+{
8494
+ struct hfi1_netdev_rxq *rxq = container_of(napi,
8495
+ struct hfi1_netdev_rxq, napi);
8496
+ struct hfi1_ctxtdata *rcd = rxq->rcd;
8497
+ int work_done = 0;
8498
+
8499
+ work_done = rcd->do_interrupt(rcd, budget);
8500
+
8501
+ if (work_done < budget) {
8502
+ napi_complete_done(napi, work_done);
8503
+ hfi1_rcd_eoi_intr(rcd);
8504
+ }
8505
+
8506
+ return work_done;
8507
+}
8508
+
8509
+/* Receive packet napi handler for the VNIC and AIP netdevs */
8510
+irqreturn_t receive_context_interrupt_napi(int irq, void *data)
8511
+{
8512
+ struct hfi1_ctxtdata *rcd = data;
8513
+
8514
+ receive_interrupt_common(rcd);
8515
+
8516
+ if (likely(rcd->napi)) {
8517
+ if (likely(napi_schedule_prep(rcd->napi)))
8518
+ __napi_schedule_irqoff(rcd->napi);
8519
+ else
8520
+ __hfi1_rcd_eoi_intr(rcd);
8521
+ } else {
8522
+ WARN_ONCE(1, "Napi IRQ handler without napi set up ctxt=%d\n",
8523
+ rcd->ctxt);
8524
+ __hfi1_rcd_eoi_intr(rcd);
8525
+ }
8526
+
8527
+ return IRQ_HANDLED;
84068528 }
84078529
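Reviewer note: the NAPI path above assumes the netdev side registers hfi1_netdev_rx_napi as the poll callback and points rcd->napi at the corresponding napi_struct; that wiring lives outside this file. A hedged sketch of what that registration presumably looks like (names other than hfi1_netdev_rx_napi are placeholders, not part of this patch):

	/* illustrative only - the actual registration is in the hfi1 netdev code */
	netif_napi_add(netdev, &rxq->napi, hfi1_netdev_rx_napi, 64);
	rxq->rcd->napi = &rxq->napi;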
84088530 /*
....@@ -8413,16 +8535,12 @@
84138535 * invoked) is finished. The intent is to avoid extra interrupts while we
84148536 * are processing packets anyway.
84158537 */
8416
-static irqreturn_t receive_context_interrupt(int irq, void *data)
8538
+irqreturn_t receive_context_interrupt(int irq, void *data)
84178539 {
84188540 struct hfi1_ctxtdata *rcd = data;
8419
- struct hfi1_devdata *dd = rcd->dd;
84208541 int disposition;
8421
- int present;
84228542
8423
- trace_hfi1_receive_interrupt(dd, rcd);
8424
- this_cpu_inc(*dd->int_counter);
8425
- aspm_ctx_disable(rcd);
8543
+ receive_interrupt_common(rcd);
84268544
84278545 /* receive interrupt remains blocked while processing packets */
84288546 disposition = rcd->do_interrupt(rcd, 0);
....@@ -8435,17 +8553,7 @@
84358553 if (disposition == RCV_PKT_LIMIT)
84368554 return IRQ_WAKE_THREAD;
84378555
8438
- /*
8439
- * The packet processor detected no more packets. Clear the receive
8440
- * interrupt and recheck for a packet packet that may have arrived
8441
- * after the previous check and interrupt clear. If a packet arrived,
8442
- * force another interrupt.
8443
- */
8444
- clear_recv_intr(rcd);
8445
- present = check_packet_present(rcd);
8446
- if (present)
8447
- force_recv_intr(rcd);
8448
-
8556
+ __hfi1_rcd_eoi_intr(rcd);
84498557 return IRQ_HANDLED;
84508558 }
84518559
....@@ -8453,27 +8561,14 @@
84538561 * Receive packet thread handler. This expects to be invoked with the
84548562 * receive interrupt still blocked.
84558563 */
8456
-static irqreturn_t receive_context_thread(int irq, void *data)
8564
+irqreturn_t receive_context_thread(int irq, void *data)
84578565 {
84588566 struct hfi1_ctxtdata *rcd = data;
8459
- int present;
84608567
84618568 /* receive interrupt is still blocked from the IRQ handler */
84628569 (void)rcd->do_interrupt(rcd, 1);
84638570
8464
- /*
8465
- * The packet processor will only return if it detected no more
8466
- * packets. Hold IRQs here so we can safely clear the interrupt and
8467
- * recheck for a packet that may have arrived after the previous
8468
- * check and the interrupt clear. If a packet arrived, force another
8469
- * interrupt.
8470
- */
8471
- local_irq_disable();
8472
- clear_recv_intr(rcd);
8473
- present = check_packet_present(rcd);
8474
- if (present)
8475
- force_recv_intr(rcd);
8476
- local_irq_enable();
8571
+ hfi1_rcd_eoi_intr(rcd);
84778572
84788573 return IRQ_HANDLED;
84798574 }
....@@ -9663,30 +9758,10 @@
96639758 }
96649759 }
96659760
9666
-static void init_qsfp_int(struct hfi1_devdata *dd)
9761
+void init_qsfp_int(struct hfi1_devdata *dd)
96679762 {
96689763 struct hfi1_pportdata *ppd = dd->pport;
9669
- u64 qsfp_mask, cce_int_mask;
9670
- const int qsfp1_int_smask = QSFP1_INT % 64;
9671
- const int qsfp2_int_smask = QSFP2_INT % 64;
9672
-
9673
- /*
9674
- * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
9675
- * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
9676
- * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
9677
- * the index of the appropriate CSR in the CCEIntMask CSR array
9678
- */
9679
- cce_int_mask = read_csr(dd, CCE_INT_MASK +
9680
- (8 * (QSFP1_INT / 64)));
9681
- if (dd->hfi1_id) {
9682
- cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
9683
- write_csr(dd, CCE_INT_MASK + (8 * (QSFP1_INT / 64)),
9684
- cce_int_mask);
9685
- } else {
9686
- cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
9687
- write_csr(dd, CCE_INT_MASK + (8 * (QSFP2_INT / 64)),
9688
- cce_int_mask);
9689
- }
9764
+ u64 qsfp_mask;
96909765
96919766 qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
96929767 /* Clear current status to avoid spurious interrupts */
....@@ -9703,6 +9778,12 @@
97039778 write_csr(dd,
97049779 dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
97059780 qsfp_mask);
9781
+
9782
+ /* Enable the appropriate QSFP IRQ source */
9783
+ if (!dd->hfi1_id)
9784
+ set_intr_bits(dd, QSFP1_INT, QSFP1_INT, true);
9785
+ else
9786
+ set_intr_bits(dd, QSFP2_INT, QSFP2_INT, true);
97069787 }
97079788
97089789 /*
....@@ -10058,7 +10139,7 @@
1005810139 * the first kernel context would have been allocated by now so
1005910140 * we are guaranteed a valid value.
1006010141 */
10061
- return (dd->rcd[0]->rcvhdrqentsize - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
10142
+ return (get_hdrqentsize(dd->rcd[0]) - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
1006210143 }
1006310144
1006410145 /*
....@@ -10103,7 +10184,7 @@
1010310184 thres = min(sc_percent_to_threshold(dd->vld[i].sc, 50),
1010410185 sc_mtu_to_threshold(dd->vld[i].sc,
1010510186 dd->vld[i].mtu,
10106
- dd->rcd[0]->rcvhdrqentsize));
10187
+ get_hdrqentsize(dd->rcd[0])));
1010710188 for (j = 0; j < INIT_SC_PER_VL; j++)
1010810189 sc_set_cr_threshold(
1010910190 pio_select_send_context_vl(dd, j, i),
....@@ -11817,12 +11898,10 @@
1181711898 << RCV_EGR_INDEX_HEAD_HEAD_SHIFT;
1181811899 write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, reg);
1181911900 }
11820
- mmiowb();
1182111901 reg = ((u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT) |
1182211902 (((u64)hd & RCV_HDR_HEAD_HEAD_MASK)
1182311903 << RCV_HDR_HEAD_HEAD_SHIFT);
1182411904 write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
11825
- mmiowb();
1182611905 }
1182711906
1182811907 u32 hdrqempty(struct hfi1_ctxtdata *rcd)
....@@ -11832,7 +11911,7 @@
1183211911 head = (read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD)
1183311912 & RCV_HDR_HEAD_HEAD_SMASK) >> RCV_HDR_HEAD_HEAD_SHIFT;
1183411913
11835
- if (rcd->rcvhdrtail_kvaddr)
11914
+ if (hfi1_rcvhdrtail_kvaddr(rcd))
1183611915 tail = get_rcvhdrtail(rcd);
1183711916 else
1183811917 tail = read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
....@@ -11876,6 +11955,84 @@
1187611955 return 0x1; /* if invalid, go with the minimum size */
1187711956 }
1187811957
11958
+/**
11959
+ * encode_rcv_header_entry_size - return chip specific encoding for size
11960
+ * @size: size in dwords
11961
+ *
11962
+ * Convert a receive header entry size to the encoding used in the CSR.
11963
+ *
11964
+ * Return a zero if the given size is invalid, otherwise the encoding.
11965
+ */
11966
+u8 encode_rcv_header_entry_size(u8 size)
11967
+{
11968
+ /* there are only 3 valid receive header entry sizes */
11969
+ if (size == 2)
11970
+ return 1;
11971
+ if (size == 16)
11972
+ return 2;
11973
+ if (size == 32)
11974
+ return 4;
11975
+ return 0; /* invalid */
11976
+}
11977
+
11978
+/**
11979
+ * hfi1_validate_rcvhdrcnt - validate hdrcnt
11980
+ * @dd: the device data
11981
+ * @thecnt: the header count
11982
+ */
11983
+int hfi1_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
11984
+{
11985
+ if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
11986
+ dd_dev_err(dd, "Receive header queue count too small\n");
11987
+ return -EINVAL;
11988
+ }
11989
+
11990
+ if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
11991
+ dd_dev_err(dd,
11992
+ "Receive header queue count cannot be greater than %u\n",
11993
+ HFI1_MAX_HDRQ_EGRBUF_CNT);
11994
+ return -EINVAL;
11995
+ }
11996
+
11997
+ if (thecnt % HDRQ_INCREMENT) {
11998
+ dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
11999
+ thecnt, HDRQ_INCREMENT);
12000
+ return -EINVAL;
12001
+ }
12002
+
12003
+ return 0;
12004
+}
12005
+
12006
+/**
12007
+ * set_hdrq_regs - set header queue registers for context
12008
+ * @dd: the device data
12009
+ * @ctxt: the context
12010
+ * @entsize: the dword entry size
12011
+ * @hdrcnt: the number of header entries
12012
+ */
12013
+void set_hdrq_regs(struct hfi1_devdata *dd, u8 ctxt, u8 entsize, u16 hdrcnt)
12014
+{
12015
+ u64 reg;
12016
+
12017
+ reg = (((u64)hdrcnt >> HDRQ_SIZE_SHIFT) & RCV_HDR_CNT_CNT_MASK) <<
12018
+ RCV_HDR_CNT_CNT_SHIFT;
12019
+ write_kctxt_csr(dd, ctxt, RCV_HDR_CNT, reg);
12020
+ reg = ((u64)encode_rcv_header_entry_size(entsize) &
12021
+ RCV_HDR_ENT_SIZE_ENT_SIZE_MASK) <<
12022
+ RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT;
12023
+ write_kctxt_csr(dd, ctxt, RCV_HDR_ENT_SIZE, reg);
12024
+ reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK) <<
12025
+ RCV_HDR_SIZE_HDR_SIZE_SHIFT;
12026
+ write_kctxt_csr(dd, ctxt, RCV_HDR_SIZE, reg);
12027
+
12028
+ /*
12029
+ * Program dummy tail address for every receive context
12030
+ * before enabling any receive context
12031
+ */
12032
+ write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
12033
+ dd->rcvhdrtail_dummy_dma);
12034
+}
12035
+
1187912036 void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
1188012037 struct hfi1_ctxtdata *rcd)
1188112038 {
....@@ -11897,13 +12054,13 @@
1189712054 /* reset the tail and hdr addresses, and sequence count */
1189812055 write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
1189912056 rcd->rcvhdrq_dma);
11900
- if (rcd->rcvhdrtail_kvaddr)
12057
+ if (hfi1_rcvhdrtail_kvaddr(rcd))
1190112058 write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
1190212059 rcd->rcvhdrqtailaddr_dma);
11903
- rcd->seq_cnt = 1;
12060
+ hfi1_set_seq_cnt(rcd, 1);
1190412061
1190512062 /* reset the cached receive header queue head value */
11906
- rcd->head = 0;
12063
+ hfi1_set_rcd_head(rcd, 0);
1190712064
1190812065 /*
1190912066 * Zero the receive header queue so we don't get false
....@@ -11973,11 +12130,17 @@
1197312130
1197412131 rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
1197512132 }
11976
- if (op & HFI1_RCVCTRL_INTRAVAIL_ENB)
12133
+ if (op & HFI1_RCVCTRL_INTRAVAIL_ENB) {
12134
+ set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
12135
+ IS_RCVAVAIL_START + rcd->ctxt, true);
1197712136 rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
11978
- if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
12137
+ }
12138
+ if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) {
12139
+ set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
12140
+ IS_RCVAVAIL_START + rcd->ctxt, false);
1197912141 rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
11980
- if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr)
12142
+ }
12143
+ if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && hfi1_rcvhdrtail_kvaddr(rcd))
1198112144 rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
1198212145 if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
1198312146 /* See comment on RcvCtxtCtrl.TailUpd above */
....@@ -12006,6 +12169,13 @@
1200612169 rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
1200712170 if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
1200812171 rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
12172
+ if (op & HFI1_RCVCTRL_URGENT_ENB)
12173
+ set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
12174
+ IS_RCVURGENT_START + rcd->ctxt, true);
12175
+ if (op & HFI1_RCVCTRL_URGENT_DIS)
12176
+ set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
12177
+ IS_RCVURGENT_START + rcd->ctxt, false);
12178
+
1200912179 hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
1201012180 write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl);
1201112181
....@@ -12711,11 +12881,6 @@
1271112881 static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
1271212882 {
1271312883 switch (chip_lstate) {
12714
- default:
12715
- dd_dev_err(dd,
12716
- "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
12717
- chip_lstate);
12718
- /* fall through */
1271912884 case LSTATE_DOWN:
1272012885 return IB_PORT_DOWN;
1272112886 case LSTATE_INIT:
....@@ -12724,6 +12889,11 @@
1272412889 return IB_PORT_ARMED;
1272512890 case LSTATE_ACTIVE:
1272612891 return IB_PORT_ACTIVE;
12892
+ default:
12893
+ dd_dev_err(dd,
12894
+ "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
12895
+ chip_lstate);
12896
+ return IB_PORT_DOWN;
1272712897 }
1272812898 }
1272912899
....@@ -12731,10 +12901,6 @@
1273112901 {
1273212902 /* look at the HFI meta-states only */
1273312903 switch (chip_pstate & 0xf0) {
12734
- default:
12735
- dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
12736
- chip_pstate);
12737
- /* fall through */
1273812904 case PLS_DISABLED:
1273912905 return IB_PORTPHYSSTATE_DISABLED;
1274012906 case PLS_OFFLINE:
....@@ -12747,6 +12913,10 @@
1274712913 return IB_PORTPHYSSTATE_LINKUP;
1274812914 case PLS_PHYTEST:
1274912915 return IB_PORTPHYSSTATE_PHY_TEST;
12916
+ default:
12917
+ dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
12918
+ chip_pstate);
12919
+ return IB_PORTPHYSSTATE_DISABLED;
1275012920 }
1275112921 }
1275212922
....@@ -13038,63 +13208,71 @@
1303813208 return ret;
1303913209 }
1304013210
13041
-/**
13042
- * get_int_mask - get 64 bit int mask
13043
- * @dd - the devdata
13044
- * @i - the csr (relative to CCE_INT_MASK)
13045
- *
13046
- * Returns the mask with the urgent interrupt mask
13047
- * bit clear for kernel receive contexts.
13048
- */
13049
-static u64 get_int_mask(struct hfi1_devdata *dd, u32 i)
13050
-{
13051
- u64 mask = U64_MAX; /* default to no change */
13052
-
13053
- if (i >= (IS_RCVURGENT_START / 64) && i < (IS_RCVURGENT_END / 64)) {
13054
- int j = (i - (IS_RCVURGENT_START / 64)) * 64;
13055
- int k = !j ? IS_RCVURGENT_START % 64 : 0;
13056
-
13057
- if (j)
13058
- j -= IS_RCVURGENT_START % 64;
13059
- /* j = 0..dd->first_dyn_alloc_ctxt - 1,k = 0..63 */
13060
- for (; j < dd->first_dyn_alloc_ctxt && k < 64; j++, k++)
13061
- /* convert to bit in mask and clear */
13062
- mask &= ~BIT_ULL(k);
13063
- }
13064
- return mask;
13065
-}
13066
-
1306713211 /* ========================================================================= */
1306813212
13069
-/*
13070
- * Enable/disable chip from delivering interrupts.
13213
+/**
13214
+ * read_mod_write() - Calculate the IRQ register index and set/clear the bits
13215
+ * @dd: valid devdata
13216
+ * @src: IRQ source to determine register index from
13217
+ * @bits: the bits to set or clear
13218
+ * @set: true == set the bits, false == clear the bits
13219
+ *
1307113220 */
13072
-void set_intr_state(struct hfi1_devdata *dd, u32 enable)
13221
+static void read_mod_write(struct hfi1_devdata *dd, u16 src, u64 bits,
13222
+ bool set)
1307313223 {
13074
- int i;
13224
+ u64 reg;
13225
+ u16 idx = src / BITS_PER_REGISTER;
1307513226
13076
- /*
13077
- * In HFI, the mask needs to be 1 to allow interrupts.
13078
- */
13079
- if (enable) {
13080
- /* enable all interrupts but urgent on kernel contexts */
13081
- for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
13082
- u64 mask = get_int_mask(dd, i);
13227
+ spin_lock(&dd->irq_src_lock);
13228
+ reg = read_csr(dd, CCE_INT_MASK + (8 * idx));
13229
+ if (set)
13230
+ reg |= bits;
13231
+ else
13232
+ reg &= ~bits;
13233
+ write_csr(dd, CCE_INT_MASK + (8 * idx), reg);
13234
+ spin_unlock(&dd->irq_src_lock);
13235
+}
1308313236
13084
- write_csr(dd, CCE_INT_MASK + (8 * i), mask);
13237
+/**
13238
+ * set_intr_bits() - Enable/disable a range (one or more) IRQ sources
13239
+ * @dd: valid devdata
13240
+ * @first: first IRQ source to set/clear
13241
+ * @last: last IRQ source (inclusive) to set/clear
13242
+ * @set: true == set the bits, false == clear the bits
13243
+ *
13244
+ * If first == last, set the exact source.
13245
+ */
13246
+int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set)
13247
+{
13248
+ u64 bits = 0;
13249
+ u64 bit;
13250
+ u16 src;
13251
+
13252
+ if (first > NUM_INTERRUPT_SOURCES || last > NUM_INTERRUPT_SOURCES)
13253
+ return -EINVAL;
13254
+
13255
+ if (last < first)
13256
+ return -ERANGE;
13257
+
13258
+ for (src = first; src <= last; src++) {
13259
+ bit = src % BITS_PER_REGISTER;
13260
+ /* wrapped to next register? */
13261
+ if (!bit && bits) {
13262
+ read_mod_write(dd, src - 1, bits, set);
13263
+ bits = 0;
1308513264 }
13086
-
13087
- init_qsfp_int(dd);
13088
- } else {
13089
- for (i = 0; i < CCE_NUM_INT_CSRS; i++)
13090
- write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
13265
+ bits |= BIT_ULL(bit);
1309113266 }
13267
+ read_mod_write(dd, last, bits, set);
13268
+
13269
+ return 0;
1309213270 }
1309313271
1309413272 /*
1309513273 * Clear all interrupt sources on the chip.
1309613274 */
13097
-static void clear_all_interrupts(struct hfi1_devdata *dd)
13275
+void clear_all_interrupts(struct hfi1_devdata *dd)
1309813276 {
1309913277 int i;
1310013278
....@@ -13118,38 +13296,11 @@
1311813296 write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
1311913297 }
1312013298
13121
-/**
13122
- * hfi1_clean_up_interrupts() - Free all IRQ resources
13123
- * @dd: valid device data data structure
13124
- *
13125
- * Free the MSIx and assoicated PCI resources, if they have been allocated.
13126
- */
13127
-void hfi1_clean_up_interrupts(struct hfi1_devdata *dd)
13128
-{
13129
- int i;
13130
- struct hfi1_msix_entry *me = dd->msix_entries;
13131
-
13132
- /* remove irqs - must happen before disabling/turning off */
13133
- for (i = 0; i < dd->num_msix_entries; i++, me++) {
13134
- if (!me->arg) /* => no irq, no affinity */
13135
- continue;
13136
- hfi1_put_irq_affinity(dd, me);
13137
- pci_free_irq(dd->pcidev, i, me->arg);
13138
- }
13139
-
13140
- /* clean structures */
13141
- kfree(dd->msix_entries);
13142
- dd->msix_entries = NULL;
13143
- dd->num_msix_entries = 0;
13144
-
13145
- pci_free_irq_vectors(dd->pcidev);
13146
-}
13147
-
1314813299 /*
1314913300 * Remap the interrupt source from the general handler to the given MSI-X
1315013301 * interrupt.
1315113302 */
13152
-static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
13303
+void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
1315313304 {
1315413305 u64 reg;
1315513306 int m, n;
....@@ -13173,8 +13324,7 @@
1317313324 write_csr(dd, CCE_INT_MAP + (8 * m), reg);
1317413325 }
1317513326
13176
-static void remap_sdma_interrupts(struct hfi1_devdata *dd,
13177
- int engine, int msix_intr)
13327
+void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr)
1317813328 {
1317913329 /*
1318013330 * SDMA engine interrupt sources grouped by type, rather than
....@@ -13183,204 +13333,16 @@
1318313333 * SDMAProgress
1318413334 * SDMAIdle
1318513335 */
13186
- remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine,
13187
- msix_intr);
13188
- remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine,
13189
- msix_intr);
13190
- remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine,
13191
- msix_intr);
13192
-}
13193
-
13194
-static int request_msix_irqs(struct hfi1_devdata *dd)
13195
-{
13196
- int first_general, last_general;
13197
- int first_sdma, last_sdma;
13198
- int first_rx, last_rx;
13199
- int i, ret = 0;
13200
-
13201
- /* calculate the ranges we are going to use */
13202
- first_general = 0;
13203
- last_general = first_general + 1;
13204
- first_sdma = last_general;
13205
- last_sdma = first_sdma + dd->num_sdma;
13206
- first_rx = last_sdma;
13207
- last_rx = first_rx + dd->n_krcv_queues + dd->num_vnic_contexts;
13208
-
13209
- /* VNIC MSIx interrupts get mapped when VNIC contexts are created */
13210
- dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues;
13211
-
13212
- /*
13213
- * Sanity check - the code expects all SDMA chip source
13214
- * interrupts to be in the same CSR, starting at bit 0. Verify
13215
- * that this is true by checking the bit location of the start.
13216
- */
13217
- BUILD_BUG_ON(IS_SDMA_START % 64);
13218
-
13219
- for (i = 0; i < dd->num_msix_entries; i++) {
13220
- struct hfi1_msix_entry *me = &dd->msix_entries[i];
13221
- const char *err_info;
13222
- irq_handler_t handler;
13223
- irq_handler_t thread = NULL;
13224
- void *arg = NULL;
13225
- int idx;
13226
- struct hfi1_ctxtdata *rcd = NULL;
13227
- struct sdma_engine *sde = NULL;
13228
- char name[MAX_NAME_SIZE];
13229
-
13230
- /* obtain the arguments to pci_request_irq */
13231
- if (first_general <= i && i < last_general) {
13232
- idx = i - first_general;
13233
- handler = general_interrupt;
13234
- arg = dd;
13235
- snprintf(name, sizeof(name),
13236
- DRIVER_NAME "_%d", dd->unit);
13237
- err_info = "general";
13238
- me->type = IRQ_GENERAL;
13239
- } else if (first_sdma <= i && i < last_sdma) {
13240
- idx = i - first_sdma;
13241
- sde = &dd->per_sdma[idx];
13242
- handler = sdma_interrupt;
13243
- arg = sde;
13244
- snprintf(name, sizeof(name),
13245
- DRIVER_NAME "_%d sdma%d", dd->unit, idx);
13246
- err_info = "sdma";
13247
- remap_sdma_interrupts(dd, idx, i);
13248
- me->type = IRQ_SDMA;
13249
- } else if (first_rx <= i && i < last_rx) {
13250
- idx = i - first_rx;
13251
- rcd = hfi1_rcd_get_by_index_safe(dd, idx);
13252
- if (rcd) {
13253
- /*
13254
- * Set the interrupt register and mask for this
13255
- * context's interrupt.
13256
- */
13257
- rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
13258
- rcd->imask = ((u64)1) <<
13259
- ((IS_RCVAVAIL_START + idx) % 64);
13260
- handler = receive_context_interrupt;
13261
- thread = receive_context_thread;
13262
- arg = rcd;
13263
- snprintf(name, sizeof(name),
13264
- DRIVER_NAME "_%d kctxt%d",
13265
- dd->unit, idx);
13266
- err_info = "receive context";
13267
- remap_intr(dd, IS_RCVAVAIL_START + idx, i);
13268
- me->type = IRQ_RCVCTXT;
13269
- rcd->msix_intr = i;
13270
- hfi1_rcd_put(rcd);
13271
- }
13272
- } else {
13273
- /* not in our expected range - complain, then
13274
- * ignore it
13275
- */
13276
- dd_dev_err(dd,
13277
- "Unexpected extra MSI-X interrupt %d\n", i);
13278
- continue;
13279
- }
13280
- /* no argument, no interrupt */
13281
- if (!arg)
13282
- continue;
13283
- /* make sure the name is terminated */
13284
- name[sizeof(name) - 1] = 0;
13285
- me->irq = pci_irq_vector(dd->pcidev, i);
13286
- ret = pci_request_irq(dd->pcidev, i, handler, thread, arg,
13287
- name);
13288
- if (ret) {
13289
- dd_dev_err(dd,
13290
- "unable to allocate %s interrupt, irq %d, index %d, err %d\n",
13291
- err_info, me->irq, idx, ret);
13292
- return ret;
13293
- }
13294
- /*
13295
- * assign arg after pci_request_irq call, so it will be
13296
- * cleaned up
13297
- */
13298
- me->arg = arg;
13299
-
13300
- ret = hfi1_get_irq_affinity(dd, me);
13301
- if (ret)
13302
- dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
13303
- }
13304
-
13305
- return ret;
13306
-}
13307
-
13308
-void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
13309
-{
13310
- int i;
13311
-
13312
- for (i = 0; i < dd->vnic.num_ctxt; i++) {
13313
- struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
13314
- struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
13315
-
13316
- synchronize_irq(me->irq);
13317
- }
13318
-}
13319
-
13320
-void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
13321
-{
13322
- struct hfi1_devdata *dd = rcd->dd;
13323
- struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
13324
-
13325
- if (!me->arg) /* => no irq, no affinity */
13326
- return;
13327
-
13328
- hfi1_put_irq_affinity(dd, me);
13329
- pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
13330
-
13331
- me->arg = NULL;
13332
-}
13333
-
13334
-void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
13335
-{
13336
- struct hfi1_devdata *dd = rcd->dd;
13337
- struct hfi1_msix_entry *me;
13338
- int idx = rcd->ctxt;
13339
- void *arg = rcd;
13340
- int ret;
13341
-
13342
- rcd->msix_intr = dd->vnic.msix_idx++;
13343
- me = &dd->msix_entries[rcd->msix_intr];
13344
-
13345
- /*
13346
- * Set the interrupt register and mask for this
13347
- * context's interrupt.
13348
- */
13349
- rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
13350
- rcd->imask = ((u64)1) <<
13351
- ((IS_RCVAVAIL_START + idx) % 64);
13352
- me->type = IRQ_RCVCTXT;
13353
- me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr);
13354
- remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
13355
-
13356
- ret = pci_request_irq(dd->pcidev, rcd->msix_intr,
13357
- receive_context_interrupt,
13358
- receive_context_thread, arg,
13359
- DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
13360
- if (ret) {
13361
- dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n",
13362
- me->irq, idx, ret);
13363
- return;
13364
- }
13365
- /*
13366
- * assign arg after pci_request_irq call, so it will be
13367
- * cleaned up
13368
- */
13369
- me->arg = arg;
13370
-
13371
- ret = hfi1_get_irq_affinity(dd, me);
13372
- if (ret) {
13373
- dd_dev_err(dd,
13374
- "unable to pin IRQ %d\n", ret);
13375
- pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
13376
- }
13336
+ remap_intr(dd, IS_SDMA_START + engine, msix_intr);
13337
+ remap_intr(dd, IS_SDMA_PROGRESS_START + engine, msix_intr);
13338
+ remap_intr(dd, IS_SDMA_IDLE_START + engine, msix_intr);
1337713339 }
1337813340
1337913341 /*
1338013342 * Set the general handler to accept all interrupts, remap all
1338113343 * chip interrupts back to MSI-X 0.
1338213344 */
13383
-static void reset_interrupts(struct hfi1_devdata *dd)
13345
+void reset_interrupts(struct hfi1_devdata *dd)
1338413346 {
1338513347 int i;
1338613348
....@@ -13393,54 +13355,33 @@
1339313355 write_csr(dd, CCE_INT_MAP + (8 * i), 0);
1339413356 }
1339513357
13358
+/**
13359
+ * set_up_interrupts() - Initialize the IRQ resources and state
13360
+ * @dd: valid devdata
13361
+ *
13362
+ */
1339613363 static int set_up_interrupts(struct hfi1_devdata *dd)
1339713364 {
13398
- u32 total;
13399
- int ret, request;
13400
-
13401
- /*
13402
- * Interrupt count:
13403
- * 1 general, "slow path" interrupt (includes the SDMA engines
13404
- * slow source, SDMACleanupDone)
13405
- * N interrupts - one per used SDMA engine
13406
- * M interrupt - one per kernel receive context
13407
- * V interrupt - one for each VNIC context
13408
- */
13409
- total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
13410
-
13411
- /* ask for MSI-X interrupts */
13412
- request = request_msix(dd, total);
13413
- if (request < 0) {
13414
- ret = request;
13415
- goto fail;
13416
- } else {
13417
- dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries),
13418
- GFP_KERNEL);
13419
- if (!dd->msix_entries) {
13420
- ret = -ENOMEM;
13421
- goto fail;
13422
- }
13423
- /* using MSI-X */
13424
- dd->num_msix_entries = total;
13425
- dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
13426
- }
13365
+ int ret;
1342713366
1342813367 /* mask all interrupts */
13429
- set_intr_state(dd, 0);
13368
+ set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
13369
+
1343013370 /* clear all pending interrupts */
1343113371 clear_all_interrupts(dd);
1343213372
1343313373 /* reset general handler mask, chip MSI-X mappings */
1343413374 reset_interrupts(dd);
1343513375
13436
- ret = request_msix_irqs(dd);
13376
+ /* ask for MSI-X interrupts */
13377
+ ret = msix_initialize(dd);
1343713378 if (ret)
13438
- goto fail;
13379
+ return ret;
1343913380
13440
- return 0;
13381
+ ret = msix_request_irqs(dd);
13382
+ if (ret)
13383
+ msix_clean_up_interrupts(dd);
1344113384
13442
-fail:
13443
- hfi1_clean_up_interrupts(dd);
1344413385 return ret;
1344513386 }
1344613387
....@@ -13453,13 +13394,12 @@
1345313394 * in array of contexts
1345413395 * freectxts - number of free user contexts
1345513396 * num_send_contexts - number of PIO send contexts being used
13456
- * num_vnic_contexts - number of contexts reserved for VNIC
13397
+ * num_netdev_contexts - number of contexts reserved for netdev
1345713398 */
1345813399 static int set_up_context_variables(struct hfi1_devdata *dd)
1345913400 {
1346013401 unsigned long num_kernel_contexts;
13461
- u16 num_vnic_contexts = HFI1_NUM_VNIC_CTXT;
13462
- int total_contexts;
13402
+ u16 num_netdev_contexts;
1346313403 int ret;
1346413404 unsigned ngroups;
1346513405 int rmt_count;
....@@ -13496,13 +13436,6 @@
1349613436 num_kernel_contexts = send_contexts - num_vls - 1;
1349713437 }
1349813438
13499
- /* Accommodate VNIC contexts if possible */
13500
- if ((num_kernel_contexts + num_vnic_contexts) > rcv_contexts) {
13501
- dd_dev_err(dd, "No receive contexts available for VNIC\n");
13502
- num_vnic_contexts = 0;
13503
- }
13504
- total_contexts = num_kernel_contexts + num_vnic_contexts;
13505
-
1350613439 /*
1350713440 * User contexts:
1350813441 * - default to 1 user context per real (non-HT) CPU core if
....@@ -13515,27 +13448,34 @@
1351513448 /*
1351613449 * Adjust the counts given a global max.
1351713450 */
13518
- if (total_contexts + n_usr_ctxts > rcv_contexts) {
13451
+ if (num_kernel_contexts + n_usr_ctxts > rcv_contexts) {
1351913452 dd_dev_err(dd,
13520
- "Reducing # user receive contexts to: %d, from %u\n",
13521
- rcv_contexts - total_contexts,
13453
+ "Reducing # user receive contexts to: %u, from %u\n",
13454
+ (u32)(rcv_contexts - num_kernel_contexts),
1352213455 n_usr_ctxts);
1352313456 /* recalculate */
13524
- n_usr_ctxts = rcv_contexts - total_contexts;
13457
+ n_usr_ctxts = rcv_contexts - num_kernel_contexts;
1352513458 }
1352613459
13460
+ num_netdev_contexts =
13461
+ hfi1_num_netdev_contexts(dd, rcv_contexts -
13462
+ (num_kernel_contexts + n_usr_ctxts),
13463
+ &node_affinity.real_cpu_mask);
1352713464 /*
1352813465 * The RMT entries are currently allocated as shown below:
1352913466 * 1. QOS (0 to 128 entries);
13530
- * 2. FECN for PSM (num_user_contexts + num_vnic_contexts);
13531
- * 3. VNIC (num_vnic_contexts).
13532
- * It should be noted that PSM FECN oversubscribe num_vnic_contexts
13533
- * entries of RMT because both VNIC and PSM could allocate any receive
13467
+ * 2. FECN (num_kernel_context - 1 + num_user_contexts +
13468
+ * num_netdev_contexts);
13469
+ * 3. netdev (num_netdev_contexts).
13470
+ * It should be noted that FECN oversubscribe num_netdev_contexts
13471
+ * entries of RMT because both netdev and PSM could allocate any receive
1353413472 * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts,
1353513473 * and PSM FECN must reserve an RMT entry for each possible PSM receive
1353613474 * context.
1353713475 */
13538
- rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2);
13476
+ rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_netdev_contexts * 2);
13477
+ if (HFI1_CAP_IS_KSET(TID_RDMA))
13478
+ rmt_count += num_kernel_contexts - 1;
1353913479 if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
1354013480 user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count;
1354113481 dd_dev_err(dd,
....@@ -13546,21 +13486,20 @@
1354613486 n_usr_ctxts = user_rmt_reduced;
1354713487 }
1354813488
13549
- total_contexts += n_usr_ctxts;
13550
-
13551
- /* the first N are kernel contexts, the rest are user/vnic contexts */
13552
- dd->num_rcv_contexts = total_contexts;
13489
+ /* the first N are kernel contexts, the rest are user/netdev contexts */
13490
+ dd->num_rcv_contexts =
13491
+ num_kernel_contexts + n_usr_ctxts + num_netdev_contexts;
1355313492 dd->n_krcv_queues = num_kernel_contexts;
1355413493 dd->first_dyn_alloc_ctxt = num_kernel_contexts;
13555
- dd->num_vnic_contexts = num_vnic_contexts;
13494
+ dd->num_netdev_contexts = num_netdev_contexts;
1355613495 dd->num_user_contexts = n_usr_ctxts;
1355713496 dd->freectxts = n_usr_ctxts;
1355813497 dd_dev_info(dd,
13559
- "rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n",
13498
+ "rcv contexts: chip %d, used %d (kernel %d, netdev %u, user %u)\n",
1356013499 rcv_contexts,
1356113500 (int)dd->num_rcv_contexts,
1356213501 (int)dd->n_krcv_queues,
13563
- dd->num_vnic_contexts,
13502
+ dd->num_netdev_contexts,
1356413503 dd->num_user_contexts);
1356513504
1356613505 /*
....@@ -14239,22 +14178,26 @@
1423914178
1424014179 static void init_kdeth_qp(struct hfi1_devdata *dd)
1424114180 {
14242
- /* user changed the KDETH_QP */
14243
- if (kdeth_qp != 0 && kdeth_qp >= 0xff) {
14244
- /* out of range or illegal value */
14245
- dd_dev_err(dd, "Invalid KDETH queue pair prefix, ignoring");
14246
- kdeth_qp = 0;
14247
- }
14248
- if (kdeth_qp == 0) /* not set, or failed range check */
14249
- kdeth_qp = DEFAULT_KDETH_QP;
14250
-
1425114181 write_csr(dd, SEND_BTH_QP,
14252
- (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK) <<
14182
+ (RVT_KDETH_QP_PREFIX & SEND_BTH_QP_KDETH_QP_MASK) <<
1425314183 SEND_BTH_QP_KDETH_QP_SHIFT);
1425414184
1425514185 write_csr(dd, RCV_BTH_QP,
14256
- (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK) <<
14186
+ (RVT_KDETH_QP_PREFIX & RCV_BTH_QP_KDETH_QP_MASK) <<
1425714187 RCV_BTH_QP_KDETH_QP_SHIFT);
14188
+}
14189
+
14190
+/**
14191
+ * hfi1_get_qp_map - read an entry from the RCV_QP_MAP_TABLE
14192
+ * @dd: device data
14193
+ * @idx: index to read
14194
+ */
14195
+u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx)
14196
+{
14197
+ u64 reg = read_csr(dd, RCV_QP_MAP_TABLE + (idx / 8) * 8);
14198
+
14199
+ reg >>= (idx % 8) * 8;
14200
+ return reg;
1425814201 }
1425914202
1426014203 /**
....@@ -14354,6 +14297,12 @@
1435414297 /* enable RSM */
1435514298 add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
1435614299 }
14300
+}
14301
+
14302
+/* Return true if the given receive side mapping (RSM) rule is in use */
14303
+static bool has_rsm_rule(struct hfi1_devdata *dd, u8 rule_index)
14304
+{
14305
+ return read_csr(dd, RCV_RSM_CFG + (8 * rule_index)) != 0;
1435714306 }
1435814307
1435914308 /*
....@@ -14518,37 +14467,43 @@
1451814467 init_qpmap_table(dd, FIRST_KERNEL_KCTXT, dd->n_krcv_queues - 1);
1451914468 }
1452014469
14521
-static void init_user_fecn_handling(struct hfi1_devdata *dd,
14522
- struct rsm_map_table *rmt)
14470
+static void init_fecn_handling(struct hfi1_devdata *dd,
14471
+ struct rsm_map_table *rmt)
1452314472 {
1452414473 struct rsm_rule_data rrd;
1452514474 u64 reg;
14526
- int i, idx, regoff, regidx;
14475
+ int i, idx, regoff, regidx, start;
1452714476 u8 offset;
1452814477 u32 total_cnt;
1452914478
14479
+ if (HFI1_CAP_IS_KSET(TID_RDMA))
14480
+ /* Exclude context 0 */
14481
+ start = 1;
14482
+ else
14483
+ start = dd->first_dyn_alloc_ctxt;
14484
+
14485
+ total_cnt = dd->num_rcv_contexts - start;
14486
+
1453014487 /* there needs to be enough room in the map table */
14531
- total_cnt = dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt;
1453214488 if (rmt->used + total_cnt >= NUM_MAP_ENTRIES) {
14533
- dd_dev_err(dd, "User FECN handling disabled - too many user contexts allocated\n");
14489
+ dd_dev_err(dd, "FECN handling disabled - too many contexts allocated\n");
1453414490 return;
1453514491 }
1453614492
1453714493 /*
1453814494 * RSM will extract the destination context as an index into the
1453914495 * map table. The destination contexts are a sequential block
14540
- * in the range first_dyn_alloc_ctxt...num_rcv_contexts-1 (inclusive).
14496
+ * in the range start...num_rcv_contexts-1 (inclusive).
1454114497 * Map entries are accessed as offset + extracted value. Adjust
1454214498 * the added offset so this sequence can be placed anywhere in
1454314499 * the table - as long as the entries themselves do not wrap.
1454414500 * There are only enough bits in offset for the table size, so
1454514501 * start with that to allow for a "negative" offset.
1454614502 */
14547
- offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used -
14548
- (int)dd->first_dyn_alloc_ctxt);
14503
+ offset = (u8)(NUM_MAP_ENTRIES + rmt->used - start);
1454914504
14550
- for (i = dd->first_dyn_alloc_ctxt, idx = rmt->used;
14551
- i < dd->num_rcv_contexts; i++, idx++) {
14505
+ for (i = start, idx = rmt->used; i < dd->num_rcv_contexts;
14506
+ i++, idx++) {
1455214507 /* replace with identity mapping */
1455314508 regoff = (idx % 8) * 8;
1455414509 regidx = idx / 8;
....@@ -14586,77 +14541,138 @@
1458614541 rmt->used += total_cnt;
1458714542 }
1458814543
14589
-/* Initialize RSM for VNIC */
14590
-void hfi1_init_vnic_rsm(struct hfi1_devdata *dd)
14544
+static inline bool hfi1_is_rmt_full(int start, int spare)
14545
+{
14546
+ return (start + spare) > NUM_MAP_ENTRIES;
14547
+}
14548
+
14549
+static bool hfi1_netdev_update_rmt(struct hfi1_devdata *dd)
1459114550 {
1459214551 u8 i, j;
1459314552 u8 ctx_id = 0;
1459414553 u64 reg;
1459514554 u32 regoff;
14596
- struct rsm_rule_data rrd;
14555
+ int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
14556
+ int ctxt_count = hfi1_netdev_ctxt_count(dd);
1459714557
14598
- if (hfi1_vnic_is_rsm_full(dd, NUM_VNIC_MAP_ENTRIES)) {
14599
- dd_dev_err(dd, "Vnic RSM disabled, rmt entries used = %d\n",
14600
- dd->vnic.rmt_start);
14601
- return;
14558
+ /* We already have contexts mapped in RMT */
14559
+ if (has_rsm_rule(dd, RSM_INS_VNIC) || has_rsm_rule(dd, RSM_INS_AIP)) {
14560
+ dd_dev_info(dd, "Contexts are already mapped in RMT\n");
14561
+ return true;
1460214562 }
1460314563
14604
- dev_dbg(&(dd)->pcidev->dev, "Vnic rsm start = %d, end %d\n",
14605
- dd->vnic.rmt_start,
14606
- dd->vnic.rmt_start + NUM_VNIC_MAP_ENTRIES);
14564
+ if (hfi1_is_rmt_full(rmt_start, NUM_NETDEV_MAP_ENTRIES)) {
14565
+ dd_dev_err(dd, "Not enough RMT entries used = %d\n",
14566
+ rmt_start);
14567
+ return false;
14568
+ }
14569
+
14570
+ dev_dbg(&(dd)->pcidev->dev, "RMT start = %d, end %d\n",
14571
+ rmt_start,
14572
+ rmt_start + NUM_NETDEV_MAP_ENTRIES);
1460714573
1460814574 /* Update RSM mapping table, 32 regs, 256 entries - 1 ctx per byte */
14609
- regoff = RCV_RSM_MAP_TABLE + (dd->vnic.rmt_start / 8) * 8;
14575
+ regoff = RCV_RSM_MAP_TABLE + (rmt_start / 8) * 8;
1461014576 reg = read_csr(dd, regoff);
14611
- for (i = 0; i < NUM_VNIC_MAP_ENTRIES; i++) {
14612
- /* Update map register with vnic context */
14613
- j = (dd->vnic.rmt_start + i) % 8;
14577
+ for (i = 0; i < NUM_NETDEV_MAP_ENTRIES; i++) {
14578
+ /* Update map register with netdev context */
14579
+ j = (rmt_start + i) % 8;
1461414580 reg &= ~(0xffllu << (j * 8));
14615
- reg |= (u64)dd->vnic.ctxt[ctx_id++]->ctxt << (j * 8);
14616
- /* Wrap up vnic ctx index */
14617
- ctx_id %= dd->vnic.num_ctxt;
14581
+ reg |= (u64)hfi1_netdev_get_ctxt(dd, ctx_id++)->ctxt << (j * 8);
14582
+ /* Wrap up netdev ctx index */
14583
+ ctx_id %= ctxt_count;
1461814584 /* Write back map register */
14619
- if (j == 7 || ((i + 1) == NUM_VNIC_MAP_ENTRIES)) {
14585
+ if (j == 7 || ((i + 1) == NUM_NETDEV_MAP_ENTRIES)) {
1462014586 dev_dbg(&(dd)->pcidev->dev,
14621
- "Vnic rsm map reg[%d] =0x%llx\n",
14587
+ "RMT[%d] =0x%llx\n",
1462214588 regoff - RCV_RSM_MAP_TABLE, reg);
1462314589
1462414590 write_csr(dd, regoff, reg);
1462514591 regoff += 8;
14626
- if (i < (NUM_VNIC_MAP_ENTRIES - 1))
14592
+ if (i < (NUM_NETDEV_MAP_ENTRIES - 1))
1462714593 reg = read_csr(dd, regoff);
1462814594 }
1462914595 }
1463014596
14631
- /* Add rule for vnic */
14632
- rrd.offset = dd->vnic.rmt_start;
14633
- rrd.pkt_type = 4;
14634
- /* Match 16B packets */
14635
- rrd.field1_off = L2_TYPE_MATCH_OFFSET;
14636
- rrd.mask1 = L2_TYPE_MASK;
14637
- rrd.value1 = L2_16B_VALUE;
14638
- /* Match ETH L4 packets */
14639
- rrd.field2_off = L4_TYPE_MATCH_OFFSET;
14640
- rrd.mask2 = L4_16B_TYPE_MASK;
14641
- rrd.value2 = L4_16B_ETH_VALUE;
14642
- /* Calc context from veswid and entropy */
14643
- rrd.index1_off = L4_16B_HDR_VESWID_OFFSET;
14644
- rrd.index1_width = ilog2(NUM_VNIC_MAP_ENTRIES);
14645
- rrd.index2_off = L2_16B_ENTROPY_OFFSET;
14646
- rrd.index2_width = ilog2(NUM_VNIC_MAP_ENTRIES);
14647
- add_rsm_rule(dd, RSM_INS_VNIC, &rrd);
14597
+ return true;
14598
+}
1464814599
14649
- /* Enable RSM if not already enabled */
14600
+static void hfi1_enable_rsm_rule(struct hfi1_devdata *dd,
14601
+ int rule, struct rsm_rule_data *rrd)
14602
+{
14603
+ if (!hfi1_netdev_update_rmt(dd)) {
14604
+ dd_dev_err(dd, "Failed to update RMT for RSM%d rule\n", rule);
14605
+ return;
14606
+ }
14607
+
14608
+ add_rsm_rule(dd, rule, rrd);
1465014609 add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
14610
+}
14611
+
14612
+void hfi1_init_aip_rsm(struct hfi1_devdata *dd)
14613
+{
14614
+ /*
14615
+ * go through with the initialisation only if this rule actually doesn't
14616
+ * exist yet
14617
+ */
14618
+ if (atomic_fetch_inc(&dd->ipoib_rsm_usr_num) == 0) {
14619
+ int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
14620
+ struct rsm_rule_data rrd = {
14621
+ .offset = rmt_start,
14622
+ .pkt_type = IB_PACKET_TYPE,
14623
+ .field1_off = LRH_BTH_MATCH_OFFSET,
14624
+ .mask1 = LRH_BTH_MASK,
14625
+ .value1 = LRH_BTH_VALUE,
14626
+ .field2_off = BTH_DESTQP_MATCH_OFFSET,
14627
+ .mask2 = BTH_DESTQP_MASK,
14628
+ .value2 = BTH_DESTQP_VALUE,
14629
+ .index1_off = DETH_AIP_SQPN_SELECT_OFFSET +
14630
+ ilog2(NUM_NETDEV_MAP_ENTRIES),
14631
+ .index1_width = ilog2(NUM_NETDEV_MAP_ENTRIES),
14632
+ .index2_off = DETH_AIP_SQPN_SELECT_OFFSET,
14633
+ .index2_width = ilog2(NUM_NETDEV_MAP_ENTRIES)
14634
+ };
14635
+
14636
+ hfi1_enable_rsm_rule(dd, RSM_INS_AIP, &rrd);
14637
+ }
14638
+}
14639
+
14640
+/* Initialize RSM for VNIC */
14641
+void hfi1_init_vnic_rsm(struct hfi1_devdata *dd)
14642
+{
14643
+ int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
14644
+ struct rsm_rule_data rrd = {
14645
+ /* Add rule for vnic */
14646
+ .offset = rmt_start,
14647
+ .pkt_type = 4,
14648
+ /* Match 16B packets */
14649
+ .field1_off = L2_TYPE_MATCH_OFFSET,
14650
+ .mask1 = L2_TYPE_MASK,
14651
+ .value1 = L2_16B_VALUE,
14652
+ /* Match ETH L4 packets */
14653
+ .field2_off = L4_TYPE_MATCH_OFFSET,
14654
+ .mask2 = L4_16B_TYPE_MASK,
14655
+ .value2 = L4_16B_ETH_VALUE,
14656
+ /* Calc context from veswid and entropy */
14657
+ .index1_off = L4_16B_HDR_VESWID_OFFSET,
14658
+ .index1_width = ilog2(NUM_NETDEV_MAP_ENTRIES),
14659
+ .index2_off = L2_16B_ENTROPY_OFFSET,
14660
+ .index2_width = ilog2(NUM_NETDEV_MAP_ENTRIES)
14661
+ };
14662
+
14663
+ hfi1_enable_rsm_rule(dd, RSM_INS_VNIC, &rrd);
1465114664 }
1465214665
1465314666 void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd)
1465414667 {
1465514668 clear_rsm_rule(dd, RSM_INS_VNIC);
14669
+}
1465614670
14657
- /* Disable RSM if used only by vnic */
14658
- if (dd->vnic.rmt_start == 0)
14659
- clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
14671
+void hfi1_deinit_aip_rsm(struct hfi1_devdata *dd)
14672
+{
14673
+ /* only actually clear the rule if it's the last user asking to do so */
14674
+ if (atomic_fetch_add_unless(&dd->ipoib_rsm_usr_num, -1, 0) == 1)
14675
+ clear_rsm_rule(dd, RSM_INS_AIP);
1466014676 }
1466114677
1466214678 static int init_rxe(struct hfi1_devdata *dd)
....@@ -14673,10 +14689,10 @@
1467314689
1467414690 /* set up QOS, including the QPN map table */
1467514691 init_qos(dd, rmt);
14676
- init_user_fecn_handling(dd, rmt);
14692
+ init_fecn_handling(dd, rmt);
1467714693 complete_rsm_map_table(dd, rmt);
14678
- /* record number of used rsm map entries for vnic */
14679
- dd->vnic.rmt_start = rmt->used;
14694
+ /* record number of used rsm map entries for netdev */
14695
+ hfi1_netdev_set_free_rmt_idx(dd, rmt->used);
1468014696 kfree(rmt);
1468114697
1468214698 /*
....@@ -14900,8 +14916,8 @@
1490014916 */
1490114917 static int init_asic_data(struct hfi1_devdata *dd)
1490214918 {
14903
- unsigned long flags;
14904
- struct hfi1_devdata *tmp, *peer = NULL;
14919
+ unsigned long index;
14920
+ struct hfi1_devdata *peer;
1490514921 struct hfi1_asic_data *asic_data;
1490614922 int ret = 0;
1490714923
....@@ -14910,14 +14926,12 @@
1491014926 if (!asic_data)
1491114927 return -ENOMEM;
1491214928
14913
- spin_lock_irqsave(&hfi1_devs_lock, flags);
14929
+ xa_lock_irq(&hfi1_dev_table);
1491414930 /* Find our peer device */
14915
- list_for_each_entry(tmp, &hfi1_dev_list, list) {
14916
- if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(tmp)) &&
14917
- dd->unit != tmp->unit) {
14918
- peer = tmp;
14931
+ xa_for_each(&hfi1_dev_table, index, peer) {
14932
+ if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(peer)) &&
14933
+ dd->unit != peer->unit)
1491914934 break;
14920
- }
1492114935 }
1492214936
1492314937 if (peer) {
....@@ -14929,7 +14943,7 @@
1492914943 mutex_init(&dd->asic_data->asic_resource_mutex);
1493014944 }
1493114945 dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */
14932
- spin_unlock_irqrestore(&hfi1_devs_lock, flags);
14946
+ xa_unlock_irq(&hfi1_dev_table);
1493314947
1493414948 /* first one through - set up i2c devices */
1493514949 if (!peer)
....@@ -15009,20 +15023,16 @@
1500915023 }
1501015024
1501115025 /**
15012
- * Allocate and initialize the device structure for the hfi.
15026
+ * hfi1_init_dd() - Initialize most of the dd structure.
1501315027 * @dev: the pci_dev for hfi1_ib device
1501415028 * @ent: pci_device_id struct for this dev
15015
- *
15016
- * Also allocates, initializes, and returns the devdata struct for this
15017
- * device instance
1501815029 *
1501915030 * This is global, and is called directly at init to set up the
1502015031 * chip-specific function pointers for later use.
1502115032 */
15022
-struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
15023
- const struct pci_device_id *ent)
15033
+int hfi1_init_dd(struct hfi1_devdata *dd)
1502415034 {
15025
- struct hfi1_devdata *dd;
15035
+ struct pci_dev *pdev = dd->pcidev;
1502615036 struct hfi1_pportdata *ppd;
1502715037 u64 reg;
1502815038 int i, ret;
....@@ -15033,13 +15043,8 @@
1503315043 "Functional simulator"
1503415044 };
1503515045 struct pci_dev *parent = pdev->bus->self;
15036
- u32 sdma_engines;
15046
+ u32 sdma_engines = chip_sdma_engines(dd);
1503715047
15038
- dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
15039
- sizeof(struct hfi1_pportdata));
15040
- if (IS_ERR(dd))
15041
- goto bail;
15042
- sdma_engines = chip_sdma_engines(dd);
1504315048 ppd = dd->pport;
1504415049 for (i = 0; i < dd->num_pports; i++, ppd++) {
1504515050 int vl;
....@@ -15218,6 +15223,12 @@
1521815223 if (ret)
1521915224 goto bail_cleanup;
1522015225
15226
+ /*
15227
+ * This should probably occur in hfi1_pcie_init(), but historically
15228
+ * occurs after the do_pcie_gen3_transition() code.
15229
+ */
15230
+ tune_pcie_caps(dd);
15231
+
1522115232 /* start setting dd values and adjusting CSRs */
1522215233 init_early_variables(dd);
1522315234
....@@ -15234,6 +15245,11 @@
1523415245 (u32)dd->minrev,
1523515246 (dd->revision >> CCE_REVISION_SW_SHIFT)
1523615247 & CCE_REVISION_SW_MASK);
15248
+
15249
+ /* alloc netdev data */
15250
+ ret = hfi1_netdev_alloc(dd);
15251
+ if (ret)
15252
+ goto bail_cleanup;
1523715253
1523815254 ret = set_up_context_variables(dd);
1523915255 if (ret)
....@@ -15333,14 +15349,14 @@
1533315349 free_cntrs(dd);
1533415350 bail_clear_intr:
1533515351 hfi1_comp_vectors_clean_up(dd);
15336
- hfi1_clean_up_interrupts(dd);
15352
+ msix_clean_up_interrupts(dd);
1533715353 bail_cleanup:
15354
+ hfi1_netdev_free(dd);
1533815355 hfi1_pcie_ddcleanup(dd);
1533915356 bail_free:
1534015357 hfi1_free_devdata(dd);
15341
- dd = ERR_PTR(ret);
1534215358 bail:
15343
- return dd;
15359
+ return ret;
1534415360 }
1534515361
1534615362 static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,