hc
2023-12-06 08f87f769b595151be1afeff53e144f543faa614
kernel/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -31,20 +31,22 @@
  */
 
 #include <linux/interrupt.h>
+#include <linux/notifier.h>
 #include <linux/module.h>
 #include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/eq.h>
 #ifdef CONFIG_RFS_ACCEL
 #include <linux/cpu_rmap.h>
 #endif
 #include "mlx5_core.h"
+#include "lib/eq.h"
 #include "fpga/core.h"
 #include "eswitch.h"
 #include "lib/clock.h"
 #include "diag/fw_tracer.h"
 
 enum {
-	MLX5_EQE_SIZE = sizeof(struct mlx5_eqe),
 	MLX5_EQE_OWNER_INIT_VAL = 0x1,
 };
 
@@ -55,14 +57,33 @@
 };
 
 enum {
-	MLX5_NUM_SPARE_EQE = 0x80,
-	MLX5_NUM_ASYNC_EQE = 0x1000,
-	MLX5_NUM_CMD_EQE = 32,
-	MLX5_NUM_PF_DRAIN = 64,
+	MLX5_EQ_DOORBEL_OFFSET = 0x40,
 };
 
+/* budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee that we update
+ * the ci before we polled all the entries in the EQ. MLX5_NUM_SPARE_EQE is
+ * used to set the EQ size, budget must be smaller than the EQ size.
+ */
 enum {
-	MLX5_EQ_DOORBEL_OFFSET = 0x40,
+	MLX5_EQ_POLLING_BUDGET = 128,
+};
+
+static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE);
+
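Editor's note (not part of the patch): the comment above ties MLX5_EQ_POLLING_BUDGET to MLX5_NUM_SPARE_EQE. The standalone arithmetic sketch below illustrates that invariant; it assumes MLX5_NUM_SPARE_EQE is 0x80 (the value removed from the enum earlier in this hunk) and open-codes roundup_pow_of_two() so the snippet compiles on its own, outside the kernel.

/* Illustration only: the EQ is sized with the spare entries included, so as
 * long as the handler updates the consumer index at least every "budget"
 * entries (budget <= spare), the hardware never sees the queue overflow.
 */
#include <assert.h>

static unsigned int roundup_pow_of_two(unsigned int n)
{
	unsigned int p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

int main(void)
{
	const unsigned int spare = 0x80;	/* MLX5_NUM_SPARE_EQE */
	const unsigned int budget = 128;	/* MLX5_EQ_POLLING_BUDGET */
	unsigned int nent = 256;		/* example requested entries */
	unsigned int eq_size = roundup_pow_of_two(nent + spare);

	assert(budget <= spare);		/* mirrors the static_assert above */
	assert(eq_size == 512);			/* 256 requested + 0x80 spare, rounded up */
	return 0;
}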
+struct mlx5_eq_table {
+	struct list_head comp_eqs_list;
+	struct mlx5_eq_async pages_eq;
+	struct mlx5_eq_async cmd_eq;
+	struct mlx5_eq_async async_eq;
+
+	struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX];
+
+	/* Since CQ DB is stored in async_eq */
+	struct mlx5_nb cq_err_nb;
+
+	struct mutex lock; /* sync async eqs creations */
+	int num_comp_eqs;
+	struct mlx5_irq_table *irq_table;
 };
 
 #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
@@ -78,344 +99,13 @@
 			       (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \
 			       (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))
 
-struct map_eq_in {
-	u64 mask;
-	u32 reserved;
-	u32 unmap_eqn;
-};
-
-struct cre_des_eq {
-	u8 reserved[15];
-	u8 eqn;
-};
-
 static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
 {
-	u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {};
 
 	MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
 	MLX5_SET(destroy_eq_in, in, eq_number, eqn);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
-{
-	return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
-}
-
-static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
-{
-	struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
-
-	return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
-}
-
-static const char *eqe_type_str(u8 type)
-{
-	switch (type) {
-	case MLX5_EVENT_TYPE_COMP:
-		return "MLX5_EVENT_TYPE_COMP";
-	case MLX5_EVENT_TYPE_PATH_MIG:
-		return "MLX5_EVENT_TYPE_PATH_MIG";
-	case MLX5_EVENT_TYPE_COMM_EST:
-		return "MLX5_EVENT_TYPE_COMM_EST";
-	case MLX5_EVENT_TYPE_SQ_DRAINED:
-		return "MLX5_EVENT_TYPE_SQ_DRAINED";
-	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
-		return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
-	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
-		return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
-	case MLX5_EVENT_TYPE_CQ_ERROR:
-		return "MLX5_EVENT_TYPE_CQ_ERROR";
-	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
-		return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
-	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
-		return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
-	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
-		return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
-	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
-		return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
-	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
-		return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
-	case MLX5_EVENT_TYPE_INTERNAL_ERROR:
-		return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
-	case MLX5_EVENT_TYPE_PORT_CHANGE:
-		return "MLX5_EVENT_TYPE_PORT_CHANGE";
-	case MLX5_EVENT_TYPE_GPIO_EVENT:
-		return "MLX5_EVENT_TYPE_GPIO_EVENT";
-	case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
-		return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
-	case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
-		return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
-	case MLX5_EVENT_TYPE_REMOTE_CONFIG:
-		return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
-	case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
-		return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
-	case MLX5_EVENT_TYPE_STALL_EVENT:
-		return "MLX5_EVENT_TYPE_STALL_EVENT";
-	case MLX5_EVENT_TYPE_CMD:
-		return "MLX5_EVENT_TYPE_CMD";
-	case MLX5_EVENT_TYPE_PAGE_REQUEST:
-		return "MLX5_EVENT_TYPE_PAGE_REQUEST";
-	case MLX5_EVENT_TYPE_PAGE_FAULT:
-		return "MLX5_EVENT_TYPE_PAGE_FAULT";
-	case MLX5_EVENT_TYPE_PPS_EVENT:
-		return "MLX5_EVENT_TYPE_PPS_EVENT";
-	case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
-		return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
-	case MLX5_EVENT_TYPE_FPGA_ERROR:
-		return "MLX5_EVENT_TYPE_FPGA_ERROR";
-	case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
-		return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
-	case MLX5_EVENT_TYPE_GENERAL_EVENT:
-		return "MLX5_EVENT_TYPE_GENERAL_EVENT";
-	case MLX5_EVENT_TYPE_DEVICE_TRACER:
-		return "MLX5_EVENT_TYPE_DEVICE_TRACER";
-	default:
-		return "Unrecognized event";
-	}
-}
-
-static enum mlx5_dev_event port_subtype_event(u8 subtype)
-{
-	switch (subtype) {
-	case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
-		return MLX5_DEV_EVENT_PORT_DOWN;
-	case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
-		return MLX5_DEV_EVENT_PORT_UP;
-	case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
-		return MLX5_DEV_EVENT_PORT_INITIALIZED;
-	case MLX5_PORT_CHANGE_SUBTYPE_LID:
-		return MLX5_DEV_EVENT_LID_CHANGE;
-	case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
-		return MLX5_DEV_EVENT_PKEY_CHANGE;
-	case MLX5_PORT_CHANGE_SUBTYPE_GUID:
-		return MLX5_DEV_EVENT_GUID_CHANGE;
-	case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
-		return MLX5_DEV_EVENT_CLIENT_REREG;
-	}
-	return -1;
-}
-
-static void eq_update_ci(struct mlx5_eq *eq, int arm)
-{
-	__be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
-	u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
-
-	__raw_writel((__force u32)cpu_to_be32(val), addr);
-	/* We still want ordering, just not swabbing, so add a barrier */
-	mb();
-}
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-static void eqe_pf_action(struct work_struct *work)
-{
-	struct mlx5_pagefault *pfault = container_of(work,
-						     struct mlx5_pagefault,
-						     work);
-	struct mlx5_eq *eq = pfault->eq;
-
-	mlx5_core_page_fault(eq->dev, pfault);
-	mempool_free(pfault, eq->pf_ctx.pool);
-}
-
-static void eq_pf_process(struct mlx5_eq *eq)
-{
-	struct mlx5_core_dev *dev = eq->dev;
-	struct mlx5_eqe_page_fault *pf_eqe;
-	struct mlx5_pagefault *pfault;
-	struct mlx5_eqe *eqe;
-	int set_ci = 0;
-
-	while ((eqe = next_eqe_sw(eq))) {
-		pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC);
-		if (!pfault) {
-			schedule_work(&eq->pf_ctx.work);
-			break;
-		}
-
-		dma_rmb();
-		pf_eqe = &eqe->data.page_fault;
-		pfault->event_subtype = eqe->sub_type;
-		pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);
-
-		mlx5_core_dbg(dev,
-			      "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
-			      eqe->sub_type, pfault->bytes_committed);
-
-		switch (eqe->sub_type) {
-		case MLX5_PFAULT_SUBTYPE_RDMA:
-			/* RDMA based event */
-			pfault->type =
-				be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
-			pfault->token =
-				be32_to_cpu(pf_eqe->rdma.pftype_token) &
-				MLX5_24BIT_MASK;
-			pfault->rdma.r_key =
-				be32_to_cpu(pf_eqe->rdma.r_key);
-			pfault->rdma.packet_size =
-				be16_to_cpu(pf_eqe->rdma.packet_length);
-			pfault->rdma.rdma_op_len =
-				be32_to_cpu(pf_eqe->rdma.rdma_op_len);
-			pfault->rdma.rdma_va =
-				be64_to_cpu(pf_eqe->rdma.rdma_va);
-			mlx5_core_dbg(dev,
-				      "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
-				      pfault->type, pfault->token,
-				      pfault->rdma.r_key);
-			mlx5_core_dbg(dev,
-				      "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
-				      pfault->rdma.rdma_op_len,
-				      pfault->rdma.rdma_va);
-			break;
-
-		case MLX5_PFAULT_SUBTYPE_WQE:
-			/* WQE based event */
-			pfault->type =
-				(be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
-			pfault->token =
-				be32_to_cpu(pf_eqe->wqe.token);
-			pfault->wqe.wq_num =
-				be32_to_cpu(pf_eqe->wqe.pftype_wq) &
-				MLX5_24BIT_MASK;
-			pfault->wqe.wqe_index =
-				be16_to_cpu(pf_eqe->wqe.wqe_index);
-			pfault->wqe.packet_size =
-				be16_to_cpu(pf_eqe->wqe.packet_length);
-			mlx5_core_dbg(dev,
-				      "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
-				      pfault->type, pfault->token,
-				      pfault->wqe.wq_num,
-				      pfault->wqe.wqe_index);
-			break;
-
-		default:
-			mlx5_core_warn(dev,
-				       "Unsupported page fault event sub-type: 0x%02hhx\n",
-				       eqe->sub_type);
-			/* Unsupported page faults should still be
-			 * resolved by the page fault handler
-			 */
-		}
-
-		pfault->eq = eq;
-		INIT_WORK(&pfault->work, eqe_pf_action);
-		queue_work(eq->pf_ctx.wq, &pfault->work);
-
-		++eq->cons_index;
-		++set_ci;
-
-		if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
-			eq_update_ci(eq, 0);
-			set_ci = 0;
-		}
-	}
-
-	eq_update_ci(eq, 1);
-}
-
-static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr)
-{
-	struct mlx5_eq *eq = eq_ptr;
-	unsigned long flags;
-
-	if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) {
-		eq_pf_process(eq);
-		spin_unlock_irqrestore(&eq->pf_ctx.lock, flags);
-	} else {
-		schedule_work(&eq->pf_ctx.work);
-	}
-
-	return IRQ_HANDLED;
-}
-
-/* mempool_refill() was proposed but unfortunately wasn't accepted
- * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
- * Chip workaround.
- */
-static void mempool_refill(mempool_t *pool)
-{
-	while (pool->curr_nr < pool->min_nr)
-		mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
-}
-
-static void eq_pf_action(struct work_struct *work)
-{
-	struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work);
-
-	mempool_refill(eq->pf_ctx.pool);
-
-	spin_lock_irq(&eq->pf_ctx.lock);
-	eq_pf_process(eq);
-	spin_unlock_irq(&eq->pf_ctx.lock);
-}
-
-static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name)
-{
-	spin_lock_init(&pf_ctx->lock);
-	INIT_WORK(&pf_ctx->work, eq_pf_action);
-
-	pf_ctx->wq = alloc_ordered_workqueue(name,
-					     WQ_MEM_RECLAIM);
-	if (!pf_ctx->wq)
-		return -ENOMEM;
-
-	pf_ctx->pool = mempool_create_kmalloc_pool
-		(MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault));
-	if (!pf_ctx->pool)
-		goto err_wq;
-
-	return 0;
-err_wq:
-	destroy_workqueue(pf_ctx->wq);
-	return -ENOMEM;
-}
-
-int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
-				u32 wq_num, u8 type, int error)
-{
-	u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0};
-
-	MLX5_SET(page_fault_resume_in, in, opcode,
-		 MLX5_CMD_OP_PAGE_FAULT_RESUME);
-	MLX5_SET(page_fault_resume_in, in, error, !!error);
-	MLX5_SET(page_fault_resume_in, in, page_fault_type, type);
-	MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
-	MLX5_SET(page_fault_resume_in, in, token, token);
-
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
-#endif
-
-static void general_event_handler(struct mlx5_core_dev *dev,
-				  struct mlx5_eqe *eqe)
-{
-	switch (eqe->sub_type) {
-	case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
-		if (dev->event)
-			dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0);
-		break;
-	default:
-		mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n",
-			      eqe->sub_type);
-	}
-}
-
-static void mlx5_temp_warning_event(struct mlx5_core_dev *dev,
-				    struct mlx5_eqe *eqe)
-{
-	u64 value_lsb;
-	u64 value_msb;
-
-	value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
-	value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
-
-	mlx5_core_warn(dev,
-		       "High temperature on sensors with bit set %llx %llx",
-		       value_msb, value_lsb);
+	return mlx5_cmd_exec_in(dev, destroy_eq, in);
 }
 
 /* caller must eventually call mlx5_cq_put on the returned cq */
@@ -424,194 +114,61 @@
 	struct mlx5_cq_table *table = &eq->cq_table;
 	struct mlx5_core_cq *cq = NULL;
 
-	spin_lock(&table->lock);
+	rcu_read_lock();
 	cq = radix_tree_lookup(&table->tree, cqn);
 	if (likely(cq))
 		mlx5_cq_hold(cq);
-	spin_unlock(&table->lock);
+	rcu_read_unlock();
 
 	return cq;
 }
 
-static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn)
+static int mlx5_eq_comp_int(struct notifier_block *nb,
+			    __always_unused unsigned long action,
+			    __always_unused void *data)
 {
-	struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
-
-	if (unlikely(!cq)) {
-		mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
-		return;
-	}
-
-	++cq->arm_sn;
-
-	cq->comp(cq);
-
-	mlx5_cq_put(cq);
-}
-
-static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
-{
-	struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
-
-	if (unlikely(!cq)) {
-		mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
-		return;
-	}
-
-	cq->event(cq, event_type);
-
-	mlx5_cq_put(cq);
-}
-
-static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
-{
-	struct mlx5_eq *eq = eq_ptr;
-	struct mlx5_core_dev *dev = eq->dev;
+	struct mlx5_eq_comp *eq_comp =
+		container_of(nb, struct mlx5_eq_comp, irq_nb);
+	struct mlx5_eq *eq = &eq_comp->core;
 	struct mlx5_eqe *eqe;
-	int set_ci = 0;
+	int num_eqes = 0;
 	u32 cqn = -1;
-	u32 rsn;
-	u8 port;
 
-	while ((eqe = next_eqe_sw(eq))) {
-		/*
-		 * Make sure we read EQ entry contents after we've
+	eqe = next_eqe_sw(eq);
+	if (!eqe)
+		goto out;
+
+	do {
+		struct mlx5_core_cq *cq;
+
+		/* Make sure we read EQ entry contents after we've
 		 * checked the ownership bit.
 		 */
 		dma_rmb();
+		/* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */
+		cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
 
-		mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n",
-			      eq->eqn, eqe_type_str(eqe->type));
-		switch (eqe->type) {
-		case MLX5_EVENT_TYPE_COMP:
-			cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
-			mlx5_eq_cq_completion(eq, cqn);
-			break;
-		case MLX5_EVENT_TYPE_DCT_DRAINED:
-			rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
-			rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
-			mlx5_rsc_event(dev, rsn, eqe->type);
-			break;
-		case MLX5_EVENT_TYPE_PATH_MIG:
-		case MLX5_EVENT_TYPE_COMM_EST:
-		case MLX5_EVENT_TYPE_SQ_DRAINED:
-		case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
-		case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
-		case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
-		case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
-		case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
-			rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
-			rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
-			mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
-				      eqe_type_str(eqe->type), eqe->type, rsn);
-			mlx5_rsc_event(dev, rsn, eqe->type);
-			break;
-
-		case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
-		case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
-			rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
-			mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n",
-				      eqe_type_str(eqe->type), eqe->type, rsn);
-			mlx5_srq_event(dev, rsn, eqe->type);
-			break;
-
-		case MLX5_EVENT_TYPE_CMD:
-			mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
-			break;
-
-		case MLX5_EVENT_TYPE_PORT_CHANGE:
-			port = (eqe->data.port.port >> 4) & 0xf;
-			switch (eqe->sub_type) {
-			case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
-			case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
-			case MLX5_PORT_CHANGE_SUBTYPE_LID:
-			case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
-			case MLX5_PORT_CHANGE_SUBTYPE_GUID:
-			case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
-			case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
-				if (dev->event)
-					dev->event(dev, port_subtype_event(eqe->sub_type),
-						   (unsigned long)port);
-				break;
-			default:
-				mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n",
-					       port, eqe->sub_type);
-			}
-			break;
-		case MLX5_EVENT_TYPE_CQ_ERROR:
-			cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
-			mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
-				       cqn, eqe->data.cq_err.syndrome);
-			mlx5_eq_cq_event(eq, cqn, eqe->type);
-			break;
-
-		case MLX5_EVENT_TYPE_PAGE_REQUEST:
-			{
-				u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
-				s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages);
-
-				mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
-					      func_id, npages);
-				mlx5_core_req_pages_handler(dev, func_id, npages);
-			}
-			break;
-
-		case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
-			mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
-			break;
-
-		case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
-			mlx5_port_module_event(dev, eqe);
-			break;
-
-		case MLX5_EVENT_TYPE_PPS_EVENT:
-			mlx5_pps_event(dev, eqe);
-			break;
-
-		case MLX5_EVENT_TYPE_FPGA_ERROR:
-		case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
-			mlx5_fpga_event(dev, eqe->type, &eqe->data.raw);
-			break;
-
-		case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
-			mlx5_temp_warning_event(dev, eqe);
-			break;
-
-		case MLX5_EVENT_TYPE_GENERAL_EVENT:
-			general_event_handler(dev, eqe);
-			break;
-
-		case MLX5_EVENT_TYPE_DEVICE_TRACER:
-			mlx5_fw_tracer_event(dev, eqe);
-			break;
-
-		default:
-			mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
-				       eqe->type, eq->eqn);
-			break;
+		cq = mlx5_eq_cq_get(eq, cqn);
+		if (likely(cq)) {
+			++cq->arm_sn;
+			cq->comp(cq, eqe);
+			mlx5_cq_put(cq);
+		} else {
+			dev_dbg_ratelimited(eq->dev->device,
+					    "Completion event for bogus CQ 0x%x\n", cqn);
 		}
 
 		++eq->cons_index;
-		++set_ci;
 
-		/* The HCA will think the queue has overflowed if we
-		 * don't tell it we've been processing events. We
-		 * create our EQs with MLX5_NUM_SPARE_EQE extra
-		 * entries, so we must update our consumer index at
-		 * least that often.
-		 */
-		if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
-			eq_update_ci(eq, 0);
-			set_ci = 0;
-		}
-	}
+	} while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
 
+out:
 	eq_update_ci(eq, 1);
 
 	if (cqn != -1)
-		tasklet_schedule(&eq->tasklet_ctx.task);
+		tasklet_schedule(&eq_comp->tasklet_ctx.task);
 
-	return IRQ_HANDLED;
+	return 0;
 }
 
 /* Some architectures don't latch interrupts when they are disabled, so using
@@ -619,17 +176,92 @@
  * avoid losing them. It is not recommended to use it, unless this is the last
  * resort.
  */
-u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq)
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
 {
 	u32 count_eqe;
 
-	disable_irq(eq->irqn);
-	count_eqe = eq->cons_index;
-	mlx5_eq_int(eq->irqn, eq);
-	count_eqe = eq->cons_index - count_eqe;
-	enable_irq(eq->irqn);
+	disable_irq(eq->core.irqn);
+	count_eqe = eq->core.cons_index;
+	mlx5_eq_comp_int(&eq->irq_nb, 0, NULL);
+	count_eqe = eq->core.cons_index - count_eqe;
+	enable_irq(eq->core.irqn);
 
 	return count_eqe;
+}
+
+static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, unsigned long *flags)
+	__acquires(&eq->lock)
+{
+	if (in_irq())
+		spin_lock(&eq->lock);
+	else
+		spin_lock_irqsave(&eq->lock, *flags);
+}
+
+static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, unsigned long *flags)
+	__releases(&eq->lock)
+{
+	if (in_irq())
+		spin_unlock(&eq->lock);
+	else
+		spin_unlock_irqrestore(&eq->lock, *flags);
+}
+
+enum async_eq_nb_action {
+	ASYNC_EQ_IRQ_HANDLER = 0,
+	ASYNC_EQ_RECOVER = 1,
+};
+
+static int mlx5_eq_async_int(struct notifier_block *nb,
+			     unsigned long action, void *data)
+{
+	struct mlx5_eq_async *eq_async =
+		container_of(nb, struct mlx5_eq_async, irq_nb);
+	struct mlx5_eq *eq = &eq_async->core;
+	struct mlx5_eq_table *eqt;
+	struct mlx5_core_dev *dev;
+	struct mlx5_eqe *eqe;
+	unsigned long flags;
+	int num_eqes = 0;
+
+	dev = eq->dev;
+	eqt = dev->priv.eq_table;
+
+	mlx5_eq_async_int_lock(eq_async, &flags);
+
+	eqe = next_eqe_sw(eq);
+	if (!eqe)
+		goto out;
+
+	do {
+		/*
+		 * Make sure we read EQ entry contents after we've
+		 * checked the ownership bit.
+		 */
+		dma_rmb();
+
+		atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe);
+		atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe);
+
+		++eq->cons_index;
+
+	} while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
+
+out:
+	eq_update_ci(eq, 1);
+	mlx5_eq_async_int_unlock(eq_async, &flags);
+
+	return unlikely(action == ASYNC_EQ_RECOVER) ? num_eqes : 0;
+}
+
+void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_async *eq = &dev->priv.eq_table->cmd_eq;
+	int eqes;
+
+	eqes = mlx5_eq_async_int(&eq->irq_nb, ASYNC_EQ_RECOVER, NULL);
+	if (eqes)
+		mlx5_core_warn(dev, "Recovered %d EQEs on cmd_eq\n", eqes);
 }
 
 static void init_eq_buf(struct mlx5_eq *eq)
@@ -643,38 +275,31 @@
 	}
 }
 
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
-		       int nent, u64 mask, const char *name,
-		       enum mlx5_eq_type type)
+static int
+create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+	      struct mlx5_eq_param *param)
 {
 	struct mlx5_cq_table *cq_table = &eq->cq_table;
 	u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
 	struct mlx5_priv *priv = &dev->priv;
-	irq_handler_t handler;
+	u8 vecidx = param->irq_index;
 	__be64 *pas;
 	void *eqc;
 	int inlen;
 	u32 *in;
 	int err;
+	int i;
 
 	/* Init CQ table */
 	memset(cq_table, 0, sizeof(*cq_table));
 	spin_lock_init(&cq_table->lock);
 	INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
 
-	eq->type = type;
-	eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
+	eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE);
 	eq->cons_index = 0;
 	err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
 	if (err)
 		return err;
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	if (type == MLX5_EQ_TYPE_PF)
-		handler = mlx5_eq_pf_int;
-	else
-#endif
-		handler = mlx5_eq_int;
 
 	init_eq_buf(eq);
 
@@ -691,7 +316,12 @@
 	mlx5_fill_page_array(&eq->buf, pas);
 
 	MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
-	MLX5_SET64(create_eq_in, in, event_bitmask, mask);
+	if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx))
+		MLX5_SET(create_eq_in, in, uid, MLX5_SHARED_RESOURCE_UID);
+
+	for (i = 0; i < 4; i++)
+		MLX5_ARRAY_SET64(create_eq_in, in, event_bitmask, i,
+				 param->mask[i]);
 
 	eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
 	MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
@@ -704,46 +334,18 @@
 	if (err)
 		goto err_in;
 
-	snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
-		 name, pci_name(dev->pdev));
-
+	eq->vecidx = vecidx;
 	eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
 	eq->irqn = pci_irq_vector(dev->pdev, vecidx);
 	eq->dev = dev;
 	eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
-	err = request_irq(eq->irqn, handler, 0,
-			  priv->irq_info[vecidx].name, eq);
-	if (err)
-		goto err_eq;
 
 	err = mlx5_debug_eq_add(dev, eq);
 	if (err)
-		goto err_irq;
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	if (type == MLX5_EQ_TYPE_PF) {
-		err = init_pf_ctx(&eq->pf_ctx, name);
-		if (err)
-			goto err_irq;
-	} else
-#endif
-	{
-		INIT_LIST_HEAD(&eq->tasklet_ctx.list);
-		INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
-		spin_lock_init(&eq->tasklet_ctx.lock);
-		tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
-			     (unsigned long)&eq->tasklet_ctx);
-	}
-
-	/* EQs are created in ARMED state
-	 */
-	eq_update_ci(eq, 1);
+		goto err_eq;
 
 	kvfree(in);
 	return 0;
-
-err_irq:
-	free_irq(eq->irqn, eq);
 
 err_eq:
 	mlx5_cmd_destroy_eq(dev, eq->eqn);
@@ -756,27 +358,59 @@
 	return err;
 }
 
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+/**
+ * mlx5_eq_enable - Enable EQ for receiving EQEs
+ * @dev : Device which owns the eq
+ * @eq : EQ to enable
+ * @nb : Notifier call block
+ *
+ * Must be called after EQ is created in device.
+ *
+ * @return: 0 if no error
+ */
+int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+		   struct notifier_block *nb)
+{
+	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+	int err;
+
+	err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb);
+	if (!err)
+		eq_update_ci(eq, 1);
+
+	return err;
+}
+EXPORT_SYMBOL(mlx5_eq_enable);
+
+/**
+ * mlx5_eq_disable - Disable EQ for receiving EQEs
+ * @dev : Device which owns the eq
+ * @eq : EQ to disable
+ * @nb : Notifier call block
+ *
+ * Must be called before EQ is destroyed.
+ */
+void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+		     struct notifier_block *nb)
+{
+	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+
+	mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb);
+}
+EXPORT_SYMBOL(mlx5_eq_disable);
+
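Editor's note (not part of the patch): a hedged sketch of the create/enable/disable/destroy ordering that the mlx5_eq_enable()/mlx5_eq_disable() kernel-doc above implies, using the generic EQ API added later in this file. struct my_consumer and my_eq_handler() are hypothetical names introduced only for illustration; the mlx5_* calls and struct mlx5_eq_param fields are the ones this patch adds.

/* Hypothetical consumer context; only for this sketch. */
struct my_consumer {
	struct mlx5_eq *eq;
	struct notifier_block irq_nb;
};

/* Called by the IRQ layer when the EQ's vector fires; the consumer is
 * expected to drain the EQ here (see the polling sketch after
 * mlx5_eq_update_ci() further down in this file).
 */
static int my_eq_handler(struct notifier_block *nb, unsigned long action,
			 void *data)
{
	return NOTIFY_OK;
}

static int my_consumer_start(struct mlx5_core_dev *dev, struct my_consumer *c)
{
	struct mlx5_eq_param param = {
		.irq_index = 0,		/* async EQs share irq index 0 */
		.nent = 64,		/* arbitrary example size */
	};
	int err;

	c->irq_nb.notifier_call = my_eq_handler;
	c->eq = mlx5_eq_create_generic(dev, &param);
	if (IS_ERR(c->eq))
		return PTR_ERR(c->eq);

	err = mlx5_eq_enable(dev, c->eq, &c->irq_nb);	/* after creation */
	if (err)
		mlx5_eq_destroy_generic(dev, c->eq);
	return err;
}

static void my_consumer_stop(struct mlx5_core_dev *dev, struct my_consumer *c)
{
	mlx5_eq_disable(dev, c->eq, &c->irq_nb);	/* before destroy */
	mlx5_eq_destroy_generic(dev, c->eq);
}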
+static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 {
 	int err;
 
 	mlx5_debug_eq_remove(dev, eq);
-	free_irq(eq->irqn, eq);
+
 	err = mlx5_cmd_destroy_eq(dev, eq->eqn);
 	if (err)
 		mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
 			       eq->eqn);
 	synchronize_irq(eq->irqn);
 
-	if (eq->type == MLX5_EQ_TYPE_COMP) {
-		tasklet_disable(&eq->tasklet_ctx.task);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	} else if (eq->type == MLX5_EQ_TYPE_PF) {
-		cancel_work_sync(&eq->pf_ctx.work);
-		destroy_workqueue(eq->pf_ctx.wq);
-		mempool_destroy(eq->pf_ctx.pool);
-#endif
-	}
 	mlx5_buf_free(dev, &eq->buf);
 
 	return err;
@@ -787,62 +421,149 @@
 	struct mlx5_cq_table *table = &eq->cq_table;
 	int err;
 
-	spin_lock_irq(&table->lock);
+	spin_lock(&table->lock);
 	err = radix_tree_insert(&table->tree, cq->cqn, cq);
-	spin_unlock_irq(&table->lock);
+	spin_unlock(&table->lock);
 
 	return err;
 }
 
-int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
 {
 	struct mlx5_cq_table *table = &eq->cq_table;
 	struct mlx5_core_cq *tmp;
 
-	spin_lock_irq(&table->lock);
+	spin_lock(&table->lock);
 	tmp = radix_tree_delete(&table->tree, cq->cqn);
-	spin_unlock_irq(&table->lock);
+	spin_unlock(&table->lock);
 
 	if (!tmp) {
-		mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn);
-		return -ENOENT;
+		mlx5_core_dbg(eq->dev, "cq 0x%x not found in eq 0x%x tree\n",
+			      eq->eqn, cq->cqn);
+		return;
 	}
 
-	if (tmp != cq) {
-		mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", eq->eqn, cq->cqn);
-		return -EINVAL;
-	}
+	if (tmp != cq)
+		mlx5_core_dbg(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n",
+			      eq->eqn, cq->cqn);
+}
 
+int mlx5_eq_table_init(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *eq_table;
+	int i;
+
+	eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL);
+	if (!eq_table)
+		return -ENOMEM;
+
+	dev->priv.eq_table = eq_table;
+
+	mlx5_eq_debugfs_init(dev);
+
+	mutex_init(&eq_table->lock);
+	for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
+		ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);
+
+	eq_table->irq_table = dev->priv.irq_table;
 	return 0;
 }
 
-int mlx5_eq_init(struct mlx5_core_dev *dev)
+void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)
 {
+	mlx5_eq_debugfs_cleanup(dev);
+	kvfree(dev->priv.eq_table);
+}
+
+/* Async EQs */
+
+static int create_async_eq(struct mlx5_core_dev *dev,
+			   struct mlx5_eq *eq, struct mlx5_eq_param *param)
+{
+	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
 	int err;
 
-	spin_lock_init(&dev->priv.eq_table.lock);
+	mutex_lock(&eq_table->lock);
+	/* Async EQs must share irq index 0 */
+	if (param->irq_index != 0) {
+		err = -EINVAL;
+		goto unlock;
+	}
 
-	err = mlx5_eq_debugfs_init(dev);
-
+	err = create_map_eq(dev, eq, param);
+unlock:
+	mutex_unlock(&eq_table->lock);
 	return err;
 }
 
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
+static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 {
-	mlx5_eq_debugfs_cleanup(dev);
+	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+	int err;
+
+	mutex_lock(&eq_table->lock);
+	err = destroy_unmap_eq(dev, eq);
+	mutex_unlock(&eq_table->lock);
+	return err;
 }
 
-int mlx5_start_eqs(struct mlx5_core_dev *dev)
+static int cq_err_event_notifier(struct notifier_block *nb,
+				 unsigned long type, void *data)
 {
-	struct mlx5_eq_table *table = &dev->priv.eq_table;
+	struct mlx5_eq_table *eqt;
+	struct mlx5_core_cq *cq;
+	struct mlx5_eqe *eqe;
+	struct mlx5_eq *eq;
+	u32 cqn;
+
+	/* type == MLX5_EVENT_TYPE_CQ_ERROR */
+
+	eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb);
+	eq = &eqt->async_eq.core;
+	eqe = data;
+
+	cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
+	mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
+		       cqn, eqe->data.cq_err.syndrome);
+
+	cq = mlx5_eq_cq_get(eq, cqn);
+	if (unlikely(!cq)) {
+		mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
+		return NOTIFY_OK;
+	}
+
+	if (cq->event)
+		cq->event(cq, type);
+
+	mlx5_cq_put(cq);
+
+	return NOTIFY_OK;
+}
+
+static void gather_user_async_events(struct mlx5_core_dev *dev, u64 mask[4])
+{
+	__be64 *user_unaffiliated_events;
+	__be64 *user_affiliated_events;
+	int i;
+
+	user_affiliated_events =
+		MLX5_CAP_DEV_EVENT(dev, user_affiliated_events);
+	user_unaffiliated_events =
+		MLX5_CAP_DEV_EVENT(dev, user_unaffiliated_events);
+
+	for (i = 0; i < 4; i++)
+		mask[i] |= be64_to_cpu(user_affiliated_events[i] |
+				       user_unaffiliated_events[i]);
+}
+
+static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4])
+{
 	u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
-	int err;
 
 	if (MLX5_VPORT_MANAGER(dev))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
 
-	if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
-	    MLX5_CAP_GEN(dev, general_notification_event))
+	if (MLX5_CAP_GEN(dev, general_notification_event))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT);
 
 	if (MLX5_CAP_GEN(dev, port_module_event))
@@ -865,127 +586,414 @@
 	if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);
 
-	err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
-				 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
-				 "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
+	if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters))
+		async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER);
+
+	if (mlx5_eswitch_is_funcs_handler(dev))
+		async_event_mask |=
+			(1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED);
+
+	mask[0] = async_event_mask;
+
+	if (MLX5_CAP_GEN(dev, event_cap))
+		gather_user_async_events(dev, mask);
+}
+
+static int
+setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq,
+	       struct mlx5_eq_param *param, const char *name)
+{
+	int err;
+
+	eq->irq_nb.notifier_call = mlx5_eq_async_int;
+	spin_lock_init(&eq->lock);
+
+	err = create_async_eq(dev, &eq->core, param);
 	if (err) {
-		mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
+		mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err);
 		return err;
 	}
+	err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
+	if (err) {
+		mlx5_core_warn(dev, "failed to enable %s EQ %d\n", name, err);
+		destroy_async_eq(dev, &eq->core);
+	}
+	return err;
+}
+
+static void cleanup_async_eq(struct mlx5_core_dev *dev,
+			     struct mlx5_eq_async *eq, const char *name)
+{
+	int err;
+
+	mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
+	err = destroy_async_eq(dev, &eq->core);
+	if (err)
+		mlx5_core_err(dev, "failed to destroy %s eq, err(%d)\n",
+			      name, err);
+}
+
+static int create_async_eqs(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *table = dev->priv.eq_table;
+	struct mlx5_eq_param param = {};
+	int err;
+
+	MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
+	mlx5_eq_notifier_register(dev, &table->cq_err_nb);
+
+	param = (struct mlx5_eq_param) {
+		.irq_index = 0,
+		.nent = MLX5_NUM_CMD_EQE,
+		.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
+	};
+	mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ);
+	err = setup_async_eq(dev, &table->cmd_eq, &param, "cmd");
+	if (err)
+		goto err1;
 
 	mlx5_cmd_use_events(dev);
+	mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
 
-	err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
-				 MLX5_NUM_ASYNC_EQE, async_event_mask,
-				 "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC);
-	if (err) {
-		mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
-		goto err1;
-	}
+	param = (struct mlx5_eq_param) {
+		.irq_index = 0,
+		.nent = MLX5_NUM_ASYNC_EQE,
+	};
 
-	err = mlx5_create_map_eq(dev, &table->pages_eq,
-				 MLX5_EQ_VEC_PAGES,
-				 /* TODO: sriov max_vf + */ 1,
-				 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
-				 MLX5_EQ_TYPE_ASYNC);
-	if (err) {
-		mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
+	gather_async_events_mask(dev, param.mask);
+	err = setup_async_eq(dev, &table->async_eq, &param, "async");
+	if (err)
 		goto err2;
+
+	param = (struct mlx5_eq_param) {
+		.irq_index = 0,
+		.nent = /* TODO: sriov max_vf + */ 1,
+		.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST,
+	};
+
+	err = setup_async_eq(dev, &table->pages_eq, &param, "pages");
+	if (err)
+		goto err3;
+
+	return 0;
+
+err3:
+	cleanup_async_eq(dev, &table->async_eq, "async");
+err2:
+	mlx5_cmd_use_polling(dev);
+	cleanup_async_eq(dev, &table->cmd_eq, "cmd");
+err1:
+	mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
+	mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
+	return err;
+}
+
+static void destroy_async_eqs(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *table = dev->priv.eq_table;
+
+	cleanup_async_eq(dev, &table->pages_eq, "pages");
+	cleanup_async_eq(dev, &table->async_eq, "async");
+	mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_DESTROY_EQ);
+	mlx5_cmd_use_polling(dev);
+	cleanup_async_eq(dev, &table->cmd_eq, "cmd");
+	mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
+	mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
+}
+
+struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
+{
+	return &dev->priv.eq_table->async_eq.core;
+}
+
+void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev)
+{
+	synchronize_irq(dev->priv.eq_table->async_eq.core.irqn);
+}
+
+void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev)
+{
+	synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn);
+}
+
+/* Generic EQ API for mlx5_core consumers
+ * Needed For RDMA ODP EQ for now
+ */
+struct mlx5_eq *
+mlx5_eq_create_generic(struct mlx5_core_dev *dev,
+		       struct mlx5_eq_param *param)
+{
+	struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
+	int err;
+
+	if (!eq)
+		return ERR_PTR(-ENOMEM);
+
+	err = create_async_eq(dev, eq, param);
+	if (err) {
+		kvfree(eq);
+		eq = ERR_PTR(err);
 	}
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	if (MLX5_CAP_GEN(dev, pg)) {
-		err = mlx5_create_map_eq(dev, &table->pfault_eq,
-					 MLX5_EQ_VEC_PFAULT,
-					 MLX5_NUM_ASYNC_EQE,
-					 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
-					 "mlx5_page_fault_eq",
-					 MLX5_EQ_TYPE_PF);
+	return eq;
+}
+EXPORT_SYMBOL(mlx5_eq_create_generic);
+
+int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+	int err;
+
+	if (IS_ERR(eq))
+		return -EINVAL;
+
+	err = destroy_async_eq(dev, eq);
+	if (err)
+		goto out;
+
+	kvfree(eq);
+out:
+	return err;
+}
+EXPORT_SYMBOL(mlx5_eq_destroy_generic);
+
+struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc)
+{
+	u32 ci = eq->cons_index + cc;
+	struct mlx5_eqe *eqe;
+
+	eqe = get_eqe(eq, ci & (eq->nent - 1));
+	eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe;
+	/* Make sure we read EQ entry contents after we've
+	 * checked the ownership bit.
+	 */
+	if (eqe)
+		dma_rmb();
+
+	return eqe;
+}
+EXPORT_SYMBOL(mlx5_eq_get_eqe);
+
+void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
+{
+	__be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+	u32 val;
+
+	eq->cons_index += cc;
+	val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+	__raw_writel((__force u32)cpu_to_be32(val), addr);
+	/* We still want ordering, just not swabbing, so add a barrier */
+	wmb();
+}
+EXPORT_SYMBOL(mlx5_eq_update_ci);
+
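Editor's note (not part of the patch): a sketch of how a generic-EQ consumer might drain its queue from the notifier it registered via mlx5_eq_enable(), using mlx5_eq_get_eqe() and mlx5_eq_update_ci() just above. handle_my_eqe() and the local budget of 128 are illustrative assumptions, not part of the mlx5 API; the periodic CI update mirrors the spare-EQE reasoning in the comment near MLX5_EQ_POLLING_BUDGET.

static void handle_my_eqe(struct mlx5_eqe *eqe);	/* hypothetical consumer work */

static void my_poll_eq(struct mlx5_eq *eq)
{
	const u32 budget = 128;	/* keep below the EQ's spare entries */
	struct mlx5_eqe *eqe;
	u32 cc = 0;		/* entries consumed since the last CI update */

	while ((eqe = mlx5_eq_get_eqe(eq, cc))) {
		handle_my_eqe(eqe);
		if (++cc == budget) {
			mlx5_eq_update_ci(eq, cc, false);	/* advance CI, no re-arm */
			cc = 0;
		}
	}
	mlx5_eq_update_ci(eq, cc, true);	/* final CI update and re-arm */
}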
+static void destroy_comp_eqs(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *table = dev->priv.eq_table;
+	struct mlx5_eq_comp *eq, *n;
+
+	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+		list_del(&eq->list);
+		mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
+		if (destroy_unmap_eq(dev, &eq->core))
+			mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
+				       eq->core.eqn);
+		tasklet_disable(&eq->tasklet_ctx.task);
+		kfree(eq);
+	}
+}
+
+static int create_comp_eqs(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *table = dev->priv.eq_table;
+	struct mlx5_eq_comp *eq;
+	int ncomp_eqs;
+	int nent;
+	int err;
+	int i;
+
+	INIT_LIST_HEAD(&table->comp_eqs_list);
+	ncomp_eqs = table->num_comp_eqs;
+	nent = MLX5_COMP_EQ_SIZE;
+	for (i = 0; i < ncomp_eqs; i++) {
+		int vecidx = i + MLX5_IRQ_VEC_COMP_BASE;
+		struct mlx5_eq_param param = {};
+
+		eq = kzalloc(sizeof(*eq), GFP_KERNEL);
+		if (!eq) {
+			err = -ENOMEM;
+			goto clean;
+		}
+
+		INIT_LIST_HEAD(&eq->tasklet_ctx.list);
+		INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
+		spin_lock_init(&eq->tasklet_ctx.lock);
+		tasklet_setup(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb);
+
+		eq->irq_nb.notifier_call = mlx5_eq_comp_int;
+		param = (struct mlx5_eq_param) {
+			.irq_index = vecidx,
+			.nent = nent,
+		};
+		err = create_map_eq(dev, &eq->core, &param);
 		if (err) {
-			mlx5_core_warn(dev, "failed to create page fault EQ %d\n",
-				       err);
-			goto err3;
+			kfree(eq);
+			goto clean;
+		}
+		err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
+		if (err) {
+			destroy_unmap_eq(dev, &eq->core);
+			kfree(eq);
+			goto clean;
+		}
+
+		mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
+		/* add tail, to keep the list ordered, for mlx5_vector2eqn to work */
+		list_add_tail(&eq->list, &table->comp_eqs_list);
+	}
+
+	return 0;
+
+clean:
+	destroy_comp_eqs(dev);
+	return err;
+}
+
+static int vector2eqnirqn(struct mlx5_core_dev *dev, int vector, int *eqn,
+			  unsigned int *irqn)
+{
+	struct mlx5_eq_table *table = dev->priv.eq_table;
+	struct mlx5_eq_comp *eq, *n;
+	int err = -ENOENT;
+	int i = 0;
+
+	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+		if (i++ == vector) {
+			if (irqn)
+				*irqn = eq->core.irqn;
+			if (eqn)
+				*eqn = eq->core.eqn;
+			err = 0;
+			break;
 		}
 	}
 
 	return err;
-err3:
-	mlx5_destroy_unmap_eq(dev, &table->pages_eq);
-#else
-	return err;
-#endif
-
-err2:
-	mlx5_destroy_unmap_eq(dev, &table->async_eq);
-
-err1:
-	mlx5_cmd_use_polling(dev);
-	mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
-	return err;
 }
 
-void mlx5_stop_eqs(struct mlx5_core_dev *dev)
+int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn)
 {
-	struct mlx5_eq_table *table = &dev->priv.eq_table;
-	int err;
+	return vector2eqnirqn(dev, vector, eqn, NULL);
+}
+EXPORT_SYMBOL(mlx5_vector2eqn);
 
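Editor's note (not part of the patch): a small sketch of the completion-vector helpers as a consumer might use them when binding a CQ to a completion EQ. my_pick_eqn() is a hypothetical helper; the returned eqn is the value the CQ-create command expects for its target EQ, and mlx5_comp_vectors_count()/mlx5_vector2eqn() are used exactly as added in this file.

static int my_pick_eqn(struct mlx5_core_dev *dev, int preferred_vector,
		       int *eqn)
{
	unsigned int nvec = mlx5_comp_vectors_count(dev);	/* completion EQs available */
	int vector;

	if (!nvec)
		return -ENOENT;

	vector = preferred_vector % nvec;	/* spread CQs across completion EQs */
	return mlx5_vector2eqn(dev, vector, eqn);	/* -ENOENT if out of range */
}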
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	if (MLX5_CAP_GEN(dev, pg)) {
-		err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
-		if (err)
-			mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
-				      err);
+int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn)
+{
+	return vector2eqnirqn(dev, vector, NULL, irqn);
+}
+
+unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
+{
+	return dev->priv.eq_table->num_comp_eqs;
+}
+EXPORT_SYMBOL(mlx5_comp_vectors_count);
+
+struct cpumask *
+mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
+{
+	int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE;
+
+	return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table,
+					  vecidx);
+}
+EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
+
+#ifdef CONFIG_RFS_ACCEL
+struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
+{
+	return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table);
+}
+#endif
+
+struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
+{
+	struct mlx5_eq_table *table = dev->priv.eq_table;
+	struct mlx5_eq_comp *eq;
+
+	list_for_each_entry(eq, &table->comp_eqs_list, list) {
+		if (eq->core.eqn == eqn)
+			return eq;
 	}
-#endif
 
-	err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
-	if (err)
-		mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
-			      err);
-
-	err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
-	if (err)
-		mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
-			      err);
-	mlx5_cmd_use_polling(dev);
-
-	err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
-	if (err)
-		mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
-			      err);
-}
-
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
-		       u32 *out, int outlen)
-{
-	u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0};
-
-	MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
-	MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+	return ERR_PTR(-ENOENT);
 }
 
 /* This function should only be called after mlx5_cmd_force_teardown_hca */
 void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
 {
-	struct mlx5_eq_table *table = &dev->priv.eq_table;
-	struct mlx5_eq *eq;
+	struct mlx5_eq_table *table = dev->priv.eq_table;
 
-#ifdef CONFIG_RFS_ACCEL
-	if (dev->rmap) {
-		free_irq_cpu_rmap(dev->rmap);
-		dev->rmap = NULL;
-	}
-#endif
-	list_for_each_entry(eq, &table->comp_eqs_list, list)
-		free_irq(eq->irqn, eq);
-
-	free_irq(table->pages_eq.irqn, &table->pages_eq);
-	free_irq(table->async_eq.irqn, &table->async_eq);
-	free_irq(table->cmd_eq.irqn, &table->cmd_eq);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	if (MLX5_CAP_GEN(dev, pg))
-		free_irq(table->pfault_eq.irqn, &table->pfault_eq);
-#endif
-	pci_free_irq_vectors(dev->pdev);
+	mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
+	mlx5_irq_table_destroy(dev);
+	mutex_unlock(&table->lock);
 }
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+#define MLX5_MAX_ASYNC_EQS 4
+#else
+#define MLX5_MAX_ASYNC_EQS 3
+#endif
+
+int mlx5_eq_table_create(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+		      MLX5_CAP_GEN(dev, max_num_eqs) :
+		      1 << MLX5_CAP_GEN(dev, log_max_eq);
+	int err;
+
+	eq_table->num_comp_eqs =
+		min_t(int,
+		      mlx5_irq_get_num_comp(eq_table->irq_table),
+		      num_eqs - MLX5_MAX_ASYNC_EQS);
+
+	err = create_async_eqs(dev);
+	if (err) {
+		mlx5_core_err(dev, "Failed to create async EQs\n");
+		goto err_async_eqs;
+	}
+
+	err = create_comp_eqs(dev);
+	if (err) {
+		mlx5_core_err(dev, "Failed to create completion EQs\n");
+		goto err_comp_eqs;
+	}
+
+	return 0;
+err_comp_eqs:
+	destroy_async_eqs(dev);
+err_async_eqs:
+	return err;
+}
+
+void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
+{
+	destroy_comp_eqs(dev);
+	destroy_async_eqs(dev);
+}
+
+int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
+{
+	struct mlx5_eq_table *eqt = dev->priv.eq_table;
+
+	return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb);
+}
+EXPORT_SYMBOL(mlx5_eq_notifier_register);
+
+int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
+{
+	struct mlx5_eq_table *eqt = dev->priv.eq_table;
+
+	return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb);
+}
+EXPORT_SYMBOL(mlx5_eq_notifier_unregister);
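Editor's note (not part of the patch): a sketch of subscribing to a single async event type through the per-type notifier chains registered above, mirroring the cq_err_nb usage in create_async_eqs(). struct my_listener and the handler body are hypothetical; MLX5_NB_INIT() and mlx5_nb_cof() are used the same way this patch uses them, and PORT_CHANGE is assumed to expand to MLX5_EVENT_TYPE_PORT_CHANGE inside MLX5_NB_INIT().

/* Hypothetical listener context; only for this sketch. */
struct my_listener {
	struct mlx5_nb nb;
	/* consumer state ... */
};

static int my_port_change_event(struct notifier_block *nb,
				unsigned long type, void *data)
{
	struct my_listener *listener = mlx5_nb_cof(nb, struct my_listener, nb);
	struct mlx5_eqe *eqe = data;	/* raw EQE, as in cq_err_event_notifier() */

	/* inspect eqe->sub_type, schedule deferred work for listener, etc. */
	return NOTIFY_OK;
}

static int my_listener_start(struct mlx5_core_dev *dev,
			     struct my_listener *listener)
{
	MLX5_NB_INIT(&listener->nb, my_port_change_event, PORT_CHANGE);
	return mlx5_eq_notifier_register(dev, &listener->nb);
}

static void my_listener_stop(struct mlx5_core_dev *dev,
			     struct my_listener *listener)
{
	mlx5_eq_notifier_unregister(dev, &listener->nb);
}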