hc
2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/drivers/infiniband/hw/hfi1/chip.c
....@@ -1097,7 +1097,7 @@
10971097 static void handle_temp_err(struct hfi1_devdata *dd);
10981098 static void dc_shutdown(struct hfi1_devdata *dd);
10991099 static void dc_start(struct hfi1_devdata *dd);
1100
-static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
1100
+static int qos_rmt_entries(unsigned int n_krcv_queues, unsigned int *mp,
11011101 unsigned int *np);
11021102 static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd);
11031103 static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms);
....@@ -12348,6 +12348,7 @@
1234812348
1234912349 if (dd->synth_stats_timer.function)
1235012350 del_timer_sync(&dd->synth_stats_timer);
12351
+ cancel_work_sync(&dd->update_cntr_work);
1235112352 ppd = (struct hfi1_pportdata *)(dd + 1);
1235212353 for (i = 0; i < dd->num_pports; i++, ppd++) {
1235312354 kfree(ppd->cntrs);
....@@ -13403,7 +13404,6 @@
1340313404 int ret;
1340413405 unsigned ngroups;
1340513406 int rmt_count;
13406
- int user_rmt_reduced;
1340713407 u32 n_usr_ctxts;
1340813408 u32 send_contexts = chip_send_contexts(dd);
1340913409 u32 rcv_contexts = chip_rcv_contexts(dd);
....@@ -13462,28 +13462,34 @@
1346213462 (num_kernel_contexts + n_usr_ctxts),
1346313463 &node_affinity.real_cpu_mask);
1346413464 /*
13465
- * The RMT entries are currently allocated as shown below:
13466
- * 1. QOS (0 to 128 entries);
13467
- * 2. FECN (num_kernel_context - 1 + num_user_contexts +
13468
- * num_netdev_contexts);
13469
- * 3. netdev (num_netdev_contexts).
13470
- * It should be noted that FECN oversubscribe num_netdev_contexts
13471
- * entries of RMT because both netdev and PSM could allocate any receive
13472
- * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts,
13473
- * and PSM FECN must reserve an RMT entry for each possible PSM receive
13474
- * context.
13465
+ * RMT entries are allocated as follows:
13466
+ * 1. QOS (0 to 128 entries)
13467
+ * 2. FECN (num_kernel_context - 1 [a] + num_user_contexts +
13468
+ * num_netdev_contexts [b])
13469
+ * 3. netdev (NUM_NETDEV_MAP_ENTRIES)
13470
+ *
13471
+ * Notes:
13472
+ * [a] Kernel contexts (except control) are included in FECN if kernel
13473
+ * TID_RDMA is active.
13474
+ * [b] Netdev and user contexts are randomly allocated from the same
13475
+ * context pool, so FECN must cover all contexts in the pool.
1347513476 */
13476
- rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_netdev_contexts * 2);
13477
- if (HFI1_CAP_IS_KSET(TID_RDMA))
13478
- rmt_count += num_kernel_contexts - 1;
13479
- if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
13480
- user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count;
13481
- dd_dev_err(dd,
13482
- "RMT size is reducing the number of user receive contexts from %u to %d\n",
13483
- n_usr_ctxts,
13484
- user_rmt_reduced);
13485
- /* recalculate */
13486
- n_usr_ctxts = user_rmt_reduced;
13477
+ rmt_count = qos_rmt_entries(num_kernel_contexts - 1, NULL, NULL)
13478
+ + (HFI1_CAP_IS_KSET(TID_RDMA) ? num_kernel_contexts - 1
13479
+ : 0)
13480
+ + n_usr_ctxts
13481
+ + num_netdev_contexts
13482
+ + NUM_NETDEV_MAP_ENTRIES;
13483
+ if (rmt_count > NUM_MAP_ENTRIES) {
13484
+ int over = rmt_count - NUM_MAP_ENTRIES;
13485
+ /* try to squish user contexts, minimum of 1 */
13486
+ if (over >= n_usr_ctxts) {
13487
+ dd_dev_err(dd, "RMT overflow: reduce the requested number of contexts\n");
13488
+ return -EINVAL;
13489
+ }
13490
+ dd_dev_err(dd, "RMT overflow: reducing # user contexts from %u to %u\n",
13491
+ n_usr_ctxts, n_usr_ctxts - over);
13492
+ n_usr_ctxts -= over;
1348713493 }
1348813494
1348913495 /* the first N are kernel contexts, the rest are user/netdev contexts */
....@@ -14340,15 +14346,15 @@
1434014346 }
1434114347
1434214348 /* return the number of RSM map table entries that will be used for QOS */
14343
-static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
14349
+static int qos_rmt_entries(unsigned int n_krcv_queues, unsigned int *mp,
1434414350 unsigned int *np)
1434514351 {
1434614352 int i;
1434714353 unsigned int m, n;
14348
- u8 max_by_vl = 0;
14354
+ uint max_by_vl = 0;
1434914355
1435014356 /* is QOS active at all? */
14351
- if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS ||
14357
+ if (n_krcv_queues < MIN_KERNEL_KCTXTS ||
1435214358 num_vls == 1 ||
1435314359 krcvqsset <= 1)
1435414360 goto no_qos;
....@@ -14406,7 +14412,7 @@
1440614412
1440714413 if (!rmt)
1440814414 goto bail;
14409
- rmt_entries = qos_rmt_entries(dd, &m, &n);
14415
+ rmt_entries = qos_rmt_entries(dd->n_krcv_queues - 1, &m, &n);
1441014416 if (rmt_entries == 0)
1441114417 goto bail;
1441214418 qpns_per_vl = 1 << m;