hc
2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/arch/powerpc/platforms/powernv/vas-window.c
....@@ -1,10 +1,6 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * Copyright 2016-17 IBM Corp.
3
- *
4
- * This program is free software; you can redistribute it and/or
5
- * modify it under the terms of the GNU General Public License
6
- * as published by the Free Software Foundation; either version
7
- * 2 of the License, or (at your option) any later version.
84 */
95
106 #define pr_fmt(fmt) "vas: " fmt
....@@ -16,6 +12,8 @@
1612 #include <linux/log2.h>
1713 #include <linux/rcupdate.h>
1814 #include <linux/cred.h>
15
+#include <linux/sched/mm.h>
16
+#include <linux/mmu_context.h>
1917 #include <asm/switch_to.h>
2018 #include <asm/ppc-opcode.h>
2119 #include "vas.h"
....@@ -28,7 +26,7 @@
2826 * Compute the paste address region for the window @window using the
2927 * ->paste_base_addr and ->paste_win_id_shift we got from device tree.
3028 */
31
-static void compute_paste_address(struct vas_window *window, u64 *addr, int *len)
29
+void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len)
3230 {
3331 int winid;
3432 u64 base, shift;
....@@ -43,16 +41,6 @@
4341
4442 pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr);
4543 }
46
-
47
-u64 vas_win_paste_addr(struct vas_window *win)
48
-{
49
- u64 addr;
50
-
51
- compute_paste_address(win, &addr, NULL);
52
-
53
- return addr;
54
-}
55
-EXPORT_SYMBOL(vas_win_paste_addr);
5644
5745 static inline void get_hvwc_mmio_bar(struct vas_window *window,
5846 u64 *start, int *len)
....@@ -92,7 +80,7 @@
9280 goto free_name;
9381
9482 txwin->paste_addr_name = name;
95
- compute_paste_address(txwin, &start, &len);
83
+ vas_win_paste_addr(txwin, &start, &len);
9684
9785 if (!request_mem_region(start, len, name)) {
9886 pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n",
....@@ -150,7 +138,7 @@
150138 u64 busaddr_start;
151139
152140 if (window->paste_kaddr) {
153
- compute_paste_address(window, &busaddr_start, &len);
141
+ vas_win_paste_addr(window, &busaddr_start, &len);
154142 unmap_region(window->paste_kaddr, busaddr_start, len);
155143 window->paste_kaddr = NULL;
156144 kfree(window->paste_addr_name);
....@@ -198,7 +186,7 @@
198186 * OS/User Window Context (UWC) MMIO Base Address Region for the given window.
199187 * Map these bus addresses and save the mapped kernel addresses in @window.
200188 */
201
-int map_winctx_mmio_bars(struct vas_window *window)
189
+static int map_winctx_mmio_bars(struct vas_window *window)
202190 {
203191 int len;
204192 u64 start;
....@@ -226,7 +214,7 @@
226214 * registers are not sequential. And, we can only write to offsets
227215 * with valid registers.
228216 */
229
-void reset_window_regs(struct vas_window *window)
217
+static void reset_window_regs(struct vas_window *window)
230218 {
231219 write_hvwc_reg(window, VREG(LPID), 0ULL);
232220 write_hvwc_reg(window, VREG(PID), 0ULL);
....@@ -369,7 +357,8 @@
369357 * as a one-time task? That could work for NX but what about other
370358 * receivers? Let the receivers tell us the rx-fifo buffers for now.
371359 */
372
-int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx)
360
+static void init_winctx_regs(struct vas_window *window,
361
+ struct vas_winctx *winctx)
373362 {
374363 u64 val;
375364 int fifo_size;
....@@ -387,7 +376,7 @@
387376 init_xlate_regs(window, winctx->user_win);
388377
389378 val = 0ULL;
390
- val = SET_FIELD(VAS_FAULT_TX_WIN, val, 0);
379
+ val = SET_FIELD(VAS_FAULT_TX_WIN, val, winctx->fault_win_id);
391380 write_hvwc_reg(window, VREG(FAULT_TX_WIN), val);
392381
393382 /* In PowerNV, interrupts go to HV. */
....@@ -414,7 +403,7 @@
414403 *
415404 * See also: Design note in function header.
416405 */
417
- val = __pa(winctx->rx_fifo);
406
+ val = winctx->rx_fifo;
418407 val = SET_FIELD(VAS_PAGE_MIGRATION_SELECT, val, 0);
419408 write_hvwc_reg(window, VREG(LFIFO_BAR), val);
420409
....@@ -511,8 +500,6 @@
511500 val = SET_FIELD(VAS_WINCTL_NX_WIN, val, winctx->nx_win);
512501 val = SET_FIELD(VAS_WINCTL_OPEN, val, 1);
513502 write_hvwc_reg(window, VREG(WINCTL), val);
514
-
515
- return 0;
516503 }
517504
518505 static void vas_release_window_id(struct ida *ida, int winid)
....@@ -750,7 +737,7 @@
750737 */
751738 winctx->fifo_disable = true;
752739 winctx->intr_disable = true;
753
- winctx->rx_fifo = NULL;
740
+ winctx->rx_fifo = 0;
754741 }
755742
756743 winctx->lnotify_lpid = rxattr->lnotify_lpid;
....@@ -762,6 +749,8 @@
762749
763750 winctx->min_scope = VAS_SCOPE_LOCAL;
764751 winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;
752
+ if (rxwin->vinst->virq)
753
+ winctx->irq_port = rxwin->vinst->irq_port;
765754 }
766755
767756 static bool rx_win_args_valid(enum vas_cop_type cop,
....@@ -782,7 +771,7 @@
782771 if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX)
783772 return false;
784773
785
- if (attr->wcreds_max > VAS_RX_WCREDS_MAX)
774
+ if (!attr->wcreds_max)
786775 return false;
787776
788777 if (attr->nx_win) {
....@@ -827,7 +816,8 @@
827816 {
828817 memset(rxattr, 0, sizeof(*rxattr));
829818
830
- if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) {
819
+ if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI ||
820
+ cop == VAS_COP_TYPE_GZIP || cop == VAS_COP_TYPE_GZIP_HIPRI) {
831821 rxattr->pin_win = true;
832822 rxattr->nx_win = true;
833823 rxattr->fault_win = false;
....@@ -841,9 +831,9 @@
841831 rxattr->fault_win = true;
842832 rxattr->notify_disable = true;
843833 rxattr->rx_wcred_mode = true;
844
- rxattr->tx_wcred_mode = true;
845834 rxattr->rx_win_ord_mode = true;
846
- rxattr->tx_win_ord_mode = true;
835
+ rxattr->rej_no_credit = true;
836
+ rxattr->tc_mode = VAS_THRESH_DISABLED;
847837 } else if (cop == VAS_COP_TYPE_FTW) {
848838 rxattr->user_win = true;
849839 rxattr->intr_disable = true;
....@@ -887,9 +877,7 @@
887877 rxwin->nx_win = rxattr->nx_win;
888878 rxwin->user_win = rxattr->user_win;
889879 rxwin->cop = cop;
890
- rxwin->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
891
- if (rxattr->user_win)
892
- rxwin->pid = task_pid_vnr(current);
880
+ rxwin->wcreds_max = rxattr->wcreds_max;
893881
894882 init_winctx_for_rxwin(rxwin, rxattr, &winctx);
895883 init_winctx_regs(rxwin, &winctx);
....@@ -904,7 +892,8 @@
904892 {
905893 memset(txattr, 0, sizeof(*txattr));
906894
907
- if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) {
895
+ if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI ||
896
+ cop == VAS_COP_TYPE_GZIP || cop == VAS_COP_TYPE_GZIP_HIPRI) {
908897 txattr->rej_no_credit = false;
909898 txattr->rx_wcred_mode = true;
910899 txattr->tx_wcred_mode = true;
....@@ -958,13 +947,22 @@
958947 winctx->lpid = txattr->lpid;
959948 winctx->pidr = txattr->pidr;
960949 winctx->rx_win_id = txwin->rxwin->winid;
950
+ /*
951
+ * IRQ and fault window setup is successful. Set fault window
952
+ * for the send window so that it is ready to handle faults.
953
+ */
954
+ if (txwin->vinst->virq)
955
+ winctx->fault_win_id = txwin->vinst->fault_win->winid;
961956
962957 winctx->dma_type = VAS_DMA_TYPE_INJECT;
963958 winctx->tc_mode = txattr->tc_mode;
964959 winctx->min_scope = VAS_SCOPE_LOCAL;
965960 winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;
961
+ if (txwin->vinst->virq)
962
+ winctx->irq_port = txwin->vinst->irq_port;
966963
967
- winctx->pswid = 0;
964
+ winctx->pswid = txattr->pswid ? txattr->pswid :
965
+ encode_pswid(txwin->vinst->vas_id, txwin->winid);
968966 }
969967
970968 static bool tx_win_args_valid(enum vas_cop_type cop,
....@@ -979,9 +977,14 @@
979977 if (attr->wcreds_max > VAS_TX_WCREDS_MAX)
980978 return false;
981979
982
- if (attr->user_win &&
983
- (cop != VAS_COP_TYPE_FTW || attr->rsvd_txbuf_count))
984
- return false;
980
+ if (attr->user_win) {
981
+ if (attr->rsvd_txbuf_count)
982
+ return false;
983
+
984
+ if (cop != VAS_COP_TYPE_FTW && cop != VAS_COP_TYPE_GZIP &&
985
+ cop != VAS_COP_TYPE_GZIP_HIPRI)
986
+ return false;
987
+ }
985988
986989 return true;
987990 }
....@@ -1030,7 +1033,6 @@
10301033 txwin->tx_win = 1;
10311034 txwin->rxwin = rxwin;
10321035 txwin->nx_win = txwin->rxwin->nx_win;
1033
- txwin->pid = attr->pid;
10341036 txwin->user_win = attr->user_win;
10351037 txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;
10361038
....@@ -1054,12 +1056,59 @@
10541056 }
10551057 } else {
10561058 /*
1057
- * A user mapping must ensure that context switch issues
1058
- * CP_ABORT for this thread.
1059
+ * Interrupt handler or fault window setup failed. Means
1060
+ * NX can not generate fault for page fault. So not
1061
+ * opening for user space tx window.
10591062 */
1060
- rc = set_thread_uses_vas();
1061
- if (rc)
1063
+ if (!vinst->virq) {
1064
+ rc = -ENODEV;
10621065 goto free_window;
1066
+ }
1067
+
1068
+ /*
1069
+ * Window opened by a child thread may not be closed when
1070
+ * it exits. So take reference to its pid and release it
1071
+ * when the window is freed by the parent thread.
1072
+ * Acquire a reference to the task's pid to make sure
1073
+ * pid will not be re-used - needed only for multithread
1074
+ * applications.
1075
+ */
1076
+ txwin->pid = get_task_pid(current, PIDTYPE_PID);
1077
+ /*
1078
+ * Acquire a reference to the task's mm.
1079
+ */
1080
+ txwin->mm = get_task_mm(current);
1081
+
1082
+ if (!txwin->mm) {
1083
+ put_pid(txwin->pid);
1084
+ pr_err("VAS: pid(%d): mm_struct is not found\n",
1085
+ current->pid);
1086
+ rc = -EPERM;
1087
+ goto free_window;
1088
+ }
1089
+
1090
+ mmgrab(txwin->mm);
1091
+ mmput(txwin->mm);
1092
+ mm_context_add_vas_window(txwin->mm);
1093
+ /*
1094
+ * Process closes window during exit. In the case of
1095
+ * multithread application, the child thread can open
1096
+ * window and can exit without closing it. So take a tgid
1097
+ * reference until window closed to make sure tgid is not
1098
+ * reused.
1099
+ */
1100
+ txwin->tgid = find_get_pid(task_tgid_vnr(current));
1101
+ /*
1102
+ * Even a process that has no foreign real address mapping can
1103
+ * use an unpaired COPY instruction (to no real effect). Issue
1104
+ * CP_ABORT to clear any pending COPY and prevent a covert
1105
+ * channel.
1106
+ *
1107
+ * __switch_to() will issue CP_ABORT on future context switches
1108
+ * if process / thread has any open VAS window (Use
1109
+ * current->mm->context.vas_windows).
1110
+ */
1111
+ asm volatile(PPC_CP_ABORT);
10631112 }
10641113
10651114 set_vinst_win(vinst, txwin);
....@@ -1142,6 +1191,7 @@
11421191 {
11431192 u64 val;
11441193 int creds, mode;
1194
+ int count = 0;
11451195
11461196 val = read_hvwc_reg(window, VREG(WINCTL));
11471197 if (window->tx_win)
....@@ -1160,10 +1210,27 @@
11601210 creds = GET_FIELD(VAS_LRX_WCRED, val);
11611211 }
11621212
1213
+ /*
1214
+ * Takes around few milliseconds to complete all pending requests
1215
+ * and return credits.
1216
+ * TODO: Scan fault FIFO and invalidate CRBs that point to this window
1217
+ * and issue CRB Kill to stop all pending requests. Need only
1218
+ * if there is a bug in NX or fault handling in kernel.
1219
+ */
11631220 if (creds < window->wcreds_max) {
11641221 val = 0;
11651222 set_current_state(TASK_UNINTERRUPTIBLE);
11661223 schedule_timeout(msecs_to_jiffies(10));
1224
+ count++;
1225
+ /*
1226
+ * Process can not close send window until all credits are
1227
+ * returned.
1228
+ */
1229
+ if (!(count % 1000))
1230
+ pr_warn_ratelimited("VAS: pid %d stuck. Waiting for credits returned for Window(%d). creds %d, Retries %d\n",
1231
+ vas_window_pid(window), window->winid,
1232
+ creds, count);
1233
+
11671234 goto retry;
11681235 }
11691236 }
....@@ -1177,6 +1244,7 @@
11771244 {
11781245 int busy;
11791246 u64 val;
1247
+ int count = 0;
11801248
11811249 retry:
11821250 val = read_hvwc_reg(window, VREG(WIN_STATUS));
....@@ -1184,7 +1252,16 @@
11841252 if (busy) {
11851253 val = 0;
11861254 set_current_state(TASK_UNINTERRUPTIBLE);
1187
- schedule_timeout(msecs_to_jiffies(5));
1255
+ schedule_timeout(msecs_to_jiffies(10));
1256
+ count++;
1257
+ /*
1258
+ * Takes around few milliseconds to process all pending
1259
+ * requests.
1260
+ */
1261
+ if (!(count % 1000))
1262
+ pr_warn_ratelimited("VAS: pid %d stuck. Window (ID=%d) is in busy state. Retries %d\n",
1263
+ vas_window_pid(window), window->winid, count);
1264
+
11881265 goto retry;
11891266 }
11901267 }
....@@ -1249,19 +1326,29 @@
12491326
12501327 unmap_paste_region(window);
12511328
1252
- clear_vinst_win(window);
1253
-
12541329 poll_window_busy_state(window);
12551330
12561331 unpin_close_window(window);
12571332
12581333 poll_window_credits(window);
12591334
1335
+ clear_vinst_win(window);
1336
+
12601337 poll_window_castout(window);
12611338
12621339 /* if send window, drop reference to matching receive window */
1263
- if (window->tx_win)
1340
+ if (window->tx_win) {
1341
+ if (window->user_win) {
1342
+ /* Drop references to pid, tgid and mm */
1343
+ put_pid(window->pid);
1344
+ put_pid(window->tgid);
1345
+ if (window->mm) {
1346
+ mm_context_remove_vas_window(window->mm);
1347
+ mmdrop(window->mm);
1348
+ }
1349
+ }
12641350 put_rx_win(window->rxwin);
1351
+ }
12651352
12661353 vas_window_free(window);
12671354
....@@ -1270,10 +1357,88 @@
12701357 EXPORT_SYMBOL_GPL(vas_win_close);
12711358
12721359 /*
1273
- * Return a system-wide unique window id for the window @win.
1360
+ * Return credit for the given window.
1361
+ * Send windows and fault window uses credit mechanism as follows:
1362
+ *
1363
+ * Send windows:
1364
+ * - The default number of credits available for each send window is
1365
+ * 1024. It means 1024 requests can be issued asynchronously at the
1366
+ * same time. If the credit is not available, that request will be
1367
+ * returned with RMA_Busy.
1368
+ * - One credit is taken when NX request is issued.
1369
+ * - This credit is returned after NX processed that request.
1370
+ * - If NX encounters translation error, kernel will return the
1371
+ * credit on the specific send window after processing the fault CRB.
1372
+ *
1373
+ * Fault window:
1374
+ * - The total number credits available is FIFO_SIZE/CRB_SIZE.
1375
+ * Means 4MB/128 in the current implementation. If credit is not
1376
+ * available, RMA_Reject is returned.
1377
+ * - A credit is taken when NX pastes CRB in fault FIFO.
1378
+ * - The kernel will return credit on fault window after reading entry
1379
+ * from fault FIFO.
12741380 */
1275
-u32 vas_win_id(struct vas_window *win)
1381
+void vas_return_credit(struct vas_window *window, bool tx)
12761382 {
1277
- return encode_pswid(win->vinst->vas_id, win->winid);
1383
+ uint64_t val;
1384
+
1385
+ val = 0ULL;
1386
+ if (tx) { /* send window */
1387
+ val = SET_FIELD(VAS_TX_WCRED, val, 1);
1388
+ write_hvwc_reg(window, VREG(TX_WCRED_ADDER), val);
1389
+ } else {
1390
+ val = SET_FIELD(VAS_LRX_WCRED, val, 1);
1391
+ write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), val);
1392
+ }
12781393 }
1279
-EXPORT_SYMBOL_GPL(vas_win_id);
1394
+
1395
+struct vas_window *vas_pswid_to_window(struct vas_instance *vinst,
1396
+ uint32_t pswid)
1397
+{
1398
+ struct vas_window *window;
1399
+ int winid;
1400
+
1401
+ if (!pswid) {
1402
+ pr_devel("%s: called for pswid 0!\n", __func__);
1403
+ return ERR_PTR(-ESRCH);
1404
+ }
1405
+
1406
+ decode_pswid(pswid, NULL, &winid);
1407
+
1408
+ if (winid >= VAS_WINDOWS_PER_CHIP)
1409
+ return ERR_PTR(-ESRCH);
1410
+
1411
+ /*
1412
+ * If application closes the window before the hardware
1413
+ * returns the fault CRB, we should wait in vas_win_close()
1414
+ * for the pending requests. so the window must be active
1415
+ * and the process alive.
1416
+ *
1417
+ * If it's a kernel process, we should not get any faults and
1418
+ * should not get here.
1419
+ */
1420
+ window = vinst->windows[winid];
1421
+
1422
+ if (!window) {
1423
+ pr_err("PSWID decode: Could not find window for winid %d pswid %d vinst 0x%p\n",
1424
+ winid, pswid, vinst);
1425
+ return NULL;
1426
+ }
1427
+
1428
+ /*
1429
+ * Do some sanity checks on the decoded window. Window should be
1430
+ * NX GZIP user send window. FTW windows should not incur faults
1431
+ * since their CRBs are ignored (not queued on FIFO or processed
1432
+ * by NX).
1433
+ */
1434
+ if (!window->tx_win || !window->user_win || !window->nx_win ||
1435
+ window->cop == VAS_COP_TYPE_FAULT ||
1436
+ window->cop == VAS_COP_TYPE_FTW) {
1437
+ pr_err("PSWID decode: id %d, tx %d, user %d, nx %d, cop %d\n",
1438
+ winid, window->tx_win, window->user_win,
1439
+ window->nx_win, window->cop);
1440
+ WARN_ON(1);
1441
+ }
1442
+
1443
+ return window;
1444
+}