From 08f87f769b595151be1afeff53e144f543faa614 Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Wed, 06 Dec 2023 09:51:13 +0000 Subject: [PATCH] add dts config --- kernel/arch/powerpc/platforms/powernv/vas-window.c | 267 +++++++++++++++++++++++++++++++++++++++++++---------- 1 files changed, 216 insertions(+), 51 deletions(-) diff --git a/kernel/arch/powerpc/platforms/powernv/vas-window.c b/kernel/arch/powerpc/platforms/powernv/vas-window.c index e59e0e6..3a86cdd 100644 --- a/kernel/arch/powerpc/platforms/powernv/vas-window.c +++ b/kernel/arch/powerpc/platforms/powernv/vas-window.c @@ -1,10 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 2016-17 IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #define pr_fmt(fmt) "vas: " fmt @@ -16,6 +12,8 @@ #include <linux/log2.h> #include <linux/rcupdate.h> #include <linux/cred.h> +#include <linux/sched/mm.h> +#include <linux/mmu_context.h> #include <asm/switch_to.h> #include <asm/ppc-opcode.h> #include "vas.h" @@ -28,7 +26,7 @@ * Compute the paste address region for the window @window using the * ->paste_base_addr and ->paste_win_id_shift we got from device tree. 
*/ -static void compute_paste_address(struct vas_window *window, u64 *addr, int *len) +void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len) { int winid; u64 base, shift; @@ -43,16 +41,6 @@ pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr); } - -u64 vas_win_paste_addr(struct vas_window *win) -{ - u64 addr; - - compute_paste_address(win, &addr, NULL); - - return addr; -} -EXPORT_SYMBOL(vas_win_paste_addr); static inline void get_hvwc_mmio_bar(struct vas_window *window, u64 *start, int *len) @@ -92,7 +80,7 @@ goto free_name; txwin->paste_addr_name = name; - compute_paste_address(txwin, &start, &len); + vas_win_paste_addr(txwin, &start, &len); if (!request_mem_region(start, len, name)) { pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n", @@ -150,7 +138,7 @@ u64 busaddr_start; if (window->paste_kaddr) { - compute_paste_address(window, &busaddr_start, &len); + vas_win_paste_addr(window, &busaddr_start, &len); unmap_region(window->paste_kaddr, busaddr_start, len); window->paste_kaddr = NULL; kfree(window->paste_addr_name); @@ -198,7 +186,7 @@ * OS/User Window Context (UWC) MMIO Base Address Region for the given window. * Map these bus addresses and save the mapped kernel addresses in @window. */ -int map_winctx_mmio_bars(struct vas_window *window) +static int map_winctx_mmio_bars(struct vas_window *window) { int len; u64 start; @@ -226,7 +214,7 @@ * registers are not sequential. And, we can only write to offsets * with valid registers. */ -void reset_window_regs(struct vas_window *window) +static void reset_window_regs(struct vas_window *window) { write_hvwc_reg(window, VREG(LPID), 0ULL); write_hvwc_reg(window, VREG(PID), 0ULL); @@ -369,7 +357,8 @@ * as a one-time task? That could work for NX but what about other * receivers? Let the receivers tell us the rx-fifo buffers for now. 
*/ -int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx) +static void init_winctx_regs(struct vas_window *window, + struct vas_winctx *winctx) { u64 val; int fifo_size; @@ -387,7 +376,7 @@ init_xlate_regs(window, winctx->user_win); val = 0ULL; - val = SET_FIELD(VAS_FAULT_TX_WIN, val, 0); + val = SET_FIELD(VAS_FAULT_TX_WIN, val, winctx->fault_win_id); write_hvwc_reg(window, VREG(FAULT_TX_WIN), val); /* In PowerNV, interrupts go to HV. */ @@ -414,7 +403,7 @@ * * See also: Design note in function header. */ - val = __pa(winctx->rx_fifo); + val = winctx->rx_fifo; val = SET_FIELD(VAS_PAGE_MIGRATION_SELECT, val, 0); write_hvwc_reg(window, VREG(LFIFO_BAR), val); @@ -511,8 +500,6 @@ val = SET_FIELD(VAS_WINCTL_NX_WIN, val, winctx->nx_win); val = SET_FIELD(VAS_WINCTL_OPEN, val, 1); write_hvwc_reg(window, VREG(WINCTL), val); - - return 0; } static void vas_release_window_id(struct ida *ida, int winid) @@ -750,7 +737,7 @@ */ winctx->fifo_disable = true; winctx->intr_disable = true; - winctx->rx_fifo = NULL; + winctx->rx_fifo = 0; } winctx->lnotify_lpid = rxattr->lnotify_lpid; @@ -762,6 +749,8 @@ winctx->min_scope = VAS_SCOPE_LOCAL; winctx->max_scope = VAS_SCOPE_VECTORED_GROUP; + if (rxwin->vinst->virq) + winctx->irq_port = rxwin->vinst->irq_port; } static bool rx_win_args_valid(enum vas_cop_type cop, @@ -782,7 +771,7 @@ if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX) return false; - if (attr->wcreds_max > VAS_RX_WCREDS_MAX) + if (!attr->wcreds_max) return false; if (attr->nx_win) { @@ -827,7 +816,8 @@ { memset(rxattr, 0, sizeof(*rxattr)); - if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) { + if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI || + cop == VAS_COP_TYPE_GZIP || cop == VAS_COP_TYPE_GZIP_HIPRI) { rxattr->pin_win = true; rxattr->nx_win = true; rxattr->fault_win = false; @@ -841,9 +831,9 @@ rxattr->fault_win = true; rxattr->notify_disable = true; rxattr->rx_wcred_mode = true; - rxattr->tx_wcred_mode = true; 
rxattr->rx_win_ord_mode = true; - rxattr->tx_win_ord_mode = true; + rxattr->rej_no_credit = true; + rxattr->tc_mode = VAS_THRESH_DISABLED; } else if (cop == VAS_COP_TYPE_FTW) { rxattr->user_win = true; rxattr->intr_disable = true; @@ -887,9 +877,7 @@ rxwin->nx_win = rxattr->nx_win; rxwin->user_win = rxattr->user_win; rxwin->cop = cop; - rxwin->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT; - if (rxattr->user_win) - rxwin->pid = task_pid_vnr(current); + rxwin->wcreds_max = rxattr->wcreds_max; init_winctx_for_rxwin(rxwin, rxattr, &winctx); init_winctx_regs(rxwin, &winctx); @@ -904,7 +892,8 @@ { memset(txattr, 0, sizeof(*txattr)); - if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) { + if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI || + cop == VAS_COP_TYPE_GZIP || cop == VAS_COP_TYPE_GZIP_HIPRI) { txattr->rej_no_credit = false; txattr->rx_wcred_mode = true; txattr->tx_wcred_mode = true; @@ -958,13 +947,22 @@ winctx->lpid = txattr->lpid; winctx->pidr = txattr->pidr; winctx->rx_win_id = txwin->rxwin->winid; + /* + * IRQ and fault window setup is successful. Set fault window + * for the send window so that ready to handle faults. + */ + if (txwin->vinst->virq) + winctx->fault_win_id = txwin->vinst->fault_win->winid; winctx->dma_type = VAS_DMA_TYPE_INJECT; winctx->tc_mode = txattr->tc_mode; winctx->min_scope = VAS_SCOPE_LOCAL; winctx->max_scope = VAS_SCOPE_VECTORED_GROUP; + if (txwin->vinst->virq) + winctx->irq_port = txwin->vinst->irq_port; - winctx->pswid = 0; + winctx->pswid = txattr->pswid ? 
txattr->pswid : + encode_pswid(txwin->vinst->vas_id, txwin->winid); } static bool tx_win_args_valid(enum vas_cop_type cop, @@ -979,9 +977,14 @@ if (attr->wcreds_max > VAS_TX_WCREDS_MAX) return false; - if (attr->user_win && - (cop != VAS_COP_TYPE_FTW || attr->rsvd_txbuf_count)) - return false; + if (attr->user_win) { + if (attr->rsvd_txbuf_count) + return false; + + if (cop != VAS_COP_TYPE_FTW && cop != VAS_COP_TYPE_GZIP && + cop != VAS_COP_TYPE_GZIP_HIPRI) + return false; + } return true; } @@ -1030,7 +1033,6 @@ txwin->tx_win = 1; txwin->rxwin = rxwin; txwin->nx_win = txwin->rxwin->nx_win; - txwin->pid = attr->pid; txwin->user_win = attr->user_win; txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT; @@ -1054,12 +1056,59 @@ } } else { /* - * A user mapping must ensure that context switch issues - * CP_ABORT for this thread. + * Interrupt handler or fault window setup failed. Means + * NX can not generate fault for page fault. So not + * opening for user space tx window. */ - rc = set_thread_uses_vas(); - if (rc) + if (!vinst->virq) { + rc = -ENODEV; goto free_window; + } + + /* + * Window opened by a child thread may not be closed when + * it exits. So take reference to its pid and release it + * when the window is freed by parent thread. + * Acquire a reference to the task's pid to make sure + * pid will not be re-used - needed only for multithread + * applications. + */ + txwin->pid = get_task_pid(current, PIDTYPE_PID); + /* + * Acquire a reference to the task's mm. + */ + txwin->mm = get_task_mm(current); + + if (!txwin->mm) { + put_pid(txwin->pid); + pr_err("VAS: pid(%d): mm_struct is not found\n", + current->pid); + rc = -EPERM; + goto free_window; + } + + mmgrab(txwin->mm); + mmput(txwin->mm); + mm_context_add_vas_window(txwin->mm); + /* + * Process closes window during exit. In the case of + * multithread application, the child thread can open + * window and can exit without closing it.
so takes tgid + * reference until window closed to make sure tgid is not + * reused. + */ + txwin->tgid = find_get_pid(task_tgid_vnr(current)); + /* + * Even a process that has no foreign real address mapping can + * use an unpaired COPY instruction (to no real effect). Issue + * CP_ABORT to clear any pending COPY and prevent a covert + * channel. + * + * __switch_to() will issue CP_ABORT on future context switches + * if process / thread has any open VAS window (Use + * current->mm->context.vas_windows). + */ + asm volatile(PPC_CP_ABORT); } set_vinst_win(vinst, txwin); @@ -1142,6 +1191,7 @@ { u64 val; int creds, mode; + int count = 0; val = read_hvwc_reg(window, VREG(WINCTL)); if (window->tx_win) @@ -1160,10 +1210,27 @@ creds = GET_FIELD(VAS_LRX_WCRED, val); } + /* + * Takes around few milliseconds to complete all pending requests + * and return credits. + * TODO: Scan fault FIFO and invalidate CRBs points to this window + * and issue CRB Kill to stop all pending requests. Need only + * if there is a bug in NX or fault handling in kernel. + */ if (creds < window->wcreds_max) { val = 0; set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(msecs_to_jiffies(10)); + count++; + /* + * Process can not close send window until all credits are + * returned. + */ + if (!(count % 1000)) + pr_warn_ratelimited("VAS: pid %d stuck. Waiting for credits returned for Window(%d). creds %d, Retries %d\n", + vas_window_pid(window), window->winid, + creds, count); + goto retry; } } @@ -1177,6 +1244,7 @@ { int busy; u64 val; + int count = 0; retry: val = read_hvwc_reg(window, VREG(WIN_STATUS)); @@ -1184,7 +1252,16 @@ if (busy) { val = 0; set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(msecs_to_jiffies(5)); + schedule_timeout(msecs_to_jiffies(10)); + count++; + /* + * Takes around few milliseconds to process all pending + * requests. + */ + if (!(count % 1000)) + pr_warn_ratelimited("VAS: pid %d stuck. Window (ID=%d) is in busy state. 
Retries %d\n", + vas_window_pid(window), window->winid, count); + goto retry; } } @@ -1249,19 +1326,29 @@ unmap_paste_region(window); - clear_vinst_win(window); - poll_window_busy_state(window); unpin_close_window(window); poll_window_credits(window); + clear_vinst_win(window); + poll_window_castout(window); /* if send window, drop reference to matching receive window */ - if (window->tx_win) + if (window->tx_win) { + if (window->user_win) { + /* Drop references to pid, tgid and mm */ + put_pid(window->pid); + put_pid(window->tgid); + if (window->mm) { + mm_context_remove_vas_window(window->mm); + mmdrop(window->mm); + } + } put_rx_win(window->rxwin); + } vas_window_free(window); @@ -1270,10 +1357,88 @@ EXPORT_SYMBOL_GPL(vas_win_close); /* - * Return a system-wide unique window id for the window @win. + * Return credit for the given window. + * Send windows and fault window uses credit mechanism as follows: + * + * Send windows: + * - The default number of credits available for each send window is + * 1024. It means 1024 requests can be issued asynchronously at the + * same time. If the credit is not available, that request will be + * returned with RMA_Busy. + * - One credit is taken when NX request is issued. + * - This credit is returned after NX processed that request. + * - If NX encounters translation error, kernel will return the + * credit on the specific send window after processing the fault CRB. + * + * Fault window: + * - The total number of credits available is FIFO_SIZE/CRB_SIZE. + * Means 4MB/128 in the current implementation. If credit is not + * available, RMA_Reject is returned. + * - A credit is taken when NX pastes CRB in fault FIFO. + * - The kernel will return credit on fault window after reading entry + * from fault FIFO.
*/ -u32 vas_win_id(struct vas_window *win) +void vas_return_credit(struct vas_window *window, bool tx) { - return encode_pswid(win->vinst->vas_id, win->winid); + uint64_t val; + + val = 0ULL; + if (tx) { /* send window */ + val = SET_FIELD(VAS_TX_WCRED, val, 1); + write_hvwc_reg(window, VREG(TX_WCRED_ADDER), val); + } else { + val = SET_FIELD(VAS_LRX_WCRED, val, 1); + write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), val); + } } -EXPORT_SYMBOL_GPL(vas_win_id); + +struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, + uint32_t pswid) +{ + struct vas_window *window; + int winid; + + if (!pswid) { + pr_devel("%s: called for pswid 0!\n", __func__); + return ERR_PTR(-ESRCH); + } + + decode_pswid(pswid, NULL, &winid); + + if (winid >= VAS_WINDOWS_PER_CHIP) + return ERR_PTR(-ESRCH); + + /* + * If application closes the window before the hardware + * returns the fault CRB, we should wait in vas_win_close() + * for the pending requests, so the window must be active + * and the process alive. + * + * If it's a kernel process, we should not get any faults and + * should not get here. + */ + window = vinst->windows[winid]; + + if (!window) { + pr_err("PSWID decode: Could not find window for winid %d pswid %d vinst 0x%p\n", + winid, pswid, vinst); + return NULL; + } + + /* + * Do some sanity checks on the decoded window. Window should be + * NX GZIP user send window. FTW windows should not incur faults + * since their CRBs are ignored (not queued on FIFO or processed + * by NX). + */ + if (!window->tx_win || !window->user_win || !window->nx_win || + window->cop == VAS_COP_TYPE_FAULT || + window->cop == VAS_COP_TYPE_FTW) { + pr_err("PSWID decode: id %d, tx %d, user %d, nx %d, cop %d\n", + winid, window->tx_win, window->user_win, + window->nx_win, window->cop); + WARN_ON(1); + } + + return window; +} -- Gitblit v1.6.2