// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
/*
|
*
|
* (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
|
*
|
* This program is free software and is provided to you under the terms of the
|
* GNU General Public License version 2 as published by the Free Software
|
* Foundation, and any use by you of this program is subject to the terms
|
* of such GNU license.
|
*
|
* This program is distributed in the hope that it will be useful,
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
* GNU General Public License for more details.
|
*
|
* You should have received a copy of the GNU General Public License
|
* along with this program; if not, you can access it online at
|
* http://www.gnu.org/licenses/gpl-2.0.html.
|
*
|
*/
|
|
/**
|
* DOC: Base kernel MMU management.
|
*/
|
|
#include <linux/kernel.h>
|
#include <linux/dma-mapping.h>
|
#include <mali_kbase.h>
|
#include <gpu/mali_kbase_gpu_fault.h>
|
#include <gpu/mali_kbase_gpu_regmap.h>
|
#include <tl/mali_kbase_tracepoints.h>
|
#include <backend/gpu/mali_kbase_instr_defs.h>
|
#include <mali_kbase_ctx_sched.h>
|
#include <mali_kbase_debug.h>
|
#include <mali_kbase_defs.h>
|
#include <mali_kbase_hw.h>
|
#include <mmu/mali_kbase_mmu_hw.h>
|
#include <mali_kbase_mem.h>
|
#include <mali_kbase_reset_gpu.h>
|
#include <mmu/mali_kbase_mmu.h>
|
#include <mmu/mali_kbase_mmu_internal.h>
|
#include <mali_kbase_cs_experimental.h>
|
#include <device/mali_kbase_device.h>
|
|
#include <mali_kbase_trace_gpu_mem.h>
|
#define KBASE_MMU_PAGE_ENTRIES 512
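
/* With 4 kB pages, 512 entries == PAGE_SIZE / sizeof(u64): each page
 * directory occupies exactly one page and consumes 9 bits of the virtual
 * page frame number per translation level.
 */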
|
|
/**
|
* kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches.
|
* @kctx: The KBase context.
|
* @vpfn: The virtual page frame number to start the flush on.
|
* @nr: The number of pages to flush.
|
* @sync: Set if the operation should be synchronous or not.
|
*
|
* Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs.
|
*
|
 * If sync is not set then transactions still in flight when the flush is
 * issued may use the old page tables and the data they write will not be
 * written out to memory. This function returns after the flush has been
 * issued but before all accesses which might affect the flushed region have
 * completed.
 *
 * If sync is set then accesses in the flushed region will be drained
 * before data is flushed and invalidated through L1, L2 and into memory,
 * after which point this function will return.
|
*/
|
static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
|
u64 vpfn, size_t nr, bool sync);
|
|
/**
|
* kbase_mmu_flush_invalidate_no_ctx() - Flush and invalidate the GPU caches.
|
* @kbdev: Device pointer.
|
* @vpfn: The virtual page frame number to start the flush on.
|
* @nr: The number of pages to flush.
|
* @sync: Set if the operation should be synchronous or not.
|
* @as_nr: GPU address space number for which flush + invalidate is required.
|
*
|
* This is used for MMU tables which do not belong to a user space context.
|
*/
|
static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev,
|
u64 vpfn, size_t nr, bool sync, int as_nr);
|
|
/**
|
* kbase_mmu_sync_pgd() - sync page directory to memory when needed.
|
* @kbdev: Device pointer.
|
* @handle: Address of DMA region.
|
* @size: Size of the region to sync.
|
*
|
* This should be called after each page directory update.
|
*/
|
static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
|
dma_addr_t handle, size_t size)
|
{
|
	/* In a non-coherent system, ensure the GPU can read
	 * the pages from memory
	 */
|
if (kbdev->system_coherency == COHERENCY_NONE)
|
dma_sync_single_for_device(kbdev->dev, handle, size,
|
DMA_TO_DEVICE);
|
}
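
/*
 * Illustrative usage sketch (not a new code path; 'p', 'idx' and 'new_entry'
 * are placeholders): after writing entries through a kmap()ed pointer,
 * callers in this file sync only the dirty sub-range so a non-coherent GPU
 * observes the update, e.g.
 *
 *	pgd_page[idx] = new_entry;
 *	kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p) + idx * sizeof(u64),
 *			   sizeof(u64));
 */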
|
|
/*
|
* Definitions:
|
* - PGD: Page Directory.
|
 * - PTE: Page Table Entry. A 64-bit value pointing to the next
 *        level of translation
 * - ATE: Address Translation Entry. A 64-bit value pointing to
 *        a 4kB physical page.
|
*/
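
/*
 * Each PGD holds KBASE_MMU_PAGE_ENTRIES (512) entries, so a virtual page
 * frame number is decoded 9 bits at a time, with level 0 as the top level.
 * For example (purely illustrative), vpfn 0x12345 selects entry 0 at levels
 * 0 and 1, entry 0x91 at level 2 and entry 0x145 at the bottom level, which
 * matches the "(vpfn >> ((3 - level) * 9)) & 0x1FF" indexing used below.
 */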
|
|
static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
|
struct tagged_addr *phys, size_t nr,
|
unsigned long flags, int group_id);
|
|
/**
|
* reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to
|
* a region on a GPU page fault
|
* @kbdev: KBase device
|
* @reg: The region that will be backed with more pages
|
* @fault_rel_pfn: PFN of the fault relative to the start of the region
|
*
|
* This calculates how much to increase the backing of a region by, based on
|
* where a GPU page fault occurred and the flags in the region.
|
*
|
* This can be more than the minimum number of pages that would reach
|
* @fault_rel_pfn, for example to reduce the overall rate of page fault
|
* interrupts on a region, or to ensure that the end address is aligned.
|
*
|
* Return: the number of backed pages to increase by
|
*/
|
static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev,
|
struct kbase_va_region *reg, size_t fault_rel_pfn)
|
{
|
size_t multiple = reg->extension;
|
size_t reg_current_size = kbase_reg_current_backed_size(reg);
|
size_t minimum_extra = fault_rel_pfn - reg_current_size + 1;
|
size_t remainder;
|
|
if (!multiple) {
|
dev_warn(
|
kbdev->dev,
|
"VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n",
|
((unsigned long long)reg->start_pfn) << PAGE_SHIFT);
|
return minimum_extra;
|
}
|
|
	/* Calculate the remainder to subtract from minimum_extra to make it
	 * the desired (rounded down) multiple of the extension.
	 * Depending on reg's flags, the base used for calculating multiples is
	 * different
	 */

	/* multiple is based on the current backed size, even if the
	 * current backed size/pfn for end of committed memory are not
	 * themselves aligned to multiple
	 */
|
remainder = minimum_extra % multiple;
|
|
#if !MALI_USE_CSF
|
if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
|
		/* multiple is based on the top of the initial commit, which
		 * has been allocated in such a way that (start_pfn +
		 * initial_commit) is already aligned to multiple. Hence the
		 * pfn for the end of committed memory will also be aligned to
		 * multiple
		 */
|
size_t initial_commit = reg->initial_commit;
|
|
if (fault_rel_pfn < initial_commit) {
|
			/* this case is just to catch the allocation having
			 * been recommitted by userspace to be smaller than
			 * the initial commit
			 */
|
minimum_extra = initial_commit - reg_current_size;
|
remainder = 0;
|
} else {
|
/* same as calculating
|
* (fault_rel_pfn - initial_commit + 1)
|
*/
|
size_t pages_after_initial = minimum_extra +
|
reg_current_size - initial_commit;
|
|
remainder = pages_after_initial % multiple;
|
}
|
}
|
#endif /* !MALI_USE_CSF */
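
	/* Worked example (illustrative values, without KBASE_REG_TILER_ALIGN_TOP):
	 * with a current backed size of 100 pages, fault_rel_pfn = 130 and
	 * extension = 64, minimum_extra is 31 and remainder is 31, so the
	 * region grows by 31 + 64 - 31 = 64 pages, i.e. the minimum growth
	 * rounded up to a multiple of the extension.
	 */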
|
|
if (remainder == 0)
|
return minimum_extra;
|
|
return minimum_extra + multiple - remainder;
|
}
|
|
#ifdef CONFIG_MALI_CINSTR_GWT
|
static void kbase_gpu_mmu_handle_write_faulting_as(
|
struct kbase_device *kbdev,
|
struct kbase_as *faulting_as,
|
u64 start_pfn, size_t nr, u32 op)
|
{
|
mutex_lock(&kbdev->mmu_hw_mutex);
|
|
kbase_mmu_hw_clear_fault(kbdev, faulting_as,
|
KBASE_MMU_FAULT_TYPE_PAGE);
|
kbase_mmu_hw_do_operation(kbdev, faulting_as, start_pfn,
|
nr, op, 1);
|
|
mutex_unlock(&kbdev->mmu_hw_mutex);
|
|
kbase_mmu_hw_enable_fault(kbdev, faulting_as,
|
KBASE_MMU_FAULT_TYPE_PAGE);
|
}
|
|
static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
|
struct kbase_as *faulting_as)
|
{
|
struct kbasep_gwt_list_element *pos;
|
struct kbase_va_region *region;
|
struct kbase_device *kbdev;
|
struct kbase_fault *fault;
|
u64 fault_pfn, pfn_offset;
|
u32 op;
|
int ret;
|
int as_no;
|
|
as_no = faulting_as->number;
|
kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
|
fault = &faulting_as->pf_data;
|
fault_pfn = fault->addr >> PAGE_SHIFT;
|
|
kbase_gpu_vm_lock(kctx);
|
|
/* Find region and check if it should be writable. */
|
region = kbase_region_tracker_find_region_enclosing_address(kctx,
|
fault->addr);
|
if (kbase_is_region_invalid_or_free(region)) {
|
kbase_gpu_vm_unlock(kctx);
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"Memory is not mapped on the GPU",
|
&faulting_as->pf_data);
|
return;
|
}
|
|
if (!(region->flags & KBASE_REG_GPU_WR)) {
|
kbase_gpu_vm_unlock(kctx);
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"Region does not have write permissions",
|
&faulting_as->pf_data);
|
return;
|
}
|
|
/* Capture addresses of faulting write location
|
* for job dumping if write tracking is enabled.
|
*/
|
if (kctx->gwt_enabled) {
|
u64 page_addr = fault->addr & PAGE_MASK;
|
bool found = false;
|
/* Check if this write was already handled. */
|
list_for_each_entry(pos, &kctx->gwt_current_list, link) {
|
if (page_addr == pos->page_addr) {
|
found = true;
|
break;
|
}
|
}
|
|
if (!found) {
|
pos = kmalloc(sizeof(*pos), GFP_KERNEL);
|
if (pos) {
|
pos->region = region;
|
pos->page_addr = page_addr;
|
pos->num_pages = 1;
|
list_add(&pos->link, &kctx->gwt_current_list);
|
} else {
|
dev_warn(kbdev->dev, "kmalloc failure");
|
}
|
}
|
}
|
|
pfn_offset = fault_pfn - region->start_pfn;
|
/* Now make this faulting page writable to GPU. */
|
ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn,
|
&kbase_get_gpu_phy_pages(region)[pfn_offset],
|
1, region->flags, region->gpu_alloc->group_id);
|
|
/* flush L2 and unlock the VA (resumes the MMU) */
|
op = AS_COMMAND_FLUSH_PT;
|
|
kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as,
|
fault_pfn, 1, op);
|
|
kbase_gpu_vm_unlock(kctx);
|
}
|
|
static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx,
|
struct kbase_as *faulting_as)
|
{
|
struct kbase_fault *fault = &faulting_as->pf_data;
|
|
switch (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault->status)) {
|
case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
|
case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
|
kbase_gpu_mmu_handle_write_fault(kctx, faulting_as);
|
break;
|
case AS_FAULTSTATUS_ACCESS_TYPE_EX:
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"Execute Permission fault", fault);
|
break;
|
case AS_FAULTSTATUS_ACCESS_TYPE_READ:
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"Read Permission fault", fault);
|
break;
|
default:
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"Unknown Permission fault", fault);
|
break;
|
}
|
}
|
#endif
|
|
#define MAX_POOL_LEVEL 2
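
/* At most two pool levels are walked on a page fault: the context's own pool
 * first, then its next_pool (the device pool, per the description of
 * page_fault_try_alloc() below).
 */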
|
|
/**
|
* page_fault_try_alloc - Try to allocate memory from a context pool
|
* @kctx: Context pointer
|
* @region: Region to grow
|
* @new_pages: Number of 4 kB pages to allocate
|
* @pages_to_grow: Pointer to variable to store number of outstanding pages on
|
* failure. This can be either 4 kB or 2 MB pages, depending on
|
* the number of pages requested.
|
* @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true
|
* for 2 MB, false for 4 kB.
|
* @prealloc_sas: Pointer to kbase_sub_alloc structures
|
*
|
* This function will try to allocate as many pages as possible from the context
|
* pool, then if required will try to allocate the remaining pages from the
|
* device pool.
|
*
|
 * This function will not allocate any new memory beyond what is already
 * present in the context or device pools. This is because it is intended to be
 * called with the vm_lock held, which could cause recursive locking if the
 * allocation caused the out-of-memory killer to run.
|
*
|
 * If 2 MB pages are enabled and new_pages is at least 512 (i.e. 2 MB worth of
 * 4 kB pages) then pages_to_grow will be a count of 2 MB pages, otherwise it
 * will be a count of 4 kB pages.
|
*
|
* Return: true if successful, false on failure
|
*/
|
static bool page_fault_try_alloc(struct kbase_context *kctx,
|
struct kbase_va_region *region, size_t new_pages,
|
int *pages_to_grow, bool *grow_2mb_pool,
|
struct kbase_sub_alloc **prealloc_sas)
|
{
|
struct tagged_addr *gpu_pages[MAX_POOL_LEVEL] = {NULL};
|
struct tagged_addr *cpu_pages[MAX_POOL_LEVEL] = {NULL};
|
size_t pages_alloced[MAX_POOL_LEVEL] = {0};
|
struct kbase_mem_pool *pool, *root_pool;
|
int pool_level = 0;
|
bool alloc_failed = false;
|
size_t pages_still_required;
|
|
if (WARN_ON(region->gpu_alloc->group_id >=
|
MEMORY_GROUP_MANAGER_NR_GROUPS)) {
|
/* Do not try to grow the memory pool */
|
*pages_to_grow = 0;
|
return false;
|
}
|
|
#ifdef CONFIG_MALI_2MB_ALLOC
|
if (new_pages >= (SZ_2M / SZ_4K)) {
|
root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id];
|
*grow_2mb_pool = true;
|
} else {
|
#endif
|
root_pool = &kctx->mem_pools.small[region->gpu_alloc->group_id];
|
*grow_2mb_pool = false;
|
#ifdef CONFIG_MALI_2MB_ALLOC
|
}
|
#endif
|
|
if (region->gpu_alloc != region->cpu_alloc)
|
new_pages *= 2;
|
|
pages_still_required = new_pages;
|
|
/* Determine how many pages are in the pools before trying to allocate.
|
* Don't attempt to allocate & free if the allocation can't succeed.
|
*/
|
for (pool = root_pool; pool != NULL; pool = pool->next_pool) {
|
size_t pool_size_4k;
|
|
kbase_mem_pool_lock(pool);
|
|
pool_size_4k = kbase_mem_pool_size(pool) << pool->order;
|
if (pool_size_4k >= pages_still_required)
|
pages_still_required = 0;
|
else
|
pages_still_required -= pool_size_4k;
|
|
kbase_mem_pool_unlock(pool);
|
|
if (!pages_still_required)
|
break;
|
}
|
|
if (pages_still_required) {
|
/* Insufficient pages in pools. Don't try to allocate - just
|
* request a grow.
|
*/
|
*pages_to_grow = pages_still_required;
|
|
return false;
|
}
|
|
/* Since we've dropped the pool locks, the amount of memory in the pools
|
* may change between the above check and the actual allocation.
|
*/
|
pool = root_pool;
|
for (pool_level = 0; pool_level < MAX_POOL_LEVEL; pool_level++) {
|
size_t pool_size_4k;
|
size_t pages_to_alloc_4k;
|
size_t pages_to_alloc_4k_per_alloc;
|
|
kbase_mem_pool_lock(pool);
|
|
		/* Allocate as much as possible from this pool */
|
pool_size_4k = kbase_mem_pool_size(pool) << pool->order;
|
pages_to_alloc_4k = MIN(new_pages, pool_size_4k);
|
if (region->gpu_alloc == region->cpu_alloc)
|
pages_to_alloc_4k_per_alloc = pages_to_alloc_4k;
|
else
|
pages_to_alloc_4k_per_alloc = pages_to_alloc_4k >> 1;
|
|
pages_alloced[pool_level] = pages_to_alloc_4k;
|
if (pages_to_alloc_4k) {
|
gpu_pages[pool_level] =
|
kbase_alloc_phy_pages_helper_locked(
|
region->gpu_alloc, pool,
|
pages_to_alloc_4k_per_alloc,
|
&prealloc_sas[0]);
|
|
if (!gpu_pages[pool_level]) {
|
alloc_failed = true;
|
} else if (region->gpu_alloc != region->cpu_alloc) {
|
cpu_pages[pool_level] =
|
kbase_alloc_phy_pages_helper_locked(
|
region->cpu_alloc, pool,
|
pages_to_alloc_4k_per_alloc,
|
&prealloc_sas[1]);
|
|
if (!cpu_pages[pool_level])
|
alloc_failed = true;
|
}
|
}
|
|
kbase_mem_pool_unlock(pool);
|
|
if (alloc_failed) {
|
WARN_ON(!new_pages);
|
WARN_ON(pages_to_alloc_4k >= new_pages);
|
WARN_ON(pages_to_alloc_4k_per_alloc >= new_pages);
|
break;
|
}
|
|
new_pages -= pages_to_alloc_4k;
|
|
if (!new_pages)
|
break;
|
|
pool = pool->next_pool;
|
if (!pool)
|
break;
|
}
|
|
if (new_pages) {
|
/* Allocation was unsuccessful */
|
int max_pool_level = pool_level;
|
|
pool = root_pool;
|
|
/* Free memory allocated so far */
|
for (pool_level = 0; pool_level <= max_pool_level;
|
pool_level++) {
|
kbase_mem_pool_lock(pool);
|
|
if (region->gpu_alloc != region->cpu_alloc) {
|
if (pages_alloced[pool_level] &&
|
cpu_pages[pool_level])
|
kbase_free_phy_pages_helper_locked(
|
region->cpu_alloc,
|
pool, cpu_pages[pool_level],
|
pages_alloced[pool_level]);
|
}
|
|
if (pages_alloced[pool_level] && gpu_pages[pool_level])
|
kbase_free_phy_pages_helper_locked(
|
region->gpu_alloc,
|
pool, gpu_pages[pool_level],
|
pages_alloced[pool_level]);
|
|
kbase_mem_pool_unlock(pool);
|
|
pool = pool->next_pool;
|
}
|
|
/*
|
* If the allocation failed despite there being enough memory in
|
* the pool, then just fail. Otherwise, try to grow the memory
|
* pool.
|
*/
|
if (alloc_failed)
|
*pages_to_grow = 0;
|
else
|
*pages_to_grow = new_pages;
|
|
return false;
|
}
|
|
/* Allocation was successful. No pages to grow, return success. */
|
*pages_to_grow = 0;
|
|
return true;
|
}
|
|
/* Small wrapper function to factor out GPU-dependent context releasing */
|
static void release_ctx(struct kbase_device *kbdev,
|
struct kbase_context *kctx)
|
{
|
#if MALI_USE_CSF
|
CSTD_UNUSED(kbdev);
|
kbase_ctx_sched_release_ctx_lock(kctx);
|
#else /* MALI_USE_CSF */
|
kbasep_js_runpool_release_ctx(kbdev, kctx);
|
#endif /* MALI_USE_CSF */
|
}
|
|
void kbase_mmu_page_fault_worker(struct work_struct *data)
|
{
|
u64 fault_pfn;
|
u32 fault_status;
|
size_t new_pages;
|
size_t fault_rel_pfn;
|
struct kbase_as *faulting_as;
|
int as_no;
|
struct kbase_context *kctx;
|
struct kbase_device *kbdev;
|
struct kbase_va_region *region;
|
struct kbase_fault *fault;
|
int err;
|
bool grown = false;
|
int pages_to_grow;
|
bool grow_2mb_pool;
|
struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
|
int i;
|
size_t current_backed_size;
|
#if MALI_JIT_PRESSURE_LIMIT_BASE
|
size_t pages_trimmed = 0;
|
#endif
|
|
faulting_as = container_of(data, struct kbase_as, work_pagefault);
|
fault = &faulting_as->pf_data;
|
fault_pfn = fault->addr >> PAGE_SHIFT;
|
as_no = faulting_as->number;
|
|
kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
|
dev_dbg(kbdev->dev,
|
"Entering %s %pK, fault_pfn %lld, as_no %d\n",
|
__func__, (void *)data, fault_pfn, as_no);
|
|
	/* Grab the context that was already refcounted in
	 * kbase_mmu_interrupt(). Therefore, it cannot be scheduled out of
	 * this AS until we explicitly release it
	 */
|
kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_no);
|
if (!kctx) {
|
atomic_dec(&kbdev->faults_pending);
|
return;
|
}
|
|
KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
|
|
#if MALI_JIT_PRESSURE_LIMIT_BASE
|
#if !MALI_USE_CSF
|
mutex_lock(&kctx->jctx.lock);
|
#endif
|
#endif
|
|
#ifdef CONFIG_MALI_ARBITER_SUPPORT
|
/* check if we still have GPU */
|
if (unlikely(kbase_is_gpu_removed(kbdev))) {
|
dev_dbg(kbdev->dev,
|
"%s: GPU has been removed\n", __func__);
|
goto fault_done;
|
}
|
#endif
|
|
if (unlikely(fault->protected_mode)) {
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"Protected mode fault", fault);
|
kbase_mmu_hw_clear_fault(kbdev, faulting_as,
|
KBASE_MMU_FAULT_TYPE_PAGE);
|
|
goto fault_done;
|
}
|
|
fault_status = fault->status;
|
switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) {
|
|
case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT:
|
/* need to check against the region to handle this one */
|
break;
|
|
case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT:
|
#ifdef CONFIG_MALI_CINSTR_GWT
|
/* If GWT was ever enabled then we need to handle
|
* write fault pages even if the feature was disabled later.
|
*/
|
if (kctx->gwt_was_enabled) {
|
kbase_gpu_mmu_handle_permission_fault(kctx,
|
faulting_as);
|
goto fault_done;
|
}
|
#endif
|
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"Permission failure", fault);
|
goto fault_done;
|
|
case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT:
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"Translation table bus fault", fault);
|
goto fault_done;
|
|
case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG:
|
/* nothing to do, but we don't expect this fault currently */
|
dev_warn(kbdev->dev, "Access flag unexpectedly set");
|
goto fault_done;
|
|
case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT:
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"Address size fault", fault);
|
goto fault_done;
|
|
case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT:
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"Memory attributes fault", fault);
|
goto fault_done;
|
|
default:
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"Unknown fault code", fault);
|
goto fault_done;
|
}
|
|
#ifdef CONFIG_MALI_2MB_ALLOC
|
/* Preallocate memory for the sub-allocation structs if necessary */
|
for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
|
prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
|
if (!prealloc_sas[i]) {
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"Failed pre-allocating memory for sub-allocations' metadata",
|
fault);
|
goto fault_done;
|
}
|
}
|
#endif /* CONFIG_MALI_2MB_ALLOC */
|
|
page_fault_retry:
|
/* so we have a translation fault,
|
* let's see if it is for growable memory
|
*/
|
kbase_gpu_vm_lock(kctx);
|
|
region = kbase_region_tracker_find_region_enclosing_address(kctx,
|
fault->addr);
|
if (kbase_is_region_invalid_or_free(region)) {
|
kbase_gpu_vm_unlock(kctx);
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"Memory is not mapped on the GPU", fault);
|
goto fault_done;
|
}
|
|
if (region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
|
kbase_gpu_vm_unlock(kctx);
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"DMA-BUF is not mapped on the GPU", fault);
|
goto fault_done;
|
}
|
|
if (region->gpu_alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) {
|
kbase_gpu_vm_unlock(kctx);
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"Bad physical memory group ID", fault);
|
goto fault_done;
|
}
|
|
if ((region->flags & GROWABLE_FLAGS_REQUIRED)
|
!= GROWABLE_FLAGS_REQUIRED) {
|
kbase_gpu_vm_unlock(kctx);
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"Memory is not growable", fault);
|
goto fault_done;
|
}
|
|
if ((region->flags & KBASE_REG_DONT_NEED)) {
|
kbase_gpu_vm_unlock(kctx);
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"Don't need memory can't be grown", fault);
|
goto fault_done;
|
}
|
|
if (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault_status) ==
|
AS_FAULTSTATUS_ACCESS_TYPE_READ)
|
dev_warn(kbdev->dev, "Grow on pagefault while reading");
|
|
	/* Find the size we need to grow the region by.
	 * We know the result fits in a size_t because
	 * kbase_region_tracker_find_region_enclosing_address validated the
	 * fault_address to be within a size_t from the start_pfn.
	 */
|
fault_rel_pfn = fault_pfn - region->start_pfn;
|
|
current_backed_size = kbase_reg_current_backed_size(region);
|
|
if (fault_rel_pfn < current_backed_size) {
|
dev_dbg(kbdev->dev,
|
"Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
|
fault->addr, region->start_pfn,
|
region->start_pfn +
|
current_backed_size);
|
|
mutex_lock(&kbdev->mmu_hw_mutex);
|
|
kbase_mmu_hw_clear_fault(kbdev, faulting_as,
|
KBASE_MMU_FAULT_TYPE_PAGE);
|
		/* [1] In case another page fault occurred while we were
		 * handling the (duplicate) page fault, we need to ensure we
		 * don't lose the other page fault as a result of us clearing
		 * the MMU IRQ. Therefore, after we clear the MMU IRQ we send
		 * an UNLOCK command that will retry any stalled memory
		 * transaction (which should cause the other page fault to be
		 * raised again).
		 */
|
kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0,
|
AS_COMMAND_UNLOCK, 1);
|
|
mutex_unlock(&kbdev->mmu_hw_mutex);
|
|
kbase_mmu_hw_enable_fault(kbdev, faulting_as,
|
KBASE_MMU_FAULT_TYPE_PAGE);
|
kbase_gpu_vm_unlock(kctx);
|
|
goto fault_done;
|
}
|
|
new_pages = reg_grow_calc_extra_pages(kbdev, region, fault_rel_pfn);
|
|
/* cap to max vsize */
|
new_pages = min(new_pages, region->nr_pages - current_backed_size);
|
dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault\n",
|
new_pages);
|
|
if (new_pages == 0) {
|
mutex_lock(&kbdev->mmu_hw_mutex);
|
|
/* Duplicate of a fault we've already handled, nothing to do */
|
kbase_mmu_hw_clear_fault(kbdev, faulting_as,
|
KBASE_MMU_FAULT_TYPE_PAGE);
|
/* See comment [1] about UNLOCK usage */
|
kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0,
|
AS_COMMAND_UNLOCK, 1);
|
|
mutex_unlock(&kbdev->mmu_hw_mutex);
|
|
kbase_mmu_hw_enable_fault(kbdev, faulting_as,
|
KBASE_MMU_FAULT_TYPE_PAGE);
|
kbase_gpu_vm_unlock(kctx);
|
goto fault_done;
|
}
|
|
pages_to_grow = 0;
|
|
#if MALI_JIT_PRESSURE_LIMIT_BASE
|
if ((region->flags & KBASE_REG_ACTIVE_JIT_ALLOC) && !pages_trimmed) {
|
kbase_jit_request_phys_increase(kctx, new_pages);
|
pages_trimmed = new_pages;
|
}
|
#endif
|
|
spin_lock(&kctx->mem_partials_lock);
|
grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow,
|
&grow_2mb_pool, prealloc_sas);
|
spin_unlock(&kctx->mem_partials_lock);
|
|
if (grown) {
|
u64 pfn_offset;
|
u32 op;
|
|
/* alloc success */
|
WARN_ON(kbase_reg_current_backed_size(region) >
|
region->nr_pages);
|
|
/* set up the new pages */
|
pfn_offset = kbase_reg_current_backed_size(region) - new_pages;
|
		/*
		 * Note:
		 * Issuing an MMU operation will unlock the MMU and cause the
		 * translation to be replayed. If the page insertion fails
		 * then, rather than trying to continue, the context should be
		 * killed, so the no_flush version of insert_pages is used,
		 * which allows us to unlock the MMU as we see fit.
		 */
|
err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu,
|
region->start_pfn + pfn_offset,
|
&kbase_get_gpu_phy_pages(region)[pfn_offset],
|
new_pages, region->flags, region->gpu_alloc->group_id);
|
if (err) {
|
kbase_free_phy_pages_helper(region->gpu_alloc,
|
new_pages);
|
if (region->gpu_alloc != region->cpu_alloc)
|
kbase_free_phy_pages_helper(region->cpu_alloc,
|
new_pages);
|
kbase_gpu_vm_unlock(kctx);
|
/* The locked VA region will be unlocked and the cache
|
* invalidated in here
|
*/
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"Page table update failure", fault);
|
goto fault_done;
|
}
|
KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, kctx->id, as_no,
|
(u64)new_pages);
|
trace_mali_mmu_page_fault_grow(region, fault, new_pages);
|
|
#if MALI_INCREMENTAL_RENDERING
|
/* Switch to incremental rendering if we have nearly run out of
|
* memory in a JIT memory allocation.
|
*/
|
if (region->threshold_pages &&
|
kbase_reg_current_backed_size(region) >
|
region->threshold_pages) {
|
|
dev_dbg(kctx->kbdev->dev,
|
"%zu pages exceeded IR threshold %zu\n",
|
new_pages + current_backed_size,
|
region->threshold_pages);
|
|
if (kbase_mmu_switch_to_ir(kctx, region) >= 0) {
|
dev_dbg(kctx->kbdev->dev,
|
"Get region %pK for IR\n",
|
(void *)region);
|
kbase_va_region_alloc_get(kctx, region);
|
}
|
}
|
#endif
|
|
/* AS transaction begin */
|
mutex_lock(&kbdev->mmu_hw_mutex);
|
|
/* flush L2 and unlock the VA (resumes the MMU) */
|
op = AS_COMMAND_FLUSH_PT;
|
|
		/* clear MMU interrupt - this needs to be done after updating
		 * the page tables but before issuing a FLUSH command. The
		 * FLUSH cmd has a side effect that it restarts stalled memory
		 * transactions in other address spaces which may cause
		 * another fault to occur. If we didn't clear the interrupt at
		 * this stage a new IRQ might not be raised when the GPU finds
		 * an MMU IRQ is already pending.
		 */
|
kbase_mmu_hw_clear_fault(kbdev, faulting_as,
|
KBASE_MMU_FAULT_TYPE_PAGE);
|
|
kbase_mmu_hw_do_operation(kbdev, faulting_as,
|
fault->addr >> PAGE_SHIFT,
|
new_pages, op, 1);
|
|
mutex_unlock(&kbdev->mmu_hw_mutex);
|
/* AS transaction end */
|
|
/* reenable this in the mask */
|
kbase_mmu_hw_enable_fault(kbdev, faulting_as,
|
KBASE_MMU_FAULT_TYPE_PAGE);
|
|
#ifdef CONFIG_MALI_CINSTR_GWT
|
if (kctx->gwt_enabled) {
|
/* GWT also tracks growable regions. */
|
struct kbasep_gwt_list_element *pos;
|
|
pos = kmalloc(sizeof(*pos), GFP_KERNEL);
|
if (pos) {
|
pos->region = region;
|
pos->page_addr = (region->start_pfn +
|
pfn_offset) <<
|
PAGE_SHIFT;
|
pos->num_pages = new_pages;
|
list_add(&pos->link,
|
&kctx->gwt_current_list);
|
} else {
|
dev_warn(kbdev->dev, "kmalloc failure");
|
}
|
}
|
#endif
|
|
#if MALI_JIT_PRESSURE_LIMIT_BASE
|
if (pages_trimmed) {
|
kbase_jit_done_phys_increase(kctx, pages_trimmed);
|
pages_trimmed = 0;
|
}
|
#endif
|
kbase_gpu_vm_unlock(kctx);
|
} else {
|
int ret = -ENOMEM;
|
|
kbase_gpu_vm_unlock(kctx);
|
|
/* If the memory pool was insufficient then grow it and retry.
|
* Otherwise fail the allocation.
|
*/
|
if (pages_to_grow > 0) {
|
#ifdef CONFIG_MALI_2MB_ALLOC
|
if (grow_2mb_pool) {
|
/* Round page requirement up to nearest 2 MB */
|
struct kbase_mem_pool *const lp_mem_pool =
|
&kctx->mem_pools.large[
|
region->gpu_alloc->group_id];
|
|
pages_to_grow = (pages_to_grow +
|
((1 << lp_mem_pool->order) - 1))
|
>> lp_mem_pool->order;
|
|
ret = kbase_mem_pool_grow(lp_mem_pool,
|
pages_to_grow);
|
} else {
|
#endif
|
struct kbase_mem_pool *const mem_pool =
|
&kctx->mem_pools.small[
|
region->gpu_alloc->group_id];
|
|
ret = kbase_mem_pool_grow(mem_pool,
|
pages_to_grow);
|
#ifdef CONFIG_MALI_2MB_ALLOC
|
}
|
#endif
|
}
|
if (ret < 0) {
|
/* failed to extend, handle as a normal PF */
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"Page allocation failure", fault);
|
} else {
|
dev_dbg(kbdev->dev, "Try again after pool_grow\n");
|
goto page_fault_retry;
|
}
|
}
|
|
fault_done:
|
#if MALI_JIT_PRESSURE_LIMIT_BASE
|
if (pages_trimmed) {
|
kbase_gpu_vm_lock(kctx);
|
kbase_jit_done_phys_increase(kctx, pages_trimmed);
|
kbase_gpu_vm_unlock(kctx);
|
}
|
#if !MALI_USE_CSF
|
mutex_unlock(&kctx->jctx.lock);
|
#endif
|
#endif
|
|
for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
|
kfree(prealloc_sas[i]);
|
|
/*
|
* By this point, the fault was handled in some way,
|
* so release the ctx refcount
|
*/
|
release_ctx(kbdev, kctx);
|
|
atomic_dec(&kbdev->faults_pending);
|
dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK\n", (void *)data);
|
}
|
|
static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
|
struct kbase_mmu_table *mmut)
|
{
|
u64 *page;
|
int i;
|
struct page *p;
|
|
#ifdef CONFIG_MALI_2MB_ALLOC
|
p = kbase_mem_pool_alloc(&kbdev->mem_pools.large[mmut->group_id]);
|
#else /* CONFIG_MALI_2MB_ALLOC */
|
p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]);
|
#endif /* CONFIG_MALI_2MB_ALLOC */
|
if (!p)
|
return 0;
|
|
page = kmap(p);
|
if (page == NULL)
|
goto alloc_free;
|
|
/* If the MMU tables belong to a context then account the memory usage
|
* to that context, otherwise the MMU tables are device wide and are
|
* only accounted to the device.
|
*/
|
if (mmut->kctx) {
|
int new_page_count;
|
|
new_page_count = atomic_add_return(1,
|
&mmut->kctx->used_pages);
|
KBASE_TLSTREAM_AUX_PAGESALLOC(
|
kbdev,
|
mmut->kctx->id,
|
(u64)new_page_count);
|
kbase_process_page_usage_inc(mmut->kctx, 1);
|
}
|
|
atomic_add(1, &kbdev->memdev.used_pages);
|
|
kbase_trace_gpu_mem_usage_inc(kbdev, mmut->kctx, 1);
|
|
for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
|
kbdev->mmu_mode->entry_invalidate(&page[i]);
|
|
kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE);
|
|
kunmap(p);
|
return page_to_phys(p);
|
|
alloc_free:
|
|
#ifdef CONFIG_MALI_2MB_ALLOC
|
kbase_mem_pool_free(&kbdev->mem_pools.large[mmut->group_id], p, false);
|
#else /* CONFIG_MALI_2MB_ALLOC */
|
kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, false);
|
#endif /* CONFIG_MALI_2MB_ALLOC */
|
|
return 0;
|
}
|
|
/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the
|
* new table from the pool if needed and possible
|
*/
|
static int mmu_get_next_pgd(struct kbase_device *kbdev,
|
struct kbase_mmu_table *mmut,
|
phys_addr_t *pgd, u64 vpfn, int level)
|
{
|
u64 *page;
|
phys_addr_t target_pgd;
|
struct page *p;
|
|
KBASE_DEBUG_ASSERT(*pgd);
|
|
lockdep_assert_held(&mmut->mmu_lock);
|
|
/*
|
* Architecture spec defines level-0 as being the top-most.
|
* This is a bit unfortunate here, but we keep the same convention.
|
*/
|
vpfn >>= (3 - level) * 9;
|
vpfn &= 0x1FF;
|
|
p = pfn_to_page(PFN_DOWN(*pgd));
|
page = kmap(p);
|
if (page == NULL) {
|
dev_warn(kbdev->dev, "%s: kmap failure\n", __func__);
|
return -EINVAL;
|
}
|
|
target_pgd = kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
|
|
if (!target_pgd) {
|
target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
|
if (!target_pgd) {
|
dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n",
|
__func__);
|
kunmap(p);
|
return -ENOMEM;
|
}
|
|
kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
|
|
kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE);
|
/* Rely on the caller to update the address space flags. */
|
}
|
|
kunmap(p);
|
*pgd = target_pgd;
|
|
return 0;
|
}
|
|
/*
|
* Returns the PGD for the specified level of translation
|
*/
|
static int mmu_get_pgd_at_level(struct kbase_device *kbdev,
|
struct kbase_mmu_table *mmut,
|
u64 vpfn,
|
int level,
|
phys_addr_t *out_pgd)
|
{
|
phys_addr_t pgd;
|
int l;
|
|
lockdep_assert_held(&mmut->mmu_lock);
|
pgd = mmut->pgd;
|
|
for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) {
|
int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
|
/* Handle failure condition */
|
if (err) {
|
dev_dbg(kbdev->dev,
|
"%s: mmu_get_next_pgd failure at level %d\n",
|
__func__, l);
|
return err;
|
}
|
}
|
|
*out_pgd = pgd;
|
|
return 0;
|
}
|
|
static int mmu_get_bottom_pgd(struct kbase_device *kbdev,
|
struct kbase_mmu_table *mmut,
|
u64 vpfn,
|
phys_addr_t *out_pgd)
|
{
|
return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL,
|
out_pgd);
|
}
|
|
static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
|
struct kbase_mmu_table *mmut,
|
u64 from_vpfn, u64 to_vpfn)
|
{
|
phys_addr_t pgd;
|
u64 vpfn = from_vpfn;
|
struct kbase_mmu_mode const *mmu_mode;
|
|
/* 64-bit address range is the max */
|
KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
|
KBASE_DEBUG_ASSERT(from_vpfn <= to_vpfn);
|
|
lockdep_assert_held(&mmut->mmu_lock);
|
|
mmu_mode = kbdev->mmu_mode;
|
|
while (vpfn < to_vpfn) {
|
unsigned int i;
|
unsigned int idx = vpfn & 0x1FF;
|
unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx;
|
unsigned int pcount = 0;
|
unsigned int left = to_vpfn - vpfn;
|
int level;
|
u64 *page;
|
|
if (count > left)
|
count = left;
|
|
		/* need to check if this is a 2MB page or a 4kB one */
|
pgd = mmut->pgd;
|
|
for (level = MIDGARD_MMU_TOPLEVEL;
|
level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
|
idx = (vpfn >> ((3 - level) * 9)) & 0x1FF;
|
page = kmap(phys_to_page(pgd));
|
if (mmu_mode->ate_is_valid(page[idx], level))
|
break; /* keep the mapping */
|
kunmap(phys_to_page(pgd));
|
pgd = mmu_mode->pte_to_phy_addr(page[idx]);
|
}
|
|
switch (level) {
|
case MIDGARD_MMU_LEVEL(2):
|
/* remap to single entry to update */
|
pcount = 1;
|
break;
|
case MIDGARD_MMU_BOTTOMLEVEL:
|
/* page count is the same as the logical count */
|
pcount = count;
|
break;
|
default:
|
dev_warn(kbdev->dev, "%sNo support for ATEs at level %d\n",
|
__func__, level);
|
goto next;
|
}
|
|
/* Invalidate the entries we added */
|
for (i = 0; i < pcount; i++)
|
mmu_mode->entry_invalidate(&page[idx + i]);
|
|
kbase_mmu_sync_pgd(kbdev,
|
kbase_dma_addr(phys_to_page(pgd)) + 8 * idx,
|
8 * pcount);
|
kunmap(phys_to_page(pgd));
|
|
next:
|
vpfn += count;
|
}
|
}
|
|
/*
 * Map the single page 'phys' 'nr' times, starting at GPU PFN 'vpfn'
 */
|
int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
|
struct tagged_addr phys, size_t nr,
|
unsigned long flags, int const group_id)
|
{
|
phys_addr_t pgd;
|
u64 *pgd_page;
|
/* In case the insert_single_page only partially completes
|
* we need to be able to recover
|
*/
|
bool recover_required = false;
|
u64 start_vpfn = vpfn;
|
size_t recover_count = 0;
|
size_t remain = nr;
|
int err;
|
struct kbase_device *kbdev;
|
|
if (WARN_ON(kctx == NULL))
|
return -EINVAL;
|
|
/* 64-bit address range is the max */
|
KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
|
|
kbdev = kctx->kbdev;
|
|
/* Early out if there is nothing to do */
|
if (nr == 0)
|
return 0;
|
|
mutex_lock(&kctx->mmu.mmu_lock);
|
|
while (remain) {
|
unsigned int i;
|
unsigned int index = vpfn & 0x1FF;
|
unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
|
struct page *p;
|
|
if (count > remain)
|
count = remain;
|
|
		/*
		 * Repeatedly calling mmu_get_bottom_pgd() is clearly
		 * suboptimal. We don't have to re-parse the whole tree
		 * each time (just cache the l0-l2 sequence).
		 * On the other hand, it's only a gain when we map more than
		 * 256 pages at once (on average). Do we really care?
		 */
|
do {
|
err = mmu_get_bottom_pgd(kbdev, &kctx->mmu,
|
vpfn, &pgd);
|
if (err != -ENOMEM)
|
break;
|
/* Fill the memory pool with enough pages for
|
* the page walk to succeed
|
*/
|
mutex_unlock(&kctx->mmu.mmu_lock);
|
err = kbase_mem_pool_grow(
|
#ifdef CONFIG_MALI_2MB_ALLOC
|
&kbdev->mem_pools.large[
|
#else
|
&kbdev->mem_pools.small[
|
#endif
|
kctx->mmu.group_id],
|
MIDGARD_MMU_BOTTOMLEVEL);
|
mutex_lock(&kctx->mmu.mmu_lock);
|
} while (!err);
|
if (err) {
|
dev_warn(kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
|
if (recover_required) {
|
/* Invalidate the pages we have partially
|
* completed
|
*/
|
mmu_insert_pages_failure_recovery(kbdev,
|
&kctx->mmu,
|
start_vpfn,
|
start_vpfn + recover_count);
|
}
|
goto fail_unlock;
|
}
|
|
p = pfn_to_page(PFN_DOWN(pgd));
|
pgd_page = kmap(p);
|
if (!pgd_page) {
|
dev_warn(kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
|
if (recover_required) {
|
/* Invalidate the pages we have partially
|
* completed
|
*/
|
mmu_insert_pages_failure_recovery(kbdev,
|
&kctx->mmu,
|
start_vpfn,
|
start_vpfn + recover_count);
|
}
|
err = -ENOMEM;
|
goto fail_unlock;
|
}
|
|
for (i = 0; i < count; i++) {
|
unsigned int ofs = index + i;
|
|
/* Fail if the current page is a valid ATE entry */
|
KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
|
|
pgd_page[ofs] = kbase_mmu_create_ate(kbdev,
|
phys, flags, MIDGARD_MMU_BOTTOMLEVEL, group_id);
|
}
|
|
vpfn += count;
|
remain -= count;
|
|
kbase_mmu_sync_pgd(kbdev,
|
kbase_dma_addr(p) + (index * sizeof(u64)),
|
count * sizeof(u64));
|
|
kunmap(p);
|
/* We have started modifying the page table.
|
* If further pages need inserting and fail we need to undo what
|
* has already taken place
|
*/
|
recover_required = true;
|
recover_count += count;
|
}
|
mutex_unlock(&kctx->mmu.mmu_lock);
|
kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false);
|
return 0;
|
|
fail_unlock:
|
mutex_unlock(&kctx->mmu.mmu_lock);
|
kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false);
|
return err;
|
}
|
|
static inline void cleanup_empty_pte(struct kbase_device *kbdev,
|
struct kbase_mmu_table *mmut, u64 *pte)
|
{
|
phys_addr_t tmp_pgd;
|
struct page *tmp_p;
|
|
tmp_pgd = kbdev->mmu_mode->pte_to_phy_addr(*pte);
|
tmp_p = phys_to_page(tmp_pgd);
|
#ifdef CONFIG_MALI_2MB_ALLOC
|
kbase_mem_pool_free(&kbdev->mem_pools.large[mmut->group_id],
|
#else
|
kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id],
|
#endif
|
tmp_p, false);
|
|
/* If the MMU tables belong to a context then we accounted the memory
|
* usage to that context, so decrement here.
|
*/
|
if (mmut->kctx) {
|
kbase_process_page_usage_dec(mmut->kctx, 1);
|
atomic_sub(1, &mmut->kctx->used_pages);
|
}
|
atomic_sub(1, &kbdev->memdev.used_pages);
|
|
kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1);
|
}
|
|
u64 kbase_mmu_create_ate(struct kbase_device *const kbdev,
|
struct tagged_addr const phy, unsigned long const flags,
|
int const level, int const group_id)
|
{
|
u64 entry;
|
|
kbdev->mmu_mode->entry_set_ate(&entry, phy, flags, level);
|
return kbdev->mgm_dev->ops.mgm_update_gpu_pte(kbdev->mgm_dev,
|
group_id, level, entry);
|
}
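
/*
 * Illustrative use of kbase_mmu_create_ate() (taken from the insertion paths
 * below): the returned value is written straight into a PGD slot, e.g.
 *
 *	pgd_page[ofs] = kbase_mmu_create_ate(kbdev, phys[i], flags,
 *					     MIDGARD_MMU_BOTTOMLEVEL, group_id);
 */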
|
|
int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
|
struct kbase_mmu_table *mmut,
|
const u64 start_vpfn,
|
struct tagged_addr *phys, size_t nr,
|
unsigned long flags,
|
int const group_id)
|
{
|
phys_addr_t pgd;
|
u64 *pgd_page;
|
u64 insert_vpfn = start_vpfn;
|
size_t remain = nr;
|
int err;
|
struct kbase_mmu_mode const *mmu_mode;
|
|
/* Note that 0 is a valid start_vpfn */
|
/* 64-bit address range is the max */
|
KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
|
|
mmu_mode = kbdev->mmu_mode;
|
|
/* Early out if there is nothing to do */
|
if (nr == 0)
|
return 0;
|
|
mutex_lock(&mmut->mmu_lock);
|
|
while (remain) {
|
unsigned int i;
|
unsigned int vindex = insert_vpfn & 0x1FF;
|
unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
|
struct page *p;
|
int cur_level;
|
|
if (count > remain)
|
count = remain;
|
|
if (!vindex && is_huge_head(*phys))
|
cur_level = MIDGARD_MMU_LEVEL(2);
|
else
|
cur_level = MIDGARD_MMU_BOTTOMLEVEL;
|
|
/*
|
* Repeatedly calling mmu_get_pgd_at_level() is clearly
|
* suboptimal. We don't have to re-parse the whole tree
|
* each time (just cache the l0-l2 sequence).
|
* On the other hand, it's only a gain when we map more than
|
* 256 pages at once (on average). Do we really care?
|
*/
|
do {
|
err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn,
|
cur_level, &pgd);
|
if (err != -ENOMEM)
|
break;
|
/* Fill the memory pool with enough pages for
|
* the page walk to succeed
|
*/
|
mutex_unlock(&mmut->mmu_lock);
|
err = kbase_mem_pool_grow(
|
#ifdef CONFIG_MALI_2MB_ALLOC
|
&kbdev->mem_pools.large[mmut->group_id],
|
#else
|
&kbdev->mem_pools.small[mmut->group_id],
|
#endif
|
cur_level);
|
mutex_lock(&mmut->mmu_lock);
|
} while (!err);
|
|
if (err) {
|
dev_warn(kbdev->dev,
|
"%s: mmu_get_bottom_pgd failure\n", __func__);
|
if (insert_vpfn != start_vpfn) {
|
/* Invalidate the pages we have partially
|
* completed
|
*/
|
mmu_insert_pages_failure_recovery(kbdev,
|
mmut, start_vpfn, insert_vpfn);
|
}
|
goto fail_unlock;
|
}
|
|
p = pfn_to_page(PFN_DOWN(pgd));
|
pgd_page = kmap(p);
|
if (!pgd_page) {
|
dev_warn(kbdev->dev, "%s: kmap failure\n",
|
__func__);
|
if (insert_vpfn != start_vpfn) {
|
/* Invalidate the pages we have partially
|
* completed
|
*/
|
mmu_insert_pages_failure_recovery(kbdev,
|
mmut, start_vpfn, insert_vpfn);
|
}
|
err = -ENOMEM;
|
goto fail_unlock;
|
}
|
|
if (cur_level == MIDGARD_MMU_LEVEL(2)) {
|
int level_index = (insert_vpfn >> 9) & 0x1FF;
|
u64 *target = &pgd_page[level_index];
|
|
if (mmu_mode->pte_is_valid(*target, cur_level))
|
cleanup_empty_pte(kbdev, mmut, target);
|
*target = kbase_mmu_create_ate(kbdev, *phys, flags,
|
cur_level, group_id);
|
} else {
|
for (i = 0; i < count; i++) {
|
unsigned int ofs = vindex + i;
|
u64 *target = &pgd_page[ofs];
|
|
/* Warn if the current page is a valid ATE
|
* entry. The page table shouldn't have anything
|
* in the place where we are trying to put a
|
* new entry. Modification to page table entries
|
* should be performed with
|
* kbase_mmu_update_pages()
|
*/
|
WARN_ON((*target & 1UL) != 0);
|
|
*target = kbase_mmu_create_ate(kbdev,
|
phys[i], flags, cur_level, group_id);
|
}
|
}
|
|
phys += count;
|
insert_vpfn += count;
|
remain -= count;
|
|
kbase_mmu_sync_pgd(kbdev,
|
kbase_dma_addr(p) + (vindex * sizeof(u64)),
|
count * sizeof(u64));
|
|
kunmap(p);
|
}
|
|
err = 0;
|
|
fail_unlock:
|
mutex_unlock(&mmut->mmu_lock);
|
return err;
|
}
|
|
/*
|
* Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space
|
* number 'as_nr'.
|
*/
|
int kbase_mmu_insert_pages(struct kbase_device *kbdev,
|
struct kbase_mmu_table *mmut, u64 vpfn,
|
struct tagged_addr *phys, size_t nr,
|
unsigned long flags, int as_nr, int const group_id)
|
{
|
int err;
|
|
err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn,
|
phys, nr, flags, group_id);
|
|
if (mmut->kctx)
|
kbase_mmu_flush_invalidate(mmut->kctx, vpfn, nr, false);
|
else
|
kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, nr, false,
|
as_nr);
|
|
return err;
|
}
|
|
KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
|
|
/**
|
* kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches
|
* without retaining the kbase context.
|
* @kctx: The KBase context.
|
* @vpfn: The virtual page frame number to start the flush on.
|
* @nr: The number of pages to flush.
|
* @sync: Set if the operation should be synchronous or not.
|
*
|
* As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any
|
* other locking.
|
*/
|
static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
|
u64 vpfn, size_t nr, bool sync)
|
{
|
struct kbase_device *kbdev = kctx->kbdev;
|
int err;
|
u32 op;
|
|
/* Early out if there is nothing to do */
|
if (nr == 0)
|
return;
|
|
if (sync)
|
op = AS_COMMAND_FLUSH_MEM;
|
else
|
op = AS_COMMAND_FLUSH_PT;
|
|
err = kbase_mmu_hw_do_operation(kbdev,
|
&kbdev->as[kctx->as_nr],
|
vpfn, nr, op, 0);
|
if (err) {
|
/* Flush failed to complete, assume the
|
* GPU has hung and perform a reset to recover
|
*/
|
dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
|
|
if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
|
kbase_reset_gpu_locked(kbdev);
|
}
|
}
|
|
/* Perform a flush/invalidate on a particular address space
|
*/
|
static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev,
|
struct kbase_as *as,
|
u64 vpfn, size_t nr, bool sync)
|
{
|
int err;
|
u32 op;
|
bool gpu_powered;
|
unsigned long flags;
|
|
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
|
gpu_powered = kbdev->pm.backend.gpu_powered;
|
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
|
|
	/* GPU is off so there's no need to perform flush/invalidate.
	 * But even if the GPU is not actually powered down, after the
	 * gpu_powered flag was set to false it is still safe to skip the
	 * flush/invalidate. The TLB invalidation will anyway be performed due
	 * to AS_COMMAND_UPDATE, which is sent when address spaces are restored
	 * after the gpu_powered flag is set to true. Flushing of the L2 cache
	 * is certainly not required, as the L2 cache is definitely off if
	 * gpu_powered is false.
	 */
|
if (!gpu_powered)
|
return;
|
|
if (kbase_pm_context_active_handle_suspend(kbdev,
|
KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
|
/* GPU has just been powered off due to system suspend.
|
* So again, no need to perform flush/invalidate.
|
*/
|
return;
|
}
|
|
/* AS transaction begin */
|
mutex_lock(&kbdev->mmu_hw_mutex);
|
|
if (sync)
|
op = AS_COMMAND_FLUSH_MEM;
|
else
|
op = AS_COMMAND_FLUSH_PT;
|
|
err = kbase_mmu_hw_do_operation(kbdev,
|
as, vpfn, nr, op, 0);
|
|
if (err) {
|
/* Flush failed to complete, assume the GPU has hung and
|
* perform a reset to recover
|
*/
|
dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
|
|
if (kbase_prepare_to_reset_gpu(
|
kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
|
kbase_reset_gpu(kbdev);
|
}
|
|
mutex_unlock(&kbdev->mmu_hw_mutex);
|
/* AS transaction end */
|
|
kbase_pm_context_idle(kbdev);
|
}
|
|
static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev,
|
u64 vpfn, size_t nr, bool sync, int as_nr)
|
{
|
/* Skip if there is nothing to do */
|
if (nr) {
|
kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], vpfn,
|
nr, sync);
|
}
|
}
|
|
static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
|
u64 vpfn, size_t nr, bool sync)
|
{
|
struct kbase_device *kbdev;
|
bool ctx_is_in_runpool;
|
|
/* Early out if there is nothing to do */
|
if (nr == 0)
|
return;
|
|
kbdev = kctx->kbdev;
|
#if !MALI_USE_CSF
|
mutex_lock(&kbdev->js_data.queue_mutex);
|
ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx);
|
mutex_unlock(&kbdev->js_data.queue_mutex);
|
#else
|
ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx);
|
#endif /* !MALI_USE_CSF */
|
|
if (ctx_is_in_runpool) {
|
KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
|
|
kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr],
|
vpfn, nr, sync);
|
|
release_ctx(kbdev, kctx);
|
}
|
}
|
|
void kbase_mmu_update(struct kbase_device *kbdev,
|
struct kbase_mmu_table *mmut,
|
int as_nr)
|
{
|
lockdep_assert_held(&kbdev->hwaccess_lock);
|
lockdep_assert_held(&kbdev->mmu_hw_mutex);
|
KBASE_DEBUG_ASSERT(as_nr != KBASEP_AS_NR_INVALID);
|
|
kbdev->mmu_mode->update(kbdev, mmut, as_nr);
|
}
|
KBASE_EXPORT_TEST_API(kbase_mmu_update);
|
|
void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
|
{
|
lockdep_assert_held(&kbdev->hwaccess_lock);
|
lockdep_assert_held(&kbdev->mmu_hw_mutex);
|
|
kbdev->mmu_mode->disable_as(kbdev, as_nr);
|
}
|
|
void kbase_mmu_disable(struct kbase_context *kctx)
|
{
|
/* ASSERT that the context has a valid as_nr, which is only the case
|
* when it's scheduled in.
|
*
|
* as_nr won't change because the caller has the hwaccess_lock
|
*/
|
KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
|
|
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
|
lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
|
|
/*
|
* The address space is being disabled, drain all knowledge of it out
|
* from the caches as pages and page tables might be freed after this.
|
*
|
* The job scheduler code will already be holding the locks and context
|
* so just do the flush.
|
*/
|
kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true);
|
|
kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
|
}
|
KBASE_EXPORT_TEST_API(kbase_mmu_disable);
|
|
/*
|
* We actually only discard the ATE, and not the page table
|
* pages. There is a potential DoS here, as we'll leak memory by
|
* having PTEs that are potentially unused. Will require physical
|
* page accounting, so MMU pages are part of the process allocation.
|
*
|
* IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
|
* currently scheduled into the runpool, and so potentially uses a lot of locks.
|
* These locks must be taken in the correct order with respect to others
|
* already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
|
* information.
|
*/
|
int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
|
struct kbase_mmu_table *mmut, u64 vpfn, size_t nr, int as_nr)
|
{
|
phys_addr_t pgd;
|
u64 start_vpfn = vpfn;
|
size_t requested_nr = nr;
|
struct kbase_mmu_mode const *mmu_mode;
|
int err = -EFAULT;
|
|
if (nr == 0) {
|
/* early out if nothing to do */
|
return 0;
|
}
|
|
mutex_lock(&mmut->mmu_lock);
|
|
mmu_mode = kbdev->mmu_mode;
|
|
while (nr) {
|
unsigned int i;
|
unsigned int index = vpfn & 0x1FF;
|
unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
|
unsigned int pcount;
|
int level;
|
u64 *page;
|
|
if (count > nr)
|
count = nr;
|
|
/* need to check if this is a 2MB or a 4kB page */
|
pgd = mmut->pgd;
|
|
for (level = MIDGARD_MMU_TOPLEVEL;
|
level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
|
phys_addr_t next_pgd;
|
|
index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
|
page = kmap(phys_to_page(pgd));
|
if (mmu_mode->ate_is_valid(page[index], level))
|
break; /* keep the mapping */
|
else if (!mmu_mode->pte_is_valid(page[index], level)) {
|
/* nothing here, advance */
|
				switch (level) {
				case MIDGARD_MMU_LEVEL(0):
					/* entries span 512 * 512 * 512 pages */
					count = 134217728;
					break;
				case MIDGARD_MMU_LEVEL(1):
					/* entries span 512 * 512 pages */
					count = 262144;
					break;
				case MIDGARD_MMU_LEVEL(2):
					/* entries span 512 pages */
					count = 512;
					break;
				case MIDGARD_MMU_LEVEL(3):
					/* one page per bottom-level entry */
					count = 1;
					break;
				}
|
if (count > nr)
|
count = nr;
|
goto next;
|
}
|
next_pgd = mmu_mode->pte_to_phy_addr(page[index]);
|
kunmap(phys_to_page(pgd));
|
pgd = next_pgd;
|
}
|
|
switch (level) {
|
case MIDGARD_MMU_LEVEL(0):
|
case MIDGARD_MMU_LEVEL(1):
|
dev_warn(kbdev->dev,
|
"%s: No support for ATEs at level %d\n",
|
__func__, level);
|
kunmap(phys_to_page(pgd));
|
goto out;
|
case MIDGARD_MMU_LEVEL(2):
|
/* can only teardown if count >= 512 */
|
if (count >= 512) {
|
pcount = 1;
|
} else {
|
dev_warn(kbdev->dev,
|
"%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n",
|
__func__, count);
|
pcount = 0;
|
}
|
break;
|
case MIDGARD_MMU_BOTTOMLEVEL:
|
/* page count is the same as the logical count */
|
pcount = count;
|
break;
|
default:
|
dev_err(kbdev->dev,
|
"%s: found non-mapped memory, early out\n",
|
__func__);
|
vpfn += count;
|
nr -= count;
|
continue;
|
}
|
|
/* Invalidate the entries we added */
|
for (i = 0; i < pcount; i++)
|
mmu_mode->entry_invalidate(&page[index + i]);
|
|
kbase_mmu_sync_pgd(kbdev,
|
kbase_dma_addr(phys_to_page(pgd)) +
|
8 * index, 8*pcount);
|
|
next:
|
kunmap(phys_to_page(pgd));
|
vpfn += count;
|
nr -= count;
|
}
|
err = 0;
|
out:
|
mutex_unlock(&mmut->mmu_lock);
|
|
if (mmut->kctx)
|
kbase_mmu_flush_invalidate(mmut->kctx, start_vpfn, requested_nr,
|
true);
|
else
|
kbase_mmu_flush_invalidate_no_ctx(kbdev, start_vpfn, requested_nr,
|
true, as_nr);
|
|
return err;
|
}
|
|
KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
|
|
/**
 * kbase_mmu_update_pages_no_flush() - Update page table entries on the GPU
 *
 * @kctx:  Kbase context
 * @vpfn:  Virtual PFN (Page Frame Number) of the first page to update
 * @phys:  Tagged physical addresses of the physical pages to replace the
 *         current mappings
 * @nr:    Number of pages to update
 * @flags: Flags
 * @group_id: The physical memory group in which the page was allocated.
 *            Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
 *
 * This will update page table entries that already exist on the GPU based on
 * the new flags that are passed. It is used as a response to the changes of
 * the memory attributes.
 *
 * The caller is responsible for validating the memory attributes.
 *
 * Return: 0 on success, or an error code on failure.
 */
|
static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
|
struct tagged_addr *phys, size_t nr,
|
unsigned long flags, int const group_id)
|
{
|
phys_addr_t pgd;
|
u64 *pgd_page;
|
int err;
|
struct kbase_device *kbdev;
|
|
if (WARN_ON(kctx == NULL))
|
return -EINVAL;
|
|
KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
|
|
/* Early out if there is nothing to do */
|
if (nr == 0)
|
return 0;
|
|
mutex_lock(&kctx->mmu.mmu_lock);
|
|
kbdev = kctx->kbdev;
|
|
while (nr) {
|
unsigned int i;
|
unsigned int index = vpfn & 0x1FF;
|
size_t count = KBASE_MMU_PAGE_ENTRIES - index;
|
struct page *p;
|
|
if (count > nr)
|
count = nr;
|
|
do {
|
err = mmu_get_bottom_pgd(kbdev, &kctx->mmu,
|
vpfn, &pgd);
|
if (err != -ENOMEM)
|
break;
|
/* Fill the memory pool with enough pages for
|
* the page walk to succeed
|
*/
|
mutex_unlock(&kctx->mmu.mmu_lock);
|
err = kbase_mem_pool_grow(
|
#ifdef CONFIG_MALI_2MB_ALLOC
|
&kbdev->mem_pools.large[
|
#else
|
&kbdev->mem_pools.small[
|
#endif
|
kctx->mmu.group_id],
|
MIDGARD_MMU_BOTTOMLEVEL);
|
mutex_lock(&kctx->mmu.mmu_lock);
|
} while (!err);
|
if (err) {
|
dev_warn(kbdev->dev,
|
"mmu_get_bottom_pgd failure\n");
|
goto fail_unlock;
|
}
|
|
p = pfn_to_page(PFN_DOWN(pgd));
|
pgd_page = kmap(p);
|
if (!pgd_page) {
|
dev_warn(kbdev->dev, "kmap failure\n");
|
err = -ENOMEM;
|
goto fail_unlock;
|
}
|
|
for (i = 0; i < count; i++)
|
pgd_page[index + i] = kbase_mmu_create_ate(kbdev,
|
phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL,
|
group_id);
|
|
phys += count;
|
vpfn += count;
|
nr -= count;
|
|
kbase_mmu_sync_pgd(kbdev,
|
kbase_dma_addr(p) + (index * sizeof(u64)),
|
count * sizeof(u64));
|
|
kunmap(pfn_to_page(PFN_DOWN(pgd)));
|
}
|
|
mutex_unlock(&kctx->mmu.mmu_lock);
|
return 0;
|
|
fail_unlock:
|
mutex_unlock(&kctx->mmu.mmu_lock);
|
return err;
|
}
|
|
int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
|
struct tagged_addr *phys, size_t nr,
|
unsigned long flags, int const group_id)
|
{
|
int err;
|
|
err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags,
|
group_id);
|
kbase_mmu_flush_invalidate(kctx, vpfn, nr, true);
|
return err;
|
}
|
|
static void mmu_teardown_level(struct kbase_device *kbdev,
|
struct kbase_mmu_table *mmut, phys_addr_t pgd,
|
int level, u64 *pgd_page_buffer)
|
{
|
phys_addr_t target_pgd;
|
struct page *p;
|
u64 *pgd_page;
|
int i;
|
struct kbase_mmu_mode const *mmu_mode;
|
|
lockdep_assert_held(&mmut->mmu_lock);
|
|
pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
|
/* kmap_atomic should NEVER fail. */
|
if (WARN_ON(pgd_page == NULL))
|
return;
|
/* Copy the page to our preallocated buffer so that we can minimize
|
* kmap_atomic usage
|
*/
|
memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
|
kunmap_atomic(pgd_page);
|
pgd_page = pgd_page_buffer;
|
|
mmu_mode = kbdev->mmu_mode;
|
|
for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
|
target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
|
|
if (target_pgd) {
|
if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
|
mmu_teardown_level(kbdev, mmut,
|
target_pgd,
|
level + 1,
|
pgd_page_buffer +
|
(PAGE_SIZE / sizeof(u64)));
|
}
|
}
|
}
|
|
p = pfn_to_page(PFN_DOWN(pgd));
|
#ifdef CONFIG_MALI_2MB_ALLOC
|
kbase_mem_pool_free(&kbdev->mem_pools.large[mmut->group_id],
|
#else
|
kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id],
|
#endif
|
p, true);
|
|
atomic_sub(1, &kbdev->memdev.used_pages);
|
|
/* If MMU tables belong to a context then pages will have been accounted
|
* against it, so we must decrement the usage counts here.
|
*/
|
if (mmut->kctx) {
|
kbase_process_page_usage_dec(mmut->kctx, 1);
|
atomic_sub(1, &mmut->kctx->used_pages);
|
}
|
|
kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1);
|
}
|
|
int kbase_mmu_init(struct kbase_device *const kbdev,
|
struct kbase_mmu_table *const mmut, struct kbase_context *const kctx,
|
int const group_id)
|
{
|
if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) ||
|
WARN_ON(group_id < 0))
|
return -EINVAL;
|
|
mmut->group_id = group_id;
|
mutex_init(&mmut->mmu_lock);
|
mmut->kctx = kctx;
|
|
	/* Preallocate one scratch page per MMU level (four levels) for
	 * mmu_teardown_level() to use
	 */
|
mmut->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
|
|
if (mmut->mmu_teardown_pages == NULL)
|
return -ENOMEM;
|
|
mmut->pgd = 0;
|
/* We allocate pages into the kbdev memory pool, then
|
* kbase_mmu_alloc_pgd will allocate out of that pool. This is done to
|
* avoid allocations from the kernel happening with the lock held.
|
*/
|
while (!mmut->pgd) {
|
int err;
|
|
err = kbase_mem_pool_grow(
|
#ifdef CONFIG_MALI_2MB_ALLOC
|
&kbdev->mem_pools.large[mmut->group_id],
|
#else
|
&kbdev->mem_pools.small[mmut->group_id],
|
#endif
|
MIDGARD_MMU_BOTTOMLEVEL);
|
if (err) {
|
kbase_mmu_term(kbdev, mmut);
|
return -ENOMEM;
|
}
|
|
mutex_lock(&mmut->mmu_lock);
|
mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
|
mutex_unlock(&mmut->mmu_lock);
|
}
|
|
return 0;
|
}
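
/*
 * Minimal lifetime sketch (error handling elided, names illustrative): a
 * table initialised here must later be torn down with kbase_mmu_term():
 *
 *	err = kbase_mmu_init(kbdev, &mmut, kctx, group_id);
 *	...
 *	kbase_mmu_term(kbdev, &mmut);
 */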
|
|
void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
|
{
|
if (mmut->pgd) {
|
mutex_lock(&mmut->mmu_lock);
|
mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL,
|
mmut->mmu_teardown_pages);
|
mutex_unlock(&mmut->mmu_lock);
|
|
if (mmut->kctx)
|
KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0);
|
}
|
|
kfree(mmut->mmu_teardown_pages);
|
mutex_destroy(&mmut->mmu_lock);
|
}
|
|
void kbase_mmu_as_term(struct kbase_device *kbdev, int i)
|
{
|
destroy_workqueue(kbdev->as[i].pf_wq);
|
}
|
|
static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
|
int level, char ** const buffer, size_t *size_left)
|
{
|
phys_addr_t target_pgd;
|
u64 *pgd_page;
|
int i;
|
size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
|
size_t dump_size;
|
struct kbase_device *kbdev;
|
struct kbase_mmu_mode const *mmu_mode;
|
|
if (WARN_ON(kctx == NULL))
|
return 0;
|
lockdep_assert_held(&kctx->mmu.mmu_lock);
|
|
kbdev = kctx->kbdev;
|
mmu_mode = kbdev->mmu_mode;
|
|
pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
|
if (!pgd_page) {
|
dev_warn(kbdev->dev, "%s: kmap failure\n", __func__);
|
return 0;
|
}
|
|
if (*size_left >= size) {
|
/* A modified physical address that contains
|
* the page table level
|
*/
|
u64 m_pgd = pgd | level;
|
|
/* Put the modified physical address in the output buffer */
|
memcpy(*buffer, &m_pgd, sizeof(m_pgd));
|
*buffer += sizeof(m_pgd);
|
|
/* Followed by the page table itself */
|
memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
|
*buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;
|
|
*size_left -= size;
|
}
|
|
if (level < MIDGARD_MMU_BOTTOMLEVEL) {
|
for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
|
if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
|
target_pgd = mmu_mode->pte_to_phy_addr(
|
pgd_page[i]);
|
|
dump_size = kbasep_mmu_dump_level(kctx,
|
target_pgd, level + 1,
|
buffer, size_left);
|
if (!dump_size) {
|
kunmap(pfn_to_page(PFN_DOWN(pgd)));
|
return 0;
|
}
|
size += dump_size;
|
}
|
}
|
}
|
|
kunmap(pfn_to_page(PFN_DOWN(pgd)));
|
|
return size;
|
}
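
/*
 * Layout of the dump buffer built by kbase_mmu_dump() below: three u64
 * configuration words (transtab, memattr, transcfg), then for every page
 * table visited a u64 tag (the PGD physical address ORed with its level)
 * followed by its 512 u64 entries, and finally a u64 end marker (0xFF).
 */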
|
|
void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
|
{
|
void *kaddr;
|
size_t size_left;
|
|
KBASE_DEBUG_ASSERT(kctx);
|
|
if (nr_pages == 0) {
|
/* can't dump in a 0 sized buffer, early out */
|
return NULL;
|
}
|
|
size_left = nr_pages * PAGE_SIZE;
|
|
if (WARN_ON(size_left == 0))
|
return NULL;
|
kaddr = vmalloc_user(size_left);
|
|
mutex_lock(&kctx->mmu.mmu_lock);
|
|
if (kaddr) {
|
u64 end_marker = 0xFFULL;
|
char *buffer;
|
char *mmu_dump_buffer;
|
u64 config[3];
|
size_t dump_size, size = 0;
|
struct kbase_mmu_setup as_setup;
|
|
buffer = (char *)kaddr;
|
mmu_dump_buffer = buffer;
|
|
kctx->kbdev->mmu_mode->get_as_setup(&kctx->mmu,
|
&as_setup);
|
config[0] = as_setup.transtab;
|
config[1] = as_setup.memattr;
|
config[2] = as_setup.transcfg;
|
memcpy(buffer, &config, sizeof(config));
|
mmu_dump_buffer += sizeof(config);
|
size_left -= sizeof(config);
|
size += sizeof(config);
|
|
dump_size = kbasep_mmu_dump_level(kctx,
|
kctx->mmu.pgd,
|
MIDGARD_MMU_TOPLEVEL,
|
&mmu_dump_buffer,
|
&size_left);
|
|
if (!dump_size)
|
goto fail_free;
|
|
size += dump_size;
|
|
/* Add on the size for the end marker */
|
size += sizeof(u64);
|
|
if (size > (nr_pages * PAGE_SIZE)) {
|
/* The buffer isn't big enough - free the memory and
|
* return failure
|
*/
|
goto fail_free;
|
}
|
|
/* Add the end marker */
|
memcpy(mmu_dump_buffer, &end_marker, sizeof(u64));
|
}
|
|
mutex_unlock(&kctx->mmu.mmu_lock);
|
return kaddr;
|
|
fail_free:
|
vfree(kaddr);
|
mutex_unlock(&kctx->mmu.mmu_lock);
|
return NULL;
|
}
|
KBASE_EXPORT_TEST_API(kbase_mmu_dump);
|
|
void kbase_mmu_bus_fault_worker(struct work_struct *data)
|
{
|
struct kbase_as *faulting_as;
|
int as_no;
|
struct kbase_context *kctx;
|
struct kbase_device *kbdev;
|
struct kbase_fault *fault;
|
|
faulting_as = container_of(data, struct kbase_as, work_busfault);
|
fault = &faulting_as->bf_data;
|
|
/* Ensure that any pending page fault worker has completed */
|
flush_work(&faulting_as->work_pagefault);
|
|
as_no = faulting_as->number;
|
|
kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
|
|
/* Grab the context, already refcounted in kbase_mmu_interrupt() on
|
* flagging of the bus-fault. Therefore, it cannot be scheduled out of
|
* this AS until we explicitly release it
|
*/
|
kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_no);
|
if (!kctx) {
|
atomic_dec(&kbdev->faults_pending);
|
return;
|
}
|
|
#ifdef CONFIG_MALI_ARBITER_SUPPORT
|
/* check if we still have GPU */
|
if (unlikely(kbase_is_gpu_removed(kbdev))) {
|
dev_dbg(kbdev->dev,
|
"%s: GPU has been removed\n", __func__);
|
release_ctx(kbdev, kctx);
|
atomic_dec(&kbdev->faults_pending);
|
return;
|
}
|
#endif
|
|
if (unlikely(fault->protected_mode)) {
|
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
|
"Permission failure", fault);
|
kbase_mmu_hw_clear_fault(kbdev, faulting_as,
|
KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
|
release_ctx(kbdev, kctx);
|
atomic_dec(&kbdev->faults_pending);
|
return;
|
|
}
|
|
/* NOTE: If GPU already powered off for suspend,
|
* we don't need to switch to unmapped
|
*/
|
if (!kbase_pm_context_active_handle_suspend(kbdev,
|
KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
|
kbase_gpu_report_bus_fault_and_kill(kctx, faulting_as, fault);
|
kbase_pm_context_idle(kbdev);
|
}
|
|
release_ctx(kbdev, kctx);
|
|
atomic_dec(&kbdev->faults_pending);
|
}
|
|
void kbase_flush_mmu_wqs(struct kbase_device *kbdev)
|
{
|
int i;
|
|
for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
|
struct kbase_as *as = &kbdev->as[i];
|
|
flush_workqueue(as->pf_wq);
|
}
|
}
|