From 1543e317f1da31b75942316931e8f491a8920811 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Thu, 04 Jan 2024 10:08:02 +0000
Subject: [PATCH] disable FB
---
kernel/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c | 1077 +++++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 709 insertions(+), 368 deletions(-)
diff --git a/kernel/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c b/kernel/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c
index a1565c1..f1251a4 100644
--- a/kernel/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c
+++ b/kernel/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,14 +31,13 @@
#include <linux/fs.h>
#include <linux/version.h>
#include <linux/dma-mapping.h>
-#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE)
-#include <linux/dma-attrs.h>
-#endif /* LINUX_VERSION_CODE < 4.8.0 */
#include <linux/dma-buf.h>
#include <linux/shrinker.h>
#include <linux/cache.h>
#include <linux/memory_group_manager.h>
-
+#include <linux/math64.h>
+#include <linux/migrate.h>
+#include <linux/version.h>
#include <mali_kbase.h>
#include <mali_kbase_mem_linux.h>
#include <tl/mali_kbase_tracepoints.h>
@@ -86,23 +85,34 @@
#define IR_THRESHOLD_STEPS (256u)
#if MALI_USE_CSF
-static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx,
- struct vm_area_struct *vma);
-static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx,
- struct vm_area_struct *vma);
+static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, struct vm_area_struct *vma);
+static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, struct vm_area_struct *vma);
#endif
-static int kbase_vmap_phy_pages(struct kbase_context *kctx,
- struct kbase_va_region *reg, u64 offset_bytes, size_t size,
- struct kbase_vmap_struct *map);
+static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg,
+ u64 offset_bytes, size_t size, struct kbase_vmap_struct *map,
+ kbase_vmap_flag vmap_flags);
static void kbase_vunmap_phy_pages(struct kbase_context *kctx,
struct kbase_vmap_struct *map);
static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
-static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
- struct kbase_va_region *reg,
- u64 new_pages, u64 old_pages);
+static bool is_process_exiting(struct vm_area_struct *vma)
+{
+ /* PF_EXITING flag can't be reliably used here for the detection
+ * of process exit, as 'mm_users' counter could still be non-zero
+ * when all threads of the process have exited. Later when the
+ * thread (which took a reference on the 'mm' of process that
+ * exited) drops its reference, the vm_ops->close method would be
+ * called for all the vmas (owned by 'mm' of process that exited)
+ * but the PF_EXITING flag may not necessarily be set for the
+ * thread at that time.
+ */
+ if (atomic_read(&vma->vm_mm->mm_users))
+ return false;
+
+ return true;
+}
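The helper above is consulted again further down, in the CPU VMA close path, so that per-region freeing is skipped once the owning process is tearing down its whole address space. A minimal sketch of that usage pattern, not part of the applied diff, with hypothetical example_* types and helpers standing in for the kbase ones:

#include <linux/mm.h>

struct example_context;
struct example_region;

struct example_mapping {
	struct example_context *kctx;	/* hypothetical */
	struct example_region *region;	/* hypothetical */
};

/* hypothetical stand-in for kbase_mem_free_region() */
void example_free_region(struct example_context *kctx, struct example_region *reg);

static void example_vm_close(struct vm_area_struct *vma)
{
	struct example_mapping *map = vma->vm_private_data;

	/* mm_users == 0: the address space is going away, so leave the
	 * cleanup to context destruction instead of freeing now and
	 * risking a GPU page fault during process exit.
	 */
	if (!atomic_read(&vma->vm_mm->mm_users))
		return;

	example_free_region(map->kctx, map->region);
}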
/* Retrieve the associated region pointer if the GPU address corresponds to
* one of the event memory pages. The enclosing region, if found, shouldn't
@@ -184,20 +194,12 @@
reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
return -EINVAL;
- if (size > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES -
- atomic_read(&kctx->permanent_mapped_pages))) {
- dev_warn(kctx->kbdev->dev, "Request for %llu more pages mem needing a permanent mapping would breach limit %lu, currently at %d pages",
- (u64)size,
- KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES,
- atomic_read(&kctx->permanent_mapped_pages));
- return -ENOMEM;
- }
-
kern_mapping = kzalloc(sizeof(*kern_mapping), GFP_KERNEL);
if (!kern_mapping)
return -ENOMEM;
- err = kbase_vmap_phy_pages(kctx, reg, 0u, size_bytes, kern_mapping);
+ err = kbase_vmap_phy_pages(kctx, reg, 0u, size_bytes, kern_mapping,
+ KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
if (err < 0)
goto vmap_fail;
@@ -205,7 +207,6 @@
reg->flags &= ~KBASE_REG_GROWABLE;
reg->cpu_alloc->permanent_map = kern_mapping;
- atomic_add(size, &kctx->permanent_mapped_pages);
return 0;
vmap_fail:
@@ -221,13 +222,6 @@
kfree(alloc->permanent_map);
alloc->permanent_map = NULL;
-
- /* Mappings are only done on cpu_alloc, so don't need to worry about
- * this being reduced a second time if a separate gpu_alloc is
- * freed
- */
- WARN_ON(alloc->nents > atomic_read(&kctx->permanent_mapped_pages));
- atomic_sub(alloc->nents, &kctx->permanent_mapped_pages);
}
void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx,
@@ -291,9 +285,9 @@
*/
}
-struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
- u64 va_pages, u64 commit_pages,
- u64 extension, u64 *flags, u64 *gpu_va)
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages,
+ u64 extension, u64 *flags, u64 *gpu_va,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
{
int zone;
struct kbase_va_region *reg;
@@ -310,19 +304,21 @@
va_pages, commit_pages, extension, *flags);
#if MALI_USE_CSF
- *gpu_va = 0; /* return 0 on failure */
+ if (!(*flags & BASE_MEM_FIXED))
+ *gpu_va = 0; /* return 0 on failure */
#else
if (!(*flags & BASE_MEM_FLAG_MAP_FIXED))
*gpu_va = 0; /* return 0 on failure */
+#endif
else
- dev_err(dev,
+ dev_dbg(dev,
"Keeping requested GPU VA of 0x%llx\n",
(unsigned long long)*gpu_va);
-#endif
if (!kbase_check_alloc_flags(*flags)) {
dev_warn(dev,
- "kbase_mem_alloc called with bad flags (%llx)",
+ "%s called with bad flags (%llx)",
+ __func__,
(unsigned long long)*flags);
goto bad_flags;
}
@@ -344,7 +340,8 @@
}
if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
!kbase_device_is_cpu_coherent(kctx->kbdev)) {
- dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable");
+ dev_warn(dev, "%s call required coherent mem when unavailable",
+ __func__);
goto bad_flags;
}
if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
@@ -367,7 +364,20 @@
if (*flags & BASE_MEM_SAME_VA) {
rbtree = &kctx->reg_rbtree_same;
zone = KBASE_REG_ZONE_SAME_VA;
- } else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) {
+ }
+#if MALI_USE_CSF
+ /* fixed va_zone always exists */
+ else if (*flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE)) {
+ if (*flags & BASE_MEM_PROT_GPU_EX) {
+ rbtree = &kctx->reg_rbtree_exec_fixed;
+ zone = KBASE_REG_ZONE_EXEC_FIXED_VA;
+ } else {
+ rbtree = &kctx->reg_rbtree_fixed;
+ zone = KBASE_REG_ZONE_FIXED_VA;
+ }
+ }
+#endif
+ else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) {
rbtree = &kctx->reg_rbtree_exec;
zone = KBASE_REG_ZONE_EXEC_VA;
} else {
@@ -375,8 +385,7 @@
zone = KBASE_REG_ZONE_CUSTOM_VA;
}
- reg = kbase_alloc_free_region(rbtree, PFN_DOWN(*gpu_va),
- va_pages, zone);
+ reg = kbase_alloc_free_region(kctx->kbdev, rbtree, PFN_DOWN(*gpu_va), va_pages, zone);
if (!reg) {
dev_err(dev, "Failed to allocate free region");
@@ -387,7 +396,7 @@
goto invalid_flags;
if (kbase_reg_prepare_native(reg, kctx,
- base_mem_group_id_get(*flags)) != 0) {
+ kbase_mem_group_id_get(*flags)) != 0) {
dev_err(dev, "Failed to prepare region");
goto prepare_failed;
}
@@ -469,7 +478,26 @@
*gpu_va = (u64) cookie;
} else /* we control the VA */ {
- if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, 1) != 0) {
+ size_t align = 1;
+
+ if (kctx->kbdev->pagesize_2mb) {
+ /* If there's enough (> 33 bits) of GPU VA space, align to 2MB
+ * boundaries. The similar condition is used for mapping from
+ * the SAME_VA zone inside kbase_context_get_unmapped_area().
+ */
+ if (kctx->kbdev->gpu_props.mmu.va_bits > 33) {
+ if (va_pages >= (SZ_2M / SZ_4K))
+ align = (SZ_2M / SZ_4K);
+ }
+ if (*gpu_va)
+ align = 1;
+#if !MALI_USE_CSF
+ if (reg->flags & KBASE_REG_TILER_ALIGN_TOP)
+ align = 1;
+#endif /* !MALI_USE_CSF */
+ }
+ if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, align,
+ mmu_sync_info) != 0) {
dev_warn(dev, "Failed to map memory on GPU");
kbase_gpu_vm_unlock(kctx);
goto no_mmap;
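On 4 KiB base pages the rule above is plain arithmetic: 2 MiB alignment equals 512 pages and is only worth requesting for allocations of at least that size on GPUs with more than 33 VA bits, and never when the caller already fixed the GPU VA. A standalone sketch of just that decision (user-space C, function name hypothetical; the real code additionally drops the alignment for TILER_ALIGN_TOP regions on non-CSF builds):

#include <stdint.h>
#include <stdio.h>

#define SZ_4K 0x1000u
#define SZ_2M 0x200000u

static uint64_t pick_gpu_va_align(uint64_t va_pages, unsigned int va_bits,
				  uint64_t requested_gpu_va)
{
	uint64_t align = 1;

	if (va_bits > 33 && va_pages >= (SZ_2M / SZ_4K))
		align = SZ_2M / SZ_4K;	/* 512 pages == 2 MiB */
	if (requested_gpu_va)
		align = 1;		/* caller controls placement */

	return align;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)pick_gpu_va_align(1024, 48, 0)); /* 512 */
	printf("%llu\n", (unsigned long long)pick_gpu_va_align(16, 48, 0));   /* 1 */
	return 0;
}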
@@ -490,6 +518,14 @@
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
kbase_gpu_vm_unlock(kctx);
+
+#if MALI_USE_CSF
+ if (*flags & BASE_MEM_FIXABLE)
+ atomic64_inc(&kctx->num_fixable_allocs);
+ else if (*flags & BASE_MEM_FIXED)
+ atomic64_inc(&kctx->num_fixed_allocs);
+#endif
+
return reg;
no_mmap:
@@ -600,11 +636,18 @@
#if MALI_USE_CSF
if (KBASE_REG_CSF_EVENT & reg->flags)
*out |= BASE_MEM_CSF_EVENT;
+ if (((KBASE_REG_ZONE_MASK & reg->flags) == KBASE_REG_ZONE_FIXED_VA) ||
+ ((KBASE_REG_ZONE_MASK & reg->flags) == KBASE_REG_ZONE_EXEC_FIXED_VA)) {
+ if (KBASE_REG_FIXED_ADDRESS & reg->flags)
+ *out |= BASE_MEM_FIXED;
+ else
+ *out |= BASE_MEM_FIXABLE;
+ }
#endif
if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags)
*out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE;
- *out |= base_mem_group_id_set(reg->cpu_alloc->group_id);
+ *out |= kbase_mem_group_id_set(reg->cpu_alloc->group_id);
WARN(*out & ~BASE_MEM_FLAGS_QUERYABLE,
"BASE_MEM_FLAGS_QUERYABLE needs updating\n");
@@ -629,24 +672,36 @@
* @s: Shrinker
* @sc: Shrinker control
*
- * Return: Number of pages which can be freed.
+ * Return: Number of pages which can be freed or SHRINK_EMPTY if no page remains.
*/
static
unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
struct shrink_control *sc)
{
- struct kbase_context *kctx;
-
- kctx = container_of(s, struct kbase_context, reclaim);
+ struct kbase_context *kctx = container_of(s, struct kbase_context, reclaim);
+ int evict_nents = atomic_read(&kctx->evict_nents);
+ unsigned long nr_freeable_items;
WARN((sc->gfp_mask & __GFP_ATOMIC),
"Shrinkers cannot be called for GFP_ATOMIC allocations. Check kernel mm for problems. gfp_mask==%x\n",
sc->gfp_mask);
WARN(in_atomic(),
- "Shrinker called whilst in atomic context. The caller must switch to using GFP_ATOMIC or similar. gfp_mask==%x\n",
+ "Shrinker called in atomic context. The caller must use GFP_ATOMIC or similar, then Shrinkers must not be called. gfp_mask==%x\n",
sc->gfp_mask);
- return atomic_read(&kctx->evict_nents);
+ if (unlikely(evict_nents < 0)) {
+ dev_err(kctx->kbdev->dev, "invalid evict_nents(%d)", evict_nents);
+ nr_freeable_items = 0;
+ } else {
+ nr_freeable_items = evict_nents;
+ }
+
+#if KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE
+ if (nr_freeable_items == 0)
+ nr_freeable_items = SHRINK_EMPTY;
+#endif
+
+ return nr_freeable_items;
}
/**
@@ -655,8 +710,8 @@
* @s: Shrinker
* @sc: Shrinker control
*
- * Return: Number of pages freed (can be less then requested) or -1 if the
- * shrinker failed to free pages in its pool.
+ * Return: Number of pages freed (can be less than requested) or
+ * SHRINK_STOP if reclaim isn't possible.
*
* Note:
* This function accesses region structures without taking the region lock,
@@ -684,17 +739,15 @@
list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
int err;
+ if (!alloc->reg)
+ continue;
+
err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
0, alloc->nents);
- if (err != 0) {
- /*
- * Failed to remove GPU mapping, tell the shrinker
- * to stop trying to shrink our slab even though we
- * have pages in it.
- */
- freed = -1;
- goto out_unlock;
- }
+
+ /* Failed to remove GPU mapping, proceed to next one. */
+ if (err != 0)
+ continue;
/*
* Update alloc->evicted before freeing the backing so the
@@ -718,7 +771,7 @@
if (freed > sc->nr_to_scan)
break;
}
-out_unlock:
+
mutex_unlock(&kctx->jit_evict_lock);
return freed;
@@ -738,7 +791,11 @@
* struct shrinker does not define batch
*/
kctx->reclaim.batch = 0;
+#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE
register_shrinker(&kctx->reclaim);
+#else
+ register_shrinker(&kctx->reclaim, "mali-mem");
+#endif
return 0;
}
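The #if above tracks the register_shrinker() signature change in kernel 6.0, where shrinkers gained a name that shows up in the shrinker debugfs. A minimal registration sketch, not part of the diff, with hypothetical my_* callbacks:

#include <linux/version.h>
#include <linux/shrinker.h>

static unsigned long my_count(struct shrinker *s, struct shrink_control *sc);
static unsigned long my_scan(struct shrinker *s, struct shrink_control *sc);

static struct shrinker my_shrinker = {
	.count_objects = my_count,	/* freeable count, or SHRINK_EMPTY (>= 4.19) */
	.scan_objects  = my_scan,	/* pages freed, or SHRINK_STOP */
	.seeks = DEFAULT_SEEKS,
	.batch = 0,			/* use the default batch size */
};

static int my_shrinker_register(void)
{
#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE
	return register_shrinker(&my_shrinker);
#else
	return register_shrinker(&my_shrinker, "my-cache");
#endif
}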
@@ -802,6 +859,9 @@
lockdep_assert_held(&kctx->reg_lock);
+ /* Memory is in the process of transitioning to the shrinker, and
+ * should ignore migration attempts
+ */
kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg,
0, gpu_alloc->nents);
@@ -809,12 +869,17 @@
/* This allocation can't already be on a list. */
WARN_ON(!list_empty(&gpu_alloc->evict_node));
- /*
- * Add the allocation to the eviction list, after this point the shrink
+ /* Add the allocation to the eviction list, after this point the shrink
* can reclaim it.
*/
list_add(&gpu_alloc->evict_node, &kctx->evict_list);
atomic_add(gpu_alloc->nents, &kctx->evict_nents);
+
+ /* Indicate to page migration that the memory can be reclaimed by the shrinker.
+ */
+ if (kbase_page_migration_enabled)
+ kbase_set_phy_alloc_page_status(gpu_alloc, NOT_MOVABLE);
+
mutex_unlock(&kctx->jit_evict_lock);
kbase_mem_evictable_mark_reclaim(gpu_alloc);
@@ -826,6 +891,11 @@
{
struct kbase_context *kctx = gpu_alloc->imported.native.kctx;
int err = 0;
+
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
lockdep_assert_held(&kctx->reg_lock);
@@ -856,11 +926,20 @@
* pre-eviction size.
*/
if (!err)
- err = kbase_mem_grow_gpu_mapping(kctx,
- gpu_alloc->reg,
- gpu_alloc->evicted, 0);
+ err = kbase_mem_grow_gpu_mapping(
+ kctx, gpu_alloc->reg,
+ gpu_alloc->evicted, 0, mmu_sync_info);
gpu_alloc->evicted = 0;
+
+ /* Since the allocation is no longer evictable, and we ensure that
+ * it grows back to its pre-eviction size, we will consider the
+ * state of it to be ALLOCATED_MAPPED, as that is the only state
+ * from which a physical allocation could transition to NOT_MOVABLE.
+ */
+ if (kbase_page_migration_enabled)
+ kbase_set_phy_alloc_page_status(gpu_alloc, ALLOCATED_MAPPED);
}
}
@@ -911,6 +990,15 @@
/* Validate the region */
reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
if (kbase_is_region_invalid_or_free(reg))
+ goto out_unlock;
+
+ /* There is no use case to support MEM_FLAGS_CHANGE ioctl for allocations
+ * that have NO_USER_FREE flag set, to mark them as evictable/reclaimable.
+ * This would usually include JIT allocations, Tiler heap related allocations
+ * & GPU queue ringbuffer and none of them needs to be explicitly marked
+ * as evictable by Userspace.
+ */
+ if (kbase_va_region_is_no_user_free(reg))
goto out_unlock;
/* Is the region being transitioning between not needed and needed? */
@@ -1022,7 +1110,7 @@
struct kbase_va_region *reg, enum kbase_sync_type sync_fn)
{
int ret = -EINVAL;
- struct dma_buf *dma_buf;
+ struct dma_buf __maybe_unused *dma_buf;
enum dma_data_direction dir = DMA_BIDIRECTIONAL;
lockdep_assert_held(&kctx->reg_lock);
@@ -1066,19 +1154,7 @@
ret = 0;
}
#else
- /* Though the below version check could be superfluous depending upon the version condition
- * used for enabling KBASE_MEM_ION_SYNC_WORKAROUND, we still keep this check here to allow
- * ease of modification for non-ION systems or systems where ION has been patched.
- */
-#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS)
- dma_buf_end_cpu_access(dma_buf,
- 0, dma_buf->size,
- dir);
- ret = 0;
-#else
- ret = dma_buf_end_cpu_access(dma_buf,
- dir);
-#endif
+ ret = dma_buf_end_cpu_access(dma_buf, dir);
#endif /* KBASE_MEM_ION_SYNC_WORKAROUND */
break;
case KBASE_SYNC_TO_CPU:
@@ -1095,11 +1171,7 @@
ret = 0;
}
#else
- ret = dma_buf_begin_cpu_access(dma_buf,
-#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS)
- 0, dma_buf->size,
-#endif
- dir);
+ ret = dma_buf_begin_cpu_access(dma_buf, dir);
#endif /* KBASE_MEM_ION_SYNC_WORKAROUND */
break;
}
@@ -1218,6 +1290,11 @@
struct kbase_mem_phy_alloc *alloc;
unsigned long gwt_mask = ~0;
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
lockdep_assert_held(&kctx->reg_lock);
alloc = reg->gpu_alloc;
@@ -1244,14 +1321,11 @@
gwt_mask = ~KBASE_REG_GPU_WR;
#endif
- err = kbase_mmu_insert_pages(kctx->kbdev,
- &kctx->mmu,
- reg->start_pfn,
- kbase_get_gpu_phy_pages(reg),
- kbase_reg_current_backed_size(reg),
- reg->flags & gwt_mask,
- kctx->as_nr,
- alloc->group_id);
+ err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ kbase_get_gpu_phy_pages(reg),
+ kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr, alloc->group_id,
+ mmu_sync_info, NULL);
if (err)
goto bad_insert;
@@ -1264,13 +1338,11 @@
* Assume alloc->nents is the number of actual pages in the
* dma-buf memory.
*/
- err = kbase_mmu_insert_single_page(kctx,
- reg->start_pfn + alloc->nents,
- kctx->aliasing_sink_page,
- reg->nr_pages - alloc->nents,
- (reg->flags | KBASE_REG_GPU_RD) &
- ~KBASE_REG_GPU_WR,
- KBASE_MEM_GROUP_SINK);
+ err = kbase_mmu_insert_single_imported_page(
+ kctx, reg->start_pfn + alloc->nents, kctx->aliasing_sink_page,
+ reg->nr_pages - alloc->nents,
+ (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK,
+ mmu_sync_info);
if (err)
goto bad_pad_insert;
}
@@ -1278,11 +1350,8 @@
return 0;
bad_pad_insert:
- kbase_mmu_teardown_pages(kctx->kbdev,
- &kctx->mmu,
- reg->start_pfn,
- alloc->nents,
- kctx->as_nr);
+ kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages,
+ alloc->nents, alloc->nents, kctx->as_nr, true);
bad_insert:
kbase_mem_umm_unmap_attachment(kctx, alloc);
bad_map_attachment:
@@ -1310,11 +1379,9 @@
if (!kbase_is_region_invalid_or_free(reg) && reg->gpu_alloc == alloc) {
int err;
- err = kbase_mmu_teardown_pages(kctx->kbdev,
- &kctx->mmu,
- reg->start_pfn,
- reg->nr_pages,
- kctx->as_nr);
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages, reg->nr_pages, reg->nr_pages,
+ kctx->as_nr, true);
WARN_ON(err);
}
@@ -1386,6 +1453,9 @@
return NULL;
}
+ if (!kbase_import_size_is_valid(kctx->kbdev, *va_pages))
+ return NULL;
+
/* ignore SAME_VA */
*flags &= ~BASE_MEM_SAME_VA;
@@ -1406,23 +1476,21 @@
if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP)
need_sync = true;
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (!kbase_ctx_compat_mode(kctx)) {
/*
* 64-bit tasks require us to reserve VA on the CPU that we use
* on the GPU.
*/
shared_zone = true;
}
-#endif
if (shared_zone) {
*flags |= BASE_MEM_NEED_MMAP;
- reg = kbase_alloc_free_region(&kctx->reg_rbtree_same,
- 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *va_pages,
+ KBASE_REG_ZONE_SAME_VA);
} else {
- reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
- 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *va_pages,
+ KBASE_REG_ZONE_CUSTOM_VA);
}
if (!reg) {
@@ -1507,7 +1575,7 @@
struct kbase_context *kctx, unsigned long address,
unsigned long size, u64 *va_pages, u64 *flags)
{
- long i;
+ long i, dma_mapped_pages;
struct kbase_va_region *reg;
struct rb_root *rbtree;
long faulted_pages;
@@ -1516,6 +1584,8 @@
u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev);
struct kbase_alloc_import_user_buf *user_buf;
struct page **pages = NULL;
+ struct tagged_addr *pa;
+ struct device *dev;
int write;
/* Flag supported only for dma-buf imported memory */
@@ -1553,21 +1623,22 @@
/* 64-bit address range is the max */
goto bad_size;
+ if (!kbase_import_size_is_valid(kctx->kbdev, *va_pages))
+ goto bad_size;
+
/* SAME_VA generally not supported with imported memory (no known use cases) */
*flags &= ~BASE_MEM_SAME_VA;
if (*flags & BASE_MEM_IMPORT_SHARED)
shared_zone = true;
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (!kbase_ctx_compat_mode(kctx)) {
/*
* 64-bit tasks require us to reserve VA on the CPU that we use
* on the GPU.
*/
shared_zone = true;
}
-#endif
if (shared_zone) {
*flags |= BASE_MEM_NEED_MMAP;
@@ -1576,7 +1647,7 @@
} else
rbtree = &kctx->reg_rbtree_custom;
- reg = kbase_alloc_free_region(rbtree, 0, *va_pages, zone);
+ reg = kbase_alloc_free_region(kctx->kbdev, rbtree, 0, *va_pages, zone);
if (!reg)
goto no_region;
@@ -1602,11 +1673,7 @@
user_buf->address = address;
user_buf->nr_pages = *va_pages;
user_buf->mm = current->mm;
-#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
- atomic_inc(¤t->mm->mm_count);
-#else
- mmgrab(current->mm);
-#endif
+ kbase_mem_mmgrab();
if (reg->gpu_alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
user_buf->pages = vmalloc(*va_pages * sizeof(struct page *));
else
@@ -1632,20 +1699,21 @@
write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);
-#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE
- faulted_pages = get_user_pages(current, current->mm, address, *va_pages,
-#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \
-KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
- write ? FOLL_WRITE : 0, pages, NULL);
-#else
- write, 0, pages, NULL);
-#endif
-#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE
- faulted_pages = get_user_pages(address, *va_pages,
- write, 0, pages, NULL);
-#else
+#if KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE
faulted_pages = get_user_pages(address, *va_pages,
write ? FOLL_WRITE : 0, pages, NULL);
+#else
+ /* pin_user_pages function cannot be called with pages param NULL.
+ * get_user_pages function will be used instead because it is safe to be
+ * used with NULL pages param as long as it doesn't have FOLL_GET flag.
+ */
+ if (pages != NULL) {
+ faulted_pages =
+ pin_user_pages(address, *va_pages, write ? FOLL_WRITE : 0, pages, NULL);
+ } else {
+ faulted_pages =
+ get_user_pages(address, *va_pages, write ? FOLL_WRITE : 0, pages, NULL);
+ }
#endif
up_read(kbase_mem_get_process_mmap_lock());
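The new version split pins pages with pin_user_pages() on kernels from 5.9 onwards and keeps get_user_pages() both for older kernels and for the probe-only case where no pages array is supplied. Factored out as one helper it reads as below; the name is hypothetical, the pre-6.5 five-argument signatures used by the patch are assumed, and the caller is expected to hold the mmap read lock, as the surrounding code does:

#include <linux/mm.h>
#include <linux/version.h>

static long example_pin_user_buf(unsigned long addr, unsigned long nr_pages,
				 bool writable, struct page **pages)
{
	unsigned int gup_flags = writable ? FOLL_WRITE : 0;

#if KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE
	if (pages)
		return pin_user_pages(addr, nr_pages, gup_flags, pages, NULL);
#endif
	/* Safe with pages == NULL as long as FOLL_GET is not requested. */
	return get_user_pages(addr, nr_pages, gup_flags, pages, NULL);
}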
@@ -1656,31 +1724,44 @@
reg->gpu_alloc->nents = 0;
reg->extension = 0;
- if (pages) {
- struct device *dev = kctx->kbdev->dev;
- unsigned long local_size = user_buf->size;
- unsigned long offset = user_buf->address & ~PAGE_MASK;
- struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg);
+ pa = kbase_get_gpu_phy_pages(reg);
+ dev = kctx->kbdev->dev;
+ if (pages) {
/* Top bit signifies that this was pinned on import */
user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT;
+ /* Manual CPU cache synchronization.
+ *
+ * The driver disables automatic CPU cache synchronization because the
+ * memory pages that enclose the imported region may also contain
+ * sub-regions which are not imported and that are allocated and used
+ * by the user process. This may be the case of memory at the beginning
+ * of the first page and at the end of the last page. Automatic CPU cache
+ * synchronization would force some operations on those memory allocations,
+ * unbeknown to the user process: in particular, a CPU cache invalidate
+ * upon unmapping would destroy the content of dirty CPU caches and cause
+ * the user process to lose CPU writes to the non-imported sub-regions.
+ *
+ * When the GPU claims ownership of the imported memory buffer, it shall
+ * commit CPU writes for the whole of all pages that enclose the imported
+ * region, otherwise the initial content of memory would be wrong.
+ */
for (i = 0; i < faulted_pages; i++) {
dma_addr_t dma_addr;
- unsigned long min;
-
- min = MIN(PAGE_SIZE - offset, local_size);
- dma_addr = dma_map_page(dev, pages[i],
- offset, min,
- DMA_BIDIRECTIONAL);
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+#endif
if (dma_mapping_error(dev, dma_addr))
goto unwind_dma_map;
user_buf->dma_addrs[i] = dma_addr;
pa[i] = as_tagged(page_to_phys(pages[i]));
- local_size -= min;
- offset = 0;
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
}
reg->gpu_alloc->nents = faulted_pages;
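The loop above replaces the old partial-page mapping with whole-page mappings that skip the automatic CPU cache maintenance, followed by one explicit clean so that CPU writes become visible to the GPU without the DMA layer ever invalidating caches behind the user process's back on unmap. The per-page pattern in isolation (assumes kernel >= 4.10 for dma_map_page_attrs(); the function name is hypothetical):

#include <linux/dma-mapping.h>

static int example_map_user_page(struct device *dev, struct page *p,
				 dma_addr_t *dma_out)
{
	dma_addr_t dma_addr;

	/* Map without automatic CPU cache maintenance. */
	dma_addr = dma_map_page_attrs(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
				      DMA_ATTR_SKIP_CPU_SYNC);
	if (dma_mapping_error(dev, dma_addr))
		return -ENOMEM;

	/* Explicitly commit dirty CPU cache lines for the whole page
	 * before the GPU takes ownership of the buffer.
	 */
	dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);

	*dma_out = dma_addr;
	return 0;
}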
@@ -1689,15 +1770,32 @@
return reg;
unwind_dma_map:
- while (i--) {
- dma_unmap_page(kctx->kbdev->dev,
- user_buf->dma_addrs[i],
- PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_mapped_pages = i;
+ /* Run the unmap loop in the same order as map loop, and perform again
+ * CPU cache synchronization to re-write the content of dirty CPU caches
+ * to memory. This precautionary measure is kept here to keep this code
+ * aligned with kbase_jd_user_buf_map() to allow for a potential refactor
+ * in the future.
+ */
+ for (i = 0; i < dma_mapped_pages; i++) {
+ dma_addr_t dma_addr = user_buf->dma_addrs[i];
+
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+#endif
}
fault_mismatch:
if (pages) {
+ /* In this case, the region was not yet in the region tracker,
+ * and so there are no CPU mappings to remove before we unpin
+ * the pages
+ */
for (i = 0; i < faulted_pages; i++)
- put_page(pages[i]);
+ kbase_unpin_user_buf_page(pages[i]);
}
no_page_array:
invalid_flags:
@@ -1708,7 +1806,6 @@
no_region:
bad_size:
return NULL;
-
}
@@ -1720,6 +1817,12 @@
u64 gpu_va;
size_t i;
bool coherent;
+ uint64_t max_stride;
+
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
KBASE_DEBUG_ASSERT(kctx);
KBASE_DEBUG_ASSERT(flags);
@@ -1733,7 +1836,8 @@
if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) {
dev_warn(kctx->kbdev->dev,
- "kbase_mem_alias called with bad flags (%llx)",
+ "%s called with bad flags (%llx)",
+ __func__,
(unsigned long long)*flags);
goto bad_flags;
}
@@ -1746,6 +1850,11 @@
if (!nents)
goto bad_nents;
+ max_stride = div64_u64(U64_MAX, nents);
+
+ if (stride > max_stride)
+ goto bad_size;
+
if ((nents * stride) > (U64_MAX / PAGE_SIZE))
/* 64-bit address range is the max */
goto bad_size;
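The added check rejects a stride that would make nents * stride wrap 64-bit arithmetic before the existing PAGE_SIZE check runs; div64_u64() is simply the kernel's portable u64 division. The same guard in plain C:

#include <stdint.h>
#include <stdio.h>

/* Returns 1 when nents * stride would overflow u64 (nents != 0 is
 * already guaranteed by the earlier !nents check).
 */
static int alias_extent_would_overflow(uint64_t nents, uint64_t stride)
{
	uint64_t max_stride = UINT64_MAX / nents;

	return stride > max_stride;
}

int main(void)
{
	printf("%d\n", alias_extent_would_overflow(2, UINT64_MAX / 2 + 1)); /* 1 */
	printf("%d\n", alias_extent_would_overflow(4, 1024));               /* 0 */
	return 0;
}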
@@ -1753,22 +1862,19 @@
/* calculate the number of pages this alias will cover */
*num_pages = nents * stride;
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (!kbase_alias_size_is_valid(kctx->kbdev, *num_pages))
+ goto bad_size;
+
+ if (!kbase_ctx_compat_mode(kctx)) {
/* 64-bit tasks must MMAP anyway, but not expose this address to
* clients
*/
*flags |= BASE_MEM_NEED_MMAP;
- reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0,
- *num_pages,
- KBASE_REG_ZONE_SAME_VA);
+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *num_pages,
+ KBASE_REG_ZONE_SAME_VA);
} else {
-#else
- if (1) {
-#endif
- reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
- 0, *num_pages,
- KBASE_REG_ZONE_CUSTOM_VA);
+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *num_pages,
+ KBASE_REG_ZONE_CUSTOM_VA);
}
if (!reg)
@@ -1817,9 +1923,9 @@
/* validate found region */
if (kbase_is_region_invalid_or_free(aliasing_reg))
goto bad_handle; /* Not found/already free */
- if (aliasing_reg->flags & KBASE_REG_DONT_NEED)
+ if (kbase_is_region_shrinkable(aliasing_reg))
goto bad_handle; /* Ephemeral region */
- if (aliasing_reg->flags & KBASE_REG_NO_USER_FREE)
+ if (kbase_va_region_is_no_user_free(aliasing_reg))
goto bad_handle; /* JIT regions can't be
* aliased. NO_USER_FREE flag
* covers the entire lifetime
@@ -1874,8 +1980,7 @@
}
}
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (!kbase_ctx_compat_mode(kctx)) {
/* Bind to a cookie */
if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) {
dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
@@ -1890,11 +1995,10 @@
/* relocate to correct base */
gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
gpu_va <<= PAGE_SHIFT;
- } else /* we control the VA */ {
-#else
- if (1) {
-#endif
- if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) {
+ } else {
+ /* we control the VA */
+ if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1,
+ mmu_sync_info) != 0) {
dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU");
goto no_mmap;
}
@@ -1909,9 +2013,7 @@
return gpu_va;
-#if IS_ENABLED(CONFIG_64BIT)
no_cookie:
-#endif
no_mmap:
bad_handle:
/* Marking the source allocs as not being mapped on the GPU and putting
@@ -1939,6 +2041,11 @@
{
struct kbase_va_region *reg;
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
KBASE_DEBUG_ASSERT(kctx);
KBASE_DEBUG_ASSERT(gpu_va);
KBASE_DEBUG_ASSERT(va_pages);
@@ -1950,7 +2057,8 @@
if (!kbase_check_import_flags(*flags)) {
dev_warn(kctx->kbdev->dev,
- "kbase_mem_import called with bad flags (%llx)",
+ "%s called with bad flags (%llx)",
+ __func__,
(unsigned long long)*flags);
goto bad_flags;
}
@@ -1963,7 +2071,8 @@
if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
!kbase_device_is_cpu_coherent(kctx->kbdev)) {
dev_warn(kctx->kbdev->dev,
- "kbase_mem_import call required coherent mem when unavailable");
+ "%s call required coherent mem when unavailable",
+ __func__);
goto bad_flags;
}
if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
@@ -1971,7 +2080,10 @@
/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
*flags &= ~BASE_MEM_COHERENT_SYSTEM;
}
-
+ if (((*flags & BASE_MEM_CACHED_CPU) == 0) && (type == BASE_MEM_IMPORT_TYPE_USER_BUFFER)) {
+ dev_warn(kctx->kbdev->dev, "USER_BUFFER must be CPU cached");
+ goto bad_flags;
+ }
if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) {
dev_warn(kctx->kbdev->dev,
"padding is only supported for UMM");
@@ -2038,7 +2150,8 @@
} else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES) {
/* we control the VA, mmap now to the GPU */
- if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0)
+ if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1, mmu_sync_info) !=
+ 0)
goto no_gpu_va;
/* return real GPU VA */
*gpu_va = reg->start_pfn << PAGE_SHIFT;
@@ -2072,8 +2185,9 @@
}
int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
- struct kbase_va_region *reg,
- u64 new_pages, u64 old_pages)
+ struct kbase_va_region *reg, u64 new_pages,
+ u64 old_pages,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
{
struct tagged_addr *phy_pages;
u64 delta = new_pages - old_pages;
@@ -2083,9 +2197,9 @@
/* Map the new pages into the GPU */
phy_pages = kbase_get_gpu_phy_pages(reg);
- ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu,
- reg->start_pfn + old_pages, phy_pages + old_pages, delta,
- reg->flags, kctx->as_nr, reg->gpu_alloc->group_id);
+ ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages,
+ phy_pages + old_pages, delta, reg->flags, kctx->as_nr,
+ reg->gpu_alloc->group_id, mmu_sync_info, reg, false);
return ret;
}
@@ -2105,28 +2219,16 @@
(old_pages - new_pages)<<PAGE_SHIFT, 1);
}
-/**
- * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
- * @kctx: Context the region belongs to
- * @reg: The GPU region or NULL if there isn't one
- * @new_pages: The number of pages after the shrink
- * @old_pages: The number of pages before the shrink
- *
- * Return: 0 on success, negative -errno on error
- *
- * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
- * itself is unmodified as we still need to reserve the VA, only the page tables
- * will be modified by this function.
- */
-static int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx,
- struct kbase_va_region *const reg,
- u64 const new_pages, u64 const old_pages)
+int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx,
+ struct kbase_va_region *const reg, u64 const new_pages,
+ u64 const old_pages)
{
u64 delta = old_pages - new_pages;
+ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
int ret = 0;
- ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
- reg->start_pfn + new_pages, delta, kctx->as_nr);
+ ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages,
+ alloc->pages + new_pages, delta, delta, kctx->as_nr, false);
return ret;
}
@@ -2138,6 +2240,11 @@
int res = -EINVAL;
struct kbase_va_region *reg;
bool read_locked = false;
+
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
KBASE_DEBUG_ASSERT(kctx);
KBASE_DEBUG_ASSERT(gpu_addr != 0);
@@ -2185,8 +2292,11 @@
if (atomic_read(®->cpu_alloc->kernel_mappings) > 0)
goto out_unlock;
- /* can't grow regions which are ephemeral */
- if (reg->flags & KBASE_REG_DONT_NEED)
+
+ if (kbase_is_region_shrinkable(reg))
+ goto out_unlock;
+
+ if (kbase_va_region_is_no_user_free(reg))
goto out_unlock;
#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED
@@ -2230,8 +2340,8 @@
/* No update required for CPU mappings, that's done on fault. */
/* Update GPU mapping. */
- res = kbase_mem_grow_gpu_mapping(kctx, reg,
- new_pages, old_pages);
+ res = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages,
+ old_pages, mmu_sync_info);
/* On error free the new pages */
if (res) {
@@ -2259,7 +2369,7 @@
}
int kbase_mem_shrink(struct kbase_context *const kctx,
- struct kbase_va_region *const reg, u64 const new_pages)
+ struct kbase_va_region *const reg, u64 new_pages)
{
u64 delta, old_pages;
int err;
@@ -2289,6 +2399,19 @@
kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
if (reg->cpu_alloc != reg->gpu_alloc)
kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
+
+ if (kctx->kbdev->pagesize_2mb) {
+ if (kbase_reg_current_backed_size(reg) > new_pages) {
+ old_pages = new_pages;
+ new_pages = kbase_reg_current_backed_size(reg);
+
+ /* Update GPU mapping. */
+ err = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages, old_pages,
+ CALLER_MMU_ASYNC);
+ }
+ } else {
+ WARN_ON(kbase_reg_current_backed_size(reg) != new_pages);
+ }
}
return err;
@@ -2327,7 +2450,7 @@
/* Avoid freeing memory on the process death which results in
* GPU Page Fault. Memory will be freed in kbase_destroy_context
*/
- if (!(current->flags & PF_EXITING))
+ if (!is_process_exiting(vma))
kbase_mem_free_region(map->kctx, map->region);
}
@@ -2559,7 +2682,6 @@
while (kbase_jit_evict(kctx))
;
}
-#endif
static int kbase_mmu_dump_mmap(struct kbase_context *kctx,
struct vm_area_struct *vma,
@@ -2572,13 +2694,13 @@
size_t size;
int err = 0;
- dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n");
+ lockdep_assert_held(&kctx->reg_lock);
+
+ dev_dbg(kctx->kbdev->dev, "%s\n", __func__);
size = (vma->vm_end - vma->vm_start);
nr_pages = size >> PAGE_SHIFT;
-#ifdef CONFIG_MALI_VECTOR_DUMP
kbase_free_unused_jit_allocations(kctx);
-#endif
kaddr = kbase_mmu_dump(kctx, nr_pages);
@@ -2587,8 +2709,8 @@
goto out;
}
- new_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, nr_pages,
- KBASE_REG_ZONE_SAME_VA);
+ new_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, nr_pages,
+ KBASE_REG_ZONE_SAME_VA);
if (!new_reg) {
err = -ENOMEM;
WARN_ON(1);
@@ -2617,7 +2739,7 @@
*kmap_addr = kaddr;
*reg = new_reg;
- dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n");
+ dev_dbg(kctx->kbdev->dev, "%s done\n", __func__);
return 0;
out_no_alloc:
@@ -2626,7 +2748,7 @@
out:
return err;
}
-
+#endif
void kbase_os_mem_map_lock(struct kbase_context *kctx)
{
@@ -2646,13 +2768,18 @@
size_t *nr_pages, size_t *aligned_offset)
{
- int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+ unsigned int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
struct kbase_va_region *reg;
int err = 0;
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
*aligned_offset = 0;
- dev_dbg(kctx->kbdev->dev, "in kbasep_reg_mmap\n");
+ dev_dbg(kctx->kbdev->dev, "%s\n", __func__);
/* SAME_VA stuff, fetch the right region */
reg = kctx->pending_regions[cookie];
@@ -2682,9 +2809,8 @@
/* adjust down nr_pages to what we have physically */
*nr_pages = kbase_reg_current_backed_size(reg);
-
if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset,
- reg->nr_pages, 1) != 0) {
+ reg->nr_pages, 1, mmu_sync_info) != 0) {
dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__);
/* Unable to map in GPU space. */
WARN_ON(1);
@@ -2709,7 +2835,7 @@
vma->vm_pgoff = reg->start_pfn - ((*aligned_offset)>>PAGE_SHIFT);
out:
*regm = reg;
- dev_dbg(kctx->kbdev->dev, "kbasep_reg_mmap done\n");
+ dev_dbg(kctx->kbdev->dev, "%s done\n", __func__);
return err;
}
@@ -2750,17 +2876,10 @@
goto out_unlock;
}
- /* if not the MTP, verify that the MTP has been mapped */
- rcu_read_lock();
- /* catches both when the special page isn't present or
- * when we've forked
- */
- if (rcu_dereference(kctx->process_mm) != current->mm) {
+ if (!kbase_mem_allow_alloc(kctx)) {
err = -EINVAL;
- rcu_read_unlock();
goto out_unlock;
}
- rcu_read_unlock();
switch (vma->vm_pgoff) {
case PFN_DOWN(BASEP_MEM_INVALID_HANDLE):
@@ -2769,6 +2888,7 @@
err = -EINVAL;
goto out_unlock;
case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
+#if defined(CONFIG_MALI_VECTOR_DUMP)
/* MMU dump */
err = kbase_mmu_dump_mmap(kctx, vma, ®, &kaddr);
if (err != 0)
@@ -2776,6 +2896,11 @@
/* free the region on munmap */
free_on_close = 1;
break;
+#else
+ /* Illegal handle for direct map */
+ err = -EINVAL;
+ goto out_unlock;
+#endif /* defined(CONFIG_MALI_VECTOR_DUMP) */
#if MALI_USE_CSF
case PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE):
kbase_gpu_vm_unlock(kctx);
@@ -2846,8 +2971,7 @@
dev_warn(dev, "mmap aliased: invalid params!\n");
goto out_unlock;
}
- }
- else if (reg->cpu_alloc->nents <
+ } else if (reg->cpu_alloc->nents <
(vma->vm_pgoff - reg->start_pfn + nr_pages)) {
/* limit what we map to the amount currently backed */
if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents)
@@ -2864,7 +2988,7 @@
err = kbase_cpu_mmap(kctx, reg, vma, kaddr, nr_pages, aligned_offset,
free_on_close);
-
+#if defined(CONFIG_MALI_VECTOR_DUMP)
if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) {
/* MMU dump - userspace should now have a reference on
* the pages, so we can now free the kernel mapping
@@ -2883,7 +3007,7 @@
*/
vma->vm_pgoff = PFN_DOWN(vma->vm_start);
}
-
+#endif /* defined(CONFIG_MALI_VECTOR_DUMP) */
out_unlock:
kbase_gpu_vm_unlock(kctx);
out:
@@ -2925,9 +3049,102 @@
}
}
-static int kbase_vmap_phy_pages(struct kbase_context *kctx,
- struct kbase_va_region *reg, u64 offset_bytes, size_t size,
- struct kbase_vmap_struct *map)
+/**
+ * kbase_vmap_phy_pages_migrate_count_increment - Increment VMAP count for
+ * array of physical pages
+ *
+ * @pages: Array of pages.
+ * @page_count: Number of pages.
+ * @flags: Region flags.
+ *
+ * This function is supposed to be called only if page migration support
+ * is enabled in the driver.
+ *
+ * The counter of kernel CPU mappings of the physical pages involved in a
+ * mapping operation is incremented by 1. Errors are handled by making pages
+ * not movable. Permanent kernel mappings will be marked as not movable, too.
+ */
+static void kbase_vmap_phy_pages_migrate_count_increment(struct tagged_addr *pages,
+ size_t page_count, unsigned long flags)
+{
+ size_t i;
+
+ for (i = 0; i < page_count; i++) {
+ struct page *p = as_page(pages[i]);
+ struct kbase_page_metadata *page_md = kbase_page_private(p);
+
+ /* Skip the 4KB page that is part of a large page, as the large page is
+ * excluded from the migration process.
+ */
+ if (is_huge(pages[i]) || is_partial(pages[i]))
+ continue;
+
+ spin_lock(&page_md->migrate_lock);
+ /* Mark permanent kernel mappings as NOT_MOVABLE because they're likely
+ * to stay mapped for a long time. However, keep on counting the number
+ * of mappings even for them: they don't represent an exception for the
+ * vmap_count.
+ *
+ * At the same time, errors need to be handled if a client tries to add
+ * too many mappings, hence a page may end up in the NOT_MOVABLE state
+ * anyway even if it's not a permanent kernel mapping.
+ */
+ if (flags & KBASE_REG_PERMANENT_KERNEL_MAPPING)
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
+ if (page_md->vmap_count < U8_MAX)
+ page_md->vmap_count++;
+ else
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
+ spin_unlock(&page_md->migrate_lock);
+ }
+}
+
+/**
+ * kbase_vunmap_phy_pages_migrate_count_decrement - Decrement VMAP count for
+ * array of physical pages
+ *
+ * @pages: Array of pages.
+ * @page_count: Number of pages.
+ *
+ * This function is supposed to be called only if page migration support
+ * is enabled in the driver.
+ *
+ * The counter of kernel CPU mappings of the physical pages involved in a
+ * mapping operation is decremented by 1. Errors are handled by making pages
+ * not movable.
+ */
+static void kbase_vunmap_phy_pages_migrate_count_decrement(struct tagged_addr *pages,
+ size_t page_count)
+{
+ size_t i;
+
+ for (i = 0; i < page_count; i++) {
+ struct page *p = as_page(pages[i]);
+ struct kbase_page_metadata *page_md = kbase_page_private(p);
+
+ /* Skip the 4KB page that is part of a large page, as the large page is
+ * excluded from the migration process.
+ */
+ if (is_huge(pages[i]) || is_partial(pages[i]))
+ continue;
+
+ spin_lock(&page_md->migrate_lock);
+ /* Decrement the number of mappings for all kinds of pages, including
+ * pages which are NOT_MOVABLE (e.g. permanent kernel mappings).
+ * However, errors still need to be handled if a client tries to remove
+ * more mappings than created.
+ */
+ if (page_md->vmap_count == 0)
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
+ else
+ page_md->vmap_count--;
+ spin_unlock(&page_md->migrate_lock);
+ }
+}
+
+static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg,
+ u64 offset_bytes, size_t size, struct kbase_vmap_struct *map,
+ kbase_vmap_flag vmap_flags)
{
unsigned long page_index;
unsigned int offset_in_page = offset_bytes & ~PAGE_MASK;
@@ -2937,6 +3154,12 @@
void *cpu_addr = NULL;
pgprot_t prot;
size_t i;
+
+ if (WARN_ON(vmap_flags & ~KBASE_VMAP_INPUT_FLAGS))
+ return -EINVAL;
+
+ if (WARN_ON(kbase_is_region_invalid_or_free(reg)))
+ return -EINVAL;
if (!size || !map || !reg->cpu_alloc || !reg->gpu_alloc)
return -EINVAL;
@@ -2953,6 +3176,17 @@
if (page_index + page_count > kbase_reg_current_backed_size(reg))
return -ENOMEM;
+
+ if ((vmap_flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) &&
+ (page_count > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES -
+ atomic_read(&kctx->permanent_mapped_pages)))) {
+ dev_warn(
+ kctx->kbdev->dev,
+ "Request for %llu more pages mem needing a permanent mapping would breach limit %lu, currently at %d pages",
+ (u64)page_count, KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES,
+ atomic_read(&kctx->permanent_mapped_pages));
+ return -ENOMEM;
+ }
if (reg->flags & KBASE_REG_DONT_NEED)
return -EINVAL;
@@ -2980,6 +3214,13 @@
*/
cpu_addr = vmap(pages, page_count, VM_MAP, prot);
+ /* If page migration is enabled, increment the number of VMA mappings
+ * of all physical pages. In case of errors, e.g. too many mappings,
+ * make the page not movable to prevent trouble.
+ */
+ if (kbase_page_migration_enabled && !kbase_mem_is_imported(reg->gpu_alloc->type))
+ kbase_vmap_phy_pages_migrate_count_increment(page_array, page_count, reg->flags);
+
kfree(pages);
if (!cpu_addr)
@@ -2992,14 +3233,55 @@
map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index];
map->addr = (void *)((uintptr_t)cpu_addr + offset_in_page);
map->size = size;
- map->sync_needed = ((reg->flags & KBASE_REG_CPU_CACHED) != 0) &&
- !kbase_mem_is_imported(map->gpu_alloc->type);
+ map->flags = vmap_flags;
+ if ((reg->flags & KBASE_REG_CPU_CACHED) && !kbase_mem_is_imported(map->gpu_alloc->type))
+ map->flags |= KBASE_VMAP_FLAG_SYNC_NEEDED;
- if (map->sync_needed)
+ if (map->flags & KBASE_VMAP_FLAG_SYNC_NEEDED)
kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU);
+ if (vmap_flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING)
+ atomic_add(page_count, &kctx->permanent_mapped_pages);
+
kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc);
+
return 0;
+}
+
+void *kbase_vmap_reg(struct kbase_context *kctx, struct kbase_va_region *reg, u64 gpu_addr,
+ size_t size, unsigned long prot_request, struct kbase_vmap_struct *map,
+ kbase_vmap_flag vmap_flags)
+{
+ u64 offset_bytes;
+ struct kbase_mem_phy_alloc *cpu_alloc;
+ struct kbase_mem_phy_alloc *gpu_alloc;
+ int err;
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ if (WARN_ON(kbase_is_region_invalid_or_free(reg)))
+ return NULL;
+
+ /* check access permissions can be satisfied
+ * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR}
+ */
+ if ((reg->flags & prot_request) != prot_request)
+ return NULL;
+
+ offset_bytes = gpu_addr - (reg->start_pfn << PAGE_SHIFT);
+ cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+ gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+ err = kbase_vmap_phy_pages(kctx, reg, offset_bytes, size, map, vmap_flags);
+ if (err < 0)
+ goto fail_vmap_phy_pages;
+
+ return map->addr;
+
+fail_vmap_phy_pages:
+ kbase_mem_phy_alloc_put(cpu_alloc);
+ kbase_mem_phy_alloc_put(gpu_alloc);
+ return NULL;
}
void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
@@ -3007,44 +3289,21 @@
{
struct kbase_va_region *reg;
void *addr = NULL;
- u64 offset_bytes;
- struct kbase_mem_phy_alloc *cpu_alloc;
- struct kbase_mem_phy_alloc *gpu_alloc;
- int err;
kbase_gpu_vm_lock(kctx);
- reg = kbase_region_tracker_find_region_enclosing_address(kctx,
- gpu_addr);
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
if (kbase_is_region_invalid_or_free(reg))
goto out_unlock;
- /* check access permissions can be satisfied
- * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR}
- */
- if ((reg->flags & prot_request) != prot_request)
+ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
goto out_unlock;
- offset_bytes = gpu_addr - (reg->start_pfn << PAGE_SHIFT);
- cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
- gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
-
- err = kbase_vmap_phy_pages(kctx, reg, offset_bytes, size, map);
- if (err < 0)
- goto fail_vmap_phy_pages;
-
- addr = map->addr;
+ addr = kbase_vmap_reg(kctx, reg, gpu_addr, size, prot_request, map, 0u);
out_unlock:
kbase_gpu_vm_unlock(kctx);
return addr;
-
-fail_vmap_phy_pages:
- kbase_gpu_vm_unlock(kctx);
- kbase_mem_phy_alloc_put(cpu_alloc);
- kbase_mem_phy_alloc_put(gpu_alloc);
-
- return NULL;
}
void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
@@ -3064,18 +3323,37 @@
struct kbase_vmap_struct *map)
{
void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
+
vunmap(addr);
- if (map->sync_needed)
+ /* If page migration is enabled, decrement the number of VMA mappings
+ * for all physical pages. Now is a good time to do it because references
+ * haven't been released yet.
+ */
+ if (kbase_page_migration_enabled && !kbase_mem_is_imported(map->gpu_alloc->type)) {
+ const size_t page_count = PFN_UP(map->offset_in_page + map->size);
+ struct tagged_addr *pages_array = map->cpu_pages;
+
+ kbase_vunmap_phy_pages_migrate_count_decrement(pages_array, page_count);
+ }
+
+ if (map->flags & KBASE_VMAP_FLAG_SYNC_NEEDED)
kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE);
+ if (map->flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) {
+ size_t page_count = PFN_UP(map->offset_in_page + map->size);
+
+ WARN_ON(page_count > atomic_read(&kctx->permanent_mapped_pages));
+ atomic_sub(page_count, &kctx->permanent_mapped_pages);
+ }
kbase_mem_phy_alloc_kernel_unmapped(map->cpu_alloc);
+
map->offset_in_page = 0;
map->cpu_pages = NULL;
map->gpu_pages = NULL;
map->addr = NULL;
map->size = 0;
- map->sync_needed = false;
+ map->flags = 0;
}
void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
@@ -3102,79 +3380,29 @@
void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
{
- struct mm_struct *mm;
+ struct mm_struct *mm = kctx->process_mm;
- rcu_read_lock();
- mm = rcu_dereference(kctx->process_mm);
- if (mm) {
- atomic_add(pages, &kctx->nonmapped_pages);
-#ifdef SPLIT_RSS_COUNTING
- kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
-#else
- spin_lock(&mm->page_table_lock);
- kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
- spin_unlock(&mm->page_table_lock);
-#endif
- }
- rcu_read_unlock();
-}
-
-static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
-{
- int pages;
- struct mm_struct *mm;
-
- spin_lock(&kctx->mm_update_lock);
- mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
- if (!mm) {
- spin_unlock(&kctx->mm_update_lock);
+ if (unlikely(!mm))
return;
- }
- rcu_assign_pointer(kctx->process_mm, NULL);
- spin_unlock(&kctx->mm_update_lock);
- synchronize_rcu();
-
- pages = atomic_xchg(&kctx->nonmapped_pages, 0);
+ atomic_add(pages, &kctx->nonmapped_pages);
#ifdef SPLIT_RSS_COUNTING
- kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
#else
spin_lock(&mm->page_table_lock);
- kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
spin_unlock(&mm->page_table_lock);
#endif
}
-static void kbase_special_vm_close(struct vm_area_struct *vma)
-{
- struct kbase_context *kctx;
-
- kctx = vma->vm_private_data;
- kbasep_os_process_page_usage_drain(kctx);
-}
-
-static const struct vm_operations_struct kbase_vm_special_ops = {
- .close = kbase_special_vm_close,
-};
-
static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
{
- /* check that this is the only tracking page */
- spin_lock(&kctx->mm_update_lock);
- if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
- spin_unlock(&kctx->mm_update_lock);
- return -EFAULT;
- }
-
- rcu_assign_pointer(kctx->process_mm, current->mm);
-
- spin_unlock(&kctx->mm_update_lock);
+ if (vma_pages(vma) != 1)
+ return -EINVAL;
/* no real access */
vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
- vma->vm_ops = &kbase_vm_special_ops;
- vma->vm_private_data = kctx;
return 0;
}
@@ -3189,15 +3417,37 @@
* assigned one, otherwise a dummy page. Always return the
* dummy page in no mali builds.
*/
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+ return PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_db_page));
+#else
if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
return PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_db_page));
+#endif
return (PFN_DOWN(kbdev->reg_start + CSF_HW_DOORBELL_PAGE_OFFSET +
(u64)queue->doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE));
}
+static int
+#if (KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE || \
+ KERNEL_VERSION(5, 11, 0) > LINUX_VERSION_CODE)
+kbase_csf_user_io_pages_vm_mremap(struct vm_area_struct *vma)
+#else
+kbase_csf_user_io_pages_vm_mremap(struct vm_area_struct *vma, unsigned long flags)
+#endif
+{
+ pr_debug("Unexpected call to mremap method for User IO pages mapping vma\n");
+ return -EINVAL;
+}
+
+static int kbase_csf_user_io_pages_vm_split(struct vm_area_struct *vma, unsigned long addr)
+{
+ pr_debug("Unexpected call to split method for User IO pages mapping vma\n");
+ return -EINVAL;
+}
+
static void kbase_csf_user_io_pages_vm_open(struct vm_area_struct *vma)
{
- WARN(1, "Unexpected attempt to clone private vma\n");
+ pr_debug("Unexpected call to the open method for User IO pages mapping vma\n");
vma->vm_private_data = NULL;
}
@@ -3209,8 +3459,10 @@
int err;
bool reset_prevented = false;
- if (WARN_ON(!queue))
+ if (!queue) {
+ pr_debug("Close method called for the new User IO pages mapping vma\n");
return;
+ }
kctx = queue->kctx;
kbdev = kctx->kbdev;
@@ -3225,7 +3477,7 @@
reset_prevented = true;
mutex_lock(&kctx->csf.lock);
- kbase_csf_queue_unbind(queue);
+ kbase_csf_queue_unbind(queue, is_process_exiting(vma));
mutex_unlock(&kctx->csf.lock);
if (reset_prevented)
@@ -3254,24 +3506,21 @@
struct memory_group_manager_device *mgm_dev;
/* Few sanity checks up front */
- if ((nr_pages != BASEP_QUEUE_NR_MMAP_USER_PAGES) ||
- (vma->vm_pgoff != queue->db_file_offset))
+ if (!queue || (nr_pages != BASEP_QUEUE_NR_MMAP_USER_PAGES) ||
+ (vma->vm_pgoff != queue->db_file_offset)) {
+ pr_warn("Unexpected CPU page fault on User IO pages mapping for process %s tgid %d pid %d\n",
+ current->comm, current->tgid, current->pid);
return VM_FAULT_SIGBUS;
+ }
- mutex_lock(&queue->kctx->csf.lock);
kbdev = queue->kctx->kbdev;
mgm_dev = kbdev->mgm_dev;
+
+ mutex_lock(&kbdev->csf.reg_lock);
/* Always map the doorbell page as uncached */
doorbell_pgprot = pgprot_device(vma->vm_page_prot);
-#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
- ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
- (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
- vma->vm_page_prot = doorbell_pgprot;
- input_page_pgprot = doorbell_pgprot;
- output_page_pgprot = doorbell_pgprot;
-#else
if (kbdev->system_coherency == COHERENCY_NONE) {
input_page_pgprot = pgprot_writecombine(vma->vm_page_prot);
output_page_pgprot = pgprot_writecombine(vma->vm_page_prot);
@@ -3279,7 +3528,6 @@
input_page_pgprot = vma->vm_page_prot;
output_page_pgprot = vma->vm_page_prot;
}
-#endif
doorbell_cpu_addr = vma->vm_start;
@@ -3288,12 +3536,10 @@
#else
if (vmf->address == doorbell_cpu_addr) {
#endif
- mutex_lock(&kbdev->csf.reg_lock);
doorbell_page_pfn = get_queue_doorbell_pfn(kbdev, queue);
ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev,
KBASE_MEM_GROUP_CSF_IO, vma, doorbell_cpu_addr,
doorbell_page_pfn, doorbell_pgprot);
- mutex_unlock(&kbdev->csf.reg_lock);
} else {
/* Map the Input page */
input_cpu_addr = doorbell_cpu_addr + PAGE_SIZE;
@@ -3313,13 +3559,19 @@
}
exit:
- mutex_unlock(&queue->kctx->csf.lock);
+ mutex_unlock(&kbdev->csf.reg_lock);
return ret;
}
static const struct vm_operations_struct kbase_csf_user_io_pages_vm_ops = {
.open = kbase_csf_user_io_pages_vm_open,
.close = kbase_csf_user_io_pages_vm_close,
+#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE
+ .may_split = kbase_csf_user_io_pages_vm_split,
+#else
+ .split = kbase_csf_user_io_pages_vm_split,
+#endif
+ .mremap = kbase_csf_user_io_pages_vm_mremap,
.fault = kbase_csf_user_io_pages_vm_fault
};
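The new handlers above refuse splitting and remapping of the User IO pages VMA; the hook names are version-dependent because .split became .may_split in 5.11, and the mremap callback carried an extra flags argument only in 5.11 and 5.12, which is what both #if blocks in this hunk encode. A condensed sketch with hypothetical example_* handlers, assuming a kernel new enough for vm_fault_t:

#include <linux/mm.h>
#include <linux/version.h>

static void example_vm_open(struct vm_area_struct *vma);
static void example_vm_close(struct vm_area_struct *vma);
static vm_fault_t example_vm_fault(struct vm_fault *vmf);
static int example_vm_split(struct vm_area_struct *vma, unsigned long addr);

/* The mremap prototype itself changed: a flags argument existed only in
 * 5.11 and 5.12.
 */
#if (KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE || \
     KERNEL_VERSION(5, 11, 0) > LINUX_VERSION_CODE)
static int example_vm_mremap(struct vm_area_struct *vma);
#else
static int example_vm_mremap(struct vm_area_struct *vma, unsigned long flags);
#endif

static const struct vm_operations_struct example_vm_ops = {
	.open  = example_vm_open,
	.close = example_vm_close,
#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE
	.may_split = example_vm_split,	/* .split was renamed in 5.11 */
#else
	.split = example_vm_split,
#endif
	.mremap = example_vm_mremap,
	.fault  = example_vm_fault,
};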
@@ -3399,13 +3651,75 @@
return err;
}
+/**
+ * kbase_csf_user_reg_vm_open - VMA open function for the USER page
+ *
+ * @vma: Pointer to the struct containing information about
+ * the userspace mapping of USER page.
+ * Note:
+ * This function isn't expected to be called. If called (i.e. on mremap),
+ * set private_data as NULL to indicate to close() and fault() functions.
+ */
+static void kbase_csf_user_reg_vm_open(struct vm_area_struct *vma)
+{
+ pr_debug("Unexpected call to the open method for USER register mapping");
+ vma->vm_private_data = NULL;
+}
+
+/**
+ * kbase_csf_user_reg_vm_close - VMA close function for the USER page
+ *
+ * @vma: Pointer to the struct containing information about
+ * the userspace mapping of USER page.
+ */
static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma)
{
struct kbase_context *kctx = vma->vm_private_data;
+ struct kbase_device *kbdev;
- WARN_ON(!kctx->csf.user_reg_vma);
+ if (unlikely(!kctx)) {
+ pr_debug("Close function called for the unexpected mapping");
+ return;
+ }
- kctx->csf.user_reg_vma = NULL;
+ kbdev = kctx->kbdev;
+
+ if (unlikely(!kctx->csf.user_reg.vma))
+ dev_warn(kbdev->dev, "user_reg VMA pointer unexpectedly NULL for ctx %d_%d",
+ kctx->tgid, kctx->id);
+
+ mutex_lock(&kbdev->csf.reg_lock);
+ list_del_init(&kctx->csf.user_reg.link);
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ kctx->csf.user_reg.vma = NULL;
+
+ /* Now as the VMA is closed, drop the reference on mali device file */
+ fput(kctx->filp);
+}
+
+/**
+ * kbase_csf_user_reg_vm_mremap - VMA mremap function for the USER page
+ *
+ * @vma: Pointer to the struct containing information about
+ * the userspace mapping of USER page.
+ *
+ * Return: -EINVAL
+ *
+ * Note:
+ * User space must not attempt mremap on USER page mapping.
+ * This function will return an error to fail the attempt.
+ */
+static int
+#if ((KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE) || \
+ (KERNEL_VERSION(5, 11, 0) > LINUX_VERSION_CODE))
+kbase_csf_user_reg_vm_mremap(struct vm_area_struct *vma)
+#else
+kbase_csf_user_reg_vm_mremap(struct vm_area_struct *vma, unsigned long flags)
+#endif
+{
+ pr_debug("Unexpected call to mremap method for USER page mapping vma\n");
+ return -EINVAL;
}
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
@@ -3418,39 +3732,52 @@
struct vm_area_struct *vma = vmf->vma;
#endif
struct kbase_context *kctx = vma->vm_private_data;
- struct kbase_device *kbdev = kctx->kbdev;
- struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev;
- unsigned long pfn = PFN_DOWN(kbdev->reg_start + USER_BASE);
+ struct kbase_device *kbdev;
+ struct memory_group_manager_device *mgm_dev;
+ unsigned long pfn;
size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start);
vm_fault_t ret = VM_FAULT_SIGBUS;
+ unsigned long flags;
/* Few sanity checks up front */
- if (WARN_ON(nr_pages != 1) ||
- WARN_ON(vma != kctx->csf.user_reg_vma) ||
- WARN_ON(vma->vm_pgoff !=
- PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE)))
+
+ if (!kctx || (nr_pages != 1) || (vma != kctx->csf.user_reg.vma) ||
+ (vma->vm_pgoff != kctx->csf.user_reg.file_offset)) {
+ pr_err("Unexpected CPU page fault on USER page mapping for process %s tgid %d pid %d\n",
+ current->comm, current->tgid, current->pid);
return VM_FAULT_SIGBUS;
+ }
- mutex_lock(&kbdev->pm.lock);
+ kbdev = kctx->kbdev;
+ mgm_dev = kbdev->mgm_dev;
+ pfn = PFN_DOWN(kbdev->reg_start + USER_BASE);
- /* Don't map in the actual register page if GPU is powered down.
- * Always map in the dummy page in no mali builds.
+ mutex_lock(&kbdev->csf.reg_lock);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ /* Dummy page will be mapped during GPU off.
+ *
+ * In no mali builds, always map in the dummy page.
*/
- if (!kbdev->pm.backend.gpu_powered)
- pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page));
+ if (IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) || !kbdev->pm.backend.gpu_powered)
+ pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.user_reg.dummy_page));
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ list_move_tail(&kctx->csf.user_reg.link, &kbdev->csf.user_reg.list);
ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev,
KBASE_MEM_GROUP_CSF_FW, vma,
vma->vm_start, pfn,
vma->vm_page_prot);
- mutex_unlock(&kbdev->pm.lock);
+ mutex_unlock(&kbdev->csf.reg_lock);
return ret;
}
static const struct vm_operations_struct kbase_csf_user_reg_vm_ops = {
+ .open = kbase_csf_user_reg_vm_open,
.close = kbase_csf_user_reg_vm_close,
+ .mremap = kbase_csf_user_reg_vm_mremap,
.fault = kbase_csf_user_reg_vm_fault
};
@@ -3458,9 +3785,10 @@
struct vm_area_struct *vma)
{
size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start);
+ struct kbase_device *kbdev = kctx->kbdev;
/* Few sanity checks */
- if (kctx->csf.user_reg_vma)
+ if (kctx->csf.user_reg.vma)
return -EBUSY;
if (nr_pages != 1)
@@ -3479,8 +3807,21 @@
*/
vma->vm_flags |= VM_PFNMAP;
- kctx->csf.user_reg_vma = vma;
+ kctx->csf.user_reg.vma = vma;
+ mutex_lock(&kbdev->csf.reg_lock);
+ kctx->csf.user_reg.file_offset = kbdev->csf.user_reg.file_offset++;
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ /* Make VMA point to the special internal file, but don't drop the
+ * reference on mali device file (that would be done later when the
+ * VMA is closed).
+ */
+ vma->vm_file = kctx->kbdev->csf.user_reg.filp;
+ get_file(vma->vm_file);
+
+ /* Also adjust the vm_pgoff */
+ vma->vm_pgoff = kctx->csf.user_reg.file_offset;
vma->vm_ops = &kbase_csf_user_reg_vm_ops;
vma->vm_private_data = kctx;
--
Gitblit v1.6.2