From 1543e317f1da31b75942316931e8f491a8920811 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Thu, 04 Jan 2024 10:08:02 +0000
Subject: [PATCH] disable FB
---
kernel/drivers/gpu/arm/bifrost/mali_kbase_mem.c | 1452 +++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 1034 insertions(+), 418 deletions(-)
diff --git a/kernel/drivers/gpu/arm/bifrost/mali_kbase_mem.c b/kernel/drivers/gpu/arm/bifrost/mali_kbase_mem.c
index 2084171..1c94e9c 100644
--- a/kernel/drivers/gpu/arm/bifrost/mali_kbase_mem.c
+++ b/kernel/drivers/gpu/arm/bifrost/mali_kbase_mem.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/**
- * Base kernel memory APIs
+ * DOC: Base kernel memory APIs
*/
#include <linux/dma-buf.h>
#include <linux/kernel.h>
@@ -44,6 +44,11 @@
#include <mali_kbase_config_defaults.h>
#include <mali_kbase_trace_gpu_mem.h>
+#define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-"
+#define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1)
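+/* Note: sizeof(VA_REGION_SLAB_NAME_PREFIX) already counts the string's
+ * terminating NUL, so the extra +1 leaves slack beyond DEVNAME_SIZE for the
+ * appended device name.
+ */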
+
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+
/*
* Alignment of objects allocated by the GPU inside a just-in-time memory
* region whose size is given by an end address
@@ -66,6 +71,7 @@
*/
#define KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES (512u)
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
/* Forward declarations */
static void free_partial_locked(struct kbase_context *kctx,
@@ -89,10 +95,8 @@
#error "Unknown CPU VA width for this architecture"
#endif
-#if IS_ENABLED(CONFIG_64BIT)
- if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+ if (kbase_ctx_compat_mode(kctx))
cpu_va_bits = 32;
-#endif
return cpu_va_bits;
}
@@ -104,29 +108,37 @@
u64 gpu_pfn)
{
struct rb_root *rbtree = NULL;
- struct kbase_reg_zone *exec_va_zone =
- kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA);
- /* The gpu_pfn can only be greater than the starting pfn of the EXEC_VA
- * zone if this has been initialized.
- */
+ struct kbase_reg_zone *exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA);
+
+#if MALI_USE_CSF
+ struct kbase_reg_zone *fixed_va_zone =
+ kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_FIXED_VA);
+
+ struct kbase_reg_zone *exec_fixed_va_zone =
+ kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA);
+
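+	/* Zone bases ascend: EXEC_VA < EXEC_FIXED_VA < FIXED_VA, so the
+	 * highest-based zones are tested first when classifying gpu_pfn.
+	 */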
+ if (gpu_pfn >= fixed_va_zone->base_pfn) {
+ rbtree = &kctx->reg_rbtree_fixed;
+ return rbtree;
+ } else if (gpu_pfn >= exec_fixed_va_zone->base_pfn) {
+ rbtree = &kctx->reg_rbtree_exec_fixed;
+ return rbtree;
+ }
+#endif
if (gpu_pfn >= exec_va_zone->base_pfn)
rbtree = &kctx->reg_rbtree_exec;
else {
u64 same_va_end;
-#if IS_ENABLED(CONFIG_64BIT)
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
-#endif /* CONFIG_64BIT */
+ if (kbase_ctx_compat_mode(kctx)) {
same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE;
-#if IS_ENABLED(CONFIG_64BIT)
} else {
struct kbase_reg_zone *same_va_zone =
kbase_ctx_reg_zone_get(kctx,
KBASE_REG_ZONE_SAME_VA);
same_va_end = kbase_reg_zone_end_pfn(same_va_zone);
}
-#endif /* CONFIG_64BIT */
if (gpu_pfn >= same_va_end)
rbtree = &kctx->reg_rbtree_custom;
@@ -350,7 +362,9 @@
}
/**
- * Remove a region object from the global list.
+ * kbase_remove_va_region - Remove a region object from the global list.
+ *
+ * @kbdev: The kbase device
* @reg: Region object to remove
*
* The region reg is removed, possibly by merging with other free and
@@ -358,19 +372,23 @@
* region lock held. The associated memory is not released (see
* kbase_free_alloced_region). Internal use only.
*/
-int kbase_remove_va_region(struct kbase_va_region *reg)
+void kbase_remove_va_region(struct kbase_device *kbdev,
+ struct kbase_va_region *reg)
{
struct rb_node *rbprev;
struct kbase_va_region *prev = NULL;
struct rb_node *rbnext;
struct kbase_va_region *next = NULL;
struct rb_root *reg_rbtree = NULL;
+ struct kbase_va_region *orig_reg = reg;
int merged_front = 0;
int merged_back = 0;
- int err = 0;
reg_rbtree = reg->rbtree;
+
+ if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree)))
+ return;
/* Try to merge with the previous block first */
rbprev = rb_prev(&(reg->rblink));
@@ -378,10 +396,14 @@
prev = rb_entry(rbprev, struct kbase_va_region, rblink);
if (prev->flags & KBASE_REG_FREE) {
/* We're compatible with the previous VMA, merge with
- * it
+ * it, handling any gaps for robustness.
*/
+ u64 prev_end_pfn = prev->start_pfn + prev->nr_pages;
+
WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) !=
(reg->flags & KBASE_REG_ZONE_MASK));
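+			/* Illustrative example (hypothetical PFNs): if prev spans
+			 * [0x100, 0x110) and reg starts at 0x112, the 2-page gap
+			 * is folded into prev below before reg's own pages are
+			 * added.
+			 */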
+ if (!WARN_ON(reg->start_pfn < prev_end_pfn))
+ prev->nr_pages += reg->start_pfn - prev_end_pfn;
prev->nr_pages += reg->nr_pages;
rb_erase(&(reg->rblink), reg_rbtree);
reg = prev;
@@ -393,42 +415,76 @@
/* Note we do the lookup here as the tree may have been rebalanced. */
rbnext = rb_next(&(reg->rblink));
if (rbnext) {
- /* We're compatible with the next VMA, merge with it */
next = rb_entry(rbnext, struct kbase_va_region, rblink);
if (next->flags & KBASE_REG_FREE) {
+ /* We're compatible with the next VMA, merge with it,
+ * handling any gaps for robustness.
+ */
+ u64 reg_end_pfn = reg->start_pfn + reg->nr_pages;
+
WARN_ON((next->flags & KBASE_REG_ZONE_MASK) !=
(reg->flags & KBASE_REG_ZONE_MASK));
+ if (!WARN_ON(next->start_pfn < reg_end_pfn))
+ next->nr_pages += next->start_pfn - reg_end_pfn;
next->start_pfn = reg->start_pfn;
next->nr_pages += reg->nr_pages;
rb_erase(&(reg->rblink), reg_rbtree);
merged_back = 1;
- if (merged_front) {
- /* We already merged with prev, free it */
- kfree(reg);
- }
}
}
- /* If we failed to merge then we need to add a new block */
- if (!(merged_front || merged_back)) {
+ if (merged_front && merged_back) {
+ /* We already merged with prev, free it */
+ kfree(reg);
+ } else if (!(merged_front || merged_back)) {
+ /* If we failed to merge then we need to add a new block */
+
/*
- * We didn't merge anything. Add a new free
- * placeholder and remove the original one.
+ * We didn't merge anything. Try to add a new free
+ * placeholder, and in any case, remove the original one.
*/
struct kbase_va_region *free_reg;
- free_reg = kbase_alloc_free_region(reg_rbtree,
- reg->start_pfn, reg->nr_pages,
- reg->flags & KBASE_REG_ZONE_MASK);
+ free_reg = kbase_alloc_free_region(kbdev, reg_rbtree, reg->start_pfn, reg->nr_pages,
+ reg->flags & KBASE_REG_ZONE_MASK);
if (!free_reg) {
- err = -ENOMEM;
+ /* In case of failure, we cannot allocate a replacement
+ * free region, so we will be left with a 'gap' in the
+ * region tracker's address range (though, the rbtree
+ * will itself still be correct after erasing
+ * 'reg').
+ *
+ * The gap will be rectified when an adjacent region is
+ * removed by one of the above merging paths. Other
+ * paths will gracefully fail to allocate if they try
+ * to allocate in the gap.
+ *
+ * There is nothing that the caller can do, since free
+ * paths must not fail. The existing 'reg' cannot be
+ * repurposed as the free region as callers must have
+ * freedom of use with it by virtue of it being owned
+ * by them, not the region tracker insert/remove code.
+ */
+ dev_warn(
+ kbdev->dev,
+ "Could not alloc a replacement free region for 0x%.16llx..0x%.16llx",
+ (unsigned long long)reg->start_pfn << PAGE_SHIFT,
+ (unsigned long long)(reg->start_pfn + reg->nr_pages) << PAGE_SHIFT);
+ rb_erase(&(reg->rblink), reg_rbtree);
+
goto out;
}
rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree);
}
- out:
- return err;
+ /* This operation is always safe because the function never frees
+ * the region. If the region has been merged to both front and back,
+ * then it's the previous region that is supposed to be freed.
+ */
+ orig_reg->start_pfn = 0;
+
+out:
+ return;
}
KBASE_EXPORT_TEST_API(kbase_remove_va_region);
@@ -437,13 +493,18 @@
* kbase_insert_va_region_nolock - Insert a VA region to the list,
* replacing the existing one.
*
+ * @kbdev: The kbase device
* @new_reg: The new region to insert
* @at_reg: The region to replace
* @start_pfn: The Page Frame Number to insert at
* @nr_pages: The number of pages of the region
+ *
+ * Return: 0 on success, error code otherwise.
*/
-static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
- struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+static int kbase_insert_va_region_nolock(struct kbase_device *kbdev,
+ struct kbase_va_region *new_reg,
+ struct kbase_va_region *at_reg, u64 start_pfn,
+ size_t nr_pages)
{
struct rb_root *reg_rbtree = NULL;
int err = 0;
@@ -456,6 +517,9 @@
KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
/* at least nr_pages from start_pfn should be contained within at_reg */
KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
+ /* having at_reg means the rb_tree should not be empty */
+ if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree)))
+ return -ENOMEM;
new_reg->start_pfn = start_pfn;
new_reg->nr_pages = nr_pages;
@@ -484,10 +548,9 @@
else {
struct kbase_va_region *new_front_reg;
- new_front_reg = kbase_alloc_free_region(reg_rbtree,
- at_reg->start_pfn,
- start_pfn - at_reg->start_pfn,
- at_reg->flags & KBASE_REG_ZONE_MASK);
+ new_front_reg = kbase_alloc_free_region(kbdev, reg_rbtree, at_reg->start_pfn,
+ start_pfn - at_reg->start_pfn,
+ at_reg->flags & KBASE_REG_ZONE_MASK);
if (new_front_reg) {
at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
@@ -511,6 +574,8 @@
* @addr: the address to insert the region at
* @nr_pages: the number of pages in the region
* @align: the minimum alignment in pages
+ *
+ * Return: 0 on success, error code otherwise.
*/
int kbase_add_va_region(struct kbase_context *kctx,
struct kbase_va_region *reg, u64 addr,
@@ -527,12 +592,19 @@
lockdep_assert_held(&kctx->reg_lock);
- /* The executable allocation from the SAME_VA zone would already have an
+ /* The executable allocation from the SAME_VA zone should already have an
* appropriately aligned GPU VA chosen for it.
- * Also the executable allocation from EXEC_VA zone doesn't need the
- * special alignment.
+ * Also, executable allocations from EXEC_VA don't need the special
+ * alignment.
*/
+#if MALI_USE_CSF
+ /* The same is also true for the EXEC_FIXED_VA zone.
+ */
+#endif
if (!(reg->flags & KBASE_REG_GPU_NX) && !addr &&
+#if MALI_USE_CSF
+ ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_FIXED_VA) &&
+#endif
((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_VA)) {
if (cpu_va_bits > gpu_pc_bits) {
align = max(align, (size_t)((1ULL << gpu_pc_bits)
@@ -564,15 +636,17 @@
/**
* kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree
*
- * Insert a region into the rbtree that was specified when the region was
- * created. If addr is 0 a free area in the rbtree is used, otherwise the
- * specified address is used.
- *
* @kbdev: The kbase device
* @reg: The region to add
* @addr: The address to add the region at, or 0 to map at any available address
* @nr_pages: The size of the region in pages
* @align: The minimum alignment in pages
+ *
+ * Insert a region into the rbtree that was specified when the region was
+ * created. If addr is 0 a free area in the rbtree is used, otherwise the
+ * specified address is used.
+ *
+ * Return: 0 on success, error code otherwise.
*/
int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
struct kbase_va_region *reg,
@@ -613,8 +687,7 @@
goto exit;
}
- err = kbase_insert_va_region_nolock(reg, tmp, gpu_pfn,
- nr_pages);
+ err = kbase_insert_va_region_nolock(kbdev, reg, tmp, gpu_pfn, nr_pages);
if (err) {
dev_warn(dev, "Failed to insert va region");
err = -ENOMEM;
@@ -639,8 +712,7 @@
nr_pages, align_offset, align_mask,
&start_pfn);
if (tmp) {
- err = kbase_insert_va_region_nolock(reg, tmp,
- start_pfn, nr_pages);
+ err = kbase_insert_va_region_nolock(kbdev, reg, tmp, start_pfn, nr_pages);
if (unlikely(err)) {
dev_warn(dev, "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages",
start_pfn, nr_pages);
@@ -659,6 +731,59 @@
/*
* @brief Initialize the internal region tracker data structure.
*/
+#if MALI_USE_CSF
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
+ struct kbase_va_region *same_va_reg,
+ struct kbase_va_region *custom_va_reg,
+ struct kbase_va_region *exec_va_reg,
+ struct kbase_va_region *exec_fixed_va_reg,
+ struct kbase_va_region *fixed_va_reg)
+{
+ u64 last_zone_end_pfn;
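+	/* Zones are inserted in ascending VA order; last_zone_end_pfn tracks
+	 * the running end of the previous zone so the WARN_ONs below catch any
+	 * overlap.
+	 */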
+
+ kctx->reg_rbtree_same = RB_ROOT;
+ kbase_region_tracker_insert(same_va_reg);
+
+ last_zone_end_pfn = same_va_reg->start_pfn + same_va_reg->nr_pages;
+
+	/* Although custom_va_reg doesn't always exist, initialize these
+	 * rbtrees unconditionally because of the mem_view debugfs
+	 * implementation, which relies on them being empty.
+	 */
+ kctx->reg_rbtree_custom = RB_ROOT;
+ kctx->reg_rbtree_exec = RB_ROOT;
+
+ if (custom_va_reg) {
+ WARN_ON(custom_va_reg->start_pfn < last_zone_end_pfn);
+ kbase_region_tracker_insert(custom_va_reg);
+ last_zone_end_pfn = custom_va_reg->start_pfn + custom_va_reg->nr_pages;
+ }
+
+ /* Initialize exec, fixed and exec_fixed. These are always
+ * initialized at this stage, if they will exist at all.
+ */
+ kctx->reg_rbtree_fixed = RB_ROOT;
+ kctx->reg_rbtree_exec_fixed = RB_ROOT;
+
+ if (exec_va_reg) {
+ WARN_ON(exec_va_reg->start_pfn < last_zone_end_pfn);
+ kbase_region_tracker_insert(exec_va_reg);
+ last_zone_end_pfn = exec_va_reg->start_pfn + exec_va_reg->nr_pages;
+ }
+
+ if (exec_fixed_va_reg) {
+ WARN_ON(exec_fixed_va_reg->start_pfn < last_zone_end_pfn);
+ kbase_region_tracker_insert(exec_fixed_va_reg);
+ last_zone_end_pfn = exec_fixed_va_reg->start_pfn + exec_fixed_va_reg->nr_pages;
+ }
+
+ if (fixed_va_reg) {
+ WARN_ON(fixed_va_reg->start_pfn < last_zone_end_pfn);
+ kbase_region_tracker_insert(fixed_va_reg);
+ last_zone_end_pfn = fixed_va_reg->start_pfn + fixed_va_reg->nr_pages;
+ }
+}
+#else
static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
struct kbase_va_region *same_va_reg,
struct kbase_va_region *custom_va_reg)
@@ -679,6 +804,41 @@
if (custom_va_reg)
kbase_region_tracker_insert(custom_va_reg);
}
+#endif /* MALI_USE_CSF */
+
+static struct kbase_context *kbase_reg_flags_to_kctx(struct kbase_va_region *reg)
+{
+ struct kbase_context *kctx = NULL;
+ struct rb_root *rbtree = reg->rbtree;
+
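+	/* Each zone's rb_root is embedded in struct kbase_context, so the
+	 * owning context can be recovered from the rbtree pointer alone via
+	 * container_of().
+	 */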
+ switch (reg->flags & KBASE_REG_ZONE_MASK) {
+ case KBASE_REG_ZONE_CUSTOM_VA:
+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_custom);
+ break;
+ case KBASE_REG_ZONE_SAME_VA:
+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_same);
+ break;
+ case KBASE_REG_ZONE_EXEC_VA:
+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec);
+ break;
+#if MALI_USE_CSF
+ case KBASE_REG_ZONE_EXEC_FIXED_VA:
+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed);
+ break;
+ case KBASE_REG_ZONE_FIXED_VA:
+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed);
+ break;
+ case KBASE_REG_ZONE_MCU_SHARED:
+ /* This is only expected to be called on driver unload. */
+ break;
+#endif
+ default:
+ WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags);
+ break;
+ }
+
+ return kctx;
+}
static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
{
@@ -690,7 +850,9 @@
if (rbnode) {
rb_erase(rbnode, rbtree);
reg = rb_entry(rbnode, struct kbase_va_region, rblink);
- WARN_ON(reg->va_refcnt != 1);
+ WARN_ON(kbase_refcount_read(®->va_refcnt) != 1);
+ if (kbase_page_migration_enabled)
+ kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg);
/* Reset the start_pfn - as the rbtree is being
* destroyed and we've already erased this region, there
* is no further need to attempt to remove it.
@@ -707,12 +869,19 @@
void kbase_region_tracker_term(struct kbase_context *kctx)
{
+ WARN(kctx->as_nr != KBASEP_AS_NR_INVALID,
+ "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions",
+ kctx->tgid, kctx->id);
+
kbase_gpu_vm_lock(kctx);
kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same);
kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec);
#if MALI_USE_CSF
WARN_ON(!list_empty(&kctx->csf.event_pages_head));
+ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec_fixed);
+ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_fixed);
+
#endif
kbase_gpu_vm_unlock(kctx);
}
@@ -724,8 +893,8 @@
static size_t kbase_get_same_va_bits(struct kbase_context *kctx)
{
- return min(kbase_get_num_cpu_va_bits(kctx),
- (size_t) kctx->kbdev->gpu_props.mmu.va_bits);
+ return min_t(size_t, kbase_get_num_cpu_va_bits(kctx),
+ kctx->kbdev->gpu_props.mmu.va_bits);
}
int kbase_region_tracker_init(struct kbase_context *kctx)
@@ -734,19 +903,41 @@
struct kbase_va_region *custom_va_reg = NULL;
size_t same_va_bits = kbase_get_same_va_bits(kctx);
u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
- u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+ u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits;
+ u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT;
u64 same_va_pages;
u64 same_va_base = 1u;
int err;
+#if MALI_USE_CSF
+ struct kbase_va_region *exec_va_reg;
+ struct kbase_va_region *exec_fixed_va_reg;
+ struct kbase_va_region *fixed_va_reg;
+
+ u64 exec_va_base;
+ u64 fixed_va_end;
+ u64 exec_fixed_va_base;
+ u64 fixed_va_base;
+ u64 fixed_va_pages;
+#endif
/* Take the lock as kbase_free_alloced_region requires it */
kbase_gpu_vm_lock(kctx);
same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - same_va_base;
+
+#if MALI_USE_CSF
+ if ((same_va_base + same_va_pages) > KBASE_REG_ZONE_EXEC_VA_BASE_64) {
+		/* Depending on how the kernel is configured, it's possible (e.g. on aarch64) for
+ * same_va_bits to reach 48 bits. Cap same_va_pages so that the same_va zone
+ * doesn't cross into the exec_va zone.
+ */
+ same_va_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - same_va_base;
+ }
+#endif
+
/* all have SAME_VA */
- same_va_reg =
- kbase_alloc_free_region(&kctx->reg_rbtree_same, same_va_base,
- same_va_pages, KBASE_REG_ZONE_SAME_VA);
+ same_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, same_va_base,
+ same_va_pages, KBASE_REG_ZONE_SAME_VA);
if (!same_va_reg) {
err = -ENOMEM;
@@ -755,10 +946,7 @@
kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base,
same_va_pages);
-#if IS_ENABLED(CONFIG_64BIT)
- /* 32-bit clients have custom VA zones */
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
-#endif
+ if (kbase_ctx_compat_mode(kctx)) {
if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
err = -EINVAL;
goto fail_free_same_va;
@@ -770,10 +958,9 @@
if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
- custom_va_reg = kbase_alloc_free_region(
- &kctx->reg_rbtree_custom,
- KBASE_REG_ZONE_CUSTOM_VA_BASE,
- custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+ custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom,
+ KBASE_REG_ZONE_CUSTOM_VA_BASE,
+ custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
if (!custom_va_reg) {
err = -ENOMEM;
@@ -782,11 +969,70 @@
kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA,
KBASE_REG_ZONE_CUSTOM_VA_BASE,
custom_va_size);
-#if IS_ENABLED(CONFIG_64BIT)
} else {
custom_va_size = 0;
}
-#endif
+
+#if MALI_USE_CSF
+ /* The position of EXEC_VA depends on whether the client is 32-bit or 64-bit. */
+ exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_64;
+
+ /* Similarly the end of the FIXED_VA zone also depends on whether the client
+ * is 32 or 64-bits.
+ */
+ fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64;
+
+ if (kbase_ctx_compat_mode(kctx)) {
+ exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_32;
+ fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32;
+ }
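+	/* Resulting CSF layout (sketch): the EXEC_VA zone, then EXEC_FIXED_VA
+	 * immediately above it, then FIXED_VA extending up to fixed_va_end.
+	 */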
+
+ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, exec_va_base,
+ KBASE_REG_ZONE_EXEC_VA_SIZE);
+
+ exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_base,
+ KBASE_REG_ZONE_EXEC_VA_SIZE, KBASE_REG_ZONE_EXEC_VA);
+
+ if (!exec_va_reg) {
+ err = -ENOMEM;
+ goto fail_free_custom_va;
+ }
+
+ exec_fixed_va_base = exec_va_base + KBASE_REG_ZONE_EXEC_VA_SIZE;
+
+ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA, exec_fixed_va_base,
+ KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE);
+
+ exec_fixed_va_reg =
+ kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec_fixed,
+ exec_fixed_va_base, KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE,
+ KBASE_REG_ZONE_EXEC_FIXED_VA);
+
+ if (!exec_fixed_va_reg) {
+ err = -ENOMEM;
+ goto fail_free_exec_va;
+ }
+
+ fixed_va_base = exec_fixed_va_base + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE;
+ fixed_va_pages = fixed_va_end - fixed_va_base;
+
+ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_FIXED_VA, fixed_va_base, fixed_va_pages);
+
+ fixed_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_fixed, fixed_va_base,
+ fixed_va_pages, KBASE_REG_ZONE_FIXED_VA);
+
+ kctx->gpu_va_end = fixed_va_end;
+
+ if (!fixed_va_reg) {
+ err = -ENOMEM;
+ goto fail_free_exec_fixed_va;
+ }
+
+ kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg, exec_va_reg,
+ exec_fixed_va_reg, fixed_va_reg);
+
+ INIT_LIST_HEAD(&kctx->csf.event_pages_head);
+#else
/* EXEC_VA zone's codepaths are slightly easier when its base_pfn is
* initially U64_MAX
*/
@@ -794,16 +1040,22 @@
/* Other zones are 0: kbase_create_context() uses vzalloc */
kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg);
-
kctx->gpu_va_end = same_va_base + same_va_pages + custom_va_size;
- kctx->jit_va = false;
-
-#if MALI_USE_CSF
- INIT_LIST_HEAD(&kctx->csf.event_pages_head);
#endif
+ kctx->jit_va = false;
kbase_gpu_vm_unlock(kctx);
return 0;
+
+#if MALI_USE_CSF
+fail_free_exec_fixed_va:
+ kbase_free_alloced_region(exec_fixed_va_reg);
+fail_free_exec_va:
+ kbase_free_alloced_region(exec_va_reg);
+fail_free_custom_va:
+ if (custom_va_reg)
+ kbase_free_alloced_region(custom_va_reg);
+#endif
fail_free_same_va:
kbase_free_alloced_region(same_va_reg);
@@ -834,7 +1086,9 @@
}
/**
- * Determine if any allocations have been made on a context's region tracker
+ * kbase_region_tracker_has_allocs - Determine if any allocations have been made
+ * on a context's region tracker
+ *
* @kctx: KBase context
*
* Check the context to determine if any allocations have been made yet from
@@ -862,6 +1116,8 @@
unsigned long zone_bits = KBASE_REG_ZONE(zone_idx);
unsigned long reg_zone;
+ if (!kbase_is_ctx_reg_zone(zone_bits))
+ continue;
zone = kbase_ctx_reg_zone_get(kctx, zone_bits);
zone_base_addr = zone->base_pfn << PAGE_SHIFT;
@@ -901,7 +1157,6 @@
return false;
}
-#if IS_ENABLED(CONFIG_64BIT)
static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
u64 jit_va_pages)
{
@@ -950,9 +1205,8 @@
* Create a custom VA zone at the end of the VA for allocations which
* JIT can use so it doesn't have to allocate VA from the kernel.
*/
- custom_va_reg =
- kbase_alloc_free_region(&kctx->reg_rbtree_custom, jit_va_start,
- jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+ custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, jit_va_start,
+ jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA);
/*
* The context will be destroyed if we fail here so no point
@@ -969,7 +1223,6 @@
kbase_region_tracker_insert(custom_va_reg);
return 0;
}
-#endif
int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
int max_allocations, int trim_level, int group_id,
@@ -1010,10 +1263,8 @@
goto exit_unlock;
}
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+ if (!kbase_ctx_compat_mode(kctx))
err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
-#endif
/*
* Nothing to do for 32-bit clients, JIT uses the existing
* custom VA zone.
@@ -1039,6 +1290,7 @@
int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages)
{
+#if !MALI_USE_CSF
struct kbase_va_region *exec_va_reg;
struct kbase_reg_zone *exec_va_zone;
struct kbase_reg_zone *target_zone;
@@ -1047,6 +1299,7 @@
unsigned long target_zone_bits;
u64 exec_va_start;
int err;
+#endif
/* The EXEC_VA zone shall be created by making space either:
* - for 64-bit clients, at the end of the process's address space
@@ -1060,6 +1313,12 @@
if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES)
return -EINVAL;
+#if MALI_USE_CSF
+ /* For CSF GPUs we now setup the EXEC_VA zone during initialization,
+ * so this request is a null-op.
+ */
+ return 0;
+#else
kbase_gpu_vm_lock(kctx);
/* Verify that we've not already created a EXEC_VA zone, and that the
@@ -1081,17 +1340,14 @@
goto exit_unlock;
}
-#if IS_ENABLED(CONFIG_64BIT)
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
-#endif
+ if (kbase_ctx_compat_mode(kctx)) {
/* 32-bit client: take from CUSTOM_VA zone */
target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA;
-#if IS_ENABLED(CONFIG_64BIT)
} else {
/* 64-bit client: take from SAME_VA zone */
target_zone_bits = KBASE_REG_ZONE_SAME_VA;
}
-#endif
+
target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits);
target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT;
@@ -1119,10 +1375,8 @@
/* Taken from the end of the target zone */
exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages;
- exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec,
- exec_va_start,
- exec_va_pages,
- KBASE_REG_ZONE_EXEC_VA);
+ exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_start,
+ exec_va_pages, KBASE_REG_ZONE_EXEC_VA);
if (!exec_va_reg) {
err = -ENOMEM;
goto exit_unlock;
@@ -1145,6 +1399,7 @@
exit_unlock:
kbase_gpu_vm_unlock(kctx);
return err;
+#endif /* MALI_USE_CSF */
}
#if MALI_USE_CSF
@@ -1164,10 +1419,9 @@
kbdev->csf.shared_reg_rbtree = RB_ROOT;
- shared_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree,
- shared_reg_start_pfn,
- shared_reg_size,
- KBASE_REG_ZONE_MCU_SHARED);
+ shared_reg =
+ kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, shared_reg_start_pfn,
+ shared_reg_size, KBASE_REG_ZONE_MCU_SHARED);
if (!shared_reg)
return -ENOMEM;
@@ -1176,10 +1430,30 @@
}
#endif
+static void kbasep_mem_page_size_init(struct kbase_device *kbdev)
+{
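+	/* Precedence sketch: an explicit LARGE_PAGE_ALLOC_OVERRIDE Kconfig
+	 * selection wins; otherwise the GPU's LARGE_PAGE_ALLOC feature bit
+	 * picks the default.
+	 */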
+#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE)
+#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC)
+ kbdev->pagesize_2mb = true;
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC) != 1) {
+ dev_warn(
+ kbdev->dev,
+ "2MB page is enabled by force while current GPU-HW doesn't meet the requirement to do so.\n");
+ }
+#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */
+ kbdev->pagesize_2mb = false;
+#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */
+#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */
+ /* Set it to the default based on which GPU is present */
+ kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC);
+#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */
+}
+
int kbase_mem_init(struct kbase_device *kbdev)
{
int err = 0;
struct kbasep_mem_device *memdev;
+ char va_region_slab_name[VA_REGION_SLAB_NAME_SIZE];
#if IS_ENABLED(CONFIG_OF)
struct device_node *mgm_node = NULL;
#endif
@@ -1188,6 +1462,20 @@
memdev = &kbdev->memdev;
+ kbasep_mem_page_size_init(kbdev);
+
+ scnprintf(va_region_slab_name, VA_REGION_SLAB_NAME_SIZE, VA_REGION_SLAB_NAME_PREFIX "%s",
+ kbdev->devname);
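+	/* Illustrative result, assuming a device named "mali0":
+	 * "va-region-slab-mali0".
+	 */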
+
+ /* Initialize slab cache for kbase_va_regions */
+ kbdev->va_region_slab =
+ kmem_cache_create(va_region_slab_name, sizeof(struct kbase_va_region), 0, 0, NULL);
+ if (kbdev->va_region_slab == NULL) {
+ dev_err(kbdev->dev, "Failed to create va_region_slab\n");
+ return -ENOMEM;
+ }
+
+ kbase_mem_migrate_init(kbdev);
kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults,
KBASE_MEM_POOL_MAX_SIZE_KCTX);
@@ -1250,8 +1538,7 @@
kbase_mem_pool_group_config_set_max_size(&mem_pool_defaults,
KBASE_MEM_POOL_MAX_SIZE_KBDEV);
- err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev,
- &mem_pool_defaults, NULL);
+ err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev, &mem_pool_defaults, NULL);
}
return err;
@@ -1277,6 +1564,11 @@
kbase_mem_pool_group_term(&kbdev->mem_pools);
+ kbase_mem_migrate_term(kbdev);
+
+ kmem_cache_destroy(kbdev->va_region_slab);
+ kbdev->va_region_slab = NULL;
+
WARN_ON(kbdev->total_gpu_pages);
WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root));
WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root));
@@ -1288,7 +1580,9 @@
KBASE_EXPORT_TEST_API(kbase_mem_term);
/**
- * Allocate a free region object.
+ * kbase_alloc_free_region - Allocate a free region object.
+ *
+ * @kbdev: kbase device
* @rbtree: Backlink to the red-black tree of memory regions.
* @start_pfn: The Page Frame Number in GPU virtual address space.
* @nr_pages: The size of the region in pages.
@@ -1299,9 +1593,10 @@
*
* zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA.
*
+ * Return: pointer to the allocated region object on success, NULL otherwise.
*/
-struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
- u64 start_pfn, size_t nr_pages, int zone)
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree,
+ u64 start_pfn, size_t nr_pages, int zone)
{
struct kbase_va_region *new_reg;
@@ -1313,12 +1608,13 @@
/* 64-bit address range is the max */
KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
- new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+ new_reg = kmem_cache_zalloc(kbdev->va_region_slab, GFP_KERNEL);
if (!new_reg)
return NULL;
- new_reg->va_refcnt = 1;
+ kbase_refcount_set(&new_reg->va_refcnt, 1);
+ atomic_set(&new_reg->no_user_free_count, 0);
new_reg->cpu_alloc = NULL; /* no alloc bound yet */
new_reg->gpu_alloc = NULL; /* no alloc bound yet */
new_reg->rbtree = rbtree;
@@ -1337,35 +1633,9 @@
KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
-static struct kbase_context *kbase_reg_flags_to_kctx(
- struct kbase_va_region *reg)
-{
- struct kbase_context *kctx = NULL;
- struct rb_root *rbtree = reg->rbtree;
-
- switch (reg->flags & KBASE_REG_ZONE_MASK) {
- case KBASE_REG_ZONE_CUSTOM_VA:
- kctx = container_of(rbtree, struct kbase_context,
- reg_rbtree_custom);
- break;
- case KBASE_REG_ZONE_SAME_VA:
- kctx = container_of(rbtree, struct kbase_context,
- reg_rbtree_same);
- break;
- case KBASE_REG_ZONE_EXEC_VA:
- kctx = container_of(rbtree, struct kbase_context,
- reg_rbtree_exec);
- break;
- default:
- WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags);
- break;
- }
-
- return kctx;
-}
-
/**
- * Free a region object.
+ * kbase_free_alloced_region - Free a region object.
+ *
* @reg: Region
*
* The described region must be freed of any mapping.
@@ -1374,6 +1644,7 @@
* alloc object will be released.
* It is a bug if no alloc object exists for non-free regions.
*
+ * If the region is KBASE_REG_ZONE_MCU_SHARED, it is freed.
*/
void kbase_free_alloced_region(struct kbase_va_region *reg)
{
@@ -1397,6 +1668,13 @@
(void *)reg);
#if MALI_USE_CSF
if (reg->flags & KBASE_REG_CSF_EVENT)
+ /*
+ * This should not be reachable if called from 'mcu_shared' functions
+ * such as:
+ * kbase_csf_firmware_mcu_shared_mapping_init
+ * kbase_csf_firmware_mcu_shared_mapping_term
+ */
+
kbase_unlink_event_mem_page(kctx, reg);
#endif
@@ -1410,8 +1688,6 @@
* on the list at termination time of the region tracker.
*/
if (!list_empty(®->gpu_alloc->evict_node)) {
- mutex_unlock(&kctx->jit_evict_lock);
-
/*
* Unlink the physical allocation before unmaking it
* evictable so that the allocation isn't grown back to
@@ -1421,6 +1697,8 @@
reg->cpu_alloc->reg = NULL;
if (reg->cpu_alloc != reg->gpu_alloc)
reg->gpu_alloc->reg = NULL;
+
+ mutex_unlock(&kctx->jit_evict_lock);
/*
* If a region has been made evictable then we must
@@ -1457,7 +1735,9 @@
KBASE_EXPORT_TEST_API(kbase_free_alloced_region);
-int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align)
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
+ u64 addr, size_t nr_pages, size_t align,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
{
int err;
size_t i = 0;
@@ -1494,41 +1774,46 @@
KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
for (i = 0; i < alloc->imported.alias.nents; i++) {
if (alloc->imported.alias.aliased[i].alloc) {
- err = kbase_mmu_insert_pages(kctx->kbdev,
- &kctx->mmu,
- reg->start_pfn + (i * stride),
- alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
- alloc->imported.alias.aliased[i].length,
- reg->flags & gwt_mask,
- kctx->as_nr,
- group_id);
+ err = kbase_mmu_insert_aliased_pages(
+ kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride),
+ alloc->imported.alias.aliased[i].alloc->pages +
+ alloc->imported.alias.aliased[i].offset,
+ alloc->imported.alias.aliased[i].length,
+ reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info,
+ NULL);
if (err)
- goto bad_insert;
+ goto bad_aliased_insert;
/* Note: mapping count is tracked at alias
* creation time
*/
} else {
- err = kbase_mmu_insert_single_page(kctx,
- reg->start_pfn + i * stride,
- kctx->aliasing_sink_page,
+ err = kbase_mmu_insert_single_aliased_page(
+ kctx, reg->start_pfn + i * stride, kctx->aliasing_sink_page,
alloc->imported.alias.aliased[i].length,
- (reg->flags & mask & gwt_mask) | attr,
- group_id);
+ (reg->flags & mask & gwt_mask) | attr, group_id,
+ mmu_sync_info);
if (err)
- goto bad_insert;
+ goto bad_aliased_insert;
}
}
} else {
- err = kbase_mmu_insert_pages(kctx->kbdev,
- &kctx->mmu,
- reg->start_pfn,
- kbase_get_gpu_phy_pages(reg),
- kbase_reg_current_backed_size(reg),
- reg->flags & gwt_mask,
- kctx->as_nr,
- group_id);
+ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM ||
+ reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+
+ err = kbase_mmu_insert_imported_pages(
+ kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, reg);
+ } else {
+ err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ kbase_get_gpu_phy_pages(reg),
+ kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr, group_id,
+ mmu_sync_info, reg, true);
+ }
+
if (err)
goto bad_insert;
kbase_mem_phy_alloc_gpu_mapped(alloc);
@@ -1538,9 +1823,9 @@
!WARN_ON(reg->nr_pages < reg->gpu_alloc->nents) &&
reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM &&
reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
- /* For padded imported dma-buf memory, map the dummy aliasing
- * page from the end of the dma-buf pages, to the end of the
- * region using a read only mapping.
+ /* For padded imported dma-buf or user-buf memory, map the dummy
+ * aliasing page from the end of the imported pages, to the end of
+ * the region using a read only mapping.
*
* Only map when it's imported dma-buf memory that is currently
* mapped.
@@ -1548,37 +1833,46 @@
* Assume reg->gpu_alloc->nents is the number of actual pages
* in the dma-buf memory.
*/
- err = kbase_mmu_insert_single_page(kctx,
- reg->start_pfn + reg->gpu_alloc->nents,
- kctx->aliasing_sink_page,
- reg->nr_pages - reg->gpu_alloc->nents,
- (reg->flags | KBASE_REG_GPU_RD) &
- ~KBASE_REG_GPU_WR,
- KBASE_MEM_GROUP_SINK);
+ err = kbase_mmu_insert_single_imported_page(
+ kctx, reg->start_pfn + reg->gpu_alloc->nents, kctx->aliasing_sink_page,
+ reg->nr_pages - reg->gpu_alloc->nents,
+ (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK,
+ mmu_sync_info);
if (err)
goto bad_insert;
}
return err;
-bad_insert:
- kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
- reg->start_pfn, reg->nr_pages,
- kctx->as_nr);
+bad_aliased_insert:
+ while (i-- > 0) {
+ struct tagged_addr *phys_alloc = NULL;
+ u64 const stride = alloc->imported.alias.stride;
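+		/* Unwind only the aliased entries inserted so far: i holds the
+		 * failing index, so the loop below walks i-1 down to 0.
+		 */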
- kbase_remove_va_region(reg);
+ if (alloc->imported.alias.aliased[i].alloc != NULL)
+ phys_alloc = alloc->imported.alias.aliased[i].alloc->pages +
+ alloc->imported.alias.aliased[i].offset;
+
+ kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride),
+ phys_alloc, alloc->imported.alias.aliased[i].length,
+ alloc->imported.alias.aliased[i].length, kctx->as_nr,
+ false);
+ }
+bad_insert:
+ kbase_remove_va_region(kctx->kbdev, reg);
return err;
}
KBASE_EXPORT_TEST_API(kbase_gpu_mmap);
-static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
- struct kbase_mem_phy_alloc *alloc, bool writeable);
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc,
+ struct kbase_va_region *reg, bool writeable);
int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
{
int err = 0;
+ struct kbase_mem_phy_alloc *alloc;
if (reg->start_pfn == 0)
return 0;
@@ -1586,43 +1880,98 @@
if (!reg->gpu_alloc)
return -EINVAL;
- /* Tear down down GPU page tables, depending on memory type. */
- switch (reg->gpu_alloc->type) {
- case KBASE_MEM_TYPE_ALIAS: /* Fall-through */
- case KBASE_MEM_TYPE_IMPORTED_UMM:
- err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
- reg->start_pfn, reg->nr_pages, kctx->as_nr);
+ alloc = reg->gpu_alloc;
+
+ /* Tear down GPU page tables, depending on memory type. */
+ switch (alloc->type) {
+ case KBASE_MEM_TYPE_ALIAS: {
+ size_t i = 0;
+		/* Due to the way the numbers of valid PTEs and ATEs are
+		 * currently tracked, only the GPU virtual range that is backed
+		 * & mapped should be passed to the kbase_mmu_teardown_pages()
+		 * function; hence individual aliased regions need to be
+		 * unmapped separately.
+ */
+ for (i = 0; i < alloc->imported.alias.nents; i++) {
+ struct tagged_addr *phys_alloc = NULL;
+ int err_loop;
+
+ if (alloc->imported.alias.aliased[i].alloc != NULL)
+ phys_alloc = alloc->imported.alias.aliased[i].alloc->pages +
+ alloc->imported.alias.aliased[i].offset;
+
+ err_loop = kbase_mmu_teardown_pages(
+ kctx->kbdev, &kctx->mmu,
+ reg->start_pfn + (i * alloc->imported.alias.stride),
+ phys_alloc, alloc->imported.alias.aliased[i].length,
+ alloc->imported.alias.aliased[i].length, kctx->as_nr,
+ false);
+
+ if (WARN_ON_ONCE(err_loop))
+ err = err_loop;
+ }
+ }
break;
- default:
- err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
- reg->start_pfn, kbase_reg_current_backed_size(reg),
- kctx->as_nr);
+ case KBASE_MEM_TYPE_IMPORTED_UMM: {
+ size_t nr_phys_pages = reg->nr_pages;
+ size_t nr_virt_pages = reg->nr_pages;
+ /* If the region has import padding and falls under the threshold for
+ * issuing a partial GPU cache flush, we want to reduce the number of
+ * physical pages that get flushed.
+		 *
+		 * This is symmetric with the case of mapping the memory, which first maps
+ * each imported physical page to a separate virtual page, and then
+ * maps the single aliasing sink page to each of the virtual padding
+ * pages.
+ */
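+		/* Hypothetical example: with alloc->nents == 4 and import
+		 * padding, nr_phys_pages becomes 5 (the 4 real pages plus the
+		 * single aliasing sink page), while nr_virt_pages remains
+		 * reg->nr_pages.
+		 */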
+ if (reg->flags & KBASE_REG_IMPORT_PAD)
+ nr_phys_pages = alloc->nents + 1;
+
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages, nr_phys_pages, nr_virt_pages,
+ kctx->as_nr, true);
+ }
+ break;
+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+ size_t nr_reg_pages = kbase_reg_current_backed_size(reg);
+
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages, nr_reg_pages, nr_reg_pages,
+ kctx->as_nr, true);
+ }
+ break;
+ default: {
+ size_t nr_reg_pages = kbase_reg_current_backed_size(reg);
+
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages, nr_reg_pages, nr_reg_pages,
+ kctx->as_nr, false);
+ }
break;
}
/* Update tracking, and other cleanup, depending on memory type. */
- switch (reg->gpu_alloc->type) {
+ switch (alloc->type) {
case KBASE_MEM_TYPE_ALIAS:
/* We mark the source allocs as unmapped from the GPU when
* putting reg's allocs
*/
break;
case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
- struct kbase_alloc_import_user_buf *user_buf =
- ®->gpu_alloc->imported.user_buf;
+ struct kbase_alloc_import_user_buf *user_buf = &alloc->imported.user_buf;
- if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) {
- user_buf->current_mapping_usage_count &=
- ~PINNED_ON_IMPORT;
+ if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) {
+ user_buf->current_mapping_usage_count &= ~PINNED_ON_IMPORT;
- /* The allocation could still have active mappings. */
- if (user_buf->current_mapping_usage_count == 0) {
- kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc,
- (reg->flags & KBASE_REG_GPU_WR));
- }
+ /* The allocation could still have active mappings. */
+ if (user_buf->current_mapping_usage_count == 0) {
+ kbase_jd_user_buf_unmap(kctx, alloc, reg,
+ (reg->flags &
+ (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)));
}
}
- /* Fall-through */
+ }
+ fallthrough;
default:
kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
break;
@@ -1741,7 +2090,8 @@
BUG_ON(!cpu_page);
BUG_ON(offset + size > PAGE_SIZE);
- dma_addr = kbase_dma_addr(cpu_page) + offset;
+ dma_addr = kbase_dma_addr_from_tagged(t_cpu_pa) + offset;
+
if (sync_fn == KBASE_SYNC_TO_CPU)
dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr,
size, DMA_BIDIRECTIONAL);
@@ -1752,29 +2102,30 @@
void *src = NULL;
void *dst = NULL;
struct page *gpu_page;
+ dma_addr_t dma_addr;
if (WARN(!gpu_pa, "No GPU PA found for infinite cache op"))
return;
gpu_page = pfn_to_page(PFN_DOWN(gpu_pa));
+ dma_addr = kbase_dma_addr_from_tagged(t_gpu_pa) + offset;
if (sync_fn == KBASE_SYNC_TO_DEVICE) {
src = ((unsigned char *)kmap(cpu_page)) + offset;
dst = ((unsigned char *)kmap(gpu_page)) + offset;
} else if (sync_fn == KBASE_SYNC_TO_CPU) {
- dma_sync_single_for_cpu(kctx->kbdev->dev,
- kbase_dma_addr(gpu_page) + offset,
- size, DMA_BIDIRECTIONAL);
+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, size,
+ DMA_BIDIRECTIONAL);
src = ((unsigned char *)kmap(gpu_page)) + offset;
dst = ((unsigned char *)kmap(cpu_page)) + offset;
}
+
memcpy(dst, src, size);
kunmap(gpu_page);
kunmap(cpu_page);
if (sync_fn == KBASE_SYNC_TO_DEVICE)
- dma_sync_single_for_device(kctx->kbdev->dev,
- kbase_dma_addr(gpu_page) + offset,
- size, DMA_BIDIRECTIONAL);
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, size,
+ DMA_BIDIRECTIONAL);
}
}
@@ -1920,29 +2271,27 @@
__func__, (void *)reg, (void *)kctx);
lockdep_assert_held(&kctx->reg_lock);
- if (reg->flags & KBASE_REG_NO_USER_FREE) {
+ if (kbase_va_region_is_no_user_free(reg)) {
dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n");
return -EINVAL;
}
- /*
- * Unlink the physical allocation before unmaking it evictable so
- * that the allocation isn't grown back to its last backed size
- * as we're going to unmap it anyway.
- */
- reg->cpu_alloc->reg = NULL;
- if (reg->cpu_alloc != reg->gpu_alloc)
- reg->gpu_alloc->reg = NULL;
-
- /*
- * If a region has been made evictable then we must unmake it
+ /* If a region has been made evictable then we must unmake it
* before trying to free it.
* If the memory hasn't been reclaimed it will be unmapped and freed
* below, if it has been reclaimed then the operations below are no-ops.
*/
if (reg->flags & KBASE_REG_DONT_NEED) {
- KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
- KBASE_MEM_TYPE_NATIVE);
+ WARN_ON(reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE);
+ mutex_lock(&kctx->jit_evict_lock);
+ /* Unlink the physical allocation before unmaking it evictable so
+ * that the allocation isn't grown back to its last backed size
+ * as we're going to unmap it anyway.
+ */
+ reg->cpu_alloc->reg = NULL;
+ if (reg->cpu_alloc != reg->gpu_alloc)
+ reg->gpu_alloc->reg = NULL;
+ mutex_unlock(&kctx->jit_evict_lock);
kbase_mem_evictable_unmake(reg->gpu_alloc);
}
@@ -1952,22 +2301,35 @@
goto out;
}
+#if MALI_USE_CSF
+ if (((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_FIXED_VA) ||
+ ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_EXEC_FIXED_VA)) {
+ if (reg->flags & KBASE_REG_FIXED_ADDRESS)
+ atomic64_dec(&kctx->num_fixed_allocs);
+ else
+ atomic64_dec(&kctx->num_fixable_allocs);
+ }
+#endif
+
/* This will also free the physical pages */
kbase_free_alloced_region(reg);
- out:
+out:
return err;
}
KBASE_EXPORT_TEST_API(kbase_mem_free_region);
/**
- * Free the region from the GPU and unregister it.
+ * kbase_mem_free - Free the region from the GPU and unregister it.
+ *
* @kctx: KBase context
* @gpu_addr: GPU address to free
*
* This function implements the free operation on a memory segment.
* It will loudly fail if called with outstanding mappings.
+ *
+ * Return: 0 on success.
*/
int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
{
@@ -1979,12 +2341,14 @@
__func__, gpu_addr, (void *)kctx);
if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) {
- dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid");
+ dev_warn(kctx->kbdev->dev, "%s: gpu_addr parameter is invalid", __func__);
return -EINVAL;
}
if (gpu_addr == 0) {
- dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n");
+ dev_warn(kctx->kbdev->dev,
+ "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using %s\n",
+ __func__);
return -EINVAL;
}
kbase_gpu_vm_lock(kctx);
@@ -2010,8 +2374,8 @@
/* Validate the region */
reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
if (kbase_is_region_invalid_or_free(reg)) {
- dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX",
- gpu_addr);
+ dev_warn(kctx->kbdev->dev, "%s called with nonexistent gpu_addr 0x%llX",
+ __func__, gpu_addr);
err = -EINVAL;
goto out_unlock;
}
@@ -2026,7 +2390,7 @@
err = kbase_mem_free_region(kctx, reg);
}
- out_unlock:
+out_unlock:
kbase_gpu_vm_unlock(kctx);
return err;
}
@@ -2126,11 +2490,19 @@
if (flags & BASEP_MEM_PERMANENT_KERNEL_MAPPING)
reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING;
- if (flags & BASEP_MEM_NO_USER_FREE)
- reg->flags |= KBASE_REG_NO_USER_FREE;
+ if (flags & BASEP_MEM_NO_USER_FREE) {
+ kbase_gpu_vm_lock(kctx);
+ kbase_va_region_no_user_free_inc(reg);
+ kbase_gpu_vm_unlock(kctx);
+ }
if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE)
reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE;
+
+#if MALI_USE_CSF
+ if (flags & BASE_MEM_FIXED)
+ reg->flags |= KBASE_REG_FIXED_ADDRESS;
+#endif
return 0;
}
@@ -2174,18 +2546,14 @@
tp = alloc->pages + alloc->nents;
-#ifdef CONFIG_MALI_2MB_ALLOC
/* Check if we have enough pages requested so we can allocate a large
* page (512 * 4KB = 2MB )
*/
- if (nr_left >= (SZ_2M / SZ_4K)) {
+ if (kbdev->pagesize_2mb && nr_left >= (SZ_2M / SZ_4K)) {
int nr_lp = nr_left / (SZ_2M / SZ_4K);
- res = kbase_mem_pool_alloc_pages(
- &kctx->mem_pools.large[alloc->group_id],
- nr_lp * (SZ_2M / SZ_4K),
- tp,
- true);
+ res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id],
+ nr_lp * (SZ_2M / SZ_4K), tp, true, kctx->task);
if (res > 0) {
nr_left -= res;
@@ -2239,7 +2607,7 @@
err = kbase_mem_pool_grow(
&kctx->mem_pools.large[alloc->group_id],
- 1);
+ 1, kctx->task);
if (err)
break;
} while (1);
@@ -2280,13 +2648,11 @@
}
}
}
-no_new_partial:
-#endif
+no_new_partial:
if (nr_left) {
- res = kbase_mem_pool_alloc_pages(
- &kctx->mem_pools.small[alloc->group_id],
- nr_left, tp, false);
+ res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[alloc->group_id], nr_left,
+ tp, false, kctx->task);
if (res <= 0)
goto alloc_failed;
}
@@ -2345,17 +2711,16 @@
lockdep_assert_held(&pool->pool_lock);
-#if !defined(CONFIG_MALI_2MB_ALLOC)
- WARN_ON(pool->order);
-#endif
+ kctx = alloc->imported.native.kctx;
+ kbdev = kctx->kbdev;
+
+ if (!kbdev->pagesize_2mb)
+ WARN_ON(pool->order);
if (alloc->reg) {
if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
goto invalid_request;
}
-
- kctx = alloc->imported.native.kctx;
- kbdev = kctx->kbdev;
lockdep_assert_held(&kctx->mem_partials_lock);
@@ -2375,8 +2740,7 @@
tp = alloc->pages + alloc->nents;
new_pages = tp;
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (pool->order) {
+ if (kbdev->pagesize_2mb && pool->order) {
int nr_lp = nr_left / (SZ_2M / SZ_4K);
res = kbase_mem_pool_alloc_pages_locked(pool,
@@ -2460,15 +2824,12 @@
if (nr_left)
goto alloc_failed;
} else {
-#endif
res = kbase_mem_pool_alloc_pages_locked(pool,
nr_left,
tp);
if (res <= 0)
goto alloc_failed;
-#ifdef CONFIG_MALI_2MB_ALLOC
}
-#endif
KBASE_TLSTREAM_AUX_PAGESALLOC(
kbdev,
@@ -2489,8 +2850,7 @@
struct tagged_addr *start_free = alloc->pages + alloc->nents;
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (pool->order) {
+ if (kbdev->pagesize_2mb && pool->order) {
while (nr_pages_to_free) {
if (is_huge_head(*start_free)) {
kbase_mem_pool_free_pages_locked(
@@ -2508,15 +2868,12 @@
}
}
} else {
-#endif
kbase_mem_pool_free_pages_locked(pool,
nr_pages_to_free,
start_free,
false, /* not dirty */
true); /* return to pool */
-#ifdef CONFIG_MALI_2MB_ALLOC
}
-#endif
}
kbase_process_page_usage_dec(kctx, nr_pages_requested);
@@ -2778,6 +3135,13 @@
/**
* kbase_jd_user_buf_unpin_pages - Release the pinned pages of a user buffer.
* @alloc: The allocation for the imported user buffer.
+ *
+ * This must only be called when terminating an alloc, when its refcount
+ * (number of users) has become 0. This also ensures it is only called once all
+ * CPU mappings have been closed.
+ *
+ * Instead call kbase_jd_user_buf_unmap() if you need to unpin pages on active
+ * allocations
*/
static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc);
#endif
@@ -2908,8 +3272,31 @@
out_term:
return -1;
}
-
KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages);
+
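+/* Walk every 4KB page backing @alloc and update its migration status under
+ * the per-page migrate_lock; 4KB pages that are part of a large page are
+ * skipped, as large pages are excluded from migration.
+ */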
+void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc,
+ enum kbase_page_status status)
+{
+ u32 i = 0;
+
+ for (; i < alloc->nents; i++) {
+ struct tagged_addr phys = alloc->pages[i];
+ struct kbase_page_metadata *page_md = kbase_page_private(as_page(phys));
+
+ /* Skip the 4KB page that is part of a large page, as the large page is
+ * excluded from the migration process.
+ */
+ if (is_huge(phys) || is_partial(phys))
+ continue;
+
+ if (!page_md)
+ continue;
+
+ spin_lock(&page_md->migrate_lock);
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)status);
+ spin_unlock(&page_md->migrate_lock);
+ }
+}
bool kbase_check_alloc_flags(unsigned long flags)
{
@@ -2983,6 +3370,14 @@
(BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM))
return false;
+#if MALI_USE_CSF
+ if ((flags & BASE_MEM_SAME_VA) && (flags & (BASE_MEM_FIXABLE | BASE_MEM_FIXED)))
+ return false;
+
+ if ((flags & BASE_MEM_FIXABLE) && (flags & BASE_MEM_FIXED))
+ return false;
+#endif
+
return true;
}
@@ -3004,7 +3399,11 @@
if (flags & BASE_MEM_GROW_ON_GPF)
return false;
-#if !MALI_USE_CSF
+#if MALI_USE_CSF
+ /* Imported memory cannot be fixed */
+ if ((flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE)))
+ return false;
+#else
/* Imported memory cannot be aligned to the end of its initial commit */
if (flags & BASE_MEM_TILER_ALIGN_TOP)
return false;
@@ -3139,10 +3538,6 @@
#undef KBASE_MSG_PRE
}
-/**
- * Acquire the per-context region list lock
- * @kctx: KBase context
- */
void kbase_gpu_vm_lock(struct kbase_context *kctx)
{
KBASE_DEBUG_ASSERT(kctx != NULL);
@@ -3151,10 +3546,6 @@
KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
-/**
- * Release the per-context region list lock
- * @kctx: KBase context
- */
void kbase_gpu_vm_unlock(struct kbase_context *kctx)
{
KBASE_DEBUG_ASSERT(kctx != NULL);
@@ -3165,7 +3556,7 @@
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct kbase_jit_debugfs_data {
- int (*func)(struct kbase_jit_debugfs_data *);
+ int (*func)(struct kbase_jit_debugfs_data *data);
struct mutex lock;
struct kbase_context *kctx;
u64 active_value;
@@ -3388,14 +3779,9 @@
void kbase_jit_debugfs_init(struct kbase_context *kctx)
{
/* prevent unprivileged use of debug file system
- * in old kernel version
- */
-#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
- /* only for newer kernel version debug file system is safe */
+ * in old kernel version
+ */
const mode_t mode = 0444;
-#else
- const mode_t mode = 0400;
-#endif
/* Caller already ensures this, but we keep the pattern for
* maintenance safety.
@@ -3469,7 +3855,15 @@
mutex_unlock(&kctx->jit_evict_lock);
kbase_gpu_vm_lock(kctx);
- reg->flags &= ~KBASE_REG_NO_USER_FREE;
+
+ /*
+ * Incrementing the refcount is prevented on JIT regions.
+ * If/when this ever changes we would need to compensate
+ * by implementing "free on putting the last reference",
+ * but only for JIT regions.
+ */
+ WARN_ON(atomic_read(®->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(reg);
kbase_mem_free_region(kctx, reg);
kbase_gpu_vm_unlock(kctx);
} while (1);
@@ -3484,6 +3878,7 @@
INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker);
#if MALI_USE_CSF
+ mutex_init(&kctx->csf.kcpu_queues.jit_lock);
INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_cmds_head);
INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_blocked_queues);
#else /* !MALI_USE_CSF */
@@ -3698,7 +4093,8 @@
static int kbase_jit_grow(struct kbase_context *kctx,
const struct base_jit_alloc_info *info,
struct kbase_va_region *reg,
- struct kbase_sub_alloc **prealloc_sas)
+ struct kbase_sub_alloc **prealloc_sas,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
{
size_t delta;
size_t pages_required;
@@ -3728,18 +4124,14 @@
delta = info->commit_pages - reg->gpu_alloc->nents;
pages_required = delta;
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (pages_required >= (SZ_2M / SZ_4K)) {
+ if (kctx->kbdev->pagesize_2mb && pages_required >= (SZ_2M / SZ_4K)) {
pool = &kctx->mem_pools.large[kctx->jit_group_id];
/* Round up to number of 2 MB pages required */
pages_required += ((SZ_2M / SZ_4K) - 1);
pages_required /= (SZ_2M / SZ_4K);
} else {
-#endif
pool = &kctx->mem_pools.small[kctx->jit_group_id];
-#ifdef CONFIG_MALI_2MB_ALLOC
}
-#endif
if (reg->cpu_alloc != reg->gpu_alloc)
pages_required *= 2;
@@ -3760,7 +4152,7 @@
spin_unlock(&kctx->mem_partials_lock);
kbase_gpu_vm_unlock(kctx);
- ret = kbase_mem_pool_grow(pool, pool_delta);
+ ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task);
kbase_gpu_vm_lock(kctx);
if (ret)
@@ -3795,7 +4187,7 @@
spin_unlock(&kctx->mem_partials_lock);
ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages,
- old_size);
+ old_size, mmu_sync_info);
/*
* The grow failed so put the allocation back in the
* pool and return failure.
@@ -3920,11 +4312,11 @@
const struct base_jit_alloc_info *info,
bool ignore_pressure_limit)
{
-#if MALI_USE_CSF
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
-#else
+#if !MALI_USE_CSF
lockdep_assert_held(&kctx->jctx.lock);
-#endif
+#else /* MALI_USE_CSF */
+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
+#endif /* !MALI_USE_CSF */
#if MALI_JIT_PRESSURE_LIMIT_BASE
if (!ignore_pressure_limit &&
@@ -4010,23 +4402,28 @@
struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
int i;
-#if MALI_USE_CSF
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
-#else
+ /* Calls to this function are inherently synchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
+
+#if !MALI_USE_CSF
lockdep_assert_held(&kctx->jctx.lock);
-#endif
+#else /* MALI_USE_CSF */
+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
+#endif /* !MALI_USE_CSF */
if (!jit_allow_allocate(kctx, info, ignore_pressure_limit))
return NULL;
-#ifdef CONFIG_MALI_2MB_ALLOC
- /* Preallocate memory for the sub-allocation structs */
- for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
- prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
- if (!prealloc_sas[i])
- goto end;
+ if (kctx->kbdev->pagesize_2mb) {
+ /* Preallocate memory for the sub-allocation structs */
+ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
+ prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
+ if (!prealloc_sas[i])
+ goto end;
+ }
}
-#endif
kbase_gpu_vm_lock(kctx);
mutex_lock(&kctx->jit_evict_lock);
@@ -4102,7 +4499,8 @@
* so any state protected by that lock might need to be
* re-evaluated if more code is added here in future.
*/
- ret = kbase_jit_grow(kctx, info, reg, prealloc_sas);
+ ret = kbase_jit_grow(kctx, info, reg, prealloc_sas,
+ mmu_sync_info);
#if MALI_JIT_PRESSURE_LIMIT_BASE
if (!ignore_pressure_limit)
@@ -4114,7 +4512,7 @@
if (ret < 0) {
/*
* An update to an allocation from the pool failed,
- * chances are slim a new allocation would fair any
+ * chances are slim a new allocation would fare any
* better so return the allocation to the pool and
* return the function with failure.
*/
@@ -4136,6 +4534,17 @@
mutex_unlock(&kctx->jit_evict_lock);
reg = NULL;
goto end;
+ } else {
+ /* A suitable JIT allocation existed on the evict list, so we need
+ * to make sure that the NOT_MOVABLE property is cleared.
+ */
+ if (kbase_page_migration_enabled) {
+ kbase_gpu_vm_lock(kctx);
+ mutex_lock(&kctx->jit_evict_lock);
+ kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED);
+ mutex_unlock(&kctx->jit_evict_lock);
+ kbase_gpu_vm_unlock(kctx);
+ }
}
} else {
/* No suitable JIT allocation was found so create a new one */
@@ -4150,7 +4559,7 @@
flags |= BASE_MEM_TILER_ALIGN_TOP;
#endif /* !MALI_USE_CSF */
- flags |= base_mem_group_id_set(kctx->jit_group_id);
+ flags |= kbase_mem_group_id_set(kctx->jit_group_id);
#if MALI_JIT_PRESSURE_LIMIT_BASE
if (!ignore_pressure_limit) {
flags |= BASEP_MEM_PERFORM_JIT_TRIM;
@@ -4165,8 +4574,8 @@
mutex_unlock(&kctx->jit_evict_lock);
kbase_gpu_vm_unlock(kctx);
- reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
- info->extension, &flags, &gpu_addr);
+ reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, info->extension,
+ &flags, &gpu_addr, mmu_sync_info);
if (!reg) {
/* Most likely not enough GPU virtual space left for
* the new JIT allocation.
@@ -4192,6 +4601,29 @@
}
}
+ /* Similarly to tiler heap init, there is a short window of time
+ * where the (either recycled or newly allocated, in our case) region has
+ * "no user free" count incremented but is still missing the DONT_NEED flag, and
+ * doesn't yet have the ACTIVE_JIT_ALLOC flag either. Temporarily leaking the
+ * allocation is the least bad option that doesn't lead to a security issue down the
+ * line (it will eventually be cleaned up during context termination).
+ *
+ * We also need to call kbase_gpu_vm_lock regardless, as we're updating the region
+ * flags.
+ */
+ kbase_gpu_vm_lock(kctx);
+ if (unlikely(atomic_read(&reg->no_user_free_count) > 1)) {
+ kbase_gpu_vm_unlock(kctx);
+ dev_err(kctx->kbdev->dev, "JIT region has no_user_free_count > 1!\n");
+
+ mutex_lock(&kctx->jit_evict_lock);
+ list_move(&reg->jit_node, &kctx->jit_pool_head);
+ mutex_unlock(&kctx->jit_evict_lock);
+
+ reg = NULL;
+ goto end;
+ }
+
trace_mali_jit_alloc(reg, info->id);
kctx->jit_current_allocations++;
@@ -4209,6 +4641,7 @@
kbase_jit_report_update_pressure(kctx, reg, info->va_pages,
KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
+ kbase_gpu_vm_unlock(kctx);
end:
for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
@@ -4220,6 +4653,12 @@
void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
{
u64 old_pages;
+
+#if !MALI_USE_CSF
+ lockdep_assert_held(&kctx->jctx.lock);
+#else /* MALI_USE_CSF */
+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
+#endif /* !MALI_USE_CSF */
/* JIT id not immediately available here, so use 0u */
trace_mali_jit_free(reg, 0u);
@@ -4273,6 +4712,12 @@
list_move(&reg->jit_node, &kctx->jit_pool_head);
+ /* Inactive JIT regions should be freed by the shrinker and not impacted
+ * by page migration. Once freed, they will enter the page migration
+ * state machine via the mempools.
+ */
+ if (kbase_page_migration_enabled)
+ kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE);
mutex_unlock(&kctx->jit_evict_lock);
}
@@ -4319,7 +4764,14 @@
mutex_unlock(&kctx->jit_evict_lock);
if (reg) {
- reg->flags &= ~KBASE_REG_NO_USER_FREE;
+ /*
+ * Incrementing the refcount is prevented on JIT regions.
+ * If/when this ever changes we would need to compensate
+ * by implementing "free on putting the last reference",
+ * but only for JIT regions.
+ */
+ WARN_ON(atomic_read(&reg->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(reg);
kbase_mem_free_region(kctx, reg);
}
@@ -4341,7 +4793,14 @@
list_del(&walker->jit_node);
list_del_init(&walker->gpu_alloc->evict_node);
mutex_unlock(&kctx->jit_evict_lock);
- walker->flags &= ~KBASE_REG_NO_USER_FREE;
+ /*
+ * Incrementing the refcount is prevented on JIT regions.
+ * If/when this ever changes we would need to compensate
+ * by implementing "free on putting the last reference",
+ * but only for JIT regions.
+ */
+ WARN_ON(atomic_read(&walker->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(walker);
kbase_mem_free_region(kctx, walker);
mutex_lock(&kctx->jit_evict_lock);
}
@@ -4353,7 +4812,14 @@
list_del(&walker->jit_node);
list_del_init(&walker->gpu_alloc->evict_node);
mutex_unlock(&kctx->jit_evict_lock);
- walker->flags &= ~KBASE_REG_NO_USER_FREE;
+ /*
+ * Incrementing the refcount is prevented on JIT regions.
+ * If/when this ever changes we would need to compensate
+ * by implementing "free on putting the last reference",
+ * but only for JIT regions.
+ */
+ WARN_ON(atomic_read(&walker->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(walker);
kbase_mem_free_region(kctx, walker);
mutex_lock(&kctx->jit_evict_lock);
}
@@ -4396,8 +4862,8 @@
addr_start = reg->heap_info_gpu_addr - jit_report_gpu_mem_offset;
- ptr = kbase_vmap(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE,
- &mapping);
+ ptr = kbase_vmap_prot(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE,
+ KBASE_REG_CPU_RD, &mapping);
if (!ptr) {
dev_warn(kctx->kbdev->dev,
"%s: JIT start=0x%llx unable to map memory near end pointer %llx\n",
@@ -4455,17 +4921,44 @@
}
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
+void kbase_unpin_user_buf_page(struct page *page)
+{
+#if KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE
+ put_page(page);
+#else
+ unpin_user_page(page);
+#endif
+}
+
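The wrapper above exists because the pinning API changed across kernel releases: pages obtained with pin_user_pages*() on v5.9+ kernels must be released with unpin_user_page(), while pages from the older get_user_pages*() family are released with put_page(). A minimal sketch of the matching pin side, using the same version boundaries as this patch and assuming a v4.10+ kernel as the surrounding code does (example_pin() is illustrative, not a kbase symbol; error handling elided):

    #include <linux/mm.h>
    #include <linux/version.h>

    static long example_pin(struct mm_struct *mm, unsigned long start,
                            unsigned long nr_pages, struct page **pages)
    {
    #if KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE
            /* Pairs with unpin_user_page() in kbase_unpin_user_buf_page(). */
            return pin_user_pages_remote(mm, start, nr_pages, FOLL_WRITE,
                                         pages, NULL, NULL);
    #else
            /* Pairs with put_page() in kbase_unpin_user_buf_page(). */
            return get_user_pages_remote(NULL, mm, start, nr_pages,
                                         FOLL_WRITE, pages, NULL, NULL);
    #endif
    }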
#if MALI_USE_CSF
static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc)
{
- if (alloc->nents) {
+ /* In CSF builds, we keep pages pinned until the last reference is
+ * released on the alloc. A refcount of 0 also means we can be sure
+ * that all CPU mappings have been closed on this alloc, and no more
+ * mappings of it will be created.
+ *
+ * Further, the WARN() below captures the restriction that this
+ * function will not handle anything other than the alloc termination
+ * path, because the caller of kbase_mem_phy_alloc_put() is not
+ * required to hold the kctx's reg_lock, and so we could not handle
+ * removing an existing CPU mapping here.
+ *
+ * Refer to this function's kernel-doc comments for alternatives for
+ * unpinning a User buffer.
+ */
+
+ if (alloc->nents && !WARN(kref_read(&alloc->kref) != 0,
+ "must only be called on terminating an allocation")) {
struct page **pages = alloc->imported.user_buf.pages;
long i;
WARN_ON(alloc->nents != alloc->imported.user_buf.nr_pages);
for (i = 0; i < alloc->nents; i++)
- put_page(pages[i]);
+ kbase_unpin_user_buf_page(pages[i]);
+
+ alloc->nents = 0;
}
}
#endif
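As the comment above notes, this unpin helper is only valid on the allocation termination path, once the final reference has been dropped. That invariant can be expressed as a small guard; a sketch under the same assumptions (example_safe_to_unpin() is illustrative, not a kbase symbol):

    #include <linux/bug.h>
    #include <linux/kref.h>

    /* True only when the object is genuinely terminating, i.e. its last
     * reference is gone; otherwise emit the same WARN() as above and
     * refuse to proceed.
     */
    static bool example_safe_to_unpin(struct kref *kref)
    {
            return !WARN(kref_read(kref) != 0,
                         "must only be called on terminating an allocation");
    }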
@@ -4479,6 +4972,9 @@
struct mm_struct *mm = alloc->imported.user_buf.mm;
long pinned_pages;
long i;
+ int write;
+
+ lockdep_assert_held(&kctx->reg_lock);
if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF))
return -EINVAL;
@@ -4493,44 +4989,28 @@
if (WARN_ON(reg->gpu_alloc->imported.user_buf.mm != current->mm))
return -EINVAL;
-#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE
- pinned_pages = get_user_pages(NULL, mm,
- address,
- alloc->imported.user_buf.nr_pages,
-#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \
-KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
- reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
- pages, NULL);
+ write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);
+
+#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE
+ pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages,
+ write ? FOLL_WRITE : 0, pages, NULL);
+#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE
+ pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages,
+ write ? FOLL_WRITE : 0, pages, NULL, NULL);
#else
- reg->flags & KBASE_REG_GPU_WR,
- 0, pages, NULL);
-#endif
-#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE
- pinned_pages = get_user_pages_remote(NULL, mm,
- address,
- alloc->imported.user_buf.nr_pages,
- reg->flags & KBASE_REG_GPU_WR,
- 0, pages, NULL);
-#elif KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE
- pinned_pages = get_user_pages_remote(NULL, mm,
- address,
- alloc->imported.user_buf.nr_pages,
- reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
- pages, NULL);
-#else
- pinned_pages = get_user_pages_remote(NULL, mm,
- address,
- alloc->imported.user_buf.nr_pages,
- reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
- pages, NULL, NULL);
+ pinned_pages = pin_user_pages_remote(mm, address, alloc->imported.user_buf.nr_pages,
+ write ? FOLL_WRITE : 0, pages, NULL, NULL);
#endif
if (pinned_pages <= 0)
return pinned_pages;
if (pinned_pages != alloc->imported.user_buf.nr_pages) {
+ /* The code above already ensures there cannot have been a CPU
+ * mapping, because alloc->nents is 0
+ */
for (i = 0; i < pinned_pages; i++)
- put_page(pages[i]);
+ kbase_unpin_user_buf_page(pages[i]);
return -ENOMEM;
}
@@ -4542,46 +5022,64 @@
static int kbase_jd_user_buf_map(struct kbase_context *kctx,
struct kbase_va_region *reg)
{
- long pinned_pages;
+ int err;
+ long pinned_pages = 0;
struct kbase_mem_phy_alloc *alloc;
struct page **pages;
struct tagged_addr *pa;
- long i;
- unsigned long address;
+ long i, dma_mapped_pages;
struct device *dev;
- unsigned long offset;
- unsigned long local_size;
unsigned long gwt_mask = ~0;
- int err = kbase_jd_user_buf_pin_pages(kctx, reg);
+ /* Calls to this function are inherently asynchronous with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ err = kbase_jd_user_buf_pin_pages(kctx, reg);
if (err)
return err;
alloc = reg->gpu_alloc;
pa = kbase_get_gpu_phy_pages(reg);
- address = alloc->imported.user_buf.address;
pinned_pages = alloc->nents;
pages = alloc->imported.user_buf.pages;
dev = kctx->kbdev->dev;
- offset = address & ~PAGE_MASK;
- local_size = alloc->imported.user_buf.size;
+ /* Manual CPU cache synchronization.
+ *
+ * The driver disables automatic CPU cache synchronization because the
+ * memory pages that enclose the imported region may also contain
+ * sub-regions which are not imported and that are allocated and used
+ * by the user process. This may be the case of memory at the beginning
+ * of the first page and at the end of the last page. Automatic CPU cache
+ * synchronization would force some operations on those memory allocations,
+ * unbeknown to the user process: in particular, a CPU cache invalidate
+ * upon unmapping would destroy the content of dirty CPU caches and cause
+ * the user process to lose CPU writes to the non-imported sub-regions.
+ *
+ * When the GPU claims ownership of the imported memory buffer, it shall
+ * commit CPU writes for the whole of every page that encloses the imported
+ * region, otherwise the initial content of memory would be wrong.
+ */
for (i = 0; i < pinned_pages; i++) {
dma_addr_t dma_addr;
- unsigned long min;
-
- min = MIN(PAGE_SIZE - offset, local_size);
- dma_addr = dma_map_page(dev, pages[i],
- offset, min,
- DMA_BIDIRECTIONAL);
- if (dma_mapping_error(dev, dma_addr))
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+#endif
+ err = dma_mapping_error(dev, dma_addr);
+ if (err)
goto unwind;
alloc->imported.user_buf.dma_addrs[i] = dma_addr;
pa[i] = as_tagged(page_to_phys(pages[i]));
- local_size -= min;
- offset = 0;
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
}
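The mapping loop above follows a common idiom: create the DMA mapping with automatic CPU cache maintenance disabled, then perform only the synchronization the driver knows to be safe. A minimal sketch for a single page, assuming a v4.10+ kernel so that dma_map_page_attrs() is available (example_map_page_nosync() is illustrative, not a kbase symbol):

    #include <linux/dma-mapping.h>

    static int example_map_page_nosync(struct device *dev, struct page *page,
                                       dma_addr_t *dma_out)
    {
            /* Map without implicit CPU cache maintenance ... */
            dma_addr_t dma = dma_map_page_attrs(dev, page, 0, PAGE_SIZE,
                                                DMA_BIDIRECTIONAL,
                                                DMA_ATTR_SKIP_CPU_SYNC);

            if (dma_mapping_error(dev, dma))
                    return -ENOMEM;

            /* ... then explicitly commit dirty CPU cache lines, so the
             * device reads the current content of the page.
             */
            dma_sync_single_for_device(dev, dma, PAGE_SIZE, DMA_BIDIRECTIONAL);
            *dma_out = dma;
            return 0;
    }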
#ifdef CONFIG_MALI_CINSTR_GWT
@@ -4589,24 +5087,46 @@
gwt_mask = ~KBASE_REG_GPU_WR;
#endif
- err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- pa, kbase_reg_current_backed_size(reg),
- reg->flags & gwt_mask, kctx->as_nr,
- alloc->group_id);
+ err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa,
+ kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr, alloc->group_id,
+ mmu_sync_info, NULL);
if (err == 0)
return 0;
/* fall down */
unwind:
alloc->nents = 0;
- while (i--) {
- dma_unmap_page(kctx->kbdev->dev,
- alloc->imported.user_buf.dma_addrs[i],
- PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_mapped_pages = i;
+ /* Run the unmap loop in the same order as the map loop, and perform
+ * CPU cache synchronization again to re-write the content of dirty CPU caches
+ * to memory. This is a precautionary measure in case a GPU job has taken
+ * advantage of a partially GPU-mapped range to write and corrupt the
+ * content of memory, either inside or outside the imported region.
+ *
+ * Notice that this error recovery path doesn't try to be optimal and just
+ * flushes the entire page range.
+ */
+ for (i = 0; i < dma_mapped_pages; i++) {
+ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+#endif
}
- while (++i < pinned_pages) {
- put_page(pages[i]);
+ /* The user buffer could already have been pinned before
+ * entering this function, and hence there could potentially be CPU
+ * mappings of it
+ */
+ kbase_mem_shrink_cpu_mapping(kctx, reg, 0, pinned_pages);
+
+ for (i = 0; i < pinned_pages; i++) {
+ kbase_unpin_user_buf_page(pages[i]);
pages[i] = NULL;
}
@@ -4617,30 +5137,118 @@
* GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT
* have a corresponding call to kbase_jd_user_buf_unpin_pages().
*/
-static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
- struct kbase_mem_phy_alloc *alloc, bool writeable)
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc,
+ struct kbase_va_region *reg, bool writeable)
{
long i;
struct page **pages;
- unsigned long size = alloc->imported.user_buf.size;
+ unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK;
+ unsigned long remaining_size = alloc->imported.user_buf.size;
+
+ lockdep_assert_held(&kctx->reg_lock);
KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
pages = alloc->imported.user_buf.pages;
+
+#if !MALI_USE_CSF
+ kbase_mem_shrink_cpu_mapping(kctx, reg, 0, alloc->nents);
+#else
+ CSTD_UNUSED(reg);
+#endif
+
for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
- unsigned long local_size;
+ unsigned long imported_size = MIN(remaining_size, PAGE_SIZE - offset_within_page);
+ /* Notice: this is a temporary variable that is used for DMA sync
+ * operations, and that could be incremented by an offset if the
+ * current page contains both imported and non-imported memory
+ * sub-regions.
+ *
+ * It is valid to add an offset to this value, because the offset
+ * is always kept within the physically contiguous dma-mapped range
+ * and there's no need to translate to physical address to offset it.
+ *
+ * This variable is not going to be used for the actual DMA unmap
+ * operation, which shall always use the original DMA address of the
+ * whole memory page.
+ */
dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
- local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
- dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
- DMA_BIDIRECTIONAL);
+ /* Manual CPU cache synchronization.
+ *
+ * When the GPU returns ownership of the buffer to the CPU, the driver
+ * needs to treat imported and non-imported memory differently.
+ *
+ * The first case to consider is non-imported sub-regions at the
+ * beginning of the first page and at the end of last page. For these
+ * sub-regions: CPU cache shall be committed with a clean+invalidate,
+ * in order to keep the last CPU write.
+ *
+ * Imported region prefers the opposite treatment: this memory has been
+ * legitimately mapped and used by the GPU, hence GPU writes shall be
+ * committed to memory, while CPU cache shall be invalidated to make
+ * sure that CPU reads the correct memory content.
+ *
+ * The following diagram shows the expected values of the variables
+ * used in this loop in the corner case of an imported region enclosed
+ * by a single memory page:
+ *
+ * page boundary ->|-----------| <- dma_addr (initial value)
+ * | |
+ * | - - - - - | <- offset_within_page
+ * |XXXXXXXXXXX|\
+ * |XXXXXXXXXXX| \
+ * |XXXXXXXXXXX| }- imported_size
+ * |XXXXXXXXXXX| /
+ * |XXXXXXXXXXX|/
+ * | - - - - - | <- offset_within_page + imported_size
+ * | |\
+ * | | }- PAGE_SIZE - imported_size - offset_within_page
+ * | |/
+ * page boundary ->|-----------|
+ *
+ * If the imported region is enclosed by more than one page, then
+ * offset_within_page = 0 for any page after the first.
+ */
+
+ /* Only for first page: handle non-imported range at the beginning. */
+ if (offset_within_page > 0) {
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page,
+ DMA_BIDIRECTIONAL);
+ dma_addr += offset_within_page;
+ }
+
+ /* For every page: handle imported range. */
+ if (imported_size > 0)
+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size,
+ DMA_BIDIRECTIONAL);
+
+ /* Only for last page (that may coincide with first page):
+ * handle non-imported range at the end.
+ */
+ if ((imported_size + offset_within_page) < PAGE_SIZE) {
+ dma_addr += imported_size;
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+ PAGE_SIZE - imported_size - offset_within_page,
+ DMA_BIDIRECTIONAL);
+ }
+
+ /* Notice: use the original DMA address to unmap the whole memory page. */
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+#else
+ dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i],
+ PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+#endif
if (writeable)
set_page_dirty_lock(pages[i]);
#if !MALI_USE_CSF
- put_page(pages[i]);
+ kbase_unpin_user_buf_page(pages[i]);
pages[i] = NULL;
#endif
- size -= local_size;
+ remaining_size -= imported_size;
+ offset_within_page = 0;
}
#if !MALI_USE_CSF
alloc->nents = 0;
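The three dma_sync_single_for_*() calls in the loop above partition each page into a non-imported head, an imported body, and a non-imported tail, exactly as drawn in the diagram. A hedged sketch of that arithmetic, reusing the driver's MIN() macro (example_split_page() is illustrative; the driver computes the same values in-line):

    /* head/tail remain CPU-owned and get dma_sync_single_for_device()
     * (keep CPU writes); body is GPU-owned and gets
     * dma_sync_single_for_cpu() (make GPU writes visible).
     */
    static void example_split_page(unsigned long offset_within_page,
                                   unsigned long remaining_size,
                                   unsigned long *head, unsigned long *body,
                                   unsigned long *tail)
    {
            *head = offset_within_page;
            *body = MIN(remaining_size, PAGE_SIZE - offset_within_page);
            *tail = PAGE_SIZE - *head - *body;
    }

For example, a 100-byte import starting 50 bytes into a 4 KiB page yields head = 50, body = 100 and tail = 3946; for any page after the first, offset_within_page is 0 and the head range disappears.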
@@ -4687,11 +5295,11 @@
return 0;
}
-struct kbase_mem_phy_alloc *kbase_map_external_resource(
- struct kbase_context *kctx, struct kbase_va_region *reg,
- struct mm_struct *locked_mm)
+int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg,
+ struct mm_struct *locked_mm)
{
- int err;
+ int err = 0;
+ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
lockdep_assert_held(&kctx->reg_lock);
@@ -4700,7 +5308,7 @@
case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) &&
(!reg->gpu_alloc->nents))
- goto exit;
+ return -EINVAL;
reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
if (reg->gpu_alloc->imported.user_buf
@@ -4708,7 +5316,7 @@
err = kbase_jd_user_buf_map(kctx, reg);
if (err) {
reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--;
- goto exit;
+ return err;
}
}
}
@@ -4716,21 +5324,30 @@
case KBASE_MEM_TYPE_IMPORTED_UMM: {
err = kbase_mem_umm_map(kctx, reg);
if (err)
- goto exit;
+ return err;
break;
}
default:
- goto exit;
+ dev_dbg(kctx->kbdev->dev,
+ "Invalid external resource GPU allocation type (%x) on mapping",
+ alloc->type);
+ return -EINVAL;
}
- return kbase_mem_phy_alloc_get(reg->gpu_alloc);
-exit:
- return NULL;
+ kbase_va_region_alloc_get(kctx, reg);
+ kbase_mem_phy_alloc_get(alloc);
+ return err;
}
-void kbase_unmap_external_resource(struct kbase_context *kctx,
- struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc)
+void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg)
{
+ /* gpu_alloc was used in kbase_map_external_resource, so we need to use it for the
+ * unmapping operation.
+ */
+ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+
+ lockdep_assert_held(&kctx->reg_lock);
+
switch (alloc->type) {
case KBASE_MEM_TYPE_IMPORTED_UMM: {
kbase_mem_umm_unmap(kctx, reg, alloc);
@@ -4742,26 +5359,33 @@
if (alloc->imported.user_buf.current_mapping_usage_count == 0) {
bool writeable = true;
- if (!kbase_is_region_invalid_or_free(reg) &&
- reg->gpu_alloc == alloc)
- kbase_mmu_teardown_pages(
- kctx->kbdev,
- &kctx->mmu,
- reg->start_pfn,
- kbase_reg_current_backed_size(reg),
- kctx->as_nr);
+ if (!kbase_is_region_invalid_or_free(reg)) {
+ kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages,
+ kbase_reg_current_backed_size(reg),
+ kbase_reg_current_backed_size(reg),
+ kctx->as_nr, true);
+ }
- if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0))
+ if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0)
writeable = false;
- kbase_jd_user_buf_unmap(kctx, alloc, writeable);
+ kbase_jd_user_buf_unmap(kctx, alloc, reg, writeable);
}
- }
+ }
break;
default:
- break;
+ WARN(1, "Invalid external resource GPU allocation type (%x) on unmapping",
+ alloc->type);
+ return;
}
kbase_mem_phy_alloc_put(alloc);
+ kbase_va_region_alloc_put(kctx, reg);
+}
+
+static inline u64 kbasep_get_va_gpu_addr(struct kbase_va_region *reg)
+{
+ return reg->start_pfn << PAGE_SHIFT;
}
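With this refactor, kbase_map_external_resource() reports errors through a return code and takes paired references on both the VA region and its physical allocation; kbase_unmap_external_resource() drops both. A hedged caller-side sketch, assuming kctx->reg_lock is held as both functions now assert (example_use_resource() is illustrative, not a kbase symbol):

    static int example_use_resource(struct kbase_context *kctx,
                                    struct kbase_va_region *reg)
    {
            int err = kbase_map_external_resource(kctx, reg, NULL);

            if (err)
                    return err;

            /* ... use the mapping; kbasep_get_va_gpu_addr(reg) gives the
             * region's GPU virtual address ...
             */

            kbase_unmap_external_resource(kctx, reg);
            return 0;
    }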
struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
@@ -4777,7 +5401,7 @@
* metadata which matches the region which is being acquired.
*/
list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) {
- if (walker->gpu_addr == gpu_addr) {
+ if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) {
meta = walker;
meta->ref++;
break;
@@ -4789,8 +5413,7 @@
struct kbase_va_region *reg;
/* Find the region */
- reg = kbase_region_tracker_find_region_enclosing_address(
- kctx, gpu_addr);
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
if (kbase_is_region_invalid_or_free(reg))
goto failed;
@@ -4798,18 +5421,18 @@
meta = kzalloc(sizeof(*meta), GFP_KERNEL);
if (!meta)
goto failed;
-
/*
* Fill in the metadata object and acquire a reference
* for the physical resource.
*/
- meta->alloc = kbase_map_external_resource(kctx, reg, NULL);
- meta->ref = 1;
+ meta->reg = reg;
- if (!meta->alloc)
+ /* Map the external resource to the GPU allocation of the region
+ * and acquire the reference to the VA region
+ */
+ if (kbase_map_external_resource(kctx, meta->reg, NULL))
goto fail_map;
-
- meta->gpu_addr = reg->start_pfn << PAGE_SHIFT;
+ meta->ref = 1;
list_add(&meta->ext_res_node, &kctx->ext_res_meta_head);
}
@@ -4834,7 +5457,7 @@
* metadata which matches the region which is being released.
*/
list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node)
- if (walker->gpu_addr == gpu_addr)
+ if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr)
return walker;
return NULL;
@@ -4843,14 +5466,7 @@
static void release_sticky_resource_meta(struct kbase_context *kctx,
struct kbase_ctx_ext_res_meta *meta)
{
- struct kbase_va_region *reg;
-
- /* Drop the physical memory reference and free the metadata. */
- reg = kbase_region_tracker_find_region_enclosing_address(
- kctx,
- meta->gpu_addr);
-
- kbase_unmap_external_resource(kctx, reg, meta->alloc);
+ kbase_unmap_external_resource(kctx, meta->reg);
list_del(&meta->ext_res_node);
kfree(meta);
}
--
Gitblit v1.6.2