From 08f87f769b595151be1afeff53e144f543faa614 Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Wed, 06 Dec 2023 09:51:13 +0000 Subject: [PATCH] add dts config --- kernel/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c | 1077 +++++++++++++++++++++++++++++++++++++++-------------------- 1 files changed, 709 insertions(+), 368 deletions(-) diff --git a/kernel/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c b/kernel/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c index a1565c1..f1251a4 100644 --- a/kernel/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c +++ b/kernel/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,14 +31,13 @@ #include <linux/fs.h> #include <linux/version.h> #include <linux/dma-mapping.h> -#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE) -#include <linux/dma-attrs.h> -#endif /* LINUX_VERSION_CODE < 4.8.0 */ #include <linux/dma-buf.h> #include <linux/shrinker.h> #include <linux/cache.h> #include <linux/memory_group_manager.h> - +#include <linux/math64.h> +#include <linux/migrate.h> +#include <linux/version.h> #include <mali_kbase.h> #include <mali_kbase_mem_linux.h> #include <tl/mali_kbase_tracepoints.h> @@ -86,23 +85,34 @@ #define IR_THRESHOLD_STEPS (256u) #if MALI_USE_CSF -static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, - struct vm_area_struct *vma); -static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, - struct vm_area_struct *vma); +static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, struct vm_area_struct *vma); +static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, struct vm_area_struct *vma); #endif -static int kbase_vmap_phy_pages(struct kbase_context *kctx, - struct kbase_va_region *reg, u64 offset_bytes, size_t size, - struct kbase_vmap_struct *map); +static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg, + u64 offset_bytes, size_t size, struct kbase_vmap_struct *map, + kbase_vmap_flag vmap_flags); static void kbase_vunmap_phy_pages(struct kbase_context *kctx, struct kbase_vmap_struct *map); static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); -static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages); +static bool is_process_exiting(struct vm_area_struct *vma) +{ + /* PF_EXITING flag can't be reliably used here for the detection + * of process exit, as 'mm_users' counter could still be non-zero + * when all threads of the process have exited. Later when the + * thread (which took a reference on the 'mm' of process that + * exited) drops it reference, the vm_ops->close method would be + * called for all the vmas (owned by 'mm' of process that exited) + * but the PF_EXITING flag may not be neccessarily set for the + * thread at that time. + */ + if (atomic_read(&vma->vm_mm->mm_users)) + return false; + + return true; +} /* Retrieve the associated region pointer if the GPU address corresponds to * one of the event memory pages. 
The enclosing region, if found, shouldn't @@ -184,20 +194,12 @@ reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) return -EINVAL; - if (size > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES - - atomic_read(&kctx->permanent_mapped_pages))) { - dev_warn(kctx->kbdev->dev, "Request for %llu more pages mem needing a permanent mapping would breach limit %lu, currently at %d pages", - (u64)size, - KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES, - atomic_read(&kctx->permanent_mapped_pages)); - return -ENOMEM; - } - kern_mapping = kzalloc(sizeof(*kern_mapping), GFP_KERNEL); if (!kern_mapping) return -ENOMEM; - err = kbase_vmap_phy_pages(kctx, reg, 0u, size_bytes, kern_mapping); + err = kbase_vmap_phy_pages(kctx, reg, 0u, size_bytes, kern_mapping, + KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING); if (err < 0) goto vmap_fail; @@ -205,7 +207,6 @@ reg->flags &= ~KBASE_REG_GROWABLE; reg->cpu_alloc->permanent_map = kern_mapping; - atomic_add(size, &kctx->permanent_mapped_pages); return 0; vmap_fail: @@ -221,13 +222,6 @@ kfree(alloc->permanent_map); alloc->permanent_map = NULL; - - /* Mappings are only done on cpu_alloc, so don't need to worry about - * this being reduced a second time if a separate gpu_alloc is - * freed - */ - WARN_ON(alloc->nents > atomic_read(&kctx->permanent_mapped_pages)); - atomic_sub(alloc->nents, &kctx->permanent_mapped_pages); } void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, @@ -291,9 +285,9 @@ */ } -struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, - u64 va_pages, u64 commit_pages, - u64 extension, u64 *flags, u64 *gpu_va) +struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, + u64 extension, u64 *flags, u64 *gpu_va, + enum kbase_caller_mmu_sync_info mmu_sync_info) { int zone; struct kbase_va_region *reg; @@ -310,19 +304,21 @@ va_pages, commit_pages, extension, *flags); #if MALI_USE_CSF - *gpu_va = 0; /* return 0 on failure */ + if (!(*flags & BASE_MEM_FIXED)) + *gpu_va = 0; /* return 0 on failure */ #else if (!(*flags & BASE_MEM_FLAG_MAP_FIXED)) *gpu_va = 0; /* return 0 on failure */ +#endif else - dev_err(dev, + dev_dbg(dev, "Keeping requested GPU VA of 0x%llx\n", (unsigned long long)*gpu_va); -#endif if (!kbase_check_alloc_flags(*flags)) { dev_warn(dev, - "kbase_mem_alloc called with bad flags (%llx)", + "%s called with bad flags (%llx)", + __func__, (unsigned long long)*flags); goto bad_flags; } @@ -344,7 +340,8 @@ } if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && !kbase_device_is_cpu_coherent(kctx->kbdev)) { - dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable"); + dev_warn(dev, "%s call required coherent mem when unavailable", + __func__); goto bad_flags; } if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && @@ -367,7 +364,20 @@ if (*flags & BASE_MEM_SAME_VA) { rbtree = &kctx->reg_rbtree_same; zone = KBASE_REG_ZONE_SAME_VA; - } else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) { + } +#if MALI_USE_CSF + /* fixed va_zone always exists */ + else if (*flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE)) { + if (*flags & BASE_MEM_PROT_GPU_EX) { + rbtree = &kctx->reg_rbtree_exec_fixed; + zone = KBASE_REG_ZONE_EXEC_FIXED_VA; + } else { + rbtree = &kctx->reg_rbtree_fixed; + zone = KBASE_REG_ZONE_FIXED_VA; + } + } +#endif + else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) { rbtree = &kctx->reg_rbtree_exec; zone = KBASE_REG_ZONE_EXEC_VA; } else { @@ -375,8 +385,7 @@ zone = KBASE_REG_ZONE_CUSTOM_VA; } - reg = kbase_alloc_free_region(rbtree, 
PFN_DOWN(*gpu_va), - va_pages, zone); + reg = kbase_alloc_free_region(kctx->kbdev, rbtree, PFN_DOWN(*gpu_va), va_pages, zone); if (!reg) { dev_err(dev, "Failed to allocate free region"); @@ -387,7 +396,7 @@ goto invalid_flags; if (kbase_reg_prepare_native(reg, kctx, - base_mem_group_id_get(*flags)) != 0) { + kbase_mem_group_id_get(*flags)) != 0) { dev_err(dev, "Failed to prepare region"); goto prepare_failed; } @@ -469,7 +478,26 @@ *gpu_va = (u64) cookie; } else /* we control the VA */ { - if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, 1) != 0) { + size_t align = 1; + + if (kctx->kbdev->pagesize_2mb) { + /* If there's enough (> 33 bits) of GPU VA space, align to 2MB + * boundaries. The similar condition is used for mapping from + * the SAME_VA zone inside kbase_context_get_unmapped_area(). + */ + if (kctx->kbdev->gpu_props.mmu.va_bits > 33) { + if (va_pages >= (SZ_2M / SZ_4K)) + align = (SZ_2M / SZ_4K); + } + if (*gpu_va) + align = 1; +#if !MALI_USE_CSF + if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) + align = 1; +#endif /* !MALI_USE_CSF */ + } + if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, align, + mmu_sync_info) != 0) { dev_warn(dev, "Failed to map memory on GPU"); kbase_gpu_vm_unlock(kctx); goto no_mmap; @@ -490,6 +518,14 @@ #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ kbase_gpu_vm_unlock(kctx); + +#if MALI_USE_CSF + if (*flags & BASE_MEM_FIXABLE) + atomic64_inc(&kctx->num_fixable_allocs); + else if (*flags & BASE_MEM_FIXED) + atomic64_inc(&kctx->num_fixed_allocs); +#endif + return reg; no_mmap: @@ -600,11 +636,18 @@ #if MALI_USE_CSF if (KBASE_REG_CSF_EVENT & reg->flags) *out |= BASE_MEM_CSF_EVENT; + if (((KBASE_REG_ZONE_MASK & reg->flags) == KBASE_REG_ZONE_FIXED_VA) || + ((KBASE_REG_ZONE_MASK & reg->flags) == KBASE_REG_ZONE_EXEC_FIXED_VA)) { + if (KBASE_REG_FIXED_ADDRESS & reg->flags) + *out |= BASE_MEM_FIXED; + else + *out |= BASE_MEM_FIXABLE; + } #endif if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags) *out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE; - *out |= base_mem_group_id_set(reg->cpu_alloc->group_id); + *out |= kbase_mem_group_id_set(reg->cpu_alloc->group_id); WARN(*out & ~BASE_MEM_FLAGS_QUERYABLE, "BASE_MEM_FLAGS_QUERYABLE needs updating\n"); @@ -629,24 +672,36 @@ * @s: Shrinker * @sc: Shrinker control * - * Return: Number of pages which can be freed. + * Return: Number of pages which can be freed or SHRINK_EMPTY if no page remains. */ static unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, struct shrink_control *sc) { - struct kbase_context *kctx; - - kctx = container_of(s, struct kbase_context, reclaim); + struct kbase_context *kctx = container_of(s, struct kbase_context, reclaim); + int evict_nents = atomic_read(&kctx->evict_nents); + unsigned long nr_freeable_items; WARN((sc->gfp_mask & __GFP_ATOMIC), "Shrinkers cannot be called for GFP_ATOMIC allocations. Check kernel mm for problems. gfp_mask==%x\n", sc->gfp_mask); WARN(in_atomic(), - "Shrinker called whilst in atomic context. The caller must switch to using GFP_ATOMIC or similar. gfp_mask==%x\n", + "Shrinker called in atomic context. The caller must use GFP_ATOMIC or similar, then Shrinkers must not be called. 
gfp_mask==%x\n", sc->gfp_mask); - return atomic_read(&kctx->evict_nents); + if (unlikely(evict_nents < 0)) { + dev_err(kctx->kbdev->dev, "invalid evict_nents(%d)", evict_nents); + nr_freeable_items = 0; + } else { + nr_freeable_items = evict_nents; + } + +#if KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE + if (nr_freeable_items == 0) + nr_freeable_items = SHRINK_EMPTY; +#endif + + return nr_freeable_items; } /** @@ -655,8 +710,8 @@ * @s: Shrinker * @sc: Shrinker control * - * Return: Number of pages freed (can be less then requested) or -1 if the - * shrinker failed to free pages in its pool. + * Return: Number of pages freed (can be less then requested) or + * SHRINK_STOP if reclaim isn't possible. * * Note: * This function accesses region structures without taking the region lock, @@ -684,17 +739,15 @@ list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) { int err; + if (!alloc->reg) + continue; + err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg, 0, alloc->nents); - if (err != 0) { - /* - * Failed to remove GPU mapping, tell the shrinker - * to stop trying to shrink our slab even though we - * have pages in it. - */ - freed = -1; - goto out_unlock; - } + + /* Failed to remove GPU mapping, proceed to next one. */ + if (err != 0) + continue; /* * Update alloc->evicted before freeing the backing so the @@ -718,7 +771,7 @@ if (freed > sc->nr_to_scan) break; } -out_unlock: + mutex_unlock(&kctx->jit_evict_lock); return freed; @@ -738,7 +791,11 @@ * struct shrinker does not define batch */ kctx->reclaim.batch = 0; +#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE register_shrinker(&kctx->reclaim); +#else + register_shrinker(&kctx->reclaim, "mali-mem"); +#endif return 0; } @@ -802,6 +859,9 @@ lockdep_assert_held(&kctx->reg_lock); + /* Memory is in the process of transitioning to the shrinker, and + * should ignore migration attempts + */ kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, 0, gpu_alloc->nents); @@ -809,12 +869,17 @@ /* This allocation can't already be on a list. */ WARN_ON(!list_empty(&gpu_alloc->evict_node)); - /* - * Add the allocation to the eviction list, after this point the shrink + /* Add the allocation to the eviction list, after this point the shrink * can reclaim it. */ list_add(&gpu_alloc->evict_node, &kctx->evict_list); atomic_add(gpu_alloc->nents, &kctx->evict_nents); + + /* Indicate to page migration that the memory can be reclaimed by the shrinker. + */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(gpu_alloc, NOT_MOVABLE); + mutex_unlock(&kctx->jit_evict_lock); kbase_mem_evictable_mark_reclaim(gpu_alloc); @@ -826,6 +891,11 @@ { struct kbase_context *kctx = gpu_alloc->imported.native.kctx; int err = 0; + + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; lockdep_assert_held(&kctx->reg_lock); @@ -856,11 +926,20 @@ * pre-eviction size. */ if (!err) - err = kbase_mem_grow_gpu_mapping(kctx, - gpu_alloc->reg, - gpu_alloc->evicted, 0); + err = kbase_mem_grow_gpu_mapping( + kctx, gpu_alloc->reg, + gpu_alloc->evicted, 0, mmu_sync_info); gpu_alloc->evicted = 0; + + /* Since the allocation is no longer evictable, and we ensure that + * it grows back to its pre-eviction size, we will consider the + * state of it to be ALLOCATED_MAPPED, as that is the only state + * in which a physical allocation could transition to NOT_MOVABLE + * from. 
+ */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(gpu_alloc, ALLOCATED_MAPPED); } } @@ -911,6 +990,15 @@ /* Validate the region */ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); if (kbase_is_region_invalid_or_free(reg)) + goto out_unlock; + + /* There is no use case to support MEM_FLAGS_CHANGE ioctl for allocations + * that have NO_USER_FREE flag set, to mark them as evictable/reclaimable. + * This would usually include JIT allocations, Tiler heap related allocations + * & GPU queue ringbuffer and none of them needs to be explicitly marked + * as evictable by Userspace. + */ + if (kbase_va_region_is_no_user_free(reg)) goto out_unlock; /* Is the region being transitioning between not needed and needed? */ @@ -1022,7 +1110,7 @@ struct kbase_va_region *reg, enum kbase_sync_type sync_fn) { int ret = -EINVAL; - struct dma_buf *dma_buf; + struct dma_buf __maybe_unused *dma_buf; enum dma_data_direction dir = DMA_BIDIRECTIONAL; lockdep_assert_held(&kctx->reg_lock); @@ -1066,19 +1154,7 @@ ret = 0; } #else - /* Though the below version check could be superfluous depending upon the version condition - * used for enabling KBASE_MEM_ION_SYNC_WORKAROUND, we still keep this check here to allow - * ease of modification for non-ION systems or systems where ION has been patched. - */ -#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS) - dma_buf_end_cpu_access(dma_buf, - 0, dma_buf->size, - dir); - ret = 0; -#else - ret = dma_buf_end_cpu_access(dma_buf, - dir); -#endif + ret = dma_buf_end_cpu_access(dma_buf, dir); #endif /* KBASE_MEM_ION_SYNC_WORKAROUND */ break; case KBASE_SYNC_TO_CPU: @@ -1095,11 +1171,7 @@ ret = 0; } #else - ret = dma_buf_begin_cpu_access(dma_buf, -#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS) - 0, dma_buf->size, -#endif - dir); + ret = dma_buf_begin_cpu_access(dma_buf, dir); #endif /* KBASE_MEM_ION_SYNC_WORKAROUND */ break; } @@ -1218,6 +1290,11 @@ struct kbase_mem_phy_alloc *alloc; unsigned long gwt_mask = ~0; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + lockdep_assert_held(&kctx->reg_lock); alloc = reg->gpu_alloc; @@ -1244,14 +1321,11 @@ gwt_mask = ~KBASE_REG_GPU_WR; #endif - err = kbase_mmu_insert_pages(kctx->kbdev, - &kctx->mmu, - reg->start_pfn, - kbase_get_gpu_phy_pages(reg), - kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, - kctx->as_nr, - alloc->group_id); + err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + kbase_get_gpu_phy_pages(reg), + kbase_reg_current_backed_size(reg), + reg->flags & gwt_mask, kctx->as_nr, alloc->group_id, + mmu_sync_info, NULL); if (err) goto bad_insert; @@ -1264,13 +1338,11 @@ * Assume alloc->nents is the number of actual pages in the * dma-buf memory. 
*/ - err = kbase_mmu_insert_single_page(kctx, - reg->start_pfn + alloc->nents, - kctx->aliasing_sink_page, - reg->nr_pages - alloc->nents, - (reg->flags | KBASE_REG_GPU_RD) & - ~KBASE_REG_GPU_WR, - KBASE_MEM_GROUP_SINK); + err = kbase_mmu_insert_single_imported_page( + kctx, reg->start_pfn + alloc->nents, kctx->aliasing_sink_page, + reg->nr_pages - alloc->nents, + (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK, + mmu_sync_info); if (err) goto bad_pad_insert; } @@ -1278,11 +1350,8 @@ return 0; bad_pad_insert: - kbase_mmu_teardown_pages(kctx->kbdev, - &kctx->mmu, - reg->start_pfn, - alloc->nents, - kctx->as_nr); + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, + alloc->nents, alloc->nents, kctx->as_nr, true); bad_insert: kbase_mem_umm_unmap_attachment(kctx, alloc); bad_map_attachment: @@ -1310,11 +1379,9 @@ if (!kbase_is_region_invalid_or_free(reg) && reg->gpu_alloc == alloc) { int err; - err = kbase_mmu_teardown_pages(kctx->kbdev, - &kctx->mmu, - reg->start_pfn, - reg->nr_pages, - kctx->as_nr); + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, reg->nr_pages, reg->nr_pages, + kctx->as_nr, true); WARN_ON(err); } @@ -1386,6 +1453,9 @@ return NULL; } + if (!kbase_import_size_is_valid(kctx->kbdev, *va_pages)) + return NULL; + /* ignore SAME_VA */ *flags &= ~BASE_MEM_SAME_VA; @@ -1406,23 +1476,21 @@ if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) need_sync = true; -#if IS_ENABLED(CONFIG_64BIT) - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + if (!kbase_ctx_compat_mode(kctx)) { /* * 64-bit tasks require us to reserve VA on the CPU that we use * on the GPU. */ shared_zone = true; } -#endif if (shared_zone) { *flags |= BASE_MEM_NEED_MMAP; - reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, - 0, *va_pages, KBASE_REG_ZONE_SAME_VA); + reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *va_pages, + KBASE_REG_ZONE_SAME_VA); } else { - reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom, - 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA); + reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *va_pages, + KBASE_REG_ZONE_CUSTOM_VA); } if (!reg) { @@ -1507,7 +1575,7 @@ struct kbase_context *kctx, unsigned long address, unsigned long size, u64 *va_pages, u64 *flags) { - long i; + long i, dma_mapped_pages; struct kbase_va_region *reg; struct rb_root *rbtree; long faulted_pages; @@ -1516,6 +1584,8 @@ u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev); struct kbase_alloc_import_user_buf *user_buf; struct page **pages = NULL; + struct tagged_addr *pa; + struct device *dev; int write; /* Flag supported only for dma-buf imported memory */ @@ -1553,21 +1623,22 @@ /* 64-bit address range is the max */ goto bad_size; + if (!kbase_import_size_is_valid(kctx->kbdev, *va_pages)) + goto bad_size; + /* SAME_VA generally not supported with imported memory (no known use cases) */ *flags &= ~BASE_MEM_SAME_VA; if (*flags & BASE_MEM_IMPORT_SHARED) shared_zone = true; -#if IS_ENABLED(CONFIG_64BIT) - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + if (!kbase_ctx_compat_mode(kctx)) { /* * 64-bit tasks require us to reserve VA on the CPU that we use * on the GPU. 
*/ shared_zone = true; } -#endif if (shared_zone) { *flags |= BASE_MEM_NEED_MMAP; @@ -1576,7 +1647,7 @@ } else rbtree = &kctx->reg_rbtree_custom; - reg = kbase_alloc_free_region(rbtree, 0, *va_pages, zone); + reg = kbase_alloc_free_region(kctx->kbdev, rbtree, 0, *va_pages, zone); if (!reg) goto no_region; @@ -1602,11 +1673,7 @@ user_buf->address = address; user_buf->nr_pages = *va_pages; user_buf->mm = current->mm; -#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE - atomic_inc(¤t->mm->mm_count); -#else - mmgrab(current->mm); -#endif + kbase_mem_mmgrab(); if (reg->gpu_alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) user_buf->pages = vmalloc(*va_pages * sizeof(struct page *)); else @@ -1632,20 +1699,21 @@ write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); -#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE - faulted_pages = get_user_pages(current, current->mm, address, *va_pages, -#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \ -KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE - write ? FOLL_WRITE : 0, pages, NULL); -#else - write, 0, pages, NULL); -#endif -#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE - faulted_pages = get_user_pages(address, *va_pages, - write, 0, pages, NULL); -#else +#if KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE faulted_pages = get_user_pages(address, *va_pages, write ? FOLL_WRITE : 0, pages, NULL); +#else + /* pin_user_pages function cannot be called with pages param NULL. + * get_user_pages function will be used instead because it is safe to be + * used with NULL pages param as long as it doesn't have FOLL_GET flag. + */ + if (pages != NULL) { + faulted_pages = + pin_user_pages(address, *va_pages, write ? FOLL_WRITE : 0, pages, NULL); + } else { + faulted_pages = + get_user_pages(address, *va_pages, write ? FOLL_WRITE : 0, pages, NULL); + } #endif up_read(kbase_mem_get_process_mmap_lock()); @@ -1656,31 +1724,44 @@ reg->gpu_alloc->nents = 0; reg->extension = 0; - if (pages) { - struct device *dev = kctx->kbdev->dev; - unsigned long local_size = user_buf->size; - unsigned long offset = user_buf->address & ~PAGE_MASK; - struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg); + pa = kbase_get_gpu_phy_pages(reg); + dev = kctx->kbdev->dev; + if (pages) { /* Top bit signifies that this was pinned on import */ user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT; + /* Manual CPU cache synchronization. + * + * The driver disables automatic CPU cache synchronization because the + * memory pages that enclose the imported region may also contain + * sub-regions which are not imported and that are allocated and used + * by the user process. This may be the case of memory at the beginning + * of the first page and at the end of the last page. Automatic CPU cache + * synchronization would force some operations on those memory allocations, + * unbeknown to the user process: in particular, a CPU cache invalidate + * upon unmapping would destroy the content of dirty CPU caches and cause + * the user process to lose CPU writes to the non-imported sub-regions. + * + * When the GPU claims ownership of the imported memory buffer, it shall + * commit CPU writes for the whole of all pages that enclose the imported + * region, otherwise the initial content of memory would be wrong. 
+ */ for (i = 0; i < faulted_pages; i++) { dma_addr_t dma_addr; - unsigned long min; - - min = MIN(PAGE_SIZE - offset, local_size); - dma_addr = dma_map_page(dev, pages[i], - offset, min, - DMA_BIDIRECTIONAL); +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); +#else + dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, + DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); +#endif if (dma_mapping_error(dev, dma_addr)) goto unwind_dma_map; user_buf->dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - local_size -= min; - offset = 0; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); } reg->gpu_alloc->nents = faulted_pages; @@ -1689,15 +1770,32 @@ return reg; unwind_dma_map: - while (i--) { - dma_unmap_page(kctx->kbdev->dev, - user_buf->dma_addrs[i], - PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_mapped_pages = i; + /* Run the unmap loop in the same order as map loop, and perform again + * CPU cache synchronization to re-write the content of dirty CPU caches + * to memory. This precautionary measure is kept here to keep this code + * aligned with kbase_jd_user_buf_map() to allow for a potential refactor + * in the future. + */ + for (i = 0; i < dma_mapped_pages; i++) { + dma_addr_t dma_addr = user_buf->dma_addrs[i]; + + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +#else + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); +#endif } fault_mismatch: if (pages) { + /* In this case, the region was not yet in the region tracker, + * and so there are no CPU mappings to remove before we unpin + * the page + */ for (i = 0; i < faulted_pages; i++) - put_page(pages[i]); + kbase_unpin_user_buf_page(pages[i]); } no_page_array: invalid_flags: @@ -1708,7 +1806,6 @@ no_region: bad_size: return NULL; - } @@ -1720,6 +1817,12 @@ u64 gpu_va; size_t i; bool coherent; + uint64_t max_stride; + + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. 
+ */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(flags); @@ -1733,7 +1836,8 @@ if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) { dev_warn(kctx->kbdev->dev, - "kbase_mem_alias called with bad flags (%llx)", + "%s called with bad flags (%llx)", + __func__, (unsigned long long)*flags); goto bad_flags; } @@ -1746,6 +1850,11 @@ if (!nents) goto bad_nents; + max_stride = div64_u64(U64_MAX, nents); + + if (stride > max_stride) + goto bad_size; + if ((nents * stride) > (U64_MAX / PAGE_SIZE)) /* 64-bit address range is the max */ goto bad_size; @@ -1753,22 +1862,19 @@ /* calculate the number of pages this alias will cover */ *num_pages = nents * stride; -#if IS_ENABLED(CONFIG_64BIT) - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + if (!kbase_alias_size_is_valid(kctx->kbdev, *num_pages)) + goto bad_size; + + if (!kbase_ctx_compat_mode(kctx)) { /* 64-bit tasks must MMAP anyway, but not expose this address to * clients */ *flags |= BASE_MEM_NEED_MMAP; - reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, - *num_pages, - KBASE_REG_ZONE_SAME_VA); + reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *num_pages, + KBASE_REG_ZONE_SAME_VA); } else { -#else - if (1) { -#endif - reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom, - 0, *num_pages, - KBASE_REG_ZONE_CUSTOM_VA); + reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *num_pages, + KBASE_REG_ZONE_CUSTOM_VA); } if (!reg) @@ -1817,9 +1923,9 @@ /* validate found region */ if (kbase_is_region_invalid_or_free(aliasing_reg)) goto bad_handle; /* Not found/already free */ - if (aliasing_reg->flags & KBASE_REG_DONT_NEED) + if (kbase_is_region_shrinkable(aliasing_reg)) goto bad_handle; /* Ephemeral region */ - if (aliasing_reg->flags & KBASE_REG_NO_USER_FREE) + if (kbase_va_region_is_no_user_free(aliasing_reg)) goto bad_handle; /* JIT regions can't be * aliased. NO_USER_FREE flag * covers the entire lifetime @@ -1874,8 +1980,7 @@ } } -#if IS_ENABLED(CONFIG_64BIT) - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + if (!kbase_ctx_compat_mode(kctx)) { /* Bind to a cookie */ if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) { dev_err(kctx->kbdev->dev, "No cookies available for allocation!"); @@ -1890,11 +1995,10 @@ /* relocate to correct base */ gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); gpu_va <<= PAGE_SHIFT; - } else /* we control the VA */ { -#else - if (1) { -#endif - if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) { + } else { + /* we control the VA */ + if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1, + mmu_sync_info) != 0) { dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU"); goto no_mmap; } @@ -1909,9 +2013,7 @@ return gpu_va; -#if IS_ENABLED(CONFIG_64BIT) no_cookie: -#endif no_mmap: bad_handle: /* Marking the source allocs as not being mapped on the GPU and putting @@ -1939,6 +2041,11 @@ { struct kbase_va_region *reg; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. 
+ */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(gpu_va); KBASE_DEBUG_ASSERT(va_pages); @@ -1950,7 +2057,8 @@ if (!kbase_check_import_flags(*flags)) { dev_warn(kctx->kbdev->dev, - "kbase_mem_import called with bad flags (%llx)", + "%s called with bad flags (%llx)", + __func__, (unsigned long long)*flags); goto bad_flags; } @@ -1963,7 +2071,8 @@ if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && !kbase_device_is_cpu_coherent(kctx->kbdev)) { dev_warn(kctx->kbdev->dev, - "kbase_mem_import call required coherent mem when unavailable"); + "%s call required coherent mem when unavailable", + __func__); goto bad_flags; } if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && @@ -1971,7 +2080,10 @@ /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ *flags &= ~BASE_MEM_COHERENT_SYSTEM; } - + if (((*flags & BASE_MEM_CACHED_CPU) == 0) && (type == BASE_MEM_IMPORT_TYPE_USER_BUFFER)) { + dev_warn(kctx->kbdev->dev, "USER_BUFFER must be CPU cached"); + goto bad_flags; + } if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) { dev_warn(kctx->kbdev->dev, "padding is only supported for UMM"); @@ -2038,7 +2150,8 @@ } else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES) { /* we control the VA, mmap now to the GPU */ - if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0) + if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1, mmu_sync_info) != + 0) goto no_gpu_va; /* return real GPU VA */ *gpu_va = reg->start_pfn << PAGE_SHIFT; @@ -2072,8 +2185,9 @@ } int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages) + struct kbase_va_region *reg, u64 new_pages, + u64 old_pages, + enum kbase_caller_mmu_sync_info mmu_sync_info) { struct tagged_addr *phy_pages; u64 delta = new_pages - old_pages; @@ -2083,9 +2197,9 @@ /* Map the new pages into the GPU */ phy_pages = kbase_get_gpu_phy_pages(reg); - ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn + old_pages, phy_pages + old_pages, delta, - reg->flags, kctx->as_nr, reg->gpu_alloc->group_id); + ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages, + phy_pages + old_pages, delta, reg->flags, kctx->as_nr, + reg->gpu_alloc->group_id, mmu_sync_info, reg, false); return ret; } @@ -2105,28 +2219,16 @@ (old_pages - new_pages)<<PAGE_SHIFT, 1); } -/** - * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation - * @kctx: Context the region belongs to - * @reg: The GPU region or NULL if there isn't one - * @new_pages: The number of pages after the shrink - * @old_pages: The number of pages before the shrink - * - * Return: 0 on success, negative -errno on error - * - * Unmap the shrunk pages from the GPU mapping. Note that the size of the region - * itself is unmodified as we still need to reserve the VA, only the page tables - * will be modified by this function. 
- */ -static int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, - struct kbase_va_region *const reg, - u64 const new_pages, u64 const old_pages) +int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, + struct kbase_va_region *const reg, u64 const new_pages, + u64 const old_pages) { u64 delta = old_pages - new_pages; + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; int ret = 0; - ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn + new_pages, delta, kctx->as_nr); + ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages, + alloc->pages + new_pages, delta, delta, kctx->as_nr, false); return ret; } @@ -2138,6 +2240,11 @@ int res = -EINVAL; struct kbase_va_region *reg; bool read_locked = false; + + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(gpu_addr != 0); @@ -2185,8 +2292,11 @@ if (atomic_read(®->cpu_alloc->kernel_mappings) > 0) goto out_unlock; - /* can't grow regions which are ephemeral */ - if (reg->flags & KBASE_REG_DONT_NEED) + + if (kbase_is_region_shrinkable(reg)) + goto out_unlock; + + if (kbase_va_region_is_no_user_free(reg)) goto out_unlock; #ifdef CONFIG_MALI_MEMORY_FULLY_BACKED @@ -2230,8 +2340,8 @@ /* No update required for CPU mappings, that's done on fault. */ /* Update GPU mapping. */ - res = kbase_mem_grow_gpu_mapping(kctx, reg, - new_pages, old_pages); + res = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages, + old_pages, mmu_sync_info); /* On error free the new pages */ if (res) { @@ -2259,7 +2369,7 @@ } int kbase_mem_shrink(struct kbase_context *const kctx, - struct kbase_va_region *const reg, u64 const new_pages) + struct kbase_va_region *const reg, u64 new_pages) { u64 delta, old_pages; int err; @@ -2289,6 +2399,19 @@ kbase_free_phy_pages_helper(reg->cpu_alloc, delta); if (reg->cpu_alloc != reg->gpu_alloc) kbase_free_phy_pages_helper(reg->gpu_alloc, delta); + + if (kctx->kbdev->pagesize_2mb) { + if (kbase_reg_current_backed_size(reg) > new_pages) { + old_pages = new_pages; + new_pages = kbase_reg_current_backed_size(reg); + + /* Update GPU mapping. */ + err = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages, old_pages, + CALLER_MMU_ASYNC); + } + } else { + WARN_ON(kbase_reg_current_backed_size(reg) != new_pages); + } } return err; @@ -2327,7 +2450,7 @@ /* Avoid freeing memory on the process death which results in * GPU Page Fault. 
Memory will be freed in kbase_destroy_context */ - if (!(current->flags & PF_EXITING)) + if (!is_process_exiting(vma)) kbase_mem_free_region(map->kctx, map->region); } @@ -2559,7 +2682,6 @@ while (kbase_jit_evict(kctx)) ; } -#endif static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, @@ -2572,13 +2694,13 @@ size_t size; int err = 0; - dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n"); + lockdep_assert_held(&kctx->reg_lock); + + dev_dbg(kctx->kbdev->dev, "%s\n", __func__); size = (vma->vm_end - vma->vm_start); nr_pages = size >> PAGE_SHIFT; -#ifdef CONFIG_MALI_VECTOR_DUMP kbase_free_unused_jit_allocations(kctx); -#endif kaddr = kbase_mmu_dump(kctx, nr_pages); @@ -2587,8 +2709,8 @@ goto out; } - new_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, nr_pages, - KBASE_REG_ZONE_SAME_VA); + new_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, nr_pages, + KBASE_REG_ZONE_SAME_VA); if (!new_reg) { err = -ENOMEM; WARN_ON(1); @@ -2617,7 +2739,7 @@ *kmap_addr = kaddr; *reg = new_reg; - dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n"); + dev_dbg(kctx->kbdev->dev, "%s done\n", __func__); return 0; out_no_alloc: @@ -2626,7 +2748,7 @@ out: return err; } - +#endif void kbase_os_mem_map_lock(struct kbase_context *kctx) { @@ -2646,13 +2768,18 @@ size_t *nr_pages, size_t *aligned_offset) { - int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); + unsigned int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); struct kbase_va_region *reg; int err = 0; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + *aligned_offset = 0; - dev_dbg(kctx->kbdev->dev, "in kbasep_reg_mmap\n"); + dev_dbg(kctx->kbdev->dev, "%s\n", __func__); /* SAME_VA stuff, fetch the right region */ reg = kctx->pending_regions[cookie]; @@ -2682,9 +2809,8 @@ /* adjust down nr_pages to what we have physically */ *nr_pages = kbase_reg_current_backed_size(reg); - if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset, - reg->nr_pages, 1) != 0) { + reg->nr_pages, 1, mmu_sync_info) != 0) { dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__); /* Unable to map in GPU space. 
*/ WARN_ON(1); @@ -2709,7 +2835,7 @@ vma->vm_pgoff = reg->start_pfn - ((*aligned_offset)>>PAGE_SHIFT); out: *regm = reg; - dev_dbg(kctx->kbdev->dev, "kbasep_reg_mmap done\n"); + dev_dbg(kctx->kbdev->dev, "%s done\n", __func__); return err; } @@ -2750,17 +2876,10 @@ goto out_unlock; } - /* if not the MTP, verify that the MTP has been mapped */ - rcu_read_lock(); - /* catches both when the special page isn't present or - * when we've forked - */ - if (rcu_dereference(kctx->process_mm) != current->mm) { + if (!kbase_mem_allow_alloc(kctx)) { err = -EINVAL; - rcu_read_unlock(); goto out_unlock; } - rcu_read_unlock(); switch (vma->vm_pgoff) { case PFN_DOWN(BASEP_MEM_INVALID_HANDLE): @@ -2769,6 +2888,7 @@ err = -EINVAL; goto out_unlock; case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE): +#if defined(CONFIG_MALI_VECTOR_DUMP) /* MMU dump */ err = kbase_mmu_dump_mmap(kctx, vma, ®, &kaddr); if (err != 0) @@ -2776,6 +2896,11 @@ /* free the region on munmap */ free_on_close = 1; break; +#else + /* Illegal handle for direct map */ + err = -EINVAL; + goto out_unlock; +#endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ #if MALI_USE_CSF case PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE): kbase_gpu_vm_unlock(kctx); @@ -2846,8 +2971,7 @@ dev_warn(dev, "mmap aliased: invalid params!\n"); goto out_unlock; } - } - else if (reg->cpu_alloc->nents < + } else if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) { /* limit what we map to the amount currently backed */ if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents) @@ -2864,7 +2988,7 @@ err = kbase_cpu_mmap(kctx, reg, vma, kaddr, nr_pages, aligned_offset, free_on_close); - +#if defined(CONFIG_MALI_VECTOR_DUMP) if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) { /* MMU dump - userspace should now have a reference on * the pages, so we can now free the kernel mapping @@ -2883,7 +3007,7 @@ */ vma->vm_pgoff = PFN_DOWN(vma->vm_start); } - +#endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ out_unlock: kbase_gpu_vm_unlock(kctx); out: @@ -2925,9 +3049,102 @@ } } -static int kbase_vmap_phy_pages(struct kbase_context *kctx, - struct kbase_va_region *reg, u64 offset_bytes, size_t size, - struct kbase_vmap_struct *map) +/** + * kbase_vmap_phy_pages_migrate_count_increment - Increment VMAP count for + * array of physical pages + * + * @pages: Array of pages. + * @page_count: Number of pages. + * @flags: Region flags. + * + * This function is supposed to be called only if page migration support + * is enabled in the driver. + * + * The counter of kernel CPU mappings of the physical pages involved in a + * mapping operation is incremented by 1. Errors are handled by making pages + * not movable. Permanent kernel mappings will be marked as not movable, too. + */ +static void kbase_vmap_phy_pages_migrate_count_increment(struct tagged_addr *pages, + size_t page_count, unsigned long flags) +{ + size_t i; + + for (i = 0; i < page_count; i++) { + struct page *p = as_page(pages[i]); + struct kbase_page_metadata *page_md = kbase_page_private(p); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. + */ + if (is_huge(pages[i]) || is_partial(pages[i])) + continue; + + spin_lock(&page_md->migrate_lock); + /* Mark permanent kernel mappings as NOT_MOVABLE because they're likely + * to stay mapped for a long time. However, keep on counting the number + * of mappings even for them: they don't represent an exception for the + * vmap_count. 
+ * + * At the same time, errors need to be handled if a client tries to add + * too many mappings, hence a page may end up in the NOT_MOVABLE state + * anyway even if it's not a permanent kernel mapping. + */ + if (flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + if (page_md->vmap_count < U8_MAX) + page_md->vmap_count++; + else + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + spin_unlock(&page_md->migrate_lock); + } +} + +/** + * kbase_vunmap_phy_pages_migrate_count_decrement - Decrement VMAP count for + * array of physical pages + * + * @pages: Array of pages. + * @page_count: Number of pages. + * + * This function is supposed to be called only if page migration support + * is enabled in the driver. + * + * The counter of kernel CPU mappings of the physical pages involved in a + * mapping operation is decremented by 1. Errors are handled by making pages + * not movable. + */ +static void kbase_vunmap_phy_pages_migrate_count_decrement(struct tagged_addr *pages, + size_t page_count) +{ + size_t i; + + for (i = 0; i < page_count; i++) { + struct page *p = as_page(pages[i]); + struct kbase_page_metadata *page_md = kbase_page_private(p); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. + */ + if (is_huge(pages[i]) || is_partial(pages[i])) + continue; + + spin_lock(&page_md->migrate_lock); + /* Decrement the number of mappings for all kinds of pages, including + * pages which are NOT_MOVABLE (e.g. permanent kernel mappings). + * However, errors still need to be handled if a client tries to remove + * more mappings than created. + */ + if (page_md->vmap_count == 0) + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + else + page_md->vmap_count--; + spin_unlock(&page_md->migrate_lock); + } +} + +static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg, + u64 offset_bytes, size_t size, struct kbase_vmap_struct *map, + kbase_vmap_flag vmap_flags) { unsigned long page_index; unsigned int offset_in_page = offset_bytes & ~PAGE_MASK; @@ -2937,6 +3154,12 @@ void *cpu_addr = NULL; pgprot_t prot; size_t i; + + if (WARN_ON(vmap_flags & ~KBASE_VMAP_INPUT_FLAGS)) + return -EINVAL; + + if (WARN_ON(kbase_is_region_invalid_or_free(reg))) + return -EINVAL; if (!size || !map || !reg->cpu_alloc || !reg->gpu_alloc) return -EINVAL; @@ -2953,6 +3176,17 @@ if (page_index + page_count > kbase_reg_current_backed_size(reg)) return -ENOMEM; + + if ((vmap_flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) && + (page_count > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES - + atomic_read(&kctx->permanent_mapped_pages)))) { + dev_warn( + kctx->kbdev->dev, + "Request for %llu more pages mem needing a permanent mapping would breach limit %lu, currently at %d pages", + (u64)page_count, KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES, + atomic_read(&kctx->permanent_mapped_pages)); + return -ENOMEM; + } if (reg->flags & KBASE_REG_DONT_NEED) return -EINVAL; @@ -2980,6 +3214,13 @@ */ cpu_addr = vmap(pages, page_count, VM_MAP, prot); + /* If page migration is enabled, increment the number of VMA mappings + * of all physical pages. In case of errors, e.g. too many mappings, + * make the page not movable to prevent trouble. 
+ */ + if (kbase_page_migration_enabled && !kbase_mem_is_imported(reg->gpu_alloc->type)) + kbase_vmap_phy_pages_migrate_count_increment(page_array, page_count, reg->flags); + kfree(pages); if (!cpu_addr) @@ -2992,14 +3233,55 @@ map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index]; map->addr = (void *)((uintptr_t)cpu_addr + offset_in_page); map->size = size; - map->sync_needed = ((reg->flags & KBASE_REG_CPU_CACHED) != 0) && - !kbase_mem_is_imported(map->gpu_alloc->type); + map->flags = vmap_flags; + if ((reg->flags & KBASE_REG_CPU_CACHED) && !kbase_mem_is_imported(map->gpu_alloc->type)) + map->flags |= KBASE_VMAP_FLAG_SYNC_NEEDED; - if (map->sync_needed) + if (map->flags & KBASE_VMAP_FLAG_SYNC_NEEDED) kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU); + if (vmap_flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) + atomic_add(page_count, &kctx->permanent_mapped_pages); + kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc); + return 0; +} + +void *kbase_vmap_reg(struct kbase_context *kctx, struct kbase_va_region *reg, u64 gpu_addr, + size_t size, unsigned long prot_request, struct kbase_vmap_struct *map, + kbase_vmap_flag vmap_flags) +{ + u64 offset_bytes; + struct kbase_mem_phy_alloc *cpu_alloc; + struct kbase_mem_phy_alloc *gpu_alloc; + int err; + + lockdep_assert_held(&kctx->reg_lock); + + if (WARN_ON(kbase_is_region_invalid_or_free(reg))) + return NULL; + + /* check access permissions can be satisfied + * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} + */ + if ((reg->flags & prot_request) != prot_request) + return NULL; + + offset_bytes = gpu_addr - (reg->start_pfn << PAGE_SHIFT); + cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); + gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + + err = kbase_vmap_phy_pages(kctx, reg, offset_bytes, size, map, vmap_flags); + if (err < 0) + goto fail_vmap_phy_pages; + + return map->addr; + +fail_vmap_phy_pages: + kbase_mem_phy_alloc_put(cpu_alloc); + kbase_mem_phy_alloc_put(gpu_alloc); + return NULL; } void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, @@ -3007,44 +3289,21 @@ { struct kbase_va_region *reg; void *addr = NULL; - u64 offset_bytes; - struct kbase_mem_phy_alloc *cpu_alloc; - struct kbase_mem_phy_alloc *gpu_alloc; - int err; kbase_gpu_vm_lock(kctx); - reg = kbase_region_tracker_find_region_enclosing_address(kctx, - gpu_addr); + reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); if (kbase_is_region_invalid_or_free(reg)) goto out_unlock; - /* check access permissions can be satisfied - * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} - */ - if ((reg->flags & prot_request) != prot_request) + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) goto out_unlock; - offset_bytes = gpu_addr - (reg->start_pfn << PAGE_SHIFT); - cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); - gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); - - err = kbase_vmap_phy_pages(kctx, reg, offset_bytes, size, map); - if (err < 0) - goto fail_vmap_phy_pages; - - addr = map->addr; + addr = kbase_vmap_reg(kctx, reg, gpu_addr, size, prot_request, map, 0u); out_unlock: kbase_gpu_vm_unlock(kctx); return addr; - -fail_vmap_phy_pages: - kbase_gpu_vm_unlock(kctx); - kbase_mem_phy_alloc_put(cpu_alloc); - kbase_mem_phy_alloc_put(gpu_alloc); - - return NULL; } void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, @@ -3064,18 +3323,37 @@ struct kbase_vmap_struct *map) { void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK); + vunmap(addr); - if (map->sync_needed) + /* If page 
migration is enabled, decrement the number of VMA mappings + * for all physical pages. Now is a good time to do it because references + * haven't been released yet. + */ + if (kbase_page_migration_enabled && !kbase_mem_is_imported(map->gpu_alloc->type)) { + const size_t page_count = PFN_UP(map->offset_in_page + map->size); + struct tagged_addr *pages_array = map->cpu_pages; + + kbase_vunmap_phy_pages_migrate_count_decrement(pages_array, page_count); + } + + if (map->flags & KBASE_VMAP_FLAG_SYNC_NEEDED) kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE); + if (map->flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) { + size_t page_count = PFN_UP(map->offset_in_page + map->size); + + WARN_ON(page_count > atomic_read(&kctx->permanent_mapped_pages)); + atomic_sub(page_count, &kctx->permanent_mapped_pages); + } kbase_mem_phy_alloc_kernel_unmapped(map->cpu_alloc); + map->offset_in_page = 0; map->cpu_pages = NULL; map->gpu_pages = NULL; map->addr = NULL; map->size = 0; - map->sync_needed = false; + map->flags = 0; } void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) @@ -3102,79 +3380,29 @@ void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) { - struct mm_struct *mm; + struct mm_struct *mm = kctx->process_mm; - rcu_read_lock(); - mm = rcu_dereference(kctx->process_mm); - if (mm) { - atomic_add(pages, &kctx->nonmapped_pages); -#ifdef SPLIT_RSS_COUNTING - kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); -#else - spin_lock(&mm->page_table_lock); - kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); - spin_unlock(&mm->page_table_lock); -#endif - } - rcu_read_unlock(); -} - -static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx) -{ - int pages; - struct mm_struct *mm; - - spin_lock(&kctx->mm_update_lock); - mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock)); - if (!mm) { - spin_unlock(&kctx->mm_update_lock); + if (unlikely(!mm)) return; - } - rcu_assign_pointer(kctx->process_mm, NULL); - spin_unlock(&kctx->mm_update_lock); - synchronize_rcu(); - - pages = atomic_xchg(&kctx->nonmapped_pages, 0); + atomic_add(pages, &kctx->nonmapped_pages); #ifdef SPLIT_RSS_COUNTING - kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages); + kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); #else spin_lock(&mm->page_table_lock); - kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages); + kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); spin_unlock(&mm->page_table_lock); #endif } -static void kbase_special_vm_close(struct vm_area_struct *vma) -{ - struct kbase_context *kctx; - - kctx = vma->vm_private_data; - kbasep_os_process_page_usage_drain(kctx); -} - -static const struct vm_operations_struct kbase_vm_special_ops = { - .close = kbase_special_vm_close, -}; - static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma) { - /* check that this is the only tracking page */ - spin_lock(&kctx->mm_update_lock); - if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) { - spin_unlock(&kctx->mm_update_lock); - return -EFAULT; - } - - rcu_assign_pointer(kctx->process_mm, current->mm); - - spin_unlock(&kctx->mm_update_lock); + if (vma_pages(vma) != 1) + return -EINVAL; /* no real access */ vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; - vma->vm_ops = &kbase_vm_special_ops; - vma->vm_private_data = kctx; return 0; } @@ -3189,15 +3417,37 @@ * assigned one, 
otherwise a dummy page. Always return the * dummy page in no mali builds. */ +#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) + return PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_db_page)); +#else if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID) return PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_db_page)); +#endif return (PFN_DOWN(kbdev->reg_start + CSF_HW_DOORBELL_PAGE_OFFSET + (u64)queue->doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE)); } +static int +#if (KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE || \ + KERNEL_VERSION(5, 11, 0) > LINUX_VERSION_CODE) +kbase_csf_user_io_pages_vm_mremap(struct vm_area_struct *vma) +#else +kbase_csf_user_io_pages_vm_mremap(struct vm_area_struct *vma, unsigned long flags) +#endif +{ + pr_debug("Unexpected call to mremap method for User IO pages mapping vma\n"); + return -EINVAL; +} + +static int kbase_csf_user_io_pages_vm_split(struct vm_area_struct *vma, unsigned long addr) +{ + pr_debug("Unexpected call to split method for User IO pages mapping vma\n"); + return -EINVAL; +} + static void kbase_csf_user_io_pages_vm_open(struct vm_area_struct *vma) { - WARN(1, "Unexpected attempt to clone private vma\n"); + pr_debug("Unexpected call to the open method for User IO pages mapping vma\n"); vma->vm_private_data = NULL; } @@ -3209,8 +3459,10 @@ int err; bool reset_prevented = false; - if (WARN_ON(!queue)) + if (!queue) { + pr_debug("Close method called for the new User IO pages mapping vma\n"); return; + } kctx = queue->kctx; kbdev = kctx->kbdev; @@ -3225,7 +3477,7 @@ reset_prevented = true; mutex_lock(&kctx->csf.lock); - kbase_csf_queue_unbind(queue); + kbase_csf_queue_unbind(queue, is_process_exiting(vma)); mutex_unlock(&kctx->csf.lock); if (reset_prevented) @@ -3254,24 +3506,21 @@ struct memory_group_manager_device *mgm_dev; /* Few sanity checks up front */ - if ((nr_pages != BASEP_QUEUE_NR_MMAP_USER_PAGES) || - (vma->vm_pgoff != queue->db_file_offset)) + if (!queue || (nr_pages != BASEP_QUEUE_NR_MMAP_USER_PAGES) || + (vma->vm_pgoff != queue->db_file_offset)) { + pr_warn("Unexpected CPU page fault on User IO pages mapping for process %s tgid %d pid %d\n", + current->comm, current->tgid, current->pid); return VM_FAULT_SIGBUS; + } - mutex_lock(&queue->kctx->csf.lock); kbdev = queue->kctx->kbdev; mgm_dev = kbdev->mgm_dev; + + mutex_lock(&kbdev->csf.reg_lock); /* Always map the doorbell page as uncached */ doorbell_pgprot = pgprot_device(vma->vm_page_prot); -#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \ - ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \ - (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE))) - vma->vm_page_prot = doorbell_pgprot; - input_page_pgprot = doorbell_pgprot; - output_page_pgprot = doorbell_pgprot; -#else if (kbdev->system_coherency == COHERENCY_NONE) { input_page_pgprot = pgprot_writecombine(vma->vm_page_prot); output_page_pgprot = pgprot_writecombine(vma->vm_page_prot); @@ -3279,7 +3528,6 @@ input_page_pgprot = vma->vm_page_prot; output_page_pgprot = vma->vm_page_prot; } -#endif doorbell_cpu_addr = vma->vm_start; @@ -3288,12 +3536,10 @@ #else if (vmf->address == doorbell_cpu_addr) { #endif - mutex_lock(&kbdev->csf.reg_lock); doorbell_page_pfn = get_queue_doorbell_pfn(kbdev, queue); ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, KBASE_MEM_GROUP_CSF_IO, vma, doorbell_cpu_addr, doorbell_page_pfn, doorbell_pgprot); - mutex_unlock(&kbdev->csf.reg_lock); } else { /* Map the Input page */ input_cpu_addr = doorbell_cpu_addr + PAGE_SIZE; @@ -3313,13 +3559,19 @@ } exit: - mutex_unlock(&queue->kctx->csf.lock); + 
mutex_unlock(&kbdev->csf.reg_lock); return ret; } static const struct vm_operations_struct kbase_csf_user_io_pages_vm_ops = { .open = kbase_csf_user_io_pages_vm_open, .close = kbase_csf_user_io_pages_vm_close, +#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE + .may_split = kbase_csf_user_io_pages_vm_split, +#else + .split = kbase_csf_user_io_pages_vm_split, +#endif + .mremap = kbase_csf_user_io_pages_vm_mremap, .fault = kbase_csf_user_io_pages_vm_fault }; @@ -3399,13 +3651,75 @@ return err; } +/** + * kbase_csf_user_reg_vm_open - VMA open function for the USER page + * + * @vma: Pointer to the struct containing information about + * the userspace mapping of USER page. + * Note: + * This function isn't expected to be called. If called (i.e> mremap), + * set private_data as NULL to indicate to close() and fault() functions. + */ +static void kbase_csf_user_reg_vm_open(struct vm_area_struct *vma) +{ + pr_debug("Unexpected call to the open method for USER register mapping"); + vma->vm_private_data = NULL; +} + +/** + * kbase_csf_user_reg_vm_close - VMA close function for the USER page + * + * @vma: Pointer to the struct containing information about + * the userspace mapping of USER page. + */ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) { struct kbase_context *kctx = vma->vm_private_data; + struct kbase_device *kbdev; - WARN_ON(!kctx->csf.user_reg_vma); + if (unlikely(!kctx)) { + pr_debug("Close function called for the unexpected mapping"); + return; + } - kctx->csf.user_reg_vma = NULL; + kbdev = kctx->kbdev; + + if (unlikely(!kctx->csf.user_reg.vma)) + dev_warn(kbdev->dev, "user_reg VMA pointer unexpectedly NULL for ctx %d_%d", + kctx->tgid, kctx->id); + + mutex_lock(&kbdev->csf.reg_lock); + list_del_init(&kctx->csf.user_reg.link); + mutex_unlock(&kbdev->csf.reg_lock); + + kctx->csf.user_reg.vma = NULL; + + /* Now as the VMA is closed, drop the reference on mali device file */ + fput(kctx->filp); +} + +/** + * kbase_csf_user_reg_vm_mremap - VMA mremap function for the USER page + * + * @vma: Pointer to the struct containing information about + * the userspace mapping of USER page. + * + * Return: -EINVAL + * + * Note: + * User space must not attempt mremap on USER page mapping. + * This function will return an error to fail the attempt. 
+ */ +static int +#if ((KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE) || \ + (KERNEL_VERSION(5, 11, 0) > LINUX_VERSION_CODE)) +kbase_csf_user_reg_vm_mremap(struct vm_area_struct *vma) +#else +kbase_csf_user_reg_vm_mremap(struct vm_area_struct *vma, unsigned long flags) +#endif +{ + pr_debug("Unexpected call to mremap method for USER page mapping vma\n"); + return -EINVAL; } #if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) @@ -3418,39 +3732,52 @@ struct vm_area_struct *vma = vmf->vma; #endif struct kbase_context *kctx = vma->vm_private_data; - struct kbase_device *kbdev = kctx->kbdev; - struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev; - unsigned long pfn = PFN_DOWN(kbdev->reg_start + USER_BASE); + struct kbase_device *kbdev; + struct memory_group_manager_device *mgm_dev; + unsigned long pfn; size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start); vm_fault_t ret = VM_FAULT_SIGBUS; + unsigned long flags; /* Few sanity checks up front */ - if (WARN_ON(nr_pages != 1) || - WARN_ON(vma != kctx->csf.user_reg_vma) || - WARN_ON(vma->vm_pgoff != - PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE))) + + if (!kctx || (nr_pages != 1) || (vma != kctx->csf.user_reg.vma) || + (vma->vm_pgoff != kctx->csf.user_reg.file_offset)) { + pr_err("Unexpected CPU page fault on USER page mapping for process %s tgid %d pid %d\n", + current->comm, current->tgid, current->pid); return VM_FAULT_SIGBUS; + } - mutex_lock(&kbdev->pm.lock); + kbdev = kctx->kbdev; + mgm_dev = kbdev->mgm_dev; + pfn = PFN_DOWN(kbdev->reg_start + USER_BASE); - /* Don't map in the actual register page if GPU is powered down. - * Always map in the dummy page in no mali builds. + mutex_lock(&kbdev->csf.reg_lock); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + /* Dummy page will be mapped during GPU off. + * + * In no mail builds, always map in the dummy page. */ - if (!kbdev->pm.backend.gpu_powered) - pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page)); + if (IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) || !kbdev->pm.backend.gpu_powered) + pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.user_reg.dummy_page)); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + list_move_tail(&kctx->csf.user_reg.link, &kbdev->csf.user_reg.list); ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, KBASE_MEM_GROUP_CSF_FW, vma, vma->vm_start, pfn, vma->vm_page_prot); - mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&kbdev->csf.reg_lock); return ret; } static const struct vm_operations_struct kbase_csf_user_reg_vm_ops = { + .open = kbase_csf_user_reg_vm_open, .close = kbase_csf_user_reg_vm_close, + .mremap = kbase_csf_user_reg_vm_mremap, .fault = kbase_csf_user_reg_vm_fault }; @@ -3458,9 +3785,10 @@ struct vm_area_struct *vma) { size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start); + struct kbase_device *kbdev = kctx->kbdev; /* Few sanity checks */ - if (kctx->csf.user_reg_vma) + if (kctx->csf.user_reg.vma) return -EBUSY; if (nr_pages != 1) @@ -3479,8 +3807,21 @@ */ vma->vm_flags |= VM_PFNMAP; - kctx->csf.user_reg_vma = vma; + kctx->csf.user_reg.vma = vma; + mutex_lock(&kbdev->csf.reg_lock); + kctx->csf.user_reg.file_offset = kbdev->csf.user_reg.file_offset++; + mutex_unlock(&kbdev->csf.reg_lock); + + /* Make VMA point to the special internal file, but don't drop the + * reference on mali device file (that would be done later when the + * VMA is closed). 
+ */ + vma->vm_file = kctx->kbdev->csf.user_reg.filp; + get_file(vma->vm_file); + + /* Also adjust the vm_pgoff */ + vma->vm_pgoff = kctx->csf.user_reg.file_offset; vma->vm_ops = &kbase_csf_user_reg_vm_ops; vma->vm_private_data = kctx; -- Gitblit v1.6.2