/*
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */

#include "linux/mman.h"
#include <mali_kbase.h>

/* mali_kbase_mmap.c
 *
 * This file contains the Linux-specific implementation of the
 * kbase_context_get_unmapped_area() interface.
 */

/**
 * align_and_check() - Align the specified pointer to the provided alignment
 *                     and check that it is still in range.
 * @gap_end:           Highest possible start address for allocation (end of
 *                     gap in address space)
 * @gap_start:         Start address of current memory area / gap in address
 *                     space
 * @info:              vm_unmapped_area_info structure passed to caller,
 *                     containing alignment, length and limits for the
 *                     allocation
 * @is_shader_code:    True if the allocation is for shader code (which has
 *                     additional alignment requirements)
 * @is_same_4gb_page:  True if the allocation needs to reside completely
 *                     within a 4GB chunk
 *
 * Return: true if gap_end is now aligned correctly and is still in range,
 *         false otherwise
 */
static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
		struct vm_unmapped_area_info *info, bool is_shader_code,
		bool is_same_4gb_page)
{
	/* Compute highest gap address at the desired alignment */
	(*gap_end) -= info->length;
	(*gap_end) -= (*gap_end - info->align_offset) & info->align_mask;
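	/* At this point *gap_end is the highest address that both leaves room
	 * for info->length bytes below the original gap end and is congruent
	 * to info->align_offset modulo (info->align_mask + 1). For example,
	 * with align_offset == SZ_2M and align_mask == SZ_2M - 1 (the values
	 * used for large allocations in kbase_context_get_unmapped_area())
	 * this is a plain round-down to a 2MB boundary.
	 */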

	if (is_shader_code) {
		/* Check for 4GB boundary */
		if (0 == (*gap_end & BASE_MEM_MASK_4GB))
			(*gap_end) -= (info->align_offset ? info->align_offset :
					info->length);
		if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB))
			(*gap_end) -= (info->align_offset ? info->align_offset :
					info->length);

		if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end +
				info->length) & BASE_MEM_MASK_4GB))
			return false;
	} else if (is_same_4gb_page) {
		unsigned long start = *gap_end;
		unsigned long end = *gap_end + info->length;
		unsigned long mask = ~((unsigned long)U32_MAX);
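		/* mask keeps only the address bits above the low 32, i.e. it
		 * identifies which 4GB chunk an address falls in; two
		 * addresses lie in the same 4GB chunk iff they are equal
		 * under this mask.
		 */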

		/* Check if 4GB boundary is straddled */
		if ((start & mask) != ((end - 1) & mask)) {
			unsigned long offset = end - (end & mask);
			/* Shift the region down by a whole number of
			 * alignment units so that moving it below the 4GB
			 * boundary does not disturb the alignment. The GPU VA
			 * is aligned to 2MB when the allocation size is > 2MB
			 * and there is enough CPU & GPU virtual space.
			 */
			unsigned long rounded_offset =
					ALIGN(offset, info->align_mask + 1);

			start -= rounded_offset;
			end -= rounded_offset;

			*gap_end = start;

			/* The preceding 4GB boundary shall not get straddled,
			 * even after accounting for the alignment, as the
			 * size of the allocation is limited to 4GB and the
			 * initial start location was already aligned.
			 */
			WARN_ON((start & mask) != ((end - 1) & mask));
		}
	}

	if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
		return false;

	return true;
}

/**
 * kbase_unmapped_area_topdown() - allocates new areas top-down from
 *                                 below the stack limit.
 * @info:             Information about the memory area to allocate.
 * @is_shader_code:   Boolean which denotes whether the allocated area is
 *                    intended for use by a shader core, in which case special
 *                    alignment requirements apply.
 * @is_same_4gb_page: Boolean which indicates whether the allocated area needs
 *                    to reside completely within a 4GB chunk.
 *
 * The unmapped_area_topdown() function in the Linux kernel is not exported
 * using the EXPORT_SYMBOL_GPL macro. To allow us to call this function from a
 * module, and also to make use of the fact that some of the requirements for
 * the unmapped area are known in advance, we implemented an extended version
 * of this function and prefixed it with 'kbase_'.
 *
 * The difference in the call parameter list comes from the fact that
 * kbase_unmapped_area_topdown() is called with additional parameters which
 * indicate whether the allocation is for shader core memory, which has
 * additional alignment requirements, and whether the allocation can straddle
 * a 4GB boundary.
 *
 * The modification of the original Linux function lies in how the computation
 * of the highest gap address at the desired alignment is performed once a
 * gap with the desired properties is found. For this purpose a special
 * function is introduced (@ref align_and_check()) which, besides computing
 * the gap end at the desired alignment, also performs additional alignment
 * checks: for executable shader core memory it ensures that the allocation
 * neither starts nor ends on a 4GB boundary, and for memory that needs to be
 * confined within a 4GB chunk it ensures no 4GB boundary is straddled.
 *
 * Return: address of the found gap end (high limit) if an area is found;
 *         -ENOMEM if the search is unsuccessful
 */

static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
		*info, bool is_shader_code, bool is_same_4gb_page)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long length, low_limit, high_limit, gap_start, gap_end;

	/* Adjust search length to account for worst case alignment overhead:
	 * a gap of this padded size can still hold info->length bytes after
	 * align_and_check() rounds the start address down. If the addition
	 * overflows, the request can never be satisfied.
	 */
	length = info->length + info->align_mask;
	if (length < info->length)
		return -ENOMEM;

	/*
	 * Adjust search limits by the desired length.
	 * See implementation comment at top of unmapped_area().
	 */
	gap_end = info->high_limit;
	if (gap_end < length)
		return -ENOMEM;
	high_limit = gap_end - length;

	if (info->low_limit > high_limit)
		return -ENOMEM;
	low_limit = info->low_limit + length;
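	/* With the adjusted limits, only gaps with gap_start <= high_limit
	 * and gap_end >= low_limit can possibly hold the padded request;
	 * these are the conditions the traversal below checks before handing
	 * a gap to align_and_check(), which re-checks against the caller's
	 * original limits.
	 */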

	/* Check highest gap, which does not precede any rbtree node */
	gap_start = mm->highest_vm_end;
	if (gap_start <= high_limit) {
		if (align_and_check(&gap_end, gap_start, info,
				is_shader_code, is_same_4gb_page))
			return gap_end;
	}

	/* Check if rbtree root looks promising */
	if (RB_EMPTY_ROOT(&mm->mm_rb))
		return -ENOMEM;
	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
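	/* rb_subtree_gap caches the size of the largest free gap anywhere in
	 * the subtree rooted at this vma, so if even the root's value is
	 * smaller than the padded length there is no suitable gap in the
	 * whole address space.
	 */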
	if (vma->rb_subtree_gap < length)
		return -ENOMEM;

	while (true) {
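		/* Walk the VMA rbtree top-down: prefer the right
		 * (higher-address) subtree so the highest suitable gap is
		 * found first, and use rb_subtree_gap to skip subtrees that
		 * cannot contain a large enough gap.
		 */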
		/* Visit right subtree if it looks promising */
		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
			struct vm_area_struct *right =
				rb_entry(vma->vm_rb.rb_right,
					struct vm_area_struct, vm_rb);
			if (right->rb_subtree_gap >= length) {
				vma = right;
				continue;
			}
		}

check_current:
		/* Check if current node has a suitable gap */
		gap_end = vma->vm_start;
		if (gap_end < low_limit)
			return -ENOMEM;
		if (gap_start <= high_limit && gap_end - gap_start >= length) {
			/* We found a suitable gap. Clip it with the original
			 * high_limit.
			 */
			if (gap_end > info->high_limit)
				gap_end = info->high_limit;

			if (align_and_check(&gap_end, gap_start, info,
					is_shader_code, is_same_4gb_page))
				return gap_end;
		}

		/* Visit left subtree if it looks promising */
		if (vma->vm_rb.rb_left) {
			struct vm_area_struct *left =
				rb_entry(vma->vm_rb.rb_left,
					struct vm_area_struct, vm_rb);
			if (left->rb_subtree_gap >= length) {
				vma = left;
				continue;
			}
		}

		/* Go back up the rbtree to find next candidate node */
		while (true) {
			struct rb_node *prev = &vma->vm_rb;

			if (!rb_parent(prev))
				return -ENOMEM;
			vma = rb_entry(rb_parent(prev),
					struct vm_area_struct, vm_rb);
			if (prev == vma->vm_rb.rb_right) {
				gap_start = vma->vm_prev ?
					vma->vm_prev->vm_end : 0;
				goto check_current;
			}
		}
	}
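	/* Not reached: the tree walk above always returns. */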
	return -ENOMEM;
}

/* This function is based on the Linux kernel's arch_get_unmapped_area(), but
 * simplified slightly. Modifications come from the fact that some values
 * about the memory area are known in advance.
 */
unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
		const unsigned long addr, const unsigned long len,
		const unsigned long pgoff, const unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_unmapped_area_info info;
	unsigned long align_offset = 0;
	unsigned long align_mask = 0;
	unsigned long high_limit = mm->mmap_base;
	unsigned long low_limit = PAGE_SIZE;
	int cpu_va_bits = BITS_PER_LONG;
	int gpu_pc_bits =
		kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
	bool is_shader_code = false;
	bool is_same_4gb_page = false;
	unsigned long ret;

	/* The 'nolock' form is used here:
	 * - the base_pfn of the SAME_VA zone does not change
	 * - in normal use, va_size_pages is constant once the first allocation
	 *   begins
	 *
	 * However, in abnormal use this function could be processing whilst
	 * another new zone is being set up in a different thread (e.g. to
	 * borrow part of the SAME_VA zone). In the worst case, this path may
	 * witness a higher SAME_VA end_pfn than the code setting up the new
	 * zone.
	 *
	 * This is safe because once we reach the main allocation functions,
	 * we'll see the updated SAME_VA end_pfn and will determine that there
	 * is no free region at the address found here using a stale (too
	 * large) same_va_end_addr, and will fail the allocation gracefully.
	 */
	struct kbase_reg_zone *zone =
		kbase_ctx_reg_zone_get_nolock(kctx, KBASE_REG_ZONE_SAME_VA);
	u64 same_va_end_addr = kbase_reg_zone_end_pfn(zone) << PAGE_SHIFT;

	/* err on fixed address */
	if ((flags & MAP_FIXED) || addr)
		return -EINVAL;

#if IS_ENABLED(CONFIG_64BIT)
	/* too big? */
	if (len > TASK_SIZE - SZ_2M)
		return -ENOMEM;

	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
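		/* Keep the CPU mapping below the end of the SAME_VA zone so
		 * that the chosen address can also be used as the GPU VA
		 * (SAME_VA regions use the same CPU and GPU virtual address).
		 */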
		high_limit =
			min_t(unsigned long, mm->mmap_base, same_va_end_addr);

		/* If there's enough (> 33 bits) of GPU VA space, align
		 * to 2MB boundaries.
		 */
		if (kctx->kbdev->gpu_props.mmu.va_bits > 33) {
			if (len >= SZ_2M) {
				align_offset = SZ_2M;
				align_mask = SZ_2M - 1;
			}
		}

		low_limit = SZ_2M;
	} else {
		cpu_va_bits = 32;
	}
#endif /* CONFIG_64BIT */
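	/* A pgoff inside the cookie range identifies a region that was set up
	 * earlier and is still waiting to be mapped; its flags determine
	 * which additional alignment constraints apply below.
	 */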
	if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) &&
	    (PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) {
		int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
		struct kbase_va_region *reg;

		/* Need to hold gpu vm lock when using reg */
		kbase_gpu_vm_lock(kctx);
		reg = kctx->pending_regions[cookie];
		if (!reg) {
			kbase_gpu_vm_unlock(kctx);
			return -EINVAL;
		}
		if (!(reg->flags & KBASE_REG_GPU_NX)) {
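			/* Executable regions are aligned to the size
			 * addressable by the GPU program counter
			 * (2^gpu_pc_bits bytes) when the CPU VA space is
			 * wider than the GPU PC; align_and_check() then also
			 * ensures they do not start or end on a 4GB boundary.
			 */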
			if (cpu_va_bits > gpu_pc_bits) {
				align_offset = 1ULL << gpu_pc_bits;
				align_mask = align_offset - 1;
				is_shader_code = true;
			}
#if !MALI_USE_CSF
		} else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
			unsigned long extension_bytes =
				(unsigned long)(reg->extension << PAGE_SHIFT);
			/* kbase_check_alloc_sizes() already satisfies
			 * these checks, but they're here to avoid
			 * maintenance hazards due to the assumptions
			 * involved
			 */
			WARN_ON(reg->extension > (ULONG_MAX >> PAGE_SHIFT));
			WARN_ON(reg->initial_commit > (ULONG_MAX >> PAGE_SHIFT));
			WARN_ON(!is_power_of_2(extension_bytes));
			align_mask = extension_bytes - 1;
			align_offset = extension_bytes -
				(reg->initial_commit << PAGE_SHIFT);
#endif /* !MALI_USE_CSF */
		} else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) {
			is_same_4gb_page = true;
		}
		kbase_gpu_vm_unlock(kctx);
#ifndef CONFIG_64BIT
	} else {
		return current->mm->get_unmapped_area(
			kctx->filp, addr, len, pgoff, flags);
#endif
	}

	info.flags = 0;
	info.length = len;
	info.low_limit = low_limit;
	info.high_limit = high_limit;
	info.align_offset = align_offset;
	info.align_mask = align_mask;

	ret = kbase_unmapped_area_topdown(&info, is_shader_code,
			is_same_4gb_page);

	if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base &&
	    high_limit < same_va_end_addr) {
		/* Retry above mmap_base: the first search was constrained to
		 * lie below mm->mmap_base, so if it failed and the SAME_VA
		 * zone extends above mmap_base, search again in the window
		 * between mmap_base and the end of the zone (capped at
		 * TASK_SIZE).
		 */
		info.low_limit = mm->mmap_base;
		info.high_limit = min_t(u64, TASK_SIZE, same_va_end_addr);

		ret = kbase_unmapped_area_topdown(&info, is_shader_code,
				is_same_4gb_page);
	}

	return ret;
}