From 1543e317f1da31b75942316931e8f491a8920811 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Thu, 04 Jan 2024 10:08:02 +0000
Subject: [PATCH] disable FB
---
kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c | 2064 ++++++++++++++++++++++++++++++++--------------------------
1 files changed, 1,142 insertions(+), 922 deletions(-)
diff --git a/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c b/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c
index b4c780b..7a939fc 100644
--- a/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c
+++ b/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,34 +27,23 @@
#include <linux/export.h>
#include <linux/priority_control_manager.h>
#include <linux/shmem_fs.h>
-#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
+#include <csf/mali_kbase_csf_registers.h>
#include "mali_kbase_csf_tiler_heap.h"
#include <mmu/mali_kbase_mmu.h>
#include "mali_kbase_csf_timeout.h"
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
+#include <mali_kbase_hwaccess_time.h>
+#include "mali_kbase_csf_event.h"
+#include <tl/mali_kbase_tracepoints.h>
+#include "mali_kbase_csf_mcu_shared_reg.h"
#define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
#define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
-#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1)
-/**
- * struct kbase_csf_event - CSF event callback.
- *
- * This structure belongs to the list of events which is part of a Kbase
- * context, and describes a callback function with a custom parameter to pass
- * to it when a CSF event is signalled.
- *
- * @link: Link to the rest of the list.
- * @kctx: Pointer to the Kbase context this event belongs to.
- * @callback: Callback function to call when a CSF event is signalled.
- * @param: Parameter to pass to the callback function.
- */
-struct kbase_csf_event {
- struct list_head link;
- struct kbase_context *kctx;
- kbase_csf_event_callback *callback;
- void *param;
-};
+/* Bounds on the ring buffer size accepted when a queue is registered.
+ * The maximum is built from an unsigned literal: shifting a signed 1 into
+ * bit 31 is not representable in int and is undefined behaviour in C.
+ */
+#define CS_RING_BUFFER_MAX_SIZE ((uint32_t)(1U << 31)) /* 2GiB */
+#define CS_RING_BUFFER_MIN_SIZE ((uint32_t)4096)
+
+#define PROTM_ALLOC_MAX_RETRIES ((u8)5)
const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = {
KBASE_QUEUE_GROUP_PRIORITY_HIGH,
@@ -68,6 +57,55 @@
BASE_QUEUE_GROUP_PRIORITY_MEDIUM,
BASE_QUEUE_GROUP_PRIORITY_LOW
};
+
+/*
+ * struct irq_idle_and_protm_track - Tracks the idle and protected mode request
+ * information gathered across groups while
+ * handling one interrupt.
+ *
+ * @protm_grp: Possibly schedulable group that requested protected mode in the interrupt.
+ * NULL if no such request was observed in the tracked interrupt.
+ * @idle_seq: Identifies the highest priority group that notified idle. Set to the
+ * largest field value, U32_MAX, if no group notified idle in the tracked interrupt.
+ * @idle_slot: The slot number; only meaningful when @idle_seq is valid for the
+ * tracked interrupt.
+ */
+struct irq_idle_and_protm_track {
+ struct kbase_queue_group *protm_grp;
+ u32 idle_seq;
+ s8 idle_slot;
+};
+
+/**
+ * kbasep_ctx_user_reg_page_mapping_term() - Terminate resources for USER Register Page.
+ *
+ * @kctx: Pointer to the kbase context
+ *
+ * Logs an error if the context still has a VMA recorded for the USER Register
+ * page. If the context's user_reg list node is still linked, a one-time warning
+ * is raised and the node is unlinked and re-initialised.
+ */
+static void kbasep_ctx_user_reg_page_mapping_term(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ if (unlikely(kctx->csf.user_reg.vma))
+ dev_err(kbdev->dev, "VMA for USER Register page exist on termination of ctx %d_%d",
+ kctx->tgid, kctx->id);
+ if (WARN_ON_ONCE(!list_empty(&kctx->csf.user_reg.link)))
+ list_del_init(&kctx->csf.user_reg.link);
+}
+
+/**
+ * kbasep_ctx_user_reg_page_mapping_init() - Initialize resources for USER Register Page.
+ *
+ * @kctx: Pointer to the kbase context
+ *
+ * Initializes the context's user_reg list node as an empty list and clears the
+ * recorded VMA pointer and file offset.
+ *
+ * Return: Always 0.
+ */
+static int kbasep_ctx_user_reg_page_mapping_init(struct kbase_context *kctx)
+{
+ INIT_LIST_HEAD(&kctx->csf.user_reg.link);
+ kctx->csf.user_reg.vma = NULL;
+ kctx->csf.user_reg.file_offset = 0;
+
+ return 0;
+}
static void put_user_pages_mmap_handle(struct kbase_context *kctx,
struct kbase_queue *queue)
@@ -129,21 +167,6 @@
return 0;
}
-static void gpu_munmap_user_io_pages(struct kbase_context *kctx,
- struct kbase_va_region *reg)
-{
- size_t num_pages = 2;
-
- kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
- reg->start_pfn, num_pages, MCU_AS_NR);
-
- WARN_ON(reg->flags & KBASE_REG_FREE);
-
- mutex_lock(&kctx->kbdev->csf.reg_lock);
- kbase_remove_va_region(reg);
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
-}
-
static void init_user_io_pages(struct kbase_queue *queue)
{
u32 *input_addr = (u32 *)(queue->user_io_addr);
@@ -161,80 +184,15 @@
output_addr[CS_ACTIVE/4] = 0;
}
-/* Map the input/output pages in the shared interface segment of MCU firmware
- * address space.
- */
-static int gpu_mmap_user_io_pages(struct kbase_device *kbdev,
- struct tagged_addr *phys, struct kbase_va_region *reg)
-{
- unsigned long mem_flags = KBASE_REG_GPU_RD;
- const size_t num_pages = 2;
- int ret;
-
-#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
- ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
- (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
- mem_flags |=
- KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
-#else
- if (kbdev->system_coherency == COHERENCY_NONE) {
- mem_flags |=
- KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
- } else {
- mem_flags |= KBASE_REG_SHARE_BOTH |
- KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED);
- }
-#endif
-
- mutex_lock(&kbdev->csf.reg_lock);
- ret = kbase_add_va_region_rbtree(kbdev, reg, 0, num_pages, 1);
- reg->flags &= ~KBASE_REG_FREE;
- mutex_unlock(&kbdev->csf.reg_lock);
-
- if (ret)
- return ret;
-
- /* Map input page */
- ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
- reg->start_pfn, &phys[0],
- 1, mem_flags, MCU_AS_NR,
- KBASE_MEM_GROUP_CSF_IO);
- if (ret)
- goto bad_insert;
-
- /* Map output page, it needs rw access */
- mem_flags |= KBASE_REG_GPU_WR;
- ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
- reg->start_pfn + 1, &phys[1],
- 1, mem_flags, MCU_AS_NR,
- KBASE_MEM_GROUP_CSF_IO);
- if (ret)
- goto bad_insert_output_page;
-
- return 0;
-
-bad_insert_output_page:
- kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu,
- reg->start_pfn, 1, MCU_AS_NR);
-bad_insert:
- mutex_lock(&kbdev->csf.reg_lock);
- kbase_remove_va_region(reg);
- mutex_unlock(&kbdev->csf.reg_lock);
-
- return ret;
-}
-
static void kernel_unmap_user_io_pages(struct kbase_context *kctx,
struct kbase_queue *queue)
{
- const size_t num_pages = 2;
-
kbase_gpu_vm_lock(kctx);
vunmap(queue->user_io_addr);
- WARN_ON(num_pages > atomic_read(&kctx->permanent_mapped_pages));
- atomic_sub(num_pages, &kctx->permanent_mapped_pages);
+ WARN_ON(atomic_read(&kctx->permanent_mapped_pages) < KBASEP_NUM_CS_USER_IO_PAGES);
+ atomic_sub(KBASEP_NUM_CS_USER_IO_PAGES, &kctx->permanent_mapped_pages);
kbase_gpu_vm_unlock(kctx);
}
@@ -244,6 +202,8 @@
{
struct page *page_list[2];
pgprot_t cpu_map_prot;
+ unsigned long flags;
+ char *user_io_addr;
int ret = 0;
size_t i;
@@ -258,26 +218,29 @@
/* The pages are mapped to Userspace also, so use the same mapping
* attributes as used inside the CPU page fault handler.
*/
-#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
- ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
- (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
- cpu_map_prot = pgprot_device(PAGE_KERNEL);
-#else
if (kctx->kbdev->system_coherency == COHERENCY_NONE)
cpu_map_prot = pgprot_writecombine(PAGE_KERNEL);
else
cpu_map_prot = PAGE_KERNEL;
-#endif
for (i = 0; i < ARRAY_SIZE(page_list); i++)
page_list[i] = as_page(queue->phys[i]);
- queue->user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot);
+ user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot);
- if (!queue->user_io_addr)
+ if (!user_io_addr) {
+ dev_err(kctx->kbdev->dev,
+ "%s(): user_io_addr is NULL, queue: %p",
+ __func__,
+ queue);
ret = -ENOMEM;
- else
+ } else {
atomic_add(ARRAY_SIZE(page_list), &kctx->permanent_mapped_pages);
+ }
+
+ kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags);
+ queue->user_io_addr = user_io_addr;
+ kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags);
unlock:
kbase_gpu_vm_unlock(kctx);
@@ -310,70 +273,62 @@
* If an explicit or implicit unbind was missed by the userspace then the
* mapping will persist. On process exit kernel itself will remove the mapping.
*/
-static void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx,
- struct kbase_queue *queue)
+void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue)
{
- const size_t num_pages = 2;
-
- gpu_munmap_user_io_pages(kctx, queue->reg);
kernel_unmap_user_io_pages(kctx, queue);
kbase_mem_pool_free_pages(
&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
- num_pages, queue->phys, true, false);
+ KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, true, false);
+ kbase_process_page_usage_dec(kctx, KBASEP_NUM_CS_USER_IO_PAGES);
- kfree(queue->reg);
- queue->reg = NULL;
+ /* The user_io_gpu_va should have been unmapped inside the scheduler */
+ WARN_ONCE(queue->user_io_gpu_va, "Userio pages appears still have mapping");
/* If the queue has already been terminated by userspace
* then the ref count for queue object will drop to 0 here.
*/
release_queue(queue);
}
+KBASE_EXPORT_TEST_API(kbase_csf_free_command_stream_user_pages);
-int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
- struct kbase_queue *queue)
+int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue)
{
struct kbase_device *kbdev = kctx->kbdev;
- struct kbase_va_region *reg;
- const size_t num_pages = 2;
int ret;
lockdep_assert_held(&kctx->csf.lock);
- reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0,
- num_pages, KBASE_REG_ZONE_MCU_SHARED);
- if (!reg)
+ ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
+ KBASEP_NUM_CS_USER_IO_PAGES,
+ queue->phys, false, kctx->task);
+ if (ret != KBASEP_NUM_CS_USER_IO_PAGES) {
+ /* Zero both phys entries to indicate that no physical pages are allocated */
+ queue->phys[0].tagged_addr = 0;
+ queue->phys[1].tagged_addr = 0;
return -ENOMEM;
-
- ret = kbase_mem_pool_alloc_pages(
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
- num_pages, queue->phys, false);
-
- if (ret != num_pages)
- goto phys_alloc_failed;
+ }
ret = kernel_map_user_io_pages(kctx, queue);
if (ret)
goto kernel_map_failed;
+ kbase_process_page_usage_inc(kctx, KBASEP_NUM_CS_USER_IO_PAGES);
init_user_io_pages(queue);
- ret = gpu_mmap_user_io_pages(kctx->kbdev, queue->phys, reg);
- if (ret)
- goto gpu_mmap_failed;
-
- queue->reg = reg;
+ /* user_io_gpu_va is only mapped when scheduler decides to put the queue
+ * on slot at runtime. Initialize it to 0, signalling no mapping.
+ */
+ queue->user_io_gpu_va = 0;
mutex_lock(&kbdev->csf.reg_lock);
- if (kbdev->csf.db_file_offsets >
- (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1))
+ if (kbdev->csf.db_file_offsets > (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1))
kbdev->csf.db_file_offsets = 0;
queue->db_file_offset = kbdev->csf.db_file_offsets;
kbdev->csf.db_file_offsets += BASEP_QUEUE_NR_MMAP_USER_PAGES;
-
- WARN(atomic_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n");
+ WARN(kbase_refcount_read(&queue->refcount) != 1,
+ "Incorrect refcounting for queue object\n");
/* This is the second reference taken on the queue object and
* would be dropped only when the IO mapping is removed either
* explicitly by userspace or implicitly by kernel on process exit.
@@ -384,19 +339,16 @@
return 0;
-gpu_mmap_failed:
- kernel_unmap_user_io_pages(kctx, queue);
-
kernel_map_failed:
- kbase_mem_pool_free_pages(
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
- num_pages, queue->phys, false, false);
+ kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
+ KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, false, false);
+ /* Zero both phys entries to indicate that no physical pages are allocated */
+ queue->phys[0].tagged_addr = 0;
+ queue->phys[1].tagged_addr = 0;
-phys_alloc_failed:
- kfree(reg);
-
- return -ENOMEM;
+ return ret;
}
+KBASE_EXPORT_TEST_API(kbase_csf_alloc_command_stream_user_pages);
static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx,
u8 group_handle)
@@ -413,6 +365,12 @@
return NULL;
}
+
+/**
+ * kbase_csf_find_queue_group() - Non-static wrapper around find_queue_group().
+ *
+ * @kctx: Pointer to the kbase context passed through to find_queue_group().
+ * @group_handle: Group handle passed through to find_queue_group().
+ *
+ * Exposed through KBASE_EXPORT_TEST_API.
+ *
+ * Return: Whatever find_queue_group() returns for @kctx and @group_handle.
+ */
+struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle)
+{
+ return find_queue_group(kctx, group_handle);
+}
+KBASE_EXPORT_TEST_API(kbase_csf_find_queue_group);
int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx,
u8 group_handle)
@@ -442,25 +400,37 @@
static void get_queue(struct kbase_queue *queue)
{
- WARN_ON(!atomic_inc_not_zero(&queue->refcount));
+ WARN_ON(!kbase_refcount_inc_not_zero(&queue->refcount));
}
static void release_queue(struct kbase_queue *queue)
{
lockdep_assert_held(&queue->kctx->csf.lock);
-
- WARN_ON(atomic_read(&queue->refcount) <= 0);
-
- if (atomic_dec_and_test(&queue->refcount)) {
+ if (kbase_refcount_dec_and_test(&queue->refcount)) {
/* The queue can't still be on the per context list. */
WARN_ON(!list_empty(&queue->link));
WARN_ON(queue->group);
+ dev_dbg(queue->kctx->kbdev->dev,
+ "Remove any pending command queue fatal from ctx %d_%d",
+ queue->kctx->tgid, queue->kctx->id);
+ kbase_csf_event_remove_error(queue->kctx, &queue->error);
+
+ /* After this the Userspace would be able to free the
+ * memory for GPU queue. In case the Userspace missed
+ * terminating the queue, the cleanup will happen on
+ * context termination where tear down of region tracker
+ * would free up the GPU queue memory.
+ */
+ kbase_gpu_vm_lock(queue->kctx);
+ kbase_va_region_no_user_free_dec(queue->queue_reg);
+ kbase_gpu_vm_unlock(queue->kctx);
+
kfree(queue);
}
}
static void oom_event_worker(struct work_struct *data);
-static void fatal_event_worker(struct work_struct *data);
+static void cs_error_worker(struct work_struct *data);
/* Between reg and reg_ex, one and only one must be null */
static int csf_queue_register_internal(struct kbase_context *kctx,
@@ -475,7 +445,7 @@
/* Only one pointer expected, otherwise coding error */
if ((reg == NULL && reg_ex == NULL) || (reg && reg_ex)) {
- dev_err(kctx->kbdev->dev,
+ dev_dbg(kctx->kbdev->dev,
"Error, one and only one param-ptr expected!");
return -EINVAL;
}
@@ -508,7 +478,8 @@
region = kbase_region_tracker_find_region_enclosing_address(kctx,
queue_addr);
- if (kbase_is_region_invalid_or_free(region)) {
+ if (kbase_is_region_invalid_or_free(region) || kbase_is_region_shrinkable(region) ||
+ region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
ret = -ENOENT;
goto out_unlock_vm;
}
@@ -525,24 +496,24 @@
if (reg_ex && reg_ex->ex_buffer_size) {
int buf_pages = (reg_ex->ex_buffer_size +
(1 << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
+ struct kbase_va_region *region_ex =
+ kbase_region_tracker_find_region_enclosing_address(kctx,
+ reg_ex->ex_buffer_base);
- region = kbase_region_tracker_find_region_enclosing_address(
- kctx, reg_ex->ex_buffer_base);
- if (kbase_is_region_invalid_or_free(region)) {
+ if (kbase_is_region_invalid_or_free(region_ex)) {
ret = -ENOENT;
goto out_unlock_vm;
}
- if (buf_pages > (region->nr_pages -
- ((reg_ex->ex_buffer_base >> PAGE_SHIFT) -
- region->start_pfn))) {
+ if (buf_pages > (region_ex->nr_pages -
+ ((reg_ex->ex_buffer_base >> PAGE_SHIFT) - region_ex->start_pfn))) {
ret = -EINVAL;
goto out_unlock_vm;
}
- region = kbase_region_tracker_find_region_enclosing_address(
- kctx, reg_ex->ex_offset_var_addr);
- if (kbase_is_region_invalid_or_free(region)) {
+ region_ex = kbase_region_tracker_find_region_enclosing_address(
+ kctx, reg_ex->ex_offset_var_addr);
+ if (kbase_is_region_invalid_or_free(region_ex)) {
ret = -ENOENT;
goto out_unlock_vm;
}
@@ -557,13 +528,16 @@
queue->kctx = kctx;
queue->base_addr = queue_addr;
+
queue->queue_reg = region;
+ kbase_va_region_no_user_free_inc(region);
+
queue->size = (queue_size << PAGE_SHIFT);
queue->csi_index = KBASEP_IF_NR_INVALID;
queue->enabled = false;
queue->priority = reg->priority;
- atomic_set(&queue->refcount, 1);
+ kbase_refcount_set(&queue->refcount, 1);
queue->group = NULL;
queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
@@ -574,16 +548,24 @@
queue->sync_ptr = 0;
queue->sync_value = 0;
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ queue->saved_cmd_ptr = 0;
+#endif
+
queue->sb_status = 0;
queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED;
+
+ atomic_set(&queue->pending, 0);
INIT_LIST_HEAD(&queue->link);
INIT_LIST_HEAD(&queue->error.link);
INIT_WORK(&queue->oom_event_work, oom_event_worker);
- INIT_WORK(&queue->fatal_event_work, fatal_event_worker);
+ INIT_WORK(&queue->cs_error_work, cs_error_worker);
list_add(&queue->link, &kctx->csf.queue_list);
- region->flags |= KBASE_REG_NO_USER_FREE;
+ queue->extract_ofs = 0;
+
+ region->user_data = queue;
/* Initialize the cs_trace configuration parameters, When buffer_size
* is 0, trace is disabled. Here we only update the fields when
@@ -612,6 +594,13 @@
int kbase_csf_queue_register(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_register *reg)
{
+ /* Validate the ring buffer configuration parameters */
+ if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE ||
+ reg->buffer_size > CS_RING_BUFFER_MAX_SIZE ||
+ reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr ||
+ reg->buffer_gpu_addr & ~PAGE_MASK)
+ return -EINVAL;
+
return csf_queue_register_internal(kctx, reg, NULL);
}
@@ -630,14 +619,21 @@
if (glb_version < kbase_csf_interface_version(1, 1, 0))
return -EINVAL;
- /* Validate the cs_trace configuration parameters */
- if (reg->ex_buffer_size &&
- ((reg->ex_event_size > max_size) ||
- (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
- (reg->ex_buffer_size < min_buf_size)))
- return -EINVAL;
+ /* Validate the ring buffer configuration parameters */
+ if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE ||
+ reg->buffer_size > CS_RING_BUFFER_MAX_SIZE ||
+ reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr ||
+ reg->buffer_gpu_addr & ~PAGE_MASK)
+ return -EINVAL;
- return csf_queue_register_internal(kctx, NULL, reg);
+ /* Validate the cs_trace configuration parameters */
+ if (reg->ex_buffer_size &&
+ ((reg->ex_event_size > max_size) ||
+ (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
+ (reg->ex_buffer_size < min_buf_size)))
+ return -EINVAL;
+
+ return csf_queue_register_internal(kctx, NULL, reg);
}
static void unbind_queue(struct kbase_context *kctx,
@@ -664,8 +660,6 @@
queue = find_queue(kctx, term->buffer_gpu_addr);
if (queue) {
- unsigned long flags;
-
/* As the GPU queue has been terminated by the
* user space, undo the actions that were performed when the
* queue was registered i.e. remove the queue from the per
@@ -678,23 +672,9 @@
unbind_queue(kctx, queue);
kbase_gpu_vm_lock(kctx);
- if (!WARN_ON(!queue->queue_reg)) {
- /* After this the Userspace would be able to free the
- * memory for GPU queue. In case the Userspace missed
- * terminating the queue, the cleanup will happen on
- * context termination where teardown of region tracker
- * would free up the GPU queue memory.
- */
- queue->queue_reg->flags &= ~KBASE_REG_NO_USER_FREE;
- }
+ if (!WARN_ON(!queue->queue_reg))
+ queue->queue_reg->user_data = NULL;
kbase_gpu_vm_unlock(kctx);
-
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
- dev_dbg(kctx->kbdev->dev,
- "Remove any pending command queue fatal from context %pK\n",
- (void *)kctx);
- list_del_init(&queue->error.link);
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
release_queue(queue);
}
@@ -776,10 +756,69 @@
return group;
}
+/* Queue the context's pending_submission_work item on system_highpri_wq. */
+static void enqueue_gpu_submission_work(struct kbase_context *const kctx)
+{
+ queue_work(system_highpri_wq, &kctx->csf.pending_submission_work);
+}
+
+/**
+ * pending_submission_worker() - Work item to process pending kicked GPU command queues.
+ *
+ * @work: Pointer to pending_submission_work.
+ *
+ * This function starts all pending queues, for which the work
+ * was previously submitted via ioctl call from application thread.
+ * If the queue is already scheduled and resident, it will be started
+ * right away, otherwise once the group is made resident.
+ *
+ * The queue list is walked with GPU reset prevented and with the context's
+ * csf.lock held. If reset prevention fails, an error is logged and no queue is
+ * started. A queue is processed only if its pending flag is atomically swapped
+ * from 1 to 0; queues that are not bound to a group are skipped. If starting a
+ * queue fails with -EBUSY, its pending flag is set again and the work item is
+ * re-queued so that the start is retried.
+ */
+static void pending_submission_worker(struct work_struct *work)
+{
+ struct kbase_context *kctx =
+ container_of(work, struct kbase_context, csf.pending_submission_work);
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbase_queue *queue;
+ int err = kbase_reset_gpu_prevent_and_wait(kbdev);
+
+ if (err) {
+ dev_err(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue ");
+ return;
+ }
+
+ mutex_lock(&kctx->csf.lock);
+
+ /* Iterate through the queue list and schedule the pending ones for submission. */
+ list_for_each_entry(queue, &kctx->csf.queue_list, link) {
+ if (atomic_cmpxchg(&queue->pending, 1, 0) == 1) {
+ struct kbase_queue_group *group = get_bound_queue_group(queue);
+ int ret;
+
+ if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND) {
+ dev_dbg(kbdev->dev, "queue is not bound to a group");
+ continue;
+ }
+
+ ret = kbase_csf_scheduler_queue_start(queue);
+ if (unlikely(ret)) {
+ dev_dbg(kbdev->dev, "Failed to start queue");
+ if (ret == -EBUSY) {
+ atomic_cmpxchg(&queue->pending, 0, 1);
+ enqueue_gpu_submission_work(kctx);
+ }
+ }
+ }
+ }
+
+ mutex_unlock(&kctx->csf.lock);
+
+ kbase_reset_gpu_allow(kbdev);
+}
+
void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot)
{
if (WARN_ON(slot < 0))
return;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
kbase_csf_ring_csg_slots_doorbell(kbdev, (u32) (1 << slot));
}
@@ -793,8 +832,19 @@
(u32) ((1U << kbdev->csf.global_iface.group_num) - 1);
u32 value;
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
if (WARN_ON(slot_bitmap > allowed_bitmap))
return;
+
+ /* The access to GLB_DB_REQ/ACK needs to be ordered with respect to CSG_REQ/ACK and
+ * CSG_DB_REQ/ACK to avoid a scenario where a CSI request overlaps with a CSG request
+ * or 2 CSI requests overlap and FW ends up missing the 2nd request.
+ * Memory barrier is required, both on Host and FW side, to guarantee the ordering.
+ *
+ * 'osh' is used as CPU and GPU would be in the same Outer shareable domain.
+ */
+ dmb(osh);
value = kbase_csf_firmware_global_output(global_iface, GLB_DB_ACK);
value ^= slot_bitmap;
@@ -822,6 +872,8 @@
struct kbase_csf_cmd_stream_group_info *ginfo;
u32 value;
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
if (WARN_ON(csg_nr < 0) ||
WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num))
return;
@@ -831,6 +883,14 @@
if (WARN_ON(csi_index < 0) ||
WARN_ON(csi_index >= ginfo->stream_num))
return;
+
+ /* The access to CSG_DB_REQ/ACK needs to be ordered with respect to
+ * CS_REQ/ACK to avoid a scenario where CSG_DB_REQ/ACK becomes visible to
+ * FW before CS_REQ/ACK is set.
+ *
+ * 'osh' is used as CPU and GPU would be in the same outer shareable domain.
+ */
+ dmb(osh);
value = kbase_csf_firmware_csg_output(ginfo, CSG_DB_ACK);
value ^= (1 << csi_index);
@@ -845,36 +905,37 @@
struct kbase_ioctl_cs_queue_kick *kick)
{
struct kbase_device *kbdev = kctx->kbdev;
- struct kbase_queue_group *group;
- struct kbase_queue *queue;
+ bool trigger_submission = false;
+ struct kbase_va_region *region;
int err = 0;
- err = kbase_reset_gpu_prevent_and_wait(kbdev);
- if (err) {
- dev_warn(
- kbdev->dev,
- "Unsuccessful GPU reset detected when kicking queue (buffer_addr=0x%.16llx)",
- kick->buffer_gpu_addr);
- return err;
- }
+ KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK(kbdev, kctx->id, kick->buffer_gpu_addr);
- mutex_lock(&kctx->csf.lock);
- queue = find_queue(kctx, kick->buffer_gpu_addr);
- if (!queue)
- err = -EINVAL;
+ /* GPU work submission happens asynchronously to avoid contention on the
+ * scheduler lock, which would otherwise block the application thread. For this reason,
+ * the vm_lock is used here to get the reference to the queue based on its buffer_gpu_addr
+ * from the context list of active va_regions.
+ * Once the target queue is found, the pending flag is atomically set to one, avoiding
+ * a race between the submission ioctl thread and the work item.
+ */
+ kbase_gpu_vm_lock(kctx);
+ region = kbase_region_tracker_find_region_enclosing_address(kctx, kick->buffer_gpu_addr);
+ if (!kbase_is_region_invalid_or_free(region)) {
+ struct kbase_queue *queue = region->user_data;
- if (!err) {
- group = get_bound_queue_group(queue);
- if (!group) {
- dev_err(kctx->kbdev->dev, "queue not bound\n");
- err = -EINVAL;
+ if (queue) {
+ atomic_cmpxchg(&queue->pending, 0, 1);
+ trigger_submission = true;
}
+ } else {
+ dev_dbg(kbdev->dev,
+ "Attempt to kick GPU queue without a valid command buffer region");
+ err = -EFAULT;
}
+ kbase_gpu_vm_unlock(kctx);
- if (!err)
- err = kbase_csf_scheduler_queue_start(queue);
- mutex_unlock(&kctx->csf.lock);
- kbase_reset_gpu_allow(kbdev);
+ if (likely(trigger_submission))
+ enqueue_gpu_submission_work(kctx);
return err;
}
@@ -884,19 +945,23 @@
{
lockdep_assert_held(&kctx->csf.lock);
+ if (WARN_ON(queue->csi_index < 0))
+ return;
+
if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) {
unsigned long flags;
kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags);
bitmap_clear(queue->group->protm_pending_bitmap,
queue->csi_index, 1);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, PROTM_PENDING_CLEAR,
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, CSI_PROTM_PEND_CLEAR,
queue->group, queue, queue->group->protm_pending_bitmap[0]);
queue->group->bound_queues[queue->csi_index] = NULL;
queue->group = NULL;
kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags);
put_user_pages_mmap_handle(kctx, queue);
+ WARN_ON_ONCE(queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID);
queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
}
}
@@ -938,7 +1003,16 @@
}
}
-void kbase_csf_queue_unbind(struct kbase_queue *queue)
+static bool kbase_csf_queue_phys_allocated(struct kbase_queue *queue)
+{
+ /* The queue's phys entries are zeroed when allocation fails. Both of them being
+ * zero is impossible for a successfully allocated set of physical pages, so a
+ * non-zero OR of the two tagged addresses means the pages are allocated.
+ */
+
+ return (queue->phys[0].tagged_addr | queue->phys[1].tagged_addr);
+}
+
+void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit)
{
struct kbase_context *kctx = queue->kctx;
@@ -952,7 +1026,7 @@
* whereas CSG TERM request would result in an immediate abort or
* cancellation of the pending work.
*/
- if (current->flags & PF_EXITING) {
+ if (process_exit) {
struct kbase_queue_group *group = get_bound_queue_group(queue);
if (group)
@@ -963,8 +1037,8 @@
unbind_queue(kctx, queue);
}
- /* Free the resources, if allocated for this queue. */
- if (queue->reg)
+ /* Free the resources if physical pages were allocated for this queue */
+ if (kbase_csf_queue_phys_allocated(queue))
kbase_csf_free_command_stream_user_pages(kctx, queue);
}
@@ -977,8 +1051,8 @@
WARN_ON(queue->bind_state == KBASE_CSF_QUEUE_BOUND);
unbind_stopped_queue(kctx, queue);
- /* Free the resources, if allocated for this queue. */
- if (queue->reg)
+ /* Free the resources if physical pages were allocated for this queue */
+ if (kbase_csf_queue_phys_allocated(queue))
kbase_csf_free_command_stream_user_pages(kctx, queue);
}
@@ -1041,159 +1115,39 @@
* @kctx: Pointer to kbase context where the queue group is created at
* @s_buf: Pointer to suspend buffer that is attached to queue group
*
- * Return: 0 if suspend buffer is successfully allocated and reflected to GPU
- * MMU page table. Otherwise -ENOMEM.
+ * Return: 0 if the physical pages for the suspend buffer are successfully allocated.
+ * Otherwise -ENOMEM or another error code.
*/
static int create_normal_suspend_buffer(struct kbase_context *const kctx,
struct kbase_normal_suspend_buffer *s_buf)
{
- struct kbase_va_region *reg = NULL;
- const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR;
const size_t nr_pages =
PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
- int err = 0;
+ int err;
lockdep_assert_held(&kctx->csf.lock);
- /* Allocate and initialize Region Object */
- reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0,
- nr_pages, KBASE_REG_ZONE_MCU_SHARED);
-
- if (!reg)
- return -ENOMEM;
+ /* The suspend buffer's mapping address is valid only when the CSG is to
+ * run on a slot, so initialize it to 0 to signal that the buffer is not mapped.
+ */
+ s_buf->gpu_va = 0;
s_buf->phy = kcalloc(nr_pages, sizeof(*s_buf->phy), GFP_KERNEL);
- if (!s_buf->phy) {
- err = -ENOMEM;
- goto phy_alloc_failed;
- }
-
- /* Get physical page for a normal suspend buffer */
- err = kbase_mem_pool_alloc_pages(
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- nr_pages, &s_buf->phy[0], false);
-
- if (err < 0)
- goto phy_pages_alloc_failed;
-
- /* Insert Region Object into rbtree and make virtual address available
- * to map it to physical page
- */
- mutex_lock(&kctx->kbdev->csf.reg_lock);
- err = kbase_add_va_region_rbtree(kctx->kbdev, reg, 0, nr_pages, 1);
- reg->flags &= ~KBASE_REG_FREE;
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
-
- if (err)
- goto add_va_region_failed;
-
- /* Update MMU table */
- err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
- reg->start_pfn, &s_buf->phy[0],
- nr_pages, mem_flags,
- MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW);
- if (err)
- goto mmu_insert_failed;
-
- s_buf->reg = reg;
-
- return 0;
-
-mmu_insert_failed:
- mutex_lock(&kctx->kbdev->csf.reg_lock);
- WARN_ON(kbase_remove_va_region(reg));
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
-
-add_va_region_failed:
- kbase_mem_pool_free_pages(
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
- &s_buf->phy[0], false, false);
-
-phy_pages_alloc_failed:
- kfree(s_buf->phy);
-phy_alloc_failed:
- kfree(reg);
-
- return err;
-}
-
-/**
- * create_protected_suspend_buffer() - Create protected-mode suspend buffer
- * per queue group
- *
- * @kbdev: Instance of a GPU platform device that implements a CSF interface.
- * @s_buf: Pointer to suspend buffer that is attached to queue group
- *
- * Return: 0 if suspend buffer is successfully allocated and reflected to GPU
- * MMU page table. Otherwise -ENOMEM.
- */
-static int create_protected_suspend_buffer(struct kbase_device *const kbdev,
- struct kbase_protected_suspend_buffer *s_buf)
-{
- struct kbase_va_region *reg = NULL;
- struct tagged_addr *phys = NULL;
- const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR;
- const size_t nr_pages =
- PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
- int err = 0;
-
- /* Allocate and initialize Region Object */
- reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
- nr_pages, KBASE_REG_ZONE_MCU_SHARED);
-
- if (!reg)
+ if (!s_buf->phy)
return -ENOMEM;
- phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL);
- if (!phys) {
- err = -ENOMEM;
- goto phy_alloc_failed;
+ /* Get physical page for a normal suspend buffer */
+ err = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
+ &s_buf->phy[0], false, kctx->task);
+
+ if (err < 0) {
+ kfree(s_buf->phy);
+ return err;
}
- s_buf->pma = kbase_csf_protected_memory_alloc(kbdev, phys,
- nr_pages);
- if (s_buf->pma == NULL) {
- err = -ENOMEM;
- goto pma_alloc_failed;
- }
-
- /* Insert Region Object into rbtree and make virtual address available
- * to map it to physical page
- */
- mutex_lock(&kbdev->csf.reg_lock);
- err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_pages, 1);
- reg->flags &= ~KBASE_REG_FREE;
- mutex_unlock(&kbdev->csf.reg_lock);
-
- if (err)
- goto add_va_region_failed;
-
- /* Update MMU table */
- err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
- reg->start_pfn, phys,
- nr_pages, mem_flags, MCU_AS_NR,
- KBASE_MEM_GROUP_CSF_FW);
- if (err)
- goto mmu_insert_failed;
-
- s_buf->reg = reg;
- kfree(phys);
+ kbase_process_page_usage_inc(kctx, nr_pages);
return 0;
-
-mmu_insert_failed:
- mutex_lock(&kbdev->csf.reg_lock);
- WARN_ON(kbase_remove_va_region(reg));
- mutex_unlock(&kbdev->csf.reg_lock);
-
-add_va_region_failed:
- kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
-pma_alloc_failed:
- kfree(phys);
-phy_alloc_failed:
- kfree(reg);
-
- return err;
}
static void timer_event_worker(struct work_struct *data);
@@ -1214,26 +1168,17 @@
static int create_suspend_buffers(struct kbase_context *const kctx,
struct kbase_queue_group * const group)
{
- int err = 0;
-
if (create_normal_suspend_buffer(kctx, &group->normal_suspend_buf)) {
dev_err(kctx->kbdev->dev, "Failed to create normal suspend buffer\n");
return -ENOMEM;
}
- if (kctx->kbdev->csf.pma_dev) {
- err = create_protected_suspend_buffer(kctx->kbdev,
- &group->protected_suspend_buf);
- if (err) {
- term_normal_suspend_buffer(kctx,
- &group->normal_suspend_buf);
- dev_err(kctx->kbdev->dev, "Failed to create protected suspend buffer\n");
- }
- } else {
- group->protected_suspend_buf.reg = NULL;
- }
+ /* Protected suspend buffer, runtime binding so just initialize it */
+ group->protected_suspend_buf.gpu_va = 0;
+ group->protected_suspend_buf.pma = NULL;
+ group->protected_suspend_buf.alloc_retries = 0;
- return err;
+ return 0;
}
/**
@@ -1244,16 +1189,9 @@
*/
static u32 generate_group_uid(void)
{
- /* use first KBase device to store max UID */
- struct kbase_device *kbdev = kbase_find_device(-1);
- u32 uid = 1;
+ static atomic_t global_csg_uid = ATOMIC_INIT(0);
- if (kbdev)
- uid = (u32) atomic_inc_return(&kbdev->group_max_uid_in_devices);
- else
- WARN(1, "NULL kbase device pointer in group UID generation");
-
- return uid;
+ return (u32)atomic_inc_return(&global_csg_uid);
}
/**
@@ -1272,8 +1210,8 @@
int group_handle = find_free_group_handle(kctx);
if (group_handle < 0) {
- dev_err(kctx->kbdev->dev,
- "All queue group handles are already in use\n");
+ dev_dbg(kctx->kbdev->dev,
+ "All queue group handles are already in use");
} else {
struct kbase_queue_group * const group =
kmalloc(sizeof(struct kbase_queue_group),
@@ -1298,10 +1236,22 @@
group->tiler_max = create->in.tiler_max;
group->fragment_max = create->in.fragment_max;
group->compute_max = create->in.compute_max;
+ group->csi_handlers = create->in.csi_handlers;
group->priority = kbase_csf_priority_queue_group_priority_to_relative(
kbase_csf_priority_check(kctx->kbdev, create->in.priority));
group->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
group->faulted = false;
+ group->cs_unrecoverable = false;
+ group->reevaluate_idle_status = false;
+
+ group->csg_reg = NULL;
+ group->csg_reg_bind_retries = 0;
+
+ group->dvs_buf = create->in.dvs_buf;
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ group->deschedule_deferred_cnt = 0;
+#endif
group->group_uid = generate_group_uid();
create->out.group_uid = group->group_uid;
@@ -1317,6 +1267,9 @@
MAX_SUPPORTED_STREAMS_PER_GROUP);
group->run_state = KBASE_CSF_GROUP_INACTIVE;
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group,
+ group->run_state);
+
err = create_suspend_buffers(kctx, group);
if (err < 0) {
@@ -1336,6 +1289,18 @@
return group_handle;
}
+static bool dvs_supported(u32 csf_version)
+{
+ if (GLB_VERSION_MAJOR_GET(csf_version) < 3)
+ return false;
+
+ if (GLB_VERSION_MAJOR_GET(csf_version) == 3)
+ if (GLB_VERSION_MINOR_GET(csf_version) < 2)
+ return false;
+
+ return true;
+}
+
int kbase_csf_queue_group_create(struct kbase_context *const kctx,
union kbase_ioctl_cs_queue_group_create *const create)
{
@@ -1343,23 +1308,47 @@
const u32 tiler_count = hweight64(create->in.tiler_mask);
const u32 fragment_count = hweight64(create->in.fragment_mask);
const u32 compute_count = hweight64(create->in.compute_mask);
+ size_t i;
+
+ for (i = 0; i < sizeof(create->in.padding); i++) {
+ if (create->in.padding[i] != 0) {
+ dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n");
+ return -EINVAL;
+ }
+ }
mutex_lock(&kctx->csf.lock);
if ((create->in.tiler_max > tiler_count) ||
(create->in.fragment_max > fragment_count) ||
(create->in.compute_max > compute_count)) {
- dev_err(kctx->kbdev->dev,
- "Invalid maximum number of endpoints for a queue group\n");
+ dev_dbg(kctx->kbdev->dev,
+ "Invalid maximum number of endpoints for a queue group");
err = -EINVAL;
} else if (create->in.priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT) {
- dev_err(kctx->kbdev->dev, "Invalid queue group priority %u\n",
+ dev_dbg(kctx->kbdev->dev, "Invalid queue group priority %u",
(unsigned int)create->in.priority);
err = -EINVAL;
} else if (!iface_has_enough_streams(kctx->kbdev, create->in.cs_min)) {
- dev_err(kctx->kbdev->dev,
- "No CSG has at least %d CSs\n",
+ dev_dbg(kctx->kbdev->dev,
+ "No CSG has at least %d CSs",
create->in.cs_min);
+ err = -EINVAL;
+ } else if (create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK) {
+ dev_warn(kctx->kbdev->dev, "Unknown exception handler flags set: %u",
+ create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK);
+ err = -EINVAL;
+ } else if (!dvs_supported(kctx->kbdev->csf.global_iface.version) &&
+ create->in.dvs_buf) {
+ dev_warn(
+ kctx->kbdev->dev,
+ "GPU does not support DVS but userspace is trying to use it");
+ err = -EINVAL;
+ } else if (dvs_supported(kctx->kbdev->csf.global_iface.version) &&
+ !CSG_DVS_BUF_BUFFER_POINTER_GET(create->in.dvs_buf) &&
+ CSG_DVS_BUF_BUFFER_SIZE_GET(create->in.dvs_buf)) {
+ dev_warn(kctx->kbdev->dev,
+ "DVS buffer pointer is null but size is not 0");
err = -EINVAL;
} else {
/* For the CSG which satisfies the condition for having
@@ -1389,60 +1378,39 @@
* @s_buf: Pointer to queue group suspend buffer to be freed
*/
static void term_normal_suspend_buffer(struct kbase_context *const kctx,
- struct kbase_normal_suspend_buffer *s_buf)
+ struct kbase_normal_suspend_buffer *s_buf)
{
- const size_t nr_pages =
- PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
+ const size_t nr_pages = PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
lockdep_assert_held(&kctx->csf.lock);
- WARN_ON(kbase_mmu_teardown_pages(
- kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
- s_buf->reg->start_pfn, nr_pages, MCU_AS_NR));
+ /* The group should not have a bind remaining on any suspend buf region */
+ WARN_ONCE(s_buf->gpu_va, "Suspend buffer address should be 0 at termination");
- WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
-
- mutex_lock(&kctx->kbdev->csf.reg_lock);
- WARN_ON(kbase_remove_va_region(s_buf->reg));
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
-
- kbase_mem_pool_free_pages(
- &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- nr_pages, &s_buf->phy[0], false, false);
+ kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
+ &s_buf->phy[0], false, false);
+ kbase_process_page_usage_dec(kctx, nr_pages);
kfree(s_buf->phy);
s_buf->phy = NULL;
- kfree(s_buf->reg);
- s_buf->reg = NULL;
}
/**
- * term_protected_suspend_buffer() - Free normal-mode suspend buffer of
+ * term_protected_suspend_buffer() - Free protected-mode suspend buffer of
* queue group
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
- * @s_buf: Pointer to queue group suspend buffer to be freed
+ * @sbuf: Pointer to queue group suspend buffer to be freed
*/
static void term_protected_suspend_buffer(struct kbase_device *const kbdev,
- struct kbase_protected_suspend_buffer *s_buf)
+ struct kbase_protected_suspend_buffer *sbuf)
{
- const size_t nr_pages =
- PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
-
- WARN_ON(kbase_mmu_teardown_pages(
- kbdev, &kbdev->csf.mcu_mmu,
- s_buf->reg->start_pfn, nr_pages, MCU_AS_NR));
-
- WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
-
- mutex_lock(&kbdev->csf.reg_lock);
- WARN_ON(kbase_remove_va_region(s_buf->reg));
- mutex_unlock(&kbdev->csf.reg_lock);
-
- kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
- s_buf->pma = NULL;
- kfree(s_buf->reg);
- s_buf->reg = NULL;
+ WARN_ONCE(sbuf->gpu_va, "Suspend buf should have been unmapped inside scheduler!");
+ if (sbuf->pma) {
+ const size_t nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ kbase_csf_protected_memory_free(kbdev, sbuf->pma, nr_pages, true);
+ sbuf->pma = NULL;
+ }
}
void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group)
@@ -1474,6 +1442,7 @@
&group->protected_suspend_buf);
group->run_state = KBASE_CSF_GROUP_TERMINATED;
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group, group->run_state);
}
/**
@@ -1504,10 +1473,51 @@
kbase_csf_term_descheduled_queue_group(group);
}
+/**
+ * wait_group_deferred_deschedule_completion - Wait for refcount of the group to
+ * become 0 that was taken when the group deschedule had to be deferred.
+ *
+ * @group: Pointer to GPU command queue group that is being deleted.
+ *
+ * This function is called when Userspace deletes the group and after the group
+ * has been descheduled. The function synchronizes with the other threads that were
+ * also trying to deschedule the group whilst the dumping was going on for a fault.
+ * Please refer to the documentation of wait_for_dump_complete_on_group_deschedule()
+ * for more details.
+ */
+static void wait_group_deferred_deschedule_completion(struct kbase_queue_group *group)
+{
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ struct kbase_context *kctx = group->kctx;
+
+ lockdep_assert_held(&kctx->csf.lock);
+
+ if (likely(!group->deschedule_deferred_cnt))
+ return;
+
+ mutex_unlock(&kctx->csf.lock);
+ wait_event(kctx->kbdev->csf.event_wait, !group->deschedule_deferred_cnt);
+ mutex_lock(&kctx->csf.lock);
+#endif
+}
+
static void cancel_queue_group_events(struct kbase_queue_group *group)
{
cancel_work_sync(&group->timer_event_work);
cancel_work_sync(&group->protm_event_work);
+}
+
+static void remove_pending_group_fatal_error(struct kbase_queue_group *group)
+{
+ struct kbase_context *kctx = group->kctx;
+
+ dev_dbg(kctx->kbdev->dev,
+ "Remove any pending group fatal error from context %pK\n",
+ (void *)group->kctx);
+
+ kbase_csf_event_remove_error(kctx, &group->error_tiler_oom);
+ kbase_csf_event_remove_error(kctx, &group->error_timeout);
+ kbase_csf_event_remove_error(kctx, &group->error_fatal);
}
void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
@@ -1532,39 +1542,44 @@
group = find_queue_group(kctx, group_handle);
if (group) {
- unsigned long flags;
-
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
-
- dev_dbg(kbdev->dev,
- "Remove any pending group fatal error from context %pK\n",
- (void *)group->kctx);
-
- list_del_init(&group->error_tiler_oom.link);
- list_del_init(&group->error_timeout.link);
- list_del_init(&group->error_fatal.link);
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-
- term_queue_group(group);
kctx->csf.queue_groups[group_handle] = NULL;
+ /* Stop the running of the given group */
+ term_queue_group(group);
+ mutex_unlock(&kctx->csf.lock);
+
+ if (reset_prevented) {
+ /* Allow GPU reset before cancelling the group specific
+ * work item to avoid potential deadlock.
+ * Reset prevention isn't needed after group termination.
+ */
+ kbase_reset_gpu_allow(kbdev);
+ reset_prevented = false;
+ }
+
+ /* Cancel any pending event callbacks. If one is in progress
+ * then this thread waits synchronously for it to complete (which
+ * is why we must unlock the context first). We already ensured
+ * that no more callbacks can be enqueued by terminating the group.
+ */
+ cancel_queue_group_events(group);
+
+ mutex_lock(&kctx->csf.lock);
+
+ /* Clean up after the termination */
+ remove_pending_group_fatal_error(group);
+
+ wait_group_deferred_deschedule_completion(group);
}
mutex_unlock(&kctx->csf.lock);
if (reset_prevented)
kbase_reset_gpu_allow(kbdev);
- if (!group)
- return;
-
- /* Cancel any pending event callbacks. If one is in progress
- * then this thread waits synchronously for it to complete (which
- * is why we must unlock the context first). We already ensured
- * that no more callbacks can be enqueued by terminating the group.
- */
- cancel_queue_group_events(group);
kfree(group);
}
+KBASE_EXPORT_TEST_API(kbase_csf_queue_group_terminate);
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
struct kbase_suspend_copy_buffer *sus_buf,
u8 group_handle)
@@ -1595,48 +1610,7 @@
return err;
}
-
-/**
- * add_error() - Add an error to the list of errors to report to user space
- *
- * @kctx: Address of a base context associated with a GPU address space.
- * @error: Address of the item to be added to the context's pending error list.
- * @data: Error data to be returned to userspace.
- *
- * Does not wake up the event queue blocking a user thread in kbase_poll. This
- * is to make it more efficient to add multiple errors.
- *
- * The added error must not already be on the context's list of errors waiting
- * to be reported (e.g. because a previous error concerning the same object has
- * not yet been reported).
- */
-static void add_error(struct kbase_context *const kctx,
- struct kbase_csf_notification *const error,
- struct base_csf_notification const *const data)
-{
- unsigned long flags;
-
- if (WARN_ON(!kctx))
- return;
-
- if (WARN_ON(!error))
- return;
-
- if (WARN_ON(!data))
- return;
-
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
-
- if (!WARN_ON(!list_empty(&error->link))) {
- error->data = *data;
- list_add_tail(&error->link, &kctx->csf.error_list);
- dev_dbg(kctx->kbdev->dev,
- "Added error %pK of type %d in context %pK\n",
- (void *)error, data->type, (void *)kctx);
- }
-
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-}
+#endif
void kbase_csf_add_group_fatal_error(
struct kbase_queue_group *const group,
@@ -1660,7 +1634,7 @@
}
};
- add_error(group->kctx, &group->error_fatal, &error);
+ kbase_csf_event_add_error(group->kctx, &group->error_fatal, &error);
}
void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
@@ -1698,29 +1672,12 @@
int kbase_csf_ctx_init(struct kbase_context *kctx)
{
- struct kbase_device *kbdev = kctx->kbdev;
int err = -ENOMEM;
- INIT_LIST_HEAD(&kctx->csf.event_callback_list);
INIT_LIST_HEAD(&kctx->csf.queue_list);
INIT_LIST_HEAD(&kctx->csf.link);
- INIT_LIST_HEAD(&kctx->csf.error_list);
- spin_lock_init(&kctx->csf.event_lock);
- kctx->csf.user_reg_vma = NULL;
- mutex_lock(&kbdev->pm.lock);
- /* The inode information for /dev/malixx file is not available at the
- * time of device probe as the inode is created when the device node
- * is created by udevd (through mknod).
- */
- if (kctx->filp) {
- if (!kbdev->csf.mali_file_inode)
- kbdev->csf.mali_file_inode = kctx->filp->f_inode;
-
- /* inode is unique for a file */
- WARN_ON(kbdev->csf.mali_file_inode != kctx->filp->f_inode);
- }
- mutex_unlock(&kbdev->pm.lock);
+ kbase_csf_event_init(kctx);
/* Mark all the cookies as 'free' */
bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
@@ -1737,9 +1694,18 @@
if (likely(!err)) {
err = kbase_csf_tiler_heap_context_init(kctx);
- if (likely(!err))
+ if (likely(!err)) {
mutex_init(&kctx->csf.lock);
- else
+ INIT_WORK(&kctx->csf.pending_submission_work,
+ pending_submission_worker);
+
+ err = kbasep_ctx_user_reg_page_mapping_init(kctx);
+
+ if (unlikely(err))
+ kbase_csf_tiler_heap_context_term(kctx);
+ }
+
+ if (unlikely(err))
kbase_csf_kcpu_queue_context_term(kctx);
}
@@ -1822,7 +1788,6 @@
* for queue groups & kcpu queues, hence no need to explicitly remove
* those debugfs files.
*/
- kbase_csf_event_wait_remove_all(kctx);
/* Wait for a GPU reset if it is happening, prevent it if not happening */
err = kbase_reset_gpu_prevent_and_wait(kbdev);
@@ -1835,17 +1800,24 @@
reset_prevented = true;
mutex_lock(&kctx->csf.lock);
+
/* Iterate through the queue groups that were not terminated by
* userspace and issue the term request to firmware for them.
*/
for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
- if (kctx->csf.queue_groups[i])
- term_queue_group(kctx->csf.queue_groups[i]);
+ struct kbase_queue_group *group = kctx->csf.queue_groups[i];
+
+ if (group) {
+ remove_pending_group_fatal_error(group);
+ term_queue_group(group);
+ }
}
mutex_unlock(&kctx->csf.lock);
if (reset_prevented)
kbase_reset_gpu_allow(kbdev);
+
+ cancel_work_sync(&kctx->csf.pending_submission_work);
/* Now that all queue groups have been terminated, there can be no
* more OoM or timer event interrupts but there can be inflight work
@@ -1891,200 +1863,45 @@
* only one reference left that was taken when queue was
* registered.
*/
- if (atomic_read(&queue->refcount) != 1)
- dev_warn(kctx->kbdev->dev,
- "Releasing queue with incorrect refcounting!\n");
+ WARN_ON(kbase_refcount_read(&queue->refcount) != 1);
list_del_init(&queue->link);
release_queue(queue);
}
mutex_unlock(&kctx->csf.lock);
+ kbasep_ctx_user_reg_page_mapping_term(kctx);
kbase_csf_tiler_heap_context_term(kctx);
kbase_csf_kcpu_queue_context_term(kctx);
kbase_csf_scheduler_context_term(kctx);
+ kbase_csf_event_term(kctx);
mutex_destroy(&kctx->csf.lock);
-}
-
-int kbase_csf_event_wait_add(struct kbase_context *kctx,
- kbase_csf_event_callback *callback, void *param)
-{
- int err = -ENOMEM;
- struct kbase_csf_event *event =
- kzalloc(sizeof(struct kbase_csf_event), GFP_KERNEL);
-
- if (event) {
- unsigned long flags;
-
- event->kctx = kctx;
- event->callback = callback;
- event->param = param;
-
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
- list_add_tail(&event->link, &kctx->csf.event_callback_list);
- dev_dbg(kctx->kbdev->dev,
- "Added event handler %pK with param %pK\n", event,
- event->param);
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-
- err = 0;
- }
-
- return err;
-}
-
-void kbase_csf_event_wait_remove(struct kbase_context *kctx,
- kbase_csf_event_callback *callback, void *param)
-{
- struct kbase_csf_event *event;
- unsigned long flags;
-
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
-
- list_for_each_entry(event, &kctx->csf.event_callback_list, link) {
- if ((event->callback == callback) && (event->param == param)) {
- list_del(&event->link);
- dev_dbg(kctx->kbdev->dev,
- "Removed event handler %pK with param %pK\n",
- event, event->param);
- kfree(event);
- break;
- }
- }
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-}
-
-bool kbase_csf_read_error(struct kbase_context *kctx,
- struct base_csf_notification *event_data)
-{
- bool got_event = true;
- struct kbase_csf_notification *error_data = NULL;
- unsigned long flags;
-
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
-
- if (likely(!list_empty(&kctx->csf.error_list))) {
- error_data = list_first_entry(&kctx->csf.error_list,
- struct kbase_csf_notification, link);
- list_del_init(&error_data->link);
- *event_data = error_data->data;
- dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n",
- (void *)error_data, (void *)kctx);
- } else {
- got_event = false;
- }
-
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-
- return got_event;
-}
-
-bool kbase_csf_error_pending(struct kbase_context *kctx)
-{
- bool event_pended = false;
- unsigned long flags;
-
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
- event_pended = !list_empty(&kctx->csf.error_list);
- dev_dbg(kctx->kbdev->dev, "%s error is pending in context %pK\n",
- event_pended ? "An" : "No", (void *)kctx);
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-
- return event_pended;
-}
-
-void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu)
-{
- struct kbase_csf_event *event, *next_event;
- unsigned long flags;
-
- dev_dbg(kctx->kbdev->dev,
- "Signal event (%s GPU notify) for context %pK\n",
- notify_gpu ? "with" : "without", (void *)kctx);
-
- /* First increment the signal count and wake up event thread.
- */
- atomic_set(&kctx->event_count, 1);
- kbase_event_wakeup(kctx);
-
- /* Signal the CSF firmware. This is to ensure that pending command
- * stream synch object wait operations are re-evaluated.
- * Write to GLB_DOORBELL would suffice as spec says that all pending
- * synch object wait operations are re-evaluated on a write to any
- * CS_DOORBELL/GLB_DOORBELL register.
- */
- if (notify_gpu) {
- spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
- if (kctx->kbdev->pm.backend.gpu_powered)
- kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR);
- KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT_NOTIFY_GPU, kctx, 0u);
- spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
- }
-
- /* Now invoke the callbacks registered on backend side.
- * Allow item removal inside the loop, if requested by the callback.
- */
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
-
- list_for_each_entry_safe(
- event, next_event, &kctx->csf.event_callback_list, link) {
- enum kbase_csf_event_callback_action action;
-
- dev_dbg(kctx->kbdev->dev,
- "Calling event handler %pK with param %pK\n",
- (void *)event, event->param);
- action = event->callback(event->param);
- if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) {
- list_del(&event->link);
- kfree(event);
- }
- }
-
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
-}
-
-void kbase_csf_event_wait_remove_all(struct kbase_context *kctx)
-{
- struct kbase_csf_event *event, *next_event;
- unsigned long flags;
-
- spin_lock_irqsave(&kctx->csf.event_lock, flags);
-
- list_for_each_entry_safe(
- event, next_event, &kctx->csf.event_callback_list, link) {
- list_del(&event->link);
- dev_dbg(kctx->kbdev->dev,
- "Removed event handler %pK with param %pK\n",
- (void *)event, event->param);
- kfree(event);
- }
-
- spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
}
/**
* handle_oom_event - Handle the OoM event generated by the firmware for the
* CSI.
*
+ * @group: Pointer to the CSG group the oom-event belongs to.
+ * @stream: Pointer to the structure containing info provided by the firmware
+ * about the CSI.
+ *
* This function will handle the OoM event request from the firmware for the
* CS. It will retrieve the address of heap context and heap's
* statistics (like number of render passes in-flight) from the CS's kernel
- * kernel output page and pass them to the tiler heap function to allocate a
+ * output page and pass them to the tiler heap function to allocate a
* new chunk.
* It will also update the CS's kernel input page with the address
* of a new chunk that was allocated.
*
- * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
- * @stream: Pointer to the structure containing info provided by the firmware
- * about the CSI.
- *
* Return: 0 if successfully handled the request, otherwise a negative error
* code on failure.
*/
-static int handle_oom_event(struct kbase_context *const kctx,
- struct kbase_csf_cmd_stream_info const *const stream)
+static int handle_oom_event(struct kbase_queue_group *const group,
+ struct kbase_csf_cmd_stream_info const *const stream)
{
+ struct kbase_context *const kctx = group->kctx;
u64 gpu_heap_va =
kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_LO) |
((u64)kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_HI) << 32);
@@ -2098,25 +1915,36 @@
u32 pending_frag_count;
u64 new_chunk_ptr;
int err;
+ bool frag_end_err = false;
if ((frag_end > vt_end) || (vt_end >= vt_start)) {
- dev_warn(kctx->kbdev->dev, "Invalid Heap statistics provided by firmware: vt_start %d, vt_end %d, frag_end %d\n",
+ frag_end_err = true;
+ dev_dbg(kctx->kbdev->dev, "Invalid Heap statistics provided by firmware: vt_start %d, vt_end %d, frag_end %d\n",
vt_start, vt_end, frag_end);
- return -EINVAL;
}
-
- renderpasses_in_flight = vt_start - frag_end;
- pending_frag_count = vt_end - frag_end;
+ if (frag_end_err) {
+ renderpasses_in_flight = 1;
+ pending_frag_count = 1;
+ } else {
+ renderpasses_in_flight = vt_start - frag_end;
+ pending_frag_count = vt_end - frag_end;
+ }
err = kbase_csf_tiler_heap_alloc_new_chunk(kctx,
gpu_heap_va, renderpasses_in_flight, pending_frag_count, &new_chunk_ptr);
- /* It is okay to acknowledge with a NULL chunk (firmware will then wait
- * for the fragment jobs to complete and release chunks)
- */
- if (err == -EBUSY)
+ if ((group->csi_handlers & BASE_CSF_TILER_OOM_EXCEPTION_FLAG) &&
+ (pending_frag_count == 0) && (err == -ENOMEM || err == -EBUSY)) {
+ /* The group allows incremental rendering, trigger it */
new_chunk_ptr = 0;
- else if (err)
+ dev_dbg(kctx->kbdev->dev, "Group-%d (slot-%d) enter incremental render\n",
+ group->handle, group->csg_nr);
+ } else if (err == -EBUSY) {
+ /* Acknowledge with a NULL chunk (firmware will then wait for
+ * the fragment jobs to complete and release chunks)
+ */
+ new_chunk_ptr = 0;
+ } else if (err)
return err;
kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_LO,
@@ -2149,8 +1977,40 @@
BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM,
} } } };
- add_error(group->kctx, &group->error_tiler_oom, &error);
+ kbase_csf_event_add_error(group->kctx,
+ &group->error_tiler_oom,
+ &error);
kbase_event_wakeup(group->kctx);
+}
+
+static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev)
+{
+ int err;
+ const unsigned int cache_flush_wait_timeout_ms = 2000;
+
+ kbase_pm_lock(kbdev);
+ /* With the advent of partial cache flush, dirty cache lines could
+ * be left in the GPU L2 caches by terminating the queue group here
+ * without waiting for proper cache maintenance. A full cache flush
+ * here will prevent these dirty cache lines from being arbitrarily
+ * evicted later and possibly causing memory corruption.
+ */
+ if (kbdev->pm.backend.gpu_powered) {
+ kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
+ err = kbase_gpu_wait_cache_clean_timeout(kbdev, cache_flush_wait_timeout_ms);
+
+ if (err) {
+ dev_warn(
+ kbdev->dev,
+ "[%llu] Timeout waiting for cache clean to complete after fatal error",
+ kbase_backend_get_cycle_cnt(kbdev));
+
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ kbase_reset_gpu(kbdev);
+ }
+ }
+
+ kbase_pm_unlock(kbdev);
}
/**
@@ -2165,8 +2025,8 @@
* notification to allow the firmware to report out-of-memory again in future.
* If the out-of-memory condition was successfully handled then this function
* rings the relevant doorbell to notify the firmware; otherwise, it terminates
- * the GPU command queue group to which the queue is bound. See
- * term_queue_group() for details.
+ * the GPU command queue group to which the queue is bound and notifies a waiting
+ * user space client of the failure.
*/
static void kbase_queue_oom_event(struct kbase_queue *const queue)
{
@@ -2178,6 +2038,7 @@
struct kbase_csf_cmd_stream_info const *stream;
int csi_index = queue->csi_index;
u32 cs_oom_ack, cs_oom_req;
+ unsigned long flags;
lockdep_assert_held(&kctx->csf.lock);
@@ -2221,22 +2082,25 @@
if (cs_oom_ack == cs_oom_req)
goto unlock;
- err = handle_oom_event(kctx, stream);
+ err = handle_oom_event(group, stream);
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_oom_ack,
CS_REQ_TILER_OOM_MASK);
+ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
- if (err) {
+ if (unlikely(err)) {
dev_warn(
kbdev->dev,
"Queue group to be terminated, couldn't handle the OoM event\n");
+ kbase_debug_csf_fault_notify(kbdev, kctx, DF_TILER_OOM);
kbase_csf_scheduler_unlock(kbdev);
term_queue_group(group);
+ flush_gpu_cache_on_fatal_error(kbdev);
report_tiler_oom_error(group);
return;
}
-
- kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
unlock:
kbase_csf_scheduler_unlock(kbdev);
}
@@ -2258,6 +2122,7 @@
struct kbase_device *const kbdev = kctx->kbdev;
int err = kbase_reset_gpu_try_prevent(kbdev);
+
/* Regardless of whether reset failed or is currently happening, exit
* early
*/
@@ -2294,7 +2159,7 @@
"Notify the event notification thread, forward progress timeout (%llu cycles)\n",
kbase_csf_timeout_get(group->kctx->kbdev));
- add_error(group->kctx, &group->error_timeout, &error);
+ kbase_csf_event_add_error(group->kctx, &group->error_timeout, &error);
kbase_event_wakeup(group->kctx);
}
@@ -2310,12 +2175,13 @@
struct kbase_queue_group *const group =
container_of(data, struct kbase_queue_group, timer_event_work);
struct kbase_context *const kctx = group->kctx;
+ struct kbase_device *const kbdev = kctx->kbdev;
bool reset_prevented = false;
- int err = kbase_reset_gpu_prevent_and_wait(kctx->kbdev);
+ int err = kbase_reset_gpu_prevent_and_wait(kbdev);
if (err)
dev_warn(
- kctx->kbdev->dev,
+ kbdev->dev,
"Unsuccessful GPU reset detected when terminating group %d on progress timeout, attempting to terminate regardless",
group->handle);
else
@@ -2324,11 +2190,12 @@
mutex_lock(&kctx->csf.lock);
term_queue_group(group);
+ flush_gpu_cache_on_fatal_error(kbdev);
report_group_timeout_error(group);
mutex_unlock(&kctx->csf.lock);
if (reset_prevented)
- kbase_reset_gpu_allow(kctx->kbdev);
+ kbase_reset_gpu_allow(kbdev);
}
/**
@@ -2336,12 +2203,91 @@
*
* @group: Pointer to GPU queue group for which the timeout event is received.
*
+ * Notify a waiting user space client of the timeout.
* Enqueue a work item to terminate the group and notify the event notification
* thread of progress timeout fault for the GPU command queue group.
*/
static void handle_progress_timer_event(struct kbase_queue_group *const group)
{
+ kbase_debug_csf_fault_notify(group->kctx->kbdev, group->kctx,
+ DF_PROGRESS_TIMER_TIMEOUT);
+
queue_work(group->kctx->csf.wq, &group->timer_event_work);
+}
+
+/**
+ * alloc_grp_protected_suspend_buffer_pages() - Allocate physical pages from the protected
+ * memory for the protected mode suspend buffer.
+ * @group: Pointer to the GPU queue group.
+ *
+ * Return: 0 if suspend buffer allocation is successful or if it is already allocated, otherwise
+ * negative error value.
+ */
+static int alloc_grp_protected_suspend_buffer_pages(struct kbase_queue_group *const group)
+{
+ struct kbase_device *const kbdev = group->kctx->kbdev;
+ struct kbase_context *kctx = group->kctx;
+ struct tagged_addr *phys = NULL;
+ struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf;
+ size_t nr_pages;
+ int err = 0;
+
+ if (likely(sbuf->pma))
+ return 0;
+
+ nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL);
+ if (unlikely(!phys)) {
+ err = -ENOMEM;
+ goto phys_free;
+ }
+
+ mutex_lock(&kctx->csf.lock);
+ kbase_csf_scheduler_lock(kbdev);
+
+ if (unlikely(!group->csg_reg)) {
+ /* The bound csg_reg can only have been removed from the group if the
+ * group has been put off slot by the scheduler and the csg_reg resource
+ * is contended by other groups. In this case, mapping the pma, which
+ * needs a bound csg_reg, has to wait for another occasion. Since the group is
+ * already off-slot, returning no error is harmless, as the scheduler, when it
+ * places the group back on-slot again, will do the required MMU map operation
+ * on the allocated and retained pma.
+ */
+ WARN_ON(group->csg_nr >= 0);
+ dev_dbg(kbdev->dev, "No bound csg_reg for group_%d_%d_%d to enter protected mode",
+ group->kctx->tgid, group->kctx->id, group->handle);
+ goto unlock;
+ }
+
+ /* Allocate the protected mode pages */
+ sbuf->pma = kbase_csf_protected_memory_alloc(kbdev, phys, nr_pages, true);
+ if (unlikely(!sbuf->pma)) {
+ err = -ENOMEM;
+ goto unlock;
+ }
+
+ /* Map the bound susp_reg to the just allocated pma pages */
+ err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group);
+
+unlock:
+ kbase_csf_scheduler_unlock(kbdev);
+ mutex_unlock(&kctx->csf.lock);
+phys_free:
+ kfree(phys);
+ return err;
+}
+
+static void report_group_fatal_error(struct kbase_queue_group *const group)
+{
+ struct base_gpu_queue_group_error const
+ err_payload = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
+ .payload = { .fatal_group = {
+ .status = GPU_EXCEPTION_TYPE_SW_FAULT_0,
+ } } };
+
+ kbase_csf_add_group_fatal_error(group, &err_payload);
+ kbase_event_wakeup(group->kctx);
}
/**
@@ -2356,53 +2302,48 @@
{
struct kbase_queue_group *const group =
container_of(data, struct kbase_queue_group, protm_event_work);
+ struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf;
+ int err = 0;
- KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_BEGIN,
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START,
group, 0u);
- kbase_csf_scheduler_group_protm_enter(group);
+
+ err = alloc_grp_protected_suspend_buffer_pages(group);
+ if (!err) {
+ kbase_csf_scheduler_group_protm_enter(group);
+ } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) {
+ sbuf->alloc_retries++;
+ /* try again to allocate pages */
+ queue_work(group->kctx->csf.wq, &group->protm_event_work);
+ } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) {
+ dev_err(group->kctx->kbdev->dev,
+ "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d",
+ group->handle, group->kctx->tgid, group->kctx->id);
+ report_group_fatal_error(group);
+ }
+
KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END,
group, 0u);
-}
-
-static void report_queue_fatal_error(struct kbase_queue *const queue,
- u32 cs_fatal, u64 cs_fatal_info,
- u8 group_handle)
-{
- struct base_csf_notification error =
- { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
- .payload = {
- .csg_error = {
- .handle = group_handle,
- .error = {
- .error_type =
- BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
- .payload = {
- .fatal_queue = {
- .sideband =
- cs_fatal_info,
- .status = cs_fatal,
- .csi_index =
- queue->csi_index,
- } } } } } };
-
- add_error(queue->kctx, &queue->error, &error);
- kbase_event_wakeup(queue->kctx);
}
/**
* handle_fault_event - Handler for CS fault.
*
* @queue: Pointer to queue for which fault event was received.
- * @stream: Pointer to the structure containing info provided by the
- * firmware about the CSI.
+ * @cs_ack: Value of the CS_ACK register in the CS kernel output page used for
+ * the queue.
*
- * Prints meaningful CS fault information.
- *
+ * Print required information about the CS fault and notify the user space client
+ * about the fault.
*/
static void
-handle_fault_event(struct kbase_queue *const queue,
- struct kbase_csf_cmd_stream_info const *const stream)
+handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack)
{
+ struct kbase_device *const kbdev = queue->kctx->kbdev;
+ struct kbase_csf_cmd_stream_group_info const *ginfo =
+ &kbdev->csf.global_iface.groups[queue->group->csg_nr];
+ struct kbase_csf_cmd_stream_info const *stream =
+ &ginfo->streams[queue->csi_index];
const u32 cs_fault = kbase_csf_firmware_cs_output(stream, CS_FAULT);
const u64 cs_fault_info =
kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_LO) |
@@ -2414,7 +2355,6 @@
CS_FAULT_EXCEPTION_DATA_GET(cs_fault);
const u64 cs_fault_info_exception_data =
CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info);
- struct kbase_device *const kbdev = queue->kctx->kbdev;
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
@@ -2429,29 +2369,86 @@
kbase_gpu_exception_name(cs_fault_exception_type),
cs_fault_exception_data, cs_fault_info_exception_data);
- if (cs_fault_exception_type ==
- CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT)
- report_queue_fatal_error(queue, GPU_EXCEPTION_TYPE_SW_FAULT_2,
- 0, queue->group->handle);
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ /* CS_RESOURCE_TERMINATED type fault event can be ignored from the
+ * standpoint of dump on error. It is used to report fault for the CSIs
+ * that are associated with the same CSG as the CSI for which the actual
+ * fault was reported by the Iterator.
+ * Dumping would be triggered when the actual fault is reported.
+ *
+ * CS_INHERIT_FAULT can also be ignored. It could happen due to the error
+ * in other types of queues (cpu/kcpu). If a fault had occurred in some
+ * other GPU queue then the dump would have been performed anyways when
+ * that fault was reported.
+ */
+ if ((cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT) &&
+ (cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED)) {
+ if (unlikely(kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) {
+ get_queue(queue);
+ queue->cs_error = cs_fault;
+ queue->cs_error_info = cs_fault_info;
+ queue->cs_error_fatal = false;
+ if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work))
+ release_queue(queue);
+ return;
+ }
+ }
+#endif
+
+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
+ CS_REQ_FAULT_MASK);
+ kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, true);
+}
+
+static void report_queue_fatal_error(struct kbase_queue *const queue,
+ u32 cs_fatal, u64 cs_fatal_info,
+ u8 group_handle)
+{
+ struct base_csf_notification error = {
+ .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
+ .payload = {
+ .csg_error = {
+ .handle = group_handle,
+ .error = {
+ .error_type =
+ BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
+ .payload = {
+ .fatal_queue = {
+ .sideband = cs_fatal_info,
+ .status = cs_fatal,
+ .csi_index = queue->csi_index,
+ }
+ }
+ }
+ }
+ }
+ };
+
+ kbase_csf_event_add_error(queue->kctx, &queue->error, &error);
+ kbase_event_wakeup(queue->kctx);
}
/**
- * fatal_event_worker - Handle the fatal error for the GPU queue
+ * cs_error_worker - Handle the CS_FATAL/CS_FAULT error for the GPU queue
*
* @data: Pointer to a work_struct embedded in GPU command queue.
*
* Terminate the CSG and report the error to userspace.
*/
-static void fatal_event_worker(struct work_struct *const data)
+static void cs_error_worker(struct work_struct *const data)
{
struct kbase_queue *const queue =
- container_of(data, struct kbase_queue, fatal_event_work);
+ container_of(data, struct kbase_queue, cs_error_work);
struct kbase_context *const kctx = queue->kctx;
struct kbase_device *const kbdev = kctx->kbdev;
struct kbase_queue_group *group;
u8 group_handle;
bool reset_prevented = false;
- int err = kbase_reset_gpu_prevent_and_wait(kbdev);
+ int err;
+
+ kbase_debug_csf_fault_wait_completion(kbdev);
+ err = kbase_reset_gpu_prevent_and_wait(kbdev);
if (err)
dev_warn(
@@ -2468,9 +2465,35 @@
goto unlock;
}
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ if (!queue->cs_error_fatal) {
+ unsigned long flags;
+ int slot_num;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ slot_num = kbase_csf_scheduler_group_get_slot_locked(group);
+ if (slot_num >= 0) {
+ struct kbase_csf_cmd_stream_group_info const *ginfo =
+ &kbdev->csf.global_iface.groups[slot_num];
+ struct kbase_csf_cmd_stream_info const *stream =
+ &ginfo->streams[queue->csi_index];
+ u32 const cs_ack =
+ kbase_csf_firmware_cs_output(stream, CS_ACK);
+
+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
+ CS_REQ_FAULT_MASK);
+ kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index,
+ slot_num, true);
+ }
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ goto unlock;
+ }
+#endif
+
group_handle = group->handle;
term_queue_group(group);
- report_queue_fatal_error(queue, queue->cs_fatal, queue->cs_fatal_info,
+ flush_gpu_cache_on_fatal_error(kbdev);
+ report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info,
group_handle);
unlock:
@@ -2486,14 +2509,18 @@
* @queue: Pointer to queue for which fatal event was received.
* @stream: Pointer to the structure containing info provided by the
* firmware about the CSI.
+ * @cs_ack: Value of the CS_ACK register in the CS kernel output page used for
+ * the queue.
*
- * Prints meaningful CS fatal information.
+ * Notify a waiting user space client of the CS fatal event and print meaningful
+ * information.
* Enqueue a work item to terminate the group and report the fatal error
* to user space.
*/
static void
handle_fatal_event(struct kbase_queue *const queue,
- struct kbase_csf_cmd_stream_info const *const stream)
+ struct kbase_csf_cmd_stream_info const *const stream,
+ u32 cs_ack)
{
const u32 cs_fatal = kbase_csf_firmware_cs_output(stream, CS_FATAL);
const u64 cs_fatal_info =
@@ -2523,51 +2550,26 @@
if (cs_fatal_exception_type ==
CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) {
+ kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_FW_INTERNAL_ERROR);
queue_work(system_wq, &kbdev->csf.fw_error_work);
} else {
+ kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FATAL);
+ if (cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE) {
+ queue->group->cs_unrecoverable = true;
+ if (kbase_prepare_to_reset_gpu(queue->kctx->kbdev, RESET_FLAGS_NONE))
+ kbase_reset_gpu(queue->kctx->kbdev);
+ }
get_queue(queue);
- queue->cs_fatal = cs_fatal;
- queue->cs_fatal_info = cs_fatal_info;
- if (!queue_work(queue->kctx->csf.wq, &queue->fatal_event_work))
+ queue->cs_error = cs_fatal;
+ queue->cs_error_info = cs_fatal_info;
+ queue->cs_error_fatal = true;
+ if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work))
release_queue(queue);
}
-}
-/**
- * handle_queue_exception_event - Handler for CS fatal/fault exception events.
- *
- * @queue: Pointer to queue for which fatal/fault event was received.
- * @cs_req: Value of the CS_REQ register from the CS's input page.
- * @cs_ack: Value of the CS_ACK register from the CS's output page.
- */
-static void handle_queue_exception_event(struct kbase_queue *const queue,
- const u32 cs_req, const u32 cs_ack)
-{
- struct kbase_csf_cmd_stream_group_info const *ginfo;
- struct kbase_csf_cmd_stream_info const *stream;
- struct kbase_context *const kctx = queue->kctx;
- struct kbase_device *const kbdev = kctx->kbdev;
- struct kbase_queue_group *group = queue->group;
- int csi_index = queue->csi_index;
- int slot_num = group->csg_nr;
+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
+ CS_REQ_FATAL_MASK);
- kbase_csf_scheduler_spin_lock_assert_held(kbdev);
-
- ginfo = &kbdev->csf.global_iface.groups[slot_num];
- stream = &ginfo->streams[csi_index];
-
- if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) {
- handle_fatal_event(queue, stream);
- kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
- CS_REQ_FATAL_MASK);
- }
-
- if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) {
- handle_fault_event(queue, stream);
- kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
- CS_REQ_FAULT_MASK);
- kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
- }
}
/**
@@ -2577,6 +2579,9 @@
* @ginfo: The CSG interface provided by the firmware.
* @irqreq: CSG's IRQ request bitmask (one bit per CS).
* @irqack: CSG's IRQ acknowledge bitmask (one bit per CS).
+ * @track: Pointer that tracks the highest scanout priority idle CSG
+ * and any new, potentially viable, protected mode requesting
+ * CSG in the current IRQ context.
*
* If the interrupt request bitmask differs from the acknowledge bitmask
* then the firmware is notifying the host of an event concerning those
@@ -2585,8 +2590,9 @@
* the request and acknowledge registers for the individual CS(s).
*/
static void process_cs_interrupts(struct kbase_queue_group *const group,
- struct kbase_csf_cmd_stream_group_info const *const ginfo,
- u32 const irqreq, u32 const irqack)
+ struct kbase_csf_cmd_stream_group_info const *const ginfo,
+ u32 const irqreq, u32 const irqack,
+ struct irq_idle_and_protm_track *track)
{
struct kbase_device *const kbdev = group->kctx->kbdev;
u32 remaining = irqreq ^ irqack;
@@ -2616,10 +2622,16 @@
kbase_csf_firmware_cs_output(stream, CS_ACK);
struct workqueue_struct *wq = group->kctx->csf.wq;
- if ((cs_req & CS_REQ_EXCEPTION_MASK) ^
- (cs_ack & CS_ACK_EXCEPTION_MASK)) {
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_FAULT_INTERRUPT, group, queue, cs_req ^ cs_ack);
- handle_queue_exception_event(queue, cs_req, cs_ack);
+ if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) {
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT,
+ group, queue, cs_req ^ cs_ack);
+ handle_fatal_event(queue, stream, cs_ack);
+ }
+
+ if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) {
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT,
+ group, queue, cs_req ^ cs_ack);
+ handle_fault_event(queue, cs_ack);
}
/* PROTM_PEND and TILER_OOM can be safely ignored
@@ -2630,30 +2642,37 @@
u32 const cs_req_remain = cs_req & ~CS_REQ_EXCEPTION_MASK;
u32 const cs_ack_remain = cs_ack & ~CS_ACK_EXCEPTION_MASK;
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND,
- group, queue, cs_req_remain ^ cs_ack_remain);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev,
+ CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED,
+ group, queue,
+ cs_req_remain ^ cs_ack_remain);
continue;
}
if (((cs_req & CS_REQ_TILER_OOM_MASK) ^
(cs_ack & CS_ACK_TILER_OOM_MASK))) {
get_queue(queue);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_TILER_OOM_INTERRUPT, group, queue,
- cs_req ^ cs_ack);
- if (WARN_ON(!queue_work(wq, &queue->oom_event_work))) {
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM,
+ group, queue, cs_req ^ cs_ack);
+ if (!queue_work(wq, &queue->oom_event_work)) {
/* The work item shall not have been
* already queued, there can be only
* one pending OoM event for a
* queue.
*/
+ dev_warn(
+ kbdev->dev,
+ "Tiler OOM work pending: queue %d group %d (ctx %d_%d)",
+ queue->csi_index, group->handle, queue->kctx->tgid,
+ queue->kctx->id);
release_queue(queue);
}
}
if ((cs_req & CS_REQ_PROTM_PEND_MASK) ^
(cs_ack & CS_ACK_PROTM_PEND_MASK)) {
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_INTERRUPT, group, queue,
- cs_req ^ cs_ack);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_PROTM_PEND,
+ group, queue, cs_req ^ cs_ack);
dev_dbg(kbdev->dev,
"Protected mode entry request for queue on csi %d bound to group-%d on slot %d",
@@ -2661,15 +2680,34 @@
group->csg_nr);
bitmap_set(group->protm_pending_bitmap, i, 1);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_SET, group, queue,
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_SET, group, queue,
group->protm_pending_bitmap[0]);
protm_pend = true;
}
}
}
- if (protm_pend)
- queue_work(group->kctx->csf.wq, &group->protm_event_work);
+ if (protm_pend) {
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+
+ if (scheduler->tick_protm_pending_seq > group->scan_seq_num) {
+ scheduler->tick_protm_pending_seq = group->scan_seq_num;
+ track->protm_grp = group;
+ }
+
+ if (!group->protected_suspend_buf.pma)
+ queue_work(group->kctx->csf.wq, &group->protm_event_work);
+
+ if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) {
+ clear_bit(group->csg_nr,
+ scheduler->csg_slots_idle_mask);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
+ scheduler->csg_slots_idle_mask[0]);
+ dev_dbg(kbdev->dev,
+ "Group-%d on slot %d de-idled by protm request",
+ group->handle, group->csg_nr);
+ }
+ }
}
/**
@@ -2677,6 +2715,8 @@
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @csg_nr: CSG number.
+ * @track: Pointer that tracks the highest idle CSG and any new, potentially viable,
+ * protected mode requesting group in the current IRQ context.
*
* Handles interrupts for a CSG and for CSs within it.
*
@@ -2687,8 +2727,8 @@
*
* See process_cs_interrupts() for details of per-stream interrupt handling.
*/
-static void process_csg_interrupts(struct kbase_device *const kbdev,
- int const csg_nr)
+static void process_csg_interrupts(struct kbase_device *const kbdev, int const csg_nr,
+ struct irq_idle_and_protm_track *track)
{
struct kbase_csf_cmd_stream_group_info *ginfo;
struct kbase_queue_group *group = NULL;
@@ -2699,8 +2739,6 @@
if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num))
return;
- KBASE_KTRACE_ADD(kbdev, CSG_INTERRUPT_PROCESS, NULL, csg_nr);
-
ginfo = &kbdev->csf.global_iface.groups[csg_nr];
req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ);
ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
@@ -2709,7 +2747,7 @@
/* There may not be any pending CSG/CS interrupts to process */
if ((req == ack) && (irqreq == irqack))
- goto out;
+ return;
/* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before
* examining the CS_ACK & CS_REQ bits. This would ensure that Host
@@ -2730,21 +2768,30 @@
* slot scheduler spinlock is required.
*/
if (!group)
- goto out;
+ return;
if (WARN_ON(kbase_csf_scheduler_group_get_slot_locked(group) != csg_nr))
- goto out;
+ return;
+
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr);
if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) {
kbase_csf_firmware_csg_input_mask(ginfo,
CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SYNC_UPDATE_INTERRUPT, group, req ^ ack);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_SYNC_UPDATE, group, req ^ ack);
+
+ /* SYNC_UPDATE events shall invalidate GPU idle event */
+ atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true);
+
kbase_csf_event_signal_cpu_only(group->kctx);
}
if ((req ^ ack) & CSG_REQ_IDLE_MASK) {
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE(
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, csg_nr);
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
CSG_REQ_IDLE_MASK);
@@ -2752,34 +2799,45 @@
set_bit(csg_nr, scheduler->csg_slots_idle_mask);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, group,
scheduler->csg_slots_idle_mask[0]);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_IDLE_INTERRUPT, group, req ^ ack);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_IDLE, group, req ^ ack);
dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n",
group->handle, csg_nr);
- /* Check if the scheduling tick can be advanced */
- if (kbase_csf_scheduler_all_csgs_idle(kbdev) &&
- !scheduler->gpu_idle_fw_timer_enabled) {
- kbase_csf_scheduler_advance_tick_nolock(kbdev);
+ if (atomic_read(&scheduler->non_idle_offslot_grps)) {
+ /* If there are non-idle CSGs waiting for a slot, fire
+ * a tock for a replacement.
+ */
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NON_IDLE_GROUPS,
+ group, req ^ ack);
+ kbase_csf_scheduler_invoke_tock(kbdev);
+ } else {
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NO_NON_IDLE_GROUPS,
+ group, req ^ ack);
+ }
+
+ if (group->scan_seq_num < track->idle_seq) {
+ track->idle_seq = group->scan_seq_num;
+ track->idle_slot = csg_nr;
}
}
if ((req ^ ack) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK) {
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
- CSG_REQ_PROGRESS_TIMER_EVENT_MASK);
+ CSG_REQ_PROGRESS_TIMER_EVENT_MASK);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PROGRESS_TIMER_INTERRUPT,
- group, req ^ ack);
- dev_info(kbdev->dev,
- "Timeout notification received for group %u of ctx %d_%d on slot %d\n",
- group->handle, group->kctx->tgid, group->kctx->id, csg_nr);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT, group,
+ req ^ ack);
+ dev_info(
+ kbdev->dev,
+ "[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %d\n",
+ kbase_backend_get_cycle_cnt(kbdev), group->handle, group->kctx->tgid,
+ group->kctx->id, csg_nr);
handle_progress_timer_event(group);
}
- process_cs_interrupts(group, ginfo, irqreq, irqack);
+ process_cs_interrupts(group, ginfo, irqreq, irqack, track);
-out:
- /* group may still be NULL here */
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_END, group,
((u64)req ^ ack) | (((u64)irqreq ^ irqack) << 32));
}
@@ -2868,105 +2926,264 @@
}
}
+/**
+ * check_protm_enter_req_complete - Check if PROTM_ENTER request completed
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @glb_req: Global request register value.
+ * @glb_ack: Global acknowledge register value.
+ *
+ * This function checks if the PROTM_ENTER Global request has completed and
+ * appropriately sends notification about the protected mode entry to components
+ * like IPA, HWC, IPA_CONTROL.
+ */
+static inline void check_protm_enter_req_complete(struct kbase_device *kbdev,
+ u32 glb_req, u32 glb_ack)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ if (likely(!kbdev->csf.scheduler.active_protm_grp))
+ return;
+
+ if (kbdev->protected_mode)
+ return;
+
+ if ((glb_req & GLB_REQ_PROTM_ENTER_MASK) !=
+ (glb_ack & GLB_REQ_PROTM_ENTER_MASK))
+ return;
+
+ dev_dbg(kbdev->dev, "Protected mode entry interrupt received");
+
+ kbdev->protected_mode = true;
+ kbase_ipa_protection_mode_switch_event(kbdev);
+ kbase_ipa_control_protm_entered(kbdev);
+ kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface);
+}
+
+/**
+ * process_protm_exit - Handle the protected mode exit interrupt
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @glb_ack: Global acknowledge register value.
+ *
+ * This function handles the PROTM_EXIT interrupt and sends notification
+ * about the protected mode exit to components like HWC, IPA_CONTROL.
+ */
+static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack)
+{
+ const struct kbase_csf_global_iface *const global_iface =
+ &kbdev->csf.global_iface;
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ dev_dbg(kbdev->dev, "Protected mode exit interrupt received");
+
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_ack,
+ GLB_REQ_PROTM_EXIT_MASK);
+
+ if (likely(scheduler->active_protm_grp)) {
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT,
+ scheduler->active_protm_grp, 0u);
+ scheduler->active_protm_grp = NULL;
+ } else {
+ dev_warn(kbdev->dev, "PROTM_EXIT interrupt after no pmode group");
+ }
+
+ if (!WARN_ON(!kbdev->protected_mode)) {
+ kbdev->protected_mode = false;
+ kbase_ipa_control_protm_exited(kbdev);
+ kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface);
+ }
+
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ kbase_debug_coresight_csf_enable_pmode_exit(kbdev);
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+}
+
+static inline void process_tracked_info_for_protm(struct kbase_device *kbdev,
+ struct irq_idle_and_protm_track *track)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ struct kbase_queue_group *group = track->protm_grp;
+ u32 current_protm_pending_seq = scheduler->tick_protm_pending_seq;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ if (likely(current_protm_pending_seq == KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID))
+ return;
+
+ /* Handle protm from the tracked information */
+ if (track->idle_seq < current_protm_pending_seq) {
+ /* If the protm enter was prevented due to groups priority, then fire a tock
+ * for the scheduler to re-examine the case.
+ */
+ dev_dbg(kbdev->dev, "Attempt pending protm from idle slot %d\n", track->idle_slot);
+ kbase_csf_scheduler_invoke_tock(kbdev);
+ } else if (group) {
+ u32 i, num_groups = kbdev->csf.global_iface.group_num;
+ struct kbase_queue_group *grp;
+ bool tock_triggered = false;
+
+ /* A new protm request, and track->idle_seq is not sufficient, check across
+ * previously notified idle CSGs in the current tick/tock cycle.
+ */
+ for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) {
+ if (i == track->idle_slot)
+ continue;
+ grp = kbase_csf_scheduler_get_group_on_slot(kbdev, i);
+ /* If not NULL then the group pointer cannot disappear as the
+ * scheduler spinlock is held.
+ */
+ if (grp == NULL)
+ continue;
+
+ if (grp->scan_seq_num < current_protm_pending_seq) {
+ tock_triggered = true;
+ dev_dbg(kbdev->dev,
+ "Attempt new protm from tick/tock idle slot %d\n", i);
+ kbase_csf_scheduler_invoke_tock(kbdev);
+ break;
+ }
+ }
+
+ if (!tock_triggered) {
+ dev_dbg(kbdev->dev, "Group-%d on slot-%d start protm work\n",
+ group->handle, group->csg_nr);
+ queue_work(group->kctx->csf.wq, &group->protm_event_work);
+ }
+ }
+}
+
+static void order_job_irq_clear_with_iface_mem_read(void)
+{
+ /* Ensure that the write to JOB_IRQ_CLEAR is ordered with regard to the
+ * read from interface memory. The ordering is needed considering the way
+ * FW & Kbase write to the JOB_IRQ_RAWSTAT and JOB_IRQ_CLEAR registers
+ * without any synchronization. Without the barrier there is no guarantee
+ * about the ordering: the write to IRQ_CLEAR can take effect after the read
+ * from interface memory, and that could cause a problem for the scenario where
+ * FW sends back-to-back notifications for the same CSG for events like
+ * SYNC_UPDATE and IDLE, but Kbase gets a single IRQ and observes only the
+ * first event. A similar thing can happen with glb events like CFG_ALLOC_EN
+ * acknowledgment and GPU idle notification.
+ *
+ * MCU CPU
+ * --------------- ----------------
+ * Update interface memory Write to IRQ_CLEAR to clear current IRQ
+ * <barrier> <barrier>
+ * Write to IRQ_RAWSTAT to raise new IRQ Read interface memory
+ */
+
+ /* CPU and GPU would be in the same Outer shareable domain */
+ dmb(osh);
+}
+
void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
{
- unsigned long flags;
- u32 remaining = val;
+ bool deferred_handling_glb_idle_irq = false;
lockdep_assert_held(&kbdev->hwaccess_lock);
- KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT, NULL, val);
- kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
+ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_START, NULL, val);
- if (val & JOB_IRQ_GLOBAL_IF) {
- const struct kbase_csf_global_iface *const global_iface =
- &kbdev->csf.global_iface;
- struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ do {
+ unsigned long flags;
+ u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF;
+ struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX };
+ bool glb_idle_irq_received = false;
- kbdev->csf.interrupt_received = true;
- remaining &= ~JOB_IRQ_GLOBAL_IF;
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
+ order_job_irq_clear_with_iface_mem_read();
- if (!kbdev->csf.firmware_reloaded)
- kbase_csf_firmware_reload_completed(kbdev);
- else if (global_iface->output) {
- u32 glb_req, glb_ack;
-
+ if (csg_interrupts != 0) {
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- glb_req = kbase_csf_firmware_global_input_read(
- global_iface, GLB_REQ);
- glb_ack = kbase_csf_firmware_global_output(
- global_iface, GLB_ACK);
- KBASE_KTRACE_ADD(kbdev, GLB_REQ_ACQ, NULL, glb_req ^ glb_ack);
+ /* Loop through the CSG interrupts and track the highest idle and protm groups */
+ while (csg_interrupts != 0) {
+ int const csg_nr = ffs(csg_interrupts) - 1;
- if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK) {
- dev_dbg(kbdev->dev, "Protected mode exit interrupt received");
- kbase_csf_firmware_global_input_mask(
- global_iface, GLB_REQ, glb_ack,
- GLB_REQ_PROTM_EXIT_MASK);
- WARN_ON(!kbase_csf_scheduler_protected_mode_in_use(kbdev));
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM, scheduler->active_protm_grp, 0u);
- scheduler->active_protm_grp = NULL;
- kbdev->protected_mode = false;
- kbase_ipa_control_protm_exited(kbdev);
- kbase_hwcnt_backend_csf_protm_exited(
- &kbdev->hwcnt_gpu_iface);
+ process_csg_interrupts(kbdev, csg_nr, &track);
+ csg_interrupts &= ~(1 << csg_nr);
}
- /* Handle IDLE Hysteresis notification event */
- if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
- int non_idle_offslot_grps;
- bool can_suspend_on_idle;
- dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
- kbase_csf_firmware_global_input_mask(
+ /* Handle protm from the tracked information */
+ process_tracked_info_for_protm(kbdev, &track);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ }
+
+ if (val & JOB_IRQ_GLOBAL_IF) {
+ const struct kbase_csf_global_iface *const global_iface =
+ &kbdev->csf.global_iface;
+
+ kbdev->csf.interrupt_received = true;
+
+ if (!kbdev->csf.firmware_reloaded)
+ kbase_csf_firmware_reload_completed(kbdev);
+ else if (global_iface->output) {
+ u32 glb_req, glb_ack;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ glb_req =
+ kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
+ glb_ack = kbase_csf_firmware_global_output(global_iface, GLB_ACK);
+ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_GLB_REQ_ACK, NULL,
+ glb_req ^ glb_ack);
+
+ check_protm_enter_req_complete(kbdev, glb_req, glb_ack);
+
+ if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK)
+ process_protm_exit(kbdev, glb_ack);
+
+ /* Handle IDLE Hysteresis notification event */
+ if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
+ dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
+ kbase_csf_firmware_global_input_mask(
global_iface, GLB_REQ, glb_ack,
GLB_REQ_IDLE_EVENT_MASK);
- non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
- can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_CAN_IDLE, NULL,
- ((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
-
- if (!non_idle_offslot_grps) {
- if (can_suspend_on_idle)
- queue_work(system_highpri_wq,
- &scheduler->gpu_idle_work);
- } else {
- /* Advance the scheduling tick to get
- * the non-idle suspended groups loaded
- * soon.
+ glb_idle_irq_received = true;
+ /* Defer handling this IRQ to account for a race condition
+ * where the idle worker could be executed before we have
+ * finished handling all pending IRQs (including CSG IDLE
+ * IRQs).
*/
- kbase_csf_scheduler_advance_tick_nolock(
- kbdev);
+ deferred_handling_glb_idle_irq = true;
}
+
+ process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
+
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ /* Invoke the MCU state machine as a state transition
+ * might have completed.
+ */
+ kbase_pm_update_state(kbdev);
}
-
- process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
-
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
-
- /* Invoke the MCU state machine as a state transition
- * might have completed.
- */
- kbase_pm_update_state(kbdev);
}
- if (!remaining) {
- wake_up_all(&kbdev->csf.event_wait);
- KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
- return;
- }
- }
+ if (!glb_idle_irq_received)
+ break;
+ /* Attempt to serve potential IRQs that might have occurred
+ * whilst handling the previous IRQ. In case we have observed
+ * the GLB IDLE IRQ without all CSGs having been marked as
+ * idle, the GPU would be treated as no longer idle and left
+ * powered on.
+ */
+ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
+ } while (val);
- kbase_csf_scheduler_spin_lock(kbdev, &flags);
- while (remaining != 0) {
- int const csg_nr = ffs(remaining) - 1;
+ if (deferred_handling_glb_idle_irq) {
+ unsigned long flags;
- process_csg_interrupts(kbdev, csg_nr);
- remaining &= ~(1 << csg_nr);
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbase_csf_scheduler_process_gpu_idle_event(kbdev);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
wake_up_all(&kbdev->csf.event_wait);
+
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
}
@@ -2989,13 +3206,12 @@
struct file *filp;
int ret;
- filp = shmem_file_setup("mali csf", MAX_LFS_FILESIZE, VM_NORESERVE);
+ filp = shmem_file_setup("mali csf db", MAX_LFS_FILESIZE, VM_NORESERVE);
if (IS_ERR(filp))
return PTR_ERR(filp);
- ret = kbase_mem_pool_alloc_pages(
- &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- 1, &phys, false);
+ ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
+ false, NULL);
if (ret <= 0) {
fput(filp);
@@ -3011,30 +3227,34 @@
void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev)
{
- if (as_phys_addr_t(kbdev->csf.dummy_user_reg_page)) {
- struct page *page = as_page(kbdev->csf.dummy_user_reg_page);
+ if (kbdev->csf.user_reg.filp) {
+ struct page *page = as_page(kbdev->csf.user_reg.dummy_page);
- kbase_mem_pool_free(
- &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page,
- false);
+ kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false);
+ fput(kbdev->csf.user_reg.filp);
}
}
int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
{
struct tagged_addr phys;
+ struct file *filp;
struct page *page;
u32 *addr;
- int ret;
- kbdev->csf.dummy_user_reg_page = as_tagged(0);
+ kbdev->csf.user_reg.filp = NULL;
- ret = kbase_mem_pool_alloc_pages(
- &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
- false);
+ filp = shmem_file_setup("mali csf user_reg", MAX_LFS_FILESIZE, VM_NORESERVE);
+ if (IS_ERR(filp)) {
+ dev_err(kbdev->dev, "failed to get an unlinked file for user_reg");
+ return PTR_ERR(filp);
+ }
- if (ret <= 0)
- return ret;
+ if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
+ false, NULL) <= 0) {
+ fput(filp);
+ return -ENOMEM;
+ }
page = as_page(phys);
addr = kmap_atomic(page);
@@ -3044,12 +3264,13 @@
*/
addr[LATEST_FLUSH / sizeof(u32)] = POWER_DOWN_LATEST_FLUSH_VALUE;
- kbase_sync_single_for_device(kbdev, kbase_dma_addr(page), sizeof(u32),
+ kbase_sync_single_for_device(kbdev, kbase_dma_addr(page) + LATEST_FLUSH, sizeof(u32),
DMA_BIDIRECTIONAL);
kunmap_atomic(addr);
- kbdev->csf.dummy_user_reg_page = phys;
-
+ kbdev->csf.user_reg.filp = filp;
+ kbdev->csf.user_reg.dummy_page = phys;
+ kbdev->csf.user_reg.file_offset = 0;
return 0;
}
@@ -3066,4 +3287,3 @@
return out_priority;
}
-
--
Gitblit v1.6.2