From 1543e317f1da31b75942316931e8f491a8920811 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Thu, 04 Jan 2024 10:08:02 +0000
Subject: [PATCH] disable FB
---
kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c | 1117 ++++++++++++++++++++++++++++++++++++++--------------------
1 files changed, 727 insertions(+), 390 deletions(-)
diff --git a/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c b/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c
index 4e26a49..da8dde2 100644
--- a/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c
+++ b/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -33,8 +33,12 @@
static DEFINE_SPINLOCK(kbase_csf_fence_lock);
#endif
+#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG
+#define FENCE_WAIT_TIMEOUT_MS 3000 /* fence-wait debug timeout, in milliseconds */
+#endif
+
static void kcpu_queue_process(struct kbase_kcpu_command_queue *kcpu_queue,
- bool ignore_waits);
+ bool drain_queue);
static void kcpu_queue_process_worker(struct work_struct *data);
@@ -45,9 +49,13 @@
{
struct kbase_context *const kctx = kcpu_queue->kctx;
struct kbase_va_region *reg;
+ struct kbase_mem_phy_alloc *alloc;
+ struct page **pages;
+ struct tagged_addr *pa;
+ long i;
int ret = 0;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
/* Take the processes mmap lock */
down_read(kbase_mem_get_process_mmap_lock());
@@ -72,10 +80,24 @@
* on the physical pages tracking object. When the last
* reference to the tracking object is dropped the pages
* would be unpinned if they weren't unpinned before.
+ *
+ * Region should be CPU cached: abort if it isn't.
*/
+ if (WARN_ON(!(reg->flags & KBASE_REG_CPU_CACHED))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
ret = kbase_jd_user_buf_pin_pages(kctx, reg);
if (ret)
goto out;
+
+ alloc = reg->gpu_alloc;
+ pa = kbase_get_gpu_phy_pages(reg);
+ pages = alloc->imported.user_buf.pages;
+
+ for (i = 0; i < alloc->nents; i++)
+ pa[i] = as_tagged(page_to_phys(pages[i]));
}
current_command->type = BASE_KCPU_COMMAND_TYPE_MAP_IMPORT;
@@ -99,7 +121,7 @@
struct kbase_va_region *reg;
int ret = 0;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
kbase_gpu_vm_lock(kctx);
@@ -167,13 +189,14 @@
&kctx->csf.kcpu_queues.jit_blocked_queues;
struct kbase_kcpu_command_queue *blocked_queue;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
list_for_each_entry(blocked_queue,
&kctx->csf.kcpu_queues.jit_blocked_queues,
jit_blocked) {
- struct kbase_kcpu_command const*const jit_alloc_cmd =
- &blocked_queue->commands[blocked_queue->start_offset];
+ struct kbase_kcpu_command const *const jit_alloc_cmd =
+ &blocked_queue->commands[blocked_queue->start_offset];
WARN_ON(jit_alloc_cmd->type != BASE_KCPU_COMMAND_TYPE_JIT_ALLOC);
if (cmd->enqueue_ts < jit_alloc_cmd->enqueue_ts) {
@@ -190,6 +213,12 @@
*
* @queue: The queue containing this JIT allocation
* @cmd: The JIT allocation command
+ *
+ * Return:
+ * * 0 - allocation OK
+ * * -EINVAL - missing info or JIT ID still in use
+ * * -EAGAIN - Retry
+ * * -ENOMEM - no memory. unable to allocate
*/
static int kbase_kcpu_jit_allocate_process(
struct kbase_kcpu_command_queue *queue,
@@ -206,23 +235,26 @@
u32 i;
int ret;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
-
- if (alloc_info->blocked) {
- list_del(&queue->jit_blocked);
- alloc_info->blocked = false;
- }
+ lockdep_assert_held(&queue->lock);
if (WARN_ON(!info))
return -EINVAL;
+
+ mutex_lock(&kctx->csf.kcpu_queues.jit_lock);
/* Check if all JIT IDs are not in use */
for (i = 0; i < count; i++, info++) {
/* The JIT ID is still in use so fail the allocation */
if (kctx->jit_alloc[info->id]) {
- dev_warn(kctx->kbdev->dev, "JIT ID still in use\n");
- return -EINVAL;
+ dev_dbg(kctx->kbdev->dev, "JIT ID still in use");
+ ret = -EINVAL;
+ goto fail;
}
+ }
+
+ if (alloc_info->blocked) {
+ list_del(&queue->jit_blocked);
+ alloc_info->blocked = false;
}
/* Now start the allocation loop */
@@ -238,7 +270,7 @@
break;
if (jit_cmd->type == BASE_KCPU_COMMAND_TYPE_JIT_FREE) {
- u8 const*const free_ids = jit_cmd->info.jit_free.ids;
+ u8 const *const free_ids = jit_cmd->info.jit_free.ids;
if (free_ids && *free_ids && kctx->jit_alloc[*free_ids]) {
/*
@@ -259,7 +291,7 @@
*/
dev_warn_ratelimited(kctx->kbdev->dev, "JIT alloc command failed: %pK\n", cmd);
ret = -ENOMEM;
- goto fail;
+ goto fail_rollback;
}
/* There are pending frees for an active allocation
@@ -277,7 +309,8 @@
kctx->jit_alloc[info->id] = NULL;
}
- return -EAGAIN;
+ ret = -EAGAIN;
+ goto fail;
}
/* Bind it to the user provided ID. */
@@ -289,11 +322,11 @@
* Write the address of the JIT allocation to the user provided
* GPU allocation.
*/
- ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
- &mapping);
+ ptr = kbase_vmap_prot(kctx, info->gpu_alloc_addr, sizeof(*ptr),
+ KBASE_REG_CPU_WR, &mapping);
if (!ptr) {
ret = -ENOMEM;
- goto fail;
+ goto fail_rollback;
}
reg = kctx->jit_alloc[info->id];
@@ -302,9 +335,11 @@
kbase_vunmap(kctx, &mapping);
}
+ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
+
return 0;
-fail:
+fail_rollback:
/* Roll back completely */
for (i = 0, info = alloc_info->info; i < count; i++, info++) {
/* Free the allocations that were successful.
@@ -317,6 +352,8 @@
kctx->jit_alloc[info->id] = KBASE_RESERVED_REG_JIT_ALLOC;
}
+fail:
+ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
return ret;
}
@@ -328,15 +365,16 @@
{
struct kbase_context *const kctx = kcpu_queue->kctx;
void __user *data = u64_to_user_ptr(alloc_info->info);
- struct base_jit_alloc_info *info;
+ struct base_jit_alloc_info *info = NULL;
u32 count = alloc_info->count;
int ret = 0;
u32 i;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
- if (!data || count > kcpu_queue->kctx->jit_max_allocations ||
- count > ARRAY_SIZE(kctx->jit_alloc)) {
+ if ((count == 0) || (count > ARRAY_SIZE(kctx->jit_alloc)) ||
+ (count > kcpu_queue->kctx->jit_max_allocations) || (!data) ||
+ !kbase_mem_allow_alloc(kctx)) {
ret = -EINVAL;
goto out;
}
@@ -371,11 +409,13 @@
}
current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_ALLOC;
- list_add_tail(&current_command->info.jit_alloc.node,
- &kctx->csf.kcpu_queues.jit_cmds_head);
current_command->info.jit_alloc.info = info;
current_command->info.jit_alloc.count = count;
current_command->info.jit_alloc.blocked = false;
+ mutex_lock(&kctx->csf.kcpu_queues.jit_lock);
+ list_add_tail(&current_command->info.jit_alloc.node,
+ &kctx->csf.kcpu_queues.jit_cmds_head);
+ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
return 0;
out_free:
@@ -394,7 +434,9 @@
struct kbase_kcpu_command_queue *queue,
struct kbase_kcpu_command *cmd)
{
- lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
+
+ mutex_lock(&queue->kctx->csf.kcpu_queues.jit_lock);
/* Remove this command from the jit_cmds_head list */
list_del(&cmd->info.jit_alloc.node);
@@ -408,6 +450,8 @@
cmd->info.jit_alloc.blocked = false;
}
+ mutex_unlock(&queue->kctx->csf.kcpu_queues.jit_lock);
+
kfree(cmd->info.jit_alloc.info);
}
@@ -420,18 +464,17 @@
{
struct kbase_kcpu_command_queue *blocked_queue;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
/*
* Reschedule all queues blocked by JIT_ALLOC commands.
* NOTE: This code traverses the list of blocked queues directly. It
* only works as long as the queued works are not executed at the same
* time. This precondition is true since we're holding the
- * kbase_csf_kcpu_queue_context.lock .
+ * kbase_csf_kcpu_queue_context.jit_lock .
*/
- list_for_each_entry(blocked_queue,
- &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked)
- queue_work(kctx->csf.kcpu_queues.wq, &blocked_queue->work);
+ list_for_each_entry(blocked_queue, &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked)
+ queue_work(blocked_queue->wq, &blocked_queue->work);
}
static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
@@ -448,17 +491,18 @@
if (WARN_ON(!ids))
return -EINVAL;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
+ mutex_lock(&kctx->csf.kcpu_queues.jit_lock);
- KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(
- queue->kctx->kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(queue->kctx->kbdev,
+ queue);
for (i = 0; i < count; i++) {
u64 pages_used = 0;
int item_err = 0;
if (!kctx->jit_alloc[ids[i]]) {
- dev_warn(kctx->kbdev->dev, "invalid JIT free ID\n");
+ dev_dbg(kctx->kbdev->dev, "invalid JIT free ID");
rc = -EINVAL;
item_err = rc;
} else {
@@ -480,15 +524,17 @@
queue->kctx->kbdev, queue, item_err, pages_used);
}
- /* Free the list of ids */
- kfree(ids);
-
/*
* Remove this command from the jit_cmds_head list and retry pending
* allocations.
*/
list_del(&cmd->info.jit_free.node);
kbase_kcpu_jit_retry_pending_allocs(kctx);
+
+ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
+
+ /* Free the list of ids */
+ kfree(ids);
return rc;
}
@@ -505,7 +551,7 @@
int ret;
u32 i;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
/* Sanity checks */
if (!count || count > ARRAY_SIZE(kctx->jit_alloc)) {
@@ -551,10 +597,12 @@
}
current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_FREE;
- list_add_tail(&current_command->info.jit_free.node,
- &kctx->csf.kcpu_queues.jit_cmds_head);
current_command->info.jit_free.ids = ids;
current_command->info.jit_free.count = count;
+ mutex_lock(&kctx->csf.kcpu_queues.jit_lock);
+ list_add_tail(&current_command->info.jit_free.node,
+ &kctx->csf.kcpu_queues.jit_cmds_head);
+ mutex_unlock(&kctx->csf.kcpu_queues.jit_lock);
return 0;
out_free:
@@ -563,6 +611,7 @@
return ret;
}
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
static int kbase_csf_queue_group_suspend_prepare(
struct kbase_kcpu_command_queue *kcpu_queue,
struct base_kcpu_command_group_suspend_info *suspend_buf,
@@ -570,18 +619,19 @@
{
struct kbase_context *const kctx = kcpu_queue->kctx;
struct kbase_suspend_copy_buffer *sus_buf = NULL;
+ const u32 csg_suspend_buf_size =
+ kctx->kbdev->csf.global_iface.groups[0].suspend_size;
u64 addr = suspend_buf->buffer;
u64 page_addr = addr & PAGE_MASK;
- u64 end_addr = addr + suspend_buf->size - 1;
+ u64 end_addr = addr + csg_suspend_buf_size - 1;
u64 last_page_addr = end_addr & PAGE_MASK;
int nr_pages = (last_page_addr - page_addr) / PAGE_SIZE + 1;
int pinned_pages = 0, ret = 0;
struct kbase_va_region *reg;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
- if (suspend_buf->size <
- kctx->kbdev->csf.global_iface.groups[0].suspend_size)
+ if (suspend_buf->size < csg_suspend_buf_size)
return -EINVAL;
ret = kbase_csf_queue_group_handle_is_valid(kctx,
@@ -593,7 +643,7 @@
if (!sus_buf)
return -ENOMEM;
- sus_buf->size = suspend_buf->size;
+ sus_buf->size = csg_suspend_buf_size;
sus_buf->nr_pages = nr_pages;
sus_buf->offset = addr & ~PAGE_MASK;
@@ -629,10 +679,11 @@
struct tagged_addr *page_array;
u64 start, end, i;
- if (!(reg->flags & BASE_MEM_SAME_VA) ||
- reg->nr_pages < nr_pages ||
- kbase_reg_current_backed_size(reg) !=
- reg->nr_pages) {
+ if (((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_SAME_VA) ||
+ (kbase_reg_current_backed_size(reg) < nr_pages) ||
+ !(reg->flags & KBASE_REG_CPU_WR) ||
+ (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) ||
+ (kbase_is_region_shrinkable(reg)) || (kbase_va_region_is_no_user_free(reg))) {
ret = -EINVAL;
goto out_clean_pages;
}
@@ -676,14 +727,14 @@
{
return kbase_csf_queue_group_suspend(kctx, sus_buf, group_handle);
}
+#endif
static enum kbase_csf_event_callback_action event_cqs_callback(void *param)
{
struct kbase_kcpu_command_queue *kcpu_queue =
(struct kbase_kcpu_command_queue *)param;
- struct kbase_context *const kctx = kcpu_queue->kctx;
- queue_work(kctx->csf.kcpu_queues.wq, &kcpu_queue->work);
+ queue_work(kcpu_queue->wq, &kcpu_queue->work);
return KBASE_CSF_EVENT_CALLBACK_KEEP;
}
@@ -713,7 +764,7 @@
{
u32 i;
- lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
if (WARN_ON(!cqs_wait->objs))
return -EINVAL;
@@ -727,10 +778,10 @@
cqs_wait->objs[i].addr, &mapping);
if (!queue->command_started) {
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START(kbdev,
+ queue);
queue->command_started = true;
- KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_START,
+ KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_WAIT_START,
queue, cqs_wait->nr_objs, 0);
}
@@ -741,24 +792,24 @@
return -EINVAL;
}
- sig_set = evt[BASEP_EVENT_VAL_INDEX] > cqs_wait->objs[i].val;
+ sig_set =
+ evt[BASEP_EVENT32_VAL_OFFSET / sizeof(u32)] > cqs_wait->objs[i].val;
if (sig_set) {
bool error = false;
bitmap_set(cqs_wait->signaled, i, 1);
if ((cqs_wait->inherit_err_flags & (1U << i)) &&
- evt[BASEP_EVENT_ERR_INDEX] > 0) {
+ evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] > 0) {
queue->has_error = true;
error = true;
}
- KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_END,
+ KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_WAIT_END,
queue, cqs_wait->objs[i].addr,
error);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END(
- kbdev, queue,
- evt[BASEP_EVENT_ERR_INDEX]);
+ kbdev, queue, evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)]);
queue->command_started = false;
}
@@ -775,14 +826,36 @@
return bitmap_full(cqs_wait->signaled, cqs_wait->nr_objs);
}
+static inline bool kbase_kcpu_cqs_is_data_type_valid(u8 data_type)
+{ /* True only when data_type is one of the two CQS types compared below (U32, U64). */
+ return data_type == BASEP_CQS_DATA_TYPE_U32 || data_type == BASEP_CQS_DATA_TYPE_U64;
+}
+
+static inline bool kbase_kcpu_cqs_is_aligned(u64 addr, u8 data_type)
+{ /* Tests addr against the alignment mask that matches data_type. */
+ BUILD_BUG_ON(BASEP_EVENT32_ALIGN_BYTES != BASEP_EVENT32_SIZE_BYTES); /* alignment == size */
+ BUILD_BUG_ON(BASEP_EVENT64_ALIGN_BYTES != BASEP_EVENT64_SIZE_BYTES); /* alignment == size */
+ WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(data_type)); /* warns only; default: rejects */
+
+ switch (data_type) {
+ default: /* unrecognised data type is reported as not aligned */
+ return false;
+ case BASEP_CQS_DATA_TYPE_U32:
+ return (addr & (BASEP_EVENT32_ALIGN_BYTES - 1)) == 0; /* presumes power-of-two alignment */
+ case BASEP_CQS_DATA_TYPE_U64:
+ return (addr & (BASEP_EVENT64_ALIGN_BYTES - 1)) == 0; /* presumes power-of-two alignment */
+ }
+}
+
static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue,
struct base_kcpu_command_cqs_wait_info *cqs_wait_info,
struct kbase_kcpu_command *current_command)
{
struct base_cqs_wait_info *objs;
unsigned int nr_objs = cqs_wait_info->nr_objs;
+ unsigned int i;
- lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
return -EINVAL;
@@ -798,6 +871,17 @@
nr_objs * sizeof(*objs))) {
kfree(objs);
return -ENOMEM;
+ }
+
+ /* Check the CQS objects as early as possible. By checking their alignment
+ * (required alignment equals to size for Sync32 and Sync64 objects), we can
+ * prevent overrunning the supplied event page.
+ */
+ for (i = 0; i < nr_objs; i++) {
+ if (!kbase_kcpu_cqs_is_aligned(objs[i].addr, BASEP_CQS_DATA_TYPE_U32)) {
+ kfree(objs);
+ return -EINVAL;
+ }
}
if (++queue->cqs_wait_count == 1) {
@@ -836,7 +920,7 @@
{
unsigned int i;
- lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
if (WARN_ON(!cqs_set->objs))
return;
@@ -848,22 +932,20 @@
evt = (u32 *)kbase_phy_alloc_mapping_get(
queue->kctx, cqs_set->objs[i].addr, &mapping);
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue,
- evt ? 0 : 1);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue, evt ? 0 : 1);
if (!evt) {
dev_warn(kbdev->dev,
"Sync memory %llx already freed", cqs_set->objs[i].addr);
queue->has_error = true;
} else {
- evt[BASEP_EVENT_ERR_INDEX] = queue->has_error;
+ evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] = queue->has_error;
/* Set to signaled */
- evt[BASEP_EVENT_VAL_INDEX]++;
+ evt[BASEP_EVENT32_VAL_OFFSET / sizeof(u32)]++;
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
- KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_SET,
- queue, cqs_set->objs[i].addr,
- evt[BASEP_EVENT_ERR_INDEX]);
+ KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_SET, queue, cqs_set->objs[i].addr,
+ evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)]);
}
}
@@ -878,11 +960,11 @@
struct base_kcpu_command_cqs_set_info *cqs_set_info,
struct kbase_kcpu_command *current_command)
{
- struct kbase_context *const kctx = kcpu_queue->kctx;
struct base_cqs_set *objs;
unsigned int nr_objs = cqs_set_info->nr_objs;
+ unsigned int i;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
return -EINVAL;
@@ -898,6 +980,17 @@
nr_objs * sizeof(*objs))) {
kfree(objs);
return -ENOMEM;
+ }
+
+ /* Check the CQS objects as early as possible. By checking their alignment
+ * (required alignment equals to size for Sync32 and Sync64 objects), we can
+ * prevent overrunning the supplied event page.
+ */
+ for (i = 0; i < nr_objs; i++) {
+ if (!kbase_kcpu_cqs_is_aligned(objs[i].addr, BASEP_CQS_DATA_TYPE_U32)) {
+ kfree(objs);
+ return -EINVAL;
+ }
}
current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET;
@@ -932,7 +1025,7 @@
{
u32 i;
- lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
if (WARN_ON(!cqs_wait_operation->objs))
return -EINVAL;
@@ -942,12 +1035,16 @@
if (!test_bit(i, cqs_wait_operation->signaled)) {
struct kbase_vmap_struct *mapping;
bool sig_set;
- u64 *evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx,
- cqs_wait_operation->objs[i].addr, &mapping);
+ uintptr_t evt = (uintptr_t)kbase_phy_alloc_mapping_get(
+ queue->kctx, cqs_wait_operation->objs[i].addr, &mapping);
+ u64 val = 0;
- /* GPUCORE-28172 RDT to review */
- if (!queue->command_started)
+ if (!queue->command_started) {
queue->command_started = true;
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START(
+ kbdev, queue);
+ }
+
if (!evt) {
dev_warn(kbdev->dev,
@@ -956,15 +1053,32 @@
return -EINVAL;
}
+ switch (cqs_wait_operation->objs[i].data_type) {
+ default:
+ WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(
+ cqs_wait_operation->objs[i].data_type));
+ kbase_phy_alloc_mapping_put(queue->kctx, mapping);
+ queue->has_error = true;
+ return -EINVAL;
+ case BASEP_CQS_DATA_TYPE_U32:
+ val = *(u32 *)evt;
+ evt += BASEP_EVENT32_ERR_OFFSET - BASEP_EVENT32_VAL_OFFSET;
+ break;
+ case BASEP_CQS_DATA_TYPE_U64:
+ val = *(u64 *)evt;
+ evt += BASEP_EVENT64_ERR_OFFSET - BASEP_EVENT64_VAL_OFFSET;
+ break;
+ }
+
switch (cqs_wait_operation->objs[i].operation) {
case BASEP_CQS_WAIT_OPERATION_LE:
- sig_set = *evt <= cqs_wait_operation->objs[i].val;
+ sig_set = val <= cqs_wait_operation->objs[i].val;
break;
case BASEP_CQS_WAIT_OPERATION_GT:
- sig_set = *evt > cqs_wait_operation->objs[i].val;
+ sig_set = val > cqs_wait_operation->objs[i].val;
break;
default:
- dev_warn(kbdev->dev,
+ dev_dbg(kbdev->dev,
"Unsupported CQS wait operation %d", cqs_wait_operation->objs[i].operation);
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
@@ -973,27 +1087,15 @@
return -EINVAL;
}
- /* Increment evt up to the error_state value depending on the CQS data type */
- switch (cqs_wait_operation->objs[i].data_type) {
- default:
- dev_warn(kbdev->dev, "Unreachable data_type=%d", cqs_wait_operation->objs[i].data_type);
- /* Fallthrough - hint to compiler that there's really only 2 options at present */
- case BASEP_CQS_DATA_TYPE_U32:
- evt = (u64 *)((u8 *)evt + sizeof(u32));
- break;
- case BASEP_CQS_DATA_TYPE_U64:
- evt = (u64 *)((u8 *)evt + sizeof(u64));
- break;
- }
-
if (sig_set) {
bitmap_set(cqs_wait_operation->signaled, i, 1);
if ((cqs_wait_operation->inherit_err_flags & (1U << i)) &&
- *evt > 0) {
+ *(u32 *)evt > 0) {
queue->has_error = true;
}
- /* GPUCORE-28172 RDT to review */
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END(
+ kbdev, queue, *(u32 *)evt);
queue->command_started = false;
}
@@ -1017,8 +1119,9 @@
{
struct base_cqs_wait_operation_info *objs;
unsigned int nr_objs = cqs_wait_operation_info->nr_objs;
+ unsigned int i;
- lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
return -EINVAL;
@@ -1034,6 +1137,18 @@
nr_objs * sizeof(*objs))) {
kfree(objs);
return -ENOMEM;
+ }
+
+ /* Check the CQS objects as early as possible. By checking their alignment
+ * (required alignment equals to size for Sync32 and Sync64 objects), we can
+ * prevent overrunning the supplied event page.
+ */
+ for (i = 0; i < nr_objs; i++) {
+ if (!kbase_kcpu_cqs_is_data_type_valid(objs[i].data_type) ||
+ !kbase_kcpu_cqs_is_aligned(objs[i].addr, objs[i].data_type)) {
+ kfree(objs);
+ return -EINVAL;
+ }
}
if (++queue->cqs_wait_count == 1) {
@@ -1066,6 +1181,44 @@
return 0;
}
+static void kbasep_kcpu_cqs_do_set_operation_32(struct kbase_kcpu_command_queue *queue,
+ uintptr_t evt, u8 operation, u64 val)
+{ /* Applies a CQS set operation to the 32-bit value stored at address evt. */
+ struct kbase_device *kbdev = queue->kctx->kbdev;
+
+ switch (operation) {
+ case BASEP_CQS_SET_OPERATION_ADD:
+ *(u32 *)evt += (u32)val; /* val is narrowed to 32 bits before the add */
+ break;
+ case BASEP_CQS_SET_OPERATION_SET:
+ *(u32 *)evt = val; /* u64 val is implicitly narrowed by the 32-bit store */
+ break;
+ default: /* unknown operation: value left untouched, queue flagged as errored */
+ dev_dbg(kbdev->dev, "Unsupported CQS set operation %d", operation);
+ queue->has_error = true;
+ break;
+ }
+}
+
+static void kbasep_kcpu_cqs_do_set_operation_64(struct kbase_kcpu_command_queue *queue,
+ uintptr_t evt, u8 operation, u64 val)
+{ /* Applies a CQS set operation to the 64-bit value stored at address evt. */
+ struct kbase_device *kbdev = queue->kctx->kbdev;
+
+ switch (operation) {
+ case BASEP_CQS_SET_OPERATION_ADD:
+ *(u64 *)evt += val; /* full 64-bit add, no narrowing */
+ break;
+ case BASEP_CQS_SET_OPERATION_SET:
+ *(u64 *)evt = val; /* full 64-bit store */
+ break;
+ default: /* unknown operation: value left untouched, queue flagged as errored */
+ dev_dbg(kbdev->dev, "Unsupported CQS set operation %d", operation);
+ queue->has_error = true;
+ break;
+ }
+}
+
static void kbase_kcpu_cqs_set_operation_process(
struct kbase_device *kbdev,
struct kbase_kcpu_command_queue *queue,
@@ -1073,57 +1226,49 @@
{
unsigned int i;
- lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
if (WARN_ON(!cqs_set_operation->objs))
return;
for (i = 0; i < cqs_set_operation->nr_objs; i++) {
struct kbase_vmap_struct *mapping;
- u64 *evt;
+ uintptr_t evt;
- evt = (u64 *)kbase_phy_alloc_mapping_get(
+ evt = (uintptr_t)kbase_phy_alloc_mapping_get(
queue->kctx, cqs_set_operation->objs[i].addr, &mapping);
-
- /* GPUCORE-28172 RDT to review */
if (!evt) {
dev_warn(kbdev->dev,
"Sync memory %llx already freed", cqs_set_operation->objs[i].addr);
queue->has_error = true;
} else {
- switch (cqs_set_operation->objs[i].operation) {
- case BASEP_CQS_SET_OPERATION_ADD:
- *evt += cqs_set_operation->objs[i].val;
- break;
- case BASEP_CQS_SET_OPERATION_SET:
- *evt = cqs_set_operation->objs[i].val;
- break;
- default:
- dev_warn(kbdev->dev,
- "Unsupported CQS set operation %d", cqs_set_operation->objs[i].operation);
- queue->has_error = true;
- break;
- }
+ struct base_cqs_set_operation_info *obj = &cqs_set_operation->objs[i];
- /* Increment evt up to the error_state value depending on the CQS data type */
- switch (cqs_set_operation->objs[i].data_type) {
+ switch (obj->data_type) {
default:
- dev_warn(kbdev->dev, "Unreachable data_type=%d", cqs_set_operation->objs[i].data_type);
- /* Fallthrough - hint to compiler that there's really only 2 options at present */
+ WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(obj->data_type));
+ queue->has_error = true;
+ goto skip_err_propagation;
case BASEP_CQS_DATA_TYPE_U32:
- evt = (u64 *)((u8 *)evt + sizeof(u32));
+ kbasep_kcpu_cqs_do_set_operation_32(queue, evt, obj->operation,
+ obj->val);
+ evt += BASEP_EVENT32_ERR_OFFSET - BASEP_EVENT32_VAL_OFFSET;
break;
case BASEP_CQS_DATA_TYPE_U64:
- evt = (u64 *)((u8 *)evt + sizeof(u64));
+ kbasep_kcpu_cqs_do_set_operation_64(queue, evt, obj->operation,
+ obj->val);
+ evt += BASEP_EVENT64_ERR_OFFSET - BASEP_EVENT64_VAL_OFFSET;
break;
}
- /* GPUCORE-28172 RDT to review */
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION(
+ kbdev, queue, *(u32 *)evt ? 1 : 0);
/* Always propagate errors */
- *evt = queue->has_error;
+ *(u32 *)evt = queue->has_error;
+skip_err_propagation:
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
}
}
@@ -1139,11 +1284,11 @@
struct base_kcpu_command_cqs_set_operation_info *cqs_set_operation_info,
struct kbase_kcpu_command *current_command)
{
- struct kbase_context *const kctx = kcpu_queue->kctx;
struct base_cqs_set_operation_info *objs;
unsigned int nr_objs = cqs_set_operation_info->nr_objs;
+ unsigned int i;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
return -EINVAL;
@@ -1159,6 +1304,18 @@
nr_objs * sizeof(*objs))) {
kfree(objs);
return -ENOMEM;
+ }
+
+ /* Check the CQS objects as early as possible. By checking their alignment
+ * (required alignment equals to size for Sync32 and Sync64 objects), we can
+ * prevent overrunning the supplied event page.
+ */
+ for (i = 0; i < nr_objs; i++) {
+ if (!kbase_kcpu_cqs_is_data_type_valid(objs[i].data_type) ||
+ !kbase_kcpu_cqs_is_aligned(objs[i].addr, objs[i].data_type)) {
+ kfree(objs);
+ return -EINVAL;
+ }
}
current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION;
@@ -1182,20 +1339,23 @@
struct kbase_kcpu_command_queue *kcpu_queue = fence_info->kcpu_queue;
struct kbase_context *const kctx = kcpu_queue->kctx;
- KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_WAIT_END, kcpu_queue,
+#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG
+ /* Fence gets signaled. Deactivate the timer for fence-wait timeout */
+ del_timer(&kcpu_queue->fence_timeout);
+#endif
+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue,
fence->context, fence->seqno);
/* Resume kcpu command queue processing. */
- queue_work(kctx->csf.kcpu_queues.wq, &kcpu_queue->work);
+ queue_work(kcpu_queue->wq, &kcpu_queue->work);
}
-static void kbase_kcpu_fence_wait_cancel(
- struct kbase_kcpu_command_queue *kcpu_queue,
- struct kbase_kcpu_command_fence_info *fence_info)
+static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_queue,
+ struct kbase_kcpu_command_fence_info *fence_info)
{
struct kbase_context *const kctx = kcpu_queue->kctx;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
if (WARN_ON(!fence_info->fence))
return;
@@ -1204,8 +1364,15 @@
bool removed = dma_fence_remove_callback(fence_info->fence,
&fence_info->fence_cb);
+#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG
+ /* Fence-wait cancelled or fence signaled. In the latter case
+ * the timer would already have been deactivated inside
+ * kbase_csf_fence_wait_callback().
+ */
+ del_timer_sync(&kcpu_queue->fence_timeout);
+#endif
if (removed)
- KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_WAIT_END,
+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END,
kcpu_queue, fence_info->fence->context,
fence_info->fence->seqno);
}
@@ -1216,6 +1383,80 @@
fence_info->fence = NULL;
}
+
+#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG
+/**
+ * fence_timeout_callback() - Timeout callback function for fence-wait
+ *
+ * @timer: The fence_timeout timer embedded in a struct kbase_kcpu_command_queue
+ *
+ * Warns, with the fence's context#seqno name, when the fence is still
+ * unsignalled or carries an error. If the fence has already signalled, the
+ * queue's work item is enqueued instead and no warning is printed.
+ */
+static void fence_timeout_callback(struct timer_list *timer)
+{
+ struct kbase_kcpu_command_queue *kcpu_queue =
+ container_of(timer, struct kbase_kcpu_command_queue, fence_timeout);
+ struct kbase_context *const kctx = kcpu_queue->kctx;
+ struct kbase_kcpu_command *cmd = &kcpu_queue->commands[kcpu_queue->start_offset];
+ struct kbase_kcpu_command_fence_info *fence_info;
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ struct fence *fence;
+#else
+ struct dma_fence *fence;
+#endif
+ struct kbase_sync_fence_info info;
+
+ if (cmd->type != BASE_KCPU_COMMAND_TYPE_FENCE_WAIT) { /* command at start_offset must be a fence wait */
+ dev_err(kctx->kbdev->dev,
+ "%s: Unexpected command type %d in ctx:%d_%d kcpu queue:%u", __func__,
+ cmd->type, kctx->tgid, kctx->id, kcpu_queue->id);
+ return;
+ }
+
+ fence_info = &cmd->info.fence;
+
+ fence = kbase_fence_get(fence_info); /* paired with kbase_fence_put() at the end */
+ if (!fence) {
+ dev_err(kctx->kbdev->dev, "no fence found in ctx:%d_%d kcpu queue:%u", kctx->tgid,
+ kctx->id, kcpu_queue->id);
+ return;
+ }
+
+ kbase_sync_fence_info_get(fence, &info);
+
+ if (info.status == 1) { /* already signalled: only enqueue the queue's work */
+ queue_work(kcpu_queue->wq, &kcpu_queue->work);
+ } else if (info.status == 0) { /* still pending after the timeout: report it */
+ dev_warn(kctx->kbdev->dev, "fence has not yet signalled in %ums",
+ FENCE_WAIT_TIMEOUT_MS);
+ dev_warn(kctx->kbdev->dev,
+ "ctx:%d_%d kcpu queue:%u still waiting for fence[%pK] context#seqno:%s",
+ kctx->tgid, kctx->id, kcpu_queue->id, fence, info.name);
+ } else { /* any other status is reported as a fence error */
+ dev_warn(kctx->kbdev->dev, "fence has got error");
+ dev_warn(kctx->kbdev->dev,
+ "ctx:%d_%d kcpu queue:%u faulty fence[%pK] context#seqno:%s error(%d)",
+ kctx->tgid, kctx->id, kcpu_queue->id, fence, info.name, info.status);
+ }
+
+ kbase_fence_put(fence);
+}
+
+/**
+ * fence_timeout_start() - Start a timer to check fence-wait timeout
+ *
+ * @cmd: KCPU command queue whose fence_timeout timer is armed
+ *
+ * Sets the queue's fence_timeout timer to expire FENCE_WAIT_TIMEOUT_MS
+ * milliseconds from now, to check that a fence-wait completes in that time.
+ */
+static void fence_timeout_start(struct kbase_kcpu_command_queue *cmd)
+{
+ mod_timer(&cmd->fence_timeout, jiffies + msecs_to_jiffies(FENCE_WAIT_TIMEOUT_MS));
+}
+#endif
/**
* kbase_kcpu_fence_wait_process() - Process the kcpu fence wait command
@@ -1236,8 +1477,9 @@
#else
struct dma_fence *fence;
#endif
+ struct kbase_context *const kctx = kcpu_queue->kctx;
- lockdep_assert_held(&kcpu_queue->kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
if (WARN_ON(!fence_info->fence))
return -EINVAL;
@@ -1251,14 +1493,26 @@
&fence_info->fence_cb,
kbase_csf_fence_wait_callback);
- KBASE_KTRACE_ADD_CSF_KCPU(kcpu_queue->kctx->kbdev,
- FENCE_WAIT_START, kcpu_queue,
+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev,
+ KCPU_FENCE_WAIT_START, kcpu_queue,
fence->context, fence->seqno);
fence_status = cb_err;
- if (cb_err == 0)
+ if (cb_err == 0) {
kcpu_queue->fence_wait_processed = true;
- else if (cb_err == -ENOENT)
+#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG
+ fence_timeout_start(kcpu_queue);
+#endif
+ } else if (cb_err == -ENOENT) {
fence_status = dma_fence_get_status(fence);
+ if (!fence_status) {
+ struct kbase_sync_fence_info info;
+
+ kbase_sync_fence_info_get(fence, &info);
+ dev_warn(kctx->kbdev->dev,
+ "Unexpected status for fence %s of ctx:%d_%d kcpu queue:%u",
+ info.name, kctx->tgid, kctx->id, kcpu_queue->id);
+ }
+ }
}
/*
@@ -1271,17 +1525,15 @@
*/
if (fence_status)
- kbase_kcpu_fence_wait_cancel(kcpu_queue, fence_info);
+ kbasep_kcpu_fence_wait_cancel(kcpu_queue, fence_info);
return fence_status;
}
-static int kbase_kcpu_fence_wait_prepare(
- struct kbase_kcpu_command_queue *kcpu_queue,
- struct base_kcpu_command_fence_info *fence_info,
- struct kbase_kcpu_command *current_command)
+static int kbase_kcpu_fence_wait_prepare(struct kbase_kcpu_command_queue *kcpu_queue,
+ struct base_kcpu_command_fence_info *fence_info,
+ struct kbase_kcpu_command *current_command)
{
- struct kbase_context *const kctx = kcpu_queue->kctx;
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence *fence_in;
#else
@@ -1289,10 +1541,9 @@
#endif
struct base_fence fence;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
- if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence),
- sizeof(fence)))
+ if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence)))
return -ENOMEM;
fence_in = sync_file_get_fence(fence.basep.fd);
@@ -1303,13 +1554,11 @@
current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_WAIT;
current_command->info.fence.fence = fence_in;
current_command->info.fence.kcpu_queue = kcpu_queue;
-
return 0;
}
-static int kbase_kcpu_fence_signal_process(
- struct kbase_kcpu_command_queue *kcpu_queue,
- struct kbase_kcpu_command_fence_info *fence_info)
+static int kbasep_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue,
+ struct kbase_kcpu_command_fence_info *fence_info)
{
struct kbase_context *const kctx = kcpu_queue->kctx;
int ret;
@@ -1320,45 +1569,50 @@
ret = dma_fence_signal(fence_info->fence);
if (unlikely(ret < 0)) {
- dev_warn(kctx->kbdev->dev,
- "fence_signal() failed with %d\n", ret);
+ dev_warn(kctx->kbdev->dev, "dma_fence(%d) has been signalled already\n", ret);
+ /* Treated as a success */
+ ret = 0;
}
- KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_SIGNAL, kcpu_queue,
+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_SIGNAL, kcpu_queue,
fence_info->fence->context,
fence_info->fence->seqno);
- dma_fence_put(fence_info->fence);
+ /* dma_fence refcount needs to be decreased to release it. */
+ kbase_fence_put(fence_info->fence);
fence_info->fence = NULL;
return ret;
}
-static int kbase_kcpu_fence_signal_prepare(
- struct kbase_kcpu_command_queue *kcpu_queue,
- struct base_kcpu_command_fence_info *fence_info,
- struct kbase_kcpu_command *current_command)
+static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue,
+ struct kbase_kcpu_command *current_command,
+ struct base_fence *fence, struct sync_file **sync_file,
+ int *fd)
{
- struct kbase_context *const kctx = kcpu_queue->kctx;
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence *fence_out;
#else
struct dma_fence *fence_out;
#endif
- struct base_fence fence;
- struct sync_file *sync_file;
+ struct kbase_kcpu_dma_fence *kcpu_fence;
int ret = 0;
- int fd;
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&kcpu_queue->lock);
- if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence),
- sizeof(fence)))
- return -EFAULT;
-
- fence_out = kzalloc(sizeof(*fence_out), GFP_KERNEL);
- if (!fence_out)
+ kcpu_fence = kzalloc(sizeof(*kcpu_fence), GFP_KERNEL);
+ if (!kcpu_fence)
return -ENOMEM;
+
+ /* Set reference to KCPU metadata and increment refcount */
+ kcpu_fence->metadata = kcpu_queue->metadata;
+ WARN_ON(!kbase_refcount_inc_not_zero(&kcpu_fence->metadata->refcount));
+
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ fence_out = (struct fence *)kcpu_fence;
+#else
+ fence_out = (struct dma_fence *)kcpu_fence;
+#endif
dma_fence_init(fence_out,
&kbase_fence_ops,
@@ -1376,43 +1630,98 @@
#endif
/* create a sync_file fd representing the fence */
- sync_file = sync_file_create(fence_out);
- if (!sync_file) {
-#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE)
- dma_fence_put(fence_out);
-#endif
+ *sync_file = sync_file_create(fence_out);
+ if (!(*sync_file)) {
ret = -ENOMEM;
goto file_create_fail;
}
- fd = get_unused_fd_flags(O_CLOEXEC);
- if (fd < 0) {
- ret = fd;
+ *fd = get_unused_fd_flags(O_CLOEXEC);
+ if (*fd < 0) {
+ ret = *fd;
goto fd_flags_fail;
}
- fd_install(fd, sync_file->file);
-
- fence.basep.fd = fd;
+ fence->basep.fd = *fd;
current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL;
current_command->info.fence.fence = fence_out;
- if (copy_to_user(u64_to_user_ptr(fence_info->fence), &fence,
- sizeof(fence))) {
- ret = -EFAULT;
- goto fd_flags_fail;
- }
-
return 0;
fd_flags_fail:
- fput(sync_file->file);
+ fput((*sync_file)->file);
file_create_fail:
- dma_fence_put(fence_out);
+ /*
+ * Upon failure, the dma_fence refcount that was increased by
+ * dma_fence_get() or sync_file_create() needs to be decreased
+ * so that the fence is released.
+ */
+ kbase_fence_put(fence_out);
+ current_command->info.fence.fence = NULL;
return ret;
}
+
+static int kbase_kcpu_fence_signal_prepare(struct kbase_kcpu_command_queue *kcpu_queue,
+ struct base_kcpu_command_fence_info *fence_info,
+ struct kbase_kcpu_command *current_command)
+{
+ struct base_fence fence;
+ struct sync_file *sync_file = NULL;
+ int fd;
+ int ret = 0;
+
+ lockdep_assert_held(&kcpu_queue->lock);
+
+ if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence)))
+ return -EFAULT;
+
+ ret = kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, &fence, &sync_file, &fd);
+ if (ret)
+ return ret;
+
+ if (copy_to_user(u64_to_user_ptr(fence_info->fence), &fence,
+ sizeof(fence))) {
+ ret = -EFAULT;
+ goto fail;
+ }
+
+ /* The 'sync_file' pointer can't be safely dereferenced once 'fd' is
+ * installed, so fd_install() must be the very last step performed
+ * before returning success.
+ */
+ fd_install(fd, sync_file->file);
+ return 0;
+
+fail:
+ fput(sync_file->file);
+ kbase_fence_put(current_command->info.fence.fence);
+ current_command->info.fence.fence = NULL;
+
+ return ret;
+}
+
+int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue,
+ struct kbase_kcpu_command_fence_info *fence_info)
+{
+ if (!kcpu_queue || !fence_info)
+ return -EINVAL;
+
+ return kbasep_kcpu_fence_signal_process(kcpu_queue, fence_info);
+}
+KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_process);
+
+int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue,
+ struct kbase_kcpu_command *current_command,
+ struct base_fence *fence, struct sync_file **sync_file, int *fd)
+{
+ if (!kcpu_queue || !current_command || !fence || !sync_file || !fd)
+ return -EINVAL;
+
+ return kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, fence, sync_file, fd);
+}
+KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_init);
#endif /* CONFIG_SYNC_FILE */
static void kcpu_queue_process_worker(struct work_struct *data)
@@ -1420,11 +1729,9 @@
struct kbase_kcpu_command_queue *queue = container_of(data,
struct kbase_kcpu_command_queue, work);
- mutex_lock(&queue->kctx->csf.kcpu_queues.lock);
-
+ mutex_lock(&queue->lock);
kcpu_queue_process(queue, false);
-
- mutex_unlock(&queue->kctx->csf.kcpu_queues.lock);
+ mutex_unlock(&queue->lock);
}
static int delete_queue(struct kbase_context *kctx, u32 id)
@@ -1437,8 +1744,22 @@
struct kbase_kcpu_command_queue *queue =
kctx->csf.kcpu_queues.array[id];
- KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DESTROY,
+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DELETE,
queue, queue->num_pending_cmds, queue->cqs_wait_count);
+
+ /* Disassociate the queue from the system to prevent further
+ * submissions. Draining pending commands would be acceptable
+ * even if a new queue is created using the same ID.
+ */
+ kctx->csf.kcpu_queues.array[id] = NULL;
+ bitmap_clear(kctx->csf.kcpu_queues.in_use, id, 1);
+
+ mutex_unlock(&kctx->csf.kcpu_queues.lock);
+
+ mutex_lock(&queue->lock);
+
+ /* Metadata struct may outlive KCPU queue. */
+ kbase_kcpu_dma_fence_meta_put(queue->metadata);
/* Drain the remaining work for this queue first and go past
* all the waits.
@@ -1451,22 +1772,22 @@
/* All CQS wait commands should have been cleaned up */
WARN_ON(queue->cqs_wait_count);
- kctx->csf.kcpu_queues.array[id] = NULL;
- bitmap_clear(kctx->csf.kcpu_queues.in_use, id, 1);
-
/* Fire the tracepoint with the mutex held to enforce correct
* ordering with the summary stream.
*/
KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE(kctx->kbdev, queue);
- mutex_unlock(&kctx->csf.kcpu_queues.lock);
+ mutex_unlock(&queue->lock);
cancel_work_sync(&queue->work);
+ destroy_workqueue(queue->wq);
+
+ mutex_destroy(&queue->lock);
kfree(queue);
} else {
- dev_warn(kctx->kbdev->dev,
- "Attempt to delete a non-existent KCPU queue\n");
+ dev_dbg(kctx->kbdev->dev,
+ "Attempt to delete a non-existent KCPU queue");
mutex_unlock(&kctx->csf.kcpu_queues.lock);
err = -EINVAL;
}
@@ -1481,8 +1802,7 @@
{
u8 i;
- KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(kbdev, queue);
for (i = 0; i < jit_alloc->count; i++) {
const u8 id = jit_alloc->info[i].id;
const struct kbase_va_region *reg = queue->kctx->jit_alloc[id];
@@ -1512,26 +1832,24 @@
struct kbase_device *kbdev,
const struct kbase_kcpu_command_queue *queue)
{
- KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(kbdev, queue);
}
static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(
struct kbase_device *kbdev,
const struct kbase_kcpu_command_queue *queue)
{
- KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(kbdev, queue);
}
static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
- bool ignore_waits)
+ bool drain_queue)
{
struct kbase_device *kbdev = queue->kctx->kbdev;
bool process_next = true;
size_t i;
- lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+ lockdep_assert_held(&queue->lock);
for (i = 0; i != queue->num_pending_cmds; ++i) {
struct kbase_kcpu_command *cmd =
@@ -1541,16 +1859,15 @@
switch (cmd->type) {
case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
if (!queue->command_started) {
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START(kbdev,
+ queue);
queue->command_started = true;
}
status = 0;
#if IS_ENABLED(CONFIG_SYNC_FILE)
- if (ignore_waits) {
- kbase_kcpu_fence_wait_cancel(queue,
- &cmd->info.fence);
+ if (drain_queue) {
+ kbasep_kcpu_fence_wait_cancel(queue, &cmd->info.fence);
} else {
status = kbase_kcpu_fence_wait_process(queue,
&cmd->info.fence);
@@ -1575,14 +1892,12 @@
}
break;
case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL:
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START(kbdev, queue);
status = 0;
#if IS_ENABLED(CONFIG_SYNC_FILE)
- status = kbase_kcpu_fence_signal_process(
- queue, &cmd->info.fence);
+ status = kbasep_kcpu_fence_signal_process(queue, &cmd->info.fence);
if (status < 0)
queue->has_error = true;
@@ -1594,14 +1909,14 @@
queue->has_error = true;
#endif
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(
- kbdev, queue, status);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(kbdev, queue,
+ status);
break;
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
status = kbase_kcpu_cqs_wait_process(kbdev, queue,
&cmd->info.cqs_wait);
- if (!status && !ignore_waits) {
+ if (!status && !drain_queue) {
process_next = false;
} else {
/* Either all CQS objects were signaled or
@@ -1623,7 +1938,7 @@
status = kbase_kcpu_cqs_wait_operation_process(kbdev, queue,
&cmd->info.cqs_wait_operation);
- if (!status && !ignore_waits) {
+ if (!status && !drain_queue) {
process_next = false;
} else {
/* Either all CQS objects were signaled or
@@ -1645,35 +1960,36 @@
/* Clear the queue's error state */
queue->has_error = false;
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER(kbdev, queue);
break;
case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: {
struct kbase_ctx_ext_res_meta *meta = NULL;
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(
- kbdev, queue);
+ if (!drain_queue) {
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(kbdev,
+ queue);
- kbase_gpu_vm_lock(queue->kctx);
- meta = kbase_sticky_resource_acquire(
- queue->kctx, cmd->info.import.gpu_va);
- kbase_gpu_vm_unlock(queue->kctx);
+ kbase_gpu_vm_lock(queue->kctx);
+ meta = kbase_sticky_resource_acquire(
+ queue->kctx, cmd->info.import.gpu_va);
+ kbase_gpu_vm_unlock(queue->kctx);
- if (meta == NULL) {
- queue->has_error = true;
- dev_warn(kbdev->dev,
- "failed to map an external resource\n");
+ if (meta == NULL) {
+ queue->has_error = true;
+ dev_dbg(
+ kbdev->dev,
+ "failed to map an external resource");
+ }
+
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END(
+ kbdev, queue, meta ? 0 : 1);
}
-
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END(
- kbdev, queue, meta ? 0 : 1);
break;
}
case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: {
bool ret;
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(kbdev, queue);
kbase_gpu_vm_lock(queue->kctx);
ret = kbase_sticky_resource_release(
@@ -1682,19 +1998,19 @@
if (!ret) {
queue->has_error = true;
- dev_warn(kbdev->dev,
- "failed to release the reference. resource not found\n");
+ dev_dbg(kbdev->dev,
+ "failed to release the reference. resource not found");
}
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(
- kbdev, queue, ret ? 0 : 1);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(kbdev, queue,
+ ret ? 0 : 1);
break;
}
case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: {
bool ret;
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START(kbdev,
+ queue);
kbase_gpu_vm_lock(queue->kctx);
ret = kbase_sticky_resource_release_force(
@@ -1703,8 +2019,8 @@
if (!ret) {
queue->has_error = true;
- dev_warn(kbdev->dev,
- "failed to release the reference. resource not found\n");
+ dev_dbg(kbdev->dev,
+ "failed to release the reference. resource not found");
}
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END(
@@ -1713,29 +2029,36 @@
}
case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC:
{
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(
- kbdev, queue);
-
- status = kbase_kcpu_jit_allocate_process(queue, cmd);
- if (status == -EAGAIN) {
- process_next = false;
- } else {
- if (status != 0)
- queue->has_error = true;
-
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO(
- kbdev, queue, &cmd->info.jit_alloc,
- status);
-
+ if (drain_queue) {
+ /* We still need to call this function to clean the JIT alloc info up */
kbase_kcpu_jit_allocate_finish(queue, cmd);
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
+ } else {
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(kbdev,
+ queue);
+
+ status = kbase_kcpu_jit_allocate_process(queue,
+ cmd);
+ if (status == -EAGAIN) {
+ process_next = false;
+ } else {
+ if (status != 0)
+ queue->has_error = true;
+
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO(
+ kbdev, queue,
+ &cmd->info.jit_alloc, status);
+
+ kbase_kcpu_jit_allocate_finish(queue,
+ cmd);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
kbdev, queue);
+ }
}
+
break;
}
- case BASE_KCPU_COMMAND_TYPE_JIT_FREE:
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(
- kbdev, queue);
+ case BASE_KCPU_COMMAND_TYPE_JIT_FREE: {
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(kbdev, queue);
status = kbase_kcpu_jit_free_process(queue, cmd);
if (status)
@@ -1744,21 +2067,25 @@
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(
kbdev, queue);
break;
+ }
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: {
struct kbase_suspend_copy_buffer *sus_buf =
cmd->info.suspend_buf_copy.sus_buf;
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START(
- kbdev, queue);
+ if (!drain_queue) {
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START(
+ kbdev, queue);
- status = kbase_csf_queue_group_suspend_process(
+ status = kbase_csf_queue_group_suspend_process(
queue->kctx, sus_buf,
cmd->info.suspend_buf_copy.group_handle);
- if (status)
- queue->has_error = true;
+ if (status)
+ queue->has_error = true;
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END(
- kbdev, queue, status);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END(
+ kbdev, queue, status);
+ }
if (!sus_buf->cpu_alloc) {
int i;
@@ -1768,36 +2095,18 @@
} else {
kbase_mem_phy_alloc_kernel_unmapped(
sus_buf->cpu_alloc);
- kbase_mem_phy_alloc_put(sus_buf->cpu_alloc);
+ kbase_mem_phy_alloc_put(
+ sus_buf->cpu_alloc);
}
kfree(sus_buf->pages);
kfree(sus_buf);
break;
}
-#if MALI_UNIT_TEST
- case BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: {
- u64 time = ktime_get_raw_ns();
- void *target_page = kmap(*cmd->info.sample_time.page);
-
- if (target_page) {
- memcpy(target_page +
- cmd->info.sample_time.page_offset,
- &time, sizeof(time));
- kunmap(*cmd->info.sample_time.page);
- } else {
- dev_warn(kbdev->dev,
- "Could not kmap target page\n");
- queue->has_error = true;
- }
- put_page(*cmd->info.sample_time.page);
- kfree(cmd->info.sample_time.page);
- break;
- }
-#endif /* MALI_UNIT_TEST */
+#endif
default:
- dev_warn(kbdev->dev,
- "Unrecognized command type\n");
+ dev_dbg(kbdev->dev,
+ "Unrecognized command type");
break;
} /* switch */
@@ -1835,12 +2144,12 @@
switch (cmd->type) {
case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT(
- kbdev, queue, cmd->info.fence.fence);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT(kbdev, queue,
+ cmd->info.fence.fence);
break;
case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL:
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL(
- kbdev, queue, cmd->info.fence.fence);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL(kbdev, queue,
+ cmd->info.fence.fence);
break;
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
{
@@ -1862,32 +2171,48 @@
unsigned int i;
for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) {
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET(
- kbdev, queue, sets[i].addr);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET(kbdev, queue,
+ sets[i].addr);
}
break;
}
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION:
{
- /* GPUCORE-28172 RDT to review */
+ const struct base_cqs_wait_operation_info *waits =
+ cmd->info.cqs_wait_operation.objs;
+ u32 inherit_err_flags = cmd->info.cqs_wait_operation.inherit_err_flags;
+ unsigned int i;
+
+ for (i = 0; i < cmd->info.cqs_wait_operation.nr_objs; i++) {
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION(
+ kbdev, queue, waits[i].addr, waits[i].val,
+ waits[i].operation, waits[i].data_type,
+ (inherit_err_flags & ((uint32_t)1 << i)) ? 1 : 0);
+ }
break;
}
case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:
{
- /* GPUCORE-28172 RDT to review */
+ const struct base_cqs_set_operation_info *sets = cmd->info.cqs_set_operation.objs;
+ unsigned int i;
+
+ for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) {
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION(
+ kbdev, queue, sets[i].addr, sets[i].val,
+ sets[i].operation, sets[i].data_type);
+ }
break;
}
case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev,
- queue);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev, queue);
break;
case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT:
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT(
- kbdev, queue, cmd->info.import.gpu_va);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT(kbdev, queue,
+ cmd->info.import.gpu_va);
break;
case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT:
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT(
- kbdev, queue, cmd->info.import.gpu_va);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT(kbdev, queue,
+ cmd->info.import.gpu_va);
break;
case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE:
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE(
@@ -1897,50 +2222,41 @@
{
u8 i;
- KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC(kbdev, queue);
for (i = 0; i < cmd->info.jit_alloc.count; i++) {
const struct base_jit_alloc_info *info =
&cmd->info.jit_alloc.info[i];
KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
- kbdev, queue, info->gpu_alloc_addr,
- info->va_pages, info->commit_pages,
- info->extension, info->id, info->bin_id,
- info->max_allocations, info->flags,
- info->usage_id);
+ kbdev, queue, info->gpu_alloc_addr, info->va_pages,
+ info->commit_pages, info->extension, info->id, info->bin_id,
+ info->max_allocations, info->flags, info->usage_id);
}
- KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC(kbdev, queue);
break;
}
case BASE_KCPU_COMMAND_TYPE_JIT_FREE:
{
u8 i;
- KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue);
for (i = 0; i < cmd->info.jit_free.count; i++) {
KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE(
kbdev, queue, cmd->info.jit_free.ids[i]);
}
- KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue);
break;
}
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND(
kbdev, queue, cmd->info.suspend_buf_copy.sus_buf,
cmd->info.suspend_buf_copy.group_handle);
break;
-#if MALI_UNIT_TEST
- case BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME:
- /*
- * This is test-only KCPU command, no need to have a timeline
- * entry
- */
+#endif
+ default:
+ dev_dbg(kbdev->dev, "Unknown command type %u", cmd->type);
break;
-#endif /* MALI_UNIT_TEST */
}
}
@@ -1954,9 +2270,11 @@
/* The offset to the first command that is being processed or yet to
* be processed is of u8 type, so the number of commands inside the
- * queue cannot be more than 256.
+ * queue cannot be more than 256. The current implementation expects
+ * exactly 256; any other size will require the addition of wrapping
+ * logic.
*/
- BUILD_BUG_ON(KBASEP_KCPU_QUEUE_SIZE > 256);
+ BUILD_BUG_ON(KBASEP_KCPU_QUEUE_SIZE != 256);
/* Whilst the backend interface allows enqueueing multiple commands in
* a single operation, the Base interface does not expose any mechanism
@@ -1966,19 +2284,35 @@
* in the set.
*/
if (enq->nr_commands != 1) {
- dev_err(kctx->kbdev->dev,
- "More than one commands enqueued\n");
+ dev_dbg(kctx->kbdev->dev,
+ "More than one commands enqueued");
return -EINVAL;
}
+ /* There might be a race between one thread trying to enqueue commands to the queue
+ * and another thread trying to delete the same queue.
+ * This race could lead to a use-after-free in the enqueuing thread if the
+ * resources for the queue have already been freed by the deleting thread.
+ *
+ * To prevent the issue, two mutexes are acquired/released asymmetrically as follows.
+ *
+ * Lock A (kctx mutex)
+ * Lock B (queue mutex)
+ * Unlock A
+ * Unlock B
+ *
+ * With the kctx mutex held, the enqueuing thread checks the queue
+ * and returns an error code if the queue has already been deleted.
+ */
mutex_lock(&kctx->csf.kcpu_queues.lock);
-
- if (!kctx->csf.kcpu_queues.array[enq->id]) {
- ret = -EINVAL;
- goto out;
- }
-
queue = kctx->csf.kcpu_queues.array[enq->id];
+ if (queue == NULL) {
+ dev_dbg(kctx->kbdev->dev, "Invalid KCPU queue (id:%u)", enq->id);
+ mutex_unlock(&kctx->csf.kcpu_queues.lock);
+ return -EINVAL;
+ }
+ mutex_lock(&queue->lock);
+ mutex_unlock(&kctx->csf.kcpu_queues.lock);
if (kcpu_queue_get_space(queue) < enq->nr_commands) {
ret = -EBUSY;
@@ -1993,7 +2327,7 @@
* for the possibility to roll back.
*/
- for (i = 0; (i != enq->nr_commands) && !ret; ++i, ++kctx->csf.kcpu_queues.num_cmds) {
+ for (i = 0; (i != enq->nr_commands) && !ret; ++i) {
struct kbase_kcpu_command *kcpu_cmd =
&queue->commands[(u8)(queue->start_offset + queue->num_pending_cmds + i)];
struct base_kcpu_command command;
@@ -2016,7 +2350,7 @@
}
}
- kcpu_cmd->enqueue_ts = kctx->csf.kcpu_queues.num_cmds;
+ kcpu_cmd->enqueue_ts = atomic64_inc_return(&kctx->csf.kcpu_queues.cmd_seq_num);
switch (command.type) {
case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
#if IS_ENABLED(CONFIG_SYNC_FILE)
@@ -2076,45 +2410,16 @@
ret = kbase_kcpu_jit_free_prepare(queue,
&command.info.jit_free, kcpu_cmd);
break;
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
ret = kbase_csf_queue_group_suspend_prepare(queue,
&command.info.suspend_buf_copy,
kcpu_cmd);
break;
-#if MALI_UNIT_TEST
- case BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: {
- int const page_cnt = 1;
-
- kcpu_cmd->type = BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME;
- kcpu_cmd->info.sample_time.page_addr =
- command.info.sample_time.time & PAGE_MASK;
- kcpu_cmd->info.sample_time.page_offset =
- command.info.sample_time.time & ~PAGE_MASK;
- kcpu_cmd->info.sample_time.page = kcalloc(
- page_cnt, sizeof(struct page *), GFP_KERNEL);
- if (!kcpu_cmd->info.sample_time.page) {
- ret = -ENOMEM;
- } else {
- int pinned_pages = get_user_pages_fast(
- kcpu_cmd->info.sample_time.page_addr,
- page_cnt, 1,
- kcpu_cmd->info.sample_time.page);
-
- if (pinned_pages < 0) {
- ret = pinned_pages;
- kfree(kcpu_cmd->info.sample_time.page);
- } else if (pinned_pages != page_cnt) {
- ret = -EINVAL;
- kfree(kcpu_cmd->info.sample_time.page);
- }
- }
-
- break;
- }
-#endif /* MALI_UNIT_TEST */
+#endif
default:
- dev_warn(queue->kctx->kbdev->dev,
- "Unknown command type %u\n", command.type);
+ dev_dbg(queue->kctx->kbdev->dev,
+ "Unknown command type %u", command.type);
ret = -EINVAL;
break;
}
@@ -2135,13 +2440,10 @@
queue->num_pending_cmds += enq->nr_commands;
kcpu_queue_process(queue, false);
- } else {
- /* Roll back the number of enqueued commands */
- kctx->csf.kcpu_queues.num_cmds -= i;
}
out:
- mutex_unlock(&kctx->csf.kcpu_queues.lock);
+ mutex_unlock(&queue->lock);
return ret;
}
@@ -2155,14 +2457,9 @@
for (idx = 0; idx < KBASEP_MAX_KCPU_QUEUES; ++idx)
kctx->csf.kcpu_queues.array[idx] = NULL;
- kctx->csf.kcpu_queues.wq = alloc_workqueue("mali_kbase_csf_kcpu",
- WQ_UNBOUND | WQ_HIGHPRI, 0);
- if (!kctx->csf.kcpu_queues.wq)
- return -ENOMEM;
-
mutex_init(&kctx->csf.kcpu_queues.lock);
- kctx->csf.kcpu_queues.num_cmds = 0;
+ atomic64_set(&kctx->csf.kcpu_queues.cmd_seq_num, 0);
return 0;
}
@@ -2180,9 +2477,9 @@
(void)delete_queue(kctx, id);
}
- destroy_workqueue(kctx->csf.kcpu_queues.wq);
mutex_destroy(&kctx->csf.kcpu_queues.lock);
}
+KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_context_term);
int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx,
struct kbase_ioctl_kcpu_queue_delete *del)
@@ -2195,8 +2492,11 @@
{
struct kbase_kcpu_command_queue *queue;
int idx;
+ int n;
int ret = 0;
-
+#if IS_ENABLED(CONFIG_SYNC_FILE)
+ struct kbase_kcpu_dma_fence_meta *metadata;
+#endif
/* The queue id is of u8 type and we use the index of the kcpu_queues
* array as an id, so the number of elements in the array can't be
* more than 256.
@@ -2224,8 +2524,17 @@
goto out;
}
+ queue->wq = alloc_workqueue("mali_kbase_csf_kcpu_wq_%i", WQ_UNBOUND | WQ_HIGHPRI, 0, idx);
+ if (queue->wq == NULL) {
+ kfree(queue);
+ ret = -ENOMEM;
+
+ goto out;
+ }
+
bitmap_set(kctx->csf.kcpu_queues.in_use, idx, 1);
kctx->csf.kcpu_queues.array[idx] = queue;
+ mutex_init(&queue->lock);
queue->kctx = kctx;
queue->start_offset = 0;
queue->num_pending_cmds = 0;
@@ -2233,7 +2542,31 @@
queue->fence_context = dma_fence_context_alloc(1);
queue->fence_seqno = 0;
queue->fence_wait_processed = false;
-#endif
+
+ metadata = kzalloc(sizeof(*metadata), GFP_KERNEL);
+ if (!metadata) {
+ destroy_workqueue(queue->wq);
+ kfree(queue);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ metadata->kbdev = kctx->kbdev;
+ metadata->kctx_id = kctx->id;
+ n = snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%d-%d_%d-%lld-kcpu",
+ kctx->kbdev->id, kctx->tgid, kctx->id, queue->fence_context);
+ if (WARN_ON(n >= MAX_TIMELINE_NAME)) {
+ destroy_workqueue(queue->wq);
+ kfree(queue);
+ kfree(metadata);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ kbase_refcount_set(&metadata->refcount, 1);
+ queue->metadata = metadata;
+ atomic_inc(&kctx->kbdev->live_fence_metadata);
+#endif /* CONFIG_SYNC_FILE */
queue->enqueue_failed = false;
queue->command_started = false;
INIT_LIST_HEAD(&queue->jit_blocked);
@@ -2246,13 +2579,17 @@
/* Fire the tracepoint with the mutex held to enforce correct ordering
* with the summary stream.
*/
- KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE(
- kctx->kbdev, queue, kctx->id, queue->num_pending_cmds);
+ KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE(kctx->kbdev, queue, queue->id, kctx->id,
+ queue->num_pending_cmds);
- KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_NEW, queue,
+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_CREATE, queue,
queue->fence_context, 0);
+#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG
+ kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback);
+#endif
out:
mutex_unlock(&kctx->csf.kcpu_queues.lock);
return ret;
}
+KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_new);
--
Gitblit v1.6.2