From 6778948f9de86c3cfaf36725a7c87dcff9ba247f Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Mon, 11 Dec 2023 08:20:59 +0000 Subject: [PATCH] kernel_5.10 no rt --- kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c | 1117 ++++++++++++++++++++++++++++++++++++++-------------------- 1 files changed, 727 insertions(+), 390 deletions(-) diff --git a/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c b/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c index 4e26a49..2b4d4a4 100644 --- a/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c +++ b/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,8 +33,12 @@ static DEFINE_SPINLOCK(kbase_csf_fence_lock); #endif +#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG +#define FENCE_WAIT_TIMEOUT_MS 3000 +#endif + static void kcpu_queue_process(struct kbase_kcpu_command_queue *kcpu_queue, - bool ignore_waits); + bool drain_queue); static void kcpu_queue_process_worker(struct work_struct *data); @@ -45,9 +49,13 @@ { struct kbase_context *const kctx = kcpu_queue->kctx; struct kbase_va_region *reg; + struct kbase_mem_phy_alloc *alloc; + struct page **pages; + struct tagged_addr *pa; + long i; int ret = 0; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); /* Take the processes mmap lock */ down_read(kbase_mem_get_process_mmap_lock()); @@ -72,10 +80,24 @@ * on the physical pages tracking object. When the last * reference to the tracking object is dropped the pages * would be unpinned if they weren't unpinned before. + * + * Region should be CPU cached: abort if it isn't. */ + if (WARN_ON(!(reg->flags & KBASE_REG_CPU_CACHED))) { + ret = -EINVAL; + goto out; + } + ret = kbase_jd_user_buf_pin_pages(kctx, reg); if (ret) goto out; + + alloc = reg->gpu_alloc; + pa = kbase_get_gpu_phy_pages(reg); + pages = alloc->imported.user_buf.pages; + + for (i = 0; i < alloc->nents; i++) + pa[i] = as_tagged(page_to_phys(pages[i])); } current_command->type = BASE_KCPU_COMMAND_TYPE_MAP_IMPORT; @@ -99,7 +121,7 @@ struct kbase_va_region *reg; int ret = 0; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); kbase_gpu_vm_lock(kctx); @@ -167,13 +189,14 @@ &kctx->csf.kcpu_queues.jit_blocked_queues; struct kbase_kcpu_command_queue *blocked_queue; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); + lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); list_for_each_entry(blocked_queue, &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked) { - struct kbase_kcpu_command const*const jit_alloc_cmd = - &blocked_queue->commands[blocked_queue->start_offset]; + struct kbase_kcpu_command const *const jit_alloc_cmd = + &blocked_queue->commands[blocked_queue->start_offset]; WARN_ON(jit_alloc_cmd->type != BASE_KCPU_COMMAND_TYPE_JIT_ALLOC); if (cmd->enqueue_ts < jit_alloc_cmd->enqueue_ts) { @@ -190,6 +213,12 @@ * * @queue: The queue containing this JIT allocation * @cmd: The JIT allocation command + * + * Return: + * * 0 - allocation OK + * * -EINVAL - missing info or JIT ID still in use + * * -EAGAIN - Retry + * * -ENOMEM - no memory. 
unable to allocate */ static int kbase_kcpu_jit_allocate_process( struct kbase_kcpu_command_queue *queue, @@ -206,23 +235,26 @@ u32 i; int ret; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); - - if (alloc_info->blocked) { - list_del(&queue->jit_blocked); - alloc_info->blocked = false; - } + lockdep_assert_held(&queue->lock); if (WARN_ON(!info)) return -EINVAL; + + mutex_lock(&kctx->csf.kcpu_queues.jit_lock); /* Check if all JIT IDs are not in use */ for (i = 0; i < count; i++, info++) { /* The JIT ID is still in use so fail the allocation */ if (kctx->jit_alloc[info->id]) { - dev_warn(kctx->kbdev->dev, "JIT ID still in use\n"); - return -EINVAL; + dev_dbg(kctx->kbdev->dev, "JIT ID still in use"); + ret = -EINVAL; + goto fail; } + } + + if (alloc_info->blocked) { + list_del(&queue->jit_blocked); + alloc_info->blocked = false; } /* Now start the allocation loop */ @@ -238,7 +270,7 @@ break; if (jit_cmd->type == BASE_KCPU_COMMAND_TYPE_JIT_FREE) { - u8 const*const free_ids = jit_cmd->info.jit_free.ids; + u8 const *const free_ids = jit_cmd->info.jit_free.ids; if (free_ids && *free_ids && kctx->jit_alloc[*free_ids]) { /* @@ -259,7 +291,7 @@ */ dev_warn_ratelimited(kctx->kbdev->dev, "JIT alloc command failed: %pK\n", cmd); ret = -ENOMEM; - goto fail; + goto fail_rollback; } /* There are pending frees for an active allocation @@ -277,7 +309,8 @@ kctx->jit_alloc[info->id] = NULL; } - return -EAGAIN; + ret = -EAGAIN; + goto fail; } /* Bind it to the user provided ID. */ @@ -289,11 +322,11 @@ * Write the address of the JIT allocation to the user provided * GPU allocation. */ - ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr), - &mapping); + ptr = kbase_vmap_prot(kctx, info->gpu_alloc_addr, sizeof(*ptr), + KBASE_REG_CPU_WR, &mapping); if (!ptr) { ret = -ENOMEM; - goto fail; + goto fail_rollback; } reg = kctx->jit_alloc[info->id]; @@ -302,9 +335,11 @@ kbase_vunmap(kctx, &mapping); } + mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); + return 0; -fail: +fail_rollback: /* Roll back completely */ for (i = 0, info = alloc_info->info; i < count; i++, info++) { /* Free the allocations that were successful. 
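
The hunks above move the JIT bookkeeping under a dedicated kcpu_queues.jit_lock and keep the block-and-retry behaviour: an allocation that must wait for pending JIT_FREE commands returns -EAGAIN and parks the queue, while the retry side (kbase_kcpu_jit_retry_pending_allocs(), further down in this patch) kicks every parked queue once a free completes. The sketch below restates that pattern in isolation; jit_ctx, jit_queue, jit_block_queue and jit_retry_blocked are simplified, illustrative names, not the driver's own symbols.

#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>

/* Illustrative types only -- the real driver uses kbase_context and
 * kbase_kcpu_command_queue with many more fields.
 */
struct jit_ctx {
	struct mutex jit_lock;            /* serialises the blocked list */
	struct list_head blocked_queues;  /* queues waiting for JIT memory */
};

struct jit_queue {
	struct list_head jit_blocked;     /* link into blocked_queues */
	struct work_struct work;          /* per-queue processing work */
	struct workqueue_struct *wq;      /* per-queue workqueue */
};

/* An allocation that must wait for pending frees parks the queue and
 * reports -EAGAIN so the caller stops processing further commands.
 * (In the driver the queue takes itself off the list again once the
 * allocation eventually succeeds.)
 */
static int jit_block_queue(struct jit_ctx *ctx, struct jit_queue *q)
{
	mutex_lock(&ctx->jit_lock);
	list_add_tail(&q->jit_blocked, &ctx->blocked_queues);
	mutex_unlock(&ctx->jit_lock);
	return -EAGAIN;
}

/* After a JIT free completes, every blocked queue is kicked so its
 * worker re-attempts the allocation. Holding jit_lock while walking
 * the list keeps it stable, mirroring the NOTE in the patch.
 */
static void jit_retry_blocked(struct jit_ctx *ctx)
{
	struct jit_queue *q;

	mutex_lock(&ctx->jit_lock);
	list_for_each_entry(q, &ctx->blocked_queues, jit_blocked)
		queue_work(q->wq, &q->work);
	mutex_unlock(&ctx->jit_lock);
}
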
@@ -317,6 +352,8 @@ kctx->jit_alloc[info->id] = KBASE_RESERVED_REG_JIT_ALLOC; } +fail: + mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); return ret; } @@ -328,15 +365,16 @@ { struct kbase_context *const kctx = kcpu_queue->kctx; void __user *data = u64_to_user_ptr(alloc_info->info); - struct base_jit_alloc_info *info; + struct base_jit_alloc_info *info = NULL; u32 count = alloc_info->count; int ret = 0; u32 i; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); - if (!data || count > kcpu_queue->kctx->jit_max_allocations || - count > ARRAY_SIZE(kctx->jit_alloc)) { + if ((count == 0) || (count > ARRAY_SIZE(kctx->jit_alloc)) || + (count > kcpu_queue->kctx->jit_max_allocations) || (!data) || + !kbase_mem_allow_alloc(kctx)) { ret = -EINVAL; goto out; } @@ -371,11 +409,13 @@ } current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_ALLOC; - list_add_tail(¤t_command->info.jit_alloc.node, - &kctx->csf.kcpu_queues.jit_cmds_head); current_command->info.jit_alloc.info = info; current_command->info.jit_alloc.count = count; current_command->info.jit_alloc.blocked = false; + mutex_lock(&kctx->csf.kcpu_queues.jit_lock); + list_add_tail(¤t_command->info.jit_alloc.node, + &kctx->csf.kcpu_queues.jit_cmds_head); + mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); return 0; out_free: @@ -394,7 +434,9 @@ struct kbase_kcpu_command_queue *queue, struct kbase_kcpu_command *cmd) { - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); + + mutex_lock(&queue->kctx->csf.kcpu_queues.jit_lock); /* Remove this command from the jit_cmds_head list */ list_del(&cmd->info.jit_alloc.node); @@ -408,6 +450,8 @@ cmd->info.jit_alloc.blocked = false; } + mutex_unlock(&queue->kctx->csf.kcpu_queues.jit_lock); + kfree(cmd->info.jit_alloc.info); } @@ -420,18 +464,17 @@ { struct kbase_kcpu_command_queue *blocked_queue; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); /* * Reschedule all queues blocked by JIT_ALLOC commands. * NOTE: This code traverses the list of blocked queues directly. It * only works as long as the queued works are not executed at the same * time. This precondition is true since we're holding the - * kbase_csf_kcpu_queue_context.lock . + * kbase_csf_kcpu_queue_context.jit_lock . 
*/ - list_for_each_entry(blocked_queue, - &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked) - queue_work(kctx->csf.kcpu_queues.wq, &blocked_queue->work); + list_for_each_entry(blocked_queue, &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked) + queue_work(blocked_queue->wq, &blocked_queue->work); } static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue, @@ -448,17 +491,18 @@ if (WARN_ON(!ids)) return -EINVAL; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); + mutex_lock(&kctx->csf.kcpu_queues.jit_lock); - KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END( - queue->kctx->kbdev, queue); + KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(queue->kctx->kbdev, + queue); for (i = 0; i < count; i++) { u64 pages_used = 0; int item_err = 0; if (!kctx->jit_alloc[ids[i]]) { - dev_warn(kctx->kbdev->dev, "invalid JIT free ID\n"); + dev_dbg(kctx->kbdev->dev, "invalid JIT free ID"); rc = -EINVAL; item_err = rc; } else { @@ -480,15 +524,17 @@ queue->kctx->kbdev, queue, item_err, pages_used); } - /* Free the list of ids */ - kfree(ids); - /* * Remove this command from the jit_cmds_head list and retry pending * allocations. */ list_del(&cmd->info.jit_free.node); kbase_kcpu_jit_retry_pending_allocs(kctx); + + mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); + + /* Free the list of ids */ + kfree(ids); return rc; } @@ -505,7 +551,7 @@ int ret; u32 i; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); /* Sanity checks */ if (!count || count > ARRAY_SIZE(kctx->jit_alloc)) { @@ -551,10 +597,12 @@ } current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_FREE; - list_add_tail(¤t_command->info.jit_free.node, - &kctx->csf.kcpu_queues.jit_cmds_head); current_command->info.jit_free.ids = ids; current_command->info.jit_free.count = count; + mutex_lock(&kctx->csf.kcpu_queues.jit_lock); + list_add_tail(¤t_command->info.jit_free.node, + &kctx->csf.kcpu_queues.jit_cmds_head); + mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); return 0; out_free: @@ -563,6 +611,7 @@ return ret; } +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST static int kbase_csf_queue_group_suspend_prepare( struct kbase_kcpu_command_queue *kcpu_queue, struct base_kcpu_command_group_suspend_info *suspend_buf, @@ -570,18 +619,19 @@ { struct kbase_context *const kctx = kcpu_queue->kctx; struct kbase_suspend_copy_buffer *sus_buf = NULL; + const u32 csg_suspend_buf_size = + kctx->kbdev->csf.global_iface.groups[0].suspend_size; u64 addr = suspend_buf->buffer; u64 page_addr = addr & PAGE_MASK; - u64 end_addr = addr + suspend_buf->size - 1; + u64 end_addr = addr + csg_suspend_buf_size - 1; u64 last_page_addr = end_addr & PAGE_MASK; int nr_pages = (last_page_addr - page_addr) / PAGE_SIZE + 1; int pinned_pages = 0, ret = 0; struct kbase_va_region *reg; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); - if (suspend_buf->size < - kctx->kbdev->csf.global_iface.groups[0].suspend_size) + if (suspend_buf->size < csg_suspend_buf_size) return -EINVAL; ret = kbase_csf_queue_group_handle_is_valid(kctx, @@ -593,7 +643,7 @@ if (!sus_buf) return -ENOMEM; - sus_buf->size = suspend_buf->size; + sus_buf->size = csg_suspend_buf_size; sus_buf->nr_pages = nr_pages; sus_buf->offset = addr & ~PAGE_MASK; @@ -629,10 +679,11 @@ struct tagged_addr *page_array; u64 start, end, i; - if (!(reg->flags & BASE_MEM_SAME_VA) || - reg->nr_pages < nr_pages || - kbase_reg_current_backed_size(reg) != - 
reg->nr_pages) { + if (((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_SAME_VA) || + (kbase_reg_current_backed_size(reg) < nr_pages) || + !(reg->flags & KBASE_REG_CPU_WR) || + (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) || + (kbase_is_region_shrinkable(reg)) || (kbase_va_region_is_no_user_free(reg))) { ret = -EINVAL; goto out_clean_pages; } @@ -676,14 +727,14 @@ { return kbase_csf_queue_group_suspend(kctx, sus_buf, group_handle); } +#endif static enum kbase_csf_event_callback_action event_cqs_callback(void *param) { struct kbase_kcpu_command_queue *kcpu_queue = (struct kbase_kcpu_command_queue *)param; - struct kbase_context *const kctx = kcpu_queue->kctx; - queue_work(kctx->csf.kcpu_queues.wq, &kcpu_queue->work); + queue_work(kcpu_queue->wq, &kcpu_queue->work); return KBASE_CSF_EVENT_CALLBACK_KEEP; } @@ -713,7 +764,7 @@ { u32 i; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); if (WARN_ON(!cqs_wait->objs)) return -EINVAL; @@ -727,10 +778,10 @@ cqs_wait->objs[i].addr, &mapping); if (!queue->command_started) { - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START( - kbdev, queue); + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START(kbdev, + queue); queue->command_started = true; - KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_START, + KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_WAIT_START, queue, cqs_wait->nr_objs, 0); } @@ -741,24 +792,24 @@ return -EINVAL; } - sig_set = evt[BASEP_EVENT_VAL_INDEX] > cqs_wait->objs[i].val; + sig_set = + evt[BASEP_EVENT32_VAL_OFFSET / sizeof(u32)] > cqs_wait->objs[i].val; if (sig_set) { bool error = false; bitmap_set(cqs_wait->signaled, i, 1); if ((cqs_wait->inherit_err_flags & (1U << i)) && - evt[BASEP_EVENT_ERR_INDEX] > 0) { + evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] > 0) { queue->has_error = true; error = true; } - KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_END, + KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_WAIT_END, queue, cqs_wait->objs[i].addr, error); KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( - kbdev, queue, - evt[BASEP_EVENT_ERR_INDEX]); + kbdev, queue, evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)]); queue->command_started = false; } @@ -775,14 +826,36 @@ return bitmap_full(cqs_wait->signaled, cqs_wait->nr_objs); } +static inline bool kbase_kcpu_cqs_is_data_type_valid(u8 data_type) +{ + return data_type == BASEP_CQS_DATA_TYPE_U32 || data_type == BASEP_CQS_DATA_TYPE_U64; +} + +static inline bool kbase_kcpu_cqs_is_aligned(u64 addr, u8 data_type) +{ + BUILD_BUG_ON(BASEP_EVENT32_ALIGN_BYTES != BASEP_EVENT32_SIZE_BYTES); + BUILD_BUG_ON(BASEP_EVENT64_ALIGN_BYTES != BASEP_EVENT64_SIZE_BYTES); + WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(data_type)); + + switch (data_type) { + default: + return false; + case BASEP_CQS_DATA_TYPE_U32: + return (addr & (BASEP_EVENT32_ALIGN_BYTES - 1)) == 0; + case BASEP_CQS_DATA_TYPE_U64: + return (addr & (BASEP_EVENT64_ALIGN_BYTES - 1)) == 0; + } +} + static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue, struct base_kcpu_command_cqs_wait_info *cqs_wait_info, struct kbase_kcpu_command *current_command) { struct base_cqs_wait_info *objs; unsigned int nr_objs = cqs_wait_info->nr_objs; + unsigned int i; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) return -EINVAL; @@ -798,6 +871,17 @@ nr_objs * sizeof(*objs))) { kfree(objs); return -ENOMEM; + } + + /* Check the CQS objects as early as possible. 
By checking their alignment + * (required alignment equals to size for Sync32 and Sync64 objects), we can + * prevent overrunning the supplied event page. + */ + for (i = 0; i < nr_objs; i++) { + if (!kbase_kcpu_cqs_is_aligned(objs[i].addr, BASEP_CQS_DATA_TYPE_U32)) { + kfree(objs); + return -EINVAL; + } } if (++queue->cqs_wait_count == 1) { @@ -836,7 +920,7 @@ { unsigned int i; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); if (WARN_ON(!cqs_set->objs)) return; @@ -848,22 +932,20 @@ evt = (u32 *)kbase_phy_alloc_mapping_get( queue->kctx, cqs_set->objs[i].addr, &mapping); - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue, - evt ? 0 : 1); + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue, evt ? 0 : 1); if (!evt) { dev_warn(kbdev->dev, "Sync memory %llx already freed", cqs_set->objs[i].addr); queue->has_error = true; } else { - evt[BASEP_EVENT_ERR_INDEX] = queue->has_error; + evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] = queue->has_error; /* Set to signaled */ - evt[BASEP_EVENT_VAL_INDEX]++; + evt[BASEP_EVENT32_VAL_OFFSET / sizeof(u32)]++; kbase_phy_alloc_mapping_put(queue->kctx, mapping); - KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_SET, - queue, cqs_set->objs[i].addr, - evt[BASEP_EVENT_ERR_INDEX]); + KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_SET, queue, cqs_set->objs[i].addr, + evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)]); } } @@ -878,11 +960,11 @@ struct base_kcpu_command_cqs_set_info *cqs_set_info, struct kbase_kcpu_command *current_command) { - struct kbase_context *const kctx = kcpu_queue->kctx; struct base_cqs_set *objs; unsigned int nr_objs = cqs_set_info->nr_objs; + unsigned int i; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) return -EINVAL; @@ -898,6 +980,17 @@ nr_objs * sizeof(*objs))) { kfree(objs); return -ENOMEM; + } + + /* Check the CQS objects as early as possible. By checking their alignment + * (required alignment equals to size for Sync32 and Sync64 objects), we can + * prevent overrunning the supplied event page. 
+ */ + for (i = 0; i < nr_objs; i++) { + if (!kbase_kcpu_cqs_is_aligned(objs[i].addr, BASEP_CQS_DATA_TYPE_U32)) { + kfree(objs); + return -EINVAL; + } } current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET; @@ -932,7 +1025,7 @@ { u32 i; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); if (WARN_ON(!cqs_wait_operation->objs)) return -EINVAL; @@ -942,12 +1035,16 @@ if (!test_bit(i, cqs_wait_operation->signaled)) { struct kbase_vmap_struct *mapping; bool sig_set; - u64 *evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, - cqs_wait_operation->objs[i].addr, &mapping); + uintptr_t evt = (uintptr_t)kbase_phy_alloc_mapping_get( + queue->kctx, cqs_wait_operation->objs[i].addr, &mapping); + u64 val = 0; - /* GPUCORE-28172 RDT to review */ - if (!queue->command_started) + if (!queue->command_started) { queue->command_started = true; + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START( + kbdev, queue); + } + if (!evt) { dev_warn(kbdev->dev, @@ -956,15 +1053,32 @@ return -EINVAL; } + switch (cqs_wait_operation->objs[i].data_type) { + default: + WARN_ON(!kbase_kcpu_cqs_is_data_type_valid( + cqs_wait_operation->objs[i].data_type)); + kbase_phy_alloc_mapping_put(queue->kctx, mapping); + queue->has_error = true; + return -EINVAL; + case BASEP_CQS_DATA_TYPE_U32: + val = *(u32 *)evt; + evt += BASEP_EVENT32_ERR_OFFSET - BASEP_EVENT32_VAL_OFFSET; + break; + case BASEP_CQS_DATA_TYPE_U64: + val = *(u64 *)evt; + evt += BASEP_EVENT64_ERR_OFFSET - BASEP_EVENT64_VAL_OFFSET; + break; + } + switch (cqs_wait_operation->objs[i].operation) { case BASEP_CQS_WAIT_OPERATION_LE: - sig_set = *evt <= cqs_wait_operation->objs[i].val; + sig_set = val <= cqs_wait_operation->objs[i].val; break; case BASEP_CQS_WAIT_OPERATION_GT: - sig_set = *evt > cqs_wait_operation->objs[i].val; + sig_set = val > cqs_wait_operation->objs[i].val; break; default: - dev_warn(kbdev->dev, + dev_dbg(kbdev->dev, "Unsupported CQS wait operation %d", cqs_wait_operation->objs[i].operation); kbase_phy_alloc_mapping_put(queue->kctx, mapping); @@ -973,27 +1087,15 @@ return -EINVAL; } - /* Increment evt up to the error_state value depending on the CQS data type */ - switch (cqs_wait_operation->objs[i].data_type) { - default: - dev_warn(kbdev->dev, "Unreachable data_type=%d", cqs_wait_operation->objs[i].data_type); - /* Fallthrough - hint to compiler that there's really only 2 options at present */ - case BASEP_CQS_DATA_TYPE_U32: - evt = (u64 *)((u8 *)evt + sizeof(u32)); - break; - case BASEP_CQS_DATA_TYPE_U64: - evt = (u64 *)((u8 *)evt + sizeof(u64)); - break; - } - if (sig_set) { bitmap_set(cqs_wait_operation->signaled, i, 1); if ((cqs_wait_operation->inherit_err_flags & (1U << i)) && - *evt > 0) { + *(u32 *)evt > 0) { queue->has_error = true; } - /* GPUCORE-28172 RDT to review */ + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END( + kbdev, queue, *(u32 *)evt); queue->command_started = false; } @@ -1017,8 +1119,9 @@ { struct base_cqs_wait_operation_info *objs; unsigned int nr_objs = cqs_wait_operation_info->nr_objs; + unsigned int i; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) return -EINVAL; @@ -1034,6 +1137,18 @@ nr_objs * sizeof(*objs))) { kfree(objs); return -ENOMEM; + } + + /* Check the CQS objects as early as possible. By checking their alignment + * (required alignment equals to size for Sync32 and Sync64 objects), we can + * prevent overrunning the supplied event page. 
+ */ + for (i = 0; i < nr_objs; i++) { + if (!kbase_kcpu_cqs_is_data_type_valid(objs[i].data_type) || + !kbase_kcpu_cqs_is_aligned(objs[i].addr, objs[i].data_type)) { + kfree(objs); + return -EINVAL; + } } if (++queue->cqs_wait_count == 1) { @@ -1066,6 +1181,44 @@ return 0; } +static void kbasep_kcpu_cqs_do_set_operation_32(struct kbase_kcpu_command_queue *queue, + uintptr_t evt, u8 operation, u64 val) +{ + struct kbase_device *kbdev = queue->kctx->kbdev; + + switch (operation) { + case BASEP_CQS_SET_OPERATION_ADD: + *(u32 *)evt += (u32)val; + break; + case BASEP_CQS_SET_OPERATION_SET: + *(u32 *)evt = val; + break; + default: + dev_dbg(kbdev->dev, "Unsupported CQS set operation %d", operation); + queue->has_error = true; + break; + } +} + +static void kbasep_kcpu_cqs_do_set_operation_64(struct kbase_kcpu_command_queue *queue, + uintptr_t evt, u8 operation, u64 val) +{ + struct kbase_device *kbdev = queue->kctx->kbdev; + + switch (operation) { + case BASEP_CQS_SET_OPERATION_ADD: + *(u64 *)evt += val; + break; + case BASEP_CQS_SET_OPERATION_SET: + *(u64 *)evt = val; + break; + default: + dev_dbg(kbdev->dev, "Unsupported CQS set operation %d", operation); + queue->has_error = true; + break; + } +} + static void kbase_kcpu_cqs_set_operation_process( struct kbase_device *kbdev, struct kbase_kcpu_command_queue *queue, @@ -1073,57 +1226,49 @@ { unsigned int i; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); if (WARN_ON(!cqs_set_operation->objs)) return; for (i = 0; i < cqs_set_operation->nr_objs; i++) { struct kbase_vmap_struct *mapping; - u64 *evt; + uintptr_t evt; - evt = (u64 *)kbase_phy_alloc_mapping_get( + evt = (uintptr_t)kbase_phy_alloc_mapping_get( queue->kctx, cqs_set_operation->objs[i].addr, &mapping); - - /* GPUCORE-28172 RDT to review */ if (!evt) { dev_warn(kbdev->dev, "Sync memory %llx already freed", cqs_set_operation->objs[i].addr); queue->has_error = true; } else { - switch (cqs_set_operation->objs[i].operation) { - case BASEP_CQS_SET_OPERATION_ADD: - *evt += cqs_set_operation->objs[i].val; - break; - case BASEP_CQS_SET_OPERATION_SET: - *evt = cqs_set_operation->objs[i].val; - break; - default: - dev_warn(kbdev->dev, - "Unsupported CQS set operation %d", cqs_set_operation->objs[i].operation); - queue->has_error = true; - break; - } + struct base_cqs_set_operation_info *obj = &cqs_set_operation->objs[i]; - /* Increment evt up to the error_state value depending on the CQS data type */ - switch (cqs_set_operation->objs[i].data_type) { + switch (obj->data_type) { default: - dev_warn(kbdev->dev, "Unreachable data_type=%d", cqs_set_operation->objs[i].data_type); - /* Fallthrough - hint to compiler that there's really only 2 options at present */ + WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(obj->data_type)); + queue->has_error = true; + goto skip_err_propagation; case BASEP_CQS_DATA_TYPE_U32: - evt = (u64 *)((u8 *)evt + sizeof(u32)); + kbasep_kcpu_cqs_do_set_operation_32(queue, evt, obj->operation, + obj->val); + evt += BASEP_EVENT32_ERR_OFFSET - BASEP_EVENT32_VAL_OFFSET; break; case BASEP_CQS_DATA_TYPE_U64: - evt = (u64 *)((u8 *)evt + sizeof(u64)); + kbasep_kcpu_cqs_do_set_operation_64(queue, evt, obj->operation, + obj->val); + evt += BASEP_EVENT64_ERR_OFFSET - BASEP_EVENT64_VAL_OFFSET; break; } - /* GPUCORE-28172 RDT to review */ + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION( + kbdev, queue, *(u32 *)evt ? 
1 : 0); /* Always propagate errors */ - *evt = queue->has_error; + *(u32 *)evt = queue->has_error; +skip_err_propagation: kbase_phy_alloc_mapping_put(queue->kctx, mapping); } } @@ -1139,11 +1284,11 @@ struct base_kcpu_command_cqs_set_operation_info *cqs_set_operation_info, struct kbase_kcpu_command *current_command) { - struct kbase_context *const kctx = kcpu_queue->kctx; struct base_cqs_set_operation_info *objs; unsigned int nr_objs = cqs_set_operation_info->nr_objs; + unsigned int i; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) return -EINVAL; @@ -1159,6 +1304,18 @@ nr_objs * sizeof(*objs))) { kfree(objs); return -ENOMEM; + } + + /* Check the CQS objects as early as possible. By checking their alignment + * (required alignment equals to size for Sync32 and Sync64 objects), we can + * prevent overrunning the supplied event page. + */ + for (i = 0; i < nr_objs; i++) { + if (!kbase_kcpu_cqs_is_data_type_valid(objs[i].data_type) || + !kbase_kcpu_cqs_is_aligned(objs[i].addr, objs[i].data_type)) { + kfree(objs); + return -EINVAL; + } } current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION; @@ -1182,20 +1339,23 @@ struct kbase_kcpu_command_queue *kcpu_queue = fence_info->kcpu_queue; struct kbase_context *const kctx = kcpu_queue->kctx; - KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_WAIT_END, kcpu_queue, +#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG + /* Fence gets signaled. Deactivate the timer for fence-wait timeout */ + del_timer(&kcpu_queue->fence_timeout); +#endif + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue, fence->context, fence->seqno); /* Resume kcpu command queue processing. */ - queue_work(kctx->csf.kcpu_queues.wq, &kcpu_queue->work); + queue_work(kcpu_queue->wq, &kcpu_queue->work); } -static void kbase_kcpu_fence_wait_cancel( - struct kbase_kcpu_command_queue *kcpu_queue, - struct kbase_kcpu_command_fence_info *fence_info) +static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) { struct kbase_context *const kctx = kcpu_queue->kctx; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (WARN_ON(!fence_info->fence)) return; @@ -1204,8 +1364,15 @@ bool removed = dma_fence_remove_callback(fence_info->fence, &fence_info->fence_cb); +#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG + /* Fence-wait cancelled or fence signaled. In the latter case + * the timer would already have been deactivated inside + * kbase_csf_fence_wait_callback(). + */ + del_timer_sync(&kcpu_queue->fence_timeout); +#endif if (removed) - KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_WAIT_END, + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue, fence_info->fence->context, fence_info->fence->seqno); } @@ -1216,6 +1383,80 @@ fence_info->fence = NULL; } + +#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG +/** + * fence_timeout_callback() - Timeout callback function for fence-wait + * + * @timer: Timer struct + * + * Context and seqno of the timed-out fence will be displayed in dmesg. + * If the fence has been signalled a work will be enqueued to process + * the fence-wait without displaying debugging information. 
+ */ +static void fence_timeout_callback(struct timer_list *timer) +{ + struct kbase_kcpu_command_queue *kcpu_queue = + container_of(timer, struct kbase_kcpu_command_queue, fence_timeout); + struct kbase_context *const kctx = kcpu_queue->kctx; + struct kbase_kcpu_command *cmd = &kcpu_queue->commands[kcpu_queue->start_offset]; + struct kbase_kcpu_command_fence_info *fence_info; +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + struct kbase_sync_fence_info info; + + if (cmd->type != BASE_KCPU_COMMAND_TYPE_FENCE_WAIT) { + dev_err(kctx->kbdev->dev, + "%s: Unexpected command type %d in ctx:%d_%d kcpu queue:%u", __func__, + cmd->type, kctx->tgid, kctx->id, kcpu_queue->id); + return; + } + + fence_info = &cmd->info.fence; + + fence = kbase_fence_get(fence_info); + if (!fence) { + dev_err(kctx->kbdev->dev, "no fence found in ctx:%d_%d kcpu queue:%u", kctx->tgid, + kctx->id, kcpu_queue->id); + return; + } + + kbase_sync_fence_info_get(fence, &info); + + if (info.status == 1) { + queue_work(kcpu_queue->wq, &kcpu_queue->work); + } else if (info.status == 0) { + dev_warn(kctx->kbdev->dev, "fence has not yet signalled in %ums", + FENCE_WAIT_TIMEOUT_MS); + dev_warn(kctx->kbdev->dev, + "ctx:%d_%d kcpu queue:%u still waiting for fence[%pK] context#seqno:%s", + kctx->tgid, kctx->id, kcpu_queue->id, fence, info.name); + } else { + dev_warn(kctx->kbdev->dev, "fence has got error"); + dev_warn(kctx->kbdev->dev, + "ctx:%d_%d kcpu queue:%u faulty fence[%pK] context#seqno:%s error(%d)", + kctx->tgid, kctx->id, kcpu_queue->id, fence, info.name, info.status); + } + + kbase_fence_put(fence); +} + +/** + * fence_timeout_start() - Start a timer to check fence-wait timeout + * + * @cmd: KCPU command queue + * + * Activate a timer to check whether a fence-wait command in the queue + * gets completed within FENCE_WAIT_TIMEOUT_MS + */ +static void fence_timeout_start(struct kbase_kcpu_command_queue *cmd) +{ + mod_timer(&cmd->fence_timeout, jiffies + msecs_to_jiffies(FENCE_WAIT_TIMEOUT_MS)); +} +#endif /** * kbase_kcpu_fence_wait_process() - Process the kcpu fence wait command @@ -1236,8 +1477,9 @@ #else struct dma_fence *fence; #endif + struct kbase_context *const kctx = kcpu_queue->kctx; - lockdep_assert_held(&kcpu_queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (WARN_ON(!fence_info->fence)) return -EINVAL; @@ -1251,14 +1493,26 @@ &fence_info->fence_cb, kbase_csf_fence_wait_callback); - KBASE_KTRACE_ADD_CSF_KCPU(kcpu_queue->kctx->kbdev, - FENCE_WAIT_START, kcpu_queue, + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, + KCPU_FENCE_WAIT_START, kcpu_queue, fence->context, fence->seqno); fence_status = cb_err; - if (cb_err == 0) + if (cb_err == 0) { kcpu_queue->fence_wait_processed = true; - else if (cb_err == -ENOENT) +#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG + fence_timeout_start(kcpu_queue); +#endif + } else if (cb_err == -ENOENT) { fence_status = dma_fence_get_status(fence); + if (!fence_status) { + struct kbase_sync_fence_info info; + + kbase_sync_fence_info_get(fence, &info); + dev_warn(kctx->kbdev->dev, + "Unexpected status for fence %s of ctx:%d_%d kcpu queue:%u", + info.name, kctx->tgid, kctx->id, kcpu_queue->id); + } + } } /* @@ -1271,17 +1525,15 @@ */ if (fence_status) - kbase_kcpu_fence_wait_cancel(kcpu_queue, fence_info); + kbasep_kcpu_fence_wait_cancel(kcpu_queue, fence_info); return fence_status; } -static int kbase_kcpu_fence_wait_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct 
base_kcpu_command_fence_info *fence_info, - struct kbase_kcpu_command *current_command) +static int kbase_kcpu_fence_wait_prepare(struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_fence_info *fence_info, + struct kbase_kcpu_command *current_command) { - struct kbase_context *const kctx = kcpu_queue->kctx; #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence_in; #else @@ -1289,10 +1541,9 @@ #endif struct base_fence fence; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); - if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), - sizeof(fence))) + if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) return -ENOMEM; fence_in = sync_file_get_fence(fence.basep.fd); @@ -1303,13 +1554,11 @@ current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_WAIT; current_command->info.fence.fence = fence_in; current_command->info.fence.kcpu_queue = kcpu_queue; - return 0; } -static int kbase_kcpu_fence_signal_process( - struct kbase_kcpu_command_queue *kcpu_queue, - struct kbase_kcpu_command_fence_info *fence_info) +static int kbasep_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) { struct kbase_context *const kctx = kcpu_queue->kctx; int ret; @@ -1320,45 +1569,46 @@ ret = dma_fence_signal(fence_info->fence); if (unlikely(ret < 0)) { - dev_warn(kctx->kbdev->dev, - "fence_signal() failed with %d\n", ret); + dev_warn(kctx->kbdev->dev, "dma_fence(%d) has been signalled already\n", ret); + /* Treated as a success */ + ret = 0; } - KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_SIGNAL, kcpu_queue, + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_SIGNAL, kcpu_queue, fence_info->fence->context, fence_info->fence->seqno); - dma_fence_put(fence_info->fence); + /* dma_fence refcount needs to be decreased to release it. 
*/ + kbase_fence_put(fence_info->fence); fence_info->fence = NULL; return ret; } -static int kbase_kcpu_fence_signal_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_fence_info *fence_info, - struct kbase_kcpu_command *current_command) +static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command *current_command, + struct base_fence *fence, struct sync_file **sync_file, + int *fd) { - struct kbase_context *const kctx = kcpu_queue->kctx; #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence_out; #else struct dma_fence *fence_out; #endif - struct base_fence fence; - struct sync_file *sync_file; + struct kbase_kcpu_dma_fence *kcpu_fence; int ret = 0; - int fd; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); - if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), - sizeof(fence))) - return -EFAULT; - - fence_out = kzalloc(sizeof(*fence_out), GFP_KERNEL); - if (!fence_out) + kcpu_fence = kzalloc(sizeof(*kcpu_fence), GFP_KERNEL); + if (!kcpu_fence) return -ENOMEM; + +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + fence_out = (struct fence *)kcpu_fence; +#else + fence_out = (struct dma_fence *)kcpu_fence; +#endif dma_fence_init(fence_out, &kbase_fence_ops, @@ -1375,44 +1625,103 @@ dma_fence_get(fence_out); #endif + /* Set reference to KCPU metadata and increment refcount */ + kcpu_fence->metadata = kcpu_queue->metadata; + WARN_ON(!kbase_refcount_inc_not_zero(&kcpu_fence->metadata->refcount)); + /* create a sync_file fd representing the fence */ - sync_file = sync_file_create(fence_out); - if (!sync_file) { -#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE) - dma_fence_put(fence_out); -#endif + *sync_file = sync_file_create(fence_out); + if (!(*sync_file)) { ret = -ENOMEM; goto file_create_fail; } - fd = get_unused_fd_flags(O_CLOEXEC); - if (fd < 0) { - ret = fd; + *fd = get_unused_fd_flags(O_CLOEXEC); + if (*fd < 0) { + ret = *fd; goto fd_flags_fail; } - fd_install(fd, sync_file->file); - - fence.basep.fd = fd; + fence->basep.fd = *fd; current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL; current_command->info.fence.fence = fence_out; - if (copy_to_user(u64_to_user_ptr(fence_info->fence), &fence, - sizeof(fence))) { - ret = -EFAULT; - goto fd_flags_fail; - } - return 0; fd_flags_fail: - fput(sync_file->file); + fput((*sync_file)->file); file_create_fail: - dma_fence_put(fence_out); + /* + * Upon failure, dma_fence refcount that was increased by + * dma_fence_get() or sync_file_create() needs to be decreased + * to release it. 
+ */ + kbase_fence_put(fence_out); + current_command->info.fence.fence = NULL; return ret; } + +static int kbase_kcpu_fence_signal_prepare(struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_fence_info *fence_info, + struct kbase_kcpu_command *current_command) +{ + struct base_fence fence; + struct sync_file *sync_file = NULL; + int fd; + int ret = 0; + + lockdep_assert_held(&kcpu_queue->lock); + + if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) + return -EFAULT; + + ret = kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, &fence, &sync_file, &fd); + if (ret) + return ret; + + if (copy_to_user(u64_to_user_ptr(fence_info->fence), &fence, + sizeof(fence))) { + ret = -EFAULT; + goto fail; + } + + /* 'sync_file' pointer can't be safely dereferenced once 'fd' is + * installed, so the install step needs to be done at the last + * before returning success. + */ + fd_install(fd, sync_file->file); + return 0; + +fail: + fput(sync_file->file); + kbase_fence_put(current_command->info.fence.fence); + current_command->info.fence.fence = NULL; + + return ret; +} + +int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) +{ + if (!kcpu_queue || !fence_info) + return -EINVAL; + + return kbasep_kcpu_fence_signal_process(kcpu_queue, fence_info); +} +KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_process); + +int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command *current_command, + struct base_fence *fence, struct sync_file **sync_file, int *fd) +{ + if (!kcpu_queue || !current_command || !fence || !sync_file || !fd) + return -EINVAL; + + return kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, fence, sync_file, fd); +} +KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_init); #endif /* CONFIG_SYNC_FILE */ static void kcpu_queue_process_worker(struct work_struct *data) @@ -1420,11 +1729,9 @@ struct kbase_kcpu_command_queue *queue = container_of(data, struct kbase_kcpu_command_queue, work); - mutex_lock(&queue->kctx->csf.kcpu_queues.lock); - + mutex_lock(&queue->lock); kcpu_queue_process(queue, false); - - mutex_unlock(&queue->kctx->csf.kcpu_queues.lock); + mutex_unlock(&queue->lock); } static int delete_queue(struct kbase_context *kctx, u32 id) @@ -1437,8 +1744,22 @@ struct kbase_kcpu_command_queue *queue = kctx->csf.kcpu_queues.array[id]; - KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DESTROY, + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DELETE, queue, queue->num_pending_cmds, queue->cqs_wait_count); + + /* Disassociate the queue from the system to prevent further + * submissions. Draining pending commands would be acceptable + * even if a new queue is created using the same ID. + */ + kctx->csf.kcpu_queues.array[id] = NULL; + bitmap_clear(kctx->csf.kcpu_queues.in_use, id, 1); + + mutex_unlock(&kctx->csf.kcpu_queues.lock); + + mutex_lock(&queue->lock); + + /* Metadata struct may outlive KCPU queue. */ + kbase_kcpu_dma_fence_meta_put(queue->metadata); /* Drain the remaining work for this queue first and go past * all the waits. @@ -1451,22 +1772,22 @@ /* All CQS wait commands should have been cleaned up */ WARN_ON(queue->cqs_wait_count); - kctx->csf.kcpu_queues.array[id] = NULL; - bitmap_clear(kctx->csf.kcpu_queues.in_use, id, 1); - /* Fire the tracepoint with the mutex held to enforce correct * ordering with the summary stream. 
*/ KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE(kctx->kbdev, queue); - mutex_unlock(&kctx->csf.kcpu_queues.lock); + mutex_unlock(&queue->lock); cancel_work_sync(&queue->work); + destroy_workqueue(queue->wq); + + mutex_destroy(&queue->lock); kfree(queue); } else { - dev_warn(kctx->kbdev->dev, - "Attempt to delete a non-existent KCPU queue\n"); + dev_dbg(kctx->kbdev->dev, + "Attempt to delete a non-existent KCPU queue"); mutex_unlock(&kctx->csf.kcpu_queues.lock); err = -EINVAL; } @@ -1481,8 +1802,7 @@ { u8 i; - KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( - kbdev, queue); + KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(kbdev, queue); for (i = 0; i < jit_alloc->count; i++) { const u8 id = jit_alloc->info[i].id; const struct kbase_va_region *reg = queue->kctx->jit_alloc[id]; @@ -1512,26 +1832,24 @@ struct kbase_device *kbdev, const struct kbase_kcpu_command_queue *queue) { - KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( - kbdev, queue); + KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(kbdev, queue); } static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END( struct kbase_device *kbdev, const struct kbase_kcpu_command_queue *queue) { - KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END( - kbdev, queue); + KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(kbdev, queue); } static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, - bool ignore_waits) + bool drain_queue) { struct kbase_device *kbdev = queue->kctx->kbdev; bool process_next = true; size_t i; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); for (i = 0; i != queue->num_pending_cmds; ++i) { struct kbase_kcpu_command *cmd = @@ -1541,16 +1859,15 @@ switch (cmd->type) { case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: if (!queue->command_started) { - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START( - kbdev, queue); + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START(kbdev, + queue); queue->command_started = true; } status = 0; #if IS_ENABLED(CONFIG_SYNC_FILE) - if (ignore_waits) { - kbase_kcpu_fence_wait_cancel(queue, - &cmd->info.fence); + if (drain_queue) { + kbasep_kcpu_fence_wait_cancel(queue, &cmd->info.fence); } else { status = kbase_kcpu_fence_wait_process(queue, &cmd->info.fence); @@ -1575,14 +1892,12 @@ } break; case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START( - kbdev, queue); + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START(kbdev, queue); status = 0; #if IS_ENABLED(CONFIG_SYNC_FILE) - status = kbase_kcpu_fence_signal_process( - queue, &cmd->info.fence); + status = kbasep_kcpu_fence_signal_process(queue, &cmd->info.fence); if (status < 0) queue->has_error = true; @@ -1594,14 +1909,14 @@ queue->has_error = true; #endif - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END( - kbdev, queue, status); + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(kbdev, queue, + status); break; case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: status = kbase_kcpu_cqs_wait_process(kbdev, queue, &cmd->info.cqs_wait); - if (!status && !ignore_waits) { + if (!status && !drain_queue) { process_next = false; } else { /* Either all CQS objects were signaled or @@ -1623,7 +1938,7 @@ status = kbase_kcpu_cqs_wait_operation_process(kbdev, queue, &cmd->info.cqs_wait_operation); - if (!status && !ignore_waits) { + if (!status && !drain_queue) { process_next = false; } else { /* Either all CQS 
objects were signaled or @@ -1645,35 +1960,36 @@ /* Clear the queue's error state */ queue->has_error = false; - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER( - kbdev, queue); + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER(kbdev, queue); break; case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: { struct kbase_ctx_ext_res_meta *meta = NULL; - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START( - kbdev, queue); + if (!drain_queue) { + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(kbdev, + queue); - kbase_gpu_vm_lock(queue->kctx); - meta = kbase_sticky_resource_acquire( - queue->kctx, cmd->info.import.gpu_va); - kbase_gpu_vm_unlock(queue->kctx); + kbase_gpu_vm_lock(queue->kctx); + meta = kbase_sticky_resource_acquire( + queue->kctx, cmd->info.import.gpu_va); + kbase_gpu_vm_unlock(queue->kctx); - if (meta == NULL) { - queue->has_error = true; - dev_warn(kbdev->dev, - "failed to map an external resource\n"); + if (meta == NULL) { + queue->has_error = true; + dev_dbg( + kbdev->dev, + "failed to map an external resource"); + } + + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( + kbdev, queue, meta ? 0 : 1); } - - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( - kbdev, queue, meta ? 0 : 1); break; } case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: { bool ret; - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START( - kbdev, queue); + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(kbdev, queue); kbase_gpu_vm_lock(queue->kctx); ret = kbase_sticky_resource_release( @@ -1682,19 +1998,19 @@ if (!ret) { queue->has_error = true; - dev_warn(kbdev->dev, - "failed to release the reference. resource not found\n"); + dev_dbg(kbdev->dev, + "failed to release the reference. resource not found"); } - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END( - kbdev, queue, ret ? 0 : 1); + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(kbdev, queue, + ret ? 0 : 1); break; } case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: { bool ret; - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START( - kbdev, queue); + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START(kbdev, + queue); kbase_gpu_vm_lock(queue->kctx); ret = kbase_sticky_resource_release_force( @@ -1703,8 +2019,8 @@ if (!ret) { queue->has_error = true; - dev_warn(kbdev->dev, - "failed to release the reference. resource not found\n"); + dev_dbg(kbdev->dev, + "failed to release the reference. 
resource not found"); } KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( @@ -1713,29 +2029,36 @@ } case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: { - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START( - kbdev, queue); - - status = kbase_kcpu_jit_allocate_process(queue, cmd); - if (status == -EAGAIN) { - process_next = false; - } else { - if (status != 0) - queue->has_error = true; - - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO( - kbdev, queue, &cmd->info.jit_alloc, - status); - + if (drain_queue) { + /* We still need to call this function to clean the JIT alloc info up */ kbase_kcpu_jit_allocate_finish(queue, cmd); - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( + } else { + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(kbdev, + queue); + + status = kbase_kcpu_jit_allocate_process(queue, + cmd); + if (status == -EAGAIN) { + process_next = false; + } else { + if (status != 0) + queue->has_error = true; + + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO( + kbdev, queue, + &cmd->info.jit_alloc, status); + + kbase_kcpu_jit_allocate_finish(queue, + cmd); + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( kbdev, queue); + } } + break; } - case BASE_KCPU_COMMAND_TYPE_JIT_FREE: - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START( - kbdev, queue); + case BASE_KCPU_COMMAND_TYPE_JIT_FREE: { + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(kbdev, queue); status = kbase_kcpu_jit_free_process(queue, cmd); if (status) @@ -1744,21 +2067,25 @@ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END( kbdev, queue); break; + } +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: { struct kbase_suspend_copy_buffer *sus_buf = cmd->info.suspend_buf_copy.sus_buf; - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START( - kbdev, queue); + if (!drain_queue) { + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START( + kbdev, queue); - status = kbase_csf_queue_group_suspend_process( + status = kbase_csf_queue_group_suspend_process( queue->kctx, sus_buf, cmd->info.suspend_buf_copy.group_handle); - if (status) - queue->has_error = true; + if (status) + queue->has_error = true; - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END( - kbdev, queue, status); + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END( + kbdev, queue, status); + } if (!sus_buf->cpu_alloc) { int i; @@ -1768,36 +2095,18 @@ } else { kbase_mem_phy_alloc_kernel_unmapped( sus_buf->cpu_alloc); - kbase_mem_phy_alloc_put(sus_buf->cpu_alloc); + kbase_mem_phy_alloc_put( + sus_buf->cpu_alloc); } kfree(sus_buf->pages); kfree(sus_buf); break; } -#if MALI_UNIT_TEST - case BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: { - u64 time = ktime_get_raw_ns(); - void *target_page = kmap(*cmd->info.sample_time.page); - - if (target_page) { - memcpy(target_page + - cmd->info.sample_time.page_offset, - &time, sizeof(time)); - kunmap(*cmd->info.sample_time.page); - } else { - dev_warn(kbdev->dev, - "Could not kmap target page\n"); - queue->has_error = true; - } - put_page(*cmd->info.sample_time.page); - kfree(cmd->info.sample_time.page); - break; - } -#endif /* MALI_UNIT_TEST */ +#endif default: - dev_warn(kbdev->dev, - "Unrecognized command type\n"); + dev_dbg(kbdev->dev, + "Unrecognized command type"); break; } /* switch */ @@ -1835,12 +2144,12 @@ switch (cmd->type) { case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT( - kbdev, queue, cmd->info.fence.fence); + 
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT(kbdev, queue, + cmd->info.fence.fence); break; case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL( - kbdev, queue, cmd->info.fence.fence); + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL(kbdev, queue, + cmd->info.fence.fence); break; case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: { @@ -1862,32 +2171,48 @@ unsigned int i; for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) { - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET( - kbdev, queue, sets[i].addr); + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET(kbdev, queue, + sets[i].addr); } break; } case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: { - /* GPUCORE-28172 RDT to review */ + const struct base_cqs_wait_operation_info *waits = + cmd->info.cqs_wait_operation.objs; + u32 inherit_err_flags = cmd->info.cqs_wait_operation.inherit_err_flags; + unsigned int i; + + for (i = 0; i < cmd->info.cqs_wait_operation.nr_objs; i++) { + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION( + kbdev, queue, waits[i].addr, waits[i].val, + waits[i].operation, waits[i].data_type, + (inherit_err_flags & ((uint32_t)1 << i)) ? 1 : 0); + } break; } case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: { - /* GPUCORE-28172 RDT to review */ + const struct base_cqs_set_operation_info *sets = cmd->info.cqs_set_operation.objs; + unsigned int i; + + for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) { + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION( + kbdev, queue, sets[i].addr, sets[i].val, + sets[i].operation, sets[i].data_type); + } break; } case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev, - queue); + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev, queue); break; case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( - kbdev, queue, cmd->info.import.gpu_va); + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT(kbdev, queue, + cmd->info.import.gpu_va); break; case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: - KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( - kbdev, queue, cmd->info.import.gpu_va); + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT(kbdev, queue, + cmd->info.import.gpu_va); break; case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( @@ -1897,50 +2222,41 @@ { u8 i; - KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC( - kbdev, queue); + KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC(kbdev, queue); for (i = 0; i < cmd->info.jit_alloc.count; i++) { const struct base_jit_alloc_info *info = &cmd->info.jit_alloc.info[i]; KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( - kbdev, queue, info->gpu_alloc_addr, - info->va_pages, info->commit_pages, - info->extension, info->id, info->bin_id, - info->max_allocations, info->flags, - info->usage_id); + kbdev, queue, info->gpu_alloc_addr, info->va_pages, + info->commit_pages, info->extension, info->id, info->bin_id, + info->max_allocations, info->flags, info->usage_id); } - KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC( - kbdev, queue); + KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC(kbdev, queue); break; } case BASE_KCPU_COMMAND_TYPE_JIT_FREE: { u8 i; - KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE( - kbdev, queue); + KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue); for (i = 0; i < 
cmd->info.jit_free.count; i++) { KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE( kbdev, queue, cmd->info.jit_free.ids[i]); } - KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE( - kbdev, queue); + KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue); break; } +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( kbdev, queue, cmd->info.suspend_buf_copy.sus_buf, cmd->info.suspend_buf_copy.group_handle); break; -#if MALI_UNIT_TEST - case BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: - /* - * This is test-only KCPU command, no need to have a timeline - * entry - */ +#endif + default: + dev_dbg(kbdev->dev, "Unknown command type %u", cmd->type); break; -#endif /* MALI_UNIT_TEST */ } } @@ -1954,9 +2270,11 @@ /* The offset to the first command that is being processed or yet to * be processed is of u8 type, so the number of commands inside the - * queue cannot be more than 256. + * queue cannot be more than 256. The current implementation expects + * exactly 256, any other size will require the addition of wrapping + * logic. */ - BUILD_BUG_ON(KBASEP_KCPU_QUEUE_SIZE > 256); + BUILD_BUG_ON(KBASEP_KCPU_QUEUE_SIZE != 256); /* Whilst the backend interface allows enqueueing multiple commands in * a single operation, the Base interface does not expose any mechanism @@ -1966,19 +2284,35 @@ * in the set. */ if (enq->nr_commands != 1) { - dev_err(kctx->kbdev->dev, - "More than one commands enqueued\n"); + dev_dbg(kctx->kbdev->dev, + "More than one commands enqueued"); return -EINVAL; } + /* There might be a race between one thread trying to enqueue commands to the queue + * and other thread trying to delete the same queue. + * This racing could lead to use-after-free problem by enqueuing thread if + * resources for the queue has already been freed by deleting thread. + * + * To prevent the issue, two mutexes are acquired/release asymmetrically as follows. + * + * Lock A (kctx mutex) + * Lock B (queue mutex) + * Unlock A + * Unlock B + * + * With the kctx mutex being held, enqueuing thread will check the queue + * and will return error code if the queue had already been deleted. + */ mutex_lock(&kctx->csf.kcpu_queues.lock); - - if (!kctx->csf.kcpu_queues.array[enq->id]) { - ret = -EINVAL; - goto out; - } - queue = kctx->csf.kcpu_queues.array[enq->id]; + if (queue == NULL) { + dev_dbg(kctx->kbdev->dev, "Invalid KCPU queue (id:%u)", enq->id); + mutex_unlock(&kctx->csf.kcpu_queues.lock); + return -EINVAL; + } + mutex_lock(&queue->lock); + mutex_unlock(&kctx->csf.kcpu_queues.lock); if (kcpu_queue_get_space(queue) < enq->nr_commands) { ret = -EBUSY; @@ -1993,7 +2327,7 @@ * for the possibility to roll back. 
*/ - for (i = 0; (i != enq->nr_commands) && !ret; ++i, ++kctx->csf.kcpu_queues.num_cmds) { + for (i = 0; (i != enq->nr_commands) && !ret; ++i) { struct kbase_kcpu_command *kcpu_cmd = &queue->commands[(u8)(queue->start_offset + queue->num_pending_cmds + i)]; struct base_kcpu_command command; @@ -2016,7 +2350,7 @@ } } - kcpu_cmd->enqueue_ts = kctx->csf.kcpu_queues.num_cmds; + kcpu_cmd->enqueue_ts = atomic64_inc_return(&kctx->csf.kcpu_queues.cmd_seq_num); switch (command.type) { case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: #if IS_ENABLED(CONFIG_SYNC_FILE) @@ -2076,45 +2410,16 @@ ret = kbase_kcpu_jit_free_prepare(queue, &command.info.jit_free, kcpu_cmd); break; +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: ret = kbase_csf_queue_group_suspend_prepare(queue, &command.info.suspend_buf_copy, kcpu_cmd); break; -#if MALI_UNIT_TEST - case BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: { - int const page_cnt = 1; - - kcpu_cmd->type = BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME; - kcpu_cmd->info.sample_time.page_addr = - command.info.sample_time.time & PAGE_MASK; - kcpu_cmd->info.sample_time.page_offset = - command.info.sample_time.time & ~PAGE_MASK; - kcpu_cmd->info.sample_time.page = kcalloc( - page_cnt, sizeof(struct page *), GFP_KERNEL); - if (!kcpu_cmd->info.sample_time.page) { - ret = -ENOMEM; - } else { - int pinned_pages = get_user_pages_fast( - kcpu_cmd->info.sample_time.page_addr, - page_cnt, 1, - kcpu_cmd->info.sample_time.page); - - if (pinned_pages < 0) { - ret = pinned_pages; - kfree(kcpu_cmd->info.sample_time.page); - } else if (pinned_pages != page_cnt) { - ret = -EINVAL; - kfree(kcpu_cmd->info.sample_time.page); - } - } - - break; - } -#endif /* MALI_UNIT_TEST */ +#endif default: - dev_warn(queue->kctx->kbdev->dev, - "Unknown command type %u\n", command.type); + dev_dbg(queue->kctx->kbdev->dev, + "Unknown command type %u", command.type); ret = -EINVAL; break; } @@ -2135,13 +2440,10 @@ queue->num_pending_cmds += enq->nr_commands; kcpu_queue_process(queue, false); - } else { - /* Roll back the number of enqueued commands */ - kctx->csf.kcpu_queues.num_cmds -= i; } out: - mutex_unlock(&kctx->csf.kcpu_queues.lock); + mutex_unlock(&queue->lock); return ret; } @@ -2155,14 +2457,9 @@ for (idx = 0; idx < KBASEP_MAX_KCPU_QUEUES; ++idx) kctx->csf.kcpu_queues.array[idx] = NULL; - kctx->csf.kcpu_queues.wq = alloc_workqueue("mali_kbase_csf_kcpu", - WQ_UNBOUND | WQ_HIGHPRI, 0); - if (!kctx->csf.kcpu_queues.wq) - return -ENOMEM; - mutex_init(&kctx->csf.kcpu_queues.lock); - kctx->csf.kcpu_queues.num_cmds = 0; + atomic64_set(&kctx->csf.kcpu_queues.cmd_seq_num, 0); return 0; } @@ -2180,9 +2477,9 @@ (void)delete_queue(kctx, id); } - destroy_workqueue(kctx->csf.kcpu_queues.wq); mutex_destroy(&kctx->csf.kcpu_queues.lock); } +KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_context_term); int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_delete *del) @@ -2195,8 +2492,11 @@ { struct kbase_kcpu_command_queue *queue; int idx; + int n; int ret = 0; - +#if IS_ENABLED(CONFIG_SYNC_FILE) + struct kbase_kcpu_dma_fence_meta *metadata; +#endif /* The queue id is of u8 type and we use the index of the kcpu_queues * array as an id, so the number of elements in the array can't be * more than 256. 
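
The enqueue path above spells out an asymmetric lock hand-over (take the context-wide kcpu_queues.lock, look up the queue, take the per-queue lock, then drop the context lock) to close the race with queue deletion. Below is a minimal sketch of that hand-over using simplified, hypothetical types (kcpu_ctx, kcpu_queue, lookup_and_lock_queue); it illustrates the idea and is not the driver's code.

#include <linux/mutex.h>
#include <linux/types.h>

#define NUM_QUEUES 256  /* assumption: matches the u8 queue-id space */

struct kcpu_queue {
	struct mutex lock;                /* serialises work on one queue */
};

struct kcpu_ctx {
	struct mutex lock;                /* protects the lookup array */
	struct kcpu_queue *array[NUM_QUEUES];
};

/* Returns the queue with its per-queue mutex held, or NULL if it has
 * already been deleted. The context lock is held only long enough to
 * find the queue and take its mutex. Because deletion removes the
 * array entry under the context lock and frees the queue only after
 * acquiring and releasing the queue lock, a queue found here stays
 * alive until the caller drops its mutex.
 */
static struct kcpu_queue *lookup_and_lock_queue(struct kcpu_ctx *ctx, u8 id)
{
	struct kcpu_queue *q;

	mutex_lock(&ctx->lock);           /* Lock A */
	q = ctx->array[id];
	if (!q) {
		mutex_unlock(&ctx->lock);
		return NULL;
	}
	mutex_lock(&q->lock);             /* Lock B */
	mutex_unlock(&ctx->lock);         /* Unlock A; Unlock B is the caller's job */
	return q;
}
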
@@ -2224,8 +2524,17 @@ goto out; } + queue->wq = alloc_workqueue("mali_kbase_csf_kcpu_wq_%i", WQ_UNBOUND | WQ_HIGHPRI, 0, idx); + if (queue->wq == NULL) { + kfree(queue); + ret = -ENOMEM; + + goto out; + } + bitmap_set(kctx->csf.kcpu_queues.in_use, idx, 1); kctx->csf.kcpu_queues.array[idx] = queue; + mutex_init(&queue->lock); queue->kctx = kctx; queue->start_offset = 0; queue->num_pending_cmds = 0; @@ -2233,7 +2542,31 @@ queue->fence_context = dma_fence_context_alloc(1); queue->fence_seqno = 0; queue->fence_wait_processed = false; -#endif + + metadata = kzalloc(sizeof(*metadata), GFP_KERNEL); + if (!metadata) { + destroy_workqueue(queue->wq); + kfree(queue); + ret = -ENOMEM; + goto out; + } + + metadata->kbdev = kctx->kbdev; + metadata->kctx_id = kctx->id; + n = snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%d-%d_%d-%lld-kcpu", + kctx->kbdev->id, kctx->tgid, kctx->id, queue->fence_context); + if (WARN_ON(n >= MAX_TIMELINE_NAME)) { + destroy_workqueue(queue->wq); + kfree(queue); + kfree(metadata); + ret = -EINVAL; + goto out; + } + + kbase_refcount_set(&metadata->refcount, 1); + queue->metadata = metadata; + atomic_inc(&kctx->kbdev->live_fence_metadata); +#endif /* CONFIG_SYNC_FILE */ queue->enqueue_failed = false; queue->command_started = false; INIT_LIST_HEAD(&queue->jit_blocked); @@ -2246,13 +2579,17 @@ /* Fire the tracepoint with the mutex held to enforce correct ordering * with the summary stream. */ - KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE( - kctx->kbdev, queue, kctx->id, queue->num_pending_cmds); + KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE(kctx->kbdev, queue, queue->id, kctx->id, + queue->num_pending_cmds); - KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_NEW, queue, + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_CREATE, queue, queue->fence_context, 0); +#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG + kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback); +#endif out: mutex_unlock(&kctx->csf.kcpu_queues.lock); return ret; } +KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_new); -- Gitblit v1.6.2
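
For the CQS validation added earlier in the patch (kbase_kcpu_cqs_is_aligned() together with the BUILD_BUG_ON that alignment equals object size), the stand-alone check below shows why size-alignment is enough to keep a Sync object within the single mapped event page. The 4 KiB page constant and the object-size parameter are assumptions for the sketch, not values taken from the driver's headers.

#include <linux/types.h>

#define PAGE_SIZE_SKETCH 4096u  /* any power-of-two page size works */

/* For a power-of-two object size that divides the page size, a
 * size-aligned address can never straddle a page boundary, so the
 * value and error words of a Sync object are always backed by the
 * one page that was mapped for it.
 */
static bool cqs_object_stays_in_page(u64 addr, u32 obj_size)
{
	if (addr & (obj_size - 1))
		return false;   /* misaligned: rejected, as the patch does */

	/* Follows automatically from the alignment check; spelled out
	 * here only to make the invariant visible.
	 */
	return (addr % PAGE_SIZE_SKETCH) + obj_size <= PAGE_SIZE_SKETCH;
}
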