// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

/*
 * CSF GPU HWC backend firmware interface APIs.
 */

/* The angle-bracket header names were lost in extraction; the includes below
 * are reconstructed to what this file needs (kbase core/device definitions,
 * CSF register definitions and is_power_of_2()).
 */
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <device/mali_kbase_device.h>
#include "mali_kbase_hwcnt_gpu.h"
#include "mali_kbase_hwcnt_types.h"
#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h>

#include "csf/mali_kbase_csf_firmware.h"
#include "mali_kbase_hwcnt_backend_csf_if_fw.h"
#include "mali_kbase_hwaccess_time.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"

#include <linux/log2.h>
#include "mali_kbase_ccswe.h"

/** The number of nanoseconds in a second. */
#define NSECS_IN_SEC 1000000000ull /* ns */

/* Ring buffer virtual address starts at 4GB */
#define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32)

/**
 * struct kbase_hwcnt_backend_csf_if_fw_ring_buf - ring buffer for CSF
 *                                                 interface used to save the
 *                                                 manual and auto HWC samples
 *                                                 from firmware.
 * @gpu_dump_base: Starting GPU base address of the ring buffer.
 * @cpu_dump_base: Starting CPU address for the mapping.
 * @buf_count:     Buffer count in the ring buffer, MUST be power of 2.
 * @as_nr:         Address space number for the memory mapping.
 * @phys:          Physical memory allocation used by the mapping.
 * @num_pages:     Size of the mapping, in memory pages.
 */
struct kbase_hwcnt_backend_csf_if_fw_ring_buf {
	u64 gpu_dump_base;
	void *cpu_dump_base;
	size_t buf_count;
	u32 as_nr;
	struct tagged_addr *phys;
	size_t num_pages;
};

/**
 * struct kbase_hwcnt_backend_csf_if_fw_ctx - Firmware context for the CSF
 *                                            interface, used to communicate
 *                                            with firmware.
 * @kbdev:              KBase device.
 * @buf_bytes:          The size in bytes for each buffer in the ring buffer.
 * @clk_cnt:            The number of clock domains in the system.
 *                      The maximum is 64.
 * @clk_enable_map:     Bitmask of enabled clocks.
 * @rate_listener:      Clock rate listener callback state.
 * @ccswe_shader_cores: Shader cores cycle count software estimator.
 */
struct kbase_hwcnt_backend_csf_if_fw_ctx {
	struct kbase_device *kbdev;
	size_t buf_bytes;
	u8 clk_cnt;
	u64 clk_enable_map;
	struct kbase_clk_rate_listener rate_listener;
	struct kbase_ccswe ccswe_shader_cores;
};

static void kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
	struct kbase_device *kbdev;

	WARN_ON(!ctx);

	fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	kbdev = fw_ctx->kbdev;

	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
}

static void kbasep_hwcnt_backend_csf_if_fw_lock(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long *flags)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
	struct kbase_device *kbdev;

	WARN_ON(!ctx);

	fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	kbdev = fw_ctx->kbdev;

	kbase_csf_scheduler_spin_lock(kbdev, flags);
}

static void kbasep_hwcnt_backend_csf_if_fw_unlock(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
	struct kbase_device *kbdev;

	WARN_ON(!ctx);

	fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	kbdev = fw_ctx->kbdev;

	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
	kbase_csf_scheduler_spin_unlock(kbdev, flags);
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - On freq change callback
 *
 * @rate_listener: Callback state
 * @clk_index:     Clock index
 * @clk_rate_hz:   Clock frequency(hz)
 */
static void kbasep_hwcnt_backend_csf_if_fw_on_freq_change(
	struct kbase_clk_rate_listener *rate_listener, u32 clk_index,
	u32 clk_rate_hz)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = container_of(
		rate_listener, struct kbase_hwcnt_backend_csf_if_fw_ctx,
		rate_listener);
	u64 timestamp_ns;

	if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
		return;

	timestamp_ns = ktime_get_raw_ns();
	kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns,
				clk_rate_hz);
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_cc_enable() - Enable cycle count tracking
 *
 * @fw_ctx:         Non-NULL pointer to CSF firmware interface context.
 * @clk_enable_map: Enable map specifying which clock domains should have
 *                  their cycle counts tracked.
 */
static void kbasep_hwcnt_backend_csf_if_fw_cc_enable(
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx, u64 clk_enable_map)
{
	struct kbase_device *kbdev = fw_ctx->kbdev;

	if (kbase_hwcnt_clk_enable_map_enabled(
		    clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
		/* software estimation for non-top clock domains */
		struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
		const struct kbase_clk_data *clk_data =
			rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
		u32 cur_freq;
		unsigned long flags;
		u64 timestamp_ns;

		timestamp_ns = ktime_get_raw_ns();

		spin_lock_irqsave(&rtm->lock, flags);

		cur_freq = (u32)clk_data->clock_val;
		kbase_ccswe_reset(&fw_ctx->ccswe_shader_cores);
		kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores,
					timestamp_ns, cur_freq);

		kbase_clk_rate_trace_manager_subscribe_no_lock(
			rtm, &fw_ctx->rate_listener);

		spin_unlock_irqrestore(&rtm->lock, flags);
	}

	fw_ctx->clk_enable_map = clk_enable_map;
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_cc_disable() - Disable cycle count tracking
 *
 * @fw_ctx: Non-NULL pointer to CSF firmware interface context.
 */
static void kbasep_hwcnt_backend_csf_if_fw_cc_disable(
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
{
	struct kbase_device *kbdev = fw_ctx->kbdev;
	struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
	u64 clk_enable_map = fw_ctx->clk_enable_map;

	if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map,
					       KBASE_CLOCK_DOMAIN_SHADER_CORES))
		kbase_clk_rate_trace_manager_unsubscribe(
			rtm, &fw_ctx->rate_listener);
}

static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
	struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
	struct kbase_device *kbdev;
	u32 prfcnt_size;
	u32 prfcnt_hw_size = 0;
	u32 prfcnt_fw_size = 0;
	u32 prfcnt_block_size = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK *
				KBASE_HWCNT_VALUE_BYTES;

	WARN_ON(!ctx);
	WARN_ON(!prfcnt_info);

	fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	kbdev = fw_ctx->kbdev;
	prfcnt_size = kbdev->csf.global_iface.prfcnt_size;
	prfcnt_hw_size = (prfcnt_size & 0xFF) << 8;
	prfcnt_fw_size = (prfcnt_size >> 16) << 8;
	fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size;

	prfcnt_info->dump_bytes = fw_ctx->buf_bytes;
	prfcnt_info->prfcnt_block_size = prfcnt_block_size;
	prfcnt_info->l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices;
	prfcnt_info->core_mask =
		kbdev->gpu_props.props.coherency_info.group[0].core_mask;
	prfcnt_info->clk_cnt = fw_ctx->clk_cnt;
	prfcnt_info->clearing_samples = true;

	/* Block size must be multiple of counter size. */
	WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_BYTES) !=
		0);
	/* Total size must be multiple of block size. */
	WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) !=
		0);
}

static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count,
	void **cpu_dump_base,
	struct kbase_hwcnt_backend_csf_if_ring_buf **out_ring_buf)
{
	struct kbase_device *kbdev;
	struct tagged_addr *phys;
	struct page **page_list;
	void *cpu_addr;
	int ret;
	int i;
	size_t num_pages;
	u64 flags;
	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf;
	pgprot_t cpu_map_prot = PAGE_KERNEL;
	u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;

	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	WARN_ON(!ctx);
	WARN_ON(!cpu_dump_base);
	WARN_ON(!out_ring_buf);

	kbdev = fw_ctx->kbdev;

	/* The buffer count must be power of 2 */
	if (!is_power_of_2(buf_count))
		return -EINVAL;

	/* alignment failure */
	if (gpu_va_base & (2048 - 1))
		return -EINVAL;

	fw_ring_buf = kzalloc(sizeof(*fw_ring_buf), GFP_KERNEL);
	if (!fw_ring_buf)
		return -ENOMEM;

	num_pages = PFN_UP(fw_ctx->buf_bytes * buf_count);
	phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
	if (!phys)
		goto phys_alloc_error;

	page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL);
	if (!page_list)
		goto page_list_alloc_error;

	/* Get physical page for the buffer */
	ret = kbase_mem_pool_alloc_pages(
		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
		phys, false);
	if (ret != num_pages)
		goto phys_mem_pool_alloc_error;

	/* Get the CPU virtual address */
	for (i = 0; i < num_pages; i++)
		page_list[i] = as_page(phys[i]);

	cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot);
	if (!cpu_addr)
		goto vmap_error;

	flags = KBASE_REG_GPU_WR | KBASE_REG_GPU_NX |
		KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);

	/* Update MMU table */
	ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
				     gpu_va_base >> PAGE_SHIFT, phys,
				     num_pages, flags, MCU_AS_NR,
				     KBASE_MEM_GROUP_CSF_FW);
	if (ret)
		goto mmu_insert_failed;

	kfree(page_list);

	fw_ring_buf->gpu_dump_base = gpu_va_base;
	fw_ring_buf->cpu_dump_base = cpu_addr;
	fw_ring_buf->phys = phys;
	fw_ring_buf->num_pages = num_pages;
	fw_ring_buf->buf_count = buf_count;
	fw_ring_buf->as_nr = MCU_AS_NR;

	*cpu_dump_base = fw_ring_buf->cpu_dump_base;
	*out_ring_buf =
		(struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf;

	return 0;

mmu_insert_failed:
	vunmap(cpu_addr);
vmap_error:
	kbase_mem_pool_free_pages(
		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
		phys, false, false);
phys_mem_pool_alloc_error:
	kfree(page_list);
page_list_alloc_error:
	kfree(phys);
phys_alloc_error:
	kfree(fw_ring_buf);
	return -ENOMEM;
}

static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
	u32 buf_index_first, u32 buf_index_last, bool for_cpu)
{
	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
		(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	size_t i;
	size_t pg_first;
	size_t pg_last;
	u64 start_address;
	u64 stop_address;
	u32 ring_buf_index_first;
	u32 ring_buf_index_last;

	WARN_ON(!ctx);
	WARN_ON(!ring_buf);

	/* The index arguments for this function form an inclusive, exclusive
	 * range.
	 * However, when masking back to the available buffers we will make
	 * this inclusive at both ends so full flushes are not 0 -> 0.
	 */
	ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1);
	ring_buf_index_last =
		(buf_index_last - 1) & (fw_ring_buf->buf_count - 1);

	/* The start address is the offset of the first buffer. */
	start_address = fw_ctx->buf_bytes * ring_buf_index_first;
	pg_first = start_address >> PAGE_SHIFT;

	/* The stop address is the last byte in the final buffer. */
	stop_address = (fw_ctx->buf_bytes * (ring_buf_index_last + 1)) - 1;
	pg_last = stop_address >> PAGE_SHIFT;

	/* Check whether the buffer range wraps. */
	if (start_address > stop_address) {
		/* sync the first part to the end of ring buffer. */
		for (i = pg_first; i < fw_ring_buf->num_pages; i++) {
			struct page *pg = as_page(fw_ring_buf->phys[i]);

			if (for_cpu) {
				kbase_sync_single_for_cpu(fw_ctx->kbdev,
							  kbase_dma_addr(pg),
							  PAGE_SIZE,
							  DMA_BIDIRECTIONAL);
			} else {
				kbase_sync_single_for_device(
					fw_ctx->kbdev, kbase_dma_addr(pg),
					PAGE_SIZE, DMA_BIDIRECTIONAL);
			}
		}

		/* second part starts from page 0.
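		 * Resetting pg_first here makes the loop below cover the
		 * wrapped tail of the range, i.e. pages 0 .. pg_last.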
		 */
		pg_first = 0;
	}

	for (i = pg_first; i <= pg_last; i++) {
		struct page *pg = as_page(fw_ring_buf->phys[i]);

		if (for_cpu) {
			kbase_sync_single_for_cpu(fw_ctx->kbdev,
						  kbase_dma_addr(pg), PAGE_SIZE,
						  DMA_BIDIRECTIONAL);
		} else {
			kbase_sync_single_for_device(fw_ctx->kbdev,
						     kbase_dma_addr(pg),
						     PAGE_SIZE,
						     DMA_BIDIRECTIONAL);
		}
	}
}

static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
	CSTD_UNUSED(ctx);
	return ktime_get_raw_ns();
}

static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf)
{
	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
		(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	if (!fw_ring_buf)
		return;

	if (fw_ring_buf->phys) {
		u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;

		WARN_ON(kbase_mmu_teardown_pages(
			fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu,
			gpu_va_base >> PAGE_SHIFT, fw_ring_buf->num_pages,
			MCU_AS_NR));

		vunmap(fw_ring_buf->cpu_dump_base);

		kbase_mem_pool_free_pages(
			&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
			fw_ring_buf->num_pages, fw_ring_buf->phys, false,
			false);

		kfree(fw_ring_buf->phys);

		kfree(fw_ring_buf);
	}
}

static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
	struct kbase_hwcnt_backend_csf_if_enable *enable)
{
	u32 prfcnt_config;
	struct kbase_device *kbdev;
	struct kbase_csf_global_iface *global_iface;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
		(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;

	WARN_ON(!ctx);
	WARN_ON(!ring_buf);
	WARN_ON(!enable);
	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

	kbdev = fw_ctx->kbdev;
	global_iface = &kbdev->csf.global_iface;

	/* Configure */
	prfcnt_config = fw_ring_buf->buf_count;
	prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT;

	/* Configure the ring buffer base address */
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID,
					fw_ring_buf->as_nr);
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_LO,
					fw_ring_buf->gpu_dump_base & U32_MAX);
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_HI,
					fw_ring_buf->gpu_dump_base >> 32);

	/* Set extract position to 0 */
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_EXTRACT, 0);

	/* Configure the enable bitmap */
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN,
					enable->fe_bm);
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN,
					enable->shader_bm);
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN,
					enable->mmu_l2_bm);
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN,
					enable->tiler_bm);

	/* Configure the HWC set and buffer size */
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG,
					prfcnt_config);

	kbdev->csf.hwcnt.enable_pending = true;

	/* Unmask the interrupts */
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK);

	/* Enable the HWC */
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_REQ, (1 << GLB_REQ_PRFCNT_ENABLE_SHIFT),
		GLB_REQ_PRFCNT_ENABLE_MASK);
	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);

	prfcnt_config = kbase_csf_firmware_global_input_read(global_iface,
							     GLB_PRFCNT_CONFIG);

	kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx,
						 enable->clk_enable_map);
}

static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
	struct kbase_device *kbdev;
	struct kbase_csf_global_iface *global_iface;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	WARN_ON(!ctx);
	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

	kbdev = fw_ctx->kbdev;
	global_iface = &kbdev->csf.global_iface;

	/* Disable the HWC */
	kbdev->csf.hwcnt.enable_pending = true;
	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0,
					     GLB_REQ_PRFCNT_ENABLE_MASK);
	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);

	/* mask the interrupts */
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK, 0,
		GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK, 0,
		GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK, 0,
		GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);

	/* In case we have a previous request in flight when the disable
	 * happens.
	 */
	kbdev->csf.hwcnt.request_pending = false;

	kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx);
}

static void kbasep_hwcnt_backend_csf_if_fw_dump_request(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
	u32 glb_req;
	struct kbase_device *kbdev;
	struct kbase_csf_global_iface *global_iface;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	WARN_ON(!ctx);
	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

	kbdev = fw_ctx->kbdev;
	global_iface = &kbdev->csf.global_iface;

	/* Trigger dumping */
	kbdev->csf.hwcnt.request_pending = true;
	glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
	glb_req ^= GLB_REQ_PRFCNT_SAMPLE_MASK;
	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req,
					     GLB_REQ_PRFCNT_SAMPLE_MASK);
	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}

static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index,
	u32 *insert_index)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	WARN_ON(!ctx);
	WARN_ON(!extract_index);
	WARN_ON(!insert_index);
	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

	*extract_index = kbase_csf_firmware_global_input_read(
		&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT);
	*insert_index = kbase_csf_firmware_global_output(
		&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_INSERT);
}

static void kbasep_hwcnt_backend_csf_if_fw_set_extract_index(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_idx)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	WARN_ON(!ctx);
	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

	/* Set the raw extract index to release the buffer back to the ring
	 * buffer.
	 */
	kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface,
					GLB_PRFCNT_EXTRACT, extract_idx);
}

static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts,
	u64 clk_enable_map)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	u8 clk;
	u64 timestamp_ns = ktime_get_raw_ns();

	WARN_ON(!ctx);
	WARN_ON(!cycle_counts);
	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

	for (clk = 0; clk < fw_ctx->clk_cnt; clk++) {
		if (!(clk_enable_map & (1ull << clk)))
			continue;

		if (clk == KBASE_CLOCK_DOMAIN_TOP) {
			/* Read cycle count for top clock domain. */
			kbase_backend_get_gpu_time_norequest(
				fw_ctx->kbdev, &cycle_counts[clk], NULL, NULL);
		} else {
			/* Estimate cycle count for non-top clock domain. */
			cycle_counts[clk] = kbase_ccswe_cycle_at(
				&fw_ctx->ccswe_shader_cores, timestamp_ns);
		}
	}
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_ctx_destroy() - Destroy a CSF FW interface
 *                                                context.
 *
 * @fw_ctx: Pointer to context to destroy.
 */
static void kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
{
	if (!fw_ctx)
		return;

	kfree(fw_ctx);
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_ctx_create() - Create a CSF Firmware context.
 *
 * @kbdev:   Non-NULL pointer to kbase device.
 * @out_ctx: Non-NULL pointer to where info is stored on success.
 *
 * Return: 0 on success, else error code.
 */
static int kbasep_hwcnt_backend_csf_if_fw_ctx_create(
	struct kbase_device *kbdev,
	struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx)
{
	u8 clk;
	int errcode = -ENOMEM;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL;

	WARN_ON(!kbdev);
	WARN_ON(!out_ctx);

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		goto error;

	ctx->kbdev = kbdev;

	/* Determine the number of available clock domains.
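	 * The loop below assumes clks[] is populated contiguously from
	 * index 0, so the index of the first NULL entry is the domain count.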
	 */
	for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) {
		if (kbdev->pm.clk_rtm.clks[clk] == NULL)
			break;
	}
	ctx->clk_cnt = clk;

	ctx->clk_enable_map = 0;
	kbase_ccswe_init(&ctx->ccswe_shader_cores);
	ctx->rate_listener.notify =
		kbasep_hwcnt_backend_csf_if_fw_on_freq_change;

	*out_ctx = ctx;
	return 0;
error:
	kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(ctx);
	return errcode;
}

void kbase_hwcnt_backend_csf_if_fw_destroy(
	struct kbase_hwcnt_backend_csf_if *if_fw)
{
	if (!if_fw)
		return;

	kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)if_fw->ctx);
	memset(if_fw, 0, sizeof(*if_fw));
}

int kbase_hwcnt_backend_csf_if_fw_create(
	struct kbase_device *kbdev, struct kbase_hwcnt_backend_csf_if *if_fw)
{
	int errcode;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL;

	if (!kbdev || !if_fw)
		return -EINVAL;

	errcode = kbasep_hwcnt_backend_csf_if_fw_ctx_create(kbdev, &ctx);
	if (errcode)
		return errcode;

	if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx;
	if_fw->assert_lock_held =
		kbasep_hwcnt_backend_csf_if_fw_assert_lock_held;
	if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock;
	if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock;
	if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info;
	if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc;
	if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync;
	if_fw->ring_buf_free = kbasep_hwcnt_backend_csf_if_fw_ring_buf_free;
	if_fw->timestamp_ns = kbasep_hwcnt_backend_csf_if_fw_timestamp_ns;
	if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable;
	if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable;
	if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request;
	if_fw->get_gpu_cycle_count =
		kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count;
	if_fw->get_indexes = kbasep_hwcnt_backend_csf_if_fw_get_indexes;
	if_fw->set_extract_index =
		kbasep_hwcnt_backend_csf_if_fw_set_extract_index;

	return 0;
}
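
/*
 * Usage sketch (illustrative only, not part of the driver logic above): the
 * firmware interface is expected to be created once per device and handed to
 * the CSF HWC backend, then destroyed on device termination. The consumer
 * kbase_hwcnt_backend_csf_create() and the ring buffer count of 4 below are
 * assumptions for illustration.
 *
 *	struct kbase_hwcnt_backend_csf_if if_fw = { 0 };
 *	struct kbase_hwcnt_backend_interface iface;
 *
 *	if (!kbase_hwcnt_backend_csf_if_fw_create(kbdev, &if_fw)) {
 *		if (!kbase_hwcnt_backend_csf_create(&if_fw, 4, &iface)) {
 *			// ... use iface, then tear it down ...
 *			kbase_hwcnt_backend_csf_destroy(&iface);
 *		}
 *		kbase_hwcnt_backend_csf_if_fw_destroy(&if_fw);
 *	}
 */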