// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * */ #include "mali_kbase_hwcnt_backend_csf.h" #include "mali_kbase_hwcnt_gpu.h" #include "mali_kbase_hwcnt_types.h" #include #include #include #include #include #include #include #include #ifndef BASE_MAX_NR_CLOCKS_REGULATORS #define BASE_MAX_NR_CLOCKS_REGULATORS 2 #endif /** * enum kbase_hwcnt_backend_csf_dump_state - HWC CSF backend dumping states. * * @KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE: Initial state, or the state if there is * an error. * * @KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED: A dump has been requested and we are * waiting for an ACK, this ACK could come from either PRFCNT_ACK, * PROTMODE_ENTER_ACK, or if an error occurs. * * @KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT: Checking the insert * immediately after receiving the ACK, so we know which index corresponds to * the buffer we requested. * * @KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED: The insert has been saved and * now we have kicked off the worker. * * @KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING: The insert has been saved and now * we have kicked off the worker to accumulate up to that insert and then copy * the delta to the user buffer to prepare for dump_get(). 
 * @KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED: The dump completed successfully.
 *
 * Valid state transitions:
 * IDLE -> REQUESTED (on dump request)
 * REQUESTED -> QUERYING_INSERT (on dump ack)
 * QUERYING_INSERT -> WORKER_LAUNCHED (on worker submission)
 * WORKER_LAUNCHED -> ACCUMULATING (while the worker is accumulating)
 * ACCUMULATING -> COMPLETED (on accumulation completion)
 * COMPLETED -> REQUESTED (on dump request)
 * COMPLETED -> IDLE (on disable)
 * ANY -> IDLE (on error)
 */
enum kbase_hwcnt_backend_csf_dump_state {
	KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE,
	KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED,
	KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT,
	KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED,
	KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING,
	KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED,
};

/**
 * enum kbase_hwcnt_backend_csf_enable_state - HWC CSF backend enable states.
 *
 * @KBASE_HWCNT_BACKEND_CSF_DISABLED: Initial state, and the state when backend
 * is disabled.
 *
 * @KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: Enable request is in
 * progress, waiting for firmware acknowledgment.
 *
 * @KBASE_HWCNT_BACKEND_CSF_ENABLED: Enable request has been acknowledged,
 * enable is done.
 *
 * @KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED: Disable request is in
 * progress, waiting for firmware acknowledgment.
 *
 * @KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: Disable request has been
 * acknowledged, waiting for dump workers to be finished.
 *
 * @KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: An
 * unrecoverable error happened, waiting for dump workers to be finished.
 *
 * @KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR: An unrecoverable error
 * happened, and dump workers have finished, waiting for reset.
 * Valid state transitions:
 * DISABLED -> TRANSITIONING_TO_ENABLED (on enable)
 * TRANSITIONING_TO_ENABLED -> ENABLED (on enable ack)
 * ENABLED -> TRANSITIONING_TO_DISABLED (on disable)
 * TRANSITIONING_TO_DISABLED -> DISABLED_WAIT_FOR_WORKER (on disable ack)
 * DISABLED_WAIT_FOR_WORKER -> DISABLED (after workers are flushed)
 * DISABLED -> UNRECOVERABLE_ERROR (on unrecoverable error)
 * ANY but DISABLED -> UNRECOVERABLE_ERROR_WAIT_FOR_WORKER (on unrecoverable
 * error)
 * UNRECOVERABLE_ERROR -> DISABLED (on before reset)
 */
enum kbase_hwcnt_backend_csf_enable_state {
	KBASE_HWCNT_BACKEND_CSF_DISABLED,
	KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED,
	KBASE_HWCNT_BACKEND_CSF_ENABLED,
	KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED,
	KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER,
	KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER,
	KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR,
};

/**
 * struct kbase_hwcnt_backend_csf_info - Information used to create an instance
 *                                       of a CSF hardware counter backend.
 * @backend:                      Pointer to access CSF backend.
 * @fw_in_protected_mode:         True if FW is running in protected mode, else
 *                                false.
 * @unrecoverable_error_happened: True if an unrecoverable error happened, else
 *                                false.
 * @csf_if:                       CSF interface object pointer.
 * @ring_buf_cnt:                 Dump buffer count in the ring buffer.
 * @counter_set:                  The performance counter set to use.
 * @metadata:                     Hardware counter metadata.
 * @prfcnt_info:                  Performance counter information.
 */
struct kbase_hwcnt_backend_csf_info {
	struct kbase_hwcnt_backend_csf *backend;
	bool fw_in_protected_mode;
	bool unrecoverable_error_happened;
	struct kbase_hwcnt_backend_csf_if *csf_if;
	u32 ring_buf_cnt;
	enum kbase_hwcnt_set counter_set;
	const struct kbase_hwcnt_metadata *metadata;
	struct kbase_hwcnt_backend_csf_if_prfcnt_info prfcnt_info;
};

/**
 * struct kbase_hwcnt_csf_physical_layout - HWC sample memory physical layout
 *                                          information.
 * @fe_cnt:             Front end block count.
 * @tiler_cnt:          Tiler block count.
 * @mmu_l2_cnt:         Memory system(MMU and L2 cache) block count.
 * @shader_cnt:         Shader Core block count.
 * @block_cnt:          Total block count (sum of all other block counts).
 * @shader_avail_mask:  Bitmap of all shader cores in the system.
 * @offset_enable_mask: Offset of enable mask in the block.
 * @headers_per_block:  Header size per block.
 * @counters_per_block: Counters size per block.
 * @values_per_block:   Total size per block.
 */
struct kbase_hwcnt_csf_physical_layout {
	size_t fe_cnt;
	size_t tiler_cnt;
	size_t mmu_l2_cnt;
	size_t shader_cnt;
	size_t block_cnt;
	u64 shader_avail_mask;
	size_t offset_enable_mask;
	size_t headers_per_block;
	size_t counters_per_block;
	size_t values_per_block;
};

/**
 * struct kbase_hwcnt_backend_csf - Instance of a CSF hardware counter backend.
 * @info:                       CSF Info used to create the backend.
 * @dump_state:                 The dumping state of the backend.
 * @enable_state:               The CSF backend internal enabled state.
 * @insert_index_to_accumulate: The insert index in the ring buffer which need
 *                              to accumulate up to.
 * @enable_state_waitq:         Wait queue object used to notify the enable
 *                              changing flag is done.
 * @to_user_buf:                HWC sample buffer for client user.
 * @accum_buf:                  HWC sample buffer used as an internal
 *                              accumulator.
 * @old_sample_buf:             HWC sample buffer to save the previous values
 *                              for delta calculation.
 * @ring_buf:                   Opaque pointer for ring buffer object.
 * @ring_buf_cpu_base:          CPU base address of the allocated ring buffer.
 * @clk_enable_map:             The enable map specifying enabled clock domains.
 * @cycle_count_elapsed:        Cycle count elapsed for a given sample period.
 * @prev_cycle_count:           Previous cycle count to calculate the cycle
 *                              count for sample period.
 * @phys_layout:                Physical memory layout information of HWC
 *                              sample buffer.
 * @dump_completed:             Completion signaled by the dump worker when
 *                              it is completed accumulating up to the
 *                              insert_index_to_accumulate.
 *                              Should be initialized to the "complete" state.
 * @hwc_dump_workq:             Single threaded work queue for HWC workers
 *                              execution.
 * @hwc_dump_work:              Worker to accumulate samples.
 * @hwc_threshold_work:         Worker for consuming available samples when
 *                              threshold interrupt raised.
 */
struct kbase_hwcnt_backend_csf {
	struct kbase_hwcnt_backend_csf_info *info;
	enum kbase_hwcnt_backend_csf_dump_state dump_state;
	enum kbase_hwcnt_backend_csf_enable_state enable_state;
	u32 insert_index_to_accumulate;
	wait_queue_head_t enable_state_waitq;
	u32 *to_user_buf;
	u32 *accum_buf;
	u32 *old_sample_buf;
	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf;
	void *ring_buf_cpu_base;
	u64 clk_enable_map;
	u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS];
	u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS];
	struct kbase_hwcnt_csf_physical_layout phys_layout;
	struct completion dump_completed;
	struct workqueue_struct *hwc_dump_workq;
	struct work_struct hwc_dump_work;
	struct work_struct hwc_threshold_work;
};

/* Check whether a backend instance is currently attached to the CSF info.
 * The CSF interface lock must be held (asserted below).
 */
static bool kbasep_hwcnt_backend_csf_backend_exists(
	struct kbase_hwcnt_backend_csf_info *csf_info)
{
	WARN_ON(!csf_info);
	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
	return (csf_info->backend != NULL);
}

/**
 * kbasep_hwcnt_backend_csf_cc_initial_sample() - Initialize cycle count
 *                                                tracking.
 *
 * @backend_csf: Non-NULL pointer to backend.
 * @enable_map:  Non-NULL pointer to enable map specifying enabled counters.
 */
static void kbasep_hwcnt_backend_csf_cc_initial_sample(
	struct kbase_hwcnt_backend_csf *backend_csf,
	const struct kbase_hwcnt_enable_map *enable_map)
{
	u64 clk_enable_map = enable_map->clk_enable_map;
	u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS];
	size_t clk;

	/* Read cycle count from CSF interface for both clock domains.
	 */
	backend_csf->info->csf_if->get_gpu_cycle_count(
		backend_csf->info->csf_if->ctx, cycle_counts, clk_enable_map);

	/* Record the starting count only for the clock domains the caller
	 * enabled; disabled domains keep their previous stored value.
	 */
	kbase_hwcnt_metadata_for_each_clock(enable_map->metadata, clk) {
		if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, clk))
			backend_csf->prev_cycle_count[clk] = cycle_counts[clk];
	}

	/* Keep clk_enable_map for dump_request. */
	backend_csf->clk_enable_map = clk_enable_map;
}

/**
 * kbasep_hwcnt_backend_csf_cc_update() - Update cycle count tracking.
 * @backend_csf: Non-NULL pointer to backend.
 *
 * For each enabled clock domain, computes the cycle count elapsed since the
 * previous update and stores the new raw count for the next delta.
 * The CSF interface lock must be held (asserted below).
 */
static void
kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *backend_csf)
{
	u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS];
	size_t clk;

	backend_csf->info->csf_if->assert_lock_held(
		backend_csf->info->csf_if->ctx);

	backend_csf->info->csf_if->get_gpu_cycle_count(
		backend_csf->info->csf_if->ctx, cycle_counts,
		backend_csf->clk_enable_map);

	kbase_hwcnt_metadata_for_each_clock(backend_csf->info->metadata, clk) {
		if (kbase_hwcnt_clk_enable_map_enabled(
			    backend_csf->clk_enable_map, clk)) {
			backend_csf->cycle_count_elapsed[clk] =
				cycle_counts[clk] -
				backend_csf->prev_cycle_count[clk];
			backend_csf->prev_cycle_count[clk] = cycle_counts[clk];
		}
	}
}

/* CSF backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */
static u64
kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *backend)
{
	struct kbase_hwcnt_backend_csf *backend_csf =
		(struct kbase_hwcnt_backend_csf *)backend;

	/* Defensive NULL checks: return 0 rather than dereference. */
	if (!backend_csf || !backend_csf->info || !backend_csf->info->csf_if)
		return 0;

	return backend_csf->info->csf_if->timestamp_ns(
		backend_csf->info->csf_if->ctx);
}

/**
 * kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to
 *                                                 guarantee headers are
 *                                                 enabled if any counter is
 *                                                 required.
 * @phys_enable_map: HWC physical enable map to be processed.
 */
static void kbasep_hwcnt_backend_csf_process_enable_map(
	struct kbase_hwcnt_physical_enable_map *phys_enable_map)
{
	WARN_ON(!phys_enable_map);

	/* Enable header if any counter is required from user, the header is
	 * controlled by bit 0 of the enable mask.
*/ if (phys_enable_map->fe_bm) phys_enable_map->fe_bm |= 1; if (phys_enable_map->tiler_bm) phys_enable_map->tiler_bm |= 1; if (phys_enable_map->mmu_l2_bm) phys_enable_map->mmu_l2_bm |= 1; if (phys_enable_map->shader_bm) phys_enable_map->shader_bm |= 1; } static void kbasep_hwcnt_backend_csf_init_layout( const struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info, struct kbase_hwcnt_csf_physical_layout *phys_layout) { WARN_ON(!prfcnt_info); WARN_ON(!phys_layout); phys_layout->fe_cnt = 1; phys_layout->tiler_cnt = 1; phys_layout->mmu_l2_cnt = prfcnt_info->l2_count; phys_layout->shader_cnt = fls64(prfcnt_info->core_mask); phys_layout->block_cnt = phys_layout->fe_cnt + phys_layout->tiler_cnt + phys_layout->mmu_l2_cnt + phys_layout->shader_cnt; phys_layout->shader_avail_mask = prfcnt_info->core_mask; phys_layout->headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; phys_layout->values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_BYTES; phys_layout->counters_per_block = phys_layout->values_per_block - phys_layout->headers_per_block; phys_layout->offset_enable_mask = KBASE_HWCNT_V5_PRFCNT_EN_HEADER; } static void kbasep_hwcnt_backend_csf_reset_internal_buffers( struct kbase_hwcnt_backend_csf *backend_csf) { memset(backend_csf->to_user_buf, 0, backend_csf->info->prfcnt_info.dump_bytes); memset(backend_csf->accum_buf, 0, backend_csf->info->prfcnt_info.dump_bytes); memset(backend_csf->old_sample_buf, 0, backend_csf->info->prfcnt_info.dump_bytes); } static void kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header( struct kbase_hwcnt_backend_csf *backend_csf, u32 *sample) { u32 block_idx; const struct kbase_hwcnt_csf_physical_layout *phys_layout; u32 *block_buf; phys_layout = &backend_csf->phys_layout; for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) { block_buf = sample + block_idx * phys_layout->values_per_block; block_buf[phys_layout->offset_enable_mask] = 0; } } static void kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header( 
struct kbase_hwcnt_backend_csf *backend_csf)
{
	u32 idx;
	u32 *sample;
	char *cpu_dump_base;
	size_t dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;

	cpu_dump_base = (char *)backend_csf->ring_buf_cpu_base;

	/* Clear the enable-mask header of every sample in the ring buffer. */
	for (idx = 0; idx < backend_csf->info->ring_buf_cnt; idx++) {
		sample = (u32 *)&cpu_dump_base[idx * dump_bytes];
		kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(
			backend_csf, sample);
	}
}

/* Publish the accumulator to the user buffer and reset the accumulator. */
static void kbasep_hwcnt_backend_csf_update_user_sample(
	struct kbase_hwcnt_backend_csf *backend_csf)
{
	/* Copy the data into the sample and wait for the user to get it. */
	memcpy(backend_csf->to_user_buf, backend_csf->accum_buf,
	       backend_csf->info->prfcnt_info.dump_bytes);

	/* After copied data into user sample, clear the accumulator values to
	 * prepare for the next accumulator, such as the next request or
	 * threshold.
	 */
	memset(backend_csf->accum_buf, 0,
	       backend_csf->info->prfcnt_info.dump_bytes);
}

/* Accumulate one hardware sample (new vs old) into the accumulation buffer,
 * block by block, honouring each block's enable mask.
 */
static void kbasep_hwcnt_backend_csf_accumulate_sample(
	const struct kbase_hwcnt_csf_physical_layout *phys_layout,
	size_t dump_bytes, u32 *accum_buf, const u32 *old_sample_buf,
	const u32 *new_sample_buf, bool clearing_samples)
{
	size_t block_idx, ctr_idx;
	const u32 *old_block = old_sample_buf;
	const u32 *new_block = new_sample_buf;
	u32 *acc_block = accum_buf;

	for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) {
		const u32 old_enable_mask =
			old_block[phys_layout->offset_enable_mask];
		const u32 new_enable_mask =
			new_block[phys_layout->offset_enable_mask];

		if (new_enable_mask == 0) {
			/* Hardware block was unavailable or we didn't turn on
			 * any counters. Do nothing.
			 */
		} else {
			/* Hardware block was available and it had some counters
			 * enabled. We need to update the accumulation buffer.
			 */

			/* Unconditionally copy the headers.
			 */
			memcpy(acc_block, new_block,
			       phys_layout->headers_per_block *
				       KBASE_HWCNT_VALUE_BYTES);

			/* Accumulate counter samples
			 *
			 * When accumulating samples we need to take into
			 * account whether the counter sampling method involves
			 * clearing counters back to zero after each sample is
			 * taken.
			 *
			 * The intention for CSF was that all HW should use
			 * counters which wrap to zero when their maximum value
			 * is reached. This, combined with non-clearing
			 * sampling, enables multiple concurrent users to
			 * request samples without interfering with each other.
			 *
			 * However some early HW may not support wrapping
			 * counters, for these GPUs counters must be cleared on
			 * sample to avoid loss of data due to counters
			 * saturating at their maximum value.
			 */
			if (!clearing_samples) {
				if (old_enable_mask == 0) {
					/* Hardware block was previously
					 * unavailable. Accumulate the new
					 * counters only, as we know previous
					 * values are zeroes.
					 */
					for (ctr_idx =
						     phys_layout
							     ->headers_per_block;
					     ctr_idx <
					     phys_layout->values_per_block;
					     ctr_idx++) {
						acc_block[ctr_idx] +=
							new_block[ctr_idx];
					}
				} else {
					/* Hardware block was previously
					 * available. Accumulate the delta
					 * between old and new counter values.
					 */
					for (ctr_idx =
						     phys_layout
							     ->headers_per_block;
					     ctr_idx <
					     phys_layout->values_per_block;
					     ctr_idx++) {
						acc_block[ctr_idx] +=
							new_block[ctr_idx] -
							old_block[ctr_idx];
					}
				}
			} else {
				/* Clearing mode: each sample already holds the
				 * delta since the previous sample, so add it
				 * directly.
				 */
				for (ctr_idx = phys_layout->headers_per_block;
				     ctr_idx < phys_layout->values_per_block;
				     ctr_idx++) {
					acc_block[ctr_idx] +=
						new_block[ctr_idx];
				}
			}
		}
		old_block += phys_layout->values_per_block;
		new_block += phys_layout->values_per_block;
		acc_block += phys_layout->values_per_block;
	}

	/* All three cursors must have walked exactly one whole sample. */
	WARN_ON(old_block !=
		old_sample_buf + dump_bytes / KBASE_HWCNT_VALUE_BYTES);
	WARN_ON(new_block !=
		new_sample_buf + dump_bytes / KBASE_HWCNT_VALUE_BYTES);
	WARN_ON(acc_block != accum_buf + dump_bytes / KBASE_HWCNT_VALUE_BYTES);
	(void)dump_bytes;
}

/* Accumulate every ring-buffer sample in [extract_index_to_start,
 * insert_index_to_stop), then release the consumed buffers back to the GPU by
 * advancing the extract index.
 */
static void kbasep_hwcnt_backend_csf_accumulate_samples(
	struct kbase_hwcnt_backend_csf *backend_csf, u32 extract_index_to_start,
	u32 insert_index_to_stop)
{
	u32 raw_idx;
	unsigned long flags;
	u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base;
	const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt;
	const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;
	bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples;
	u32 *old_sample_buf = backend_csf->old_sample_buf;
	u32 *new_sample_buf;

	if (extract_index_to_start == insert_index_to_stop)
		/* No samples to accumulate. Early out. */
		return;

	/* Sync all the buffers to CPU side before read the data. */
	backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx,
						 backend_csf->ring_buf,
						 extract_index_to_start,
						 insert_index_to_stop, true);

	/* Consider u32 wrap case, '!=' is used here instead of '<' operator */
	for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop;
	     raw_idx++) {
		/* The logical "&" acts as a modulo operation since buf_count
		 * must be a power of two.
		 */
		const u32 buf_idx = raw_idx & (ring_buf_cnt - 1);

		new_sample_buf =
			(u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes];

		kbasep_hwcnt_backend_csf_accumulate_sample(
			&backend_csf->phys_layout, buf_dump_bytes,
			backend_csf->accum_buf, old_sample_buf, new_sample_buf,
			clearing_samples);

		old_sample_buf = new_sample_buf;
	}

	/* Save the newest buffer as the old buffer for next time. */
	memcpy(backend_csf->old_sample_buf, new_sample_buf, buf_dump_bytes);

	/* Reset the prfcnt_en header on each sample before releasing them. */
	for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop;
	     raw_idx++) {
		const u32 buf_idx = raw_idx & (ring_buf_cnt - 1);
		u32 *sample = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes];

		kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(
			backend_csf, sample);
	}

	/* Sync zeroed buffers to avoid coherency issues on future use. */
	backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx,
						 backend_csf->ring_buf,
						 extract_index_to_start,
						 insert_index_to_stop, false);

	/* After consuming all samples between extract_idx and insert_idx,
	 * set the raw extract index to insert_idx so that the sample buffers
	 * can be released back to the ring buffer pool.
	 */
	backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
	backend_csf->info->csf_if->set_extract_index(
		backend_csf->info->csf_if->ctx, insert_index_to_stop);
	backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
					  flags);
}

/**
 * kbasep_hwcnt_backend_csf_change_es_and_wake_waiters() - Change the enable
 *                                                         state and wake up
 *                                                         any waiters.
 * @backend_csf: Non-NULL pointer to backend.
 * @new_state:   The enable state to transition to.
 *
 * The CSF interface lock must be held (asserted below). Waiters on
 * enable_state_waitq are only woken if the state actually changes.
 */
static void kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
	struct kbase_hwcnt_backend_csf *backend_csf,
	enum kbase_hwcnt_backend_csf_enable_state new_state)
{
	backend_csf->info->csf_if->assert_lock_held(
		backend_csf->info->csf_if->ctx);

	if (backend_csf->enable_state != new_state) {
		backend_csf->enable_state = new_state;

		wake_up(&backend_csf->enable_state_waitq);
	}
}

/**
 * kbasep_hwcnt_backend_csf_dump_worker() - HWC dump worker.
 * @work: Work structure.
 *
 * To accumulate all available samples in the ring buffer when a request has
 * been done.
 *
 */
static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
{
	unsigned long flags;
	struct kbase_hwcnt_backend_csf *backend_csf;
	u32 insert_index_to_acc;
	u32 extract_index;
	u32 insert_index;

	WARN_ON(!work);
	backend_csf = container_of(work, struct kbase_hwcnt_backend_csf,
				   hwc_dump_work);
	backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
	/* Assert the backend is not destroyed. */
	WARN_ON(backend_csf != backend_csf->info->backend);

	/* The backend was disabled or had an error while the worker was being
	 * launched.
	 */
	if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
		WARN_ON(backend_csf->dump_state !=
			KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
		WARN_ON(!completion_done(&backend_csf->dump_completed));
		backend_csf->info->csf_if->unlock(
			backend_csf->info->csf_if->ctx, flags);
		return;
	}

	WARN_ON(backend_csf->dump_state !=
		KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED);

	backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING;
	insert_index_to_acc = backend_csf->insert_index_to_accumulate;

	/* Read the raw extract and insert indexes from the CSF interface. */
	backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx,
					       &extract_index, &insert_index);

	backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
					  flags);

	/* Accumulate up to the insert we grabbed at the prfcnt request
	 * interrupt.
	 */
	kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index,
						    insert_index_to_acc);

	/* Copy to the user buffer so if a threshold interrupt fires
	 * between now and get(), the accumulations are untouched.
	 */
	kbasep_hwcnt_backend_csf_update_user_sample(backend_csf);

	/* Dump done, set state back to COMPLETED for next request. */
	backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
	/* Assert the backend is not destroyed.
	 */
	WARN_ON(backend_csf != backend_csf->info->backend);

	/* The backend was disabled or had an error while we were accumulating.
	 */
	if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
		WARN_ON(backend_csf->dump_state !=
			KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
		WARN_ON(!completion_done(&backend_csf->dump_completed));
		backend_csf->info->csf_if->unlock(
			backend_csf->info->csf_if->ctx, flags);
		return;
	}

	WARN_ON(backend_csf->dump_state !=
		KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING);

	/* Our work here is done - set the wait object and unblock waiters. */
	backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
	complete_all(&backend_csf->dump_completed);
	backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
					  flags);
}

/**
 * kbasep_hwcnt_backend_csf_threshold_worker() - Threshold worker.
 *
 * @work: Work structure.
 *
 * Called when a HWC threshold interrupt raised to consume all available samples
 * in the ring buffer.
 */
static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work)
{
	unsigned long flags;
	struct kbase_hwcnt_backend_csf *backend_csf;
	u32 extract_index;
	u32 insert_index;

	WARN_ON(!work);

	backend_csf = container_of(work, struct kbase_hwcnt_backend_csf,
				   hwc_threshold_work);
	backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);

	/* Assert the backend is not destroyed. */
	WARN_ON(backend_csf != backend_csf->info->backend);

	/* Read the raw extract and insert indexes from the CSF interface. */
	backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx,
					       &extract_index, &insert_index);

	/* The backend was disabled or had an error while the worker was being
	 * launched.
	 */
	if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
		backend_csf->info->csf_if->unlock(
			backend_csf->info->csf_if->ctx, flags);
		return;
	}

	/* Early out if we are not in the IDLE state or COMPLETED state, as this
	 * means a concurrent dump is in progress and we don't want to
	 * interfere.
	 */
	if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) &&
	    (backend_csf->dump_state !=
	     KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) {
		backend_csf->info->csf_if->unlock(
			backend_csf->info->csf_if->ctx, flags);
		return;
	}
	backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
					  flags);

	/* Accumulate everything we possibly can. We grabbed the insert index
	 * immediately after we acquired the lock but before we checked whether
	 * a concurrent dump was triggered. This ensures that if a concurrent
	 * dump was triggered between releasing the lock and now, we know for a
	 * fact that our insert will not exceed the concurrent dump's
	 * insert_to_accumulate, so we don't risk accumulating too much data.
	 */
	kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index,
						    insert_index);

	/* No need to wake up anything since it is not a user dump request. */
}

/* Record the current insert index and hand accumulation off to the dump
 * worker. The CSF interface lock must be held; the backend must be enabled
 * and in the QUERYING_INSERT dump state (all asserted below).
 */
static void kbase_hwcnt_backend_csf_submit_dump_worker(
	struct kbase_hwcnt_backend_csf_info *csf_info)
{
	u32 extract_index;

	WARN_ON(!csf_info);
	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);

	WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info));
	WARN_ON(csf_info->backend->enable_state !=
		KBASE_HWCNT_BACKEND_CSF_ENABLED);
	WARN_ON(csf_info->backend->dump_state !=
		KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT);

	/* Save insert index now so that the dump worker only accumulates the
	 * HWC data associated with this request. Extract index is not stored
	 * as that needs to be checked when accumulating to prevent re-reading
	 * buffers that have already been read and returned to the GPU.
	 */
	csf_info->csf_if->get_indexes(
		csf_info->csf_if->ctx, &extract_index,
		&csf_info->backend->insert_index_to_accumulate);
	csf_info->backend->dump_state =
		KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED;

	/* Submit the accumulator task into the work queue.
	 */
	queue_work(csf_info->backend->hwc_dump_workq,
		   &csf_info->backend->hwc_dump_work);
}

/* Translate the client's enable map into the physical enable configuration
 * handed to the CSF interface (header bits forced on for delta calculation).
 */
static void kbasep_hwcnt_backend_csf_get_physical_enable(
	struct kbase_hwcnt_backend_csf *backend_csf,
	const struct kbase_hwcnt_enable_map *enable_map,
	struct kbase_hwcnt_backend_csf_if_enable *enable)
{
	enum kbase_hwcnt_physical_set phys_counter_set;
	struct kbase_hwcnt_physical_enable_map phys_enable_map;

	kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map);

	/* process the enable_map to guarantee the block header is enabled which
	 * is needed for delta calculation.
	 */
	kbasep_hwcnt_backend_csf_process_enable_map(&phys_enable_map);

	kbase_hwcnt_gpu_set_to_physical(&phys_counter_set,
					backend_csf->info->counter_set);

	/* Use processed enable_map to enable HWC in HW level. */
	enable->fe_bm = phys_enable_map.fe_bm;
	enable->shader_bm = phys_enable_map.shader_bm;
	enable->tiler_bm = phys_enable_map.tiler_bm;
	enable->mmu_l2_bm = phys_enable_map.mmu_l2_bm;
	enable->counter_set = phys_counter_set;
	enable->clk_enable_map = enable_map->clk_enable_map;
}

/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */
static int kbasep_hwcnt_backend_csf_dump_enable_nolock(
	struct kbase_hwcnt_backend *backend,
	const struct kbase_hwcnt_enable_map *enable_map)
{
	struct kbase_hwcnt_backend_csf *backend_csf =
		(struct kbase_hwcnt_backend_csf *)backend;
	struct kbase_hwcnt_backend_csf_if_enable enable;

	if (!backend_csf || !enable_map ||
	    (enable_map->metadata != backend_csf->info->metadata))
		return -EINVAL;

	backend_csf->info->csf_if->assert_lock_held(
		backend_csf->info->csf_if->ctx);

	kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map,
						     &enable);

	/* enable_state should be DISABLED before we transfer it to enabled */
	if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED)
		return -EIO;

	backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
	WARN_ON(!completion_done(&backend_csf->dump_completed));
	kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
		backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED);

	backend_csf->info->csf_if->dump_enable(backend_csf->info->csf_if->ctx,
					       backend_csf->ring_buf, &enable);

	kbasep_hwcnt_backend_csf_cc_initial_sample(backend_csf, enable_map);

	return 0;
}

/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_fn */
static int kbasep_hwcnt_backend_csf_dump_enable(
	struct kbase_hwcnt_backend *backend,
	const struct kbase_hwcnt_enable_map *enable_map)
{
	int errcode;
	unsigned long flags;
	struct kbase_hwcnt_backend_csf *backend_csf =
		(struct kbase_hwcnt_backend_csf *)backend;

	if (!backend_csf)
		return -EINVAL;

	/* Locked wrapper around the nolock variant. */
	backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
	errcode = kbasep_hwcnt_backend_csf_dump_enable_nolock(backend,
							      enable_map);
	backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
					  flags);
	return errcode;
}

/* Wait until any in-flight enable/disable transition has been acknowledged.
 * Called with the CSF interface lock held; the lock is dropped while waiting
 * and re-acquired before returning (hence lock_flags is in/out).
 */
static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete(
	struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags)
{
	backend_csf->info->csf_if->assert_lock_held(
		backend_csf->info->csf_if->ctx);

	while ((backend_csf->enable_state ==
		KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) ||
	       (backend_csf->enable_state ==
		KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)) {
		backend_csf->info->csf_if->unlock(
			backend_csf->info->csf_if->ctx, *lock_flags);

		wait_event(
			backend_csf->enable_state_waitq,
			(backend_csf->enable_state !=
			 KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) &&
				(backend_csf->enable_state !=
				 KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED));

		backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx,
						lock_flags);
	}
}

/* CSF backend implementation of kbase_hwcnt_backend_dump_disable_fn */
static void
kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
{
	unsigned long flags;
	struct kbase_hwcnt_backend_csf *backend_csf =
		(struct kbase_hwcnt_backend_csf *)backend;
	bool do_disable = false;

	WARN_ON(!backend_csf);
backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);

	/* Make sure we wait until any previous enable or disable have completed
	 * before doing anything.
	 */
	kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf,
								 &flags);

	if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED ||
	    backend_csf->enable_state ==
		    KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) {
		/* If we are already disabled or in an unrecoverable error
		 * state, there is nothing for us to do.
		 */
		backend_csf->info->csf_if->unlock(
			backend_csf->info->csf_if->ctx, flags);
		return;
	}

	if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) {
		kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
			backend_csf,
			KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED);
		backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
		complete_all(&backend_csf->dump_completed);
		/* Only disable if we were previously enabled - in all other
		 * cases the call to disable will have already been made.
		 */
		do_disable = true;
	}

	WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
	WARN_ON(!completion_done(&backend_csf->dump_completed));

	backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
					  flags);

	/* Block until any async work has completed. We have transitioned out of
	 * the ENABLED state so we can guarantee no new work will concurrently
	 * be submitted.
	 */
	flush_workqueue(backend_csf->hwc_dump_workq);

	backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);

	if (do_disable)
		backend_csf->info->csf_if->dump_disable(
			backend_csf->info->csf_if->ctx);

	kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf,
								 &flags);

	switch (backend_csf->enable_state) {
	case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER:
		kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
			backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED);
		break;
	case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER:
		kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
			backend_csf,
			KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR);
		break;
	default:
		/* No other enable state is legal after the flush above. */
		WARN_ON(true);
		break;
	}

	backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx,
					  flags);

	/* After disable, zero the header of all buffers in the ring buffer back
	 * to 0 to prepare for the next enable.
	 */
	kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf);

	/* Sync zeroed buffers to avoid coherency issues on future use. */
	backend_csf->info->csf_if->ring_buf_sync(
		backend_csf->info->csf_if->ctx, backend_csf->ring_buf, 0,
		backend_csf->info->ring_buf_cnt, false);

	/* Reset accumulator, old_sample_buf and user_sample to all-0 to prepare
	 * for next enable.
	 */
	kbasep_hwcnt_backend_csf_reset_internal_buffers(backend_csf);
}

/* CSF backend implementation of kbase_hwcnt_backend_dump_request_fn */
static int
kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
				      u64 *dump_time_ns)
{
	unsigned long flags;
	struct kbase_hwcnt_backend_csf *backend_csf =
		(struct kbase_hwcnt_backend_csf *)backend;
	bool do_request = false;

	if (!backend_csf)
		return -EINVAL;

	backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);

	/* If we're transitioning to enabled there's nothing to accumulate, and
	 * the user dump buffer is already zeroed. We can just short circuit to
	 * the DUMP_COMPLETED state.
*/ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) { backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); kbasep_hwcnt_backend_csf_cc_update(backend_csf); backend_csf->info->csf_if->unlock( backend_csf->info->csf_if->ctx, flags); return 0; } /* Otherwise, make sure we're already enabled. */ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { backend_csf->info->csf_if->unlock( backend_csf->info->csf_if->ctx, flags); return -EIO; } /* Make sure that this is either the first request since enable or the * previous dump has completed, so we can avoid midway through a dump. */ if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) && (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) { backend_csf->info->csf_if->unlock( backend_csf->info->csf_if->ctx, flags); /* HWC is disabled or another dump is ongoing, or we are on * fault. */ return -EIO; } /* Reset the completion so dump_wait() has something to wait on. */ reinit_completion(&backend_csf->dump_completed); if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) && !backend_csf->info->fw_in_protected_mode) { /* Only do the request if we are fully enabled and not in * protected mode. */ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED; do_request = true; } else { /* Skip the request and waiting for ack and go straight to * checking the insert and kicking off the worker to do the dump */ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT; } /* CSF firmware might enter protected mode now, but still call request. * That is fine, as we changed state while holding the lock, so the * protected mode enter function will query the insert and launch the * dumping worker. 
* At some point we will get the dump request ACK saying a dump is done, * but we can ignore it if we are not in the REQUESTED state and process * it in next round dumping worker. */ *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); kbasep_hwcnt_backend_csf_cc_update(backend_csf); if (do_request) backend_csf->info->csf_if->dump_request( backend_csf->info->csf_if->ctx); else kbase_hwcnt_backend_csf_submit_dump_worker(backend_csf->info); backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); return 0; } /* CSF backend implementation of kbase_hwcnt_backend_dump_wait_fn */ static int kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend) { unsigned long flags; struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; int errcode; if (!backend_csf) return -EINVAL; wait_for_completion(&backend_csf->dump_completed); backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); /* Make sure the last dump actually succeeded. */ errcode = (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) ? 0 : -EIO; backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); return errcode; } /* CSF backend implementation of kbase_hwcnt_backend_dump_clear_fn */ static int kbasep_hwcnt_backend_csf_dump_clear(struct kbase_hwcnt_backend *backend) { struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; int errcode; u64 ts; if (!backend_csf) return -EINVAL; /* Request a dump so we can clear all current counters. */ errcode = kbasep_hwcnt_backend_csf_dump_request(backend, &ts); if (!errcode) /* Wait for the manual dump or auto dump to be done and * accumulator to be updated. 
*/ errcode = kbasep_hwcnt_backend_csf_dump_wait(backend); return errcode; } /* CSF backend implementation of kbase_hwcnt_backend_dump_get_fn */ static int kbasep_hwcnt_backend_csf_dump_get( struct kbase_hwcnt_backend *backend, struct kbase_hwcnt_dump_buffer *dst, const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate) { struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; int ret; size_t clk; if (!backend_csf || !dst || !dst_enable_map || (backend_csf->info->metadata != dst->metadata) || (dst_enable_map->metadata != dst->metadata)) return -EINVAL; kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) { if (!kbase_hwcnt_clk_enable_map_enabled( dst_enable_map->clk_enable_map, clk)) continue; /* Extract elapsed cycle count for each clock domain. */ dst->clk_cnt_buf[clk] = backend_csf->cycle_count_elapsed[clk]; } /* We just return the user buffer without checking the current state, * as it is undefined to call this function without a prior succeeding * one to dump_wait(). */ ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf, dst_enable_map, accumulate); return ret; } /** * kbasep_hwcnt_backend_csf_destroy() - Destroy CSF backend. * @backend_csf: Pointer to CSF backend to destroy. * * Can be safely called on a backend in any state of partial construction. * */ static void kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *backend_csf) { if (!backend_csf) return; destroy_workqueue(backend_csf->hwc_dump_workq); backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx, backend_csf->ring_buf); kfree(backend_csf->accum_buf); backend_csf->accum_buf = NULL; kfree(backend_csf->old_sample_buf); backend_csf->old_sample_buf = NULL; kfree(backend_csf->to_user_buf); backend_csf->to_user_buf = NULL; kfree(backend_csf); } /** * kbasep_hwcnt_backend_csf_create() - Create a CSF backend instance. * * @csf_info: Non-NULL pointer to backend info. 
* @out_backend: Non-NULL pointer to where backend is stored on success. * Return: 0 on success, else error code. */ static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info, struct kbase_hwcnt_backend_csf **out_backend) { struct kbase_hwcnt_backend_csf *backend_csf = NULL; int errcode = -ENOMEM; WARN_ON(!csf_info); WARN_ON(!out_backend); backend_csf = kzalloc(sizeof(*backend_csf), GFP_KERNEL); if (!backend_csf) goto alloc_error; backend_csf->info = csf_info; kbasep_hwcnt_backend_csf_init_layout(&csf_info->prfcnt_info, &backend_csf->phys_layout); backend_csf->accum_buf = kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL); if (!backend_csf->accum_buf) goto err_alloc_acc_buf; backend_csf->old_sample_buf = kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL); if (!backend_csf->old_sample_buf) goto err_alloc_pre_sample_buf; backend_csf->to_user_buf = kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL); if (!backend_csf->to_user_buf) goto err_alloc_user_sample_buf; errcode = csf_info->csf_if->ring_buf_alloc( csf_info->csf_if->ctx, csf_info->ring_buf_cnt, &backend_csf->ring_buf_cpu_base, &backend_csf->ring_buf); if (errcode) goto err_ring_buf_alloc; /* Zero all performance enable header to prepare for first enable. */ kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf); /* Sync zeroed buffers to avoid coherency issues on use. */ backend_csf->info->csf_if->ring_buf_sync( backend_csf->info->csf_if->ctx, backend_csf->ring_buf, 0, backend_csf->info->ring_buf_cnt, false); init_completion(&backend_csf->dump_completed); init_waitqueue_head(&backend_csf->enable_state_waitq); /* Allocate a single threaded work queue for dump worker and threshold * worker. 
*/ backend_csf->hwc_dump_workq = alloc_workqueue("mali_hwc_dump_wq", WQ_HIGHPRI | WQ_UNBOUND, 1); if (!backend_csf->hwc_dump_workq) goto err_alloc_workqueue; INIT_WORK(&backend_csf->hwc_dump_work, kbasep_hwcnt_backend_csf_dump_worker); INIT_WORK(&backend_csf->hwc_threshold_work, kbasep_hwcnt_backend_csf_threshold_worker); backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_DISABLED; backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; complete_all(&backend_csf->dump_completed); *out_backend = backend_csf; return 0; destroy_workqueue(backend_csf->hwc_dump_workq); err_alloc_workqueue: backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx, backend_csf->ring_buf); err_ring_buf_alloc: kfree(backend_csf->to_user_buf); backend_csf->to_user_buf = NULL; err_alloc_user_sample_buf: kfree(backend_csf->old_sample_buf); backend_csf->old_sample_buf = NULL; err_alloc_pre_sample_buf: kfree(backend_csf->accum_buf); backend_csf->accum_buf = NULL; err_alloc_acc_buf: kfree(backend_csf); alloc_error: return errcode; } /* CSF backend implementation of kbase_hwcnt_backend_init_fn */ static int kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info, struct kbase_hwcnt_backend **out_backend) { unsigned long flags; struct kbase_hwcnt_backend_csf *backend_csf = NULL; struct kbase_hwcnt_backend_csf_info *csf_info = (struct kbase_hwcnt_backend_csf_info *)info; int errcode; bool success = false; if (!info || !out_backend) return -EINVAL; /* Create the backend. */ errcode = kbasep_hwcnt_backend_csf_create(csf_info, &backend_csf); if (errcode) return errcode; /* If it was not created before, attach it to csf_info. * Use spin lock to avoid concurrent initialization. 
*/ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); if (csf_info->backend == NULL) { csf_info->backend = backend_csf; *out_backend = (struct kbase_hwcnt_backend *)backend_csf; success = true; if (csf_info->unrecoverable_error_happened) backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR; } backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); /* Destroy the new created backend if the backend has already created * before. In normal case, this won't happen if the client call init() * function properly. */ if (!success) { kbasep_hwcnt_backend_csf_destroy(backend_csf); return -EBUSY; } return 0; } /* CSF backend implementation of kbase_hwcnt_backend_term_fn */ static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend) { unsigned long flags; struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; if (!backend) return; kbasep_hwcnt_backend_csf_dump_disable(backend); /* Set the backend in csf_info to NULL so we won't handle any external * notification anymore since we are terminating. */ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); backend_csf->info->backend = NULL; backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); kbasep_hwcnt_backend_csf_destroy(backend_csf); } /** * kbasep_hwcnt_backend_csf_info_destroy() - Destroy a CSF backend info. * @info: Pointer to info to destroy. * * Can be safely called on a backend info in any state of partial construction. * */ static void kbasep_hwcnt_backend_csf_info_destroy( const struct kbase_hwcnt_backend_csf_info *info) { if (!info) return; /* The backend should be destroyed before the info object destroy. */ WARN_ON(info->backend != NULL); /* The metadata should be destroyed before the info object destroy. */ WARN_ON(info->metadata != NULL); kfree(info); } /** * kbasep_hwcnt_backend_csf_info_create() - Create a CSF backend info. 
 *
 * @csf_if:       Non-NULL pointer to a hwcnt backend CSF interface structure
 *                used to create backend interface.
 * @ring_buf_cnt: The buffer count of the CSF hwcnt backend ring buffer.
 *                MUST be power of 2.
 * @out_info:     Non-NULL pointer to where info is stored on success.
 *
 * Return: 0 on success, else error code.
 */
static int kbasep_hwcnt_backend_csf_info_create(
	struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
	const struct kbase_hwcnt_backend_csf_info **out_info)
{
	struct kbase_hwcnt_backend_csf_info *info = NULL;

	WARN_ON(!csf_if);
	WARN_ON(!out_info);
	WARN_ON(!is_power_of_2(ring_buf_cnt));

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return -ENOMEM;

	/* Select the counter set at build time. */
#if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY)
	info->counter_set = KBASE_HWCNT_SET_SECONDARY;
#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
	info->counter_set = KBASE_HWCNT_SET_TERTIARY;
#else
	/* Default to primary */
	info->counter_set = KBASE_HWCNT_SET_PRIMARY;
#endif

	info->backend = NULL;
	info->csf_if = csf_if;
	info->ring_buf_cnt = ring_buf_cnt;
	info->fw_in_protected_mode = false;
	info->unrecoverable_error_happened = false;

	*out_info = info;

	return 0;
}

/* CSF backend implementation of kbase_hwcnt_backend_metadata_fn */
static const struct kbase_hwcnt_metadata *
kbasep_hwcnt_backend_csf_metadata(const struct kbase_hwcnt_backend_info *info)
{
	if (!info)
		return NULL;

	WARN_ON(!((const struct kbase_hwcnt_backend_csf_info *)info)->metadata);

	return ((const struct kbase_hwcnt_backend_csf_info *)info)->metadata;
}

/* Move the backend into the unrecoverable error state, waking any waiters
 * and issuing a firmware disable if one is not already in flight.
 * Caller must hold the csf_if lock.
 */
static void kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
	struct kbase_hwcnt_backend_csf *backend_csf)
{
	bool do_disable = false;

	backend_csf->info->csf_if->assert_lock_held(
		backend_csf->info->csf_if->ctx);

	/* We are already in or transitioning to the unrecoverable error state.
	 * Early out.
	 */
	if ((backend_csf->enable_state ==
	     KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) ||
	    (backend_csf->enable_state ==
	     KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER))
		return;

	/* If we are disabled, we know we have no pending workers, so skip the
	 * waiting state.
	 */
	if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED) {
		kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
			backend_csf,
			KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR);
		return;
	}

	/* Trigger a disable only if we are not already transitioning to
	 * disabled, we don't want to disable twice if an unrecoverable error
	 * happens while we are disabling.
	 */
	do_disable = (backend_csf->enable_state !=
		      KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED);

	kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
		backend_csf,
		KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER);

	/* Transition the dump to the IDLE state and unblock any waiters. The
	 * IDLE state signifies an error.
	 */
	backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
	complete_all(&backend_csf->dump_completed);

	/* do_disable was latched above, before we changed enable_state, so the
	 * firmware disable is issued at most once.
	 */
	if (do_disable)
		backend_csf->info->csf_if->dump_disable(
			backend_csf->info->csf_if->ctx);
}

/* React to a recoverable error by starting a transition to disabled (or
 * escalating to unrecoverable if it hit mid-enable). Caller must hold the
 * csf_if lock.
 */
static void kbasep_hwcnt_backend_csf_handle_recoverable_error(
	struct kbase_hwcnt_backend_csf *backend_csf)
{
	backend_csf->info->csf_if->assert_lock_held(
		backend_csf->info->csf_if->ctx);

	switch (backend_csf->enable_state) {
	case KBASE_HWCNT_BACKEND_CSF_DISABLED:
	case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER:
	case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED:
	case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR:
	case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER:
		/* Already disabled or disabling, or in an unrecoverable error.
		 * Nothing to be done to handle the error.
		 */
		return;
	case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED:
		/* A seemingly recoverable error that occurs while we are
		 * transitioning to enabled is probably unrecoverable.
		 */
		kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
			backend_csf);
		return;
	case KBASE_HWCNT_BACKEND_CSF_ENABLED:
		/* Start transitioning to the disabled state. We can't wait for
		 * it as this recoverable error might be triggered from an
		 * interrupt. The wait will be done in the eventual call to
		 * disable().
		 */
		kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
			backend_csf,
			KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED);

		/* Transition the dump to the IDLE state and unblock any
		 * waiters. The IDLE state signifies an error.
		 */
		backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
		complete_all(&backend_csf->dump_completed);

		backend_csf->info->csf_if->dump_disable(
			backend_csf->info->csf_if->ctx);
		return;
	}
}

/* Notification that the firmware is entering protected mode.
 * Caller must hold the csf_if lock (checked by assert_lock_held).
 */
void kbase_hwcnt_backend_csf_protm_entered(
	struct kbase_hwcnt_backend_interface *iface)
{
	struct kbase_hwcnt_backend_csf_info *csf_info =
		(struct kbase_hwcnt_backend_csf_info *)iface->info;

	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
	csf_info->fw_in_protected_mode = true;

	/* Call on_prfcnt_sample() to trigger collection of the protected mode
	 * entry auto-sample if there is currently a pending dump request.
	 */
	kbase_hwcnt_backend_csf_on_prfcnt_sample(iface);
}

/* Notification that the firmware has left protected mode.
 * Caller must hold the csf_if lock.
 */
void kbase_hwcnt_backend_csf_protm_exited(
	struct kbase_hwcnt_backend_interface *iface)
{
	struct kbase_hwcnt_backend_csf_info *csf_info;

	csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
	csf_info->fw_in_protected_mode = false;
}

/* Notification of an unrecoverable firmware error. The flag is recorded even
 * when no backend exists yet, so a later init() starts in the error state.
 */
void kbase_hwcnt_backend_csf_on_unrecoverable_error(
	struct kbase_hwcnt_backend_interface *iface)
{
	unsigned long flags;
	struct kbase_hwcnt_backend_csf_info *csf_info;

	csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
	csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags);
	csf_info->unrecoverable_error_happened = true;
	/* Early out if the backend does not exist. */
	if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) {
		csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
		return;
	}

	kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend);

	csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
}

/* Notification that a GPU reset is about to happen; this is also the only
 * path that clears the unrecoverable error state.
 */
void kbase_hwcnt_backend_csf_on_before_reset(
	struct kbase_hwcnt_backend_interface *iface)
{
	unsigned long flags;
	struct kbase_hwcnt_backend_csf_info *csf_info;
	struct kbase_hwcnt_backend_csf *backend_csf;

	csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
	csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags);
	csf_info->unrecoverable_error_happened = false;
	/* Early out if the backend does not exist. */
	if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) {
		csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
		return;
	}
	backend_csf = csf_info->backend;

	if ((backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) &&
	    (backend_csf->enable_state !=
	     KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR)) {
		/* Before a reset occurs, we must either have been disabled
		 * (else we lose data) or we should have encountered an
		 * unrecoverable error. Either way, we will have disabled the
		 * interface and waited for any workers that might have still
		 * been in flight.
		 * If not in these states, fire off one more disable to make
		 * sure everything is turned off before the power is pulled.
		 * We can't wait for this disable to complete, but it doesn't
		 * really matter, the power is being pulled.
		 */
		kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
			csf_info->backend);
	}

	/* A reset is the only way to exit the unrecoverable error state */
	if (backend_csf->enable_state ==
	    KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) {
		kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
			backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED);
	}

	csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
}

/* Firmware notification that a sample (dump ACK) is available.
 * Caller must hold the csf_if lock.
 */
void kbase_hwcnt_backend_csf_on_prfcnt_sample(
	struct kbase_hwcnt_backend_interface *iface)
{
	struct kbase_hwcnt_backend_csf_info *csf_info;
	struct kbase_hwcnt_backend_csf *backend_csf;

	csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);

	/* Early out if the backend does not exist. */
	if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
		return;
	backend_csf = csf_info->backend;

	/* If the current state is not REQUESTED, this HWC sample will be
	 * skipped and processed in next dump_request.
	 */
	if (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED)
		return;
	backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT;

	kbase_hwcnt_backend_csf_submit_dump_worker(csf_info);
}

/* Firmware notification that the ring buffer has crossed its threshold.
 * Caller must hold the csf_if lock.
 */
void kbase_hwcnt_backend_csf_on_prfcnt_threshold(
	struct kbase_hwcnt_backend_interface *iface)
{
	struct kbase_hwcnt_backend_csf_info *csf_info;
	struct kbase_hwcnt_backend_csf *backend_csf;

	csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);

	/* Early out if the backend does not exist. */
	if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
		return;
	backend_csf = csf_info->backend;

	if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED)
		/* Submit the threshold work into the work queue to consume the
		 * available samples.
		 */
		queue_work(backend_csf->hwc_dump_workq,
			   &backend_csf->hwc_threshold_work);
}

/* Firmware notification that the sample ring buffer has overflowed.
 * Caller must hold the csf_if lock.
 */
void kbase_hwcnt_backend_csf_on_prfcnt_overflow(
	struct kbase_hwcnt_backend_interface *iface)
{
	struct kbase_hwcnt_backend_csf_info *csf_info;

	csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);

	/* Early out if the backend does not exist. */
	if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
		return;

	/* Called when an overflow occurs. We treat this as a recoverable error,
	 * so we start transitioning to the disabled state.
	 * We could try and handle it while enabled, but in a real system we
	 * never expect an overflow to occur so there is no point implementing
	 * complex recovery code when we can just turn ourselves off instead for
	 * a while.
	 */
	kbasep_hwcnt_backend_csf_handle_recoverable_error(csf_info->backend);
}

/* Firmware acknowledgment of a counter-enable request.
 * Caller must hold the csf_if lock.
 */
void kbase_hwcnt_backend_csf_on_prfcnt_enable(
	struct kbase_hwcnt_backend_interface *iface)
{
	struct kbase_hwcnt_backend_csf_info *csf_info;
	struct kbase_hwcnt_backend_csf *backend_csf;

	csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);

	/* Early out if the backend does not exist. */
	if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
		return;
	backend_csf = csf_info->backend;

	if (backend_csf->enable_state ==
	    KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) {
		kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
			backend_csf, KBASE_HWCNT_BACKEND_CSF_ENABLED);
	} else if (backend_csf->enable_state ==
		   KBASE_HWCNT_BACKEND_CSF_ENABLED) {
		/* Unexpected, but we are already in the right state so just
		 * ignore it.
		 */
	} else {
		/* Unexpected state change, assume everything is broken until
		 * we reset.
		 */
		kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
			csf_info->backend);
	}
}

/* Firmware acknowledgment of a counter-disable request.
 * Caller must hold the csf_if lock.
 */
void kbase_hwcnt_backend_csf_on_prfcnt_disable(
	struct kbase_hwcnt_backend_interface *iface)
{
	struct kbase_hwcnt_backend_csf_info *csf_info;
	struct kbase_hwcnt_backend_csf *backend_csf;

	csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);

	/* Early out if the backend does not exist. */
	if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
		return;
	backend_csf = csf_info->backend;

	if (backend_csf->enable_state ==
	    KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED) {
		kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
			backend_csf,
			KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER);
	} else if (backend_csf->enable_state ==
		   KBASE_HWCNT_BACKEND_CSF_DISABLED) {
		/* Unexpected, but we are already in the right state so just
		 * ignore it.
		 */
	} else {
		/* Unexpected state change, assume everything is broken until
		 * we reset.
		 */
		kbasep_hwcnt_backend_csf_handle_unrecoverable_error(
			csf_info->backend);
	}
}

/* Query the firmware counter layout and build the hwcnt metadata from it.
 * Returns 0 on success, -EINVAL on NULL iface, -EIO if the reported clock
 * domain count exceeds BASE_MAX_NR_CLOCKS_REGULATORS, else a metadata
 * creation error code.
 */
int kbase_hwcnt_backend_csf_metadata_init(
	struct kbase_hwcnt_backend_interface *iface)
{
	int errcode;
	struct kbase_hwcnt_backend_csf_info *csf_info;
	struct kbase_hwcnt_gpu_info gpu_info;

	if (!iface)
		return -EINVAL;

	csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;

	WARN_ON(!csf_info->csf_if->get_prfcnt_info);

	csf_info->csf_if->get_prfcnt_info(csf_info->csf_if->ctx,
					  &csf_info->prfcnt_info);

	/* The clock domain counts should not exceed the number of maximum
	 * number of clock regulators.
	 */
	if (csf_info->prfcnt_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS)
		return -EIO;

	gpu_info.l2_count = csf_info->prfcnt_info.l2_count;
	gpu_info.core_mask = csf_info->prfcnt_info.core_mask;
	gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt;
	gpu_info.prfcnt_values_per_block =
		csf_info->prfcnt_info.prfcnt_block_size /
		KBASE_HWCNT_VALUE_BYTES;
	errcode = kbase_hwcnt_csf_metadata_create(
		&gpu_info, csf_info->counter_set, &csf_info->metadata);
	if (errcode)
		return errcode;

	/*
	 * Dump abstraction size should be exactly the same size and layout as
	 * the physical dump size, for backwards compatibility.
	 */
	WARN_ON(csf_info->prfcnt_info.dump_bytes !=
		csf_info->metadata->dump_buf_bytes);

	return 0;
}

/* Destroy the metadata created by kbase_hwcnt_backend_csf_metadata_init().
 * Safe to call when metadata was never created.
 */
void kbase_hwcnt_backend_csf_metadata_term(
	struct kbase_hwcnt_backend_interface *iface)
{
	struct kbase_hwcnt_backend_csf_info *csf_info;

	if (!iface)
		return;

	csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
	if (csf_info->metadata) {
		kbase_hwcnt_csf_metadata_destroy(csf_info->metadata);
		csf_info->metadata = NULL;
	}
}

/* Create a CSF hwcnt backend interface: allocates the backend info and wires
 * up the kbase_hwcnt_backend_interface function table.
 * Returns 0 on success, -EINVAL on NULL arguments or a non-power-of-2
 * ring_buf_cnt, else an info creation error code.
 */
int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if,
				   u32 ring_buf_cnt,
				   struct kbase_hwcnt_backend_interface *iface)
{
	int errcode;
	const struct kbase_hwcnt_backend_csf_info *info = NULL;

	if (!iface || !csf_if)
		return -EINVAL;

	/* The buffer count must be power of 2 */
	if (!is_power_of_2(ring_buf_cnt))
		return -EINVAL;

	errcode = kbasep_hwcnt_backend_csf_info_create(csf_if, ring_buf_cnt,
						       &info);
	if (errcode)
		return errcode;

	iface->info = (struct kbase_hwcnt_backend_info *)info;
	iface->metadata = kbasep_hwcnt_backend_csf_metadata;
	iface->init = kbasep_hwcnt_backend_csf_init;
	iface->term = kbasep_hwcnt_backend_csf_term;
	iface->timestamp_ns = kbasep_hwcnt_backend_csf_timestamp_ns;
	iface->dump_enable = kbasep_hwcnt_backend_csf_dump_enable;
	iface->dump_enable_nolock = kbasep_hwcnt_backend_csf_dump_enable_nolock;
	iface->dump_disable = kbasep_hwcnt_backend_csf_dump_disable;
	iface->dump_clear = kbasep_hwcnt_backend_csf_dump_clear;
iface->dump_request = kbasep_hwcnt_backend_csf_dump_request; iface->dump_wait = kbasep_hwcnt_backend_csf_dump_wait; iface->dump_get = kbasep_hwcnt_backend_csf_dump_get; return 0; } void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface) { if (!iface) return; kbasep_hwcnt_backend_csf_info_destroy( (const struct kbase_hwcnt_backend_csf_info *)iface->info); memset(iface, 0, sizeof(*iface)); }