// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * */ #include "mali_kbase_hwcnt_gpu.h" #include "mali_kbase_hwcnt_types.h" #include #include static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf) { switch (counter_set) { case KBASE_HWCNT_SET_PRIMARY: *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE; break; case KBASE_HWCNT_SET_SECONDARY: if (is_csf) { *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2; } else { *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; } break; case KBASE_HWCNT_SET_TERTIARY: if (is_csf) { *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3; } else { *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; } break; default: WARN_ON(true); } } static void kbasep_get_tiler_block_type(u64 *dst, enum kbase_hwcnt_set counter_set) { switch (counter_set) { case KBASE_HWCNT_SET_PRIMARY: *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER; break; case KBASE_HWCNT_SET_SECONDARY: case KBASE_HWCNT_SET_TERTIARY: *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; break; default: WARN_ON(true); } } static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf) { switch (counter_set) { case KBASE_HWCNT_SET_PRIMARY: *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC; break; case KBASE_HWCNT_SET_SECONDARY: *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2; break; case KBASE_HWCNT_SET_TERTIARY: if (is_csf) { *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3; } else { *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; } break; default: WARN_ON(true); } } static void kbasep_get_memsys_block_type(u64 *dst, enum kbase_hwcnt_set counter_set) { switch (counter_set) { case KBASE_HWCNT_SET_PRIMARY: *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS; break; case KBASE_HWCNT_SET_SECONDARY: *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2; break; case KBASE_HWCNT_SET_TERTIARY: *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; break; default: WARN_ON(true); } } /** * kbasep_hwcnt_backend_gpu_metadata_create() - Create hardware counter metadata * for the GPU. * @gpu_info: Non-NULL pointer to hwcnt info for current GPU. * @is_csf: true for CSF GPU, otherwise false. * @counter_set: The performance counter set to use. * @metadata: Non-NULL pointer to where created metadata is stored * on success. * * Return: 0 on success, else error code. */ static int kbasep_hwcnt_backend_gpu_metadata_create( const struct kbase_hwcnt_gpu_info *gpu_info, const bool is_csf, enum kbase_hwcnt_set counter_set, const struct kbase_hwcnt_metadata **metadata) { struct kbase_hwcnt_description desc; struct kbase_hwcnt_group_description group; struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; size_t non_sc_block_count; size_t sc_block_count; WARN_ON(!gpu_info); WARN_ON(!metadata); /* Calculate number of block instances that aren't shader cores */ non_sc_block_count = 2 + gpu_info->l2_count; /* Calculate number of block instances that are shader cores */ sc_block_count = fls64(gpu_info->core_mask); /* * A system can have up to 64 shader cores, but the 64-bit * availability mask can't physically represent that many cores as well * as the other hardware blocks. * Error out if there are more blocks than our implementation can * support. */ if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS) return -EINVAL; /* One Front End block */ kbasep_get_fe_block_type(&blks[0].type, counter_set, is_csf); blks[0].inst_cnt = 1; blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; /* One Tiler block */ kbasep_get_tiler_block_type(&blks[1].type, counter_set); blks[1].inst_cnt = 1; blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; /* l2_count memsys blks */ kbasep_get_memsys_block_type(&blks[2].type, counter_set); blks[2].inst_cnt = gpu_info->l2_count; blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; /* * There are as many shader cores in the system as there are bits set in * the core mask. However, the dump buffer memory requirements need to * take into account the fact that the core mask may be non-contiguous. * * For example, a system with a core mask of 0b1011 has the same dump * buffer memory requirements as a system with 0b1111, but requires more * memory than a system with 0b0111. However, core 2 of the system with * 0b1011 doesn't physically exist, and the dump buffer memory that * accounts for that core will never be written to when we do a counter * dump. * * We find the core mask's last set bit to determine the memory * requirements, and embed the core mask into the availability mask so * we can determine later which shader cores physically exist. */ kbasep_get_sc_block_type(&blks[3].type, counter_set, is_csf); blks[3].inst_cnt = sc_block_count; blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4); group.type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; group.blks = blks; desc.grp_cnt = 1; desc.grps = &group; desc.clk_cnt = gpu_info->clk_cnt; /* The JM, Tiler, and L2s are always available, and are before cores */ desc.avail_mask = (1ull << non_sc_block_count) - 1; /* Embed the core mask directly in the availability mask */ desc.avail_mask |= (gpu_info->core_mask << non_sc_block_count); return kbase_hwcnt_metadata_create(&desc, metadata); } /** * kbasep_hwcnt_backend_jm_dump_bytes() - Get the raw dump buffer size for the * GPU. * @gpu_info: Non-NULL pointer to hwcnt info for the GPU. * * Return: Size of buffer the GPU needs to perform a counter dump. */ static size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info) { WARN_ON(!gpu_info); return (2 + gpu_info->l2_count + fls64(gpu_info->core_mask)) * gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_BYTES; } int kbase_hwcnt_jm_metadata_create( const struct kbase_hwcnt_gpu_info *gpu_info, enum kbase_hwcnt_set counter_set, const struct kbase_hwcnt_metadata **out_metadata, size_t *out_dump_bytes) { int errcode; const struct kbase_hwcnt_metadata *metadata; size_t dump_bytes; if (!gpu_info || !out_metadata || !out_dump_bytes) return -EINVAL; /* * For architectures where a max_config interface is available * from the arbiter, the v5 dump bytes and the metadata v5 are * based on the maximum possible allocation of the HW in the * GPU cause it needs to be prepared for the worst case where * all the available L2 cache and Shader cores are allocated. */ dump_bytes = kbasep_hwcnt_backend_jm_dump_bytes(gpu_info); errcode = kbasep_hwcnt_backend_gpu_metadata_create( gpu_info, false, counter_set, &metadata); if (errcode) return errcode; /* * Dump abstraction size should be exactly the same size and layout as * the physical dump size, for backwards compatibility. */ WARN_ON(dump_bytes != metadata->dump_buf_bytes); *out_metadata = metadata; *out_dump_bytes = dump_bytes; return 0; } void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) { if (!metadata) return; kbase_hwcnt_metadata_destroy(metadata); } int kbase_hwcnt_csf_metadata_create( const struct kbase_hwcnt_gpu_info *gpu_info, enum kbase_hwcnt_set counter_set, const struct kbase_hwcnt_metadata **out_metadata) { int errcode; const struct kbase_hwcnt_metadata *metadata; if (!gpu_info || !out_metadata) return -EINVAL; errcode = kbasep_hwcnt_backend_gpu_metadata_create( gpu_info, true, counter_set, &metadata); if (errcode) return errcode; *out_metadata = metadata; return 0; } void kbase_hwcnt_csf_metadata_destroy( const struct kbase_hwcnt_metadata *metadata) { if (!metadata) return; kbase_hwcnt_metadata_destroy(metadata); } int kbase_hwcnt_gpu_metadata_create_truncate_64( const struct kbase_hwcnt_metadata **dst_md, const struct kbase_hwcnt_metadata *src_md) { struct kbase_hwcnt_description desc; struct kbase_hwcnt_group_description group; struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; size_t prfcnt_values_per_block; size_t blk; if (!dst_md || !src_md || !src_md->grp_metadata || !src_md->grp_metadata[0].blk_metadata) return -EINVAL; /* Only support 1 group count and KBASE_HWCNT_V5_BLOCK_TYPE_COUNT block * count in the metadata. */ if ((kbase_hwcnt_metadata_group_count(src_md) != 1) || (kbase_hwcnt_metadata_block_count(src_md, 0) != KBASE_HWCNT_V5_BLOCK_TYPE_COUNT)) return -EINVAL; /* Get the values count in the first block. */ prfcnt_values_per_block = kbase_hwcnt_metadata_block_values_count(src_md, 0, 0); /* check all blocks should have same values count. */ for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) { size_t val_cnt = kbase_hwcnt_metadata_block_values_count(src_md, 0, blk); if (val_cnt != prfcnt_values_per_block) return -EINVAL; } /* Only support 64 and 128 entries per block. */ if ((prfcnt_values_per_block != 64) && (prfcnt_values_per_block != 128)) return -EINVAL; if (prfcnt_values_per_block == 64) { /* If the values per block is 64, no need to truncate. */ *dst_md = NULL; return 0; } /* Truncate from 128 to 64 entries per block to keep API backward * compatibility. */ prfcnt_values_per_block = 64; for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) { blks[blk].type = kbase_hwcnt_metadata_block_type(src_md, 0, blk); blks[blk].inst_cnt = kbase_hwcnt_metadata_block_instance_count( src_md, 0, blk); blks[blk].hdr_cnt = kbase_hwcnt_metadata_block_headers_count( src_md, 0, blk); blks[blk].ctr_cnt = prfcnt_values_per_block - blks[blk].hdr_cnt; } group.type = kbase_hwcnt_metadata_group_type(src_md, 0); group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; group.blks = blks; desc.grp_cnt = kbase_hwcnt_metadata_group_count(src_md); desc.avail_mask = src_md->avail_mask; desc.clk_cnt = src_md->clk_cnt; desc.grps = &group; return kbase_hwcnt_metadata_create(&desc, dst_md); } void kbase_hwcnt_dump_buffer_copy_strict_narrow( struct kbase_hwcnt_dump_buffer *dst, const struct kbase_hwcnt_dump_buffer *src, const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || WARN_ON(dst->metadata == src->metadata) || WARN_ON(dst->metadata->grp_cnt != src->metadata->grp_cnt) || WARN_ON(src->metadata->grp_cnt != 1) || WARN_ON(dst->metadata->grp_metadata[0].blk_cnt != src->metadata->grp_metadata[0].blk_cnt) || WARN_ON(dst->metadata->grp_metadata[0].blk_cnt != 4) || WARN_ON(dst->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt > src->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt)) return; /* Don't use src metadata since src buffer is bigger than dst buffer. */ metadata = dst->metadata; kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( dst, grp, blk, blk_inst); const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance( src, grp, blk, blk_inst); const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( dst_enable_map, grp, blk, blk_inst); size_t val_cnt = kbase_hwcnt_metadata_block_values_count( metadata, grp, blk); /* Align upwards to include padding bytes */ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)); kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, blk_em, val_cnt); } kbase_hwcnt_metadata_for_each_clock(metadata, clk) { bool clk_enabled = kbase_hwcnt_clk_enable_map_enabled( dst_enable_map->clk_enable_map, clk); dst->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0; } } static bool is_block_type_shader( const u64 grp_type, const u64 blk_type, const size_t blk) { bool is_shader = false; /* Warn on unknown group type */ if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5)) return false; if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC || blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 || blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3) is_shader = true; return is_shader; } static bool is_block_type_l2_cache( const u64 grp_type, const u64 blk_type) { bool is_l2_cache = false; switch (grp_type) { case KBASE_HWCNT_GPU_GROUP_TYPE_V5: if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS || blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2) is_l2_cache = true; break; default: /* Warn on unknown group type */ WARN_ON(true); } return is_l2_cache; } int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, const struct kbase_hwcnt_enable_map *dst_enable_map, u64 pm_core_mask, const struct kbase_hwcnt_curr_config *curr_config, bool accumulate) { const struct kbase_hwcnt_metadata *metadata; const u32 *dump_src; size_t src_offset, grp, blk, blk_inst; u64 core_mask = pm_core_mask; /* Variables to deal with the current configuration */ int l2_count = 0; bool hw_res_available = true; if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata)) return -EINVAL; metadata = dst->metadata; dump_src = (const u32 *)src; src_offset = 0; kbase_hwcnt_metadata_for_each_block( metadata, grp, blk, blk_inst) { const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count( metadata, grp, blk); const size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count( metadata, grp, blk); const u64 blk_type = kbase_hwcnt_metadata_block_type( metadata, grp, blk); const bool is_shader_core = is_block_type_shader( kbase_hwcnt_metadata_group_type(metadata, grp), blk_type, blk); const bool is_l2_cache = is_block_type_l2_cache( kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); /* * If l2 blocks is greater than the current allocated number of * L2 slices, there is no hw allocated to that block. */ if (is_l2_cache) { l2_count++; if (l2_count > curr_config->num_l2_slices) hw_res_available = false; else hw_res_available = true; } /* * For the shader cores, the current shader_mask allocated is * always a subgroup of the maximum shader_mask, so after * jumping any L2 cache not available the available shader cores * will always have a matching set of blk instances available to * accumulate them. */ else { hw_res_available = true; } /* * Early out if no values in the dest block are enabled or if * the resource target of the block is not available in the HW. */ if (kbase_hwcnt_enable_map_block_enabled( dst_enable_map, grp, blk, blk_inst)) { u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( dst, grp, blk, blk_inst); const u32 *src_blk = dump_src + src_offset; if ((!is_shader_core || (core_mask & 1)) && hw_res_available) { if (accumulate) { kbase_hwcnt_dump_buffer_block_accumulate( dst_blk, src_blk, hdr_cnt, ctr_cnt); } else { kbase_hwcnt_dump_buffer_block_copy( dst_blk, src_blk, (hdr_cnt + ctr_cnt)); } } else if (!accumulate) { kbase_hwcnt_dump_buffer_block_zero( dst_blk, (hdr_cnt + ctr_cnt)); } } /* Just increase the src_offset if the HW is available */ if (hw_res_available) src_offset += (hdr_cnt + ctr_cnt); if (is_shader_core) core_mask = core_mask >> 1; } return 0; } int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate) { const struct kbase_hwcnt_metadata *metadata; const u32 *dump_src; size_t src_offset, grp, blk, blk_inst; if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata)) return -EINVAL; metadata = dst->metadata; dump_src = (const u32 *)src; src_offset = 0; kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count( metadata, grp, blk); const size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); /* Early out if no values in the dest block are enabled */ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) { u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( dst, grp, blk, blk_inst); const u32 *src_blk = dump_src + src_offset; if (accumulate) { kbase_hwcnt_dump_buffer_block_accumulate( dst_blk, src_blk, hdr_cnt, ctr_cnt); } else { kbase_hwcnt_dump_buffer_block_copy( dst_blk, src_blk, (hdr_cnt + ctr_cnt)); } } src_offset += (hdr_cnt + ctr_cnt); } return 0; } /** * kbasep_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block * enable map abstraction to * a physical block enable * map. * @lo: Low 64 bits of block enable map abstraction. * @hi: High 64 bits of block enable map abstraction. * * The abstraction uses 128 bits to enable 128 block values, whereas the * physical uses just 32 bits, as bit n enables values [n*4, n*4+3]. * Therefore, this conversion is lossy. * * Return: 32-bit physical block enable map. */ static inline u32 kbasep_hwcnt_backend_gpu_block_map_to_physical( u64 lo, u64 hi) { u32 phys = 0; u64 dwords[2] = {lo, hi}; size_t dword_idx; for (dword_idx = 0; dword_idx < 2; dword_idx++) { const u64 dword = dwords[dword_idx]; u16 packed = 0; size_t hword_bit; for (hword_bit = 0; hword_bit < 16; hword_bit++) { const size_t dword_bit = hword_bit * 4; const u16 mask = ((dword >> (dword_bit + 0)) & 0x1) | ((dword >> (dword_bit + 1)) & 0x1) | ((dword >> (dword_bit + 2)) & 0x1) | ((dword >> (dword_bit + 3)) & 0x1); packed |= (mask << hword_bit); } phys |= ((u32)packed) << (16 * dword_idx); } return phys; } /** * kbasep_hwcnt_backend_gpu_block_map_from_physical() - Convert from a physical * block enable map to a * block enable map * abstraction. * @phys: Physical 32-bit block enable map * @lo: Non-NULL pointer to where low 64 bits of block enable map abstraction * will be stored. * @hi: Non-NULL pointer to where high 64 bits of block enable map abstraction * will be stored. */ static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical( u32 phys, u64 *lo, u64 *hi) { u64 dwords[2] = {0, 0}; size_t dword_idx; for (dword_idx = 0; dword_idx < 2; dword_idx++) { const u16 packed = phys >> (16 * dword_idx); u64 dword = 0; size_t hword_bit; for (hword_bit = 0; hword_bit < 16; hword_bit++) { const size_t dword_bit = hword_bit * 4; const u64 mask = (packed >> (hword_bit)) & 0x1; dword |= mask << (dword_bit + 0); dword |= mask << (dword_bit + 1); dword |= mask << (dword_bit + 2); dword |= mask << (dword_bit + 3); } dwords[dword_idx] = dword; } *lo = dwords[0]; *hi = dwords[1]; } void kbase_hwcnt_gpu_enable_map_to_physical( struct kbase_hwcnt_physical_enable_map *dst, const struct kbase_hwcnt_enable_map *src) { const struct kbase_hwcnt_metadata *metadata; u64 fe_bm = 0; u64 shader_bm = 0; u64 tiler_bm = 0; u64 mmu_l2_bm = 0; size_t grp, blk, blk_inst; if (WARN_ON(!src) || WARN_ON(!dst)) return; metadata = src->metadata; kbase_hwcnt_metadata_for_each_block( metadata, grp, blk, blk_inst) { const u64 grp_type = kbase_hwcnt_metadata_group_type( metadata, grp); const u64 blk_type = kbase_hwcnt_metadata_block_type( metadata, grp, blk); const u64 *blk_map = kbase_hwcnt_enable_map_block_instance( src, grp, blk, blk_inst); if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: /* Nothing to do in this case. */ break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: fe_bm |= *blk_map; break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: tiler_bm |= *blk_map; break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: shader_bm |= *blk_map; break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: mmu_l2_bm |= *blk_map; break; default: WARN_ON(true); } } else { WARN_ON(true); } } dst->fe_bm = kbasep_hwcnt_backend_gpu_block_map_to_physical(fe_bm, 0); dst->shader_bm = kbasep_hwcnt_backend_gpu_block_map_to_physical(shader_bm, 0); dst->tiler_bm = kbasep_hwcnt_backend_gpu_block_map_to_physical(tiler_bm, 0); dst->mmu_l2_bm = kbasep_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm, 0); } void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src) { switch (src) { case KBASE_HWCNT_SET_PRIMARY: *dst = KBASE_HWCNT_PHYSICAL_SET_PRIMARY; break; case KBASE_HWCNT_SET_SECONDARY: *dst = KBASE_HWCNT_PHYSICAL_SET_SECONDARY; break; case KBASE_HWCNT_SET_TERTIARY: *dst = KBASE_HWCNT_PHYSICAL_SET_TERTIARY; break; default: WARN_ON(true); } } void kbase_hwcnt_gpu_enable_map_from_physical( struct kbase_hwcnt_enable_map *dst, const struct kbase_hwcnt_physical_enable_map *src) { const struct kbase_hwcnt_metadata *metadata; u64 ignored_hi; u64 fe_bm; u64 shader_bm; u64 tiler_bm; u64 mmu_l2_bm; size_t grp, blk, blk_inst; if (WARN_ON(!src) || WARN_ON(!dst)) return; metadata = dst->metadata; kbasep_hwcnt_backend_gpu_block_map_from_physical( src->fe_bm, &fe_bm, &ignored_hi); kbasep_hwcnt_backend_gpu_block_map_from_physical( src->shader_bm, &shader_bm, &ignored_hi); kbasep_hwcnt_backend_gpu_block_map_from_physical( src->tiler_bm, &tiler_bm, &ignored_hi); kbasep_hwcnt_backend_gpu_block_map_from_physical( src->mmu_l2_bm, &mmu_l2_bm, &ignored_hi); kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { const u64 grp_type = kbase_hwcnt_metadata_group_type( metadata, grp); const u64 blk_type = kbase_hwcnt_metadata_block_type( metadata, grp, blk); u64 *blk_map = kbase_hwcnt_enable_map_block_instance( dst, grp, blk, blk_inst); if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: /* Nothing to do in this case. */ break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: *blk_map = fe_bm; break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: *blk_map = tiler_bm; break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: *blk_map = shader_bm; break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: *blk_map = mmu_l2_bm; break; default: WARN_ON(true); } } else { WARN_ON(true); } } } void kbase_hwcnt_gpu_patch_dump_headers( struct kbase_hwcnt_dump_buffer *buf, const struct kbase_hwcnt_enable_map *enable_map) { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; if (WARN_ON(!buf) || WARN_ON(!enable_map) || WARN_ON(buf->metadata != enable_map->metadata)) return; metadata = buf->metadata; kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp); u32 *buf_blk = kbase_hwcnt_dump_buffer_block_instance( buf, grp, blk, blk_inst); const u64 *blk_map = kbase_hwcnt_enable_map_block_instance( enable_map, grp, blk, blk_inst); const u32 prfcnt_en = kbasep_hwcnt_backend_gpu_block_map_to_physical( blk_map[0], 0); if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en; } else { WARN_ON(true); } } }