/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
 *
 * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

#ifndef _KBASE_HWCNT_GPU_H_
#define _KBASE_HWCNT_GPU_H_

#include <linux/types.h>

/*
 * Forward declarations: this header only refers to these types through
 * pointers, so their full definitions are not needed here.
 */
struct kbase_device;
struct kbase_hwcnt_metadata;
struct kbase_hwcnt_enable_map;
struct kbase_hwcnt_dump_buffer;

/* Number of V5 block types (presumably FE, tiler, shader core and memsys). */
#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4

/* Number of header values at the start of each V5 counter block. */
#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4

/* Default number of counter values (headers excluded) in a V5 block. */
#define KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK 60

/* Default total number of values (headers + counters) in a V5 block. */
#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \
	(KBASE_HWCNT_V5_HEADERS_PER_BLOCK + \
	 KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK)

/* Index of the PRFCNT_EN header into a V5 counter block. */
#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2

/**
 * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to
 *                                   identify metadata groups.
 * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type.
 */
enum kbase_hwcnt_gpu_group_type {
	KBASE_HWCNT_GPU_GROUP_TYPE_V5,
};

/**
 * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types,
 *                                      used to identify metadata blocks.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: Undefined block (e.g. if a
 *                                                counter set that a block
 *                                                doesn't support is used).
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:        Front End block (Job manager
 *                                                or CSF HW).
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:       Secondary Front End block (Job
 *                                                manager or CSF HW).
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:       Tertiary Front End block (Job
 *                                                manager or CSF HW).
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:     Tiler block.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:        Shader Core block.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:       Secondary Shader Core block.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:       Tertiary Shader Core block.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:    Memsys block.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:   Secondary Memsys block.
 */
enum kbase_hwcnt_gpu_v5_block_type {
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2,
};

/**
 * enum kbase_hwcnt_set - GPU hardware counter sets
 * @KBASE_HWCNT_SET_PRIMARY:   The Primary set of counters
 * @KBASE_HWCNT_SET_SECONDARY: The Secondary set of counters
 * @KBASE_HWCNT_SET_TERTIARY:  The Tertiary set of counters
 */
enum kbase_hwcnt_set {
	KBASE_HWCNT_SET_PRIMARY,
	KBASE_HWCNT_SET_SECONDARY,
	KBASE_HWCNT_SET_TERTIARY,
};

/**
|
* struct kbase_hwcnt_physical_enable_map - Representation of enable map
|
* directly used by GPU.
|
* @fe_bm: Front end (JM/CSHW) counters selection bitmask.
|
* @shader_bm: Shader counters selection bitmask.
|
* @tiler_bm: Tiler counters selection bitmask.
|
* @mmu_l2_bm: MMU_L2 counters selection bitmask.
|
*/
|
struct kbase_hwcnt_physical_enable_map {
|
u32 fe_bm;
|
u32 shader_bm;
|
u32 tiler_bm;
|
u32 mmu_l2_bm;
|
};
|
|
/**
 * enum kbase_hwcnt_physical_set - Values for the Hardware Counter SET_SELECT
 *                                 value. Directly passed to HW.
 * @KBASE_HWCNT_PHYSICAL_SET_PRIMARY:   SET_SELECT value 0.
 * @KBASE_HWCNT_PHYSICAL_SET_SECONDARY: SET_SELECT value 1.
 * @KBASE_HWCNT_PHYSICAL_SET_TERTIARY:  SET_SELECT value 2.
 *
 * The numeric values are explicit because they are handed to the hardware
 * as-is; do not reorder or renumber them.
 */
enum kbase_hwcnt_physical_set {
	KBASE_HWCNT_PHYSICAL_SET_PRIMARY = 0,
	KBASE_HWCNT_PHYSICAL_SET_SECONDARY = 1,
	KBASE_HWCNT_PHYSICAL_SET_TERTIARY = 2,
};

/**
|
* struct kbase_hwcnt_gpu_info - Information about hwcnt blocks on the GPUs.
|
* @l2_count: L2 cache count.
|
* @core_mask: Shader core mask. May be sparse.
|
* @clk_cnt: Number of clock domains available.
|
* @prfcnt_values_per_block: Total entries (header + counters) of performance
|
* counter per block.
|
*/
|
struct kbase_hwcnt_gpu_info {
|
size_t l2_count;
|
u64 core_mask;
|
u8 clk_cnt;
|
size_t prfcnt_values_per_block;
|
};
|
|
/**
|
* struct kbase_hwcnt_curr_config - Current Configuration of HW allocated to the
|
* GPU.
|
* @num_l2_slices: Current number of L2 slices allocated to the GPU.
|
* @shader_present: Current shader present bitmap that is allocated to the GPU.
|
*
|
* For architectures with the max_config interface available from the Arbiter,
|
* the current resources allocated may change during runtime due to a
|
* re-partitioning (possible with partition manager). Thus, the HWC needs to be
|
* prepared to report any possible set of counters. For this reason the memory
|
* layout in the userspace is based on the maximum possible allocation. On the
|
* other hand, each partition has just the view of its currently allocated
|
* resources. Therefore, it is necessary to correctly map the dumped HWC values
|
* from the registers into this maximum memory layout so that it can be exposed
|
* to the userspace side correctly.
|
*
|
* For L2 cache just the number is enough once the allocated ones will be
|
* accumulated on the first L2 slots available in the destination buffer.
|
*
|
* For the correct mapping of the shader cores it is necessary to jump all the
|
* L2 cache slots in the destination buffer that are not allocated. But, it is
|
* not necessary to add any logic to map the shader cores bitmap into the memory
|
* layout because the shader_present allocated will always be a subset of the
|
* maximum shader_present. It is possible because:
|
* 1 - Partitions are made of slices and they are always ordered from the ones
|
* with more shader cores to the ones with less.
|
* 2 - The shader cores in a slice are always contiguous.
|
* 3 - A partition can only have a contiguous set of slices allocated to it.
|
* So, for example, if 4 slices are available in total, 1 with 4 cores, 2 with
|
* 3 cores and 1 with 2 cores. The maximum possible shader_present would be:
|
* 0x0011|0111|0111|1111 -> note the order and that the shader cores are
|
* contiguous in any slice.
|
* Supposing that a partition takes the two slices in the middle, the current
|
* config shader_present for this partition would be:
|
* 0x0111|0111 -> note that this is a subset of the maximum above and the slices
|
* are contiguous.
|
* Therefore, by directly copying any subset of the maximum possible
|
* shader_present the mapping is already achieved.
|
*/
|
struct kbase_hwcnt_curr_config {
|
size_t num_l2_slices;
|
u64 shader_present;
|
};
|
|
/**
 * kbase_hwcnt_jm_metadata_create() - Create hardware counter metadata for the
 *                                    JM GPUs.
 * @info:           Non-NULL pointer to info struct.
 * @counter_set:    The performance counter set used.
 * @out_metadata:   Non-NULL pointer to where created metadata is stored on
 *                  success.
 * @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump
 *                  buffer is stored on success.
 *
 * Return: 0 on success, else error code.
 */
int kbase_hwcnt_jm_metadata_create(
	const struct kbase_hwcnt_gpu_info *info,
	enum kbase_hwcnt_set counter_set,
	const struct kbase_hwcnt_metadata **out_metadata,
	size_t *out_dump_bytes);

/**
 * kbase_hwcnt_jm_metadata_destroy() - Destroy JM GPU hardware counter metadata.
 *
 * @metadata: Pointer to metadata to destroy.
 */
void kbase_hwcnt_jm_metadata_destroy(
	const struct kbase_hwcnt_metadata *metadata);

/**
 * kbase_hwcnt_csf_metadata_create() - Create hardware counter metadata for the
 *                                     CSF GPUs.
 * @info:         Non-NULL pointer to info struct.
 * @counter_set:  The performance counter set used.
 * @out_metadata: Non-NULL pointer to where created metadata is stored on
 *                success.
 *
 * Return: 0 on success, else error code.
 */
int kbase_hwcnt_csf_metadata_create(
	const struct kbase_hwcnt_gpu_info *info,
	enum kbase_hwcnt_set counter_set,
	const struct kbase_hwcnt_metadata **out_metadata);

/**
 * kbase_hwcnt_csf_metadata_destroy() - Destroy CSF GPU hardware counter
 *                                      metadata.
 * @metadata: Pointer to metadata to destroy.
 */
void kbase_hwcnt_csf_metadata_destroy(
	const struct kbase_hwcnt_metadata *metadata);

/**
 * kbase_hwcnt_gpu_metadata_create_truncate_64() - Create HWC metadata with HWC
 *                                                 block entries truncated
 *                                                 to 64.
 *
 * @dst_md: Non-NULL pointer to where created metadata is stored on success.
 * @src_md: Non-NULL pointer to the HWC metadata used as the source to create
 *          dst_md.
 *
 * If the total block entries in src_md is 64, NULL is returned through dst_md
 * since there is no need to truncate.
 * If the total block entries in src_md is 128, then a new metadata with block
 * entries truncated to 64 is created for dst_md, which keeps the interface
 * to user clients backward compatible.
 * If the total block entries in src_md is any other value, the function
 * returns an error since that is not supported.
 *
 * Return: 0 on success, else error code.
 */
int kbase_hwcnt_gpu_metadata_create_truncate_64(
	const struct kbase_hwcnt_metadata **dst_md,
	const struct kbase_hwcnt_metadata *src_md);

/**
 * kbase_hwcnt_dump_buffer_copy_strict_narrow() - Copy all enabled values from
 *                                                src to dst.
 *
 * @dst:            Non-NULL pointer to dst dump buffer.
 * @src:            Non-NULL pointer to src dump buffer.
 * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
 *
 * After the operation, all non-enabled values (including padding bytes) will be
 * zero.
 *
 * The dst and src have different metadata, and the dst metadata is narrower
 * than src metadata.
 */
void kbase_hwcnt_dump_buffer_copy_strict_narrow(
	struct kbase_hwcnt_dump_buffer *dst,
	const struct kbase_hwcnt_dump_buffer *src,
	const struct kbase_hwcnt_enable_map *dst_enable_map);

/**
|
* kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw
|
* dump buffer in src into the dump buffer
|
* abstraction in dst.
|
* @dst: Non-NULL pointer to dst dump buffer.
|
* @src: Non-NULL pointer to src raw dump buffer, of same length
|
* as returned in out_dump_bytes parameter of
|
* kbase_hwcnt_jm_metadata_create.
|
* @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
|
* @pm_core_mask: PM state synchronized shaders core mask with the dump.
|
* @curr_config: Current allocated hardware resources to correctly map the
|
* src raw dump buffer to the dst dump buffer.
|
* @accumulate: True if counters in src should be accumulated into dst,
|
* rather than copied.
|
*
|
* The dst and dst_enable_map MUST have been created from the same metadata as
|
* returned from the call to kbase_hwcnt_jm_metadata_create as was used to get
|
* the length of src.
|
*
|
* Return: 0 on success, else error code.
|
*/
|
int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
|
const struct kbase_hwcnt_enable_map *dst_enable_map,
|
const u64 pm_core_mask,
|
const struct kbase_hwcnt_curr_config *curr_config,
|
bool accumulate);
|
|
/**
 * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw
 *                              dump buffer in src into the dump buffer
 *                              abstraction in dst.
 * @dst:            Non-NULL pointer to dst dump buffer.
 * @src:            Non-NULL pointer to src raw dump buffer, of same length
 *                  as returned in out_dump_bytes parameter of
 *                  kbase_hwcnt_csf_metadata_create.
 * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
 * @accumulate:     True if counters in src should be accumulated into dst,
 *                  rather than copied.
 *
 * The dst and dst_enable_map MUST have been created from the same metadata as
 * returned from the call to kbase_hwcnt_csf_metadata_create as was used to get
 * the length of src.
 *
 * NOTE(review): kbase_hwcnt_csf_metadata_create() as declared in this header
 * has no out_dump_bytes parameter, so the description of the required length
 * of src above looks stale - confirm where the CSF raw dump buffer size is
 * actually obtained from.
 *
 * Return: 0 on success, else error code.
 */
int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src,
			     const struct kbase_hwcnt_enable_map *dst_enable_map,
			     bool accumulate);

/**
 * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction
 *                                            into a physical enable map.
 * @dst: Non-NULL pointer to dst physical enable map.
 * @src: Non-NULL pointer to src enable map abstraction.
 *
 * The src must have been created from a metadata returned from a call to
 * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
 *
 * This is a lossy conversion, as the enable map abstraction has one bit per
 * individual counter block value, but the physical enable map uses 1 bit for
 * every 4 counters, shared over all instances of a block.
 */
void kbase_hwcnt_gpu_enable_map_to_physical(
	struct kbase_hwcnt_physical_enable_map *dst,
	const struct kbase_hwcnt_enable_map *src);

/**
 * kbase_hwcnt_gpu_set_to_physical() - Map counter set selection to physical
 *                                     SET_SELECT value.
 *
 * @dst: Non-NULL pointer to dst physical SET_SELECT value.
 * @src: Counter set selection to map (passed by value).
 */
void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
				     enum kbase_hwcnt_set src);

/**
 * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to
 *                                              an enable map abstraction.
 * @dst: Non-NULL pointer to dst enable map abstraction.
 * @src: Non-NULL pointer to src physical enable map.
 *
 * The dst must have been created from a metadata returned from a call to
 * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
 *
 * This is a lossy conversion, as the physical enable map can technically
 * support counter blocks with 128 counters each, but no hardware actually uses
 * more than 64, so the enable map abstraction has nowhere to store the enable
 * information for the 64 non-existent counters.
 *
 * NOTE(review): kbase_hwcnt_gpu_metadata_create_truncate_64() in this header
 * documents metadata with 128 entries per block, which appears to contradict
 * the "no hardware actually uses more than 64" statement above - confirm
 * whether this paragraph is still accurate.
 */
void kbase_hwcnt_gpu_enable_map_from_physical(
	struct kbase_hwcnt_enable_map *dst,
	const struct kbase_hwcnt_physical_enable_map *src);

/**
 * kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter
 *                                        enable headers in a dump buffer to
 *                                        reflect the specified enable map.
 * @buf:        Non-NULL pointer to dump buffer to patch.
 * @enable_map: Non-NULL pointer to enable map.
 *
 * The buf and enable_map must have been created from a metadata returned from
 * a call to kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
 *
 * This function should be used before handing off a dump buffer over the
 * kernel-user boundary, to ensure the header is accurate for the enable map
 * used by the user.
 */
void kbase_hwcnt_gpu_patch_dump_headers(
	struct kbase_hwcnt_dump_buffer *buf,
	const struct kbase_hwcnt_enable_map *enable_map);

#endif /* _KBASE_HWCNT_GPU_H_ */