| .. | .. |
|---|
| 19 | 19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
|---|
| 20 | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
|---|
| 21 | 21 | */ |
|---|
| 22 | | - |
|---|
| 23 | | -#define pr_fmt(fmt) "kfd2kgd: " fmt |
|---|
| 24 | | - |
|---|
| 25 | | -#include <linux/module.h> |
|---|
| 26 | | -#include <linux/fdtable.h> |
|---|
| 27 | | -#include <linux/uaccess.h> |
|---|
| 28 | | -#include <linux/firmware.h> |
|---|
| 29 | | -#include <drm/drmP.h> |
|---|
| 30 | 22 | #include "amdgpu.h" |
|---|
| 31 | 23 | #include "amdgpu_amdkfd.h" |
|---|
| 32 | | -#include "amdgpu_ucode.h" |
|---|
| 33 | | -#include "soc15_hw_ip.h" |
|---|
| 34 | 24 | #include "gc/gc_9_0_offset.h" |
|---|
| 35 | 25 | #include "gc/gc_9_0_sh_mask.h" |
|---|
| 36 | 26 | #include "vega10_enum.h" |
|---|
| .. | .. |
|---|
| 46 | 36 | #include "v9_structs.h" |
|---|
| 47 | 37 | #include "soc15.h" |
|---|
| 48 | 38 | #include "soc15d.h" |
|---|
| 49 | | - |
|---|
| 50 | | -/* HACK: MMHUB and GC both have VM-related register with the same |
|---|
| 51 | | - * names but different offsets. Define the MMHUB register we need here |
|---|
| 52 | | - * with a prefix. A proper solution would be to move the functions |
|---|
| 53 | | - * programming these registers into gfx_v9_0.c and mmhub_v1_0.c |
|---|
| 54 | | - * respectively. |
|---|
| 55 | | - */ |
|---|
| 56 | | -#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3 |
|---|
| 57 | | -#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0 |
|---|
| 58 | | - |
|---|
| 59 | | -#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705 |
|---|
| 60 | | -#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0 |
|---|
| 61 | | - |
|---|
| 62 | | -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b |
|---|
| 63 | | -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0 |
|---|
| 64 | | -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c |
|---|
| 65 | | -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0 |
|---|
| 66 | | - |
|---|
| 67 | | -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b |
|---|
| 68 | | -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0 |
|---|
| 69 | | -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c |
|---|
| 70 | | -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0 |
|---|
| 71 | | - |
|---|
| 72 | | -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b |
|---|
| 73 | | -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0 |
|---|
| 74 | | -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c |
|---|
| 75 | | -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0 |
|---|
| 76 | | - |
|---|
| 77 | | -#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727 |
|---|
| 78 | | -#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0 |
|---|
| 79 | | -#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728 |
|---|
| 80 | | -#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0 |
|---|
| 81 | | - |
|---|
| 82 | | -#define V9_PIPE_PER_MEC (4) |
|---|
| 83 | | -#define V9_QUEUES_PER_PIPE_MEC (8) |
|---|
| 39 | +#include "gfx_v9_0.h" |
|---|
| 84 | 40 | |
|---|
| 85 | 41 | enum hqd_dequeue_request_type { |
|---|
| 86 | 42 | NO_ACTION = 0, |
|---|
| 87 | 43 | DRAIN_PIPE, |
|---|
| 88 | 44 | RESET_WAVES |
|---|
| 89 | 45 | }; |
|---|
| 90 | | - |
|---|
| 91 | | -/* |
|---|
| 92 | | - * Register access functions |
|---|
| 93 | | - */ |
|---|
| 94 | | - |
|---|
| 95 | | -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, |
|---|
| 96 | | - uint32_t sh_mem_config, |
|---|
| 97 | | - uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, |
|---|
| 98 | | - uint32_t sh_mem_bases); |
|---|
| 99 | | -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, |
|---|
| 100 | | - unsigned int vmid); |
|---|
| 101 | | -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); |
|---|
| 102 | | -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, |
|---|
| 103 | | - uint32_t queue_id, uint32_t __user *wptr, |
|---|
| 104 | | - uint32_t wptr_shift, uint32_t wptr_mask, |
|---|
| 105 | | - struct mm_struct *mm); |
|---|
| 106 | | -static int kgd_hqd_dump(struct kgd_dev *kgd, |
|---|
| 107 | | - uint32_t pipe_id, uint32_t queue_id, |
|---|
| 108 | | - uint32_t (**dump)[2], uint32_t *n_regs); |
|---|
| 109 | | -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, |
|---|
| 110 | | - uint32_t __user *wptr, struct mm_struct *mm); |
|---|
| 111 | | -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, |
|---|
| 112 | | - uint32_t engine_id, uint32_t queue_id, |
|---|
| 113 | | - uint32_t (**dump)[2], uint32_t *n_regs); |
|---|
| 114 | | -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, |
|---|
| 115 | | - uint32_t pipe_id, uint32_t queue_id); |
|---|
| 116 | | -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); |
|---|
| 117 | | -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, |
|---|
| 118 | | - enum kfd_preempt_type reset_type, |
|---|
| 119 | | - unsigned int utimeout, uint32_t pipe_id, |
|---|
| 120 | | - uint32_t queue_id); |
|---|
| 121 | | -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, |
|---|
| 122 | | - unsigned int utimeout); |
|---|
| 123 | | -static int kgd_address_watch_disable(struct kgd_dev *kgd); |
|---|
| 124 | | -static int kgd_address_watch_execute(struct kgd_dev *kgd, |
|---|
| 125 | | - unsigned int watch_point_id, |
|---|
| 126 | | - uint32_t cntl_val, |
|---|
| 127 | | - uint32_t addr_hi, |
|---|
| 128 | | - uint32_t addr_lo); |
|---|
| 129 | | -static int kgd_wave_control_execute(struct kgd_dev *kgd, |
|---|
| 130 | | - uint32_t gfx_index_val, |
|---|
| 131 | | - uint32_t sq_cmd); |
|---|
| 132 | | -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, |
|---|
| 133 | | - unsigned int watch_point_id, |
|---|
| 134 | | - unsigned int reg_offset); |
|---|
| 135 | | - |
|---|
| 136 | | -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, |
|---|
| 137 | | - uint8_t vmid); |
|---|
| 138 | | -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, |
|---|
| 139 | | - uint8_t vmid); |
|---|
| 140 | | -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, |
|---|
| 141 | | - uint32_t page_table_base); |
|---|
| 142 | | -static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); |
|---|
| 143 | | -static void set_scratch_backing_va(struct kgd_dev *kgd, |
|---|
| 144 | | - uint64_t va, uint32_t vmid); |
|---|
| 145 | | -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); |
|---|
| 146 | | -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); |
|---|
| 147 | | - |
|---|
| 148 | | -/* Because of REG_GET_FIELD() being used, we put this function in the |
|---|
| 149 | | - * asic specific file. |
|---|
| 150 | | - */ |
|---|
| 151 | | -static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, |
|---|
| 152 | | - struct tile_config *config) |
|---|
| 153 | | -{ |
|---|
| 154 | | - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
|---|
| 155 | | - |
|---|
| 156 | | - config->gb_addr_config = adev->gfx.config.gb_addr_config; |
|---|
| 157 | | - |
|---|
| 158 | | - config->tile_config_ptr = adev->gfx.config.tile_mode_array; |
|---|
| 159 | | - config->num_tile_configs = |
|---|
| 160 | | - ARRAY_SIZE(adev->gfx.config.tile_mode_array); |
|---|
| 161 | | - config->macro_tile_config_ptr = |
|---|
| 162 | | - adev->gfx.config.macrotile_mode_array; |
|---|
| 163 | | - config->num_macro_tile_configs = |
|---|
| 164 | | - ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); |
|---|
| 165 | | - |
|---|
| 166 | | - return 0; |
|---|
| 167 | | -} |
|---|
| 168 | | - |
|---|
| 169 | | -static const struct kfd2kgd_calls kfd2kgd = { |
|---|
| 170 | | - .init_gtt_mem_allocation = alloc_gtt_mem, |
|---|
| 171 | | - .free_gtt_mem = free_gtt_mem, |
|---|
| 172 | | - .get_local_mem_info = get_local_mem_info, |
|---|
| 173 | | - .get_gpu_clock_counter = get_gpu_clock_counter, |
|---|
| 174 | | - .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, |
|---|
| 175 | | - .alloc_pasid = amdgpu_pasid_alloc, |
|---|
| 176 | | - .free_pasid = amdgpu_pasid_free, |
|---|
| 177 | | - .program_sh_mem_settings = kgd_program_sh_mem_settings, |
|---|
| 178 | | - .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, |
|---|
| 179 | | - .init_interrupts = kgd_init_interrupts, |
|---|
| 180 | | - .hqd_load = kgd_hqd_load, |
|---|
| 181 | | - .hqd_sdma_load = kgd_hqd_sdma_load, |
|---|
| 182 | | - .hqd_dump = kgd_hqd_dump, |
|---|
| 183 | | - .hqd_sdma_dump = kgd_hqd_sdma_dump, |
|---|
| 184 | | - .hqd_is_occupied = kgd_hqd_is_occupied, |
|---|
| 185 | | - .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, |
|---|
| 186 | | - .hqd_destroy = kgd_hqd_destroy, |
|---|
| 187 | | - .hqd_sdma_destroy = kgd_hqd_sdma_destroy, |
|---|
| 188 | | - .address_watch_disable = kgd_address_watch_disable, |
|---|
| 189 | | - .address_watch_execute = kgd_address_watch_execute, |
|---|
| 190 | | - .wave_control_execute = kgd_wave_control_execute, |
|---|
| 191 | | - .address_watch_get_offset = kgd_address_watch_get_offset, |
|---|
| 192 | | - .get_atc_vmid_pasid_mapping_pasid = |
|---|
| 193 | | - get_atc_vmid_pasid_mapping_pasid, |
|---|
| 194 | | - .get_atc_vmid_pasid_mapping_valid = |
|---|
| 195 | | - get_atc_vmid_pasid_mapping_valid, |
|---|
| 196 | | - .get_fw_version = get_fw_version, |
|---|
| 197 | | - .set_scratch_backing_va = set_scratch_backing_va, |
|---|
| 198 | | - .get_tile_config = amdgpu_amdkfd_get_tile_config, |
|---|
| 199 | | - .get_cu_info = get_cu_info, |
|---|
| 200 | | - .get_vram_usage = amdgpu_amdkfd_get_vram_usage, |
|---|
| 201 | | - .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, |
|---|
| 202 | | - .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, |
|---|
| 203 | | - .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, |
|---|
| 204 | | - .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, |
|---|
| 205 | | - .set_vm_context_page_table_base = set_vm_context_page_table_base, |
|---|
| 206 | | - .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, |
|---|
| 207 | | - .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, |
|---|
| 208 | | - .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, |
|---|
| 209 | | - .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, |
|---|
| 210 | | - .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, |
|---|
| 211 | | - .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, |
|---|
| 212 | | - .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, |
|---|
| 213 | | - .invalidate_tlbs = invalidate_tlbs, |
|---|
| 214 | | - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, |
|---|
| 215 | | - .submit_ib = amdgpu_amdkfd_submit_ib, |
|---|
| 216 | | - .gpu_recover = amdgpu_amdkfd_gpu_reset, |
|---|
| 217 | | - .set_compute_idle = amdgpu_amdkfd_set_compute_idle |
|---|
| 218 | | -}; |
|---|
| 219 | | - |
|---|
| 220 | | -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) |
|---|
| 221 | | -{ |
|---|
| 222 | | - return (struct kfd2kgd_calls *)&kfd2kgd; |
|---|
| 223 | | -} |
|---|
| 224 | 46 | |
|---|
| 225 | 47 | static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) |
|---|
| 226 | 48 | { |
|---|
| .. | .. |
|---|
| 255 | 77 | lock_srbm(kgd, mec, pipe, queue_id, 0); |
|---|
| 256 | 78 | } |
|---|
| 257 | 79 | |
|---|
| 258 | | -static uint32_t get_queue_mask(struct amdgpu_device *adev, |
|---|
| 80 | +static uint64_t get_queue_mask(struct amdgpu_device *adev, |
|---|
| 259 | 81 | uint32_t pipe_id, uint32_t queue_id) |
|---|
| 260 | 82 | { |
|---|
| 261 | | - unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + |
|---|
| 262 | | - queue_id) & 31; |
|---|
| 83 | + unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + |
|---|
| 84 | + queue_id; |
|---|
| 263 | 85 | |
|---|
| 264 | | - return ((uint32_t)1) << bit; |
|---|
| 86 | + return 1ull << bit; |
|---|
| 265 | 87 | } |
|---|
| 266 | 88 | |
|---|
| 267 | 89 | static void release_queue(struct kgd_dev *kgd) |
|---|
| .. | .. |
|---|
| 269 | 91 | unlock_srbm(kgd); |
|---|
| 270 | 92 | } |
|---|
| 271 | 93 | |
|---|
| 272 | | -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, |
|---|
| 94 | +void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, |
|---|
| 273 | 95 | uint32_t sh_mem_config, |
|---|
| 274 | 96 | uint32_t sh_mem_ape1_base, |
|---|
| 275 | 97 | uint32_t sh_mem_ape1_limit, |
|---|
| .. | .. |
|---|
| 279 | 101 | |
|---|
| 280 | 102 | lock_srbm(kgd, 0, 0, 0, vmid); |
|---|
| 281 | 103 | |
|---|
| 282 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); |
|---|
| 283 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); |
|---|
| 104 | + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); |
|---|
| 105 | + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); |
|---|
| 284 | 106 | /* APE1 no longer exists on GFX9 */ |
|---|
| 285 | 107 | |
|---|
| 286 | 108 | unlock_srbm(kgd); |
|---|
| 287 | 109 | } |
|---|
| 288 | 110 | |
|---|
| 289 | | -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, |
|---|
| 111 | +int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, |
|---|
| 290 | 112 | unsigned int vmid) |
|---|
| 291 | 113 | { |
|---|
| 292 | 114 | struct amdgpu_device *adev = get_amdgpu_device(kgd); |
|---|
| .. | .. |
|---|
| 347 | 169 | * but still works |
|---|
| 348 | 170 | */ |
|---|
| 349 | 171 | |
|---|
| 350 | | -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) |
|---|
| 172 | +int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) |
|---|
| 351 | 173 | { |
|---|
| 352 | 174 | struct amdgpu_device *adev = get_amdgpu_device(kgd); |
|---|
| 353 | 175 | uint32_t mec; |
|---|
| .. | .. |
|---|
| 367 | 189 | return 0; |
|---|
| 368 | 190 | } |
|---|
| 369 | 191 | |
|---|
| 370 | | -static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, |
|---|
| 192 | +static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, |
|---|
| 371 | 193 | unsigned int engine_id, |
|---|
| 372 | 194 | unsigned int queue_id) |
|---|
| 373 | 195 | { |
|---|
| 374 | | - uint32_t base[2] = { |
|---|
| 375 | | - SOC15_REG_OFFSET(SDMA0, 0, |
|---|
| 376 | | - mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, |
|---|
| 377 | | - SOC15_REG_OFFSET(SDMA1, 0, |
|---|
| 378 | | - mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL |
|---|
| 379 | | - }; |
|---|
| 380 | | - uint32_t retval; |
|---|
| 196 | + uint32_t sdma_engine_reg_base = 0; |
|---|
| 197 | + uint32_t sdma_rlc_reg_offset; |
|---|
| 381 | 198 | |
|---|
| 382 | | - retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - |
|---|
| 383 | | - mmSDMA0_RLC0_RB_CNTL); |
|---|
| 199 | + switch (engine_id) { |
|---|
| 200 | + default: |
|---|
| 201 | + dev_warn(adev->dev, |
|---|
| 202 | + "Invalid sdma engine id (%d), using engine id 0\n", |
|---|
| 203 | + engine_id); |
|---|
| 204 | + fallthrough; |
|---|
| 205 | + case 0: |
|---|
| 206 | + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0, |
|---|
| 207 | + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; |
|---|
| 208 | + break; |
|---|
| 209 | + case 1: |
|---|
| 210 | + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0, |
|---|
| 211 | + mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; |
|---|
| 212 | + break; |
|---|
| 213 | + } |
|---|
| 384 | 214 | |
|---|
| 385 | | - pr_debug("sdma base address: 0x%x\n", retval); |
|---|
| 215 | + sdma_rlc_reg_offset = sdma_engine_reg_base |
|---|
| 216 | + + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); |
|---|
| 386 | 217 | |
|---|
| 387 | | - return retval; |
|---|
| 218 | + pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, |
|---|
| 219 | + queue_id, sdma_rlc_reg_offset); |
|---|
| 220 | + |
|---|
| 221 | + return sdma_rlc_reg_offset; |
|---|
| 388 | 222 | } |
|---|
| 389 | 223 | |
|---|
| 390 | 224 | static inline struct v9_mqd *get_mqd(void *mqd) |
|---|
| .. | .. |
|---|
| 397 | 231 | return (struct v9_sdma_mqd *)mqd; |
|---|
| 398 | 232 | } |
|---|
| 399 | 233 | |
|---|
| 400 | | -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, |
|---|
| 234 | +int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, |
|---|
| 401 | 235 | uint32_t queue_id, uint32_t __user *wptr, |
|---|
| 402 | 236 | uint32_t wptr_shift, uint32_t wptr_mask, |
|---|
| 403 | 237 | struct mm_struct *mm) |
|---|
| .. | .. |
|---|
| 411 | 245 | |
|---|
| 412 | 246 | acquire_queue(kgd, pipe_id, queue_id); |
|---|
| 413 | 247 | |
|---|
| 414 | | - /* HIQ is set during driver init period with vmid set to 0*/ |
|---|
| 415 | | - if (m->cp_hqd_vmid == 0) { |
|---|
| 416 | | - uint32_t value, mec, pipe; |
|---|
| 417 | | - |
|---|
| 418 | | - mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; |
|---|
| 419 | | - pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); |
|---|
| 420 | | - |
|---|
| 421 | | - pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", |
|---|
| 422 | | - mec, pipe, queue_id); |
|---|
| 423 | | - value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); |
|---|
| 424 | | - value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, |
|---|
| 425 | | - ((mec << 5) | (pipe << 3) | queue_id | 0x80)); |
|---|
| 426 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); |
|---|
| 427 | | - } |
|---|
| 428 | | - |
|---|
| 429 | 248 | /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ |
|---|
| 430 | 249 | mqd_hqd = &m->cp_mqd_base_addr_lo; |
|---|
| 431 | 250 | hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); |
|---|
| 432 | 251 | |
|---|
| 433 | 252 | for (reg = hqd_base; |
|---|
| 434 | 253 | reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) |
|---|
| 435 | | - WREG32(reg, mqd_hqd[reg - hqd_base]); |
|---|
| 254 | + WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); |
|---|
| 436 | 255 | |
|---|
| 437 | 256 | |
|---|
| 438 | 257 | /* Activate doorbell logic before triggering WPTR poll. */ |
|---|
| 439 | 258 | data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, |
|---|
| 440 | 259 | CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); |
|---|
| 441 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); |
|---|
| 260 | + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); |
|---|
| 442 | 261 | |
|---|
| 443 | 262 | if (wptr) { |
|---|
| 444 | 263 | /* Don't read wptr with get_user because the user |
|---|
| .. | .. |
|---|
| 467 | 286 | guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); |
|---|
| 468 | 287 | guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; |
|---|
| 469 | 288 | |
|---|
| 470 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), |
|---|
| 289 | + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), |
|---|
| 471 | 290 | lower_32_bits(guessed_wptr)); |
|---|
| 472 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), |
|---|
| 291 | + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), |
|---|
| 473 | 292 | upper_32_bits(guessed_wptr)); |
|---|
| 474 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), |
|---|
| 293 | + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), |
|---|
| 475 | 294 | lower_32_bits((uintptr_t)wptr)); |
|---|
| 476 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), |
|---|
| 295 | + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), |
|---|
| 477 | 296 | upper_32_bits((uintptr_t)wptr)); |
|---|
| 478 | 297 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), |
|---|
| 479 | | - get_queue_mask(adev, pipe_id, queue_id)); |
|---|
| 298 | + (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); |
|---|
| 480 | 299 | } |
|---|
| 481 | 300 | |
|---|
| 482 | 301 | /* Start the EOP fetcher */ |
|---|
| 483 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), |
|---|
| 302 | + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), |
|---|
| 484 | 303 | REG_SET_FIELD(m->cp_hqd_eop_rptr, |
|---|
| 485 | 304 | CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); |
|---|
| 486 | 305 | |
|---|
| 487 | 306 | data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); |
|---|
| 488 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); |
|---|
| 307 | + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); |
|---|
| 489 | 308 | |
|---|
| 490 | 309 | release_queue(kgd); |
|---|
| 491 | 310 | |
|---|
| 492 | 311 | return 0; |
|---|
| 493 | 312 | } |
|---|
| 494 | 313 | |
|---|
| 495 | | -static int kgd_hqd_dump(struct kgd_dev *kgd, |
|---|
| 314 | +int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, |
|---|
| 315 | + uint32_t pipe_id, uint32_t queue_id, |
|---|
| 316 | + uint32_t doorbell_off) |
|---|
| 317 | +{ |
|---|
| 318 | + struct amdgpu_device *adev = get_amdgpu_device(kgd); |
|---|
| 319 | + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; |
|---|
| 320 | + struct v9_mqd *m; |
|---|
| 321 | + uint32_t mec, pipe; |
|---|
| 322 | + int r; |
|---|
| 323 | + |
|---|
| 324 | + m = get_mqd(mqd); |
|---|
| 325 | + |
|---|
| 326 | + acquire_queue(kgd, pipe_id, queue_id); |
|---|
| 327 | + |
|---|
| 328 | + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; |
|---|
| 329 | + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); |
|---|
| 330 | + |
|---|
| 331 | + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", |
|---|
| 332 | + mec, pipe, queue_id); |
|---|
| 333 | + |
|---|
| 334 | + spin_lock(&adev->gfx.kiq.ring_lock); |
|---|
| 335 | + r = amdgpu_ring_alloc(kiq_ring, 7); |
|---|
| 336 | + if (r) { |
|---|
| 337 | + pr_err("Failed to alloc KIQ (%d).\n", r); |
|---|
| 338 | + goto out_unlock; |
|---|
| 339 | + } |
|---|
| 340 | + |
|---|
| 341 | + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); |
|---|
| 342 | + amdgpu_ring_write(kiq_ring, |
|---|
| 343 | + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ |
|---|
| 344 | + PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */ |
|---|
| 345 | + PACKET3_MAP_QUEUES_QUEUE(queue_id) | |
|---|
| 346 | + PACKET3_MAP_QUEUES_PIPE(pipe) | |
|---|
| 347 | + PACKET3_MAP_QUEUES_ME((mec - 1)) | |
|---|
| 348 | + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ |
|---|
| 349 | + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ |
|---|
| 350 | + PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */ |
|---|
| 351 | + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ |
|---|
| 352 | + amdgpu_ring_write(kiq_ring, |
|---|
| 353 | + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off)); |
|---|
| 354 | + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo); |
|---|
| 355 | + amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi); |
|---|
| 356 | + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo); |
|---|
| 357 | + amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); |
|---|
| 358 | + amdgpu_ring_commit(kiq_ring); |
|---|
| 359 | + |
|---|
| 360 | +out_unlock: |
|---|
| 361 | + spin_unlock(&adev->gfx.kiq.ring_lock); |
|---|
| 362 | + release_queue(kgd); |
|---|
| 363 | + |
|---|
| 364 | + return r; |
|---|
| 365 | +} |
|---|
| 366 | + |
|---|
| 367 | +int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, |
|---|
| 496 | 368 | uint32_t pipe_id, uint32_t queue_id, |
|---|
| 497 | 369 | uint32_t (**dump)[2], uint32_t *n_regs) |
|---|
| 498 | 370 | { |
|---|
| .. | .. |
|---|
| 529 | 401 | { |
|---|
| 530 | 402 | struct amdgpu_device *adev = get_amdgpu_device(kgd); |
|---|
| 531 | 403 | struct v9_sdma_mqd *m; |
|---|
| 532 | | - uint32_t sdma_base_addr, sdmax_gfx_context_cntl; |
|---|
| 404 | + uint32_t sdma_rlc_reg_offset; |
|---|
| 533 | 405 | unsigned long end_jiffies; |
|---|
| 534 | 406 | uint32_t data; |
|---|
| 535 | 407 | uint64_t data64; |
|---|
| 536 | 408 | uint64_t __user *wptr64 = (uint64_t __user *)wptr; |
|---|
| 537 | 409 | |
|---|
| 538 | 410 | m = get_sdma_mqd(mqd); |
|---|
| 539 | | - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, |
|---|
| 411 | + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, |
|---|
| 540 | 412 | m->sdma_queue_id); |
|---|
| 541 | | - sdmax_gfx_context_cntl = m->sdma_engine_id ? |
|---|
| 542 | | - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) : |
|---|
| 543 | | - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL); |
|---|
| 544 | 413 | |
|---|
| 545 | | - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, |
|---|
| 414 | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, |
|---|
| 546 | 415 | m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); |
|---|
| 547 | 416 | |
|---|
| 548 | 417 | end_jiffies = msecs_to_jiffies(2000) + jiffies; |
|---|
| 549 | 418 | while (true) { |
|---|
| 550 | | - data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); |
|---|
| 419 | + data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); |
|---|
| 551 | 420 | if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) |
|---|
| 552 | 421 | break; |
|---|
| 553 | | - if (time_after(jiffies, end_jiffies)) |
|---|
| 422 | + if (time_after(jiffies, end_jiffies)) { |
|---|
| 423 | + pr_err("SDMA RLC not idle in %s\n", __func__); |
|---|
| 554 | 424 | return -ETIME; |
|---|
| 425 | + } |
|---|
| 555 | 426 | usleep_range(500, 1000); |
|---|
| 556 | 427 | } |
|---|
| 557 | | - data = RREG32(sdmax_gfx_context_cntl); |
|---|
| 558 | | - data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, |
|---|
| 559 | | - RESUME_CTX, 0); |
|---|
| 560 | | - WREG32(sdmax_gfx_context_cntl, data); |
|---|
| 561 | 428 | |
|---|
| 562 | | - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, |
|---|
| 429 | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET, |
|---|
| 563 | 430 | m->sdmax_rlcx_doorbell_offset); |
|---|
| 564 | 431 | |
|---|
| 565 | 432 | data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, |
|---|
| 566 | 433 | ENABLE, 1); |
|---|
| 567 | | - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); |
|---|
| 568 | | - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); |
|---|
| 569 | | - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, |
|---|
| 434 | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); |
|---|
| 435 | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, |
|---|
| 436 | + m->sdmax_rlcx_rb_rptr); |
|---|
| 437 | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI, |
|---|
| 570 | 438 | m->sdmax_rlcx_rb_rptr_hi); |
|---|
| 571 | 439 | |
|---|
| 572 | | - WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); |
|---|
| 440 | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); |
|---|
| 573 | 441 | if (read_user_wptr(mm, wptr64, data64)) { |
|---|
| 574 | | - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, |
|---|
| 442 | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, |
|---|
| 575 | 443 | lower_32_bits(data64)); |
|---|
| 576 | | - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, |
|---|
| 444 | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, |
|---|
| 577 | 445 | upper_32_bits(data64)); |
|---|
| 578 | 446 | } else { |
|---|
| 579 | | - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, |
|---|
| 447 | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, |
|---|
| 580 | 448 | m->sdmax_rlcx_rb_rptr); |
|---|
| 581 | | - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, |
|---|
| 449 | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, |
|---|
| 582 | 450 | m->sdmax_rlcx_rb_rptr_hi); |
|---|
| 583 | 451 | } |
|---|
| 584 | | - WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); |
|---|
| 452 | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); |
|---|
| 585 | 453 | |
|---|
| 586 | | - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); |
|---|
| 587 | | - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, |
|---|
| 454 | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); |
|---|
| 455 | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, |
|---|
| 588 | 456 | m->sdmax_rlcx_rb_base_hi); |
|---|
| 589 | | - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, |
|---|
| 457 | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, |
|---|
| 590 | 458 | m->sdmax_rlcx_rb_rptr_addr_lo); |
|---|
| 591 | | - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, |
|---|
| 459 | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, |
|---|
| 592 | 460 | m->sdmax_rlcx_rb_rptr_addr_hi); |
|---|
| 593 | 461 | |
|---|
| 594 | 462 | data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, |
|---|
| 595 | 463 | RB_ENABLE, 1); |
|---|
| 596 | | - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); |
|---|
| 464 | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); |
|---|
| 597 | 465 | |
|---|
| 598 | 466 | return 0; |
|---|
| 599 | 467 | } |
|---|
| .. | .. |
|---|
| 603 | 471 | uint32_t (**dump)[2], uint32_t *n_regs) |
|---|
| 604 | 472 | { |
|---|
| 605 | 473 | struct amdgpu_device *adev = get_amdgpu_device(kgd); |
|---|
| 606 | | - uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); |
|---|
| 474 | + uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, |
|---|
| 475 | + engine_id, queue_id); |
|---|
| 607 | 476 | uint32_t i = 0, reg; |
|---|
| 608 | 477 | #undef HQD_N_REGS |
|---|
| 609 | 478 | #define HQD_N_REGS (19+6+7+10) |
|---|
| .. | .. |
|---|
| 613 | 482 | return -ENOMEM; |
|---|
| 614 | 483 | |
|---|
| 615 | 484 | for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) |
|---|
| 616 | | - DUMP_REG(sdma_base_addr + reg); |
|---|
| 485 | + DUMP_REG(sdma_rlc_reg_offset + reg); |
|---|
| 617 | 486 | for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) |
|---|
| 618 | | - DUMP_REG(sdma_base_addr + reg); |
|---|
| 487 | + DUMP_REG(sdma_rlc_reg_offset + reg); |
|---|
| 619 | 488 | for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; |
|---|
| 620 | 489 | reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) |
|---|
| 621 | | - DUMP_REG(sdma_base_addr + reg); |
|---|
| 490 | + DUMP_REG(sdma_rlc_reg_offset + reg); |
|---|
| 622 | 491 | for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; |
|---|
| 623 | 492 | reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) |
|---|
| 624 | | - DUMP_REG(sdma_base_addr + reg); |
|---|
| 493 | + DUMP_REG(sdma_rlc_reg_offset + reg); |
|---|
| 625 | 494 | |
|---|
| 626 | 495 | WARN_ON_ONCE(i != HQD_N_REGS); |
|---|
| 627 | 496 | *n_regs = i; |
|---|
| .. | .. |
|---|
| 629 | 498 | return 0; |
|---|
| 630 | 499 | } |
|---|
| 631 | 500 | |
|---|
| 632 | | -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, |
|---|
| 501 | +bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, |
|---|
| 633 | 502 | uint32_t pipe_id, uint32_t queue_id) |
|---|
| 634 | 503 | { |
|---|
| 635 | 504 | struct amdgpu_device *adev = get_amdgpu_device(kgd); |
|---|
| .. | .. |
|---|
| 655 | 524 | { |
|---|
| 656 | 525 | struct amdgpu_device *adev = get_amdgpu_device(kgd); |
|---|
| 657 | 526 | struct v9_sdma_mqd *m; |
|---|
| 658 | | - uint32_t sdma_base_addr; |
|---|
| 527 | + uint32_t sdma_rlc_reg_offset; |
|---|
| 659 | 528 | uint32_t sdma_rlc_rb_cntl; |
|---|
| 660 | 529 | |
|---|
| 661 | 530 | m = get_sdma_mqd(mqd); |
|---|
| 662 | | - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, |
|---|
| 531 | + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, |
|---|
| 663 | 532 | m->sdma_queue_id); |
|---|
| 664 | 533 | |
|---|
| 665 | | - sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); |
|---|
| 534 | + sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); |
|---|
| 666 | 535 | |
|---|
| 667 | 536 | if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) |
|---|
| 668 | 537 | return true; |
|---|
| .. | .. |
|---|
| 670 | 539 | return false; |
|---|
| 671 | 540 | } |
|---|
| 672 | 541 | |
|---|
| 673 | | -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, |
|---|
| 542 | +int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, |
|---|
| 674 | 543 | enum kfd_preempt_type reset_type, |
|---|
| 675 | 544 | unsigned int utimeout, uint32_t pipe_id, |
|---|
| 676 | 545 | uint32_t queue_id) |
|---|
| .. | .. |
|---|
| 681 | 550 | uint32_t temp; |
|---|
| 682 | 551 | struct v9_mqd *m = get_mqd(mqd); |
|---|
| 683 | 552 | |
|---|
| 684 | | - if (adev->in_gpu_reset) |
|---|
| 553 | + if (amdgpu_in_reset(adev)) |
|---|
| 685 | 554 | return -EIO; |
|---|
| 686 | 555 | |
|---|
| 687 | 556 | acquire_queue(kgd, pipe_id, queue_id); |
|---|
| 688 | 557 | |
|---|
| 689 | 558 | if (m->cp_hqd_vmid == 0) |
|---|
| 690 | | - WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); |
|---|
| 559 | + WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); |
|---|
| 691 | 560 | |
|---|
| 692 | 561 | switch (reset_type) { |
|---|
| 693 | 562 | case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: |
|---|
| .. | .. |
|---|
| 701 | 570 | break; |
|---|
| 702 | 571 | } |
|---|
| 703 | 572 | |
|---|
| 704 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); |
|---|
| 573 | + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); |
|---|
| 705 | 574 | |
|---|
| 706 | 575 | end_jiffies = (utimeout * HZ / 1000) + jiffies; |
|---|
| 707 | 576 | while (true) { |
|---|
| .. | .. |
|---|
| 725 | 594 | { |
|---|
| 726 | 595 | struct amdgpu_device *adev = get_amdgpu_device(kgd); |
|---|
| 727 | 596 | struct v9_sdma_mqd *m; |
|---|
| 728 | | - uint32_t sdma_base_addr; |
|---|
| 597 | + uint32_t sdma_rlc_reg_offset; |
|---|
| 729 | 598 | uint32_t temp; |
|---|
| 730 | 599 | unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; |
|---|
| 731 | 600 | |
|---|
| 732 | 601 | m = get_sdma_mqd(mqd); |
|---|
| 733 | | - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, |
|---|
| 602 | + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, |
|---|
| 734 | 603 | m->sdma_queue_id); |
|---|
| 735 | 604 | |
|---|
| 736 | | - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); |
|---|
| 605 | + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); |
|---|
| 737 | 606 | temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; |
|---|
| 738 | | - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); |
|---|
| 607 | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); |
|---|
| 739 | 608 | |
|---|
| 740 | 609 | while (true) { |
|---|
| 741 | | - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); |
|---|
| 610 | + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); |
|---|
| 742 | 611 | if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) |
|---|
| 743 | 612 | break; |
|---|
| 744 | | - if (time_after(jiffies, end_jiffies)) |
|---|
| 613 | + if (time_after(jiffies, end_jiffies)) { |
|---|
| 614 | + pr_err("SDMA RLC not idle in %s\n", __func__); |
|---|
| 745 | 615 | return -ETIME; |
|---|
| 616 | + } |
|---|
| 746 | 617 | usleep_range(500, 1000); |
|---|
| 747 | 618 | } |
|---|
| 748 | 619 | |
|---|
| 749 | | - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); |
|---|
| 750 | | - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, |
|---|
| 751 | | - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | |
|---|
| 620 | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); |
|---|
| 621 | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, |
|---|
| 622 | + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | |
|---|
| 752 | 623 | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); |
|---|
| 753 | 624 | |
|---|
| 754 | | - m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); |
|---|
| 625 | + m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); |
|---|
| 755 | 626 | m->sdmax_rlcx_rb_rptr_hi = |
|---|
| 756 | | - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); |
|---|
| 627 | + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI); |
|---|
| 757 | 628 | |
|---|
| 758 | 629 | return 0; |
|---|
| 759 | 630 | } |
|---|
| 760 | 631 | |
|---|
| 761 | | -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, |
|---|
| 762 | | - uint8_t vmid) |
|---|
| 632 | +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, |
|---|
| 633 | + uint8_t vmid, uint16_t *p_pasid) |
|---|
| 763 | 634 | { |
|---|
| 764 | | - uint32_t reg; |
|---|
| 635 | + uint32_t value; |
|---|
| 765 | 636 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; |
|---|
| 766 | 637 | |
|---|
| 767 | | - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) |
|---|
| 638 | + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) |
|---|
| 768 | 639 | + vmid); |
|---|
| 769 | | - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; |
|---|
| 640 | + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; |
|---|
| 641 | + |
|---|
| 642 | + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); |
|---|
| 770 | 643 | } |
|---|
| 771 | 644 | |
|---|
| 772 | | -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, |
|---|
| 773 | | - uint8_t vmid) |
|---|
| 774 | | -{ |
|---|
| 775 | | - uint32_t reg; |
|---|
| 776 | | - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; |
|---|
| 777 | | - |
|---|
| 778 | | - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) |
|---|
| 779 | | - + vmid); |
|---|
| 780 | | - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; |
|---|
| 781 | | -} |
|---|
| 782 | | - |
|---|
| 783 | | -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) |
|---|
| 784 | | -{ |
|---|
| 785 | | - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; |
|---|
| 786 | | - uint32_t req = (1 << vmid) | |
|---|
| 787 | | - (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */ |
|---|
| 788 | | - VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK | |
|---|
| 789 | | - VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK | |
|---|
| 790 | | - VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK | |
|---|
| 791 | | - VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK | |
|---|
| 792 | | - VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK; |
|---|
| 793 | | - |
|---|
| 794 | | - mutex_lock(&adev->srbm_mutex); |
|---|
| 795 | | - |
|---|
| 796 | | - /* Use legacy mode tlb invalidation. |
|---|
| 797 | | - * |
|---|
| 798 | | - * Currently on Raven the code below is broken for anything but |
|---|
| 799 | | - * legacy mode due to a MMHUB power gating problem. A workaround |
|---|
| 800 | | - * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ |
|---|
| 801 | | - * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack |
|---|
| 802 | | - * bit. |
|---|
| 803 | | - * |
|---|
| 804 | | - * TODO 1: agree on the right set of invalidation registers for |
|---|
| 805 | | - * KFD use. Use the last one for now. Invalidate both GC and |
|---|
| 806 | | - * MMHUB. |
|---|
| 807 | | - * |
|---|
| 808 | | - * TODO 2: support range-based invalidation, requires kfg2kgd |
|---|
| 809 | | - * interface change |
|---|
| 810 | | - */ |
|---|
| 811 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32), |
|---|
| 812 | | - 0xffffffff); |
|---|
| 813 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32), |
|---|
| 814 | | - 0x0000001f); |
|---|
| 815 | | - |
|---|
| 816 | | - WREG32(SOC15_REG_OFFSET(MMHUB, 0, |
|---|
| 817 | | - mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32), |
|---|
| 818 | | - 0xffffffff); |
|---|
| 819 | | - WREG32(SOC15_REG_OFFSET(MMHUB, 0, |
|---|
| 820 | | - mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32), |
|---|
| 821 | | - 0x0000001f); |
|---|
| 822 | | - |
|---|
| 823 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req); |
|---|
| 824 | | - |
|---|
| 825 | | - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ), |
|---|
| 826 | | - req); |
|---|
| 827 | | - |
|---|
| 828 | | - while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) & |
|---|
| 829 | | - (1 << vmid))) |
|---|
| 830 | | - cpu_relax(); |
|---|
| 831 | | - |
|---|
| 832 | | - while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0, |
|---|
| 833 | | - mmMMHUB_VM_INVALIDATE_ENG16_ACK)) & |
|---|
| 834 | | - (1 << vmid))) |
|---|
| 835 | | - cpu_relax(); |
|---|
| 836 | | - |
|---|
| 837 | | - mutex_unlock(&adev->srbm_mutex); |
|---|
| 838 | | - |
|---|
| 839 | | -} |
|---|
| 840 | | - |
|---|
| 841 | | -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) |
|---|
| 842 | | -{ |
|---|
| 843 | | - signed long r; |
|---|
| 844 | | - uint32_t seq; |
|---|
| 845 | | - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; |
|---|
| 846 | | - |
|---|
| 847 | | - spin_lock(&adev->gfx.kiq.ring_lock); |
|---|
| 848 | | - amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ |
|---|
| 849 | | - amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); |
|---|
| 850 | | - amdgpu_ring_write(ring, |
|---|
| 851 | | - PACKET3_INVALIDATE_TLBS_DST_SEL(1) | |
|---|
| 852 | | - PACKET3_INVALIDATE_TLBS_ALL_HUB(1) | |
|---|
| 853 | | - PACKET3_INVALIDATE_TLBS_PASID(pasid) | |
|---|
| 854 | | - PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */ |
|---|
| 855 | | - amdgpu_fence_emit_polling(ring, &seq); |
|---|
| 856 | | - amdgpu_ring_commit(ring); |
|---|
| 857 | | - spin_unlock(&adev->gfx.kiq.ring_lock); |
|---|
| 858 | | - |
|---|
| 859 | | - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); |
|---|
| 860 | | - if (r < 1) { |
|---|
| 861 | | - DRM_ERROR("wait for kiq fence error: %ld.\n", r); |
|---|
| 862 | | - return -ETIME; |
|---|
| 863 | | - } |
|---|
| 864 | | - |
|---|
| 865 | | - return 0; |
|---|
| 866 | | -} |
|---|
| 867 | | - |
|---|
| 868 | | -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) |
|---|
| 869 | | -{ |
|---|
| 870 | | - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; |
|---|
| 871 | | - int vmid; |
|---|
| 872 | | - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; |
|---|
| 873 | | - |
|---|
| 874 | | - if (adev->in_gpu_reset) |
|---|
| 875 | | - return -EIO; |
|---|
| 876 | | - |
|---|
| 877 | | - if (ring->ready) |
|---|
| 878 | | - return invalidate_tlbs_with_kiq(adev, pasid); |
|---|
| 879 | | - |
|---|
| 880 | | - for (vmid = 0; vmid < 16; vmid++) { |
|---|
| 881 | | - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) |
|---|
| 882 | | - continue; |
|---|
| 883 | | - if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { |
|---|
| 884 | | - if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) |
|---|
| 885 | | - == pasid) { |
|---|
| 886 | | - write_vmid_invalidate_request(kgd, vmid); |
|---|
| 887 | | - break; |
|---|
| 888 | | - } |
|---|
| 889 | | - } |
|---|
| 890 | | - } |
|---|
| 891 | | - |
|---|
| 892 | | - return 0; |
|---|
| 893 | | -} |
|---|
| 894 | | - |
|---|
| 895 | | -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) |
|---|
| 896 | | -{ |
|---|
| 897 | | - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; |
|---|
| 898 | | - |
|---|
| 899 | | - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { |
|---|
| 900 | | - pr_err("non kfd vmid %d\n", vmid); |
|---|
| 901 | | - return 0; |
|---|
| 902 | | - } |
|---|
| 903 | | - |
|---|
| 904 | | - write_vmid_invalidate_request(kgd, vmid); |
|---|
| 905 | | - return 0; |
|---|
| 906 | | -} |
|---|
| 907 | | - |
|---|
| 908 | | -static int kgd_address_watch_disable(struct kgd_dev *kgd) |
|---|
| 645 | +int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd) |
|---|
| 909 | 646 | { |
|---|
| 910 | 647 | return 0; |
|---|
| 911 | 648 | } |
|---|
| 912 | 649 | |
|---|
| 913 | | -static int kgd_address_watch_execute(struct kgd_dev *kgd, |
|---|
| 650 | +int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, |
|---|
| 914 | 651 | unsigned int watch_point_id, |
|---|
| 915 | 652 | uint32_t cntl_val, |
|---|
| 916 | 653 | uint32_t addr_hi, |
|---|
| .. | .. |
|---|
| 919 | 656 | return 0; |
|---|
| 920 | 657 | } |
|---|
| 921 | 658 | |
|---|
| 922 | | -static int kgd_wave_control_execute(struct kgd_dev *kgd, |
|---|
| 659 | +int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, |
|---|
| 923 | 660 | uint32_t gfx_index_val, |
|---|
| 924 | 661 | uint32_t sq_cmd) |
|---|
| 925 | 662 | { |
|---|
| .. | .. |
|---|
| 928 | 665 | |
|---|
| 929 | 666 | mutex_lock(&adev->grbm_idx_mutex); |
|---|
| 930 | 667 | |
|---|
| 931 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); |
|---|
| 668 | + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val); |
|---|
| 932 | 669 | WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); |
|---|
| 933 | 670 | |
|---|
| 934 | 671 | data = REG_SET_FIELD(data, GRBM_GFX_INDEX, |
|---|
| .. | .. |
|---|
| 938 | 675 | data = REG_SET_FIELD(data, GRBM_GFX_INDEX, |
|---|
| 939 | 676 | SE_BROADCAST_WRITES, 1); |
|---|
| 940 | 677 | |
|---|
| 941 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); |
|---|
| 678 | + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); |
|---|
| 942 | 679 | mutex_unlock(&adev->grbm_idx_mutex); |
|---|
| 943 | 680 | |
|---|
| 944 | 681 | return 0; |
|---|
| 945 | 682 | } |
|---|
| 946 | 683 | |
|---|
| 947 | | -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, |
|---|
| 684 | +uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, |
|---|
| 948 | 685 | unsigned int watch_point_id, |
|---|
| 949 | 686 | unsigned int reg_offset) |
|---|
| 950 | 687 | { |
|---|
| 951 | 688 | return 0; |
|---|
| 952 | 689 | } |
|---|
| 953 | 690 | |
|---|
| 954 | | -static void set_scratch_backing_va(struct kgd_dev *kgd, |
|---|
| 955 | | - uint64_t va, uint32_t vmid) |
|---|
| 956 | | -{ |
|---|
| 957 | | - /* No longer needed on GFXv9. The scratch base address is |
|---|
| 958 | | - * passed to the shader by the CP. It's the user mode driver's |
|---|
| 959 | | - * responsibility. |
|---|
| 960 | | - */ |
|---|
| 961 | | -} |
|---|
| 962 | | - |
|---|
| 963 | | -/* FIXME: Does this need to be ASIC-specific code? */ |
|---|
| 964 | | -static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) |
|---|
| 965 | | -{ |
|---|
| 966 | | - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; |
|---|
| 967 | | - const union amdgpu_firmware_header *hdr; |
|---|
| 968 | | - |
|---|
| 969 | | - switch (type) { |
|---|
| 970 | | - case KGD_ENGINE_PFP: |
|---|
| 971 | | - hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data; |
|---|
| 972 | | - break; |
|---|
| 973 | | - |
|---|
| 974 | | - case KGD_ENGINE_ME: |
|---|
| 975 | | - hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data; |
|---|
| 976 | | - break; |
|---|
| 977 | | - |
|---|
| 978 | | - case KGD_ENGINE_CE: |
|---|
| 979 | | - hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data; |
|---|
| 980 | | - break; |
|---|
| 981 | | - |
|---|
| 982 | | - case KGD_ENGINE_MEC1: |
|---|
| 983 | | - hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data; |
|---|
| 984 | | - break; |
|---|
| 985 | | - |
|---|
| 986 | | - case KGD_ENGINE_MEC2: |
|---|
| 987 | | - hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data; |
|---|
| 988 | | - break; |
|---|
| 989 | | - |
|---|
| 990 | | - case KGD_ENGINE_RLC: |
|---|
| 991 | | - hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data; |
|---|
| 992 | | - break; |
|---|
| 993 | | - |
|---|
| 994 | | - case KGD_ENGINE_SDMA1: |
|---|
| 995 | | - hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data; |
|---|
| 996 | | - break; |
|---|
| 997 | | - |
|---|
| 998 | | - case KGD_ENGINE_SDMA2: |
|---|
| 999 | | - hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data; |
|---|
| 1000 | | - break; |
|---|
| 1001 | | - |
|---|
| 1002 | | - default: |
|---|
| 1003 | | - return 0; |
|---|
| 1004 | | - } |
|---|
| 1005 | | - |
|---|
| 1006 | | - if (hdr == NULL) |
|---|
| 1007 | | - return 0; |
|---|
| 1008 | | - |
|---|
| 1009 | | - /* Only 12 bit in use*/ |
|---|
| 1010 | | - return hdr->common.ucode_version; |
|---|
| 1011 | | -} |
|---|
| 1012 | | - |
|---|
| 1013 | | -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, |
|---|
| 1014 | | - uint32_t page_table_base) |
|---|
| 691 | +void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, |
|---|
| 692 | + uint32_t vmid, uint64_t page_table_base) |
|---|
| 1015 | 693 | { |
|---|
| 1016 | 694 | struct amdgpu_device *adev = get_amdgpu_device(kgd); |
|---|
| 1017 | | - uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT | |
|---|
| 1018 | | - AMDGPU_PTE_VALID; |
|---|
| 1019 | 695 | |
|---|
| 1020 | 696 | if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { |
|---|
| 1021 | 697 | pr_err("trying to set page table base for wrong VMID %u\n", |
|---|
| .. | .. |
|---|
| 1023 | 699 | return; |
|---|
| 1024 | 700 | } |
|---|
| 1025 | 701 | |
|---|
| 1026 | | - /* TODO: take advantage of per-process address space size. For |
|---|
| 1027 | | - * now, all processes share the same address space size, like |
|---|
| 1028 | | - * on GFX8 and older. |
|---|
| 1029 | | - */ |
|---|
| 1030 | | - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); |
|---|
| 1031 | | - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); |
|---|
| 702 | + adev->mmhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); |
|---|
| 1032 | 703 | |
|---|
| 1033 | | - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), |
|---|
| 1034 | | - lower_32_bits(adev->vm_manager.max_pfn - 1)); |
|---|
| 1035 | | - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), |
|---|
| 1036 | | - upper_32_bits(adev->vm_manager.max_pfn - 1)); |
|---|
| 1037 | | - |
|---|
| 1038 | | - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); |
|---|
| 1039 | | - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); |
|---|
| 1040 | | - |
|---|
| 1041 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); |
|---|
| 1042 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); |
|---|
| 1043 | | - |
|---|
| 1044 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), |
|---|
| 1045 | | - lower_32_bits(adev->vm_manager.max_pfn - 1)); |
|---|
| 1046 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), |
|---|
| 1047 | | - upper_32_bits(adev->vm_manager.max_pfn - 1)); |
|---|
| 1048 | | - |
|---|
| 1049 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); |
|---|
| 1050 | | - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); |
|---|
| 704 | + adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); |
|---|
| 1051 | 705 | } |
|---|
| 706 | + |
|---|
| 707 | +static void lock_spi_csq_mutexes(struct amdgpu_device *adev) |
|---|
| 708 | +{ |
|---|
| 709 | + mutex_lock(&adev->srbm_mutex); |
|---|
| 710 | + mutex_lock(&adev->grbm_idx_mutex); |
|---|
| 711 | + |
|---|
| 712 | +} |
|---|
| 713 | + |
|---|
| 714 | +static void unlock_spi_csq_mutexes(struct amdgpu_device *adev) |
|---|
| 715 | +{ |
|---|
| 716 | + mutex_unlock(&adev->grbm_idx_mutex); |
|---|
| 717 | + mutex_unlock(&adev->srbm_mutex); |
|---|
| 718 | +} |
|---|
| 719 | + |
|---|
| 720 | +/** |
|---|
| 721 | + * @get_wave_count: Read device registers to get number of waves in flight for |
|---|
| 722 | + * a particular queue. The method also returns the VMID associated with the |
|---|
| 723 | + * queue. |
|---|
| 724 | + * |
|---|
| 725 | + * @adev: Handle of device whose registers are to be read |
|---|
| 726 | + * @queue_idx: Index of queue in the queue-map bit-field |
|---|
| 727 | + * @wave_cnt: Output parameter updated with number of waves in flight |
|---|
| 728 | + * @vmid: Output parameter updated with VMID of queue whose wave count |
|---|
| 729 | + * is being collected |
|---|
| 730 | + */ |
|---|
| 731 | +static void get_wave_count(struct amdgpu_device *adev, int queue_idx, |
|---|
| 732 | + int *wave_cnt, int *vmid) |
|---|
| 733 | +{ |
|---|
| 734 | + int pipe_idx; |
|---|
| 735 | + int queue_slot; |
|---|
| 736 | + unsigned int reg_val; |
|---|
| 737 | + |
|---|
| 738 | + /* |
|---|
| 739 | + * Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID |
|---|
| 740 | + * parameters to read out waves in flight. Get VMID if there are |
|---|
| 741 | + * non-zero waves in flight. |
|---|
| 742 | + */ |
|---|
| 743 | + *vmid = 0xFF; |
|---|
| 744 | + *wave_cnt = 0; |
|---|
| 745 | + pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe; |
|---|
| 746 | + queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe; |
|---|
| 747 | + soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0); |
|---|
| 748 | + reg_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) + |
|---|
| 749 | + queue_slot); |
|---|
| 750 | + *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; |
|---|
| 751 | + if (*wave_cnt != 0) |
|---|
| 752 | + *vmid = (RREG32_SOC15(GC, 0, mmCP_HQD_VMID) & |
|---|
| 753 | + CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT; |
|---|
| 754 | +} |
|---|
| 755 | + |
|---|
| 756 | +/** |
|---|
| 757 | + * @kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each |
|---|
| 758 | + * shader engine and aggregates the number of waves that are in flight for the |
|---|
| 759 | + * process whose pasid is provided as a parameter. The process could have ZERO |
|---|
| 760 | + * or more queues running and submitting waves to compute units. |
|---|
| 761 | + * |
|---|
| 762 | + * @kgd: Handle of device from which to get number of waves in flight |
|---|
| 763 | + * @pasid: Identifies the process for which this query call is invoked |
|---|
| 764 | + * @wave_cnt: Output parameter updated with number of waves in flight that |
|---|
| 765 | + * belong to process with given pasid |
|---|
| 766 | + * @max_waves_per_cu: Output parameter updated with maximum number of waves |
|---|
| 767 | + * possible per Compute Unit |
|---|
| 768 | + * |
|---|
| 769 | + * @note: It's possible that the device has too many queues (oversubscription) |
|---|
| 770 | + * in which case a VMID could be remapped to a different PASID. This could lead |
|---|
| 771 | + * to an inaccurate wave count. Following is a high-level sequence: |
|---|
| 772 | + * Time T1: vmid = getVmid(); vmid is associated with Pasid P1 |
|---|
| 773 | + * Time T2: passId = getPasId(vmid); vmid is associated with Pasid P2 |
|---|
| 774 | + * In the sequence above wave count obtained from time T1 will be incorrectly |
|---|
| 775 | + * lost or added to total wave count. |
|---|
| 776 | + * |
|---|
| 777 | + * The registers that provide the waves in flight are: |
|---|
| 778 | + * |
|---|
| 779 | + * SPI_CSQ_WF_ACTIVE_STATUS - bit-map of queues per pipe. The bit is ON if a |
|---|
| 780 | + * queue is slotted, OFF if there is no queue. A process could have ZERO or |
|---|
| 781 | + * more queues slotted and submitting waves to be run on compute units. Even |
|---|
| 782 | + * when there is a queue it is possible there could be zero wave fronts, this |
|---|
| 783 | + * can happen when queue is waiting on top-of-pipe events - e.g. waitRegMem |
|---|
| 784 | + * command |
|---|
| 785 | + * |
|---|
| 786 | + * For each bit that is ON from above: |
|---|
| 787 | + * |
|---|
| 788 | + * Read (SPI_CSQ_WF_ACTIVE_COUNT_0 + queue_idx) register. It provides the |
|---|
| 789 | + * number of waves that are in flight for the queue at specified index. The |
|---|
| 790 | + * index ranges from 0 to 7. |
|---|
| 791 | + * |
|---|
| 792 | + * If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID |
|---|
| 793 | + * of the wave(s). |
|---|
| 794 | + * |
|---|
| 795 | + * Determine if VMID from above step maps to pasid provided as parameter. If |
|---|
| 796 | + * it matches, aggregate the wave count. That the VMID will not match pasid is |
|---|
| 797 | + * a normal condition i.e. a device is expected to support multiple queues |
|---|
| 798 | + * from multiple processes. |
|---|
| 799 | + * |
|---|
| 800 | + * Reading registers referenced above involves programming GRBM appropriately |
|---|
| 801 | + */ |
|---|
| 802 | +static void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, |
|---|
| 803 | + int *pasid_wave_cnt, int *max_waves_per_cu) |
|---|
| 804 | +{ |
|---|
| 805 | + int qidx; |
|---|
| 806 | + int vmid; |
|---|
| 807 | + int se_idx; |
|---|
| 808 | + int sh_idx; |
|---|
| 809 | + int se_cnt; |
|---|
| 810 | + int sh_cnt; |
|---|
| 811 | + int wave_cnt; |
|---|
| 812 | + int queue_map; |
|---|
| 813 | + int pasid_tmp; |
|---|
| 814 | + int max_queue_cnt; |
|---|
| 815 | + int vmid_wave_cnt = 0; |
|---|
| 816 | + struct amdgpu_device *adev; |
|---|
| 817 | + DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES); |
|---|
| 818 | + |
|---|
| 819 | + adev = get_amdgpu_device(kgd); |
|---|
| 820 | + lock_spi_csq_mutexes(adev); |
|---|
| 821 | + soc15_grbm_select(adev, 1, 0, 0, 0); |
|---|
| 822 | + |
|---|
| 823 | + /* |
|---|
| 824 | + * Iterate through the shader engines and arrays of the device |
|---|
| 825 | + * to get number of waves in flight |
|---|
| 826 | + */ |
|---|
| 827 | + bitmap_complement(cp_queue_bitmap, adev->gfx.mec.queue_bitmap, |
|---|
| 828 | + KGD_MAX_QUEUES); |
|---|
| 829 | + max_queue_cnt = adev->gfx.mec.num_pipe_per_mec * |
|---|
| 830 | + adev->gfx.mec.num_queue_per_pipe; |
|---|
| 831 | + sh_cnt = adev->gfx.config.max_sh_per_se; |
|---|
| 832 | + se_cnt = adev->gfx.config.max_shader_engines; |
|---|
| 833 | + for (se_idx = 0; se_idx < se_cnt; se_idx++) { |
|---|
| 834 | + for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) { |
|---|
| 835 | + |
|---|
| 836 | + gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0xffffffff); |
|---|
| 837 | + queue_map = RREG32(SOC15_REG_OFFSET(GC, 0, |
|---|
| 838 | + mmSPI_CSQ_WF_ACTIVE_STATUS)); |
|---|
| 839 | + |
|---|
| 840 | + /* |
|---|
| 841 | + * Assumption: queue map encodes following schema: four |
|---|
| 842 | + * pipes per each micro-engine, with each pipe mapping |
|---|
| 843 | + * eight queues. This schema is true for GFX9 devices |
|---|
| 844 | + * and must be verified for newer device families |
|---|
| 845 | + */ |
|---|
| 846 | + for (qidx = 0; qidx < max_queue_cnt; qidx++) { |
|---|
| 847 | + |
|---|
| 848 | + /* Skip queues that are not associated with |
|---|
| 849 | + * compute functions |
|---|
| 850 | + */ |
|---|
| 851 | + if (!test_bit(qidx, cp_queue_bitmap)) |
|---|
| 852 | + continue; |
|---|
| 853 | + |
|---|
| 854 | + if (!(queue_map & (1 << qidx))) |
|---|
| 855 | + continue; |
|---|
| 856 | + |
|---|
| 857 | + /* Get number of waves in flight and aggregate them */ |
|---|
| 858 | + get_wave_count(adev, qidx, &wave_cnt, &vmid); |
|---|
| 859 | + if (wave_cnt != 0) { |
|---|
| 860 | + pasid_tmp = |
|---|
| 861 | + RREG32(SOC15_REG_OFFSET(OSSSYS, 0, |
|---|
| 862 | + mmIH_VMID_0_LUT) + vmid); |
|---|
| 863 | + if (pasid_tmp == pasid) |
|---|
| 864 | + vmid_wave_cnt += wave_cnt; |
|---|
| 865 | + } |
|---|
| 866 | + } |
|---|
| 867 | + } |
|---|
| 868 | + } |
|---|
| 869 | + |
|---|
| 870 | + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); |
|---|
| 871 | + soc15_grbm_select(adev, 0, 0, 0, 0); |
|---|
| 872 | + unlock_spi_csq_mutexes(adev); |
|---|
| 873 | + |
|---|
| 874 | + /* Update the output parameters and return */ |
|---|
| 875 | + *pasid_wave_cnt = vmid_wave_cnt; |
|---|
| 876 | + *max_waves_per_cu = adev->gfx.cu_info.simd_per_cu * |
|---|
| 877 | + adev->gfx.cu_info.max_waves_per_simd; |
|---|
| 878 | +} |
|---|
| 879 | + |
|---|
| 880 | +const struct kfd2kgd_calls gfx_v9_kfd2kgd = { |
|---|
| 881 | + .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, |
|---|
| 882 | + .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, |
|---|
| 883 | + .init_interrupts = kgd_gfx_v9_init_interrupts, |
|---|
| 884 | + .hqd_load = kgd_gfx_v9_hqd_load, |
|---|
| 885 | + .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load, |
|---|
| 886 | + .hqd_sdma_load = kgd_hqd_sdma_load, |
|---|
| 887 | + .hqd_dump = kgd_gfx_v9_hqd_dump, |
|---|
| 888 | + .hqd_sdma_dump = kgd_hqd_sdma_dump, |
|---|
| 889 | + .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, |
|---|
| 890 | + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, |
|---|
| 891 | + .hqd_destroy = kgd_gfx_v9_hqd_destroy, |
|---|
| 892 | + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, |
|---|
| 893 | + .address_watch_disable = kgd_gfx_v9_address_watch_disable, |
|---|
| 894 | + .address_watch_execute = kgd_gfx_v9_address_watch_execute, |
|---|
| 895 | + .wave_control_execute = kgd_gfx_v9_wave_control_execute, |
|---|
| 896 | + .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, |
|---|
| 897 | + .get_atc_vmid_pasid_mapping_info = |
|---|
| 898 | + kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, |
|---|
| 899 | + .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, |
|---|
| 900 | + .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, |
|---|
| 901 | +}; |
|---|