| .. | .. |
|---|
| 22 | 22 | |
|---|
| 23 | 23 | #include "amdgpu_amdkfd.h" |
|---|
| 24 | 24 | #include "amd_shared.h" |
|---|
| 25 | | -#include <drm/drmP.h> |
|---|
| 25 | + |
|---|
| 26 | 26 | #include "amdgpu.h" |
|---|
| 27 | 27 | #include "amdgpu_gfx.h" |
|---|
| 28 | +#include "amdgpu_dma_buf.h" |
|---|
| 28 | 29 | #include <linux/module.h> |
|---|
| 30 | +#include <linux/dma-buf.h> |
|---|
| 31 | +#include "amdgpu_xgmi.h" |
|---|
| 32 | +#include <uapi/linux/kfd_ioctl.h> |
|---|
| 29 | 33 | |
|---|
/* Total memory size in system memory and all GPU VRAM. Used to
 * estimate worst case amount of memory to reserve for page tables
 */
uint64_t amdgpu_amdkfd_total_mem_size;

/* True only after kgd2kfd_init() succeeded in amdgpu_amdkfd_init();
 * every per-device KFD hook in this file is a no-op while it is false.
 */
static bool kfd_initialized;
|---|
/* One-time amdkfd bring-up at module load.
 *
 * Records the system memory size (totalram minus totalhigh, scaled by
 * mem_unit) into amdgpu_amdkfd_total_mem_size for the KFD memory-limit
 * accounting, initializes the kfd module and the GPUVM memory limits.
 *
 * Returns the kgd2kfd_init() result; kfd_initialized tracks success.
 */
int amdgpu_amdkfd_init(void)
{
	struct sysinfo si;
	int ret;

	si_meminfo(&si);
	/* totalhigh is excluded — presumably highmem is not usable for
	 * this accounting; TODO confirm against the mem-limit users.
	 */
	amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh;
	amdgpu_amdkfd_total_mem_size *= si.mem_unit;

	ret = kgd2kfd_init();
	amdgpu_amdkfd_gpuvm_init_mem_limits();
	kfd_initialized = !ret;

	return ret;
}
|---|
| 71 | 56 | |
|---|
/* Module-unload counterpart of amdgpu_amdkfd_init(): shut down the kfd
 * module, but only if init actually succeeded.
 */
void amdgpu_amdkfd_fini(void)
{
	if (kfd_initialized) {
		kgd2kfd_exit();
		kfd_initialized = false;
	}
}
|---|
| 79 | 64 | |
|---|
/* Probe a KFD device instance for @adev.
 *
 * Skipped entirely if the kfd module failed to initialize. On a
 * successful probe the device's VRAM size is added to the global
 * total-memory estimate used for page-table reservation.
 */
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
{
	bool vf = amdgpu_sriov_vf(adev);

	if (!kfd_initialized)
		return;

	/* adev doubles as the opaque kgd handle across the KFD interface */
	adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev,
				      adev->pdev, adev->asic_type, vf);

	if (adev->kfd.dev)
		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
}
|---|
| 113 | 78 | |
|---|
| 114 | 79 | /** |
|---|
| .. | .. |
|---|
| 148 | 113 | { |
|---|
| 149 | 114 | int i; |
|---|
| 150 | 115 | int last_valid_bit; |
|---|
| 151 | | - if (adev->kfd) { |
|---|
| 116 | + |
|---|
| 117 | + if (adev->kfd.dev) { |
|---|
| 152 | 118 | struct kgd2kfd_shared_resources gpu_resources = { |
|---|
| 153 | | - .compute_vmid_bitmap = compute_vmid_bitmap, |
|---|
| 119 | + .compute_vmid_bitmap = |
|---|
| 120 | + ((1 << AMDGPU_NUM_VMID) - 1) - |
|---|
| 121 | + ((1 << adev->vm_manager.first_kfd_vmid) - 1), |
|---|
| 154 | 122 | .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, |
|---|
| 155 | 123 | .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, |
|---|
| 156 | 124 | .gpuvm_size = min(adev->vm_manager.max_pfn |
|---|
| 157 | 125 | << AMDGPU_GPU_PAGE_SHIFT, |
|---|
| 158 | | - AMDGPU_VA_HOLE_START), |
|---|
| 159 | | - .drm_render_minor = adev->ddev->render->index |
|---|
| 126 | + AMDGPU_GMC_HOLE_START), |
|---|
| 127 | + .drm_render_minor = adev_to_drm(adev)->render->index, |
|---|
| 128 | + .sdma_doorbell_idx = adev->doorbell_index.sdma_engine, |
|---|
| 129 | + |
|---|
| 160 | 130 | }; |
|---|
| 161 | 131 | |
|---|
| 162 | 132 | /* this is going to have a few of the MSBs set that we need to |
|---|
| 163 | | - * clear */ |
|---|
| 164 | | - bitmap_complement(gpu_resources.queue_bitmap, |
|---|
| 133 | + * clear |
|---|
| 134 | + */ |
|---|
| 135 | + bitmap_complement(gpu_resources.cp_queue_bitmap, |
|---|
| 165 | 136 | adev->gfx.mec.queue_bitmap, |
|---|
| 166 | 137 | KGD_MAX_QUEUES); |
|---|
| 167 | 138 | |
|---|
| 168 | | - /* remove the KIQ bit as well */ |
|---|
| 169 | | - if (adev->gfx.kiq.ring.ready) |
|---|
| 170 | | - clear_bit(amdgpu_gfx_queue_to_bit(adev, |
|---|
| 171 | | - adev->gfx.kiq.ring.me - 1, |
|---|
| 172 | | - adev->gfx.kiq.ring.pipe, |
|---|
| 173 | | - adev->gfx.kiq.ring.queue), |
|---|
| 174 | | - gpu_resources.queue_bitmap); |
|---|
| 175 | | - |
|---|
| 176 | 139 | /* According to linux/bitmap.h we shouldn't use bitmap_clear if |
|---|
| 177 | | - * nbits is not compile time constant */ |
|---|
| 140 | + * nbits is not compile time constant |
|---|
| 141 | + */ |
|---|
| 178 | 142 | last_valid_bit = 1 /* only first MEC can have compute queues */ |
|---|
| 179 | 143 | * adev->gfx.mec.num_pipe_per_mec |
|---|
| 180 | 144 | * adev->gfx.mec.num_queue_per_pipe; |
|---|
| 181 | 145 | for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) |
|---|
| 182 | | - clear_bit(i, gpu_resources.queue_bitmap); |
|---|
| 146 | + clear_bit(i, gpu_resources.cp_queue_bitmap); |
|---|
| 183 | 147 | |
|---|
| 184 | 148 | amdgpu_doorbell_get_kfd_info(adev, |
|---|
| 185 | 149 | &gpu_resources.doorbell_physical_address, |
|---|
| 186 | 150 | &gpu_resources.doorbell_aperture_size, |
|---|
| 187 | 151 | &gpu_resources.doorbell_start_offset); |
|---|
| 152 | + |
|---|
| 153 | + /* Since SOC15, BIF starts to statically use the |
|---|
| 154 | + * lower 12 bits of doorbell addresses for routing |
|---|
| 155 | + * based on settings in registers like |
|---|
| 156 | + * SDMA0_DOORBELL_RANGE etc.. |
|---|
| 157 | + * In order to route a doorbell to CP engine, the lower |
|---|
| 158 | + * 12 bits of its address have to be outside the range |
|---|
| 159 | + * set for SDMA, VCN, and IH blocks. |
|---|
| 160 | + */ |
|---|
| 188 | 161 | if (adev->asic_type >= CHIP_VEGA10) { |
|---|
| 189 | | - /* On SOC15 the BIF is involved in routing |
|---|
| 190 | | - * doorbells using the low 12 bits of the |
|---|
| 191 | | - * address. Communicate the assignments to |
|---|
| 192 | | - * KFD. KFD uses two doorbell pages per |
|---|
| 193 | | - * process in case of 64-bit doorbells so we |
|---|
| 194 | | - * can use each doorbell assignment twice. |
|---|
| 195 | | - */ |
|---|
| 196 | | - gpu_resources.sdma_doorbell[0][0] = |
|---|
| 197 | | - AMDGPU_DOORBELL64_sDMA_ENGINE0; |
|---|
| 198 | | - gpu_resources.sdma_doorbell[0][1] = |
|---|
| 199 | | - AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200; |
|---|
| 200 | | - gpu_resources.sdma_doorbell[1][0] = |
|---|
| 201 | | - AMDGPU_DOORBELL64_sDMA_ENGINE1; |
|---|
| 202 | | - gpu_resources.sdma_doorbell[1][1] = |
|---|
| 203 | | - AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200; |
|---|
| 204 | | - /* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for |
|---|
| 205 | | - * SDMA, IH and VCN. So don't use them for the CP. |
|---|
| 206 | | - */ |
|---|
| 207 | | - gpu_resources.reserved_doorbell_mask = 0x1f0; |
|---|
| 208 | | - gpu_resources.reserved_doorbell_val = 0x0f0; |
|---|
| 162 | + gpu_resources.non_cp_doorbells_start = |
|---|
| 163 | + adev->doorbell_index.first_non_cp; |
|---|
| 164 | + gpu_resources.non_cp_doorbells_end = |
|---|
| 165 | + adev->doorbell_index.last_non_cp; |
|---|
| 209 | 166 | } |
|---|
| 210 | 167 | |
|---|
| 211 | | - kgd2kfd->device_init(adev->kfd, &gpu_resources); |
|---|
| 168 | + kgd2kfd_device_init(adev->kfd.dev, adev_to_drm(adev), &gpu_resources); |
|---|
| 212 | 169 | } |
|---|
| 213 | 170 | } |
|---|
| 214 | 171 | |
|---|
| 215 | 172 | void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev) |
|---|
| 216 | 173 | { |
|---|
| 217 | | - if (adev->kfd) { |
|---|
| 218 | | - kgd2kfd->device_exit(adev->kfd); |
|---|
| 219 | | - adev->kfd = NULL; |
|---|
| 174 | + if (adev->kfd.dev) { |
|---|
| 175 | + kgd2kfd_device_exit(adev->kfd.dev); |
|---|
| 176 | + adev->kfd.dev = NULL; |
|---|
| 220 | 177 | } |
|---|
| 221 | 178 | } |
|---|
| 222 | 179 | |
|---|
/* Forward an IH ring entry to KFD for processing, if KFD is attached */
void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
			const void *ih_ring_entry)
{
	if (adev->kfd.dev)
		kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
}
|---|
| 229 | 186 | |
|---|
/* Suspend the attached KFD device. @run_pm is passed straight through
 * to kgd2kfd_suspend(); presumably it distinguishes runtime-PM suspend
 * from system suspend — confirm against the KFD implementation.
 */
void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
{
	if (adev->kfd.dev)
		kgd2kfd_suspend(adev->kfd.dev, run_pm);
}
|---|
| 235 | 192 | |
|---|
/* Resume the IOMMU state of the attached KFD device.
 * Returns 0 when no KFD device is attached, otherwise the
 * kgd2kfd_resume_iommu() result.
 */
int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_resume_iommu(adev->kfd.dev);

	return r;
}
|---|
| 202 | + |
|---|
/* Resume the attached KFD device; counterpart of amdgpu_amdkfd_suspend().
 * Returns 0 when no KFD device is attached.
 */
int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_resume(adev->kfd.dev, run_pm);

	return r;
}
|---|
| .. | .. |
|---|
| 247 | 214 | { |
|---|
| 248 | 215 | int r = 0; |
|---|
| 249 | 216 | |
|---|
| 250 | | - if (adev->kfd) |
|---|
| 251 | | - r = kgd2kfd->pre_reset(adev->kfd); |
|---|
| 217 | + if (adev->kfd.dev) |
|---|
| 218 | + r = kgd2kfd_pre_reset(adev->kfd.dev); |
|---|
| 252 | 219 | |
|---|
| 253 | 220 | return r; |
|---|
| 254 | 221 | } |
|---|
| .. | .. |
|---|
| 257 | 224 | { |
|---|
| 258 | 225 | int r = 0; |
|---|
| 259 | 226 | |
|---|
| 260 | | - if (adev->kfd) |
|---|
| 261 | | - r = kgd2kfd->post_reset(adev->kfd); |
|---|
| 227 | + if (adev->kfd.dev) |
|---|
| 228 | + r = kgd2kfd_post_reset(adev->kfd.dev); |
|---|
| 262 | 229 | |
|---|
| 263 | 230 | return r; |
|---|
| 264 | 231 | } |
|---|
| .. | .. |
|---|
| 267 | 234 | { |
|---|
| 268 | 235 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
|---|
| 269 | 236 | |
|---|
| 270 | | - amdgpu_device_gpu_recover(adev, NULL, false); |
|---|
| 237 | + if (amdgpu_device_should_recover_gpu(adev)) |
|---|
| 238 | + amdgpu_device_gpu_recover(adev, NULL); |
|---|
| 271 | 239 | } |
|---|
| 272 | 240 | |
|---|
| 273 | | -int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, |
|---|
| 274 | | - void **mem_obj, uint64_t *gpu_addr, |
|---|
| 275 | | - void **cpu_ptr, bool mqd_gfx9) |
|---|
| 241 | +int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, |
|---|
| 242 | + void **mem_obj, uint64_t *gpu_addr, |
|---|
| 243 | + void **cpu_ptr, bool cp_mqd_gfx9) |
|---|
| 276 | 244 | { |
|---|
| 277 | 245 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
|---|
| 278 | 246 | struct amdgpu_bo *bo = NULL; |
|---|
| .. | .. |
|---|
| 288 | 256 | bp.type = ttm_bo_type_kernel; |
|---|
| 289 | 257 | bp.resv = NULL; |
|---|
| 290 | 258 | |
|---|
| 291 | | - if (mqd_gfx9) |
|---|
| 292 | | - bp.flags |= AMDGPU_GEM_CREATE_MQD_GFX9; |
|---|
| 259 | + if (cp_mqd_gfx9) |
|---|
| 260 | + bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9; |
|---|
| 293 | 261 | |
|---|
| 294 | 262 | r = amdgpu_bo_create(adev, &bp, &bo); |
|---|
| 295 | 263 | if (r) { |
|---|
| .. | .. |
|---|
| 342 | 310 | return r; |
|---|
| 343 | 311 | } |
|---|
| 344 | 312 | |
|---|
| 345 | | -void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) |
|---|
| 313 | +void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) |
|---|
| 346 | 314 | { |
|---|
| 347 | 315 | struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; |
|---|
| 348 | 316 | |
|---|
| .. | .. |
|---|
| 353 | 321 | amdgpu_bo_unref(&(bo)); |
|---|
| 354 | 322 | } |
|---|
| 355 | 323 | |
|---|
/* Allocate a buffer object of @size in the GWS (global wave sync)
 * domain for KFD and return it through @mem_obj.
 *
 * The BO is created with no CPU access; byte_align of 1 leaves
 * alignment to the GWS domain manager. Returns 0 on success or the
 * amdgpu_bo_create() error code.
 */
int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
				void **mem_obj)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_bo *bo = NULL;
	struct amdgpu_bo_param bp;
	int r;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = 1;
	bp.domain = AMDGPU_GEM_DOMAIN_GWS;
	bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
	bp.type = ttm_bo_type_device;
	bp.resv = NULL;

	r = amdgpu_bo_create(adev, &bp, &bo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate gws BO for amdkfd (%d)\n", r);
		return r;
	}

	*mem_obj = bo;
	return 0;
}
|---|
| 350 | + |
|---|
/* Release a GWS BO previously obtained from amdgpu_amdkfd_alloc_gws() */
void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
{
	struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)mem_obj;

	amdgpu_bo_unref(&gws_bo);
}
|---|
| 357 | + |
|---|
| 358 | +uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, |
|---|
| 359 | + enum kgd_engine_type type) |
|---|
| 360 | +{ |
|---|
| 361 | + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
|---|
| 362 | + |
|---|
| 363 | + switch (type) { |
|---|
| 364 | + case KGD_ENGINE_PFP: |
|---|
| 365 | + return adev->gfx.pfp_fw_version; |
|---|
| 366 | + |
|---|
| 367 | + case KGD_ENGINE_ME: |
|---|
| 368 | + return adev->gfx.me_fw_version; |
|---|
| 369 | + |
|---|
| 370 | + case KGD_ENGINE_CE: |
|---|
| 371 | + return adev->gfx.ce_fw_version; |
|---|
| 372 | + |
|---|
| 373 | + case KGD_ENGINE_MEC1: |
|---|
| 374 | + return adev->gfx.mec_fw_version; |
|---|
| 375 | + |
|---|
| 376 | + case KGD_ENGINE_MEC2: |
|---|
| 377 | + return adev->gfx.mec2_fw_version; |
|---|
| 378 | + |
|---|
| 379 | + case KGD_ENGINE_RLC: |
|---|
| 380 | + return adev->gfx.rlc_fw_version; |
|---|
| 381 | + |
|---|
| 382 | + case KGD_ENGINE_SDMA1: |
|---|
| 383 | + return adev->sdma.instance[0].fw_version; |
|---|
| 384 | + |
|---|
| 385 | + case KGD_ENGINE_SDMA2: |
|---|
| 386 | + return adev->sdma.instance[1].fw_version; |
|---|
| 387 | + |
|---|
| 388 | + default: |
|---|
| 389 | + return 0; |
|---|
| 390 | + } |
|---|
| 391 | + |
|---|
| 392 | + return 0; |
|---|
| 393 | +} |
|---|
| 394 | + |
|---|
| 395 | +void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, |
|---|
| 396 | + struct kfd_local_mem_info *mem_info) |
|---|
| 358 | 397 | { |
|---|
| 359 | 398 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
|---|
| 360 | 399 | uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask : |
|---|
| .. | .. |
|---|
| 379 | 418 | |
|---|
| 380 | 419 | if (amdgpu_sriov_vf(adev)) |
|---|
| 381 | 420 | mem_info->mem_clk_max = adev->clock.default_mclk / 100; |
|---|
| 382 | | - else if (adev->powerplay.pp_funcs) |
|---|
| 383 | | - mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100; |
|---|
| 384 | | - else |
|---|
| 421 | + else if (adev->pm.dpm_enabled) { |
|---|
| 422 | + if (amdgpu_emu_mode == 1) |
|---|
| 423 | + mem_info->mem_clk_max = 0; |
|---|
| 424 | + else |
|---|
| 425 | + mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100; |
|---|
| 426 | + } else |
|---|
| 385 | 427 | mem_info->mem_clk_max = 100; |
|---|
| 386 | 428 | } |
|---|
| 387 | 429 | |
|---|
| 388 | | -uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) |
|---|
| 430 | +uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd) |
|---|
| 389 | 431 | { |
|---|
| 390 | 432 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
|---|
| 391 | 433 | |
|---|
| .. | .. |
|---|
| 394 | 436 | return 0; |
|---|
| 395 | 437 | } |
|---|
| 396 | 438 | |
|---|
| 397 | | -uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) |
|---|
| 439 | +uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd) |
|---|
| 398 | 440 | { |
|---|
| 399 | 441 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
|---|
| 400 | 442 | |
|---|
| 401 | 443 | /* the sclk is in quantas of 10kHz */ |
|---|
| 402 | 444 | if (amdgpu_sriov_vf(adev)) |
|---|
| 403 | 445 | return adev->clock.default_sclk / 100; |
|---|
| 404 | | - else if (adev->powerplay.pp_funcs) |
|---|
| 446 | + else if (adev->pm.dpm_enabled) |
|---|
| 405 | 447 | return amdgpu_dpm_get_sclk(adev, false) / 100; |
|---|
| 406 | 448 | else |
|---|
| 407 | 449 | return 100; |
|---|
| 408 | 450 | } |
|---|
| 409 | 451 | |
|---|
| 410 | | -void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) |
|---|
| 452 | +void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) |
|---|
| 411 | 453 | { |
|---|
| 412 | 454 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
|---|
| 413 | 455 | struct amdgpu_cu_info acu_info = adev->gfx.cu_info; |
|---|
| .. | .. |
|---|
| 430 | 472 | cu_info->lds_size = acu_info.lds_size; |
|---|
| 431 | 473 | } |
|---|
| 432 | 474 | |
|---|
/* Look up KFD-relevant information about a dma-buf given its fd.
 *
 * Only amdgpu-exported VRAM or GTT BOs from the same DRM driver are
 * accepted; anything else returns -EINVAL. Each out-parameter
 * (dma_buf_kgd, bo_size, metadata_buffer/metadata_size, flags) is
 * optional and only filled in when non-NULL. @flags is expressed in
 * KFD_IOC_ALLOC_MEM_FLAGS_* terms for the KFD ioctl interface.
 *
 * The dma-buf reference taken here is always dropped before return.
 */
int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
				  struct kgd_dev **dma_buf_kgd,
				  uint64_t *bo_size, void *metadata_buffer,
				  size_t buffer_size, uint32_t *metadata_size,
				  uint32_t *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct dma_buf *dma_buf;
	struct drm_gem_object *obj;
	struct amdgpu_bo *bo;
	uint64_t metadata_flags;
	int r = -EINVAL;

	dma_buf = dma_buf_get(dma_buf_fd);
	if (IS_ERR(dma_buf))
		return PTR_ERR(dma_buf);

	if (dma_buf->ops != &amdgpu_dmabuf_ops)
		/* Can't handle non-graphics buffers */
		goto out_put;

	obj = dma_buf->priv;
	if (obj->dev->driver != adev_to_drm(adev)->driver)
		/* Can't handle buffers from different drivers */
		goto out_put;

	/* From here on, adev is the *exporting* device's adev */
	adev = drm_to_adev(obj->dev);
	bo = gem_to_amdgpu_bo(obj);
	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
				    AMDGPU_GEM_DOMAIN_GTT)))
		/* Only VRAM and GTT BOs are supported */
		goto out_put;

	r = 0;
	if (dma_buf_kgd)
		*dma_buf_kgd = (struct kgd_dev *)adev;
	if (bo_size)
		*bo_size = amdgpu_bo_size(bo);
	if (metadata_size)
		*metadata_size = bo->metadata_size;
	if (metadata_buffer)
		/* may overwrite r with the metadata-copy result */
		r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
					   metadata_size, &metadata_flags);
	if (flags) {
		*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
				KFD_IOC_ALLOC_MEM_FLAGS_VRAM
				: KFD_IOC_ALLOC_MEM_FLAGS_GTT;

		if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
			*flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
	}

out_put:
	dma_buf_put(dma_buf);
	return r;
}
|---|
| 531 | + |
|---|
/* Report current VRAM usage in bytes via the TTM VRAM manager */
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);

	return amdgpu_vram_mgr_usage(vram_man);
}
|---|
| 539 | + |
|---|
| 540 | +uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd) |
|---|
| 541 | +{ |
|---|
| 542 | + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
|---|
| 543 | + |
|---|
| 544 | + return adev->gmc.xgmi.hive_id; |
|---|
| 545 | +} |
|---|
| 546 | + |
|---|
/* Report the device's unique (per-ASIC) identifier */
uint64_t amdgpu_amdkfd_get_unique_id(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->unique_id;
}
|---|
| 553 | + |
|---|
/* Report the number of XGMI hops between @dst and @src.
 *
 * On error the failure is logged and 0 is returned, so callers always
 * receive a usable (if pessimistic) hop count.
 */
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
{
	struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
	struct amdgpu_device *adev = (struct amdgpu_device *)dst;
	int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);

	if (ret < 0) {
		DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
			adev->gmc.xgmi.physical_node_id,
			peer_adev->gmc.xgmi.physical_node_id, ret);
		ret = 0;
	}
	return (uint8_t)ret;
}
|---|
| 568 | + |
|---|
/* Report the bus address of the remapped MMIO page */
uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->rmmio_remap.bus_addr;
}
|---|
| 575 | + |
|---|
| 576 | +uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd) |
|---|
| 577 | +{ |
|---|
| 578 | + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
|---|
| 579 | + |
|---|
| 580 | + return adev->gds.gws_size; |
|---|
| 581 | +} |
|---|
| 582 | + |
|---|
/* Report the ASIC revision ID */
uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->rev_id;
}
|---|
| 589 | + |
|---|
/* Report the GMC noretry setting for this device */
int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->gmc.noretry;
}
|---|
| 439 | 596 | |
|---|
| 440 | 597 | int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, |
|---|
| .. | .. |
|---|
| 478 | 635 | job->vmid = vmid; |
|---|
| 479 | 636 | |
|---|
| 480 | 637 | ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); |
|---|
| 638 | + |
|---|
| 481 | 639 | if (ret) { |
|---|
| 482 | 640 | DRM_ERROR("amdgpu: failed to schedule IB.\n"); |
|---|
| 483 | 641 | goto err_ib_sched; |
|---|
| .. | .. |
|---|
| 496 | 654 | { |
|---|
| 497 | 655 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
|---|
| 498 | 656 | |
|---|
| 499 | | - if (adev->powerplay.pp_funcs && |
|---|
| 500 | | - adev->powerplay.pp_funcs->switch_power_profile) |
|---|
| 501 | | - amdgpu_dpm_switch_power_profile(adev, |
|---|
| 502 | | - PP_SMC_POWER_PROFILE_COMPUTE, |
|---|
| 503 | | - !idle); |
|---|
| 657 | + amdgpu_dpm_switch_power_profile(adev, |
|---|
| 658 | + PP_SMC_POWER_PROFILE_COMPUTE, |
|---|
| 659 | + !idle); |
|---|
| 504 | 660 | } |
|---|
| 505 | 661 | |
|---|
| 506 | 662 | bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) |
|---|
| 507 | 663 | { |
|---|
| 508 | | - if (adev->kfd) { |
|---|
| 509 | | - if ((1 << vmid) & compute_vmid_bitmap) |
|---|
| 510 | | - return true; |
|---|
| 664 | + if (adev->kfd.dev) |
|---|
| 665 | + return vmid >= adev->vm_manager.first_kfd_vmid; |
|---|
| 666 | + |
|---|
| 667 | + return false; |
|---|
| 668 | +} |
|---|
| 669 | + |
|---|
/* Flush the GPU TLB entries for @vmid.
 *
 * On the AI (Vega/SOC15) family every VM hub is flushed; other
 * families only flush the GFX hub. Always returns 0.
 */
int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	if (adev->family == AMDGPU_FAMILY_AI) {
		int i;

		/* AI parts have multiple VM hubs; flush them all */
		for (i = 0; i < adev->num_vmhubs; i++)
			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
	} else {
		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
	}

	return 0;
}
|---|
| 540 | 685 | |
|---|
| 541 | | -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) |
|---|
/* Flush GPU TLB entries for all VMIDs bound to @pasid.
 *
 * AI and RV families flush every hub (all_hub = true); flush_type 0
 * is the default/legacy flush. Returns the GMC flush result.
 */
int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	const uint32_t flush_type = 0;
	bool all_hub = false;

	if (adev->family == AMDGPU_FAMILY_AI ||
	    adev->family == AMDGPU_FAMILY_RV)
		all_hub = true;

	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
}
|---|
| 545 | 698 | |
|---|
| 546 | | -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) |
|---|
| 699 | +bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd) |
|---|
| 547 | 700 | { |
|---|
| 548 | | - return NULL; |
|---|
| 549 | | -} |
|---|
| 701 | + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
|---|
| 550 | 702 | |
|---|
| 551 | | -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) |
|---|
| 552 | | -{ |
|---|
| 553 | | - return NULL; |
|---|
| 703 | + return adev->have_atomics_support; |
|---|
| 554 | 704 | } |
|---|
| 555 | | -#endif |
|---|