.. | .. |
---|
34 | 34 | |
---|
35 | 35 | struct pci_dev; |
---|
36 | 36 | |
---|
37 | | -#define KFD_INTERFACE_VERSION 2 |
---|
38 | 37 | #define KGD_MAX_QUEUES 128 |
---|
39 | 38 | |
---|
40 | 39 | struct kfd_dev; |
---|
.. | .. |
---|
86 | 85 | KGD_POOL_FRAMEBUFFER = 3, |
---|
87 | 86 | }; |
---|
88 | 87 | |
---|
89 | | -enum kgd_engine_type { |
---|
90 | | - KGD_ENGINE_PFP = 1, |
---|
91 | | - KGD_ENGINE_ME, |
---|
92 | | - KGD_ENGINE_CE, |
---|
93 | | - KGD_ENGINE_MEC1, |
---|
94 | | - KGD_ENGINE_MEC2, |
---|
95 | | - KGD_ENGINE_RLC, |
---|
96 | | - KGD_ENGINE_SDMA1, |
---|
97 | | - KGD_ENGINE_SDMA2, |
---|
98 | | - KGD_ENGINE_MAX |
---|
| 88 | +/** |
---|
| 89 | + * enum kfd_sched_policy |
---|
| 90 | + * |
---|
| 91 | + * @KFD_SCHED_POLICY_HWS: H/W scheduling policy known as command processor (cp) |
---|
| 92 | + * scheduling. In this scheduling mode we're using the firmware code to |
---|
| 93 | + * schedule the user mode queues and kernel queues such as HIQ and DIQ. |
---|
| 94 | + * The HIQ queue is used as a special queue that dispatches the configuration |
---|
| 95 | + * to the cp and the list of user mode queues that are currently running. |
---|
| 96 | + * The DIQ queue is a debugging queue that dispatches debugging commands to the |
---|
| 97 | + * firmware. |
---|
| 98 | + * In this scheduling mode the user mode queue oversubscription feature is |
---|
| 99 | + * enabled. |
---|
| 100 | + * |
---|
| 101 | + * @KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: The same as above but with the |
---|
| 102 | + * oversubscription feature disabled. |
---|
| 103 | + * |
---|
| 104 | + * @KFD_SCHED_POLICY_NO_HWS: no H/W scheduling policy is a mode which directly |
---|
| 105 | + * sets the command processor registers and sets the queues "manually". This |
---|
| 106 | + * mode is used *ONLY* for debugging purposes. |
---|
| 107 | + * |
---|
| 108 | + */ |
---|
| 109 | +enum kfd_sched_policy { |
---|
| 110 | + KFD_SCHED_POLICY_HWS = 0, |
---|
| 111 | + KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION, |
---|
| 112 | + KFD_SCHED_POLICY_NO_HWS |
---|
99 | 113 | }; |
---|
100 | 114 | |
---|
101 | 115 | struct kgd2kfd_shared_resources { |
---|
.. | .. |
---|
109 | 123 | uint32_t num_queue_per_pipe; |
---|
110 | 124 | |
---|
111 | 125 | /* Bit n == 1 means Queue n is available for KFD */ |
---|
112 | | - DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES); |
---|
| 126 | + DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES); |
---|
113 | 127 | |
---|
114 | | - /* Doorbell assignments (SOC15 and later chips only). Only |
---|
| 128 | + /* SDMA doorbell assignments (SOC15 and later chips only). Only |
---|
115 | 129 | * specific doorbells are routed to each SDMA engine. Others |
---|
116 | 130 | * are routed to IH and VCN. They are not usable by the CP. |
---|
117 | | - * |
---|
118 | | - * Any doorbell number D that satisfies the following condition |
---|
119 | | - * is reserved: (D & reserved_doorbell_mask) == reserved_doorbell_val |
---|
120 | | - * |
---|
121 | | - * KFD currently uses 1024 (= 0x3ff) doorbells per process. If |
---|
122 | | - * doorbells 0x0f0-0x0f7 and 0x2f-0x2f7 are reserved, that means |
---|
123 | | - * mask would be set to 0x1f8 and val set to 0x0f0. |
---|
124 | 131 | */ |
---|
125 | | - unsigned int sdma_doorbell[2][2]; |
---|
126 | | - unsigned int reserved_doorbell_mask; |
---|
127 | | - unsigned int reserved_doorbell_val; |
---|
| 132 | + uint32_t *sdma_doorbell_idx; |
---|
| 133 | + |
---|
| 134 | + /* From SOC15 onward, the doorbell index range not usable for CP |
---|
| 135 | + * queues. |
---|
| 136 | + */ |
---|
| 137 | + uint32_t non_cp_doorbells_start; |
---|
| 138 | + uint32_t non_cp_doorbells_end; |
---|
128 | 139 | |
---|
129 | 140 | /* Base address of doorbell aperture. */ |
---|
130 | 141 | phys_addr_t doorbell_physical_address; |
---|
.. | .. |
---|
140 | 151 | |
---|
141 | 152 | /* Minor device number of the render node */ |
---|
142 | 153 | int drm_render_minor; |
---|
| 154 | + |
---|
143 | 155 | }; |
---|
144 | 156 | |
---|
145 | 157 | struct tile_config { |
---|
.. | .. |
---|
153 | 165 | uint32_t num_ranks; |
---|
154 | 166 | }; |
---|
155 | 167 | |
---|
156 | | - |
---|
157 | | -/* |
---|
158 | | - * Allocation flag domains |
---|
159 | | - * NOTE: This must match the corresponding definitions in kfd_ioctl.h. |
---|
160 | | - */ |
---|
161 | | -#define ALLOC_MEM_FLAGS_VRAM (1 << 0) |
---|
162 | | -#define ALLOC_MEM_FLAGS_GTT (1 << 1) |
---|
163 | | -#define ALLOC_MEM_FLAGS_USERPTR (1 << 2) /* TODO */ |
---|
164 | | -#define ALLOC_MEM_FLAGS_DOORBELL (1 << 3) /* TODO */ |
---|
165 | | - |
---|
166 | | -/* |
---|
167 | | - * Allocation flags attributes/access options. |
---|
168 | | - * NOTE: This must match the corresponding definitions in kfd_ioctl.h. |
---|
169 | | - */ |
---|
170 | | -#define ALLOC_MEM_FLAGS_WRITABLE (1 << 31) |
---|
171 | | -#define ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) |
---|
172 | | -#define ALLOC_MEM_FLAGS_PUBLIC (1 << 29) |
---|
173 | | -#define ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28) /* TODO */ |
---|
174 | | -#define ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27) |
---|
175 | | -#define ALLOC_MEM_FLAGS_COHERENT (1 << 26) /* For GFXv9 or later */ |
---|
| 168 | +#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT 4096 |
---|
176 | 169 | |
---|
177 | 170 | /** |
---|
178 | 171 | * struct kfd2kgd_calls |
---|
179 | | - * |
---|
180 | | - * @init_gtt_mem_allocation: Allocate a buffer on the gart aperture. |
---|
181 | | - * The buffer can be used for mqds, hpds, kernel queue, fence and runlists |
---|
182 | | - * |
---|
183 | | - * @free_gtt_mem: Frees a buffer that was allocated on the gart aperture |
---|
184 | | - * |
---|
185 | | - * @get_local_mem_info: Retrieves information about GPU local memory |
---|
186 | | - * |
---|
187 | | - * @get_gpu_clock_counter: Retrieves GPU clock counter |
---|
188 | | - * |
---|
189 | | - * @get_max_engine_clock_in_mhz: Retrieves maximum GPU clock in MHz |
---|
190 | | - * |
---|
191 | | - * @alloc_pasid: Allocate a PASID |
---|
192 | | - * @free_pasid: Free a PASID |
---|
193 | 172 | * |
---|
194 | 173 | * @program_sh_mem_settings: A function that should initiate the memory |
---|
195 | 174 | * properties such as main aperture memory type (cache / non cached) and |
---|
.. | .. |
---|
220 | 199 | * @hqd_sdma_destroy: Destructs and preempts the SDMA queue assigned to that |
---|
221 | 200 | * SDMA hqd slot. |
---|
222 | 201 | * |
---|
223 | | - * @get_fw_version: Returns FW versions from the header |
---|
224 | | - * |
---|
225 | 202 | * @set_scratch_backing_va: Sets VA for scratch backing memory of a VMID. |
---|
226 | 203 | * Only used for no cp scheduling mode |
---|
227 | 204 | * |
---|
228 | | - * @get_tile_config: Returns GPU-specific tiling mode information |
---|
229 | | - * |
---|
230 | | - * @get_cu_info: Retrieves activated cu info |
---|
231 | | - * |
---|
232 | | - * @get_vram_usage: Returns current VRAM usage |
---|
233 | | - * |
---|
234 | | - * @create_process_vm: Create a VM address space for a given process and GPU |
---|
235 | | - * |
---|
236 | | - * @destroy_process_vm: Destroy a VM |
---|
237 | | - * |
---|
238 | | - * @get_process_page_dir: Get physical address of a VM page directory |
---|
239 | | - * |
---|
240 | 205 | * @set_vm_context_page_table_base: Program page table base for a VMID |
---|
241 | | - * |
---|
242 | | - * @alloc_memory_of_gpu: Allocate GPUVM memory |
---|
243 | | - * |
---|
244 | | - * @free_memory_of_gpu: Free GPUVM memory |
---|
245 | | - * |
---|
246 | | - * @map_memory_to_gpu: Map GPUVM memory into a specific VM address |
---|
247 | | - * space. Allocates and updates page tables and page directories as |
---|
248 | | - * needed. This function may return before all page table updates have |
---|
249 | | - * completed. This allows multiple map operations (on multiple GPUs) |
---|
250 | | - * to happen concurrently. Use sync_memory to synchronize with all |
---|
251 | | - * pending updates. |
---|
252 | | - * |
---|
253 | | - * @unmap_memor_to_gpu: Unmap GPUVM memory from a specific VM address space |
---|
254 | | - * |
---|
255 | | - * @sync_memory: Wait for pending page table updates to complete |
---|
256 | | - * |
---|
257 | | - * @map_gtt_bo_to_kernel: Map a GTT BO for kernel access |
---|
258 | | - * Pins the BO, maps it to kernel address space. Such BOs are never evicted. |
---|
259 | | - * The kernel virtual address remains valid until the BO is freed. |
---|
260 | | - * |
---|
261 | | - * @restore_process_bos: Restore all BOs that belong to the |
---|
262 | | - * process. This is intended for restoring memory mappings after a TTM |
---|
263 | | - * eviction. |
---|
264 | 206 | * |
---|
265 | 207 | * @invalidate_tlbs: Invalidate TLBs for a specific PASID |
---|
266 | 208 | * |
---|
267 | 209 | * @invalidate_tlbs_vmid: Invalidate TLBs for a specific VMID |
---|
268 | 210 | * |
---|
269 | | - * @submit_ib: Submits an IB to the engine specified by inserting the |
---|
270 | | - * IB to the corresponding ring (ring type). The IB is executed with the |
---|
271 | | - * specified VMID in a user mode context. |
---|
272 | | - * |
---|
273 | | - * @get_vm_fault_info: Return information about a recent VM fault on |
---|
274 | | - * GFXv7 and v8. If multiple VM faults occurred since the last call of |
---|
275 | | - * this function, it will return information about the first of those |
---|
276 | | - * faults. On GFXv9 VM fault information is fully contained in the IH |
---|
277 | | - * packet and this function is not needed. |
---|
278 | | - * |
---|
279 | 211 | * @read_vmid_from_vmfault_reg: On Hawaii the VMID is not set in the |
---|
280 | 212 | * IH ring entry. This function allows the KFD ISR to get the VMID |
---|
281 | 213 | * from the fault status register as early as possible. |
---|
282 | 214 | * |
---|
283 | | - * @gpu_recover: let kgd reset gpu after kfd detect CPC hang |
---|
284 | | - * |
---|
285 | | - * @set_compute_idle: Indicates that compute is idle on a device. This |
---|
286 | | - * can be used to change power profiles depending on compute activity. |
---|
| 215 | + * @get_cu_occupancy: Function pointer that returns to caller the number |
---|
| 216 | + * of wave fronts that are in flight for all of the queues of a process |
---|
| 217 | + * as identified by its pasid. It is important to note that the value |
---|
| 218 | + * returned by this function is a snapshot of current moment and cannot |
---|
| 219 | + * guarantee any minimum for the number of waves in-flight. This function |
---|
| 220 | + * is defined for devices that belong to GFX9 and later GFX families. Care |
---|
| 221 | + * must be taken in calling this function as it is not defined for devices |
---|
| 222 | + * that belong to GFX8 and below GFX families. |
---|
287 | 223 | * |
---|
288 | 224 | * This structure contains function pointers to services that the kgd driver |
---|
289 | 225 | * provides to amdkfd driver. |
---|
290 | 226 | * |
---|
291 | 227 | */ |
---|
292 | 228 | struct kfd2kgd_calls { |
---|
293 | | - int (*init_gtt_mem_allocation)(struct kgd_dev *kgd, size_t size, |
---|
294 | | - void **mem_obj, uint64_t *gpu_addr, |
---|
295 | | - void **cpu_ptr, bool mqd_gfx9); |
---|
296 | | - |
---|
297 | | - void (*free_gtt_mem)(struct kgd_dev *kgd, void *mem_obj); |
---|
298 | | - |
---|
299 | | - void (*get_local_mem_info)(struct kgd_dev *kgd, |
---|
300 | | - struct kfd_local_mem_info *mem_info); |
---|
301 | | - uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd); |
---|
302 | | - |
---|
303 | | - uint32_t (*get_max_engine_clock_in_mhz)(struct kgd_dev *kgd); |
---|
304 | | - |
---|
305 | | - int (*alloc_pasid)(unsigned int bits); |
---|
306 | | - void (*free_pasid)(unsigned int pasid); |
---|
307 | | - |
---|
308 | 229 | /* Register access functions */ |
---|
309 | 230 | void (*program_sh_mem_settings)(struct kgd_dev *kgd, uint32_t vmid, |
---|
310 | 231 | uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, |
---|
311 | 232 | uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); |
---|
312 | 233 | |
---|
313 | | - int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid, |
---|
| 234 | + int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, u32 pasid, |
---|
314 | 235 | unsigned int vmid); |
---|
315 | 236 | |
---|
316 | 237 | int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id); |
---|
.. | .. |
---|
319 | 240 | uint32_t queue_id, uint32_t __user *wptr, |
---|
320 | 241 | uint32_t wptr_shift, uint32_t wptr_mask, |
---|
321 | 242 | struct mm_struct *mm); |
---|
| 243 | + |
---|
| 244 | + int (*hiq_mqd_load)(struct kgd_dev *kgd, void *mqd, |
---|
| 245 | + uint32_t pipe_id, uint32_t queue_id, |
---|
| 246 | + uint32_t doorbell_off); |
---|
322 | 247 | |
---|
323 | 248 | int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd, |
---|
324 | 249 | uint32_t __user *wptr, struct mm_struct *mm); |
---|
.. | .. |
---|
355 | 280 | uint32_t (*address_watch_get_offset)(struct kgd_dev *kgd, |
---|
356 | 281 | unsigned int watch_point_id, |
---|
357 | 282 | unsigned int reg_offset); |
---|
358 | | - bool (*get_atc_vmid_pasid_mapping_valid)( |
---|
| 283 | + bool (*get_atc_vmid_pasid_mapping_info)( |
---|
359 | 284 | struct kgd_dev *kgd, |
---|
360 | | - uint8_t vmid); |
---|
361 | | - uint16_t (*get_atc_vmid_pasid_mapping_pasid)( |
---|
362 | | - struct kgd_dev *kgd, |
---|
363 | | - uint8_t vmid); |
---|
| 285 | + uint8_t vmid, |
---|
| 286 | + uint16_t *p_pasid); |
---|
364 | 287 | |
---|
365 | | - uint16_t (*get_fw_version)(struct kgd_dev *kgd, |
---|
366 | | - enum kgd_engine_type type); |
---|
| 288 | + /* No longer needed from GFXv9 onward. The scratch base address is |
---|
| 289 | + * passed to the shader by the CP. It's the user mode driver's |
---|
| 290 | + * responsibility. |
---|
| 291 | + */ |
---|
367 | 292 | void (*set_scratch_backing_va)(struct kgd_dev *kgd, |
---|
368 | 293 | uint64_t va, uint32_t vmid); |
---|
369 | | - int (*get_tile_config)(struct kgd_dev *kgd, struct tile_config *config); |
---|
370 | 294 | |
---|
371 | | - void (*get_cu_info)(struct kgd_dev *kgd, |
---|
372 | | - struct kfd_cu_info *cu_info); |
---|
373 | | - uint64_t (*get_vram_usage)(struct kgd_dev *kgd); |
---|
374 | | - |
---|
375 | | - int (*create_process_vm)(struct kgd_dev *kgd, void **vm, |
---|
376 | | - void **process_info, struct dma_fence **ef); |
---|
377 | | - int (*acquire_process_vm)(struct kgd_dev *kgd, struct file *filp, |
---|
378 | | - void **vm, void **process_info, struct dma_fence **ef); |
---|
379 | | - void (*destroy_process_vm)(struct kgd_dev *kgd, void *vm); |
---|
380 | | - uint32_t (*get_process_page_dir)(void *vm); |
---|
381 | 295 | void (*set_vm_context_page_table_base)(struct kgd_dev *kgd, |
---|
382 | | - uint32_t vmid, uint32_t page_table_base); |
---|
383 | | - int (*alloc_memory_of_gpu)(struct kgd_dev *kgd, uint64_t va, |
---|
384 | | - uint64_t size, void *vm, |
---|
385 | | - struct kgd_mem **mem, uint64_t *offset, |
---|
386 | | - uint32_t flags); |
---|
387 | | - int (*free_memory_of_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem); |
---|
388 | | - int (*map_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem, |
---|
389 | | - void *vm); |
---|
390 | | - int (*unmap_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem, |
---|
391 | | - void *vm); |
---|
392 | | - int (*sync_memory)(struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); |
---|
393 | | - int (*map_gtt_bo_to_kernel)(struct kgd_dev *kgd, struct kgd_mem *mem, |
---|
394 | | - void **kptr, uint64_t *size); |
---|
395 | | - int (*restore_process_bos)(void *process_info, struct dma_fence **ef); |
---|
396 | | - |
---|
397 | | - int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid); |
---|
398 | | - int (*invalidate_tlbs_vmid)(struct kgd_dev *kgd, uint16_t vmid); |
---|
399 | | - |
---|
400 | | - int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine, |
---|
401 | | - uint32_t vmid, uint64_t gpu_addr, |
---|
402 | | - uint32_t *ib_cmd, uint32_t ib_len); |
---|
403 | | - |
---|
404 | | - int (*get_vm_fault_info)(struct kgd_dev *kgd, |
---|
405 | | - struct kfd_vm_fault_info *info); |
---|
| 296 | + uint32_t vmid, uint64_t page_table_base); |
---|
406 | 297 | uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd); |
---|
407 | 298 | |
---|
408 | | - void (*gpu_recover)(struct kgd_dev *kgd); |
---|
409 | | - |
---|
410 | | - void (*set_compute_idle)(struct kgd_dev *kgd, bool idle); |
---|
| 299 | + void (*get_cu_occupancy)(struct kgd_dev *kgd, int pasid, int *wave_cnt, |
---|
| 300 | + int *max_waves_per_cu); |
---|
411 | 301 | }; |
---|
412 | | - |
---|
413 | | -/** |
---|
414 | | - * struct kgd2kfd_calls |
---|
415 | | - * |
---|
416 | | - * @exit: Notifies amdkfd that kgd module is unloaded |
---|
417 | | - * |
---|
418 | | - * @probe: Notifies amdkfd about a probe done on a device in the kgd driver. |
---|
419 | | - * |
---|
420 | | - * @device_init: Initialize the newly probed device (if it is a device that |
---|
421 | | - * amdkfd supports) |
---|
422 | | - * |
---|
423 | | - * @device_exit: Notifies amdkfd about a removal of a kgd device |
---|
424 | | - * |
---|
425 | | - * @suspend: Notifies amdkfd about a suspend action done to a kgd device |
---|
426 | | - * |
---|
427 | | - * @resume: Notifies amdkfd about a resume action done to a kgd device |
---|
428 | | - * |
---|
429 | | - * @quiesce_mm: Quiesce all user queue access to specified MM address space |
---|
430 | | - * |
---|
431 | | - * @resume_mm: Resume user queue access to specified MM address space |
---|
432 | | - * |
---|
433 | | - * @schedule_evict_and_restore_process: Schedules work queue that will prepare |
---|
434 | | - * for safe eviction of KFD BOs that belong to the specified process. |
---|
435 | | - * |
---|
436 | | - * @pre_reset: Notifies amdkfd that amdgpu about to reset the gpu |
---|
437 | | - * |
---|
438 | | - * @post_reset: Notify amdkfd that amgpu successfully reseted the gpu |
---|
439 | | - * |
---|
440 | | - * This structure contains function callback pointers so the kgd driver |
---|
441 | | - * will notify to the amdkfd about certain status changes. |
---|
442 | | - * |
---|
443 | | - */ |
---|
444 | | -struct kgd2kfd_calls { |
---|
445 | | - void (*exit)(void); |
---|
446 | | - struct kfd_dev* (*probe)(struct kgd_dev *kgd, struct pci_dev *pdev, |
---|
447 | | - const struct kfd2kgd_calls *f2g); |
---|
448 | | - bool (*device_init)(struct kfd_dev *kfd, |
---|
449 | | - const struct kgd2kfd_shared_resources *gpu_resources); |
---|
450 | | - void (*device_exit)(struct kfd_dev *kfd); |
---|
451 | | - void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry); |
---|
452 | | - void (*suspend)(struct kfd_dev *kfd); |
---|
453 | | - int (*resume)(struct kfd_dev *kfd); |
---|
454 | | - int (*quiesce_mm)(struct mm_struct *mm); |
---|
455 | | - int (*resume_mm)(struct mm_struct *mm); |
---|
456 | | - int (*schedule_evict_and_restore_process)(struct mm_struct *mm, |
---|
457 | | - struct dma_fence *fence); |
---|
458 | | - int (*pre_reset)(struct kfd_dev *kfd); |
---|
459 | | - int (*post_reset)(struct kfd_dev *kfd); |
---|
460 | | -}; |
---|
461 | | - |
---|
462 | | -int kgd2kfd_init(unsigned interface_version, |
---|
463 | | - const struct kgd2kfd_calls **g2f); |
---|
464 | 302 | |
---|
465 | 303 | #endif /* KGD_KFD_INTERFACE_H_INCLUDED */ |
---|