.. | .. |
---|
35 | 35 | #include <linux/kfifo.h> |
---|
36 | 36 | #include <linux/seq_file.h> |
---|
37 | 37 | #include <linux/kref.h> |
---|
| 38 | +#include <linux/sysfs.h> |
---|
| 39 | +#include <linux/device_cgroup.h> |
---|
| 40 | +#include <drm/drm_file.h> |
---|
| 41 | +#include <drm/drm_drv.h> |
---|
| 42 | +#include <drm/drm_device.h> |
---|
| 43 | +#include <drm/drm_ioctl.h> |
---|
38 | 44 | #include <kgd_kfd_interface.h> |
---|
| 45 | +#include <linux/swap.h> |
---|
39 | 46 | |
---|
40 | 47 | #include "amd_shared.h" |
---|
41 | 48 | |
---|
.. | .. |
---|
54 | 61 | * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these |
---|
55 | 62 | * defines are w.r.t. PAGE_SIZE |
---|
56 | 63 | */ |
---|
57 | | -#define KFD_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT) |
---|
| 64 | +#define KFD_MMAP_TYPE_SHIFT 62 |
---|
58 | 65 | #define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT) |
---|
59 | 66 | #define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT) |
---|
60 | 67 | #define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT) |
---|
61 | 68 | #define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT) |
---|
| 69 | +#define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT) |
---|
62 | 70 | |
---|
63 | | -#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT) |
---|
| 71 | +#define KFD_MMAP_GPU_ID_SHIFT 46 |
---|
64 | 72 | #define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \ |
---|
65 | 73 | << KFD_MMAP_GPU_ID_SHIFT) |
---|
66 | 74 | #define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\ |
---|
67 | 75 | & KFD_MMAP_GPU_ID_MASK) |
---|
68 | | -#define KFD_MMAP_GPU_ID_GET(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \ |
---|
| 76 | +#define KFD_MMAP_GET_GPU_ID(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \ |
---|
69 | 77 | >> KFD_MMAP_GPU_ID_SHIFT) |
---|
70 | | - |
---|
71 | | -#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFULL >> PAGE_SHIFT) |
---|
72 | | -#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK) |
---|
73 | 78 | |
---|
74 | 79 | /* |
---|
75 | 80 | * When working with cp scheduler we should assign the HIQ manually or via |
---|
.. | .. |
---|
92 | 97 | * Size of the per-process TBA+TMA buffer: 2 pages |
---|
93 | 98 | * |
---|
94 | 99 | * The first page is the TBA used for the CWSR ISA code. The second |
---|
95 | | - * page is used as TMA for daisy changing a user-mode trap handler. |
---|
| 100 | + * page is used as TMA for user-mode trap handler setup in daisy-chain mode. |
---|
96 | 101 | */ |
---|
97 | 102 | #define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2) |
---|
98 | 103 | #define KFD_CWSR_TMA_OFFSET PAGE_SIZE |
---|
| 104 | + |
---|
| 105 | +#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \ |
---|
| 106 | + (KFD_MAX_NUM_OF_PROCESSES * \ |
---|
| 107 | + KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) |
---|
| 108 | + |
---|
| 109 | +#define KFD_KERNEL_QUEUE_SIZE 2048 |
---|
| 110 | + |
---|
| 111 | +#define KFD_UNMAP_LATENCY_MS (4000) |
---|
| 112 | + |
---|
| 113 | +/* |
---|
| 114 | + * 512 = 0x200 |
---|
| 115 | + * The doorbell index distance between SDMA RLC (2*i) and (2*i+1) in the |
---|
| 116 | + * same SDMA engine on SOC15, which has 8-byte doorbells for SDMA. |
---|
| 117 | + * 512 8-byte doorbell distance (i.e. one page away) ensures that SDMA RLC |
---|
| 118 | + * (2*i+1) doorbells (in terms of the lower 12 bit address) lie exactly in |
---|
| 119 | + * the OFFSET and SIZE set in registers like BIF_SDMA0_DOORBELL_RANGE. |
---|
| 120 | + */ |
---|
| 121 | +#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512 |
---|
| 122 | + |
---|
99 | 123 | |
---|
100 | 124 | /* |
---|
101 | 125 | * Kernel module parameter to specify maximum number of supported queues per |
---|
.. | .. |
---|
103 | 127 | */ |
---|
104 | 128 | extern int max_num_of_queues_per_device; |
---|
105 | 129 | |
---|
106 | | -#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT 4096 |
---|
107 | | -#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \ |
---|
108 | | - (KFD_MAX_NUM_OF_PROCESSES * \ |
---|
109 | | - KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) |
---|
110 | | - |
---|
111 | | -#define KFD_KERNEL_QUEUE_SIZE 2048 |
---|
112 | 130 | |
---|
113 | 131 | /* Kernel module parameter to specify the scheduling policy */ |
---|
114 | 132 | extern int sched_policy; |
---|
.. | .. |
---|
139 | 157 | */ |
---|
140 | 158 | extern int ignore_crat; |
---|
141 | 159 | |
---|
142 | | -/* |
---|
143 | | - * Set sh_mem_config.retry_disable on Vega10 |
---|
144 | | - */ |
---|
145 | | -extern int noretry; |
---|
| 160 | +/* Set sh_mem_config.retry_disable on GFX v9 */ |
---|
| 161 | +extern int amdgpu_noretry; |
---|
146 | 162 | |
---|
147 | | -/* |
---|
148 | | - * Halt if HWS hang is detected |
---|
149 | | - */ |
---|
| 163 | +/* Halt if HWS hang is detected */ |
---|
150 | 164 | extern int halt_if_hws_hang; |
---|
151 | 165 | |
---|
152 | | -/** |
---|
153 | | - * enum kfd_sched_policy |
---|
154 | | - * |
---|
155 | | - * @KFD_SCHED_POLICY_HWS: H/W scheduling policy known as command processor (cp) |
---|
156 | | - * scheduling. In this scheduling mode we're using the firmware code to |
---|
157 | | - * schedule the user mode queues and kernel queues such as HIQ and DIQ. |
---|
158 | | - * the HIQ queue is used as a special queue that dispatches the configuration |
---|
159 | | - * to the cp and the user mode queues list that are currently running. |
---|
160 | | - * the DIQ queue is a debugging queue that dispatches debugging commands to the |
---|
161 | | - * firmware. |
---|
162 | | - * in this scheduling mode user mode queues over subscription feature is |
---|
163 | | - * enabled. |
---|
164 | | - * |
---|
165 | | - * @KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: The same as above but the over |
---|
166 | | - * subscription feature disabled. |
---|
167 | | - * |
---|
168 | | - * @KFD_SCHED_POLICY_NO_HWS: no H/W scheduling policy is a mode which directly |
---|
169 | | - * set the command processor registers and sets the queues "manually". This |
---|
170 | | - * mode is used *ONLY* for debugging proposes. |
---|
171 | | - * |
---|
172 | | - */ |
---|
173 | | -enum kfd_sched_policy { |
---|
174 | | - KFD_SCHED_POLICY_HWS = 0, |
---|
175 | | - KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION, |
---|
176 | | - KFD_SCHED_POLICY_NO_HWS |
---|
177 | | -}; |
---|
| 166 | +/* Whether MEC FW supports GWS barriers */ |
---|
| 167 | +extern bool hws_gws_support; |
---|
| 168 | + |
---|
| 169 | +/* Queue preemption timeout in ms */ |
---|
| 170 | +extern int queue_preemption_timeout_ms; |
---|
| 171 | + |
---|
| 172 | +/* Enable eviction debug messages */ |
---|
| 173 | +extern bool debug_evictions; |
---|
178 | 174 | |
---|
179 | 175 | enum cache_policy { |
---|
180 | 176 | cache_policy_coherent, |
---|
.. | .. |
---|
193 | 189 | |
---|
194 | 190 | struct kfd_device_info { |
---|
195 | 191 | enum amd_asic_type asic_family; |
---|
| 192 | + const char *asic_name; |
---|
196 | 193 | const struct kfd_event_interrupt_class *event_interrupt_class; |
---|
197 | 194 | unsigned int max_pasid_bits; |
---|
198 | 195 | unsigned int max_no_of_hqd; |
---|
.. | .. |
---|
204 | 201 | bool needs_iommu_device; |
---|
205 | 202 | bool needs_pci_atomics; |
---|
206 | 203 | unsigned int num_sdma_engines; |
---|
| 204 | + unsigned int num_xgmi_sdma_engines; |
---|
| 205 | + unsigned int num_sdma_queues_per_engine; |
---|
207 | 206 | }; |
---|
208 | 207 | |
---|
209 | 208 | struct kfd_mem_obj { |
---|
.. | .. |
---|
225 | 224 | |
---|
226 | 225 | const struct kfd_device_info *device_info; |
---|
227 | 226 | struct pci_dev *pdev; |
---|
| 227 | + struct drm_device *ddev; |
---|
228 | 228 | |
---|
229 | 229 | unsigned int id; /* topology stub index */ |
---|
230 | 230 | |
---|
.. | .. |
---|
232 | 232 | * KFD. It is aligned for mapping |
---|
233 | 233 | * into user mode |
---|
234 | 234 | */ |
---|
235 | | - size_t doorbell_id_offset; /* Doorbell offset (from KFD doorbell |
---|
236 | | - * to HW doorbell, GFX reserved some |
---|
237 | | - * at the start) |
---|
| 235 | + size_t doorbell_base_dw_offset; /* Offset from the start of the PCI |
---|
| 236 | + * doorbell BAR to the first KFD |
---|
| 237 | + * doorbell in dwords. GFX reserves |
---|
| 238 | + * the segment before this offset. |
---|
238 | 239 | */ |
---|
239 | 240 | u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells |
---|
240 | 241 | * page used by kernel queue |
---|
.. | .. |
---|
273 | 274 | bool interrupts_active; |
---|
274 | 275 | |
---|
275 | 276 | /* Debug manager */ |
---|
276 | | - struct kfd_dbgmgr *dbgmgr; |
---|
| 277 | + struct kfd_dbgmgr *dbgmgr; |
---|
| 278 | + |
---|
| 279 | + /* Firmware versions */ |
---|
| 280 | + uint16_t mec_fw_version; |
---|
| 281 | + uint16_t mec2_fw_version; |
---|
| 282 | + uint16_t sdma_fw_version; |
---|
277 | 283 | |
---|
278 | 284 | /* Maximum process number mapped to HW scheduler */ |
---|
279 | 285 | unsigned int max_proc_per_quantum; |
---|
.. | .. |
---|
282 | 288 | bool cwsr_enabled; |
---|
283 | 289 | const void *cwsr_isa; |
---|
284 | 290 | unsigned int cwsr_isa_size; |
---|
285 | | -}; |
---|
286 | 291 | |
---|
287 | | -/* KGD2KFD callbacks */ |
---|
288 | | -void kgd2kfd_exit(void); |
---|
289 | | -struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, |
---|
290 | | - struct pci_dev *pdev, const struct kfd2kgd_calls *f2g); |
---|
291 | | -bool kgd2kfd_device_init(struct kfd_dev *kfd, |
---|
292 | | - const struct kgd2kfd_shared_resources *gpu_resources); |
---|
293 | | -void kgd2kfd_device_exit(struct kfd_dev *kfd); |
---|
| 292 | + /* xGMI */ |
---|
| 293 | + uint64_t hive_id; |
---|
| 294 | + |
---|
| 295 | + /* UUID */ |
---|
| 296 | + uint64_t unique_id; |
---|
| 297 | + |
---|
| 298 | + bool pci_atomic_requested; |
---|
| 299 | + |
---|
| 300 | + /* Use IOMMU v2 flag */ |
---|
| 301 | + bool use_iommu_v2; |
---|
| 302 | + |
---|
| 303 | + /* SRAM ECC flag */ |
---|
| 304 | + atomic_t sram_ecc_flag; |
---|
| 305 | + |
---|
| 306 | + /* Compute Profile ref. count */ |
---|
| 307 | + atomic_t compute_profile; |
---|
| 308 | + |
---|
| 309 | + /* Global GWS resource shared between processes */ |
---|
| 310 | + void *gws; |
---|
| 311 | + |
---|
| 312 | + /* Clients watching SMI events */ |
---|
| 313 | + struct list_head smi_clients; |
---|
| 314 | + spinlock_t smi_lock; |
---|
| 315 | + |
---|
| 316 | + uint32_t reset_seq_num; |
---|
| 317 | + |
---|
| 318 | + struct ida doorbell_ida; |
---|
| 319 | + unsigned int max_doorbell_slices; |
---|
| 320 | + |
---|
| 321 | + int noretry; |
---|
| 322 | +}; |
---|
294 | 323 | |
---|
295 | 324 | enum kfd_mempool { |
---|
296 | 325 | KFD_MEMPOOL_SYSTEM_CACHEABLE = 1, |
---|
.. | .. |
---|
304 | 333 | struct device *kfd_chardev(void); |
---|
305 | 334 | |
---|
306 | 335 | /** |
---|
307 | | - * enum kfd_unmap_queues_filter |
---|
| 336 | + * enum kfd_unmap_queues_filter - Enum for queue filters. |
---|
308 | 337 | * |
---|
309 | 338 | * @KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: Preempts single queue. |
---|
310 | 339 | * |
---|
.. | .. |
---|
323 | 352 | }; |
---|
324 | 353 | |
---|
325 | 354 | /** |
---|
326 | | - * enum kfd_queue_type |
---|
| 355 | + * enum kfd_queue_type - Enum for various queue types. |
---|
327 | 356 | * |
---|
328 | 357 | * @KFD_QUEUE_TYPE_COMPUTE: Regular user mode queue type. |
---|
329 | 358 | * |
---|
330 | | - * @KFD_QUEUE_TYPE_SDMA: Sdma user mode queue type. |
---|
| 359 | + * @KFD_QUEUE_TYPE_SDMA: SDMA user mode queue type. |
---|
331 | 360 | * |
---|
332 | 361 | * @KFD_QUEUE_TYPE_HIQ: HIQ queue type. |
---|
333 | 362 | * |
---|
334 | 363 | * @KFD_QUEUE_TYPE_DIQ: DIQ queue type. |
---|
| 364 | + * |
---|
| 365 | + * @KFD_QUEUE_TYPE_SDMA_XGMI: Special SDMA queue for XGMI interface. |
---|
335 | 366 | */ |
---|
336 | 367 | enum kfd_queue_type { |
---|
337 | 368 | KFD_QUEUE_TYPE_COMPUTE, |
---|
338 | 369 | KFD_QUEUE_TYPE_SDMA, |
---|
339 | 370 | KFD_QUEUE_TYPE_HIQ, |
---|
340 | | - KFD_QUEUE_TYPE_DIQ |
---|
| 371 | + KFD_QUEUE_TYPE_DIQ, |
---|
| 372 | + KFD_QUEUE_TYPE_SDMA_XGMI |
---|
341 | 373 | }; |
---|
342 | 374 | |
---|
343 | 375 | enum kfd_queue_format { |
---|
344 | 376 | KFD_QUEUE_FORMAT_PM4, |
---|
345 | 377 | KFD_QUEUE_FORMAT_AQL |
---|
| 378 | +}; |
---|
| 379 | + |
---|
| 380 | +enum KFD_QUEUE_PRIORITY { |
---|
| 381 | + KFD_QUEUE_PRIORITY_MINIMUM = 0, |
---|
| 382 | + KFD_QUEUE_PRIORITY_MAXIMUM = 15 |
---|
346 | 383 | }; |
---|
347 | 384 | |
---|
348 | 385 | /** |
---|
.. | .. |
---|
371 | 408 | * |
---|
372 | 409 | * @write_ptr: Defines the number of dwords written to the ring buffer. |
---|
373 | 410 | * |
---|
374 | | - * @doorbell_ptr: This field aim is to notify the H/W of new packet written to |
---|
375 | | - * the queue ring buffer. This field should be similar to write_ptr and the |
---|
376 | | - * user should update this field after he updated the write_ptr. |
---|
| 411 | + * @doorbell_ptr: Notifies the H/W of new packet written to the queue ring |
---|
| 412 | + * buffer. This field should be similar to write_ptr and the user should |
---|
| 413 | + * update this field after updating the write_ptr. |
---|
377 | 414 | * |
---|
378 | 415 | * @doorbell_off: The doorbell offset in the doorbell pci-bar. |
---|
379 | 416 | * |
---|
.. | .. |
---|
385 | 422 | * |
---|
386 | 423 | * @is_active: Defines if the queue is active or not. @is_active and |
---|
387 | 424 | * @is_evicted are protected by the DQM lock. |
---|
| 425 | + * |
---|
| 426 | + * @is_gws: Defines if the queue has been updated to be GWS-capable or not. |
---|
| 427 | + * @is_gws should be protected by the DQM lock, since changing it can yield the |
---|
| 428 | + * possibility of updating DQM state on number of GWS queues. |
---|
388 | 429 | * |
---|
389 | 430 | * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid |
---|
390 | 431 | * of the queue. |
---|
.. | .. |
---|
408 | 449 | bool is_interop; |
---|
409 | 450 | bool is_evicted; |
---|
410 | 451 | bool is_active; |
---|
| 452 | + bool is_gws; |
---|
411 | 453 | /* Not relevant for user mode queues in cp scheduling */ |
---|
412 | 454 | unsigned int vmid; |
---|
413 | 455 | /* Relevant only for sdma queues*/ |
---|
.. | .. |
---|
427 | 469 | uint32_t *cu_mask; |
---|
428 | 470 | }; |
---|
429 | 471 | |
---|
| 472 | +#define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 && \ |
---|
| 473 | + (q).queue_address != 0 && \ |
---|
| 474 | + (q).queue_percent > 0 && \ |
---|
| 475 | + !(q).is_evicted) |
---|
| 476 | + |
---|
430 | 477 | /** |
---|
431 | 478 | * struct queue |
---|
432 | 479 | * |
---|
433 | 480 | * @list: Queue linked list. |
---|
434 | 481 | * |
---|
435 | | - * @mqd: The queue MQD. |
---|
| 482 | + * @mqd: The queue MQD (memory queue descriptor). |
---|
436 | 483 | * |
---|
437 | 484 | * @mqd_mem_obj: The MQD local gpu memory object. |
---|
438 | 485 | * |
---|
.. | .. |
---|
441 | 488 | * @properties: The queue properties. |
---|
442 | 489 | * |
---|
443 | 490 | * @mec: Used only in no cp scheduling mode and identifies the micro engine id |
---|
444 | | - * that the queue should be execute on. |
---|
| 491 | + * that the queue should be executed on. |
---|
445 | 492 | * |
---|
446 | 493 | * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe |
---|
447 | 494 | * id. |
---|
.. | .. |
---|
451 | 498 | * @process: The kfd process that created this queue. |
---|
452 | 499 | * |
---|
453 | 500 | * @device: The kfd device that created this queue. |
---|
| 501 | + * |
---|
| 502 | + * @gws: Pointing to gws kgd_mem if this is a gws control queue; NULL |
---|
| 503 | + * otherwise. |
---|
454 | 504 | * |
---|
455 | 505 | * This structure represents user mode compute queues. |
---|
456 | 506 | * It contains all the necessary data to handle such queues. |
---|
.. | .. |
---|
473 | 523 | |
---|
474 | 524 | struct kfd_process *process; |
---|
475 | 525 | struct kfd_dev *device; |
---|
| 526 | + void *gws; |
---|
| 527 | + |
---|
| 528 | + /* procfs */ |
---|
| 529 | + struct kobject kobj; |
---|
476 | 530 | }; |
---|
477 | 531 | |
---|
478 | | -/* |
---|
479 | | - * Please read the kfd_mqd_manager.h description. |
---|
480 | | - */ |
---|
481 | 532 | enum KFD_MQD_TYPE { |
---|
482 | | - KFD_MQD_TYPE_COMPUTE = 0, /* for no cp scheduling */ |
---|
483 | | - KFD_MQD_TYPE_HIQ, /* for hiq */ |
---|
| 533 | + KFD_MQD_TYPE_HIQ = 0, /* for hiq */ |
---|
484 | 534 | KFD_MQD_TYPE_CP, /* for cp queues and diq */ |
---|
485 | 535 | KFD_MQD_TYPE_SDMA, /* for sdma queues */ |
---|
| 536 | + KFD_MQD_TYPE_DIQ, /* for diq */ |
---|
486 | 537 | KFD_MQD_TYPE_MAX |
---|
| 538 | +}; |
---|
| 539 | + |
---|
| 540 | +enum KFD_PIPE_PRIORITY { |
---|
| 541 | + KFD_PIPE_PRIORITY_CS_LOW = 0, |
---|
| 542 | + KFD_PIPE_PRIORITY_CS_MEDIUM, |
---|
| 543 | + KFD_PIPE_PRIORITY_CS_HIGH |
---|
487 | 544 | }; |
---|
488 | 545 | |
---|
489 | 546 | struct scheduling_resources { |
---|
.. | .. |
---|
521 | 578 | */ |
---|
522 | 579 | bool reset_wavefronts; |
---|
523 | 580 | |
---|
524 | | - /* |
---|
525 | | - * All the memory management data should be here too |
---|
| 581 | + /* This flag tells us if this process has a GWS-capable |
---|
| 582 | + * queue that will be mapped into the runlist. It's |
---|
| 583 | + * possible to request a GWS BO, but not have the queue |
---|
| 584 | + * currently mapped, and this changes how the MAP_PROCESS |
---|
| 585 | + * PM4 packet is configured. |
---|
526 | 586 | */ |
---|
| 587 | + bool mapped_gws_queue; |
---|
| 588 | + |
---|
| 589 | + /* All the memory management data should be here too */ |
---|
527 | 590 | uint64_t gds_context_area; |
---|
| 591 | + /* Contains page table flags such as AMDGPU_PTE_VALID since gfx9 */ |
---|
| 592 | + uint64_t page_table_base; |
---|
528 | 593 | uint32_t sh_mem_config; |
---|
529 | 594 | uint32_t sh_mem_bases; |
---|
530 | 595 | uint32_t sh_mem_ape1_base; |
---|
531 | 596 | uint32_t sh_mem_ape1_limit; |
---|
532 | | - uint32_t page_table_base; |
---|
533 | 597 | uint32_t gds_size; |
---|
534 | 598 | uint32_t num_gws; |
---|
535 | 599 | uint32_t num_oac; |
---|
.. | .. |
---|
558 | 622 | /* Approx. time before evicting the process again */ |
---|
559 | 623 | #define PROCESS_ACTIVE_TIME_MS 10 |
---|
560 | 624 | |
---|
561 | | -int kgd2kfd_quiesce_mm(struct mm_struct *mm); |
---|
562 | | -int kgd2kfd_resume_mm(struct mm_struct *mm); |
---|
563 | | -int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, |
---|
564 | | - struct dma_fence *fence); |
---|
565 | | - |
---|
566 | 625 | /* 8 byte handle containing GPU ID in the most significant 4 bytes and |
---|
567 | 626 | * idr_handle in the least significant 4 bytes |
---|
568 | 627 | */ |
---|
.. | .. |
---|
576 | 635 | PDD_BOUND, |
---|
577 | 636 | PDD_BOUND_SUSPENDED, |
---|
578 | 637 | }; |
---|
| 638 | + |
---|
| 639 | +#define MAX_SYSFS_FILENAME_LEN 15 |
---|
| 640 | + |
---|
| 641 | +/* |
---|
| 642 | + * SDMA counter runs at 100MHz frequency. |
---|
| 643 | + * We display SDMA activity in microsecond granularity in sysfs. |
---|
| 644 | + * As a result, the divisor is 100. |
---|
| 645 | + */ |
---|
| 646 | +#define SDMA_ACTIVITY_DIVISOR 100 |
---|
579 | 647 | |
---|
580 | 648 | /* Data that is per-process-per device. */ |
---|
581 | 649 | struct kfd_process_device { |
---|
.. | .. |
---|
615 | 683 | * function. |
---|
616 | 684 | */ |
---|
617 | 685 | bool already_dequeued; |
---|
| 686 | + bool runtime_inuse; |
---|
618 | 687 | |
---|
619 | 688 | /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ |
---|
620 | 689 | enum kfd_pdd_bound bound; |
---|
| 690 | + |
---|
| 691 | + /* VRAM usage */ |
---|
| 692 | + uint64_t vram_usage; |
---|
| 693 | + struct attribute attr_vram; |
---|
| 694 | + char vram_filename[MAX_SYSFS_FILENAME_LEN]; |
---|
| 695 | + |
---|
| 696 | + /* SDMA activity tracking */ |
---|
| 697 | + uint64_t sdma_past_activity_counter; |
---|
| 698 | + struct attribute attr_sdma; |
---|
| 699 | + char sdma_filename[MAX_SYSFS_FILENAME_LEN]; |
---|
| 700 | + |
---|
| 701 | + /* Eviction activity tracking */ |
---|
| 702 | + uint64_t last_evict_timestamp; |
---|
| 703 | + atomic64_t evict_duration_counter; |
---|
| 704 | + struct attribute attr_evict; |
---|
| 705 | + |
---|
| 706 | + struct kobject *kobj_stats; |
---|
| 707 | + unsigned int doorbell_index; |
---|
| 708 | + |
---|
| 709 | + /* |
---|
| 710 | + * @cu_occupancy: Reports occupancy of Compute Units (CU) of a process |
---|
| 711 | + * that is associated with device encoded by "this" struct instance. The |
---|
| 712 | + * value reflects CU usage by all of the waves launched by this process |
---|
| 713 | + * on this device. A very important property of occupancy parameter is |
---|
| 714 | + * that its value is a snapshot of current use. |
---|
| 715 | + * |
---|
| 716 | + * Following is to be noted regarding how this parameter is reported: |
---|
| 717 | + * |
---|
| 718 | + * The number of waves that a CU can launch is limited by a couple of |
---|
| 719 | + * parameters. These are encoded by struct amdgpu_cu_info instance |
---|
| 720 | + * that is part of every device definition. For GFX9 devices this |
---|
| 721 | + * translates to 40 waves (simd_per_cu * max_waves_per_simd) when waves |
---|
| 722 | + * do not use scratch memory and 32 waves (max_scratch_slots_per_cu) |
---|
| 723 | + * when they do use scratch memory. This could change for future |
---|
| 724 | + * devices and therefore this example should be considered as a guide. |
---|
| 725 | + * |
---|
| 726 | + * All CU's of a device are available for the process. This may not be true |
---|
| 727 | + * under certain conditions - e.g. CU masking. |
---|
| 728 | + * |
---|
| 729 | + * Finally, the number of CU's occupied by a process is affected by both the |
---|
| 730 | + * number of CU's a device has and the number of other competing processes. |
---|
| 731 | + */ |
---|
| 732 | + struct attribute attr_cu_occupancy; |
---|
621 | 733 | }; |
---|
622 | 734 | |
---|
623 | 735 | #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) |
---|
.. | .. |
---|
654 | 766 | /* We want to receive a notification when the mm_struct is destroyed */ |
---|
655 | 767 | struct mmu_notifier mmu_notifier; |
---|
656 | 768 | |
---|
657 | | - /* Use for delayed freeing of kfd_process structure */ |
---|
658 | | - struct rcu_head rcu; |
---|
659 | | - |
---|
660 | | - unsigned int pasid; |
---|
661 | | - unsigned int doorbell_index; |
---|
| 769 | + u32 pasid; |
---|
662 | 770 | |
---|
663 | 771 | /* |
---|
664 | 772 | * List of kfd_process_device structures, |
---|
.. | .. |
---|
698 | 806 | * restored after an eviction |
---|
699 | 807 | */ |
---|
700 | 808 | unsigned long last_restore_timestamp; |
---|
| 809 | + |
---|
| 810 | + /* Kobj for our procfs */ |
---|
| 811 | + struct kobject *kobj; |
---|
| 812 | + struct kobject *kobj_queues; |
---|
| 813 | + struct attribute attr_pasid; |
---|
701 | 814 | }; |
---|
702 | 815 | |
---|
703 | 816 | #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ |
---|
.. | .. |
---|
705 | 818 | extern struct srcu_struct kfd_processes_srcu; |
---|
706 | 819 | |
---|
707 | 820 | /** |
---|
708 | | - * Ioctl function type. |
---|
| 821 | + * typedef amdkfd_ioctl_t - typedef for ioctl function pointer. |
---|
709 | 822 | * |
---|
710 | | - * \param filep pointer to file structure. |
---|
711 | | - * \param p amdkfd process pointer. |
---|
712 | | - * \param data pointer to arg that was copied from user. |
---|
| 823 | + * @filep: pointer to file structure. |
---|
| 824 | + * @p: amdkfd process pointer. |
---|
| 825 | + * @data: pointer to arg that was copied from user. |
---|
| 826 | + * |
---|
| 827 | + * Return: returns ioctl completion code. |
---|
713 | 828 | */ |
---|
714 | 829 | typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p, |
---|
715 | 830 | void *data); |
---|
.. | .. |
---|
721 | 836 | unsigned int cmd_drv; |
---|
722 | 837 | const char *name; |
---|
723 | 838 | }; |
---|
| 839 | +bool kfd_dev_is_large_bar(struct kfd_dev *dev); |
---|
724 | 840 | |
---|
725 | 841 | int kfd_process_create_wq(void); |
---|
726 | 842 | void kfd_process_destroy_wq(void); |
---|
727 | 843 | struct kfd_process *kfd_create_process(struct file *filep); |
---|
728 | 844 | struct kfd_process *kfd_get_process(const struct task_struct *); |
---|
729 | | -struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); |
---|
| 845 | +struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid); |
---|
730 | 846 | struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); |
---|
731 | 847 | void kfd_unref_process(struct kfd_process *p); |
---|
732 | 848 | int kfd_process_evict_queues(struct kfd_process *p); |
---|
.. | .. |
---|
767 | 883 | void kfd_pasid_exit(void); |
---|
768 | 884 | bool kfd_set_pasid_limit(unsigned int new_limit); |
---|
769 | 885 | unsigned int kfd_get_pasid_limit(void); |
---|
770 | | -unsigned int kfd_pasid_alloc(void); |
---|
771 | | -void kfd_pasid_free(unsigned int pasid); |
---|
| 886 | +u32 kfd_pasid_alloc(void); |
---|
| 887 | +void kfd_pasid_free(u32 pasid); |
---|
772 | 888 | |
---|
773 | 889 | /* Doorbells */ |
---|
774 | 890 | size_t kfd_doorbell_process_slice(struct kfd_dev *kfd); |
---|
.. | .. |
---|
782 | 898 | u32 read_kernel_doorbell(u32 __iomem *db); |
---|
783 | 899 | void write_kernel_doorbell(void __iomem *db, u32 value); |
---|
784 | 900 | void write_kernel_doorbell64(void __iomem *db, u64 value); |
---|
785 | | -unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, |
---|
786 | | - struct kfd_process *process, |
---|
| 901 | +unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, |
---|
| 902 | + struct kfd_process_device *pdd, |
---|
787 | 903 | unsigned int doorbell_id); |
---|
788 | | -phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, |
---|
789 | | - struct kfd_process *process); |
---|
790 | | -int kfd_alloc_process_doorbells(struct kfd_process *process); |
---|
791 | | -void kfd_free_process_doorbells(struct kfd_process *process); |
---|
792 | | - |
---|
| 904 | +phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd); |
---|
| 905 | +int kfd_alloc_process_doorbells(struct kfd_dev *kfd, |
---|
| 906 | + unsigned int *doorbell_index); |
---|
| 907 | +void kfd_free_process_doorbells(struct kfd_dev *kfd, |
---|
| 908 | + unsigned int doorbell_index); |
---|
793 | 909 | /* GTT Sub-Allocator */ |
---|
794 | 910 | |
---|
795 | 911 | int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, |
---|
.. | .. |
---|
798 | 914 | int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj); |
---|
799 | 915 | |
---|
800 | 916 | extern struct device *kfd_device; |
---|
| 917 | + |
---|
| 918 | +/* KFD's procfs */ |
---|
| 919 | +void kfd_procfs_init(void); |
---|
| 920 | +void kfd_procfs_shutdown(void); |
---|
| 921 | +int kfd_procfs_add_queue(struct queue *q); |
---|
| 922 | +void kfd_procfs_del_queue(struct queue *q); |
---|
801 | 923 | |
---|
802 | 924 | /* Topology */ |
---|
803 | 925 | int kfd_topology_init(void); |
---|
.. | .. |
---|
809 | 931 | struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id); |
---|
810 | 932 | struct kfd_dev *kfd_device_by_id(uint32_t gpu_id); |
---|
811 | 933 | struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); |
---|
| 934 | +struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd); |
---|
812 | 935 | int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev); |
---|
813 | 936 | int kfd_numa_node_to_apic_id(int numa_node_id); |
---|
| 937 | +void kfd_double_confirm_iommu_support(struct kfd_dev *gpu); |
---|
814 | 938 | |
---|
815 | 939 | /* Interrupts */ |
---|
816 | 940 | int kfd_interrupt_init(struct kfd_dev *dev); |
---|
817 | 941 | void kfd_interrupt_exit(struct kfd_dev *dev); |
---|
818 | | -void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); |
---|
819 | 942 | bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry); |
---|
820 | 943 | bool interrupt_is_wanted(struct kfd_dev *dev, |
---|
821 | 944 | const uint32_t *ih_ring_entry, |
---|
822 | 945 | uint32_t *patched_ihre, bool *flag); |
---|
823 | | - |
---|
824 | | -/* Power Management */ |
---|
825 | | -void kgd2kfd_suspend(struct kfd_dev *kfd); |
---|
826 | | -int kgd2kfd_resume(struct kfd_dev *kfd); |
---|
827 | | - |
---|
828 | | -/* GPU reset */ |
---|
829 | | -int kgd2kfd_pre_reset(struct kfd_dev *kfd); |
---|
830 | | -int kgd2kfd_post_reset(struct kfd_dev *kfd); |
---|
831 | 946 | |
---|
832 | 947 | /* amdkfd Apertures */ |
---|
833 | 948 | int kfd_init_apertures(struct kfd_process *process); |
---|
.. | .. |
---|
838 | 953 | void print_queue_properties(struct queue_properties *q); |
---|
839 | 954 | void print_queue(struct queue *q); |
---|
840 | 955 | |
---|
841 | | -struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, |
---|
842 | | - struct kfd_dev *dev); |
---|
843 | 956 | struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, |
---|
844 | 957 | struct kfd_dev *dev); |
---|
845 | 958 | struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, |
---|
.. | .. |
---|
850 | 963 | struct kfd_dev *dev); |
---|
851 | 964 | struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, |
---|
852 | 965 | struct kfd_dev *dev); |
---|
| 966 | +struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, |
---|
| 967 | + struct kfd_dev *dev); |
---|
853 | 968 | struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev); |
---|
854 | 969 | void device_queue_manager_uninit(struct device_queue_manager *dqm); |
---|
855 | 970 | struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, |
---|
856 | 971 | enum kfd_queue_type type); |
---|
857 | | -void kernel_queue_uninit(struct kernel_queue *kq); |
---|
858 | | -int kfd_process_vm_fault(struct device_queue_manager *dqm, unsigned int pasid); |
---|
| 972 | +void kernel_queue_uninit(struct kernel_queue *kq, bool hanging); |
---|
| 973 | +int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid); |
---|
859 | 974 | |
---|
860 | 975 | /* Process Queue Manager */ |
---|
861 | 976 | struct process_queue_node { |
---|
.. | .. |
---|
872 | 987 | struct kfd_dev *dev, |
---|
873 | 988 | struct file *f, |
---|
874 | 989 | struct queue_properties *properties, |
---|
875 | | - unsigned int *qid); |
---|
| 990 | + unsigned int *qid, |
---|
| 991 | + uint32_t *p_doorbell_offset_in_process); |
---|
876 | 992 | int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid); |
---|
877 | 993 | int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, |
---|
878 | 994 | struct queue_properties *p); |
---|
879 | 995 | int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid, |
---|
880 | 996 | struct queue_properties *p); |
---|
| 997 | +int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, |
---|
| 998 | + void *gws); |
---|
881 | 999 | struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, |
---|
882 | 1000 | unsigned int qid); |
---|
| 1001 | +struct queue *pqm_get_user_queue(struct process_queue_manager *pqm, |
---|
| 1002 | + unsigned int qid); |
---|
| 1003 | +int pqm_get_wave_state(struct process_queue_manager *pqm, |
---|
| 1004 | + unsigned int qid, |
---|
| 1005 | + void __user *ctl_stack, |
---|
| 1006 | + u32 *ctl_stack_used_size, |
---|
| 1007 | + u32 *save_area_used_size); |
---|
883 | 1008 | |
---|
884 | | -int amdkfd_fence_wait_timeout(unsigned int *fence_addr, |
---|
885 | | - unsigned int fence_value, |
---|
886 | | - unsigned int timeout_ms); |
---|
| 1009 | +int amdkfd_fence_wait_timeout(uint64_t *fence_addr, |
---|
| 1010 | + uint64_t fence_value, |
---|
| 1011 | + unsigned int timeout_ms); |
---|
887 | 1012 | |
---|
888 | 1013 | /* Packet Manager */ |
---|
889 | 1014 | |
---|
.. | .. |
---|
897 | 1022 | bool allocated; |
---|
898 | 1023 | struct kfd_mem_obj *ib_buffer_obj; |
---|
899 | 1024 | unsigned int ib_size_bytes; |
---|
| 1025 | + bool is_over_subscription; |
---|
900 | 1026 | |
---|
901 | 1027 | const struct packet_manager_funcs *pmf; |
---|
902 | 1028 | }; |
---|
.. | .. |
---|
917 | 1043 | uint32_t filter_param, bool reset, |
---|
918 | 1044 | unsigned int sdma_engine); |
---|
919 | 1045 | int (*query_status)(struct packet_manager *pm, uint32_t *buffer, |
---|
920 | | - uint64_t fence_address, uint32_t fence_value); |
---|
| 1046 | + uint64_t fence_address, uint64_t fence_value); |
---|
921 | 1047 | int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer); |
---|
922 | 1048 | |
---|
923 | 1049 | /* Packet sizes */ |
---|
.. | .. |
---|
934 | 1060 | extern const struct packet_manager_funcs kfd_v9_pm_funcs; |
---|
935 | 1061 | |
---|
936 | 1062 | int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm); |
---|
937 | | -void pm_uninit(struct packet_manager *pm); |
---|
| 1063 | +void pm_uninit(struct packet_manager *pm, bool hanging); |
---|
938 | 1064 | int pm_send_set_resources(struct packet_manager *pm, |
---|
939 | 1065 | struct scheduling_resources *res); |
---|
940 | 1066 | int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues); |
---|
941 | 1067 | int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, |
---|
942 | | - uint32_t fence_value); |
---|
| 1068 | + uint64_t fence_value); |
---|
943 | 1069 | |
---|
944 | 1070 | int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, |
---|
945 | 1071 | enum kfd_unmap_queues_filter mode, |
---|
.. | .. |
---|
950 | 1076 | |
---|
951 | 1077 | /* Following PM funcs can be shared among VI and AI */ |
---|
952 | 1078 | unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size); |
---|
953 | | -int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, |
---|
954 | | - struct scheduling_resources *res); |
---|
955 | 1079 | |
---|
956 | 1080 | uint64_t kfd_get_number_elems(struct kfd_dev *kfd); |
---|
957 | 1081 | |
---|
.. | .. |
---|
968 | 1092 | uint32_t num_events, void __user *data, |
---|
969 | 1093 | bool all, uint32_t user_timeout_ms, |
---|
970 | 1094 | uint32_t *wait_result); |
---|
971 | | -void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, |
---|
| 1095 | +void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, |
---|
972 | 1096 | uint32_t valid_id_bits); |
---|
973 | 1097 | void kfd_signal_iommu_event(struct kfd_dev *dev, |
---|
974 | | - unsigned int pasid, unsigned long address, |
---|
975 | | - bool is_write_requested, bool is_execute_requested); |
---|
976 | | -void kfd_signal_hw_exception_event(unsigned int pasid); |
---|
| 1098 | + u32 pasid, unsigned long address, |
---|
| 1099 | + bool is_write_requested, bool is_execute_requested); |
---|
| 1100 | +void kfd_signal_hw_exception_event(u32 pasid); |
---|
977 | 1101 | int kfd_set_event(struct kfd_process *p, uint32_t event_id); |
---|
978 | 1102 | int kfd_reset_event(struct kfd_process *p, uint32_t event_id); |
---|
979 | 1103 | int kfd_event_page_set(struct kfd_process *p, void *kernel_address, |
---|
.. | .. |
---|
984 | 1108 | uint64_t *event_page_offset, uint32_t *event_slot_index); |
---|
985 | 1109 | int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); |
---|
986 | 1110 | |
---|
987 | | -void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid, |
---|
| 1111 | +void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid, |
---|
988 | 1112 | struct kfd_vm_fault_info *info); |
---|
989 | 1113 | |
---|
990 | 1114 | void kfd_signal_reset_event(struct kfd_dev *dev); |
---|
.. | .. |
---|
995 | 1119 | |
---|
996 | 1120 | bool kfd_is_locked(void); |
---|
997 | 1121 | |
---|
| 1122 | +/* Compute profile */ |
---|
| 1123 | +void kfd_inc_compute_active(struct kfd_dev *dev); |
---|
| 1124 | +void kfd_dec_compute_active(struct kfd_dev *dev); |
---|
| 1125 | + |
---|
| 1126 | +/* Cgroup Support */ |
---|
| 1127 | +/* Check with device cgroup if @kfd device is accessible: queries read and write access to the character device with major DRM_MAJOR and index ddev->render->index. Returns the devcgroup_check_permission() result, or 0 when neither CONFIG_CGROUP_DEVICE nor CONFIG_CGROUP_BPF is defined. */ |
---|
| 1128 | +static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd) |
---|
| 1129 | +{ |
---|
| 1130 | +#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) |
---|
| 1131 | + struct drm_device *ddev = kfd->ddev; /* NOTE(review): kfd->ddev and ddev->render are dereferenced without a NULL check; confirm callers guarantee both */ |
---|
| 1132 | + |
---|
| 1133 | + return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR, |
---|
| 1134 | + ddev->render->index, |
---|
| 1135 | + DEVCG_ACC_WRITE | DEVCG_ACC_READ); |
---|
| 1136 | +#else /* neither cgroup option is configured: no check is performed */ |
---|
| 1137 | + return 0; |
---|
| 1138 | +#endif /* CONFIG_CGROUP_DEVICE or CONFIG_CGROUP_BPF */ |
---|
| 1139 | +} |
---|
| 1140 | + |
---|
998 | 1141 | /* Debugfs */ |
---|
999 | 1142 | #if defined(CONFIG_DEBUG_FS) |
---|
1000 | 1143 | |
---|