| Old | New | Line |
|---|---|---|
| .. | .. | |
| 29 | 29 | #include <drm/drm_print.h> |
| 30 | 30 | |
| 31 | 31 | /* max number of rings */ |
| 32 | | -#define AMDGPU_MAX_RINGS 21 |
| 33 | | -#define AMDGPU_MAX_GFX_RINGS 1 |
| | 32 | +#define AMDGPU_MAX_RINGS 28 |
| | 33 | +#define AMDGPU_MAX_HWIP_RINGS 8 |
| | 34 | +#define AMDGPU_MAX_GFX_RINGS 2 |
| 34 | 35 | #define AMDGPU_MAX_COMPUTE_RINGS 8 |
| 35 | 36 | #define AMDGPU_MAX_VCE_RINGS 3 |
| 36 | 37 | #define AMDGPU_MAX_UVD_ENC_RINGS 2 |
| | 38 | + |
| | 39 | +#define AMDGPU_RING_PRIO_DEFAULT 1 |
| | 40 | +#define AMDGPU_RING_PRIO_MAX AMDGPU_GFX_PIPE_PRIO_MAX |
| 37 | 41 | |
| 38 | 42 | /* some special values for the owner field */ |
| 39 | 43 | #define AMDGPU_FENCE_OWNER_UNDEFINED ((void *)0ul) |
| .. | .. | |
| 46 | 50 | |
| 47 | 51 | #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) |
| 48 | 52 | |
| | 53 | +#define AMDGPU_IB_POOL_SIZE (1024 * 1024) |
| | 54 | + |
| 49 | 55 | enum amdgpu_ring_type { |
| 50 | | - AMDGPU_RING_TYPE_GFX, |
| 51 | | - AMDGPU_RING_TYPE_COMPUTE, |
| 52 | | - AMDGPU_RING_TYPE_SDMA, |
| 53 | | - AMDGPU_RING_TYPE_UVD, |
| 54 | | - AMDGPU_RING_TYPE_VCE, |
| | 56 | + AMDGPU_RING_TYPE_GFX = AMDGPU_HW_IP_GFX, |
| | 57 | + AMDGPU_RING_TYPE_COMPUTE = AMDGPU_HW_IP_COMPUTE, |
| | 58 | + AMDGPU_RING_TYPE_SDMA = AMDGPU_HW_IP_DMA, |
| | 59 | + AMDGPU_RING_TYPE_UVD = AMDGPU_HW_IP_UVD, |
| | 60 | + AMDGPU_RING_TYPE_VCE = AMDGPU_HW_IP_VCE, |
| | 61 | + AMDGPU_RING_TYPE_UVD_ENC = AMDGPU_HW_IP_UVD_ENC, |
| | 62 | + AMDGPU_RING_TYPE_VCN_DEC = AMDGPU_HW_IP_VCN_DEC, |
| | 63 | + AMDGPU_RING_TYPE_VCN_ENC = AMDGPU_HW_IP_VCN_ENC, |
| | 64 | + AMDGPU_RING_TYPE_VCN_JPEG = AMDGPU_HW_IP_VCN_JPEG, |
| 55 | 65 | AMDGPU_RING_TYPE_KIQ, |
| 56 | | - AMDGPU_RING_TYPE_UVD_ENC, |
| 57 | | - AMDGPU_RING_TYPE_VCN_DEC, |
| 58 | | - AMDGPU_RING_TYPE_VCN_ENC, |
| 59 | | - AMDGPU_RING_TYPE_VCN_JPEG |
| | 66 | + AMDGPU_RING_TYPE_MES |
| | 67 | +}; |
| | 68 | + |
| | 69 | +enum amdgpu_ib_pool_type { |
| | 70 | + /* Normal submissions to the top of the pipeline. */ |
| | 71 | + AMDGPU_IB_POOL_DELAYED, |
| | 72 | + /* Immediate submissions to the bottom of the pipeline. */ |
| | 73 | + AMDGPU_IB_POOL_IMMEDIATE, |
| | 74 | + /* Direct submission to the ring buffer during init and reset. */ |
| | 75 | + AMDGPU_IB_POOL_DIRECT, |
| | 76 | + |
| | 77 | + AMDGPU_IB_POOL_MAX |
| 60 | 78 | }; |
| 61 | 79 | |
| 62 | 80 | struct amdgpu_device; |
| .. | .. | |
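The new amdgpu_ib_pool_type enum splits IB allocations by how a submission reaches the hardware. A minimal sketch of how a caller might choose a pool; the helper name and its two flags are hypothetical, only the enum values and their meaning come from the hunk above:

```c
/*
 * Illustrative sketch only: pick an IB pool for a submission.  The helper
 * name and the two flags are made up for this example.
 */
static enum amdgpu_ib_pool_type
my_pick_ib_pool(bool direct_submit, bool immediate)
{
	if (direct_submit)
		return AMDGPU_IB_POOL_DIRECT;		/* ring writes during init/reset */
	if (immediate)
		return AMDGPU_IB_POOL_IMMEDIATE;	/* bottom-of-pipeline submission */
	return AMDGPU_IB_POOL_DELAYED;			/* normal top-of-pipeline submission */
}
```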
| Old | New | Line |
|---|---|---|
| 64 | 82 | struct amdgpu_ib; |
| 65 | 83 | struct amdgpu_cs_parser; |
| 66 | 84 | struct amdgpu_job; |
| | 85 | + |
| | 86 | +struct amdgpu_sched { |
| | 87 | + u32 num_scheds; |
| | 88 | + struct drm_gpu_scheduler *sched[AMDGPU_MAX_HWIP_RINGS]; |
| | 89 | +}; |
| 67 | 90 | |
| 68 | 91 | /* |
| 69 | 92 | * Fences. |
| .. | .. | |
| 96 | 119 | void amdgpu_fence_driver_resume(struct amdgpu_device *adev); |
| 97 | 120 | int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, |
| 98 | 121 | unsigned flags); |
| 99 | | -int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); |
| 100 | | -void amdgpu_fence_process(struct amdgpu_ring *ring); |
| | 122 | +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s, |
| | 123 | + uint32_t timeout); |
| | 124 | +bool amdgpu_fence_process(struct amdgpu_ring *ring); |
| 101 | 125 | int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); |
| 102 | 126 | signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, |
| 103 | 127 | uint32_t wait_seq, |
| .. | .. | |
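amdgpu_fence_emit_polling() now takes an explicit timeout and amdgpu_fence_process() reports whether it made progress. A hedged sketch of the polled-fence pattern these prototypes support, using only functions declared in this header; the timeout value and the 32-dword ring allocation are placeholders, not values taken from the driver:

```c
/*
 * Illustrative sketch only: emit a polled fence and busy-wait for it with
 * the updated prototypes.  MY_POLL_TIMEOUT is a made-up constant.
 */
#define MY_POLL_TIMEOUT	1000

static int my_poll_ring_idle(struct amdgpu_ring *ring)
{
	uint32_t seq;
	int r;

	r = amdgpu_ring_alloc(ring, 32);	/* reserve space for the fence packet */
	if (r)
		return r;

	r = amdgpu_fence_emit_polling(ring, &seq, MY_POLL_TIMEOUT);
	if (r) {
		amdgpu_ring_undo(ring);
		return r;
	}
	amdgpu_ring_commit(ring);

	/* treat a non-positive return from the polling wait as a timeout */
	if (amdgpu_fence_wait_polling(ring, seq, MY_POLL_TIMEOUT) <= 0)
		return -ETIME;

	return 0;
}
```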
| Old | New | Line |
|---|---|---|
| 114 | 138 | uint32_t align_mask; |
| 115 | 139 | u32 nop; |
| 116 | 140 | bool support_64bit_ptrs; |
| | 141 | + bool no_user_fence; |
| 117 | 142 | unsigned vmhub; |
| 118 | 143 | unsigned extra_dw; |
| 119 | 144 | |
| .. | .. | |
| 129 | 154 | unsigned emit_ib_size; |
| 130 | 155 | /* command emit functions */ |
| 131 | 156 | void (*emit_ib)(struct amdgpu_ring *ring, |
| | 157 | + struct amdgpu_job *job, |
| 132 | 158 | struct amdgpu_ib *ib, |
| 133 | | - unsigned vmid, bool ctx_switch); |
| | 159 | + uint32_t flags); |
| 134 | 160 | void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, |
| 135 | 161 | uint64_t seq, unsigned flags); |
| 136 | 162 | void (*emit_pipeline_sync)(struct amdgpu_ring *ring); |
| .. | .. | |
| 157 | 183 | void (*end_use)(struct amdgpu_ring *ring); |
| 158 | 184 | void (*emit_switch_buffer) (struct amdgpu_ring *ring); |
| 159 | 185 | void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); |
| 160 | | - void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); |
| | 186 | + void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg, |
| | 187 | + uint32_t reg_val_offs); |
| 161 | 188 | void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); |
| 162 | 189 | void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg, |
| 163 | 190 | uint32_t val, uint32_t mask); |
| 164 | 191 | void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring, |
| 165 | 192 | uint32_t reg0, uint32_t reg1, |
| 166 | 193 | uint32_t ref, uint32_t mask); |
| 167 | | - void (*emit_tmz)(struct amdgpu_ring *ring, bool start); |
| 168 | | - /* priority functions */ |
| 169 | | - void (*set_priority) (struct amdgpu_ring *ring, |
| 170 | | - enum drm_sched_priority priority); |
| | 194 | + void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start, |
| | 195 | + bool secure); |
| | 196 | + /* Try to soft recover the ring to make the fence signal */ |
| | 197 | + void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid); |
| | 198 | + int (*preempt_ib)(struct amdgpu_ring *ring); |
| | 199 | + void (*emit_mem_sync)(struct amdgpu_ring *ring); |
| 171 | 200 | }; |
| 172 | 201 | |
| 173 | 202 | struct amdgpu_ring { |
| .. | .. | |
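The emit_ib() callback now receives the job and a flags word, and emit_frame_cntl() replaces emit_tmz() with a single start/end hook carrying a secure (TMZ) flag. A sketch of how a submission path might bracket IB emission with it, using the wrapper macros added later in this patch; the function below is illustrative, not the driver's actual submit path:

```c
/*
 * Illustrative sketch only: bracket IB emission with frame control when the
 * ring backend provides the hook.
 */
static void my_emit_ibs(struct amdgpu_ring *ring, struct amdgpu_job *job,
			struct amdgpu_ib *ibs, unsigned int num_ibs,
			uint32_t flags, bool secure)
{
	unsigned int i;

	if (ring->funcs->emit_frame_cntl)
		amdgpu_ring_emit_frame_cntl(ring, true, secure);	/* frame start */

	for (i = 0; i < num_ibs; ++i)
		amdgpu_ring_emit_ib(ring, job, &ibs[i], flags);

	if (ring->funcs->emit_frame_cntl)
		amdgpu_ring_emit_frame_cntl(ring, false, secure);	/* frame end */
}
```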
| Old | New | Line |
|---|---|---|
| 175 | 204 | const struct amdgpu_ring_funcs *funcs; |
| 176 | 205 | struct amdgpu_fence_driver fence_drv; |
| 177 | 206 | struct drm_gpu_scheduler sched; |
| 178 | | - struct list_head lru_list; |
| 179 | 207 | |
| 180 | 208 | struct amdgpu_bo *ring_obj; |
| 181 | 209 | volatile uint32_t *ring; |
| .. | .. | |
| 188 | 216 | uint64_t gpu_addr; |
| 189 | 217 | uint64_t ptr_mask; |
| 190 | 218 | uint32_t buf_mask; |
| 191 | | - bool ready; |
| 192 | 219 | u32 idx; |
| 193 | 220 | u32 me; |
| 194 | 221 | u32 pipe; |
| .. | .. | |
| 204 | 231 | unsigned fence_offs; |
| 205 | 232 | uint64_t current_ctx; |
| 206 | 233 | char name[16]; |
| | 234 | + u32 trail_seq; |
| | 235 | + unsigned trail_fence_offs; |
| | 236 | + u64 trail_fence_gpu_addr; |
| | 237 | + volatile u32 *trail_fence_cpu_addr; |
| 207 | 238 | unsigned cond_exe_offs; |
| 208 | 239 | u64 cond_exe_gpu_addr; |
| 209 | 240 | volatile u32 *cond_exe_cpu_addr; |
| 210 | 241 | unsigned vm_inv_eng; |
| 211 | 242 | struct dma_fence *vmid_wait; |
| 212 | 243 | bool has_compute_vm_bug; |
| | 244 | + bool no_scheduler; |
| 213 | 245 | |
| 214 | | - atomic_t num_jobs[DRM_SCHED_PRIORITY_MAX]; |
| | 246 | + atomic_t num_jobs[DRM_SCHED_PRIORITY_COUNT]; |
| 215 | 247 | struct mutex priority_mutex; |
| 216 | 248 | /* protected by priority_mutex */ |
| 217 | 249 | int priority; |
| .. | .. | |
| 221 | 253 | #endif |
| 222 | 254 | }; |
| 223 | 255 | |
| | 256 | +#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib))) |
| | 257 | +#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib))) |
| | 258 | +#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r)) |
| | 259 | +#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t)) |
| | 260 | +#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) |
| | 261 | +#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) |
| | 262 | +#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) |
| | 263 | +#define amdgpu_ring_emit_ib(r, job, ib, flags) ((r)->funcs->emit_ib((r), (job), (ib), (flags))) |
| | 264 | +#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r)) |
| | 265 | +#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) |
| | 266 | +#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) |
| | 267 | +#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as)) |
| | 268 | +#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) |
| | 269 | +#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) |
| | 270 | +#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) |
| | 271 | +#define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o)) |
| | 272 | +#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) |
| | 273 | +#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) |
| | 274 | +#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) |
| | 275 | +#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s)) |
| | 276 | +#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) |
| | 277 | +#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) |
| | 278 | +#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) |
| | 279 | +#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r) |
| | 280 | + |
| 224 | 281 | int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); |
| 225 | 282 | void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); |
| 226 | 283 | void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); |
| 227 | 284 | void amdgpu_ring_commit(struct amdgpu_ring *ring); |
| 228 | 285 | void amdgpu_ring_undo(struct amdgpu_ring *ring); |
| 229 | | -void amdgpu_ring_priority_get(struct amdgpu_ring *ring, |
| 230 | | - enum drm_sched_priority priority); |
| 231 | | -void amdgpu_ring_priority_put(struct amdgpu_ring *ring, |
| 232 | | - enum drm_sched_priority priority); |
| 233 | 286 | int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, |
| 234 | | - unsigned ring_size, struct amdgpu_irq_src *irq_src, |
| 235 | | - unsigned irq_type); |
| | 287 | + unsigned int ring_size, struct amdgpu_irq_src *irq_src, |
| | 288 | + unsigned int irq_type, unsigned int prio); |
| 236 | 289 | void amdgpu_ring_fini(struct amdgpu_ring *ring); |
| 237 | | -int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, |
| 238 | | - int *blacklist, int num_blacklist, |
| 239 | | - bool lru_pipe_order, struct amdgpu_ring **ring); |
| 240 | | -void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring); |
| 241 | 290 | void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, |
| 242 | 291 | uint32_t reg0, uint32_t val0, |
| 243 | 292 | uint32_t reg1, uint32_t val1); |
| | 293 | +bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, |
| | 294 | + struct dma_fence *fence); |
| | 295 | + |
| | 296 | +static inline void amdgpu_ring_set_preempt_cond_exec(struct amdgpu_ring *ring, |
| | 297 | + bool cond_exec) |
| | 298 | +{ |
| | 299 | + *ring->cond_exe_cpu_addr = cond_exec; |
| | 300 | +} |
| 244 | 301 | |
| 245 | 302 | static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) |
| 246 | 303 | { |
| .. | .. | |
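amdgpu_ring_init() gains a hardware priority argument and the old per-ring LRU helpers are dropped. A minimal bring-up sketch for a hypothetical IP block using the default priority; the 1024-dword ring size, irq source, and irq type are placeholders:

```c
/*
 * Illustrative sketch only: bring up one ring for a hypothetical IP block
 * with the default hardware priority.  Sizes and irq parameters are
 * placeholders, not values taken from a real IP block.
 */
static int my_ip_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
			   struct amdgpu_irq_src *irq_src, unsigned int irq_type)
{
	return amdgpu_ring_init(adev, ring, 1024, irq_src, irq_type,
				AMDGPU_RING_PRIO_DEFAULT);
}
```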
| Old | New | Line |
|---|---|---|
| 290 | 347 | ring->count_dw -= count_dw; |
| 291 | 348 | } |
| 292 | 349 | |
| | 350 | +int amdgpu_ring_test_helper(struct amdgpu_ring *ring); |
| | 351 | + |
| | 352 | +int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, |
| | 353 | + struct amdgpu_ring *ring); |
| | 354 | +void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring); |
| | 355 | + |
| 293 | 356 | #endif |
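amdgpu_ring_test_helper() gives IP blocks a single call that runs the ring test and, presumably, records the outcome on the ring. A hedged usage sketch for an IP block's hw_init path; the function and its ring array are hypothetical:

```c
/*
 * Illustrative sketch only: run the ring test for each ring of a hypothetical
 * IP block during hw_init and propagate the first failure.
 */
static int my_ip_test_rings(struct amdgpu_ring **rings, unsigned int count)
{
	unsigned int i;
	int r;

	for (i = 0; i < count; ++i) {
		r = amdgpu_ring_test_helper(rings[i]);
		if (r)
			return r;
	}
	return 0;
}
```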