hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
....@@ -28,8 +28,6 @@
2828 #include <linux/types.h>
2929 #include <linux/tracepoint.h>
3030
31
-#include <drm/drmP.h>
32
-
3331 #undef TRACE_SYSTEM
3432 #define TRACE_SYSTEM amdgpu
3533 #define TRACE_INCLUDE_FILE amdgpu_trace
....@@ -37,7 +35,7 @@
3735 #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
3836 job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished)
3937
40
-TRACE_EVENT(amdgpu_mm_rreg,
38
+TRACE_EVENT(amdgpu_device_rreg,
4139 TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
4240 TP_ARGS(did, reg, value),
4341 TP_STRUCT__entry(
....@@ -56,7 +54,7 @@
5654 (unsigned long)__entry->value)
5755 );
5856
59
-TRACE_EVENT(amdgpu_mm_wreg,
57
+TRACE_EVENT(amdgpu_device_wreg,
6058 TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
6159 TP_ARGS(did, reg, value),
6260 TP_STRUCT__entry(
....@@ -76,9 +74,10 @@
7674 );
7775
7876 TRACE_EVENT(amdgpu_iv,
79
- TP_PROTO(struct amdgpu_iv_entry *iv),
80
- TP_ARGS(iv),
77
+ TP_PROTO(unsigned ih, struct amdgpu_iv_entry *iv),
78
+ TP_ARGS(ih, iv),
8179 TP_STRUCT__entry(
80
+ __field(unsigned, ih)
8281 __field(unsigned, client_id)
8382 __field(unsigned, src_id)
8483 __field(unsigned, ring_id)
....@@ -90,6 +89,7 @@
9089 __array(unsigned, src_data, 4)
9190 ),
9291 TP_fast_assign(
92
+ __entry->ih = ih;
9393 __entry->client_id = iv->client_id;
9494 __entry->src_id = iv->src_id;
9595 __entry->ring_id = iv->ring_id;
....@@ -103,8 +103,9 @@
103103 __entry->src_data[2] = iv->src_data[2];
104104 __entry->src_data[3] = iv->src_data[3];
105105 ),
106
- TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x\n",
107
- __entry->client_id, __entry->src_id,
106
+ TP_printk("ih:%u client_id:%u src_id:%u ring:%u vmid:%u "
107
+ "timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x",
108
+ __entry->ih, __entry->client_id, __entry->src_id,
108109 __entry->ring_id, __entry->vmid,
109110 __entry->timestamp, __entry->pasid,
110111 __entry->src_data[0], __entry->src_data[1],
....@@ -150,10 +151,10 @@
150151
151152 TP_fast_assign(
152153 __entry->bo_list = p->bo_list;
153
- __entry->ring = p->ring->idx;
154
+ __entry->ring = to_amdgpu_ring(p->entity->rq->sched)->idx;
154155 __entry->dw = p->job->ibs[i].length_dw;
155156 __entry->fences = amdgpu_fence_count_emitted(
156
- p->ring);
157
+ to_amdgpu_ring(p->entity->rq->sched));
157158 ),
158159 TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
159160 __entry->bo_list, __entry->ring, __entry->dw,
....@@ -169,7 +170,7 @@
169170 __field(unsigned int, context)
170171 __field(unsigned int, seqno)
171172 __field(struct dma_fence *, fence)
172
- __field(char *, ring_name)
173
+ __string(ring, to_amdgpu_ring(job->base.sched)->name)
173174 __field(u32, num_ibs)
174175 ),
175176
....@@ -178,12 +179,12 @@
178179 __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
179180 __entry->context = job->base.s_fence->finished.context;
180181 __entry->seqno = job->base.s_fence->finished.seqno;
181
- __entry->ring_name = to_amdgpu_ring(job->base.sched)->name;
182
+ __assign_str(ring, to_amdgpu_ring(job->base.sched)->name)
182183 __entry->num_ibs = job->num_ibs;
183184 ),
184185 TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
185186 __entry->sched_job_id, __get_str(timeline), __entry->context,
186
- __entry->seqno, __entry->ring_name, __entry->num_ibs)
187
+ __entry->seqno, __get_str(ring), __entry->num_ibs)
187188 );
188189
189190 TRACE_EVENT(amdgpu_sched_run_job,
....@@ -194,7 +195,7 @@
194195 __string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
195196 __field(unsigned int, context)
196197 __field(unsigned int, seqno)
197
- __field(char *, ring_name)
198
+ __string(ring, to_amdgpu_ring(job->base.sched)->name)
198199 __field(u32, num_ibs)
199200 ),
200201
....@@ -203,12 +204,12 @@
203204 __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
204205 __entry->context = job->base.s_fence->finished.context;
205206 __entry->seqno = job->base.s_fence->finished.seqno;
206
- __entry->ring_name = to_amdgpu_ring(job->base.sched)->name;
207
+ __assign_str(ring, to_amdgpu_ring(job->base.sched)->name)
207208 __entry->num_ibs = job->num_ibs;
208209 ),
209210 TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
210211 __entry->sched_job_id, __get_str(timeline), __entry->context,
211
- __entry->seqno, __entry->ring_name, __entry->num_ibs)
212
+ __entry->seqno, __get_str(ring), __entry->num_ibs)
212213 );
213214
214215
....@@ -218,6 +219,7 @@
218219 TP_ARGS(vm, ring, job),
219220 TP_STRUCT__entry(
220221 __field(u32, pasid)
222
+ __string(ring, ring->name)
221223 __field(u32, ring)
222224 __field(u32, vmid)
223225 __field(u32, vm_hub)
....@@ -227,14 +229,14 @@
227229
228230 TP_fast_assign(
229231 __entry->pasid = vm->pasid;
230
- __entry->ring = ring->idx;
232
+ __assign_str(ring, ring->name)
231233 __entry->vmid = job->vmid;
232234 __entry->vm_hub = ring->funcs->vmhub,
233235 __entry->pd_addr = job->vm_pd_addr;
234236 __entry->needs_flush = job->vm_needs_flush;
235237 ),
236
- TP_printk("pasid=%d, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
237
- __entry->pasid, __entry->ring, __entry->vmid,
238
+ TP_printk("pasid=%d, ring=%s, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
239
+ __entry->pasid, __get_str(ring), __entry->vmid,
238240 __entry->vm_hub, __entry->pd_addr, __entry->needs_flush)
239241 );
240242
....@@ -319,16 +321,60 @@
319321 TP_ARGS(mapping)
320322 );
321323
324
+TRACE_EVENT(amdgpu_vm_update_ptes,
325
+ TP_PROTO(struct amdgpu_vm_update_params *p,
326
+ uint64_t start, uint64_t end,
327
+ unsigned int nptes, uint64_t dst,
328
+ uint64_t incr, uint64_t flags,
329
+ pid_t pid, uint64_t vm_ctx),
330
+ TP_ARGS(p, start, end, nptes, dst, incr, flags, pid, vm_ctx),
331
+ TP_STRUCT__entry(
332
+ __field(u64, start)
333
+ __field(u64, end)
334
+ __field(u64, flags)
335
+ __field(unsigned int, nptes)
336
+ __field(u64, incr)
337
+ __field(pid_t, pid)
338
+ __field(u64, vm_ctx)
339
+ __dynamic_array(u64, dst, nptes)
340
+ ),
341
+
342
+ TP_fast_assign(
343
+ unsigned int i;
344
+
345
+ __entry->start = start;
346
+ __entry->end = end;
347
+ __entry->flags = flags;
348
+ __entry->incr = incr;
349
+ __entry->nptes = nptes;
350
+ __entry->pid = pid;
351
+ __entry->vm_ctx = vm_ctx;
352
+ for (i = 0; i < nptes; ++i) {
353
+ u64 addr = p->pages_addr ? amdgpu_vm_map_gart(
354
+ p->pages_addr, dst) : dst;
355
+
356
+ ((u64 *)__get_dynamic_array(dst))[i] = addr;
357
+ dst += incr;
358
+ }
359
+ ),
360
+ TP_printk("pid:%u vm_ctx:0x%llx start:0x%010llx end:0x%010llx,"
361
+ " flags:0x%llx, incr:%llu, dst:\n%s", __entry->pid,
362
+ __entry->vm_ctx, __entry->start, __entry->end,
363
+ __entry->flags, __entry->incr, __print_array(
364
+ __get_dynamic_array(dst), __entry->nptes, 8))
365
+);
366
+
322367 TRACE_EVENT(amdgpu_vm_set_ptes,
323368 TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
324
- uint32_t incr, uint64_t flags),
325
- TP_ARGS(pe, addr, count, incr, flags),
369
+ uint32_t incr, uint64_t flags, bool direct),
370
+ TP_ARGS(pe, addr, count, incr, flags, direct),
326371 TP_STRUCT__entry(
327372 __field(u64, pe)
328373 __field(u64, addr)
329374 __field(u32, count)
330375 __field(u32, incr)
331376 __field(u64, flags)
377
+ __field(bool, direct)
332378 ),
333379
334380 TP_fast_assign(
....@@ -337,28 +383,32 @@
337383 __entry->count = count;
338384 __entry->incr = incr;
339385 __entry->flags = flags;
386
+ __entry->direct = direct;
340387 ),
341
- TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u",
342
- __entry->pe, __entry->addr, __entry->incr,
343
- __entry->flags, __entry->count)
388
+ TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u, "
389
+ "direct=%d", __entry->pe, __entry->addr, __entry->incr,
390
+ __entry->flags, __entry->count, __entry->direct)
344391 );
345392
346393 TRACE_EVENT(amdgpu_vm_copy_ptes,
347
- TP_PROTO(uint64_t pe, uint64_t src, unsigned count),
348
- TP_ARGS(pe, src, count),
394
+ TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool direct),
395
+ TP_ARGS(pe, src, count, direct),
349396 TP_STRUCT__entry(
350397 __field(u64, pe)
351398 __field(u64, src)
352399 __field(u32, count)
400
+ __field(bool, direct)
353401 ),
354402
355403 TP_fast_assign(
356404 __entry->pe = pe;
357405 __entry->src = src;
358406 __entry->count = count;
407
+ __entry->direct = direct;
359408 ),
360
- TP_printk("pe=%010Lx, src=%010Lx, count=%u",
361
- __entry->pe, __entry->src, __entry->count)
409
+ TP_printk("pe=%010Lx, src=%010Lx, count=%u, direct=%d",
410
+ __entry->pe, __entry->src, __entry->count,
411
+ __entry->direct)
362412 );
363413
364414 TRACE_EVENT(amdgpu_vm_flush,
....@@ -366,20 +416,20 @@
366416 uint64_t pd_addr),
367417 TP_ARGS(ring, vmid, pd_addr),
368418 TP_STRUCT__entry(
369
- __field(u32, ring)
419
+ __string(ring, ring->name)
370420 __field(u32, vmid)
371421 __field(u32, vm_hub)
372422 __field(u64, pd_addr)
373423 ),
374424
375425 TP_fast_assign(
376
- __entry->ring = ring->idx;
426
+ __assign_str(ring, ring->name)
377427 __entry->vmid = vmid;
378428 __entry->vm_hub = ring->funcs->vmhub;
379429 __entry->pd_addr = pd_addr;
380430 ),
381
- TP_printk("ring=%u, id=%u, hub=%u, pd_addr=%010Lx",
382
- __entry->ring, __entry->vmid,
431
+ TP_printk("ring=%s, id=%u, hub=%u, pd_addr=%010Lx",
432
+ __get_str(ring), __entry->vmid,
383433 __entry->vm_hub,__entry->pd_addr)
384434 );
385435
....@@ -462,6 +512,30 @@
462512 __entry->new_placement, __entry->bo_size)
463513 );
464514
515
+TRACE_EVENT(amdgpu_ib_pipe_sync,
516
+ TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence),
517
+ TP_ARGS(sched_job, fence),
518
+ TP_STRUCT__entry(
519
+ __string(ring, sched_job->base.sched->name)
520
+ __field(uint64_t, id)
521
+ __field(struct dma_fence *, fence)
522
+ __field(uint64_t, ctx)
523
+ __field(unsigned, seqno)
524
+ ),
525
+
526
+ TP_fast_assign(
527
+ __assign_str(ring, sched_job->base.sched->name)
528
+ __entry->id = sched_job->base.id;
529
+ __entry->fence = fence;
530
+ __entry->ctx = fence->context;
531
+ __entry->seqno = fence->seqno;
532
+ ),
533
+ TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, context=%llu, seq=%u",
534
+ __get_str(ring), __entry->id,
535
+ __entry->fence, __entry->ctx,
536
+ __entry->seqno)
537
+);
538
+
465539 #undef AMDGPU_JOB_GET_TIMELINE_NAME
466540 #endif
467541