forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-05-13 9d77db3c730780c8ef5ccd4b66403ff5675cfe4e
kernel/drivers/gpu/drm/v3d/v3d_sched.c
@@ -30,176 +30,362 @@
 	return container_of(sched_job, struct v3d_job, base);
 }
 
+static struct v3d_bin_job *
+to_bin_job(struct drm_sched_job *sched_job)
+{
+	return container_of(sched_job, struct v3d_bin_job, base.base);
+}
+
+static struct v3d_render_job *
+to_render_job(struct drm_sched_job *sched_job)
+{
+	return container_of(sched_job, struct v3d_render_job, base.base);
+}
+
+static struct v3d_tfu_job *
+to_tfu_job(struct drm_sched_job *sched_job)
+{
+	return container_of(sched_job, struct v3d_tfu_job, base.base);
+}
+
+static struct v3d_csd_job *
+to_csd_job(struct drm_sched_job *sched_job)
+{
+	return container_of(sched_job, struct v3d_csd_job, base.base);
+}
+
 static void
 v3d_job_free(struct drm_sched_job *sched_job)
 {
 	struct v3d_job *job = to_v3d_job(sched_job);
 
-	v3d_exec_put(job->exec);
+	drm_sched_job_cleanup(sched_job);
+	v3d_job_put(job);
 }
 
 /**
- * Returns the fences that the bin job depends on, one by one.
- * v3d_job_run() won't be called until all of them have been signaled.
+ * Returns the fences that the job depends on, one by one.
+ *
+ * If placed in the scheduler's .dependency method, the corresponding
+ * .run_job won't be called until all of them have been signaled.
  */
 static struct dma_fence *
 v3d_job_dependency(struct drm_sched_job *sched_job,
 		   struct drm_sched_entity *s_entity)
 {
 	struct v3d_job *job = to_v3d_job(sched_job);
-	struct v3d_exec_info *exec = job->exec;
-	enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
-	struct dma_fence *fence;
-
-	fence = job->in_fence;
-	if (fence) {
-		job->in_fence = NULL;
-		return fence;
-	}
-
-	if (q == V3D_RENDER) {
-		/* If we had a bin job, the render job definitely depends on
-		 * it. We first have to wait for bin to be scheduled, so that
-		 * its done_fence is created.
-		 */
-		fence = exec->bin_done_fence;
-		if (fence) {
-			exec->bin_done_fence = NULL;
-			return fence;
-		}
-	}
 
 	/* XXX: Wait on a fence for switching the GMP if necessary,
 	 * and then do so.
 	 */
 
-	return fence;
+	if (!xa_empty(&job->deps))
+		return xa_erase(&job->deps, job->last_dep++);
+
+	return NULL;
 }
 
-static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
+static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
 {
-	struct v3d_job *job = to_v3d_job(sched_job);
-	struct v3d_exec_info *exec = job->exec;
-	enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
-	struct v3d_dev *v3d = exec->v3d;
+	struct v3d_bin_job *job = to_bin_job(sched_job);
+	struct v3d_dev *v3d = job->base.v3d;
 	struct drm_device *dev = &v3d->drm;
 	struct dma_fence *fence;
 	unsigned long irqflags;
 
-	if (unlikely(job->base.s_fence->finished.error))
+	if (unlikely(job->base.base.s_fence->finished.error))
 		return NULL;
 
 	/* Lock required around bin_job update vs
 	 * v3d_overflow_mem_work().
 	 */
 	spin_lock_irqsave(&v3d->job_lock, irqflags);
-	if (q == V3D_BIN) {
-		v3d->bin_job = job->exec;
-
-		/* Clear out the overflow allocation, so we don't
-		 * reuse the overflow attached to a previous job.
-		 */
-		V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
-	} else {
-		v3d->render_job = job->exec;
-	}
+	v3d->bin_job = job;
+	/* Clear out the overflow allocation, so we don't
+	 * reuse the overflow attached to a previous job.
+	 */
+	V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
 	spin_unlock_irqrestore(&v3d->job_lock, irqflags);
 
-	/* Can we avoid this flush when q==RENDER? We need to be
-	 * careful of scheduling, though -- imagine job0 rendering to
-	 * texture and job1 reading, and them being executed as bin0,
-	 * bin1, render0, render1, so that render1's flush at bin time
-	 * wasn't enough.
-	 */
 	v3d_invalidate_caches(v3d);
 
-	fence = v3d_fence_create(v3d, q);
+	fence = v3d_fence_create(v3d, V3D_BIN);
 	if (IS_ERR(fence))
 		return NULL;
 
-	if (job->done_fence)
-		dma_fence_put(job->done_fence);
-	job->done_fence = dma_fence_get(fence);
+	if (job->base.irq_fence)
+		dma_fence_put(job->base.irq_fence);
+	job->base.irq_fence = dma_fence_get(fence);
 
-	trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno,
+	trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
 			    job->start, job->end);
-
-	if (q == V3D_BIN) {
-		if (exec->qma) {
-			V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma);
-			V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms);
-		}
-		if (exec->qts) {
-			V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
-				       V3D_CLE_CT0QTS_ENABLE |
-				       exec->qts);
-		}
-	} else {
-		/* XXX: Set the QCFG */
-	}
 
 	/* Set the current and end address of the control list.
 	 * Writing the end register is what starts the job.
 	 */
-	V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start);
-	V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end);
+	if (job->qma) {
+		V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, job->qma);
+		V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, job->qms);
+	}
+	if (job->qts) {
+		V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
+			       V3D_CLE_CT0QTS_ENABLE |
+			       job->qts);
+	}
+	V3D_CORE_WRITE(0, V3D_CLE_CT0QBA, job->start);
+	V3D_CORE_WRITE(0, V3D_CLE_CT0QEA, job->end);
 
 	return fence;
 }
 
-static void
-v3d_job_timedout(struct drm_sched_job *sched_job)
+static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
+{
+	struct v3d_render_job *job = to_render_job(sched_job);
+	struct v3d_dev *v3d = job->base.v3d;
+	struct drm_device *dev = &v3d->drm;
+	struct dma_fence *fence;
+
+	if (unlikely(job->base.base.s_fence->finished.error))
+		return NULL;
+
+	v3d->render_job = job;
+
+	/* Can we avoid this flush? We need to be careful of
+	 * scheduling, though -- imagine job0 rendering to texture and
+	 * job1 reading, and them being executed as bin0, bin1,
+	 * render0, render1, so that render1's flush at bin time
+	 * wasn't enough.
+	 */
+	v3d_invalidate_caches(v3d);
+
+	fence = v3d_fence_create(v3d, V3D_RENDER);
+	if (IS_ERR(fence))
+		return NULL;
+
+	if (job->base.irq_fence)
+		dma_fence_put(job->base.irq_fence);
+	job->base.irq_fence = dma_fence_get(fence);
+
+	trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
+			    job->start, job->end);
+
+	/* XXX: Set the QCFG */
+
+	/* Set the current and end address of the control list.
+	 * Writing the end register is what starts the job.
+	 */
+	V3D_CORE_WRITE(0, V3D_CLE_CT1QBA, job->start);
+	V3D_CORE_WRITE(0, V3D_CLE_CT1QEA, job->end);
+
+	return fence;
+}
+
+static struct dma_fence *
+v3d_tfu_job_run(struct drm_sched_job *sched_job)
+{
+	struct v3d_tfu_job *job = to_tfu_job(sched_job);
+	struct v3d_dev *v3d = job->base.v3d;
+	struct drm_device *dev = &v3d->drm;
+	struct dma_fence *fence;
+
+	fence = v3d_fence_create(v3d, V3D_TFU);
+	if (IS_ERR(fence))
+		return NULL;
+
+	v3d->tfu_job = job;
+	if (job->base.irq_fence)
+		dma_fence_put(job->base.irq_fence);
+	job->base.irq_fence = dma_fence_get(fence);
+
+	trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
+
+	V3D_WRITE(V3D_TFU_IIA, job->args.iia);
+	V3D_WRITE(V3D_TFU_IIS, job->args.iis);
+	V3D_WRITE(V3D_TFU_ICA, job->args.ica);
+	V3D_WRITE(V3D_TFU_IUA, job->args.iua);
+	V3D_WRITE(V3D_TFU_IOA, job->args.ioa);
+	V3D_WRITE(V3D_TFU_IOS, job->args.ios);
+	V3D_WRITE(V3D_TFU_COEF0, job->args.coef[0]);
+	if (job->args.coef[0] & V3D_TFU_COEF0_USECOEF) {
+		V3D_WRITE(V3D_TFU_COEF1, job->args.coef[1]);
+		V3D_WRITE(V3D_TFU_COEF2, job->args.coef[2]);
+		V3D_WRITE(V3D_TFU_COEF3, job->args.coef[3]);
+	}
+	/* ICFG kicks off the job. */
+	V3D_WRITE(V3D_TFU_ICFG, job->args.icfg | V3D_TFU_ICFG_IOC);
+
+	return fence;
+}
+
+static struct dma_fence *
+v3d_csd_job_run(struct drm_sched_job *sched_job)
+{
+	struct v3d_csd_job *job = to_csd_job(sched_job);
+	struct v3d_dev *v3d = job->base.v3d;
+	struct drm_device *dev = &v3d->drm;
+	struct dma_fence *fence;
+	int i;
+
+	v3d->csd_job = job;
+
+	v3d_invalidate_caches(v3d);
+
+	fence = v3d_fence_create(v3d, V3D_CSD);
+	if (IS_ERR(fence))
+		return NULL;
+
+	if (job->base.irq_fence)
+		dma_fence_put(job->base.irq_fence);
+	job->base.irq_fence = dma_fence_get(fence);
+
+	trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
+
+	for (i = 1; i <= 6; i++)
+		V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0 + 4 * i, job->args.cfg[i]);
+	/* CFG0 write kicks off the job. */
+	V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0, job->args.cfg[0]);
+
+	return fence;
+}
+
+static struct dma_fence *
+v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
 {
 	struct v3d_job *job = to_v3d_job(sched_job);
-	struct v3d_exec_info *exec = job->exec;
-	struct v3d_dev *v3d = exec->v3d;
-	enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
+	struct v3d_dev *v3d = job->v3d;
+
+	v3d_clean_caches(v3d);
+
+	return NULL;
+}
+
+static void
+v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
+{
 	enum v3d_queue q;
-	u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q));
-	u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q));
-
-	/* If the current address or return address have changed, then
-	 * the GPU has probably made progress and we should delay the
-	 * reset. This could fail if the GPU got in an infinite loop
-	 * in the CL, but that is pretty unlikely outside of an i-g-t
-	 * testcase.
-	 */
-	if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) {
-		job->timedout_ctca = ctca;
-		job->timedout_ctra = ctra;
-
-		schedule_delayed_work(&job->base.work_tdr,
-				      job->base.sched->timeout);
-		return;
-	}
 
 	mutex_lock(&v3d->reset_lock);
 
 	/* block scheduler */
-	for (q = 0; q < V3D_MAX_QUEUES; q++) {
-		struct drm_gpu_scheduler *sched = &v3d->queue[q].sched;
+	for (q = 0; q < V3D_MAX_QUEUES; q++)
+		drm_sched_stop(&v3d->queue[q].sched, sched_job);
 
-		kthread_park(sched->thread);
-		drm_sched_hw_job_reset(sched, (sched_job->sched == sched ?
-					       sched_job : NULL));
-	}
+	if (sched_job)
+		drm_sched_increase_karma(sched_job);
 
 	/* get the GPU back into the init state */
 	v3d_reset(v3d);
 
+	for (q = 0; q < V3D_MAX_QUEUES; q++)
+		drm_sched_resubmit_jobs(&v3d->queue[q].sched);
+
 	/* Unblock schedulers and restart their jobs. */
 	for (q = 0; q < V3D_MAX_QUEUES; q++) {
-		drm_sched_job_recovery(&v3d->queue[q].sched);
-		kthread_unpark(v3d->queue[q].sched.thread);
+		drm_sched_start(&v3d->queue[q].sched, true);
 	}
 
 	mutex_unlock(&v3d->reset_lock);
 }
 
-static const struct drm_sched_backend_ops v3d_sched_ops = {
+/* If the current address or return address have changed, then the GPU
+ * has probably made progress and we should delay the reset. This
+ * could fail if the GPU got in an infinite loop in the CL, but that
+ * is pretty unlikely outside of an i-g-t testcase.
+ */
+static void
+v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
+		    u32 *timedout_ctca, u32 *timedout_ctra)
+{
+	struct v3d_job *job = to_v3d_job(sched_job);
+	struct v3d_dev *v3d = job->v3d;
+	u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q));
+	u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q));
+
+	if (*timedout_ctca != ctca || *timedout_ctra != ctra) {
+		*timedout_ctca = ctca;
+		*timedout_ctra = ctra;
+		return;
+	}
+
+	v3d_gpu_reset_for_timeout(v3d, sched_job);
+}
+
+static void
+v3d_bin_job_timedout(struct drm_sched_job *sched_job)
+{
+	struct v3d_bin_job *job = to_bin_job(sched_job);
+
+	v3d_cl_job_timedout(sched_job, V3D_BIN,
+			    &job->timedout_ctca, &job->timedout_ctra);
+}
+
+static void
+v3d_render_job_timedout(struct drm_sched_job *sched_job)
+{
+	struct v3d_render_job *job = to_render_job(sched_job);
+
+	v3d_cl_job_timedout(sched_job, V3D_RENDER,
+			    &job->timedout_ctca, &job->timedout_ctra);
+}
+
+static void
+v3d_generic_job_timedout(struct drm_sched_job *sched_job)
+{
+	struct v3d_job *job = to_v3d_job(sched_job);
+
+	v3d_gpu_reset_for_timeout(job->v3d, sched_job);
+}
+
+static void
+v3d_csd_job_timedout(struct drm_sched_job *sched_job)
+{
+	struct v3d_csd_job *job = to_csd_job(sched_job);
+	struct v3d_dev *v3d = job->base.v3d;
+	u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4);
+
+	/* If we've made progress, skip reset and let the timer get
+	 * rearmed.
+	 */
+	if (job->timedout_batches != batches) {
+		job->timedout_batches = batches;
+		return;
+	}
+
+	v3d_gpu_reset_for_timeout(v3d, sched_job);
+}
+
+static const struct drm_sched_backend_ops v3d_bin_sched_ops = {
 	.dependency = v3d_job_dependency,
-	.run_job = v3d_job_run,
-	.timedout_job = v3d_job_timedout,
+	.run_job = v3d_bin_job_run,
+	.timedout_job = v3d_bin_job_timedout,
+	.free_job = v3d_job_free,
+};
+
+static const struct drm_sched_backend_ops v3d_render_sched_ops = {
+	.dependency = v3d_job_dependency,
+	.run_job = v3d_render_job_run,
+	.timedout_job = v3d_render_job_timedout,
+	.free_job = v3d_job_free,
+};
+
+static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
+	.dependency = v3d_job_dependency,
+	.run_job = v3d_tfu_job_run,
+	.timedout_job = v3d_generic_job_timedout,
+	.free_job = v3d_job_free,
+};
+
+static const struct drm_sched_backend_ops v3d_csd_sched_ops = {
+	.dependency = v3d_job_dependency,
+	.run_job = v3d_csd_job_run,
+	.timedout_job = v3d_csd_job_timedout,
+	.free_job = v3d_job_free
+};
+
+static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = {
+	.dependency = v3d_job_dependency,
+	.run_job = v3d_cache_clean_job_run,
+	.timedout_job = v3d_generic_job_timedout,
 	.free_job = v3d_job_free
 };
 
@@ -212,25 +398,63 @@
 	int ret;
 
 	ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
-			     &v3d_sched_ops,
+			     &v3d_bin_sched_ops,
 			     hw_jobs_limit, job_hang_limit,
 			     msecs_to_jiffies(hang_limit_ms),
 			     "v3d_bin");
 	if (ret) {
-		dev_err(v3d->dev, "Failed to create bin scheduler: %d.", ret);
+		dev_err(v3d->drm.dev, "Failed to create bin scheduler: %d.", ret);
 		return ret;
 	}
 
 	ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
-			     &v3d_sched_ops,
+			     &v3d_render_sched_ops,
 			     hw_jobs_limit, job_hang_limit,
 			     msecs_to_jiffies(hang_limit_ms),
 			     "v3d_render");
 	if (ret) {
-		dev_err(v3d->dev, "Failed to create render scheduler: %d.",
+		dev_err(v3d->drm.dev, "Failed to create render scheduler: %d.",
 			ret);
-		drm_sched_fini(&v3d->queue[V3D_BIN].sched);
+		v3d_sched_fini(v3d);
 		return ret;
+	}
+
+	ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
+			     &v3d_tfu_sched_ops,
+			     hw_jobs_limit, job_hang_limit,
+			     msecs_to_jiffies(hang_limit_ms),
+			     "v3d_tfu");
+	if (ret) {
+		dev_err(v3d->drm.dev, "Failed to create TFU scheduler: %d.",
+			ret);
+		v3d_sched_fini(v3d);
+		return ret;
+	}
+
+	if (v3d_has_csd(v3d)) {
+		ret = drm_sched_init(&v3d->queue[V3D_CSD].sched,
+				     &v3d_csd_sched_ops,
+				     hw_jobs_limit, job_hang_limit,
+				     msecs_to_jiffies(hang_limit_ms),
+				     "v3d_csd");
+		if (ret) {
+			dev_err(v3d->drm.dev, "Failed to create CSD scheduler: %d.",
+				ret);
+			v3d_sched_fini(v3d);
+			return ret;
+		}
+
+		ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched,
+				     &v3d_cache_clean_sched_ops,
+				     hw_jobs_limit, job_hang_limit,
+				     msecs_to_jiffies(hang_limit_ms),
+				     "v3d_cache_clean");
+		if (ret) {
+			dev_err(v3d->drm.dev, "Failed to create CACHE_CLEAN scheduler: %d.",
+				ret);
+			v3d_sched_fini(v3d);
+			return ret;
+		}
 	}
 
 	return 0;
@@ -241,6 +465,8 @@
 {
 	enum v3d_queue q;
 
-	for (q = 0; q < V3D_MAX_QUEUES; q++)
-		drm_sched_fini(&v3d->queue[q].sched);
+	for (q = 0; q < V3D_MAX_QUEUES; q++) {
+		if (v3d->queue[q].sched.ready)
+			drm_sched_fini(&v3d->queue[q].sched);
+	}
 }
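
Note on the dependency handling added above: each v3d_job now keeps its dependency fences in an xarray (job->deps), and v3d_job_dependency() hands them to the scheduler one at a time via xa_erase(&job->deps, job->last_dep++). Below is a minimal, out-of-tree sketch of how a submit path could populate that xarray with the kernel's allocating xa_alloc() API. The struct and helper names here are invented for illustration only and are not part of this patch; the real submit code lives elsewhere in the driver (v3d_gem.c).

/* Illustrative sketch only (not part of this patch): the xarray-based
 * dependency pattern that v3d_job_dependency() above drains.  Assumes
 * the xarray was set up with xa_init_flags(&deps, XA_FLAGS_ALLOC),
 * which xa_alloc() requires.
 */
#include <linux/dma-fence.h>
#include <linux/xarray.h>

struct v3d_job_sketch {
	struct xarray deps;		/* fences this job must wait on */
	unsigned long last_dep;		/* next index handed to the scheduler */
};

/* Hypothetical helper: queue one fence as a dependency of the job.
 * The scheduler's .dependency callback later removes entries in index
 * order with xa_erase(&job->deps, job->last_dep++) until the array is
 * empty, at which point .run_job can be called.
 */
static int v3d_job_add_dep_sketch(struct v3d_job_sketch *job,
				  struct dma_fence *fence)
{
	u32 id;

	if (!fence)
		return 0;

	/* Store the fence under a freshly allocated index; the xarray
	 * takes over the caller's reference on success.
	 */
	return xa_alloc(&job->deps, &id, fence, xa_limit_32b, GFP_KERNEL);
}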