forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-05-13 9d77db3c730780c8ef5ccd4b66403ff5675cfe4e
kernel/drivers/gpu/drm/v3d/v3d_sched.c
@@ -30,176 +30,362 @@
 	return container_of(sched_job, struct v3d_job, base);
 }
 
+static struct v3d_bin_job *
+to_bin_job(struct drm_sched_job *sched_job)
+{
+	return container_of(sched_job, struct v3d_bin_job, base.base);
+}
+
+static struct v3d_render_job *
+to_render_job(struct drm_sched_job *sched_job)
+{
+	return container_of(sched_job, struct v3d_render_job, base.base);
+}
+
+static struct v3d_tfu_job *
+to_tfu_job(struct drm_sched_job *sched_job)
+{
+	return container_of(sched_job, struct v3d_tfu_job, base.base);
+}
+
+static struct v3d_csd_job *
+to_csd_job(struct drm_sched_job *sched_job)
+{
+	return container_of(sched_job, struct v3d_csd_job, base.base);
+}
+
 static void
 v3d_job_free(struct drm_sched_job *sched_job)
 {
 	struct v3d_job *job = to_v3d_job(sched_job);
 
-	v3d_exec_put(job->exec);
+	drm_sched_job_cleanup(sched_job);
+	v3d_job_put(job);
 }
 
 /**
- * Returns the fences that the bin job depends on, one by one.
- * v3d_job_run() won't be called until all of them have been signaled.
+ * Returns the fences that the job depends on, one by one.
+ *
+ * If placed in the scheduler's .dependency method, the corresponding
+ * .run_job won't be called until all of them have been signaled.
  */
 static struct dma_fence *
 v3d_job_dependency(struct drm_sched_job *sched_job,
 		   struct drm_sched_entity *s_entity)
 {
 	struct v3d_job *job = to_v3d_job(sched_job);
-	struct v3d_exec_info *exec = job->exec;
-	enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
-	struct dma_fence *fence;
-
-	fence = job->in_fence;
-	if (fence) {
-		job->in_fence = NULL;
-		return fence;
-	}
-
-	if (q == V3D_RENDER) {
-		/* If we had a bin job, the render job definitely depends on
-		 * it. We first have to wait for bin to be scheduled, so that
-		 * its done_fence is created.
-		 */
-		fence = exec->bin_done_fence;
-		if (fence) {
-			exec->bin_done_fence = NULL;
-			return fence;
-		}
-	}
 
 	/* XXX: Wait on a fence for switching the GMP if necessary,
 	 * and then do so.
 	 */
 
-	return fence;
+	if (!xa_empty(&job->deps))
+		return xa_erase(&job->deps, job->last_dep++);
+
+	return NULL;
 }
 
-static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
+static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
 {
-	struct v3d_job *job = to_v3d_job(sched_job);
-	struct v3d_exec_info *exec = job->exec;
-	enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
-	struct v3d_dev *v3d = exec->v3d;
+	struct v3d_bin_job *job = to_bin_job(sched_job);
+	struct v3d_dev *v3d = job->base.v3d;
 	struct drm_device *dev = &v3d->drm;
 	struct dma_fence *fence;
 	unsigned long irqflags;
 
-	if (unlikely(job->base.s_fence->finished.error))
+	if (unlikely(job->base.base.s_fence->finished.error))
 		return NULL;
 
 	/* Lock required around bin_job update vs
 	 * v3d_overflow_mem_work().
 	 */
 	spin_lock_irqsave(&v3d->job_lock, irqflags);
-	if (q == V3D_BIN) {
-		v3d->bin_job = job->exec;
-
-		/* Clear out the overflow allocation, so we don't
-		 * reuse the overflow attached to a previous job.
-		 */
-		V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
-	} else {
-		v3d->render_job = job->exec;
-	}
+	v3d->bin_job = job;
+	/* Clear out the overflow allocation, so we don't
+	 * reuse the overflow attached to a previous job.
+	 */
+	V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
 	spin_unlock_irqrestore(&v3d->job_lock, irqflags);
 
-	/* Can we avoid this flush when q==RENDER? We need to be
-	 * careful of scheduling, though -- imagine job0 rendering to
-	 * texture and job1 reading, and them being executed as bin0,
-	 * bin1, render0, render1, so that render1's flush at bin time
-	 * wasn't enough.
-	 */
 	v3d_invalidate_caches(v3d);
 
-	fence = v3d_fence_create(v3d, q);
+	fence = v3d_fence_create(v3d, V3D_BIN);
 	if (IS_ERR(fence))
 		return NULL;
 
-	if (job->done_fence)
-		dma_fence_put(job->done_fence);
-	job->done_fence = dma_fence_get(fence);
+	if (job->base.irq_fence)
+		dma_fence_put(job->base.irq_fence);
+	job->base.irq_fence = dma_fence_get(fence);
 
-	trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno,
+	trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
 			    job->start, job->end);
-
-	if (q == V3D_BIN) {
-		if (exec->qma) {
-			V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma);
-			V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms);
-		}
-		if (exec->qts) {
-			V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
-				       V3D_CLE_CT0QTS_ENABLE |
-				       exec->qts);
-		}
-	} else {
-		/* XXX: Set the QCFG */
-	}
 
 	/* Set the current and end address of the control list.
 	 * Writing the end register is what starts the job.
 	 */
-	V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start);
-	V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end);
+	if (job->qma) {
+		V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, job->qma);
+		V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, job->qms);
+	}
+	if (job->qts) {
+		V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
+			       V3D_CLE_CT0QTS_ENABLE |
+			       job->qts);
+	}
+	V3D_CORE_WRITE(0, V3D_CLE_CT0QBA, job->start);
+	V3D_CORE_WRITE(0, V3D_CLE_CT0QEA, job->end);
 
 	return fence;
 }
 
-static void
-v3d_job_timedout(struct drm_sched_job *sched_job)
+static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
+{
+	struct v3d_render_job *job = to_render_job(sched_job);
+	struct v3d_dev *v3d = job->base.v3d;
+	struct drm_device *dev = &v3d->drm;
+	struct dma_fence *fence;
+
+	if (unlikely(job->base.base.s_fence->finished.error))
+		return NULL;
+
+	v3d->render_job = job;
+
+	/* Can we avoid this flush? We need to be careful of
+	 * scheduling, though -- imagine job0 rendering to texture and
+	 * job1 reading, and them being executed as bin0, bin1,
+	 * render0, render1, so that render1's flush at bin time
+	 * wasn't enough.
+	 */
+	v3d_invalidate_caches(v3d);
+
+	fence = v3d_fence_create(v3d, V3D_RENDER);
+	if (IS_ERR(fence))
+		return NULL;
+
+	if (job->base.irq_fence)
+		dma_fence_put(job->base.irq_fence);
+	job->base.irq_fence = dma_fence_get(fence);
+
+	trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
+			    job->start, job->end);
+
+	/* XXX: Set the QCFG */
+
+	/* Set the current and end address of the control list.
+	 * Writing the end register is what starts the job.
+	 */
+	V3D_CORE_WRITE(0, V3D_CLE_CT1QBA, job->start);
+	V3D_CORE_WRITE(0, V3D_CLE_CT1QEA, job->end);
+
+	return fence;
+}
+
+static struct dma_fence *
+v3d_tfu_job_run(struct drm_sched_job *sched_job)
+{
+	struct v3d_tfu_job *job = to_tfu_job(sched_job);
+	struct v3d_dev *v3d = job->base.v3d;
+	struct drm_device *dev = &v3d->drm;
+	struct dma_fence *fence;
+
+	fence = v3d_fence_create(v3d, V3D_TFU);
+	if (IS_ERR(fence))
+		return NULL;
+
+	v3d->tfu_job = job;
+	if (job->base.irq_fence)
+		dma_fence_put(job->base.irq_fence);
+	job->base.irq_fence = dma_fence_get(fence);
+
+	trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
+
+	V3D_WRITE(V3D_TFU_IIA, job->args.iia);
+	V3D_WRITE(V3D_TFU_IIS, job->args.iis);
+	V3D_WRITE(V3D_TFU_ICA, job->args.ica);
+	V3D_WRITE(V3D_TFU_IUA, job->args.iua);
+	V3D_WRITE(V3D_TFU_IOA, job->args.ioa);
+	V3D_WRITE(V3D_TFU_IOS, job->args.ios);
+	V3D_WRITE(V3D_TFU_COEF0, job->args.coef[0]);
+	if (job->args.coef[0] & V3D_TFU_COEF0_USECOEF) {
+		V3D_WRITE(V3D_TFU_COEF1, job->args.coef[1]);
+		V3D_WRITE(V3D_TFU_COEF2, job->args.coef[2]);
+		V3D_WRITE(V3D_TFU_COEF3, job->args.coef[3]);
+	}
+	/* ICFG kicks off the job. */
+	V3D_WRITE(V3D_TFU_ICFG, job->args.icfg | V3D_TFU_ICFG_IOC);
+
+	return fence;
+}
+
+static struct dma_fence *
+v3d_csd_job_run(struct drm_sched_job *sched_job)
+{
+	struct v3d_csd_job *job = to_csd_job(sched_job);
+	struct v3d_dev *v3d = job->base.v3d;
+	struct drm_device *dev = &v3d->drm;
+	struct dma_fence *fence;
+	int i;
+
+	v3d->csd_job = job;
+
+	v3d_invalidate_caches(v3d);
+
+	fence = v3d_fence_create(v3d, V3D_CSD);
+	if (IS_ERR(fence))
+		return NULL;
+
+	if (job->base.irq_fence)
+		dma_fence_put(job->base.irq_fence);
+	job->base.irq_fence = dma_fence_get(fence);
+
+	trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
+
+	for (i = 1; i <= 6; i++)
+		V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0 + 4 * i, job->args.cfg[i]);
+	/* CFG0 write kicks off the job. */
+	V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0, job->args.cfg[0]);
+
+	return fence;
+}
+
+static struct dma_fence *
+v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
 {
 	struct v3d_job *job = to_v3d_job(sched_job);
-	struct v3d_exec_info *exec = job->exec;
-	struct v3d_dev *v3d = exec->v3d;
-	enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
+	struct v3d_dev *v3d = job->v3d;
+
+	v3d_clean_caches(v3d);
+
+	return NULL;
+}
+
+static void
+v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
+{
 	enum v3d_queue q;
-	u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q));
-	u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q));
-
-	/* If the current address or return address have changed, then
-	 * the GPU has probably made progress and we should delay the
-	 * reset. This could fail if the GPU got in an infinite loop
-	 * in the CL, but that is pretty unlikely outside of an i-g-t
-	 * testcase.
-	 */
-	if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) {
-		job->timedout_ctca = ctca;
-		job->timedout_ctra = ctra;
-
-		schedule_delayed_work(&job->base.work_tdr,
-				      job->base.sched->timeout);
-		return;
-	}
 
 	mutex_lock(&v3d->reset_lock);
 
 	/* block scheduler */
-	for (q = 0; q < V3D_MAX_QUEUES; q++) {
-		struct drm_gpu_scheduler *sched = &v3d->queue[q].sched;
+	for (q = 0; q < V3D_MAX_QUEUES; q++)
+		drm_sched_stop(&v3d->queue[q].sched, sched_job);
 
-		kthread_park(sched->thread);
-		drm_sched_hw_job_reset(sched, (sched_job->sched == sched ?
-					       sched_job : NULL));
-	}
+	if (sched_job)
+		drm_sched_increase_karma(sched_job);
 
 	/* get the GPU back into the init state */
 	v3d_reset(v3d);
 
+	for (q = 0; q < V3D_MAX_QUEUES; q++)
+		drm_sched_resubmit_jobs(&v3d->queue[q].sched);
+
 	/* Unblock schedulers and restart their jobs. */
 	for (q = 0; q < V3D_MAX_QUEUES; q++) {
-		drm_sched_job_recovery(&v3d->queue[q].sched);
-		kthread_unpark(v3d->queue[q].sched.thread);
+		drm_sched_start(&v3d->queue[q].sched, true);
 	}
 
 	mutex_unlock(&v3d->reset_lock);
 }
 
-static const struct drm_sched_backend_ops v3d_sched_ops = {
+/* If the current address or return address have changed, then the GPU
+ * has probably made progress and we should delay the reset. This
+ * could fail if the GPU got in an infinite loop in the CL, but that
+ * is pretty unlikely outside of an i-g-t testcase.
+ */
+static void
+v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
+		    u32 *timedout_ctca, u32 *timedout_ctra)
+{
+	struct v3d_job *job = to_v3d_job(sched_job);
+	struct v3d_dev *v3d = job->v3d;
+	u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q));
+	u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q));
+
+	if (*timedout_ctca != ctca || *timedout_ctra != ctra) {
+		*timedout_ctca = ctca;
+		*timedout_ctra = ctra;
+		return;
+	}
+
+	v3d_gpu_reset_for_timeout(v3d, sched_job);
+}
+
+static void
+v3d_bin_job_timedout(struct drm_sched_job *sched_job)
+{
+	struct v3d_bin_job *job = to_bin_job(sched_job);
+
+	v3d_cl_job_timedout(sched_job, V3D_BIN,
+			    &job->timedout_ctca, &job->timedout_ctra);
+}
+
+static void
+v3d_render_job_timedout(struct drm_sched_job *sched_job)
+{
+	struct v3d_render_job *job = to_render_job(sched_job);
+
+	v3d_cl_job_timedout(sched_job, V3D_RENDER,
+			    &job->timedout_ctca, &job->timedout_ctra);
+}
+
+static void
+v3d_generic_job_timedout(struct drm_sched_job *sched_job)
+{
+	struct v3d_job *job = to_v3d_job(sched_job);
+
+	v3d_gpu_reset_for_timeout(job->v3d, sched_job);
+}
+
+static void
+v3d_csd_job_timedout(struct drm_sched_job *sched_job)
+{
+	struct v3d_csd_job *job = to_csd_job(sched_job);
+	struct v3d_dev *v3d = job->base.v3d;
+	u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4);
+
+	/* If we've made progress, skip reset and let the timer get
+	 * rearmed.
+	 */
+	if (job->timedout_batches != batches) {
+		job->timedout_batches = batches;
+		return;
+	}
+
+	v3d_gpu_reset_for_timeout(v3d, sched_job);
+}
+
+static const struct drm_sched_backend_ops v3d_bin_sched_ops = {
 	.dependency = v3d_job_dependency,
-	.run_job = v3d_job_run,
-	.timedout_job = v3d_job_timedout,
+	.run_job = v3d_bin_job_run,
+	.timedout_job = v3d_bin_job_timedout,
+	.free_job = v3d_job_free,
+};
+
+static const struct drm_sched_backend_ops v3d_render_sched_ops = {
+	.dependency = v3d_job_dependency,
+	.run_job = v3d_render_job_run,
+	.timedout_job = v3d_render_job_timedout,
+	.free_job = v3d_job_free,
+};
+
+static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
+	.dependency = v3d_job_dependency,
+	.run_job = v3d_tfu_job_run,
+	.timedout_job = v3d_generic_job_timedout,
+	.free_job = v3d_job_free,
+};
+
+static const struct drm_sched_backend_ops v3d_csd_sched_ops = {
+	.dependency = v3d_job_dependency,
+	.run_job = v3d_csd_job_run,
+	.timedout_job = v3d_csd_job_timedout,
+	.free_job = v3d_job_free
+};
+
+static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = {
+	.dependency = v3d_job_dependency,
+	.run_job = v3d_cache_clean_job_run,
+	.timedout_job = v3d_generic_job_timedout,
 	.free_job = v3d_job_free
 };
 
@@ -212,25 +398,63 @@
 	int ret;
 
 	ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
-			     &v3d_sched_ops,
+			     &v3d_bin_sched_ops,
 			     hw_jobs_limit, job_hang_limit,
 			     msecs_to_jiffies(hang_limit_ms),
 			     "v3d_bin");
 	if (ret) {
-		dev_err(v3d->dev, "Failed to create bin scheduler: %d.", ret);
+		dev_err(v3d->drm.dev, "Failed to create bin scheduler: %d.", ret);
 		return ret;
 	}
 
 	ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
-			     &v3d_sched_ops,
+			     &v3d_render_sched_ops,
 			     hw_jobs_limit, job_hang_limit,
 			     msecs_to_jiffies(hang_limit_ms),
 			     "v3d_render");
 	if (ret) {
-		dev_err(v3d->dev, "Failed to create render scheduler: %d.",
+		dev_err(v3d->drm.dev, "Failed to create render scheduler: %d.",
 			ret);
-		drm_sched_fini(&v3d->queue[V3D_BIN].sched);
+		v3d_sched_fini(v3d);
 		return ret;
+	}
+
+	ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
+			     &v3d_tfu_sched_ops,
+			     hw_jobs_limit, job_hang_limit,
+			     msecs_to_jiffies(hang_limit_ms),
+			     "v3d_tfu");
+	if (ret) {
+		dev_err(v3d->drm.dev, "Failed to create TFU scheduler: %d.",
+			ret);
+		v3d_sched_fini(v3d);
+		return ret;
+	}
+
+	if (v3d_has_csd(v3d)) {
+		ret = drm_sched_init(&v3d->queue[V3D_CSD].sched,
+				     &v3d_csd_sched_ops,
+				     hw_jobs_limit, job_hang_limit,
+				     msecs_to_jiffies(hang_limit_ms),
+				     "v3d_csd");
+		if (ret) {
+			dev_err(v3d->drm.dev, "Failed to create CSD scheduler: %d.",
+				ret);
+			v3d_sched_fini(v3d);
+			return ret;
+		}
+
+		ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched,
+				     &v3d_cache_clean_sched_ops,
+				     hw_jobs_limit, job_hang_limit,
+				     msecs_to_jiffies(hang_limit_ms),
+				     "v3d_cache_clean");
+		if (ret) {
+			dev_err(v3d->drm.dev, "Failed to create CACHE_CLEAN scheduler: %d.",
+				ret);
+			v3d_sched_fini(v3d);
+			return ret;
+		}
 	}
 
 	return 0;
@@ -241,6 +465,8 @@
 {
 	enum v3d_queue q;
 
-	for (q = 0; q < V3D_MAX_QUEUES; q++)
-		drm_sched_fini(&v3d->queue[q].sched);
+	for (q = 0; q < V3D_MAX_QUEUES; q++) {
+		if (v3d->queue[q].sched.ready)
+			drm_sched_fini(&v3d->queue[q].sched);
+	}
 }
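
Note on the dependency handling added above: each v3d_job now keeps its dependency fences in an xarray (job->deps), and v3d_job_dependency() hands them to the scheduler one at a time via xa_erase(&job->deps, job->last_dep++). Below is a minimal, out-of-tree sketch of how a submit path could populate that xarray with the kernel's allocating xa_alloc() API. The struct and helper names here are invented for illustration only and are not part of this patch; the real submit code lives elsewhere in the driver (v3d_gem.c).

/* Illustrative sketch only (not part of this patch): the xarray-based
 * dependency pattern that v3d_job_dependency() above drains.  Assumes
 * the xarray was set up with xa_init_flags(&deps, XA_FLAGS_ALLOC),
 * which xa_alloc() requires.
 */
#include <linux/dma-fence.h>
#include <linux/xarray.h>

struct v3d_job_sketch {
	struct xarray deps;		/* fences this job must wait on */
	unsigned long last_dep;		/* next index handed to the scheduler */
};

/* Hypothetical helper: queue one fence as a dependency of the job.
 * The scheduler's .dependency callback later removes entries in index
 * order with xa_erase(&job->deps, job->last_dep++) until the array is
 * empty, at which point .run_job can be called.
 */
static int v3d_job_add_dep_sketch(struct v3d_job_sketch *job,
				  struct dma_fence *fence)
{
	u32 id;

	if (!fence)
		return 0;

	/* Store the fence under a freshly allocated index; the xarray
	 * takes over the caller's reference on success.
	 */
	return xa_alloc(&job->deps, &id, fence, xa_limit_32b, GFP_KERNEL);
}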