hc
2024-05-10 cde9070d9970eef1f7ec2360586c802a16230ad8
kernel/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h
....@@ -1,7 +1,7 @@
11 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
22 /*
33 *
4
- * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved.
4
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
55 *
66 * This program is free software and is provided to you under the terms of the
77 * GNU General Public License version 2 as published by the Free Software
....@@ -55,10 +55,11 @@
5555 * @KBASEP_JS_CTX_ATTR_COMPUTE: Attribute indicating a context that contains
5656 * Compute jobs.
5757 * @KBASEP_JS_CTX_ATTR_NON_COMPUTE: Attribute indicating a context that contains
58
- * Non-Compute jobs.
58
+ * Non-Compute jobs.
5959 * @KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES: Attribute indicating that a context
60
- * contains compute-job atoms that aren't restricted to a coherent group,
61
- * and can run on all cores.
60
+ * contains compute-job atoms that aren't
61
+ * restricted to a coherent group,
62
+ * and can run on all cores.
6263 * @KBASEP_JS_CTX_ATTR_COUNT: Must be the last in the enum
6364 *
6465 * Each context attribute can be thought of as a boolean value that caches some
....@@ -114,7 +115,6 @@
114115 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
115116 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
116117 * enough to handle anyway.
117
- *
118118 *
119119 */
120120 enum kbasep_js_ctx_attr {
....@@ -187,47 +187,76 @@
187187 */
188188 #define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
189189
190
+/* Atom priority bitmaps, where bit 0 is the highest priority, and higher bits
191
+ * indicate successively lower KBASE_JS_ATOM_SCHED_PRIO_<...> levels.
192
+ *
193
+ * The type must be strictly wider than the number of bits needed to represent a bitmap of
194
+ * priorities, so that we can do calculations such as:
195
+ * (1 << KBASE_JS_ATOM_SCHED_PRIO_COUNT) - 1
196
+ * ...without causing undefined behavior due to a shift beyond the width of the
197
+ * type
198
+ *
199
+ * If KBASE_JS_ATOM_SCHED_PRIO_COUNT starts requiring 32 bits, then it's worth
200
+ * moving to DECLARE_BITMAP()
201
+ */
202
+typedef u8 kbase_js_prio_bitmap_t;
203
+
204
+/* Ordering modification for kbase_js_atom_runs_before() */
205
+typedef u32 kbase_atom_ordering_flag_t;
206
+
207
+/* Atoms of the same context and priority should have their ordering decided by
208
+ * their seq_nr instead of their age.
209
+ *
210
+ * seq_nr is used as a more slowly changing variant of age - it increases once
211
+ * per group of related atoms, as determined by user-space. Hence, it can be
212
+ * used to limit re-ordering decisions (such as pre-emption) to only re-order
213
+ * between such groups, rather than re-order within those groups of atoms.
214
+ */
215
+#define KBASE_ATOM_ORDERING_FLAG_SEQNR (((kbase_atom_ordering_flag_t)1) << 0)
216
+
190217 /**
191218 * struct kbasep_js_device_data - KBase Device Data Job Scheduler sub-structure
192219 * @runpool_irq: Sub-structure to collect together Job Scheduling data used in
193
- * IRQ context. The hwaccess_lock must be held when accessing.
220
+ * IRQ context. The hwaccess_lock must be held when accessing.
194221 * @runpool_irq.submit_allowed: Bitvector indicating whether a currently
195
- * scheduled context is allowed to submit jobs. When bit 'N' is set in
196
- * this, it indicates whether the context bound to address space 'N' is
197
- * allowed to submit jobs.
222
+ * scheduled context is allowed to submit jobs.
223
+ * When bit 'N' is set in this, it indicates whether
224
+ * the context bound to address space 'N' is
225
+ * allowed to submit jobs.
198226 * @runpool_irq.ctx_attr_ref_count: Array of Context Attributes Ref_counters:
199
- * Each is large enough to hold a refcount of the number of contexts
200
- * that can fit into the runpool. This is currently BASE_MAX_NR_AS.
201
- * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
202
- * the refcount. Hence, it's not worthwhile reducing this to
203
- * bit-manipulation on u32s to save space (where in contrast, 4 bit
204
- * sub-fields would be easy to do and would save space).
205
- * Whilst this must not become negative, the sign bit is used for:
206
- * - error detection in debug builds
207
- * - Optimization: it is undefined for a signed int to overflow, and so
208
- * the compiler can optimize for that never happening (thus, no masking
209
- * is required on updating the variable)
227
+ * Each is large enough to hold a refcount of the number of contexts
228
+ * that can fit into the runpool. This is currently BASE_MAX_NR_AS.
229
+ * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
230
+ * the refcount. Hence, it's not worthwhile reducing this to
231
+ * bit-manipulation on u32s to save space (where in contrast, 4 bit
232
+ * sub-fields would be easy to do and would save space).
233
+ * Whilst this must not become negative, the sign bit is used for:
234
+ * - error detection in debug builds
235
+ * - Optimization: it is undefined for a signed int to overflow, and so
236
+ * the compiler can optimize for that never happening (thus, no masking
237
+ * is required on updating the variable)
210238 * @runpool_irq.slot_affinities: Affinity management and tracking. Bitvector
211
- * to aid affinity checking. Element 'n' bit 'i' indicates that slot 'n'
212
- * is using core i (i.e. slot_affinity_refcount[n][i] > 0)
239
+ * to aid affinity checking.
240
+ * Element 'n' bit 'i' indicates that slot 'n'
241
+ * is using core i (i.e. slot_affinity_refcount[n][i] > 0)
213242 * @runpool_irq.slot_affinity_refcount: Array of refcounts for each core owned
214
- * by each slot. Used to generate the slot_affinities array of bitvectors.
215
- * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
216
- * because it is refcounted only when a job is definitely about to be
217
- * submitted to a slot, and is de-refcounted immediately after a job
218
- * finishes
243
+ * by each slot. Used to generate the slot_affinities array of bitvectors.
244
+ * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
245
+ * because it is refcounted only when a job is definitely about to be
246
+ * submitted to a slot, and is de-refcounted immediately after a job
247
+ * finishes
219248 * @schedule_sem: Scheduling semaphore. This must be held when calling
220
- * kbase_jm_kick()
249
+ * kbase_jm_kick()
221250 * @ctx_list_pullable: List of contexts that can currently be pulled from
222251 * @ctx_list_unpullable: List of contexts that can not currently be pulled
223
- * from, but have jobs currently running.
252
+ * from, but have jobs currently running.
224253 * @nr_user_contexts_running: Number of currently scheduled user contexts
225
- * (excluding ones that are not submitting jobs)
254
+ * (excluding ones that are not submitting jobs)
226255 * @nr_all_contexts_running: Number of currently scheduled contexts (including
227
- * ones that are not submitting jobs)
256
+ * ones that are not submitting jobs)
228257 * @js_reqs: Core Requirements to match up with base_js_atom's core_req member
229
- * @note This is a write-once member, and so no locking is required to
230
- * read
258
+ * @note This is a write-once member, and so no locking is required to
259
+ * read
231260 * @scheduling_period_ns: Value for JS_SCHEDULING_PERIOD_NS
232261 * @soft_stop_ticks: Value for JS_SOFT_STOP_TICKS
233262 * @soft_stop_ticks_cl: Value for JS_SOFT_STOP_TICKS_CL
....@@ -241,16 +270,17 @@
241270 * @suspended_soft_jobs_list: List of suspended soft jobs
242271 * @softstop_always: Support soft-stop on a single context
243272 * @init_status:The initialized-flag is placed at the end, to avoid
244
- * cache-pollution (we should only be using this during init/term paths).
245
- * @note This is a write-once member, and so no locking is required to
246
- * read
273
+ * cache-pollution (we should only be using this during init/term paths).
274
+ * @note This is a write-once member, and so no locking is required to
275
+ * read
247276 * @nr_contexts_pullable:Number of contexts that can currently be pulled from
248277 * @nr_contexts_runnable:Number of contexts that can either be pulled from or
249
- * arecurrently running
278
+ * are currently running
250279 * @soft_job_timeout_ms:Value for JS_SOFT_JOB_TIMEOUT
280
+ * @js_free_wait_time_ms: Maximum waiting time in ms for a Job Slot to be seen free.
251281 * @queue_mutex: Queue Lock, used to access the Policy's queue of contexts
252
- * independently of the Run Pool.
253
- * Of course, you don't need the Run Pool lock to access this.
282
+ * independently of the Run Pool.
283
+ * Of course, you don't need the Run Pool lock to access this.
254284 * @runpool_mutex: Run Pool mutex, for managing contexts within the runpool.
255285 *
256286 * This encapsulates the current context of the Job Scheduler on a particular
....@@ -300,6 +330,8 @@
300330 u32 nr_contexts_pullable;
301331 atomic_t nr_contexts_runnable;
302332 atomic_t soft_job_timeout_ms;
333
+ u32 js_free_wait_time_ms;
334
+
303335 struct mutex queue_mutex;
304336 /*
305337 * Run Pool mutex, for managing contexts within the runpool.
....@@ -358,7 +390,7 @@
358390 * @sched_priority: priority
359391 * @device_nr: Core group atom was executed on
360392 *
361
- * Subset of atom state that can be available after jd_done_nolock() is called
393
+ * Subset of atom state that can be available after kbase_jd_done_nolock() is called
362394 * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
363395 * because the original atom could disappear.
364396 */
....@@ -393,4 +425,23 @@
393425 */
394426 #define KBASEP_JS_TICK_RESOLUTION_US 1
395427
428
+/**
429
+ * struct kbase_jsctx_slot_tracking - Job Scheduling tracking of a context's
430
+ * use of a job slot
431
+ * @blocked: bitmap of priorities that this slot is blocked at
432
+ * @atoms_pulled: count of atoms that have been pulled from this slot,
433
+ * across all priority levels
434
+ * @atoms_pulled_pri: counts of atoms that have been pulled from this slot, per
435
+ * priority level
436
+ *
437
+ * Controls how a slot from the &struct kbase_context's jsctx_queue is managed,
438
+ * for example to ensure correct ordering of atoms when atoms of different
439
+ * priorities are unpulled.
440
+ */
441
+struct kbase_jsctx_slot_tracking {
442
+ kbase_js_prio_bitmap_t blocked;
443
+ atomic_t atoms_pulled;
444
+ int atoms_pulled_pri[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
445
+};
446
+
396447 #endif /* _KBASE_JS_DEFS_H_ */