2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -44,10 +44,48 @@
  * - Add ioctl 40: kbase_ioctl_cs_queue_register_ex, this is a new
  *   queue registration call with extended format for supporting CS
  *   trace configurations with CSF trace_command.
+ * 1.6:
+ * - Added new HW performance counters interface to all GPUs.
+ * 1.7:
+ * - Added reserved field to QUEUE_GROUP_CREATE ioctl for future use
+ * 1.8:
+ * - Removed Kernel legacy HWC interface
+ * 1.9:
+ * - Reorganization of GPU-VA memory zones, including addition of
+ *   FIXED_VA zone and auto-initialization of EXEC_VA zone.
+ * - Added new Base memory allocation interface
+ * 1.10:
+ * - First release of new HW performance counters interface.
+ * 1.11:
+ * - Dummy model (no mali) backend will now clear HWC values after each sample
+ * 1.12:
+ * - Added support for incremental rendering flag in CSG create call
+ * 1.13:
+ * - Added ioctl to query a register of USER page.
+ * 1.14:
+ * - Added support for passing down the buffer descriptor VA in tiler heap init
+ * 1.15:
+ * - Enable new sync_wait GE condition
+ * 1.16:
+ * - Remove legacy definitions:
+ *   - base_jit_alloc_info_10_2
+ *   - base_jit_alloc_info_11_5
+ *   - kbase_ioctl_mem_jit_init_10_2
+ *   - kbase_ioctl_mem_jit_init_11_5
+ * 1.17:
+ * - Fix kinstr_prfcnt issues:
+ *   - Missing implicit sample for CMD_STOP when HWCNT buffer is full.
+ *   - Race condition when stopping periodic sampling.
+ *   - prfcnt_block_metadata::block_idx gaps.
+ *   - PRFCNT_CONTROL_CMD_SAMPLE_ASYNC is removed.
+ * 1.18:
+ * - Relax the requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE
+ *   before allocating GPU memory for the context.
+ * - CPU mappings of USER_BUFFER imported memory handles must be cached.
  */

 #define BASE_UK_VERSION_MAJOR 1
-#define BASE_UK_VERSION_MINOR 5
+#define BASE_UK_VERSION_MINOR 18

 /**
  * struct kbase_ioctl_version_check - Check version compatibility between
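
For orientation, a minimal sketch of how user space might negotiate the bumped minor version. The /dev/mali0 node name and the include path are assumptions (adjust for your platform); KBASE_IOCTL_VERSION_CHECK itself is defined elsewhere in this header and is unchanged by this patch.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include "mali_kbase_ioctl.h"	/* pulls in mali_kbase_csf_ioctl.h */

int open_and_check_version(void)
{
	int fd = open("/dev/mali0", O_RDWR);	/* conventional kbase node */
	if (fd < 0)
		return -1;

	struct kbase_ioctl_version_check vc = {
		.major = BASE_UK_VERSION_MAJOR,	/* 1 */
		.minor = BASE_UK_VERSION_MINOR,	/* 18 after this change */
	};
	/* The kernel writes back the highest version it actually supports. */
	if (ioctl(fd, KBASE_IOCTL_VERSION_CHECK, &vc) == 0)
		printf("negotiated UK %u.%u\n", vc.major, vc.minor);
	return fd;
}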
@@ -64,7 +102,6 @@
 #define KBASE_IOCTL_VERSION_CHECK_RESERVED \
 	_IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check)

-
 /**
  * struct kbase_ioctl_cs_queue_register - Register a GPU command queue with the
  *                                        base back-end
@@ -74,7 +111,7 @@
  * @priority: Priority of the queue within a group when run within a process
  * @padding: Currently unused, must be zero
  *
- * @Note: There is an identical sub-section in kbase_ioctl_cs_queue_register_ex.
+ * Note: There is an identical sub-section in kbase_ioctl_cs_queue_register_ex.
  * Any change of this struct should also be mirrored to the latter.
  */
 struct kbase_ioctl_cs_queue_register {
@@ -143,7 +180,7 @@
  * @ex_event_state: Trace event states configuration
  * @ex_padding: Currently unused, must be zero
  *
- * @Note: There is an identical sub-section at the start of this struct to that
+ * Note: There is an identical sub-section at the start of this struct to that
  * of @ref kbase_ioctl_cs_queue_register. Any change of this sub-section
  * must also be mirrored to the latter. Following the said sub-section,
  * the remaining fields form the extension, marked with ex_*.
@@ -177,7 +214,8 @@
 	_IOW(KBASE_IOCTL_TYPE, 41, struct kbase_ioctl_cs_queue_terminate)

 /**
- * union kbase_ioctl_cs_queue_group_create - Create a GPU command queue group
+ * union kbase_ioctl_cs_queue_group_create_1_6 - Create a GPU command queue
+ *                                               group
  * @in: Input parameters
  * @in.tiler_mask: Mask of tiler endpoints the group is allowed to use.
  * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use.
@@ -196,7 +234,7 @@
  * @out.padding: Currently unused, must be zero
  * @out.group_uid: UID of the queue group available to base.
  */
-union kbase_ioctl_cs_queue_group_create {
+union kbase_ioctl_cs_queue_group_create_1_6 {
 	struct {
 		__u64 tiler_mask;
 		__u64 fragment_mask;
@@ -216,8 +254,58 @@
 	} out;
 };

-#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE \
-	_IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create)
+#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_6 \
+	_IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create_1_6)
+
+/**
+ * union kbase_ioctl_cs_queue_group_create - Create a GPU command queue group
+ * @in: Input parameters
+ * @in.tiler_mask: Mask of tiler endpoints the group is allowed to use.
+ * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use.
+ * @in.compute_mask: Mask of compute endpoints the group is allowed to use.
+ * @in.cs_min: Minimum number of CSs required.
+ * @in.priority: Queue group's priority within a process.
+ * @in.tiler_max: Maximum number of tiler endpoints the group is allowed
+ *                to use.
+ * @in.fragment_max: Maximum number of fragment endpoints the group is
+ *                   allowed to use.
+ * @in.compute_max: Maximum number of compute endpoints the group is allowed
+ *                  to use.
+ * @in.csi_handlers: Flags to signal that the application intends to use CSI
+ *                   exception handlers in some linear buffers to deal with
+ *                   the given exception types.
+ * @in.padding: Currently unused, must be zero
+ * @out: Output parameters
+ * @out.group_handle: Handle of a newly created queue group.
+ * @out.padding: Currently unused, must be zero
+ * @out.group_uid: UID of the queue group available to base.
+ */
+union kbase_ioctl_cs_queue_group_create {
+	struct {
+		__u64 tiler_mask;
+		__u64 fragment_mask;
+		__u64 compute_mask;
+		__u8 cs_min;
+		__u8 priority;
+		__u8 tiler_max;
+		__u8 fragment_max;
+		__u8 compute_max;
+		__u8 csi_handlers;
+		__u8 padding[2];
+		/**
+		 * @in.dvs_buf: buffer for deferred vertex shader
+		 */
+		__u64 dvs_buf;
+	} in;
+	struct {
+		__u8 group_handle;
+		__u8 padding[3];
+		__u32 group_uid;
+	} out;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE \
+	_IOWR(KBASE_IOCTL_TYPE, 58, union kbase_ioctl_cs_queue_group_create)

 /**
  * struct kbase_ioctl_cs_queue_group_term - Terminate a GPU command queue group
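
To illustrate the renumbered call, a hedged sketch of creating a queue group through the new definition. The mask and count values are invented, and a real client must first complete the usual kbase setup sequence (version check, set-flags, and any required tracking-handle mapping).

#include <string.h>
#include <sys/ioctl.h>
#include "mali_kbase_ioctl.h"

static int create_queue_group(int mali_fd)
{
	union kbase_ioctl_cs_queue_group_create req;

	memset(&req, 0, sizeof(req));
	req.in.tiler_mask    = ~0ULL;	/* allow every tiler endpoint */
	req.in.fragment_mask = ~0ULL;
	req.in.compute_mask  = ~0ULL;
	req.in.cs_min        = 1;	/* need at least one command stream */
	req.in.dvs_buf       = 0;	/* no deferred-vertex-shader buffer */

	/* Command number 58; the old create call at 42 is kept as _1_6. */
	if (ioctl(mali_fd, KBASE_IOCTL_CS_QUEUE_GROUP_CREATE, &req) < 0)
		return -1;
	return req.out.group_handle;	/* pass to bind/terminate calls */
}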
@@ -295,6 +383,7 @@
  *                       allowed.
  * @in.group_id: Group ID to be used for physical allocations.
  * @in.padding: Padding
+ * @in.buf_desc_va: Buffer descriptor GPU VA for tiler heap reclaims.
  * @out: Output parameters
  * @out.gpu_heap_va: GPU VA (virtual address) of Heap context that was set up
  *                   for the heap.
@@ -310,6 +399,7 @@
 		__u16 target_in_flight;
 		__u8 group_id;
 		__u8 padding;
+		__u64 buf_desc_va;
 	} in;
 	struct {
 		__u64 gpu_heap_va;
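
A short sketch of where the new field sits when initializing a heap. The chunk geometry is invented for illustration, and buf_desc_va may stay zero when no buffer descriptor is supplied for heap reclaim.

union kbase_ioctl_cs_tiler_heap_init heap = {
	.in = {
		.chunk_size       = 1u << 20,	/* hypothetical 1 MiB chunks */
		.initial_chunks   = 2,
		.max_chunks       = 32,
		.target_in_flight = 1,
		.group_id         = 0,
		.buf_desc_va      = 0,		/* new in UK 1.14 */
	},
};
/* if (ioctl(mali_fd, KBASE_IOCTL_CS_TILER_HEAP_INIT, &heap) == 0)
 *	use heap.out.gpu_heap_va and heap.out.first_chunk_va;
 */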
@@ -319,6 +409,43 @@

 #define KBASE_IOCTL_CS_TILER_HEAP_INIT \
 	_IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init)
+
+/**
+ * union kbase_ioctl_cs_tiler_heap_init_1_13 - Initialize chunked tiler memory heap,
+ *                                             earlier version up to 1.13
+ * @in: Input parameters
+ * @in.chunk_size: Size of each chunk.
+ * @in.initial_chunks: Initial number of chunks that heap will be created with.
+ * @in.max_chunks: Maximum number of chunks that the heap is allowed to use.
+ * @in.target_in_flight: Number of render-passes that the driver should attempt to
+ *                       keep in flight for which allocation of new chunks is
+ *                       allowed.
+ * @in.group_id: Group ID to be used for physical allocations.
+ * @in.padding: Padding
+ * @out: Output parameters
+ * @out.gpu_heap_va: GPU VA (virtual address) of Heap context that was set up
+ *                   for the heap.
+ * @out.first_chunk_va: GPU VA of the first chunk allocated for the heap,
+ *                      actually points to the header of heap chunk and not to
+ *                      the low address of free memory in the chunk.
+ */
+union kbase_ioctl_cs_tiler_heap_init_1_13 {
+	struct {
+		__u32 chunk_size;
+		__u32 initial_chunks;
+		__u32 max_chunks;
+		__u16 target_in_flight;
+		__u8 group_id;
+		__u8 padding;
+	} in;
+	struct {
+		__u64 gpu_heap_va;
+		__u64 first_chunk_va;
+	} out;
+};
+
+#define KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13 \
+	_IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init_1_13)

 /**
  * struct kbase_ioctl_cs_tiler_heap_term - Terminate a chunked tiler heap
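
Note that both tiler-heap init defines reuse command number 48: they remain distinct ioctls because _IOWR() encodes the argument size, and the current union is larger since it carries buf_desc_va. A quick sanity check, assuming the uapi headers are on the include path:

#include <stdio.h>
#include "mali_kbase_ioctl.h"

int main(void)
{
	/* Same command number... */
	printf("nr: %u vs %u\n",
	       _IOC_NR(KBASE_IOCTL_CS_TILER_HEAP_INIT),
	       _IOC_NR(KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13));
	/* ...but different encoded argument sizes, so the kernel can
	 * tell the two layouts apart. */
	printf("size: %u vs %u\n",
	       _IOC_SIZE(KBASE_IOCTL_CS_TILER_HEAP_INIT),
	       _IOC_SIZE(KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13));
	return 0;
}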
@@ -340,7 +467,7 @@
  * @in: Input parameters
  * @in.max_group_num: The maximum number of groups to be read. Can be 0, in
  *                    which case groups_ptr is unused.
- * @in.max_total_stream _num: The maximum number of CSs to be read. Can be 0, in
+ * @in.max_total_stream_num: The maximum number of CSs to be read. Can be 0, in
  *                           which case streams_ptr is unused.
  * @in.groups_ptr: Pointer where to store all the group data (sequentially).
  * @in.streams_ptr: Pointer where to store all the CS data (sequentially).
@@ -390,6 +517,60 @@
 #define KBASE_IOCTL_CS_CPU_QUEUE_DUMP \
 	_IOW(KBASE_IOCTL_TYPE, 53, struct kbase_ioctl_cs_cpu_queue_info)

+/**
+ * union kbase_ioctl_mem_alloc_ex - Allocate memory on the GPU
+ * @in: Input parameters
+ * @in.va_pages: The number of pages of virtual address space to reserve
+ * @in.commit_pages: The number of physical pages to allocate
+ * @in.extension: The number of extra pages to allocate on each GPU fault which grows the region
+ * @in.flags: Flags
+ * @in.fixed_address: The GPU virtual address requested for the allocation,
+ *                    if the allocation is using the BASE_MEM_FIXED flag.
+ * @in.extra: Space for extra parameters that may be added in the future.
+ * @out: Output parameters
+ * @out.flags: Flags
+ * @out.gpu_va: The GPU virtual address which is allocated
+ */
+union kbase_ioctl_mem_alloc_ex {
+	struct {
+		__u64 va_pages;
+		__u64 commit_pages;
+		__u64 extension;
+		__u64 flags;
+		__u64 fixed_address;
+		__u64 extra[3];
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_ALLOC_EX _IOWR(KBASE_IOCTL_TYPE, 59, union kbase_ioctl_mem_alloc_ex)
+
+/**
+ * union kbase_ioctl_read_user_page - Read a register of USER page
+ *
+ * @in: Input parameters.
+ * @in.offset: Register offset in USER page.
+ * @in.padding: Padding to round up to a multiple of 8 bytes, must be zero.
+ * @out: Output parameters.
+ * @out.val_lo: Value of 32bit register or the 1st half of 64bit register to be read.
+ * @out.val_hi: Value of the 2nd half of 64bit register to be read.
+ */
+union kbase_ioctl_read_user_page {
+	struct {
+		__u32 offset;
+		__u32 padding;
+	} in;
+	struct {
+		__u32 val_lo;
+		__u32 val_hi;
+	} out;
+};
+
+#define KBASE_IOCTL_READ_USER_PAGE _IOWR(KBASE_IOCTL_TYPE, 60, union kbase_ioctl_read_user_page)
+
 /***************
  * test ioctls *
  ***************/
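
Finally, a hedged sketch of driving the two ioctls added in this hunk. The offset value is purely illustrative (real offsets come from the GPU's USER page layout), the flags for mem_alloc_ex live in the base memory headers and are omitted, and error handling is minimal.

#include <sys/ioctl.h>
#include "mali_kbase_ioctl.h"

/* 64-bit USER-page registers come back split across val_lo/val_hi. */
static __u64 read_user_reg64(int mali_fd, __u32 offset)
{
	union kbase_ioctl_read_user_page req = {
		.in = { .offset = offset },	/* padding stays zero */
	};

	if (ioctl(mali_fd, KBASE_IOCTL_READ_USER_PAGE, &req) < 0)
		return 0;
	return ((__u64)req.out.val_hi << 32) | req.out.val_lo;
}

/* Shape of a mem_alloc_ex request; treat the values as placeholders. */
static int alloc_ex(int mali_fd, __u64 pages)
{
	union kbase_ioctl_mem_alloc_ex alloc = {
		.in = {
			.va_pages     = pages,
			.commit_pages = pages,
			/* .flags / .fixed_address per base memory flags */
		},
	};
	return ioctl(mali_fd, KBASE_IOCTL_MEM_ALLOC_EX, &alloc);
}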