2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
--- a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -22,31 +22,75 @@
  */
 
 #include "kfd_mqd_manager.h"
+#include "amdgpu_amdkfd.h"
+#include "kfd_device_queue_manager.h"
 
-struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
-					struct kfd_dev *dev)
+/* Mapping queue priority to pipe priority, indexed by queue priority */
+int pipe_priority_map[] = {
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_MEDIUM,
+	KFD_PIPE_PRIORITY_CS_MEDIUM,
+	KFD_PIPE_PRIORITY_CS_MEDIUM,
+	KFD_PIPE_PRIORITY_CS_MEDIUM,
+	KFD_PIPE_PRIORITY_CS_HIGH,
+	KFD_PIPE_PRIORITY_CS_HIGH,
+	KFD_PIPE_PRIORITY_CS_HIGH,
+	KFD_PIPE_PRIORITY_CS_HIGH,
+	KFD_PIPE_PRIORITY_CS_HIGH
+};
+
+struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev, struct queue_properties *q)
 {
-	switch (dev->device_info->asic_family) {
-	case CHIP_KAVERI:
-		return mqd_manager_init_cik(type, dev);
-	case CHIP_HAWAII:
-		return mqd_manager_init_cik_hawaii(type, dev);
-	case CHIP_CARRIZO:
-		return mqd_manager_init_vi(type, dev);
-	case CHIP_TONGA:
-	case CHIP_FIJI:
-	case CHIP_POLARIS10:
-	case CHIP_POLARIS11:
-		return mqd_manager_init_vi_tonga(type, dev);
-	case CHIP_VEGA10:
-	case CHIP_RAVEN:
-		return mqd_manager_init_v9(type, dev);
-	default:
-		WARN(1, "Unexpected ASIC family %u",
-		     dev->device_info->asic_family);
-	}
+	struct kfd_mem_obj *mqd_mem_obj = NULL;
 
-	return NULL;
+	mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+	if (!mqd_mem_obj)
+		return NULL;
+
+	mqd_mem_obj->gtt_mem = dev->dqm->hiq_sdma_mqd.gtt_mem;
+	mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr;
+	mqd_mem_obj->cpu_ptr = dev->dqm->hiq_sdma_mqd.cpu_ptr;
+
+	return mqd_mem_obj;
+}
+
+struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
+					struct queue_properties *q)
+{
+	struct kfd_mem_obj *mqd_mem_obj = NULL;
+	uint64_t offset;
+
+	mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+	if (!mqd_mem_obj)
+		return NULL;
+
+	offset = (q->sdma_engine_id *
+		dev->device_info->num_sdma_queues_per_engine +
+		q->sdma_queue_id) *
+		dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
+
+	offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+
+	mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem
+				+ offset);
+	mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset;
+	mqd_mem_obj->cpu_ptr = (uint32_t *)((uint64_t)
+			dev->dqm->hiq_sdma_mqd.cpu_ptr + offset);
+
+	return mqd_mem_obj;
+}
+
+void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
+			struct kfd_mem_obj *mqd_mem_obj)
+{
+	WARN_ON(!mqd_mem_obj->gtt_mem);
+	kfree(mqd_mem_obj);
 }
 
 void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
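
Note on the hunk above: allocate_sdma_mqd() carves each SDMA MQD out of one shared GTT buffer, with the HIQ MQD at the start and the SDMA MQDs packed after it, indexed linearly by (engine, queue). The standalone sketch below reproduces just that offset arithmetic; sdma_mqd_offset() is a hypothetical helper, and the sizes in main() (512-byte HIQ MQD, 256-byte SDMA MQD, 8 queues per engine) are made-up illustration values, not real MQD sizes.

#include <stdint.h>
#include <stdio.h>

/* Sketch of the carve-out indexing used by allocate_sdma_mqd() above. */
static uint64_t sdma_mqd_offset(uint32_t engine_id, uint32_t queue_id,
				uint32_t queues_per_engine,
				uint64_t hiq_mqd_size, uint64_t sdma_mqd_size)
{
	/* Linear index of this SDMA queue across all engines. */
	uint64_t idx = (uint64_t)engine_id * queues_per_engine + queue_id;

	/* Skip the HIQ MQD occupying the start of the shared buffer. */
	return hiq_mqd_size + idx * sdma_mqd_size;
}

int main(void)
{
	/* Engine 1, queue 3, 8 queues/engine: index 11, so 512 + 11 * 256. */
	printf("offset = %llu\n",	/* prints "offset = 3328" */
	       (unsigned long long)sdma_mqd_offset(1, 3, 8, 512, 256));
	return 0;
}
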
@@ -54,36 +98,78 @@
 		uint32_t *se_mask)
 {
 	struct kfd_cu_info cu_info;
-	uint32_t cu_per_sh[4] = {0};
-	int i, se, cu = 0;
-
-	mm->dev->kfd2kgd->get_cu_info(mm->dev->kgd, &cu_info);
+	uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
+	int i, se, sh, cu;
+	amdgpu_amdkfd_get_cu_info(mm->dev->kgd, &cu_info);
 
 	if (cu_mask_count > cu_info.cu_active_number)
 		cu_mask_count = cu_info.cu_active_number;
 
-	for (se = 0; se < cu_info.num_shader_engines; se++)
-		for (i = 0; i < 4; i++)
-			cu_per_sh[se] += hweight32(cu_info.cu_bitmap[se][i]);
-
-	/* Symmetrically map cu_mask to all SEs:
-	 * cu_mask[0] bit0 -> se_mask[0] bit0;
-	 * cu_mask[0] bit1 -> se_mask[1] bit0;
-	 * ... (if # SE is 4)
-	 * cu_mask[0] bit4 -> se_mask[0] bit1;
-	 * ...
+	/* Exceeding these bounds corrupts the stack and indicates a coding error.
+	 * Returning with no CUs enabled will hang the queue, which should be
+	 * attention grabbing.
 	 */
-	se = 0;
-	for (i = 0; i < cu_mask_count; i++) {
-		if (cu_mask[i / 32] & (1 << (i % 32)))
-			se_mask[se] |= 1 << cu;
+	if (cu_info.num_shader_engines > KFD_MAX_NUM_SE) {
+		pr_err("Exceeded KFD_MAX_NUM_SE, chip reports %d\n", cu_info.num_shader_engines);
+		return;
+	}
+	if (cu_info.num_shader_arrays_per_engine > KFD_MAX_NUM_SH_PER_SE) {
+		pr_err("Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
+			cu_info.num_shader_arrays_per_engine * cu_info.num_shader_engines);
+		return;
+	}
+	/* Count active CUs per SH.
+	 *
+	 * Some CUs in an SH may be disabled. HW expects disabled CUs to be
+	 * represented in the high bits of each SH's enable mask (the upper and lower
+	 * 16 bits of se_mask) and will take care of the actual distribution of
+	 * disabled CUs within each SH automatically.
+	 * Each half of se_mask must be filled only on bits 0-cu_per_sh[se][sh]-1.
+	 *
+	 * See note on Arcturus cu_bitmap layout in gfx_v9_0_get_cu_info.
+	 */
+	for (se = 0; se < cu_info.num_shader_engines; se++)
+		for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
+			cu_per_sh[se][sh] = hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]);
 
-		do {
-			se++;
-			if (se == cu_info.num_shader_engines) {
-				se = 0;
-				cu++;
+	/* Symmetrically map cu_mask to all SEs & SHs:
+	 * se_mask programs up to 2 SHs in the upper and lower 16 bits.
+	 *
+	 * Examples
+	 * Assuming 1 SH/SE, 4 SEs:
+	 *  cu_mask[0] bit0 -> se_mask[0] bit0
+	 *  cu_mask[0] bit1 -> se_mask[1] bit0
+	 *  ...
+	 *  cu_mask[0] bit4 -> se_mask[0] bit1
+	 *  ...
+	 *
+	 * Assuming 2 SH/SE, 4 SEs:
+	 *  cu_mask[0] bit0 -> se_mask[0] bit0 (SE0,SH0,CU0)
+	 *  cu_mask[0] bit1 -> se_mask[1] bit0 (SE1,SH0,CU0)
+	 *  ...
+	 *  cu_mask[0] bit4 -> se_mask[0] bit16 (SE0,SH1,CU0)
+	 *  cu_mask[0] bit5 -> se_mask[1] bit16 (SE1,SH1,CU0)
+	 *  ...
+	 *  cu_mask[0] bit8 -> se_mask[0] bit1 (SE0,SH0,CU1)
+	 *  ...
+	 *
+	 * First ensure all CUs are disabled, then enable user specified CUs.
+	 */
+	for (i = 0; i < cu_info.num_shader_engines; i++)
+		se_mask[i] = 0;
+
+	i = 0;
+	for (cu = 0; cu < 16; cu++) {
+		for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) {
+			for (se = 0; se < cu_info.num_shader_engines; se++) {
+				if (cu_per_sh[se][sh] > cu) {
+					if (cu_mask[i / 32] & (1 << (i % 32)))
+						se_mask[se] |= 1 << (cu + sh * 16);
+					i++;
+					if (i == cu_mask_count)
+						return;
+				}
 			}
-		} while (cu >= cu_per_sh[se] && cu < 32);
+		}
 	}
 }
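
Note on the hunk above: the rewritten mapping loop walks CU slot, then SH, then SE, so consecutive cu_mask bits land on different SEs first, and SH1 occupies the upper 16 bits of each se_mask word. Below is a minimal userspace re-implementation of that loop under an assumed fixed topology (4 SEs, 2 SHs/SE, 8 active CUs per SH); map_cu_mask() and all constants are hypothetical stand-ins for the driver's per-ASIC values, not KFD code.

#include <stdint.h>
#include <stdio.h>

#define NUM_SE 4	/* shader engines (illustration value) */
#define NUM_SH 2	/* shader arrays per engine (illustration value) */

/* Same round-robin mapping as the kernel loop: CU slot, then SH, then SE. */
static void map_cu_mask(const uint32_t *cu_mask, uint32_t cu_mask_count,
			uint32_t cu_per_sh[NUM_SE][NUM_SH], uint32_t *se_mask)
{
	uint32_t i = 0;

	/* First ensure all CUs are disabled, then enable requested CUs. */
	for (int se = 0; se < NUM_SE; se++)
		se_mask[se] = 0;

	for (int cu = 0; cu < 16; cu++)
		for (int sh = 0; sh < NUM_SH; sh++)
			for (int se = 0; se < NUM_SE; se++)
				if (cu_per_sh[se][sh] > (uint32_t)cu) {
					if (cu_mask[i / 32] & (1u << (i % 32)))
						se_mask[se] |= 1u << (cu + sh * 16);
					if (++i == cu_mask_count)
						return;
				}
}

int main(void)
{
	uint32_t cu_per_sh[NUM_SE][NUM_SH];
	uint32_t se_mask[NUM_SE];
	uint32_t cu_mask[2] = { 0x3f, 0 };	/* enable the first 6 CUs */

	for (int se = 0; se < NUM_SE; se++)
		for (int sh = 0; sh < NUM_SH; sh++)
			cu_per_sh[se][sh] = 8;	/* 8 active CUs per SH */

	map_cu_mask(cu_mask, 64, cu_per_sh, se_mask);

	/* Expect bit0 on all four SEs (SH0) and bit16 on SE0/SE1 (SH1):
	 * 0x00010001, 0x00010001, 0x00000001, 0x00000001.
	 */
	for (int se = 0; se < NUM_SE; se++)
		printf("se_mask[%d] = 0x%08x\n", se, se_mask[se]);
	return 0;
}
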