...
  */
 
 #include "kfd_mqd_manager.h"
+#include "amdgpu_amdkfd.h"
+#include "kfd_device_queue_manager.h"
 
-struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
-					struct kfd_dev *dev)
+/* Mapping queue priority to pipe priority, indexed by queue priority */
+int pipe_priority_map[] = {
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_MEDIUM,
+	KFD_PIPE_PRIORITY_CS_MEDIUM,
+	KFD_PIPE_PRIORITY_CS_MEDIUM,
+	KFD_PIPE_PRIORITY_CS_MEDIUM,
+	KFD_PIPE_PRIORITY_CS_HIGH,
+	KFD_PIPE_PRIORITY_CS_HIGH,
+	KFD_PIPE_PRIORITY_CS_HIGH,
+	KFD_PIPE_PRIORITY_CS_HIGH,
+	KFD_PIPE_PRIORITY_CS_HIGH
+};
+
+struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev, struct queue_properties *q)
 {
-	switch (dev->device_info->asic_family) {
-	case CHIP_KAVERI:
-		return mqd_manager_init_cik(type, dev);
-	case CHIP_HAWAII:
-		return mqd_manager_init_cik_hawaii(type, dev);
-	case CHIP_CARRIZO:
-		return mqd_manager_init_vi(type, dev);
-	case CHIP_TONGA:
-	case CHIP_FIJI:
-	case CHIP_POLARIS10:
-	case CHIP_POLARIS11:
-		return mqd_manager_init_vi_tonga(type, dev);
-	case CHIP_VEGA10:
-	case CHIP_RAVEN:
-		return mqd_manager_init_v9(type, dev);
-	default:
-		WARN(1, "Unexpected ASIC family %u",
-		     dev->device_info->asic_family);
-	}
+	struct kfd_mem_obj *mqd_mem_obj = NULL;
 
-	return NULL;
+	mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+	if (!mqd_mem_obj)
+		return NULL;
+
+	mqd_mem_obj->gtt_mem = dev->dqm->hiq_sdma_mqd.gtt_mem;
+	mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr;
+	mqd_mem_obj->cpu_ptr = dev->dqm->hiq_sdma_mqd.cpu_ptr;
+
+	return mqd_mem_obj;
+}
+
+struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
+					struct queue_properties *q)
+{
+	struct kfd_mem_obj *mqd_mem_obj = NULL;
+	uint64_t offset;
+
+	mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+	if (!mqd_mem_obj)
+		return NULL;
+
+	offset = (q->sdma_engine_id *
+		dev->device_info->num_sdma_queues_per_engine +
+		q->sdma_queue_id) *
+		dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
+
+	offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+
+	mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem
+				+ offset);
+	mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset;
+	mqd_mem_obj->cpu_ptr = (uint32_t *)((uint64_t)
+				dev->dqm->hiq_sdma_mqd.cpu_ptr + offset);
+
+	return mqd_mem_obj;
+}
+
+void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
+			struct kfd_mem_obj *mqd_mem_obj)
+{
+	WARN_ON(!mqd_mem_obj->gtt_mem);
+	kfree(mqd_mem_obj);
 }
 
 void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
...
 		uint32_t *se_mask)
 {
 	struct kfd_cu_info cu_info;
-	uint32_t cu_per_sh[4] = {0};
-	int i, se, cu = 0;
-
-	mm->dev->kfd2kgd->get_cu_info(mm->dev->kgd, &cu_info);
+	uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
+	int i, se, sh, cu;
+	amdgpu_amdkfd_get_cu_info(mm->dev->kgd, &cu_info);
 
 	if (cu_mask_count > cu_info.cu_active_number)
 		cu_mask_count = cu_info.cu_active_number;
 
-	for (se = 0; se < cu_info.num_shader_engines; se++)
-		for (i = 0; i < 4; i++)
-			cu_per_sh[se] += hweight32(cu_info.cu_bitmap[se][i]);
-
-	/* Symmetrically map cu_mask to all SEs:
-	 * cu_mask[0] bit0 -> se_mask[0] bit0;
-	 * cu_mask[0] bit1 -> se_mask[1] bit0;
-	 * ... (if # SE is 4)
-	 * cu_mask[0] bit4 -> se_mask[0] bit1;
-	 * ...
+	/* Exceeding these bounds corrupts the stack and indicates a coding error.
+	 * Returning with no CU's enabled will hang the queue, which should be
+	 * attention grabbing.
 	 */
-	se = 0;
-	for (i = 0; i < cu_mask_count; i++) {
-		if (cu_mask[i / 32] & (1 << (i % 32)))
-			se_mask[se] |= 1 << cu;
+	if (cu_info.num_shader_engines > KFD_MAX_NUM_SE) {
+		pr_err("Exceeded KFD_MAX_NUM_SE, chip reports %d\n", cu_info.num_shader_engines);
+		return;
+	}
+	if (cu_info.num_shader_arrays_per_engine > KFD_MAX_NUM_SH_PER_SE) {
+		pr_err("Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
+			cu_info.num_shader_arrays_per_engine * cu_info.num_shader_engines);
+		return;
+	}
+	/* Count active CUs per SH.
+	 *
+	 * Some CUs in an SH may be disabled. HW expects disabled CUs to be
+	 * represented in the high bits of each SH's enable mask (the upper and lower
+	 * 16 bits of se_mask) and will take care of the actual distribution of
+	 * disabled CUs within each SH automatically.
+	 * Each half of se_mask must be filled only on bits 0-cu_per_sh[se][sh]-1.
+	 *
+	 * See note on Arcturus cu_bitmap layout in gfx_v9_0_get_cu_info.
+	 */
+	for (se = 0; se < cu_info.num_shader_engines; se++)
+		for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
+			cu_per_sh[se][sh] = hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]);
 
-		do {
-			se++;
-			if (se == cu_info.num_shader_engines) {
-				se = 0;
-				cu++;
+	/* Symmetrically map cu_mask to all SEs & SHs:
+	 * se_mask programs up to 2 SH in the upper and lower 16 bits.
+	 *
+	 * Examples
+	 * Assuming 1 SH/SE, 4 SEs:
+	 * cu_mask[0] bit0 -> se_mask[0] bit0
+	 * cu_mask[0] bit1 -> se_mask[1] bit0
+	 * ...
+	 * cu_mask[0] bit4 -> se_mask[0] bit1
+	 * ...
+	 *
+	 * Assuming 2 SH/SE, 4 SEs
+	 * cu_mask[0] bit0 -> se_mask[0] bit0 (SE0,SH0,CU0)
+	 * cu_mask[0] bit1 -> se_mask[1] bit0 (SE1,SH0,CU0)
+	 * ...
+	 * cu_mask[0] bit4 -> se_mask[0] bit16 (SE0,SH1,CU0)
+	 * cu_mask[0] bit5 -> se_mask[1] bit16 (SE1,SH1,CU0)
+	 * ...
+	 * cu_mask[0] bit8 -> se_mask[0] bit1 (SE0,SH0,CU1)
+	 * ...
+	 *
+	 * First ensure all CUs are disabled, then enable user specified CUs.
+	 */
+	for (i = 0; i < cu_info.num_shader_engines; i++)
+		se_mask[i] = 0;
+
+	i = 0;
+	for (cu = 0; cu < 16; cu++) {
+		for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) {
+			for (se = 0; se < cu_info.num_shader_engines; se++) {
+				if (cu_per_sh[se][sh] > cu) {
+					if (cu_mask[i / 32] & (1 << (i % 32)))
+						se_mask[se] |= 1 << (cu + sh * 16);
+					i++;
+					if (i == cu_mask_count)
+						return;
+				}
 			}
-		} while (cu >= cu_per_sh[se] && cu < 32);
+		}
 	}
 }
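
The offset arithmetic in allocate_sdma_mqd above carves each SDMA queue's MQD out of one shared GTT allocation: the HIQ MQD sits at the start of the buffer and the SDMA MQDs follow, laid out engine-major, queue-minor. Below is a minimal standalone sketch of just that arithmetic; the sizes and IDs are hypothetical values chosen to make the calculation concrete, not the driver's real numbers (those come from device_info and the MQD managers at runtime).

/* Standalone sketch of the allocate_sdma_mqd offset calculation.
 * All constants below are assumed, for illustration only.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t hiq_mqd_size = 512;              /* assumed HIQ MQD size */
	uint64_t sdma_mqd_size = 128;             /* assumed SDMA MQD size */
	uint32_t num_sdma_queues_per_engine = 8;  /* assumed per-engine queue count */
	uint32_t sdma_engine_id = 1;              /* example queue placement */
	uint32_t sdma_queue_id = 3;

	/* Engine-major, queue-minor slot index, scaled by the SDMA MQD size,
	 * then shifted past the single HIQ MQD at the start of the buffer.
	 */
	uint64_t offset = (sdma_engine_id * num_sdma_queues_per_engine +
			   sdma_queue_id) * sdma_mqd_size;
	offset += hiq_mqd_size;

	/* engine 1, queue 3 -> slot 11 -> 11 * 128 + 512 = 1920 bytes */
	printf("byte offset into hiq_sdma_mqd buffer: %llu\n",
	       (unsigned long long)offset);
	return 0;
}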
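
The new triple loop in mqd_symmetrically_map_cu_mask spreads requested CU bits round-robin across shader engines first, then shader arrays, then CU slots, with SH1 occupying the upper 16 bits of each se_mask word. The userspace sketch below mirrors that loop under a hypothetical fixed topology (4 SEs, 2 SHs per SE, 8 CUs per SH); unlike the driver, it does not derive cu_per_sh from cu_bitmap, so the per-SH CU count is uniform here by assumption.

/* Userspace sketch of the symmetric CU-mask distribution shown above.
 * Topology is assumed (4 SE, 2 SH/SE, 8 CU/SH), not queried from hardware.
 */
#include <stdint.h>
#include <stdio.h>

#define NUM_SE		4
#define NUM_SH_PER_SE	2
#define CU_PER_SH	8

static void map_cu_mask(const uint32_t *cu_mask, int cu_mask_count,
			uint32_t se_mask[NUM_SE])
{
	int i = 0, cu, sh, se;

	/* First disable all CUs, then enable the requested ones. */
	for (se = 0; se < NUM_SE; se++)
		se_mask[se] = 0;

	for (cu = 0; cu < 16; cu++) {
		for (sh = 0; sh < NUM_SH_PER_SE; sh++) {
			for (se = 0; se < NUM_SE; se++) {
				if (cu < CU_PER_SH) {
					if (cu_mask[i / 32] & (1u << (i % 32)))
						se_mask[se] |= 1u << (cu + sh * 16);
					i++;
					if (i == cu_mask_count)
						return;
				}
			}
		}
	}
}

int main(void)
{
	/* Request the first 10 CUs: they land on CU0 of SE0..SE3/SH0,
	 * CU0 of SE0..SE3/SH1, then CU1 of SE0..SE1/SH0.
	 */
	uint32_t cu_mask[2] = { 0x3ff, 0 };
	uint32_t se_mask[NUM_SE];
	int se;

	map_cu_mask(cu_mask, 64, se_mask);
	for (se = 0; se < NUM_SE; se++)
		printf("se_mask[%d] = 0x%08x\n", se, se_mask[se]);
	return 0;
}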