.. | .. |
---|
1 | 1 | // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note |
---|
2 | 2 | /* |
---|
3 | 3 | * |
---|
4 | | - * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. |
---|
| 4 | + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. |
---|
5 | 5 | * |
---|
6 | 6 | * This program is free software and is provided to you under the terms of the |
---|
7 | 7 | * GNU General Public License version 2 as published by the Free Software |
---|
.. | .. |
---|
27 | 27 | #include <linux/export.h> |
---|
28 | 28 | #include <linux/priority_control_manager.h> |
---|
29 | 29 | #include <linux/shmem_fs.h> |
---|
30 | | -#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h> |
---|
| 30 | +#include <csf/mali_kbase_csf_registers.h> |
---|
31 | 31 | #include "mali_kbase_csf_tiler_heap.h" |
---|
32 | 32 | #include <mmu/mali_kbase_mmu.h> |
---|
33 | 33 | #include "mali_kbase_csf_timeout.h" |
---|
34 | 34 | #include <csf/ipa_control/mali_kbase_csf_ipa_control.h> |
---|
| 35 | +#include <mali_kbase_hwaccess_time.h> |
---|
| 36 | +#include "mali_kbase_csf_event.h" |
---|
| 37 | +#include <tl/mali_kbase_tracepoints.h> |
---|
| 38 | +#include "mali_kbase_csf_mcu_shared_reg.h" |
---|
35 | 39 | |
---|
36 | 40 | #define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK) |
---|
37 | 41 | #define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK) |
---|
38 | | -#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1) |
---|
39 | 42 | |
---|
40 | | -/** |
---|
41 | | - * struct kbase_csf_event - CSF event callback. |
---|
42 | | - * |
---|
43 | | - * This structure belongs to the list of events which is part of a Kbase |
---|
44 | | - * context, and describes a callback function with a custom parameter to pass |
---|
45 | | - * to it when a CSF event is signalled. |
---|
46 | | - * |
---|
47 | | - * @link: Link to the rest of the list. |
---|
48 | | - * @kctx: Pointer to the Kbase context this event belongs to. |
---|
49 | | - * @callback: Callback function to call when a CSF event is signalled. |
---|
50 | | - * @param: Parameter to pass to the callback function. |
---|
51 | | - */ |
---|
52 | | -struct kbase_csf_event { |
---|
53 | | - struct list_head link; |
---|
54 | | - struct kbase_context *kctx; |
---|
55 | | - kbase_csf_event_callback *callback; |
---|
56 | | - void *param; |
---|
57 | | -}; |
---|
| 43 | +#define CS_RING_BUFFER_MAX_SIZE ((uint32_t)(1u << 31)) /* 2GiB */ |
---|
| 44 | +#define CS_RING_BUFFER_MIN_SIZE ((uint32_t)4096) |
---|
| 45 | + |
---|
| 46 | +#define PROTM_ALLOC_MAX_RETRIES ((u8)5) |
---|
58 | 47 | |
---|
59 | 48 | const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = { |
---|
60 | 49 | KBASE_QUEUE_GROUP_PRIORITY_HIGH, |
---|
.. | .. |
---|
68 | 57 | BASE_QUEUE_GROUP_PRIORITY_MEDIUM, |
---|
69 | 58 | BASE_QUEUE_GROUP_PRIORITY_LOW |
---|
70 | 59 | }; |
---|
| 60 | + |
---|
| 61 | +/* |
---|
| 62 | + * struct irq_idle_and_protm_track - Tracks the idle and protected mode request |
---|
| 63 | + * information observed across groups while |
---|
| 64 | + * handling an interrupt. |
---|
| 65 | + * |
---|
| 66 | + * @protm_grp: Possibly schedulable group that requested protected mode in the interrupt. |
---|
| 67 | + * NULL if no such group was observed in the tracked interrupt. |
---|
| 68 | + * @idle_seq: Sequence number identifying the highest priority group that notified idle. |
---|
| 69 | + * If no group notified idle in the interrupt, this holds the largest value: U32_MAX. |
---|
| 70 | + * @idle_slot: The slot number of that group, valid only when @idle_seq is valid. |
---|
| 71 | + */ |
---|
| 72 | +struct irq_idle_and_protm_track { |
---|
| 73 | + struct kbase_queue_group *protm_grp; |
---|
| 74 | + u32 idle_seq; |
---|
| 75 | + s8 idle_slot; |
---|
| 76 | +}; |
---|
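For context, a minimal sketch of how such a tracker could be updated while the interrupt handler walks the CSG slots. The helper name and its parameters are illustrative only, not part of the patch; they simply mirror the field semantics documented above (idle_seq starts at U32_MAX, a lower sequence number means a higher priority group).

```c
/* Illustrative sketch, not part of the patch: per-slot update of the tracker. */
static void demo_track_slot(struct irq_idle_and_protm_track *track,
			    struct kbase_queue_group *group, s8 slot,
			    u32 group_scan_seq, bool requested_protm,
			    bool notified_idle)
{
	/* Remember a (possibly schedulable) group that asked for protected
	 * mode during this interrupt, if none was recorded yet.
	 */
	if (requested_protm && !track->protm_grp)
		track->protm_grp = group;

	/* A smaller sequence number means a higher priority group; idle_seq
	 * starts out as U32_MAX, so any idle notification beats it.
	 */
	if (notified_idle && group_scan_seq < track->idle_seq) {
		track->idle_seq = group_scan_seq;
		track->idle_slot = slot;
	}
}
```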
| 77 | + |
---|
| 78 | +/** |
---|
| 79 | + * kbasep_ctx_user_reg_page_mapping_term() - Terminate resources for USER Register Page. |
---|
| 80 | + * |
---|
| 81 | + * @kctx: Pointer to the kbase context |
---|
| 82 | + */ |
---|
| 83 | +static void kbasep_ctx_user_reg_page_mapping_term(struct kbase_context *kctx) |
---|
| 84 | +{ |
---|
| 85 | + struct kbase_device *kbdev = kctx->kbdev; |
---|
| 86 | + |
---|
| 87 | + if (unlikely(kctx->csf.user_reg.vma)) |
---|
| 88 | + dev_err(kbdev->dev, "VMA for USER Register page still exists on termination of ctx %d_%d", |
---|
| 89 | + kctx->tgid, kctx->id); |
---|
| 90 | + if (WARN_ON_ONCE(!list_empty(&kctx->csf.user_reg.link))) |
---|
| 91 | + list_del_init(&kctx->csf.user_reg.link); |
---|
| 92 | +} |
---|
| 93 | + |
---|
| 94 | +/** |
---|
| 95 | + * kbasep_ctx_user_reg_page_mapping_init() - Initialize resources for USER Register Page. |
---|
| 96 | + * |
---|
| 97 | + * @kctx: Pointer to the kbase context |
---|
| 98 | + * |
---|
| 99 | + * Return: 0 on success. |
---|
| 100 | + */ |
---|
| 101 | +static int kbasep_ctx_user_reg_page_mapping_init(struct kbase_context *kctx) |
---|
| 102 | +{ |
---|
| 103 | + INIT_LIST_HEAD(&kctx->csf.user_reg.link); |
---|
| 104 | + kctx->csf.user_reg.vma = NULL; |
---|
| 105 | + kctx->csf.user_reg.file_offset = 0; |
---|
| 106 | + |
---|
| 107 | + return 0; |
---|
| 108 | +} |
---|
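The init/term pair above leans on a standard list idiom: `INIT_LIST_HEAD()` makes the link self-referencing so `list_empty()` is true before the context is ever added to a device-wide list, and `list_del_init()` re-initializes the link so a later emptiness check stays valid. A small, driver-independent sketch of that property (names here are illustrative):

```c
#include <linux/bug.h>
#include <linux/list.h>

struct demo_ctx {
	struct list_head link;
};

static void demo_list_idiom(struct list_head *device_list, struct demo_ctx *ctx)
{
	INIT_LIST_HEAD(&ctx->link);	/* list_empty(&ctx->link) is now true */

	list_add(&ctx->link, device_list);
	/* ... the context is live and linked on the device list ... */

	list_del_init(&ctx->link);	/* unlink and re-initialize the node */

	/* Because list_del_init() (not list_del()) was used, this check is
	 * safe and passes, which is what the WARN_ON_ONCE() in the term path
	 * above relies on.
	 */
	WARN_ON(!list_empty(&ctx->link));
}
```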
71 | 109 | |
---|
72 | 110 | static void put_user_pages_mmap_handle(struct kbase_context *kctx, |
---|
73 | 111 | struct kbase_queue *queue) |
---|
.. | .. |
---|
129 | 167 | return 0; |
---|
130 | 168 | } |
---|
131 | 169 | |
---|
132 | | -static void gpu_munmap_user_io_pages(struct kbase_context *kctx, |
---|
133 | | - struct kbase_va_region *reg) |
---|
134 | | -{ |
---|
135 | | - size_t num_pages = 2; |
---|
136 | | - |
---|
137 | | - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, |
---|
138 | | - reg->start_pfn, num_pages, MCU_AS_NR); |
---|
139 | | - |
---|
140 | | - WARN_ON(reg->flags & KBASE_REG_FREE); |
---|
141 | | - |
---|
142 | | - mutex_lock(&kctx->kbdev->csf.reg_lock); |
---|
143 | | - kbase_remove_va_region(reg); |
---|
144 | | - mutex_unlock(&kctx->kbdev->csf.reg_lock); |
---|
145 | | -} |
---|
146 | | - |
---|
147 | 170 | static void init_user_io_pages(struct kbase_queue *queue) |
---|
148 | 171 | { |
---|
149 | 172 | u32 *input_addr = (u32 *)(queue->user_io_addr); |
---|
.. | .. |
---|
161 | 184 | output_addr[CS_ACTIVE/4] = 0; |
---|
162 | 185 | } |
---|
163 | 186 | |
---|
164 | | -/* Map the input/output pages in the shared interface segment of MCU firmware |
---|
165 | | - * address space. |
---|
166 | | - */ |
---|
167 | | -static int gpu_mmap_user_io_pages(struct kbase_device *kbdev, |
---|
168 | | - struct tagged_addr *phys, struct kbase_va_region *reg) |
---|
169 | | -{ |
---|
170 | | - unsigned long mem_flags = KBASE_REG_GPU_RD; |
---|
171 | | - const size_t num_pages = 2; |
---|
172 | | - int ret; |
---|
173 | | - |
---|
174 | | -#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \ |
---|
175 | | - ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \ |
---|
176 | | - (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE))) |
---|
177 | | - mem_flags |= |
---|
178 | | - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); |
---|
179 | | -#else |
---|
180 | | - if (kbdev->system_coherency == COHERENCY_NONE) { |
---|
181 | | - mem_flags |= |
---|
182 | | - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); |
---|
183 | | - } else { |
---|
184 | | - mem_flags |= KBASE_REG_SHARE_BOTH | |
---|
185 | | - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); |
---|
186 | | - } |
---|
187 | | -#endif |
---|
188 | | - |
---|
189 | | - mutex_lock(&kbdev->csf.reg_lock); |
---|
190 | | - ret = kbase_add_va_region_rbtree(kbdev, reg, 0, num_pages, 1); |
---|
191 | | - reg->flags &= ~KBASE_REG_FREE; |
---|
192 | | - mutex_unlock(&kbdev->csf.reg_lock); |
---|
193 | | - |
---|
194 | | - if (ret) |
---|
195 | | - return ret; |
---|
196 | | - |
---|
197 | | - /* Map input page */ |
---|
198 | | - ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, |
---|
199 | | - reg->start_pfn, &phys[0], |
---|
200 | | - 1, mem_flags, MCU_AS_NR, |
---|
201 | | - KBASE_MEM_GROUP_CSF_IO); |
---|
202 | | - if (ret) |
---|
203 | | - goto bad_insert; |
---|
204 | | - |
---|
205 | | - /* Map output page, it needs rw access */ |
---|
206 | | - mem_flags |= KBASE_REG_GPU_WR; |
---|
207 | | - ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, |
---|
208 | | - reg->start_pfn + 1, &phys[1], |
---|
209 | | - 1, mem_flags, MCU_AS_NR, |
---|
210 | | - KBASE_MEM_GROUP_CSF_IO); |
---|
211 | | - if (ret) |
---|
212 | | - goto bad_insert_output_page; |
---|
213 | | - |
---|
214 | | - return 0; |
---|
215 | | - |
---|
216 | | -bad_insert_output_page: |
---|
217 | | - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, |
---|
218 | | - reg->start_pfn, 1, MCU_AS_NR); |
---|
219 | | -bad_insert: |
---|
220 | | - mutex_lock(&kbdev->csf.reg_lock); |
---|
221 | | - kbase_remove_va_region(reg); |
---|
222 | | - mutex_unlock(&kbdev->csf.reg_lock); |
---|
223 | | - |
---|
224 | | - return ret; |
---|
225 | | -} |
---|
226 | | - |
---|
227 | 187 | static void kernel_unmap_user_io_pages(struct kbase_context *kctx, |
---|
228 | 188 | struct kbase_queue *queue) |
---|
229 | 189 | { |
---|
230 | | - const size_t num_pages = 2; |
---|
231 | | - |
---|
232 | 190 | kbase_gpu_vm_lock(kctx); |
---|
233 | 191 | |
---|
234 | 192 | vunmap(queue->user_io_addr); |
---|
235 | 193 | |
---|
236 | | - WARN_ON(num_pages > atomic_read(&kctx->permanent_mapped_pages)); |
---|
237 | | - atomic_sub(num_pages, &kctx->permanent_mapped_pages); |
---|
| 194 | + WARN_ON(atomic_read(&kctx->permanent_mapped_pages) < KBASEP_NUM_CS_USER_IO_PAGES); |
---|
| 195 | + atomic_sub(KBASEP_NUM_CS_USER_IO_PAGES, &kctx->permanent_mapped_pages); |
---|
238 | 196 | |
---|
239 | 197 | kbase_gpu_vm_unlock(kctx); |
---|
240 | 198 | } |
---|
.. | .. |
---|
244 | 202 | { |
---|
245 | 203 | struct page *page_list[2]; |
---|
246 | 204 | pgprot_t cpu_map_prot; |
---|
| 205 | + unsigned long flags; |
---|
| 206 | + char *user_io_addr; |
---|
247 | 207 | int ret = 0; |
---|
248 | 208 | size_t i; |
---|
249 | 209 | |
---|
.. | .. |
---|
258 | 218 | /* The pages are mapped to Userspace also, so use the same mapping |
---|
259 | 219 | * attributes as used inside the CPU page fault handler. |
---|
260 | 220 | */ |
---|
261 | | -#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \ |
---|
262 | | - ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \ |
---|
263 | | - (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE))) |
---|
264 | | - cpu_map_prot = pgprot_device(PAGE_KERNEL); |
---|
265 | | -#else |
---|
266 | 221 | if (kctx->kbdev->system_coherency == COHERENCY_NONE) |
---|
267 | 222 | cpu_map_prot = pgprot_writecombine(PAGE_KERNEL); |
---|
268 | 223 | else |
---|
269 | 224 | cpu_map_prot = PAGE_KERNEL; |
---|
270 | | -#endif |
---|
271 | 225 | |
---|
272 | 226 | for (i = 0; i < ARRAY_SIZE(page_list); i++) |
---|
273 | 227 | page_list[i] = as_page(queue->phys[i]); |
---|
274 | 228 | |
---|
275 | | - queue->user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot); |
---|
| 229 | + user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot); |
---|
276 | 230 | |
---|
277 | | - if (!queue->user_io_addr) |
---|
| 231 | + if (!user_io_addr) { |
---|
| 232 | + dev_err(kctx->kbdev->dev, |
---|
| 233 | + "%s(): user_io_addr is NULL, queue: %p", |
---|
| 234 | + __func__, |
---|
| 235 | + queue); |
---|
278 | 236 | ret = -ENOMEM; |
---|
279 | | - else |
---|
| 237 | + } else { |
---|
280 | 238 | atomic_add(ARRAY_SIZE(page_list), &kctx->permanent_mapped_pages); |
---|
| 239 | + } |
---|
| 240 | + |
---|
| 241 | + kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags); |
---|
| 242 | + queue->user_io_addr = user_io_addr; |
---|
| 243 | + kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags); |
---|
281 | 244 | |
---|
282 | 245 | unlock: |
---|
283 | 246 | kbase_gpu_vm_unlock(kctx); |
---|
.. | .. |
---|
310 | 273 | * If an explicit or implicit unbind was missed by the userspace then the |
---|
311 | 274 | * mapping will persist. On process exit kernel itself will remove the mapping. |
---|
312 | 275 | */ |
---|
313 | | -static void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, |
---|
314 | | - struct kbase_queue *queue) |
---|
| 276 | +void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue) |
---|
315 | 277 | { |
---|
316 | | - const size_t num_pages = 2; |
---|
317 | | - |
---|
318 | | - gpu_munmap_user_io_pages(kctx, queue->reg); |
---|
319 | 278 | kernel_unmap_user_io_pages(kctx, queue); |
---|
320 | 279 | |
---|
321 | 280 | kbase_mem_pool_free_pages( |
---|
322 | 281 | &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], |
---|
323 | | - num_pages, queue->phys, true, false); |
---|
| 282 | + KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, true, false); |
---|
| 283 | + kbase_process_page_usage_dec(kctx, KBASEP_NUM_CS_USER_IO_PAGES); |
---|
324 | 284 | |
---|
325 | | - kfree(queue->reg); |
---|
326 | | - queue->reg = NULL; |
---|
| 285 | + /* The user_io_gpu_va should have been unmapped inside the scheduler */ |
---|
| 286 | + WARN_ONCE(queue->user_io_gpu_va, "User IO pages appear to still have a mapping"); |
---|
327 | 287 | |
---|
328 | 288 | /* If the queue has already been terminated by userspace |
---|
329 | 289 | * then the ref count for queue object will drop to 0 here. |
---|
330 | 290 | */ |
---|
331 | 291 | release_queue(queue); |
---|
332 | 292 | } |
---|
| 293 | +KBASE_EXPORT_TEST_API(kbase_csf_free_command_stream_user_pages); |
---|
333 | 294 | |
---|
334 | | -int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, |
---|
335 | | - struct kbase_queue *queue) |
---|
| 295 | +int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue) |
---|
336 | 296 | { |
---|
337 | 297 | struct kbase_device *kbdev = kctx->kbdev; |
---|
338 | | - struct kbase_va_region *reg; |
---|
339 | | - const size_t num_pages = 2; |
---|
340 | 298 | int ret; |
---|
341 | 299 | |
---|
342 | 300 | lockdep_assert_held(&kctx->csf.lock); |
---|
343 | 301 | |
---|
344 | | - reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0, |
---|
345 | | - num_pages, KBASE_REG_ZONE_MCU_SHARED); |
---|
346 | | - if (!reg) |
---|
| 302 | + ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], |
---|
| 303 | + KBASEP_NUM_CS_USER_IO_PAGES, |
---|
| 304 | + queue->phys, false, kctx->task); |
---|
| 305 | + if (ret != KBASEP_NUM_CS_USER_IO_PAGES) { |
---|
| 306 | + /* Mark both phys entries as zero to indicate no physical pages are allocated */ |
---|
| 307 | + queue->phys[0].tagged_addr = 0; |
---|
| 308 | + queue->phys[1].tagged_addr = 0; |
---|
347 | 309 | return -ENOMEM; |
---|
348 | | - |
---|
349 | | - ret = kbase_mem_pool_alloc_pages( |
---|
350 | | - &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], |
---|
351 | | - num_pages, queue->phys, false); |
---|
352 | | - |
---|
353 | | - if (ret != num_pages) |
---|
354 | | - goto phys_alloc_failed; |
---|
| 310 | + } |
---|
355 | 311 | |
---|
356 | 312 | ret = kernel_map_user_io_pages(kctx, queue); |
---|
357 | 313 | if (ret) |
---|
358 | 314 | goto kernel_map_failed; |
---|
359 | 315 | |
---|
| 316 | + kbase_process_page_usage_inc(kctx, KBASEP_NUM_CS_USER_IO_PAGES); |
---|
360 | 317 | init_user_io_pages(queue); |
---|
361 | 318 | |
---|
362 | | - ret = gpu_mmap_user_io_pages(kctx->kbdev, queue->phys, reg); |
---|
363 | | - if (ret) |
---|
364 | | - goto gpu_mmap_failed; |
---|
365 | | - |
---|
366 | | - queue->reg = reg; |
---|
| 319 | + /* user_io_gpu_va is only mapped when scheduler decides to put the queue |
---|
| 320 | + * on slot at runtime. Initialize it to 0, signalling no mapping. |
---|
| 321 | + */ |
---|
| 322 | + queue->user_io_gpu_va = 0; |
---|
367 | 323 | |
---|
368 | 324 | mutex_lock(&kbdev->csf.reg_lock); |
---|
369 | | - if (kbdev->csf.db_file_offsets > |
---|
370 | | - (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1)) |
---|
| 325 | + if (kbdev->csf.db_file_offsets > (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1)) |
---|
371 | 326 | kbdev->csf.db_file_offsets = 0; |
---|
372 | 327 | |
---|
373 | 328 | queue->db_file_offset = kbdev->csf.db_file_offsets; |
---|
374 | 329 | kbdev->csf.db_file_offsets += BASEP_QUEUE_NR_MMAP_USER_PAGES; |
---|
375 | | - |
---|
376 | | - WARN(atomic_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n"); |
---|
| 330 | + WARN(kbase_refcount_read(&queue->refcount) != 1, |
---|
| 331 | + "Incorrect refcounting for queue object\n"); |
---|
377 | 332 | /* This is the second reference taken on the queue object and |
---|
378 | 333 | * would be dropped only when the IO mapping is removed either |
---|
379 | 334 | * explicitly by userspace or implicitly by kernel on process exit. |
---|
.. | .. |
---|
384 | 339 | |
---|
385 | 340 | return 0; |
---|
386 | 341 | |
---|
387 | | -gpu_mmap_failed: |
---|
388 | | - kernel_unmap_user_io_pages(kctx, queue); |
---|
389 | | - |
---|
390 | 342 | kernel_map_failed: |
---|
391 | | - kbase_mem_pool_free_pages( |
---|
392 | | - &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], |
---|
393 | | - num_pages, queue->phys, false, false); |
---|
| 343 | + kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], |
---|
| 344 | + KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, false, false); |
---|
| 345 | + /* Mark both phys entries as zero to indicate no physical pages are allocated */ |
---|
| 346 | + queue->phys[0].tagged_addr = 0; |
---|
| 347 | + queue->phys[1].tagged_addr = 0; |
---|
394 | 348 | |
---|
395 | | -phys_alloc_failed: |
---|
396 | | - kfree(reg); |
---|
397 | | - |
---|
398 | | - return -ENOMEM; |
---|
| 349 | + return ret; |
---|
399 | 350 | } |
---|
| 351 | +KBASE_EXPORT_TEST_API(kbase_csf_alloc_command_stream_user_pages); |
---|
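Both helpers are now exported for kernel unit tests via `KBASE_EXPORT_TEST_API`. A hedged sketch of how a test might exercise them; the scaffolding is illustrative, but note the `lockdep_assert_held(&kctx->csf.lock)` above, which implies the caller must hold the context's CSF lock:

```c
/* Illustrative test-style usage; assumes a valid kctx and a registered queue. */
static int demo_user_io_pages_roundtrip(struct kbase_context *kctx,
					struct kbase_queue *queue)
{
	int err;

	mutex_lock(&kctx->csf.lock);
	err = kbase_csf_alloc_command_stream_user_pages(kctx, queue);
	if (!err) {
		/* ... exercise the queue's input/output pages ... */

		/* Pairs with the allocation and drops the extra queue ref. */
		kbase_csf_free_command_stream_user_pages(kctx, queue);
	}
	mutex_unlock(&kctx->csf.lock);

	return err;
}
```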
400 | 352 | |
---|
401 | 353 | static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx, |
---|
402 | 354 | u8 group_handle) |
---|
.. | .. |
---|
413 | 365 | |
---|
414 | 366 | return NULL; |
---|
415 | 367 | } |
---|
| 368 | + |
---|
| 369 | +struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle) |
---|
| 370 | +{ |
---|
| 371 | + return find_queue_group(kctx, group_handle); |
---|
| 372 | +} |
---|
| 373 | +KBASE_EXPORT_TEST_API(kbase_csf_find_queue_group); |
---|
416 | 374 | |
---|
417 | 375 | int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, |
---|
418 | 376 | u8 group_handle) |
---|
.. | .. |
---|
442 | 400 | |
---|
443 | 401 | static void get_queue(struct kbase_queue *queue) |
---|
444 | 402 | { |
---|
445 | | - WARN_ON(!atomic_inc_not_zero(&queue->refcount)); |
---|
| 403 | + WARN_ON(!kbase_refcount_inc_not_zero(&queue->refcount)); |
---|
446 | 404 | } |
---|
447 | 405 | |
---|
448 | 406 | static void release_queue(struct kbase_queue *queue) |
---|
449 | 407 | { |
---|
450 | 408 | lockdep_assert_held(&queue->kctx->csf.lock); |
---|
451 | | - |
---|
452 | | - WARN_ON(atomic_read(&queue->refcount) <= 0); |
---|
453 | | - |
---|
454 | | - if (atomic_dec_and_test(&queue->refcount)) { |
---|
| 409 | + if (kbase_refcount_dec_and_test(&queue->refcount)) { |
---|
455 | 410 | /* The queue can't still be on the per context list. */ |
---|
456 | 411 | WARN_ON(!list_empty(&queue->link)); |
---|
457 | 412 | WARN_ON(queue->group); |
---|
| 413 | + dev_dbg(queue->kctx->kbdev->dev, |
---|
| 414 | + "Remove any pending command queue fatal from ctx %d_%d", |
---|
| 415 | + queue->kctx->tgid, queue->kctx->id); |
---|
| 416 | + kbase_csf_event_remove_error(queue->kctx, &queue->error); |
---|
| 417 | + |
---|
| 418 | + /* After this the Userspace would be able to free the |
---|
| 419 | + * memory for GPU queue. In case the Userspace missed |
---|
| 420 | + * terminating the queue, the cleanup will happen on |
---|
| 421 | + * context termination where tear down of region tracker |
---|
| 422 | + * would free up the GPU queue memory. |
---|
| 423 | + */ |
---|
| 424 | + kbase_gpu_vm_lock(queue->kctx); |
---|
| 425 | + kbase_va_region_no_user_free_dec(queue->queue_reg); |
---|
| 426 | + kbase_gpu_vm_unlock(queue->kctx); |
---|
| 427 | + |
---|
458 | 428 | kfree(queue); |
---|
459 | 429 | } |
---|
460 | 430 | } |
---|
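The queue object is now reference counted with the `kbase_refcount_*` helpers: `get_queue()` refuses to resurrect an object whose count has already dropped to zero, and `release_queue()` performs the teardown (removing the pending error, dropping the no-user-free count on the backing region, freeing the object) only on the final put. A minimal sketch of the expected caller pattern, using the existing `find_queue()` lookup; the wrapper itself is illustrative:

```c
/* Illustrative caller pattern around the static helpers in this file. */
static void demo_with_queue(struct kbase_context *kctx, u64 buffer_gpu_addr)
{
	struct kbase_queue *queue;

	mutex_lock(&kctx->csf.lock);

	queue = find_queue(kctx, buffer_gpu_addr);
	if (queue) {
		get_queue(queue);	/* hold a reference while using it */

		/* ... operate on the queue ... */

		release_queue(queue);	/* may free it if this was the last ref */
	}

	mutex_unlock(&kctx->csf.lock);
}
```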
461 | 431 | |
---|
462 | 432 | static void oom_event_worker(struct work_struct *data); |
---|
463 | | -static void fatal_event_worker(struct work_struct *data); |
---|
| 433 | +static void cs_error_worker(struct work_struct *data); |
---|
464 | 434 | |
---|
465 | 435 | /* Between reg and reg_ex, one and only one must be null */ |
---|
466 | 436 | static int csf_queue_register_internal(struct kbase_context *kctx, |
---|
.. | .. |
---|
475 | 445 | |
---|
476 | 446 | /* Only one pointer expected, otherwise coding error */ |
---|
477 | 447 | if ((reg == NULL && reg_ex == NULL) || (reg && reg_ex)) { |
---|
478 | | - dev_err(kctx->kbdev->dev, |
---|
| 448 | + dev_dbg(kctx->kbdev->dev, |
---|
479 | 449 | "Error, one and only one param-ptr expected!"); |
---|
480 | 450 | return -EINVAL; |
---|
481 | 451 | } |
---|
.. | .. |
---|
508 | 478 | region = kbase_region_tracker_find_region_enclosing_address(kctx, |
---|
509 | 479 | queue_addr); |
---|
510 | 480 | |
---|
511 | | - if (kbase_is_region_invalid_or_free(region)) { |
---|
| 481 | + if (kbase_is_region_invalid_or_free(region) || kbase_is_region_shrinkable(region) || |
---|
| 482 | + region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { |
---|
512 | 483 | ret = -ENOENT; |
---|
513 | 484 | goto out_unlock_vm; |
---|
514 | 485 | } |
---|
.. | .. |
---|
525 | 496 | if (reg_ex && reg_ex->ex_buffer_size) { |
---|
526 | 497 | int buf_pages = (reg_ex->ex_buffer_size + |
---|
527 | 498 | (1 << PAGE_SHIFT) - 1) >> PAGE_SHIFT; |
---|
| 499 | + struct kbase_va_region *region_ex = |
---|
| 500 | + kbase_region_tracker_find_region_enclosing_address(kctx, |
---|
| 501 | + reg_ex->ex_buffer_base); |
---|
528 | 502 | |
---|
529 | | - region = kbase_region_tracker_find_region_enclosing_address( |
---|
530 | | - kctx, reg_ex->ex_buffer_base); |
---|
531 | | - if (kbase_is_region_invalid_or_free(region)) { |
---|
| 503 | + if (kbase_is_region_invalid_or_free(region_ex)) { |
---|
532 | 504 | ret = -ENOENT; |
---|
533 | 505 | goto out_unlock_vm; |
---|
534 | 506 | } |
---|
535 | 507 | |
---|
536 | | - if (buf_pages > (region->nr_pages - |
---|
537 | | - ((reg_ex->ex_buffer_base >> PAGE_SHIFT) - |
---|
538 | | - region->start_pfn))) { |
---|
| 508 | + if (buf_pages > (region_ex->nr_pages - |
---|
| 509 | + ((reg_ex->ex_buffer_base >> PAGE_SHIFT) - region_ex->start_pfn))) { |
---|
539 | 510 | ret = -EINVAL; |
---|
540 | 511 | goto out_unlock_vm; |
---|
541 | 512 | } |
---|
542 | 513 | |
---|
543 | | - region = kbase_region_tracker_find_region_enclosing_address( |
---|
544 | | - kctx, reg_ex->ex_offset_var_addr); |
---|
545 | | - if (kbase_is_region_invalid_or_free(region)) { |
---|
| 514 | + region_ex = kbase_region_tracker_find_region_enclosing_address( |
---|
| 515 | + kctx, reg_ex->ex_offset_var_addr); |
---|
| 516 | + if (kbase_is_region_invalid_or_free(region_ex)) { |
---|
546 | 517 | ret = -ENOENT; |
---|
547 | 518 | goto out_unlock_vm; |
---|
548 | 519 | } |
---|
.. | .. |
---|
557 | 528 | |
---|
558 | 529 | queue->kctx = kctx; |
---|
559 | 530 | queue->base_addr = queue_addr; |
---|
| 531 | + |
---|
560 | 532 | queue->queue_reg = region; |
---|
| 533 | + kbase_va_region_no_user_free_inc(region); |
---|
| 534 | + |
---|
561 | 535 | queue->size = (queue_size << PAGE_SHIFT); |
---|
562 | 536 | queue->csi_index = KBASEP_IF_NR_INVALID; |
---|
563 | 537 | queue->enabled = false; |
---|
564 | 538 | |
---|
565 | 539 | queue->priority = reg->priority; |
---|
566 | | - atomic_set(&queue->refcount, 1); |
---|
| 540 | + kbase_refcount_set(&queue->refcount, 1); |
---|
567 | 541 | |
---|
568 | 542 | queue->group = NULL; |
---|
569 | 543 | queue->bind_state = KBASE_CSF_QUEUE_UNBOUND; |
---|
.. | .. |
---|
574 | 548 | queue->sync_ptr = 0; |
---|
575 | 549 | queue->sync_value = 0; |
---|
576 | 550 | |
---|
| 551 | +#if IS_ENABLED(CONFIG_DEBUG_FS) |
---|
| 552 | + queue->saved_cmd_ptr = 0; |
---|
| 553 | +#endif |
---|
| 554 | + |
---|
577 | 555 | queue->sb_status = 0; |
---|
578 | 556 | queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED; |
---|
| 557 | + |
---|
| 558 | + atomic_set(&queue->pending, 0); |
---|
579 | 559 | |
---|
580 | 560 | INIT_LIST_HEAD(&queue->link); |
---|
581 | 561 | INIT_LIST_HEAD(&queue->error.link); |
---|
582 | 562 | INIT_WORK(&queue->oom_event_work, oom_event_worker); |
---|
583 | | - INIT_WORK(&queue->fatal_event_work, fatal_event_worker); |
---|
| 563 | + INIT_WORK(&queue->cs_error_work, cs_error_worker); |
---|
584 | 564 | list_add(&queue->link, &kctx->csf.queue_list); |
---|
585 | 565 | |
---|
586 | | - region->flags |= KBASE_REG_NO_USER_FREE; |
---|
| 566 | + queue->extract_ofs = 0; |
---|
| 567 | + |
---|
| 568 | + region->user_data = queue; |
---|
587 | 569 | |
---|
588 | 570 | /* Initialize the cs_trace configuration parameters, When buffer_size |
---|
589 | 571 | * is 0, trace is disabled. Here we only update the fields when |
---|
.. | .. |
---|
612 | 594 | int kbase_csf_queue_register(struct kbase_context *kctx, |
---|
613 | 595 | struct kbase_ioctl_cs_queue_register *reg) |
---|
614 | 596 | { |
---|
| 597 | + /* Validate the ring buffer configuration parameters */ |
---|
| 598 | + if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE || |
---|
| 599 | + reg->buffer_size > CS_RING_BUFFER_MAX_SIZE || |
---|
| 600 | + reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr || |
---|
| 601 | + reg->buffer_gpu_addr & ~PAGE_MASK) |
---|
| 602 | + return -EINVAL; |
---|
| 603 | + |
---|
615 | 604 | return csf_queue_register_internal(kctx, reg, NULL); |
---|
616 | 605 | } |
---|
617 | 606 | |
---|
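The new checks reject a ring buffer that is smaller than one page, larger than 2 GiB, not a power of two in size, or not page aligned: `size & (size - 1)` is zero only for powers of two, and `addr & ~PAGE_MASK` is the sub-page offset, which must be zero. The same logic in isolation, as plain C that can be compiled and tested on its own (the DEMO_* names are illustrative and mirror the macros defined earlier in the patch):

```c
#include <stdbool.h>
#include <stdint.h>

#define DEMO_PAGE_SIZE   4096u
#define DEMO_RB_MIN_SIZE 4096u        /* mirrors CS_RING_BUFFER_MIN_SIZE */
#define DEMO_RB_MAX_SIZE (1u << 31)   /* mirrors CS_RING_BUFFER_MAX_SIZE (2 GiB) */

static bool demo_ring_buffer_config_valid(uint64_t gpu_addr, uint32_t size)
{
	if (size < DEMO_RB_MIN_SIZE || size > DEMO_RB_MAX_SIZE)
		return false;
	if (size & (size - 1))                  /* not a power of two */
		return false;
	if (!gpu_addr)                          /* no buffer address */
		return false;
	if (gpu_addr & (DEMO_PAGE_SIZE - 1))    /* not page aligned */
		return false;
	return true;
}

/* demo_ring_buffer_config_valid(0x8000000, 65536) -> true
 * demo_ring_buffer_config_valid(0x8000100, 65536) -> false (misaligned)
 * demo_ring_buffer_config_valid(0x8000000, 65537) -> false (not a power of two)
 */
```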
.. | .. |
---|
630 | 619 | if (glb_version < kbase_csf_interface_version(1, 1, 0)) |
---|
631 | 620 | return -EINVAL; |
---|
632 | 621 | |
---|
633 | | - /* Validate the cs_trace configuration parameters */ |
---|
634 | | - if (reg->ex_buffer_size && |
---|
635 | | - ((reg->ex_event_size > max_size) || |
---|
636 | | - (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) || |
---|
637 | | - (reg->ex_buffer_size < min_buf_size))) |
---|
638 | | - return -EINVAL; |
---|
| 622 | + /* Validate the ring buffer configuration parameters */ |
---|
| 623 | + if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE || |
---|
| 624 | + reg->buffer_size > CS_RING_BUFFER_MAX_SIZE || |
---|
| 625 | + reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr || |
---|
| 626 | + reg->buffer_gpu_addr & ~PAGE_MASK) |
---|
| 627 | + return -EINVAL; |
---|
639 | 628 | |
---|
640 | | - return csf_queue_register_internal(kctx, NULL, reg); |
---|
| 629 | + /* Validate the cs_trace configuration parameters */ |
---|
| 630 | + if (reg->ex_buffer_size && |
---|
| 631 | + ((reg->ex_event_size > max_size) || |
---|
| 632 | + (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) || |
---|
| 633 | + (reg->ex_buffer_size < min_buf_size))) |
---|
| 634 | + return -EINVAL; |
---|
| 635 | + |
---|
| 636 | + return csf_queue_register_internal(kctx, NULL, reg); |
---|
641 | 637 | } |
---|
642 | 638 | |
---|
643 | 639 | static void unbind_queue(struct kbase_context *kctx, |
---|
.. | .. |
---|
664 | 660 | queue = find_queue(kctx, term->buffer_gpu_addr); |
---|
665 | 661 | |
---|
666 | 662 | if (queue) { |
---|
667 | | - unsigned long flags; |
---|
668 | | - |
---|
669 | 663 | /* As the GPU queue has been terminated by the |
---|
670 | 664 | * user space, undo the actions that were performed when the |
---|
671 | 665 | * queue was registered i.e. remove the queue from the per |
---|
.. | .. |
---|
678 | 672 | unbind_queue(kctx, queue); |
---|
679 | 673 | |
---|
680 | 674 | kbase_gpu_vm_lock(kctx); |
---|
681 | | - if (!WARN_ON(!queue->queue_reg)) { |
---|
682 | | - /* After this the Userspace would be able to free the |
---|
683 | | - * memory for GPU queue. In case the Userspace missed |
---|
684 | | - * terminating the queue, the cleanup will happen on |
---|
685 | | - * context termination where teardown of region tracker |
---|
686 | | - * would free up the GPU queue memory. |
---|
687 | | - */ |
---|
688 | | - queue->queue_reg->flags &= ~KBASE_REG_NO_USER_FREE; |
---|
689 | | - } |
---|
| 675 | + if (!WARN_ON(!queue->queue_reg)) |
---|
| 676 | + queue->queue_reg->user_data = NULL; |
---|
690 | 677 | kbase_gpu_vm_unlock(kctx); |
---|
691 | | - |
---|
692 | | - spin_lock_irqsave(&kctx->csf.event_lock, flags); |
---|
693 | | - dev_dbg(kctx->kbdev->dev, |
---|
694 | | - "Remove any pending command queue fatal from context %pK\n", |
---|
695 | | - (void *)kctx); |
---|
696 | | - list_del_init(&queue->error.link); |
---|
697 | | - spin_unlock_irqrestore(&kctx->csf.event_lock, flags); |
---|
698 | 678 | |
---|
699 | 679 | release_queue(queue); |
---|
700 | 680 | } |
---|
.. | .. |
---|
776 | 756 | return group; |
---|
777 | 757 | } |
---|
778 | 758 | |
---|
| 759 | +static void enqueue_gpu_submission_work(struct kbase_context *const kctx) |
---|
| 760 | +{ |
---|
| 761 | + queue_work(system_highpri_wq, &kctx->csf.pending_submission_work); |
---|
| 762 | +} |
---|
| 763 | + |
---|
| 764 | +/** |
---|
| 765 | + * pending_submission_worker() - Work item to process pending kicked GPU command queues. |
---|
| 766 | + * |
---|
| 767 | + * @work: Pointer to pending_submission_work. |
---|
| 768 | + * |
---|
| 769 | + * This function starts all pending queues for which work was |
---|
| 770 | + * previously submitted via an ioctl call from the application thread. |
---|
| 771 | + * If a queue is already scheduled and resident it is started right |
---|
| 772 | + * away, otherwise it is started once its group is made resident. |
---|
| 773 | + */ |
---|
| 774 | +static void pending_submission_worker(struct work_struct *work) |
---|
| 775 | +{ |
---|
| 776 | + struct kbase_context *kctx = |
---|
| 777 | + container_of(work, struct kbase_context, csf.pending_submission_work); |
---|
| 778 | + struct kbase_device *kbdev = kctx->kbdev; |
---|
| 779 | + struct kbase_queue *queue; |
---|
| 780 | + int err = kbase_reset_gpu_prevent_and_wait(kbdev); |
---|
| 781 | + |
---|
| 782 | + if (err) { |
---|
| 783 | + dev_err(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue "); |
---|
| 784 | + return; |
---|
| 785 | + } |
---|
| 786 | + |
---|
| 787 | + mutex_lock(&kctx->csf.lock); |
---|
| 788 | + |
---|
| 789 | + /* Iterate through the queue list and schedule the pending ones for submission. */ |
---|
| 790 | + list_for_each_entry(queue, &kctx->csf.queue_list, link) { |
---|
| 791 | + if (atomic_cmpxchg(&queue->pending, 1, 0) == 1) { |
---|
| 792 | + struct kbase_queue_group *group = get_bound_queue_group(queue); |
---|
| 793 | + int ret; |
---|
| 794 | + |
---|
| 795 | + if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND) { |
---|
| 796 | + dev_dbg(kbdev->dev, "queue is not bound to a group"); |
---|
| 797 | + continue; |
---|
| 798 | + } |
---|
| 799 | + |
---|
| 800 | + ret = kbase_csf_scheduler_queue_start(queue); |
---|
| 801 | + if (unlikely(ret)) { |
---|
| 802 | + dev_dbg(kbdev->dev, "Failed to start queue"); |
---|
| 803 | + if (ret == -EBUSY) { |
---|
| 804 | + atomic_cmpxchg(&queue->pending, 0, 1); |
---|
| 805 | + enqueue_gpu_submission_work(kctx); |
---|
| 806 | + } |
---|
| 807 | + } |
---|
| 808 | + } |
---|
| 809 | + } |
---|
| 810 | + |
---|
| 811 | + mutex_unlock(&kctx->csf.lock); |
---|
| 812 | + |
---|
| 813 | + kbase_reset_gpu_allow(kbdev); |
---|
| 814 | +} |
---|
| 815 | + |
---|
779 | 816 | void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot) |
---|
780 | 817 | { |
---|
781 | 818 | if (WARN_ON(slot < 0)) |
---|
782 | 819 | return; |
---|
| 820 | + |
---|
| 821 | + kbase_csf_scheduler_spin_lock_assert_held(kbdev); |
---|
783 | 822 | |
---|
784 | 823 | kbase_csf_ring_csg_slots_doorbell(kbdev, (u32) (1 << slot)); |
---|
785 | 824 | } |
---|
.. | .. |
---|
793 | 832 | (u32) ((1U << kbdev->csf.global_iface.group_num) - 1); |
---|
794 | 833 | u32 value; |
---|
795 | 834 | |
---|
| 835 | + kbase_csf_scheduler_spin_lock_assert_held(kbdev); |
---|
| 836 | + |
---|
796 | 837 | if (WARN_ON(slot_bitmap > allowed_bitmap)) |
---|
797 | 838 | return; |
---|
| 839 | + |
---|
| 840 | + /* The access to GLB_DB_REQ/ACK needs to be ordered with respect to CSG_REQ/ACK and |
---|
| 841 | + * CSG_DB_REQ/ACK to avoid a scenario where a CSI request overlaps with a CSG request |
---|
| 842 | + * or 2 CSI requests overlap and FW ends up missing the 2nd request. |
---|
| 843 | + * Memory barrier is required, both on Host and FW side, to guarantee the ordering. |
---|
| 844 | + * |
---|
| 845 | + * 'osh' is used as CPU and GPU would be in the same Outer shareable domain. |
---|
| 846 | + */ |
---|
| 847 | + dmb(osh); |
---|
798 | 848 | |
---|
799 | 849 | value = kbase_csf_firmware_global_output(global_iface, GLB_DB_ACK); |
---|
800 | 850 | value ^= slot_bitmap; |
---|
.. | .. |
---|
822 | 872 | struct kbase_csf_cmd_stream_group_info *ginfo; |
---|
823 | 873 | u32 value; |
---|
824 | 874 | |
---|
| 875 | + kbase_csf_scheduler_spin_lock_assert_held(kbdev); |
---|
| 876 | + |
---|
825 | 877 | if (WARN_ON(csg_nr < 0) || |
---|
826 | 878 | WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num)) |
---|
827 | 879 | return; |
---|
.. | .. |
---|
831 | 883 | if (WARN_ON(csi_index < 0) || |
---|
832 | 884 | WARN_ON(csi_index >= ginfo->stream_num)) |
---|
833 | 885 | return; |
---|
| 886 | + |
---|
| 887 | + /* The access to CSG_DB_REQ/ACK needs to be ordered with respect to |
---|
| 888 | + * CS_REQ/ACK to avoid a scenario where CSG_DB_REQ/ACK becomes visible to |
---|
| 889 | + * FW before CS_REQ/ACK is set. |
---|
| 890 | + * |
---|
| 891 | + * 'osh' is used as CPU and GPU would be in the same outer shareable domain. |
---|
| 892 | + */ |
---|
| 893 | + dmb(osh); |
---|
834 | 894 | |
---|
835 | 895 | value = kbase_csf_firmware_csg_output(ginfo, CSG_DB_ACK); |
---|
836 | 896 | value ^= (1 << csi_index); |
---|
.. | .. |
---|
845 | 905 | struct kbase_ioctl_cs_queue_kick *kick) |
---|
846 | 906 | { |
---|
847 | 907 | struct kbase_device *kbdev = kctx->kbdev; |
---|
848 | | - struct kbase_queue_group *group; |
---|
849 | | - struct kbase_queue *queue; |
---|
| 908 | + bool trigger_submission = false; |
---|
| 909 | + struct kbase_va_region *region; |
---|
850 | 910 | int err = 0; |
---|
851 | 911 | |
---|
852 | | - err = kbase_reset_gpu_prevent_and_wait(kbdev); |
---|
853 | | - if (err) { |
---|
854 | | - dev_warn( |
---|
855 | | - kbdev->dev, |
---|
856 | | - "Unsuccessful GPU reset detected when kicking queue (buffer_addr=0x%.16llx)", |
---|
857 | | - kick->buffer_gpu_addr); |
---|
858 | | - return err; |
---|
859 | | - } |
---|
| 912 | + KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK(kbdev, kctx->id, kick->buffer_gpu_addr); |
---|
860 | 913 | |
---|
861 | | - mutex_lock(&kctx->csf.lock); |
---|
862 | | - queue = find_queue(kctx, kick->buffer_gpu_addr); |
---|
863 | | - if (!queue) |
---|
864 | | - err = -EINVAL; |
---|
| 914 | + /* GPU work submission happens asynchronously to avoid contention on the |
---|
| 915 | + * scheduler lock, which would otherwise block the application thread. For this reason, |
---|
| 916 | + * the vm_lock is used here to look up the queue, based on its buffer_gpu_addr, |
---|
| 917 | + * in the context's list of active va_regions. |
---|
| 918 | + * Once the target queue is found, its pending flag is set to one atomically, avoiding |
---|
| 919 | + * a race between the submission ioctl thread and the work item. |
---|
| 920 | + */ |
---|
| 921 | + kbase_gpu_vm_lock(kctx); |
---|
| 922 | + region = kbase_region_tracker_find_region_enclosing_address(kctx, kick->buffer_gpu_addr); |
---|
| 923 | + if (!kbase_is_region_invalid_or_free(region)) { |
---|
| 924 | + struct kbase_queue *queue = region->user_data; |
---|
865 | 925 | |
---|
866 | | - if (!err) { |
---|
867 | | - group = get_bound_queue_group(queue); |
---|
868 | | - if (!group) { |
---|
869 | | - dev_err(kctx->kbdev->dev, "queue not bound\n"); |
---|
870 | | - err = -EINVAL; |
---|
| 926 | + if (queue) { |
---|
| 927 | + atomic_cmpxchg(&queue->pending, 0, 1); |
---|
| 928 | + trigger_submission = true; |
---|
871 | 929 | } |
---|
| 930 | + } else { |
---|
| 931 | + dev_dbg(kbdev->dev, |
---|
| 932 | + "Attempt to kick GPU queue without a valid command buffer region"); |
---|
| 933 | + err = -EFAULT; |
---|
872 | 934 | } |
---|
| 935 | + kbase_gpu_vm_unlock(kctx); |
---|
873 | 936 | |
---|
874 | | - if (!err) |
---|
875 | | - err = kbase_csf_scheduler_queue_start(queue); |
---|
876 | | - mutex_unlock(&kctx->csf.lock); |
---|
877 | | - kbase_reset_gpu_allow(kbdev); |
---|
| 937 | + if (likely(trigger_submission)) |
---|
| 938 | + enqueue_gpu_submission_work(kctx); |
---|
878 | 939 | |
---|
879 | 940 | return err; |
---|
880 | 941 | } |
---|
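The kick ioctl and the worker now hand work over through a single per-queue `pending` flag instead of calling the scheduler directly from the ioctl. The producer sets it with `atomic_cmpxchg(&queue->pending, 0, 1)` and queues the work item; the consumer claims it back with `atomic_cmpxchg(&queue->pending, 1, 0)`, so a kick racing with an in-flight worker is neither lost nor processed twice. A stripped-down sketch of the same handshake, with illustrative names:

```c
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>

struct demo_queue {
	atomic_t pending;
	struct work_struct work;
};

/* Producer side (e.g. an ioctl): mark the queue and ensure the worker runs. */
static void demo_kick(struct demo_queue *q)
{
	atomic_cmpxchg(&q->pending, 0, 1);	/* repeated kicks coalesce */
	schedule_work(&q->work);
}

/* Consumer side (work item): claim each kick exactly once. */
static void demo_worker(struct work_struct *work)
{
	struct demo_queue *q = container_of(work, struct demo_queue, work);

	if (atomic_cmpxchg(&q->pending, 1, 0) == 1) {
		/* Start/submit the queue here; on a transient failure the
		 * driver above re-sets pending and re-queues the work item.
		 */
	}
}
```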
.. | .. |
---|
884 | 945 | { |
---|
885 | 946 | lockdep_assert_held(&kctx->csf.lock); |
---|
886 | 947 | |
---|
| 948 | + if (WARN_ON(queue->csi_index < 0)) |
---|
| 949 | + return; |
---|
| 950 | + |
---|
887 | 951 | if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) { |
---|
888 | 952 | unsigned long flags; |
---|
889 | 953 | |
---|
890 | 954 | kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags); |
---|
891 | 955 | bitmap_clear(queue->group->protm_pending_bitmap, |
---|
892 | 956 | queue->csi_index, 1); |
---|
893 | | - KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, PROTM_PENDING_CLEAR, |
---|
| 957 | + KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, CSI_PROTM_PEND_CLEAR, |
---|
894 | 958 | queue->group, queue, queue->group->protm_pending_bitmap[0]); |
---|
895 | 959 | queue->group->bound_queues[queue->csi_index] = NULL; |
---|
896 | 960 | queue->group = NULL; |
---|
897 | 961 | kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags); |
---|
898 | 962 | |
---|
899 | 963 | put_user_pages_mmap_handle(kctx, queue); |
---|
| 964 | + WARN_ON_ONCE(queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID); |
---|
900 | 965 | queue->bind_state = KBASE_CSF_QUEUE_UNBOUND; |
---|
901 | 966 | } |
---|
902 | 967 | } |
---|
.. | .. |
---|
938 | 1003 | } |
---|
939 | 1004 | } |
---|
940 | 1005 | |
---|
941 | | -void kbase_csf_queue_unbind(struct kbase_queue *queue) |
---|
| 1006 | +static bool kbase_csf_queue_phys_allocated(struct kbase_queue *queue) |
---|
| 1007 | +{ |
---|
| 1008 | + /* The queue's phys are zeroed when allocation fails. Both of them being |
---|
| 1009 | + * zero is an impossible condition for a successfully allocated set of physical pages. |
---|
| 1010 | + */ |
---|
| 1011 | + |
---|
| 1012 | + return (queue->phys[0].tagged_addr | queue->phys[1].tagged_addr); |
---|
| 1013 | +} |
---|
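This test works because the allocation error path earlier in the patch explicitly zeroes `phys[0].tagged_addr` and `phys[1].tagged_addr`, while a successful allocation can never leave both zero; ORing the two values is therefore a cheap "does this queue own physical pages?" check. A short illustration of how the unbind paths use it, mirroring the calls later in this patch (the wrapper name is illustrative):

```c
/* Illustrative teardown step: free the USER I/O pages only if they exist. */
static void demo_teardown_user_io_pages(struct kbase_context *kctx,
					struct kbase_queue *queue)
{
	if (kbase_csf_queue_phys_allocated(queue))
		kbase_csf_free_command_stream_user_pages(kctx, queue);
	/* Otherwise nothing to do: allocation failed or never happened. */
}
```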
| 1014 | + |
---|
| 1015 | +void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit) |
---|
942 | 1016 | { |
---|
943 | 1017 | struct kbase_context *kctx = queue->kctx; |
---|
944 | 1018 | |
---|
.. | .. |
---|
952 | 1026 | * whereas CSG TERM request would result in an immediate abort or |
---|
953 | 1027 | * cancellation of the pending work. |
---|
954 | 1028 | */ |
---|
955 | | - if (current->flags & PF_EXITING) { |
---|
| 1029 | + if (process_exit) { |
---|
956 | 1030 | struct kbase_queue_group *group = get_bound_queue_group(queue); |
---|
957 | 1031 | |
---|
958 | 1032 | if (group) |
---|
.. | .. |
---|
963 | 1037 | unbind_queue(kctx, queue); |
---|
964 | 1038 | } |
---|
965 | 1039 | |
---|
966 | | - /* Free the resources, if allocated for this queue. */ |
---|
967 | | - if (queue->reg) |
---|
| 1040 | + /* Free the resources, if allocated phys for this queue */ |
---|
| 1041 | + if (kbase_csf_queue_phys_allocated(queue)) |
---|
968 | 1042 | kbase_csf_free_command_stream_user_pages(kctx, queue); |
---|
969 | 1043 | } |
---|
970 | 1044 | |
---|
.. | .. |
---|
977 | 1051 | WARN_ON(queue->bind_state == KBASE_CSF_QUEUE_BOUND); |
---|
978 | 1052 | unbind_stopped_queue(kctx, queue); |
---|
979 | 1053 | |
---|
980 | | - /* Free the resources, if allocated for this queue. */ |
---|
981 | | - if (queue->reg) |
---|
| 1054 | + /* Free the resources, if allocated phys for this queue */ |
---|
| 1055 | + if (kbase_csf_queue_phys_allocated(queue)) |
---|
982 | 1056 | kbase_csf_free_command_stream_user_pages(kctx, queue); |
---|
983 | 1057 | } |
---|
984 | 1058 | |
---|
.. | .. |
---|
1041 | 1115 | * @kctx: Pointer to kbase context where the queue group is created at |
---|
1042 | 1116 | * @s_buf: Pointer to suspend buffer that is attached to queue group |
---|
1043 | 1117 | * |
---|
1044 | | - * Return: 0 if suspend buffer is successfully allocated and reflected to GPU |
---|
1045 | | - * MMU page table. Otherwise -ENOMEM. |
---|
| 1118 | + * Return: 0 if physical pages for the suspend buffer are successfully allocated. |
---|
| 1119 | + * Otherwise -ENOMEM or another error code. |
---|
1046 | 1120 | */ |
---|
1047 | 1121 | static int create_normal_suspend_buffer(struct kbase_context *const kctx, |
---|
1048 | 1122 | struct kbase_normal_suspend_buffer *s_buf) |
---|
1049 | 1123 | { |
---|
1050 | | - struct kbase_va_region *reg = NULL; |
---|
1051 | | - const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR; |
---|
1052 | 1124 | const size_t nr_pages = |
---|
1053 | 1125 | PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); |
---|
1054 | | - int err = 0; |
---|
| 1126 | + int err; |
---|
1055 | 1127 | |
---|
1056 | 1128 | lockdep_assert_held(&kctx->csf.lock); |
---|
1057 | 1129 | |
---|
1058 | | - /* Allocate and initialize Region Object */ |
---|
1059 | | - reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0, |
---|
1060 | | - nr_pages, KBASE_REG_ZONE_MCU_SHARED); |
---|
1061 | | - |
---|
1062 | | - if (!reg) |
---|
1063 | | - return -ENOMEM; |
---|
| 1130 | + /* The suspend buffer's mapping address is valid only when the CSG is to |
---|
| 1131 | + * run on a slot; initialize it to 0 to signal that the buffer is not mapped. |
---|
| 1132 | + */ |
---|
| 1133 | + s_buf->gpu_va = 0; |
---|
1064 | 1134 | |
---|
1065 | 1135 | s_buf->phy = kcalloc(nr_pages, sizeof(*s_buf->phy), GFP_KERNEL); |
---|
1066 | 1136 | |
---|
1067 | | - if (!s_buf->phy) { |
---|
1068 | | - err = -ENOMEM; |
---|
1069 | | - goto phy_alloc_failed; |
---|
1070 | | - } |
---|
1071 | | - |
---|
1072 | | - /* Get physical page for a normal suspend buffer */ |
---|
1073 | | - err = kbase_mem_pool_alloc_pages( |
---|
1074 | | - &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], |
---|
1075 | | - nr_pages, &s_buf->phy[0], false); |
---|
1076 | | - |
---|
1077 | | - if (err < 0) |
---|
1078 | | - goto phy_pages_alloc_failed; |
---|
1079 | | - |
---|
1080 | | - /* Insert Region Object into rbtree and make virtual address available |
---|
1081 | | - * to map it to physical page |
---|
1082 | | - */ |
---|
1083 | | - mutex_lock(&kctx->kbdev->csf.reg_lock); |
---|
1084 | | - err = kbase_add_va_region_rbtree(kctx->kbdev, reg, 0, nr_pages, 1); |
---|
1085 | | - reg->flags &= ~KBASE_REG_FREE; |
---|
1086 | | - mutex_unlock(&kctx->kbdev->csf.reg_lock); |
---|
1087 | | - |
---|
1088 | | - if (err) |
---|
1089 | | - goto add_va_region_failed; |
---|
1090 | | - |
---|
1091 | | - /* Update MMU table */ |
---|
1092 | | - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, |
---|
1093 | | - reg->start_pfn, &s_buf->phy[0], |
---|
1094 | | - nr_pages, mem_flags, |
---|
1095 | | - MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW); |
---|
1096 | | - if (err) |
---|
1097 | | - goto mmu_insert_failed; |
---|
1098 | | - |
---|
1099 | | - s_buf->reg = reg; |
---|
1100 | | - |
---|
1101 | | - return 0; |
---|
1102 | | - |
---|
1103 | | -mmu_insert_failed: |
---|
1104 | | - mutex_lock(&kctx->kbdev->csf.reg_lock); |
---|
1105 | | - WARN_ON(kbase_remove_va_region(reg)); |
---|
1106 | | - mutex_unlock(&kctx->kbdev->csf.reg_lock); |
---|
1107 | | - |
---|
1108 | | -add_va_region_failed: |
---|
1109 | | - kbase_mem_pool_free_pages( |
---|
1110 | | - &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, |
---|
1111 | | - &s_buf->phy[0], false, false); |
---|
1112 | | - |
---|
1113 | | -phy_pages_alloc_failed: |
---|
1114 | | - kfree(s_buf->phy); |
---|
1115 | | -phy_alloc_failed: |
---|
1116 | | - kfree(reg); |
---|
1117 | | - |
---|
1118 | | - return err; |
---|
1119 | | -} |
---|
1120 | | - |
---|
1121 | | -/** |
---|
1122 | | - * create_protected_suspend_buffer() - Create protected-mode suspend buffer |
---|
1123 | | - * per queue group |
---|
1124 | | - * |
---|
1125 | | - * @kbdev: Instance of a GPU platform device that implements a CSF interface. |
---|
1126 | | - * @s_buf: Pointer to suspend buffer that is attached to queue group |
---|
1127 | | - * |
---|
1128 | | - * Return: 0 if suspend buffer is successfully allocated and reflected to GPU |
---|
1129 | | - * MMU page table. Otherwise -ENOMEM. |
---|
1130 | | - */ |
---|
1131 | | -static int create_protected_suspend_buffer(struct kbase_device *const kbdev, |
---|
1132 | | - struct kbase_protected_suspend_buffer *s_buf) |
---|
1133 | | -{ |
---|
1134 | | - struct kbase_va_region *reg = NULL; |
---|
1135 | | - struct tagged_addr *phys = NULL; |
---|
1136 | | - const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR; |
---|
1137 | | - const size_t nr_pages = |
---|
1138 | | - PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); |
---|
1139 | | - int err = 0; |
---|
1140 | | - |
---|
1141 | | - /* Allocate and initialize Region Object */ |
---|
1142 | | - reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, |
---|
1143 | | - nr_pages, KBASE_REG_ZONE_MCU_SHARED); |
---|
1144 | | - |
---|
1145 | | - if (!reg) |
---|
| 1137 | + if (!s_buf->phy) |
---|
1146 | 1138 | return -ENOMEM; |
---|
1147 | 1139 | |
---|
1148 | | - phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL); |
---|
1149 | | - if (!phys) { |
---|
1150 | | - err = -ENOMEM; |
---|
1151 | | - goto phy_alloc_failed; |
---|
| 1140 | + /* Get physical page for a normal suspend buffer */ |
---|
| 1141 | + err = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, |
---|
| 1142 | + &s_buf->phy[0], false, kctx->task); |
---|
| 1143 | + |
---|
| 1144 | + if (err < 0) { |
---|
| 1145 | + kfree(s_buf->phy); |
---|
| 1146 | + return err; |
---|
1152 | 1147 | } |
---|
1153 | 1148 | |
---|
1154 | | - s_buf->pma = kbase_csf_protected_memory_alloc(kbdev, phys, |
---|
1155 | | - nr_pages); |
---|
1156 | | - if (s_buf->pma == NULL) { |
---|
1157 | | - err = -ENOMEM; |
---|
1158 | | - goto pma_alloc_failed; |
---|
1159 | | - } |
---|
1160 | | - |
---|
1161 | | - /* Insert Region Object into rbtree and make virtual address available |
---|
1162 | | - * to map it to physical page |
---|
1163 | | - */ |
---|
1164 | | - mutex_lock(&kbdev->csf.reg_lock); |
---|
1165 | | - err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_pages, 1); |
---|
1166 | | - reg->flags &= ~KBASE_REG_FREE; |
---|
1167 | | - mutex_unlock(&kbdev->csf.reg_lock); |
---|
1168 | | - |
---|
1169 | | - if (err) |
---|
1170 | | - goto add_va_region_failed; |
---|
1171 | | - |
---|
1172 | | - /* Update MMU table */ |
---|
1173 | | - err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, |
---|
1174 | | - reg->start_pfn, phys, |
---|
1175 | | - nr_pages, mem_flags, MCU_AS_NR, |
---|
1176 | | - KBASE_MEM_GROUP_CSF_FW); |
---|
1177 | | - if (err) |
---|
1178 | | - goto mmu_insert_failed; |
---|
1179 | | - |
---|
1180 | | - s_buf->reg = reg; |
---|
1181 | | - kfree(phys); |
---|
| 1149 | + kbase_process_page_usage_inc(kctx, nr_pages); |
---|
1182 | 1150 | return 0; |
---|
1183 | | - |
---|
1184 | | -mmu_insert_failed: |
---|
1185 | | - mutex_lock(&kbdev->csf.reg_lock); |
---|
1186 | | - WARN_ON(kbase_remove_va_region(reg)); |
---|
1187 | | - mutex_unlock(&kbdev->csf.reg_lock); |
---|
1188 | | - |
---|
1189 | | -add_va_region_failed: |
---|
1190 | | - kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages); |
---|
1191 | | -pma_alloc_failed: |
---|
1192 | | - kfree(phys); |
---|
1193 | | -phy_alloc_failed: |
---|
1194 | | - kfree(reg); |
---|
1195 | | - |
---|
1196 | | - return err; |
---|
1197 | 1151 | } |
---|
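The number of backing pages is derived from the firmware-reported `suspend_size` with `PFN_UP()`, i.e. a round-up to whole pages; with 4 KiB pages, a suspend size of 9000 bytes needs PFN_UP(9000) = 3 pages. The equivalent arithmetic as a small stand-alone C program (a 4 KiB page size is assumed here for the example):

```c
#include <stdint.h>
#include <stdio.h>

#define DEMO_PAGE_SHIFT 12u
#define DEMO_PAGE_SIZE  (1u << DEMO_PAGE_SHIFT)

/* Same rounding as the kernel's PFN_UP(): pages needed to hold 'bytes'. */
static uint64_t demo_pfn_up(uint64_t bytes)
{
	return (bytes + DEMO_PAGE_SIZE - 1) >> DEMO_PAGE_SHIFT;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)demo_pfn_up(9000)); /* prints 3 */
	printf("%llu\n", (unsigned long long)demo_pfn_up(8192)); /* prints 2 */
	return 0;
}
```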
1198 | 1152 | |
---|
1199 | 1153 | static void timer_event_worker(struct work_struct *data); |
---|
.. | .. |
---|
1214 | 1168 | static int create_suspend_buffers(struct kbase_context *const kctx, |
---|
1215 | 1169 | struct kbase_queue_group * const group) |
---|
1216 | 1170 | { |
---|
1217 | | - int err = 0; |
---|
1218 | | - |
---|
1219 | 1171 | if (create_normal_suspend_buffer(kctx, &group->normal_suspend_buf)) { |
---|
1220 | 1172 | dev_err(kctx->kbdev->dev, "Failed to create normal suspend buffer\n"); |
---|
1221 | 1173 | return -ENOMEM; |
---|
1222 | 1174 | } |
---|
1223 | 1175 | |
---|
1224 | | - if (kctx->kbdev->csf.pma_dev) { |
---|
1225 | | - err = create_protected_suspend_buffer(kctx->kbdev, |
---|
1226 | | - &group->protected_suspend_buf); |
---|
1227 | | - if (err) { |
---|
1228 | | - term_normal_suspend_buffer(kctx, |
---|
1229 | | - &group->normal_suspend_buf); |
---|
1230 | | - dev_err(kctx->kbdev->dev, "Failed to create protected suspend buffer\n"); |
---|
1231 | | - } |
---|
1232 | | - } else { |
---|
1233 | | - group->protected_suspend_buf.reg = NULL; |
---|
1234 | | - } |
---|
| 1176 | + /* Protected suspend buffer is bound at runtime, so just initialize it here */ |
---|
| 1177 | + group->protected_suspend_buf.gpu_va = 0; |
---|
| 1178 | + group->protected_suspend_buf.pma = NULL; |
---|
| 1179 | + group->protected_suspend_buf.alloc_retries = 0; |
---|
1235 | 1180 | |
---|
1236 | | - return err; |
---|
| 1181 | + return 0; |
---|
1237 | 1182 | } |
---|
1238 | 1183 | |
---|
1239 | 1184 | /** |
---|
.. | .. |
---|
1244 | 1189 | */ |
---|
1245 | 1190 | static u32 generate_group_uid(void) |
---|
1246 | 1191 | { |
---|
1247 | | - /* use first KBase device to store max UID */ |
---|
1248 | | - struct kbase_device *kbdev = kbase_find_device(-1); |
---|
1249 | | - u32 uid = 1; |
---|
| 1192 | + static atomic_t global_csg_uid = ATOMIC_INIT(0); |
---|
1250 | 1193 | |
---|
1251 | | - if (kbdev) |
---|
1252 | | - uid = (u32) atomic_inc_return(&kbdev->group_max_uid_in_devices); |
---|
1253 | | - else |
---|
1254 | | - WARN(1, "NULL kbase device pointer in group UID generation"); |
---|
1255 | | - |
---|
1256 | | - return uid; |
---|
| 1194 | + return (u32)atomic_inc_return(&global_csg_uid); |
---|
1257 | 1195 | } |
---|
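The UID generator no longer looks up a kbase device just to reach a per-device counter; a function-local `static atomic_t` serves all devices. Since it starts at 0 and `atomic_inc_return()` is used, the first UID handed out is 1 and values keep increasing for the lifetime of the driver module. The same idiom in isolation (the demo name is illustrative):

```c
#include <linux/atomic.h>
#include <linux/types.h>

/* Same idiom as generate_group_uid(): a driver-wide, non-zero, increasing id. */
static u32 demo_next_uid(void)
{
	static atomic_t counter = ATOMIC_INIT(0);

	return (u32)atomic_inc_return(&counter);	/* 1, 2, 3, ... */
}
```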
1258 | 1196 | |
---|
1259 | 1197 | /** |
---|
.. | .. |
---|
1272 | 1210 | int group_handle = find_free_group_handle(kctx); |
---|
1273 | 1211 | |
---|
1274 | 1212 | if (group_handle < 0) { |
---|
1275 | | - dev_err(kctx->kbdev->dev, |
---|
1276 | | - "All queue group handles are already in use\n"); |
---|
| 1213 | + dev_dbg(kctx->kbdev->dev, |
---|
| 1214 | + "All queue group handles are already in use"); |
---|
1277 | 1215 | } else { |
---|
1278 | 1216 | struct kbase_queue_group * const group = |
---|
1279 | 1217 | kmalloc(sizeof(struct kbase_queue_group), |
---|
.. | .. |
---|
1298 | 1236 | group->tiler_max = create->in.tiler_max; |
---|
1299 | 1237 | group->fragment_max = create->in.fragment_max; |
---|
1300 | 1238 | group->compute_max = create->in.compute_max; |
---|
| 1239 | + group->csi_handlers = create->in.csi_handlers; |
---|
1301 | 1240 | group->priority = kbase_csf_priority_queue_group_priority_to_relative( |
---|
1302 | 1241 | kbase_csf_priority_check(kctx->kbdev, create->in.priority)); |
---|
1303 | 1242 | group->doorbell_nr = KBASEP_USER_DB_NR_INVALID; |
---|
1304 | 1243 | group->faulted = false; |
---|
| 1244 | + group->cs_unrecoverable = false; |
---|
| 1245 | + group->reevaluate_idle_status = false; |
---|
| 1246 | + |
---|
| 1247 | + group->csg_reg = NULL; |
---|
| 1248 | + group->csg_reg_bind_retries = 0; |
---|
| 1249 | + |
---|
| 1250 | + group->dvs_buf = create->in.dvs_buf; |
---|
| 1251 | + |
---|
| 1252 | +#if IS_ENABLED(CONFIG_DEBUG_FS) |
---|
| 1253 | + group->deschedule_deferred_cnt = 0; |
---|
| 1254 | +#endif |
---|
1305 | 1255 | |
---|
1306 | 1256 | group->group_uid = generate_group_uid(); |
---|
1307 | 1257 | create->out.group_uid = group->group_uid; |
---|
.. | .. |
---|
1317 | 1267 | MAX_SUPPORTED_STREAMS_PER_GROUP); |
---|
1318 | 1268 | |
---|
1319 | 1269 | group->run_state = KBASE_CSF_GROUP_INACTIVE; |
---|
| 1270 | + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group, |
---|
| 1271 | + group->run_state); |
---|
| 1272 | + |
---|
1320 | 1273 | err = create_suspend_buffers(kctx, group); |
---|
1321 | 1274 | |
---|
1322 | 1275 | if (err < 0) { |
---|
.. | .. |
---|
1336 | 1289 | return group_handle; |
---|
1337 | 1290 | } |
---|
1338 | 1291 | |
---|
| 1292 | +static bool dvs_supported(u32 csf_version) |
---|
| 1293 | +{ |
---|
| 1294 | + if (GLB_VERSION_MAJOR_GET(csf_version) < 3) |
---|
| 1295 | + return false; |
---|
| 1296 | + |
---|
| 1297 | + if (GLB_VERSION_MAJOR_GET(csf_version) == 3) |
---|
| 1298 | + if (GLB_VERSION_MINOR_GET(csf_version) < 2) |
---|
| 1299 | + return false; |
---|
| 1300 | + |
---|
| 1301 | + return true; |
---|
| 1302 | +} |
---|
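`dvs_supported()` gates the new DVS buffer parameter on a firmware interface version of at least 3.2: a major version below 3 is rejected, major 3 additionally requires minor >= 2, and major 4 or above is accepted. The same decision logic written against already-decoded major/minor values, so the GLB_VERSION bit layout does not matter for the example (the demo function is illustrative):

```c
#include <stdbool.h>
#include <stdint.h>

/* Mirrors dvs_supported(), taking the decoded major/minor fields directly. */
static bool demo_dvs_supported(uint32_t major, uint32_t minor)
{
	if (major < 3)
		return false;
	if (major == 3 && minor < 2)
		return false;
	return true;
}

/* demo_dvs_supported(2, 9) -> false
 * demo_dvs_supported(3, 1) -> false
 * demo_dvs_supported(3, 2) -> true
 * demo_dvs_supported(4, 0) -> true
 */
```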
| 1303 | + |
---|
1339 | 1304 | int kbase_csf_queue_group_create(struct kbase_context *const kctx, |
---|
1340 | 1305 | union kbase_ioctl_cs_queue_group_create *const create) |
---|
1341 | 1306 | { |
---|
.. | .. |
---|
1343 | 1308 | const u32 tiler_count = hweight64(create->in.tiler_mask); |
---|
1344 | 1309 | const u32 fragment_count = hweight64(create->in.fragment_mask); |
---|
1345 | 1310 | const u32 compute_count = hweight64(create->in.compute_mask); |
---|
| 1311 | + size_t i; |
---|
| 1312 | + |
---|
| 1313 | + for (i = 0; i < sizeof(create->in.padding); i++) { |
---|
| 1314 | + if (create->in.padding[i] != 0) { |
---|
| 1315 | + dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n"); |
---|
| 1316 | + return -EINVAL; |
---|
| 1317 | + } |
---|
| 1318 | + } |
---|
1346 | 1319 | |
---|
1347 | 1320 | mutex_lock(&kctx->csf.lock); |
---|
1348 | 1321 | |
---|
1349 | 1322 | if ((create->in.tiler_max > tiler_count) || |
---|
1350 | 1323 | (create->in.fragment_max > fragment_count) || |
---|
1351 | 1324 | (create->in.compute_max > compute_count)) { |
---|
1352 | | - dev_err(kctx->kbdev->dev, |
---|
1353 | | - "Invalid maximum number of endpoints for a queue group\n"); |
---|
| 1325 | + dev_dbg(kctx->kbdev->dev, |
---|
| 1326 | + "Invalid maximum number of endpoints for a queue group"); |
---|
1354 | 1327 | err = -EINVAL; |
---|
1355 | 1328 | } else if (create->in.priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT) { |
---|
1356 | | - dev_err(kctx->kbdev->dev, "Invalid queue group priority %u\n", |
---|
| 1329 | + dev_dbg(kctx->kbdev->dev, "Invalid queue group priority %u", |
---|
1357 | 1330 | (unsigned int)create->in.priority); |
---|
1358 | 1331 | err = -EINVAL; |
---|
1359 | 1332 | } else if (!iface_has_enough_streams(kctx->kbdev, create->in.cs_min)) { |
---|
1360 | | - dev_err(kctx->kbdev->dev, |
---|
1361 | | - "No CSG has at least %d CSs\n", |
---|
| 1333 | + dev_dbg(kctx->kbdev->dev, |
---|
| 1334 | + "No CSG has at least %d CSs", |
---|
1362 | 1335 | create->in.cs_min); |
---|
| 1336 | + err = -EINVAL; |
---|
| 1337 | + } else if (create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK) { |
---|
| 1338 | + dev_warn(kctx->kbdev->dev, "Unknown exception handler flags set: %u", |
---|
| 1339 | + create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK); |
---|
| 1340 | + err = -EINVAL; |
---|
| 1341 | + } else if (!dvs_supported(kctx->kbdev->csf.global_iface.version) && |
---|
| 1342 | + create->in.dvs_buf) { |
---|
| 1343 | + dev_warn( |
---|
| 1344 | + kctx->kbdev->dev, |
---|
| 1345 | + "GPU does not support DVS but userspace is trying to use it"); |
---|
| 1346 | + err = -EINVAL; |
---|
| 1347 | + } else if (dvs_supported(kctx->kbdev->csf.global_iface.version) && |
---|
| 1348 | + !CSG_DVS_BUF_BUFFER_POINTER_GET(create->in.dvs_buf) && |
---|
| 1349 | + CSG_DVS_BUF_BUFFER_SIZE_GET(create->in.dvs_buf)) { |
---|
| 1350 | + dev_warn(kctx->kbdev->dev, |
---|
| 1351 | + "DVS buffer pointer is null but size is not 0"); |
---|
1363 | 1352 | err = -EINVAL; |
---|
1364 | 1353 | } else { |
---|
1365 | 1354 | /* For the CSG which satisfies the condition for having |
---|
.. | .. |
---|
1389 | 1378 | * @s_buf: Pointer to queue group suspend buffer to be freed |
---|
1390 | 1379 | */ |
---|
1391 | 1380 | static void term_normal_suspend_buffer(struct kbase_context *const kctx, |
---|
1392 | | - struct kbase_normal_suspend_buffer *s_buf) |
---|
| 1381 | + struct kbase_normal_suspend_buffer *s_buf) |
---|
1393 | 1382 | { |
---|
1394 | | - const size_t nr_pages = |
---|
1395 | | - PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); |
---|
| 1383 | + const size_t nr_pages = PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); |
---|
1396 | 1384 | |
---|
1397 | 1385 | lockdep_assert_held(&kctx->csf.lock); |
---|
1398 | 1386 | |
---|
1399 | | - WARN_ON(kbase_mmu_teardown_pages( |
---|
1400 | | - kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, |
---|
1401 | | - s_buf->reg->start_pfn, nr_pages, MCU_AS_NR)); |
---|
| 1387 | + /* The group should not have any binding remaining on a suspend buf region */
---|
| 1388 | + WARN_ONCE(s_buf->gpu_va, "Suspend buffer address should be 0 at termination"); |
---|
1402 | 1389 | |
---|
1403 | | - WARN_ON(s_buf->reg->flags & KBASE_REG_FREE); |
---|
1404 | | - |
---|
1405 | | - mutex_lock(&kctx->kbdev->csf.reg_lock); |
---|
1406 | | - WARN_ON(kbase_remove_va_region(s_buf->reg)); |
---|
1407 | | - mutex_unlock(&kctx->kbdev->csf.reg_lock); |
---|
1408 | | - |
---|
1409 | | - kbase_mem_pool_free_pages( |
---|
1410 | | - &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], |
---|
1411 | | - nr_pages, &s_buf->phy[0], false, false); |
---|
| 1390 | + kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, |
---|
| 1391 | + &s_buf->phy[0], false, false); |
---|
| 1392 | + kbase_process_page_usage_dec(kctx, nr_pages); |
---|
1412 | 1393 | |
---|
1413 | 1394 | kfree(s_buf->phy); |
---|
1414 | 1395 | s_buf->phy = NULL; |
---|
1415 | | - kfree(s_buf->reg); |
---|
1416 | | - s_buf->reg = NULL; |
---|
1417 | 1396 | } |
---|
1418 | 1397 | |
---|
1419 | 1398 | /** |
---|
1420 | | - * term_protected_suspend_buffer() - Free normal-mode suspend buffer of |
---|
| 1399 | + * term_protected_suspend_buffer() - Free protected-mode suspend buffer of |
---|
1421 | 1400 | * queue group |
---|
1422 | 1401 | * |
---|
1423 | 1402 | * @kbdev: Instance of a GPU platform device that implements a CSF interface. |
---|
1424 | | - * @s_buf: Pointer to queue group suspend buffer to be freed |
---|
| 1403 | + * @sbuf: Pointer to queue group suspend buffer to be freed |
---|
1425 | 1404 | */ |
---|
1426 | 1405 | static void term_protected_suspend_buffer(struct kbase_device *const kbdev, |
---|
1427 | | - struct kbase_protected_suspend_buffer *s_buf) |
---|
| 1406 | + struct kbase_protected_suspend_buffer *sbuf) |
---|
1428 | 1407 | { |
---|
1429 | | - const size_t nr_pages = |
---|
1430 | | - PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); |
---|
1431 | | - |
---|
1432 | | - WARN_ON(kbase_mmu_teardown_pages( |
---|
1433 | | - kbdev, &kbdev->csf.mcu_mmu, |
---|
1434 | | - s_buf->reg->start_pfn, nr_pages, MCU_AS_NR)); |
---|
1435 | | - |
---|
1436 | | - WARN_ON(s_buf->reg->flags & KBASE_REG_FREE); |
---|
1437 | | - |
---|
1438 | | - mutex_lock(&kbdev->csf.reg_lock); |
---|
1439 | | - WARN_ON(kbase_remove_va_region(s_buf->reg)); |
---|
1440 | | - mutex_unlock(&kbdev->csf.reg_lock); |
---|
1441 | | - |
---|
1442 | | - kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages); |
---|
1443 | | - s_buf->pma = NULL; |
---|
1444 | | - kfree(s_buf->reg); |
---|
1445 | | - s_buf->reg = NULL; |
---|
| 1408 | + WARN_ONCE(sbuf->gpu_va, "Suspend buf should have been unmapped inside scheduler!"); |
---|
| 1409 | + if (sbuf->pma) { |
---|
| 1410 | + const size_t nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); |
---|
| 1411 | + kbase_csf_protected_memory_free(kbdev, sbuf->pma, nr_pages, true); |
---|
| 1412 | + sbuf->pma = NULL; |
---|
| 1413 | + } |
---|
1446 | 1414 | } |
---|
1447 | 1415 | |
---|
1448 | 1416 | void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group) |
---|
.. | .. |
---|
1474 | 1442 | &group->protected_suspend_buf); |
---|
1475 | 1443 | |
---|
1476 | 1444 | group->run_state = KBASE_CSF_GROUP_TERMINATED; |
---|
| 1445 | + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group, group->run_state); |
---|
1477 | 1446 | } |
---|
1478 | 1447 | |
---|
1479 | 1448 | /** |
---|
.. | .. |
---|
1504 | 1473 | kbase_csf_term_descheduled_queue_group(group); |
---|
1505 | 1474 | } |
---|
1506 | 1475 | |
---|
| 1476 | +/** |
---|
| 1477 | + * wait_group_deferred_deschedule_completion - Wait for the group's refcount, taken
---|
| 1478 | + * when the group deschedule had to be deferred, to drop to 0.
---|
| 1479 | + * |
---|
| 1480 | + * @group: Pointer to GPU command queue group that is being deleted. |
---|
| 1481 | + * |
---|
| 1482 | + * This function is called when userspace deletes the group, after the group
---|
| 1483 | + * has been descheduled. It synchronizes with the other threads that were
---|
| 1484 | + * also trying to deschedule the group whilst dumping for a fault was in progress.
---|
| 1485 | + * Please refer to the documentation of wait_for_dump_complete_on_group_deschedule()
---|
| 1486 | + * for more details.
---|
| 1487 | + */ |
---|
| 1488 | +static void wait_group_deferred_deschedule_completion(struct kbase_queue_group *group) |
---|
| 1489 | +{ |
---|
| 1490 | +#if IS_ENABLED(CONFIG_DEBUG_FS) |
---|
| 1491 | + struct kbase_context *kctx = group->kctx; |
---|
| 1492 | + |
---|
| 1493 | + lockdep_assert_held(&kctx->csf.lock); |
---|
| 1494 | + |
---|
| 1495 | + if (likely(!group->deschedule_deferred_cnt)) |
---|
| 1496 | + return; |
---|
| 1497 | + |
---|
| 1498 | + mutex_unlock(&kctx->csf.lock); |
---|
| 1499 | + wait_event(kctx->kbdev->csf.event_wait, !group->deschedule_deferred_cnt); |
---|
| 1500 | + mutex_lock(&kctx->csf.lock); |
---|
| 1501 | +#endif |
---|
| 1502 | +} |
---|
| 1503 | + |
---|
1507 | 1504 | static void cancel_queue_group_events(struct kbase_queue_group *group) |
---|
1508 | 1505 | { |
---|
1509 | 1506 | cancel_work_sync(&group->timer_event_work); |
---|
1510 | 1507 | cancel_work_sync(&group->protm_event_work); |
---|
| 1508 | +} |
---|
| 1509 | + |
---|
| 1510 | +static void remove_pending_group_fatal_error(struct kbase_queue_group *group) |
---|
| 1511 | +{ |
---|
| 1512 | + struct kbase_context *kctx = group->kctx; |
---|
| 1513 | + |
---|
| 1514 | + dev_dbg(kctx->kbdev->dev, |
---|
| 1515 | + "Remove any pending group fatal error from context %pK\n", |
---|
| 1516 | + (void *)group->kctx); |
---|
| 1517 | + |
---|
| 1518 | + kbase_csf_event_remove_error(kctx, &group->error_tiler_oom); |
---|
| 1519 | + kbase_csf_event_remove_error(kctx, &group->error_timeout); |
---|
| 1520 | + kbase_csf_event_remove_error(kctx, &group->error_fatal); |
---|
1511 | 1521 | } |
---|
1512 | 1522 | |
---|
1513 | 1523 | void kbase_csf_queue_group_terminate(struct kbase_context *kctx, |
---|
.. | .. |
---|
1532 | 1542 | group = find_queue_group(kctx, group_handle); |
---|
1533 | 1543 | |
---|
1534 | 1544 | if (group) { |
---|
1535 | | - unsigned long flags; |
---|
1536 | | - |
---|
1537 | | - spin_lock_irqsave(&kctx->csf.event_lock, flags); |
---|
1538 | | - |
---|
1539 | | - dev_dbg(kbdev->dev, |
---|
1540 | | - "Remove any pending group fatal error from context %pK\n", |
---|
1541 | | - (void *)group->kctx); |
---|
1542 | | - |
---|
1543 | | - list_del_init(&group->error_tiler_oom.link); |
---|
1544 | | - list_del_init(&group->error_timeout.link); |
---|
1545 | | - list_del_init(&group->error_fatal.link); |
---|
1546 | | - spin_unlock_irqrestore(&kctx->csf.event_lock, flags); |
---|
1547 | | - |
---|
1548 | | - term_queue_group(group); |
---|
1549 | 1545 | kctx->csf.queue_groups[group_handle] = NULL; |
---|
| 1546 | + /* Stop the given group from running */
---|
| 1547 | + term_queue_group(group); |
---|
| 1548 | + mutex_unlock(&kctx->csf.lock); |
---|
| 1549 | + |
---|
| 1550 | + if (reset_prevented) { |
---|
| 1551 | + /* Allow GPU reset before cancelling the group specific |
---|
| 1552 | + * work item to avoid potential deadlock. |
---|
| 1553 | + * Reset prevention isn't needed after group termination. |
---|
| 1554 | + */ |
---|
| 1555 | + kbase_reset_gpu_allow(kbdev); |
---|
| 1556 | + reset_prevented = false; |
---|
| 1557 | + } |
---|
| 1558 | + |
---|
| 1559 | + /* Cancel any pending event callbacks. If one is in progress |
---|
| 1560 | + * then this thread waits synchronously for it to complete (which |
---|
| 1561 | + * is why we must unlock the context first). We already ensured |
---|
| 1562 | + * that no more callbacks can be enqueued by terminating the group. |
---|
| 1563 | + */ |
---|
| 1564 | + cancel_queue_group_events(group); |
---|
| 1565 | + |
---|
| 1566 | + mutex_lock(&kctx->csf.lock); |
---|
| 1567 | + |
---|
| 1568 | + /* Clean up after the termination */ |
---|
| 1569 | + remove_pending_group_fatal_error(group); |
---|
| 1570 | + |
---|
| 1571 | + wait_group_deferred_deschedule_completion(group); |
---|
1550 | 1572 | } |
---|
1551 | 1573 | |
---|
1552 | 1574 | mutex_unlock(&kctx->csf.lock); |
---|
1553 | 1575 | if (reset_prevented) |
---|
1554 | 1576 | kbase_reset_gpu_allow(kbdev); |
---|
1555 | 1577 | |
---|
1556 | | - if (!group) |
---|
1557 | | - return; |
---|
1558 | | - |
---|
1559 | | - /* Cancel any pending event callbacks. If one is in progress |
---|
1560 | | - * then this thread waits synchronously for it to complete (which |
---|
1561 | | - * is why we must unlock the context first). We already ensured |
---|
1562 | | - * that no more callbacks can be enqueued by terminating the group. |
---|
1563 | | - */ |
---|
1564 | | - cancel_queue_group_events(group); |
---|
1565 | 1578 | kfree(group); |
---|
1566 | 1579 | } |
---|
| 1580 | +KBASE_EXPORT_TEST_API(kbase_csf_queue_group_terminate); |
---|
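
The termination path above drops kctx->csf.lock around cancel_queue_group_events() because cancel_work_sync() may wait for a handler that itself takes the same mutex. A minimal sketch of that unlock-cancel-relock pattern is shown below; the demo_* names are hypothetical and not part of the patch.

```c
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct demo_obj {
	unsigned int handle;
	struct work_struct event_work;
};

struct demo_ctx {
	struct mutex lock;
	struct demo_obj *objects[16];
};

static void demo_obj_delete(struct demo_ctx *ctx, struct demo_obj *obj)
{
	mutex_lock(&ctx->lock);
	/* Unpublish first so no new work can be queued for this object. */
	ctx->objects[obj->handle] = NULL;
	mutex_unlock(&ctx->lock);

	/*
	 * Safe to wait here: a handler that is still running may take
	 * ctx->lock without deadlocking against this thread.
	 */
	cancel_work_sync(&obj->event_work);

	kfree(obj);
}
```
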
1567 | 1581 | |
---|
| 1582 | +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST |
---|
1568 | 1583 | int kbase_csf_queue_group_suspend(struct kbase_context *kctx, |
---|
1569 | 1584 | struct kbase_suspend_copy_buffer *sus_buf, |
---|
1570 | 1585 | u8 group_handle) |
---|
.. | .. |
---|
1595 | 1610 | |
---|
1596 | 1611 | return err; |
---|
1597 | 1612 | } |
---|
1598 | | - |
---|
1599 | | -/** |
---|
1600 | | - * add_error() - Add an error to the list of errors to report to user space |
---|
1601 | | - * |
---|
1602 | | - * @kctx: Address of a base context associated with a GPU address space. |
---|
1603 | | - * @error: Address of the item to be added to the context's pending error list. |
---|
1604 | | - * @data: Error data to be returned to userspace. |
---|
1605 | | - * |
---|
1606 | | - * Does not wake up the event queue blocking a user thread in kbase_poll. This |
---|
1607 | | - * is to make it more efficient to add multiple errors. |
---|
1608 | | - * |
---|
1609 | | - * The added error must not already be on the context's list of errors waiting |
---|
1610 | | - * to be reported (e.g. because a previous error concerning the same object has |
---|
1611 | | - * not yet been reported). |
---|
1612 | | - */ |
---|
1613 | | -static void add_error(struct kbase_context *const kctx, |
---|
1614 | | - struct kbase_csf_notification *const error, |
---|
1615 | | - struct base_csf_notification const *const data) |
---|
1616 | | -{ |
---|
1617 | | - unsigned long flags; |
---|
1618 | | - |
---|
1619 | | - if (WARN_ON(!kctx)) |
---|
1620 | | - return; |
---|
1621 | | - |
---|
1622 | | - if (WARN_ON(!error)) |
---|
1623 | | - return; |
---|
1624 | | - |
---|
1625 | | - if (WARN_ON(!data)) |
---|
1626 | | - return; |
---|
1627 | | - |
---|
1628 | | - spin_lock_irqsave(&kctx->csf.event_lock, flags); |
---|
1629 | | - |
---|
1630 | | - if (!WARN_ON(!list_empty(&error->link))) { |
---|
1631 | | - error->data = *data; |
---|
1632 | | - list_add_tail(&error->link, &kctx->csf.error_list); |
---|
1633 | | - dev_dbg(kctx->kbdev->dev, |
---|
1634 | | - "Added error %pK of type %d in context %pK\n", |
---|
1635 | | - (void *)error, data->type, (void *)kctx); |
---|
1636 | | - } |
---|
1637 | | - |
---|
1638 | | - spin_unlock_irqrestore(&kctx->csf.event_lock, flags); |
---|
1639 | | -} |
---|
| 1613 | +#endif |
---|
1640 | 1614 | |
---|
1641 | 1615 | void kbase_csf_add_group_fatal_error( |
---|
1642 | 1616 | struct kbase_queue_group *const group, |
---|
.. | .. |
---|
1660 | 1634 | } |
---|
1661 | 1635 | }; |
---|
1662 | 1636 | |
---|
1663 | | - add_error(group->kctx, &group->error_fatal, &error); |
---|
| 1637 | + kbase_csf_event_add_error(group->kctx, &group->error_fatal, &error); |
---|
1664 | 1638 | } |
---|
1665 | 1639 | |
---|
1666 | 1640 | void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev, |
---|
.. | .. |
---|
1698 | 1672 | |
---|
1699 | 1673 | int kbase_csf_ctx_init(struct kbase_context *kctx) |
---|
1700 | 1674 | { |
---|
1701 | | - struct kbase_device *kbdev = kctx->kbdev; |
---|
1702 | 1675 | int err = -ENOMEM; |
---|
1703 | 1676 | |
---|
1704 | | - INIT_LIST_HEAD(&kctx->csf.event_callback_list); |
---|
1705 | 1677 | INIT_LIST_HEAD(&kctx->csf.queue_list); |
---|
1706 | 1678 | INIT_LIST_HEAD(&kctx->csf.link); |
---|
1707 | | - INIT_LIST_HEAD(&kctx->csf.error_list); |
---|
1708 | 1679 | |
---|
1709 | | - spin_lock_init(&kctx->csf.event_lock); |
---|
1710 | | - kctx->csf.user_reg_vma = NULL; |
---|
1711 | | - mutex_lock(&kbdev->pm.lock); |
---|
1712 | | - /* The inode information for /dev/malixx file is not available at the |
---|
1713 | | - * time of device probe as the inode is created when the device node |
---|
1714 | | - * is created by udevd (through mknod). |
---|
1715 | | - */ |
---|
1716 | | - if (kctx->filp) { |
---|
1717 | | - if (!kbdev->csf.mali_file_inode) |
---|
1718 | | - kbdev->csf.mali_file_inode = kctx->filp->f_inode; |
---|
1719 | | - |
---|
1720 | | - /* inode is unique for a file */ |
---|
1721 | | - WARN_ON(kbdev->csf.mali_file_inode != kctx->filp->f_inode); |
---|
1722 | | - } |
---|
1723 | | - mutex_unlock(&kbdev->pm.lock); |
---|
| 1680 | + kbase_csf_event_init(kctx); |
---|
1724 | 1681 | |
---|
1725 | 1682 | /* Mark all the cookies as 'free' */ |
---|
1726 | 1683 | bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE); |
---|
.. | .. |
---|
1737 | 1694 | if (likely(!err)) { |
---|
1738 | 1695 | err = kbase_csf_tiler_heap_context_init(kctx); |
---|
1739 | 1696 | |
---|
1740 | | - if (likely(!err)) |
---|
| 1697 | + if (likely(!err)) { |
---|
1741 | 1698 | mutex_init(&kctx->csf.lock); |
---|
1742 | | - else |
---|
| 1699 | + INIT_WORK(&kctx->csf.pending_submission_work, |
---|
| 1700 | + pending_submission_worker); |
---|
| 1701 | + |
---|
| 1702 | + err = kbasep_ctx_user_reg_page_mapping_init(kctx); |
---|
| 1703 | + |
---|
| 1704 | + if (unlikely(err)) |
---|
| 1705 | + kbase_csf_tiler_heap_context_term(kctx); |
---|
| 1706 | + } |
---|
| 1707 | + |
---|
| 1708 | + if (unlikely(err)) |
---|
1743 | 1709 | kbase_csf_kcpu_queue_context_term(kctx); |
---|
1744 | 1710 | } |
---|
1745 | 1711 | |
---|
.. | .. |
---|
1822 | 1788 | * for queue groups & kcpu queues, hence no need to explicitly remove |
---|
1823 | 1789 | * those debugfs files. |
---|
1824 | 1790 | */ |
---|
1825 | | - kbase_csf_event_wait_remove_all(kctx); |
---|
1826 | 1791 | |
---|
1827 | 1792 | /* Wait for a GPU reset if it is happening, prevent it if not happening */ |
---|
1828 | 1793 | err = kbase_reset_gpu_prevent_and_wait(kbdev); |
---|
.. | .. |
---|
1835 | 1800 | reset_prevented = true; |
---|
1836 | 1801 | |
---|
1837 | 1802 | mutex_lock(&kctx->csf.lock); |
---|
| 1803 | + |
---|
1838 | 1804 | /* Iterate through the queue groups that were not terminated by |
---|
1839 | 1805 | * userspace and issue the term request to firmware for them. |
---|
1840 | 1806 | */ |
---|
1841 | 1807 | for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) { |
---|
1842 | | - if (kctx->csf.queue_groups[i]) |
---|
1843 | | - term_queue_group(kctx->csf.queue_groups[i]); |
---|
| 1808 | + struct kbase_queue_group *group = kctx->csf.queue_groups[i]; |
---|
| 1809 | + |
---|
| 1810 | + if (group) { |
---|
| 1811 | + remove_pending_group_fatal_error(group); |
---|
| 1812 | + term_queue_group(group); |
---|
| 1813 | + } |
---|
1844 | 1814 | } |
---|
1845 | 1815 | mutex_unlock(&kctx->csf.lock); |
---|
1846 | 1816 | |
---|
1847 | 1817 | if (reset_prevented) |
---|
1848 | 1818 | kbase_reset_gpu_allow(kbdev); |
---|
| 1819 | + |
---|
| 1820 | + cancel_work_sync(&kctx->csf.pending_submission_work); |
---|
1849 | 1821 | |
---|
1850 | 1822 | /* Now that all queue groups have been terminated, there can be no |
---|
1851 | 1823 | * more OoM or timer event interrupts but there can be inflight work |
---|
.. | .. |
---|
1891 | 1863 | * only one reference left that was taken when queue was |
---|
1892 | 1864 | * registered. |
---|
1893 | 1865 | */ |
---|
1894 | | - if (atomic_read(&queue->refcount) != 1) |
---|
1895 | | - dev_warn(kctx->kbdev->dev, |
---|
1896 | | - "Releasing queue with incorrect refcounting!\n"); |
---|
| 1866 | + WARN_ON(kbase_refcount_read(&queue->refcount) != 1); |
---|
1897 | 1867 | list_del_init(&queue->link); |
---|
1898 | 1868 | release_queue(queue); |
---|
1899 | 1869 | } |
---|
1900 | 1870 | |
---|
1901 | 1871 | mutex_unlock(&kctx->csf.lock); |
---|
1902 | 1872 | |
---|
| 1873 | + kbasep_ctx_user_reg_page_mapping_term(kctx); |
---|
1903 | 1874 | kbase_csf_tiler_heap_context_term(kctx); |
---|
1904 | 1875 | kbase_csf_kcpu_queue_context_term(kctx); |
---|
1905 | 1876 | kbase_csf_scheduler_context_term(kctx); |
---|
| 1877 | + kbase_csf_event_term(kctx); |
---|
1906 | 1878 | |
---|
1907 | 1879 | mutex_destroy(&kctx->csf.lock); |
---|
1908 | | -} |
---|
1909 | | - |
---|
1910 | | -int kbase_csf_event_wait_add(struct kbase_context *kctx, |
---|
1911 | | - kbase_csf_event_callback *callback, void *param) |
---|
1912 | | -{ |
---|
1913 | | - int err = -ENOMEM; |
---|
1914 | | - struct kbase_csf_event *event = |
---|
1915 | | - kzalloc(sizeof(struct kbase_csf_event), GFP_KERNEL); |
---|
1916 | | - |
---|
1917 | | - if (event) { |
---|
1918 | | - unsigned long flags; |
---|
1919 | | - |
---|
1920 | | - event->kctx = kctx; |
---|
1921 | | - event->callback = callback; |
---|
1922 | | - event->param = param; |
---|
1923 | | - |
---|
1924 | | - spin_lock_irqsave(&kctx->csf.event_lock, flags); |
---|
1925 | | - list_add_tail(&event->link, &kctx->csf.event_callback_list); |
---|
1926 | | - dev_dbg(kctx->kbdev->dev, |
---|
1927 | | - "Added event handler %pK with param %pK\n", event, |
---|
1928 | | - event->param); |
---|
1929 | | - spin_unlock_irqrestore(&kctx->csf.event_lock, flags); |
---|
1930 | | - |
---|
1931 | | - err = 0; |
---|
1932 | | - } |
---|
1933 | | - |
---|
1934 | | - return err; |
---|
1935 | | -} |
---|
1936 | | - |
---|
1937 | | -void kbase_csf_event_wait_remove(struct kbase_context *kctx, |
---|
1938 | | - kbase_csf_event_callback *callback, void *param) |
---|
1939 | | -{ |
---|
1940 | | - struct kbase_csf_event *event; |
---|
1941 | | - unsigned long flags; |
---|
1942 | | - |
---|
1943 | | - spin_lock_irqsave(&kctx->csf.event_lock, flags); |
---|
1944 | | - |
---|
1945 | | - list_for_each_entry(event, &kctx->csf.event_callback_list, link) { |
---|
1946 | | - if ((event->callback == callback) && (event->param == param)) { |
---|
1947 | | - list_del(&event->link); |
---|
1948 | | - dev_dbg(kctx->kbdev->dev, |
---|
1949 | | - "Removed event handler %pK with param %pK\n", |
---|
1950 | | - event, event->param); |
---|
1951 | | - kfree(event); |
---|
1952 | | - break; |
---|
1953 | | - } |
---|
1954 | | - } |
---|
1955 | | - spin_unlock_irqrestore(&kctx->csf.event_lock, flags); |
---|
1956 | | -} |
---|
1957 | | - |
---|
1958 | | -bool kbase_csf_read_error(struct kbase_context *kctx, |
---|
1959 | | - struct base_csf_notification *event_data) |
---|
1960 | | -{ |
---|
1961 | | - bool got_event = true; |
---|
1962 | | - struct kbase_csf_notification *error_data = NULL; |
---|
1963 | | - unsigned long flags; |
---|
1964 | | - |
---|
1965 | | - spin_lock_irqsave(&kctx->csf.event_lock, flags); |
---|
1966 | | - |
---|
1967 | | - if (likely(!list_empty(&kctx->csf.error_list))) { |
---|
1968 | | - error_data = list_first_entry(&kctx->csf.error_list, |
---|
1969 | | - struct kbase_csf_notification, link); |
---|
1970 | | - list_del_init(&error_data->link); |
---|
1971 | | - *event_data = error_data->data; |
---|
1972 | | - dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n", |
---|
1973 | | - (void *)error_data, (void *)kctx); |
---|
1974 | | - } else { |
---|
1975 | | - got_event = false; |
---|
1976 | | - } |
---|
1977 | | - |
---|
1978 | | - spin_unlock_irqrestore(&kctx->csf.event_lock, flags); |
---|
1979 | | - |
---|
1980 | | - return got_event; |
---|
1981 | | -} |
---|
1982 | | - |
---|
1983 | | -bool kbase_csf_error_pending(struct kbase_context *kctx) |
---|
1984 | | -{ |
---|
1985 | | - bool event_pended = false; |
---|
1986 | | - unsigned long flags; |
---|
1987 | | - |
---|
1988 | | - spin_lock_irqsave(&kctx->csf.event_lock, flags); |
---|
1989 | | - event_pended = !list_empty(&kctx->csf.error_list); |
---|
1990 | | - dev_dbg(kctx->kbdev->dev, "%s error is pending in context %pK\n", |
---|
1991 | | - event_pended ? "An" : "No", (void *)kctx); |
---|
1992 | | - spin_unlock_irqrestore(&kctx->csf.event_lock, flags); |
---|
1993 | | - |
---|
1994 | | - return event_pended; |
---|
1995 | | -} |
---|
1996 | | - |
---|
1997 | | -void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu) |
---|
1998 | | -{ |
---|
1999 | | - struct kbase_csf_event *event, *next_event; |
---|
2000 | | - unsigned long flags; |
---|
2001 | | - |
---|
2002 | | - dev_dbg(kctx->kbdev->dev, |
---|
2003 | | - "Signal event (%s GPU notify) for context %pK\n", |
---|
2004 | | - notify_gpu ? "with" : "without", (void *)kctx); |
---|
2005 | | - |
---|
2006 | | - /* First increment the signal count and wake up event thread. |
---|
2007 | | - */ |
---|
2008 | | - atomic_set(&kctx->event_count, 1); |
---|
2009 | | - kbase_event_wakeup(kctx); |
---|
2010 | | - |
---|
2011 | | - /* Signal the CSF firmware. This is to ensure that pending command |
---|
2012 | | - * stream synch object wait operations are re-evaluated. |
---|
2013 | | - * Write to GLB_DOORBELL would suffice as spec says that all pending |
---|
2014 | | - * synch object wait operations are re-evaluated on a write to any |
---|
2015 | | - * CS_DOORBELL/GLB_DOORBELL register. |
---|
2016 | | - */ |
---|
2017 | | - if (notify_gpu) { |
---|
2018 | | - spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); |
---|
2019 | | - if (kctx->kbdev->pm.backend.gpu_powered) |
---|
2020 | | - kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR); |
---|
2021 | | - KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT_NOTIFY_GPU, kctx, 0u); |
---|
2022 | | - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); |
---|
2023 | | - } |
---|
2024 | | - |
---|
2025 | | - /* Now invoke the callbacks registered on backend side. |
---|
2026 | | - * Allow item removal inside the loop, if requested by the callback. |
---|
2027 | | - */ |
---|
2028 | | - spin_lock_irqsave(&kctx->csf.event_lock, flags); |
---|
2029 | | - |
---|
2030 | | - list_for_each_entry_safe( |
---|
2031 | | - event, next_event, &kctx->csf.event_callback_list, link) { |
---|
2032 | | - enum kbase_csf_event_callback_action action; |
---|
2033 | | - |
---|
2034 | | - dev_dbg(kctx->kbdev->dev, |
---|
2035 | | - "Calling event handler %pK with param %pK\n", |
---|
2036 | | - (void *)event, event->param); |
---|
2037 | | - action = event->callback(event->param); |
---|
2038 | | - if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) { |
---|
2039 | | - list_del(&event->link); |
---|
2040 | | - kfree(event); |
---|
2041 | | - } |
---|
2042 | | - } |
---|
2043 | | - |
---|
2044 | | - spin_unlock_irqrestore(&kctx->csf.event_lock, flags); |
---|
2045 | | -} |
---|
2046 | | - |
---|
2047 | | -void kbase_csf_event_wait_remove_all(struct kbase_context *kctx) |
---|
2048 | | -{ |
---|
2049 | | - struct kbase_csf_event *event, *next_event; |
---|
2050 | | - unsigned long flags; |
---|
2051 | | - |
---|
2052 | | - spin_lock_irqsave(&kctx->csf.event_lock, flags); |
---|
2053 | | - |
---|
2054 | | - list_for_each_entry_safe( |
---|
2055 | | - event, next_event, &kctx->csf.event_callback_list, link) { |
---|
2056 | | - list_del(&event->link); |
---|
2057 | | - dev_dbg(kctx->kbdev->dev, |
---|
2058 | | - "Removed event handler %pK with param %pK\n", |
---|
2059 | | - (void *)event, event->param); |
---|
2060 | | - kfree(event); |
---|
2061 | | - } |
---|
2062 | | - |
---|
2063 | | - spin_unlock_irqrestore(&kctx->csf.event_lock, flags); |
---|
2064 | 1880 | } |
---|
2065 | 1881 | |
---|
2066 | 1882 | /** |
---|
2067 | 1883 | * handle_oom_event - Handle the OoM event generated by the firmware for the |
---|
2068 | 1884 | * CSI. |
---|
2069 | 1885 | * |
---|
| 1886 | + * @group: Pointer to the CSG group the oom-event belongs to. |
---|
| 1887 | + * @stream: Pointer to the structure containing info provided by the firmware |
---|
| 1888 | + * about the CSI. |
---|
| 1889 | + * |
---|
2070 | 1890 | * This function will handle the OoM event request from the firmware for the |
---|
2071 | 1891 | * CS. It will retrieve the address of heap context and heap's |
---|
2072 | 1892 | * statistics (like number of render passes in-flight) from the CS's kernel |
---|
2073 | | - * kernel output page and pass them to the tiler heap function to allocate a |
---|
| 1893 | + * output page and pass them to the tiler heap function to allocate a |
---|
2074 | 1894 | * new chunk. |
---|
2075 | 1895 | * It will also update the CS's kernel input page with the address |
---|
2076 | 1896 | * of a new chunk that was allocated. |
---|
2077 | 1897 | * |
---|
2078 | | - * @kctx: Pointer to the kbase context in which the tiler heap was initialized. |
---|
2079 | | - * @stream: Pointer to the structure containing info provided by the firmware |
---|
2080 | | - * about the CSI. |
---|
2081 | | - * |
---|
2082 | 1898 | * Return: 0 if successfully handled the request, otherwise a negative error |
---|
2083 | 1899 | * code on failure. |
---|
2084 | 1900 | */ |
---|
2085 | | -static int handle_oom_event(struct kbase_context *const kctx, |
---|
2086 | | - struct kbase_csf_cmd_stream_info const *const stream) |
---|
| 1901 | +static int handle_oom_event(struct kbase_queue_group *const group, |
---|
| 1902 | + struct kbase_csf_cmd_stream_info const *const stream) |
---|
2087 | 1903 | { |
---|
| 1904 | + struct kbase_context *const kctx = group->kctx; |
---|
2088 | 1905 | u64 gpu_heap_va = |
---|
2089 | 1906 | kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_LO) | |
---|
2090 | 1907 | ((u64)kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_HI) << 32); |
---|
.. | .. |
---|
2098 | 1915 | u32 pending_frag_count; |
---|
2099 | 1916 | u64 new_chunk_ptr; |
---|
2100 | 1917 | int err; |
---|
| 1918 | + bool frag_end_err = false; |
---|
2101 | 1919 | |
---|
2102 | 1920 | if ((frag_end > vt_end) || (vt_end >= vt_start)) { |
---|
2103 | | - dev_warn(kctx->kbdev->dev, "Invalid Heap statistics provided by firmware: vt_start %d, vt_end %d, frag_end %d\n", |
---|
| 1921 | + frag_end_err = true; |
---|
| 1922 | + dev_dbg(kctx->kbdev->dev, "Invalid Heap statistics provided by firmware: vt_start %d, vt_end %d, frag_end %d\n", |
---|
2104 | 1923 | vt_start, vt_end, frag_end); |
---|
2105 | | - return -EINVAL; |
---|
2106 | 1924 | } |
---|
2107 | | - |
---|
2108 | | - renderpasses_in_flight = vt_start - frag_end; |
---|
2109 | | - pending_frag_count = vt_end - frag_end; |
---|
| 1925 | + if (frag_end_err) { |
---|
| 1926 | + renderpasses_in_flight = 1; |
---|
| 1927 | + pending_frag_count = 1; |
---|
| 1928 | + } else { |
---|
| 1929 | + renderpasses_in_flight = vt_start - frag_end; |
---|
| 1930 | + pending_frag_count = vt_end - frag_end; |
---|
| 1931 | + } |
---|
2110 | 1932 | |
---|
2111 | 1933 | err = kbase_csf_tiler_heap_alloc_new_chunk(kctx, |
---|
2112 | 1934 | gpu_heap_va, renderpasses_in_flight, pending_frag_count, &new_chunk_ptr); |
---|
2113 | 1935 | |
---|
2114 | | - /* It is okay to acknowledge with a NULL chunk (firmware will then wait |
---|
2115 | | - * for the fragment jobs to complete and release chunks) |
---|
2116 | | - */ |
---|
2117 | | - if (err == -EBUSY) |
---|
| 1936 | + if ((group->csi_handlers & BASE_CSF_TILER_OOM_EXCEPTION_FLAG) && |
---|
| 1937 | + (pending_frag_count == 0) && (err == -ENOMEM || err == -EBUSY)) { |
---|
| 1938 | + /* The group allows incremental rendering, trigger it */ |
---|
2118 | 1939 | new_chunk_ptr = 0; |
---|
2119 | | - else if (err) |
---|
| 1940 | + dev_dbg(kctx->kbdev->dev, "Group-%d (slot-%d) enter incremental render\n", |
---|
| 1941 | + group->handle, group->csg_nr); |
---|
| 1942 | + } else if (err == -EBUSY) { |
---|
| 1943 | + /* Acknowledge with a NULL chunk (firmware will then wait for |
---|
| 1944 | + * the fragment jobs to complete and release chunks) |
---|
| 1945 | + */ |
---|
| 1946 | + new_chunk_ptr = 0; |
---|
| 1947 | + } else if (err) |
---|
2120 | 1948 | return err; |
---|
2121 | 1949 | |
---|
2122 | 1950 | kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_LO, |
---|
.. | .. |
---|
2149 | 1977 | BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM, |
---|
2150 | 1978 | } } } }; |
---|
2151 | 1979 | |
---|
2152 | | - add_error(group->kctx, &group->error_tiler_oom, &error); |
---|
| 1980 | + kbase_csf_event_add_error(group->kctx, |
---|
| 1981 | + &group->error_tiler_oom, |
---|
| 1982 | + &error); |
---|
2153 | 1983 | kbase_event_wakeup(group->kctx); |
---|
| 1984 | +} |
---|
| 1985 | + |
---|
| 1986 | +static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev) |
---|
| 1987 | +{ |
---|
| 1988 | + int err; |
---|
| 1989 | + const unsigned int cache_flush_wait_timeout_ms = 2000; |
---|
| 1990 | + |
---|
| 1991 | + kbase_pm_lock(kbdev); |
---|
| 1992 | + /* With the advent of partial cache flush, dirty cache lines could |
---|
| 1993 | + * be left in the GPU L2 caches by terminating the queue group here |
---|
| 1994 | + * without waiting for proper cache maintenance. A full cache flush |
---|
| 1995 | + * here will prevent these dirty cache lines from being arbitrarily |
---|
| 1996 | + * evicted later and possibly causing memory corruption.
---|
| 1997 | + */ |
---|
| 1998 | + if (kbdev->pm.backend.gpu_powered) { |
---|
| 1999 | + kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); |
---|
| 2000 | + err = kbase_gpu_wait_cache_clean_timeout(kbdev, cache_flush_wait_timeout_ms); |
---|
| 2001 | + |
---|
| 2002 | + if (err) { |
---|
| 2003 | + dev_warn( |
---|
| 2004 | + kbdev->dev, |
---|
| 2005 | + "[%llu] Timeout waiting for cache clean to complete after fatal error", |
---|
| 2006 | + kbase_backend_get_cycle_cnt(kbdev)); |
---|
| 2007 | + |
---|
| 2008 | + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) |
---|
| 2009 | + kbase_reset_gpu(kbdev); |
---|
| 2010 | + } |
---|
| 2011 | + } |
---|
| 2012 | + |
---|
| 2013 | + kbase_pm_unlock(kbdev); |
---|
2154 | 2014 | } |
---|
2155 | 2015 | |
---|
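
flush_gpu_cache_on_fatal_error() above follows a common pattern: start the maintenance operation, wait for it with a bounded timeout, and escalate to a GPU reset if it does not complete. A generic sketch of that pattern, using hypothetical demo_* names and a completion in place of the kbase cache-clean helpers, is given below.

```c
#include <linux/completion.h>
#include <linux/jiffies.h>
#include <linux/printk.h>

#define DEMO_FLUSH_TIMEOUT_MS 2000

/* Hypothetical device with a completion signalled by the flush-done IRQ. */
struct demo_dev {
	struct completion flush_done;
};

static void demo_request_reset(struct demo_dev *dev)
{
	pr_err("demo: flush timed out, escalating to reset\n");
}

static void demo_flush_and_wait(struct demo_dev *dev)
{
	/* ...kick off the hardware flush here... */

	if (!wait_for_completion_timeout(&dev->flush_done,
					 msecs_to_jiffies(DEMO_FLUSH_TIMEOUT_MS)))
		demo_request_reset(dev);   /* bounded wait: never block forever */
}
```
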
2156 | 2016 | /** |
---|
.. | .. |
---|
2165 | 2025 | * notification to allow the firmware to report out-of-memory again in future. |
---|
2166 | 2026 | * If the out-of-memory condition was successfully handled then this function |
---|
2167 | 2027 | * rings the relevant doorbell to notify the firmware; otherwise, it terminates |
---|
2168 | | - * the GPU command queue group to which the queue is bound. See |
---|
2169 | | - * term_queue_group() for details. |
---|
| 2028 | + * the GPU command queue group to which the queue is bound and notify a waiting |
---|
| 2029 | + * user space client of the failure. |
---|
2170 | 2030 | */ |
---|
2171 | 2031 | static void kbase_queue_oom_event(struct kbase_queue *const queue) |
---|
2172 | 2032 | { |
---|
.. | .. |
---|
2178 | 2038 | struct kbase_csf_cmd_stream_info const *stream; |
---|
2179 | 2039 | int csi_index = queue->csi_index; |
---|
2180 | 2040 | u32 cs_oom_ack, cs_oom_req; |
---|
| 2041 | + unsigned long flags; |
---|
2181 | 2042 | |
---|
2182 | 2043 | lockdep_assert_held(&kctx->csf.lock); |
---|
2183 | 2044 | |
---|
.. | .. |
---|
2221 | 2082 | if (cs_oom_ack == cs_oom_req) |
---|
2222 | 2083 | goto unlock; |
---|
2223 | 2084 | |
---|
2224 | | - err = handle_oom_event(kctx, stream); |
---|
| 2085 | + err = handle_oom_event(group, stream); |
---|
2225 | 2086 | |
---|
| 2087 | + kbase_csf_scheduler_spin_lock(kbdev, &flags); |
---|
2226 | 2088 | kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_oom_ack, |
---|
2227 | 2089 | CS_REQ_TILER_OOM_MASK); |
---|
| 2090 | + kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true); |
---|
| 2091 | + kbase_csf_scheduler_spin_unlock(kbdev, flags); |
---|
2228 | 2092 | |
---|
2229 | | - if (err) { |
---|
| 2093 | + if (unlikely(err)) { |
---|
2230 | 2094 | dev_warn( |
---|
2231 | 2095 | kbdev->dev, |
---|
2232 | 2096 | "Queue group to be terminated, couldn't handle the OoM event\n"); |
---|
| 2097 | + kbase_debug_csf_fault_notify(kbdev, kctx, DF_TILER_OOM); |
---|
2233 | 2098 | kbase_csf_scheduler_unlock(kbdev); |
---|
2234 | 2099 | term_queue_group(group); |
---|
| 2100 | + flush_gpu_cache_on_fatal_error(kbdev); |
---|
2235 | 2101 | report_tiler_oom_error(group); |
---|
2236 | 2102 | return; |
---|
2237 | 2103 | } |
---|
2238 | | - |
---|
2239 | | - kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true); |
---|
2240 | 2104 | unlock: |
---|
2241 | 2105 | kbase_csf_scheduler_unlock(kbdev); |
---|
2242 | 2106 | } |
---|
.. | .. |
---|
2258 | 2122 | struct kbase_device *const kbdev = kctx->kbdev; |
---|
2259 | 2123 | |
---|
2260 | 2124 | int err = kbase_reset_gpu_try_prevent(kbdev); |
---|
| 2125 | + |
---|
2261 | 2126 | /* Regardless of whether reset failed or is currently happening, exit |
---|
2262 | 2127 | * early |
---|
2263 | 2128 | */ |
---|
.. | .. |
---|
2294 | 2159 | "Notify the event notification thread, forward progress timeout (%llu cycles)\n", |
---|
2295 | 2160 | kbase_csf_timeout_get(group->kctx->kbdev)); |
---|
2296 | 2161 | |
---|
2297 | | - add_error(group->kctx, &group->error_timeout, &error); |
---|
| 2162 | + kbase_csf_event_add_error(group->kctx, &group->error_timeout, &error); |
---|
2298 | 2163 | kbase_event_wakeup(group->kctx); |
---|
2299 | 2164 | } |
---|
2300 | 2165 | |
---|
.. | .. |
---|
2310 | 2175 | struct kbase_queue_group *const group = |
---|
2311 | 2176 | container_of(data, struct kbase_queue_group, timer_event_work); |
---|
2312 | 2177 | struct kbase_context *const kctx = group->kctx; |
---|
| 2178 | + struct kbase_device *const kbdev = kctx->kbdev; |
---|
2313 | 2179 | bool reset_prevented = false; |
---|
2314 | | - int err = kbase_reset_gpu_prevent_and_wait(kctx->kbdev); |
---|
| 2180 | + int err = kbase_reset_gpu_prevent_and_wait(kbdev); |
---|
2315 | 2181 | |
---|
2316 | 2182 | if (err) |
---|
2317 | 2183 | dev_warn( |
---|
2318 | | - kctx->kbdev->dev, |
---|
| 2184 | + kbdev->dev, |
---|
2319 | 2185 | "Unsuccessful GPU reset detected when terminating group %d on progress timeout, attempting to terminate regardless", |
---|
2320 | 2186 | group->handle); |
---|
2321 | 2187 | else |
---|
.. | .. |
---|
2324 | 2190 | mutex_lock(&kctx->csf.lock); |
---|
2325 | 2191 | |
---|
2326 | 2192 | term_queue_group(group); |
---|
| 2193 | + flush_gpu_cache_on_fatal_error(kbdev); |
---|
2327 | 2194 | report_group_timeout_error(group); |
---|
2328 | 2195 | |
---|
2329 | 2196 | mutex_unlock(&kctx->csf.lock); |
---|
2330 | 2197 | if (reset_prevented) |
---|
2331 | | - kbase_reset_gpu_allow(kctx->kbdev); |
---|
| 2198 | + kbase_reset_gpu_allow(kbdev); |
---|
2332 | 2199 | } |
---|
2333 | 2200 | |
---|
2334 | 2201 | /** |
---|
.. | .. |
---|
2336 | 2203 | * |
---|
2337 | 2204 | * @group: Pointer to GPU queue group for which the timeout event is received. |
---|
2338 | 2205 | * |
---|
| 2206 | + * Notify a waiting user space client of the timeout. |
---|
2339 | 2207 | * Enqueue a work item to terminate the group and notify the event notification |
---|
2340 | 2208 | * thread of progress timeout fault for the GPU command queue group. |
---|
2341 | 2209 | */ |
---|
2342 | 2210 | static void handle_progress_timer_event(struct kbase_queue_group *const group) |
---|
2343 | 2211 | { |
---|
| 2212 | + kbase_debug_csf_fault_notify(group->kctx->kbdev, group->kctx, |
---|
| 2213 | + DF_PROGRESS_TIMER_TIMEOUT); |
---|
| 2214 | + |
---|
2344 | 2215 | queue_work(group->kctx->csf.wq, &group->timer_event_work); |
---|
| 2216 | +} |
---|
| 2217 | + |
---|
| 2218 | +/** |
---|
| 2219 | + * alloc_grp_protected_suspend_buffer_pages() - Allocate physical pages from the protected |
---|
| 2220 | + * memory for the protected mode suspend buffer. |
---|
| 2221 | + * @group: Pointer to the GPU queue group. |
---|
| 2222 | + * |
---|
| 2223 | + * Return: 0 if suspend buffer allocation is successful or if it is already allocated, otherwise
---|
| 2224 | + * negative error value. |
---|
| 2225 | + */ |
---|
| 2226 | +static int alloc_grp_protected_suspend_buffer_pages(struct kbase_queue_group *const group) |
---|
| 2227 | +{ |
---|
| 2228 | + struct kbase_device *const kbdev = group->kctx->kbdev; |
---|
| 2229 | + struct kbase_context *kctx = group->kctx; |
---|
| 2230 | + struct tagged_addr *phys = NULL; |
---|
| 2231 | + struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; |
---|
| 2232 | + size_t nr_pages; |
---|
| 2233 | + int err = 0; |
---|
| 2234 | + |
---|
| 2235 | + if (likely(sbuf->pma)) |
---|
| 2236 | + return 0; |
---|
| 2237 | + |
---|
| 2238 | + nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); |
---|
| 2239 | + phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL); |
---|
| 2240 | + if (unlikely(!phys)) { |
---|
| 2241 | + err = -ENOMEM; |
---|
| 2242 | + goto phys_free; |
---|
| 2243 | + } |
---|
| 2244 | + |
---|
| 2245 | + mutex_lock(&kctx->csf.lock); |
---|
| 2246 | + kbase_csf_scheduler_lock(kbdev); |
---|
| 2247 | + |
---|
| 2248 | + if (unlikely(!group->csg_reg)) { |
---|
| 2249 | + /* The only way the bound csg_reg can have been removed from the group is
---|
| 2250 | + * that the scheduler has put the group off slot and the csg_reg resource
---|
| 2251 | + * is contended by other groups. In this case, another occasion is needed for
---|
| 2252 | + * mapping the pma, which needs a bound csg_reg. Since the group is already
---|
| 2253 | + * off-slot, returning no error is harmless: the scheduler, when it places the
---|
| 2254 | + * group back on-slot again, will do the required MMU map operation on the
---|
| 2255 | + * allocated and retained pma.
---|
| 2256 | + */ |
---|
| 2257 | + WARN_ON(group->csg_nr >= 0); |
---|
| 2258 | + dev_dbg(kbdev->dev, "No bound csg_reg for group_%d_%d_%d to enter protected mode", |
---|
| 2259 | + group->kctx->tgid, group->kctx->id, group->handle); |
---|
| 2260 | + goto unlock; |
---|
| 2261 | + } |
---|
| 2262 | + |
---|
| 2263 | + /* Allocate the protected mode pages */ |
---|
| 2264 | + sbuf->pma = kbase_csf_protected_memory_alloc(kbdev, phys, nr_pages, true); |
---|
| 2265 | + if (unlikely(!sbuf->pma)) { |
---|
| 2266 | + err = -ENOMEM; |
---|
| 2267 | + goto unlock; |
---|
| 2268 | + } |
---|
| 2269 | + |
---|
| 2270 | + /* Map the bound susp_reg to the just allocated pma pages */ |
---|
| 2271 | + err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group); |
---|
| 2272 | + |
---|
| 2273 | +unlock: |
---|
| 2274 | + kbase_csf_scheduler_unlock(kbdev); |
---|
| 2275 | + mutex_unlock(&kctx->csf.lock); |
---|
| 2276 | +phys_free: |
---|
| 2277 | + kfree(phys); |
---|
| 2278 | + return err; |
---|
| 2279 | +} |
---|
| 2280 | + |
---|
| 2281 | +static void report_group_fatal_error(struct kbase_queue_group *const group) |
---|
| 2282 | +{ |
---|
| 2283 | + struct base_gpu_queue_group_error const |
---|
| 2284 | + err_payload = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, |
---|
| 2285 | + .payload = { .fatal_group = { |
---|
| 2286 | + .status = GPU_EXCEPTION_TYPE_SW_FAULT_0, |
---|
| 2287 | + } } }; |
---|
| 2288 | + |
---|
| 2289 | + kbase_csf_add_group_fatal_error(group, &err_payload); |
---|
| 2290 | + kbase_event_wakeup(group->kctx); |
---|
2345 | 2291 | } |
---|
2346 | 2292 | |
---|
2347 | 2293 | /** |
---|
.. | .. |
---|
2356 | 2302 | { |
---|
2357 | 2303 | struct kbase_queue_group *const group = |
---|
2358 | 2304 | container_of(data, struct kbase_queue_group, protm_event_work); |
---|
| 2305 | + struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; |
---|
| 2306 | + int err = 0; |
---|
2359 | 2307 | |
---|
2360 | | - KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_BEGIN, |
---|
| 2308 | + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, |
---|
2361 | 2309 | group, 0u); |
---|
2362 | | - kbase_csf_scheduler_group_protm_enter(group); |
---|
| 2310 | + |
---|
| 2311 | + err = alloc_grp_protected_suspend_buffer_pages(group); |
---|
| 2312 | + if (!err) { |
---|
| 2313 | + kbase_csf_scheduler_group_protm_enter(group); |
---|
| 2314 | + } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) { |
---|
| 2315 | + sbuf->alloc_retries++; |
---|
| 2316 | + /* try again to allocate pages */ |
---|
| 2317 | + queue_work(group->kctx->csf.wq, &group->protm_event_work); |
---|
| 2318 | + } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) { |
---|
| 2319 | + dev_err(group->kctx->kbdev->dev, |
---|
| 2320 | + "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d", |
---|
| 2321 | + group->handle, group->kctx->tgid, group->kctx->id); |
---|
| 2322 | + report_group_fatal_error(group); |
---|
| 2323 | + } |
---|
| 2324 | + |
---|
2363 | 2325 | KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, |
---|
2364 | 2326 | group, 0u); |
---|
2365 | | -} |
---|
2366 | | - |
---|
2367 | | -static void report_queue_fatal_error(struct kbase_queue *const queue, |
---|
2368 | | - u32 cs_fatal, u64 cs_fatal_info, |
---|
2369 | | - u8 group_handle) |
---|
2370 | | -{ |
---|
2371 | | - struct base_csf_notification error = |
---|
2372 | | - { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, |
---|
2373 | | - .payload = { |
---|
2374 | | - .csg_error = { |
---|
2375 | | - .handle = group_handle, |
---|
2376 | | - .error = { |
---|
2377 | | - .error_type = |
---|
2378 | | - BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, |
---|
2379 | | - .payload = { |
---|
2380 | | - .fatal_queue = { |
---|
2381 | | - .sideband = |
---|
2382 | | - cs_fatal_info, |
---|
2383 | | - .status = cs_fatal, |
---|
2384 | | - .csi_index = |
---|
2385 | | - queue->csi_index, |
---|
2386 | | - } } } } } }; |
---|
2387 | | - |
---|
2388 | | - add_error(queue->kctx, &queue->error, &error); |
---|
2389 | | - kbase_event_wakeup(queue->kctx); |
---|
2390 | 2327 | } |
---|
2391 | 2328 | |
---|
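
The protected-mode worker above retries a transient allocation failure by re-queuing itself up to PROTM_ALLOC_MAX_RETRIES times before reporting a fatal error. The sketch below shows the same bounded self-requeue pattern in isolation; all demo_* names are hypothetical and the allocation is stubbed out.

```c
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/printk.h>
#include <linux/workqueue.h>

#define DEMO_MAX_RETRIES 5

struct demo_job {
	struct work_struct work;
	struct workqueue_struct *wq;
	unsigned int retries;
};

/* Stub standing in for an allocation that can fail transiently. */
static int demo_try_alloc(struct demo_job *job)
{
	return -ENOMEM;
}

static void demo_worker(struct work_struct *work)
{
	struct demo_job *job = container_of(work, struct demo_job, work);
	int err = demo_try_alloc(job);

	if (!err)
		return;                            /* success */

	if (err == -ENOMEM && job->retries++ < DEMO_MAX_RETRIES)
		queue_work(job->wq, &job->work);   /* transient: try again later */
	else
		pr_err("demo: giving up after %u retries\n", job->retries);
}
```
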
2392 | 2329 | /** |
---|
2393 | 2330 | * handle_fault_event - Handler for CS fault. |
---|
2394 | 2331 | * |
---|
2395 | 2332 | * @queue: Pointer to queue for which fault event was received. |
---|
2396 | | - * @stream: Pointer to the structure containing info provided by the |
---|
2397 | | - * firmware about the CSI. |
---|
| 2333 | + * @cs_ack: Value of the CS_ACK register in the CS kernel input page used for |
---|
| 2334 | + * the queue. |
---|
2398 | 2335 | * |
---|
2399 | | - * Prints meaningful CS fault information. |
---|
2400 | | - * |
---|
| 2336 | + * Print relevant information about the CS fault and notify the user space client
---|
| 2337 | + * about the fault.
---|
2401 | 2338 | */ |
---|
2402 | 2339 | static void |
---|
2403 | | -handle_fault_event(struct kbase_queue *const queue, |
---|
2404 | | - struct kbase_csf_cmd_stream_info const *const stream) |
---|
| 2340 | +handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack) |
---|
2405 | 2341 | { |
---|
| 2342 | + struct kbase_device *const kbdev = queue->kctx->kbdev; |
---|
| 2343 | + struct kbase_csf_cmd_stream_group_info const *ginfo = |
---|
| 2344 | + &kbdev->csf.global_iface.groups[queue->group->csg_nr]; |
---|
| 2345 | + struct kbase_csf_cmd_stream_info const *stream = |
---|
| 2346 | + &ginfo->streams[queue->csi_index]; |
---|
2406 | 2347 | const u32 cs_fault = kbase_csf_firmware_cs_output(stream, CS_FAULT); |
---|
2407 | 2348 | const u64 cs_fault_info = |
---|
2408 | 2349 | kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_LO) | |
---|
.. | .. |
---|
2414 | 2355 | CS_FAULT_EXCEPTION_DATA_GET(cs_fault); |
---|
2415 | 2356 | const u64 cs_fault_info_exception_data = |
---|
2416 | 2357 | CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info); |
---|
2417 | | - struct kbase_device *const kbdev = queue->kctx->kbdev; |
---|
2418 | 2358 | |
---|
2419 | 2359 | kbase_csf_scheduler_spin_lock_assert_held(kbdev); |
---|
2420 | 2360 | |
---|
.. | .. |
---|
2429 | 2369 | kbase_gpu_exception_name(cs_fault_exception_type), |
---|
2430 | 2370 | cs_fault_exception_data, cs_fault_info_exception_data); |
---|
2431 | 2371 | |
---|
2432 | | - if (cs_fault_exception_type == |
---|
2433 | | - CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT) |
---|
2434 | | - report_queue_fatal_error(queue, GPU_EXCEPTION_TYPE_SW_FAULT_2, |
---|
2435 | | - 0, queue->group->handle); |
---|
| 2372 | + |
---|
| 2373 | +#if IS_ENABLED(CONFIG_DEBUG_FS) |
---|
| 2374 | + /* CS_RESOURCE_TERMINATED type fault event can be ignored from the |
---|
| 2375 | + * standpoint of dump on error. It is used to report fault for the CSIs |
---|
| 2376 | + * that are associated with the same CSG as the CSI for which the actual |
---|
| 2377 | + * fault was reported by the Iterator. |
---|
| 2378 | + * Dumping would be triggered when the actual fault is reported. |
---|
| 2379 | + * |
---|
| 2380 | + * CS_INHERIT_FAULT can also be ignored. It could happen due to the error |
---|
| 2381 | + * in other types of queues (cpu/kcpu). If a fault had occurred in some |
---|
| 2382 | + * other GPU queue then the dump would have been performed anyways when |
---|
| 2383 | + * that fault was reported. |
---|
| 2384 | + */ |
---|
| 2385 | + if ((cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT) && |
---|
| 2386 | + (cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED)) { |
---|
| 2387 | + if (unlikely(kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) { |
---|
| 2388 | + get_queue(queue); |
---|
| 2389 | + queue->cs_error = cs_fault; |
---|
| 2390 | + queue->cs_error_info = cs_fault_info; |
---|
| 2391 | + queue->cs_error_fatal = false; |
---|
| 2392 | + if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work)) |
---|
| 2393 | + release_queue(queue); |
---|
| 2394 | + return; |
---|
| 2395 | + } |
---|
| 2396 | + } |
---|
| 2397 | +#endif |
---|
| 2398 | + |
---|
| 2399 | + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, |
---|
| 2400 | + CS_REQ_FAULT_MASK); |
---|
| 2401 | + kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, true); |
---|
| 2402 | +} |
---|
| 2403 | + |
---|
| 2404 | +static void report_queue_fatal_error(struct kbase_queue *const queue, |
---|
| 2405 | + u32 cs_fatal, u64 cs_fatal_info, |
---|
| 2406 | + u8 group_handle) |
---|
| 2407 | +{ |
---|
| 2408 | + struct base_csf_notification error = { |
---|
| 2409 | + .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, |
---|
| 2410 | + .payload = { |
---|
| 2411 | + .csg_error = { |
---|
| 2412 | + .handle = group_handle, |
---|
| 2413 | + .error = { |
---|
| 2414 | + .error_type = |
---|
| 2415 | + BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, |
---|
| 2416 | + .payload = { |
---|
| 2417 | + .fatal_queue = { |
---|
| 2418 | + .sideband = cs_fatal_info, |
---|
| 2419 | + .status = cs_fatal, |
---|
| 2420 | + .csi_index = queue->csi_index, |
---|
| 2421 | + } |
---|
| 2422 | + } |
---|
| 2423 | + } |
---|
| 2424 | + } |
---|
| 2425 | + } |
---|
| 2426 | + }; |
---|
| 2427 | + |
---|
| 2428 | + kbase_csf_event_add_error(queue->kctx, &queue->error, &error); |
---|
| 2429 | + kbase_event_wakeup(queue->kctx); |
---|
2436 | 2430 | } |
---|
2437 | 2431 | |
---|
2438 | 2432 | /** |
---|
2439 | | - * fatal_event_worker - Handle the fatal error for the GPU queue |
---|
| 2433 | + * fatal_event_worker - Handle the CS_FATAL/CS_FAULT error for the GPU queue |
---|
2440 | 2434 | * |
---|
2441 | 2435 | * @data: Pointer to a work_struct embedded in GPU command queue. |
---|
2442 | 2436 | * |
---|
2443 | 2437 | * Terminate the CSG and report the error to userspace. |
---|
2444 | 2438 | */ |
---|
2445 | | -static void fatal_event_worker(struct work_struct *const data) |
---|
| 2439 | +static void cs_error_worker(struct work_struct *const data) |
---|
2446 | 2440 | { |
---|
2447 | 2441 | struct kbase_queue *const queue = |
---|
2448 | | - container_of(data, struct kbase_queue, fatal_event_work); |
---|
| 2442 | + container_of(data, struct kbase_queue, cs_error_work); |
---|
2449 | 2443 | struct kbase_context *const kctx = queue->kctx; |
---|
2450 | 2444 | struct kbase_device *const kbdev = kctx->kbdev; |
---|
2451 | 2445 | struct kbase_queue_group *group; |
---|
2452 | 2446 | u8 group_handle; |
---|
2453 | 2447 | bool reset_prevented = false; |
---|
2454 | | - int err = kbase_reset_gpu_prevent_and_wait(kbdev); |
---|
| 2448 | + int err; |
---|
| 2449 | + |
---|
| 2450 | + kbase_debug_csf_fault_wait_completion(kbdev); |
---|
| 2451 | + err = kbase_reset_gpu_prevent_and_wait(kbdev); |
---|
2455 | 2452 | |
---|
2456 | 2453 | if (err) |
---|
2457 | 2454 | dev_warn( |
---|
.. | .. |
---|
2468 | 2465 | goto unlock; |
---|
2469 | 2466 | } |
---|
2470 | 2467 | |
---|
| 2468 | +#if IS_ENABLED(CONFIG_DEBUG_FS) |
---|
| 2469 | + if (!queue->cs_error_fatal) { |
---|
| 2470 | + unsigned long flags; |
---|
| 2471 | + int slot_num; |
---|
| 2472 | + |
---|
| 2473 | + kbase_csf_scheduler_spin_lock(kbdev, &flags); |
---|
| 2474 | + slot_num = kbase_csf_scheduler_group_get_slot_locked(group); |
---|
| 2475 | + if (slot_num >= 0) { |
---|
| 2476 | + struct kbase_csf_cmd_stream_group_info const *ginfo = |
---|
| 2477 | + &kbdev->csf.global_iface.groups[slot_num]; |
---|
| 2478 | + struct kbase_csf_cmd_stream_info const *stream = |
---|
| 2479 | + &ginfo->streams[queue->csi_index]; |
---|
| 2480 | + u32 const cs_ack = |
---|
| 2481 | + kbase_csf_firmware_cs_output(stream, CS_ACK); |
---|
| 2482 | + |
---|
| 2483 | + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, |
---|
| 2484 | + CS_REQ_FAULT_MASK); |
---|
| 2485 | + kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, |
---|
| 2486 | + slot_num, true); |
---|
| 2487 | + } |
---|
| 2488 | + kbase_csf_scheduler_spin_unlock(kbdev, flags); |
---|
| 2489 | + goto unlock; |
---|
| 2490 | + } |
---|
| 2491 | +#endif |
---|
| 2492 | + |
---|
2471 | 2493 | group_handle = group->handle; |
---|
2472 | 2494 | term_queue_group(group); |
---|
2473 | | - report_queue_fatal_error(queue, queue->cs_fatal, queue->cs_fatal_info, |
---|
| 2495 | + flush_gpu_cache_on_fatal_error(kbdev); |
---|
| 2496 | + report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info, |
---|
2474 | 2497 | group_handle); |
---|
2475 | 2498 | |
---|
2476 | 2499 | unlock: |
---|
.. | .. |
---|
2486 | 2509 | * @queue: Pointer to queue for which fatal event was received. |
---|
2487 | 2510 | * @stream: Pointer to the structure containing info provided by the |
---|
2488 | 2511 | * firmware about the CSI. |
---|
| 2512 | + * @cs_ack: Value of the CS_ACK register in the CS kernel input page used for |
---|
| 2513 | + * the queue. |
---|
2489 | 2514 | * |
---|
2490 | | - * Prints meaningful CS fatal information. |
---|
| 2515 | + * Notify a waiting user space client of the CS fatal and print meaningful
---|
| 2516 | + * information. |
---|
2491 | 2517 | * Enqueue a work item to terminate the group and report the fatal error |
---|
2492 | 2518 | * to user space. |
---|
2493 | 2519 | */ |
---|
2494 | 2520 | static void |
---|
2495 | 2521 | handle_fatal_event(struct kbase_queue *const queue, |
---|
2496 | | - struct kbase_csf_cmd_stream_info const *const stream) |
---|
| 2522 | + struct kbase_csf_cmd_stream_info const *const stream, |
---|
| 2523 | + u32 cs_ack) |
---|
2497 | 2524 | { |
---|
2498 | 2525 | const u32 cs_fatal = kbase_csf_firmware_cs_output(stream, CS_FATAL); |
---|
2499 | 2526 | const u64 cs_fatal_info = |
---|
.. | .. |
---|
2523 | 2550 | |
---|
2524 | 2551 | if (cs_fatal_exception_type == |
---|
2525 | 2552 | CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) { |
---|
| 2553 | + kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_FW_INTERNAL_ERROR); |
---|
2526 | 2554 | queue_work(system_wq, &kbdev->csf.fw_error_work); |
---|
2527 | 2555 | } else { |
---|
| 2556 | + kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FATAL); |
---|
| 2557 | + if (cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE) { |
---|
| 2558 | + queue->group->cs_unrecoverable = true; |
---|
| 2559 | + if (kbase_prepare_to_reset_gpu(queue->kctx->kbdev, RESET_FLAGS_NONE)) |
---|
| 2560 | + kbase_reset_gpu(queue->kctx->kbdev); |
---|
| 2561 | + } |
---|
2528 | 2562 | get_queue(queue); |
---|
2529 | | - queue->cs_fatal = cs_fatal; |
---|
2530 | | - queue->cs_fatal_info = cs_fatal_info; |
---|
2531 | | - if (!queue_work(queue->kctx->csf.wq, &queue->fatal_event_work)) |
---|
| 2563 | + queue->cs_error = cs_fatal; |
---|
| 2564 | + queue->cs_error_info = cs_fatal_info; |
---|
| 2565 | + queue->cs_error_fatal = true; |
---|
| 2566 | + if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work)) |
---|
2532 | 2567 | release_queue(queue); |
---|
2533 | 2568 | } |
---|
2534 | | -} |
---|
2535 | 2569 | |
---|
2536 | | -/** |
---|
2537 | | - * handle_queue_exception_event - Handler for CS fatal/fault exception events. |
---|
2538 | | - * |
---|
2539 | | - * @queue: Pointer to queue for which fatal/fault event was received. |
---|
2540 | | - * @cs_req: Value of the CS_REQ register from the CS's input page. |
---|
2541 | | - * @cs_ack: Value of the CS_ACK register from the CS's output page. |
---|
2542 | | - */ |
---|
2543 | | -static void handle_queue_exception_event(struct kbase_queue *const queue, |
---|
2544 | | - const u32 cs_req, const u32 cs_ack) |
---|
2545 | | -{ |
---|
2546 | | - struct kbase_csf_cmd_stream_group_info const *ginfo; |
---|
2547 | | - struct kbase_csf_cmd_stream_info const *stream; |
---|
2548 | | - struct kbase_context *const kctx = queue->kctx; |
---|
2549 | | - struct kbase_device *const kbdev = kctx->kbdev; |
---|
2550 | | - struct kbase_queue_group *group = queue->group; |
---|
2551 | | - int csi_index = queue->csi_index; |
---|
2552 | | - int slot_num = group->csg_nr; |
---|
| 2570 | + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, |
---|
| 2571 | + CS_REQ_FATAL_MASK); |
---|
2553 | 2572 | |
---|
2554 | | - kbase_csf_scheduler_spin_lock_assert_held(kbdev); |
---|
2555 | | - |
---|
2556 | | - ginfo = &kbdev->csf.global_iface.groups[slot_num]; |
---|
2557 | | - stream = &ginfo->streams[csi_index]; |
---|
2558 | | - |
---|
2559 | | - if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) { |
---|
2560 | | - handle_fatal_event(queue, stream); |
---|
2561 | | - kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, |
---|
2562 | | - CS_REQ_FATAL_MASK); |
---|
2563 | | - } |
---|
2564 | | - |
---|
2565 | | - if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) { |
---|
2566 | | - handle_fault_event(queue, stream); |
---|
2567 | | - kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, |
---|
2568 | | - CS_REQ_FAULT_MASK); |
---|
2569 | | - kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true); |
---|
2570 | | - } |
---|
2571 | 2573 | } |
---|
2572 | 2574 | |
---|
2573 | 2575 | /** |
---|
.. | .. |
---|
2577 | 2579 | * @ginfo: The CSG interface provided by the firmware. |
---|
2578 | 2580 | * @irqreq: CSG's IRQ request bitmask (one bit per CS). |
---|
2579 | 2581 | * @irqack: CSG's IRQ acknowledge bitmask (one bit per CS). |
---|
| 2582 | + * @track: Pointer used to track the highest scanout priority idle CSG
---|
| 2583 | + * and any newly observed, potentially viable CSG requesting
---|
| 2584 | + * protected mode in the current IRQ context.
---|
2580 | 2585 | * |
---|
2581 | 2586 | * If the interrupt request bitmask differs from the acknowledge bitmask |
---|
2582 | 2587 | * then the firmware is notifying the host of an event concerning those |
---|
.. | .. |
---|
2585 | 2590 | * the request and acknowledge registers for the individual CS(s). |
---|
2586 | 2591 | */ |
---|
2587 | 2592 | static void process_cs_interrupts(struct kbase_queue_group *const group, |
---|
2588 | | - struct kbase_csf_cmd_stream_group_info const *const ginfo, |
---|
2589 | | - u32 const irqreq, u32 const irqack) |
---|
| 2593 | + struct kbase_csf_cmd_stream_group_info const *const ginfo, |
---|
| 2594 | + u32 const irqreq, u32 const irqack, |
---|
| 2595 | + struct irq_idle_and_protm_track *track) |
---|
2590 | 2596 | { |
---|
2591 | 2597 | struct kbase_device *const kbdev = group->kctx->kbdev; |
---|
2592 | 2598 | u32 remaining = irqreq ^ irqack; |
---|
.. | .. |
---|
2616 | 2622 | kbase_csf_firmware_cs_output(stream, CS_ACK); |
---|
2617 | 2623 | struct workqueue_struct *wq = group->kctx->csf.wq; |
---|
2618 | 2624 | |
---|
2619 | | - if ((cs_req & CS_REQ_EXCEPTION_MASK) ^ |
---|
2620 | | - (cs_ack & CS_ACK_EXCEPTION_MASK)) { |
---|
2621 | | - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_FAULT_INTERRUPT, group, queue, cs_req ^ cs_ack); |
---|
2622 | | - handle_queue_exception_event(queue, cs_req, cs_ack); |
---|
| 2625 | + if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) { |
---|
| 2626 | + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT, |
---|
| 2627 | + group, queue, cs_req ^ cs_ack); |
---|
| 2628 | + handle_fatal_event(queue, stream, cs_ack); |
---|
| 2629 | + } |
---|
| 2630 | + |
---|
| 2631 | + if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) { |
---|
| 2632 | + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT, |
---|
| 2633 | + group, queue, cs_req ^ cs_ack); |
---|
| 2634 | + handle_fault_event(queue, cs_ack); |
---|
2623 | 2635 | } |
---|
2624 | 2636 | |
---|
2625 | 2637 | /* PROTM_PEND and TILER_OOM can be safely ignored |
---|
.. | .. |
---|
2630 | 2642 | u32 const cs_req_remain = cs_req & ~CS_REQ_EXCEPTION_MASK; |
---|
2631 | 2643 | u32 const cs_ack_remain = cs_ack & ~CS_ACK_EXCEPTION_MASK; |
---|
2632 | 2644 | |
---|
2633 | | - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND, |
---|
2634 | | - group, queue, cs_req_remain ^ cs_ack_remain); |
---|
| 2645 | + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, |
---|
| 2646 | + CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED, |
---|
| 2647 | + group, queue, |
---|
| 2648 | + cs_req_remain ^ cs_ack_remain); |
---|
2635 | 2649 | continue; |
---|
2636 | 2650 | } |
---|
2637 | 2651 | |
---|
2638 | 2652 | if (((cs_req & CS_REQ_TILER_OOM_MASK) ^ |
---|
2639 | 2653 | (cs_ack & CS_ACK_TILER_OOM_MASK))) { |
---|
2640 | 2654 | get_queue(queue); |
---|
2641 | | - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_TILER_OOM_INTERRUPT, group, queue, |
---|
2642 | | - cs_req ^ cs_ack); |
---|
2643 | | - if (WARN_ON(!queue_work(wq, &queue->oom_event_work))) { |
---|
| 2655 | + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM, |
---|
| 2656 | + group, queue, cs_req ^ cs_ack); |
---|
| 2657 | + if (!queue_work(wq, &queue->oom_event_work)) { |
---|
2644 | 2658 | /* The work item should not already
---|
2645 | 2659 | * have been queued, since there can
---|
2646 | 2660 | * be only one pending OoM event for
---|
2647 | 2661 | * a queue.
---|
2648 | 2662 | */
---|
| 2663 | + dev_warn( |
---|
| 2664 | + kbdev->dev, |
---|
| 2665 | + "Tiler OOM work pending: queue %d group %d (ctx %d_%d)", |
---|
| 2666 | + queue->csi_index, group->handle, queue->kctx->tgid, |
---|
| 2667 | + queue->kctx->id); |
---|
2649 | 2668 | release_queue(queue); |
---|
2650 | 2669 | } |
---|
2651 | 2670 | } |
---|
2652 | 2671 | |
---|
2653 | 2672 | if ((cs_req & CS_REQ_PROTM_PEND_MASK) ^ |
---|
2654 | 2673 | (cs_ack & CS_ACK_PROTM_PEND_MASK)) { |
---|
2655 | | - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_INTERRUPT, group, queue, |
---|
2656 | | - cs_req ^ cs_ack); |
---|
| 2674 | + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_PROTM_PEND, |
---|
| 2675 | + group, queue, cs_req ^ cs_ack); |
---|
2657 | 2676 | |
---|
2658 | 2677 | dev_dbg(kbdev->dev, |
---|
2659 | 2678 | "Protected mode entry request for queue on csi %d bound to group-%d on slot %d", |
---|
.. | .. |
---|
2661 | 2680 | group->csg_nr); |
---|
2662 | 2681 | |
---|
2663 | 2682 | bitmap_set(group->protm_pending_bitmap, i, 1); |
---|
2664 | | - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_SET, group, queue, |
---|
| 2683 | + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_SET, group, queue, |
---|
2665 | 2684 | group->protm_pending_bitmap[0]); |
---|
2666 | 2685 | protm_pend = true; |
---|
2667 | 2686 | } |
---|
2668 | 2687 | } |
---|
2669 | 2688 | } |
---|
2670 | 2689 | |
---|
2671 | | - if (protm_pend) |
---|
2672 | | - queue_work(group->kctx->csf.wq, &group->protm_event_work); |
---|
| 2690 | + if (protm_pend) { |
---|
| 2691 | + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
---|
| 2692 | + |
---|
| 2693 | + if (scheduler->tick_protm_pending_seq > group->scan_seq_num) { |
---|
| 2694 | + scheduler->tick_protm_pending_seq = group->scan_seq_num; |
---|
| 2695 | + track->protm_grp = group; |
---|
| 2696 | + } |
---|
| 2697 | + |
---|
| 2698 | + if (!group->protected_suspend_buf.pma) |
---|
| 2699 | + queue_work(group->kctx->csf.wq, &group->protm_event_work); |
---|
| 2700 | + |
---|
| 2701 | + if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) { |
---|
| 2702 | + clear_bit(group->csg_nr, |
---|
| 2703 | + scheduler->csg_slots_idle_mask); |
---|
| 2704 | + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, |
---|
| 2705 | + scheduler->csg_slots_idle_mask[0]); |
---|
| 2706 | + dev_dbg(kbdev->dev, |
---|
| 2707 | + "Group-%d on slot %d de-idled by protm request", |
---|
| 2708 | + group->handle, group->csg_nr); |
---|
| 2709 | + } |
---|
| 2710 | + } |
---|
2673 | 2711 | } |
---|
2674 | 2712 | |
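/*
 * Illustrative sketch, not part of the driver: the per-CS IRQ demux
 * described for process_cs_interrupts() reduces to XOR-ing the request and
 * acknowledge bitmasks and walking the set bits. The helper name is
 * hypothetical; u32 and ffs() are the kernel types/helpers already used in
 * this file.
 */
static void __maybe_unused example_walk_changed_cs_bits(u32 irqreq, u32 irqack)
{
	u32 remaining = irqreq ^ irqack; /* one bit set per CS with a pending event */

	while (remaining != 0) {
		int const i = ffs(remaining) - 1; /* lowest CS index with an event */

		/* ...examine that CS's CS_REQ/CS_ACK values to identify the event... */
		remaining &= ~(1U << i);
	}
}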
---|
2675 | 2713 | /** |
---|
.. | .. |
---|
2677 | 2715 | * |
---|
2678 | 2716 | * @kbdev: Instance of a GPU platform device that implements a CSF interface. |
---|
2679 | 2717 | * @csg_nr: CSG number. |
---|
 | 2718 | + * @track: Pointer that tracks the highest-priority idle CSG and any newly
---|
 | 2719 | + * viable protected mode requesting group in the current IRQ context.
---|
2680 | 2720 | * |
---|
2681 | 2721 | * Handles interrupts for a CSG and for CSs within it. |
---|
2682 | 2722 | * |
---|
.. | .. |
---|
2687 | 2727 | * |
---|
2688 | 2728 | * See process_cs_interrupts() for details of per-stream interrupt handling. |
---|
2689 | 2729 | */ |
---|
2690 | | -static void process_csg_interrupts(struct kbase_device *const kbdev, |
---|
2691 | | - int const csg_nr) |
---|
| 2730 | +static void process_csg_interrupts(struct kbase_device *const kbdev, int const csg_nr, |
---|
| 2731 | + struct irq_idle_and_protm_track *track) |
---|
2692 | 2732 | { |
---|
2693 | 2733 | struct kbase_csf_cmd_stream_group_info *ginfo; |
---|
2694 | 2734 | struct kbase_queue_group *group = NULL; |
---|
.. | .. |
---|
2699 | 2739 | if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num)) |
---|
2700 | 2740 | return; |
---|
2701 | 2741 | |
---|
2702 | | - KBASE_KTRACE_ADD(kbdev, CSG_INTERRUPT_PROCESS, NULL, csg_nr); |
---|
2703 | | - |
---|
2704 | 2742 | ginfo = &kbdev->csf.global_iface.groups[csg_nr]; |
---|
2705 | 2743 | req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ); |
---|
2706 | 2744 | ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); |
---|
.. | .. |
---|
2709 | 2747 | |
---|
2710 | 2748 | /* There may not be any pending CSG/CS interrupts to process */ |
---|
2711 | 2749 | if ((req == ack) && (irqreq == irqack)) |
---|
2712 | | - goto out; |
---|
| 2750 | + return; |
---|
2713 | 2751 | |
---|
2714 | 2752 | /* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before |
---|
2715 | 2753 | * examining the CS_ACK & CS_REQ bits. This would ensure that Host |
---|
.. | .. |
---|
2730 | 2768 | * slot scheduler spinlock is required. |
---|
2731 | 2769 | */ |
---|
2732 | 2770 | if (!group) |
---|
2733 | | - goto out; |
---|
| 2771 | + return; |
---|
2734 | 2772 | |
---|
2735 | 2773 | if (WARN_ON(kbase_csf_scheduler_group_get_slot_locked(group) != csg_nr)) |
---|
2736 | | - goto out; |
---|
| 2774 | + return; |
---|
| 2775 | + |
---|
| 2776 | + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr); |
---|
2737 | 2777 | |
---|
2738 | 2778 | if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) { |
---|
2739 | 2779 | kbase_csf_firmware_csg_input_mask(ginfo, |
---|
2740 | 2780 | CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK); |
---|
2741 | 2781 | |
---|
2742 | | - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SYNC_UPDATE_INTERRUPT, group, req ^ ack); |
---|
| 2782 | + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_SYNC_UPDATE, group, req ^ ack); |
---|
| 2783 | + |
---|
| 2784 | + /* SYNC_UPDATE events shall invalidate GPU idle event */ |
---|
| 2785 | + atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true); |
---|
| 2786 | + |
---|
2743 | 2787 | kbase_csf_event_signal_cpu_only(group->kctx); |
---|
2744 | 2788 | } |
---|
2745 | 2789 | |
---|
2746 | 2790 | if ((req ^ ack) & CSG_REQ_IDLE_MASK) { |
---|
2747 | 2791 | struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
---|
| 2792 | + |
---|
| 2793 | + KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE( |
---|
| 2794 | + kbdev, kbdev->gpu_props.props.raw_props.gpu_id, csg_nr); |
---|
2748 | 2795 | |
---|
2749 | 2796 | kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, |
---|
2750 | 2797 | CSG_REQ_IDLE_MASK); |
---|
.. | .. |
---|
2752 | 2799 | set_bit(csg_nr, scheduler->csg_slots_idle_mask); |
---|
2753 | 2800 | KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, group, |
---|
2754 | 2801 | scheduler->csg_slots_idle_mask[0]); |
---|
2755 | | - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_IDLE_INTERRUPT, group, req ^ ack); |
---|
| 2802 | + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_IDLE, group, req ^ ack); |
---|
2756 | 2803 | dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n", |
---|
2757 | 2804 | group->handle, csg_nr); |
---|
2758 | 2805 | |
---|
2759 | | - /* Check if the scheduling tick can be advanced */ |
---|
2760 | | - if (kbase_csf_scheduler_all_csgs_idle(kbdev) && |
---|
2761 | | - !scheduler->gpu_idle_fw_timer_enabled) { |
---|
2762 | | - kbase_csf_scheduler_advance_tick_nolock(kbdev); |
---|
| 2806 | + if (atomic_read(&scheduler->non_idle_offslot_grps)) { |
---|
| 2807 | + /* If there are non-idle CSGs waiting for a slot, fire |
---|
| 2808 | + * a tock for a replacement. |
---|
| 2809 | + */ |
---|
| 2810 | + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NON_IDLE_GROUPS, |
---|
| 2811 | + group, req ^ ack); |
---|
| 2812 | + kbase_csf_scheduler_invoke_tock(kbdev); |
---|
| 2813 | + } else { |
---|
| 2814 | + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NO_NON_IDLE_GROUPS, |
---|
| 2815 | + group, req ^ ack); |
---|
| 2816 | + } |
---|
| 2817 | + |
---|
| 2818 | + if (group->scan_seq_num < track->idle_seq) { |
---|
| 2819 | + track->idle_seq = group->scan_seq_num; |
---|
| 2820 | + track->idle_slot = csg_nr; |
---|
2763 | 2821 | } |
---|
2764 | 2822 | } |
---|
2765 | 2823 | |
---|
2766 | 2824 | if ((req ^ ack) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK) { |
---|
2767 | 2825 | kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, |
---|
2768 | | - CSG_REQ_PROGRESS_TIMER_EVENT_MASK); |
---|
| 2826 | + CSG_REQ_PROGRESS_TIMER_EVENT_MASK); |
---|
2769 | 2827 | |
---|
2770 | | - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PROGRESS_TIMER_INTERRUPT, |
---|
2771 | | - group, req ^ ack); |
---|
2772 | | - dev_info(kbdev->dev, |
---|
2773 | | - "Timeout notification received for group %u of ctx %d_%d on slot %d\n", |
---|
2774 | | - group->handle, group->kctx->tgid, group->kctx->id, csg_nr); |
---|
| 2828 | + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT, group, |
---|
| 2829 | + req ^ ack); |
---|
| 2830 | + dev_info( |
---|
| 2831 | + kbdev->dev, |
---|
| 2832 | + "[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %d\n", |
---|
| 2833 | + kbase_backend_get_cycle_cnt(kbdev), group->handle, group->kctx->tgid, |
---|
| 2834 | + group->kctx->id, csg_nr); |
---|
2775 | 2835 | |
---|
2776 | 2836 | handle_progress_timer_event(group); |
---|
2777 | 2837 | } |
---|
2778 | 2838 | |
---|
2779 | | - process_cs_interrupts(group, ginfo, irqreq, irqack); |
---|
| 2839 | + process_cs_interrupts(group, ginfo, irqreq, irqack, track); |
---|
2780 | 2840 | |
---|
2781 | | -out: |
---|
2782 | | - /* group may still be NULL here */ |
---|
2783 | 2841 | KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_END, group, |
---|
2784 | 2842 | ((u64)req ^ ack) | (((u64)irqreq ^ irqack) << 32)); |
---|
2785 | 2843 | } |
---|
.. | .. |
---|
2868 | 2926 | } |
---|
2869 | 2927 | } |
---|
2870 | 2928 | |
---|
| 2929 | +/** |
---|
| 2930 | + * check_protm_enter_req_complete - Check if PROTM_ENTER request completed |
---|
| 2931 | + * |
---|
| 2932 | + * @kbdev: Instance of a GPU platform device that implements a CSF interface. |
---|
| 2933 | + * @glb_req: Global request register value. |
---|
| 2934 | + * @glb_ack: Global acknowledge register value. |
---|
| 2935 | + * |
---|
 | 2936 | + * This function checks whether the PROTM_ENTER Global request has completed
---|
 | 2937 | + * and, if so, notifies components such as IPA, HWC and IPA_CONTROL about the
---|
 | 2938 | + * protected mode entry.
---|
| 2939 | + */ |
---|
| 2940 | +static inline void check_protm_enter_req_complete(struct kbase_device *kbdev, |
---|
| 2941 | + u32 glb_req, u32 glb_ack) |
---|
| 2942 | +{ |
---|
| 2943 | + lockdep_assert_held(&kbdev->hwaccess_lock); |
---|
| 2944 | + kbase_csf_scheduler_spin_lock_assert_held(kbdev); |
---|
| 2945 | + |
---|
| 2946 | + if (likely(!kbdev->csf.scheduler.active_protm_grp)) |
---|
| 2947 | + return; |
---|
| 2948 | + |
---|
| 2949 | + if (kbdev->protected_mode) |
---|
| 2950 | + return; |
---|
| 2951 | + |
---|
| 2952 | + if ((glb_req & GLB_REQ_PROTM_ENTER_MASK) != |
---|
| 2953 | + (glb_ack & GLB_REQ_PROTM_ENTER_MASK)) |
---|
| 2954 | + return; |
---|
| 2955 | + |
---|
| 2956 | + dev_dbg(kbdev->dev, "Protected mode entry interrupt received"); |
---|
| 2957 | + |
---|
| 2958 | + kbdev->protected_mode = true; |
---|
| 2959 | + kbase_ipa_protection_mode_switch_event(kbdev); |
---|
| 2960 | + kbase_ipa_control_protm_entered(kbdev); |
---|
| 2961 | + kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface); |
---|
| 2962 | +} |
---|
| 2963 | + |
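/*
 * Illustrative sketch, not part of the driver: check_protm_enter_req_complete()
 * assumes the request/acknowledge handshake used for Global requests, where a
 * request bit toggled in GLB_REQ is mirrored in GLB_ACK once the firmware has
 * served it, so "complete" is simply equality of the masked bits. The helper
 * name is hypothetical.
 */
static inline bool __maybe_unused example_glb_req_complete(u32 glb_req, u32 glb_ack, u32 mask)
{
	/* e.g. mask = GLB_REQ_PROTM_ENTER_MASK for the check above */
	return (glb_req & mask) == (glb_ack & mask);
}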
---|
| 2964 | +/** |
---|
| 2965 | + * process_protm_exit - Handle the protected mode exit interrupt |
---|
| 2966 | + * |
---|
| 2967 | + * @kbdev: Instance of a GPU platform device that implements a CSF interface. |
---|
| 2968 | + * @glb_ack: Global acknowledge register value. |
---|
| 2969 | + * |
---|
 | 2970 | + * This function handles the PROTM_EXIT interrupt and notifies components
---|
 | 2971 | + * such as HWC and IPA_CONTROL about the protected mode exit.
---|
| 2972 | + */ |
---|
| 2973 | +static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack) |
---|
| 2974 | +{ |
---|
| 2975 | + const struct kbase_csf_global_iface *const global_iface = |
---|
| 2976 | + &kbdev->csf.global_iface; |
---|
| 2977 | + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
---|
| 2978 | + |
---|
| 2979 | + lockdep_assert_held(&kbdev->hwaccess_lock); |
---|
| 2980 | + kbase_csf_scheduler_spin_lock_assert_held(kbdev); |
---|
| 2981 | + |
---|
| 2982 | + dev_dbg(kbdev->dev, "Protected mode exit interrupt received"); |
---|
| 2983 | + |
---|
| 2984 | + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_ack, |
---|
| 2985 | + GLB_REQ_PROTM_EXIT_MASK); |
---|
| 2986 | + |
---|
| 2987 | + if (likely(scheduler->active_protm_grp)) { |
---|
| 2988 | + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT, |
---|
| 2989 | + scheduler->active_protm_grp, 0u); |
---|
| 2990 | + scheduler->active_protm_grp = NULL; |
---|
| 2991 | + } else { |
---|
| 2992 | + dev_warn(kbdev->dev, "PROTM_EXIT interrupt after no pmode group"); |
---|
| 2993 | + } |
---|
| 2994 | + |
---|
| 2995 | + if (!WARN_ON(!kbdev->protected_mode)) { |
---|
| 2996 | + kbdev->protected_mode = false; |
---|
| 2997 | + kbase_ipa_control_protm_exited(kbdev); |
---|
| 2998 | + kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface); |
---|
| 2999 | + } |
---|
| 3000 | + |
---|
| 3001 | +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) |
---|
| 3002 | + kbase_debug_coresight_csf_enable_pmode_exit(kbdev); |
---|
| 3003 | +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ |
---|
| 3004 | +} |
---|
| 3005 | + |
---|
| 3006 | +static inline void process_tracked_info_for_protm(struct kbase_device *kbdev, |
---|
| 3007 | + struct irq_idle_and_protm_track *track) |
---|
| 3008 | +{ |
---|
| 3009 | + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
---|
| 3010 | + struct kbase_queue_group *group = track->protm_grp; |
---|
| 3011 | + u32 current_protm_pending_seq = scheduler->tick_protm_pending_seq; |
---|
| 3012 | + |
---|
| 3013 | + kbase_csf_scheduler_spin_lock_assert_held(kbdev); |
---|
| 3014 | + |
---|
| 3015 | + if (likely(current_protm_pending_seq == KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID)) |
---|
| 3016 | + return; |
---|
| 3017 | + |
---|
| 3018 | + /* Handle protm from the tracked information */ |
---|
| 3019 | + if (track->idle_seq < current_protm_pending_seq) { |
---|
 | 3020 | + /* If the protm enter was prevented due to group priority, then fire a tock
---|
| 3021 | + * for the scheduler to re-examine the case. |
---|
| 3022 | + */ |
---|
| 3023 | + dev_dbg(kbdev->dev, "Attempt pending protm from idle slot %d\n", track->idle_slot); |
---|
| 3024 | + kbase_csf_scheduler_invoke_tock(kbdev); |
---|
| 3025 | + } else if (group) { |
---|
| 3026 | + u32 i, num_groups = kbdev->csf.global_iface.group_num; |
---|
| 3027 | + struct kbase_queue_group *grp; |
---|
| 3028 | + bool tock_triggered = false; |
---|
| 3029 | + |
---|
 | 3030 | + /* A new protm request arrived but track->idle_seq is not sufficient; check
---|
 | 3031 | + * across the previously notified idle CSGs in the current tick/tock cycle.
---|
| 3032 | + */ |
---|
| 3033 | + for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) { |
---|
| 3034 | + if (i == track->idle_slot) |
---|
| 3035 | + continue; |
---|
| 3036 | + grp = kbase_csf_scheduler_get_group_on_slot(kbdev, i); |
---|
| 3037 | + /* If not NULL then the group pointer cannot disappear as the |
---|
| 3038 | + * scheduler spinlock is held. |
---|
| 3039 | + */ |
---|
| 3040 | + if (grp == NULL) |
---|
| 3041 | + continue; |
---|
| 3042 | + |
---|
| 3043 | + if (grp->scan_seq_num < current_protm_pending_seq) { |
---|
| 3044 | + tock_triggered = true; |
---|
| 3045 | + dev_dbg(kbdev->dev, |
---|
| 3046 | + "Attempt new protm from tick/tock idle slot %d\n", i); |
---|
| 3047 | + kbase_csf_scheduler_invoke_tock(kbdev); |
---|
| 3048 | + break; |
---|
| 3049 | + } |
---|
| 3050 | + } |
---|
| 3051 | + |
---|
| 3052 | + if (!tock_triggered) { |
---|
| 3053 | + dev_dbg(kbdev->dev, "Group-%d on slot-%d start protm work\n", |
---|
| 3054 | + group->handle, group->csg_nr); |
---|
| 3055 | + queue_work(group->kctx->csf.wq, &group->protm_event_work); |
---|
| 3056 | + } |
---|
| 3057 | + } |
---|
| 3058 | +} |
---|
| 3059 | + |
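/*
 * Illustrative sketch, not part of the driver: the comparisons in
 * process_tracked_info_for_protm() assume that scan_seq_num is a scan-out
 * sequence number where a smaller value means a higher priority, with
 * U32_MAX (see the track initialisation below) and
 * KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID standing for "nothing tracked".
 * The helper name is hypothetical.
 */
static inline bool __maybe_unused example_group_outranks(u32 scan_seq_a, u32 scan_seq_b)
{
	/* Group A outranks group B when its scan-out sequence number is smaller */
	return scan_seq_a < scan_seq_b;
}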
---|
| 3060 | +static void order_job_irq_clear_with_iface_mem_read(void) |
---|
| 3061 | +{ |
---|
 | 3062 | + /* Ensure that the write to JOB_IRQ_CLEAR is ordered with regard to the
---|
 | 3063 | + * read from interface memory. The ordering is needed because FW & Kbase
---|
 | 3064 | + * write to the JOB_IRQ_RAWSTAT and JOB_IRQ_CLEAR registers without any
---|
 | 3065 | + * synchronization. Without the barrier there is no ordering guarantee:
---|
 | 3066 | + * the write to IRQ_CLEAR can take effect after the read from interface
---|
 | 3067 | + * memory, which is a problem when FW sends back-to-back notifications
---|
 | 3068 | + * for the same CSG for events like SYNC_UPDATE and IDLE, as Kbase gets
---|
 | 3069 | + * a single IRQ and observes only the first event. The same can happen
---|
 | 3070 | + * with GLB events like the CFG_ALLOC_EN acknowledgment and the GPU idle
---|
 | 3071 | + * notification.
---|
| 3072 | + * |
---|
| 3073 | + * MCU CPU |
---|
| 3074 | + * --------------- ---------------- |
---|
| 3075 | + * Update interface memory Write to IRQ_CLEAR to clear current IRQ |
---|
| 3076 | + * <barrier> <barrier> |
---|
| 3077 | + * Write to IRQ_RAWSTAT to raise new IRQ Read interface memory |
---|
| 3078 | + */ |
---|
| 3079 | + |
---|
| 3080 | + /* CPU and GPU would be in the same Outer shareable domain */ |
---|
| 3081 | + dmb(osh); |
---|
| 3082 | +} |
---|
| 3083 | + |
---|
2871 | 3084 | void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) |
---|
2872 | 3085 | { |
---|
2873 | | - unsigned long flags; |
---|
2874 | | - u32 remaining = val; |
---|
| 3086 | + bool deferred_handling_glb_idle_irq = false; |
---|
2875 | 3087 | |
---|
2876 | 3088 | lockdep_assert_held(&kbdev->hwaccess_lock); |
---|
2877 | 3089 | |
---|
2878 | | - KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT, NULL, val); |
---|
2879 | | - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); |
---|
| 3090 | + KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_START, NULL, val); |
---|
2880 | 3091 | |
---|
2881 | | - if (val & JOB_IRQ_GLOBAL_IF) { |
---|
2882 | | - const struct kbase_csf_global_iface *const global_iface = |
---|
2883 | | - &kbdev->csf.global_iface; |
---|
2884 | | - struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; |
---|
| 3092 | + do { |
---|
| 3093 | + unsigned long flags; |
---|
| 3094 | + u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF; |
---|
| 3095 | + struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX }; |
---|
| 3096 | + bool glb_idle_irq_received = false; |
---|
2885 | 3097 | |
---|
2886 | | - kbdev->csf.interrupt_received = true; |
---|
2887 | | - remaining &= ~JOB_IRQ_GLOBAL_IF; |
---|
| 3098 | + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); |
---|
| 3099 | + order_job_irq_clear_with_iface_mem_read(); |
---|
2888 | 3100 | |
---|
2889 | | - if (!kbdev->csf.firmware_reloaded) |
---|
2890 | | - kbase_csf_firmware_reload_completed(kbdev); |
---|
2891 | | - else if (global_iface->output) { |
---|
2892 | | - u32 glb_req, glb_ack; |
---|
2893 | | - |
---|
| 3101 | + if (csg_interrupts != 0) { |
---|
2894 | 3102 | kbase_csf_scheduler_spin_lock(kbdev, &flags); |
---|
2895 | | - glb_req = kbase_csf_firmware_global_input_read( |
---|
2896 | | - global_iface, GLB_REQ); |
---|
2897 | | - glb_ack = kbase_csf_firmware_global_output( |
---|
2898 | | - global_iface, GLB_ACK); |
---|
2899 | | - KBASE_KTRACE_ADD(kbdev, GLB_REQ_ACQ, NULL, glb_req ^ glb_ack); |
---|
 | 3103 | + /* Loop through the CSG interrupts and track the highest-priority idle and protm groups */
---|
| 3104 | + while (csg_interrupts != 0) { |
---|
| 3105 | + int const csg_nr = ffs(csg_interrupts) - 1; |
---|
2900 | 3106 | |
---|
2901 | | - if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK) { |
---|
2902 | | - dev_dbg(kbdev->dev, "Protected mode exit interrupt received"); |
---|
2903 | | - kbase_csf_firmware_global_input_mask( |
---|
2904 | | - global_iface, GLB_REQ, glb_ack, |
---|
2905 | | - GLB_REQ_PROTM_EXIT_MASK); |
---|
2906 | | - WARN_ON(!kbase_csf_scheduler_protected_mode_in_use(kbdev)); |
---|
2907 | | - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM, scheduler->active_protm_grp, 0u); |
---|
2908 | | - scheduler->active_protm_grp = NULL; |
---|
2909 | | - kbdev->protected_mode = false; |
---|
2910 | | - kbase_ipa_control_protm_exited(kbdev); |
---|
2911 | | - kbase_hwcnt_backend_csf_protm_exited( |
---|
2912 | | - &kbdev->hwcnt_gpu_iface); |
---|
| 3107 | + process_csg_interrupts(kbdev, csg_nr, &track); |
---|
| 3108 | + csg_interrupts &= ~(1 << csg_nr); |
---|
2913 | 3109 | } |
---|
2914 | 3110 | |
---|
2915 | | - /* Handle IDLE Hysteresis notification event */ |
---|
2916 | | - if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) { |
---|
2917 | | - int non_idle_offslot_grps; |
---|
2918 | | - bool can_suspend_on_idle; |
---|
2919 | | - dev_dbg(kbdev->dev, "Idle-hysteresis event flagged"); |
---|
2920 | | - kbase_csf_firmware_global_input_mask( |
---|
| 3111 | + /* Handle protm from the tracked information */ |
---|
| 3112 | + process_tracked_info_for_protm(kbdev, &track); |
---|
| 3113 | + kbase_csf_scheduler_spin_unlock(kbdev, flags); |
---|
| 3114 | + } |
---|
| 3115 | + |
---|
| 3116 | + if (val & JOB_IRQ_GLOBAL_IF) { |
---|
| 3117 | + const struct kbase_csf_global_iface *const global_iface = |
---|
| 3118 | + &kbdev->csf.global_iface; |
---|
| 3119 | + |
---|
| 3120 | + kbdev->csf.interrupt_received = true; |
---|
| 3121 | + |
---|
| 3122 | + if (!kbdev->csf.firmware_reloaded) |
---|
| 3123 | + kbase_csf_firmware_reload_completed(kbdev); |
---|
| 3124 | + else if (global_iface->output) { |
---|
| 3125 | + u32 glb_req, glb_ack; |
---|
| 3126 | + |
---|
| 3127 | + kbase_csf_scheduler_spin_lock(kbdev, &flags); |
---|
| 3128 | + glb_req = |
---|
| 3129 | + kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); |
---|
| 3130 | + glb_ack = kbase_csf_firmware_global_output(global_iface, GLB_ACK); |
---|
| 3131 | + KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_GLB_REQ_ACK, NULL, |
---|
| 3132 | + glb_req ^ glb_ack); |
---|
| 3133 | + |
---|
| 3134 | + check_protm_enter_req_complete(kbdev, glb_req, glb_ack); |
---|
| 3135 | + |
---|
| 3136 | + if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK) |
---|
| 3137 | + process_protm_exit(kbdev, glb_ack); |
---|
| 3138 | + |
---|
| 3139 | + /* Handle IDLE Hysteresis notification event */ |
---|
| 3140 | + if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) { |
---|
| 3141 | + dev_dbg(kbdev->dev, "Idle-hysteresis event flagged"); |
---|
| 3142 | + kbase_csf_firmware_global_input_mask( |
---|
2921 | 3143 | global_iface, GLB_REQ, glb_ack, |
---|
2922 | 3144 | GLB_REQ_IDLE_EVENT_MASK); |
---|
2923 | 3145 | |
---|
2924 | | - non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps); |
---|
2925 | | - can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev); |
---|
2926 | | - KBASE_KTRACE_ADD(kbdev, SCHEDULER_CAN_IDLE, NULL, |
---|
2927 | | - ((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32)); |
---|
2928 | | - |
---|
2929 | | - if (!non_idle_offslot_grps) { |
---|
2930 | | - if (can_suspend_on_idle) |
---|
2931 | | - queue_work(system_highpri_wq, |
---|
2932 | | - &scheduler->gpu_idle_work); |
---|
2933 | | - } else { |
---|
2934 | | - /* Advance the scheduling tick to get |
---|
2935 | | - * the non-idle suspended groups loaded |
---|
2936 | | - * soon. |
---|
| 3146 | + glb_idle_irq_received = true; |
---|
| 3147 | + /* Defer handling this IRQ to account for a race condition |
---|
| 3148 | + * where the idle worker could be executed before we have |
---|
| 3149 | + * finished handling all pending IRQs (including CSG IDLE |
---|
| 3150 | + * IRQs). |
---|
2937 | 3151 | */ |
---|
2938 | | - kbase_csf_scheduler_advance_tick_nolock( |
---|
2939 | | - kbdev); |
---|
| 3152 | + deferred_handling_glb_idle_irq = true; |
---|
2940 | 3153 | } |
---|
| 3154 | + |
---|
| 3155 | + process_prfcnt_interrupts(kbdev, glb_req, glb_ack); |
---|
| 3156 | + |
---|
| 3157 | + kbase_csf_scheduler_spin_unlock(kbdev, flags); |
---|
| 3158 | + |
---|
| 3159 | + /* Invoke the MCU state machine as a state transition |
---|
| 3160 | + * might have completed. |
---|
| 3161 | + */ |
---|
| 3162 | + kbase_pm_update_state(kbdev); |
---|
2941 | 3163 | } |
---|
2942 | | - |
---|
2943 | | - process_prfcnt_interrupts(kbdev, glb_req, glb_ack); |
---|
2944 | | - |
---|
2945 | | - kbase_csf_scheduler_spin_unlock(kbdev, flags); |
---|
2946 | | - |
---|
2947 | | - /* Invoke the MCU state machine as a state transition |
---|
2948 | | - * might have completed. |
---|
2949 | | - */ |
---|
2950 | | - kbase_pm_update_state(kbdev); |
---|
2951 | 3164 | } |
---|
2952 | 3165 | |
---|
2953 | | - if (!remaining) { |
---|
2954 | | - wake_up_all(&kbdev->csf.event_wait); |
---|
2955 | | - KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val); |
---|
2956 | | - return; |
---|
2957 | | - } |
---|
2958 | | - } |
---|
| 3166 | + if (!glb_idle_irq_received) |
---|
| 3167 | + break; |
---|
| 3168 | + /* Attempt to serve potential IRQs that might have occurred |
---|
| 3169 | + * whilst handling the previous IRQ. In case we have observed |
---|
| 3170 | + * the GLB IDLE IRQ without all CSGs having been marked as |
---|
| 3171 | + * idle, the GPU would be treated as no longer idle and left |
---|
| 3172 | + * powered on. |
---|
| 3173 | + */ |
---|
| 3174 | + val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); |
---|
| 3175 | + } while (val); |
---|
2959 | 3176 | |
---|
2960 | | - kbase_csf_scheduler_spin_lock(kbdev, &flags); |
---|
2961 | | - while (remaining != 0) { |
---|
2962 | | - int const csg_nr = ffs(remaining) - 1; |
---|
| 3177 | + if (deferred_handling_glb_idle_irq) { |
---|
| 3178 | + unsigned long flags; |
---|
2963 | 3179 | |
---|
2964 | | - process_csg_interrupts(kbdev, csg_nr); |
---|
2965 | | - remaining &= ~(1 << csg_nr); |
---|
| 3180 | + kbase_csf_scheduler_spin_lock(kbdev, &flags); |
---|
| 3181 | + kbase_csf_scheduler_process_gpu_idle_event(kbdev); |
---|
| 3182 | + kbase_csf_scheduler_spin_unlock(kbdev, flags); |
---|
2966 | 3183 | } |
---|
2967 | | - kbase_csf_scheduler_spin_unlock(kbdev, flags); |
---|
2968 | 3184 | |
---|
2969 | 3185 | wake_up_all(&kbdev->csf.event_wait); |
---|
| 3186 | + |
---|
2970 | 3187 | KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val); |
---|
2971 | 3188 | } |
---|
2972 | 3189 | |
---|
.. | .. |
---|
2989 | 3206 | struct file *filp; |
---|
2990 | 3207 | int ret; |
---|
2991 | 3208 | |
---|
2992 | | - filp = shmem_file_setup("mali csf", MAX_LFS_FILESIZE, VM_NORESERVE); |
---|
| 3209 | + filp = shmem_file_setup("mali csf db", MAX_LFS_FILESIZE, VM_NORESERVE); |
---|
2993 | 3210 | if (IS_ERR(filp)) |
---|
2994 | 3211 | return PTR_ERR(filp); |
---|
2995 | 3212 | |
---|
2996 | | - ret = kbase_mem_pool_alloc_pages( |
---|
2997 | | - &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], |
---|
2998 | | - 1, &phys, false); |
---|
| 3213 | + ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys, |
---|
| 3214 | + false, NULL); |
---|
2999 | 3215 | |
---|
3000 | 3216 | if (ret <= 0) { |
---|
3001 | 3217 | fput(filp); |
---|
.. | .. |
---|
3011 | 3227 | |
---|
3012 | 3228 | void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev) |
---|
3013 | 3229 | { |
---|
3014 | | - if (as_phys_addr_t(kbdev->csf.dummy_user_reg_page)) { |
---|
3015 | | - struct page *page = as_page(kbdev->csf.dummy_user_reg_page); |
---|
| 3230 | + if (kbdev->csf.user_reg.filp) { |
---|
| 3231 | + struct page *page = as_page(kbdev->csf.user_reg.dummy_page); |
---|
3016 | 3232 | |
---|
3017 | | - kbase_mem_pool_free( |
---|
3018 | | - &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, |
---|
3019 | | - false); |
---|
| 3233 | + kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false); |
---|
| 3234 | + fput(kbdev->csf.user_reg.filp); |
---|
3020 | 3235 | } |
---|
3021 | 3236 | } |
---|
3022 | 3237 | |
---|
3023 | 3238 | int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev) |
---|
3024 | 3239 | { |
---|
3025 | 3240 | struct tagged_addr phys; |
---|
| 3241 | + struct file *filp; |
---|
3026 | 3242 | struct page *page; |
---|
3027 | 3243 | u32 *addr; |
---|
3028 | | - int ret; |
---|
3029 | 3244 | |
---|
3030 | | - kbdev->csf.dummy_user_reg_page = as_tagged(0); |
---|
| 3245 | + kbdev->csf.user_reg.filp = NULL; |
---|
3031 | 3246 | |
---|
3032 | | - ret = kbase_mem_pool_alloc_pages( |
---|
3033 | | - &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys, |
---|
3034 | | - false); |
---|
| 3247 | + filp = shmem_file_setup("mali csf user_reg", MAX_LFS_FILESIZE, VM_NORESERVE); |
---|
| 3248 | + if (IS_ERR(filp)) { |
---|
| 3249 | + dev_err(kbdev->dev, "failed to get an unlinked file for user_reg"); |
---|
| 3250 | + return PTR_ERR(filp); |
---|
| 3251 | + } |
---|
3035 | 3252 | |
---|
3036 | | - if (ret <= 0) |
---|
3037 | | - return ret; |
---|
| 3253 | + if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys, |
---|
| 3254 | + false, NULL) <= 0) { |
---|
| 3255 | + fput(filp); |
---|
| 3256 | + return -ENOMEM; |
---|
| 3257 | + } |
---|
3038 | 3258 | |
---|
3039 | 3259 | page = as_page(phys); |
---|
3040 | 3260 | addr = kmap_atomic(page); |
---|
.. | .. |
---|
3044 | 3264 | */ |
---|
3045 | 3265 | addr[LATEST_FLUSH / sizeof(u32)] = POWER_DOWN_LATEST_FLUSH_VALUE; |
---|
3046 | 3266 | |
---|
3047 | | - kbase_sync_single_for_device(kbdev, kbase_dma_addr(page), sizeof(u32), |
---|
| 3267 | + kbase_sync_single_for_device(kbdev, kbase_dma_addr(page) + LATEST_FLUSH, sizeof(u32), |
---|
3048 | 3268 | DMA_BIDIRECTIONAL); |
---|
3049 | 3269 | kunmap_atomic(addr); |
---|
3050 | 3270 | |
---|
3051 | | - kbdev->csf.dummy_user_reg_page = phys; |
---|
3052 | | - |
---|
| 3271 | + kbdev->csf.user_reg.filp = filp; |
---|
| 3272 | + kbdev->csf.user_reg.dummy_page = phys; |
---|
| 3273 | + kbdev->csf.user_reg.file_offset = 0; |
---|
3053 | 3274 | return 0; |
---|
3054 | 3275 | } |
---|
3055 | 3276 | |
---|
.. | .. |
---|
3066 | 3287 | |
---|
3067 | 3288 | return out_priority; |
---|
3068 | 3289 | } |
---|
3069 | | - |
---|