.. | .. |
---|
1 | 1 | // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note |
---|
2 | 2 | /* |
---|
3 | 3 | * |
---|
4 | | - * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. |
---|
| 4 | + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. |
---|
5 | 5 | * |
---|
6 | 6 | * This program is free software and is provided to you under the terms of the |
---|
7 | 7 | * GNU General Public License version 2 as published by the Free Software |
---|
.. | .. |
---|
20 | 20 | */ |
---|
21 | 21 | |
---|
22 | 22 | /** |
---|
23 | | - * Base kernel memory APIs |
---|
| 23 | + * DOC: Base kernel memory APIs |
---|
24 | 24 | */ |
---|
25 | 25 | #include <linux/dma-buf.h> |
---|
26 | 26 | #include <linux/kernel.h> |
---|
.. | .. |
---|
44 | 44 | #include <mali_kbase_config_defaults.h> |
---|
45 | 45 | #include <mali_kbase_trace_gpu_mem.h> |
---|
46 | 46 | |
---|
| 47 | +#define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-" |
---|
| 48 | +#define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1) |
---|
| 49 | + |
---|
| 50 | +#if MALI_JIT_PRESSURE_LIMIT_BASE |
---|
| 51 | + |
---|
47 | 52 | /* |
---|
48 | 53 | * Alignment of objects allocated by the GPU inside a just-in-time memory |
---|
49 | 54 | * region whose size is given by an end address |
---|
.. | .. |
---|
66 | 71 | */ |
---|
67 | 72 | #define KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES (512u) |
---|
68 | 73 | |
---|
| 74 | +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ |
---|
69 | 75 | |
---|
70 | 76 | /* Forward declarations */ |
---|
71 | 77 | static void free_partial_locked(struct kbase_context *kctx, |
---|
.. | .. |
---|
89 | 95 | #error "Unknown CPU VA width for this architecture" |
---|
90 | 96 | #endif |
---|
91 | 97 | |
---|
92 | | -#if IS_ENABLED(CONFIG_64BIT) |
---|
93 | | - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) |
---|
| 98 | + if (kbase_ctx_compat_mode(kctx)) |
---|
94 | 99 | cpu_va_bits = 32; |
---|
95 | | -#endif |
---|
96 | 100 | |
---|
97 | 101 | return cpu_va_bits; |
---|
98 | 102 | } |
---|
.. | .. |
---|
104 | 108 | u64 gpu_pfn) |
---|
105 | 109 | { |
---|
106 | 110 | struct rb_root *rbtree = NULL; |
---|
107 | | - struct kbase_reg_zone *exec_va_zone = |
---|
108 | | - kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); |
---|
109 | 111 | |
---|
110 | | - /* The gpu_pfn can only be greater than the starting pfn of the EXEC_VA |
---|
111 | | - * zone if this has been initialized. |
---|
112 | | - */ |
---|
| 112 | + struct kbase_reg_zone *exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); |
---|
| 113 | + |
---|
| 114 | +#if MALI_USE_CSF |
---|
| 115 | + struct kbase_reg_zone *fixed_va_zone = |
---|
| 116 | + kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_FIXED_VA); |
---|
| 117 | + |
---|
| 118 | + struct kbase_reg_zone *exec_fixed_va_zone = |
---|
| 119 | + kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA); |
---|
| 120 | + |
---|
| 121 | + if (gpu_pfn >= fixed_va_zone->base_pfn) { |
---|
| 122 | + rbtree = &kctx->reg_rbtree_fixed; |
---|
| 123 | + return rbtree; |
---|
| 124 | + } else if (gpu_pfn >= exec_fixed_va_zone->base_pfn) { |
---|
| 125 | + rbtree = &kctx->reg_rbtree_exec_fixed; |
---|
| 126 | + return rbtree; |
---|
| 127 | + } |
---|
| 128 | +#endif |
---|
113 | 129 | if (gpu_pfn >= exec_va_zone->base_pfn) |
---|
114 | 130 | rbtree = &kctx->reg_rbtree_exec; |
---|
115 | 131 | else { |
---|
116 | 132 | u64 same_va_end; |
---|
117 | 133 | |
---|
118 | | -#if IS_ENABLED(CONFIG_64BIT) |
---|
119 | | - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { |
---|
120 | | -#endif /* CONFIG_64BIT */ |
---|
| 134 | + if (kbase_ctx_compat_mode(kctx)) { |
---|
121 | 135 | same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE; |
---|
122 | | -#if IS_ENABLED(CONFIG_64BIT) |
---|
123 | 136 | } else { |
---|
124 | 137 | struct kbase_reg_zone *same_va_zone = |
---|
125 | 138 | kbase_ctx_reg_zone_get(kctx, |
---|
126 | 139 | KBASE_REG_ZONE_SAME_VA); |
---|
127 | 140 | same_va_end = kbase_reg_zone_end_pfn(same_va_zone); |
---|
128 | 141 | } |
---|
129 | | -#endif /* CONFIG_64BIT */ |
---|
130 | 142 | |
---|
131 | 143 | if (gpu_pfn >= same_va_end) |
---|
132 | 144 | rbtree = &kctx->reg_rbtree_custom; |
---|
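The hunk above selects the region rbtree for a given GPU page-frame number by testing zone base addresses from the highest zone downwards (FIXED_VA, then EXEC_FIXED_VA, then EXEC_VA, and finally the SAME_VA/CUSTOM_VA split). The sketch below is a simplified user-space model of that "highest base first" dispatch; the zone names and base values are illustrative placeholders, not the driver's real KBASE_REG_ZONE_* layout.

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative zone table, highest base first (placeholder values). */
struct zone { const char *name; uint64_t base_pfn; };

static const struct zone zones[] = {
	{ "fixed_va",      0x7000000ULL },
	{ "exec_fixed_va", 0x6000000ULL },
	{ "exec_va",       0x5000000ULL },
	{ "custom_va",     0x1000000ULL },
	{ "same_va",       0x0000001ULL },
};

/* Return the first zone whose base is <= gpu_pfn, mirroring the
 * descending base_pfn comparisons in the hunk above.
 */
static const char *zone_for_pfn(uint64_t gpu_pfn)
{
	size_t i;

	for (i = 0; i < sizeof(zones) / sizeof(zones[0]); i++)
		if (gpu_pfn >= zones[i].base_pfn)
			return zones[i].name;
	return "unmapped";
}

int main(void)
{
	printf("%s\n", zone_for_pfn(0x6000123ULL)); /* exec_fixed_va */
	printf("%s\n", zone_for_pfn(0x0001000ULL)); /* same_va */
	return 0;
}
```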
.. | .. |
---|
350 | 362 | } |
---|
351 | 363 | |
---|
352 | 364 | /** |
---|
353 | | - * Remove a region object from the global list. |
---|
| 365 | + * kbase_remove_va_region - Remove a region object from the global list. |
---|
| 366 | + * |
---|
| 367 | + * @kbdev: The kbase device |
---|
354 | 368 | * @reg: Region object to remove |
---|
355 | 369 | * |
---|
356 | 370 | * The region reg is removed, possibly by merging with other free and |
---|
.. | .. |
---|
358 | 372 | * region lock held. The associated memory is not released (see |
---|
359 | 373 | * kbase_free_alloced_region). Internal use only. |
---|
360 | 374 | */ |
---|
361 | | -int kbase_remove_va_region(struct kbase_va_region *reg) |
---|
| 375 | +void kbase_remove_va_region(struct kbase_device *kbdev, |
---|
| 376 | + struct kbase_va_region *reg) |
---|
362 | 377 | { |
---|
363 | 378 | struct rb_node *rbprev; |
---|
364 | 379 | struct kbase_va_region *prev = NULL; |
---|
365 | 380 | struct rb_node *rbnext; |
---|
366 | 381 | struct kbase_va_region *next = NULL; |
---|
367 | 382 | struct rb_root *reg_rbtree = NULL; |
---|
| 383 | + struct kbase_va_region *orig_reg = reg; |
---|
368 | 384 | |
---|
369 | 385 | int merged_front = 0; |
---|
370 | 386 | int merged_back = 0; |
---|
371 | | - int err = 0; |
---|
372 | 387 | |
---|
373 | 388 | reg_rbtree = reg->rbtree; |
---|
| 389 | + |
---|
| 390 | + if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree))) |
---|
| 391 | + return; |
---|
374 | 392 | |
---|
375 | 393 | /* Try to merge with the previous block first */ |
---|
376 | 394 | rbprev = rb_prev(&(reg->rblink)); |
---|
.. | .. |
---|
378 | 396 | prev = rb_entry(rbprev, struct kbase_va_region, rblink); |
---|
379 | 397 | if (prev->flags & KBASE_REG_FREE) { |
---|
380 | 398 | /* We're compatible with the previous VMA, merge with |
---|
381 | | - * it |
---|
| 399 | + * it, handling any gaps for robustness. |
---|
382 | 400 | */ |
---|
| 401 | + u64 prev_end_pfn = prev->start_pfn + prev->nr_pages; |
---|
| 402 | + |
---|
383 | 403 | WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) != |
---|
384 | 404 | (reg->flags & KBASE_REG_ZONE_MASK)); |
---|
| 405 | + if (!WARN_ON(reg->start_pfn < prev_end_pfn)) |
---|
| 406 | + prev->nr_pages += reg->start_pfn - prev_end_pfn; |
---|
385 | 407 | prev->nr_pages += reg->nr_pages; |
---|
386 | 408 | rb_erase(&(reg->rblink), reg_rbtree); |
---|
387 | 409 | reg = prev; |
---|
.. | .. |
---|
393 | 415 | /* Note we do the lookup here as the tree may have been rebalanced. */ |
---|
394 | 416 | rbnext = rb_next(&(reg->rblink)); |
---|
395 | 417 | if (rbnext) { |
---|
396 | | - /* We're compatible with the next VMA, merge with it */ |
---|
397 | 418 | next = rb_entry(rbnext, struct kbase_va_region, rblink); |
---|
398 | 419 | if (next->flags & KBASE_REG_FREE) { |
---|
| 420 | + /* We're compatible with the next VMA, merge with it, |
---|
| 421 | + * handling any gaps for robustness. |
---|
| 422 | + */ |
---|
| 423 | + u64 reg_end_pfn = reg->start_pfn + reg->nr_pages; |
---|
| 424 | + |
---|
399 | 425 | WARN_ON((next->flags & KBASE_REG_ZONE_MASK) != |
---|
400 | 426 | (reg->flags & KBASE_REG_ZONE_MASK)); |
---|
| 427 | + if (!WARN_ON(next->start_pfn < reg_end_pfn)) |
---|
| 428 | + next->nr_pages += next->start_pfn - reg_end_pfn; |
---|
401 | 429 | next->start_pfn = reg->start_pfn; |
---|
402 | 430 | next->nr_pages += reg->nr_pages; |
---|
403 | 431 | rb_erase(&(reg->rblink), reg_rbtree); |
---|
404 | 432 | merged_back = 1; |
---|
405 | | - if (merged_front) { |
---|
406 | | - /* We already merged with prev, free it */ |
---|
407 | | - kfree(reg); |
---|
408 | | - } |
---|
409 | 433 | } |
---|
410 | 434 | } |
---|
411 | 435 | |
---|
412 | | - /* If we failed to merge then we need to add a new block */ |
---|
413 | | - if (!(merged_front || merged_back)) { |
---|
| 436 | + if (merged_front && merged_back) { |
---|
| 437 | + /* We already merged with prev, free it */ |
---|
| 438 | + kfree(reg); |
---|
| 439 | + } else if (!(merged_front || merged_back)) { |
---|
| 440 | + /* If we failed to merge then we need to add a new block */ |
---|
| 441 | + |
---|
414 | 442 | /* |
---|
415 | | - * We didn't merge anything. Add a new free |
---|
416 | | - * placeholder and remove the original one. |
---|
| 443 | + * We didn't merge anything. Try to add a new free |
---|
| 444 | + * placeholder, and in any case, remove the original one. |
---|
417 | 445 | */ |
---|
418 | 446 | struct kbase_va_region *free_reg; |
---|
419 | 447 | |
---|
420 | | - free_reg = kbase_alloc_free_region(reg_rbtree, |
---|
421 | | - reg->start_pfn, reg->nr_pages, |
---|
422 | | - reg->flags & KBASE_REG_ZONE_MASK); |
---|
| 448 | + free_reg = kbase_alloc_free_region(kbdev, reg_rbtree, reg->start_pfn, reg->nr_pages, |
---|
| 449 | + reg->flags & KBASE_REG_ZONE_MASK); |
---|
423 | 450 | if (!free_reg) { |
---|
424 | | - err = -ENOMEM; |
---|
| 451 | + /* In case of failure, we cannot allocate a replacement |
---|
| 452 | + * free region, so we will be left with a 'gap' in the |
---|
| 453 | + * region tracker's address range (though, the rbtree |
---|
| 454 | + * will itself still be correct after erasing |
---|
| 455 | + * 'reg'). |
---|
| 456 | + * |
---|
| 457 | + * The gap will be rectified when an adjacent region is |
---|
| 458 | + * removed by one of the above merging paths. Other |
---|
| 459 | + * paths will gracefully fail to allocate if they try |
---|
| 460 | + * to allocate in the gap. |
---|
| 461 | + * |
---|
| 462 | + * There is nothing that the caller can do, since free |
---|
| 463 | + * paths must not fail. The existing 'reg' cannot be |
---|
| 464 | + * repurposed as the free region as callers must have |
---|
| 465 | + * freedom of use with it by virtue of it being owned |
---|
| 466 | + * by them, not the region tracker insert/remove code. |
---|
| 467 | + */ |
---|
| 468 | + dev_warn( |
---|
| 469 | + kbdev->dev, |
---|
| 470 | + "Could not alloc a replacement free region for 0x%.16llx..0x%.16llx", |
---|
| 471 | + (unsigned long long)reg->start_pfn << PAGE_SHIFT, |
---|
| 472 | + (unsigned long long)(reg->start_pfn + reg->nr_pages) << PAGE_SHIFT); |
---|
| 473 | + rb_erase(&(reg->rblink), reg_rbtree); |
---|
| 474 | + |
---|
425 | 475 | goto out; |
---|
426 | 476 | } |
---|
427 | 477 | rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree); |
---|
428 | 478 | } |
---|
429 | 479 | |
---|
430 | | - out: |
---|
431 | | - return err; |
---|
| 480 | + /* This operation is always safe because the function never frees |
---|
| 481 | + * the region. If the region has been merged to both front and back, |
---|
| 482 | + * then it's the previous region that is supposed to be freed. |
---|
| 483 | + */ |
---|
| 484 | + orig_reg->start_pfn = 0; |
---|
| 485 | + |
---|
| 486 | +out: |
---|
| 487 | + return; |
---|
432 | 488 | } |
---|
433 | 489 | |
---|
434 | 490 | KBASE_EXPORT_TEST_API(kbase_remove_va_region); |
---|
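kbase_remove_va_region above returns a live region to free space by absorbing it into an adjacent free neighbour (front and/or back), and only allocates a fresh free placeholder when neither neighbour is free. The following is a minimal user-space sketch of that merge decision on three adjacent regions rather than an rbtree; the structure and field names are invented for illustration only.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy region: [start_pfn, start_pfn + nr_pages), free or in use. */
struct toy_region { uint64_t start_pfn; uint64_t nr_pages; bool free; };

/* Merge the freed middle region into free neighbours, as the driver does
 * with 'prev' and 'next' in the rbtree. Returns how many merges happened.
 */
static int release_region(struct toy_region *prev, struct toy_region *mid,
			  struct toy_region *next)
{
	int merges = 0;

	if (prev && prev->free && prev->start_pfn + prev->nr_pages == mid->start_pfn) {
		prev->nr_pages += mid->nr_pages;	/* grow prev over mid */
		mid->nr_pages = 0;
		merges++;
	}
	if (next && next->free) {
		uint64_t blk_start = merges ? prev->start_pfn : mid->start_pfn;
		uint64_t blk_pages = merges ? prev->nr_pages : mid->nr_pages;

		if (blk_start + blk_pages == next->start_pfn) {
			/* grow next backwards over the (possibly merged) block */
			next->start_pfn = blk_start;
			next->nr_pages += blk_pages;
			if (merges)
				prev->nr_pages = 0;
			else
				mid->nr_pages = 0;
			merges++;
		}
	}
	if (!merges)
		mid->free = true;	/* no neighbour to merge with: keep a placeholder */
	return merges;
}

int main(void)
{
	struct toy_region prev = { 0, 16, true }, mid = { 16, 8, false }, next = { 24, 32, true };

	printf("merges=%d\n", release_region(&prev, &mid, &next));	/* merges=2 */
	printf("next: start=%llu pages=%llu\n",
	       (unsigned long long)next.start_pfn, (unsigned long long)next.nr_pages);
	return 0;
}
```

In the double-merge case the driver frees the surviving 'prev' placeholder, which is why the new code above resets orig_reg->start_pfn instead of touching the merged node.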
.. | .. |
---|
437 | 493 | * kbase_insert_va_region_nolock - Insert a VA region to the list, |
---|
438 | 494 | * replacing the existing one. |
---|
439 | 495 | * |
---|
| 496 | + * @kbdev: The kbase device |
---|
440 | 497 | * @new_reg: The new region to insert |
---|
441 | 498 | * @at_reg: The region to replace |
---|
442 | 499 | * @start_pfn: The Page Frame Number to insert at |
---|
443 | 500 | * @nr_pages: The number of pages of the region |
---|
| 501 | + * |
---|
| 502 | + * Return: 0 on success, error code otherwise. |
---|
444 | 503 | */ |
---|
445 | | -static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg, |
---|
446 | | - struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages) |
---|
| 504 | +static int kbase_insert_va_region_nolock(struct kbase_device *kbdev, |
---|
| 505 | + struct kbase_va_region *new_reg, |
---|
| 506 | + struct kbase_va_region *at_reg, u64 start_pfn, |
---|
| 507 | + size_t nr_pages) |
---|
447 | 508 | { |
---|
448 | 509 | struct rb_root *reg_rbtree = NULL; |
---|
449 | 510 | int err = 0; |
---|
.. | .. |
---|
456 | 517 | KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages)); |
---|
457 | 518 | /* at least nr_pages from start_pfn should be contained within at_reg */ |
---|
458 | 519 | KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages); |
---|
| 520 | + /* having at_reg means the rb_tree should not be empty */ |
---|
| 521 | + if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree))) |
---|
| 522 | + return -ENOMEM; |
---|
459 | 523 | |
---|
460 | 524 | new_reg->start_pfn = start_pfn; |
---|
461 | 525 | new_reg->nr_pages = nr_pages; |
---|
.. | .. |
---|
484 | 548 | else { |
---|
485 | 549 | struct kbase_va_region *new_front_reg; |
---|
486 | 550 | |
---|
487 | | - new_front_reg = kbase_alloc_free_region(reg_rbtree, |
---|
488 | | - at_reg->start_pfn, |
---|
489 | | - start_pfn - at_reg->start_pfn, |
---|
490 | | - at_reg->flags & KBASE_REG_ZONE_MASK); |
---|
| 551 | + new_front_reg = kbase_alloc_free_region(kbdev, reg_rbtree, at_reg->start_pfn, |
---|
| 552 | + start_pfn - at_reg->start_pfn, |
---|
| 553 | + at_reg->flags & KBASE_REG_ZONE_MASK); |
---|
491 | 554 | |
---|
492 | 555 | if (new_front_reg) { |
---|
493 | 556 | at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages; |
---|
.. | .. |
---|
511 | 574 | * @addr: the address to insert the region at |
---|
512 | 575 | * @nr_pages: the number of pages in the region |
---|
513 | 576 | * @align: the minimum alignment in pages |
---|
| 577 | + * |
---|
| 578 | + * Return: 0 on success, error code otherwise. |
---|
514 | 579 | */ |
---|
515 | 580 | int kbase_add_va_region(struct kbase_context *kctx, |
---|
516 | 581 | struct kbase_va_region *reg, u64 addr, |
---|
.. | .. |
---|
527 | 592 | |
---|
528 | 593 | lockdep_assert_held(&kctx->reg_lock); |
---|
529 | 594 | |
---|
530 | | - /* The executable allocation from the SAME_VA zone would already have an |
---|
| 595 | + /* The executable allocation from the SAME_VA zone should already have an |
---|
531 | 596 | * appropriately aligned GPU VA chosen for it. |
---|
532 | | - * Also the executable allocation from EXEC_VA zone doesn't need the |
---|
533 | | - * special alignment. |
---|
| 597 | + * Also, executable allocations from EXEC_VA don't need the special |
---|
| 598 | + * alignment. |
---|
534 | 599 | */ |
---|
| 600 | +#if MALI_USE_CSF |
---|
| 601 | + /* The same is also true for the EXEC_FIXED_VA zone. |
---|
| 602 | + */ |
---|
| 603 | +#endif |
---|
535 | 604 | if (!(reg->flags & KBASE_REG_GPU_NX) && !addr && |
---|
| 605 | +#if MALI_USE_CSF |
---|
| 606 | + ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_FIXED_VA) && |
---|
| 607 | +#endif |
---|
536 | 608 | ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_VA)) { |
---|
537 | 609 | if (cpu_va_bits > gpu_pc_bits) { |
---|
538 | 610 | align = max(align, (size_t)((1ULL << gpu_pc_bits) |
---|
.. | .. |
---|
564 | 636 | /** |
---|
565 | 637 | * kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree |
---|
566 | 638 | * |
---|
567 | | - * Insert a region into the rbtree that was specified when the region was |
---|
568 | | - * created. If addr is 0 a free area in the rbtree is used, otherwise the |
---|
569 | | - * specified address is used. |
---|
570 | | - * |
---|
571 | 639 | * @kbdev: The kbase device |
---|
572 | 640 | * @reg: The region to add |
---|
573 | 641 | * @addr: The address to add the region at, or 0 to map at any available address |
---|
574 | 642 | * @nr_pages: The size of the region in pages |
---|
575 | 643 | * @align: The minimum alignment in pages |
---|
| 644 | + * |
---|
| 645 | + * Insert a region into the rbtree that was specified when the region was |
---|
| 646 | + * created. If addr is 0 a free area in the rbtree is used, otherwise the |
---|
| 647 | + * specified address is used. |
---|
| 648 | + * |
---|
| 649 | + * Return: 0 on success, error code otherwise. |
---|
576 | 650 | */ |
---|
577 | 651 | int kbase_add_va_region_rbtree(struct kbase_device *kbdev, |
---|
578 | 652 | struct kbase_va_region *reg, |
---|
.. | .. |
---|
613 | 687 | goto exit; |
---|
614 | 688 | } |
---|
615 | 689 | |
---|
616 | | - err = kbase_insert_va_region_nolock(reg, tmp, gpu_pfn, |
---|
617 | | - nr_pages); |
---|
| 690 | + err = kbase_insert_va_region_nolock(kbdev, reg, tmp, gpu_pfn, nr_pages); |
---|
618 | 691 | if (err) { |
---|
619 | 692 | dev_warn(dev, "Failed to insert va region"); |
---|
620 | 693 | err = -ENOMEM; |
---|
.. | .. |
---|
639 | 712 | nr_pages, align_offset, align_mask, |
---|
640 | 713 | &start_pfn); |
---|
641 | 714 | if (tmp) { |
---|
642 | | - err = kbase_insert_va_region_nolock(reg, tmp, |
---|
643 | | - start_pfn, nr_pages); |
---|
| 715 | + err = kbase_insert_va_region_nolock(kbdev, reg, tmp, start_pfn, nr_pages); |
---|
644 | 716 | if (unlikely(err)) { |
---|
645 | 717 | dev_warn(dev, "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages", |
---|
646 | 718 | start_pfn, nr_pages); |
---|
.. | .. |
---|
659 | 731 | /* |
---|
660 | 732 | * @brief Initialize the internal region tracker data structure. |
---|
661 | 733 | */ |
---|
| 734 | +#if MALI_USE_CSF |
---|
| 735 | +static void kbase_region_tracker_ds_init(struct kbase_context *kctx, |
---|
| 736 | + struct kbase_va_region *same_va_reg, |
---|
| 737 | + struct kbase_va_region *custom_va_reg, |
---|
| 738 | + struct kbase_va_region *exec_va_reg, |
---|
| 739 | + struct kbase_va_region *exec_fixed_va_reg, |
---|
| 740 | + struct kbase_va_region *fixed_va_reg) |
---|
| 741 | +{ |
---|
| 742 | + u64 last_zone_end_pfn; |
---|
| 743 | + |
---|
| 744 | + kctx->reg_rbtree_same = RB_ROOT; |
---|
| 745 | + kbase_region_tracker_insert(same_va_reg); |
---|
| 746 | + |
---|
| 747 | + last_zone_end_pfn = same_va_reg->start_pfn + same_va_reg->nr_pages; |
---|
| 748 | + |
---|
| 749 | + /* Although custom_va_reg doesn't always exist, initialize |
---|
| 750 | + * unconditionally because of the mem_view debugfs |
---|
| 751 | + * implementation which relies on it being empty. |
---|
| 752 | + */ |
---|
| 753 | + kctx->reg_rbtree_custom = RB_ROOT; |
---|
| 754 | + kctx->reg_rbtree_exec = RB_ROOT; |
---|
| 755 | + |
---|
| 756 | + if (custom_va_reg) { |
---|
| 757 | + WARN_ON(custom_va_reg->start_pfn < last_zone_end_pfn); |
---|
| 758 | + kbase_region_tracker_insert(custom_va_reg); |
---|
| 759 | + last_zone_end_pfn = custom_va_reg->start_pfn + custom_va_reg->nr_pages; |
---|
| 760 | + } |
---|
| 761 | + |
---|
| 762 | + /* Initialize exec, fixed and exec_fixed. These are always |
---|
| 763 | + * initialized at this stage, if they will exist at all. |
---|
| 764 | + */ |
---|
| 765 | + kctx->reg_rbtree_fixed = RB_ROOT; |
---|
| 766 | + kctx->reg_rbtree_exec_fixed = RB_ROOT; |
---|
| 767 | + |
---|
| 768 | + if (exec_va_reg) { |
---|
| 769 | + WARN_ON(exec_va_reg->start_pfn < last_zone_end_pfn); |
---|
| 770 | + kbase_region_tracker_insert(exec_va_reg); |
---|
| 771 | + last_zone_end_pfn = exec_va_reg->start_pfn + exec_va_reg->nr_pages; |
---|
| 772 | + } |
---|
| 773 | + |
---|
| 774 | + if (exec_fixed_va_reg) { |
---|
| 775 | + WARN_ON(exec_fixed_va_reg->start_pfn < last_zone_end_pfn); |
---|
| 776 | + kbase_region_tracker_insert(exec_fixed_va_reg); |
---|
| 777 | + last_zone_end_pfn = exec_fixed_va_reg->start_pfn + exec_fixed_va_reg->nr_pages; |
---|
| 778 | + } |
---|
| 779 | + |
---|
| 780 | + if (fixed_va_reg) { |
---|
| 781 | + WARN_ON(fixed_va_reg->start_pfn < last_zone_end_pfn); |
---|
| 782 | + kbase_region_tracker_insert(fixed_va_reg); |
---|
| 783 | + last_zone_end_pfn = fixed_va_reg->start_pfn + fixed_va_reg->nr_pages; |
---|
| 784 | + } |
---|
| 785 | +} |
---|
| 786 | +#else |
---|
662 | 787 | static void kbase_region_tracker_ds_init(struct kbase_context *kctx, |
---|
663 | 788 | struct kbase_va_region *same_va_reg, |
---|
664 | 789 | struct kbase_va_region *custom_va_reg) |
---|
.. | .. |
---|
679 | 804 | if (custom_va_reg) |
---|
680 | 805 | kbase_region_tracker_insert(custom_va_reg); |
---|
681 | 806 | } |
---|
| 807 | +#endif /* MALI_USE_CSF */ |
---|
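The CSF variant of kbase_region_tracker_ds_init above inserts the initial free region of each zone in ascending VA order and uses WARN_ON to assert that every zone starts at or after the end of the previous one. A compact way to picture that invariant, with made-up page counts, is the user-space check below.

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct zone_reg { const char *name; uint64_t start_pfn; uint64_t nr_pages; };

int main(void)
{
	/* Hypothetical layout in ascending order: SAME_VA, CUSTOM_VA, EXEC_VA,
	 * EXEC_FIXED_VA, FIXED_VA (sizes are illustrative only).
	 */
	const struct zone_reg layout[] = {
		{ "same_va",       0x1,      0xfffff },
		{ "custom_va",     0x100000, 0x10000 },
		{ "exec_va",       0x110000, 0x1000  },
		{ "exec_fixed_va", 0x111000, 0x1000  },
		{ "fixed_va",      0x112000, 0x2000  },
	};
	uint64_t last_end = 0;
	size_t i;

	for (i = 0; i < sizeof(layout) / sizeof(layout[0]); i++) {
		/* Mirrors WARN_ON(zone->start_pfn < last_zone_end_pfn) above. */
		assert(layout[i].start_pfn >= last_end);
		last_end = layout[i].start_pfn + layout[i].nr_pages;
	}
	printf("zones are non-overlapping and ascending; end pfn = 0x%llx\n",
	       (unsigned long long)last_end);
	return 0;
}
```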
| 808 | + |
---|
| 809 | +static struct kbase_context *kbase_reg_flags_to_kctx(struct kbase_va_region *reg) |
---|
| 810 | +{ |
---|
| 811 | + struct kbase_context *kctx = NULL; |
---|
| 812 | + struct rb_root *rbtree = reg->rbtree; |
---|
| 813 | + |
---|
| 814 | + switch (reg->flags & KBASE_REG_ZONE_MASK) { |
---|
| 815 | + case KBASE_REG_ZONE_CUSTOM_VA: |
---|
| 816 | + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_custom); |
---|
| 817 | + break; |
---|
| 818 | + case KBASE_REG_ZONE_SAME_VA: |
---|
| 819 | + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_same); |
---|
| 820 | + break; |
---|
| 821 | + case KBASE_REG_ZONE_EXEC_VA: |
---|
| 822 | + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec); |
---|
| 823 | + break; |
---|
| 824 | +#if MALI_USE_CSF |
---|
| 825 | + case KBASE_REG_ZONE_EXEC_FIXED_VA: |
---|
| 826 | + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed); |
---|
| 827 | + break; |
---|
| 828 | + case KBASE_REG_ZONE_FIXED_VA: |
---|
| 829 | + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed); |
---|
| 830 | + break; |
---|
| 831 | + case KBASE_REG_ZONE_MCU_SHARED: |
---|
| 832 | + /* This is only expected to be called on driver unload. */ |
---|
| 833 | + break; |
---|
| 834 | +#endif |
---|
| 835 | + default: |
---|
| 836 | + WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); |
---|
| 837 | + break; |
---|
| 838 | + } |
---|
| 839 | + |
---|
| 840 | + return kctx; |
---|
| 841 | +} |
---|
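kbase_reg_flags_to_kctx above recovers the owning kbase_context from a region purely via container_of on the region's rbtree pointer, with the zone bits choosing which member the pointer corresponds to. The standalone example below shows the same pointer arithmetic with a simplified container_of and invented structure names; it is a model of the pattern, not the driver's types.

```c
#include <stddef.h>
#include <stdio.h>

/* Simplified form of the kernel's container_of(): recover the parent
 * struct from a pointer to one of its members.
 */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct toy_rb_root { void *rb_node; };

/* Stand-in for kbase_context: owns one tree per zone. */
struct toy_context {
	int id;
	struct toy_rb_root tree_same;
	struct toy_rb_root tree_custom;
};

static struct toy_context *ctx_from_custom_tree(struct toy_rb_root *rbtree)
{
	return container_of(rbtree, struct toy_context, tree_custom);
}

int main(void)
{
	struct toy_context ctx = { .id = 42 };
	struct toy_rb_root *tree = &ctx.tree_custom;

	printf("ctx id = %d\n", ctx_from_custom_tree(tree)->id);	/* 42 */
	return 0;
}
```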
682 | 842 | |
---|
683 | 843 | static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) |
---|
684 | 844 | { |
---|
.. | .. |
---|
690 | 850 | if (rbnode) { |
---|
691 | 851 | rb_erase(rbnode, rbtree); |
---|
692 | 852 | reg = rb_entry(rbnode, struct kbase_va_region, rblink); |
---|
693 | | - WARN_ON(reg->va_refcnt != 1); |
---|
| 853 | + WARN_ON(kbase_refcount_read(®->va_refcnt) != 1); |
---|
| 854 | + if (kbase_page_migration_enabled) |
---|
| 855 | + kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg); |
---|
694 | 856 | /* Reset the start_pfn - as the rbtree is being |
---|
695 | 857 | * destroyed and we've already erased this region, there |
---|
696 | 858 | * is no further need to attempt to remove it. |
---|
.. | .. |
---|
707 | 869 | |
---|
708 | 870 | void kbase_region_tracker_term(struct kbase_context *kctx) |
---|
709 | 871 | { |
---|
| 872 | + WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, |
---|
| 873 | + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", |
---|
| 874 | + kctx->tgid, kctx->id); |
---|
| 875 | + |
---|
710 | 876 | kbase_gpu_vm_lock(kctx); |
---|
711 | 877 | kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); |
---|
712 | 878 | kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); |
---|
713 | 879 | kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); |
---|
714 | 880 | #if MALI_USE_CSF |
---|
715 | 881 | WARN_ON(!list_empty(&kctx->csf.event_pages_head)); |
---|
| 882 | + kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec_fixed); |
---|
| 883 | + kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_fixed); |
---|
| 884 | + |
---|
716 | 885 | #endif |
---|
717 | 886 | kbase_gpu_vm_unlock(kctx); |
---|
718 | 887 | } |
---|
.. | .. |
---|
724 | 893 | |
---|
725 | 894 | static size_t kbase_get_same_va_bits(struct kbase_context *kctx) |
---|
726 | 895 | { |
---|
727 | | - return min(kbase_get_num_cpu_va_bits(kctx), |
---|
728 | | - (size_t) kctx->kbdev->gpu_props.mmu.va_bits); |
---|
| 896 | + return min_t(size_t, kbase_get_num_cpu_va_bits(kctx), |
---|
| 897 | + kctx->kbdev->gpu_props.mmu.va_bits); |
---|
729 | 898 | } |
---|
730 | 899 | |
---|
731 | 900 | int kbase_region_tracker_init(struct kbase_context *kctx) |
---|
.. | .. |
---|
734 | 903 | struct kbase_va_region *custom_va_reg = NULL; |
---|
735 | 904 | size_t same_va_bits = kbase_get_same_va_bits(kctx); |
---|
736 | 905 | u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; |
---|
737 | | - u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT; |
---|
| 906 | + u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits; |
---|
| 907 | + u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT; |
---|
738 | 908 | u64 same_va_pages; |
---|
739 | 909 | u64 same_va_base = 1u; |
---|
740 | 910 | int err; |
---|
| 911 | +#if MALI_USE_CSF |
---|
| 912 | + struct kbase_va_region *exec_va_reg; |
---|
| 913 | + struct kbase_va_region *exec_fixed_va_reg; |
---|
| 914 | + struct kbase_va_region *fixed_va_reg; |
---|
| 915 | + |
---|
| 916 | + u64 exec_va_base; |
---|
| 917 | + u64 fixed_va_end; |
---|
| 918 | + u64 exec_fixed_va_base; |
---|
| 919 | + u64 fixed_va_base; |
---|
| 920 | + u64 fixed_va_pages; |
---|
| 921 | +#endif |
---|
741 | 922 | |
---|
742 | 923 | /* Take the lock as kbase_free_alloced_region requires it */ |
---|
743 | 924 | kbase_gpu_vm_lock(kctx); |
---|
744 | 925 | |
---|
745 | 926 | same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - same_va_base; |
---|
| 927 | + |
---|
| 928 | +#if MALI_USE_CSF |
---|
| 929 | + if ((same_va_base + same_va_pages) > KBASE_REG_ZONE_EXEC_VA_BASE_64) { |
---|
| 930 | + /* Depending on how the kernel is configured, it's possible (eg on aarch64) for |
---|
| 931 | + * same_va_bits to reach 48 bits. Cap same_va_pages so that the same_va zone |
---|
| 932 | + * doesn't cross into the exec_va zone. |
---|
| 933 | + */ |
---|
| 934 | + same_va_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - same_va_base; |
---|
| 935 | + } |
---|
| 936 | +#endif |
---|
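The new capping logic above keeps the SAME_VA zone from running into the EXEC_VA zone on CSF GPUs when the CPU VA width is large (for example 48-bit aarch64). The arithmetic is plain page counting; the sketch below walks through it with a hypothetical EXEC_VA base pfn, since the real value of KBASE_REG_ZONE_EXEC_VA_BASE_64 is not quoted in this hunk.

```c
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	uint64_t same_va_bits = 48;		/* large CPU VA width */
	uint64_t same_va_base = 1;		/* first usable pfn */
	uint64_t exec_va_base = 1ULL << 35;	/* hypothetical EXEC_VA base pfn */
	uint64_t same_va_pages =
		(1ULL << (same_va_bits - PAGE_SHIFT)) - same_va_base;

	printf("uncapped same_va_pages = 0x%llx\n",
	       (unsigned long long)same_va_pages);

	/* Mirrors: if (same_va_base + same_va_pages > exec_va_base) cap it. */
	if (same_va_base + same_va_pages > exec_va_base)
		same_va_pages = exec_va_base - same_va_base;

	printf("capped   same_va_pages = 0x%llx\n",
	       (unsigned long long)same_va_pages);
	return 0;
}
```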
| 937 | + |
---|
746 | 938 | /* all have SAME_VA */ |
---|
747 | | - same_va_reg = |
---|
748 | | - kbase_alloc_free_region(&kctx->reg_rbtree_same, same_va_base, |
---|
749 | | - same_va_pages, KBASE_REG_ZONE_SAME_VA); |
---|
| 939 | + same_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, same_va_base, |
---|
| 940 | + same_va_pages, KBASE_REG_ZONE_SAME_VA); |
---|
750 | 941 | |
---|
751 | 942 | if (!same_va_reg) { |
---|
752 | 943 | err = -ENOMEM; |
---|
.. | .. |
---|
755 | 946 | kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base, |
---|
756 | 947 | same_va_pages); |
---|
757 | 948 | |
---|
758 | | -#if IS_ENABLED(CONFIG_64BIT) |
---|
759 | | - /* 32-bit clients have custom VA zones */ |
---|
760 | | - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { |
---|
761 | | -#endif |
---|
| 949 | + if (kbase_ctx_compat_mode(kctx)) { |
---|
762 | 950 | if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { |
---|
763 | 951 | err = -EINVAL; |
---|
764 | 952 | goto fail_free_same_va; |
---|
.. | .. |
---|
770 | 958 | if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit) |
---|
771 | 959 | custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE; |
---|
772 | 960 | |
---|
773 | | - custom_va_reg = kbase_alloc_free_region( |
---|
774 | | - &kctx->reg_rbtree_custom, |
---|
775 | | - KBASE_REG_ZONE_CUSTOM_VA_BASE, |
---|
776 | | - custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); |
---|
| 961 | + custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, |
---|
| 962 | + KBASE_REG_ZONE_CUSTOM_VA_BASE, |
---|
| 963 | + custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); |
---|
777 | 964 | |
---|
778 | 965 | if (!custom_va_reg) { |
---|
779 | 966 | err = -ENOMEM; |
---|
.. | .. |
---|
782 | 969 | kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, |
---|
783 | 970 | KBASE_REG_ZONE_CUSTOM_VA_BASE, |
---|
784 | 971 | custom_va_size); |
---|
785 | | -#if IS_ENABLED(CONFIG_64BIT) |
---|
786 | 972 | } else { |
---|
787 | 973 | custom_va_size = 0; |
---|
788 | 974 | } |
---|
789 | | -#endif |
---|
| 975 | + |
---|
| 976 | +#if MALI_USE_CSF |
---|
| 977 | + /* The position of EXEC_VA depends on whether the client is 32-bit or 64-bit. */ |
---|
| 978 | + exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_64; |
---|
| 979 | + |
---|
| 980 | + /* Similarly the end of the FIXED_VA zone also depends on whether the client |
---|
| 981 | + * is 32 or 64-bits. |
---|
| 982 | + */ |
---|
| 983 | + fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64; |
---|
| 984 | + |
---|
| 985 | + if (kbase_ctx_compat_mode(kctx)) { |
---|
| 986 | + exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_32; |
---|
| 987 | + fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32; |
---|
| 988 | + } |
---|
| 989 | + |
---|
| 990 | + kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, exec_va_base, |
---|
| 991 | + KBASE_REG_ZONE_EXEC_VA_SIZE); |
---|
| 992 | + |
---|
| 993 | + exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_base, |
---|
| 994 | + KBASE_REG_ZONE_EXEC_VA_SIZE, KBASE_REG_ZONE_EXEC_VA); |
---|
| 995 | + |
---|
| 996 | + if (!exec_va_reg) { |
---|
| 997 | + err = -ENOMEM; |
---|
| 998 | + goto fail_free_custom_va; |
---|
| 999 | + } |
---|
| 1000 | + |
---|
| 1001 | + exec_fixed_va_base = exec_va_base + KBASE_REG_ZONE_EXEC_VA_SIZE; |
---|
| 1002 | + |
---|
| 1003 | + kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA, exec_fixed_va_base, |
---|
| 1004 | + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE); |
---|
| 1005 | + |
---|
| 1006 | + exec_fixed_va_reg = |
---|
| 1007 | + kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec_fixed, |
---|
| 1008 | + exec_fixed_va_base, KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE, |
---|
| 1009 | + KBASE_REG_ZONE_EXEC_FIXED_VA); |
---|
| 1010 | + |
---|
| 1011 | + if (!exec_fixed_va_reg) { |
---|
| 1012 | + err = -ENOMEM; |
---|
| 1013 | + goto fail_free_exec_va; |
---|
| 1014 | + } |
---|
| 1015 | + |
---|
| 1016 | + fixed_va_base = exec_fixed_va_base + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE; |
---|
| 1017 | + fixed_va_pages = fixed_va_end - fixed_va_base; |
---|
| 1018 | + |
---|
| 1019 | + kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_FIXED_VA, fixed_va_base, fixed_va_pages); |
---|
| 1020 | + |
---|
| 1021 | + fixed_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_fixed, fixed_va_base, |
---|
| 1022 | + fixed_va_pages, KBASE_REG_ZONE_FIXED_VA); |
---|
| 1023 | + |
---|
| 1024 | + kctx->gpu_va_end = fixed_va_end; |
---|
| 1025 | + |
---|
| 1026 | + if (!fixed_va_reg) { |
---|
| 1027 | + err = -ENOMEM; |
---|
| 1028 | + goto fail_free_exec_fixed_va; |
---|
| 1029 | + } |
---|
| 1030 | + |
---|
| 1031 | + kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg, exec_va_reg, |
---|
| 1032 | + exec_fixed_va_reg, fixed_va_reg); |
---|
| 1033 | + |
---|
| 1034 | + INIT_LIST_HEAD(&kctx->csf.event_pages_head); |
---|
| 1035 | +#else |
---|
790 | 1036 | /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is |
---|
791 | 1037 | * initially U64_MAX |
---|
792 | 1038 | */ |
---|
.. | .. |
---|
794 | 1040 | /* Other zones are 0: kbase_create_context() uses vzalloc */ |
---|
795 | 1041 | |
---|
796 | 1042 | kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg); |
---|
797 | | - |
---|
798 | 1043 | kctx->gpu_va_end = same_va_base + same_va_pages + custom_va_size; |
---|
799 | | - kctx->jit_va = false; |
---|
800 | | - |
---|
801 | | -#if MALI_USE_CSF |
---|
802 | | - INIT_LIST_HEAD(&kctx->csf.event_pages_head); |
---|
803 | 1044 | #endif |
---|
| 1045 | + kctx->jit_va = false; |
---|
804 | 1046 | |
---|
805 | 1047 | kbase_gpu_vm_unlock(kctx); |
---|
806 | 1048 | return 0; |
---|
| 1049 | + |
---|
| 1050 | +#if MALI_USE_CSF |
---|
| 1051 | +fail_free_exec_fixed_va: |
---|
| 1052 | + kbase_free_alloced_region(exec_fixed_va_reg); |
---|
| 1053 | +fail_free_exec_va: |
---|
| 1054 | + kbase_free_alloced_region(exec_va_reg); |
---|
| 1055 | +fail_free_custom_va: |
---|
| 1056 | + if (custom_va_reg) |
---|
| 1057 | + kbase_free_alloced_region(custom_va_reg); |
---|
| 1058 | +#endif |
---|
807 | 1059 | |
---|
808 | 1060 | fail_free_same_va: |
---|
809 | 1061 | kbase_free_alloced_region(same_va_reg); |
---|
.. | .. |
---|
834 | 1086 | } |
---|
835 | 1087 | |
---|
836 | 1088 | /** |
---|
837 | | - * Determine if any allocations have been made on a context's region tracker |
---|
| 1089 | + * kbase_region_tracker_has_allocs - Determine if any allocations have been made |
---|
| 1090 | + * on a context's region tracker |
---|
| 1091 | + * |
---|
838 | 1092 | * @kctx: KBase context |
---|
839 | 1093 | * |
---|
840 | 1094 | * Check the context to determine if any allocations have been made yet from |
---|
.. | .. |
---|
862 | 1116 | unsigned long zone_bits = KBASE_REG_ZONE(zone_idx); |
---|
863 | 1117 | unsigned long reg_zone; |
---|
864 | 1118 | |
---|
| 1119 | + if (!kbase_is_ctx_reg_zone(zone_bits)) |
---|
| 1120 | + continue; |
---|
865 | 1121 | zone = kbase_ctx_reg_zone_get(kctx, zone_bits); |
---|
866 | 1122 | zone_base_addr = zone->base_pfn << PAGE_SHIFT; |
---|
867 | 1123 | |
---|
.. | .. |
---|
901 | 1157 | return false; |
---|
902 | 1158 | } |
---|
903 | 1159 | |
---|
904 | | -#if IS_ENABLED(CONFIG_64BIT) |
---|
905 | 1160 | static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, |
---|
906 | 1161 | u64 jit_va_pages) |
---|
907 | 1162 | { |
---|
.. | .. |
---|
950 | 1205 | * Create a custom VA zone at the end of the VA for allocations which |
---|
951 | 1206 | * JIT can use so it doesn't have to allocate VA from the kernel. |
---|
952 | 1207 | */ |
---|
953 | | - custom_va_reg = |
---|
954 | | - kbase_alloc_free_region(&kctx->reg_rbtree_custom, jit_va_start, |
---|
955 | | - jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA); |
---|
| 1208 | + custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, jit_va_start, |
---|
| 1209 | + jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA); |
---|
956 | 1210 | |
---|
957 | 1211 | /* |
---|
958 | 1212 | * The context will be destroyed if we fail here so no point |
---|
.. | .. |
---|
969 | 1223 | kbase_region_tracker_insert(custom_va_reg); |
---|
970 | 1224 | return 0; |
---|
971 | 1225 | } |
---|
972 | | -#endif |
---|
973 | 1226 | |
---|
974 | 1227 | int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, |
---|
975 | 1228 | int max_allocations, int trim_level, int group_id, |
---|
.. | .. |
---|
1010 | 1263 | goto exit_unlock; |
---|
1011 | 1264 | } |
---|
1012 | 1265 | |
---|
1013 | | -#if IS_ENABLED(CONFIG_64BIT) |
---|
1014 | | - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) |
---|
| 1266 | + if (!kbase_ctx_compat_mode(kctx)) |
---|
1015 | 1267 | err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages); |
---|
1016 | | -#endif |
---|
1017 | 1268 | /* |
---|
1018 | 1269 | * Nothing to do for 32-bit clients, JIT uses the existing |
---|
1019 | 1270 | * custom VA zone. |
---|
.. | .. |
---|
1039 | 1290 | |
---|
1040 | 1291 | int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages) |
---|
1041 | 1292 | { |
---|
| 1293 | +#if !MALI_USE_CSF |
---|
1042 | 1294 | struct kbase_va_region *exec_va_reg; |
---|
1043 | 1295 | struct kbase_reg_zone *exec_va_zone; |
---|
1044 | 1296 | struct kbase_reg_zone *target_zone; |
---|
.. | .. |
---|
1047 | 1299 | unsigned long target_zone_bits; |
---|
1048 | 1300 | u64 exec_va_start; |
---|
1049 | 1301 | int err; |
---|
| 1302 | +#endif |
---|
1050 | 1303 | |
---|
1051 | 1304 | /* The EXEC_VA zone shall be created by making space either: |
---|
1052 | 1305 | * - for 64-bit clients, at the end of the process's address space |
---|
.. | .. |
---|
1060 | 1313 | if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES) |
---|
1061 | 1314 | return -EINVAL; |
---|
1062 | 1315 | |
---|
| 1316 | +#if MALI_USE_CSF |
---|
| 1317 | + /* For CSF GPUs we now set up the EXEC_VA zone during initialization, |
---|
| 1318 | + * so this request is a no-op. |
---|
| 1319 | + */ |
---|
| 1320 | + return 0; |
---|
| 1321 | +#else |
---|
1063 | 1322 | kbase_gpu_vm_lock(kctx); |
---|
1064 | 1323 | |
---|
1065 | 1324 | /* Verify that we've not already created a EXEC_VA zone, and that the |
---|
.. | .. |
---|
1081 | 1340 | goto exit_unlock; |
---|
1082 | 1341 | } |
---|
1083 | 1342 | |
---|
1084 | | -#if IS_ENABLED(CONFIG_64BIT) |
---|
1085 | | - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { |
---|
1086 | | -#endif |
---|
| 1343 | + if (kbase_ctx_compat_mode(kctx)) { |
---|
1087 | 1344 | /* 32-bit client: take from CUSTOM_VA zone */ |
---|
1088 | 1345 | target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA; |
---|
1089 | | -#if IS_ENABLED(CONFIG_64BIT) |
---|
1090 | 1346 | } else { |
---|
1091 | 1347 | /* 64-bit client: take from SAME_VA zone */ |
---|
1092 | 1348 | target_zone_bits = KBASE_REG_ZONE_SAME_VA; |
---|
1093 | 1349 | } |
---|
1094 | | -#endif |
---|
| 1350 | + |
---|
1095 | 1351 | target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits); |
---|
1096 | 1352 | target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT; |
---|
1097 | 1353 | |
---|
.. | .. |
---|
1119 | 1375 | /* Taken from the end of the target zone */ |
---|
1120 | 1376 | exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages; |
---|
1121 | 1377 | |
---|
1122 | | - exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec, |
---|
1123 | | - exec_va_start, |
---|
1124 | | - exec_va_pages, |
---|
1125 | | - KBASE_REG_ZONE_EXEC_VA); |
---|
| 1378 | + exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_start, |
---|
| 1379 | + exec_va_pages, KBASE_REG_ZONE_EXEC_VA); |
---|
1126 | 1380 | if (!exec_va_reg) { |
---|
1127 | 1381 | err = -ENOMEM; |
---|
1128 | 1382 | goto exit_unlock; |
---|
.. | .. |
---|
1145 | 1399 | exit_unlock: |
---|
1146 | 1400 | kbase_gpu_vm_unlock(kctx); |
---|
1147 | 1401 | return err; |
---|
| 1402 | +#endif /* MALI_USE_CSF */ |
---|
1148 | 1403 | } |
---|
1149 | 1404 | |
---|
1150 | 1405 | #if MALI_USE_CSF |
---|
.. | .. |
---|
1164 | 1419 | |
---|
1165 | 1420 | kbdev->csf.shared_reg_rbtree = RB_ROOT; |
---|
1166 | 1421 | |
---|
1167 | | - shared_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, |
---|
1168 | | - shared_reg_start_pfn, |
---|
1169 | | - shared_reg_size, |
---|
1170 | | - KBASE_REG_ZONE_MCU_SHARED); |
---|
| 1422 | + shared_reg = |
---|
| 1423 | + kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, shared_reg_start_pfn, |
---|
| 1424 | + shared_reg_size, KBASE_REG_ZONE_MCU_SHARED); |
---|
1171 | 1425 | if (!shared_reg) |
---|
1172 | 1426 | return -ENOMEM; |
---|
1173 | 1427 | |
---|
.. | .. |
---|
1176 | 1430 | } |
---|
1177 | 1431 | #endif |
---|
1178 | 1432 | |
---|
| 1433 | +static void kbasep_mem_page_size_init(struct kbase_device *kbdev) |
---|
| 1434 | +{ |
---|
| 1435 | +#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) |
---|
| 1436 | +#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) |
---|
| 1437 | + kbdev->pagesize_2mb = true; |
---|
| 1438 | + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC) != 1) { |
---|
| 1439 | + dev_warn( |
---|
| 1440 | + kbdev->dev, |
---|
| 1441 | + "2MB page is enabled by force while current GPU-HW doesn't meet the requirement to do so.\n"); |
---|
| 1442 | + } |
---|
| 1443 | +#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */ |
---|
| 1444 | + kbdev->pagesize_2mb = false; |
---|
| 1445 | +#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */ |
---|
| 1446 | +#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */ |
---|
| 1447 | + /* Set it to the default based on which GPU is present */ |
---|
| 1448 | + kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC); |
---|
| 1449 | +#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */ |
---|
| 1450 | +} |
---|
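kbasep_mem_page_size_init above resolves kbdev->pagesize_2mb from two inputs: the compile-time override pair (CONFIG_LARGE_PAGE_ALLOC_OVERRIDE with CONFIG_LARGE_PAGE_ALLOC) and the GPU's LARGE_PAGE_ALLOC hardware feature, warning when the override forces 2MB pages on hardware that does not advertise the feature. Reduced to plain booleans, the precedence looks like the hypothetical helper below (the parameter names are mine, not the driver's).

```c
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical flattening of the #if ladder: override_present/override_value
 * model the two Kconfig options, hw_feature models the GPU feature query.
 */
static bool resolve_pagesize_2mb(bool override_present, bool override_value,
				 bool hw_feature, bool *warn)
{
	*warn = false;
	if (override_present) {
		if (override_value && !hw_feature)
			*warn = true;	/* forced on unsupported hardware */
		return override_value;
	}
	return hw_feature;		/* default: follow the GPU feature bit */
}

int main(void)
{
	bool warn;

	printf("%d warn=%d\n", resolve_pagesize_2mb(true, true, false, &warn), warn);
	printf("%d warn=%d\n", resolve_pagesize_2mb(false, false, true, &warn), warn);
	return 0;
}
```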
| 1451 | + |
---|
1179 | 1452 | int kbase_mem_init(struct kbase_device *kbdev) |
---|
1180 | 1453 | { |
---|
1181 | 1454 | int err = 0; |
---|
1182 | 1455 | struct kbasep_mem_device *memdev; |
---|
| 1456 | + char va_region_slab_name[VA_REGION_SLAB_NAME_SIZE]; |
---|
1183 | 1457 | #if IS_ENABLED(CONFIG_OF) |
---|
1184 | 1458 | struct device_node *mgm_node = NULL; |
---|
1185 | 1459 | #endif |
---|
.. | .. |
---|
1188 | 1462 | |
---|
1189 | 1463 | memdev = &kbdev->memdev; |
---|
1190 | 1464 | |
---|
| 1465 | + kbasep_mem_page_size_init(kbdev); |
---|
| 1466 | + |
---|
| 1467 | + scnprintf(va_region_slab_name, VA_REGION_SLAB_NAME_SIZE, VA_REGION_SLAB_NAME_PREFIX "%s", |
---|
| 1468 | + kbdev->devname); |
---|
| 1469 | + |
---|
| 1470 | + /* Initialize slab cache for kbase_va_regions */ |
---|
| 1471 | + kbdev->va_region_slab = |
---|
| 1472 | + kmem_cache_create(va_region_slab_name, sizeof(struct kbase_va_region), 0, 0, NULL); |
---|
| 1473 | + if (kbdev->va_region_slab == NULL) { |
---|
| 1474 | + dev_err(kbdev->dev, "Failed to create va_region_slab\n"); |
---|
| 1475 | + return -ENOMEM; |
---|
| 1476 | + } |
---|
| 1477 | + |
---|
| 1478 | + kbase_mem_migrate_init(kbdev); |
---|
1191 | 1479 | kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults, |
---|
1192 | 1480 | KBASE_MEM_POOL_MAX_SIZE_KCTX); |
---|
1193 | 1481 | |
---|
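kbase_mem_init above now creates a per-device slab cache for struct kbase_va_region, with the cache name buffer sized as DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1 and filled by scnprintf. The sizing works because sizeof on a string literal already counts its terminating NUL. The user-space sketch below checks that a prefix-plus-devname string always fits; DEVNAME_SIZE is a placeholder here, since the real constant is defined elsewhere in the driver.

```c
#include <assert.h>
#include <stdio.h>
#include <string.h>

#define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-"
#define DEVNAME_SIZE 16	/* placeholder for the driver's DEVNAME_SIZE */
#define VA_REGION_SLAB_NAME_SIZE \
	(DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1)

int main(void)
{
	char name[VA_REGION_SLAB_NAME_SIZE];
	const char *devname = "mali0";	/* at most DEVNAME_SIZE - 1 chars + NUL */

	assert(strlen(devname) < DEVNAME_SIZE);
	/* Same formatting as the scnprintf() call in the hunk above. */
	snprintf(name, sizeof(name), VA_REGION_SLAB_NAME_PREFIX "%s", devname);

	/* prefix chars + devname chars + NUL always fit in the buffer */
	assert(strlen(name) < sizeof(name));
	printf("slab cache name: %s\n", name);	/* va-region-slab-mali0 */
	return 0;
}
```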
.. | .. |
---|
1250 | 1538 | kbase_mem_pool_group_config_set_max_size(&mem_pool_defaults, |
---|
1251 | 1539 | KBASE_MEM_POOL_MAX_SIZE_KBDEV); |
---|
1252 | 1540 | |
---|
1253 | | - err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev, |
---|
1254 | | - &mem_pool_defaults, NULL); |
---|
| 1541 | + err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev, &mem_pool_defaults, NULL); |
---|
1255 | 1542 | } |
---|
1256 | 1543 | |
---|
1257 | 1544 | return err; |
---|
.. | .. |
---|
1277 | 1564 | |
---|
1278 | 1565 | kbase_mem_pool_group_term(&kbdev->mem_pools); |
---|
1279 | 1566 | |
---|
| 1567 | + kbase_mem_migrate_term(kbdev); |
---|
| 1568 | + |
---|
| 1569 | + kmem_cache_destroy(kbdev->va_region_slab); |
---|
| 1570 | + kbdev->va_region_slab = NULL; |
---|
| 1571 | + |
---|
1280 | 1572 | WARN_ON(kbdev->total_gpu_pages); |
---|
1281 | 1573 | WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root)); |
---|
1282 | 1574 | WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root)); |
---|
.. | .. |
---|
1288 | 1580 | KBASE_EXPORT_TEST_API(kbase_mem_term); |
---|
1289 | 1581 | |
---|
1290 | 1582 | /** |
---|
1291 | | - * Allocate a free region object. |
---|
| 1583 | + * kbase_alloc_free_region - Allocate a free region object. |
---|
| 1584 | + * |
---|
| 1585 | + * @kbdev: kbase device |
---|
1292 | 1586 | * @rbtree: Backlink to the red-black tree of memory regions. |
---|
1293 | 1587 | * @start_pfn: The Page Frame Number in GPU virtual address space. |
---|
1294 | 1588 | * @nr_pages: The size of the region in pages. |
---|
.. | .. |
---|
1299 | 1593 | * |
---|
1300 | 1594 | * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA. |
---|
1301 | 1595 | * |
---|
| 1596 | + * Return: pointer to the allocated region object on success, NULL otherwise. |
---|
1302 | 1597 | */ |
---|
1303 | | -struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, |
---|
1304 | | - u64 start_pfn, size_t nr_pages, int zone) |
---|
| 1598 | +struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree, |
---|
| 1599 | + u64 start_pfn, size_t nr_pages, int zone) |
---|
1305 | 1600 | { |
---|
1306 | 1601 | struct kbase_va_region *new_reg; |
---|
1307 | 1602 | |
---|
.. | .. |
---|
1313 | 1608 | /* 64-bit address range is the max */ |
---|
1314 | 1609 | KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE)); |
---|
1315 | 1610 | |
---|
1316 | | - new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL); |
---|
| 1611 | + new_reg = kmem_cache_zalloc(kbdev->va_region_slab, GFP_KERNEL); |
---|
1317 | 1612 | |
---|
1318 | 1613 | if (!new_reg) |
---|
1319 | 1614 | return NULL; |
---|
1320 | 1615 | |
---|
1321 | | - new_reg->va_refcnt = 1; |
---|
| 1616 | + kbase_refcount_set(&new_reg->va_refcnt, 1); |
---|
| 1617 | + atomic_set(&new_reg->no_user_free_count, 0); |
---|
1322 | 1618 | new_reg->cpu_alloc = NULL; /* no alloc bound yet */ |
---|
1323 | 1619 | new_reg->gpu_alloc = NULL; /* no alloc bound yet */ |
---|
1324 | 1620 | new_reg->rbtree = rbtree; |
---|
.. | .. |
---|
1337 | 1633 | |
---|
1338 | 1634 | KBASE_EXPORT_TEST_API(kbase_alloc_free_region); |
---|
1339 | 1635 | |
---|
1340 | | -static struct kbase_context *kbase_reg_flags_to_kctx( |
---|
1341 | | - struct kbase_va_region *reg) |
---|
1342 | | -{ |
---|
1343 | | - struct kbase_context *kctx = NULL; |
---|
1344 | | - struct rb_root *rbtree = reg->rbtree; |
---|
1345 | | - |
---|
1346 | | - switch (reg->flags & KBASE_REG_ZONE_MASK) { |
---|
1347 | | - case KBASE_REG_ZONE_CUSTOM_VA: |
---|
1348 | | - kctx = container_of(rbtree, struct kbase_context, |
---|
1349 | | - reg_rbtree_custom); |
---|
1350 | | - break; |
---|
1351 | | - case KBASE_REG_ZONE_SAME_VA: |
---|
1352 | | - kctx = container_of(rbtree, struct kbase_context, |
---|
1353 | | - reg_rbtree_same); |
---|
1354 | | - break; |
---|
1355 | | - case KBASE_REG_ZONE_EXEC_VA: |
---|
1356 | | - kctx = container_of(rbtree, struct kbase_context, |
---|
1357 | | - reg_rbtree_exec); |
---|
1358 | | - break; |
---|
1359 | | - default: |
---|
1360 | | - WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); |
---|
1361 | | - break; |
---|
1362 | | - } |
---|
1363 | | - |
---|
1364 | | - return kctx; |
---|
1365 | | -} |
---|
1366 | | - |
---|
1367 | 1636 | /** |
---|
1368 | | - * Free a region object. |
---|
| 1637 | + * kbase_free_alloced_region - Free a region object. |
---|
| 1638 | + * |
---|
1369 | 1639 | * @reg: Region |
---|
1370 | 1640 | * |
---|
1371 | 1641 | * The described region must be freed of any mapping. |
---|
.. | .. |
---|
1374 | 1644 | * alloc object will be released. |
---|
1375 | 1645 | * It is a bug if no alloc object exists for non-free regions. |
---|
1376 | 1646 | * |
---|
| 1647 | + * If the region is KBASE_REG_ZONE_MCU_SHARED it is freed. |
---|
1377 | 1648 | */ |
---|
1378 | 1649 | void kbase_free_alloced_region(struct kbase_va_region *reg) |
---|
1379 | 1650 | { |
---|
.. | .. |
---|
1397 | 1668 | (void *)reg); |
---|
1398 | 1669 | #if MALI_USE_CSF |
---|
1399 | 1670 | if (reg->flags & KBASE_REG_CSF_EVENT) |
---|
| 1671 | + /* |
---|
| 1672 | + * This should not be reachable if called from 'mcu_shared' functions |
---|
| 1673 | + * such as: |
---|
| 1674 | + * kbase_csf_firmware_mcu_shared_mapping_init |
---|
| 1675 | + * kbase_csf_firmware_mcu_shared_mapping_term |
---|
| 1676 | + */ |
---|
| 1677 | + |
---|
1400 | 1678 | kbase_unlink_event_mem_page(kctx, reg); |
---|
1401 | 1679 | #endif |
---|
1402 | 1680 | |
---|
.. | .. |
---|
1410 | 1688 | * on the list at termination time of the region tracker. |
---|
1411 | 1689 | */ |
---|
1412 | 1690 | if (!list_empty(®->gpu_alloc->evict_node)) { |
---|
1413 | | - mutex_unlock(&kctx->jit_evict_lock); |
---|
1414 | | - |
---|
1415 | 1691 | /* |
---|
1416 | 1692 | * Unlink the physical allocation before unmaking it |
---|
1417 | 1693 | * evictable so that the allocation isn't grown back to |
---|
.. | .. |
---|
1421 | 1697 | reg->cpu_alloc->reg = NULL; |
---|
1422 | 1698 | if (reg->cpu_alloc != reg->gpu_alloc) |
---|
1423 | 1699 | reg->gpu_alloc->reg = NULL; |
---|
| 1700 | + |
---|
| 1701 | + mutex_unlock(&kctx->jit_evict_lock); |
---|
1424 | 1702 | |
---|
1425 | 1703 | /* |
---|
1426 | 1704 | * If a region has been made evictable then we must |
---|
.. | .. |
---|
1457 | 1735 | |
---|
1458 | 1736 | KBASE_EXPORT_TEST_API(kbase_free_alloced_region); |
---|
1459 | 1737 | |
---|
1460 | | -int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align) |
---|
| 1738 | +int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, |
---|
| 1739 | + u64 addr, size_t nr_pages, size_t align, |
---|
| 1740 | + enum kbase_caller_mmu_sync_info mmu_sync_info) |
---|
1461 | 1741 | { |
---|
1462 | 1742 | int err; |
---|
1463 | 1743 | size_t i = 0; |
---|
.. | .. |
---|
1494 | 1774 | KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased); |
---|
1495 | 1775 | for (i = 0; i < alloc->imported.alias.nents; i++) { |
---|
1496 | 1776 | if (alloc->imported.alias.aliased[i].alloc) { |
---|
1497 | | - err = kbase_mmu_insert_pages(kctx->kbdev, |
---|
1498 | | - &kctx->mmu, |
---|
1499 | | - reg->start_pfn + (i * stride), |
---|
1500 | | - alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset, |
---|
1501 | | - alloc->imported.alias.aliased[i].length, |
---|
1502 | | - reg->flags & gwt_mask, |
---|
1503 | | - kctx->as_nr, |
---|
1504 | | - group_id); |
---|
| 1777 | + err = kbase_mmu_insert_aliased_pages( |
---|
| 1778 | + kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride), |
---|
| 1779 | + alloc->imported.alias.aliased[i].alloc->pages + |
---|
| 1780 | + alloc->imported.alias.aliased[i].offset, |
---|
| 1781 | + alloc->imported.alias.aliased[i].length, |
---|
| 1782 | + reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, |
---|
| 1783 | + NULL); |
---|
1505 | 1784 | if (err) |
---|
1506 | | - goto bad_insert; |
---|
| 1785 | + goto bad_aliased_insert; |
---|
1507 | 1786 | |
---|
1508 | 1787 | /* Note: mapping count is tracked at alias |
---|
1509 | 1788 | * creation time |
---|
1510 | 1789 | */ |
---|
1511 | 1790 | } else { |
---|
1512 | | - err = kbase_mmu_insert_single_page(kctx, |
---|
1513 | | - reg->start_pfn + i * stride, |
---|
1514 | | - kctx->aliasing_sink_page, |
---|
| 1791 | + err = kbase_mmu_insert_single_aliased_page( |
---|
| 1792 | + kctx, reg->start_pfn + i * stride, kctx->aliasing_sink_page, |
---|
1515 | 1793 | alloc->imported.alias.aliased[i].length, |
---|
1516 | | - (reg->flags & mask & gwt_mask) | attr, |
---|
1517 | | - group_id); |
---|
| 1794 | + (reg->flags & mask & gwt_mask) | attr, group_id, |
---|
| 1795 | + mmu_sync_info); |
---|
1518 | 1796 | |
---|
1519 | 1797 | if (err) |
---|
1520 | | - goto bad_insert; |
---|
| 1798 | + goto bad_aliased_insert; |
---|
1521 | 1799 | } |
---|
1522 | 1800 | } |
---|
1523 | 1801 | } else { |
---|
1524 | | - err = kbase_mmu_insert_pages(kctx->kbdev, |
---|
1525 | | - &kctx->mmu, |
---|
1526 | | - reg->start_pfn, |
---|
1527 | | - kbase_get_gpu_phy_pages(reg), |
---|
1528 | | - kbase_reg_current_backed_size(reg), |
---|
1529 | | - reg->flags & gwt_mask, |
---|
1530 | | - kctx->as_nr, |
---|
1531 | | - group_id); |
---|
| 1802 | + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM || |
---|
| 1803 | + reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { |
---|
| 1804 | + |
---|
| 1805 | + err = kbase_mmu_insert_imported_pages( |
---|
| 1806 | + kctx->kbdev, &kctx->mmu, reg->start_pfn, |
---|
| 1807 | + kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), |
---|
| 1808 | + reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, reg); |
---|
| 1809 | + } else { |
---|
| 1810 | + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, |
---|
| 1811 | + kbase_get_gpu_phy_pages(reg), |
---|
| 1812 | + kbase_reg_current_backed_size(reg), |
---|
| 1813 | + reg->flags & gwt_mask, kctx->as_nr, group_id, |
---|
| 1814 | + mmu_sync_info, reg, true); |
---|
| 1815 | + } |
---|
| 1816 | + |
---|
1532 | 1817 | if (err) |
---|
1533 | 1818 | goto bad_insert; |
---|
1534 | 1819 | kbase_mem_phy_alloc_gpu_mapped(alloc); |
---|
.. | .. |
---|
1538 | 1823 | !WARN_ON(reg->nr_pages < reg->gpu_alloc->nents) && |
---|
1539 | 1824 | reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM && |
---|
1540 | 1825 | reg->gpu_alloc->imported.umm.current_mapping_usage_count) { |
---|
1541 | | - /* For padded imported dma-buf memory, map the dummy aliasing |
---|
1542 | | - * page from the end of the dma-buf pages, to the end of the |
---|
1543 | | - * region using a read only mapping. |
---|
| 1826 | + /* For padded imported dma-buf or user-buf memory, map the dummy |
---|
| 1827 | + * aliasing page from the end of the imported pages, to the end of |
---|
| 1828 | + * the region using a read only mapping. |
---|
1544 | 1829 | * |
---|
1545 | 1830 | * Only map when it's imported dma-buf memory that is currently |
---|
1546 | 1831 | * mapped. |
---|
.. | .. |
---|
1548 | 1833 | * Assume reg->gpu_alloc->nents is the number of actual pages |
---|
1549 | 1834 | * in the dma-buf memory. |
---|
1550 | 1835 | */ |
---|
1551 | | - err = kbase_mmu_insert_single_page(kctx, |
---|
1552 | | - reg->start_pfn + reg->gpu_alloc->nents, |
---|
1553 | | - kctx->aliasing_sink_page, |
---|
1554 | | - reg->nr_pages - reg->gpu_alloc->nents, |
---|
1555 | | - (reg->flags | KBASE_REG_GPU_RD) & |
---|
1556 | | - ~KBASE_REG_GPU_WR, |
---|
1557 | | - KBASE_MEM_GROUP_SINK); |
---|
| 1836 | + err = kbase_mmu_insert_single_imported_page( |
---|
| 1837 | + kctx, reg->start_pfn + reg->gpu_alloc->nents, kctx->aliasing_sink_page, |
---|
| 1838 | + reg->nr_pages - reg->gpu_alloc->nents, |
---|
| 1839 | + (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK, |
---|
| 1840 | + mmu_sync_info); |
---|
1558 | 1841 | if (err) |
---|
1559 | 1842 | goto bad_insert; |
---|
1560 | 1843 | } |
---|
1561 | 1844 | |
---|
1562 | 1845 | return err; |
---|
1563 | 1846 | |
---|
1564 | | -bad_insert: |
---|
1565 | | - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, |
---|
1566 | | - reg->start_pfn, reg->nr_pages, |
---|
1567 | | - kctx->as_nr); |
---|
| 1847 | +bad_aliased_insert: |
---|
| 1848 | + while (i-- > 0) { |
---|
| 1849 | + struct tagged_addr *phys_alloc = NULL; |
---|
| 1850 | + u64 const stride = alloc->imported.alias.stride; |
---|
1568 | 1851 | |
---|
1569 | | - kbase_remove_va_region(reg); |
---|
| 1852 | + if (alloc->imported.alias.aliased[i].alloc != NULL) |
---|
| 1853 | + phys_alloc = alloc->imported.alias.aliased[i].alloc->pages + |
---|
| 1854 | + alloc->imported.alias.aliased[i].offset; |
---|
| 1855 | + |
---|
| 1856 | + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride), |
---|
| 1857 | + phys_alloc, alloc->imported.alias.aliased[i].length, |
---|
| 1858 | + alloc->imported.alias.aliased[i].length, kctx->as_nr, |
---|
| 1859 | + false); |
---|
| 1860 | + } |
---|
| 1861 | +bad_insert: |
---|
| 1862 | + kbase_remove_va_region(kctx->kbdev, reg); |
---|
1570 | 1863 | |
---|
1571 | 1864 | return err; |
---|
1572 | 1865 | } |
---|
1573 | 1866 | |
---|
1574 | 1867 | KBASE_EXPORT_TEST_API(kbase_gpu_mmap); |
---|
1575 | 1868 | |
---|
1576 | | -static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, |
---|
1577 | | - struct kbase_mem_phy_alloc *alloc, bool writeable); |
---|
| 1869 | +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, |
---|
| 1870 | + struct kbase_va_region *reg, bool writeable); |
---|
1578 | 1871 | |
---|
1579 | 1872 | int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) |
---|
1580 | 1873 | { |
---|
1581 | 1874 | int err = 0; |
---|
| 1875 | + struct kbase_mem_phy_alloc *alloc; |
---|
1582 | 1876 | |
---|
1583 | 1877 | if (reg->start_pfn == 0) |
---|
1584 | 1878 | return 0; |
---|
.. | .. |
---|
1586 | 1880 | if (!reg->gpu_alloc) |
---|
1587 | 1881 | return -EINVAL; |
---|
1588 | 1882 | |
---|
1589 | | - /* Tear down down GPU page tables, depending on memory type. */ |
---|
1590 | | - switch (reg->gpu_alloc->type) { |
---|
1591 | | - case KBASE_MEM_TYPE_ALIAS: /* Fall-through */ |
---|
1592 | | - case KBASE_MEM_TYPE_IMPORTED_UMM: |
---|
1593 | | - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, |
---|
1594 | | - reg->start_pfn, reg->nr_pages, kctx->as_nr); |
---|
| 1883 | + alloc = reg->gpu_alloc; |
---|
| 1884 | + |
---|
| 1885 | + /* Tear down GPU page tables, depending on memory type. */ |
---|
| 1886 | + switch (alloc->type) { |
---|
| 1887 | + case KBASE_MEM_TYPE_ALIAS: { |
---|
| 1888 | + size_t i = 0; |
---|
| 1889 | + /* Due to the way the number of valid PTEs and ATEs are tracked |
---|
| 1890 | + * currently, only the GPU virtual range that is backed & mapped |
---|
| 1891 | + * should be passed to the kbase_mmu_teardown_pages() function, |
---|
| 1892 | + * hence individual aliased regions need to be unmapped |
---|
| 1893 | + * separately. |
---|
| 1894 | + */ |
---|
| 1895 | + for (i = 0; i < alloc->imported.alias.nents; i++) { |
---|
| 1896 | + struct tagged_addr *phys_alloc = NULL; |
---|
| 1897 | + int err_loop; |
---|
| 1898 | + |
---|
| 1899 | + if (alloc->imported.alias.aliased[i].alloc != NULL) |
---|
| 1900 | + phys_alloc = alloc->imported.alias.aliased[i].alloc->pages + |
---|
| 1901 | + alloc->imported.alias.aliased[i].offset; |
---|
| 1902 | + |
---|
| 1903 | + err_loop = kbase_mmu_teardown_pages( |
---|
| 1904 | + kctx->kbdev, &kctx->mmu, |
---|
| 1905 | + reg->start_pfn + (i * alloc->imported.alias.stride), |
---|
| 1906 | + phys_alloc, alloc->imported.alias.aliased[i].length, |
---|
| 1907 | + alloc->imported.alias.aliased[i].length, kctx->as_nr, |
---|
| 1908 | + false); |
---|
| 1909 | + |
---|
| 1910 | + if (WARN_ON_ONCE(err_loop)) |
---|
| 1911 | + err = err_loop; |
---|
| 1912 | + } |
---|
| 1913 | + } |
---|
1595 | 1914 | break; |
---|
1596 | | - default: |
---|
1597 | | - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, |
---|
1598 | | - reg->start_pfn, kbase_reg_current_backed_size(reg), |
---|
1599 | | - kctx->as_nr); |
---|
| 1915 | + case KBASE_MEM_TYPE_IMPORTED_UMM: { |
---|
| 1916 | + size_t nr_phys_pages = reg->nr_pages; |
---|
| 1917 | + size_t nr_virt_pages = reg->nr_pages; |
---|
| 1918 | + /* If the region has import padding and falls under the threshold for |
---|
| 1919 | + * issuing a partial GPU cache flush, we want to reduce the number of |
---|
| 1920 | + * physical pages that get flushed. |
---|
| 1921 | + * |
---|
| 1922 | + * This is symmetric with the case of mapping the memory, which first maps |
---|
| 1923 | + * each imported physical page to a separate virtual page, and then |
---|
| 1924 | + * maps the single aliasing sink page to each of the virtual padding |
---|
| 1925 | + * pages. |
---|
| 1926 | + */ |
---|
| 1927 | + if (reg->flags & KBASE_REG_IMPORT_PAD) |
---|
| 1928 | + nr_phys_pages = alloc->nents + 1; |
---|
| 1929 | + |
---|
| 1930 | + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, |
---|
| 1931 | + alloc->pages, nr_phys_pages, nr_virt_pages, |
---|
| 1932 | + kctx->as_nr, true); |
---|
| 1933 | + } |
---|
| 1934 | + break; |
---|
| 1935 | + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { |
---|
| 1936 | + size_t nr_reg_pages = kbase_reg_current_backed_size(reg); |
---|
| 1937 | + |
---|
| 1938 | + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, |
---|
| 1939 | + alloc->pages, nr_reg_pages, nr_reg_pages, |
---|
| 1940 | + kctx->as_nr, true); |
---|
| 1941 | + } |
---|
| 1942 | + break; |
---|
| 1943 | + default: { |
---|
| 1944 | + size_t nr_reg_pages = kbase_reg_current_backed_size(reg); |
---|
| 1945 | + |
---|
| 1946 | + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, |
---|
| 1947 | + alloc->pages, nr_reg_pages, nr_reg_pages, |
---|
| 1948 | + kctx->as_nr, false); |
---|
| 1949 | + } |
---|
1600 | 1950 | break; |
---|
1601 | 1951 | } |
---|
1602 | 1952 | |
---|
1603 | 1953 | /* Update tracking, and other cleanup, depending on memory type. */ |
---|
1604 | | - switch (reg->gpu_alloc->type) { |
---|
| 1954 | + switch (alloc->type) { |
---|
1605 | 1955 | case KBASE_MEM_TYPE_ALIAS: |
---|
1606 | 1956 | /* We mark the source allocs as unmapped from the GPU when |
---|
1607 | 1957 | * putting reg's allocs |
---|
1608 | 1958 | */ |
---|
1609 | 1959 | break; |
---|
1610 | 1960 | case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { |
---|
1611 | | - struct kbase_alloc_import_user_buf *user_buf = |
---|
1612 | | - ®->gpu_alloc->imported.user_buf; |
---|
| 1961 | + struct kbase_alloc_import_user_buf *user_buf = &alloc->imported.user_buf; |
---|
1613 | 1962 | |
---|
1614 | | - if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) { |
---|
1615 | | - user_buf->current_mapping_usage_count &= |
---|
1616 | | - ~PINNED_ON_IMPORT; |
---|
| 1963 | + if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) { |
---|
| 1964 | + user_buf->current_mapping_usage_count &= ~PINNED_ON_IMPORT; |
---|
1617 | 1965 | |
---|
1618 | | - /* The allocation could still have active mappings. */ |
---|
1619 | | - if (user_buf->current_mapping_usage_count == 0) { |
---|
1620 | | - kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc, |
---|
1621 | | - (reg->flags & KBASE_REG_GPU_WR)); |
---|
1622 | | - } |
---|
| 1966 | + /* The allocation could still have active mappings. */ |
---|
| 1967 | + if (user_buf->current_mapping_usage_count == 0) { |
---|
| 1968 | + kbase_jd_user_buf_unmap(kctx, alloc, reg, |
---|
| 1969 | + (reg->flags & |
---|
| 1970 | + (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR))); |
---|
1623 | 1971 | } |
---|
1624 | 1972 | } |
---|
1625 | | - /* Fall-through */ |
---|
| 1973 | + } |
---|
| 1974 | + fallthrough; |
---|
1626 | 1975 | default: |
---|
1627 | 1976 | kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc); |
---|
1628 | 1977 | break; |
---|
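Annotation: the hunk above moves every teardown in kbase_gpu_munmap() to the widened kbase_mmu_teardown_pages() form, which now takes the backing page array plus separate physical and virtual page counts (and a trailing flag whose value simply mirrors what each case passes). The interesting case is the padded dma-buf import, where the two counts differ; a minimal sketch of that arithmetic, using only identifiers visible in this hunk:

    /* Padded UMM import: unmap the whole virtual range, but only flush the
     * imported physical pages plus the single aliasing sink page.
     */
    size_t nr_virt_pages = reg->nr_pages;
    size_t nr_phys_pages = (reg->flags & KBASE_REG_IMPORT_PAD) ?
                   alloc->nents + 1 : reg->nr_pages;

    err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
                       alloc->pages, nr_phys_pages, nr_virt_pages,
                       kctx->as_nr, true);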
.. | .. |
---|
1741 | 2090 | BUG_ON(!cpu_page); |
---|
1742 | 2091 | BUG_ON(offset + size > PAGE_SIZE); |
---|
1743 | 2092 | |
---|
1744 | | - dma_addr = kbase_dma_addr(cpu_page) + offset; |
---|
| 2093 | + dma_addr = kbase_dma_addr_from_tagged(t_cpu_pa) + offset; |
---|
| 2094 | + |
---|
1745 | 2095 | if (sync_fn == KBASE_SYNC_TO_CPU) |
---|
1746 | 2096 | dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, |
---|
1747 | 2097 | size, DMA_BIDIRECTIONAL); |
---|
.. | .. |
---|
1752 | 2102 | void *src = NULL; |
---|
1753 | 2103 | void *dst = NULL; |
---|
1754 | 2104 | struct page *gpu_page; |
---|
| 2105 | + dma_addr_t dma_addr; |
---|
1755 | 2106 | |
---|
1756 | 2107 | if (WARN(!gpu_pa, "No GPU PA found for infinite cache op")) |
---|
1757 | 2108 | return; |
---|
1758 | 2109 | |
---|
1759 | 2110 | gpu_page = pfn_to_page(PFN_DOWN(gpu_pa)); |
---|
| 2111 | + dma_addr = kbase_dma_addr_from_tagged(t_gpu_pa) + offset; |
---|
1760 | 2112 | |
---|
1761 | 2113 | if (sync_fn == KBASE_SYNC_TO_DEVICE) { |
---|
1762 | 2114 | src = ((unsigned char *)kmap(cpu_page)) + offset; |
---|
1763 | 2115 | dst = ((unsigned char *)kmap(gpu_page)) + offset; |
---|
1764 | 2116 | } else if (sync_fn == KBASE_SYNC_TO_CPU) { |
---|
1765 | | - dma_sync_single_for_cpu(kctx->kbdev->dev, |
---|
1766 | | - kbase_dma_addr(gpu_page) + offset, |
---|
1767 | | - size, DMA_BIDIRECTIONAL); |
---|
| 2117 | + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, size, |
---|
| 2118 | + DMA_BIDIRECTIONAL); |
---|
1768 | 2119 | src = ((unsigned char *)kmap(gpu_page)) + offset; |
---|
1769 | 2120 | dst = ((unsigned char *)kmap(cpu_page)) + offset; |
---|
1770 | 2121 | } |
---|
| 2122 | + |
---|
1771 | 2123 | memcpy(dst, src, size); |
---|
1772 | 2124 | kunmap(gpu_page); |
---|
1773 | 2125 | kunmap(cpu_page); |
---|
1774 | 2126 | if (sync_fn == KBASE_SYNC_TO_DEVICE) |
---|
1775 | | - dma_sync_single_for_device(kctx->kbdev->dev, |
---|
1776 | | - kbase_dma_addr(gpu_page) + offset, |
---|
1777 | | - size, DMA_BIDIRECTIONAL); |
---|
| 2127 | + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, size, |
---|
| 2128 | + DMA_BIDIRECTIONAL); |
---|
1778 | 2129 | } |
---|
1779 | 2130 | } |
---|
1780 | 2131 | |
---|
.. | .. |
---|
1920 | 2271 | __func__, (void *)reg, (void *)kctx); |
---|
1921 | 2272 | lockdep_assert_held(&kctx->reg_lock); |
---|
1922 | 2273 | |
---|
1923 | | - if (reg->flags & KBASE_REG_NO_USER_FREE) { |
---|
| 2274 | + if (kbase_va_region_is_no_user_free(reg)) { |
---|
1924 | 2275 | dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n"); |
---|
1925 | 2276 | return -EINVAL; |
---|
1926 | 2277 | } |
---|
1927 | 2278 | |
---|
1928 | | - /* |
---|
1929 | | - * Unlink the physical allocation before unmaking it evictable so |
---|
1930 | | - * that the allocation isn't grown back to its last backed size |
---|
1931 | | - * as we're going to unmap it anyway. |
---|
1932 | | - */ |
---|
1933 | | - reg->cpu_alloc->reg = NULL; |
---|
1934 | | - if (reg->cpu_alloc != reg->gpu_alloc) |
---|
1935 | | - reg->gpu_alloc->reg = NULL; |
---|
1936 | | - |
---|
1937 | | - /* |
---|
1938 | | - * If a region has been made evictable then we must unmake it |
---|
| 2279 | + /* If a region has been made evictable then we must unmake it |
---|
1939 | 2280 | * before trying to free it. |
---|
1940 | 2281 | * If the memory hasn't been reclaimed it will be unmapped and freed |
---|
1941 | 2282 | * below, if it has been reclaimed then the operations below are no-ops. |
---|
1942 | 2283 | */ |
---|
1943 | 2284 | if (reg->flags & KBASE_REG_DONT_NEED) { |
---|
1944 | | - KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == |
---|
1945 | | - KBASE_MEM_TYPE_NATIVE); |
---|
| 2285 | + WARN_ON(reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE); |
---|
| 2286 | + mutex_lock(&kctx->jit_evict_lock); |
---|
| 2287 | + /* Unlink the physical allocation before unmaking it evictable so |
---|
| 2288 | + * that the allocation isn't grown back to its last backed size |
---|
| 2289 | + * as we're going to unmap it anyway. |
---|
| 2290 | + */ |
---|
| 2291 | + reg->cpu_alloc->reg = NULL; |
---|
| 2292 | + if (reg->cpu_alloc != reg->gpu_alloc) |
---|
| 2293 | + reg->gpu_alloc->reg = NULL; |
---|
| 2294 | + mutex_unlock(&kctx->jit_evict_lock); |
---|
1946 | 2295 | kbase_mem_evictable_unmake(reg->gpu_alloc); |
---|
1947 | 2296 | } |
---|
1948 | 2297 | |
---|
.. | .. |
---|
1952 | 2301 | goto out; |
---|
1953 | 2302 | } |
---|
1954 | 2303 | |
---|
| 2304 | +#if MALI_USE_CSF |
---|
| 2305 | + if (((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_FIXED_VA) || |
---|
| 2306 | + ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_EXEC_FIXED_VA)) { |
---|
| 2307 | + if (reg->flags & KBASE_REG_FIXED_ADDRESS) |
---|
| 2308 | + atomic64_dec(&kctx->num_fixed_allocs); |
---|
| 2309 | + else |
---|
| 2310 | + atomic64_dec(&kctx->num_fixable_allocs); |
---|
| 2311 | + } |
---|
| 2312 | +#endif |
---|
| 2313 | + |
---|
1955 | 2314 | /* This will also free the physical pages */ |
---|
1956 | 2315 | kbase_free_alloced_region(reg); |
---|
1957 | 2316 | |
---|
1958 | | - out: |
---|
| 2317 | +out: |
---|
1959 | 2318 | return err; |
---|
1960 | 2319 | } |
---|
1961 | 2320 | |
---|
1962 | 2321 | KBASE_EXPORT_TEST_API(kbase_mem_free_region); |
---|
1963 | 2322 | |
---|
1964 | 2323 | /** |
---|
1965 | | - * Free the region from the GPU and unregister it. |
---|
| 2324 | + * kbase_mem_free - Free the region from the GPU and unregister it. |
---|
| 2325 | + * |
---|
1966 | 2326 | * @kctx: KBase context |
---|
1967 | 2327 | * @gpu_addr: GPU address to free |
---|
1968 | 2328 | * |
---|
1969 | 2329 | * This function implements the free operation on a memory segment. |
---|
1970 | 2330 | * It will loudly fail if called with outstanding mappings. |
---|
| 2331 | + * |
---|
| 2332 | + * Return: 0 on success. |
---|
1971 | 2333 | */ |
---|
1972 | 2334 | int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) |
---|
1973 | 2335 | { |
---|
.. | .. |
---|
1979 | 2341 | __func__, gpu_addr, (void *)kctx); |
---|
1980 | 2342 | |
---|
1981 | 2343 | if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) { |
---|
1982 | | - dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid"); |
---|
| 2344 | + dev_warn(kctx->kbdev->dev, "%s: gpu_addr parameter is invalid", __func__); |
---|
1983 | 2345 | return -EINVAL; |
---|
1984 | 2346 | } |
---|
1985 | 2347 | |
---|
1986 | 2348 | if (gpu_addr == 0) { |
---|
1987 | | - dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n"); |
---|
| 2349 | + dev_warn(kctx->kbdev->dev, |
---|
| 2350 | + "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using %s\n", |
---|
| 2351 | + __func__); |
---|
1988 | 2352 | return -EINVAL; |
---|
1989 | 2353 | } |
---|
1990 | 2354 | kbase_gpu_vm_lock(kctx); |
---|
.. | .. |
---|
2010 | 2374 | /* Validate the region */ |
---|
2011 | 2375 | reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); |
---|
2012 | 2376 | if (kbase_is_region_invalid_or_free(reg)) { |
---|
2013 | | - dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX", |
---|
2014 | | - gpu_addr); |
---|
| 2377 | + dev_warn(kctx->kbdev->dev, "%s called with nonexistent gpu_addr 0x%llX", |
---|
| 2378 | + __func__, gpu_addr); |
---|
2015 | 2379 | err = -EINVAL; |
---|
2016 | 2380 | goto out_unlock; |
---|
2017 | 2381 | } |
---|
.. | .. |
---|
2026 | 2390 | err = kbase_mem_free_region(kctx, reg); |
---|
2027 | 2391 | } |
---|
2028 | 2392 | |
---|
2029 | | - out_unlock: |
---|
| 2393 | +out_unlock: |
---|
2030 | 2394 | kbase_gpu_vm_unlock(kctx); |
---|
2031 | 2395 | return err; |
---|
2032 | 2396 | } |
---|
.. | .. |
---|
2126 | 2490 | if (flags & BASEP_MEM_PERMANENT_KERNEL_MAPPING) |
---|
2127 | 2491 | reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING; |
---|
2128 | 2492 | |
---|
2129 | | - if (flags & BASEP_MEM_NO_USER_FREE) |
---|
2130 | | - reg->flags |= KBASE_REG_NO_USER_FREE; |
---|
| 2493 | + if (flags & BASEP_MEM_NO_USER_FREE) { |
---|
| 2494 | + kbase_gpu_vm_lock(kctx); |
---|
| 2495 | + kbase_va_region_no_user_free_inc(reg); |
---|
| 2496 | + kbase_gpu_vm_unlock(kctx); |
---|
| 2497 | + } |
---|
2131 | 2498 | |
---|
2132 | 2499 | if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) |
---|
2133 | 2500 | reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE; |
---|
| 2501 | + |
---|
| 2502 | +#if MALI_USE_CSF |
---|
| 2503 | + if (flags & BASE_MEM_FIXED) |
---|
| 2504 | + reg->flags |= KBASE_REG_FIXED_ADDRESS; |
---|
| 2505 | +#endif |
---|
2134 | 2506 | |
---|
2135 | 2507 | return 0; |
---|
2136 | 2508 | } |
---|
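Annotation: BASEP_MEM_NO_USER_FREE no longer just sets a region flag; it takes a counted reference (reg->no_user_free_count) under the VM lock, and the owner must drop that count again before freeing the region, as the JIT teardown hunks later in this patch do. A minimal sketch of the pairing, built only from calls that appear in this diff:

    /* Acquire: mark a kernel-owned region as not freeable from user space. */
    kbase_gpu_vm_lock(kctx);
    kbase_va_region_no_user_free_inc(reg);
    kbase_gpu_vm_unlock(kctx);

    /* Release: when the kernel itself tears the region down. JIT regions are
     * expected to hold exactly one such count, hence the WARN.
     */
    kbase_gpu_vm_lock(kctx);
    WARN_ON(atomic_read(&reg->no_user_free_count) > 1);
    kbase_va_region_no_user_free_dec(reg);
    kbase_mem_free_region(kctx, reg);
    kbase_gpu_vm_unlock(kctx);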
.. | .. |
---|
2174 | 2546 | |
---|
2175 | 2547 | tp = alloc->pages + alloc->nents; |
---|
2176 | 2548 | |
---|
2177 | | -#ifdef CONFIG_MALI_2MB_ALLOC |
---|
2178 | 2549 | /* Check if we have enough pages requested so we can allocate a large |
---|
2179 | 2550 | * page (512 * 4KB = 2MB ) |
---|
2180 | 2551 | */ |
---|
2181 | | - if (nr_left >= (SZ_2M / SZ_4K)) { |
---|
| 2552 | + if (kbdev->pagesize_2mb && nr_left >= (SZ_2M / SZ_4K)) { |
---|
2182 | 2553 | int nr_lp = nr_left / (SZ_2M / SZ_4K); |
---|
2183 | 2554 | |
---|
2184 | | - res = kbase_mem_pool_alloc_pages( |
---|
2185 | | - &kctx->mem_pools.large[alloc->group_id], |
---|
2186 | | - nr_lp * (SZ_2M / SZ_4K), |
---|
2187 | | - tp, |
---|
2188 | | - true); |
---|
| 2555 | + res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id], |
---|
| 2556 | + nr_lp * (SZ_2M / SZ_4K), tp, true, kctx->task); |
---|
2189 | 2557 | |
---|
2190 | 2558 | if (res > 0) { |
---|
2191 | 2559 | nr_left -= res; |
---|
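Annotation: this hunk (and the matching ones further down) trades the compile-time CONFIG_MALI_2MB_ALLOC gate for a per-device runtime check, so the large-page path is selected by kbdev->pagesize_2mb instead of an #ifdef. The shape of the change, shown side by side (nothing here beyond what the hunks contain):

    /* Before: large-page allocation compiled in or out. */
    #ifdef CONFIG_MALI_2MB_ALLOC
        if (nr_left >= (SZ_2M / SZ_4K)) { /* use mem_pools.large */ }
    #endif

    /* After: the same decision taken at runtime, per device. */
    if (kbdev->pagesize_2mb && nr_left >= (SZ_2M / SZ_4K)) {
        /* use mem_pools.large */
    }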
.. | .. |
---|
2239 | 2607 | |
---|
2240 | 2608 | err = kbase_mem_pool_grow( |
---|
2241 | 2609 | &kctx->mem_pools.large[alloc->group_id], |
---|
2242 | | - 1); |
---|
| 2610 | + 1, kctx->task); |
---|
2243 | 2611 | if (err) |
---|
2244 | 2612 | break; |
---|
2245 | 2613 | } while (1); |
---|
.. | .. |
---|
2280 | 2648 | } |
---|
2281 | 2649 | } |
---|
2282 | 2650 | } |
---|
2283 | | -no_new_partial: |
---|
2284 | | -#endif |
---|
2285 | 2651 | |
---|
| 2652 | +no_new_partial: |
---|
2286 | 2653 | if (nr_left) { |
---|
2287 | | - res = kbase_mem_pool_alloc_pages( |
---|
2288 | | - &kctx->mem_pools.small[alloc->group_id], |
---|
2289 | | - nr_left, tp, false); |
---|
| 2654 | + res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[alloc->group_id], nr_left, |
---|
| 2655 | + tp, false, kctx->task); |
---|
2290 | 2656 | if (res <= 0) |
---|
2291 | 2657 | goto alloc_failed; |
---|
2292 | 2658 | } |
---|
.. | .. |
---|
2345 | 2711 | |
---|
2346 | 2712 | lockdep_assert_held(&pool->pool_lock); |
---|
2347 | 2713 | |
---|
2348 | | -#if !defined(CONFIG_MALI_2MB_ALLOC) |
---|
2349 | | - WARN_ON(pool->order); |
---|
2350 | | -#endif |
---|
| 2714 | + kctx = alloc->imported.native.kctx; |
---|
| 2715 | + kbdev = kctx->kbdev; |
---|
| 2716 | + |
---|
| 2717 | + if (!kbdev->pagesize_2mb) |
---|
| 2718 | + WARN_ON(pool->order); |
---|
2351 | 2719 | |
---|
2352 | 2720 | if (alloc->reg) { |
---|
2353 | 2721 | if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents) |
---|
2354 | 2722 | goto invalid_request; |
---|
2355 | 2723 | } |
---|
2356 | | - |
---|
2357 | | - kctx = alloc->imported.native.kctx; |
---|
2358 | | - kbdev = kctx->kbdev; |
---|
2359 | 2724 | |
---|
2360 | 2725 | lockdep_assert_held(&kctx->mem_partials_lock); |
---|
2361 | 2726 | |
---|
.. | .. |
---|
2375 | 2740 | tp = alloc->pages + alloc->nents; |
---|
2376 | 2741 | new_pages = tp; |
---|
2377 | 2742 | |
---|
2378 | | -#ifdef CONFIG_MALI_2MB_ALLOC |
---|
2379 | | - if (pool->order) { |
---|
| 2743 | + if (kbdev->pagesize_2mb && pool->order) { |
---|
2380 | 2744 | int nr_lp = nr_left / (SZ_2M / SZ_4K); |
---|
2381 | 2745 | |
---|
2382 | 2746 | res = kbase_mem_pool_alloc_pages_locked(pool, |
---|
.. | .. |
---|
2460 | 2824 | if (nr_left) |
---|
2461 | 2825 | goto alloc_failed; |
---|
2462 | 2826 | } else { |
---|
2463 | | -#endif |
---|
2464 | 2827 | res = kbase_mem_pool_alloc_pages_locked(pool, |
---|
2465 | 2828 | nr_left, |
---|
2466 | 2829 | tp); |
---|
2467 | 2830 | if (res <= 0) |
---|
2468 | 2831 | goto alloc_failed; |
---|
2469 | | -#ifdef CONFIG_MALI_2MB_ALLOC |
---|
2470 | 2832 | } |
---|
2471 | | -#endif |
---|
2472 | 2833 | |
---|
2473 | 2834 | KBASE_TLSTREAM_AUX_PAGESALLOC( |
---|
2474 | 2835 | kbdev, |
---|
.. | .. |
---|
2489 | 2850 | |
---|
2490 | 2851 | struct tagged_addr *start_free = alloc->pages + alloc->nents; |
---|
2491 | 2852 | |
---|
2492 | | -#ifdef CONFIG_MALI_2MB_ALLOC |
---|
2493 | | - if (pool->order) { |
---|
| 2853 | + if (kbdev->pagesize_2mb && pool->order) { |
---|
2494 | 2854 | while (nr_pages_to_free) { |
---|
2495 | 2855 | if (is_huge_head(*start_free)) { |
---|
2496 | 2856 | kbase_mem_pool_free_pages_locked( |
---|
.. | .. |
---|
2508 | 2868 | } |
---|
2509 | 2869 | } |
---|
2510 | 2870 | } else { |
---|
2511 | | -#endif |
---|
2512 | 2871 | kbase_mem_pool_free_pages_locked(pool, |
---|
2513 | 2872 | nr_pages_to_free, |
---|
2514 | 2873 | start_free, |
---|
2515 | 2874 | false, /* not dirty */ |
---|
2516 | 2875 | true); /* return to pool */ |
---|
2517 | | -#ifdef CONFIG_MALI_2MB_ALLOC |
---|
2518 | 2876 | } |
---|
2519 | | -#endif |
---|
2520 | 2877 | } |
---|
2521 | 2878 | |
---|
2522 | 2879 | kbase_process_page_usage_dec(kctx, nr_pages_requested); |
---|
.. | .. |
---|
2778 | 3135 | /** |
---|
2779 | 3136 | * kbase_jd_user_buf_unpin_pages - Release the pinned pages of a user buffer. |
---|
2780 | 3137 | * @alloc: The allocation for the imported user buffer. |
---|
| 3138 | + * |
---|
| 3139 | + * This must only be called when terminating an alloc, when its refcount |
---|
| 3140 | + * (number of users) has become 0. This also ensures it is only called once all |
---|
| 3141 | + * CPU mappings have been closed. |
---|
| 3142 | + * |
---|
| 3143 | + * Instead call kbase_jd_user_buf_unmap() if you need to unpin pages on active |
---|
| 3144 | + * allocations |
---|
2781 | 3145 | */ |
---|
2782 | 3146 | static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc); |
---|
2783 | 3147 | #endif |
---|
.. | .. |
---|
2908 | 3272 | out_term: |
---|
2909 | 3273 | return -1; |
---|
2910 | 3274 | } |
---|
2911 | | - |
---|
2912 | 3275 | KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages); |
---|
| 3276 | + |
---|
| 3277 | +void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc, |
---|
| 3278 | + enum kbase_page_status status) |
---|
| 3279 | +{ |
---|
| 3280 | + u32 i = 0; |
---|
| 3281 | + |
---|
| 3282 | + for (; i < alloc->nents; i++) { |
---|
| 3283 | + struct tagged_addr phys = alloc->pages[i]; |
---|
| 3284 | + struct kbase_page_metadata *page_md = kbase_page_private(as_page(phys)); |
---|
| 3285 | + |
---|
| 3286 | + /* Skip the 4KB page that is part of a large page, as the large page is |
---|
| 3287 | + * excluded from the migration process. |
---|
| 3288 | + */ |
---|
| 3289 | + if (is_huge(phys) || is_partial(phys)) |
---|
| 3290 | + continue; |
---|
| 3291 | + |
---|
| 3292 | + if (!page_md) |
---|
| 3293 | + continue; |
---|
| 3294 | + |
---|
| 3295 | + spin_lock(&page_md->migrate_lock); |
---|
| 3296 | + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)status); |
---|
| 3297 | + spin_unlock(&page_md->migrate_lock); |
---|
| 3298 | + } |
---|
| 3299 | +} |
---|
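Annotation: kbase_set_phy_alloc_page_status() above is consumed later in this patch by the JIT paths, which toggle whole allocations in and out of the page-migration state machine. Condensed from those later hunks (ALLOCATED_MAPPED, NOT_MOVABLE and kbase_page_migration_enabled all come from this diff), a sketch of the intended usage:

    /* Reusing an evicted JIT region: its pages become migratable again. */
    if (kbase_page_migration_enabled) {
        kbase_gpu_vm_lock(kctx);
        mutex_lock(&kctx->jit_evict_lock);
        kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED);
        mutex_unlock(&kctx->jit_evict_lock);
        kbase_gpu_vm_unlock(kctx);
    }

    /* Returning an inactive JIT region to the pool: keep it out of migration
     * until the shrinker frees it back through the mempools.
     */
    if (kbase_page_migration_enabled)
        kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE);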
2913 | 3300 | |
---|
2914 | 3301 | bool kbase_check_alloc_flags(unsigned long flags) |
---|
2915 | 3302 | { |
---|
.. | .. |
---|
2983 | 3370 | (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) |
---|
2984 | 3371 | return false; |
---|
2985 | 3372 | |
---|
| 3373 | +#if MALI_USE_CSF |
---|
| 3374 | + if ((flags & BASE_MEM_SAME_VA) && (flags & (BASE_MEM_FIXABLE | BASE_MEM_FIXED))) |
---|
| 3375 | + return false; |
---|
| 3376 | + |
---|
| 3377 | + if ((flags & BASE_MEM_FIXABLE) && (flags & BASE_MEM_FIXED)) |
---|
| 3378 | + return false; |
---|
| 3379 | +#endif |
---|
| 3380 | + |
---|
2986 | 3381 | return true; |
---|
2987 | 3382 | } |
---|
2988 | 3383 | |
---|
.. | .. |
---|
3004 | 3399 | if (flags & BASE_MEM_GROW_ON_GPF) |
---|
3005 | 3400 | return false; |
---|
3006 | 3401 | |
---|
3007 | | -#if !MALI_USE_CSF |
---|
| 3402 | +#if MALI_USE_CSF |
---|
| 3403 | + /* Imported memory cannot be fixed */ |
---|
| 3404 | + if ((flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE))) |
---|
| 3405 | + return false; |
---|
| 3406 | +#else |
---|
3008 | 3407 | /* Imported memory cannot be aligned to the end of its initial commit */ |
---|
3009 | 3408 | if (flags & BASE_MEM_TILER_ALIGN_TOP) |
---|
3010 | 3409 | return false; |
---|
.. | .. |
---|
3139 | 3538 | #undef KBASE_MSG_PRE |
---|
3140 | 3539 | } |
---|
3141 | 3540 | |
---|
3142 | | -/** |
---|
3143 | | - * Acquire the per-context region list lock |
---|
3144 | | - * @kctx: KBase context |
---|
3145 | | - */ |
---|
3146 | 3541 | void kbase_gpu_vm_lock(struct kbase_context *kctx) |
---|
3147 | 3542 | { |
---|
3148 | 3543 | KBASE_DEBUG_ASSERT(kctx != NULL); |
---|
.. | .. |
---|
3151 | 3546 | |
---|
3152 | 3547 | KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock); |
---|
3153 | 3548 | |
---|
3154 | | -/** |
---|
3155 | | - * Release the per-context region list lock |
---|
3156 | | - * @kctx: KBase context |
---|
3157 | | - */ |
---|
3158 | 3549 | void kbase_gpu_vm_unlock(struct kbase_context *kctx) |
---|
3159 | 3550 | { |
---|
3160 | 3551 | KBASE_DEBUG_ASSERT(kctx != NULL); |
---|
.. | .. |
---|
3165 | 3556 | |
---|
3166 | 3557 | #if IS_ENABLED(CONFIG_DEBUG_FS) |
---|
3167 | 3558 | struct kbase_jit_debugfs_data { |
---|
3168 | | - int (*func)(struct kbase_jit_debugfs_data *); |
---|
| 3559 | + int (*func)(struct kbase_jit_debugfs_data *data); |
---|
3169 | 3560 | struct mutex lock; |
---|
3170 | 3561 | struct kbase_context *kctx; |
---|
3171 | 3562 | u64 active_value; |
---|
.. | .. |
---|
3388 | 3779 | void kbase_jit_debugfs_init(struct kbase_context *kctx) |
---|
3389 | 3780 | { |
---|
3390 | 3781 | /* prevent unprivileged use of debug file system |
---|
3391 | | - * in old kernel version |
---|
3392 | | - */ |
---|
3393 | | -#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) |
---|
3394 | | - /* only for newer kernel version debug file system is safe */ |
---|
| 3782 | + * in old kernel version |
---|
| 3783 | + */ |
---|
3395 | 3784 | const mode_t mode = 0444; |
---|
3396 | | -#else |
---|
3397 | | - const mode_t mode = 0400; |
---|
3398 | | -#endif |
---|
3399 | 3785 | |
---|
3400 | 3786 | /* Caller already ensures this, but we keep the pattern for |
---|
3401 | 3787 | * maintenance safety. |
---|
.. | .. |
---|
3469 | 3855 | mutex_unlock(&kctx->jit_evict_lock); |
---|
3470 | 3856 | |
---|
3471 | 3857 | kbase_gpu_vm_lock(kctx); |
---|
3472 | | - reg->flags &= ~KBASE_REG_NO_USER_FREE; |
---|
| 3858 | + |
---|
| 3859 | + /* |
---|
| 3860 | + * Incrementing the refcount is prevented on JIT regions. |
---|
| 3861 | + * If/when this ever changes we would need to compensate |
---|
| 3862 | + * by implementing "free on putting the last reference", |
---|
| 3863 | + * but only for JIT regions. |
---|
| 3864 | + */ |
---|
| 3865 | + WARN_ON(atomic_read(®->no_user_free_count) > 1); |
---|
| 3866 | + kbase_va_region_no_user_free_dec(reg); |
---|
3473 | 3867 | kbase_mem_free_region(kctx, reg); |
---|
3474 | 3868 | kbase_gpu_vm_unlock(kctx); |
---|
3475 | 3869 | } while (1); |
---|
.. | .. |
---|
3484 | 3878 | INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker); |
---|
3485 | 3879 | |
---|
3486 | 3880 | #if MALI_USE_CSF |
---|
| 3881 | + mutex_init(&kctx->csf.kcpu_queues.jit_lock); |
---|
3487 | 3882 | INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_cmds_head); |
---|
3488 | 3883 | INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_blocked_queues); |
---|
3489 | 3884 | #else /* !MALI_USE_CSF */ |
---|
.. | .. |
---|
3698 | 4093 | static int kbase_jit_grow(struct kbase_context *kctx, |
---|
3699 | 4094 | const struct base_jit_alloc_info *info, |
---|
3700 | 4095 | struct kbase_va_region *reg, |
---|
3701 | | - struct kbase_sub_alloc **prealloc_sas) |
---|
| 4096 | + struct kbase_sub_alloc **prealloc_sas, |
---|
| 4097 | + enum kbase_caller_mmu_sync_info mmu_sync_info) |
---|
3702 | 4098 | { |
---|
3703 | 4099 | size_t delta; |
---|
3704 | 4100 | size_t pages_required; |
---|
.. | .. |
---|
3728 | 4124 | delta = info->commit_pages - reg->gpu_alloc->nents; |
---|
3729 | 4125 | pages_required = delta; |
---|
3730 | 4126 | |
---|
3731 | | -#ifdef CONFIG_MALI_2MB_ALLOC |
---|
3732 | | - if (pages_required >= (SZ_2M / SZ_4K)) { |
---|
| 4127 | + if (kctx->kbdev->pagesize_2mb && pages_required >= (SZ_2M / SZ_4K)) { |
---|
3733 | 4128 | pool = &kctx->mem_pools.large[kctx->jit_group_id]; |
---|
3734 | 4129 | /* Round up to number of 2 MB pages required */ |
---|
3735 | 4130 | pages_required += ((SZ_2M / SZ_4K) - 1); |
---|
3736 | 4131 | pages_required /= (SZ_2M / SZ_4K); |
---|
3737 | 4132 | } else { |
---|
3738 | | -#endif |
---|
3739 | 4133 | pool = &kctx->mem_pools.small[kctx->jit_group_id]; |
---|
3740 | | -#ifdef CONFIG_MALI_2MB_ALLOC |
---|
3741 | 4134 | } |
---|
3742 | | -#endif |
---|
3743 | 4135 | |
---|
3744 | 4136 | if (reg->cpu_alloc != reg->gpu_alloc) |
---|
3745 | 4137 | pages_required *= 2; |
---|
.. | .. |
---|
3760 | 4152 | spin_unlock(&kctx->mem_partials_lock); |
---|
3761 | 4153 | |
---|
3762 | 4154 | kbase_gpu_vm_unlock(kctx); |
---|
3763 | | - ret = kbase_mem_pool_grow(pool, pool_delta); |
---|
| 4155 | + ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task); |
---|
3764 | 4156 | kbase_gpu_vm_lock(kctx); |
---|
3765 | 4157 | |
---|
3766 | 4158 | if (ret) |
---|
.. | .. |
---|
3795 | 4187 | spin_unlock(&kctx->mem_partials_lock); |
---|
3796 | 4188 | |
---|
3797 | 4189 | ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages, |
---|
3798 | | - old_size); |
---|
| 4190 | + old_size, mmu_sync_info); |
---|
3799 | 4191 | /* |
---|
3800 | 4192 | * The grow failed so put the allocation back in the |
---|
3801 | 4193 | * pool and return failure. |
---|
.. | .. |
---|
3920 | 4312 | const struct base_jit_alloc_info *info, |
---|
3921 | 4313 | bool ignore_pressure_limit) |
---|
3922 | 4314 | { |
---|
3923 | | -#if MALI_USE_CSF |
---|
3924 | | - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); |
---|
3925 | | -#else |
---|
| 4315 | +#if !MALI_USE_CSF |
---|
3926 | 4316 | lockdep_assert_held(&kctx->jctx.lock); |
---|
3927 | | -#endif |
---|
| 4317 | +#else /* MALI_USE_CSF */ |
---|
| 4318 | + lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); |
---|
| 4319 | +#endif /* !MALI_USE_CSF */ |
---|
3928 | 4320 | |
---|
3929 | 4321 | #if MALI_JIT_PRESSURE_LIMIT_BASE |
---|
3930 | 4322 | if (!ignore_pressure_limit && |
---|
.. | .. |
---|
4010 | 4402 | struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; |
---|
4011 | 4403 | int i; |
---|
4012 | 4404 | |
---|
4013 | | -#if MALI_USE_CSF |
---|
4014 | | - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); |
---|
4015 | | -#else |
---|
| 4405 | + /* Calls to this function are inherently synchronous, with respect to |
---|
| 4406 | + * MMU operations. |
---|
| 4407 | + */ |
---|
| 4408 | + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; |
---|
| 4409 | + |
---|
| 4410 | +#if !MALI_USE_CSF |
---|
4016 | 4411 | lockdep_assert_held(&kctx->jctx.lock); |
---|
4017 | | -#endif |
---|
| 4412 | +#else /* MALI_USE_CSF */ |
---|
| 4413 | + lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); |
---|
| 4414 | +#endif /* !MALI_USE_CSF */ |
---|
4018 | 4415 | |
---|
4019 | 4416 | if (!jit_allow_allocate(kctx, info, ignore_pressure_limit)) |
---|
4020 | 4417 | return NULL; |
---|
4021 | 4418 | |
---|
4022 | | -#ifdef CONFIG_MALI_2MB_ALLOC |
---|
4023 | | - /* Preallocate memory for the sub-allocation structs */ |
---|
4024 | | - for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { |
---|
4025 | | - prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); |
---|
4026 | | - if (!prealloc_sas[i]) |
---|
4027 | | - goto end; |
---|
| 4419 | + if (kctx->kbdev->pagesize_2mb) { |
---|
| 4420 | + /* Preallocate memory for the sub-allocation structs */ |
---|
| 4421 | + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { |
---|
| 4422 | + prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); |
---|
| 4423 | + if (!prealloc_sas[i]) |
---|
| 4424 | + goto end; |
---|
| 4425 | + } |
---|
4028 | 4426 | } |
---|
4029 | | -#endif |
---|
4030 | 4427 | |
---|
4031 | 4428 | kbase_gpu_vm_lock(kctx); |
---|
4032 | 4429 | mutex_lock(&kctx->jit_evict_lock); |
---|
.. | .. |
---|
4102 | 4499 | * so any state protected by that lock might need to be |
---|
4103 | 4500 | * re-evaluated if more code is added here in future. |
---|
4104 | 4501 | */ |
---|
4105 | | - ret = kbase_jit_grow(kctx, info, reg, prealloc_sas); |
---|
| 4502 | + ret = kbase_jit_grow(kctx, info, reg, prealloc_sas, |
---|
| 4503 | + mmu_sync_info); |
---|
4106 | 4504 | |
---|
4107 | 4505 | #if MALI_JIT_PRESSURE_LIMIT_BASE |
---|
4108 | 4506 | if (!ignore_pressure_limit) |
---|
.. | .. |
---|
4114 | 4512 | if (ret < 0) { |
---|
4115 | 4513 | /* |
---|
4116 | 4514 | * An update to an allocation from the pool failed, |
---|
4117 | | - * chances are slim a new allocation would fair any |
---|
| 4515 | + * chances are slim a new allocation would fare any |
---|
4118 | 4516 | * better so return the allocation to the pool and |
---|
4119 | 4517 | * return the function with failure. |
---|
4120 | 4518 | */ |
---|
.. | .. |
---|
4136 | 4534 | mutex_unlock(&kctx->jit_evict_lock); |
---|
4137 | 4535 | reg = NULL; |
---|
4138 | 4536 | goto end; |
---|
| 4537 | + } else { |
---|
| 4538 | + /* A suitable JIT allocation existed on the evict list, so we need |
---|
| 4539 | + * to make sure that the NOT_MOVABLE property is cleared. |
---|
| 4540 | + */ |
---|
| 4541 | + if (kbase_page_migration_enabled) { |
---|
| 4542 | + kbase_gpu_vm_lock(kctx); |
---|
| 4543 | + mutex_lock(&kctx->jit_evict_lock); |
---|
| 4544 | + kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED); |
---|
| 4545 | + mutex_unlock(&kctx->jit_evict_lock); |
---|
| 4546 | + kbase_gpu_vm_unlock(kctx); |
---|
| 4547 | + } |
---|
4139 | 4548 | } |
---|
4140 | 4549 | } else { |
---|
4141 | 4550 | /* No suitable JIT allocation was found so create a new one */ |
---|
.. | .. |
---|
4150 | 4559 | flags |= BASE_MEM_TILER_ALIGN_TOP; |
---|
4151 | 4560 | #endif /* !MALI_USE_CSF */ |
---|
4152 | 4561 | |
---|
4153 | | - flags |= base_mem_group_id_set(kctx->jit_group_id); |
---|
| 4562 | + flags |= kbase_mem_group_id_set(kctx->jit_group_id); |
---|
4154 | 4563 | #if MALI_JIT_PRESSURE_LIMIT_BASE |
---|
4155 | 4564 | if (!ignore_pressure_limit) { |
---|
4156 | 4565 | flags |= BASEP_MEM_PERFORM_JIT_TRIM; |
---|
.. | .. |
---|
4165 | 4574 | mutex_unlock(&kctx->jit_evict_lock); |
---|
4166 | 4575 | kbase_gpu_vm_unlock(kctx); |
---|
4167 | 4576 | |
---|
4168 | | - reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, |
---|
4169 | | - info->extension, &flags, &gpu_addr); |
---|
| 4577 | + reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, info->extension, |
---|
| 4578 | + &flags, &gpu_addr, mmu_sync_info); |
---|
4170 | 4579 | if (!reg) { |
---|
4171 | 4580 | /* Most likely not enough GPU virtual space left for |
---|
4172 | 4581 | * the new JIT allocation. |
---|
.. | .. |
---|
4192 | 4601 | } |
---|
4193 | 4602 | } |
---|
4194 | 4603 | |
---|
| 4604 | + /* Similarly to tiler heap init, there is a short window of time |
---|
| 4605 | + * where the (either recycled or newly allocated, in our case) region has |
---|
| 4606 | + * "no user free" count incremented but is still missing the DONT_NEED flag, and |
---|
| 4607 | + * doesn't yet have the ACTIVE_JIT_ALLOC flag either. Temporarily leaking the |
---|
| 4608 | + * allocation is the least bad option that doesn't lead to a security issue down the |
---|
| 4609 | + * line (it will eventually be cleaned up during context termination). |
---|
| 4610 | + * |
---|
| 4611 | + * We also need to call kbase_gpu_vm_lock regardless, as we're updating the region |
---|
| 4612 | + * flags. |
---|
| 4613 | + */ |
---|
| 4614 | + kbase_gpu_vm_lock(kctx); |
---|
| 4615 | + if (unlikely(atomic_read(®->no_user_free_count) > 1)) { |
---|
| 4616 | + kbase_gpu_vm_unlock(kctx); |
---|
| 4617 | + dev_err(kctx->kbdev->dev, "JIT region has no_user_free_count > 1!\n"); |
---|
| 4618 | + |
---|
| 4619 | + mutex_lock(&kctx->jit_evict_lock); |
---|
| 4620 | + list_move(®->jit_node, &kctx->jit_pool_head); |
---|
| 4621 | + mutex_unlock(&kctx->jit_evict_lock); |
---|
| 4622 | + |
---|
| 4623 | + reg = NULL; |
---|
| 4624 | + goto end; |
---|
| 4625 | + } |
---|
| 4626 | + |
---|
4195 | 4627 | trace_mali_jit_alloc(reg, info->id); |
---|
4196 | 4628 | |
---|
4197 | 4629 | kctx->jit_current_allocations++; |
---|
.. | .. |
---|
4209 | 4641 | kbase_jit_report_update_pressure(kctx, reg, info->va_pages, |
---|
4210 | 4642 | KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); |
---|
4211 | 4643 | #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ |
---|
| 4644 | + kbase_gpu_vm_unlock(kctx); |
---|
4212 | 4645 | |
---|
4213 | 4646 | end: |
---|
4214 | 4647 | for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) |
---|
.. | .. |
---|
4220 | 4653 | void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) |
---|
4221 | 4654 | { |
---|
4222 | 4655 | u64 old_pages; |
---|
| 4656 | + |
---|
| 4657 | +#if !MALI_USE_CSF |
---|
| 4658 | + lockdep_assert_held(&kctx->jctx.lock); |
---|
| 4659 | +#else /* MALI_USE_CSF */ |
---|
| 4660 | + lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); |
---|
| 4661 | +#endif /* !MALI_USE_CSF */ |
---|
4223 | 4662 | |
---|
4224 | 4663 | /* JIT id not immediately available here, so use 0u */ |
---|
4225 | 4664 | trace_mali_jit_free(reg, 0u); |
---|
.. | .. |
---|
4273 | 4712 | |
---|
4274 | 4713 | list_move(®->jit_node, &kctx->jit_pool_head); |
---|
4275 | 4714 | |
---|
| 4715 | + /* Inactive JIT regions should be freed by the shrinker and not impacted |
---|
| 4716 | + * by page migration. Once freed, they will enter into the page migration |
---|
| 4717 | + * state machine via the mempools. |
---|
| 4718 | + */ |
---|
| 4719 | + if (kbase_page_migration_enabled) |
---|
| 4720 | + kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE); |
---|
4276 | 4721 | mutex_unlock(&kctx->jit_evict_lock); |
---|
4277 | 4722 | } |
---|
4278 | 4723 | |
---|
.. | .. |
---|
4319 | 4764 | mutex_unlock(&kctx->jit_evict_lock); |
---|
4320 | 4765 | |
---|
4321 | 4766 | if (reg) { |
---|
4322 | | - reg->flags &= ~KBASE_REG_NO_USER_FREE; |
---|
| 4767 | + /* |
---|
| 4768 | + * Incrementing the refcount is prevented on JIT regions. |
---|
| 4769 | + * If/when this ever changes we would need to compensate |
---|
| 4770 | + * by implementing "free on putting the last reference", |
---|
| 4771 | + * but only for JIT regions. |
---|
| 4772 | + */ |
---|
| 4773 | + WARN_ON(atomic_read(®->no_user_free_count) > 1); |
---|
| 4774 | + kbase_va_region_no_user_free_dec(reg); |
---|
4323 | 4775 | kbase_mem_free_region(kctx, reg); |
---|
4324 | 4776 | } |
---|
4325 | 4777 | |
---|
.. | .. |
---|
4341 | 4793 | list_del(&walker->jit_node); |
---|
4342 | 4794 | list_del_init(&walker->gpu_alloc->evict_node); |
---|
4343 | 4795 | mutex_unlock(&kctx->jit_evict_lock); |
---|
4344 | | - walker->flags &= ~KBASE_REG_NO_USER_FREE; |
---|
| 4796 | + /* |
---|
| 4797 | + * Incrementing the refcount is prevented on JIT regions. |
---|
| 4798 | + * If/when this ever changes we would need to compensate |
---|
| 4799 | + * by implementing "free on putting the last reference", |
---|
| 4800 | + * but only for JIT regions. |
---|
| 4801 | + */ |
---|
| 4802 | + WARN_ON(atomic_read(&walker->no_user_free_count) > 1); |
---|
| 4803 | + kbase_va_region_no_user_free_dec(walker); |
---|
4345 | 4804 | kbase_mem_free_region(kctx, walker); |
---|
4346 | 4805 | mutex_lock(&kctx->jit_evict_lock); |
---|
4347 | 4806 | } |
---|
.. | .. |
---|
4353 | 4812 | list_del(&walker->jit_node); |
---|
4354 | 4813 | list_del_init(&walker->gpu_alloc->evict_node); |
---|
4355 | 4814 | mutex_unlock(&kctx->jit_evict_lock); |
---|
4356 | | - walker->flags &= ~KBASE_REG_NO_USER_FREE; |
---|
| 4815 | + /* |
---|
| 4816 | + * Incrementing the refcount is prevented on JIT regions. |
---|
| 4817 | + * If/when this ever changes we would need to compensate |
---|
| 4818 | + * by implementing "free on putting the last reference", |
---|
| 4819 | + * but only for JIT regions. |
---|
| 4820 | + */ |
---|
| 4821 | + WARN_ON(atomic_read(&walker->no_user_free_count) > 1); |
---|
| 4822 | + kbase_va_region_no_user_free_dec(walker); |
---|
4357 | 4823 | kbase_mem_free_region(kctx, walker); |
---|
4358 | 4824 | mutex_lock(&kctx->jit_evict_lock); |
---|
4359 | 4825 | } |
---|
.. | .. |
---|
4396 | 4862 | |
---|
4397 | 4863 | addr_start = reg->heap_info_gpu_addr - jit_report_gpu_mem_offset; |
---|
4398 | 4864 | |
---|
4399 | | - ptr = kbase_vmap(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE, |
---|
4400 | | - &mapping); |
---|
| 4865 | + ptr = kbase_vmap_prot(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE, |
---|
| 4866 | + KBASE_REG_CPU_RD, &mapping); |
---|
4401 | 4867 | if (!ptr) { |
---|
4402 | 4868 | dev_warn(kctx->kbdev->dev, |
---|
4403 | 4869 | "%s: JIT start=0x%llx unable to map memory near end pointer %llx\n", |
---|
.. | .. |
---|
4455 | 4921 | } |
---|
4456 | 4922 | #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ |
---|
4457 | 4923 | |
---|
| 4924 | +void kbase_unpin_user_buf_page(struct page *page) |
---|
| 4925 | +{ |
---|
| 4926 | +#if KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE |
---|
| 4927 | + put_page(page); |
---|
| 4928 | +#else |
---|
| 4929 | + unpin_user_page(page); |
---|
| 4930 | +#endif |
---|
| 4931 | +} |
---|
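Annotation: kbase_unpin_user_buf_page() exists so the release side always matches the acquisition side used further down in kbase_jd_user_buf_pin_pages(): kernels from 5.9 onwards pin with pin_user_pages_remote() and must release with unpin_user_page(), while older kernels pair get_user_pages*() with put_page(). A minimal sketch of the >= 5.9 pairing, in the same 7-argument form this patch uses (error handling elided):

    struct page *page;
    long pinned;

    /* Pin one user page for long-term DMA use ... */
    pinned = pin_user_pages_remote(mm, address, 1, FOLL_WRITE, &page, NULL, NULL);
    if (pinned == 1) {
        /* ... and release it through the helper, which picks
         * unpin_user_page() here and put_page() on pre-5.9 kernels.
         */
        kbase_unpin_user_buf_page(page);
    }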
| 4932 | + |
---|
4458 | 4933 | #if MALI_USE_CSF |
---|
4459 | 4934 | static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc) |
---|
4460 | 4935 | { |
---|
4461 | | - if (alloc->nents) { |
---|
| 4936 | + /* In CSF builds, we keep pages pinned until the last reference is |
---|
| 4937 | + * released on the alloc. A refcount of 0 also means we can be sure |
---|
| 4938 | + * that all CPU mappings have been closed on this alloc, and no more |
---|
| 4939 | + * mappings of it will be created. |
---|
| 4940 | + * |
---|
| 4941 | + * Further, the WARN() below captures the restriction that this |
---|
| 4942 | + * function will not handle anything other than the alloc termination |
---|
| 4943 | + * path, because the caller of kbase_mem_phy_alloc_put() is not |
---|
| 4944 | + * required to hold the kctx's reg_lock, and so we could not handle |
---|
| 4945 | + * removing an existing CPU mapping here. |
---|
| 4946 | + * |
---|
| 4947 | + * Refer to this function's kernel-doc comments for alternatives for |
---|
| 4948 | + * unpinning a User buffer. |
---|
| 4949 | + */ |
---|
| 4950 | + |
---|
| 4951 | + if (alloc->nents && !WARN(kref_read(&alloc->kref) != 0, |
---|
| 4952 | + "must only be called on terminating an allocation")) { |
---|
4462 | 4953 | struct page **pages = alloc->imported.user_buf.pages; |
---|
4463 | 4954 | long i; |
---|
4464 | 4955 | |
---|
4465 | 4956 | WARN_ON(alloc->nents != alloc->imported.user_buf.nr_pages); |
---|
4466 | 4957 | |
---|
4467 | 4958 | for (i = 0; i < alloc->nents; i++) |
---|
4468 | | - put_page(pages[i]); |
---|
| 4959 | + kbase_unpin_user_buf_page(pages[i]); |
---|
| 4960 | + |
---|
| 4961 | + alloc->nents = 0; |
---|
4469 | 4962 | } |
---|
4470 | 4963 | } |
---|
4471 | 4964 | #endif |
---|
.. | .. |
---|
4479 | 4972 | struct mm_struct *mm = alloc->imported.user_buf.mm; |
---|
4480 | 4973 | long pinned_pages; |
---|
4481 | 4974 | long i; |
---|
| 4975 | + int write; |
---|
| 4976 | + |
---|
| 4977 | + lockdep_assert_held(&kctx->reg_lock); |
---|
4482 | 4978 | |
---|
4483 | 4979 | if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF)) |
---|
4484 | 4980 | return -EINVAL; |
---|
.. | .. |
---|
4493 | 4989 | if (WARN_ON(reg->gpu_alloc->imported.user_buf.mm != current->mm)) |
---|
4494 | 4990 | return -EINVAL; |
---|
4495 | 4991 | |
---|
4496 | | -#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE |
---|
4497 | | - pinned_pages = get_user_pages(NULL, mm, |
---|
4498 | | - address, |
---|
4499 | | - alloc->imported.user_buf.nr_pages, |
---|
4500 | | -#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \ |
---|
4501 | | -KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE |
---|
4502 | | - reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, |
---|
4503 | | - pages, NULL); |
---|
| 4992 | + write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); |
---|
| 4993 | + |
---|
| 4994 | +#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE |
---|
| 4995 | + pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, |
---|
| 4996 | + write ? FOLL_WRITE : 0, pages, NULL); |
---|
| 4997 | +#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE |
---|
| 4998 | + pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, |
---|
| 4999 | + write ? FOLL_WRITE : 0, pages, NULL, NULL); |
---|
4504 | 5000 | #else |
---|
4505 | | - reg->flags & KBASE_REG_GPU_WR, |
---|
4506 | | - 0, pages, NULL); |
---|
4507 | | -#endif |
---|
4508 | | -#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE |
---|
4509 | | - pinned_pages = get_user_pages_remote(NULL, mm, |
---|
4510 | | - address, |
---|
4511 | | - alloc->imported.user_buf.nr_pages, |
---|
4512 | | - reg->flags & KBASE_REG_GPU_WR, |
---|
4513 | | - 0, pages, NULL); |
---|
4514 | | -#elif KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE |
---|
4515 | | - pinned_pages = get_user_pages_remote(NULL, mm, |
---|
4516 | | - address, |
---|
4517 | | - alloc->imported.user_buf.nr_pages, |
---|
4518 | | - reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, |
---|
4519 | | - pages, NULL); |
---|
4520 | | -#else |
---|
4521 | | - pinned_pages = get_user_pages_remote(NULL, mm, |
---|
4522 | | - address, |
---|
4523 | | - alloc->imported.user_buf.nr_pages, |
---|
4524 | | - reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, |
---|
4525 | | - pages, NULL, NULL); |
---|
| 5001 | + pinned_pages = pin_user_pages_remote(mm, address, alloc->imported.user_buf.nr_pages, |
---|
| 5002 | + write ? FOLL_WRITE : 0, pages, NULL, NULL); |
---|
4526 | 5003 | #endif |
---|
4527 | 5004 | |
---|
4528 | 5005 | if (pinned_pages <= 0) |
---|
4529 | 5006 | return pinned_pages; |
---|
4530 | 5007 | |
---|
4531 | 5008 | if (pinned_pages != alloc->imported.user_buf.nr_pages) { |
---|
| 5009 | + /* Above code already ensures there will not have been a CPU |
---|
| 5010 | + * mapping by ensuring alloc->nents is 0 |
---|
| 5011 | + */ |
---|
4532 | 5012 | for (i = 0; i < pinned_pages; i++) |
---|
4533 | | - put_page(pages[i]); |
---|
| 5013 | + kbase_unpin_user_buf_page(pages[i]); |
---|
4534 | 5014 | return -ENOMEM; |
---|
4535 | 5015 | } |
---|
4536 | 5016 | |
---|
.. | .. |
---|
4542 | 5022 | static int kbase_jd_user_buf_map(struct kbase_context *kctx, |
---|
4543 | 5023 | struct kbase_va_region *reg) |
---|
4544 | 5024 | { |
---|
4545 | | - long pinned_pages; |
---|
| 5025 | + int err; |
---|
| 5026 | + long pinned_pages = 0; |
---|
4546 | 5027 | struct kbase_mem_phy_alloc *alloc; |
---|
4547 | 5028 | struct page **pages; |
---|
4548 | 5029 | struct tagged_addr *pa; |
---|
4549 | | - long i; |
---|
4550 | | - unsigned long address; |
---|
| 5030 | + long i, dma_mapped_pages; |
---|
4551 | 5031 | struct device *dev; |
---|
4552 | | - unsigned long offset; |
---|
4553 | | - unsigned long local_size; |
---|
4554 | 5032 | unsigned long gwt_mask = ~0; |
---|
4555 | | - int err = kbase_jd_user_buf_pin_pages(kctx, reg); |
---|
| 5033 | + /* Calls to this function are inherently asynchronous, with respect to |
---|
| 5034 | + * MMU operations. |
---|
| 5035 | + */ |
---|
| 5036 | + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; |
---|
| 5037 | + |
---|
| 5038 | + lockdep_assert_held(&kctx->reg_lock); |
---|
| 5039 | + |
---|
| 5040 | + err = kbase_jd_user_buf_pin_pages(kctx, reg); |
---|
4556 | 5041 | |
---|
4557 | 5042 | if (err) |
---|
4558 | 5043 | return err; |
---|
4559 | 5044 | |
---|
4560 | 5045 | alloc = reg->gpu_alloc; |
---|
4561 | 5046 | pa = kbase_get_gpu_phy_pages(reg); |
---|
4562 | | - address = alloc->imported.user_buf.address; |
---|
4563 | 5047 | pinned_pages = alloc->nents; |
---|
4564 | 5048 | pages = alloc->imported.user_buf.pages; |
---|
4565 | 5049 | dev = kctx->kbdev->dev; |
---|
4566 | | - offset = address & ~PAGE_MASK; |
---|
4567 | | - local_size = alloc->imported.user_buf.size; |
---|
4568 | 5050 | |
---|
| 5051 | + /* Manual CPU cache synchronization. |
---|
| 5052 | + * |
---|
| 5053 | + * The driver disables automatic CPU cache synchronization because the |
---|
| 5054 | + * memory pages that enclose the imported region may also contain |
---|
| 5055 | + * sub-regions which are not imported and that are allocated and used |
---|
| 5056 | + * by the user process. This may be the case for memory at the beginning |
---|
| 5057 | + * of the first page and at the end of the last page. Automatic CPU cache |
---|
| 5058 | + * synchronization would force some operations on those memory allocations, |
---|
| 5059 | + * unbeknown to the user process: in particular, a CPU cache invalidate |
---|
| 5060 | + * upon unmapping would destroy the content of dirty CPU caches and cause |
---|
| 5061 | + * the user process to lose CPU writes to the non-imported sub-regions. |
---|
| 5062 | + * |
---|
| 5063 | + * When the GPU claims ownership of the imported memory buffer, it shall |
---|
| 5064 | + * commit CPU writes for the whole of all pages that enclose the imported |
---|
| 5065 | + * region, otherwise the initial content of memory would be wrong. |
---|
| 5066 | + */ |
---|
4569 | 5067 | for (i = 0; i < pinned_pages; i++) { |
---|
4570 | 5068 | dma_addr_t dma_addr; |
---|
4571 | | - unsigned long min; |
---|
4572 | | - |
---|
4573 | | - min = MIN(PAGE_SIZE - offset, local_size); |
---|
4574 | | - dma_addr = dma_map_page(dev, pages[i], |
---|
4575 | | - offset, min, |
---|
4576 | | - DMA_BIDIRECTIONAL); |
---|
4577 | | - if (dma_mapping_error(dev, dma_addr)) |
---|
| 5069 | +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) |
---|
| 5070 | + dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); |
---|
| 5071 | +#else |
---|
| 5072 | + dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL, |
---|
| 5073 | + DMA_ATTR_SKIP_CPU_SYNC); |
---|
| 5074 | +#endif |
---|
| 5075 | + err = dma_mapping_error(dev, dma_addr); |
---|
| 5076 | + if (err) |
---|
4578 | 5077 | goto unwind; |
---|
4579 | 5078 | |
---|
4580 | 5079 | alloc->imported.user_buf.dma_addrs[i] = dma_addr; |
---|
4581 | 5080 | pa[i] = as_tagged(page_to_phys(pages[i])); |
---|
4582 | 5081 | |
---|
4583 | | - local_size -= min; |
---|
4584 | | - offset = 0; |
---|
| 5082 | + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); |
---|
4585 | 5083 | } |
---|
4586 | 5084 | |
---|
4587 | 5085 | #ifdef CONFIG_MALI_CINSTR_GWT |
---|
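Annotation: the mapping loop above implements the "manual CPU cache synchronization" described in the comment: the streaming mapping is created with DMA_ATTR_SKIP_CPU_SYNC so the DMA core does not touch the caches for the whole page, and the driver then performs exactly one explicit clean before the GPU takes ownership. Distilled to a standalone sketch (modern-kernel branch only; dev and page stand for the values used in the loop):

    dma_addr_t dma_addr;

    /* Map the whole page, but skip the automatic cache maintenance that
     * would otherwise also hit non-imported data sharing the page.
     */
    dma_addr = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
                      DMA_ATTR_SKIP_CPU_SYNC);
    if (dma_mapping_error(dev, dma_addr))
        return -ENOMEM;

    /* Commit dirty CPU cache lines once, explicitly, for the device. */
    dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);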
.. | .. |
---|
4589 | 5087 | gwt_mask = ~KBASE_REG_GPU_WR; |
---|
4590 | 5088 | #endif |
---|
4591 | 5089 | |
---|
4592 | | - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, |
---|
4593 | | - pa, kbase_reg_current_backed_size(reg), |
---|
4594 | | - reg->flags & gwt_mask, kctx->as_nr, |
---|
4595 | | - alloc->group_id); |
---|
| 5090 | + err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, |
---|
| 5091 | + kbase_reg_current_backed_size(reg), |
---|
| 5092 | + reg->flags & gwt_mask, kctx->as_nr, alloc->group_id, |
---|
| 5093 | + mmu_sync_info, NULL); |
---|
4596 | 5094 | if (err == 0) |
---|
4597 | 5095 | return 0; |
---|
4598 | 5096 | |
---|
4599 | 5097 | /* fall down */ |
---|
4600 | 5098 | unwind: |
---|
4601 | 5099 | alloc->nents = 0; |
---|
4602 | | - while (i--) { |
---|
4603 | | - dma_unmap_page(kctx->kbdev->dev, |
---|
4604 | | - alloc->imported.user_buf.dma_addrs[i], |
---|
4605 | | - PAGE_SIZE, DMA_BIDIRECTIONAL); |
---|
| 5100 | + dma_mapped_pages = i; |
---|
| 5101 | + /* Run the unmap loop in the same order as the map loop, and again perform |
---|
| 5102 | + * CPU cache synchronization to re-write the content of dirty CPU caches |
---|
| 5103 | + * to memory. This is a precautionary measure in case a GPU job has taken |
---|
| 5104 | + * advantage of a partially GPU-mapped range to write and corrupt the |
---|
| 5105 | + * content of memory, either inside or outside the imported region. |
---|
| 5106 | + * |
---|
| 5107 | + * Notice that this error recovery path doesn't try to be optimal and just |
---|
| 5108 | + * flushes the entire page range. |
---|
| 5109 | + */ |
---|
| 5110 | + for (i = 0; i < dma_mapped_pages; i++) { |
---|
| 5111 | + dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; |
---|
| 5112 | + |
---|
| 5113 | + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); |
---|
| 5114 | +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) |
---|
| 5115 | + dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); |
---|
| 5116 | +#else |
---|
| 5117 | + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, |
---|
| 5118 | + DMA_ATTR_SKIP_CPU_SYNC); |
---|
| 5119 | +#endif |
---|
4606 | 5120 | } |
---|
4607 | 5121 | |
---|
4608 | | - while (++i < pinned_pages) { |
---|
4609 | | - put_page(pages[i]); |
---|
| 5122 | + /* The user buffer could already have been previously pinned before |
---|
| 5123 | + * entering this function, and hence there could potentially be CPU |
---|
| 5124 | + * mappings of it |
---|
| 5125 | + */ |
---|
| 5126 | + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, pinned_pages); |
---|
| 5127 | + |
---|
| 5128 | + for (i = 0; i < pinned_pages; i++) { |
---|
| 5129 | + kbase_unpin_user_buf_page(pages[i]); |
---|
4610 | 5130 | pages[i] = NULL; |
---|
4611 | 5131 | } |
---|
4612 | 5132 | |
---|
.. | .. |
---|
4617 | 5137 | * GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT |
---|
4618 | 5138 | * have a corresponding call to kbase_jd_user_buf_unpin_pages(). |
---|
4619 | 5139 | */ |
---|
4620 | | -static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, |
---|
4621 | | - struct kbase_mem_phy_alloc *alloc, bool writeable) |
---|
| 5140 | +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, |
---|
| 5141 | + struct kbase_va_region *reg, bool writeable) |
---|
4622 | 5142 | { |
---|
4623 | 5143 | long i; |
---|
4624 | 5144 | struct page **pages; |
---|
4625 | | - unsigned long size = alloc->imported.user_buf.size; |
---|
| 5145 | + unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK; |
---|
| 5146 | + unsigned long remaining_size = alloc->imported.user_buf.size; |
---|
| 5147 | + |
---|
| 5148 | + lockdep_assert_held(&kctx->reg_lock); |
---|
4626 | 5149 | |
---|
4627 | 5150 | KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); |
---|
4628 | 5151 | pages = alloc->imported.user_buf.pages; |
---|
| 5152 | + |
---|
| 5153 | +#if !MALI_USE_CSF |
---|
| 5154 | + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, alloc->nents); |
---|
| 5155 | +#else |
---|
| 5156 | + CSTD_UNUSED(reg); |
---|
| 5157 | +#endif |
---|
| 5158 | + |
---|
4629 | 5159 | for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { |
---|
4630 | | - unsigned long local_size; |
---|
| 5160 | + unsigned long imported_size = MIN(remaining_size, PAGE_SIZE - offset_within_page); |
---|
| 5161 | + /* Notice: this is a temporary variable that is used for DMA sync |
---|
| 5162 | + * operations, and that could be incremented by an offset if the |
---|
| 5163 | + * current page contains both imported and non-imported memory |
---|
| 5164 | + * sub-regions. |
---|
| 5165 | + * |
---|
| 5166 | + * It is valid to add an offset to this value, because the offset |
---|
| 5167 | + * is always kept within the physically contiguous dma-mapped range |
---|
| 5168 | + * and there's no need to translate to physical address to offset it. |
---|
| 5169 | + * |
---|
| 5170 | + * This variable is not going to be used for the actual DMA unmap |
---|
| 5171 | + * operation, that shall always use the original DMA address of the |
---|
| 5172 | + * whole memory page. |
---|
| 5173 | + */ |
---|
4631 | 5174 | dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; |
---|
4632 | 5175 | |
---|
4633 | | - local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK)); |
---|
4634 | | - dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size, |
---|
4635 | | - DMA_BIDIRECTIONAL); |
---|
| 5176 | + /* Manual CPU cache synchronization. |
---|
| 5177 | + * |
---|
| 5178 | + * When the GPU returns ownership of the buffer to the CPU, the driver |
---|
| 5179 | + * needs to treat imported and non-imported memory differently. |
---|
| 5180 | + * |
---|
| 5181 | + * The first case to consider is non-imported sub-regions at the |
---|
| 5182 | + * beginning of the first page and at the end of last page. For these |
---|
| 5183 | + * sub-regions: CPU cache shall be committed with a clean+invalidate, |
---|
| 5184 | + * in order to keep the last CPU write. |
---|
| 5185 | + * |
---|
| 5186 | + * Imported region prefers the opposite treatment: this memory has been |
---|
| 5187 | + * legitimately mapped and used by the GPU, hence GPU writes shall be |
---|
| 5188 | + * committed to memory, while CPU cache shall be invalidated to make |
---|
| 5189 | + * sure that CPU reads the correct memory content. |
---|
| 5190 | + * |
---|
| 5191 | + * The following diagram shows the expected value of the variables |
---|
| 5192 | + * used in this loop in the corner case of an imported region enclosed |
---|
| 5193 | + * by a single memory page: |
---|
| 5194 | + * |
---|
| 5195 | + * page boundary ->|---------- | <- dma_addr (initial value) |
---|
| 5196 | + * | | |
---|
| 5197 | + * | - - - - - | <- offset_within_page |
---|
| 5198 | + * |XXXXXXXXXXX|\ |
---|
| 5199 | + * |XXXXXXXXXXX| \ |
---|
| 5200 | + * |XXXXXXXXXXX| }- imported_size |
---|
| 5201 | + * |XXXXXXXXXXX| / |
---|
| 5202 | + * |XXXXXXXXXXX|/ |
---|
| 5203 | + * | - - - - - | <- offset_within_page + imported_size |
---|
| 5204 | + * | |\ |
---|
| 5205 | + * | | }- PAGE_SIZE - imported_size - offset_within_page |
---|
| 5206 | + * | |/ |
---|
| 5207 | + * page boundary ->|-----------| |
---|
| 5208 | + * |
---|
| 5209 | + * If the imported region is enclosed by more than one page, then |
---|
| 5210 | + * offset_within_page = 0 for any page after the first. |
---|
| 5211 | + */ |
---|
| 5212 | + |
---|
| 5213 | + /* Only for first page: handle non-imported range at the beginning. */ |
---|
| 5214 | + if (offset_within_page > 0) { |
---|
| 5215 | + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page, |
---|
| 5216 | + DMA_BIDIRECTIONAL); |
---|
| 5217 | + dma_addr += offset_within_page; |
---|
| 5218 | + } |
---|
| 5219 | + |
---|
| 5220 | + /* For every page: handle imported range. */ |
---|
| 5221 | + if (imported_size > 0) |
---|
| 5222 | + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size, |
---|
| 5223 | + DMA_BIDIRECTIONAL); |
---|
| 5224 | + |
---|
| 5225 | + /* Only for last page (that may coincide with first page): |
---|
| 5226 | + * handle non-imported range at the end. |
---|
| 5227 | + */ |
---|
| 5228 | + if ((imported_size + offset_within_page) < PAGE_SIZE) { |
---|
| 5229 | + dma_addr += imported_size; |
---|
| 5230 | + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, |
---|
| 5231 | + PAGE_SIZE - imported_size - offset_within_page, |
---|
| 5232 | + DMA_BIDIRECTIONAL); |
---|
| 5233 | + } |
---|
| 5234 | + |
---|
| 5235 | + /* Notice: use the original DMA address to unmap the whole memory page. */ |
---|
| 5236 | +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) |
---|
| 5237 | + dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE, |
---|
| 5238 | + DMA_BIDIRECTIONAL); |
---|
| 5239 | +#else |
---|
| 5240 | + dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], |
---|
| 5241 | + PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); |
---|
| 5242 | +#endif |
---|
4636 | 5243 | if (writeable) |
---|
4637 | 5244 | set_page_dirty_lock(pages[i]); |
---|
4638 | 5245 | #if !MALI_USE_CSF |
---|
4639 | | - put_page(pages[i]); |
---|
| 5246 | + kbase_unpin_user_buf_page(pages[i]); |
---|
4640 | 5247 | pages[i] = NULL; |
---|
4641 | 5248 | #endif |
---|
4642 | 5249 | |
---|
4643 | | - size -= local_size; |
---|
| 5250 | + remaining_size -= imported_size; |
---|
| 5251 | + offset_within_page = 0; |
---|
4644 | 5252 | } |
---|
4645 | 5253 | #if !MALI_USE_CSF |
---|
4646 | 5254 | alloc->nents = 0; |
---|
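Annotation: a worked example may help with the partial-sync arithmetic in the unmap loop above. Assuming 4 KiB pages and a purely illustrative import of size 0x1800 whose start address has page offset 0xC00 (so the import spans three pages):

    /* Page 0: offset_within_page = 0xC00
     *         imported_size      = MIN(0x1800, 0x1000 - 0xC00) = 0x400
     *         -> sync_for_device on [0x000, 0xC00)   non-imported head
     *         -> sync_for_cpu    on [0xC00, 0x1000)  imported tail
     *         remaining_size = 0x1400, offset_within_page -> 0
     *
     * Page 1: imported_size = MIN(0x1400, 0x1000) = 0x1000
     *         -> sync_for_cpu on the whole page (no head, no tail)
     *         remaining_size = 0x400
     *
     * Page 2: imported_size = 0x400
     *         -> sync_for_cpu    on [0x000, 0x400)   imported head
     *         -> sync_for_device on [0x400, 0x1000)  non-imported tail
     *         remaining_size = 0
     */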
.. | .. |
---|
4687 | 5295 | return 0; |
---|
4688 | 5296 | } |
---|
4689 | 5297 | |
---|
4690 | | -struct kbase_mem_phy_alloc *kbase_map_external_resource( |
---|
4691 | | - struct kbase_context *kctx, struct kbase_va_region *reg, |
---|
4692 | | - struct mm_struct *locked_mm) |
---|
| 5298 | +int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg, |
---|
| 5299 | + struct mm_struct *locked_mm) |
---|
4693 | 5300 | { |
---|
4694 | | - int err; |
---|
| 5301 | + int err = 0; |
---|
| 5302 | + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; |
---|
4695 | 5303 | |
---|
4696 | 5304 | lockdep_assert_held(&kctx->reg_lock); |
---|
4697 | 5305 | |
---|
.. | .. |
---|
4700 | 5308 | case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { |
---|
4701 | 5309 | if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) && |
---|
4702 | 5310 | (!reg->gpu_alloc->nents)) |
---|
4703 | | - goto exit; |
---|
| 5311 | + return -EINVAL; |
---|
4704 | 5312 | |
---|
4705 | 5313 | reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; |
---|
4706 | 5314 | if (reg->gpu_alloc->imported.user_buf |
---|
.. | .. |
---|
4708 | 5316 | err = kbase_jd_user_buf_map(kctx, reg); |
---|
4709 | 5317 | if (err) { |
---|
4710 | 5318 | reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; |
---|
4711 | | - goto exit; |
---|
| 5319 | + return err; |
---|
4712 | 5320 | } |
---|
4713 | 5321 | } |
---|
4714 | 5322 | } |
---|
.. | .. |
---|
4716 | 5324 | case KBASE_MEM_TYPE_IMPORTED_UMM: { |
---|
4717 | 5325 | err = kbase_mem_umm_map(kctx, reg); |
---|
4718 | 5326 | if (err) |
---|
4719 | | - goto exit; |
---|
| 5327 | + return err; |
---|
4720 | 5328 | break; |
---|
4721 | 5329 | } |
---|
4722 | 5330 | default: |
---|
4723 | | - goto exit; |
---|
| 5331 | + dev_dbg(kctx->kbdev->dev, |
---|
| 5332 | + "Invalid external resource GPU allocation type (%x) on mapping", |
---|
| 5333 | + alloc->type); |
---|
| 5334 | + return -EINVAL; |
---|
4724 | 5335 | } |
---|
4725 | 5336 | |
---|
4726 | | - return kbase_mem_phy_alloc_get(reg->gpu_alloc); |
---|
4727 | | -exit: |
---|
4728 | | - return NULL; |
---|
| 5337 | + kbase_va_region_alloc_get(kctx, reg); |
---|
| 5338 | + kbase_mem_phy_alloc_get(alloc); |
---|
| 5339 | + return err; |
---|
4729 | 5340 | } |
---|
4730 | 5341 | |
---|
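The refactored kbase_map_external_resource() now returns 0 or a negative error code instead of a struct kbase_mem_phy_alloc pointer, and on success it holds one reference on the VA region (kbase_va_region_alloc_get()) plus one on its physical allocation (kbase_mem_phy_alloc_get()); kbase_unmap_external_resource() releases both. A minimal userspace model of that pairing follows; the model_* types and functions are hypothetical stand-ins for the driver structures, not real API.

#include <errno.h>
#include <stdio.h>

struct model_alloc  { int refs; };
struct model_region { int refs; struct model_alloc alloc; };

static int model_map(struct model_region *reg)
{
	if (!reg)
		return -EINVAL;    /* error paths take no references */
	reg->refs++;               /* kbase_va_region_alloc_get()   */
	reg->alloc.refs++;         /* kbase_mem_phy_alloc_get()     */
	return 0;
}

static void model_unmap(struct model_region *reg)
{
	reg->alloc.refs--;         /* kbase_mem_phy_alloc_put()     */
	reg->refs--;               /* kbase_va_region_alloc_put()   */
}

int main(void)
{
	struct model_region reg = { .refs = 1, .alloc = { .refs = 1 } };

	if (model_map(&reg) == 0) {   /* success: both counts are now 2 */
		printf("after map:   region=%d alloc=%d\n", reg.refs, reg.alloc.refs);
		model_unmap(&reg);    /* back to 1/1 */
		printf("after unmap: region=%d alloc=%d\n", reg.refs, reg.alloc.refs);
	}
	return 0;
}

The point of the model is only the symmetry: every successful map must eventually be balanced by exactly one unmap, and a failed map leaves both reference counts untouched.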
4731 | | -void kbase_unmap_external_resource(struct kbase_context *kctx, |
---|
4732 | | - struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) |
---|
| 5342 | +void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg) |
---|
4733 | 5343 | { |
---|
| 5344 | + /* gpu_alloc was used in kbase_map_external_resource, so we need to use it for the |
---|
| 5345 | + * unmapping operation. |
---|
| 5346 | + */ |
---|
| 5347 | + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; |
---|
| 5348 | + |
---|
| 5349 | + lockdep_assert_held(&kctx->reg_lock); |
---|
| 5350 | + |
---|
4734 | 5351 | switch (alloc->type) { |
---|
4735 | 5352 | case KBASE_MEM_TYPE_IMPORTED_UMM: { |
---|
4736 | 5353 | kbase_mem_umm_unmap(kctx, reg, alloc); |
---|
.. | .. |
---|
4742 | 5359 | if (alloc->imported.user_buf.current_mapping_usage_count == 0) { |
---|
4743 | 5360 | bool writeable = true; |
---|
4744 | 5361 | |
---|
4745 | | - if (!kbase_is_region_invalid_or_free(reg) && |
---|
4746 | | - reg->gpu_alloc == alloc) |
---|
4747 | | - kbase_mmu_teardown_pages( |
---|
4748 | | - kctx->kbdev, |
---|
4749 | | - &kctx->mmu, |
---|
4750 | | - reg->start_pfn, |
---|
4751 | | - kbase_reg_current_backed_size(reg), |
---|
4752 | | - kctx->as_nr); |
---|
| 5362 | + if (!kbase_is_region_invalid_or_free(reg)) { |
---|
| 5363 | + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, |
---|
| 5364 | + alloc->pages, |
---|
| 5365 | + kbase_reg_current_backed_size(reg), |
---|
| 5366 | + kbase_reg_current_backed_size(reg), |
---|
| 5367 | + kctx->as_nr, true); |
---|
| 5368 | + } |
---|
4753 | 5369 | |
---|
4754 | | - if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0)) |
---|
| 5370 | + if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0) |
---|
4755 | 5371 | writeable = false; |
---|
4756 | 5372 | |
---|
4757 | | - kbase_jd_user_buf_unmap(kctx, alloc, writeable); |
---|
| 5373 | + kbase_jd_user_buf_unmap(kctx, alloc, reg, writeable); |
---|
4758 | 5374 | } |
---|
4759 | | - } |
---|
| 5375 | + } |
---|
4760 | 5376 | break; |
---|
4761 | 5377 | default: |
---|
4762 | | - break; |
---|
| 5378 | + WARN(1, "Invalid external resource GPU allocation type (%x) on unmapping", |
---|
| 5379 | + alloc->type); |
---|
| 5380 | + return; |
---|
4763 | 5381 | } |
---|
4764 | 5382 | kbase_mem_phy_alloc_put(alloc); |
---|
| 5383 | + kbase_va_region_alloc_put(kctx, reg); |
---|
| 5384 | +} |
---|
| 5385 | + |
---|
| 5386 | +static inline u64 kbasep_get_va_gpu_addr(struct kbase_va_region *reg) |
---|
| 5387 | +{ |
---|
| 5388 | + return reg->start_pfn << PAGE_SHIFT; |
---|
4765 | 5389 | } |
---|
4766 | 5390 | |
---|
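With meta->reg stored instead of a cached gpu_addr, the sticky-resource code recomputes the GPU virtual address on demand via kbasep_get_va_gpu_addr(), i.e. start_pfn << PAGE_SHIFT. As a worked example, assuming the common 4 KiB page size (PAGE_SHIFT == 12), a region with start_pfn == 0x2000 yields GPU address 0x2000 << 12 == 0x2000000, which is what the lookup below compares against the caller-supplied gpu_addr.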
4767 | 5391 | struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( |
---|
.. | .. |
---|
4777 | 5401 | * metadata which matches the region which is being acquired. |
---|
4778 | 5402 | */ |
---|
4779 | 5403 | list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { |
---|
4780 | | - if (walker->gpu_addr == gpu_addr) { |
---|
| 5404 | + if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) { |
---|
4781 | 5405 | meta = walker; |
---|
4782 | 5406 | meta->ref++; |
---|
4783 | 5407 | break; |
---|
.. | .. |
---|
4789 | 5413 | struct kbase_va_region *reg; |
---|
4790 | 5414 | |
---|
4791 | 5415 | /* Find the region */ |
---|
4792 | | - reg = kbase_region_tracker_find_region_enclosing_address( |
---|
4793 | | - kctx, gpu_addr); |
---|
| 5416 | + reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); |
---|
4794 | 5417 | if (kbase_is_region_invalid_or_free(reg)) |
---|
4795 | 5418 | goto failed; |
---|
4796 | 5419 | |
---|
.. | .. |
---|
4798 | 5421 | meta = kzalloc(sizeof(*meta), GFP_KERNEL); |
---|
4799 | 5422 | if (!meta) |
---|
4800 | 5423 | goto failed; |
---|
4801 | | - |
---|
4802 | 5424 | /* |
---|
4803 | 5425 | * Fill in the metadata object and acquire a reference |
---|
4804 | 5426 | * for the physical resource. |
---|
4805 | 5427 | */ |
---|
4806 | | - meta->alloc = kbase_map_external_resource(kctx, reg, NULL); |
---|
4807 | | - meta->ref = 1; |
---|
| 5428 | + meta->reg = reg; |
---|
4808 | 5429 | |
---|
4809 | | - if (!meta->alloc) |
---|
| 5430 | + /* Map the external resource to the GPU allocation of the region |
---|
| 5431 | + * and acquire the reference to the VA region |
---|
| 5432 | + */ |
---|
| 5433 | + if (kbase_map_external_resource(kctx, meta->reg, NULL)) |
---|
4810 | 5434 | goto fail_map; |
---|
4811 | | - |
---|
4812 | | - meta->gpu_addr = reg->start_pfn << PAGE_SHIFT; |
---|
| 5435 | + meta->ref = 1; |
---|
4813 | 5436 | |
---|
4814 | 5437 | list_add(&meta->ext_res_node, &kctx->ext_res_meta_head); |
---|
4815 | 5438 | } |
---|
.. | .. |
---|
4834 | 5457 | * metadata which matches the region which is being released. |
---|
4835 | 5458 | */ |
---|
4836 | 5459 | list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) |
---|
4837 | | - if (walker->gpu_addr == gpu_addr) |
---|
| 5460 | + if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) |
---|
4838 | 5461 | return walker; |
---|
4839 | 5462 | |
---|
4840 | 5463 | return NULL; |
---|
.. | .. |
---|
4843 | 5466 | static void release_sticky_resource_meta(struct kbase_context *kctx, |
---|
4844 | 5467 | struct kbase_ctx_ext_res_meta *meta) |
---|
4845 | 5468 | { |
---|
4846 | | - struct kbase_va_region *reg; |
---|
4847 | | - |
---|
4848 | | - /* Drop the physical memory reference and free the metadata. */ |
---|
4849 | | - reg = kbase_region_tracker_find_region_enclosing_address( |
---|
4850 | | - kctx, |
---|
4851 | | - meta->gpu_addr); |
---|
4852 | | - |
---|
4853 | | - kbase_unmap_external_resource(kctx, reg, meta->alloc); |
---|
| 5469 | + kbase_unmap_external_resource(kctx, meta->reg); |
---|
4854 | 5470 | list_del(&meta->ext_res_node); |
---|
4855 | 5471 | kfree(meta); |
---|
4856 | 5472 | } |
---|