| .. | .. |
|---|
| 1 | 1 | // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note |
|---|
| 2 | 2 | /* |
|---|
| 3 | 3 | * |
|---|
| 4 | | - * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. |
|---|
| 4 | + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. |
|---|
| 5 | 5 | * |
|---|
| 6 | 6 | * This program is free software and is provided to you under the terms of the |
|---|
| 7 | 7 | * GNU General Public License version 2 as published by the Free Software |
|---|
| .. | .. |
|---|
| 20 | 20 | */ |
|---|
| 21 | 21 | |
|---|
| 22 | 22 | /** |
|---|
| 23 | | - * Base kernel memory APIs |
|---|
| 23 | + * DOC: Base kernel memory APIs |
|---|
| 24 | 24 | */ |
|---|
| 25 | 25 | #include <linux/dma-buf.h> |
|---|
| 26 | 26 | #include <linux/kernel.h> |
|---|
| .. | .. |
|---|
| 44 | 44 | #include <mali_kbase_config_defaults.h> |
|---|
| 45 | 45 | #include <mali_kbase_trace_gpu_mem.h> |
|---|
| 46 | 46 | |
|---|
| 47 | +#define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-" |
|---|
| 48 | +#define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1) |
|---|
| 49 | + |
|---|
| 50 | +#if MALI_JIT_PRESSURE_LIMIT_BASE |
|---|
| 51 | + |
|---|
| 47 | 52 | /* |
|---|
| 48 | 53 | * Alignment of objects allocated by the GPU inside a just-in-time memory |
|---|
| 49 | 54 | * region whose size is given by an end address |
|---|
| .. | .. |
|---|
| 66 | 71 | */ |
|---|
| 67 | 72 | #define KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES (512u) |
|---|
| 68 | 73 | |
|---|
| 74 | +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ |
|---|
| 69 | 75 | |
|---|
| 70 | 76 | /* Forward declarations */ |
|---|
| 71 | 77 | static void free_partial_locked(struct kbase_context *kctx, |
|---|
| .. | .. |
|---|
| 89 | 95 | #error "Unknown CPU VA width for this architecture" |
|---|
| 90 | 96 | #endif |
|---|
| 91 | 97 | |
|---|
| 92 | | -#if IS_ENABLED(CONFIG_64BIT) |
|---|
| 93 | | - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) |
|---|
| 98 | + if (kbase_ctx_compat_mode(kctx)) |
|---|
| 94 | 99 | cpu_va_bits = 32; |
|---|
| 95 | | -#endif |
|---|
| 96 | 100 | |
|---|
| 97 | 101 | return cpu_va_bits; |
|---|
| 98 | 102 | } |
|---|
| .. | .. |
|---|
| 104 | 108 | u64 gpu_pfn) |
|---|
| 105 | 109 | { |
|---|
| 106 | 110 | struct rb_root *rbtree = NULL; |
|---|
| 107 | | - struct kbase_reg_zone *exec_va_zone = |
|---|
| 108 | | - kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); |
|---|
| 109 | 111 | |
|---|
| 110 | | - /* The gpu_pfn can only be greater than the starting pfn of the EXEC_VA |
|---|
| 111 | | - * zone if this has been initialized. |
|---|
| 112 | | - */ |
|---|
| 112 | + struct kbase_reg_zone *exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); |
|---|
| 113 | + |
|---|
| 114 | +#if MALI_USE_CSF |
|---|
| 115 | + struct kbase_reg_zone *fixed_va_zone = |
|---|
| 116 | + kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_FIXED_VA); |
|---|
| 117 | + |
|---|
| 118 | + struct kbase_reg_zone *exec_fixed_va_zone = |
|---|
| 119 | + kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA); |
|---|
| 120 | + |
|---|
| 121 | + if (gpu_pfn >= fixed_va_zone->base_pfn) { |
|---|
| 122 | + rbtree = &kctx->reg_rbtree_fixed; |
|---|
| 123 | + return rbtree; |
|---|
| 124 | + } else if (gpu_pfn >= exec_fixed_va_zone->base_pfn) { |
|---|
| 125 | + rbtree = &kctx->reg_rbtree_exec_fixed; |
|---|
| 126 | + return rbtree; |
|---|
| 127 | + } |
|---|
| 128 | +#endif |
|---|
| 113 | 129 | if (gpu_pfn >= exec_va_zone->base_pfn) |
|---|
| 114 | 130 | rbtree = &kctx->reg_rbtree_exec; |
|---|
| 115 | 131 | else { |
|---|
| 116 | 132 | u64 same_va_end; |
|---|
| 117 | 133 | |
|---|
| 118 | | -#if IS_ENABLED(CONFIG_64BIT) |
|---|
| 119 | | - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { |
|---|
| 120 | | -#endif /* CONFIG_64BIT */ |
|---|
| 134 | + if (kbase_ctx_compat_mode(kctx)) { |
|---|
| 121 | 135 | same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE; |
|---|
| 122 | | -#if IS_ENABLED(CONFIG_64BIT) |
|---|
| 123 | 136 | } else { |
|---|
| 124 | 137 | struct kbase_reg_zone *same_va_zone = |
|---|
| 125 | 138 | kbase_ctx_reg_zone_get(kctx, |
|---|
| 126 | 139 | KBASE_REG_ZONE_SAME_VA); |
|---|
| 127 | 140 | same_va_end = kbase_reg_zone_end_pfn(same_va_zone); |
|---|
| 128 | 141 | } |
|---|
| 129 | | -#endif /* CONFIG_64BIT */ |
|---|
| 130 | 142 | |
|---|
| 131 | 143 | if (gpu_pfn >= same_va_end) |
|---|
| 132 | 144 | rbtree = &kctx->reg_rbtree_custom; |
|---|
| .. | .. |
|---|
| 350 | 362 | } |
|---|
| 351 | 363 | |
|---|
| 352 | 364 | /** |
|---|
| 353 | | - * Remove a region object from the global list. |
|---|
| 365 | + * kbase_remove_va_region - Remove a region object from the global list. |
|---|
| 366 | + * |
|---|
| 367 | + * @kbdev: The kbase device |
|---|
| 354 | 368 | * @reg: Region object to remove |
|---|
| 355 | 369 | * |
|---|
| 356 | 370 | * The region reg is removed, possibly by merging with other free and |
|---|
| .. | .. |
|---|
| 358 | 372 | * region lock held. The associated memory is not released (see |
|---|
| 359 | 373 | * kbase_free_alloced_region). Internal use only. |
|---|
| 360 | 374 | */ |
|---|
| 361 | | -int kbase_remove_va_region(struct kbase_va_region *reg) |
|---|
| 375 | +void kbase_remove_va_region(struct kbase_device *kbdev, |
|---|
| 376 | + struct kbase_va_region *reg) |
|---|
| 362 | 377 | { |
|---|
| 363 | 378 | struct rb_node *rbprev; |
|---|
| 364 | 379 | struct kbase_va_region *prev = NULL; |
|---|
| 365 | 380 | struct rb_node *rbnext; |
|---|
| 366 | 381 | struct kbase_va_region *next = NULL; |
|---|
| 367 | 382 | struct rb_root *reg_rbtree = NULL; |
|---|
| 383 | + struct kbase_va_region *orig_reg = reg; |
|---|
| 368 | 384 | |
|---|
| 369 | 385 | int merged_front = 0; |
|---|
| 370 | 386 | int merged_back = 0; |
|---|
| 371 | | - int err = 0; |
|---|
| 372 | 387 | |
|---|
| 373 | 388 | reg_rbtree = reg->rbtree; |
|---|
| 389 | + |
|---|
| 390 | + if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree))) |
|---|
| 391 | + return; |
|---|
| 374 | 392 | |
|---|
| 375 | 393 | /* Try to merge with the previous block first */ |
|---|
| 376 | 394 | rbprev = rb_prev(&(reg->rblink)); |
|---|
| .. | .. |
|---|
| 378 | 396 | prev = rb_entry(rbprev, struct kbase_va_region, rblink); |
|---|
| 379 | 397 | if (prev->flags & KBASE_REG_FREE) { |
|---|
| 380 | 398 | /* We're compatible with the previous VMA, merge with |
|---|
| 381 | | - * it |
|---|
| 399 | + * it, handling any gaps for robustness. |
|---|
| 382 | 400 | */ |
|---|
| 401 | + u64 prev_end_pfn = prev->start_pfn + prev->nr_pages; |
|---|
| 402 | + |
|---|
| 383 | 403 | WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) != |
|---|
| 384 | 404 | (reg->flags & KBASE_REG_ZONE_MASK)); |
|---|
| 405 | + if (!WARN_ON(reg->start_pfn < prev_end_pfn)) |
|---|
| 406 | + prev->nr_pages += reg->start_pfn - prev_end_pfn; |
|---|
| 385 | 407 | prev->nr_pages += reg->nr_pages; |
|---|
| 386 | 408 | rb_erase(&(reg->rblink), reg_rbtree); |
|---|
| 387 | 409 | reg = prev; |
|---|
| .. | .. |
|---|
| 393 | 415 | /* Note we do the lookup here as the tree may have been rebalanced. */ |
|---|
| 394 | 416 | rbnext = rb_next(&(reg->rblink)); |
|---|
| 395 | 417 | if (rbnext) { |
|---|
| 396 | | - /* We're compatible with the next VMA, merge with it */ |
|---|
| 397 | 418 | next = rb_entry(rbnext, struct kbase_va_region, rblink); |
|---|
| 398 | 419 | if (next->flags & KBASE_REG_FREE) { |
|---|
| 420 | + /* We're compatible with the next VMA, merge with it, |
|---|
| 421 | + * handling any gaps for robustness. |
|---|
| 422 | + */ |
|---|
| 423 | + u64 reg_end_pfn = reg->start_pfn + reg->nr_pages; |
|---|
| 424 | + |
|---|
| 399 | 425 | WARN_ON((next->flags & KBASE_REG_ZONE_MASK) != |
|---|
| 400 | 426 | (reg->flags & KBASE_REG_ZONE_MASK)); |
|---|
| 427 | + if (!WARN_ON(next->start_pfn < reg_end_pfn)) |
|---|
| 428 | + next->nr_pages += next->start_pfn - reg_end_pfn; |
|---|
| 401 | 429 | next->start_pfn = reg->start_pfn; |
|---|
| 402 | 430 | next->nr_pages += reg->nr_pages; |
|---|
| 403 | 431 | rb_erase(&(reg->rblink), reg_rbtree); |
|---|
| 404 | 432 | merged_back = 1; |
|---|
| 405 | | - if (merged_front) { |
|---|
| 406 | | - /* We already merged with prev, free it */ |
|---|
| 407 | | - kfree(reg); |
|---|
| 408 | | - } |
|---|
| 409 | 433 | } |
|---|
| 410 | 434 | } |
|---|
| 411 | 435 | |
|---|
| 412 | | - /* If we failed to merge then we need to add a new block */ |
|---|
| 413 | | - if (!(merged_front || merged_back)) { |
|---|
| 436 | + if (merged_front && merged_back) { |
|---|
| 437 | + /* We already merged with prev, free it */ |
|---|
| 438 | + kfree(reg); |
|---|
| 439 | + } else if (!(merged_front || merged_back)) { |
|---|
| 440 | + /* If we failed to merge then we need to add a new block */ |
|---|
| 441 | + |
|---|
| 414 | 442 | /* |
|---|
| 415 | | - * We didn't merge anything. Add a new free |
|---|
| 416 | | - * placeholder and remove the original one. |
|---|
| 443 | + * We didn't merge anything. Try to add a new free |
|---|
| 444 | + * placeholder, and in any case, remove the original one. |
|---|
| 417 | 445 | */ |
|---|
| 418 | 446 | struct kbase_va_region *free_reg; |
|---|
| 419 | 447 | |
|---|
| 420 | | - free_reg = kbase_alloc_free_region(reg_rbtree, |
|---|
| 421 | | - reg->start_pfn, reg->nr_pages, |
|---|
| 422 | | - reg->flags & KBASE_REG_ZONE_MASK); |
|---|
| 448 | + free_reg = kbase_alloc_free_region(kbdev, reg_rbtree, reg->start_pfn, reg->nr_pages, |
|---|
| 449 | + reg->flags & KBASE_REG_ZONE_MASK); |
|---|
| 423 | 450 | if (!free_reg) { |
|---|
| 424 | | - err = -ENOMEM; |
|---|
| 451 | + /* In case of failure, we cannot allocate a replacement |
|---|
| 452 | + * free region, so we will be left with a 'gap' in the |
|---|
| 453 | + * region tracker's address range (though, the rbtree |
|---|
| 454 | + * will itself still be correct after erasing |
|---|
| 455 | + * 'reg'). |
|---|
| 456 | + * |
|---|
| 457 | + * The gap will be rectified when an adjacent region is |
|---|
| 458 | + * removed by one of the above merging paths. Other |
|---|
| 459 | + * paths will gracefully fail to allocate if they try |
|---|
| 460 | + * to allocate in the gap. |
|---|
| 461 | + * |
|---|
| 462 | + * There is nothing that the caller can do, since free |
|---|
| 463 | + * paths must not fail. The existing 'reg' cannot be |
|---|
| 464 | + * repurposed as the free region as callers must have |
|---|
| 465 | + * freedom of use with it by virtue of it being owned |
|---|
| 466 | + * by them, not the region tracker insert/remove code. |
|---|
| 467 | + */ |
|---|
| 468 | + dev_warn( |
|---|
| 469 | + kbdev->dev, |
|---|
| 470 | + "Could not alloc a replacement free region for 0x%.16llx..0x%.16llx", |
|---|
| 471 | + (unsigned long long)reg->start_pfn << PAGE_SHIFT, |
|---|
| 472 | + (unsigned long long)(reg->start_pfn + reg->nr_pages) << PAGE_SHIFT); |
|---|
| 473 | + rb_erase(&(reg->rblink), reg_rbtree); |
|---|
| 474 | + |
|---|
| 425 | 475 | goto out; |
|---|
| 426 | 476 | } |
|---|
| 427 | 477 | rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree); |
|---|
| 428 | 478 | } |
|---|
| 429 | 479 | |
|---|
| 430 | | - out: |
|---|
| 431 | | - return err; |
|---|
| 480 | + /* This operation is always safe because the function never frees |
|---|
| 481 | + * the region. If the region has been merged to both front and back, |
|---|
| 482 | + * then it's the previous region that is supposed to be freed. |
|---|
| 483 | + */ |
|---|
| 484 | + orig_reg->start_pfn = 0; |
|---|
| 485 | + |
|---|
| 486 | +out: |
|---|
| 487 | + return; |
|---|
| 432 | 488 | } |
|---|
| 433 | 489 | |
|---|
| 434 | 490 | KBASE_EXPORT_TEST_API(kbase_remove_va_region); |
|---|
| .. | .. |
|---|
| 437 | 493 | * kbase_insert_va_region_nolock - Insert a VA region to the list, |
|---|
| 438 | 494 | * replacing the existing one. |
|---|
| 439 | 495 | * |
|---|
| 496 | + * @kbdev: The kbase device |
|---|
| 440 | 497 | * @new_reg: The new region to insert |
|---|
| 441 | 498 | * @at_reg: The region to replace |
|---|
| 442 | 499 | * @start_pfn: The Page Frame Number to insert at |
|---|
| 443 | 500 | * @nr_pages: The number of pages of the region |
|---|
| 501 | + * |
|---|
| 502 | + * Return: 0 on success, error code otherwise. |
|---|
| 444 | 503 | */ |
|---|
| 445 | | -static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg, |
|---|
| 446 | | - struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages) |
|---|
| 504 | +static int kbase_insert_va_region_nolock(struct kbase_device *kbdev, |
|---|
| 505 | + struct kbase_va_region *new_reg, |
|---|
| 506 | + struct kbase_va_region *at_reg, u64 start_pfn, |
|---|
| 507 | + size_t nr_pages) |
|---|
| 447 | 508 | { |
|---|
| 448 | 509 | struct rb_root *reg_rbtree = NULL; |
|---|
| 449 | 510 | int err = 0; |
|---|
| .. | .. |
|---|
| 456 | 517 | KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages)); |
|---|
| 457 | 518 | /* at least nr_pages from start_pfn should be contained within at_reg */ |
|---|
| 458 | 519 | KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages); |
|---|
| 520 | + /* having at_reg means the rb_tree should not be empty */ |
|---|
| 521 | + if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree))) |
|---|
| 522 | + return -ENOMEM; |
|---|
| 459 | 523 | |
|---|
| 460 | 524 | new_reg->start_pfn = start_pfn; |
|---|
| 461 | 525 | new_reg->nr_pages = nr_pages; |
|---|
| .. | .. |
|---|
| 484 | 548 | else { |
|---|
| 485 | 549 | struct kbase_va_region *new_front_reg; |
|---|
| 486 | 550 | |
|---|
| 487 | | - new_front_reg = kbase_alloc_free_region(reg_rbtree, |
|---|
| 488 | | - at_reg->start_pfn, |
|---|
| 489 | | - start_pfn - at_reg->start_pfn, |
|---|
| 490 | | - at_reg->flags & KBASE_REG_ZONE_MASK); |
|---|
| 551 | + new_front_reg = kbase_alloc_free_region(kbdev, reg_rbtree, at_reg->start_pfn, |
|---|
| 552 | + start_pfn - at_reg->start_pfn, |
|---|
| 553 | + at_reg->flags & KBASE_REG_ZONE_MASK); |
|---|
| 491 | 554 | |
|---|
| 492 | 555 | if (new_front_reg) { |
|---|
| 493 | 556 | at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages; |
|---|
| .. | .. |
|---|
| 511 | 574 | * @addr: the address to insert the region at |
|---|
| 512 | 575 | * @nr_pages: the number of pages in the region |
|---|
| 513 | 576 | * @align: the minimum alignment in pages |
|---|
| 577 | + * |
|---|
| 578 | + * Return: 0 on success, error code otherwise. |
|---|
| 514 | 579 | */ |
|---|
| 515 | 580 | int kbase_add_va_region(struct kbase_context *kctx, |
|---|
| 516 | 581 | struct kbase_va_region *reg, u64 addr, |
|---|
| .. | .. |
|---|
| 527 | 592 | |
|---|
| 528 | 593 | lockdep_assert_held(&kctx->reg_lock); |
|---|
| 529 | 594 | |
|---|
| 530 | | - /* The executable allocation from the SAME_VA zone would already have an |
|---|
| 595 | + /* The executable allocation from the SAME_VA zone should already have an |
|---|
| 531 | 596 | * appropriately aligned GPU VA chosen for it. |
|---|
| 532 | | - * Also the executable allocation from EXEC_VA zone doesn't need the |
|---|
| 533 | | - * special alignment. |
|---|
| 597 | + * Also, executable allocations from EXEC_VA don't need the special |
|---|
| 598 | + * alignment. |
|---|
| 534 | 599 | */ |
|---|
| 600 | +#if MALI_USE_CSF |
|---|
| 601 | + /* The same is also true for the EXEC_FIXED_VA zone. |
|---|
| 602 | + */ |
|---|
| 603 | +#endif |
|---|
| 535 | 604 | if (!(reg->flags & KBASE_REG_GPU_NX) && !addr && |
|---|
| 605 | +#if MALI_USE_CSF |
|---|
| 606 | + ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_FIXED_VA) && |
|---|
| 607 | +#endif |
|---|
| 536 | 608 | ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_VA)) { |
|---|
| 537 | 609 | if (cpu_va_bits > gpu_pc_bits) { |
|---|
| 538 | 610 | align = max(align, (size_t)((1ULL << gpu_pc_bits) |
|---|
| .. | .. |
|---|
| 564 | 636 | /** |
|---|
| 565 | 637 | * kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree |
|---|
| 566 | 638 | * |
|---|
| 567 | | - * Insert a region into the rbtree that was specified when the region was |
|---|
| 568 | | - * created. If addr is 0 a free area in the rbtree is used, otherwise the |
|---|
| 569 | | - * specified address is used. |
|---|
| 570 | | - * |
|---|
| 571 | 639 | * @kbdev: The kbase device |
|---|
| 572 | 640 | * @reg: The region to add |
|---|
| 573 | 641 | * @addr: The address to add the region at, or 0 to map at any available address |
|---|
| 574 | 642 | * @nr_pages: The size of the region in pages |
|---|
| 575 | 643 | * @align: The minimum alignment in pages |
|---|
| 644 | + * |
|---|
| 645 | + * Insert a region into the rbtree that was specified when the region was |
|---|
| 646 | + * created. If addr is 0 a free area in the rbtree is used, otherwise the |
|---|
| 647 | + * specified address is used. |
|---|
| 648 | + * |
|---|
| 649 | + * Return: 0 on success, error code otherwise. |
|---|
| 576 | 650 | */ |
|---|
| 577 | 651 | int kbase_add_va_region_rbtree(struct kbase_device *kbdev, |
|---|
| 578 | 652 | struct kbase_va_region *reg, |
|---|
| .. | .. |
|---|
| 613 | 687 | goto exit; |
|---|
| 614 | 688 | } |
|---|
| 615 | 689 | |
|---|
| 616 | | - err = kbase_insert_va_region_nolock(reg, tmp, gpu_pfn, |
|---|
| 617 | | - nr_pages); |
|---|
| 690 | + err = kbase_insert_va_region_nolock(kbdev, reg, tmp, gpu_pfn, nr_pages); |
|---|
| 618 | 691 | if (err) { |
|---|
| 619 | 692 | dev_warn(dev, "Failed to insert va region"); |
|---|
| 620 | 693 | err = -ENOMEM; |
|---|
| .. | .. |
|---|
| 639 | 712 | nr_pages, align_offset, align_mask, |
|---|
| 640 | 713 | &start_pfn); |
|---|
| 641 | 714 | if (tmp) { |
|---|
| 642 | | - err = kbase_insert_va_region_nolock(reg, tmp, |
|---|
| 643 | | - start_pfn, nr_pages); |
|---|
| 715 | + err = kbase_insert_va_region_nolock(kbdev, reg, tmp, start_pfn, nr_pages); |
|---|
| 644 | 716 | if (unlikely(err)) { |
|---|
| 645 | 717 | dev_warn(dev, "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages", |
|---|
| 646 | 718 | start_pfn, nr_pages); |
|---|
| .. | .. |
|---|
| 659 | 731 | /* |
|---|
| 660 | 732 | * @brief Initialize the internal region tracker data structure. |
|---|
| 661 | 733 | */ |
|---|
| 734 | +#if MALI_USE_CSF |
|---|
| 735 | +static void kbase_region_tracker_ds_init(struct kbase_context *kctx, |
|---|
| 736 | + struct kbase_va_region *same_va_reg, |
|---|
| 737 | + struct kbase_va_region *custom_va_reg, |
|---|
| 738 | + struct kbase_va_region *exec_va_reg, |
|---|
| 739 | + struct kbase_va_region *exec_fixed_va_reg, |
|---|
| 740 | + struct kbase_va_region *fixed_va_reg) |
|---|
| 741 | +{ |
|---|
| 742 | + u64 last_zone_end_pfn; |
|---|
| 743 | + |
|---|
| 744 | + kctx->reg_rbtree_same = RB_ROOT; |
|---|
| 745 | + kbase_region_tracker_insert(same_va_reg); |
|---|
| 746 | + |
|---|
| 747 | + last_zone_end_pfn = same_va_reg->start_pfn + same_va_reg->nr_pages; |
|---|
| 748 | + |
|---|
| 749 | + /* Although custom_va_reg doesn't always exist, initialize |
|---|
| 750 | + * unconditionally because of the mem_view debugfs |
|---|
| 751 | + * implementation which relies on it being empty. |
|---|
| 752 | + */ |
|---|
| 753 | + kctx->reg_rbtree_custom = RB_ROOT; |
|---|
| 754 | + kctx->reg_rbtree_exec = RB_ROOT; |
|---|
| 755 | + |
|---|
| 756 | + if (custom_va_reg) { |
|---|
| 757 | + WARN_ON(custom_va_reg->start_pfn < last_zone_end_pfn); |
|---|
| 758 | + kbase_region_tracker_insert(custom_va_reg); |
|---|
| 759 | + last_zone_end_pfn = custom_va_reg->start_pfn + custom_va_reg->nr_pages; |
|---|
| 760 | + } |
|---|
| 761 | + |
|---|
| 762 | + /* Initialize exec, fixed and exec_fixed. These are always |
|---|
| 763 | + * initialized at this stage, if they will exist at all. |
|---|
| 764 | + */ |
|---|
| 765 | + kctx->reg_rbtree_fixed = RB_ROOT; |
|---|
| 766 | + kctx->reg_rbtree_exec_fixed = RB_ROOT; |
|---|
| 767 | + |
|---|
| 768 | + if (exec_va_reg) { |
|---|
| 769 | + WARN_ON(exec_va_reg->start_pfn < last_zone_end_pfn); |
|---|
| 770 | + kbase_region_tracker_insert(exec_va_reg); |
|---|
| 771 | + last_zone_end_pfn = exec_va_reg->start_pfn + exec_va_reg->nr_pages; |
|---|
| 772 | + } |
|---|
| 773 | + |
|---|
| 774 | + if (exec_fixed_va_reg) { |
|---|
| 775 | + WARN_ON(exec_fixed_va_reg->start_pfn < last_zone_end_pfn); |
|---|
| 776 | + kbase_region_tracker_insert(exec_fixed_va_reg); |
|---|
| 777 | + last_zone_end_pfn = exec_fixed_va_reg->start_pfn + exec_fixed_va_reg->nr_pages; |
|---|
| 778 | + } |
|---|
| 779 | + |
|---|
| 780 | + if (fixed_va_reg) { |
|---|
| 781 | + WARN_ON(fixed_va_reg->start_pfn < last_zone_end_pfn); |
|---|
| 782 | + kbase_region_tracker_insert(fixed_va_reg); |
|---|
| 783 | + last_zone_end_pfn = fixed_va_reg->start_pfn + fixed_va_reg->nr_pages; |
|---|
| 784 | + } |
|---|
| 785 | +} |
|---|
| 786 | +#else |
|---|
| 662 | 787 | static void kbase_region_tracker_ds_init(struct kbase_context *kctx, |
|---|
| 663 | 788 | struct kbase_va_region *same_va_reg, |
|---|
| 664 | 789 | struct kbase_va_region *custom_va_reg) |
|---|
| .. | .. |
|---|
| 679 | 804 | if (custom_va_reg) |
|---|
| 680 | 805 | kbase_region_tracker_insert(custom_va_reg); |
|---|
| 681 | 806 | } |
|---|
| 807 | +#endif /* MALI_USE_CSF */ |
|---|
| 808 | + |
|---|
| 809 | +static struct kbase_context *kbase_reg_flags_to_kctx(struct kbase_va_region *reg) |
|---|
| 810 | +{ |
|---|
| 811 | + struct kbase_context *kctx = NULL; |
|---|
| 812 | + struct rb_root *rbtree = reg->rbtree; |
|---|
| 813 | + |
|---|
| 814 | + switch (reg->flags & KBASE_REG_ZONE_MASK) { |
|---|
| 815 | + case KBASE_REG_ZONE_CUSTOM_VA: |
|---|
| 816 | + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_custom); |
|---|
| 817 | + break; |
|---|
| 818 | + case KBASE_REG_ZONE_SAME_VA: |
|---|
| 819 | + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_same); |
|---|
| 820 | + break; |
|---|
| 821 | + case KBASE_REG_ZONE_EXEC_VA: |
|---|
| 822 | + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec); |
|---|
| 823 | + break; |
|---|
| 824 | +#if MALI_USE_CSF |
|---|
| 825 | + case KBASE_REG_ZONE_EXEC_FIXED_VA: |
|---|
| 826 | + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed); |
|---|
| 827 | + break; |
|---|
| 828 | + case KBASE_REG_ZONE_FIXED_VA: |
|---|
| 829 | + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed); |
|---|
| 830 | + break; |
|---|
| 831 | + case KBASE_REG_ZONE_MCU_SHARED: |
|---|
| 832 | + /* This is only expected to be called on driver unload. */ |
|---|
| 833 | + break; |
|---|
| 834 | +#endif |
|---|
| 835 | + default: |
|---|
| 836 | + WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); |
|---|
| 837 | + break; |
|---|
| 838 | + } |
|---|
| 839 | + |
|---|
| 840 | + return kctx; |
|---|
| 841 | +} |
|---|
| 682 | 842 | |
|---|
| 683 | 843 | static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) |
|---|
| 684 | 844 | { |
|---|
| .. | .. |
|---|
| 690 | 850 | if (rbnode) { |
|---|
| 691 | 851 | rb_erase(rbnode, rbtree); |
|---|
| 692 | 852 | reg = rb_entry(rbnode, struct kbase_va_region, rblink); |
|---|
| 693 | | - WARN_ON(reg->va_refcnt != 1); |
|---|
| 853 | + WARN_ON(kbase_refcount_read(®->va_refcnt) != 1); |
|---|
| 854 | + if (kbase_page_migration_enabled) |
|---|
| 855 | + kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg); |
|---|
| 694 | 856 | /* Reset the start_pfn - as the rbtree is being |
|---|
| 695 | 857 | * destroyed and we've already erased this region, there |
|---|
| 696 | 858 | * is no further need to attempt to remove it. |
|---|
| .. | .. |
|---|
| 707 | 869 | |
|---|
| 708 | 870 | void kbase_region_tracker_term(struct kbase_context *kctx) |
|---|
| 709 | 871 | { |
|---|
| 872 | + WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, |
|---|
| 873 | + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", |
|---|
| 874 | + kctx->tgid, kctx->id); |
|---|
| 875 | + |
|---|
| 710 | 876 | kbase_gpu_vm_lock(kctx); |
|---|
| 711 | 877 | kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); |
|---|
| 712 | 878 | kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); |
|---|
| 713 | 879 | kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); |
|---|
| 714 | 880 | #if MALI_USE_CSF |
|---|
| 715 | 881 | WARN_ON(!list_empty(&kctx->csf.event_pages_head)); |
|---|
| 882 | + kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec_fixed); |
|---|
| 883 | + kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_fixed); |
|---|
| 884 | + |
|---|
| 716 | 885 | #endif |
|---|
| 717 | 886 | kbase_gpu_vm_unlock(kctx); |
|---|
| 718 | 887 | } |
|---|
| .. | .. |
|---|
| 724 | 893 | |
|---|
| 725 | 894 | static size_t kbase_get_same_va_bits(struct kbase_context *kctx) |
|---|
| 726 | 895 | { |
|---|
| 727 | | - return min(kbase_get_num_cpu_va_bits(kctx), |
|---|
| 728 | | - (size_t) kctx->kbdev->gpu_props.mmu.va_bits); |
|---|
| 896 | + return min_t(size_t, kbase_get_num_cpu_va_bits(kctx), |
|---|
| 897 | + kctx->kbdev->gpu_props.mmu.va_bits); |
|---|
| 729 | 898 | } |
|---|
| 730 | 899 | |
|---|
| 731 | 900 | int kbase_region_tracker_init(struct kbase_context *kctx) |
|---|
| .. | .. |
|---|
| 734 | 903 | struct kbase_va_region *custom_va_reg = NULL; |
|---|
| 735 | 904 | size_t same_va_bits = kbase_get_same_va_bits(kctx); |
|---|
| 736 | 905 | u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; |
|---|
| 737 | | - u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT; |
|---|
| 906 | + u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits; |
|---|
| 907 | + u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT; |
|---|
| 738 | 908 | u64 same_va_pages; |
|---|
| 739 | 909 | u64 same_va_base = 1u; |
|---|
| 740 | 910 | int err; |
|---|
| 911 | +#if MALI_USE_CSF |
|---|
| 912 | + struct kbase_va_region *exec_va_reg; |
|---|
| 913 | + struct kbase_va_region *exec_fixed_va_reg; |
|---|
| 914 | + struct kbase_va_region *fixed_va_reg; |
|---|
| 915 | + |
|---|
| 916 | + u64 exec_va_base; |
|---|
| 917 | + u64 fixed_va_end; |
|---|
| 918 | + u64 exec_fixed_va_base; |
|---|
| 919 | + u64 fixed_va_base; |
|---|
| 920 | + u64 fixed_va_pages; |
|---|
| 921 | +#endif |
|---|
| 741 | 922 | |
|---|
| 742 | 923 | /* Take the lock as kbase_free_alloced_region requires it */ |
|---|
| 743 | 924 | kbase_gpu_vm_lock(kctx); |
|---|
| 744 | 925 | |
|---|
| 745 | 926 | same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - same_va_base; |
|---|
| 927 | + |
|---|
| 928 | +#if MALI_USE_CSF |
|---|
| 929 | + if ((same_va_base + same_va_pages) > KBASE_REG_ZONE_EXEC_VA_BASE_64) { |
|---|
| 930 | + /* Depending on how the kernel is configured, it's possible (eg on aarch64) for |
|---|
| 931 | + * same_va_bits to reach 48 bits. Cap same_va_pages so that the same_va zone |
|---|
| 932 | + * doesn't cross into the exec_va zone. |
|---|
| 933 | + */ |
|---|
| 934 | + same_va_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - same_va_base; |
|---|
| 935 | + } |
|---|
| 936 | +#endif |
|---|
| 937 | + |
|---|
| 746 | 938 | /* all have SAME_VA */ |
|---|
| 747 | | - same_va_reg = |
|---|
| 748 | | - kbase_alloc_free_region(&kctx->reg_rbtree_same, same_va_base, |
|---|
| 749 | | - same_va_pages, KBASE_REG_ZONE_SAME_VA); |
|---|
| 939 | + same_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, same_va_base, |
|---|
| 940 | + same_va_pages, KBASE_REG_ZONE_SAME_VA); |
|---|
| 750 | 941 | |
|---|
| 751 | 942 | if (!same_va_reg) { |
|---|
| 752 | 943 | err = -ENOMEM; |
|---|
| .. | .. |
|---|
| 755 | 946 | kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base, |
|---|
| 756 | 947 | same_va_pages); |
|---|
| 757 | 948 | |
|---|
| 758 | | -#if IS_ENABLED(CONFIG_64BIT) |
|---|
| 759 | | - /* 32-bit clients have custom VA zones */ |
|---|
| 760 | | - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { |
|---|
| 761 | | -#endif |
|---|
| 949 | + if (kbase_ctx_compat_mode(kctx)) { |
|---|
| 762 | 950 | if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { |
|---|
| 763 | 951 | err = -EINVAL; |
|---|
| 764 | 952 | goto fail_free_same_va; |
|---|
| .. | .. |
|---|
| 770 | 958 | if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit) |
|---|
| 771 | 959 | custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE; |
|---|
| 772 | 960 | |
|---|
| 773 | | - custom_va_reg = kbase_alloc_free_region( |
|---|
| 774 | | - &kctx->reg_rbtree_custom, |
|---|
| 775 | | - KBASE_REG_ZONE_CUSTOM_VA_BASE, |
|---|
| 776 | | - custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); |
|---|
| 961 | + custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, |
|---|
| 962 | + KBASE_REG_ZONE_CUSTOM_VA_BASE, |
|---|
| 963 | + custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); |
|---|
| 777 | 964 | |
|---|
| 778 | 965 | if (!custom_va_reg) { |
|---|
| 779 | 966 | err = -ENOMEM; |
|---|
| .. | .. |
|---|
| 782 | 969 | kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, |
|---|
| 783 | 970 | KBASE_REG_ZONE_CUSTOM_VA_BASE, |
|---|
| 784 | 971 | custom_va_size); |
|---|
| 785 | | -#if IS_ENABLED(CONFIG_64BIT) |
|---|
| 786 | 972 | } else { |
|---|
| 787 | 973 | custom_va_size = 0; |
|---|
| 788 | 974 | } |
|---|
| 789 | | -#endif |
|---|
| 975 | + |
|---|
| 976 | +#if MALI_USE_CSF |
|---|
| 977 | + /* The position of EXEC_VA depends on whether the client is 32-bit or 64-bit. */ |
|---|
| 978 | + exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_64; |
|---|
| 979 | + |
|---|
| 980 | + /* Similarly the end of the FIXED_VA zone also depends on whether the client |
|---|
| 981 | + * is 32 or 64-bits. |
|---|
| 982 | + */ |
|---|
| 983 | + fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64; |
|---|
| 984 | + |
|---|
| 985 | + if (kbase_ctx_compat_mode(kctx)) { |
|---|
| 986 | + exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_32; |
|---|
| 987 | + fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32; |
|---|
| 988 | + } |
|---|
| 989 | + |
|---|
| 990 | + kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, exec_va_base, |
|---|
| 991 | + KBASE_REG_ZONE_EXEC_VA_SIZE); |
|---|
| 992 | + |
|---|
| 993 | + exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_base, |
|---|
| 994 | + KBASE_REG_ZONE_EXEC_VA_SIZE, KBASE_REG_ZONE_EXEC_VA); |
|---|
| 995 | + |
|---|
| 996 | + if (!exec_va_reg) { |
|---|
| 997 | + err = -ENOMEM; |
|---|
| 998 | + goto fail_free_custom_va; |
|---|
| 999 | + } |
|---|
| 1000 | + |
|---|
| 1001 | + exec_fixed_va_base = exec_va_base + KBASE_REG_ZONE_EXEC_VA_SIZE; |
|---|
| 1002 | + |
|---|
| 1003 | + kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA, exec_fixed_va_base, |
|---|
| 1004 | + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE); |
|---|
| 1005 | + |
|---|
| 1006 | + exec_fixed_va_reg = |
|---|
| 1007 | + kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec_fixed, |
|---|
| 1008 | + exec_fixed_va_base, KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE, |
|---|
| 1009 | + KBASE_REG_ZONE_EXEC_FIXED_VA); |
|---|
| 1010 | + |
|---|
| 1011 | + if (!exec_fixed_va_reg) { |
|---|
| 1012 | + err = -ENOMEM; |
|---|
| 1013 | + goto fail_free_exec_va; |
|---|
| 1014 | + } |
|---|
| 1015 | + |
|---|
| 1016 | + fixed_va_base = exec_fixed_va_base + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE; |
|---|
| 1017 | + fixed_va_pages = fixed_va_end - fixed_va_base; |
|---|
| 1018 | + |
|---|
| 1019 | + kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_FIXED_VA, fixed_va_base, fixed_va_pages); |
|---|
| 1020 | + |
|---|
| 1021 | + fixed_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_fixed, fixed_va_base, |
|---|
| 1022 | + fixed_va_pages, KBASE_REG_ZONE_FIXED_VA); |
|---|
| 1023 | + |
|---|
| 1024 | + kctx->gpu_va_end = fixed_va_end; |
|---|
| 1025 | + |
|---|
| 1026 | + if (!fixed_va_reg) { |
|---|
| 1027 | + err = -ENOMEM; |
|---|
| 1028 | + goto fail_free_exec_fixed_va; |
|---|
| 1029 | + } |
|---|
| 1030 | + |
|---|
| 1031 | + kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg, exec_va_reg, |
|---|
| 1032 | + exec_fixed_va_reg, fixed_va_reg); |
|---|
| 1033 | + |
|---|
| 1034 | + INIT_LIST_HEAD(&kctx->csf.event_pages_head); |
|---|
| 1035 | +#else |
|---|
| 790 | 1036 | /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is |
|---|
| 791 | 1037 | * initially U64_MAX |
|---|
| 792 | 1038 | */ |
|---|
| .. | .. |
|---|
| 794 | 1040 | /* Other zones are 0: kbase_create_context() uses vzalloc */ |
|---|
| 795 | 1041 | |
|---|
| 796 | 1042 | kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg); |
|---|
| 797 | | - |
|---|
| 798 | 1043 | kctx->gpu_va_end = same_va_base + same_va_pages + custom_va_size; |
|---|
| 799 | | - kctx->jit_va = false; |
|---|
| 800 | | - |
|---|
| 801 | | -#if MALI_USE_CSF |
|---|
| 802 | | - INIT_LIST_HEAD(&kctx->csf.event_pages_head); |
|---|
| 803 | 1044 | #endif |
|---|
| 1045 | + kctx->jit_va = false; |
|---|
| 804 | 1046 | |
|---|
| 805 | 1047 | kbase_gpu_vm_unlock(kctx); |
|---|
| 806 | 1048 | return 0; |
|---|
| 1049 | + |
|---|
| 1050 | +#if MALI_USE_CSF |
|---|
| 1051 | +fail_free_exec_fixed_va: |
|---|
| 1052 | + kbase_free_alloced_region(exec_fixed_va_reg); |
|---|
| 1053 | +fail_free_exec_va: |
|---|
| 1054 | + kbase_free_alloced_region(exec_va_reg); |
|---|
| 1055 | +fail_free_custom_va: |
|---|
| 1056 | + if (custom_va_reg) |
|---|
| 1057 | + kbase_free_alloced_region(custom_va_reg); |
|---|
| 1058 | +#endif |
|---|
| 807 | 1059 | |
|---|
| 808 | 1060 | fail_free_same_va: |
|---|
| 809 | 1061 | kbase_free_alloced_region(same_va_reg); |
|---|
| .. | .. |
|---|
| 834 | 1086 | } |
|---|
| 835 | 1087 | |
|---|
| 836 | 1088 | /** |
|---|
| 837 | | - * Determine if any allocations have been made on a context's region tracker |
|---|
| 1089 | + * kbase_region_tracker_has_allocs - Determine if any allocations have been made |
|---|
| 1090 | + * on a context's region tracker |
|---|
| 1091 | + * |
|---|
| 838 | 1092 | * @kctx: KBase context |
|---|
| 839 | 1093 | * |
|---|
| 840 | 1094 | * Check the context to determine if any allocations have been made yet from |
|---|
| .. | .. |
|---|
| 862 | 1116 | unsigned long zone_bits = KBASE_REG_ZONE(zone_idx); |
|---|
| 863 | 1117 | unsigned long reg_zone; |
|---|
| 864 | 1118 | |
|---|
| 1119 | + if (!kbase_is_ctx_reg_zone(zone_bits)) |
|---|
| 1120 | + continue; |
|---|
| 865 | 1121 | zone = kbase_ctx_reg_zone_get(kctx, zone_bits); |
|---|
| 866 | 1122 | zone_base_addr = zone->base_pfn << PAGE_SHIFT; |
|---|
| 867 | 1123 | |
|---|
| .. | .. |
|---|
| 901 | 1157 | return false; |
|---|
| 902 | 1158 | } |
|---|
| 903 | 1159 | |
|---|
| 904 | | -#if IS_ENABLED(CONFIG_64BIT) |
|---|
| 905 | 1160 | static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, |
|---|
| 906 | 1161 | u64 jit_va_pages) |
|---|
| 907 | 1162 | { |
|---|
| .. | .. |
|---|
| 950 | 1205 | * Create a custom VA zone at the end of the VA for allocations which |
|---|
| 951 | 1206 | * JIT can use so it doesn't have to allocate VA from the kernel. |
|---|
| 952 | 1207 | */ |
|---|
| 953 | | - custom_va_reg = |
|---|
| 954 | | - kbase_alloc_free_region(&kctx->reg_rbtree_custom, jit_va_start, |
|---|
| 955 | | - jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA); |
|---|
| 1208 | + custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, jit_va_start, |
|---|
| 1209 | + jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA); |
|---|
| 956 | 1210 | |
|---|
| 957 | 1211 | /* |
|---|
| 958 | 1212 | * The context will be destroyed if we fail here so no point |
|---|
| .. | .. |
|---|
| 969 | 1223 | kbase_region_tracker_insert(custom_va_reg); |
|---|
| 970 | 1224 | return 0; |
|---|
| 971 | 1225 | } |
|---|
| 972 | | -#endif |
|---|
| 973 | 1226 | |
|---|
| 974 | 1227 | int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, |
|---|
| 975 | 1228 | int max_allocations, int trim_level, int group_id, |
|---|
| .. | .. |
|---|
| 1010 | 1263 | goto exit_unlock; |
|---|
| 1011 | 1264 | } |
|---|
| 1012 | 1265 | |
|---|
| 1013 | | -#if IS_ENABLED(CONFIG_64BIT) |
|---|
| 1014 | | - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) |
|---|
| 1266 | + if (!kbase_ctx_compat_mode(kctx)) |
|---|
| 1015 | 1267 | err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages); |
|---|
| 1016 | | -#endif |
|---|
| 1017 | 1268 | /* |
|---|
| 1018 | 1269 | * Nothing to do for 32-bit clients, JIT uses the existing |
|---|
| 1019 | 1270 | * custom VA zone. |
|---|
| .. | .. |
|---|
| 1039 | 1290 | |
|---|
| 1040 | 1291 | int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages) |
|---|
| 1041 | 1292 | { |
|---|
| 1293 | +#if !MALI_USE_CSF |
|---|
| 1042 | 1294 | struct kbase_va_region *exec_va_reg; |
|---|
| 1043 | 1295 | struct kbase_reg_zone *exec_va_zone; |
|---|
| 1044 | 1296 | struct kbase_reg_zone *target_zone; |
|---|
| .. | .. |
|---|
| 1047 | 1299 | unsigned long target_zone_bits; |
|---|
| 1048 | 1300 | u64 exec_va_start; |
|---|
| 1049 | 1301 | int err; |
|---|
| 1302 | +#endif |
|---|
| 1050 | 1303 | |
|---|
| 1051 | 1304 | /* The EXEC_VA zone shall be created by making space either: |
|---|
| 1052 | 1305 | * - for 64-bit clients, at the end of the process's address space |
|---|
| .. | .. |
|---|
| 1060 | 1313 | if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES) |
|---|
| 1061 | 1314 | return -EINVAL; |
|---|
| 1062 | 1315 | |
|---|
| 1316 | +#if MALI_USE_CSF |
|---|
| 1317 | + /* For CSF GPUs we now setup the EXEC_VA zone during initialization, |
|---|
| 1318 | + * so this request is a null-op. |
|---|
| 1319 | + */ |
|---|
| 1320 | + return 0; |
|---|
| 1321 | +#else |
|---|
| 1063 | 1322 | kbase_gpu_vm_lock(kctx); |
|---|
| 1064 | 1323 | |
|---|
| 1065 | 1324 | /* Verify that we've not already created a EXEC_VA zone, and that the |
|---|
| .. | .. |
|---|
| 1081 | 1340 | goto exit_unlock; |
|---|
| 1082 | 1341 | } |
|---|
| 1083 | 1342 | |
|---|
| 1084 | | -#if IS_ENABLED(CONFIG_64BIT) |
|---|
| 1085 | | - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { |
|---|
| 1086 | | -#endif |
|---|
| 1343 | + if (kbase_ctx_compat_mode(kctx)) { |
|---|
| 1087 | 1344 | /* 32-bit client: take from CUSTOM_VA zone */ |
|---|
| 1088 | 1345 | target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA; |
|---|
| 1089 | | -#if IS_ENABLED(CONFIG_64BIT) |
|---|
| 1090 | 1346 | } else { |
|---|
| 1091 | 1347 | /* 64-bit client: take from SAME_VA zone */ |
|---|
| 1092 | 1348 | target_zone_bits = KBASE_REG_ZONE_SAME_VA; |
|---|
| 1093 | 1349 | } |
|---|
| 1094 | | -#endif |
|---|
| 1350 | + |
|---|
| 1095 | 1351 | target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits); |
|---|
| 1096 | 1352 | target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT; |
|---|
| 1097 | 1353 | |
|---|
| .. | .. |
|---|
| 1119 | 1375 | /* Taken from the end of the target zone */ |
|---|
| 1120 | 1376 | exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages; |
|---|
| 1121 | 1377 | |
|---|
| 1122 | | - exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec, |
|---|
| 1123 | | - exec_va_start, |
|---|
| 1124 | | - exec_va_pages, |
|---|
| 1125 | | - KBASE_REG_ZONE_EXEC_VA); |
|---|
| 1378 | + exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_start, |
|---|
| 1379 | + exec_va_pages, KBASE_REG_ZONE_EXEC_VA); |
|---|
| 1126 | 1380 | if (!exec_va_reg) { |
|---|
| 1127 | 1381 | err = -ENOMEM; |
|---|
| 1128 | 1382 | goto exit_unlock; |
|---|
| .. | .. |
|---|
| 1145 | 1399 | exit_unlock: |
|---|
| 1146 | 1400 | kbase_gpu_vm_unlock(kctx); |
|---|
| 1147 | 1401 | return err; |
|---|
| 1402 | +#endif /* MALI_USE_CSF */ |
|---|
| 1148 | 1403 | } |
|---|
| 1149 | 1404 | |
|---|
| 1150 | 1405 | #if MALI_USE_CSF |
|---|
| .. | .. |
|---|
| 1164 | 1419 | |
|---|
| 1165 | 1420 | kbdev->csf.shared_reg_rbtree = RB_ROOT; |
|---|
| 1166 | 1421 | |
|---|
| 1167 | | - shared_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, |
|---|
| 1168 | | - shared_reg_start_pfn, |
|---|
| 1169 | | - shared_reg_size, |
|---|
| 1170 | | - KBASE_REG_ZONE_MCU_SHARED); |
|---|
| 1422 | + shared_reg = |
|---|
| 1423 | + kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, shared_reg_start_pfn, |
|---|
| 1424 | + shared_reg_size, KBASE_REG_ZONE_MCU_SHARED); |
|---|
| 1171 | 1425 | if (!shared_reg) |
|---|
| 1172 | 1426 | return -ENOMEM; |
|---|
| 1173 | 1427 | |
|---|
| .. | .. |
|---|
| 1176 | 1430 | } |
|---|
| 1177 | 1431 | #endif |
|---|
| 1178 | 1432 | |
|---|
| 1433 | +static void kbasep_mem_page_size_init(struct kbase_device *kbdev) |
|---|
| 1434 | +{ |
|---|
| 1435 | +#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) |
|---|
| 1436 | +#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) |
|---|
| 1437 | + kbdev->pagesize_2mb = true; |
|---|
| 1438 | + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC) != 1) { |
|---|
| 1439 | + dev_warn( |
|---|
| 1440 | + kbdev->dev, |
|---|
| 1441 | + "2MB page is enabled by force while current GPU-HW doesn't meet the requirement to do so.\n"); |
|---|
| 1442 | + } |
|---|
| 1443 | +#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */ |
|---|
| 1444 | + kbdev->pagesize_2mb = false; |
|---|
| 1445 | +#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */ |
|---|
| 1446 | +#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */ |
|---|
| 1447 | + /* Set it to the default based on which GPU is present */ |
|---|
| 1448 | + kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC); |
|---|
| 1449 | +#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */ |
|---|
| 1450 | +} |
|---|
| 1451 | + |
|---|
| 1179 | 1452 | int kbase_mem_init(struct kbase_device *kbdev) |
|---|
| 1180 | 1453 | { |
|---|
| 1181 | 1454 | int err = 0; |
|---|
| 1182 | 1455 | struct kbasep_mem_device *memdev; |
|---|
| 1456 | + char va_region_slab_name[VA_REGION_SLAB_NAME_SIZE]; |
|---|
| 1183 | 1457 | #if IS_ENABLED(CONFIG_OF) |
|---|
| 1184 | 1458 | struct device_node *mgm_node = NULL; |
|---|
| 1185 | 1459 | #endif |
|---|
| .. | .. |
|---|
| 1188 | 1462 | |
|---|
| 1189 | 1463 | memdev = &kbdev->memdev; |
|---|
| 1190 | 1464 | |
|---|
| 1465 | + kbasep_mem_page_size_init(kbdev); |
|---|
| 1466 | + |
|---|
| 1467 | + scnprintf(va_region_slab_name, VA_REGION_SLAB_NAME_SIZE, VA_REGION_SLAB_NAME_PREFIX "%s", |
|---|
| 1468 | + kbdev->devname); |
|---|
| 1469 | + |
|---|
| 1470 | + /* Initialize slab cache for kbase_va_regions */ |
|---|
| 1471 | + kbdev->va_region_slab = |
|---|
| 1472 | + kmem_cache_create(va_region_slab_name, sizeof(struct kbase_va_region), 0, 0, NULL); |
|---|
| 1473 | + if (kbdev->va_region_slab == NULL) { |
|---|
| 1474 | + dev_err(kbdev->dev, "Failed to create va_region_slab\n"); |
|---|
| 1475 | + return -ENOMEM; |
|---|
| 1476 | + } |
|---|
| 1477 | + |
|---|
| 1478 | + kbase_mem_migrate_init(kbdev); |
|---|
| 1191 | 1479 | kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults, |
|---|
| 1192 | 1480 | KBASE_MEM_POOL_MAX_SIZE_KCTX); |
|---|
| 1193 | 1481 | |
|---|
| .. | .. |
|---|
| 1250 | 1538 | kbase_mem_pool_group_config_set_max_size(&mem_pool_defaults, |
|---|
| 1251 | 1539 | KBASE_MEM_POOL_MAX_SIZE_KBDEV); |
|---|
| 1252 | 1540 | |
|---|
| 1253 | | - err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev, |
|---|
| 1254 | | - &mem_pool_defaults, NULL); |
|---|
| 1541 | + err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev, &mem_pool_defaults, NULL); |
|---|
| 1255 | 1542 | } |
|---|
| 1256 | 1543 | |
|---|
| 1257 | 1544 | return err; |
|---|
| .. | .. |
|---|
| 1277 | 1564 | |
|---|
| 1278 | 1565 | kbase_mem_pool_group_term(&kbdev->mem_pools); |
|---|
| 1279 | 1566 | |
|---|
| 1567 | + kbase_mem_migrate_term(kbdev); |
|---|
| 1568 | + |
|---|
| 1569 | + kmem_cache_destroy(kbdev->va_region_slab); |
|---|
| 1570 | + kbdev->va_region_slab = NULL; |
|---|
| 1571 | + |
|---|
| 1280 | 1572 | WARN_ON(kbdev->total_gpu_pages); |
|---|
| 1281 | 1573 | WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root)); |
|---|
| 1282 | 1574 | WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root)); |
|---|
| .. | .. |
|---|
| 1288 | 1580 | KBASE_EXPORT_TEST_API(kbase_mem_term); |
|---|
| 1289 | 1581 | |
|---|
| 1290 | 1582 | /** |
|---|
| 1291 | | - * Allocate a free region object. |
|---|
| 1583 | + * kbase_alloc_free_region - Allocate a free region object. |
|---|
| 1584 | + * |
|---|
| 1585 | + * @kbdev: kbase device |
|---|
| 1292 | 1586 | * @rbtree: Backlink to the red-black tree of memory regions. |
|---|
| 1293 | 1587 | * @start_pfn: The Page Frame Number in GPU virtual address space. |
|---|
| 1294 | 1588 | * @nr_pages: The size of the region in pages. |
|---|
| .. | .. |
|---|
| 1299 | 1593 | * |
|---|
| 1300 | 1594 | * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA. |
|---|
| 1301 | 1595 | * |
|---|
| 1596 | + * Return: pointer to the allocated region object on success, NULL otherwise. |
|---|
| 1302 | 1597 | */ |
|---|
| 1303 | | -struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, |
|---|
| 1304 | | - u64 start_pfn, size_t nr_pages, int zone) |
|---|
| 1598 | +struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree, |
|---|
| 1599 | + u64 start_pfn, size_t nr_pages, int zone) |
|---|
| 1305 | 1600 | { |
|---|
| 1306 | 1601 | struct kbase_va_region *new_reg; |
|---|
| 1307 | 1602 | |
|---|
| .. | .. |
|---|
| 1313 | 1608 | /* 64-bit address range is the max */ |
|---|
| 1314 | 1609 | KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE)); |
|---|
| 1315 | 1610 | |
|---|
| 1316 | | - new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL); |
|---|
| 1611 | + new_reg = kmem_cache_zalloc(kbdev->va_region_slab, GFP_KERNEL); |
|---|
| 1317 | 1612 | |
|---|
| 1318 | 1613 | if (!new_reg) |
|---|
| 1319 | 1614 | return NULL; |
|---|
| 1320 | 1615 | |
|---|
| 1321 | | - new_reg->va_refcnt = 1; |
|---|
| 1616 | + kbase_refcount_set(&new_reg->va_refcnt, 1); |
|---|
| 1617 | + atomic_set(&new_reg->no_user_free_count, 0); |
|---|
| 1322 | 1618 | new_reg->cpu_alloc = NULL; /* no alloc bound yet */ |
|---|
| 1323 | 1619 | new_reg->gpu_alloc = NULL; /* no alloc bound yet */ |
|---|
| 1324 | 1620 | new_reg->rbtree = rbtree; |
|---|
| .. | .. |
|---|
| 1337 | 1633 | |
|---|
| 1338 | 1634 | KBASE_EXPORT_TEST_API(kbase_alloc_free_region); |
|---|
| 1339 | 1635 | |
|---|
| 1340 | | -static struct kbase_context *kbase_reg_flags_to_kctx( |
|---|
| 1341 | | - struct kbase_va_region *reg) |
|---|
| 1342 | | -{ |
|---|
| 1343 | | - struct kbase_context *kctx = NULL; |
|---|
| 1344 | | - struct rb_root *rbtree = reg->rbtree; |
|---|
| 1345 | | - |
|---|
| 1346 | | - switch (reg->flags & KBASE_REG_ZONE_MASK) { |
|---|
| 1347 | | - case KBASE_REG_ZONE_CUSTOM_VA: |
|---|
| 1348 | | - kctx = container_of(rbtree, struct kbase_context, |
|---|
| 1349 | | - reg_rbtree_custom); |
|---|
| 1350 | | - break; |
|---|
| 1351 | | - case KBASE_REG_ZONE_SAME_VA: |
|---|
| 1352 | | - kctx = container_of(rbtree, struct kbase_context, |
|---|
| 1353 | | - reg_rbtree_same); |
|---|
| 1354 | | - break; |
|---|
| 1355 | | - case KBASE_REG_ZONE_EXEC_VA: |
|---|
| 1356 | | - kctx = container_of(rbtree, struct kbase_context, |
|---|
| 1357 | | - reg_rbtree_exec); |
|---|
| 1358 | | - break; |
|---|
| 1359 | | - default: |
|---|
| 1360 | | - WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); |
|---|
| 1361 | | - break; |
|---|
| 1362 | | - } |
|---|
| 1363 | | - |
|---|
| 1364 | | - return kctx; |
|---|
| 1365 | | -} |
|---|
| 1366 | | - |
|---|
| 1367 | 1636 | /** |
|---|
| 1368 | | - * Free a region object. |
|---|
| 1637 | + * kbase_free_alloced_region - Free a region object. |
|---|
| 1638 | + * |
|---|
| 1369 | 1639 | * @reg: Region |
|---|
| 1370 | 1640 | * |
|---|
| 1371 | 1641 | * The described region must be freed of any mapping. |
|---|
| .. | .. |
|---|
| 1374 | 1644 | * alloc object will be released. |
|---|
| 1375 | 1645 | * It is a bug if no alloc object exists for non-free regions. |
|---|
| 1376 | 1646 | * |
|---|
| 1647 | + * If region is KBASE_REG_ZONE_MCU_SHARED it is freed |
|---|
| 1377 | 1648 | */ |
|---|
| 1378 | 1649 | void kbase_free_alloced_region(struct kbase_va_region *reg) |
|---|
| 1379 | 1650 | { |
|---|
| .. | .. |
|---|
| 1397 | 1668 | (void *)reg); |
|---|
| 1398 | 1669 | #if MALI_USE_CSF |
|---|
| 1399 | 1670 | if (reg->flags & KBASE_REG_CSF_EVENT) |
|---|
| 1671 | + /* |
|---|
| 1672 | + * This should not be reachable if called from 'mcu_shared' functions |
|---|
| 1673 | + * such as: |
|---|
| 1674 | + * kbase_csf_firmware_mcu_shared_mapping_init |
|---|
| 1675 | + * kbase_csf_firmware_mcu_shared_mapping_term |
|---|
| 1676 | + */ |
|---|
| 1677 | + |
|---|
| 1400 | 1678 | kbase_unlink_event_mem_page(kctx, reg); |
|---|
| 1401 | 1679 | #endif |
|---|
| 1402 | 1680 | |
|---|
| .. | .. |
|---|
| 1410 | 1688 | * on the list at termination time of the region tracker. |
|---|
| 1411 | 1689 | */ |
|---|
| 1412 | 1690 | if (!list_empty(®->gpu_alloc->evict_node)) { |
|---|
| 1413 | | - mutex_unlock(&kctx->jit_evict_lock); |
|---|
| 1414 | | - |
|---|
| 1415 | 1691 | /* |
|---|
| 1416 | 1692 | * Unlink the physical allocation before unmaking it |
|---|
| 1417 | 1693 | * evictable so that the allocation isn't grown back to |
|---|
| .. | .. |
|---|
| 1421 | 1697 | reg->cpu_alloc->reg = NULL; |
|---|
| 1422 | 1698 | if (reg->cpu_alloc != reg->gpu_alloc) |
|---|
| 1423 | 1699 | reg->gpu_alloc->reg = NULL; |
|---|
| 1700 | + |
|---|
| 1701 | + mutex_unlock(&kctx->jit_evict_lock); |
|---|
| 1424 | 1702 | |
|---|
| 1425 | 1703 | /* |
|---|
| 1426 | 1704 | * If a region has been made evictable then we must |
|---|
| .. | .. |
|---|
| 1457 | 1735 | |
|---|
| 1458 | 1736 | KBASE_EXPORT_TEST_API(kbase_free_alloced_region); |
|---|
| 1459 | 1737 | |
|---|
| 1460 | | -int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align) |
|---|
| 1738 | +int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, |
|---|
| 1739 | + u64 addr, size_t nr_pages, size_t align, |
|---|
| 1740 | + enum kbase_caller_mmu_sync_info mmu_sync_info) |
|---|
| 1461 | 1741 | { |
|---|
| 1462 | 1742 | int err; |
|---|
| 1463 | 1743 | size_t i = 0; |
|---|
| .. | .. |
|---|
| 1494 | 1774 | KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased); |
|---|
| 1495 | 1775 | for (i = 0; i < alloc->imported.alias.nents; i++) { |
|---|
| 1496 | 1776 | if (alloc->imported.alias.aliased[i].alloc) { |
|---|
| 1497 | | - err = kbase_mmu_insert_pages(kctx->kbdev, |
|---|
| 1498 | | - &kctx->mmu, |
|---|
| 1499 | | - reg->start_pfn + (i * stride), |
|---|
| 1500 | | - alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset, |
|---|
| 1501 | | - alloc->imported.alias.aliased[i].length, |
|---|
| 1502 | | - reg->flags & gwt_mask, |
|---|
| 1503 | | - kctx->as_nr, |
|---|
| 1504 | | - group_id); |
|---|
| 1777 | + err = kbase_mmu_insert_aliased_pages( |
|---|
| 1778 | + kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride), |
|---|
| 1779 | + alloc->imported.alias.aliased[i].alloc->pages + |
|---|
| 1780 | + alloc->imported.alias.aliased[i].offset, |
|---|
| 1781 | + alloc->imported.alias.aliased[i].length, |
|---|
| 1782 | + reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, |
|---|
| 1783 | + NULL); |
|---|
| 1505 | 1784 | if (err) |
|---|
| 1506 | | - goto bad_insert; |
|---|
| 1785 | + goto bad_aliased_insert; |
|---|
| 1507 | 1786 | |
|---|
| 1508 | 1787 | /* Note: mapping count is tracked at alias |
|---|
| 1509 | 1788 | * creation time |
|---|
| 1510 | 1789 | */ |
|---|
| 1511 | 1790 | } else { |
|---|
| 1512 | | - err = kbase_mmu_insert_single_page(kctx, |
|---|
| 1513 | | - reg->start_pfn + i * stride, |
|---|
| 1514 | | - kctx->aliasing_sink_page, |
|---|
| 1791 | + err = kbase_mmu_insert_single_aliased_page( |
|---|
| 1792 | + kctx, reg->start_pfn + i * stride, kctx->aliasing_sink_page, |
|---|
| 1515 | 1793 | alloc->imported.alias.aliased[i].length, |
|---|
| 1516 | | - (reg->flags & mask & gwt_mask) | attr, |
|---|
| 1517 | | - group_id); |
|---|
| 1794 | + (reg->flags & mask & gwt_mask) | attr, group_id, |
|---|
| 1795 | + mmu_sync_info); |
|---|
| 1518 | 1796 | |
|---|
| 1519 | 1797 | if (err) |
|---|
| 1520 | | - goto bad_insert; |
|---|
| 1798 | + goto bad_aliased_insert; |
|---|
| 1521 | 1799 | } |
|---|
| 1522 | 1800 | } |
|---|
| 1523 | 1801 | } else { |
|---|
| 1524 | | - err = kbase_mmu_insert_pages(kctx->kbdev, |
|---|
| 1525 | | - &kctx->mmu, |
|---|
| 1526 | | - reg->start_pfn, |
|---|
| 1527 | | - kbase_get_gpu_phy_pages(reg), |
|---|
| 1528 | | - kbase_reg_current_backed_size(reg), |
|---|
| 1529 | | - reg->flags & gwt_mask, |
|---|
| 1530 | | - kctx->as_nr, |
|---|
| 1531 | | - group_id); |
|---|
| 1802 | + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM || |
|---|
| 1803 | + reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { |
|---|
| 1804 | + |
|---|
| 1805 | + err = kbase_mmu_insert_imported_pages( |
|---|
| 1806 | + kctx->kbdev, &kctx->mmu, reg->start_pfn, |
|---|
| 1807 | + kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), |
|---|
| 1808 | + reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, reg); |
|---|
| 1809 | + } else { |
|---|
| 1810 | + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, |
|---|
| 1811 | + kbase_get_gpu_phy_pages(reg), |
|---|
| 1812 | + kbase_reg_current_backed_size(reg), |
|---|
| 1813 | + reg->flags & gwt_mask, kctx->as_nr, group_id, |
|---|
| 1814 | + mmu_sync_info, reg, true); |
|---|
| 1815 | + } |
|---|
| 1816 | + |
|---|
| 1532 | 1817 | if (err) |
|---|
| 1533 | 1818 | goto bad_insert; |
|---|
| 1534 | 1819 | kbase_mem_phy_alloc_gpu_mapped(alloc); |
|---|
| .. | .. |
|---|
| 1538 | 1823 | !WARN_ON(reg->nr_pages < reg->gpu_alloc->nents) && |
|---|
| 1539 | 1824 | reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM && |
|---|
| 1540 | 1825 | reg->gpu_alloc->imported.umm.current_mapping_usage_count) { |
|---|
| 1541 | | - /* For padded imported dma-buf memory, map the dummy aliasing |
|---|
| 1542 | | - * page from the end of the dma-buf pages, to the end of the |
|---|
| 1543 | | - * region using a read only mapping. |
|---|
| 1826 | + /* For padded imported dma-buf or user-buf memory, map the dummy |
|---|
| 1827 | + * aliasing page from the end of the imported pages, to the end of |
|---|
| 1828 | + * the region using a read only mapping. |
|---|
| 1544 | 1829 | * |
|---|
| 1545 | 1830 | * Only map when it's imported dma-buf memory that is currently |
|---|
| 1546 | 1831 | * mapped. |
|---|
| .. | .. |
|---|
| 1548 | 1833 | * Assume reg->gpu_alloc->nents is the number of actual pages |
|---|
| 1549 | 1834 | * in the dma-buf memory. |
|---|
| 1550 | 1835 | */ |
|---|
| 1551 | | - err = kbase_mmu_insert_single_page(kctx, |
|---|
| 1552 | | - reg->start_pfn + reg->gpu_alloc->nents, |
|---|
| 1553 | | - kctx->aliasing_sink_page, |
|---|
| 1554 | | - reg->nr_pages - reg->gpu_alloc->nents, |
|---|
| 1555 | | - (reg->flags | KBASE_REG_GPU_RD) & |
|---|
| 1556 | | - ~KBASE_REG_GPU_WR, |
|---|
| 1557 | | - KBASE_MEM_GROUP_SINK); |
|---|
| 1836 | + err = kbase_mmu_insert_single_imported_page( |
|---|
| 1837 | + kctx, reg->start_pfn + reg->gpu_alloc->nents, kctx->aliasing_sink_page, |
|---|
| 1838 | + reg->nr_pages - reg->gpu_alloc->nents, |
|---|
| 1839 | + (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK, |
|---|
| 1840 | + mmu_sync_info); |
|---|
| 1558 | 1841 | if (err) |
|---|
| 1559 | 1842 | goto bad_insert; |
|---|
| 1560 | 1843 | } |
|---|
| 1561 | 1844 | |
|---|
| 1562 | 1845 | return err; |
|---|
| 1563 | 1846 | |
|---|
| 1564 | | -bad_insert: |
|---|
| 1565 | | - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, |
|---|
| 1566 | | - reg->start_pfn, reg->nr_pages, |
|---|
| 1567 | | - kctx->as_nr); |
|---|
| 1847 | +bad_aliased_insert: |
|---|
| 1848 | + while (i-- > 0) { |
|---|
| 1849 | + struct tagged_addr *phys_alloc = NULL; |
|---|
| 1850 | + u64 const stride = alloc->imported.alias.stride; |
|---|
| 1568 | 1851 | |
|---|
| 1569 | | - kbase_remove_va_region(reg); |
|---|
| 1852 | + if (alloc->imported.alias.aliased[i].alloc != NULL) |
|---|
| 1853 | + phys_alloc = alloc->imported.alias.aliased[i].alloc->pages + |
|---|
| 1854 | + alloc->imported.alias.aliased[i].offset; |
|---|
| 1855 | + |
|---|
| 1856 | + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride), |
|---|
| 1857 | + phys_alloc, alloc->imported.alias.aliased[i].length, |
|---|
| 1858 | + alloc->imported.alias.aliased[i].length, kctx->as_nr, |
|---|
| 1859 | + false); |
|---|
| 1860 | + } |
|---|
| 1861 | +bad_insert: |
|---|
| 1862 | + kbase_remove_va_region(kctx->kbdev, reg); |
|---|
| 1570 | 1863 | |
|---|
| 1571 | 1864 | return err; |
|---|
| 1572 | 1865 | } |
|---|
| 1573 | 1866 | |
|---|
| 1574 | 1867 | KBASE_EXPORT_TEST_API(kbase_gpu_mmap); |
|---|
| 1575 | 1868 | |
|---|
| 1576 | | -static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, |
|---|
| 1577 | | - struct kbase_mem_phy_alloc *alloc, bool writeable); |
|---|
| 1869 | +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, |
|---|
| 1870 | + struct kbase_va_region *reg, bool writeable); |
|---|
| 1578 | 1871 | |
|---|
| 1579 | 1872 | int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) |
|---|
| 1580 | 1873 | { |
|---|
| 1581 | 1874 | int err = 0; |
|---|
| 1875 | + struct kbase_mem_phy_alloc *alloc; |
|---|
| 1582 | 1876 | |
|---|
| 1583 | 1877 | if (reg->start_pfn == 0) |
|---|
| 1584 | 1878 | return 0; |
|---|
| .. | .. |
|---|
| 1586 | 1880 | if (!reg->gpu_alloc) |
|---|
| 1587 | 1881 | return -EINVAL; |
|---|
| 1588 | 1882 | |
|---|
| 1589 | | - /* Tear down down GPU page tables, depending on memory type. */ |
|---|
| 1590 | | - switch (reg->gpu_alloc->type) { |
|---|
| 1591 | | - case KBASE_MEM_TYPE_ALIAS: /* Fall-through */ |
|---|
| 1592 | | - case KBASE_MEM_TYPE_IMPORTED_UMM: |
|---|
| 1593 | | - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, |
|---|
| 1594 | | - reg->start_pfn, reg->nr_pages, kctx->as_nr); |
|---|
| 1883 | + alloc = reg->gpu_alloc; |
|---|
| 1884 | + |
|---|
| 1885 | + /* Tear down GPU page tables, depending on memory type. */ |
|---|
| 1886 | + switch (alloc->type) { |
|---|
| 1887 | + case KBASE_MEM_TYPE_ALIAS: { |
|---|
| 1888 | + size_t i = 0; |
|---|
| 1889 | + /* Due to the way the number of valid PTEs and ATEs are tracked |
|---|
| 1890 | + * currently, only the GPU virtual range that is backed & mapped |
|---|
| 1891 | + * should be passed to the kbase_mmu_teardown_pages() function, |
|---|
| 1892 | + * hence individual aliased regions needs to be unmapped |
|---|
| 1893 | + * separately. |
|---|
| 1894 | + */ |
|---|
| 1895 | + for (i = 0; i < alloc->imported.alias.nents; i++) { |
|---|
| 1896 | + struct tagged_addr *phys_alloc = NULL; |
|---|
| 1897 | + int err_loop; |
|---|
| 1898 | + |
|---|
| 1899 | + if (alloc->imported.alias.aliased[i].alloc != NULL) |
|---|
| 1900 | + phys_alloc = alloc->imported.alias.aliased[i].alloc->pages + |
|---|
| 1901 | + alloc->imported.alias.aliased[i].offset; |
|---|
| 1902 | + |
|---|
| 1903 | + err_loop = kbase_mmu_teardown_pages( |
|---|
| 1904 | + kctx->kbdev, &kctx->mmu, |
|---|
| 1905 | + reg->start_pfn + (i * alloc->imported.alias.stride), |
|---|
| 1906 | + phys_alloc, alloc->imported.alias.aliased[i].length, |
|---|
| 1907 | + alloc->imported.alias.aliased[i].length, kctx->as_nr, |
|---|
| 1908 | + false); |
|---|
| 1909 | + |
|---|
| 1910 | + if (WARN_ON_ONCE(err_loop)) |
|---|
| 1911 | + err = err_loop; |
|---|
| 1912 | + } |
|---|
| 1913 | + } |
|---|
| 1595 | 1914 | break; |
|---|
| 1596 | | - default: |
|---|
| 1597 | | - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, |
|---|
| 1598 | | - reg->start_pfn, kbase_reg_current_backed_size(reg), |
|---|
| 1599 | | - kctx->as_nr); |
|---|
| 1915 | + case KBASE_MEM_TYPE_IMPORTED_UMM: { |
|---|
| 1916 | + size_t nr_phys_pages = reg->nr_pages; |
|---|
| 1917 | + size_t nr_virt_pages = reg->nr_pages; |
|---|
| 1918 | + /* If the region has import padding and falls under the threshold for |
|---|
| 1919 | + * issuing a partial GPU cache flush, we want to reduce the number of |
|---|
| 1920 | + * physical pages that get flushed. |
|---|
| 1921 | + |
|---|
| 1922 | + * This is symmetric with case of mapping the memory, which first maps |
|---|
| 1923 | + * each imported physical page to a separate virtual page, and then |
|---|
| 1924 | + * maps the single aliasing sink page to each of the virtual padding |
|---|
| 1925 | + * pages. |
|---|
| 1926 | + */ |
|---|
| 1927 | + if (reg->flags & KBASE_REG_IMPORT_PAD) |
|---|
| 1928 | + nr_phys_pages = alloc->nents + 1; |
|---|
| 1929 | + |
|---|
| 1930 | + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, |
|---|
| 1931 | + alloc->pages, nr_phys_pages, nr_virt_pages, |
|---|
| 1932 | + kctx->as_nr, true); |
|---|
| 1933 | + } |
|---|
| 1934 | + break; |
|---|
| 1935 | + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { |
|---|
| 1936 | + size_t nr_reg_pages = kbase_reg_current_backed_size(reg); |
|---|
| 1937 | + |
|---|
| 1938 | + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, |
|---|
| 1939 | + alloc->pages, nr_reg_pages, nr_reg_pages, |
|---|
| 1940 | + kctx->as_nr, true); |
|---|
| 1941 | + } |
|---|
| 1942 | + break; |
|---|
| 1943 | + default: { |
|---|
| 1944 | + size_t nr_reg_pages = kbase_reg_current_backed_size(reg); |
|---|
| 1945 | + |
|---|
| 1946 | + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, |
|---|
| 1947 | + alloc->pages, nr_reg_pages, nr_reg_pages, |
|---|
| 1948 | + kctx->as_nr, false); |
|---|
| 1949 | + } |
|---|
| 1600 | 1950 | break; |
|---|
| 1601 | 1951 | } |
|---|
| 1602 | 1952 | |
|---|
| 1603 | 1953 | /* Update tracking, and other cleanup, depending on memory type. */ |
|---|
| 1604 | | - switch (reg->gpu_alloc->type) { |
|---|
| 1954 | + switch (alloc->type) { |
|---|
| 1605 | 1955 | case KBASE_MEM_TYPE_ALIAS: |
|---|
| 1606 | 1956 | /* We mark the source allocs as unmapped from the GPU when |
|---|
| 1607 | 1957 | * putting reg's allocs |
|---|
| 1608 | 1958 | */ |
|---|
| 1609 | 1959 | break; |
|---|
| 1610 | 1960 | case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { |
|---|
| 1611 | | - struct kbase_alloc_import_user_buf *user_buf = |
|---|
| 1612 | | - ®->gpu_alloc->imported.user_buf; |
|---|
| 1961 | + struct kbase_alloc_import_user_buf *user_buf = &alloc->imported.user_buf; |
|---|
| 1613 | 1962 | |
|---|
| 1614 | | - if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) { |
|---|
| 1615 | | - user_buf->current_mapping_usage_count &= |
|---|
| 1616 | | - ~PINNED_ON_IMPORT; |
|---|
| 1963 | + if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) { |
|---|
| 1964 | + user_buf->current_mapping_usage_count &= ~PINNED_ON_IMPORT; |
|---|
| 1617 | 1965 | |
|---|
| 1618 | | - /* The allocation could still have active mappings. */ |
|---|
| 1619 | | - if (user_buf->current_mapping_usage_count == 0) { |
|---|
| 1620 | | - kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc, |
|---|
| 1621 | | - (reg->flags & KBASE_REG_GPU_WR)); |
|---|
| 1622 | | - } |
|---|
| 1966 | + /* The allocation could still have active mappings. */ |
|---|
| 1967 | + if (user_buf->current_mapping_usage_count == 0) { |
|---|
| 1968 | + kbase_jd_user_buf_unmap(kctx, alloc, reg, |
|---|
| 1969 | + (reg->flags & |
|---|
| 1970 | + (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR))); |
|---|
| 1623 | 1971 | } |
|---|
| 1624 | 1972 | } |
|---|
| 1625 | | - /* Fall-through */ |
|---|
| 1973 | + } |
|---|
| 1974 | + fallthrough; |
|---|
| 1626 | 1975 | default: |
|---|
| 1627 | 1976 | kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc); |
|---|
| 1628 | 1977 | break; |
|---|
| .. | .. |
|---|
| 1741 | 2090 | BUG_ON(!cpu_page); |
|---|
| 1742 | 2091 | BUG_ON(offset + size > PAGE_SIZE); |
|---|
| 1743 | 2092 | |
|---|
| 1744 | | - dma_addr = kbase_dma_addr(cpu_page) + offset; |
|---|
| 2093 | + dma_addr = kbase_dma_addr_from_tagged(t_cpu_pa) + offset; |
|---|
| 2094 | + |
|---|
| 1745 | 2095 | if (sync_fn == KBASE_SYNC_TO_CPU) |
|---|
| 1746 | 2096 | dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, |
|---|
| 1747 | 2097 | size, DMA_BIDIRECTIONAL); |
|---|
| .. | .. |
|---|
| 1752 | 2102 | void *src = NULL; |
|---|
| 1753 | 2103 | void *dst = NULL; |
|---|
| 1754 | 2104 | struct page *gpu_page; |
|---|
| 2105 | + dma_addr_t dma_addr; |
|---|
| 1755 | 2106 | |
|---|
| 1756 | 2107 | if (WARN(!gpu_pa, "No GPU PA found for infinite cache op")) |
|---|
| 1757 | 2108 | return; |
|---|
| 1758 | 2109 | |
|---|
| 1759 | 2110 | gpu_page = pfn_to_page(PFN_DOWN(gpu_pa)); |
|---|
| 2111 | + dma_addr = kbase_dma_addr_from_tagged(t_gpu_pa) + offset; |
|---|
| 1760 | 2112 | |
|---|
| 1761 | 2113 | if (sync_fn == KBASE_SYNC_TO_DEVICE) { |
|---|
| 1762 | 2114 | src = ((unsigned char *)kmap(cpu_page)) + offset; |
|---|
| 1763 | 2115 | dst = ((unsigned char *)kmap(gpu_page)) + offset; |
|---|
| 1764 | 2116 | } else if (sync_fn == KBASE_SYNC_TO_CPU) { |
|---|
| 1765 | | - dma_sync_single_for_cpu(kctx->kbdev->dev, |
|---|
| 1766 | | - kbase_dma_addr(gpu_page) + offset, |
|---|
| 1767 | | - size, DMA_BIDIRECTIONAL); |
|---|
| 2117 | + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, size, |
|---|
| 2118 | + DMA_BIDIRECTIONAL); |
|---|
| 1768 | 2119 | src = ((unsigned char *)kmap(gpu_page)) + offset; |
|---|
| 1769 | 2120 | dst = ((unsigned char *)kmap(cpu_page)) + offset; |
|---|
| 1770 | 2121 | } |
|---|
| 2122 | + |
|---|
| 1771 | 2123 | memcpy(dst, src, size); |
|---|
| 1772 | 2124 | kunmap(gpu_page); |
|---|
| 1773 | 2125 | kunmap(cpu_page); |
|---|
| 1774 | 2126 | if (sync_fn == KBASE_SYNC_TO_DEVICE) |
|---|
| 1775 | | - dma_sync_single_for_device(kctx->kbdev->dev, |
|---|
| 1776 | | - kbase_dma_addr(gpu_page) + offset, |
|---|
| 1777 | | - size, DMA_BIDIRECTIONAL); |
|---|
| 2127 | + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, size, |
|---|
| 2128 | + DMA_BIDIRECTIONAL); |
|---|
| 1778 | 2129 | } |
|---|
| 1779 | 2130 | } |
|---|
| 1780 | 2131 | |
|---|
| .. | .. |
|---|
| 1920 | 2271 | __func__, (void *)reg, (void *)kctx); |
|---|
| 1921 | 2272 | lockdep_assert_held(&kctx->reg_lock); |
|---|
| 1922 | 2273 | |
|---|
| 1923 | | - if (reg->flags & KBASE_REG_NO_USER_FREE) { |
|---|
| 2274 | + if (kbase_va_region_is_no_user_free(reg)) { |
|---|
| 1924 | 2275 | dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n"); |
|---|
| 1925 | 2276 | return -EINVAL; |
|---|
| 1926 | 2277 | } |
|---|
| 1927 | 2278 | |
|---|
| 1928 | | - /* |
|---|
| 1929 | | - * Unlink the physical allocation before unmaking it evictable so |
|---|
| 1930 | | - * that the allocation isn't grown back to its last backed size |
|---|
| 1931 | | - * as we're going to unmap it anyway. |
|---|
| 1932 | | - */ |
|---|
| 1933 | | - reg->cpu_alloc->reg = NULL; |
|---|
| 1934 | | - if (reg->cpu_alloc != reg->gpu_alloc) |
|---|
| 1935 | | - reg->gpu_alloc->reg = NULL; |
|---|
| 1936 | | - |
|---|
| 1937 | | - /* |
|---|
| 1938 | | - * If a region has been made evictable then we must unmake it |
|---|
| 2279 | + /* If a region has been made evictable then we must unmake it |
|---|
| 1939 | 2280 | * before trying to free it. |
|---|
| 1940 | 2281 | * If the memory hasn't been reclaimed it will be unmapped and freed |
|---|
| 1941 | 2282 | * below, if it has been reclaimed then the operations below are no-ops. |
|---|
| 1942 | 2283 | */ |
|---|
| 1943 | 2284 | if (reg->flags & KBASE_REG_DONT_NEED) { |
|---|
| 1944 | | - KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == |
|---|
| 1945 | | - KBASE_MEM_TYPE_NATIVE); |
|---|
| 2285 | + WARN_ON(reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE); |
|---|
| 2286 | + mutex_lock(&kctx->jit_evict_lock); |
|---|
| 2287 | + /* Unlink the physical allocation before unmaking it evictable so |
|---|
| 2288 | + * that the allocation isn't grown back to its last backed size |
|---|
| 2289 | + * as we're going to unmap it anyway. |
|---|
| 2290 | + */ |
|---|
| 2291 | + reg->cpu_alloc->reg = NULL; |
|---|
| 2292 | + if (reg->cpu_alloc != reg->gpu_alloc) |
|---|
| 2293 | + reg->gpu_alloc->reg = NULL; |
|---|
| 2294 | + mutex_unlock(&kctx->jit_evict_lock); |
|---|
| 1946 | 2295 | kbase_mem_evictable_unmake(reg->gpu_alloc); |
|---|
| 1947 | 2296 | } |
|---|
| 1948 | 2297 | |
|---|
| .. | .. |
|---|
| 1952 | 2301 | goto out; |
|---|
| 1953 | 2302 | } |
|---|
| 1954 | 2303 | |
|---|
| 2304 | +#if MALI_USE_CSF |
|---|
| 2305 | + if (((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_FIXED_VA) || |
|---|
| 2306 | + ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_EXEC_FIXED_VA)) { |
|---|
| 2307 | + if (reg->flags & KBASE_REG_FIXED_ADDRESS) |
|---|
| 2308 | + atomic64_dec(&kctx->num_fixed_allocs); |
|---|
| 2309 | + else |
|---|
| 2310 | + atomic64_dec(&kctx->num_fixable_allocs); |
|---|
| 2311 | + } |
|---|
| 2312 | +#endif |
|---|
| 2313 | + |
|---|
| 1955 | 2314 | /* This will also free the physical pages */ |
|---|
| 1956 | 2315 | kbase_free_alloced_region(reg); |
|---|
| 1957 | 2316 | |
|---|
| 1958 | | - out: |
|---|
| 2317 | +out: |
|---|
| 1959 | 2318 | return err; |
|---|
| 1960 | 2319 | } |
|---|
| 1961 | 2320 | |
|---|
| 1962 | 2321 | KBASE_EXPORT_TEST_API(kbase_mem_free_region); |
|---|
| 1963 | 2322 | |
|---|
| 1964 | 2323 | /** |
|---|
| 1965 | | - * Free the region from the GPU and unregister it. |
|---|
| 2324 | + * kbase_mem_free - Free the region from the GPU and unregister it. |
|---|
| 2325 | + * |
|---|
| 1966 | 2326 | * @kctx: KBase context |
|---|
| 1967 | 2327 | * @gpu_addr: GPU address to free |
|---|
| 1968 | 2328 | * |
|---|
| 1969 | 2329 | * This function implements the free operation on a memory segment. |
|---|
| 1970 | 2330 | * It will loudly fail if called with outstanding mappings. |
|---|
| 2331 | + * |
|---|
| 2332 | + * Return: 0 on success. |
|---|
| 1971 | 2333 | */ |
|---|
| 1972 | 2334 | int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) |
|---|
| 1973 | 2335 | { |
|---|
| .. | .. |
|---|
| 1979 | 2341 | __func__, gpu_addr, (void *)kctx); |
|---|
| 1980 | 2342 | |
|---|
| 1981 | 2343 | if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) { |
|---|
| 1982 | | - dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid"); |
|---|
| 2344 | + dev_warn(kctx->kbdev->dev, "%s: gpu_addr parameter is invalid", __func__); |
|---|
| 1983 | 2345 | return -EINVAL; |
|---|
| 1984 | 2346 | } |
|---|
| 1985 | 2347 | |
|---|
| 1986 | 2348 | if (gpu_addr == 0) { |
|---|
| 1987 | | - dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n"); |
|---|
| 2349 | + dev_warn(kctx->kbdev->dev, |
|---|
| 2350 | + "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using %s\n", |
|---|
| 2351 | + __func__); |
|---|
| 1988 | 2352 | return -EINVAL; |
|---|
| 1989 | 2353 | } |
|---|
| 1990 | 2354 | kbase_gpu_vm_lock(kctx); |
|---|
| .. | .. |
|---|
| 2010 | 2374 | /* Validate the region */ |
|---|
| 2011 | 2375 | reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); |
|---|
| 2012 | 2376 | if (kbase_is_region_invalid_or_free(reg)) { |
|---|
| 2013 | | - dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX", |
|---|
| 2014 | | - gpu_addr); |
|---|
| 2377 | + dev_warn(kctx->kbdev->dev, "%s called with nonexistent gpu_addr 0x%llX", |
|---|
| 2378 | + __func__, gpu_addr); |
|---|
| 2015 | 2379 | err = -EINVAL; |
|---|
| 2016 | 2380 | goto out_unlock; |
|---|
| 2017 | 2381 | } |
|---|
| .. | .. |
|---|
| 2026 | 2390 | err = kbase_mem_free_region(kctx, reg); |
|---|
| 2027 | 2391 | } |
|---|
| 2028 | 2392 | |
|---|
| 2029 | | - out_unlock: |
|---|
| 2393 | +out_unlock: |
|---|
| 2030 | 2394 | kbase_gpu_vm_unlock(kctx); |
|---|
| 2031 | 2395 | return err; |
|---|
| 2032 | 2396 | } |
|---|
| .. | .. |
|---|
| 2126 | 2490 | if (flags & BASEP_MEM_PERMANENT_KERNEL_MAPPING) |
|---|
| 2127 | 2491 | reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING; |
|---|
| 2128 | 2492 | |
|---|
| 2129 | | - if (flags & BASEP_MEM_NO_USER_FREE) |
|---|
| 2130 | | - reg->flags |= KBASE_REG_NO_USER_FREE; |
|---|
| 2493 | + if (flags & BASEP_MEM_NO_USER_FREE) { |
|---|
| 2494 | + kbase_gpu_vm_lock(kctx); |
|---|
| 2495 | + kbase_va_region_no_user_free_inc(reg); |
|---|
| 2496 | + kbase_gpu_vm_unlock(kctx); |
|---|
| 2497 | + } |
|---|
| 2131 | 2498 | |
|---|
| 2132 | 2499 | if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) |
|---|
| 2133 | 2500 | reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE; |
|---|
| 2501 | + |
|---|
| 2502 | +#if MALI_USE_CSF |
|---|
| 2503 | + if (flags & BASE_MEM_FIXED) |
|---|
| 2504 | + reg->flags |= KBASE_REG_FIXED_ADDRESS; |
|---|
| 2505 | +#endif |
|---|
| 2134 | 2506 | |
|---|
| 2135 | 2507 | return 0; |
|---|
| 2136 | 2508 | } |
|---|
| .. | .. |
|---|
| 2174 | 2546 | |
|---|
| 2175 | 2547 | tp = alloc->pages + alloc->nents; |
|---|
| 2176 | 2548 | |
|---|
| 2177 | | -#ifdef CONFIG_MALI_2MB_ALLOC |
|---|
| 2178 | 2549 | /* Check if we have enough pages requested so we can allocate a large |
|---|
| 2179 | 2550 | * page (512 * 4KB = 2MB ) |
|---|
| 2180 | 2551 | */ |
|---|
| 2181 | | - if (nr_left >= (SZ_2M / SZ_4K)) { |
|---|
| 2552 | + if (kbdev->pagesize_2mb && nr_left >= (SZ_2M / SZ_4K)) { |
|---|
| 2182 | 2553 | int nr_lp = nr_left / (SZ_2M / SZ_4K); |
|---|
| 2183 | 2554 | |
|---|
| 2184 | | - res = kbase_mem_pool_alloc_pages( |
|---|
| 2185 | | - &kctx->mem_pools.large[alloc->group_id], |
|---|
| 2186 | | - nr_lp * (SZ_2M / SZ_4K), |
|---|
| 2187 | | - tp, |
|---|
| 2188 | | - true); |
|---|
| 2555 | + res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id], |
|---|
| 2556 | + nr_lp * (SZ_2M / SZ_4K), tp, true, kctx->task); |
|---|
| 2189 | 2557 | |
|---|
| 2190 | 2558 | if (res > 0) { |
|---|
| 2191 | 2559 | nr_left -= res; |
|---|
| .. | .. |
|---|
| 2239 | 2607 | |
|---|
| 2240 | 2608 | err = kbase_mem_pool_grow( |
|---|
| 2241 | 2609 | &kctx->mem_pools.large[alloc->group_id], |
|---|
| 2242 | | - 1); |
|---|
| 2610 | + 1, kctx->task); |
|---|
| 2243 | 2611 | if (err) |
|---|
| 2244 | 2612 | break; |
|---|
| 2245 | 2613 | } while (1); |
|---|
| .. | .. |
|---|
| 2280 | 2648 | } |
|---|
| 2281 | 2649 | } |
|---|
| 2282 | 2650 | } |
|---|
| 2283 | | -no_new_partial: |
|---|
| 2284 | | -#endif |
|---|
| 2285 | 2651 | |
|---|
| 2652 | +no_new_partial: |
|---|
| 2286 | 2653 | if (nr_left) { |
|---|
| 2287 | | - res = kbase_mem_pool_alloc_pages( |
|---|
| 2288 | | - &kctx->mem_pools.small[alloc->group_id], |
|---|
| 2289 | | - nr_left, tp, false); |
|---|
| 2654 | + res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[alloc->group_id], nr_left, |
|---|
| 2655 | + tp, false, kctx->task); |
|---|
| 2290 | 2656 | if (res <= 0) |
|---|
| 2291 | 2657 | goto alloc_failed; |
|---|
| 2292 | 2658 | } |
|---|
| .. | .. |
|---|
| 2345 | 2711 | |
|---|
| 2346 | 2712 | lockdep_assert_held(&pool->pool_lock); |
|---|
| 2347 | 2713 | |
|---|
| 2348 | | -#if !defined(CONFIG_MALI_2MB_ALLOC) |
|---|
| 2349 | | - WARN_ON(pool->order); |
|---|
| 2350 | | -#endif |
|---|
| 2714 | + kctx = alloc->imported.native.kctx; |
|---|
| 2715 | + kbdev = kctx->kbdev; |
|---|
| 2716 | + |
|---|
| 2717 | + if (!kbdev->pagesize_2mb) |
|---|
| 2718 | + WARN_ON(pool->order); |
|---|
| 2351 | 2719 | |
|---|
| 2352 | 2720 | if (alloc->reg) { |
|---|
| 2353 | 2721 | if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents) |
|---|
| 2354 | 2722 | goto invalid_request; |
|---|
| 2355 | 2723 | } |
|---|
| 2356 | | - |
|---|
| 2357 | | - kctx = alloc->imported.native.kctx; |
|---|
| 2358 | | - kbdev = kctx->kbdev; |
|---|
| 2359 | 2724 | |
|---|
| 2360 | 2725 | lockdep_assert_held(&kctx->mem_partials_lock); |
|---|
| 2361 | 2726 | |
|---|
| .. | .. |
|---|
| 2375 | 2740 | tp = alloc->pages + alloc->nents; |
|---|
| 2376 | 2741 | new_pages = tp; |
|---|
| 2377 | 2742 | |
|---|
| 2378 | | -#ifdef CONFIG_MALI_2MB_ALLOC |
|---|
| 2379 | | - if (pool->order) { |
|---|
| 2743 | + if (kbdev->pagesize_2mb && pool->order) { |
|---|
| 2380 | 2744 | int nr_lp = nr_left / (SZ_2M / SZ_4K); |
|---|
| 2381 | 2745 | |
|---|
| 2382 | 2746 | res = kbase_mem_pool_alloc_pages_locked(pool, |
|---|
| .. | .. |
|---|
| 2460 | 2824 | if (nr_left) |
|---|
| 2461 | 2825 | goto alloc_failed; |
|---|
| 2462 | 2826 | } else { |
|---|
| 2463 | | -#endif |
|---|
| 2464 | 2827 | res = kbase_mem_pool_alloc_pages_locked(pool, |
|---|
| 2465 | 2828 | nr_left, |
|---|
| 2466 | 2829 | tp); |
|---|
| 2467 | 2830 | if (res <= 0) |
|---|
| 2468 | 2831 | goto alloc_failed; |
|---|
| 2469 | | -#ifdef CONFIG_MALI_2MB_ALLOC |
|---|
| 2470 | 2832 | } |
|---|
| 2471 | | -#endif |
|---|
| 2472 | 2833 | |
|---|
| 2473 | 2834 | KBASE_TLSTREAM_AUX_PAGESALLOC( |
|---|
| 2474 | 2835 | kbdev, |
|---|
| .. | .. |
|---|
| 2489 | 2850 | |
|---|
| 2490 | 2851 | struct tagged_addr *start_free = alloc->pages + alloc->nents; |
|---|
| 2491 | 2852 | |
|---|
| 2492 | | -#ifdef CONFIG_MALI_2MB_ALLOC |
|---|
| 2493 | | - if (pool->order) { |
|---|
| 2853 | + if (kbdev->pagesize_2mb && pool->order) { |
|---|
| 2494 | 2854 | while (nr_pages_to_free) { |
|---|
| 2495 | 2855 | if (is_huge_head(*start_free)) { |
|---|
| 2496 | 2856 | kbase_mem_pool_free_pages_locked( |
|---|
| .. | .. |
|---|
| 2508 | 2868 | } |
|---|
| 2509 | 2869 | } |
|---|
| 2510 | 2870 | } else { |
|---|
| 2511 | | -#endif |
|---|
| 2512 | 2871 | kbase_mem_pool_free_pages_locked(pool, |
|---|
| 2513 | 2872 | nr_pages_to_free, |
|---|
| 2514 | 2873 | start_free, |
|---|
| 2515 | 2874 | false, /* not dirty */ |
|---|
| 2516 | 2875 | true); /* return to pool */ |
|---|
| 2517 | | -#ifdef CONFIG_MALI_2MB_ALLOC |
|---|
| 2518 | 2876 | } |
|---|
| 2519 | | -#endif |
|---|
| 2520 | 2877 | } |
|---|
| 2521 | 2878 | |
|---|
| 2522 | 2879 | kbase_process_page_usage_dec(kctx, nr_pages_requested); |
|---|
| .. | .. |
|---|
| 2778 | 3135 | /** |
|---|
| 2779 | 3136 | * kbase_jd_user_buf_unpin_pages - Release the pinned pages of a user buffer. |
|---|
| 2780 | 3137 | * @alloc: The allocation for the imported user buffer. |
|---|
| 3138 | + * |
|---|
| 3139 | + * This must only be called when terminating an alloc, when its refcount |
|---|
| 3140 | + * (number of users) has become 0. This also ensures it is only called once all |
|---|
| 3141 | + * CPU mappings have been closed. |
|---|
| 3142 | + * |
|---|
| 3143 | + * Instead call kbase_jd_user_buf_unmap() if you need to unpin pages on active |
|---|
| 3144 | + * allocations |
|---|
| 2781 | 3145 | */ |
|---|
| 2782 | 3146 | static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc); |
|---|
| 2783 | 3147 | #endif |
|---|
| .. | .. |
|---|
| 2908 | 3272 | out_term: |
|---|
| 2909 | 3273 | return -1; |
|---|
| 2910 | 3274 | } |
|---|
| 2911 | | - |
|---|
| 2912 | 3275 | KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages); |
|---|
| 3276 | + |
|---|
| 3277 | +void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc, |
|---|
| 3278 | + enum kbase_page_status status) |
|---|
| 3279 | +{ |
|---|
| 3280 | + u32 i = 0; |
|---|
| 3281 | + |
|---|
| 3282 | + for (; i < alloc->nents; i++) { |
|---|
| 3283 | + struct tagged_addr phys = alloc->pages[i]; |
|---|
| 3284 | + struct kbase_page_metadata *page_md = kbase_page_private(as_page(phys)); |
|---|
| 3285 | + |
|---|
| 3286 | + /* Skip the 4KB page that is part of a large page, as the large page is |
|---|
| 3287 | + * excluded from the migration process. |
|---|
| 3288 | + */ |
|---|
| 3289 | + if (is_huge(phys) || is_partial(phys)) |
|---|
| 3290 | + continue; |
|---|
| 3291 | + |
|---|
| 3292 | + if (!page_md) |
|---|
| 3293 | + continue; |
|---|
| 3294 | + |
|---|
| 3295 | + spin_lock(&page_md->migrate_lock); |
|---|
| 3296 | + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)status); |
|---|
| 3297 | + spin_unlock(&page_md->migrate_lock); |
|---|
| 3298 | + } |
|---|
| 3299 | +} |
|---|
| 2913 | 3300 | |
|---|
| 2914 | 3301 | bool kbase_check_alloc_flags(unsigned long flags) |
|---|
| 2915 | 3302 | { |
|---|
| .. | .. |
|---|
| 2983 | 3370 | (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) |
|---|
| 2984 | 3371 | return false; |
|---|
| 2985 | 3372 | |
|---|
| 3373 | +#if MALI_USE_CSF |
|---|
| 3374 | + if ((flags & BASE_MEM_SAME_VA) && (flags & (BASE_MEM_FIXABLE | BASE_MEM_FIXED))) |
|---|
| 3375 | + return false; |
|---|
| 3376 | + |
|---|
| 3377 | + if ((flags & BASE_MEM_FIXABLE) && (flags & BASE_MEM_FIXED)) |
|---|
| 3378 | + return false; |
|---|
| 3379 | +#endif |
|---|
| 3380 | + |
|---|
| 2986 | 3381 | return true; |
|---|
| 2987 | 3382 | } |
|---|
| 2988 | 3383 | |
|---|
| .. | .. |
|---|
| 3004 | 3399 | if (flags & BASE_MEM_GROW_ON_GPF) |
|---|
| 3005 | 3400 | return false; |
|---|
| 3006 | 3401 | |
|---|
| 3007 | | -#if !MALI_USE_CSF |
|---|
| 3402 | +#if MALI_USE_CSF |
|---|
| 3403 | + /* Imported memory cannot be fixed */ |
|---|
| 3404 | + if ((flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE))) |
|---|
| 3405 | + return false; |
|---|
| 3406 | +#else |
|---|
| 3008 | 3407 | /* Imported memory cannot be aligned to the end of its initial commit */ |
|---|
| 3009 | 3408 | if (flags & BASE_MEM_TILER_ALIGN_TOP) |
|---|
| 3010 | 3409 | return false; |
|---|
| .. | .. |
|---|
| 3139 | 3538 | #undef KBASE_MSG_PRE |
|---|
| 3140 | 3539 | } |
|---|
| 3141 | 3540 | |
|---|
| 3142 | | -/** |
|---|
| 3143 | | - * Acquire the per-context region list lock |
|---|
| 3144 | | - * @kctx: KBase context |
|---|
| 3145 | | - */ |
|---|
| 3146 | 3541 | void kbase_gpu_vm_lock(struct kbase_context *kctx) |
|---|
| 3147 | 3542 | { |
|---|
| 3148 | 3543 | KBASE_DEBUG_ASSERT(kctx != NULL); |
|---|
| .. | .. |
|---|
| 3151 | 3546 | |
|---|
| 3152 | 3547 | KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock); |
|---|
| 3153 | 3548 | |
|---|
| 3154 | | -/** |
|---|
| 3155 | | - * Release the per-context region list lock |
|---|
| 3156 | | - * @kctx: KBase context |
|---|
| 3157 | | - */ |
|---|
| 3158 | 3549 | void kbase_gpu_vm_unlock(struct kbase_context *kctx) |
|---|
| 3159 | 3550 | { |
|---|
| 3160 | 3551 | KBASE_DEBUG_ASSERT(kctx != NULL); |
|---|
| .. | .. |
|---|
| 3165 | 3556 | |
|---|
| 3166 | 3557 | #if IS_ENABLED(CONFIG_DEBUG_FS) |
|---|
| 3167 | 3558 | struct kbase_jit_debugfs_data { |
|---|
| 3168 | | - int (*func)(struct kbase_jit_debugfs_data *); |
|---|
| 3559 | + int (*func)(struct kbase_jit_debugfs_data *data); |
|---|
| 3169 | 3560 | struct mutex lock; |
|---|
| 3170 | 3561 | struct kbase_context *kctx; |
|---|
| 3171 | 3562 | u64 active_value; |
|---|
| .. | .. |
|---|
| 3388 | 3779 | void kbase_jit_debugfs_init(struct kbase_context *kctx) |
|---|
| 3389 | 3780 | { |
|---|
| 3390 | 3781 | /* prevent unprivileged use of debug file system |
|---|
| 3391 | | - * in old kernel version |
|---|
| 3392 | | - */ |
|---|
| 3393 | | -#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) |
|---|
| 3394 | | - /* only for newer kernel version debug file system is safe */ |
|---|
| 3782 | + * in old kernel version |
|---|
| 3783 | + */ |
|---|
| 3395 | 3784 | const mode_t mode = 0444; |
|---|
| 3396 | | -#else |
|---|
| 3397 | | - const mode_t mode = 0400; |
|---|
| 3398 | | -#endif |
|---|
| 3399 | 3785 | |
|---|
| 3400 | 3786 | /* Caller already ensures this, but we keep the pattern for |
|---|
| 3401 | 3787 | * maintenance safety. |
|---|
| .. | .. |
|---|
| 3469 | 3855 | mutex_unlock(&kctx->jit_evict_lock); |
|---|
| 3470 | 3856 | |
|---|
| 3471 | 3857 | kbase_gpu_vm_lock(kctx); |
|---|
| 3472 | | - reg->flags &= ~KBASE_REG_NO_USER_FREE; |
|---|
| 3858 | + |
|---|
| 3859 | + /* |
|---|
| 3860 | + * Incrementing the refcount is prevented on JIT regions. |
|---|
| 3861 | + * If/when this ever changes we would need to compensate |
|---|
| 3862 | + * by implementing "free on putting the last reference", |
|---|
| 3863 | + * but only for JIT regions. |
|---|
| 3864 | + */ |
|---|
| 3865 | + WARN_ON(atomic_read(&reg->no_user_free_count) > 1); |
|---|
| 3866 | + kbase_va_region_no_user_free_dec(reg); |
|---|
| 3473 | 3867 | kbase_mem_free_region(kctx, reg); |
|---|
| 3474 | 3868 | kbase_gpu_vm_unlock(kctx); |
|---|
| 3475 | 3869 | } while (1); |
|---|
| .. | .. |
|---|
| 3484 | 3878 | INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker); |
|---|
| 3485 | 3879 | |
|---|
| 3486 | 3880 | #if MALI_USE_CSF |
|---|
| 3881 | + mutex_init(&kctx->csf.kcpu_queues.jit_lock); |
|---|
| 3487 | 3882 | INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_cmds_head); |
|---|
| 3488 | 3883 | INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_blocked_queues); |
|---|
| 3489 | 3884 | #else /* !MALI_USE_CSF */ |
|---|
| .. | .. |
|---|
| 3698 | 4093 | static int kbase_jit_grow(struct kbase_context *kctx, |
|---|
| 3699 | 4094 | const struct base_jit_alloc_info *info, |
|---|
| 3700 | 4095 | struct kbase_va_region *reg, |
|---|
| 3701 | | - struct kbase_sub_alloc **prealloc_sas) |
|---|
| 4096 | + struct kbase_sub_alloc **prealloc_sas, |
|---|
| 4097 | + enum kbase_caller_mmu_sync_info mmu_sync_info) |
|---|
| 3702 | 4098 | { |
|---|
| 3703 | 4099 | size_t delta; |
|---|
| 3704 | 4100 | size_t pages_required; |
|---|
| .. | .. |
|---|
| 3728 | 4124 | delta = info->commit_pages - reg->gpu_alloc->nents; |
|---|
| 3729 | 4125 | pages_required = delta; |
|---|
| 3730 | 4126 | |
|---|
| 3731 | | -#ifdef CONFIG_MALI_2MB_ALLOC |
|---|
| 3732 | | - if (pages_required >= (SZ_2M / SZ_4K)) { |
|---|
| 4127 | + if (kctx->kbdev->pagesize_2mb && pages_required >= (SZ_2M / SZ_4K)) { |
|---|
| 3733 | 4128 | pool = &kctx->mem_pools.large[kctx->jit_group_id]; |
|---|
| 3734 | 4129 | /* Round up to number of 2 MB pages required */ |
|---|
| 3735 | 4130 | pages_required += ((SZ_2M / SZ_4K) - 1); |
|---|
| 3736 | 4131 | pages_required /= (SZ_2M / SZ_4K); |
|---|
| 3737 | 4132 | } else { |
|---|
| 3738 | | -#endif |
|---|
| 3739 | 4133 | pool = &kctx->mem_pools.small[kctx->jit_group_id]; |
|---|
| 3740 | | -#ifdef CONFIG_MALI_2MB_ALLOC |
|---|
| 3741 | 4134 | } |
|---|
| 3742 | | -#endif |
|---|
| 3743 | 4135 | |
|---|
| 3744 | 4136 | if (reg->cpu_alloc != reg->gpu_alloc) |
|---|
| 3745 | 4137 | pages_required *= 2; |
|---|
| .. | .. |
|---|
| 3760 | 4152 | spin_unlock(&kctx->mem_partials_lock); |
|---|
| 3761 | 4153 | |
|---|
| 3762 | 4154 | kbase_gpu_vm_unlock(kctx); |
|---|
| 3763 | | - ret = kbase_mem_pool_grow(pool, pool_delta); |
|---|
| 4155 | + ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task); |
|---|
| 3764 | 4156 | kbase_gpu_vm_lock(kctx); |
|---|
| 3765 | 4157 | |
|---|
| 3766 | 4158 | if (ret) |
|---|
| .. | .. |
|---|
| 3795 | 4187 | spin_unlock(&kctx->mem_partials_lock); |
|---|
| 3796 | 4188 | |
|---|
| 3797 | 4189 | ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages, |
|---|
| 3798 | | - old_size); |
|---|
| 4190 | + old_size, mmu_sync_info); |
|---|
| 3799 | 4191 | /* |
|---|
| 3800 | 4192 | * The grow failed so put the allocation back in the |
|---|
| 3801 | 4193 | * pool and return failure. |
|---|
| .. | .. |
|---|
| 3920 | 4312 | const struct base_jit_alloc_info *info, |
|---|
| 3921 | 4313 | bool ignore_pressure_limit) |
|---|
| 3922 | 4314 | { |
|---|
| 3923 | | -#if MALI_USE_CSF |
|---|
| 3924 | | - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); |
|---|
| 3925 | | -#else |
|---|
| 4315 | +#if !MALI_USE_CSF |
|---|
| 3926 | 4316 | lockdep_assert_held(&kctx->jctx.lock); |
|---|
| 3927 | | -#endif |
|---|
| 4317 | +#else /* MALI_USE_CSF */ |
|---|
| 4318 | + lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); |
|---|
| 4319 | +#endif /* !MALI_USE_CSF */ |
|---|
| 3928 | 4320 | |
|---|
| 3929 | 4321 | #if MALI_JIT_PRESSURE_LIMIT_BASE |
|---|
| 3930 | 4322 | if (!ignore_pressure_limit && |
|---|
| .. | .. |
|---|
| 4010 | 4402 | struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; |
|---|
| 4011 | 4403 | int i; |
|---|
| 4012 | 4404 | |
|---|
| 4013 | | -#if MALI_USE_CSF |
|---|
| 4014 | | - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); |
|---|
| 4015 | | -#else |
|---|
| 4405 | + /* Calls to this function are inherently synchronous, with respect to |
|---|
| 4406 | + * MMU operations. |
|---|
| 4407 | + */ |
|---|
| 4408 | + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; |
|---|
| 4409 | + |
|---|
| 4410 | +#if !MALI_USE_CSF |
|---|
| 4016 | 4411 | lockdep_assert_held(&kctx->jctx.lock); |
|---|
| 4017 | | -#endif |
|---|
| 4412 | +#else /* MALI_USE_CSF */ |
|---|
| 4413 | + lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); |
|---|
| 4414 | +#endif /* !MALI_USE_CSF */ |
|---|
| 4018 | 4415 | |
|---|
| 4019 | 4416 | if (!jit_allow_allocate(kctx, info, ignore_pressure_limit)) |
|---|
| 4020 | 4417 | return NULL; |
|---|
| 4021 | 4418 | |
|---|
| 4022 | | -#ifdef CONFIG_MALI_2MB_ALLOC |
|---|
| 4023 | | - /* Preallocate memory for the sub-allocation structs */ |
|---|
| 4024 | | - for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { |
|---|
| 4025 | | - prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); |
|---|
| 4026 | | - if (!prealloc_sas[i]) |
|---|
| 4027 | | - goto end; |
|---|
| 4419 | + if (kctx->kbdev->pagesize_2mb) { |
|---|
| 4420 | + /* Preallocate memory for the sub-allocation structs */ |
|---|
| 4421 | + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { |
|---|
| 4422 | + prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); |
|---|
| 4423 | + if (!prealloc_sas[i]) |
|---|
| 4424 | + goto end; |
|---|
| 4425 | + } |
|---|
| 4028 | 4426 | } |
|---|
| 4029 | | -#endif |
|---|
| 4030 | 4427 | |
|---|
| 4031 | 4428 | kbase_gpu_vm_lock(kctx); |
|---|
| 4032 | 4429 | mutex_lock(&kctx->jit_evict_lock); |
|---|
| .. | .. |
|---|
| 4102 | 4499 | * so any state protected by that lock might need to be |
|---|
| 4103 | 4500 | * re-evaluated if more code is added here in future. |
|---|
| 4104 | 4501 | */ |
|---|
| 4105 | | - ret = kbase_jit_grow(kctx, info, reg, prealloc_sas); |
|---|
| 4502 | + ret = kbase_jit_grow(kctx, info, reg, prealloc_sas, |
|---|
| 4503 | + mmu_sync_info); |
|---|
| 4106 | 4504 | |
|---|
| 4107 | 4505 | #if MALI_JIT_PRESSURE_LIMIT_BASE |
|---|
| 4108 | 4506 | if (!ignore_pressure_limit) |
|---|
| .. | .. |
|---|
| 4114 | 4512 | if (ret < 0) { |
|---|
| 4115 | 4513 | /* |
|---|
| 4116 | 4514 | * An update to an allocation from the pool failed, |
|---|
| 4117 | | - * chances are slim a new allocation would fair any |
|---|
| 4515 | + * chances are slim a new allocation would fare any |
|---|
| 4118 | 4516 | * better so return the allocation to the pool and |
|---|
| 4119 | 4517 | * return the function with failure. |
|---|
| 4120 | 4518 | */ |
|---|
| .. | .. |
|---|
| 4136 | 4534 | mutex_unlock(&kctx->jit_evict_lock); |
|---|
| 4137 | 4535 | reg = NULL; |
|---|
| 4138 | 4536 | goto end; |
|---|
| 4537 | + } else { |
|---|
| 4538 | + /* A suitable JIT allocation existed on the evict list, so we need |
|---|
| 4539 | + * to make sure that the NOT_MOVABLE property is cleared. |
|---|
| 4540 | + */ |
|---|
| 4541 | + if (kbase_page_migration_enabled) { |
|---|
| 4542 | + kbase_gpu_vm_lock(kctx); |
|---|
| 4543 | + mutex_lock(&kctx->jit_evict_lock); |
|---|
| 4544 | + kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED); |
|---|
| 4545 | + mutex_unlock(&kctx->jit_evict_lock); |
|---|
| 4546 | + kbase_gpu_vm_unlock(kctx); |
|---|
| 4547 | + } |
|---|
| 4139 | 4548 | } |
|---|
| 4140 | 4549 | } else { |
|---|
| 4141 | 4550 | /* No suitable JIT allocation was found so create a new one */ |
|---|
| .. | .. |
|---|
| 4150 | 4559 | flags |= BASE_MEM_TILER_ALIGN_TOP; |
|---|
| 4151 | 4560 | #endif /* !MALI_USE_CSF */ |
|---|
| 4152 | 4561 | |
|---|
| 4153 | | - flags |= base_mem_group_id_set(kctx->jit_group_id); |
|---|
| 4562 | + flags |= kbase_mem_group_id_set(kctx->jit_group_id); |
|---|
| 4154 | 4563 | #if MALI_JIT_PRESSURE_LIMIT_BASE |
|---|
| 4155 | 4564 | if (!ignore_pressure_limit) { |
|---|
| 4156 | 4565 | flags |= BASEP_MEM_PERFORM_JIT_TRIM; |
|---|
| .. | .. |
|---|
| 4165 | 4574 | mutex_unlock(&kctx->jit_evict_lock); |
|---|
| 4166 | 4575 | kbase_gpu_vm_unlock(kctx); |
|---|
| 4167 | 4576 | |
|---|
| 4168 | | - reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, |
|---|
| 4169 | | - info->extension, &flags, &gpu_addr); |
|---|
| 4577 | + reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, info->extension, |
|---|
| 4578 | + &flags, &gpu_addr, mmu_sync_info); |
|---|
| 4170 | 4579 | if (!reg) { |
|---|
| 4171 | 4580 | /* Most likely not enough GPU virtual space left for |
|---|
| 4172 | 4581 | * the new JIT allocation. |
|---|
| .. | .. |
|---|
| 4192 | 4601 | } |
|---|
| 4193 | 4602 | } |
|---|
| 4194 | 4603 | |
|---|
| 4604 | + /* Similarly to tiler heap init, there is a short window of time |
|---|
| 4605 | + * where the (either recycled or newly allocated, in our case) region has |
|---|
| 4606 | + * "no user free" count incremented but is still missing the DONT_NEED flag, and |
|---|
| 4607 | + * doesn't yet have the ACTIVE_JIT_ALLOC flag either. Temporarily leaking the |
|---|
| 4608 | + * allocation is the least bad option that doesn't lead to a security issue down the |
|---|
| 4609 | + * line (it will eventually be cleaned up during context termination). |
|---|
| 4610 | + * |
|---|
| 4611 | + * We also need to call kbase_gpu_vm_lock regardless, as we're updating the region |
|---|
| 4612 | + * flags. |
|---|
| 4613 | + */ |
|---|
| 4614 | + kbase_gpu_vm_lock(kctx); |
|---|
| 4615 | + if (unlikely(atomic_read(&reg->no_user_free_count) > 1)) { |
|---|
| 4616 | + kbase_gpu_vm_unlock(kctx); |
|---|
| 4617 | + dev_err(kctx->kbdev->dev, "JIT region has no_user_free_count > 1!\n"); |
|---|
| 4618 | + |
|---|
| 4619 | + mutex_lock(&kctx->jit_evict_lock); |
|---|
| 4620 | + list_move(&reg->jit_node, &kctx->jit_pool_head); |
|---|
| 4621 | + mutex_unlock(&kctx->jit_evict_lock); |
|---|
| 4622 | + |
|---|
| 4623 | + reg = NULL; |
|---|
| 4624 | + goto end; |
|---|
| 4625 | + } |
|---|
| 4626 | + |
|---|
| 4195 | 4627 | trace_mali_jit_alloc(reg, info->id); |
|---|
| 4196 | 4628 | |
|---|
| 4197 | 4629 | kctx->jit_current_allocations++; |
|---|
| .. | .. |
|---|
| 4209 | 4641 | kbase_jit_report_update_pressure(kctx, reg, info->va_pages, |
|---|
| 4210 | 4642 | KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); |
|---|
| 4211 | 4643 | #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ |
|---|
| 4644 | + kbase_gpu_vm_unlock(kctx); |
|---|
| 4212 | 4645 | |
|---|
| 4213 | 4646 | end: |
|---|
| 4214 | 4647 | for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) |
|---|
| .. | .. |
|---|
| 4220 | 4653 | void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) |
|---|
| 4221 | 4654 | { |
|---|
| 4222 | 4655 | u64 old_pages; |
|---|
| 4656 | + |
|---|
| 4657 | +#if !MALI_USE_CSF |
|---|
| 4658 | + lockdep_assert_held(&kctx->jctx.lock); |
|---|
| 4659 | +#else /* MALI_USE_CSF */ |
|---|
| 4660 | + lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); |
|---|
| 4661 | +#endif /* !MALI_USE_CSF */ |
|---|
| 4223 | 4662 | |
|---|
| 4224 | 4663 | /* JIT id not immediately available here, so use 0u */ |
|---|
| 4225 | 4664 | trace_mali_jit_free(reg, 0u); |
|---|
| .. | .. |
|---|
| 4273 | 4712 | |
|---|
| 4274 | 4713 | list_move(&reg->jit_node, &kctx->jit_pool_head); |
|---|
| 4275 | 4714 | |
|---|
| 4715 | + /* Inactive JIT regions should be freed by the shrinker and not impacted |
|---|
| 4716 | + * by page migration. Once freed, they will enter into the page migration |
|---|
| 4717 | + * state machine via the mempools. |
|---|
| 4718 | + */ |
|---|
| 4719 | + if (kbase_page_migration_enabled) |
|---|
| 4720 | + kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE); |
|---|
| 4276 | 4721 | mutex_unlock(&kctx->jit_evict_lock); |
|---|
| 4277 | 4722 | } |
|---|
| 4278 | 4723 | |
|---|
| .. | .. |
|---|
| 4319 | 4764 | mutex_unlock(&kctx->jit_evict_lock); |
|---|
| 4320 | 4765 | |
|---|
| 4321 | 4766 | if (reg) { |
|---|
| 4322 | | - reg->flags &= ~KBASE_REG_NO_USER_FREE; |
|---|
| 4767 | + /* |
|---|
| 4768 | + * Incrementing the refcount is prevented on JIT regions. |
|---|
| 4769 | + * If/when this ever changes we would need to compensate |
|---|
| 4770 | + * by implementing "free on putting the last reference", |
|---|
| 4771 | + * but only for JIT regions. |
|---|
| 4772 | + */ |
|---|
| 4773 | + WARN_ON(atomic_read(®->no_user_free_count) > 1); |
|---|
| 4774 | + kbase_va_region_no_user_free_dec(reg); |
|---|
| 4323 | 4775 | kbase_mem_free_region(kctx, reg); |
|---|
| 4324 | 4776 | } |
|---|
| 4325 | 4777 | |
|---|
| .. | .. |
|---|
| 4341 | 4793 | list_del(&walker->jit_node); |
|---|
| 4342 | 4794 | list_del_init(&walker->gpu_alloc->evict_node); |
|---|
| 4343 | 4795 | mutex_unlock(&kctx->jit_evict_lock); |
|---|
| 4344 | | - walker->flags &= ~KBASE_REG_NO_USER_FREE; |
|---|
| 4796 | + /* |
|---|
| 4797 | + * Incrementing the refcount is prevented on JIT regions. |
|---|
| 4798 | + * If/when this ever changes we would need to compensate |
|---|
| 4799 | + * by implementing "free on putting the last reference", |
|---|
| 4800 | + * but only for JIT regions. |
|---|
| 4801 | + */ |
|---|
| 4802 | + WARN_ON(atomic_read(&walker->no_user_free_count) > 1); |
|---|
| 4803 | + kbase_va_region_no_user_free_dec(walker); |
|---|
| 4345 | 4804 | kbase_mem_free_region(kctx, walker); |
|---|
| 4346 | 4805 | mutex_lock(&kctx->jit_evict_lock); |
|---|
| 4347 | 4806 | } |
|---|
| .. | .. |
|---|
| 4353 | 4812 | list_del(&walker->jit_node); |
|---|
| 4354 | 4813 | list_del_init(&walker->gpu_alloc->evict_node); |
|---|
| 4355 | 4814 | mutex_unlock(&kctx->jit_evict_lock); |
|---|
| 4356 | | - walker->flags &= ~KBASE_REG_NO_USER_FREE; |
|---|
| 4815 | + /* |
|---|
| 4816 | + * Incrementing the refcount is prevented on JIT regions. |
|---|
| 4817 | + * If/when this ever changes we would need to compensate |
|---|
| 4818 | + * by implementing "free on putting the last reference", |
|---|
| 4819 | + * but only for JIT regions. |
|---|
| 4820 | + */ |
|---|
| 4821 | + WARN_ON(atomic_read(&walker->no_user_free_count) > 1); |
|---|
| 4822 | + kbase_va_region_no_user_free_dec(walker); |
|---|
| 4357 | 4823 | kbase_mem_free_region(kctx, walker); |
|---|
| 4358 | 4824 | mutex_lock(&kctx->jit_evict_lock); |
|---|
| 4359 | 4825 | } |
|---|
| .. | .. |
|---|
| 4396 | 4862 | |
|---|
| 4397 | 4863 | addr_start = reg->heap_info_gpu_addr - jit_report_gpu_mem_offset; |
|---|
| 4398 | 4864 | |
|---|
| 4399 | | - ptr = kbase_vmap(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE, |
|---|
| 4400 | | - &mapping); |
|---|
| 4865 | + ptr = kbase_vmap_prot(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE, |
|---|
| 4866 | + KBASE_REG_CPU_RD, &mapping); |
|---|
| 4401 | 4867 | if (!ptr) { |
|---|
| 4402 | 4868 | dev_warn(kctx->kbdev->dev, |
|---|
| 4403 | 4869 | "%s: JIT start=0x%llx unable to map memory near end pointer %llx\n", |
|---|
| .. | .. |
|---|
| 4455 | 4921 | } |
|---|
| 4456 | 4922 | #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ |
|---|
| 4457 | 4923 | |
|---|
| 4924 | +void kbase_unpin_user_buf_page(struct page *page) |
|---|
| 4925 | +{ |
|---|
| 4926 | +#if KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE |
|---|
| 4927 | + put_page(page); |
|---|
| 4928 | +#else |
|---|
| 4929 | + unpin_user_page(page); |
|---|
| 4930 | +#endif |
|---|
| 4931 | +} |
|---|
| 4932 | + |
|---|
| 4458 | 4933 | #if MALI_USE_CSF |
|---|
| 4459 | 4934 | static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc) |
|---|
| 4460 | 4935 | { |
|---|
| 4461 | | - if (alloc->nents) { |
|---|
| 4936 | + /* In CSF builds, we keep pages pinned until the last reference is |
|---|
| 4937 | + * released on the alloc. A refcount of 0 also means we can be sure |
|---|
| 4938 | + * that all CPU mappings have been closed on this alloc, and no more |
|---|
| 4939 | + * mappings of it will be created. |
|---|
| 4940 | + * |
|---|
| 4941 | + * Further, the WARN() below captures the restriction that this |
|---|
| 4942 | + * function will not handle anything other than the alloc termination |
|---|
| 4943 | + * path, because the caller of kbase_mem_phy_alloc_put() is not |
|---|
| 4944 | + * required to hold the kctx's reg_lock, and so we could not handle |
|---|
| 4945 | + * removing an existing CPU mapping here. |
|---|
| 4946 | + * |
|---|
| 4947 | + * Refer to this function's kernel-doc comments for alternatives for |
|---|
| 4948 | + * unpinning a User buffer. |
|---|
| 4949 | + */ |
|---|
| 4950 | + |
|---|
| 4951 | + if (alloc->nents && !WARN(kref_read(&alloc->kref) != 0, |
|---|
| 4952 | + "must only be called on terminating an allocation")) { |
|---|
| 4462 | 4953 | struct page **pages = alloc->imported.user_buf.pages; |
|---|
| 4463 | 4954 | long i; |
|---|
| 4464 | 4955 | |
|---|
| 4465 | 4956 | WARN_ON(alloc->nents != alloc->imported.user_buf.nr_pages); |
|---|
| 4466 | 4957 | |
|---|
| 4467 | 4958 | for (i = 0; i < alloc->nents; i++) |
|---|
| 4468 | | - put_page(pages[i]); |
|---|
| 4959 | + kbase_unpin_user_buf_page(pages[i]); |
|---|
| 4960 | + |
|---|
| 4961 | + alloc->nents = 0; |
|---|
| 4469 | 4962 | } |
|---|
| 4470 | 4963 | } |
|---|
| 4471 | 4964 | #endif |
|---|
| .. | .. |
|---|
| 4479 | 4972 | struct mm_struct *mm = alloc->imported.user_buf.mm; |
|---|
| 4480 | 4973 | long pinned_pages; |
|---|
| 4481 | 4974 | long i; |
|---|
| 4975 | + int write; |
|---|
| 4976 | + |
|---|
| 4977 | + lockdep_assert_held(&kctx->reg_lock); |
|---|
| 4482 | 4978 | |
|---|
| 4483 | 4979 | if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF)) |
|---|
| 4484 | 4980 | return -EINVAL; |
|---|
| .. | .. |
|---|
| 4493 | 4989 | if (WARN_ON(reg->gpu_alloc->imported.user_buf.mm != current->mm)) |
|---|
| 4494 | 4990 | return -EINVAL; |
|---|
| 4495 | 4991 | |
|---|
| 4496 | | -#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE |
|---|
| 4497 | | - pinned_pages = get_user_pages(NULL, mm, |
|---|
| 4498 | | - address, |
|---|
| 4499 | | - alloc->imported.user_buf.nr_pages, |
|---|
| 4500 | | -#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \ |
|---|
| 4501 | | -KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE |
|---|
| 4502 | | - reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, |
|---|
| 4503 | | - pages, NULL); |
|---|
| 4992 | + write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); |
|---|
| 4993 | + |
|---|
| 4994 | +#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE |
|---|
| 4995 | + pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, |
|---|
| 4996 | + write ? FOLL_WRITE : 0, pages, NULL); |
|---|
| 4997 | +#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE |
|---|
| 4998 | + pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, |
|---|
| 4999 | + write ? FOLL_WRITE : 0, pages, NULL, NULL); |
|---|
| 4504 | 5000 | #else |
|---|
| 4505 | | - reg->flags & KBASE_REG_GPU_WR, |
|---|
| 4506 | | - 0, pages, NULL); |
|---|
| 4507 | | -#endif |
|---|
| 4508 | | -#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE |
|---|
| 4509 | | - pinned_pages = get_user_pages_remote(NULL, mm, |
|---|
| 4510 | | - address, |
|---|
| 4511 | | - alloc->imported.user_buf.nr_pages, |
|---|
| 4512 | | - reg->flags & KBASE_REG_GPU_WR, |
|---|
| 4513 | | - 0, pages, NULL); |
|---|
| 4514 | | -#elif KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE |
|---|
| 4515 | | - pinned_pages = get_user_pages_remote(NULL, mm, |
|---|
| 4516 | | - address, |
|---|
| 4517 | | - alloc->imported.user_buf.nr_pages, |
|---|
| 4518 | | - reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, |
|---|
| 4519 | | - pages, NULL); |
|---|
| 4520 | | -#else |
|---|
| 4521 | | - pinned_pages = get_user_pages_remote(NULL, mm, |
|---|
| 4522 | | - address, |
|---|
| 4523 | | - alloc->imported.user_buf.nr_pages, |
|---|
| 4524 | | - reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, |
|---|
| 4525 | | - pages, NULL, NULL); |
|---|
| 5001 | + pinned_pages = pin_user_pages_remote(mm, address, alloc->imported.user_buf.nr_pages, |
|---|
| 5002 | + write ? FOLL_WRITE : 0, pages, NULL, NULL); |
|---|
| 4526 | 5003 | #endif |
|---|
| 4527 | 5004 | |
|---|
| 4528 | 5005 | if (pinned_pages <= 0) |
|---|
| 4529 | 5006 | return pinned_pages; |
|---|
| 4530 | 5007 | |
|---|
| 4531 | 5008 | if (pinned_pages != alloc->imported.user_buf.nr_pages) { |
|---|
| 5009 | + /* Above code already ensures there will not have been a CPU |
|---|
| 5010 | + * mapping by ensuring alloc->nents is 0 |
|---|
| 5011 | + */ |
|---|
| 4532 | 5012 | for (i = 0; i < pinned_pages; i++) |
|---|
| 4533 | | - put_page(pages[i]); |
|---|
| 5013 | + kbase_unpin_user_buf_page(pages[i]); |
|---|
| 4534 | 5014 | return -ENOMEM; |
|---|
| 4535 | 5015 | } |
|---|
| 4536 | 5016 | |
|---|
| .. | .. |
|---|
| 4542 | 5022 | static int kbase_jd_user_buf_map(struct kbase_context *kctx, |
|---|
| 4543 | 5023 | struct kbase_va_region *reg) |
|---|
| 4544 | 5024 | { |
|---|
| 4545 | | - long pinned_pages; |
|---|
| 5025 | + int err; |
|---|
| 5026 | + long pinned_pages = 0; |
|---|
| 4546 | 5027 | struct kbase_mem_phy_alloc *alloc; |
|---|
| 4547 | 5028 | struct page **pages; |
|---|
| 4548 | 5029 | struct tagged_addr *pa; |
|---|
| 4549 | | - long i; |
|---|
| 4550 | | - unsigned long address; |
|---|
| 5030 | + long i, dma_mapped_pages; |
|---|
| 4551 | 5031 | struct device *dev; |
|---|
| 4552 | | - unsigned long offset; |
|---|
| 4553 | | - unsigned long local_size; |
|---|
| 4554 | 5032 | unsigned long gwt_mask = ~0; |
|---|
| 4555 | | - int err = kbase_jd_user_buf_pin_pages(kctx, reg); |
|---|
| 5033 | + /* Calls to this function are inherently asynchronous, with respect to |
|---|
| 5034 | + * MMU operations. |
|---|
| 5035 | + */ |
|---|
| 5036 | + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; |
|---|
| 5037 | + |
|---|
| 5038 | + lockdep_assert_held(&kctx->reg_lock); |
|---|
| 5039 | + |
|---|
| 5040 | + err = kbase_jd_user_buf_pin_pages(kctx, reg); |
|---|
| 4556 | 5041 | |
|---|
| 4557 | 5042 | if (err) |
|---|
| 4558 | 5043 | return err; |
|---|
| 4559 | 5044 | |
|---|
| 4560 | 5045 | alloc = reg->gpu_alloc; |
|---|
| 4561 | 5046 | pa = kbase_get_gpu_phy_pages(reg); |
|---|
| 4562 | | - address = alloc->imported.user_buf.address; |
|---|
| 4563 | 5047 | pinned_pages = alloc->nents; |
|---|
| 4564 | 5048 | pages = alloc->imported.user_buf.pages; |
|---|
| 4565 | 5049 | dev = kctx->kbdev->dev; |
|---|
| 4566 | | - offset = address & ~PAGE_MASK; |
|---|
| 4567 | | - local_size = alloc->imported.user_buf.size; |
|---|
| 4568 | 5050 | |
|---|
| 5051 | + /* Manual CPU cache synchronization. |
|---|
| 5052 | + * |
|---|
| 5053 | + * The driver disables automatic CPU cache synchronization because the |
|---|
| 5054 | + * memory pages that enclose the imported region may also contain |
|---|
| 5055 | + * sub-regions which are not imported and that are allocated and used |
|---|
| 5056 | + * by the user process. This may be the case of memory at the beginning |
|---|
| 5057 | + * of the first page and at the end of the last page. Automatic CPU cache |
|---|
| 5058 | + * synchronization would force some operations on those memory allocations, |
|---|
| 5059 | + * unbeknown to the user process: in particular, a CPU cache invalidate |
|---|
| 5060 | + * upon unmapping would destroy the content of dirty CPU caches and cause |
|---|
| 5061 | + * the user process to lose CPU writes to the non-imported sub-regions. |
|---|
| 5062 | + * |
|---|
| 5063 | + * When the GPU claims ownership of the imported memory buffer, it shall |
|---|
| 5064 | + * commit CPU writes for the whole of all pages that enclose the imported |
|---|
| 5065 | + * region, otherwise the initial content of memory would be wrong. |
|---|
| 5066 | + */ |
|---|
| 4569 | 5067 | for (i = 0; i < pinned_pages; i++) { |
|---|
| 4570 | 5068 | dma_addr_t dma_addr; |
|---|
| 4571 | | - unsigned long min; |
|---|
| 4572 | | - |
|---|
| 4573 | | - min = MIN(PAGE_SIZE - offset, local_size); |
|---|
| 4574 | | - dma_addr = dma_map_page(dev, pages[i], |
|---|
| 4575 | | - offset, min, |
|---|
| 4576 | | - DMA_BIDIRECTIONAL); |
|---|
| 4577 | | - if (dma_mapping_error(dev, dma_addr)) |
|---|
| 5069 | +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) |
|---|
| 5070 | + dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); |
|---|
| 5071 | +#else |
|---|
| 5072 | + dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL, |
|---|
| 5073 | + DMA_ATTR_SKIP_CPU_SYNC); |
|---|
| 5074 | +#endif |
|---|
| 5075 | + err = dma_mapping_error(dev, dma_addr); |
|---|
| 5076 | + if (err) |
|---|
| 4578 | 5077 | goto unwind; |
|---|
| 4579 | 5078 | |
|---|
| 4580 | 5079 | alloc->imported.user_buf.dma_addrs[i] = dma_addr; |
|---|
| 4581 | 5080 | pa[i] = as_tagged(page_to_phys(pages[i])); |
|---|
| 4582 | 5081 | |
|---|
| 4583 | | - local_size -= min; |
|---|
| 4584 | | - offset = 0; |
|---|
| 5082 | + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); |
|---|
| 4585 | 5083 | } |
|---|
| 4586 | 5084 | |
|---|
| 4587 | 5085 | #ifdef CONFIG_MALI_CINSTR_GWT |
|---|
| .. | .. |
|---|
| 4589 | 5087 | gwt_mask = ~KBASE_REG_GPU_WR; |
|---|
| 4590 | 5088 | #endif |
|---|
| 4591 | 5089 | |
|---|
| 4592 | | - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, |
|---|
| 4593 | | - pa, kbase_reg_current_backed_size(reg), |
|---|
| 4594 | | - reg->flags & gwt_mask, kctx->as_nr, |
|---|
| 4595 | | - alloc->group_id); |
|---|
| 5090 | + err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, |
|---|
| 5091 | + kbase_reg_current_backed_size(reg), |
|---|
| 5092 | + reg->flags & gwt_mask, kctx->as_nr, alloc->group_id, |
|---|
| 5093 | + mmu_sync_info, NULL); |
|---|
| 4596 | 5094 | if (err == 0) |
|---|
| 4597 | 5095 | return 0; |
|---|
| 4598 | 5096 | |
|---|
| 4599 | 5097 | /* fall down */ |
|---|
| 4600 | 5098 | unwind: |
|---|
| 4601 | 5099 | alloc->nents = 0; |
|---|
| 4602 | | - while (i--) { |
|---|
| 4603 | | - dma_unmap_page(kctx->kbdev->dev, |
|---|
| 4604 | | - alloc->imported.user_buf.dma_addrs[i], |
|---|
| 4605 | | - PAGE_SIZE, DMA_BIDIRECTIONAL); |
|---|
| 5100 | + dma_mapped_pages = i; |
|---|
| 5101 | + /* Run the unmap loop in the same order as map loop, and perform again |
|---|
| 5102 | + * CPU cache synchronization to re-write the content of dirty CPU caches |
|---|
| 5103 | + * to memory. This is precautionary measure in case a GPU job has taken |
|---|
| 5104 | + * advantage of a partially GPU-mapped range to write and corrupt the |
|---|
| 5105 | + * content of memory, either inside or outside the imported region. |
|---|
| 5106 | + * |
|---|
| 5107 | + * Notice that this error recovery path doesn't try to be optimal and just |
|---|
| 5108 | + * flushes the entire page range. |
|---|
| 5109 | + */ |
|---|
| 5110 | + for (i = 0; i < dma_mapped_pages; i++) { |
|---|
| 5111 | + dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; |
|---|
| 5112 | + |
|---|
| 5113 | + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); |
|---|
| 5114 | +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) |
|---|
| 5115 | + dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); |
|---|
| 5116 | +#else |
|---|
| 5117 | + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, |
|---|
| 5118 | + DMA_ATTR_SKIP_CPU_SYNC); |
|---|
| 5119 | +#endif |
|---|
| 4606 | 5120 | } |
|---|
| 4607 | 5121 | |
|---|
| 4608 | | - while (++i < pinned_pages) { |
|---|
| 4609 | | - put_page(pages[i]); |
|---|
| 5122 | + /* The user buffer could already have been previously pinned before |
|---|
| 5123 | + * entering this function, and hence there could potentially be CPU |
|---|
| 5124 | + * mappings of it |
|---|
| 5125 | + */ |
|---|
| 5126 | + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, pinned_pages); |
|---|
| 5127 | + |
|---|
| 5128 | + for (i = 0; i < pinned_pages; i++) { |
|---|
| 5129 | + kbase_unpin_user_buf_page(pages[i]); |
|---|
| 4610 | 5130 | pages[i] = NULL; |
|---|
| 4611 | 5131 | } |
|---|
| 4612 | 5132 | |
|---|
| .. | .. |
|---|
| 4617 | 5137 | * GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT |
|---|
| 4618 | 5138 | * have a corresponding call to kbase_jd_user_buf_unpin_pages(). |
|---|
| 4619 | 5139 | */ |
|---|
| 4620 | | -static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, |
|---|
| 4621 | | - struct kbase_mem_phy_alloc *alloc, bool writeable) |
|---|
| 5140 | +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, |
|---|
| 5141 | + struct kbase_va_region *reg, bool writeable) |
|---|
| 4622 | 5142 | { |
|---|
| 4623 | 5143 | long i; |
|---|
| 4624 | 5144 | struct page **pages; |
|---|
| 4625 | | - unsigned long size = alloc->imported.user_buf.size; |
|---|
| 5145 | + unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK; |
|---|
| 5146 | + unsigned long remaining_size = alloc->imported.user_buf.size; |
|---|
| 5147 | + |
|---|
| 5148 | + lockdep_assert_held(&kctx->reg_lock); |
|---|
| 4626 | 5149 | |
|---|
| 4627 | 5150 | KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); |
|---|
| 4628 | 5151 | pages = alloc->imported.user_buf.pages; |
|---|
| 5152 | + |
|---|
| 5153 | +#if !MALI_USE_CSF |
|---|
| 5154 | + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, alloc->nents); |
|---|
| 5155 | +#else |
|---|
| 5156 | + CSTD_UNUSED(reg); |
|---|
| 5157 | +#endif |
|---|
| 5158 | + |
|---|
| 4629 | 5159 | for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { |
|---|
| 4630 | | - unsigned long local_size; |
|---|
| 5160 | + unsigned long imported_size = MIN(remaining_size, PAGE_SIZE - offset_within_page); |
|---|
| 5161 | + /* Notice: this is a temporary variable that is used for DMA sync |
|---|
| 5162 | + * operations, and that could be incremented by an offset if the |
|---|
| 5163 | + * current page contains both imported and non-imported memory |
|---|
| 5164 | + * sub-regions. |
|---|
| 5165 | + * |
|---|
| 5166 | + * It is valid to add an offset to this value, because the offset |
|---|
| 5167 | + * is always kept within the physically contiguous dma-mapped range |
|---|
| 5168 | + * and there's no need to translate to physical address to offset it. |
|---|
| 5169 | + * |
|---|
| 5170 | + * This variable is not going to be used for the actual DMA unmap |
|---|
| 5171 | + * operation, that shall always use the original DMA address of the |
|---|
| 5172 | + * whole memory page. |
|---|
| 5173 | + */ |
|---|
| 4631 | 5174 | dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; |
|---|
| 4632 | 5175 | |
|---|
| 4633 | | - local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK)); |
|---|
| 4634 | | - dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size, |
|---|
| 4635 | | - DMA_BIDIRECTIONAL); |
|---|
| 5176 | + /* Manual CPU cache synchronization. |
|---|
| 5177 | + * |
|---|
| 5178 | + * When the GPU returns ownership of the buffer to the CPU, the driver |
|---|
| 5179 | + * needs to treat imported and non-imported memory differently. |
|---|
| 5180 | + * |
|---|
| 5181 | + * The first case to consider is non-imported sub-regions at the |
|---|
| 5182 | + * beginning of the first page and at the end of last page. For these |
|---|
| 5183 | + * sub-regions: CPU cache shall be committed with a clean+invalidate, |
|---|
| 5184 | + * in order to keep the last CPU write. |
|---|
| 5185 | + * |
|---|
| 5186 | + * Imported region prefers the opposite treatment: this memory has been |
|---|
| 5187 | + * legitimately mapped and used by the GPU, hence GPU writes shall be |
|---|
| 5188 | + * committed to memory, while CPU cache shall be invalidated to make |
|---|
| 5189 | + * sure that CPU reads the correct memory content. |
|---|
| 5190 | + * |
|---|
| 5191 | + * The following diagram shows the expect value of the variables |
|---|
| 5192 | + * used in this loop in the corner case of an imported region encloed |
|---|
| 5193 | + * by a single memory page: |
|---|
| 5194 | + * |
|---|
| 5195 | + * page boundary ->|---------- | <- dma_addr (initial value) |
|---|
| 5196 | + * | | |
|---|
| 5197 | + * | - - - - - | <- offset_within_page |
|---|
| 5198 | + * |XXXXXXXXXXX|\ |
|---|
| 5199 | + * |XXXXXXXXXXX| \ |
|---|
| 5200 | + * |XXXXXXXXXXX| }- imported_size |
|---|
| 5201 | + * |XXXXXXXXXXX| / |
|---|
| 5202 | + * |XXXXXXXXXXX|/ |
|---|
| 5203 | + * | - - - - - | <- offset_within_page + imported_size |
|---|
| 5204 | + * | |\ |
|---|
| 5205 | + * | | }- PAGE_SIZE - imported_size - offset_within_page |
|---|
| 5206 | + * | |/ |
|---|
| 5207 | + * page boundary ->|-----------| |
|---|
| 5208 | + * |
|---|
| 5209 | + * If the imported region is enclosed by more than one page, then |
|---|
| 5210 | + * offset_within_page = 0 for any page after the first. |
|---|
| 5211 | + */ |
|---|
| 5212 | + |
|---|
| 5213 | + /* Only for first page: handle non-imported range at the beginning. */ |
|---|
| 5214 | + if (offset_within_page > 0) { |
|---|
| 5215 | + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page, |
|---|
| 5216 | + DMA_BIDIRECTIONAL); |
|---|
| 5217 | + dma_addr += offset_within_page; |
|---|
| 5218 | + } |
|---|
| 5219 | + |
|---|
| 5220 | + /* For every page: handle imported range. */ |
|---|
| 5221 | + if (imported_size > 0) |
|---|
| 5222 | + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size, |
|---|
| 5223 | + DMA_BIDIRECTIONAL); |
|---|
| 5224 | + |
|---|
| 5225 | + /* Only for last page (that may coincide with first page): |
|---|
| 5226 | + * handle non-imported range at the end. |
|---|
| 5227 | + */ |
|---|
| 5228 | + if ((imported_size + offset_within_page) < PAGE_SIZE) { |
|---|
| 5229 | + dma_addr += imported_size; |
|---|
| 5230 | + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, |
|---|
| 5231 | + PAGE_SIZE - imported_size - offset_within_page, |
|---|
| 5232 | + DMA_BIDIRECTIONAL); |
|---|
| 5233 | + } |
|---|
| 5234 | + |
|---|
| 5235 | + /* Notice: use the original DMA address to unmap the whole memory page. */ |
|---|
| 5236 | +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) |
|---|
| 5237 | + dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE, |
|---|
| 5238 | + DMA_BIDIRECTIONAL); |
|---|
| 5239 | +#else |
|---|
| 5240 | + dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], |
|---|
| 5241 | + PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); |
|---|
| 5242 | +#endif |
|---|
| 4636 | 5243 | if (writeable) |
|---|
| 4637 | 5244 | set_page_dirty_lock(pages[i]); |
|---|
| 4638 | 5245 | #if !MALI_USE_CSF |
|---|
| 4639 | | - put_page(pages[i]); |
|---|
| 5246 | + kbase_unpin_user_buf_page(pages[i]); |
|---|
| 4640 | 5247 | pages[i] = NULL; |
|---|
| 4641 | 5248 | #endif |
|---|
| 4642 | 5249 | |
|---|
| 4643 | | - size -= local_size; |
|---|
| 5250 | + remaining_size -= imported_size; |
|---|
| 5251 | + offset_within_page = 0; |
|---|
| 4644 | 5252 | } |
|---|
| 4645 | 5253 | #if !MALI_USE_CSF |
|---|
| 4646 | 5254 | alloc->nents = 0; |
|---|
| .. | .. |
|---|
| 4687 | 5295 | return 0; |
|---|
| 4688 | 5296 | } |
|---|
| 4689 | 5297 | |
|---|
| 4690 | | -struct kbase_mem_phy_alloc *kbase_map_external_resource( |
|---|
| 4691 | | - struct kbase_context *kctx, struct kbase_va_region *reg, |
|---|
| 4692 | | - struct mm_struct *locked_mm) |
|---|
| 5298 | +int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg, |
|---|
| 5299 | + struct mm_struct *locked_mm) |
|---|
| 4693 | 5300 | { |
|---|
| 4694 | | - int err; |
|---|
| 5301 | + int err = 0; |
|---|
| 5302 | + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; |
|---|
| 4695 | 5303 | |
|---|
| 4696 | 5304 | lockdep_assert_held(&kctx->reg_lock); |
|---|
| 4697 | 5305 | |
|---|
| .. | .. |
|---|
| 4700 | 5308 | case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { |
|---|
| 4701 | 5309 | if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) && |
|---|
| 4702 | 5310 | (!reg->gpu_alloc->nents)) |
|---|
| 4703 | | - goto exit; |
|---|
| 5311 | + return -EINVAL; |
|---|
| 4704 | 5312 | |
|---|
| 4705 | 5313 | reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; |
|---|
| 4706 | 5314 | if (reg->gpu_alloc->imported.user_buf |
|---|
| .. | .. |
|---|
| 4708 | 5316 | err = kbase_jd_user_buf_map(kctx, reg); |
|---|
| 4709 | 5317 | if (err) { |
|---|
| 4710 | 5318 | reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; |
|---|
| 4711 | | - goto exit; |
|---|
| 5319 | + return err; |
|---|
| 4712 | 5320 | } |
|---|
| 4713 | 5321 | } |
|---|
| 4714 | 5322 | } |
|---|
| .. | .. |
|---|
| 4716 | 5324 | case KBASE_MEM_TYPE_IMPORTED_UMM: { |
|---|
| 4717 | 5325 | err = kbase_mem_umm_map(kctx, reg); |
|---|
| 4718 | 5326 | if (err) |
|---|
| 4719 | | - goto exit; |
|---|
| 5327 | + return err; |
|---|
| 4720 | 5328 | break; |
|---|
| 4721 | 5329 | } |
|---|
| 4722 | 5330 | default: |
|---|
| 4723 | | - goto exit; |
|---|
| 5331 | + dev_dbg(kctx->kbdev->dev, |
|---|
| 5332 | + "Invalid external resource GPU allocation type (%x) on mapping", |
|---|
| 5333 | + alloc->type); |
|---|
| 5334 | + return -EINVAL; |
|---|
| 4724 | 5335 | } |
|---|
| 4725 | 5336 | |
|---|
| 4726 | | - return kbase_mem_phy_alloc_get(reg->gpu_alloc); |
|---|
| 4727 | | -exit: |
|---|
| 4728 | | - return NULL; |
|---|
| 5337 | + kbase_va_region_alloc_get(kctx, reg); |
|---|
| 5338 | + kbase_mem_phy_alloc_get(alloc); |
|---|
| 5339 | + return err; |
|---|
| 4729 | 5340 | } |
|---|
| 4730 | 5341 | |
|---|
| 4731 | | -void kbase_unmap_external_resource(struct kbase_context *kctx, |
|---|
| 4732 | | - struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) |
|---|
| 5342 | +void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg) |
|---|
| 4733 | 5343 | { |
|---|
| 5344 | + /* gpu_alloc was used in kbase_map_external_resources, so we need to use it for the |
|---|
| 5345 | + * unmapping operation. |
|---|
| 5346 | + */ |
|---|
| 5347 | + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; |
|---|
| 5348 | + |
|---|
| 5349 | + lockdep_assert_held(&kctx->reg_lock); |
|---|
| 5350 | + |
|---|
| 4734 | 5351 | switch (alloc->type) { |
|---|
| 4735 | 5352 | case KBASE_MEM_TYPE_IMPORTED_UMM: { |
|---|
| 4736 | 5353 | kbase_mem_umm_unmap(kctx, reg, alloc); |
|---|
| .. | .. |
|---|
| 4742 | 5359 | if (alloc->imported.user_buf.current_mapping_usage_count == 0) { |
|---|
| 4743 | 5360 | bool writeable = true; |
|---|
| 4744 | 5361 | |
|---|
| 4745 | | - if (!kbase_is_region_invalid_or_free(reg) && |
|---|
| 4746 | | - reg->gpu_alloc == alloc) |
|---|
| 4747 | | - kbase_mmu_teardown_pages( |
|---|
| 4748 | | - kctx->kbdev, |
|---|
| 4749 | | - &kctx->mmu, |
|---|
| 4750 | | - reg->start_pfn, |
|---|
| 4751 | | - kbase_reg_current_backed_size(reg), |
|---|
| 4752 | | - kctx->as_nr); |
|---|
| 5362 | + if (!kbase_is_region_invalid_or_free(reg)) { |
|---|
| 5363 | + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, |
|---|
| 5364 | + alloc->pages, |
|---|
| 5365 | + kbase_reg_current_backed_size(reg), |
|---|
| 5366 | + kbase_reg_current_backed_size(reg), |
|---|
| 5367 | + kctx->as_nr, true); |
|---|
| 5368 | + } |
|---|
| 4753 | 5369 | |
|---|
| 4754 | | - if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0)) |
|---|
| 5370 | + if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0) |
|---|
| 4755 | 5371 | writeable = false; |
|---|
| 4756 | 5372 | |
|---|
| 4757 | | - kbase_jd_user_buf_unmap(kctx, alloc, writeable); |
|---|
| 5373 | + kbase_jd_user_buf_unmap(kctx, alloc, reg, writeable); |
|---|
| 4758 | 5374 | } |
|---|
| 4759 | | - } |
|---|
| 5375 | + } |
|---|
| 4760 | 5376 | break; |
|---|
| 4761 | 5377 | default: |
|---|
| 4762 | | - break; |
|---|
| 5378 | + WARN(1, "Invalid external resource GPU allocation type (%x) on unmapping", |
|---|
| 5379 | + alloc->type); |
|---|
| 5380 | + return; |
|---|
| 4763 | 5381 | } |
|---|
| 4764 | 5382 | kbase_mem_phy_alloc_put(alloc); |
|---|
| 5383 | + kbase_va_region_alloc_put(kctx, reg); |
|---|
| 5384 | +} |
|---|
| 5385 | + |
|---|
| 5386 | +static inline u64 kbasep_get_va_gpu_addr(struct kbase_va_region *reg) |
|---|
| 5387 | +{ |
|---|
| 5388 | + return reg->start_pfn << PAGE_SHIFT; |
|---|
| 4765 | 5389 | } |
|---|
| 4766 | 5390 | |
|---|
| 4767 | 5391 | struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( |
|---|
| .. | .. |
|---|
| 4777 | 5401 | * metadata which matches the region which is being acquired. |
|---|
| 4778 | 5402 | */ |
|---|
| 4779 | 5403 | list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { |
|---|
| 4780 | | - if (walker->gpu_addr == gpu_addr) { |
|---|
| 5404 | + if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) { |
|---|
| 4781 | 5405 | meta = walker; |
|---|
| 4782 | 5406 | meta->ref++; |
|---|
| 4783 | 5407 | break; |
|---|
| .. | .. |
|---|
| 4789 | 5413 | struct kbase_va_region *reg; |
|---|
| 4790 | 5414 | |
|---|
| 4791 | 5415 | /* Find the region */ |
|---|
| 4792 | | - reg = kbase_region_tracker_find_region_enclosing_address( |
|---|
| 4793 | | - kctx, gpu_addr); |
|---|
| 5416 | + reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); |
|---|
| 4794 | 5417 | if (kbase_is_region_invalid_or_free(reg)) |
|---|
| 4795 | 5418 | goto failed; |
|---|
| 4796 | 5419 | |
|---|
| .. | .. |
|---|
| 4798 | 5421 | meta = kzalloc(sizeof(*meta), GFP_KERNEL); |
|---|
| 4799 | 5422 | if (!meta) |
|---|
| 4800 | 5423 | goto failed; |
|---|
| 4801 | | - |
|---|
| 4802 | 5424 | /* |
|---|
| 4803 | 5425 | * Fill in the metadata object and acquire a reference |
|---|
| 4804 | 5426 | * for the physical resource. |
|---|
| 4805 | 5427 | */ |
|---|
| 4806 | | - meta->alloc = kbase_map_external_resource(kctx, reg, NULL); |
|---|
| 4807 | | - meta->ref = 1; |
|---|
| 5428 | + meta->reg = reg; |
|---|
| 4808 | 5429 | |
|---|
| 4809 | | - if (!meta->alloc) |
|---|
| 5430 | + /* Map the external resource to the GPU allocation of the region |
|---|
| 5431 | + * and acquire the reference to the VA region |
|---|
| 5432 | + */ |
|---|
| 5433 | + if (kbase_map_external_resource(kctx, meta->reg, NULL)) |
|---|
| 4810 | 5434 | goto fail_map; |
|---|
| 4811 | | - |
|---|
| 4812 | | - meta->gpu_addr = reg->start_pfn << PAGE_SHIFT; |
|---|
| 5435 | + meta->ref = 1; |
|---|
| 4813 | 5436 | |
|---|
| 4814 | 5437 | list_add(&meta->ext_res_node, &kctx->ext_res_meta_head); |
|---|
| 4815 | 5438 | } |
|---|
| .. | .. |
|---|
| 4834 | 5457 | * metadata which matches the region which is being released. |
|---|
| 4835 | 5458 | */ |
|---|
| 4836 | 5459 | list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) |
|---|
| 4837 | | - if (walker->gpu_addr == gpu_addr) |
|---|
| 5460 | + if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) |
|---|
| 4838 | 5461 | return walker; |
|---|
| 4839 | 5462 | |
|---|
| 4840 | 5463 | return NULL; |
|---|
| .. | .. |
|---|
| 4843 | 5466 | static void release_sticky_resource_meta(struct kbase_context *kctx, |
|---|
| 4844 | 5467 | struct kbase_ctx_ext_res_meta *meta) |
|---|
| 4845 | 5468 | { |
|---|
| 4846 | | - struct kbase_va_region *reg; |
|---|
| 4847 | | - |
|---|
| 4848 | | - /* Drop the physical memory reference and free the metadata. */ |
|---|
| 4849 | | - reg = kbase_region_tracker_find_region_enclosing_address( |
|---|
| 4850 | | - kctx, |
|---|
| 4851 | | - meta->gpu_addr); |
|---|
| 4852 | | - |
|---|
| 4853 | | - kbase_unmap_external_resource(kctx, reg, meta->alloc); |
|---|
| 5469 | + kbase_unmap_external_resource(kctx, meta->reg); |
|---|
| 4854 | 5470 | list_del(&meta->ext_res_node); |
|---|
| 4855 | 5471 | kfree(meta); |
|---|
| 4856 | 5472 | } |
|---|