forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-01-31 f70575805708cabdedea7498aaa3f710fde4d920
kernel/drivers/gpu/arm/bifrost/mali_kbase_mem.c
....@@ -1,7 +1,7 @@
11 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
22 /*
33 *
4
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
4
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
55 *
66 * This program is free software and is provided to you under the terms of the
77 * GNU General Public License version 2 as published by the Free Software
....@@ -20,7 +20,7 @@
2020 */
2121
2222 /**
23
- * Base kernel memory APIs
23
+ * DOC: Base kernel memory APIs
2424 */
2525 #include <linux/dma-buf.h>
2626 #include <linux/kernel.h>
....@@ -44,6 +44,11 @@
4444 #include <mali_kbase_config_defaults.h>
4545 #include <mali_kbase_trace_gpu_mem.h>
4646
47
+#define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-"
48
+#define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1)
49
+
50
+#if MALI_JIT_PRESSURE_LIMIT_BASE
51
+
4752 /*
4853 * Alignment of objects allocated by the GPU inside a just-in-time memory
4954 * region whose size is given by an end address
....@@ -66,6 +71,7 @@
6671 */
6772 #define KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES (512u)
6873
74
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
6975
7076 /* Forward declarations */
7177 static void free_partial_locked(struct kbase_context *kctx,
....@@ -89,10 +95,8 @@
8995 #error "Unknown CPU VA width for this architecture"
9096 #endif
9197
92
-#if IS_ENABLED(CONFIG_64BIT)
93
- if (kbase_ctx_flag(kctx, KCTX_COMPAT))
98
+ if (kbase_ctx_compat_mode(kctx))
9499 cpu_va_bits = 32;
95
-#endif
96100
97101 return cpu_va_bits;
98102 }
....@@ -104,29 +108,37 @@
104108 u64 gpu_pfn)
105109 {
106110 struct rb_root *rbtree = NULL;
107
- struct kbase_reg_zone *exec_va_zone =
108
- kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA);
109111
110
- /* The gpu_pfn can only be greater than the starting pfn of the EXEC_VA
111
- * zone if this has been initialized.
112
- */
112
+ struct kbase_reg_zone *exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA);
113
+
114
+#if MALI_USE_CSF
115
+ struct kbase_reg_zone *fixed_va_zone =
116
+ kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_FIXED_VA);
117
+
118
+ struct kbase_reg_zone *exec_fixed_va_zone =
119
+ kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA);
120
+
121
+ if (gpu_pfn >= fixed_va_zone->base_pfn) {
122
+ rbtree = &kctx->reg_rbtree_fixed;
123
+ return rbtree;
124
+ } else if (gpu_pfn >= exec_fixed_va_zone->base_pfn) {
125
+ rbtree = &kctx->reg_rbtree_exec_fixed;
126
+ return rbtree;
127
+ }
128
+#endif
113129 if (gpu_pfn >= exec_va_zone->base_pfn)
114130 rbtree = &kctx->reg_rbtree_exec;
115131 else {
116132 u64 same_va_end;
117133
118
-#if IS_ENABLED(CONFIG_64BIT)
119
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
120
-#endif /* CONFIG_64BIT */
134
+ if (kbase_ctx_compat_mode(kctx)) {
121135 same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE;
122
-#if IS_ENABLED(CONFIG_64BIT)
123136 } else {
124137 struct kbase_reg_zone *same_va_zone =
125138 kbase_ctx_reg_zone_get(kctx,
126139 KBASE_REG_ZONE_SAME_VA);
127140 same_va_end = kbase_reg_zone_end_pfn(same_va_zone);
128141 }
129
-#endif /* CONFIG_64BIT */
130142
131143 if (gpu_pfn >= same_va_end)
132144 rbtree = &kctx->reg_rbtree_custom;
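
The hunk above swaps the old compile-time CONFIG_64BIT branches for a runtime
kbase_ctx_compat_mode() check and, on CSF builds, routes a GPU pfn to the FIXED_VA and
EXEC_FIXED_VA trees before falling back to EXEC_VA and SAME_VA/CUSTOM_VA. A minimal
standalone sketch of that "highest matching zone base wins" selection follows; the types
and names are simplified stand-ins for illustration, not kbase structures.

/* Illustrative sketch only: choose a region tree by comparing a GPU pfn
 * against zone base pfns, mirroring the chain of
 * "if (gpu_pfn >= zone->base_pfn)" tests in the diff above.
 */
struct zone_desc {
    const char *name;            /* e.g. "fixed", "exec", "same_va" */
    unsigned long long base_pfn; /* first GPU pfn of the zone */
};

/* zones[] must hold at least the default zone at index 0 and be sorted by
 * ascending base_pfn; the highest zone whose base is <= gpu_pfn wins.
 */
static const char *pick_zone(const struct zone_desc *zones, int nr_zones,
                             unsigned long long gpu_pfn)
{
    int i;

    for (i = nr_zones - 1; i > 0; i--) {
        if (gpu_pfn >= zones[i].base_pfn)
            return zones[i].name;
    }
    return zones[0].name;
}
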
....@@ -350,7 +362,9 @@
350362 }
351363
352364 /**
353
- * Remove a region object from the global list.
365
+ * kbase_remove_va_region - Remove a region object from the global list.
366
+ *
367
+ * @kbdev: The kbase device
354368 * @reg: Region object to remove
355369 *
356370 * The region reg is removed, possibly by merging with other free and
....@@ -358,19 +372,23 @@
358372 * region lock held. The associated memory is not released (see
359373 * kbase_free_alloced_region). Internal use only.
360374 */
361
-int kbase_remove_va_region(struct kbase_va_region *reg)
375
+void kbase_remove_va_region(struct kbase_device *kbdev,
376
+ struct kbase_va_region *reg)
362377 {
363378 struct rb_node *rbprev;
364379 struct kbase_va_region *prev = NULL;
365380 struct rb_node *rbnext;
366381 struct kbase_va_region *next = NULL;
367382 struct rb_root *reg_rbtree = NULL;
383
+ struct kbase_va_region *orig_reg = reg;
368384
369385 int merged_front = 0;
370386 int merged_back = 0;
371
- int err = 0;
372387
373388 reg_rbtree = reg->rbtree;
389
+
390
+ if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree)))
391
+ return;
374392
375393 /* Try to merge with the previous block first */
376394 rbprev = rb_prev(&(reg->rblink));
....@@ -378,10 +396,14 @@
378396 prev = rb_entry(rbprev, struct kbase_va_region, rblink);
379397 if (prev->flags & KBASE_REG_FREE) {
380398 /* We're compatible with the previous VMA, merge with
381
- * it
399
+ * it, handling any gaps for robustness.
382400 */
401
+ u64 prev_end_pfn = prev->start_pfn + prev->nr_pages;
402
+
383403 WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) !=
384404 (reg->flags & KBASE_REG_ZONE_MASK));
405
+ if (!WARN_ON(reg->start_pfn < prev_end_pfn))
406
+ prev->nr_pages += reg->start_pfn - prev_end_pfn;
385407 prev->nr_pages += reg->nr_pages;
386408 rb_erase(&(reg->rblink), reg_rbtree);
387409 reg = prev;
....@@ -393,42 +415,76 @@
393415 /* Note we do the lookup here as the tree may have been rebalanced. */
394416 rbnext = rb_next(&(reg->rblink));
395417 if (rbnext) {
396
- /* We're compatible with the next VMA, merge with it */
397418 next = rb_entry(rbnext, struct kbase_va_region, rblink);
398419 if (next->flags & KBASE_REG_FREE) {
420
+ /* We're compatible with the next VMA, merge with it,
421
+ * handling any gaps for robustness.
422
+ */
423
+ u64 reg_end_pfn = reg->start_pfn + reg->nr_pages;
424
+
399425 WARN_ON((next->flags & KBASE_REG_ZONE_MASK) !=
400426 (reg->flags & KBASE_REG_ZONE_MASK));
427
+ if (!WARN_ON(next->start_pfn < reg_end_pfn))
428
+ next->nr_pages += next->start_pfn - reg_end_pfn;
401429 next->start_pfn = reg->start_pfn;
402430 next->nr_pages += reg->nr_pages;
403431 rb_erase(&(reg->rblink), reg_rbtree);
404432 merged_back = 1;
405
- if (merged_front) {
406
- /* We already merged with prev, free it */
407
- kfree(reg);
408
- }
409433 }
410434 }
411435
412
- /* If we failed to merge then we need to add a new block */
413
- if (!(merged_front || merged_back)) {
436
+ if (merged_front && merged_back) {
437
+ /* We already merged with prev, free it */
438
+ kfree(reg);
439
+ } else if (!(merged_front || merged_back)) {
440
+ /* If we failed to merge then we need to add a new block */
441
+
414442 /*
415
- * We didn't merge anything. Add a new free
416
- * placeholder and remove the original one.
443
+ * We didn't merge anything. Try to add a new free
444
+ * placeholder, and in any case, remove the original one.
417445 */
418446 struct kbase_va_region *free_reg;
419447
420
- free_reg = kbase_alloc_free_region(reg_rbtree,
421
- reg->start_pfn, reg->nr_pages,
422
- reg->flags & KBASE_REG_ZONE_MASK);
448
+ free_reg = kbase_alloc_free_region(kbdev, reg_rbtree, reg->start_pfn, reg->nr_pages,
449
+ reg->flags & KBASE_REG_ZONE_MASK);
423450 if (!free_reg) {
424
- err = -ENOMEM;
451
+ /* In case of failure, we cannot allocate a replacement
452
+ * free region, so we will be left with a 'gap' in the
453
+ * region tracker's address range (though, the rbtree
454
+ * will itself still be correct after erasing
455
+ * 'reg').
456
+ *
457
+ * The gap will be rectified when an adjacent region is
458
+ * removed by one of the above merging paths. Other
459
+ * paths will gracefully fail to allocate if they try
460
+ * to allocate in the gap.
461
+ *
462
+ * There is nothing that the caller can do, since free
463
+ * paths must not fail. The existing 'reg' cannot be
464
+ * repurposed as the free region as callers must have
465
+ * freedom of use with it by virtue of it being owned
466
+ * by them, not the region tracker insert/remove code.
467
+ */
468
+ dev_warn(
469
+ kbdev->dev,
470
+ "Could not alloc a replacement free region for 0x%.16llx..0x%.16llx",
471
+ (unsigned long long)reg->start_pfn << PAGE_SHIFT,
472
+ (unsigned long long)(reg->start_pfn + reg->nr_pages) << PAGE_SHIFT);
473
+ rb_erase(&(reg->rblink), reg_rbtree);
474
+
425475 goto out;
426476 }
427477 rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree);
428478 }
429479
430
- out:
431
- return err;
480
+ /* This operation is always safe because the function never frees
481
+ * the region. If the region has been merged to both front and back,
482
+ * then it's the previous region that is supposed to be freed.
483
+ */
484
+ orig_reg->start_pfn = 0;
485
+
486
+out:
487
+ return;
432488 }
433489
434490 KBASE_EXPORT_TEST_API(kbase_remove_va_region);
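
The reworked kbase_remove_va_region() above no longer returns an error; instead it
absorbs any hole between the freed region and an adjacent free neighbour
(prev->nr_pages += reg->start_pfn - prev_end_pfn) and, if it cannot allocate a
replacement placeholder, leaves a gap that a later adjacent removal will repair. The
gap-absorbing arithmetic is easier to see on bare intervals; the sketch below uses a
simplified interval type, not the kbase_va_region layout.

/* Illustrative sketch: merge a released interval into a free predecessor,
 * swallowing any gap between the two, as the front-merge path above does.
 */
struct interval {
    unsigned long long start; /* first page */
    unsigned long long pages; /* length in pages */
};

static void merge_into_prev(struct interval *prev, const struct interval *reg)
{
    unsigned long long prev_end = prev->start + prev->pages;

    /* Absorb the gap (if any) between prev and reg... */
    if (reg->start >= prev_end)
        prev->pages += reg->start - prev_end;
    /* ...then absorb reg itself. */
    prev->pages += reg->pages;
}
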
....@@ -437,13 +493,18 @@
437493 * kbase_insert_va_region_nolock - Insert a VA region to the list,
438494 * replacing the existing one.
439495 *
496
+ * @kbdev: The kbase device
440497 * @new_reg: The new region to insert
441498 * @at_reg: The region to replace
442499 * @start_pfn: The Page Frame Number to insert at
443500 * @nr_pages: The number of pages of the region
501
+ *
502
+ * Return: 0 on success, error code otherwise.
444503 */
445
-static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
446
- struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
504
+static int kbase_insert_va_region_nolock(struct kbase_device *kbdev,
505
+ struct kbase_va_region *new_reg,
506
+ struct kbase_va_region *at_reg, u64 start_pfn,
507
+ size_t nr_pages)
447508 {
448509 struct rb_root *reg_rbtree = NULL;
449510 int err = 0;
....@@ -456,6 +517,9 @@
456517 KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
457518 /* at least nr_pages from start_pfn should be contained within at_reg */
458519 KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
520
+ /* having at_reg means the rb_tree should not be empty */
521
+ if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree)))
522
+ return -ENOMEM;
459523
460524 new_reg->start_pfn = start_pfn;
461525 new_reg->nr_pages = nr_pages;
....@@ -484,10 +548,9 @@
484548 else {
485549 struct kbase_va_region *new_front_reg;
486550
487
- new_front_reg = kbase_alloc_free_region(reg_rbtree,
488
- at_reg->start_pfn,
489
- start_pfn - at_reg->start_pfn,
490
- at_reg->flags & KBASE_REG_ZONE_MASK);
551
+ new_front_reg = kbase_alloc_free_region(kbdev, reg_rbtree, at_reg->start_pfn,
552
+ start_pfn - at_reg->start_pfn,
553
+ at_reg->flags & KBASE_REG_ZONE_MASK);
491554
492555 if (new_front_reg) {
493556 at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
....@@ -511,6 +574,8 @@
511574 * @addr: the address to insert the region at
512575 * @nr_pages: the number of pages in the region
513576 * @align: the minimum alignment in pages
577
+ *
578
+ * Return: 0 on success, error code otherwise.
514579 */
515580 int kbase_add_va_region(struct kbase_context *kctx,
516581 struct kbase_va_region *reg, u64 addr,
....@@ -527,12 +592,19 @@
527592
528593 lockdep_assert_held(&kctx->reg_lock);
529594
530
- /* The executable allocation from the SAME_VA zone would already have an
595
+ /* The executable allocation from the SAME_VA zone should already have an
531596 * appropriately aligned GPU VA chosen for it.
532
- * Also the executable allocation from EXEC_VA zone doesn't need the
533
- * special alignment.
597
+ * Also, executable allocations from EXEC_VA don't need the special
598
+ * alignment.
534599 */
600
+#if MALI_USE_CSF
601
+ /* The same is also true for the EXEC_FIXED_VA zone.
602
+ */
603
+#endif
535604 if (!(reg->flags & KBASE_REG_GPU_NX) && !addr &&
605
+#if MALI_USE_CSF
606
+ ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_FIXED_VA) &&
607
+#endif
536608 ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_VA)) {
537609 if (cpu_va_bits > gpu_pc_bits) {
538610 align = max(align, (size_t)((1ULL << gpu_pc_bits)
....@@ -564,15 +636,17 @@
564636 /**
565637 * kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree
566638 *
567
- * Insert a region into the rbtree that was specified when the region was
568
- * created. If addr is 0 a free area in the rbtree is used, otherwise the
569
- * specified address is used.
570
- *
571639 * @kbdev: The kbase device
572640 * @reg: The region to add
573641 * @addr: The address to add the region at, or 0 to map at any available address
574642 * @nr_pages: The size of the region in pages
575643 * @align: The minimum alignment in pages
644
+ *
645
+ * Insert a region into the rbtree that was specified when the region was
646
+ * created. If addr is 0 a free area in the rbtree is used, otherwise the
647
+ * specified address is used.
648
+ *
649
+ * Return: 0 on success, error code otherwise.
576650 */
577651 int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
578652 struct kbase_va_region *reg,
....@@ -613,8 +687,7 @@
613687 goto exit;
614688 }
615689
616
- err = kbase_insert_va_region_nolock(reg, tmp, gpu_pfn,
617
- nr_pages);
690
+ err = kbase_insert_va_region_nolock(kbdev, reg, tmp, gpu_pfn, nr_pages);
618691 if (err) {
619692 dev_warn(dev, "Failed to insert va region");
620693 err = -ENOMEM;
....@@ -639,8 +712,7 @@
639712 nr_pages, align_offset, align_mask,
640713 &start_pfn);
641714 if (tmp) {
642
- err = kbase_insert_va_region_nolock(reg, tmp,
643
- start_pfn, nr_pages);
715
+ err = kbase_insert_va_region_nolock(kbdev, reg, tmp, start_pfn, nr_pages);
644716 if (unlikely(err)) {
645717 dev_warn(dev, "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages",
646718 start_pfn, nr_pages);
....@@ -659,6 +731,59 @@
659731 /*
660732 * @brief Initialize the internal region tracker data structure.
661733 */
734
+#if MALI_USE_CSF
735
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
736
+ struct kbase_va_region *same_va_reg,
737
+ struct kbase_va_region *custom_va_reg,
738
+ struct kbase_va_region *exec_va_reg,
739
+ struct kbase_va_region *exec_fixed_va_reg,
740
+ struct kbase_va_region *fixed_va_reg)
741
+{
742
+ u64 last_zone_end_pfn;
743
+
744
+ kctx->reg_rbtree_same = RB_ROOT;
745
+ kbase_region_tracker_insert(same_va_reg);
746
+
747
+ last_zone_end_pfn = same_va_reg->start_pfn + same_va_reg->nr_pages;
748
+
749
+ /* Although custom_va_reg doesn't always exist, initialize
750
+ * unconditionally because of the mem_view debugfs
751
+ * implementation which relies on it being empty.
752
+ */
753
+ kctx->reg_rbtree_custom = RB_ROOT;
754
+ kctx->reg_rbtree_exec = RB_ROOT;
755
+
756
+ if (custom_va_reg) {
757
+ WARN_ON(custom_va_reg->start_pfn < last_zone_end_pfn);
758
+ kbase_region_tracker_insert(custom_va_reg);
759
+ last_zone_end_pfn = custom_va_reg->start_pfn + custom_va_reg->nr_pages;
760
+ }
761
+
762
+ /* Initialize exec, fixed and exec_fixed. These are always
763
+ * initialized at this stage, if they will exist at all.
764
+ */
765
+ kctx->reg_rbtree_fixed = RB_ROOT;
766
+ kctx->reg_rbtree_exec_fixed = RB_ROOT;
767
+
768
+ if (exec_va_reg) {
769
+ WARN_ON(exec_va_reg->start_pfn < last_zone_end_pfn);
770
+ kbase_region_tracker_insert(exec_va_reg);
771
+ last_zone_end_pfn = exec_va_reg->start_pfn + exec_va_reg->nr_pages;
772
+ }
773
+
774
+ if (exec_fixed_va_reg) {
775
+ WARN_ON(exec_fixed_va_reg->start_pfn < last_zone_end_pfn);
776
+ kbase_region_tracker_insert(exec_fixed_va_reg);
777
+ last_zone_end_pfn = exec_fixed_va_reg->start_pfn + exec_fixed_va_reg->nr_pages;
778
+ }
779
+
780
+ if (fixed_va_reg) {
781
+ WARN_ON(fixed_va_reg->start_pfn < last_zone_end_pfn);
782
+ kbase_region_tracker_insert(fixed_va_reg);
783
+ last_zone_end_pfn = fixed_va_reg->start_pfn + fixed_va_reg->nr_pages;
784
+ }
785
+}
786
+#else
662787 static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
663788 struct kbase_va_region *same_va_reg,
664789 struct kbase_va_region *custom_va_reg)
....@@ -679,6 +804,41 @@
679804 if (custom_va_reg)
680805 kbase_region_tracker_insert(custom_va_reg);
681806 }
807
+#endif /* MALI_USE_CSF */
808
+
809
+static struct kbase_context *kbase_reg_flags_to_kctx(struct kbase_va_region *reg)
810
+{
811
+ struct kbase_context *kctx = NULL;
812
+ struct rb_root *rbtree = reg->rbtree;
813
+
814
+ switch (reg->flags & KBASE_REG_ZONE_MASK) {
815
+ case KBASE_REG_ZONE_CUSTOM_VA:
816
+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_custom);
817
+ break;
818
+ case KBASE_REG_ZONE_SAME_VA:
819
+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_same);
820
+ break;
821
+ case KBASE_REG_ZONE_EXEC_VA:
822
+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec);
823
+ break;
824
+#if MALI_USE_CSF
825
+ case KBASE_REG_ZONE_EXEC_FIXED_VA:
826
+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed);
827
+ break;
828
+ case KBASE_REG_ZONE_FIXED_VA:
829
+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed);
830
+ break;
831
+ case KBASE_REG_ZONE_MCU_SHARED:
832
+ /* This is only expected to be called on driver unload. */
833
+ break;
834
+#endif
835
+ default:
836
+ WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags);
837
+ break;
838
+ }
839
+
840
+ return kctx;
841
+}
682842
683843 static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
684844 {
....@@ -690,7 +850,9 @@
690850 if (rbnode) {
691851 rb_erase(rbnode, rbtree);
692852 reg = rb_entry(rbnode, struct kbase_va_region, rblink);
693
- WARN_ON(reg->va_refcnt != 1);
853
+ WARN_ON(kbase_refcount_read(&reg->va_refcnt) != 1);
854
+ if (kbase_page_migration_enabled)
855
+ kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg);
694856 /* Reset the start_pfn - as the rbtree is being
695857 * destroyed and we've already erased this region, there
696858 * is no further need to attempt to remove it.
....@@ -707,12 +869,19 @@
707869
708870 void kbase_region_tracker_term(struct kbase_context *kctx)
709871 {
872
+ WARN(kctx->as_nr != KBASEP_AS_NR_INVALID,
873
+ "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions",
874
+ kctx->tgid, kctx->id);
875
+
710876 kbase_gpu_vm_lock(kctx);
711877 kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same);
712878 kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
713879 kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec);
714880 #if MALI_USE_CSF
715881 WARN_ON(!list_empty(&kctx->csf.event_pages_head));
882
+ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec_fixed);
883
+ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_fixed);
884
+
716885 #endif
717886 kbase_gpu_vm_unlock(kctx);
718887 }
....@@ -724,8 +893,8 @@
724893
725894 static size_t kbase_get_same_va_bits(struct kbase_context *kctx)
726895 {
727
- return min(kbase_get_num_cpu_va_bits(kctx),
728
- (size_t) kctx->kbdev->gpu_props.mmu.va_bits);
896
+ return min_t(size_t, kbase_get_num_cpu_va_bits(kctx),
897
+ kctx->kbdev->gpu_props.mmu.va_bits);
729898 }
730899
731900 int kbase_region_tracker_init(struct kbase_context *kctx)
....@@ -734,19 +903,41 @@
734903 struct kbase_va_region *custom_va_reg = NULL;
735904 size_t same_va_bits = kbase_get_same_va_bits(kctx);
736905 u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
737
- u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
906
+ u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits;
907
+ u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT;
738908 u64 same_va_pages;
739909 u64 same_va_base = 1u;
740910 int err;
911
+#if MALI_USE_CSF
912
+ struct kbase_va_region *exec_va_reg;
913
+ struct kbase_va_region *exec_fixed_va_reg;
914
+ struct kbase_va_region *fixed_va_reg;
915
+
916
+ u64 exec_va_base;
917
+ u64 fixed_va_end;
918
+ u64 exec_fixed_va_base;
919
+ u64 fixed_va_base;
920
+ u64 fixed_va_pages;
921
+#endif
741922
742923 /* Take the lock as kbase_free_alloced_region requires it */
743924 kbase_gpu_vm_lock(kctx);
744925
745926 same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - same_va_base;
927
+
928
+#if MALI_USE_CSF
929
+ if ((same_va_base + same_va_pages) > KBASE_REG_ZONE_EXEC_VA_BASE_64) {
930
+ /* Depending on how the kernel is configured, it's possible (e.g. on aarch64) for
931
+ * same_va_bits to reach 48 bits. Cap same_va_pages so that the same_va zone
932
+ * doesn't cross into the exec_va zone.
933
+ */
934
+ same_va_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - same_va_base;
935
+ }
936
+#endif
937
+
746938 /* all have SAME_VA */
747
- same_va_reg =
748
- kbase_alloc_free_region(&kctx->reg_rbtree_same, same_va_base,
749
- same_va_pages, KBASE_REG_ZONE_SAME_VA);
939
+ same_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, same_va_base,
940
+ same_va_pages, KBASE_REG_ZONE_SAME_VA);
750941
751942 if (!same_va_reg) {
752943 err = -ENOMEM;
....@@ -755,10 +946,7 @@
755946 kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base,
756947 same_va_pages);
757948
758
-#if IS_ENABLED(CONFIG_64BIT)
759
- /* 32-bit clients have custom VA zones */
760
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
761
-#endif
949
+ if (kbase_ctx_compat_mode(kctx)) {
762950 if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
763951 err = -EINVAL;
764952 goto fail_free_same_va;
....@@ -770,10 +958,9 @@
770958 if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
771959 custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
772960
773
- custom_va_reg = kbase_alloc_free_region(
774
- &kctx->reg_rbtree_custom,
775
- KBASE_REG_ZONE_CUSTOM_VA_BASE,
776
- custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
961
+ custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom,
962
+ KBASE_REG_ZONE_CUSTOM_VA_BASE,
963
+ custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
777964
778965 if (!custom_va_reg) {
779966 err = -ENOMEM;
....@@ -782,11 +969,70 @@
782969 kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA,
783970 KBASE_REG_ZONE_CUSTOM_VA_BASE,
784971 custom_va_size);
785
-#if IS_ENABLED(CONFIG_64BIT)
786972 } else {
787973 custom_va_size = 0;
788974 }
789
-#endif
975
+
976
+#if MALI_USE_CSF
977
+ /* The position of EXEC_VA depends on whether the client is 32-bit or 64-bit. */
978
+ exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_64;
979
+
980
+ /* Similarly the end of the FIXED_VA zone also depends on whether the client
981
+ * is 32 or 64-bits.
982
+ */
983
+ fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64;
984
+
985
+ if (kbase_ctx_compat_mode(kctx)) {
986
+ exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_32;
987
+ fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32;
988
+ }
989
+
990
+ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, exec_va_base,
991
+ KBASE_REG_ZONE_EXEC_VA_SIZE);
992
+
993
+ exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_base,
994
+ KBASE_REG_ZONE_EXEC_VA_SIZE, KBASE_REG_ZONE_EXEC_VA);
995
+
996
+ if (!exec_va_reg) {
997
+ err = -ENOMEM;
998
+ goto fail_free_custom_va;
999
+ }
1000
+
1001
+ exec_fixed_va_base = exec_va_base + KBASE_REG_ZONE_EXEC_VA_SIZE;
1002
+
1003
+ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA, exec_fixed_va_base,
1004
+ KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE);
1005
+
1006
+ exec_fixed_va_reg =
1007
+ kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec_fixed,
1008
+ exec_fixed_va_base, KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE,
1009
+ KBASE_REG_ZONE_EXEC_FIXED_VA);
1010
+
1011
+ if (!exec_fixed_va_reg) {
1012
+ err = -ENOMEM;
1013
+ goto fail_free_exec_va;
1014
+ }
1015
+
1016
+ fixed_va_base = exec_fixed_va_base + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE;
1017
+ fixed_va_pages = fixed_va_end - fixed_va_base;
1018
+
1019
+ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_FIXED_VA, fixed_va_base, fixed_va_pages);
1020
+
1021
+ fixed_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_fixed, fixed_va_base,
1022
+ fixed_va_pages, KBASE_REG_ZONE_FIXED_VA);
1023
+
1024
+ kctx->gpu_va_end = fixed_va_end;
1025
+
1026
+ if (!fixed_va_reg) {
1027
+ err = -ENOMEM;
1028
+ goto fail_free_exec_fixed_va;
1029
+ }
1030
+
1031
+ kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg, exec_va_reg,
1032
+ exec_fixed_va_reg, fixed_va_reg);
1033
+
1034
+ INIT_LIST_HEAD(&kctx->csf.event_pages_head);
1035
+#else
7901036 /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is
7911037 * initially U64_MAX
7921038 */
....@@ -794,16 +1040,22 @@
7941040 /* Other zones are 0: kbase_create_context() uses vzalloc */
7951041
7961042 kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg);
797
-
7981043 kctx->gpu_va_end = same_va_base + same_va_pages + custom_va_size;
799
- kctx->jit_va = false;
800
-
801
-#if MALI_USE_CSF
802
- INIT_LIST_HEAD(&kctx->csf.event_pages_head);
8031044 #endif
1045
+ kctx->jit_va = false;
8041046
8051047 kbase_gpu_vm_unlock(kctx);
8061048 return 0;
1049
+
1050
+#if MALI_USE_CSF
1051
+fail_free_exec_fixed_va:
1052
+ kbase_free_alloced_region(exec_fixed_va_reg);
1053
+fail_free_exec_va:
1054
+ kbase_free_alloced_region(exec_va_reg);
1055
+fail_free_custom_va:
1056
+ if (custom_va_reg)
1057
+ kbase_free_alloced_region(custom_va_reg);
1058
+#endif
8071059
8081060 fail_free_same_va:
8091061 kbase_free_alloced_region(same_va_reg);
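
For CSF the initialization above lays the EXEC_VA, EXEC_FIXED_VA and FIXED_VA zones out
back to back: each base is the previous base plus the previous zone's size, and FIXED_VA
runs up to a client-dependent end pfn. A small sketch of that layout arithmetic, with the
real KBASE_REG_ZONE_* constants replaced by parameters:

/* Illustrative sketch only: the back-to-back zone layout computed in
 * kbase_region_tracker_init() for CSF. All inputs are placeholders.
 */
struct csf_zone_layout {
    unsigned long long exec_va_base;
    unsigned long long exec_fixed_va_base;
    unsigned long long fixed_va_base;
    unsigned long long fixed_va_pages;
};

static struct csf_zone_layout compute_csf_layout(unsigned long long exec_va_base,
                                                 unsigned long long exec_va_pages,
                                                 unsigned long long exec_fixed_va_pages,
                                                 unsigned long long fixed_va_end)
{
    struct csf_zone_layout l;

    l.exec_va_base = exec_va_base;
    l.exec_fixed_va_base = exec_va_base + exec_va_pages;
    l.fixed_va_base = l.exec_fixed_va_base + exec_fixed_va_pages;
    l.fixed_va_pages = fixed_va_end - l.fixed_va_base;
    return l;
}
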
....@@ -834,7 +1086,9 @@
8341086 }
8351087
8361088 /**
837
- * Determine if any allocations have been made on a context's region tracker
1089
+ * kbase_region_tracker_has_allocs - Determine if any allocations have been made
1090
+ * on a context's region tracker
1091
+ *
8381092 * @kctx: KBase context
8391093 *
8401094 * Check the context to determine if any allocations have been made yet from
....@@ -862,6 +1116,8 @@
8621116 unsigned long zone_bits = KBASE_REG_ZONE(zone_idx);
8631117 unsigned long reg_zone;
8641118
1119
+ if (!kbase_is_ctx_reg_zone(zone_bits))
1120
+ continue;
8651121 zone = kbase_ctx_reg_zone_get(kctx, zone_bits);
8661122 zone_base_addr = zone->base_pfn << PAGE_SHIFT;
8671123
....@@ -901,7 +1157,6 @@
9011157 return false;
9021158 }
9031159
904
-#if IS_ENABLED(CONFIG_64BIT)
9051160 static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
9061161 u64 jit_va_pages)
9071162 {
....@@ -950,9 +1205,8 @@
9501205 * Create a custom VA zone at the end of the VA for allocations which
9511206 * JIT can use so it doesn't have to allocate VA from the kernel.
9521207 */
953
- custom_va_reg =
954
- kbase_alloc_free_region(&kctx->reg_rbtree_custom, jit_va_start,
955
- jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA);
1208
+ custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, jit_va_start,
1209
+ jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA);
9561210
9571211 /*
9581212 * The context will be destroyed if we fail here so no point
....@@ -969,7 +1223,6 @@
9691223 kbase_region_tracker_insert(custom_va_reg);
9701224 return 0;
9711225 }
972
-#endif
9731226
9741227 int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
9751228 int max_allocations, int trim_level, int group_id,
....@@ -1010,10 +1263,8 @@
10101263 goto exit_unlock;
10111264 }
10121265
1013
-#if IS_ENABLED(CONFIG_64BIT)
1014
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
1266
+ if (!kbase_ctx_compat_mode(kctx))
10151267 err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
1016
-#endif
10171268 /*
10181269 * Nothing to do for 32-bit clients, JIT uses the existing
10191270 * custom VA zone.
....@@ -1039,6 +1290,7 @@
10391290
10401291 int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages)
10411292 {
1293
+#if !MALI_USE_CSF
10421294 struct kbase_va_region *exec_va_reg;
10431295 struct kbase_reg_zone *exec_va_zone;
10441296 struct kbase_reg_zone *target_zone;
....@@ -1047,6 +1299,7 @@
10471299 unsigned long target_zone_bits;
10481300 u64 exec_va_start;
10491301 int err;
1302
+#endif
10501303
10511304 /* The EXEC_VA zone shall be created by making space either:
10521305 * - for 64-bit clients, at the end of the process's address space
....@@ -1060,6 +1313,12 @@
10601313 if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES)
10611314 return -EINVAL;
10621315
1316
+#if MALI_USE_CSF
1317
+ /* For CSF GPUs we now setup the EXEC_VA zone during initialization,
1318
+ * so this request is a null-op.
1319
+ */
1320
+ return 0;
1321
+#else
10631322 kbase_gpu_vm_lock(kctx);
10641323
10651324 /* Verify that we've not already created a EXEC_VA zone, and that the
....@@ -1081,17 +1340,14 @@
10811340 goto exit_unlock;
10821341 }
10831342
1084
-#if IS_ENABLED(CONFIG_64BIT)
1085
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
1086
-#endif
1343
+ if (kbase_ctx_compat_mode(kctx)) {
10871344 /* 32-bit client: take from CUSTOM_VA zone */
10881345 target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA;
1089
-#if IS_ENABLED(CONFIG_64BIT)
10901346 } else {
10911347 /* 64-bit client: take from SAME_VA zone */
10921348 target_zone_bits = KBASE_REG_ZONE_SAME_VA;
10931349 }
1094
-#endif
1350
+
10951351 target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits);
10961352 target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT;
10971353
....@@ -1119,10 +1375,8 @@
11191375 /* Taken from the end of the target zone */
11201376 exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages;
11211377
1122
- exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec,
1123
- exec_va_start,
1124
- exec_va_pages,
1125
- KBASE_REG_ZONE_EXEC_VA);
1378
+ exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_start,
1379
+ exec_va_pages, KBASE_REG_ZONE_EXEC_VA);
11261380 if (!exec_va_reg) {
11271381 err = -ENOMEM;
11281382 goto exit_unlock;
....@@ -1145,6 +1399,7 @@
11451399 exit_unlock:
11461400 kbase_gpu_vm_unlock(kctx);
11471401 return err;
1402
+#endif /* MALI_USE_CSF */
11481403 }
11491404
11501405 #if MALI_USE_CSF
....@@ -1164,10 +1419,9 @@
11641419
11651420 kbdev->csf.shared_reg_rbtree = RB_ROOT;
11661421
1167
- shared_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree,
1168
- shared_reg_start_pfn,
1169
- shared_reg_size,
1170
- KBASE_REG_ZONE_MCU_SHARED);
1422
+ shared_reg =
1423
+ kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, shared_reg_start_pfn,
1424
+ shared_reg_size, KBASE_REG_ZONE_MCU_SHARED);
11711425 if (!shared_reg)
11721426 return -ENOMEM;
11731427
....@@ -1176,10 +1430,30 @@
11761430 }
11771431 #endif
11781432
1433
+static void kbasep_mem_page_size_init(struct kbase_device *kbdev)
1434
+{
1435
+#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE)
1436
+#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC)
1437
+ kbdev->pagesize_2mb = true;
1438
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC) != 1) {
1439
+ dev_warn(
1440
+ kbdev->dev,
1441
+ "2MB page is enabled by force while current GPU-HW doesn't meet the requirement to do so.\n");
1442
+ }
1443
+#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */
1444
+ kbdev->pagesize_2mb = false;
1445
+#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */
1446
+#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */
1447
+ /* Set it to the default based on which GPU is present */
1448
+ kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC);
1449
+#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */
1450
+}
1451
+
11791452 int kbase_mem_init(struct kbase_device *kbdev)
11801453 {
11811454 int err = 0;
11821455 struct kbasep_mem_device *memdev;
1456
+ char va_region_slab_name[VA_REGION_SLAB_NAME_SIZE];
11831457 #if IS_ENABLED(CONFIG_OF)
11841458 struct device_node *mgm_node = NULL;
11851459 #endif
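
kbasep_mem_page_size_init() above resolves whether 2MB pages are used from two inputs: a
build-time override (CONFIG_LARGE_PAGE_ALLOC_OVERRIDE / CONFIG_LARGE_PAGE_ALLOC) and the
GPU's LARGE_PAGE_ALLOC feature bit, with the override taking precedence. A reduced sketch
of that precedence, using plain booleans in place of the Kconfig symbols and the HW
feature query:

#include <stdbool.h>

/* Illustrative sketch only: a build-time override wins, otherwise follow the
 * GPU feature bit. The parameters stand in for Kconfig/HW state.
 */
static bool choose_2mb_pages(bool override_present, bool override_value,
                             bool hw_supports_large_pages)
{
    if (override_present)
        return override_value;      /* forced on or off at build time */
    return hw_supports_large_pages; /* default follows the GPU */
}
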
....@@ -1188,6 +1462,20 @@
11881462
11891463 memdev = &kbdev->memdev;
11901464
1465
+ kbasep_mem_page_size_init(kbdev);
1466
+
1467
+ scnprintf(va_region_slab_name, VA_REGION_SLAB_NAME_SIZE, VA_REGION_SLAB_NAME_PREFIX "%s",
1468
+ kbdev->devname);
1469
+
1470
+ /* Initialize slab cache for kbase_va_regions */
1471
+ kbdev->va_region_slab =
1472
+ kmem_cache_create(va_region_slab_name, sizeof(struct kbase_va_region), 0, 0, NULL);
1473
+ if (kbdev->va_region_slab == NULL) {
1474
+ dev_err(kbdev->dev, "Failed to create va_region_slab\n");
1475
+ return -ENOMEM;
1476
+ }
1477
+
1478
+ kbase_mem_migrate_init(kbdev);
11911479 kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults,
11921480 KBASE_MEM_POOL_MAX_SIZE_KCTX);
11931481
....@@ -1250,8 +1538,7 @@
12501538 kbase_mem_pool_group_config_set_max_size(&mem_pool_defaults,
12511539 KBASE_MEM_POOL_MAX_SIZE_KBDEV);
12521540
1253
- err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev,
1254
- &mem_pool_defaults, NULL);
1541
+ err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev, &mem_pool_defaults, NULL);
12551542 }
12561543
12571544 return err;
....@@ -1277,6 +1564,11 @@
12771564
12781565 kbase_mem_pool_group_term(&kbdev->mem_pools);
12791566
1567
+ kbase_mem_migrate_term(kbdev);
1568
+
1569
+ kmem_cache_destroy(kbdev->va_region_slab);
1570
+ kbdev->va_region_slab = NULL;
1571
+
12801572 WARN_ON(kbdev->total_gpu_pages);
12811573 WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root));
12821574 WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root));
....@@ -1288,7 +1580,9 @@
12881580 KBASE_EXPORT_TEST_API(kbase_mem_term);
12891581
12901582 /**
1291
- * Allocate a free region object.
1583
+ * kbase_alloc_free_region - Allocate a free region object.
1584
+ *
1585
+ * @kbdev: kbase device
12921586 * @rbtree: Backlink to the red-black tree of memory regions.
12931587 * @start_pfn: The Page Frame Number in GPU virtual address space.
12941588 * @nr_pages: The size of the region in pages.
....@@ -1299,9 +1593,10 @@
12991593 *
13001594 * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA.
13011595 *
1596
+ * Return: pointer to the allocated region object on success, NULL otherwise.
13021597 */
1303
-struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
1304
- u64 start_pfn, size_t nr_pages, int zone)
1598
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree,
1599
+ u64 start_pfn, size_t nr_pages, int zone)
13051600 {
13061601 struct kbase_va_region *new_reg;
13071602
....@@ -1313,12 +1608,13 @@
13131608 /* 64-bit address range is the max */
13141609 KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
13151610
1316
- new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
1611
+ new_reg = kmem_cache_zalloc(kbdev->va_region_slab, GFP_KERNEL);
13171612
13181613 if (!new_reg)
13191614 return NULL;
13201615
1321
- new_reg->va_refcnt = 1;
1616
+ kbase_refcount_set(&new_reg->va_refcnt, 1);
1617
+ atomic_set(&new_reg->no_user_free_count, 0);
13221618 new_reg->cpu_alloc = NULL; /* no alloc bound yet */
13231619 new_reg->gpu_alloc = NULL; /* no alloc bound yet */
13241620 new_reg->rbtree = rbtree;
....@@ -1337,35 +1633,9 @@
13371633
13381634 KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
13391635
1340
-static struct kbase_context *kbase_reg_flags_to_kctx(
1341
- struct kbase_va_region *reg)
1342
-{
1343
- struct kbase_context *kctx = NULL;
1344
- struct rb_root *rbtree = reg->rbtree;
1345
-
1346
- switch (reg->flags & KBASE_REG_ZONE_MASK) {
1347
- case KBASE_REG_ZONE_CUSTOM_VA:
1348
- kctx = container_of(rbtree, struct kbase_context,
1349
- reg_rbtree_custom);
1350
- break;
1351
- case KBASE_REG_ZONE_SAME_VA:
1352
- kctx = container_of(rbtree, struct kbase_context,
1353
- reg_rbtree_same);
1354
- break;
1355
- case KBASE_REG_ZONE_EXEC_VA:
1356
- kctx = container_of(rbtree, struct kbase_context,
1357
- reg_rbtree_exec);
1358
- break;
1359
- default:
1360
- WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags);
1361
- break;
1362
- }
1363
-
1364
- return kctx;
1365
-}
1366
-
13671636 /**
1368
- * Free a region object.
1637
+ * kbase_free_alloced_region - Free a region object.
1638
+ *
13691639 * @reg: Region
13701640 *
13711641 * The described region must be freed of any mapping.
....@@ -1374,6 +1644,7 @@
13741644 * alloc object will be released.
13751645 * It is a bug if no alloc object exists for non-free regions.
13761646 *
1647
+ * If region is KBASE_REG_ZONE_MCU_SHARED it is freed
13771648 */
13781649 void kbase_free_alloced_region(struct kbase_va_region *reg)
13791650 {
....@@ -1397,6 +1668,13 @@
13971668 (void *)reg);
13981669 #if MALI_USE_CSF
13991670 if (reg->flags & KBASE_REG_CSF_EVENT)
1671
+ /*
1672
+ * This should not be reachable if called from 'mcu_shared' functions
1673
+ * such as:
1674
+ * kbase_csf_firmware_mcu_shared_mapping_init
1675
+ * kbase_csf_firmware_mcu_shared_mapping_term
1676
+ */
1677
+
14001678 kbase_unlink_event_mem_page(kctx, reg);
14011679 #endif
14021680
....@@ -1410,8 +1688,6 @@
14101688 * on the list at termination time of the region tracker.
14111689 */
14121690 if (!list_empty(&reg->gpu_alloc->evict_node)) {
1413
- mutex_unlock(&kctx->jit_evict_lock);
1414
-
14151691 /*
14161692 * Unlink the physical allocation before unmaking it
14171693 * evictable so that the allocation isn't grown back to
....@@ -1421,6 +1697,8 @@
14211697 reg->cpu_alloc->reg = NULL;
14221698 if (reg->cpu_alloc != reg->gpu_alloc)
14231699 reg->gpu_alloc->reg = NULL;
1700
+
1701
+ mutex_unlock(&kctx->jit_evict_lock);
14241702
14251703 /*
14261704 * If a region has been made evictable then we must
....@@ -1457,7 +1735,9 @@
14571735
14581736 KBASE_EXPORT_TEST_API(kbase_free_alloced_region);
14591737
1460
-int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align)
1738
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
1739
+ u64 addr, size_t nr_pages, size_t align,
1740
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
14611741 {
14621742 int err;
14631743 size_t i = 0;
....@@ -1494,41 +1774,46 @@
14941774 KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
14951775 for (i = 0; i < alloc->imported.alias.nents; i++) {
14961776 if (alloc->imported.alias.aliased[i].alloc) {
1497
- err = kbase_mmu_insert_pages(kctx->kbdev,
1498
- &kctx->mmu,
1499
- reg->start_pfn + (i * stride),
1500
- alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
1501
- alloc->imported.alias.aliased[i].length,
1502
- reg->flags & gwt_mask,
1503
- kctx->as_nr,
1504
- group_id);
1777
+ err = kbase_mmu_insert_aliased_pages(
1778
+ kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride),
1779
+ alloc->imported.alias.aliased[i].alloc->pages +
1780
+ alloc->imported.alias.aliased[i].offset,
1781
+ alloc->imported.alias.aliased[i].length,
1782
+ reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info,
1783
+ NULL);
15051784 if (err)
1506
- goto bad_insert;
1785
+ goto bad_aliased_insert;
15071786
15081787 /* Note: mapping count is tracked at alias
15091788 * creation time
15101789 */
15111790 } else {
1512
- err = kbase_mmu_insert_single_page(kctx,
1513
- reg->start_pfn + i * stride,
1514
- kctx->aliasing_sink_page,
1791
+ err = kbase_mmu_insert_single_aliased_page(
1792
+ kctx, reg->start_pfn + i * stride, kctx->aliasing_sink_page,
15151793 alloc->imported.alias.aliased[i].length,
1516
- (reg->flags & mask & gwt_mask) | attr,
1517
- group_id);
1794
+ (reg->flags & mask & gwt_mask) | attr, group_id,
1795
+ mmu_sync_info);
15181796
15191797 if (err)
1520
- goto bad_insert;
1798
+ goto bad_aliased_insert;
15211799 }
15221800 }
15231801 } else {
1524
- err = kbase_mmu_insert_pages(kctx->kbdev,
1525
- &kctx->mmu,
1526
- reg->start_pfn,
1527
- kbase_get_gpu_phy_pages(reg),
1528
- kbase_reg_current_backed_size(reg),
1529
- reg->flags & gwt_mask,
1530
- kctx->as_nr,
1531
- group_id);
1802
+ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM ||
1803
+ reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
1804
+
1805
+ err = kbase_mmu_insert_imported_pages(
1806
+ kctx->kbdev, &kctx->mmu, reg->start_pfn,
1807
+ kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg),
1808
+ reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, reg);
1809
+ } else {
1810
+ err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
1811
+ kbase_get_gpu_phy_pages(reg),
1812
+ kbase_reg_current_backed_size(reg),
1813
+ reg->flags & gwt_mask, kctx->as_nr, group_id,
1814
+ mmu_sync_info, reg, true);
1815
+ }
1816
+
15321817 if (err)
15331818 goto bad_insert;
15341819 kbase_mem_phy_alloc_gpu_mapped(alloc);
....@@ -1538,9 +1823,9 @@
15381823 !WARN_ON(reg->nr_pages < reg->gpu_alloc->nents) &&
15391824 reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM &&
15401825 reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
1541
- /* For padded imported dma-buf memory, map the dummy aliasing
1542
- * page from the end of the dma-buf pages, to the end of the
1543
- * region using a read only mapping.
1826
+ /* For padded imported dma-buf or user-buf memory, map the dummy
1827
+ * aliasing page from the end of the imported pages, to the end of
1828
+ * the region using a read only mapping.
15441829 *
15451830 * Only map when it's imported dma-buf memory that is currently
15461831 * mapped.
....@@ -1548,37 +1833,46 @@
15481833 * Assume reg->gpu_alloc->nents is the number of actual pages
15491834 * in the dma-buf memory.
15501835 */
1551
- err = kbase_mmu_insert_single_page(kctx,
1552
- reg->start_pfn + reg->gpu_alloc->nents,
1553
- kctx->aliasing_sink_page,
1554
- reg->nr_pages - reg->gpu_alloc->nents,
1555
- (reg->flags | KBASE_REG_GPU_RD) &
1556
- ~KBASE_REG_GPU_WR,
1557
- KBASE_MEM_GROUP_SINK);
1836
+ err = kbase_mmu_insert_single_imported_page(
1837
+ kctx, reg->start_pfn + reg->gpu_alloc->nents, kctx->aliasing_sink_page,
1838
+ reg->nr_pages - reg->gpu_alloc->nents,
1839
+ (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK,
1840
+ mmu_sync_info);
15581841 if (err)
15591842 goto bad_insert;
15601843 }
15611844
15621845 return err;
15631846
1564
-bad_insert:
1565
- kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
1566
- reg->start_pfn, reg->nr_pages,
1567
- kctx->as_nr);
1847
+bad_aliased_insert:
1848
+ while (i-- > 0) {
1849
+ struct tagged_addr *phys_alloc = NULL;
1850
+ u64 const stride = alloc->imported.alias.stride;
15681851
1569
- kbase_remove_va_region(reg);
1852
+ if (alloc->imported.alias.aliased[i].alloc != NULL)
1853
+ phys_alloc = alloc->imported.alias.aliased[i].alloc->pages +
1854
+ alloc->imported.alias.aliased[i].offset;
1855
+
1856
+ kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride),
1857
+ phys_alloc, alloc->imported.alias.aliased[i].length,
1858
+ alloc->imported.alias.aliased[i].length, kctx->as_nr,
1859
+ false);
1860
+ }
1861
+bad_insert:
1862
+ kbase_remove_va_region(kctx->kbdev, reg);
15701863
15711864 return err;
15721865 }
15731866
15741867 KBASE_EXPORT_TEST_API(kbase_gpu_mmap);
15751868
1576
-static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
1577
- struct kbase_mem_phy_alloc *alloc, bool writeable);
1869
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc,
1870
+ struct kbase_va_region *reg, bool writeable);
15781871
15791872 int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
15801873 {
15811874 int err = 0;
1875
+ struct kbase_mem_phy_alloc *alloc;
15821876
15831877 if (reg->start_pfn == 0)
15841878 return 0;
....@@ -1586,43 +1880,98 @@
15861880 if (!reg->gpu_alloc)
15871881 return -EINVAL;
15881882
1589
- /* Tear down down GPU page tables, depending on memory type. */
1590
- switch (reg->gpu_alloc->type) {
1591
- case KBASE_MEM_TYPE_ALIAS: /* Fall-through */
1592
- case KBASE_MEM_TYPE_IMPORTED_UMM:
1593
- err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
1594
- reg->start_pfn, reg->nr_pages, kctx->as_nr);
1883
+ alloc = reg->gpu_alloc;
1884
+
1885
+ /* Tear down GPU page tables, depending on memory type. */
1886
+ switch (alloc->type) {
1887
+ case KBASE_MEM_TYPE_ALIAS: {
1888
+ size_t i = 0;
1889
+ /* Due to the way the number of valid PTEs and ATEs are tracked
1890
+ * currently, only the GPU virtual range that is backed & mapped
1891
+ * should be passed to the kbase_mmu_teardown_pages() function,
1892
+ * hence individual aliased regions need to be unmapped
1893
+ * separately.
1894
+ */
1895
+ for (i = 0; i < alloc->imported.alias.nents; i++) {
1896
+ struct tagged_addr *phys_alloc = NULL;
1897
+ int err_loop;
1898
+
1899
+ if (alloc->imported.alias.aliased[i].alloc != NULL)
1900
+ phys_alloc = alloc->imported.alias.aliased[i].alloc->pages +
1901
+ alloc->imported.alias.aliased[i].offset;
1902
+
1903
+ err_loop = kbase_mmu_teardown_pages(
1904
+ kctx->kbdev, &kctx->mmu,
1905
+ reg->start_pfn + (i * alloc->imported.alias.stride),
1906
+ phys_alloc, alloc->imported.alias.aliased[i].length,
1907
+ alloc->imported.alias.aliased[i].length, kctx->as_nr,
1908
+ false);
1909
+
1910
+ if (WARN_ON_ONCE(err_loop))
1911
+ err = err_loop;
1912
+ }
1913
+ }
15951914 break;
1596
- default:
1597
- err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
1598
- reg->start_pfn, kbase_reg_current_backed_size(reg),
1599
- kctx->as_nr);
1915
+ case KBASE_MEM_TYPE_IMPORTED_UMM: {
1916
+ size_t nr_phys_pages = reg->nr_pages;
1917
+ size_t nr_virt_pages = reg->nr_pages;
1918
+ /* If the region has import padding and falls under the threshold for
1919
+ * issuing a partial GPU cache flush, we want to reduce the number of
1920
+ * physical pages that get flushed.
1921
+ *
1922
+ * This is symmetric with the case of mapping the memory, which first maps
1923
+ * each imported physical page to a separate virtual page, and then
1924
+ * maps the single aliasing sink page to each of the virtual padding
1925
+ * pages.
1926
+ */
1927
+ if (reg->flags & KBASE_REG_IMPORT_PAD)
1928
+ nr_phys_pages = alloc->nents + 1;
1929
+
1930
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
1931
+ alloc->pages, nr_phys_pages, nr_virt_pages,
1932
+ kctx->as_nr, true);
1933
+ }
1934
+ break;
1935
+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
1936
+ size_t nr_reg_pages = kbase_reg_current_backed_size(reg);
1937
+
1938
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
1939
+ alloc->pages, nr_reg_pages, nr_reg_pages,
1940
+ kctx->as_nr, true);
1941
+ }
1942
+ break;
1943
+ default: {
1944
+ size_t nr_reg_pages = kbase_reg_current_backed_size(reg);
1945
+
1946
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
1947
+ alloc->pages, nr_reg_pages, nr_reg_pages,
1948
+ kctx->as_nr, false);
1949
+ }
16001950 break;
16011951 }
16021952
16031953 /* Update tracking, and other cleanup, depending on memory type. */
1604
- switch (reg->gpu_alloc->type) {
1954
+ switch (alloc->type) {
16051955 case KBASE_MEM_TYPE_ALIAS:
16061956 /* We mark the source allocs as unmapped from the GPU when
16071957 * putting reg's allocs
16081958 */
16091959 break;
16101960 case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
1611
- struct kbase_alloc_import_user_buf *user_buf =
1612
- &reg->gpu_alloc->imported.user_buf;
1961
+ struct kbase_alloc_import_user_buf *user_buf = &alloc->imported.user_buf;
16131962
1614
- if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) {
1615
- user_buf->current_mapping_usage_count &=
1616
- ~PINNED_ON_IMPORT;
1963
+ if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) {
1964
+ user_buf->current_mapping_usage_count &= ~PINNED_ON_IMPORT;
16171965
1618
- /* The allocation could still have active mappings. */
1619
- if (user_buf->current_mapping_usage_count == 0) {
1620
- kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc,
1621
- (reg->flags & KBASE_REG_GPU_WR));
1622
- }
1966
+ /* The allocation could still have active mappings. */
1967
+ if (user_buf->current_mapping_usage_count == 0) {
1968
+ kbase_jd_user_buf_unmap(kctx, alloc, reg,
1969
+ (reg->flags &
1970
+ (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)));
16231971 }
16241972 }
1625
- /* Fall-through */
1973
+ }
1974
+ fallthrough;
16261975 default:
16271976 kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
16281977 break;
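
In the reworked kbase_gpu_munmap() above, the page counts passed to
kbase_mmu_teardown_pages() differ per allocation type: a padded dma-buf import flushes
alloc->nents real pages plus the one aliasing sink page, while the virtual range still
spans the whole region. That bookkeeping, reduced to a helper with placeholder
parameters:

#include <stdbool.h>
#include <stddef.h>

/* Illustrative sketch only: page-count bookkeeping for the UMM branch above. */
static void umm_teardown_counts(size_t region_pages, size_t backed_pages,
                                bool has_import_padding,
                                size_t *nr_phys_pages, size_t *nr_virt_pages)
{
    *nr_virt_pages = region_pages;
    /* A padded import flushes the backed pages plus one aliasing sink page;
     * otherwise the physical count simply matches the region size.
     */
    *nr_phys_pages = has_import_padding ? backed_pages + 1 : region_pages;
}
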
....@@ -1741,7 +2090,8 @@
17412090 BUG_ON(!cpu_page);
17422091 BUG_ON(offset + size > PAGE_SIZE);
17432092
1744
- dma_addr = kbase_dma_addr(cpu_page) + offset;
2093
+ dma_addr = kbase_dma_addr_from_tagged(t_cpu_pa) + offset;
2094
+
17452095 if (sync_fn == KBASE_SYNC_TO_CPU)
17462096 dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr,
17472097 size, DMA_BIDIRECTIONAL);
....@@ -1752,29 +2102,30 @@
17522102 void *src = NULL;
17532103 void *dst = NULL;
17542104 struct page *gpu_page;
2105
+ dma_addr_t dma_addr;
17552106
17562107 if (WARN(!gpu_pa, "No GPU PA found for infinite cache op"))
17572108 return;
17582109
17592110 gpu_page = pfn_to_page(PFN_DOWN(gpu_pa));
2111
+ dma_addr = kbase_dma_addr_from_tagged(t_gpu_pa) + offset;
17602112
17612113 if (sync_fn == KBASE_SYNC_TO_DEVICE) {
17622114 src = ((unsigned char *)kmap(cpu_page)) + offset;
17632115 dst = ((unsigned char *)kmap(gpu_page)) + offset;
17642116 } else if (sync_fn == KBASE_SYNC_TO_CPU) {
1765
- dma_sync_single_for_cpu(kctx->kbdev->dev,
1766
- kbase_dma_addr(gpu_page) + offset,
1767
- size, DMA_BIDIRECTIONAL);
2117
+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, size,
2118
+ DMA_BIDIRECTIONAL);
17682119 src = ((unsigned char *)kmap(gpu_page)) + offset;
17692120 dst = ((unsigned char *)kmap(cpu_page)) + offset;
17702121 }
2122
+
17712123 memcpy(dst, src, size);
17722124 kunmap(gpu_page);
17732125 kunmap(cpu_page);
17742126 if (sync_fn == KBASE_SYNC_TO_DEVICE)
1775
- dma_sync_single_for_device(kctx->kbdev->dev,
1776
- kbase_dma_addr(gpu_page) + offset,
1777
- size, DMA_BIDIRECTIONAL);
2127
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, size,
2128
+ DMA_BIDIRECTIONAL);
17782129 }
17792130 }
17802131
....@@ -1920,29 +2271,27 @@
19202271 __func__, (void *)reg, (void *)kctx);
19212272 lockdep_assert_held(&kctx->reg_lock);
19222273
1923
- if (reg->flags & KBASE_REG_NO_USER_FREE) {
2274
+ if (kbase_va_region_is_no_user_free(reg)) {
19242275 dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n");
19252276 return -EINVAL;
19262277 }
19272278
1928
- /*
1929
- * Unlink the physical allocation before unmaking it evictable so
1930
- * that the allocation isn't grown back to its last backed size
1931
- * as we're going to unmap it anyway.
1932
- */
1933
- reg->cpu_alloc->reg = NULL;
1934
- if (reg->cpu_alloc != reg->gpu_alloc)
1935
- reg->gpu_alloc->reg = NULL;
1936
-
1937
- /*
1938
- * If a region has been made evictable then we must unmake it
2279
+ /* If a region has been made evictable then we must unmake it
19392280 * before trying to free it.
19402281 * If the memory hasn't been reclaimed it will be unmapped and freed
19412282 * below, if it has been reclaimed then the operations below are no-ops.
19422283 */
19432284 if (reg->flags & KBASE_REG_DONT_NEED) {
1944
- KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
1945
- KBASE_MEM_TYPE_NATIVE);
2285
+ WARN_ON(reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE);
2286
+ mutex_lock(&kctx->jit_evict_lock);
2287
+ /* Unlink the physical allocation before unmaking it evictable so
2288
+ * that the allocation isn't grown back to its last backed size
2289
+ * as we're going to unmap it anyway.
2290
+ */
2291
+ reg->cpu_alloc->reg = NULL;
2292
+ if (reg->cpu_alloc != reg->gpu_alloc)
2293
+ reg->gpu_alloc->reg = NULL;
2294
+ mutex_unlock(&kctx->jit_evict_lock);
19462295 kbase_mem_evictable_unmake(reg->gpu_alloc);
19472296 }
19482297
....@@ -1952,22 +2301,35 @@
19522301 goto out;
19532302 }
19542303
2304
+#if MALI_USE_CSF
2305
+ if (((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_FIXED_VA) ||
2306
+ ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_EXEC_FIXED_VA)) {
2307
+ if (reg->flags & KBASE_REG_FIXED_ADDRESS)
2308
+ atomic64_dec(&kctx->num_fixed_allocs);
2309
+ else
2310
+ atomic64_dec(&kctx->num_fixable_allocs);
2311
+ }
2312
+#endif
2313
+
19552314 /* This will also free the physical pages */
19562315 kbase_free_alloced_region(reg);
19572316
1958
- out:
2317
+out:
19592318 return err;
19602319 }
19612320
19622321 KBASE_EXPORT_TEST_API(kbase_mem_free_region);
19632322
19642323 /**
1965
- * Free the region from the GPU and unregister it.
2324
+ * kbase_mem_free - Free the region from the GPU and unregister it.
2325
+ *
19662326 * @kctx: KBase context
19672327 * @gpu_addr: GPU address to free
19682328 *
19692329 * This function implements the free operation on a memory segment.
19702330 * It will loudly fail if called with outstanding mappings.
2331
+ *
2332
+ * Return: 0 on success.
19712333 */
19722334 int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
19732335 {
....@@ -1979,12 +2341,14 @@
19792341 __func__, gpu_addr, (void *)kctx);
19802342
19812343 if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) {
1982
- dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid");
2344
+ dev_warn(kctx->kbdev->dev, "%s: gpu_addr parameter is invalid", __func__);
19832345 return -EINVAL;
19842346 }
19852347
19862348 if (gpu_addr == 0) {
1987
- dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n");
2349
+ dev_warn(kctx->kbdev->dev,
2350
+ "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using %s\n",
2351
+ __func__);
19882352 return -EINVAL;
19892353 }
19902354 kbase_gpu_vm_lock(kctx);
....@@ -2010,8 +2374,8 @@
20102374 /* Validate the region */
20112375 reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
20122376 if (kbase_is_region_invalid_or_free(reg)) {
2013
- dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX",
2014
- gpu_addr);
2377
+ dev_warn(kctx->kbdev->dev, "%s called with nonexistent gpu_addr 0x%llX",
2378
+ __func__, gpu_addr);
20152379 err = -EINVAL;
20162380 goto out_unlock;
20172381 }
....@@ -2026,7 +2390,7 @@
20262390 err = kbase_mem_free_region(kctx, reg);
20272391 }
20282392
2029
- out_unlock:
2393
+out_unlock:
20302394 kbase_gpu_vm_unlock(kctx);
20312395 return err;
20322396 }
....@@ -2126,11 +2490,19 @@
21262490 if (flags & BASEP_MEM_PERMANENT_KERNEL_MAPPING)
21272491 reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING;
21282492
2129
- if (flags & BASEP_MEM_NO_USER_FREE)
2130
- reg->flags |= KBASE_REG_NO_USER_FREE;
2493
+ if (flags & BASEP_MEM_NO_USER_FREE) {
2494
+ kbase_gpu_vm_lock(kctx);
2495
+ kbase_va_region_no_user_free_inc(reg);
2496
+ kbase_gpu_vm_unlock(kctx);
2497
+ }
21312498
21322499 if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE)
21332500 reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE;
2501
+
2502
+#if MALI_USE_CSF
2503
+ if (flags & BASE_MEM_FIXED)
2504
+ reg->flags |= KBASE_REG_FIXED_ADDRESS;
2505
+#endif
21342506
21352507 return 0;
21362508 }
....@@ -2174,18 +2546,14 @@
21742546
21752547 tp = alloc->pages + alloc->nents;
21762548
2177
-#ifdef CONFIG_MALI_2MB_ALLOC
21782549 /* Check if we have enough pages requested so we can allocate a large
21792550 * page (512 * 4KB = 2MB )
21802551 */
2181
- if (nr_left >= (SZ_2M / SZ_4K)) {
2552
+ if (kbdev->pagesize_2mb && nr_left >= (SZ_2M / SZ_4K)) {
21822553 int nr_lp = nr_left / (SZ_2M / SZ_4K);
21832554
2184
- res = kbase_mem_pool_alloc_pages(
2185
- &kctx->mem_pools.large[alloc->group_id],
2186
- nr_lp * (SZ_2M / SZ_4K),
2187
- tp,
2188
- true);
2555
+ res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id],
2556
+ nr_lp * (SZ_2M / SZ_4K), tp, true, kctx->task);
21892557
21902558 if (res > 0) {
21912559 nr_left -= res;
....@@ -2239,7 +2607,7 @@
22392607
22402608 err = kbase_mem_pool_grow(
22412609 &kctx->mem_pools.large[alloc->group_id],
2242
- 1);
2610
+ 1, kctx->task);
22432611 if (err)
22442612 break;
22452613 } while (1);
....@@ -2280,13 +2648,11 @@
22802648 }
22812649 }
22822650 }
2283
-no_new_partial:
2284
-#endif
22852651
2652
+no_new_partial:
22862653 if (nr_left) {
2287
- res = kbase_mem_pool_alloc_pages(
2288
- &kctx->mem_pools.small[alloc->group_id],
2289
- nr_left, tp, false);
2654
+ res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[alloc->group_id], nr_left,
2655
+ tp, false, kctx->task);
22902656 if (res <= 0)
22912657 goto alloc_failed;
22922658 }
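
The allocation path above now gates the 2MB fast path on the runtime kbdev->pagesize_2mb
flag instead of CONFIG_MALI_2MB_ALLOC: as many 512-page (2MB) blocks as possible are
requested from the large pool and the remainder falls through to the 4KB pool. The split
itself is just integer division, sketched below.

#include <stddef.h>

#define PAGES_PER_2MB 512u /* SZ_2M / SZ_4K */

/* Illustrative sketch only: split a request into 2MB blocks plus a 4KB tail. */
static void split_request(size_t nr_pages, size_t *nr_2mb_blocks, size_t *nr_4kb_pages)
{
    *nr_2mb_blocks = nr_pages / PAGES_PER_2MB;
    *nr_4kb_pages = nr_pages % PAGES_PER_2MB;
}
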
....@@ -2345,17 +2711,16 @@
23452711
23462712 lockdep_assert_held(&pool->pool_lock);
23472713
2348
-#if !defined(CONFIG_MALI_2MB_ALLOC)
2349
- WARN_ON(pool->order);
2350
-#endif
2714
+ kctx = alloc->imported.native.kctx;
2715
+ kbdev = kctx->kbdev;
2716
+
2717
+ if (!kbdev->pagesize_2mb)
2718
+ WARN_ON(pool->order);
23512719
23522720 if (alloc->reg) {
23532721 if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
23542722 goto invalid_request;
23552723 }
2356
-
2357
- kctx = alloc->imported.native.kctx;
2358
- kbdev = kctx->kbdev;
23592724
23602725 lockdep_assert_held(&kctx->mem_partials_lock);
23612726
....@@ -2375,8 +2740,7 @@
23752740 tp = alloc->pages + alloc->nents;
23762741 new_pages = tp;
23772742
2378
-#ifdef CONFIG_MALI_2MB_ALLOC
2379
- if (pool->order) {
2743
+ if (kbdev->pagesize_2mb && pool->order) {
23802744 int nr_lp = nr_left / (SZ_2M / SZ_4K);
23812745
23822746 res = kbase_mem_pool_alloc_pages_locked(pool,
....@@ -2460,15 +2824,12 @@
24602824 if (nr_left)
24612825 goto alloc_failed;
24622826 } else {
2463
-#endif
24642827 res = kbase_mem_pool_alloc_pages_locked(pool,
24652828 nr_left,
24662829 tp);
24672830 if (res <= 0)
24682831 goto alloc_failed;
2469
-#ifdef CONFIG_MALI_2MB_ALLOC
24702832 }
2471
-#endif
24722833
24732834 KBASE_TLSTREAM_AUX_PAGESALLOC(
24742835 kbdev,
....@@ -2489,8 +2850,7 @@
24892850
24902851 struct tagged_addr *start_free = alloc->pages + alloc->nents;
24912852
2492
-#ifdef CONFIG_MALI_2MB_ALLOC
2493
- if (pool->order) {
2853
+ if (kbdev->pagesize_2mb && pool->order) {
24942854 while (nr_pages_to_free) {
24952855 if (is_huge_head(*start_free)) {
24962856 kbase_mem_pool_free_pages_locked(
....@@ -2508,15 +2868,12 @@
25082868 }
25092869 }
25102870 } else {
2511
-#endif
25122871 kbase_mem_pool_free_pages_locked(pool,
25132872 nr_pages_to_free,
25142873 start_free,
25152874 false, /* not dirty */
25162875 true); /* return to pool */
2517
-#ifdef CONFIG_MALI_2MB_ALLOC
25182876 }
2519
-#endif
25202877 }
25212878
25222879 kbase_process_page_usage_dec(kctx, nr_pages_requested);
....@@ -2778,6 +3135,13 @@
27783135 /**
27793136 * kbase_jd_user_buf_unpin_pages - Release the pinned pages of a user buffer.
27803137 * @alloc: The allocation for the imported user buffer.
3138
+ *
3139
+ * This must only be called when terminating an alloc, when its refcount
3140
+ * (number of users) has become 0. This also ensures it is only called once all
3141
+ * CPU mappings have been closed.
3142
+ *
3143
+ * Instead call kbase_jd_user_buf_unmap() if you need to unpin pages on active
3144
+ * allocations.
27813145 */
27823146 static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc);
27833147 #endif
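
The kernel-doc added above documents that unpinning only happens on termination, once the allocation's reference count has dropped to zero. A toy refcount model of that rule; the driver uses struct kref, and these names and this layout are illustrative only:

#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>

struct demo_phy_alloc {
	atomic_int refcount;
	int nents;   /* number of still-pinned pages */
};

static void demo_unpin_pages(struct demo_phy_alloc *alloc)
{
	/* The driver would call kbase_unpin_user_buf_page() per page here. */
	printf("termination: unpinning %d pages\n", alloc->nents);
	alloc->nents = 0;
}

static void demo_alloc_get(struct demo_phy_alloc *alloc)
{
	atomic_fetch_add(&alloc->refcount, 1);
}

static void demo_alloc_put(struct demo_phy_alloc *alloc)
{
	if (atomic_fetch_sub(&alloc->refcount, 1) == 1)
		demo_unpin_pages(alloc);   /* last user gone: safe to unpin */
}

int main(void)
{
	struct demo_phy_alloc a = { .refcount = 1, .nents = 16 };

	demo_alloc_get(&a);   /* e.g. a GPU mapping takes a reference */
	demo_alloc_put(&a);   /* pages stay pinned: one reference remains */
	assert(a.nents == 16);
	demo_alloc_put(&a);   /* termination path: pages are unpinned */
	assert(a.nents == 0);
	return 0;
}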
....@@ -2908,8 +3272,31 @@
29083272 out_term:
29093273 return -1;
29103274 }
2911
-
29123275 KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages);
3276
+
3277
+void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc,
3278
+ enum kbase_page_status status)
3279
+{
3280
+ u32 i = 0;
3281
+
3282
+ for (; i < alloc->nents; i++) {
3283
+ struct tagged_addr phys = alloc->pages[i];
3284
+ struct kbase_page_metadata *page_md = kbase_page_private(as_page(phys));
3285
+
3286
+ /* Skip the 4KB page that is part of a large page, as the large page is
3287
+ * excluded from the migration process.
3288
+ */
3289
+ if (is_huge(phys) || is_partial(phys))
3290
+ continue;
3291
+
3292
+ if (!page_md)
3293
+ continue;
3294
+
3295
+ spin_lock(&page_md->migrate_lock);
3296
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)status);
3297
+ spin_unlock(&page_md->migrate_lock);
3298
+ }
3299
+}
29133300
29143301 bool kbase_check_alloc_flags(unsigned long flags)
29153302 {
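
The new kbase_set_phy_alloc_page_status() above walks an allocation's tagged pages, skips 4 KB pages that belong to a 2 MB large page, and updates each page's migration status under its per-page lock. A small userspace model of that walk, with pthread mutexes standing in for the kernel spinlock; all names here are illustrative:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

enum demo_page_status { DEMO_ALLOCATED_MAPPED, DEMO_NOT_MOVABLE };

/* Loose model of struct kbase_page_metadata's migrate_lock and status. */
struct demo_page_md {
	pthread_mutex_t migrate_lock;
	uint8_t status;
	int part_of_large_page;   /* stand-in for is_huge()/is_partial() */
};

static void demo_set_page_status(struct demo_page_md *pages, unsigned int nents,
				 enum demo_page_status status)
{
	for (unsigned int i = 0; i < nents; i++) {
		/* Large pages are excluded from migration, so skip their 4KB parts. */
		if (pages[i].part_of_large_page)
			continue;

		pthread_mutex_lock(&pages[i].migrate_lock);
		pages[i].status = (uint8_t)status;
		pthread_mutex_unlock(&pages[i].migrate_lock);
	}
}

int main(void)
{
	struct demo_page_md pages[3] = {
		{ PTHREAD_MUTEX_INITIALIZER, DEMO_ALLOCATED_MAPPED, 0 },
		{ PTHREAD_MUTEX_INITIALIZER, DEMO_ALLOCATED_MAPPED, 1 },  /* skipped */
		{ PTHREAD_MUTEX_INITIALIZER, DEMO_ALLOCATED_MAPPED, 0 },
	};

	demo_set_page_status(pages, 3, DEMO_NOT_MOVABLE);
	printf("statuses: %d %d %d\n", pages[0].status, pages[1].status, pages[2].status);
	return 0;
}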
....@@ -2983,6 +3370,14 @@
29833370 (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM))
29843371 return false;
29853372
3373
+#if MALI_USE_CSF
3374
+ if ((flags & BASE_MEM_SAME_VA) && (flags & (BASE_MEM_FIXABLE | BASE_MEM_FIXED)))
3375
+ return false;
3376
+
3377
+ if ((flags & BASE_MEM_FIXABLE) && (flags & BASE_MEM_FIXED))
3378
+ return false;
3379
+#endif
3380
+
29863381 return true;
29873382 }
29883383
....@@ -3004,7 +3399,11 @@
30043399 if (flags & BASE_MEM_GROW_ON_GPF)
30053400 return false;
30063401
3007
-#if !MALI_USE_CSF
3402
+#if MALI_USE_CSF
3403
+ /* Imported memory cannot be fixed */
3404
+ if ((flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE)))
3405
+ return false;
3406
+#else
30083407 /* Imported memory cannot be aligned to the end of its initial commit */
30093408 if (flags & BASE_MEM_TILER_ALIGN_TOP)
30103409 return false;
....@@ -3139,10 +3538,6 @@
31393538 #undef KBASE_MSG_PRE
31403539 }
31413540
3142
-/**
3143
- * Acquire the per-context region list lock
3144
- * @kctx: KBase context
3145
- */
31463541 void kbase_gpu_vm_lock(struct kbase_context *kctx)
31473542 {
31483543 KBASE_DEBUG_ASSERT(kctx != NULL);
....@@ -3151,10 +3546,6 @@
31513546
31523547 KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
31533548
3154
-/**
3155
- * Release the per-context region list lock
3156
- * @kctx: KBase context
3157
- */
31583549 void kbase_gpu_vm_unlock(struct kbase_context *kctx)
31593550 {
31603551 KBASE_DEBUG_ASSERT(kctx != NULL);
....@@ -3165,7 +3556,7 @@
31653556
31663557 #if IS_ENABLED(CONFIG_DEBUG_FS)
31673558 struct kbase_jit_debugfs_data {
3168
- int (*func)(struct kbase_jit_debugfs_data *);
3559
+ int (*func)(struct kbase_jit_debugfs_data *data);
31693560 struct mutex lock;
31703561 struct kbase_context *kctx;
31713562 u64 active_value;
....@@ -3388,14 +3779,9 @@
33883779 void kbase_jit_debugfs_init(struct kbase_context *kctx)
33893780 {
33903781 /* prevent unprivileged use of debug file system
3391
- * in old kernel version
3392
- */
3393
-#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
3394
- /* only for newer kernel version debug file system is safe */
3782
+ * in old kernel version
3783
+ */
33953784 const mode_t mode = 0444;
3396
-#else
3397
- const mode_t mode = 0400;
3398
-#endif
33993785
34003786 /* Caller already ensures this, but we keep the pattern for
34013787 * maintenance safety.
....@@ -3469,7 +3855,15 @@
34693855 mutex_unlock(&kctx->jit_evict_lock);
34703856
34713857 kbase_gpu_vm_lock(kctx);
3472
- reg->flags &= ~KBASE_REG_NO_USER_FREE;
3858
+
3859
+ /*
3860
+ * Incrementing the refcount is prevented on JIT regions.
3861
+ * If/when this ever changes we would need to compensate
3862
+ * by implementing "free on putting the last reference",
3863
+ * but only for JIT regions.
3864
+ */
3865
+ WARN_ON(atomic_read(&reg->no_user_free_count) > 1);
3866
+ kbase_va_region_no_user_free_dec(reg);
34733867 kbase_mem_free_region(kctx, reg);
34743868 kbase_gpu_vm_unlock(kctx);
34753869 } while (1);
....@@ -3484,6 +3878,7 @@
34843878 INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker);
34853879
34863880 #if MALI_USE_CSF
3881
+ mutex_init(&kctx->csf.kcpu_queues.jit_lock);
34873882 INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_cmds_head);
34883883 INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_blocked_queues);
34893884 #else /* !MALI_USE_CSF */
....@@ -3698,7 +4093,8 @@
36984093 static int kbase_jit_grow(struct kbase_context *kctx,
36994094 const struct base_jit_alloc_info *info,
37004095 struct kbase_va_region *reg,
3701
- struct kbase_sub_alloc **prealloc_sas)
4096
+ struct kbase_sub_alloc **prealloc_sas,
4097
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
37024098 {
37034099 size_t delta;
37044100 size_t pages_required;
....@@ -3728,18 +4124,14 @@
37284124 delta = info->commit_pages - reg->gpu_alloc->nents;
37294125 pages_required = delta;
37304126
3731
-#ifdef CONFIG_MALI_2MB_ALLOC
3732
- if (pages_required >= (SZ_2M / SZ_4K)) {
4127
+ if (kctx->kbdev->pagesize_2mb && pages_required >= (SZ_2M / SZ_4K)) {
37334128 pool = &kctx->mem_pools.large[kctx->jit_group_id];
37344129 /* Round up to number of 2 MB pages required */
37354130 pages_required += ((SZ_2M / SZ_4K) - 1);
37364131 pages_required /= (SZ_2M / SZ_4K);
37374132 } else {
3738
-#endif
37394133 pool = &kctx->mem_pools.small[kctx->jit_group_id];
3740
-#ifdef CONFIG_MALI_2MB_ALLOC
37414134 }
3742
-#endif
37434135
37444136 if (reg->cpu_alloc != reg->gpu_alloc)
37454137 pages_required *= 2;
....@@ -3760,7 +4152,7 @@
37604152 spin_unlock(&kctx->mem_partials_lock);
37614153
37624154 kbase_gpu_vm_unlock(kctx);
3763
- ret = kbase_mem_pool_grow(pool, pool_delta);
4155
+ ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task);
37644156 kbase_gpu_vm_lock(kctx);
37654157
37664158 if (ret)
....@@ -3795,7 +4187,7 @@
37954187 spin_unlock(&kctx->mem_partials_lock);
37964188
37974189 ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages,
3798
- old_size);
4190
+ old_size, mmu_sync_info);
37994191 /*
38004192 * The grow failed so put the allocation back in the
38014193 * pool and return failure.
....@@ -3920,11 +4312,11 @@
39204312 const struct base_jit_alloc_info *info,
39214313 bool ignore_pressure_limit)
39224314 {
3923
-#if MALI_USE_CSF
3924
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
3925
-#else
4315
+#if !MALI_USE_CSF
39264316 lockdep_assert_held(&kctx->jctx.lock);
3927
-#endif
4317
+#else /* MALI_USE_CSF */
4318
+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
4319
+#endif /* !MALI_USE_CSF */
39284320
39294321 #if MALI_JIT_PRESSURE_LIMIT_BASE
39304322 if (!ignore_pressure_limit &&
....@@ -4010,23 +4402,28 @@
40104402 struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
40114403 int i;
40124404
4013
-#if MALI_USE_CSF
4014
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
4015
-#else
4405
+ /* Calls to this function are inherently synchronous, with respect to
4406
+ * MMU operations.
4407
+ */
4408
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
4409
+
4410
+#if !MALI_USE_CSF
40164411 lockdep_assert_held(&kctx->jctx.lock);
4017
-#endif
4412
+#else /* MALI_USE_CSF */
4413
+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
4414
+#endif /* !MALI_USE_CSF */
40184415
40194416 if (!jit_allow_allocate(kctx, info, ignore_pressure_limit))
40204417 return NULL;
40214418
4022
-#ifdef CONFIG_MALI_2MB_ALLOC
4023
- /* Preallocate memory for the sub-allocation structs */
4024
- for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
4025
- prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
4026
- if (!prealloc_sas[i])
4027
- goto end;
4419
+ if (kctx->kbdev->pagesize_2mb) {
4420
+ /* Preallocate memory for the sub-allocation structs */
4421
+ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
4422
+ prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
4423
+ if (!prealloc_sas[i])
4424
+ goto end;
4425
+ }
40284426 }
4029
-#endif
40304427
40314428 kbase_gpu_vm_lock(kctx);
40324429 mutex_lock(&kctx->jit_evict_lock);
....@@ -4102,7 +4499,8 @@
41024499 * so any state protected by that lock might need to be
41034500 * re-evaluated if more code is added here in future.
41044501 */
4105
- ret = kbase_jit_grow(kctx, info, reg, prealloc_sas);
4502
+ ret = kbase_jit_grow(kctx, info, reg, prealloc_sas,
4503
+ mmu_sync_info);
41064504
41074505 #if MALI_JIT_PRESSURE_LIMIT_BASE
41084506 if (!ignore_pressure_limit)
....@@ -4114,7 +4512,7 @@
41144512 if (ret < 0) {
41154513 /*
41164514 * An update to an allocation from the pool failed,
4117
- * chances are slim a new allocation would fair any
4515
+ * chances are slim a new allocation would fare any
41184516 * better so return the allocation to the pool and
41194517 * return the function with failure.
41204518 */
....@@ -4136,6 +4534,17 @@
41364534 mutex_unlock(&kctx->jit_evict_lock);
41374535 reg = NULL;
41384536 goto end;
4537
+ } else {
4538
+ /* A suitable JIT allocation existed on the evict list, so we need
4539
+ * to make sure that the NOT_MOVABLE property is cleared.
4540
+ */
4541
+ if (kbase_page_migration_enabled) {
4542
+ kbase_gpu_vm_lock(kctx);
4543
+ mutex_lock(&kctx->jit_evict_lock);
4544
+ kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED);
4545
+ mutex_unlock(&kctx->jit_evict_lock);
4546
+ kbase_gpu_vm_unlock(kctx);
4547
+ }
41394548 }
41404549 } else {
41414550 /* No suitable JIT allocation was found so create a new one */
....@@ -4150,7 +4559,7 @@
41504559 flags |= BASE_MEM_TILER_ALIGN_TOP;
41514560 #endif /* !MALI_USE_CSF */
41524561
4153
- flags |= base_mem_group_id_set(kctx->jit_group_id);
4562
+ flags |= kbase_mem_group_id_set(kctx->jit_group_id);
41544563 #if MALI_JIT_PRESSURE_LIMIT_BASE
41554564 if (!ignore_pressure_limit) {
41564565 flags |= BASEP_MEM_PERFORM_JIT_TRIM;
....@@ -4165,8 +4574,8 @@
41654574 mutex_unlock(&kctx->jit_evict_lock);
41664575 kbase_gpu_vm_unlock(kctx);
41674576
4168
- reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
4169
- info->extension, &flags, &gpu_addr);
4577
+ reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, info->extension,
4578
+ &flags, &gpu_addr, mmu_sync_info);
41704579 if (!reg) {
41714580 /* Most likely not enough GPU virtual space left for
41724581 * the new JIT allocation.
....@@ -4192,6 +4601,29 @@
41924601 }
41934602 }
41944603
4604
+ /* Similarly to tiler heap init, there is a short window of time
4605
+ * where the (either recycled or newly allocated, in our case) region has
4606
+ * "no user free" count incremented but is still missing the DONT_NEED flag, and
4607
+ * doesn't yet have the ACTIVE_JIT_ALLOC flag either. Temporarily leaking the
4608
+ * allocation is the least bad option that doesn't lead to a security issue down the
4609
+ * line (it will eventually be cleaned up during context termination).
4610
+ *
4611
+ * We also need to call kbase_gpu_vm_lock regardless, as we're updating the region
4612
+ * flags.
4613
+ */
4614
+ kbase_gpu_vm_lock(kctx);
4615
+ if (unlikely(atomic_read(&reg->no_user_free_count) > 1)) {
4616
+ kbase_gpu_vm_unlock(kctx);
4617
+ dev_err(kctx->kbdev->dev, "JIT region has no_user_free_count > 1!\n");
4618
+
4619
+ mutex_lock(&kctx->jit_evict_lock);
4620
+ list_move(&reg->jit_node, &kctx->jit_pool_head);
4621
+ mutex_unlock(&kctx->jit_evict_lock);
4622
+
4623
+ reg = NULL;
4624
+ goto end;
4625
+ }
4626
+
41954627 trace_mali_jit_alloc(reg, info->id);
41964628
41974629 kctx->jit_current_allocations++;
....@@ -4209,6 +4641,7 @@
42094641 kbase_jit_report_update_pressure(kctx, reg, info->va_pages,
42104642 KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
42114643 #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
4644
+ kbase_gpu_vm_unlock(kctx);
42124645
42134646 end:
42144647 for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
....@@ -4220,6 +4653,12 @@
42204653 void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
42214654 {
42224655 u64 old_pages;
4656
+
4657
+#if !MALI_USE_CSF
4658
+ lockdep_assert_held(&kctx->jctx.lock);
4659
+#else /* MALI_USE_CSF */
4660
+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
4661
+#endif /* !MALI_USE_CSF */
42234662
42244663 /* JIT id not immediately available here, so use 0u */
42254664 trace_mali_jit_free(reg, 0u);
....@@ -4273,6 +4712,12 @@
42734712
42744713 list_move(&reg->jit_node, &kctx->jit_pool_head);
42754714
4715
+ /* Inactive JIT regions should be freed by the shrinker and not impacted
4716
+ * by page migration. Once freed, they will enter into the page migration
4717
+ * state machine via the mempools.
4718
+ */
4719
+ if (kbase_page_migration_enabled)
4720
+ kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE);
42764721 mutex_unlock(&kctx->jit_evict_lock);
42774722 }
42784723
....@@ -4319,7 +4764,14 @@
43194764 mutex_unlock(&kctx->jit_evict_lock);
43204765
43214766 if (reg) {
4322
- reg->flags &= ~KBASE_REG_NO_USER_FREE;
4767
+ /*
4768
+ * Incrementing the refcount is prevented on JIT regions.
4769
+ * If/when this ever changes we would need to compensate
4770
+ * by implementing "free on putting the last reference",
4771
+ * but only for JIT regions.
4772
+ */
4773
+ WARN_ON(atomic_read(&reg->no_user_free_count) > 1);
4774
+ kbase_va_region_no_user_free_dec(reg);
43234775 kbase_mem_free_region(kctx, reg);
43244776 }
43254777
....@@ -4341,7 +4793,14 @@
43414793 list_del(&walker->jit_node);
43424794 list_del_init(&walker->gpu_alloc->evict_node);
43434795 mutex_unlock(&kctx->jit_evict_lock);
4344
- walker->flags &= ~KBASE_REG_NO_USER_FREE;
4796
+ /*
4797
+ * Incrementing the refcount is prevented on JIT regions.
4798
+ * If/when this ever changes we would need to compensate
4799
+ * by implementing "free on putting the last reference",
4800
+ * but only for JIT regions.
4801
+ */
4802
+ WARN_ON(atomic_read(&walker->no_user_free_count) > 1);
4803
+ kbase_va_region_no_user_free_dec(walker);
43454804 kbase_mem_free_region(kctx, walker);
43464805 mutex_lock(&kctx->jit_evict_lock);
43474806 }
....@@ -4353,7 +4812,14 @@
43534812 list_del(&walker->jit_node);
43544813 list_del_init(&walker->gpu_alloc->evict_node);
43554814 mutex_unlock(&kctx->jit_evict_lock);
4356
- walker->flags &= ~KBASE_REG_NO_USER_FREE;
4815
+ /*
4816
+ * Incrementing the refcount is prevented on JIT regions.
4817
+ * If/when this ever changes we would need to compensate
4818
+ * by implementing "free on putting the last reference",
4819
+ * but only for JIT regions.
4820
+ */
4821
+ WARN_ON(atomic_read(&walker->no_user_free_count) > 1);
4822
+ kbase_va_region_no_user_free_dec(walker);
43574823 kbase_mem_free_region(kctx, walker);
43584824 mutex_lock(&kctx->jit_evict_lock);
43594825 }
....@@ -4396,8 +4862,8 @@
43964862
43974863 addr_start = reg->heap_info_gpu_addr - jit_report_gpu_mem_offset;
43984864
4399
- ptr = kbase_vmap(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE,
4400
- &mapping);
4865
+ ptr = kbase_vmap_prot(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE,
4866
+ KBASE_REG_CPU_RD, &mapping);
44014867 if (!ptr) {
44024868 dev_warn(kctx->kbdev->dev,
44034869 "%s: JIT start=0x%llx unable to map memory near end pointer %llx\n",
....@@ -4455,17 +4921,44 @@
44554921 }
44564922 #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
44574923
4924
+void kbase_unpin_user_buf_page(struct page *page)
4925
+{
4926
+#if KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE
4927
+ put_page(page);
4928
+#else
4929
+ unpin_user_page(page);
4930
+#endif
4931
+}
4932
+
44584933 #if MALI_USE_CSF
44594934 static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc)
44604935 {
4461
- if (alloc->nents) {
4936
+ /* In CSF builds, we keep pages pinned until the last reference is
4937
+ * released on the alloc. A refcount of 0 also means we can be sure
4938
+ * that all CPU mappings have been closed on this alloc, and no more
4939
+ * mappings of it will be created.
4940
+ *
4941
+ * Further, the WARN() below captures the restriction that this
4942
+ * function will not handle anything other than the alloc termination
4943
+ * path, because the caller of kbase_mem_phy_alloc_put() is not
4944
+ * required to hold the kctx's reg_lock, and so we could not handle
4945
+ * removing an existing CPU mapping here.
4946
+ *
4947
+ * Refer to this function's kernel-doc comments for alternatives for
4948
+ * unpinning a User buffer.
4949
+ */
4950
+
4951
+ if (alloc->nents && !WARN(kref_read(&alloc->kref) != 0,
4952
+ "must only be called on terminating an allocation")) {
44624953 struct page **pages = alloc->imported.user_buf.pages;
44634954 long i;
44644955
44654956 WARN_ON(alloc->nents != alloc->imported.user_buf.nr_pages);
44664957
44674958 for (i = 0; i < alloc->nents; i++)
4468
- put_page(pages[i]);
4959
+ kbase_unpin_user_buf_page(pages[i]);
4960
+
4961
+ alloc->nents = 0;
44694962 }
44704963 }
44714964 #endif
....@@ -4479,6 +4972,9 @@
44794972 struct mm_struct *mm = alloc->imported.user_buf.mm;
44804973 long pinned_pages;
44814974 long i;
4975
+ int write;
4976
+
4977
+ lockdep_assert_held(&kctx->reg_lock);
44824978
44834979 if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF))
44844980 return -EINVAL;
....@@ -4493,44 +4989,28 @@
44934989 if (WARN_ON(reg->gpu_alloc->imported.user_buf.mm != current->mm))
44944990 return -EINVAL;
44954991
4496
-#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE
4497
- pinned_pages = get_user_pages(NULL, mm,
4498
- address,
4499
- alloc->imported.user_buf.nr_pages,
4500
-#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \
4501
-KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
4502
- reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
4503
- pages, NULL);
4992
+ write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);
4993
+
4994
+#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE
4995
+ pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages,
4996
+ write ? FOLL_WRITE : 0, pages, NULL);
4997
+#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE
4998
+ pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages,
4999
+ write ? FOLL_WRITE : 0, pages, NULL, NULL);
45045000 #else
4505
- reg->flags & KBASE_REG_GPU_WR,
4506
- 0, pages, NULL);
4507
-#endif
4508
-#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE
4509
- pinned_pages = get_user_pages_remote(NULL, mm,
4510
- address,
4511
- alloc->imported.user_buf.nr_pages,
4512
- reg->flags & KBASE_REG_GPU_WR,
4513
- 0, pages, NULL);
4514
-#elif KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE
4515
- pinned_pages = get_user_pages_remote(NULL, mm,
4516
- address,
4517
- alloc->imported.user_buf.nr_pages,
4518
- reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
4519
- pages, NULL);
4520
-#else
4521
- pinned_pages = get_user_pages_remote(NULL, mm,
4522
- address,
4523
- alloc->imported.user_buf.nr_pages,
4524
- reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
4525
- pages, NULL, NULL);
5001
+ pinned_pages = pin_user_pages_remote(mm, address, alloc->imported.user_buf.nr_pages,
5002
+ write ? FOLL_WRITE : 0, pages, NULL, NULL);
45265003 #endif
45275004
45285005 if (pinned_pages <= 0)
45295006 return pinned_pages;
45305007
45315008 if (pinned_pages != alloc->imported.user_buf.nr_pages) {
5009
+ /* The code above already ensures there cannot have been a CPU
5010
+ * mapping, by ensuring alloc->nents is 0.
5011
+ */
45325012 for (i = 0; i < pinned_pages; i++)
4533
- put_page(pages[i]);
5013
+ kbase_unpin_user_buf_page(pages[i]);
45345014 return -ENOMEM;
45355015 }
45365016
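
The pinning hunk above now derives the GUP write flag from both the CPU and GPU write bits of the region, where the old code looked only at KBASE_REG_GPU_WR. A tiny sketch of that derivation with stand-in flag values (the real KBASE_REG_* and FOLL_WRITE bit values differ):

#include <stdbool.h>
#include <stdio.h>

#define DEMO_REG_CPU_WR  (1u << 0)
#define DEMO_REG_GPU_WR  (1u << 1)
#define DEMO_FOLL_WRITE  (1u << 2)

/* Pages must be pinned writable if either the CPU or the GPU may write to
 * the region, mirroring how the hunk above computes `write`.
 */
static unsigned int demo_gup_flags(unsigned long reg_flags)
{
	bool write = reg_flags & (DEMO_REG_CPU_WR | DEMO_REG_GPU_WR);

	return write ? DEMO_FOLL_WRITE : 0;
}

int main(void)
{
	printf("read-only import    -> gup flags 0x%x\n", demo_gup_flags(0));
	printf("GPU-writable import -> gup flags 0x%x\n", demo_gup_flags(DEMO_REG_GPU_WR));
	printf("CPU-writable import -> gup flags 0x%x\n", demo_gup_flags(DEMO_REG_CPU_WR));
	return 0;
}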
....@@ -4542,46 +5022,64 @@
45425022 static int kbase_jd_user_buf_map(struct kbase_context *kctx,
45435023 struct kbase_va_region *reg)
45445024 {
4545
- long pinned_pages;
5025
+ int err;
5026
+ long pinned_pages = 0;
45465027 struct kbase_mem_phy_alloc *alloc;
45475028 struct page **pages;
45485029 struct tagged_addr *pa;
4549
- long i;
4550
- unsigned long address;
5030
+ long i, dma_mapped_pages;
45515031 struct device *dev;
4552
- unsigned long offset;
4553
- unsigned long local_size;
45545032 unsigned long gwt_mask = ~0;
4555
- int err = kbase_jd_user_buf_pin_pages(kctx, reg);
5033
+ /* Calls to this function are inherently asynchronous, with respect to
5034
+ * MMU operations.
5035
+ */
5036
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
5037
+
5038
+ lockdep_assert_held(&kctx->reg_lock);
5039
+
5040
+ err = kbase_jd_user_buf_pin_pages(kctx, reg);
45565041
45575042 if (err)
45585043 return err;
45595044
45605045 alloc = reg->gpu_alloc;
45615046 pa = kbase_get_gpu_phy_pages(reg);
4562
- address = alloc->imported.user_buf.address;
45635047 pinned_pages = alloc->nents;
45645048 pages = alloc->imported.user_buf.pages;
45655049 dev = kctx->kbdev->dev;
4566
- offset = address & ~PAGE_MASK;
4567
- local_size = alloc->imported.user_buf.size;
45685050
5051
+ /* Manual CPU cache synchronization.
5052
+ *
5053
+ * The driver disables automatic CPU cache synchronization because the
5054
+ * memory pages that enclose the imported region may also contain
5055
+ * sub-regions which are not imported and that are allocated and used
5056
+ * by the user process. This may be the case of memory at the beginning
5057
+ * of the first page and at the end of the last page. Automatic CPU cache
5058
+ * synchronization would force some operations on those memory allocations,
5059
+ * unbeknown to the user process: in particular, a CPU cache invalidate
5060
+ * upon unmapping would destroy the content of dirty CPU caches and cause
5061
+ * the user process to lose CPU writes to the non-imported sub-regions.
5062
+ *
5063
+ * When the GPU claims ownership of the imported memory buffer, it shall
5064
+ * commit CPU writes for the whole of all pages that enclose the imported
5065
+ * region, otherwise the initial content of memory would be wrong.
5066
+ */
45695067 for (i = 0; i < pinned_pages; i++) {
45705068 dma_addr_t dma_addr;
4571
- unsigned long min;
4572
-
4573
- min = MIN(PAGE_SIZE - offset, local_size);
4574
- dma_addr = dma_map_page(dev, pages[i],
4575
- offset, min,
4576
- DMA_BIDIRECTIONAL);
4577
- if (dma_mapping_error(dev, dma_addr))
5069
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
5070
+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
5071
+#else
5072
+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
5073
+ DMA_ATTR_SKIP_CPU_SYNC);
5074
+#endif
5075
+ err = dma_mapping_error(dev, dma_addr);
5076
+ if (err)
45785077 goto unwind;
45795078
45805079 alloc->imported.user_buf.dma_addrs[i] = dma_addr;
45815080 pa[i] = as_tagged(page_to_phys(pages[i]));
45825081
4583
- local_size -= min;
4584
- offset = 0;
5082
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
45855083 }
45865084
45875085 #ifdef CONFIG_MALI_CINSTR_GWT
....@@ -4589,24 +5087,46 @@
45895087 gwt_mask = ~KBASE_REG_GPU_WR;
45905088 #endif
45915089
4592
- err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
4593
- pa, kbase_reg_current_backed_size(reg),
4594
- reg->flags & gwt_mask, kctx->as_nr,
4595
- alloc->group_id);
5090
+ err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa,
5091
+ kbase_reg_current_backed_size(reg),
5092
+ reg->flags & gwt_mask, kctx->as_nr, alloc->group_id,
5093
+ mmu_sync_info, NULL);
45965094 if (err == 0)
45975095 return 0;
45985096
45995097 /* fall down */
46005098 unwind:
46015099 alloc->nents = 0;
4602
- while (i--) {
4603
- dma_unmap_page(kctx->kbdev->dev,
4604
- alloc->imported.user_buf.dma_addrs[i],
4605
- PAGE_SIZE, DMA_BIDIRECTIONAL);
5100
+ dma_mapped_pages = i;
5101
+ /* Run the unmap loop in the same order as the map loop, and again
5102
+ * perform CPU cache synchronization to write the content of dirty CPU caches
5103
+ * back to memory. This is a precautionary measure in case a GPU job has taken
5104
+ * advantage of a partially GPU-mapped range to write and corrupt the
5105
+ * content of memory, either inside or outside the imported region.
5106
+ *
5107
+ * Notice that this error recovery path doesn't try to be optimal and just
5108
+ * flushes the entire page range.
5109
+ */
5110
+ for (i = 0; i < dma_mapped_pages; i++) {
5111
+ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
5112
+
5113
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
5114
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
5115
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
5116
+#else
5117
+ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
5118
+ DMA_ATTR_SKIP_CPU_SYNC);
5119
+#endif
46065120 }
46075121
4608
- while (++i < pinned_pages) {
4609
- put_page(pages[i]);
5122
+ /* The user buffer could already have been pinned before entering
5123
+ * this function, and hence there could potentially be CPU
5124
+ * mappings of it.
5125
+ */
5126
+ kbase_mem_shrink_cpu_mapping(kctx, reg, 0, pinned_pages);
5127
+
5128
+ for (i = 0; i < pinned_pages; i++) {
5129
+ kbase_unpin_user_buf_page(pages[i]);
46105130 pages[i] = NULL;
46115131 }
46125132
....@@ -4617,30 +5137,118 @@
46175137 * GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT
46185138 * have a corresponding call to kbase_jd_user_buf_unpin_pages().
46195139 */
4620
-static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
4621
- struct kbase_mem_phy_alloc *alloc, bool writeable)
5140
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc,
5141
+ struct kbase_va_region *reg, bool writeable)
46225142 {
46235143 long i;
46245144 struct page **pages;
4625
- unsigned long size = alloc->imported.user_buf.size;
5145
+ unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK;
5146
+ unsigned long remaining_size = alloc->imported.user_buf.size;
5147
+
5148
+ lockdep_assert_held(&kctx->reg_lock);
46265149
46275150 KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
46285151 pages = alloc->imported.user_buf.pages;
5152
+
5153
+#if !MALI_USE_CSF
5154
+ kbase_mem_shrink_cpu_mapping(kctx, reg, 0, alloc->nents);
5155
+#else
5156
+ CSTD_UNUSED(reg);
5157
+#endif
5158
+
46295159 for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
4630
- unsigned long local_size;
5160
+ unsigned long imported_size = MIN(remaining_size, PAGE_SIZE - offset_within_page);
5161
+ /* Notice: this is a temporary variable that is used for DMA sync
5162
+ * operations, and that could be incremented by an offset if the
5163
+ * current page contains both imported and non-imported memory
5164
+ * sub-regions.
5165
+ *
5166
+ * It is valid to add an offset to this value, because the offset
5167
+ * is always kept within the physically contiguous dma-mapped range
5168
+ * and there's no need to translate to physical address to offset it.
5169
+ *
5170
+ * This variable is not going to be used for the actual DMA unmap
5171
+ * operation, that shall always use the original DMA address of the
5172
+ * whole memory page.
5173
+ */
46315174 dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
46325175
4633
- local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
4634
- dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
4635
- DMA_BIDIRECTIONAL);
5176
+ /* Manual CPU cache synchronization.
5177
+ *
5178
+ * When the GPU returns ownership of the buffer to the CPU, the driver
5179
+ * needs to treat imported and non-imported memory differently.
5180
+ *
5181
+ * The first case to consider is non-imported sub-regions at the
5182
+ * beginning of the first page and at the end of last page. For these
5183
+ * sub-regions: CPU cache shall be committed with a clean+invalidate,
5184
+ * in order to keep the last CPU write.
5185
+ *
5186
+ * Imported region prefers the opposite treatment: this memory has been
5187
+ * legitimately mapped and used by the GPU, hence GPU writes shall be
5188
+ * committed to memory, while CPU cache shall be invalidated to make
5189
+ * sure that CPU reads the correct memory content.
5190
+ *
5191
+ * The following diagram shows the expected values of the variables
5193
+ * used in this loop in the corner case of an imported region enclosed
5193
+ * by a single memory page:
5194
+ *
5195
+ * page boundary ->|---------- | <- dma_addr (initial value)
5196
+ * | |
5197
+ * | - - - - - | <- offset_within_page
5198
+ * |XXXXXXXXXXX|\
5199
+ * |XXXXXXXXXXX| \
5200
+ * |XXXXXXXXXXX| }- imported_size
5201
+ * |XXXXXXXXXXX| /
5202
+ * |XXXXXXXXXXX|/
5203
+ * | - - - - - | <- offset_within_page + imported_size
5204
+ * | |\
5205
+ * | | }- PAGE_SIZE - imported_size - offset_within_page
5206
+ * | |/
5207
+ * page boundary ->|-----------|
5208
+ *
5209
+ * If the imported region is enclosed by more than one page, then
5210
+ * offset_within_page = 0 for any page after the first.
5211
+ */
5212
+
5213
+ /* Only for first page: handle non-imported range at the beginning. */
5214
+ if (offset_within_page > 0) {
5215
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page,
5216
+ DMA_BIDIRECTIONAL);
5217
+ dma_addr += offset_within_page;
5218
+ }
5219
+
5220
+ /* For every page: handle imported range. */
5221
+ if (imported_size > 0)
5222
+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size,
5223
+ DMA_BIDIRECTIONAL);
5224
+
5225
+ /* Only for last page (that may coincide with first page):
5226
+ * handle non-imported range at the end.
5227
+ */
5228
+ if ((imported_size + offset_within_page) < PAGE_SIZE) {
5229
+ dma_addr += imported_size;
5230
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
5231
+ PAGE_SIZE - imported_size - offset_within_page,
5232
+ DMA_BIDIRECTIONAL);
5233
+ }
5234
+
5235
+ /* Notice: use the original DMA address to unmap the whole memory page. */
5236
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
5237
+ dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE,
5238
+ DMA_BIDIRECTIONAL);
5239
+#else
5240
+ dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i],
5241
+ PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
5242
+#endif
46365243 if (writeable)
46375244 set_page_dirty_lock(pages[i]);
46385245 #if !MALI_USE_CSF
4639
- put_page(pages[i]);
5246
+ kbase_unpin_user_buf_page(pages[i]);
46405247 pages[i] = NULL;
46415248 #endif
46425249
4643
- size -= local_size;
5250
+ remaining_size -= imported_size;
5251
+ offset_within_page = 0;
46445252 }
46455253 #if !MALI_USE_CSF
46465254 alloc->nents = 0;
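
The diagram above describes how a page containing both imported and non-imported memory is split for cache maintenance. A small self-contained program that reproduces the arithmetic for a hypothetical import (starting 0x300 bytes into its first page, 10000 bytes long); the head and tail ranges are the non-imported parts synced for the device, while the imported part is synced for the CPU:

#include <stdio.h>

#define DEMO_PAGE_SIZE 4096ul
#define DEMO_MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	unsigned long address = 0x12345300ul;   /* hypothetical user VA */
	unsigned long size = 10000ul;           /* hypothetical import size */
	unsigned long offset_within_page = address & (DEMO_PAGE_SIZE - 1);
	unsigned long remaining_size = size;
	unsigned long nr_pages =
		(offset_within_page + size + DEMO_PAGE_SIZE - 1) / DEMO_PAGE_SIZE;

	for (unsigned long i = 0; i < nr_pages; i++) {
		unsigned long imported =
			DEMO_MIN(remaining_size, DEMO_PAGE_SIZE - offset_within_page);
		unsigned long head = offset_within_page;                    /* sync for device */
		unsigned long tail = DEMO_PAGE_SIZE - offset_within_page - imported;

		printf("page %lu: head %4lu | imported %4lu | tail %4lu\n",
		       i, head, imported, tail);

		remaining_size -= imported;
		offset_within_page = 0;   /* only the first page has a leading offset */
	}
	return 0;
}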
....@@ -4687,11 +5295,11 @@
46875295 return 0;
46885296 }
46895297
4690
-struct kbase_mem_phy_alloc *kbase_map_external_resource(
4691
- struct kbase_context *kctx, struct kbase_va_region *reg,
4692
- struct mm_struct *locked_mm)
5298
+int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg,
5299
+ struct mm_struct *locked_mm)
46935300 {
4694
- int err;
5301
+ int err = 0;
5302
+ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
46955303
46965304 lockdep_assert_held(&kctx->reg_lock);
46975305
....@@ -4700,7 +5308,7 @@
47005308 case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
47015309 if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) &&
47025310 (!reg->gpu_alloc->nents))
4703
- goto exit;
5311
+ return -EINVAL;
47045312
47055313 reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
47065314 if (reg->gpu_alloc->imported.user_buf
....@@ -4708,7 +5316,7 @@
47085316 err = kbase_jd_user_buf_map(kctx, reg);
47095317 if (err) {
47105318 reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--;
4711
- goto exit;
5319
+ return err;
47125320 }
47135321 }
47145322 }
....@@ -4716,21 +5324,30 @@
47165324 case KBASE_MEM_TYPE_IMPORTED_UMM: {
47175325 err = kbase_mem_umm_map(kctx, reg);
47185326 if (err)
4719
- goto exit;
5327
+ return err;
47205328 break;
47215329 }
47225330 default:
4723
- goto exit;
5331
+ dev_dbg(kctx->kbdev->dev,
5332
+ "Invalid external resource GPU allocation type (%x) on mapping",
5333
+ alloc->type);
5334
+ return -EINVAL;
47245335 }
47255336
4726
- return kbase_mem_phy_alloc_get(reg->gpu_alloc);
4727
-exit:
4728
- return NULL;
5337
+ kbase_va_region_alloc_get(kctx, reg);
5338
+ kbase_mem_phy_alloc_get(alloc);
5339
+ return err;
47295340 }
47305341
4731
-void kbase_unmap_external_resource(struct kbase_context *kctx,
4732
- struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc)
5342
+void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg)
47335343 {
5344
+ /* gpu_alloc was used in kbase_map_external_resource, so we need to use it for the
5345
+ * unmapping operation.
5346
+ */
5347
+ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
5348
+
5349
+ lockdep_assert_held(&kctx->reg_lock);
5350
+
47345351 switch (alloc->type) {
47355352 case KBASE_MEM_TYPE_IMPORTED_UMM: {
47365353 kbase_mem_umm_unmap(kctx, reg, alloc);
....@@ -4742,26 +5359,33 @@
47425359 if (alloc->imported.user_buf.current_mapping_usage_count == 0) {
47435360 bool writeable = true;
47445361
4745
- if (!kbase_is_region_invalid_or_free(reg) &&
4746
- reg->gpu_alloc == alloc)
4747
- kbase_mmu_teardown_pages(
4748
- kctx->kbdev,
4749
- &kctx->mmu,
4750
- reg->start_pfn,
4751
- kbase_reg_current_backed_size(reg),
4752
- kctx->as_nr);
5362
+ if (!kbase_is_region_invalid_or_free(reg)) {
5363
+ kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
5364
+ alloc->pages,
5365
+ kbase_reg_current_backed_size(reg),
5366
+ kbase_reg_current_backed_size(reg),
5367
+ kctx->as_nr, true);
5368
+ }
47535369
4754
- if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0))
5370
+ if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0)
47555371 writeable = false;
47565372
4757
- kbase_jd_user_buf_unmap(kctx, alloc, writeable);
5373
+ kbase_jd_user_buf_unmap(kctx, alloc, reg, writeable);
47585374 }
4759
- }
5375
+ }
47605376 break;
47615377 default:
4762
- break;
5378
+ WARN(1, "Invalid external resource GPU allocation type (%x) on unmapping",
5379
+ alloc->type);
5380
+ return;
47635381 }
47645382 kbase_mem_phy_alloc_put(alloc);
5383
+ kbase_va_region_alloc_put(kctx, reg);
5384
+}
5385
+
5386
+static inline u64 kbasep_get_va_gpu_addr(struct kbase_va_region *reg)
5387
+{
5388
+ return reg->start_pfn << PAGE_SHIFT;
47655389 }
47665390
47675391 struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
....@@ -4777,7 +5401,7 @@
47775401 * metadata which matches the region which is being acquired.
47785402 */
47795403 list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) {
4780
- if (walker->gpu_addr == gpu_addr) {
5404
+ if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) {
47815405 meta = walker;
47825406 meta->ref++;
47835407 break;
....@@ -4789,8 +5413,7 @@
47895413 struct kbase_va_region *reg;
47905414
47915415 /* Find the region */
4792
- reg = kbase_region_tracker_find_region_enclosing_address(
4793
- kctx, gpu_addr);
5416
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
47945417 if (kbase_is_region_invalid_or_free(reg))
47955418 goto failed;
47965419
....@@ -4798,18 +5421,18 @@
47985421 meta = kzalloc(sizeof(*meta), GFP_KERNEL);
47995422 if (!meta)
48005423 goto failed;
4801
-
48025424 /*
48035425 * Fill in the metadata object and acquire a reference
48045426 * for the physical resource.
48055427 */
4806
- meta->alloc = kbase_map_external_resource(kctx, reg, NULL);
4807
- meta->ref = 1;
5428
+ meta->reg = reg;
48085429
4809
- if (!meta->alloc)
5430
+ /* Map the external resource to the GPU allocation of the region
5431
+ * and acquire the reference to the VA region
5432
+ */
5433
+ if (kbase_map_external_resource(kctx, meta->reg, NULL))
48105434 goto fail_map;
4811
-
4812
- meta->gpu_addr = reg->start_pfn << PAGE_SHIFT;
5435
+ meta->ref = 1;
48135436
48145437 list_add(&meta->ext_res_node, &kctx->ext_res_meta_head);
48155438 }
....@@ -4834,7 +5457,7 @@
48345457 * metadata which matches the region which is being released.
48355458 */
48365459 list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node)
4837
- if (walker->gpu_addr == gpu_addr)
5460
+ if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr)
48385461 return walker;
48395462
48405463 return NULL;
....@@ -4843,14 +5466,7 @@
48435466 static void release_sticky_resource_meta(struct kbase_context *kctx,
48445467 struct kbase_ctx_ext_res_meta *meta)
48455468 {
4846
- struct kbase_va_region *reg;
4847
-
4848
- /* Drop the physical memory reference and free the metadata. */
4849
- reg = kbase_region_tracker_find_region_enclosing_address(
4850
- kctx,
4851
- meta->gpu_addr);
4852
-
4853
- kbase_unmap_external_resource(kctx, reg, meta->alloc);
5469
+ kbase_unmap_external_resource(kctx, meta->reg);
48545470 list_del(&meta->ext_res_node);
48555471 kfree(meta);
48565472 }
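
The sticky-resource paths above stop caching a gpu_addr in the metadata and instead keep the region pointer, deriving the address from start_pfn on each lookup (cf. the new kbasep_get_va_gpu_addr()). A compact userspace model of that lookup-or-create flow; the fixed-size table, names, and values are illustrative only:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_PAGE_SHIFT 12

struct demo_region {
	uint64_t start_pfn;
};

struct demo_meta {
	struct demo_region *reg;   /* region pointer is kept, not the GPU address */
	int ref;
};

/* Derive the GPU VA from the region's starting PFN on demand. */
static uint64_t demo_va_gpu_addr(const struct demo_region *reg)
{
	return reg->start_pfn << DEMO_PAGE_SHIFT;
}

/* Acquire: reuse an existing entry (ref++) or create a new one with ref = 1. */
static struct demo_meta *demo_acquire(struct demo_meta *table, size_t n,
				      struct demo_region *reg, uint64_t gpu_addr)
{
	for (size_t i = 0; i < n; i++) {
		if (table[i].reg && demo_va_gpu_addr(table[i].reg) == gpu_addr) {
			table[i].ref++;
			return &table[i];
		}
	}
	for (size_t i = 0; i < n; i++) {
		if (!table[i].reg) {
			table[i].reg = reg;
			table[i].ref = 1;
			return &table[i];
		}
	}
	return NULL;
}

int main(void)
{
	struct demo_region reg = { .start_pfn = 0x41000 };
	struct demo_meta table[4] = { { NULL, 0 } };
	uint64_t gpu_addr = demo_va_gpu_addr(&reg);

	struct demo_meta *m1 = demo_acquire(table, 4, &reg, gpu_addr);
	struct demo_meta *m2 = demo_acquire(table, 4, &reg, gpu_addr);

	printf("same entry: %s, ref = %d, gpu_addr = 0x%llx\n",
	       m1 == m2 ? "yes" : "no", m1->ref,
	       (unsigned long long)demo_va_gpu_addr(m1->reg));
	return 0;
}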