2024-05-16 8d2a02b24d66aa359e83eebc1ed3c0f85367a1cb
kernel/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -25,6 +25,7 @@

 #include <linux/kernel.h>
 #include <linux/dma-mapping.h>
+#include <linux/migrate.h>
 #include <mali_kbase.h>
 #include <gpu/mali_kbase_gpu_fault.h>
 #include <gpu/mali_kbase_gpu_regmap.h>
@@ -41,54 +42,265 @@
 #include <mmu/mali_kbase_mmu_internal.h>
 #include <mali_kbase_cs_experimental.h>
 #include <device/mali_kbase_device.h>
+#include <uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h>
+#if !MALI_USE_CSF
+#include <mali_kbase_hwaccess_jm.h>
+#endif

 #include <mali_kbase_trace_gpu_mem.h>
-#define KBASE_MMU_PAGE_ENTRIES 512
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/* Threshold used to decide whether to flush full caches or just a physical range */
+#define KBASE_PA_RANGE_THRESHOLD_NR_PAGES 20
+#define MGM_DEFAULT_PTE_GROUP (0)
+
+/* Macro to convert updated PDGs to flags indicating levels skip in flush */
+#define pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds) & 0xF)
+
+/* Small wrapper function to factor out GPU-dependent context releasing */
+static void release_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+#if MALI_USE_CSF
+ CSTD_UNUSED(kbdev);
+ kbase_ctx_sched_release_ctx_lock(kctx);
+#else /* MALI_USE_CSF */
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+#endif /* MALI_USE_CSF */
+}
+
+static void mmu_hw_operation_begin(struct kbase_device *kbdev)
+{
+#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+#if MALI_USE_CSF
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3878)) {
+ unsigned long flags;
+
+ lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ WARN_ON_ONCE(kbdev->mmu_hw_operation_in_progress);
+ kbdev->mmu_hw_operation_in_progress = true;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
+#endif /* MALI_USE_CSF */
+#endif /* !CONFIG_MALI_BIFROST_NO_MALI */
+}
+
+static void mmu_hw_operation_end(struct kbase_device *kbdev)
+{
+#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+#if MALI_USE_CSF
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3878)) {
+ unsigned long flags;
+
+ lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ WARN_ON_ONCE(!kbdev->mmu_hw_operation_in_progress);
+ kbdev->mmu_hw_operation_in_progress = false;
+ /* Invoke the PM state machine, the L2 power off may have been
+ * skipped due to the MMU command.
+ */
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
+#endif /* MALI_USE_CSF */
+#endif /* !CONFIG_MALI_BIFROST_NO_MALI */
+}

 /**
- * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches.
- * @kctx: The KBase context.
- * @vpfn: The virtual page frame number to start the flush on.
- * @nr: The number of pages to flush.
- * @sync: Set if the operation should be synchronous or not.
+ * mmu_flush_cache_on_gpu_ctrl() - Check if cache flush needs to be done
+ * through GPU_CONTROL interface.
 *
- * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs.
+ * @kbdev: kbase device to check GPU model ID on.
 *
- * If sync is not set then transactions still in flight when the flush is issued
- * may use the old page tables and the data they write will not be written out
- * to memory, this function returns after the flush has been issued but
- * before all accesses which might effect the flushed region have completed.
+ * This function returns whether a cache flush for page table update should
+ * run through GPU_CONTROL interface or MMU_AS_CONTROL interface.
 *
- * If sync is set then accesses in the flushed region will be drained
- * before data is flush and invalidated through L1, L2 and into memory,
- * after which point this function will return.
+ * Return: True if cache flush should be done on GPU command.
 */
-static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
- u64 vpfn, size_t nr, bool sync);
+static bool mmu_flush_cache_on_gpu_ctrl(struct kbase_device *kbdev)
+{
+ uint32_t const arch_maj_cur = (kbdev->gpu_props.props.raw_props.gpu_id &
+ GPU_ID2_ARCH_MAJOR) >>
+ GPU_ID2_ARCH_MAJOR_SHIFT;
+
+ return arch_maj_cur > 11;
+}

 /**
- * kbase_mmu_flush_invalidate_no_ctx() - Flush and invalidate the GPU caches.
- * @kbdev: Device pointer.
- * @vpfn: The virtual page frame number to start the flush on.
- * @nr: The number of pages to flush.
- * @sync: Set if the operation should be synchronous or not.
- * @as_nr: GPU address space number for which flush + invalidate is required.
+ * mmu_flush_pa_range() - Flush physical address range
 *
- * This is used for MMU tables which do not belong to a user space context.
+ * @kbdev: kbase device to issue the MMU operation on.
+ * @phys: Starting address of the physical range to start the operation on.
+ * @nr_bytes: Number of bytes to work on.
+ * @op: Type of cache flush operation to perform.
+ *
+ * Issue a cache flush physical range command.
 */
-static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev,
- u64 vpfn, size_t nr, bool sync, int as_nr);
+#if MALI_USE_CSF
+static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, size_t nr_bytes,
+ enum kbase_mmu_op_type op)
+{
+ u32 flush_op;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ /* Translate operation to command */
+ if (op == KBASE_MMU_OP_FLUSH_PT)
+ flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2;
+ else if (op == KBASE_MMU_OP_FLUSH_MEM)
+ flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC;
+ else {
+ dev_warn(kbdev->dev, "Invalid flush request (op = %d)", op);
+ return;
+ }
+
+ if (kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op))
+ dev_err(kbdev->dev, "Flush for physical address range did not complete");
+}
+#endif

 /**
- * kbase_mmu_sync_pgd() - sync page directory to memory when needed.
- * @kbdev: Device pointer.
- * @handle: Address of DMA region.
- * @size: Size of the region to sync.
+ * mmu_invalidate() - Perform an invalidate operation on MMU caches.
+ * @kbdev: The Kbase device.
+ * @kctx: The Kbase context.
+ * @as_nr: GPU address space number for which invalidate is required.
+ * @op_param: Non-NULL pointer to struct containing information about the MMU
+ * operation to perform.
 *
- * This should be called after each page directory update.
+ * Perform an MMU invalidate operation on a particual address space
+ * by issuing a UNLOCK command.
 */
-static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
- dma_addr_t handle, size_t size)
+static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr,
+ const struct kbase_mmu_hw_op_param *op_param)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) {
+ as_nr = kctx ? kctx->as_nr : as_nr;
+ if (kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param))
+ dev_err(kbdev->dev,
+ "Invalidate after GPU page table update did not complete");
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/* Perform a flush/invalidate on a particular address space
+ */
+static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as,
+ const struct kbase_mmu_hw_op_param *op_param)
+{
+ unsigned long flags;
+
+ /* AS transaction begin */
+ mutex_lock(&kbdev->mmu_hw_mutex);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ if (kbdev->pm.backend.gpu_powered && (kbase_mmu_hw_do_flush_locked(kbdev, as, op_param)))
+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete");
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ mutex_unlock(&kbdev->mmu_hw_mutex);
+ /* AS transaction end */
+}
+
+/**
+ * mmu_flush_invalidate() - Perform a flush operation on GPU caches.
+ * @kbdev: The Kbase device.
+ * @kctx: The Kbase context.
+ * @as_nr: GPU address space number for which flush + invalidate is required.
+ * @op_param: Non-NULL pointer to struct containing information about the MMU
+ * operation to perform.
+ *
+ * This function performs the cache flush operation described by @op_param.
+ * The function retains a reference to the given @kctx and releases it
+ * after performing the flush operation.
+ *
+ * If operation is set to KBASE_MMU_OP_FLUSH_PT then this function will issue
+ * a cache flush + invalidate to the L2 caches and invalidate the TLBs.
+ *
+ * If operation is set to KBASE_MMU_OP_FLUSH_MEM then this function will issue
+ * a cache flush + invalidate to the L2 and GPU Load/Store caches as well as
+ * invalidating the TLBs.
+ */
+static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr,
+ const struct kbase_mmu_hw_op_param *op_param)
+{
+ bool ctx_is_in_runpool;
+
+ /* Early out if there is nothing to do */
+ if (op_param->nr == 0)
+ return;
+
+ /* If no context is provided then MMU operation is performed on address
+ * space which does not belong to user space context. Otherwise, retain
+ * refcount to context provided and release after flush operation.
+ */
+ if (!kctx) {
+ mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], op_param);
+ } else {
+#if !MALI_USE_CSF
+ mutex_lock(&kbdev->js_data.queue_mutex);
+ ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx);
+ mutex_unlock(&kbdev->js_data.queue_mutex);
+#else
+ ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx);
+#endif /* !MALI_USE_CSF */
+
+ if (ctx_is_in_runpool) {
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+ mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr], op_param);
+
+ release_ctx(kbdev, kctx);
+ }
+ }
+}
+
+/**
+ * mmu_flush_invalidate_on_gpu_ctrl() - Perform a flush operation on GPU caches via
+ * the GPU_CONTROL interface
+ * @kbdev: The Kbase device.
+ * @kctx: The Kbase context.
+ * @as_nr: GPU address space number for which flush + invalidate is required.
+ * @op_param: Non-NULL pointer to struct containing information about the MMU
+ * operation to perform.
+ *
+ * Perform a flush/invalidate on a particular address space via the GPU_CONTROL
+ * interface.
+ */
+static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_context *kctx,
+ int as_nr, const struct kbase_mmu_hw_op_param *op_param)
+{
+ unsigned long flags;
+
+ /* AS transaction begin */
+ mutex_lock(&kbdev->mmu_hw_mutex);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) {
+ as_nr = kctx ? kctx->as_nr : as_nr;
+ if (kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr], op_param))
+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete");
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ mutex_unlock(&kbdev->mmu_hw_mutex);
+}
+
+static void kbase_mmu_sync_pgd_gpu(struct kbase_device *kbdev, struct kbase_context *kctx,
+ phys_addr_t phys, size_t size,
+ enum kbase_mmu_op_type flush_op)
+{
+ kbase_mmu_flush_pa_range(kbdev, kctx, phys, size, flush_op);
+}
+
+static void kbase_mmu_sync_pgd_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size)
 {
 /* In non-coherent system, ensure the GPU can read
 * the pages from memory
@@ -96,6 +308,34 @@
 if (kbdev->system_coherency == COHERENCY_NONE)
 dma_sync_single_for_device(kbdev->dev, handle, size,
 DMA_TO_DEVICE);
+}
+
+/**
+ * kbase_mmu_sync_pgd() - sync page directory to memory when needed.
+ * @kbdev: Device pointer.
+ * @kctx: Context pointer.
+ * @phys: Starting physical address of the destination region.
+ * @handle: Address of DMA region.
+ * @size: Size of the region to sync.
+ * @flush_op: MMU cache flush operation to perform on the physical address
+ * range, if GPU control is available.
+ *
+ * This function is called whenever the association between a virtual address
+ * range and a physical address range changes, because a mapping is created or
+ * destroyed.
+ * One of the effects of this operation is performing an MMU cache flush
+ * operation only on the physical address range affected by this function, if
+ * GPU control is available.
+ *
+ * This should be called after each page directory update.
+ */
+static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, struct kbase_context *kctx,
+ phys_addr_t phys, dma_addr_t handle, size_t size,
+ enum kbase_mmu_op_type flush_op)
+{
+
+ kbase_mmu_sync_pgd_cpu(kbdev, handle, size);
+ kbase_mmu_sync_pgd_gpu(kbdev, kctx, phys, size, flush_op);
 }

 /*
@@ -107,9 +347,154 @@
 * a 4kB physical page.
 */

-static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr *phys, size_t nr,
- unsigned long flags, int group_id);
+static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int group_id, u64 *dirty_pgds);
+
+/**
+ * kbase_mmu_update_and_free_parent_pgds() - Update number of valid entries and
+ * free memory of the page directories
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @pgds: Physical addresses of page directories to be freed.
+ * @vpfn: The virtual page frame number.
+ * @level: The level of MMU page table.
+ * @flush_op: The type of MMU flush operation to perform.
+ * @dirty_pgds: Flags to track every level where a PGD has been updated.
+ */
+static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut, phys_addr_t *pgds,
+ u64 vpfn, int level,
+ enum kbase_mmu_op_type flush_op, u64 *dirty_pgds);
+
+static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
+{
+ atomic_sub(1, &kbdev->memdev.used_pages);
+
+ /* If MMU tables belong to a context then pages will have been accounted
+ * against it, so we must decrement the usage counts here.
+ */
+ if (mmut->kctx) {
+ kbase_process_page_usage_dec(mmut->kctx, 1);
+ atomic_sub(1, &mmut->kctx->used_pages);
+ }
+
+ kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1);
+}
+
+static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut,
+ struct page *p)
+{
+ struct kbase_page_metadata *page_md = kbase_page_private(p);
+ bool page_is_isolated = false;
+
+ lockdep_assert_held(&mmut->mmu_lock);
+
+ if (!kbase_page_migration_enabled)
+ return false;
+
+ spin_lock(&page_md->migrate_lock);
+ if (PAGE_STATUS_GET(page_md->status) == PT_MAPPED) {
+ WARN_ON_ONCE(!mmut->kctx);
+ if (IS_PAGE_ISOLATED(page_md->status)) {
+ page_md->status = PAGE_STATUS_SET(page_md->status,
+ FREE_PT_ISOLATED_IN_PROGRESS);
+ page_md->data.free_pt_isolated.kbdev = kbdev;
+ page_is_isolated = true;
+ } else {
+ page_md->status =
+ PAGE_STATUS_SET(page_md->status, FREE_IN_PROGRESS);
+ }
+ } else {
+ WARN_ON_ONCE(mmut->kctx);
+ WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != NOT_MOVABLE);
+ }
+ spin_unlock(&page_md->migrate_lock);
+
+ if (unlikely(page_is_isolated)) {
+ /* Do the CPU cache flush and accounting here for the isolated
+ * PGD page, which is done inside kbase_mmu_free_pgd() for the
+ * PGD page that did not get isolated.
+ */
+ dma_sync_single_for_device(kbdev->dev, kbase_dma_addr(p), PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ kbase_mmu_account_freed_pgd(kbdev, mmut);
+ }
+
+ return page_is_isolated;
+}
+
+/**
+ * kbase_mmu_free_pgd() - Free memory of the page directory
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @pgd: Physical address of page directory to be freed.
+ *
+ * This function is supposed to be called with mmu_lock held and after
+ * ensuring that GPU won't be able to access the page.
+ */
+static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ phys_addr_t pgd)
+{
+ struct page *p;
+ bool page_is_isolated = false;
+
+ lockdep_assert_held(&mmut->mmu_lock);
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p);
+
+ if (likely(!page_is_isolated)) {
+ kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true);
+ kbase_mmu_account_freed_pgd(kbdev, mmut);
+ }
+}
+
+/**
+ * kbase_mmu_free_pgds_list() - Free the PGD pages present in the list
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ *
+ * This function will call kbase_mmu_free_pgd() on each page directory page
+ * present in the list of free PGDs inside @mmut.
+ *
+ * The function is supposed to be called after the GPU cache and MMU TLB has
+ * been invalidated post the teardown loop.
+ *
+ * The mmu_lock shall be held prior to calling the function.
+ */
+static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
+{
+ size_t i;
+
+ lockdep_assert_held(&mmut->mmu_lock);
+
+ for (i = 0; i < mmut->scratch_mem.free_pgds.head_index; i++)
+ kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(mmut->scratch_mem.free_pgds.pgds[i]));
+
+ mmut->scratch_mem.free_pgds.head_index = 0;
+}
+
+static void kbase_mmu_add_to_free_pgds_list(struct kbase_mmu_table *mmut, struct page *p)
+{
+ lockdep_assert_held(&mmut->mmu_lock);
+
+ if (WARN_ON_ONCE(mmut->scratch_mem.free_pgds.head_index > (MAX_FREE_PGDS - 1)))
+ return;
+
+ mmut->scratch_mem.free_pgds.pgds[mmut->scratch_mem.free_pgds.head_index++] = p;
+}
+
+static inline void kbase_mmu_reset_free_pgds_list(struct kbase_mmu_table *mmut)
+{
+ lockdep_assert_held(&mmut->mmu_lock);
+
+ mmut->scratch_mem.free_pgds.head_index = 0;
+}

 /**
 * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to
@@ -138,7 +523,7 @@
 if (!multiple) {
 dev_warn(
 kbdev->dev,
- "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n",
+ "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW",
 ((unsigned long long)reg->start_pfn) << PAGE_SHIFT);
 return minimum_extra;
 }
@@ -191,22 +576,70 @@
 }

 #ifdef CONFIG_MALI_CINSTR_GWT
-static void kbase_gpu_mmu_handle_write_faulting_as(
- struct kbase_device *kbdev,
- struct kbase_as *faulting_as,
- u64 start_pfn, size_t nr, u32 op)
+static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev,
+ struct kbase_as *faulting_as,
+ u64 start_pfn, size_t nr,
+ u32 kctx_id, u64 dirty_pgds)
 {
+ /* Calls to this function are inherently synchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
+ struct kbase_mmu_hw_op_param op_param;
+ int ret = 0;
+
 mutex_lock(&kbdev->mmu_hw_mutex);

 kbase_mmu_hw_clear_fault(kbdev, faulting_as,
 KBASE_MMU_FAULT_TYPE_PAGE);
- kbase_mmu_hw_do_operation(kbdev, faulting_as, start_pfn,
- nr, op, 1);
+
+ /* flush L2 and unlock the VA (resumes the MMU) */
+ op_param.vpfn = start_pfn;
+ op_param.nr = nr;
+ op_param.op = KBASE_MMU_OP_FLUSH_PT;
+ op_param.kctx_id = kctx_id;
+ op_param.mmu_sync_info = mmu_sync_info;
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+ op_param.flush_skip_levels =
+ pgd_level_to_skip_flush(dirty_pgds);
+ ret = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, &op_param);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+ } else {
+ mmu_hw_operation_begin(kbdev);
+ ret = kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param);
+ mmu_hw_operation_end(kbdev);
+ }

 mutex_unlock(&kbdev->mmu_hw_mutex);

+ if (ret)
+ dev_err(kbdev->dev,
+ "Flush for GPU page fault due to write access did not complete");
+
 kbase_mmu_hw_enable_fault(kbdev, faulting_as,
 KBASE_MMU_FAULT_TYPE_PAGE);
+}
+
+static void set_gwt_element_page_addr_and_size(
+ struct kbasep_gwt_list_element *element,
+ u64 fault_page_addr, struct tagged_addr fault_phys)
+{
+ u64 fault_pfn = fault_page_addr >> PAGE_SHIFT;
+ unsigned int vindex = fault_pfn & (NUM_4K_PAGES_IN_2MB_PAGE - 1);
+
+ /* If the fault address lies within a 2MB page, then consider
+ * the whole 2MB page for dumping to avoid incomplete dumps.
+ */
+ if (is_huge(fault_phys) && (vindex == index_in_large_page(fault_phys))) {
+ element->page_addr = fault_page_addr & ~(SZ_2M - 1);
+ element->num_pages = NUM_4K_PAGES_IN_2MB_PAGE;
+ } else {
+ element->page_addr = fault_page_addr;
+ element->num_pages = 1;
+ }
 }

 static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
@@ -215,11 +648,11 @@
 struct kbasep_gwt_list_element *pos;
 struct kbase_va_region *region;
 struct kbase_device *kbdev;
+ struct tagged_addr *fault_phys_addr;
 struct kbase_fault *fault;
 u64 fault_pfn, pfn_offset;
- u32 op;
- int ret;
 int as_no;
+ u64 dirty_pgds = 0;

 as_no = faulting_as->number;
 kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
@@ -247,15 +680,18 @@
 return;
 }

+ pfn_offset = fault_pfn - region->start_pfn;
+ fault_phys_addr = &kbase_get_gpu_phy_pages(region)[pfn_offset];
+
 /* Capture addresses of faulting write location
 * for job dumping if write tracking is enabled.
 */
 if (kctx->gwt_enabled) {
- u64 page_addr = fault->addr & PAGE_MASK;
+ u64 fault_page_addr = fault->addr & PAGE_MASK;
 bool found = false;
 /* Check if this write was already handled. */
 list_for_each_entry(pos, &kctx->gwt_current_list, link) {
- if (page_addr == pos->page_addr) {
+ if (fault_page_addr == pos->page_addr) {
 found = true;
 break;
 }
@@ -265,8 +701,8 @@
 pos = kmalloc(sizeof(*pos), GFP_KERNEL);
 if (pos) {
 pos->region = region;
- pos->page_addr = page_addr;
- pos->num_pages = 1;
+ set_gwt_element_page_addr_and_size(pos,
+ fault_page_addr, *fault_phys_addr);
 list_add(&pos->link, &kctx->gwt_current_list);
 } else {
 dev_warn(kbdev->dev, "kmalloc failure");
@@ -274,17 +710,12 @@
 }
 }

- pfn_offset = fault_pfn - region->start_pfn;
 /* Now make this faulting page writable to GPU. */
- ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn,
- &kbase_get_gpu_phy_pages(region)[pfn_offset],
- 1, region->flags, region->gpu_alloc->group_id);
+ kbase_mmu_update_pages_no_flush(kbdev, &kctx->mmu, fault_pfn, fault_phys_addr, 1,
+ region->flags, region->gpu_alloc->group_id, &dirty_pgds);

- /* flush L2 and unlock the VA (resumes the MMU) */
- op = AS_COMMAND_FLUSH_PT;
-
- kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as,
- fault_pfn, 1, op);
+ kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1,
+ kctx->id, dirty_pgds);

 kbase_gpu_vm_unlock(kctx);
 }
@@ -315,75 +746,32 @@
 }
 #endif

-#define MAX_POOL_LEVEL 2
-
 /**
- * page_fault_try_alloc - Try to allocate memory from a context pool
- * @kctx: Context pointer
- * @region: Region to grow
- * @new_pages: Number of 4 kB pages to allocate
- * @pages_to_grow: Pointer to variable to store number of outstanding pages on
- * failure. This can be either 4 kB or 2 MB pages, depending on
- * the number of pages requested.
- * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true
- * for 2 MB, false for 4 kB.
- * @prealloc_sas: Pointer to kbase_sub_alloc structures
+ * estimate_pool_space_required - Determine how much a pool should be grown by to support a future
+ * allocation
+ * @pool: The memory pool to check, including its linked pools
+ * @pages_required: Number of 4KiB pages require for the pool to support a future allocation
 *
- * This function will try to allocate as many pages as possible from the context
- * pool, then if required will try to allocate the remaining pages from the
- * device pool.
+ * The value returned is accounting for the size of @pool and the size of each memory pool linked to
+ * @pool. Hence, the caller should use @pool and (if not already satisfied) all its linked pools to
+ * allocate from.
 *
- * This function will not allocate any new memory beyond that that is already
- * present in the context or device pools. This is because it is intended to be
- * called with the vm_lock held, which could cause recursive locking if the
- * allocation caused the out-of-memory killer to run.
+ * Note: this is only an estimate, because even during the calculation the memory pool(s) involved
+ * can be updated to be larger or smaller. Hence, the result is only a guide as to whether an
+ * allocation could succeed, or an estimate of the correct amount to grow the pool by. The caller
+ * should keep attempting an allocation and then re-growing with a new value queried form this
+ * function until the allocation succeeds.
 *
- * If 2 MB pages are enabled and new_pages is >= 2 MB then pages_to_grow will be
- * a count of 2 MB pages, otherwise it will be a count of 4 kB pages.
- *
- * Return: true if successful, false on failure
+ * Return: an estimate of the amount of extra 4KiB pages in @pool that are required to satisfy an
+ * allocation, or 0 if @pool (including its linked pools) is likely to already satisfy the
+ * allocation.
 */
-static bool page_fault_try_alloc(struct kbase_context *kctx,
- struct kbase_va_region *region, size_t new_pages,
- int *pages_to_grow, bool *grow_2mb_pool,
- struct kbase_sub_alloc **prealloc_sas)
+static size_t estimate_pool_space_required(struct kbase_mem_pool *pool, const size_t pages_required)
 {
- struct tagged_addr *gpu_pages[MAX_POOL_LEVEL] = {NULL};
- struct tagged_addr *cpu_pages[MAX_POOL_LEVEL] = {NULL};
- size_t pages_alloced[MAX_POOL_LEVEL] = {0};
- struct kbase_mem_pool *pool, *root_pool;
- int pool_level = 0;
- bool alloc_failed = false;
 size_t pages_still_required;

- if (WARN_ON(region->gpu_alloc->group_id >=
- MEMORY_GROUP_MANAGER_NR_GROUPS)) {
- /* Do not try to grow the memory pool */
- *pages_to_grow = 0;
- return false;
- }
-
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (new_pages >= (SZ_2M / SZ_4K)) {
- root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id];
- *grow_2mb_pool = true;
- } else {
-#endif
- root_pool = &kctx->mem_pools.small[region->gpu_alloc->group_id];
- *grow_2mb_pool = false;
-#ifdef CONFIG_MALI_2MB_ALLOC
- }
-#endif
-
- if (region->gpu_alloc != region->cpu_alloc)
- new_pages *= 2;
-
- pages_still_required = new_pages;
-
- /* Determine how many pages are in the pools before trying to allocate.
- * Don't attempt to allocate & free if the allocation can't succeed.
- */
- for (pool = root_pool; pool != NULL; pool = pool->next_pool) {
+ for (pages_still_required = pages_required; pool != NULL && pages_still_required;
+ pool = pool->next_pool) {
 size_t pool_size_4k;

 kbase_mem_pool_lock(pool);
@@ -395,10 +783,71 @@
 pages_still_required -= pool_size_4k;

 kbase_mem_pool_unlock(pool);
-
- if (!pages_still_required)
- break;
 }
+ return pages_still_required;
+}
+
+/**
+ * page_fault_try_alloc - Try to allocate memory from a context pool
+ * @kctx: Context pointer
+ * @region: Region to grow
+ * @new_pages: Number of 4 KiB pages to allocate
+ * @pages_to_grow: Pointer to variable to store number of outstanding pages on failure. This can be
+ * either 4 KiB or 2 MiB pages, depending on the number of pages requested.
+ * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true for 2 MiB, false for
+ * 4 KiB.
+ * @prealloc_sas: Pointer to kbase_sub_alloc structures
+ *
+ * This function will try to allocate as many pages as possible from the context pool, then if
+ * required will try to allocate the remaining pages from the device pool.
+ *
+ * This function will not allocate any new memory beyond that is already present in the context or
+ * device pools. This is because it is intended to be called whilst the thread has acquired the
+ * region list lock with kbase_gpu_vm_lock(), and a large enough memory allocation whilst that is
+ * held could invoke the OoM killer and cause an effective deadlock with kbase_cpu_vm_close().
+ *
+ * If 2 MiB pages are enabled and new_pages is >= 2 MiB then pages_to_grow will be a count of 2 MiB
+ * pages, otherwise it will be a count of 4 KiB pages.
+ *
+ * Return: true if successful, false on failure
+ */
+static bool page_fault_try_alloc(struct kbase_context *kctx,
+ struct kbase_va_region *region, size_t new_pages,
+ int *pages_to_grow, bool *grow_2mb_pool,
+ struct kbase_sub_alloc **prealloc_sas)
+{
+ size_t total_gpu_pages_alloced = 0;
+ size_t total_cpu_pages_alloced = 0;
+ struct kbase_mem_pool *pool, *root_pool;
+ bool alloc_failed = false;
+ size_t pages_still_required;
+ size_t total_mempools_free_4k = 0;
+
+ lockdep_assert_held(&kctx->reg_lock);
+ lockdep_assert_held(&kctx->mem_partials_lock);
+
+ if (WARN_ON(region->gpu_alloc->group_id >=
+ MEMORY_GROUP_MANAGER_NR_GROUPS)) {
+ /* Do not try to grow the memory pool */
+ *pages_to_grow = 0;
+ return false;
+ }
+
+ if (kctx->kbdev->pagesize_2mb && new_pages >= (SZ_2M / SZ_4K)) {
+ root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id];
+ *grow_2mb_pool = true;
+ } else {
+ root_pool = &kctx->mem_pools.small[region->gpu_alloc->group_id];
+ *grow_2mb_pool = false;
+ }
+
+ if (region->gpu_alloc != region->cpu_alloc)
+ new_pages *= 2;
+
+ /* Determine how many pages are in the pools before trying to allocate.
+ * Don't attempt to allocate & free if the allocation can't succeed.
+ */
+ pages_still_required = estimate_pool_space_required(root_pool, new_pages);

 if (pages_still_required) {
 /* Insufficient pages in pools. Don't try to allocate - just
@@ -409,11 +858,11 @@
 return false;
 }

- /* Since we've dropped the pool locks, the amount of memory in the pools
- * may change between the above check and the actual allocation.
+ /* Since we're not holding any of the mempool locks, the amount of memory in the pools may
+ * change between the above estimate and the actual allocation.
 */
- pool = root_pool;
- for (pool_level = 0; pool_level < MAX_POOL_LEVEL; pool_level++) {
+ pages_still_required = new_pages;
+ for (pool = root_pool; pool != NULL && pages_still_required; pool = pool->next_pool) {
 size_t pool_size_4k;
 size_t pages_to_alloc_4k;
 size_t pages_to_alloc_4k_per_alloc;
@@ -422,93 +871,91 @@

 /* Allocate as much as possible from this pool*/
 pool_size_4k = kbase_mem_pool_size(pool) << pool->order;
- pages_to_alloc_4k = MIN(new_pages, pool_size_4k);
+ total_mempools_free_4k += pool_size_4k;
+ pages_to_alloc_4k = MIN(pages_still_required, pool_size_4k);
 if (region->gpu_alloc == region->cpu_alloc)
 pages_to_alloc_4k_per_alloc = pages_to_alloc_4k;
 else
 pages_to_alloc_4k_per_alloc = pages_to_alloc_4k >> 1;

- pages_alloced[pool_level] = pages_to_alloc_4k;
 if (pages_to_alloc_4k) {
- gpu_pages[pool_level] =
- kbase_alloc_phy_pages_helper_locked(
- region->gpu_alloc, pool,
- pages_to_alloc_4k_per_alloc,
- &prealloc_sas[0]);
+ struct tagged_addr *gpu_pages =
+ kbase_alloc_phy_pages_helper_locked(region->gpu_alloc, pool,
+ pages_to_alloc_4k_per_alloc,
+ &prealloc_sas[0]);

- if (!gpu_pages[pool_level]) {
+ if (!gpu_pages)
 alloc_failed = true;
- } else if (region->gpu_alloc != region->cpu_alloc) {
- cpu_pages[pool_level] =
- kbase_alloc_phy_pages_helper_locked(
- region->cpu_alloc, pool,
- pages_to_alloc_4k_per_alloc,
- &prealloc_sas[1]);
+ else
+ total_gpu_pages_alloced += pages_to_alloc_4k_per_alloc;

- if (!cpu_pages[pool_level])
+ if (!alloc_failed && region->gpu_alloc != region->cpu_alloc) {
+ struct tagged_addr *cpu_pages = kbase_alloc_phy_pages_helper_locked(
+ region->cpu_alloc, pool, pages_to_alloc_4k_per_alloc,
+ &prealloc_sas[1]);
+
+ if (!cpu_pages)
 alloc_failed = true;
+ else
+ total_cpu_pages_alloced += pages_to_alloc_4k_per_alloc;
 }
 }

 kbase_mem_pool_unlock(pool);

 if (alloc_failed) {
- WARN_ON(!new_pages);
- WARN_ON(pages_to_alloc_4k >= new_pages);
- WARN_ON(pages_to_alloc_4k_per_alloc >= new_pages);
+ WARN_ON(!pages_still_required);
+ WARN_ON(pages_to_alloc_4k >= pages_still_required);
+ WARN_ON(pages_to_alloc_4k_per_alloc >= pages_still_required);
 break;
 }

- new_pages -= pages_to_alloc_4k;
-
- if (!new_pages)
- break;
-
- pool = pool->next_pool;
- if (!pool)
- break;
+ pages_still_required -= pages_to_alloc_4k;
 }

- if (new_pages) {
- /* Allocation was unsuccessful */
- int max_pool_level = pool_level;
-
- pool = root_pool;
-
- /* Free memory allocated so far */
- for (pool_level = 0; pool_level <= max_pool_level;
- pool_level++) {
- kbase_mem_pool_lock(pool);
-
- if (region->gpu_alloc != region->cpu_alloc) {
- if (pages_alloced[pool_level] &&
- cpu_pages[pool_level])
- kbase_free_phy_pages_helper_locked(
- region->cpu_alloc,
- pool, cpu_pages[pool_level],
- pages_alloced[pool_level]);
- }
-
- if (pages_alloced[pool_level] && gpu_pages[pool_level])
- kbase_free_phy_pages_helper_locked(
- region->gpu_alloc,
- pool, gpu_pages[pool_level],
- pages_alloced[pool_level]);
-
- kbase_mem_pool_unlock(pool);
-
- pool = pool->next_pool;
- }
-
- /*
- * If the allocation failed despite there being enough memory in
- * the pool, then just fail. Otherwise, try to grow the memory
- * pool.
+ if (pages_still_required) {
+ /* Allocation was unsuccessful. We have dropped the mem_pool lock after allocation,
+ * so must in any case use kbase_free_phy_pages_helper() rather than
+ * kbase_free_phy_pages_helper_locked()
 */
- if (alloc_failed)
+ if (total_gpu_pages_alloced > 0)
+ kbase_free_phy_pages_helper(region->gpu_alloc, total_gpu_pages_alloced);
+ if (region->gpu_alloc != region->cpu_alloc && total_cpu_pages_alloced > 0)
+ kbase_free_phy_pages_helper(region->cpu_alloc, total_cpu_pages_alloced);
+
+ if (alloc_failed) {
+ /* Note that in allocating from the above memory pools, we always ensure
+ * never to request more than is available in each pool with the pool's
+ * lock held. Hence failing to allocate in such situations would be unusual
+ * and we should cancel the growth instead (as re-growing the memory pool
+ * might not fix the situation)
+ */
+ dev_warn(
+ kctx->kbdev->dev,
+ "Page allocation failure of %zu pages: managed %zu pages, mempool (inc linked pools) had %zu pages available",
+ new_pages, total_gpu_pages_alloced + total_cpu_pages_alloced,
+ total_mempools_free_4k);
 *pages_to_grow = 0;
- else
- *pages_to_grow = new_pages;
+ } else {
+ /* Tell the caller to try to grow the memory pool
+ *
+ * Freeing pages above may have spilled or returned them to the OS, so we
+ * have to take into account how many are still in the pool before giving a
+ * new estimate for growth required of the pool. We can just re-estimate a
+ * new value.
+ */
+ pages_still_required = estimate_pool_space_required(root_pool, new_pages);
+ if (pages_still_required) {
+ *pages_to_grow = pages_still_required;
+ } else {
+ /* It's possible another thread could've grown the pool to be just
+ * big enough after we rolled back the allocation. Request at least
+ * one more page to ensure the caller doesn't fail the growth by
+ * conflating it with the alloc_failed case above
+ */
+ *pages_to_grow = 1u;
+ }
+ }

 return false;
 }
@@ -517,18 +964,6 @@
 *pages_to_grow = 0;

 return true;
-}
-
-/* Small wrapper function to factor out GPU-dependent context releasing */
-static void release_ctx(struct kbase_device *kbdev,
- struct kbase_context *kctx)
-{
-#if MALI_USE_CSF
- CSTD_UNUSED(kbdev);
- kbase_ctx_sched_release_ctx_lock(kctx);
-#else /* MALI_USE_CSF */
- kbasep_js_runpool_release_ctx(kbdev, kctx);
-#endif /* MALI_USE_CSF */
 }

 void kbase_mmu_page_fault_worker(struct work_struct *data)
@@ -554,15 +989,19 @@
 size_t pages_trimmed = 0;
 #endif

+ /* Calls to this function are inherently synchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
+
 faulting_as = container_of(data, struct kbase_as, work_pagefault);
 fault = &faulting_as->pf_data;
 fault_pfn = fault->addr >> PAGE_SHIFT;
 as_no = faulting_as->number;

 kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
- dev_dbg(kbdev->dev,
- "Entering %s %pK, fault_pfn %lld, as_no %d\n",
- __func__, (void *)data, fault_pfn, as_no);
+ dev_dbg(kbdev->dev, "Entering %s %pK, fault_pfn %lld, as_no %d", __func__, (void *)data,
+ fault_pfn, as_no);

 /* Grab the context that was already refcounted in kbase_mmu_interrupt()
 * Therefore, it cannot be scheduled out of this AS until we explicitly
@@ -585,8 +1024,7 @@
 #ifdef CONFIG_MALI_ARBITER_SUPPORT
 /* check if we still have GPU */
 if (unlikely(kbase_is_gpu_removed(kbdev))) {
- dev_dbg(kbdev->dev,
- "%s: GPU has been removed\n", __func__);
+ dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__);
 goto fault_done;
 }
 #endif
@@ -649,20 +1087,24 @@
 goto fault_done;
 }

-#ifdef CONFIG_MALI_2MB_ALLOC
- /* Preallocate memory for the sub-allocation structs if necessary */
- for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
- prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
- if (!prealloc_sas[i]) {
- kbase_mmu_report_fault_and_kill(kctx, faulting_as,
- "Failed pre-allocating memory for sub-allocations' metadata",
- fault);
- goto fault_done;
+page_fault_retry:
+ if (kbdev->pagesize_2mb) {
+ /* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */
+ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
+ if (!prealloc_sas[i]) {
+ prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
+
+ if (!prealloc_sas[i]) {
+ kbase_mmu_report_fault_and_kill(
+ kctx, faulting_as,
+ "Failed pre-allocating memory for sub-allocations' metadata",
+ fault);
+ goto fault_done;
+ }
+ }
 }
 }
-#endif /* CONFIG_MALI_2MB_ALLOC */

-page_fault_retry:
 /* so we have a translation fault,
 * let's see if it is for growable memory
 */
@@ -720,6 +1162,8 @@
 current_backed_size = kbase_reg_current_backed_size(region);

 if (fault_rel_pfn < current_backed_size) {
+ struct kbase_mmu_hw_op_param op_param;
+
 dev_dbg(kbdev->dev,
 "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
 fault->addr, region->start_pfn,
@@ -738,8 +1182,29 @@
 * transaction (which should cause the other page fault to be
 * raised again).
 */
- kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0,
- AS_COMMAND_UNLOCK, 1);
+ op_param.mmu_sync_info = mmu_sync_info;
+ op_param.kctx_id = kctx->id;
+ if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) {
+ mmu_hw_operation_begin(kbdev);
+ err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as,
+ &op_param);
+ mmu_hw_operation_end(kbdev);
+ } else {
+ /* Can safely skip the invalidate for all levels in case
+ * of duplicate page faults.
+ */
+ op_param.flush_skip_levels = 0xF;
+ op_param.vpfn = fault_pfn;
+ op_param.nr = 1;
+ err = kbase_mmu_hw_do_unlock(kbdev, faulting_as,
+ &op_param);
+ }
+
+ if (err) {
+ dev_err(kbdev->dev,
+ "Invalidation for MMU did not complete on handling page fault @ 0x%llx",
+ fault->addr);
+ }

 mutex_unlock(&kbdev->mmu_hw_mutex);

@@ -754,18 +1219,41 @@

 /* cap to max vsize */
 new_pages = min(new_pages, region->nr_pages - current_backed_size);
- dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault\n",
- new_pages);
+ dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault", new_pages);

 if (new_pages == 0) {
+ struct kbase_mmu_hw_op_param op_param;
+
 mutex_lock(&kbdev->mmu_hw_mutex);

 /* Duplicate of a fault we've already handled, nothing to do */
 kbase_mmu_hw_clear_fault(kbdev, faulting_as,
 KBASE_MMU_FAULT_TYPE_PAGE);
+
 /* See comment [1] about UNLOCK usage */
- kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0,
- AS_COMMAND_UNLOCK, 1);
+ op_param.mmu_sync_info = mmu_sync_info;
+ op_param.kctx_id = kctx->id;
+ if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) {
+ mmu_hw_operation_begin(kbdev);
+ err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as,
+ &op_param);
+ mmu_hw_operation_end(kbdev);
+ } else {
+ /* Can safely skip the invalidate for all levels in case
+ * of duplicate page faults.
+ */
+ op_param.flush_skip_levels = 0xF;
+ op_param.vpfn = fault_pfn;
+ op_param.nr = 1;
+ err = kbase_mmu_hw_do_unlock(kbdev, faulting_as,
+ &op_param);
+ }
+
+ if (err) {
+ dev_err(kbdev->dev,
+ "Invalidation for MMU did not complete on handling page fault @ 0x%llx",
+ fault->addr);
+ }

 mutex_unlock(&kbdev->mmu_hw_mutex);

@@ -790,8 +1278,9 @@
 spin_unlock(&kctx->mem_partials_lock);

 if (grown) {
+ u64 dirty_pgds = 0;
 u64 pfn_offset;
- u32 op;
+ struct kbase_mmu_hw_op_param op_param;

 /* alloc success */
 WARN_ON(kbase_reg_current_backed_size(region) >
@@ -807,10 +1296,10 @@
 * so the no_flush version of insert_pages is used which allows
 * us to unlock the MMU as we see fit.
 */
- err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu,
- region->start_pfn + pfn_offset,
- &kbase_get_gpu_phy_pages(region)[pfn_offset],
- new_pages, region->flags, region->gpu_alloc->group_id);
+ err = kbase_mmu_insert_pages_no_flush(
+ kbdev, &kctx->mmu, region->start_pfn + pfn_offset,
+ &kbase_get_gpu_phy_pages(region)[pfn_offset], new_pages, region->flags,
+ region->gpu_alloc->group_id, &dirty_pgds, region, false);
 if (err) {
 kbase_free_phy_pages_helper(region->gpu_alloc,
 new_pages);
@@ -829,23 +1318,18 @@
 (u64)new_pages);
 trace_mali_mmu_page_fault_grow(region, fault, new_pages);

-#if MALI_INCREMENTAL_RENDERING
+#if MALI_INCREMENTAL_RENDERING_JM
 /* Switch to incremental rendering if we have nearly run out of
 * memory in a JIT memory allocation.
 */
 if (region->threshold_pages &&
 kbase_reg_current_backed_size(region) >
 region->threshold_pages) {
-
- dev_dbg(kctx->kbdev->dev,
- "%zu pages exceeded IR threshold %zu\n",
- new_pages + current_backed_size,
- region->threshold_pages);
+ dev_dbg(kctx->kbdev->dev, "%zu pages exceeded IR threshold %zu",
+ new_pages + current_backed_size, region->threshold_pages);

 if (kbase_mmu_switch_to_ir(kctx, region) >= 0) {
- dev_dbg(kctx->kbdev->dev,
- "Get region %pK for IR\n",
- (void *)region);
+ dev_dbg(kctx->kbdev->dev, "Get region %pK for IR", (void *)region);
 kbase_va_region_alloc_get(kctx, region);
 }
 }
@@ -853,9 +1337,6 @@

 /* AS transaction begin */
 mutex_lock(&kbdev->mmu_hw_mutex);
-
- /* flush L2 and unlock the VA (resumes the MMU) */
- op = AS_COMMAND_FLUSH_PT;

 /* clear MMU interrupt - this needs to be done after updating
 * the page tables but before issuing a FLUSH command. The
@@ -868,9 +1349,30 @@
 kbase_mmu_hw_clear_fault(kbdev, faulting_as,
 KBASE_MMU_FAULT_TYPE_PAGE);

- kbase_mmu_hw_do_operation(kbdev, faulting_as,
- fault->addr >> PAGE_SHIFT,
- new_pages, op, 1);
+ op_param.vpfn = region->start_pfn + pfn_offset;
+ op_param.nr = new_pages;
+ op_param.op = KBASE_MMU_OP_FLUSH_PT;
+ op_param.kctx_id = kctx->id;
+ op_param.mmu_sync_info = mmu_sync_info;
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
+ /* Unlock to invalidate the TLB (and resume the MMU) */
+ op_param.flush_skip_levels =
+ pgd_level_to_skip_flush(dirty_pgds);
+ err = kbase_mmu_hw_do_unlock(kbdev, faulting_as,
+ &op_param);
+ } else {
+ /* flush L2 and unlock the VA (resumes the MMU) */
+ mmu_hw_operation_begin(kbdev);
+ err = kbase_mmu_hw_do_flush(kbdev, faulting_as,
+ &op_param);
+ mmu_hw_operation_end(kbdev);
+ }
+
+ if (err) {
+ dev_err(kbdev->dev,
+ "Flush for GPU page table update did not complete on handling page fault @ 0x%llx",
+ fault->addr);
+ }

 mutex_unlock(&kbdev->mmu_hw_mutex);
 /* AS transaction end */
@@ -915,8 +1417,7 @@
 * Otherwise fail the allocation.
 */
 if (pages_to_grow > 0) {
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (grow_2mb_pool) {
+ if (kbdev->pagesize_2mb && grow_2mb_pool) {
 /* Round page requirement up to nearest 2 MB */
 struct kbase_mem_pool *const lp_mem_pool =
 &kctx->mem_pools.large[
@@ -927,25 +1428,22 @@
 >> lp_mem_pool->order;

 ret = kbase_mem_pool_grow(lp_mem_pool,
- pages_to_grow);
+ pages_to_grow, kctx->task);
 } else {
-#endif
 struct kbase_mem_pool *const mem_pool =
 &kctx->mem_pools.small[
 region->gpu_alloc->group_id];

 ret = kbase_mem_pool_grow(mem_pool,
- pages_to_grow);
-#ifdef CONFIG_MALI_2MB_ALLOC
+ pages_to_grow, kctx->task);
 }
-#endif
 }
 if (ret < 0) {
 /* failed to extend, handle as a normal PF */
 kbase_mmu_report_fault_and_kill(kctx, faulting_as,
 "Page allocation failure", fault);
 } else {
- dev_dbg(kbdev->dev, "Try again after pool_grow\n");
+ dev_dbg(kbdev->dev, "Try again after pool_grow");
 goto page_fault_retry;
 }
 }
@@ -972,27 +1470,25 @@
 release_ctx(kbdev, kctx);

 atomic_dec(&kbdev->faults_pending);
- dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK\n", (void *)data);
+ dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK", (void *)data);
 }

 static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
 struct kbase_mmu_table *mmut)
 {
 u64 *page;
- int i;
 struct page *p;
+ phys_addr_t pgd;

-#ifdef CONFIG_MALI_2MB_ALLOC
- p = kbase_mem_pool_alloc(&kbdev->mem_pools.large[mmut->group_id]);
-#else /* CONFIG_MALI_2MB_ALLOC */
 p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]);
-#endif /* CONFIG_MALI_2MB_ALLOC */
 if (!p)
- return 0;
+ return KBASE_MMU_INVALID_PGD_ADDRESS;

 page = kmap(p);
 if (page == NULL)
 goto alloc_free;
+
+ pgd = page_to_phys(p);

 /* If the MMU tables belong to a context then account the memory usage
 * to that context, otherwise the MMU tables are device wide and are
@@ -1014,37 +1510,42 @@

 kbase_trace_gpu_mem_usage_inc(kbdev, mmut->kctx, 1);

- for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
- kbdev->mmu_mode->entry_invalidate(&page[i]);
+ kbdev->mmu_mode->entries_invalidate(page, KBASE_MMU_PAGE_ENTRIES);

- kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE);
+ /* As this page is newly created, therefore there is no content to
+ * clean or invalidate in the GPU caches.
+ */
+ kbase_mmu_sync_pgd_cpu(kbdev, kbase_dma_addr(p), PAGE_SIZE);

 kunmap(p);
- return page_to_phys(p);
+ return pgd;

 alloc_free:
-
-#ifdef CONFIG_MALI_2MB_ALLOC
- kbase_mem_pool_free(&kbdev->mem_pools.large[mmut->group_id], p, false);
-#else /* CONFIG_MALI_2MB_ALLOC */
 kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, false);
-#endif /* CONFIG_MALI_2MB_ALLOC */

- return 0;
+ return KBASE_MMU_INVALID_PGD_ADDRESS;
 }

-/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the
- * new table from the pool if needed and possible
+/**
+ * mmu_get_next_pgd() - Given PGD PFN for level N, return PGD PFN for level N+1
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @pgd: Physical addresse of level N page directory.
+ * @vpfn: The virtual page frame number.
+ * @level: The level of MMU page table (N).
+ *
+ * Return:
+ * * 0 - OK
+ * * -EFAULT - level N+1 PGD does not exist
+ * * -EINVAL - kmap() failed for level N PGD PFN
 */
-static int mmu_get_next_pgd(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut,
- phys_addr_t *pgd, u64 vpfn, int level)
+static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ phys_addr_t *pgd, u64 vpfn, int level)
 {
 u64 *page;
 phys_addr_t target_pgd;
 struct page *p;
-
- KBASE_DEBUG_ASSERT(*pgd);

 lockdep_assert_held(&mmut->mmu_lock);

@@ -1058,25 +1559,19 @@
 p = pfn_to_page(PFN_DOWN(*pgd));
 page = kmap(p);
 if (page == NULL) {
- dev_warn(kbdev->dev, "%s: kmap failure\n", __func__);
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
 return -EINVAL;
 }

- target_pgd = kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
-
- if (!target_pgd) {
- target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
- if (!target_pgd) {
- dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n",
- __func__);
- kunmap(p);
- return -ENOMEM;
- }
-
- kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
-
- kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE);
- /* Rely on the caller to update the address space flags. */
+ if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) {
+ dev_dbg(kbdev->dev, "%s: invalid PTE at level %d vpfn 0x%llx", __func__, level,
+ vpfn);
+ kunmap(p);
+ return -EFAULT;
+ } else {
+ target_pgd = kbdev->mmu_mode->pte_to_phy_addr(
+ kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[vpfn]));
 }

 kunmap(p);
@@ -1085,14 +1580,69 @@
 return 0;
 }

-/*
- * Returns the PGD for the specified level of translation
+/**
+ * mmu_get_lowest_valid_pgd() - Find a valid PGD at or closest to in_level
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @vpfn: The virtual page frame number.
+ * @in_level: The level of MMU page table (N).
+ * @out_level: Set to the level of the lowest valid PGD found on success.
+ * Invalid on error.
+ * @out_pgd: Set to the lowest valid PGD found on success.
+ * Invalid on error.
+ *
+ * Does a page table walk starting from top level (L0) to in_level to find a valid PGD at or
+ * closest to in_level
+ *
+ * Terminology:
+ * Level-0 = Top-level = highest
+ * Level-3 = Bottom-level = lowest
+ *
+ * Return:
+ * * 0 - OK
+ * * -EINVAL - kmap() failed during page table walk.
 */
-static int mmu_get_pgd_at_level(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut,
- u64 vpfn,
- int level,
- phys_addr_t *out_pgd)
+static int mmu_get_lowest_valid_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, int in_level, int *out_level, phys_addr_t *out_pgd)
+{
+ phys_addr_t pgd;
+ int l;
+ int err = 0;
+
+ lockdep_assert_held(&mmut->mmu_lock);
+ pgd = mmut->pgd;
+
+ for (l = MIDGARD_MMU_TOPLEVEL; l < in_level; l++) {
+ err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
+
+ /* Handle failure condition */
+ if (err) {
+ dev_dbg(kbdev->dev,
+ "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d",
+ __func__, l + 1);
+ break;
+ }
+ }
+
+ *out_pgd = pgd;
+ *out_level = l;
+
+ /* -EFAULT indicates that pgd param was valid but the next pgd entry at vpfn was invalid.
+ * This implies that we have found the lowest valid pgd. Reset the error code.
+ */
+ if (err == -EFAULT)
+ err = 0;
+
+ return err;
+}
+
+/*
+ * On success, sets out_pgd to the PGD for the specified level of translation
+ * Returns -EFAULT if a valid PGD is not found
+ */
+static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
+ int level, phys_addr_t *out_pgd)
 {
 phys_addr_t pgd;
 int l;
....@@ -1104,9 +1654,9 @@
11041654 int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
11051655 /* Handle failure condition */
11061656 if (err) {
1107
- dev_dbg(kbdev->dev,
1108
- "%s: mmu_get_next_pgd failure at level %d\n",
1109
- __func__, l);
1657
+ dev_err(kbdev->dev,
1658
+ "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d",
1659
+ __func__, l + 1);
11101660 return err;
11111661 }
11121662 }
....@@ -1116,20 +1666,11 @@
11161666 return 0;
11171667 }
11181668
1119
-static int mmu_get_bottom_pgd(struct kbase_device *kbdev,
1120
- struct kbase_mmu_table *mmut,
1121
- u64 vpfn,
1122
- phys_addr_t *out_pgd)
1123
-{
1124
- return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL,
1125
- out_pgd);
1126
-}
1127
-
11281669 static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
1129
- struct kbase_mmu_table *mmut,
1130
- u64 from_vpfn, u64 to_vpfn)
1670
+ struct kbase_mmu_table *mmut, u64 from_vpfn,
1671
+ u64 to_vpfn, u64 *dirty_pgds,
1672
+ struct tagged_addr *phys, bool ignore_page_migration)
11311673 {
1132
- phys_addr_t pgd;
11331674 u64 vpfn = from_vpfn;
11341675 struct kbase_mmu_mode const *mmu_mode;
11351676
....@@ -1140,30 +1681,36 @@
11401681 lockdep_assert_held(&mmut->mmu_lock);
11411682
11421683 mmu_mode = kbdev->mmu_mode;
1684
+ kbase_mmu_reset_free_pgds_list(mmut);
11431685
11441686 while (vpfn < to_vpfn) {
1145
- unsigned int i;
11461687 unsigned int idx = vpfn & 0x1FF;
11471688 unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx;
11481689 unsigned int pcount = 0;
11491690 unsigned int left = to_vpfn - vpfn;
11501691 int level;
11511692 u64 *page;
1693
+ phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
1694
+ phys_addr_t pgd = mmut->pgd;
1695
+ struct page *p = phys_to_page(pgd);
1696
+
1697
+ register unsigned int num_of_valid_entries;
11521698
11531699 if (count > left)
11541700 count = left;
11551701
11561702 /* need to check if this is a 2MB page or a 4kB */
1157
- pgd = mmut->pgd;
1158
-
11591703 for (level = MIDGARD_MMU_TOPLEVEL;
11601704 level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
11611705 idx = (vpfn >> ((3 - level) * 9)) & 0x1FF;
1162
- page = kmap(phys_to_page(pgd));
1706
+ pgds[level] = pgd;
1707
+ page = kmap(p);
11631708 if (mmu_mode->ate_is_valid(page[idx], level))
11641709 break; /* keep the mapping */
1165
- kunmap(phys_to_page(pgd));
1166
- pgd = mmu_mode->pte_to_phy_addr(page[idx]);
1710
+ kunmap(p);
1711
+ pgd = mmu_mode->pte_to_phy_addr(kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
1712
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[idx]));
1713
+ p = phys_to_page(pgd);
11671714 }
11681715
11691716 switch (level) {
....@@ -1176,49 +1723,311 @@
11761723 pcount = count;
11771724 break;
11781725 default:
1179
- dev_warn(kbdev->dev, "%sNo support for ATEs at level %d\n",
1180
- __func__, level);
1726
+ dev_warn(kbdev->dev, "%sNo support for ATEs at level %d", __func__, level);
11811727 goto next;
11821728 }
11831729
1730
+ if (dirty_pgds && pcount > 0)
1731
+ *dirty_pgds |= 1ULL << level;
1732
+
1733
+ num_of_valid_entries = mmu_mode->get_num_valid_entries(page);
1734
+ if (WARN_ON_ONCE(num_of_valid_entries < pcount))
1735
+ num_of_valid_entries = 0;
1736
+ else
1737
+ num_of_valid_entries -= pcount;
1738
+
11841739 /* Invalidate the entries we added */
1185
- for (i = 0; i < pcount; i++)
1186
- mmu_mode->entry_invalidate(&page[idx + i]);
1740
+ mmu_mode->entries_invalidate(&page[idx], pcount);
11871741
1188
- kbase_mmu_sync_pgd(kbdev,
1189
- kbase_dma_addr(phys_to_page(pgd)) + 8 * idx,
1190
- 8 * pcount);
1191
- kunmap(phys_to_page(pgd));
1742
+ if (!num_of_valid_entries) {
1743
+ kunmap(p);
11921744
1745
+ kbase_mmu_add_to_free_pgds_list(mmut, p);
1746
+
1747
+ kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
1748
+ KBASE_MMU_OP_NONE, dirty_pgds);
1749
+ vpfn += count;
1750
+ continue;
1751
+ }
1752
+
1753
+ mmu_mode->set_num_valid_entries(page, num_of_valid_entries);
1754
+
1755
+ /* MMU cache flush strategy is NONE because GPU cache maintenance is
1756
+ * going to be done by the caller
1757
+ */
1758
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (idx * sizeof(u64)),
1759
+ kbase_dma_addr(p) + sizeof(u64) * idx, sizeof(u64) * pcount,
1760
+ KBASE_MMU_OP_NONE);
1761
+ kunmap(p);
11931762 next:
11941763 vpfn += count;
11951764 }
1765
+
1766
+ /* If page migration is enabled: the only way to recover from failure
1767
+ * is to mark all pages as not movable. It is not predictable what's
1768
+ * going to happen to these pages at this stage. They might return
1769
+ * movable once they are returned to a memory pool.
1770
+ */
1771
+ if (kbase_page_migration_enabled && !ignore_page_migration && phys) {
1772
+ const u64 num_pages = to_vpfn - from_vpfn + 1;
1773
+ u64 i;
1774
+
1775
+ for (i = 0; i < num_pages; i++) {
1776
+ struct page *phys_page = as_page(phys[i]);
1777
+ struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
1778
+
1779
+ if (page_md) {
1780
+ spin_lock(&page_md->migrate_lock);
1781
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
1782
+ spin_unlock(&page_md->migrate_lock);
1783
+ }
1784
+ }
1785
+ }
11961786 }
11971787
1198
-/*
1199
- * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
1788
+static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
1789
+ struct kbase_mmu_table *mmut, const u64 vpfn,
1790
+ size_t nr, u64 dirty_pgds,
1791
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
1792
+ bool insert_pages_failed)
1793
+{
1794
+ struct kbase_mmu_hw_op_param op_param;
1795
+ int as_nr = 0;
1796
+
1797
+ op_param.vpfn = vpfn;
1798
+ op_param.nr = nr;
1799
+ op_param.op = KBASE_MMU_OP_FLUSH_PT;
1800
+ op_param.mmu_sync_info = mmu_sync_info;
1801
+ op_param.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF;
1802
+ op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds);
1803
+
1804
+#if MALI_USE_CSF
1805
+ as_nr = mmut->kctx ? mmut->kctx->as_nr : MCU_AS_NR;
1806
+#else
1807
+ WARN_ON(!mmut->kctx);
1808
+#endif
1809
+
1810
+ /* MMU cache flush strategy depends on whether GPU control commands for
1811
+ * flushing physical address ranges are supported. The new physical pages
1812
+ * are not present in GPU caches therefore they don't need any cache
1813
+ * maintenance, but PGDs in the page table may or may not be created anew.
1814
+ *
1815
+ * Operations that affect the whole GPU cache shall only be done if it's
1816
+ * impossible to update physical ranges.
1817
+ *
1818
+ * On GPUs where flushing by physical address range is supported,
1819
+ * full cache flush is done when an error occurs during
1820
+ * insert_pages() to keep the error handling simpler.
1821
+ */
1822
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && !insert_pages_failed)
1823
+ mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
1824
+ else
1825
+ mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
1826
+}
1827
+
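A note on the dirty_pgds value consumed above: the insert and teardown paths set bit N whenever a level-N PGD is written, and flush_skip_levels is derived from it so that the MMU flush can skip levels that were not touched (kbase_mmu_disable() below feeds 0xF into the same conversion precisely to prevent any level from being skipped). A small self-contained sketch of that bookkeeping, assuming the dirty-to-skip conversion is a plain complement over the four levels, as that 0xF usage suggests; the helper name is invented for illustration.

#include <stdio.h>
#include <stdint.h>

#define PGD_LEVELS	4

/* Assumed conversion: a level may be skipped by the flush iff it was not dirtied */
static uint64_t dirty_to_skip_levels(uint64_t dirty_pgds)
{
	return ~dirty_pgds & ((1ULL << PGD_LEVELS) - 1);
}

int main(void)
{
	uint64_t dirty_pgds = 0;

	/* Suppose an insert wrote the bottom level (3) and its parent (2) */
	dirty_pgds |= 1ULL << 3;
	dirty_pgds |= 1ULL << 2;

	/* Levels 0 and 1 were untouched, so only they may be skipped: 0x3 */
	printf("dirty=0x%llx skip=0x%llx\n", (unsigned long long)dirty_pgds,
	       (unsigned long long)dirty_to_skip_levels(dirty_pgds));

	return 0;
}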
1828
+/**
1829
+ * update_parent_pgds() - Updates the page table from bottom level towards
1830
+ * the top level to insert a new ATE
1831
+ *
1832
+ * @kbdev: Device pointer.
1833
+ * @mmut: GPU MMU page table.
1834
+ * @cur_level: The level of MMU page table where the ATE needs to be added.
1835
+ * The bottom PGD level.
1836
+ * @insert_level: The level of MMU page table where the chain of newly allocated
1837
+ * PGDs needs to be linked-in/inserted.
1838
+ * The top-most PGD level to be updated.
1839
+ * @insert_vpfn: The virtual page frame number for the ATE.
1840
+ * @pgds_to_insert: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) that contains
1841
+ * the physical addresses of newly allocated PGDs from index
1842
+ * insert_level+1 to cur_level, and an existing PGD at index
1843
+ * insert_level.
1844
+ *
1845
+ * The newly allocated PGDs are linked from the bottom level up and inserted into the PGD
1846
+ * at insert_level which already exists in the MMU Page Tables. Migration status is also
1847
+ * updated for all the newly allocated PGD pages.
1848
+ *
1849
+ * Return:
1850
+ * * 0 - OK
1851
+ * * -EFAULT - level N+1 PGD does not exist
1852
+ * * -EINVAL - kmap() failed for level N PGD PFN
12001853 */
1201
-int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
1202
- struct tagged_addr phys, size_t nr,
1203
- unsigned long flags, int const group_id)
1854
+static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
1855
+ int cur_level, int insert_level, u64 insert_vpfn,
1856
+ phys_addr_t *pgds_to_insert)
1857
+{
1858
+ int pgd_index;
1859
+ int err = 0;
1860
+
1861
+ /* Add a PTE for the new PGD page at pgd_index into the parent PGD at (pgd_index-1)
1862
+ * Loop runs from the bottom-most to the top-most level so that all entries in the chain
1863
+ * are valid when they are inserted into the MMU Page table via the insert_level PGD.
1864
+ */
1865
+ for (pgd_index = cur_level; pgd_index > insert_level; pgd_index--) {
1866
+ int parent_index = pgd_index - 1;
1867
+ phys_addr_t parent_pgd = pgds_to_insert[parent_index];
1868
+ unsigned int current_valid_entries;
1869
+ u64 pte;
1870
+ phys_addr_t target_pgd = pgds_to_insert[pgd_index];
1871
+ u64 parent_vpfn = (insert_vpfn >> ((3 - parent_index) * 9)) & 0x1FF;
1872
+ struct page *parent_page = pfn_to_page(PFN_DOWN(parent_pgd));
1873
+ u64 *parent_page_va;
1874
+
1875
+ if (WARN_ON_ONCE(target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS)) {
1876
+ err = -EFAULT;
1877
+ goto failure_recovery;
1878
+ }
1879
+
1880
+ parent_page_va = kmap(parent_page);
1881
+ if (unlikely(parent_page_va == NULL)) {
1882
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
1883
+ err = -EINVAL;
1884
+ goto failure_recovery;
1885
+ }
1886
+
1887
+ current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(parent_page_va);
1888
+
1889
+ kbdev->mmu_mode->entry_set_pte(&pte, target_pgd);
1890
+ parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte(
1891
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, parent_index, pte);
1892
+ kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1);
1893
+ kunmap(parent_page);
1894
+
1895
+ if (parent_index != insert_level) {
1896
+ /* Newly allocated PGDs */
1897
+ kbase_mmu_sync_pgd_cpu(
1898
+ kbdev, kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)),
1899
+ sizeof(u64));
1900
+ } else {
1901
+ /* A new valid entry is added to an existing PGD. Perform the
1902
+ * invalidate operation for GPU cache as it could be having a
1903
+ * cacheline that contains the entry (in an invalid form).
1904
+ */
1905
+ kbase_mmu_sync_pgd(
1906
+ kbdev, mmut->kctx, parent_pgd + (parent_vpfn * sizeof(u64)),
1907
+ kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)),
1908
+ sizeof(u64), KBASE_MMU_OP_FLUSH_PT);
1909
+ }
1910
+
1911
+ /* Update the new target_pgd page to its stable state */
1912
+ if (kbase_page_migration_enabled) {
1913
+ struct kbase_page_metadata *page_md =
1914
+ kbase_page_private(phys_to_page(target_pgd));
1915
+
1916
+ spin_lock(&page_md->migrate_lock);
1917
+
1918
+ WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS ||
1919
+ IS_PAGE_ISOLATED(page_md->status));
1920
+
1921
+ if (mmut->kctx) {
1922
+ page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED);
1923
+ page_md->data.pt_mapped.mmut = mmut;
1924
+ page_md->data.pt_mapped.pgd_vpfn_level =
1925
+ PGD_VPFN_LEVEL_SET(insert_vpfn, parent_index);
1926
+ } else {
1927
+ page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE);
1928
+ }
1929
+
1930
+ spin_unlock(&page_md->migrate_lock);
1931
+ }
1932
+ }
1933
+
1934
+ return 0;
1935
+
1936
+failure_recovery:
1937
+ /* Cleanup PTEs from PGDs. The Parent PGD in the loop above is just "PGD" here */
1938
+ for (; pgd_index < cur_level; pgd_index++) {
1939
+ phys_addr_t pgd = pgds_to_insert[pgd_index];
1940
+ struct page *pgd_page = pfn_to_page(PFN_DOWN(pgd));
1941
+ u64 *pgd_page_va = kmap(pgd_page);
1942
+ u64 vpfn = (insert_vpfn >> ((3 - pgd_index) * 9)) & 0x1FF;
1943
+
1944
+ kbdev->mmu_mode->entries_invalidate(&pgd_page_va[vpfn], 1);
1945
+ kunmap(pgd_page);
1946
+ }
1947
+
1948
+ return err;
1949
+}
1950
+
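The loop direction in update_parent_pgds() is the important detail: the chain of new PGDs is wired from the deepest parent upwards, so the final write (the one into the PGD that already exists at insert_level) is what makes the whole chain reachable, and by then every deeper PGD already holds a valid entry. A trivial standalone model of that ordering; no kbase types are used and the level values are illustrative.

#include <stdio.h>

#define LEVELS 4

int main(void)
{
	int write_step[LEVELS] = { -1, -1, -1, -1 };
	int cur_level = 3;	/* level that will hold the new ATE */
	int insert_level = 0;	/* deepest PGD that already exists */
	int pgd_index, step = 0;

	/* Same traversal as update_parent_pgds(): deepest parent first */
	for (pgd_index = cur_level; pgd_index > insert_level; pgd_index--)
		write_step[pgd_index - 1] = step++;

	/* The pre-existing PGD at insert_level is written last of all */
	for (pgd_index = insert_level; pgd_index < cur_level; pgd_index++)
		printf("parent PGD at level %d written at step %d\n",
		       pgd_index, write_step[pgd_index]);

	return 0;
}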
1951
+/**
1952
+ * mmu_insert_alloc_pgds() - allocate memory for PGDs from level_low to
1953
+ * level_high (inclusive)
1954
+ *
1955
+ * @kbdev: Device pointer.
1956
+ * @mmut: GPU MMU page table.
1957
+ * @level_low: The lower bound for the levels for which the PGD allocs are required
1958
+ * @level_high: The higher bound for the levels for which the PGD allocs are required
1959
+ * @new_pgds: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) to write the
1960
+ * newly allocated PGD addresses to.
1961
+ *
1962
+ * Numerically, level_low < level_high, not to be confused with top level and
1963
+ * bottom level concepts for MMU PGDs. They are only used as low and high bounds
1964
+ * in an incrementing for-loop.
1965
+ *
1966
+ * Return:
1967
+ * * 0 - OK
1968
+ * * -ENOMEM - allocation failed for a PGD.
1969
+ */
1970
+static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
1971
+ phys_addr_t *new_pgds, int level_low, int level_high)
1972
+{
1973
+ int err = 0;
1974
+ int i;
1975
+
1976
+ lockdep_assert_held(&mmut->mmu_lock);
1977
+
1978
+ for (i = level_low; i <= level_high; i++) {
1979
+ do {
1980
+ new_pgds[i] = kbase_mmu_alloc_pgd(kbdev, mmut);
1981
+ if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS)
1982
+ break;
1983
+
1984
+ mutex_unlock(&mmut->mmu_lock);
1985
+ err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id],
1986
+ level_high, NULL);
1987
+ mutex_lock(&mmut->mmu_lock);
1988
+ if (err) {
1989
+ dev_err(kbdev->dev, "%s: kbase_mem_pool_grow() returned error %d",
1990
+ __func__, err);
1991
+
1992
+ /* Free all PGDs allocated in previous successful iterations
1993
+ * from (i-1) to level_low
1994
+ */
1995
+ for (i = (i - 1); i >= level_low; i--) {
1996
+ if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS)
1997
+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[i]);
1998
+ }
1999
+
2000
+ return err;
2001
+ }
2002
+ } while (1);
2003
+ }
2004
+
2005
+ return 0;
2006
+}
2007
+
2008
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn,
2009
+ struct tagged_addr phys, size_t nr, unsigned long flags,
2010
+ int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info,
2011
+ bool ignore_page_migration)
12042012 {
12052013 phys_addr_t pgd;
12062014 u64 *pgd_page;
1207
- /* In case the insert_single_page only partially completes
1208
- * we need to be able to recover
1209
- */
1210
- bool recover_required = false;
1211
- u64 start_vpfn = vpfn;
1212
- size_t recover_count = 0;
2015
+ u64 insert_vpfn = start_vpfn;
12132016 size_t remain = nr;
12142017 int err;
12152018 struct kbase_device *kbdev;
2019
+ u64 dirty_pgds = 0;
2020
+ unsigned int i;
2021
+ phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
2022
+ enum kbase_mmu_op_type flush_op;
2023
+ struct kbase_mmu_table *mmut = &kctx->mmu;
2024
+ int l, cur_level, insert_level;
12162025
12172026 if (WARN_ON(kctx == NULL))
12182027 return -EINVAL;
12192028
12202029 /* 64-bit address range is the max */
1221
- KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
2030
+ KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
12222031
12232032 kbdev = kctx->kbdev;
12242033
....@@ -1226,76 +2035,87 @@
12262035 if (nr == 0)
12272036 return 0;
12282037
1229
- mutex_lock(&kctx->mmu.mmu_lock);
2038
+ /* If page migration is enabled, pages involved in multiple GPU mappings
2039
+ * are always treated as not movable.
2040
+ */
2041
+ if (kbase_page_migration_enabled && !ignore_page_migration) {
2042
+ struct page *phys_page = as_page(phys);
2043
+ struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
2044
+
2045
+ if (page_md) {
2046
+ spin_lock(&page_md->migrate_lock);
2047
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
2048
+ spin_unlock(&page_md->migrate_lock);
2049
+ }
2050
+ }
2051
+
2052
+ mutex_lock(&mmut->mmu_lock);
12302053
12312054 while (remain) {
1232
- unsigned int i;
1233
- unsigned int index = vpfn & 0x1FF;
1234
- unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
2055
+ unsigned int vindex = insert_vpfn & 0x1FF;
2056
+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
12352057 struct page *p;
2058
+ register unsigned int num_of_valid_entries;
2059
+ bool newly_created_pgd = false;
12362060
12372061 if (count > remain)
12382062 count = remain;
12392063
2064
+ cur_level = MIDGARD_MMU_BOTTOMLEVEL;
2065
+ insert_level = cur_level;
2066
+
12402067 /*
1241
- * Repeatedly calling mmu_get_bottom_pte() is clearly
2068
+ * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
12422069 * suboptimal. We don't have to re-parse the whole tree
12432070 * each time (just cache the l0-l2 sequence).
12442071 * On the other hand, it's only a gain when we map more than
12452072 * 256 pages at once (on average). Do we really care?
12462073 */
1247
- do {
1248
- err = mmu_get_bottom_pgd(kbdev, &kctx->mmu,
1249
- vpfn, &pgd);
1250
- if (err != -ENOMEM)
1251
- break;
1252
- /* Fill the memory pool with enough pages for
1253
- * the page walk to succeed
1254
- */
1255
- mutex_unlock(&kctx->mmu.mmu_lock);
1256
- err = kbase_mem_pool_grow(
1257
-#ifdef CONFIG_MALI_2MB_ALLOC
1258
- &kbdev->mem_pools.large[
1259
-#else
1260
- &kbdev->mem_pools.small[
1261
-#endif
1262
- kctx->mmu.group_id],
1263
- MIDGARD_MMU_BOTTOMLEVEL);
1264
- mutex_lock(&kctx->mmu.mmu_lock);
1265
- } while (!err);
2074
+ /* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpn */
2075
+ err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level,
2076
+ &pgd);
2077
+
12662078 if (err) {
1267
- dev_warn(kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
1268
- if (recover_required) {
1269
- /* Invalidate the pages we have partially
1270
- * completed
1271
- */
1272
- mmu_insert_pages_failure_recovery(kbdev,
1273
- &kctx->mmu,
1274
- start_vpfn,
1275
- start_vpfn + recover_count);
1276
- }
2079
+ dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
2080
+ __func__, err);
12772081 goto fail_unlock;
2082
+ }
2083
+
2084
+ /* No valid pgd at cur_level */
2085
+ if (insert_level != cur_level) {
2086
+ /* Allocate new pgds for all missing levels from the required level
2087
+ * down to the lowest valid pgd at insert_level
2088
+ */
2089
+ err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
2090
+ cur_level);
2091
+ if (err)
2092
+ goto fail_unlock;
2093
+
2094
+ newly_created_pgd = true;
2095
+
2096
+ new_pgds[insert_level] = pgd;
2097
+
2098
+ /* If we didn't find an existing valid pgd at cur_level,
2099
+ * we've now allocated one. The ATE in the next step should
2100
+ * be inserted in this newly allocated pgd.
2101
+ */
2102
+ pgd = new_pgds[cur_level];
12782103 }
12792104
12802105 p = pfn_to_page(PFN_DOWN(pgd));
12812106 pgd_page = kmap(p);
12822107 if (!pgd_page) {
1283
- dev_warn(kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
1284
- if (recover_required) {
1285
- /* Invalidate the pages we have partially
1286
- * completed
1287
- */
1288
- mmu_insert_pages_failure_recovery(kbdev,
1289
- &kctx->mmu,
1290
- start_vpfn,
1291
- start_vpfn + recover_count);
1292
- }
2108
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
12932109 err = -ENOMEM;
1294
- goto fail_unlock;
2110
+
2111
+ goto fail_unlock_free_pgds;
12952112 }
12962113
2114
+ num_of_valid_entries =
2115
+ kbdev->mmu_mode->get_num_valid_entries(pgd_page);
2116
+
12972117 for (i = 0; i < count; i++) {
1298
- unsigned int ofs = index + i;
2118
+ unsigned int ofs = vindex + i;
12992119
13002120 /* Fail if the current page is a valid ATE entry */
13012121 KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
....@@ -1304,56 +2124,167 @@
13042124 phys, flags, MIDGARD_MMU_BOTTOMLEVEL, group_id);
13052125 }
13062126
1307
- vpfn += count;
1308
- remain -= count;
2127
+ kbdev->mmu_mode->set_num_valid_entries(
2128
+ pgd_page, num_of_valid_entries + count);
13092129
1310
- kbase_mmu_sync_pgd(kbdev,
1311
- kbase_dma_addr(p) + (index * sizeof(u64)),
1312
- count * sizeof(u64));
2130
+ dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : MIDGARD_MMU_BOTTOMLEVEL);
13132131
1314
- kunmap(p);
1315
- /* We have started modifying the page table.
1316
- * If further pages need inserting and fail we need to undo what
1317
- * has already taken place
2132
+ /* MMU cache flush operation here will depend on whether bottom level
2133
+ * PGD is newly created or not.
2134
+ *
2135
+ * If bottom level PGD is newly created then no GPU cache maintenance is
2136
+ * required as the PGD will not exist in GPU cache. Otherwise GPU cache
2137
+ * maintenance is required for existing PGD.
13182138 */
1319
- recover_required = true;
1320
- recover_count += count;
2139
+ flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT;
2140
+
2141
+ kbase_mmu_sync_pgd(kbdev, kctx, pgd + (vindex * sizeof(u64)),
2142
+ kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64),
2143
+ flush_op);
2144
+
2145
+ if (newly_created_pgd) {
2146
+ err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
2147
+ new_pgds);
2148
+ if (err) {
2149
+ dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)",
2150
+ __func__, err);
2151
+
2152
+ kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count);
2153
+
2154
+ kunmap(p);
2155
+ goto fail_unlock_free_pgds;
2156
+ }
2157
+ }
2158
+
2159
+ insert_vpfn += count;
2160
+ remain -= count;
2161
+ kunmap(p);
13212162 }
1322
- mutex_unlock(&kctx->mmu.mmu_lock);
1323
- kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false);
2163
+
2164
+ mutex_unlock(&mmut->mmu_lock);
2165
+
2166
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info,
2167
+ false);
2168
+
13242169 return 0;
13252170
2171
+fail_unlock_free_pgds:
2172
+ /* Free the pgds allocated by us from insert_level+1 to bottom level */
2173
+ for (l = cur_level; l > insert_level; l--)
2174
+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
2175
+
13262176 fail_unlock:
1327
- mutex_unlock(&kctx->mmu.mmu_lock);
1328
- kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false);
2177
+ if (insert_vpfn != start_vpfn) {
2178
+ /* Invalidate the pages we have partially completed */
2179
+ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, &dirty_pgds,
2180
+ NULL, true);
2181
+ }
2182
+
2183
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info,
2184
+ true);
2185
+ kbase_mmu_free_pgds_list(kbdev, mmut);
2186
+ mutex_unlock(&mmut->mmu_lock);
2187
+
13292188 return err;
13302189 }
13312190
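The mapping loop in kbase_mmu_insert_single_page() (and the array variant further below) advances in chunks that never straddle a bottom-level PGD: each pass covers at most KBASE_MMU_PAGE_ENTRIES minus the starting index within the current PGD. A standalone illustration of how a request gets split; the start VPFN and page count are example values only.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define KBASE_MMU_PAGE_ENTRIES 512

int main(void)
{
	uint64_t insert_vpfn = 510;	/* example start, 2 entries before a PGD boundary */
	size_t remain = 1030;		/* example number of 4kB pages to map */

	while (remain) {
		unsigned int vindex = insert_vpfn & 0x1FF;
		unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;

		if (count > remain)
			count = remain;

		printf("map %u pages starting at vpfn 0x%llx (index %u)\n", count,
		       (unsigned long long)insert_vpfn, vindex);

		insert_vpfn += count;
		remain -= count;
	}

	return 0;
}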
1332
-static inline void cleanup_empty_pte(struct kbase_device *kbdev,
1333
- struct kbase_mmu_table *mmut, u64 *pte)
2191
+int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn,
2192
+ struct tagged_addr phys, size_t nr, unsigned long flags,
2193
+ int const group_id,
2194
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
13342195 {
1335
- phys_addr_t tmp_pgd;
1336
- struct page *tmp_p;
2196
+ /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */
2197
+ return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info,
2198
+ false);
2199
+}
13372200
1338
- tmp_pgd = kbdev->mmu_mode->pte_to_phy_addr(*pte);
1339
- tmp_p = phys_to_page(tmp_pgd);
1340
-#ifdef CONFIG_MALI_2MB_ALLOC
1341
- kbase_mem_pool_free(&kbdev->mem_pools.large[mmut->group_id],
1342
-#else
1343
- kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id],
1344
-#endif
1345
- tmp_p, false);
2201
+int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn,
2202
+ struct tagged_addr phys, size_t nr, unsigned long flags,
2203
+ int const group_id,
2204
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
2205
+{
2206
+ /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */
2207
+ return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info,
2208
+ false);
2209
+}
13462210
1347
- /* If the MMU tables belong to a context then we accounted the memory
1348
- * usage to that context, so decrement here.
2211
+static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys,
2212
+ struct kbase_va_region *reg,
2213
+ struct kbase_mmu_table *mmut, const u64 vpfn)
2214
+{
2215
+ struct page *phys_page = as_page(phys);
2216
+ struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
2217
+
2218
+ spin_lock(&page_md->migrate_lock);
2219
+
2220
+ /* If no GPU va region is given: the metadata provided are
2221
+ * invalid.
2222
+ *
2223
+ * If the page is already allocated and mapped: this is
2224
+ * an additional GPU mapping, probably to create a memory
2225
+ * alias, which means it is no longer possible to migrate
2226
+ * the page easily because tracking all the GPU mappings
2227
+ * would be too costly.
2228
+ *
2229
+ * In any case: the page becomes not movable. It is kept
2230
+ * alive, but attempts to migrate it will fail. The page
2231
+ * will be freed if it is still not movable when it returns
2232
+ * to a memory pool. Notice that the movable flag is not
2233
+ * cleared because that would require taking the page lock.
13492234 */
1350
- if (mmut->kctx) {
1351
- kbase_process_page_usage_dec(mmut->kctx, 1);
1352
- atomic_sub(1, &mmut->kctx->used_pages);
2235
+ if (!reg || PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATED_MAPPED) {
2236
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
2237
+ } else if (PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATE_IN_PROGRESS) {
2238
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATED_MAPPED);
2239
+ page_md->data.mapped.reg = reg;
2240
+ page_md->data.mapped.mmut = mmut;
2241
+ page_md->data.mapped.vpfn = vpfn;
13532242 }
1354
- atomic_sub(1, &kbdev->memdev.used_pages);
13552243
1356
- kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1);
2244
+ spin_unlock(&page_md->migrate_lock);
2245
+}
2246
+
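The helper above encodes a small state decision: with no VA region, or when the page is already ALLOCATED_MAPPED (i.e. this is a second GPU mapping such as an alias), the page is pinned as NOT_MOVABLE; only a page still in ALLOCATE_IN_PROGRESS becomes ALLOCATED_MAPPED with its mapping recorded. A compact standalone model of that decision; the status names mirror the ones used above, while the numeric enum values and everything else are illustrative.

#include <stdio.h>
#include <stdbool.h>

enum page_status { ALLOCATE_IN_PROGRESS, ALLOCATED_MAPPED, NOT_MOVABLE };

static enum page_status status_on_insert(enum page_status cur, bool have_reg)
{
	if (!have_reg || cur == ALLOCATED_MAPPED)
		return NOT_MOVABLE;		/* alias or unknown region: pin the page */
	if (cur == ALLOCATE_IN_PROGRESS)
		return ALLOCATED_MAPPED;	/* first mapping: page stays migratable */
	return cur;				/* e.g. already NOT_MOVABLE */
}

int main(void)
{
	printf("%d\n", status_on_insert(ALLOCATE_IN_PROGRESS, true));	/* 1: ALLOCATED_MAPPED */
	printf("%d\n", status_on_insert(ALLOCATED_MAPPED, true));	/* 2: NOT_MOVABLE */
	printf("%d\n", status_on_insert(ALLOCATE_IN_PROGRESS, false));	/* 2: NOT_MOVABLE */
	return 0;
}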
2247
+static void kbase_mmu_progress_migration_on_teardown(struct kbase_device *kbdev,
2248
+ struct tagged_addr *phys, size_t requested_nr)
2249
+{
2250
+ size_t i;
2251
+
2252
+ for (i = 0; i < requested_nr; i++) {
2253
+ struct page *phys_page = as_page(phys[i]);
2254
+ struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
2255
+
2256
+ /* Skip the 4KB page that is part of a large page, as the large page is
2257
+ * excluded from the migration process.
2258
+ */
2259
+ if (is_huge(phys[i]) || is_partial(phys[i]))
2260
+ continue;
2261
+
2262
+ if (page_md) {
2263
+ u8 status;
2264
+
2265
+ spin_lock(&page_md->migrate_lock);
2266
+ status = PAGE_STATUS_GET(page_md->status);
2267
+
2268
+ if (status == ALLOCATED_MAPPED) {
2269
+ if (IS_PAGE_ISOLATED(page_md->status)) {
2270
+ page_md->status = PAGE_STATUS_SET(
2271
+ page_md->status, (u8)FREE_ISOLATED_IN_PROGRESS);
2272
+ page_md->data.free_isolated.kbdev = kbdev;
2273
+ /* At this point, we still have a reference
2274
+ * to the page via its page migration metadata,
2275
+ * and any page with the FREE_ISOLATED_IN_PROGRESS
2276
+ * status will subsequently be freed in either
2277
+ * kbase_page_migrate() or kbase_page_putback()
2278
+ */
2279
+ phys[i] = as_tagged(0);
2280
+ } else
2281
+ page_md->status = PAGE_STATUS_SET(page_md->status,
2282
+ (u8)FREE_IN_PROGRESS);
2283
+ }
2284
+
2285
+ spin_unlock(&page_md->migrate_lock);
2286
+ }
2287
+ }
13572288 }
13582289
13592290 u64 kbase_mmu_create_ate(struct kbase_device *const kbdev,
....@@ -1367,12 +2298,10 @@
13672298 group_id, level, entry);
13682299 }
13692300
1370
-int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
1371
- struct kbase_mmu_table *mmut,
1372
- const u64 start_vpfn,
1373
- struct tagged_addr *phys, size_t nr,
1374
- unsigned long flags,
1375
- int const group_id)
2301
+int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
2302
+ const u64 start_vpfn, struct tagged_addr *phys, size_t nr,
2303
+ unsigned long flags, int const group_id, u64 *dirty_pgds,
2304
+ struct kbase_va_region *reg, bool ignore_page_migration)
13762305 {
13772306 phys_addr_t pgd;
13782307 u64 *pgd_page;
....@@ -1380,6 +2309,9 @@
13802309 size_t remain = nr;
13812310 int err;
13822311 struct kbase_mmu_mode const *mmu_mode;
2312
+ unsigned int i;
2313
+ phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
2314
+ int l, cur_level, insert_level;
13832315
13842316 /* Note that 0 is a valid start_vpfn */
13852317 /* 64-bit address range is the max */
....@@ -1394,11 +2326,12 @@
13942326 mutex_lock(&mmut->mmu_lock);
13952327
13962328 while (remain) {
1397
- unsigned int i;
13982329 unsigned int vindex = insert_vpfn & 0x1FF;
13992330 unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
14002331 struct page *p;
1401
- int cur_level;
2332
+ register unsigned int num_of_valid_entries;
2333
+ bool newly_created_pgd = false;
2334
+ enum kbase_mmu_op_type flush_op;
14022335
14032336 if (count > remain)
14042337 count = remain;
....@@ -1408,69 +2341,64 @@
14082341 else
14092342 cur_level = MIDGARD_MMU_BOTTOMLEVEL;
14102343
2344
+ insert_level = cur_level;
2345
+
14112346 /*
1412
- * Repeatedly calling mmu_get_pgd_at_level() is clearly
2347
+ * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
14132348 * suboptimal. We don't have to re-parse the whole tree
14142349 * each time (just cache the l0-l2 sequence).
14152350 * On the other hand, it's only a gain when we map more than
14162351 * 256 pages at once (on average). Do we really care?
14172352 */
1418
- do {
1419
- err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn,
1420
- cur_level, &pgd);
1421
- if (err != -ENOMEM)
1422
- break;
1423
- /* Fill the memory pool with enough pages for
1424
- * the page walk to succeed
1425
- */
1426
- mutex_unlock(&mmut->mmu_lock);
1427
- err = kbase_mem_pool_grow(
1428
-#ifdef CONFIG_MALI_2MB_ALLOC
1429
- &kbdev->mem_pools.large[mmut->group_id],
1430
-#else
1431
- &kbdev->mem_pools.small[mmut->group_id],
1432
-#endif
1433
- cur_level);
1434
- mutex_lock(&mmut->mmu_lock);
1435
- } while (!err);
2353
+ /* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpfn */
2354
+ err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level,
2355
+ &pgd);
14362356
14372357 if (err) {
1438
- dev_warn(kbdev->dev,
1439
- "%s: mmu_get_bottom_pgd failure\n", __func__);
1440
- if (insert_vpfn != start_vpfn) {
1441
- /* Invalidate the pages we have partially
1442
- * completed
1443
- */
1444
- mmu_insert_pages_failure_recovery(kbdev,
1445
- mmut, start_vpfn, insert_vpfn);
1446
- }
2358
+ dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
2359
+ __func__, err);
14472360 goto fail_unlock;
2361
+ }
2362
+
2363
+ /* No valid pgd at cur_level */
2364
+ if (insert_level != cur_level) {
2365
+ /* Allocate new pgds for all missing levels from the required level
2366
+ * down to the lowest valid pgd at insert_level
2367
+ */
2368
+ err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
2369
+ cur_level);
2370
+ if (err)
2371
+ goto fail_unlock;
2372
+
2373
+ newly_created_pgd = true;
2374
+
2375
+ new_pgds[insert_level] = pgd;
2376
+
2377
+ /* If we didn't find an existing valid pgd at cur_level,
2378
+ * we've now allocated one. The ATE in the next step should
2379
+ * be inserted in this newly allocated pgd.
2380
+ */
2381
+ pgd = new_pgds[cur_level];
14482382 }
14492383
14502384 p = pfn_to_page(PFN_DOWN(pgd));
14512385 pgd_page = kmap(p);
14522386 if (!pgd_page) {
1453
- dev_warn(kbdev->dev, "%s: kmap failure\n",
1454
- __func__);
1455
- if (insert_vpfn != start_vpfn) {
1456
- /* Invalidate the pages we have partially
1457
- * completed
1458
- */
1459
- mmu_insert_pages_failure_recovery(kbdev,
1460
- mmut, start_vpfn, insert_vpfn);
1461
- }
2387
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
14622388 err = -ENOMEM;
1463
- goto fail_unlock;
2389
+
2390
+ goto fail_unlock_free_pgds;
14642391 }
2392
+
2393
+ num_of_valid_entries =
2394
+ mmu_mode->get_num_valid_entries(pgd_page);
14652395
14662396 if (cur_level == MIDGARD_MMU_LEVEL(2)) {
14672397 int level_index = (insert_vpfn >> 9) & 0x1FF;
1468
- u64 *target = &pgd_page[level_index];
2398
+ pgd_page[level_index] =
2399
+ kbase_mmu_create_ate(kbdev, *phys, flags, cur_level, group_id);
14692400
1470
- if (mmu_mode->pte_is_valid(*target, cur_level))
1471
- cleanup_empty_pte(kbdev, mmut, target);
1472
- *target = kbase_mmu_create_ate(kbdev, *phys, flags,
1473
- cur_level, group_id);
2401
+ num_of_valid_entries++;
14742402 } else {
14752403 for (i = 0; i < count; i++) {
14762404 unsigned int ofs = vindex + i;
....@@ -1487,24 +2415,77 @@
14872415
14882416 *target = kbase_mmu_create_ate(kbdev,
14892417 phys[i], flags, cur_level, group_id);
2418
+
2419
+ /* If page migration is enabled, this is the right time
2420
+ * to update the status of the page.
2421
+ */
2422
+ if (kbase_page_migration_enabled && !ignore_page_migration &&
2423
+ !is_huge(phys[i]) && !is_partial(phys[i]))
2424
+ kbase_mmu_progress_migration_on_insert(phys[i], reg, mmut,
2425
+ insert_vpfn + i);
2426
+ }
2427
+ num_of_valid_entries += count;
2428
+ }
2429
+
2430
+ mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries);
2431
+
2432
+ if (dirty_pgds)
2433
+ *dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : cur_level);
2434
+
2435
+ /* MMU cache flush operation here will depend on whether bottom level
2436
+ * PGD is newly created or not.
2437
+ *
2438
+ * If bottom level PGD is newly created then no GPU cache maintenance is
2439
+ * required as the PGD will not exist in GPU cache. Otherwise GPU cache
2440
+ * maintenance is required for existing PGD.
2441
+ */
2442
+ flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT;
2443
+
2444
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (vindex * sizeof(u64)),
2445
+ kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64),
2446
+ flush_op);
2447
+
2448
+ if (newly_created_pgd) {
2449
+ err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
2450
+ new_pgds);
2451
+ if (err) {
2452
+ dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)",
2453
+ __func__, err);
2454
+
2455
+ kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count);
2456
+
2457
+ kunmap(p);
2458
+ goto fail_unlock_free_pgds;
14902459 }
14912460 }
14922461
14932462 phys += count;
14942463 insert_vpfn += count;
14952464 remain -= count;
1496
-
1497
- kbase_mmu_sync_pgd(kbdev,
1498
- kbase_dma_addr(p) + (vindex * sizeof(u64)),
1499
- count * sizeof(u64));
1500
-
15012465 kunmap(p);
15022466 }
15032467
1504
- err = 0;
2468
+ mutex_unlock(&mmut->mmu_lock);
2469
+
2470
+ return 0;
2471
+
2472
+fail_unlock_free_pgds:
2473
+ /* Free the pgds allocated by us from insert_level+1 to bottom level */
2474
+ for (l = cur_level; l > insert_level; l--)
2475
+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
15052476
15062477 fail_unlock:
2478
+ if (insert_vpfn != start_vpfn) {
2479
+ /* Invalidate the pages we have partially completed */
2480
+ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds,
2481
+ phys, ignore_page_migration);
2482
+ }
2483
+
2484
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr,
2485
+ dirty_pgds ? *dirty_pgds : 0xF, CALLER_MMU_ASYNC, true);
2486
+ kbase_mmu_free_pgds_list(kbdev, mmut);
15072487 mutex_unlock(&mmut->mmu_lock);
2488
+
15082489 return err;
15092490 }
15102491
....@@ -1512,167 +2493,80 @@
15122493 * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space
15132494 * number 'as_nr'.
15142495 */
1515
-int kbase_mmu_insert_pages(struct kbase_device *kbdev,
1516
- struct kbase_mmu_table *mmut, u64 vpfn,
1517
- struct tagged_addr *phys, size_t nr,
1518
- unsigned long flags, int as_nr, int const group_id)
2496
+int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
2497
+ struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr,
2498
+ int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info,
2499
+ struct kbase_va_region *reg, bool ignore_page_migration)
15192500 {
15202501 int err;
2502
+ u64 dirty_pgds = 0;
15212503
1522
- err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn,
1523
- phys, nr, flags, group_id);
2504
+ /* Early out if there is nothing to do */
2505
+ if (nr == 0)
2506
+ return 0;
15242507
1525
- if (mmut->kctx)
1526
- kbase_mmu_flush_invalidate(mmut->kctx, vpfn, nr, false);
1527
- else
1528
- kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, nr, false,
1529
- as_nr);
2508
+ err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
2509
+ &dirty_pgds, reg, ignore_page_migration);
2510
+ if (err)
2511
+ return err;
15302512
1531
- return err;
2513
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
2514
+
2515
+ return 0;
15322516 }
15332517
15342518 KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
15352519
1536
-/**
1537
- * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches
1538
- * without retaining the kbase context.
1539
- * @kctx: The KBase context.
1540
- * @vpfn: The virtual page frame number to start the flush on.
1541
- * @nr: The number of pages to flush.
1542
- * @sync: Set if the operation should be synchronous or not.
1543
- *
1544
- * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any
1545
- * other locking.
1546
- */
1547
-static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
1548
- u64 vpfn, size_t nr, bool sync)
2520
+int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
2521
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
2522
+ unsigned long flags, int as_nr, int const group_id,
2523
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
2524
+ struct kbase_va_region *reg)
15492525 {
1550
- struct kbase_device *kbdev = kctx->kbdev;
15512526 int err;
1552
- u32 op;
2527
+ u64 dirty_pgds = 0;
15532528
15542529 /* Early out if there is nothing to do */
15552530 if (nr == 0)
1556
- return;
2531
+ return 0;
15572532
1558
- if (sync)
1559
- op = AS_COMMAND_FLUSH_MEM;
1560
- else
1561
- op = AS_COMMAND_FLUSH_PT;
1562
-
1563
- err = kbase_mmu_hw_do_operation(kbdev,
1564
- &kbdev->as[kctx->as_nr],
1565
- vpfn, nr, op, 0);
1566
- if (err) {
1567
- /* Flush failed to complete, assume the
1568
- * GPU has hung and perform a reset to recover
1569
- */
1570
- dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
1571
-
1572
- if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
1573
- kbase_reset_gpu_locked(kbdev);
1574
- }
1575
-}
1576
-
1577
-/* Perform a flush/invalidate on a particular address space
1578
- */
1579
-static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev,
1580
- struct kbase_as *as,
1581
- u64 vpfn, size_t nr, bool sync)
1582
-{
1583
- int err;
1584
- u32 op;
1585
- bool gpu_powered;
1586
- unsigned long flags;
1587
-
1588
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1589
- gpu_powered = kbdev->pm.backend.gpu_powered;
1590
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1591
-
1592
- /* GPU is off so there's no need to perform flush/invalidate.
1593
- * But even if GPU is not actually powered down, after gpu_powered flag
1594
- * was set to false, it is still safe to skip the flush/invalidate.
1595
- * The TLB invalidation will anyways be performed due to AS_COMMAND_UPDATE
1596
- * which is sent when address spaces are restored after gpu_powered flag
1597
- * is set to true. Flushing of L2 cache is certainly not required as L2
1598
- * cache is definitely off if gpu_powered is false.
2533
+ /* Imported allocations don't have metadata and therefore always ignore the
2534
+ * page migration logic.
15992535 */
1600
- if (!gpu_powered)
1601
- return;
2536
+ err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
2537
+ &dirty_pgds, reg, true);
2538
+ if (err)
2539
+ return err;
16022540
1603
- if (kbase_pm_context_active_handle_suspend(kbdev,
1604
- KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
1605
- /* GPU has just been powered off due to system suspend.
1606
- * So again, no need to perform flush/invalidate.
1607
- */
1608
- return;
1609
- }
2541
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
16102542
1611
- /* AS transaction begin */
1612
- mutex_lock(&kbdev->mmu_hw_mutex);
1613
-
1614
- if (sync)
1615
- op = AS_COMMAND_FLUSH_MEM;
1616
- else
1617
- op = AS_COMMAND_FLUSH_PT;
1618
-
1619
- err = kbase_mmu_hw_do_operation(kbdev,
1620
- as, vpfn, nr, op, 0);
1621
-
1622
- if (err) {
1623
- /* Flush failed to complete, assume the GPU has hung and
1624
- * perform a reset to recover
1625
- */
1626
- dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
1627
-
1628
- if (kbase_prepare_to_reset_gpu(
1629
- kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
1630
- kbase_reset_gpu(kbdev);
1631
- }
1632
-
1633
- mutex_unlock(&kbdev->mmu_hw_mutex);
1634
- /* AS transaction end */
1635
-
1636
- kbase_pm_context_idle(kbdev);
2543
+ return 0;
16372544 }
16382545
1639
-static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev,
1640
- u64 vpfn, size_t nr, bool sync, int as_nr)
2546
+int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
2547
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
2548
+ unsigned long flags, int as_nr, int const group_id,
2549
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
2550
+ struct kbase_va_region *reg)
16412551 {
1642
- /* Skip if there is nothing to do */
1643
- if (nr) {
1644
- kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], vpfn,
1645
- nr, sync);
1646
- }
1647
-}
1648
-
1649
-static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
1650
- u64 vpfn, size_t nr, bool sync)
1651
-{
1652
- struct kbase_device *kbdev;
1653
- bool ctx_is_in_runpool;
2552
+ int err;
2553
+ u64 dirty_pgds = 0;
16542554
16552555 /* Early out if there is nothing to do */
16562556 if (nr == 0)
1657
- return;
2557
+ return 0;
16582558
1659
- kbdev = kctx->kbdev;
1660
-#if !MALI_USE_CSF
1661
- mutex_lock(&kbdev->js_data.queue_mutex);
1662
- ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx);
1663
- mutex_unlock(&kbdev->js_data.queue_mutex);
1664
-#else
1665
- ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx);
1666
-#endif /* !MALI_USE_CSF */
2559
+ /* Memory aliases are always built on top of existing allocations,
2560
+ * therefore the state of physical pages shall be updated.
2561
+ */
2562
+ err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
2563
+ &dirty_pgds, reg, false);
2564
+ if (err)
2565
+ return err;
16672566
1668
- if (ctx_is_in_runpool) {
1669
- KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
2567
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
16702568
1671
- kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr],
1672
- vpfn, nr, sync);
1673
-
1674
- release_ctx(kbdev, kctx);
1675
- }
2569
+ return 0;
16762570 }
16772571
16782572 void kbase_mmu_update(struct kbase_device *kbdev,
....@@ -1697,6 +2591,14 @@
16972591
16982592 void kbase_mmu_disable(struct kbase_context *kctx)
16992593 {
2594
+ /* Calls to this function are inherently asynchronous, with respect to
2595
+ * MMU operations.
2596
+ */
2597
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
2598
+ struct kbase_device *kbdev = kctx->kbdev;
2599
+ struct kbase_mmu_hw_op_param op_param = { 0 };
2600
+ int lock_err, flush_err;
2601
+
17002602 /* ASSERT that the context has a valid as_nr, which is only the case
17012603 * when it's scheduled in.
17022604 *
....@@ -1707,69 +2609,201 @@
17072609 lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
17082610 lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
17092611
1710
- /*
1711
- * The address space is being disabled, drain all knowledge of it out
1712
- * from the caches as pages and page tables might be freed after this.
1713
- *
1714
- * The job scheduler code will already be holding the locks and context
1715
- * so just do the flush.
1716
- */
1717
- kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true);
2612
+ op_param.vpfn = 0;
2613
+ op_param.nr = ~0;
2614
+ op_param.op = KBASE_MMU_OP_FLUSH_MEM;
2615
+ op_param.kctx_id = kctx->id;
2616
+ op_param.mmu_sync_info = mmu_sync_info;
17182617
1719
- kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
2618
+#if MALI_USE_CSF
2619
+ /* 0xF value used to prevent skipping of any levels when flushing */
2620
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev))
2621
+ op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF);
2622
+#endif
2623
+
2624
+ /* lock MMU to prevent existing jobs on GPU from executing while the AS is
2625
+ * not yet disabled
2626
+ */
2627
+ lock_err = kbase_mmu_hw_do_lock(kbdev, &kbdev->as[kctx->as_nr], &op_param);
2628
+ if (lock_err)
2629
+ dev_err(kbdev->dev, "Failed to lock AS %d for ctx %d_%d", kctx->as_nr, kctx->tgid,
2630
+ kctx->id);
2631
+
2632
+ /* Issue the flush command only when L2 cache is in stable power on state.
2633
+ * Any other state for L2 cache implies that shader cores are powered off,
2634
+ * which in turn implies there is no execution happening on the GPU.
2635
+ */
2636
+ if (kbdev->pm.backend.l2_state == KBASE_L2_ON) {
2637
+ flush_err = kbase_gpu_cache_flush_and_busy_wait(kbdev,
2638
+ GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
2639
+ if (flush_err)
2640
+ dev_err(kbdev->dev,
2641
+ "Failed to flush GPU cache when disabling AS %d for ctx %d_%d",
2642
+ kctx->as_nr, kctx->tgid, kctx->id);
2643
+ }
2644
+ kbdev->mmu_mode->disable_as(kbdev, kctx->as_nr);
2645
+
2646
+ if (!lock_err) {
2647
+ /* unlock the MMU to allow it to resume */
2648
+ lock_err =
2649
+ kbase_mmu_hw_do_unlock_no_addr(kbdev, &kbdev->as[kctx->as_nr], &op_param);
2650
+ if (lock_err)
2651
+ dev_err(kbdev->dev, "Failed to unlock AS %d for ctx %d_%d", kctx->as_nr,
2652
+ kctx->tgid, kctx->id);
2653
+ }
2654
+
2655
+#if !MALI_USE_CSF
2656
+ /*
2657
+ * JM GPUs have some L1 read only caches that need to be invalidated
2658
+ * with START_FLUSH configuration. Purge the MMU disabled kctx from
2659
+ * the slot_rb tracking field so such invalidation is performed when
2660
+ * a new katom is executed on the affected slots.
2661
+ */
2662
+ kbase_backend_slot_kctx_purge_locked(kbdev, kctx);
2663
+#endif
17202664 }
17212665 KBASE_EXPORT_TEST_API(kbase_mmu_disable);
17222666
1723
-/*
1724
- * We actually only discard the ATE, and not the page table
1725
- * pages. There is a potential DoS here, as we'll leak memory by
1726
- * having PTEs that are potentially unused. Will require physical
1727
- * page accounting, so MMU pages are part of the process allocation.
1728
- *
1729
- * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
1730
- * currently scheduled into the runpool, and so potentially uses a lot of locks.
1731
- * These locks must be taken in the correct order with respect to others
1732
- * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
1733
- * information.
1734
- */
1735
-int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
1736
- struct kbase_mmu_table *mmut, u64 vpfn, size_t nr, int as_nr)
2667
+static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
2668
+ struct kbase_mmu_table *mmut, phys_addr_t *pgds,
2669
+ u64 vpfn, int level,
2670
+ enum kbase_mmu_op_type flush_op, u64 *dirty_pgds)
17372671 {
1738
- phys_addr_t pgd;
1739
- u64 start_vpfn = vpfn;
1740
- size_t requested_nr = nr;
1741
- struct kbase_mmu_mode const *mmu_mode;
1742
- int err = -EFAULT;
2672
+ int current_level;
17432673
1744
- if (nr == 0) {
1745
- /* early out if nothing to do */
1746
- return 0;
2674
+ lockdep_assert_held(&mmut->mmu_lock);
2675
+
2676
+ for (current_level = level - 1; current_level >= MIDGARD_MMU_LEVEL(0);
2677
+ current_level--) {
2678
+ phys_addr_t current_pgd = pgds[current_level];
2679
+ struct page *p = phys_to_page(current_pgd);
2680
+ u64 *current_page = kmap(p);
2681
+ unsigned int current_valid_entries =
2682
+ kbdev->mmu_mode->get_num_valid_entries(current_page);
2683
+ int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FF;
2684
+
2685
+ /* We need to track every level that needs updating */
2686
+ if (dirty_pgds)
2687
+ *dirty_pgds |= 1ULL << current_level;
2688
+
2689
+ kbdev->mmu_mode->entries_invalidate(&current_page[index], 1);
2690
+ if (current_valid_entries == 1 &&
2691
+ current_level != MIDGARD_MMU_LEVEL(0)) {
2692
+ kunmap(p);
2693
+
2694
+ /* Ensure the cacheline containing the last valid entry
2695
+ * of PGD is invalidated from the GPU cache, before the
2696
+ * PGD page is freed.
2697
+ */
2698
+ kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx,
2699
+ current_pgd + (index * sizeof(u64)),
2700
+ sizeof(u64), flush_op);
2701
+
2702
+ kbase_mmu_add_to_free_pgds_list(mmut, p);
2703
+ } else {
2704
+ current_valid_entries--;
2705
+
2706
+ kbdev->mmu_mode->set_num_valid_entries(
2707
+ current_page, current_valid_entries);
2708
+
2709
+ kunmap(p);
2710
+
2711
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)),
2712
+ kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64),
2713
+ flush_op);
2714
+ break;
2715
+ }
17472716 }
2717
+}
17482718
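kbase_mmu_update_and_free_parent_pgds() walks upwards from the parent of the freed PGD: at each level it clears the entry that pointed at the child, and if that entry was the last valid one (and the PGD is not the top level) the PGD itself is queued for freeing and the walk continues, otherwise the valid-entry count is decremented and the walk stops. A compact standalone model of that cascade, with made-up entry counts.

#include <stdio.h>

#define TOP_LEVEL 0

int main(void)
{
	/* Made-up valid-entry counts for the PGDs on the walk path, index = level;
	 * the level-3 PGD has just been emptied and freed by the caller.
	 */
	unsigned int valid_entries[4] = { 3, 1, 1, 0 };
	int level = 3;
	int current_level;

	for (current_level = level - 1; current_level >= TOP_LEVEL; current_level--) {
		/* Clear the entry that pointed at the freed child PGD */
		if (valid_entries[current_level] == 1 && current_level != TOP_LEVEL) {
			valid_entries[current_level] = 0;
			printf("level %d PGD now empty -> freed, keep walking up\n",
			       current_level);
		} else {
			valid_entries[current_level]--;
			printf("level %d PGD keeps %u valid entries -> stop\n",
			       current_level, valid_entries[current_level]);
			break;
		}
	}

	return 0;
}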
1749
- mutex_lock(&mmut->mmu_lock);
2719
+/**
2720
+ * mmu_flush_invalidate_teardown_pages() - Perform flush operation after unmapping pages.
2721
+ *
2722
+ * @kbdev: Pointer to kbase device.
2723
+ * @kctx: Pointer to kbase context.
2724
+ * @as_nr: Address space number, for GPU cache maintenance operations
2725
+ * that happen outside a specific kbase context.
2726
+ * @phys: Array of physical pages to flush.
2727
+ * @phys_page_nr: Number of physical pages to flush.
2728
+ * @op_param: Non-NULL pointer to struct containing information about the flush
2729
+ * operation to perform.
2730
+ *
2731
+ * This function will do one of three things:
2732
+ * 1. Invalidate the MMU caches, followed by a partial GPU cache flush of the
2733
+ * individual pages that were unmapped if feature is supported on GPU.
2734
+ * 2. Perform a full GPU cache flush through the GPU_CONTROL interface if feature is
2735
+ * supported on the GPU, or
2736
+ * 3. Perform a full GPU cache flush through the MMU_CONTROL interface.
2737
+ *
2738
+ * When performing a partial GPU cache flush, the number of physical
2739
+ * pages does not have to be identical to the number of virtual pages on the MMU,
2740
+ * to support a single physical address flush for an aliased page.
2741
+ */
2742
+static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev,
2743
+ struct kbase_context *kctx, int as_nr,
2744
+ struct tagged_addr *phys, size_t phys_page_nr,
2745
+ struct kbase_mmu_hw_op_param *op_param)
2746
+{
2747
+ if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) {
2748
+ /* Full cache flush through the MMU_COMMAND */
2749
+ mmu_flush_invalidate(kbdev, kctx, as_nr, op_param);
2750
+ } else if (op_param->op == KBASE_MMU_OP_FLUSH_MEM) {
2751
+ /* Full cache flush through the GPU_CONTROL */
2752
+ mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, op_param);
2753
+ }
2754
+#if MALI_USE_CSF
2755
+ else {
2756
+ /* Partial GPU cache flush with MMU cache invalidation */
2757
+ unsigned long irq_flags;
2758
+ unsigned int i;
2759
+ bool flush_done = false;
17502760
1751
- mmu_mode = kbdev->mmu_mode;
2761
+ mmu_invalidate(kbdev, kctx, as_nr, op_param);
2762
+
2763
+ for (i = 0; !flush_done && i < phys_page_nr; i++) {
2764
+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
2765
+ if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0))
2766
+ mmu_flush_pa_range(kbdev, as_phys_addr_t(phys[i]), PAGE_SIZE,
2767
+ KBASE_MMU_OP_FLUSH_MEM);
2768
+ else
2769
+ flush_done = true;
2770
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
2771
+ }
2772
+ }
2773
+#endif
2774
+}
2775
+
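Taken together with the threshold check in kbase_mmu_teardown_pages() below, the dispatch above boils down to a three-way choice, where the physical-address-range path is only compiled in for CSF GPUs. A condensed standalone decision helper that spells the logic out; the function, enum names and the threshold value are invented here purely for illustration.

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

enum flush_strategy {
	FLUSH_FULL_MMU_COMMAND,		/* full flush via MMU_CONTROL */
	FLUSH_FULL_GPU_CONTROL,		/* full flush via GPU_CONTROL */
	FLUSH_PA_RANGE_GPU_CONTROL	/* MMU invalidate + per-page PA flushes (CSF only) */
};

static enum flush_strategy pick_strategy(bool gpu_ctrl_flush, bool have_phys,
					 size_t nr_phys_pages)
{
	const size_t pa_range_threshold = 20; /* illustrative "small unmap" threshold */

	if (!gpu_ctrl_flush)
		return FLUSH_FULL_MMU_COMMAND;
	if (have_phys && nr_phys_pages <= pa_range_threshold)
		return FLUSH_PA_RANGE_GPU_CONTROL;
	return FLUSH_FULL_GPU_CONTROL;
}

int main(void)
{
	printf("%d\n", pick_strategy(true, true, 8));		/* 2: PA-range flush */
	printf("%d\n", pick_strategy(true, true, 4096));	/* 1: full GPU_CONTROL flush */
	printf("%d\n", pick_strategy(false, true, 8));		/* 0: full MMU_COMMAND flush */
	return 0;
}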
2776
+static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
2777
+ u64 vpfn, size_t nr, u64 *dirty_pgds,
2778
+ struct list_head *free_pgds_list,
2779
+ enum kbase_mmu_op_type flush_op)
2780
+{
2781
+ struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode;
2782
+
2783
+ lockdep_assert_held(&mmut->mmu_lock);
2784
+ kbase_mmu_reset_free_pgds_list(mmut);
17522785
17532786 while (nr) {
1754
- unsigned int i;
17552787 unsigned int index = vpfn & 0x1FF;
17562788 unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
17572789 unsigned int pcount;
17582790 int level;
17592791 u64 *page;
2792
+ phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
2793
+ register unsigned int num_of_valid_entries;
2794
+ phys_addr_t pgd = mmut->pgd;
2795
+ struct page *p = phys_to_page(pgd);
17602796
17612797 if (count > nr)
17622798 count = nr;
17632799
1764
- /* need to check if this is a 2MB or a 4kB page */
1765
- pgd = mmut->pgd;
1766
-
2800
+ /* need to check if this is a 2MB page or a 4kB */
17672801 for (level = MIDGARD_MMU_TOPLEVEL;
17682802 level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
17692803 phys_addr_t next_pgd;
17702804
17712805 index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
1772
- page = kmap(phys_to_page(pgd));
2806
+ page = kmap(p);
17732807 if (mmu_mode->ate_is_valid(page[index], level))
17742808 break; /* keep the mapping */
17752809 else if (!mmu_mode->pte_is_valid(page[index], level)) {
....@@ -1792,27 +2826,31 @@
17922826 count = nr;
17932827 goto next;
17942828 }
1795
- next_pgd = mmu_mode->pte_to_phy_addr(page[index]);
1796
- kunmap(phys_to_page(pgd));
2829
+ next_pgd = mmu_mode->pte_to_phy_addr(
2830
+ kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
2831
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[index]));
2832
+ kunmap(p);
2833
+ pgds[level] = pgd;
17972834 pgd = next_pgd;
2835
+ p = phys_to_page(pgd);
17982836 }
17992837
18002838 switch (level) {
18012839 case MIDGARD_MMU_LEVEL(0):
18022840 case MIDGARD_MMU_LEVEL(1):
1803
- dev_warn(kbdev->dev,
1804
- "%s: No support for ATEs at level %d\n",
1805
- __func__, level);
1806
- kunmap(phys_to_page(pgd));
2841
+ dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__,
2842
+ level);
2843
+ kunmap(p);
18072844 goto out;
18082845 case MIDGARD_MMU_LEVEL(2):
18092846 /* can only teardown if count >= 512 */
18102847 if (count >= 512) {
18112848 pcount = 1;
18122849 } else {
1813
- dev_warn(kbdev->dev,
1814
- "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n",
1815
- __func__, count);
2850
+ dev_warn(
2851
+ kbdev->dev,
2852
+ "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down",
2853
+ __func__, count);
18162854 pcount = 0;
18172855 }
18182856 break;
....@@ -1821,72 +2859,177 @@
18212859 pcount = count;
18222860 break;
18232861 default:
1824
- dev_err(kbdev->dev,
1825
- "%s: found non-mapped memory, early out\n",
1826
- __func__);
2862
+ dev_err(kbdev->dev, "%s: found non-mapped memory, early out", __func__);
18272863 vpfn += count;
18282864 nr -= count;
18292865 continue;
18302866 }
18312867
2868
+ if (pcount > 0)
2869
+ *dirty_pgds |= 1ULL << level;
2870
+
2871
+ num_of_valid_entries = mmu_mode->get_num_valid_entries(page);
2872
+ if (WARN_ON_ONCE(num_of_valid_entries < pcount))
2873
+ num_of_valid_entries = 0;
2874
+ else
2875
+ num_of_valid_entries -= pcount;
2876
+
18322877 /* Invalidate the entries we added */
1833
- for (i = 0; i < pcount; i++)
1834
- mmu_mode->entry_invalidate(&page[index + i]);
2878
+ mmu_mode->entries_invalidate(&page[index], pcount);
18352879
1836
- kbase_mmu_sync_pgd(kbdev,
1837
- kbase_dma_addr(phys_to_page(pgd)) +
1838
- 8 * index, 8*pcount);
2880
+ if (!num_of_valid_entries) {
2881
+ kunmap(p);
18392882
2883
+ /* Ensure the cacheline(s) containing the last valid entries
2884
+ * of PGD is invalidated from the GPU cache, before the
2885
+ * PGD page is freed.
2886
+ */
2887
+ kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx,
2888
+ pgd + (index * sizeof(u64)),
2889
+ pcount * sizeof(u64), flush_op);
2890
+
2891
+ kbase_mmu_add_to_free_pgds_list(mmut, p);
2892
+
2893
+ kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
2894
+ flush_op, dirty_pgds);
2895
+
2896
+ vpfn += count;
2897
+ nr -= count;
2898
+ continue;
2899
+ }
2900
+
2901
+ mmu_mode->set_num_valid_entries(page, num_of_valid_entries);
2902
+
2903
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
2904
+ kbase_dma_addr(p) + (index * sizeof(u64)), pcount * sizeof(u64),
2905
+ flush_op);
18402906 next:
1841
- kunmap(phys_to_page(pgd));
2907
+ kunmap(p);
18422908 vpfn += count;
18432909 nr -= count;
18442910 }
1845
- err = 0;
18462911 out:
1847
- mutex_unlock(&mmut->mmu_lock);
2912
+ return 0;
2913
+}
18482914
1849
- if (mmut->kctx)
1850
- kbase_mmu_flush_invalidate(mmut->kctx, start_vpfn, requested_nr,
1851
- true);
1852
- else
1853
- kbase_mmu_flush_invalidate_no_ctx(kbdev, start_vpfn, requested_nr,
1854
- true, as_nr);
2915
+int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
2916
+ struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages,
2917
+ int as_nr, bool ignore_page_migration)
2918
+{
2919
+ u64 start_vpfn = vpfn;
2920
+ enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE;
2921
+ struct kbase_mmu_hw_op_param op_param;
2922
+ int err = -EFAULT;
2923
+ u64 dirty_pgds = 0;
2924
+ LIST_HEAD(free_pgds_list);
2925
+
2926
+ /* Calls to this function are inherently asynchronous, with respect to
2927
+ * MMU operations.
2928
+ */
2929
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
2930
+
2931
+ /* This function performs two operations: MMU maintenance and flushing
2932
+ * the caches. To ensure internal consistency between the caches and the
2933
+ * MMU, it does not make sense to be able to flush only the physical pages
2934
+ * from the cache and keep the PTE, nor does it make sense to use this
2935
+ * function to remove a PTE and keep the physical pages in the cache.
2936
+ *
2937
+ * However, we have legitimate cases where we can try to tear down a mapping
2938
+ * with zero virtual and zero physical pages, so we must have the following
2939
+ * behaviour:
2940
+ * - if both physical and virtual page counts are zero, return early
2941
+ * - if either the physical or the virtual page count is zero, return early
2942
+ * - if there are fewer physical pages than virtual pages, return -EINVAL
2943
+ */
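/* Illustrative, stand-alone sketch only (not part of this driver or of the
 * diff): a restatement of the early-return ladder described in the comment
 * just above, using plain size_t counts. The helper name is hypothetical.
 */
#include <errno.h>
#include <stddef.h>

static int teardown_args_check(size_t nr_phys_pages, size_t nr_virt_pages)
{
	if (nr_virt_pages == 0 || nr_phys_pages == 0)
		return 0;       /* nothing to tear down: succeed early */
	if (nr_virt_pages < nr_phys_pages)
		return -EINVAL; /* more physical pages than mapped virtual pages */
	return 1;               /* caller may proceed with the real teardown */
}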
2944
+ if (unlikely(nr_virt_pages == 0 || nr_phys_pages == 0))
2945
+ return 0;
2946
+
2947
+ if (unlikely(nr_virt_pages < nr_phys_pages))
2948
+ return -EINVAL;
2949
+
2950
+ /* MMU cache flush strategy depends on the number of pages to unmap. In both cases
2951
+ * the operation is invalidate but the granularity of cache maintenance may change
2952
+ * according to the situation.
2953
+ *
2954
+ * If GPU control command operations are present and the number of pages is "small",
2955
+ * then the optimal strategy is flushing on the physical address range of the pages
2956
+ * which are affected by the operation. That implies both the PGDs which are modified
2957
+ * or removed from the page table and the physical pages which are freed from memory.
2958
+ *
2959
+ * Otherwise, there's no alternative to invalidating the whole GPU cache.
2960
+ */
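/* Not part of the driver: a rough sense of scale for the threshold checked
 * just below, assuming 4 kB pages. Up to the threshold the affected physical
 * range is assumed small enough that range-based maintenance beats a full GPU
 * cache invalidation.
 */
#include <stdio.h>

int main(void)
{
	const unsigned int threshold_pages = 20; /* KBASE_PA_RANGE_THRESHOLD_NR_PAGES */
	const unsigned int page_size = 4096;     /* assumed page size */

	printf("range-based flush limit: %u bytes (%u KiB)\n",
	       threshold_pages * page_size, threshold_pages * page_size / 1024);
	return 0;
}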
2961
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys &&
2962
+ nr_phys_pages <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES)
2963
+ flush_op = KBASE_MMU_OP_FLUSH_PT;
2964
+
2965
+ mutex_lock(&mmut->mmu_lock);
2966
+
2967
+ err = kbase_mmu_teardown_pgd_pages(kbdev, mmut, vpfn, nr_virt_pages, &dirty_pgds,
2968
+ &free_pgds_list, flush_op);
2969
+
2970
+ /* Set up MMU operation parameters. See above about MMU cache flush strategy. */
2971
+ op_param = (struct kbase_mmu_hw_op_param){
2972
+ .vpfn = start_vpfn,
2973
+ .nr = nr_virt_pages,
2974
+ .mmu_sync_info = mmu_sync_info,
2975
+ .kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF,
2976
+ .op = (flush_op == KBASE_MMU_OP_FLUSH_PT) ? KBASE_MMU_OP_FLUSH_PT :
2977
+ KBASE_MMU_OP_FLUSH_MEM,
2978
+ .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
2979
+ };
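/* Minimal user-space sketch (not driver code) of the dirty_pgds bookkeeping
 * used above: every level whose PGD was updated sets one bit (level 0 = bit 0,
 * bottom level = bit 3), and the later flush is assumed to be allowed to skip
 * exactly those levels whose bit stayed clear. The values below are made up.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t dirty_pgds = 0;
	unsigned int level;

	/* Pretend the teardown only touched entries at levels 2 and 3. */
	dirty_pgds |= 1ULL << 2;
	dirty_pgds |= 1ULL << 3;

	for (level = 0; level < 4; level++)
		printf("level %u: %s\n", level,
		       (dirty_pgds & (1ULL << level)) ? "must flush" : "may be skipped");
	return 0;
}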
2980
+ mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, nr_phys_pages,
2981
+ &op_param);
2982
+
2983
+ /* If page migration is enabled: the status of all physical pages involved
2984
+ * shall be updated, unless they are not movable. Their status shall be
2985
+ * updated before releasing the lock to protect against concurrent
2986
+ * requests to migrate the pages, if they have been isolated.
2987
+ */
2988
+ if (kbase_page_migration_enabled && phys && !ignore_page_migration)
2989
+ kbase_mmu_progress_migration_on_teardown(kbdev, phys, nr_phys_pages);
2990
+
2991
+ kbase_mmu_free_pgds_list(kbdev, mmut);
2992
+
2993
+ mutex_unlock(&mmut->mmu_lock);
18552994
18562995 return err;
18572996 }
1858
-
18592997 KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
18602998
18612999 /**
1862
- * kbase_mmu_update_pages_no_flush() - Update page table entries on the GPU
3000
+ * kbase_mmu_update_pages_no_flush() - Update phy pages and attributes data in GPU
3001
+ * page table entries
18633002 *
1864
- * This will update page table entries that already exist on the GPU based on
1865
- * the new flags that are passed. It is used as a response to the changes of
1866
- * the memory attributes
1867
- *
1868
- * The caller is responsible for validating the memory attributes
1869
- *
1870
- * @kctx: Kbase context
3003
+ * @kbdev: Pointer to kbase device.
3004
+ * @mmut: The involved MMU table
18713005 * @vpfn: Virtual PFN (Page Frame Number) of the first page to update
1872
- * @phys: Tagged physical addresses of the physical pages to replace the
1873
- * current mappings
3006
+ * @phys: Pointer to the array of tagged physical addresses of the physical
3007
+ * pages that are pointed to by the page table entries (that need to
3008
+ * be updated). The pointer should be within the reg->gpu_alloc->pages
3009
+ * array.
18743010 * @nr: Number of pages to update
18753011 * @flags: Flags
18763012 * @group_id: The physical memory group in which the page was allocated.
18773013 * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
3014
+ * @dirty_pgds: Flags to track every level where a PGD has been updated.
3015
+ *
3016
+ * This will update page table entries that already exist on the GPU based on
3017
+ * new flags and replace any existing phy pages that are passed (the PGD pages
3018
+ * remain unchanged). It is used as a response to the changes of phys as well
3019
+ * as the memory attributes.
3020
+ *
3021
+ * The caller is responsible for validating the memory attributes.
3022
+ *
3023
+ * Return: 0 if the attributes data in page table entries were updated
3024
+ * successfully, otherwise an error code.
18783025 */
1879
-static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
1880
- struct tagged_addr *phys, size_t nr,
1881
- unsigned long flags, int const group_id)
3026
+static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
3027
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
3028
+ unsigned long flags, int const group_id, u64 *dirty_pgds)
18823029 {
18833030 phys_addr_t pgd;
18843031 u64 *pgd_page;
18853032 int err;
1886
- struct kbase_device *kbdev;
1887
-
1888
- if (WARN_ON(kctx == NULL))
1889
- return -EINVAL;
18903033
18913034 KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
18923035
....@@ -1894,146 +3037,550 @@
18943037 if (nr == 0)
18953038 return 0;
18963039
1897
- mutex_lock(&kctx->mmu.mmu_lock);
1898
-
1899
- kbdev = kctx->kbdev;
3040
+ mutex_lock(&mmut->mmu_lock);
19003041
19013042 while (nr) {
19023043 unsigned int i;
19033044 unsigned int index = vpfn & 0x1FF;
19043045 size_t count = KBASE_MMU_PAGE_ENTRIES - index;
19053046 struct page *p;
3047
+ register unsigned int num_of_valid_entries;
3048
+ int cur_level = MIDGARD_MMU_BOTTOMLEVEL;
19063049
19073050 if (count > nr)
19083051 count = nr;
19093052
1910
- do {
1911
- err = mmu_get_bottom_pgd(kbdev, &kctx->mmu,
1912
- vpfn, &pgd);
1913
- if (err != -ENOMEM)
1914
- break;
1915
- /* Fill the memory pool with enough pages for
1916
- * the page walk to succeed
1917
- */
1918
- mutex_unlock(&kctx->mmu.mmu_lock);
1919
- err = kbase_mem_pool_grow(
1920
-#ifdef CONFIG_MALI_2MB_ALLOC
1921
- &kbdev->mem_pools.large[
1922
-#else
1923
- &kbdev->mem_pools.small[
1924
-#endif
1925
- kctx->mmu.group_id],
1926
- MIDGARD_MMU_BOTTOMLEVEL);
1927
- mutex_lock(&kctx->mmu.mmu_lock);
1928
- } while (!err);
1929
- if (err) {
1930
- dev_warn(kbdev->dev,
1931
- "mmu_get_bottom_pgd failure\n");
3053
+ if (is_huge(*phys) && (index == index_in_large_page(*phys)))
3054
+ cur_level = MIDGARD_MMU_LEVEL(2);
3055
+
3056
+ err = mmu_get_pgd_at_level(kbdev, mmut, vpfn, cur_level, &pgd);
3057
+ if (WARN_ON(err))
19323058 goto fail_unlock;
1933
- }
19343059
19353060 p = pfn_to_page(PFN_DOWN(pgd));
19363061 pgd_page = kmap(p);
19373062 if (!pgd_page) {
1938
- dev_warn(kbdev->dev, "kmap failure\n");
3063
+ dev_warn(kbdev->dev, "kmap failure on update_pages");
19393064 err = -ENOMEM;
19403065 goto fail_unlock;
19413066 }
19423067
1943
- for (i = 0; i < count; i++)
1944
- pgd_page[index + i] = kbase_mmu_create_ate(kbdev,
1945
- phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL,
1946
- group_id);
3068
+ num_of_valid_entries =
3069
+ kbdev->mmu_mode->get_num_valid_entries(pgd_page);
3070
+
3071
+ if (cur_level == MIDGARD_MMU_LEVEL(2)) {
3072
+ int level_index = (vpfn >> 9) & 0x1FF;
3073
+ struct tagged_addr *target_phys =
3074
+ phys - index_in_large_page(*phys);
3075
+
3076
+#ifdef CONFIG_MALI_BIFROST_DEBUG
3077
+ WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid(
3078
+ pgd_page[level_index], MIDGARD_MMU_LEVEL(2)));
3079
+#endif
3080
+ pgd_page[level_index] = kbase_mmu_create_ate(kbdev,
3081
+ *target_phys, flags, MIDGARD_MMU_LEVEL(2),
3082
+ group_id);
3083
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (level_index * sizeof(u64)),
3084
+ kbase_dma_addr(p) + (level_index * sizeof(u64)),
3085
+ sizeof(u64), KBASE_MMU_OP_NONE);
3086
+ } else {
3087
+ for (i = 0; i < count; i++) {
3088
+#ifdef CONFIG_MALI_BIFROST_DEBUG
3089
+ WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid(
3090
+ pgd_page[index + i],
3091
+ MIDGARD_MMU_BOTTOMLEVEL));
3092
+#endif
3093
+ pgd_page[index + i] = kbase_mmu_create_ate(kbdev,
3094
+ phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL,
3095
+ group_id);
3096
+ }
3097
+
3098
+ /* MMU cache flush strategy is NONE because GPU cache maintenance
3099
+ * will be done by the caller.
3100
+ */
3101
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
3102
+ kbase_dma_addr(p) + (index * sizeof(u64)),
3103
+ count * sizeof(u64), KBASE_MMU_OP_NONE);
3104
+ }
3105
+
3106
+ kbdev->mmu_mode->set_num_valid_entries(pgd_page,
3107
+ num_of_valid_entries);
3108
+
3109
+ if (dirty_pgds && count > 0)
3110
+ *dirty_pgds |= 1ULL << cur_level;
19473111
19483112 phys += count;
19493113 vpfn += count;
19503114 nr -= count;
19513115
1952
- kbase_mmu_sync_pgd(kbdev,
1953
- kbase_dma_addr(p) + (index * sizeof(u64)),
1954
- count * sizeof(u64));
1955
-
1956
- kunmap(pfn_to_page(PFN_DOWN(pgd)));
3116
+ kunmap(p);
19573117 }
19583118
1959
- mutex_unlock(&kctx->mmu.mmu_lock);
3119
+ mutex_unlock(&mmut->mmu_lock);
19603120 return 0;
19613121
19623122 fail_unlock:
1963
- mutex_unlock(&kctx->mmu.mmu_lock);
3123
+ mutex_unlock(&mmut->mmu_lock);
19643124 return err;
19653125 }
19663126
1967
-int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
1968
- struct tagged_addr *phys, size_t nr,
1969
- unsigned long flags, int const group_id)
3127
+static int kbase_mmu_update_pages_common(struct kbase_device *kbdev, struct kbase_context *kctx,
3128
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
3129
+ unsigned long flags, int const group_id)
19703130 {
19713131 int err;
3132
+ struct kbase_mmu_hw_op_param op_param;
3133
+ u64 dirty_pgds = 0;
3134
+ struct kbase_mmu_table *mmut;
3135
+ /* Calls to this function are inherently asynchronous, with respect to
3136
+ * MMU operations.
3137
+ */
3138
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
3139
+ int as_nr;
19723140
1973
- err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags,
1974
- group_id);
1975
- kbase_mmu_flush_invalidate(kctx, vpfn, nr, true);
3141
+#if !MALI_USE_CSF
3142
+ if (unlikely(kctx == NULL))
3143
+ return -EINVAL;
3144
+
3145
+ as_nr = kctx->as_nr;
3146
+ mmut = &kctx->mmu;
3147
+#else
3148
+ if (kctx) {
3149
+ mmut = &kctx->mmu;
3150
+ as_nr = kctx->as_nr;
3151
+ } else {
3152
+ mmut = &kbdev->csf.mcu_mmu;
3153
+ as_nr = MCU_AS_NR;
3154
+ }
3155
+#endif
3156
+
3157
+ err = kbase_mmu_update_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
3158
+ &dirty_pgds);
3159
+
3160
+ op_param = (const struct kbase_mmu_hw_op_param){
3161
+ .vpfn = vpfn,
3162
+ .nr = nr,
3163
+ .op = KBASE_MMU_OP_FLUSH_MEM,
3164
+ .kctx_id = kctx ? kctx->id : 0xFFFFFFFF,
3165
+ .mmu_sync_info = mmu_sync_info,
3166
+ .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
3167
+ };
3168
+
3169
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev))
3170
+ mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, &op_param);
3171
+ else
3172
+ mmu_flush_invalidate(kbdev, kctx, as_nr, &op_param);
3173
+
19763174 return err;
19773175 }
19783176
1979
-static void mmu_teardown_level(struct kbase_device *kbdev,
1980
- struct kbase_mmu_table *mmut, phys_addr_t pgd,
1981
- int level, u64 *pgd_page_buffer)
3177
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys,
3178
+ size_t nr, unsigned long flags, int const group_id)
19823179 {
1983
- phys_addr_t target_pgd;
1984
- struct page *p;
3180
+ if (unlikely(kctx == NULL))
3181
+ return -EINVAL;
3182
+
3183
+ return kbase_mmu_update_pages_common(kctx->kbdev, kctx, vpfn, phys, nr, flags, group_id);
3184
+}
3185
+
3186
+#if MALI_USE_CSF
3187
+int kbase_mmu_update_csf_mcu_pages(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys,
3188
+ size_t nr, unsigned long flags, int const group_id)
3189
+{
3190
+ return kbase_mmu_update_pages_common(kbdev, NULL, vpfn, phys, nr, flags, group_id);
3191
+}
3192
+#endif /* MALI_USE_CSF */
3193
+
3194
+static void mmu_page_migration_transaction_begin(struct kbase_device *kbdev)
3195
+{
3196
+ lockdep_assert_held(&kbdev->hwaccess_lock);
3197
+
3198
+ WARN_ON_ONCE(kbdev->mmu_page_migrate_in_progress);
3199
+ kbdev->mmu_page_migrate_in_progress = true;
3200
+}
3201
+
3202
+static void mmu_page_migration_transaction_end(struct kbase_device *kbdev)
3203
+{
3204
+ lockdep_assert_held(&kbdev->hwaccess_lock);
3205
+ WARN_ON_ONCE(!kbdev->mmu_page_migrate_in_progress);
3206
+ kbdev->mmu_page_migrate_in_progress = false;
3207
+ /* Invoke the PM state machine, as the MMU page migration session
3208
+ * may have deferred a transition in L2 state machine.
3209
+ */
3210
+ kbase_pm_update_state(kbdev);
3211
+}
3212
+
3213
+int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys,
3214
+ dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level)
3215
+{
3216
+ struct kbase_page_metadata *page_md = kbase_page_private(as_page(old_phys));
3217
+ struct kbase_mmu_hw_op_param op_param;
3218
+ struct kbase_mmu_table *mmut = (level == MIDGARD_MMU_BOTTOMLEVEL) ?
3219
+ page_md->data.mapped.mmut :
3220
+ page_md->data.pt_mapped.mmut;
3221
+ struct kbase_device *kbdev;
3222
+ phys_addr_t pgd;
3223
+ u64 *old_page, *new_page, *pgd_page, *target, vpfn;
3224
+ int index, check_state, ret = 0;
3225
+ unsigned long hwaccess_flags = 0;
3226
+ unsigned int num_of_valid_entries;
3227
+ u8 vmap_count = 0;
3228
+
3229
+ /* Due to the hard binding of mmu_command_instr with kctx_id via kbase_mmu_hw_op_param,
3230
+ * here we skip the no kctx case, which is only used with MCU's mmut.
3231
+ */
3232
+ if (!mmut->kctx)
3233
+ return -EINVAL;
3234
+
3235
+ if (level > MIDGARD_MMU_BOTTOMLEVEL)
3236
+ return -EINVAL;
3237
+ else if (level == MIDGARD_MMU_BOTTOMLEVEL)
3238
+ vpfn = page_md->data.mapped.vpfn;
3239
+ else
3240
+ vpfn = PGD_VPFN_LEVEL_GET_VPFN(page_md->data.pt_mapped.pgd_vpfn_level);
3241
+
3242
+ kbdev = mmut->kctx->kbdev;
3243
+ index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
3244
+
3245
+ /* Create all mappings before copying content.
3246
+ * This is done as early as possible because is the only operation that may
3247
+ * fail. It is possible to do this before taking any locks because the
3248
+ * pages to migrate are not going to change and even the parent PGD is not
3249
+ * going to be affected by any other concurrent operation, since the page
3250
+ * has been isolated before migration and therefore it cannot disappear in
3251
+ * the middle of this function.
3252
+ */
3253
+ old_page = kmap(as_page(old_phys));
3254
+ if (!old_page) {
3255
+ dev_warn(kbdev->dev, "%s: kmap failure for old page.", __func__);
3256
+ ret = -EINVAL;
3257
+ goto old_page_map_error;
3258
+ }
3259
+
3260
+ new_page = kmap(as_page(new_phys));
3261
+ if (!new_page) {
3262
+ dev_warn(kbdev->dev, "%s: kmap failure for new page.", __func__);
3263
+ ret = -EINVAL;
3264
+ goto new_page_map_error;
3265
+ }
3266
+
3267
+ /* GPU cache maintenance affects both memory content and page table,
3268
+ * but at two different stages. A single virtual memory page is affected
3269
+ * by the migration.
3270
+ *
3271
+ * Notice that the MMU maintenance is done in the following steps:
3272
+ *
3273
+ * 1) The MMU region is locked without performing any other operation.
3274
+ * This lock must cover the entire migration process, in order to
3275
+ * prevent any GPU access to the virtual page whose physical page
3276
+ * is being migrated.
3277
+ * 2) Immediately after locking: the MMU region content is flushed via
3278
+ * GPU control while the lock is taken and without unlocking.
3279
+ * The region must stay locked for the duration of the whole page
3280
+ * migration procedure.
3281
+ * This is necessary to make sure that pending writes to the old page
3282
+ * are finalized before copying content to the new page.
3283
+ * 3) Before unlocking: changes to the page table are flushed.
3284
+ * Finer-grained GPU control operations are used if possible, otherwise
3285
+ * the whole GPU cache shall be flushed again.
3286
+ * This is necessary to make sure that the GPU accesses the new page
3287
+ * after migration.
3288
+ * 4) The MMU region is unlocked.
3289
+ */
3290
+#define PGD_VPFN_MASK(level) (~((((u64)1) << ((3 - level) * 9)) - 1))
3291
+ op_param.mmu_sync_info = CALLER_MMU_ASYNC;
3292
+ op_param.kctx_id = mmut->kctx->id;
3293
+ op_param.vpfn = vpfn & PGD_VPFN_MASK(level);
3294
+ op_param.nr = 1 << ((3 - level) * 9);
3295
+ op_param.op = KBASE_MMU_OP_FLUSH_PT;
3296
+ /* When level is not MIDGARD_MMU_BOTTOMLEVEL, it is assumed PGD page migration */
3297
+ op_param.flush_skip_levels = (level == MIDGARD_MMU_BOTTOMLEVEL) ?
3298
+ pgd_level_to_skip_flush(1ULL << level) :
3299
+ pgd_level_to_skip_flush(3ULL << level);
3300
+
3301
+ mutex_lock(&mmut->mmu_lock);
3302
+
3303
+ /* The state was evaluated before entering this function, but it could
3304
+ * have changed before the mmu_lock was taken. However, the state
3305
+ * transitions which are possible at this point are only two, and in both
3306
+ * cases it is a stable state progressing to a "free in progress" state.
3307
+ *
3308
+ * After taking the mmu_lock the state can no longer change: read it again
3309
+ * and make sure that it hasn't changed before continuing.
3310
+ */
3311
+ spin_lock(&page_md->migrate_lock);
3312
+ check_state = PAGE_STATUS_GET(page_md->status);
3313
+ if (level == MIDGARD_MMU_BOTTOMLEVEL)
3314
+ vmap_count = page_md->vmap_count;
3315
+ spin_unlock(&page_md->migrate_lock);
3316
+
3317
+ if (level == MIDGARD_MMU_BOTTOMLEVEL) {
3318
+ if (check_state != ALLOCATED_MAPPED) {
3319
+ dev_dbg(kbdev->dev,
3320
+ "%s: state changed to %d (was %d), abort page migration", __func__,
3321
+ check_state, ALLOCATED_MAPPED);
3322
+ ret = -EAGAIN;
3323
+ goto page_state_change_out;
3324
+ } else if (vmap_count > 0) {
3325
+ dev_dbg(kbdev->dev, "%s: page was multi-mapped, abort page migration",
3326
+ __func__);
3327
+ ret = -EAGAIN;
3328
+ goto page_state_change_out;
3329
+ }
3330
+ } else {
3331
+ if (check_state != PT_MAPPED) {
3332
+ dev_dbg(kbdev->dev,
3333
+ "%s: state changed to %d (was %d), abort PGD page migration",
3334
+ __func__, check_state, PT_MAPPED);
3335
+ WARN_ON_ONCE(check_state != FREE_PT_ISOLATED_IN_PROGRESS);
3336
+ ret = -EAGAIN;
3337
+ goto page_state_change_out;
3338
+ }
3339
+ }
3340
+
3341
+ ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd);
3342
+ if (ret) {
3343
+ dev_err(kbdev->dev, "%s: failed to find PGD for old page.", __func__);
3344
+ goto get_pgd_at_level_error;
3345
+ }
3346
+
3347
+ pgd_page = kmap(phys_to_page(pgd));
3348
+ if (!pgd_page) {
3349
+ dev_warn(kbdev->dev, "%s: kmap failure for PGD page.", __func__);
3350
+ ret = -EINVAL;
3351
+ goto pgd_page_map_error;
3352
+ }
3353
+
3354
+ mutex_lock(&kbdev->pm.lock);
3355
+ mutex_lock(&kbdev->mmu_hw_mutex);
3356
+
3357
+ /* Lock MMU region and flush GPU cache by using GPU control,
3358
+ * in order to keep MMU region locked.
3359
+ */
3360
+ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
3361
+ if (unlikely(!kbase_pm_l2_allow_mmu_page_migration(kbdev))) {
3362
+ /* Defer the migration as L2 is in a transitional phase */
3363
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
3364
+ mutex_unlock(&kbdev->mmu_hw_mutex);
3365
+ mutex_unlock(&kbdev->pm.lock);
3366
+ dev_dbg(kbdev->dev, "%s: L2 in transition, abort PGD page migration", __func__);
3367
+ ret = -EAGAIN;
3368
+ goto l2_state_defer_out;
3369
+ }
3370
+ /* Prevent transitional phases in L2 by starting the transaction */
3371
+ mmu_page_migration_transaction_begin(kbdev);
3372
+ if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) {
3373
+ int as_nr = mmut->kctx->as_nr;
3374
+ struct kbase_as *as = &kbdev->as[as_nr];
3375
+
3376
+ ret = kbase_mmu_hw_do_lock(kbdev, as, &op_param);
3377
+ if (!ret) {
3378
+ ret = kbase_gpu_cache_flush_and_busy_wait(
3379
+ kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
3380
+ }
3381
+ if (ret)
3382
+ mmu_page_migration_transaction_end(kbdev);
3383
+ }
3384
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
3385
+
3386
+ if (ret < 0) {
3387
+ mutex_unlock(&kbdev->mmu_hw_mutex);
3388
+ mutex_unlock(&kbdev->pm.lock);
3389
+ dev_err(kbdev->dev, "%s: failed to lock MMU region or flush GPU cache", __func__);
3390
+ goto undo_mappings;
3391
+ }
3392
+
3393
+ /* Copy memory content.
3394
+ *
3395
+ * It is necessary to claim the ownership of the DMA buffer for the old
3396
+ * page before performing the copy, to make sure of reading a consistent
3397
+ * version of its content, before copying. After the copy, ownership of
3398
+ * the DMA buffer for the new page is given to the GPU in order to make
3399
+ * the content visible to potential GPU access that may happen as soon as
3400
+ * this function releases the lock on the MMU region.
3401
+ */
3402
+ dma_sync_single_for_cpu(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
3403
+ memcpy(new_page, old_page, PAGE_SIZE);
3404
+ dma_sync_single_for_device(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
3405
+
3406
+ /* Remap GPU virtual page.
3407
+ *
3408
+ * This code rests on the assumption that page migration is only enabled
3409
+ * for 4 kB pages, that necessarily live in the bottom level of the MMU
3410
+ * page table. For this reason, the PGD level tells us unequivocally
3411
+ * whether the page being migrated is a "content page" or another PGD
3412
+ * of the page table:
3413
+ *
3414
+ * - Bottom level implies ATE (Address Translation Entry)
3415
+ * - Any other level implies PTE (Page Table Entry)
3416
+ *
3417
+ * The current implementation doesn't handle the case of a level 0 PGD,
3418
+ * that is: the root PGD of the page table.
3419
+ */
3420
+ target = &pgd_page[index];
3421
+
3422
+ /* Certain entries of a page table page encode the count of valid entries
3423
+ * present in that page. So need to save & restore the count information
3424
+ * when updating the PTE/ATE to point to the new page.
3425
+ */
3426
+ num_of_valid_entries = kbdev->mmu_mode->get_num_valid_entries(pgd_page);
3427
+
3428
+ if (level == MIDGARD_MMU_BOTTOMLEVEL) {
3429
+ WARN_ON_ONCE((*target & 1UL) == 0);
3430
+ *target =
3431
+ kbase_mmu_create_ate(kbdev, new_phys, page_md->data.mapped.reg->flags,
3432
+ level, page_md->data.mapped.reg->gpu_alloc->group_id);
3433
+ } else {
3434
+ u64 managed_pte;
3435
+
3436
+#ifdef CONFIG_MALI_BIFROST_DEBUG
3437
+ /* The PTE should be pointing to the page being migrated */
3438
+ WARN_ON_ONCE(as_phys_addr_t(old_phys) != kbdev->mmu_mode->pte_to_phy_addr(
3439
+ kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
3440
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, pgd_page[index])));
3441
+#endif
3442
+ kbdev->mmu_mode->entry_set_pte(&managed_pte, as_phys_addr_t(new_phys));
3443
+ *target = kbdev->mgm_dev->ops.mgm_update_gpu_pte(
3444
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte);
3445
+ }
3446
+
3447
+ kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries);
3448
+
3449
+ /* This function always updates a single entry inside an existing PGD,
3450
+ * therefore cache maintenance is necessary and affects a single entry.
3451
+ */
3452
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
3453
+ kbase_dma_addr(phys_to_page(pgd)) + (index * sizeof(u64)), sizeof(u64),
3454
+ KBASE_MMU_OP_FLUSH_PT);
3455
+
3456
+ /* Unlock MMU region.
3457
+ *
3458
+ * Notice that GPUs which don't issue flush commands via GPU control
3459
+ * still need an additional GPU cache flush here, this time only
3460
+ * for the page table, because the function call above to sync PGDs
3461
+ * won't have any effect on them.
3462
+ */
3463
+ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
3464
+ if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) {
3465
+ int as_nr = mmut->kctx->as_nr;
3466
+ struct kbase_as *as = &kbdev->as[as_nr];
3467
+
3468
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
3469
+ ret = kbase_mmu_hw_do_unlock(kbdev, as, &op_param);
3470
+ } else {
3471
+ ret = kbase_gpu_cache_flush_and_busy_wait(kbdev,
3472
+ GPU_COMMAND_CACHE_CLN_INV_L2);
3473
+ if (!ret)
3474
+ ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, &op_param);
3475
+ }
3476
+ }
3477
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
3478
+ /* Releasing locks before checking the migration transaction error state */
3479
+ mutex_unlock(&kbdev->mmu_hw_mutex);
3480
+ mutex_unlock(&kbdev->pm.lock);
3481
+
3482
+ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
3483
+ /* Release the transition prevention in L2 by ending the transaction */
3484
+ mmu_page_migration_transaction_end(kbdev);
3485
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
3486
+
3487
+ /* Checking the final migration transaction error state */
3488
+ if (ret < 0) {
3489
+ dev_err(kbdev->dev, "%s: failed to unlock MMU region.", __func__);
3490
+ goto undo_mappings;
3491
+ }
3492
+
3493
+ /* Undertaking metadata transfer, while we are holding the mmu_lock */
3494
+ spin_lock(&page_md->migrate_lock);
3495
+ if (level == MIDGARD_MMU_BOTTOMLEVEL) {
3496
+ size_t page_array_index =
3497
+ page_md->data.mapped.vpfn - page_md->data.mapped.reg->start_pfn;
3498
+
3499
+ WARN_ON(PAGE_STATUS_GET(page_md->status) != ALLOCATED_MAPPED);
3500
+
3501
+ /* Replace page in array of pages of the physical allocation. */
3502
+ page_md->data.mapped.reg->gpu_alloc->pages[page_array_index] = new_phys;
3503
+ }
3504
+ /* Update the new page dma_addr with the transferred metadata from the old_page */
3505
+ page_md->dma_addr = new_dma_addr;
3506
+ page_md->status = PAGE_ISOLATE_SET(page_md->status, 0);
3507
+ spin_unlock(&page_md->migrate_lock);
3508
+ set_page_private(as_page(new_phys), (unsigned long)page_md);
3509
+ /* Old page metadata pointer cleared as it is now owned by the new page */
3510
+ set_page_private(as_page(old_phys), 0);
3511
+
3512
+l2_state_defer_out:
3513
+ kunmap(phys_to_page(pgd));
3514
+pgd_page_map_error:
3515
+get_pgd_at_level_error:
3516
+page_state_change_out:
3517
+ mutex_unlock(&mmut->mmu_lock);
3518
+
3519
+ kunmap(as_page(new_phys));
3520
+new_page_map_error:
3521
+ kunmap(as_page(old_phys));
3522
+old_page_map_error:
3523
+ return ret;
3524
+
3525
+undo_mappings:
3526
+ /* Unlock the MMU table and undo mappings. */
3527
+ mutex_unlock(&mmut->mmu_lock);
3528
+ kunmap(phys_to_page(pgd));
3529
+ kunmap(as_page(new_phys));
3530
+ kunmap(as_page(old_phys));
3531
+
3532
+ return ret;
3533
+}
3534
+
3535
+static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
3536
+ phys_addr_t pgd, unsigned int level)
3537
+{
19853538 u64 *pgd_page;
19863539 int i;
1987
- struct kbase_mmu_mode const *mmu_mode;
3540
+ struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev;
3541
+ struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode;
3542
+ u64 *pgd_page_buffer = NULL;
3543
+ struct page *p = phys_to_page(pgd);
19883544
19893545 lockdep_assert_held(&mmut->mmu_lock);
19903546
1991
- pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
3547
+ pgd_page = kmap_atomic(p);
19923548 /* kmap_atomic should NEVER fail. */
1993
- if (WARN_ON(pgd_page == NULL))
3549
+ if (WARN_ON_ONCE(pgd_page == NULL))
19943550 return;
1995
- /* Copy the page to our preallocated buffer so that we can minimize
1996
- * kmap_atomic usage
3551
+ if (level < MIDGARD_MMU_BOTTOMLEVEL) {
3552
+ /* Copy the page to our preallocated buffer so that we can minimize
3553
+ * kmap_atomic usage
3554
+ */
3555
+ pgd_page_buffer = mmut->scratch_mem.teardown_pages.levels[level];
3556
+ memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
3557
+ }
3558
+
3559
+ /* When page migration is enabled, kbase_region_tracker_term() would ensure
3560
+ * there are no pages left mapped on the GPU for a context. Hence the count
3561
+ * of valid entries is expected to be zero here.
19973562 */
1998
- memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
3563
+ if (kbase_page_migration_enabled && mmut->kctx)
3564
+ WARN_ON_ONCE(kbdev->mmu_mode->get_num_valid_entries(pgd_page));
3565
+ /* Invalidate page after copying */
3566
+ mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES);
19993567 kunmap_atomic(pgd_page);
20003568 pgd_page = pgd_page_buffer;
20013569
2002
- mmu_mode = kbdev->mmu_mode;
2003
-
2004
- for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
2005
- target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
2006
-
2007
- if (target_pgd) {
3570
+ if (level < MIDGARD_MMU_BOTTOMLEVEL) {
3571
+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
20083572 if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
2009
- mmu_teardown_level(kbdev, mmut,
2010
- target_pgd,
2011
- level + 1,
2012
- pgd_page_buffer +
2013
- (PAGE_SIZE / sizeof(u64)));
3573
+ phys_addr_t target_pgd = mmu_mode->pte_to_phy_addr(
3574
+ mgm_dev->ops.mgm_pte_to_original_pte(mgm_dev,
3575
+ MGM_DEFAULT_PTE_GROUP,
3576
+ level, pgd_page[i]));
3577
+
3578
+ mmu_teardown_level(kbdev, mmut, target_pgd, level + 1);
20143579 }
20153580 }
20163581 }
20173582
2018
- p = pfn_to_page(PFN_DOWN(pgd));
2019
-#ifdef CONFIG_MALI_2MB_ALLOC
2020
- kbase_mem_pool_free(&kbdev->mem_pools.large[mmut->group_id],
2021
-#else
2022
- kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id],
2023
-#endif
2024
- p, true);
2025
-
2026
- atomic_sub(1, &kbdev->memdev.used_pages);
2027
-
2028
- /* If MMU tables belong to a context then pages will have been accounted
2029
- * against it, so we must decrement the usage counts here.
2030
- */
2031
- if (mmut->kctx) {
2032
- kbase_process_page_usage_dec(mmut->kctx, 1);
2033
- atomic_sub(1, &mmut->kctx->used_pages);
2034
- }
2035
-
2036
- kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1);
3583
+ kbase_mmu_free_pgd(kbdev, mmut, pgd);
20373584 }
20383585
20393586 int kbase_mmu_init(struct kbase_device *const kbdev,
....@@ -2044,31 +3591,26 @@
20443591 WARN_ON(group_id < 0))
20453592 return -EINVAL;
20463593
3594
+ compiletime_assert(KBASE_MEM_ALLOC_MAX_SIZE <= (((8ull << 30) >> PAGE_SHIFT)),
3595
+ "List of free PGDs may not be large enough.");
3596
+ compiletime_assert(MAX_PAGES_FOR_FREE_PGDS >= MIDGARD_MMU_BOTTOMLEVEL,
3597
+ "Array of MMU levels is not large enough.");
3598
+
20473599 mmut->group_id = group_id;
20483600 mutex_init(&mmut->mmu_lock);
20493601 mmut->kctx = kctx;
3602
+ mmut->pgd = KBASE_MMU_INVALID_PGD_ADDRESS;
20503603
2051
- /* Preallocate MMU depth of four pages for mmu_teardown_level to use */
2052
- mmut->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
2053
-
2054
- if (mmut->mmu_teardown_pages == NULL)
2055
- return -ENOMEM;
2056
-
2057
- mmut->pgd = 0;
20583604 /* We allocate pages into the kbdev memory pool, then
20593605 * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to
20603606 * avoid allocations from the kernel happening with the lock held.
20613607 */
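/* Hypothetical, stand-alone sketch (every name below is made up, not driver
 * code) of the pattern described in the comment above: the step that may
 * allocate and sleep runs with no table lock held, and the locked step only
 * consumes what was preallocated, retrying if the pool turned out to be empty.
 * Locking of the pool's own state is omitted here for brevity.
 */
#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static void *pool_slot;  /* single-slot "pool", grown outside the lock */
static void *table_pgd;  /* destination guarded by table_lock */

static int pool_grow(void) /* may allocate and sleep: called unlocked */
{
	if (!pool_slot)
		pool_slot = calloc(1, 4096);
	return pool_slot ? 0 : -1;
}

static void *pool_take(void) /* never allocates: only hands out the slot */
{
	void *p = pool_slot;

	pool_slot = NULL;
	return p;
}

int alloc_pgd_with_preallocation(void)
{
	void *got = NULL;

	while (!got) {
		if (pool_grow())
			return -1; /* -ENOMEM in the real driver */

		pthread_mutex_lock(&table_lock);
		if (!table_pgd)
			table_pgd = pool_take(); /* cheap, non-allocating step */
		got = table_pgd;
		pthread_mutex_unlock(&table_lock);
		/* if the pool was drained in the meantime, grow again and retry */
	}
	return 0;
}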
2062
- while (!mmut->pgd) {
3608
+ while (mmut->pgd == KBASE_MMU_INVALID_PGD_ADDRESS) {
20633609 int err;
20643610
20653611 err = kbase_mem_pool_grow(
2066
-#ifdef CONFIG_MALI_2MB_ALLOC
2067
- &kbdev->mem_pools.large[mmut->group_id],
2068
-#else
20693612 &kbdev->mem_pools.small[mmut->group_id],
2070
-#endif
2071
- MIDGARD_MMU_BOTTOMLEVEL);
3613
+ MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL);
20723614 if (err) {
20733615 kbase_mmu_term(kbdev, mmut);
20743616 return -ENOMEM;
....@@ -2084,25 +3626,43 @@
20843626
20853627 void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
20863628 {
2087
- if (mmut->pgd) {
3629
+ WARN((mmut->kctx) && (mmut->kctx->as_nr != KBASEP_AS_NR_INVALID),
3630
+ "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables",
3631
+ mmut->kctx->tgid, mmut->kctx->id);
3632
+
3633
+ if (mmut->pgd != KBASE_MMU_INVALID_PGD_ADDRESS) {
20883634 mutex_lock(&mmut->mmu_lock);
2089
- mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL,
2090
- mmut->mmu_teardown_pages);
3635
+ mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL);
20913636 mutex_unlock(&mmut->mmu_lock);
20923637
20933638 if (mmut->kctx)
20943639 KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0);
20953640 }
20963641
2097
- kfree(mmut->mmu_teardown_pages);
20983642 mutex_destroy(&mmut->mmu_lock);
20993643 }
21003644
2101
-void kbase_mmu_as_term(struct kbase_device *kbdev, int i)
3645
+void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i)
21023646 {
21033647 destroy_workqueue(kbdev->as[i].pf_wq);
21043648 }
21053649
3650
+void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx,
3651
+ phys_addr_t phys, size_t size,
3652
+ enum kbase_mmu_op_type flush_op)
3653
+{
3654
+#if MALI_USE_CSF
3655
+ unsigned long irq_flags;
3656
+
3657
+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
3658
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) &&
3659
+ kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0))
3660
+ mmu_flush_pa_range(kbdev, phys, size, KBASE_MMU_OP_FLUSH_PT);
3661
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
3662
+#endif
3663
+}
3664
+
3665
+#ifdef CONFIG_MALI_VECTOR_DUMP
21063666 static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
21073667 int level, char ** const buffer, size_t *size_left)
21083668 {
....@@ -2123,7 +3683,7 @@
21233683
21243684 pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
21253685 if (!pgd_page) {
2126
- dev_warn(kbdev->dev, "%s: kmap failure\n", __func__);
3686
+ dev_warn(kbdev->dev, "%s: kmap failure", __func__);
21273687 return 0;
21283688 }
21293689
....@@ -2148,7 +3708,9 @@
21483708 for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
21493709 if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
21503710 target_pgd = mmu_mode->pte_to_phy_addr(
2151
- pgd_page[i]);
3711
+ kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
3712
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP,
3713
+ level, pgd_page[i]));
21523714
21533715 dump_size = kbasep_mmu_dump_level(kctx,
21543716 target_pgd, level + 1,
....@@ -2242,6 +3804,7 @@
22423804 return NULL;
22433805 }
22443806 KBASE_EXPORT_TEST_API(kbase_mmu_dump);
3807
+#endif /* CONFIG_MALI_VECTOR_DUMP */
22453808
22463809 void kbase_mmu_bus_fault_worker(struct work_struct *data)
22473810 {
....@@ -2274,8 +3837,7 @@
22743837 #ifdef CONFIG_MALI_ARBITER_SUPPORT
22753838 /* check if we still have GPU */
22763839 if (unlikely(kbase_is_gpu_removed(kbdev))) {
2277
- dev_dbg(kbdev->dev,
2278
- "%s: GPU has been removed\n", __func__);
3840
+ dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__);
22793841 release_ctx(kbdev, kctx);
22803842 atomic_dec(&kbdev->faults_pending);
22813843 return;
....@@ -2293,6 +3855,13 @@
22933855
22943856 }
22953857
3858
+#if MALI_USE_CSF
3859
+ /* Before the GPU power off, wait is done for the completion of
3860
+ * in-flight MMU fault work items. So GPU is expected to remain
3861
+ * powered up whilst the bus fault handling is being done.
3862
+ */
3863
+ kbase_gpu_report_bus_fault_and_kill(kctx, faulting_as, fault);
3864
+#else
22963865 /* NOTE: If GPU already powered off for suspend,
22973866 * we don't need to switch to unmapped
22983867 */
....@@ -2301,6 +3870,7 @@
23013870 kbase_gpu_report_bus_fault_and_kill(kctx, faulting_as, fault);
23023871 kbase_pm_context_idle(kbdev);
23033872 }
3873
+#endif
23043874
23053875 release_ctx(kbdev, kctx);
23063876