2024-05-10 61598093bbdd283a7edc367d900f223070ead8d2
kernel/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1,26 +1,7 @@
+// SPDX-License-Identifier: MIT
 /*
  * Copyright © 2010 Daniel Vetter
- * Copyright © 2011-2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
+ * Copyright © 2020 Intel Corporation
  */
 
 #include <linux/slab.h> /* fault-inject.h is not standalone! */
@@ -32,2417 +13,16 @@
 #include <linux/stop_machine.h>
 
 #include <asm/set_memory.h>
+#include <asm/smp.h>
 
-#include <drm/drmP.h>
-#include <drm/i915_drm.h>
+#include "display/intel_frontbuffer.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
-#include "i915_vgpu.h"
+#include "i915_scatterlist.h"
 #include "i915_trace.h"
-#include "intel_drv.h"
-#include "intel_frontbuffer.h"
-
-#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
-
-/**
- * DOC: Global GTT views
- *
- * Background and previous state
- *
- * Historically objects could exists (be bound) in global GTT space only as
- * singular instances with a view representing all of the object's backing pages
- * in a linear fashion. This view will be called a normal view.
- *
- * To support multiple views of the same object, where the number of mapped
- * pages is not equal to the backing store, or where the layout of the pages
- * is not linear, concept of a GGTT view was added.
- *
- * One example of an alternative view is a stereo display driven by a single
- * image. In this case we would have a framebuffer looking like this
- * (2x2 pages):
- *
- *    12
- *    34
- *
- * Above would represent a normal GGTT view as normally mapped for GPU or CPU
- * rendering. In contrast, fed to the display engine would be an alternative
- * view which could look something like this:
- *
- *   1212
- *   3434
- *
- * In this example both the size and layout of pages in the alternative view is
- * different from the normal view.
- *
- * Implementation and usage
- *
- * GGTT views are implemented using VMAs and are distinguished via enum
- * i915_ggtt_view_type and struct i915_ggtt_view.
- *
- * A new flavour of core GEM functions which work with GGTT bound objects were
- * added with the _ggtt_ infix, and sometimes with _view postfix to avoid
- * renaming in large amounts of code. They take the struct i915_ggtt_view
- * parameter encapsulating all metadata required to implement a view.
- *
- * As a helper for callers which are only interested in the normal view,
- * globally const i915_ggtt_view_normal singleton instance exists. All old core
- * GEM API functions, the ones not taking the view parameter, are operating on,
- * or with the normal GGTT view.
- *
- * Code wanting to add or use a new GGTT view needs to:
- *
- * 1. Add a new enum with a suitable name.
- * 2. Extend the metadata in the i915_ggtt_view structure if required.
- * 3. Add support to i915_get_vma_pages().
- *
- * New views are required to build a scatter-gather table from within the
- * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
- * exists for the lifetime of an VMA.
- *
- * Core API is designed to have copy semantics which means that passed in
- * struct i915_ggtt_view does not need to be persistent (left around after
- * calling the core API functions).
- *
- */
-
-static int
-i915_get_ggtt_vma_pages(struct i915_vma *vma);
-
-static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv)
-{
-	/*
-	 * Note that as an uncached mmio write, this will flush the
-	 * WCB of the writes into the GGTT before it triggers the invalidate.
-	 */
-	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
-}
119
-
120
-static void guc_ggtt_invalidate(struct drm_i915_private *dev_priv)
121
-{
122
- gen6_ggtt_invalidate(dev_priv);
123
- I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
124
-}
125
-
126
-static void gmch_ggtt_invalidate(struct drm_i915_private *dev_priv)
127
-{
128
- intel_gtt_chipset_flush();
129
-}
130
-
131
-static inline void i915_ggtt_invalidate(struct drm_i915_private *i915)
132
-{
133
- i915->ggtt.invalidate(i915);
134
-}
135
-
136
-int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
137
- int enable_ppgtt)
138
-{
139
- bool has_full_ppgtt;
140
- bool has_full_48bit_ppgtt;
141
-
142
- if (!dev_priv->info.has_aliasing_ppgtt)
143
- return 0;
144
-
145
- has_full_ppgtt = dev_priv->info.has_full_ppgtt;
146
- has_full_48bit_ppgtt = dev_priv->info.has_full_48bit_ppgtt;
147
-
148
- if (intel_vgpu_active(dev_priv)) {
149
- /* GVT-g has no support for 32bit ppgtt */
150
- has_full_ppgtt = false;
151
- has_full_48bit_ppgtt = intel_vgpu_has_full_48bit_ppgtt(dev_priv);
152
- }
153
-
154
- /*
155
- * We don't allow disabling PPGTT for gen9+ as it's a requirement for
156
- * execlists, the sole mechanism available to submit work.
157
- */
158
- if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
159
- return 0;
160
-
161
- /* Full PPGTT is required by the Gen9 cmdparser */
162
- if (enable_ppgtt == 1 && INTEL_GEN(dev_priv) != 9)
163
- return 1;
164
-
165
- if (enable_ppgtt == 2 && has_full_ppgtt)
166
- return 2;
167
-
168
- if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
169
- return 3;
170
-
171
- /* Disable ppgtt on SNB if VT-d is on. */
172
- if (IS_GEN6(dev_priv) && intel_vtd_active()) {
173
- DRM_INFO("Disabling PPGTT because VT-d is on\n");
174
- return 0;
175
- }
176
-
177
- /* Early VLV doesn't have this */
178
- if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
179
- DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
180
- return 0;
181
- }
182
-
183
- if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
184
- if (has_full_48bit_ppgtt)
185
- return 3;
186
-
187
- if (has_full_ppgtt)
188
- return 2;
189
- }
190
-
191
- return 1;
192
-}
193
-
194
-static int ppgtt_bind_vma(struct i915_vma *vma,
195
- enum i915_cache_level cache_level,
196
- u32 unused)
197
-{
198
- u32 pte_flags;
199
- int err;
200
-
201
- if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
202
- err = vma->vm->allocate_va_range(vma->vm,
203
- vma->node.start, vma->size);
204
- if (err)
205
- return err;
206
- }
207
-
208
- /* Applicable to VLV, and gen8+ */
209
- pte_flags = 0;
210
- if (i915_gem_object_is_readonly(vma->obj))
211
- pte_flags |= PTE_READ_ONLY;
212
-
213
- vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
214
-
215
- return 0;
216
-}
217
-
218
-static void ppgtt_unbind_vma(struct i915_vma *vma)
219
-{
220
- vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
221
-}
222
-
223
-static int ppgtt_set_pages(struct i915_vma *vma)
224
-{
225
- GEM_BUG_ON(vma->pages);
226
-
227
- vma->pages = vma->obj->mm.pages;
228
-
229
- vma->page_sizes = vma->obj->mm.page_sizes;
230
-
231
- return 0;
232
-}
233
-
234
-static void clear_pages(struct i915_vma *vma)
235
-{
236
- GEM_BUG_ON(!vma->pages);
237
-
238
- if (vma->pages != vma->obj->mm.pages) {
239
- sg_free_table(vma->pages);
240
- kfree(vma->pages);
241
- }
242
- vma->pages = NULL;
243
-
244
- memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
245
-}
246
-
247
-static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
248
- enum i915_cache_level level,
249
- u32 flags)
250
-{
251
- gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
252
-
253
- if (unlikely(flags & PTE_READ_ONLY))
254
- pte &= ~_PAGE_RW;
255
-
256
- switch (level) {
257
- case I915_CACHE_NONE:
258
- pte |= PPAT_UNCACHED;
259
- break;
260
- case I915_CACHE_WT:
261
- pte |= PPAT_DISPLAY_ELLC;
262
- break;
263
- default:
264
- pte |= PPAT_CACHED;
265
- break;
266
- }
267
-
268
- return pte;
269
-}
270
-
271
-static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
272
- const enum i915_cache_level level)
273
-{
274
- gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
275
- pde |= addr;
276
- if (level != I915_CACHE_NONE)
277
- pde |= PPAT_CACHED_PDE;
278
- else
279
- pde |= PPAT_UNCACHED;
280
- return pde;
281
-}
282
-
283
-#define gen8_pdpe_encode gen8_pde_encode
284
-#define gen8_pml4e_encode gen8_pde_encode
285
-
286
-static gen6_pte_t snb_pte_encode(dma_addr_t addr,
287
- enum i915_cache_level level,
288
- u32 unused)
289
-{
290
- gen6_pte_t pte = GEN6_PTE_VALID;
291
- pte |= GEN6_PTE_ADDR_ENCODE(addr);
292
-
293
- switch (level) {
294
- case I915_CACHE_L3_LLC:
295
- case I915_CACHE_LLC:
296
- pte |= GEN6_PTE_CACHE_LLC;
297
- break;
298
- case I915_CACHE_NONE:
299
- pte |= GEN6_PTE_UNCACHED;
300
- break;
301
- default:
302
- MISSING_CASE(level);
303
- }
304
-
305
- return pte;
306
-}
307
-
308
-static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
309
- enum i915_cache_level level,
310
- u32 unused)
311
-{
312
- gen6_pte_t pte = GEN6_PTE_VALID;
313
- pte |= GEN6_PTE_ADDR_ENCODE(addr);
314
-
315
- switch (level) {
316
- case I915_CACHE_L3_LLC:
317
- pte |= GEN7_PTE_CACHE_L3_LLC;
318
- break;
319
- case I915_CACHE_LLC:
320
- pte |= GEN6_PTE_CACHE_LLC;
321
- break;
322
- case I915_CACHE_NONE:
323
- pte |= GEN6_PTE_UNCACHED;
324
- break;
325
- default:
326
- MISSING_CASE(level);
327
- }
328
-
329
- return pte;
330
-}
331
-
332
-static gen6_pte_t byt_pte_encode(dma_addr_t addr,
333
- enum i915_cache_level level,
334
- u32 flags)
335
-{
336
- gen6_pte_t pte = GEN6_PTE_VALID;
337
- pte |= GEN6_PTE_ADDR_ENCODE(addr);
338
-
339
- if (!(flags & PTE_READ_ONLY))
340
- pte |= BYT_PTE_WRITEABLE;
341
-
342
- if (level != I915_CACHE_NONE)
343
- pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
344
-
345
- return pte;
346
-}
347
-
348
-static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
349
- enum i915_cache_level level,
350
- u32 unused)
351
-{
352
- gen6_pte_t pte = GEN6_PTE_VALID;
353
- pte |= HSW_PTE_ADDR_ENCODE(addr);
354
-
355
- if (level != I915_CACHE_NONE)
356
- pte |= HSW_WB_LLC_AGE3;
357
-
358
- return pte;
359
-}
360
-
361
-static gen6_pte_t iris_pte_encode(dma_addr_t addr,
362
- enum i915_cache_level level,
363
- u32 unused)
364
-{
365
- gen6_pte_t pte = GEN6_PTE_VALID;
366
- pte |= HSW_PTE_ADDR_ENCODE(addr);
367
-
368
- switch (level) {
369
- case I915_CACHE_NONE:
370
- break;
371
- case I915_CACHE_WT:
372
- pte |= HSW_WT_ELLC_LLC_AGE3;
373
- break;
374
- default:
375
- pte |= HSW_WB_ELLC_LLC_AGE3;
376
- break;
377
- }
378
-
379
- return pte;
380
-}
381
-
382
-static void stash_init(struct pagestash *stash)
383
-{
384
- pagevec_init(&stash->pvec);
385
- spin_lock_init(&stash->lock);
386
-}
387
-
388
-static struct page *stash_pop_page(struct pagestash *stash)
389
-{
390
- struct page *page = NULL;
391
-
392
- spin_lock(&stash->lock);
393
- if (likely(stash->pvec.nr))
394
- page = stash->pvec.pages[--stash->pvec.nr];
395
- spin_unlock(&stash->lock);
396
-
397
- return page;
398
-}
399
-
400
-static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
401
-{
402
- int nr;
403
-
404
- spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);
405
-
406
- nr = min_t(int, pvec->nr, pagevec_space(&stash->pvec));
407
- memcpy(stash->pvec.pages + stash->pvec.nr,
408
- pvec->pages + pvec->nr - nr,
409
- sizeof(pvec->pages[0]) * nr);
410
- stash->pvec.nr += nr;
411
-
412
- spin_unlock(&stash->lock);
413
-
414
- pvec->nr -= nr;
415
-}
416
-
417
-static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
418
-{
419
- struct pagevec stack;
420
- struct page *page;
421
-
422
- if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
423
- i915_gem_shrink_all(vm->i915);
424
-
425
- page = stash_pop_page(&vm->free_pages);
426
- if (page)
427
- return page;
428
-
429
- if (!vm->pt_kmap_wc)
430
- return alloc_page(gfp);
431
-
432
- /* Look in our global stash of WC pages... */
433
- page = stash_pop_page(&vm->i915->mm.wc_stash);
434
- if (page)
435
- return page;
436
-
437
- /*
438
- * Otherwise batch allocate pages to amortize cost of set_pages_wc.
439
- *
440
- * We have to be careful as page allocation may trigger the shrinker
441
- * (via direct reclaim) which will fill up the WC stash underneath us.
442
- * So we add our WB pages into a temporary pvec on the stack and merge
443
- * them into the WC stash after all the allocations are complete.
444
- */
445
- pagevec_init(&stack);
446
- do {
447
- struct page *page;
448
-
449
- page = alloc_page(gfp);
450
- if (unlikely(!page))
451
- break;
452
-
453
- stack.pages[stack.nr++] = page;
454
- } while (pagevec_space(&stack));
455
-
456
- if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
457
- page = stack.pages[--stack.nr];
458
-
459
- /* Merge spare WC pages to the global stash */
460
- stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
461
-
462
- /* Push any surplus WC pages onto the local VM stash */
463
- if (stack.nr)
464
- stash_push_pagevec(&vm->free_pages, &stack);
465
- }
466
-
467
- /* Return unwanted leftovers */
468
- if (unlikely(stack.nr)) {
469
- WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
470
- __pagevec_release(&stack);
471
- }
472
-
473
- return page;
474
-}
475
-
476
-static void vm_free_pages_release(struct i915_address_space *vm,
477
- bool immediate)
478
-{
479
- struct pagevec *pvec = &vm->free_pages.pvec;
480
- struct pagevec stack;
481
-
482
- lockdep_assert_held(&vm->free_pages.lock);
483
- GEM_BUG_ON(!pagevec_count(pvec));
484
-
485
- if (vm->pt_kmap_wc) {
486
- /*
487
- * When we use WC, first fill up the global stash and then
488
- * only if full immediately free the overflow.
489
- */
490
- stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);
491
-
492
- /*
493
- * As we have made some room in the VM's free_pages,
494
- * we can wait for it to fill again. Unless we are
495
- * inside i915_address_space_fini() and must
496
- * immediately release the pages!
497
- */
498
- if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
499
- return;
500
-
501
- /*
502
- * We have to drop the lock to allow ourselves to sleep,
503
- * so take a copy of the pvec and clear the stash for
504
- * others to use it as we sleep.
505
- */
506
- stack = *pvec;
507
- pagevec_reinit(pvec);
508
- spin_unlock(&vm->free_pages.lock);
509
-
510
- pvec = &stack;
511
- set_pages_array_wb(pvec->pages, pvec->nr);
512
-
513
- spin_lock(&vm->free_pages.lock);
514
- }
515
-
516
- __pagevec_release(pvec);
517
-}
518
-
519
-static void vm_free_page(struct i915_address_space *vm, struct page *page)
520
-{
521
- /*
522
- * On !llc, we need to change the pages back to WB. We only do so
523
- * in bulk, so we rarely need to change the page attributes here,
524
- * but doing so requires a stop_machine() from deep inside arch/x86/mm.
525
- * To make detection of the possible sleep more likely, use an
526
- * unconditional might_sleep() for everybody.
527
- */
528
- might_sleep();
529
- spin_lock(&vm->free_pages.lock);
530
- if (!pagevec_add(&vm->free_pages.pvec, page))
531
- vm_free_pages_release(vm, false);
532
- spin_unlock(&vm->free_pages.lock);
533
-}
534
-
535
-static void i915_address_space_init(struct i915_address_space *vm,
536
- struct drm_i915_private *dev_priv)
537
-{
538
- /*
539
- * The vm->mutex must be reclaim safe (for use in the shrinker).
540
- * Do a dummy acquire now under fs_reclaim so that any allocation
541
- * attempt holding the lock is immediately reported by lockdep.
542
- */
543
- mutex_init(&vm->mutex);
544
- i915_gem_shrinker_taints_mutex(&vm->mutex);
545
-
546
- GEM_BUG_ON(!vm->total);
547
- drm_mm_init(&vm->mm, 0, vm->total);
548
- vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
549
-
550
- stash_init(&vm->free_pages);
551
-
552
- INIT_LIST_HEAD(&vm->active_list);
553
- INIT_LIST_HEAD(&vm->inactive_list);
554
- INIT_LIST_HEAD(&vm->unbound_list);
555
-}
556
-
557
-static void i915_address_space_fini(struct i915_address_space *vm)
558
-{
559
- spin_lock(&vm->free_pages.lock);
560
- if (pagevec_count(&vm->free_pages.pvec))
561
- vm_free_pages_release(vm, true);
562
- GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
563
- spin_unlock(&vm->free_pages.lock);
564
-
565
- drm_mm_takedown(&vm->mm);
566
-
567
- mutex_destroy(&vm->mutex);
568
-}
569
-
570
-static int __setup_page_dma(struct i915_address_space *vm,
571
- struct i915_page_dma *p,
572
- gfp_t gfp)
573
-{
574
- p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
575
- if (unlikely(!p->page))
576
- return -ENOMEM;
577
-
578
- p->daddr = dma_map_page_attrs(vm->dma,
579
- p->page, 0, PAGE_SIZE,
580
- PCI_DMA_BIDIRECTIONAL,
581
- DMA_ATTR_SKIP_CPU_SYNC |
582
- DMA_ATTR_NO_WARN);
583
- if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
584
- vm_free_page(vm, p->page);
585
- return -ENOMEM;
586
- }
587
-
588
- return 0;
589
-}
590
-
591
-static int setup_page_dma(struct i915_address_space *vm,
592
- struct i915_page_dma *p)
593
-{
594
- return __setup_page_dma(vm, p, __GFP_HIGHMEM);
595
-}
596
-
597
-static void cleanup_page_dma(struct i915_address_space *vm,
598
- struct i915_page_dma *p)
599
-{
600
- dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
601
- vm_free_page(vm, p->page);
602
-}
603
-
604
-#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
605
-
606
-#define setup_px(vm, px) setup_page_dma((vm), px_base(px))
607
-#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
608
-#define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v))
609
-#define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v))
610
-
611
-static void fill_page_dma(struct i915_address_space *vm,
612
- struct i915_page_dma *p,
613
- const u64 val)
614
-{
615
- u64 * const vaddr = kmap_atomic(p->page);
616
-
617
- memset64(vaddr, val, PAGE_SIZE / sizeof(val));
618
-
619
- kunmap_atomic(vaddr);
620
-}
621
-
622
-static void fill_page_dma_32(struct i915_address_space *vm,
623
- struct i915_page_dma *p,
624
- const u32 v)
625
-{
626
- fill_page_dma(vm, p, (u64)v << 32 | v);
627
-}
628
-
629
-static int
630
-setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
631
-{
632
- unsigned long size;
633
-
634
- /*
635
- * In order to utilize 64K pages for an object with a size < 2M, we will
636
- * need to support a 64K scratch page, given that every 16th entry for a
637
- * page-table operating in 64K mode must point to a properly aligned 64K
638
- * region, including any PTEs which happen to point to scratch.
639
- *
640
- * This is only relevant for the 48b PPGTT where we support
641
- * huge-gtt-pages, see also i915_vma_insert().
642
- *
643
- * TODO: we should really consider write-protecting the scratch-page and
644
- * sharing between ppgtt
645
- */
646
- size = I915_GTT_PAGE_SIZE_4K;
647
- if (i915_vm_is_48bit(vm) &&
648
- HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
649
- size = I915_GTT_PAGE_SIZE_64K;
650
- gfp |= __GFP_NOWARN;
651
- }
652
- gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
653
-
654
- do {
655
- int order = get_order(size);
656
- struct page *page;
657
- dma_addr_t addr;
658
-
659
- page = alloc_pages(gfp, order);
660
- if (unlikely(!page))
661
- goto skip;
662
-
663
- addr = dma_map_page_attrs(vm->dma,
664
- page, 0, size,
665
- PCI_DMA_BIDIRECTIONAL,
666
- DMA_ATTR_SKIP_CPU_SYNC |
667
- DMA_ATTR_NO_WARN);
668
- if (unlikely(dma_mapping_error(vm->dma, addr)))
669
- goto free_page;
670
-
671
- if (unlikely(!IS_ALIGNED(addr, size)))
672
- goto unmap_page;
673
-
674
- vm->scratch_page.page = page;
675
- vm->scratch_page.daddr = addr;
676
- vm->scratch_page.order = order;
677
- return 0;
678
-
679
-unmap_page:
680
- dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
681
-free_page:
682
- __free_pages(page, order);
683
-skip:
684
- if (size == I915_GTT_PAGE_SIZE_4K)
685
- return -ENOMEM;
686
-
687
- size = I915_GTT_PAGE_SIZE_4K;
688
- gfp &= ~__GFP_NOWARN;
689
- } while (1);
690
-}
691
-
692
-static void cleanup_scratch_page(struct i915_address_space *vm)
693
-{
694
- struct i915_page_dma *p = &vm->scratch_page;
695
-
696
- dma_unmap_page(vm->dma, p->daddr, BIT(p->order) << PAGE_SHIFT,
697
- PCI_DMA_BIDIRECTIONAL);
698
- __free_pages(p->page, p->order);
699
-}
700
-
701
-static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
702
-{
703
- struct i915_page_table *pt;
704
-
705
- pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL);
706
- if (unlikely(!pt))
707
- return ERR_PTR(-ENOMEM);
708
-
709
- if (unlikely(setup_px(vm, pt))) {
710
- kfree(pt);
711
- return ERR_PTR(-ENOMEM);
712
- }
713
-
714
- pt->used_ptes = 0;
715
- return pt;
716
-}
717
-
718
-static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
719
-{
720
- cleanup_px(vm, pt);
721
- kfree(pt);
722
-}
723
-
724
-static void gen8_initialize_pt(struct i915_address_space *vm,
725
- struct i915_page_table *pt)
726
-{
727
- fill_px(vm, pt,
728
- gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0));
729
-}
730
-
731
-static void gen6_initialize_pt(struct gen6_hw_ppgtt *ppgtt,
732
- struct i915_page_table *pt)
733
-{
734
- fill32_px(&ppgtt->base.vm, pt, ppgtt->scratch_pte);
735
-}
736
-
737
-static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
738
-{
739
- struct i915_page_directory *pd;
740
-
741
- pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
742
- if (unlikely(!pd))
743
- return ERR_PTR(-ENOMEM);
744
-
745
- if (unlikely(setup_px(vm, pd))) {
746
- kfree(pd);
747
- return ERR_PTR(-ENOMEM);
748
- }
749
-
750
- pd->used_pdes = 0;
751
- return pd;
752
-}
753
-
754
-static void free_pd(struct i915_address_space *vm,
755
- struct i915_page_directory *pd)
756
-{
757
- cleanup_px(vm, pd);
758
- kfree(pd);
759
-}
760
-
761
-static void gen8_initialize_pd(struct i915_address_space *vm,
762
- struct i915_page_directory *pd)
763
-{
764
- fill_px(vm, pd,
765
- gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC));
766
- memset_p((void **)pd->page_table, vm->scratch_pt, I915_PDES);
767
-}
768
-
769
-static int __pdp_init(struct i915_address_space *vm,
770
- struct i915_page_directory_pointer *pdp)
771
-{
772
- const unsigned int pdpes = i915_pdpes_per_pdp(vm);
773
-
774
- pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
775
- I915_GFP_ALLOW_FAIL);
776
- if (unlikely(!pdp->page_directory))
777
- return -ENOMEM;
778
-
779
- memset_p((void **)pdp->page_directory, vm->scratch_pd, pdpes);
780
-
781
- return 0;
782
-}
783
-
784
-static void __pdp_fini(struct i915_page_directory_pointer *pdp)
785
-{
786
- kfree(pdp->page_directory);
787
- pdp->page_directory = NULL;
788
-}
789
-
790
-static inline bool use_4lvl(const struct i915_address_space *vm)
791
-{
792
- return i915_vm_is_48bit(vm);
793
-}
794
-
795
-static struct i915_page_directory_pointer *
796
-alloc_pdp(struct i915_address_space *vm)
797
-{
798
- struct i915_page_directory_pointer *pdp;
799
- int ret = -ENOMEM;
800
-
801
- GEM_BUG_ON(!use_4lvl(vm));
802
-
803
- pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
804
- if (!pdp)
805
- return ERR_PTR(-ENOMEM);
806
-
807
- ret = __pdp_init(vm, pdp);
808
- if (ret)
809
- goto fail_bitmap;
810
-
811
- ret = setup_px(vm, pdp);
812
- if (ret)
813
- goto fail_page_m;
814
-
815
- return pdp;
816
-
817
-fail_page_m:
818
- __pdp_fini(pdp);
819
-fail_bitmap:
820
- kfree(pdp);
821
-
822
- return ERR_PTR(ret);
823
-}
824
-
825
-static void free_pdp(struct i915_address_space *vm,
826
- struct i915_page_directory_pointer *pdp)
827
-{
828
- __pdp_fini(pdp);
829
-
830
- if (!use_4lvl(vm))
831
- return;
832
-
833
- cleanup_px(vm, pdp);
834
- kfree(pdp);
835
-}
836
-
837
-static void gen8_initialize_pdp(struct i915_address_space *vm,
838
- struct i915_page_directory_pointer *pdp)
839
-{
840
- gen8_ppgtt_pdpe_t scratch_pdpe;
841
-
842
- scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
843
-
844
- fill_px(vm, pdp, scratch_pdpe);
845
-}
846
-
847
-static void gen8_initialize_pml4(struct i915_address_space *vm,
848
- struct i915_pml4 *pml4)
849
-{
850
- fill_px(vm, pml4,
851
- gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC));
852
- memset_p((void **)pml4->pdps, vm->scratch_pdp, GEN8_PML4ES_PER_PML4);
853
-}
854
-
855
-/* PDE TLBs are a pain to invalidate on GEN8+. When we modify
856
- * the page table structures, we mark them dirty so that
857
- * context switching/execlist queuing code takes extra steps
858
- * to ensure that tlbs are flushed.
859
- */
860
-static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
861
-{
862
- ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->vm.i915)->ring_mask;
863
-}
864
-
865
-/* Removes entries from a single page table, releasing it if it's empty.
866
- * Caller can use the return value to update higher-level entries.
867
- */
868
-static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
869
- struct i915_page_table *pt,
870
- u64 start, u64 length)
871
-{
872
- unsigned int num_entries = gen8_pte_count(start, length);
873
- unsigned int pte = gen8_pte_index(start);
874
- unsigned int pte_end = pte + num_entries;
875
- const gen8_pte_t scratch_pte =
876
- gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
877
- gen8_pte_t *vaddr;
878
-
879
- GEM_BUG_ON(num_entries > pt->used_ptes);
880
-
881
- pt->used_ptes -= num_entries;
882
- if (!pt->used_ptes)
883
- return true;
884
-
885
- vaddr = kmap_atomic_px(pt);
886
- while (pte < pte_end)
887
- vaddr[pte++] = scratch_pte;
888
- kunmap_atomic(vaddr);
889
-
890
- return false;
891
-}
892
-
893
-static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
894
- struct i915_page_directory *pd,
895
- struct i915_page_table *pt,
896
- unsigned int pde)
897
-{
898
- gen8_pde_t *vaddr;
899
-
900
- pd->page_table[pde] = pt;
901
-
902
- vaddr = kmap_atomic_px(pd);
903
- vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
904
- kunmap_atomic(vaddr);
905
-}
906
-
907
-static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
908
- struct i915_page_directory *pd,
909
- u64 start, u64 length)
910
-{
911
- struct i915_page_table *pt;
912
- u32 pde;
913
-
914
- gen8_for_each_pde(pt, pd, start, length, pde) {
915
- GEM_BUG_ON(pt == vm->scratch_pt);
916
-
917
- if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
918
- continue;
919
-
920
- gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
921
- GEM_BUG_ON(!pd->used_pdes);
922
- pd->used_pdes--;
923
-
924
- free_pt(vm, pt);
925
- }
926
-
927
- return !pd->used_pdes;
928
-}
929
-
930
-static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
931
- struct i915_page_directory_pointer *pdp,
932
- struct i915_page_directory *pd,
933
- unsigned int pdpe)
934
-{
935
- gen8_ppgtt_pdpe_t *vaddr;
936
-
937
- pdp->page_directory[pdpe] = pd;
938
- if (!use_4lvl(vm))
939
- return;
940
-
941
- vaddr = kmap_atomic_px(pdp);
942
- vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
943
- kunmap_atomic(vaddr);
944
-}
945
-
946
-/* Removes entries from a single page dir pointer, releasing it if it's empty.
947
- * Caller can use the return value to update higher-level entries
948
- */
949
-static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
950
- struct i915_page_directory_pointer *pdp,
951
- u64 start, u64 length)
952
-{
953
- struct i915_page_directory *pd;
954
- unsigned int pdpe;
955
-
956
- gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
957
- GEM_BUG_ON(pd == vm->scratch_pd);
958
-
959
- if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
960
- continue;
961
-
962
- gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
963
- GEM_BUG_ON(!pdp->used_pdpes);
964
- pdp->used_pdpes--;
965
-
966
- free_pd(vm, pd);
967
- }
968
-
969
- return !pdp->used_pdpes;
970
-}
971
-
972
-static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
973
- u64 start, u64 length)
974
-{
975
- gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length);
976
-}
977
-
978
-static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
979
- struct i915_page_directory_pointer *pdp,
980
- unsigned int pml4e)
981
-{
982
- gen8_ppgtt_pml4e_t *vaddr;
983
-
984
- pml4->pdps[pml4e] = pdp;
985
-
986
- vaddr = kmap_atomic_px(pml4);
987
- vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
988
- kunmap_atomic(vaddr);
989
-}
990
-
991
-/* Removes entries from a single pml4.
992
- * This is the top-level structure in 4-level page tables used on gen8+.
993
- * Empty entries are always scratch pml4e.
994
- */
995
-static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
996
- u64 start, u64 length)
997
-{
998
- struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
999
- struct i915_pml4 *pml4 = &ppgtt->pml4;
1000
- struct i915_page_directory_pointer *pdp;
1001
- unsigned int pml4e;
1002
-
1003
- GEM_BUG_ON(!use_4lvl(vm));
1004
-
1005
- gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1006
- GEM_BUG_ON(pdp == vm->scratch_pdp);
1007
-
1008
- if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
1009
- continue;
1010
-
1011
- gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
1012
-
1013
- free_pdp(vm, pdp);
1014
- }
1015
-}
1016
-
1017
-static inline struct sgt_dma {
1018
- struct scatterlist *sg;
1019
- dma_addr_t dma, max;
1020
-} sgt_dma(struct i915_vma *vma) {
1021
- struct scatterlist *sg = vma->pages->sgl;
1022
- dma_addr_t addr = sg_dma_address(sg);
1023
- return (struct sgt_dma) { sg, addr, addr + sg->length };
1024
-}
1025
-
1026
-struct gen8_insert_pte {
1027
- u16 pml4e;
1028
- u16 pdpe;
1029
- u16 pde;
1030
- u16 pte;
1031
-};
1032
-
1033
-static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
1034
-{
1035
- return (struct gen8_insert_pte) {
1036
- gen8_pml4e_index(start),
1037
- gen8_pdpe_index(start),
1038
- gen8_pde_index(start),
1039
- gen8_pte_index(start),
1040
- };
1041
-}
1042
-
1043
-static __always_inline bool
1044
-gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
1045
- struct i915_page_directory_pointer *pdp,
1046
- struct sgt_dma *iter,
1047
- struct gen8_insert_pte *idx,
1048
- enum i915_cache_level cache_level,
1049
- u32 flags)
1050
-{
1051
- struct i915_page_directory *pd;
1052
- const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
1053
- gen8_pte_t *vaddr;
1054
- bool ret;
1055
-
1056
- GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
1057
- pd = pdp->page_directory[idx->pdpe];
1058
- vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
1059
- do {
1060
- vaddr[idx->pte] = pte_encode | iter->dma;
1061
-
1062
- iter->dma += I915_GTT_PAGE_SIZE;
1063
- if (iter->dma >= iter->max) {
1064
- iter->sg = __sg_next(iter->sg);
1065
- if (!iter->sg) {
1066
- ret = false;
1067
- break;
1068
- }
1069
-
1070
- iter->dma = sg_dma_address(iter->sg);
1071
- iter->max = iter->dma + iter->sg->length;
1072
- }
1073
-
1074
- if (++idx->pte == GEN8_PTES) {
1075
- idx->pte = 0;
1076
-
1077
- if (++idx->pde == I915_PDES) {
1078
- idx->pde = 0;
1079
-
1080
- /* Limited by sg length for 3lvl */
1081
- if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
1082
- idx->pdpe = 0;
1083
- ret = true;
1084
- break;
1085
- }
1086
-
1087
- GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
1088
- pd = pdp->page_directory[idx->pdpe];
1089
- }
1090
-
1091
- kunmap_atomic(vaddr);
1092
- vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
1093
- }
1094
- } while (1);
1095
- kunmap_atomic(vaddr);
1096
-
1097
- return ret;
1098
-}
1099
-
1100
-static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
1101
- struct i915_vma *vma,
1102
- enum i915_cache_level cache_level,
1103
- u32 flags)
1104
-{
1105
- struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1106
- struct sgt_dma iter = sgt_dma(vma);
1107
- struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
1108
-
1109
- gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx,
1110
- cache_level, flags);
1111
-
1112
- vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1113
-}
1114
-
1115
-static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
1116
- struct i915_page_directory_pointer **pdps,
1117
- struct sgt_dma *iter,
1118
- enum i915_cache_level cache_level,
1119
- u32 flags)
1120
-{
1121
- const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
1122
- u64 start = vma->node.start;
1123
- dma_addr_t rem = iter->sg->length;
1124
-
1125
- do {
1126
- struct gen8_insert_pte idx = gen8_insert_pte(start);
1127
- struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
1128
- struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
1129
- unsigned int page_size;
1130
- bool maybe_64K = false;
1131
- gen8_pte_t encode = pte_encode;
1132
- gen8_pte_t *vaddr;
1133
- u16 index, max;
1134
-
1135
- if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
1136
- IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
1137
- rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
1138
- index = idx.pde;
1139
- max = I915_PDES;
1140
- page_size = I915_GTT_PAGE_SIZE_2M;
1141
-
1142
- encode |= GEN8_PDE_PS_2M;
1143
-
1144
- vaddr = kmap_atomic_px(pd);
1145
- } else {
1146
- struct i915_page_table *pt = pd->page_table[idx.pde];
1147
-
1148
- index = idx.pte;
1149
- max = GEN8_PTES;
1150
- page_size = I915_GTT_PAGE_SIZE;
1151
-
1152
- if (!index &&
1153
- vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
1154
- IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
1155
- (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
1156
- rem >= (max - index) << PAGE_SHIFT))
1157
- maybe_64K = true;
1158
-
1159
- vaddr = kmap_atomic_px(pt);
1160
- }
1161
-
1162
- do {
1163
- GEM_BUG_ON(iter->sg->length < page_size);
1164
- vaddr[index++] = encode | iter->dma;
1165
-
1166
- start += page_size;
1167
- iter->dma += page_size;
1168
- rem -= page_size;
1169
- if (iter->dma >= iter->max) {
1170
- iter->sg = __sg_next(iter->sg);
1171
- if (!iter->sg)
1172
- break;
1173
-
1174
- rem = iter->sg->length;
1175
- iter->dma = sg_dma_address(iter->sg);
1176
- iter->max = iter->dma + rem;
1177
-
1178
- if (maybe_64K && index < max &&
1179
- !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
1180
- (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
1181
- rem >= (max - index) << PAGE_SHIFT)))
1182
- maybe_64K = false;
1183
-
1184
- if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
1185
- break;
1186
- }
1187
- } while (rem >= page_size && index < max);
1188
-
1189
- kunmap_atomic(vaddr);
1190
-
1191
- /*
1192
- * Is it safe to mark the 2M block as 64K? -- Either we have
1193
- * filled whole page-table with 64K entries, or filled part of
1194
- * it and have reached the end of the sg table and we have
1195
- * enough padding.
1196
- */
1197
- if (maybe_64K &&
1198
- (index == max ||
1199
- (i915_vm_has_scratch_64K(vma->vm) &&
1200
- !iter->sg && IS_ALIGNED(vma->node.start +
1201
- vma->node.size,
1202
- I915_GTT_PAGE_SIZE_2M)))) {
1203
- vaddr = kmap_atomic_px(pd);
1204
- vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
1205
- kunmap_atomic(vaddr);
1206
- page_size = I915_GTT_PAGE_SIZE_64K;
1207
-
1208
- /*
1209
- * We write all 4K page entries, even when using 64K
1210
- * pages. In order to verify that the HW isn't cheating
1211
- * by using the 4K PTE instead of the 64K PTE, we want
1212
- * to remove all the surplus entries. If the HW skipped
1213
- * the 64K PTE, it will read/write into the scratch page
1214
- * instead - which we detect as missing results during
1215
- * selftests.
1216
- */
1217
- if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
1218
- u16 i;
1219
-
1220
- encode = pte_encode | vma->vm->scratch_page.daddr;
1221
- vaddr = kmap_atomic_px(pd->page_table[idx.pde]);
1222
-
1223
- for (i = 1; i < index; i += 16)
1224
- memset64(vaddr + i, encode, 15);
1225
-
1226
- kunmap_atomic(vaddr);
1227
- }
1228
- }
1229
-
1230
- vma->page_sizes.gtt |= page_size;
1231
- } while (iter->sg);
1232
-}
1233
-
1234
-static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
1235
- struct i915_vma *vma,
1236
- enum i915_cache_level cache_level,
1237
- u32 flags)
1238
-{
1239
- struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1240
- struct sgt_dma iter = sgt_dma(vma);
1241
- struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
1242
-
1243
- if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
1244
- gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level,
1245
- flags);
1246
- } else {
1247
- struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
1248
-
1249
- while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++],
1250
- &iter, &idx, cache_level,
1251
- flags))
1252
- GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
1253
-
1254
- vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1255
- }
1256
-}
1257
-
1258
-static void gen8_free_page_tables(struct i915_address_space *vm,
1259
- struct i915_page_directory *pd)
1260
-{
1261
- int i;
1262
-
1263
- if (!px_page(pd))
1264
- return;
1265
-
1266
- for (i = 0; i < I915_PDES; i++) {
1267
- if (pd->page_table[i] != vm->scratch_pt)
1268
- free_pt(vm, pd->page_table[i]);
1269
- }
1270
-}
1271
-
1272
-static int gen8_init_scratch(struct i915_address_space *vm)
1273
-{
1274
- int ret;
1275
-
1276
- ret = setup_scratch_page(vm, __GFP_HIGHMEM);
1277
- if (ret)
1278
- return ret;
1279
-
1280
- vm->scratch_pt = alloc_pt(vm);
1281
- if (IS_ERR(vm->scratch_pt)) {
1282
- ret = PTR_ERR(vm->scratch_pt);
1283
- goto free_scratch_page;
1284
- }
1285
-
1286
- vm->scratch_pd = alloc_pd(vm);
1287
- if (IS_ERR(vm->scratch_pd)) {
1288
- ret = PTR_ERR(vm->scratch_pd);
1289
- goto free_pt;
1290
- }
1291
-
1292
- if (use_4lvl(vm)) {
1293
- vm->scratch_pdp = alloc_pdp(vm);
1294
- if (IS_ERR(vm->scratch_pdp)) {
1295
- ret = PTR_ERR(vm->scratch_pdp);
1296
- goto free_pd;
1297
- }
1298
- }
1299
-
1300
- gen8_initialize_pt(vm, vm->scratch_pt);
1301
- gen8_initialize_pd(vm, vm->scratch_pd);
1302
- if (use_4lvl(vm))
1303
- gen8_initialize_pdp(vm, vm->scratch_pdp);
1304
-
1305
- return 0;
1306
-
1307
-free_pd:
1308
- free_pd(vm, vm->scratch_pd);
1309
-free_pt:
1310
- free_pt(vm, vm->scratch_pt);
1311
-free_scratch_page:
1312
- cleanup_scratch_page(vm);
1313
-
1314
- return ret;
1315
-}
1316
-
1317
-static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
1318
-{
1319
- struct i915_address_space *vm = &ppgtt->vm;
1320
- struct drm_i915_private *dev_priv = vm->i915;
1321
- enum vgt_g2v_type msg;
1322
- int i;
1323
-
1324
- if (use_4lvl(vm)) {
1325
- const u64 daddr = px_dma(&ppgtt->pml4);
1326
-
1327
- I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
1328
- I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
1329
-
1330
- msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
1331
- VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
1332
- } else {
1333
- for (i = 0; i < GEN8_3LVL_PDPES; i++) {
1334
- const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
1335
-
1336
- I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
1337
- I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
1338
- }
1339
-
1340
- msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
1341
- VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
1342
- }
1343
-
1344
- I915_WRITE(vgtif_reg(g2v_notify), msg);
1345
-
1346
- return 0;
1347
-}
1348
-
1349
-static void gen8_free_scratch(struct i915_address_space *vm)
1350
-{
1351
- if (use_4lvl(vm))
1352
- free_pdp(vm, vm->scratch_pdp);
1353
- free_pd(vm, vm->scratch_pd);
1354
- free_pt(vm, vm->scratch_pt);
1355
- cleanup_scratch_page(vm);
1356
-}
1357
-
1358
-static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
1359
- struct i915_page_directory_pointer *pdp)
1360
-{
1361
- const unsigned int pdpes = i915_pdpes_per_pdp(vm);
1362
- int i;
1363
-
1364
- for (i = 0; i < pdpes; i++) {
1365
- if (pdp->page_directory[i] == vm->scratch_pd)
1366
- continue;
1367
-
1368
- gen8_free_page_tables(vm, pdp->page_directory[i]);
1369
- free_pd(vm, pdp->page_directory[i]);
1370
- }
1371
-
1372
- free_pdp(vm, pdp);
1373
-}
1374
-
1375
-static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
1376
-{
1377
- int i;
1378
-
1379
- for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
1380
- if (ppgtt->pml4.pdps[i] == ppgtt->vm.scratch_pdp)
1381
- continue;
1382
-
1383
- gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pml4.pdps[i]);
1384
- }
1385
-
1386
- cleanup_px(&ppgtt->vm, &ppgtt->pml4);
1387
-}
1388
-
1389
-static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
1390
-{
1391
- struct drm_i915_private *dev_priv = vm->i915;
1392
- struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1393
-
1394
- if (intel_vgpu_active(dev_priv))
1395
- gen8_ppgtt_notify_vgt(ppgtt, false);
1396
-
1397
- if (use_4lvl(vm))
1398
- gen8_ppgtt_cleanup_4lvl(ppgtt);
1399
- else
1400
- gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, &ppgtt->pdp);
1401
-
1402
- gen8_free_scratch(vm);
1403
-}
1404
-
1405
-static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
1406
- struct i915_page_directory *pd,
1407
- u64 start, u64 length)
1408
-{
1409
- struct i915_page_table *pt;
1410
- u64 from = start;
1411
- unsigned int pde;
1412
-
1413
- gen8_for_each_pde(pt, pd, start, length, pde) {
1414
- int count = gen8_pte_count(start, length);
1415
-
1416
- if (pt == vm->scratch_pt) {
1417
- pd->used_pdes++;
1418
-
1419
- pt = alloc_pt(vm);
1420
- if (IS_ERR(pt)) {
1421
- pd->used_pdes--;
1422
- goto unwind;
1423
- }
1424
-
1425
- if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
1426
- gen8_initialize_pt(vm, pt);
1427
-
1428
- gen8_ppgtt_set_pde(vm, pd, pt, pde);
1429
- GEM_BUG_ON(pd->used_pdes > I915_PDES);
1430
- }
1431
-
1432
- pt->used_ptes += count;
1433
- }
1434
- return 0;
1435
-
1436
-unwind:
1437
- gen8_ppgtt_clear_pd(vm, pd, from, start - from);
1438
- return -ENOMEM;
1439
-}
1440
-
1441
-static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
1442
- struct i915_page_directory_pointer *pdp,
1443
- u64 start, u64 length)
1444
-{
1445
- struct i915_page_directory *pd;
1446
- u64 from = start;
1447
- unsigned int pdpe;
1448
- int ret;
1449
-
1450
- gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1451
- if (pd == vm->scratch_pd) {
1452
- pdp->used_pdpes++;
1453
-
1454
- pd = alloc_pd(vm);
1455
- if (IS_ERR(pd)) {
1456
- pdp->used_pdpes--;
1457
- goto unwind;
1458
- }
1459
-
1460
- gen8_initialize_pd(vm, pd);
1461
- gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
1462
- GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm));
1463
-
1464
- mark_tlbs_dirty(i915_vm_to_ppgtt(vm));
1465
- }
1466
-
1467
- ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
1468
- if (unlikely(ret))
1469
- goto unwind_pd;
1470
- }
1471
-
1472
- return 0;
1473
-
1474
-unwind_pd:
1475
- if (!pd->used_pdes) {
1476
- gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
1477
- GEM_BUG_ON(!pdp->used_pdpes);
1478
- pdp->used_pdpes--;
1479
- free_pd(vm, pd);
1480
- }
1481
-unwind:
1482
- gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
1483
- return -ENOMEM;
1484
-}
1485
-
1486
-static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
1487
- u64 start, u64 length)
1488
-{
1489
- return gen8_ppgtt_alloc_pdp(vm,
1490
- &i915_vm_to_ppgtt(vm)->pdp, start, length);
1491
-}
1492
-
1493
-static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
1494
- u64 start, u64 length)
1495
-{
1496
- struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1497
- struct i915_pml4 *pml4 = &ppgtt->pml4;
1498
- struct i915_page_directory_pointer *pdp;
1499
- u64 from = start;
1500
- u32 pml4e;
1501
- int ret;
1502
-
1503
- gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1504
- if (pml4->pdps[pml4e] == vm->scratch_pdp) {
1505
- pdp = alloc_pdp(vm);
1506
- if (IS_ERR(pdp))
1507
- goto unwind;
1508
-
1509
- gen8_initialize_pdp(vm, pdp);
1510
- gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
1511
- }
1512
-
1513
- ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
1514
- if (unlikely(ret))
1515
- goto unwind_pdp;
1516
- }
1517
-
1518
- return 0;
1519
-
1520
-unwind_pdp:
1521
- if (!pdp->used_pdpes) {
1522
- gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
1523
- free_pdp(vm, pdp);
1524
- }
1525
-unwind:
1526
- gen8_ppgtt_clear_4lvl(vm, from, start - from);
1527
- return -ENOMEM;
1528
-}
1529
-
1530
-static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
1531
- struct i915_page_directory_pointer *pdp,
1532
- u64 start, u64 length,
1533
- gen8_pte_t scratch_pte,
1534
- struct seq_file *m)
1535
-{
1536
- struct i915_address_space *vm = &ppgtt->vm;
1537
- struct i915_page_directory *pd;
1538
- u32 pdpe;
1539
-
1540
- gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1541
- struct i915_page_table *pt;
1542
- u64 pd_len = length;
1543
- u64 pd_start = start;
1544
- u32 pde;
1545
-
1546
- if (pdp->page_directory[pdpe] == ppgtt->vm.scratch_pd)
1547
- continue;
1548
-
1549
- seq_printf(m, "\tPDPE #%d\n", pdpe);
1550
- gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1551
- u32 pte;
1552
- gen8_pte_t *pt_vaddr;
1553
-
1554
- if (pd->page_table[pde] == ppgtt->vm.scratch_pt)
1555
- continue;
1556
-
1557
- pt_vaddr = kmap_atomic_px(pt);
1558
- for (pte = 0; pte < GEN8_PTES; pte += 4) {
1559
- u64 va = (pdpe << GEN8_PDPE_SHIFT |
1560
- pde << GEN8_PDE_SHIFT |
1561
- pte << GEN8_PTE_SHIFT);
1562
- int i;
1563
- bool found = false;
1564
-
1565
- for (i = 0; i < 4; i++)
1566
- if (pt_vaddr[pte + i] != scratch_pte)
1567
- found = true;
1568
- if (!found)
1569
- continue;
1570
-
1571
- seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
1572
- for (i = 0; i < 4; i++) {
1573
- if (pt_vaddr[pte + i] != scratch_pte)
1574
- seq_printf(m, " %llx", pt_vaddr[pte + i]);
1575
- else
1576
- seq_puts(m, " SCRATCH ");
1577
- }
1578
- seq_puts(m, "\n");
1579
- }
1580
- kunmap_atomic(pt_vaddr);
1581
- }
1582
- }
1583
-}
1584
-
1585
-static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1586
-{
1587
- struct i915_address_space *vm = &ppgtt->vm;
1588
- const gen8_pte_t scratch_pte =
1589
- gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
1590
- u64 start = 0, length = ppgtt->vm.total;
1591
-
1592
- if (use_4lvl(vm)) {
1593
- u64 pml4e;
1594
- struct i915_pml4 *pml4 = &ppgtt->pml4;
1595
- struct i915_page_directory_pointer *pdp;
1596
-
1597
- gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1598
- if (pml4->pdps[pml4e] == ppgtt->vm.scratch_pdp)
1599
- continue;
1600
-
1601
- seq_printf(m, " PML4E #%llu\n", pml4e);
1602
- gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m);
1603
- }
1604
- } else {
1605
- gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m);
1606
- }
1607
-}
1608
-
1609
-static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
1610
-{
1611
- struct i915_address_space *vm = &ppgtt->vm;
1612
- struct i915_page_directory_pointer *pdp = &ppgtt->pdp;
1613
- struct i915_page_directory *pd;
1614
- u64 start = 0, length = ppgtt->vm.total;
1615
- u64 from = start;
1616
- unsigned int pdpe;
1617
-
1618
- gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1619
- pd = alloc_pd(vm);
1620
- if (IS_ERR(pd))
1621
- goto unwind;
1622
-
1623
- gen8_initialize_pd(vm, pd);
1624
- gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
1625
- pdp->used_pdpes++;
1626
- }
1627
-
1628
- pdp->used_pdpes++; /* never remove */
1629
- return 0;
1630
-
1631
-unwind:
1632
- start -= from;
1633
- gen8_for_each_pdpe(pd, pdp, from, start, pdpe) {
1634
- gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
1635
- free_pd(vm, pd);
1636
- }
1637
- pdp->used_pdpes = 0;
1638
- return -ENOMEM;
1639
-}
1640
-
1641
-/*
1642
- * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
1643
- * with a net effect resembling a 2-level page table in normal x86 terms. Each
1644
- * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
1645
- * space.
1646
- *
1647
- */
1648
-static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
1649
-{
1650
- struct i915_hw_ppgtt *ppgtt;
1651
- int err;
1652
-
1653
- ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
1654
- if (!ppgtt)
1655
- return ERR_PTR(-ENOMEM);
1656
-
1657
- kref_init(&ppgtt->ref);
1658
-
1659
- ppgtt->vm.i915 = i915;
1660
- ppgtt->vm.dma = &i915->drm.pdev->dev;
1661
-
1662
- ppgtt->vm.total = USES_FULL_48BIT_PPGTT(i915) ?
1663
- 1ULL << 48 :
1664
- 1ULL << 32;
1665
-
1666
- /*
1667
- * From bdw, there is support for read-only pages in the PPGTT.
1668
- *
1669
- * XXX GVT is not honouring the lack of RW in the PTE bits.
1670
- */
1671
- ppgtt->vm.has_read_only = !intel_vgpu_active(i915);
1672
-
1673
- i915_address_space_init(&ppgtt->vm, i915);
1674
-
1675
- /* There are only few exceptions for gen >=6. chv and bxt.
1676
- * And we are not sure about the latter so play safe for now.
1677
- */
1678
- if (IS_CHERRYVIEW(i915) || IS_BROXTON(i915))
1679
- ppgtt->vm.pt_kmap_wc = true;
1680
-
1681
- err = gen8_init_scratch(&ppgtt->vm);
1682
- if (err)
1683
- goto err_free;
1684
-
1685
- if (use_4lvl(&ppgtt->vm)) {
1686
- err = setup_px(&ppgtt->vm, &ppgtt->pml4);
1687
- if (err)
1688
- goto err_scratch;
1689
-
1690
- gen8_initialize_pml4(&ppgtt->vm, &ppgtt->pml4);
1691
-
1692
- ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
1693
- ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
1694
- ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl;
1695
- } else {
1696
- err = __pdp_init(&ppgtt->vm, &ppgtt->pdp);
1697
- if (err)
1698
- goto err_scratch;
1699
-
1700
- if (intel_vgpu_active(i915)) {
1701
- err = gen8_preallocate_top_level_pdp(ppgtt);
1702
- if (err) {
1703
- __pdp_fini(&ppgtt->pdp);
1704
- goto err_scratch;
1705
- }
1706
- }
1707
-
1708
- ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl;
1709
- ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl;
1710
- ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl;
1711
- }
1712
-
1713
- if (intel_vgpu_active(i915))
1714
- gen8_ppgtt_notify_vgt(ppgtt, true);
1715
-
1716
- ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
1717
- ppgtt->debug_dump = gen8_dump_ppgtt;
1718
-
1719
- ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma;
1720
- ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma;
1721
- ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages;
1722
- ppgtt->vm.vma_ops.clear_pages = clear_pages;
1723
-
1724
- return ppgtt;
1725
-
1726
-err_scratch:
1727
- gen8_free_scratch(&ppgtt->vm);
1728
-err_free:
1729
- kfree(ppgtt);
1730
- return ERR_PTR(err);
1731
-}
1732
-
1733
-static void gen6_dump_ppgtt(struct i915_hw_ppgtt *base, struct seq_file *m)
1734
-{
1735
- struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
1736
- const gen6_pte_t scratch_pte = ppgtt->scratch_pte;
1737
- struct i915_page_table *pt;
1738
- u32 pte, pde;
1739
-
1740
- gen6_for_all_pdes(pt, &base->pd, pde) {
1741
- gen6_pte_t *vaddr;
1742
-
1743
- if (pt == base->vm.scratch_pt)
1744
- continue;
1745
-
1746
- if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) {
1747
- u32 expected =
1748
- GEN6_PDE_ADDR_ENCODE(px_dma(pt)) |
1749
- GEN6_PDE_VALID;
1750
- u32 pd_entry = readl(ppgtt->pd_addr + pde);
1751
-
1752
- if (pd_entry != expected)
1753
- seq_printf(m,
1754
- "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1755
- pde,
1756
- pd_entry,
1757
- expected);
1758
-
1759
- seq_printf(m, "\tPDE: %x\n", pd_entry);
1760
- }
1761
-
1762
- vaddr = kmap_atomic_px(base->pd.page_table[pde]);
1763
- for (pte = 0; pte < GEN6_PTES; pte += 4) {
1764
- int i;
1765
-
1766
- for (i = 0; i < 4; i++)
1767
- if (vaddr[pte + i] != scratch_pte)
1768
- break;
1769
- if (i == 4)
1770
- continue;
1771
-
1772
- seq_printf(m, "\t\t(%03d, %04d) %08llx: ",
1773
- pde, pte,
1774
- (pde * GEN6_PTES + pte) * I915_GTT_PAGE_SIZE);
1775
- for (i = 0; i < 4; i++) {
1776
- if (vaddr[pte + i] != scratch_pte)
1777
- seq_printf(m, " %08x", vaddr[pte + i]);
1778
- else
1779
- seq_puts(m, " SCRATCH");
1780
- }
1781
- seq_puts(m, "\n");
1782
- }
1783
- kunmap_atomic(vaddr);
1784
- }
1785
-}
1786
-
1787
-/* Write pde (index) from the page directory @pd to the page table @pt */
1788
-static inline void gen6_write_pde(const struct gen6_hw_ppgtt *ppgtt,
1789
- const unsigned int pde,
1790
- const struct i915_page_table *pt)
1791
-{
1792
- /* Caller needs to make sure the write completes if necessary */
1793
- iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
1794
- ppgtt->pd_addr + pde);
1795
-}
1796
-
1797
-static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv)
1798
-{
1799
- struct intel_engine_cs *engine;
1800
- enum intel_engine_id id;
1801
-
1802
- for_each_engine(engine, dev_priv, id) {
1803
- u32 four_level = USES_FULL_48BIT_PPGTT(dev_priv) ?
1804
- GEN8_GFX_PPGTT_48B : 0;
1805
- I915_WRITE(RING_MODE_GEN7(engine),
1806
- _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
1807
- }
1808
-}
1809
-
1810
-static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv)
1811
-{
1812
- struct intel_engine_cs *engine;
1813
- u32 ecochk, ecobits;
1814
- enum intel_engine_id id;
1815
-
1816
- ecobits = I915_READ(GAC_ECO_BITS);
1817
- I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1818
-
1819
- ecochk = I915_READ(GAM_ECOCHK);
1820
- if (IS_HASWELL(dev_priv)) {
1821
- ecochk |= ECOCHK_PPGTT_WB_HSW;
1822
- } else {
1823
- ecochk |= ECOCHK_PPGTT_LLC_IVB;
1824
- ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1825
- }
1826
- I915_WRITE(GAM_ECOCHK, ecochk);
1827
-
1828
- for_each_engine(engine, dev_priv, id) {
1829
- /* GFX_MODE is per-ring on gen7+ */
1830
- I915_WRITE(RING_MODE_GEN7(engine),
1831
- _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1832
- }
1833
-}
1834
-
1835
-static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv)
1836
-{
1837
- u32 ecochk, gab_ctl, ecobits;
1838
-
1839
- ecobits = I915_READ(GAC_ECO_BITS);
1840
- I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1841
- ECOBITS_PPGTT_CACHE64B);
1842
-
1843
- gab_ctl = I915_READ(GAB_CTL);
1844
- I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1845
-
1846
- ecochk = I915_READ(GAM_ECOCHK);
1847
- I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1848
-
1849
- I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1850
-}
1851
-
1852
-/* PPGTT support for Sandybdrige/Gen6 and later */
1853
-static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1854
- u64 start, u64 length)
1855
-{
1856
- struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
1857
- unsigned int first_entry = start >> PAGE_SHIFT;
1858
- unsigned int pde = first_entry / GEN6_PTES;
1859
- unsigned int pte = first_entry % GEN6_PTES;
1860
- unsigned int num_entries = length >> PAGE_SHIFT;
1861
- const gen6_pte_t scratch_pte = ppgtt->scratch_pte;
1862
-
1863
- while (num_entries) {
1864
- struct i915_page_table *pt = ppgtt->base.pd.page_table[pde++];
1865
- const unsigned int end = min(pte + num_entries, GEN6_PTES);
1866
- const unsigned int count = end - pte;
1867
- gen6_pte_t *vaddr;
1868
-
1869
- GEM_BUG_ON(pt == vm->scratch_pt);
1870
-
1871
- num_entries -= count;
1872
-
1873
- GEM_BUG_ON(count > pt->used_ptes);
1874
- pt->used_ptes -= count;
1875
- if (!pt->used_ptes)
1876
- ppgtt->scan_for_unused_pt = true;
1877
-
1878
- /*
1879
- * Note that the hw doesn't support removing PDE on the fly
1880
- * (they are cached inside the context with no means to
1881
- * invalidate the cache), so we can only reset the PTE
1882
- * entries back to scratch.
1883
- */
1884
-
1885
- vaddr = kmap_atomic_px(pt);
1886
- do {
1887
- vaddr[pte++] = scratch_pte;
1888
- } while (pte < end);
1889
- kunmap_atomic(vaddr);
1890
-
1891
- pte = 0;
1892
- }
1893
-}
1894
-
1895
-static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1896
- struct i915_vma *vma,
1897
- enum i915_cache_level cache_level,
1898
- u32 flags)
1899
-{
1900
- struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1901
- unsigned first_entry = vma->node.start >> PAGE_SHIFT;
1902
- unsigned act_pt = first_entry / GEN6_PTES;
1903
- unsigned act_pte = first_entry % GEN6_PTES;
1904
- const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
1905
- struct sgt_dma iter = sgt_dma(vma);
1906
- gen6_pte_t *vaddr;
1907
-
1908
- GEM_BUG_ON(ppgtt->pd.page_table[act_pt] == vm->scratch_pt);
1909
-
1910
- vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
1911
- do {
1912
- vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
1913
-
1914
- iter.dma += I915_GTT_PAGE_SIZE;
1915
- if (iter.dma == iter.max) {
1916
- iter.sg = __sg_next(iter.sg);
1917
- if (!iter.sg)
1918
- break;
1919
-
1920
- iter.dma = sg_dma_address(iter.sg);
1921
- iter.max = iter.dma + iter.sg->length;
1922
- }
1923
-
1924
- if (++act_pte == GEN6_PTES) {
1925
- kunmap_atomic(vaddr);
1926
- vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]);
1927
- act_pte = 0;
1928
- }
1929
- } while (1);
1930
- kunmap_atomic(vaddr);
1931
-
1932
- vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1933
-}
1934
-
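For reference, the PDE/PTE index arithmetic used by gen6_ppgtt_clear_range() and gen6_ppgtt_insert_entries() above can be exercised in isolation. The following is a minimal userspace sketch, not driver code; the 4 KiB GTT page size and the 1024-PTEs-per-page-table constant are assumptions mirroring the usual gen6 values, which this hunk does not restate.

/* Standalone sketch of the gen6 index math above; the constants are
 * assumptions for illustration, not taken from this hunk. */
#include <stdio.h>
#include <stdint.h>

#define SKETCH_GTT_PAGE_SHIFT 12	/* assumed 4 KiB GTT pages */
#define SKETCH_GEN6_PTES      1024	/* assumed PTEs per gen6 page table */

int main(void)
{
	uint64_t start = 0x00ff5000;	/* arbitrary offset within the PPGTT */

	unsigned int first_entry = start >> SKETCH_GTT_PAGE_SHIFT;
	unsigned int pde = first_entry / SKETCH_GEN6_PTES;	/* which page table */
	unsigned int pte = first_entry % SKETCH_GEN6_PTES;	/* slot within it */

	printf("offset 0x%llx -> entry %u -> pde %u, pte %u\n",
	       (unsigned long long)start, first_entry, pde, pte);
	return 0;
}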
1935
-static int gen6_alloc_va_range(struct i915_address_space *vm,
1936
- u64 start, u64 length)
1937
-{
1938
- struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
1939
- struct i915_page_table *pt;
1940
- u64 from = start;
1941
- unsigned int pde;
1942
- bool flush = false;
1943
-
1944
- gen6_for_each_pde(pt, &ppgtt->base.pd, start, length, pde) {
1945
- const unsigned int count = gen6_pte_count(start, length);
1946
-
1947
- if (pt == vm->scratch_pt) {
1948
- pt = alloc_pt(vm);
1949
- if (IS_ERR(pt))
1950
- goto unwind_out;
1951
-
1952
- gen6_initialize_pt(ppgtt, pt);
1953
- ppgtt->base.pd.page_table[pde] = pt;
1954
-
1955
- if (i915_vma_is_bound(ppgtt->vma,
1956
- I915_VMA_GLOBAL_BIND)) {
1957
- gen6_write_pde(ppgtt, pde, pt);
1958
- flush = true;
1959
- }
1960
-
1961
- GEM_BUG_ON(pt->used_ptes);
1962
- }
1963
-
1964
- pt->used_ptes += count;
1965
- }
1966
-
1967
- if (flush) {
1968
- mark_tlbs_dirty(&ppgtt->base);
1969
- gen6_ggtt_invalidate(ppgtt->base.vm.i915);
1970
- }
1971
-
1972
- return 0;
1973
-
1974
-unwind_out:
1975
- gen6_ppgtt_clear_range(vm, from, start - from);
1976
- return -ENOMEM;
1977
-}
1978
-
1979
-static int gen6_ppgtt_init_scratch(struct gen6_hw_ppgtt *ppgtt)
1980
-{
1981
- struct i915_address_space * const vm = &ppgtt->base.vm;
1982
- struct i915_page_table *unused;
1983
- u32 pde;
1984
- int ret;
1985
-
1986
- ret = setup_scratch_page(vm, __GFP_HIGHMEM);
1987
- if (ret)
1988
- return ret;
1989
-
1990
- ppgtt->scratch_pte =
1991
- vm->pte_encode(vm->scratch_page.daddr,
1992
- I915_CACHE_NONE, PTE_READ_ONLY);
1993
-
1994
- vm->scratch_pt = alloc_pt(vm);
1995
- if (IS_ERR(vm->scratch_pt)) {
1996
- cleanup_scratch_page(vm);
1997
- return PTR_ERR(vm->scratch_pt);
1998
- }
1999
-
2000
- gen6_initialize_pt(ppgtt, vm->scratch_pt);
2001
- gen6_for_all_pdes(unused, &ppgtt->base.pd, pde)
2002
- ppgtt->base.pd.page_table[pde] = vm->scratch_pt;
2003
-
2004
- return 0;
2005
-}
2006
-
2007
-static void gen6_ppgtt_free_scratch(struct i915_address_space *vm)
2008
-{
2009
- free_pt(vm, vm->scratch_pt);
2010
- cleanup_scratch_page(vm);
2011
-}
2012
-
2013
-static void gen6_ppgtt_free_pd(struct gen6_hw_ppgtt *ppgtt)
2014
-{
2015
- struct i915_page_table *pt;
2016
- u32 pde;
2017
-
2018
- gen6_for_all_pdes(pt, &ppgtt->base.pd, pde)
2019
- if (pt != ppgtt->base.vm.scratch_pt)
2020
- free_pt(&ppgtt->base.vm, pt);
2021
-}
2022
-
2023
-static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
2024
-{
2025
- struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
2026
-
2027
- i915_vma_destroy(ppgtt->vma);
2028
-
2029
- gen6_ppgtt_free_pd(ppgtt);
2030
- gen6_ppgtt_free_scratch(vm);
2031
-}
2032
-
2033
-static int pd_vma_set_pages(struct i915_vma *vma)
2034
-{
2035
- vma->pages = ERR_PTR(-ENODEV);
2036
- return 0;
2037
-}
2038
-
2039
-static void pd_vma_clear_pages(struct i915_vma *vma)
2040
-{
2041
- GEM_BUG_ON(!vma->pages);
2042
-
2043
- vma->pages = NULL;
2044
-}
2045
-
2046
-static int pd_vma_bind(struct i915_vma *vma,
2047
- enum i915_cache_level cache_level,
2048
- u32 unused)
2049
-{
2050
- struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
2051
- struct gen6_hw_ppgtt *ppgtt = vma->private;
2052
- u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
2053
- struct i915_page_table *pt;
2054
- unsigned int pde;
2055
-
2056
- ppgtt->base.pd.base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
2057
- ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
2058
-
2059
- gen6_for_all_pdes(pt, &ppgtt->base.pd, pde)
2060
- gen6_write_pde(ppgtt, pde, pt);
2061
-
2062
- mark_tlbs_dirty(&ppgtt->base);
2063
- gen6_ggtt_invalidate(ppgtt->base.vm.i915);
2064
-
2065
- return 0;
2066
-}
2067
-
2068
-static void pd_vma_unbind(struct i915_vma *vma)
2069
-{
2070
- struct gen6_hw_ppgtt *ppgtt = vma->private;
2071
- struct i915_page_table * const scratch_pt = ppgtt->base.vm.scratch_pt;
2072
- struct i915_page_table *pt;
2073
- unsigned int pde;
2074
-
2075
- if (!ppgtt->scan_for_unused_pt)
2076
- return;
2077
-
2078
- /* Free all no longer used page tables */
2079
- gen6_for_all_pdes(pt, &ppgtt->base.pd, pde) {
2080
- if (pt->used_ptes || pt == scratch_pt)
2081
- continue;
2082
-
2083
- free_pt(&ppgtt->base.vm, pt);
2084
- ppgtt->base.pd.page_table[pde] = scratch_pt;
2085
- }
2086
-
2087
- ppgtt->scan_for_unused_pt = false;
2088
-}
2089
-
2090
-static const struct i915_vma_ops pd_vma_ops = {
2091
- .set_pages = pd_vma_set_pages,
2092
- .clear_pages = pd_vma_clear_pages,
2093
- .bind_vma = pd_vma_bind,
2094
- .unbind_vma = pd_vma_unbind,
2095
-};
2096
-
2097
-static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
2098
-{
2099
- struct drm_i915_private *i915 = ppgtt->base.vm.i915;
2100
- struct i915_ggtt *ggtt = &i915->ggtt;
2101
- struct i915_vma *vma;
2102
-
2103
- GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
2104
- GEM_BUG_ON(size > ggtt->vm.total);
2105
-
2106
- vma = kmem_cache_zalloc(i915->vmas, GFP_KERNEL);
2107
- if (!vma)
2108
- return ERR_PTR(-ENOMEM);
2109
-
2110
- init_request_active(&vma->last_fence, NULL);
2111
-
2112
- vma->vm = &ggtt->vm;
2113
- vma->ops = &pd_vma_ops;
2114
- vma->private = ppgtt;
2115
-
2116
- vma->active = RB_ROOT;
2117
-
2118
- vma->size = size;
2119
- vma->fence_size = size;
2120
- vma->flags = I915_VMA_GGTT;
2121
- vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
2122
-
2123
- INIT_LIST_HEAD(&vma->obj_link);
2124
- list_add(&vma->vm_link, &vma->vm->unbound_list);
2125
-
2126
- return vma;
2127
-}
2128
-
2129
-int gen6_ppgtt_pin(struct i915_hw_ppgtt *base)
2130
-{
2131
- struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
2132
- int err;
2133
-
2134
- /*
2135
- * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
2136
- * which will be pinned into every active context.
2137
- * (When vma->pin_count becomes atomic, I expect we will naturally
2138
- * need a larger, unpacked, type and kill this redundancy.)
2139
- */
2140
- if (ppgtt->pin_count++)
2141
- return 0;
2142
-
2143
- /*
2144
-	 * PPGTT PDEs reside in the GGTT and consist of 512 entries. The
2145
- * allocator works in address space sizes, so it's multiplied by page
2146
- * size. We allocate at the top of the GTT to avoid fragmentation.
2147
- */
2148
- err = i915_vma_pin(ppgtt->vma,
2149
- 0, GEN6_PD_ALIGN,
2150
- PIN_GLOBAL | PIN_HIGH);
2151
- if (err)
2152
- goto unpin;
2153
-
2154
- return 0;
2155
-
2156
-unpin:
2157
- ppgtt->pin_count = 0;
2158
- return err;
2159
-}
2160
-
2161
-void gen6_ppgtt_unpin(struct i915_hw_ppgtt *base)
2162
-{
2163
- struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
2164
-
2165
- GEM_BUG_ON(!ppgtt->pin_count);
2166
- if (--ppgtt->pin_count)
2167
- return;
2168
-
2169
- i915_vma_unpin(ppgtt->vma);
2170
-}
2171
-
2172
-static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
2173
-{
2174
- struct i915_ggtt * const ggtt = &i915->ggtt;
2175
- struct gen6_hw_ppgtt *ppgtt;
2176
- int err;
2177
-
2178
- ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2179
- if (!ppgtt)
2180
- return ERR_PTR(-ENOMEM);
2181
-
2182
- kref_init(&ppgtt->base.ref);
2183
-
2184
- ppgtt->base.vm.i915 = i915;
2185
- ppgtt->base.vm.dma = &i915->drm.pdev->dev;
2186
-
2187
- ppgtt->base.vm.total = I915_PDES * GEN6_PTES * I915_GTT_PAGE_SIZE;
2188
-
2189
- i915_address_space_init(&ppgtt->base.vm, i915);
2190
-
2191
- ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
2192
- ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
2193
- ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
2194
- ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
2195
- ppgtt->base.debug_dump = gen6_dump_ppgtt;
2196
-
2197
- ppgtt->base.vm.vma_ops.bind_vma = ppgtt_bind_vma;
2198
- ppgtt->base.vm.vma_ops.unbind_vma = ppgtt_unbind_vma;
2199
- ppgtt->base.vm.vma_ops.set_pages = ppgtt_set_pages;
2200
- ppgtt->base.vm.vma_ops.clear_pages = clear_pages;
2201
-
2202
- ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
2203
-
2204
- err = gen6_ppgtt_init_scratch(ppgtt);
2205
- if (err)
2206
- goto err_free;
2207
-
2208
- ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
2209
- if (IS_ERR(ppgtt->vma)) {
2210
- err = PTR_ERR(ppgtt->vma);
2211
- goto err_scratch;
2212
- }
2213
-
2214
- return &ppgtt->base;
2215
-
2216
-err_scratch:
2217
- gen6_ppgtt_free_scratch(&ppgtt->base.vm);
2218
-err_free:
2219
- kfree(ppgtt);
2220
- return ERR_PTR(err);
2221
-}
2222
-
2223
-static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
2224
-{
2225
-	/* This function is for GTT-related workarounds. It is
2226
- * called on driver load and after a GPU reset, so you can place
2227
- * workarounds here even if they get overwritten by GPU reset.
2228
- */
2229
- /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
2230
- if (IS_BROADWELL(dev_priv))
2231
- I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
2232
- else if (IS_CHERRYVIEW(dev_priv))
2233
- I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
2234
- else if (IS_GEN9_LP(dev_priv))
2235
- I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
2236
- else if (INTEL_GEN(dev_priv) >= 9)
2237
- I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
2238
-
2239
- /*
2240
- * To support 64K PTEs we need to first enable the use of the
2241
-	 * Intermediate-Page-Size (IPS) bit of the PDE field via some magical
2242
- * mmio, otherwise the page-walker will simply ignore the IPS bit. This
2243
- * shouldn't be needed after GEN10.
2244
- *
2245
- * 64K pages were first introduced from BDW+, although technically they
2246
- * only *work* from gen9+. For pre-BDW we instead have the option for
2247
- * 32K pages, but we don't currently have any support for it in our
2248
- * driver.
2249
- */
2250
- if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) &&
2251
- INTEL_GEN(dev_priv) <= 10)
2252
- I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA,
2253
- I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) |
2254
- GAMW_ECO_ENABLE_64K_IPS_FIELD);
2255
-}
2256
-
2257
-int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv)
2258
-{
2259
- gtt_write_workarounds(dev_priv);
2260
-
2261
- /* In the case of execlists, PPGTT is enabled by the context descriptor
2262
- * and the PDPs are contained within the context itself. We don't
2263
- * need to do anything here. */
2264
- if (HAS_LOGICAL_RING_CONTEXTS(dev_priv))
2265
- return 0;
2266
-
2267
- if (!USES_PPGTT(dev_priv))
2268
- return 0;
2269
-
2270
- if (IS_GEN6(dev_priv))
2271
- gen6_ppgtt_enable(dev_priv);
2272
- else if (IS_GEN7(dev_priv))
2273
- gen7_ppgtt_enable(dev_priv);
2274
- else if (INTEL_GEN(dev_priv) >= 8)
2275
- gen8_ppgtt_enable(dev_priv);
2276
- else
2277
- MISSING_CASE(INTEL_GEN(dev_priv));
2278
-
2279
- return 0;
2280
-}
2281
-
2282
-static struct i915_hw_ppgtt *
2283
-__hw_ppgtt_create(struct drm_i915_private *i915)
2284
-{
2285
- if (INTEL_GEN(i915) < 8)
2286
- return gen6_ppgtt_create(i915);
2287
- else
2288
- return gen8_ppgtt_create(i915);
2289
-}
2290
-
2291
-struct i915_hw_ppgtt *
2292
-i915_ppgtt_create(struct drm_i915_private *i915,
2293
- struct drm_i915_file_private *fpriv)
2294
-{
2295
- struct i915_hw_ppgtt *ppgtt;
2296
-
2297
- ppgtt = __hw_ppgtt_create(i915);
2298
- if (IS_ERR(ppgtt))
2299
- return ppgtt;
2300
-
2301
- ppgtt->vm.file = fpriv;
2302
-
2303
- trace_i915_ppgtt_create(&ppgtt->vm);
2304
-
2305
- return ppgtt;
2306
-}
2307
-
2308
-void i915_ppgtt_close(struct i915_address_space *vm)
2309
-{
2310
- GEM_BUG_ON(vm->closed);
2311
- vm->closed = true;
2312
-}
2313
-
2314
-static void ppgtt_destroy_vma(struct i915_address_space *vm)
2315
-{
2316
- struct list_head *phases[] = {
2317
- &vm->active_list,
2318
- &vm->inactive_list,
2319
- &vm->unbound_list,
2320
- NULL,
2321
- }, **phase;
2322
-
2323
- vm->closed = true;
2324
- for (phase = phases; *phase; phase++) {
2325
- struct i915_vma *vma, *vn;
2326
-
2327
- list_for_each_entry_safe(vma, vn, *phase, vm_link)
2328
- i915_vma_destroy(vma);
2329
- }
2330
-}
2331
-
2332
-void i915_ppgtt_release(struct kref *kref)
2333
-{
2334
- struct i915_hw_ppgtt *ppgtt =
2335
- container_of(kref, struct i915_hw_ppgtt, ref);
2336
-
2337
- trace_i915_ppgtt_release(&ppgtt->vm);
2338
-
2339
- ppgtt_destroy_vma(&ppgtt->vm);
2340
-
2341
- GEM_BUG_ON(!list_empty(&ppgtt->vm.active_list));
2342
- GEM_BUG_ON(!list_empty(&ppgtt->vm.inactive_list));
2343
- GEM_BUG_ON(!list_empty(&ppgtt->vm.unbound_list));
2344
-
2345
- ppgtt->vm.cleanup(&ppgtt->vm);
2346
- i915_address_space_fini(&ppgtt->vm);
2347
- kfree(ppgtt);
2348
-}
2349
-
2350
-/* Certain Gen5 chipsets require idling the GPU before
2351
- * unmapping anything from the GTT when VT-d is enabled.
2352
- */
2353
-static bool needs_idle_maps(struct drm_i915_private *dev_priv)
2354
-{
2355
- /* Query intel_iommu to see if we need the workaround. Presumably that
2356
- * was loaded first.
2357
- */
2358
- return IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_vtd_active();
2359
-}
2360
-
2361
-static void gen6_check_and_clear_faults(struct drm_i915_private *dev_priv)
2362
-{
2363
- struct intel_engine_cs *engine;
2364
- enum intel_engine_id id;
2365
- u32 fault;
2366
-
2367
- for_each_engine(engine, dev_priv, id) {
2368
- fault = I915_READ(RING_FAULT_REG(engine));
2369
- if (fault & RING_FAULT_VALID) {
2370
- DRM_DEBUG_DRIVER("Unexpected fault\n"
2371
- "\tAddr: 0x%08lx\n"
2372
- "\tAddress space: %s\n"
2373
- "\tSource ID: %d\n"
2374
- "\tType: %d\n",
2375
- fault & PAGE_MASK,
2376
- fault & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2377
- RING_FAULT_SRCID(fault),
2378
- RING_FAULT_FAULT_TYPE(fault));
2379
- I915_WRITE(RING_FAULT_REG(engine),
2380
- fault & ~RING_FAULT_VALID);
2381
- }
2382
- }
2383
-
2384
- POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS]));
2385
-}
2386
-
2387
-static void gen8_check_and_clear_faults(struct drm_i915_private *dev_priv)
2388
-{
2389
- u32 fault = I915_READ(GEN8_RING_FAULT_REG);
2390
-
2391
- if (fault & RING_FAULT_VALID) {
2392
- u32 fault_data0, fault_data1;
2393
- u64 fault_addr;
2394
-
2395
- fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0);
2396
- fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1);
2397
- fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
2398
- ((u64)fault_data0 << 12);
2399
-
2400
- DRM_DEBUG_DRIVER("Unexpected fault\n"
2401
- "\tAddr: 0x%08x_%08x\n"
2402
- "\tAddress space: %s\n"
2403
- "\tEngine ID: %d\n"
2404
- "\tSource ID: %d\n"
2405
- "\tType: %d\n",
2406
- upper_32_bits(fault_addr),
2407
- lower_32_bits(fault_addr),
2408
- fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
2409
- GEN8_RING_FAULT_ENGINE_ID(fault),
2410
- RING_FAULT_SRCID(fault),
2411
- RING_FAULT_FAULT_TYPE(fault));
2412
- I915_WRITE(GEN8_RING_FAULT_REG,
2413
- fault & ~RING_FAULT_VALID);
2414
- }
2415
-
2416
- POSTING_READ(GEN8_RING_FAULT_REG);
2417
-}
2418
-
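The 64-bit fault address in gen8_check_and_clear_faults() above is stitched together from two 32-bit registers: DATA0 carries the page-aligned low bits and the masked low bits of DATA1 supply bits 47:44. A standalone sketch follows; the register values and the 0xf high-bit mask are assumptions used only to illustrate the shifts shown above.

/* Sketch of the fault-address composition above; register values and the
 * 0xf mask for the high VA bits are assumptions for illustration. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t fault_data0 = 0x000abcde;	/* hypothetical: VA bits 43:12, in pages */
	uint32_t fault_data1 = 0x00000003;	/* hypothetical: low bits hold VA bits 47:44 */

	uint64_t fault_addr = ((uint64_t)(fault_data1 & 0xf) << 44) |
			      ((uint64_t)fault_data0 << 12);

	printf("fault at 0x%08x_%08x\n",
	       (unsigned int)(fault_addr >> 32), (unsigned int)fault_addr);
	return 0;
}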
2419
-void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
2420
-{
2421
- /* From GEN8 onwards we only have one 'All Engine Fault Register' */
2422
- if (INTEL_GEN(dev_priv) >= 8)
2423
- gen8_check_and_clear_faults(dev_priv);
2424
- else if (INTEL_GEN(dev_priv) >= 6)
2425
- gen6_check_and_clear_faults(dev_priv);
2426
- else
2427
- return;
2428
-}
2429
-
2430
-void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv)
2431
-{
2432
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
2433
-
2434
- /* Don't bother messing with faults pre GEN6 as we have little
2435
- * documentation supporting that it's a good idea.
2436
- */
2437
- if (INTEL_GEN(dev_priv) < 6)
2438
- return;
2439
-
2440
- i915_check_and_clear_faults(dev_priv);
2441
-
2442
- ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
2443
-
2444
- i915_ggtt_invalidate(dev_priv);
2445
-}
25
+#include "i915_vgpu.h"
244626
244727 int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
244828 struct sg_table *pages)
....@@ -2451,10 +31,13 @@
245131 if (dma_map_sg_attrs(&obj->base.dev->pdev->dev,
245232 pages->sgl, pages->nents,
245333 PCI_DMA_BIDIRECTIONAL,
34
+ DMA_ATTR_SKIP_CPU_SYNC |
35
+ DMA_ATTR_NO_KERNEL_MAPPING |
245436 DMA_ATTR_NO_WARN))
245537 return 0;
245638
2457
- /* If the DMA remap fails, one cause can be that we have
39
+ /*
40
+ * If the DMA remap fails, one cause can be that we have
245841 * too many objects pinned in a small remapping table,
245942 * such as swiotlb. Incrementally purge all other objects and
246043 * try again - if there are no more pages to remove from
....@@ -2464,365 +47,9 @@
246447 } while (i915_gem_shrink(to_i915(obj->base.dev),
246548 obj->base.size >> PAGE_SHIFT, NULL,
246649 I915_SHRINK_BOUND |
2467
- I915_SHRINK_UNBOUND |
2468
- I915_SHRINK_ACTIVE));
50
+ I915_SHRINK_UNBOUND));
246951
247052 return -ENOSPC;
2471
-}
2472
-
2473
-static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
2474
-{
2475
- writeq(pte, addr);
2476
-}
2477
-
2478
-static void gen8_ggtt_insert_page(struct i915_address_space *vm,
2479
- dma_addr_t addr,
2480
- u64 offset,
2481
- enum i915_cache_level level,
2482
- u32 unused)
2483
-{
2484
- struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2485
- gen8_pte_t __iomem *pte =
2486
- (gen8_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
2487
-
2488
- gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
2489
-
2490
- ggtt->invalidate(vm->i915);
2491
-}
2492
-
2493
-static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2494
- struct i915_vma *vma,
2495
- enum i915_cache_level level,
2496
- u32 flags)
2497
-{
2498
- struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2499
- struct sgt_iter sgt_iter;
2500
- gen8_pte_t __iomem *gtt_entries;
2501
- const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
2502
- dma_addr_t addr;
2503
-
2504
- /*
2505
- * Note that we ignore PTE_READ_ONLY here. The caller must be careful
2506
- * not to allow the user to override access to a read only page.
2507
- */
2508
-
2509
- gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
2510
- gtt_entries += vma->node.start >> PAGE_SHIFT;
2511
- for_each_sgt_dma(addr, sgt_iter, vma->pages)
2512
- gen8_set_pte(gtt_entries++, pte_encode | addr);
2513
-
2514
- /*
2515
- * We want to flush the TLBs only after we're certain all the PTE
2516
- * updates have finished.
2517
- */
2518
- ggtt->invalidate(vm->i915);
2519
-}
2520
-
2521
-static void gen6_ggtt_insert_page(struct i915_address_space *vm,
2522
- dma_addr_t addr,
2523
- u64 offset,
2524
- enum i915_cache_level level,
2525
- u32 flags)
2526
-{
2527
- struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2528
- gen6_pte_t __iomem *pte =
2529
- (gen6_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
2530
-
2531
- iowrite32(vm->pte_encode(addr, level, flags), pte);
2532
-
2533
- ggtt->invalidate(vm->i915);
2534
-}
2535
-
2536
-/*
2537
- * Binds an object into the global gtt with the specified cache level. The object
2538
- * will be accessible to the GPU via commands whose operands reference offsets
2539
- * within the global GTT as well as accessible by the GPU through the GMADR
2540
- * mapped BAR (dev_priv->mm.gtt->gtt).
2541
- */
2542
-static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2543
- struct i915_vma *vma,
2544
- enum i915_cache_level level,
2545
- u32 flags)
2546
-{
2547
- struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2548
- gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
2549
- unsigned int i = vma->node.start >> PAGE_SHIFT;
2550
- struct sgt_iter iter;
2551
- dma_addr_t addr;
2552
- for_each_sgt_dma(addr, iter, vma->pages)
2553
- iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
2554
-
2555
- /*
2556
- * We want to flush the TLBs only after we're certain all the PTE
2557
- * updates have finished.
2558
- */
2559
- ggtt->invalidate(vm->i915);
2560
-}
2561
-
2562
-static void nop_clear_range(struct i915_address_space *vm,
2563
- u64 start, u64 length)
2564
-{
2565
-}
2566
-
2567
-static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2568
- u64 start, u64 length)
2569
-{
2570
- struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2571
- unsigned first_entry = start >> PAGE_SHIFT;
2572
- unsigned num_entries = length >> PAGE_SHIFT;
2573
- const gen8_pte_t scratch_pte =
2574
- gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
2575
- gen8_pte_t __iomem *gtt_base =
2576
- (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
2577
- const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2578
- int i;
2579
-
2580
- if (WARN(num_entries > max_entries,
2581
- "First entry = %d; Num entries = %d (max=%d)\n",
2582
- first_entry, num_entries, max_entries))
2583
- num_entries = max_entries;
2584
-
2585
- for (i = 0; i < num_entries; i++)
2586
- gen8_set_pte(&gtt_base[i], scratch_pte);
2587
-}
2588
-
2589
-static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
2590
-{
2591
- struct drm_i915_private *dev_priv = vm->i915;
2592
-
2593
- /*
2594
- * Make sure the internal GAM fifo has been cleared of all GTT
2595
- * writes before exiting stop_machine(). This guarantees that
2596
- * any aperture accesses waiting to start in another process
2597
- * cannot back up behind the GTT writes causing a hang.
2598
- * The register can be any arbitrary GAM register.
2599
- */
2600
- POSTING_READ(GFX_FLSH_CNTL_GEN6);
2601
-}
2602
-
2603
-struct insert_page {
2604
- struct i915_address_space *vm;
2605
- dma_addr_t addr;
2606
- u64 offset;
2607
- enum i915_cache_level level;
2608
-};
2609
-
2610
-static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
2611
-{
2612
- struct insert_page *arg = _arg;
2613
-
2614
- gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
2615
- bxt_vtd_ggtt_wa(arg->vm);
2616
-
2617
- return 0;
2618
-}
2619
-
2620
-static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
2621
- dma_addr_t addr,
2622
- u64 offset,
2623
- enum i915_cache_level level,
2624
- u32 unused)
2625
-{
2626
- struct insert_page arg = { vm, addr, offset, level };
2627
-
2628
- stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
2629
-}
2630
-
2631
-struct insert_entries {
2632
- struct i915_address_space *vm;
2633
- struct i915_vma *vma;
2634
- enum i915_cache_level level;
2635
- u32 flags;
2636
-};
2637
-
2638
-static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
2639
-{
2640
- struct insert_entries *arg = _arg;
2641
-
2642
- gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
2643
- bxt_vtd_ggtt_wa(arg->vm);
2644
-
2645
- return 0;
2646
-}
2647
-
2648
-static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
2649
- struct i915_vma *vma,
2650
- enum i915_cache_level level,
2651
- u32 flags)
2652
-{
2653
- struct insert_entries arg = { vm, vma, level, flags };
2654
-
2655
- stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
2656
-}
2657
-
2658
-struct clear_range {
2659
- struct i915_address_space *vm;
2660
- u64 start;
2661
- u64 length;
2662
-};
2663
-
2664
-static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
2665
-{
2666
- struct clear_range *arg = _arg;
2667
-
2668
- gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
2669
- bxt_vtd_ggtt_wa(arg->vm);
2670
-
2671
- return 0;
2672
-}
2673
-
2674
-static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
2675
- u64 start,
2676
- u64 length)
2677
-{
2678
- struct clear_range arg = { vm, start, length };
2679
-
2680
- stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
2681
-}
2682
-
2683
-static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2684
- u64 start, u64 length)
2685
-{
2686
- struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2687
- unsigned first_entry = start >> PAGE_SHIFT;
2688
- unsigned num_entries = length >> PAGE_SHIFT;
2689
- gen6_pte_t scratch_pte, __iomem *gtt_base =
2690
- (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
2691
- const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2692
- int i;
2693
-
2694
- if (WARN(num_entries > max_entries,
2695
- "First entry = %d; Num entries = %d (max=%d)\n",
2696
- first_entry, num_entries, max_entries))
2697
- num_entries = max_entries;
2698
-
2699
- scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
2700
- I915_CACHE_LLC, 0);
2701
-
2702
- for (i = 0; i < num_entries; i++)
2703
- iowrite32(scratch_pte, &gtt_base[i]);
2704
-}
2705
-
2706
-static void i915_ggtt_insert_page(struct i915_address_space *vm,
2707
- dma_addr_t addr,
2708
- u64 offset,
2709
- enum i915_cache_level cache_level,
2710
- u32 unused)
2711
-{
2712
- unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2713
- AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2714
-
2715
- intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
2716
-}
2717
-
2718
-static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2719
- struct i915_vma *vma,
2720
- enum i915_cache_level cache_level,
2721
- u32 unused)
2722
-{
2723
- unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2724
- AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2725
-
2726
- intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
2727
- flags);
2728
-}
2729
-
2730
-static void i915_ggtt_clear_range(struct i915_address_space *vm,
2731
- u64 start, u64 length)
2732
-{
2733
- intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
2734
-}
2735
-
2736
-static int ggtt_bind_vma(struct i915_vma *vma,
2737
- enum i915_cache_level cache_level,
2738
- u32 flags)
2739
-{
2740
- struct drm_i915_private *i915 = vma->vm->i915;
2741
- struct drm_i915_gem_object *obj = vma->obj;
2742
- u32 pte_flags;
2743
-
2744
- /* Applicable to VLV (gen8+ do not support RO in the GGTT) */
2745
- pte_flags = 0;
2746
- if (i915_gem_object_is_readonly(obj))
2747
- pte_flags |= PTE_READ_ONLY;
2748
-
2749
- intel_runtime_pm_get(i915);
2750
- vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
2751
- intel_runtime_pm_put(i915);
2752
-
2753
- vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
2754
-
2755
- /*
2756
- * Without aliasing PPGTT there's no difference between
2757
- * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2758
- * upgrade to both bound if we bind either to avoid double-binding.
2759
- */
2760
- vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
2761
-
2762
- return 0;
2763
-}
2764
-
2765
-static void ggtt_unbind_vma(struct i915_vma *vma)
2766
-{
2767
- struct drm_i915_private *i915 = vma->vm->i915;
2768
-
2769
- intel_runtime_pm_get(i915);
2770
- vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
2771
- intel_runtime_pm_put(i915);
2772
-}
2773
-
2774
-static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2775
- enum i915_cache_level cache_level,
2776
- u32 flags)
2777
-{
2778
- struct drm_i915_private *i915 = vma->vm->i915;
2779
- u32 pte_flags;
2780
- int ret;
2781
-
2782
- /* Currently applicable only to VLV */
2783
- pte_flags = 0;
2784
- if (i915_gem_object_is_readonly(vma->obj))
2785
- pte_flags |= PTE_READ_ONLY;
2786
-
2787
- if (flags & I915_VMA_LOCAL_BIND) {
2788
- struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
2789
-
2790
- if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
2791
- ret = appgtt->vm.allocate_va_range(&appgtt->vm,
2792
- vma->node.start,
2793
- vma->size);
2794
- if (ret)
2795
- return ret;
2796
- }
2797
-
2798
- appgtt->vm.insert_entries(&appgtt->vm, vma, cache_level,
2799
- pte_flags);
2800
- }
2801
-
2802
- if (flags & I915_VMA_GLOBAL_BIND) {
2803
- intel_runtime_pm_get(i915);
2804
- vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
2805
- intel_runtime_pm_put(i915);
2806
- }
2807
-
2808
- return 0;
2809
-}
2810
-
2811
-static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
2812
-{
2813
- struct drm_i915_private *i915 = vma->vm->i915;
2814
-
2815
- if (vma->flags & I915_VMA_GLOBAL_BIND) {
2816
- intel_runtime_pm_get(i915);
2817
- vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
2818
- intel_runtime_pm_put(i915);
2819
- }
2820
-
2821
- if (vma->flags & I915_VMA_LOCAL_BIND) {
2822
- struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->vm;
2823
-
2824
- vm->clear_range(vm, vma->node.start, vma->size);
2825
- }
282653 }
282754
282855 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
....@@ -2833,1072 +60,17 @@
283360 struct i915_ggtt *ggtt = &dev_priv->ggtt;
283461
283562 if (unlikely(ggtt->do_idle_maps)) {
2836
- if (i915_gem_wait_for_idle(dev_priv, 0, MAX_SCHEDULE_TIMEOUT)) {
2837
- DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
63
+ /* XXX This does not prevent more requests being submitted! */
64
+ if (intel_gt_retire_requests_timeout(ggtt->vm.gt,
65
+ -MAX_SCHEDULE_TIMEOUT)) {
66
+ drm_err(&dev_priv->drm,
67
+ "Failed to wait for idle; VT'd may hang.\n");
283868 /* Wait a bit, in hopes it avoids the hang */
283969 udelay(10);
284070 }
284171 }
284272
284373 dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL);
2844
-}
2845
-
2846
-static int ggtt_set_pages(struct i915_vma *vma)
2847
-{
2848
- int ret;
2849
-
2850
- GEM_BUG_ON(vma->pages);
2851
-
2852
- ret = i915_get_ggtt_vma_pages(vma);
2853
- if (ret)
2854
- return ret;
2855
-
2856
- vma->page_sizes = vma->obj->mm.page_sizes;
2857
-
2858
- return 0;
2859
-}
2860
-
2861
-static void i915_gtt_color_adjust(const struct drm_mm_node *node,
2862
- unsigned long color,
2863
- u64 *start,
2864
- u64 *end)
2865
-{
2866
- if (node->allocated && node->color != color)
2867
- *start += I915_GTT_PAGE_SIZE;
2868
-
2869
- /* Also leave a space between the unallocated reserved node after the
2870
- * GTT and any objects within the GTT, i.e. we use the color adjustment
2871
- * to insert a guard page to prevent prefetches crossing over the
2872
- * GTT boundary.
2873
- */
2874
- node = list_next_entry(node, node_list);
2875
- if (node->color != color)
2876
- *end -= I915_GTT_PAGE_SIZE;
2877
-}
2878
-
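i915_gtt_color_adjust() above inserts guard pages purely by shrinking the allocation window handed to the allocator. Its effect on a candidate [start, end) range can be sketched in isolation; the snippet below is a simplified illustration that ignores the allocated check, with the 4 KiB page size and the colour values being assumptions.

/* Isolated sketch of the colour-based guard-page trimming above; page size
 * and colours are assumptions, and the node->allocated test is omitted. */
#include <stdio.h>
#include <stdint.h>

#define SKETCH_GTT_PAGE_SIZE 4096u	/* assumed I915_GTT_PAGE_SIZE */

static void adjust(unsigned long prev_color, unsigned long next_color,
		   unsigned long color, uint64_t *start, uint64_t *end)
{
	if (prev_color != color)	/* neighbour below has a different colour */
		*start += SKETCH_GTT_PAGE_SIZE;
	if (next_color != color)	/* neighbour above has a different colour */
		*end -= SKETCH_GTT_PAGE_SIZE;
}

int main(void)
{
	uint64_t start = 0x10000, end = 0x20000;

	adjust(1, 0, 0, &start, &end);	/* hypothetical colours */
	printf("trimmed window: [0x%llx, 0x%llx)\n",
	       (unsigned long long)start, (unsigned long long)end);
	return 0;
}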
2879
-int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
2880
-{
2881
- struct i915_ggtt *ggtt = &i915->ggtt;
2882
- struct i915_hw_ppgtt *ppgtt;
2883
- int err;
2884
-
2885
- ppgtt = i915_ppgtt_create(i915, ERR_PTR(-EPERM));
2886
- if (IS_ERR(ppgtt))
2887
- return PTR_ERR(ppgtt);
2888
-
2889
- if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
2890
- err = -ENODEV;
2891
- goto err_ppgtt;
2892
- }
2893
-
2894
- /*
2895
- * Note we only pre-allocate as far as the end of the global
2896
- * GTT. On 48b / 4-level page-tables, the difference is very,
2897
- * very significant! We have to preallocate as GVT/vgpu does
2898
- * not like the page directory disappearing.
2899
- */
2900
- err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
2901
- if (err)
2902
- goto err_ppgtt;
2903
-
2904
- i915->mm.aliasing_ppgtt = ppgtt;
2905
-
2906
- GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
2907
- ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
2908
-
2909
- GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
2910
- ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
2911
-
2912
- return 0;
2913
-
2914
-err_ppgtt:
2915
- i915_ppgtt_put(ppgtt);
2916
- return err;
2917
-}
2918
-
2919
-void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915)
2920
-{
2921
- struct i915_ggtt *ggtt = &i915->ggtt;
2922
- struct i915_hw_ppgtt *ppgtt;
2923
-
2924
- ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt);
2925
- if (!ppgtt)
2926
- return;
2927
-
2928
- i915_ppgtt_put(ppgtt);
2929
-
2930
- ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
2931
- ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
2932
-}
2933
-
2934
-int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
2935
-{
2936
-	/* Let GEM manage all of the aperture.
2937
- *
2938
- * However, leave one page at the end still bound to the scratch page.
2939
- * There are a number of places where the hardware apparently prefetches
2940
- * past the end of the object, and we've seen multiple hangs with the
2941
- * GPU head pointer stuck in a batchbuffer bound at the last page of the
2942
- * aperture. One page should be enough to keep any prefetching inside
2943
- * of the aperture.
2944
- */
2945
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
2946
- unsigned long hole_start, hole_end;
2947
- struct drm_mm_node *entry;
2948
- int ret;
2949
-
2950
- ret = intel_vgt_balloon(dev_priv);
2951
- if (ret)
2952
- return ret;
2953
-
2954
- /* Reserve a mappable slot for our lockless error capture */
2955
- ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, &ggtt->error_capture,
2956
- PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
2957
- 0, ggtt->mappable_end,
2958
- DRM_MM_INSERT_LOW);
2959
- if (ret)
2960
- return ret;
2961
-
2962
- /* Clear any non-preallocated blocks */
2963
- drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
2964
- DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2965
- hole_start, hole_end);
2966
- ggtt->vm.clear_range(&ggtt->vm, hole_start,
2967
- hole_end - hole_start);
2968
- }
2969
-
2970
- /* And finally clear the reserved guard page */
2971
- ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
2972
-
2973
- if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
2974
- ret = i915_gem_init_aliasing_ppgtt(dev_priv);
2975
- if (ret)
2976
- goto err;
2977
- }
2978
-
2979
- return 0;
2980
-
2981
-err:
2982
- drm_mm_remove_node(&ggtt->error_capture);
2983
- return ret;
2984
-}
2985
-
2986
-/**
2987
- * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
2988
- * @dev_priv: i915 device
2989
- */
2990
-void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
2991
-{
2992
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
2993
- struct i915_vma *vma, *vn;
2994
- struct pagevec *pvec;
2995
-
2996
- ggtt->vm.closed = true;
2997
-
2998
- mutex_lock(&dev_priv->drm.struct_mutex);
2999
- i915_gem_fini_aliasing_ppgtt(dev_priv);
3000
-
3001
- GEM_BUG_ON(!list_empty(&ggtt->vm.active_list));
3002
- list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link)
3003
- WARN_ON(i915_vma_unbind(vma));
3004
-
3005
- if (drm_mm_node_allocated(&ggtt->error_capture))
3006
- drm_mm_remove_node(&ggtt->error_capture);
3007
-
3008
- if (drm_mm_initialized(&ggtt->vm.mm)) {
3009
- intel_vgt_deballoon(dev_priv);
3010
- i915_address_space_fini(&ggtt->vm);
3011
- }
3012
-
3013
- ggtt->vm.cleanup(&ggtt->vm);
3014
-
3015
- pvec = &dev_priv->mm.wc_stash.pvec;
3016
- if (pvec->nr) {
3017
- set_pages_array_wb(pvec->pages, pvec->nr);
3018
- __pagevec_release(pvec);
3019
- }
3020
-
3021
- mutex_unlock(&dev_priv->drm.struct_mutex);
3022
-
3023
- arch_phys_wc_del(ggtt->mtrr);
3024
- io_mapping_fini(&ggtt->iomap);
3025
-
3026
- i915_gem_cleanup_stolen(&dev_priv->drm);
3027
-}
3028
-
3029
-static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
3030
-{
3031
- snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
3032
- snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
3033
- return snb_gmch_ctl << 20;
3034
-}
3035
-
3036
-static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
3037
-{
3038
- bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
3039
- bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
3040
- if (bdw_gmch_ctl)
3041
- bdw_gmch_ctl = 1 << bdw_gmch_ctl;
3042
-
3043
-#ifdef CONFIG_X86_32
3044
- /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
3045
- if (bdw_gmch_ctl > 4)
3046
- bdw_gmch_ctl = 4;
3047
-#endif
3048
-
3049
- return bdw_gmch_ctl << 20;
3050
-}
3051
-
3052
-static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
3053
-{
3054
- gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
3055
- gmch_ctrl &= SNB_GMCH_GGMS_MASK;
3056
-
3057
- if (gmch_ctrl)
3058
- return 1 << (20 + gmch_ctrl);
3059
-
3060
- return 0;
3061
-}
3062
-
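As a worked example of the GMCH decode above: gen8_get_total_gtt_size() turns the already-masked GGMS field into a power-of-two number of megabytes of PTE space, and gen8_gmch_probe() below converts that into addressable GGTT via (size / sizeof(gen8_pte_t)) << PAGE_SHIFT. The sketch below is userspace illustration only; the 8-byte gen8 PTE size, 4 KiB page size and the field value are assumptions rather than values quoted from this hunk.

/* Worked example of the GGTT size derivation: a GGMS field of 3 selects
 * 8 MiB of PTE space, which at an assumed 8 bytes per gen8 PTE maps 4 GiB. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int ggms_field = 3;	/* hypothetical, already-masked GGMS value */
	uint64_t pte_bytes = ggms_field ? (1ull << ggms_field) << 20 : 0;
	uint64_t total = (pte_bytes / 8) << 12;	/* assumed 8-byte PTEs, 4 KiB pages */

	printf("GGMS=%u -> %llu MiB of PTEs -> %llu GiB of GGTT\n",
	       ggms_field,
	       (unsigned long long)(pte_bytes >> 20),
	       (unsigned long long)(total >> 30));
	return 0;
}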
3063
-static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
3064
-{
3065
- struct drm_i915_private *dev_priv = ggtt->vm.i915;
3066
- struct pci_dev *pdev = dev_priv->drm.pdev;
3067
- phys_addr_t phys_addr;
3068
- int ret;
3069
-
3070
- /* For Modern GENs the PTEs and register space are split in the BAR */
3071
- phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
3072
-
3073
- /*
3074
- * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
3075
- * will be dropped. For WC mappings in general we have 64 byte burst
3076
- * writes when the WC buffer is flushed, so we can't use it, but have to
3077
- * resort to an uncached mapping. The WC issue is easily caught by the
3078
- * readback check when writing GTT PTE entries.
3079
- */
3080
- if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10)
3081
- ggtt->gsm = ioremap_nocache(phys_addr, size);
3082
- else
3083
- ggtt->gsm = ioremap_wc(phys_addr, size);
3084
- if (!ggtt->gsm) {
3085
- DRM_ERROR("Failed to map the ggtt page table\n");
3086
- return -ENOMEM;
3087
- }
3088
-
3089
- ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
3090
- if (ret) {
3091
- DRM_ERROR("Scratch setup failed\n");
3092
- /* iounmap will also get called at remove, but meh */
3093
- iounmap(ggtt->gsm);
3094
- return ret;
3095
- }
3096
-
3097
- return 0;
3098
-}
3099
-
3100
-static struct intel_ppat_entry *
3101
-__alloc_ppat_entry(struct intel_ppat *ppat, unsigned int index, u8 value)
3102
-{
3103
- struct intel_ppat_entry *entry = &ppat->entries[index];
3104
-
3105
- GEM_BUG_ON(index >= ppat->max_entries);
3106
- GEM_BUG_ON(test_bit(index, ppat->used));
3107
-
3108
- entry->ppat = ppat;
3109
- entry->value = value;
3110
- kref_init(&entry->ref);
3111
- set_bit(index, ppat->used);
3112
- set_bit(index, ppat->dirty);
3113
-
3114
- return entry;
3115
-}
3116
-
3117
-static void __free_ppat_entry(struct intel_ppat_entry *entry)
3118
-{
3119
- struct intel_ppat *ppat = entry->ppat;
3120
- unsigned int index = entry - ppat->entries;
3121
-
3122
- GEM_BUG_ON(index >= ppat->max_entries);
3123
- GEM_BUG_ON(!test_bit(index, ppat->used));
3124
-
3125
- entry->value = ppat->clear_value;
3126
- clear_bit(index, ppat->used);
3127
- set_bit(index, ppat->dirty);
3128
-}
3129
-
3130
-/**
3131
- * intel_ppat_get - get a usable PPAT entry
3132
- * @i915: i915 device instance
3133
- * @value: the PPAT value required by the caller
3134
- *
3135
- * The function first looks for an existing PPAT entry which
3136
- * matches the required value. If perfectly matched, the existing PPAT
3137
- * entry will be used. If only partially matched, it will check whether
3138
- * there is any available PPAT index. If yes, it will allocate a new PPAT
3139
- * index for the required entry and update the HW. If not, the partially
3140
- * matched entry will be used.
3141
- */
3142
-const struct intel_ppat_entry *
3143
-intel_ppat_get(struct drm_i915_private *i915, u8 value)
3144
-{
3145
- struct intel_ppat *ppat = &i915->ppat;
3146
- struct intel_ppat_entry *entry = NULL;
3147
- unsigned int scanned, best_score;
3148
- int i;
3149
-
3150
- GEM_BUG_ON(!ppat->max_entries);
3151
-
3152
- scanned = best_score = 0;
3153
- for_each_set_bit(i, ppat->used, ppat->max_entries) {
3154
- unsigned int score;
3155
-
3156
- score = ppat->match(ppat->entries[i].value, value);
3157
- if (score > best_score) {
3158
- entry = &ppat->entries[i];
3159
- if (score == INTEL_PPAT_PERFECT_MATCH) {
3160
- kref_get(&entry->ref);
3161
- return entry;
3162
- }
3163
- best_score = score;
3164
- }
3165
- scanned++;
3166
- }
3167
-
3168
- if (scanned == ppat->max_entries) {
3169
- if (!entry)
3170
- return ERR_PTR(-ENOSPC);
3171
-
3172
- kref_get(&entry->ref);
3173
- return entry;
3174
- }
3175
-
3176
- i = find_first_zero_bit(ppat->used, ppat->max_entries);
3177
- entry = __alloc_ppat_entry(ppat, i, value);
3178
- ppat->update_hw(i915);
3179
- return entry;
3180
-}
3181
-
3182
-static void release_ppat(struct kref *kref)
3183
-{
3184
- struct intel_ppat_entry *entry =
3185
- container_of(kref, struct intel_ppat_entry, ref);
3186
- struct drm_i915_private *i915 = entry->ppat->i915;
3187
-
3188
- __free_ppat_entry(entry);
3189
- entry->ppat->update_hw(i915);
3190
-}
3191
-
3192
-/**
3193
- * intel_ppat_put - put back the PPAT entry obtained from intel_ppat_get()
3194
- * @entry: an intel PPAT entry
3195
- *
3196
- * Put back the PPAT entry obtained from intel_ppat_get(). If the PPAT index of the
3197
- * entry is dynamically allocated, its reference count will be decreased. Once
3198
- * the reference count reaches zero, the PPAT index becomes free again.
3199
- */
3200
-void intel_ppat_put(const struct intel_ppat_entry *entry)
3201
-{
3202
- struct intel_ppat *ppat = entry->ppat;
3203
- unsigned int index = entry - ppat->entries;
3204
-
3205
- GEM_BUG_ON(!ppat->max_entries);
3206
-
3207
- kref_put(&ppat->entries[index].ref, release_ppat);
3208
-}
3209
-
3210
-static void cnl_private_pat_update_hw(struct drm_i915_private *dev_priv)
3211
-{
3212
- struct intel_ppat *ppat = &dev_priv->ppat;
3213
- int i;
3214
-
3215
- for_each_set_bit(i, ppat->dirty, ppat->max_entries) {
3216
- I915_WRITE(GEN10_PAT_INDEX(i), ppat->entries[i].value);
3217
- clear_bit(i, ppat->dirty);
3218
- }
3219
-}
3220
-
3221
-static void bdw_private_pat_update_hw(struct drm_i915_private *dev_priv)
3222
-{
3223
- struct intel_ppat *ppat = &dev_priv->ppat;
3224
- u64 pat = 0;
3225
- int i;
3226
-
3227
- for (i = 0; i < ppat->max_entries; i++)
3228
- pat |= GEN8_PPAT(i, ppat->entries[i].value);
3229
-
3230
- bitmap_clear(ppat->dirty, 0, ppat->max_entries);
3231
-
3232
- I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
3233
- I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
3234
-}
3235
-
3236
-static unsigned int bdw_private_pat_match(u8 src, u8 dst)
3237
-{
3238
- unsigned int score = 0;
3239
- enum {
3240
- AGE_MATCH = BIT(0),
3241
- TC_MATCH = BIT(1),
3242
- CA_MATCH = BIT(2),
3243
- };
3244
-
3245
- /* Cache attribute has to be matched. */
3246
- if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))
3247
- return 0;
3248
-
3249
- score |= CA_MATCH;
3250
-
3251
- if (GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst))
3252
- score |= TC_MATCH;
3253
-
3254
- if (GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst))
3255
- score |= AGE_MATCH;
3256
-
3257
- if (score == (AGE_MATCH | TC_MATCH | CA_MATCH))
3258
- return INTEL_PPAT_PERFECT_MATCH;
3259
-
3260
- return score;
3261
-}
3262
-
3263
-static unsigned int chv_private_pat_match(u8 src, u8 dst)
3264
-{
3265
- return (CHV_PPAT_GET_SNOOP(src) == CHV_PPAT_GET_SNOOP(dst)) ?
3266
- INTEL_PPAT_PERFECT_MATCH : 0;
3267
-}
3268
-
3269
-static void cnl_setup_private_ppat(struct intel_ppat *ppat)
3270
-{
3271
- ppat->max_entries = 8;
3272
- ppat->update_hw = cnl_private_pat_update_hw;
3273
- ppat->match = bdw_private_pat_match;
3274
- ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
3275
-
3276
- __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);
3277
- __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
3278
- __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
3279
- __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);
3280
- __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3281
- __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3282
- __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3283
- __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3284
-}
3285
-
3286
-/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
3287
- * bits. When using advanced contexts each context stores its own PAT, but
3288
- * writing this data shouldn't be harmful even in those cases. */
3289
-static void bdw_setup_private_ppat(struct intel_ppat *ppat)
3290
-{
3291
- ppat->max_entries = 8;
3292
- ppat->update_hw = bdw_private_pat_update_hw;
3293
- ppat->match = bdw_private_pat_match;
3294
- ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
3295
-
3296
- if (!USES_PPGTT(ppat->i915)) {
3297
- /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
3298
- * so RTL will always use the value corresponding to
3299
- * pat_sel = 000".
3300
- * So let's disable cache for GGTT to avoid screen corruptions.
3301
- * MOCS still can be used though.
3302
- * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
3303
- * before this patch, i.e. the same uncached + snooping access
3304
- * like on gen6/7 seems to be in effect.
3305
- * - So this just fixes blitter/render access. Again it looks
3306
- * like it's not just uncached access, but uncached + snooping.
3307
- * So we can still hold onto all our assumptions wrt cpu
3308
- * clflushing on LLC machines.
3309
- */
3310
- __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
3311
- return;
3312
- }
3313
-
3314
- __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC); /* for normal objects, no eLLC */
3315
- __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); /* for something pointing to ptes? */
3316
- __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); /* for scanout with eLLC */
3317
- __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC); /* Uncached objects, mostly for scanout */
3318
- __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3319
- __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3320
- __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3321
- __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3322
-}
3323
-
3324
-static void chv_setup_private_ppat(struct intel_ppat *ppat)
3325
-{
3326
- ppat->max_entries = 8;
3327
- ppat->update_hw = bdw_private_pat_update_hw;
3328
- ppat->match = chv_private_pat_match;
3329
- ppat->clear_value = CHV_PPAT_SNOOP;
3330
-
3331
- /*
3332
- * Map WB on BDW to snooped on CHV.
3333
- *
3334
- * Only the snoop bit has meaning for CHV, the rest is
3335
- * ignored.
3336
- *
3337
- * The hardware will never snoop for certain types of accesses:
3338
- * - CPU GTT (GMADR->GGTT->no snoop->memory)
3339
- * - PPGTT page tables
3340
- * - some other special cycles
3341
- *
3342
- * As with BDW, we also need to consider the following for GT accesses:
3343
- * "For GGTT, there is NO pat_sel[2:0] from the entry,
3344
- * so RTL will always use the value corresponding to
3345
- * pat_sel = 000".
3346
- * Which means we must set the snoop bit in PAT entry 0
3347
- * in order to keep the global status page working.
3348
- */
3349
-
3350
- __alloc_ppat_entry(ppat, 0, CHV_PPAT_SNOOP);
3351
- __alloc_ppat_entry(ppat, 1, 0);
3352
- __alloc_ppat_entry(ppat, 2, 0);
3353
- __alloc_ppat_entry(ppat, 3, 0);
3354
- __alloc_ppat_entry(ppat, 4, CHV_PPAT_SNOOP);
3355
- __alloc_ppat_entry(ppat, 5, CHV_PPAT_SNOOP);
3356
- __alloc_ppat_entry(ppat, 6, CHV_PPAT_SNOOP);
3357
- __alloc_ppat_entry(ppat, 7, CHV_PPAT_SNOOP);
3358
-}
3359
-
3360
-static void gen6_gmch_remove(struct i915_address_space *vm)
3361
-{
3362
- struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
3363
-
3364
- iounmap(ggtt->gsm);
3365
- cleanup_scratch_page(vm);
3366
-}
3367
-
3368
-static void setup_private_pat(struct drm_i915_private *dev_priv)
3369
-{
3370
- struct intel_ppat *ppat = &dev_priv->ppat;
3371
- int i;
3372
-
3373
- ppat->i915 = dev_priv;
3374
-
3375
- if (INTEL_GEN(dev_priv) >= 10)
3376
- cnl_setup_private_ppat(ppat);
3377
- else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
3378
- chv_setup_private_ppat(ppat);
3379
- else
3380
- bdw_setup_private_ppat(ppat);
3381
-
3382
- GEM_BUG_ON(ppat->max_entries > INTEL_MAX_PPAT_ENTRIES);
3383
-
3384
- for_each_clear_bit(i, ppat->used, ppat->max_entries) {
3385
- ppat->entries[i].value = ppat->clear_value;
3386
- ppat->entries[i].ppat = ppat;
3387
- set_bit(i, ppat->dirty);
3388
- }
3389
-
3390
- ppat->update_hw(dev_priv);
3391
-}
3392
-
3393
-static int gen8_gmch_probe(struct i915_ggtt *ggtt)
3394
-{
3395
- struct drm_i915_private *dev_priv = ggtt->vm.i915;
3396
- struct pci_dev *pdev = dev_priv->drm.pdev;
3397
- unsigned int size;
3398
- u16 snb_gmch_ctl;
3399
- int err;
3400
-
3401
- /* TODO: We're not aware of mappable constraints on gen8 yet */
3402
- ggtt->gmadr =
3403
- (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
3404
- pci_resource_len(pdev, 2));
3405
- ggtt->mappable_end = resource_size(&ggtt->gmadr);
3406
-
3407
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
3408
- if (!err)
3409
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
3410
- if (err)
3411
- DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
3412
-
3413
- pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3414
- if (IS_CHERRYVIEW(dev_priv))
3415
- size = chv_get_total_gtt_size(snb_gmch_ctl);
3416
- else
3417
- size = gen8_get_total_gtt_size(snb_gmch_ctl);
3418
-
3419
- ggtt->vm.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
3420
- ggtt->vm.cleanup = gen6_gmch_remove;
3421
- ggtt->vm.insert_page = gen8_ggtt_insert_page;
3422
- ggtt->vm.clear_range = nop_clear_range;
3423
- if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
3424
- ggtt->vm.clear_range = gen8_ggtt_clear_range;
3425
-
3426
- ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
3427
-
3428
- /* Serialize GTT updates with aperture access on BXT if VT-d is on. */
3429
- if (intel_ggtt_update_needs_vtd_wa(dev_priv)) {
3430
- ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
3431
- ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
3432
- if (ggtt->vm.clear_range != nop_clear_range)
3433
- ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
3434
- }
3435
-
3436
- ggtt->invalidate = gen6_ggtt_invalidate;
3437
-
3438
- ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
3439
- ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
3440
- ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
3441
- ggtt->vm.vma_ops.clear_pages = clear_pages;
3442
-
3443
- setup_private_pat(dev_priv);
3444
-
3445
- return ggtt_probe_common(ggtt, size);
3446
-}
3447
-
3448
-static int gen6_gmch_probe(struct i915_ggtt *ggtt)
3449
-{
3450
- struct drm_i915_private *dev_priv = ggtt->vm.i915;
3451
- struct pci_dev *pdev = dev_priv->drm.pdev;
3452
- unsigned int size;
3453
- u16 snb_gmch_ctl;
3454
- int err;
3455
-
3456
- ggtt->gmadr =
3457
- (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
3458
- pci_resource_len(pdev, 2));
3459
- ggtt->mappable_end = resource_size(&ggtt->gmadr);
3460
-
3461
- /* 64/512MB is the current min/max we actually know of, but this is just
3462
- * a coarse sanity check.
3463
- */
3464
- if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
3465
- DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
3466
- return -ENXIO;
3467
- }
3468
-
3469
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
3470
- if (!err)
3471
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
3472
- if (err)
3473
- DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
3474
- pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3475
-
3476
- size = gen6_get_total_gtt_size(snb_gmch_ctl);
3477
- ggtt->vm.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3478
-
3479
- ggtt->vm.clear_range = gen6_ggtt_clear_range;
3480
- ggtt->vm.insert_page = gen6_ggtt_insert_page;
3481
- ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
3482
- ggtt->vm.cleanup = gen6_gmch_remove;
3483
-
3484
- ggtt->invalidate = gen6_ggtt_invalidate;
3485
-
3486
- if (HAS_EDRAM(dev_priv))
3487
- ggtt->vm.pte_encode = iris_pte_encode;
3488
- else if (IS_HASWELL(dev_priv))
3489
- ggtt->vm.pte_encode = hsw_pte_encode;
3490
- else if (IS_VALLEYVIEW(dev_priv))
3491
- ggtt->vm.pte_encode = byt_pte_encode;
3492
- else if (INTEL_GEN(dev_priv) >= 7)
3493
- ggtt->vm.pte_encode = ivb_pte_encode;
3494
- else
3495
- ggtt->vm.pte_encode = snb_pte_encode;
3496
-
3497
- ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
3498
- ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
3499
- ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
3500
- ggtt->vm.vma_ops.clear_pages = clear_pages;
3501
-
3502
- return ggtt_probe_common(ggtt, size);
3503
-}
3504
-
3505
-static void i915_gmch_remove(struct i915_address_space *vm)
3506
-{
3507
- intel_gmch_remove();
3508
-}
3509
-
3510
-static int i915_gmch_probe(struct i915_ggtt *ggtt)
3511
-{
3512
- struct drm_i915_private *dev_priv = ggtt->vm.i915;
3513
- phys_addr_t gmadr_base;
3514
- int ret;
3515
-
3516
- ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
3517
- if (!ret) {
3518
- DRM_ERROR("failed to set up gmch\n");
3519
- return -EIO;
3520
- }
3521
-
3522
- intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
3523
-
3524
- ggtt->gmadr =
3525
- (struct resource) DEFINE_RES_MEM(gmadr_base,
3526
- ggtt->mappable_end);
3527
-
3528
- ggtt->do_idle_maps = needs_idle_maps(dev_priv);
3529
- ggtt->vm.insert_page = i915_ggtt_insert_page;
3530
- ggtt->vm.insert_entries = i915_ggtt_insert_entries;
3531
- ggtt->vm.clear_range = i915_ggtt_clear_range;
3532
- ggtt->vm.cleanup = i915_gmch_remove;
3533
-
3534
- ggtt->invalidate = gmch_ggtt_invalidate;
3535
-
3536
- ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
3537
- ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
3538
- ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
3539
- ggtt->vm.vma_ops.clear_pages = clear_pages;
3540
-
3541
- if (unlikely(ggtt->do_idle_maps))
3542
- DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3543
-
3544
- return 0;
3545
-}
3546
-
3547
-/**
3548
- * i915_ggtt_probe_hw - Probe GGTT hardware location
3549
- * @dev_priv: i915 device
3550
- */
3551
-int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
3552
-{
3553
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
3554
- int ret;
3555
-
3556
- ggtt->vm.i915 = dev_priv;
3557
- ggtt->vm.dma = &dev_priv->drm.pdev->dev;
3558
-
3559
- if (INTEL_GEN(dev_priv) <= 5)
3560
- ret = i915_gmch_probe(ggtt);
3561
- else if (INTEL_GEN(dev_priv) < 8)
3562
- ret = gen6_gmch_probe(ggtt);
3563
- else
3564
- ret = gen8_gmch_probe(ggtt);
3565
- if (ret)
3566
- return ret;
3567
-
3568
- /* Trim the GGTT to fit the GuC mappable upper range (when enabled).
3569
- * This is easier than doing range restriction on the fly, as we
3570
- * currently don't have any bits spare to pass in this upper
3571
- * restriction!
3572
- */
3573
- if (USES_GUC(dev_priv)) {
3574
- ggtt->vm.total = min_t(u64, ggtt->vm.total, GUC_GGTT_TOP);
3575
- ggtt->mappable_end =
3576
- min_t(u64, ggtt->mappable_end, ggtt->vm.total);
3577
- }
3578
-
3579
- if ((ggtt->vm.total - 1) >> 32) {
3580
- DRM_ERROR("We never expected a Global GTT with more than 32bits"
3581
- " of address space! Found %lldM!\n",
3582
- ggtt->vm.total >> 20);
3583
- ggtt->vm.total = 1ULL << 32;
3584
- ggtt->mappable_end =
3585
- min_t(u64, ggtt->mappable_end, ggtt->vm.total);
3586
- }
3587
-
3588
- if (ggtt->mappable_end > ggtt->vm.total) {
3589
- DRM_ERROR("mappable aperture extends past end of GGTT,"
3590
- " aperture=%pa, total=%llx\n",
3591
- &ggtt->mappable_end, ggtt->vm.total);
3592
- ggtt->mappable_end = ggtt->vm.total;
3593
- }
3594
-
3595
- /* GMADR is the PCI mmio aperture into the global GTT. */
3596
- DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
3597
- DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
3598
- DRM_DEBUG_DRIVER("DSM size = %lluM\n",
3599
- (u64)resource_size(&intel_graphics_stolen_res) >> 20);
3600
- if (intel_vtd_active())
3601
- DRM_INFO("VT-d active for gfx access\n");
3602
-
3603
- return 0;
3604
-}
3605
-
3606
-/**
3607
- * i915_ggtt_init_hw - Initialize GGTT hardware
3608
- * @dev_priv: i915 device
3609
- */
3610
-int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
3611
-{
3612
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
3613
- int ret;
3614
-
3615
- stash_init(&dev_priv->mm.wc_stash);
3616
-
3617
- /* Note that we use page colouring to enforce a guard page at the
3618
- * end of the address space. This is required as the CS may prefetch
3619
- * beyond the end of the batch buffer, across the page boundary,
3620
- * and beyond the end of the GTT if we do not provide a guard.
3621
- */
3622
- mutex_lock(&dev_priv->drm.struct_mutex);
3623
- i915_address_space_init(&ggtt->vm, dev_priv);
3624
-
3625
- /* Only VLV supports read-only GGTT mappings */
3626
- ggtt->vm.has_read_only = IS_VALLEYVIEW(dev_priv);
3627
-
3628
- if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv))
3629
- ggtt->vm.mm.color_adjust = i915_gtt_color_adjust;
3630
- mutex_unlock(&dev_priv->drm.struct_mutex);
3631
-
3632
- if (!io_mapping_init_wc(&dev_priv->ggtt.iomap,
3633
- dev_priv->ggtt.gmadr.start,
3634
- dev_priv->ggtt.mappable_end)) {
3635
- ret = -EIO;
3636
- goto out_gtt_cleanup;
3637
- }
3638
-
3639
- ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end);
3640
-
3641
- /*
3642
- * Initialise stolen early so that we may reserve preallocated
3643
- * objects for the BIOS to KMS transition.
3644
- */
3645
- ret = i915_gem_init_stolen(dev_priv);
3646
- if (ret)
3647
- goto out_gtt_cleanup;
3648
-
3649
- return 0;
3650
-
3651
-out_gtt_cleanup:
3652
- ggtt->vm.cleanup(&ggtt->vm);
3653
- return ret;
3654
-}
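
i915_ggtt_init_hw() sets things up in order (address space, WC iomapping, MTRR, stolen memory) and unwinds through ggtt->vm.cleanup() when a later step fails. A small sketch of that single-exit goto pattern, with stand-in setup/teardown helpers whose names are made up for the example:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static bool toy_map_aperture(void) { return true; }   /* stands in for io_mapping_init_wc() */
static bool toy_init_stolen(void)  { return false; }  /* stands in for i915_gem_init_stolen(); fails here */
static void toy_cleanup_vm(void)   { puts("cleanup vm"); }

static int toy_ggtt_init_hw(void)
{
    int ret;

    if (!toy_map_aperture()) {
        ret = -EIO;
        goto out_gtt_cleanup;
    }

    if (!toy_init_stolen()) {
        ret = -ENOMEM;
        goto out_gtt_cleanup;
    }

    return 0;

out_gtt_cleanup:
    /* Unwind the setup done before the failing step. */
    toy_cleanup_vm();
    return ret;
}

int main(void)
{
    printf("init: %d\n", toy_ggtt_init_hw());
    return 0;
}
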
3655
-
3656
-int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
3657
-{
3658
- if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
3659
- return -EIO;
3660
-
3661
- return 0;
3662
-}
3663
-
3664
-void i915_ggtt_enable_guc(struct drm_i915_private *i915)
3665
-{
3666
- GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate);
3667
-
3668
- i915->ggtt.invalidate = guc_ggtt_invalidate;
3669
-
3670
- i915_ggtt_invalidate(i915);
3671
-}
3672
-
3673
-void i915_ggtt_disable_guc(struct drm_i915_private *i915)
3674
-{
3675
- /* We should only be called after i915_ggtt_enable_guc() */
3676
- GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate);
3677
-
3678
- i915->ggtt.invalidate = gen6_ggtt_invalidate;
3679
-
3680
- i915_ggtt_invalidate(i915);
3681
-}
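
Enabling or disabling GuC here is just swapping the ggtt.invalidate callback and forcing one invalidation so the new path takes effect immediately. A user-space sketch of that callback swap — toy types and behaviours, not the driver's:

#include <assert.h>
#include <stdio.h>

struct toy_ggtt {
    void (*invalidate)(struct toy_ggtt *ggtt);
};

static void toy_gen6_invalidate(struct toy_ggtt *ggtt)
{
    (void)ggtt;
    puts("flush via MMIO write");
}

static void toy_guc_invalidate(struct toy_ggtt *ggtt)
{
    (void)ggtt;
    puts("flush via GuC notification");
}

static void toy_enable_guc(struct toy_ggtt *ggtt)
{
    /* Only legal from the default state, mirroring the GEM_BUG_ON above. */
    assert(ggtt->invalidate == toy_gen6_invalidate);
    ggtt->invalidate = toy_guc_invalidate;
    ggtt->invalidate(ggtt);  /* flush once so stale PTEs are not reused */
}

static void toy_disable_guc(struct toy_ggtt *ggtt)
{
    assert(ggtt->invalidate == toy_guc_invalidate);
    ggtt->invalidate = toy_gen6_invalidate;
    ggtt->invalidate(ggtt);
}

int main(void)
{
    struct toy_ggtt ggtt = { .invalidate = toy_gen6_invalidate };

    toy_enable_guc(&ggtt);
    toy_disable_guc(&ggtt);
    return 0;
}
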
3682
-
3683
-void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
3684
-{
3685
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
3686
- struct i915_vma *vma, *vn;
3687
-
3688
- i915_check_and_clear_faults(dev_priv);
3689
-
3690
- /* First fill our portion of the GTT with scratch pages */
3691
- ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
3692
-
3693
- ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
3694
-
3695
- /* clflush objects bound into the GGTT and rebind them. */
3696
- GEM_BUG_ON(!list_empty(&ggtt->vm.active_list));
3697
- list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) {
3698
- struct drm_i915_gem_object *obj = vma->obj;
3699
-
3700
- if (!(vma->flags & I915_VMA_GLOBAL_BIND))
3701
- continue;
3702
-
3703
- if (!i915_vma_unbind(vma))
3704
- continue;
3705
-
3706
- WARN_ON(i915_vma_bind(vma,
3707
- obj ? obj->cache_level : 0,
3708
- PIN_UPDATE));
3709
- if (obj)
3710
- WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
3711
- }
3712
-
3713
- ggtt->vm.closed = false;
3714
- i915_ggtt_invalidate(dev_priv);
3715
-
3716
- if (INTEL_GEN(dev_priv) >= 8) {
3717
- struct intel_ppat *ppat = &dev_priv->ppat;
3718
-
3719
- bitmap_set(ppat->dirty, 0, ppat->max_entries);
3720
- dev_priv->ppat.update_hw(dev_priv);
3721
- return;
3722
- }
3723
-}
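
After resume the routine above wipes the whole GGTT to scratch pages and then walks the bound VMAs, unbinding and rebinding only those that actually carried a global binding. A condensed sketch of that filter-and-rebind walk over a plain array — toy structures only; the real code operates on the vm's VMA lists and PTE writers:

#include <stdio.h>

#define TOY_VMA_GLOBAL_BIND 0x1

struct toy_vma {
    const char *name;
    unsigned int flags;
};

static void toy_clear_range(void)               { puts("fill GGTT with scratch PTEs"); }
static void toy_rebind(const struct toy_vma *v) { printf("rewrite PTEs for %s\n", v->name); }

static void toy_restore_gtt_mappings(struct toy_vma *vmas, int count)
{
    toy_clear_range();

    for (int i = 0; i < count; i++) {
        /* Only globally bound VMAs had PTEs in the GGTT to restore. */
        if (!(vmas[i].flags & TOY_VMA_GLOBAL_BIND))
            continue;
        toy_rebind(&vmas[i]);
    }
}

int main(void)
{
    struct toy_vma vmas[] = {
        { "framebuffer", TOY_VMA_GLOBAL_BIND },
        { "ppgtt-only batch", 0 },
    };

    toy_restore_gtt_mappings(vmas, 2);
    return 0;
}
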
3724
-
3725
-static struct scatterlist *
3726
-rotate_pages(const dma_addr_t *in, unsigned int offset,
3727
- unsigned int width, unsigned int height,
3728
- unsigned int stride,
3729
- struct sg_table *st, struct scatterlist *sg)
3730
-{
3731
- unsigned int column, row;
3732
- unsigned int src_idx;
3733
-
3734
- for (column = 0; column < width; column++) {
3735
- src_idx = stride * (height - 1) + column;
3736
- for (row = 0; row < height; row++) {
3737
- st->nents++;
3738
- /* We don't need the pages, but need to initialize
3739
- * the entries so the sg list can be happily traversed.
3740
- * The only thing we need are DMA addresses.
3741
- */
3742
- sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
3743
- sg_dma_address(sg) = in[offset + src_idx];
3744
- sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
3745
- sg = sg_next(sg);
3746
- src_idx -= stride;
3747
- }
3748
- }
3749
-
3750
- return sg;
3751
-}
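
rotate_pages() walks the source pages column by column, starting each column at the bottom row (src_idx = stride * (height - 1) + column) and stepping upwards by subtracting the stride, which is what produces the rotated layout in the sg list. The ordering is easiest to see by printing the indices it would emit for a small surface; a standalone sketch, assuming a tightly packed plane with width = height = stride = 2:

#include <stdio.h>

/* Print the source page index emitted for each destination slot,
 * using the same walk as rotate_pages() above. */
static void toy_rotate(unsigned int width, unsigned int height,
                       unsigned int stride)
{
    for (unsigned int column = 0; column < width; column++) {
        unsigned int src_idx = stride * (height - 1) + column;

        for (unsigned int row = 0; row < height; row++) {
            printf("dst %u <- src page %u\n",
                   column * height + row, src_idx);
            src_idx -= stride;
        }
    }
}

int main(void)
{
    /* A 2x2 page surface laid out as 0 1 / 2 3 comes out as 2 0 3 1. */
    toy_rotate(2, 2, 2);
    return 0;
}
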
3752
-
3753
-static noinline struct sg_table *
3754
-intel_rotate_pages(struct intel_rotation_info *rot_info,
3755
- struct drm_i915_gem_object *obj)
3756
-{
3757
- const unsigned long n_pages = obj->base.size / I915_GTT_PAGE_SIZE;
3758
- unsigned int size = intel_rotation_info_size(rot_info);
3759
- struct sgt_iter sgt_iter;
3760
- dma_addr_t dma_addr;
3761
- unsigned long i;
3762
- dma_addr_t *page_addr_list;
3763
- struct sg_table *st;
3764
- struct scatterlist *sg;
3765
- int ret = -ENOMEM;
3766
-
3767
- /* Allocate a temporary list of source pages for random access. */
3768
- page_addr_list = kvmalloc_array(n_pages,
3769
- sizeof(dma_addr_t),
3770
- GFP_KERNEL);
3771
- if (!page_addr_list)
3772
- return ERR_PTR(ret);
3773
-
3774
- /* Allocate target SG list. */
3775
- st = kmalloc(sizeof(*st), GFP_KERNEL);
3776
- if (!st)
3777
- goto err_st_alloc;
3778
-
3779
- ret = sg_alloc_table(st, size, GFP_KERNEL);
3780
- if (ret)
3781
- goto err_sg_alloc;
3782
-
3783
- /* Populate source page list from the object. */
3784
- i = 0;
3785
- for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
3786
- page_addr_list[i++] = dma_addr;
3787
-
3788
- GEM_BUG_ON(i != n_pages);
3789
- st->nents = 0;
3790
- sg = st->sgl;
3791
-
3792
- for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
3793
- sg = rotate_pages(page_addr_list, rot_info->plane[i].offset,
3794
- rot_info->plane[i].width, rot_info->plane[i].height,
3795
- rot_info->plane[i].stride, st, sg);
3796
- }
3797
-
3798
- kvfree(page_addr_list);
3799
-
3800
- return st;
3801
-
3802
-err_sg_alloc:
3803
- kfree(st);
3804
-err_st_alloc:
3805
- kvfree(page_addr_list);
3806
-
3807
- DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
3808
- obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
3809
-
3810
- return ERR_PTR(ret);
3811
-}
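
Before the per-plane walk, intel_rotate_pages() flattens the object's sg list into a plain dma_addr_t array (the kvmalloc_array() above) so source pages can be indexed in arbitrary order, and frees the temporary once the rotated table is built. A sketch of that flatten-then-index step, using toy segment structures and byte offsets in place of real DMA addresses:

#include <stdio.h>
#include <stdlib.h>

#define TOY_PAGE 4096ul

struct toy_segment {        /* stand-in for one sg entry */
    unsigned long dma;      /* start address of the run */
    unsigned int pages;     /* run length in pages */
};

int main(void)
{
    const struct toy_segment segs[] = { { 0x10000, 3 }, { 0x40000, 1 } };
    const unsigned int n_segs = sizeof(segs) / sizeof(segs[0]);
    unsigned long *flat;
    unsigned int n_pages = 0, i, j, k = 0;

    for (i = 0; i < n_segs; i++)
        n_pages += segs[i].pages;

    /* Temporary random-access list of per-page addresses. */
    flat = malloc(n_pages * sizeof(*flat));
    if (!flat)
        return 1;

    for (i = 0; i < n_segs; i++)
        for (j = 0; j < segs[i].pages; j++)
            flat[k++] = segs[i].dma + (unsigned long)j * TOY_PAGE;

    for (i = 0; i < n_pages; i++)
        printf("page %u -> 0x%lx\n", i, flat[i]);

    free(flat);
    return 0;
}
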
3812
-
3813
-static noinline struct sg_table *
3814
-intel_partial_pages(const struct i915_ggtt_view *view,
3815
- struct drm_i915_gem_object *obj)
3816
-{
3817
- struct sg_table *st;
3818
- struct scatterlist *sg, *iter;
3819
- unsigned int count = view->partial.size;
3820
- unsigned int offset;
3821
- int ret = -ENOMEM;
3822
-
3823
- st = kmalloc(sizeof(*st), GFP_KERNEL);
3824
- if (!st)
3825
- goto err_st_alloc;
3826
-
3827
- ret = sg_alloc_table(st, count, GFP_KERNEL);
3828
- if (ret)
3829
- goto err_sg_alloc;
3830
-
3831
- iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
3832
- GEM_BUG_ON(!iter);
3833
-
3834
- sg = st->sgl;
3835
- st->nents = 0;
3836
- do {
3837
- unsigned int len;
3838
-
3839
- len = min(iter->length - (offset << PAGE_SHIFT),
3840
- count << PAGE_SHIFT);
3841
- sg_set_page(sg, NULL, len, 0);
3842
- sg_dma_address(sg) =
3843
- sg_dma_address(iter) + (offset << PAGE_SHIFT);
3844
- sg_dma_len(sg) = len;
3845
-
3846
- st->nents++;
3847
- count -= len >> PAGE_SHIFT;
3848
- if (count == 0) {
3849
- sg_mark_end(sg);
3850
- return st;
3851
- }
3852
-
3853
- sg = __sg_next(sg);
3854
- iter = __sg_next(iter);
3855
- offset = 0;
3856
- } while (1);
3857
-
3858
-err_sg_alloc:
3859
- kfree(st);
3860
-err_st_alloc:
3861
- return ERR_PTR(ret);
3862
-}
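
intel_partial_pages() copies out a window of view->partial.size pages starting view->partial.offset pages into the object, clamping each output chunk to whatever is left of the current source entry: len = min(iter->length - (offset << PAGE_SHIFT), count << PAGE_SHIFT). A user-space sketch of that slicing over an array of variable-length extents — toy types, with the starting extent and intra-extent offset passed in directly rather than looked up as the real code does:

#include <stdio.h>

#define TOY_PAGE_SHIFT 12

struct toy_extent {
    unsigned long start;    /* stand-in for sg_dma_address() */
    unsigned long len;      /* bytes, a multiple of the page size */
};

/* Emit the pieces covering `count` pages beginning `offset` pages into
 * extent `first`, mirroring the do/while loop above. */
static void toy_partial(const struct toy_extent *ext, unsigned int first,
                        unsigned long offset, unsigned long count)
{
    unsigned int i = first;

    while (count) {
        unsigned long len = ext[i].len - (offset << TOY_PAGE_SHIFT);

        if (len > count << TOY_PAGE_SHIFT)
            len = count << TOY_PAGE_SHIFT;

        printf("chunk: 0x%lx + %lu bytes\n",
               ext[i].start + (offset << TOY_PAGE_SHIFT), len);

        count -= len >> TOY_PAGE_SHIFT;
        i++;            /* the next source extent is used from its beginning */
        offset = 0;
    }
}

int main(void)
{
    const struct toy_extent ext[] = {
        { 0x100000, 4ul << TOY_PAGE_SHIFT },
        { 0x900000, 8ul << TOY_PAGE_SHIFT },
    };

    /* 5 pages starting 2 pages into the first extent. */
    toy_partial(ext, 0, 2, 5);
    return 0;
}
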
3863
-
3864
-static int
3865
-i915_get_ggtt_vma_pages(struct i915_vma *vma)
3866
-{
3867
- int ret;
3868
-
3869
- /* The vma->pages are only valid within the lifespan of the borrowed
3870
- * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
3871
- * must be the vma->pages. A simple rule is that vma->pages must only
3872
- * be accessed when the obj->mm.pages are pinned.
3873
- */
3874
- GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
3875
-
3876
- switch (vma->ggtt_view.type) {
3877
- default:
3878
- GEM_BUG_ON(vma->ggtt_view.type);
3879
- /* fall through */
3880
- case I915_GGTT_VIEW_NORMAL:
3881
- vma->pages = vma->obj->mm.pages;
3882
- return 0;
3883
-
3884
- case I915_GGTT_VIEW_ROTATED:
3885
- vma->pages =
3886
- intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
3887
- break;
3888
-
3889
- case I915_GGTT_VIEW_PARTIAL:
3890
- vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
3891
- break;
3892
- }
3893
-
3894
- ret = 0;
3895
- if (unlikely(IS_ERR(vma->pages))) {
3896
- ret = PTR_ERR(vma->pages);
3897
- vma->pages = NULL;
3898
- DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3899
- vma->ggtt_view.type, ret);
3900
- }
3901
- return ret;
390274 }
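
i915_get_ggtt_vma_pages() leans on the kernel's ERR_PTR convention: the view builders return either a valid sg_table pointer or an errno encoded into the pointer, and the caller unpacks it with IS_ERR()/PTR_ERR() before clearing vma->pages. A freestanding sketch of that encoding — a simplified re-implementation of the helpers, for illustration only:

#include <errno.h>
#include <stdio.h>

/* Simplified versions of the kernel helpers: errnos live in the last
 * page of the address space, so they can never be valid pointers. */
#define TOY_MAX_ERRNO   4095
#define TOY_ERR_PTR(e)  ((void *)(long)(e))
#define TOY_IS_ERR(p)   ((unsigned long)(p) >= (unsigned long)-TOY_MAX_ERRNO)
#define TOY_PTR_ERR(p)  ((long)(p))

static void *toy_build_view(int fail)
{
    static int table;   /* stands in for the allocated sg_table */

    return fail ? TOY_ERR_PTR(-ENOMEM) : &table;
}

int main(void)
{
    void *pages = toy_build_view(1);

    if (TOY_IS_ERR(pages)) {
        long ret = TOY_PTR_ERR(pages);

        pages = NULL;   /* like clearing vma->pages above */
        printf("Failed to get pages for VMA view (%ld)!\n", ret);
        return (int)-ret;
    }
    return 0;
}
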
390375
390476 /**
....@@ -3937,7 +109,7 @@
3937109 GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
3938110 GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
3939111 GEM_BUG_ON(range_overflows(offset, size, vm->total));
3940
- GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
112
+ GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm);
3941113 GEM_BUG_ON(drm_mm_node_allocated(node));
3942114
3943115 node->size = size;
....@@ -4026,7 +198,8 @@
4026198 u64 offset;
4027199 int err;
4028200
4029
- lockdep_assert_held(&vm->i915->drm.struct_mutex);
201
+ lockdep_assert_held(&vm->mutex);
202
+
4030203 GEM_BUG_ON(!size);
4031204 GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
4032205 GEM_BUG_ON(alignment && !is_power_of_2(alignment));
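
One behavioural change visible in this hunk: the reservation and insertion paths now assert the per-address-space vm->mutex rather than the device-global struct_mutex, so different VMs can manipulate their drm_mm ranges concurrently. A pthread sketch of the caller-side contract — the lock name follows the hunk, everything else is a made-up stand-in:

#include <pthread.h>
#include <stdio.h>

struct toy_address_space {
    pthread_mutex_t mutex;  /* protects only this VM's range manager */
    unsigned long used;
};

/* Callers must already hold vm->mutex, the contract lockdep_assert_held() checks. */
static int toy_gtt_insert(struct toy_address_space *vm, unsigned long size)
{
    vm->used += size;
    return 0;
}

int main(void)
{
    struct toy_address_space ggtt = {
        .mutex = PTHREAD_MUTEX_INITIALIZER,
    };

    pthread_mutex_lock(&ggtt.mutex);
    toy_gtt_insert(&ggtt, 4096);
    pthread_mutex_unlock(&ggtt.mutex);

    printf("used = %lu\n", ggtt.used);
    return 0;
}
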
....@@ -4034,7 +207,7 @@
4034207 GEM_BUG_ON(start >= end);
4035208 GEM_BUG_ON(start > 0 && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
4036209 GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
4037
- GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
210
+ GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm);
4038211 GEM_BUG_ON(drm_mm_node_allocated(node));
4039212
4040213 if (unlikely(range_overflows(start, size, end)))
....@@ -4077,7 +250,8 @@
4077250 if (flags & PIN_NOEVICT)
4078251 return -ENOSPC;
4079252
4080
- /* No free space, pick a slot at random.
253
+ /*
254
+ * No free space, pick a slot at random.
4081255 *
4082256 * There is a pathological case here using a GTT shared between
4083257 * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
....@@ -4105,6 +279,9 @@
4105279 if (err != -ENOSPC)
4106280 return err;
4107281
282
+ if (flags & PIN_NOSEARCH)
283
+ return -ENOSPC;
284
+
4108285 /* Randomly selected placement is pinned, do a search */
4109286 err = i915_gem_evict_something(vm, size, alignment, color,
4110287 start, end, flags);
....@@ -4117,6 +294,5 @@
4117294 }
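
The tail of i915_gem_gtt_insert() implements the placement policy spelled out in the comment above: a cheap scan first, then eviction of a single randomly chosen slot (so the same mmapped buffer is not always the victim), and only then a full eviction search — unless PIN_NOEVICT or the newly added PIN_NOSEARCH cuts the sequence short. A sketch of that flag-gated ordering; the flag names follow the diff, while the allocator and eviction helpers are toy stand-ins:

#include <errno.h>
#include <stdio.h>

#define TOY_PIN_NOEVICT  0x1
#define TOY_PIN_NOSEARCH 0x2

/* Stand-ins for the drm_mm insertion and eviction helpers used above. */
static int toy_try_insert(int gtt_has_space) { return gtt_has_space ? 0 : -ENOSPC; }
static int toy_evict_random_slot(void)       { return -ENOSPC; }  /* slot was pinned */
static int toy_evict_something(void)         { return 0; }        /* search freed room */

static int toy_gtt_insert(unsigned int flags)
{
    int err = toy_try_insert(0);

    if (err != -ENOSPC)
        return err;
    if (flags & TOY_PIN_NOEVICT)
        return -ENOSPC;

    /* No free space: evict whatever sits at one randomly chosen slot. */
    err = toy_evict_random_slot();
    if (err != -ENOSPC)
        return err ? err : toy_try_insert(1);
    if (flags & TOY_PIN_NOSEARCH)
        return -ENOSPC;

    /* Last resort: a full eviction search, then retry the reservation. */
    err = toy_evict_something();
    return err ? err : toy_try_insert(1);
}

int main(void)
{
    printf("default:  %d\n", toy_gtt_insert(0));
    printf("noevict:  %d\n", toy_gtt_insert(TOY_PIN_NOEVICT));
    printf("nosearch: %d\n", toy_gtt_insert(TOY_PIN_NOSEARCH));
    return 0;
}
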
4118295
4119296 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
4120
-#include "selftests/mock_gtt.c"
4121297 #include "selftests/i915_gem_gtt.c"
4122298 #endif