forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-05-11 04dd17822334871b23ea2862f7798fb0e0007777
kernel/drivers/gpu/drm/i915/i915_gem.c
@@ -25,247 +25,83 @@
2525 *
2626 */
2727
28
-#include <drm/drmP.h>
2928 #include <drm/drm_vma_manager.h>
30
-#include <drm/i915_drm.h>
31
-#include "i915_drv.h"
32
-#include "i915_gem_clflush.h"
33
-#include "i915_vgpu.h"
34
-#include "i915_trace.h"
35
-#include "intel_drv.h"
36
-#include "intel_frontbuffer.h"
37
-#include "intel_mocs.h"
38
-#include "intel_workarounds.h"
39
-#include "i915_gemfs.h"
4029 #include <linux/dma-fence-array.h>
4130 #include <linux/kthread.h>
42
-#include <linux/reservation.h>
31
+#include <linux/dma-resv.h>
4332 #include <linux/shmem_fs.h>
4433 #include <linux/slab.h>
4534 #include <linux/stop_machine.h>
4635 #include <linux/swap.h>
4736 #include <linux/pci.h>
4837 #include <linux/dma-buf.h>
38
+#include <linux/mman.h>
4939
50
-static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
40
+#include "display/intel_display.h"
41
+#include "display/intel_frontbuffer.h"
5142
52
-static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
53
-{
54
- if (obj->cache_dirty)
55
- return false;
43
+#include "gem/i915_gem_clflush.h"
44
+#include "gem/i915_gem_context.h"
45
+#include "gem/i915_gem_ioctls.h"
46
+#include "gem/i915_gem_mman.h"
47
+#include "gem/i915_gem_region.h"
48
+#include "gt/intel_engine_user.h"
49
+#include "gt/intel_gt.h"
50
+#include "gt/intel_gt_pm.h"
51
+#include "gt/intel_workarounds.h"
5652
57
- if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
58
- return true;
53
+#include "i915_drv.h"
54
+#include "i915_trace.h"
55
+#include "i915_vgpu.h"
5956
60
- return obj->pin_global; /* currently in use by HW, keep flushed */
61
-}
57
+#include "intel_pm.h"
6258
6359 static int
64
-insert_mappable_node(struct i915_ggtt *ggtt,
65
- struct drm_mm_node *node, u32 size)
60
+insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size)
6661 {
62
+ int err;
63
+
64
+ err = mutex_lock_interruptible(&ggtt->vm.mutex);
65
+ if (err)
66
+ return err;
67
+
6768 memset(node, 0, sizeof(*node));
68
- return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
69
- size, 0, I915_COLOR_UNEVICTABLE,
70
- 0, ggtt->mappable_end,
71
- DRM_MM_INSERT_LOW);
69
+ err = drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
70
+ size, 0, I915_COLOR_UNEVICTABLE,
71
+ 0, ggtt->mappable_end,
72
+ DRM_MM_INSERT_LOW);
73
+
74
+ mutex_unlock(&ggtt->vm.mutex);
75
+
76
+ return err;
7277 }
7378
7479 static void
75
-remove_mappable_node(struct drm_mm_node *node)
80
+remove_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node)
7681 {
82
+ mutex_lock(&ggtt->vm.mutex);
7783 drm_mm_remove_node(node);
78
-}
79
-
80
-/* some bookkeeping */
81
-static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
82
- u64 size)
83
-{
84
- spin_lock(&dev_priv->mm.object_stat_lock);
85
- dev_priv->mm.object_count++;
86
- dev_priv->mm.object_memory += size;
87
- spin_unlock(&dev_priv->mm.object_stat_lock);
88
-}
89
-
90
-static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
91
- u64 size)
92
-{
93
- spin_lock(&dev_priv->mm.object_stat_lock);
94
- dev_priv->mm.object_count--;
95
- dev_priv->mm.object_memory -= size;
96
- spin_unlock(&dev_priv->mm.object_stat_lock);
97
-}
98
-
99
-static int
100
-i915_gem_wait_for_error(struct i915_gpu_error *error)
101
-{
102
- int ret;
103
-
104
- might_sleep();
105
-
106
- /*
107
- * Only wait 10 seconds for the gpu reset to complete to avoid hanging
108
- * userspace. If it takes that long something really bad is going on and
109
- * we should simply try to bail out and fail as gracefully as possible.
110
- */
111
- ret = wait_event_interruptible_timeout(error->reset_queue,
112
- !i915_reset_backoff(error),
113
- I915_RESET_TIMEOUT);
114
- if (ret == 0) {
115
- DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
116
- return -EIO;
117
- } else if (ret < 0) {
118
- return ret;
119
- } else {
120
- return 0;
121
- }
122
-}
123
-
124
-int i915_mutex_lock_interruptible(struct drm_device *dev)
125
-{
126
- struct drm_i915_private *dev_priv = to_i915(dev);
127
- int ret;
128
-
129
- ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
130
- if (ret)
131
- return ret;
132
-
133
- ret = mutex_lock_interruptible(&dev->struct_mutex);
134
- if (ret)
135
- return ret;
136
-
137
- return 0;
138
-}
139
-
140
-static u32 __i915_gem_park(struct drm_i915_private *i915)
141
-{
142
- GEM_TRACE("\n");
143
-
144
- lockdep_assert_held(&i915->drm.struct_mutex);
145
- GEM_BUG_ON(i915->gt.active_requests);
146
- GEM_BUG_ON(!list_empty(&i915->gt.active_rings));
147
-
148
- if (!i915->gt.awake)
149
- return I915_EPOCH_INVALID;
150
-
151
- GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID);
152
-
153
- /*
154
- * Be paranoid and flush a concurrent interrupt to make sure
155
- * we don't reactivate any irq tasklets after parking.
156
- *
157
- * FIXME: Note that even though we have waited for execlists to be idle,
158
- * there may still be an in-flight interrupt even though the CSB
159
- * is now empty. synchronize_irq() makes sure that a residual interrupt
160
- * is completed before we continue, but it doesn't prevent the HW from
161
- * raising a spurious interrupt later. To complete the shield we should
162
- * coordinate disabling the CS irq with flushing the interrupts.
163
- */
164
- synchronize_irq(i915->drm.irq);
165
-
166
- intel_engines_park(i915);
167
- i915_timelines_park(i915);
168
-
169
- i915_pmu_gt_parked(i915);
170
- i915_vma_parked(i915);
171
-
172
- i915->gt.awake = false;
173
-
174
- if (INTEL_GEN(i915) >= 6)
175
- gen6_rps_idle(i915);
176
-
177
- if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) {
178
- i915_rc6_ctx_wa_check(i915);
179
- intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
180
- }
181
-
182
- intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ);
183
-
184
- intel_runtime_pm_put(i915);
185
-
186
- return i915->gt.epoch;
187
-}
188
-
189
-void i915_gem_park(struct drm_i915_private *i915)
190
-{
191
- GEM_TRACE("\n");
192
-
193
- lockdep_assert_held(&i915->drm.struct_mutex);
194
- GEM_BUG_ON(i915->gt.active_requests);
195
-
196
- if (!i915->gt.awake)
197
- return;
198
-
199
- /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */
200
- mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100));
201
-}
202
-
203
-void i915_gem_unpark(struct drm_i915_private *i915)
204
-{
205
- GEM_TRACE("\n");
206
-
207
- lockdep_assert_held(&i915->drm.struct_mutex);
208
- GEM_BUG_ON(!i915->gt.active_requests);
209
-
210
- if (i915->gt.awake)
211
- return;
212
-
213
- intel_runtime_pm_get_noresume(i915);
214
-
215
- /*
216
- * It seems that the DMC likes to transition between the DC states a lot
217
- * when there are no connected displays (no active power domains) during
218
- * command submission.
219
- *
220
- * This activity has negative impact on the performance of the chip with
221
- * huge latencies observed in the interrupt handler and elsewhere.
222
- *
223
- * Work around it by grabbing a GT IRQ power domain whilst there is any
224
- * GT activity, preventing any DC state transitions.
225
- */
226
- intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
227
-
228
- if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
229
- intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
230
-
231
- i915->gt.awake = true;
232
- if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
233
- i915->gt.epoch = 1;
234
-
235
- intel_enable_gt_powersave(i915);
236
- i915_update_gfx_val(i915);
237
- if (INTEL_GEN(i915) >= 6)
238
- gen6_rps_busy(i915);
239
- i915_pmu_gt_unparked(i915);
240
-
241
- intel_engines_unpark(i915);
242
-
243
- i915_queue_hangcheck(i915);
244
-
245
- queue_delayed_work(i915->wq,
246
- &i915->gt.retire_work,
247
- round_jiffies_up_relative(HZ));
84
+ mutex_unlock(&ggtt->vm.mutex);
24885 }
24986
25087 int
25188 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
25289 struct drm_file *file)
25390 {
254
- struct drm_i915_private *dev_priv = to_i915(dev);
255
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
91
+ struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
25692 struct drm_i915_gem_get_aperture *args = data;
25793 struct i915_vma *vma;
25894 u64 pinned;
25995
96
+ if (mutex_lock_interruptible(&ggtt->vm.mutex))
97
+ return -EINTR;
98
+
26099 pinned = ggtt->vm.reserved;
261
- mutex_lock(&dev->struct_mutex);
262
- list_for_each_entry(vma, &ggtt->vm.active_list, vm_link)
100
+ list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
263101 if (i915_vma_is_pinned(vma))
264102 pinned += vma->node.size;
265
- list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link)
266
- if (i915_vma_is_pinned(vma))
267
- pinned += vma->node.size;
268
- mutex_unlock(&dev->struct_mutex);
103
+
104
+ mutex_unlock(&ggtt->vm.mutex);
269105
270106 args->aper_size = ggtt->vm.total;
271107 args->aper_available_size = args->aper_size - pinned;
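Note: the hunk above replaces struct_mutex-based bookkeeping with ggtt->vm.mutex. insert_mappable_node() and remove_mappable_node() now take the GGTT lock themselves, and the aperture ioctl walks ggtt->vm.bound_list under that same mutex instead of the old active/inactive lists. A minimal caller-side sketch of the reworked helpers; the wrapper function and its comments are illustrative only, not part of the patch:

/* Sketch: exercise the helper signatures introduced above. */
static int example_with_mappable_node(struct i915_ggtt *ggtt)
{
	struct drm_mm_node node;
	int err;

	/* Takes ggtt->vm.mutex internally; returns -EINTR if interrupted. */
	err = insert_mappable_node(ggtt, &node, PAGE_SIZE);
	if (err)
		return err;

	/* ... access the aperture range starting at node.start ... */

	remove_mappable_node(ggtt, &node);	/* also serialised by vm.mutex */
	return 0;
}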
@@ -273,465 +109,97 @@
273109 return 0;
274110 }
275111
276
-static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
112
+int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
113
+ unsigned long flags)
277114 {
278
- struct address_space *mapping = obj->base.filp->f_mapping;
279
- drm_dma_handle_t *phys;
280
- struct sg_table *st;
281
- struct scatterlist *sg;
282
- char *vaddr;
283
- int i;
284
- int err;
285
-
286
- if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
287
- return -EINVAL;
288
-
289
- /* Always aligning to the object size, allows a single allocation
290
- * to handle all possible callers, and given typical object sizes,
291
- * the alignment of the buddy allocation will naturally match.
292
- */
293
- phys = drm_pci_alloc(obj->base.dev,
294
- roundup_pow_of_two(obj->base.size),
295
- roundup_pow_of_two(obj->base.size));
296
- if (!phys)
297
- return -ENOMEM;
298
-
299
- vaddr = phys->vaddr;
300
- for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
301
- struct page *page;
302
- char *src;
303
-
304
- page = shmem_read_mapping_page(mapping, i);
305
- if (IS_ERR(page)) {
306
- err = PTR_ERR(page);
307
- goto err_phys;
308
- }
309
-
310
- src = kmap_atomic(page);
311
- memcpy(vaddr, src, PAGE_SIZE);
312
- drm_clflush_virt_range(vaddr, PAGE_SIZE);
313
- kunmap_atomic(src);
314
-
315
- put_page(page);
316
- vaddr += PAGE_SIZE;
317
- }
318
-
319
- i915_gem_chipset_flush(to_i915(obj->base.dev));
320
-
321
- st = kmalloc(sizeof(*st), GFP_KERNEL);
322
- if (!st) {
323
- err = -ENOMEM;
324
- goto err_phys;
325
- }
326
-
327
- if (sg_alloc_table(st, 1, GFP_KERNEL)) {
328
- kfree(st);
329
- err = -ENOMEM;
330
- goto err_phys;
331
- }
332
-
333
- sg = st->sgl;
334
- sg->offset = 0;
335
- sg->length = obj->base.size;
336
-
337
- sg_dma_address(sg) = phys->busaddr;
338
- sg_dma_len(sg) = obj->base.size;
339
-
340
- obj->phys_handle = phys;
341
-
342
- __i915_gem_object_set_pages(obj, st, sg->length);
343
-
344
- return 0;
345
-
346
-err_phys:
347
- drm_pci_free(obj->base.dev, phys);
348
-
349
- return err;
350
-}
351
-
352
-static void __start_cpu_write(struct drm_i915_gem_object *obj)
353
-{
354
- obj->read_domains = I915_GEM_DOMAIN_CPU;
355
- obj->write_domain = I915_GEM_DOMAIN_CPU;
356
- if (cpu_write_needs_clflush(obj))
357
- obj->cache_dirty = true;
358
-}
359
-
360
-static void
361
-__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
362
- struct sg_table *pages,
363
- bool needs_clflush)
364
-{
365
- GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
366
-
367
- if (obj->mm.madv == I915_MADV_DONTNEED)
368
- obj->mm.dirty = false;
369
-
370
- if (needs_clflush &&
371
- (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
372
- !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
373
- drm_clflush_sg(pages);
374
-
375
- __start_cpu_write(obj);
376
-}
377
-
378
-static void
379
-i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
380
- struct sg_table *pages)
381
-{
382
- __i915_gem_object_release_shmem(obj, pages, false);
383
-
384
- if (obj->mm.dirty) {
385
- struct address_space *mapping = obj->base.filp->f_mapping;
386
- char *vaddr = obj->phys_handle->vaddr;
387
- int i;
388
-
389
- for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
390
- struct page *page;
391
- char *dst;
392
-
393
- page = shmem_read_mapping_page(mapping, i);
394
- if (IS_ERR(page))
395
- continue;
396
-
397
- dst = kmap_atomic(page);
398
- drm_clflush_virt_range(vaddr, PAGE_SIZE);
399
- memcpy(dst, vaddr, PAGE_SIZE);
400
- kunmap_atomic(dst);
401
-
402
- set_page_dirty(page);
403
- if (obj->mm.madv == I915_MADV_WILLNEED)
404
- mark_page_accessed(page);
405
- put_page(page);
406
- vaddr += PAGE_SIZE;
407
- }
408
- obj->mm.dirty = false;
409
- }
410
-
411
- sg_free_table(pages);
412
- kfree(pages);
413
-
414
- drm_pci_free(obj->base.dev, obj->phys_handle);
415
-}
416
-
417
-static void
418
-i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
419
-{
420
- i915_gem_object_unpin_pages(obj);
421
-}
422
-
423
-static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
424
- .get_pages = i915_gem_object_get_pages_phys,
425
- .put_pages = i915_gem_object_put_pages_phys,
426
- .release = i915_gem_object_release_phys,
427
-};
428
-
429
-static const struct drm_i915_gem_object_ops i915_gem_object_ops;
430
-
431
-int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
432
-{
433
- struct i915_vma *vma;
115
+ struct intel_runtime_pm *rpm = &to_i915(obj->base.dev)->runtime_pm;
434116 LIST_HEAD(still_in_list);
117
+ intel_wakeref_t wakeref;
118
+ struct i915_vma *vma;
435119 int ret;
436120
437
- lockdep_assert_held(&obj->base.dev->struct_mutex);
121
+ if (list_empty(&obj->vma.list))
122
+ return 0;
438123
439
- /* Closed vma are removed from the obj->vma_list - but they may
440
- * still have an active binding on the object. To remove those we
441
- * must wait for all rendering to complete to the object (as unbinding
442
- * must anyway), and retire the requests.
124
+ /*
125
+ * As some machines use ACPI to handle runtime-resume callbacks, and
126
+ * ACPI is quite kmalloc happy, we cannot resume beneath the vm->mutex
127
+ * as they are required by the shrinker. Ergo, we wake the device up
128
+ * first just in case.
443129 */
444
- ret = i915_gem_object_set_to_cpu_domain(obj, false);
445
- if (ret)
446
- return ret;
130
+ wakeref = intel_runtime_pm_get(rpm);
447131
448
- while ((vma = list_first_entry_or_null(&obj->vma_list,
449
- struct i915_vma,
450
- obj_link))) {
132
+try_again:
133
+ ret = 0;
134
+ spin_lock(&obj->vma.lock);
135
+ while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
136
+ struct i915_vma,
137
+ obj_link))) {
138
+ struct i915_address_space *vm = vma->vm;
139
+
451140 list_move_tail(&vma->obj_link, &still_in_list);
452
- ret = i915_vma_unbind(vma);
453
- if (ret)
141
+ if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK))
142
+ continue;
143
+
144
+ if (flags & I915_GEM_OBJECT_UNBIND_TEST) {
145
+ ret = -EBUSY;
454146 break;
147
+ }
148
+
149
+ ret = -EAGAIN;
150
+ if (!i915_vm_tryopen(vm))
151
+ break;
152
+
153
+ /* Prevent vma being freed by i915_vma_parked as we unbind */
154
+ vma = __i915_vma_get(vma);
155
+ spin_unlock(&obj->vma.lock);
156
+
157
+ if (vma) {
158
+ ret = -EBUSY;
159
+ if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
160
+ !i915_vma_is_active(vma))
161
+ ret = i915_vma_unbind(vma);
162
+
163
+ __i915_vma_put(vma);
164
+ }
165
+
166
+ i915_vm_close(vm);
167
+ spin_lock(&obj->vma.lock);
455168 }
456
- list_splice(&still_in_list, &obj->vma_list);
169
+ list_splice_init(&still_in_list, &obj->vma.list);
170
+ spin_unlock(&obj->vma.lock);
171
+
172
+ if (ret == -EAGAIN && flags & I915_GEM_OBJECT_UNBIND_BARRIER) {
173
+ rcu_barrier(); /* flush the i915_vm_release() */
174
+ goto try_again;
175
+ }
176
+
177
+ intel_runtime_pm_put(rpm, wakeref);
457178
458179 return ret;
459180 }
460181
461
-static long
462
-i915_gem_object_wait_fence(struct dma_fence *fence,
463
- unsigned int flags,
464
- long timeout,
465
- struct intel_rps_client *rps_client)
466
-{
467
- struct i915_request *rq;
468
-
469
- BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
470
-
471
- if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
472
- return timeout;
473
-
474
- if (!dma_fence_is_i915(fence))
475
- return dma_fence_wait_timeout(fence,
476
- flags & I915_WAIT_INTERRUPTIBLE,
477
- timeout);
478
-
479
- rq = to_request(fence);
480
- if (i915_request_completed(rq))
481
- goto out;
482
-
483
- /*
484
- * This client is about to stall waiting for the GPU. In many cases
485
- * this is undesirable and limits the throughput of the system, as
486
- * many clients cannot continue processing user input/output whilst
487
- * blocked. RPS autotuning may take tens of milliseconds to respond
488
- * to the GPU load and thus incurs additional latency for the client.
489
- * We can circumvent that by promoting the GPU frequency to maximum
490
- * before we wait. This makes the GPU throttle up much more quickly
491
- * (good for benchmarks and user experience, e.g. window animations),
492
- * but at a cost of spending more power processing the workload
493
- * (bad for battery). Not all clients even want their results
494
- * immediately and for them we should just let the GPU select its own
495
- * frequency to maximise efficiency. To prevent a single client from
496
- * forcing the clocks too high for the whole system, we only allow
497
- * each client to waitboost once in a busy period.
498
- */
499
- if (rps_client && !i915_request_started(rq)) {
500
- if (INTEL_GEN(rq->i915) >= 6)
501
- gen6_rps_boost(rq, rps_client);
502
- }
503
-
504
- timeout = i915_request_wait(rq, flags, timeout);
505
-
506
-out:
507
- if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
508
- i915_request_retire_upto(rq);
509
-
510
- return timeout;
511
-}
512
-
513
-static long
514
-i915_gem_object_wait_reservation(struct reservation_object *resv,
515
- unsigned int flags,
516
- long timeout,
517
- struct intel_rps_client *rps_client)
518
-{
519
- unsigned int seq = read_seqbegin(&resv->seq);
520
- struct dma_fence *excl;
521
- bool prune_fences = false;
522
-
523
- if (flags & I915_WAIT_ALL) {
524
- struct dma_fence **shared;
525
- unsigned int count, i;
526
- int ret;
527
-
528
- ret = reservation_object_get_fences_rcu(resv,
529
- &excl, &count, &shared);
530
- if (ret)
531
- return ret;
532
-
533
- for (i = 0; i < count; i++) {
534
- timeout = i915_gem_object_wait_fence(shared[i],
535
- flags, timeout,
536
- rps_client);
537
- if (timeout < 0)
538
- break;
539
-
540
- dma_fence_put(shared[i]);
541
- }
542
-
543
- for (; i < count; i++)
544
- dma_fence_put(shared[i]);
545
- kfree(shared);
546
-
547
- /*
548
- * If both shared fences and an exclusive fence exist,
549
- * then by construction the shared fences must be later
550
- * than the exclusive fence. If we successfully wait for
551
- * all the shared fences, we know that the exclusive fence
552
- * must all be signaled. If all the shared fences are
553
- * signaled, we can prune the array and recover the
554
- * floating references on the fences/requests.
555
- */
556
- prune_fences = count && timeout >= 0;
557
- } else {
558
- excl = reservation_object_get_excl_rcu(resv);
559
- }
560
-
561
- if (excl && timeout >= 0)
562
- timeout = i915_gem_object_wait_fence(excl, flags, timeout,
563
- rps_client);
564
-
565
- dma_fence_put(excl);
566
-
567
- /*
568
- * Opportunistically prune the fences iff we know they have *all* been
569
- * signaled and that the reservation object has not been changed (i.e.
570
- * no new fences have been added).
571
- */
572
- if (prune_fences && !read_seqretry(&resv->seq, seq)) {
573
- if (reservation_object_trylock(resv)) {
574
- if (!read_seqretry(&resv->seq, seq))
575
- reservation_object_add_excl_fence(resv, NULL);
576
- reservation_object_unlock(resv);
577
- }
578
- }
579
-
580
- return timeout;
581
-}
582
-
583
-static void __fence_set_priority(struct dma_fence *fence,
584
- const struct i915_sched_attr *attr)
585
-{
586
- struct i915_request *rq;
587
- struct intel_engine_cs *engine;
588
-
589
- if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
590
- return;
591
-
592
- rq = to_request(fence);
593
- engine = rq->engine;
594
-
595
- local_bh_disable();
596
- rcu_read_lock(); /* RCU serialisation for set-wedged protection */
597
- if (engine->schedule)
598
- engine->schedule(rq, attr);
599
- rcu_read_unlock();
600
- local_bh_enable(); /* kick the tasklets if queues were reprioritised */
601
-}
602
-
603
-static void fence_set_priority(struct dma_fence *fence,
604
- const struct i915_sched_attr *attr)
605
-{
606
- /* Recurse once into a fence-array */
607
- if (dma_fence_is_array(fence)) {
608
- struct dma_fence_array *array = to_dma_fence_array(fence);
609
- int i;
610
-
611
- for (i = 0; i < array->num_fences; i++)
612
- __fence_set_priority(array->fences[i], attr);
613
- } else {
614
- __fence_set_priority(fence, attr);
615
- }
616
-}
617
-
618
-int
619
-i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
620
- unsigned int flags,
621
- const struct i915_sched_attr *attr)
622
-{
623
- struct dma_fence *excl;
624
-
625
- if (flags & I915_WAIT_ALL) {
626
- struct dma_fence **shared;
627
- unsigned int count, i;
628
- int ret;
629
-
630
- ret = reservation_object_get_fences_rcu(obj->resv,
631
- &excl, &count, &shared);
632
- if (ret)
633
- return ret;
634
-
635
- for (i = 0; i < count; i++) {
636
- fence_set_priority(shared[i], attr);
637
- dma_fence_put(shared[i]);
638
- }
639
-
640
- kfree(shared);
641
- } else {
642
- excl = reservation_object_get_excl_rcu(obj->resv);
643
- }
644
-
645
- if (excl) {
646
- fence_set_priority(excl, attr);
647
- dma_fence_put(excl);
648
- }
649
- return 0;
650
-}
651
-
652
-/**
653
- * Waits for rendering to the object to be completed
654
- * @obj: i915 gem object
655
- * @flags: how to wait (under a lock, for all rendering or just for writes etc)
656
- * @timeout: how long to wait
657
- * @rps_client: client (user process) to charge for any waitboosting
658
- */
659
-int
660
-i915_gem_object_wait(struct drm_i915_gem_object *obj,
661
- unsigned int flags,
662
- long timeout,
663
- struct intel_rps_client *rps_client)
664
-{
665
- might_sleep();
666
-#if IS_ENABLED(CONFIG_LOCKDEP)
667
- GEM_BUG_ON(debug_locks &&
668
- !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
669
- !!(flags & I915_WAIT_LOCKED));
670
-#endif
671
- GEM_BUG_ON(timeout < 0);
672
-
673
- timeout = i915_gem_object_wait_reservation(obj->resv,
674
- flags, timeout,
675
- rps_client);
676
- return timeout < 0 ? timeout : 0;
677
-}
678
-
679
-static struct intel_rps_client *to_rps_client(struct drm_file *file)
680
-{
681
- struct drm_i915_file_private *fpriv = file->driver_priv;
682
-
683
- return &fpriv->rps_client;
684
-}
685
-
686
-static int
687
-i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
688
- struct drm_i915_gem_pwrite *args,
689
- struct drm_file *file)
690
-{
691
- void *vaddr = obj->phys_handle->vaddr + args->offset;
692
- char __user *user_data = u64_to_user_ptr(args->data_ptr);
693
-
694
- /* We manually control the domain here and pretend that it
695
- * remains coherent i.e. in the GTT domain, like shmem_pwrite.
696
- */
697
- intel_fb_obj_invalidate(obj, ORIGIN_CPU);
698
- if (copy_from_user(vaddr, user_data, args->size))
699
- return -EFAULT;
700
-
701
- drm_clflush_virt_range(vaddr, args->size);
702
- i915_gem_chipset_flush(to_i915(obj->base.dev));
703
-
704
- intel_fb_obj_flush(obj, ORIGIN_CPU);
705
- return 0;
706
-}
707
-
708
-void *i915_gem_object_alloc(struct drm_i915_private *dev_priv)
709
-{
710
- return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
711
-}
712
-
713
-void i915_gem_object_free(struct drm_i915_gem_object *obj)
714
-{
715
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
716
- kmem_cache_free(dev_priv->objects, obj);
717
-}
718
-
719182 static int
720183 i915_gem_create(struct drm_file *file,
721
- struct drm_i915_private *dev_priv,
722
- uint64_t size,
723
- uint32_t *handle_p)
184
+ struct intel_memory_region *mr,
185
+ u64 *size_p,
186
+ u32 *handle_p)
724187 {
725188 struct drm_i915_gem_object *obj;
726
- int ret;
727189 u32 handle;
190
+ u64 size;
191
+ int ret;
728192
729
- size = roundup(size, PAGE_SIZE);
193
+ GEM_BUG_ON(!is_power_of_2(mr->min_page_size));
194
+ size = round_up(*size_p, mr->min_page_size);
730195 if (size == 0)
731196 return -EINVAL;
732197
198
+ /* For most of the ABI (e.g. mmap) we think in system pages */
199
+ GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
200
+
733201 /* Allocate the new object */
734
- obj = i915_gem_object_create(dev_priv, size);
202
+ obj = i915_gem_object_create_region(mr, size, 0);
735203 if (IS_ERR(obj))
736204 return PTR_ERR(obj);
737205
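Note: i915_gem_object_unbind() above no longer relies on struct_mutex. It grabs a runtime-PM wakeref up front (as the hunk's comment says, ACPI resume paths may allocate, so this cannot happen under vm->mutex), walks obj->vma.list under the obj->vma.lock spinlock, and when I915_GEM_OBJECT_UNBIND_BARRIER is set it retries after rcu_barrier() has flushed a pending i915_vm_release(). A hypothetical caller, sketched only from the flags visible in this hunk:

/* Illustrative use of the new unbind flags; not taken from the patched file. */
static int example_force_unbind(struct drm_i915_gem_object *obj)
{
	/* Non-destructive probe: -EBUSY if any VMA is still bound. */
	if (i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_TEST) == 0)
		return 0;

	/* Unbind even active VMAs, retrying across an address-space release. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}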
@@ -742,6 +210,7 @@
742210 return ret;
743211
744212 *handle_p = handle;
213
+ *size_p = size;
745214 return 0;
746215 }
747216
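Note: i915_gem_create() now rounds the requested size up to the memory region's minimum page size and reports the rounded value back through *size_p, so userspace sees the real allocation size. As a worked example with assumed values: a 4000-byte request against a region whose min_page_size is 4096 creates a 4096-byte object and returns args->size = 4096; a zero-byte request still fails with -EINVAL.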
@@ -750,17 +219,45 @@
750219 struct drm_device *dev,
751220 struct drm_mode_create_dumb *args)
752221 {
753
- /* have to work out size/pitch and return them */
754
- args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
755
- args->size = args->pitch * args->height;
756
- return i915_gem_create(file, to_i915(dev),
757
- args->size, &args->handle);
758
-}
222
+ enum intel_memory_type mem_type;
223
+ int cpp = DIV_ROUND_UP(args->bpp, 8);
224
+ u32 format;
759225
760
-static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
761
-{
762
- return !(obj->cache_level == I915_CACHE_NONE ||
763
- obj->cache_level == I915_CACHE_WT);
226
+ switch (cpp) {
227
+ case 1:
228
+ format = DRM_FORMAT_C8;
229
+ break;
230
+ case 2:
231
+ format = DRM_FORMAT_RGB565;
232
+ break;
233
+ case 4:
234
+ format = DRM_FORMAT_XRGB8888;
235
+ break;
236
+ default:
237
+ return -EINVAL;
238
+ }
239
+
240
+ /* have to work out size/pitch and return them */
241
+ args->pitch = ALIGN(args->width * cpp, 64);
242
+
243
+ /* align stride to page size so that we can remap */
244
+ if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
245
+ DRM_FORMAT_MOD_LINEAR))
246
+ args->pitch = ALIGN(args->pitch, 4096);
247
+
248
+ if (args->pitch < args->width)
249
+ return -EINVAL;
250
+
251
+ args->size = mul_u32_u32(args->pitch, args->height);
252
+
253
+ mem_type = INTEL_MEMORY_SYSTEM;
254
+ if (HAS_LMEM(to_i915(dev)))
255
+ mem_type = INTEL_MEMORY_LOCAL;
256
+
257
+ return i915_gem_create(file,
258
+ intel_memory_region_by_type(to_i915(dev),
259
+ mem_type),
260
+ &args->size, &args->handle);
764261 }
765262
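Note: the reworked i915_gem_dumb_create() above maps bytes-per-pixel to a DRM format (cpp 1 → DRM_FORMAT_C8, 2 → RGB565, 4 → XRGB8888), 64-byte-aligns the pitch, and bumps the alignment to 4096 bytes when the linear stride limit is exceeded so the buffer can still be remapped. Worked example with assumed parameters: a 1920x1080, 32 bpp request gives cpp = 4, pitch = ALIGN(1920 * 4, 64) = 7680 and size = 7680 * 1080 = 8294400 bytes, placed in local memory only when HAS_LMEM() is true, otherwise in system memory.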
766263 /**
@@ -773,357 +270,63 @@
773270 i915_gem_create_ioctl(struct drm_device *dev, void *data,
774271 struct drm_file *file)
775272 {
776
- struct drm_i915_private *dev_priv = to_i915(dev);
273
+ struct drm_i915_private *i915 = to_i915(dev);
777274 struct drm_i915_gem_create *args = data;
778275
779
- i915_gem_flush_free_objects(dev_priv);
276
+ i915_gem_flush_free_objects(i915);
780277
781
- return i915_gem_create(file, dev_priv,
782
- args->size, &args->handle);
278
+ return i915_gem_create(file,
279
+ intel_memory_region_by_type(i915,
280
+ INTEL_MEMORY_SYSTEM),
281
+ &args->size, &args->handle);
783282 }
784283
785
-static inline enum fb_op_origin
786
-fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
787
-{
788
- return (domain == I915_GEM_DOMAIN_GTT ?
789
- obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
790
-}
791
-
792
-void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
793
-{
794
- /*
795
- * No actual flushing is required for the GTT write domain for reads
796
- * from the GTT domain. Writes to it "immediately" go to main memory
797
- * as far as we know, so there's no chipset flush. It also doesn't
798
- * land in the GPU render cache.
799
- *
800
- * However, we do have to enforce the order so that all writes through
801
- * the GTT land before any writes to the device, such as updates to
802
- * the GATT itself.
803
- *
804
- * We also have to wait a bit for the writes to land from the GTT.
805
- * An uncached read (i.e. mmio) seems to be ideal for the round-trip
806
- * timing. This issue has only been observed when switching quickly
807
- * between GTT writes and CPU reads from inside the kernel on recent hw,
808
- * and it appears to only affect discrete GTT blocks (i.e. on LLC
809
- * system agents we cannot reproduce this behaviour, until Cannonlake
810
- * that was!).
811
- */
812
-
813
- i915_gem_chipset_flush(dev_priv);
814
-
815
- intel_runtime_pm_get(dev_priv);
816
- spin_lock_irq(&dev_priv->uncore.lock);
817
-
818
- POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
819
-
820
- spin_unlock_irq(&dev_priv->uncore.lock);
821
- intel_runtime_pm_put(dev_priv);
822
-}
823
-
824
-static void
825
-flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
826
-{
827
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
828
- struct i915_vma *vma;
829
-
830
- if (!(obj->write_domain & flush_domains))
831
- return;
832
-
833
- switch (obj->write_domain) {
834
- case I915_GEM_DOMAIN_GTT:
835
- i915_gem_flush_ggtt_writes(dev_priv);
836
-
837
- intel_fb_obj_flush(obj,
838
- fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
839
-
840
- for_each_ggtt_vma(vma, obj) {
841
- if (vma->iomap)
842
- continue;
843
-
844
- i915_vma_unset_ggtt_write(vma);
845
- }
846
- break;
847
-
848
- case I915_GEM_DOMAIN_WC:
849
- wmb();
850
- break;
851
-
852
- case I915_GEM_DOMAIN_CPU:
853
- i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
854
- break;
855
-
856
- case I915_GEM_DOMAIN_RENDER:
857
- if (gpu_write_needs_clflush(obj))
858
- obj->cache_dirty = true;
859
- break;
860
- }
861
-
862
- obj->write_domain = 0;
863
-}
864
-
865
-static inline int
866
-__copy_to_user_swizzled(char __user *cpu_vaddr,
867
- const char *gpu_vaddr, int gpu_offset,
868
- int length)
869
-{
870
- int ret, cpu_offset = 0;
871
-
872
- while (length > 0) {
873
- int cacheline_end = ALIGN(gpu_offset + 1, 64);
874
- int this_length = min(cacheline_end - gpu_offset, length);
875
- int swizzled_gpu_offset = gpu_offset ^ 64;
876
-
877
- ret = __copy_to_user(cpu_vaddr + cpu_offset,
878
- gpu_vaddr + swizzled_gpu_offset,
879
- this_length);
880
- if (ret)
881
- return ret + length;
882
-
883
- cpu_offset += this_length;
884
- gpu_offset += this_length;
885
- length -= this_length;
886
- }
887
-
888
- return 0;
889
-}
890
-
891
-static inline int
892
-__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
893
- const char __user *cpu_vaddr,
894
- int length)
895
-{
896
- int ret, cpu_offset = 0;
897
-
898
- while (length > 0) {
899
- int cacheline_end = ALIGN(gpu_offset + 1, 64);
900
- int this_length = min(cacheline_end - gpu_offset, length);
901
- int swizzled_gpu_offset = gpu_offset ^ 64;
902
-
903
- ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
904
- cpu_vaddr + cpu_offset,
905
- this_length);
906
- if (ret)
907
- return ret + length;
908
-
909
- cpu_offset += this_length;
910
- gpu_offset += this_length;
911
- length -= this_length;
912
- }
913
-
914
- return 0;
915
-}
916
-
917
-/*
918
- * Pins the specified object's pages and synchronizes the object with
919
- * GPU accesses. Sets needs_clflush to non-zero if the caller should
920
- * flush the object from the CPU cache.
921
- */
922
-int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
923
- unsigned int *needs_clflush)
924
-{
925
- int ret;
926
-
927
- lockdep_assert_held(&obj->base.dev->struct_mutex);
928
-
929
- *needs_clflush = 0;
930
- if (!i915_gem_object_has_struct_page(obj))
931
- return -ENODEV;
932
-
933
- ret = i915_gem_object_wait(obj,
934
- I915_WAIT_INTERRUPTIBLE |
935
- I915_WAIT_LOCKED,
936
- MAX_SCHEDULE_TIMEOUT,
937
- NULL);
938
- if (ret)
939
- return ret;
940
-
941
- ret = i915_gem_object_pin_pages(obj);
942
- if (ret)
943
- return ret;
944
-
945
- if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
946
- !static_cpu_has(X86_FEATURE_CLFLUSH)) {
947
- ret = i915_gem_object_set_to_cpu_domain(obj, false);
948
- if (ret)
949
- goto err_unpin;
950
- else
951
- goto out;
952
- }
953
-
954
- flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
955
-
956
- /* If we're not in the cpu read domain, set ourself into the gtt
957
- * read domain and manually flush cachelines (if required). This
958
- * optimizes for the case when the gpu will dirty the data
959
- * anyway again before the next pread happens.
960
- */
961
- if (!obj->cache_dirty &&
962
- !(obj->read_domains & I915_GEM_DOMAIN_CPU))
963
- *needs_clflush = CLFLUSH_BEFORE;
964
-
965
-out:
966
- /* return with the pages pinned */
967
- return 0;
968
-
969
-err_unpin:
970
- i915_gem_object_unpin_pages(obj);
971
- return ret;
972
-}
973
-
974
-int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
975
- unsigned int *needs_clflush)
976
-{
977
- int ret;
978
-
979
- lockdep_assert_held(&obj->base.dev->struct_mutex);
980
-
981
- *needs_clflush = 0;
982
- if (!i915_gem_object_has_struct_page(obj))
983
- return -ENODEV;
984
-
985
- ret = i915_gem_object_wait(obj,
986
- I915_WAIT_INTERRUPTIBLE |
987
- I915_WAIT_LOCKED |
988
- I915_WAIT_ALL,
989
- MAX_SCHEDULE_TIMEOUT,
990
- NULL);
991
- if (ret)
992
- return ret;
993
-
994
- ret = i915_gem_object_pin_pages(obj);
995
- if (ret)
996
- return ret;
997
-
998
- if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
999
- !static_cpu_has(X86_FEATURE_CLFLUSH)) {
1000
- ret = i915_gem_object_set_to_cpu_domain(obj, true);
1001
- if (ret)
1002
- goto err_unpin;
1003
- else
1004
- goto out;
1005
- }
1006
-
1007
- flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
1008
-
1009
- /* If we're not in the cpu write domain, set ourself into the
1010
- * gtt write domain and manually flush cachelines (as required).
1011
- * This optimizes for the case when the gpu will use the data
1012
- * right away and we therefore have to clflush anyway.
1013
- */
1014
- if (!obj->cache_dirty) {
1015
- *needs_clflush |= CLFLUSH_AFTER;
1016
-
1017
- /*
1018
- * Same trick applies to invalidate partially written
1019
- * cachelines read before writing.
1020
- */
1021
- if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
1022
- *needs_clflush |= CLFLUSH_BEFORE;
1023
- }
1024
-
1025
-out:
1026
- intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1027
- obj->mm.dirty = true;
1028
- /* return with the pages pinned */
1029
- return 0;
1030
-
1031
-err_unpin:
1032
- i915_gem_object_unpin_pages(obj);
1033
- return ret;
1034
-}
1035
-
1036
-static void
1037
-shmem_clflush_swizzled_range(char *addr, unsigned long length,
1038
- bool swizzled)
1039
-{
1040
- if (unlikely(swizzled)) {
1041
- unsigned long start = (unsigned long) addr;
1042
- unsigned long end = (unsigned long) addr + length;
1043
-
1044
- /* For swizzling simply ensure that we always flush both
1045
- * channels. Lame, but simple and it works. Swizzled
1046
- * pwrite/pread is far from a hotpath - current userspace
1047
- * doesn't use it at all. */
1048
- start = round_down(start, 128);
1049
- end = round_up(end, 128);
1050
-
1051
- drm_clflush_virt_range((void *)start, end - start);
1052
- } else {
1053
- drm_clflush_virt_range(addr, length);
1054
- }
1055
-
1056
-}
1057
-
1058
-/* Only difference to the fast-path function is that this can handle bit17
1059
- * and uses non-atomic copy and kmap functions. */
1060284 static int
1061
-shmem_pread_slow(struct page *page, int offset, int length,
1062
- char __user *user_data,
1063
- bool page_do_bit17_swizzling, bool needs_clflush)
285
+shmem_pread(struct page *page, int offset, int len, char __user *user_data,
286
+ bool needs_clflush)
1064287 {
1065288 char *vaddr;
1066289 int ret;
1067290
1068291 vaddr = kmap(page);
1069
- if (needs_clflush)
1070
- shmem_clflush_swizzled_range(vaddr + offset, length,
1071
- page_do_bit17_swizzling);
1072292
1073
- if (page_do_bit17_swizzling)
1074
- ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
1075
- else
1076
- ret = __copy_to_user(user_data, vaddr + offset, length);
293
+ if (needs_clflush)
294
+ drm_clflush_virt_range(vaddr + offset, len);
295
+
296
+ ret = __copy_to_user(user_data, vaddr + offset, len);
297
+
1077298 kunmap(page);
1078299
1079
- return ret ? - EFAULT : 0;
1080
-}
1081
-
1082
-static int
1083
-shmem_pread(struct page *page, int offset, int length, char __user *user_data,
1084
- bool page_do_bit17_swizzling, bool needs_clflush)
1085
-{
1086
- int ret;
1087
-
1088
- ret = -ENODEV;
1089
- if (!page_do_bit17_swizzling) {
1090
- char *vaddr = kmap_atomic(page);
1091
-
1092
- if (needs_clflush)
1093
- drm_clflush_virt_range(vaddr + offset, length);
1094
- ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
1095
- kunmap_atomic(vaddr);
1096
- }
1097
- if (ret == 0)
1098
- return 0;
1099
-
1100
- return shmem_pread_slow(page, offset, length, user_data,
1101
- page_do_bit17_swizzling, needs_clflush);
300
+ return ret ? -EFAULT : 0;
1102301 }
1103302
1104303 static int
1105304 i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
1106305 struct drm_i915_gem_pread *args)
1107306 {
1108
- char __user *user_data;
1109
- u64 remain;
1110
- unsigned int obj_do_bit17_swizzling;
1111307 unsigned int needs_clflush;
1112308 unsigned int idx, offset;
309
+ struct dma_fence *fence;
310
+ char __user *user_data;
311
+ u64 remain;
1113312 int ret;
1114313
1115
- obj_do_bit17_swizzling = 0;
1116
- if (i915_gem_object_needs_bit17_swizzle(obj))
1117
- obj_do_bit17_swizzling = BIT(17);
1118
-
1119
- ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
314
+ ret = i915_gem_object_lock_interruptible(obj, NULL);
1120315 if (ret)
1121316 return ret;
1122317
1123
- ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
1124
- mutex_unlock(&obj->base.dev->struct_mutex);
1125
- if (ret)
318
+ ret = i915_gem_object_prepare_read(obj, &needs_clflush);
319
+ if (ret) {
320
+ i915_gem_object_unlock(obj);
1126321 return ret;
322
+ }
323
+
324
+ fence = i915_gem_object_lock_fence(obj);
325
+ i915_gem_object_finish_access(obj);
326
+ i915_gem_object_unlock(obj);
327
+
328
+ if (!fence)
329
+ return -ENOMEM;
1127330
1128331 remain = args->size;
1129332 user_data = u64_to_user_ptr(args->data_ptr);
@@ -1133,7 +336,6 @@
1133336 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
1134337
1135338 ret = shmem_pread(page, offset, length, user_data,
1136
- page_to_phys(page) & obj_do_bit17_swizzling,
1137339 needs_clflush);
1138340 if (ret)
1139341 break;
@@ -1143,7 +345,7 @@
1143345 offset = 0;
1144346 }
1145347
1146
- i915_gem_obj_finish_shmem_access(obj);
348
+ i915_gem_object_unlock_fence(obj, fence);
1147349 return ret;
1148350 }
1149351
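Note: the shmem pread path above drops struct_mutex in favour of per-object locking: take the object lock, prepare the read (pinning pages and deciding whether clflush is needed), pin a fence so the pages stay valid, then drop the lock before copying to userspace. A minimal sketch of that prepare step, using only the helpers named in this hunk; the wrapper itself is illustrative:

/* Sketch of the prepare sequence used by the new shmem pread path. */
static int example_prepare_cpu_read(struct drm_i915_gem_object *obj,
				    unsigned int *needs_clflush,
				    struct dma_fence **fence)
{
	int ret;

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		return ret;

	ret = i915_gem_object_prepare_read(obj, needs_clflush);
	if (ret) {
		i915_gem_object_unlock(obj);
		return ret;
	}

	*fence = i915_gem_object_lock_fence(obj);	/* may be NULL */
	i915_gem_object_finish_access(obj);
	i915_gem_object_unlock(obj);

	return *fence ? 0 : -ENOMEM;
}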
@@ -1177,42 +379,47 @@
1177379 {
1178380 struct drm_i915_private *i915 = to_i915(obj->base.dev);
1179381 struct i915_ggtt *ggtt = &i915->ggtt;
382
+ intel_wakeref_t wakeref;
1180383 struct drm_mm_node node;
1181
- struct i915_vma *vma;
384
+ struct dma_fence *fence;
1182385 void __user *user_data;
386
+ struct i915_vma *vma;
1183387 u64 remain, offset;
1184388 int ret;
1185389
1186
- ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1187
- if (ret)
1188
- return ret;
1189
-
1190
- intel_runtime_pm_get(i915);
1191
- vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1192
- PIN_MAPPABLE |
1193
- PIN_NONFAULT |
1194
- PIN_NONBLOCK);
390
+ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
391
+ vma = ERR_PTR(-ENODEV);
392
+ if (!i915_gem_object_is_tiled(obj))
393
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
394
+ PIN_MAPPABLE |
395
+ PIN_NONBLOCK /* NOWARN */ |
396
+ PIN_NOEVICT);
1195397 if (!IS_ERR(vma)) {
1196398 node.start = i915_ggtt_offset(vma);
1197
- node.allocated = false;
1198
- ret = i915_vma_put_fence(vma);
1199
- if (ret) {
1200
- i915_vma_unpin(vma);
1201
- vma = ERR_PTR(ret);
1202
- }
1203
- }
1204
- if (IS_ERR(vma)) {
399
+ node.flags = 0;
400
+ } else {
1205401 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1206402 if (ret)
1207
- goto out_unlock;
1208
- GEM_BUG_ON(!node.allocated);
403
+ goto out_rpm;
404
+ GEM_BUG_ON(!drm_mm_node_allocated(&node));
1209405 }
1210406
1211
- ret = i915_gem_object_set_to_gtt_domain(obj, false);
407
+ ret = i915_gem_object_lock_interruptible(obj, NULL);
1212408 if (ret)
1213409 goto out_unpin;
1214410
1215
- mutex_unlock(&i915->drm.struct_mutex);
411
+ ret = i915_gem_object_set_to_gtt_domain(obj, false);
412
+ if (ret) {
413
+ i915_gem_object_unlock(obj);
414
+ goto out_unpin;
415
+ }
416
+
417
+ fence = i915_gem_object_lock_fence(obj);
418
+ i915_gem_object_unlock(obj);
419
+ if (!fence) {
420
+ ret = -ENOMEM;
421
+ goto out_unpin;
422
+ }
1216423
1217424 user_data = u64_to_user_ptr(args->data_ptr);
1218425 remain = args->size;
@@ -1229,12 +436,10 @@
1229436 unsigned page_offset = offset_in_page(offset);
1230437 unsigned page_length = PAGE_SIZE - page_offset;
1231438 page_length = remain < page_length ? remain : page_length;
1232
- if (node.allocated) {
1233
- wmb();
439
+ if (drm_mm_node_allocated(&node)) {
1234440 ggtt->vm.insert_page(&ggtt->vm,
1235441 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1236442 node.start, I915_CACHE_NONE, 0);
1237
- wmb();
1238443 } else {
1239444 page_base += offset & PAGE_MASK;
1240445 }
@@ -1250,19 +455,16 @@
1250455 offset += page_length;
1251456 }
1252457
1253
- mutex_lock(&i915->drm.struct_mutex);
458
+ i915_gem_object_unlock_fence(obj, fence);
1254459 out_unpin:
1255
- if (node.allocated) {
1256
- wmb();
460
+ if (drm_mm_node_allocated(&node)) {
1257461 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
1258
- remove_mappable_node(&node);
462
+ remove_mappable_node(ggtt, &node);
1259463 } else {
1260464 i915_vma_unpin(vma);
1261465 }
1262
-out_unlock:
1263
- intel_runtime_pm_put(i915);
1264
- mutex_unlock(&i915->drm.struct_mutex);
1265
-
466
+out_rpm:
467
+ intel_runtime_pm_put(&i915->runtime_pm, wakeref);
1266468 return ret;
1267469 }
1268470
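Note: the GGTT pread path above moves to the wakeref-returning runtime-PM API and tests drm_mm_node_allocated() to decide whether a temporary aperture node (rather than a pinned vma) was used. A minimal sketch of the wakeref pattern, assuming only the names used in this hunk; the function itself is illustrative:

/* Sketch: keep the device awake around a short GGTT aperture access. */
static void example_rpm_section(struct drm_i915_private *i915)
{
	intel_wakeref_t wakeref;

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	/* ... insert_page() and read through the aperture while awake ... */
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}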
@@ -1285,8 +487,7 @@
1285487 if (args->size == 0)
1286488 return 0;
1287489
1288
- if (!access_ok(VERIFY_WRITE,
1289
- u64_to_user_ptr(args->data_ptr),
490
+ if (!access_ok(u64_to_user_ptr(args->data_ptr),
1290491 args->size))
1291492 return -EFAULT;
1292493
@@ -1302,10 +503,15 @@
1302503
1303504 trace_i915_gem_object_pread(obj, args->offset, args->size);
1304505
506
+ ret = -ENODEV;
507
+ if (obj->ops->pread)
508
+ ret = obj->ops->pread(obj, args);
509
+ if (ret != -ENODEV)
510
+ goto out;
511
+
1305512 ret = i915_gem_object_wait(obj,
1306513 I915_WAIT_INTERRUPTIBLE,
1307
- MAX_SCHEDULE_TIMEOUT,
1308
- to_rps_client(file));
514
+ MAX_SCHEDULE_TIMEOUT);
1309515 if (ret)
1310516 goto out;
1311517
@@ -1362,15 +568,14 @@
1362568 {
1363569 struct drm_i915_private *i915 = to_i915(obj->base.dev);
1364570 struct i915_ggtt *ggtt = &i915->ggtt;
571
+ struct intel_runtime_pm *rpm = &i915->runtime_pm;
572
+ intel_wakeref_t wakeref;
1365573 struct drm_mm_node node;
574
+ struct dma_fence *fence;
1366575 struct i915_vma *vma;
1367576 u64 remain, offset;
1368577 void __user *user_data;
1369578 int ret;
1370
-
1371
- ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1372
- if (ret)
1373
- return ret;
1374579
1375580 if (i915_gem_object_has_struct_page(obj)) {
1376581 /*
@@ -1380,42 +585,48 @@
1380585 * This easily dwarfs any performance advantage from
1381586 * using the cache bypass of indirect GGTT access.
1382587 */
1383
- if (!intel_runtime_pm_get_if_in_use(i915)) {
1384
- ret = -EFAULT;
1385
- goto out_unlock;
1386
- }
588
+ wakeref = intel_runtime_pm_get_if_in_use(rpm);
589
+ if (!wakeref)
590
+ return -EFAULT;
1387591 } else {
1388592 /* No backing pages, no fallback, we must force GGTT access */
1389
- intel_runtime_pm_get(i915);
593
+ wakeref = intel_runtime_pm_get(rpm);
1390594 }
1391595
1392
- vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1393
- PIN_MAPPABLE |
1394
- PIN_NONFAULT |
1395
- PIN_NONBLOCK);
596
+ vma = ERR_PTR(-ENODEV);
597
+ if (!i915_gem_object_is_tiled(obj))
598
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
599
+ PIN_MAPPABLE |
600
+ PIN_NONBLOCK /* NOWARN */ |
601
+ PIN_NOEVICT);
1396602 if (!IS_ERR(vma)) {
1397603 node.start = i915_ggtt_offset(vma);
1398
- node.allocated = false;
1399
- ret = i915_vma_put_fence(vma);
1400
- if (ret) {
1401
- i915_vma_unpin(vma);
1402
- vma = ERR_PTR(ret);
1403
- }
1404
- }
1405
- if (IS_ERR(vma)) {
604
+ node.flags = 0;
605
+ } else {
1406606 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1407607 if (ret)
1408608 goto out_rpm;
1409
- GEM_BUG_ON(!node.allocated);
609
+ GEM_BUG_ON(!drm_mm_node_allocated(&node));
1410610 }
1411611
1412
- ret = i915_gem_object_set_to_gtt_domain(obj, true);
612
+ ret = i915_gem_object_lock_interruptible(obj, NULL);
1413613 if (ret)
1414614 goto out_unpin;
1415615
1416
- mutex_unlock(&i915->drm.struct_mutex);
616
+ ret = i915_gem_object_set_to_gtt_domain(obj, true);
617
+ if (ret) {
618
+ i915_gem_object_unlock(obj);
619
+ goto out_unpin;
620
+ }
1417621
1418
- intel_fb_obj_invalidate(obj, ORIGIN_CPU);
622
+ fence = i915_gem_object_lock_fence(obj);
623
+ i915_gem_object_unlock(obj);
624
+ if (!fence) {
625
+ ret = -ENOMEM;
626
+ goto out_unpin;
627
+ }
628
+
629
+ i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
1419630
1420631 user_data = u64_to_user_ptr(args->data_ptr);
1421632 offset = args->offset;
@@ -1431,8 +642,9 @@
1431642 unsigned int page_offset = offset_in_page(offset);
1432643 unsigned int page_length = PAGE_SIZE - page_offset;
1433644 page_length = remain < page_length ? remain : page_length;
1434
- if (node.allocated) {
1435
- wmb(); /* flush the write before we modify the GGTT */
645
+ if (drm_mm_node_allocated(&node)) {
646
+ /* flush the write before we modify the GGTT */
647
+ intel_gt_flush_ggtt_writes(ggtt->vm.gt);
1436648 ggtt->vm.insert_page(&ggtt->vm,
1437649 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1438650 node.start, I915_CACHE_NONE, 0);
@@ -1456,49 +668,21 @@
1456668 user_data += page_length;
1457669 offset += page_length;
1458670 }
1459
- intel_fb_obj_flush(obj, ORIGIN_CPU);
1460671
1461
- mutex_lock(&i915->drm.struct_mutex);
672
+ intel_gt_flush_ggtt_writes(ggtt->vm.gt);
673
+ i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
674
+
675
+ i915_gem_object_unlock_fence(obj, fence);
1462676 out_unpin:
1463
- if (node.allocated) {
1464
- wmb();
677
+ if (drm_mm_node_allocated(&node)) {
1465678 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
1466
- remove_mappable_node(&node);
679
+ remove_mappable_node(ggtt, &node);
1467680 } else {
1468681 i915_vma_unpin(vma);
1469682 }
1470683 out_rpm:
1471
- intel_runtime_pm_put(i915);
1472
-out_unlock:
1473
- mutex_unlock(&i915->drm.struct_mutex);
684
+ intel_runtime_pm_put(rpm, wakeref);
1474685 return ret;
1475
-}
1476
-
1477
-static int
1478
-shmem_pwrite_slow(struct page *page, int offset, int length,
1479
- char __user *user_data,
1480
- bool page_do_bit17_swizzling,
1481
- bool needs_clflush_before,
1482
- bool needs_clflush_after)
1483
-{
1484
- char *vaddr;
1485
- int ret;
1486
-
1487
- vaddr = kmap(page);
1488
- if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
1489
- shmem_clflush_swizzled_range(vaddr + offset, length,
1490
- page_do_bit17_swizzling);
1491
- if (page_do_bit17_swizzling)
1492
- ret = __copy_from_user_swizzled(vaddr, offset, user_data,
1493
- length);
1494
- else
1495
- ret = __copy_from_user(vaddr + offset, user_data, length);
1496
- if (needs_clflush_after)
1497
- shmem_clflush_swizzled_range(vaddr + offset, length,
1498
- page_do_bit17_swizzling);
1499
- kunmap(page);
1500
-
1501
- return ret ? -EFAULT : 0;
1502686 }
1503687
1504688 /* Per-page copy function for the shmem pwrite fastpath.
@@ -1508,58 +692,54 @@
1508692 */
1509693 static int
1510694 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
1511
- bool page_do_bit17_swizzling,
1512695 bool needs_clflush_before,
1513696 bool needs_clflush_after)
1514697 {
698
+ char *vaddr;
1515699 int ret;
1516700
1517
- ret = -ENODEV;
1518
- if (!page_do_bit17_swizzling) {
1519
- char *vaddr = kmap_atomic(page);
701
+ vaddr = kmap(page);
1520702
1521
- if (needs_clflush_before)
1522
- drm_clflush_virt_range(vaddr + offset, len);
1523
- ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
1524
- if (needs_clflush_after)
1525
- drm_clflush_virt_range(vaddr + offset, len);
703
+ if (needs_clflush_before)
704
+ drm_clflush_virt_range(vaddr + offset, len);
1526705
1527
- kunmap_atomic(vaddr);
1528
- }
1529
- if (ret == 0)
1530
- return ret;
706
+ ret = __copy_from_user(vaddr + offset, user_data, len);
707
+ if (!ret && needs_clflush_after)
708
+ drm_clflush_virt_range(vaddr + offset, len);
1531709
1532
- return shmem_pwrite_slow(page, offset, len, user_data,
1533
- page_do_bit17_swizzling,
1534
- needs_clflush_before,
1535
- needs_clflush_after);
710
+ kunmap(page);
711
+
712
+ return ret ? -EFAULT : 0;
1536713 }
1537714
1538715 static int
1539716 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
1540717 const struct drm_i915_gem_pwrite *args)
1541718 {
1542
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
1543
- void __user *user_data;
1544
- u64 remain;
1545
- unsigned int obj_do_bit17_swizzling;
1546719 unsigned int partial_cacheline_write;
1547720 unsigned int needs_clflush;
1548721 unsigned int offset, idx;
722
+ struct dma_fence *fence;
723
+ void __user *user_data;
724
+ u64 remain;
1549725 int ret;
1550726
1551
- ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
727
+ ret = i915_gem_object_lock_interruptible(obj, NULL);
1552728 if (ret)
1553729 return ret;
1554730
1555
- ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
1556
- mutex_unlock(&i915->drm.struct_mutex);
1557
- if (ret)
731
+ ret = i915_gem_object_prepare_write(obj, &needs_clflush);
732
+ if (ret) {
733
+ i915_gem_object_unlock(obj);
1558734 return ret;
735
+ }
1559736
1560
- obj_do_bit17_swizzling = 0;
1561
- if (i915_gem_object_needs_bit17_swizzle(obj))
1562
- obj_do_bit17_swizzling = BIT(17);
737
+ fence = i915_gem_object_lock_fence(obj);
738
+ i915_gem_object_finish_access(obj);
739
+ i915_gem_object_unlock(obj);
740
+
741
+ if (!fence)
742
+ return -ENOMEM;
1563743
1564744 /* If we don't overwrite a cacheline completely we need to be
1565745 * careful to have up-to-date data by first clflushing. Don't
@@ -1577,7 +757,6 @@
1577757 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
1578758
1579759 ret = shmem_pwrite(page, offset, length, user_data,
1580
- page_to_phys(page) & obj_do_bit17_swizzling,
1581760 (offset | length) & partial_cacheline_write,
1582761 needs_clflush & CLFLUSH_AFTER);
1583762 if (ret)
@@ -1588,8 +767,9 @@
1588767 offset = 0;
1589768 }
1590769
1591
- intel_fb_obj_flush(obj, ORIGIN_CPU);
1592
- i915_gem_obj_finish_shmem_access(obj);
770
+ i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
771
+ i915_gem_object_unlock_fence(obj, fence);
772
+
1593773 return ret;
1594774 }
1595775
@@ -1612,9 +792,7 @@
1612792 if (args->size == 0)
1613793 return 0;
1614794
1615
- if (!access_ok(VERIFY_READ,
1616
- u64_to_user_ptr(args->data_ptr),
1617
- args->size))
795
+ if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
1618796 return -EFAULT;
1619797
1620798 obj = i915_gem_object_lookup(file, args->handle);
@@ -1644,8 +822,7 @@
1644822 ret = i915_gem_object_wait(obj,
1645823 I915_WAIT_INTERRUPTIBLE |
1646824 I915_WAIT_ALL,
1647
- MAX_SCHEDULE_TIMEOUT,
1648
- to_rps_client(file));
825
+ MAX_SCHEDULE_TIMEOUT);
1649826 if (ret)
1650827 goto err;
1651828
@@ -1669,9 +846,7 @@
1669846 ret = i915_gem_gtt_pwrite_fast(obj, args);
1670847
1671848 if (ret == -EFAULT || ret == -ENOSPC) {
1672
- if (obj->phys_handle)
1673
- ret = i915_gem_phys_pwrite(obj, args, file);
1674
- else
849
+ if (i915_gem_object_has_struct_page(obj))
1675850 ret = i915_gem_shmem_pwrite(obj, args);
1676851 }
1677852
@@ -1679,125 +854,6 @@
1679854 err:
1680855 i915_gem_object_put(obj);
1681856 return ret;
1682
-}
1683
-
1684
-static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
1685
-{
1686
- struct drm_i915_private *i915;
1687
- struct list_head *list;
1688
- struct i915_vma *vma;
1689
-
1690
- GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
1691
-
1692
- for_each_ggtt_vma(vma, obj) {
1693
- if (i915_vma_is_active(vma))
1694
- continue;
1695
-
1696
- if (!drm_mm_node_allocated(&vma->node))
1697
- continue;
1698
-
1699
- list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
1700
- }
1701
-
1702
- i915 = to_i915(obj->base.dev);
1703
- spin_lock(&i915->mm.obj_lock);
1704
- list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
1705
- list_move_tail(&obj->mm.link, list);
1706
- spin_unlock(&i915->mm.obj_lock);
1707
-}
1708
-
1709
-/**
1710
- * Called when user space prepares to use an object with the CPU, either
1711
- * through the mmap ioctl's mapping or a GTT mapping.
1712
- * @dev: drm device
1713
- * @data: ioctl data blob
1714
- * @file: drm file
1715
- */
1716
-int
1717
-i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1718
- struct drm_file *file)
1719
-{
1720
- struct drm_i915_gem_set_domain *args = data;
1721
- struct drm_i915_gem_object *obj;
1722
- uint32_t read_domains = args->read_domains;
1723
- uint32_t write_domain = args->write_domain;
1724
- int err;
1725
-
1726
- /* Only handle setting domains to types used by the CPU. */
1727
- if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
1728
- return -EINVAL;
1729
-
1730
- /* Having something in the write domain implies it's in the read
1731
- * domain, and only that read domain. Enforce that in the request.
1732
- */
1733
- if (write_domain != 0 && read_domains != write_domain)
1734
- return -EINVAL;
1735
-
1736
- obj = i915_gem_object_lookup(file, args->handle);
1737
- if (!obj)
1738
- return -ENOENT;
1739
-
1740
- /* Try to flush the object off the GPU without holding the lock.
1741
- * We will repeat the flush holding the lock in the normal manner
1742
- * to catch cases where we are gazumped.
1743
- */
1744
- err = i915_gem_object_wait(obj,
1745
- I915_WAIT_INTERRUPTIBLE |
1746
- (write_domain ? I915_WAIT_ALL : 0),
1747
- MAX_SCHEDULE_TIMEOUT,
1748
- to_rps_client(file));
1749
- if (err)
1750
- goto out;
1751
-
1752
- /*
1753
- * Proxy objects do not control access to the backing storage, ergo
1754
- * they cannot be used as a means to manipulate the cache domain
1755
- * tracking for that backing storage. The proxy object is always
1756
- * considered to be outside of any cache domain.
1757
- */
1758
- if (i915_gem_object_is_proxy(obj)) {
1759
- err = -ENXIO;
1760
- goto out;
1761
- }
1762
-
1763
- /*
1764
- * Flush and acquire obj->pages so that we are coherent through
1765
- * direct access in memory with previous cached writes through
1766
- * shmemfs and that our cache domain tracking remains valid.
1767
- * For example, if the obj->filp was moved to swap without us
1768
- * being notified and releasing the pages, we would mistakenly
1769
- * continue to assume that the obj remained out of the CPU cached
1770
- * domain.
1771
- */
1772
- err = i915_gem_object_pin_pages(obj);
1773
- if (err)
1774
- goto out;
1775
-
1776
- err = i915_mutex_lock_interruptible(dev);
1777
- if (err)
1778
- goto out_unpin;
1779
-
1780
- if (read_domains & I915_GEM_DOMAIN_WC)
1781
- err = i915_gem_object_set_to_wc_domain(obj, write_domain);
1782
- else if (read_domains & I915_GEM_DOMAIN_GTT)
1783
- err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
1784
- else
1785
- err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
1786
-
1787
- /* And bump the LRU for this access */
1788
- i915_gem_object_bump_inactive_ggtt(obj);
1789
-
1790
- mutex_unlock(&dev->struct_mutex);
1791
-
1792
- if (write_domain != 0)
1793
- intel_fb_obj_invalidate(obj,
1794
- fb_write_origin(obj, write_domain));
1795
-
1796
-out_unpin:
1797
- i915_gem_object_unpin_pages(obj);
1798
-out:
1799
- i915_gem_object_put(obj);
1800
- return err;
1801857 }
1802858
1803859 /**
@@ -1829,424 +885,7 @@
1829885 return 0;
1830886 }
1831887
1832
-static inline bool
1833
-__vma_matches(struct vm_area_struct *vma, struct file *filp,
1834
- unsigned long addr, unsigned long size)
1835
-{
1836
- if (vma->vm_file != filp)
1837
- return false;
1838
-
1839
- return vma->vm_start == addr &&
1840
- (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
1841
-}
1842
-
1843
-/**
1844
- * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1845
- * it is mapped to.
1846
- * @dev: drm device
1847
- * @data: ioctl data blob
1848
- * @file: drm file
1849
- *
1850
- * While the mapping holds a reference on the contents of the object, it doesn't
1851
- * imply a ref on the object itself.
1852
- *
1853
- * IMPORTANT:
1854
- *
1855
- * DRM driver writers who look a this function as an example for how to do GEM
1856
- * mmap support, please don't implement mmap support like here. The modern way
1857
- * to implement DRM mmap support is with an mmap offset ioctl (like
1858
- * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1859
- * That way debug tooling like valgrind will understand what's going on, hiding
1860
- * the mmap call in a driver private ioctl will break that. The i915 driver only
1861
- * does cpu mmaps this way because we didn't know better.
1862
- */
1863
-int
1864
-i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1865
- struct drm_file *file)
1866
-{
1867
- struct drm_i915_gem_mmap *args = data;
1868
- struct drm_i915_gem_object *obj;
1869
- unsigned long addr;
1870
-
1871
- if (args->flags & ~(I915_MMAP_WC))
1872
- return -EINVAL;
1873
-
1874
- if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1875
- return -ENODEV;
1876
-
1877
- obj = i915_gem_object_lookup(file, args->handle);
1878
- if (!obj)
1879
- return -ENOENT;
1880
-
1881
- /* prime objects have no backing filp to GEM mmap
1882
- * pages from.
1883
- */
1884
- if (!obj->base.filp) {
1885
- addr = -ENXIO;
1886
- goto err;
1887
- }
1888
-
1889
- if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
1890
- addr = -EINVAL;
1891
- goto err;
1892
- }
1893
-
1894
- addr = vm_mmap(obj->base.filp, 0, args->size,
1895
- PROT_READ | PROT_WRITE, MAP_SHARED,
1896
- args->offset);
1897
- if (IS_ERR_VALUE(addr))
1898
- goto err;
1899
-
1900
- if (args->flags & I915_MMAP_WC) {
1901
- struct mm_struct *mm = current->mm;
1902
- struct vm_area_struct *vma;
1903
-
1904
- if (down_write_killable(&mm->mmap_sem)) {
1905
- addr = -EINTR;
1906
- goto err;
1907
- }
1908
- vma = find_vma(mm, addr);
1909
- if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
1910
- vma->vm_page_prot =
1911
- pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1912
- else
1913
- addr = -ENOMEM;
1914
- up_write(&mm->mmap_sem);
1915
- if (IS_ERR_VALUE(addr))
1916
- goto err;
1917
-
1918
- /* This may race, but that's ok, it only gets set */
1919
- WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1920
- }
1921
- i915_gem_object_put(obj);
1922
-
1923
- args->addr_ptr = (uint64_t) addr;
1924
- return 0;
1925
-
1926
-err:
1927
- i915_gem_object_put(obj);
1928
- return addr;
1929
-}
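For orientation only, a minimal userspace sketch of how the legacy CPU-mmap ioctl removed above is driven from the caller's side; it assumes libdrm's drmIoctl(), an already-open DRM fd and an existing GEM handle, and the helper name legacy_cpu_mmap() is invented for this illustration:

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

/* Hypothetical helper, not part of this file or of libdrm. */
static void *legacy_cpu_mmap(int drm_fd, uint32_t handle, uint64_t size)
{
	struct drm_i915_gem_mmap arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;	/* GEM handle from GEM_CREATE or a prime import */
	arg.offset = 0;		/* map from the start of the object */
	arg.size = size;	/* length to map, in bytes */
	arg.flags = 0;		/* or I915_MMAP_WC where WC/PAT is available */

	/* The kernel side above performs vm_mmap() on obj->base.filp. */
	if (drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP, &arg))
		return NULL;

	return (void *)(uintptr_t)arg.addr_ptr;
}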
1930
-
1931
-static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
1932
-{
1933
- return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
1934
-}
1935
-
1936
-/**
1937
- * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
1938
- *
1939
- * A history of the GTT mmap interface:
1940
- *
1941
- * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to be
1942
- * aligned and suitable for fencing, and still fit into the available
1943
- * mappable space left by the pinned display objects. A classic problem
1944
- * we called the page-fault-of-doom where we would ping-pong between
1945
- * two objects that could not fit inside the GTT and so the memcpy
1946
- * would page one object in at the expense of the other between every
1947
- * single byte.
1948
- *
1949
- * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
1950
- * as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
1951
- * object is too large for the available space (or simply too large
1952
- * for the mappable aperture!), a view is created instead and faulted
1953
- * into userspace. (This view is aligned and sized appropriately for
1954
- * fenced access.)
1955
- *
1956
- * 2 - Recognise WC as a separate cache domain so that we can flush the
1957
- * delayed writes via GTT before performing direct access via WC.
1958
- *
1959
- * Restrictions:
1960
- *
1961
- * * snoopable objects cannot be accessed via the GTT. It can cause machine
1962
- * hangs on some architectures, corruption on others. An attempt to service
1963
- * a GTT page fault from a snoopable object will generate a SIGBUS.
1964
- *
1965
- * * the object must be able to fit into RAM (physical memory, though not
1966
- * limited to the mappable aperture).
1967
- *
1968
- *
1969
- * Caveats:
1970
- *
1971
- * * a new GTT page fault will synchronize rendering from the GPU and flush
1972
- * all data to system memory. Subsequent access will not be synchronized.
1973
- *
1974
- * * all mappings are revoked on runtime device suspend.
1975
- *
1976
- * * there are only 8, 16 or 32 fence registers to share between all users
1977
- * (older machines require a fence register for display and blitter access
1978
- * as well). Contention of the fence registers will cause the previous users
1979
- * to be unmapped and any new access will generate new page faults.
1980
- *
1981
- * * running out of memory while servicing a fault may generate a SIGBUS,
1982
- * rather than the expected SIGSEGV.
1983
- */
1984
-int i915_gem_mmap_gtt_version(void)
1985
-{
1986
- return 2;
1987
-}
1988
-
1989
-static inline struct i915_ggtt_view
1990
-compute_partial_view(struct drm_i915_gem_object *obj,
1991
- pgoff_t page_offset,
1992
- unsigned int chunk)
1993
-{
1994
- struct i915_ggtt_view view;
1995
-
1996
- if (i915_gem_object_is_tiled(obj))
1997
- chunk = roundup(chunk, tile_row_pages(obj));
1998
-
1999
- view.type = I915_GGTT_VIEW_PARTIAL;
2000
- view.partial.offset = rounddown(page_offset, chunk);
2001
- view.partial.size =
2002
- min_t(unsigned int, chunk,
2003
- (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
2004
-
2005
- /* If the partial covers the entire object, just create a normal VMA. */
2006
- if (chunk >= obj->base.size >> PAGE_SHIFT)
2007
- view.type = I915_GGTT_VIEW_NORMAL;
2008
-
2009
- return view;
2010
-}
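As a worked example of the arithmetic above (assuming 4 KiB pages, so the fault handler's MIN_CHUNK_PAGES chunk below is SZ_1M >> PAGE_SHIFT = 256): an untiled 8 MiB object spans 2048 pages, so a fault at page_offset 1000 yields view.partial.offset = rounddown(1000, 256) = 768 and view.partial.size = min(256, 2048 - 768) = 256, and the view stays I915_GGTT_VIEW_PARTIAL because the 256-page chunk does not cover the object. For a 1 MiB object (256 pages) the chunk covers everything and the type falls back to I915_GGTT_VIEW_NORMAL.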
2011
-
2012
-/**
2013
- * i915_gem_fault - fault a page into the GTT
2014
- * @vmf: fault info
2015
- *
2016
- * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
2017
- * from userspace. The fault handler takes care of binding the object to
2018
- * the GTT (if needed), allocating and programming a fence register (again,
2019
- * only if needed based on whether the old reg is still valid or the object
2020
- * is tiled) and inserting a new PTE into the faulting process.
2021
- *
2022
- * Note that the faulting process may involve evicting existing objects
2023
- * from the GTT and/or fence registers to make room. So performance may
2024
- * suffer if the GTT working set is large or there are few fence registers
2025
- * left.
2026
- *
2027
- * The current feature set supported by i915_gem_fault() and thus GTT mmaps
2028
- * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
2029
- */
2030
-vm_fault_t i915_gem_fault(struct vm_fault *vmf)
2031
-{
2032
-#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
2033
- struct vm_area_struct *area = vmf->vma;
2034
- struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
2035
- struct drm_device *dev = obj->base.dev;
2036
- struct drm_i915_private *dev_priv = to_i915(dev);
2037
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
2038
- bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
2039
- struct i915_vma *vma;
2040
- pgoff_t page_offset;
2041
- int ret;
2042
-
2043
- /* Sanity check that we allow writing into this object */
2044
- if (i915_gem_object_is_readonly(obj) && write)
2045
- return VM_FAULT_SIGBUS;
2046
-
2047
- /* We don't use vmf->pgoff since that has the fake offset */
2048
- page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
2049
-
2050
- trace_i915_gem_object_fault(obj, page_offset, true, write);
2051
-
2052
- /* Try to flush the object off the GPU first without holding the lock.
2053
- * Upon acquiring the lock, we will perform our sanity checks and then
2054
- * repeat the flush holding the lock in the normal manner to catch cases
2055
- * where we are gazumped.
2056
- */
2057
- ret = i915_gem_object_wait(obj,
2058
- I915_WAIT_INTERRUPTIBLE,
2059
- MAX_SCHEDULE_TIMEOUT,
2060
- NULL);
2061
- if (ret)
2062
- goto err;
2063
-
2064
- ret = i915_gem_object_pin_pages(obj);
2065
- if (ret)
2066
- goto err;
2067
-
2068
- intel_runtime_pm_get(dev_priv);
2069
-
2070
- ret = i915_mutex_lock_interruptible(dev);
2071
- if (ret)
2072
- goto err_rpm;
2073
-
2074
- /* Access to snoopable pages through the GTT is incoherent. */
2075
- if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
2076
- ret = -EFAULT;
2077
- goto err_unlock;
2078
- }
2079
-
2080
-
2081
- /* Now pin it into the GTT as needed */
2082
- vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
2083
- PIN_MAPPABLE |
2084
- PIN_NONBLOCK |
2085
- PIN_NONFAULT);
2086
- if (IS_ERR(vma)) {
2087
- /* Use a partial view if it is bigger than available space */
2088
- struct i915_ggtt_view view =
2089
- compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
2090
- unsigned int flags;
2091
-
2092
- flags = PIN_MAPPABLE;
2093
- if (view.type == I915_GGTT_VIEW_NORMAL)
2094
- flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
2095
-
2096
- /*
2097
- * Userspace is now writing through an untracked VMA, abandon
2098
- * all hope that the hardware is able to track future writes.
2099
- */
2100
- obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
2101
-
2102
- vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
2103
- if (IS_ERR(vma) && !view.type) {
2104
- flags = PIN_MAPPABLE;
2105
- view.type = I915_GGTT_VIEW_PARTIAL;
2106
- vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
2107
- }
2108
- }
2109
- if (IS_ERR(vma)) {
2110
- ret = PTR_ERR(vma);
2111
- goto err_unlock;
2112
- }
2113
-
2114
- ret = i915_gem_object_set_to_gtt_domain(obj, write);
2115
- if (ret)
2116
- goto err_unpin;
2117
-
2118
- ret = i915_vma_pin_fence(vma);
2119
- if (ret)
2120
- goto err_unpin;
2121
-
2122
- /* Finally, remap it using the new GTT offset */
2123
- ret = remap_io_mapping(area,
2124
- area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
2125
- (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
2126
- min_t(u64, vma->size, area->vm_end - area->vm_start),
2127
- &ggtt->iomap);
2128
- if (ret)
2129
- goto err_fence;
2130
-
2131
- /* Mark as being mmapped into userspace for later revocation */
2132
- assert_rpm_wakelock_held(dev_priv);
2133
- if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
2134
- list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
2135
- GEM_BUG_ON(!obj->userfault_count);
2136
-
2137
- i915_vma_set_ggtt_write(vma);
2138
-
2139
-err_fence:
2140
- i915_vma_unpin_fence(vma);
2141
-err_unpin:
2142
- __i915_vma_unpin(vma);
2143
-err_unlock:
2144
- mutex_unlock(&dev->struct_mutex);
2145
-err_rpm:
2146
- intel_runtime_pm_put(dev_priv);
2147
- i915_gem_object_unpin_pages(obj);
2148
-err:
2149
- switch (ret) {
2150
- case -EIO:
2151
- /*
2152
- * We eat errors when the gpu is terminally wedged to avoid
2153
- * userspace unduly crashing (gl has no provisions for mmaps to
2154
- * fail). But any other -EIO isn't ours (e.g. swap in failure)
2155
- * and so needs to be reported.
2156
- */
2157
- if (!i915_terminally_wedged(&dev_priv->gpu_error))
2158
- return VM_FAULT_SIGBUS;
2159
- /* else: fall through */
2160
- case -EAGAIN:
2161
- /*
2162
- * EAGAIN means the gpu is hung and we'll wait for the error
2163
- * handler to reset everything when re-faulting in
2164
- * i915_mutex_lock_interruptible.
2165
- */
2166
- case 0:
2167
- case -ERESTARTSYS:
2168
- case -EINTR:
2169
- case -EBUSY:
2170
- /*
2171
- * EBUSY is ok: this just means that another thread
2172
- * already did the job.
2173
- */
2174
- return VM_FAULT_NOPAGE;
2175
- case -ENOMEM:
2176
- return VM_FAULT_OOM;
2177
- case -ENOSPC:
2178
- case -EFAULT:
2179
- return VM_FAULT_SIGBUS;
2180
- default:
2181
- WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
2182
- return VM_FAULT_SIGBUS;
2183
- }
2184
-}
2185
-
2186
-static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
2187
-{
2188
- struct i915_vma *vma;
2189
-
2190
- GEM_BUG_ON(!obj->userfault_count);
2191
-
2192
- obj->userfault_count = 0;
2193
- list_del(&obj->userfault_link);
2194
- drm_vma_node_unmap(&obj->base.vma_node,
2195
- obj->base.dev->anon_inode->i_mapping);
2196
-
2197
- for_each_ggtt_vma(vma, obj)
2198
- i915_vma_unset_userfault(vma);
2199
-}
2200
-
2201
-/**
2202
- * i915_gem_release_mmap - remove physical page mappings
2203
- * @obj: obj in question
2204
- *
2205
- * Preserve the reservation of the mmapping with the DRM core code, but
2206
- * relinquish ownership of the pages back to the system.
2207
- *
2208
- * It is vital that we remove the page mapping if we have mapped a tiled
2209
- * object through the GTT and then lose the fence register due to
2210
- * resource pressure. Similarly if the object has been moved out of the
2211
- * aperture, then pages mapped into userspace must be revoked. Removing the
2212
- * mapping will then trigger a page fault on the next user access, allowing
2213
- * fixup by i915_gem_fault().
2214
- */
2215
-void
2216
-i915_gem_release_mmap(struct drm_i915_gem_object *obj)
2217
-{
2218
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
2219
-
2220
- /* Serialisation between user GTT access and our code depends upon
2221
- * revoking the CPU's PTE whilst the mutex is held. The next user
2222
- * pagefault then has to wait until we release the mutex.
2223
- *
2224
- * Note that RPM complicates somewhat by adding an additional
2225
- * requirement that operations to the GGTT be made holding the RPM
2226
- * wakeref.
2227
- */
2228
- lockdep_assert_held(&i915->drm.struct_mutex);
2229
- intel_runtime_pm_get(i915);
2230
-
2231
- if (!obj->userfault_count)
2232
- goto out;
2233
-
2234
- __i915_gem_object_release_mmap(obj);
2235
-
2236
- /* Ensure that the CPU's PTEs are revoked and there are no outstanding
2237
- * memory transactions from userspace before we return. The TLB
2238
- * flushing implied by changing the PTEs above *should* be
2239
- * sufficient, an extra barrier here just provides us with a bit
2240
- * of paranoid documentation about our requirement to serialise
2241
- * memory writes before touching registers / GSM.
2242
- */
2243
- wmb();
2244
-
2245
-out:
2246
- intel_runtime_pm_put(i915);
2247
-}
2248
-
2249
-void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
888
+void i915_gem_runtime_suspend(struct drm_i915_private *i915)
2250889 {
2251890 struct drm_i915_gem_object *obj, *on;
2252891 int i;
....@@ -2259,17 +898,19 @@
2259898 */
2260899
2261900 list_for_each_entry_safe(obj, on,
2262
- &dev_priv->mm.userfault_list, userfault_link)
2263
- __i915_gem_object_release_mmap(obj);
901
+ &i915->ggtt.userfault_list, userfault_link)
902
+ __i915_gem_object_release_mmap_gtt(obj);
2264903
2265
- /* The fence will be lost when the device powers down. If any were
904
+ /*
905
+ * The fence will be lost when the device powers down. If any were
2266906 * in use by hardware (i.e. they are pinned), we should not be powering
2267907 * down! All other fences will be reacquired by the user upon waking.
2268908 */
2269
- for (i = 0; i < dev_priv->num_fence_regs; i++) {
2270
- struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
909
+ for (i = 0; i < i915->ggtt.num_fences; i++) {
910
+ struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i];
2271911
2272
- /* Ideally we want to assert that the fence register is not
912
+ /*
913
+ * Ideally we want to assert that the fence register is not
2273914 * live at this point (i.e. that no piece of code will be
2274915 * trying to write through fence + GTT, as that both violates
2275916 * our tracking of activity and associated locking/barriers,
....@@ -2288,2287 +929,44 @@
2288929 }
2289930 }
2290931
2291
-static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
932
+static void discard_ggtt_vma(struct i915_vma *vma)
2292933 {
2293
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2294
- int err;
934
+ struct drm_i915_gem_object *obj = vma->obj;
2295935
2296
- err = drm_gem_create_mmap_offset(&obj->base);
2297
- if (likely(!err))
2298
- return 0;
2299
-
2300
- /* Attempt to reap some mmap space from dead objects */
2301
- do {
2302
- err = i915_gem_wait_for_idle(dev_priv,
2303
- I915_WAIT_INTERRUPTIBLE,
2304
- MAX_SCHEDULE_TIMEOUT);
2305
- if (err)
2306
- break;
2307
-
2308
- i915_gem_drain_freed_objects(dev_priv);
2309
- err = drm_gem_create_mmap_offset(&obj->base);
2310
- if (!err)
2311
- break;
2312
-
2313
- } while (flush_delayed_work(&dev_priv->gt.retire_work));
2314
-
2315
- return err;
2316
-}
2317
-
2318
-static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2319
-{
2320
- drm_gem_free_mmap_offset(&obj->base);
2321
-}
2322
-
2323
-int
2324
-i915_gem_mmap_gtt(struct drm_file *file,
2325
- struct drm_device *dev,
2326
- uint32_t handle,
2327
- uint64_t *offset)
2328
-{
2329
- struct drm_i915_gem_object *obj;
2330
- int ret;
2331
-
2332
- obj = i915_gem_object_lookup(file, handle);
2333
- if (!obj)
2334
- return -ENOENT;
2335
-
2336
- ret = i915_gem_object_create_mmap_offset(obj);
2337
- if (ret == 0)
2338
- *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2339
-
2340
- i915_gem_object_put(obj);
2341
- return ret;
2342
-}
2343
-
2344
-/**
2345
- * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2346
- * @dev: DRM device
2347
- * @data: GTT mapping ioctl data
2348
- * @file: GEM object info
2349
- *
2350
- * Simply returns the fake offset to userspace so it can mmap it.
2351
- * The mmap call will end up in drm_gem_mmap(), which will set things
2352
- * up so we can get faults in the handler above.
2353
- *
2354
- * The fault handler will take care of binding the object into the GTT
2355
- * (since it may have been evicted to make room for something), allocating
2356
- * a fence register, and mapping the appropriate aperture address into
2357
- * userspace.
2358
- */
2359
-int
2360
-i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2361
- struct drm_file *file)
2362
-{
2363
- struct drm_i915_gem_mmap_gtt *args = data;
2364
-
2365
- return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2366
-}
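To make the fake-offset flow concrete, here is an illustrative userspace sketch (not taken from this file): fetch the offset with the ioctl, then mmap() the DRM fd at that offset so subsequent faults are serviced by i915_gem_fault() above. It assumes libdrm and an existing GEM handle; the helper name gtt_mmap() is invented for the example:

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

/* Hypothetical helper, for illustration only. */
static void *gtt_mmap(int drm_fd, uint32_t handle, size_t size)
{
	struct drm_i915_gem_mmap_gtt arg;
	void *ptr;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;

	/* The kernel fills arg.offset with the fake mmap offset. */
	if (drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
		return NULL;

	/* mmap the DRM fd itself; the offset selects this object's node. */
	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   drm_fd, arg.offset);
	return ptr == MAP_FAILED ? NULL : ptr;
}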
2367
-
2368
-/* Immediately discard the backing storage */
2369
-static void
2370
-i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2371
-{
2372
- i915_gem_object_free_mmap_offset(obj);
2373
-
2374
- if (obj->base.filp == NULL)
2375
- return;
2376
-
2377
- /* Our goal here is to return as much of the memory as
2378
- * is possible back to the system, as we are called from OOM.
2379
- * To do this we must instruct the shmfs to drop all of its
2380
- * backing pages, *now*.
2381
- */
2382
- shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2383
- obj->mm.madv = __I915_MADV_PURGED;
2384
- obj->mm.pages = ERR_PTR(-EFAULT);
2385
-}
2386
-
2387
-/* Try to discard unwanted pages */
2388
-void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
2389
-{
2390
- struct address_space *mapping;
2391
-
2392
- lockdep_assert_held(&obj->mm.lock);
2393
- GEM_BUG_ON(i915_gem_object_has_pages(obj));
2394
-
2395
- switch (obj->mm.madv) {
2396
- case I915_MADV_DONTNEED:
2397
- i915_gem_object_truncate(obj);
2398
- case __I915_MADV_PURGED:
2399
- return;
936
+ spin_lock(&obj->vma.lock);
937
+ if (!RB_EMPTY_NODE(&vma->obj_node)) {
938
+ rb_erase(&vma->obj_node, &obj->vma.tree);
939
+ RB_CLEAR_NODE(&vma->obj_node);
2400940 }
2401
-
2402
- if (obj->base.filp == NULL)
2403
- return;
2404
-
2405
- mapping = obj->base.filp->f_mapping;
2406
- invalidate_mapping_pages(mapping, 0, (loff_t)-1);
941
+ spin_unlock(&obj->vma.lock);
2407942 }
2408943
2409
-/*
2410
- * Move pages to appropriate lru and release the pagevec, decrementing the
2411
- * ref count of those pages.
2412
- */
2413
-static void check_release_pagevec(struct pagevec *pvec)
2414
-{
2415
- check_move_unevictable_pages(pvec);
2416
- __pagevec_release(pvec);
2417
- cond_resched();
2418
-}
2419
-
2420
-static void
2421
-i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
2422
- struct sg_table *pages)
2423
-{
2424
- struct sgt_iter sgt_iter;
2425
- struct pagevec pvec;
2426
- struct page *page;
2427
-
2428
- __i915_gem_object_release_shmem(obj, pages, true);
2429
-
2430
- i915_gem_gtt_finish_pages(obj, pages);
2431
-
2432
- if (i915_gem_object_needs_bit17_swizzle(obj))
2433
- i915_gem_object_save_bit_17_swizzle(obj, pages);
2434
-
2435
- mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
2436
-
2437
- pagevec_init(&pvec);
2438
- for_each_sgt_page(page, sgt_iter, pages) {
2439
- if (obj->mm.dirty)
2440
- set_page_dirty(page);
2441
-
2442
- if (obj->mm.madv == I915_MADV_WILLNEED)
2443
- mark_page_accessed(page);
2444
-
2445
- put_page(page);
2446
- }
2447
- obj->mm.dirty = false;
2448
-
2449
- sg_free_table(pages);
2450
- kfree(pages);
2451
-}
2452
-
2453
-static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
2454
-{
2455
- struct radix_tree_iter iter;
2456
- void __rcu **slot;
2457
-
2458
- rcu_read_lock();
2459
- radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
2460
- radix_tree_delete(&obj->mm.get_page.radix, iter.index);
2461
- rcu_read_unlock();
2462
-}
2463
-
2464
-struct reg_and_bit {
2465
- i915_reg_t reg;
2466
- u32 bit;
2467
-};
2468
-
2469
-static struct reg_and_bit
2470
-get_reg_and_bit(const struct intel_engine_cs *engine,
2471
- const i915_reg_t *regs, const unsigned int num)
2472
-{
2473
- const unsigned int class = engine->class;
2474
- struct reg_and_bit rb = { .bit = 1 };
2475
-
2476
- if (WARN_ON_ONCE(class >= num || !regs[class].reg))
2477
- return rb;
2478
-
2479
- rb.reg = regs[class];
2480
- if (class == VIDEO_DECODE_CLASS)
2481
- rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
2482
-
2483
- return rb;
2484
-}
2485
-
2486
-static void invalidate_tlbs(struct drm_i915_private *dev_priv)
2487
-{
2488
- static const i915_reg_t gen8_regs[] = {
2489
- [RENDER_CLASS] = GEN8_RTCR,
2490
- [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */
2491
- [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR,
2492
- [COPY_ENGINE_CLASS] = GEN8_BTCR,
2493
- };
2494
- const unsigned int num = ARRAY_SIZE(gen8_regs);
2495
- const i915_reg_t *regs = gen8_regs;
2496
- struct intel_engine_cs *engine;
2497
- enum intel_engine_id id;
2498
-
2499
- if (INTEL_GEN(dev_priv) < 8)
2500
- return;
2501
-
2502
- GEM_TRACE("\n");
2503
-
2504
- assert_rpm_wakelock_held(dev_priv);
2505
-
2506
- mutex_lock(&dev_priv->tlb_invalidate_lock);
2507
- intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
2508
-
2509
- for_each_engine(engine, dev_priv, id) {
2510
- /*
2511
- * HW architecture suggests a typical invalidation time of 40us,
2512
- * with pessimistic cases up to 100us and a recommendation to
2513
- * cap at 1ms. We go a bit higher just in case.
2514
- */
2515
- const unsigned int timeout_us = 100;
2516
- const unsigned int timeout_ms = 4;
2517
- struct reg_and_bit rb;
2518
-
2519
- rb = get_reg_and_bit(engine, regs, num);
2520
- if (!i915_mmio_reg_offset(rb.reg))
2521
- continue;
2522
-
2523
- I915_WRITE_FW(rb.reg, rb.bit);
2524
- if (__intel_wait_for_register_fw(dev_priv,
2525
- rb.reg, rb.bit, 0,
2526
- timeout_us, timeout_ms,
2527
- NULL))
2528
- DRM_ERROR_RATELIMITED("%s TLB invalidation did not complete in %ums!\n",
2529
- engine->name, timeout_ms);
2530
- }
2531
-
2532
- intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
2533
- mutex_unlock(&dev_priv->tlb_invalidate_lock);
2534
-}
2535
-
2536
-static struct sg_table *
2537
-__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
944
+struct i915_vma *
945
+i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
946
+ struct i915_gem_ww_ctx *ww,
947
+ const struct i915_ggtt_view *view,
948
+ u64 size, u64 alignment, u64 flags)
2538949 {
2539950 struct drm_i915_private *i915 = to_i915(obj->base.dev);
2540
- struct sg_table *pages;
2541
-
2542
- pages = fetch_and_zero(&obj->mm.pages);
2543
- if (!pages)
2544
- return NULL;
2545
-
2546
- spin_lock(&i915->mm.obj_lock);
2547
- list_del(&obj->mm.link);
2548
- spin_unlock(&i915->mm.obj_lock);
2549
-
2550
- if (obj->mm.mapping) {
2551
- void *ptr;
2552
-
2553
- ptr = page_mask_bits(obj->mm.mapping);
2554
- if (is_vmalloc_addr(ptr))
2555
- vunmap(ptr);
2556
- else
2557
- kunmap(kmap_to_page(ptr));
2558
-
2559
- obj->mm.mapping = NULL;
2560
- }
2561
-
2562
- __i915_gem_object_reset_page_iter(obj);
2563
- obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
2564
-
2565
- if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
2566
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
2567
-
2568
- if (intel_runtime_pm_get_if_in_use(i915)) {
2569
- invalidate_tlbs(i915);
2570
- intel_runtime_pm_put(i915);
2571
- }
2572
- }
2573
-
2574
- return pages;
2575
-}
2576
-
2577
-void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
2578
- enum i915_mm_subclass subclass)
2579
-{
2580
- struct sg_table *pages;
2581
-
2582
- if (i915_gem_object_has_pinned_pages(obj))
2583
- return;
2584
-
2585
- GEM_BUG_ON(obj->bind_count);
2586
- if (!i915_gem_object_has_pages(obj))
2587
- return;
2588
-
2589
- /* May be called by shrinker from within get_pages() (on another bo) */
2590
- mutex_lock_nested(&obj->mm.lock, subclass);
2591
- if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
2592
- goto unlock;
2593
-
2594
- /*
2595
- * ->put_pages might need to allocate memory for the bit17 swizzle
2596
- * array, hence protect them from being reaped by removing them from gtt
2597
- * lists early.
2598
- */
2599
- pages = __i915_gem_object_unset_pages(obj);
2600
- if (!IS_ERR(pages))
2601
- obj->ops->put_pages(obj, pages);
2602
-
2603
-unlock:
2604
- mutex_unlock(&obj->mm.lock);
2605
-}
2606
-
2607
-static bool i915_sg_trim(struct sg_table *orig_st)
2608
-{
2609
- struct sg_table new_st;
2610
- struct scatterlist *sg, *new_sg;
2611
- unsigned int i;
2612
-
2613
- if (orig_st->nents == orig_st->orig_nents)
2614
- return false;
2615
-
2616
- if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
2617
- return false;
2618
-
2619
- new_sg = new_st.sgl;
2620
- for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
2621
- sg_set_page(new_sg, sg_page(sg), sg->length, 0);
2622
- /* called before being DMA mapped, no need to copy sg->dma_* */
2623
- new_sg = sg_next(new_sg);
2624
- }
2625
- GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
2626
-
2627
- sg_free_table(orig_st);
2628
-
2629
- *orig_st = new_st;
2630
- return true;
2631
-}
2632
-
2633
-static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2634
-{
2635
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2636
- const unsigned long page_count = obj->base.size / PAGE_SIZE;
2637
- unsigned long i;
2638
- struct address_space *mapping;
2639
- struct sg_table *st;
2640
- struct scatterlist *sg;
2641
- struct sgt_iter sgt_iter;
2642
- struct page *page;
2643
- unsigned long last_pfn = 0; /* suppress gcc warning */
2644
- unsigned int max_segment = i915_sg_segment_size();
2645
- unsigned int sg_page_sizes;
2646
- struct pagevec pvec;
2647
- gfp_t noreclaim;
2648
- int ret;
2649
-
2650
- /* Assert that the object is not currently in any GPU domain. As it
2651
- * wasn't in the GTT, there shouldn't be any way it could have been in
2652
- * a GPU cache
2653
- */
2654
- GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2655
- GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2656
-
2657
- st = kmalloc(sizeof(*st), GFP_KERNEL);
2658
- if (st == NULL)
2659
- return -ENOMEM;
2660
-
2661
-rebuild_st:
2662
- if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2663
- kfree(st);
2664
- return -ENOMEM;
2665
- }
2666
-
2667
- /* Get the list of pages out of our struct file. They'll be pinned
2668
- * at this point until we release them.
2669
- *
2670
- * Fail silently without starting the shrinker
2671
- */
2672
- mapping = obj->base.filp->f_mapping;
2673
- mapping_set_unevictable(mapping);
2674
- noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
2675
- noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
2676
-
2677
- sg = st->sgl;
2678
- st->nents = 0;
2679
- sg_page_sizes = 0;
2680
- for (i = 0; i < page_count; i++) {
2681
- const unsigned int shrink[] = {
2682
- I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
2683
- 0,
2684
- }, *s = shrink;
2685
- gfp_t gfp = noreclaim;
2686
-
2687
- do {
2688
- page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2689
- if (likely(!IS_ERR(page)))
2690
- break;
2691
-
2692
- if (!*s) {
2693
- ret = PTR_ERR(page);
2694
- goto err_sg;
2695
- }
2696
-
2697
- i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++);
2698
- cond_resched();
2699
-
2700
- /* We've tried hard to allocate the memory by reaping
2701
- * our own buffer, now let the real VM do its job and
2702
- * go down in flames if truly OOM.
2703
- *
2704
- * However, since graphics tend to be disposable,
2705
- * defer the oom here by reporting the ENOMEM back
2706
- * to userspace.
2707
- */
2708
- if (!*s) {
2709
- /* reclaim and warn, but no oom */
2710
- gfp = mapping_gfp_mask(mapping);
2711
-
2712
- /* Our BOs are always dirty and so we require
2713
- * kswapd to reclaim our pages (direct reclaim
2714
- * does not effectively begin pageout of our
2715
- * buffers on its own). However, direct reclaim
2716
- * only waits for kswapd when under allocation
2717
- * congestion. So as a result __GFP_RECLAIM is
2718
- * unreliable and fails to actually reclaim our
2719
- * dirty pages -- unless you try over and over
2720
- * again with !__GFP_NORETRY. However, we still
2721
- * want to fail this allocation rather than
2722
- * trigger the out-of-memory killer and for
2723
- * this we want __GFP_RETRY_MAYFAIL.
2724
- */
2725
- gfp |= __GFP_RETRY_MAYFAIL;
2726
- }
2727
- } while (1);
2728
-
2729
- if (!i ||
2730
- sg->length >= max_segment ||
2731
- page_to_pfn(page) != last_pfn + 1) {
2732
- if (i) {
2733
- sg_page_sizes |= sg->length;
2734
- sg = sg_next(sg);
2735
- }
2736
- st->nents++;
2737
- sg_set_page(sg, page, PAGE_SIZE, 0);
2738
- } else {
2739
- sg->length += PAGE_SIZE;
2740
- }
2741
- last_pfn = page_to_pfn(page);
2742
-
2743
- /* Check that the i965g/gm workaround works. */
2744
- WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2745
- }
2746
- if (sg) { /* loop terminated early; short sg table */
2747
- sg_page_sizes |= sg->length;
2748
- sg_mark_end(sg);
2749
- }
2750
-
2751
- /* Trim unused sg entries to avoid wasting memory. */
2752
- i915_sg_trim(st);
2753
-
2754
- ret = i915_gem_gtt_prepare_pages(obj, st);
2755
- if (ret) {
2756
- /* DMA remapping failed? One possible cause is that
2757
- * it could not reserve enough large entries; asking
2758
- * for PAGE_SIZE chunks instead may be helpful.
2759
- */
2760
- if (max_segment > PAGE_SIZE) {
2761
- for_each_sgt_page(page, sgt_iter, st)
2762
- put_page(page);
2763
- sg_free_table(st);
2764
-
2765
- max_segment = PAGE_SIZE;
2766
- goto rebuild_st;
2767
- } else {
2768
- dev_warn(&dev_priv->drm.pdev->dev,
2769
- "Failed to DMA remap %lu pages\n",
2770
- page_count);
2771
- goto err_pages;
2772
- }
2773
- }
2774
-
2775
- if (i915_gem_object_needs_bit17_swizzle(obj))
2776
- i915_gem_object_do_bit_17_swizzle(obj, st);
2777
-
2778
- __i915_gem_object_set_pages(obj, st, sg_page_sizes);
2779
-
2780
- return 0;
2781
-
2782
-err_sg:
2783
- sg_mark_end(sg);
2784
-err_pages:
2785
- mapping_clear_unevictable(mapping);
2786
- pagevec_init(&pvec);
2787
- for_each_sgt_page(page, sgt_iter, st) {
2788
- if (!pagevec_add(&pvec, page))
2789
- check_release_pagevec(&pvec);
2790
- }
2791
- if (pagevec_count(&pvec))
2792
- check_release_pagevec(&pvec);
2793
- sg_free_table(st);
2794
- kfree(st);
2795
-
2796
- /* shmemfs first checks if there is enough memory to allocate the page
2797
- * and reports ENOSPC should there be insufficient memory, along with the usual
2798
- * ENOMEM for a genuine allocation failure.
2799
- *
2800
- * We use ENOSPC in our driver to mean that we have run out of aperture
2801
- * space and so want to translate the error from shmemfs back to our
2802
- * usual understanding of ENOMEM.
2803
- */
2804
- if (ret == -ENOSPC)
2805
- ret = -ENOMEM;
2806
-
2807
- return ret;
2808
-}
2809
-
2810
-void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
2811
- struct sg_table *pages,
2812
- unsigned int sg_page_sizes)
2813
-{
2814
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
2815
- unsigned long supported = INTEL_INFO(i915)->page_sizes;
2816
- int i;
2817
-
2818
- lockdep_assert_held(&obj->mm.lock);
2819
-
2820
- obj->mm.get_page.sg_pos = pages->sgl;
2821
- obj->mm.get_page.sg_idx = 0;
2822
-
2823
- obj->mm.pages = pages;
2824
-
2825
- if (i915_gem_object_is_tiled(obj) &&
2826
- i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
2827
- GEM_BUG_ON(obj->mm.quirked);
2828
- __i915_gem_object_pin_pages(obj);
2829
- obj->mm.quirked = true;
2830
- }
2831
-
2832
- GEM_BUG_ON(!sg_page_sizes);
2833
- obj->mm.page_sizes.phys = sg_page_sizes;
2834
-
2835
- /*
2836
- * Calculate the supported page-sizes which fit into the given
2837
- * sg_page_sizes. This will give us the page-sizes which we may be able
2838
- * to use opportunistically when later inserting into the GTT. For
2839
- * example if phys=2G, then in theory we should be able to use 1G, 2M,
2840
- * 64K or 4K pages, although in practice this will depend on a number of
2841
- * other factors.
2842
- */
2843
- obj->mm.page_sizes.sg = 0;
2844
- for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
2845
- if (obj->mm.page_sizes.phys & ~0u << i)
2846
- obj->mm.page_sizes.sg |= BIT(i);
2847
- }
2848
- GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
2849
-
2850
- spin_lock(&i915->mm.obj_lock);
2851
- list_add(&obj->mm.link, &i915->mm.unbound_list);
2852
- spin_unlock(&i915->mm.obj_lock);
2853
-}
2854
-
2855
-static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2856
-{
2857
- int err;
2858
-
2859
- if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
2860
- DRM_DEBUG("Attempting to obtain a purgeable object\n");
2861
- return -EFAULT;
2862
- }
2863
-
2864
- err = obj->ops->get_pages(obj);
2865
- GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
2866
-
2867
- return err;
2868
-}
2869
-
2870
-/* Ensure that the associated pages are gathered from the backing storage
2871
- * and pinned into our object. i915_gem_object_pin_pages() may be called
2872
- * multiple times before they are released by a single call to
2873
- * i915_gem_object_unpin_pages() - once the pages are no longer referenced
2874
- * either as a result of memory pressure (reaping pages under the shrinker)
2875
- * or as the object is itself released.
2876
- */
2877
-int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2878
-{
2879
- int err;
2880
-
2881
- err = mutex_lock_interruptible(&obj->mm.lock);
2882
- if (err)
2883
- return err;
2884
-
2885
- if (unlikely(!i915_gem_object_has_pages(obj))) {
2886
- GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2887
-
2888
- err = ____i915_gem_object_get_pages(obj);
2889
- if (err)
2890
- goto unlock;
2891
-
2892
- smp_mb__before_atomic();
2893
- }
2894
- atomic_inc(&obj->mm.pages_pin_count);
2895
-
2896
-unlock:
2897
- mutex_unlock(&obj->mm.lock);
2898
- return err;
2899
-}
2900
-
2901
-/* The 'mapping' part of i915_gem_object_pin_map() below */
2902
-static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2903
- enum i915_map_type type)
2904
-{
2905
- unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2906
- struct sg_table *sgt = obj->mm.pages;
2907
- struct sgt_iter sgt_iter;
2908
- struct page *page;
2909
- struct page *stack_pages[32];
2910
- struct page **pages = stack_pages;
2911
- unsigned long i = 0;
2912
- pgprot_t pgprot;
2913
- void *addr;
2914
-
2915
- /* A single page can always be kmapped */
2916
- if (n_pages == 1 && type == I915_MAP_WB)
2917
- return kmap(sg_page(sgt->sgl));
2918
-
2919
- if (n_pages > ARRAY_SIZE(stack_pages)) {
2920
- /* Too big for stack -- allocate temporary array instead */
2921
- pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
2922
- if (!pages)
2923
- return NULL;
2924
- }
2925
-
2926
- for_each_sgt_page(page, sgt_iter, sgt)
2927
- pages[i++] = page;
2928
-
2929
- /* Check that we have the expected number of pages */
2930
- GEM_BUG_ON(i != n_pages);
2931
-
2932
- switch (type) {
2933
- default:
2934
- MISSING_CASE(type);
2935
- /* fallthrough to use PAGE_KERNEL anyway */
2936
- case I915_MAP_WB:
2937
- pgprot = PAGE_KERNEL;
2938
- break;
2939
- case I915_MAP_WC:
2940
- pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2941
- break;
2942
- }
2943
- addr = vmap(pages, n_pages, 0, pgprot);
2944
-
2945
- if (pages != stack_pages)
2946
- kvfree(pages);
2947
-
2948
- return addr;
2949
-}
2950
-
2951
-/* get, pin, and map the pages of the object into kernel space */
2952
-void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2953
- enum i915_map_type type)
2954
-{
2955
- enum i915_map_type has_type;
2956
- bool pinned;
2957
- void *ptr;
2958
- int ret;
2959
-
2960
- if (unlikely(!i915_gem_object_has_struct_page(obj)))
2961
- return ERR_PTR(-ENXIO);
2962
-
2963
- ret = mutex_lock_interruptible(&obj->mm.lock);
2964
- if (ret)
2965
- return ERR_PTR(ret);
2966
-
2967
- pinned = !(type & I915_MAP_OVERRIDE);
2968
- type &= ~I915_MAP_OVERRIDE;
2969
-
2970
- if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
2971
- if (unlikely(!i915_gem_object_has_pages(obj))) {
2972
- GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2973
-
2974
- ret = ____i915_gem_object_get_pages(obj);
2975
- if (ret)
2976
- goto err_unlock;
2977
-
2978
- smp_mb__before_atomic();
2979
- }
2980
- atomic_inc(&obj->mm.pages_pin_count);
2981
- pinned = false;
2982
- }
2983
- GEM_BUG_ON(!i915_gem_object_has_pages(obj));
2984
-
2985
- ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2986
- if (ptr && has_type != type) {
2987
- if (pinned) {
2988
- ret = -EBUSY;
2989
- goto err_unpin;
2990
- }
2991
-
2992
- if (is_vmalloc_addr(ptr))
2993
- vunmap(ptr);
2994
- else
2995
- kunmap(kmap_to_page(ptr));
2996
-
2997
- ptr = obj->mm.mapping = NULL;
2998
- }
2999
-
3000
- if (!ptr) {
3001
- ptr = i915_gem_object_map(obj, type);
3002
- if (!ptr) {
3003
- ret = -ENOMEM;
3004
- goto err_unpin;
3005
- }
3006
-
3007
- obj->mm.mapping = page_pack_bits(ptr, type);
3008
- }
3009
-
3010
-out_unlock:
3011
- mutex_unlock(&obj->mm.lock);
3012
- return ptr;
3013
-
3014
-err_unpin:
3015
- atomic_dec(&obj->mm.pages_pin_count);
3016
-err_unlock:
3017
- ptr = ERR_PTR(ret);
3018
- goto out_unlock;
3019
-}
3020
-
3021
-static int
3022
-i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
3023
- const struct drm_i915_gem_pwrite *arg)
3024
-{
3025
- struct address_space *mapping = obj->base.filp->f_mapping;
3026
- char __user *user_data = u64_to_user_ptr(arg->data_ptr);
3027
- u64 remain, offset;
3028
- unsigned int pg;
3029
-
3030
- /* Before we instantiate/pin the backing store for our use, we
3031
- * can prepopulate the shmemfs filp efficiently using a write into
3032
- * the pagecache. We avoid the penalty of instantiating all the
3033
- * pages, important if the user is just writing to a few and never
3034
- * uses the object on the GPU, and using a direct write into shmemfs
3035
- * allows it to avoid the cost of retrieving a page (either swapin
3036
- * or clearing-before-use) before it is overwritten.
3037
- */
3038
- if (i915_gem_object_has_pages(obj))
3039
- return -ENODEV;
3040
-
3041
- if (obj->mm.madv != I915_MADV_WILLNEED)
3042
- return -EFAULT;
3043
-
3044
- /* Before the pages are instantiated the object is treated as being
3045
- * in the CPU domain. The pages will be clflushed as required before
3046
- * use, and we can freely write into the pages directly. If userspace
3047
- * races pwrite with any other operation, corruption will ensue -
3048
- * that is userspace's prerogative!
3049
- */
3050
-
3051
- remain = arg->size;
3052
- offset = arg->offset;
3053
- pg = offset_in_page(offset);
3054
-
3055
- do {
3056
- unsigned int len, unwritten;
3057
- struct page *page;
3058
- void *data, *vaddr;
3059
- int err;
3060
-
3061
- len = PAGE_SIZE - pg;
3062
- if (len > remain)
3063
- len = remain;
3064
-
3065
- err = pagecache_write_begin(obj->base.filp, mapping,
3066
- offset, len, 0,
3067
- &page, &data);
3068
- if (err < 0)
3069
- return err;
3070
-
3071
- vaddr = kmap(page);
3072
- unwritten = copy_from_user(vaddr + pg, user_data, len);
3073
- kunmap(page);
3074
-
3075
- err = pagecache_write_end(obj->base.filp, mapping,
3076
- offset, len, len - unwritten,
3077
- page, data);
3078
- if (err < 0)
3079
- return err;
3080
-
3081
- if (unwritten)
3082
- return -EFAULT;
3083
-
3084
- remain -= len;
3085
- user_data += len;
3086
- offset += len;
3087
- pg = 0;
3088
- } while (remain);
3089
-
3090
- return 0;
3091
-}
3092
-
3093
-static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv,
3094
- const struct i915_gem_context *ctx)
3095
-{
3096
- unsigned int score;
3097
- unsigned long prev_hang;
3098
-
3099
- if (i915_gem_context_is_banned(ctx))
3100
- score = I915_CLIENT_SCORE_CONTEXT_BAN;
3101
- else
3102
- score = 0;
3103
-
3104
- prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
3105
- if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
3106
- score += I915_CLIENT_SCORE_HANG_FAST;
3107
-
3108
- if (score) {
3109
- atomic_add(score, &file_priv->ban_score);
3110
-
3111
- DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
3112
- ctx->name, score,
3113
- atomic_read(&file_priv->ban_score));
3114
- }
3115
-}
3116
-
3117
-static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
3118
-{
3119
- unsigned int score;
3120
- bool banned, bannable;
3121
-
3122
- atomic_inc(&ctx->guilty_count);
3123
-
3124
- bannable = i915_gem_context_is_bannable(ctx);
3125
- score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
3126
- banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
3127
-
3128
- /* Cool contexts don't accumulate client ban score */
3129
- if (!bannable)
3130
- return;
3131
-
3132
- if (banned) {
3133
- DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n",
3134
- ctx->name, atomic_read(&ctx->guilty_count),
3135
- score);
3136
- i915_gem_context_set_banned(ctx);
3137
- }
3138
-
3139
- if (!IS_ERR_OR_NULL(ctx->file_priv))
3140
- i915_gem_client_mark_guilty(ctx->file_priv, ctx);
3141
-}
3142
-
3143
-static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
3144
-{
3145
- atomic_inc(&ctx->active_count);
3146
-}
3147
-
3148
-struct i915_request *
3149
-i915_gem_find_active_request(struct intel_engine_cs *engine)
3150
-{
3151
- struct i915_request *request, *active = NULL;
3152
- unsigned long flags;
3153
-
3154
- /*
3155
- * We are called by the error capture, by reset, and to dump engine
3156
- * state at random points in time. In particular, note that none is
3157
- * crucially ordered with an interrupt. After a hang, the GPU is dead
3158
- * and we assume that no more writes can happen (we waited long enough
3159
- * for all writes that were in transaction to be flushed) - adding an
3160
- * extra delay for a recent interrupt is pointless. Hence, we do
3161
- * not need an engine->irq_seqno_barrier() before the seqno reads.
3162
- * At all other times, we must assume the GPU is still running, but
3163
- * we only care about the snapshot of this moment.
3164
- */
3165
- spin_lock_irqsave(&engine->timeline.lock, flags);
3166
- list_for_each_entry(request, &engine->timeline.requests, link) {
3167
- if (__i915_request_completed(request, request->global_seqno))
3168
- continue;
3169
-
3170
- active = request;
3171
- break;
3172
- }
3173
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
3174
-
3175
- return active;
3176
-}
3177
-
3178
-/*
3179
- * Ensure the irq handler finishes, and is not run again.
3180
- * Also return the active request so that we only search for it once.
3181
- */
3182
-struct i915_request *
3183
-i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
3184
-{
3185
- struct i915_request *request;
3186
-
3187
- /*
3188
- * During the reset sequence, we must prevent the engine from
3189
- * entering RC6. As the context state is undefined until we restart
3190
- * the engine, if it does enter RC6 during the reset, the state
3191
- * written to the powercontext is undefined and so we may lose
3192
- * GPU state upon resume, i.e. fail to restart after a reset.
3193
- */
3194
- intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
3195
-
3196
- request = engine->reset.prepare(engine);
3197
- if (request && request->fence.error == -EIO)
3198
- request = ERR_PTR(-EIO); /* Previous reset failed! */
3199
-
3200
- return request;
3201
-}
3202
-
3203
-int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
3204
-{
3205
- struct intel_engine_cs *engine;
3206
- struct i915_request *request;
3207
- enum intel_engine_id id;
3208
- int err = 0;
3209
-
3210
- for_each_engine(engine, dev_priv, id) {
3211
- request = i915_gem_reset_prepare_engine(engine);
3212
- if (IS_ERR(request)) {
3213
- err = PTR_ERR(request);
3214
- continue;
3215
- }
3216
-
3217
- engine->hangcheck.active_request = request;
3218
- }
3219
-
3220
- i915_gem_revoke_fences(dev_priv);
3221
- intel_uc_sanitize(dev_priv);
3222
-
3223
- return err;
3224
-}
3225
-
3226
-static void engine_skip_context(struct i915_request *request)
3227
-{
3228
- struct intel_engine_cs *engine = request->engine;
3229
- struct i915_gem_context *hung_ctx = request->gem_context;
3230
- struct i915_timeline *timeline = request->timeline;
3231
- unsigned long flags;
3232
-
3233
- GEM_BUG_ON(timeline == &engine->timeline);
3234
-
3235
- spin_lock_irqsave(&engine->timeline.lock, flags);
3236
- spin_lock(&timeline->lock);
3237
-
3238
- list_for_each_entry_continue(request, &engine->timeline.requests, link)
3239
- if (request->gem_context == hung_ctx)
3240
- i915_request_skip(request, -EIO);
3241
-
3242
- list_for_each_entry(request, &timeline->requests, link)
3243
- i915_request_skip(request, -EIO);
3244
-
3245
- spin_unlock(&timeline->lock);
3246
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
3247
-}
3248
-
3249
-/* Returns the request if it was guilty of the hang */
3250
-static struct i915_request *
3251
-i915_gem_reset_request(struct intel_engine_cs *engine,
3252
- struct i915_request *request,
3253
- bool stalled)
3254
-{
3255
- /* The guilty request will get skipped on a hung engine.
3256
- *
3257
- * Users of client default contexts do not rely on logical
3258
- * state preserved between batches so it is safe to execute
3259
- * queued requests following the hang. Non default contexts
3260
- * rely on preserved state, so skipping a batch loses the
3261
- * evolution of the state and it needs to be considered corrupted.
3262
- * Executing more queued batches on top of corrupted state is
3263
- * risky. But we take the risk by trying to advance through
3264
- * the queued requests in order to make the client behaviour
3265
- * more predictable around resets, by not throwing away random
3266
- * amounts of batches it has prepared for execution. Sophisticated
3267
- * clients can use gem_reset_stats_ioctl and dma fence status
3268
- * (exported via sync_file info ioctl on explicit fences) to observe
3269
- * when it loses the context state and should rebuild accordingly.
3270
- *
3271
- * The context ban, and ultimately the client ban, mechanism are safety
3272
- * valves if client submission ends up resulting in nothing more than
3273
- * subsequent hangs.
3274
- */
3275
-
3276
- if (i915_request_completed(request)) {
3277
- GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n",
3278
- engine->name, request->global_seqno,
3279
- request->fence.context, request->fence.seqno,
3280
- intel_engine_get_seqno(engine));
3281
- stalled = false;
3282
- }
3283
-
3284
- if (stalled) {
3285
- i915_gem_context_mark_guilty(request->gem_context);
3286
- i915_request_skip(request, -EIO);
3287
-
3288
- /* If this context is now banned, skip all pending requests. */
3289
- if (i915_gem_context_is_banned(request->gem_context))
3290
- engine_skip_context(request);
3291
- } else {
3292
- /*
3293
- * Since this is not the hung engine, it may have advanced
3294
- * since the hang declaration. Double check by refinding
3295
- * the active request at the time of the reset.
3296
- */
3297
- request = i915_gem_find_active_request(engine);
3298
- if (request) {
3299
- unsigned long flags;
3300
-
3301
- i915_gem_context_mark_innocent(request->gem_context);
3302
- dma_fence_set_error(&request->fence, -EAGAIN);
3303
-
3304
- /* Rewind the engine to replay the incomplete rq */
3305
- spin_lock_irqsave(&engine->timeline.lock, flags);
3306
- request = list_prev_entry(request, link);
3307
- if (&request->link == &engine->timeline.requests)
3308
- request = NULL;
3309
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
3310
- }
3311
- }
3312
-
3313
- return request;
3314
-}
3315
-
3316
-void i915_gem_reset_engine(struct intel_engine_cs *engine,
3317
- struct i915_request *request,
3318
- bool stalled)
3319
-{
3320
- /*
3321
- * Make sure this write is visible before we re-enable the interrupt
3322
- * handlers on another CPU, as tasklet_enable() resolves to just
3323
- * a compiler barrier which is insufficient for our purpose here.
3324
- */
3325
- smp_store_mb(engine->irq_posted, 0);
3326
-
3327
- if (request)
3328
- request = i915_gem_reset_request(engine, request, stalled);
3329
-
3330
- /* Setup the CS to resume from the breadcrumb of the hung request */
3331
- engine->reset.reset(engine, request);
3332
-}
3333
-
3334
-void i915_gem_reset(struct drm_i915_private *dev_priv,
3335
- unsigned int stalled_mask)
3336
-{
3337
- struct intel_engine_cs *engine;
3338
- enum intel_engine_id id;
3339
-
3340
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
3341
-
3342
- i915_retire_requests(dev_priv);
3343
-
3344
- for_each_engine(engine, dev_priv, id) {
3345
- struct intel_context *ce;
3346
-
3347
- i915_gem_reset_engine(engine,
3348
- engine->hangcheck.active_request,
3349
- stalled_mask & ENGINE_MASK(id));
3350
- ce = fetch_and_zero(&engine->last_retired_context);
3351
- if (ce)
3352
- intel_context_unpin(ce);
3353
-
3354
- /*
3355
- * Ostensibly, we always want a context loaded for powersaving,
3356
- * so if the engine is idle after the reset, send a request
3357
- * to load our scratch kernel_context.
3358
- *
3359
- * More mysteriously, if we leave the engine idle after a reset,
3360
- * the next userspace batch may hang, with what appears to be
3361
- * an incoherent read by the CS (presumably stale TLB). An
3362
- * empty request appears sufficient to paper over the glitch.
3363
- */
3364
- if (intel_engine_is_idle(engine)) {
3365
- struct i915_request *rq;
3366
-
3367
- rq = i915_request_alloc(engine,
3368
- dev_priv->kernel_context);
3369
- if (!IS_ERR(rq))
3370
- i915_request_add(rq);
3371
- }
3372
- }
3373
-
3374
- i915_gem_restore_fences(dev_priv);
3375
-}
3376
-
3377
-void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
3378
-{
3379
- engine->reset.finish(engine);
3380
-
3381
- intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
3382
-}
3383
-
3384
-void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
3385
-{
3386
- struct intel_engine_cs *engine;
3387
- enum intel_engine_id id;
3388
-
3389
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
3390
-
3391
- for_each_engine(engine, dev_priv, id) {
3392
- engine->hangcheck.active_request = NULL;
3393
- i915_gem_reset_finish_engine(engine);
3394
- }
3395
-}
3396
-
3397
-static void nop_submit_request(struct i915_request *request)
3398
-{
3399
- GEM_TRACE("%s fence %llx:%d -> -EIO\n",
3400
- request->engine->name,
3401
- request->fence.context, request->fence.seqno);
3402
- dma_fence_set_error(&request->fence, -EIO);
3403
-
3404
- i915_request_submit(request);
3405
-}
3406
-
3407
-static void nop_complete_submit_request(struct i915_request *request)
3408
-{
3409
- unsigned long flags;
3410
-
3411
- GEM_TRACE("%s fence %llx:%d -> -EIO\n",
3412
- request->engine->name,
3413
- request->fence.context, request->fence.seqno);
3414
- dma_fence_set_error(&request->fence, -EIO);
3415
-
3416
- spin_lock_irqsave(&request->engine->timeline.lock, flags);
3417
- __i915_request_submit(request);
3418
- intel_engine_init_global_seqno(request->engine, request->global_seqno);
3419
- spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
3420
-}
3421
-
3422
-void i915_gem_set_wedged(struct drm_i915_private *i915)
3423
-{
3424
- struct intel_engine_cs *engine;
3425
- enum intel_engine_id id;
3426
-
3427
- GEM_TRACE("start\n");
3428
-
3429
- if (GEM_SHOW_DEBUG()) {
3430
- struct drm_printer p = drm_debug_printer(__func__);
3431
-
3432
- for_each_engine(engine, i915, id)
3433
- intel_engine_dump(engine, &p, "%s\n", engine->name);
3434
- }
3435
-
3436
- set_bit(I915_WEDGED, &i915->gpu_error.flags);
3437
- smp_mb__after_atomic();
3438
-
3439
- /*
3440
- * First, stop submission to hw, but do not yet complete requests by
3441
- * rolling the global seqno forward (since this would complete requests
3442
- * for which we haven't set the fence error to EIO yet).
3443
- */
3444
- for_each_engine(engine, i915, id) {
3445
- i915_gem_reset_prepare_engine(engine);
3446
-
3447
- engine->submit_request = nop_submit_request;
3448
- engine->schedule = NULL;
3449
- }
3450
- i915->caps.scheduler = 0;
3451
-
3452
- /* Even if the GPU reset fails, it should still stop the engines */
3453
- intel_gpu_reset(i915, ALL_ENGINES);
3454
-
3455
- /*
3456
- * Make sure no one is running the old callback before we proceed with
3457
- * cancelling requests and resetting the completion tracking. Otherwise
3458
- * we might submit a request to the hardware which never completes.
3459
- */
3460
- synchronize_rcu();
3461
-
3462
- for_each_engine(engine, i915, id) {
3463
- /* Mark all executing requests as skipped */
3464
- engine->cancel_requests(engine);
3465
-
3466
- /*
3467
- * Only once we've force-cancelled all in-flight requests can we
3468
- * start to complete all requests.
3469
- */
3470
- engine->submit_request = nop_complete_submit_request;
3471
- }
3472
-
3473
- /*
3474
- * Make sure no request can slip through without getting completed by
3475
- * either this call here to intel_engine_init_global_seqno, or the one
3476
- * in nop_complete_submit_request.
3477
- */
3478
- synchronize_rcu();
3479
-
3480
- for_each_engine(engine, i915, id) {
3481
- unsigned long flags;
3482
-
3483
- /*
3484
- * Mark all pending requests as complete so that any concurrent
3485
- * (lockless) lookup doesn't try and wait upon the request as we
3486
- * reset it.
3487
- */
3488
- spin_lock_irqsave(&engine->timeline.lock, flags);
3489
- intel_engine_init_global_seqno(engine,
3490
- intel_engine_last_submit(engine));
3491
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
3492
-
3493
- i915_gem_reset_finish_engine(engine);
3494
- }
3495
-
3496
- GEM_TRACE("end\n");
3497
-
3498
- wake_up_all(&i915->gpu_error.reset_queue);
3499
-}
3500
-
3501
-bool i915_gem_unset_wedged(struct drm_i915_private *i915)
3502
-{
3503
- struct i915_timeline *tl;
3504
-
3505
- lockdep_assert_held(&i915->drm.struct_mutex);
3506
- if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
3507
- return true;
3508
-
3509
- GEM_TRACE("start\n");
3510
-
3511
- /*
3512
- * Before unwedging, make sure that all pending operations
3513
- * are flushed and errored out - we may have requests waiting upon
3514
- * third party fences. We marked all inflight requests as EIO, and
3515
- * every execbuf since returned EIO; for consistency we want all
3516
- * the currently pending requests to also be marked as EIO, which
3517
- * is done inside our nop_submit_request - and so we must wait.
3518
- *
3519
- * No more can be submitted until we reset the wedged bit.
3520
- */
3521
- list_for_each_entry(tl, &i915->gt.timelines, link) {
3522
- struct i915_request *rq;
3523
-
3524
- rq = i915_gem_active_peek(&tl->last_request,
3525
- &i915->drm.struct_mutex);
3526
- if (!rq)
3527
- continue;
3528
-
3529
- /*
3530
- * We can't use our normal waiter as we want to
3531
- * avoid recursively trying to handle the current
3532
- * reset. The basic dma_fence_default_wait() installs
3533
- * a callback for dma_fence_signal(), which is
3534
- * triggered by our nop handler (indirectly, the
3535
- * callback enables the signaler thread which is
3536
- * woken by the nop_submit_request() advancing the seqno
3537
- * and when the seqno passes the fence, the signaler
3538
- * then signals the fence waking us up).
3539
- */
3540
- if (dma_fence_default_wait(&rq->fence, true,
3541
- MAX_SCHEDULE_TIMEOUT) < 0)
3542
- return false;
3543
- }
3544
- i915_retire_requests(i915);
3545
- GEM_BUG_ON(i915->gt.active_requests);
3546
-
3547
- /*
3548
- * Undo nop_submit_request. We prevent all new i915 requests from
3549
- * being queued (by disallowing execbuf whilst wedged) so having
3550
- * waited for all active requests above, we know the system is idle
3551
- * and do not have to worry about a thread being inside
3552
- * engine->submit_request() as we swap over. So unlike installing
3553
- * the nop_submit_request on reset, we can do this from normal
3554
- * context and do not require stop_machine().
3555
- */
3556
- intel_engines_reset_default_submission(i915);
3557
- i915_gem_contexts_lost(i915);
3558
-
3559
- GEM_TRACE("end\n");
3560
-
3561
- smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
3562
- clear_bit(I915_WEDGED, &i915->gpu_error.flags);
3563
-
3564
- return true;
3565
-}
3566
-
3567
-static void
3568
-i915_gem_retire_work_handler(struct work_struct *work)
3569
-{
3570
- struct drm_i915_private *dev_priv =
3571
- container_of(work, typeof(*dev_priv), gt.retire_work.work);
3572
- struct drm_device *dev = &dev_priv->drm;
3573
-
3574
- /* Come back later if the device is busy... */
3575
- if (mutex_trylock(&dev->struct_mutex)) {
3576
- i915_retire_requests(dev_priv);
3577
- mutex_unlock(&dev->struct_mutex);
3578
- }
3579
-
3580
- /*
3581
- * Keep the retire handler running until we are finally idle.
3582
- * We do not need to do this test under locking as in the worst-case
3583
- * we queue the retire worker once too often.
3584
- */
3585
- if (READ_ONCE(dev_priv->gt.awake))
3586
- queue_delayed_work(dev_priv->wq,
3587
- &dev_priv->gt.retire_work,
3588
- round_jiffies_up_relative(HZ));
3589
-}
3590
-
3591
-static void shrink_caches(struct drm_i915_private *i915)
3592
-{
3593
- /*
3594
- * kmem_cache_shrink() discards empty slabs and reorders partially
3595
- * filled slabs to prioritise allocating from the mostly full slabs,
3596
- * with the aim of reducing fragmentation.
3597
- */
3598
- kmem_cache_shrink(i915->priorities);
3599
- kmem_cache_shrink(i915->dependencies);
3600
- kmem_cache_shrink(i915->requests);
3601
- kmem_cache_shrink(i915->luts);
3602
- kmem_cache_shrink(i915->vmas);
3603
- kmem_cache_shrink(i915->objects);
3604
-}
3605
-
3606
-struct sleep_rcu_work {
3607
- union {
3608
- struct rcu_head rcu;
3609
- struct work_struct work;
3610
- };
3611
- struct drm_i915_private *i915;
3612
- unsigned int epoch;
3613
-};
3614
-
3615
-static inline bool
3616
-same_epoch(struct drm_i915_private *i915, unsigned int epoch)
3617
-{
3618
- /*
3619
- * There is a small chance that the epoch wrapped since we started
3620
- * sleeping. If we assume that epoch is at least a u32, then it will
3621
- * take at least 2^32 * 100ms for it to wrap, or about 326 years.
3622
- */
3623
- return epoch == READ_ONCE(i915->gt.epoch);
3624
-}
3625
-
3626
-static void __sleep_work(struct work_struct *work)
3627
-{
3628
- struct sleep_rcu_work *s = container_of(work, typeof(*s), work);
3629
- struct drm_i915_private *i915 = s->i915;
3630
- unsigned int epoch = s->epoch;
3631
-
3632
- kfree(s);
3633
- if (same_epoch(i915, epoch))
3634
- shrink_caches(i915);
3635
-}
3636
-
3637
-static void __sleep_rcu(struct rcu_head *rcu)
3638
-{
3639
- struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu);
3640
- struct drm_i915_private *i915 = s->i915;
3641
-
3642
- if (same_epoch(i915, s->epoch)) {
3643
- INIT_WORK(&s->work, __sleep_work);
3644
- queue_work(i915->wq, &s->work);
3645
- } else {
3646
- kfree(s);
3647
- }
3648
-}
3649
-
3650
-static inline bool
3651
-new_requests_since_last_retire(const struct drm_i915_private *i915)
3652
-{
3653
- return (READ_ONCE(i915->gt.active_requests) ||
3654
- work_pending(&i915->gt.idle_work.work));
3655
-}
3656
-
3657
-static void assert_kernel_context_is_current(struct drm_i915_private *i915)
3658
-{
3659
- struct intel_engine_cs *engine;
3660
- enum intel_engine_id id;
3661
-
3662
- if (i915_terminally_wedged(&i915->gpu_error))
3663
- return;
3664
-
3665
- GEM_BUG_ON(i915->gt.active_requests);
3666
- for_each_engine(engine, i915, id) {
3667
- GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request));
3668
- GEM_BUG_ON(engine->last_retired_context !=
3669
- to_intel_context(i915->kernel_context, engine));
3670
- }
3671
-}
3672
-
3673
-static void
3674
-i915_gem_idle_work_handler(struct work_struct *work)
3675
-{
3676
- struct drm_i915_private *dev_priv =
3677
- container_of(work, typeof(*dev_priv), gt.idle_work.work);
3678
- unsigned int epoch = I915_EPOCH_INVALID;
3679
- bool rearm_hangcheck;
3680
-
3681
- if (!READ_ONCE(dev_priv->gt.awake))
3682
- return;
3683
-
3684
- if (READ_ONCE(dev_priv->gt.active_requests))
3685
- return;
3686
-
3687
- /*
3688
- * Flush out the last user context, leaving only the pinned
3689
- * kernel context resident. When we are idling on the kernel_context,
3690
- * no more new requests (with a context switch) are emitted and we
3691
- * can finally rest. A consequence is that the idle work handler is
3692
- * always called at least twice before idling (and if the system is
3693
- * idle that implies a round trip through the retire worker).
3694
- */
3695
- mutex_lock(&dev_priv->drm.struct_mutex);
3696
- i915_gem_switch_to_kernel_context(dev_priv);
3697
- mutex_unlock(&dev_priv->drm.struct_mutex);
3698
-
3699
- GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n",
3700
- READ_ONCE(dev_priv->gt.active_requests));
3701
-
3702
- /*
3703
- * Wait for last execlists context complete, but bail out in case a
3704
- * new request is submitted. As we don't trust the hardware, we
3705
- * continue on if the wait times out. This is necessary to allow
3706
- * the machine to suspend even if the hardware dies, and we will
3707
- * try to recover in resume (after depriving the hardware of power,
3708
- * it may be in a better mood).
3709
- */
3710
- __wait_for(if (new_requests_since_last_retire(dev_priv)) return,
3711
- intel_engines_are_idle(dev_priv),
3712
- I915_IDLE_ENGINES_TIMEOUT * 1000,
3713
- 10, 500);
3714
-
3715
- rearm_hangcheck =
3716
- cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
3717
-
3718
- if (!mutex_trylock(&dev_priv->drm.struct_mutex)) {
3719
- /* Currently busy, come back later */
3720
- mod_delayed_work(dev_priv->wq,
3721
- &dev_priv->gt.idle_work,
3722
- msecs_to_jiffies(50));
3723
- goto out_rearm;
3724
- }
3725
-
3726
- /*
3727
- * New request retired after this work handler started, extend active
3728
- * period until next instance of the work.
3729
- */
3730
- if (new_requests_since_last_retire(dev_priv))
3731
- goto out_unlock;
3732
-
3733
- epoch = __i915_gem_park(dev_priv);
3734
-
3735
- assert_kernel_context_is_current(dev_priv);
3736
-
3737
- rearm_hangcheck = false;
3738
-out_unlock:
3739
- mutex_unlock(&dev_priv->drm.struct_mutex);
3740
-
3741
-out_rearm:
3742
- if (rearm_hangcheck) {
3743
- GEM_BUG_ON(!dev_priv->gt.awake);
3744
- i915_queue_hangcheck(dev_priv);
3745
- }
3746
-
3747
- /*
3748
- * When we are idle, it is an opportune time to reap our caches.
3749
- * However, we have many objects that utilise RCU and the ordered
3750
- * i915->wq that this work is executing on. To try and flush any
3751
- * pending frees now we are idle, we first wait for an RCU grace
3752
- * period, and then queue a task (that will run last on the wq) to
3753
- * shrink and re-optimize the caches.
3754
- */
3755
- if (same_epoch(dev_priv, epoch)) {
3756
- struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL);
3757
- if (s) {
3758
- s->i915 = dev_priv;
3759
- s->epoch = epoch;
3760
- call_rcu(&s->rcu, __sleep_rcu);
3761
- }
3762
- }
3763
-}
3764
-
3765
-void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
3766
-{
3767
- struct drm_i915_private *i915 = to_i915(gem->dev);
3768
- struct drm_i915_gem_object *obj = to_intel_bo(gem);
3769
- struct drm_i915_file_private *fpriv = file->driver_priv;
3770
- struct i915_lut_handle *lut, *ln;
3771
-
3772
- mutex_lock(&i915->drm.struct_mutex);
3773
-
3774
- list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
3775
- struct i915_gem_context *ctx = lut->ctx;
3776
- struct i915_vma *vma;
3777
-
3778
- GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
3779
- if (ctx->file_priv != fpriv)
3780
- continue;
3781
-
3782
- vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
3783
- GEM_BUG_ON(vma->obj != obj);
3784
-
3785
- /* We allow the process to have multiple handles to the same
3786
- * vma, in the same fd namespace, by virtue of flink/open.
3787
- */
3788
- GEM_BUG_ON(!vma->open_count);
3789
- if (!--vma->open_count && !i915_vma_is_ggtt(vma))
3790
- i915_vma_close(vma);
3791
-
3792
- list_del(&lut->obj_link);
3793
- list_del(&lut->ctx_link);
3794
-
3795
- kmem_cache_free(i915->luts, lut);
3796
- __i915_gem_object_release_unless_active(obj);
3797
- }
3798
-
3799
- mutex_unlock(&i915->drm.struct_mutex);
3800
-}
3801
-
3802
-static unsigned long to_wait_timeout(s64 timeout_ns)
3803
-{
3804
- if (timeout_ns < 0)
3805
- return MAX_SCHEDULE_TIMEOUT;
3806
-
3807
- if (timeout_ns == 0)
3808
- return 0;
3809
-
3810
- return nsecs_to_jiffies_timeout(timeout_ns);
3811
-}
3812
-
3813
-/**
3814
- * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3815
- * @dev: drm device pointer
3816
- * @data: ioctl data blob
3817
- * @file: drm file pointer
3818
- *
3819
- * Returns 0 if successful, else an error is returned with the remaining time in
3820
- * the timeout parameter.
3821
- * -ETIME: object is still busy after timeout
3822
- * -ERESTARTSYS: signal interrupted the wait
3823
- * -ENOENT: object doesn't exist
3824
- * Also possible, but rare:
3825
- * -EAGAIN: incomplete, restart syscall
3826
- * -ENOMEM: damn
3827
- * -ENODEV: Internal IRQ fail
3828
- * -E?: The add request failed
3829
- *
3830
- * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3831
- * non-zero timeout parameter the wait ioctl will wait for the given number of
3832
- * nanoseconds on an object becoming unbusy. Since the wait itself does so
3833
- * without holding struct_mutex the object may become re-busied before this
3834
- * function completes. A similar but shorter race condition exists in the busy
3835
- * ioctl.
3836
- */
3837
-int
3838
-i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3839
-{
3840
- struct drm_i915_gem_wait *args = data;
3841
- struct drm_i915_gem_object *obj;
3842
- ktime_t start;
3843
- long ret;
3844
-
3845
- if (args->flags != 0)
3846
- return -EINVAL;
3847
-
3848
- obj = i915_gem_object_lookup(file, args->bo_handle);
3849
- if (!obj)
3850
- return -ENOENT;
3851
-
3852
- start = ktime_get();
3853
-
3854
- ret = i915_gem_object_wait(obj,
3855
- I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
3856
- to_wait_timeout(args->timeout_ns),
3857
- to_rps_client(file));
3858
-
3859
- if (args->timeout_ns > 0) {
3860
- args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
3861
- if (args->timeout_ns < 0)
3862
- args->timeout_ns = 0;
3863
-
3864
- /*
3865
- * Apparently ktime isn't accurate enough and occasionally has a
3866
- * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
3867
- * things up to make the test happy. We allow up to 1 jiffy.
3868
- *
3869
- * This is a regression from the timespec->ktime conversion.
3870
- */
3871
- if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
3872
- args->timeout_ns = 0;
3873
-
3874
- /* Asked to wait beyond the jiffie/scheduler precision? */
3875
- if (ret == -ETIME && args->timeout_ns)
3876
- ret = -EAGAIN;
3877
- }
3878
-
3879
- i915_gem_object_put(obj);
3880
- return ret;
3881
-}
3882
-
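
/*
 * A minimal userspace sketch of driving the wait ioctl documented above,
 * assuming a valid DRM fd, a GEM handle and libdrm's copy of the i915
 * uAPI header; illustrative only. flags must be zero, timeout_ns < 0
 * waits forever, 0 merely polls, and on success the remaining budget is
 * written back.
 */
#include <stdint.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int gem_wait(int fd, uint32_t handle, int64_t *timeout_ns)
{
	struct drm_i915_gem_wait wait = {
		.bo_handle = handle,
		.flags = 0,			/* no flags are defined */
		.timeout_ns = *timeout_ns,
	};

	/* A production caller would retry on EINTR/EAGAIN like drmIoctl(). */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait))
		return -errno;		/* -ETIME: object still busy */

	*timeout_ns = wait.timeout_ns;	/* time left of the original budget */
	return 0;
}
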
3883
-static long wait_for_timeline(struct i915_timeline *tl,
3884
- unsigned int flags, long timeout)
3885
-{
3886
- struct i915_request *rq;
3887
-
3888
- rq = i915_gem_active_get_unlocked(&tl->last_request);
3889
- if (!rq)
3890
- return timeout;
3891
-
3892
- /*
3893
- * "Race-to-idle".
3894
- *
3895
- * Switching to the kernel context is often used as a synchronous
3896
- * step prior to idling, e.g. in suspend for flushing all
3897
- * current operations to memory before sleeping. These we
3898
- * want to complete as quickly as possible to avoid prolonged
3899
- * stalls, so allow the gpu to boost to maximum clocks.
3900
- */
3901
- if (flags & I915_WAIT_FOR_IDLE_BOOST)
3902
- gen6_rps_boost(rq, NULL);
3903
-
3904
- timeout = i915_request_wait(rq, flags, timeout);
3905
- i915_request_put(rq);
3906
-
3907
- return timeout;
3908
-}
3909
-
3910
-static int wait_for_engines(struct drm_i915_private *i915)
3911
-{
3912
- if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
3913
- dev_err(i915->drm.dev,
3914
- "Failed to idle engines, declaring wedged!\n");
3915
- GEM_TRACE_DUMP();
3916
- i915_gem_set_wedged(i915);
3917
- return -EIO;
3918
- }
3919
-
3920
- return 0;
3921
-}
3922
-
3923
-int i915_gem_wait_for_idle(struct drm_i915_private *i915,
3924
- unsigned int flags, long timeout)
3925
-{
3926
- GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
3927
- flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
3928
- timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");
3929
-
3930
- /* If the device is asleep, we have no requests outstanding */
3931
- if (!READ_ONCE(i915->gt.awake))
3932
- return 0;
3933
-
3934
- if (flags & I915_WAIT_LOCKED) {
3935
- struct i915_timeline *tl;
3936
- int err;
3937
-
3938
- lockdep_assert_held(&i915->drm.struct_mutex);
3939
-
3940
- list_for_each_entry(tl, &i915->gt.timelines, link) {
3941
- timeout = wait_for_timeline(tl, flags, timeout);
3942
- if (timeout < 0)
3943
- return timeout;
3944
- }
3945
-
3946
- err = wait_for_engines(i915);
3947
- if (err)
3948
- return err;
3949
-
3950
- i915_retire_requests(i915);
3951
- GEM_BUG_ON(i915->gt.active_requests);
3952
- } else {
3953
- struct intel_engine_cs *engine;
3954
- enum intel_engine_id id;
3955
-
3956
- for_each_engine(engine, i915, id) {
3957
- struct i915_timeline *tl = &engine->timeline;
3958
-
3959
- timeout = wait_for_timeline(tl, flags, timeout);
3960
- if (timeout < 0)
3961
- return timeout;
3962
- }
3963
- }
3964
-
3965
- return 0;
3966
-}
3967
-
3968
-static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
3969
-{
3970
- /*
3971
- * We manually flush the CPU domain so that we can override and
3972
- * force the flush for the display, and perform it asynchronously.
3973
- */
3974
- flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
3975
- if (obj->cache_dirty)
3976
- i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
3977
- obj->write_domain = 0;
3978
-}
3979
-
3980
-void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
3981
-{
3982
- if (!READ_ONCE(obj->pin_global))
3983
- return;
3984
-
3985
- mutex_lock(&obj->base.dev->struct_mutex);
3986
- __i915_gem_object_flush_for_display(obj);
3987
- mutex_unlock(&obj->base.dev->struct_mutex);
3988
-}
3989
-
3990
-/**
3991
- * Moves a single object to the WC read, and possibly write domain.
3992
- * @obj: object to act on
3993
- * @write: ask for write access or read only
3994
- *
3995
- * This function returns when the move is complete, including waiting on
3996
- * flushes to occur.
3997
- */
3998
-int
3999
-i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
4000
-{
4001
- int ret;
4002
-
4003
- lockdep_assert_held(&obj->base.dev->struct_mutex);
4004
-
4005
- ret = i915_gem_object_wait(obj,
4006
- I915_WAIT_INTERRUPTIBLE |
4007
- I915_WAIT_LOCKED |
4008
- (write ? I915_WAIT_ALL : 0),
4009
- MAX_SCHEDULE_TIMEOUT,
4010
- NULL);
4011
- if (ret)
4012
- return ret;
4013
-
4014
- if (obj->write_domain == I915_GEM_DOMAIN_WC)
4015
- return 0;
4016
-
4017
- /* Flush and acquire obj->pages so that we are coherent through
4018
- * direct access in memory with previous cached writes through
4019
- * shmemfs and that our cache domain tracking remains valid.
4020
- * For example, if the obj->filp was moved to swap without us
4021
- * being notified and releasing the pages, we would mistakenly
4022
- * continue to assume that the obj remained out of the CPU cached
4023
- * domain.
4024
- */
4025
- ret = i915_gem_object_pin_pages(obj);
4026
- if (ret)
4027
- return ret;
4028
-
4029
- flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
4030
-
4031
- /* Serialise direct access to this object with the barriers for
4032
- * coherent writes from the GPU, by effectively invalidating the
4033
- * WC domain upon first access.
4034
- */
4035
- if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
4036
- mb();
4037
-
4038
- /* It should now be out of any other write domains, and we can update
4039
- * the domain values for our changes.
4040
- */
4041
- GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
4042
- obj->read_domains |= I915_GEM_DOMAIN_WC;
4043
- if (write) {
4044
- obj->read_domains = I915_GEM_DOMAIN_WC;
4045
- obj->write_domain = I915_GEM_DOMAIN_WC;
4046
- obj->mm.dirty = true;
4047
- }
4048
-
4049
- i915_gem_object_unpin_pages(obj);
4050
- return 0;
4051
-}
4052
-
4053
-/**
4054
- * Moves a single object to the GTT read, and possibly write domain.
4055
- * @obj: object to act on
4056
- * @write: ask for write access or read only
4057
- *
4058
- * This function returns when the move is complete, including waiting on
4059
- * flushes to occur.
4060
- */
4061
-int
4062
-i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
4063
-{
4064
- int ret;
4065
-
4066
- lockdep_assert_held(&obj->base.dev->struct_mutex);
4067
-
4068
- ret = i915_gem_object_wait(obj,
4069
- I915_WAIT_INTERRUPTIBLE |
4070
- I915_WAIT_LOCKED |
4071
- (write ? I915_WAIT_ALL : 0),
4072
- MAX_SCHEDULE_TIMEOUT,
4073
- NULL);
4074
- if (ret)
4075
- return ret;
4076
-
4077
- if (obj->write_domain == I915_GEM_DOMAIN_GTT)
4078
- return 0;
4079
-
4080
- /* Flush and acquire obj->pages so that we are coherent through
4081
- * direct access in memory with previous cached writes through
4082
- * shmemfs and that our cache domain tracking remains valid.
4083
- * For example, if the obj->filp was moved to swap without us
4084
- * being notified and releasing the pages, we would mistakenly
4085
- * continue to assume that the obj remained out of the CPU cached
4086
- * domain.
4087
- */
4088
- ret = i915_gem_object_pin_pages(obj);
4089
- if (ret)
4090
- return ret;
4091
-
4092
- flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
4093
-
4094
- /* Serialise direct access to this object with the barriers for
4095
- * coherent writes from the GPU, by effectively invalidating the
4096
- * GTT domain upon first access.
4097
- */
4098
- if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
4099
- mb();
4100
-
4101
- /* It should now be out of any other write domains, and we can update
4102
- * the domain values for our changes.
4103
- */
4104
- GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
4105
- obj->read_domains |= I915_GEM_DOMAIN_GTT;
4106
- if (write) {
4107
- obj->read_domains = I915_GEM_DOMAIN_GTT;
4108
- obj->write_domain = I915_GEM_DOMAIN_GTT;
4109
- obj->mm.dirty = true;
4110
- }
4111
-
4112
- i915_gem_object_unpin_pages(obj);
4113
- return 0;
4114
-}
4115
-
4116
-/**
4117
- * Changes the cache-level of an object across all VMA.
4118
- * @obj: object to act on
4119
- * @cache_level: new cache level to set for the object
4120
- *
4121
- * After this function returns, the object will be in the new cache-level
4122
- * across all GTT and the contents of the backing storage will be coherent,
4123
- * with respect to the new cache-level. In order to keep the backing storage
4124
- * coherent for all users, we only allow a single cache level to be set
4125
- * globally on the object and prevent it from being changed whilst the
4126
- * hardware is reading from the object. That is if the object is currently
4127
- * on the scanout it will be set to uncached (or equivalent display
4128
- * cache coherency) and all non-MOCS GPU access will also be uncached so
4129
- * that all direct access to the scanout remains coherent.
4130
- */
4131
-int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
4132
- enum i915_cache_level cache_level)
4133
-{
951
+ struct i915_ggtt *ggtt = &i915->ggtt;
4134952 struct i915_vma *vma;
4135953 int ret;
4136
-
4137
- lockdep_assert_held(&obj->base.dev->struct_mutex);
4138
-
4139
- if (obj->cache_level == cache_level)
4140
- return 0;
4141
-
4142
- /* Inspect the list of currently bound VMA and unbind any that would
4143
- * be invalid given the new cache-level. This is principally to
4144
- * catch the issue of the CS prefetch crossing page boundaries and
4145
- * reading an invalid PTE on older architectures.
4146
- */
4147
-restart:
4148
- list_for_each_entry(vma, &obj->vma_list, obj_link) {
4149
- if (!drm_mm_node_allocated(&vma->node))
4150
- continue;
4151
-
4152
- if (i915_vma_is_pinned(vma)) {
4153
- DRM_DEBUG("can not change the cache level of pinned objects\n");
4154
- return -EBUSY;
4155
- }
4156
-
4157
- if (!i915_vma_is_closed(vma) &&
4158
- i915_gem_valid_gtt_space(vma, cache_level))
4159
- continue;
4160
-
4161
- ret = i915_vma_unbind(vma);
4162
- if (ret)
4163
- return ret;
4164
-
4165
- /* As unbinding may affect other elements in the
4166
- * obj->vma_list (due to side-effects from retiring
4167
- * an active vma), play safe and restart the iterator.
4168
- */
4169
- goto restart;
4170
- }
4171
-
4172
- /* We can reuse the existing drm_mm nodes but need to change the
4173
- * cache-level on the PTE. We could simply unbind them all and
4174
- * rebind with the correct cache-level on next use. However since
4175
- * we already have a valid slot, dma mapping, pages etc, we may as well
4176
- * rewrite the PTE in the belief that doing so tramples upon less
4177
- * state and so involves less work.
4178
- */
4179
- if (obj->bind_count) {
4180
- /* Before we change the PTE, the GPU must not be accessing it.
4181
- * If we wait upon the object, we know that all the bound
4182
- * VMA are no longer active.
4183
- */
4184
- ret = i915_gem_object_wait(obj,
4185
- I915_WAIT_INTERRUPTIBLE |
4186
- I915_WAIT_LOCKED |
4187
- I915_WAIT_ALL,
4188
- MAX_SCHEDULE_TIMEOUT,
4189
- NULL);
4190
- if (ret)
4191
- return ret;
4192
-
4193
- if (!HAS_LLC(to_i915(obj->base.dev)) &&
4194
- cache_level != I915_CACHE_NONE) {
4195
- /* Access to snoopable pages through the GTT is
4196
- * incoherent and on some machines causes a hard
4197
- * lockup. Relinquish the CPU mmapping to force
4198
- * userspace to refault in the pages and we can
4199
- * then double check if the GTT mapping is still
4200
- * valid for that pointer access.
4201
- */
4202
- i915_gem_release_mmap(obj);
4203
-
4204
- /* As we no longer need a fence for GTT access,
4205
- * we can relinquish it now (and so prevent having
4206
- * to steal a fence from someone else on the next
4207
- * fence request). Note GPU activity would have
4208
- * dropped the fence as all snoopable access is
4209
- * supposed to be linear.
4210
- */
4211
- for_each_ggtt_vma(vma, obj) {
4212
- ret = i915_vma_put_fence(vma);
4213
- if (ret)
4214
- return ret;
4215
- }
4216
- } else {
4217
- /* We either have incoherent backing store and
4218
- * so no GTT access or the architecture is fully
4219
- * coherent. In such cases, existing GTT mmaps
4220
- * ignore the cache bit in the PTE and we can
4221
- * rewrite it without confusing the GPU or having
4222
- * to force userspace to fault back in its mmaps.
4223
- */
4224
- }
4225
-
4226
- list_for_each_entry(vma, &obj->vma_list, obj_link) {
4227
- if (!drm_mm_node_allocated(&vma->node))
4228
- continue;
4229
-
4230
- ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
4231
- if (ret)
4232
- return ret;
4233
- }
4234
- }
4235
-
4236
- list_for_each_entry(vma, &obj->vma_list, obj_link)
4237
- vma->node.color = cache_level;
4238
- i915_gem_object_set_cache_coherency(obj, cache_level);
4239
- obj->cache_dirty = true; /* Always invalidate stale cachelines */
4240
-
4241
- return 0;
4242
-}
4243
-
4244
-int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
4245
- struct drm_file *file)
4246
-{
4247
- struct drm_i915_gem_caching *args = data;
4248
- struct drm_i915_gem_object *obj;
4249
- int err = 0;
4250
-
4251
- rcu_read_lock();
4252
- obj = i915_gem_object_lookup_rcu(file, args->handle);
4253
- if (!obj) {
4254
- err = -ENOENT;
4255
- goto out;
4256
- }
4257
-
4258
- switch (obj->cache_level) {
4259
- case I915_CACHE_LLC:
4260
- case I915_CACHE_L3_LLC:
4261
- args->caching = I915_CACHING_CACHED;
4262
- break;
4263
-
4264
- case I915_CACHE_WT:
4265
- args->caching = I915_CACHING_DISPLAY;
4266
- break;
4267
-
4268
- default:
4269
- args->caching = I915_CACHING_NONE;
4270
- break;
4271
- }
4272
-out:
4273
- rcu_read_unlock();
4274
- return err;
4275
-}
4276
-
4277
-int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
4278
- struct drm_file *file)
4279
-{
4280
- struct drm_i915_private *i915 = to_i915(dev);
4281
- struct drm_i915_gem_caching *args = data;
4282
- struct drm_i915_gem_object *obj;
4283
- enum i915_cache_level level;
4284
- int ret = 0;
4285
-
4286
- switch (args->caching) {
4287
- case I915_CACHING_NONE:
4288
- level = I915_CACHE_NONE;
4289
- break;
4290
- case I915_CACHING_CACHED:
4291
- /*
4292
- * Due to a HW issue on BXT A stepping, GPU stores via a
4293
- * snooped mapping may leave stale data in a corresponding CPU
4294
- * cacheline, whereas normally such cachelines would get
4295
- * invalidated.
4296
- */
4297
- if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
4298
- return -ENODEV;
4299
-
4300
- level = I915_CACHE_LLC;
4301
- break;
4302
- case I915_CACHING_DISPLAY:
4303
- level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
4304
- break;
4305
- default:
4306
- return -EINVAL;
4307
- }
4308
-
4309
- obj = i915_gem_object_lookup(file, args->handle);
4310
- if (!obj)
4311
- return -ENOENT;
4312
-
4313
- /*
4314
- * The caching mode of proxy object is handled by its generator, and
4315
- * not allowed to be changed by userspace.
4316
- */
4317
- if (i915_gem_object_is_proxy(obj)) {
4318
- ret = -ENXIO;
4319
- goto out;
4320
- }
4321
-
4322
- if (obj->cache_level == level)
4323
- goto out;
4324
-
4325
- ret = i915_gem_object_wait(obj,
4326
- I915_WAIT_INTERRUPTIBLE,
4327
- MAX_SCHEDULE_TIMEOUT,
4328
- to_rps_client(file));
4329
- if (ret)
4330
- goto out;
4331
-
4332
- ret = i915_mutex_lock_interruptible(dev);
4333
- if (ret)
4334
- goto out;
4335
-
4336
- ret = i915_gem_object_set_cache_level(obj, level);
4337
- mutex_unlock(&dev->struct_mutex);
4338
-
4339
-out:
4340
- i915_gem_object_put(obj);
4341
- return ret;
4342
-}
4343
-
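
/*
 * A minimal userspace sketch of the caching uAPI served by the two
 * ioctls above, assuming a valid DRM fd and GEM handle; illustrative
 * only. I915_CACHING_CACHED maps to I915_CACHE_LLC and
 * I915_CACHING_DISPLAY to write-through (or uncached) as per
 * i915_gem_set_caching_ioctl().
 */
#include <stdint.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int gem_set_caching(int fd, uint32_t handle, uint32_t caching)
{
	struct drm_i915_gem_caching arg = {
		.handle = handle,
		.caching = caching,	/* I915_CACHING_NONE/CACHED/DISPLAY */
	};

	return ioctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) ? -errno : 0;
}
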
4344
-/*
4345
- * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
4346
- * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
4347
- * (for pageflips). We only flush the caches while preparing the buffer for
4348
- * display, the callers are responsible for frontbuffer flush.
4349
- */
4350
-struct i915_vma *
4351
-i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
4352
- u32 alignment,
4353
- const struct i915_ggtt_view *view,
4354
- unsigned int flags)
4355
-{
4356
- struct i915_vma *vma;
4357
- int ret;
4358
-
4359
- lockdep_assert_held(&obj->base.dev->struct_mutex);
4360
-
4361
- /* Mark the global pin early so that we account for the
4362
- * display coherency whilst setting up the cache domains.
4363
- */
4364
- obj->pin_global++;
4365
-
4366
- /* The display engine is not coherent with the LLC cache on gen6. As
4367
- * a result, we make sure that the pinning that is about to occur is
4368
- * done with uncached PTEs. This is lowest common denominator for all
4369
- * chipsets.
4370
- *
4371
- * However for gen6+, we could do better by using the GFDT bit instead
4372
- * of uncaching, which would allow us to flush all the LLC-cached data
4373
- * with that bit in the PTE to main memory with just one PIPE_CONTROL.
4374
- */
4375
- ret = i915_gem_object_set_cache_level(obj,
4376
- HAS_WT(to_i915(obj->base.dev)) ?
4377
- I915_CACHE_WT : I915_CACHE_NONE);
4378
- if (ret) {
4379
- vma = ERR_PTR(ret);
4380
- goto err_unpin_global;
4381
- }
4382
-
4383
- /* As the user may map the buffer once pinned in the display plane
4384
- * (e.g. libkms for the bootup splash), we have to ensure that we
4385
- * always use map_and_fenceable for all scanout buffers. However,
4386
- * it may simply be too big to fit into mappable, in which case
4387
- * put it anyway and hope that userspace can cope (but always first
4388
- * try to preserve the existing ABI).
4389
- */
4390
- vma = ERR_PTR(-ENOSPC);
4391
- if ((flags & PIN_MAPPABLE) == 0 &&
4392
- (!view || view->type == I915_GGTT_VIEW_NORMAL))
4393
- vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
4394
- flags |
4395
- PIN_MAPPABLE |
4396
- PIN_NONBLOCK);
4397
- if (IS_ERR(vma))
4398
- vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
4399
- if (IS_ERR(vma))
4400
- goto err_unpin_global;
4401
-
4402
- vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
4403
-
4404
- __i915_gem_object_flush_for_display(obj);
4405
-
4406
- /* It should now be out of any other write domains, and we can update
4407
- * the domain values for our changes.
4408
- */
4409
- obj->read_domains |= I915_GEM_DOMAIN_GTT;
4410
-
4411
- return vma;
4412
-
4413
-err_unpin_global:
4414
- obj->pin_global--;
4415
- return vma;
4416
-}
4417
-
4418
-void
4419
-i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
4420
-{
4421
- lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
4422
-
4423
- if (WARN_ON(vma->obj->pin_global == 0))
4424
- return;
4425
-
4426
- if (--vma->obj->pin_global == 0)
4427
- vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
4428
-
4429
- /* Bump the LRU to try and avoid premature eviction whilst flipping */
4430
- i915_gem_object_bump_inactive_ggtt(vma->obj);
4431
-
4432
- i915_vma_unpin(vma);
4433
-}
4434
-
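
/*
 * A minimal kernel-side sketch of how display code brackets a scanout
 * buffer with the two helpers above; the example_* wrappers are
 * hypothetical and error handling is trimmed. In this version of the
 * driver both calls must run under struct_mutex.
 */
static struct i915_vma *
example_pin_scanout(struct drm_i915_gem_object *obj,
		    const struct i915_ggtt_view *view, u32 alignment)
{
	struct i915_vma *vma;

	mutex_lock(&obj->base.dev->struct_mutex);
	/* Sets a scanout cache level, pins into the GGTT, flushes for display. */
	vma = i915_gem_object_pin_to_display_plane(obj, alignment, view, 0);
	mutex_unlock(&obj->base.dev->struct_mutex);

	return vma; /* ERR_PTR() on failure */
}

static void example_unpin_scanout(struct i915_vma *vma)
{
	struct drm_device *dev = vma->obj->base.dev;

	mutex_lock(&dev->struct_mutex);
	i915_gem_object_unpin_from_display_plane(vma);
	mutex_unlock(&dev->struct_mutex);
}
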
4435
-/**
4436
- * Moves a single object to the CPU read, and possibly write domain.
4437
- * @obj: object to act on
4438
- * @write: requesting write or read-only access
4439
- *
4440
- * This function returns when the move is complete, including waiting on
4441
- * flushes to occur.
4442
- */
4443
-int
4444
-i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
4445
-{
4446
- int ret;
4447
-
4448
- lockdep_assert_held(&obj->base.dev->struct_mutex);
4449
-
4450
- ret = i915_gem_object_wait(obj,
4451
- I915_WAIT_INTERRUPTIBLE |
4452
- I915_WAIT_LOCKED |
4453
- (write ? I915_WAIT_ALL : 0),
4454
- MAX_SCHEDULE_TIMEOUT,
4455
- NULL);
4456
- if (ret)
4457
- return ret;
4458
-
4459
- flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
4460
-
4461
- /* Flush the CPU cache if it's still invalid. */
4462
- if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
4463
- i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
4464
- obj->read_domains |= I915_GEM_DOMAIN_CPU;
4465
- }
4466
-
4467
- /* It should now be out of any other write domains, and we can update
4468
- * the domain values for our changes.
4469
- */
4470
- GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
4471
-
4472
- /* If we're writing through the CPU, then the GPU read domains will
4473
- * need to be invalidated at next use.
4474
- */
4475
- if (write)
4476
- __start_cpu_write(obj);
4477
-
4478
- return 0;
4479
-}
4480
-
4481
-/* Throttle our rendering by waiting until the ring has completed our requests
4482
- * emitted over 20 msec ago.
4483
- *
4484
- * Note that if we were to use the current jiffies each time around the loop,
4485
- * we wouldn't escape the function with any frames outstanding if the time to
4486
- * render a frame was over 20ms.
4487
- *
4488
- * This should get us reasonable parallelism between CPU and GPU but also
4489
- * relatively low latency when blocking on a particular request to finish.
4490
- */
4491
-static int
4492
-i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
4493
-{
4494
- struct drm_i915_private *dev_priv = to_i915(dev);
4495
- struct drm_i915_file_private *file_priv = file->driver_priv;
4496
- unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
4497
- struct i915_request *request, *target = NULL;
4498
- long ret;
4499
-
4500
- /* ABI: return -EIO if already wedged */
4501
- if (i915_terminally_wedged(&dev_priv->gpu_error))
4502
- return -EIO;
4503
-
4504
- spin_lock(&file_priv->mm.lock);
4505
- list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
4506
- if (time_after_eq(request->emitted_jiffies, recent_enough))
4507
- break;
4508
-
4509
- if (target) {
4510
- list_del(&target->client_link);
4511
- target->file_priv = NULL;
4512
- }
4513
-
4514
- target = request;
4515
- }
4516
- if (target)
4517
- i915_request_get(target);
4518
- spin_unlock(&file_priv->mm.lock);
4519
-
4520
- if (target == NULL)
4521
- return 0;
4522
-
4523
- ret = i915_request_wait(target,
4524
- I915_WAIT_INTERRUPTIBLE,
4525
- MAX_SCHEDULE_TIMEOUT);
4526
- i915_request_put(target);
4527
-
4528
- return ret < 0 ? ret : 0;
4529
-}
4530
-
4531
-struct i915_vma *
4532
-i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
4533
- const struct i915_ggtt_view *view,
4534
- u64 size,
4535
- u64 alignment,
4536
- u64 flags)
4537
-{
4538
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
4539
- struct i915_address_space *vm = &dev_priv->ggtt.vm;
4540
-
4541
- return i915_gem_object_pin(obj, vm, view, size, alignment,
4542
- flags | PIN_GLOBAL);
4543
-}
4544
-
4545
-struct i915_vma *
4546
-i915_gem_object_pin(struct drm_i915_gem_object *obj,
4547
- struct i915_address_space *vm,
4548
- const struct i915_ggtt_view *view,
4549
- u64 size,
4550
- u64 alignment,
4551
- u64 flags)
4552
-{
4553
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
4554
- struct i915_vma *vma;
4555
- int ret;
4556
-
4557
- lockdep_assert_held(&obj->base.dev->struct_mutex);
4558954
4559955 if (flags & PIN_MAPPABLE &&
4560956 (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
4561
- /* If the required space is larger than the available
957
+ /*
958
+ * If the required space is larger than the available
4562959 * aperture, we will not be able to find a slot for the
4563960 * object and unbinding the object now will be in
4564961 * vain. Worse, doing so may cause us to ping-pong
4565962 * the object in and out of the Global GTT and
4566963 * waste a lot of cycles under the mutex.
4567964 */
4568
- if (obj->base.size > dev_priv->ggtt.mappable_end)
965
+ if (obj->base.size > ggtt->mappable_end)
4569966 return ERR_PTR(-E2BIG);
4570967
4571
- /* If NONBLOCK is set the caller is optimistically
968
+ /*
969
+ * If NONBLOCK is set the caller is optimistically
4572970 * trying to cache the full object within the mappable
4573971 * aperture, and *must* have a fallback in place for
4574972 * situations where we cannot bind the object. We
....@@ -4584,12 +982,13 @@
4584982 * we could try to minimise harm to others.
4585983 */
4586984 if (flags & PIN_NONBLOCK &&
4587
- obj->base.size > dev_priv->ggtt.mappable_end / 2)
985
+ obj->base.size > ggtt->mappable_end / 2)
4588986 return ERR_PTR(-ENOSPC);
4589987 }
4590988
4591
- vma = i915_vma_instance(obj, vm, view);
4592
- if (unlikely(IS_ERR(vma)))
989
+new_vma:
990
+ vma = i915_vma_instance(obj, &ggtt->vm, view);
991
+ if (IS_ERR(vma))
4593992 return vma;
4594993
4595994 if (i915_vma_misplaced(vma, size, alignment, flags)) {
....@@ -4598,166 +997,44 @@
4598997 return ERR_PTR(-ENOSPC);
4599998
4600999 if (flags & PIN_MAPPABLE &&
4601
- vma->fence_size > dev_priv->ggtt.mappable_end / 2)
1000
+ vma->fence_size > ggtt->mappable_end / 2)
46021001 return ERR_PTR(-ENOSPC);
46031002 }
46041003
4605
- WARN(i915_vma_is_pinned(vma),
4606
- "bo is already pinned in ggtt with incorrect alignment:"
4607
- " offset=%08x, req.alignment=%llx,"
4608
- " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
4609
- i915_ggtt_offset(vma), alignment,
4610
- !!(flags & PIN_MAPPABLE),
4611
- i915_vma_is_map_and_fenceable(vma));
1004
+ if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) {
1005
+ discard_ggtt_vma(vma);
1006
+ goto new_vma;
1007
+ }
1008
+
46121009 ret = i915_vma_unbind(vma);
46131010 if (ret)
46141011 return ERR_PTR(ret);
46151012 }
46161013
4617
- ret = i915_vma_pin(vma, size, alignment, flags);
1014
+ ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL);
46181015 if (ret)
46191016 return ERR_PTR(ret);
46201017
4621
- return vma;
4622
-}
4623
-
4624
-static __always_inline unsigned int __busy_read_flag(unsigned int id)
4625
-{
4626
- /* Note that we could alias engines in the execbuf API, but
4627
- * that would be very unwise as it prevents userspace from
4628
- * fine control over engine selection. Ahem.
4629
- *
4630
- * This should be something like EXEC_MAX_ENGINE instead of
4631
- * I915_NUM_ENGINES.
4632
- */
4633
- BUILD_BUG_ON(I915_NUM_ENGINES > 16);
4634
- return 0x10000 << id;
4635
-}
4636
-
4637
-static __always_inline unsigned int __busy_write_id(unsigned int id)
4638
-{
4639
- /* The uABI guarantees an active writer is also amongst the read
4640
- * engines. This would be true if we accessed the activity tracking
4641
- * under the lock, but as we perform the lookup of the object and
4642
- * its activity locklessly we can not guarantee that the last_write
4643
- * being active implies that we have set the same engine flag from
4644
- * last_read - hence we always set both read and write busy for
4645
- * last_write.
4646
- */
4647
- return id | __busy_read_flag(id);
4648
-}
4649
-
4650
-static __always_inline unsigned int
4651
-__busy_set_if_active(const struct dma_fence *fence,
4652
- unsigned int (*flag)(unsigned int id))
4653
-{
4654
- struct i915_request *rq;
4655
-
4656
- /* We have to check the current hw status of the fence as the uABI
4657
- * guarantees forward progress. We could rely on the idle worker
4658
- * to eventually flush us, but to minimise latency just ask the
4659
- * hardware.
4660
- *
4661
- * Note we only report on the status of native fences.
4662
- */
4663
- if (!dma_fence_is_i915(fence))
4664
- return 0;
4665
-
4666
- /* opencode to_request() in order to avoid const warnings */
4667
- rq = container_of(fence, struct i915_request, fence);
4668
- if (i915_request_completed(rq))
4669
- return 0;
4670
-
4671
- return flag(rq->engine->uabi_id);
4672
-}
4673
-
4674
-static __always_inline unsigned int
4675
-busy_check_reader(const struct dma_fence *fence)
4676
-{
4677
- return __busy_set_if_active(fence, __busy_read_flag);
4678
-}
4679
-
4680
-static __always_inline unsigned int
4681
-busy_check_writer(const struct dma_fence *fence)
4682
-{
4683
- if (!fence)
4684
- return 0;
4685
-
4686
- return __busy_set_if_active(fence, __busy_write_id);
4687
-}
4688
-
4689
-int
4690
-i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4691
- struct drm_file *file)
4692
-{
4693
- struct drm_i915_gem_busy *args = data;
4694
- struct drm_i915_gem_object *obj;
4695
- struct reservation_object_list *list;
4696
- unsigned int seq;
4697
- int err;
4698
-
4699
- err = -ENOENT;
4700
- rcu_read_lock();
4701
- obj = i915_gem_object_lookup_rcu(file, args->handle);
4702
- if (!obj)
4703
- goto out;
4704
-
4705
- /* A discrepancy here is that we do not report the status of
4706
- * non-i915 fences, i.e. even though we may report the object as idle,
4707
- * a call to set-domain may still stall waiting for foreign rendering.
4708
- * This also means that wait-ioctl may report an object as busy,
4709
- * where busy-ioctl considers it idle.
4710
- *
4711
- * We trade the ability to warn of foreign fences to report on which
4712
- * i915 engines are active for the object.
4713
- *
4714
- * Alternatively, we can trade that extra information on read/write
4715
- * activity with
4716
- * args->busy =
4717
- * !reservation_object_test_signaled_rcu(obj->resv, true);
4718
- * to report the overall busyness. This is what the wait-ioctl does.
4719
- *
4720
- */
4721
-retry:
4722
- seq = read_seqbegin(&obj->resv->seq);
4723
-
4724
- /* Translate the exclusive fence to the READ *and* WRITE engine */
4725
- args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
4726
-
4727
- /* Translate shared fences to READ set of engines */
4728
- list = rcu_dereference(obj->resv->fence);
4729
- if (list) {
4730
- unsigned int shared_count = list->shared_count, i;
4731
-
4732
- for (i = 0; i < shared_count; ++i) {
4733
- struct dma_fence *fence =
4734
- rcu_dereference(list->shared[i]);
4735
-
4736
- args->busy |= busy_check_reader(fence);
4737
- }
1018
+ if (vma->fence && !i915_gem_object_is_tiled(obj)) {
1019
+ mutex_lock(&ggtt->vm.mutex);
1020
+ i915_vma_revoke_fence(vma);
1021
+ mutex_unlock(&ggtt->vm.mutex);
47381022 }
47391023
4740
- if (args->busy && read_seqretry(&obj->resv->seq, seq))
4741
- goto retry;
1024
+ ret = i915_vma_wait_for_bind(vma);
1025
+ if (ret) {
1026
+ i915_vma_unpin(vma);
1027
+ return ERR_PTR(ret);
1028
+ }
47421029
4743
- err = 0;
4744
-out:
4745
- rcu_read_unlock();
4746
- return err;
4747
-}
4748
-
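
/*
 * A minimal userspace sketch of decoding the busy ioctl result built by
 * the helpers above, assuming a valid DRM fd and GEM handle; illustrative
 * only. Per __busy_read_flag()/__busy_write_id(), bits 16 and up form a
 * bitmask of engines reading the object and the low 16 bits identify the
 * engine last writing to it; a result of zero means idle.
 */
#include <stdint.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int gem_busy(int fd, uint32_t handle,
		    uint32_t *readers, uint32_t *writer)
{
	struct drm_i915_gem_busy busy = { .handle = handle };

	if (ioctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
		return -errno;

	*readers = busy.busy >> 16;	/* bitmask of reading engines */
	*writer = busy.busy & 0xffff;	/* engine holding the last write */
	return 0;
}
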
4749
-int
4750
-i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4751
- struct drm_file *file_priv)
4752
-{
4753
- return i915_gem_ring_throttle(dev, file_priv);
1030
+ return vma;
47541031 }
47551032
47561033 int
47571034 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
47581035 struct drm_file *file_priv)
47591036 {
4760
- struct drm_i915_private *dev_priv = to_i915(dev);
1037
+ struct drm_i915_private *i915 = to_i915(dev);
47611038 struct drm_i915_gem_madvise *args = data;
47621039 struct drm_i915_gem_object *obj;
47631040 int err;
....@@ -4780,7 +1057,7 @@
47801057
47811058 if (i915_gem_object_has_pages(obj) &&
47821059 i915_gem_object_is_tiled(obj) &&
4783
- dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
1060
+ i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
47841061 if (obj->mm.madv == I915_MADV_WILLNEED) {
47851062 GEM_BUG_ON(!obj->mm.quirked);
47861063 __i915_gem_object_unpin_pages(obj);
....@@ -4796,6 +1073,24 @@
47961073 if (obj->mm.madv != __I915_MADV_PURGED)
47971074 obj->mm.madv = args->madv;
47981075
1076
+ if (i915_gem_object_has_pages(obj)) {
1077
+ struct list_head *list;
1078
+
1079
+ if (i915_gem_object_is_shrinkable(obj)) {
1080
+ unsigned long flags;
1081
+
1082
+ spin_lock_irqsave(&i915->mm.obj_lock, flags);
1083
+
1084
+ if (obj->mm.madv != I915_MADV_WILLNEED)
1085
+ list = &i915->mm.purge_list;
1086
+ else
1087
+ list = &i915->mm.shrink_list;
1088
+ list_move_tail(&obj->mm.link, list);
1089
+
1090
+ spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
1091
+ }
1092
+ }
1093
+
47991094 /* if the object is no longer attached, discard its backing storage */
48001095 if (obj->mm.madv == I915_MADV_DONTNEED &&
48011096 !i915_gem_object_has_pages(obj))
....@@ -4809,793 +1104,6 @@
48091104 return err;
48101105 }
48111106
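
/*
 * A minimal userspace sketch of the madvise uAPI handled above, assuming
 * a valid DRM fd and GEM handle; illustrative only. A buffer parked in a
 * userspace cache is marked DONTNEED so the shrinker may purge it; on
 * reuse it is flipped back to WILLNEED and "retained" reports whether the
 * backing pages survived.
 */
#include <stdint.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int gem_madvise(int fd, uint32_t handle, uint32_t madv, int *retained)
{
	struct drm_i915_gem_madvise arg = {
		.handle = handle,
		.madv = madv,		/* I915_MADV_WILLNEED or _DONTNEED */
	};

	if (ioctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &arg))
		return -errno;

	*retained = arg.retained;	/* 0: pages were purged while DONTNEED */
	return 0;
}
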
4812
-static void
4813
-frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request)
4814
-{
4815
- struct drm_i915_gem_object *obj =
4816
- container_of(active, typeof(*obj), frontbuffer_write);
4817
-
4818
- intel_fb_obj_flush(obj, ORIGIN_CS);
4819
-}
4820
-
4821
-void i915_gem_object_init(struct drm_i915_gem_object *obj,
4822
- const struct drm_i915_gem_object_ops *ops)
4823
-{
4824
- mutex_init(&obj->mm.lock);
4825
-
4826
- INIT_LIST_HEAD(&obj->vma_list);
4827
- INIT_LIST_HEAD(&obj->lut_list);
4828
- INIT_LIST_HEAD(&obj->batch_pool_link);
4829
-
4830
- obj->ops = ops;
4831
-
4832
- reservation_object_init(&obj->__builtin_resv);
4833
- obj->resv = &obj->__builtin_resv;
4834
-
4835
- obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
4836
- init_request_active(&obj->frontbuffer_write, frontbuffer_retire);
4837
-
4838
- obj->mm.madv = I915_MADV_WILLNEED;
4839
- INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
4840
- mutex_init(&obj->mm.get_page.lock);
4841
-
4842
- i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
4843
-}
4844
-
4845
-static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4846
- .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
4847
- I915_GEM_OBJECT_IS_SHRINKABLE,
4848
-
4849
- .get_pages = i915_gem_object_get_pages_gtt,
4850
- .put_pages = i915_gem_object_put_pages_gtt,
4851
-
4852
- .pwrite = i915_gem_object_pwrite_gtt,
4853
-};
4854
-
4855
-static int i915_gem_object_create_shmem(struct drm_device *dev,
4856
- struct drm_gem_object *obj,
4857
- size_t size)
4858
-{
4859
- struct drm_i915_private *i915 = to_i915(dev);
4860
- unsigned long flags = VM_NORESERVE;
4861
- struct file *filp;
4862
-
4863
- drm_gem_private_object_init(dev, obj, size);
4864
-
4865
- if (i915->mm.gemfs)
4866
- filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
4867
- flags);
4868
- else
4869
- filp = shmem_file_setup("i915", size, flags);
4870
-
4871
- if (IS_ERR(filp))
4872
- return PTR_ERR(filp);
4873
-
4874
- obj->filp = filp;
4875
-
4876
- return 0;
4877
-}
4878
-
4879
-struct drm_i915_gem_object *
4880
-i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
4881
-{
4882
- struct drm_i915_gem_object *obj;
4883
- struct address_space *mapping;
4884
- unsigned int cache_level;
4885
- gfp_t mask;
4886
- int ret;
4887
-
4888
- /* There is a prevalence of the assumption that we fit the object's
4889
- * page count inside a 32bit _signed_ variable. Let's document this and
4890
- * catch if we ever need to fix it. In the meantime, if you do spot
4891
- * such a local variable, please consider fixing!
4892
- */
4893
- if (size >> PAGE_SHIFT > INT_MAX)
4894
- return ERR_PTR(-E2BIG);
4895
-
4896
- if (overflows_type(size, obj->base.size))
4897
- return ERR_PTR(-E2BIG);
4898
-
4899
- obj = i915_gem_object_alloc(dev_priv);
4900
- if (obj == NULL)
4901
- return ERR_PTR(-ENOMEM);
4902
-
4903
- ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size);
4904
- if (ret)
4905
- goto fail;
4906
-
4907
- mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4908
- if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) {
4909
- /* 965gm cannot relocate objects above 4GiB. */
4910
- mask &= ~__GFP_HIGHMEM;
4911
- mask |= __GFP_DMA32;
4912
- }
4913
-
4914
- mapping = obj->base.filp->f_mapping;
4915
- mapping_set_gfp_mask(mapping, mask);
4916
- GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
4917
-
4918
- i915_gem_object_init(obj, &i915_gem_object_ops);
4919
-
4920
- obj->write_domain = I915_GEM_DOMAIN_CPU;
4921
- obj->read_domains = I915_GEM_DOMAIN_CPU;
4922
-
4923
- if (HAS_LLC(dev_priv))
4924
- /* On some devices, we can have the GPU use the LLC (the CPU
4925
- * cache) for about a 10% performance improvement
4926
- * compared to uncached. Graphics requests other than
4927
- * display scanout are coherent with the CPU in
4928
- * accessing this cache. This means in this mode we
4929
- * don't need to clflush on the CPU side, and on the
4930
- * GPU side we only need to flush internal caches to
4931
- * get data visible to the CPU.
4932
- *
4933
- * However, we maintain the display planes as UC, and so
4934
- * need to rebind when first used as such.
4935
- */
4936
- cache_level = I915_CACHE_LLC;
4937
- else
4938
- cache_level = I915_CACHE_NONE;
4939
-
4940
- i915_gem_object_set_cache_coherency(obj, cache_level);
4941
-
4942
- trace_i915_gem_object_create(obj);
4943
-
4944
- return obj;
4945
-
4946
-fail:
4947
- i915_gem_object_free(obj);
4948
- return ERR_PTR(ret);
4949
-}
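
/*
 * A minimal userspace sketch of the create ioctl, the uAPI entry point
 * into the shmem-backed allocation done by i915_gem_object_create()
 * above; illustrative only, assuming a valid DRM fd. The size is rounded
 * up to page granularity by the kernel.
 */
#include <stdint.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int gem_create(int fd, uint64_t size, uint32_t *handle)
{
	struct drm_i915_gem_create create = { .size = size };

	if (ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create))
		return -errno;

	*handle = create.handle;	/* GEM handle, local to this fd */
	return 0;
}
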
4950
-
4951
-static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4952
-{
4953
- /* If we are the last user of the backing storage (be it shmemfs
4954
- * pages or stolen etc), we know that the pages are going to be
4955
- * immediately released. In this case, we can then skip copying
4956
- * back the contents from the GPU.
4957
- */
4958
-
4959
- if (obj->mm.madv != I915_MADV_WILLNEED)
4960
- return false;
4961
-
4962
- if (obj->base.filp == NULL)
4963
- return true;
4964
-
4965
- /* At first glance, this looks racy, but then again so would be
4966
- * userspace racing mmap against close. However, the first external
4967
- * reference to the filp can only be obtained through the
4968
- * i915_gem_mmap_ioctl() which safeguards us against the user
4969
- * acquiring such a reference whilst we are in the middle of
4970
- * freeing the object.
4971
- */
4972
- return atomic_long_read(&obj->base.filp->f_count) == 1;
4973
-}
4974
-
4975
-static void __i915_gem_free_objects(struct drm_i915_private *i915,
4976
- struct llist_node *freed)
4977
-{
4978
- struct drm_i915_gem_object *obj, *on;
4979
-
4980
- intel_runtime_pm_get(i915);
4981
- llist_for_each_entry_safe(obj, on, freed, freed) {
4982
- struct i915_vma *vma, *vn;
4983
-
4984
- trace_i915_gem_object_destroy(obj);
4985
-
4986
- mutex_lock(&i915->drm.struct_mutex);
4987
-
4988
- GEM_BUG_ON(i915_gem_object_is_active(obj));
4989
- list_for_each_entry_safe(vma, vn,
4990
- &obj->vma_list, obj_link) {
4991
- GEM_BUG_ON(i915_vma_is_active(vma));
4992
- vma->flags &= ~I915_VMA_PIN_MASK;
4993
- i915_vma_destroy(vma);
4994
- }
4995
- GEM_BUG_ON(!list_empty(&obj->vma_list));
4996
- GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree));
4997
-
4998
- /* This serializes freeing with the shrinker. Since the free
4999
- * is delayed, first by RCU then by the workqueue, we want the
5000
- * shrinker to be able to free pages of unreferenced objects,
5001
- * or else we may oom whilst there are plenty of deferred
5002
- * freed objects.
5003
- */
5004
- if (i915_gem_object_has_pages(obj)) {
5005
- spin_lock(&i915->mm.obj_lock);
5006
- list_del_init(&obj->mm.link);
5007
- spin_unlock(&i915->mm.obj_lock);
5008
- }
5009
-
5010
- mutex_unlock(&i915->drm.struct_mutex);
5011
-
5012
- GEM_BUG_ON(obj->bind_count);
5013
- GEM_BUG_ON(obj->userfault_count);
5014
- GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
5015
- GEM_BUG_ON(!list_empty(&obj->lut_list));
5016
-
5017
- if (obj->ops->release)
5018
- obj->ops->release(obj);
5019
-
5020
- if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
5021
- atomic_set(&obj->mm.pages_pin_count, 0);
5022
- __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
5023
- GEM_BUG_ON(i915_gem_object_has_pages(obj));
5024
-
5025
- if (obj->base.import_attach)
5026
- drm_prime_gem_destroy(&obj->base, NULL);
5027
-
5028
- reservation_object_fini(&obj->__builtin_resv);
5029
- drm_gem_object_release(&obj->base);
5030
- i915_gem_info_remove_obj(i915, obj->base.size);
5031
-
5032
- kfree(obj->bit_17);
5033
- i915_gem_object_free(obj);
5034
-
5035
- GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
5036
- atomic_dec(&i915->mm.free_count);
5037
-
5038
- if (on)
5039
- cond_resched();
5040
- }
5041
- intel_runtime_pm_put(i915);
5042
-}
5043
-
5044
-static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
5045
-{
5046
- struct llist_node *freed;
5047
-
5048
- /* Free the oldest, most stale object to keep the free_list short */
5049
- freed = NULL;
5050
- if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
5051
- /* Only one consumer of llist_del_first() allowed */
5052
- spin_lock(&i915->mm.free_lock);
5053
- freed = llist_del_first(&i915->mm.free_list);
5054
- spin_unlock(&i915->mm.free_lock);
5055
- }
5056
- if (unlikely(freed)) {
5057
- freed->next = NULL;
5058
- __i915_gem_free_objects(i915, freed);
5059
- }
5060
-}
5061
-
5062
-static void __i915_gem_free_work(struct work_struct *work)
5063
-{
5064
- struct drm_i915_private *i915 =
5065
- container_of(work, struct drm_i915_private, mm.free_work);
5066
- struct llist_node *freed;
5067
-
5068
- /*
5069
- * All file-owned VMA should have been released by this point through
5070
- * i915_gem_close_object(), or earlier by i915_gem_context_close().
5071
- * However, the object may also be bound into the global GTT (e.g.
5072
- * older GPUs without per-process support, or for direct access through
5073
- * the GTT either for the user or for scanout). Those VMA still need to
5074
- * unbound now.
5075
- */
5076
-
5077
- spin_lock(&i915->mm.free_lock);
5078
- while ((freed = llist_del_all(&i915->mm.free_list))) {
5079
- spin_unlock(&i915->mm.free_lock);
5080
-
5081
- __i915_gem_free_objects(i915, freed);
5082
- if (need_resched())
5083
- return;
5084
-
5085
- spin_lock(&i915->mm.free_lock);
5086
- }
5087
- spin_unlock(&i915->mm.free_lock);
5088
-}
5089
-
5090
-static void __i915_gem_free_object_rcu(struct rcu_head *head)
5091
-{
5092
- struct drm_i915_gem_object *obj =
5093
- container_of(head, typeof(*obj), rcu);
5094
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
5095
-
5096
- /*
5097
- * Since we require blocking on struct_mutex to unbind the freed
5098
- * object from the GPU before releasing resources back to the
5099
- * system, we can not do that directly from the RCU callback (which may
5100
- * be a softirq context), but must instead then defer that work onto a
5101
- * kthread. We use the RCU callback rather than move the freed object
5102
- * directly onto the work queue so that we can mix between using the
5103
- * worker and performing frees directly from subsequent allocations for
5104
- * crude but effective memory throttling.
5105
- */
5106
- if (llist_add(&obj->freed, &i915->mm.free_list))
5107
- queue_work(i915->wq, &i915->mm.free_work);
5108
-}
5109
-
5110
-void i915_gem_free_object(struct drm_gem_object *gem_obj)
5111
-{
5112
- struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
5113
-
5114
- if (obj->mm.quirked)
5115
- __i915_gem_object_unpin_pages(obj);
5116
-
5117
- if (discard_backing_storage(obj))
5118
- obj->mm.madv = I915_MADV_DONTNEED;
5119
-
5120
- /*
5121
- * Before we free the object, make sure any pure RCU-only
5122
- * read-side critical sections are complete, e.g.
5123
- * i915_gem_busy_ioctl(). For the corresponding synchronized
5124
- * lookup see i915_gem_object_lookup_rcu().
5125
- */
5126
- atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
5127
- call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
5128
-}
5129
-
5130
-void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
5131
-{
5132
- lockdep_assert_held(&obj->base.dev->struct_mutex);
5133
-
5134
- if (!i915_gem_object_has_active_reference(obj) &&
5135
- i915_gem_object_is_active(obj))
5136
- i915_gem_object_set_active_reference(obj);
5137
- else
5138
- i915_gem_object_put(obj);
5139
-}
5140
-
5141
-void i915_gem_sanitize(struct drm_i915_private *i915)
5142
-{
5143
- int err;
5144
-
5145
- GEM_TRACE("\n");
5146
-
5147
- mutex_lock(&i915->drm.struct_mutex);
5148
-
5149
- intel_runtime_pm_get(i915);
5150
- intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
5151
-
5152
- /*
5153
- * As we have just resumed the machine and woken the device up from
5154
- * deep PCI sleep (presumably D3_cold), assume the HW has been reset
5155
- * back to defaults, recovering from whatever wedged state we left it
5156
- * in and so worth trying to use the device once more.
5157
- */
5158
- if (i915_terminally_wedged(&i915->gpu_error))
5159
- i915_gem_unset_wedged(i915);
5160
-
5161
- /*
5162
- * If we inherit context state from the BIOS or earlier occupants
5163
- * of the GPU, the GPU may be in an inconsistent state when we
5164
- * try to take over. The only way to remove the earlier state
5165
- * is by resetting. However, resetting on earlier gen is tricky as
5166
- * it may impact the display and we are uncertain about the stability
5167
- * of the reset, so this could be applied to even earlier gen.
5168
- */
5169
- err = -ENODEV;
5170
- if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
5171
- err = WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
5172
- if (!err)
5173
- intel_engines_sanitize(i915);
5174
-
5175
- intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
5176
- intel_runtime_pm_put(i915);
5177
-
5178
- i915_gem_contexts_lost(i915);
5179
- mutex_unlock(&i915->drm.struct_mutex);
5180
-}
5181
-
5182
-int i915_gem_suspend(struct drm_i915_private *i915)
5183
-{
5184
- int ret;
5185
-
5186
- GEM_TRACE("\n");
5187
-
5188
- intel_runtime_pm_get(i915);
5189
- intel_suspend_gt_powersave(i915);
5190
-
5191
- mutex_lock(&i915->drm.struct_mutex);
5192
-
5193
- /*
5194
- * We have to flush all the executing contexts to main memory so
5195
- * that they can be saved in the hibernation image. To ensure the last
5196
- * context image is coherent, we have to switch away from it. That
5197
- * leaves the i915->kernel_context still active when
5198
- * we actually suspend, and its image in memory may not match the GPU
5199
- * state. Fortunately, the kernel_context is disposable and we do
5200
- * not rely on its state.
5201
- */
5202
- if (!i915_terminally_wedged(&i915->gpu_error)) {
5203
- ret = i915_gem_switch_to_kernel_context(i915);
5204
- if (ret)
5205
- goto err_unlock;
5206
-
5207
- ret = i915_gem_wait_for_idle(i915,
5208
- I915_WAIT_INTERRUPTIBLE |
5209
- I915_WAIT_LOCKED |
5210
- I915_WAIT_FOR_IDLE_BOOST,
5211
- MAX_SCHEDULE_TIMEOUT);
5212
- if (ret && ret != -EIO)
5213
- goto err_unlock;
5214
-
5215
- assert_kernel_context_is_current(i915);
5216
- }
5217
- i915_retire_requests(i915); /* ensure we flush after wedging */
5218
-
5219
- mutex_unlock(&i915->drm.struct_mutex);
5220
-
5221
- intel_uc_suspend(i915);
5222
-
5223
- cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
5224
- cancel_delayed_work_sync(&i915->gt.retire_work);
5225
-
5226
- /*
5227
- * As the idle_work is rearming if it detects a race, play safe and
5228
- * repeat the flush until it is definitely idle.
5229
- */
5230
- drain_delayed_work(&i915->gt.idle_work);
5231
-
5232
- /*
5233
- * Assert that we successfully flushed all the work and
5234
- * reset the GPU back to its idle, low power state.
5235
- */
5236
- WARN_ON(i915->gt.awake);
5237
- if (WARN_ON(!intel_engines_are_idle(i915)))
5238
- i915_gem_set_wedged(i915); /* no hope, discard everything */
5239
-
5240
- intel_runtime_pm_put(i915);
5241
- return 0;
5242
-
5243
-err_unlock:
5244
- mutex_unlock(&i915->drm.struct_mutex);
5245
- intel_runtime_pm_put(i915);
5246
- return ret;
5247
-}
5248
-
5249
-void i915_gem_suspend_late(struct drm_i915_private *i915)
5250
-{
5251
- struct drm_i915_gem_object *obj;
5252
- struct list_head *phases[] = {
5253
- &i915->mm.unbound_list,
5254
- &i915->mm.bound_list,
5255
- NULL
5256
- }, **phase;
5257
-
5258
- /*
5259
- * Neither the BIOS, ourselves, nor any other kernel
5260
- * expects the system to be in execlists mode on startup,
5261
- * so we need to reset the GPU back to legacy mode. And the only
5262
- * known way to disable logical contexts is through a GPU reset.
5263
- *
5264
- * So in order to leave the system in a known default configuration,
5265
- * always reset the GPU upon unload and suspend. Afterwards we then
5266
- * clean up the GEM state tracking, flushing off the requests and
5267
- * leaving the system in a known idle state.
5268
- *
5269
- * Note that it is of the utmost importance that the GPU is idle and
5270
- * all stray writes are flushed *before* we dismantle the backing
5271
- * storage for the pinned objects.
5272
- *
5273
- * However, since we are uncertain that resetting the GPU on older
5274
- * machines is a good idea, we don't - just in case it leaves the
5275
- * machine in an unusable condition.
5276
- */
5277
-
5278
- mutex_lock(&i915->drm.struct_mutex);
5279
- for (phase = phases; *phase; phase++) {
5280
- list_for_each_entry(obj, *phase, mm.link)
5281
- WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
5282
- }
5283
- mutex_unlock(&i915->drm.struct_mutex);
5284
-
5285
- intel_uc_sanitize(i915);
5286
- i915_gem_sanitize(i915);
5287
-}
5288
-
5289
-void i915_gem_resume(struct drm_i915_private *i915)
5290
-{
5291
- GEM_TRACE("\n");
5292
-
5293
- WARN_ON(i915->gt.awake);
5294
-
5295
- mutex_lock(&i915->drm.struct_mutex);
5296
- intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
5297
-
5298
- i915_gem_restore_gtt_mappings(i915);
5299
- i915_gem_restore_fences(i915);
5300
-
5301
- /*
5302
- * As we didn't flush the kernel context before suspend, we cannot
5303
- * guarantee that the context image is complete. So let's just reset
5304
- * it and start again.
5305
- */
5306
- i915->gt.resume(i915);
5307
-
5308
- if (i915_gem_init_hw(i915))
5309
- goto err_wedged;
5310
-
5311
- intel_uc_resume(i915);
5312
-
5313
- /* Always reload a context for powersaving. */
5314
- if (i915_gem_switch_to_kernel_context(i915))
5315
- goto err_wedged;
5316
-
5317
-out_unlock:
5318
- intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
5319
- mutex_unlock(&i915->drm.struct_mutex);
5320
- return;
5321
-
5322
-err_wedged:
5323
- if (!i915_terminally_wedged(&i915->gpu_error)) {
5324
- DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
5325
- i915_gem_set_wedged(i915);
5326
- }
5327
- goto out_unlock;
5328
-}
5329
-
5330
-void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
5331
-{
5332
- if (INTEL_GEN(dev_priv) < 5 ||
5333
- dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
5334
- return;
5335
-
5336
- I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
5337
- DISP_TILE_SURFACE_SWIZZLING);
5338
-
5339
- if (IS_GEN5(dev_priv))
5340
- return;
5341
-
5342
- I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
5343
- if (IS_GEN6(dev_priv))
5344
- I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
5345
- else if (IS_GEN7(dev_priv))
5346
- I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
5347
- else if (IS_GEN8(dev_priv))
5348
- I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
5349
- else
5350
- BUG();
5351
-}
5352
-
5353
-static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
5354
-{
5355
- I915_WRITE(RING_CTL(base), 0);
5356
- I915_WRITE(RING_HEAD(base), 0);
5357
- I915_WRITE(RING_TAIL(base), 0);
5358
- I915_WRITE(RING_START(base), 0);
5359
-}
5360
-
5361
-static void init_unused_rings(struct drm_i915_private *dev_priv)
5362
-{
5363
- if (IS_I830(dev_priv)) {
5364
- init_unused_ring(dev_priv, PRB1_BASE);
5365
- init_unused_ring(dev_priv, SRB0_BASE);
5366
- init_unused_ring(dev_priv, SRB1_BASE);
5367
- init_unused_ring(dev_priv, SRB2_BASE);
5368
- init_unused_ring(dev_priv, SRB3_BASE);
5369
- } else if (IS_GEN2(dev_priv)) {
5370
- init_unused_ring(dev_priv, SRB0_BASE);
5371
- init_unused_ring(dev_priv, SRB1_BASE);
5372
- } else if (IS_GEN3(dev_priv)) {
5373
- init_unused_ring(dev_priv, PRB1_BASE);
5374
- init_unused_ring(dev_priv, PRB2_BASE);
5375
- }
5376
-}
5377
-
5378
-static int __i915_gem_restart_engines(void *data)
5379
-{
5380
- struct drm_i915_private *i915 = data;
5381
- struct intel_engine_cs *engine;
5382
- enum intel_engine_id id;
5383
- int err;
5384
-
5385
- for_each_engine(engine, i915, id) {
5386
- err = engine->init_hw(engine);
5387
- if (err) {
5388
- DRM_ERROR("Failed to restart %s (%d)\n",
5389
- engine->name, err);
5390
- return err;
5391
- }
5392
- }
5393
-
5394
- return 0;
5395
-}
5396
-
5397
-int i915_gem_init_hw(struct drm_i915_private *dev_priv)
5398
-{
5399
- int ret;
5400
-
5401
- dev_priv->gt.last_init_time = ktime_get();
5402
-
5403
- /* Double layer security blanket, see i915_gem_init() */
5404
- intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5405
-
5406
- if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9)
5407
- I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
5408
-
5409
- if (IS_HASWELL(dev_priv))
5410
- I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
5411
- LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
5412
-
5413
- if (HAS_PCH_NOP(dev_priv)) {
5414
- if (IS_IVYBRIDGE(dev_priv)) {
5415
- u32 temp = I915_READ(GEN7_MSG_CTL);
5416
- temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
5417
- I915_WRITE(GEN7_MSG_CTL, temp);
5418
- } else if (INTEL_GEN(dev_priv) >= 7) {
5419
- u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
5420
- temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
5421
- I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
5422
- }
5423
- }
5424
-
5425
- intel_gt_workarounds_apply(dev_priv);
5426
-
5427
- i915_gem_init_swizzling(dev_priv);
5428
-
5429
- /*
5430
- * At least 830 can leave some of the unused rings
5431
- * "active" (ie. head != tail) after resume which
5432
- * will prevent c3 entry. Makes sure all unused rings
5433
- * are totally idle.
5434
- */
5435
- init_unused_rings(dev_priv);
5436
-
5437
- BUG_ON(!dev_priv->kernel_context);
5438
- if (i915_terminally_wedged(&dev_priv->gpu_error)) {
5439
- ret = -EIO;
5440
- goto out;
5441
- }
5442
-
5443
- ret = i915_ppgtt_init_hw(dev_priv);
5444
- if (ret) {
5445
- DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
5446
- goto out;
5447
- }
5448
-
5449
- ret = intel_wopcm_init_hw(&dev_priv->wopcm);
5450
- if (ret) {
5451
- DRM_ERROR("Enabling WOPCM failed (%d)\n", ret);
5452
- goto out;
5453
- }
5454
-
5455
- /* We can't enable contexts until all firmware is loaded */
5456
- ret = intel_uc_init_hw(dev_priv);
5457
- if (ret) {
5458
- DRM_ERROR("Enabling uc failed (%d)\n", ret);
5459
- goto out;
5460
- }
5461
-
5462
- intel_mocs_init_l3cc_table(dev_priv);
5463
-
5464
- /* Only when the HW is re-initialised, can we replay the requests */
5465
- ret = __i915_gem_restart_engines(dev_priv);
5466
- if (ret)
5467
- goto cleanup_uc;
5468
-
5469
- intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5470
-
5471
- return 0;
5472
-
5473
-cleanup_uc:
5474
- intel_uc_fini_hw(dev_priv);
5475
-out:
5476
- intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5477
-
5478
- return ret;
5479
-}
5480
-
5481
-static int __intel_engines_record_defaults(struct drm_i915_private *i915)
5482
-{
5483
- struct i915_gem_context *ctx;
5484
- struct intel_engine_cs *engine;
5485
- enum intel_engine_id id;
5486
- int err;
5487
-
5488
- /*
5489
- * As we reset the gpu during very early sanitisation, the current
5490
- * register state on the GPU should reflect its default values.
5491
- * We load a context onto the hw (with restore-inhibit), then switch
5492
- * over to a second context to save that default register state. We
5493
- * can then prime every new context with that state so they all start
5494
- * from the same default HW values.
5495
- */
5496
-
5497
- ctx = i915_gem_context_create_kernel(i915, 0);
5498
- if (IS_ERR(ctx))
5499
- return PTR_ERR(ctx);
5500
-
5501
- for_each_engine(engine, i915, id) {
5502
- struct i915_request *rq;
5503
-
5504
- rq = i915_request_alloc(engine, ctx);
5505
- if (IS_ERR(rq)) {
5506
- err = PTR_ERR(rq);
5507
- goto out_ctx;
5508
- }
5509
-
5510
- err = 0;
5511
- if (engine->init_context)
5512
- err = engine->init_context(rq);
5513
-
5514
- i915_request_add(rq);
5515
- if (err)
5516
- goto err_active;
5517
- }
5518
-
5519
- err = i915_gem_switch_to_kernel_context(i915);
5520
- if (err)
5521
- goto err_active;
5522
-
5523
- if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, HZ / 5)) {
5524
- i915_gem_set_wedged(i915);
5525
- err = -EIO; /* Caller will declare us wedged */
5526
- goto err_active;
5527
- }
5528
-
5529
- assert_kernel_context_is_current(i915);
5530
-
5531
- for_each_engine(engine, i915, id) {
5532
- struct i915_vma *state;
5533
-
5534
- state = to_intel_context(ctx, engine)->state;
5535
- if (!state)
5536
- continue;
5537
-
5538
- /*
5539
- * As we will hold a reference to the logical state, it will
5540
- * not be torn down with the context, and importantly the
5541
- * object will hold onto its vma (making it possible for a
5542
- * stray GTT write to corrupt our defaults). Unmap the vma
5543
- * from the GTT to prevent such accidents and reclaim the
5544
- * space.
5545
- */
5546
- err = i915_vma_unbind(state);
5547
- if (err)
5548
- goto err_active;
5549
-
5550
- err = i915_gem_object_set_to_cpu_domain(state->obj, false);
5551
- if (err)
5552
- goto err_active;
5553
-
5554
- engine->default_state = i915_gem_object_get(state->obj);
5555
- }
5556
-
5557
- if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
5558
- unsigned int found = intel_engines_has_context_isolation(i915);
5559
-
5560
- /*
5561
- * Make sure that classes with multiple engine instances all
5562
- * share the same basic configuration.
5563
- */
5564
- for_each_engine(engine, i915, id) {
5565
- unsigned int bit = BIT(engine->uabi_class);
5566
- unsigned int expected = engine->default_state ? bit : 0;
5567
-
5568
- if ((found & bit) != expected) {
5569
- DRM_ERROR("mismatching default context state for class %d on engine %s\n",
5570
- engine->uabi_class, engine->name);
5571
- }
5572
- }
5573
- }
5574
-
5575
-out_ctx:
5576
- i915_gem_context_set_closed(ctx);
5577
- i915_gem_context_put(ctx);
5578
- return err;
5579
-
5580
-err_active:
5581
- /*
5582
- * If we have to abandon now, we expect the engines to be idle
5583
- * and ready to be torn-down. First try to flush any remaining
5584
- * request, ensure we are pointing at the kernel context and
5585
- * then remove it.
5586
- */
5587
- if (WARN_ON(i915_gem_switch_to_kernel_context(i915)))
5588
- goto out_ctx;
5589
-
5590
- if (WARN_ON(i915_gem_wait_for_idle(i915,
5591
- I915_WAIT_LOCKED,
5592
- MAX_SCHEDULE_TIMEOUT)))
5593
- goto out_ctx;
5594
-
5595
- i915_gem_contexts_lost(i915);
5596
- goto out_ctx;
5597
-}
5598
-
55991107 int i915_gem_init(struct drm_i915_private *dev_priv)
56001108 {
56011109 int ret;
....@@ -5605,64 +1113,18 @@
56051113 mkwrite_device_info(dev_priv)->page_sizes =
56061114 I915_GTT_PAGE_SIZE_4K;
56071115
5608
- dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
5609
-
5610
- if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
5611
- dev_priv->gt.resume = intel_lr_context_resume;
5612
- dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
5613
- } else {
5614
- dev_priv->gt.resume = intel_legacy_submission_resume;
5615
- dev_priv->gt.cleanup_engine = intel_engine_cleanup;
5616
- }
5617
-
56181116 ret = i915_gem_init_userptr(dev_priv);
56191117 if (ret)
56201118 return ret;
56211119
5622
- ret = intel_uc_init_misc(dev_priv);
5623
- if (ret)
5624
- return ret;
1120
+ intel_uc_fetch_firmwares(&dev_priv->gt.uc);
1121
+ intel_wopcm_init(&dev_priv->wopcm);
56251122
5626
- ret = intel_wopcm_init(&dev_priv->wopcm);
5627
- if (ret)
5628
- goto err_uc_misc;
5629
-
5630
- /* This is just a security blanket to placate dragons.
5631
- * On some systems, we very sporadically observe that the first TLBs
5632
- * used by the CS may be stale, despite us poking the TLB reset. If
5633
- * we hold the forcewake during initialisation these problems
5634
- * just magically go away.
5635
- */
5636
- mutex_lock(&dev_priv->drm.struct_mutex);
5637
- intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5638
-
5639
- ret = i915_gem_init_ggtt(dev_priv);
1123
+ ret = i915_init_ggtt(dev_priv);
56401124 if (ret) {
56411125 GEM_BUG_ON(ret == -EIO);
56421126 goto err_unlock;
56431127 }
5644
-
5645
- ret = i915_gem_contexts_init(dev_priv);
5646
- if (ret) {
5647
- GEM_BUG_ON(ret == -EIO);
5648
- goto err_ggtt;
5649
- }
5650
-
5651
- ret = intel_engines_init(dev_priv);
5652
- if (ret) {
5653
- GEM_BUG_ON(ret == -EIO);
5654
- goto err_context;
5655
- }
5656
-
5657
- intel_init_gt_powersave(dev_priv);
5658
-
5659
- ret = intel_uc_init(dev_priv);
5660
- if (ret)
5661
- goto err_pm;
5662
-
5663
- ret = i915_gem_init_hw(dev_priv);
5664
- if (ret)
5665
- goto err_uc_init;
56661128
56671129 /*
56681130 * Despite its name, intel_init_clock_gating applies both display
....@@ -5675,22 +1137,9 @@
56751137 */
56761138 intel_init_clock_gating(dev_priv);
56771139
5678
- ret = __intel_engines_record_defaults(dev_priv);
1140
+ ret = intel_gt_init(&dev_priv->gt);
56791141 if (ret)
5680
- goto err_init_hw;
5681
-
5682
- if (i915_inject_load_failure()) {
5683
- ret = -ENODEV;
5684
- goto err_init_hw;
5685
- }
5686
-
5687
- if (i915_inject_load_failure()) {
5688
- ret = -EIO;
5689
- goto err_init_hw;
5690
- }
5691
-
5692
- intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5693
- mutex_unlock(&dev_priv->drm.struct_mutex);
1142
+ goto err_unlock;
56941143
56951144 return 0;
56961145
....@@ -5700,222 +1149,96 @@
57011150 * HW as irreversibly wedged, but keep enough state around that the
57011150 * driver doesn't explode during runtime.
57021151 */
5703
-err_init_hw:
5704
- mutex_unlock(&dev_priv->drm.struct_mutex);
5705
-
5706
- WARN_ON(i915_gem_suspend(dev_priv));
5707
- i915_gem_suspend_late(dev_priv);
5708
-
1152
+err_unlock:
57091153 i915_gem_drain_workqueue(dev_priv);
57101154
5711
- mutex_lock(&dev_priv->drm.struct_mutex);
5712
- intel_uc_fini_hw(dev_priv);
5713
-err_uc_init:
5714
- intel_uc_fini(dev_priv);
5715
-err_pm:
57161155 if (ret != -EIO) {
5717
- intel_cleanup_gt_powersave(dev_priv);
5718
- i915_gem_cleanup_engines(dev_priv);
5719
- }
5720
-err_context:
5721
- if (ret != -EIO)
5722
- i915_gem_contexts_fini(dev_priv);
5723
-err_ggtt:
5724
-err_unlock:
5725
- intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5726
- mutex_unlock(&dev_priv->drm.struct_mutex);
5727
-
5728
-err_uc_misc:
5729
- intel_uc_fini_misc(dev_priv);
5730
-
5731
- if (ret != -EIO)
1156
+ intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
57321157 i915_gem_cleanup_userptr(dev_priv);
1158
+ }
57331159
57341160 if (ret == -EIO) {
5735
- mutex_lock(&dev_priv->drm.struct_mutex);
5736
-
57371161 /*
5738
- * Allow engine initialisation to fail by marking the GPU as
5739
- * wedged. But we only want to do this where the GPU is angry,
1162
+ * Allow engines or uC initialisation to fail by marking the GPU
1163
+ * as wedged. But we only want to do this when the GPU is angry,
57401164 * for all other failures, such as an allocation failure, bail.
57411165 */
5742
- if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
5743
- i915_load_error(dev_priv,
5744
- "Failed to initialize GPU, declaring it wedged!\n");
5745
- i915_gem_set_wedged(dev_priv);
1166
+ if (!intel_gt_is_wedged(&dev_priv->gt)) {
1167
+ i915_probe_error(dev_priv,
1168
+ "Failed to initialize GPU, declaring it wedged!\n");
1169
+ intel_gt_set_wedged(&dev_priv->gt);
57461170 }
57471171
57481172 /* Minimal basic recovery for KMS */
57491173 ret = i915_ggtt_enable_hw(dev_priv);
5750
- i915_gem_restore_gtt_mappings(dev_priv);
5751
- i915_gem_restore_fences(dev_priv);
1174
+ i915_ggtt_resume(&dev_priv->ggtt);
57521175 intel_init_clock_gating(dev_priv);
5753
-
5754
- mutex_unlock(&dev_priv->drm.struct_mutex);
57551176 }
57561177
57571178 i915_gem_drain_freed_objects(dev_priv);
57581179 return ret;
57591180 }
57601181
5761
-void i915_gem_fini(struct drm_i915_private *dev_priv)
1182
+void i915_gem_driver_register(struct drm_i915_private *i915)
57621183 {
1184
+ i915_gem_driver_register__shrinker(i915);
1185
+
1186
+ intel_engines_driver_register(i915);
1187
+}
1188
+
1189
+void i915_gem_driver_unregister(struct drm_i915_private *i915)
1190
+{
1191
+ i915_gem_driver_unregister__shrinker(i915);
1192
+}
1193
+
1194
+void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
1195
+{
1196
+ intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref);
1197
+
57631198 i915_gem_suspend_late(dev_priv);
5764
- intel_disable_gt_powersave(dev_priv);
1199
+ intel_gt_driver_remove(&dev_priv->gt);
1200
+ dev_priv->uabi_engines = RB_ROOT;
57651201
57661202 /* Flush any outstanding unpin_work. */
57671203 i915_gem_drain_workqueue(dev_priv);
57681204
5769
- mutex_lock(&dev_priv->drm.struct_mutex);
5770
- intel_uc_fini_hw(dev_priv);
5771
- intel_uc_fini(dev_priv);
5772
- i915_gem_cleanup_engines(dev_priv);
5773
- i915_gem_contexts_fini(dev_priv);
5774
- mutex_unlock(&dev_priv->drm.struct_mutex);
1205
+ i915_gem_drain_freed_objects(dev_priv);
1206
+}
57751207
5776
- intel_cleanup_gt_powersave(dev_priv);
1208
+void i915_gem_driver_release(struct drm_i915_private *dev_priv)
1209
+{
1210
+ i915_gem_driver_release__contexts(dev_priv);
57771211
5778
- intel_uc_fini_misc(dev_priv);
1212
+ intel_gt_driver_release(&dev_priv->gt);
1213
+
1214
+ intel_wa_list_free(&dev_priv->gt_wa_list);
1215
+
1216
+ intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
57791217 i915_gem_cleanup_userptr(dev_priv);
57801218
57811219 i915_gem_drain_freed_objects(dev_priv);
57821220
5783
- WARN_ON(!list_empty(&dev_priv->contexts.list));
5784
-}
5785
-
5786
-void i915_gem_init_mmio(struct drm_i915_private *i915)
5787
-{
5788
- i915_gem_sanitize(i915);
5789
-}
5790
-
5791
-void
5792
-i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
5793
-{
5794
- struct intel_engine_cs *engine;
5795
- enum intel_engine_id id;
5796
-
5797
- for_each_engine(engine, dev_priv, id)
5798
- dev_priv->gt.cleanup_engine(engine);
5799
-}
5800
-
5801
-void
5802
-i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
5803
-{
5804
- int i;
5805
-
5806
- if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
5807
- !IS_CHERRYVIEW(dev_priv))
5808
- dev_priv->num_fence_regs = 32;
5809
- else if (INTEL_GEN(dev_priv) >= 4 ||
5810
- IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
5811
- IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
5812
- dev_priv->num_fence_regs = 16;
5813
- else
5814
- dev_priv->num_fence_regs = 8;
5815
-
5816
- if (intel_vgpu_active(dev_priv))
5817
- dev_priv->num_fence_regs =
5818
- I915_READ(vgtif_reg(avail_rs.fence_num));
5819
-
5820
- /* Initialize fence registers to zero */
5821
- for (i = 0; i < dev_priv->num_fence_regs; i++) {
5822
- struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
5823
-
5824
- fence->i915 = dev_priv;
5825
- fence->id = i;
5826
- list_add_tail(&fence->link, &dev_priv->mm.fence_list);
5827
- }
5828
- i915_gem_restore_fences(dev_priv);
5829
-
5830
- i915_gem_detect_bit_6_swizzle(dev_priv);
1221
+ drm_WARN_ON(&dev_priv->drm, !list_empty(&dev_priv->gem.contexts.list));
58311222 }
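/*
 * The split above replaces the old single i915_gem_fini() teardown with a
 * staged lifecycle. Presumably (mirroring the probe/remove/release split in
 * i915_drv.c rather than anything stated in this diff), the intended call
 * order is: i915_gem_driver_register() at the end of driver probe, then on
 * teardown i915_gem_driver_unregister(), followed by i915_gem_driver_remove(),
 * and finally i915_gem_driver_release() once the last reference is dropped.
 */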
58321223
58331224 static void i915_gem_init__mm(struct drm_i915_private *i915)
58341225 {
5835
- spin_lock_init(&i915->mm.object_stat_lock);
58361226 spin_lock_init(&i915->mm.obj_lock);
5837
- spin_lock_init(&i915->mm.free_lock);
58381227
58391228 init_llist_head(&i915->mm.free_list);
58401229
5841
- INIT_LIST_HEAD(&i915->mm.unbound_list);
5842
- INIT_LIST_HEAD(&i915->mm.bound_list);
5843
- INIT_LIST_HEAD(&i915->mm.fence_list);
5844
- INIT_LIST_HEAD(&i915->mm.userfault_list);
1230
+ INIT_LIST_HEAD(&i915->mm.purge_list);
1231
+ INIT_LIST_HEAD(&i915->mm.shrink_list);
58451232
5846
- INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
1233
+ i915_gem_init__objects(i915);
58471234 }
58481235
5849
-int i915_gem_init_early(struct drm_i915_private *dev_priv)
1236
+void i915_gem_init_early(struct drm_i915_private *dev_priv)
58501237 {
5851
- int err = -ENOMEM;
5852
-
5853
- dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
5854
- if (!dev_priv->objects)
5855
- goto err_out;
5856
-
5857
- dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN);
5858
- if (!dev_priv->vmas)
5859
- goto err_objects;
5860
-
5861
- dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
5862
- if (!dev_priv->luts)
5863
- goto err_vmas;
5864
-
5865
- dev_priv->requests = KMEM_CACHE(i915_request,
5866
- SLAB_HWCACHE_ALIGN |
5867
- SLAB_RECLAIM_ACCOUNT |
5868
- SLAB_TYPESAFE_BY_RCU);
5869
- if (!dev_priv->requests)
5870
- goto err_luts;
5871
-
5872
- dev_priv->dependencies = KMEM_CACHE(i915_dependency,
5873
- SLAB_HWCACHE_ALIGN |
5874
- SLAB_RECLAIM_ACCOUNT);
5875
- if (!dev_priv->dependencies)
5876
- goto err_requests;
5877
-
5878
- dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
5879
- if (!dev_priv->priorities)
5880
- goto err_dependencies;
5881
-
5882
- INIT_LIST_HEAD(&dev_priv->gt.timelines);
5883
- INIT_LIST_HEAD(&dev_priv->gt.active_rings);
5884
- INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
5885
-
58861238 i915_gem_init__mm(dev_priv);
5887
-
5888
- INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
5889
- i915_gem_retire_work_handler);
5890
- INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
5891
- i915_gem_idle_work_handler);
5892
- init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
5893
- init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
5894
-
5895
- atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
1239
+ i915_gem_init__contexts(dev_priv);
58961240
58971241 spin_lock_init(&dev_priv->fb_tracking.lock);
5898
-
5899
- mutex_init(&dev_priv->tlb_invalidate_lock);
5900
-
5901
- err = i915_gemfs_init(dev_priv);
5902
- if (err)
5903
- DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err);
5904
-
5905
- return 0;
5906
-
5907
-err_dependencies:
5908
- kmem_cache_destroy(dev_priv->dependencies);
5909
-err_requests:
5910
- kmem_cache_destroy(dev_priv->requests);
5911
-err_luts:
5912
- kmem_cache_destroy(dev_priv->luts);
5913
-err_vmas:
5914
- kmem_cache_destroy(dev_priv->vmas);
5915
-err_objects:
5916
- kmem_cache_destroy(dev_priv->objects);
5917
-err_out:
5918
- return err;
59191242 }
59201243
59211244 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
....@@ -5923,20 +1246,7 @@
59231246 i915_gem_drain_freed_objects(dev_priv);
59241247 GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
59251248 GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
5926
- WARN_ON(dev_priv->mm.object_count);
5927
- WARN_ON(!list_empty(&dev_priv->gt.timelines));
5928
-
5929
- kmem_cache_destroy(dev_priv->priorities);
5930
- kmem_cache_destroy(dev_priv->dependencies);
5931
- kmem_cache_destroy(dev_priv->requests);
5932
- kmem_cache_destroy(dev_priv->luts);
5933
- kmem_cache_destroy(dev_priv->vmas);
5934
- kmem_cache_destroy(dev_priv->objects);
5935
-
5936
- /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
5937
- rcu_barrier();
5938
-
5939
- i915_gemfs_fini(dev_priv);
1249
+ drm_WARN_ON(&dev_priv->drm, dev_priv->mm.shrink_count);
59401250 }
59411251
59421252 int i915_gem_freeze(struct drm_i915_private *dev_priv)
....@@ -5952,11 +1262,7 @@
59521262 int i915_gem_freeze_late(struct drm_i915_private *i915)
59531263 {
59541264 struct drm_i915_gem_object *obj;
5955
- struct list_head *phases[] = {
5956
- &i915->mm.unbound_list,
5957
- &i915->mm.bound_list,
5958
- NULL
5959
- }, **phase;
1265
+ intel_wakeref_t wakeref;
59601266
59611267 /*
59621268 * Called just before we write the hibernation image.
....@@ -5973,32 +1279,21 @@
59731279 * the objects as well, see i915_gem_freeze()
59741280 */
59751281
5976
- i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
1282
+ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
1283
+
1284
+ i915_gem_shrink(i915, -1UL, NULL, ~0);
59771285 i915_gem_drain_freed_objects(i915);
59781286
5979
- mutex_lock(&i915->drm.struct_mutex);
5980
- for (phase = phases; *phase; phase++) {
5981
- list_for_each_entry(obj, *phase, mm.link)
5982
- WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
1287
+ list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
1288
+ i915_gem_object_lock(obj, NULL);
1289
+ drm_WARN_ON(&i915->drm,
1290
+ i915_gem_object_set_to_cpu_domain(obj, true));
1291
+ i915_gem_object_unlock(obj);
59831292 }
5984
- mutex_unlock(&i915->drm.struct_mutex);
1293
+
1294
+ intel_runtime_pm_put(&i915->runtime_pm, wakeref);
59851295
59861296 return 0;
5987
-}
5988
-
5989
-void i915_gem_release(struct drm_device *dev, struct drm_file *file)
5990
-{
5991
- struct drm_i915_file_private *file_priv = file->driver_priv;
5992
- struct i915_request *request;
5993
-
5994
- /* Clean up our request list when the client is going away, so that
5995
- * later retire_requests won't dereference our soon-to-be-gone
5996
- * file_priv.
5997
- */
5998
- spin_lock(&file_priv->mm.lock);
5999
- list_for_each_entry(request, &file_priv->mm.request_list, client_link)
6000
- request->file_priv = NULL;
6001
- spin_unlock(&file_priv->mm.lock);
60021297 }
60031298
60041299 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
....@@ -6016,9 +1311,6 @@
60161311 file_priv->dev_priv = i915;
60171312 file_priv->file = file;
60181313
6019
- spin_lock_init(&file_priv->mm.lock);
6020
- INIT_LIST_HEAD(&file_priv->mm.request_list);
6021
-
60221314 file_priv->bsd_engine = -1;
60231315 file_priv->hang_timestamp = jiffies;
60241316
....@@ -6029,311 +1321,59 @@
60291321 return ret;
60301322 }
60311323
6032
-/**
6033
- * i915_gem_track_fb - update frontbuffer tracking
6034
- * @old: current GEM buffer for the frontbuffer slots
6035
- * @new: new GEM buffer for the frontbuffer slots
6036
- * @frontbuffer_bits: bitmask of frontbuffer slots
6037
- *
6038
- * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
6039
- * from @old and setting them in @new. Both @old and @new can be NULL.
6040
- */
6041
-void i915_gem_track_fb(struct drm_i915_gem_object *old,
6042
- struct drm_i915_gem_object *new,
6043
- unsigned frontbuffer_bits)
1324
+void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr)
60441325 {
6045
- /* Control of individual bits within the mask are guarded by
6046
- * the owning plane->mutex, i.e. we can never see concurrent
6047
- * manipulation of individual bits. But since the bitfield as a whole
6048
- * is updated using RMW, we need to use atomics in order to update
6049
- * the bits.
6050
- */
6051
- BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
6052
- sizeof(atomic_t) * BITS_PER_BYTE);
6053
-
6054
- if (old) {
6055
- WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
6056
- atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
6057
- }
6058
-
6059
- if (new) {
6060
- WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
6061
- atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
6062
- }
1326
+ ww_acquire_init(&ww->ctx, &reservation_ww_class);
1327
+ INIT_LIST_HEAD(&ww->obj_list);
1328
+ ww->intr = intr;
1329
+ ww->contended = NULL;
60631330 }
60641331
6065
-/* Allocate a new GEM object and fill it with the supplied data */
6066
-struct drm_i915_gem_object *
6067
-i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
6068
- const void *data, size_t size)
1332
+static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww)
60691333 {
60701334 struct drm_i915_gem_object *obj;
6071
- struct file *file;
6072
- size_t offset;
6073
- int err;
60741335
6075
- obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
6076
- if (IS_ERR(obj))
6077
- return obj;
6078
-
6079
- GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
6080
-
6081
- file = obj->base.filp;
6082
- offset = 0;
6083
- do {
6084
- unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
6085
- struct page *page;
6086
- void *pgdata, *vaddr;
6087
-
6088
- err = pagecache_write_begin(file, file->f_mapping,
6089
- offset, len, 0,
6090
- &page, &pgdata);
6091
- if (err < 0)
6092
- goto fail;
6093
-
6094
- vaddr = kmap(page);
6095
- memcpy(vaddr, data, len);
6096
- kunmap(page);
6097
-
6098
- err = pagecache_write_end(file, file->f_mapping,
6099
- offset, len, len,
6100
- page, pgdata);
6101
- if (err < 0)
6102
- goto fail;
6103
-
6104
- size -= len;
6105
- data += len;
6106
- offset += len;
6107
- } while (size);
6108
-
6109
- return obj;
6110
-
6111
-fail:
6112
- i915_gem_object_put(obj);
6113
- return ERR_PTR(err);
6114
-}
6115
-
6116
-struct scatterlist *
6117
-i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
6118
- unsigned int n,
6119
- unsigned int *offset)
6120
-{
6121
- struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
6122
- struct scatterlist *sg;
6123
- unsigned int idx, count;
6124
-
6125
- might_sleep();
6126
- GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
6127
- GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
6128
-
6129
- /* As we iterate forward through the sg, we record each entry in a
6130
- * radixtree for quick repeated (backwards) lookups. If we have seen
6131
- * this index previously, we will have an entry for it.
6132
- *
6133
- * Initial lookup is O(N), but this is amortized to O(1) for
6134
- * sequential page access (where each new request is consecutive
6135
- * to the previous one). Repeated lookups are O(lg(obj->base.size)),
6136
- * i.e. O(1) with a large constant!
6137
- */
6138
- if (n < READ_ONCE(iter->sg_idx))
6139
- goto lookup;
6140
-
6141
- mutex_lock(&iter->lock);
6142
-
6143
- /* We prefer to reuse the last sg so that repeated lookup of this
6144
- * (or the subsequent) sg are fast - comparing against the last
6145
- * sg is faster than going through the radixtree.
6146
- */
6147
-
6148
- sg = iter->sg_pos;
6149
- idx = iter->sg_idx;
6150
- count = __sg_page_count(sg);
6151
-
6152
- while (idx + count <= n) {
6153
- unsigned long exception, i;
6154
- int ret;
6155
-
6156
- /* If we cannot allocate and insert this entry, or the
6157
- * individual pages from this range, cancel updating the
6158
- * sg_idx so that on this lookup we are forced to linearly
6159
- * scan onwards, but on future lookups we will try the
6160
- * insertion again (in which case we need to be careful of
6161
- * the error return reporting that we have already inserted
6162
- * this index).
6163
- */
6164
- ret = radix_tree_insert(&iter->radix, idx, sg);
6165
- if (ret && ret != -EEXIST)
6166
- goto scan;
6167
-
6168
- exception =
6169
- RADIX_TREE_EXCEPTIONAL_ENTRY |
6170
- idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
6171
- for (i = 1; i < count; i++) {
6172
- ret = radix_tree_insert(&iter->radix, idx + i,
6173
- (void *)exception);
6174
- if (ret && ret != -EEXIST)
6175
- goto scan;
6176
- }
6177
-
6178
- idx += count;
6179
- sg = ____sg_next(sg);
6180
- count = __sg_page_count(sg);
1336
+ while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) {
1337
+ list_del(&obj->obj_link);
1338
+ i915_gem_object_unlock(obj);
61811339 }
6182
-
6183
-scan:
6184
- iter->sg_pos = sg;
6185
- iter->sg_idx = idx;
6186
-
6187
- mutex_unlock(&iter->lock);
6188
-
6189
- if (unlikely(n < idx)) /* insertion completed by another thread */
6190
- goto lookup;
6191
-
6192
- /* In case we failed to insert the entry into the radixtree, we need
6193
- * to look beyond the current sg.
6194
- */
6195
- while (idx + count <= n) {
6196
- idx += count;
6197
- sg = ____sg_next(sg);
6198
- count = __sg_page_count(sg);
6199
- }
6200
-
6201
- *offset = n - idx;
6202
- return sg;
6203
-
6204
-lookup:
6205
- rcu_read_lock();
6206
-
6207
- sg = radix_tree_lookup(&iter->radix, n);
6208
- GEM_BUG_ON(!sg);
6209
-
6210
- /* If this index is in the middle of multi-page sg entry,
6211
- * the radixtree will contain an exceptional entry that points
6212
- * to the start of that range. We will return the pointer to
6213
- * the base page and the offset of this page within the
6214
- * sg entry's range.
6215
- */
6216
- *offset = 0;
6217
- if (unlikely(radix_tree_exception(sg))) {
6218
- unsigned long base =
6219
- (unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
6220
-
6221
- sg = radix_tree_lookup(&iter->radix, base);
6222
- GEM_BUG_ON(!sg);
6223
-
6224
- *offset = n - base;
6225
- }
6226
-
6227
- rcu_read_unlock();
6228
-
6229
- return sg;
62301340 }
62311341
6232
-struct page *
6233
-i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
1342
+void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj)
62341343 {
6235
- struct scatterlist *sg;
6236
- unsigned int offset;
6237
-
6238
- GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
6239
-
6240
- sg = i915_gem_object_get_sg(obj, n, &offset);
6241
- return nth_page(sg_page(sg), offset);
1344
+ list_del(&obj->obj_link);
1345
+ i915_gem_object_unlock(obj);
62421346 }
62431347
6244
-/* Like i915_gem_object_get_page(), but mark the returned page dirty */
6245
-struct page *
6246
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
6247
- unsigned int n)
1348
+void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww)
62481349 {
6249
- struct page *page;
6250
-
6251
- page = i915_gem_object_get_page(obj, n);
6252
- if (!obj->mm.dirty)
6253
- set_page_dirty(page);
6254
-
6255
- return page;
1350
+ i915_gem_ww_ctx_unlock_all(ww);
1351
+ WARN_ON(ww->contended);
1352
+ ww_acquire_fini(&ww->ctx);
62561353 }
62571354
6258
-dma_addr_t
6259
-i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
6260
- unsigned long n)
1355
+int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww)
62611356 {
6262
- struct scatterlist *sg;
6263
- unsigned int offset;
1357
+ int ret = 0;
62641358
6265
- sg = i915_gem_object_get_sg(obj, n, &offset);
6266
- return sg_dma_address(sg) + (offset << PAGE_SHIFT);
6267
-}
6268
-
6269
-int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
6270
-{
6271
- struct sg_table *pages;
6272
- int err;
6273
-
6274
- if (align > obj->base.size)
1359
+ if (WARN_ON(!ww->contended))
62751360 return -EINVAL;
62761361
6277
- if (obj->ops == &i915_gem_phys_ops)
6278
- return 0;
1362
+ i915_gem_ww_ctx_unlock_all(ww);
1363
+ if (ww->intr)
1364
+ ret = dma_resv_lock_slow_interruptible(ww->contended->base.resv, &ww->ctx);
1365
+ else
1366
+ dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx);
62791367
6280
- if (obj->ops != &i915_gem_object_ops)
6281
- return -EINVAL;
1368
+ if (!ret)
1369
+ list_add_tail(&ww->contended->obj_link, &ww->obj_list);
62821370
6283
- err = i915_gem_object_unbind(obj);
6284
- if (err)
6285
- return err;
1371
+ ww->contended = NULL;
62861372
6287
- mutex_lock(&obj->mm.lock);
6288
-
6289
- if (obj->mm.madv != I915_MADV_WILLNEED) {
6290
- err = -EFAULT;
6291
- goto err_unlock;
6292
- }
6293
-
6294
- if (obj->mm.quirked) {
6295
- err = -EFAULT;
6296
- goto err_unlock;
6297
- }
6298
-
6299
- if (obj->mm.mapping) {
6300
- err = -EBUSY;
6301
- goto err_unlock;
6302
- }
6303
-
6304
- pages = __i915_gem_object_unset_pages(obj);
6305
-
6306
- obj->ops = &i915_gem_phys_ops;
6307
-
6308
- err = ____i915_gem_object_get_pages(obj);
6309
- if (err)
6310
- goto err_xfer;
6311
-
6312
- /* Perma-pin (until release) the physical set of pages */
6313
- __i915_gem_object_pin_pages(obj);
6314
-
6315
- if (!IS_ERR_OR_NULL(pages))
6316
- i915_gem_object_ops.put_pages(obj, pages);
6317
- mutex_unlock(&obj->mm.lock);
6318
- return 0;
6319
-
6320
-err_xfer:
6321
- obj->ops = &i915_gem_object_ops;
6322
- if (!IS_ERR_OR_NULL(pages)) {
6323
- unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
6324
-
6325
- __i915_gem_object_set_pages(obj, pages, sg_page_sizes);
6326
- }
6327
-err_unlock:
6328
- mutex_unlock(&obj->mm.lock);
6329
- return err;
1373
+ return ret;
63301374 }
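/*
 * A minimal usage sketch for the i915_gem_ww_ctx helpers added above (an
 * illustration, not code from this tree): objects are locked under a single
 * acquire context, and on -EDEADLK every held lock is dropped via
 * i915_gem_ww_ctx_backoff() before retrying, so the transaction makes
 * forward progress without deadlocking. The two-argument
 * i915_gem_object_lock(obj, &ww) form is assumed from its use in
 * i915_gem_freeze_late() above.
 */
static int __maybe_unused example_lock_pair(struct drm_i915_gem_object *a,
					    struct drm_i915_gem_object *b)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true /* interruptible waits */);
retry:
	err = i915_gem_object_lock(a, &ww);
	if (!err)
		err = i915_gem_object_lock(b, &ww);
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}

	/* ... both objects are locked here; do the actual work ... */

	i915_gem_ww_ctx_fini(&ww);	/* drops any locks still held */
	return err;
}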
63311375
63321376 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
6333
-#include "selftests/scatterlist.c"
63341377 #include "selftests/mock_gem_device.c"
6335
-#include "selftests/huge_gem_object.c"
6336
-#include "selftests/huge_pages.c"
6337
-#include "selftests/i915_gem_object.c"
6338
-#include "selftests/i915_gem_coherency.c"
1378
+#include "selftests/i915_gem.c"
63391379 #endif