2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * VFIO: IOMMU DMA mapping support for TCE on POWER
  *
  * Copyright (C) 2013 IBM Corp. All rights reserved.
  * Author: Alexey Kardashevskiy <aik@ozlabs.ru>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  *
  * Derived from original vfio_iommu_type1.c:
  * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
@@ -22,6 +19,7 @@
 #include <linux/vmalloc.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/signal.h>
+#include <linux/mm.h>
 
 #include <asm/iommu.h>
 #include <asm/tce.h>
@@ -33,51 +31,6 @@
 
 static void tce_iommu_detach_group(void *iommu_data,
                 struct iommu_group *iommu_group);
-
-static long try_increment_locked_vm(struct mm_struct *mm, long npages)
-{
-        long ret = 0, locked, lock_limit;
-
-        if (WARN_ON_ONCE(!mm))
-                return -EPERM;
-
-        if (!npages)
-                return 0;
-
-        down_write(&mm->mmap_sem);
-        locked = mm->locked_vm + npages;
-        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-        if (locked > lock_limit && !capable(CAP_IPC_LOCK))
-                ret = -ENOMEM;
-        else
-                mm->locked_vm += npages;
-
-        pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
-                        npages << PAGE_SHIFT,
-                        mm->locked_vm << PAGE_SHIFT,
-                        rlimit(RLIMIT_MEMLOCK),
-                        ret ? " - exceeded" : "");
-
-        up_write(&mm->mmap_sem);
-
-        return ret;
-}
-
-static void decrement_locked_vm(struct mm_struct *mm, long npages)
-{
-        if (!mm || !npages)
-                return;
-
-        down_write(&mm->mmap_sem);
-        if (WARN_ON_ONCE(npages > mm->locked_vm))
-                npages = mm->locked_vm;
-        mm->locked_vm -= npages;
-        pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
-                        npages << PAGE_SHIFT,
-                        mm->locked_vm << PAGE_SHIFT,
-                        rlimit(RLIMIT_MEMLOCK));
-        up_write(&mm->mmap_sem);
-}
 
 /*
  * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
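
The two locked_vm helpers removed above are superseded by the generic account_locked_vm() from include/linux/mm.h (hence the new #include <linux/mm.h>), which performs the same RLIMIT_MEMLOCK/CAP_IPC_LOCK check under the mmap semaphore. A minimal sketch of the replacement call pattern, assuming the generic helper's int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc) signature:

    /* charge: fails with -ENOMEM past RLIMIT_MEMLOCK unless CAP_IPC_LOCK */
    ret = account_locked_vm(container->mm, npages, true);
    if (ret)
            return ret;

    /* uncharge: cannot fail, the helper clamps at the current locked_vm */
    account_locked_vm(container->mm, npages, false);
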
@@ -126,7 +79,7 @@
         }
         BUG_ON(!current->mm);
         container->mm = current->mm;
-        atomic_inc(&container->mm->mm_count);
+        mmgrab(container->mm);
 
         return 0;
 }
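
mmgrab() is the named replacement for the open-coded atomic_inc(&mm->mm_count): it pins the mm_struct itself rather than the address space (which would be mmget()). A sketch of the pairing such reference-holding code relies on, assuming the release path uses mmdrop():

    mmgrab(container->mm);          /* keep the mm_struct itself alive */
    /* ... container lifetime ... */
    mmdrop(container->mm);          /* paired release when the container is freed */
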
@@ -152,11 +105,12 @@
         struct mm_iommu_table_group_mem_t *mem;
         struct tce_iommu_prereg *tcemem;
         bool found = false;
+        long ret;
 
         if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
                 return -EINVAL;
 
-        mem = mm_iommu_find(container->mm, vaddr, size >> PAGE_SHIFT);
+        mem = mm_iommu_get(container->mm, vaddr, size >> PAGE_SHIFT);
         if (!mem)
                 return -ENOENT;
 
@@ -168,9 +122,13 @@
         }
 
         if (!found)
-                return -ENOENT;
+                ret = -ENOENT;
+        else
+                ret = tce_iommu_prereg_free(container, tcemem);
 
-        return tce_iommu_prereg_free(container, tcemem);
+        mm_iommu_put(container->mm, mem);
+
+        return ret;
 }
 
 static long tce_iommu_register_pages(struct tce_container *container,
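
mm_iommu_find() only looked a preregistered region up; mm_iommu_get() also takes a reference, so the unregister path above is rewritten with a single exit that always drops it. The pattern, sketched with a hypothetical work step standing in for the lookup-and-free logic:

    mem = mm_iommu_get(container->mm, vaddr, size >> PAGE_SHIFT); /* +1 ref, or NULL */
    if (!mem)
            return -ENOENT;
    ret = do_work(mem);                     /* hypothetical: any use of mem */
    mm_iommu_put(container->mm, mem);       /* always balance the get */
    return ret;
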
@@ -185,22 +143,24 @@
                         ((vaddr + size) < vaddr))
                 return -EINVAL;
 
-        mem = mm_iommu_find(container->mm, vaddr, entries);
+        mem = mm_iommu_get(container->mm, vaddr, entries);
         if (mem) {
                 list_for_each_entry(tcemem, &container->prereg_list, next) {
-                        if (tcemem->mem == mem)
-                                return -EBUSY;
+                        if (tcemem->mem == mem) {
+                                ret = -EBUSY;
+                                goto put_exit;
+                        }
                 }
+        } else {
+                ret = mm_iommu_new(container->mm, vaddr, entries, &mem);
+                if (ret)
+                        return ret;
         }
-
-        ret = mm_iommu_get(container->mm, vaddr, entries, &mem);
-        if (ret)
-                return ret;
 
         tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
         if (!tcemem) {
-                mm_iommu_put(container->mm, mem);
-                return -ENOMEM;
+                ret = -ENOMEM;
+                goto put_exit;
         }
 
         tcemem->mem = mem;
@@ -209,16 +169,28 @@
         container->enabled = true;
 
         return 0;
+
+put_exit:
+        mm_iommu_put(container->mm, mem);
+        return ret;
 }
 
-static bool tce_page_is_contained(struct page *page, unsigned page_shift)
+static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
+                unsigned int it_page_shift)
 {
+        struct page *page;
+        unsigned long size = 0;
+
+        if (mm_iommu_is_devmem(mm, hpa, it_page_shift, &size))
+                return size == (1UL << it_page_shift);
+
+        page = pfn_to_page(hpa >> PAGE_SHIFT);
         /*
          * Check that the TCE table granularity is not bigger than the size of
          * a page we just found. Otherwise the hardware can get access to
          * a bigger memory chunk that it should.
          */
-        return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
+        return page_shift(compound_head(page)) >= it_page_shift;
 }
 
 static inline bool tce_groups_attached(struct tce_container *container)
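
page_shift() is the generic accessor for PAGE_SHIFT + compound_order(), so the containment test now reads directly as "the backing page covers at least one IOMMU page"; the new mm_iommu_is_devmem() branch instead validates preregistered device memory by its region size. A worked example of the check, assuming a 4K PAGE_SIZE:

    /*
     * A 16MB hugepage is a compound page of order 12:
     * page_shift(head) = PAGE_SHIFT + compound_order(head) = 12 + 12 = 24,
     * so any IOMMU page up to 16MB (it_page_shift <= 24) is contained,
     * while a 64K TCE backed by a plain 4K page (12 < 16) is rejected.
     */
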
@@ -317,7 +289,7 @@
                 return ret;
 
         locked = table_group->tce32_size >> PAGE_SHIFT;
-        ret = try_increment_locked_vm(container->mm, locked);
+        ret = account_locked_vm(container->mm, locked, true);
         if (ret)
                 return ret;
 
@@ -336,7 +308,7 @@
         container->enabled = false;
 
         BUG_ON(!container->mm);
-        decrement_locked_vm(container->mm, container->locked_pages);
+        account_locked_vm(container->mm, container->locked_pages, false);
 }
 
 static void *tce_iommu_open(unsigned long arg)
@@ -411,7 +383,7 @@
         struct page *page;
 
         page = pfn_to_page(hpa >> PAGE_SHIFT);
-        put_page(page);
+        unpin_user_page(page);
 }
 
 static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
@@ -440,7 +412,7 @@
         struct mm_iommu_table_group_mem_t *mem = NULL;
         int ret;
         unsigned long hpa = 0;
-        __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+        __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
 
         if (!pua)
                 return;
@@ -463,13 +435,33 @@
         unsigned long oldhpa;
         long ret;
         enum dma_data_direction direction;
+        unsigned long lastentry = entry + pages, firstentry = entry;
 
-        for ( ; pages; --pages, ++entry) {
+        for ( ; entry < lastentry; ++entry) {
+                if (tbl->it_indirect_levels && tbl->it_userspace) {
+                        /*
+                         * For multilevel tables, we can take a shortcut here
+                         * and skip some TCEs as we know that the userspace
+                         * addresses cache is a mirror of the real TCE table
+                         * and if it is missing some indirect levels, then
+                         * the hardware table does not have them allocated
+                         * either and therefore does not require updating.
+                         */
+                        __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl,
+                                        entry);
+                        if (!pua) {
+                                /* align to level_size which is power of two */
+                                entry |= tbl->it_level_size - 1;
+                                continue;
+                        }
+                }
+
                 cond_resched();
 
                 direction = DMA_NONE;
                 oldhpa = 0;
-                ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction);
+                ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry, &oldhpa,
+                                &direction);
                 if (ret)
                         continue;
 
@@ -484,6 +476,8 @@
                 tce_iommu_unuse_page(container, oldhpa);
         }
 
+        iommu_tce_kill(tbl, firstentry, pages);
+
         return 0;
 }
 
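
Splitting iommu_tce_xchg() into iommu_tce_xchg_no_kill() plus an explicit iommu_tce_kill() turns one TCE-cache invalidation per entry into a single ranged invalidation per operation; the same pattern recurs in the map paths below. The multilevel shortcut also leans on it_level_size being a power of two: entry |= it_level_size - 1 jumps to the last entry of the current (unallocated) block, so the loop increment lands on the next block. For example, with it_level_size = 512 and entry = 1000, 1000 | 511 = 1023 and the next iteration resumes at 1024. The batching itself, sketched:

    /* update a range of TCEs, then invalidate the range once */
    for (i = 0; i < pages; ++i) {
            ret = iommu_tce_xchg_no_kill(mm, tbl, entry + i, &hpa, &dir);
            if (ret)
                    break;
    }
    iommu_tce_kill(tbl, entry, pages);      /* one ranged TCE-cache kill */
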
@@ -492,8 +486,9 @@
         struct page *page = NULL;
         enum dma_data_direction direction = iommu_tce_direction(tce);
 
-        if (get_user_pages_fast(tce & PAGE_MASK, 1,
-                        direction != DMA_TO_DEVICE, &page) != 1)
+        if (pin_user_pages_fast(tce & PAGE_MASK, 1,
+                        direction != DMA_TO_DEVICE ? FOLL_WRITE : 0,
+                        &page) != 1)
                 return -EFAULT;
 
         *hpa = __pa((unsigned long) page_address(page));
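
The GUP conversion swaps get_user_pages_fast() for pin_user_pages_fast(), whose third argument is a gup_flags word rather than a write boolean, so the writability decision becomes an explicit FOLL_WRITE. Pages pinned this way must be released with unpin_user_page(), which is why tce_iommu_unuse_page() above switched away from put_page(). A sketch of the pairing, with uaddr standing in for the guest-provided address:

    struct page *page;

    /* request write access only when the device may store to the page */
    if (pin_user_pages_fast(uaddr & PAGE_MASK, 1, FOLL_WRITE, &page) != 1)
            return -EFAULT;
    /* ... program the hardware with the page ... */
    unpin_user_page(page);  /* pairs with pin_user_pages_fast(), not put_page() */
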
@@ -507,7 +502,6 @@
                 enum dma_data_direction direction)
 {
         long i, ret = 0;
-        struct page *page;
         unsigned long hpa;
         enum dma_data_direction dirtmp;
 
@@ -518,15 +512,16 @@
                 if (ret)
                         break;
 
-                page = pfn_to_page(hpa >> PAGE_SHIFT);
-                if (!tce_page_is_contained(page, tbl->it_page_shift)) {
+                if (!tce_page_is_contained(container->mm, hpa,
+                                tbl->it_page_shift)) {
                         ret = -EPERM;
                         break;
                 }
 
                 hpa |= offset;
                 dirtmp = direction;
-                ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
+                ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry + i,
+                                &hpa, &dirtmp);
                 if (ret) {
                         tce_iommu_unuse_page(container, hpa);
                         pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
@@ -543,6 +538,8 @@
 
         if (ret)
                 tce_iommu_clear(container, tbl, entry, i);
+        else
+                iommu_tce_kill(tbl, entry, pages);
 
         return ret;
 }
@@ -553,7 +550,6 @@
                 enum dma_data_direction direction)
 {
         long i, ret = 0;
-        struct page *page;
         unsigned long hpa;
         enum dma_data_direction dirtmp;
 
@@ -566,8 +562,8 @@
                 if (ret)
                         break;
 
-                page = pfn_to_page(hpa >> PAGE_SHIFT);
-                if (!tce_page_is_contained(page, tbl->it_page_shift)) {
+                if (!tce_page_is_contained(container->mm, hpa,
+                                tbl->it_page_shift)) {
                         ret = -EPERM;
                         break;
                 }
@@ -580,7 +576,8 @@
                 if (mm_iommu_mapped_inc(mem))
                         break;
 
-                ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
+                ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry + i,
+                                &hpa, &dirtmp);
                 if (ret) {
                         /* dirtmp cannot be DMA_NONE here */
                         tce_iommu_unuse_page_v2(container, tbl, entry + i);
@@ -600,6 +597,8 @@
 
         if (ret)
                 tce_iommu_clear(container, tbl, entry, i);
+        else
+                iommu_tce_kill(tbl, entry, pages);
 
         return ret;
 }
@@ -619,7 +618,7 @@
         if (!table_size)
                 return -EINVAL;
 
-        ret = try_increment_locked_vm(container->mm, table_size >> PAGE_SHIFT);
+        ret = account_locked_vm(container->mm, table_size >> PAGE_SHIFT, true);
         if (ret)
                 return ret;
 
@@ -638,7 +637,7 @@
         unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;
 
         iommu_tce_table_put(tbl);
-        decrement_locked_vm(container->mm, pages);
+        account_locked_vm(container->mm, pages, false);
 }
 
 static long tce_iommu_create_window(struct tce_container *container,
@@ -1196,7 +1195,8 @@
         }
 
         for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
-                table_group->ops->unset_window(table_group, i);
+                if (container->tables[i])
+                        table_group->ops->unset_window(table_group, i);
 
         table_group->ops->release_ownership(table_group);
 }
@@ -1240,7 +1240,7 @@
 static int tce_iommu_attach_group(void *iommu_data,
                 struct iommu_group *iommu_group)
 {
-        int ret;
+        int ret = 0;
         struct tce_container *container = iommu_data;
         struct iommu_table_group *table_group;
         struct tce_iommu_group *tcegrp = NULL;
@@ -1293,13 +1293,13 @@
                         !table_group->ops->release_ownership) {
                 if (container->v2) {
                         ret = -EPERM;
-                        goto unlock_exit;
+                        goto free_exit;
                 }
                 ret = tce_iommu_take_ownership(container, table_group);
         } else {
                 if (!container->v2) {
                         ret = -EPERM;
-                        goto unlock_exit;
+                        goto free_exit;
                 }
                 ret = tce_iommu_take_ownership_ddw(container, table_group);
                 if (!tce_groups_attached(container) && !container->tables[0])
@@ -1311,10 +1311,11 @@
                 list_add(&tcegrp->next, &container->group_list);
         }
 
-unlock_exit:
+free_exit:
         if (ret && tcegrp)
                 kfree(tcegrp);
 
+unlock_exit:
         mutex_unlock(&container->lock);
 
         return ret;
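
Splitting the single unlock_exit label separates "free the partially constructed tcegrp" from "just drop the lock", the usual kernel layered-cleanup idiom: each error path jumps to the deepest cleanup it needs and falls through the rest. Schematically (do_step() is a hypothetical failing step):

    err = do_step();
    if (err)
            goto free_exit;         /* needs the free and the unlock */
    /* ... */
free_exit:
    if (ret && tcegrp)
            kfree(tcegrp);
unlock_exit:
    mutex_unlock(&container->lock);
    return ret;
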
@@ -1358,7 +1359,7 @@
         mutex_unlock(&container->lock);
 }
 
-const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
+static const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
         .name           = "iommu-vfio-powerpc",
         .owner          = THIS_MODULE,
         .open           = tce_iommu_open,