+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * VFIO: IOMMU DMA mapping support for TCE on POWER
  *
  * Copyright (C) 2013 IBM Corp. All rights reserved.
  * Author: Alexey Kardashevskiy <aik@ozlabs.ru>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  *
  * Derived from original vfio_iommu_type1.c:
  * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
...
 #include <linux/vmalloc.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/signal.h>
+#include <linux/mm.h>

 #include <asm/iommu.h>
 #include <asm/tce.h>
...

 static void tce_iommu_detach_group(void *iommu_data,
                struct iommu_group *iommu_group);
-
-static long try_increment_locked_vm(struct mm_struct *mm, long npages)
-{
-       long ret = 0, locked, lock_limit;
-
-       if (WARN_ON_ONCE(!mm))
-               return -EPERM;
-
-       if (!npages)
-               return 0;
-
-       down_write(&mm->mmap_sem);
-       locked = mm->locked_vm + npages;
-       lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-       if (locked > lock_limit && !capable(CAP_IPC_LOCK))
-               ret = -ENOMEM;
-       else
-               mm->locked_vm += npages;
-
-       pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
-                       npages << PAGE_SHIFT,
-                       mm->locked_vm << PAGE_SHIFT,
-                       rlimit(RLIMIT_MEMLOCK),
-                       ret ? " - exceeded" : "");
-
-       up_write(&mm->mmap_sem);
-
-       return ret;
-}
-
-static void decrement_locked_vm(struct mm_struct *mm, long npages)
-{
-       if (!mm || !npages)
-               return;
-
-       down_write(&mm->mmap_sem);
-       if (WARN_ON_ONCE(npages > mm->locked_vm))
-               npages = mm->locked_vm;
-       mm->locked_vm -= npages;
-       pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
-                       npages << PAGE_SHIFT,
-                       mm->locked_vm << PAGE_SHIFT,
-                       rlimit(RLIMIT_MEMLOCK));
-       up_write(&mm->mmap_sem);
-}

 /*
  * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
...
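The two helpers removed above were an open-coded copy of locked-memory accounting that several subsystems had duplicated; the rest of this diff switches their call sites to the generic account_locked_vm() from <linux/mm.h> (hence the new include). A minimal sketch of the charge/uncharge pairing as the call sites below use it; charge_example()/uncharge_example() are hypothetical names for illustration:

```c
#include <linux/mm.h>

/*
 * Charge npages against the target mm's RLIMIT_MEMLOCK. Fails with
 * -ENOMEM when the limit would be exceeded, unless the caller has
 * CAP_IPC_LOCK.
 */
static int charge_example(struct mm_struct *mm, unsigned long npages)
{
        int ret = account_locked_vm(mm, npages, true);  /* charge */

        if (ret)
                return ret;
        /* ... create the DMA window / pin the pages ... */
        return 0;
}

static void uncharge_example(struct mm_struct *mm, unsigned long npages)
{
        account_locked_vm(mm, npages, false);           /* uncharge */
}
```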
        }
        BUG_ON(!current->mm);
        container->mm = current->mm;
-       atomic_inc(&container->mm->mm_count);
+       mmgrab(container->mm);

        return 0;
 }
...
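mmgrab() is the modern spelling of atomic_inc(&mm->mm_count): it pins the mm_struct itself, keeping the pointer valid, without keeping the address space alive the way an mm_users reference would. It pairs with mmdrop(). A sketch of the pairing, with grab_example()/release_example() as hypothetical names standing in for the container setup and release paths:

```c
#include <linux/sched/mm.h>

/* Take a pointer-lifetime reference on the mm for the container. */
static void grab_example(struct tce_container *container)
{
        container->mm = current->mm;
        mmgrab(container->mm);          /* ++mm_count, not ++mm_users */
}

/* Drop it when the container goes away. */
static void release_example(struct tce_container *container)
{
        if (container->mm)
                mmdrop(container->mm);
}
```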
        struct mm_iommu_table_group_mem_t *mem;
        struct tce_iommu_prereg *tcemem;
        bool found = false;
+       long ret;

        if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
                return -EINVAL;

-       mem = mm_iommu_find(container->mm, vaddr, size >> PAGE_SHIFT);
+       mem = mm_iommu_get(container->mm, vaddr, size >> PAGE_SHIFT);
        if (!mem)
                return -ENOENT;

...
        }

        if (!found)
-               return -ENOENT;
+               ret = -ENOENT;
+       else
+               ret = tce_iommu_prereg_free(container, tcemem);

-       return tce_iommu_prereg_free(container, tcemem);
+       mm_iommu_put(container->mm, mem);
+
+       return ret;
 }

 static long tce_iommu_register_pages(struct tce_container *container,
...
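The rewrite above exists to balance a reference: mm_iommu_get() now takes a reference on the pre-registered region during lookup, so the function can no longer return from the middle. Both the not-found path and the free path fall through to a single mm_iommu_put(). A condensed view of the resulting flow:

```c
        mem = mm_iommu_get(container->mm, vaddr, size >> PAGE_SHIFT);
        if (!mem)
                return -ENOENT;         /* nothing referenced yet */

        /* ... search container->prereg_list for a matching tcemem ... */

        if (!found)
                ret = -ENOENT;
        else
                ret = tce_iommu_prereg_free(container, tcemem);

        mm_iommu_put(container->mm, mem);       /* drop the lookup reference */
        return ret;
```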
                        ((vaddr + size) < vaddr))
                return -EINVAL;

-       mem = mm_iommu_find(container->mm, vaddr, entries);
+       mem = mm_iommu_get(container->mm, vaddr, entries);
        if (mem) {
                list_for_each_entry(tcemem, &container->prereg_list, next) {
-                       if (tcemem->mem == mem)
-                               return -EBUSY;
+                       if (tcemem->mem == mem) {
+                               ret = -EBUSY;
+                               goto put_exit;
+                       }
                }
+       } else {
+               ret = mm_iommu_new(container->mm, vaddr, entries, &mem);
+               if (ret)
+                       return ret;
        }
-
-       ret = mm_iommu_get(container->mm, vaddr, entries, &mem);
-       if (ret)
-               return ret;

        tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
        if (!tcemem) {
-               mm_iommu_put(container->mm, mem);
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto put_exit;
        }

        tcemem->mem = mem;
...
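Registration now follows a get-or-create pattern: mm_iommu_get() succeeds only for an already pre-registered region (taking a reference), while mm_iommu_new() pins a region for the first time. Either way the function ends up holding exactly one reference, which the put_exit label in the next hunk releases on every error path. Condensed sketch:

```c
        mem = mm_iommu_get(container->mm, vaddr, entries);
        if (!mem) {
                /* first registration of this range: pin it */
                ret = mm_iommu_new(container->mm, vaddr, entries, &mem);
                if (ret)
                        return ret;
        }

        /*
         * Any later failure jumps to put_exit, which calls
         * mm_iommu_put(container->mm, mem) to drop the single reference.
         */
```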
        container->enabled = true;

        return 0;
+
+put_exit:
+       mm_iommu_put(container->mm, mem);
+       return ret;
 }

-static bool tce_page_is_contained(struct page *page, unsigned page_shift)
+static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
+               unsigned int it_page_shift)
 {
+       struct page *page;
+       unsigned long size = 0;
+
+       if (mm_iommu_is_devmem(mm, hpa, it_page_shift, &size))
+               return size == (1UL << it_page_shift);
+
+       page = pfn_to_page(hpa >> PAGE_SHIFT);
        /*
         * Check that the TCE table granularity is not bigger than the size of
         * a page we just found. Otherwise the hardware can get access to
         * a bigger memory chunk than it should.
         */
-       return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
+       return page_shift(compound_head(page)) >= it_page_shift;
 }

 static inline bool tce_groups_attached(struct tce_container *container)
...
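The containment test now starts from the host physical address so it can also cover device memory (regions recognized by mm_iommu_is_devmem(), e.g. persistent memory) that the old struct-page-only check could not, and it uses the page_shift() helper, which for a compound page equals PAGE_SHIFT + compound_order(). An equivalent of the old open-coded test, wrapped in a hypothetical helper for illustration:

```c
/* True if the backing (possibly huge) page covers one whole IOMMU page. */
static bool backing_page_covers(struct page *page, unsigned int it_page_shift)
{
        /* page_shift(head) == PAGE_SHIFT + compound_order(head) */
        return page_shift(compound_head(page)) >= it_page_shift;
}
```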
                return ret;

        locked = table_group->tce32_size >> PAGE_SHIFT;
-       ret = try_increment_locked_vm(container->mm, locked);
+       ret = account_locked_vm(container->mm, locked, true);
        if (ret)
                return ret;

...
        container->enabled = false;

        BUG_ON(!container->mm);
-       decrement_locked_vm(container->mm, container->locked_pages);
+       account_locked_vm(container->mm, container->locked_pages, false);
 }

 static void *tce_iommu_open(unsigned long arg)
...
        struct page *page;

        page = pfn_to_page(hpa >> PAGE_SHIFT);
-       put_page(page);
+       unpin_user_page(page);
 }

 static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
...
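put_page() becomes unpin_user_page() because the pages are now taken with FOLL_PIN semantics (see the pin_user_pages_fast() hunk further down); pins and unpins must pair so the kernel's pinned-page accounting stays balanced. A minimal sketch of the pairing, with uaddr as a hypothetical user address:

```c
        struct page *page;

        /* FOLL_PIN: marks the page as a potential long-term DMA target */
        if (pin_user_pages_fast(uaddr, 1, FOLL_WRITE, &page) != 1)
                return -EFAULT;

        /* ... program the TCE, do the DMA ... */

        unpin_user_page(page);  /* never plain put_page() for pinned pages */
```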
        struct mm_iommu_table_group_mem_t *mem = NULL;
        int ret;
        unsigned long hpa = 0;
-       __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+       __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);

        if (!pua)
                return;
...
        unsigned long oldhpa;
        long ret;
        enum dma_data_direction direction;
+       unsigned long lastentry = entry + pages, firstentry = entry;

-       for ( ; pages; --pages, ++entry) {
+       for ( ; entry < lastentry; ++entry) {
+               if (tbl->it_indirect_levels && tbl->it_userspace) {
+                       /*
+                        * For multilevel tables, we can take a shortcut here
+                        * and skip some TCEs as we know that the userspace
+                        * addresses cache is a mirror of the real TCE table
+                        * and if it is missing some indirect levels, then
+                        * the hardware table does not have them allocated
+                        * either and therefore does not require updating.
+                        */
+                       __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl,
+                                       entry);
+                       if (!pua) {
+                               /* align to level_size which is power of two */
+                               entry |= tbl->it_level_size - 1;
+                               continue;
+                       }
+               }
+
                cond_resched();

                direction = DMA_NONE;
                oldhpa = 0;
-               ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction);
+               ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry, &oldhpa,
+                               &direction);
                if (ret)
                        continue;

...
                tce_iommu_unuse_page(container, oldhpa);
        }

+       iommu_tce_kill(tbl, firstentry, pages);
+
        return 0;
 }

...
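Two independent optimizations land in this loop. First, iommu_tce_xchg(), which flushed the TCE cache on every call, is split into iommu_tce_xchg_no_kill() per entry plus a single iommu_tce_kill() for the whole range. Second, for multilevel tables the userspace-view cache doubles as an allocation map: a NULL entry means the indirect level was never allocated in hardware either, so the loop rounds entry up past that level. The batching pattern in isolation, assuming mm, tbl, hpa, dir, firstentry and pages are in scope as in the hunk above:

```c
        /* Exchange entries without per-entry cache flushes ... */
        for (entry = firstentry; entry < firstentry + pages; ++entry) {
                ret = iommu_tce_xchg_no_kill(mm, tbl, entry, &hpa, &dir);
                if (ret)
                        continue;
                /* ... release whatever page was behind hpa ... */
        }
        /* ... then invalidate the whole range once. */
        iommu_tce_kill(tbl, firstentry, pages);
```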
        struct page *page = NULL;
        enum dma_data_direction direction = iommu_tce_direction(tce);

-       if (get_user_pages_fast(tce & PAGE_MASK, 1,
-                       direction != DMA_TO_DEVICE, &page) != 1)
+       if (pin_user_pages_fast(tce & PAGE_MASK, 1,
+                       direction != DMA_TO_DEVICE ? FOLL_WRITE : 0,
+                       &page) != 1)
                return -EFAULT;

        *hpa = __pa((unsigned long) page_address(page));
...
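get_user_pages_fast() took a plain write boolean; pin_user_pages_fast() takes FOLL_* gup flags, so the DMA direction has to be translated explicitly: any mapping the device may write (everything except DMA_TO_DEVICE) needs FOLL_WRITE. The same call with the flag hoisted into a local for clarity (gup_flags is an illustrative name, not in the patch):

```c
        unsigned int gup_flags = (direction != DMA_TO_DEVICE) ? FOLL_WRITE : 0;

        if (pin_user_pages_fast(tce & PAGE_MASK, 1, gup_flags, &page) != 1)
                return -EFAULT;
```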
                enum dma_data_direction direction)
 {
        long i, ret = 0;
-       struct page *page;
        unsigned long hpa;
        enum dma_data_direction dirtmp;

...
                if (ret)
                        break;

-               page = pfn_to_page(hpa >> PAGE_SHIFT);
-               if (!tce_page_is_contained(page, tbl->it_page_shift)) {
+               if (!tce_page_is_contained(container->mm, hpa,
+                               tbl->it_page_shift)) {
                        ret = -EPERM;
                        break;
                }

                hpa |= offset;
                dirtmp = direction;
-               ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
+               ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry + i,
+                               &hpa, &dirtmp);
                if (ret) {
                        tce_iommu_unuse_page(container, hpa);
                        pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
...

        if (ret)
                tce_iommu_clear(container, tbl, entry, i);
+       else
+               iommu_tce_kill(tbl, entry, pages);

        return ret;
 }
...
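The explicit flush runs only on success because the error path already flushes: tce_iommu_clear() tears down the partially built range and ends with its own iommu_tce_kill() (see the earlier hunk). Either way the hardware sees exactly one invalidation for the range:

```c
        if (ret)
                tce_iommu_clear(container, tbl, entry, i); /* flushes internally */
        else
                iommu_tce_kill(tbl, entry, pages);         /* flush the new TCEs */
```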
                enum dma_data_direction direction)
 {
        long i, ret = 0;
-       struct page *page;
        unsigned long hpa;
        enum dma_data_direction dirtmp;

...
                if (ret)
                        break;

-               page = pfn_to_page(hpa >> PAGE_SHIFT);
-               if (!tce_page_is_contained(page, tbl->it_page_shift)) {
+               if (!tce_page_is_contained(container->mm, hpa,
+                               tbl->it_page_shift)) {
                        ret = -EPERM;
                        break;
                }
...
                if (mm_iommu_mapped_inc(mem))
                        break;

-               ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
+               ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry + i,
+                               &hpa, &dirtmp);
                if (ret) {
                        /* dirtmp cannot be DMA_NONE here */
                        tce_iommu_unuse_page_v2(container, tbl, entry + i);
...

        if (ret)
                tce_iommu_clear(container, tbl, entry, i);
+       else
+               iommu_tce_kill(tbl, entry, pages);

        return ret;
 }
...
        if (!table_size)
                return -EINVAL;

-       ret = try_increment_locked_vm(container->mm, table_size >> PAGE_SHIFT);
+       ret = account_locked_vm(container->mm, table_size >> PAGE_SHIFT, true);
        if (ret)
                return ret;

...
        unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;

        iommu_tce_table_put(tbl);
-       decrement_locked_vm(container->mm, pages);
+       account_locked_vm(container->mm, pages, false);
 }

 static long tce_iommu_create_window(struct tce_container *container,
...
        }

        for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
-               table_group->ops->unset_window(table_group, i);
+               if (container->tables[i])
+                       table_group->ops->unset_window(table_group, i);

        table_group->ops->release_ownership(table_group);
 }
...
 static int tce_iommu_attach_group(void *iommu_data,
                struct iommu_group *iommu_group)
 {
-       int ret;
+       int ret = 0;
        struct tce_container *container = iommu_data;
        struct iommu_table_group *table_group;
        struct tce_iommu_group *tcegrp = NULL;
...
                        !table_group->ops->release_ownership) {
                if (container->v2) {
                        ret = -EPERM;
-                       goto unlock_exit;
+                       goto free_exit;
                }
                ret = tce_iommu_take_ownership(container, table_group);
        } else {
                if (!container->v2) {
                        ret = -EPERM;
-                       goto unlock_exit;
+                       goto free_exit;
                }
                ret = tce_iommu_take_ownership_ddw(container, table_group);
                if (!tce_groups_attached(container) && !container->tables[0])
...
                list_add(&tcegrp->next, &container->group_list);
        }

-unlock_exit:
+free_exit:
        if (ret && tcegrp)
                kfree(tcegrp);

+unlock_exit:
        mutex_unlock(&container->lock);

        return ret;
...
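The label split restores the usual layered-goto unwind: failures before tcegrp is involved jump to unlock_exit and only drop the mutex, while failures after the allocation jump to free_exit, free tcegrp, and fall through to the unlock. A condensed view of the control flow after this change:

```c
        /* ... ownership taken above ... */
        if (ret)
                goto free_exit;         /* tcegrp allocated, attach failed */
        list_add(&tcegrp->next, &container->group_list);

free_exit:
        if (ret && tcegrp)              /* guard keeps the success path safe */
                kfree(tcegrp);
unlock_exit:
        mutex_unlock(&container->lock);
        return ret;
```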
        mutex_unlock(&container->lock);
 }

-const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
+static const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
        .name           = "iommu-vfio-powerpc",
        .owner          = THIS_MODULE,
        .open           = tce_iommu_open,
...
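Marking the ops table static is safe because nothing outside this file references it by name; the VFIO core only ever sees its address, handed over at module init via vfio_register_iommu_driver(). A sketch of that registration boilerplate (function names are illustrative):

```c
static int __init tce_iommu_init(void)
{
        return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
        vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);
```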