.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * Dynamic DMA mapping support. |
---|
3 | 4 | * |
---|
.. | .. |
---|
21 | 22 | |
---|
22 | 23 | #include <linux/cache.h> |
---|
23 | 24 | #include <linux/dma-direct.h> |
---|
| 25 | +#include <linux/dma-map-ops.h> |
---|
24 | 26 | #include <linux/mm.h> |
---|
25 | 27 | #include <linux/export.h> |
---|
26 | 28 | #include <linux/spinlock.h> |
---|
.. | .. |
---|
34 | 36 | #include <linux/scatterlist.h> |
---|
35 | 37 | #include <linux/mem_encrypt.h> |
---|
36 | 38 | #include <linux/set_memory.h> |
---|
| 39 | +#ifdef CONFIG_DEBUG_FS |
---|
| 40 | +#include <linux/debugfs.h> |
---|
| 41 | +#endif |
---|
37 | 42 | |
---|
38 | 43 | #include <asm/io.h> |
---|
39 | 44 | #include <asm/dma.h> |
---|
40 | 45 | |
---|
41 | 46 | #include <linux/init.h> |
---|
42 | | -#include <linux/bootmem.h> |
---|
| 47 | +#include <linux/memblock.h> |
---|
43 | 48 | #include <linux/iommu-helper.h> |
---|
44 | 49 | |
---|
45 | 50 | #define CREATE_TRACE_POINTS |
---|
46 | 51 | #include <trace/events/swiotlb.h> |
---|
47 | | - |
---|
48 | | -#define OFFSET(val,align) ((unsigned long) \ |
---|
49 | | - ( (val) & ( (align) - 1))) |
---|
50 | 52 | |
---|
51 | 53 | #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) |
---|
52 | 54 | |
---|
.. | .. |
---|
64 | 66 | * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this |
---|
65 | 67 | * API. |
---|
66 | 68 | */ |
---|
67 | | -static phys_addr_t io_tlb_start, io_tlb_end; |
---|
| 69 | +phys_addr_t io_tlb_start, io_tlb_end; |
---|
68 | 70 | |
---|
69 | 71 | /* |
---|
70 | 72 | * The number of IO TLB blocks (in groups of 64) between io_tlb_start and |
---|
.. | .. |
---|
73 | 75 | static unsigned long io_tlb_nslabs; |
---|
74 | 76 | |
---|
75 | 77 | /* |
---|
76 | | - * When the IOMMU overflows we return a fallback buffer. This sets the size. |
---|
| 78 | + * The number of used IO TLB blocks |
---|
77 | 79 | */ |
---|
78 | | -static unsigned long io_tlb_overflow = 32*1024; |
---|
79 | | - |
---|
80 | | -static phys_addr_t io_tlb_overflow_buffer; |
---|
| 80 | +static unsigned long io_tlb_used; |
---|
81 | 81 | |
---|
82 | 82 | /* |
---|
83 | 83 | * This is a free list describing the number of free entries available from |
---|
.. | .. |
---|
90 | 90 | * Max segment that we can provide which (if pages are contiguous) will |
---|
91 | 91 | * not be bounced (unless SWIOTLB_FORCE is set). |
---|
92 | 92 | */ |
---|
93 | | -unsigned int max_segment; |
---|
| 93 | +static unsigned int max_segment; |
---|
94 | 94 | |
---|
95 | 95 | /* |
---|
96 | 96 | * We need to save away the original address corresponding to a mapped entry |
---|
.. | .. |
---|
126 | 126 | return 0; |
---|
127 | 127 | } |
---|
128 | 128 | early_param("swiotlb", setup_io_tlb_npages); |
---|
129 | | -/* make io_tlb_overflow tunable too? */ |
---|
| 129 | + |
---|
| 130 | +static bool no_iotlb_memory; |
---|
130 | 131 | |
---|
131 | 132 | unsigned long swiotlb_nr_tbl(void) |
---|
132 | 133 | { |
---|
133 | | - return io_tlb_nslabs; |
---|
| 134 | + return unlikely(no_iotlb_memory) ? 0 : io_tlb_nslabs; |
---|
134 | 135 | } |
---|
135 | 136 | EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); |
---|
136 | 137 | |
---|
137 | 138 | unsigned int swiotlb_max_segment(void) |
---|
138 | 139 | { |
---|
139 | | - return max_segment; |
---|
| 140 | + return unlikely(no_iotlb_memory) ? 0 : max_segment; |
---|
140 | 141 | } |
---|
141 | 142 | EXPORT_SYMBOL_GPL(swiotlb_max_segment); |
---|
142 | 143 | |
---|
.. | .. |
---|
159 | 160 | return size ? size : (IO_TLB_DEFAULT_SIZE); |
---|
160 | 161 | } |
---|
161 | 162 | |
---|
162 | | -static bool no_iotlb_memory; |
---|
163 | | - |
---|
164 | 163 | void swiotlb_print_info(void) |
---|
165 | 164 | { |
---|
166 | 165 | unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; |
---|
.. | .. |
---|
170 | 169 | return; |
---|
171 | 170 | } |
---|
172 | 171 | |
---|
173 | | - pr_info("mapped [mem %#010llx-%#010llx] (%luMB)\n", |
---|
174 | | - (unsigned long long)io_tlb_start, |
---|
175 | | - (unsigned long long)io_tlb_end, |
---|
| 172 | + pr_info("mapped [mem %pa-%pa] (%luMB)\n", &io_tlb_start, &io_tlb_end, |
---|
176 | 173 | bytes >> 20); |
---|
| 174 | +} |
---|
| 175 | + |
---|
| 176 | +static inline unsigned long io_tlb_offset(unsigned long val) |
---|
| 177 | +{ |
---|
| 178 | + return val & (IO_TLB_SEGSIZE - 1); |
---|
| 179 | +} |
---|
| 180 | + |
---|
| 181 | +static inline unsigned long nr_slots(u64 val) |
---|
| 182 | +{ |
---|
| 183 | + return DIV_ROUND_UP(val, IO_TLB_SIZE); |
---|
177 | 184 | } |
---|
178 | 185 | |
---|
179 | 186 | /* |
---|
.. | .. |
---|
194 | 201 | bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT); |
---|
195 | 202 | set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); |
---|
196 | 203 | memset(vaddr, 0, bytes); |
---|
197 | | - |
---|
198 | | - vaddr = phys_to_virt(io_tlb_overflow_buffer); |
---|
199 | | - bytes = PAGE_ALIGN(io_tlb_overflow); |
---|
200 | | - set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); |
---|
201 | | - memset(vaddr, 0, bytes); |
---|
202 | 204 | } |
---|
203 | 205 | |
---|
204 | 206 | int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) |
---|
205 | 207 | { |
---|
206 | | - void *v_overflow_buffer; |
---|
207 | 208 | unsigned long i, bytes; |
---|
| 209 | + size_t alloc_size; |
---|
208 | 210 | |
---|
209 | 211 | bytes = nslabs << IO_TLB_SHIFT; |
---|
210 | 212 | |
---|
.. | .. |
---|
213 | 215 | io_tlb_end = io_tlb_start + bytes; |
---|
214 | 216 | |
---|
215 | 217 | /* |
---|
216 | | - * Get the overflow emergency buffer |
---|
217 | | - */ |
---|
218 | | - v_overflow_buffer = memblock_virt_alloc_low_nopanic( |
---|
219 | | - PAGE_ALIGN(io_tlb_overflow), |
---|
220 | | - PAGE_SIZE); |
---|
221 | | - if (!v_overflow_buffer) |
---|
222 | | - return -ENOMEM; |
---|
223 | | - |
---|
224 | | - io_tlb_overflow_buffer = __pa(v_overflow_buffer); |
---|
225 | | - |
---|
226 | | - /* |
---|
227 | 218 | * Allocate and initialize the free list array. This array is used |
---|
228 | 219 | * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE |
---|
229 | 220 | * between io_tlb_start and io_tlb_end. |
---|
230 | 221 | */ |
---|
231 | | - io_tlb_list = memblock_virt_alloc( |
---|
232 | | - PAGE_ALIGN(io_tlb_nslabs * sizeof(int)), |
---|
233 | | - PAGE_SIZE); |
---|
234 | | - io_tlb_orig_addr = memblock_virt_alloc( |
---|
235 | | - PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)), |
---|
236 | | - PAGE_SIZE); |
---|
| 222 | + alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(int)); |
---|
| 223 | + io_tlb_list = memblock_alloc(alloc_size, PAGE_SIZE); |
---|
| 224 | + if (!io_tlb_list) |
---|
| 225 | + panic("%s: Failed to allocate %zu bytes align=0x%lx\n", |
---|
| 226 | + __func__, alloc_size, PAGE_SIZE); |
---|
| 227 | + |
---|
| 228 | + alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)); |
---|
| 229 | + io_tlb_orig_addr = memblock_alloc(alloc_size, PAGE_SIZE); |
---|
| 230 | + if (!io_tlb_orig_addr) |
---|
| 231 | + panic("%s: Failed to allocate %zu bytes align=0x%lx\n", |
---|
| 232 | + __func__, alloc_size, PAGE_SIZE); |
---|
| 233 | + |
---|
237 | 234 | for (i = 0; i < io_tlb_nslabs; i++) { |
---|
238 | | - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); |
---|
| 235 | + io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i); |
---|
239 | 236 | io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; |
---|
240 | 237 | } |
---|
241 | 238 | io_tlb_index = 0; |
---|
.. | .. |
---|
267 | 264 | bytes = io_tlb_nslabs << IO_TLB_SHIFT; |
---|
268 | 265 | |
---|
269 | 266 | /* Get IO TLB memory from the low pages */ |
---|
270 | | - vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); |
---|
| 267 | + vstart = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE); |
---|
271 | 268 | if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) |
---|
272 | 269 | return; |
---|
273 | 270 | |
---|
.. | .. |
---|
329 | 326 | return rc; |
---|
330 | 327 | } |
---|
331 | 328 | |
---|
| 329 | +static void swiotlb_cleanup(void) |
---|
| 330 | +{ |
---|
| 331 | + io_tlb_end = 0; |
---|
| 332 | + io_tlb_start = 0; |
---|
| 333 | + io_tlb_nslabs = 0; |
---|
| 334 | + max_segment = 0; |
---|
| 335 | +} |
---|
| 336 | + |
---|
332 | 337 | int |
---|
333 | 338 | swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) |
---|
334 | 339 | { |
---|
335 | 340 | unsigned long i, bytes; |
---|
336 | | - unsigned char *v_overflow_buffer; |
---|
337 | 341 | |
---|
338 | 342 | bytes = nslabs << IO_TLB_SHIFT; |
---|
339 | 343 | |
---|
.. | .. |
---|
343 | 347 | |
---|
344 | 348 | set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT); |
---|
345 | 349 | memset(tlb, 0, bytes); |
---|
346 | | - |
---|
347 | | - /* |
---|
348 | | - * Get the overflow emergency buffer |
---|
349 | | - */ |
---|
350 | | - v_overflow_buffer = (void *)__get_free_pages(GFP_DMA, |
---|
351 | | - get_order(io_tlb_overflow)); |
---|
352 | | - if (!v_overflow_buffer) |
---|
353 | | - goto cleanup2; |
---|
354 | | - |
---|
355 | | - set_memory_decrypted((unsigned long)v_overflow_buffer, |
---|
356 | | - io_tlb_overflow >> PAGE_SHIFT); |
---|
357 | | - memset(v_overflow_buffer, 0, io_tlb_overflow); |
---|
358 | | - io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer); |
---|
359 | 350 | |
---|
360 | 351 | /* |
---|
361 | 352 | * Allocate and initialize the free list array. This array is used |
---|
.. | .. |
---|
375 | 366 | goto cleanup4; |
---|
376 | 367 | |
---|
377 | 368 | for (i = 0; i < io_tlb_nslabs; i++) { |
---|
378 | | - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); |
---|
| 369 | + io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i); |
---|
379 | 370 | io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; |
---|
380 | 371 | } |
---|
381 | 372 | io_tlb_index = 0; |
---|
.. | .. |
---|
394 | 385 | sizeof(int))); |
---|
395 | 386 | io_tlb_list = NULL; |
---|
396 | 387 | cleanup3: |
---|
397 | | - free_pages((unsigned long)v_overflow_buffer, |
---|
398 | | - get_order(io_tlb_overflow)); |
---|
399 | | - io_tlb_overflow_buffer = 0; |
---|
400 | | -cleanup2: |
---|
401 | | - io_tlb_end = 0; |
---|
402 | | - io_tlb_start = 0; |
---|
403 | | - io_tlb_nslabs = 0; |
---|
404 | | - max_segment = 0; |
---|
| 388 | + swiotlb_cleanup(); |
---|
405 | 389 | return -ENOMEM; |
---|
406 | 390 | } |
---|
407 | 391 | |
---|
.. | .. |
---|
411 | 395 | return; |
---|
412 | 396 | |
---|
413 | 397 | if (late_alloc) { |
---|
414 | | - free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer), |
---|
415 | | - get_order(io_tlb_overflow)); |
---|
416 | 398 | free_pages((unsigned long)io_tlb_orig_addr, |
---|
417 | 399 | get_order(io_tlb_nslabs * sizeof(phys_addr_t))); |
---|
418 | 400 | free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * |
---|
.. | .. |
---|
420 | 402 | free_pages((unsigned long)phys_to_virt(io_tlb_start), |
---|
421 | 403 | get_order(io_tlb_nslabs << IO_TLB_SHIFT)); |
---|
422 | 404 | } else { |
---|
423 | | - memblock_free_late(io_tlb_overflow_buffer, |
---|
424 | | - PAGE_ALIGN(io_tlb_overflow)); |
---|
425 | 405 | memblock_free_late(__pa(io_tlb_orig_addr), |
---|
426 | 406 | PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); |
---|
427 | 407 | memblock_free_late(__pa(io_tlb_list), |
---|
.. | .. |
---|
429 | 409 | memblock_free_late(io_tlb_start, |
---|
430 | 410 | PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); |
---|
431 | 411 | } |
---|
432 | | - io_tlb_nslabs = 0; |
---|
433 | | - max_segment = 0; |
---|
434 | | -} |
---|
435 | | - |
---|
436 | | -int is_swiotlb_buffer(phys_addr_t paddr) |
---|
437 | | -{ |
---|
438 | | - return paddr >= io_tlb_start && paddr < io_tlb_end; |
---|
| 412 | + swiotlb_cleanup(); |
---|
439 | 413 | } |
---|
440 | 414 | |
---|
441 | 415 | /* |
---|
442 | | - * Bounce: copy the swiotlb buffer back to the original dma location |
---|
| 416 | + * Bounce: copy the swiotlb buffer from or back to the original dma location |
---|
443 | 417 | */ |
---|
444 | 418 | static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, |
---|
445 | 419 | size_t size, enum dma_data_direction dir) |
---|
.. | .. |
---|
478 | 452 | } |
---|
479 | 453 | } |
---|
480 | 454 | |
---|
481 | | -phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, |
---|
482 | | - dma_addr_t tbl_dma_addr, |
---|
483 | | - phys_addr_t orig_addr, size_t size, |
---|
484 | | - enum dma_data_direction dir, |
---|
485 | | - unsigned long attrs) |
---|
| 455 | +static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx) |
---|
486 | 456 | { |
---|
| 457 | + return start + (idx << IO_TLB_SHIFT); |
---|
| 458 | +} |
---|
| 459 | + |
---|
| 460 | +/* |
---|
| 461 | + * Return the offset into an iotlb slot required to keep the device happy. |
---|
| 462 | + */ |
---|
| 463 | +static unsigned int swiotlb_align_offset(struct device *dev, u64 addr) |
---|
| 464 | +{ |
---|
| 465 | + return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1); |
---|
| 466 | +} |
---|
| 467 | + |
---|
| 468 | +/* |
---|
| 469 | + * Carefully handle integer overflow which can occur when boundary_mask == ~0UL. |
---|
| 470 | + */ |
---|
| 471 | +static inline unsigned long get_max_slots(unsigned long boundary_mask) |
---|
| 472 | +{ |
---|
| 473 | + if (boundary_mask == ~0UL) |
---|
| 474 | + return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); |
---|
| 475 | + return nr_slots(boundary_mask + 1); |
---|
| 476 | +} |
---|
| 477 | + |
---|
| 478 | +static unsigned int wrap_index(unsigned int index) |
---|
| 479 | +{ |
---|
| 480 | + if (index >= io_tlb_nslabs) |
---|
| 481 | + return 0; |
---|
| 482 | + return index; |
---|
| 483 | +} |
---|
| 484 | + |
---|
| 485 | +/* |
---|
| 486 | + * Find a suitable number of IO TLB entries that will fit this request and |
---|
| 487 | + * allocate a buffer from that IO TLB pool. |
---|
| 488 | + */ |
---|
| 489 | +static int find_slots(struct device *dev, phys_addr_t orig_addr, |
---|
| 490 | + size_t alloc_size) |
---|
| 491 | +{ |
---|
| 492 | + unsigned long boundary_mask = dma_get_seg_boundary(dev); |
---|
| 493 | + dma_addr_t tbl_dma_addr = |
---|
| 494 | + phys_to_dma_unencrypted(dev, io_tlb_start) & boundary_mask; |
---|
| 495 | + unsigned long max_slots = get_max_slots(boundary_mask); |
---|
| 496 | + unsigned int iotlb_align_mask = |
---|
| 497 | + dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1); |
---|
| 498 | + unsigned int nslots = nr_slots(alloc_size), stride; |
---|
| 499 | + unsigned int index, wrap, count = 0, i; |
---|
487 | 500 | unsigned long flags; |
---|
488 | | - phys_addr_t tlb_addr; |
---|
489 | | - unsigned int nslots, stride, index, wrap; |
---|
490 | | - int i; |
---|
491 | | - unsigned long mask; |
---|
492 | | - unsigned long offset_slots; |
---|
493 | | - unsigned long max_slots; |
---|
494 | | - |
---|
495 | | - if (no_iotlb_memory) |
---|
496 | | - panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); |
---|
497 | | - |
---|
498 | | - if (mem_encrypt_active()) |
---|
499 | | - pr_warn_once("%s is active and system is using DMA bounce buffers\n", |
---|
500 | | - sme_active() ? "SME" : "SEV"); |
---|
501 | | - |
---|
502 | | - mask = dma_get_seg_boundary(hwdev); |
---|
503 | | - |
---|
504 | | - tbl_dma_addr &= mask; |
---|
505 | | - |
---|
506 | | - offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; |
---|
507 | | - |
---|
508 | | - /* |
---|
509 | | - * Carefully handle integer overflow which can occur when mask == ~0UL. |
---|
510 | | - */ |
---|
511 | | - max_slots = mask + 1 |
---|
512 | | - ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT |
---|
513 | | - : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); |
---|
514 | | - |
---|
515 | | - /* |
---|
516 | | - * For mappings greater than or equal to a page, we limit the stride |
---|
517 | | - * (and hence alignment) to a page size. |
---|
518 | | - */ |
---|
519 | | - nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; |
---|
520 | | - if (size >= PAGE_SIZE) |
---|
521 | | - stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); |
---|
522 | | - else |
---|
523 | | - stride = 1; |
---|
524 | 501 | |
---|
525 | 502 | BUG_ON(!nslots); |
---|
526 | 503 | |
---|
527 | 504 | /* |
---|
528 | | - * Find suitable number of IO TLB entries size that will fit this |
---|
529 | | - * request and allocate a buffer from that IO TLB pool. |
---|
| 505 | + * For mappings with an alignment requirement don't bother looping to |
---|
| 506 | + * unaligned slots once we found an aligned one. For allocations of |
---|
| 507 | + * PAGE_SIZE or larger only look for page aligned allocations. |
---|
530 | 508 | */ |
---|
531 | | - spin_lock_irqsave(&io_tlb_lock, flags); |
---|
532 | | - index = ALIGN(io_tlb_index, stride); |
---|
533 | | - if (index >= io_tlb_nslabs) |
---|
534 | | - index = 0; |
---|
535 | | - wrap = index; |
---|
| 509 | + stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1; |
---|
| 510 | + if (alloc_size >= PAGE_SIZE) |
---|
| 511 | + stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT)); |
---|
536 | 512 | |
---|
| 513 | + spin_lock_irqsave(&io_tlb_lock, flags); |
---|
| 514 | + if (unlikely(nslots > io_tlb_nslabs - io_tlb_used)) |
---|
| 515 | + goto not_found; |
---|
| 516 | + |
---|
| 517 | + index = wrap = wrap_index(ALIGN(io_tlb_index, stride)); |
---|
537 | 518 | do { |
---|
538 | | - while (iommu_is_span_boundary(index, nslots, offset_slots, |
---|
539 | | - max_slots)) { |
---|
540 | | - index += stride; |
---|
541 | | - if (index >= io_tlb_nslabs) |
---|
542 | | - index = 0; |
---|
543 | | - if (index == wrap) |
---|
544 | | - goto not_found; |
---|
| 519 | + if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) != |
---|
| 520 | + (orig_addr & iotlb_align_mask)) { |
---|
| 521 | + index = wrap_index(index + 1); |
---|
| 522 | + continue; |
---|
545 | 523 | } |
---|
546 | 524 | |
---|
547 | 525 | /* |
---|
.. | .. |
---|
549 | 527 | * contiguous buffers, we allocate the buffers from that slot |
---|
550 | 528 | * and mark the entries as '0' indicating unavailable. |
---|
551 | 529 | */ |
---|
552 | | - if (io_tlb_list[index] >= nslots) { |
---|
553 | | - int count = 0; |
---|
554 | | - |
---|
555 | | - for (i = index; i < (int) (index + nslots); i++) |
---|
556 | | - io_tlb_list[i] = 0; |
---|
557 | | - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) |
---|
558 | | - io_tlb_list[i] = ++count; |
---|
559 | | - tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT); |
---|
560 | | - |
---|
561 | | - /* |
---|
562 | | - * Update the indices to avoid searching in the next |
---|
563 | | - * round. |
---|
564 | | - */ |
---|
565 | | - io_tlb_index = ((index + nslots) < io_tlb_nslabs |
---|
566 | | - ? (index + nslots) : 0); |
---|
567 | | - |
---|
568 | | - goto found; |
---|
| 530 | + if (!iommu_is_span_boundary(index, nslots, |
---|
| 531 | + nr_slots(tbl_dma_addr), |
---|
| 532 | + max_slots)) { |
---|
| 533 | + if (io_tlb_list[index] >= nslots) |
---|
| 534 | + goto found; |
---|
569 | 535 | } |
---|
570 | | - index += stride; |
---|
571 | | - if (index >= io_tlb_nslabs) |
---|
572 | | - index = 0; |
---|
| 536 | + index = wrap_index(index + stride); |
---|
573 | 537 | } while (index != wrap); |
---|
574 | 538 | |
---|
575 | 539 | not_found: |
---|
576 | 540 | spin_unlock_irqrestore(&io_tlb_lock, flags); |
---|
577 | | - if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) |
---|
578 | | - dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size); |
---|
579 | | - return SWIOTLB_MAP_ERROR; |
---|
| 541 | + return -1; |
---|
| 542 | + |
---|
580 | 543 | found: |
---|
| 544 | + for (i = index; i < index + nslots; i++) |
---|
| 545 | + io_tlb_list[i] = 0; |
---|
| 546 | + for (i = index - 1; |
---|
| 547 | + io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && |
---|
| 548 | + io_tlb_list[i]; i--) |
---|
| 549 | + io_tlb_list[i] = ++count; |
---|
| 550 | + |
---|
| 551 | + /* |
---|
| 552 | + * Update the indices to avoid searching in the next round. |
---|
| 553 | + */ |
---|
| 554 | + if (index + nslots < io_tlb_nslabs) |
---|
| 555 | + io_tlb_index = index + nslots; |
---|
| 556 | + else |
---|
| 557 | + io_tlb_index = 0; |
---|
| 558 | + io_tlb_used += nslots; |
---|
| 559 | + |
---|
581 | 560 | spin_unlock_irqrestore(&io_tlb_lock, flags); |
---|
| 561 | + return index; |
---|
| 562 | +} |
---|
| 563 | + |
---|
| 564 | +phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, |
---|
| 565 | + size_t mapping_size, size_t alloc_size, |
---|
| 566 | + enum dma_data_direction dir, unsigned long attrs) |
---|
| 567 | +{ |
---|
| 568 | + unsigned int offset = swiotlb_align_offset(dev, orig_addr); |
---|
| 569 | + unsigned int i; |
---|
| 570 | + int index; |
---|
| 571 | + phys_addr_t tlb_addr; |
---|
| 572 | + |
---|
| 573 | + if (no_iotlb_memory) |
---|
| 574 | + panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); |
---|
| 575 | + |
---|
| 576 | + if (mem_encrypt_active()) |
---|
| 577 | + pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n"); |
---|
| 578 | + |
---|
| 579 | + if (mapping_size > alloc_size) { |
---|
| 580 | + dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)", |
---|
| 581 | + mapping_size, alloc_size); |
---|
| 582 | + return (phys_addr_t)DMA_MAPPING_ERROR; |
---|
| 583 | + } |
---|
| 584 | + |
---|
| 585 | + index = find_slots(dev, orig_addr, alloc_size + offset); |
---|
| 586 | + if (index == -1) { |
---|
| 587 | + if (!(attrs & DMA_ATTR_NO_WARN)) |
---|
| 588 | + dev_warn_ratelimited(dev, |
---|
| 589 | + "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", |
---|
| 590 | + alloc_size, io_tlb_nslabs, io_tlb_used); |
---|
| 591 | + return (phys_addr_t)DMA_MAPPING_ERROR; |
---|
| 592 | + } |
---|
582 | 593 | |
---|
583 | 594 | /* |
---|
584 | 595 | * Save away the mapping from the original address to the DMA address. |
---|
585 | 596 | * This is needed when we sync the memory. Then we sync the buffer if |
---|
586 | 597 | * needed. |
---|
587 | 598 | */ |
---|
588 | | - for (i = 0; i < nslots; i++) |
---|
589 | | - io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); |
---|
590 | | - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && |
---|
591 | | - (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) |
---|
592 | | - swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); |
---|
| 599 | + for (i = 0; i < nr_slots(alloc_size + offset); i++) |
---|
| 600 | + io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i); |
---|
593 | 601 | |
---|
| 602 | + tlb_addr = slot_addr(io_tlb_start, index) + offset; |
---|
| 603 | + /* |
---|
| 604 | + * When dir == DMA_FROM_DEVICE we could omit the copy from the orig |
---|
| 605 | + * to the tlb buffer, if we knew for sure the device will |
---|
| 606 | + * overwrite the entire current content. But we don't. Thus |
---|
| 607 | + * unconditional bounce may prevent leaking swiotlb content (i.e. |
---|
| 608 | + * kernel memory) to user-space. |
---|
| 609 | + */ |
---|
| 610 | + swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE); |
---|
594 | 611 | return tlb_addr; |
---|
595 | | -} |
---|
596 | | - |
---|
597 | | -/* |
---|
598 | | - * Allocates bounce buffer and returns its physical address. |
---|
599 | | - */ |
---|
600 | | -static phys_addr_t |
---|
601 | | -map_single(struct device *hwdev, phys_addr_t phys, size_t size, |
---|
602 | | - enum dma_data_direction dir, unsigned long attrs) |
---|
603 | | -{ |
---|
604 | | - dma_addr_t start_dma_addr; |
---|
605 | | - |
---|
606 | | - if (swiotlb_force == SWIOTLB_NO_FORCE) { |
---|
607 | | - dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n", |
---|
608 | | - &phys); |
---|
609 | | - return SWIOTLB_MAP_ERROR; |
---|
610 | | - } |
---|
611 | | - |
---|
612 | | - start_dma_addr = __phys_to_dma(hwdev, io_tlb_start); |
---|
613 | | - return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, |
---|
614 | | - dir, attrs); |
---|
615 | 612 | } |
---|
616 | 613 | |
---|
617 | 614 | /* |
---|
618 | 615 | * tlb_addr is the physical address of the bounce buffer to unmap. |
---|
619 | 616 | */ |
---|
620 | 617 | void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, |
---|
621 | | - size_t size, enum dma_data_direction dir, |
---|
622 | | - unsigned long attrs) |
---|
| 618 | + size_t mapping_size, size_t alloc_size, |
---|
| 619 | + enum dma_data_direction dir, unsigned long attrs) |
---|
623 | 620 | { |
---|
624 | 621 | unsigned long flags; |
---|
625 | | - int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; |
---|
626 | | - int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; |
---|
| 622 | + unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr); |
---|
| 623 | + int i, count, nslots = nr_slots(alloc_size + offset); |
---|
| 624 | + int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT; |
---|
627 | 625 | phys_addr_t orig_addr = io_tlb_orig_addr[index]; |
---|
628 | 626 | |
---|
629 | 627 | /* |
---|
.. | .. |
---|
632 | 630 | if (orig_addr != INVALID_PHYS_ADDR && |
---|
633 | 631 | !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && |
---|
634 | 632 | ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) |
---|
635 | | - swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE); |
---|
| 633 | + swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_FROM_DEVICE); |
---|
636 | 634 | |
---|
637 | 635 | /* |
---|
638 | 636 | * Return the buffer to the free list by setting the corresponding |
---|
.. | .. |
---|
641 | 639 | * with slots below and above the pool being returned. |
---|
642 | 640 | */ |
---|
643 | 641 | spin_lock_irqsave(&io_tlb_lock, flags); |
---|
644 | | - { |
---|
645 | | - count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? |
---|
646 | | - io_tlb_list[index + nslots] : 0); |
---|
647 | | - /* |
---|
648 | | - * Step 1: return the slots to the free list, merging the |
---|
649 | | - * slots with superceeding slots |
---|
650 | | - */ |
---|
651 | | - for (i = index + nslots - 1; i >= index; i--) { |
---|
652 | | - io_tlb_list[i] = ++count; |
---|
653 | | - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; |
---|
654 | | - } |
---|
655 | | - /* |
---|
656 | | - * Step 2: merge the returned slots with the preceding slots, |
---|
657 | | - * if available (non zero) |
---|
658 | | - */ |
---|
659 | | - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) |
---|
660 | | - io_tlb_list[i] = ++count; |
---|
| 642 | + if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE)) |
---|
| 643 | + count = io_tlb_list[index + nslots]; |
---|
| 644 | + else |
---|
| 645 | + count = 0; |
---|
| 646 | + |
---|
| 647 | + /* |
---|
| 648 | + * Step 1: return the slots to the free list, merging the slots with |
---|
| 649 | + * succeeding slots |
---|
| 650 | + */ |
---|
| 651 | + for (i = index + nslots - 1; i >= index; i--) { |
---|
| 652 | + io_tlb_list[i] = ++count; |
---|
| 653 | + io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; |
---|
661 | 654 | } |
---|
| 655 | + |
---|
| 656 | + /* |
---|
| 657 | + * Step 2: merge the returned slots with the preceding slots, if |
---|
| 658 | + * available (non zero) |
---|
| 659 | + */ |
---|
| 660 | + for (i = index - 1; |
---|
| 661 | + io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && io_tlb_list[i]; |
---|
| 662 | + i--) |
---|
| 663 | + io_tlb_list[i] = ++count; |
---|
| 664 | + io_tlb_used -= nslots; |
---|
662 | 665 | spin_unlock_irqrestore(&io_tlb_lock, flags); |
---|
663 | 666 | } |
---|
664 | 667 | |
---|
.. | .. |
---|
671 | 674 | |
---|
672 | 675 | if (orig_addr == INVALID_PHYS_ADDR) |
---|
673 | 676 | return; |
---|
674 | | - orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1); |
---|
| 677 | + |
---|
| 678 | + orig_addr += (tlb_addr & (IO_TLB_SIZE - 1)) - |
---|
| 679 | + swiotlb_align_offset(hwdev, orig_addr); |
---|
675 | 680 | |
---|
676 | 681 | switch (target) { |
---|
677 | 682 | case SYNC_FOR_CPU: |
---|
.. | .. |
---|
693 | 698 | } |
---|
694 | 699 | } |
---|
695 | 700 | |
---|
696 | | -static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr, |
---|
697 | | - size_t size) |
---|
698 | | -{ |
---|
699 | | - u64 mask = DMA_BIT_MASK(32); |
---|
700 | | - |
---|
701 | | - if (dev && dev->coherent_dma_mask) |
---|
702 | | - mask = dev->coherent_dma_mask; |
---|
703 | | - return addr + size - 1 <= mask; |
---|
704 | | -} |
---|
705 | | - |
---|
706 | | -static void * |
---|
707 | | -swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, |
---|
708 | | - unsigned long attrs) |
---|
709 | | -{ |
---|
710 | | - phys_addr_t phys_addr; |
---|
711 | | - |
---|
712 | | - if (swiotlb_force == SWIOTLB_NO_FORCE) |
---|
713 | | - goto out_warn; |
---|
714 | | - |
---|
715 | | - phys_addr = swiotlb_tbl_map_single(dev, |
---|
716 | | - __phys_to_dma(dev, io_tlb_start), |
---|
717 | | - 0, size, DMA_FROM_DEVICE, attrs); |
---|
718 | | - if (phys_addr == SWIOTLB_MAP_ERROR) |
---|
719 | | - goto out_warn; |
---|
720 | | - |
---|
721 | | - *dma_handle = __phys_to_dma(dev, phys_addr); |
---|
722 | | - if (!dma_coherent_ok(dev, *dma_handle, size)) |
---|
723 | | - goto out_unmap; |
---|
724 | | - |
---|
725 | | - memset(phys_to_virt(phys_addr), 0, size); |
---|
726 | | - return phys_to_virt(phys_addr); |
---|
727 | | - |
---|
728 | | -out_unmap: |
---|
729 | | - dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", |
---|
730 | | - (unsigned long long)dev->coherent_dma_mask, |
---|
731 | | - (unsigned long long)*dma_handle); |
---|
732 | | - |
---|
733 | | - /* |
---|
734 | | - * DMA_TO_DEVICE to avoid memcpy in unmap_single. |
---|
735 | | - * DMA_ATTR_SKIP_CPU_SYNC is optional. |
---|
736 | | - */ |
---|
737 | | - swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, |
---|
738 | | - DMA_ATTR_SKIP_CPU_SYNC); |
---|
739 | | -out_warn: |
---|
740 | | - if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) { |
---|
741 | | - dev_warn(dev, |
---|
742 | | - "swiotlb: coherent allocation failed, size=%zu\n", |
---|
743 | | - size); |
---|
744 | | - dump_stack(); |
---|
745 | | - } |
---|
746 | | - return NULL; |
---|
747 | | -} |
---|
748 | | - |
---|
749 | | -static bool swiotlb_free_buffer(struct device *dev, size_t size, |
---|
750 | | - dma_addr_t dma_addr) |
---|
751 | | -{ |
---|
752 | | - phys_addr_t phys_addr = dma_to_phys(dev, dma_addr); |
---|
753 | | - |
---|
754 | | - WARN_ON_ONCE(irqs_disabled()); |
---|
755 | | - |
---|
756 | | - if (!is_swiotlb_buffer(phys_addr)) |
---|
757 | | - return false; |
---|
758 | | - |
---|
759 | | - /* |
---|
760 | | - * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single. |
---|
761 | | - * DMA_ATTR_SKIP_CPU_SYNC is optional. |
---|
762 | | - */ |
---|
763 | | - swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, |
---|
764 | | - DMA_ATTR_SKIP_CPU_SYNC); |
---|
765 | | - return true; |
---|
766 | | -} |
---|
767 | | - |
---|
768 | 701 | /* |
---|
769 | | - * Map a single buffer of the indicated size for DMA in streaming mode. The |
---|
770 | | - * physical address to use is returned. |
---|
771 | | - * |
---|
772 | | - * Once the device is given the dma address, the device owns this memory until |
---|
773 | | - * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed. |
---|
| 702 | + * Create a swiotlb mapping for the buffer at @paddr, and in case of DMAing |
---|
| 703 | + * to the device copy the data into it as well. |
---|
774 | 704 | */ |
---|
775 | | -dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, |
---|
776 | | - unsigned long offset, size_t size, |
---|
777 | | - enum dma_data_direction dir, |
---|
778 | | - unsigned long attrs) |
---|
| 705 | +dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, |
---|
| 706 | + enum dma_data_direction dir, unsigned long attrs) |
---|
779 | 707 | { |
---|
780 | | - phys_addr_t map, phys = page_to_phys(page) + offset; |
---|
781 | | - dma_addr_t dev_addr = phys_to_dma(dev, phys); |
---|
| 708 | + phys_addr_t swiotlb_addr; |
---|
| 709 | + dma_addr_t dma_addr; |
---|
782 | 710 | |
---|
783 | | - BUG_ON(dir == DMA_NONE); |
---|
784 | | - /* |
---|
785 | | - * If the address happens to be in the device's DMA window, |
---|
786 | | - * we can safely return the device addr and not worry about bounce |
---|
787 | | - * buffering it. |
---|
788 | | - */ |
---|
789 | | - if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE) |
---|
790 | | - return dev_addr; |
---|
| 711 | + trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size, |
---|
| 712 | + swiotlb_force); |
---|
791 | 713 | |
---|
792 | | - trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); |
---|
793 | | - |
---|
794 | | - /* Oh well, have to allocate and map a bounce buffer. */ |
---|
795 | | - map = map_single(dev, phys, size, dir, attrs); |
---|
796 | | - if (map == SWIOTLB_MAP_ERROR) |
---|
797 | | - return __phys_to_dma(dev, io_tlb_overflow_buffer); |
---|
798 | | - |
---|
799 | | - dev_addr = __phys_to_dma(dev, map); |
---|
| 714 | + swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, dir, |
---|
| 715 | + attrs); |
---|
| 716 | + if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR) |
---|
| 717 | + return DMA_MAPPING_ERROR; |
---|
800 | 718 | |
---|
801 | 719 | /* Ensure that the address returned is DMA'ble */ |
---|
802 | | - if (dma_capable(dev, dev_addr, size)) |
---|
803 | | - return dev_addr; |
---|
804 | | - |
---|
805 | | - attrs |= DMA_ATTR_SKIP_CPU_SYNC; |
---|
806 | | - swiotlb_tbl_unmap_single(dev, map, size, dir, attrs); |
---|
807 | | - |
---|
808 | | - return __phys_to_dma(dev, io_tlb_overflow_buffer); |
---|
809 | | -} |
---|
810 | | - |
---|
811 | | -/* |
---|
812 | | - * Unmap a single streaming mode DMA translation. The dma_addr and size must |
---|
813 | | - * match what was provided for in a previous swiotlb_map_page call. All |
---|
814 | | - * other usages are undefined. |
---|
815 | | - * |
---|
816 | | - * After this call, reads by the cpu to the buffer are guaranteed to see |
---|
817 | | - * whatever the device wrote there. |
---|
818 | | - */ |
---|
819 | | -static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, |
---|
820 | | - size_t size, enum dma_data_direction dir, |
---|
821 | | - unsigned long attrs) |
---|
822 | | -{ |
---|
823 | | - phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); |
---|
824 | | - |
---|
825 | | - BUG_ON(dir == DMA_NONE); |
---|
826 | | - |
---|
827 | | - if (is_swiotlb_buffer(paddr)) { |
---|
828 | | - swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); |
---|
829 | | - return; |
---|
| 720 | + dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr); |
---|
| 721 | + if (unlikely(!dma_capable(dev, dma_addr, size, true))) { |
---|
| 722 | + swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, size, dir, |
---|
| 723 | + attrs | DMA_ATTR_SKIP_CPU_SYNC); |
---|
| 724 | + dev_WARN_ONCE(dev, 1, |
---|
| 725 | + "swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", |
---|
| 726 | + &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); |
---|
| 727 | + return DMA_MAPPING_ERROR; |
---|
830 | 728 | } |
---|
831 | 729 | |
---|
832 | | - if (dir != DMA_FROM_DEVICE) |
---|
833 | | - return; |
---|
| 730 | + if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) |
---|
| 731 | + arch_sync_dma_for_device(swiotlb_addr, size, dir); |
---|
| 732 | + return dma_addr; |
---|
| 733 | +} |
---|
| 734 | + |
---|
| 735 | +size_t swiotlb_max_mapping_size(struct device *dev) |
---|
| 736 | +{ |
---|
| 737 | + int min_align_mask = dma_get_min_align_mask(dev); |
---|
| 738 | + int min_align = 0; |
---|
834 | 739 | |
---|
835 | 740 | /* |
---|
836 | | - * phys_to_virt doesn't work with highmem page but we could |
---|
837 | | - * call dma_mark_clean() with highmem page here. However, we |
---|
838 | | - * are fine since dma_mark_clean() is null on POWERPC. We can |
---|
839 | | - * make dma_mark_clean() take a physical address if necessary. |
---|
| 741 | + * swiotlb_find_slots() skips slots according to |
---|
| 742 | + * min align mask. This affects max mapping size. |
---|
| 743 | + * Take it into account here. |
---|
840 | 744 | */ |
---|
841 | | - dma_mark_clean(phys_to_virt(paddr), size); |
---|
| 745 | + if (min_align_mask) |
---|
| 746 | + min_align = roundup(min_align_mask, IO_TLB_SIZE); |
---|
| 747 | + |
---|
| 748 | + return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE - min_align; |
---|
842 | 749 | } |
---|
843 | 750 | |
---|
844 | | -void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, |
---|
845 | | - size_t size, enum dma_data_direction dir, |
---|
846 | | - unsigned long attrs) |
---|
| 751 | +bool is_swiotlb_active(void) |
---|
847 | 752 | { |
---|
848 | | - unmap_single(hwdev, dev_addr, size, dir, attrs); |
---|
849 | | -} |
---|
850 | | - |
---|
851 | | -/* |
---|
852 | | - * Make physical memory consistent for a single streaming mode DMA translation |
---|
853 | | - * after a transfer. |
---|
854 | | - * |
---|
855 | | - * If you perform a swiotlb_map_page() but wish to interrogate the buffer |
---|
856 | | - * using the cpu, yet do not wish to teardown the dma mapping, you must |
---|
857 | | - * call this function before doing so. At the next point you give the dma |
---|
858 | | - * address back to the card, you must first perform a |
---|
859 | | - * swiotlb_dma_sync_for_device, and then the device again owns the buffer |
---|
860 | | - */ |
---|
861 | | -static void |
---|
862 | | -swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, |
---|
863 | | - size_t size, enum dma_data_direction dir, |
---|
864 | | - enum dma_sync_target target) |
---|
865 | | -{ |
---|
866 | | - phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); |
---|
867 | | - |
---|
868 | | - BUG_ON(dir == DMA_NONE); |
---|
869 | | - |
---|
870 | | - if (is_swiotlb_buffer(paddr)) { |
---|
871 | | - swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target); |
---|
872 | | - return; |
---|
873 | | - } |
---|
874 | | - |
---|
875 | | - if (dir != DMA_FROM_DEVICE) |
---|
876 | | - return; |
---|
877 | | - |
---|
878 | | - dma_mark_clean(phys_to_virt(paddr), size); |
---|
879 | | -} |
---|
880 | | - |
---|
881 | | -void |
---|
882 | | -swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, |
---|
883 | | - size_t size, enum dma_data_direction dir) |
---|
884 | | -{ |
---|
885 | | - swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU); |
---|
886 | | -} |
---|
887 | | - |
---|
888 | | -void |
---|
889 | | -swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, |
---|
890 | | - size_t size, enum dma_data_direction dir) |
---|
891 | | -{ |
---|
892 | | - swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE); |
---|
893 | | -} |
---|
894 | | - |
---|
895 | | -/* |
---|
896 | | - * Map a set of buffers described by scatterlist in streaming mode for DMA. |
---|
897 | | - * This is the scatter-gather version of the above swiotlb_map_page |
---|
898 | | - * interface. Here the scatter gather list elements are each tagged with the |
---|
899 | | - * appropriate dma address and length. They are obtained via |
---|
900 | | - * sg_dma_{address,length}(SG). |
---|
901 | | - * |
---|
902 | | - * NOTE: An implementation may be able to use a smaller number of |
---|
903 | | - * DMA address/length pairs than there are SG table elements. |
---|
904 | | - * (for example via virtual mapping capabilities) |
---|
905 | | - * The routine returns the number of addr/length pairs actually |
---|
906 | | - * used, at most nents. |
---|
907 | | - * |
---|
908 | | - * Device ownership issues as mentioned above for swiotlb_map_page are the |
---|
909 | | - * same here. |
---|
910 | | - */ |
---|
911 | | -int |
---|
912 | | -swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, |
---|
913 | | - enum dma_data_direction dir, unsigned long attrs) |
---|
914 | | -{ |
---|
915 | | - struct scatterlist *sg; |
---|
916 | | - int i; |
---|
917 | | - |
---|
918 | | - BUG_ON(dir == DMA_NONE); |
---|
919 | | - |
---|
920 | | - for_each_sg(sgl, sg, nelems, i) { |
---|
921 | | - phys_addr_t paddr = sg_phys(sg); |
---|
922 | | - dma_addr_t dev_addr = phys_to_dma(hwdev, paddr); |
---|
923 | | - |
---|
924 | | - if (swiotlb_force == SWIOTLB_FORCE || |
---|
925 | | - !dma_capable(hwdev, dev_addr, sg->length)) { |
---|
926 | | - phys_addr_t map = map_single(hwdev, sg_phys(sg), |
---|
927 | | - sg->length, dir, attrs); |
---|
928 | | - if (map == SWIOTLB_MAP_ERROR) { |
---|
929 | | - /* Don't panic here, we expect map_sg users |
---|
930 | | - to do proper error handling. */ |
---|
931 | | - attrs |= DMA_ATTR_SKIP_CPU_SYNC; |
---|
932 | | - swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, |
---|
933 | | - attrs); |
---|
934 | | - sg_dma_len(sgl) = 0; |
---|
935 | | - return 0; |
---|
936 | | - } |
---|
937 | | - sg->dma_address = __phys_to_dma(hwdev, map); |
---|
938 | | - } else |
---|
939 | | - sg->dma_address = dev_addr; |
---|
940 | | - sg_dma_len(sg) = sg->length; |
---|
941 | | - } |
---|
942 | | - return nelems; |
---|
943 | | -} |
---|
944 | | - |
---|
945 | | -/* |
---|
946 | | - * Unmap a set of streaming mode DMA translations. Again, cpu read rules |
---|
947 | | - * concerning calls here are the same as for swiotlb_unmap_page() above. |
---|
948 | | - */ |
---|
949 | | -void |
---|
950 | | -swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, |
---|
951 | | - int nelems, enum dma_data_direction dir, |
---|
952 | | - unsigned long attrs) |
---|
953 | | -{ |
---|
954 | | - struct scatterlist *sg; |
---|
955 | | - int i; |
---|
956 | | - |
---|
957 | | - BUG_ON(dir == DMA_NONE); |
---|
958 | | - |
---|
959 | | - for_each_sg(sgl, sg, nelems, i) |
---|
960 | | - unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, |
---|
961 | | - attrs); |
---|
962 | | -} |
---|
963 | | - |
---|
964 | | -/* |
---|
965 | | - * Make physical memory consistent for a set of streaming mode DMA translations |
---|
966 | | - * after a transfer. |
---|
967 | | - * |
---|
968 | | - * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules |
---|
969 | | - * and usage. |
---|
970 | | - */ |
---|
971 | | -static void |
---|
972 | | -swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, |
---|
973 | | - int nelems, enum dma_data_direction dir, |
---|
974 | | - enum dma_sync_target target) |
---|
975 | | -{ |
---|
976 | | - struct scatterlist *sg; |
---|
977 | | - int i; |
---|
978 | | - |
---|
979 | | - for_each_sg(sgl, sg, nelems, i) |
---|
980 | | - swiotlb_sync_single(hwdev, sg->dma_address, |
---|
981 | | - sg_dma_len(sg), dir, target); |
---|
982 | | -} |
---|
983 | | - |
---|
984 | | -void |
---|
985 | | -swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, |
---|
986 | | - int nelems, enum dma_data_direction dir) |
---|
987 | | -{ |
---|
988 | | - swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU); |
---|
989 | | -} |
---|
990 | | - |
---|
991 | | -void |
---|
992 | | -swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, |
---|
993 | | - int nelems, enum dma_data_direction dir) |
---|
994 | | -{ |
---|
995 | | - swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); |
---|
996 | | -} |
---|
997 | | - |
---|
998 | | -int |
---|
999 | | -swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) |
---|
1000 | | -{ |
---|
1001 | | - return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer)); |
---|
1002 | | -} |
---|
1003 | | - |
---|
1004 | | -/* |
---|
1005 | | - * Return whether the given device DMA address mask can be supported |
---|
1006 | | - * properly. For example, if your device can only drive the low 24-bits |
---|
1007 | | - * during bus mastering, then you would pass 0x00ffffff as the mask to |
---|
1008 | | - * this function. |
---|
1009 | | - */ |
---|
1010 | | -int |
---|
1011 | | -swiotlb_dma_supported(struct device *hwdev, u64 mask) |
---|
1012 | | -{ |
---|
1013 | | - return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask; |
---|
1014 | | -} |
---|
1015 | | - |
---|
1016 | | -void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, |
---|
1017 | | - gfp_t gfp, unsigned long attrs) |
---|
1018 | | -{ |
---|
1019 | | - void *vaddr; |
---|
1020 | | - |
---|
1021 | | - /* temporary workaround: */ |
---|
1022 | | - if (gfp & __GFP_NOWARN) |
---|
1023 | | - attrs |= DMA_ATTR_NO_WARN; |
---|
1024 | | - |
---|
1025 | 753 | /* |
---|
1026 | | - * Don't print a warning when the first allocation attempt fails. |
---|
1027 | | - * swiotlb_alloc_coherent() will print a warning when the DMA memory |
---|
1028 | | - * allocation ultimately failed. |
---|
| 754 | + * When SWIOTLB is initialized, even if io_tlb_start points to physical |
---|
| 755 | + * address zero, io_tlb_end surely doesn't. |
---|
1029 | 756 | */ |
---|
1030 | | - gfp |= __GFP_NOWARN; |
---|
1031 | | - |
---|
1032 | | - vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs); |
---|
1033 | | - if (!vaddr) |
---|
1034 | | - vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs); |
---|
1035 | | - return vaddr; |
---|
| 757 | + return io_tlb_end != 0; |
---|
1036 | 758 | } |
---|
1037 | 759 | |
---|
1038 | | -void swiotlb_free(struct device *dev, size_t size, void *vaddr, |
---|
1039 | | - dma_addr_t dma_addr, unsigned long attrs) |
---|
| 760 | +#ifdef CONFIG_DEBUG_FS |
---|
| 761 | + |
---|
| 762 | +static int __init swiotlb_create_debugfs(void) |
---|
1040 | 763 | { |
---|
1041 | | - if (!swiotlb_free_buffer(dev, size, dma_addr)) |
---|
1042 | | - dma_direct_free(dev, size, vaddr, dma_addr, attrs); |
---|
| 764 | + struct dentry *root; |
---|
| 765 | + |
---|
| 766 | + root = debugfs_create_dir("swiotlb", NULL); |
---|
| 767 | + debugfs_create_ulong("io_tlb_nslabs", 0400, root, &io_tlb_nslabs); |
---|
| 768 | + debugfs_create_ulong("io_tlb_used", 0400, root, &io_tlb_used); |
---|
| 769 | + return 0; |
---|
1043 | 770 | } |
---|
1044 | 771 | |
---|
1045 | | -const struct dma_map_ops swiotlb_dma_ops = { |
---|
1046 | | - .mapping_error = swiotlb_dma_mapping_error, |
---|
1047 | | - .alloc = swiotlb_alloc, |
---|
1048 | | - .free = swiotlb_free, |
---|
1049 | | - .sync_single_for_cpu = swiotlb_sync_single_for_cpu, |
---|
1050 | | - .sync_single_for_device = swiotlb_sync_single_for_device, |
---|
1051 | | - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, |
---|
1052 | | - .sync_sg_for_device = swiotlb_sync_sg_for_device, |
---|
1053 | | - .map_sg = swiotlb_map_sg_attrs, |
---|
1054 | | - .unmap_sg = swiotlb_unmap_sg_attrs, |
---|
1055 | | - .map_page = swiotlb_map_page, |
---|
1056 | | - .unmap_page = swiotlb_unmap_page, |
---|
1057 | | - .dma_supported = dma_direct_supported, |
---|
1058 | | -}; |
---|
1059 | | -EXPORT_SYMBOL(swiotlb_dma_ops); |
---|
| 772 | +late_initcall(swiotlb_create_debugfs); |
---|
| 773 | + |
---|
| 774 | +#endif |
---|