| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * Dynamic DMA mapping support. |
|---|
| 3 | 4 | * |
|---|
| .. | .. |
|---|
| 21 | 22 | |
|---|
| 22 | 23 | #include <linux/cache.h> |
|---|
| 23 | 24 | #include <linux/dma-direct.h> |
|---|
| 25 | +#include <linux/dma-map-ops.h> |
|---|
| 24 | 26 | #include <linux/mm.h> |
|---|
| 25 | 27 | #include <linux/export.h> |
|---|
| 26 | 28 | #include <linux/spinlock.h> |
|---|
| .. | .. |
|---|
| 34 | 36 | #include <linux/scatterlist.h> |
|---|
| 35 | 37 | #include <linux/mem_encrypt.h> |
|---|
| 36 | 38 | #include <linux/set_memory.h> |
|---|
| 39 | +#ifdef CONFIG_DEBUG_FS |
|---|
| 40 | +#include <linux/debugfs.h> |
|---|
| 41 | +#endif |
|---|
| 37 | 42 | |
|---|
| 38 | 43 | #include <asm/io.h> |
|---|
| 39 | 44 | #include <asm/dma.h> |
|---|
| 40 | 45 | |
|---|
| 41 | 46 | #include <linux/init.h> |
|---|
| 42 | | -#include <linux/bootmem.h> |
|---|
| 47 | +#include <linux/memblock.h> |
|---|
| 43 | 48 | #include <linux/iommu-helper.h> |
|---|
| 44 | 49 | |
|---|
| 45 | 50 | #define CREATE_TRACE_POINTS |
|---|
| 46 | 51 | #include <trace/events/swiotlb.h> |
|---|
| 47 | | - |
|---|
| 48 | | -#define OFFSET(val,align) ((unsigned long) \ |
|---|
| 49 | | - ( (val) & ( (align) - 1))) |
|---|
| 50 | 52 | |
|---|
| 51 | 53 | #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) |
|---|
| 52 | 54 | |
|---|
| .. | .. |
|---|
| 64 | 66 | * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this |
|---|
| 65 | 67 | * API. |
|---|
| 66 | 68 | */ |
|---|
| 67 | | -static phys_addr_t io_tlb_start, io_tlb_end; |
|---|
| 69 | +phys_addr_t io_tlb_start, io_tlb_end; |
|---|
| 68 | 70 | |
|---|
| 69 | 71 | /* |
|---|
| 70 | 72 | * The number of IO TLB blocks (in groups of 64) between io_tlb_start and |
|---|
| .. | .. |
|---|
| 73 | 75 | static unsigned long io_tlb_nslabs; |
|---|
| 74 | 76 | |
|---|
| 75 | 77 | /* |
|---|
| 76 | | - * When the IOMMU overflows we return a fallback buffer. This sets the size. |
|---|
| 78 | + * The number of used IO TLB blocks |
|---|
| 77 | 79 | */ |
|---|
| 78 | | -static unsigned long io_tlb_overflow = 32*1024; |
|---|
| 79 | | - |
|---|
| 80 | | -static phys_addr_t io_tlb_overflow_buffer; |
|---|
| 80 | +static unsigned long io_tlb_used; |
|---|
| 81 | 81 | |
|---|
| 82 | 82 | /* |
|---|
| 83 | 83 | * This is a free list describing the number of free entries available from |
|---|
| .. | .. |
|---|
| 90 | 90 | * Max segment that we can provide which (if pages are contiguous) will |
|---|
| 91 | 91 | * not be bounced (unless SWIOTLB_FORCE is set). |
|---|
| 92 | 92 | */ |
|---|
| 93 | | -unsigned int max_segment; |
|---|
| 93 | +static unsigned int max_segment; |
|---|
| 94 | 94 | |
|---|
| 95 | 95 | /* |
|---|
| 96 | 96 | * We need to save away the original address corresponding to a mapped entry |
|---|
| .. | .. |
|---|
| 126 | 126 | return 0; |
|---|
| 127 | 127 | } |
|---|
| 128 | 128 | early_param("swiotlb", setup_io_tlb_npages); |
|---|
| 129 | | -/* make io_tlb_overflow tunable too? */ |
|---|
| 129 | + |
|---|
| 130 | +static bool no_iotlb_memory; |
|---|
| 130 | 131 | |
|---|
| 131 | 132 | unsigned long swiotlb_nr_tbl(void) |
|---|
| 132 | 133 | { |
|---|
| 133 | | - return io_tlb_nslabs; |
|---|
| 134 | + return unlikely(no_iotlb_memory) ? 0 : io_tlb_nslabs; |
|---|
| 134 | 135 | } |
|---|
| 135 | 136 | EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); |
|---|
| 136 | 137 | |
|---|
| 137 | 138 | unsigned int swiotlb_max_segment(void) |
|---|
| 138 | 139 | { |
|---|
| 139 | | - return max_segment; |
|---|
| 140 | + return unlikely(no_iotlb_memory) ? 0 : max_segment; |
|---|
| 140 | 141 | } |
|---|
| 141 | 142 | EXPORT_SYMBOL_GPL(swiotlb_max_segment); |
|---|
| 142 | 143 | |
|---|
| .. | .. |
|---|
| 159 | 160 | return size ? size : (IO_TLB_DEFAULT_SIZE); |
|---|
| 160 | 161 | } |
|---|
| 161 | 162 | |
|---|
| 162 | | -static bool no_iotlb_memory; |
|---|
| 163 | | - |
|---|
| 164 | 163 | void swiotlb_print_info(void) |
|---|
| 165 | 164 | { |
|---|
| 166 | 165 | unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; |
|---|
| .. | .. |
|---|
| 170 | 169 | return; |
|---|
| 171 | 170 | } |
|---|
| 172 | 171 | |
|---|
| 173 | | - pr_info("mapped [mem %#010llx-%#010llx] (%luMB)\n", |
|---|
| 174 | | - (unsigned long long)io_tlb_start, |
|---|
| 175 | | - (unsigned long long)io_tlb_end, |
|---|
| 172 | + pr_info("mapped [mem %pa-%pa] (%luMB)\n", &io_tlb_start, &io_tlb_end, |
|---|
| 176 | 173 | bytes >> 20); |
|---|
| 174 | +} |
|---|
| 175 | + |
|---|
| 176 | +static inline unsigned long io_tlb_offset(unsigned long val) |
|---|
| 177 | +{ |
|---|
| 178 | + return val & (IO_TLB_SEGSIZE - 1); |
|---|
| 179 | +} |
|---|
| 180 | + |
|---|
| 181 | +static inline unsigned long nr_slots(u64 val) |
|---|
| 182 | +{ |
|---|
| 183 | + return DIV_ROUND_UP(val, IO_TLB_SIZE); |
|---|
| 177 | 184 | } |
|---|
| 178 | 185 | |
|---|
| 179 | 186 | /* |
|---|
| .. | .. |
|---|
| 194 | 201 | bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT); |
|---|
| 195 | 202 | set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); |
|---|
| 196 | 203 | memset(vaddr, 0, bytes); |
|---|
| 197 | | - |
|---|
| 198 | | - vaddr = phys_to_virt(io_tlb_overflow_buffer); |
|---|
| 199 | | - bytes = PAGE_ALIGN(io_tlb_overflow); |
|---|
| 200 | | - set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); |
|---|
| 201 | | - memset(vaddr, 0, bytes); |
|---|
| 202 | 204 | } |
|---|
| 203 | 205 | |
|---|
| 204 | 206 | int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) |
|---|
| 205 | 207 | { |
|---|
| 206 | | - void *v_overflow_buffer; |
|---|
| 207 | 208 | unsigned long i, bytes; |
|---|
| 209 | + size_t alloc_size; |
|---|
| 208 | 210 | |
|---|
| 209 | 211 | bytes = nslabs << IO_TLB_SHIFT; |
|---|
| 210 | 212 | |
|---|
| .. | .. |
|---|
| 213 | 215 | io_tlb_end = io_tlb_start + bytes; |
|---|
| 214 | 216 | |
|---|
| 215 | 217 | /* |
|---|
| 216 | | - * Get the overflow emergency buffer |
|---|
| 217 | | - */ |
|---|
| 218 | | - v_overflow_buffer = memblock_virt_alloc_low_nopanic( |
|---|
| 219 | | - PAGE_ALIGN(io_tlb_overflow), |
|---|
| 220 | | - PAGE_SIZE); |
|---|
| 221 | | - if (!v_overflow_buffer) |
|---|
| 222 | | - return -ENOMEM; |
|---|
| 223 | | - |
|---|
| 224 | | - io_tlb_overflow_buffer = __pa(v_overflow_buffer); |
|---|
| 225 | | - |
|---|
| 226 | | - /* |
|---|
| 227 | 218 | * Allocate and initialize the free list array. This array is used |
|---|
| 228 | 219 | * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE |
|---|
| 229 | 220 | * between io_tlb_start and io_tlb_end. |
|---|
| 230 | 221 | */ |
|---|
| 231 | | - io_tlb_list = memblock_virt_alloc( |
|---|
| 232 | | - PAGE_ALIGN(io_tlb_nslabs * sizeof(int)), |
|---|
| 233 | | - PAGE_SIZE); |
|---|
| 234 | | - io_tlb_orig_addr = memblock_virt_alloc( |
|---|
| 235 | | - PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)), |
|---|
| 236 | | - PAGE_SIZE); |
|---|
| 222 | + alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(int)); |
|---|
| 223 | + io_tlb_list = memblock_alloc(alloc_size, PAGE_SIZE); |
|---|
| 224 | + if (!io_tlb_list) |
|---|
| 225 | + panic("%s: Failed to allocate %zu bytes align=0x%lx\n", |
|---|
| 226 | + __func__, alloc_size, PAGE_SIZE); |
|---|
| 227 | + |
|---|
| 228 | + alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)); |
|---|
| 229 | + io_tlb_orig_addr = memblock_alloc(alloc_size, PAGE_SIZE); |
|---|
| 230 | + if (!io_tlb_orig_addr) |
|---|
| 231 | + panic("%s: Failed to allocate %zu bytes align=0x%lx\n", |
|---|
| 232 | + __func__, alloc_size, PAGE_SIZE); |
|---|
| 233 | + |
|---|
| 237 | 234 | for (i = 0; i < io_tlb_nslabs; i++) { |
|---|
| 238 | | - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); |
|---|
| 235 | + io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i); |
|---|
| 239 | 236 | io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; |
|---|
| 240 | 237 | } |
|---|
| 241 | 238 | io_tlb_index = 0; |
|---|
| .. | .. |
|---|
| 267 | 264 | bytes = io_tlb_nslabs << IO_TLB_SHIFT; |
|---|
| 268 | 265 | |
|---|
| 269 | 266 | /* Get IO TLB memory from the low pages */ |
|---|
| 270 | | - vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); |
|---|
| 267 | + vstart = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE); |
|---|
| 271 | 268 | if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) |
|---|
| 272 | 269 | return; |
|---|
| 273 | 270 | |
|---|
| .. | .. |
|---|
| 329 | 326 | return rc; |
|---|
| 330 | 327 | } |
|---|
| 331 | 328 | |
|---|
| 329 | +static void swiotlb_cleanup(void) |
|---|
| 330 | +{ |
|---|
| 331 | + io_tlb_end = 0; |
|---|
| 332 | + io_tlb_start = 0; |
|---|
| 333 | + io_tlb_nslabs = 0; |
|---|
| 334 | + max_segment = 0; |
|---|
| 335 | +} |
|---|
| 336 | + |
|---|
| 332 | 337 | int |
|---|
| 333 | 338 | swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) |
|---|
| 334 | 339 | { |
|---|
| 335 | 340 | unsigned long i, bytes; |
|---|
| 336 | | - unsigned char *v_overflow_buffer; |
|---|
| 337 | 341 | |
|---|
| 338 | 342 | bytes = nslabs << IO_TLB_SHIFT; |
|---|
| 339 | 343 | |
|---|
| .. | .. |
|---|
| 343 | 347 | |
|---|
| 344 | 348 | set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT); |
|---|
| 345 | 349 | memset(tlb, 0, bytes); |
|---|
| 346 | | - |
|---|
| 347 | | - /* |
|---|
| 348 | | - * Get the overflow emergency buffer |
|---|
| 349 | | - */ |
|---|
| 350 | | - v_overflow_buffer = (void *)__get_free_pages(GFP_DMA, |
|---|
| 351 | | - get_order(io_tlb_overflow)); |
|---|
| 352 | | - if (!v_overflow_buffer) |
|---|
| 353 | | - goto cleanup2; |
|---|
| 354 | | - |
|---|
| 355 | | - set_memory_decrypted((unsigned long)v_overflow_buffer, |
|---|
| 356 | | - io_tlb_overflow >> PAGE_SHIFT); |
|---|
| 357 | | - memset(v_overflow_buffer, 0, io_tlb_overflow); |
|---|
| 358 | | - io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer); |
|---|
| 359 | 350 | |
|---|
| 360 | 351 | /* |
|---|
| 361 | 352 | * Allocate and initialize the free list array. This array is used |
|---|
| .. | .. |
|---|
| 375 | 366 | goto cleanup4; |
|---|
| 376 | 367 | |
|---|
| 377 | 368 | for (i = 0; i < io_tlb_nslabs; i++) { |
|---|
| 378 | | - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); |
|---|
| 369 | + io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i); |
|---|
| 379 | 370 | io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; |
|---|
| 380 | 371 | } |
|---|
| 381 | 372 | io_tlb_index = 0; |
|---|
| .. | .. |
|---|
| 394 | 385 | sizeof(int))); |
|---|
| 395 | 386 | io_tlb_list = NULL; |
|---|
| 396 | 387 | cleanup3: |
|---|
| 397 | | - free_pages((unsigned long)v_overflow_buffer, |
|---|
| 398 | | - get_order(io_tlb_overflow)); |
|---|
| 399 | | - io_tlb_overflow_buffer = 0; |
|---|
| 400 | | -cleanup2: |
|---|
| 401 | | - io_tlb_end = 0; |
|---|
| 402 | | - io_tlb_start = 0; |
|---|
| 403 | | - io_tlb_nslabs = 0; |
|---|
| 404 | | - max_segment = 0; |
|---|
| 388 | + swiotlb_cleanup(); |
|---|
| 405 | 389 | return -ENOMEM; |
|---|
| 406 | 390 | } |
|---|
| 407 | 391 | |
|---|
| .. | .. |
|---|
| 411 | 395 | return; |
|---|
| 412 | 396 | |
|---|
| 413 | 397 | if (late_alloc) { |
|---|
| 414 | | - free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer), |
|---|
| 415 | | - get_order(io_tlb_overflow)); |
|---|
| 416 | 398 | free_pages((unsigned long)io_tlb_orig_addr, |
|---|
| 417 | 399 | get_order(io_tlb_nslabs * sizeof(phys_addr_t))); |
|---|
| 418 | 400 | free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * |
|---|
| .. | .. |
|---|
| 420 | 402 | free_pages((unsigned long)phys_to_virt(io_tlb_start), |
|---|
| 421 | 403 | get_order(io_tlb_nslabs << IO_TLB_SHIFT)); |
|---|
| 422 | 404 | } else { |
|---|
| 423 | | - memblock_free_late(io_tlb_overflow_buffer, |
|---|
| 424 | | - PAGE_ALIGN(io_tlb_overflow)); |
|---|
| 425 | 405 | memblock_free_late(__pa(io_tlb_orig_addr), |
|---|
| 426 | 406 | PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); |
|---|
| 427 | 407 | memblock_free_late(__pa(io_tlb_list), |
|---|
| .. | .. |
|---|
| 429 | 409 | memblock_free_late(io_tlb_start, |
|---|
| 430 | 410 | PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); |
|---|
| 431 | 411 | } |
|---|
| 432 | | - io_tlb_nslabs = 0; |
|---|
| 433 | | - max_segment = 0; |
|---|
| 434 | | -} |
|---|
| 435 | | - |
|---|
| 436 | | -int is_swiotlb_buffer(phys_addr_t paddr) |
|---|
| 437 | | -{ |
|---|
| 438 | | - return paddr >= io_tlb_start && paddr < io_tlb_end; |
|---|
| 412 | + swiotlb_cleanup(); |
|---|
| 439 | 413 | } |
|---|
| 440 | 414 | |
|---|
| 441 | 415 | /* |
|---|
| 442 | | - * Bounce: copy the swiotlb buffer back to the original dma location |
|---|
| 416 | + * Bounce: copy the swiotlb buffer from or back to the original dma location |
|---|
| 443 | 417 | */ |
|---|
| 444 | 418 | static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, |
|---|
| 445 | 419 | size_t size, enum dma_data_direction dir) |
|---|
| .. | .. |
|---|
| 478 | 452 | } |
|---|
| 479 | 453 | } |
|---|
| 480 | 454 | |
|---|
| 481 | | -phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, |
|---|
| 482 | | - dma_addr_t tbl_dma_addr, |
|---|
| 483 | | - phys_addr_t orig_addr, size_t size, |
|---|
| 484 | | - enum dma_data_direction dir, |
|---|
| 485 | | - unsigned long attrs) |
|---|
| 455 | +static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx) |
|---|
| 486 | 456 | { |
|---|
| 457 | + return start + (idx << IO_TLB_SHIFT); |
|---|
| 458 | +} |
|---|
| 459 | + |
|---|
| 460 | +/* |
|---|
| 461 | + * Return the offset into an iotlb slot required to keep the device happy. |
|---|
| 462 | + */ |
|---|
| 463 | +static unsigned int swiotlb_align_offset(struct device *dev, u64 addr) |
|---|
| 464 | +{ |
|---|
| 465 | + return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1); |
|---|
| 466 | +} |
|---|
| 467 | + |
|---|
| 468 | +/* |
|---|
| 469 | + * Carefully handle integer overflow which can occur when boundary_mask == ~0UL. |
|---|
| 470 | + */ |
|---|
| 471 | +static inline unsigned long get_max_slots(unsigned long boundary_mask) |
|---|
| 472 | +{ |
|---|
| 473 | + if (boundary_mask == ~0UL) |
|---|
| 474 | + return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); |
|---|
| 475 | + return nr_slots(boundary_mask + 1); |
|---|
| 476 | +} |
|---|
| 477 | + |
|---|
| 478 | +static unsigned int wrap_index(unsigned int index) |
|---|
| 479 | +{ |
|---|
| 480 | + if (index >= io_tlb_nslabs) |
|---|
| 481 | + return 0; |
|---|
| 482 | + return index; |
|---|
| 483 | +} |
|---|
| 484 | + |
|---|
| 485 | +/* |
|---|
| 486 | + * Find a suitable number of IO TLB entries that will fit this request and |
|---|
| 487 | + * allocate a buffer from that IO TLB pool. |
|---|
| 488 | + */ |
|---|
| 489 | +static int find_slots(struct device *dev, phys_addr_t orig_addr, |
|---|
| 490 | + size_t alloc_size) |
|---|
| 491 | +{ |
|---|
| 492 | + unsigned long boundary_mask = dma_get_seg_boundary(dev); |
|---|
| 493 | + dma_addr_t tbl_dma_addr = |
|---|
| 494 | + phys_to_dma_unencrypted(dev, io_tlb_start) & boundary_mask; |
|---|
| 495 | + unsigned long max_slots = get_max_slots(boundary_mask); |
|---|
| 496 | + unsigned int iotlb_align_mask = |
|---|
| 497 | + dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1); |
|---|
| 498 | + unsigned int nslots = nr_slots(alloc_size), stride; |
|---|
| 499 | + unsigned int index, wrap, count = 0, i; |
|---|
| 487 | 500 | unsigned long flags; |
|---|
| 488 | | - phys_addr_t tlb_addr; |
|---|
| 489 | | - unsigned int nslots, stride, index, wrap; |
|---|
| 490 | | - int i; |
|---|
| 491 | | - unsigned long mask; |
|---|
| 492 | | - unsigned long offset_slots; |
|---|
| 493 | | - unsigned long max_slots; |
|---|
| 494 | | - |
|---|
| 495 | | - if (no_iotlb_memory) |
|---|
| 496 | | - panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); |
|---|
| 497 | | - |
|---|
| 498 | | - if (mem_encrypt_active()) |
|---|
| 499 | | - pr_warn_once("%s is active and system is using DMA bounce buffers\n", |
|---|
| 500 | | - sme_active() ? "SME" : "SEV"); |
|---|
| 501 | | - |
|---|
| 502 | | - mask = dma_get_seg_boundary(hwdev); |
|---|
| 503 | | - |
|---|
| 504 | | - tbl_dma_addr &= mask; |
|---|
| 505 | | - |
|---|
| 506 | | - offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; |
|---|
| 507 | | - |
|---|
| 508 | | - /* |
|---|
| 509 | | - * Carefully handle integer overflow which can occur when mask == ~0UL. |
|---|
| 510 | | - */ |
|---|
| 511 | | - max_slots = mask + 1 |
|---|
| 512 | | - ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT |
|---|
| 513 | | - : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); |
|---|
| 514 | | - |
|---|
| 515 | | - /* |
|---|
| 516 | | - * For mappings greater than or equal to a page, we limit the stride |
|---|
| 517 | | - * (and hence alignment) to a page size. |
|---|
| 518 | | - */ |
|---|
| 519 | | - nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; |
|---|
| 520 | | - if (size >= PAGE_SIZE) |
|---|
| 521 | | - stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); |
|---|
| 522 | | - else |
|---|
| 523 | | - stride = 1; |
|---|
| 524 | 501 | |
|---|
| 525 | 502 | BUG_ON(!nslots); |
|---|
| 526 | 503 | |
|---|
| 527 | 504 | /* |
|---|
| 528 | | - * Find suitable number of IO TLB entries size that will fit this |
|---|
| 529 | | - * request and allocate a buffer from that IO TLB pool. |
|---|
| 505 | + * For mappings with an alignment requirement don't bother looping to |
|---|
| 506 | + * unaligned slots once we found an aligned one. For allocations of |
|---|
| 507 | + * PAGE_SIZE or larger only look for page aligned allocations. |
|---|
| 530 | 508 | */ |
|---|
| 531 | | - spin_lock_irqsave(&io_tlb_lock, flags); |
|---|
| 532 | | - index = ALIGN(io_tlb_index, stride); |
|---|
| 533 | | - if (index >= io_tlb_nslabs) |
|---|
| 534 | | - index = 0; |
|---|
| 535 | | - wrap = index; |
|---|
| 509 | + stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1; |
|---|
| 510 | + if (alloc_size >= PAGE_SIZE) |
|---|
| 511 | + stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT)); |
|---|
| 536 | 512 | |
|---|
| 513 | + spin_lock_irqsave(&io_tlb_lock, flags); |
|---|
| 514 | + if (unlikely(nslots > io_tlb_nslabs - io_tlb_used)) |
|---|
| 515 | + goto not_found; |
|---|
| 516 | + |
|---|
| 517 | + index = wrap = wrap_index(ALIGN(io_tlb_index, stride)); |
|---|
| 537 | 518 | do { |
|---|
| 538 | | - while (iommu_is_span_boundary(index, nslots, offset_slots, |
|---|
| 539 | | - max_slots)) { |
|---|
| 540 | | - index += stride; |
|---|
| 541 | | - if (index >= io_tlb_nslabs) |
|---|
| 542 | | - index = 0; |
|---|
| 543 | | - if (index == wrap) |
|---|
| 544 | | - goto not_found; |
|---|
| 519 | + if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) != |
|---|
| 520 | + (orig_addr & iotlb_align_mask)) { |
|---|
| 521 | + index = wrap_index(index + 1); |
|---|
| 522 | + continue; |
|---|
| 545 | 523 | } |
|---|
| 546 | 524 | |
|---|
| 547 | 525 | /* |
|---|
| .. | .. |
|---|
| 549 | 527 | * contiguous buffers, we allocate the buffers from that slot |
|---|
| 550 | 528 | * and mark the entries as '0' indicating unavailable. |
|---|
| 551 | 529 | */ |
|---|
| 552 | | - if (io_tlb_list[index] >= nslots) { |
|---|
| 553 | | - int count = 0; |
|---|
| 554 | | - |
|---|
| 555 | | - for (i = index; i < (int) (index + nslots); i++) |
|---|
| 556 | | - io_tlb_list[i] = 0; |
|---|
| 557 | | - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) |
|---|
| 558 | | - io_tlb_list[i] = ++count; |
|---|
| 559 | | - tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT); |
|---|
| 560 | | - |
|---|
| 561 | | - /* |
|---|
| 562 | | - * Update the indices to avoid searching in the next |
|---|
| 563 | | - * round. |
|---|
| 564 | | - */ |
|---|
| 565 | | - io_tlb_index = ((index + nslots) < io_tlb_nslabs |
|---|
| 566 | | - ? (index + nslots) : 0); |
|---|
| 567 | | - |
|---|
| 568 | | - goto found; |
|---|
| 530 | + if (!iommu_is_span_boundary(index, nslots, |
|---|
| 531 | + nr_slots(tbl_dma_addr), |
|---|
| 532 | + max_slots)) { |
|---|
| 533 | + if (io_tlb_list[index] >= nslots) |
|---|
| 534 | + goto found; |
|---|
| 569 | 535 | } |
|---|
| 570 | | - index += stride; |
|---|
| 571 | | - if (index >= io_tlb_nslabs) |
|---|
| 572 | | - index = 0; |
|---|
| 536 | + index = wrap_index(index + stride); |
|---|
| 573 | 537 | } while (index != wrap); |
|---|
| 574 | 538 | |
|---|
| 575 | 539 | not_found: |
|---|
| 576 | 540 | spin_unlock_irqrestore(&io_tlb_lock, flags); |
|---|
| 577 | | - if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) |
|---|
| 578 | | - dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size); |
|---|
| 579 | | - return SWIOTLB_MAP_ERROR; |
|---|
| 541 | + return -1; |
|---|
| 542 | + |
|---|
| 580 | 543 | found: |
|---|
| 544 | + for (i = index; i < index + nslots; i++) |
|---|
| 545 | + io_tlb_list[i] = 0; |
|---|
| 546 | + for (i = index - 1; |
|---|
| 547 | + io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && |
|---|
| 548 | + io_tlb_list[i]; i--) |
|---|
| 549 | + io_tlb_list[i] = ++count; |
|---|
| 550 | + |
|---|
| 551 | + /* |
|---|
| 552 | + * Update the indices to avoid searching in the next round. |
|---|
| 553 | + */ |
|---|
| 554 | + if (index + nslots < io_tlb_nslabs) |
|---|
| 555 | + io_tlb_index = index + nslots; |
|---|
| 556 | + else |
|---|
| 557 | + io_tlb_index = 0; |
|---|
| 558 | + io_tlb_used += nslots; |
|---|
| 559 | + |
|---|
| 581 | 560 | spin_unlock_irqrestore(&io_tlb_lock, flags); |
|---|
| 561 | + return index; |
|---|
| 562 | +} |
|---|
| 563 | + |
|---|
| 564 | +phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, |
|---|
| 565 | + size_t mapping_size, size_t alloc_size, |
|---|
| 566 | + enum dma_data_direction dir, unsigned long attrs) |
|---|
| 567 | +{ |
|---|
| 568 | + unsigned int offset = swiotlb_align_offset(dev, orig_addr); |
|---|
| 569 | + unsigned int i; |
|---|
| 570 | + int index; |
|---|
| 571 | + phys_addr_t tlb_addr; |
|---|
| 572 | + |
|---|
| 573 | + if (no_iotlb_memory) |
|---|
| 574 | + panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); |
|---|
| 575 | + |
|---|
| 576 | + if (mem_encrypt_active()) |
|---|
| 577 | + pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n"); |
|---|
| 578 | + |
|---|
| 579 | + if (mapping_size > alloc_size) { |
|---|
| 580 | + dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)", |
|---|
| 581 | + mapping_size, alloc_size); |
|---|
| 582 | + return (phys_addr_t)DMA_MAPPING_ERROR; |
|---|
| 583 | + } |
|---|
| 584 | + |
|---|
| 585 | + index = find_slots(dev, orig_addr, alloc_size + offset); |
|---|
| 586 | + if (index == -1) { |
|---|
| 587 | + if (!(attrs & DMA_ATTR_NO_WARN)) |
|---|
| 588 | + dev_warn_ratelimited(dev, |
|---|
| 589 | + "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", |
|---|
| 590 | + alloc_size, io_tlb_nslabs, io_tlb_used); |
|---|
| 591 | + return (phys_addr_t)DMA_MAPPING_ERROR; |
|---|
| 592 | + } |
|---|
| 582 | 593 | |
|---|
| 583 | 594 | /* |
|---|
| 584 | 595 | * Save away the mapping from the original address to the DMA address. |
|---|
| 585 | 596 | * This is needed when we sync the memory. Then we sync the buffer if |
|---|
| 586 | 597 | * needed. |
|---|
| 587 | 598 | */ |
|---|
| 588 | | - for (i = 0; i < nslots; i++) |
|---|
| 589 | | - io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); |
|---|
| 590 | | - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && |
|---|
| 591 | | - (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) |
|---|
| 592 | | - swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); |
|---|
| 599 | + for (i = 0; i < nr_slots(alloc_size + offset); i++) |
|---|
| 600 | + io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i); |
|---|
| 593 | 601 | |
|---|
| 602 | + tlb_addr = slot_addr(io_tlb_start, index) + offset; |
|---|
| 603 | + /* |
|---|
| 604 | + * When dir == DMA_FROM_DEVICE we could omit the copy from the orig |
|---|
| 605 | + * to the tlb buffer, if we knew for sure the device will |
|---|
| 606 | + * overwrite the entire current content. But we don't. Thus |
|---|
| 607 | + * unconditional bounce may prevent leaking swiotlb content (i.e. |
|---|
| 608 | + * kernel memory) to user-space. |
|---|
| 609 | + */ |
|---|
| 610 | + swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE); |
|---|
| 594 | 611 | return tlb_addr; |
|---|
| 595 | | -} |
|---|
| 596 | | - |
|---|
| 597 | | -/* |
|---|
| 598 | | - * Allocates bounce buffer and returns its physical address. |
|---|
| 599 | | - */ |
|---|
| 600 | | -static phys_addr_t |
|---|
| 601 | | -map_single(struct device *hwdev, phys_addr_t phys, size_t size, |
|---|
| 602 | | - enum dma_data_direction dir, unsigned long attrs) |
|---|
| 603 | | -{ |
|---|
| 604 | | - dma_addr_t start_dma_addr; |
|---|
| 605 | | - |
|---|
| 606 | | - if (swiotlb_force == SWIOTLB_NO_FORCE) { |
|---|
| 607 | | - dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n", |
|---|
| 608 | | - &phys); |
|---|
| 609 | | - return SWIOTLB_MAP_ERROR; |
|---|
| 610 | | - } |
|---|
| 611 | | - |
|---|
| 612 | | - start_dma_addr = __phys_to_dma(hwdev, io_tlb_start); |
|---|
| 613 | | - return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, |
|---|
| 614 | | - dir, attrs); |
|---|
| 615 | 612 | } |
|---|
| 616 | 613 | |
|---|
| 617 | 614 | /* |
|---|
| 618 | 615 | * tlb_addr is the physical address of the bounce buffer to unmap. |
|---|
| 619 | 616 | */ |
|---|
| 620 | 617 | void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, |
|---|
| 621 | | - size_t size, enum dma_data_direction dir, |
|---|
| 622 | | - unsigned long attrs) |
|---|
| 618 | + size_t mapping_size, size_t alloc_size, |
|---|
| 619 | + enum dma_data_direction dir, unsigned long attrs) |
|---|
| 623 | 620 | { |
|---|
| 624 | 621 | unsigned long flags; |
|---|
| 625 | | - int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; |
|---|
| 626 | | - int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; |
|---|
| 622 | + unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr); |
|---|
| 623 | + int i, count, nslots = nr_slots(alloc_size + offset); |
|---|
| 624 | + int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT; |
|---|
| 627 | 625 | phys_addr_t orig_addr = io_tlb_orig_addr[index]; |
|---|
| 628 | 626 | |
|---|
| 629 | 627 | /* |
|---|
| .. | .. |
|---|
| 632 | 630 | if (orig_addr != INVALID_PHYS_ADDR && |
|---|
| 633 | 631 | !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && |
|---|
| 634 | 632 | ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) |
|---|
| 635 | | - swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE); |
|---|
| 633 | + swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_FROM_DEVICE); |
|---|
| 636 | 634 | |
|---|
| 637 | 635 | /* |
|---|
| 638 | 636 | * Return the buffer to the free list by setting the corresponding |
|---|
| .. | .. |
|---|
| 641 | 639 | * with slots below and above the pool being returned. |
|---|
| 642 | 640 | */ |
|---|
| 643 | 641 | spin_lock_irqsave(&io_tlb_lock, flags); |
|---|
| 644 | | - { |
|---|
| 645 | | - count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? |
|---|
| 646 | | - io_tlb_list[index + nslots] : 0); |
|---|
| 647 | | - /* |
|---|
| 648 | | - * Step 1: return the slots to the free list, merging the |
|---|
| 649 | | - * slots with superceeding slots |
|---|
| 650 | | - */ |
|---|
| 651 | | - for (i = index + nslots - 1; i >= index; i--) { |
|---|
| 652 | | - io_tlb_list[i] = ++count; |
|---|
| 653 | | - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; |
|---|
| 654 | | - } |
|---|
| 655 | | - /* |
|---|
| 656 | | - * Step 2: merge the returned slots with the preceding slots, |
|---|
| 657 | | - * if available (non zero) |
|---|
| 658 | | - */ |
|---|
| 659 | | - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) |
|---|
| 660 | | - io_tlb_list[i] = ++count; |
|---|
| 642 | + if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE)) |
|---|
| 643 | + count = io_tlb_list[index + nslots]; |
|---|
| 644 | + else |
|---|
| 645 | + count = 0; |
|---|
| 646 | + |
|---|
| 647 | + /* |
|---|
| 648 | + * Step 1: return the slots to the free list, merging the slots with |
|---|
| 649 | + * succeeding slots |
|---|
| 650 | + */ |
|---|
| 651 | + for (i = index + nslots - 1; i >= index; i--) { |
|---|
| 652 | + io_tlb_list[i] = ++count; |
|---|
| 653 | + io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; |
|---|
| 661 | 654 | } |
|---|
| 655 | + |
|---|
| 656 | + /* |
|---|
| 657 | + * Step 2: merge the returned slots with the preceding slots, if |
|---|
| 658 | + * available (non zero) |
|---|
| 659 | + */ |
|---|
| 660 | + for (i = index - 1; |
|---|
| 661 | + io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && io_tlb_list[i]; |
|---|
| 662 | + i--) |
|---|
| 663 | + io_tlb_list[i] = ++count; |
|---|
| 664 | + io_tlb_used -= nslots; |
|---|
| 662 | 665 | spin_unlock_irqrestore(&io_tlb_lock, flags); |
|---|
| 663 | 666 | } |
|---|
| 664 | 667 | |
|---|
| .. | .. |
|---|
| 671 | 674 | |
|---|
| 672 | 675 | if (orig_addr == INVALID_PHYS_ADDR) |
|---|
| 673 | 676 | return; |
|---|
| 674 | | - orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1); |
|---|
| 677 | + |
|---|
| 678 | + orig_addr += (tlb_addr & (IO_TLB_SIZE - 1)) - |
|---|
| 679 | + swiotlb_align_offset(hwdev, orig_addr); |
|---|
| 675 | 680 | |
|---|
| 676 | 681 | switch (target) { |
|---|
| 677 | 682 | case SYNC_FOR_CPU: |
|---|
| .. | .. |
|---|
| 693 | 698 | } |
|---|
| 694 | 699 | } |
|---|
| 695 | 700 | |
|---|
| 696 | | -static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr, |
|---|
| 697 | | - size_t size) |
|---|
| 698 | | -{ |
|---|
| 699 | | - u64 mask = DMA_BIT_MASK(32); |
|---|
| 700 | | - |
|---|
| 701 | | - if (dev && dev->coherent_dma_mask) |
|---|
| 702 | | - mask = dev->coherent_dma_mask; |
|---|
| 703 | | - return addr + size - 1 <= mask; |
|---|
| 704 | | -} |
|---|
| 705 | | - |
|---|
| 706 | | -static void * |
|---|
| 707 | | -swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, |
|---|
| 708 | | - unsigned long attrs) |
|---|
| 709 | | -{ |
|---|
| 710 | | - phys_addr_t phys_addr; |
|---|
| 711 | | - |
|---|
| 712 | | - if (swiotlb_force == SWIOTLB_NO_FORCE) |
|---|
| 713 | | - goto out_warn; |
|---|
| 714 | | - |
|---|
| 715 | | - phys_addr = swiotlb_tbl_map_single(dev, |
|---|
| 716 | | - __phys_to_dma(dev, io_tlb_start), |
|---|
| 717 | | - 0, size, DMA_FROM_DEVICE, attrs); |
|---|
| 718 | | - if (phys_addr == SWIOTLB_MAP_ERROR) |
|---|
| 719 | | - goto out_warn; |
|---|
| 720 | | - |
|---|
| 721 | | - *dma_handle = __phys_to_dma(dev, phys_addr); |
|---|
| 722 | | - if (!dma_coherent_ok(dev, *dma_handle, size)) |
|---|
| 723 | | - goto out_unmap; |
|---|
| 724 | | - |
|---|
| 725 | | - memset(phys_to_virt(phys_addr), 0, size); |
|---|
| 726 | | - return phys_to_virt(phys_addr); |
|---|
| 727 | | - |
|---|
| 728 | | -out_unmap: |
|---|
| 729 | | - dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", |
|---|
| 730 | | - (unsigned long long)dev->coherent_dma_mask, |
|---|
| 731 | | - (unsigned long long)*dma_handle); |
|---|
| 732 | | - |
|---|
| 733 | | - /* |
|---|
| 734 | | - * DMA_TO_DEVICE to avoid memcpy in unmap_single. |
|---|
| 735 | | - * DMA_ATTR_SKIP_CPU_SYNC is optional. |
|---|
| 736 | | - */ |
|---|
| 737 | | - swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, |
|---|
| 738 | | - DMA_ATTR_SKIP_CPU_SYNC); |
|---|
| 739 | | -out_warn: |
|---|
| 740 | | - if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) { |
|---|
| 741 | | - dev_warn(dev, |
|---|
| 742 | | - "swiotlb: coherent allocation failed, size=%zu\n", |
|---|
| 743 | | - size); |
|---|
| 744 | | - dump_stack(); |
|---|
| 745 | | - } |
|---|
| 746 | | - return NULL; |
|---|
| 747 | | -} |
|---|
| 748 | | - |
|---|
| 749 | | -static bool swiotlb_free_buffer(struct device *dev, size_t size, |
|---|
| 750 | | - dma_addr_t dma_addr) |
|---|
| 751 | | -{ |
|---|
| 752 | | - phys_addr_t phys_addr = dma_to_phys(dev, dma_addr); |
|---|
| 753 | | - |
|---|
| 754 | | - WARN_ON_ONCE(irqs_disabled()); |
|---|
| 755 | | - |
|---|
| 756 | | - if (!is_swiotlb_buffer(phys_addr)) |
|---|
| 757 | | - return false; |
|---|
| 758 | | - |
|---|
| 759 | | - /* |
|---|
| 760 | | - * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single. |
|---|
| 761 | | - * DMA_ATTR_SKIP_CPU_SYNC is optional. |
|---|
| 762 | | - */ |
|---|
| 763 | | - swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, |
|---|
| 764 | | - DMA_ATTR_SKIP_CPU_SYNC); |
|---|
| 765 | | - return true; |
|---|
| 766 | | -} |
|---|
| 767 | | - |
|---|
| 768 | 701 | /* |
|---|
| 769 | | - * Map a single buffer of the indicated size for DMA in streaming mode. The |
|---|
| 770 | | - * physical address to use is returned. |
|---|
| 771 | | - * |
|---|
| 772 | | - * Once the device is given the dma address, the device owns this memory until |
|---|
| 773 | | - * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed. |
|---|
| 702 | + * Create a swiotlb mapping for the buffer at @paddr, and in case of DMAing |
|---|
| 703 | + * to the device copy the data into it as well. |
|---|
| 774 | 704 | */ |
|---|
| 775 | | -dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, |
|---|
| 776 | | - unsigned long offset, size_t size, |
|---|
| 777 | | - enum dma_data_direction dir, |
|---|
| 778 | | - unsigned long attrs) |
|---|
| 705 | +dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, |
|---|
| 706 | + enum dma_data_direction dir, unsigned long attrs) |
|---|
| 779 | 707 | { |
|---|
| 780 | | - phys_addr_t map, phys = page_to_phys(page) + offset; |
|---|
| 781 | | - dma_addr_t dev_addr = phys_to_dma(dev, phys); |
|---|
| 708 | + phys_addr_t swiotlb_addr; |
|---|
| 709 | + dma_addr_t dma_addr; |
|---|
| 782 | 710 | |
|---|
| 783 | | - BUG_ON(dir == DMA_NONE); |
|---|
| 784 | | - /* |
|---|
| 785 | | - * If the address happens to be in the device's DMA window, |
|---|
| 786 | | - * we can safely return the device addr and not worry about bounce |
|---|
| 787 | | - * buffering it. |
|---|
| 788 | | - */ |
|---|
| 789 | | - if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE) |
|---|
| 790 | | - return dev_addr; |
|---|
| 711 | + trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size, |
|---|
| 712 | + swiotlb_force); |
|---|
| 791 | 713 | |
|---|
| 792 | | - trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); |
|---|
| 793 | | - |
|---|
| 794 | | - /* Oh well, have to allocate and map a bounce buffer. */ |
|---|
| 795 | | - map = map_single(dev, phys, size, dir, attrs); |
|---|
| 796 | | - if (map == SWIOTLB_MAP_ERROR) |
|---|
| 797 | | - return __phys_to_dma(dev, io_tlb_overflow_buffer); |
|---|
| 798 | | - |
|---|
| 799 | | - dev_addr = __phys_to_dma(dev, map); |
|---|
| 714 | + swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, dir, |
|---|
| 715 | + attrs); |
|---|
| 716 | + if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR) |
|---|
| 717 | + return DMA_MAPPING_ERROR; |
|---|
| 800 | 718 | |
|---|
| 801 | 719 | /* Ensure that the address returned is DMA'ble */ |
|---|
| 802 | | - if (dma_capable(dev, dev_addr, size)) |
|---|
| 803 | | - return dev_addr; |
|---|
| 804 | | - |
|---|
| 805 | | - attrs |= DMA_ATTR_SKIP_CPU_SYNC; |
|---|
| 806 | | - swiotlb_tbl_unmap_single(dev, map, size, dir, attrs); |
|---|
| 807 | | - |
|---|
| 808 | | - return __phys_to_dma(dev, io_tlb_overflow_buffer); |
|---|
| 809 | | -} |
|---|
| 810 | | - |
|---|
| 811 | | -/* |
|---|
| 812 | | - * Unmap a single streaming mode DMA translation. The dma_addr and size must |
|---|
| 813 | | - * match what was provided for in a previous swiotlb_map_page call. All |
|---|
| 814 | | - * other usages are undefined. |
|---|
| 815 | | - * |
|---|
| 816 | | - * After this call, reads by the cpu to the buffer are guaranteed to see |
|---|
| 817 | | - * whatever the device wrote there. |
|---|
| 818 | | - */ |
|---|
| 819 | | -static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, |
|---|
| 820 | | - size_t size, enum dma_data_direction dir, |
|---|
| 821 | | - unsigned long attrs) |
|---|
| 822 | | -{ |
|---|
| 823 | | - phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); |
|---|
| 824 | | - |
|---|
| 825 | | - BUG_ON(dir == DMA_NONE); |
|---|
| 826 | | - |
|---|
| 827 | | - if (is_swiotlb_buffer(paddr)) { |
|---|
| 828 | | - swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); |
|---|
| 829 | | - return; |
|---|
| 720 | + dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr); |
|---|
| 721 | + if (unlikely(!dma_capable(dev, dma_addr, size, true))) { |
|---|
| 722 | + swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, size, dir, |
|---|
| 723 | + attrs | DMA_ATTR_SKIP_CPU_SYNC); |
|---|
| 724 | + dev_WARN_ONCE(dev, 1, |
|---|
| 725 | + "swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", |
|---|
| 726 | + &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); |
|---|
| 727 | + return DMA_MAPPING_ERROR; |
|---|
| 830 | 728 | } |
|---|
| 831 | 729 | |
|---|
| 832 | | - if (dir != DMA_FROM_DEVICE) |
|---|
| 833 | | - return; |
|---|
| 730 | + if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) |
|---|
| 731 | + arch_sync_dma_for_device(swiotlb_addr, size, dir); |
|---|
| 732 | + return dma_addr; |
|---|
| 733 | +} |
|---|
| 734 | + |
|---|
| 735 | +size_t swiotlb_max_mapping_size(struct device *dev) |
|---|
| 736 | +{ |
|---|
| 737 | + int min_align_mask = dma_get_min_align_mask(dev); |
|---|
| 738 | + int min_align = 0; |
|---|
| 834 | 739 | |
|---|
| 835 | 740 | /* |
|---|
| 836 | | - * phys_to_virt doesn't work with hihgmem page but we could |
|---|
| 837 | | - * call dma_mark_clean() with hihgmem page here. However, we |
|---|
| 838 | | - * are fine since dma_mark_clean() is null on POWERPC. We can |
|---|
| 839 | | - * make dma_mark_clean() take a physical address if necessary. |
|---|
| 741 | + * swiotlb_find_slots() skips slots according to |
|---|
| 742 | + * min align mask. This affects max mapping size. |
|---|
| 743 | + * Take it into account here. |
|---|
| 840 | 744 | */ |
|---|
| 841 | | - dma_mark_clean(phys_to_virt(paddr), size); |
|---|
| 745 | + if (min_align_mask) |
|---|
| 746 | + min_align = roundup(min_align_mask, IO_TLB_SIZE); |
|---|
| 747 | + |
|---|
| 748 | + return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE - min_align; |
|---|
| 842 | 749 | } |
|---|
| 843 | 750 | |
|---|
| 844 | | -void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, |
|---|
| 845 | | - size_t size, enum dma_data_direction dir, |
|---|
| 846 | | - unsigned long attrs) |
|---|
| 751 | +bool is_swiotlb_active(void) |
|---|
| 847 | 752 | { |
|---|
| 848 | | - unmap_single(hwdev, dev_addr, size, dir, attrs); |
|---|
| 849 | | -} |
|---|
| 850 | | - |
|---|
| 851 | | -/* |
|---|
| 852 | | - * Make physical memory consistent for a single streaming mode DMA translation |
|---|
| 853 | | - * after a transfer. |
|---|
| 854 | | - * |
|---|
| 855 | | - * If you perform a swiotlb_map_page() but wish to interrogate the buffer |
|---|
| 856 | | - * using the cpu, yet do not wish to teardown the dma mapping, you must |
|---|
| 857 | | - * call this function before doing so. At the next point you give the dma |
|---|
| 858 | | - * address back to the card, you must first perform a |
|---|
| 859 | | - * swiotlb_dma_sync_for_device, and then the device again owns the buffer |
|---|
| 860 | | - */ |
|---|
| 861 | | -static void |
|---|
| 862 | | -swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, |
|---|
| 863 | | - size_t size, enum dma_data_direction dir, |
|---|
| 864 | | - enum dma_sync_target target) |
|---|
| 865 | | -{ |
|---|
| 866 | | - phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); |
|---|
| 867 | | - |
|---|
| 868 | | - BUG_ON(dir == DMA_NONE); |
|---|
| 869 | | - |
|---|
| 870 | | - if (is_swiotlb_buffer(paddr)) { |
|---|
| 871 | | - swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target); |
|---|
| 872 | | - return; |
|---|
| 873 | | - } |
|---|
| 874 | | - |
|---|
| 875 | | - if (dir != DMA_FROM_DEVICE) |
|---|
| 876 | | - return; |
|---|
| 877 | | - |
|---|
| 878 | | - dma_mark_clean(phys_to_virt(paddr), size); |
|---|
| 879 | | -} |
|---|
| 880 | | - |
|---|
| 881 | | -void |
|---|
| 882 | | -swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, |
|---|
| 883 | | - size_t size, enum dma_data_direction dir) |
|---|
| 884 | | -{ |
|---|
| 885 | | - swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU); |
|---|
| 886 | | -} |
|---|
| 887 | | - |
|---|
| 888 | | -void |
|---|
| 889 | | -swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, |
|---|
| 890 | | - size_t size, enum dma_data_direction dir) |
|---|
| 891 | | -{ |
|---|
| 892 | | - swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE); |
|---|
| 893 | | -} |
|---|
| 894 | | - |
|---|
| 895 | | -/* |
|---|
| 896 | | - * Map a set of buffers described by scatterlist in streaming mode for DMA. |
|---|
| 897 | | - * This is the scatter-gather version of the above swiotlb_map_page |
|---|
| 898 | | - * interface. Here the scatter gather list elements are each tagged with the |
|---|
| 899 | | - * appropriate dma address and length. They are obtained via |
|---|
| 900 | | - * sg_dma_{address,length}(SG). |
|---|
| 901 | | - * |
|---|
| 902 | | - * NOTE: An implementation may be able to use a smaller number of |
|---|
| 903 | | - * DMA address/length pairs than there are SG table elements. |
|---|
| 904 | | - * (for example via virtual mapping capabilities) |
|---|
| 905 | | - * The routine returns the number of addr/length pairs actually |
|---|
| 906 | | - * used, at most nents. |
|---|
| 907 | | - * |
|---|
| 908 | | - * Device ownership issues as mentioned above for swiotlb_map_page are the |
|---|
| 909 | | - * same here. |
|---|
| 910 | | - */ |
|---|
| 911 | | -int |
|---|
| 912 | | -swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, |
|---|
| 913 | | - enum dma_data_direction dir, unsigned long attrs) |
|---|
| 914 | | -{ |
|---|
| 915 | | - struct scatterlist *sg; |
|---|
| 916 | | - int i; |
|---|
| 917 | | - |
|---|
| 918 | | - BUG_ON(dir == DMA_NONE); |
|---|
| 919 | | - |
|---|
| 920 | | - for_each_sg(sgl, sg, nelems, i) { |
|---|
| 921 | | - phys_addr_t paddr = sg_phys(sg); |
|---|
| 922 | | - dma_addr_t dev_addr = phys_to_dma(hwdev, paddr); |
|---|
| 923 | | - |
|---|
| 924 | | - if (swiotlb_force == SWIOTLB_FORCE || |
|---|
| 925 | | - !dma_capable(hwdev, dev_addr, sg->length)) { |
|---|
| 926 | | - phys_addr_t map = map_single(hwdev, sg_phys(sg), |
|---|
| 927 | | - sg->length, dir, attrs); |
|---|
| 928 | | - if (map == SWIOTLB_MAP_ERROR) { |
|---|
| 929 | | - /* Don't panic here, we expect map_sg users |
|---|
| 930 | | - to do proper error handling. */ |
|---|
| 931 | | - attrs |= DMA_ATTR_SKIP_CPU_SYNC; |
|---|
| 932 | | - swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, |
|---|
| 933 | | - attrs); |
|---|
| 934 | | - sg_dma_len(sgl) = 0; |
|---|
| 935 | | - return 0; |
|---|
| 936 | | - } |
|---|
| 937 | | - sg->dma_address = __phys_to_dma(hwdev, map); |
|---|
| 938 | | - } else |
|---|
| 939 | | - sg->dma_address = dev_addr; |
|---|
| 940 | | - sg_dma_len(sg) = sg->length; |
|---|
| 941 | | - } |
|---|
| 942 | | - return nelems; |
|---|
| 943 | | -} |
|---|
| 944 | | - |
|---|
| 945 | | -/* |
|---|
| 946 | | - * Unmap a set of streaming mode DMA translations. Again, cpu read rules |
|---|
| 947 | | - * concerning calls here are the same as for swiotlb_unmap_page() above. |
|---|
| 948 | | - */ |
|---|
| 949 | | -void |
|---|
| 950 | | -swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, |
|---|
| 951 | | - int nelems, enum dma_data_direction dir, |
|---|
| 952 | | - unsigned long attrs) |
|---|
| 953 | | -{ |
|---|
| 954 | | - struct scatterlist *sg; |
|---|
| 955 | | - int i; |
|---|
| 956 | | - |
|---|
| 957 | | - BUG_ON(dir == DMA_NONE); |
|---|
| 958 | | - |
|---|
| 959 | | - for_each_sg(sgl, sg, nelems, i) |
|---|
| 960 | | - unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, |
|---|
| 961 | | - attrs); |
|---|
| 962 | | -} |
|---|
| 963 | | - |
|---|
| 964 | | -/* |
|---|
| 965 | | - * Make physical memory consistent for a set of streaming mode DMA translations |
|---|
| 966 | | - * after a transfer. |
|---|
| 967 | | - * |
|---|
| 968 | | - * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules |
|---|
| 969 | | - * and usage. |
|---|
| 970 | | - */ |
|---|
| 971 | | -static void |
|---|
| 972 | | -swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, |
|---|
| 973 | | - int nelems, enum dma_data_direction dir, |
|---|
| 974 | | - enum dma_sync_target target) |
|---|
| 975 | | -{ |
|---|
| 976 | | - struct scatterlist *sg; |
|---|
| 977 | | - int i; |
|---|
| 978 | | - |
|---|
| 979 | | - for_each_sg(sgl, sg, nelems, i) |
|---|
| 980 | | - swiotlb_sync_single(hwdev, sg->dma_address, |
|---|
| 981 | | - sg_dma_len(sg), dir, target); |
|---|
| 982 | | -} |
|---|
| 983 | | - |
|---|
| 984 | | -void |
|---|
| 985 | | -swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, |
|---|
| 986 | | - int nelems, enum dma_data_direction dir) |
|---|
| 987 | | -{ |
|---|
| 988 | | - swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU); |
|---|
| 989 | | -} |
|---|
| 990 | | - |
|---|
| 991 | | -void |
|---|
| 992 | | -swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, |
|---|
| 993 | | - int nelems, enum dma_data_direction dir) |
|---|
| 994 | | -{ |
|---|
| 995 | | - swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); |
|---|
| 996 | | -} |
|---|
| 997 | | - |
|---|
| 998 | | -int |
|---|
| 999 | | -swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) |
|---|
| 1000 | | -{ |
|---|
| 1001 | | - return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer)); |
|---|
| 1002 | | -} |
|---|
| 1003 | | - |
|---|
| 1004 | | -/* |
|---|
| 1005 | | - * Return whether the given device DMA address mask can be supported |
|---|
| 1006 | | - * properly. For example, if your device can only drive the low 24-bits |
|---|
| 1007 | | - * during bus mastering, then you would pass 0x00ffffff as the mask to |
|---|
| 1008 | | - * this function. |
|---|
| 1009 | | - */ |
|---|
| 1010 | | -int |
|---|
| 1011 | | -swiotlb_dma_supported(struct device *hwdev, u64 mask) |
|---|
| 1012 | | -{ |
|---|
| 1013 | | - return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask; |
|---|
| 1014 | | -} |
|---|
| 1015 | | - |
|---|
| 1016 | | -void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, |
|---|
| 1017 | | - gfp_t gfp, unsigned long attrs) |
|---|
| 1018 | | -{ |
|---|
| 1019 | | - void *vaddr; |
|---|
| 1020 | | - |
|---|
| 1021 | | - /* temporary workaround: */ |
|---|
| 1022 | | - if (gfp & __GFP_NOWARN) |
|---|
| 1023 | | - attrs |= DMA_ATTR_NO_WARN; |
|---|
| 1024 | | - |
|---|
| 1025 | 753 | /* |
|---|
| 1026 | | - * Don't print a warning when the first allocation attempt fails. |
|---|
| 1027 | | - * swiotlb_alloc_coherent() will print a warning when the DMA memory |
|---|
| 1028 | | - * allocation ultimately failed. |
|---|
| 754 | + * When SWIOTLB is initialized, even if io_tlb_start points to physical |
|---|
| 755 | + * address zero, io_tlb_end surely doesn't. |
|---|
| 1029 | 756 | */ |
|---|
| 1030 | | - gfp |= __GFP_NOWARN; |
|---|
| 1031 | | - |
|---|
| 1032 | | - vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs); |
|---|
| 1033 | | - if (!vaddr) |
|---|
| 1034 | | - vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs); |
|---|
| 1035 | | - return vaddr; |
|---|
| 757 | + return io_tlb_end != 0; |
|---|
| 1036 | 758 | } |
|---|
| 1037 | 759 | |
|---|
| 1038 | | -void swiotlb_free(struct device *dev, size_t size, void *vaddr, |
|---|
| 1039 | | - dma_addr_t dma_addr, unsigned long attrs) |
|---|
| 760 | +#ifdef CONFIG_DEBUG_FS |
|---|
| 761 | + |
|---|
| 762 | +static int __init swiotlb_create_debugfs(void) |
|---|
| 1040 | 763 | { |
|---|
| 1041 | | - if (!swiotlb_free_buffer(dev, size, dma_addr)) |
|---|
| 1042 | | - dma_direct_free(dev, size, vaddr, dma_addr, attrs); |
|---|
| 764 | + struct dentry *root; |
|---|
| 765 | + |
|---|
| 766 | + root = debugfs_create_dir("swiotlb", NULL); |
|---|
| 767 | + debugfs_create_ulong("io_tlb_nslabs", 0400, root, &io_tlb_nslabs); |
|---|
| 768 | + debugfs_create_ulong("io_tlb_used", 0400, root, &io_tlb_used); |
|---|
| 769 | + return 0; |
|---|
| 1043 | 770 | } |
|---|
| 1044 | 771 | |
|---|
| 1045 | | -const struct dma_map_ops swiotlb_dma_ops = { |
|---|
| 1046 | | - .mapping_error = swiotlb_dma_mapping_error, |
|---|
| 1047 | | - .alloc = swiotlb_alloc, |
|---|
| 1048 | | - .free = swiotlb_free, |
|---|
| 1049 | | - .sync_single_for_cpu = swiotlb_sync_single_for_cpu, |
|---|
| 1050 | | - .sync_single_for_device = swiotlb_sync_single_for_device, |
|---|
| 1051 | | - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, |
|---|
| 1052 | | - .sync_sg_for_device = swiotlb_sync_sg_for_device, |
|---|
| 1053 | | - .map_sg = swiotlb_map_sg_attrs, |
|---|
| 1054 | | - .unmap_sg = swiotlb_unmap_sg_attrs, |
|---|
| 1055 | | - .map_page = swiotlb_map_page, |
|---|
| 1056 | | - .unmap_page = swiotlb_unmap_page, |
|---|
| 1057 | | - .dma_supported = dma_direct_supported, |
|---|
| 1058 | | -}; |
|---|
| 1059 | | -EXPORT_SYMBOL(swiotlb_dma_ops); |
|---|
| 772 | +late_initcall(swiotlb_create_debugfs); |
|---|
| 773 | + |
|---|
| 774 | +#endif |
|---|