| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 2 | +#include <crypto/hash.h> |
|---|
| 1 | 3 | #include <linux/export.h> |
|---|
| 2 | 4 | #include <linux/bvec.h> |
|---|
| 5 | +#include <linux/fault-inject-usercopy.h> |
|---|
| 3 | 6 | #include <linux/uio.h> |
|---|
| 4 | 7 | #include <linux/pagemap.h> |
|---|
| 5 | 8 | #include <linux/slab.h> |
|---|
| 6 | 9 | #include <linux/vmalloc.h> |
|---|
| 7 | 10 | #include <linux/splice.h> |
|---|
| 11 | +#include <linux/compat.h> |
|---|
| 8 | 12 | #include <net/checksum.h> |
|---|
| 13 | +#include <linux/scatterlist.h> |
|---|
| 14 | +#include <linux/instrumented.h> |
|---|
| 9 | 15 | |
|---|
| 10 | 16 | #define PIPE_PARANOIA /* for now */ |
|---|
| 11 | 17 | |
|---|
| .. | .. |
|---|
| 83 | 89 | const struct kvec *kvec; \ |
|---|
| 84 | 90 | struct kvec v; \ |
|---|
| 85 | 91 | iterate_kvec(i, n, v, kvec, skip, (K)) \ |
|---|
| 92 | + } else if (unlikely(i->type & ITER_DISCARD)) { \ |
|---|
| 86 | 93 | } else { \ |
|---|
| 87 | 94 | const struct iovec *iov; \ |
|---|
| 88 | 95 | struct iovec v; \ |
|---|
| .. | .. |
|---|
| 114 | 121 | } \ |
|---|
| 115 | 122 | i->nr_segs -= kvec - i->kvec; \ |
|---|
| 116 | 123 | i->kvec = kvec; \ |
|---|
| 124 | + } else if (unlikely(i->type & ITER_DISCARD)) { \ |
|---|
| 125 | + skip += n; \ |
|---|
| 117 | 126 | } else { \ |
|---|
| 118 | 127 | const struct iovec *iov; \ |
|---|
| 119 | 128 | struct iovec v; \ |
|---|
| .. | .. |
|---|
| 132 | 141 | |
|---|
| 133 | 142 | static int copyout(void __user *to, const void *from, size_t n) |
|---|
| 134 | 143 | { |
|---|
| 135 | | - if (access_ok(VERIFY_WRITE, to, n)) { |
|---|
| 136 | | - kasan_check_read(from, n); |
|---|
| 144 | + if (should_fail_usercopy()) |
|---|
| 145 | + return n; |
|---|
| 146 | + if (access_ok(to, n)) { |
|---|
| 147 | + instrument_copy_to_user(to, from, n); |
|---|
| 137 | 148 | n = raw_copy_to_user(to, from, n); |
|---|
| 138 | 149 | } |
|---|
| 139 | 150 | return n; |
|---|
| .. | .. |
|---|
| 141 | 152 | |
|---|
| 142 | 153 | static int copyin(void *to, const void __user *from, size_t n) |
|---|
| 143 | 154 | { |
|---|
| 144 | | - if (access_ok(VERIFY_READ, from, n)) { |
|---|
| 145 | | - kasan_check_write(to, n); |
|---|
| 155 | + if (should_fail_usercopy()) |
|---|
| 156 | + return n; |
|---|
| 157 | + if (access_ok(from, n)) { |
|---|
| 158 | + instrument_copy_from_user(to, from, n); |
|---|
| 146 | 159 | n = raw_copy_from_user(to, from, n); |
|---|
| 147 | 160 | } |
|---|
| 148 | 161 | return n; |
|---|
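The rewritten copyout()/copyin() pair keeps the long-standing convention of returning the number of bytes *not* copied, now with a fault-injection hook ahead of the access_ok() check and instrumented copies replacing the bare kasan_check_*() calls. A minimal caller sketch (hypothetical helper, kernel context) showing how that residue convention is consumed:

```c
/*
 * Hypothetical caller sketch: copyout() returns the residue (bytes
 * NOT copied), so callers usually convert that back into "bytes
 * actually transferred" rather than treating nonzero as a hard error.
 */
static size_t copy_chunk_to_user(void __user *udst, const void *src,
				 size_t len)
{
	size_t left = copyout(udst, src, len);	/* 0 on full success */

	return len - left;	/* may be short on a faulting address */
}
```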
| .. | .. |
|---|
| 320 | 333 | static bool sanity(const struct iov_iter *i) |
|---|
| 321 | 334 | { |
|---|
| 322 | 335 | struct pipe_inode_info *pipe = i->pipe; |
|---|
| 323 | | - int idx = i->idx; |
|---|
| 324 | | - int next = pipe->curbuf + pipe->nrbufs; |
|---|
| 336 | + unsigned int p_head = pipe->head; |
|---|
| 337 | + unsigned int p_tail = pipe->tail; |
|---|
| 338 | + unsigned int p_mask = pipe->ring_size - 1; |
|---|
| 339 | + unsigned int p_occupancy = pipe_occupancy(p_head, p_tail); |
|---|
| 340 | + unsigned int i_head = i->head; |
|---|
| 341 | + unsigned int idx; |
|---|
| 342 | + |
|---|
| 325 | 343 | if (i->iov_offset) { |
|---|
| 326 | 344 | struct pipe_buffer *p; |
|---|
| 327 | | - if (unlikely(!pipe->nrbufs)) |
|---|
| 345 | + if (unlikely(p_occupancy == 0)) |
|---|
| 328 | 346 | goto Bad; // pipe must be non-empty |
|---|
| 329 | | - if (unlikely(idx != ((next - 1) & (pipe->buffers - 1)))) |
|---|
| 347 | + if (unlikely(i_head != p_head - 1)) |
|---|
| 330 | 348 | goto Bad; // must be at the last buffer... |
|---|
| 331 | 349 | |
|---|
| 332 | | - p = &pipe->bufs[idx]; |
|---|
| 350 | + p = &pipe->bufs[i_head & p_mask]; |
|---|
| 333 | 351 | if (unlikely(p->offset + p->len != i->iov_offset)) |
|---|
| 334 | 352 | goto Bad; // ... at the end of segment |
|---|
| 335 | 353 | } else { |
|---|
| 336 | | - if (idx != (next & (pipe->buffers - 1))) |
|---|
| 354 | + if (i_head != p_head) |
|---|
| 337 | 355 | goto Bad; // must be right after the last buffer |
|---|
| 338 | 356 | } |
|---|
| 339 | 357 | return true; |
|---|
| 340 | 358 | Bad: |
|---|
| 341 | | - printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset); |
|---|
| 342 | | - printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n", |
|---|
| 343 | | - pipe->curbuf, pipe->nrbufs, pipe->buffers); |
|---|
| 344 | | - for (idx = 0; idx < pipe->buffers; idx++) |
|---|
| 359 | + printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset); |
|---|
| 360 | + printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n", |
|---|
| 361 | + p_head, p_tail, pipe->ring_size); |
|---|
| 362 | + for (idx = 0; idx < pipe->ring_size; idx++) |
|---|
| 345 | 363 | printk(KERN_ERR "[%p %p %d %d]\n", |
|---|
| 346 | 364 | pipe->bufs[idx].ops, |
|---|
| 347 | 365 | pipe->bufs[idx].page, |
|---|
| .. | .. |
|---|
| 354 | 372 | #define sanity(i) true |
|---|
| 355 | 373 | #endif |
|---|
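The converted sanity() checks lean on the new pipe-ring representation: head and tail are free-running unsigned counters, masked by `ring_size - 1` only at array-access time, so occupancy is simply `head - tail` even across 32-bit wraparound. A standalone user-space model of that arithmetic (assumptions: power-of-two ring size, C unsigned wraparound semantics):

```c
#include <assert.h>

#define RING_SIZE 16u			/* must be a power of two */

int main(void)
{
	unsigned int mask = RING_SIZE - 1;
	unsigned int tail = 4294967294u;	/* 2^32 - 2, about to wrap */
	unsigned int head = tail + 3;		/* wraps around to 1 */

	assert(head - tail == 3);		/* occupancy survives the wrap */
	assert((head & mask) == 1);		/* slot index after masking */
	assert(head - tail < RING_SIZE);	/* i.e. the ring is not full */
	return 0;
}
```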
| 356 | 374 | |
|---|
| 357 | | -static inline int next_idx(int idx, struct pipe_inode_info *pipe) |
|---|
| 358 | | -{ |
|---|
| 359 | | - return (idx + 1) & (pipe->buffers - 1); |
|---|
| 360 | | -} |
|---|
| 361 | | - |
|---|
| 362 | 375 | static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes, |
|---|
| 363 | 376 | struct iov_iter *i) |
|---|
| 364 | 377 | { |
|---|
| 365 | 378 | struct pipe_inode_info *pipe = i->pipe; |
|---|
| 366 | 379 | struct pipe_buffer *buf; |
|---|
| 380 | + unsigned int p_tail = pipe->tail; |
|---|
| 381 | + unsigned int p_mask = pipe->ring_size - 1; |
|---|
| 382 | + unsigned int i_head = i->head; |
|---|
| 367 | 383 | size_t off; |
|---|
| 368 | | - int idx; |
|---|
| 369 | 384 | |
|---|
| 370 | 385 | if (unlikely(bytes > i->count)) |
|---|
| 371 | 386 | bytes = i->count; |
|---|
| .. | .. |
|---|
| 377 | 392 | return 0; |
|---|
| 378 | 393 | |
|---|
| 379 | 394 | off = i->iov_offset; |
|---|
| 380 | | - idx = i->idx; |
|---|
| 381 | | - buf = &pipe->bufs[idx]; |
|---|
| 395 | + buf = &pipe->bufs[i_head & p_mask]; |
|---|
| 382 | 396 | if (off) { |
|---|
| 383 | 397 | if (offset == off && buf->page == page) { |
|---|
| 384 | 398 | /* merge with the last one */ |
|---|
| .. | .. |
|---|
| 386 | 400 | i->iov_offset += bytes; |
|---|
| 387 | 401 | goto out; |
|---|
| 388 | 402 | } |
|---|
| 389 | | - idx = next_idx(idx, pipe); |
|---|
| 390 | | - buf = &pipe->bufs[idx]; |
|---|
| 403 | + i_head++; |
|---|
| 404 | + buf = &pipe->bufs[i_head & p_mask]; |
|---|
| 391 | 405 | } |
|---|
| 392 | | - if (idx == pipe->curbuf && pipe->nrbufs) |
|---|
| 406 | + if (pipe_full(i_head, p_tail, pipe->max_usage)) |
|---|
| 393 | 407 | return 0; |
|---|
| 394 | | - pipe->nrbufs++; |
|---|
| 408 | + |
|---|
| 395 | 409 | buf->ops = &page_cache_pipe_buf_ops; |
|---|
| 396 | 410 | buf->flags = 0; |
|---|
| 397 | | - get_page(buf->page = page); |
|---|
| 411 | + get_page(page); |
|---|
| 412 | + buf->page = page; |
|---|
| 398 | 413 | buf->offset = offset; |
|---|
| 399 | 414 | buf->len = bytes; |
|---|
| 415 | + |
|---|
| 416 | + pipe->head = i_head + 1; |
|---|
| 400 | 417 | i->iov_offset = offset + bytes; |
|---|
| 401 | | - i->idx = idx; |
|---|
| 418 | + i->head = i_head; |
|---|
| 402 | 419 | out: |
|---|
| 403 | 420 | i->count -= bytes; |
|---|
| 404 | 421 | return bytes; |
|---|
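copy_page_to_iter_pipe() now appends at `i_head & p_mask` and only merges when the incoming range starts exactly where the last buffer ends on the same page (the `offset == off` test, since sanity() guarantees `off == buf->offset + buf->len`). A small user-space model of that merge condition (hypothetical types, not the kernel's struct pipe_buffer):

```c
#include <stdbool.h>
#include <stddef.h>

struct buf { const void *page; size_t offset, len; };

/* Mirror of the "merge with the last one" test: same page, and the
 * new chunk begins precisely at the current end of the buffer. */
static bool can_merge(const struct buf *last, const void *page,
		      size_t offset)
{
	return last->page == page && offset == last->offset + last->len;
}
```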
| .. | .. |
|---|
| 429 | 446 | } |
|---|
| 430 | 447 | EXPORT_SYMBOL(iov_iter_fault_in_readable); |
|---|
| 431 | 448 | |
|---|
| 432 | | -void iov_iter_init(struct iov_iter *i, int direction, |
|---|
| 449 | +void iov_iter_init(struct iov_iter *i, unsigned int direction, |
|---|
| 433 | 450 | const struct iovec *iov, unsigned long nr_segs, |
|---|
| 434 | 451 | size_t count) |
|---|
| 435 | 452 | { |
|---|
| 453 | + WARN_ON(direction & ~(READ | WRITE)); |
|---|
| 454 | + direction &= READ | WRITE; |
|---|
| 455 | + |
|---|
| 436 | 456 | /* It will get better. Eventually... */ |
|---|
| 437 | 457 | if (uaccess_kernel()) { |
|---|
| 438 | | - direction |= ITER_KVEC; |
|---|
| 439 | | - i->type = direction; |
|---|
| 458 | + i->type = ITER_KVEC | direction; |
|---|
| 440 | 459 | i->kvec = (struct kvec *)iov; |
|---|
| 441 | 460 | } else { |
|---|
| 442 | | - i->type = direction; |
|---|
| 461 | + i->type = ITER_IOVEC | direction; |
|---|
| 443 | 462 | i->iov = iov; |
|---|
| 444 | 463 | } |
|---|
| 445 | 464 | i->nr_segs = nr_segs; |
|---|
| .. | .. |
|---|
| 447 | 466 | i->count = count; |
|---|
| 448 | 467 | } |
|---|
| 449 | 468 | EXPORT_SYMBOL(iov_iter_init); |
|---|
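With the direction argument now sanitized (WARN_ON on stray bits, then masked to `READ | WRITE`) and the iterator type set explicitly to ITER_KVEC or ITER_IOVEC, a typical caller looks like this hedged sketch (kernel context; fill_user_buffer() is a hypothetical helper, ubuf/len assumed validated):

```c
/* A READ iterator is a destination: read(2)-style paths copy into it. */
static ssize_t fill_user_buffer(void __user *ubuf, size_t len,
				const void *kbuf)
{
	struct iovec iov = { .iov_base = ubuf, .iov_len = len };
	struct iov_iter iter;

	iov_iter_init(&iter, READ, &iov, 1, len);
	return copy_to_iter(kbuf, len, &iter);	/* bytes copied */
}
```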
| 450 | | - |
|---|
| 451 | | -static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len) |
|---|
| 452 | | -{ |
|---|
| 453 | | - char *from = kmap_atomic(page); |
|---|
| 454 | | - memcpy(to, from + offset, len); |
|---|
| 455 | | - kunmap_atomic(from); |
|---|
| 456 | | -} |
|---|
| 457 | | - |
|---|
| 458 | | -static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len) |
|---|
| 459 | | -{ |
|---|
| 460 | | - char *to = kmap_atomic(page); |
|---|
| 461 | | - memcpy(to + offset, from, len); |
|---|
| 462 | | - kunmap_atomic(to); |
|---|
| 463 | | -} |
|---|
| 464 | 469 | |
|---|
| 465 | 470 | static void memzero_page(struct page *page, size_t offset, size_t len) |
|---|
| 466 | 471 | { |
|---|
| .. | .. |
|---|
| 474 | 479 | return buf->ops == &default_pipe_buf_ops; |
|---|
| 475 | 480 | } |
|---|
| 476 | 481 | |
|---|
| 477 | | -static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp) |
|---|
| 482 | +static inline void data_start(const struct iov_iter *i, |
|---|
| 483 | + unsigned int *iter_headp, size_t *offp) |
|---|
| 478 | 484 | { |
|---|
| 485 | + unsigned int p_mask = i->pipe->ring_size - 1; |
|---|
| 486 | + unsigned int iter_head = i->head; |
|---|
| 479 | 487 | size_t off = i->iov_offset; |
|---|
| 480 | | - int idx = i->idx; |
|---|
| 481 | | - if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) { |
|---|
| 482 | | - idx = next_idx(idx, i->pipe); |
|---|
| 488 | + |
|---|
| 489 | + if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) || |
|---|
| 490 | + off == PAGE_SIZE)) { |
|---|
| 491 | + iter_head++; |
|---|
| 483 | 492 | off = 0; |
|---|
| 484 | 493 | } |
|---|
| 485 | | - *idxp = idx; |
|---|
| 494 | + *iter_headp = iter_head; |
|---|
| 486 | 495 | *offp = off; |
|---|
| 487 | 496 | } |
|---|
| 488 | 497 | |
|---|
| 489 | 498 | static size_t push_pipe(struct iov_iter *i, size_t size, |
|---|
| 490 | | - int *idxp, size_t *offp) |
|---|
| 499 | + int *iter_headp, size_t *offp) |
|---|
| 491 | 500 | { |
|---|
| 492 | 501 | struct pipe_inode_info *pipe = i->pipe; |
|---|
| 502 | + unsigned int p_tail = pipe->tail; |
|---|
| 503 | + unsigned int p_mask = pipe->ring_size - 1; |
|---|
| 504 | + unsigned int iter_head; |
|---|
| 493 | 505 | size_t off; |
|---|
| 494 | | - int idx; |
|---|
| 495 | 506 | ssize_t left; |
|---|
| 496 | 507 | |
|---|
| 497 | 508 | if (unlikely(size > i->count)) |
|---|
| .. | .. |
|---|
| 500 | 511 | return 0; |
|---|
| 501 | 512 | |
|---|
| 502 | 513 | left = size; |
|---|
| 503 | | - data_start(i, &idx, &off); |
|---|
| 504 | | - *idxp = idx; |
|---|
| 514 | + data_start(i, &iter_head, &off); |
|---|
| 515 | + *iter_headp = iter_head; |
|---|
| 505 | 516 | *offp = off; |
|---|
| 506 | 517 | if (off) { |
|---|
| 507 | 518 | left -= PAGE_SIZE - off; |
|---|
| 508 | 519 | if (left <= 0) { |
|---|
| 509 | | - pipe->bufs[idx].len += size; |
|---|
| 520 | + pipe->bufs[iter_head & p_mask].len += size; |
|---|
| 510 | 521 | return size; |
|---|
| 511 | 522 | } |
|---|
| 512 | | - pipe->bufs[idx].len = PAGE_SIZE; |
|---|
| 513 | | - idx = next_idx(idx, pipe); |
|---|
| 523 | + pipe->bufs[iter_head & p_mask].len = PAGE_SIZE; |
|---|
| 524 | + iter_head++; |
|---|
| 514 | 525 | } |
|---|
| 515 | | - while (idx != pipe->curbuf || !pipe->nrbufs) { |
|---|
| 526 | + while (!pipe_full(iter_head, p_tail, pipe->max_usage)) { |
|---|
| 527 | + struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask]; |
|---|
| 516 | 528 | struct page *page = alloc_page(GFP_USER); |
|---|
| 517 | 529 | if (!page) |
|---|
| 518 | 530 | break; |
|---|
| 519 | | - pipe->nrbufs++; |
|---|
| 520 | | - pipe->bufs[idx].ops = &default_pipe_buf_ops; |
|---|
| 521 | | - pipe->bufs[idx].flags = 0; |
|---|
| 522 | | - pipe->bufs[idx].page = page; |
|---|
| 523 | | - pipe->bufs[idx].offset = 0; |
|---|
| 524 | | - if (left <= PAGE_SIZE) { |
|---|
| 525 | | - pipe->bufs[idx].len = left; |
|---|
| 531 | + |
|---|
| 532 | + buf->ops = &default_pipe_buf_ops; |
|---|
| 533 | + buf->flags = 0; |
|---|
| 534 | + buf->page = page; |
|---|
| 535 | + buf->offset = 0; |
|---|
| 536 | + buf->len = min_t(ssize_t, left, PAGE_SIZE); |
|---|
| 537 | + left -= buf->len; |
|---|
| 538 | + iter_head++; |
|---|
| 539 | + pipe->head = iter_head; |
|---|
| 540 | + |
|---|
| 541 | + if (left == 0) |
|---|
| 526 | 542 | return size; |
|---|
| 527 | | - } |
|---|
| 528 | | - pipe->bufs[idx].len = PAGE_SIZE; |
|---|
| 529 | | - left -= PAGE_SIZE; |
|---|
| 530 | | - idx = next_idx(idx, pipe); |
|---|
| 531 | 543 | } |
|---|
| 532 | 544 | return size - left; |
|---|
| 533 | 545 | } |
|---|
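push_pipe() was restructured from the old next_idx() walk into a single loop that fills whole pages until either the request is satisfied or pipe_full() trips against max_usage. A user-space model of the reservation accounting (assumed constants; pipe_full(head, tail, limit) is just `head - tail >= limit`):

```c
#include <stddef.h>

#define PAGE_SIZE 4096u

static size_t reserve(size_t size, unsigned int head, unsigned int tail,
		      unsigned int max_usage)
{
	size_t left = size;

	/* hand out page-sized slots while the ring has room */
	while (left && head - tail < max_usage) {
		size_t chunk = left < PAGE_SIZE ? left : PAGE_SIZE;

		left -= chunk;
		head++;
	}
	return size - left;	/* may be short if the ring filled up */
}
```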
| .. | .. |
|---|
| 536 | 548 | struct iov_iter *i) |
|---|
| 537 | 549 | { |
|---|
| 538 | 550 | struct pipe_inode_info *pipe = i->pipe; |
|---|
| 551 | + unsigned int p_mask = pipe->ring_size - 1; |
|---|
| 552 | + unsigned int i_head; |
|---|
| 539 | 553 | size_t n, off; |
|---|
| 540 | | - int idx; |
|---|
| 541 | 554 | |
|---|
| 542 | 555 | if (!sanity(i)) |
|---|
| 543 | 556 | return 0; |
|---|
| 544 | 557 | |
|---|
| 545 | | - bytes = n = push_pipe(i, bytes, &idx, &off); |
|---|
| 558 | + bytes = n = push_pipe(i, bytes, &i_head, &off); |
|---|
| 546 | 559 | if (unlikely(!n)) |
|---|
| 547 | 560 | return 0; |
|---|
| 548 | | - for ( ; n; idx = next_idx(idx, pipe), off = 0) { |
|---|
| 561 | + do { |
|---|
| 549 | 562 | size_t chunk = min_t(size_t, n, PAGE_SIZE - off); |
|---|
| 550 | | - memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk); |
|---|
| 551 | | - i->idx = idx; |
|---|
| 563 | + memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk); |
|---|
| 564 | + i->head = i_head; |
|---|
| 552 | 565 | i->iov_offset = off + chunk; |
|---|
| 553 | 566 | n -= chunk; |
|---|
| 554 | 567 | addr += chunk; |
|---|
| 555 | | - } |
|---|
| 568 | + off = 0; |
|---|
| 569 | + i_head++; |
|---|
| 570 | + } while (n); |
|---|
| 556 | 571 | i->count -= bytes; |
|---|
| 572 | + return bytes; |
|---|
| 573 | +} |
|---|
| 574 | + |
|---|
| 575 | +static __wsum csum_and_memcpy(void *to, const void *from, size_t len, |
|---|
| 576 | + __wsum sum, size_t off) |
|---|
| 577 | +{ |
|---|
| 578 | + __wsum next = csum_partial_copy_nocheck(from, to, len); |
|---|
| 579 | + return csum_block_add(sum, next, off); |
|---|
| 580 | +} |
|---|
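csum_and_memcpy() folds each copied chunk into a running Internet checksum via csum_block_add(sum, next, off), where `off` matters because a chunk starting at an odd byte offset lands in the opposite byte lane. A simplified 16-bit user-space model of that offset handling (assumption: this mirrors the byte rotation csum_block_add performs, not the kernel's exact wide arithmetic):

```c
#include <stdint.h>
#include <stddef.h>

static uint16_t fold16(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/* Fold a chunk's 16-bit sum in, swapping byte lanes at odd offsets. */
static uint16_t block_add(uint16_t sum, uint16_t next, size_t off)
{
	if (off & 1)
		next = (uint16_t)((next << 8) | (next >> 8));
	return fold16((uint32_t)sum + next);
}
```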
| 581 | + |
|---|
| 582 | +static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, |
|---|
| 583 | + struct csum_state *csstate, |
|---|
| 584 | + struct iov_iter *i) |
|---|
| 585 | +{ |
|---|
| 586 | + struct pipe_inode_info *pipe = i->pipe; |
|---|
| 587 | + unsigned int p_mask = pipe->ring_size - 1; |
|---|
| 588 | + __wsum sum = csstate->csum; |
|---|
| 589 | + size_t off = csstate->off; |
|---|
| 590 | + unsigned int i_head; |
|---|
| 591 | + size_t n, r; |
|---|
| 592 | + |
|---|
| 593 | + if (!sanity(i)) |
|---|
| 594 | + return 0; |
|---|
| 595 | + |
|---|
| 596 | + bytes = n = push_pipe(i, bytes, &i_head, &r); |
|---|
| 597 | + if (unlikely(!n)) |
|---|
| 598 | + return 0; |
|---|
| 599 | + do { |
|---|
| 600 | + size_t chunk = min_t(size_t, n, PAGE_SIZE - r); |
|---|
| 601 | + char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page); |
|---|
| 602 | + sum = csum_and_memcpy(p + r, addr, chunk, sum, off); |
|---|
| 603 | + kunmap_atomic(p); |
|---|
| 604 | + i->head = i_head; |
|---|
| 605 | + i->iov_offset = r + chunk; |
|---|
| 606 | + n -= chunk; |
|---|
| 607 | + off += chunk; |
|---|
| 608 | + addr += chunk; |
|---|
| 609 | + r = 0; |
|---|
| 610 | + i_head++; |
|---|
| 611 | + } while (n); |
|---|
| 612 | + i->count -= bytes; |
|---|
| 613 | + csstate->csum = sum; |
|---|
| 614 | + csstate->off = off; |
|---|
| 557 | 615 | return bytes; |
|---|
| 558 | 616 | } |
|---|
| 559 | 617 | |
|---|
| 560 | 618 | size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) |
|---|
| 561 | 619 | { |
|---|
| 562 | 620 | const char *from = addr; |
|---|
| 563 | | - if (unlikely(i->type & ITER_PIPE)) |
|---|
| 621 | + if (unlikely(iov_iter_is_pipe(i))) |
|---|
| 564 | 622 | return copy_pipe_to_iter(addr, bytes, i); |
|---|
| 565 | 623 | if (iter_is_iovec(i)) |
|---|
| 566 | 624 | might_fault(); |
|---|
| .. | .. |
|---|
| 575 | 633 | } |
|---|
| 576 | 634 | EXPORT_SYMBOL(_copy_to_iter); |
|---|
| 577 | 635 | |
|---|
| 578 | | -#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE |
|---|
| 579 | | -static int copyout_mcsafe(void __user *to, const void *from, size_t n) |
|---|
| 636 | +#ifdef CONFIG_ARCH_HAS_COPY_MC |
|---|
| 637 | +static int copyout_mc(void __user *to, const void *from, size_t n) |
|---|
| 580 | 638 | { |
|---|
| 581 | | - if (access_ok(VERIFY_WRITE, to, n)) { |
|---|
| 582 | | - kasan_check_read(from, n); |
|---|
| 583 | | - n = copy_to_user_mcsafe((__force void *) to, from, n); |
|---|
| 639 | + if (access_ok(to, n)) { |
|---|
| 640 | + instrument_copy_to_user(to, from, n); |
|---|
| 641 | + n = copy_mc_to_user((__force void *) to, from, n); |
|---|
| 584 | 642 | } |
|---|
| 585 | 643 | return n; |
|---|
| 586 | 644 | } |
|---|
| 587 | 645 | |
|---|
| 588 | | -static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset, |
|---|
| 646 | +static unsigned long copy_mc_to_page(struct page *page, size_t offset, |
|---|
| 589 | 647 | const char *from, size_t len) |
|---|
| 590 | 648 | { |
|---|
| 591 | 649 | unsigned long ret; |
|---|
| 592 | 650 | char *to; |
|---|
| 593 | 651 | |
|---|
| 594 | 652 | to = kmap_atomic(page); |
|---|
| 595 | | - ret = memcpy_mcsafe(to + offset, from, len); |
|---|
| 653 | + ret = copy_mc_to_kernel(to + offset, from, len); |
|---|
| 596 | 654 | kunmap_atomic(to); |
|---|
| 597 | 655 | |
|---|
| 598 | 656 | return ret; |
|---|
| 599 | 657 | } |
|---|
| 600 | 658 | |
|---|
| 601 | | -static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, |
|---|
| 659 | +static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, |
|---|
| 602 | 660 | struct iov_iter *i) |
|---|
| 603 | 661 | { |
|---|
| 604 | 662 | struct pipe_inode_info *pipe = i->pipe; |
|---|
| 663 | + unsigned int p_mask = pipe->ring_size - 1; |
|---|
| 664 | + unsigned int i_head; |
|---|
| 605 | 665 | size_t n, off, xfer = 0; |
|---|
| 606 | | - int idx; |
|---|
| 607 | 666 | |
|---|
| 608 | 667 | if (!sanity(i)) |
|---|
| 609 | 668 | return 0; |
|---|
| 610 | 669 | |
|---|
| 611 | | - bytes = n = push_pipe(i, bytes, &idx, &off); |
|---|
| 670 | + bytes = n = push_pipe(i, bytes, &i_head, &off); |
|---|
| 612 | 671 | if (unlikely(!n)) |
|---|
| 613 | 672 | return 0; |
|---|
| 614 | | - for ( ; n; idx = next_idx(idx, pipe), off = 0) { |
|---|
| 673 | + do { |
|---|
| 615 | 674 | size_t chunk = min_t(size_t, n, PAGE_SIZE - off); |
|---|
| 616 | 675 | unsigned long rem; |
|---|
| 617 | 676 | |
|---|
| 618 | | - rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr, |
|---|
| 619 | | - chunk); |
|---|
| 620 | | - i->idx = idx; |
|---|
| 677 | + rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page, |
|---|
| 678 | + off, addr, chunk); |
|---|
| 679 | + i->head = i_head; |
|---|
| 621 | 680 | i->iov_offset = off + chunk - rem; |
|---|
| 622 | 681 | xfer += chunk - rem; |
|---|
| 623 | 682 | if (rem) |
|---|
| 624 | 683 | break; |
|---|
| 625 | 684 | n -= chunk; |
|---|
| 626 | 685 | addr += chunk; |
|---|
| 627 | | - } |
|---|
| 686 | + off = 0; |
|---|
| 687 | + i_head++; |
|---|
| 688 | + } while (n); |
|---|
| 628 | 689 | i->count -= xfer; |
|---|
| 629 | 690 | return xfer; |
|---|
| 630 | 691 | } |
|---|
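copy_mc_pipe_to_iter() accounts machine-check failures by transfer rather than by request: copy_mc_to_page() returns the residue `rem`, the iterator advances by `chunk - rem`, and the loop breaks so the caller sees the short total in `xfer`. A user-space model of that accounting (flawed_copy() is a stand-in for a copier that can stop mid-chunk at a "poisoned" byte):

```c
#include <stddef.h>
#include <string.h>

/* Stand-in copier: copies up to `budget` bytes, returns the residue. */
static size_t flawed_copy(char *dst, const char *src, size_t len,
			  size_t budget)
{
	size_t ok = len < budget ? len : budget;

	memcpy(dst, src, ok);
	return len - ok;		/* rem: bytes left uncopied */
}

static size_t copy_all(char *dst, const char *src, size_t len,
		       size_t chunk, size_t poison_at)
{
	size_t xfer = 0;

	while (len) {
		size_t n = len < chunk ? len : chunk;
		size_t budget = poison_at - xfer; /* bytes before the bad one */
		size_t rem = flawed_copy(dst + xfer, src + xfer, n, budget);

		xfer += n - rem;
		if (rem)
			break;		/* stop at the poisoned byte */
		len -= n;
	}
	return xfer;			/* may be short of the request */
}
```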
| 631 | 692 | |
|---|
| 632 | 693 | /** |
|---|
| 633 | | - * _copy_to_iter_mcsafe - copy to user with source-read error exception handling |
|---|
| 694 | + * _copy_mc_to_iter - copy to iter with source memory error exception handling |
|---|
| 634 | 695 | * @addr: source kernel address |
|---|
| 635 | 696 | * @bytes: total transfer length |
|---|
| 636 | 697 | * @i: destination iterator |
|---|
| 637 | 698 | * |
|---|
| 638 | | - * The pmem driver arranges for filesystem-dax to use this facility via |
|---|
| 639 | | - * dax_copy_to_iter() for protecting read/write to persistent memory. |
|---|
| 640 | | - * Unless / until an architecture can guarantee identical performance |
|---|
| 641 | | - * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a |
|---|
| 642 | | - * performance regression to switch more users to the mcsafe version. |
|---|
| 699 | + * The pmem driver deploys this for the dax operation |
|---|
| 700 | + * (dax_copy_to_iter()) for dax reads (bypass page-cache and the |
|---|
| 701 | + * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes |
|---|
| 702 | + * successfully copied. |
|---|
| 643 | 703 | * |
|---|
| 644 | | - * Otherwise, the main differences between this and typical _copy_to_iter(). |
|---|
| 704 | + * The main differences between this and typical _copy_to_iter(). |
|---|
| 645 | 705 | * |
|---|
| 646 | 706 | * * Typical tail/residue handling after a fault retries the copy |
|---|
| 647 | 707 | * byte-by-byte until the fault happens again. Re-triggering machine |
|---|
| .. | .. |
|---|
| 652 | 712 | * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies. |
|---|
| 653 | 713 | * Compare to copy_to_iter() where only ITER_IOVEC attempts might return |
|---|
| 654 | 714 | * a short copy. |
|---|
| 655 | | - * |
|---|
| 656 | | - * See MCSAFE_TEST for self-test. |
|---|
| 657 | 715 | */ |
|---|
| 658 | | -size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) |
|---|
| 716 | +size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) |
|---|
| 659 | 717 | { |
|---|
| 660 | 718 | const char *from = addr; |
|---|
| 661 | 719 | unsigned long rem, curr_addr, s_addr = (unsigned long) addr; |
|---|
| 662 | 720 | |
|---|
| 663 | | - if (unlikely(i->type & ITER_PIPE)) |
|---|
| 664 | | - return copy_pipe_to_iter_mcsafe(addr, bytes, i); |
|---|
| 721 | + if (unlikely(iov_iter_is_pipe(i))) |
|---|
| 722 | + return copy_mc_pipe_to_iter(addr, bytes, i); |
|---|
| 665 | 723 | if (iter_is_iovec(i)) |
|---|
| 666 | 724 | might_fault(); |
|---|
| 667 | 725 | iterate_and_advance(i, bytes, v, |
|---|
| 668 | | - copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), |
|---|
| 726 | + copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len, |
|---|
| 727 | + v.iov_len), |
|---|
| 669 | 728 | ({ |
|---|
| 670 | | - rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset, |
|---|
| 671 | | - (from += v.bv_len) - v.bv_len, v.bv_len); |
|---|
| 729 | + rem = copy_mc_to_page(v.bv_page, v.bv_offset, |
|---|
| 730 | + (from += v.bv_len) - v.bv_len, v.bv_len); |
|---|
| 672 | 731 | if (rem) { |
|---|
| 673 | 732 | curr_addr = (unsigned long) from; |
|---|
| 674 | 733 | bytes = curr_addr - s_addr - rem; |
|---|
| .. | .. |
|---|
| 676 | 735 | } |
|---|
| 677 | 736 | }), |
|---|
| 678 | 737 | ({ |
|---|
| 679 | | - rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, |
|---|
| 680 | | - v.iov_len); |
|---|
| 738 | + rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len) |
|---|
| 739 | + - v.iov_len, v.iov_len); |
|---|
| 681 | 740 | if (rem) { |
|---|
| 682 | 741 | curr_addr = (unsigned long) from; |
|---|
| 683 | 742 | bytes = curr_addr - s_addr - rem; |
|---|
| .. | .. |
|---|
| 688 | 747 | |
|---|
| 689 | 748 | return bytes; |
|---|
| 690 | 749 | } |
|---|
| 691 | | -EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe); |
|---|
| 692 | | -#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */ |
|---|
| 750 | +EXPORT_SYMBOL_GPL(_copy_mc_to_iter); |
|---|
| 751 | +#endif /* CONFIG_ARCH_HAS_COPY_MC */ |
|---|
| 693 | 752 | |
|---|
| 694 | 753 | size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) |
|---|
| 695 | 754 | { |
|---|
| 696 | 755 | char *to = addr; |
|---|
| 697 | | - if (unlikely(i->type & ITER_PIPE)) { |
|---|
| 756 | + if (unlikely(iov_iter_is_pipe(i))) { |
|---|
| 698 | 757 | WARN_ON(1); |
|---|
| 699 | 758 | return 0; |
|---|
| 700 | 759 | } |
|---|
| .. | .. |
|---|
| 714 | 773 | bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) |
|---|
| 715 | 774 | { |
|---|
| 716 | 775 | char *to = addr; |
|---|
| 717 | | - if (unlikely(i->type & ITER_PIPE)) { |
|---|
| 776 | + if (unlikely(iov_iter_is_pipe(i))) { |
|---|
| 718 | 777 | WARN_ON(1); |
|---|
| 719 | 778 | return false; |
|---|
| 720 | 779 | } |
|---|
| .. | .. |
|---|
| 741 | 800 | size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) |
|---|
| 742 | 801 | { |
|---|
| 743 | 802 | char *to = addr; |
|---|
| 744 | | - if (unlikely(i->type & ITER_PIPE)) { |
|---|
| 803 | + if (unlikely(iov_iter_is_pipe(i))) { |
|---|
| 745 | 804 | WARN_ON(1); |
|---|
| 746 | 805 | return 0; |
|---|
| 747 | 806 | } |
|---|
| .. | .. |
|---|
| 775 | 834 | size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) |
|---|
| 776 | 835 | { |
|---|
| 777 | 836 | char *to = addr; |
|---|
| 778 | | - if (unlikely(i->type & ITER_PIPE)) { |
|---|
| 837 | + if (unlikely(iov_iter_is_pipe(i))) { |
|---|
| 779 | 838 | WARN_ON(1); |
|---|
| 780 | 839 | return 0; |
|---|
| 781 | 840 | } |
|---|
| .. | .. |
|---|
| 796 | 855 | bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) |
|---|
| 797 | 856 | { |
|---|
| 798 | 857 | char *to = addr; |
|---|
| 799 | | - if (unlikely(i->type & ITER_PIPE)) { |
|---|
| 858 | + if (unlikely(iov_iter_is_pipe(i))) { |
|---|
| 800 | 859 | WARN_ON(1); |
|---|
| 801 | 860 | return false; |
|---|
| 802 | 861 | } |
|---|
| .. | .. |
|---|
| 835 | 894 | head = compound_head(page); |
|---|
| 836 | 895 | v += (page - head) << PAGE_SHIFT; |
|---|
| 837 | 896 | |
|---|
| 838 | | - if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head)))) |
|---|
| 897 | + if (likely(n <= v && v <= (page_size(head)))) |
|---|
| 839 | 898 | return true; |
|---|
| 840 | 899 | WARN_ON(1); |
|---|
| 841 | 900 | return false; |
|---|
| .. | .. |
|---|
| 851 | 910 | size_t wanted = copy_to_iter(kaddr + offset, bytes, i); |
|---|
| 852 | 911 | kunmap_atomic(kaddr); |
|---|
| 853 | 912 | return wanted; |
|---|
| 854 | | - } else if (likely(!(i->type & ITER_PIPE))) |
|---|
| 913 | + } else if (unlikely(iov_iter_is_discard(i))) { |
|---|
| 914 | + if (unlikely(i->count < bytes)) |
|---|
| 915 | + bytes = i->count; |
|---|
| 916 | + i->count -= bytes; |
|---|
| 917 | + return bytes; |
|---|
| 918 | + } else if (likely(!iov_iter_is_pipe(i))) |
|---|
| 855 | 919 | return copy_page_to_iter_iovec(page, offset, bytes, i); |
|---|
| 856 | 920 | else |
|---|
| 857 | 921 | return copy_page_to_iter_pipe(page, offset, bytes, i); |
|---|
| .. | .. |
|---|
| 863 | 927 | { |
|---|
| 864 | 928 | if (unlikely(!page_copy_sane(page, offset, bytes))) |
|---|
| 865 | 929 | return 0; |
|---|
| 866 | | - if (unlikely(i->type & ITER_PIPE)) { |
|---|
| 930 | + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { |
|---|
| 867 | 931 | WARN_ON(1); |
|---|
| 868 | 932 | return 0; |
|---|
| 869 | 933 | } |
|---|
| .. | .. |
|---|
| 880 | 944 | static size_t pipe_zero(size_t bytes, struct iov_iter *i) |
|---|
| 881 | 945 | { |
|---|
| 882 | 946 | struct pipe_inode_info *pipe = i->pipe; |
|---|
| 947 | + unsigned int p_mask = pipe->ring_size - 1; |
|---|
| 948 | + unsigned int i_head; |
|---|
| 883 | 949 | size_t n, off; |
|---|
| 884 | | - int idx; |
|---|
| 885 | 950 | |
|---|
| 886 | 951 | if (!sanity(i)) |
|---|
| 887 | 952 | return 0; |
|---|
| 888 | 953 | |
|---|
| 889 | | - bytes = n = push_pipe(i, bytes, &idx, &off); |
|---|
| 954 | + bytes = n = push_pipe(i, bytes, &i_head, &off); |
|---|
| 890 | 955 | if (unlikely(!n)) |
|---|
| 891 | 956 | return 0; |
|---|
| 892 | 957 | |
|---|
| 893 | | - for ( ; n; idx = next_idx(idx, pipe), off = 0) { |
|---|
| 958 | + do { |
|---|
| 894 | 959 | size_t chunk = min_t(size_t, n, PAGE_SIZE - off); |
|---|
| 895 | | - memzero_page(pipe->bufs[idx].page, off, chunk); |
|---|
| 896 | | - i->idx = idx; |
|---|
| 960 | + memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk); |
|---|
| 961 | + i->head = i_head; |
|---|
| 897 | 962 | i->iov_offset = off + chunk; |
|---|
| 898 | 963 | n -= chunk; |
|---|
| 899 | | - } |
|---|
| 964 | + off = 0; |
|---|
| 965 | + i_head++; |
|---|
| 966 | + } while (n); |
|---|
| 900 | 967 | i->count -= bytes; |
|---|
| 901 | 968 | return bytes; |
|---|
| 902 | 969 | } |
|---|
| 903 | 970 | |
|---|
| 904 | 971 | size_t iov_iter_zero(size_t bytes, struct iov_iter *i) |
|---|
| 905 | 972 | { |
|---|
| 906 | | - if (unlikely(i->type & ITER_PIPE)) |
|---|
| 973 | + if (unlikely(iov_iter_is_pipe(i))) |
|---|
| 907 | 974 | return pipe_zero(bytes, i); |
|---|
| 908 | 975 | iterate_and_advance(i, bytes, v, |
|---|
| 909 | 976 | clear_user(v.iov_base, v.iov_len), |
|---|
| .. | .. |
|---|
| 923 | 990 | kunmap_atomic(kaddr); |
|---|
| 924 | 991 | return 0; |
|---|
| 925 | 992 | } |
|---|
| 926 | | - if (unlikely(i->type & ITER_PIPE)) { |
|---|
| 993 | + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { |
|---|
| 927 | 994 | kunmap_atomic(kaddr); |
|---|
| 928 | 995 | WARN_ON(1); |
|---|
| 929 | 996 | return 0; |
|---|
| .. | .. |
|---|
| 942 | 1009 | static inline void pipe_truncate(struct iov_iter *i) |
|---|
| 943 | 1010 | { |
|---|
| 944 | 1011 | struct pipe_inode_info *pipe = i->pipe; |
|---|
| 945 | | - if (pipe->nrbufs) { |
|---|
| 1012 | + unsigned int p_tail = pipe->tail; |
|---|
| 1013 | + unsigned int p_head = pipe->head; |
|---|
| 1014 | + unsigned int p_mask = pipe->ring_size - 1; |
|---|
| 1015 | + |
|---|
| 1016 | + if (!pipe_empty(p_head, p_tail)) { |
|---|
| 1017 | + struct pipe_buffer *buf; |
|---|
| 1018 | + unsigned int i_head = i->head; |
|---|
| 946 | 1019 | size_t off = i->iov_offset; |
|---|
| 947 | | - int idx = i->idx; |
|---|
| 948 | | - int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1); |
|---|
| 1020 | + |
|---|
| 949 | 1021 | if (off) { |
|---|
| 950 | | - pipe->bufs[idx].len = off - pipe->bufs[idx].offset; |
|---|
| 951 | | - idx = next_idx(idx, pipe); |
|---|
| 952 | | - nrbufs++; |
|---|
| 1022 | + buf = &pipe->bufs[i_head & p_mask]; |
|---|
| 1023 | + buf->len = off - buf->offset; |
|---|
| 1024 | + i_head++; |
|---|
| 953 | 1025 | } |
|---|
| 954 | | - while (pipe->nrbufs > nrbufs) { |
|---|
| 955 | | - pipe_buf_release(pipe, &pipe->bufs[idx]); |
|---|
| 956 | | - idx = next_idx(idx, pipe); |
|---|
| 957 | | - pipe->nrbufs--; |
|---|
| 1026 | + while (p_head != i_head) { |
|---|
| 1027 | + p_head--; |
|---|
| 1028 | + pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]); |
|---|
| 958 | 1029 | } |
|---|
| 1030 | + |
|---|
| 1031 | + pipe->head = p_head; |
|---|
| 959 | 1032 | } |
|---|
| 960 | 1033 | } |
|---|
| 961 | 1034 | |
|---|
| .. | .. |
|---|
| 966 | 1039 | size = i->count; |
|---|
| 967 | 1040 | if (size) { |
|---|
| 968 | 1041 | struct pipe_buffer *buf; |
|---|
| 1042 | + unsigned int p_mask = pipe->ring_size - 1; |
|---|
| 1043 | + unsigned int i_head = i->head; |
|---|
| 969 | 1044 | size_t off = i->iov_offset, left = size; |
|---|
| 970 | | - int idx = i->idx; |
|---|
| 1045 | + |
|---|
| 971 | 1046 | if (off) /* make it relative to the beginning of buffer */ |
|---|
| 972 | | - left += off - pipe->bufs[idx].offset; |
|---|
| 1047 | + left += off - pipe->bufs[i_head & p_mask].offset; |
|---|
| 973 | 1048 | while (1) { |
|---|
| 974 | | - buf = &pipe->bufs[idx]; |
|---|
| 1049 | + buf = &pipe->bufs[i_head & p_mask]; |
|---|
| 975 | 1050 | if (left <= buf->len) |
|---|
| 976 | 1051 | break; |
|---|
| 977 | 1052 | left -= buf->len; |
|---|
| 978 | | - idx = next_idx(idx, pipe); |
|---|
| 1053 | + i_head++; |
|---|
| 979 | 1054 | } |
|---|
| 980 | | - i->idx = idx; |
|---|
| 1055 | + i->head = i_head; |
|---|
| 981 | 1056 | i->iov_offset = buf->offset + left; |
|---|
| 982 | 1057 | } |
|---|
| 983 | 1058 | i->count -= size; |
|---|
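pipe_truncate() now discards by walking pipe->head backwards to the iterator's position instead of recomputing an nrbufs count: every slot in (i_head, p_head] is released, then pipe->head is rewound. A user-space model of that release walk (release() is a hypothetical stand-in for pipe_buf_release()):

```c
#include <stdio.h>

#define RING_SIZE 8u

static void release(unsigned int slot)
{
	printf("released slot %u\n", slot);
}

/* Drop every buffer past the iterator's current position. */
static unsigned int truncate_to(unsigned int p_head, unsigned int i_head)
{
	unsigned int mask = RING_SIZE - 1;

	while (p_head != i_head) {
		p_head--;
		release(p_head & mask);
	}
	return p_head;		/* new pipe->head == i_head */
}
```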
| .. | .. |
|---|
| 987 | 1062 | |
|---|
| 988 | 1063 | void iov_iter_advance(struct iov_iter *i, size_t size) |
|---|
| 989 | 1064 | { |
|---|
| 990 | | - if (unlikely(i->type & ITER_PIPE)) { |
|---|
| 1065 | + if (unlikely(iov_iter_is_pipe(i))) { |
|---|
| 991 | 1066 | pipe_advance(i, size); |
|---|
| 1067 | + return; |
|---|
| 1068 | + } |
|---|
| 1069 | + if (unlikely(iov_iter_is_discard(i))) { |
|---|
| 1070 | + i->count -= size; |
|---|
| 992 | 1071 | return; |
|---|
| 993 | 1072 | } |
|---|
| 994 | 1073 | iterate_and_advance(i, size, v, 0, 0, 0) |
|---|
| .. | .. |
|---|
| 1002 | 1081 | if (WARN_ON(unroll > MAX_RW_COUNT)) |
|---|
| 1003 | 1082 | return; |
|---|
| 1004 | 1083 | i->count += unroll; |
|---|
| 1005 | | - if (unlikely(i->type & ITER_PIPE)) { |
|---|
| 1084 | + if (unlikely(iov_iter_is_pipe(i))) { |
|---|
| 1006 | 1085 | struct pipe_inode_info *pipe = i->pipe; |
|---|
| 1007 | | - int idx = i->idx; |
|---|
| 1086 | + unsigned int p_mask = pipe->ring_size - 1; |
|---|
| 1087 | + unsigned int i_head = i->head; |
|---|
| 1008 | 1088 | size_t off = i->iov_offset; |
|---|
| 1009 | 1089 | while (1) { |
|---|
| 1010 | | - size_t n = off - pipe->bufs[idx].offset; |
|---|
| 1090 | + struct pipe_buffer *b = &pipe->bufs[i_head & p_mask]; |
|---|
| 1091 | + size_t n = off - b->offset; |
|---|
| 1011 | 1092 | if (unroll < n) { |
|---|
| 1012 | 1093 | off -= unroll; |
|---|
| 1013 | 1094 | break; |
|---|
| 1014 | 1095 | } |
|---|
| 1015 | 1096 | unroll -= n; |
|---|
| 1016 | | - if (!unroll && idx == i->start_idx) { |
|---|
| 1097 | + if (!unroll && i_head == i->start_head) { |
|---|
| 1017 | 1098 | off = 0; |
|---|
| 1018 | 1099 | break; |
|---|
| 1019 | 1100 | } |
|---|
| 1020 | | - if (!idx--) |
|---|
| 1021 | | - idx = pipe->buffers - 1; |
|---|
| 1022 | | - off = pipe->bufs[idx].offset + pipe->bufs[idx].len; |
|---|
| 1101 | + i_head--; |
|---|
| 1102 | + b = &pipe->bufs[i_head & p_mask]; |
|---|
| 1103 | + off = b->offset + b->len; |
|---|
| 1023 | 1104 | } |
|---|
| 1024 | 1105 | i->iov_offset = off; |
|---|
| 1025 | | - i->idx = idx; |
|---|
| 1106 | + i->head = i_head; |
|---|
| 1026 | 1107 | pipe_truncate(i); |
|---|
| 1027 | 1108 | return; |
|---|
| 1028 | 1109 | } |
|---|
| 1110 | + if (unlikely(iov_iter_is_discard(i))) |
|---|
| 1111 | + return; |
|---|
| 1029 | 1112 | if (unroll <= i->iov_offset) { |
|---|
| 1030 | 1113 | i->iov_offset -= unroll; |
|---|
| 1031 | 1114 | return; |
|---|
| 1032 | 1115 | } |
|---|
| 1033 | 1116 | unroll -= i->iov_offset; |
|---|
| 1034 | | - if (i->type & ITER_BVEC) { |
|---|
| 1117 | + if (iov_iter_is_bvec(i)) { |
|---|
| 1035 | 1118 | const struct bio_vec *bvec = i->bvec; |
|---|
| 1036 | 1119 | while (1) { |
|---|
| 1037 | 1120 | size_t n = (--bvec)->bv_len; |
|---|
| .. | .. |
|---|
| 1064 | 1147 | */ |
|---|
| 1065 | 1148 | size_t iov_iter_single_seg_count(const struct iov_iter *i) |
|---|
| 1066 | 1149 | { |
|---|
| 1067 | | - if (unlikely(i->type & ITER_PIPE)) |
|---|
| 1150 | + if (unlikely(iov_iter_is_pipe(i))) |
|---|
| 1068 | 1151 | return i->count; // it is a silly place, anyway |
|---|
| 1069 | 1152 | if (i->nr_segs == 1) |
|---|
| 1070 | 1153 | return i->count; |
|---|
| 1071 | | - else if (i->type & ITER_BVEC) |
|---|
| 1154 | + if (unlikely(iov_iter_is_discard(i))) |
|---|
| 1155 | + return i->count; |
|---|
| 1156 | + else if (iov_iter_is_bvec(i)) |
|---|
| 1072 | 1157 | return min(i->count, i->bvec->bv_len - i->iov_offset); |
|---|
| 1073 | 1158 | else |
|---|
| 1074 | 1159 | return min(i->count, i->iov->iov_len - i->iov_offset); |
|---|
| 1075 | 1160 | } |
|---|
| 1076 | 1161 | EXPORT_SYMBOL(iov_iter_single_seg_count); |
|---|
| 1077 | 1162 | |
|---|
| 1078 | | -void iov_iter_kvec(struct iov_iter *i, int direction, |
|---|
| 1163 | +void iov_iter_kvec(struct iov_iter *i, unsigned int direction, |
|---|
| 1079 | 1164 | const struct kvec *kvec, unsigned long nr_segs, |
|---|
| 1080 | 1165 | size_t count) |
|---|
| 1081 | 1166 | { |
|---|
| 1082 | | - BUG_ON(!(direction & ITER_KVEC)); |
|---|
| 1083 | | - i->type = direction; |
|---|
| 1167 | + WARN_ON(direction & ~(READ | WRITE)); |
|---|
| 1168 | + i->type = ITER_KVEC | (direction & (READ | WRITE)); |
|---|
| 1084 | 1169 | i->kvec = kvec; |
|---|
| 1085 | 1170 | i->nr_segs = nr_segs; |
|---|
| 1086 | 1171 | i->iov_offset = 0; |
|---|
| .. | .. |
|---|
| 1088 | 1173 | } |
|---|
| 1089 | 1174 | EXPORT_SYMBOL(iov_iter_kvec); |
|---|
| 1090 | 1175 | |
|---|
| 1091 | | -void iov_iter_bvec(struct iov_iter *i, int direction, |
|---|
| 1176 | +void iov_iter_bvec(struct iov_iter *i, unsigned int direction, |
|---|
| 1092 | 1177 | const struct bio_vec *bvec, unsigned long nr_segs, |
|---|
| 1093 | 1178 | size_t count) |
|---|
| 1094 | 1179 | { |
|---|
| 1095 | | - BUG_ON(!(direction & ITER_BVEC)); |
|---|
| 1096 | | - i->type = direction; |
|---|
| 1180 | + WARN_ON(direction & ~(READ | WRITE)); |
|---|
| 1181 | + i->type = ITER_BVEC | (direction & (READ | WRITE)); |
|---|
| 1097 | 1182 | i->bvec = bvec; |
|---|
| 1098 | 1183 | i->nr_segs = nr_segs; |
|---|
| 1099 | 1184 | i->iov_offset = 0; |
|---|
| .. | .. |
|---|
| 1101 | 1186 | } |
|---|
| 1102 | 1187 | EXPORT_SYMBOL(iov_iter_bvec); |
|---|
| 1103 | 1188 | |
|---|
| 1104 | | -void iov_iter_pipe(struct iov_iter *i, int direction, |
|---|
| 1189 | +void iov_iter_pipe(struct iov_iter *i, unsigned int direction, |
|---|
| 1105 | 1190 | struct pipe_inode_info *pipe, |
|---|
| 1106 | 1191 | size_t count) |
|---|
| 1107 | 1192 | { |
|---|
| 1108 | | - BUG_ON(direction != ITER_PIPE); |
|---|
| 1109 | | - WARN_ON(pipe->nrbufs == pipe->buffers); |
|---|
| 1110 | | - i->type = direction; |
|---|
| 1193 | + BUG_ON(direction != READ); |
|---|
| 1194 | + WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size)); |
|---|
| 1195 | + i->type = ITER_PIPE | READ; |
|---|
| 1111 | 1196 | i->pipe = pipe; |
|---|
| 1112 | | - i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); |
|---|
| 1197 | + i->head = pipe->head; |
|---|
| 1113 | 1198 | i->iov_offset = 0; |
|---|
| 1114 | 1199 | i->count = count; |
|---|
| 1115 | | - i->start_idx = i->idx; |
|---|
| 1200 | + i->start_head = i->head; |
|---|
| 1116 | 1201 | } |
|---|
| 1117 | 1202 | EXPORT_SYMBOL(iov_iter_pipe); |
|---|
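The three constructors now take a plain READ/WRITE direction and OR in the iterator type themselves, so callers no longer pass ITER_* flags. A hedged usage sketch for the kvec variant (kernel context; kbuf/len are assumed valid):

```c
/* A WRITE iterator is a data source: write(2)-style paths copy from it. */
static void wrap_kernel_buffer(struct iov_iter *iter, struct kvec *kv,
			       void *kbuf, size_t len)
{
	kv->iov_base = kbuf;
	kv->iov_len = len;
	iov_iter_kvec(iter, WRITE, kv, 1, len);
	/* e.g. a ->read_iter()/->write_iter() helper consumes *iter */
}
```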
| 1203 | + |
|---|
| 1204 | +/** |
|---|
| 1205 | + * iov_iter_discard - Initialise an I/O iterator that discards data |
|---|
| 1206 | + * @i: The iterator to initialise. |
|---|
| 1207 | + * @direction: The direction of the transfer. |
|---|
| 1208 | + * @count: The size of the I/O buffer in bytes. |
|---|
| 1209 | + * |
|---|
| 1210 | + * Set up an I/O iterator that just discards everything that's written to it. |
|---|
| 1211 | + * It's only available as a READ iterator. |
|---|
| 1212 | + */ |
|---|
| 1213 | +void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count) |
|---|
| 1214 | +{ |
|---|
| 1215 | + BUG_ON(direction != READ); |
|---|
| 1216 | + i->type = ITER_DISCARD | READ; |
|---|
| 1217 | + i->count = count; |
|---|
| 1218 | + i->iov_offset = 0; |
|---|
| 1219 | +} |
|---|
| 1220 | +EXPORT_SYMBOL(iov_iter_discard); |
|---|
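A hedged sketch of the new discard iterator in use, e.g. to drain bytes from a source without buffering them (kernel context; the helper name is illustrative):

```c
static void skip_bytes_setup(struct iov_iter *iter, size_t count)
{
	/* Copies into a discard iterator only adjust iter->count;
	 * the payload itself goes nowhere. */
	iov_iter_discard(iter, READ, count);
	/* hand iter to a ->read_iter()-style consumer to skip data */
}
```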
| 1118 | 1221 | |
|---|
| 1119 | 1222 | unsigned long iov_iter_alignment(const struct iov_iter *i) |
|---|
| 1120 | 1223 | { |
|---|
| 1121 | 1224 | unsigned long res = 0; |
|---|
| 1122 | 1225 | size_t size = i->count; |
|---|
| 1123 | 1226 | |
|---|
| 1124 | | - if (unlikely(i->type & ITER_PIPE)) { |
|---|
| 1125 | | - if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx])) |
|---|
| 1227 | + if (unlikely(iov_iter_is_pipe(i))) { |
|---|
| 1228 | + unsigned int p_mask = i->pipe->ring_size - 1; |
|---|
| 1229 | + |
|---|
| 1230 | + if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask])) |
|---|
| 1126 | 1231 | return size | i->iov_offset; |
|---|
| 1127 | 1232 | return size; |
|---|
| 1128 | 1233 | } |
|---|
| .. | .. |
|---|
| 1140 | 1245 | unsigned long res = 0; |
|---|
| 1141 | 1246 | size_t size = i->count; |
|---|
| 1142 | 1247 | |
|---|
| 1143 | | - if (unlikely(i->type & ITER_PIPE)) { |
|---|
| 1248 | + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { |
|---|
| 1144 | 1249 | WARN_ON(1); |
|---|
| 1145 | 1250 | return ~0U; |
|---|
| 1146 | 1251 | } |
|---|
| .. | .. |
|---|
| 1160 | 1265 | static inline ssize_t __pipe_get_pages(struct iov_iter *i, |
|---|
| 1161 | 1266 | size_t maxsize, |
|---|
| 1162 | 1267 | struct page **pages, |
|---|
| 1163 | | - int idx, |
|---|
| 1268 | + int iter_head, |
|---|
| 1164 | 1269 | size_t *start) |
|---|
| 1165 | 1270 | { |
|---|
| 1166 | 1271 | struct pipe_inode_info *pipe = i->pipe; |
|---|
| 1167 | | - ssize_t n = push_pipe(i, maxsize, &idx, start); |
|---|
| 1272 | + unsigned int p_mask = pipe->ring_size - 1; |
|---|
| 1273 | + ssize_t n = push_pipe(i, maxsize, &iter_head, start); |
|---|
| 1168 | 1274 | if (!n) |
|---|
| 1169 | 1275 | return -EFAULT; |
|---|
| 1170 | 1276 | |
|---|
| 1171 | 1277 | maxsize = n; |
|---|
| 1172 | 1278 | n += *start; |
|---|
| 1173 | 1279 | while (n > 0) { |
|---|
| 1174 | | - get_page(*pages++ = pipe->bufs[idx].page); |
|---|
| 1175 | | - idx = next_idx(idx, pipe); |
|---|
| 1280 | + get_page(*pages++ = pipe->bufs[iter_head & p_mask].page); |
|---|
| 1281 | + iter_head++; |
|---|
| 1176 | 1282 | n -= PAGE_SIZE; |
|---|
| 1177 | 1283 | } |
|---|
| 1178 | 1284 | |
|---|
| .. | .. |
|---|
| 1183 | 1289 | struct page **pages, size_t maxsize, unsigned maxpages, |
|---|
| 1184 | 1290 | size_t *start) |
|---|
| 1185 | 1291 | { |
|---|
| 1186 | | - unsigned npages; |
|---|
| 1292 | + unsigned int iter_head, npages; |
|---|
| 1187 | 1293 | size_t capacity; |
|---|
| 1188 | | - int idx; |
|---|
| 1189 | 1294 | |
|---|
| 1190 | 1295 | if (!maxsize) |
|---|
| 1191 | 1296 | return 0; |
|---|
| .. | .. |
|---|
| 1193 | 1298 | if (!sanity(i)) |
|---|
| 1194 | 1299 | return -EFAULT; |
|---|
| 1195 | 1300 | |
|---|
| 1196 | | - data_start(i, &idx, start); |
|---|
| 1197 | | - /* some of this one + all after this one */ |
|---|
| 1198 | | - npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; |
|---|
| 1199 | | - capacity = min(npages,maxpages) * PAGE_SIZE - *start; |
|---|
| 1301 | + data_start(i, &iter_head, start); |
|---|
| 1302 | + /* Amount of free space: some of this one + all after this one */ |
|---|
| 1303 | + npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); |
|---|
| 1304 | + capacity = min(npages, maxpages) * PAGE_SIZE - *start; |
|---|
| 1200 | 1305 | |
|---|
| 1201 | | - return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start); |
|---|
| 1306 | + return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start); |
|---|
| 1202 | 1307 | } |
|---|
| 1203 | 1308 | |
|---|
| 1204 | 1309 | ssize_t iov_iter_get_pages(struct iov_iter *i, |
|---|
| .. | .. |
|---|
| 1208 | 1313 | if (maxsize > i->count) |
|---|
| 1209 | 1314 | maxsize = i->count; |
|---|
| 1210 | 1315 | |
|---|
| 1211 | | - if (unlikely(i->type & ITER_PIPE)) |
|---|
| 1316 | + if (unlikely(iov_iter_is_pipe(i))) |
|---|
| 1212 | 1317 | return pipe_get_pages(i, pages, maxsize, maxpages, start); |
|---|
| 1318 | + if (unlikely(iov_iter_is_discard(i))) |
|---|
| 1319 | + return -EFAULT; |
|---|
| 1320 | + |
|---|
| 1213 | 1321 | iterate_all_kinds(i, maxsize, v, ({ |
|---|
| 1214 | 1322 | unsigned long addr = (unsigned long)v.iov_base; |
|---|
| 1215 | 1323 | size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); |
|---|
| .. | .. |
|---|
| 1220 | 1328 | len = maxpages * PAGE_SIZE; |
|---|
| 1221 | 1329 | addr &= ~(PAGE_SIZE - 1); |
|---|
| 1222 | 1330 | n = DIV_ROUND_UP(len, PAGE_SIZE); |
|---|
| 1223 | | - res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages); |
|---|
| 1224 | | - if (unlikely(res < 0)) |
|---|
| 1331 | + res = get_user_pages_fast(addr, n, |
|---|
| 1332 | + iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, |
|---|
| 1333 | + pages); |
|---|
| 1334 | + if (unlikely(res <= 0)) |
|---|
| 1225 | 1335 | return res; |
|---|
| 1226 | 1336 | return (res == n ? len : res * PAGE_SIZE) - *start; |
|---|
| 1227 | 1337 | 0;}),({ |
|---|
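The get_user_pages_fast() call now passes FOLL_WRITE for non-WRITE iterators (pages that a read fills must be writable) and treats `res == 0` as failure too. The return expression above converts a partial pin into usable bytes: if fewer pages were pinned than requested, only whole pinned pages count, minus the offset into the first page. A worked user-space model with assumed numbers:

```c
#include <assert.h>
#include <stddef.h>

#define PAGE_SIZE 4096u

int main(void)
{
	size_t start = 100;		/* offset into the first page */
	size_t len = 9900 + start;	/* request, including that offset */
	int n = 3;			/* DIV_ROUND_UP(len, PAGE_SIZE) */
	int res = 2;			/* only two pages got pinned */

	size_t got = (res == n ? len : (size_t)res * PAGE_SIZE) - start;
	assert(got == 2 * PAGE_SIZE - start);	/* 8092 usable bytes */
	return 0;
}
```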
| .. | .. |
|---|
| 1247 | 1357 | size_t *start) |
|---|
| 1248 | 1358 | { |
|---|
| 1249 | 1359 | struct page **p; |
|---|
| 1360 | + unsigned int iter_head, npages; |
|---|
| 1250 | 1361 | ssize_t n; |
|---|
| 1251 | | - int idx; |
|---|
| 1252 | | - int npages; |
|---|
| 1253 | 1362 | |
|---|
| 1254 | 1363 | if (!maxsize) |
|---|
| 1255 | 1364 | return 0; |
|---|
| .. | .. |
|---|
| 1257 | 1366 | if (!sanity(i)) |
|---|
| 1258 | 1367 | return -EFAULT; |
|---|
| 1259 | 1368 | |
|---|
| 1260 | | - data_start(i, &idx, start); |
|---|
| 1261 | | - /* some of this one + all after this one */ |
|---|
| 1262 | | - npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; |
|---|
| 1369 | + data_start(i, &iter_head, start); |
|---|
| 1370 | + /* Amount of free space: some of this one + all after this one */ |
|---|
| 1371 | + npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); |
|---|
| 1263 | 1372 | n = npages * PAGE_SIZE - *start; |
|---|
| 1264 | 1373 | if (maxsize > n) |
|---|
| 1265 | 1374 | maxsize = n; |
|---|
| .. | .. |
|---|
| 1268 | 1377 | p = get_pages_array(npages); |
|---|
| 1269 | 1378 | if (!p) |
|---|
| 1270 | 1379 | return -ENOMEM; |
|---|
| 1271 | | - n = __pipe_get_pages(i, maxsize, p, idx, start); |
|---|
| 1380 | + n = __pipe_get_pages(i, maxsize, p, iter_head, start); |
|---|
| 1272 | 1381 | if (n > 0) |
|---|
| 1273 | 1382 | *pages = p; |
|---|
| 1274 | 1383 | else |
|---|
| .. | .. |
|---|
| 1285 | 1394 | if (maxsize > i->count) |
|---|
| 1286 | 1395 | maxsize = i->count; |
|---|
| 1287 | 1396 | |
|---|
| 1288 | | - if (unlikely(i->type & ITER_PIPE)) |
|---|
| 1397 | + if (unlikely(iov_iter_is_pipe(i))) |
|---|
| 1289 | 1398 | return pipe_get_pages_alloc(i, pages, maxsize, start); |
|---|
| 1399 | + if (unlikely(iov_iter_is_discard(i))) |
|---|
| 1400 | + return -EFAULT; |
|---|
| 1401 | + |
|---|
| 1290 | 1402 | iterate_all_kinds(i, maxsize, v, ({ |
|---|
| 1291 | 1403 | unsigned long addr = (unsigned long)v.iov_base; |
|---|
| 1292 | 1404 | size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); |
|---|
| .. | .. |
|---|
| 1298 | 1410 | p = get_pages_array(n); |
|---|
| 1299 | 1411 | if (!p) |
|---|
| 1300 | 1412 | return -ENOMEM; |
|---|
| 1301 | | - res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p); |
|---|
| 1302 | | - if (unlikely(res < 0)) { |
|---|
| 1413 | + res = get_user_pages_fast(addr, n, |
|---|
| 1414 | + iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p); |
|---|
| 1415 | + if (unlikely(res <= 0)) { |
|---|
| 1303 | 1416 | kvfree(p); |
|---|
| 1417 | + *pages = NULL; |
|---|
| 1304 | 1418 | return res; |
|---|
| 1305 | 1419 | } |
|---|
| 1306 | 1420 | *pages = p; |
|---|
| .. | .. |
|---|
| 1328 | 1442 | __wsum sum, next; |
|---|
| 1329 | 1443 | size_t off = 0; |
|---|
| 1330 | 1444 | sum = *csum; |
|---|
| 1331 | | - if (unlikely(i->type & ITER_PIPE)) { |
|---|
| 1445 | + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { |
|---|
| 1332 | 1446 | WARN_ON(1); |
|---|
| 1333 | 1447 | return 0; |
|---|
| 1334 | 1448 | } |
|---|
| 1335 | 1449 | iterate_and_advance(i, bytes, v, ({ |
|---|
| 1336 | | - int err = 0; |
|---|
| 1337 | 1450 | next = csum_and_copy_from_user(v.iov_base, |
|---|
| 1338 | 1451 | (to += v.iov_len) - v.iov_len, |
|---|
| 1339 | | - v.iov_len, 0, &err); |
|---|
| 1340 | | - if (!err) { |
|---|
| 1452 | + v.iov_len); |
|---|
| 1453 | + if (next) { |
|---|
| 1341 | 1454 | sum = csum_block_add(sum, next, off); |
|---|
| 1342 | 1455 | off += v.iov_len; |
|---|
| 1343 | 1456 | } |
|---|
| 1344 | | - err ? v.iov_len : 0; |
|---|
| 1457 | + next ? 0 : v.iov_len; |
|---|
| 1345 | 1458 | }), ({ |
|---|
| 1346 | 1459 | char *p = kmap_atomic(v.bv_page); |
|---|
| 1347 | | - next = csum_partial_copy_nocheck(p + v.bv_offset, |
|---|
| 1348 | | - (to += v.bv_len) - v.bv_len, |
|---|
| 1349 | | - v.bv_len, 0); |
|---|
| 1460 | + sum = csum_and_memcpy((to += v.bv_len) - v.bv_len, |
|---|
| 1461 | + p + v.bv_offset, v.bv_len, |
|---|
| 1462 | + sum, off); |
|---|
| 1350 | 1463 | kunmap_atomic(p); |
|---|
| 1351 | | - sum = csum_block_add(sum, next, off); |
|---|
| 1352 | 1464 | off += v.bv_len; |
|---|
| 1353 | 1465 | }),({ |
|---|
| 1354 | | - next = csum_partial_copy_nocheck(v.iov_base, |
|---|
| 1355 | | - (to += v.iov_len) - v.iov_len, |
|---|
| 1356 | | - v.iov_len, 0); |
|---|
| 1357 | | - sum = csum_block_add(sum, next, off); |
|---|
| 1466 | + sum = csum_and_memcpy((to += v.iov_len) - v.iov_len, |
|---|
| 1467 | + v.iov_base, v.iov_len, |
|---|
| 1468 | + sum, off); |
|---|
| 1358 | 1469 | off += v.iov_len; |
|---|
| 1359 | 1470 | }) |
|---|
| 1360 | 1471 | ) |
|---|
| .. | .. |
|---|
| 1370 | 1481 | __wsum sum, next; |
|---|
| 1371 | 1482 | size_t off = 0; |
|---|
| 1372 | 1483 | sum = *csum; |
|---|
| 1373 | | - if (unlikely(i->type & ITER_PIPE)) { |
|---|
| 1484 | + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { |
|---|
| 1374 | 1485 | WARN_ON(1); |
|---|
| 1375 | 1486 | return false; |
|---|
| 1376 | 1487 | } |
|---|
| 1377 | 1488 | if (unlikely(i->count < bytes)) |
|---|
| 1378 | 1489 | return false; |
|---|
| 1379 | 1490 | iterate_all_kinds(i, bytes, v, ({ |
|---|
| 1380 | | - int err = 0; |
|---|
| 1381 | 1491 | next = csum_and_copy_from_user(v.iov_base, |
|---|
| 1382 | 1492 | (to += v.iov_len) - v.iov_len, |
|---|
| 1383 | | - v.iov_len, 0, &err); |
|---|
| 1384 | | - if (err) |
|---|
| 1493 | + v.iov_len); |
|---|
| 1494 | + if (!next) |
|---|
| 1385 | 1495 | return false; |
|---|
| 1386 | 1496 | sum = csum_block_add(sum, next, off); |
|---|
| 1387 | 1497 | off += v.iov_len; |
|---|
| 1388 | 1498 | 0; |
|---|
| 1389 | 1499 | }), ({ |
|---|
| 1390 | 1500 | char *p = kmap_atomic(v.bv_page); |
|---|
| 1391 | | - next = csum_partial_copy_nocheck(p + v.bv_offset, |
|---|
| 1392 | | - (to += v.bv_len) - v.bv_len, |
|---|
| 1393 | | - v.bv_len, 0); |
|---|
| 1501 | + sum = csum_and_memcpy((to += v.bv_len) - v.bv_len, |
|---|
| 1502 | + p + v.bv_offset, v.bv_len, |
|---|
| 1503 | + sum, off); |
|---|
| 1394 | 1504 | kunmap_atomic(p); |
|---|
| 1395 | | - sum = csum_block_add(sum, next, off); |
|---|
| 1396 | 1505 | off += v.bv_len; |
|---|
| 1397 | 1506 | }),({ |
|---|
| 1398 | | - next = csum_partial_copy_nocheck(v.iov_base, |
|---|
| 1399 | | - (to += v.iov_len) - v.iov_len, |
|---|
| 1400 | | - v.iov_len, 0); |
|---|
| 1401 | | - sum = csum_block_add(sum, next, off); |
|---|
| 1507 | + sum = csum_and_memcpy((to += v.iov_len) - v.iov_len, |
|---|
| 1508 | + v.iov_base, v.iov_len, |
|---|
| 1509 | + sum, off); |
|---|
| 1402 | 1510 | off += v.iov_len; |
|---|
| 1403 | 1511 | }) |
|---|
| 1404 | 1512 | ) |
|---|
| .. | .. |
|---|
| 1408 | 1516 | } |
|---|
| 1409 | 1517 | EXPORT_SYMBOL(csum_and_copy_from_iter_full); |
|---|
| 1410 | 1518 | |
|---|
| 1411 | | -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, |
|---|
| 1519 | +size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, |
|---|
| 1412 | 1520 | struct iov_iter *i) |
|---|
| 1413 | 1521 | { |
|---|
| 1522 | + struct csum_state *csstate = _csstate; |
|---|
| 1414 | 1523 | const char *from = addr; |
|---|
| 1415 | 1524 | __wsum sum, next; |
|---|
| 1416 | | - size_t off = 0; |
|---|
| 1417 | | - sum = *csum; |
|---|
| 1418 | | - if (unlikely(i->type & ITER_PIPE)) { |
|---|
| 1525 | + size_t off; |
|---|
| 1526 | + |
|---|
| 1527 | + if (unlikely(iov_iter_is_pipe(i))) |
|---|
| 1528 | + return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i); |
|---|
| 1529 | + |
|---|
| 1530 | + sum = csstate->csum; |
|---|
| 1531 | + off = csstate->off; |
|---|
| 1532 | + if (unlikely(iov_iter_is_discard(i))) { |
|---|
| 1419 | 1533 | WARN_ON(1); /* for now */ |
|---|
| 1420 | 1534 | return 0; |
|---|
| 1421 | 1535 | } |
|---|
| 1422 | 1536 | iterate_and_advance(i, bytes, v, ({ |
|---|
| 1423 | | - int err = 0; |
|---|
| 1424 | 1537 | next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, |
|---|
| 1425 | 1538 | v.iov_base, |
|---|
| 1426 | | - v.iov_len, 0, &err); |
|---|
| 1427 | | - if (!err) { |
|---|
| 1539 | + v.iov_len); |
|---|
| 1540 | + if (next) { |
|---|
| 1428 | 1541 | sum = csum_block_add(sum, next, off); |
|---|
| 1429 | 1542 | off += v.iov_len; |
|---|
| 1430 | 1543 | } |
|---|
| 1431 | | - err ? v.iov_len : 0; |
|---|
| 1544 | + next ? 0 : v.iov_len; |
|---|
| 1432 | 1545 | }), ({ |
|---|
| 1433 | 1546 | char *p = kmap_atomic(v.bv_page); |
|---|
| 1434 | | - next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len, |
|---|
| 1435 | | - p + v.bv_offset, |
|---|
| 1436 | | - v.bv_len, 0); |
|---|
| 1547 | + sum = csum_and_memcpy(p + v.bv_offset, |
|---|
| 1548 | + (from += v.bv_len) - v.bv_len, |
|---|
| 1549 | + v.bv_len, sum, off); |
|---|
| 1437 | 1550 | kunmap_atomic(p); |
|---|
| 1438 | | - sum = csum_block_add(sum, next, off); |
|---|
| 1439 | 1551 | off += v.bv_len; |
|---|
| 1440 | 1552 | }),({ |
|---|
| 1441 | | - next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len, |
|---|
| 1442 | | - v.iov_base, |
|---|
| 1443 | | - v.iov_len, 0); |
|---|
| 1444 | | - sum = csum_block_add(sum, next, off); |
|---|
| 1553 | + sum = csum_and_memcpy(v.iov_base, |
|---|
| 1554 | + (from += v.iov_len) - v.iov_len, |
|---|
| 1555 | + v.iov_len, sum, off); |
|---|
| 1445 | 1556 | off += v.iov_len; |
|---|
| 1446 | 1557 | }) |
|---|
| 1447 | 1558 | ) |
|---|
| 1448 | | - *csum = sum; |
|---|
| 1559 | + csstate->csum = sum; |
|---|
| 1560 | + csstate->off = off; |
|---|
| 1449 | 1561 | return bytes; |
|---|
| 1450 | 1562 | } |
|---|
| 1451 | 1563 | EXPORT_SYMBOL(csum_and_copy_to_iter); |
|---|
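csum_and_copy_to_iter() now threads a struct csum_state (running csum plus byte offset) instead of a bare __wsum, which keeps the pipe path and repeated calls offset-correct. A hedged sketch of carrying the state across two copies (kernel context; the seed value is a caller-specific assumption):

```c
static void checksummed_send(struct iov_iter *iter,
			     const void *buf1, size_t len1,
			     const void *buf2, size_t len2)
{
	struct csum_state csstate = {
		.csum = (__force __wsum)~0U,	/* caller-specific seed */
		.off = 0,
	};

	csum_and_copy_to_iter(buf1, len1, &csstate, iter);
	csum_and_copy_to_iter(buf2, len2, &csstate, iter);
	/* csstate.csum now covers both chunks at their proper offsets */
}
```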
| 1564 | + |
|---|
| 1565 | +size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, |
|---|
| 1566 | + struct iov_iter *i) |
|---|
| 1567 | +{ |
|---|
| 1568 | +#ifdef CONFIG_CRYPTO_HASH |
|---|
| 1569 | + struct ahash_request *hash = hashp; |
|---|
| 1570 | + struct scatterlist sg; |
|---|
| 1571 | + size_t copied; |
|---|
| 1572 | + |
|---|
| 1573 | + copied = copy_to_iter(addr, bytes, i); |
|---|
| 1574 | + sg_init_one(&sg, addr, copied); |
|---|
| 1575 | + ahash_request_set_crypt(hash, &sg, NULL, copied); |
|---|
| 1576 | + crypto_ahash_update(hash); |
|---|
| 1577 | + return copied; |
|---|
| 1578 | +#else |
|---|
| 1579 | + return 0; |
|---|
| 1580 | +#endif |
|---|
| 1581 | +} |
|---|
| 1582 | +EXPORT_SYMBOL(hash_and_copy_to_iter); |
|---|
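hash_and_copy_to_iter() folds the copied bytes into an ahash request as a side effect of the copy. A hedged usage sketch (allocation and error handling omitted; the algorithm name and digest size are illustrative assumptions):

```c
struct crypto_ahash *tfm = crypto_alloc_ahash("sha256", 0, 0);
struct ahash_request *req = ahash_request_alloc(tfm, GFP_KERNEL);
u8 digest[SHA256_DIGEST_SIZE];

ahash_request_set_callback(req, 0, NULL, NULL);
crypto_ahash_init(req);
hash_and_copy_to_iter(buf, len, req, &iter);	/* copies and hashes */
ahash_request_set_crypt(req, NULL, digest, 0);	/* set result buffer */
crypto_ahash_final(req);			/* digest now complete */
```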
| 1452 | 1583 | |
|---|
| 1453 | 1584 | int iov_iter_npages(const struct iov_iter *i, int maxpages) |
|---|
| 1454 | 1585 | { |
|---|
| .. | .. |
|---|
| 1457 | 1588 | |
|---|
| 1458 | 1589 | if (!size) |
|---|
| 1459 | 1590 | return 0; |
|---|
| 1591 | + if (unlikely(iov_iter_is_discard(i))) |
|---|
| 1592 | + return 0; |
|---|
| 1460 | 1593 | |
|---|
| 1461 | | - if (unlikely(i->type & ITER_PIPE)) { |
|---|
| 1594 | + if (unlikely(iov_iter_is_pipe(i))) { |
|---|
| 1462 | 1595 | struct pipe_inode_info *pipe = i->pipe; |
|---|
| 1596 | + unsigned int iter_head; |
|---|
| 1463 | 1597 | size_t off; |
|---|
| 1464 | | - int idx; |
|---|
| 1465 | 1598 | |
|---|
| 1466 | 1599 | if (!sanity(i)) |
|---|
| 1467 | 1600 | return 0; |
|---|
| 1468 | 1601 | |
|---|
| 1469 | | - data_start(i, &idx, &off); |
|---|
| 1602 | + data_start(i, &iter_head, &off); |
|---|
| 1470 | 1603 | /* some of this one + all after this one */ |
|---|
| 1471 | | - npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1; |
|---|
| 1604 | + npages = pipe_space_for_user(iter_head, pipe->tail, pipe); |
|---|
| 1472 | 1605 | if (npages >= maxpages) |
|---|
| 1473 | 1606 | return maxpages; |
|---|
| 1474 | 1607 | } else iterate_all_kinds(i, size, v, ({ |
|---|
| .. | .. |
|---|
| 1496 | 1629 | const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) |
|---|
| 1497 | 1630 | { |
|---|
| 1498 | 1631 | *new = *old; |
|---|
| 1499 | | - if (unlikely(new->type & ITER_PIPE)) { |
|---|
| 1632 | + if (unlikely(iov_iter_is_pipe(new))) { |
|---|
| 1500 | 1633 | WARN_ON(1); |
|---|
| 1501 | 1634 | return NULL; |
|---|
| 1502 | 1635 | } |
|---|
| 1503 | | - if (new->type & ITER_BVEC) |
|---|
| 1636 | + if (unlikely(iov_iter_is_discard(new))) |
|---|
| 1637 | + return NULL; |
|---|
| 1638 | + if (iov_iter_is_bvec(new)) |
|---|
| 1504 | 1639 | return new->bvec = kmemdup(new->bvec, |
|---|
| 1505 | 1640 | new->nr_segs * sizeof(struct bio_vec), |
|---|
| 1506 | 1641 | flags); |
|---|
| .. | .. |
|---|
| 1512 | 1647 | } |
|---|
| 1513 | 1648 | EXPORT_SYMBOL(dup_iter); |
|---|
| 1514 | 1649 | |
|---|
| 1650 | +static int copy_compat_iovec_from_user(struct iovec *iov, |
|---|
| 1651 | + const struct iovec __user *uvec, unsigned long nr_segs) |
|---|
| 1652 | +{ |
|---|
| 1653 | + const struct compat_iovec __user *uiov = |
|---|
| 1654 | + (const struct compat_iovec __user *)uvec; |
|---|
| 1655 | + int ret = -EFAULT, i; |
|---|
| 1656 | + |
|---|
| 1657 | + if (!user_access_begin(uiov, nr_segs * sizeof(*uiov))) |
|---|
| 1658 | + return -EFAULT; |
|---|
| 1659 | + |
|---|
| 1660 | + for (i = 0; i < nr_segs; i++) { |
|---|
| 1661 | + compat_uptr_t buf; |
|---|
| 1662 | + compat_ssize_t len; |
|---|
| 1663 | + |
|---|
| 1664 | + unsafe_get_user(len, &uiov[i].iov_len, uaccess_end); |
|---|
| 1665 | + unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end); |
|---|
| 1666 | + |
|---|
| 1667 | + /* check for compat_size_t not fitting in compat_ssize_t .. */ |
|---|
| 1668 | + if (len < 0) { |
|---|
| 1669 | + ret = -EINVAL; |
|---|
| 1670 | + goto uaccess_end; |
|---|
| 1671 | + } |
|---|
| 1672 | + iov[i].iov_base = compat_ptr(buf); |
|---|
| 1673 | + iov[i].iov_len = len; |
|---|
| 1674 | + } |
|---|
| 1675 | + |
|---|
| 1676 | + ret = 0; |
|---|
| 1677 | +uaccess_end: |
|---|
| 1678 | + user_access_end(); |
|---|
| 1679 | + return ret; |
|---|
| 1680 | +} |
|---|
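
The `len < 0` test above is a range check in disguise: a `compat_size_t` too large for `compat_ssize_t` turns negative in the implicit conversion. A standalone illustration, assuming the usual 32-bit compat ABI:

```c
/* Illustration only (assumes 32-bit compat_ssize_t): any length with
 * the top bit set exceeds COMPAT_SSIZE_MAX and reads back negative,
 * which is exactly what the loop above rejects with -EINVAL. */
static bool compat_len_valid(compat_size_t ulen)
{
	compat_ssize_t len = (compat_ssize_t)ulen;

	return len >= 0;	/* false for e.g. 0x80000000u */
}
```
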
| 1681 | + |
|---|
| 1682 | +static int copy_iovec_from_user(struct iovec *iov, |
|---|
| 1683 | + const struct iovec __user *uvec, unsigned long nr_segs) |
|---|
| 1684 | +{ |
|---|
| 1685 | + unsigned long seg; |
|---|
| 1686 | + |
|---|
| 1687 | + if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec))) |
|---|
| 1688 | + return -EFAULT; |
|---|
| 1689 | + for (seg = 0; seg < nr_segs; seg++) { |
|---|
| 1690 | + if ((ssize_t)iov[seg].iov_len < 0) |
|---|
| 1691 | + return -EINVAL; |
|---|
| 1692 | + } |
|---|
| 1693 | + |
|---|
| 1694 | + return 0; |
|---|
| 1695 | +} |
|---|
| 1696 | + |
|---|
| 1697 | +struct iovec *iovec_from_user(const struct iovec __user *uvec, |
|---|
| 1698 | + unsigned long nr_segs, unsigned long fast_segs, |
|---|
| 1699 | + struct iovec *fast_iov, bool compat) |
|---|
| 1700 | +{ |
|---|
| 1701 | + struct iovec *iov = fast_iov; |
|---|
| 1702 | + int ret; |
|---|
| 1703 | + |
|---|
| 1704 | + /* |
|---|
| 1705 | + * SuS says "The readv() function *may* fail if the iovcnt argument was |
|---|
| 1706 | + * less than or equal to 0, or greater than {IOV_MAX}." Linux has |
|---|
| 1707 | + * traditionally returned zero for zero segments, so... |
|---|
| 1708 | + */ |
|---|
| 1709 | + if (nr_segs == 0) |
|---|
| 1710 | + return iov; |
|---|
| 1711 | + if (nr_segs > UIO_MAXIOV) |
|---|
| 1712 | + return ERR_PTR(-EINVAL); |
|---|
| 1713 | + if (nr_segs > fast_segs) { |
|---|
| 1714 | + iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL); |
|---|
| 1715 | + if (!iov) |
|---|
| 1716 | + return ERR_PTR(-ENOMEM); |
|---|
| 1717 | + } |
|---|
| 1718 | + |
|---|
| 1719 | + if (compat) |
|---|
| 1720 | + ret = copy_compat_iovec_from_user(iov, uvec, nr_segs); |
|---|
| 1721 | + else |
|---|
| 1722 | + ret = copy_iovec_from_user(iov, uvec, nr_segs); |
|---|
| 1723 | + if (ret) { |
|---|
| 1724 | + if (iov != fast_iov) |
|---|
| 1725 | + kfree(iov); |
|---|
| 1726 | + return ERR_PTR(ret); |
|---|
| 1727 | + } |
|---|
| 1728 | + |
|---|
| 1729 | + return iov; |
|---|
| 1730 | +} |
|---|
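
Because `iovec_from_user()` returns either the caller's fast array or a fresh allocation, the free on the way out must be conditional. A sketch of the expected calling pattern (the surrounding syscall context and the validation step are assumed):

```c
/* Sketch: typical caller of iovec_from_user(). Segment counts up to
 * UIO_FASTIOV land in iovstack and need no kfree(). */
static ssize_t use_user_iovec(const struct iovec __user *uvec,
			      unsigned long nr_segs)
{
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov;

	iov = iovec_from_user(uvec, nr_segs, ARRAY_SIZE(iovstack),
			      iovstack, in_compat_syscall());
	if (IS_ERR(iov))
		return PTR_ERR(iov);

	/* ... validate and use iov[0 .. nr_segs) ... */

	if (iov != iovstack)
		kfree(iov);
	return 0;
}
```
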
| 1731 | + |
|---|
| 1732 | +ssize_t __import_iovec(int type, const struct iovec __user *uvec, |
|---|
| 1733 | + unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, |
|---|
| 1734 | + struct iov_iter *i, bool compat) |
|---|
| 1735 | +{ |
|---|
| 1736 | + ssize_t total_len = 0; |
|---|
| 1737 | + unsigned long seg; |
|---|
| 1738 | + struct iovec *iov; |
|---|
| 1739 | + |
|---|
| 1740 | + iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat); |
|---|
| 1741 | + if (IS_ERR(iov)) { |
|---|
| 1742 | + *iovp = NULL; |
|---|
| 1743 | + return PTR_ERR(iov); |
|---|
| 1744 | + } |
|---|
| 1745 | + |
|---|
| 1746 | + /* |
|---|
| 1747 | + * According to the Single Unix Specification we should return EINVAL if |
|---|
| 1748 | + * an element length is < 0 when cast to ssize_t or if the total length |
|---|
| 1749 | + * would overflow the ssize_t return value of the system call. |
|---|
| 1750 | + * |
|---|
| 1751 | + * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the |
|---|
| 1752 | + * overflow case. |
|---|
| 1753 | + */ |
|---|
| 1754 | + for (seg = 0; seg < nr_segs; seg++) { |
|---|
| 1755 | + ssize_t len = (ssize_t)iov[seg].iov_len; |
|---|
| 1756 | + |
|---|
| 1757 | + if (!access_ok(iov[seg].iov_base, len)) { |
|---|
| 1758 | + if (iov != *iovp) |
|---|
| 1759 | + kfree(iov); |
|---|
| 1760 | + *iovp = NULL; |
|---|
| 1761 | + return -EFAULT; |
|---|
| 1762 | + } |
|---|
| 1763 | + |
|---|
| 1764 | + if (len > MAX_RW_COUNT - total_len) { |
|---|
| 1765 | + len = MAX_RW_COUNT - total_len; |
|---|
| 1766 | + iov[seg].iov_len = len; |
|---|
| 1767 | + } |
|---|
| 1768 | + total_len += len; |
|---|
| 1769 | + } |
|---|
| 1770 | + |
|---|
| 1771 | + iov_iter_init(i, type, iov, nr_segs, total_len); |
|---|
| 1772 | + if (iov == *iovp) |
|---|
| 1773 | + *iovp = NULL; |
|---|
| 1774 | + else |
|---|
| 1775 | + *iovp = iov; |
|---|
| 1776 | + return total_len; |
|---|
| 1777 | +} |
|---|
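
Note what the clamp in the loop does: rather than failing a request whose total exceeds `MAX_RW_COUNT`, it silently shortens later segments, possibly to zero, so the import still succeeds with a capped `total_len`. A standalone restatement of that rule (illustrative, mirroring the loop body):

```c
#include <linux/fs.h>	/* MAX_RW_COUNT */

/* Mirrors the clamping in __import_iovec(): shorten @len so that
 * total_len + len never exceeds MAX_RW_COUNT. */
static size_t clamp_to_budget(size_t len, size_t total_len)
{
	if (len > MAX_RW_COUNT - total_len)
		len = MAX_RW_COUNT - total_len;
	return len;
}

/* e.g. clamp_to_budget(anything, MAX_RW_COUNT) == 0: once the budget
 * is spent, remaining segments import as zero-length. */
```
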
| 1778 | + |
|---|
| 1515 | 1779 | /** |
|---|
| 1516 | 1780 | * import_iovec() - Copy an array of &struct iovec from userspace |
|---|
| 1517 | 1781 | * into the kernel, check that it is valid, and initialize a new |
|---|
| 1518 | 1782 | * &struct iov_iter iterator to access it. |
|---|
| 1519 | 1783 | * |
|---|
| 1520 | 1784 | * @type: One of %READ or %WRITE. |
|---|
| 1521 | | - * @uvector: Pointer to the userspace array. |
|---|
| 1785 | + * @uvec: Pointer to the userspace array. |
|---|
| 1522 | 1786 | * @nr_segs: Number of elements in userspace array. |
|---|
| 1523 | 1787 | * @fast_segs: Number of elements in @iov. |
|---|
| 1524 | | - * @iov: (input and output parameter) Pointer to pointer to (usually small |
|---|
| 1788 | + * @iovp: (input and output parameter) Pointer to pointer to (usually small |
|---|
| 1525 | 1789 | * on-stack) kernel array. |
|---|
| 1526 | 1790 | * @i: Pointer to iterator that will be initialized on success. |
|---|
| 1527 | 1791 | * |
|---|
| .. | .. |
|---|
| 1532 | 1796 | * on-stack array was used or not (and regardless of whether this function |
|---|
| 1533 | 1797 | * returns an error or not). |
|---|
| 1534 | 1798 | * |
|---|
| 1535 | | - * Return: 0 on success or negative error code on error. |
|---|
| 1799 | + * Return: Negative error code on error, number of bytes imported on success. |
|---|
| 1536 | 1800 | */ |
|---|
| 1537 | | -int import_iovec(int type, const struct iovec __user * uvector, |
|---|
| 1801 | +ssize_t import_iovec(int type, const struct iovec __user *uvec, |
|---|
| 1538 | 1802 | unsigned nr_segs, unsigned fast_segs, |
|---|
| 1539 | | - struct iovec **iov, struct iov_iter *i) |
|---|
| 1803 | + struct iovec **iovp, struct iov_iter *i) |
|---|
| 1540 | 1804 | { |
|---|
| 1541 | | - ssize_t n; |
|---|
| 1542 | | - struct iovec *p; |
|---|
| 1543 | | - n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs, |
|---|
| 1544 | | - *iov, &p); |
|---|
| 1545 | | - if (n < 0) { |
|---|
| 1546 | | - if (p != *iov) |
|---|
| 1547 | | - kfree(p); |
|---|
| 1548 | | - *iov = NULL; |
|---|
| 1549 | | - return n; |
|---|
| 1550 | | - } |
|---|
| 1551 | | - iov_iter_init(i, type, p, nr_segs, n); |
|---|
| 1552 | | - *iov = p == *iov ? NULL : p; |
|---|
| 1553 | | - return 0; |
|---|
| 1805 | + return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i, |
|---|
| 1806 | + in_compat_syscall()); |
|---|
| 1554 | 1807 | } |
|---|
| 1555 | 1808 | EXPORT_SYMBOL(import_iovec); |
|---|
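
A sketch of the calling convention the kernel-doc above describes, as used by readv()-style syscall paths; `demo_import()` itself is a made-up name:

```c
/* Sketch: import, use, free. On failure import_iovec() already freed
 * any slow-path allocation and NULLed @iov; kfree(NULL) is a no-op,
 * so the unconditional kfree() below is always safe. */
static ssize_t demo_import(const struct iovec __user *uvec,
			   unsigned int nr_segs)
{
	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
			   &iov, &iter);
	if (ret < 0)
		return ret;

	/* ... up to @ret bytes are reachable through &iter ... */

	kfree(iov);
	return ret;
}
```
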
| 1556 | | - |
|---|
| 1557 | | -#ifdef CONFIG_COMPAT |
|---|
| 1558 | | -#include <linux/compat.h> |
|---|
| 1559 | | - |
|---|
| 1560 | | -int compat_import_iovec(int type, const struct compat_iovec __user * uvector, |
|---|
| 1561 | | - unsigned nr_segs, unsigned fast_segs, |
|---|
| 1562 | | - struct iovec **iov, struct iov_iter *i) |
|---|
| 1563 | | -{ |
|---|
| 1564 | | - ssize_t n; |
|---|
| 1565 | | - struct iovec *p; |
|---|
| 1566 | | - n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs, |
|---|
| 1567 | | - *iov, &p); |
|---|
| 1568 | | - if (n < 0) { |
|---|
| 1569 | | - if (p != *iov) |
|---|
| 1570 | | - kfree(p); |
|---|
| 1571 | | - *iov = NULL; |
|---|
| 1572 | | - return n; |
|---|
| 1573 | | - } |
|---|
| 1574 | | - iov_iter_init(i, type, p, nr_segs, n); |
|---|
| 1575 | | - *iov = p == *iov ? NULL : p; |
|---|
| 1576 | | - return 0; |
|---|
| 1577 | | -} |
|---|
| 1578 | | -#endif |
|---|
| 1579 | 1809 | |
|---|
| 1580 | 1810 | int import_single_range(int rw, void __user *buf, size_t len, |
|---|
| 1581 | 1811 | struct iovec *iov, struct iov_iter *i) |
|---|
| 1582 | 1812 | { |
|---|
| 1583 | 1813 | if (len > MAX_RW_COUNT) |
|---|
| 1584 | 1814 | len = MAX_RW_COUNT; |
|---|
| 1585 | | - if (unlikely(!access_ok(!rw, buf, len))) |
|---|
| 1815 | + if (unlikely(!access_ok(buf, len))) |
|---|
| 1586 | 1816 | return -EFAULT; |
|---|
| 1587 | 1817 | |
|---|
| 1588 | 1818 | iov->iov_base = buf; |
|---|
| .. | .. |
|---|
| 1592 | 1822 | } |
|---|
| 1593 | 1823 | EXPORT_SYMBOL(import_single_range); |
|---|
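
For completeness, a sketch of the single-buffer variant; note the `struct iovec` must stay live for as long as the iterator is used, since the iterator points into it:

```c
/* Sketch: import a single user buffer, read(2)-style. */
static ssize_t demo_single(void __user *ubuf, size_t len)
{
	struct iovec iov;
	struct iov_iter iter;
	int ret = import_single_range(READ, ubuf, len, &iov, &iter);

	if (unlikely(ret))
		return ret;	/* -EFAULT: buffer not accessible */
	/* &iter now spans min(len, MAX_RW_COUNT) bytes of @ubuf */
	return iov_iter_count(&iter);
}
```
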
| 1594 | 1824 | |
|---|
| 1595 | | -int iov_iter_for_each_range(struct iov_iter *i, size_t bytes, |
|---|
| 1596 | | - int (*f)(struct kvec *vec, void *context), |
|---|
| 1597 | | - void *context) |
|---|
| 1825 | +/** |
|---|
| 1826 | + * iov_iter_restore() - Restore a &struct iov_iter to the same state as when |
|---|
| 1827 | + * iov_iter_save_state() was called. |
|---|
| 1828 | + * |
|---|
| 1829 | + * @i: &struct iov_iter to restore |
|---|
| 1830 | + * @state: state to restore from |
|---|
| 1831 | + * |
|---|
| 1832 | + * Used after iov_iter_save_state() to restore @i, if operations may |
|---|
| 1833 | + * have advanced it. |
|---|
| 1834 | + * |
|---|
| 1835 | + * Note: only works on ITER_IOVEC, ITER_BVEC, and ITER_KVEC |
|---|
| 1836 | + */ |
|---|
| 1837 | +void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state) |
|---|
| 1598 | 1838 | { |
|---|
| 1599 | | - struct kvec w; |
|---|
| 1600 | | - int err = -EINVAL; |
|---|
| 1601 | | - if (!bytes) |
|---|
| 1602 | | - return 0; |
|---|
| 1603 | | - |
|---|
| 1604 | | - iterate_all_kinds(i, bytes, v, -EINVAL, ({ |
|---|
| 1605 | | - w.iov_base = kmap(v.bv_page) + v.bv_offset; |
|---|
| 1606 | | - w.iov_len = v.bv_len; |
|---|
| 1607 | | - err = f(&w, context); |
|---|
| 1608 | | - kunmap(v.bv_page); |
|---|
| 1609 | | - err;}), ({ |
|---|
| 1610 | | - w = v; |
|---|
| 1611 | | - err = f(&w, context);}) |
|---|
| 1612 | | - ) |
|---|
| 1613 | | - return err; |
|---|
| 1839 | + if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i) && |
|---|
| 1840 | + !iov_iter_is_kvec(i))) |
|---|
| 1841 | + return; |
|---|
| 1842 | + i->iov_offset = state->iov_offset; |
|---|
| 1843 | + i->count = state->count; |
|---|
| 1844 | + /* |
|---|
| 1845 | + * For the *vec iters, nr_segs + iov is constant - if we increment |
|---|
| 1846 | + * the vec, then we also decrement the nr_segs count. Hence we don't |
|---|
| 1847 | + * need to track both of these, just one is enough and we can derive |
|---|
| 1848 | + * the other from that. ITER_KVEC and ITER_IOVEC are the same struct |
|---|
| 1849 | + * size, so we can just increment the iov pointer as they are unionized. |
|---|
| 1850 | + * ITER_BVEC _may_ be the same size on some archs, but on others it is |
|---|
| 1851 | + * not. Be safe and handle it separately. |
|---|
| 1852 | + */ |
|---|
| 1853 | + BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec)); |
|---|
| 1854 | + if (iov_iter_is_bvec(i)) |
|---|
| 1855 | + i->bvec -= state->nr_segs - i->nr_segs; |
|---|
| 1856 | + else |
|---|
| 1857 | + i->iov -= state->nr_segs - i->nr_segs; |
|---|
| 1858 | + i->nr_segs = state->nr_segs; |
|---|
| 1614 | 1859 | } |
|---|
| 1615 | | -EXPORT_SYMBOL(iov_iter_for_each_range); |
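
A sketch of the save/restore pairing this function exists for; `iov_iter_save_state()` is the real counterpart helper, while `do_retryable_copy()` is a hypothetical stand-in for any operation that may advance the iterator before failing:

```c
/* Sketch: snapshot the iterator, attempt an operation that may
 * consume part of it, and rewind for one retry on -EAGAIN. */
static ssize_t retry_once(struct iov_iter *iter)
{
	struct iov_iter_state state;
	ssize_t ret;

	iov_iter_save_state(iter, &state);
	ret = do_retryable_copy(iter);	/* hypothetical; may advance iter */
	if (ret == -EAGAIN) {
		iov_iter_restore(iter, &state);	/* rewind to the snapshot */
		ret = do_retryable_copy(iter);
	}
	return ret;
}
```
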
|---|