.. | .. |
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
| 2 | +#include <crypto/hash.h> |
1 | 3 | #include <linux/export.h> |
2 | 4 | #include <linux/bvec.h> |
| 5 | +#include <linux/fault-inject-usercopy.h> |
3 | 6 | #include <linux/uio.h> |
4 | 7 | #include <linux/pagemap.h> |
5 | 8 | #include <linux/slab.h> |
6 | 9 | #include <linux/vmalloc.h> |
7 | 10 | #include <linux/splice.h> |
| 11 | +#include <linux/compat.h> |
8 | 12 | #include <net/checksum.h> |
| 13 | +#include <linux/scatterlist.h> |
| 14 | +#include <linux/instrumented.h> |
9 | 15 | |
10 | 16 | #define PIPE_PARANOIA /* for now */ |
11 | 17 | |
.. | .. |
83 | 89 | const struct kvec *kvec; \ |
84 | 90 | struct kvec v; \ |
85 | 91 | iterate_kvec(i, n, v, kvec, skip, (K)) \ |
| 92 | + } else if (unlikely(i->type & ITER_DISCARD)) { \ |
86 | 93 | } else { \ |
87 | 94 | const struct iovec *iov; \ |
88 | 95 | struct iovec v; \ |
.. | .. |
114 | 121 | } \ |
115 | 122 | i->nr_segs -= kvec - i->kvec; \ |
116 | 123 | i->kvec = kvec; \ |
| 124 | + } else if (unlikely(i->type & ITER_DISCARD)) { \ |
| 125 | + skip += n; \ |
117 | 126 | } else { \ |
118 | 127 | const struct iovec *iov; \ |
119 | 128 | struct iovec v; \ |
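
The two new ITER_DISCARD arms hook the discard iterator into the central iteration macros: iterate_all_kinds() treats it as having no segments at all, while iterate_and_advance() only moves the skip cursor. Both rely on type-flag helpers from <linux/uio.h>; a hedged sketch of those definitions as they look in this era of the tree (paraphrased, not part of this diff):

```c
/* Paraphrased from <linux/uio.h>: the low bits carry the data
 * direction (READ/WRITE); the remaining bits name the iterator type. */
enum iter_type {
	ITER_IOVEC   = 4,
	ITER_KVEC    = 8,
	ITER_BVEC    = 16,
	ITER_PIPE    = 32,
	ITER_DISCARD = 64,
};

static inline unsigned int iov_iter_type(const struct iov_iter *i)
{
	return i->type & ~(READ | WRITE);
}

static inline bool iov_iter_is_discard(const struct iov_iter *i)
{
	return iov_iter_type(i) == ITER_DISCARD;
}
```
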
.. | .. |
132 | 141 | |
133 | 142 | static int copyout(void __user *to, const void *from, size_t n) |
134 | 143 | { |
135 | | - if (access_ok(VERIFY_WRITE, to, n)) { |
136 | | - kasan_check_read(from, n); |
| 144 | + if (should_fail_usercopy()) |
| 145 | + return n; |
| 146 | + if (access_ok(to, n)) { |
| 147 | + instrument_copy_to_user(to, from, n); |
137 | 148 | n = raw_copy_to_user(to, from, n); |
138 | 149 | } |
139 | 150 | return n; |
.. | .. |
141 | 152 | |
142 | 153 | static int copyin(void *to, const void __user *from, size_t n) |
143 | 154 | { |
144 | | - if (access_ok(VERIFY_READ, from, n)) { |
145 | | - kasan_check_write(to, n); |
| 155 | + if (should_fail_usercopy()) |
| 156 | + return n; |
| 157 | + if (access_ok(from, n)) { |
| 158 | + instrument_copy_from_user(to, from, n); |
146 | 159 | n = raw_copy_from_user(to, from, n); |
147 | 160 | } |
148 | 161 | return n; |
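
Both helpers keep the kernel's usual usercopy convention: the return value is the number of bytes *not* copied, which is why the fault-injection hook can simulate total failure by simply returning n. A minimal sketch of how a caller consumes that convention (hypothetical wrapper, not part of this diff):

```c
/* Illustrative only: callers treat the return as "bytes NOT copied". */
static size_t copy_some(void __user *udst, const void *ksrc, size_t len)
{
	size_t left = copyout(udst, ksrc, len);	/* 0 means full success */

	return len - left;			/* bytes actually copied */
}
```
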
.. | .. |
320 | 333 | static bool sanity(const struct iov_iter *i) |
321 | 334 | { |
322 | 335 | struct pipe_inode_info *pipe = i->pipe; |
323 | | - int idx = i->idx; |
324 | | - int next = pipe->curbuf + pipe->nrbufs; |
| 336 | + unsigned int p_head = pipe->head; |
| 337 | + unsigned int p_tail = pipe->tail; |
| 338 | + unsigned int p_mask = pipe->ring_size - 1; |
| 339 | + unsigned int p_occupancy = pipe_occupancy(p_head, p_tail); |
| 340 | + unsigned int i_head = i->head; |
| 341 | + unsigned int idx; |
| 342 | + |
325 | 343 | if (i->iov_offset) { |
326 | 344 | struct pipe_buffer *p; |
327 | | - if (unlikely(!pipe->nrbufs)) |
| 345 | + if (unlikely(p_occupancy == 0)) |
328 | 346 | goto Bad; // pipe must be non-empty |
329 | | - if (unlikely(idx != ((next - 1) & (pipe->buffers - 1)))) |
| 347 | + if (unlikely(i_head != p_head - 1)) |
330 | 348 | goto Bad; // must be at the last buffer... |
331 | 349 | |
332 | | - p = &pipe->bufs[idx]; |
| 350 | + p = &pipe->bufs[i_head & p_mask]; |
333 | 351 | if (unlikely(p->offset + p->len != i->iov_offset)) |
334 | 352 | goto Bad; // ... at the end of segment |
335 | 353 | } else { |
336 | | - if (idx != (next & (pipe->buffers - 1))) |
| 354 | + if (i_head != p_head) |
337 | 355 | goto Bad; // must be right after the last buffer |
338 | 356 | } |
339 | 357 | return true; |
340 | 358 | Bad: |
341 | | - printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset); |
342 | | - printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n", |
343 | | - pipe->curbuf, pipe->nrbufs, pipe->buffers); |
344 | | - for (idx = 0; idx < pipe->buffers; idx++) |
| 359 | + printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset); |
| 360 | + printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n", |
| 361 | + p_head, p_tail, pipe->ring_size); |
| 362 | + for (idx = 0; idx < pipe->ring_size; idx++) |
345 | 363 | printk(KERN_ERR "[%p %p %d %d]\n", |
346 | 364 | pipe->bufs[idx].ops, |
347 | 365 | pipe->bufs[idx].page, |
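
The rewritten sanity() is the first user of the new pipe ring representation: head and tail are free-running unsigned counters and only array indexing is masked, so head == tail means empty even after wrap-around. A sketch of the helpers it calls, paraphrased from <linux/pipe_fs_i.h> of this era, with a worked example:

```c
static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail)
{
	return head - tail;		/* well-defined modulo 2^32 */
}

static inline bool pipe_empty(unsigned int head, unsigned int tail)
{
	return !pipe_occupancy(head, tail);
}

static inline bool pipe_full(unsigned int head, unsigned int tail,
			     unsigned int limit)
{
	return pipe_occupancy(head, tail) >= limit;
}

/* Worked example: ring_size = 16, head = 0x10003, tail = 0x10001:
 * occupancy = 2 buffers, and the newest one lives at
 * bufs[(head - 1) & 15] = bufs[2]. */
```
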
.. | .. |
354 | 372 | #define sanity(i) true |
355 | 373 | #endif |
356 | 374 | |
357 | | -static inline int next_idx(int idx, struct pipe_inode_info *pipe) |
358 | | -{ |
359 | | - return (idx + 1) & (pipe->buffers - 1); |
360 | | -} |
361 | | - |
362 | 375 | static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes, |
363 | 376 | struct iov_iter *i) |
364 | 377 | { |
365 | 378 | struct pipe_inode_info *pipe = i->pipe; |
366 | 379 | struct pipe_buffer *buf; |
| 380 | + unsigned int p_tail = pipe->tail; |
| 381 | + unsigned int p_mask = pipe->ring_size - 1; |
| 382 | + unsigned int i_head = i->head; |
367 | 383 | size_t off; |
368 | | - int idx; |
369 | 384 | |
370 | 385 | if (unlikely(bytes > i->count)) |
371 | 386 | bytes = i->count; |
.. | .. |
377 | 392 | return 0; |
378 | 393 | |
379 | 394 | off = i->iov_offset; |
380 | | - idx = i->idx; |
381 | | - buf = &pipe->bufs[idx]; |
| 395 | + buf = &pipe->bufs[i_head & p_mask]; |
382 | 396 | if (off) { |
383 | 397 | if (offset == off && buf->page == page) { |
384 | 398 | /* merge with the last one */ |
.. | .. |
386 | 400 | i->iov_offset += bytes; |
387 | 401 | goto out; |
388 | 402 | } |
389 | | - idx = next_idx(idx, pipe); |
390 | | - buf = &pipe->bufs[idx]; |
| 403 | + i_head++; |
| 404 | + buf = &pipe->bufs[i_head & p_mask]; |
391 | 405 | } |
392 | | - if (idx == pipe->curbuf && pipe->nrbufs) |
| 406 | + if (pipe_full(i_head, p_tail, pipe->max_usage)) |
393 | 407 | return 0; |
394 | | - pipe->nrbufs++; |
| 408 | + |
395 | 409 | buf->ops = &page_cache_pipe_buf_ops; |
396 | 410 | buf->flags = 0; |
397 | | - get_page(buf->page = page); |
| 411 | + get_page(page); |
| 412 | + buf->page = page; |
398 | 413 | buf->offset = offset; |
399 | 414 | buf->len = bytes; |
| 415 | + |
| 416 | + pipe->head = i_head + 1; |
400 | 417 | i->iov_offset = offset + bytes; |
401 | | - i->idx = idx; |
| 418 | + i->head = i_head; |
402 | 419 | out: |
403 | 420 | i->count -= bytes; |
404 | 421 | return bytes; |
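
The merge branch above ("merge with the last one") is what lets successive copies of adjacent ranges of the same page share one ring slot; a worked example with hypothetical values:

```c
/* Previous call left:  buf->page = P, buf->offset = 0, buf->len = 512,
 *                      i->iov_offset = 512.
 * Next call:           copy_page_to_iter_pipe(P, 512, 512, i).
 * offset == i->iov_offset and buf->page == P, so the slot just grows:
 *                      buf->len = 1024, i->iov_offset = 1024,
 * and no second pipe_buffer is consumed. */
```
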
.. | .. |
429 | 446 | } |
430 | 447 | EXPORT_SYMBOL(iov_iter_fault_in_readable); |
431 | 448 | |
432 | | -void iov_iter_init(struct iov_iter *i, int direction, |
| 449 | +void iov_iter_init(struct iov_iter *i, unsigned int direction, |
433 | 450 | const struct iovec *iov, unsigned long nr_segs, |
434 | 451 | size_t count) |
435 | 452 | { |
| 453 | + WARN_ON(direction & ~(READ | WRITE)); |
| 454 | + direction &= READ | WRITE; |
| 455 | + |
436 | 456 | /* It will get better. Eventually... */ |
437 | 457 | if (uaccess_kernel()) { |
438 | | - direction |= ITER_KVEC; |
439 | | - i->type = direction; |
| 458 | + i->type = ITER_KVEC | direction; |
440 | 459 | i->kvec = (struct kvec *)iov; |
441 | 460 | } else { |
442 | | - i->type = direction; |
| 461 | + i->type = ITER_IOVEC | direction; |
443 | 462 | i->iov = iov; |
444 | 463 | } |
445 | 464 | i->nr_segs = nr_segs; |
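
With the direction argument now sanitised, callers pass plain READ or WRITE and the iterator flavour (ITER_IOVEC vs ITER_KVEC) is inferred from the uaccess context. A minimal usage sketch with hypothetical buffer names (READ here means "data flows into the buffers", as in read(2)):

```c
struct iovec iov = { .iov_base = ubuf, .iov_len = len };
struct iov_iter iter;

iov_iter_init(&iter, READ, &iov, 1, len);
/* ...and then e.g. copy_to_iter(kbuf, len, &iter) fills the user buffer. */
```
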
.. | .. |
474 | 493 | return buf->ops == &default_pipe_buf_ops; |
475 | 494 | } |
476 | 495 | |
477 | | -static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp) |
| 496 | +static inline void data_start(const struct iov_iter *i, |
| 497 | + unsigned int *iter_headp, size_t *offp) |
478 | 498 | { |
| 499 | + unsigned int p_mask = i->pipe->ring_size - 1; |
| 500 | + unsigned int iter_head = i->head; |
479 | 501 | size_t off = i->iov_offset; |
480 | | - int idx = i->idx; |
481 | | - if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) { |
482 | | - idx = next_idx(idx, i->pipe); |
| 502 | + |
| 503 | + if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) || |
| 504 | + off == PAGE_SIZE)) { |
| 505 | + iter_head++; |
483 | 506 | off = 0; |
484 | 507 | } |
485 | | - *idxp = idx; |
| 508 | + *iter_headp = iter_head; |
486 | 509 | *offp = off; |
487 | 510 | } |
488 | 511 | |
489 | 512 | static size_t push_pipe(struct iov_iter *i, size_t size, |
490 | | - int *idxp, size_t *offp) |
| 513 | + int *iter_headp, size_t *offp) |
491 | 514 | { |
492 | 515 | struct pipe_inode_info *pipe = i->pipe; |
| 516 | + unsigned int p_tail = pipe->tail; |
| 517 | + unsigned int p_mask = pipe->ring_size - 1; |
| 518 | + unsigned int iter_head; |
493 | 519 | size_t off; |
494 | | - int idx; |
495 | 520 | ssize_t left; |
496 | 521 | |
497 | 522 | if (unlikely(size > i->count)) |
.. | .. |
500 | 525 | return 0; |
501 | 526 | |
502 | 527 | left = size; |
503 | | - data_start(i, &idx, &off); |
504 | | - *idxp = idx; |
| 528 | + data_start(i, &iter_head, &off); |
| 529 | + *iter_headp = iter_head; |
505 | 530 | *offp = off; |
506 | 531 | if (off) { |
507 | 532 | left -= PAGE_SIZE - off; |
508 | 533 | if (left <= 0) { |
509 | | - pipe->bufs[idx].len += size; |
| 534 | + pipe->bufs[iter_head & p_mask].len += size; |
510 | 535 | return size; |
511 | 536 | } |
512 | | - pipe->bufs[idx].len = PAGE_SIZE; |
513 | | - idx = next_idx(idx, pipe); |
| 537 | + pipe->bufs[iter_head & p_mask].len = PAGE_SIZE; |
| 538 | + iter_head++; |
514 | 539 | } |
515 | | - while (idx != pipe->curbuf || !pipe->nrbufs) { |
| 540 | + while (!pipe_full(iter_head, p_tail, pipe->max_usage)) { |
| 541 | + struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask]; |
516 | 542 | struct page *page = alloc_page(GFP_USER); |
517 | 543 | if (!page) |
518 | 544 | break; |
519 | | - pipe->nrbufs++; |
520 | | - pipe->bufs[idx].ops = &default_pipe_buf_ops; |
521 | | - pipe->bufs[idx].flags = 0; |
522 | | - pipe->bufs[idx].page = page; |
523 | | - pipe->bufs[idx].offset = 0; |
524 | | - if (left <= PAGE_SIZE) { |
525 | | - pipe->bufs[idx].len = left; |
| 545 | + |
| 546 | + buf->ops = &default_pipe_buf_ops; |
| 547 | + buf->flags = 0; |
| 548 | + buf->page = page; |
| 549 | + buf->offset = 0; |
| 550 | + buf->len = min_t(ssize_t, left, PAGE_SIZE); |
| 551 | + left -= buf->len; |
| 552 | + iter_head++; |
| 553 | + pipe->head = iter_head; |
| 554 | + |
| 555 | + if (left == 0) |
526 | 556 | return size; |
527 | | - } |
528 | | - pipe->bufs[idx].len = PAGE_SIZE; |
529 | | - left -= PAGE_SIZE; |
530 | | - idx = next_idx(idx, pipe); |
531 | 557 | } |
532 | 558 | return size - left; |
533 | 559 | } |
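
The restructured allocation loop is easier to check with concrete numbers; a worked walk-through assuming PAGE_SIZE = 4096:

```c
/* push_pipe(i, 10000, ...) with the last buffer already holding 300
 * bytes (off = 300):
 *   left = 10000 - (4096 - 300) = 6204; the partial buffer is padded
 *   to a full page and iter_head advances.
 *   1st new page: buf->len = min(6204, 4096) = 4096, left = 2108.
 *   2nd new page: buf->len = 2108, left = 0 -> returns 10000.
 * If alloc_page() fails or pipe_full() trips first, the loop breaks
 * and the short total (size - left) is returned instead. */
```
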
.. | .. |
536 | 562 | struct iov_iter *i) |
537 | 563 | { |
538 | 564 | struct pipe_inode_info *pipe = i->pipe; |
| 565 | + unsigned int p_mask = pipe->ring_size - 1; |
| 566 | + unsigned int i_head; |
539 | 567 | size_t n, off; |
540 | | - int idx; |
541 | 568 | |
542 | 569 | if (!sanity(i)) |
543 | 570 | return 0; |
544 | 571 | |
545 | | - bytes = n = push_pipe(i, bytes, &idx, &off); |
| 572 | + bytes = n = push_pipe(i, bytes, &i_head, &off); |
546 | 573 | if (unlikely(!n)) |
547 | 574 | return 0; |
548 | | - for ( ; n; idx = next_idx(idx, pipe), off = 0) { |
| 575 | + do { |
549 | 576 | size_t chunk = min_t(size_t, n, PAGE_SIZE - off); |
550 | | - memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk); |
551 | | - i->idx = idx; |
| 577 | + memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk); |
| 578 | + i->head = i_head; |
552 | 579 | i->iov_offset = off + chunk; |
553 | 580 | n -= chunk; |
554 | 581 | addr += chunk; |
555 | | - } |
| 582 | + off = 0; |
| 583 | + i_head++; |
| 584 | + } while (n); |
556 | 585 | i->count -= bytes; |
| 586 | + return bytes; |
| 587 | +} |
| 588 | + |
| 589 | +static __wsum csum_and_memcpy(void *to, const void *from, size_t len, |
| 590 | + __wsum sum, size_t off) |
| 591 | +{ |
| 592 | + __wsum next = csum_partial_copy_nocheck(from, to, len); |
| 593 | + return csum_block_add(sum, next, off); |
| 594 | +} |
| 595 | + |
| 596 | +static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, |
| 597 | + struct csum_state *csstate, |
| 598 | + struct iov_iter *i) |
| 599 | +{ |
| 600 | + struct pipe_inode_info *pipe = i->pipe; |
| 601 | + unsigned int p_mask = pipe->ring_size - 1; |
| 602 | + __wsum sum = csstate->csum; |
| 603 | + size_t off = csstate->off; |
| 604 | + unsigned int i_head; |
| 605 | + size_t n, r; |
| 606 | + |
| 607 | + if (!sanity(i)) |
| 608 | + return 0; |
| 609 | + |
| 610 | + bytes = n = push_pipe(i, bytes, &i_head, &r); |
| 611 | + if (unlikely(!n)) |
| 612 | + return 0; |
| 613 | + do { |
| 614 | + size_t chunk = min_t(size_t, n, PAGE_SIZE - r); |
| 615 | + char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page); |
| 616 | + sum = csum_and_memcpy(p + r, addr, chunk, sum, off); |
| 617 | + kunmap_atomic(p); |
| 618 | + i->head = i_head; |
| 619 | + i->iov_offset = r + chunk; |
| 620 | + n -= chunk; |
| 621 | + off += chunk; |
| 622 | + addr += chunk; |
| 623 | + r = 0; |
| 624 | + i_head++; |
| 625 | + } while (n); |
| 626 | + i->count -= bytes; |
| 627 | + csstate->csum = sum; |
| 628 | + csstate->off = off; |
557 | 629 | return bytes; |
558 | 630 | } |
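
csum_and_memcpy() threads a running byte offset into csum_block_add() because the Internet checksum is position-sensitive: a partial sum folded in at an odd offset has to be byte-swapped first. A sketch of the property being relied on, paraphrased from <net/checksum.h>:

```c
static inline __wsum csum_block_add_sketch(__wsum csum, __wsum csum2,
					   int offset)
{
	u32 sum = (__force u32)csum2;

	if (offset & 1)			/* odd alignment: rotate to realign */
		sum = ror32(sum, 8);
	return csum_add(csum, (__force __wsum)sum);
}
```
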
559 | 631 | |
560 | 632 | size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) |
561 | 633 | { |
562 | 634 | const char *from = addr; |
563 | | - if (unlikely(i->type & ITER_PIPE)) |
| 635 | + if (unlikely(iov_iter_is_pipe(i))) |
564 | 636 | return copy_pipe_to_iter(addr, bytes, i); |
565 | 637 | if (iter_is_iovec(i)) |
566 | 638 | might_fault(); |
.. | .. |
575 | 647 | } |
576 | 648 | EXPORT_SYMBOL(_copy_to_iter); |
577 | 649 | |
578 | | -#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE |
579 | | -static int copyout_mcsafe(void __user *to, const void *from, size_t n) |
| 650 | +#ifdef CONFIG_ARCH_HAS_COPY_MC |
| 651 | +static int copyout_mc(void __user *to, const void *from, size_t n) |
580 | 652 | { |
581 | | - if (access_ok(VERIFY_WRITE, to, n)) { |
582 | | - kasan_check_read(from, n); |
583 | | - n = copy_to_user_mcsafe((__force void *) to, from, n); |
| 653 | + if (access_ok(to, n)) { |
| 654 | + instrument_copy_to_user(to, from, n); |
| 655 | + n = copy_mc_to_user((__force void *) to, from, n); |
584 | 656 | } |
585 | 657 | return n; |
586 | 658 | } |
587 | 659 | |
588 | | -static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset, |
| 660 | +static unsigned long copy_mc_to_page(struct page *page, size_t offset, |
589 | 661 | const char *from, size_t len) |
590 | 662 | { |
591 | 663 | unsigned long ret; |
592 | 664 | char *to; |
593 | 665 | |
594 | 666 | to = kmap_atomic(page); |
595 | | - ret = memcpy_mcsafe(to + offset, from, len); |
| 667 | + ret = copy_mc_to_kernel(to + offset, from, len); |
596 | 668 | kunmap_atomic(to); |
597 | 669 | |
598 | 670 | return ret; |
599 | 671 | } |
600 | 672 | |
601 | | -static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, |
| 673 | +static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, |
602 | 674 | struct iov_iter *i) |
603 | 675 | { |
604 | 676 | struct pipe_inode_info *pipe = i->pipe; |
| 677 | + unsigned int p_mask = pipe->ring_size - 1; |
| 678 | + unsigned int i_head; |
605 | 679 | size_t n, off, xfer = 0; |
606 | | - int idx; |
607 | 680 | |
608 | 681 | if (!sanity(i)) |
609 | 682 | return 0; |
610 | 683 | |
611 | | - bytes = n = push_pipe(i, bytes, &idx, &off); |
| 684 | + bytes = n = push_pipe(i, bytes, &i_head, &off); |
612 | 685 | if (unlikely(!n)) |
613 | 686 | return 0; |
614 | | - for ( ; n; idx = next_idx(idx, pipe), off = 0) { |
| 687 | + do { |
615 | 688 | size_t chunk = min_t(size_t, n, PAGE_SIZE - off); |
616 | 689 | unsigned long rem; |
617 | 690 | |
618 | | - rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr, |
619 | | - chunk); |
620 | | - i->idx = idx; |
| 691 | + rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page, |
| 692 | + off, addr, chunk); |
| 693 | + i->head = i_head; |
621 | 694 | i->iov_offset = off + chunk - rem; |
622 | 695 | xfer += chunk - rem; |
623 | 696 | if (rem) |
624 | 697 | break; |
625 | 698 | n -= chunk; |
626 | 699 | addr += chunk; |
627 | | - } |
| 700 | + off = 0; |
| 701 | + i_head++; |
| 702 | + } while (n); |
628 | 703 | i->count -= xfer; |
629 | 704 | return xfer; |
630 | 705 | } |
631 | 706 | |
632 | 707 | /** |
633 | | - * _copy_to_iter_mcsafe - copy to user with source-read error exception handling |
| 708 | + * _copy_mc_to_iter - copy to iter with source memory error exception handling |
634 | 709 | * @addr: source kernel address |
635 | 710 | * @bytes: total transfer length |
636 | 711 | * @iter: destination iterator |
637 | 712 | * |
638 | | - * The pmem driver arranges for filesystem-dax to use this facility via |
639 | | - * dax_copy_to_iter() for protecting read/write to persistent memory. |
640 | | - * Unless / until an architecture can guarantee identical performance |
641 | | - * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a |
642 | | - * performance regression to switch more users to the mcsafe version. |
| 713 | + * The pmem driver deploys this for the dax operation |
| 714 | + * (dax_copy_to_iter()) for dax reads (bypass page-cache and the |
| 715 | + * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes |
| 716 | + * successfully copied. |
643 | 717 | * |
644 | | - * Otherwise, the main differences between this and typical _copy_to_iter(). |
| 718 | + * The main differences between this and typical _copy_to_iter(). |
645 | 719 | * |
646 | 720 | * * Typical tail/residue handling after a fault retries the copy |
647 | 721 | * byte-by-byte until the fault happens again. Re-triggering machine |
.. | .. |
652 | 726 | * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies. |
653 | 727 | * Compare to copy_to_iter() where only ITER_IOVEC attempts might return |
654 | 728 | * a short copy. |
655 | | - * |
656 | | - * See MCSAFE_TEST for self-test. |
657 | 729 | */ |
658 | | -size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) |
| 730 | +size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) |
659 | 731 | { |
660 | 732 | const char *from = addr; |
661 | 733 | unsigned long rem, curr_addr, s_addr = (unsigned long) addr; |
662 | 734 | |
663 | | - if (unlikely(i->type & ITER_PIPE)) |
664 | | - return copy_pipe_to_iter_mcsafe(addr, bytes, i); |
| 735 | + if (unlikely(iov_iter_is_pipe(i))) |
| 736 | + return copy_mc_pipe_to_iter(addr, bytes, i); |
665 | 737 | if (iter_is_iovec(i)) |
666 | 738 | might_fault(); |
667 | 739 | iterate_and_advance(i, bytes, v, |
668 | | - copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), |
| 740 | + copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len, |
| 741 | + v.iov_len), |
669 | 742 | ({ |
670 | | - rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset, |
671 | | - (from += v.bv_len) - v.bv_len, v.bv_len); |
| 743 | + rem = copy_mc_to_page(v.bv_page, v.bv_offset, |
| 744 | + (from += v.bv_len) - v.bv_len, v.bv_len); |
672 | 745 | if (rem) { |
673 | 746 | curr_addr = (unsigned long) from; |
674 | 747 | bytes = curr_addr - s_addr - rem; |
.. | .. |
676 | 749 | } |
677 | 750 | }), |
678 | 751 | ({ |
679 | | - rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, |
680 | | - v.iov_len); |
| 752 | + rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len) |
| 753 | + - v.iov_len, v.iov_len); |
681 | 754 | if (rem) { |
682 | 755 | curr_addr = (unsigned long) from; |
683 | 756 | bytes = curr_addr - s_addr - rem; |
.. | .. |
688 | 761 | |
689 | 762 | return bytes; |
690 | 763 | } |
691 | | -EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe); |
692 | | -#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */ |
| 764 | +EXPORT_SYMBOL_GPL(_copy_mc_to_iter); |
| 765 | +#endif /* CONFIG_ARCH_HAS_COPY_MC */ |
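
Per the kernel-doc above, a short return means poison was consumed while reading the source; a hedged sketch of the dax-style caller contract (hypothetical function name):

```c
static ssize_t dax_style_read(void *kaddr, size_t len, struct iov_iter *iter)
{
	size_t done = _copy_mc_to_iter(kaddr, len, iter);

	/* short copy => machine check: report progress, or -EIO if none */
	return done ? done : -EIO;
}
```
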
693 | 766 | |
694 | 767 | size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) |
695 | 768 | { |
696 | 769 | char *to = addr; |
697 | | - if (unlikely(i->type & ITER_PIPE)) { |
| 770 | + if (unlikely(iov_iter_is_pipe(i))) { |
698 | 771 | WARN_ON(1); |
699 | 772 | return 0; |
700 | 773 | } |
.. | .. |
714 | 787 | bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) |
715 | 788 | { |
716 | 789 | char *to = addr; |
717 | | - if (unlikely(i->type & ITER_PIPE)) { |
| 790 | + if (unlikely(iov_iter_is_pipe(i))) { |
718 | 791 | WARN_ON(1); |
719 | 792 | return false; |
720 | 793 | } |
.. | .. |
741 | 814 | size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) |
742 | 815 | { |
743 | 816 | char *to = addr; |
744 | | - if (unlikely(i->type & ITER_PIPE)) { |
| 817 | + if (unlikely(iov_iter_is_pipe(i))) { |
745 | 818 | WARN_ON(1); |
746 | 819 | return 0; |
747 | 820 | } |
.. | .. |
775 | 848 | size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) |
776 | 849 | { |
777 | 850 | char *to = addr; |
778 | | - if (unlikely(i->type & ITER_PIPE)) { |
| 851 | + if (unlikely(iov_iter_is_pipe(i))) { |
779 | 852 | WARN_ON(1); |
780 | 853 | return 0; |
781 | 854 | } |
.. | .. |
796 | 869 | bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) |
797 | 870 | { |
798 | 871 | char *to = addr; |
799 | | - if (unlikely(i->type & ITER_PIPE)) { |
| 872 | + if (unlikely(iov_iter_is_pipe(i))) { |
800 | 873 | WARN_ON(1); |
801 | 874 | return false; |
802 | 875 | } |
.. | .. |
835 | 908 | head = compound_head(page); |
836 | 909 | v += (page - head) << PAGE_SHIFT; |
837 | 910 | |
838 | | - if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head)))) |
| 911 | + if (likely(n <= v && v <= (page_size(head)))) |
839 | 912 | return true; |
840 | 913 | WARN_ON(1); |
841 | 914 | return false; |
.. | .. |
851 | 924 | size_t wanted = copy_to_iter(kaddr + offset, bytes, i); |
852 | 925 | kunmap_atomic(kaddr); |
853 | 926 | return wanted; |
854 | | - } else if (likely(!(i->type & ITER_PIPE))) |
| 927 | + } else if (unlikely(iov_iter_is_discard(i))) { |
| 928 | + if (unlikely(i->count < bytes)) |
| 929 | + bytes = i->count; |
| 930 | + i->count -= bytes; |
| 931 | + return bytes; |
| 932 | + } else if (likely(!iov_iter_is_pipe(i))) |
855 | 933 | return copy_page_to_iter_iovec(page, offset, bytes, i); |
856 | 934 | else |
857 | 935 | return copy_page_to_iter_pipe(page, offset, bytes, i); |
.. | .. |
863 | 941 | { |
864 | 942 | if (unlikely(!page_copy_sane(page, offset, bytes))) |
865 | 943 | return 0; |
866 | | - if (unlikely(i->type & ITER_PIPE)) { |
| 944 | + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { |
867 | 945 | WARN_ON(1); |
868 | 946 | return 0; |
869 | 947 | } |
.. | .. |
880 | 958 | static size_t pipe_zero(size_t bytes, struct iov_iter *i) |
881 | 959 | { |
882 | 960 | struct pipe_inode_info *pipe = i->pipe; |
| 961 | + unsigned int p_mask = pipe->ring_size - 1; |
| 962 | + unsigned int i_head; |
883 | 963 | size_t n, off; |
884 | | - int idx; |
885 | 964 | |
886 | 965 | if (!sanity(i)) |
887 | 966 | return 0; |
888 | 967 | |
889 | | - bytes = n = push_pipe(i, bytes, &idx, &off); |
| 968 | + bytes = n = push_pipe(i, bytes, &i_head, &off); |
890 | 969 | if (unlikely(!n)) |
891 | 970 | return 0; |
892 | 971 | |
893 | | - for ( ; n; idx = next_idx(idx, pipe), off = 0) { |
| 972 | + do { |
894 | 973 | size_t chunk = min_t(size_t, n, PAGE_SIZE - off); |
895 | | - memzero_page(pipe->bufs[idx].page, off, chunk); |
896 | | - i->idx = idx; |
| 974 | + memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk); |
| 975 | + i->head = i_head; |
897 | 976 | i->iov_offset = off + chunk; |
898 | 977 | n -= chunk; |
899 | | - } |
| 978 | + off = 0; |
| 979 | + i_head++; |
| 980 | + } while (n); |
900 | 981 | i->count -= bytes; |
901 | 982 | return bytes; |
902 | 983 | } |
903 | 984 | |
904 | 985 | size_t iov_iter_zero(size_t bytes, struct iov_iter *i) |
905 | 986 | { |
906 | | - if (unlikely(i->type & ITER_PIPE)) |
| 987 | + if (unlikely(iov_iter_is_pipe(i))) |
907 | 988 | return pipe_zero(bytes, i); |
908 | 989 | iterate_and_advance(i, bytes, v, |
909 | 990 | clear_user(v.iov_base, v.iov_len), |
.. | .. |
923 | 1004 | kunmap_atomic(kaddr); |
924 | 1005 | return 0; |
925 | 1006 | } |
926 | | - if (unlikely(i->type & ITER_PIPE)) { |
| 1007 | + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { |
927 | 1008 | kunmap_atomic(kaddr); |
928 | 1009 | WARN_ON(1); |
929 | 1010 | return 0; |
.. | .. |
942 | 1023 | static inline void pipe_truncate(struct iov_iter *i) |
943 | 1024 | { |
944 | 1025 | struct pipe_inode_info *pipe = i->pipe; |
945 | | - if (pipe->nrbufs) { |
| 1026 | + unsigned int p_tail = pipe->tail; |
| 1027 | + unsigned int p_head = pipe->head; |
| 1028 | + unsigned int p_mask = pipe->ring_size - 1; |
| 1029 | + |
| 1030 | + if (!pipe_empty(p_head, p_tail)) { |
| 1031 | + struct pipe_buffer *buf; |
| 1032 | + unsigned int i_head = i->head; |
946 | 1033 | size_t off = i->iov_offset; |
947 | | - int idx = i->idx; |
948 | | - int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1); |
| 1034 | + |
949 | 1035 | if (off) { |
950 | | - pipe->bufs[idx].len = off - pipe->bufs[idx].offset; |
951 | | - idx = next_idx(idx, pipe); |
952 | | - nrbufs++; |
| 1036 | + buf = &pipe->bufs[i_head & p_mask]; |
| 1037 | + buf->len = off - buf->offset; |
| 1038 | + i_head++; |
953 | 1039 | } |
954 | | - while (pipe->nrbufs > nrbufs) { |
955 | | - pipe_buf_release(pipe, &pipe->bufs[idx]); |
956 | | - idx = next_idx(idx, pipe); |
957 | | - pipe->nrbufs--; |
| 1040 | + while (p_head != i_head) { |
| 1041 | + p_head--; |
| 1042 | + pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]); |
958 | 1043 | } |
| 1044 | + |
| 1045 | + pipe->head = p_head; |
959 | 1046 | } |
960 | 1047 | } |
961 | 1048 | |
.. | .. |
966 | 1053 | size = i->count; |
967 | 1054 | if (size) { |
968 | 1055 | struct pipe_buffer *buf; |
| 1056 | + unsigned int p_mask = pipe->ring_size - 1; |
| 1057 | + unsigned int i_head = i->head; |
969 | 1058 | size_t off = i->iov_offset, left = size; |
970 | | - int idx = i->idx; |
| 1059 | + |
971 | 1060 | if (off) /* make it relative to the beginning of buffer */ |
972 | | - left += off - pipe->bufs[idx].offset; |
| 1061 | + left += off - pipe->bufs[i_head & p_mask].offset; |
973 | 1062 | while (1) { |
974 | | - buf = &pipe->bufs[idx]; |
| 1063 | + buf = &pipe->bufs[i_head & p_mask]; |
975 | 1064 | if (left <= buf->len) |
976 | 1065 | break; |
977 | 1066 | left -= buf->len; |
978 | | - idx = next_idx(idx, pipe); |
| 1067 | + i_head++; |
979 | 1068 | } |
980 | | - i->idx = idx; |
| 1069 | + i->head = i_head; |
981 | 1070 | i->iov_offset = buf->offset + left; |
982 | 1071 | } |
983 | 1072 | i->count -= size; |
.. | .. |
987 | 1076 | |
988 | 1077 | void iov_iter_advance(struct iov_iter *i, size_t size) |
989 | 1078 | { |
990 | | - if (unlikely(i->type & ITER_PIPE)) { |
| 1079 | + if (unlikely(iov_iter_is_pipe(i))) { |
991 | 1080 | pipe_advance(i, size); |
| 1081 | + return; |
| 1082 | + } |
| 1083 | + if (unlikely(iov_iter_is_discard(i))) { |
| 1084 | + i->count -= size; |
992 | 1085 | return; |
993 | 1086 | } |
994 | 1087 | iterate_and_advance(i, size, v, 0, 0, 0) |
.. | .. |
1002 | 1095 | if (WARN_ON(unroll > MAX_RW_COUNT)) |
1003 | 1096 | return; |
1004 | 1097 | i->count += unroll; |
1005 | | - if (unlikely(i->type & ITER_PIPE)) { |
| 1098 | + if (unlikely(iov_iter_is_pipe(i))) { |
1006 | 1099 | struct pipe_inode_info *pipe = i->pipe; |
1007 | | - int idx = i->idx; |
| 1100 | + unsigned int p_mask = pipe->ring_size - 1; |
| 1101 | + unsigned int i_head = i->head; |
1008 | 1102 | size_t off = i->iov_offset; |
1009 | 1103 | while (1) { |
1010 | | - size_t n = off - pipe->bufs[idx].offset; |
| 1104 | + struct pipe_buffer *b = &pipe->bufs[i_head & p_mask]; |
| 1105 | + size_t n = off - b->offset; |
1011 | 1106 | if (unroll < n) { |
1012 | 1107 | off -= unroll; |
1013 | 1108 | break; |
1014 | 1109 | } |
1015 | 1110 | unroll -= n; |
1016 | | - if (!unroll && idx == i->start_idx) { |
| 1111 | + if (!unroll && i_head == i->start_head) { |
1017 | 1112 | off = 0; |
1018 | 1113 | break; |
1019 | 1114 | } |
1020 | | - if (!idx--) |
1021 | | - idx = pipe->buffers - 1; |
1022 | | - off = pipe->bufs[idx].offset + pipe->bufs[idx].len; |
| 1115 | + i_head--; |
| 1116 | + b = &pipe->bufs[i_head & p_mask]; |
| 1117 | + off = b->offset + b->len; |
1023 | 1118 | } |
1024 | 1119 | i->iov_offset = off; |
1025 | | - i->idx = idx; |
| 1120 | + i->head = i_head; |
1026 | 1121 | pipe_truncate(i); |
1027 | 1122 | return; |
1028 | 1123 | } |
| 1124 | + if (unlikely(iov_iter_is_discard(i))) |
| 1125 | + return; |
1029 | 1126 | if (unroll <= i->iov_offset) { |
1030 | 1127 | i->iov_offset -= unroll; |
1031 | 1128 | return; |
1032 | 1129 | } |
1033 | 1130 | unroll -= i->iov_offset; |
1034 | | - if (i->type & ITER_BVEC) { |
| 1131 | + if (iov_iter_is_bvec(i)) { |
1035 | 1132 | const struct bio_vec *bvec = i->bvec; |
1036 | 1133 | while (1) { |
1037 | 1134 | size_t n = (--bvec)->bv_len; |
.. | .. |
1064 | 1161 | */ |
1065 | 1162 | size_t iov_iter_single_seg_count(const struct iov_iter *i) |
1066 | 1163 | { |
1067 | | - if (unlikely(i->type & ITER_PIPE)) |
| 1164 | + if (unlikely(iov_iter_is_pipe(i))) |
1068 | 1165 | return i->count; // it is a silly place, anyway |
1069 | 1166 | if (i->nr_segs == 1) |
1070 | 1167 | return i->count; |
1071 | | - else if (i->type & ITER_BVEC) |
| 1168 | + if (unlikely(iov_iter_is_discard(i))) |
| 1169 | + return i->count; |
| 1170 | + else if (iov_iter_is_bvec(i)) |
1072 | 1171 | return min(i->count, i->bvec->bv_len - i->iov_offset); |
1073 | 1172 | else |
1074 | 1173 | return min(i->count, i->iov->iov_len - i->iov_offset); |
1075 | 1174 | } |
1076 | 1175 | EXPORT_SYMBOL(iov_iter_single_seg_count); |
1077 | 1176 | |
1078 | | -void iov_iter_kvec(struct iov_iter *i, int direction, |
| 1177 | +void iov_iter_kvec(struct iov_iter *i, unsigned int direction, |
1079 | 1178 | const struct kvec *kvec, unsigned long nr_segs, |
1080 | 1179 | size_t count) |
1081 | 1180 | { |
1082 | | - BUG_ON(!(direction & ITER_KVEC)); |
1083 | | - i->type = direction; |
| 1181 | + WARN_ON(direction & ~(READ | WRITE)); |
| 1182 | + i->type = ITER_KVEC | (direction & (READ | WRITE)); |
1084 | 1183 | i->kvec = kvec; |
1085 | 1184 | i->nr_segs = nr_segs; |
1086 | 1185 | i->iov_offset = 0; |
.. | .. |
1088 | 1187 | } |
1089 | 1188 | EXPORT_SYMBOL(iov_iter_kvec); |
1090 | 1189 | |
1091 | | -void iov_iter_bvec(struct iov_iter *i, int direction, |
| 1190 | +void iov_iter_bvec(struct iov_iter *i, unsigned int direction, |
1092 | 1191 | const struct bio_vec *bvec, unsigned long nr_segs, |
1093 | 1192 | size_t count) |
1094 | 1193 | { |
1095 | | - BUG_ON(!(direction & ITER_BVEC)); |
1096 | | - i->type = direction; |
| 1194 | + WARN_ON(direction & ~(READ | WRITE)); |
| 1195 | + i->type = ITER_BVEC | (direction & (READ | WRITE)); |
1097 | 1196 | i->bvec = bvec; |
1098 | 1197 | i->nr_segs = nr_segs; |
1099 | 1198 | i->iov_offset = 0; |
.. | .. |
1101 | 1200 | } |
1102 | 1201 | EXPORT_SYMBOL(iov_iter_bvec); |
1103 | 1202 | |
1104 | | -void iov_iter_pipe(struct iov_iter *i, int direction, |
| 1203 | +void iov_iter_pipe(struct iov_iter *i, unsigned int direction, |
1105 | 1204 | struct pipe_inode_info *pipe, |
1106 | 1205 | size_t count) |
1107 | 1206 | { |
1108 | | - BUG_ON(direction != ITER_PIPE); |
1109 | | - WARN_ON(pipe->nrbufs == pipe->buffers); |
1110 | | - i->type = direction; |
| 1207 | + BUG_ON(direction != READ); |
| 1208 | + WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size)); |
| 1209 | + i->type = ITER_PIPE | READ; |
1111 | 1210 | i->pipe = pipe; |
1112 | | - i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); |
| 1211 | + i->head = pipe->head; |
1113 | 1212 | i->iov_offset = 0; |
1114 | 1213 | i->count = count; |
1115 | | - i->start_idx = i->idx; |
| 1214 | + i->start_head = i->head; |
1116 | 1215 | } |
1117 | 1216 | EXPORT_SYMBOL(iov_iter_pipe); |
| 1217 | + |
| 1218 | +/** |
| 1219 | + * iov_iter_discard - Initialise an I/O iterator that discards data |
| 1220 | + * @i: The iterator to initialise. |
| 1221 | + * @direction: The direction of the transfer. |
| 1222 | + * @count: The size of the I/O buffer in bytes. |
| 1223 | + * |
| 1224 | + * Set up an I/O iterator that just discards everything that's written to it. |
| 1225 | + * It's only available as a READ iterator. |
| 1226 | + */ |
| 1227 | +void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count) |
| 1228 | +{ |
| 1229 | + BUG_ON(direction != READ); |
| 1230 | + i->type = ITER_DISCARD | READ; |
| 1231 | + i->count = count; |
| 1232 | + i->iov_offset = 0; |
| 1233 | +} |
| 1234 | +EXPORT_SYMBOL(iov_iter_discard); |
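
A typical use of the new discard iterator is draining bytes that must be consumed but have nowhere to go, e.g. skipping over part of a stream that is only reachable through an iov_iter. A minimal sketch (hypothetical context):

```c
struct iov_iter iter;

iov_iter_discard(&iter, READ, to_skip);
/* Hand it to any ->read_iter()/recvmsg-style producer: everything
 * "copied" into the iterator is dropped and only i->count moves. */
```
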
1118 | 1235 | |
1119 | 1236 | unsigned long iov_iter_alignment(const struct iov_iter *i) |
1120 | 1237 | { |
1121 | 1238 | unsigned long res = 0; |
1122 | 1239 | size_t size = i->count; |
1123 | 1240 | |
1124 | | - if (unlikely(i->type & ITER_PIPE)) { |
1125 | | - if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx])) |
| 1241 | + if (unlikely(iov_iter_is_pipe(i))) { |
| 1242 | + unsigned int p_mask = i->pipe->ring_size - 1; |
| 1243 | + |
| 1244 | + if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask])) |
1126 | 1245 | return size | i->iov_offset; |
1127 | 1246 | return size; |
1128 | 1247 | } |
.. | .. |
1140 | 1259 | unsigned long res = 0; |
1141 | 1260 | size_t size = i->count; |
1142 | 1261 | |
1143 | | - if (unlikely(i->type & ITER_PIPE)) { |
| 1262 | + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { |
1144 | 1263 | WARN_ON(1); |
1145 | 1264 | return ~0U; |
1146 | 1265 | } |
.. | .. |
1160 | 1279 | static inline ssize_t __pipe_get_pages(struct iov_iter *i, |
1161 | 1280 | size_t maxsize, |
1162 | 1281 | struct page **pages, |
1163 | | - int idx, |
| 1282 | + int iter_head, |
1164 | 1283 | size_t *start) |
1165 | 1284 | { |
1166 | 1285 | struct pipe_inode_info *pipe = i->pipe; |
1167 | | - ssize_t n = push_pipe(i, maxsize, &idx, start); |
| 1286 | + unsigned int p_mask = pipe->ring_size - 1; |
| 1287 | + ssize_t n = push_pipe(i, maxsize, &iter_head, start); |
1168 | 1288 | if (!n) |
1169 | 1289 | return -EFAULT; |
1170 | 1290 | |
1171 | 1291 | maxsize = n; |
1172 | 1292 | n += *start; |
1173 | 1293 | while (n > 0) { |
1174 | | - get_page(*pages++ = pipe->bufs[idx].page); |
1175 | | - idx = next_idx(idx, pipe); |
| 1294 | + get_page(*pages++ = pipe->bufs[iter_head & p_mask].page); |
| 1295 | + iter_head++; |
1176 | 1296 | n -= PAGE_SIZE; |
1177 | 1297 | } |
1178 | 1298 | |
.. | .. |
1183 | 1303 | struct page **pages, size_t maxsize, unsigned maxpages, |
1184 | 1304 | size_t *start) |
1185 | 1305 | { |
1186 | | - unsigned npages; |
| 1306 | + unsigned int iter_head, npages; |
1187 | 1307 | size_t capacity; |
1188 | | - int idx; |
1189 | 1308 | |
1190 | 1309 | if (!maxsize) |
1191 | 1310 | return 0; |
.. | .. |
1193 | 1312 | if (!sanity(i)) |
1194 | 1313 | return -EFAULT; |
1195 | 1314 | |
1196 | | - data_start(i, &idx, start); |
1197 | | - /* some of this one + all after this one */ |
1198 | | - npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; |
1199 | | - capacity = min(npages,maxpages) * PAGE_SIZE - *start; |
| 1315 | + data_start(i, &iter_head, start); |
| 1316 | + /* Amount of free space: some of this one + all after this one */ |
| 1317 | + npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); |
| 1318 | + capacity = min(npages, maxpages) * PAGE_SIZE - *start; |
1200 | 1319 | |
1201 | | - return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start); |
| 1320 | + return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start); |
1202 | 1321 | } |
1203 | 1322 | |
1204 | 1323 | ssize_t iov_iter_get_pages(struct iov_iter *i, |
.. | .. |
1208 | 1327 | if (maxsize > i->count) |
1209 | 1328 | maxsize = i->count; |
1210 | 1329 | |
1211 | | - if (unlikely(i->type & ITER_PIPE)) |
| 1330 | + if (unlikely(iov_iter_is_pipe(i))) |
1212 | 1331 | return pipe_get_pages(i, pages, maxsize, maxpages, start); |
| 1332 | + if (unlikely(iov_iter_is_discard(i))) |
| 1333 | + return -EFAULT; |
| 1334 | + |
1213 | 1335 | iterate_all_kinds(i, maxsize, v, ({ |
1214 | 1336 | unsigned long addr = (unsigned long)v.iov_base; |
1215 | 1337 | size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); |
.. | .. |
1220 | 1342 | len = maxpages * PAGE_SIZE; |
1221 | 1343 | addr &= ~(PAGE_SIZE - 1); |
1222 | 1344 | n = DIV_ROUND_UP(len, PAGE_SIZE); |
1223 | | - res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages); |
1224 | | - if (unlikely(res < 0)) |
| 1345 | + res = get_user_pages_fast(addr, n, |
| 1346 | + iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, |
| 1347 | + pages); |
| 1348 | + if (unlikely(res <= 0)) |
1225 | 1349 | return res; |
1226 | 1350 | return (res == n ? len : res * PAGE_SIZE) - *start; |
1227 | 1351 | 0;}),({ |
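
The get_user_pages_fast() calls switch from a boolean write argument to FOLL_* flags, and the polarity is easy to misread: a READ iterator is a *destination*, so the kernel will write into the user pages and needs FOLL_WRITE. A one-line sketch of the logic:

```c
/* iov_iter_rw(i) == WRITE: iterator is a data source, pages only read.
 * Otherwise (READ): the kernel writes into the pages => FOLL_WRITE.   */
unsigned int gup_flags = (iov_iter_rw(i) != WRITE) ? FOLL_WRITE : 0;
```
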
.. | .. |
1247 | 1371 | size_t *start) |
1248 | 1372 | { |
1249 | 1373 | struct page **p; |
| 1374 | + unsigned int iter_head, npages; |
1250 | 1375 | ssize_t n; |
1251 | | - int idx; |
1252 | | - int npages; |
1253 | 1376 | |
1254 | 1377 | if (!maxsize) |
1255 | 1378 | return 0; |
.. | .. |
1257 | 1380 | if (!sanity(i)) |
1258 | 1381 | return -EFAULT; |
1259 | 1382 | |
1260 | | - data_start(i, &idx, start); |
1261 | | - /* some of this one + all after this one */ |
1262 | | - npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; |
| 1383 | + data_start(i, &iter_head, start); |
| 1384 | + /* Amount of free space: some of this one + all after this one */ |
| 1385 | + npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); |
1263 | 1386 | n = npages * PAGE_SIZE - *start; |
1264 | 1387 | if (maxsize > n) |
1265 | 1388 | maxsize = n; |
.. | .. |
1268 | 1391 | p = get_pages_array(npages); |
1269 | 1392 | if (!p) |
1270 | 1393 | return -ENOMEM; |
1271 | | - n = __pipe_get_pages(i, maxsize, p, idx, start); |
| 1394 | + n = __pipe_get_pages(i, maxsize, p, iter_head, start); |
1272 | 1395 | if (n > 0) |
1273 | 1396 | *pages = p; |
1274 | 1397 | else |
.. | .. |
1285 | 1408 | if (maxsize > i->count) |
1286 | 1409 | maxsize = i->count; |
1287 | 1410 | |
1288 | | - if (unlikely(i->type & ITER_PIPE)) |
| 1411 | + if (unlikely(iov_iter_is_pipe(i))) |
1289 | 1412 | return pipe_get_pages_alloc(i, pages, maxsize, start); |
| 1413 | + if (unlikely(iov_iter_is_discard(i))) |
| 1414 | + return -EFAULT; |
| 1415 | + |
1290 | 1416 | iterate_all_kinds(i, maxsize, v, ({ |
1291 | 1417 | unsigned long addr = (unsigned long)v.iov_base; |
1292 | 1418 | size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); |
.. | .. |
1298 | 1424 | p = get_pages_array(n); |
1299 | 1425 | if (!p) |
1300 | 1426 | return -ENOMEM; |
1301 | | - res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p); |
1302 | | - if (unlikely(res < 0)) { |
| 1427 | + res = get_user_pages_fast(addr, n, |
| 1428 | + iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p); |
| 1429 | + if (unlikely(res <= 0)) { |
1303 | 1430 | kvfree(p); |
| 1431 | + *pages = NULL; |
1304 | 1432 | return res; |
1305 | 1433 | } |
1306 | 1434 | *pages = p; |
.. | .. |
1328 | 1456 | __wsum sum, next; |
1329 | 1457 | size_t off = 0; |
1330 | 1458 | sum = *csum; |
1331 | | - if (unlikely(i->type & ITER_PIPE)) { |
| 1459 | + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { |
1332 | 1460 | WARN_ON(1); |
1333 | 1461 | return 0; |
1334 | 1462 | } |
1335 | 1463 | iterate_and_advance(i, bytes, v, ({ |
1336 | | - int err = 0; |
1337 | 1464 | next = csum_and_copy_from_user(v.iov_base, |
1338 | 1465 | (to += v.iov_len) - v.iov_len, |
1339 | | - v.iov_len, 0, &err); |
1340 | | - if (!err) { |
| 1466 | + v.iov_len); |
| 1467 | + if (next) { |
1341 | 1468 | sum = csum_block_add(sum, next, off); |
1342 | 1469 | off += v.iov_len; |
1343 | 1470 | } |
1344 | | - err ? v.iov_len : 0; |
| 1471 | + next ? 0 : v.iov_len; |
1345 | 1472 | }), ({ |
1346 | 1473 | char *p = kmap_atomic(v.bv_page); |
1347 | | - next = csum_partial_copy_nocheck(p + v.bv_offset, |
1348 | | - (to += v.bv_len) - v.bv_len, |
1349 | | - v.bv_len, 0); |
| 1474 | + sum = csum_and_memcpy((to += v.bv_len) - v.bv_len, |
| 1475 | + p + v.bv_offset, v.bv_len, |
| 1476 | + sum, off); |
1350 | 1477 | kunmap_atomic(p); |
1351 | | - sum = csum_block_add(sum, next, off); |
1352 | 1478 | off += v.bv_len; |
1353 | 1479 | }),({ |
1354 | | - next = csum_partial_copy_nocheck(v.iov_base, |
1355 | | - (to += v.iov_len) - v.iov_len, |
1356 | | - v.iov_len, 0); |
1357 | | - sum = csum_block_add(sum, next, off); |
| 1480 | + sum = csum_and_memcpy((to += v.iov_len) - v.iov_len, |
| 1481 | + v.iov_base, v.iov_len, |
| 1482 | + sum, off); |
1358 | 1483 | off += v.iov_len; |
1359 | 1484 | }) |
1360 | 1485 | ) |
.. | .. |
1370 | 1495 | __wsum sum, next; |
1371 | 1496 | size_t off = 0; |
1372 | 1497 | sum = *csum; |
1373 | | - if (unlikely(i->type & ITER_PIPE)) { |
| 1498 | + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { |
1374 | 1499 | WARN_ON(1); |
1375 | 1500 | return false; |
1376 | 1501 | } |
1377 | 1502 | if (unlikely(i->count < bytes)) |
1378 | 1503 | return false; |
1379 | 1504 | iterate_all_kinds(i, bytes, v, ({ |
1380 | | - int err = 0; |
1381 | 1505 | next = csum_and_copy_from_user(v.iov_base, |
1382 | 1506 | (to += v.iov_len) - v.iov_len, |
1383 | | - v.iov_len, 0, &err); |
1384 | | - if (err) |
| 1507 | + v.iov_len); |
| 1508 | + if (!next) |
1385 | 1509 | return false; |
1386 | 1510 | sum = csum_block_add(sum, next, off); |
1387 | 1511 | off += v.iov_len; |
1388 | 1512 | 0; |
1389 | 1513 | }), ({ |
1390 | 1514 | char *p = kmap_atomic(v.bv_page); |
1391 | | - next = csum_partial_copy_nocheck(p + v.bv_offset, |
1392 | | - (to += v.bv_len) - v.bv_len, |
1393 | | - v.bv_len, 0); |
| 1515 | + sum = csum_and_memcpy((to += v.bv_len) - v.bv_len, |
| 1516 | + p + v.bv_offset, v.bv_len, |
| 1517 | + sum, off); |
1394 | 1518 | kunmap_atomic(p); |
1395 | | - sum = csum_block_add(sum, next, off); |
1396 | 1519 | off += v.bv_len; |
1397 | 1520 | }),({ |
1398 | | - next = csum_partial_copy_nocheck(v.iov_base, |
1399 | | - (to += v.iov_len) - v.iov_len, |
1400 | | - v.iov_len, 0); |
1401 | | - sum = csum_block_add(sum, next, off); |
| 1521 | + sum = csum_and_memcpy((to += v.iov_len) - v.iov_len, |
| 1522 | + v.iov_base, v.iov_len, |
| 1523 | + sum, off); |
1402 | 1524 | off += v.iov_len; |
1403 | 1525 | }) |
1404 | 1526 | ) |
.. | .. |
1408 | 1530 | } |
1409 | 1531 | EXPORT_SYMBOL(csum_and_copy_from_iter_full); |
1410 | 1532 | |
1411 | | -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, |
| 1533 | +size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, |
1412 | 1534 | struct iov_iter *i) |
1413 | 1535 | { |
| 1536 | + struct csum_state *csstate = _csstate; |
1414 | 1537 | const char *from = addr; |
1415 | 1538 | __wsum sum, next; |
1416 | | - size_t off = 0; |
1417 | | - sum = *csum; |
1418 | | - if (unlikely(i->type & ITER_PIPE)) { |
| 1539 | + size_t off; |
| 1540 | + |
| 1541 | + if (unlikely(iov_iter_is_pipe(i))) |
| 1542 | + return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i); |
| 1543 | + |
| 1544 | + sum = csstate->csum; |
| 1545 | + off = csstate->off; |
| 1546 | + if (unlikely(iov_iter_is_discard(i))) { |
1419 | 1547 | WARN_ON(1); /* for now */ |
1420 | 1548 | return 0; |
1421 | 1549 | } |
1422 | 1550 | iterate_and_advance(i, bytes, v, ({ |
1423 | | - int err = 0; |
1424 | 1551 | next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, |
1425 | 1552 | v.iov_base, |
1426 | | - v.iov_len, 0, &err); |
1427 | | - if (!err) { |
| 1553 | + v.iov_len); |
| 1554 | + if (next) { |
1428 | 1555 | sum = csum_block_add(sum, next, off); |
1429 | 1556 | off += v.iov_len; |
1430 | 1557 | } |
1431 | | - err ? v.iov_len : 0; |
| 1558 | + next ? 0 : v.iov_len; |
1432 | 1559 | }), ({ |
1433 | 1560 | char *p = kmap_atomic(v.bv_page); |
1434 | | - next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len, |
1435 | | - p + v.bv_offset, |
1436 | | - v.bv_len, 0); |
| 1561 | + sum = csum_and_memcpy(p + v.bv_offset, |
| 1562 | + (from += v.bv_len) - v.bv_len, |
| 1563 | + v.bv_len, sum, off); |
1437 | 1564 | kunmap_atomic(p); |
1438 | | - sum = csum_block_add(sum, next, off); |
1439 | 1565 | off += v.bv_len; |
1440 | 1566 | }),({ |
1441 | | - next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len, |
1442 | | - v.iov_base, |
1443 | | - v.iov_len, 0); |
1444 | | - sum = csum_block_add(sum, next, off); |
| 1567 | + sum = csum_and_memcpy(v.iov_base, |
| 1568 | + (from += v.iov_len) - v.iov_len, |
| 1569 | + v.iov_len, sum, off); |
1445 | 1570 | off += v.iov_len; |
1446 | 1571 | }) |
1447 | 1572 | ) |
1448 | | - *csum = sum; |
| 1573 | + csstate->csum = sum; |
| 1574 | + csstate->off = off; |
1449 | 1575 | return bytes; |
1450 | 1576 | } |
1451 | 1577 | EXPORT_SYMBOL(csum_and_copy_to_iter); |
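
csum_and_copy_to_iter() now carries its running state in a struct csum_state (a { __wsum csum; size_t off; } pair from <net/checksum.h> in this era), so the checksum survives across calls at the right byte offsets. A hedged usage sketch:

```c
struct csum_state csstate = { .csum = 0, .off = 0 };

csum_and_copy_to_iter(hdr, hdr_len, &csstate, &iter);
csum_and_copy_to_iter(body, body_len, &csstate, &iter);
/* csstate.csum now covers both fragments, folded at their offsets. */
```
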
| 1578 | + |
| 1579 | +size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, |
| 1580 | + struct iov_iter *i) |
| 1581 | +{ |
| 1582 | +#ifdef CONFIG_CRYPTO_HASH |
| 1583 | + struct ahash_request *hash = hashp; |
| 1584 | + struct scatterlist sg; |
| 1585 | + size_t copied; |
| 1586 | + |
| 1587 | + copied = copy_to_iter(addr, bytes, i); |
| 1588 | + sg_init_one(&sg, addr, copied); |
| 1589 | + ahash_request_set_crypt(hash, &sg, NULL, copied); |
| 1590 | + crypto_ahash_update(hash); |
| 1591 | + return copied; |
| 1592 | +#else |
| 1593 | + return 0; |
| 1594 | +#endif |
| 1595 | +} |
| 1596 | +EXPORT_SYMBOL(hash_and_copy_to_iter); |
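
hash_and_copy_to_iter() folds whatever was actually copied into a caller-supplied ahash request via a one-entry scatterlist. A hedged sketch of the surrounding crypto usage (error handling and async completion elided; req, buf, len, iter and digest are assumed to exist):

```c
crypto_ahash_init(req);				/* e.g. a crc32c request */
copied = hash_and_copy_to_iter(buf, len, req, &iter); /* copy + update */
ahash_request_set_crypt(req, NULL, digest, 0);	/* final digest goes here */
crypto_ahash_final(req);
```
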
1452 | 1597 | |
1453 | 1598 | int iov_iter_npages(const struct iov_iter *i, int maxpages) |
1454 | 1599 | { |
.. | .. |
1457 | 1602 | |
1458 | 1603 | if (!size) |
1459 | 1604 | return 0; |
| 1605 | + if (unlikely(iov_iter_is_discard(i))) |
| 1606 | + return 0; |
1460 | 1607 | |
1461 | | - if (unlikely(i->type & ITER_PIPE)) { |
| 1608 | + if (unlikely(iov_iter_is_pipe(i))) { |
1462 | 1609 | struct pipe_inode_info *pipe = i->pipe; |
| 1610 | + unsigned int iter_head; |
1463 | 1611 | size_t off; |
1464 | | - int idx; |
1465 | 1612 | |
1466 | 1613 | if (!sanity(i)) |
1467 | 1614 | return 0; |
1468 | 1615 | |
1469 | | - data_start(i, &idx, &off); |
| 1616 | + data_start(i, &iter_head, &off); |
1470 | 1617 | /* some of this one + all after this one */ |
1471 | | - npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1; |
| 1618 | + npages = pipe_space_for_user(iter_head, pipe->tail, pipe); |
1472 | 1619 | if (npages >= maxpages) |
1473 | 1620 | return maxpages; |
1474 | 1621 | } else iterate_all_kinds(i, size, v, ({ |
.. | .. |
1496 | 1643 | const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) |
1497 | 1644 | { |
1498 | 1645 | *new = *old; |
1499 | | - if (unlikely(new->type & ITER_PIPE)) { |
| 1646 | + if (unlikely(iov_iter_is_pipe(new))) { |
1500 | 1647 | WARN_ON(1); |
1501 | 1648 | return NULL; |
1502 | 1649 | } |
1503 | | - if (new->type & ITER_BVEC) |
| 1650 | + if (unlikely(iov_iter_is_discard(new))) |
| 1651 | + return NULL; |
| 1652 | + if (iov_iter_is_bvec(new)) |
1504 | 1653 | return new->bvec = kmemdup(new->bvec, |
1505 | 1654 | new->nr_segs * sizeof(struct bio_vec), |
1506 | 1655 | flags); |
.. | .. |
1512 | 1661 | } |
1513 | 1662 | EXPORT_SYMBOL(dup_iter); |
1514 | 1663 | |
| 1664 | +static int copy_compat_iovec_from_user(struct iovec *iov, |
| 1665 | + const struct iovec __user *uvec, unsigned long nr_segs) |
| 1666 | +{ |
| 1667 | + const struct compat_iovec __user *uiov = |
| 1668 | + (const struct compat_iovec __user *)uvec; |
| 1669 | + int ret = -EFAULT, i; |
| 1670 | + |
| 1671 | + if (!user_access_begin(uiov, nr_segs * sizeof(*uiov))) |
| 1672 | + return -EFAULT; |
| 1673 | + |
| 1674 | + for (i = 0; i < nr_segs; i++) { |
| 1675 | + compat_uptr_t buf; |
| 1676 | + compat_ssize_t len; |
| 1677 | + |
| 1678 | + unsafe_get_user(len, &uiov[i].iov_len, uaccess_end); |
| 1679 | + unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end); |
| 1680 | + |
| 1681 | + /* check for compat_size_t not fitting in compat_ssize_t .. */ |
| 1682 | + if (len < 0) { |
| 1683 | + ret = -EINVAL; |
| 1684 | + goto uaccess_end; |
| 1685 | + } |
| 1686 | + iov[i].iov_base = compat_ptr(buf); |
| 1687 | + iov[i].iov_len = len; |
| 1688 | + } |
| 1689 | + |
| 1690 | + ret = 0; |
| 1691 | +uaccess_end: |
| 1692 | + user_access_end(); |
| 1693 | + return ret; |
| 1694 | +} |
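
For reference, the 32-bit layout being unpacked above, paraphrased from <linux/compat.h>; reading iov_len into a signed compat_ssize_t is what lets the len < 0 test catch 32-bit sizes with the top bit set:

```c
struct compat_iovec {
	compat_uptr_t	iov_base;	/* 32-bit user pointer */
	compat_ssize_t	iov_len;	/* signed: >= 2 GiB lengths show as < 0 */
};
```
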
| 1695 | + |
---|
| 1696 | +static int copy_iovec_from_user(struct iovec *iov, |
---|
| 1697 | + const struct iovec __user *uvec, unsigned long nr_segs) |
---|
| 1698 | +{ |
---|
| 1699 | + unsigned long seg; |
---|
| 1700 | + |
---|
| 1701 | + if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec))) |
---|
| 1702 | + return -EFAULT; |
---|
| 1703 | + for (seg = 0; seg < nr_segs; seg++) { |
---|
| 1704 | + if ((ssize_t)iov[seg].iov_len < 0) |
---|
| 1705 | + return -EINVAL; |
---|
| 1706 | + } |
---|
| 1707 | + |
---|
| 1708 | + return 0; |
---|
| 1709 | +} |
---|
| 1710 | + |
---|
| 1711 | +struct iovec *iovec_from_user(const struct iovec __user *uvec, |
---|
| 1712 | + unsigned long nr_segs, unsigned long fast_segs, |
---|
| 1713 | + struct iovec *fast_iov, bool compat) |
---|
| 1714 | +{ |
---|
| 1715 | + struct iovec *iov = fast_iov; |
---|
| 1716 | + int ret; |
---|
| 1717 | + |
---|
| 1718 | + /* |
---|
| 1719 | + * SuS says "The readv() function *may* fail if the iovcnt argument was |
---|
| 1720 | + * less than or equal to 0, or greater than {IOV_MAX}. Linux has |
---|
| 1721 | + * traditionally returned zero for zero segments, so... |
---|
| 1722 | + */ |
---|
| 1723 | + if (nr_segs == 0) |
---|
| 1724 | + return iov; |
---|
| 1725 | + if (nr_segs > UIO_MAXIOV) |
---|
| 1726 | + return ERR_PTR(-EINVAL); |
---|
| 1727 | + if (nr_segs > fast_segs) { |
---|
| 1728 | + iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL); |
---|
| 1729 | + if (!iov) |
---|
| 1730 | + return ERR_PTR(-ENOMEM); |
---|
| 1731 | + } |
---|
| 1732 | + |
---|
| 1733 | + if (compat) |
---|
| 1734 | + ret = copy_compat_iovec_from_user(iov, uvec, nr_segs); |
---|
| 1735 | + else |
---|
| 1736 | + ret = copy_iovec_from_user(iov, uvec, nr_segs); |
---|
| 1737 | + if (ret) { |
---|
| 1738 | + if (iov != fast_iov) |
---|
| 1739 | + kfree(iov); |
---|
| 1740 | + return ERR_PTR(ret); |
---|
| 1741 | + } |
---|
| 1742 | + |
---|
| 1743 | + return iov; |
---|
| 1744 | +} |
---|
| 1745 | + |
---|
| 1746 | +ssize_t __import_iovec(int type, const struct iovec __user *uvec,
| 1747 | +                unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
| 1748 | +                struct iov_iter *i, bool compat)
| 1749 | +{
| 1750 | +        ssize_t total_len = 0;
| 1751 | +        unsigned long seg;
| 1752 | +        struct iovec *iov;
| 1753 | +
| 1754 | +        iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
| 1755 | +        if (IS_ERR(iov)) {
| 1756 | +                *iovp = NULL;
| 1757 | +                return PTR_ERR(iov);
| 1758 | +        }
| 1759 | +
| 1760 | +        /*
| 1761 | +         * According to the Single Unix Specification we should return EINVAL if
| 1762 | +         * an element length is < 0 when cast to ssize_t or if the total length
| 1763 | +         * would overflow the ssize_t return value of the system call.
| 1764 | +         *
| 1765 | +         * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
| 1766 | +         * overflow case.
| 1767 | +         */
| 1768 | +        for (seg = 0; seg < nr_segs; seg++) {
| 1769 | +                ssize_t len = (ssize_t)iov[seg].iov_len;
| 1770 | +
| 1771 | +                if (!access_ok(iov[seg].iov_base, len)) {
| 1772 | +                        if (iov != *iovp)
| 1773 | +                                kfree(iov);
| 1774 | +                        *iovp = NULL;
| 1775 | +                        return -EFAULT;
| 1776 | +                }
| 1777 | +
| 1778 | +                if (len > MAX_RW_COUNT - total_len) {
| 1779 | +                        len = MAX_RW_COUNT - total_len;
| 1780 | +                        iov[seg].iov_len = len;
| 1781 | +                }
| 1782 | +                total_len += len;
| 1783 | +        }
| 1784 | +
| 1785 | +        iov_iter_init(i, type, iov, nr_segs, total_len);
| 1786 | +        if (iov == *iovp)
| 1787 | +                *iovp = NULL;
| 1788 | +        else
| 1789 | +                *iovp = iov;
| 1790 | +        return total_len;
| 1791 | +}
| 1792 | +
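The capping loop above truncates the first segment that would cross MAX_RW_COUNT (INT_MAX & PAGE_MASK); every later segment then clamps to zero, so total_len never exceeds the limit. A standalone user-space illustration of that arithmetic, with MAX_RW_COUNT_DEMO as a small stand-in constant:

    /*
     * User-space illustration of the clamping loop above.
     * MAX_RW_COUNT_DEMO stands in for the kernel's INT_MAX & PAGE_MASK.
     */
    #include <stdio.h>

    #define MAX_RW_COUNT_DEMO 100

    int main(void)
    {
            long len[3] = { 60, 60, 60 };
            long total = 0;
            int i;

            for (i = 0; i < 3; i++) {
                    if (len[i] > MAX_RW_COUNT_DEMO - total)
                            len[i] = MAX_RW_COUNT_DEMO - total;
                    total += len[i];
            }
            /* prints: 60 40 0 (total 100) */
            printf("%ld %ld %ld (total %ld)\n", len[0], len[1], len[2], total);
            return 0;
    }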
1515 | 1793 | /**
1516 | 1794 |  * import_iovec() - Copy an array of &struct iovec from userspace
1517 | 1795 |  * into the kernel, check that it is valid, and initialize a new
1518 | 1796 |  * &struct iov_iter iterator to access it.
1519 | 1797 |  *
1520 | 1798 |  * @type: One of %READ or %WRITE.
1521 | | - * @uvector: Pointer to the userspace array.
| 1799 | + * @uvec: Pointer to the userspace array.
1522 | 1800 |  * @nr_segs: Number of elements in userspace array.
1523 | 1801 |  * @fast_segs: Number of elements in @iovp.
1524 | | - * @iov: (input and output parameter) Pointer to pointer to (usually small
| 1802 | + * @iovp: (input and output parameter) Pointer to pointer to (usually small
1525 | 1803 |  * on-stack) kernel array.
1526 | 1804 |  * @i: Pointer to iterator that will be initialized on success.
1527 | 1805 |  *
.. | .. |
1532 | 1810 |  * on-stack array was used or not (and regardless of whether this function
1533 | 1811 |  * returns an error or not).
1534 | 1812 |  *
1535 | | - * Return: 0 on success or negative error code on error.
| 1813 | + * Return: Negative error code on error, bytes imported on success.
1536 | 1814 |  */
1537 | | -int import_iovec(int type, const struct iovec __user *uvector,
| 1815 | +ssize_t import_iovec(int type, const struct iovec __user *uvec,
1538 | 1816 |                 unsigned nr_segs, unsigned fast_segs,
1539 | | -                struct iovec **iov, struct iov_iter *i)
| 1817 | +                struct iovec **iovp, struct iov_iter *i)
1540 | 1818 | {
1541 | | -        ssize_t n;
1542 | | -        struct iovec *p;
1543 | | -        n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1544 | | -                        *iov, &p);
1545 | | -        if (n < 0) {
1546 | | -                if (p != *iov)
1547 | | -                        kfree(p);
1548 | | -                *iov = NULL;
1549 | | -                return n;
1550 | | -        }
1551 | | -        iov_iter_init(i, type, p, nr_segs, n);
1552 | | -        *iov = p == *iov ? NULL : p;
1553 | | -        return 0;
| 1819 | +        return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
| 1820 | +                              in_compat_syscall());
1554 | 1821 | }
1555 | 1822 | EXPORT_SYMBOL(import_iovec);
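Since compat handling now hinges on in_compat_syscall(), there is a single entry point with one contract: on success it returns the imported byte count and may hand back a heap array in *iovp; on failure *iovp is NULL. A hedged caller sketch, where do_read() is a hypothetical consumer and everything else is the kernel API shown above:

    /*
     * Hypothetical caller fragment (not from this patch).  do_read()
     * stands in for whatever consumes the iterator.
     */
    struct iovec stack[UIO_FASTIOV], *iov = stack;
    struct iov_iter iter;
    ssize_t ret;

    ret = import_iovec(READ, uvec, nr_segs, UIO_FASTIOV, &iov, &iter);
    if (ret < 0)
            return ret;             /* iov was set to NULL on failure */
    ret = do_read(&iter);           /* hypothetical consumer */
    kfree(iov);                     /* NULL (a no-op) if stack[] was used */
    return ret;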
1556 | | -
1557 | | -#ifdef CONFIG_COMPAT
1558 | | -#include <linux/compat.h>
1559 | | -
1560 | | -int compat_import_iovec(int type, const struct compat_iovec __user *uvector,
1561 | | -                unsigned nr_segs, unsigned fast_segs,
1562 | | -                struct iovec **iov, struct iov_iter *i)
1563 | | -{
1564 | | -        ssize_t n;
1565 | | -        struct iovec *p;
1566 | | -        n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1567 | | -                        *iov, &p);
1568 | | -        if (n < 0) {
1569 | | -                if (p != *iov)
1570 | | -                        kfree(p);
1571 | | -                *iov = NULL;
1572 | | -                return n;
1573 | | -        }
1574 | | -        iov_iter_init(i, type, p, nr_segs, n);
1575 | | -        *iov = p == *iov ? NULL : p;
1576 | | -        return 0;
1577 | | -}
1578 | | -#endif
1579 | 1823 |
1580 | 1824 | int import_single_range(int rw, void __user *buf, size_t len,
1581 | 1825 |                 struct iovec *iov, struct iov_iter *i)
1582 | 1826 | {
1583 | 1827 |         if (len > MAX_RW_COUNT)
1584 | 1828 |                 len = MAX_RW_COUNT;
1585 | | -        if (unlikely(!access_ok(!rw, buf, len)))
| 1829 | +        if (unlikely(!access_ok(buf, len)))
1586 | 1830 |                 return -EFAULT;
1587 | 1831 |
1588 | 1832 |         iov->iov_base = buf;
.. | .. |
1592 | 1836 | }
1593 | 1837 | EXPORT_SYMBOL(import_single_range);
1594 | 1838 |
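For a single user buffer, import_single_range() above does the same capping and access_ok() validation without any array handling. A minimal caller sketch (hypothetical fragment; ubuf and len come from the surrounding syscall):

    /* Hypothetical caller fragment (not from this patch). */
    struct iovec iov;
    struct iov_iter iter;
    int ret;

    ret = import_single_range(WRITE, ubuf, len, &iov, &iter);
    if (ret)
            return ret;
    /* iov_iter_count(&iter) == min(len, MAX_RW_COUNT) */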
1595 | | -int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
1596 | | -                int (*f)(struct kvec *vec, void *context),
1597 | | -                void *context)
| 1839 | +/**
| 1840 | + * iov_iter_restore() - Restore a &struct iov_iter to the same state as when
| 1841 | + *      iov_iter_save_state() was called.
| 1842 | + *
| 1843 | + * @i: &struct iov_iter to restore
| 1844 | + * @state: state to restore from
| 1845 | + *
| 1846 | + * Used after iov_iter_save_state() to restore @i, if operations may
| 1847 | + * have advanced it.
| 1848 | + *
| 1849 | + * Note: only works on ITER_IOVEC, ITER_BVEC, and ITER_KVEC
| 1850 | + */
| 1851 | +void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
1598 | 1852 | {
1599 | | -        struct kvec w;
1600 | | -        int err = -EINVAL;
1601 | | -        if (!bytes)
1602 | | -                return 0;
1603 | | -
1604 | | -        iterate_all_kinds(i, bytes, v, -EINVAL, ({
1605 | | -                w.iov_base = kmap(v.bv_page) + v.bv_offset;
1606 | | -                w.iov_len = v.bv_len;
1607 | | -                err = f(&w, context);
1608 | | -                kunmap(v.bv_page);
1609 | | -                err;}), ({
1610 | | -                w = v;
1611 | | -                err = f(&w, context);})
1612 | | -        )
1613 | | -        return err;
| 1853 | +        if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i) &&
| 1854 | +                         !iov_iter_is_kvec(i)))
| 1855 | +                return;
| 1856 | +        i->iov_offset = state->iov_offset;
| 1857 | +        i->count = state->count;
| 1858 | +        /*
| 1859 | +         * For the *vec iters, nr_segs + iov is constant - if we increment
| 1860 | +         * the vec, then we also decrement the nr_segs count. Hence we don't
| 1861 | +         * need to track both of these, just one is enough and we can deduce
| 1862 | +         * the other from that. ITER_KVEC and ITER_IOVEC are the same struct
| 1863 | +         * size, so we can just increment the iov pointer as they are unionized.
| 1864 | +         * ITER_BVEC _may_ be the same size on some archs, but on others it is
| 1865 | +         * not. Be safe and handle it separately.
| 1866 | +         */
| 1867 | +        BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
| 1868 | +        if (iov_iter_is_bvec(i))
| 1869 | +                i->bvec -= state->nr_segs - i->nr_segs;
| 1870 | +        else
| 1871 | +                i->iov -= state->nr_segs - i->nr_segs;
| 1872 | +        i->nr_segs = state->nr_segs;
1614 | 1873 | }
1615 | | -EXPORT_SYMBOL(iov_iter_for_each_range);
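As the comment above explains, nr_segs and the segment pointer move in lockstep while an iterator advances, so the saved state needs only one of them and the restore side deduces the other. A hedged sketch of the intended save/restore pairing, with do_attempt() standing in for any operation that may consume the iterator:

    /*
     * Hypothetical pairing sketch (not from this patch).  do_attempt()
     * stands in for any operation that may advance the iterator.
     */
    struct iov_iter_state state;

    iov_iter_save_state(&iter, &state);
    if (do_attempt(&iter) < 0)
            iov_iter_restore(&iter, &state);   /* rewind to the snapshot */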
---|