| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk> |
|---|
| 3 | | - * |
|---|
| 4 | | - * This program is free software; you can redistribute it and/or modify |
|---|
| 5 | | - * it under the terms of the GNU General Public License version 2 as |
|---|
| 6 | | - * published by the Free Software Foundation. |
|---|
| 7 | | - * |
|---|
| 8 | | - * This program is distributed in the hope that it will be useful, |
|---|
| 9 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 10 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 11 | | - * GNU General Public License for more details. |
|---|
| 12 | | - * |
|---|
| 13 | | - * You should have received a copy of the GNU General Public Licens |
|---|
| 14 | | - * along with this program; if not, write to the Free Software |
|---|
| 15 | | - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- |
|---|
| 16 | | - * |
|---|
| 17 | 4 | */ |
|---|
| 18 | 5 | #include <linux/mm.h> |
|---|
| 19 | 6 | #include <linux/swap.h> |
|---|
| .. | .. |
|---|
| 29 | 16 | #include <linux/workqueue.h> |
|---|
| 30 | 17 | #include <linux/cgroup.h> |
|---|
| 31 | 18 | #include <linux/blk-cgroup.h> |
|---|
| 19 | +#include <linux/highmem.h> |
|---|
| 20 | +#include <linux/sched/sysctl.h> |
|---|
| 32 | 21 | #include <linux/blk-crypto.h> |
|---|
| 33 | 22 | |
|---|
| 34 | 23 | #include <trace/events/block.h> |
|---|
| .. | .. |
|---|
| 245 | 234 | |
|---|
| 246 | 235 | void bio_uninit(struct bio *bio) |
|---|
| 247 | 236 | { |
|---|
| 248 | | - bio_disassociate_task(bio); |
|---|
| 237 | +#ifdef CONFIG_BLK_CGROUP |
|---|
| 238 | + if (bio->bi_blkg) { |
|---|
| 239 | + blkg_put(bio->bi_blkg); |
|---|
| 240 | + bio->bi_blkg = NULL; |
|---|
| 241 | + } |
|---|
| 242 | +#endif |
|---|
| 243 | + if (bio_integrity(bio)) |
|---|
| 244 | + bio_integrity_free(bio); |
|---|
| 249 | 245 | |
|---|
| 250 | 246 | bio_crypt_free_ctx(bio); |
|---|
| 251 | 247 | } |
|---|
| .. | .. |
|---|
| 331 | 327 | /** |
|---|
| 332 | 328 | * bio_chain - chain bio completions |
|---|
| 333 | 329 | * @bio: the target bio |
|---|
| 334 | | - * @parent: the @bio's parent bio |
|---|
| 330 | + * @parent: the parent bio of @bio |
|---|
| 335 | 331 | * |
|---|
| 336 | 332 | * The caller won't have a bi_end_io called when @bio completes - instead, |
|---|
| 337 | 333 | * @parent's bi_end_io won't be called until both @parent and @bio have |
|---|
| .. | .. |
|---|
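The chaining contract described in the comment above is easiest to see in a short sketch. This is a minimal, hypothetical example, not code from this patch: the `submit_split()` helper, the split size, and the use of `fs_bio_set` are assumptions for illustration.

```c
#include <linux/bio.h>

/* Hypothetical: split off the first 'sectors' of 'parent' and chain it. */
static void submit_split(struct bio *parent, int sectors)
{
	struct bio *child = bio_split(parent, sectors, GFP_NOIO, &fs_bio_set);

	bio_chain(child, parent);	/* child completion folds into parent */
	submit_bio_noacct(child);	/* child carries no bi_end_io of its own */
	submit_bio_noacct(parent);	/* parent's bi_end_io runs only after
					 * both parent and child complete */
}
```

This is the pattern stacking drivers use when splitting: only the top-level parent's completion handler ever runs.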
| 362 | 358 | if (!bio) |
|---|
| 363 | 359 | break; |
|---|
| 364 | 360 | |
|---|
| 365 | | - generic_make_request(bio); |
|---|
| 361 | + submit_bio_noacct(bio); |
|---|
| 366 | 362 | } |
|---|
| 367 | 363 | } |
|---|
| 368 | 364 | |
|---|
| .. | .. |
|---|
| 420 | 416 | * submit the previously allocated bio for IO before attempting to allocate |
|---|
| 421 | 417 | * a new one. Failure to do so can cause deadlocks under memory pressure. |
|---|
| 422 | 418 | * |
|---|
| 423 | | - * Note that when running under generic_make_request() (i.e. any block |
|---|
| 419 | + * Note that when running under submit_bio_noacct() (i.e. any block |
|---|
| 424 | 420 | * driver), bios are not submitted until after you return - see the code in |
|---|
| 425 | | - * generic_make_request() that converts recursion into iteration, to prevent |
|---|
| 421 | + * submit_bio_noacct() that converts recursion into iteration, to prevent |
|---|
| 426 | 422 | * stack overflows. |
|---|
| 427 | 423 | * |
|---|
| 428 | 424 | * This would normally mean allocating multiple bios under |
|---|
| 429 | | - * generic_make_request() would be susceptible to deadlocks, but we have |
|---|
| 425 | + * submit_bio_noacct() would be susceptible to deadlocks, but we have |
|---|
| 430 | 426 | * deadlock avoidance code that resubmits any blocked bios from a rescuer |
|---|
| 431 | 427 | * thread. |
|---|
| 432 | 428 | * |
|---|
| 433 | 429 | * However, we do not guarantee forward progress for allocations from other |
|---|
| 434 | 430 | * mempools. Doing multiple allocations from the same mempool under |
|---|
| 435 | | - * generic_make_request() should be avoided - instead, use bio_set's front_pad |
|---|
| 431 | + * submit_bio_noacct() should be avoided - instead, use bio_set's front_pad |
|---|
| 436 | 432 | * for per bio allocations. |
|---|
| 437 | 433 | * |
|---|
| 438 | 434 | * RETURNS: |
|---|
| .. | .. |
|---|
| 452 | 448 | if (nr_iovecs > UIO_MAXIOV) |
|---|
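A minimal sketch of the front_pad pattern the comment recommends, which avoids a second mempool allocation under submit_bio_noacct(); the `struct my_io` and `my_bio_set` names are hypothetical, not from this file.

```c
#include <linux/bio.h>
#include <linux/init.h>

/* Hypothetical per-bio state carried in front_pad. */
struct my_io {
	void		*scratch;
	struct bio	bio;		/* must be last */
};

static struct bio_set my_bio_set;

static int __init my_init(void)
{
	/* Every bio from my_bio_set is preceded by a my_io header. */
	return bioset_init(&my_bio_set, BIO_POOL_SIZE,
			   offsetof(struct my_io, bio), BIOSET_NEED_BVECS);
}

static struct my_io *my_io_of(struct bio *bio)
{
	return container_of(bio, struct my_io, bio);
}
```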
| 453 | 449 | return NULL; |
|---|
| 454 | 450 | |
|---|
| 455 | | - p = kmalloc(sizeof(struct bio) + |
|---|
| 456 | | - nr_iovecs * sizeof(struct bio_vec), |
|---|
| 457 | | - gfp_mask); |
|---|
| 451 | + p = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask); |
|---|
| 458 | 452 | front_pad = 0; |
|---|
| 459 | 453 | inline_vecs = nr_iovecs; |
|---|
| 460 | 454 | } else { |
|---|
| .. | .. |
|---|
| 463 | 457 | nr_iovecs > 0)) |
|---|
| 464 | 458 | return NULL; |
|---|
| 465 | 459 | /* |
|---|
| 466 | | - * generic_make_request() converts recursion to iteration; this |
|---|
| 460 | + * submit_bio_noacct() converts recursion to iteration; this |
|---|
| 467 | 461 | * means if we're running beneath it, any bios we allocate and |
|---|
| 468 | 462 | * submit will not be submitted (and thus freed) until after we |
|---|
| 469 | 463 | * return. |
|---|
| 470 | 464 | * |
|---|
| 471 | 465 | * This exposes us to a potential deadlock if we allocate |
|---|
| 472 | 466 | * multiple bios from the same bio_set() while running |
|---|
| 473 | | - * underneath generic_make_request(). If we were to allocate |
|---|
| 467 | + * underneath submit_bio_noacct(). If we were to allocate |
|---|
| 474 | 468 | * multiple bios (say a stacking block driver that was splitting |
|---|
| 475 | 469 | * bios), we would deadlock if we exhausted the mempool's |
|---|
| 476 | 470 | * reserve. |
|---|
| .. | .. |
|---|
| 551 | 545 | EXPORT_SYMBOL(zero_fill_bio_iter); |
|---|
| 552 | 546 | |
|---|
| 553 | 547 | /** |
|---|
| 548 | + * bio_truncate - truncate the bio down to @new_size |
|---|
| 549 | + * @bio: the bio to be truncated |
|---|
| 550 | + * @new_size: new size for truncating the bio |
|---|
| 551 | + * |
|---|
| 552 | + * Description: |
|---|
| 553 | + * Truncate the bio to the new size @new_size. If bio_op(bio) is |
|---|
| 554 | + * REQ_OP_READ, zero the truncated part. This function should only |
|---|
| 555 | + * be used for handling corner cases, such as bio EOD. |
|---|
| 556 | + */ |
|---|
| 557 | +void bio_truncate(struct bio *bio, unsigned new_size) |
|---|
| 558 | +{ |
|---|
| 559 | + struct bio_vec bv; |
|---|
| 560 | + struct bvec_iter iter; |
|---|
| 561 | + unsigned int done = 0; |
|---|
| 562 | + bool truncated = false; |
|---|
| 563 | + |
|---|
| 564 | + if (new_size >= bio->bi_iter.bi_size) |
|---|
| 565 | + return; |
|---|
| 566 | + |
|---|
| 567 | + if (bio_op(bio) != REQ_OP_READ) |
|---|
| 568 | + goto exit; |
|---|
| 569 | + |
|---|
| 570 | + bio_for_each_segment(bv, bio, iter) { |
|---|
| 571 | + if (done + bv.bv_len > new_size) { |
|---|
| 572 | + unsigned offset; |
|---|
| 573 | + |
|---|
| 574 | + if (!truncated) |
|---|
| 575 | + offset = new_size - done; |
|---|
| 576 | + else |
|---|
| 577 | + offset = 0; |
|---|
| 578 | + zero_user(bv.bv_page, bv.bv_offset + offset, |
|---|
| 579 | + bv.bv_len - offset); |
|---|
| 580 | + truncated = true; |
|---|
| 581 | + } |
|---|
| 582 | + done += bv.bv_len; |
|---|
| 583 | + } |
|---|
| 584 | + |
|---|
| 585 | + exit: |
|---|
| 586 | + /* |
|---|
| 587 | + * Don't touch the bvec table here; keep it effectively immutable, since |
|---|
| 588 | + * a fs bio user has to retrieve all pages via bio_for_each_segment_all |
|---|
| 589 | + * in its .end_bio() callback. |
|---|
| 590 | + * |
|---|
| 591 | + * It is enough to truncate the bio by updating .bi_size, since drivers |
|---|
| 592 | + * can derive a correct bvec from the updated .bi_size. |
|---|
| 593 | + */ |
|---|
| 594 | + bio->bi_iter.bi_size = new_size; |
|---|
| 595 | +} |
|---|
| 596 | + |
|---|
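A worked example of the zeroing logic above, with illustrative numbers only: an 8 KiB READ bio made of two 4 KiB bvecs, truncated to 5 KiB.

```c
/*
 * bio_truncate(bio, 5120) on two 4096-byte bvecs:
 *
 *   bvec 0: done = 0,    0 + 4096 <= 5120 -> left intact, done = 4096
 *   bvec 1: done = 4096, 4096 + 4096 > 5120
 *           offset = 5120 - 4096 = 1024
 *           zero_user() clears bytes [1024, 4096) of that page
 *
 * Only bi_iter.bi_size is then set to 5120; the bvec table itself is
 * untouched, so bio_for_each_segment_all() still sees whole pages.
 */
```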
| 597 | +/** |
|---|
| 598 | + * guard_bio_eod - truncate a BIO to fit the block device |
|---|
| 599 | + * @bio: bio to truncate |
|---|
| 600 | + * |
|---|
| 601 | + * This allows us to do IO even on the odd last sectors of a device, even if the |
|---|
| 602 | + * block size is some multiple of the physical sector size. |
|---|
| 603 | + * |
|---|
| 604 | + * We'll just truncate the bio to the size of the device, and clear the end of |
|---|
| 605 | + * the buffer head manually. Truly out-of-range accesses will turn into actual |
|---|
| 606 | + * I/O errors, this only handles the "we need to be able to do I/O at the final |
|---|
| 607 | + * sector" case. |
|---|
| 608 | + */ |
|---|
| 609 | +void guard_bio_eod(struct bio *bio) |
|---|
| 610 | +{ |
|---|
| 611 | + sector_t maxsector; |
|---|
| 612 | + struct hd_struct *part; |
|---|
| 613 | + |
|---|
| 614 | + rcu_read_lock(); |
|---|
| 615 | + part = __disk_get_part(bio->bi_disk, bio->bi_partno); |
|---|
| 616 | + if (part) |
|---|
| 617 | + maxsector = part_nr_sects_read(part); |
|---|
| 618 | + else |
|---|
| 619 | + maxsector = get_capacity(bio->bi_disk); |
|---|
| 620 | + rcu_read_unlock(); |
|---|
| 621 | + |
|---|
| 622 | + if (!maxsector) |
|---|
| 623 | + return; |
|---|
| 624 | + |
|---|
| 625 | + /* |
|---|
| 626 | + * If the *whole* IO is past the end of the device, |
|---|
| 627 | + * let it through, and the IO layer will turn it into |
|---|
| 628 | + * an EIO. |
|---|
| 629 | + */ |
|---|
| 630 | + if (unlikely(bio->bi_iter.bi_sector >= maxsector)) |
|---|
| 631 | + return; |
|---|
| 632 | + |
|---|
| 633 | + maxsector -= bio->bi_iter.bi_sector; |
|---|
| 634 | + if (likely((bio->bi_iter.bi_size >> 9) <= maxsector)) |
|---|
| 635 | + return; |
|---|
| 636 | + |
|---|
| 637 | + bio_truncate(bio, maxsector << 9); |
|---|
| 638 | +} |
|---|
| 639 | + |
|---|
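The sector arithmetic above is compact; here is a purely illustrative trace for a 4 KiB write starting two sectors before the end of a 1000-sector partition.

```c
/*
 * bi_sector = 998, bi_size = 4096, partition size maxsector = 1000:
 *
 *   998 >= 1000?           no  -> the I/O starts in range, keep going
 *   maxsector -= 998           -> 2 sectors of room remain
 *   (4096 >> 9) = 8 <= 2?  no  -> bio_truncate(bio, 2 << 9) = 1024 bytes
 *
 * A bio starting wholly at or past sector 1000 would instead be left
 * alone and turned into -EIO by the lower layers.
 */
```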
| 640 | +/** |
|---|
| 554 | 641 | * bio_put - release a reference to a bio |
|---|
| 555 | 642 | * @bio: bio to release reference to |
|---|
| 556 | 643 | * |
|---|
| .. | .. |
|---|
| 573 | 660 | } |
|---|
| 574 | 661 | } |
|---|
| 575 | 662 | EXPORT_SYMBOL(bio_put); |
|---|
| 576 | | - |
|---|
| 577 | | -inline int bio_phys_segments(struct request_queue *q, struct bio *bio) |
|---|
| 578 | | -{ |
|---|
| 579 | | - if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) |
|---|
| 580 | | - blk_recount_segments(q, bio); |
|---|
| 581 | | - |
|---|
| 582 | | - return bio->bi_phys_segments; |
|---|
| 583 | | -} |
|---|
| 584 | | -EXPORT_SYMBOL(bio_phys_segments); |
|---|
| 585 | 663 | |
|---|
| 586 | 664 | /** |
|---|
| 587 | 665 | * __bio_clone_fast - clone a bio that shares the original bio's biovec |
|---|
| .. | .. |
|---|
| 613 | 691 | bio->bi_iter = bio_src->bi_iter; |
|---|
| 614 | 692 | bio->bi_io_vec = bio_src->bi_io_vec; |
|---|
| 615 | 693 | |
|---|
| 616 | | - bio_clone_blkcg_association(bio, bio_src); |
|---|
| 694 | + bio_clone_blkg_association(bio, bio_src); |
|---|
| 695 | + blkcg_bio_issue_init(bio); |
|---|
| 617 | 696 | } |
|---|
| 618 | 697 | EXPORT_SYMBOL(__bio_clone_fast); |
|---|
| 619 | 698 | |
|---|
| .. | .. |
|---|
| 635 | 714 | |
|---|
| 636 | 715 | __bio_clone_fast(b, bio); |
|---|
| 637 | 716 | |
|---|
| 638 | | - bio_crypt_clone(b, bio, gfp_mask); |
|---|
| 717 | + if (bio_crypt_clone(b, bio, gfp_mask) < 0) |
|---|
| 718 | + goto err_put; |
|---|
| 639 | 719 | |
|---|
| 640 | 720 | if (bio_integrity(bio) && |
|---|
| 641 | | - bio_integrity_clone(b, bio, gfp_mask) < 0) { |
|---|
| 642 | | - bio_put(b); |
|---|
| 643 | | - return NULL; |
|---|
| 644 | | - } |
|---|
| 721 | + bio_integrity_clone(b, bio, gfp_mask) < 0) |
|---|
| 722 | + goto err_put; |
|---|
| 645 | 723 | |
|---|
| 646 | 724 | return b; |
|---|
| 725 | + |
|---|
| 726 | +err_put: |
|---|
| 727 | + bio_put(b); |
|---|
| 728 | + return NULL; |
|---|
| 647 | 729 | } |
|---|
| 648 | 730 | EXPORT_SYMBOL(bio_clone_fast); |
|---|
| 649 | 731 | |
|---|
| 650 | | -/** |
|---|
| 651 | | - * bio_add_pc_page - attempt to add page to bio |
|---|
| 652 | | - * @q: the target queue |
|---|
| 653 | | - * @bio: destination bio |
|---|
| 654 | | - * @page: page to add |
|---|
| 655 | | - * @len: vec entry length |
|---|
| 656 | | - * @offset: vec entry offset |
|---|
| 657 | | - * |
|---|
| 658 | | - * Attempt to add a page to the bio_vec maplist. This can fail for a |
|---|
| 659 | | - * number of reasons, such as the bio being full or target block device |
|---|
| 660 | | - * limitations. The target block device must allow bio's up to PAGE_SIZE, |
|---|
| 661 | | - * so it is always possible to add a single page to an empty bio. |
|---|
| 662 | | - * |
|---|
| 663 | | - * This should only be used by REQ_PC bios. |
|---|
| 664 | | - */ |
|---|
| 665 | | -int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page |
|---|
| 666 | | - *page, unsigned int len, unsigned int offset) |
|---|
| 732 | +const char *bio_devname(struct bio *bio, char *buf) |
|---|
| 667 | 733 | { |
|---|
| 668 | | - int retried_segments = 0; |
|---|
| 734 | + return disk_name(bio->bi_disk, bio->bi_partno, buf); |
|---|
| 735 | +} |
|---|
| 736 | +EXPORT_SYMBOL(bio_devname); |
|---|
| 737 | + |
|---|
| 738 | +static inline bool page_is_mergeable(const struct bio_vec *bv, |
|---|
| 739 | + struct page *page, unsigned int len, unsigned int off, |
|---|
| 740 | + bool *same_page) |
|---|
| 741 | +{ |
|---|
| 742 | + size_t bv_end = bv->bv_offset + bv->bv_len; |
|---|
| 743 | + phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1; |
|---|
| 744 | + phys_addr_t page_addr = page_to_phys(page); |
|---|
| 745 | + |
|---|
| 746 | + if (vec_end_addr + 1 != page_addr + off) |
|---|
| 747 | + return false; |
|---|
| 748 | + if (xen_domain() && !xen_biovec_phys_mergeable(bv, page)) |
|---|
| 749 | + return false; |
|---|
| 750 | + |
|---|
| 751 | + *same_page = ((vec_end_addr & PAGE_MASK) == page_addr); |
|---|
| 752 | + if (*same_page) |
|---|
| 753 | + return true; |
|---|
| 754 | + return (bv->bv_page + bv_end / PAGE_SIZE) == (page + off / PAGE_SIZE); |
|---|
| 755 | +} |
|---|
| 756 | + |
|---|
| 757 | +/* |
|---|
| 758 | + * Try to merge a page into a segment, while obeying the hardware segment |
|---|
| 759 | + * size limit. This is not for normal read/write bios, but for passthrough |
|---|
| 760 | + * or Zone Append operations that we can't split. |
|---|
| 761 | + */ |
|---|
| 762 | +static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio, |
|---|
| 763 | + struct page *page, unsigned len, |
|---|
| 764 | + unsigned offset, bool *same_page) |
|---|
| 765 | +{ |
|---|
| 766 | + struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; |
|---|
| 767 | + unsigned long mask = queue_segment_boundary(q); |
|---|
| 768 | + phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset; |
|---|
| 769 | + phys_addr_t addr2 = page_to_phys(page) + offset + len - 1; |
|---|
| 770 | + |
|---|
| 771 | + if ((addr1 | mask) != (addr2 | mask)) |
|---|
| 772 | + return false; |
|---|
| 773 | + if (bv->bv_len + len > queue_max_segment_size(q)) |
|---|
| 774 | + return false; |
|---|
| 775 | + return __bio_try_merge_page(bio, page, len, offset, same_page); |
|---|
| 776 | +} |
|---|
| 777 | + |
|---|
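"Mergeable" here means physically contiguous; a hedged example with made-up addresses, assuming PAGE_SIZE = 4096:

```c
/*
 *   bv = { .bv_page at phys 0x10000, .bv_offset = 0, .bv_len = 4096 }
 *   candidate page at phys 0x11000, off = 0, len = 4096
 *
 *   vec_end_addr = 0x10000 + 4096 - 1 = 0x10fff
 *   vec_end_addr + 1 == 0x11000 == page_addr + off  -> mergeable
 *   (0x10fff & PAGE_MASK) = 0x10000 != 0x11000      -> *same_page = false
 *
 * bio_try_merge_hw_seg() additionally refuses the merge when the grown
 * bvec would cross queue_segment_boundary() or exceed
 * queue_max_segment_size().
 */
```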
| 778 | +/** |
|---|
| 779 | + * bio_add_hw_page - attempt to add a page to a bio with hw constraints |
|---|
| 780 | + * @q: the target queue |
|---|
| 781 | + * @bio: destination bio |
|---|
| 782 | + * @page: page to add |
|---|
| 783 | + * @len: vec entry length |
|---|
| 784 | + * @offset: vec entry offset |
|---|
| 785 | + * @max_sectors: maximum number of sectors that can be added |
|---|
| 786 | + * @same_page: set on return if the segment was merged within the same page |
|---|
| 787 | + * |
|---|
| 788 | + * Add a page to a bio while respecting the hardware max_sectors, max_segment |
|---|
| 789 | + * and gap limitations. |
|---|
| 790 | + */ |
|---|
| 791 | +int bio_add_hw_page(struct request_queue *q, struct bio *bio, |
|---|
| 792 | + struct page *page, unsigned int len, unsigned int offset, |
|---|
| 793 | + unsigned int max_sectors, bool *same_page) |
|---|
| 794 | +{ |
|---|
| 669 | 795 | struct bio_vec *bvec; |
|---|
| 670 | 796 | |
|---|
| 671 | | - /* |
|---|
| 672 | | - * cloned bio must not modify vec list |
|---|
| 673 | | - */ |
|---|
| 674 | | - if (unlikely(bio_flagged(bio, BIO_CLONED))) |
|---|
| 797 | + if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) |
|---|
| 675 | 798 | return 0; |
|---|
| 676 | 799 | |
|---|
| 677 | | - if (((bio->bi_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q)) |
|---|
| 800 | + if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors) |
|---|
| 678 | 801 | return 0; |
|---|
| 679 | 802 | |
|---|
| 680 | | - /* |
|---|
| 681 | | - * For filesystems with a blocksize smaller than the pagesize |
|---|
| 682 | | - * we will often be called with the same page as last time and |
|---|
| 683 | | - * a consecutive offset. Optimize this special case. |
|---|
| 684 | | - */ |
|---|
| 685 | 803 | if (bio->bi_vcnt > 0) { |
|---|
| 686 | | - struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; |
|---|
| 687 | | - |
|---|
| 688 | | - if (page == prev->bv_page && |
|---|
| 689 | | - offset == prev->bv_offset + prev->bv_len) { |
|---|
| 690 | | - prev->bv_len += len; |
|---|
| 691 | | - bio->bi_iter.bi_size += len; |
|---|
| 692 | | - goto done; |
|---|
| 693 | | - } |
|---|
| 804 | + if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page)) |
|---|
| 805 | + return len; |
|---|
| 694 | 806 | |
|---|
| 695 | 807 | /* |
|---|
| 696 | | - * If the queue doesn't support SG gaps and adding this |
|---|
| 697 | | - * offset would create a gap, disallow it. |
|---|
| 808 | + * If the queue doesn't support SG gaps and adding this segment |
|---|
| 809 | + * would create a gap, disallow it. |
|---|
| 698 | 810 | */ |
|---|
| 699 | | - if (bvec_gap_to_prev(q, prev, offset)) |
|---|
| 811 | + bvec = &bio->bi_io_vec[bio->bi_vcnt - 1]; |
|---|
| 812 | + if (bvec_gap_to_prev(q, bvec, offset)) |
|---|
| 700 | 813 | return 0; |
|---|
| 701 | 814 | } |
|---|
| 702 | 815 | |
|---|
| 703 | | - if (bio_full(bio)) |
|---|
| 816 | + if (bio_full(bio, len)) |
|---|
| 704 | 817 | return 0; |
|---|
| 705 | 818 | |
|---|
| 706 | | - /* |
|---|
| 707 | | - * setup the new entry, we might clear it again later if we |
|---|
| 708 | | - * cannot add the page |
|---|
| 709 | | - */ |
|---|
| 819 | + if (bio->bi_vcnt >= queue_max_segments(q)) |
|---|
| 820 | + return 0; |
|---|
| 821 | + |
|---|
| 710 | 822 | bvec = &bio->bi_io_vec[bio->bi_vcnt]; |
|---|
| 711 | 823 | bvec->bv_page = page; |
|---|
| 712 | 824 | bvec->bv_len = len; |
|---|
| 713 | 825 | bvec->bv_offset = offset; |
|---|
| 714 | 826 | bio->bi_vcnt++; |
|---|
| 715 | | - bio->bi_phys_segments++; |
|---|
| 716 | 827 | bio->bi_iter.bi_size += len; |
|---|
| 717 | | - |
|---|
| 718 | | - /* |
|---|
| 719 | | - * Perform a recount if the number of segments is greater |
|---|
| 720 | | - * than queue_max_segments(q). |
|---|
| 721 | | - */ |
|---|
| 722 | | - |
|---|
| 723 | | - while (bio->bi_phys_segments > queue_max_segments(q)) { |
|---|
| 724 | | - |
|---|
| 725 | | - if (retried_segments) |
|---|
| 726 | | - goto failed; |
|---|
| 727 | | - |
|---|
| 728 | | - retried_segments = 1; |
|---|
| 729 | | - blk_recount_segments(q, bio); |
|---|
| 730 | | - } |
|---|
| 731 | | - |
|---|
| 732 | | - /* If we may be able to merge these biovecs, force a recount */ |
|---|
| 733 | | - if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) |
|---|
| 734 | | - bio_clear_flag(bio, BIO_SEG_VALID); |
|---|
| 735 | | - |
|---|
| 736 | | - done: |
|---|
| 737 | 828 | return len; |
|---|
| 829 | +} |
|---|
| 738 | 830 | |
|---|
| 739 | | - failed: |
|---|
| 740 | | - bvec->bv_page = NULL; |
|---|
| 741 | | - bvec->bv_len = 0; |
|---|
| 742 | | - bvec->bv_offset = 0; |
|---|
| 743 | | - bio->bi_vcnt--; |
|---|
| 744 | | - bio->bi_iter.bi_size -= len; |
|---|
| 745 | | - blk_recount_segments(q, bio); |
|---|
| 746 | | - return 0; |
|---|
| 831 | +/** |
|---|
| 832 | + * bio_add_pc_page - attempt to add page to passthrough bio |
|---|
| 833 | + * @q: the target queue |
|---|
| 834 | + * @bio: destination bio |
|---|
| 835 | + * @page: page to add |
|---|
| 836 | + * @len: vec entry length |
|---|
| 837 | + * @offset: vec entry offset |
|---|
| 838 | + * |
|---|
| 839 | + * Attempt to add a page to the bio_vec maplist. This can fail for a |
|---|
| 840 | + * number of reasons, such as the bio being full or target block device |
|---|
| 841 | + * limitations. The target block device must allow bio's up to PAGE_SIZE, |
|---|
| 842 | + * so it is always possible to add a single page to an empty bio. |
|---|
| 843 | + * |
|---|
| 844 | + * This should only be used by passthrough bios. |
|---|
| 845 | + */ |
|---|
| 846 | +int bio_add_pc_page(struct request_queue *q, struct bio *bio, |
|---|
| 847 | + struct page *page, unsigned int len, unsigned int offset) |
|---|
| 848 | +{ |
|---|
| 849 | + bool same_page = false; |
|---|
| 850 | + return bio_add_hw_page(q, bio, page, len, offset, |
|---|
| 851 | + queue_max_hw_sectors(q), &same_page); |
|---|
| 747 | 852 | } |
|---|
| 748 | 853 | EXPORT_SYMBOL(bio_add_pc_page); |
|---|
| 749 | 854 | |
|---|
| 750 | 855 | /** |
|---|
| 751 | 856 | * __bio_try_merge_page - try appending data to an existing bvec. |
|---|
| 752 | 857 | * @bio: destination bio |
|---|
| 753 | | - * @page: page to add |
|---|
| 858 | + * @page: start page to add |
|---|
| 754 | 859 | * @len: length of the data to add |
|---|
| 755 | | - * @off: offset of the data in @page |
|---|
| 860 | + * @off: offset of the data relative to @page |
|---|
| 861 | + * @same_page: set on return if the segment was merged within the same page |
|---|
| 756 | 862 | * |
|---|
| 757 | 863 | * Try to add the data at @page + @off to the last bvec of @bio. This is a |
|---|
| 758 | | - * a useful optimisation for file systems with a block size smaller than the |
|---|
| 864 | + * useful optimisation for file systems with a block size smaller than the |
|---|
| 759 | 865 | * page size. |
|---|
| 866 | + * |
|---|
| 867 | + * Warn if (@len, @off) crosses pages when @same_page is true. |
|---|
| 760 | 868 | * |
|---|
| 761 | 869 | * Return %true on success or %false on failure. |
|---|
| 762 | 870 | */ |
|---|
| 763 | 871 | bool __bio_try_merge_page(struct bio *bio, struct page *page, |
|---|
| 764 | | - unsigned int len, unsigned int off) |
|---|
| 872 | + unsigned int len, unsigned int off, bool *same_page) |
|---|
| 765 | 873 | { |
|---|
| 766 | 874 | if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) |
|---|
| 767 | 875 | return false; |
|---|
| .. | .. |
|---|
| 769 | 877 | if (bio->bi_vcnt > 0) { |
|---|
| 770 | 878 | struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; |
|---|
| 771 | 879 | |
|---|
| 772 | | - if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) { |
|---|
| 880 | + if (page_is_mergeable(bv, page, len, off, same_page)) { |
|---|
| 881 | + if (bio->bi_iter.bi_size > UINT_MAX - len) { |
|---|
| 882 | + *same_page = false; |
|---|
| 883 | + return false; |
|---|
| 884 | + } |
|---|
| 773 | 885 | bv->bv_len += len; |
|---|
| 774 | 886 | bio->bi_iter.bi_size += len; |
|---|
| 775 | 887 | return true; |
|---|
| .. | .. |
|---|
| 780 | 892 | EXPORT_SYMBOL_GPL(__bio_try_merge_page); |
|---|
| 781 | 893 | |
|---|
| 782 | 894 | /** |
|---|
| 783 | | - * __bio_add_page - add page to a bio in a new segment |
|---|
| 895 | + * __bio_add_page - add page(s) to a bio in a new segment |
|---|
| 784 | 896 | * @bio: destination bio |
|---|
| 785 | | - * @page: page to add |
|---|
| 786 | | - * @len: length of the data to add |
|---|
| 787 | | - * @off: offset of the data in @page |
|---|
| 897 | + * @page: start page to add |
|---|
| 898 | + * @len: length of the data to add, may cross pages |
|---|
| 899 | + * @off: offset of the data relative to @page, may cross pages |
|---|
| 788 | 900 | * |
|---|
| 789 | 901 | * Add the data at @page + @off to @bio as a new bvec. The caller must ensure |
|---|
| 790 | 902 | * that @bio has space for another bvec. |
|---|
| .. | .. |
|---|
| 795 | 907 | struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt]; |
|---|
| 796 | 908 | |
|---|
| 797 | 909 | WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); |
|---|
| 798 | | - WARN_ON_ONCE(bio_full(bio)); |
|---|
| 910 | + WARN_ON_ONCE(bio_full(bio, len)); |
|---|
| 799 | 911 | |
|---|
| 800 | 912 | bv->bv_page = page; |
|---|
| 801 | 913 | bv->bv_offset = off; |
|---|
| .. | .. |
|---|
| 810 | 922 | EXPORT_SYMBOL_GPL(__bio_add_page); |
|---|
| 811 | 923 | |
|---|
| 812 | 924 | /** |
|---|
| 813 | | - * bio_add_page - attempt to add page to bio |
|---|
| 925 | + * bio_add_page - attempt to add page(s) to bio |
|---|
| 814 | 926 | * @bio: destination bio |
|---|
| 815 | | - * @page: page to add |
|---|
| 816 | | - * @len: vec entry length |
|---|
| 817 | | - * @offset: vec entry offset |
|---|
| 927 | + * @page: start page to add |
|---|
| 928 | + * @len: vec entry length, may cross pages |
|---|
| 929 | + * @offset: vec entry offset relative to @page, may cross pages |
|---|
| 818 | 930 | * |
|---|
| 819 | | - * Attempt to add a page to the bio_vec maplist. This will only fail |
|---|
| 931 | + * Attempt to add page(s) to the bio_vec maplist. This will only fail |
|---|
| 820 | 932 | * if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio. |
|---|
| 821 | 933 | */ |
|---|
| 822 | 934 | int bio_add_page(struct bio *bio, struct page *page, |
|---|
| 823 | 935 | unsigned int len, unsigned int offset) |
|---|
| 824 | 936 | { |
|---|
| 825 | | - if (!__bio_try_merge_page(bio, page, len, offset)) { |
|---|
| 826 | | - if (bio_full(bio)) |
|---|
| 937 | + bool same_page = false; |
|---|
| 938 | + |
|---|
| 939 | + if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) { |
|---|
| 940 | + if (bio_full(bio, len)) |
|---|
| 827 | 941 | return 0; |
|---|
| 828 | 942 | __bio_add_page(bio, page, len, offset); |
|---|
| 829 | 943 | } |
|---|
| 830 | 944 | return len; |
|---|
| 831 | 945 | } |
|---|
| 832 | 946 | EXPORT_SYMBOL(bio_add_page); |
|---|
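A minimal sketch of the usual call sequence around bio_add_page(); the disk, sector, and GFP choice are assumptions for illustration only.

```c
#include <linux/bio.h>

/* Hypothetical: synchronously read one page from 'disk' at 'sector'. */
static int read_one_page(struct gendisk *disk, sector_t sector,
			 struct page *page)
{
	struct bio *bio = bio_alloc(GFP_KERNEL, 1);
	int ret;

	bio->bi_disk = disk;
	bio->bi_iter.bi_sector = sector;
	bio->bi_opf = REQ_OP_READ;
	if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) {
		bio_put(bio);
		return -EIO;
	}
	ret = submit_bio_wait(bio);
	bio_put(bio);
	return ret;
}
```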
| 947 | + |
|---|
| 948 | +void bio_release_pages(struct bio *bio, bool mark_dirty) |
|---|
| 949 | +{ |
|---|
| 950 | + struct bvec_iter_all iter_all; |
|---|
| 951 | + struct bio_vec *bvec; |
|---|
| 952 | + |
|---|
| 953 | + if (bio_flagged(bio, BIO_NO_PAGE_REF)) |
|---|
| 954 | + return; |
|---|
| 955 | + |
|---|
| 956 | + bio_for_each_segment_all(bvec, bio, iter_all) { |
|---|
| 957 | + if (mark_dirty && !PageCompound(bvec->bv_page)) |
|---|
| 958 | + set_page_dirty_lock(bvec->bv_page); |
|---|
| 959 | + put_page(bvec->bv_page); |
|---|
| 960 | + } |
|---|
| 961 | +} |
|---|
| 962 | +EXPORT_SYMBOL_GPL(bio_release_pages); |
|---|
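A hedged sketch of the typical completion-side pairing for the helper above; the handler name is hypothetical.

```c
/* Hypothetical direct-I/O completion: drop the page references taken
 * by bio_iov_iter_get_pages(), redirtying pages that were read into.
 * bio_release_pages() is a no-op for BIO_NO_PAGE_REF bios. */
static void dio_bio_end_io(struct bio *bio)
{
	bio_release_pages(bio, bio_data_dir(bio) == READ);
	bio_put(bio);
}
```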
| 963 | + |
|---|
| 964 | +static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter) |
|---|
| 965 | +{ |
|---|
| 966 | + const struct bio_vec *bv = iter->bvec; |
|---|
| 967 | + unsigned int len; |
|---|
| 968 | + size_t size; |
|---|
| 969 | + |
|---|
| 970 | + if (WARN_ON_ONCE(iter->iov_offset > bv->bv_len)) |
|---|
| 971 | + return -EINVAL; |
|---|
| 972 | + |
|---|
| 973 | + len = min_t(size_t, bv->bv_len - iter->iov_offset, iter->count); |
|---|
| 974 | + size = bio_add_page(bio, bv->bv_page, len, |
|---|
| 975 | + bv->bv_offset + iter->iov_offset); |
|---|
| 976 | + if (unlikely(size != len)) |
|---|
| 977 | + return -EINVAL; |
|---|
| 978 | + iov_iter_advance(iter, size); |
|---|
| 979 | + return 0; |
|---|
| 980 | +} |
|---|
| 981 | + |
|---|
| 982 | +static void bio_put_pages(struct page **pages, size_t size, size_t off) |
|---|
| 983 | +{ |
|---|
| 984 | + size_t i, nr = DIV_ROUND_UP(size + (off & ~PAGE_MASK), PAGE_SIZE); |
|---|
| 985 | + |
|---|
| 986 | + for (i = 0; i < nr; i++) |
|---|
| 987 | + put_page(pages[i]); |
|---|
| 988 | +} |
|---|
| 989 | + |
|---|
| 990 | +#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *)) |
|---|
| 833 | 991 | |
|---|
| 834 | 992 | /** |
|---|
| 835 | 993 | * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio |
|---|
| .. | .. |
|---|
| 839 | 997 | * Pins pages from *iter and appends them to @bio's bvec array. The |
|---|
| 840 | 998 | * pages will have to be released using put_page() when done. |
|---|
| 841 | 999 | * For multi-segment *iter, this function only adds pages from the |
|---|
| 842 | | - * the next non-empty segment of the iov iterator. |
|---|
| 1000 | + * next non-empty segment of the iov iterator. |
|---|
| 843 | 1001 | */ |
|---|
| 844 | 1002 | static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) |
|---|
| 845 | 1003 | { |
|---|
| 846 | | - unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx; |
|---|
| 1004 | + unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; |
|---|
| 1005 | + unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; |
|---|
| 847 | 1006 | struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; |
|---|
| 848 | 1007 | struct page **pages = (struct page **)bv; |
|---|
| 1008 | + bool same_page = false; |
|---|
| 1009 | + ssize_t size, left; |
|---|
| 1010 | + unsigned len, i; |
|---|
| 849 | 1011 | size_t offset; |
|---|
| 850 | | - ssize_t size; |
|---|
| 1012 | + |
|---|
| 1013 | + /* |
|---|
| 1014 | + * Move page array up in the allocated memory for the bio vecs as far as |
|---|
| 1015 | + * possible so that we can start filling biovecs from the beginning |
|---|
| 1016 | + * without overwriting the temporary page array. |
|---|
| 1017 | + */ |
|---|
| 1018 | + BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2); |
|---|
| 1019 | + pages += entries_left * (PAGE_PTRS_PER_BVEC - 1); |
|---|
| 851 | 1020 | |
|---|
| 852 | 1021 | size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); |
|---|
| 853 | 1022 | if (unlikely(size <= 0)) |
|---|
| 854 | 1023 | return size ? size : -EFAULT; |
|---|
| 855 | | - idx = nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE; |
|---|
| 856 | 1024 | |
|---|
| 857 | | - /* |
|---|
| 858 | | - * Deep magic below: We need to walk the pinned pages backwards |
|---|
| 859 | | - * because we are abusing the space allocated for the bio_vecs |
|---|
| 860 | | - * for the page array. Because the bio_vecs are larger than the |
|---|
| 861 | | - * page pointers by definition this will always work. But it also |
|---|
| 862 | | - * means we can't use bio_add_page, so any changes to it's semantics |
|---|
| 863 | | - * need to be reflected here as well. |
|---|
| 864 | | - */ |
|---|
| 865 | | - bio->bi_iter.bi_size += size; |
|---|
| 866 | | - bio->bi_vcnt += nr_pages; |
|---|
| 1025 | + for (left = size, i = 0; left > 0; left -= len, i++) { |
|---|
| 1026 | + struct page *page = pages[i]; |
|---|
| 867 | 1027 | |
|---|
| 868 | | - while (idx--) { |
|---|
| 869 | | - bv[idx].bv_page = pages[idx]; |
|---|
| 870 | | - bv[idx].bv_len = PAGE_SIZE; |
|---|
| 871 | | - bv[idx].bv_offset = 0; |
|---|
| 1028 | + len = min_t(size_t, PAGE_SIZE - offset, left); |
|---|
| 1029 | + |
|---|
| 1030 | + if (__bio_try_merge_page(bio, page, len, offset, &same_page)) { |
|---|
| 1031 | + if (same_page) |
|---|
| 1032 | + put_page(page); |
|---|
| 1033 | + } else { |
|---|
| 1034 | + if (WARN_ON_ONCE(bio_full(bio, len))) { |
|---|
| 1035 | + bio_put_pages(pages + i, left, offset); |
|---|
| 1036 | + return -EINVAL; |
|---|
| 1037 | + } |
|---|
| 1038 | + __bio_add_page(bio, page, len, offset); |
|---|
| 1039 | + } |
|---|
| 1040 | + offset = 0; |
|---|
| 872 | 1041 | } |
|---|
| 873 | | - |
|---|
| 874 | | - bv[0].bv_offset += offset; |
|---|
| 875 | | - bv[0].bv_len -= offset; |
|---|
| 876 | | - bv[nr_pages - 1].bv_len -= nr_pages * PAGE_SIZE - offset - size; |
|---|
| 877 | 1042 | |
|---|
| 878 | 1043 | iov_iter_advance(iter, size); |
|---|
| 879 | 1044 | return 0; |
|---|
| 880 | 1045 | } |
|---|
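The pointer arithmetic in the "move page array up" trick is worth a concrete trace. The numbers below assume a 64-bit build (sizeof(struct bio_vec) == 16, sizeof(struct page *) == 8, so PAGE_PTRS_PER_BVEC == 2) and are illustrative only:

```c
/*
 * entries_left = 4 free bvecs -> 64 bytes of scratch space:
 *
 *   pages += 4 * (2 - 1);   // pages[0] now sits at byte offset 32
 *
 *   byte offset:   0..15    16..31   32..47       48..63
 *   written as:    bvec[0]  bvec[1]  bvec[2]      bvec[3]
 *   read as:                         pages[0..1]  pages[2..3]
 *
 * Writing bvec[i] only ever clobbers pages[] slots whose pointers have
 * already been read, so one buffer safely serves both roles.
 */
```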
| 881 | 1046 | |
|---|
| 1047 | +static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter) |
|---|
| 1048 | +{ |
|---|
| 1049 | + unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; |
|---|
| 1050 | + unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; |
|---|
| 1051 | + struct request_queue *q = bio->bi_disk->queue; |
|---|
| 1052 | + unsigned int max_append_sectors = queue_max_zone_append_sectors(q); |
|---|
| 1053 | + struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; |
|---|
| 1054 | + struct page **pages = (struct page **)bv; |
|---|
| 1055 | + ssize_t size, left; |
|---|
| 1056 | + unsigned len, i; |
|---|
| 1057 | + size_t offset; |
|---|
| 1058 | + int ret = 0; |
|---|
| 1059 | + |
|---|
| 1060 | + /* |
|---|
| 1061 | + * Move page array up in the allocated memory for the bio vecs as far as |
|---|
| 1062 | + * possible so that we can start filling biovecs from the beginning |
|---|
| 1063 | + * without overwriting the temporary page array. |
|---|
| 1064 | + */ |
|---|
| 1065 | + BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2); |
|---|
| 1066 | + pages += entries_left * (PAGE_PTRS_PER_BVEC - 1); |
|---|
| 1067 | + |
|---|
| 1068 | + size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); |
|---|
| 1069 | + if (unlikely(size <= 0)) |
|---|
| 1070 | + return size ? size : -EFAULT; |
|---|
| 1071 | + |
|---|
| 1072 | + for (left = size, i = 0; left > 0; left -= len, i++) { |
|---|
| 1073 | + struct page *page = pages[i]; |
|---|
| 1074 | + bool same_page = false; |
|---|
| 1075 | + |
|---|
| 1076 | + len = min_t(size_t, PAGE_SIZE - offset, left); |
|---|
| 1077 | + if (bio_add_hw_page(q, bio, page, len, offset, |
|---|
| 1078 | + max_append_sectors, &same_page) != len) { |
|---|
| 1079 | + bio_put_pages(pages + i, left, offset); |
|---|
| 1080 | + ret = -EINVAL; |
|---|
| 1081 | + break; |
|---|
| 1082 | + } |
|---|
| 1083 | + if (same_page) |
|---|
| 1084 | + put_page(page); |
|---|
| 1085 | + offset = 0; |
|---|
| 1086 | + } |
|---|
| 1087 | + |
|---|
| 1088 | + iov_iter_advance(iter, size - left); |
|---|
| 1089 | + return ret; |
|---|
| 1090 | +} |
|---|
| 1091 | + |
|---|
| 882 | 1092 | /** |
|---|
| 883 | | - * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio |
|---|
| 1093 | + * bio_iov_iter_get_pages - add user or kernel pages to a bio |
|---|
| 884 | 1094 | * @bio: bio to add pages to |
|---|
| 885 | | - * @iter: iov iterator describing the region to be mapped |
|---|
| 1095 | + * @iter: iov iterator describing the region to be added |
|---|
| 886 | 1096 | * |
|---|
| 887 | | - * Pins pages from *iter and appends them to @bio's bvec array. The |
|---|
| 888 | | - * pages will have to be released using put_page() when done. |
|---|
| 1097 | + * This takes either an iterator pointing to user memory, or one pointing to |
|---|
| 1098 | + * kernel pages (BVEC iterator). If we're adding user pages, we pin them and |
|---|
| 1099 | + * map them into the kernel. On IO completion, the caller should put those |
|---|
| 1100 | + * pages. If we're adding kernel pages, and the caller told us it's safe to |
|---|
| 1101 | + * do so, we just have to add the pages to the bio directly. We don't grab an |
|---|
| 1102 | + * extra reference to those pages (the user should already have that), and we |
|---|
| 1103 | + * don't put the page on IO completion. The caller needs to check if the bio is |
|---|
| 1104 | + * flagged BIO_NO_PAGE_REF on IO completion. If it isn't, then pages should be |
|---|
| 1105 | + * released. |
|---|
| 1106 | + * |
|---|
| 889 | 1107 | * The function tries, but does not guarantee, to pin as many pages as |
|---|
| 890 | | - * fit into the bio, or are requested in *iter, whatever is smaller. |
|---|
| 891 | | - * If MM encounters an error pinning the requested pages, it stops. |
|---|
| 892 | | - * Error is returned only if 0 pages could be pinned. |
|---|
| 1108 | + * fit into the bio, or are requested in @iter, whatever is smaller. If |
|---|
| 1109 | + * MM encounters an error pinning the requested pages, it stops. Error |
|---|
| 1110 | + * is returned only if 0 pages could be pinned. |
|---|
| 893 | 1111 | */ |
|---|
| 894 | 1112 | int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) |
|---|
| 895 | 1113 | { |
|---|
| 896 | | - unsigned short orig_vcnt = bio->bi_vcnt; |
|---|
| 1114 | + const bool is_bvec = iov_iter_is_bvec(iter); |
|---|
| 1115 | + int ret; |
|---|
| 1116 | + |
|---|
| 1117 | + if (WARN_ON_ONCE(bio->bi_vcnt)) |
|---|
| 1118 | + return -EINVAL; |
|---|
| 897 | 1119 | |
|---|
| 898 | 1120 | do { |
|---|
| 899 | | - int ret = __bio_iov_iter_get_pages(bio, iter); |
|---|
| 1121 | + if (bio_op(bio) == REQ_OP_ZONE_APPEND) { |
|---|
| 1122 | + if (WARN_ON_ONCE(is_bvec)) |
|---|
| 1123 | + return -EINVAL; |
|---|
| 1124 | + ret = __bio_iov_append_get_pages(bio, iter); |
|---|
| 1125 | + } else { |
|---|
| 1126 | + if (is_bvec) |
|---|
| 1127 | + ret = __bio_iov_bvec_add_pages(bio, iter); |
|---|
| 1128 | + else |
|---|
| 1129 | + ret = __bio_iov_iter_get_pages(bio, iter); |
|---|
| 1130 | + } |
|---|
| 1131 | + } while (!ret && iov_iter_count(iter) && !bio_full(bio, 0)); |
|---|
| 900 | 1132 | |
|---|
| 901 | | - if (unlikely(ret)) |
|---|
| 902 | | - return bio->bi_vcnt > orig_vcnt ? 0 : ret; |
|---|
| 903 | | - |
|---|
| 904 | | - } while (iov_iter_count(iter) && !bio_full(bio)); |
|---|
| 905 | | - |
|---|
| 906 | | - return 0; |
|---|
| 1133 | + if (is_bvec) |
|---|
| 1134 | + bio_set_flag(bio, BIO_NO_PAGE_REF); |
|---|
| 1135 | + return bio->bi_vcnt ? 0 : ret; |
|---|
| 907 | 1136 | } |
|---|
| 908 | 1137 | EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages); |
|---|
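A hedged sketch of a direct-I/O style caller; where the iterator and disk come from is assumed, and error handling is reduced to the essentials.

```c
#include <linux/bio.h>
#include <linux/err.h>
#include <linux/uio.h>

/* Hypothetical: pin an iterator's pages into a freshly built READ bio. */
static struct bio *dio_bio_from_iter(struct gendisk *disk, sector_t sector,
				     struct iov_iter *iter)
{
	struct bio *bio = bio_alloc(GFP_KERNEL,
				    iov_iter_npages(iter, BIO_MAX_PAGES));
	int ret;

	bio->bi_disk = disk;
	bio->bi_iter.bi_sector = sector;
	bio->bi_opf = REQ_OP_READ;

	ret = bio_iov_iter_get_pages(bio, iter);
	if (ret) {
		bio_put(bio);
		return ERR_PTR(ret);
	}
	/* Completion must call bio_release_pages(), which skips bios
	 * flagged BIO_NO_PAGE_REF (BVEC iterators). */
	return bio;
}
```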
| 909 | 1138 | |
|---|
| .. | .. |
|---|
| 926 | 1155 | int submit_bio_wait(struct bio *bio) |
|---|
| 927 | 1156 | { |
|---|
| 928 | 1157 | DECLARE_COMPLETION_ONSTACK_MAP(done, bio->bi_disk->lockdep_map); |
|---|
| 1158 | + unsigned long hang_check; |
|---|
| 929 | 1159 | |
|---|
| 930 | 1160 | bio->bi_private = &done; |
|---|
| 931 | 1161 | bio->bi_end_io = submit_bio_wait_endio; |
|---|
| 932 | 1162 | bio->bi_opf |= REQ_SYNC; |
|---|
| 933 | 1163 | submit_bio(bio); |
|---|
| 934 | | - wait_for_completion_io(&done); |
|---|
| 1164 | + |
|---|
| 1165 | + /* Prevent hang_check timer from firing at us during very long I/O */ |
|---|
| 1166 | + hang_check = sysctl_hung_task_timeout_secs; |
|---|
| 1167 | + if (hang_check) |
|---|
| 1168 | + while (!wait_for_completion_io_timeout(&done, |
|---|
| 1169 | + hang_check * (HZ/2))) |
|---|
| 1170 | + ; |
|---|
| 1171 | + else |
|---|
| 1172 | + wait_for_completion_io(&done); |
|---|
| 935 | 1173 | |
|---|
| 936 | 1174 | return blk_status_to_errno(bio->bi_status); |
|---|
| 937 | 1175 | } |
|---|
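The hang_check dance above deserves one line of arithmetic; 120 s is just the common default for sysctl_hung_task_timeout_secs.

```c
/*
 * With sysctl_hung_task_timeout_secs = 120 the wait re-arms every
 * 120 * (HZ / 2) jiffies = 60 seconds, always inside the 120-second
 * hung-task window, so khungtaskd sees the task wake periodically and
 * never flags a long-running synchronous bio as hung.
 */
```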
| .. | .. |
|---|
| 1043 | 1281 | } |
|---|
| 1044 | 1282 | EXPORT_SYMBOL(bio_list_copy_data); |
|---|
| 1045 | 1283 | |
|---|
| 1046 | | -struct bio_map_data { |
|---|
| 1047 | | - int is_our_pages; |
|---|
| 1048 | | - struct iov_iter iter; |
|---|
| 1049 | | - struct iovec iov[]; |
|---|
| 1050 | | -}; |
|---|
| 1051 | | - |
|---|
| 1052 | | -static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data, |
|---|
| 1053 | | - gfp_t gfp_mask) |
|---|
| 1054 | | -{ |
|---|
| 1055 | | - struct bio_map_data *bmd; |
|---|
| 1056 | | - if (data->nr_segs > UIO_MAXIOV) |
|---|
| 1057 | | - return NULL; |
|---|
| 1058 | | - |
|---|
| 1059 | | - bmd = kmalloc(sizeof(struct bio_map_data) + |
|---|
| 1060 | | - sizeof(struct iovec) * data->nr_segs, gfp_mask); |
|---|
| 1061 | | - if (!bmd) |
|---|
| 1062 | | - return NULL; |
|---|
| 1063 | | - memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs); |
|---|
| 1064 | | - bmd->iter = *data; |
|---|
| 1065 | | - bmd->iter.iov = bmd->iov; |
|---|
| 1066 | | - return bmd; |
|---|
| 1067 | | -} |
|---|
| 1068 | | - |
|---|
| 1069 | | -/** |
|---|
| 1070 | | - * bio_copy_from_iter - copy all pages from iov_iter to bio |
|---|
| 1071 | | - * @bio: The &struct bio which describes the I/O as destination |
|---|
| 1072 | | - * @iter: iov_iter as source |
|---|
| 1073 | | - * |
|---|
| 1074 | | - * Copy all pages from iov_iter to bio. |
|---|
| 1075 | | - * Returns 0 on success, or error on failure. |
|---|
| 1076 | | - */ |
|---|
| 1077 | | -static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter) |
|---|
| 1078 | | -{ |
|---|
| 1079 | | - int i; |
|---|
| 1080 | | - struct bio_vec *bvec; |
|---|
| 1081 | | - |
|---|
| 1082 | | - bio_for_each_segment_all(bvec, bio, i) { |
|---|
| 1083 | | - ssize_t ret; |
|---|
| 1084 | | - |
|---|
| 1085 | | - ret = copy_page_from_iter(bvec->bv_page, |
|---|
| 1086 | | - bvec->bv_offset, |
|---|
| 1087 | | - bvec->bv_len, |
|---|
| 1088 | | - iter); |
|---|
| 1089 | | - |
|---|
| 1090 | | - if (!iov_iter_count(iter)) |
|---|
| 1091 | | - break; |
|---|
| 1092 | | - |
|---|
| 1093 | | - if (ret < bvec->bv_len) |
|---|
| 1094 | | - return -EFAULT; |
|---|
| 1095 | | - } |
|---|
| 1096 | | - |
|---|
| 1097 | | - return 0; |
|---|
| 1098 | | -} |
|---|
| 1099 | | - |
|---|
| 1100 | | -/** |
|---|
| 1101 | | - * bio_copy_to_iter - copy all pages from bio to iov_iter |
|---|
| 1102 | | - * @bio: The &struct bio which describes the I/O as source |
|---|
| 1103 | | - * @iter: iov_iter as destination |
|---|
| 1104 | | - * |
|---|
| 1105 | | - * Copy all pages from bio to iov_iter. |
|---|
| 1106 | | - * Returns 0 on success, or error on failure. |
|---|
| 1107 | | - */ |
|---|
| 1108 | | -static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter) |
|---|
| 1109 | | -{ |
|---|
| 1110 | | - int i; |
|---|
| 1111 | | - struct bio_vec *bvec; |
|---|
| 1112 | | - |
|---|
| 1113 | | - bio_for_each_segment_all(bvec, bio, i) { |
|---|
| 1114 | | - ssize_t ret; |
|---|
| 1115 | | - |
|---|
| 1116 | | - ret = copy_page_to_iter(bvec->bv_page, |
|---|
| 1117 | | - bvec->bv_offset, |
|---|
| 1118 | | - bvec->bv_len, |
|---|
| 1119 | | - &iter); |
|---|
| 1120 | | - |
|---|
| 1121 | | - if (!iov_iter_count(&iter)) |
|---|
| 1122 | | - break; |
|---|
| 1123 | | - |
|---|
| 1124 | | - if (ret < bvec->bv_len) |
|---|
| 1125 | | - return -EFAULT; |
|---|
| 1126 | | - } |
|---|
| 1127 | | - |
|---|
| 1128 | | - return 0; |
|---|
| 1129 | | -} |
|---|
| 1130 | | - |
|---|
| 1131 | 1284 | void bio_free_pages(struct bio *bio) |
|---|
| 1132 | 1285 | { |
|---|
| 1133 | 1286 | struct bio_vec *bvec; |
|---|
| 1134 | | - int i; |
|---|
| 1287 | + struct bvec_iter_all iter_all; |
|---|
| 1135 | 1288 | |
|---|
| 1136 | | - bio_for_each_segment_all(bvec, bio, i) |
|---|
| 1289 | + bio_for_each_segment_all(bvec, bio, iter_all) |
|---|
| 1137 | 1290 | __free_page(bvec->bv_page); |
|---|
| 1138 | 1291 | } |
|---|
| 1139 | 1292 | EXPORT_SYMBOL(bio_free_pages); |
|---|
| 1140 | | - |
|---|
| 1141 | | -/** |
|---|
| 1142 | | - * bio_uncopy_user - finish previously mapped bio |
|---|
| 1143 | | - * @bio: bio being terminated |
|---|
| 1144 | | - * |
|---|
| 1145 | | - * Free pages allocated from bio_copy_user_iov() and write back data |
|---|
| 1146 | | - * to user space in case of a read. |
|---|
| 1147 | | - */ |
|---|
| 1148 | | -int bio_uncopy_user(struct bio *bio) |
|---|
| 1149 | | -{ |
|---|
| 1150 | | - struct bio_map_data *bmd = bio->bi_private; |
|---|
| 1151 | | - int ret = 0; |
|---|
| 1152 | | - |
|---|
| 1153 | | - if (!bio_flagged(bio, BIO_NULL_MAPPED)) { |
|---|
| 1154 | | - /* |
|---|
| 1155 | | - * if we're in a workqueue, the request is orphaned, so |
|---|
| 1156 | | - * don't copy into a random user address space, just free |
|---|
| 1157 | | - * and return -EINTR so user space doesn't expect any data. |
|---|
| 1158 | | - */ |
|---|
| 1159 | | - if (!current->mm) |
|---|
| 1160 | | - ret = -EINTR; |
|---|
| 1161 | | - else if (bio_data_dir(bio) == READ) |
|---|
| 1162 | | - ret = bio_copy_to_iter(bio, bmd->iter); |
|---|
| 1163 | | - if (bmd->is_our_pages) |
|---|
| 1164 | | - bio_free_pages(bio); |
|---|
| 1165 | | - } |
|---|
| 1166 | | - kfree(bmd); |
|---|
| 1167 | | - bio_put(bio); |
|---|
| 1168 | | - return ret; |
|---|
| 1169 | | -} |
|---|
| 1170 | | - |
|---|
| 1171 | | -/** |
|---|
| 1172 | | - * bio_copy_user_iov - copy user data to bio |
|---|
| 1173 | | - * @q: destination block queue |
|---|
| 1174 | | - * @map_data: pointer to the rq_map_data holding pages (if necessary) |
|---|
| 1175 | | - * @iter: iovec iterator |
|---|
| 1176 | | - * @gfp_mask: memory allocation flags |
|---|
| 1177 | | - * |
|---|
| 1178 | | - * Prepares and returns a bio for indirect user io, bouncing data |
|---|
| 1179 | | - * to/from kernel pages as necessary. Must be paired with |
|---|
| 1180 | | - * call bio_uncopy_user() on io completion. |
|---|
| 1181 | | - */ |
|---|
| 1182 | | -struct bio *bio_copy_user_iov(struct request_queue *q, |
|---|
| 1183 | | - struct rq_map_data *map_data, |
|---|
| 1184 | | - struct iov_iter *iter, |
|---|
| 1185 | | - gfp_t gfp_mask) |
|---|
| 1186 | | -{ |
|---|
| 1187 | | - struct bio_map_data *bmd; |
|---|
| 1188 | | - struct page *page; |
|---|
| 1189 | | - struct bio *bio; |
|---|
| 1190 | | - int i = 0, ret; |
|---|
| 1191 | | - int nr_pages; |
|---|
| 1192 | | - unsigned int len = iter->count; |
|---|
| 1193 | | - unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0; |
|---|
| 1194 | | - |
|---|
| 1195 | | - bmd = bio_alloc_map_data(iter, gfp_mask); |
|---|
| 1196 | | - if (!bmd) |
|---|
| 1197 | | - return ERR_PTR(-ENOMEM); |
|---|
| 1198 | | - |
|---|
| 1199 | | - /* |
|---|
| 1200 | | - * We need to do a deep copy of the iov_iter including the iovecs. |
|---|
| 1201 | | - * The caller provided iov might point to an on-stack or otherwise |
|---|
| 1202 | | - * shortlived one. |
|---|
| 1203 | | - */ |
|---|
| 1204 | | - bmd->is_our_pages = map_data ? 0 : 1; |
|---|
| 1205 | | - |
|---|
| 1206 | | - nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE); |
|---|
| 1207 | | - if (nr_pages > BIO_MAX_PAGES) |
|---|
| 1208 | | - nr_pages = BIO_MAX_PAGES; |
|---|
| 1209 | | - |
|---|
| 1210 | | - ret = -ENOMEM; |
|---|
| 1211 | | - bio = bio_kmalloc(gfp_mask, nr_pages); |
|---|
| 1212 | | - if (!bio) |
|---|
| 1213 | | - goto out_bmd; |
|---|
| 1214 | | - |
|---|
| 1215 | | - ret = 0; |
|---|
| 1216 | | - |
|---|
| 1217 | | - if (map_data) { |
|---|
| 1218 | | - nr_pages = 1 << map_data->page_order; |
|---|
| 1219 | | - i = map_data->offset / PAGE_SIZE; |
|---|
| 1220 | | - } |
|---|
| 1221 | | - while (len) { |
|---|
| 1222 | | - unsigned int bytes = PAGE_SIZE; |
|---|
| 1223 | | - |
|---|
| 1224 | | - bytes -= offset; |
|---|
| 1225 | | - |
|---|
| 1226 | | - if (bytes > len) |
|---|
| 1227 | | - bytes = len; |
|---|
| 1228 | | - |
|---|
| 1229 | | - if (map_data) { |
|---|
| 1230 | | - if (i == map_data->nr_entries * nr_pages) { |
|---|
| 1231 | | - ret = -ENOMEM; |
|---|
| 1232 | | - break; |
|---|
| 1233 | | - } |
|---|
| 1234 | | - |
|---|
| 1235 | | - page = map_data->pages[i / nr_pages]; |
|---|
| 1236 | | - page += (i % nr_pages); |
|---|
| 1237 | | - |
|---|
| 1238 | | - i++; |
|---|
| 1239 | | - } else { |
|---|
| 1240 | | - page = alloc_page(q->bounce_gfp | gfp_mask); |
|---|
| 1241 | | - if (!page) { |
|---|
| 1242 | | - ret = -ENOMEM; |
|---|
| 1243 | | - break; |
|---|
| 1244 | | - } |
|---|
| 1245 | | - } |
|---|
| 1246 | | - |
|---|
| 1247 | | - if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) { |
|---|
| 1248 | | - if (!map_data) |
|---|
| 1249 | | - __free_page(page); |
|---|
| 1250 | | - break; |
|---|
| 1251 | | - } |
|---|
| 1252 | | - |
|---|
| 1253 | | - len -= bytes; |
|---|
| 1254 | | - offset = 0; |
|---|
| 1255 | | - } |
|---|
| 1256 | | - |
|---|
| 1257 | | - if (ret) |
|---|
| 1258 | | - goto cleanup; |
|---|
| 1259 | | - |
|---|
| 1260 | | - if (map_data) |
|---|
| 1261 | | - map_data->offset += bio->bi_iter.bi_size; |
|---|
| 1262 | | - |
|---|
| 1263 | | - /* |
|---|
| 1264 | | - * success |
|---|
| 1265 | | - */ |
|---|
| 1266 | | - if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) || |
|---|
| 1267 | | - (map_data && map_data->from_user)) { |
|---|
| 1268 | | - ret = bio_copy_from_iter(bio, iter); |
|---|
| 1269 | | - if (ret) |
|---|
| 1270 | | - goto cleanup; |
|---|
| 1271 | | - } else { |
|---|
| 1272 | | - if (bmd->is_our_pages) |
|---|
| 1273 | | - zero_fill_bio(bio); |
|---|
| 1274 | | - iov_iter_advance(iter, bio->bi_iter.bi_size); |
|---|
| 1275 | | - } |
|---|
| 1276 | | - |
|---|
| 1277 | | - bio->bi_private = bmd; |
|---|
| 1278 | | - if (map_data && map_data->null_mapped) |
|---|
| 1279 | | - bio_set_flag(bio, BIO_NULL_MAPPED); |
|---|
| 1280 | | - return bio; |
|---|
| 1281 | | -cleanup: |
|---|
| 1282 | | - if (!map_data) |
|---|
| 1283 | | - bio_free_pages(bio); |
|---|
| 1284 | | - bio_put(bio); |
|---|
| 1285 | | -out_bmd: |
|---|
| 1286 | | - kfree(bmd); |
|---|
| 1287 | | - return ERR_PTR(ret); |
|---|
| 1288 | | -} |
|---|
| 1289 | | - |
|---|
| 1290 | | -/** |
|---|
| 1291 | | - * bio_map_user_iov - map user iovec into bio |
|---|
| 1292 | | - * @q: the struct request_queue for the bio |
|---|
| 1293 | | - * @iter: iovec iterator |
|---|
| 1294 | | - * @gfp_mask: memory allocation flags |
|---|
| 1295 | | - * |
|---|
| 1296 | | - * Map the user space address into a bio suitable for io to a block |
|---|
| 1297 | | - * device. Returns an error pointer in case of error. |
|---|
| 1298 | | - */ |
|---|
| 1299 | | -struct bio *bio_map_user_iov(struct request_queue *q, |
|---|
| 1300 | | - struct iov_iter *iter, |
|---|
| 1301 | | - gfp_t gfp_mask) |
|---|
| 1302 | | -{ |
|---|
| 1303 | | - int j; |
|---|
| 1304 | | - struct bio *bio; |
|---|
| 1305 | | - int ret; |
|---|
| 1306 | | - struct bio_vec *bvec; |
|---|
| 1307 | | - |
|---|
| 1308 | | - if (!iov_iter_count(iter)) |
|---|
| 1309 | | - return ERR_PTR(-EINVAL); |
|---|
| 1310 | | - |
|---|
| 1311 | | - bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES)); |
|---|
| 1312 | | - if (!bio) |
|---|
| 1313 | | - return ERR_PTR(-ENOMEM); |
|---|
| 1314 | | - |
|---|
| 1315 | | - while (iov_iter_count(iter)) { |
|---|
| 1316 | | - struct page **pages; |
|---|
| 1317 | | - ssize_t bytes; |
|---|
| 1318 | | - size_t offs, added = 0; |
|---|
| 1319 | | - int npages; |
|---|
| 1320 | | - |
|---|
| 1321 | | - bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs); |
|---|
| 1322 | | - if (unlikely(bytes <= 0)) { |
|---|
| 1323 | | - ret = bytes ? bytes : -EFAULT; |
|---|
| 1324 | | - goto out_unmap; |
|---|
| 1325 | | - } |
|---|
| 1326 | | - |
|---|
| 1327 | | - npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE); |
|---|
| 1328 | | - |
|---|
| 1329 | | - if (unlikely(offs & queue_dma_alignment(q))) { |
|---|
| 1330 | | - ret = -EINVAL; |
|---|
| 1331 | | - j = 0; |
|---|
| 1332 | | - } else { |
|---|
| 1333 | | - for (j = 0; j < npages; j++) { |
|---|
| 1334 | | - struct page *page = pages[j]; |
|---|
| 1335 | | - unsigned int n = PAGE_SIZE - offs; |
|---|
| 1336 | | - unsigned short prev_bi_vcnt = bio->bi_vcnt; |
|---|
| 1337 | | - |
|---|
| 1338 | | - if (n > bytes) |
|---|
| 1339 | | - n = bytes; |
|---|
| 1340 | | - |
|---|
| 1341 | | - if (!bio_add_pc_page(q, bio, page, n, offs)) |
|---|
| 1342 | | - break; |
|---|
| 1343 | | - |
|---|
| 1344 | | - /* |
|---|
| 1345 | | - * check if vector was merged with previous |
|---|
| 1346 | | - * drop page reference if needed |
|---|
| 1347 | | - */ |
|---|
| 1348 | | - if (bio->bi_vcnt == prev_bi_vcnt) |
|---|
| 1349 | | - put_page(page); |
|---|
| 1350 | | - |
|---|
| 1351 | | - added += n; |
|---|
| 1352 | | - bytes -= n; |
|---|
| 1353 | | - offs = 0; |
|---|
| 1354 | | - } |
|---|
| 1355 | | - iov_iter_advance(iter, added); |
|---|
| 1356 | | - } |
|---|
| 1357 | | - /* |
|---|
| 1358 | | - * release the pages we didn't map into the bio, if any |
|---|
| 1359 | | - */ |
|---|
| 1360 | | - while (j < npages) |
|---|
| 1361 | | - put_page(pages[j++]); |
|---|
| 1362 | | - kvfree(pages); |
|---|
| 1363 | | - /* couldn't stuff something into bio? */ |
|---|
| 1364 | | - if (bytes) |
|---|
| 1365 | | - break; |
|---|
| 1366 | | - } |
|---|
| 1367 | | - |
|---|
| 1368 | | - bio_set_flag(bio, BIO_USER_MAPPED); |
|---|
| 1369 | | - |
|---|
| 1370 | | - /* |
|---|
| 1371 | | - * subtle -- if bio_map_user_iov() ended up bouncing a bio, |
|---|
| 1372 | | - * it would normally disappear when its bi_end_io is run. |
|---|
| 1373 | | - * however, we need it for the unmap, so grab an extra |
|---|
| 1374 | | - * reference to it |
|---|
| 1375 | | - */ |
|---|
| 1376 | | - bio_get(bio); |
|---|
| 1377 | | - return bio; |
|---|
| 1378 | | - |
|---|
| 1379 | | - out_unmap: |
|---|
| 1380 | | - bio_for_each_segment_all(bvec, bio, j) { |
|---|
| 1381 | | - put_page(bvec->bv_page); |
|---|
| 1382 | | - } |
|---|
| 1383 | | - bio_put(bio); |
|---|
| 1384 | | - return ERR_PTR(ret); |
|---|
| 1385 | | -} |
|---|
| 1386 | | - |
|---|
| 1387 | | -static void __bio_unmap_user(struct bio *bio) |
|---|
| 1388 | | -{ |
|---|
| 1389 | | - struct bio_vec *bvec; |
|---|
| 1390 | | - int i; |
|---|
| 1391 | | - |
|---|
| 1392 | | - /* |
|---|
| 1393 | | - * make sure we dirty pages we wrote to |
|---|
| 1394 | | - */ |
|---|
| 1395 | | - bio_for_each_segment_all(bvec, bio, i) { |
|---|
| 1396 | | - if (bio_data_dir(bio) == READ) |
|---|
| 1397 | | - set_page_dirty_lock(bvec->bv_page); |
|---|
| 1398 | | - |
|---|
| 1399 | | - put_page(bvec->bv_page); |
|---|
| 1400 | | - } |
|---|
| 1401 | | - |
|---|
| 1402 | | - bio_put(bio); |
|---|
| 1403 | | -} |
|---|
| 1404 | | - |
|---|
| 1405 | | -/** |
|---|
| 1406 | | - * bio_unmap_user - unmap a bio |
|---|
| 1407 | | - * @bio: the bio being unmapped |
|---|
| 1408 | | - * |
|---|
| 1409 | | - * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from |
|---|
| 1410 | | - * process context. |
|---|
| 1411 | | - * |
|---|
| 1412 | | - * bio_unmap_user() may sleep. |
|---|
| 1413 | | - */ |
|---|
| 1414 | | -void bio_unmap_user(struct bio *bio) |
|---|
| 1415 | | -{ |
|---|
| 1416 | | - __bio_unmap_user(bio); |
|---|
| 1417 | | - bio_put(bio); |
|---|
| 1418 | | -} |
|---|
| 1419 | | - |
|---|
| 1420 | | -static void bio_map_kern_endio(struct bio *bio) |
|---|
| 1421 | | -{ |
|---|
| 1422 | | - bio_put(bio); |
|---|
| 1423 | | -} |
|---|
| 1424 | | - |
|---|
| 1425 | | -/** |
|---|
| 1426 | | - * bio_map_kern - map kernel address into bio |
|---|
| 1427 | | - * @q: the struct request_queue for the bio |
|---|
| 1428 | | - * @data: pointer to buffer to map |
|---|
| 1429 | | - * @len: length in bytes |
|---|
| 1430 | | - * @gfp_mask: allocation flags for bio allocation |
|---|
| 1431 | | - * |
|---|
| 1432 | | - * Map the kernel address into a bio suitable for io to a block |
|---|
| 1433 | | - * device. Returns an error pointer in case of error. |
|---|
| 1434 | | - */ |
|---|
| 1435 | | -struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, |
|---|
| 1436 | | - gfp_t gfp_mask) |
|---|
| 1437 | | -{ |
|---|
| 1438 | | - unsigned long kaddr = (unsigned long)data; |
|---|
| 1439 | | - unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
|---|
| 1440 | | - unsigned long start = kaddr >> PAGE_SHIFT; |
|---|
| 1441 | | - const int nr_pages = end - start; |
|---|
| 1442 | | - int offset, i; |
|---|
| 1443 | | - struct bio *bio; |
|---|
| 1444 | | - |
|---|
| 1445 | | - bio = bio_kmalloc(gfp_mask, nr_pages); |
|---|
| 1446 | | - if (!bio) |
|---|
| 1447 | | - return ERR_PTR(-ENOMEM); |
|---|
| 1448 | | - |
|---|
| 1449 | | - offset = offset_in_page(kaddr); |
|---|
| 1450 | | - for (i = 0; i < nr_pages; i++) { |
|---|
| 1451 | | - unsigned int bytes = PAGE_SIZE - offset; |
|---|
| 1452 | | - |
|---|
| 1453 | | - if (len <= 0) |
|---|
| 1454 | | - break; |
|---|
| 1455 | | - |
|---|
| 1456 | | - if (bytes > len) |
|---|
| 1457 | | - bytes = len; |
|---|
| 1458 | | - |
|---|
| 1459 | | - if (bio_add_pc_page(q, bio, virt_to_page(data), bytes, |
|---|
| 1460 | | - offset) < bytes) { |
|---|
| 1461 | | - /* we don't support partial mappings */ |
|---|
| 1462 | | - bio_put(bio); |
|---|
| 1463 | | - return ERR_PTR(-EINVAL); |
|---|
| 1464 | | - } |
|---|
| 1465 | | - |
|---|
| 1466 | | - data += bytes; |
|---|
| 1467 | | - len -= bytes; |
|---|
| 1468 | | - offset = 0; |
|---|
| 1469 | | - } |
|---|
| 1470 | | - |
|---|
| 1471 | | - bio->bi_end_io = bio_map_kern_endio; |
|---|
| 1472 | | - return bio; |
|---|
| 1473 | | -} |
|---|
| 1474 | | -EXPORT_SYMBOL(bio_map_kern); |
|---|
| 1475 | | - |
|---|
| 1476 | | -static void bio_copy_kern_endio(struct bio *bio) |
|---|
| 1477 | | -{ |
|---|
| 1478 | | - bio_free_pages(bio); |
|---|
| 1479 | | - bio_put(bio); |
|---|
| 1480 | | -} |
|---|
| 1481 | | - |
|---|
| 1482 | | -static void bio_copy_kern_endio_read(struct bio *bio) |
|---|
| 1483 | | -{ |
|---|
| 1484 | | - char *p = bio->bi_private; |
|---|
| 1485 | | - struct bio_vec *bvec; |
|---|
| 1486 | | - int i; |
|---|
| 1487 | | - |
|---|
| 1488 | | - bio_for_each_segment_all(bvec, bio, i) { |
|---|
| 1489 | | - memcpy(p, page_address(bvec->bv_page), bvec->bv_len); |
|---|
| 1490 | | - p += bvec->bv_len; |
|---|
| 1491 | | - } |
|---|
| 1492 | | - |
|---|
| 1493 | | - bio_copy_kern_endio(bio); |
|---|
| 1494 | | -} |
|---|
| 1495 | | - |
|---|
| 1496 | | -/** |
|---|
| 1497 | | - * bio_copy_kern - copy kernel address into bio |
|---|
| 1498 | | - * @q: the struct request_queue for the bio |
|---|
| 1499 | | - * @data: pointer to buffer to copy |
|---|
| 1500 | | - * @len: length in bytes |
|---|
| 1501 | | - * @gfp_mask: allocation flags for bio and page allocation |
|---|
| 1502 | | - * @reading: data direction is READ |
|---|
| 1503 | | - * |
|---|
| 1504 | | - * Copy the kernel address into a bio suitable for io to a block |
|---|
| 1505 | | - * device. Returns an error pointer in case of error. |
|---|
| 1506 | | - */ |
|---|
| 1507 | | -struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, |
|---|
| 1508 | | - gfp_t gfp_mask, int reading) |
|---|
| 1509 | | -{ |
|---|
| 1510 | | - unsigned long kaddr = (unsigned long)data; |
|---|
| 1511 | | - unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
|---|
| 1512 | | - unsigned long start = kaddr >> PAGE_SHIFT; |
|---|
| 1513 | | - struct bio *bio; |
|---|
| 1514 | | - void *p = data; |
|---|
| 1515 | | - int nr_pages = 0; |
|---|
| 1516 | | - |
|---|
| 1517 | | - /* |
|---|
| 1518 | | - * Overflow, abort |
|---|
| 1519 | | - */ |
|---|
| 1520 | | - if (end < start) |
|---|
| 1521 | | - return ERR_PTR(-EINVAL); |
|---|
| 1522 | | - |
|---|
| 1523 | | - nr_pages = end - start; |
|---|
| 1524 | | - bio = bio_kmalloc(gfp_mask, nr_pages); |
|---|
| 1525 | | - if (!bio) |
|---|
| 1526 | | - return ERR_PTR(-ENOMEM); |
|---|
| 1527 | | - |
|---|
| 1528 | | - while (len) { |
|---|
| 1529 | | - struct page *page; |
|---|
| 1530 | | - unsigned int bytes = PAGE_SIZE; |
|---|
| 1531 | | - |
|---|
| 1532 | | - if (bytes > len) |
|---|
| 1533 | | - bytes = len; |
|---|
| 1534 | | - |
|---|
| 1535 | | - page = alloc_page(q->bounce_gfp | gfp_mask); |
|---|
| 1536 | | - if (!page) |
|---|
| 1537 | | - goto cleanup; |
|---|
| 1538 | | - |
|---|
| 1539 | | - if (!reading) |
|---|
| 1540 | | - memcpy(page_address(page), p, bytes); |
|---|
| 1541 | | - |
|---|
| 1542 | | - if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) |
|---|
| 1543 | | - break; |
|---|
| 1544 | | - |
|---|
| 1545 | | - len -= bytes; |
|---|
| 1546 | | - p += bytes; |
|---|
| 1547 | | - } |
|---|
| 1548 | | - |
|---|
| 1549 | | - if (reading) { |
|---|
| 1550 | | - bio->bi_end_io = bio_copy_kern_endio_read; |
|---|
| 1551 | | - bio->bi_private = data; |
|---|
| 1552 | | - } else { |
|---|
| 1553 | | - bio->bi_end_io = bio_copy_kern_endio; |
|---|
| 1554 | | - } |
|---|
| 1555 | | - |
|---|
| 1556 | | - return bio; |
|---|
| 1557 | | - |
|---|
| 1558 | | -cleanup: |
|---|
| 1559 | | - bio_free_pages(bio); |
|---|
| 1560 | | - bio_put(bio); |
|---|
| 1561 | | - return ERR_PTR(-ENOMEM); |
|---|
| 1562 | | -} |
|---|
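To contrast with bio_map_kern(): because the data is bounced through pages that bio_copy_kern() allocates itself, this helper also works for buffers that virt_to_page() cannot translate (vmalloc'ed or on-stack memory). A hedged sketch with hypothetical names follows; note that for a READ the copy-back happens in bio_copy_kern_endio_read(), so the caller must not clobber bi_end_io:

```c
#include <linux/bio.h>
#include <linux/blkdev.h>

/*
 * Hypothetical sketch: read into a buffer that virt_to_page() cannot map.
 * With reading=1, bio_copy_kern_endio_read() copies the bounce pages back
 * into @buf on completion and then frees them.
 */
static int demo_bounced_read(struct block_device *bdev, void *buf,
			     unsigned int len, sector_t sector)
{
	struct bio *bio;

	bio = bio_copy_kern(bdev_get_queue(bdev), buf, len, GFP_KERNEL, 1);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bio_set_dev(bio, bdev);
	bio->bi_iter.bi_sector = sector;
	bio->bi_opf = REQ_OP_READ;

	/*
	 * Must not use submit_bio_wait() here: it would replace the
	 * copy-back endio installed above. @buf is valid only after the
	 * bio completes; real callers went through blk_rq_map_kern(),
	 * whose request completion invokes bi_end_io properly.
	 */
	submit_bio(bio);
	return 0;
}
```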
| 1563 | 1293 | |
|---|
| 1564 | 1294 | /* |
|---|
| 1565 | 1295 | * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions |
|---|
| .. | .. |
|---|
| 1593 | 1323 | void bio_set_pages_dirty(struct bio *bio) |
|---|
| 1594 | 1324 | { |
|---|
| 1595 | 1325 | struct bio_vec *bvec; |
|---|
| 1596 | | - int i; |
|---|
| 1326 | + struct bvec_iter_all iter_all; |
|---|
| 1597 | 1327 | |
|---|
| 1598 | | - bio_for_each_segment_all(bvec, bio, i) { |
|---|
| 1328 | + bio_for_each_segment_all(bvec, bio, iter_all) { |
|---|
| 1599 | 1329 | if (!PageCompound(bvec->bv_page)) |
|---|
| 1600 | 1330 | set_page_dirty_lock(bvec->bv_page); |
|---|
| 1601 | 1331 | } |
|---|
| 1602 | | -} |
|---|
| 1603 | | -EXPORT_SYMBOL_GPL(bio_set_pages_dirty); |
|---|
| 1604 | | - |
|---|
| 1605 | | -static void bio_release_pages(struct bio *bio) |
|---|
| 1606 | | -{ |
|---|
| 1607 | | - struct bio_vec *bvec; |
|---|
| 1608 | | - int i; |
|---|
| 1609 | | - |
|---|
| 1610 | | - bio_for_each_segment_all(bvec, bio, i) |
|---|
| 1611 | | - put_page(bvec->bv_page); |
|---|
| 1612 | 1332 | } |
|---|
| 1613 | 1333 | |
|---|
| 1614 | 1334 | /* |
|---|
| .. | .. |
|---|
| 1643 | 1363 | while ((bio = next) != NULL) { |
|---|
| 1644 | 1364 | next = bio->bi_private; |
|---|
| 1645 | 1365 | |
|---|
| 1646 | | - bio_set_pages_dirty(bio); |
|---|
| 1647 | | - bio_release_pages(bio); |
|---|
| 1366 | + bio_release_pages(bio, true); |
|---|
| 1648 | 1367 | bio_put(bio); |
|---|
| 1649 | 1368 | } |
|---|
| 1650 | 1369 | } |
|---|
| .. | .. |
|---|
| 1653 | 1372 | { |
|---|
| 1654 | 1373 | struct bio_vec *bvec; |
|---|
| 1655 | 1374 | unsigned long flags; |
|---|
| 1656 | | - int i; |
|---|
| 1375 | + struct bvec_iter_all iter_all; |
|---|
| 1657 | 1376 | |
|---|
| 1658 | | - bio_for_each_segment_all(bvec, bio, i) { |
|---|
| 1377 | + bio_for_each_segment_all(bvec, bio, iter_all) { |
|---|
| 1659 | 1378 | if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page)) |
|---|
| 1660 | 1379 | goto defer; |
|---|
| 1661 | 1380 | } |
|---|
| 1662 | 1381 | |
|---|
| 1663 | | - bio_release_pages(bio); |
|---|
| 1382 | + bio_release_pages(bio, false); |
|---|
| 1664 | 1383 | bio_put(bio); |
|---|
| 1665 | 1384 | return; |
|---|
| 1666 | 1385 | defer: |
|---|
| .. | .. |
|---|
| 1670 | 1389 | spin_unlock_irqrestore(&bio_dirty_lock, flags); |
|---|
| 1671 | 1390 | schedule_work(&bio_dirty_work); |
|---|
| 1672 | 1391 | } |
|---|
| 1673 | | -EXPORT_SYMBOL_GPL(bio_check_pages_dirty); |
|---|
| 1674 | | - |
|---|
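The dirty-page protocol these two helpers implement is easy to miss from the diff alone. A hypothetical completion handler for a direct-I/O READ into pinned user pages would look like the sketch below (`demo_dio_read_endio` is our name):

```c
#include <linux/bio.h>

/*
 * Hypothetical endio for a direct-I/O READ into user pages: the submitter
 * dirties the pinned pages up front with bio_set_pages_dirty(); at
 * completion, bio_check_pages_dirty() re-dirties any page the VM cleaned
 * in the meantime, releases the page references, and puts the bio
 * (deferring to a workqueue when called in interrupt context, since
 * redirtying may sleep).
 */
static void demo_dio_read_endio(struct bio *bio)
{
	bio_check_pages_dirty(bio);	/* releases the pages and the bio */
}
```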
| 1675 | | -void generic_start_io_acct(struct request_queue *q, int op, |
|---|
| 1676 | | - unsigned long sectors, struct hd_struct *part) |
|---|
| 1677 | | -{ |
|---|
| 1678 | | - const int sgrp = op_stat_group(op); |
|---|
| 1679 | | - int cpu = part_stat_lock(); |
|---|
| 1680 | | - |
|---|
| 1681 | | - part_round_stats(q, cpu, part); |
|---|
| 1682 | | - part_stat_inc(cpu, part, ios[sgrp]); |
|---|
| 1683 | | - part_stat_add(cpu, part, sectors[sgrp], sectors); |
|---|
| 1684 | | - part_inc_in_flight(q, part, op_is_write(op)); |
|---|
| 1685 | | - |
|---|
| 1686 | | - part_stat_unlock(); |
|---|
| 1687 | | -} |
|---|
| 1688 | | -EXPORT_SYMBOL(generic_start_io_acct); |
|---|
| 1689 | | - |
|---|
| 1690 | | -void generic_end_io_acct(struct request_queue *q, int req_op, |
|---|
| 1691 | | - struct hd_struct *part, unsigned long start_time) |
|---|
| 1692 | | -{ |
|---|
| 1693 | | - unsigned long duration = jiffies - start_time; |
|---|
| 1694 | | - const int sgrp = op_stat_group(req_op); |
|---|
| 1695 | | - int cpu = part_stat_lock(); |
|---|
| 1696 | | - |
|---|
| 1697 | | - part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration)); |
|---|
| 1698 | | - part_round_stats(q, cpu, part); |
|---|
| 1699 | | - part_dec_in_flight(q, part, op_is_write(req_op)); |
|---|
| 1700 | | - |
|---|
| 1701 | | - part_stat_unlock(); |
|---|
| 1702 | | -} |
|---|
| 1703 | | -EXPORT_SYMBOL(generic_end_io_acct); |
|---|
| 1704 | | - |
|---|
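For context on the accounting pair being removed: bio-based drivers (dm, md and the like) called these around each bio's lifetime, with a jiffies timestamp bridging the two, as generic_end_io_acct()'s `jiffies - start_time` above shows. A hedged sketch, with hypothetical names (`demo_acct_start`, `demo_acct_end`, `disk`):

```c
#include <linux/bio.h>
#include <linux/genhd.h>

/* Hypothetical sketch: per-bio accounting in a bio-based driver. */
static unsigned long demo_acct_start(struct request_queue *q,
				     struct gendisk *disk, struct bio *bio)
{
	generic_start_io_acct(q, bio_op(bio), bio_sectors(bio), &disk->part0);
	return jiffies;		/* passed back as @start_time at completion */
}

static void demo_acct_end(struct request_queue *q, struct gendisk *disk,
			  struct bio *bio, unsigned long start)
{
	generic_end_io_acct(q, bio_op(bio), &disk->part0, start);
}
```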
| 1705 | | -#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE |
|---|
| 1706 | | -void bio_flush_dcache_pages(struct bio *bi) |
|---|
| 1707 | | -{ |
|---|
| 1708 | | - struct bio_vec bvec; |
|---|
| 1709 | | - struct bvec_iter iter; |
|---|
| 1710 | | - |
|---|
| 1711 | | - bio_for_each_segment(bvec, bi, iter) |
|---|
| 1712 | | - flush_dcache_page(bvec.bv_page); |
|---|
| 1713 | | -} |
|---|
| 1714 | | -EXPORT_SYMBOL(bio_flush_dcache_pages); |
|---|
| 1715 | | -#endif |
|---|
| 1716 | 1392 | |
|---|
| 1717 | 1393 | static inline bool bio_remaining_done(struct bio *bio) |
|---|
| 1718 | 1394 | { |
|---|
| .. | .. |
|---|
| 1752 | 1428 | again: |
|---|
| 1753 | 1429 | if (!bio_remaining_done(bio)) |
|---|
| 1754 | 1430 | return; |
|---|
| 1755 | | - |
|---|
| 1756 | | - if (!blk_crypto_endio(bio)) |
|---|
| 1757 | | - return; |
|---|
| 1758 | | - |
|---|
| 1759 | 1431 | if (!bio_integrity_endio(bio)) |
|---|
| 1760 | 1432 | return; |
|---|
| 1761 | 1433 | |
|---|
| .. | .. |
|---|
| 1776 | 1448 | } |
|---|
| 1777 | 1449 | |
|---|
| 1778 | 1450 | if (bio->bi_disk && bio_flagged(bio, BIO_TRACE_COMPLETION)) { |
|---|
| 1779 | | - trace_block_bio_complete(bio->bi_disk->queue, bio, |
|---|
| 1780 | | - blk_status_to_errno(bio->bi_status)); |
|---|
| 1451 | + trace_block_bio_complete(bio->bi_disk->queue, bio); |
|---|
| 1781 | 1452 | bio_clear_flag(bio, BIO_TRACE_COMPLETION); |
|---|
| 1782 | 1453 | } |
|---|
| 1783 | 1454 | |
|---|
| .. | .. |
|---|
| 1800 | 1471 | * @bio, and updates @bio to represent the remaining sectors. |
|---|
| 1801 | 1472 | * |
|---|
| 1802 | 1473 | * Unless this is a discard request the newly allocated bio will point |
|---|
| 1803 | | - * to @bio's bi_io_vec; it is the caller's responsibility to ensure that |
|---|
| 1804 | | - * @bio is not freed before the split. |
|---|
| 1474 | + * to @bio's bi_io_vec. It is the caller's responsibility to ensure that |
|---|
| 1475 | + * neither @bio nor @bs is freed before the split bio. |
|---|
| 1805 | 1476 | */ |
|---|
| 1806 | 1477 | struct bio *bio_split(struct bio *bio, int sectors, |
|---|
| 1807 | 1478 | gfp_t gfp, struct bio_set *bs) |
|---|
| .. | .. |
|---|
| 1810 | 1481 | |
|---|
| 1811 | 1482 | BUG_ON(sectors <= 0); |
|---|
| 1812 | 1483 | BUG_ON(sectors >= bio_sectors(bio)); |
|---|
| 1484 | + |
|---|
| 1485 | + /* Zone append commands cannot be split */ |
|---|
| 1486 | + if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND)) |
|---|
| 1487 | + return NULL; |
|---|
| 1813 | 1488 | |
|---|
| 1814 | 1489 | split = bio_clone_fast(bio, gfp, bs); |
|---|
| 1815 | 1490 | if (!split) |
|---|
| .. | .. |
|---|
| 1821 | 1496 | bio_integrity_trim(split); |
|---|
| 1822 | 1497 | |
|---|
| 1823 | 1498 | bio_advance(bio, split->bi_iter.bi_size); |
|---|
| 1824 | | - bio->bi_iter.bi_done = 0; |
|---|
| 1825 | 1499 | |
|---|
| 1826 | 1500 | if (bio_flagged(bio, BIO_TRACE_COMPLETION)) |
|---|
| 1827 | 1501 | bio_set_flag(split, BIO_TRACE_COMPLETION); |
|---|
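The classic consumer of bio_split() is the split-and-chain pattern, which the lifetime rule in the updated kernel-doc exists to support. A hypothetical sketch (`demo_split_front` and `bs` are our names; note the new hunk above makes bio_split() return NULL for REQ_OP_ZONE_APPEND):

```c
#include <linux/bio.h>

/*
 * Hypothetical sketch: carve the first @sectors off @bio, chain the front
 * piece to the remainder so the parent completes only after both do, and
 * submit the front piece. @bs must be a bio_set owned by the caller, per
 * the lifetime rule documented above.
 */
static struct bio *demo_split_front(struct bio *bio, int sectors,
				    struct bio_set *bs)
{
	struct bio *split;

	split = bio_split(bio, sectors, GFP_NOIO, bs);
	if (!split)
		return NULL;

	bio_chain(split, bio);		/* @bio completes after @split */
	submit_bio_noacct(split);	/* remainder (@bio) submitted later */
	return bio;
}
```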
| .. | .. |
|---|
| 1846 | 1520 | if (offset == 0 && size == bio->bi_iter.bi_size) |
|---|
| 1847 | 1521 | return; |
|---|
| 1848 | 1522 | |
|---|
| 1849 | | - bio_clear_flag(bio, BIO_SEG_VALID); |
|---|
| 1850 | | - |
|---|
| 1851 | 1523 | bio_advance(bio, offset << 9); |
|---|
| 1852 | | - |
|---|
| 1853 | 1524 | bio->bi_iter.bi_size = size; |
|---|
| 1854 | 1525 | |
|---|
| 1855 | 1526 | if (bio_integrity(bio)) |
|---|
| .. | .. |
|---|
| 1968 | 1639 | } |
|---|
| 1969 | 1640 | EXPORT_SYMBOL(bioset_init_from_src); |
|---|
| 1970 | 1641 | |
|---|
| 1971 | | -#ifdef CONFIG_BLK_CGROUP |
|---|
| 1972 | | - |
|---|
| 1973 | | -#ifdef CONFIG_MEMCG |
|---|
| 1974 | | -/** |
|---|
| 1975 | | - * bio_associate_blkcg_from_page - associate a bio with the page's blkcg |
|---|
| 1976 | | - * @bio: target bio |
|---|
| 1977 | | - * @page: the page to lookup the blkcg from |
|---|
| 1978 | | - * |
|---|
| 1979 | | - * Associate @bio with the blkcg from @page's owning memcg. This works like |
|---|
| 1980 | | - * every other associate function wrt references. |
|---|
| 1981 | | - */ |
|---|
| 1982 | | -int bio_associate_blkcg_from_page(struct bio *bio, struct page *page) |
|---|
| 1983 | | -{ |
|---|
| 1984 | | - struct cgroup_subsys_state *blkcg_css; |
|---|
| 1985 | | - |
|---|
| 1986 | | - if (unlikely(bio->bi_css)) |
|---|
| 1987 | | - return -EBUSY; |
|---|
| 1988 | | - if (!page->mem_cgroup) |
|---|
| 1989 | | - return 0; |
|---|
| 1990 | | - blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, |
|---|
| 1991 | | - &io_cgrp_subsys); |
|---|
| 1992 | | - bio->bi_css = blkcg_css; |
|---|
| 1993 | | - return 0; |
|---|
| 1994 | | -} |
|---|
| 1995 | | -#endif /* CONFIG_MEMCG */ |
|---|
| 1996 | | - |
|---|
| 1997 | | -/** |
|---|
| 1998 | | - * bio_associate_blkcg - associate a bio with the specified blkcg |
|---|
| 1999 | | - * @bio: target bio |
|---|
| 2000 | | - * @blkcg_css: css of the blkcg to associate |
|---|
| 2001 | | - * |
|---|
| 2002 | | - * Associate @bio with the blkcg specified by @blkcg_css. Block layer will |
|---|
| 2003 | | - * treat @bio as if it were issued by a task which belongs to the blkcg. |
|---|
| 2004 | | - * |
|---|
| 2005 | | - * This function takes an extra reference of @blkcg_css which will be put |
|---|
| 2006 | | - * when @bio is released. The caller must own @bio and is responsible for |
|---|
| 2007 | | - * synchronizing calls to this function. |
|---|
| 2008 | | - */ |
|---|
| 2009 | | -int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) |
|---|
| 2010 | | -{ |
|---|
| 2011 | | - if (unlikely(bio->bi_css)) |
|---|
| 2012 | | - return -EBUSY; |
|---|
| 2013 | | - css_get(blkcg_css); |
|---|
| 2014 | | - bio->bi_css = blkcg_css; |
|---|
| 2015 | | - return 0; |
|---|
| 2016 | | -} |
|---|
| 2017 | | -EXPORT_SYMBOL_GPL(bio_associate_blkcg); |
|---|
| 2018 | | - |
|---|
| 2019 | | -/** |
|---|
| 2020 | | - * bio_associate_blkg - associate a bio with the specified blkg |
|---|
| 2021 | | - * @bio: target bio |
|---|
| 2022 | | - * @blkg: the blkg to associate |
|---|
| 2023 | | - * |
|---|
| 2024 | | - * Associate @bio with the blkg specified by @blkg. This is the queue-specific |
|---|
| 2025 | | - * blkcg information associated with @bio; a reference is taken on @blkg and |
|---|
| 2026 | | - * put when the bio is freed. |
|---|
| 2027 | | - */ |
|---|
| 2028 | | -int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) |
|---|
| 2029 | | -{ |
|---|
| 2030 | | - if (unlikely(bio->bi_blkg)) |
|---|
| 2031 | | - return -EBUSY; |
|---|
| 2032 | | - if (!blkg_try_get(blkg)) |
|---|
| 2033 | | - return -ENODEV; |
|---|
| 2034 | | - bio->bi_blkg = blkg; |
|---|
| 2035 | | - return 0; |
|---|
| 2036 | | -} |
|---|
| 2037 | | - |
|---|
| 2038 | | -/** |
|---|
| 2039 | | - * bio_disassociate_task - undo bio_associate_current() |
|---|
| 2040 | | - * @bio: target bio |
|---|
| 2041 | | - */ |
|---|
| 2042 | | -void bio_disassociate_task(struct bio *bio) |
|---|
| 2043 | | -{ |
|---|
| 2044 | | - if (bio->bi_ioc) { |
|---|
| 2045 | | - put_io_context(bio->bi_ioc); |
|---|
| 2046 | | - bio->bi_ioc = NULL; |
|---|
| 2047 | | - } |
|---|
| 2048 | | - if (bio->bi_css) { |
|---|
| 2049 | | - css_put(bio->bi_css); |
|---|
| 2050 | | - bio->bi_css = NULL; |
|---|
| 2051 | | - } |
|---|
| 2052 | | - if (bio->bi_blkg) { |
|---|
| 2053 | | - blkg_put(bio->bi_blkg); |
|---|
| 2054 | | - bio->bi_blkg = NULL; |
|---|
| 2055 | | - } |
|---|
| 2056 | | -} |
|---|
| 2057 | | - |
|---|
| 2058 | | -/** |
|---|
| 2059 | | - * bio_clone_blkcg_association - clone blkcg association from src to dst bio |
|---|
| 2060 | | - * @dst: destination bio |
|---|
| 2061 | | - * @src: source bio |
|---|
| 2062 | | - */ |
|---|
| 2063 | | -void bio_clone_blkcg_association(struct bio *dst, struct bio *src) |
|---|
| 2064 | | -{ |
|---|
| 2065 | | - if (src->bi_css) |
|---|
| 2066 | | - WARN_ON(bio_associate_blkcg(dst, src->bi_css)); |
|---|
| 2067 | | -} |
|---|
| 2068 | | -EXPORT_SYMBOL_GPL(bio_clone_blkcg_association); |
|---|
| 2069 | | -#endif /* CONFIG_BLK_CGROUP */ |
|---|
| 2070 | | - |
|---|
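For reference, the association rule the removed API enforced was: at most one blkcg css per bio, with the reference taken at association time and put by bio_disassociate_task() when the bio is freed. A hedged sketch of old-style usage, with a hypothetical caller name (`demo_tag_bio`):

```c
#include <linux/bio.h>
#include <linux/blk-cgroup.h>

/*
 * Hypothetical sketch against the removed API: tag @bio with @css so the
 * block layer throttles it as if issued by a task in that blkcg. The css
 * reference taken here was dropped in bio_disassociate_task().
 */
static int demo_tag_bio(struct bio *bio, struct cgroup_subsys_state *css)
{
	/* returns -EBUSY if the bio already carries a css reference */
	return bio_associate_blkcg(bio, css);
}
```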
| 2071 | 1642 | static void __init biovec_init_slabs(void) |
|---|
| 2072 | 1643 | { |
|---|
| 2073 | 1644 | int i; |
|---|
| .. | .. |
|---|
| 2093 | 1664 | bio_slab_nr = 0; |
|---|
| 2094 | 1665 | bio_slabs = kcalloc(bio_slab_max, sizeof(struct bio_slab), |
|---|
| 2095 | 1666 | GFP_KERNEL); |
|---|
| 1667 | + |
|---|
| 1668 | + BUILD_BUG_ON(BIO_FLAG_LAST > BVEC_POOL_OFFSET); |
|---|
| 1669 | + |
|---|
| 2096 | 1670 | if (!bio_slabs) |
|---|
| 2097 | 1671 | panic("bio: can't allocate bios\n"); |
|---|
| 2098 | 1672 | |
|---|