.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
---|
1 | 2 | /* |
---|
2 | 3 | * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk> |
---|
3 | | - * |
---|
4 | | - * This program is free software; you can redistribute it and/or modify |
---|
5 | | - * it under the terms of the GNU General Public License version 2 as |
---|
6 | | - * published by the Free Software Foundation. |
---|
7 | | - * |
---|
8 | | - * This program is distributed in the hope that it will be useful, |
---|
9 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
10 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
11 | | - * GNU General Public License for more details. |
---|
12 | | - * |
---|
13 | | - * You should have received a copy of the GNU General Public Licens |
---|
14 | | - * along with this program; if not, write to the Free Software |
---|
15 | | - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- |
---|
16 | | - * |
---|
17 | 4 | */ |
---|
18 | 5 | #include <linux/mm.h> |
---|
19 | 6 | #include <linux/swap.h> |
---|
.. | .. |
---|
29 | 16 | #include <linux/workqueue.h> |
---|
30 | 17 | #include <linux/cgroup.h> |
---|
31 | 18 | #include <linux/blk-cgroup.h> |
---|
| 19 | +#include <linux/highmem.h> |
---|
| 20 | +#include <linux/sched/sysctl.h> |
---|
32 | 21 | #include <linux/blk-crypto.h> |
---|
33 | 22 | |
---|
34 | 23 | #include <trace/events/block.h> |
---|
.. | .. |
---|
245 | 234 | |
---|
246 | 235 | void bio_uninit(struct bio *bio) |
---|
247 | 236 | { |
---|
248 | | - bio_disassociate_task(bio); |
---|
| 237 | +#ifdef CONFIG_BLK_CGROUP |
---|
| 238 | + if (bio->bi_blkg) { |
---|
| 239 | + blkg_put(bio->bi_blkg); |
---|
| 240 | + bio->bi_blkg = NULL; |
---|
| 241 | + } |
---|
| 242 | +#endif |
---|
| 243 | + if (bio_integrity(bio)) |
---|
| 244 | + bio_integrity_free(bio); |
---|
249 | 245 | |
---|
250 | 246 | bio_crypt_free_ctx(bio); |
---|
251 | 247 | } |
---|
.. | .. |
---|
331 | 327 | /** |
---|
332 | 328 | * bio_chain - chain bio completions |
---|
333 | 329 | * @bio: the target bio |
---|
334 | | - * @parent: the @bio's parent bio |
---|
| 330 | + * @parent: the parent bio of @bio |
---|
335 | 331 | * |
---|
336 | 332 | * The caller won't have a bi_end_io called when @bio completes - instead, |
---|
337 | 333 | * @parent's bi_end_io won't be called until both @parent and @bio have |
---|
.. | .. |
---|
362 | 358 | if (!bio) |
---|
363 | 359 | break; |
---|
364 | 360 | |
---|
365 | | - generic_make_request(bio); |
---|
| 361 | + submit_bio_noacct(bio); |
---|
366 | 362 | } |
---|
367 | 363 | } |
---|
368 | 364 | |
---|
.. | .. |
---|
420 | 416 | * submit the previously allocated bio for IO before attempting to allocate |
---|
421 | 417 | * a new one. Failure to do so can cause deadlocks under memory pressure. |
---|
422 | 418 | * |
---|
423 | | - * Note that when running under generic_make_request() (i.e. any block |
---|
| 419 | + * Note that when running under submit_bio_noacct() (i.e. any block |
---|
424 | 420 | * driver), bios are not submitted until after you return - see the code in |
---|
425 | | - * generic_make_request() that converts recursion into iteration, to prevent |
---|
| 421 | + * submit_bio_noacct() that converts recursion into iteration, to prevent |
---|
426 | 422 | * stack overflows. |
---|
427 | 423 | * |
---|
428 | 424 | * This would normally mean allocating multiple bios under |
---|
429 | | - * generic_make_request() would be susceptible to deadlocks, but we have |
---|
| 425 | + * submit_bio_noacct() would be susceptible to deadlocks, but we have |
---|
430 | 426 | * deadlock avoidance code that resubmits any blocked bios from a rescuer |
---|
431 | 427 | * thread. |
---|
432 | 428 | * |
---|
433 | 429 | * However, we do not guarantee forward progress for allocations from other |
---|
434 | 430 | * mempools. Doing multiple allocations from the same mempool under |
---|
435 | | - * generic_make_request() should be avoided - instead, use bio_set's front_pad |
---|
| 431 | + * submit_bio_noacct() should be avoided - instead, use bio_set's front_pad |
---|
436 | 432 | * for per bio allocations. |
---|
437 | 433 | * |
---|
438 | 434 | * RETURNS: |
---|
.. | .. |
---|
452 | 448 | if (nr_iovecs > UIO_MAXIOV) |
---|
453 | 449 | return NULL; |
---|
454 | 450 | |
---|
455 | | - p = kmalloc(sizeof(struct bio) + |
---|
456 | | - nr_iovecs * sizeof(struct bio_vec), |
---|
457 | | - gfp_mask); |
---|
| 451 | + p = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask); |
---|
458 | 452 | front_pad = 0; |
---|
459 | 453 | inline_vecs = nr_iovecs; |
---|
460 | 454 | } else { |
---|
.. | .. |
---|
463 | 457 | nr_iovecs > 0)) |
---|
464 | 458 | return NULL; |
---|
465 | 459 | /* |
---|
466 | | - * generic_make_request() converts recursion to iteration; this |
---|
| 460 | + * submit_bio_noacct() converts recursion to iteration; this |
---|
467 | 461 | * means if we're running beneath it, any bios we allocate and |
---|
468 | 462 | * submit will not be submitted (and thus freed) until after we |
---|
469 | 463 | * return. |
---|
470 | 464 | * |
---|
471 | 465 | * This exposes us to a potential deadlock if we allocate |
---|
472 | 466 | * multiple bios from the same bio_set() while running |
---|
473 | | - * underneath generic_make_request(). If we were to allocate |
---|
| 467 | + * underneath submit_bio_noacct(). If we were to allocate |
---|
474 | 468 | * multiple bios (say a stacking block driver that was splitting |
---|
475 | 469 | * bios), we would deadlock if we exhausted the mempool's |
---|
476 | 470 | * reserve. |
---|
.. | .. |
---|
551 | 545 | EXPORT_SYMBOL(zero_fill_bio_iter); |
---|
552 | 546 | |
---|
553 | 547 | /** |
---|
| 548 | + * bio_truncate - truncate the bio down to @new_size |
---|
| 549 | + * @bio: the bio to be truncated |
---|
| 550 | + * @new_size: new size for truncating the bio |
---|
| 551 | + * |
---|
| 552 | + * Description: |
---|
| 553 | + * Truncate the bio to the new size @new_size. If bio_op(bio) is |
---|
| 554 | + * REQ_OP_READ, zero the truncated part. This function should only |
---|
| 555 | + * be used for handling corner cases, such as bio eod. |
---|
| 556 | + */ |
---|
| 557 | +void bio_truncate(struct bio *bio, unsigned new_size) |
---|
| 558 | +{ |
---|
| 559 | + struct bio_vec bv; |
---|
| 560 | + struct bvec_iter iter; |
---|
| 561 | + unsigned int done = 0; |
---|
| 562 | + bool truncated = false; |
---|
| 563 | + |
---|
| 564 | + if (new_size >= bio->bi_iter.bi_size) |
---|
| 565 | + return; |
---|
| 566 | + |
---|
| 567 | + if (bio_op(bio) != REQ_OP_READ) |
---|
| 568 | + goto exit; |
---|
| 569 | + |
---|
| 570 | + bio_for_each_segment(bv, bio, iter) { |
---|
| 571 | + if (done + bv.bv_len > new_size) { |
---|
| 572 | + unsigned offset; |
---|
| 573 | + |
---|
| 574 | + if (!truncated) |
---|
| 575 | + offset = new_size - done; |
---|
| 576 | + else |
---|
| 577 | + offset = 0; |
---|
| 578 | + zero_user(bv.bv_page, bv.bv_offset + offset, |
---|
| 579 | + bv.bv_len - offset); |
---|
| 580 | + truncated = true; |
---|
| 581 | + } |
---|
| 582 | + done += bv.bv_len; |
---|
| 583 | + } |
---|
| 584 | + |
---|
| 585 | + exit: |
---|
| 586 | + /* |
---|
| 587 | + * Don't touch the bvec table here; keep it effectively immutable, since |
---|
| 588 | + * the fs bio user has to retrieve all pages via bio_for_each_segment_all |
---|
| 589 | + * in its ->bi_end_io() callback. |
---|
| 590 | + * |
---|
| 591 | + * It is enough to truncate bio by updating .bi_size since we can make |
---|
| 592 | + * correct bvec with the updated .bi_size for drivers. |
---|
| 593 | + */ |
---|
| 594 | + bio->bi_iter.bi_size = new_size; |
---|
| 595 | +} |
---|
| 596 | + |
---|
| 597 | +/** |
---|
| 598 | + * guard_bio_eod - truncate a BIO to fit the block device |
---|
| 599 | + * @bio: bio to truncate |
---|
| 600 | + * |
---|
| 601 | + * This allows us to do IO even on the odd last sectors of a device, even if the |
---|
| 602 | + * block size is some multiple of the physical sector size. |
---|
| 603 | + * |
---|
| 604 | + * We'll just truncate the bio to the size of the device, and zero the truncated |
---|
| 605 | + * part of a read manually. Truly out-of-range accesses will turn into actual |
---|
| 606 | + * I/O errors; this only handles the "we need to be able to do I/O at the final |
---|
| 607 | + * sector" case. |
---|
| 608 | + */ |
---|
| 609 | +void guard_bio_eod(struct bio *bio) |
---|
| 610 | +{ |
---|
| 611 | + sector_t maxsector; |
---|
| 612 | + struct hd_struct *part; |
---|
| 613 | + |
---|
| 614 | + rcu_read_lock(); |
---|
| 615 | + part = __disk_get_part(bio->bi_disk, bio->bi_partno); |
---|
| 616 | + if (part) |
---|
| 617 | + maxsector = part_nr_sects_read(part); |
---|
| 618 | + else |
---|
| 619 | + maxsector = get_capacity(bio->bi_disk); |
---|
| 620 | + rcu_read_unlock(); |
---|
| 621 | + |
---|
| 622 | + if (!maxsector) |
---|
| 623 | + return; |
---|
| 624 | + |
---|
| 625 | + /* |
---|
| 626 | + * If the *whole* IO is past the end of the device, |
---|
| 627 | + * let it through, and the IO layer will turn it into |
---|
| 628 | + * an EIO. |
---|
| 629 | + */ |
---|
| 630 | + if (unlikely(bio->bi_iter.bi_sector >= maxsector)) |
---|
| 631 | + return; |
---|
| 632 | + |
---|
| 633 | + maxsector -= bio->bi_iter.bi_sector; |
---|
| 634 | + if (likely((bio->bi_iter.bi_size >> 9) <= maxsector)) |
---|
| 635 | + return; |
---|
| 636 | + |
---|
| 637 | + bio_truncate(bio, maxsector << 9); |
---|
| 638 | +} |
---|
| 639 | + |
---|
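For reference, the end-of-device clamp above reduces to plain sector arithmetic: a bio is only truncated when it starts before the end of the device but would run past it. A minimal user-space sketch of that arithmetic (illustrative only; the helper name and the 512-byte sector assumption are not part of the patch):

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative sketch of the clamp performed by guard_bio_eod()/bio_truncate(). */
static uint64_t clamp_to_eod(uint64_t start_sector, uint64_t size_bytes,
			     uint64_t capacity_sectors)
{
	uint64_t remaining;

	if (!capacity_sectors || start_sector >= capacity_sectors)
		return size_bytes;	/* whole I/O past EOD: let it fail with -EIO */

	remaining = capacity_sectors - start_sector;
	if ((size_bytes >> 9) <= remaining)
		return size_bytes;	/* fits entirely before EOD */

	return remaining << 9;		/* keep only the in-range sectors */
}

int main(void)
{
	/* A 4 KiB bio starting 2 sectors (1 KiB) before a 1000-sector device ends. */
	printf("%llu\n", (unsigned long long)clamp_to_eod(998, 4096, 1000));
	return 0;			/* prints 1024 */
}
```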
| 640 | +/** |
---|
554 | 641 | * bio_put - release a reference to a bio |
---|
555 | 642 | * @bio: bio to release reference to |
---|
556 | 643 | * |
---|
.. | .. |
---|
573 | 660 | } |
---|
574 | 661 | } |
---|
575 | 662 | EXPORT_SYMBOL(bio_put); |
---|
576 | | - |
---|
577 | | -inline int bio_phys_segments(struct request_queue *q, struct bio *bio) |
---|
578 | | -{ |
---|
579 | | - if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) |
---|
580 | | - blk_recount_segments(q, bio); |
---|
581 | | - |
---|
582 | | - return bio->bi_phys_segments; |
---|
583 | | -} |
---|
584 | | -EXPORT_SYMBOL(bio_phys_segments); |
---|
585 | 663 | |
---|
586 | 664 | /** |
---|
587 | 665 | * __bio_clone_fast - clone a bio that shares the original bio's biovec |
---|
.. | .. |
---|
613 | 691 | bio->bi_iter = bio_src->bi_iter; |
---|
614 | 692 | bio->bi_io_vec = bio_src->bi_io_vec; |
---|
615 | 693 | |
---|
616 | | - bio_clone_blkcg_association(bio, bio_src); |
---|
| 694 | + bio_clone_blkg_association(bio, bio_src); |
---|
| 695 | + blkcg_bio_issue_init(bio); |
---|
617 | 696 | } |
---|
618 | 697 | EXPORT_SYMBOL(__bio_clone_fast); |
---|
619 | 698 | |
---|
.. | .. |
---|
635 | 714 | |
---|
636 | 715 | __bio_clone_fast(b, bio); |
---|
637 | 716 | |
---|
638 | | - bio_crypt_clone(b, bio, gfp_mask); |
---|
| 717 | + if (bio_crypt_clone(b, bio, gfp_mask) < 0) |
---|
| 718 | + goto err_put; |
---|
639 | 719 | |
---|
640 | 720 | if (bio_integrity(bio) && |
---|
641 | | - bio_integrity_clone(b, bio, gfp_mask) < 0) { |
---|
642 | | - bio_put(b); |
---|
643 | | - return NULL; |
---|
644 | | - } |
---|
| 721 | + bio_integrity_clone(b, bio, gfp_mask) < 0) |
---|
| 722 | + goto err_put; |
---|
645 | 723 | |
---|
646 | 724 | return b; |
---|
| 725 | + |
---|
| 726 | +err_put: |
---|
| 727 | + bio_put(b); |
---|
| 728 | + return NULL; |
---|
647 | 729 | } |
---|
648 | 730 | EXPORT_SYMBOL(bio_clone_fast); |
---|
649 | 731 | |
---|
650 | | -/** |
---|
651 | | - * bio_add_pc_page - attempt to add page to bio |
---|
652 | | - * @q: the target queue |
---|
653 | | - * @bio: destination bio |
---|
654 | | - * @page: page to add |
---|
655 | | - * @len: vec entry length |
---|
656 | | - * @offset: vec entry offset |
---|
657 | | - * |
---|
658 | | - * Attempt to add a page to the bio_vec maplist. This can fail for a |
---|
659 | | - * number of reasons, such as the bio being full or target block device |
---|
660 | | - * limitations. The target block device must allow bio's up to PAGE_SIZE, |
---|
661 | | - * so it is always possible to add a single page to an empty bio. |
---|
662 | | - * |
---|
663 | | - * This should only be used by REQ_PC bios. |
---|
664 | | - */ |
---|
665 | | -int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page |
---|
666 | | - *page, unsigned int len, unsigned int offset) |
---|
| 732 | +const char *bio_devname(struct bio *bio, char *buf) |
---|
667 | 733 | { |
---|
668 | | - int retried_segments = 0; |
---|
| 734 | + return disk_name(bio->bi_disk, bio->bi_partno, buf); |
---|
| 735 | +} |
---|
| 736 | +EXPORT_SYMBOL(bio_devname); |
---|
| 737 | + |
---|
| 738 | +static inline bool page_is_mergeable(const struct bio_vec *bv, |
---|
| 739 | + struct page *page, unsigned int len, unsigned int off, |
---|
| 740 | + bool *same_page) |
---|
| 741 | +{ |
---|
| 742 | + size_t bv_end = bv->bv_offset + bv->bv_len; |
---|
| 743 | + phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1; |
---|
| 744 | + phys_addr_t page_addr = page_to_phys(page); |
---|
| 745 | + |
---|
| 746 | + if (vec_end_addr + 1 != page_addr + off) |
---|
| 747 | + return false; |
---|
| 748 | + if (xen_domain() && !xen_biovec_phys_mergeable(bv, page)) |
---|
| 749 | + return false; |
---|
| 750 | + |
---|
| 751 | + *same_page = ((vec_end_addr & PAGE_MASK) == page_addr); |
---|
| 752 | + if (*same_page) |
---|
| 753 | + return true; |
---|
| 754 | + return (bv->bv_page + bv_end / PAGE_SIZE) == (page + off / PAGE_SIZE); |
---|
| 755 | +} |
---|
| 756 | + |
---|
| 757 | +/* |
---|
| 758 | + * Try to merge a page into a segment, while obeying the hardware segment |
---|
| 759 | + * size limit. This is not for normal read/write bios, but for passthrough |
---|
| 760 | + * or Zone Append operations that we can't split. |
---|
| 761 | + */ |
---|
| 762 | +static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio, |
---|
| 763 | + struct page *page, unsigned len, |
---|
| 764 | + unsigned offset, bool *same_page) |
---|
| 765 | +{ |
---|
| 766 | + struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; |
---|
| 767 | + unsigned long mask = queue_segment_boundary(q); |
---|
| 768 | + phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset; |
---|
| 769 | + phys_addr_t addr2 = page_to_phys(page) + offset + len - 1; |
---|
| 770 | + |
---|
| 771 | + if ((addr1 | mask) != (addr2 | mask)) |
---|
| 772 | + return false; |
---|
| 773 | + if (bv->bv_len + len > queue_max_segment_size(q)) |
---|
| 774 | + return false; |
---|
| 775 | + return __bio_try_merge_page(bio, page, len, offset, same_page); |
---|
| 776 | +} |
---|
| 777 | + |
---|
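As an aside, the `(addr1 | mask) != (addr2 | mask)` test in bio_try_merge_hw_seg() is the usual segment-boundary trick: two physical addresses share a hardware segment exactly when they agree in every bit above the boundary mask. A stand-alone sketch with a hypothetical 64 KiB boundary (illustrative only):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative only: the same-segment test used by bio_try_merge_hw_seg(). */
static bool same_hw_segment(uint64_t addr1, uint64_t addr2, uint64_t boundary_mask)
{
	/* OR-ing with the mask discards the offset within the segment,
	 * so only the segment number (the high bits) is compared. */
	return (addr1 | boundary_mask) == (addr2 | boundary_mask);
}

int main(void)
{
	uint64_t mask = 0xffff;	/* hypothetical 64 KiB segment boundary */

	printf("%d\n", same_hw_segment(0x10000, 0x1ffff, mask));	/* 1: same segment */
	printf("%d\n", same_hw_segment(0x1ffff, 0x20000, mask));	/* 0: crosses boundary */
	return 0;
}
```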
| 778 | +/** |
---|
| 779 | + * bio_add_hw_page - attempt to add a page to a bio with hw constraints |
---|
| 780 | + * @q: the target queue |
---|
| 781 | + * @bio: destination bio |
---|
| 782 | + * @page: page to add |
---|
| 783 | + * @len: vec entry length |
---|
| 784 | + * @offset: vec entry offset |
---|
| 785 | + * @max_sectors: maximum number of sectors that can be added |
---|
| 786 | + * @same_page: set to %true if the segment was merged within the same page |
---|
| 787 | + * |
---|
| 788 | + * Add a page to a bio while respecting the hardware max_sectors, max_segment |
---|
| 789 | + * and gap limitations. |
---|
| 790 | + */ |
---|
| 791 | +int bio_add_hw_page(struct request_queue *q, struct bio *bio, |
---|
| 792 | + struct page *page, unsigned int len, unsigned int offset, |
---|
| 793 | + unsigned int max_sectors, bool *same_page) |
---|
| 794 | +{ |
---|
669 | 795 | struct bio_vec *bvec; |
---|
670 | 796 | |
---|
671 | | - /* |
---|
672 | | - * cloned bio must not modify vec list |
---|
673 | | - */ |
---|
674 | | - if (unlikely(bio_flagged(bio, BIO_CLONED))) |
---|
| 797 | + if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) |
---|
675 | 798 | return 0; |
---|
676 | 799 | |
---|
677 | | - if (((bio->bi_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q)) |
---|
| 800 | + if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors) |
---|
678 | 801 | return 0; |
---|
679 | 802 | |
---|
680 | | - /* |
---|
681 | | - * For filesystems with a blocksize smaller than the pagesize |
---|
682 | | - * we will often be called with the same page as last time and |
---|
683 | | - * a consecutive offset. Optimize this special case. |
---|
684 | | - */ |
---|
685 | 803 | if (bio->bi_vcnt > 0) { |
---|
686 | | - struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; |
---|
687 | | - |
---|
688 | | - if (page == prev->bv_page && |
---|
689 | | - offset == prev->bv_offset + prev->bv_len) { |
---|
690 | | - prev->bv_len += len; |
---|
691 | | - bio->bi_iter.bi_size += len; |
---|
692 | | - goto done; |
---|
693 | | - } |
---|
| 804 | + if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page)) |
---|
| 805 | + return len; |
---|
694 | 806 | |
---|
695 | 807 | /* |
---|
696 | | - * If the queue doesn't support SG gaps and adding this |
---|
697 | | - * offset would create a gap, disallow it. |
---|
| 808 | + * If the queue doesn't support SG gaps and adding this segment |
---|
| 809 | + * would create a gap, disallow it. |
---|
698 | 810 | */ |
---|
699 | | - if (bvec_gap_to_prev(q, prev, offset)) |
---|
| 811 | + bvec = &bio->bi_io_vec[bio->bi_vcnt - 1]; |
---|
| 812 | + if (bvec_gap_to_prev(q, bvec, offset)) |
---|
700 | 813 | return 0; |
---|
701 | 814 | } |
---|
702 | 815 | |
---|
703 | | - if (bio_full(bio)) |
---|
| 816 | + if (bio_full(bio, len)) |
---|
704 | 817 | return 0; |
---|
705 | 818 | |
---|
706 | | - /* |
---|
707 | | - * setup the new entry, we might clear it again later if we |
---|
708 | | - * cannot add the page |
---|
709 | | - */ |
---|
| 819 | + if (bio->bi_vcnt >= queue_max_segments(q)) |
---|
| 820 | + return 0; |
---|
| 821 | + |
---|
710 | 822 | bvec = &bio->bi_io_vec[bio->bi_vcnt]; |
---|
711 | 823 | bvec->bv_page = page; |
---|
712 | 824 | bvec->bv_len = len; |
---|
713 | 825 | bvec->bv_offset = offset; |
---|
714 | 826 | bio->bi_vcnt++; |
---|
715 | | - bio->bi_phys_segments++; |
---|
716 | 827 | bio->bi_iter.bi_size += len; |
---|
717 | | - |
---|
718 | | - /* |
---|
719 | | - * Perform a recount if the number of segments is greater |
---|
720 | | - * than queue_max_segments(q). |
---|
721 | | - */ |
---|
722 | | - |
---|
723 | | - while (bio->bi_phys_segments > queue_max_segments(q)) { |
---|
724 | | - |
---|
725 | | - if (retried_segments) |
---|
726 | | - goto failed; |
---|
727 | | - |
---|
728 | | - retried_segments = 1; |
---|
729 | | - blk_recount_segments(q, bio); |
---|
730 | | - } |
---|
731 | | - |
---|
732 | | - /* If we may be able to merge these biovecs, force a recount */ |
---|
733 | | - if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) |
---|
734 | | - bio_clear_flag(bio, BIO_SEG_VALID); |
---|
735 | | - |
---|
736 | | - done: |
---|
737 | 828 | return len; |
---|
| 829 | +} |
---|
738 | 830 | |
---|
739 | | - failed: |
---|
740 | | - bvec->bv_page = NULL; |
---|
741 | | - bvec->bv_len = 0; |
---|
742 | | - bvec->bv_offset = 0; |
---|
743 | | - bio->bi_vcnt--; |
---|
744 | | - bio->bi_iter.bi_size -= len; |
---|
745 | | - blk_recount_segments(q, bio); |
---|
746 | | - return 0; |
---|
| 831 | +/** |
---|
| 832 | + * bio_add_pc_page - attempt to add page to passthrough bio |
---|
| 833 | + * @q: the target queue |
---|
| 834 | + * @bio: destination bio |
---|
| 835 | + * @page: page to add |
---|
| 836 | + * @len: vec entry length |
---|
| 837 | + * @offset: vec entry offset |
---|
| 838 | + * |
---|
| 839 | + * Attempt to add a page to the bio_vec maplist. This can fail for a |
---|
| 840 | + * number of reasons, such as the bio being full or target block device |
---|
| 841 | + * limitations. The target block device must allow bio's up to PAGE_SIZE, |
---|
| 842 | + * so it is always possible to add a single page to an empty bio. |
---|
| 843 | + * |
---|
| 844 | + * This should only be used by passthrough bios. |
---|
| 845 | + */ |
---|
| 846 | +int bio_add_pc_page(struct request_queue *q, struct bio *bio, |
---|
| 847 | + struct page *page, unsigned int len, unsigned int offset) |
---|
| 848 | +{ |
---|
| 849 | + bool same_page = false; |
---|
| 850 | + return bio_add_hw_page(q, bio, page, len, offset, |
---|
| 851 | + queue_max_hw_sectors(q), &same_page); |
---|
747 | 852 | } |
---|
748 | 853 | EXPORT_SYMBOL(bio_add_pc_page); |
---|
749 | 854 | |
---|
750 | 855 | /** |
---|
751 | 856 | * __bio_try_merge_page - try appending data to an existing bvec. |
---|
752 | 857 | * @bio: destination bio |
---|
753 | | - * @page: page to add |
---|
| 858 | + * @page: start page to add |
---|
754 | 859 | * @len: length of the data to add |
---|
755 | | - * @off: offset of the data in @page |
---|
| 860 | + * @off: offset of the data relative to @page |
---|
| 861 | + * @same_page: set to %true if the segment was merged within the same page |
---|
756 | 862 | * |
---|
757 | 863 | * Try to add the data at @page + @off to the last bvec of @bio. This is a |
---|
758 | | - * a useful optimisation for file systems with a block size smaller than the |
---|
| 864 | + * useful optimisation for file systems with a block size smaller than the |
---|
759 | 865 | * page size. |
---|
| 866 | + * |
---|
| 867 | + * A warning is triggered if (@len, @off) crosses pages while @same_page is true. |
---|
760 | 868 | * |
---|
761 | 869 | * Return %true on success or %false on failure. |
---|
762 | 870 | */ |
---|
763 | 871 | bool __bio_try_merge_page(struct bio *bio, struct page *page, |
---|
764 | | - unsigned int len, unsigned int off) |
---|
| 872 | + unsigned int len, unsigned int off, bool *same_page) |
---|
765 | 873 | { |
---|
766 | 874 | if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) |
---|
767 | 875 | return false; |
---|
.. | .. |
---|
769 | 877 | if (bio->bi_vcnt > 0) { |
---|
770 | 878 | struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; |
---|
771 | 879 | |
---|
772 | | - if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) { |
---|
| 880 | + if (page_is_mergeable(bv, page, len, off, same_page)) { |
---|
| 881 | + if (bio->bi_iter.bi_size > UINT_MAX - len) { |
---|
| 882 | + *same_page = false; |
---|
| 883 | + return false; |
---|
| 884 | + } |
---|
773 | 885 | bv->bv_len += len; |
---|
774 | 886 | bio->bi_iter.bi_size += len; |
---|
775 | 887 | return true; |
---|
.. | .. |
---|
780 | 892 | EXPORT_SYMBOL_GPL(__bio_try_merge_page); |
---|
781 | 893 | |
---|
782 | 894 | /** |
---|
783 | | - * __bio_add_page - add page to a bio in a new segment |
---|
| 895 | + * __bio_add_page - add page(s) to a bio in a new segment |
---|
784 | 896 | * @bio: destination bio |
---|
785 | | - * @page: page to add |
---|
786 | | - * @len: length of the data to add |
---|
787 | | - * @off: offset of the data in @page |
---|
| 897 | + * @page: start page to add |
---|
| 898 | + * @len: length of the data to add, may cross pages |
---|
| 899 | + * @off: offset of the data relative to @page, may cross pages |
---|
788 | 900 | * |
---|
789 | 901 | * Add the data at @page + @off to @bio as a new bvec. The caller must ensure |
---|
790 | 902 | * that @bio has space for another bvec. |
---|
.. | .. |
---|
795 | 907 | struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt]; |
---|
796 | 908 | |
---|
797 | 909 | WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); |
---|
798 | | - WARN_ON_ONCE(bio_full(bio)); |
---|
| 910 | + WARN_ON_ONCE(bio_full(bio, len)); |
---|
799 | 911 | |
---|
800 | 912 | bv->bv_page = page; |
---|
801 | 913 | bv->bv_offset = off; |
---|
.. | .. |
---|
810 | 922 | EXPORT_SYMBOL_GPL(__bio_add_page); |
---|
811 | 923 | |
---|
812 | 924 | /** |
---|
813 | | - * bio_add_page - attempt to add page to bio |
---|
| 925 | + * bio_add_page - attempt to add page(s) to bio |
---|
814 | 926 | * @bio: destination bio |
---|
815 | | - * @page: page to add |
---|
816 | | - * @len: vec entry length |
---|
817 | | - * @offset: vec entry offset |
---|
| 927 | + * @page: start page to add |
---|
| 928 | + * @len: vec entry length, may cross pages |
---|
| 929 | + * @offset: vec entry offset relative to @page, may cross pages |
---|
818 | 930 | * |
---|
819 | | - * Attempt to add a page to the bio_vec maplist. This will only fail |
---|
| 931 | + * Attempt to add page(s) to the bio_vec maplist. This will only fail |
---|
820 | 932 | * if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio. |
---|
821 | 933 | */ |
---|
822 | 934 | int bio_add_page(struct bio *bio, struct page *page, |
---|
823 | 935 | unsigned int len, unsigned int offset) |
---|
824 | 936 | { |
---|
825 | | - if (!__bio_try_merge_page(bio, page, len, offset)) { |
---|
826 | | - if (bio_full(bio)) |
---|
| 937 | + bool same_page = false; |
---|
| 938 | + |
---|
| 939 | + if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) { |
---|
| 940 | + if (bio_full(bio, len)) |
---|
827 | 941 | return 0; |
---|
828 | 942 | __bio_add_page(bio, page, len, offset); |
---|
829 | 943 | } |
---|
830 | 944 | return len; |
---|
831 | 945 | } |
---|
832 | 946 | EXPORT_SYMBOL(bio_add_page); |
---|
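A hedged usage sketch of the bio_add_page() path above: a caller attaching a multi-page kernel buffer typically loops page by page and treats a short return as "bio full". The helper name add_pages_to_bio is an assumption for illustration, not an existing kernel function:

```c
#include <linux/bio.h>
#include <linux/mm.h>

/*
 * Illustrative only: append up to @count pages to an already allocated bio,
 * stopping when the bio runs out of bvec slots.  bio_add_page() merges
 * physically contiguous data into the previous bvec when it can.
 */
static unsigned int add_pages_to_bio(struct bio *bio, struct page **pages,
				     unsigned int count)
{
	unsigned int i;

	for (i = 0; i < count; i++) {
		if (bio_add_page(bio, pages[i], PAGE_SIZE, 0) != PAGE_SIZE)
			break;		/* bio full (or cloned): submit and retry */
	}
	return i;			/* number of pages actually added */
}
```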
| 947 | + |
---|
| 948 | +void bio_release_pages(struct bio *bio, bool mark_dirty) |
---|
| 949 | +{ |
---|
| 950 | + struct bvec_iter_all iter_all; |
---|
| 951 | + struct bio_vec *bvec; |
---|
| 952 | + |
---|
| 953 | + if (bio_flagged(bio, BIO_NO_PAGE_REF)) |
---|
| 954 | + return; |
---|
| 955 | + |
---|
| 956 | + bio_for_each_segment_all(bvec, bio, iter_all) { |
---|
| 957 | + if (mark_dirty && !PageCompound(bvec->bv_page)) |
---|
| 958 | + set_page_dirty_lock(bvec->bv_page); |
---|
| 959 | + put_page(bvec->bv_page); |
---|
| 960 | + } |
---|
| 961 | +} |
---|
| 962 | +EXPORT_SYMBOL_GPL(bio_release_pages); |
---|
| 963 | + |
---|
| 964 | +static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter) |
---|
| 965 | +{ |
---|
| 966 | + const struct bio_vec *bv = iter->bvec; |
---|
| 967 | + unsigned int len; |
---|
| 968 | + size_t size; |
---|
| 969 | + |
---|
| 970 | + if (WARN_ON_ONCE(iter->iov_offset > bv->bv_len)) |
---|
| 971 | + return -EINVAL; |
---|
| 972 | + |
---|
| 973 | + len = min_t(size_t, bv->bv_len - iter->iov_offset, iter->count); |
---|
| 974 | + size = bio_add_page(bio, bv->bv_page, len, |
---|
| 975 | + bv->bv_offset + iter->iov_offset); |
---|
| 976 | + if (unlikely(size != len)) |
---|
| 977 | + return -EINVAL; |
---|
| 978 | + iov_iter_advance(iter, size); |
---|
| 979 | + return 0; |
---|
| 980 | +} |
---|
| 981 | + |
---|
| 982 | +static void bio_put_pages(struct page **pages, size_t size, size_t off) |
---|
| 983 | +{ |
---|
| 984 | + size_t i, nr = DIV_ROUND_UP(size + (off & ~PAGE_MASK), PAGE_SIZE); |
---|
| 985 | + |
---|
| 986 | + for (i = 0; i < nr; i++) |
---|
| 987 | + put_page(pages[i]); |
---|
| 988 | +} |
---|
| 989 | + |
---|
| 990 | +#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *)) |
---|
833 | 991 | |
---|
834 | 992 | /** |
---|
835 | 993 | * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio |
---|
.. | .. |
---|
839 | 997 | * Pins pages from *iter and appends them to @bio's bvec array. The |
---|
840 | 998 | * pages will have to be released using put_page() when done. |
---|
841 | 999 | * For multi-segment *iter, this function only adds pages from the |
---|
842 | | - * the next non-empty segment of the iov iterator. |
---|
| 1000 | + * next non-empty segment of the iov iterator. |
---|
843 | 1001 | */ |
---|
844 | 1002 | static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) |
---|
845 | 1003 | { |
---|
846 | | - unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx; |
---|
| 1004 | + unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; |
---|
| 1005 | + unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; |
---|
847 | 1006 | struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; |
---|
848 | 1007 | struct page **pages = (struct page **)bv; |
---|
| 1008 | + bool same_page = false; |
---|
| 1009 | + ssize_t size, left; |
---|
| 1010 | + unsigned len, i; |
---|
849 | 1011 | size_t offset; |
---|
850 | | - ssize_t size; |
---|
| 1012 | + |
---|
| 1013 | + /* |
---|
| 1014 | + * Move page array up in the allocated memory for the bio vecs as far as |
---|
| 1015 | + * possible so that we can start filling biovecs from the beginning |
---|
| 1016 | + * without overwriting the temporary page array. |
---|
| 1017 | + */ |
---|
| 1018 | + BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2); |
---|
| 1019 | + pages += entries_left * (PAGE_PTRS_PER_BVEC - 1); |
---|
851 | 1020 | |
---|
852 | 1021 | size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); |
---|
853 | 1022 | if (unlikely(size <= 0)) |
---|
854 | 1023 | return size ? size : -EFAULT; |
---|
855 | | - idx = nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE; |
---|
856 | 1024 | |
---|
857 | | - /* |
---|
858 | | - * Deep magic below: We need to walk the pinned pages backwards |
---|
859 | | - * because we are abusing the space allocated for the bio_vecs |
---|
860 | | - * for the page array. Because the bio_vecs are larger than the |
---|
861 | | - * page pointers by definition this will always work. But it also |
---|
862 | | - * means we can't use bio_add_page, so any changes to it's semantics |
---|
863 | | - * need to be reflected here as well. |
---|
864 | | - */ |
---|
865 | | - bio->bi_iter.bi_size += size; |
---|
866 | | - bio->bi_vcnt += nr_pages; |
---|
| 1025 | + for (left = size, i = 0; left > 0; left -= len, i++) { |
---|
| 1026 | + struct page *page = pages[i]; |
---|
867 | 1027 | |
---|
868 | | - while (idx--) { |
---|
869 | | - bv[idx].bv_page = pages[idx]; |
---|
870 | | - bv[idx].bv_len = PAGE_SIZE; |
---|
871 | | - bv[idx].bv_offset = 0; |
---|
| 1028 | + len = min_t(size_t, PAGE_SIZE - offset, left); |
---|
| 1029 | + |
---|
| 1030 | + if (__bio_try_merge_page(bio, page, len, offset, &same_page)) { |
---|
| 1031 | + if (same_page) |
---|
| 1032 | + put_page(page); |
---|
| 1033 | + } else { |
---|
| 1034 | + if (WARN_ON_ONCE(bio_full(bio, len))) { |
---|
| 1035 | + bio_put_pages(pages + i, left, offset); |
---|
| 1036 | + return -EINVAL; |
---|
| 1037 | + } |
---|
| 1038 | + __bio_add_page(bio, page, len, offset); |
---|
| 1039 | + } |
---|
| 1040 | + offset = 0; |
---|
872 | 1041 | } |
---|
873 | | - |
---|
874 | | - bv[0].bv_offset += offset; |
---|
875 | | - bv[0].bv_len -= offset; |
---|
876 | | - bv[nr_pages - 1].bv_len -= nr_pages * PAGE_SIZE - offset - size; |
---|
877 | 1042 | |
---|
878 | 1043 | iov_iter_advance(iter, size); |
---|
879 | 1044 | return 0; |
---|
880 | 1045 | } |
---|
881 | 1046 | |
---|
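The pointer arithmetic above deserves a worked example. The temporary page array shares storage with the tail of the free bvec slots; since a struct bio_vec is at least two page pointers wide, offsetting the array by entries_left * (PAGE_PTRS_PER_BVEC - 1) guarantees that filling bvec slot i never overwrites a page pointer that has not been consumed yet. A small sketch with assumed sizes (illustrative only, not kernel code):

```c
#include <stdio.h>

/* Illustrative only: the array-sharing arithmetic from __bio_iov_iter_get_pages(),
 * assuming sizeof(struct bio_vec) == 16 and sizeof(struct page *) == 8. */
#define SIZEOF_BIO_VEC		16
#define SIZEOF_PAGE_PTR		8
#define PAGE_PTRS_PER_BVEC	(SIZEOF_BIO_VEC / SIZEOF_PAGE_PTR)	/* == 2 */

int main(void)
{
	unsigned int entries_left = 4;	/* free bvec slots in the bio */

	/* The page array starts this many page-pointer slots into the free
	 * bvec space, leaving exactly entries_left pointers at the tail.
	 * Writing bvec slot i only touches storage whose page pointers have
	 * already been read, so the fill never races ahead of the reads. */
	printf("page array offset: %u pointer slots\n",
	       entries_left * (PAGE_PTRS_PER_BVEC - 1));	/* prints 4 */
	return 0;
}
```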
| 1047 | +static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter) |
---|
| 1048 | +{ |
---|
| 1049 | + unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; |
---|
| 1050 | + unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; |
---|
| 1051 | + struct request_queue *q = bio->bi_disk->queue; |
---|
| 1052 | + unsigned int max_append_sectors = queue_max_zone_append_sectors(q); |
---|
| 1053 | + struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; |
---|
| 1054 | + struct page **pages = (struct page **)bv; |
---|
| 1055 | + ssize_t size, left; |
---|
| 1056 | + unsigned len, i; |
---|
| 1057 | + size_t offset; |
---|
| 1058 | + int ret = 0; |
---|
| 1059 | + |
---|
| 1060 | + /* |
---|
| 1061 | + * Move page array up in the allocated memory for the bio vecs as far as |
---|
| 1062 | + * possible so that we can start filling biovecs from the beginning |
---|
| 1063 | + * without overwriting the temporary page array. |
---|
| 1064 | + */ |
---|
| 1065 | + BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2); |
---|
| 1066 | + pages += entries_left * (PAGE_PTRS_PER_BVEC - 1); |
---|
| 1067 | + |
---|
| 1068 | + size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); |
---|
| 1069 | + if (unlikely(size <= 0)) |
---|
| 1070 | + return size ? size : -EFAULT; |
---|
| 1071 | + |
---|
| 1072 | + for (left = size, i = 0; left > 0; left -= len, i++) { |
---|
| 1073 | + struct page *page = pages[i]; |
---|
| 1074 | + bool same_page = false; |
---|
| 1075 | + |
---|
| 1076 | + len = min_t(size_t, PAGE_SIZE - offset, left); |
---|
| 1077 | + if (bio_add_hw_page(q, bio, page, len, offset, |
---|
| 1078 | + max_append_sectors, &same_page) != len) { |
---|
| 1079 | + bio_put_pages(pages + i, left, offset); |
---|
| 1080 | + ret = -EINVAL; |
---|
| 1081 | + break; |
---|
| 1082 | + } |
---|
| 1083 | + if (same_page) |
---|
| 1084 | + put_page(page); |
---|
| 1085 | + offset = 0; |
---|
| 1086 | + } |
---|
| 1087 | + |
---|
| 1088 | + iov_iter_advance(iter, size - left); |
---|
| 1089 | + return ret; |
---|
| 1090 | +} |
---|
| 1091 | + |
---|
882 | 1092 | /** |
---|
883 | | - * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio |
---|
| 1093 | + * bio_iov_iter_get_pages - add user or kernel pages to a bio |
---|
884 | 1094 | * @bio: bio to add pages to |
---|
885 | | - * @iter: iov iterator describing the region to be mapped |
---|
| 1095 | + * @iter: iov iterator describing the region to be added |
---|
886 | 1096 | * |
---|
887 | | - * Pins pages from *iter and appends them to @bio's bvec array. The |
---|
888 | | - * pages will have to be released using put_page() when done. |
---|
| 1097 | + * This takes either an iterator pointing to user memory, or one pointing to |
---|
| 1098 | + * kernel pages (BVEC iterator). If we're adding user pages, we pin them and |
---|
| 1099 | + * map them into the kernel. On IO completion, the caller should put those |
---|
| 1100 | + * pages. If we're adding kernel pages, and the caller told us it's safe to |
---|
| 1101 | + * do so, we just have to add the pages to the bio directly. We don't grab an |
---|
| 1102 | + * extra reference to those pages (the user should already have that), and we |
---|
| 1103 | + * don't put the page on IO completion. The caller needs to check if the bio is |
---|
| 1104 | + * flagged BIO_NO_PAGE_REF on IO completion. If it isn't, then pages should be |
---|
| 1105 | + * released. |
---|
| 1106 | + * |
---|
889 | 1107 | * The function tries, but does not guarantee, to pin as many pages as |
---|
890 | | - * fit into the bio, or are requested in *iter, whatever is smaller. |
---|
891 | | - * If MM encounters an error pinning the requested pages, it stops. |
---|
892 | | - * Error is returned only if 0 pages could be pinned. |
---|
| 1108 | + * fit into the bio, or are requested in @iter, whatever is smaller. If |
---|
| 1109 | + * MM encounters an error pinning the requested pages, it stops. Error |
---|
| 1110 | + * is returned only if 0 pages could be pinned. |
---|
893 | 1111 | */ |
---|
894 | 1112 | int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) |
---|
895 | 1113 | { |
---|
896 | | - unsigned short orig_vcnt = bio->bi_vcnt; |
---|
| 1114 | + const bool is_bvec = iov_iter_is_bvec(iter); |
---|
| 1115 | + int ret; |
---|
| 1116 | + |
---|
| 1117 | + if (WARN_ON_ONCE(bio->bi_vcnt)) |
---|
| 1118 | + return -EINVAL; |
---|
897 | 1119 | |
---|
898 | 1120 | do { |
---|
899 | | - int ret = __bio_iov_iter_get_pages(bio, iter); |
---|
| 1121 | + if (bio_op(bio) == REQ_OP_ZONE_APPEND) { |
---|
| 1122 | + if (WARN_ON_ONCE(is_bvec)) |
---|
| 1123 | + return -EINVAL; |
---|
| 1124 | + ret = __bio_iov_append_get_pages(bio, iter); |
---|
| 1125 | + } else { |
---|
| 1126 | + if (is_bvec) |
---|
| 1127 | + ret = __bio_iov_bvec_add_pages(bio, iter); |
---|
| 1128 | + else |
---|
| 1129 | + ret = __bio_iov_iter_get_pages(bio, iter); |
---|
| 1130 | + } |
---|
| 1131 | + } while (!ret && iov_iter_count(iter) && !bio_full(bio, 0)); |
---|
900 | 1132 | |
---|
901 | | - if (unlikely(ret)) |
---|
902 | | - return bio->bi_vcnt > orig_vcnt ? 0 : ret; |
---|
903 | | - |
---|
904 | | - } while (iov_iter_count(iter) && !bio_full(bio)); |
---|
905 | | - |
---|
906 | | - return 0; |
---|
| 1133 | + if (is_bvec) |
---|
| 1134 | + bio_set_flag(bio, BIO_NO_PAGE_REF); |
---|
| 1135 | + return bio->bi_vcnt ? 0 : ret; |
---|
907 | 1136 | } |
---|
908 | 1137 | EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages); |
---|
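A hedged sketch of how a direct-I/O style caller might drive bio_iov_iter_get_pages(); the example_dio_* names are assumptions for illustration. Note that the bio_release_pages() helper added earlier in this patch already honours BIO_NO_PAGE_REF, so the same completion path covers user-backed and BVEC iterators:

```c
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>

/* Illustrative only: drop page references (and dirty them for reads) on completion. */
static void example_dio_end_io(struct bio *bio)
{
	bio_release_pages(bio, bio_data_dir(bio) == READ);
	bio_put(bio);
}

/* Illustrative only: build and submit one bio's worth of pages from @iter. */
static int example_dio_submit(struct block_device *bdev, sector_t sector,
			      struct iov_iter *iter, unsigned int opf)
{
	struct bio *bio;
	int ret;

	bio = bio_alloc(GFP_KERNEL, iov_iter_npages(iter, BIO_MAX_PAGES));
	if (!bio)
		return -ENOMEM;

	bio_set_dev(bio, bdev);
	bio->bi_iter.bi_sector = sector;
	bio->bi_opf = opf;
	bio->bi_end_io = example_dio_end_io;

	ret = bio_iov_iter_get_pages(bio, iter);
	if (ret) {
		bio_put(bio);
		return ret;
	}

	submit_bio(bio);
	return 0;
}
```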
909 | 1138 | |
---|
.. | .. |
---|
926 | 1155 | int submit_bio_wait(struct bio *bio) |
---|
927 | 1156 | { |
---|
928 | 1157 | DECLARE_COMPLETION_ONSTACK_MAP(done, bio->bi_disk->lockdep_map); |
---|
| 1158 | + unsigned long hang_check; |
---|
929 | 1159 | |
---|
930 | 1160 | bio->bi_private = &done; |
---|
931 | 1161 | bio->bi_end_io = submit_bio_wait_endio; |
---|
932 | 1162 | bio->bi_opf |= REQ_SYNC; |
---|
933 | 1163 | submit_bio(bio); |
---|
934 | | - wait_for_completion_io(&done); |
---|
| 1164 | + |
---|
| 1165 | + /* Prevent hang_check timer from firing at us during very long I/O */ |
---|
| 1166 | + hang_check = sysctl_hung_task_timeout_secs; |
---|
| 1167 | + if (hang_check) |
---|
| 1168 | + while (!wait_for_completion_io_timeout(&done, |
---|
| 1169 | + hang_check * (HZ/2))) |
---|
| 1170 | + ; |
---|
| 1171 | + else |
---|
| 1172 | + wait_for_completion_io(&done); |
---|
935 | 1173 | |
---|
936 | 1174 | return blk_status_to_errno(bio->bi_status); |
---|
937 | 1175 | } |
---|
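For context, the new wait loop is the standard pattern for keeping the hung-task watchdog quiet during very long synchronous I/O: rather than one unbounded uninterruptible sleep, the task sleeps in slices shorter than the watchdog period and simply re-waits until the completion fires. A stripped-down sketch of the same pattern (illustrative only; the helper name is an assumption):

```c
#include <linux/completion.h>
#include <linux/sched/sysctl.h>

/* Illustrative only: wait for @done without tripping the hung-task detector.
 * Each slice is half the watchdog period, so the task is always scheduled
 * again before the detector would flag it, even for multi-minute I/O. */
static void example_wait_io(struct completion *done)
{
	unsigned long hang_check = sysctl_hung_task_timeout_secs;

	if (!hang_check) {
		wait_for_completion_io(done);
		return;
	}

	while (!wait_for_completion_io_timeout(done, hang_check * (HZ / 2)))
		;	/* timed out: we were scheduled, detector resets; keep waiting */
}
```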
.. | .. |
---|
1043 | 1281 | } |
---|
1044 | 1282 | EXPORT_SYMBOL(bio_list_copy_data); |
---|
1045 | 1283 | |
---|
1046 | | -struct bio_map_data { |
---|
1047 | | - int is_our_pages; |
---|
1048 | | - struct iov_iter iter; |
---|
1049 | | - struct iovec iov[]; |
---|
1050 | | -}; |
---|
1051 | | - |
---|
1052 | | -static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data, |
---|
1053 | | - gfp_t gfp_mask) |
---|
1054 | | -{ |
---|
1055 | | - struct bio_map_data *bmd; |
---|
1056 | | - if (data->nr_segs > UIO_MAXIOV) |
---|
1057 | | - return NULL; |
---|
1058 | | - |
---|
1059 | | - bmd = kmalloc(sizeof(struct bio_map_data) + |
---|
1060 | | - sizeof(struct iovec) * data->nr_segs, gfp_mask); |
---|
1061 | | - if (!bmd) |
---|
1062 | | - return NULL; |
---|
1063 | | - memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs); |
---|
1064 | | - bmd->iter = *data; |
---|
1065 | | - bmd->iter.iov = bmd->iov; |
---|
1066 | | - return bmd; |
---|
1067 | | -} |
---|
1068 | | - |
---|
1069 | | -/** |
---|
1070 | | - * bio_copy_from_iter - copy all pages from iov_iter to bio |
---|
1071 | | - * @bio: The &struct bio which describes the I/O as destination |
---|
1072 | | - * @iter: iov_iter as source |
---|
1073 | | - * |
---|
1074 | | - * Copy all pages from iov_iter to bio. |
---|
1075 | | - * Returns 0 on success, or error on failure. |
---|
1076 | | - */ |
---|
1077 | | -static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter) |
---|
1078 | | -{ |
---|
1079 | | - int i; |
---|
1080 | | - struct bio_vec *bvec; |
---|
1081 | | - |
---|
1082 | | - bio_for_each_segment_all(bvec, bio, i) { |
---|
1083 | | - ssize_t ret; |
---|
1084 | | - |
---|
1085 | | - ret = copy_page_from_iter(bvec->bv_page, |
---|
1086 | | - bvec->bv_offset, |
---|
1087 | | - bvec->bv_len, |
---|
1088 | | - iter); |
---|
1089 | | - |
---|
1090 | | - if (!iov_iter_count(iter)) |
---|
1091 | | - break; |
---|
1092 | | - |
---|
1093 | | - if (ret < bvec->bv_len) |
---|
1094 | | - return -EFAULT; |
---|
1095 | | - } |
---|
1096 | | - |
---|
1097 | | - return 0; |
---|
1098 | | -} |
---|
1099 | | - |
---|
1100 | | -/** |
---|
1101 | | - * bio_copy_to_iter - copy all pages from bio to iov_iter |
---|
1102 | | - * @bio: The &struct bio which describes the I/O as source |
---|
1103 | | - * @iter: iov_iter as destination |
---|
1104 | | - * |
---|
1105 | | - * Copy all pages from bio to iov_iter. |
---|
1106 | | - * Returns 0 on success, or error on failure. |
---|
1107 | | - */ |
---|
1108 | | -static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter) |
---|
1109 | | -{ |
---|
1110 | | - int i; |
---|
1111 | | - struct bio_vec *bvec; |
---|
1112 | | - |
---|
1113 | | - bio_for_each_segment_all(bvec, bio, i) { |
---|
1114 | | - ssize_t ret; |
---|
1115 | | - |
---|
1116 | | - ret = copy_page_to_iter(bvec->bv_page, |
---|
1117 | | - bvec->bv_offset, |
---|
1118 | | - bvec->bv_len, |
---|
1119 | | - &iter); |
---|
1120 | | - |
---|
1121 | | - if (!iov_iter_count(&iter)) |
---|
1122 | | - break; |
---|
1123 | | - |
---|
1124 | | - if (ret < bvec->bv_len) |
---|
1125 | | - return -EFAULT; |
---|
1126 | | - } |
---|
1127 | | - |
---|
1128 | | - return 0; |
---|
1129 | | -} |
---|
1130 | | - |
---|
1131 | 1284 | void bio_free_pages(struct bio *bio) |
---|
1132 | 1285 | { |
---|
1133 | 1286 | struct bio_vec *bvec; |
---|
1134 | | - int i; |
---|
| 1287 | + struct bvec_iter_all iter_all; |
---|
1135 | 1288 | |
---|
1136 | | - bio_for_each_segment_all(bvec, bio, i) |
---|
| 1289 | + bio_for_each_segment_all(bvec, bio, iter_all) |
---|
1137 | 1290 | __free_page(bvec->bv_page); |
---|
1138 | 1291 | } |
---|
1139 | 1292 | EXPORT_SYMBOL(bio_free_pages); |
---|
1140 | | - |
---|
1141 | | -/** |
---|
1142 | | - * bio_uncopy_user - finish previously mapped bio |
---|
1143 | | - * @bio: bio being terminated |
---|
1144 | | - * |
---|
1145 | | - * Free pages allocated from bio_copy_user_iov() and write back data |
---|
1146 | | - * to user space in case of a read. |
---|
1147 | | - */ |
---|
1148 | | -int bio_uncopy_user(struct bio *bio) |
---|
1149 | | -{ |
---|
1150 | | - struct bio_map_data *bmd = bio->bi_private; |
---|
1151 | | - int ret = 0; |
---|
1152 | | - |
---|
1153 | | - if (!bio_flagged(bio, BIO_NULL_MAPPED)) { |
---|
1154 | | - /* |
---|
1155 | | - * if we're in a workqueue, the request is orphaned, so |
---|
1156 | | - * don't copy into a random user address space, just free |
---|
1157 | | - * and return -EINTR so user space doesn't expect any data. |
---|
1158 | | - */ |
---|
1159 | | - if (!current->mm) |
---|
1160 | | - ret = -EINTR; |
---|
1161 | | - else if (bio_data_dir(bio) == READ) |
---|
1162 | | - ret = bio_copy_to_iter(bio, bmd->iter); |
---|
1163 | | - if (bmd->is_our_pages) |
---|
1164 | | - bio_free_pages(bio); |
---|
1165 | | - } |
---|
1166 | | - kfree(bmd); |
---|
1167 | | - bio_put(bio); |
---|
1168 | | - return ret; |
---|
1169 | | -} |
---|
1170 | | - |
---|
1171 | | -/** |
---|
1172 | | - * bio_copy_user_iov - copy user data to bio |
---|
1173 | | - * @q: destination block queue |
---|
1174 | | - * @map_data: pointer to the rq_map_data holding pages (if necessary) |
---|
1175 | | - * @iter: iovec iterator |
---|
1176 | | - * @gfp_mask: memory allocation flags |
---|
1177 | | - * |
---|
1178 | | - * Prepares and returns a bio for indirect user io, bouncing data |
---|
1179 | | - * to/from kernel pages as necessary. Must be paired with |
---|
1180 | | - * call bio_uncopy_user() on io completion. |
---|
1181 | | - */ |
---|
1182 | | -struct bio *bio_copy_user_iov(struct request_queue *q, |
---|
1183 | | - struct rq_map_data *map_data, |
---|
1184 | | - struct iov_iter *iter, |
---|
1185 | | - gfp_t gfp_mask) |
---|
1186 | | -{ |
---|
1187 | | - struct bio_map_data *bmd; |
---|
1188 | | - struct page *page; |
---|
1189 | | - struct bio *bio; |
---|
1190 | | - int i = 0, ret; |
---|
1191 | | - int nr_pages; |
---|
1192 | | - unsigned int len = iter->count; |
---|
1193 | | - unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0; |
---|
1194 | | - |
---|
1195 | | - bmd = bio_alloc_map_data(iter, gfp_mask); |
---|
1196 | | - if (!bmd) |
---|
1197 | | - return ERR_PTR(-ENOMEM); |
---|
1198 | | - |
---|
1199 | | - /* |
---|
1200 | | - * We need to do a deep copy of the iov_iter including the iovecs. |
---|
1201 | | - * The caller provided iov might point to an on-stack or otherwise |
---|
1202 | | - * shortlived one. |
---|
1203 | | - */ |
---|
1204 | | - bmd->is_our_pages = map_data ? 0 : 1; |
---|
1205 | | - |
---|
1206 | | - nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE); |
---|
1207 | | - if (nr_pages > BIO_MAX_PAGES) |
---|
1208 | | - nr_pages = BIO_MAX_PAGES; |
---|
1209 | | - |
---|
1210 | | - ret = -ENOMEM; |
---|
1211 | | - bio = bio_kmalloc(gfp_mask, nr_pages); |
---|
1212 | | - if (!bio) |
---|
1213 | | - goto out_bmd; |
---|
1214 | | - |
---|
1215 | | - ret = 0; |
---|
1216 | | - |
---|
1217 | | - if (map_data) { |
---|
1218 | | - nr_pages = 1 << map_data->page_order; |
---|
1219 | | - i = map_data->offset / PAGE_SIZE; |
---|
1220 | | - } |
---|
1221 | | - while (len) { |
---|
1222 | | - unsigned int bytes = PAGE_SIZE; |
---|
1223 | | - |
---|
1224 | | - bytes -= offset; |
---|
1225 | | - |
---|
1226 | | - if (bytes > len) |
---|
1227 | | - bytes = len; |
---|
1228 | | - |
---|
1229 | | - if (map_data) { |
---|
1230 | | - if (i == map_data->nr_entries * nr_pages) { |
---|
1231 | | - ret = -ENOMEM; |
---|
1232 | | - break; |
---|
1233 | | - } |
---|
1234 | | - |
---|
1235 | | - page = map_data->pages[i / nr_pages]; |
---|
1236 | | - page += (i % nr_pages); |
---|
1237 | | - |
---|
1238 | | - i++; |
---|
1239 | | - } else { |
---|
1240 | | - page = alloc_page(q->bounce_gfp | gfp_mask); |
---|
1241 | | - if (!page) { |
---|
1242 | | - ret = -ENOMEM; |
---|
1243 | | - break; |
---|
1244 | | - } |
---|
1245 | | - } |
---|
1246 | | - |
---|
1247 | | - if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) { |
---|
1248 | | - if (!map_data) |
---|
1249 | | - __free_page(page); |
---|
1250 | | - break; |
---|
1251 | | - } |
---|
1252 | | - |
---|
1253 | | - len -= bytes; |
---|
1254 | | - offset = 0; |
---|
1255 | | - } |
---|
1256 | | - |
---|
1257 | | - if (ret) |
---|
1258 | | - goto cleanup; |
---|
1259 | | - |
---|
1260 | | - if (map_data) |
---|
1261 | | - map_data->offset += bio->bi_iter.bi_size; |
---|
1262 | | - |
---|
1263 | | - /* |
---|
1264 | | - * success |
---|
1265 | | - */ |
---|
1266 | | - if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) || |
---|
1267 | | - (map_data && map_data->from_user)) { |
---|
1268 | | - ret = bio_copy_from_iter(bio, iter); |
---|
1269 | | - if (ret) |
---|
1270 | | - goto cleanup; |
---|
1271 | | - } else { |
---|
1272 | | - if (bmd->is_our_pages) |
---|
1273 | | - zero_fill_bio(bio); |
---|
1274 | | - iov_iter_advance(iter, bio->bi_iter.bi_size); |
---|
1275 | | - } |
---|
1276 | | - |
---|
1277 | | - bio->bi_private = bmd; |
---|
1278 | | - if (map_data && map_data->null_mapped) |
---|
1279 | | - bio_set_flag(bio, BIO_NULL_MAPPED); |
---|
1280 | | - return bio; |
---|
1281 | | -cleanup: |
---|
1282 | | - if (!map_data) |
---|
1283 | | - bio_free_pages(bio); |
---|
1284 | | - bio_put(bio); |
---|
1285 | | -out_bmd: |
---|
1286 | | - kfree(bmd); |
---|
1287 | | - return ERR_PTR(ret); |
---|
1288 | | -} |
---|
1289 | | - |
---|
1290 | | -/** |
---|
1291 | | - * bio_map_user_iov - map user iovec into bio |
---|
1292 | | - * @q: the struct request_queue for the bio |
---|
1293 | | - * @iter: iovec iterator |
---|
1294 | | - * @gfp_mask: memory allocation flags |
---|
1295 | | - * |
---|
1296 | | - * Map the user space address into a bio suitable for io to a block |
---|
1297 | | - * device. Returns an error pointer in case of error. |
---|
1298 | | - */ |
---|
1299 | | -struct bio *bio_map_user_iov(struct request_queue *q, |
---|
1300 | | - struct iov_iter *iter, |
---|
1301 | | - gfp_t gfp_mask) |
---|
1302 | | -{ |
---|
1303 | | - int j; |
---|
1304 | | - struct bio *bio; |
---|
1305 | | - int ret; |
---|
1306 | | - struct bio_vec *bvec; |
---|
1307 | | - |
---|
1308 | | - if (!iov_iter_count(iter)) |
---|
1309 | | - return ERR_PTR(-EINVAL); |
---|
1310 | | - |
---|
1311 | | - bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES)); |
---|
1312 | | - if (!bio) |
---|
1313 | | - return ERR_PTR(-ENOMEM); |
---|
1314 | | - |
---|
1315 | | - while (iov_iter_count(iter)) { |
---|
1316 | | - struct page **pages; |
---|
1317 | | - ssize_t bytes; |
---|
1318 | | - size_t offs, added = 0; |
---|
1319 | | - int npages; |
---|
1320 | | - |
---|
1321 | | - bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs); |
---|
1322 | | - if (unlikely(bytes <= 0)) { |
---|
1323 | | - ret = bytes ? bytes : -EFAULT; |
---|
1324 | | - goto out_unmap; |
---|
1325 | | - } |
---|
1326 | | - |
---|
1327 | | - npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE); |
---|
1328 | | - |
---|
1329 | | - if (unlikely(offs & queue_dma_alignment(q))) { |
---|
1330 | | - ret = -EINVAL; |
---|
1331 | | - j = 0; |
---|
1332 | | - } else { |
---|
1333 | | - for (j = 0; j < npages; j++) { |
---|
1334 | | - struct page *page = pages[j]; |
---|
1335 | | - unsigned int n = PAGE_SIZE - offs; |
---|
1336 | | - unsigned short prev_bi_vcnt = bio->bi_vcnt; |
---|
1337 | | - |
---|
1338 | | - if (n > bytes) |
---|
1339 | | - n = bytes; |
---|
1340 | | - |
---|
1341 | | - if (!bio_add_pc_page(q, bio, page, n, offs)) |
---|
1342 | | - break; |
---|
1343 | | - |
---|
1344 | | - /* |
---|
1345 | | - * check if vector was merged with previous |
---|
1346 | | - * drop page reference if needed |
---|
1347 | | - */ |
---|
1348 | | - if (bio->bi_vcnt == prev_bi_vcnt) |
---|
1349 | | - put_page(page); |
---|
1350 | | - |
---|
1351 | | - added += n; |
---|
1352 | | - bytes -= n; |
---|
1353 | | - offs = 0; |
---|
1354 | | - } |
---|
1355 | | - iov_iter_advance(iter, added); |
---|
1356 | | - } |
---|
1357 | | - /* |
---|
1358 | | - * release the pages we didn't map into the bio, if any |
---|
1359 | | - */ |
---|
1360 | | - while (j < npages) |
---|
1361 | | - put_page(pages[j++]); |
---|
1362 | | - kvfree(pages); |
---|
1363 | | - /* couldn't stuff something into bio? */ |
---|
1364 | | - if (bytes) |
---|
1365 | | - break; |
---|
1366 | | - } |
---|
1367 | | - |
---|
1368 | | - bio_set_flag(bio, BIO_USER_MAPPED); |
---|
1369 | | - |
---|
1370 | | - /* |
---|
1371 | | - * subtle -- if bio_map_user_iov() ended up bouncing a bio, |
---|
1372 | | - * it would normally disappear when its bi_end_io is run. |
---|
1373 | | - * however, we need it for the unmap, so grab an extra |
---|
1374 | | - * reference to it |
---|
1375 | | - */ |
---|
1376 | | - bio_get(bio); |
---|
1377 | | - return bio; |
---|
1378 | | - |
---|
1379 | | - out_unmap: |
---|
1380 | | - bio_for_each_segment_all(bvec, bio, j) { |
---|
1381 | | - put_page(bvec->bv_page); |
---|
1382 | | - } |
---|
1383 | | - bio_put(bio); |
---|
1384 | | - return ERR_PTR(ret); |
---|
1385 | | -} |
---|
1386 | | - |
---|
1387 | | -static void __bio_unmap_user(struct bio *bio) |
---|
1388 | | -{ |
---|
1389 | | - struct bio_vec *bvec; |
---|
1390 | | - int i; |
---|
1391 | | - |
---|
1392 | | - /* |
---|
1393 | | - * make sure we dirty pages we wrote to |
---|
1394 | | - */ |
---|
1395 | | - bio_for_each_segment_all(bvec, bio, i) { |
---|
1396 | | - if (bio_data_dir(bio) == READ) |
---|
1397 | | - set_page_dirty_lock(bvec->bv_page); |
---|
1398 | | - |
---|
1399 | | - put_page(bvec->bv_page); |
---|
1400 | | - } |
---|
1401 | | - |
---|
1402 | | - bio_put(bio); |
---|
1403 | | -} |
---|
1404 | | - |
---|
1405 | | -/** |
---|
1406 | | - * bio_unmap_user - unmap a bio |
---|
1407 | | - * @bio: the bio being unmapped |
---|
1408 | | - * |
---|
1409 | | - * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from |
---|
1410 | | - * process context. |
---|
1411 | | - * |
---|
1412 | | - * bio_unmap_user() may sleep. |
---|
1413 | | - */ |
---|
1414 | | -void bio_unmap_user(struct bio *bio) |
---|
1415 | | -{ |
---|
1416 | | - __bio_unmap_user(bio); |
---|
1417 | | - bio_put(bio); |
---|
1418 | | -} |
---|
1419 | | - |
---|
1420 | | -static void bio_map_kern_endio(struct bio *bio) |
---|
1421 | | -{ |
---|
1422 | | - bio_put(bio); |
---|
1423 | | -} |
---|
1424 | | - |
---|
1425 | | -/** |
---|
1426 | | - * bio_map_kern - map kernel address into bio |
---|
1427 | | - * @q: the struct request_queue for the bio |
---|
1428 | | - * @data: pointer to buffer to map |
---|
1429 | | - * @len: length in bytes |
---|
1430 | | - * @gfp_mask: allocation flags for bio allocation |
---|
1431 | | - * |
---|
1432 | | - * Map the kernel address into a bio suitable for io to a block |
---|
1433 | | - * device. Returns an error pointer in case of error. |
---|
1434 | | - */ |
---|
1435 | | -struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, |
---|
1436 | | - gfp_t gfp_mask) |
---|
1437 | | -{ |
---|
1438 | | - unsigned long kaddr = (unsigned long)data; |
---|
1439 | | - unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
---|
1440 | | - unsigned long start = kaddr >> PAGE_SHIFT; |
---|
1441 | | - const int nr_pages = end - start; |
---|
1442 | | - int offset, i; |
---|
1443 | | - struct bio *bio; |
---|
1444 | | - |
---|
1445 | | - bio = bio_kmalloc(gfp_mask, nr_pages); |
---|
1446 | | - if (!bio) |
---|
1447 | | - return ERR_PTR(-ENOMEM); |
---|
1448 | | - |
---|
1449 | | - offset = offset_in_page(kaddr); |
---|
1450 | | - for (i = 0; i < nr_pages; i++) { |
---|
1451 | | - unsigned int bytes = PAGE_SIZE - offset; |
---|
1452 | | - |
---|
1453 | | - if (len <= 0) |
---|
1454 | | - break; |
---|
1455 | | - |
---|
1456 | | - if (bytes > len) |
---|
1457 | | - bytes = len; |
---|
1458 | | - |
---|
1459 | | - if (bio_add_pc_page(q, bio, virt_to_page(data), bytes, |
---|
1460 | | - offset) < bytes) { |
---|
1461 | | - /* we don't support partial mappings */ |
---|
1462 | | - bio_put(bio); |
---|
1463 | | - return ERR_PTR(-EINVAL); |
---|
1464 | | - } |
---|
1465 | | - |
---|
1466 | | - data += bytes; |
---|
1467 | | - len -= bytes; |
---|
1468 | | - offset = 0; |
---|
1469 | | - } |
---|
1470 | | - |
---|
1471 | | - bio->bi_end_io = bio_map_kern_endio; |
---|
1472 | | - return bio; |
---|
1473 | | -} |
---|
1474 | | -EXPORT_SYMBOL(bio_map_kern); |
---|
1475 | | - |
---|
1476 | | -static void bio_copy_kern_endio(struct bio *bio) |
---|
1477 | | -{ |
---|
1478 | | - bio_free_pages(bio); |
---|
1479 | | - bio_put(bio); |
---|
1480 | | -} |
---|
1481 | | - |
---|
1482 | | -static void bio_copy_kern_endio_read(struct bio *bio) |
---|
1483 | | -{ |
---|
1484 | | - char *p = bio->bi_private; |
---|
1485 | | - struct bio_vec *bvec; |
---|
1486 | | - int i; |
---|
1487 | | - |
---|
1488 | | - bio_for_each_segment_all(bvec, bio, i) { |
---|
1489 | | - memcpy(p, page_address(bvec->bv_page), bvec->bv_len); |
---|
1490 | | - p += bvec->bv_len; |
---|
1491 | | - } |
---|
1492 | | - |
---|
1493 | | - bio_copy_kern_endio(bio); |
---|
1494 | | -} |
---|
1495 | | - |
---|
1496 | | -/** |
---|
1497 | | - * bio_copy_kern - copy kernel address into bio |
---|
1498 | | - * @q: the struct request_queue for the bio |
---|
1499 | | - * @data: pointer to buffer to copy |
---|
1500 | | - * @len: length in bytes |
---|
1501 | | - * @gfp_mask: allocation flags for bio and page allocation |
---|
1502 | | - * @reading: data direction is READ |
---|
1503 | | - * |
---|
1504 | | - * copy the kernel address into a bio suitable for io to a block |
---|
1505 | | - * device. Returns an error pointer in case of error. |
---|
1506 | | - */ |
---|
1507 | | -struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, |
---|
1508 | | - gfp_t gfp_mask, int reading) |
---|
1509 | | -{ |
---|
1510 | | - unsigned long kaddr = (unsigned long)data; |
---|
1511 | | - unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
---|
1512 | | - unsigned long start = kaddr >> PAGE_SHIFT; |
---|
1513 | | - struct bio *bio; |
---|
1514 | | - void *p = data; |
---|
1515 | | - int nr_pages = 0; |
---|
1516 | | - |
---|
1517 | | - /* |
---|
1518 | | - * Overflow, abort |
---|
1519 | | - */ |
---|
1520 | | - if (end < start) |
---|
1521 | | - return ERR_PTR(-EINVAL); |
---|
1522 | | - |
---|
1523 | | - nr_pages = end - start; |
---|
1524 | | - bio = bio_kmalloc(gfp_mask, nr_pages); |
---|
1525 | | - if (!bio) |
---|
1526 | | - return ERR_PTR(-ENOMEM); |
---|
1527 | | - |
---|
1528 | | - while (len) { |
---|
1529 | | - struct page *page; |
---|
1530 | | - unsigned int bytes = PAGE_SIZE; |
---|
1531 | | - |
---|
1532 | | - if (bytes > len) |
---|
1533 | | - bytes = len; |
---|
1534 | | - |
---|
1535 | | - page = alloc_page(q->bounce_gfp | gfp_mask); |
---|
1536 | | - if (!page) |
---|
1537 | | - goto cleanup; |
---|
1538 | | - |
---|
1539 | | - if (!reading) |
---|
1540 | | - memcpy(page_address(page), p, bytes); |
---|
1541 | | - |
---|
1542 | | - if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) |
---|
1543 | | - break; |
---|
1544 | | - |
---|
1545 | | - len -= bytes; |
---|
1546 | | - p += bytes; |
---|
1547 | | - } |
---|
1548 | | - |
---|
1549 | | - if (reading) { |
---|
1550 | | - bio->bi_end_io = bio_copy_kern_endio_read; |
---|
1551 | | - bio->bi_private = data; |
---|
1552 | | - } else { |
---|
1553 | | - bio->bi_end_io = bio_copy_kern_endio; |
---|
1554 | | - } |
---|
1555 | | - |
---|
1556 | | - return bio; |
---|
1557 | | - |
---|
1558 | | -cleanup: |
---|
1559 | | - bio_free_pages(bio); |
---|
1560 | | - bio_put(bio); |
---|
1561 | | - return ERR_PTR(-ENOMEM); |
---|
1562 | | -} |
---|
1563 | 1293 | |
---|
1564 | 1294 | /* |
---|
1565 | 1295 | * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions |
---|
.. | .. |
---|
1593 | 1323 | void bio_set_pages_dirty(struct bio *bio) |
---|
1594 | 1324 | { |
---|
1595 | 1325 | struct bio_vec *bvec; |
---|
1596 | | - int i; |
---|
| 1326 | + struct bvec_iter_all iter_all; |
---|
1597 | 1327 | |
---|
1598 | | - bio_for_each_segment_all(bvec, bio, i) { |
---|
| 1328 | + bio_for_each_segment_all(bvec, bio, iter_all) { |
---|
1599 | 1329 | if (!PageCompound(bvec->bv_page)) |
---|
1600 | 1330 | set_page_dirty_lock(bvec->bv_page); |
---|
1601 | 1331 | } |
---|
1602 | | -} |
---|
1603 | | -EXPORT_SYMBOL_GPL(bio_set_pages_dirty); |
---|
1604 | | - |
---|
1605 | | -static void bio_release_pages(struct bio *bio) |
---|
1606 | | -{ |
---|
1607 | | - struct bio_vec *bvec; |
---|
1608 | | - int i; |
---|
1609 | | - |
---|
1610 | | - bio_for_each_segment_all(bvec, bio, i) |
---|
1611 | | - put_page(bvec->bv_page); |
---|
1612 | 1332 | } |
---|
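A hedged sketch of the calling convention these dirty-page helpers support in direct-I/O paths; the function names are illustrative placeholders. Pages targeted by a READ are dirtied before submission, and the completion side lets bio_check_pages_dirty() re-dirty (from a workqueue) any page the VM cleaned in the meantime, because bi_end_io may run in interrupt context.

```c
/* Hedged sketch (not part of this patch): direct-I/O usage of the helpers above. */
static void my_dio_submit(struct bio *bio)
{
	if (bio_data_dir(bio) == READ)
		bio_set_pages_dirty(bio);	/* mark before the pages are written to */
	submit_bio(bio);
}

static void my_dio_end_io(struct bio *bio)
{
	if (bio_data_dir(bio) == READ) {
		bio_check_pages_dirty(bio);	/* releases the pages and puts the bio */
	} else {
		bio_release_pages(bio, false);
		bio_put(bio);
	}
}
```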
1613 | 1333 | |
---|
1614 | 1334 | /* |
---|
.. | .. |
---|
1643 | 1363 | while ((bio = next) != NULL) { |
---|
1644 | 1364 | next = bio->bi_private; |
---|
1645 | 1365 | |
---|
1646 | | - bio_set_pages_dirty(bio); |
---|
1647 | | - bio_release_pages(bio); |
---|
| 1366 | + bio_release_pages(bio, true); |
---|
1648 | 1367 | bio_put(bio); |
---|
1649 | 1368 | } |
---|
1650 | 1369 | } |
---|
.. | .. |
---|
1653 | 1372 | { |
---|
1654 | 1373 | struct bio_vec *bvec; |
---|
1655 | 1374 | unsigned long flags; |
---|
1656 | | - int i; |
---|
| 1375 | + struct bvec_iter_all iter_all; |
---|
1657 | 1376 | |
---|
1658 | | - bio_for_each_segment_all(bvec, bio, i) { |
---|
| 1377 | + bio_for_each_segment_all(bvec, bio, iter_all) { |
---|
1659 | 1378 | if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page)) |
---|
1660 | 1379 | goto defer; |
---|
1661 | 1380 | } |
---|
1662 | 1381 | |
---|
1663 | | - bio_release_pages(bio); |
---|
| 1382 | + bio_release_pages(bio, false); |
---|
1664 | 1383 | bio_put(bio); |
---|
1665 | 1384 | return; |
---|
1666 | 1385 | defer: |
---|
.. | .. |
---|
1670 | 1389 | spin_unlock_irqrestore(&bio_dirty_lock, flags); |
---|
1671 | 1390 | schedule_work(&bio_dirty_work); |
---|
1672 | 1391 | } |
---|
1673 | | -EXPORT_SYMBOL_GPL(bio_check_pages_dirty); |
---|
1674 | | - |
---|
1675 | | -void generic_start_io_acct(struct request_queue *q, int op, |
---|
1676 | | - unsigned long sectors, struct hd_struct *part) |
---|
1677 | | -{ |
---|
1678 | | - const int sgrp = op_stat_group(op); |
---|
1679 | | - int cpu = part_stat_lock(); |
---|
1680 | | - |
---|
1681 | | - part_round_stats(q, cpu, part); |
---|
1682 | | - part_stat_inc(cpu, part, ios[sgrp]); |
---|
1683 | | - part_stat_add(cpu, part, sectors[sgrp], sectors); |
---|
1684 | | - part_inc_in_flight(q, part, op_is_write(op)); |
---|
1685 | | - |
---|
1686 | | - part_stat_unlock(); |
---|
1687 | | -} |
---|
1688 | | -EXPORT_SYMBOL(generic_start_io_acct); |
---|
1689 | | - |
---|
1690 | | -void generic_end_io_acct(struct request_queue *q, int req_op, |
---|
1691 | | - struct hd_struct *part, unsigned long start_time) |
---|
1692 | | -{ |
---|
1693 | | - unsigned long duration = jiffies - start_time; |
---|
1694 | | - const int sgrp = op_stat_group(req_op); |
---|
1695 | | - int cpu = part_stat_lock(); |
---|
1696 | | - |
---|
1697 | | - part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration)); |
---|
1698 | | - part_round_stats(q, cpu, part); |
---|
1699 | | - part_dec_in_flight(q, part, op_is_write(req_op)); |
---|
1700 | | - |
---|
1701 | | - part_stat_unlock(); |
---|
1702 | | -} |
---|
1703 | | -EXPORT_SYMBOL(generic_end_io_acct); |
---|
1704 | | - |
---|
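The accounting pair deleted above carried no kernel-doc, so here is a hedged sketch of the pattern bio-based drivers followed with it; the wrapper name and the use of the whole-disk partition are illustrative assumptions, not part of this patch.

```c
/* Hedged sketch (not part of this patch): old-style per-bio I/O accounting. */
static void my_account_bio(struct gendisk *disk, struct bio *bio)
{
	struct request_queue *q = disk->queue;
	unsigned long start = jiffies;

	generic_start_io_acct(q, bio_op(bio), bio_sectors(bio), &disk->part0);

	/* ... carry out the I/O described by @bio ... */

	generic_end_io_acct(q, bio_op(bio), &disk->part0, start);
}
```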
1705 | | -#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE |
---|
1706 | | -void bio_flush_dcache_pages(struct bio *bi) |
---|
1707 | | -{ |
---|
1708 | | - struct bio_vec bvec; |
---|
1709 | | - struct bvec_iter iter; |
---|
1710 | | - |
---|
1711 | | - bio_for_each_segment(bvec, bi, iter) |
---|
1712 | | - flush_dcache_page(bvec.bv_page); |
---|
1713 | | -} |
---|
1714 | | -EXPORT_SYMBOL(bio_flush_dcache_pages); |
---|
1715 | | -#endif |
---|
1716 | 1392 | |
---|
1717 | 1393 | static inline bool bio_remaining_done(struct bio *bio) |
---|
1718 | 1394 | { |
---|
.. | .. |
---|
1752 | 1428 | again: |
---|
1753 | 1429 | if (!bio_remaining_done(bio)) |
---|
1754 | 1430 | return; |
---|
1755 | | - |
---|
1756 | | - if (!blk_crypto_endio(bio)) |
---|
1757 | | - return; |
---|
1758 | | - |
---|
1759 | 1431 | if (!bio_integrity_endio(bio)) |
---|
1760 | 1432 | return; |
---|
1761 | 1433 | |
---|
.. | .. |
---|
1776 | 1448 | } |
---|
1777 | 1449 | |
---|
1778 | 1450 | if (bio->bi_disk && bio_flagged(bio, BIO_TRACE_COMPLETION)) { |
---|
1779 | | - trace_block_bio_complete(bio->bi_disk->queue, bio, |
---|
1780 | | - blk_status_to_errno(bio->bi_status)); |
---|
| 1451 | + trace_block_bio_complete(bio->bi_disk->queue, bio); |
---|
1781 | 1452 | bio_clear_flag(bio, BIO_TRACE_COMPLETION); |
---|
1782 | 1453 | } |
---|
1783 | 1454 | |
---|
.. | .. |
---|
1800 | 1471 | * @bio, and updates @bio to represent the remaining sectors. |
---|
1801 | 1472 | * |
---|
1802 | 1473 | * Unless this is a discard request the newly allocated bio will point |
---|
1803 | | - * to @bio's bi_io_vec; it is the caller's responsibility to ensure that |
---|
1804 | | - * @bio is not freed before the split. |
---|
| 1474 | + * to @bio's bi_io_vec. It is the caller's responsibility to ensure that |
---|
| 1475 | + * neither @bio nor @bs are freed before the split bio. |
---|
1805 | 1476 | */ |
---|
1806 | 1477 | struct bio *bio_split(struct bio *bio, int sectors, |
---|
1807 | 1478 | gfp_t gfp, struct bio_set *bs) |
---|
.. | .. |
---|
1810 | 1481 | |
---|
1811 | 1482 | BUG_ON(sectors <= 0); |
---|
1812 | 1483 | BUG_ON(sectors >= bio_sectors(bio)); |
---|
| 1484 | + |
---|
| 1485 | + /* Zone append commands cannot be split */ |
---|
| 1486 | + if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND)) |
---|
| 1487 | + return NULL; |
---|
1813 | 1488 | |
---|
1814 | 1489 | split = bio_clone_fast(bio, gfp, bs); |
---|
1815 | 1490 | if (!split) |
---|
.. | .. |
---|
1821 | 1496 | bio_integrity_trim(split); |
---|
1822 | 1497 | |
---|
1823 | 1498 | bio_advance(bio, split->bi_iter.bi_size); |
---|
1824 | | - bio->bi_iter.bi_done = 0; |
---|
1825 | 1499 | |
---|
1826 | 1500 | if (bio_flagged(bio, BIO_TRACE_COMPLETION)) |
---|
1827 | 1501 | bio_set_flag(split, BIO_TRACE_COMPLETION); |
---|
.. | .. |
---|
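A hedged sketch of the split-and-chain pattern that bio_split() and bio_chain() are designed for, mirroring how stacking drivers clamp an oversized bio; my_clamp_bio and @max_sectors are illustrative assumptions, not part of this patch.

```c
/* Hedged sketch: carve off what fits and requeue the remainder. */
static struct bio *my_clamp_bio(struct bio *bio, unsigned int max_sectors,
				struct bio_set *bs)
{
	struct bio *split;

	if (bio_sectors(bio) <= max_sectors)
		return bio;

	/* The first @max_sectors go into @split; @bio now describes the rest. */
	split = bio_split(bio, max_sectors, GFP_NOIO, bs);
	bio_chain(split, bio);		/* @bio completes only after @split does */
	submit_bio_noacct(bio);		/* requeue the remainder */
	return split;			/* caller continues with this piece */
}
```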
1846 | 1520 | if (offset == 0 && size == bio->bi_iter.bi_size) |
---|
1847 | 1521 | return; |
---|
1848 | 1522 | |
---|
1849 | | - bio_clear_flag(bio, BIO_SEG_VALID); |
---|
1850 | | - |
---|
1851 | 1523 | bio_advance(bio, offset << 9); |
---|
1852 | | - |
---|
1853 | 1524 | bio->bi_iter.bi_size = size; |
---|
1854 | 1525 | |
---|
1855 | 1526 | if (bio_integrity(bio)) |
---|
.. | .. |
---|
1968 | 1639 | } |
---|
1969 | 1640 | EXPORT_SYMBOL(bioset_init_from_src); |
---|
1970 | 1641 | |
---|
1971 | | -#ifdef CONFIG_BLK_CGROUP |
---|
1972 | | - |
---|
1973 | | -#ifdef CONFIG_MEMCG |
---|
1974 | | -/** |
---|
1975 | | - * bio_associate_blkcg_from_page - associate a bio with the page's blkcg |
---|
1976 | | - * @bio: target bio |
---|
1977 | | - * @page: the page to lookup the blkcg from |
---|
1978 | | - * |
---|
1979 | | - * Associate @bio with the blkcg from @page's owning memcg. This works like |
---|
1980 | | - * every other associate function wrt references. |
---|
1981 | | - */ |
---|
1982 | | -int bio_associate_blkcg_from_page(struct bio *bio, struct page *page) |
---|
1983 | | -{ |
---|
1984 | | - struct cgroup_subsys_state *blkcg_css; |
---|
1985 | | - |
---|
1986 | | - if (unlikely(bio->bi_css)) |
---|
1987 | | - return -EBUSY; |
---|
1988 | | - if (!page->mem_cgroup) |
---|
1989 | | - return 0; |
---|
1990 | | - blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, |
---|
1991 | | - &io_cgrp_subsys); |
---|
1992 | | - bio->bi_css = blkcg_css; |
---|
1993 | | - return 0; |
---|
1994 | | -} |
---|
1995 | | -#endif /* CONFIG_MEMCG */ |
---|
1996 | | - |
---|
1997 | | -/** |
---|
1998 | | - * bio_associate_blkcg - associate a bio with the specified blkcg |
---|
1999 | | - * @bio: target bio |
---|
2000 | | - * @blkcg_css: css of the blkcg to associate |
---|
2001 | | - * |
---|
2002 | | - * Associate @bio with the blkcg specified by @blkcg_css. Block layer will |
---|
2003 | | - * treat @bio as if it were issued by a task which belongs to the blkcg. |
---|
2004 | | - * |
---|
2005 | | - * This function takes an extra reference of @blkcg_css which will be put |
---|
2006 | | - * when @bio is released. The caller must own @bio and is responsible for |
---|
2007 | | - * synchronizing calls to this function. |
---|
2008 | | - */ |
---|
2009 | | -int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) |
---|
2010 | | -{ |
---|
2011 | | - if (unlikely(bio->bi_css)) |
---|
2012 | | - return -EBUSY; |
---|
2013 | | - css_get(blkcg_css); |
---|
2014 | | - bio->bi_css = blkcg_css; |
---|
2015 | | - return 0; |
---|
2016 | | -} |
---|
2017 | | -EXPORT_SYMBOL_GPL(bio_associate_blkcg); |
---|
2018 | | - |
---|
2019 | | -/** |
---|
2020 | | - * bio_associate_blkg - associate a bio with the specified blkg |
---|
2021 | | - * @bio: target bio |
---|
2022 | | - * @blkg: the blkg to associate |
---|
2023 | | - * |
---|
2024 | | - * Associate @bio with the blkg specified by @blkg. This is the queue specific |
---|
2025 | | - * blkcg information associated with the @bio, a reference will be taken on the |
---|
2026 | | - * @blkg and will be freed when the bio is freed. |
---|
2027 | | - */ |
---|
2028 | | -int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) |
---|
2029 | | -{ |
---|
2030 | | - if (unlikely(bio->bi_blkg)) |
---|
2031 | | - return -EBUSY; |
---|
2032 | | - if (!blkg_try_get(blkg)) |
---|
2033 | | - return -ENODEV; |
---|
2034 | | - bio->bi_blkg = blkg; |
---|
2035 | | - return 0; |
---|
2036 | | -} |
---|
2037 | | - |
---|
2038 | | -/** |
---|
2039 | | - * bio_disassociate_task - undo bio_associate_current() |
---|
2040 | | - * @bio: target bio |
---|
2041 | | - */ |
---|
2042 | | -void bio_disassociate_task(struct bio *bio) |
---|
2043 | | -{ |
---|
2044 | | - if (bio->bi_ioc) { |
---|
2045 | | - put_io_context(bio->bi_ioc); |
---|
2046 | | - bio->bi_ioc = NULL; |
---|
2047 | | - } |
---|
2048 | | - if (bio->bi_css) { |
---|
2049 | | - css_put(bio->bi_css); |
---|
2050 | | - bio->bi_css = NULL; |
---|
2051 | | - } |
---|
2052 | | - if (bio->bi_blkg) { |
---|
2053 | | - blkg_put(bio->bi_blkg); |
---|
2054 | | - bio->bi_blkg = NULL; |
---|
2055 | | - } |
---|
2056 | | -} |
---|
2057 | | - |
---|
2058 | | -/** |
---|
2059 | | - * bio_clone_blkcg_association - clone blkcg association from src to dst bio |
---|
2060 | | - * @dst: destination bio |
---|
2061 | | - * @src: source bio |
---|
2062 | | - */ |
---|
2063 | | -void bio_clone_blkcg_association(struct bio *dst, struct bio *src) |
---|
2064 | | -{ |
---|
2065 | | - if (src->bi_css) |
---|
2066 | | - WARN_ON(bio_associate_blkcg(dst, src->bi_css)); |
---|
2067 | | -} |
---|
2068 | | -EXPORT_SYMBOL_GPL(bio_clone_blkcg_association); |
---|
2069 | | -#endif /* CONFIG_BLK_CGROUP */ |
---|
2070 | | - |
---|
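For context on the cgroup plumbing deleted above, a hedged sketch of how a writeback submitter used the page-based association helper so the I/O is charged to the cgroup that dirtied the page rather than to whichever task submits the bio; the wrapper name is an illustrative assumption and error handling is elided.

```c
/* Hedged sketch (not part of this patch): legacy blkcg association for writeback. */
static void my_tag_writeback_bio(struct bio *bio, struct page *page)
{
#if defined(CONFIG_BLK_CGROUP) && defined(CONFIG_MEMCG)
	/* Looks up page->mem_cgroup's blkcg and takes its own css reference. */
	bio_associate_blkcg_from_page(bio, page);
#endif
}
```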
2071 | 1642 | static void __init biovec_init_slabs(void) |
---|
2072 | 1643 | { |
---|
2073 | 1644 | int i; |
---|
.. | .. |
---|
2093 | 1664 | bio_slab_nr = 0; |
---|
2094 | 1665 | bio_slabs = kcalloc(bio_slab_max, sizeof(struct bio_slab), |
---|
2095 | 1666 | GFP_KERNEL); |
---|
| 1667 | + |
---|
| 1668 | + BUILD_BUG_ON(BIO_FLAG_LAST > BVEC_POOL_OFFSET); |
---|
| 1669 | + |
---|
2096 | 1670 | if (!bio_slabs) |
---|
2097 | 1671 | panic("bio: can't allocate bios\n"); |
---|
2098 | 1672 | |
---|