hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/block/bio.c
@@ -1,19 +1,6 @@
1
+// SPDX-License-Identifier: GPL-2.0
12 /*
23 * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
3
- *
4
- * This program is free software; you can redistribute it and/or modify
5
- * it under the terms of the GNU General Public License version 2 as
6
- * published by the Free Software Foundation.
7
- *
8
- * This program is distributed in the hope that it will be useful,
9
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
- * GNU General Public License for more details.
12
- *
13
- * You should have received a copy of the GNU General Public Licens
14
- * along with this program; if not, write to the Free Software
15
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
16
- *
174 */
185 #include <linux/mm.h>
196 #include <linux/swap.h>
@@ -29,6 +16,8 @@
2916 #include <linux/workqueue.h>
3017 #include <linux/cgroup.h>
3118 #include <linux/blk-cgroup.h>
19
+#include <linux/highmem.h>
20
+#include <linux/sched/sysctl.h>
3221 #include <linux/blk-crypto.h>
3322
3423 #include <trace/events/block.h>
@@ -245,7 +234,14 @@
245234
246235 void bio_uninit(struct bio *bio)
247236 {
248
- bio_disassociate_task(bio);
237
+#ifdef CONFIG_BLK_CGROUP
238
+ if (bio->bi_blkg) {
239
+ blkg_put(bio->bi_blkg);
240
+ bio->bi_blkg = NULL;
241
+ }
242
+#endif
243
+ if (bio_integrity(bio))
244
+ bio_integrity_free(bio);
249245
250246 bio_crypt_free_ctx(bio);
251247 }
@@ -331,7 +327,7 @@
331327 /**
332328 * bio_chain - chain bio completions
333329 * @bio: the target bio
334
- * @parent: the @bio's parent bio
330
+ * @parent: the parent bio of @bio
335331 *
336332 * The caller won't have a bi_end_io called when @bio completes - instead,
337333 * @parent's bi_end_io won't be called until both @parent and @bio have
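A minimal caller sketch (not part of this patch) of the pattern the comment describes; the helper issue_prefix_io(), its parameters and the single-page payload are invented for illustration, and error and completion bookkeeping beyond the chain itself is omitted.

#include <linux/bio.h>
#include <linux/blkdev.h>

static void issue_prefix_io(struct bio *parent, struct block_device *bdev,
			    sector_t sector, struct page *page)
{
	/* One-segment child; a mempool-backed bio_alloc() does not fail. */
	struct bio *child = bio_alloc(GFP_NOIO, 1);

	bio_set_dev(child, bdev);
	child->bi_opf = REQ_OP_WRITE;
	child->bi_iter.bi_sector = sector;
	__bio_add_page(child, page, PAGE_SIZE, 0);

	/* Give up child's bi_end_io: parent completes only after both finish. */
	bio_chain(child, parent);
	submit_bio(child);
	submit_bio(parent);
}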
@@ -362,7 +358,7 @@
362358 if (!bio)
363359 break;
364360
365
- generic_make_request(bio);
361
+ submit_bio_noacct(bio);
366362 }
367363 }
368364
@@ -420,19 +416,19 @@
420416 * submit the previously allocated bio for IO before attempting to allocate
421417 * a new one. Failure to do so can cause deadlocks under memory pressure.
422418 *
423
- * Note that when running under generic_make_request() (i.e. any block
419
+ * Note that when running under submit_bio_noacct() (i.e. any block
424420 * driver), bios are not submitted until after you return - see the code in
425
- * generic_make_request() that converts recursion into iteration, to prevent
421
+ * submit_bio_noacct() that converts recursion into iteration, to prevent
426422 * stack overflows.
427423 *
428424 * This would normally mean allocating multiple bios under
429
- * generic_make_request() would be susceptible to deadlocks, but we have
425
+ * submit_bio_noacct() would be susceptible to deadlocks, but we have
430426 * deadlock avoidance code that resubmits any blocked bios from a rescuer
431427 * thread.
432428 *
433429 * However, we do not guarantee forward progress for allocations from other
434430 * mempools. Doing multiple allocations from the same mempool under
435
- * generic_make_request() should be avoided - instead, use bio_set's front_pad
431
+ * submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
436432 * for per bio allocations.
437433 *
438434 * RETURNS:
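A hedged sketch of the allocation discipline described above, assuming a caller-provided bio_set and page array; write_range(), write_range_endio() and their parameters are illustrative. Each bio is submitted before the next allocation from the same bio_set, so the mempool can be refilled by completing I/O instead of deadlocking.

#include <linux/bio.h>
#include <linux/blkdev.h>

static void write_range_endio(struct bio *bio)
{
	bio_put(bio);
}

static void write_range(struct block_device *bdev, struct bio_set *bs,
			struct page **pages, unsigned int nr_pages,
			sector_t sector)
{
	unsigned int i = 0;

	while (i < nr_pages) {
		struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, bs);

		bio_set_dev(bio, bdev);
		bio->bi_opf = REQ_OP_WRITE;
		bio->bi_iter.bi_sector = sector + ((sector_t)i << (PAGE_SHIFT - 9));
		bio->bi_end_io = write_range_endio;

		while (i < nr_pages &&
		       bio_add_page(bio, pages[i], PAGE_SIZE, 0) == PAGE_SIZE)
			i++;

		/* Submit before the next allocation from @bs. */
		submit_bio(bio);
	}
}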
@@ -452,9 +448,7 @@
452448 if (nr_iovecs > UIO_MAXIOV)
453449 return NULL;
454450
455
- p = kmalloc(sizeof(struct bio) +
456
- nr_iovecs * sizeof(struct bio_vec),
457
- gfp_mask);
451
+ p = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
458452 front_pad = 0;
459453 inline_vecs = nr_iovecs;
460454 } else {
@@ -463,14 +457,14 @@
463457 nr_iovecs > 0))
464458 return NULL;
465459 /*
466
- * generic_make_request() converts recursion to iteration; this
460
+ * submit_bio_noacct() converts recursion to iteration; this
467461 * means if we're running beneath it, any bios we allocate and
468462 * submit will not be submitted (and thus freed) until after we
469463 * return.
470464 *
471465 * This exposes us to a potential deadlock if we allocate
472466 * multiple bios from the same bio_set() while running
473
- * underneath generic_make_request(). If we were to allocate
467
+ * underneath submit_bio_noacct(). If we were to allocate
474468 * multiple bios (say a stacking block driver that was splitting
475469 * bios), we would deadlock if we exhausted the mempool's
476470 * reserve.
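A hedged sketch of the front_pad alternative the documentation recommends: one allocation from the bio_set yields both the driver's per-I/O state and the bio. struct my_io, my_bio_set and the helpers are invented names, and the pool size of 64 is arbitrary.

#include <linux/bio.h>

struct my_io {
	void		*private;	/* driver per-I/O state */
	struct bio	bio;		/* must come last */
};

static struct bio_set my_bio_set;

static int my_init(void)
{
	return bioset_init(&my_bio_set, 64,
			   offsetof(struct my_io, bio), BIOSET_NEED_BVECS);
}

static struct my_io *alloc_my_io(unsigned int nr_vecs)
{
	/* GFP_NOIO plus a mempool: this may block but does not fail. */
	struct bio *bio = bio_alloc_bioset(GFP_NOIO, nr_vecs, &my_bio_set);

	return container_of(bio, struct my_io, bio);
}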
@@ -551,6 +545,99 @@
551545 EXPORT_SYMBOL(zero_fill_bio_iter);
552546
553547 /**
548
+ * bio_truncate - truncate the bio to @new_size bytes
549
+ * @bio: the bio to be truncated
550
+ * @new_size: new size for truncating the bio
551
+ *
552
+ * Description:
553
+ * Truncate the bio to new size of @new_size. If bio_op(bio) is
554
+ * REQ_OP_READ, zero the truncated part. This function should only
555
+ * be used for handling corner cases, such as bio eod.
556
+ */
557
+void bio_truncate(struct bio *bio, unsigned new_size)
558
+{
559
+ struct bio_vec bv;
560
+ struct bvec_iter iter;
561
+ unsigned int done = 0;
562
+ bool truncated = false;
563
+
564
+ if (new_size >= bio->bi_iter.bi_size)
565
+ return;
566
+
567
+ if (bio_op(bio) != REQ_OP_READ)
568
+ goto exit;
569
+
570
+ bio_for_each_segment(bv, bio, iter) {
571
+ if (done + bv.bv_len > new_size) {
572
+ unsigned offset;
573
+
574
+ if (!truncated)
575
+ offset = new_size - done;
576
+ else
577
+ offset = 0;
578
+ zero_user(bv.bv_page, bv.bv_offset + offset,
579
+ bv.bv_len - offset);
580
+ truncated = true;
581
+ }
582
+ done += bv.bv_len;
583
+ }
584
+
585
+ exit:
586
+ /*
587
+ * Don't touch bvec table here and make it really immutable, since
588
+ * fs bio user has to retrieve all pages via bio_for_each_segment_all
589
+ * in its .end_bio() callback.
590
+ *
591
+ * It is enough to truncate bio by updating .bi_size since we can make
592
+ * correct bvec with the updated .bi_size for drivers.
593
+ */
594
+ bio->bi_iter.bi_size = new_size;
595
+}
596
+
597
+/**
598
+ * guard_bio_eod - truncate a BIO to fit the block device
599
+ * @bio: bio to truncate
600
+ *
601
+ * This allows us to do IO even on the odd last sectors of a device, even if the
602
+ * block size is some multiple of the physical sector size.
603
+ *
604
+ * We'll just truncate the bio to the size of the device, and clear the end of
605
+ * the buffer head manually. Truly out-of-range accesses will turn into actual
606
+ * I/O errors, this only handles the "we need to be able to do I/O at the final
607
+ * sector" case.
608
+ */
609
+void guard_bio_eod(struct bio *bio)
610
+{
611
+ sector_t maxsector;
612
+ struct hd_struct *part;
613
+
614
+ rcu_read_lock();
615
+ part = __disk_get_part(bio->bi_disk, bio->bi_partno);
616
+ if (part)
617
+ maxsector = part_nr_sects_read(part);
618
+ else
619
+ maxsector = get_capacity(bio->bi_disk);
620
+ rcu_read_unlock();
621
+
622
+ if (!maxsector)
623
+ return;
624
+
625
+ /*
626
+ * If the *whole* IO is past the end of the device,
627
+ * let it through, and the IO layer will turn it into
628
+ * an EIO.
629
+ */
630
+ if (unlikely(bio->bi_iter.bi_sector >= maxsector))
631
+ return;
632
+
633
+ maxsector -= bio->bi_iter.bi_sector;
634
+ if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
635
+ return;
636
+
637
+ bio_truncate(bio, maxsector << 9);
638
+}
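A hedged usage sketch (not from this patch), modelled on how a buffer- or page-based read path might call the helper right before submission; submit_block_read() is an invented name.

#include <linux/bio.h>

static void submit_block_read(struct bio *bio)
{
	bio->bi_opf = REQ_OP_READ;

	/*
	 * Clip the bio to the device size; for a read the clipped tail is
	 * zeroed by bio_truncate().  A bio that is entirely out of range is
	 * left alone and will complete with an error.
	 */
	guard_bio_eod(bio);
	submit_bio(bio);
}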
639
+
640
+/**
554641 * bio_put - release a reference to a bio
555642 * @bio: bio to release reference to
556643 *
@@ -573,15 +660,6 @@
573660 }
574661 }
575662 EXPORT_SYMBOL(bio_put);
576
-
577
-inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
578
-{
579
- if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
580
- blk_recount_segments(q, bio);
581
-
582
- return bio->bi_phys_segments;
583
-}
584
-EXPORT_SYMBOL(bio_phys_segments);
585663
586664 /**
587665 * __bio_clone_fast - clone a bio that shares the original bio's biovec
@@ -613,7 +691,8 @@
613691 bio->bi_iter = bio_src->bi_iter;
614692 bio->bi_io_vec = bio_src->bi_io_vec;
615693
616
- bio_clone_blkcg_association(bio, bio_src);
694
+ bio_clone_blkg_association(bio, bio_src);
695
+ blkcg_bio_issue_init(bio);
617696 }
618697 EXPORT_SYMBOL(__bio_clone_fast);
619698
@@ -635,133 +714,162 @@
635714
636715 __bio_clone_fast(b, bio);
637716
638
- bio_crypt_clone(b, bio, gfp_mask);
717
+ if (bio_crypt_clone(b, bio, gfp_mask) < 0)
718
+ goto err_put;
639719
640720 if (bio_integrity(bio) &&
641
- bio_integrity_clone(b, bio, gfp_mask) < 0) {
642
- bio_put(b);
643
- return NULL;
644
- }
721
+ bio_integrity_clone(b, bio, gfp_mask) < 0)
722
+ goto err_put;
645723
646724 return b;
725
+
726
+err_put:
727
+ bio_put(b);
728
+ return NULL;
647729 }
648730 EXPORT_SYMBOL(bio_clone_fast);
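A hedged sketch of a stacking-driver style call site. With this change bio_clone_fast() also returns NULL when the crypto or integrity context cannot be duplicated, so the NULL check below covers more than mempool exhaustion; remap_and_submit(), clone_endio() and the blanket -ENOMEM return are illustrative.

#include <linux/bio.h>
#include <linux/blkdev.h>

static void clone_endio(struct bio *clone)
{
	struct bio *orig = clone->bi_private;

	orig->bi_status = clone->bi_status;
	bio_put(clone);
	bio_endio(orig);
}

static int remap_and_submit(struct bio *bio, struct bio_set *bs,
			    struct block_device *lower)
{
	struct bio *clone = bio_clone_fast(bio, GFP_NOIO, bs);

	if (!clone)
		return -ENOMEM;

	bio_set_dev(clone, lower);
	clone->bi_private = bio;
	clone->bi_end_io = clone_endio;
	submit_bio(clone);
	return 0;
}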
649731
650
-/**
651
- * bio_add_pc_page - attempt to add page to bio
652
- * @q: the target queue
653
- * @bio: destination bio
654
- * @page: page to add
655
- * @len: vec entry length
656
- * @offset: vec entry offset
657
- *
658
- * Attempt to add a page to the bio_vec maplist. This can fail for a
659
- * number of reasons, such as the bio being full or target block device
660
- * limitations. The target block device must allow bio's up to PAGE_SIZE,
661
- * so it is always possible to add a single page to an empty bio.
662
- *
663
- * This should only be used by REQ_PC bios.
664
- */
665
-int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
666
- *page, unsigned int len, unsigned int offset)
732
+const char *bio_devname(struct bio *bio, char *buf)
667733 {
668
- int retried_segments = 0;
734
+ return disk_name(bio->bi_disk, bio->bi_partno, buf);
735
+}
736
+EXPORT_SYMBOL(bio_devname);
737
+
738
+static inline bool page_is_mergeable(const struct bio_vec *bv,
739
+ struct page *page, unsigned int len, unsigned int off,
740
+ bool *same_page)
741
+{
742
+ size_t bv_end = bv->bv_offset + bv->bv_len;
743
+ phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1;
744
+ phys_addr_t page_addr = page_to_phys(page);
745
+
746
+ if (vec_end_addr + 1 != page_addr + off)
747
+ return false;
748
+ if (xen_domain() && !xen_biovec_phys_mergeable(bv, page))
749
+ return false;
750
+
751
+ *same_page = ((vec_end_addr & PAGE_MASK) == page_addr);
752
+ if (*same_page)
753
+ return true;
754
+ return (bv->bv_page + bv_end / PAGE_SIZE) == (page + off / PAGE_SIZE);
755
+}
756
+
757
+/*
758
+ * Try to merge a page into a segment, while obeying the hardware segment
759
+ * size limit. This is not for normal read/write bios, but for passthrough
760
+ * or Zone Append operations that we can't split.
761
+ */
762
+static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
763
+ struct page *page, unsigned len,
764
+ unsigned offset, bool *same_page)
765
+{
766
+ struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
767
+ unsigned long mask = queue_segment_boundary(q);
768
+ phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
769
+ phys_addr_t addr2 = page_to_phys(page) + offset + len - 1;
770
+
771
+ if ((addr1 | mask) != (addr2 | mask))
772
+ return false;
773
+ if (bv->bv_len + len > queue_max_segment_size(q))
774
+ return false;
775
+ return __bio_try_merge_page(bio, page, len, offset, same_page);
776
+}
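A self-contained userspace illustration (plain C, not kernel code) of the boundary test used in bio_try_merge_hw_seg() above: two byte addresses lie in the same hardware segment exactly when OR-ing each with the boundary mask yields the same value, i.e. they do not straddle a (mask + 1)-aligned boundary.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool same_hw_segment(uint64_t first_byte, uint64_t last_byte,
			    uint64_t boundary_mask)
{
	return (first_byte | boundary_mask) == (last_byte | boundary_mask);
}

int main(void)
{
	uint64_t mask = 0xffff;	/* 64 KiB segment boundary */

	/* 0xfff0..0x1000f straddles the boundary at 0x10000: not mergeable. */
	printf("%d\n", same_hw_segment(0xfff0, 0x1000f, mask));
	/* 0x10000..0x1ffff stays inside one segment: mergeable. */
	printf("%d\n", same_hw_segment(0x10000, 0x1ffff, mask));
	return 0;
}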
777
+
778
+/**
779
+ * bio_add_hw_page - attempt to add a page to a bio with hw constraints
780
+ * @q: the target queue
781
+ * @bio: destination bio
782
+ * @page: page to add
783
+ * @len: vec entry length
784
+ * @offset: vec entry offset
785
+ * @max_sectors: maximum number of sectors that can be added
786
+ * @same_page: return if the segment has been merged inside the same page
787
+ *
788
+ * Add a page to a bio while respecting the hardware max_sectors, max_segment
789
+ * and gap limitations.
790
+ */
791
+int bio_add_hw_page(struct request_queue *q, struct bio *bio,
792
+ struct page *page, unsigned int len, unsigned int offset,
793
+ unsigned int max_sectors, bool *same_page)
794
+{
669795 struct bio_vec *bvec;
670796
671
- /*
672
- * cloned bio must not modify vec list
673
- */
674
- if (unlikely(bio_flagged(bio, BIO_CLONED)))
797
+ if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
675798 return 0;
676799
677
- if (((bio->bi_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q))
800
+ if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
678801 return 0;
679802
680
- /*
681
- * For filesystems with a blocksize smaller than the pagesize
682
- * we will often be called with the same page as last time and
683
- * a consecutive offset. Optimize this special case.
684
- */
685803 if (bio->bi_vcnt > 0) {
686
- struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
687
-
688
- if (page == prev->bv_page &&
689
- offset == prev->bv_offset + prev->bv_len) {
690
- prev->bv_len += len;
691
- bio->bi_iter.bi_size += len;
692
- goto done;
693
- }
804
+ if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page))
805
+ return len;
694806
695807 /*
696
- * If the queue doesn't support SG gaps and adding this
697
- * offset would create a gap, disallow it.
808
+ * If the queue doesn't support SG gaps and adding this segment
809
+ * would create a gap, disallow it.
698810 */
699
- if (bvec_gap_to_prev(q, prev, offset))
811
+ bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
812
+ if (bvec_gap_to_prev(q, bvec, offset))
700813 return 0;
701814 }
702815
703
- if (bio_full(bio))
816
+ if (bio_full(bio, len))
704817 return 0;
705818
706
- /*
707
- * setup the new entry, we might clear it again later if we
708
- * cannot add the page
709
- */
819
+ if (bio->bi_vcnt >= queue_max_segments(q))
820
+ return 0;
821
+
710822 bvec = &bio->bi_io_vec[bio->bi_vcnt];
711823 bvec->bv_page = page;
712824 bvec->bv_len = len;
713825 bvec->bv_offset = offset;
714826 bio->bi_vcnt++;
715
- bio->bi_phys_segments++;
716827 bio->bi_iter.bi_size += len;
717
-
718
- /*
719
- * Perform a recount if the number of segments is greater
720
- * than queue_max_segments(q).
721
- */
722
-
723
- while (bio->bi_phys_segments > queue_max_segments(q)) {
724
-
725
- if (retried_segments)
726
- goto failed;
727
-
728
- retried_segments = 1;
729
- blk_recount_segments(q, bio);
730
- }
731
-
732
- /* If we may be able to merge these biovecs, force a recount */
733
- if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
734
- bio_clear_flag(bio, BIO_SEG_VALID);
735
-
736
- done:
737828 return len;
829
+}
738830
739
- failed:
740
- bvec->bv_page = NULL;
741
- bvec->bv_len = 0;
742
- bvec->bv_offset = 0;
743
- bio->bi_vcnt--;
744
- bio->bi_iter.bi_size -= len;
745
- blk_recount_segments(q, bio);
746
- return 0;
831
+/**
832
+ * bio_add_pc_page - attempt to add page to passthrough bio
833
+ * @q: the target queue
834
+ * @bio: destination bio
835
+ * @page: page to add
836
+ * @len: vec entry length
837
+ * @offset: vec entry offset
838
+ *
839
+ * Attempt to add a page to the bio_vec maplist. This can fail for a
840
+ * number of reasons, such as the bio being full or target block device
841
+ * limitations. The target block device must allow bio's up to PAGE_SIZE,
842
+ * so it is always possible to add a single page to an empty bio.
843
+ *
844
+ * This should only be used by passthrough bios.
845
+ */
846
+int bio_add_pc_page(struct request_queue *q, struct bio *bio,
847
+ struct page *page, unsigned int len, unsigned int offset)
848
+{
849
+ bool same_page = false;
850
+ return bio_add_hw_page(q, bio, page, len, offset,
851
+ queue_max_hw_sectors(q), &same_page);
747852 }
748853 EXPORT_SYMBOL(bio_add_pc_page);
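A hedged sketch of a passthrough-style caller, assuming a physically addressable (e.g. kmalloc'ed) buffer so that virt_to_page() is valid; map_kern_buf() is an invented name. bio_add_pc_page() enforces the queue's max_hw_sectors, max_segments and segment-boundary limits, so the caller only walks the buffer page by page.

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/mm.h>

static int map_kern_buf(struct request_queue *q, struct bio *bio,
			void *data, unsigned int len)
{
	char *p = data;

	while (len) {
		unsigned int off = offset_in_page(p);
		unsigned int bytes = min_t(unsigned int, len, PAGE_SIZE - off);

		if (bio_add_pc_page(q, bio, virt_to_page(p), bytes, off) < bytes)
			return -EINVAL;	/* bio or queue limits exhausted */

		p += bytes;
		len -= bytes;
	}
	return 0;
}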
749854
750855 /**
751856 * __bio_try_merge_page - try appending data to an existing bvec.
752857 * @bio: destination bio
753
- * @page: page to add
858
+ * @page: start page to add
754859 * @len: length of the data to add
755
- * @off: offset of the data in @page
860
+ * @off: offset of the data relative to @page
861
+ * @same_page: return if the segment has been merged inside the same page
756862 *
757863 * Try to add the data at @page + @off to the last bvec of @bio. This is a
758
- * a useful optimisation for file systems with a block size smaller than the
864
+ * useful optimisation for file systems with a block size smaller than the
759865 * page size.
866
+ *
867
+ * Warn if (@len, @off) crosses pages in case that @same_page is true.
760868 *
761869 * Return %true on success or %false on failure.
762870 */
763871 bool __bio_try_merge_page(struct bio *bio, struct page *page,
764
- unsigned int len, unsigned int off)
872
+ unsigned int len, unsigned int off, bool *same_page)
765873 {
766874 if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
767875 return false;
@@ -769,7 +877,11 @@
769877 if (bio->bi_vcnt > 0) {
770878 struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
771879
772
- if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
880
+ if (page_is_mergeable(bv, page, len, off, same_page)) {
881
+ if (bio->bi_iter.bi_size > UINT_MAX - len) {
882
+ *same_page = false;
883
+ return false;
884
+ }
773885 bv->bv_len += len;
774886 bio->bi_iter.bi_size += len;
775887 return true;
@@ -780,11 +892,11 @@
780892 EXPORT_SYMBOL_GPL(__bio_try_merge_page);
781893
782894 /**
783
- * __bio_add_page - add page to a bio in a new segment
895
+ * __bio_add_page - add page(s) to a bio in a new segment
784896 * @bio: destination bio
785
- * @page: page to add
786
- * @len: length of the data to add
787
- * @off: offset of the data in @page
897
+ * @page: start page to add
898
+ * @len: length of the data to add, may cross pages
899
+ * @off: offset of the data relative to @page, may cross pages
788900 *
789901 * Add the data at @page + @off to @bio as a new bvec. The caller must ensure
790902 * that @bio has space for another bvec.
@@ -795,7 +907,7 @@
795907 struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
796908
797909 WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
798
- WARN_ON_ONCE(bio_full(bio));
910
+ WARN_ON_ONCE(bio_full(bio, len));
799911
800912 bv->bv_page = page;
801913 bv->bv_offset = off;
@@ -810,26 +922,72 @@
810922 EXPORT_SYMBOL_GPL(__bio_add_page);
811923
812924 /**
813
- * bio_add_page - attempt to add page to bio
925
+ * bio_add_page - attempt to add page(s) to bio
814926 * @bio: destination bio
815
- * @page: page to add
816
- * @len: vec entry length
817
- * @offset: vec entry offset
927
+ * @page: start page to add
928
+ * @len: vec entry length, may cross pages
929
+ * @offset: vec entry offset relative to @page, may cross pages
818930 *
819
- * Attempt to add a page to the bio_vec maplist. This will only fail
931
+ * Attempt to add page(s) to the bio_vec maplist. This will only fail
820932 * if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
821933 */
822934 int bio_add_page(struct bio *bio, struct page *page,
823935 unsigned int len, unsigned int offset)
824936 {
825
- if (!__bio_try_merge_page(bio, page, len, offset)) {
826
- if (bio_full(bio))
937
+ bool same_page = false;
938
+
939
+ if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
940
+ if (bio_full(bio, len))
827941 return 0;
828942 __bio_add_page(bio, page, len, offset);
829943 }
830944 return len;
831945 }
832946 EXPORT_SYMBOL(bio_add_page);
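A hedged sketch of the usual filesystem-side loop around bio_add_page(): keep appending until the bio reports it is full, then submit it and start a new one. do_add_page() and next_sector are invented, and completion handling (bi_end_io) is omitted.

#include <linux/bio.h>
#include <linux/blkdev.h>

static struct bio *do_add_page(struct bio *bio, struct block_device *bdev,
			       struct page *page, unsigned int len,
			       unsigned int off, sector_t next_sector)
{
	if (bio && bio_add_page(bio, page, len, off) == len)
		return bio;		/* merged into the last bvec or appended */

	if (bio)
		submit_bio(bio);	/* full: send it on its way */

	bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
	bio_set_dev(bio, bdev);
	bio->bi_opf = REQ_OP_WRITE;
	bio->bi_iter.bi_sector = next_sector;
	__bio_add_page(bio, page, len, off);
	return bio;
}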
947
+
948
+void bio_release_pages(struct bio *bio, bool mark_dirty)
949
+{
950
+ struct bvec_iter_all iter_all;
951
+ struct bio_vec *bvec;
952
+
953
+ if (bio_flagged(bio, BIO_NO_PAGE_REF))
954
+ return;
955
+
956
+ bio_for_each_segment_all(bvec, bio, iter_all) {
957
+ if (mark_dirty && !PageCompound(bvec->bv_page))
958
+ set_page_dirty_lock(bvec->bv_page);
959
+ put_page(bvec->bv_page);
960
+ }
961
+}
962
+EXPORT_SYMBOL_GPL(bio_release_pages);
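A hedged sketch of a typical consumer, e.g. a direct-I/O read completion: pages pinned by bio_iov_iter_get_pages() are dropped here, and bio_release_pages() itself skips the put when BIO_NO_PAGE_REF is set. dio_read_endio() is an invented name.

#include <linux/bio.h>

static void dio_read_endio(struct bio *bio)
{
	/* Dirty the pages for a read into user memory, then unpin them. */
	bio_release_pages(bio, bio_data_dir(bio) == READ);
	bio_put(bio);
}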
963
+
964
+static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
965
+{
966
+ const struct bio_vec *bv = iter->bvec;
967
+ unsigned int len;
968
+ size_t size;
969
+
970
+ if (WARN_ON_ONCE(iter->iov_offset > bv->bv_len))
971
+ return -EINVAL;
972
+
973
+ len = min_t(size_t, bv->bv_len - iter->iov_offset, iter->count);
974
+ size = bio_add_page(bio, bv->bv_page, len,
975
+ bv->bv_offset + iter->iov_offset);
976
+ if (unlikely(size != len))
977
+ return -EINVAL;
978
+ iov_iter_advance(iter, size);
979
+ return 0;
980
+}
981
+
982
+static void bio_put_pages(struct page **pages, size_t size, size_t off)
983
+{
984
+ size_t i, nr = DIV_ROUND_UP(size + (off & ~PAGE_MASK), PAGE_SIZE);
985
+
986
+ for (i = 0; i < nr; i++)
987
+ put_page(pages[i]);
988
+}
989
+
990
+#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *))
833991
834992 /**
835993 * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
@@ -839,71 +997,142 @@
839997 * Pins pages from *iter and appends them to @bio's bvec array. The
840998 * pages will have to be released using put_page() when done.
841999 * For multi-segment *iter, this function only adds pages from the
842
- * the next non-empty segment of the iov iterator.
1000
+ * next non-empty segment of the iov iterator.
8431001 */
8441002 static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
8451003 {
846
- unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx;
1004
+ unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
1005
+ unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
8471006 struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
8481007 struct page **pages = (struct page **)bv;
1008
+ bool same_page = false;
1009
+ ssize_t size, left;
1010
+ unsigned len, i;
8491011 size_t offset;
850
- ssize_t size;
1012
+
1013
+ /*
1014
+ * Move page array up in the allocated memory for the bio vecs as far as
1015
+ * possible so that we can start filling biovecs from the beginning
1016
+ * without overwriting the temporary page array.
1017
+ */
1018
+ BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
1019
+ pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
8511020
8521021 size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
8531022 if (unlikely(size <= 0))
8541023 return size ? size : -EFAULT;
855
- idx = nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;
8561024
857
- /*
858
- * Deep magic below: We need to walk the pinned pages backwards
859
- * because we are abusing the space allocated for the bio_vecs
860
- * for the page array. Because the bio_vecs are larger than the
861
- * page pointers by definition this will always work. But it also
862
- * means we can't use bio_add_page, so any changes to it's semantics
863
- * need to be reflected here as well.
864
- */
865
- bio->bi_iter.bi_size += size;
866
- bio->bi_vcnt += nr_pages;
1025
+ for (left = size, i = 0; left > 0; left -= len, i++) {
1026
+ struct page *page = pages[i];
8671027
868
- while (idx--) {
869
- bv[idx].bv_page = pages[idx];
870
- bv[idx].bv_len = PAGE_SIZE;
871
- bv[idx].bv_offset = 0;
1028
+ len = min_t(size_t, PAGE_SIZE - offset, left);
1029
+
1030
+ if (__bio_try_merge_page(bio, page, len, offset, &same_page)) {
1031
+ if (same_page)
1032
+ put_page(page);
1033
+ } else {
1034
+ if (WARN_ON_ONCE(bio_full(bio, len))) {
1035
+ bio_put_pages(pages + i, left, offset);
1036
+ return -EINVAL;
1037
+ }
1038
+ __bio_add_page(bio, page, len, offset);
1039
+ }
1040
+ offset = 0;
8721041 }
873
-
874
- bv[0].bv_offset += offset;
875
- bv[0].bv_len -= offset;
876
- bv[nr_pages - 1].bv_len -= nr_pages * PAGE_SIZE - offset - size;
8771042
8781043 iov_iter_advance(iter, size);
8791044 return 0;
8801045 }
8811046
1047
+static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
1048
+{
1049
+ unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
1050
+ unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
1051
+ struct request_queue *q = bio->bi_disk->queue;
1052
+ unsigned int max_append_sectors = queue_max_zone_append_sectors(q);
1053
+ struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
1054
+ struct page **pages = (struct page **)bv;
1055
+ ssize_t size, left;
1056
+ unsigned len, i;
1057
+ size_t offset;
1058
+ int ret = 0;
1059
+
1060
+ /*
1061
+ * Move page array up in the allocated memory for the bio vecs as far as
1062
+ * possible so that we can start filling biovecs from the beginning
1063
+ * without overwriting the temporary page array.
1064
+ */
1065
+ BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
1066
+ pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
1067
+
1068
+ size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
1069
+ if (unlikely(size <= 0))
1070
+ return size ? size : -EFAULT;
1071
+
1072
+ for (left = size, i = 0; left > 0; left -= len, i++) {
1073
+ struct page *page = pages[i];
1074
+ bool same_page = false;
1075
+
1076
+ len = min_t(size_t, PAGE_SIZE - offset, left);
1077
+ if (bio_add_hw_page(q, bio, page, len, offset,
1078
+ max_append_sectors, &same_page) != len) {
1079
+ bio_put_pages(pages + i, left, offset);
1080
+ ret = -EINVAL;
1081
+ break;
1082
+ }
1083
+ if (same_page)
1084
+ put_page(page);
1085
+ offset = 0;
1086
+ }
1087
+
1088
+ iov_iter_advance(iter, size - left);
1089
+ return ret;
1090
+}
1091
+
8821092 /**
883
- * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
1093
+ * bio_iov_iter_get_pages - add user or kernel pages to a bio
8841094 * @bio: bio to add pages to
885
- * @iter: iov iterator describing the region to be mapped
1095
+ * @iter: iov iterator describing the region to be added
8861096 *
887
- * Pins pages from *iter and appends them to @bio's bvec array. The
888
- * pages will have to be released using put_page() when done.
1097
+ * This takes either an iterator pointing to user memory, or one pointing to
1098
+ * kernel pages (BVEC iterator). If we're adding user pages, we pin them and
1099
+ * map them into the kernel. On IO completion, the caller should put those
1100
+ * pages. If we're adding kernel pages, and the caller told us it's safe to
1101
+ * do so, we just have to add the pages to the bio directly. We don't grab an
1102
+ * extra reference to those pages (the user should already have that), and we
1103
+ * don't put the page on IO completion. The caller needs to check if the bio is
1104
+ * flagged BIO_NO_PAGE_REF on IO completion. If it isn't, then pages should be
1105
+ * released.
1106
+ *
8891107 * The function tries, but does not guarantee, to pin as many pages as
890
- * fit into the bio, or are requested in *iter, whatever is smaller.
891
- * If MM encounters an error pinning the requested pages, it stops.
892
- * Error is returned only if 0 pages could be pinned.
1108
+ * fit into the bio, or are requested in @iter, whatever is smaller. If
1109
+ * MM encounters an error pinning the requested pages, it stops. Error
1110
+ * is returned only if 0 pages could be pinned.
8931111 */
8941112 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
8951113 {
896
- unsigned short orig_vcnt = bio->bi_vcnt;
1114
+ const bool is_bvec = iov_iter_is_bvec(iter);
1115
+ int ret;
1116
+
1117
+ if (WARN_ON_ONCE(bio->bi_vcnt))
1118
+ return -EINVAL;
8971119
8981120 do {
899
- int ret = __bio_iov_iter_get_pages(bio, iter);
1121
+ if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
1122
+ if (WARN_ON_ONCE(is_bvec))
1123
+ return -EINVAL;
1124
+ ret = __bio_iov_append_get_pages(bio, iter);
1125
+ } else {
1126
+ if (is_bvec)
1127
+ ret = __bio_iov_bvec_add_pages(bio, iter);
1128
+ else
1129
+ ret = __bio_iov_iter_get_pages(bio, iter);
1130
+ }
1131
+ } while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
9001132
901
- if (unlikely(ret))
902
- return bio->bi_vcnt > orig_vcnt ? 0 : ret;
903
-
904
- } while (iov_iter_count(iter) && !bio_full(bio));
905
-
906
- return 0;
1133
+ if (is_bvec)
1134
+ bio_set_flag(bio, BIO_NO_PAGE_REF);
1135
+ return bio->bi_vcnt ? 0 : ret;
9071136 }
9081137 EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
9091138
@@ -926,12 +1155,21 @@
9261155 int submit_bio_wait(struct bio *bio)
9271156 {
9281157 DECLARE_COMPLETION_ONSTACK_MAP(done, bio->bi_disk->lockdep_map);
1158
+ unsigned long hang_check;
9291159
9301160 bio->bi_private = &done;
9311161 bio->bi_end_io = submit_bio_wait_endio;
9321162 bio->bi_opf |= REQ_SYNC;
9331163 submit_bio(bio);
934
- wait_for_completion_io(&done);
1164
+
1165
+ /* Prevent hang_check timer from firing at us during very long I/O */
1166
+ hang_check = sysctl_hung_task_timeout_secs;
1167
+ if (hang_check)
1168
+ while (!wait_for_completion_io_timeout(&done,
1169
+ hang_check * (HZ/2)))
1170
+ ;
1171
+ else
1172
+ wait_for_completion_io(&done);
9351173
9361174 return blk_status_to_errno(bio->bi_status);
9371175 }
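A hedged sketch of a synchronous caller: a one-page read using an on-stack bio and submit_bio_wait(), which after this change also keeps the hung-task watchdog quiet during very long waits. read_one_page_sync() is an invented name and teardown details are omitted.

#include <linux/bio.h>
#include <linux/blkdev.h>

static int read_one_page_sync(struct block_device *bdev, sector_t sector,
			      struct page *page)
{
	struct bio_vec bvec;
	struct bio bio;

	bio_init(&bio, &bvec, 1);
	bio_set_dev(&bio, bdev);
	bio.bi_opf = REQ_OP_READ;
	bio.bi_iter.bi_sector = sector;
	__bio_add_page(&bio, page, PAGE_SIZE, 0);

	/* Sleeps until the I/O completes; returns 0 or a negative errno. */
	return submit_bio_wait(&bio);
}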
@@ -1043,523 +1281,15 @@
10431281 }
10441282 EXPORT_SYMBOL(bio_list_copy_data);
10451283
1046
-struct bio_map_data {
1047
- int is_our_pages;
1048
- struct iov_iter iter;
1049
- struct iovec iov[];
1050
-};
1051
-
1052
-static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
1053
- gfp_t gfp_mask)
1054
-{
1055
- struct bio_map_data *bmd;
1056
- if (data->nr_segs > UIO_MAXIOV)
1057
- return NULL;
1058
-
1059
- bmd = kmalloc(sizeof(struct bio_map_data) +
1060
- sizeof(struct iovec) * data->nr_segs, gfp_mask);
1061
- if (!bmd)
1062
- return NULL;
1063
- memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
1064
- bmd->iter = *data;
1065
- bmd->iter.iov = bmd->iov;
1066
- return bmd;
1067
-}
1068
-
1069
-/**
1070
- * bio_copy_from_iter - copy all pages from iov_iter to bio
1071
- * @bio: The &struct bio which describes the I/O as destination
1072
- * @iter: iov_iter as source
1073
- *
1074
- * Copy all pages from iov_iter to bio.
1075
- * Returns 0 on success, or error on failure.
1076
- */
1077
-static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
1078
-{
1079
- int i;
1080
- struct bio_vec *bvec;
1081
-
1082
- bio_for_each_segment_all(bvec, bio, i) {
1083
- ssize_t ret;
1084
-
1085
- ret = copy_page_from_iter(bvec->bv_page,
1086
- bvec->bv_offset,
1087
- bvec->bv_len,
1088
- iter);
1089
-
1090
- if (!iov_iter_count(iter))
1091
- break;
1092
-
1093
- if (ret < bvec->bv_len)
1094
- return -EFAULT;
1095
- }
1096
-
1097
- return 0;
1098
-}
1099
-
1100
-/**
1101
- * bio_copy_to_iter - copy all pages from bio to iov_iter
1102
- * @bio: The &struct bio which describes the I/O as source
1103
- * @iter: iov_iter as destination
1104
- *
1105
- * Copy all pages from bio to iov_iter.
1106
- * Returns 0 on success, or error on failure.
1107
- */
1108
-static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
1109
-{
1110
- int i;
1111
- struct bio_vec *bvec;
1112
-
1113
- bio_for_each_segment_all(bvec, bio, i) {
1114
- ssize_t ret;
1115
-
1116
- ret = copy_page_to_iter(bvec->bv_page,
1117
- bvec->bv_offset,
1118
- bvec->bv_len,
1119
- &iter);
1120
-
1121
- if (!iov_iter_count(&iter))
1122
- break;
1123
-
1124
- if (ret < bvec->bv_len)
1125
- return -EFAULT;
1126
- }
1127
-
1128
- return 0;
1129
-}
1130
-
11311284 void bio_free_pages(struct bio *bio)
11321285 {
11331286 struct bio_vec *bvec;
1134
- int i;
1287
+ struct bvec_iter_all iter_all;
11351288
1136
- bio_for_each_segment_all(bvec, bio, i)
1289
+ bio_for_each_segment_all(bvec, bio, iter_all)
11371290 __free_page(bvec->bv_page);
11381291 }
11391292 EXPORT_SYMBOL(bio_free_pages);
1140
-
1141
-/**
1142
- * bio_uncopy_user - finish previously mapped bio
1143
- * @bio: bio being terminated
1144
- *
1145
- * Free pages allocated from bio_copy_user_iov() and write back data
1146
- * to user space in case of a read.
1147
- */
1148
-int bio_uncopy_user(struct bio *bio)
1149
-{
1150
- struct bio_map_data *bmd = bio->bi_private;
1151
- int ret = 0;
1152
-
1153
- if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
1154
- /*
1155
- * if we're in a workqueue, the request is orphaned, so
1156
- * don't copy into a random user address space, just free
1157
- * and return -EINTR so user space doesn't expect any data.
1158
- */
1159
- if (!current->mm)
1160
- ret = -EINTR;
1161
- else if (bio_data_dir(bio) == READ)
1162
- ret = bio_copy_to_iter(bio, bmd->iter);
1163
- if (bmd->is_our_pages)
1164
- bio_free_pages(bio);
1165
- }
1166
- kfree(bmd);
1167
- bio_put(bio);
1168
- return ret;
1169
-}
1170
-
1171
-/**
1172
- * bio_copy_user_iov - copy user data to bio
1173
- * @q: destination block queue
1174
- * @map_data: pointer to the rq_map_data holding pages (if necessary)
1175
- * @iter: iovec iterator
1176
- * @gfp_mask: memory allocation flags
1177
- *
1178
- * Prepares and returns a bio for indirect user io, bouncing data
1179
- * to/from kernel pages as necessary. Must be paired with
1180
- * call bio_uncopy_user() on io completion.
1181
- */
1182
-struct bio *bio_copy_user_iov(struct request_queue *q,
1183
- struct rq_map_data *map_data,
1184
- struct iov_iter *iter,
1185
- gfp_t gfp_mask)
1186
-{
1187
- struct bio_map_data *bmd;
1188
- struct page *page;
1189
- struct bio *bio;
1190
- int i = 0, ret;
1191
- int nr_pages;
1192
- unsigned int len = iter->count;
1193
- unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
1194
-
1195
- bmd = bio_alloc_map_data(iter, gfp_mask);
1196
- if (!bmd)
1197
- return ERR_PTR(-ENOMEM);
1198
-
1199
- /*
1200
- * We need to do a deep copy of the iov_iter including the iovecs.
1201
- * The caller provided iov might point to an on-stack or otherwise
1202
- * shortlived one.
1203
- */
1204
- bmd->is_our_pages = map_data ? 0 : 1;
1205
-
1206
- nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
1207
- if (nr_pages > BIO_MAX_PAGES)
1208
- nr_pages = BIO_MAX_PAGES;
1209
-
1210
- ret = -ENOMEM;
1211
- bio = bio_kmalloc(gfp_mask, nr_pages);
1212
- if (!bio)
1213
- goto out_bmd;
1214
-
1215
- ret = 0;
1216
-
1217
- if (map_data) {
1218
- nr_pages = 1 << map_data->page_order;
1219
- i = map_data->offset / PAGE_SIZE;
1220
- }
1221
- while (len) {
1222
- unsigned int bytes = PAGE_SIZE;
1223
-
1224
- bytes -= offset;
1225
-
1226
- if (bytes > len)
1227
- bytes = len;
1228
-
1229
- if (map_data) {
1230
- if (i == map_data->nr_entries * nr_pages) {
1231
- ret = -ENOMEM;
1232
- break;
1233
- }
1234
-
1235
- page = map_data->pages[i / nr_pages];
1236
- page += (i % nr_pages);
1237
-
1238
- i++;
1239
- } else {
1240
- page = alloc_page(q->bounce_gfp | gfp_mask);
1241
- if (!page) {
1242
- ret = -ENOMEM;
1243
- break;
1244
- }
1245
- }
1246
-
1247
- if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
1248
- if (!map_data)
1249
- __free_page(page);
1250
- break;
1251
- }
1252
-
1253
- len -= bytes;
1254
- offset = 0;
1255
- }
1256
-
1257
- if (ret)
1258
- goto cleanup;
1259
-
1260
- if (map_data)
1261
- map_data->offset += bio->bi_iter.bi_size;
1262
-
1263
- /*
1264
- * success
1265
- */
1266
- if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) ||
1267
- (map_data && map_data->from_user)) {
1268
- ret = bio_copy_from_iter(bio, iter);
1269
- if (ret)
1270
- goto cleanup;
1271
- } else {
1272
- if (bmd->is_our_pages)
1273
- zero_fill_bio(bio);
1274
- iov_iter_advance(iter, bio->bi_iter.bi_size);
1275
- }
1276
-
1277
- bio->bi_private = bmd;
1278
- if (map_data && map_data->null_mapped)
1279
- bio_set_flag(bio, BIO_NULL_MAPPED);
1280
- return bio;
1281
-cleanup:
1282
- if (!map_data)
1283
- bio_free_pages(bio);
1284
- bio_put(bio);
1285
-out_bmd:
1286
- kfree(bmd);
1287
- return ERR_PTR(ret);
1288
-}
1289
-
1290
-/**
1291
- * bio_map_user_iov - map user iovec into bio
1292
- * @q: the struct request_queue for the bio
1293
- * @iter: iovec iterator
1294
- * @gfp_mask: memory allocation flags
1295
- *
1296
- * Map the user space address into a bio suitable for io to a block
1297
- * device. Returns an error pointer in case of error.
1298
- */
1299
-struct bio *bio_map_user_iov(struct request_queue *q,
1300
- struct iov_iter *iter,
1301
- gfp_t gfp_mask)
1302
-{
1303
- int j;
1304
- struct bio *bio;
1305
- int ret;
1306
- struct bio_vec *bvec;
1307
-
1308
- if (!iov_iter_count(iter))
1309
- return ERR_PTR(-EINVAL);
1310
-
1311
- bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
1312
- if (!bio)
1313
- return ERR_PTR(-ENOMEM);
1314
-
1315
- while (iov_iter_count(iter)) {
1316
- struct page **pages;
1317
- ssize_t bytes;
1318
- size_t offs, added = 0;
1319
- int npages;
1320
-
1321
- bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
1322
- if (unlikely(bytes <= 0)) {
1323
- ret = bytes ? bytes : -EFAULT;
1324
- goto out_unmap;
1325
- }
1326
-
1327
- npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
1328
-
1329
- if (unlikely(offs & queue_dma_alignment(q))) {
1330
- ret = -EINVAL;
1331
- j = 0;
1332
- } else {
1333
- for (j = 0; j < npages; j++) {
1334
- struct page *page = pages[j];
1335
- unsigned int n = PAGE_SIZE - offs;
1336
- unsigned short prev_bi_vcnt = bio->bi_vcnt;
1337
-
1338
- if (n > bytes)
1339
- n = bytes;
1340
-
1341
- if (!bio_add_pc_page(q, bio, page, n, offs))
1342
- break;
1343
-
1344
- /*
1345
- * check if vector was merged with previous
1346
- * drop page reference if needed
1347
- */
1348
- if (bio->bi_vcnt == prev_bi_vcnt)
1349
- put_page(page);
1350
-
1351
- added += n;
1352
- bytes -= n;
1353
- offs = 0;
1354
- }
1355
- iov_iter_advance(iter, added);
1356
- }
1357
- /*
1358
- * release the pages we didn't map into the bio, if any
1359
- */
1360
- while (j < npages)
1361
- put_page(pages[j++]);
1362
- kvfree(pages);
1363
- /* couldn't stuff something into bio? */
1364
- if (bytes)
1365
- break;
1366
- }
1367
-
1368
- bio_set_flag(bio, BIO_USER_MAPPED);
1369
-
1370
- /*
1371
- * subtle -- if bio_map_user_iov() ended up bouncing a bio,
1372
- * it would normally disappear when its bi_end_io is run.
1373
- * however, we need it for the unmap, so grab an extra
1374
- * reference to it
1375
- */
1376
- bio_get(bio);
1377
- return bio;
1378
-
1379
- out_unmap:
1380
- bio_for_each_segment_all(bvec, bio, j) {
1381
- put_page(bvec->bv_page);
1382
- }
1383
- bio_put(bio);
1384
- return ERR_PTR(ret);
1385
-}
1386
-
1387
-static void __bio_unmap_user(struct bio *bio)
1388
-{
1389
- struct bio_vec *bvec;
1390
- int i;
1391
-
1392
- /*
1393
- * make sure we dirty pages we wrote to
1394
- */
1395
- bio_for_each_segment_all(bvec, bio, i) {
1396
- if (bio_data_dir(bio) == READ)
1397
- set_page_dirty_lock(bvec->bv_page);
1398
-
1399
- put_page(bvec->bv_page);
1400
- }
1401
-
1402
- bio_put(bio);
1403
-}
1404
-
1405
-/**
1406
- * bio_unmap_user - unmap a bio
1407
- * @bio: the bio being unmapped
1408
- *
1409
- * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
1410
- * process context.
1411
- *
1412
- * bio_unmap_user() may sleep.
1413
- */
1414
-void bio_unmap_user(struct bio *bio)
1415
-{
1416
- __bio_unmap_user(bio);
1417
- bio_put(bio);
1418
-}
1419
-
1420
-static void bio_map_kern_endio(struct bio *bio)
1421
-{
1422
- bio_put(bio);
1423
-}
1424
-
1425
-/**
1426
- * bio_map_kern - map kernel address into bio
1427
- * @q: the struct request_queue for the bio
1428
- * @data: pointer to buffer to map
1429
- * @len: length in bytes
1430
- * @gfp_mask: allocation flags for bio allocation
1431
- *
1432
- * Map the kernel address into a bio suitable for io to a block
1433
- * device. Returns an error pointer in case of error.
1434
- */
1435
-struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
1436
- gfp_t gfp_mask)
1437
-{
1438
- unsigned long kaddr = (unsigned long)data;
1439
- unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1440
- unsigned long start = kaddr >> PAGE_SHIFT;
1441
- const int nr_pages = end - start;
1442
- int offset, i;
1443
- struct bio *bio;
1444
-
1445
- bio = bio_kmalloc(gfp_mask, nr_pages);
1446
- if (!bio)
1447
- return ERR_PTR(-ENOMEM);
1448
-
1449
- offset = offset_in_page(kaddr);
1450
- for (i = 0; i < nr_pages; i++) {
1451
- unsigned int bytes = PAGE_SIZE - offset;
1452
-
1453
- if (len <= 0)
1454
- break;
1455
-
1456
- if (bytes > len)
1457
- bytes = len;
1458
-
1459
- if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
1460
- offset) < bytes) {
1461
- /* we don't support partial mappings */
1462
- bio_put(bio);
1463
- return ERR_PTR(-EINVAL);
1464
- }
1465
-
1466
- data += bytes;
1467
- len -= bytes;
1468
- offset = 0;
1469
- }
1470
-
1471
- bio->bi_end_io = bio_map_kern_endio;
1472
- return bio;
1473
-}
1474
-EXPORT_SYMBOL(bio_map_kern);
1475
-
1476
-static void bio_copy_kern_endio(struct bio *bio)
1477
-{
1478
- bio_free_pages(bio);
1479
- bio_put(bio);
1480
-}
1481
-
1482
-static void bio_copy_kern_endio_read(struct bio *bio)
1483
-{
1484
- char *p = bio->bi_private;
1485
- struct bio_vec *bvec;
1486
- int i;
1487
-
1488
- bio_for_each_segment_all(bvec, bio, i) {
1489
- memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
1490
- p += bvec->bv_len;
1491
- }
1492
-
1493
- bio_copy_kern_endio(bio);
1494
-}
1495
-
1496
-/**
1497
- * bio_copy_kern - copy kernel address into bio
1498
- * @q: the struct request_queue for the bio
1499
- * @data: pointer to buffer to copy
1500
- * @len: length in bytes
1501
- * @gfp_mask: allocation flags for bio and page allocation
1502
- * @reading: data direction is READ
1503
- *
1504
- * copy the kernel address into a bio suitable for io to a block
1505
- * device. Returns an error pointer in case of error.
1506
- */
1507
-struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
1508
- gfp_t gfp_mask, int reading)
1509
-{
1510
- unsigned long kaddr = (unsigned long)data;
1511
- unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1512
- unsigned long start = kaddr >> PAGE_SHIFT;
1513
- struct bio *bio;
1514
- void *p = data;
1515
- int nr_pages = 0;
1516
-
1517
- /*
1518
- * Overflow, abort
1519
- */
1520
- if (end < start)
1521
- return ERR_PTR(-EINVAL);
1522
-
1523
- nr_pages = end - start;
1524
- bio = bio_kmalloc(gfp_mask, nr_pages);
1525
- if (!bio)
1526
- return ERR_PTR(-ENOMEM);
1527
-
1528
- while (len) {
1529
- struct page *page;
1530
- unsigned int bytes = PAGE_SIZE;
1531
-
1532
- if (bytes > len)
1533
- bytes = len;
1534
-
1535
- page = alloc_page(q->bounce_gfp | gfp_mask);
1536
- if (!page)
1537
- goto cleanup;
1538
-
1539
- if (!reading)
1540
- memcpy(page_address(page), p, bytes);
1541
-
1542
- if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
1543
- break;
1544
-
1545
- len -= bytes;
1546
- p += bytes;
1547
- }
1548
-
1549
- if (reading) {
1550
- bio->bi_end_io = bio_copy_kern_endio_read;
1551
- bio->bi_private = data;
1552
- } else {
1553
- bio->bi_end_io = bio_copy_kern_endio;
1554
- }
1555
-
1556
- return bio;
1557
-
1558
-cleanup:
1559
- bio_free_pages(bio);
1560
- bio_put(bio);
1561
- return ERR_PTR(-ENOMEM);
1562
-}
15631293
15641294 /*
15651295 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
@@ -1593,22 +1323,12 @@
15931323 void bio_set_pages_dirty(struct bio *bio)
15941324 {
15951325 struct bio_vec *bvec;
1596
- int i;
1326
+ struct bvec_iter_all iter_all;
15971327
1598
- bio_for_each_segment_all(bvec, bio, i) {
1328
+ bio_for_each_segment_all(bvec, bio, iter_all) {
15991329 if (!PageCompound(bvec->bv_page))
16001330 set_page_dirty_lock(bvec->bv_page);
16011331 }
1602
-}
1603
-EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
1604
-
1605
-static void bio_release_pages(struct bio *bio)
1606
-{
1607
- struct bio_vec *bvec;
1608
- int i;
1609
-
1610
- bio_for_each_segment_all(bvec, bio, i)
1611
- put_page(bvec->bv_page);
16121332 }
16131333
16141334 /*
@@ -1643,8 +1363,7 @@
16431363 while ((bio = next) != NULL) {
16441364 next = bio->bi_private;
16451365
1646
- bio_set_pages_dirty(bio);
1647
- bio_release_pages(bio);
1366
+ bio_release_pages(bio, true);
16481367 bio_put(bio);
16491368 }
16501369 }
@@ -1653,14 +1372,14 @@
16531372 {
16541373 struct bio_vec *bvec;
16551374 unsigned long flags;
1656
- int i;
1375
+ struct bvec_iter_all iter_all;
16571376
1658
- bio_for_each_segment_all(bvec, bio, i) {
1377
+ bio_for_each_segment_all(bvec, bio, iter_all) {
16591378 if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
16601379 goto defer;
16611380 }
16621381
1663
- bio_release_pages(bio);
1382
+ bio_release_pages(bio, false);
16641383 bio_put(bio);
16651384 return;
16661385 defer:
@@ -1670,49 +1389,6 @@
16701389 spin_unlock_irqrestore(&bio_dirty_lock, flags);
16711390 schedule_work(&bio_dirty_work);
16721391 }
1673
-EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
1674
-
1675
-void generic_start_io_acct(struct request_queue *q, int op,
1676
- unsigned long sectors, struct hd_struct *part)
1677
-{
1678
- const int sgrp = op_stat_group(op);
1679
- int cpu = part_stat_lock();
1680
-
1681
- part_round_stats(q, cpu, part);
1682
- part_stat_inc(cpu, part, ios[sgrp]);
1683
- part_stat_add(cpu, part, sectors[sgrp], sectors);
1684
- part_inc_in_flight(q, part, op_is_write(op));
1685
-
1686
- part_stat_unlock();
1687
-}
1688
-EXPORT_SYMBOL(generic_start_io_acct);
1689
-
1690
-void generic_end_io_acct(struct request_queue *q, int req_op,
1691
- struct hd_struct *part, unsigned long start_time)
1692
-{
1693
- unsigned long duration = jiffies - start_time;
1694
- const int sgrp = op_stat_group(req_op);
1695
- int cpu = part_stat_lock();
1696
-
1697
- part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration));
1698
- part_round_stats(q, cpu, part);
1699
- part_dec_in_flight(q, part, op_is_write(req_op));
1700
-
1701
- part_stat_unlock();
1702
-}
1703
-EXPORT_SYMBOL(generic_end_io_acct);
1704
-
1705
-#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
1706
-void bio_flush_dcache_pages(struct bio *bi)
1707
-{
1708
- struct bio_vec bvec;
1709
- struct bvec_iter iter;
1710
-
1711
- bio_for_each_segment(bvec, bi, iter)
1712
- flush_dcache_page(bvec.bv_page);
1713
-}
1714
-EXPORT_SYMBOL(bio_flush_dcache_pages);
1715
-#endif
17161392
17171393 static inline bool bio_remaining_done(struct bio *bio)
17181394 {
@@ -1752,10 +1428,6 @@
17521428 again:
17531429 if (!bio_remaining_done(bio))
17541430 return;
1755
-
1756
- if (!blk_crypto_endio(bio))
1757
- return;
1758
-
17591431 if (!bio_integrity_endio(bio))
17601432 return;
17611433
@@ -1776,8 +1448,7 @@
17761448 }
17771449
17781450 if (bio->bi_disk && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
1779
- trace_block_bio_complete(bio->bi_disk->queue, bio,
1780
- blk_status_to_errno(bio->bi_status));
1451
+ trace_block_bio_complete(bio->bi_disk->queue, bio);
17811452 bio_clear_flag(bio, BIO_TRACE_COMPLETION);
17821453 }
17831454
@@ -1800,8 +1471,8 @@
18001471 * @bio, and updates @bio to represent the remaining sectors.
18011472 *
18021473 * Unless this is a discard request the newly allocated bio will point
1803
- * to @bio's bi_io_vec; it is the caller's responsibility to ensure that
1804
- * @bio is not freed before the split.
1474
+ * to @bio's bi_io_vec. It is the caller's responsibility to ensure that
1475
+ * neither @bio nor @bs are freed before the split bio.
18051476 */
18061477 struct bio *bio_split(struct bio *bio, int sectors,
18071478 gfp_t gfp, struct bio_set *bs)
@@ -1810,6 +1481,10 @@
18101481
18111482 BUG_ON(sectors <= 0);
18121483 BUG_ON(sectors >= bio_sectors(bio));
1484
+
1485
+ /* Zone append commands cannot be split */
1486
+ if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND))
1487
+ return NULL;
18131488
18141489 split = bio_clone_fast(bio, gfp, bs);
18151490 if (!split)
@@ -1821,7 +1496,6 @@
18211496 bio_integrity_trim(split);
18221497
18231498 bio_advance(bio, split->bi_iter.bi_size);
1824
- bio->bi_iter.bi_done = 0;
18251499
18261500 if (bio_flagged(bio, BIO_TRACE_COMPLETION))
18271501 bio_set_flag(split, BIO_TRACE_COMPLETION);
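A hedged sketch of the split-and-chain pattern this helper is written for, modelled on how the block layer re-queues the remainder of an oversized bio; split_front() and max_sectors are illustrative, and the NULL return for zone-append bios is not handled here.

#include <linux/bio.h>
#include <linux/blkdev.h>

static struct bio *split_front(struct bio *bio, unsigned int max_sectors,
			       struct bio_set *bs)
{
	struct bio *split;

	if (bio_sectors(bio) <= max_sectors)
		return bio;

	split = bio_split(bio, max_sectors, GFP_NOIO, bs);
	bio_chain(split, bio);

	/* Re-queue the remainder; the caller continues with the front part. */
	submit_bio_noacct(bio);
	return split;
}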
@@ -1846,10 +1520,7 @@
18461520 if (offset == 0 && size == bio->bi_iter.bi_size)
18471521 return;
18481522
1849
- bio_clear_flag(bio, BIO_SEG_VALID);
1850
-
18511523 bio_advance(bio, offset << 9);
1852
-
18531524 bio->bi_iter.bi_size = size;
18541525
18551526 if (bio_integrity(bio))
@@ -1968,106 +1639,6 @@
19681639 }
19691640 EXPORT_SYMBOL(bioset_init_from_src);
19701641
1971
-#ifdef CONFIG_BLK_CGROUP
1972
-
1973
-#ifdef CONFIG_MEMCG
1974
-/**
1975
- * bio_associate_blkcg_from_page - associate a bio with the page's blkcg
1976
- * @bio: target bio
1977
- * @page: the page to lookup the blkcg from
1978
- *
1979
- * Associate @bio with the blkcg from @page's owning memcg. This works like
1980
- * every other associate function wrt references.
1981
- */
1982
-int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
1983
-{
1984
- struct cgroup_subsys_state *blkcg_css;
1985
-
1986
- if (unlikely(bio->bi_css))
1987
- return -EBUSY;
1988
- if (!page->mem_cgroup)
1989
- return 0;
1990
- blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
1991
- &io_cgrp_subsys);
1992
- bio->bi_css = blkcg_css;
1993
- return 0;
1994
-}
1995
-#endif /* CONFIG_MEMCG */
1996
-
1997
-/**
1998
- * bio_associate_blkcg - associate a bio with the specified blkcg
1999
- * @bio: target bio
2000
- * @blkcg_css: css of the blkcg to associate
2001
- *
2002
- * Associate @bio with the blkcg specified by @blkcg_css. Block layer will
2003
- * treat @bio as if it were issued by a task which belongs to the blkcg.
2004
- *
2005
- * This function takes an extra reference of @blkcg_css which will be put
2006
- * when @bio is released. The caller must own @bio and is responsible for
2007
- * synchronizing calls to this function.
2008
- */
2009
-int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
2010
-{
2011
- if (unlikely(bio->bi_css))
2012
- return -EBUSY;
2013
- css_get(blkcg_css);
2014
- bio->bi_css = blkcg_css;
2015
- return 0;
2016
-}
2017
-EXPORT_SYMBOL_GPL(bio_associate_blkcg);
2018
-
2019
-/**
2020
- * bio_associate_blkg - associate a bio with the specified blkg
2021
- * @bio: target bio
2022
- * @blkg: the blkg to associate
2023
- *
2024
- * Associate @bio with the blkg specified by @blkg. This is the queue specific
2025
- * blkcg information associated with the @bio, a reference will be taken on the
2026
- * @blkg and will be freed when the bio is freed.
2027
- */
2028
-int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
2029
-{
2030
- if (unlikely(bio->bi_blkg))
2031
- return -EBUSY;
2032
- if (!blkg_try_get(blkg))
2033
- return -ENODEV;
2034
- bio->bi_blkg = blkg;
2035
- return 0;
2036
-}
2037
-
2038
-/**
2039
- * bio_disassociate_task - undo bio_associate_current()
2040
- * @bio: target bio
2041
- */
2042
-void bio_disassociate_task(struct bio *bio)
2043
-{
2044
- if (bio->bi_ioc) {
2045
- put_io_context(bio->bi_ioc);
2046
- bio->bi_ioc = NULL;
2047
- }
2048
- if (bio->bi_css) {
2049
- css_put(bio->bi_css);
2050
- bio->bi_css = NULL;
2051
- }
2052
- if (bio->bi_blkg) {
2053
- blkg_put(bio->bi_blkg);
2054
- bio->bi_blkg = NULL;
2055
- }
2056
-}
2057
-
2058
-/**
2059
- * bio_clone_blkcg_association - clone blkcg association from src to dst bio
2060
- * @dst: destination bio
2061
- * @src: source bio
2062
- */
2063
-void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
2064
-{
2065
- if (src->bi_css)
2066
- WARN_ON(bio_associate_blkcg(dst, src->bi_css));
2067
-}
2068
-EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
2069
-#endif /* CONFIG_BLK_CGROUP */
2070
-
20711642 static void __init biovec_init_slabs(void)
20721643 {
20731644 int i;
@@ -2093,6 +1664,9 @@
20931664 bio_slab_nr = 0;
20941665 bio_slabs = kcalloc(bio_slab_max, sizeof(struct bio_slab),
20951666 GFP_KERNEL);
1667
+
1668
+ BUILD_BUG_ON(BIO_FLAG_LAST > BVEC_POOL_OFFSET);
1669
+
20961670 if (!bio_slabs)
20971671 panic("bio: can't allocate bios\n");
20981672