hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/block/bio.c
@@ -1,19 +1,6 @@
1
+// SPDX-License-Identifier: GPL-2.0
12 /*
23 * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
3
- *
4
- * This program is free software; you can redistribute it and/or modify
5
- * it under the terms of the GNU General Public License version 2 as
6
- * published by the Free Software Foundation.
7
- *
8
- * This program is distributed in the hope that it will be useful,
9
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
- * GNU General Public License for more details.
12
- *
13
- * You should have received a copy of the GNU General Public Licens
14
- * along with this program; if not, write to the Free Software
15
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
16
- *
174 */
185 #include <linux/mm.h>
196 #include <linux/swap.h>
@@ -29,6 +16,8 @@
2916 #include <linux/workqueue.h>
3017 #include <linux/cgroup.h>
3118 #include <linux/blk-cgroup.h>
19
+#include <linux/highmem.h>
20
+#include <linux/sched/sysctl.h>
3221 #include <linux/blk-crypto.h>
3322
3423 #include <trace/events/block.h>
@@ -245,7 +234,14 @@
245234
246235 void bio_uninit(struct bio *bio)
247236 {
248
- bio_disassociate_task(bio);
237
+#ifdef CONFIG_BLK_CGROUP
238
+ if (bio->bi_blkg) {
239
+ blkg_put(bio->bi_blkg);
240
+ bio->bi_blkg = NULL;
241
+ }
242
+#endif
243
+ if (bio_integrity(bio))
244
+ bio_integrity_free(bio);
249245
250246 bio_crypt_free_ctx(bio);
251247 }
@@ -331,7 +327,7 @@
331327 /**
332328 * bio_chain - chain bio completions
333329 * @bio: the target bio
334
- * @parent: the @bio's parent bio
330
+ * @parent: the parent bio of @bio
335331 *
336332 * The caller won't have a bi_end_io called when @bio completes - instead,
337333 * @parent's bi_end_io won't be called until both @parent and @bio have
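A minimal caller sketch (not part of this patch) of the pattern the comment describes; the helper issue_prefix_io(), its parameters and the single-page payload are invented for illustration, and error and completion bookkeeping beyond the chain itself is omitted.

#include <linux/bio.h>
#include <linux/blkdev.h>

static void issue_prefix_io(struct bio *parent, struct block_device *bdev,
			    sector_t sector, struct page *page)
{
	/* One-segment child; a mempool-backed bio_alloc() does not fail. */
	struct bio *child = bio_alloc(GFP_NOIO, 1);

	bio_set_dev(child, bdev);
	child->bi_opf = REQ_OP_WRITE;
	child->bi_iter.bi_sector = sector;
	__bio_add_page(child, page, PAGE_SIZE, 0);

	/* Give up child's bi_end_io: parent completes only after both finish. */
	bio_chain(child, parent);
	submit_bio(child);
	submit_bio(parent);
}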
@@ -362,7 +358,7 @@
362358 if (!bio)
363359 break;
364360
365
- generic_make_request(bio);
361
+ submit_bio_noacct(bio);
366362 }
367363 }
368364
@@ -420,19 +416,19 @@
420416 * submit the previously allocated bio for IO before attempting to allocate
421417 * a new one. Failure to do so can cause deadlocks under memory pressure.
422418 *
423
- * Note that when running under generic_make_request() (i.e. any block
419
+ * Note that when running under submit_bio_noacct() (i.e. any block
424420 * driver), bios are not submitted until after you return - see the code in
425
- * generic_make_request() that converts recursion into iteration, to prevent
421
+ * submit_bio_noacct() that converts recursion into iteration, to prevent
426422 * stack overflows.
427423 *
428424 * This would normally mean allocating multiple bios under
429
- * generic_make_request() would be susceptible to deadlocks, but we have
425
+ * submit_bio_noacct() would be susceptible to deadlocks, but we have
430426 * deadlock avoidance code that resubmits any blocked bios from a rescuer
431427 * thread.
432428 *
433429 * However, we do not guarantee forward progress for allocations from other
434430 * mempools. Doing multiple allocations from the same mempool under
435
- * generic_make_request() should be avoided - instead, use bio_set's front_pad
431
+ * submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
436432 * for per bio allocations.
437433 *
438434 * RETURNS:
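A hedged sketch of the allocation discipline described above, assuming a caller-provided bio_set and page array; write_range(), write_range_endio() and their parameters are illustrative. Each bio is submitted before the next allocation from the same bio_set, so the mempool can be refilled by completing I/O instead of deadlocking.

#include <linux/bio.h>
#include <linux/blkdev.h>

static void write_range_endio(struct bio *bio)
{
	bio_put(bio);
}

static void write_range(struct block_device *bdev, struct bio_set *bs,
			struct page **pages, unsigned int nr_pages,
			sector_t sector)
{
	unsigned int i = 0;

	while (i < nr_pages) {
		struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, bs);

		bio_set_dev(bio, bdev);
		bio->bi_opf = REQ_OP_WRITE;
		bio->bi_iter.bi_sector = sector + ((sector_t)i << (PAGE_SHIFT - 9));
		bio->bi_end_io = write_range_endio;

		while (i < nr_pages &&
		       bio_add_page(bio, pages[i], PAGE_SIZE, 0) == PAGE_SIZE)
			i++;

		/* Submit before the next allocation from @bs. */
		submit_bio(bio);
	}
}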
@@ -452,9 +448,7 @@
452448 if (nr_iovecs > UIO_MAXIOV)
453449 return NULL;
454450
455
- p = kmalloc(sizeof(struct bio) +
456
- nr_iovecs * sizeof(struct bio_vec),
457
- gfp_mask);
451
+ p = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
458452 front_pad = 0;
459453 inline_vecs = nr_iovecs;
460454 } else {
@@ -463,14 +457,14 @@
463457 nr_iovecs > 0))
464458 return NULL;
465459 /*
466
- * generic_make_request() converts recursion to iteration; this
460
+ * submit_bio_noacct() converts recursion to iteration; this
467461 * means if we're running beneath it, any bios we allocate and
468462 * submit will not be submitted (and thus freed) until after we
469463 * return.
470464 *
471465 * This exposes us to a potential deadlock if we allocate
472466 * multiple bios from the same bio_set() while running
473
- * underneath generic_make_request(). If we were to allocate
467
+ * underneath submit_bio_noacct(). If we were to allocate
474468 * multiple bios (say a stacking block driver that was splitting
475469 * bios), we would deadlock if we exhausted the mempool's
476470 * reserve.
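A hedged sketch of the front_pad alternative the documentation recommends: one allocation from the bio_set yields both the driver's per-I/O state and the bio. struct my_io, my_bio_set and the helpers are invented names, and the pool size of 64 is arbitrary.

#include <linux/bio.h>

struct my_io {
	void		*private;	/* driver per-I/O state */
	struct bio	bio;		/* must come last */
};

static struct bio_set my_bio_set;

static int my_init(void)
{
	return bioset_init(&my_bio_set, 64,
			   offsetof(struct my_io, bio), BIOSET_NEED_BVECS);
}

static struct my_io *alloc_my_io(unsigned int nr_vecs)
{
	/* GFP_NOIO plus a mempool: this may block but does not fail. */
	struct bio *bio = bio_alloc_bioset(GFP_NOIO, nr_vecs, &my_bio_set);

	return container_of(bio, struct my_io, bio);
}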
@@ -551,6 +545,99 @@
551545 EXPORT_SYMBOL(zero_fill_bio_iter);
552546
553547 /**
548
+ * bio_truncate - truncate the bio to @new_size bytes
549
+ * @bio: the bio to be truncated
550
+ * @new_size: new size for truncating the bio
551
+ *
552
+ * Description:
553
+ * Truncate the bio to new size of @new_size. If bio_op(bio) is
554
+ * REQ_OP_READ, zero the truncated part. This function should only
555
+ * be used for handling corner cases, such as bio eod.
556
+ */
557
+void bio_truncate(struct bio *bio, unsigned new_size)
558
+{
559
+ struct bio_vec bv;
560
+ struct bvec_iter iter;
561
+ unsigned int done = 0;
562
+ bool truncated = false;
563
+
564
+ if (new_size >= bio->bi_iter.bi_size)
565
+ return;
566
+
567
+ if (bio_op(bio) != REQ_OP_READ)
568
+ goto exit;
569
+
570
+ bio_for_each_segment(bv, bio, iter) {
571
+ if (done + bv.bv_len > new_size) {
572
+ unsigned offset;
573
+
574
+ if (!truncated)
575
+ offset = new_size - done;
576
+ else
577
+ offset = 0;
578
+ zero_user(bv.bv_page, bv.bv_offset + offset,
579
+ bv.bv_len - offset);
580
+ truncated = true;
581
+ }
582
+ done += bv.bv_len;
583
+ }
584
+
585
+ exit:
586
+ /*
587
+ * Don't touch bvec table here and make it really immutable, since
588
+ * fs bio user has to retrieve all pages via bio_for_each_segment_all
589
+ * in its .end_bio() callback.
590
+ *
591
+ * It is enough to truncate bio by updating .bi_size since we can make
592
+ * correct bvec with the updated .bi_size for drivers.
593
+ */
594
+ bio->bi_iter.bi_size = new_size;
595
+}
596
+
597
+/**
598
+ * guard_bio_eod - truncate a BIO to fit the block device
599
+ * @bio: bio to truncate
600
+ *
601
+ * This allows us to do IO even on the odd last sectors of a device, even if the
602
+ * block size is some multiple of the physical sector size.
603
+ *
604
+ * We'll just truncate the bio to the size of the device, and clear the end of
605
+ * the buffer head manually. Truly out-of-range accesses will turn into actual
606
+ * I/O errors, this only handles the "we need to be able to do I/O at the final
607
+ * sector" case.
608
+ */
609
+void guard_bio_eod(struct bio *bio)
610
+{
611
+ sector_t maxsector;
612
+ struct hd_struct *part;
613
+
614
+ rcu_read_lock();
615
+ part = __disk_get_part(bio->bi_disk, bio->bi_partno);
616
+ if (part)
617
+ maxsector = part_nr_sects_read(part);
618
+ else
619
+ maxsector = get_capacity(bio->bi_disk);
620
+ rcu_read_unlock();
621
+
622
+ if (!maxsector)
623
+ return;
624
+
625
+ /*
626
+ * If the *whole* IO is past the end of the device,
627
+ * let it through, and the IO layer will turn it into
628
+ * an EIO.
629
+ */
630
+ if (unlikely(bio->bi_iter.bi_sector >= maxsector))
631
+ return;
632
+
633
+ maxsector -= bio->bi_iter.bi_sector;
634
+ if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
635
+ return;
636
+
637
+ bio_truncate(bio, maxsector << 9);
638
+}
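A hedged usage sketch (not from this patch), modelled on how a buffer- or page-based read path might call the helper right before submission; submit_block_read() is an invented name.

#include <linux/bio.h>

static void submit_block_read(struct bio *bio)
{
	bio->bi_opf = REQ_OP_READ;

	/*
	 * Clip the bio to the device size; for a read the clipped tail is
	 * zeroed by bio_truncate().  A bio that is entirely out of range is
	 * left alone and will complete with an error.
	 */
	guard_bio_eod(bio);
	submit_bio(bio);
}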
639
+
640
+/**
554641 * bio_put - release a reference to a bio
555642 * @bio: bio to release reference to
556643 *
@@ -573,15 +660,6 @@
573660 }
574661 }
575662 EXPORT_SYMBOL(bio_put);
576
-
577
-inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
578
-{
579
- if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
580
- blk_recount_segments(q, bio);
581
-
582
- return bio->bi_phys_segments;
583
-}
584
-EXPORT_SYMBOL(bio_phys_segments);
585663
586664 /**
587665 * __bio_clone_fast - clone a bio that shares the original bio's biovec
@@ -613,7 +691,8 @@
613691 bio->bi_iter = bio_src->bi_iter;
614692 bio->bi_io_vec = bio_src->bi_io_vec;
615693
616
- bio_clone_blkcg_association(bio, bio_src);
694
+ bio_clone_blkg_association(bio, bio_src);
695
+ blkcg_bio_issue_init(bio);
617696 }
618697 EXPORT_SYMBOL(__bio_clone_fast);
619698
@@ -635,133 +714,162 @@
635714
636715 __bio_clone_fast(b, bio);
637716
638
- bio_crypt_clone(b, bio, gfp_mask);
717
+ if (bio_crypt_clone(b, bio, gfp_mask) < 0)
718
+ goto err_put;
639719
640720 if (bio_integrity(bio) &&
641
- bio_integrity_clone(b, bio, gfp_mask) < 0) {
642
- bio_put(b);
643
- return NULL;
644
- }
721
+ bio_integrity_clone(b, bio, gfp_mask) < 0)
722
+ goto err_put;
645723
646724 return b;
725
+
726
+err_put:
727
+ bio_put(b);
728
+ return NULL;
647729 }
648730 EXPORT_SYMBOL(bio_clone_fast);
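A hedged sketch of a stacking-driver style call site. With this change bio_clone_fast() also returns NULL when the crypto or integrity context cannot be duplicated, so the NULL check below covers more than mempool exhaustion; remap_and_submit(), clone_endio() and the blanket -ENOMEM return are illustrative.

#include <linux/bio.h>
#include <linux/blkdev.h>

static void clone_endio(struct bio *clone)
{
	struct bio *orig = clone->bi_private;

	orig->bi_status = clone->bi_status;
	bio_put(clone);
	bio_endio(orig);
}

static int remap_and_submit(struct bio *bio, struct bio_set *bs,
			    struct block_device *lower)
{
	struct bio *clone = bio_clone_fast(bio, GFP_NOIO, bs);

	if (!clone)
		return -ENOMEM;

	bio_set_dev(clone, lower);
	clone->bi_private = bio;
	clone->bi_end_io = clone_endio;
	submit_bio(clone);
	return 0;
}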
649731
650
-/**
651
- * bio_add_pc_page - attempt to add page to bio
652
- * @q: the target queue
653
- * @bio: destination bio
654
- * @page: page to add
655
- * @len: vec entry length
656
- * @offset: vec entry offset
657
- *
658
- * Attempt to add a page to the bio_vec maplist. This can fail for a
659
- * number of reasons, such as the bio being full or target block device
660
- * limitations. The target block device must allow bio's up to PAGE_SIZE,
661
- * so it is always possible to add a single page to an empty bio.
662
- *
663
- * This should only be used by REQ_PC bios.
664
- */
665
-int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
666
- *page, unsigned int len, unsigned int offset)
732
+const char *bio_devname(struct bio *bio, char *buf)
667733 {
668
- int retried_segments = 0;
734
+ return disk_name(bio->bi_disk, bio->bi_partno, buf);
735
+}
736
+EXPORT_SYMBOL(bio_devname);
737
+
738
+static inline bool page_is_mergeable(const struct bio_vec *bv,
739
+ struct page *page, unsigned int len, unsigned int off,
740
+ bool *same_page)
741
+{
742
+ size_t bv_end = bv->bv_offset + bv->bv_len;
743
+ phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1;
744
+ phys_addr_t page_addr = page_to_phys(page);
745
+
746
+ if (vec_end_addr + 1 != page_addr + off)
747
+ return false;
748
+ if (xen_domain() && !xen_biovec_phys_mergeable(bv, page))
749
+ return false;
750
+
751
+ *same_page = ((vec_end_addr & PAGE_MASK) == page_addr);
752
+ if (*same_page)
753
+ return true;
754
+ return (bv->bv_page + bv_end / PAGE_SIZE) == (page + off / PAGE_SIZE);
755
+}
756
+
757
+/*
758
+ * Try to merge a page into a segment, while obeying the hardware segment
759
+ * size limit. This is not for normal read/write bios, but for passthrough
760
+ * or Zone Append operations that we can't split.
761
+ */
762
+static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
763
+ struct page *page, unsigned len,
764
+ unsigned offset, bool *same_page)
765
+{
766
+ struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
767
+ unsigned long mask = queue_segment_boundary(q);
768
+ phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
769
+ phys_addr_t addr2 = page_to_phys(page) + offset + len - 1;
770
+
771
+ if ((addr1 | mask) != (addr2 | mask))
772
+ return false;
773
+ if (bv->bv_len + len > queue_max_segment_size(q))
774
+ return false;
775
+ return __bio_try_merge_page(bio, page, len, offset, same_page);
776
+}
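A self-contained userspace illustration (plain C, not kernel code) of the boundary test used in bio_try_merge_hw_seg() above: two byte addresses lie in the same hardware segment exactly when OR-ing each with the boundary mask yields the same value, i.e. they do not straddle a (mask + 1)-aligned boundary.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool same_hw_segment(uint64_t first_byte, uint64_t last_byte,
			    uint64_t boundary_mask)
{
	return (first_byte | boundary_mask) == (last_byte | boundary_mask);
}

int main(void)
{
	uint64_t mask = 0xffff;	/* 64 KiB segment boundary */

	/* 0xfff0..0x1000f straddles the boundary at 0x10000: not mergeable. */
	printf("%d\n", same_hw_segment(0xfff0, 0x1000f, mask));
	/* 0x10000..0x1ffff stays inside one segment: mergeable. */
	printf("%d\n", same_hw_segment(0x10000, 0x1ffff, mask));
	return 0;
}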
777
+
778
+/**
779
+ * bio_add_hw_page - attempt to add a page to a bio with hw constraints
780
+ * @q: the target queue
781
+ * @bio: destination bio
782
+ * @page: page to add
783
+ * @len: vec entry length
784
+ * @offset: vec entry offset
785
+ * @max_sectors: maximum number of sectors that can be added
786
+ * @same_page: return if the segment has been merged inside the same page
787
+ *
788
+ * Add a page to a bio while respecting the hardware max_sectors, max_segment
789
+ * and gap limitations.
790
+ */
791
+int bio_add_hw_page(struct request_queue *q, struct bio *bio,
792
+ struct page *page, unsigned int len, unsigned int offset,
793
+ unsigned int max_sectors, bool *same_page)
794
+{
669795 struct bio_vec *bvec;
670796
671
- /*
672
- * cloned bio must not modify vec list
673
- */
674
- if (unlikely(bio_flagged(bio, BIO_CLONED)))
797
+ if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
675798 return 0;
676799
677
- if (((bio->bi_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q))
800
+ if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
678801 return 0;
679802
680
- /*
681
- * For filesystems with a blocksize smaller than the pagesize
682
- * we will often be called with the same page as last time and
683
- * a consecutive offset. Optimize this special case.
684
- */
685803 if (bio->bi_vcnt > 0) {
686
- struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
687
-
688
- if (page == prev->bv_page &&
689
- offset == prev->bv_offset + prev->bv_len) {
690
- prev->bv_len += len;
691
- bio->bi_iter.bi_size += len;
692
- goto done;
693
- }
804
+ if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page))
805
+ return len;
694806
695807 /*
696
- * If the queue doesn't support SG gaps and adding this
697
- * offset would create a gap, disallow it.
808
+ * If the queue doesn't support SG gaps and adding this segment
809
+ * would create a gap, disallow it.
698810 */
699
- if (bvec_gap_to_prev(q, prev, offset))
811
+ bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
812
+ if (bvec_gap_to_prev(q, bvec, offset))
700813 return 0;
701814 }
702815
703
- if (bio_full(bio))
816
+ if (bio_full(bio, len))
704817 return 0;
705818
706
- /*
707
- * setup the new entry, we might clear it again later if we
708
- * cannot add the page
709
- */
819
+ if (bio->bi_vcnt >= queue_max_segments(q))
820
+ return 0;
821
+
710822 bvec = &bio->bi_io_vec[bio->bi_vcnt];
711823 bvec->bv_page = page;
712824 bvec->bv_len = len;
713825 bvec->bv_offset = offset;
714826 bio->bi_vcnt++;
715
- bio->bi_phys_segments++;
716827 bio->bi_iter.bi_size += len;
717
-
718
- /*
719
- * Perform a recount if the number of segments is greater
720
- * than queue_max_segments(q).
721
- */
722
-
723
- while (bio->bi_phys_segments > queue_max_segments(q)) {
724
-
725
- if (retried_segments)
726
- goto failed;
727
-
728
- retried_segments = 1;
729
- blk_recount_segments(q, bio);
730
- }
731
-
732
- /* If we may be able to merge these biovecs, force a recount */
733
- if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
734
- bio_clear_flag(bio, BIO_SEG_VALID);
735
-
736
- done:
737828 return len;
829
+}
738830
739
- failed:
740
- bvec->bv_page = NULL;
741
- bvec->bv_len = 0;
742
- bvec->bv_offset = 0;
743
- bio->bi_vcnt--;
744
- bio->bi_iter.bi_size -= len;
745
- blk_recount_segments(q, bio);
746
- return 0;
831
+/**
832
+ * bio_add_pc_page - attempt to add page to passthrough bio
833
+ * @q: the target queue
834
+ * @bio: destination bio
835
+ * @page: page to add
836
+ * @len: vec entry length
837
+ * @offset: vec entry offset
838
+ *
839
+ * Attempt to add a page to the bio_vec maplist. This can fail for a
840
+ * number of reasons, such as the bio being full or target block device
841
+ * limitations. The target block device must allow bio's up to PAGE_SIZE,
842
+ * so it is always possible to add a single page to an empty bio.
843
+ *
844
+ * This should only be used by passthrough bios.
845
+ */
846
+int bio_add_pc_page(struct request_queue *q, struct bio *bio,
847
+ struct page *page, unsigned int len, unsigned int offset)
848
+{
849
+ bool same_page = false;
850
+ return bio_add_hw_page(q, bio, page, len, offset,
851
+ queue_max_hw_sectors(q), &same_page);
747852 }
748853 EXPORT_SYMBOL(bio_add_pc_page);
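A hedged sketch of a passthrough-style caller, assuming a physically addressable (e.g. kmalloc'ed) buffer so that virt_to_page() is valid; map_kern_buf() is an invented name. bio_add_pc_page() enforces the queue's max_hw_sectors, max_segments and segment-boundary limits, so the caller only walks the buffer page by page.

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/mm.h>

static int map_kern_buf(struct request_queue *q, struct bio *bio,
			void *data, unsigned int len)
{
	char *p = data;

	while (len) {
		unsigned int off = offset_in_page(p);
		unsigned int bytes = min_t(unsigned int, len, PAGE_SIZE - off);

		if (bio_add_pc_page(q, bio, virt_to_page(p), bytes, off) < bytes)
			return -EINVAL;	/* bio or queue limits exhausted */

		p += bytes;
		len -= bytes;
	}
	return 0;
}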
749854
750855 /**
751856 * __bio_try_merge_page - try appending data to an existing bvec.
752857 * @bio: destination bio
753
- * @page: page to add
858
+ * @page: start page to add
754859 * @len: length of the data to add
755
- * @off: offset of the data in @page
860
+ * @off: offset of the data relative to @page
861
+ * @same_page: return if the segment has been merged inside the same page
756862 *
757863 * Try to add the data at @page + @off to the last bvec of @bio. This is a
758
- * a useful optimisation for file systems with a block size smaller than the
864
+ * useful optimisation for file systems with a block size smaller than the
759865 * page size.
866
+ *
867
+ * Warn if (@len, @off) crosses pages in case that @same_page is true.
760868 *
761869 * Return %true on success or %false on failure.
762870 */
763871 bool __bio_try_merge_page(struct bio *bio, struct page *page,
764
- unsigned int len, unsigned int off)
872
+ unsigned int len, unsigned int off, bool *same_page)
765873 {
766874 if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
767875 return false;
@@ -769,7 +877,11 @@
769877 if (bio->bi_vcnt > 0) {
770878 struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
771879
772
- if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
880
+ if (page_is_mergeable(bv, page, len, off, same_page)) {
881
+ if (bio->bi_iter.bi_size > UINT_MAX - len) {
882
+ *same_page = false;
883
+ return false;
884
+ }
773885 bv->bv_len += len;
774886 bio->bi_iter.bi_size += len;
775887 return true;
@@ -780,11 +892,11 @@
780892 EXPORT_SYMBOL_GPL(__bio_try_merge_page);
781893
782894 /**
783
- * __bio_add_page - add page to a bio in a new segment
895
+ * __bio_add_page - add page(s) to a bio in a new segment
784896 * @bio: destination bio
785
- * @page: page to add
786
- * @len: length of the data to add
787
- * @off: offset of the data in @page
897
+ * @page: start page to add
898
+ * @len: length of the data to add, may cross pages
899
+ * @off: offset of the data relative to @page, may cross pages
788900 *
789901 * Add the data at @page + @off to @bio as a new bvec. The caller must ensure
790902 * that @bio has space for another bvec.
@@ -795,7 +907,7 @@
795907 struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
796908
797909 WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
798
- WARN_ON_ONCE(bio_full(bio));
910
+ WARN_ON_ONCE(bio_full(bio, len));
799911
800912 bv->bv_page = page;
801913 bv->bv_offset = off;
@@ -810,26 +922,72 @@
810922 EXPORT_SYMBOL_GPL(__bio_add_page);
811923
812924 /**
813
- * bio_add_page - attempt to add page to bio
925
+ * bio_add_page - attempt to add page(s) to bio
814926 * @bio: destination bio
815
- * @page: page to add
816
- * @len: vec entry length
817
- * @offset: vec entry offset
927
+ * @page: start page to add
928
+ * @len: vec entry length, may cross pages
929
+ * @offset: vec entry offset relative to @page, may cross pages
818930 *
819
- * Attempt to add a page to the bio_vec maplist. This will only fail
931
+ * Attempt to add page(s) to the bio_vec maplist. This will only fail
820932 * if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
821933 */
822934 int bio_add_page(struct bio *bio, struct page *page,
823935 unsigned int len, unsigned int offset)
824936 {
825
- if (!__bio_try_merge_page(bio, page, len, offset)) {
826
- if (bio_full(bio))
937
+ bool same_page = false;
938
+
939
+ if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
940
+ if (bio_full(bio, len))
827941 return 0;
828942 __bio_add_page(bio, page, len, offset);
829943 }
830944 return len;
831945 }
832946 EXPORT_SYMBOL(bio_add_page);
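A hedged sketch of the usual filesystem-side loop around bio_add_page(): keep appending until the bio reports it is full, then submit it and start a new one. do_add_page() and next_sector are invented, and completion handling (bi_end_io) is omitted.

#include <linux/bio.h>
#include <linux/blkdev.h>

static struct bio *do_add_page(struct bio *bio, struct block_device *bdev,
			       struct page *page, unsigned int len,
			       unsigned int off, sector_t next_sector)
{
	if (bio && bio_add_page(bio, page, len, off) == len)
		return bio;		/* merged into the last bvec or appended */

	if (bio)
		submit_bio(bio);	/* full: send it on its way */

	bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
	bio_set_dev(bio, bdev);
	bio->bi_opf = REQ_OP_WRITE;
	bio->bi_iter.bi_sector = next_sector;
	__bio_add_page(bio, page, len, off);
	return bio;
}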
947
+
948
+void bio_release_pages(struct bio *bio, bool mark_dirty)
949
+{
950
+ struct bvec_iter_all iter_all;
951
+ struct bio_vec *bvec;
952
+
953
+ if (bio_flagged(bio, BIO_NO_PAGE_REF))
954
+ return;
955
+
956
+ bio_for_each_segment_all(bvec, bio, iter_all) {
957
+ if (mark_dirty && !PageCompound(bvec->bv_page))
958
+ set_page_dirty_lock(bvec->bv_page);
959
+ put_page(bvec->bv_page);
960
+ }
961
+}
962
+EXPORT_SYMBOL_GPL(bio_release_pages);
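A hedged sketch of a typical consumer, e.g. a direct-I/O read completion: pages pinned by bio_iov_iter_get_pages() are dropped here, and bio_release_pages() itself skips the put when BIO_NO_PAGE_REF is set. dio_read_endio() is an invented name.

#include <linux/bio.h>

static void dio_read_endio(struct bio *bio)
{
	/* Dirty the pages for a read into user memory, then unpin them. */
	bio_release_pages(bio, bio_data_dir(bio) == READ);
	bio_put(bio);
}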
963
+
964
+static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
965
+{
966
+ const struct bio_vec *bv = iter->bvec;
967
+ unsigned int len;
968
+ size_t size;
969
+
970
+ if (WARN_ON_ONCE(iter->iov_offset > bv->bv_len))
971
+ return -EINVAL;
972
+
973
+ len = min_t(size_t, bv->bv_len - iter->iov_offset, iter->count);
974
+ size = bio_add_page(bio, bv->bv_page, len,
975
+ bv->bv_offset + iter->iov_offset);
976
+ if (unlikely(size != len))
977
+ return -EINVAL;
978
+ iov_iter_advance(iter, size);
979
+ return 0;
980
+}
981
+
982
+static void bio_put_pages(struct page **pages, size_t size, size_t off)
983
+{
984
+ size_t i, nr = DIV_ROUND_UP(size + (off & ~PAGE_MASK), PAGE_SIZE);
985
+
986
+ for (i = 0; i < nr; i++)
987
+ put_page(pages[i]);
988
+}
989
+
990
+#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *))
833991
834992 /**
835993 * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
@@ -839,71 +997,142 @@
839997 * Pins pages from *iter and appends them to @bio's bvec array. The
840998 * pages will have to be released using put_page() when done.
841999 * For multi-segment *iter, this function only adds pages from the
842
- * the next non-empty segment of the iov iterator.
1000
+ * next non-empty segment of the iov iterator.
8431001 */
8441002 static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
8451003 {
846
- unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx;
1004
+ unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
1005
+ unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
8471006 struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
8481007 struct page **pages = (struct page **)bv;
1008
+ bool same_page = false;
1009
+ ssize_t size, left;
1010
+ unsigned len, i;
8491011 size_t offset;
850
- ssize_t size;
1012
+
1013
+ /*
1014
+ * Move page array up in the allocated memory for the bio vecs as far as
1015
+ * possible so that we can start filling biovecs from the beginning
1016
+ * without overwriting the temporary page array.
1017
+ */
1018
+ BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
1019
+ pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
8511020
8521021 size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
8531022 if (unlikely(size <= 0))
8541023 return size ? size : -EFAULT;
855
- idx = nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;
8561024
857
- /*
858
- * Deep magic below: We need to walk the pinned pages backwards
859
- * because we are abusing the space allocated for the bio_vecs
860
- * for the page array. Because the bio_vecs are larger than the
861
- * page pointers by definition this will always work. But it also
862
- * means we can't use bio_add_page, so any changes to it's semantics
863
- * need to be reflected here as well.
864
- */
865
- bio->bi_iter.bi_size += size;
866
- bio->bi_vcnt += nr_pages;
1025
+ for (left = size, i = 0; left > 0; left -= len, i++) {
1026
+ struct page *page = pages[i];
8671027
868
- while (idx--) {
869
- bv[idx].bv_page = pages[idx];
870
- bv[idx].bv_len = PAGE_SIZE;
871
- bv[idx].bv_offset = 0;
1028
+ len = min_t(size_t, PAGE_SIZE - offset, left);
1029
+
1030
+ if (__bio_try_merge_page(bio, page, len, offset, &same_page)) {
1031
+ if (same_page)
1032
+ put_page(page);
1033
+ } else {
1034
+ if (WARN_ON_ONCE(bio_full(bio, len))) {
1035
+ bio_put_pages(pages + i, left, offset);
1036
+ return -EINVAL;
1037
+ }
1038
+ __bio_add_page(bio, page, len, offset);
1039
+ }
1040
+ offset = 0;
8721041 }
873
-
874
- bv[0].bv_offset += offset;
875
- bv[0].bv_len -= offset;
876
- bv[nr_pages - 1].bv_len -= nr_pages * PAGE_SIZE - offset - size;
8771042
8781043 iov_iter_advance(iter, size);
8791044 return 0;
8801045 }
8811046
1047
+static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
1048
+{
1049
+ unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
1050
+ unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
1051
+ struct request_queue *q = bio->bi_disk->queue;
1052
+ unsigned int max_append_sectors = queue_max_zone_append_sectors(q);
1053
+ struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
1054
+ struct page **pages = (struct page **)bv;
1055
+ ssize_t size, left;
1056
+ unsigned len, i;
1057
+ size_t offset;
1058
+ int ret = 0;
1059
+
1060
+ /*
1061
+ * Move page array up in the allocated memory for the bio vecs as far as
1062
+ * possible so that we can start filling biovecs from the beginning
1063
+ * without overwriting the temporary page array.
1064
+ */
1065
+ BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
1066
+ pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
1067
+
1068
+ size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
1069
+ if (unlikely(size <= 0))
1070
+ return size ? size : -EFAULT;
1071
+
1072
+ for (left = size, i = 0; left > 0; left -= len, i++) {
1073
+ struct page *page = pages[i];
1074
+ bool same_page = false;
1075
+
1076
+ len = min_t(size_t, PAGE_SIZE - offset, left);
1077
+ if (bio_add_hw_page(q, bio, page, len, offset,
1078
+ max_append_sectors, &same_page) != len) {
1079
+ bio_put_pages(pages + i, left, offset);
1080
+ ret = -EINVAL;
1081
+ break;
1082
+ }
1083
+ if (same_page)
1084
+ put_page(page);
1085
+ offset = 0;
1086
+ }
1087
+
1088
+ iov_iter_advance(iter, size - left);
1089
+ return ret;
1090
+}
1091
+
8821092 /**
883
- * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
1093
+ * bio_iov_iter_get_pages - add user or kernel pages to a bio
8841094 * @bio: bio to add pages to
885
- * @iter: iov iterator describing the region to be mapped
1095
+ * @iter: iov iterator describing the region to be added
8861096 *
887
- * Pins pages from *iter and appends them to @bio's bvec array. The
888
- * pages will have to be released using put_page() when done.
1097
+ * This takes either an iterator pointing to user memory, or one pointing to
1098
+ * kernel pages (BVEC iterator). If we're adding user pages, we pin them and
1099
+ * map them into the kernel. On IO completion, the caller should put those
1100
+ * pages. If we're adding kernel pages, and the caller told us it's safe to
1101
+ * do so, we just have to add the pages to the bio directly. We don't grab an
1102
+ * extra reference to those pages (the user should already have that), and we
1103
+ * don't put the page on IO completion. The caller needs to check if the bio is
1104
+ * flagged BIO_NO_PAGE_REF on IO completion. If it isn't, then pages should be
1105
+ * released.
1106
+ *
8891107 * The function tries, but does not guarantee, to pin as many pages as
890
- * fit into the bio, or are requested in *iter, whatever is smaller.
891
- * If MM encounters an error pinning the requested pages, it stops.
892
- * Error is returned only if 0 pages could be pinned.
1108
+ * fit into the bio, or are requested in @iter, whatever is smaller. If
1109
+ * MM encounters an error pinning the requested pages, it stops. Error
1110
+ * is returned only if 0 pages could be pinned.
8931111 */
8941112 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
8951113 {
896
- unsigned short orig_vcnt = bio->bi_vcnt;
1114
+ const bool is_bvec = iov_iter_is_bvec(iter);
1115
+ int ret;
1116
+
1117
+ if (WARN_ON_ONCE(bio->bi_vcnt))
1118
+ return -EINVAL;
8971119
8981120 do {
899
- int ret = __bio_iov_iter_get_pages(bio, iter);
1121
+ if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
1122
+ if (WARN_ON_ONCE(is_bvec))
1123
+ return -EINVAL;
1124
+ ret = __bio_iov_append_get_pages(bio, iter);
1125
+ } else {
1126
+ if (is_bvec)
1127
+ ret = __bio_iov_bvec_add_pages(bio, iter);
1128
+ else
1129
+ ret = __bio_iov_iter_get_pages(bio, iter);
1130
+ }
1131
+ } while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
9001132
901
- if (unlikely(ret))
902
- return bio->bi_vcnt > orig_vcnt ? 0 : ret;
903
-
904
- } while (iov_iter_count(iter) && !bio_full(bio));
905
-
906
- return 0;
1133
+ if (is_bvec)
1134
+ bio_set_flag(bio, BIO_NO_PAGE_REF);
1135
+ return bio->bi_vcnt ? 0 : ret;
9071136 }
9081137 EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
9091138
@@ -926,12 +1155,21 @@
9261155 int submit_bio_wait(struct bio *bio)
9271156 {
9281157 DECLARE_COMPLETION_ONSTACK_MAP(done, bio->bi_disk->lockdep_map);
1158
+ unsigned long hang_check;
9291159
9301160 bio->bi_private = &done;
9311161 bio->bi_end_io = submit_bio_wait_endio;
9321162 bio->bi_opf |= REQ_SYNC;
9331163 submit_bio(bio);
934
- wait_for_completion_io(&done);
1164
+
1165
+ /* Prevent hang_check timer from firing at us during very long I/O */
1166
+ hang_check = sysctl_hung_task_timeout_secs;
1167
+ if (hang_check)
1168
+ while (!wait_for_completion_io_timeout(&done,
1169
+ hang_check * (HZ/2)))
1170
+ ;
1171
+ else
1172
+ wait_for_completion_io(&done);
9351173
9361174 return blk_status_to_errno(bio->bi_status);
9371175 }
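A hedged sketch of a synchronous caller: a one-page read using an on-stack bio and submit_bio_wait(), which after this change also keeps the hung-task watchdog quiet during very long waits. read_one_page_sync() is an invented name and teardown details are omitted.

#include <linux/bio.h>
#include <linux/blkdev.h>

static int read_one_page_sync(struct block_device *bdev, sector_t sector,
			      struct page *page)
{
	struct bio_vec bvec;
	struct bio bio;

	bio_init(&bio, &bvec, 1);
	bio_set_dev(&bio, bdev);
	bio.bi_opf = REQ_OP_READ;
	bio.bi_iter.bi_sector = sector;
	__bio_add_page(&bio, page, PAGE_SIZE, 0);

	/* Sleeps until the I/O completes; returns 0 or a negative errno. */
	return submit_bio_wait(&bio);
}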
@@ -1043,523 +1281,15 @@
10431281 }
10441282 EXPORT_SYMBOL(bio_list_copy_data);
10451283
1046
-struct bio_map_data {
1047
- int is_our_pages;
1048
- struct iov_iter iter;
1049
- struct iovec iov[];
1050
-};
1051
-
1052
-static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
1053
- gfp_t gfp_mask)
1054
-{
1055
- struct bio_map_data *bmd;
1056
- if (data->nr_segs > UIO_MAXIOV)
1057
- return NULL;
1058
-
1059
- bmd = kmalloc(sizeof(struct bio_map_data) +
1060
- sizeof(struct iovec) * data->nr_segs, gfp_mask);
1061
- if (!bmd)
1062
- return NULL;
1063
- memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
1064
- bmd->iter = *data;
1065
- bmd->iter.iov = bmd->iov;
1066
- return bmd;
1067
-}
1068
-
1069
-/**
1070
- * bio_copy_from_iter - copy all pages from iov_iter to bio
1071
- * @bio: The &struct bio which describes the I/O as destination
1072
- * @iter: iov_iter as source
1073
- *
1074
- * Copy all pages from iov_iter to bio.
1075
- * Returns 0 on success, or error on failure.
1076
- */
1077
-static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
1078
-{
1079
- int i;
1080
- struct bio_vec *bvec;
1081
-
1082
- bio_for_each_segment_all(bvec, bio, i) {
1083
- ssize_t ret;
1084
-
1085
- ret = copy_page_from_iter(bvec->bv_page,
1086
- bvec->bv_offset,
1087
- bvec->bv_len,
1088
- iter);
1089
-
1090
- if (!iov_iter_count(iter))
1091
- break;
1092
-
1093
- if (ret < bvec->bv_len)
1094
- return -EFAULT;
1095
- }
1096
-
1097
- return 0;
1098
-}
1099
-
1100
-/**
1101
- * bio_copy_to_iter - copy all pages from bio to iov_iter
1102
- * @bio: The &struct bio which describes the I/O as source
1103
- * @iter: iov_iter as destination
1104
- *
1105
- * Copy all pages from bio to iov_iter.
1106
- * Returns 0 on success, or error on failure.
1107
- */
1108
-static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
1109
-{
1110
- int i;
1111
- struct bio_vec *bvec;
1112
-
1113
- bio_for_each_segment_all(bvec, bio, i) {
1114
- ssize_t ret;
1115
-
1116
- ret = copy_page_to_iter(bvec->bv_page,
1117
- bvec->bv_offset,
1118
- bvec->bv_len,
1119
- &iter);
1120
-
1121
- if (!iov_iter_count(&iter))
1122
- break;
1123
-
1124
- if (ret < bvec->bv_len)
1125
- return -EFAULT;
1126
- }
1127
-
1128
- return 0;
1129
-}
1130
-
11311284 void bio_free_pages(struct bio *bio)
11321285 {
11331286 struct bio_vec *bvec;
1134
- int i;
1287
+ struct bvec_iter_all iter_all;
11351288
1136
- bio_for_each_segment_all(bvec, bio, i)
1289
+ bio_for_each_segment_all(bvec, bio, iter_all)
11371290 __free_page(bvec->bv_page);
11381291 }
11391292 EXPORT_SYMBOL(bio_free_pages);
1140
-
1141
-/**
1142
- * bio_uncopy_user - finish previously mapped bio
1143
- * @bio: bio being terminated
1144
- *
1145
- * Free pages allocated from bio_copy_user_iov() and write back data
1146
- * to user space in case of a read.
1147
- */
1148
-int bio_uncopy_user(struct bio *bio)
1149
-{
1150
- struct bio_map_data *bmd = bio->bi_private;
1151
- int ret = 0;
1152
-
1153
- if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
1154
- /*
1155
- * if we're in a workqueue, the request is orphaned, so
1156
- * don't copy into a random user address space, just free
1157
- * and return -EINTR so user space doesn't expect any data.
1158
- */
1159
- if (!current->mm)
1160
- ret = -EINTR;
1161
- else if (bio_data_dir(bio) == READ)
1162
- ret = bio_copy_to_iter(bio, bmd->iter);
1163
- if (bmd->is_our_pages)
1164
- bio_free_pages(bio);
1165
- }
1166
- kfree(bmd);
1167
- bio_put(bio);
1168
- return ret;
1169
-}
1170
-
1171
-/**
1172
- * bio_copy_user_iov - copy user data to bio
1173
- * @q: destination block queue
1174
- * @map_data: pointer to the rq_map_data holding pages (if necessary)
1175
- * @iter: iovec iterator
1176
- * @gfp_mask: memory allocation flags
1177
- *
1178
- * Prepares and returns a bio for indirect user io, bouncing data
1179
- * to/from kernel pages as necessary. Must be paired with
1180
- * call bio_uncopy_user() on io completion.
1181
- */
1182
-struct bio *bio_copy_user_iov(struct request_queue *q,
1183
- struct rq_map_data *map_data,
1184
- struct iov_iter *iter,
1185
- gfp_t gfp_mask)
1186
-{
1187
- struct bio_map_data *bmd;
1188
- struct page *page;
1189
- struct bio *bio;
1190
- int i = 0, ret;
1191
- int nr_pages;
1192
- unsigned int len = iter->count;
1193
- unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
1194
-
1195
- bmd = bio_alloc_map_data(iter, gfp_mask);
1196
- if (!bmd)
1197
- return ERR_PTR(-ENOMEM);
1198
-
1199
- /*
1200
- * We need to do a deep copy of the iov_iter including the iovecs.
1201
- * The caller provided iov might point to an on-stack or otherwise
1202
- * shortlived one.
1203
- */
1204
- bmd->is_our_pages = map_data ? 0 : 1;
1205
-
1206
- nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
1207
- if (nr_pages > BIO_MAX_PAGES)
1208
- nr_pages = BIO_MAX_PAGES;
1209
-
1210
- ret = -ENOMEM;
1211
- bio = bio_kmalloc(gfp_mask, nr_pages);
1212
- if (!bio)
1213
- goto out_bmd;
1214
-
1215
- ret = 0;
1216
-
1217
- if (map_data) {
1218
- nr_pages = 1 << map_data->page_order;
1219
- i = map_data->offset / PAGE_SIZE;
1220
- }
1221
- while (len) {
1222
- unsigned int bytes = PAGE_SIZE;
1223
-
1224
- bytes -= offset;
1225
-
1226
- if (bytes > len)
1227
- bytes = len;
1228
-
1229
- if (map_data) {
1230
- if (i == map_data->nr_entries * nr_pages) {
1231
- ret = -ENOMEM;
1232
- break;
1233
- }
1234
-
1235
- page = map_data->pages[i / nr_pages];
1236
- page += (i % nr_pages);
1237
-
1238
- i++;
1239
- } else {
1240
- page = alloc_page(q->bounce_gfp | gfp_mask);
1241
- if (!page) {
1242
- ret = -ENOMEM;
1243
- break;
1244
- }
1245
- }
1246
-
1247
- if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
1248
- if (!map_data)
1249
- __free_page(page);
1250
- break;
1251
- }
1252
-
1253
- len -= bytes;
1254
- offset = 0;
1255
- }
1256
-
1257
- if (ret)
1258
- goto cleanup;
1259
-
1260
- if (map_data)
1261
- map_data->offset += bio->bi_iter.bi_size;
1262
-
1263
- /*
1264
- * success
1265
- */
1266
- if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) ||
1267
- (map_data && map_data->from_user)) {
1268
- ret = bio_copy_from_iter(bio, iter);
1269
- if (ret)
1270
- goto cleanup;
1271
- } else {
1272
- if (bmd->is_our_pages)
1273
- zero_fill_bio(bio);
1274
- iov_iter_advance(iter, bio->bi_iter.bi_size);
1275
- }
1276
-
1277
- bio->bi_private = bmd;
1278
- if (map_data && map_data->null_mapped)
1279
- bio_set_flag(bio, BIO_NULL_MAPPED);
1280
- return bio;
1281
-cleanup:
1282
- if (!map_data)
1283
- bio_free_pages(bio);
1284
- bio_put(bio);
1285
-out_bmd:
1286
- kfree(bmd);
1287
- return ERR_PTR(ret);
1288
-}
1289
-
1290
-/**
1291
- * bio_map_user_iov - map user iovec into bio
1292
- * @q: the struct request_queue for the bio
1293
- * @iter: iovec iterator
1294
- * @gfp_mask: memory allocation flags
1295
- *
1296
- * Map the user space address into a bio suitable for io to a block
1297
- * device. Returns an error pointer in case of error.
1298
- */
1299
-struct bio *bio_map_user_iov(struct request_queue *q,
1300
- struct iov_iter *iter,
1301
- gfp_t gfp_mask)
1302
-{
1303
- int j;
1304
- struct bio *bio;
1305
- int ret;
1306
- struct bio_vec *bvec;
1307
-
1308
- if (!iov_iter_count(iter))
1309
- return ERR_PTR(-EINVAL);
1310
-
1311
- bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
1312
- if (!bio)
1313
- return ERR_PTR(-ENOMEM);
1314
-
1315
- while (iov_iter_count(iter)) {
1316
- struct page **pages;
1317
- ssize_t bytes;
1318
- size_t offs, added = 0;
1319
- int npages;
1320
-
1321
- bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
1322
- if (unlikely(bytes <= 0)) {
1323
- ret = bytes ? bytes : -EFAULT;
1324
- goto out_unmap;
1325
- }
1326
-
1327
- npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
1328
-
1329
- if (unlikely(offs & queue_dma_alignment(q))) {
1330
- ret = -EINVAL;
1331
- j = 0;
1332
- } else {
1333
- for (j = 0; j < npages; j++) {
1334
- struct page *page = pages[j];
1335
- unsigned int n = PAGE_SIZE - offs;
1336
- unsigned short prev_bi_vcnt = bio->bi_vcnt;
1337
-
1338
- if (n > bytes)
1339
- n = bytes;
1340
-
1341
- if (!bio_add_pc_page(q, bio, page, n, offs))
1342
- break;
1343
-
1344
- /*
1345
- * check if vector was merged with previous
1346
- * drop page reference if needed
1347
- */
1348
- if (bio->bi_vcnt == prev_bi_vcnt)
1349
- put_page(page);
1350
-
1351
- added += n;
1352
- bytes -= n;
1353
- offs = 0;
1354
- }
1355
- iov_iter_advance(iter, added);
1356
- }
1357
- /*
1358
- * release the pages we didn't map into the bio, if any
1359
- */
1360
- while (j < npages)
1361
- put_page(pages[j++]);
1362
- kvfree(pages);
1363
- /* couldn't stuff something into bio? */
1364
- if (bytes)
1365
- break;
1366
- }
1367
-
1368
- bio_set_flag(bio, BIO_USER_MAPPED);
1369
-
1370
- /*
1371
- * subtle -- if bio_map_user_iov() ended up bouncing a bio,
1372
- * it would normally disappear when its bi_end_io is run.
1373
- * however, we need it for the unmap, so grab an extra
1374
- * reference to it
1375
- */
1376
- bio_get(bio);
1377
- return bio;
1378
-
1379
- out_unmap:
1380
- bio_for_each_segment_all(bvec, bio, j) {
1381
- put_page(bvec->bv_page);
1382
- }
1383
- bio_put(bio);
1384
- return ERR_PTR(ret);
1385
-}
1386
-
1387
-static void __bio_unmap_user(struct bio *bio)
1388
-{
1389
- struct bio_vec *bvec;
1390
- int i;
1391
-
1392
- /*
1393
- * make sure we dirty pages we wrote to
1394
- */
1395
- bio_for_each_segment_all(bvec, bio, i) {
1396
- if (bio_data_dir(bio) == READ)
1397
- set_page_dirty_lock(bvec->bv_page);
1398
-
1399
- put_page(bvec->bv_page);
1400
- }
1401
-
1402
- bio_put(bio);
1403
-}
1404
-
1405
-/**
1406
- * bio_unmap_user - unmap a bio
1407
- * @bio: the bio being unmapped
1408
- *
1409
- * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
1410
- * process context.
1411
- *
1412
- * bio_unmap_user() may sleep.
1413
- */
1414
-void bio_unmap_user(struct bio *bio)
1415
-{
1416
- __bio_unmap_user(bio);
1417
- bio_put(bio);
1418
-}
1419
-
1420
-static void bio_map_kern_endio(struct bio *bio)
1421
-{
1422
- bio_put(bio);
1423
-}
1424
-
1425
-/**
1426
- * bio_map_kern - map kernel address into bio
1427
- * @q: the struct request_queue for the bio
1428
- * @data: pointer to buffer to map
1429
- * @len: length in bytes
1430
- * @gfp_mask: allocation flags for bio allocation
1431
- *
1432
- * Map the kernel address into a bio suitable for io to a block
1433
- * device. Returns an error pointer in case of error.
1434
- */
1435
-struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
1436
- gfp_t gfp_mask)
1437
-{
1438
- unsigned long kaddr = (unsigned long)data;
1439
- unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1440
- unsigned long start = kaddr >> PAGE_SHIFT;
1441
- const int nr_pages = end - start;
1442
- int offset, i;
1443
- struct bio *bio;
1444
-
1445
- bio = bio_kmalloc(gfp_mask, nr_pages);
1446
- if (!bio)
1447
- return ERR_PTR(-ENOMEM);
1448
-
1449
- offset = offset_in_page(kaddr);
1450
- for (i = 0; i < nr_pages; i++) {
1451
- unsigned int bytes = PAGE_SIZE - offset;
1452
-
1453
- if (len <= 0)
1454
- break;
1455
-
1456
- if (bytes > len)
1457
- bytes = len;
1458
-
1459
- if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
1460
- offset) < bytes) {
1461
- /* we don't support partial mappings */
1462
- bio_put(bio);
1463
- return ERR_PTR(-EINVAL);
1464
- }
1465
-
1466
- data += bytes;
1467
- len -= bytes;
1468
- offset = 0;
1469
- }
1470
-
1471
- bio->bi_end_io = bio_map_kern_endio;
1472
- return bio;
1473
-}
1474
-EXPORT_SYMBOL(bio_map_kern);
1475
-
1476
-static void bio_copy_kern_endio(struct bio *bio)
1477
-{
1478
- bio_free_pages(bio);
1479
- bio_put(bio);
1480
-}
1481
-
1482
-static void bio_copy_kern_endio_read(struct bio *bio)
1483
-{
1484
- char *p = bio->bi_private;
1485
- struct bio_vec *bvec;
1486
- int i;
1487
-
1488
- bio_for_each_segment_all(bvec, bio, i) {
1489
- memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
1490
- p += bvec->bv_len;
1491
- }
1492
-
1493
- bio_copy_kern_endio(bio);
1494
-}
1495
-
1496
-/**
1497
- * bio_copy_kern - copy kernel address into bio
1498
- * @q: the struct request_queue for the bio
1499
- * @data: pointer to buffer to copy
1500
- * @len: length in bytes
1501
- * @gfp_mask: allocation flags for bio and page allocation
1502
- * @reading: data direction is READ
1503
- *
1504
- * copy the kernel address into a bio suitable for io to a block
1505
- * device. Returns an error pointer in case of error.
1506
- */
1507
-struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
1508
- gfp_t gfp_mask, int reading)
1509
-{
1510
- unsigned long kaddr = (unsigned long)data;
1511
- unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1512
- unsigned long start = kaddr >> PAGE_SHIFT;
1513
- struct bio *bio;
1514
- void *p = data;
1515
- int nr_pages = 0;
1516
-
1517
- /*
1518
- * Overflow, abort
1519
- */
1520
- if (end < start)
1521
- return ERR_PTR(-EINVAL);
1522
-
1523
- nr_pages = end - start;
1524
- bio = bio_kmalloc(gfp_mask, nr_pages);
1525
- if (!bio)
1526
- return ERR_PTR(-ENOMEM);
1527
-
1528
- while (len) {
1529
- struct page *page;
1530
- unsigned int bytes = PAGE_SIZE;
1531
-
1532
- if (bytes > len)
1533
- bytes = len;
1534
-
1535
- page = alloc_page(q->bounce_gfp | gfp_mask);
1536
- if (!page)
1537
- goto cleanup;
1538
-
1539
- if (!reading)
1540
- memcpy(page_address(page), p, bytes);
1541
-
1542
- if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
1543
- break;
1544
-
1545
- len -= bytes;
1546
- p += bytes;
1547
- }
1548
-
1549
- if (reading) {
1550
- bio->bi_end_io = bio_copy_kern_endio_read;
1551
- bio->bi_private = data;
1552
- } else {
1553
- bio->bi_end_io = bio_copy_kern_endio;
1554
- }
1555
-
1556
- return bio;
1557
-
1558
-cleanup:
1559
- bio_free_pages(bio);
1560
- bio_put(bio);
1561
- return ERR_PTR(-ENOMEM);
1562
-}
15631293
15641294 /*
15651295 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
@@ -1593,22 +1323,12 @@
15931323 void bio_set_pages_dirty(struct bio *bio)
15941324 {
15951325 struct bio_vec *bvec;
1596
- int i;
1326
+ struct bvec_iter_all iter_all;
15971327
1598
- bio_for_each_segment_all(bvec, bio, i) {
1328
+ bio_for_each_segment_all(bvec, bio, iter_all) {
15991329 if (!PageCompound(bvec->bv_page))
16001330 set_page_dirty_lock(bvec->bv_page);
16011331 }
1602
-}
1603
-EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
1604
-
1605
-static void bio_release_pages(struct bio *bio)
1606
-{
1607
- struct bio_vec *bvec;
1608
- int i;
1609
-
1610
- bio_for_each_segment_all(bvec, bio, i)
1611
- put_page(bvec->bv_page);
16121332 }
16131333
16141334 /*
@@ -1643,8 +1363,7 @@
16431363 while ((bio = next) != NULL) {
16441364 next = bio->bi_private;
16451365
1646
- bio_set_pages_dirty(bio);
1647
- bio_release_pages(bio);
1366
+ bio_release_pages(bio, true);
16481367 bio_put(bio);
16491368 }
16501369 }
@@ -1653,14 +1372,14 @@
16531372 {
16541373 struct bio_vec *bvec;
16551374 unsigned long flags;
1656
- int i;
1375
+ struct bvec_iter_all iter_all;
16571376
1658
- bio_for_each_segment_all(bvec, bio, i) {
1377
+ bio_for_each_segment_all(bvec, bio, iter_all) {
16591378 if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
16601379 goto defer;
16611380 }
16621381
1663
- bio_release_pages(bio);
1382
+ bio_release_pages(bio, false);
16641383 bio_put(bio);
16651384 return;
16661385 defer:
@@ -1670,49 +1389,6 @@
16701389 spin_unlock_irqrestore(&bio_dirty_lock, flags);
16711390 schedule_work(&bio_dirty_work);
16721391 }
1673
-EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
1674
-
1675
-void generic_start_io_acct(struct request_queue *q, int op,
1676
- unsigned long sectors, struct hd_struct *part)
1677
-{
1678
- const int sgrp = op_stat_group(op);
1679
- int cpu = part_stat_lock();
1680
-
1681
- part_round_stats(q, cpu, part);
1682
- part_stat_inc(cpu, part, ios[sgrp]);
1683
- part_stat_add(cpu, part, sectors[sgrp], sectors);
1684
- part_inc_in_flight(q, part, op_is_write(op));
1685
-
1686
- part_stat_unlock();
1687
-}
1688
-EXPORT_SYMBOL(generic_start_io_acct);
1689
-
1690
-void generic_end_io_acct(struct request_queue *q, int req_op,
1691
- struct hd_struct *part, unsigned long start_time)
1692
-{
1693
- unsigned long duration = jiffies - start_time;
1694
- const int sgrp = op_stat_group(req_op);
1695
- int cpu = part_stat_lock();
1696
-
1697
- part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration));
1698
- part_round_stats(q, cpu, part);
1699
- part_dec_in_flight(q, part, op_is_write(req_op));
1700
-
1701
- part_stat_unlock();
1702
-}
1703
-EXPORT_SYMBOL(generic_end_io_acct);
1704
-
1705
-#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
1706
-void bio_flush_dcache_pages(struct bio *bi)
1707
-{
1708
- struct bio_vec bvec;
1709
- struct bvec_iter iter;
1710
-
1711
- bio_for_each_segment(bvec, bi, iter)
1712
- flush_dcache_page(bvec.bv_page);
1713
-}
1714
-EXPORT_SYMBOL(bio_flush_dcache_pages);
1715
-#endif
17161392
17171393 static inline bool bio_remaining_done(struct bio *bio)
17181394 {
@@ -1752,10 +1428,6 @@
17521428 again:
17531429 if (!bio_remaining_done(bio))
17541430 return;
1755
-
1756
- if (!blk_crypto_endio(bio))
1757
- return;
1758
-
17591431 if (!bio_integrity_endio(bio))
17601432 return;
17611433
@@ -1776,8 +1448,7 @@
17761448 }
17771449
17781450 if (bio->bi_disk && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
1779
- trace_block_bio_complete(bio->bi_disk->queue, bio,
1780
- blk_status_to_errno(bio->bi_status));
1451
+ trace_block_bio_complete(bio->bi_disk->queue, bio);
17811452 bio_clear_flag(bio, BIO_TRACE_COMPLETION);
17821453 }
17831454
@@ -1800,8 +1471,8 @@
18001471 * @bio, and updates @bio to represent the remaining sectors.
18011472 *
18021473 * Unless this is a discard request the newly allocated bio will point
1803
- * to @bio's bi_io_vec; it is the caller's responsibility to ensure that
1804
- * @bio is not freed before the split.
1474
+ * to @bio's bi_io_vec. It is the caller's responsibility to ensure that
1475
+ * neither @bio nor @bs are freed before the split bio.
18051476 */
18061477 struct bio *bio_split(struct bio *bio, int sectors,
18071478 gfp_t gfp, struct bio_set *bs)
@@ -1810,6 +1481,10 @@
18101481
18111482 BUG_ON(sectors <= 0);
18121483 BUG_ON(sectors >= bio_sectors(bio));
1484
+
1485
+ /* Zone append commands cannot be split */
1486
+ if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND))
1487
+ return NULL;
18131488
18141489 split = bio_clone_fast(bio, gfp, bs);
18151490 if (!split)
@@ -1821,7 +1496,6 @@
18211496 bio_integrity_trim(split);
18221497
18231498 bio_advance(bio, split->bi_iter.bi_size);
1824
- bio->bi_iter.bi_done = 0;
18251499
18261500 if (bio_flagged(bio, BIO_TRACE_COMPLETION))
18271501 bio_set_flag(split, BIO_TRACE_COMPLETION);
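A hedged sketch of the split-and-chain pattern this helper is written for, modelled on how the block layer re-queues the remainder of an oversized bio; split_front() and max_sectors are illustrative, and the NULL return for zone-append bios is not handled here.

#include <linux/bio.h>
#include <linux/blkdev.h>

static struct bio *split_front(struct bio *bio, unsigned int max_sectors,
			       struct bio_set *bs)
{
	struct bio *split;

	if (bio_sectors(bio) <= max_sectors)
		return bio;

	split = bio_split(bio, max_sectors, GFP_NOIO, bs);
	bio_chain(split, bio);

	/* Re-queue the remainder; the caller continues with the front part. */
	submit_bio_noacct(bio);
	return split;
}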
@@ -1846,10 +1520,7 @@
18461520 if (offset == 0 && size == bio->bi_iter.bi_size)
18471521 return;
18481522
1849
- bio_clear_flag(bio, BIO_SEG_VALID);
1850
-
18511523 bio_advance(bio, offset << 9);
1852
-
18531524 bio->bi_iter.bi_size = size;
18541525
18551526 if (bio_integrity(bio))
@@ -1968,106 +1639,6 @@
19681639 }
19691640 EXPORT_SYMBOL(bioset_init_from_src);
19701641
1971
-#ifdef CONFIG_BLK_CGROUP
1972
-
1973
-#ifdef CONFIG_MEMCG
1974
-/**
1975
- * bio_associate_blkcg_from_page - associate a bio with the page's blkcg
1976
- * @bio: target bio
1977
- * @page: the page to lookup the blkcg from
1978
- *
1979
- * Associate @bio with the blkcg from @page's owning memcg. This works like
1980
- * every other associate function wrt references.
1981
- */
1982
-int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
1983
-{
1984
- struct cgroup_subsys_state *blkcg_css;
1985
-
1986
- if (unlikely(bio->bi_css))
1987
- return -EBUSY;
1988
- if (!page->mem_cgroup)
1989
- return 0;
1990
- blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
1991
- &io_cgrp_subsys);
1992
- bio->bi_css = blkcg_css;
1993
- return 0;
1994
-}
1995
-#endif /* CONFIG_MEMCG */
1996
-
1997
-/**
1998
- * bio_associate_blkcg - associate a bio with the specified blkcg
1999
- * @bio: target bio
2000
- * @blkcg_css: css of the blkcg to associate
2001
- *
2002
- * Associate @bio with the blkcg specified by @blkcg_css. Block layer will
2003
- * treat @bio as if it were issued by a task which belongs to the blkcg.
2004
- *
2005
- * This function takes an extra reference of @blkcg_css which will be put
2006
- * when @bio is released. The caller must own @bio and is responsible for
2007
- * synchronizing calls to this function.
2008
- */
2009
-int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
2010
-{
2011
- if (unlikely(bio->bi_css))
2012
- return -EBUSY;
2013
- css_get(blkcg_css);
2014
- bio->bi_css = blkcg_css;
2015
- return 0;
2016
-}
2017
-EXPORT_SYMBOL_GPL(bio_associate_blkcg);
2018
-
2019
-/**
2020
- * bio_associate_blkg - associate a bio with the specified blkg
2021
- * @bio: target bio
2022
- * @blkg: the blkg to associate
2023
- *
2024
- * Associate @bio with the blkg specified by @blkg. This is the queue specific
2025
- * blkcg information associated with the @bio, a reference will be taken on the
2026
- * @blkg and will be freed when the bio is freed.
2027
- */
2028
-int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
2029
-{
2030
- if (unlikely(bio->bi_blkg))
2031
- return -EBUSY;
2032
- if (!blkg_try_get(blkg))
2033
- return -ENODEV;
2034
- bio->bi_blkg = blkg;
2035
- return 0;
2036
-}
2037
-
2038
-/**
2039
- * bio_disassociate_task - undo bio_associate_current()
2040
- * @bio: target bio
2041
- */
2042
-void bio_disassociate_task(struct bio *bio)
2043
-{
2044
- if (bio->bi_ioc) {
2045
- put_io_context(bio->bi_ioc);
2046
- bio->bi_ioc = NULL;
2047
- }
2048
- if (bio->bi_css) {
2049
- css_put(bio->bi_css);
2050
- bio->bi_css = NULL;
2051
- }
2052
- if (bio->bi_blkg) {
2053
- blkg_put(bio->bi_blkg);
2054
- bio->bi_blkg = NULL;
2055
- }
2056
-}
2057
-
2058
-/**
2059
- * bio_clone_blkcg_association - clone blkcg association from src to dst bio
2060
- * @dst: destination bio
2061
- * @src: source bio
2062
- */
2063
-void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
2064
-{
2065
- if (src->bi_css)
2066
- WARN_ON(bio_associate_blkcg(dst, src->bi_css));
2067
-}
2068
-EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
2069
-#endif /* CONFIG_BLK_CGROUP */
2070
-
20711642 static void __init biovec_init_slabs(void)
20721643 {
20731644 int i;
@@ -2093,6 +1664,9 @@
20931664 bio_slab_nr = 0;
20941665 bio_slabs = kcalloc(bio_slab_max, sizeof(struct bio_slab),
20951666 GFP_KERNEL);
1667
+
1668
+ BUILD_BUG_ON(BIO_FLAG_LAST > BVEC_POOL_OFFSET);
1669
+
20961670 if (!bio_slabs)
20971671 panic("bio: can't allocate bios\n");
20981672