.. | .. |
---|
7 | 7 | #include <linux/bio.h> |
---|
8 | 8 | #include <linux/blkdev.h> |
---|
9 | 9 | #include <linux/scatterlist.h> |
---|
| 10 | +#ifndef __GENKSYMS__ |
---|
| 11 | +#include <linux/blk-cgroup.h> |
---|
| 12 | +#endif |
---|
10 | 13 | |
---|
11 | 14 | #include <trace/events/block.h> |
---|
12 | 15 | |
---|
13 | 16 | #include "blk.h" |
---|
| 17 | +#include "blk-rq-qos.h" |
---|
| 18 | + |
---|
| 19 | +static inline bool bio_will_gap(struct request_queue *q, |
---|
| 20 | + struct request *prev_rq, struct bio *prev, struct bio *next) |
---|
| 21 | +{ |
---|
| 22 | + struct bio_vec pb, nb; |
---|
| 23 | + |
---|
| 24 | + if (!bio_has_data(prev) || !queue_virt_boundary(q)) |
---|
| 25 | + return false; |
---|
| 26 | + |
---|
| 27 | + /* |
---|
| 28 | + * Don't merge if the 1st bio starts with non-zero offset, otherwise it |
---|
| 29 | + * is quite difficult to respect the sg gap limit. We work hard to |
---|
| 30 | + * merge a huge number of small single bios in case of mkfs. |
---|
| 31 | + */ |
---|
| 32 | + if (prev_rq) |
---|
| 33 | + bio_get_first_bvec(prev_rq->bio, &pb); |
---|
| 34 | + else |
---|
| 35 | + bio_get_first_bvec(prev, &pb); |
---|
| 36 | + if (pb.bv_offset & queue_virt_boundary(q)) |
---|
| 37 | + return true; |
---|
| 38 | + |
---|
| 39 | + /* |
---|
| 40 | + * We don't need to worry about the situation that the merged segment |
---|
| 41 | + * ends in unaligned virt boundary: |
---|
| 42 | + * |
---|
| 43 | + * - if 'pb' ends aligned, the merged segment ends aligned |
---|
| 44 | + * - if 'pb' ends unaligned, the next bio must include |
---|
| 45 | + * one single bvec of 'nb', otherwise the 'nb' can't |
---|
| 46 | + * merge with 'pb' |
---|
| 47 | + */ |
---|
| 48 | + bio_get_last_bvec(prev, &pb); |
---|
| 49 | + bio_get_first_bvec(next, &nb); |
---|
| 50 | + if (biovec_phys_mergeable(q, &pb, &nb)) |
---|
| 51 | + return false; |
---|
| 52 | + return __bvec_gap_to_prev(q, &pb, nb.bv_offset); |
---|
| 53 | +} |
---|
| 54 | + |
---|
| 55 | +static inline bool req_gap_back_merge(struct request *req, struct bio *bio) |
---|
| 56 | +{ |
---|
| 57 | + return bio_will_gap(req->q, req, req->biotail, bio); |
---|
| 58 | +} |
---|
| 59 | + |
---|
| 60 | +static inline bool req_gap_front_merge(struct request *req, struct bio *bio) |
---|
| 61 | +{ |
---|
| 62 | + return bio_will_gap(req->q, NULL, bio, req->bio); |
---|
| 63 | +} |
---|
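
The new req_gap_back_merge()/req_gap_front_merge() helpers funnel into bio_will_gap(), which rejects a merge whenever two bvecs would leave a hole relative to the queue's virt_boundary mask. As a rough illustration, here is a standalone user-space sketch of that mask test; the 4 KiB boundary and the helper name are hypothetical, and it only mirrors the shape of __bvec_gap_to_prev(), whose body is not part of this hunk:

```c
#include <stdbool.h>
#include <stdio.h>

/* hypothetical virt_boundary mask: a segment must not straddle 4 KiB */
#define VIRT_BOUNDARY_MASK 0xfffUL

/*
 * Merging is only possible if the previous bvec ends exactly on the
 * boundary and the next one starts at a boundary-aligned offset;
 * anything else leaves a gap the hardware cannot express.
 */
static bool gap_to_prev(unsigned long prev_offset, unsigned long prev_len,
			unsigned long next_offset)
{
	return (next_offset & VIRT_BOUNDARY_MASK) ||
	       ((prev_offset + prev_len) & VIRT_BOUNDARY_MASK);
}

int main(void)
{
	/* previous bvec ends 4 KiB aligned, next starts aligned: no gap */
	printf("%d\n", gap_to_prev(0, 4096, 0));	/* 0 */
	/* previous bvec ends mid-window: gap, the bios must not merge */
	printf("%d\n", gap_to_prev(512, 1024, 0));	/* 1 */
	return 0;
}
```
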
14 | 64 | |
---|
15 | 65 | static struct bio *blk_bio_discard_split(struct request_queue *q, |
---|
16 | 66 | struct bio *bio, |
---|
.. | .. |
---|
59 | 109 | static struct bio *blk_bio_write_zeroes_split(struct request_queue *q, |
---|
60 | 110 | struct bio *bio, struct bio_set *bs, unsigned *nsegs) |
---|
61 | 111 | { |
---|
62 | | - *nsegs = 1; |
---|
| 112 | + *nsegs = 0; |
---|
63 | 113 | |
---|
64 | 114 | if (!q->limits.max_write_zeroes_sectors) |
---|
65 | 115 | return NULL; |
---|
.. | .. |
---|
86 | 136 | return bio_split(bio, q->limits.max_write_same_sectors, GFP_NOIO, bs); |
---|
87 | 137 | } |
---|
88 | 138 | |
---|
| 139 | +/* |
---|
| 140 | + * Return the maximum number of sectors from the start of a bio that may be |
---|
| 141 | + * submitted as a single request to a block device. If enough sectors remain, |
---|
| 142 | + * align the end to the physical block size. Otherwise align the end to the |
---|
| 143 | + * logical block size. This approach minimizes the number of non-aligned |
---|
| 144 | + * requests that are submitted to a block device if the start of a bio is not |
---|
| 145 | + * aligned to a physical block boundary. |
---|
| 146 | + */ |
---|
89 | 147 | static inline unsigned get_max_io_size(struct request_queue *q, |
---|
90 | 148 | struct bio *bio) |
---|
91 | 149 | { |
---|
92 | | - unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector); |
---|
93 | | - unsigned mask = queue_logical_block_size(q) - 1; |
---|
| 150 | + unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector, 0); |
---|
| 151 | + unsigned max_sectors = sectors; |
---|
| 152 | + unsigned pbs = queue_physical_block_size(q) >> SECTOR_SHIFT; |
---|
| 153 | + unsigned lbs = queue_logical_block_size(q) >> SECTOR_SHIFT; |
---|
| 154 | + unsigned start_offset = bio->bi_iter.bi_sector & (pbs - 1); |
---|
94 | 155 | |
---|
95 | | - /* aligned to logical block size */ |
---|
96 | | - sectors &= ~(mask >> 9); |
---|
| 156 | + max_sectors += start_offset; |
---|
| 157 | + max_sectors &= ~(pbs - 1); |
---|
| 158 | + if (max_sectors > start_offset) |
---|
| 159 | + return max_sectors - start_offset; |
---|
97 | 160 | |
---|
98 | | - return sectors; |
---|
| 161 | + return sectors & ~(lbs - 1); |
---|
99 | 162 | } |
---|
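
To make the alignment rule above concrete, a standalone sketch of the same arithmetic with made-up queue limits (4 KiB physical blocks, 512-byte logical blocks, a 256-sector cap); this is only an illustration of the calculation, not kernel code:

```c
#include <stdio.h>

static unsigned max_io_sectors(unsigned long long start_sector, unsigned cap)
{
	const unsigned pbs = 8;	/* physical block size in sectors (4 KiB) */
	const unsigned lbs = 1;	/* logical block size in sectors (512 B) */
	unsigned start_offset = start_sector & (pbs - 1);
	unsigned max_sectors = cap;

	/* round the end of the I/O down to a physical block boundary */
	max_sectors += start_offset;
	max_sectors &= ~(pbs - 1);
	if (max_sectors > start_offset)
		return max_sectors - start_offset;

	/* not even one physical block left: fall back to logical alignment */
	return cap & ~(lbs - 1);
}

int main(void)
{
	/* starting 3 sectors into a physical block, a 256-sector cap shrinks
	 * to 253 sectors so the I/O ends on a physical block boundary */
	printf("%u\n", max_io_sectors(3, 256));	/* prints 253 */
	return 0;
}
```
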
100 | 163 | |
---|
| 164 | +static inline unsigned get_max_segment_size(const struct request_queue *q, |
---|
| 165 | + struct page *start_page, |
---|
| 166 | + unsigned long offset) |
---|
| 167 | +{ |
---|
| 168 | + unsigned long mask = queue_segment_boundary(q); |
---|
| 169 | + |
---|
| 170 | + offset = mask & (page_to_phys(start_page) + offset); |
---|
| 171 | + |
---|
| 172 | + /* |
---|
| 173 | + * Overflow may be triggered in case of a zero page physical address |
---|
| 174 | + * on a 32-bit arch; use the queue's max segment size when that happens. |
---|
| 175 | + */ |
---|
| 176 | + return min_not_zero(mask - offset + 1, |
---|
| 177 | + (unsigned long)queue_max_segment_size(q)); |
---|
| 178 | +} |
---|
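
A standalone sketch of the boundary arithmetic above, using a hypothetical 64 KiB segment boundary and 128 KiB max segment size; min_not_zero() is re-implemented here only so the example runs in user space:

```c
#include <stdio.h>

static unsigned long min_not_zero(unsigned long a, unsigned long b)
{
	if (a == 0)
		return b;
	if (b == 0)
		return a;
	return a < b ? a : b;
}

/*
 * mask - offset + 1 is how many bytes remain before the segment boundary;
 * min_not_zero() guards the case where that expression wraps to 0.
 */
static unsigned long max_seg_size(unsigned long phys, unsigned long offset)
{
	const unsigned long mask = 0xffffUL;		/* 64 KiB boundary */
	const unsigned long queue_max = 128 * 1024;	/* 128 KiB */

	offset = mask & (phys + offset);
	return min_not_zero(mask - offset + 1, queue_max);
}

int main(void)
{
	/* 16 bytes into a boundary window: 64 KiB minus 16 bytes remain */
	printf("%lu\n", max_seg_size(0x10000, 16));	/* 65520 */
	/* exactly on a boundary: the full 64 KiB window is usable */
	printf("%lu\n", max_seg_size(0x20000, 0));	/* 65536 */
	return 0;
}
```
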
| 179 | + |
---|
| 180 | +/** |
---|
| 181 | + * bvec_split_segs - verify whether or not a bvec should be split in the middle |
---|
| 182 | + * @q: [in] request queue associated with the bio associated with @bv |
---|
| 183 | + * @bv: [in] bvec to examine |
---|
| 184 | + * @nsegs: [in,out] Number of segments in the bio being built. Incremented |
---|
| 185 | + * by the number of segments from @bv that may be appended to that |
---|
| 186 | + * bio without exceeding @max_segs |
---|
| 187 | + * @sectors: [in,out] Number of sectors in the bio being built. Incremented |
---|
| 188 | + * by the number of sectors from @bv that may be appended to that |
---|
| 189 | + * bio without exceeding @max_sectors |
---|
| 190 | + * @max_segs: [in] upper bound for *@nsegs |
---|
| 191 | + * @max_sectors: [in] upper bound for *@sectors |
---|
| 192 | + * |
---|
| 193 | + * When splitting a bio, it can happen that a bvec is encountered that is too |
---|
| 194 | + * big to fit in a single segment and hence that it has to be split in the |
---|
| 195 | + * middle. This function verifies whether or not that should happen. The value |
---|
| 196 | + * %true is returned if and only if appending the entire @bv to a bio with |
---|
| 197 | + * *@nsegs segments and *@sectors sectors would make that bio unacceptable for |
---|
| 198 | + * the block driver. |
---|
| 199 | + */ |
---|
| 200 | +static bool bvec_split_segs(const struct request_queue *q, |
---|
| 201 | + const struct bio_vec *bv, unsigned *nsegs, |
---|
| 202 | + unsigned *sectors, unsigned max_segs, |
---|
| 203 | + unsigned max_sectors) |
---|
| 204 | +{ |
---|
| 205 | + unsigned max_len = (min(max_sectors, UINT_MAX >> 9) - *sectors) << 9; |
---|
| 206 | + unsigned len = min(bv->bv_len, max_len); |
---|
| 207 | + unsigned total_len = 0; |
---|
| 208 | + unsigned seg_size = 0; |
---|
| 209 | + |
---|
| 210 | + while (len && *nsegs < max_segs) { |
---|
| 211 | + seg_size = get_max_segment_size(q, bv->bv_page, |
---|
| 212 | + bv->bv_offset + total_len); |
---|
| 213 | + seg_size = min(seg_size, len); |
---|
| 214 | + |
---|
| 215 | + (*nsegs)++; |
---|
| 216 | + total_len += seg_size; |
---|
| 217 | + len -= seg_size; |
---|
| 218 | + |
---|
| 219 | + if ((bv->bv_offset + total_len) & queue_virt_boundary(q)) |
---|
| 220 | + break; |
---|
| 221 | + } |
---|
| 222 | + |
---|
| 223 | + *sectors += total_len >> 9; |
---|
| 224 | + |
---|
| 225 | + /* tell the caller to split the bvec if it is too big to fit */ |
---|
| 226 | + return len > 0 || bv->bv_len > max_len; |
---|
| 227 | +} |
---|
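
A simplified user-space model of the loop above may help: one oversized "bvec" is carved into segments no larger than an assumed 64 KiB max segment size while the caller's segment and sector budgets are charged; the virt-boundary early exit is omitted for brevity:

```c
#include <stdbool.h>
#include <stdio.h>

/* returns true if the bvec did not fit entirely, i.e. the bio must split */
static bool split_segs(unsigned bv_len, unsigned *nsegs, unsigned *sectors,
		       unsigned max_segs, unsigned max_sectors)
{
	const unsigned max_seg_size = 64 * 1024;	/* assumed limit */
	unsigned max_len = (max_sectors - *sectors) << 9;
	unsigned len = bv_len < max_len ? bv_len : max_len;
	unsigned total_len = 0;

	while (len && *nsegs < max_segs) {
		unsigned seg = len < max_seg_size ? len : max_seg_size;

		(*nsegs)++;
		total_len += seg;
		len -= seg;
	}
	*sectors += total_len >> 9;

	return len > 0 || bv_len > max_len;
}

int main(void)
{
	unsigned nsegs = 0, sectors = 0;

	/* a 256 KiB bvec becomes four 64 KiB segments and fits the budget */
	bool split = split_segs(256 * 1024, &nsegs, &sectors, 128, 2560);

	printf("split=%d nsegs=%u sectors=%u\n", split, nsegs, sectors);
	return 0;
}
```
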
| 228 | + |
---|
| 229 | +/** |
---|
| 230 | + * blk_bio_segment_split - split a bio in two bios |
---|
| 231 | + * @q: [in] request queue pointer |
---|
| 232 | + * @bio: [in] bio to be split |
---|
| 233 | + * @bs: [in] bio set to allocate the clone from |
---|
| 234 | + * @segs: [out] number of segments in the bio with the first half of the sectors |
---|
| 235 | + * |
---|
| 236 | + * Clone @bio, update the bi_iter of the clone to represent the first sectors |
---|
| 237 | + * of @bio and update @bio->bi_iter to represent the remaining sectors. The |
---|
| 238 | + * following is guaranteed for the cloned bio: |
---|
| 239 | + * - That it has at most get_max_io_size(@q, @bio) sectors. |
---|
| 240 | + * - That it has at most queue_max_segments(@q) segments. |
---|
| 241 | + * |
---|
| 242 | + * Except for discard requests the cloned bio will point at the bi_io_vec of |
---|
| 243 | + * the original bio. It is the responsibility of the caller to ensure that the |
---|
| 244 | + * original bio is not freed before the cloned bio. The caller is also |
---|
| 245 | + * responsible for ensuring that @bs is only destroyed after processing of the |
---|
| 246 | + * split bio has finished. |
---|
| 247 | + */ |
---|
101 | 248 | static struct bio *blk_bio_segment_split(struct request_queue *q, |
---|
102 | 249 | struct bio *bio, |
---|
103 | 250 | struct bio_set *bs, |
---|
.. | .. |
---|
105 | 252 | { |
---|
106 | 253 | struct bio_vec bv, bvprv, *bvprvp = NULL; |
---|
107 | 254 | struct bvec_iter iter; |
---|
108 | | - unsigned seg_size = 0, nsegs = 0, sectors = 0; |
---|
109 | | - unsigned front_seg_size = bio->bi_seg_front_size; |
---|
110 | | - bool do_split = true; |
---|
111 | | - struct bio *new = NULL; |
---|
| 255 | + unsigned nsegs = 0, sectors = 0; |
---|
112 | 256 | const unsigned max_sectors = get_max_io_size(q, bio); |
---|
| 257 | + const unsigned max_segs = queue_max_segments(q); |
---|
113 | 258 | |
---|
114 | | - bio_for_each_segment(bv, bio, iter) { |
---|
| 259 | + bio_for_each_bvec(bv, bio, iter) { |
---|
115 | 260 | /* |
---|
116 | 261 | * If the queue doesn't support SG gaps and adding this |
---|
117 | 262 | * offset would create a gap, disallow it. |
---|
.. | .. |
---|
119 | 264 | if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset)) |
---|
120 | 265 | goto split; |
---|
121 | 266 | |
---|
122 | | - if (sectors + (bv.bv_len >> 9) > max_sectors) { |
---|
123 | | - /* |
---|
124 | | - * Consider this a new segment if we're splitting in |
---|
125 | | - * the middle of this vector. |
---|
126 | | - */ |
---|
127 | | - if (nsegs < queue_max_segments(q) && |
---|
128 | | - sectors < max_sectors) { |
---|
129 | | - nsegs++; |
---|
130 | | - sectors = max_sectors; |
---|
131 | | - } |
---|
132 | | - goto split; |
---|
133 | | - } |
---|
134 | | - |
---|
135 | | - if (bvprvp && blk_queue_cluster(q)) { |
---|
136 | | - if (seg_size + bv.bv_len > queue_max_segment_size(q)) |
---|
137 | | - goto new_segment; |
---|
138 | | - if (!BIOVEC_PHYS_MERGEABLE(bvprvp, &bv)) |
---|
139 | | - goto new_segment; |
---|
140 | | - if (!BIOVEC_SEG_BOUNDARY(q, bvprvp, &bv)) |
---|
141 | | - goto new_segment; |
---|
142 | | - |
---|
143 | | - seg_size += bv.bv_len; |
---|
144 | | - bvprv = bv; |
---|
145 | | - bvprvp = &bvprv; |
---|
| 267 | + if (nsegs < max_segs && |
---|
| 268 | + sectors + (bv.bv_len >> 9) <= max_sectors && |
---|
| 269 | + bv.bv_offset + bv.bv_len <= PAGE_SIZE) { |
---|
| 270 | + nsegs++; |
---|
146 | 271 | sectors += bv.bv_len >> 9; |
---|
147 | | - |
---|
148 | | - continue; |
---|
149 | | - } |
---|
150 | | -new_segment: |
---|
151 | | - if (nsegs == queue_max_segments(q)) |
---|
| 272 | + } else if (bvec_split_segs(q, &bv, &nsegs, &sectors, max_segs, |
---|
| 273 | + max_sectors)) { |
---|
152 | 274 | goto split; |
---|
| 275 | + } |
---|
153 | 276 | |
---|
154 | | - if (nsegs == 1 && seg_size > front_seg_size) |
---|
155 | | - front_seg_size = seg_size; |
---|
156 | | - |
---|
157 | | - nsegs++; |
---|
158 | 277 | bvprv = bv; |
---|
159 | 278 | bvprvp = &bvprv; |
---|
160 | | - seg_size = bv.bv_len; |
---|
161 | | - sectors += bv.bv_len >> 9; |
---|
162 | | - |
---|
163 | 279 | } |
---|
164 | 280 | |
---|
165 | | - do_split = false; |
---|
| 281 | + *segs = nsegs; |
---|
| 282 | + return NULL; |
---|
166 | 283 | split: |
---|
167 | 284 | *segs = nsegs; |
---|
168 | | - |
---|
169 | | - if (do_split) { |
---|
170 | | - new = bio_split(bio, sectors, GFP_NOIO, bs); |
---|
171 | | - if (new) |
---|
172 | | - bio = new; |
---|
173 | | - } |
---|
174 | | - |
---|
175 | | - if (nsegs == 1 && seg_size > front_seg_size) |
---|
176 | | - front_seg_size = seg_size; |
---|
177 | | - bio->bi_seg_front_size = front_seg_size; |
---|
178 | | - if (seg_size > bio->bi_seg_back_size) |
---|
179 | | - bio->bi_seg_back_size = seg_size; |
---|
180 | | - |
---|
181 | | - return do_split ? new : NULL; |
---|
| 285 | + return bio_split(bio, sectors, GFP_NOIO, bs); |
---|
182 | 286 | } |
---|
183 | 287 | |
---|
184 | | -void blk_queue_split(struct request_queue *q, struct bio **bio) |
---|
| 288 | +/** |
---|
| 289 | + * __blk_queue_split - split a bio and submit the second half |
---|
| 290 | + * @bio: [in, out] bio to be split |
---|
| 291 | + * @nr_segs: [out] number of segments in the first bio |
---|
| 292 | + * |
---|
| 293 | + * Split a bio into two bios, chain the two bios, submit the second half and |
---|
| 294 | + * store a pointer to the first half in *@bio. If the second bio is still too |
---|
| 295 | + * big it will be split by a recursive call to this function. Since this |
---|
| 296 | + * function may allocate a new bio from @bio->bi_disk->queue->bio_split, it is |
---|
| 297 | + * the responsibility of the caller to ensure that |
---|
| 298 | + * @bio->bi_disk->queue->bio_split is only released after processing of the |
---|
| 299 | + * split bio has finished. |
---|
| 300 | + */ |
---|
| 301 | +void __blk_queue_split(struct bio **bio, unsigned int *nr_segs) |
---|
185 | 302 | { |
---|
186 | | - struct bio *split, *res; |
---|
187 | | - unsigned nsegs; |
---|
| 303 | + struct request_queue *q = (*bio)->bi_disk->queue; |
---|
| 304 | + struct bio *split = NULL; |
---|
188 | 305 | |
---|
189 | 306 | switch (bio_op(*bio)) { |
---|
190 | 307 | case REQ_OP_DISCARD: |
---|
191 | 308 | case REQ_OP_SECURE_ERASE: |
---|
192 | | - split = blk_bio_discard_split(q, *bio, &q->bio_split, &nsegs); |
---|
| 309 | + split = blk_bio_discard_split(q, *bio, &q->bio_split, nr_segs); |
---|
193 | 310 | break; |
---|
194 | 311 | case REQ_OP_WRITE_ZEROES: |
---|
195 | | - split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split, &nsegs); |
---|
| 312 | + split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split, |
---|
| 313 | + nr_segs); |
---|
196 | 314 | break; |
---|
197 | 315 | case REQ_OP_WRITE_SAME: |
---|
198 | | - split = blk_bio_write_same_split(q, *bio, &q->bio_split, &nsegs); |
---|
| 316 | + split = blk_bio_write_same_split(q, *bio, &q->bio_split, |
---|
| 317 | + nr_segs); |
---|
199 | 318 | break; |
---|
200 | 319 | default: |
---|
201 | | - split = blk_bio_segment_split(q, *bio, &q->bio_split, &nsegs); |
---|
| 320 | + /* |
---|
| 321 | + * All drivers must accept single-segment bios that are <= |
---|
| 322 | + * PAGE_SIZE. This is a quick and dirty check that relies on |
---|
| 323 | + * the fact that bi_io_vec[0] is always valid if a bio has data. |
---|
| 324 | + * The check might lead to occasional false negatives when bios |
---|
| 325 | + * are cloned, but compared to the performance impact of cloned |
---|
| 326 | + * bios themselves the loop below doesn't matter anyway. |
---|
| 327 | + */ |
---|
| 328 | + if (!q->limits.chunk_sectors && |
---|
| 329 | + (*bio)->bi_vcnt == 1 && |
---|
| 330 | + ((*bio)->bi_io_vec[0].bv_len + |
---|
| 331 | + (*bio)->bi_io_vec[0].bv_offset) <= PAGE_SIZE) { |
---|
| 332 | + *nr_segs = 1; |
---|
| 333 | + break; |
---|
| 334 | + } |
---|
| 335 | + split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs); |
---|
202 | 336 | break; |
---|
203 | 337 | } |
---|
204 | | - |
---|
205 | | - /* physical segments can be figured out during splitting */ |
---|
206 | | - res = split ? split : *bio; |
---|
207 | | - res->bi_phys_segments = nsegs; |
---|
208 | | - bio_set_flag(res, BIO_SEG_VALID); |
---|
209 | 338 | |
---|
210 | 339 | if (split) { |
---|
211 | 340 | /* there is no chance to merge the split bio */ |
---|
212 | 341 | split->bi_opf |= REQ_NOMERGE; |
---|
213 | 342 | |
---|
214 | | - /* |
---|
215 | | - * Since we're recursing into make_request here, ensure |
---|
216 | | - * that we mark this bio as already having entered the queue. |
---|
217 | | - * If not, and the queue is going away, we can get stuck |
---|
218 | | - * forever on waiting for the queue reference to drop. But |
---|
219 | | - * that will never happen, as we're already holding a |
---|
220 | | - * reference to it. |
---|
221 | | - */ |
---|
222 | | - bio_set_flag(*bio, BIO_QUEUE_ENTERED); |
---|
223 | | - |
---|
224 | 343 | bio_chain(split, *bio); |
---|
225 | 344 | trace_block_split(q, split, (*bio)->bi_iter.bi_sector); |
---|
226 | | - generic_make_request(*bio); |
---|
| 345 | + submit_bio_noacct(*bio); |
---|
227 | 346 | *bio = split; |
---|
| 347 | + |
---|
| 348 | + blk_throtl_charge_bio_split(*bio); |
---|
228 | 349 | } |
---|
| 350 | +} |
---|
| 351 | + |
---|
| 352 | +/** |
---|
| 353 | + * blk_queue_split - split a bio and submit the second half |
---|
| 354 | + * @bio: [in, out] bio to be split |
---|
| 355 | + * |
---|
| 356 | + * Split a bio into two bios, chain the two bios, submit the second half and |
---|
| 357 | + * store a pointer to the first half in *@bio. Since this function may allocate |
---|
| 358 | + * a new bio from @bio->bi_disk->queue->bio_split, it is the responsibility of |
---|
| 359 | + * the caller to ensure that @bio->bi_disk->queue->bio_split is only released |
---|
| 360 | + * after processing of the split bio has finished. |
---|
| 361 | + */ |
---|
| 362 | +void blk_queue_split(struct bio **bio) |
---|
| 363 | +{ |
---|
| 364 | + unsigned int nr_segs; |
---|
| 365 | + |
---|
| 366 | + __blk_queue_split(bio, &nr_segs); |
---|
229 | 367 | } |
---|
230 | 368 | EXPORT_SYMBOL(blk_queue_split); |
---|
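
With the queue argument gone, a bio-based driver now only passes the bio pointer; roughly along the lines of the sketch below, where struct my_dev and my_dev_handle_bio() are placeholders and not part of this patch:

```c
/*
 * Hypothetical bio-based driver submit path.  The queue is derived from
 * bio->bi_disk inside blk_queue_split(), so only the bio pointer is passed;
 * on return *bio may have been replaced by the first half of a split.
 */
static blk_qc_t my_dev_submit_bio(struct bio *bio)
{
	struct my_dev *dev = bio->bi_disk->private_data;

	blk_queue_split(&bio);
	my_dev_handle_bio(dev, bio);
	return BLK_QC_T_NONE;
}
```
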
231 | 369 | |
---|
232 | | -static unsigned int __blk_recalc_rq_segments(struct request_queue *q, |
---|
233 | | - struct bio *bio, |
---|
234 | | - bool no_sg_merge) |
---|
| 370 | +unsigned int blk_recalc_rq_segments(struct request *rq) |
---|
235 | 371 | { |
---|
236 | | - struct bio_vec bv, bvprv = { NULL }; |
---|
237 | | - int cluster, prev = 0; |
---|
238 | | - unsigned int seg_size, nr_phys_segs; |
---|
239 | | - struct bio *fbio, *bbio; |
---|
240 | | - struct bvec_iter iter; |
---|
| 372 | + unsigned int nr_phys_segs = 0; |
---|
| 373 | + unsigned int nr_sectors = 0; |
---|
| 374 | + struct req_iterator iter; |
---|
| 375 | + struct bio_vec bv; |
---|
241 | 376 | |
---|
242 | | - if (!bio) |
---|
| 377 | + if (!rq->bio) |
---|
243 | 378 | return 0; |
---|
244 | 379 | |
---|
245 | | - switch (bio_op(bio)) { |
---|
| 380 | + switch (bio_op(rq->bio)) { |
---|
246 | 381 | case REQ_OP_DISCARD: |
---|
247 | 382 | case REQ_OP_SECURE_ERASE: |
---|
| 383 | + if (queue_max_discard_segments(rq->q) > 1) { |
---|
| 384 | + struct bio *bio = rq->bio; |
---|
| 385 | + |
---|
| 386 | + for_each_bio(bio) |
---|
| 387 | + nr_phys_segs++; |
---|
| 388 | + return nr_phys_segs; |
---|
| 389 | + } |
---|
| 390 | + return 1; |
---|
248 | 391 | case REQ_OP_WRITE_ZEROES: |
---|
249 | 392 | return 0; |
---|
250 | 393 | case REQ_OP_WRITE_SAME: |
---|
251 | 394 | return 1; |
---|
252 | 395 | } |
---|
253 | 396 | |
---|
254 | | - fbio = bio; |
---|
255 | | - cluster = blk_queue_cluster(q); |
---|
256 | | - seg_size = 0; |
---|
257 | | - nr_phys_segs = 0; |
---|
258 | | - for_each_bio(bio) { |
---|
259 | | - bio_for_each_segment(bv, bio, iter) { |
---|
260 | | - /* |
---|
261 | | - * If SG merging is disabled, each bio vector is |
---|
262 | | - * a segment |
---|
263 | | - */ |
---|
264 | | - if (no_sg_merge) |
---|
265 | | - goto new_segment; |
---|
266 | | - |
---|
267 | | - if (prev && cluster) { |
---|
268 | | - if (seg_size + bv.bv_len |
---|
269 | | - > queue_max_segment_size(q)) |
---|
270 | | - goto new_segment; |
---|
271 | | - if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv)) |
---|
272 | | - goto new_segment; |
---|
273 | | - if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv)) |
---|
274 | | - goto new_segment; |
---|
275 | | - |
---|
276 | | - seg_size += bv.bv_len; |
---|
277 | | - bvprv = bv; |
---|
278 | | - continue; |
---|
279 | | - } |
---|
280 | | -new_segment: |
---|
281 | | - if (nr_phys_segs == 1 && seg_size > |
---|
282 | | - fbio->bi_seg_front_size) |
---|
283 | | - fbio->bi_seg_front_size = seg_size; |
---|
284 | | - |
---|
285 | | - nr_phys_segs++; |
---|
286 | | - bvprv = bv; |
---|
287 | | - prev = 1; |
---|
288 | | - seg_size = bv.bv_len; |
---|
289 | | - } |
---|
290 | | - bbio = bio; |
---|
291 | | - } |
---|
292 | | - |
---|
293 | | - if (nr_phys_segs == 1 && seg_size > fbio->bi_seg_front_size) |
---|
294 | | - fbio->bi_seg_front_size = seg_size; |
---|
295 | | - if (seg_size > bbio->bi_seg_back_size) |
---|
296 | | - bbio->bi_seg_back_size = seg_size; |
---|
297 | | - |
---|
| 397 | + rq_for_each_bvec(bv, rq, iter) |
---|
| 398 | + bvec_split_segs(rq->q, &bv, &nr_phys_segs, &nr_sectors, |
---|
| 399 | + UINT_MAX, UINT_MAX); |
---|
298 | 400 | return nr_phys_segs; |
---|
299 | 401 | } |
---|
300 | 402 | |
---|
301 | | -void blk_recalc_rq_segments(struct request *rq) |
---|
| 403 | +static inline struct scatterlist *blk_next_sg(struct scatterlist **sg, |
---|
| 404 | + struct scatterlist *sglist) |
---|
302 | 405 | { |
---|
303 | | - bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE, |
---|
304 | | - &rq->q->queue_flags); |
---|
305 | | - |
---|
306 | | - rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio, |
---|
307 | | - no_sg_merge); |
---|
308 | | -} |
---|
309 | | - |
---|
310 | | -void blk_recount_segments(struct request_queue *q, struct bio *bio) |
---|
311 | | -{ |
---|
312 | | - unsigned short seg_cnt = bio_segments(bio); |
---|
313 | | - |
---|
314 | | - if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) && |
---|
315 | | - (seg_cnt < queue_max_segments(q))) |
---|
316 | | - bio->bi_phys_segments = seg_cnt; |
---|
317 | | - else { |
---|
318 | | - struct bio *nxt = bio->bi_next; |
---|
319 | | - |
---|
320 | | - bio->bi_next = NULL; |
---|
321 | | - bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false); |
---|
322 | | - bio->bi_next = nxt; |
---|
323 | | - } |
---|
324 | | - |
---|
325 | | - bio_set_flag(bio, BIO_SEG_VALID); |
---|
326 | | -} |
---|
327 | | -EXPORT_SYMBOL(blk_recount_segments); |
---|
328 | | - |
---|
329 | | -static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, |
---|
330 | | - struct bio *nxt) |
---|
331 | | -{ |
---|
332 | | - struct bio_vec end_bv = { NULL }, nxt_bv; |
---|
333 | | - |
---|
334 | | - if (!blk_queue_cluster(q)) |
---|
335 | | - return 0; |
---|
336 | | - |
---|
337 | | - if (bio->bi_seg_back_size + nxt->bi_seg_front_size > |
---|
338 | | - queue_max_segment_size(q)) |
---|
339 | | - return 0; |
---|
340 | | - |
---|
341 | | - if (!bio_has_data(bio)) |
---|
342 | | - return 1; |
---|
343 | | - |
---|
344 | | - bio_get_last_bvec(bio, &end_bv); |
---|
345 | | - bio_get_first_bvec(nxt, &nxt_bv); |
---|
346 | | - |
---|
347 | | - if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv)) |
---|
348 | | - return 0; |
---|
| 406 | + if (!*sg) |
---|
| 407 | + return sglist; |
---|
349 | 408 | |
---|
350 | 409 | /* |
---|
351 | | - * bio and nxt are contiguous in memory; check if the queue allows |
---|
352 | | - * these two to be merged into one |
---|
| 410 | + * If the driver previously mapped a shorter list, we could see a |
---|
| 411 | + * termination bit prematurely unless it fully inits the sg table |
---|
| 412 | + * on each mapping. We KNOW that there must be more entries here |
---|
| 413 | + * or the driver would be buggy, so force clear the termination bit |
---|
| 414 | + * to avoid doing a full sg_init_table() in drivers for each command. |
---|
353 | 415 | */ |
---|
354 | | - if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv)) |
---|
355 | | - return 1; |
---|
356 | | - |
---|
357 | | - return 0; |
---|
| 416 | + sg_unmark_end(*sg); |
---|
| 417 | + return sg_next(*sg); |
---|
358 | 418 | } |
---|
359 | 419 | |
---|
360 | | -static inline void |
---|
361 | | -__blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec, |
---|
362 | | - struct scatterlist *sglist, struct bio_vec *bvprv, |
---|
363 | | - struct scatterlist **sg, int *nsegs, int *cluster) |
---|
| 420 | +static unsigned blk_bvec_map_sg(struct request_queue *q, |
---|
| 421 | + struct bio_vec *bvec, struct scatterlist *sglist, |
---|
| 422 | + struct scatterlist **sg) |
---|
| 423 | +{ |
---|
| 424 | + unsigned nbytes = bvec->bv_len; |
---|
| 425 | + unsigned nsegs = 0, total = 0; |
---|
| 426 | + |
---|
| 427 | + while (nbytes > 0) { |
---|
| 428 | + unsigned offset = bvec->bv_offset + total; |
---|
| 429 | + unsigned len = min(get_max_segment_size(q, bvec->bv_page, |
---|
| 430 | + offset), nbytes); |
---|
| 431 | + struct page *page = bvec->bv_page; |
---|
| 432 | + |
---|
| 433 | + /* |
---|
| 434 | + * Unfortunately a fair number of drivers barf on scatterlists |
---|
| 435 | + * that have an offset larger than PAGE_SIZE, despite other |
---|
| 436 | + * subsystems dealing with that invariant just fine. For now |
---|
| 437 | + * stick to the legacy format where we never present those from |
---|
| 438 | + * the block layer, but the code below should be removed once |
---|
| 439 | + * these offenders (mostly MMC/SD drivers) are fixed. |
---|
| 440 | + */ |
---|
| 441 | + page += (offset >> PAGE_SHIFT); |
---|
| 442 | + offset &= ~PAGE_MASK; |
---|
| 443 | + |
---|
| 444 | + *sg = blk_next_sg(sg, sglist); |
---|
| 445 | + sg_set_page(*sg, page, len, offset); |
---|
| 446 | + |
---|
| 447 | + total += len; |
---|
| 448 | + nbytes -= len; |
---|
| 449 | + nsegs++; |
---|
| 450 | + } |
---|
| 451 | + |
---|
| 452 | + return nsegs; |
---|
| 453 | +} |
---|
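
The page/offset normalisation in the loop above boils down to a shift and a mask; a tiny standalone illustration, assuming 4 KiB pages:

```c
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

int main(void)
{
	/* a bvec offset reaching 3 pages plus 100 bytes into its buffer */
	unsigned long offset = 3 * PAGE_SIZE + 100;
	unsigned long page_idx = offset >> PAGE_SHIFT;	/* advance the page pointer */
	unsigned long in_page = offset & ~PAGE_MASK;	/* keep only the in-page part */

	/* no sg entry is ever presented with an offset >= PAGE_SIZE */
	printf("page +%lu, offset %lu\n", page_idx, in_page);	/* page +3, offset 100 */
	return 0;
}
```
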
| 454 | + |
---|
| 455 | +static inline int __blk_bvec_map_sg(struct bio_vec bv, |
---|
| 456 | + struct scatterlist *sglist, struct scatterlist **sg) |
---|
| 457 | +{ |
---|
| 458 | + *sg = blk_next_sg(sg, sglist); |
---|
| 459 | + sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset); |
---|
| 460 | + return 1; |
---|
| 461 | +} |
---|
| 462 | + |
---|
| 463 | +/* only try to merge bvecs into one sg if they are from two bios */ |
---|
| 464 | +static inline bool |
---|
| 465 | +__blk_segment_map_sg_merge(struct request_queue *q, struct bio_vec *bvec, |
---|
| 466 | + struct bio_vec *bvprv, struct scatterlist **sg) |
---|
364 | 467 | { |
---|
365 | 468 | |
---|
366 | 469 | int nbytes = bvec->bv_len; |
---|
367 | 470 | |
---|
368 | | - if (*sg && *cluster) { |
---|
369 | | - if ((*sg)->length + nbytes > queue_max_segment_size(q)) |
---|
370 | | - goto new_segment; |
---|
| 471 | + if (!*sg) |
---|
| 472 | + return false; |
---|
371 | 473 | |
---|
372 | | - if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) |
---|
373 | | - goto new_segment; |
---|
374 | | - if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) |
---|
375 | | - goto new_segment; |
---|
| 474 | + if ((*sg)->length + nbytes > queue_max_segment_size(q)) |
---|
| 475 | + return false; |
---|
376 | 476 | |
---|
377 | | - (*sg)->length += nbytes; |
---|
378 | | - } else { |
---|
379 | | -new_segment: |
---|
380 | | - if (!*sg) |
---|
381 | | - *sg = sglist; |
---|
382 | | - else { |
---|
383 | | - /* |
---|
384 | | - * If the driver previously mapped a shorter |
---|
385 | | - * list, we could see a termination bit |
---|
386 | | - * prematurely unless it fully inits the sg |
---|
387 | | - * table on each mapping. We KNOW that there |
---|
388 | | - * must be more entries here or the driver |
---|
389 | | - * would be buggy, so force clear the |
---|
390 | | - * termination bit to avoid doing a full |
---|
391 | | - * sg_init_table() in drivers for each command. |
---|
392 | | - */ |
---|
393 | | - sg_unmark_end(*sg); |
---|
394 | | - *sg = sg_next(*sg); |
---|
395 | | - } |
---|
| 477 | + if (!biovec_phys_mergeable(q, bvprv, bvec)) |
---|
| 478 | + return false; |
---|
396 | 479 | |
---|
397 | | - sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset); |
---|
398 | | - (*nsegs)++; |
---|
399 | | - } |
---|
400 | | - *bvprv = *bvec; |
---|
401 | | -} |
---|
| 480 | + (*sg)->length += nbytes; |
---|
402 | 481 | |
---|
403 | | -static inline int __blk_bvec_map_sg(struct request_queue *q, struct bio_vec bv, |
---|
404 | | - struct scatterlist *sglist, struct scatterlist **sg) |
---|
405 | | -{ |
---|
406 | | - *sg = sglist; |
---|
407 | | - sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset); |
---|
408 | | - return 1; |
---|
| 482 | + return true; |
---|
409 | 483 | } |
---|
410 | 484 | |
---|
411 | 485 | static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, |
---|
.. | .. |
---|
414 | 488 | { |
---|
415 | 489 | struct bio_vec bvec, bvprv = { NULL }; |
---|
416 | 490 | struct bvec_iter iter; |
---|
417 | | - int cluster = blk_queue_cluster(q), nsegs = 0; |
---|
| 491 | + int nsegs = 0; |
---|
| 492 | + bool new_bio = false; |
---|
418 | 493 | |
---|
419 | | - for_each_bio(bio) |
---|
420 | | - bio_for_each_segment(bvec, bio, iter) |
---|
421 | | - __blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg, |
---|
422 | | - &nsegs, &cluster); |
---|
| 494 | + for_each_bio(bio) { |
---|
| 495 | + bio_for_each_bvec(bvec, bio, iter) { |
---|
| 496 | + /* |
---|
| 497 | + * Only try to merge bvecs from two different bios, |
---|
| 498 | + * since bvecs within one bio have already been merged |
---|
| 499 | + * when its pages were added |
---|
| 500 | + */ |
---|
| 501 | + if (new_bio && |
---|
| 502 | + __blk_segment_map_sg_merge(q, &bvec, &bvprv, sg)) |
---|
| 503 | + goto next_bvec; |
---|
| 504 | + |
---|
| 505 | + if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE) |
---|
| 506 | + nsegs += __blk_bvec_map_sg(bvec, sglist, sg); |
---|
| 507 | + else |
---|
| 508 | + nsegs += blk_bvec_map_sg(q, &bvec, sglist, sg); |
---|
| 509 | + next_bvec: |
---|
| 510 | + new_bio = false; |
---|
| 511 | + } |
---|
| 512 | + if (likely(bio->bi_iter.bi_size)) { |
---|
| 513 | + bvprv = bvec; |
---|
| 514 | + new_bio = true; |
---|
| 515 | + } |
---|
| 516 | + } |
---|
423 | 517 | |
---|
424 | 518 | return nsegs; |
---|
425 | 519 | } |
---|
.. | .. |
---|
428 | 522 | * map a request to scatterlist, return number of sg entries setup. Caller |
---|
429 | 523 | * must make sure sg can hold rq->nr_phys_segments entries |
---|
430 | 524 | */ |
---|
431 | | -int blk_rq_map_sg(struct request_queue *q, struct request *rq, |
---|
432 | | - struct scatterlist *sglist) |
---|
| 525 | +int __blk_rq_map_sg(struct request_queue *q, struct request *rq, |
---|
| 526 | + struct scatterlist *sglist, struct scatterlist **last_sg) |
---|
433 | 527 | { |
---|
434 | | - struct scatterlist *sg = NULL; |
---|
435 | 528 | int nsegs = 0; |
---|
436 | 529 | |
---|
437 | 530 | if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) |
---|
438 | | - nsegs = __blk_bvec_map_sg(q, rq->special_vec, sglist, &sg); |
---|
| 531 | + nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, last_sg); |
---|
439 | 532 | else if (rq->bio && bio_op(rq->bio) == REQ_OP_WRITE_SAME) |
---|
440 | | - nsegs = __blk_bvec_map_sg(q, bio_iovec(rq->bio), sglist, &sg); |
---|
| 533 | + nsegs = __blk_bvec_map_sg(bio_iovec(rq->bio), sglist, last_sg); |
---|
441 | 534 | else if (rq->bio) |
---|
442 | | - nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg); |
---|
| 535 | + nsegs = __blk_bios_map_sg(q, rq->bio, sglist, last_sg); |
---|
443 | 536 | |
---|
444 | | - if (unlikely(rq->rq_flags & RQF_COPY_USER) && |
---|
445 | | - (blk_rq_bytes(rq) & q->dma_pad_mask)) { |
---|
446 | | - unsigned int pad_len = |
---|
447 | | - (q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1; |
---|
448 | | - |
---|
449 | | - sg->length += pad_len; |
---|
450 | | - rq->extra_len += pad_len; |
---|
451 | | - } |
---|
452 | | - |
---|
453 | | - if (q->dma_drain_size && q->dma_drain_needed(rq)) { |
---|
454 | | - if (op_is_write(req_op(rq))) |
---|
455 | | - memset(q->dma_drain_buffer, 0, q->dma_drain_size); |
---|
456 | | - |
---|
457 | | - sg_unmark_end(sg); |
---|
458 | | - sg = sg_next(sg); |
---|
459 | | - sg_set_page(sg, virt_to_page(q->dma_drain_buffer), |
---|
460 | | - q->dma_drain_size, |
---|
461 | | - ((unsigned long)q->dma_drain_buffer) & |
---|
462 | | - (PAGE_SIZE - 1)); |
---|
463 | | - nsegs++; |
---|
464 | | - rq->extra_len += q->dma_drain_size; |
---|
465 | | - } |
---|
466 | | - |
---|
467 | | - if (sg) |
---|
468 | | - sg_mark_end(sg); |
---|
| 537 | + if (*last_sg) |
---|
| 538 | + sg_mark_end(*last_sg); |
---|
469 | 539 | |
---|
470 | 540 | /* |
---|
471 | 541 | * Something must have been wrong if the figured number of |
---|
.. | .. |
---|
475 | 545 | |
---|
476 | 546 | return nsegs; |
---|
477 | 547 | } |
---|
478 | | -EXPORT_SYMBOL(blk_rq_map_sg); |
---|
| 548 | +EXPORT_SYMBOL(__blk_rq_map_sg); |
---|
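
For context, a blk-mq driver would typically call this from its ->queue_rq() handler along the lines sketched below; struct my_cmd, its sgl member and my_hw_submit() are placeholders, and the scatterlist is assumed to be sized for rq->nr_phys_segments entries as the comment above requires:

```c
/* sketch of a hypothetical driver ->queue_rq() using the helper above */
static blk_status_t my_dev_queue_rq(struct blk_mq_hw_ctx *hctx,
				    const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);
	struct scatterlist *last_sg = NULL;
	int nsegs;

	blk_mq_start_request(rq);

	/* fill cmd->sgl and learn how many sg entries were actually used */
	nsegs = __blk_rq_map_sg(rq->q, rq, cmd->sgl, &last_sg);

	my_hw_submit(hctx->driver_data, cmd, nsegs);
	return BLK_STS_OK;
}
```
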
479 | 549 | |
---|
480 | | -static inline int ll_new_hw_segment(struct request_queue *q, |
---|
481 | | - struct request *req, |
---|
482 | | - struct bio *bio) |
---|
| 550 | +static inline unsigned int blk_rq_get_max_segments(struct request *rq) |
---|
483 | 551 | { |
---|
484 | | - int nr_phys_segs = bio_phys_segments(q, bio); |
---|
| 552 | + if (req_op(rq) == REQ_OP_DISCARD) |
---|
| 553 | + return queue_max_discard_segments(rq->q); |
---|
| 554 | + return queue_max_segments(rq->q); |
---|
| 555 | +} |
---|
485 | 556 | |
---|
486 | | - if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) |
---|
| 557 | +static inline int ll_new_hw_segment(struct request *req, struct bio *bio, |
---|
| 558 | + unsigned int nr_phys_segs) |
---|
| 559 | +{ |
---|
| 560 | + if (!blk_cgroup_mergeable(req, bio)) |
---|
487 | 561 | goto no_merge; |
---|
488 | 562 | |
---|
489 | | - if (blk_integrity_merge_bio(q, req, bio) == false) |
---|
| 563 | + if (blk_integrity_merge_bio(req->q, req, bio) == false) |
---|
| 564 | + goto no_merge; |
---|
| 565 | + |
---|
| 566 | + /* discard request merge won't add new segment */ |
---|
| 567 | + if (req_op(req) == REQ_OP_DISCARD) |
---|
| 568 | + return 1; |
---|
| 569 | + |
---|
| 570 | + if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req)) |
---|
490 | 571 | goto no_merge; |
---|
491 | 572 | |
---|
492 | 573 | /* |
---|
.. | .. |
---|
497 | 578 | return 1; |
---|
498 | 579 | |
---|
499 | 580 | no_merge: |
---|
500 | | - req_set_nomerge(q, req); |
---|
| 581 | + req_set_nomerge(req->q, req); |
---|
501 | 582 | return 0; |
---|
502 | 583 | } |
---|
503 | 584 | |
---|
504 | | -int ll_back_merge_fn(struct request_queue *q, struct request *req, |
---|
505 | | - struct bio *bio) |
---|
| 585 | +int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs) |
---|
506 | 586 | { |
---|
507 | 587 | if (req_gap_back_merge(req, bio)) |
---|
508 | 588 | return 0; |
---|
509 | 589 | if (blk_integrity_rq(req) && |
---|
510 | 590 | integrity_req_gap_back_merge(req, bio)) |
---|
511 | 591 | return 0; |
---|
| 592 | + if (!bio_crypt_ctx_back_mergeable(req, bio)) |
---|
| 593 | + return 0; |
---|
512 | 594 | if (blk_rq_sectors(req) + bio_sectors(bio) > |
---|
513 | 595 | blk_rq_get_max_sectors(req, blk_rq_pos(req))) { |
---|
514 | | - req_set_nomerge(q, req); |
---|
| 596 | + req_set_nomerge(req->q, req); |
---|
515 | 597 | return 0; |
---|
516 | 598 | } |
---|
517 | | - if (!bio_crypt_ctx_mergeable(req->bio, blk_rq_bytes(req), bio)) |
---|
518 | | - return 0; |
---|
519 | | - if (!bio_flagged(req->biotail, BIO_SEG_VALID)) |
---|
520 | | - blk_recount_segments(q, req->biotail); |
---|
521 | | - if (!bio_flagged(bio, BIO_SEG_VALID)) |
---|
522 | | - blk_recount_segments(q, bio); |
---|
523 | 599 | |
---|
524 | | - return ll_new_hw_segment(q, req, bio); |
---|
| 600 | + return ll_new_hw_segment(req, bio, nr_segs); |
---|
525 | 601 | } |
---|
526 | 602 | |
---|
527 | | -int ll_front_merge_fn(struct request_queue *q, struct request *req, |
---|
528 | | - struct bio *bio) |
---|
| 603 | +static int ll_front_merge_fn(struct request *req, struct bio *bio, |
---|
| 604 | + unsigned int nr_segs) |
---|
529 | 605 | { |
---|
530 | | - |
---|
531 | 606 | if (req_gap_front_merge(req, bio)) |
---|
532 | 607 | return 0; |
---|
533 | 608 | if (blk_integrity_rq(req) && |
---|
534 | 609 | integrity_req_gap_front_merge(req, bio)) |
---|
535 | 610 | return 0; |
---|
| 611 | + if (!bio_crypt_ctx_front_mergeable(req, bio)) |
---|
| 612 | + return 0; |
---|
536 | 613 | if (blk_rq_sectors(req) + bio_sectors(bio) > |
---|
537 | 614 | blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) { |
---|
538 | | - req_set_nomerge(q, req); |
---|
| 615 | + req_set_nomerge(req->q, req); |
---|
539 | 616 | return 0; |
---|
540 | 617 | } |
---|
541 | | - if (!bio_crypt_ctx_mergeable(bio, bio->bi_iter.bi_size, req->bio)) |
---|
542 | | - return 0; |
---|
543 | | - if (!bio_flagged(bio, BIO_SEG_VALID)) |
---|
544 | | - blk_recount_segments(q, bio); |
---|
545 | | - if (!bio_flagged(req->bio, BIO_SEG_VALID)) |
---|
546 | | - blk_recount_segments(q, req->bio); |
---|
547 | 618 | |
---|
548 | | - return ll_new_hw_segment(q, req, bio); |
---|
549 | | -} |
---|
550 | | - |
---|
551 | | -/* |
---|
552 | | - * blk-mq uses req->special to carry normal driver per-request payload, it |
---|
553 | | - * does not indicate a prepared command that we cannot merge with. |
---|
554 | | - */ |
---|
555 | | -static bool req_no_special_merge(struct request *req) |
---|
556 | | -{ |
---|
557 | | - struct request_queue *q = req->q; |
---|
558 | | - |
---|
559 | | - return !q->mq_ops && req->special; |
---|
| 619 | + return ll_new_hw_segment(req, bio, nr_segs); |
---|
560 | 620 | } |
---|
561 | 621 | |
---|
562 | 622 | static bool req_attempt_discard_merge(struct request_queue *q, struct request *req, |
---|
.. | .. |
---|
581 | 641 | struct request *next) |
---|
582 | 642 | { |
---|
583 | 643 | int total_phys_segments; |
---|
584 | | - unsigned int seg_size = |
---|
585 | | - req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size; |
---|
586 | | - |
---|
587 | | - /* |
---|
588 | | - * First check if the either of the requests are re-queued |
---|
589 | | - * requests. Can't merge them if they are. |
---|
590 | | - */ |
---|
591 | | - if (req_no_special_merge(req) || req_no_special_merge(next)) |
---|
592 | | - return 0; |
---|
593 | 644 | |
---|
594 | 645 | if (req_gap_back_merge(req, next->bio)) |
---|
595 | 646 | return 0; |
---|
.. | .. |
---|
602 | 653 | return 0; |
---|
603 | 654 | |
---|
604 | 655 | total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; |
---|
605 | | - if (blk_phys_contig_segment(q, req->biotail, next->bio)) { |
---|
606 | | - if (req->nr_phys_segments == 1) |
---|
607 | | - req->bio->bi_seg_front_size = seg_size; |
---|
608 | | - if (next->nr_phys_segments == 1) |
---|
609 | | - next->biotail->bi_seg_back_size = seg_size; |
---|
610 | | - total_phys_segments--; |
---|
611 | | - } |
---|
| 656 | + if (total_phys_segments > blk_rq_get_max_segments(req)) |
---|
| 657 | + return 0; |
---|
612 | 658 | |
---|
613 | | - if (total_phys_segments > queue_max_segments(q)) |
---|
| 659 | + if (!blk_cgroup_mergeable(req, next->bio)) |
---|
614 | 660 | return 0; |
---|
615 | 661 | |
---|
616 | 662 | if (blk_integrity_merge_rq(q, req, next) == false) |
---|
617 | 663 | return 0; |
---|
618 | 664 | |
---|
619 | | - if (!bio_crypt_ctx_mergeable(req->bio, blk_rq_bytes(req), next->bio)) |
---|
| 665 | + if (!bio_crypt_ctx_merge_rq(req, next)) |
---|
620 | 666 | return 0; |
---|
621 | 667 | |
---|
622 | 668 | /* Merge is OK... */ |
---|
.. | .. |
---|
654 | 700 | rq->rq_flags |= RQF_MIXED_MERGE; |
---|
655 | 701 | } |
---|
656 | 702 | |
---|
657 | | -static void blk_account_io_merge(struct request *req) |
---|
| 703 | +static void blk_account_io_merge_request(struct request *req) |
---|
658 | 704 | { |
---|
659 | 705 | if (blk_do_io_stat(req)) { |
---|
660 | | - struct hd_struct *part; |
---|
661 | | - int cpu; |
---|
662 | | - |
---|
663 | | - cpu = part_stat_lock(); |
---|
664 | | - part = req->part; |
---|
665 | | - |
---|
666 | | - part_round_stats(req->q, cpu, part); |
---|
667 | | - part_dec_in_flight(req->q, part, rq_data_dir(req)); |
---|
668 | | - |
---|
669 | | - hd_struct_put(part); |
---|
| 706 | + part_stat_lock(); |
---|
| 707 | + part_stat_inc(req->part, merges[op_stat_group(req_op(req))]); |
---|
670 | 708 | part_stat_unlock(); |
---|
| 709 | + |
---|
| 710 | + hd_struct_put(req->part); |
---|
671 | 711 | } |
---|
672 | 712 | } |
---|
673 | | -/* |
---|
674 | | - * Two cases of handling DISCARD merge: |
---|
675 | | - * If max_discard_segments > 1, the driver takes every bio |
---|
676 | | - * as a range and send them to controller together. The ranges |
---|
677 | | - * needn't to be contiguous. |
---|
678 | | - * Otherwise, the bios/requests will be handled as same as |
---|
679 | | - * others which should be contiguous. |
---|
680 | | - */ |
---|
681 | | -static inline bool blk_discard_mergable(struct request *req) |
---|
682 | | -{ |
---|
683 | | - if (req_op(req) == REQ_OP_DISCARD && |
---|
684 | | - queue_max_discard_segments(req->q) > 1) |
---|
685 | | - return true; |
---|
686 | | - return false; |
---|
687 | | -} |
---|
688 | 713 | |
---|
689 | | -enum elv_merge blk_try_req_merge(struct request *req, struct request *next) |
---|
| 714 | +static enum elv_merge blk_try_req_merge(struct request *req, |
---|
| 715 | + struct request *next) |
---|
690 | 716 | { |
---|
691 | 717 | if (blk_discard_mergable(req)) |
---|
692 | 718 | return ELEVATOR_DISCARD_MERGE; |
---|
.. | .. |
---|
703 | 729 | static struct request *attempt_merge(struct request_queue *q, |
---|
704 | 730 | struct request *req, struct request *next) |
---|
705 | 731 | { |
---|
706 | | - if (!q->mq_ops) |
---|
707 | | - lockdep_assert_held(q->queue_lock); |
---|
708 | | - |
---|
709 | 732 | if (!rq_mergeable(req) || !rq_mergeable(next)) |
---|
710 | 733 | return NULL; |
---|
711 | 734 | |
---|
.. | .. |
---|
713 | 736 | return NULL; |
---|
714 | 737 | |
---|
715 | 738 | if (rq_data_dir(req) != rq_data_dir(next) |
---|
716 | | - || req->rq_disk != next->rq_disk |
---|
717 | | - || req_no_special_merge(next)) |
---|
| 739 | + || req->rq_disk != next->rq_disk) |
---|
718 | 740 | return NULL; |
---|
719 | 741 | |
---|
720 | 742 | if (req_op(req) == REQ_OP_WRITE_SAME && |
---|
.. | .. |
---|
726 | 748 | * non-hint IO. |
---|
727 | 749 | */ |
---|
728 | 750 | if (req->write_hint != next->write_hint) |
---|
| 751 | + return NULL; |
---|
| 752 | + |
---|
| 753 | + if (req->ioprio != next->ioprio) |
---|
729 | 754 | return NULL; |
---|
730 | 755 | |
---|
731 | 756 | /* |
---|
.. | .. |
---|
778 | 803 | if (!blk_discard_mergable(req)) |
---|
779 | 804 | elv_merge_requests(q, req, next); |
---|
780 | 805 | |
---|
| 806 | + blk_crypto_rq_put_keyslot(next); |
---|
| 807 | + |
---|
781 | 808 | /* |
---|
782 | 809 | * 'next' is going away, so update stats accordingly |
---|
783 | 810 | */ |
---|
784 | | - blk_account_io_merge(next); |
---|
| 811 | + blk_account_io_merge_request(next); |
---|
785 | 812 | |
---|
786 | | - req->ioprio = ioprio_best(req->ioprio, next->ioprio); |
---|
787 | | - if (blk_rq_cpu_valid(next)) |
---|
788 | | - req->cpu = next->cpu; |
---|
| 813 | + trace_block_rq_merge(q, next); |
---|
789 | 814 | |
---|
790 | 815 | /* |
---|
791 | 816 | * ownership of bio passed from next to req, return 'next' for |
---|
.. | .. |
---|
795 | 820 | return next; |
---|
796 | 821 | } |
---|
797 | 822 | |
---|
798 | | -struct request *attempt_back_merge(struct request_queue *q, struct request *rq) |
---|
| 823 | +static struct request *attempt_back_merge(struct request_queue *q, |
---|
| 824 | + struct request *rq) |
---|
799 | 825 | { |
---|
800 | 826 | struct request *next = elv_latter_request(q, rq); |
---|
801 | 827 | |
---|
.. | .. |
---|
805 | 831 | return NULL; |
---|
806 | 832 | } |
---|
807 | 833 | |
---|
808 | | -struct request *attempt_front_merge(struct request_queue *q, struct request *rq) |
---|
| 834 | +static struct request *attempt_front_merge(struct request_queue *q, |
---|
| 835 | + struct request *rq) |
---|
809 | 836 | { |
---|
810 | 837 | struct request *prev = elv_former_request(q, rq); |
---|
811 | 838 | |
---|
.. | .. |
---|
818 | 845 | int blk_attempt_req_merge(struct request_queue *q, struct request *rq, |
---|
819 | 846 | struct request *next) |
---|
820 | 847 | { |
---|
821 | | - struct elevator_queue *e = q->elevator; |
---|
822 | 848 | struct request *free; |
---|
823 | | - |
---|
824 | | - if (!e->uses_mq && e->type->ops.sq.elevator_allow_rq_merge_fn) |
---|
825 | | - if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next)) |
---|
826 | | - return 0; |
---|
827 | 849 | |
---|
828 | 850 | free = attempt_merge(q, rq, next); |
---|
829 | 851 | if (free) { |
---|
830 | | - __blk_put_request(q, free); |
---|
| 852 | + blk_put_request(free); |
---|
831 | 853 | return 1; |
---|
832 | 854 | } |
---|
833 | 855 | |
---|
.. | .. |
---|
846 | 868 | if (bio_data_dir(bio) != rq_data_dir(rq)) |
---|
847 | 869 | return false; |
---|
848 | 870 | |
---|
849 | | - /* must be same device and not a special request */ |
---|
850 | | - if (rq->rq_disk != bio->bi_disk || req_no_special_merge(rq)) |
---|
| 871 | + /* must be same device */ |
---|
| 872 | + if (rq->rq_disk != bio->bi_disk) |
---|
| 873 | + return false; |
---|
| 874 | + |
---|
| 875 | + /* don't merge across cgroup boundaries */ |
---|
| 876 | + if (!blk_cgroup_mergeable(rq, bio)) |
---|
851 | 877 | return false; |
---|
852 | 878 | |
---|
853 | 879 | /* only merge integrity protected bio into ditto rq */ |
---|
854 | 880 | if (blk_integrity_merge_bio(rq->q, rq, bio) == false) |
---|
| 881 | + return false; |
---|
| 882 | + |
---|
| 883 | + /* Only merge if the crypt contexts are compatible */ |
---|
| 884 | + if (!bio_crypt_rq_ctx_compatible(rq, bio)) |
---|
855 | 885 | return false; |
---|
856 | 886 | |
---|
857 | 887 | /* must be using the same buffer */ |
---|
.. | .. |
---|
866 | 896 | if (rq->write_hint != bio->bi_write_hint) |
---|
867 | 897 | return false; |
---|
868 | 898 | |
---|
869 | | - /* Only merge if the crypt contexts are compatible */ |
---|
870 | | - if (!bio_crypt_ctx_compatible(bio, rq->bio)) |
---|
| 899 | + if (rq->ioprio != bio_prio(bio)) |
---|
871 | 900 | return false; |
---|
872 | 901 | |
---|
873 | 902 | return true; |
---|
.. | .. |
---|
883 | 912 | return ELEVATOR_FRONT_MERGE; |
---|
884 | 913 | return ELEVATOR_NO_MERGE; |
---|
885 | 914 | } |
---|
| 915 | + |
---|
| 916 | +static void blk_account_io_merge_bio(struct request *req) |
---|
| 917 | +{ |
---|
| 918 | + if (!blk_do_io_stat(req)) |
---|
| 919 | + return; |
---|
| 920 | + |
---|
| 921 | + part_stat_lock(); |
---|
| 922 | + part_stat_inc(req->part, merges[op_stat_group(req_op(req))]); |
---|
| 923 | + part_stat_unlock(); |
---|
| 924 | +} |
---|
| 925 | + |
---|
| 926 | +enum bio_merge_status { |
---|
| 927 | + BIO_MERGE_OK, |
---|
| 928 | + BIO_MERGE_NONE, |
---|
| 929 | + BIO_MERGE_FAILED, |
---|
| 930 | +}; |
---|
| 931 | + |
---|
| 932 | +static enum bio_merge_status bio_attempt_back_merge(struct request *req, |
---|
| 933 | + struct bio *bio, unsigned int nr_segs) |
---|
| 934 | +{ |
---|
| 935 | + const int ff = bio->bi_opf & REQ_FAILFAST_MASK; |
---|
| 936 | + |
---|
| 937 | + if (!ll_back_merge_fn(req, bio, nr_segs)) |
---|
| 938 | + return BIO_MERGE_FAILED; |
---|
| 939 | + |
---|
| 940 | + trace_block_bio_backmerge(req->q, req, bio); |
---|
| 941 | + rq_qos_merge(req->q, req, bio); |
---|
| 942 | + |
---|
| 943 | + if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) |
---|
| 944 | + blk_rq_set_mixed_merge(req); |
---|
| 945 | + |
---|
| 946 | + req->biotail->bi_next = bio; |
---|
| 947 | + req->biotail = bio; |
---|
| 948 | + req->__data_len += bio->bi_iter.bi_size; |
---|
| 949 | + |
---|
| 950 | + bio_crypt_free_ctx(bio); |
---|
| 951 | + |
---|
| 952 | + blk_account_io_merge_bio(req); |
---|
| 953 | + return BIO_MERGE_OK; |
---|
| 954 | +} |
---|
| 955 | + |
---|
| 956 | +static enum bio_merge_status bio_attempt_front_merge(struct request *req, |
---|
| 957 | + struct bio *bio, unsigned int nr_segs) |
---|
| 958 | +{ |
---|
| 959 | + const int ff = bio->bi_opf & REQ_FAILFAST_MASK; |
---|
| 960 | + |
---|
| 961 | + if (!ll_front_merge_fn(req, bio, nr_segs)) |
---|
| 962 | + return BIO_MERGE_FAILED; |
---|
| 963 | + |
---|
| 964 | + trace_block_bio_frontmerge(req->q, req, bio); |
---|
| 965 | + rq_qos_merge(req->q, req, bio); |
---|
| 966 | + |
---|
| 967 | + if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) |
---|
| 968 | + blk_rq_set_mixed_merge(req); |
---|
| 969 | + |
---|
| 970 | + bio->bi_next = req->bio; |
---|
| 971 | + req->bio = bio; |
---|
| 972 | + |
---|
| 973 | + req->__sector = bio->bi_iter.bi_sector; |
---|
| 974 | + req->__data_len += bio->bi_iter.bi_size; |
---|
| 975 | + |
---|
| 976 | + bio_crypt_do_front_merge(req, bio); |
---|
| 977 | + |
---|
| 978 | + blk_account_io_merge_bio(req); |
---|
| 979 | + return BIO_MERGE_OK; |
---|
| 980 | +} |
---|
| 981 | + |
---|
| 982 | +static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q, |
---|
| 983 | + struct request *req, struct bio *bio) |
---|
| 984 | +{ |
---|
| 985 | + unsigned short segments = blk_rq_nr_discard_segments(req); |
---|
| 986 | + |
---|
| 987 | + if (segments >= queue_max_discard_segments(q)) |
---|
| 988 | + goto no_merge; |
---|
| 989 | + if (blk_rq_sectors(req) + bio_sectors(bio) > |
---|
| 990 | + blk_rq_get_max_sectors(req, blk_rq_pos(req))) |
---|
| 991 | + goto no_merge; |
---|
| 992 | + |
---|
| 993 | + rq_qos_merge(q, req, bio); |
---|
| 994 | + |
---|
| 995 | + req->biotail->bi_next = bio; |
---|
| 996 | + req->biotail = bio; |
---|
| 997 | + req->__data_len += bio->bi_iter.bi_size; |
---|
| 998 | + req->nr_phys_segments = segments + 1; |
---|
| 999 | + |
---|
| 1000 | + blk_account_io_merge_bio(req); |
---|
| 1001 | + return BIO_MERGE_OK; |
---|
| 1002 | +no_merge: |
---|
| 1003 | + req_set_nomerge(q, req); |
---|
| 1004 | + return BIO_MERGE_FAILED; |
---|
| 1005 | +} |
---|
| 1006 | + |
---|
| 1007 | +static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q, |
---|
| 1008 | + struct request *rq, |
---|
| 1009 | + struct bio *bio, |
---|
| 1010 | + unsigned int nr_segs, |
---|
| 1011 | + bool sched_allow_merge) |
---|
| 1012 | +{ |
---|
| 1013 | + if (!blk_rq_merge_ok(rq, bio)) |
---|
| 1014 | + return BIO_MERGE_NONE; |
---|
| 1015 | + |
---|
| 1016 | + switch (blk_try_merge(rq, bio)) { |
---|
| 1017 | + case ELEVATOR_BACK_MERGE: |
---|
| 1018 | + if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio)) |
---|
| 1019 | + return bio_attempt_back_merge(rq, bio, nr_segs); |
---|
| 1020 | + break; |
---|
| 1021 | + case ELEVATOR_FRONT_MERGE: |
---|
| 1022 | + if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio)) |
---|
| 1023 | + return bio_attempt_front_merge(rq, bio, nr_segs); |
---|
| 1024 | + break; |
---|
| 1025 | + case ELEVATOR_DISCARD_MERGE: |
---|
| 1026 | + return bio_attempt_discard_merge(q, rq, bio); |
---|
| 1027 | + default: |
---|
| 1028 | + return BIO_MERGE_NONE; |
---|
| 1029 | + } |
---|
| 1030 | + |
---|
| 1031 | + return BIO_MERGE_FAILED; |
---|
| 1032 | +} |
---|
| 1033 | + |
---|
| 1034 | +/** |
---|
| 1035 | + * blk_attempt_plug_merge - try to merge with %current's plugged list |
---|
| 1036 | + * @q: request_queue new bio is being queued at |
---|
| 1037 | + * @bio: new bio being queued |
---|
| 1038 | + * @nr_segs: number of segments in @bio |
---|
| 1039 | + * @same_queue_rq: pointer to &struct request that gets filled in when |
---|
| 1040 | + * another request associated with @q is found on the plug list |
---|
| 1041 | + * (optional, may be %NULL) |
---|
| 1042 | + * |
---|
| 1043 | + * Determine whether @bio being queued on @q can be merged with a request |
---|
| 1044 | + * on %current's plugged list. Returns %true if merge was successful, |
---|
| 1045 | + * otherwise %false. |
---|
| 1046 | + * |
---|
| 1047 | + * Plugging coalesces IOs from the same issuer for the same purpose without |
---|
| 1048 | + * going through @q->queue_lock. As such it's more of an issuing mechanism |
---|
| 1049 | + * than scheduling, and the request, while may have elvpriv data, is not |
---|
| 1050 | + * added on the elevator at this point. In addition, we don't have |
---|
| 1051 | + * reliable access to the elevator outside queue lock. Only check basic |
---|
| 1052 | + * merging parameters without querying the elevator. |
---|
| 1053 | + * |
---|
| 1054 | + * Caller must ensure !blk_queue_nomerges(q) beforehand. |
---|
| 1055 | + */ |
---|
| 1056 | +bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, |
---|
| 1057 | + unsigned int nr_segs, struct request **same_queue_rq) |
---|
| 1058 | +{ |
---|
| 1059 | + struct blk_plug *plug; |
---|
| 1060 | + struct request *rq; |
---|
| 1061 | + struct list_head *plug_list; |
---|
| 1062 | + |
---|
| 1063 | + plug = blk_mq_plug(q, bio); |
---|
| 1064 | + if (!plug) |
---|
| 1065 | + return false; |
---|
| 1066 | + |
---|
| 1067 | + plug_list = &plug->mq_list; |
---|
| 1068 | + |
---|
| 1069 | + list_for_each_entry_reverse(rq, plug_list, queuelist) { |
---|
| 1070 | + if (rq->q == q && same_queue_rq) { |
---|
| 1071 | + /* |
---|
| 1072 | + * Only the blk-mq multiple-hardware-queues case checks for |
---|
| 1073 | + * an rq on the same queue; there should be only one such |
---|
| 1074 | + * rq in a queue |
---|
| 1075 | + */ |
---|
| 1076 | + *same_queue_rq = rq; |
---|
| 1077 | + } |
---|
| 1078 | + |
---|
| 1079 | + if (rq->q != q) |
---|
| 1080 | + continue; |
---|
| 1081 | + |
---|
| 1082 | + if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == |
---|
| 1083 | + BIO_MERGE_OK) |
---|
| 1084 | + return true; |
---|
| 1085 | + } |
---|
| 1086 | + |
---|
| 1087 | + return false; |
---|
| 1088 | +} |
---|
| 1089 | + |
---|
| 1090 | +/* |
---|
| 1091 | + * Iterate list of requests and see if we can merge this bio with any |
---|
| 1092 | + * of them. |
---|
| 1093 | + */ |
---|
| 1094 | +bool blk_bio_list_merge(struct request_queue *q, struct list_head *list, |
---|
| 1095 | + struct bio *bio, unsigned int nr_segs) |
---|
| 1096 | +{ |
---|
| 1097 | + struct request *rq; |
---|
| 1098 | + int checked = 8; |
---|
| 1099 | + |
---|
| 1100 | + list_for_each_entry_reverse(rq, list, queuelist) { |
---|
| 1101 | + if (!checked--) |
---|
| 1102 | + break; |
---|
| 1103 | + |
---|
| 1104 | + switch (blk_attempt_bio_merge(q, rq, bio, nr_segs, true)) { |
---|
| 1105 | + case BIO_MERGE_NONE: |
---|
| 1106 | + continue; |
---|
| 1107 | + case BIO_MERGE_OK: |
---|
| 1108 | + return true; |
---|
| 1109 | + case BIO_MERGE_FAILED: |
---|
| 1110 | + return false; |
---|
| 1111 | + } |
---|
| 1112 | + |
---|
| 1113 | + } |
---|
| 1114 | + |
---|
| 1115 | + return false; |
---|
| 1116 | +} |
---|
| 1117 | +EXPORT_SYMBOL_GPL(blk_bio_list_merge); |
---|
| 1118 | + |
---|
| 1119 | +bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, |
---|
| 1120 | + unsigned int nr_segs, struct request **merged_request) |
---|
| 1121 | +{ |
---|
| 1122 | + struct request *rq; |
---|
| 1123 | + |
---|
| 1124 | + switch (elv_merge(q, &rq, bio)) { |
---|
| 1125 | + case ELEVATOR_BACK_MERGE: |
---|
| 1126 | + if (!blk_mq_sched_allow_merge(q, rq, bio)) |
---|
| 1127 | + return false; |
---|
| 1128 | + if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK) |
---|
| 1129 | + return false; |
---|
| 1130 | + *merged_request = attempt_back_merge(q, rq); |
---|
| 1131 | + if (!*merged_request) |
---|
| 1132 | + elv_merged_request(q, rq, ELEVATOR_BACK_MERGE); |
---|
| 1133 | + return true; |
---|
| 1134 | + case ELEVATOR_FRONT_MERGE: |
---|
| 1135 | + if (!blk_mq_sched_allow_merge(q, rq, bio)) |
---|
| 1136 | + return false; |
---|
| 1137 | + if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK) |
---|
| 1138 | + return false; |
---|
| 1139 | + *merged_request = attempt_front_merge(q, rq); |
---|
| 1140 | + if (!*merged_request) |
---|
| 1141 | + elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE); |
---|
| 1142 | + return true; |
---|
| 1143 | + case ELEVATOR_DISCARD_MERGE: |
---|
| 1144 | + return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK; |
---|
| 1145 | + default: |
---|
| 1146 | + return false; |
---|
| 1147 | + } |
---|
| 1148 | +} |
---|
| 1149 | +EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge); |
---|