2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/block/blk-map.c
@@ -11,6 +11,512 @@
 
 #include "blk.h"
 
+struct bio_map_data {
+	bool is_our_pages : 1;
+	bool is_null_mapped : 1;
+	struct iov_iter iter;
+	struct iovec iov[];
+};
+
+static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
+		gfp_t gfp_mask)
+{
+	struct bio_map_data *bmd;
+
+	if (data->nr_segs > UIO_MAXIOV)
+		return NULL;
+
+	bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
+	if (!bmd)
+		return NULL;
+	memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
+	bmd->iter = *data;
+	bmd->iter.iov = bmd->iov;
+	return bmd;
+}
+
+/**
+ * bio_copy_from_iter - copy all pages from iov_iter to bio
+ * @bio: The &struct bio which describes the I/O as destination
+ * @iter: iov_iter as source
+ *
+ * Copy all pages from iov_iter to bio.
+ * Returns 0 on success, or error on failure.
+ */
+static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
+{
+	struct bio_vec *bvec;
+	struct bvec_iter_all iter_all;
+
+	bio_for_each_segment_all(bvec, bio, iter_all) {
+		ssize_t ret;
+
+		ret = copy_page_from_iter(bvec->bv_page,
+					  bvec->bv_offset,
+					  bvec->bv_len,
+					  iter);
+
+		if (!iov_iter_count(iter))
+			break;
+
+		if (ret < bvec->bv_len)
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+/**
+ * bio_copy_to_iter - copy all pages from bio to iov_iter
+ * @bio: The &struct bio which describes the I/O as source
+ * @iter: iov_iter as destination
+ *
+ * Copy all pages from bio to iov_iter.
+ * Returns 0 on success, or error on failure.
+ */
+static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
+{
+	struct bio_vec *bvec;
+	struct bvec_iter_all iter_all;
+
+	bio_for_each_segment_all(bvec, bio, iter_all) {
+		ssize_t ret;
+
+		ret = copy_page_to_iter(bvec->bv_page,
+					bvec->bv_offset,
+					bvec->bv_len,
+					&iter);
+
+		if (!iov_iter_count(&iter))
+			break;
+
+		if (ret < bvec->bv_len)
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+/**
+ * bio_uncopy_user - finish previously mapped bio
+ * @bio: bio being terminated
+ *
+ * Free pages allocated from bio_copy_user_iov() and write back data
+ * to user space in case of a read.
+ */
+static int bio_uncopy_user(struct bio *bio)
+{
+	struct bio_map_data *bmd = bio->bi_private;
+	int ret = 0;
+
+	if (!bmd->is_null_mapped) {
+		/*
+		 * if we're in a workqueue, the request is orphaned, so
+		 * don't copy into a random user address space, just free
+		 * and return -EINTR so user space doesn't expect any data.
+		 */
+		if (!current->mm)
+			ret = -EINTR;
+		else if (bio_data_dir(bio) == READ)
+			ret = bio_copy_to_iter(bio, bmd->iter);
+		if (bmd->is_our_pages)
+			bio_free_pages(bio);
+	}
+	kfree(bmd);
+	bio_put(bio);
+	return ret;
+}
+
+static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
+		struct iov_iter *iter, gfp_t gfp_mask)
+{
+	struct bio_map_data *bmd;
+	struct page *page;
+	struct bio *bio, *bounce_bio;
+	int i = 0, ret;
+	int nr_pages;
+	unsigned int len = iter->count;
+	unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
+
+	bmd = bio_alloc_map_data(iter, gfp_mask);
+	if (!bmd)
+		return -ENOMEM;
+
+	/*
+	 * We need to do a deep copy of the iov_iter including the iovecs.
+	 * The caller provided iov might point to an on-stack or otherwise
+	 * shortlived one.
+	 */
+	bmd->is_our_pages = !map_data;
+	bmd->is_null_mapped = (map_data && map_data->null_mapped);
+
+	nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+	if (nr_pages > BIO_MAX_PAGES)
+		nr_pages = BIO_MAX_PAGES;
+
+	ret = -ENOMEM;
+	bio = bio_kmalloc(gfp_mask, nr_pages);
+	if (!bio)
+		goto out_bmd;
+	bio->bi_opf |= req_op(rq);
+
+	if (map_data) {
+		nr_pages = 1 << map_data->page_order;
+		i = map_data->offset / PAGE_SIZE;
+	}
+	while (len) {
+		unsigned int bytes = PAGE_SIZE;
+
+		bytes -= offset;
+
+		if (bytes > len)
+			bytes = len;
+
+		if (map_data) {
+			if (i == map_data->nr_entries * nr_pages) {
+				ret = -ENOMEM;
+				goto cleanup;
+			}
+
+			page = map_data->pages[i / nr_pages];
+			page += (i % nr_pages);
+
+			i++;
+		} else {
+			page = alloc_page(rq->q->bounce_gfp | gfp_mask);
+			if (!page) {
+				ret = -ENOMEM;
+				goto cleanup;
+			}
+		}
+
+		if (bio_add_pc_page(rq->q, bio, page, bytes, offset) < bytes) {
+			if (!map_data)
+				__free_page(page);
+			break;
+		}
+
+		len -= bytes;
+		offset = 0;
+	}
+
+	if (map_data)
+		map_data->offset += bio->bi_iter.bi_size;
+
+	/*
+	 * success
+	 */
+	if ((iov_iter_rw(iter) == WRITE &&
+	     (!map_data || !map_data->null_mapped)) ||
+	    (map_data && map_data->from_user)) {
+		ret = bio_copy_from_iter(bio, iter);
+		if (ret)
+			goto cleanup;
+	} else {
+		if (bmd->is_our_pages)
+			zero_fill_bio(bio);
+		iov_iter_advance(iter, bio->bi_iter.bi_size);
+	}
+
+	bio->bi_private = bmd;
+
+	bounce_bio = bio;
+	ret = blk_rq_append_bio(rq, &bounce_bio);
+	if (ret)
+		goto cleanup;
+
+	/*
+	 * We link the bounce buffer in and could have to traverse it later, so
+	 * we have to get a ref to prevent it from being freed
+	 */
+	bio_get(bounce_bio);
+	return 0;
+cleanup:
+	if (!map_data)
+		bio_free_pages(bio);
+	bio_put(bio);
+out_bmd:
+	kfree(bmd);
+	return ret;
+}
+
+static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
+		gfp_t gfp_mask)
+{
+	unsigned int max_sectors = queue_max_hw_sectors(rq->q);
+	struct bio *bio, *bounce_bio;
+	int ret;
+	int j;
+
+	if (!iov_iter_count(iter))
+		return -EINVAL;
+
+	bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
+	if (!bio)
+		return -ENOMEM;
+	bio->bi_opf |= req_op(rq);
+
+	while (iov_iter_count(iter)) {
+		struct page **pages;
+		ssize_t bytes;
+		size_t offs, added = 0;
+		int npages;
+
+		bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
+		if (unlikely(bytes <= 0)) {
+			ret = bytes ? bytes : -EFAULT;
+			goto out_unmap;
+		}
+
+		npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
+
+		if (unlikely(offs & queue_dma_alignment(rq->q))) {
+			ret = -EINVAL;
+			j = 0;
+		} else {
+			for (j = 0; j < npages; j++) {
+				struct page *page = pages[j];
+				unsigned int n = PAGE_SIZE - offs;
+				bool same_page = false;
+
+				if (n > bytes)
+					n = bytes;
+
+				if (!bio_add_hw_page(rq->q, bio, page, n, offs,
+						     max_sectors, &same_page)) {
+					if (same_page)
+						put_page(page);
+					break;
+				}
+
+				added += n;
+				bytes -= n;
+				offs = 0;
+			}
+			iov_iter_advance(iter, added);
+		}
+		/*
+		 * release the pages we didn't map into the bio, if any
+		 */
+		while (j < npages)
+			put_page(pages[j++]);
+		kvfree(pages);
+		/* couldn't stuff something into bio? */
+		if (bytes)
+			break;
+	}
+
+	/*
+	 * Subtle: if we end up needing to bounce a bio, it would normally
+	 * disappear when its bi_end_io is run. However, we need the original
+	 * bio for the unmap, so grab an extra reference to it
+	 */
+	bio_get(bio);
+
+	bounce_bio = bio;
+	ret = blk_rq_append_bio(rq, &bounce_bio);
+	if (ret)
+		goto out_put_orig;
+
+	/*
+	 * We link the bounce buffer in and could have to traverse it
+	 * later, so we have to get a ref to prevent it from being freed
+	 */
+	bio_get(bounce_bio);
+	return 0;
+
+ out_put_orig:
+	bio_put(bio);
+ out_unmap:
+	bio_release_pages(bio, false);
+	bio_put(bio);
+	return ret;
+}
+
+/**
+ * bio_unmap_user - unmap a bio
+ * @bio: the bio being unmapped
+ *
+ * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
+ * process context.
+ *
+ * bio_unmap_user() may sleep.
+ */
+static void bio_unmap_user(struct bio *bio)
+{
+	bio_release_pages(bio, bio_data_dir(bio) == READ);
+	bio_put(bio);
+	bio_put(bio);
+}
+
+static void bio_invalidate_vmalloc_pages(struct bio *bio)
+{
+#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
+	if (bio->bi_private && !op_is_write(bio_op(bio))) {
+		unsigned long i, len = 0;
+
+		for (i = 0; i < bio->bi_vcnt; i++)
+			len += bio->bi_io_vec[i].bv_len;
+		invalidate_kernel_vmap_range(bio->bi_private, len);
+	}
+#endif
+}
+
+static void bio_map_kern_endio(struct bio *bio)
+{
+	bio_invalidate_vmalloc_pages(bio);
+	bio_put(bio);
+}
+
+/**
+ * bio_map_kern - map kernel address into bio
+ * @q: the struct request_queue for the bio
+ * @data: pointer to buffer to map
+ * @len: length in bytes
+ * @gfp_mask: allocation flags for bio allocation
+ *
+ * Map the kernel address into a bio suitable for io to a block
+ * device. Returns an error pointer in case of error.
+ */
+static struct bio *bio_map_kern(struct request_queue *q, void *data,
+		unsigned int len, gfp_t gfp_mask)
+{
+	unsigned long kaddr = (unsigned long)data;
+	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	unsigned long start = kaddr >> PAGE_SHIFT;
+	const int nr_pages = end - start;
+	bool is_vmalloc = is_vmalloc_addr(data);
+	struct page *page;
+	int offset, i;
+	struct bio *bio;
+
+	bio = bio_kmalloc(gfp_mask, nr_pages);
+	if (!bio)
+		return ERR_PTR(-ENOMEM);
+
+	if (is_vmalloc) {
+		flush_kernel_vmap_range(data, len);
+		bio->bi_private = data;
+	}
+
+	offset = offset_in_page(kaddr);
+	for (i = 0; i < nr_pages; i++) {
+		unsigned int bytes = PAGE_SIZE - offset;
+
+		if (len <= 0)
+			break;
+
+		if (bytes > len)
+			bytes = len;
+
+		if (!is_vmalloc)
+			page = virt_to_page(data);
+		else
+			page = vmalloc_to_page(data);
+		if (bio_add_pc_page(q, bio, page, bytes,
+				    offset) < bytes) {
+			/* we don't support partial mappings */
+			bio_put(bio);
+			return ERR_PTR(-EINVAL);
+		}
+
+		data += bytes;
+		len -= bytes;
+		offset = 0;
+	}
+
+	bio->bi_end_io = bio_map_kern_endio;
+	return bio;
+}
+
+static void bio_copy_kern_endio(struct bio *bio)
+{
+	bio_free_pages(bio);
+	bio_put(bio);
+}
+
+static void bio_copy_kern_endio_read(struct bio *bio)
+{
+	char *p = bio->bi_private;
+	struct bio_vec *bvec;
+	struct bvec_iter_all iter_all;
+
+	bio_for_each_segment_all(bvec, bio, iter_all) {
+		memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
+		p += bvec->bv_len;
+	}
+
+	bio_copy_kern_endio(bio);
+}
+
+/**
+ * bio_copy_kern - copy kernel address into bio
+ * @q: the struct request_queue for the bio
+ * @data: pointer to buffer to copy
+ * @len: length in bytes
+ * @gfp_mask: allocation flags for bio and page allocation
+ * @reading: data direction is READ
+ *
+ * copy the kernel address into a bio suitable for io to a block
+ * device. Returns an error pointer in case of error.
+ */
+static struct bio *bio_copy_kern(struct request_queue *q, void *data,
+		unsigned int len, gfp_t gfp_mask, int reading)
+{
+	unsigned long kaddr = (unsigned long)data;
+	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	unsigned long start = kaddr >> PAGE_SHIFT;
+	struct bio *bio;
+	void *p = data;
+	int nr_pages = 0;
+
+	/*
+	 * Overflow, abort
+	 */
+	if (end < start)
+		return ERR_PTR(-EINVAL);
+
+	nr_pages = end - start;
+	bio = bio_kmalloc(gfp_mask, nr_pages);
+	if (!bio)
+		return ERR_PTR(-ENOMEM);
+
+	while (len) {
+		struct page *page;
+		unsigned int bytes = PAGE_SIZE;
+
+		if (bytes > len)
+			bytes = len;
+
+		page = alloc_page(q->bounce_gfp | __GFP_ZERO | gfp_mask);
+		if (!page)
+			goto cleanup;
+
+		if (!reading)
+			memcpy(page_address(page), p, bytes);
+
+		if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
+			break;
+
+		len -= bytes;
+		p += bytes;
+	}
+
+	if (reading) {
+		bio->bi_end_io = bio_copy_kern_endio_read;
+		bio->bi_private = data;
+	} else {
+		bio->bi_end_io = bio_copy_kern_endio;
+	}
+
+	return bio;
+
+cleanup:
+	bio_free_pages(bio);
+	bio_put(bio);
+	return ERR_PTR(-ENOMEM);
+}
+
 /*
  * Append a bio to a passthrough request. Only works if the bio can be merged
  * into the request based on the driver constraints.
@@ -18,13 +524,19 @@
 int blk_rq_append_bio(struct request *rq, struct bio **bio)
 {
 	struct bio *orig_bio = *bio;
+	struct bvec_iter iter;
+	struct bio_vec bv;
+	unsigned int nr_segs = 0;
 
 	blk_queue_bounce(rq->q, bio);
 
+	bio_for_each_bvec(bv, *bio, iter)
+		nr_segs++;
+
 	if (!rq->bio) {
-		blk_rq_bio_prep(rq->q, rq, *bio);
+		blk_rq_bio_prep(rq, *bio, nr_segs);
 	} else {
-		if (!ll_back_merge_fn(rq->q, rq, *bio)) {
+		if (!ll_back_merge_fn(rq, *bio, nr_segs)) {
 			if (orig_bio != *bio) {
 				bio_put(*bio);
 				*bio = orig_bio;
@@ -35,60 +547,12 @@
 		rq->biotail->bi_next = *bio;
 		rq->biotail = *bio;
 		rq->__data_len += (*bio)->bi_iter.bi_size;
+		bio_crypt_free_ctx(*bio);
 	}
 
 	return 0;
 }
 EXPORT_SYMBOL(blk_rq_append_bio);
-
-static int __blk_rq_unmap_user(struct bio *bio)
-{
-	int ret = 0;
-
-	if (bio) {
-		if (bio_flagged(bio, BIO_USER_MAPPED))
-			bio_unmap_user(bio);
-		else
-			ret = bio_uncopy_user(bio);
-	}
-
-	return ret;
-}
-
-static int __blk_rq_map_user_iov(struct request *rq,
-		struct rq_map_data *map_data, struct iov_iter *iter,
-		gfp_t gfp_mask, bool copy)
-{
-	struct request_queue *q = rq->q;
-	struct bio *bio, *orig_bio;
-	int ret;
-
-	if (copy)
-		bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
-	else
-		bio = bio_map_user_iov(q, iter, gfp_mask);
-
-	if (IS_ERR(bio))
-		return PTR_ERR(bio);
-
-	bio->bi_opf &= ~REQ_OP_MASK;
-	bio->bi_opf |= req_op(rq);
-
-	orig_bio = bio;
-
-	/*
-	 * We link the bounce buffer in and could have to traverse it
-	 * later so we have to get a ref to prevent it from being freed
-	 */
-	ret = blk_rq_append_bio(rq, &bio);
-	if (ret) {
-		__blk_rq_unmap_user(orig_bio);
-		return ret;
-	}
-	bio_get(bio);
-
-	return 0;
-}
 
 /**
  * blk_rq_map_user_iov - map user data to a request, for passthrough requests
@@ -133,15 +597,16 @@
 
 	i = *iter;
 	do {
-		ret =__blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy);
+		if (copy)
+			ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask);
+		else
+			ret = bio_map_user_iov(rq, &i, gfp_mask);
 		if (ret)
 			goto unmap_rq;
 		if (!bio)
 			bio = rq->bio;
 	} while (iov_iter_count(&i));
 
-	if (!bio_flagged(bio, BIO_USER_MAPPED))
-		rq->rq_flags |= RQF_COPY_USER;
 	return 0;
 
 unmap_rq:
@@ -186,9 +651,13 @@
 		if (unlikely(bio_flagged(bio, BIO_BOUNCED)))
 			mapped_bio = bio->bi_private;
 
-		ret2 = __blk_rq_unmap_user(mapped_bio);
-		if (ret2 && !ret)
-			ret = ret2;
+		if (bio->bi_private) {
+			ret2 = bio_uncopy_user(mapped_bio);
+			if (ret2 && !ret)
+				ret = ret2;
+		} else {
+			bio_unmap_user(mapped_bio);
+		}
 
 		mapped_bio = bio;
 		bio = bio->bi_next;
@@ -217,7 +686,6 @@
 {
 	int reading = rq_data_dir(rq) == READ;
 	unsigned long addr = (unsigned long) kbuf;
-	int do_copy = 0;
 	struct bio *bio, *orig_bio;
 	int ret;
 
@@ -226,8 +694,7 @@
 	if (!len || !kbuf)
 		return -EINVAL;
 
-	do_copy = !blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf);
-	if (do_copy)
+	if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf))
 		bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
 	else
 		bio = bio_map_kern(q, kbuf, len, gfp_mask);
@@ -237,9 +704,6 @@
 
 	bio->bi_opf &= ~REQ_OP_MASK;
 	bio->bi_opf |= req_op(rq);
-
-	if (do_copy)
-		rq->rq_flags |= RQF_COPY_USER;
 
 	orig_bio = bio;
 	ret = blk_rq_append_bio(rq, &bio);
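
For reference, the usual consumers of the paths modified above are passthrough submitters (SG_IO, NVMe admin commands, and the like), which reach this code through blk_rq_map_user()/blk_rq_map_user_iov() and undo the mapping with blk_rq_unmap_user(). Below is a minimal caller-side sketch against the block API of this era; the helper name, the queue q, and the ubuf/len parameters are illustrative only, and the device-specific command setup (CDB, timeout, sense buffer) is omitted.

#include <linux/blkdev.h>

/*
 * Hypothetical example: synchronously read 'len' bytes from the device
 * behind 'q' into the user buffer 'ubuf' via a passthrough request.
 */
static int example_pt_read(struct request_queue *q, void __user *ubuf,
			   unsigned long len)
{
	struct request *rq;
	struct bio *bio;
	int ret;

	rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* picks bio_map_user_iov() or bio_copy_user_iov() internally */
	ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
	if (ret)
		goto out_put;
	bio = rq->bio;			/* keep a handle for the unmap */

	blk_execute_rq(q, NULL, rq, 0);	/* synchronous submission */

	/* copies back bounce data (if any) and drops the page refs */
	ret = blk_rq_unmap_user(bio);
out_put:
	blk_put_request(rq);
	return ret;
}

The kernel-buffer side works the same way through blk_rq_map_kern(), which, as the last hunks show, chooses bio_copy_kern() over bio_map_kern() when the buffer is misaligned or lives on the stack.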