2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/fs/ext4/page-io.c
@@ -31,18 +31,56 @@
 #include "acl.h"
 
 static struct kmem_cache *io_end_cachep;
+static struct kmem_cache *io_end_vec_cachep;
 
 int __init ext4_init_pageio(void)
 {
 	io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
 	if (io_end_cachep == NULL)
 		return -ENOMEM;
+
+	io_end_vec_cachep = KMEM_CACHE(ext4_io_end_vec, 0);
+	if (io_end_vec_cachep == NULL) {
+		kmem_cache_destroy(io_end_cachep);
+		return -ENOMEM;
+	}
 	return 0;
 }
 
 void ext4_exit_pageio(void)
 {
 	kmem_cache_destroy(io_end_cachep);
+	kmem_cache_destroy(io_end_vec_cachep);
+}
+
+struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end)
+{
+	struct ext4_io_end_vec *io_end_vec;
+
+	io_end_vec = kmem_cache_zalloc(io_end_vec_cachep, GFP_NOFS);
+	if (!io_end_vec)
+		return ERR_PTR(-ENOMEM);
+	INIT_LIST_HEAD(&io_end_vec->list);
+	list_add_tail(&io_end_vec->list, &io_end->list_vec);
+	return io_end_vec;
+}
+
+static void ext4_free_io_end_vec(ext4_io_end_t *io_end)
+{
+	struct ext4_io_end_vec *io_end_vec, *tmp;
+
+	if (list_empty(&io_end->list_vec))
+		return;
+	list_for_each_entry_safe(io_end_vec, tmp, &io_end->list_vec, list) {
+		list_del(&io_end_vec->list);
+		kmem_cache_free(io_end_vec_cachep, io_end_vec);
+	}
+}
+
+struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end)
+{
+	BUG_ON(list_empty(&io_end->list_vec));
+	return list_last_entry(&io_end->list_vec, struct ext4_io_end_vec, list);
 }
 
 /*
@@ -61,10 +99,10 @@
 
 static void ext4_finish_bio(struct bio *bio)
 {
-	int i;
 	struct bio_vec *bvec;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, iter_all) {
 		struct page *page = bvec->bv_page;
 		struct page *bounce_page = NULL;
 		struct buffer_head *bh, *head;
@@ -87,10 +125,10 @@
 		}
 		bh = head = page_buffers(page);
 		/*
-		 * We check all buffers in the page under BH_Uptodate_Lock
+		 * We check all buffers in the page under b_uptodate_lock
 		 * to avoid races with other end io clearing async_write flags
 		 */
-		flags = bh_uptodate_lock_irqsave(head);
+		spin_lock_irqsave(&head->b_uptodate_lock, flags);
 		do {
 			if (bh_offset(bh) < bio_start ||
 			    bh_offset(bh) + bh->b_size > bio_end) {
@@ -99,10 +137,12 @@
 				continue;
 			}
 			clear_buffer_async_write(bh);
-			if (bio->bi_status)
+			if (bio->bi_status) {
+				set_buffer_write_io_error(bh);
 				buffer_io_error(bh);
+			}
 		} while ((bh = bh->b_this_page) != head);
-		bh_uptodate_unlock_irqrestore(head, flags);
+		spin_unlock_irqrestore(&head->b_uptodate_lock, flags);
 		if (!under_io) {
 			fscrypt_free_bounce_page(bounce_page);
 			end_page_writeback(page);
@@ -123,6 +163,7 @@
 		ext4_finish_bio(bio);
 		bio_put(bio);
 	}
+	ext4_free_io_end_vec(io_end);
 	kmem_cache_free(io_end_cachep, io_end);
 }
 
@@ -134,29 +175,26 @@
  * cannot get to ext4_ext_truncate() before all IOs overlapping that range are
  * completed (happens from ext4_free_ioend()).
  */
-static int ext4_end_io(ext4_io_end_t *io)
+static int ext4_end_io_end(ext4_io_end_t *io_end)
 {
-	struct inode *inode = io->inode;
-	loff_t offset = io->offset;
-	ssize_t size = io->size;
-	handle_t *handle = io->handle;
+	struct inode *inode = io_end->inode;
+	handle_t *handle = io_end->handle;
 	int ret = 0;
 
-	ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
+	ext4_debug("ext4_end_io_nolock: io_end 0x%p from inode %lu,list->next 0x%p,"
 		   "list->prev 0x%p\n",
-		   io, inode->i_ino, io->list.next, io->list.prev);
+		   io_end, inode->i_ino, io_end->list.next, io_end->list.prev);
 
-	io->handle = NULL;	/* Following call will use up the handle */
-	ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
+	io_end->handle = NULL;	/* Following call will use up the handle */
+	ret = ext4_convert_unwritten_io_end_vec(handle, io_end);
 	if (ret < 0 && !ext4_forced_shutdown(EXT4_SB(inode->i_sb))) {
 		ext4_msg(inode->i_sb, KERN_EMERG,
 			 "failed to convert unwritten extents to written "
 			 "extents -- potential data loss! "
-			 "(inode %lu, offset %llu, size %zd, error %d)",
-			 inode->i_ino, offset, size, ret);
+			 "(inode %lu, error %d)", inode->i_ino, ret);
 	}
-	ext4_clear_io_unwritten_flag(io);
-	ext4_release_io_end(io);
+	ext4_clear_io_unwritten_flag(io_end);
+	ext4_release_io_end(io_end);
 	return ret;
 }
 
@@ -164,21 +202,21 @@
 {
 #ifdef EXT4FS_DEBUG
 	struct list_head *cur, *before, *after;
-	ext4_io_end_t *io, *io0, *io1;
+	ext4_io_end_t *io_end, *io_end0, *io_end1;
 
 	if (list_empty(head))
 		return;
 
 	ext4_debug("Dump inode %lu completed io list\n", inode->i_ino);
-	list_for_each_entry(io, head, list) {
-		cur = &io->list;
+	list_for_each_entry(io_end, head, list) {
+		cur = &io_end->list;
 		before = cur->prev;
-		io0 = container_of(before, ext4_io_end_t, list);
+		io_end0 = container_of(before, ext4_io_end_t, list);
 		after = cur->next;
-		io1 = container_of(after, ext4_io_end_t, list);
+		io_end1 = container_of(after, ext4_io_end_t, list);
 
 		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
-			   io, inode->i_ino, io0, io1);
+			   io_end, inode->i_ino, io_end0, io_end1);
 	}
 #endif
 }
@@ -205,7 +243,7 @@
 static int ext4_do_flush_completed_IO(struct inode *inode,
 				      struct list_head *head)
 {
-	ext4_io_end_t *io;
+	ext4_io_end_t *io_end;
 	struct list_head unwritten;
 	unsigned long flags;
 	struct ext4_inode_info *ei = EXT4_I(inode);
@@ -217,11 +255,11 @@
 	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
 
 	while (!list_empty(&unwritten)) {
-		io = list_entry(unwritten.next, ext4_io_end_t, list);
-		BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
-		list_del_init(&io->list);
+		io_end = list_entry(unwritten.next, ext4_io_end_t, list);
+		BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
+		list_del_init(&io_end->list);
 
-		err = ext4_end_io(io);
+		err = ext4_end_io_end(io_end);
 		if (unlikely(!ret && err))
 			ret = err;
 	}
@@ -240,19 +278,22 @@
 
 ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
 {
-	ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
-	if (io) {
-		io->inode = inode;
-		INIT_LIST_HEAD(&io->list);
-		atomic_set(&io->count, 1);
+	ext4_io_end_t *io_end = kmem_cache_zalloc(io_end_cachep, flags);
+
+	if (io_end) {
+		io_end->inode = inode;
+		INIT_LIST_HEAD(&io_end->list);
+		INIT_LIST_HEAD(&io_end->list_vec);
+		atomic_set(&io_end->count, 1);
 	}
-	return io;
+	return io_end;
}
 
 void ext4_put_io_end_defer(ext4_io_end_t *io_end)
 {
 	if (atomic_dec_and_test(&io_end->count)) {
-		if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
+		if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) ||
+				list_empty(&io_end->list_vec)) {
 			ext4_release_io_end(io_end);
 			return;
 		}
@@ -266,9 +307,8 @@
 
 	if (atomic_dec_and_test(&io_end->count)) {
 		if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
-			err = ext4_convert_unwritten_extents(io_end->handle,
-						io_end->inode, io_end->offset,
-						io_end->size);
+			err = ext4_convert_unwritten_io_end_vec(io_end->handle,
+								io_end);
 			io_end->handle = NULL;
 			ext4_clear_io_unwritten_flag(io_end);
 		}
@@ -305,10 +345,8 @@
 		struct inode *inode = io_end->inode;
 
 		ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
-			     "(offset %llu size %ld starting block %llu)",
+			     "starting block %llu)",
 			     bio->bi_status, inode->i_ino,
-			     (unsigned long long) io_end->offset,
-			     (long) io_end->size,
 			     (unsigned long long)
 			     bi_sector >> (inode->i_blkbits - 9));
 		mapping_set_error(inode->i_mapping,
@@ -356,29 +394,31 @@
 	io->io_end = NULL;
 }
 
-static int io_submit_init_bio(struct ext4_io_submit *io,
-			      struct buffer_head *bh)
+static void io_submit_init_bio(struct ext4_io_submit *io,
+			       struct buffer_head *bh)
 {
 	struct bio *bio;
 
+	/*
+	 * bio_alloc will _always_ be able to allocate a bio if
+	 * __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset().
+	 */
 	bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
-	if (!bio)
-		return -ENOMEM;
 	fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
-	wbc_init_bio(io->io_wbc, bio);
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio_set_dev(bio, bh->b_bdev);
 	bio->bi_end_io = ext4_end_bio;
 	bio->bi_private = ext4_get_io_end(io->io_end);
 	io->io_bio = bio;
 	io->io_next_block = bh->b_blocknr;
-	return 0;
+	wbc_init_bio(io->io_wbc, bio);
 }
 
-static int io_submit_add_bh(struct ext4_io_submit *io,
-			    struct inode *inode,
-			    struct page *page,
-			    struct buffer_head *bh)
+static void io_submit_add_bh(struct ext4_io_submit *io,
+			     struct inode *inode,
+			     struct page *pagecache_page,
+			     struct page *bounce_page,
+			     struct buffer_head *bh)
 {
 	int ret;
 
@@ -388,17 +428,15 @@
 		ext4_io_submit(io);
 	}
 	if (io->io_bio == NULL) {
-		ret = io_submit_init_bio(io, bh);
-		if (ret)
-			return ret;
+		io_submit_init_bio(io, bh);
 		io->io_bio->bi_write_hint = inode->i_write_hint;
 	}
-	ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
+	ret = bio_add_page(io->io_bio, bounce_page ?: pagecache_page,
+			   bh->b_size, bh_offset(bh));
 	if (ret != bh->b_size)
 		goto submit_and_retry;
-	wbc_account_io(io->io_wbc, page, bh->b_size);
+	wbc_account_cgroup_owner(io->io_wbc, pagecache_page, bh->b_size);
 	io->io_next_block++;
-	return 0;
 }
 
 int ext4_bio_write_page(struct ext4_io_submit *io,
@@ -459,18 +497,24 @@
 				ext4_io_submit(io);
 			continue;
 		}
-		if (buffer_new(bh)) {
+		if (buffer_new(bh))
 			clear_buffer_new(bh);
-			clean_bdev_bh_alias(bh);
-		}
 		set_buffer_async_write(bh);
 		nr_to_submit++;
 	} while ((bh = bh->b_this_page) != head);
 
 	bh = head = page_buffers(page);
 
+	/*
+	 * If any blocks are being written to an encrypted file, encrypt them
+	 * into a bounce page. For simplicity, just encrypt until the last
+	 * block which might be needed. This may cause some unneeded blocks
+	 * (e.g. holes) to be unnecessarily encrypted, but this is rare and
+	 * can't happen in the common case of blocksize == PAGE_SIZE.
+	 */
 	if (fscrypt_inode_uses_fs_layer_crypto(inode) && nr_to_submit) {
 		gfp_t gfp_flags = GFP_NOFS;
+		unsigned int enc_bytes = round_up(len, i_blocksize(inode));
 
 		/*
 		 * Since bounce page allocation uses a mempool, we can only use
@@ -480,7 +524,7 @@
 		if (io->io_bio)
 			gfp_flags = GFP_NOWAIT | __GFP_NOWARN;
 	retry_encrypt:
-		bounce_page = fscrypt_encrypt_pagecache_blocks(page, PAGE_SIZE,
+		bounce_page = fscrypt_encrypt_pagecache_blocks(page, enc_bytes,
 							       0, gfp_flags);
 		if (IS_ERR(bounce_page)) {
 			ret = PTR_ERR(bounce_page);
@@ -494,8 +538,14 @@
 				congestion_wait(BLK_RW_ASYNC, HZ/50);
 				goto retry_encrypt;
 			}
-			bounce_page = NULL;
-			goto out;
+
+			printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
+			redirty_page_for_writepage(wbc, page);
+			do {
+				clear_buffer_async_write(bh);
+				bh = bh->b_this_page;
+			} while (bh != head);
+			goto unlock;
 		}
 	}
 
@@ -503,30 +553,12 @@
 	do {
 		if (!buffer_async_write(bh))
 			continue;
-		ret = io_submit_add_bh(io, inode, bounce_page ?: page, bh);
-		if (ret) {
-			/*
-			 * We only get here on ENOMEM. Not much else
-			 * we can do but mark the page as dirty, and
-			 * better luck next time.
-			 */
-			break;
-		}
+		io_submit_add_bh(io, inode, page, bounce_page, bh);
 		nr_submitted++;
 		clear_buffer_dirty(bh);
 	} while ((bh = bh->b_this_page) != head);
 
-	/* Error stopped previous loop? Clean up buffers... */
-	if (ret) {
-	out:
-		fscrypt_free_bounce_page(bounce_page);
-		printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
-		redirty_page_for_writepage(wbc, page);
-		do {
-			clear_buffer_async_write(bh);
-			bh = bh->b_this_page;
-		} while (bh != head);
-	}
+unlock:
 	unlock_page(page);
 	/* Nothing submitted - we have to end page writeback */
 	if (!nr_submitted)