hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/fs/ext4/page-io.c
@@ -31,18 +31,56 @@
 #include "acl.h"
 
 static struct kmem_cache *io_end_cachep;
+static struct kmem_cache *io_end_vec_cachep;
 
 int __init ext4_init_pageio(void)
 {
 	io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
 	if (io_end_cachep == NULL)
 		return -ENOMEM;
+
+	io_end_vec_cachep = KMEM_CACHE(ext4_io_end_vec, 0);
+	if (io_end_vec_cachep == NULL) {
+		kmem_cache_destroy(io_end_cachep);
+		return -ENOMEM;
+	}
 	return 0;
 }
 
 void ext4_exit_pageio(void)
 {
 	kmem_cache_destroy(io_end_cachep);
+	kmem_cache_destroy(io_end_vec_cachep);
+}
+
+struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end)
+{
+	struct ext4_io_end_vec *io_end_vec;
+
+	io_end_vec = kmem_cache_zalloc(io_end_vec_cachep, GFP_NOFS);
+	if (!io_end_vec)
+		return ERR_PTR(-ENOMEM);
+	INIT_LIST_HEAD(&io_end_vec->list);
+	list_add_tail(&io_end_vec->list, &io_end->list_vec);
+	return io_end_vec;
+}
+
+static void ext4_free_io_end_vec(ext4_io_end_t *io_end)
+{
+	struct ext4_io_end_vec *io_end_vec, *tmp;
+
+	if (list_empty(&io_end->list_vec))
+		return;
+	list_for_each_entry_safe(io_end_vec, tmp, &io_end->list_vec, list) {
+		list_del(&io_end_vec->list);
+		kmem_cache_free(io_end_vec_cachep, io_end_vec);
+	}
+}
+
+struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end)
+{
+	BUG_ON(list_empty(&io_end->list_vec));
+	return list_last_entry(&io_end->list_vec, struct ext4_io_end_vec, list);
 }
 
 /*
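
Note: the hunk above relies on a struct ext4_io_end_vec and an io_end->list_vec head that are declared outside this file (in fs/ext4/ext4.h in mainline). A minimal sketch of the assumed layout, plus how a writeback path might record one mapped extent with the new allocator; illustrative only, the helper name example_record_extent is hypothetical and not part of this patch:

/* Sketch of the assumed definition; the real one lives in fs/ext4/ext4.h. */
struct ext4_io_end_vec {
	struct list_head list;	/* linked into io_end->list_vec */
	loff_t offset;		/* file offset of the mapped extent */
	ssize_t size;		/* length of the mapped extent */
};

/* Hypothetical caller: record one extent on the io_end's vector list. */
static int example_record_extent(ext4_io_end_t *io_end, loff_t off, ssize_t len)
{
	struct ext4_io_end_vec *io_end_vec = ext4_alloc_io_end_vec(io_end);

	if (IS_ERR(io_end_vec))
		return PTR_ERR(io_end_vec);
	io_end_vec->offset = off;
	io_end_vec->size = len;
	return 0;
}
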
@@ -61,10 +99,10 @@
 
 static void ext4_finish_bio(struct bio *bio)
 {
-	int i;
 	struct bio_vec *bvec;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, iter_all) {
 		struct page *page = bvec->bv_page;
 		struct page *bounce_page = NULL;
 		struct buffer_head *bh, *head;
@@ -87,11 +125,10 @@
 		}
 		bh = head = page_buffers(page);
 		/*
-		 * We check all buffers in the page under BH_Uptodate_Lock
+		 * We check all buffers in the page under b_uptodate_lock
 		 * to avoid races with other end io clearing async_write flags
 		 */
-		local_irq_save(flags);
-		bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
+		spin_lock_irqsave(&head->b_uptodate_lock, flags);
 		do {
 			if (bh_offset(bh) < bio_start ||
 			    bh_offset(bh) + bh->b_size > bio_end) {
@@ -100,11 +137,12 @@
 				continue;
 			}
 			clear_buffer_async_write(bh);
-			if (bio->bi_status)
+			if (bio->bi_status) {
+				set_buffer_write_io_error(bh);
 				buffer_io_error(bh);
+			}
 		} while ((bh = bh->b_this_page) != head);
-		bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
-		local_irq_restore(flags);
+		spin_unlock_irqrestore(&head->b_uptodate_lock, flags);
 		if (!under_io) {
 			fscrypt_free_bounce_page(bounce_page);
 			end_page_writeback(page);
@@ -125,6 +163,7 @@
 		ext4_finish_bio(bio);
 		bio_put(bio);
 	}
+	ext4_free_io_end_vec(io_end);
 	kmem_cache_free(io_end_cachep, io_end);
 }
 
@@ -136,29 +175,26 @@
  * cannot get to ext4_ext_truncate() before all IOs overlapping that range are
  * completed (happens from ext4_free_ioend()).
  */
-static int ext4_end_io(ext4_io_end_t *io)
+static int ext4_end_io_end(ext4_io_end_t *io_end)
 {
-	struct inode *inode = io->inode;
-	loff_t offset = io->offset;
-	ssize_t size = io->size;
-	handle_t *handle = io->handle;
+	struct inode *inode = io_end->inode;
+	handle_t *handle = io_end->handle;
 	int ret = 0;
 
-	ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
+	ext4_debug("ext4_end_io_nolock: io_end 0x%p from inode %lu,list->next 0x%p,"
 		   "list->prev 0x%p\n",
-		   io, inode->i_ino, io->list.next, io->list.prev);
+		   io_end, inode->i_ino, io_end->list.next, io_end->list.prev);
 
-	io->handle = NULL;	/* Following call will use up the handle */
-	ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
+	io_end->handle = NULL;	/* Following call will use up the handle */
+	ret = ext4_convert_unwritten_io_end_vec(handle, io_end);
 	if (ret < 0 && !ext4_forced_shutdown(EXT4_SB(inode->i_sb))) {
 		ext4_msg(inode->i_sb, KERN_EMERG,
 			 "failed to convert unwritten extents to written "
 			 "extents -- potential data loss! "
-			 "(inode %lu, offset %llu, size %zd, error %d)",
-			 inode->i_ino, offset, size, ret);
+			 "(inode %lu, error %d)", inode->i_ino, ret);
	}
-	ext4_clear_io_unwritten_flag(io);
-	ext4_release_io_end(io);
+	ext4_clear_io_unwritten_flag(io_end);
+	ext4_release_io_end(io_end);
 	return ret;
 }
 
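
Note: ext4_convert_unwritten_io_end_vec() is not defined in this file (it lives in fs/ext4/extents.c in mainline). A rough sketch of the assumed behaviour it replaces the old (offset, size) call with, namely converting every range recorded on io_end->list_vec; a simplification for illustration, not the verbatim implementation:

/* Sketch only: assumed shape of the helper in fs/ext4/extents.c. */
int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end)
{
	struct ext4_io_end_vec *io_end_vec;
	int ret = 0, err;

	list_for_each_entry(io_end_vec, &io_end->list_vec, list) {
		err = ext4_convert_unwritten_extents(handle, io_end->inode,
						     io_end_vec->offset,
						     io_end_vec->size);
		if (err && !ret)
			ret = err;
	}
	return ret;
}
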
@@ -166,21 +202,21 @@
 {
 #ifdef EXT4FS_DEBUG
 	struct list_head *cur, *before, *after;
-	ext4_io_end_t *io, *io0, *io1;
+	ext4_io_end_t *io_end, *io_end0, *io_end1;
 
 	if (list_empty(head))
 		return;
 
 	ext4_debug("Dump inode %lu completed io list\n", inode->i_ino);
-	list_for_each_entry(io, head, list) {
-		cur = &io->list;
+	list_for_each_entry(io_end, head, list) {
+		cur = &io_end->list;
 		before = cur->prev;
-		io0 = container_of(before, ext4_io_end_t, list);
+		io_end0 = container_of(before, ext4_io_end_t, list);
 		after = cur->next;
-		io1 = container_of(after, ext4_io_end_t, list);
+		io_end1 = container_of(after, ext4_io_end_t, list);
 
 		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
-			   io, inode->i_ino, io0, io1);
+			   io_end, inode->i_ino, io_end0, io_end1);
 	}
 #endif
 }
@@ -207,7 +243,7 @@
 static int ext4_do_flush_completed_IO(struct inode *inode,
 				      struct list_head *head)
 {
-	ext4_io_end_t *io;
+	ext4_io_end_t *io_end;
 	struct list_head unwritten;
 	unsigned long flags;
 	struct ext4_inode_info *ei = EXT4_I(inode);
@@ -219,11 +255,11 @@
 	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
 
 	while (!list_empty(&unwritten)) {
-		io = list_entry(unwritten.next, ext4_io_end_t, list);
-		BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
-		list_del_init(&io->list);
+		io_end = list_entry(unwritten.next, ext4_io_end_t, list);
+		BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
+		list_del_init(&io_end->list);
 
-		err = ext4_end_io(io);
+		err = ext4_end_io_end(io_end);
 		if (unlikely(!ret && err))
 			ret = err;
 	}
@@ -242,19 +278,22 @@
 
 ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
 {
-	ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
-	if (io) {
-		io->inode = inode;
-		INIT_LIST_HEAD(&io->list);
-		atomic_set(&io->count, 1);
+	ext4_io_end_t *io_end = kmem_cache_zalloc(io_end_cachep, flags);
+
+	if (io_end) {
+		io_end->inode = inode;
+		INIT_LIST_HEAD(&io_end->list);
+		INIT_LIST_HEAD(&io_end->list_vec);
+		atomic_set(&io_end->count, 1);
 	}
-	return io;
+	return io_end;
 }
 
 void ext4_put_io_end_defer(ext4_io_end_t *io_end)
 {
 	if (atomic_dec_and_test(&io_end->count)) {
-		if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
+		if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) ||
+				list_empty(&io_end->list_vec)) {
 			ext4_release_io_end(io_end);
 			return;
 		}
@@ -268,9 +307,8 @@
 
 	if (atomic_dec_and_test(&io_end->count)) {
 		if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
-			err = ext4_convert_unwritten_extents(io_end->handle,
-				io_end->inode, io_end->offset,
-				io_end->size);
+			err = ext4_convert_unwritten_io_end_vec(io_end->handle,
+								io_end);
 			io_end->handle = NULL;
 			ext4_clear_io_unwritten_flag(io_end);
 		}
@@ -307,10 +345,8 @@
 		struct inode *inode = io_end->inode;
 
 		ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
-			     "(offset %llu size %ld starting block %llu)",
+			     "starting block %llu)",
 			     bio->bi_status, inode->i_ino,
-			     (unsigned long long) io_end->offset,
-			     (long) io_end->size,
 			     (unsigned long long)
 			     bi_sector >> (inode->i_blkbits - 9));
 		mapping_set_error(inode->i_mapping,
@@ -358,29 +394,31 @@
 	io->io_end = NULL;
 }
 
-static int io_submit_init_bio(struct ext4_io_submit *io,
-			      struct buffer_head *bh)
+static void io_submit_init_bio(struct ext4_io_submit *io,
+			       struct buffer_head *bh)
 {
 	struct bio *bio;
 
+	/*
+	 * bio_alloc will _always_ be able to allocate a bio if
+	 * __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset().
+	 */
 	bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
-	if (!bio)
-		return -ENOMEM;
 	fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
-	wbc_init_bio(io->io_wbc, bio);
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio_set_dev(bio, bh->b_bdev);
 	bio->bi_end_io = ext4_end_bio;
 	bio->bi_private = ext4_get_io_end(io->io_end);
 	io->io_bio = bio;
 	io->io_next_block = bh->b_blocknr;
-	return 0;
+	wbc_init_bio(io->io_wbc, bio);
 }
 
-static int io_submit_add_bh(struct ext4_io_submit *io,
-			    struct inode *inode,
-			    struct page *page,
-			    struct buffer_head *bh)
+static void io_submit_add_bh(struct ext4_io_submit *io,
			     struct inode *inode,
+			     struct page *pagecache_page,
+			     struct page *bounce_page,
+			     struct buffer_head *bh)
 {
 	int ret;
 
@@ -390,17 +428,15 @@
 		ext4_io_submit(io);
 	}
 	if (io->io_bio == NULL) {
-		ret = io_submit_init_bio(io, bh);
-		if (ret)
-			return ret;
+		io_submit_init_bio(io, bh);
 		io->io_bio->bi_write_hint = inode->i_write_hint;
 	}
-	ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
+	ret = bio_add_page(io->io_bio, bounce_page ?: pagecache_page,
+			   bh->b_size, bh_offset(bh));
 	if (ret != bh->b_size)
 		goto submit_and_retry;
-	wbc_account_io(io->io_wbc, page, bh->b_size);
+	wbc_account_cgroup_owner(io->io_wbc, pagecache_page, bh->b_size);
 	io->io_next_block++;
-	return 0;
 }
 
 int ext4_bio_write_page(struct ext4_io_submit *io,
@@ -461,18 +497,24 @@
 			ext4_io_submit(io);
 			continue;
 		}
-		if (buffer_new(bh)) {
+		if (buffer_new(bh))
 			clear_buffer_new(bh);
-			clean_bdev_bh_alias(bh);
-		}
 		set_buffer_async_write(bh);
 		nr_to_submit++;
 	} while ((bh = bh->b_this_page) != head);
 
 	bh = head = page_buffers(page);
 
+	/*
+	 * If any blocks are being written to an encrypted file, encrypt them
+	 * into a bounce page.  For simplicity, just encrypt until the last
+	 * block which might be needed.  This may cause some unneeded blocks
+	 * (e.g. holes) to be unnecessarily encrypted, but this is rare and
+	 * can't happen in the common case of blocksize == PAGE_SIZE.
+	 */
 	if (fscrypt_inode_uses_fs_layer_crypto(inode) && nr_to_submit) {
 		gfp_t gfp_flags = GFP_NOFS;
+		unsigned int enc_bytes = round_up(len, i_blocksize(inode));
 
 		/*
 		 * Since bounce page allocation uses a mempool, we can only use
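
Note: to make the enc_bytes bound added above concrete, a worked example with hypothetical numbers (1k block size on a 4k page, with 2500 bytes of the page actually being written); illustration only, not part of the patch:

/* Worked example, hypothetical values standing in for i_blocksize()/len. */
static inline unsigned int example_enc_bytes(void)
{
	unsigned int blocksize = 1024;	/* i_blocksize(inode) on a 1k-block fs */
	unsigned int len = 2500;	/* bytes of the page being written back */

	/* round_up(2500, 1024) == 3072: only three blocks are encrypted into
	 * the bounce page, where the old code always encrypted PAGE_SIZE. */
	return round_up(len, blocksize);
}
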
@@ -482,7 +524,7 @@
 		if (io->io_bio)
 			gfp_flags = GFP_NOWAIT | __GFP_NOWARN;
 	retry_encrypt:
-		bounce_page = fscrypt_encrypt_pagecache_blocks(page, PAGE_SIZE,
+		bounce_page = fscrypt_encrypt_pagecache_blocks(page, enc_bytes,
							       0, gfp_flags);
 		if (IS_ERR(bounce_page)) {
 			ret = PTR_ERR(bounce_page);
@@ -496,8 +538,14 @@
 				congestion_wait(BLK_RW_ASYNC, HZ/50);
 				goto retry_encrypt;
 			}
-			bounce_page = NULL;
-			goto out;
+
+			printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
+			redirty_page_for_writepage(wbc, page);
+			do {
+				clear_buffer_async_write(bh);
+				bh = bh->b_this_page;
+			} while (bh != head);
+			goto unlock;
 		}
 	}
 
@@ -505,30 +553,12 @@
 	do {
 		if (!buffer_async_write(bh))
 			continue;
-		ret = io_submit_add_bh(io, inode, bounce_page ?: page, bh);
-		if (ret) {
-			/*
-			 * We only get here on ENOMEM.  Not much else
-			 * we can do but mark the page as dirty, and
-			 * better luck next time.
-			 */
-			break;
-		}
+		io_submit_add_bh(io, inode, page, bounce_page, bh);
 		nr_submitted++;
 		clear_buffer_dirty(bh);
 	} while ((bh = bh->b_this_page) != head);
 
-	/* Error stopped previous loop? Clean up buffers... */
-	if (ret) {
-	out:
-		fscrypt_free_bounce_page(bounce_page);
-		printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
-		redirty_page_for_writepage(wbc, page);
-		do {
-			clear_buffer_async_write(bh);
-			bh = bh->b_this_page;
-		} while (bh != head);
-	}
+unlock:
 	unlock_page(page);
 	/* Nothing submitted - we have to end page writeback */
 	if (!nr_submitted)