 .. |  .. |
 31 |  31 | #include "acl.h"
 32 |  32 |
 33 |  33 | static struct kmem_cache *io_end_cachep;
    |  34 | +static struct kmem_cache *io_end_vec_cachep;
 34 |  35 |
 35 |  36 | int __init ext4_init_pageio(void)
 36 |  37 | {
 37 |  38 |         io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
 38 |  39 |         if (io_end_cachep == NULL)
 39 |  40 |                 return -ENOMEM;
    |  41 | +
    |  42 | +        io_end_vec_cachep = KMEM_CACHE(ext4_io_end_vec, 0);
    |  43 | +        if (io_end_vec_cachep == NULL) {
    |  44 | +                kmem_cache_destroy(io_end_cachep);
    |  45 | +                return -ENOMEM;
    |  46 | +        }
 40 |  47 |         return 0;
 41 |  48 | }
 42 |  49 |
 43 |  50 | void ext4_exit_pageio(void)
 44 |  51 | {
 45 |  52 |         kmem_cache_destroy(io_end_cachep);
    |  53 | +        kmem_cache_destroy(io_end_vec_cachep);
    |  54 | +}
    |  55 | +
    |  56 | +struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end)
    |  57 | +{
    |  58 | +        struct ext4_io_end_vec *io_end_vec;
    |  59 | +
    |  60 | +        io_end_vec = kmem_cache_zalloc(io_end_vec_cachep, GFP_NOFS);
    |  61 | +        if (!io_end_vec)
    |  62 | +                return ERR_PTR(-ENOMEM);
    |  63 | +        INIT_LIST_HEAD(&io_end_vec->list);
    |  64 | +        list_add_tail(&io_end_vec->list, &io_end->list_vec);
    |  65 | +        return io_end_vec;
    |  66 | +}
    |  67 | +
    |  68 | +static void ext4_free_io_end_vec(ext4_io_end_t *io_end)
    |  69 | +{
    |  70 | +        struct ext4_io_end_vec *io_end_vec, *tmp;
    |  71 | +
    |  72 | +        if (list_empty(&io_end->list_vec))
    |  73 | +                return;
    |  74 | +        list_for_each_entry_safe(io_end_vec, tmp, &io_end->list_vec, list) {
    |  75 | +                list_del(&io_end_vec->list);
    |  76 | +                kmem_cache_free(io_end_vec_cachep, io_end_vec);
    |  77 | +        }
    |  78 | +}
    |  79 | +
    |  80 | +struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end)
    |  81 | +{
    |  82 | +        BUG_ON(list_empty(&io_end->list_vec));
    |  83 | +        return list_last_entry(&io_end->list_vec, struct ext4_io_end_vec, list);
 46 |  84 | }
 47 |  85 |
 48 |  86 | /*
 .. |  .. |
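The helpers above replace the single offset/size pair on an ext4_io_end with a list of ranges (io_end->list_vec), so one io_end can describe several unwritten extents when blocksize < pagesize. As a rough sketch of how a writeback-path caller might use them (hedged: the offset and size fields of struct ext4_io_end_vec and the caller's name are assumptions based on the companion ext4.h/inode.c changes, which are not part of this hunk):

```c
/*
 * Hedged sketch, not taken from this diff: record one unwritten range on
 * an io_end so it can be converted at I/O completion time.
 */
static int ext4_record_unwritten_range(ext4_io_end_t *io_end,
                                       loff_t offset, ssize_t size)
{
        struct ext4_io_end_vec *io_end_vec;

        io_end_vec = ext4_alloc_io_end_vec(io_end); /* links onto io_end->list_vec */
        if (IS_ERR(io_end_vec))
                return PTR_ERR(io_end_vec);
        io_end_vec->offset = offset;    /* assumed field */
        io_end_vec->size = size;        /* assumed field */
        return 0;
}
```

ext4_last_io_end_vec() presumably lets the submission path extend the most recently recorded range instead of allocating a new vec for every block.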
---|
 61 |  99 |
 62 | 100 | static void ext4_finish_bio(struct bio *bio)
 63 | 101 | {
 64 |     | -        int i;
 65 | 102 |         struct bio_vec *bvec;
    | 103 | +        struct bvec_iter_all iter_all;
 66 | 104 |
 67 |     | -        bio_for_each_segment_all(bvec, bio, i) {
    | 105 | +        bio_for_each_segment_all(bvec, bio, iter_all) {
 68 | 106 |                 struct page *page = bvec->bv_page;
 69 | 107 |                 struct page *bounce_page = NULL;
 70 | 108 |                 struct buffer_head *bh, *head;
 .. |  .. |
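The loop above is switched to the newer bio_for_each_segment_all() signature, which takes an on-stack struct bvec_iter_all instead of an integer index. A minimal sketch of the pattern (the handler name is illustrative, not from this diff):

```c
#include <linux/bio.h>
#include <linux/pagemap.h>

/* Hedged sketch of the post-conversion iteration pattern. */
static void example_end_io(struct bio *bio)
{
        struct bio_vec *bvec;
        struct bvec_iter_all iter_all;  /* iterator state, replaces the old int index */

        bio_for_each_segment_all(bvec, bio, iter_all) {
                struct page *page = bvec->bv_page;

                /* per-page completion work, e.g.: */
                end_page_writeback(page);
        }
}
```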
---|
 87 | 125 |                 }
 88 | 126 |                 bh = head = page_buffers(page);
 89 | 127 |                 /*
 90 |     | -                 * We check all buffers in the page under BH_Uptodate_Lock
    | 128 | +                 * We check all buffers in the page under b_uptodate_lock
 91 | 129 |                  * to avoid races with other end io clearing async_write flags
 92 | 130 |                  */
 93 |     | -                flags = bh_uptodate_lock_irqsave(head);
    | 131 | +                spin_lock_irqsave(&head->b_uptodate_lock, flags);
 94 | 132 |                 do {
 95 | 133 |                         if (bh_offset(bh) < bio_start ||
 96 | 134 |                             bh_offset(bh) + bh->b_size > bio_end) {
 .. |  .. |
 99 | 137 |                                 continue;
100 | 138 |                         }
101 | 139 |                         clear_buffer_async_write(bh);
102 |     | -                        if (bio->bi_status)
    | 140 | +                        if (bio->bi_status) {
    | 141 | +                                set_buffer_write_io_error(bh);
103 | 142 |                                 buffer_io_error(bh);
    | 143 | +                        }
104 | 144 |                 } while ((bh = bh->b_this_page) != head);
105 |     | -                bh_uptodate_unlock_irqrestore(head, flags);
    | 145 | +                spin_unlock_irqrestore(&head->b_uptodate_lock, flags);
106 | 146 |                 if (!under_io) {
107 | 147 |                         fscrypt_free_bounce_page(bounce_page);
108 | 148 |                         end_page_writeback(page);
 .. |  .. |
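The bit-spinlock helpers bh_uptodate_lock_irqsave()/bh_uptodate_unlock_irqrestore() are replaced by a plain spin_lock_irqsave() on the buffer head's b_uptodate_lock, keeping the same critical section around the walk of the page's buffer ring. A stripped-down sketch of the pattern, assuming a kernel whose struct buffer_head carries the b_uptodate_lock field (as this hunk requires):

```c
#include <linux/buffer_head.h>

/*
 * Hedged sketch: clear async_write on every buffer of a page under the
 * same lock the end_io path uses, so the two walks cannot race.
 */
static void example_clear_async_write(struct buffer_head *head)
{
        struct buffer_head *bh = head;
        unsigned long flags;

        spin_lock_irqsave(&head->b_uptodate_lock, flags);
        do {
                clear_buffer_async_write(bh);
        } while ((bh = bh->b_this_page) != head);
        spin_unlock_irqrestore(&head->b_uptodate_lock, flags);
}
```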
---|
123 | 163 |                 ext4_finish_bio(bio);
124 | 164 |                 bio_put(bio);
125 | 165 |         }
    | 166 | +        ext4_free_io_end_vec(io_end);
126 | 167 |         kmem_cache_free(io_end_cachep, io_end);
127 | 168 | }
128 | 169 |
 .. |  .. |
134 | 175 |  * cannot get to ext4_ext_truncate() before all IOs overlapping that range are
135 | 176 |  * completed (happens from ext4_free_ioend()).
136 | 177 |  */
137 |     | -static int ext4_end_io(ext4_io_end_t *io)
    | 178 | +static int ext4_end_io_end(ext4_io_end_t *io_end)
138 | 179 | {
139 |     | -        struct inode *inode = io->inode;
140 |     | -        loff_t offset = io->offset;
141 |     | -        ssize_t size = io->size;
142 |     | -        handle_t *handle = io->handle;
    | 180 | +        struct inode *inode = io_end->inode;
    | 181 | +        handle_t *handle = io_end->handle;
143 | 182 |         int ret = 0;
144 | 183 |
145 |     | -        ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
    | 184 | +        ext4_debug("ext4_end_io_nolock: io_end 0x%p from inode %lu,list->next 0x%p,"
146 | 185 |                    "list->prev 0x%p\n",
147 |     | -                   io, inode->i_ino, io->list.next, io->list.prev);
    | 186 | +                   io_end, inode->i_ino, io_end->list.next, io_end->list.prev);
148 | 187 |
149 |     | -        io->handle = NULL;        /* Following call will use up the handle */
150 |     | -        ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
    | 188 | +        io_end->handle = NULL;        /* Following call will use up the handle */
    | 189 | +        ret = ext4_convert_unwritten_io_end_vec(handle, io_end);
151 | 190 |         if (ret < 0 && !ext4_forced_shutdown(EXT4_SB(inode->i_sb))) {
152 | 191 |                 ext4_msg(inode->i_sb, KERN_EMERG,
153 | 192 |                          "failed to convert unwritten extents to written "
154 | 193 |                          "extents -- potential data loss! "
155 |     | -                         "(inode %lu, offset %llu, size %zd, error %d)",
156 |     | -                         inode->i_ino, offset, size, ret);
    | 194 | +                         "(inode %lu, error %d)", inode->i_ino, ret);
157 | 195 |         }
158 |     | -        ext4_clear_io_unwritten_flag(io);
159 |     | -        ext4_release_io_end(io);
    | 196 | +        ext4_clear_io_unwritten_flag(io_end);
    | 197 | +        ext4_release_io_end(io_end);
160 | 198 |         return ret;
161 | 199 | }
162 | 200 |
 .. |  .. |
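ext4_end_io_end() no longer passes a single (offset, size) pair; the conversion helper now takes the whole io_end and is expected to walk io_end->list_vec. That helper is not part of this diff (in mainline it lives in fs/ext4/extents.c); a plausible shape, assuming it simply loops over the recorded ranges, is:

```c
/*
 * Hedged sketch only -- the real ext4_convert_unwritten_io_end_vec() also
 * manages journal credits and is defined outside this file.  The
 * offset/size fields of struct ext4_io_end_vec are assumptions.
 */
int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end)
{
        struct ext4_io_end_vec *io_end_vec;
        int ret = 0, err;

        list_for_each_entry(io_end_vec, &io_end->list_vec, list) {
                err = ext4_convert_unwritten_extents(handle, io_end->inode,
                                                     io_end_vec->offset,
                                                     io_end_vec->size);
                if (err && !ret)
                        ret = err;
        }
        return ret;
}
```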
---|
164 | 202 | {
165 | 203 | #ifdef EXT4FS_DEBUG
166 | 204 |         struct list_head *cur, *before, *after;
167 |     | -        ext4_io_end_t *io, *io0, *io1;
    | 205 | +        ext4_io_end_t *io_end, *io_end0, *io_end1;
168 | 206 |
169 | 207 |         if (list_empty(head))
170 | 208 |                 return;
171 | 209 |
172 | 210 |         ext4_debug("Dump inode %lu completed io list\n", inode->i_ino);
173 |     | -        list_for_each_entry(io, head, list) {
174 |     | -                cur = &io->list;
    | 211 | +        list_for_each_entry(io_end, head, list) {
    | 212 | +                cur = &io_end->list;
175 | 213 |                 before = cur->prev;
176 |     | -                io0 = container_of(before, ext4_io_end_t, list);
    | 214 | +                io_end0 = container_of(before, ext4_io_end_t, list);
177 | 215 |                 after = cur->next;
178 |     | -                io1 = container_of(after, ext4_io_end_t, list);
    | 216 | +                io_end1 = container_of(after, ext4_io_end_t, list);
179 | 217 |
180 | 218 |                 ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
181 |     | -                           io, inode->i_ino, io0, io1);
    | 219 | +                           io_end, inode->i_ino, io_end0, io_end1);
182 | 220 |         }
183 | 221 | #endif
184 | 222 | }
 .. |  .. |
205 | 243 | static int ext4_do_flush_completed_IO(struct inode *inode,
206 | 244 |                                       struct list_head *head)
207 | 245 | {
208 |     | -        ext4_io_end_t *io;
    | 246 | +        ext4_io_end_t *io_end;
209 | 247 |         struct list_head unwritten;
210 | 248 |         unsigned long flags;
211 | 249 |         struct ext4_inode_info *ei = EXT4_I(inode);
 .. |  .. |
217 | 255 |         spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
218 | 256 |
219 | 257 |         while (!list_empty(&unwritten)) {
220 |     | -                io = list_entry(unwritten.next, ext4_io_end_t, list);
221 |     | -                BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
222 |     | -                list_del_init(&io->list);
    | 258 | +                io_end = list_entry(unwritten.next, ext4_io_end_t, list);
    | 259 | +                BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
    | 260 | +                list_del_init(&io_end->list);
223 | 261 |
224 |     | -                err = ext4_end_io(io);
    | 262 | +                err = ext4_end_io_end(io_end);
225 | 263 |                 if (unlikely(!ret && err))
226 | 264 |                         ret = err;
227 | 265 |         }
 .. |  .. |
240 | 278 |
241 | 279 | ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
242 | 280 | {
243 |     | -        ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
244 |     | -        if (io) {
245 |     | -                io->inode = inode;
246 |     | -                INIT_LIST_HEAD(&io->list);
247 |     | -                atomic_set(&io->count, 1);
    | 281 | +        ext4_io_end_t *io_end = kmem_cache_zalloc(io_end_cachep, flags);
    | 282 | +
    | 283 | +        if (io_end) {
    | 284 | +                io_end->inode = inode;
    | 285 | +                INIT_LIST_HEAD(&io_end->list);
    | 286 | +                INIT_LIST_HEAD(&io_end->list_vec);
    | 287 | +                atomic_set(&io_end->count, 1);
248 | 288 |         }
249 |     | -        return io;
    | 289 | +        return io_end;
250 | 290 | }
251 | 291 |
252 | 292 | void ext4_put_io_end_defer(ext4_io_end_t *io_end)
253 | 293 | {
254 | 294 |         if (atomic_dec_and_test(&io_end->count)) {
255 |     | -                if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
    | 295 | +                if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) ||
    | 296 | +                                list_empty(&io_end->list_vec)) {
256 | 297 |                         ext4_release_io_end(io_end);
257 | 298 |                         return;
258 | 299 |                 }
 .. |  .. |
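In ext4_put_io_end_defer(), the old `!io_end->size` test ("nothing was recorded, so there is nothing to convert") becomes `list_empty(&io_end->list_vec)`. Expressed as a hypothetical helper (name and factoring are illustrative only, not part of the patch):

```c
/* Hedged sketch: does this io_end still need the completion worker? */
static bool ext4_io_end_needs_conversion(ext4_io_end_t *io_end)
{
        return (io_end->flag & EXT4_IO_END_UNWRITTEN) &&
               !list_empty(&io_end->list_vec);
}
```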
---|
266 | 307 |
267 | 308 |         if (atomic_dec_and_test(&io_end->count)) {
268 | 309 |                 if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
269 |     | -                        err = ext4_convert_unwritten_extents(io_end->handle,
270 |     | -                                                io_end->inode, io_end->offset,
271 |     | -                                                io_end->size);
    | 310 | +                        err = ext4_convert_unwritten_io_end_vec(io_end->handle,
    | 311 | +                                                                io_end);
272 | 312 |                         io_end->handle = NULL;
273 | 313 |                         ext4_clear_io_unwritten_flag(io_end);
274 | 314 |                 }
 .. |  .. |
305 | 345 |         struct inode *inode = io_end->inode;
306 | 346 |
307 | 347 |         ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
308 |     | -                     "(offset %llu size %ld starting block %llu)",
    | 348 | +                     "starting block %llu)",
309 | 349 |                      bio->bi_status, inode->i_ino,
310 |     | -                     (unsigned long long) io_end->offset,
311 |     | -                     (long) io_end->size,
312 | 350 |                      (unsigned long long)
313 | 351 |                      bi_sector >> (inode->i_blkbits - 9));
314 | 352 |         mapping_set_error(inode->i_mapping,
 .. |  .. |
356 | 394 |                 io->io_end = NULL;
357 | 395 | }
358 | 396 |
359 |     | -static int io_submit_init_bio(struct ext4_io_submit *io,
360 |     | -                              struct buffer_head *bh)
    | 397 | +static void io_submit_init_bio(struct ext4_io_submit *io,
    | 398 | +                               struct buffer_head *bh)
361 | 399 | {
362 | 400 |         struct bio *bio;
363 | 401 |
    | 402 | +        /*
    | 403 | +         * bio_alloc will _always_ be able to allocate a bio if
    | 404 | +         * __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset().
    | 405 | +         */
364 | 406 |         bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
365 |     | -        if (!bio)
366 |     | -                return -ENOMEM;
367 | 407 |         fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
368 |     | -        wbc_init_bio(io->io_wbc, bio);
369 | 408 |         bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
370 | 409 |         bio_set_dev(bio, bh->b_bdev);
371 | 410 |         bio->bi_end_io = ext4_end_bio;
372 | 411 |         bio->bi_private = ext4_get_io_end(io->io_end);
373 | 412 |         io->io_bio = bio;
374 | 413 |         io->io_next_block = bh->b_blocknr;
375 |     | -        return 0;
    | 414 | +        wbc_init_bio(io->io_wbc, bio);
376 | 415 | }
377 | 416 |
378 |     | -static int io_submit_add_bh(struct ext4_io_submit *io,
379 |     | -                            struct inode *inode,
380 |     | -                            struct page *page,
381 |     | -                            struct buffer_head *bh)
    | 417 | +static void io_submit_add_bh(struct ext4_io_submit *io,
    | 418 | +                             struct inode *inode,
    | 419 | +                             struct page *pagecache_page,
    | 420 | +                             struct page *bounce_page,
    | 421 | +                             struct buffer_head *bh)
382 | 422 | {
383 | 423 |         int ret;
384 | 424 |
 .. |  .. |
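io_submit_init_bio() can become void because bio_alloc() cannot fail when __GFP_DIRECT_RECLAIM is set (GFP_NOIO includes it) and the requested vec count is at most BIO_MAX_PAGES, per the comment added above; wbc_init_bio() also moves to after the bio has its device set, presumably because the cgroup association needs that ordering. A trimmed sketch of the resulting no-error-path allocation (helper name is illustrative):

```c
#include <linux/bio.h>

/*
 * Hedged sketch: allocate and minimally initialize a write bio with no
 * error handling, relying on the mempool-backed bio_alloc() guarantee.
 */
static struct bio *example_alloc_write_bio(struct block_device *bdev,
                                           sector_t sector)
{
        struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);

        bio->bi_iter.bi_sector = sector;
        bio_set_dev(bio, bdev);
        return bio;
}
```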
---|
388 | 428 |                 ext4_io_submit(io);
389 | 429 |         }
390 | 430 |         if (io->io_bio == NULL) {
391 |     | -                ret = io_submit_init_bio(io, bh);
392 |     | -                if (ret)
393 |     | -                        return ret;
    | 431 | +                io_submit_init_bio(io, bh);
394 | 432 |                 io->io_bio->bi_write_hint = inode->i_write_hint;
395 | 433 |         }
396 |     | -        ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
    | 434 | +        ret = bio_add_page(io->io_bio, bounce_page ?: pagecache_page,
    | 435 | +                           bh->b_size, bh_offset(bh));
397 | 436 |         if (ret != bh->b_size)
398 | 437 |                 goto submit_and_retry;
399 |     | -        wbc_account_io(io->io_wbc, page, bh->b_size);
    | 438 | +        wbc_account_cgroup_owner(io->io_wbc, pagecache_page, bh->b_size);
400 | 439 |         io->io_next_block++;
401 |     | -        return 0;
402 | 440 | }
403 | 441 |
404 | 442 | int ext4_bio_write_page(struct ext4_io_submit *io,
 .. |  .. |
459 | 497 |                         ext4_io_submit(io);
460 | 498 |                         continue;
461 | 499 |                 }
462 |     | -                if (buffer_new(bh)) {
    | 500 | +                if (buffer_new(bh))
463 | 501 |                         clear_buffer_new(bh);
464 |     | -                        clean_bdev_bh_alias(bh);
465 |     | -                }
466 | 502 |                 set_buffer_async_write(bh);
467 | 503 |                 nr_to_submit++;
468 | 504 |         } while ((bh = bh->b_this_page) != head);
469 | 505 |
470 | 506 |         bh = head = page_buffers(page);
471 | 507 |
    | 508 | +        /*
    | 509 | +         * If any blocks are being written to an encrypted file, encrypt them
    | 510 | +         * into a bounce page. For simplicity, just encrypt until the last
    | 511 | +         * block which might be needed. This may cause some unneeded blocks
    | 512 | +         * (e.g. holes) to be unnecessarily encrypted, but this is rare and
    | 513 | +         * can't happen in the common case of blocksize == PAGE_SIZE.
    | 514 | +         */
472 | 515 |         if (fscrypt_inode_uses_fs_layer_crypto(inode) && nr_to_submit) {
473 | 516 |                 gfp_t gfp_flags = GFP_NOFS;
    | 517 | +                unsigned int enc_bytes = round_up(len, i_blocksize(inode));
474 | 518 |
475 | 519 |                 /*
476 | 520 |                  * Since bounce page allocation uses a mempool, we can only use
 .. |  .. |
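enc_bytes rounds the dirty length up to the filesystem block size so that, with blocksize < PAGE_SIZE, only the blocks that may actually be written are encrypted into the bounce page instead of the full page as before. A small userspace illustration of the arithmetic (the round_up() below is a stand-in for the kernel macro, valid for power-of-two alignment):

```c
#include <stdio.h>

/* Same result as the kernel's round_up(x, y) for power-of-two y. */
#define round_up(x, y)  ((((x) - 1) | ((y) - 1)) + 1)

int main(void)
{
        unsigned int blocksize = 1024;  /* example: blocksize < PAGE_SIZE */
        unsigned int len = 2600;        /* dirty bytes within the page */
        unsigned int enc_bytes = round_up(len, blocksize);

        /* prints 3072: three 1k blocks get encrypted, not the 4096-byte page */
        printf("%u\n", enc_bytes);
        return 0;
}
```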
---|
480 | 524 |                 if (io->io_bio)
481 | 525 |                         gfp_flags = GFP_NOWAIT | __GFP_NOWARN;
482 | 526 |         retry_encrypt:
483 |     | -                bounce_page = fscrypt_encrypt_pagecache_blocks(page, PAGE_SIZE,
    | 527 | +                bounce_page = fscrypt_encrypt_pagecache_blocks(page, enc_bytes,
484 | 528 |                                                                0, gfp_flags);
485 | 529 |                 if (IS_ERR(bounce_page)) {
486 | 530 |                         ret = PTR_ERR(bounce_page);
 .. |  .. |
494 | 538 |                                 congestion_wait(BLK_RW_ASYNC, HZ/50);
495 | 539 |                                 goto retry_encrypt;
496 | 540 |                         }
497 |     | -                        bounce_page = NULL;
498 |     | -                        goto out;
    | 541 | +
    | 542 | +                        printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
    | 543 | +                        redirty_page_for_writepage(wbc, page);
    | 544 | +                        do {
    | 545 | +                                clear_buffer_async_write(bh);
    | 546 | +                                bh = bh->b_this_page;
    | 547 | +                        } while (bh != head);
    | 548 | +                        goto unlock;
499 | 549 |                 }
500 | 550 |         }
501 | 551 |
 .. |  .. |
503 | 553 |         do {
504 | 554 |                 if (!buffer_async_write(bh))
505 | 555 |                         continue;
506 |     | -                ret = io_submit_add_bh(io, inode, bounce_page ?: page, bh);
507 |     | -                if (ret) {
508 |     | -                        /*
509 |     | -                         * We only get here on ENOMEM. Not much else
510 |     | -                         * we can do but mark the page as dirty, and
511 |     | -                         * better luck next time.
512 |     | -                         */
513 |     | -                        break;
514 |     | -                }
    | 556 | +                io_submit_add_bh(io, inode, page, bounce_page, bh);
515 | 557 |                 nr_submitted++;
516 | 558 |                 clear_buffer_dirty(bh);
517 | 559 |         } while ((bh = bh->b_this_page) != head);
518 | 560 |
519 |     | -        /* Error stopped previous loop? Clean up buffers... */
520 |     | -        if (ret) {
521 |     | -        out:
522 |     | -                fscrypt_free_bounce_page(bounce_page);
523 |     | -                printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
524 |     | -                redirty_page_for_writepage(wbc, page);
525 |     | -                do {
526 |     | -                        clear_buffer_async_write(bh);
527 |     | -                        bh = bh->b_this_page;
528 |     | -                } while (bh != head);
529 |     | -        }
    | 561 | +unlock:
530 | 562 |         unlock_page(page);
531 | 563 |         /* Nothing submitted - we have to end page writeback */
532 | 564 |         if (!nr_submitted)
---|