2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/fs/ext4/readpage.c
@@ -7,8 +7,8 @@
  *
  * This was originally taken from fs/mpage.c
  *
- * The intent is the ext4_mpage_readpages() function here is intended
- * to replace mpage_readpages() in the general case, not just for
+ * The ext4_mpage_readpages() function here is intended to
+ * replace mpage_readahead() in the general case, not just for
  * encrypted files. It has some limitations (see below), where it
  * will fall back to read_block_full_page(), but these limitations
  * should only be hit when page_size != block_size.
@@ -58,6 +58,7 @@
 	STEP_INITIAL = 0,
 	STEP_DECRYPT,
 	STEP_VERITY,
+	STEP_MAX,
 };
 
 struct bio_post_read_ctx {
@@ -71,9 +72,9 @@
 {
 	struct page *page;
 	struct bio_vec *bv;
-	int i;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bv, bio, i) {
+	bio_for_each_segment_all(bv, bio, iter_all) {
 		page = bv->bv_page;
 
 		/* PG_error was set if any post_read step failed */
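
The iterator change here follows the block layer's updated bio_for_each_segment_all() signature, which now takes an on-stack struct bvec_iter_all cursor instead of an integer index. A minimal sketch of the new-style loop, outside of ext4 (the function name is made up):

#include <linux/bio.h>
#include <linux/highmem.h>

static void demo_walk_completed_bio(struct bio *bio)
{
	struct bio_vec *bv;
	struct bvec_iter_all iter_all;	/* cursor required by the newer API */

	bio_for_each_segment_all(bv, bio, iter_all) {
		struct page *page = bv->bv_page;

		/* per-page completion handling goes here */
		flush_dcache_page(page);
	}
}
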
@@ -107,10 +108,22 @@
 {
 	struct bio_post_read_ctx *ctx =
 		container_of(work, struct bio_post_read_ctx, work);
+	struct bio *bio = ctx->bio;
 
-	fsverity_verify_bio(ctx->bio);
+	/*
+	 * fsverity_verify_bio() may call readpages() again, and although verity
+	 * will be disabled for that, decryption may still be needed, causing
+	 * another bio_post_read_ctx to be allocated. So to guarantee that
+	 * mempool_alloc() never deadlocks we must free the current ctx first.
+	 * This is safe because verity is the last post-read step.
+	 */
+	BUILD_BUG_ON(STEP_VERITY + 1 != STEP_MAX);
+	mempool_free(ctx, bio_post_read_ctx_pool);
+	bio->bi_private = NULL;
 
-	bio_post_read_processing(ctx);
+	fsverity_verify_bio(bio);
+
+	__read_end_io(bio);
 }
 
 static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
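
The reordering in verity_work() leans on a mempool property worth spelling out: mempool_alloc() with a gfp mask that allows sleeping (such as GFP_NOFS) does not fail, it waits for mempool_free() to return an element. A minimal sketch of the ordering rule the new comment describes (pool and function names here are illustrative, not ext4's):

#include <linux/mempool.h>

static mempool_t *demo_ctx_pool;	/* hypothetical pool, possibly just one element */

static void demo_last_post_read_step(void *ctx)
{
	/*
	 * Give the element back before doing anything that might allocate
	 * from the same pool again.  If this function still held the last
	 * element, a nested mempool_alloc(demo_ctx_pool, GFP_NOFS) would
	 * sleep forever waiting for a free that can never happen.
	 */
	mempool_free(ctx, demo_ctx_pool);

	/* ...work that may re-enter the read path and allocate a new ctx... */
}
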
@@ -128,7 +141,7 @@
 			return;
 		}
 		ctx->cur_step++;
-		/* fall-through */
+		fallthrough;
 	case STEP_VERITY:
 		if (ctx->enabled_steps & (1 << STEP_VERITY)) {
 			INIT_WORK(&ctx->work, verity_work);
@@ -136,7 +149,7 @@
 			return;
 		}
 		ctx->cur_step++;
-		/* fall-through */
+		fallthrough;
 	default:
 		__read_end_io(ctx->bio);
 	}
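
Both /* fall-through */ comments become the fallthrough pseudo-keyword from <linux/compiler_attributes.h>, which expands to __attribute__((__fallthrough__)) on compilers that support it, so -Wimplicit-fallthrough can distinguish deliberate fall-through from a missing break. A small illustrative sketch (the enum and helper are made up):

#include <linux/compiler_attributes.h>

enum demo_step { DEMO_DECRYPT, DEMO_VERITY };

static void demo_dispatch(enum demo_step step)
{
	switch (step) {
	case DEMO_DECRYPT:
		/* queue decryption work, then also consider verity */
		fallthrough;
	case DEMO_VERITY:
		/* queue verification work */
		break;
	}
}
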
@@ -191,12 +204,11 @@
 		idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
 }
 
-static struct bio_post_read_ctx *get_bio_post_read_ctx(struct inode *inode,
-							struct bio *bio,
-							pgoff_t first_idx)
+static void ext4_set_bio_post_read_ctx(struct bio *bio,
+				       const struct inode *inode,
+				       pgoff_t first_idx)
 {
 	unsigned int post_read_steps = 0;
-	struct bio_post_read_ctx *ctx = NULL;
 
 	if (fscrypt_inode_uses_fs_layer_crypto(inode))
 		post_read_steps |= 1 << STEP_DECRYPT;
@@ -205,14 +217,14 @@
 		post_read_steps |= 1 << STEP_VERITY;
 
 	if (post_read_steps) {
-		ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
-		if (!ctx)
-			return ERR_PTR(-ENOMEM);
+		/* Due to the mempool, this never fails. */
+		struct bio_post_read_ctx *ctx =
+			mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
+
 		ctx->bio = bio;
 		ctx->enabled_steps = post_read_steps;
 		bio->bi_private = ctx;
 	}
-	return ctx;
 }
 
 static inline loff_t ext4_readpage_limit(struct inode *inode)
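
The "never fails" comment is what allows ext4_set_bio_post_read_ctx() to return void: a mempool is created with a minimum number of preallocated elements, and mempool_alloc() with a sleeping gfp mask falls back to waiting for one of them instead of returning NULL. A sketch of the general setup (the pool size and the kmalloc-backed pool are illustrative; the real code may use a dedicated slab cache):

#include <linux/init.h>
#include <linux/mempool.h>

#define DEMO_NR_PREALLOC_CTXS	128	/* illustrative minimum pool size */

static mempool_t *demo_ctx_pool;

static int __init demo_post_read_init(void)
{
	demo_ctx_pool = mempool_create_kmalloc_pool(DEMO_NR_PREALLOC_CTXS,
					sizeof(struct bio_post_read_ctx));
	return demo_ctx_pool ? 0 : -ENOMEM;
}
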
@@ -248,14 +260,12 @@
 	submit_bio(bio);
 }
 
-int ext4_mpage_readpages(struct address_space *mapping,
-			 struct list_head *pages, struct page *page,
-			 unsigned nr_pages, bool is_readahead)
+int ext4_mpage_readpages(struct inode *inode,
+		struct readahead_control *rac, struct page *page)
 {
 	struct bio *bio = NULL;
 	sector_t last_block_in_bio = 0;
 
-	struct inode *inode = mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
 	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
 	const unsigned blocksize = 1 << blkbits;
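
With this signature change ext4_mpage_readpages() no longer walks a page list itself: it gets either a single page (the ->readpage path) or a struct readahead_control (the ->readahead path) and derives nr_pages from the latter. Roughly, the two callers in the address_space operations can be wired up like the sketch below (function names are illustrative, and the real callers also handle things like inline data):

static int demo_readpage(struct file *file, struct page *page)
{
	/* ->readpage: one locked page, no readahead_control */
	return ext4_mpage_readpages(page->mapping->host, NULL, page);
}

static void demo_readahead(struct readahead_control *rac)
{
	/* ->readahead: the core already put the pages into the page cache */
	ext4_mpage_readpages(rac->mapping->host, rac, NULL);
}
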
@@ -269,6 +279,7 @@
 	int length;
 	unsigned relative_block = 0;
 	struct ext4_map_blocks map;
+	unsigned int nr_pages = rac ? readahead_count(rac) : 1;
 
 	map.m_pblk = 0;
 	map.m_lblk = 0;
@@ -279,13 +290,9 @@
 		int fully_mapped = 1;
 		unsigned first_hole = blocks_per_page;
 
-		prefetchw(&page->flags);
-		if (pages) {
-			page = list_entry(pages->prev, struct page, lru);
-			list_del(&page->lru);
-			if (add_to_page_cache_lru(page, mapping, page->index,
-				  readahead_gfp_mask(mapping)))
-				goto next_page;
+		if (rac) {
+			page = readahead_page(rac);
+			prefetchw(&page->flags);
 		}
 
 		if (page_has_buffers(page))
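
readahead_page() replaces the open-coded list_entry()/list_del()/add_to_page_cache_lru() sequence: with the readahead_control interface the core allocates the pages, locks them and adds them to the page cache before calling into the filesystem, and readahead_page() simply hands out the next page (with a reference held) and advances the cursor. A minimal consumer-side sketch, separate from ext4's loop:

#include <linux/pagemap.h>

static void demo_consume_readahead(struct readahead_control *rac)
{
	struct page *page;

	while ((page = readahead_page(rac))) {
		/* the page is locked and already in the page cache here */
		/* ...read or zero the data for this page... */
		unlock_page(page);
		put_page(page);	/* drop the reference readahead_page() took */
	}
}
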
@@ -399,26 +406,20 @@
 				bio = NULL;
 			}
 			if (bio == NULL) {
-				struct bio_post_read_ctx *ctx;
-
+				/*
+				 * bio_alloc will _always_ be able to allocate a bio if
+				 * __GFP_DIRECT_RECLAIM is set, see bio_alloc_bioset().
+				 */
 				bio = bio_alloc(GFP_KERNEL,
 					min_t(int, nr_pages, BIO_MAX_PAGES));
-				if (!bio)
-					goto set_error_page;
 				fscrypt_set_bio_crypt_ctx(bio, inode, next_block,
 							  GFP_KERNEL);
-				ctx = get_bio_post_read_ctx(inode, bio, page->index);
-				if (IS_ERR(ctx)) {
-					bio_put(bio);
-					bio = NULL;
-					goto set_error_page;
-				}
+				ext4_set_bio_post_read_ctx(bio, inode, page->index);
 				bio_set_dev(bio, bdev);
 				bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
 				bio->bi_end_io = mpage_end_io;
-				bio->bi_private = ctx;
 				bio_set_op_attrs(bio, REQ_OP_READ,
-						 is_readahead ? REQ_RAHEAD : 0);
+						 rac ? REQ_RAHEAD : 0);
 			}
 
 		length = first_hole << blkbits;
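
The dropped NULL check is justified by the guarantee quoted in the new comment: bio_alloc_bioset() documents that an allocation with __GFP_DIRECT_RECLAIM set (which GFP_KERNEL includes) always succeeds, by waiting if necessary. A small sketch of the simplified allocation path (the helper name is made up):

#include <linux/bio.h>

static struct bio *demo_alloc_read_bio(struct block_device *bdev,
				       sector_t sector, unsigned int nr_pages)
{
	/* cannot return NULL for GFP_KERNEL, so no error path is needed */
	struct bio *bio = bio_alloc(GFP_KERNEL,
				    min_t(unsigned int, nr_pages, BIO_MAX_PAGES));

	bio_set_dev(bio, bdev);
	bio->bi_iter.bi_sector = sector;
	bio->bi_opf = REQ_OP_READ;
	return bio;
}
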
@@ -443,10 +444,9 @@
 		else
 			unlock_page(page);
 	next_page:
-		if (pages)
+		if (rac)
 			put_page(page);
 	}
-	BUG_ON(pages && !list_empty(pages));
 	if (bio)
 		ext4_submit_bio_read(bio);
 	return 0;