2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/fs/f2fs/data.c
@@ -14,16 +14,17 @@
 #include <linux/pagevec.h>
 #include <linux/blkdev.h>
 #include <linux/bio.h>
+#include <linux/blk-crypto.h>
 #include <linux/swap.h>
 #include <linux/prefetch.h>
 #include <linux/uio.h>
 #include <linux/cleancache.h>
 #include <linux/sched/signal.h>
+#include <linux/fiemap.h>

 #include "f2fs.h"
 #include "node.h"
 #include "segment.h"
-#include "trace.h"
 #include <trace/events/f2fs.h>
 #include <trace/events/android_fs.h>

@@ -49,27 +50,6 @@
 	bioset_exit(&f2fs_bioset);
 }

-static inline struct bio *__f2fs_bio_alloc(gfp_t gfp_mask,
-						unsigned int nr_iovecs)
-{
-	return bio_alloc_bioset(gfp_mask, nr_iovecs, &f2fs_bioset);
-}
-
-struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool noio)
-{
-	if (noio) {
-		/* No failure on bio allocation */
-		return __f2fs_bio_alloc(GFP_NOIO, npages);
-	}
-
-	if (time_to_inject(sbi, FAULT_ALLOC_BIO)) {
-		f2fs_show_injection_info(sbi, FAULT_ALLOC_BIO);
-		return NULL;
-	}
-
-	return __f2fs_bio_alloc(GFP_KERNEL, npages);
-}
-
 static bool __is_cp_guaranteed(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
@@ -79,18 +59,19 @@
 	if (!mapping)
 		return false;

-	if (f2fs_is_compressed_page(page))
-		return false;
-
 	inode = mapping->host;
 	sbi = F2FS_I_SB(inode);

 	if (inode->i_ino == F2FS_META_INO(sbi) ||
-			inode->i_ino == F2FS_NODE_INO(sbi) ||
-			S_ISDIR(inode->i_mode) ||
-			(S_ISREG(inode->i_mode) &&
+			inode->i_ino == F2FS_NODE_INO(sbi) ||
+			S_ISDIR(inode->i_mode))
+		return true;
+
+	if (f2fs_is_compressed_page(page))
+		return false;
+	if ((S_ISREG(inode->i_mode) &&
 			(f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
-			is_cold_data(page))
+			page_private_gcing(page))
 		return true;
 	return false;
 }
@@ -114,10 +95,21 @@

 /* postprocessing steps for read bios */
 enum bio_post_read_step {
-	STEP_DECRYPT,
-	STEP_DECOMPRESS_NOWQ,	/* handle normal cluster data inplace */
-	STEP_DECOMPRESS,	/* handle compressed cluster data in workqueue */
-	STEP_VERITY,
+#ifdef CONFIG_FS_ENCRYPTION
+	STEP_DECRYPT	= 1 << 0,
+#else
+	STEP_DECRYPT	= 0,	/* compile out the decryption-related code */
+#endif
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+	STEP_DECOMPRESS	= 1 << 1,
+#else
+	STEP_DECOMPRESS	= 0,	/* compile out the decompression-related code */
+#endif
+#ifdef CONFIG_FS_VERITY
+	STEP_VERITY	= 1 << 2,
+#else
+	STEP_VERITY	= 0,	/* compile out the verity-related code */
+#endif
 };

 struct bio_post_read_ctx {
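Reviewer note: the STEP_* values are now self-contained bit masks rather than bit positions, and a step whose Kconfig option is disabled collapses to 0, so every `enabled_steps & STEP_x` test becomes constant-false and the compiler drops that handling code. A minimal standalone sketch of the pattern — not kernel code; the HAVE_* macros stand in for the CONFIG_* options:

    #include <stdio.h>

    #define HAVE_DECRYPT 1                  /* stands in for CONFIG_FS_ENCRYPTION */
    #define HAVE_VERITY  0                  /* stands in for CONFIG_FS_VERITY */

    enum post_read_step {
    #if HAVE_DECRYPT
            STEP_DECRYPT = 1 << 0,
    #else
            STEP_DECRYPT = 0,               /* mask is 0: tests compile out */
    #endif
    #if HAVE_VERITY
            STEP_VERITY = 1 << 2,
    #else
            STEP_VERITY = 0,
    #endif
    };

    int main(void)
    {
            unsigned int enabled_steps = STEP_DECRYPT | STEP_VERITY;

            if (enabled_steps & STEP_DECRYPT)       /* kept: mask is nonzero */
                    puts("decrypt");
            if (enabled_steps & STEP_VERITY)        /* constant-false: dead code */
                    puts("verity");
            return 0;
    }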
@@ -127,25 +119,27 @@
 	unsigned int enabled_steps;
 };

-static void __read_end_io(struct bio *bio, bool compr, bool verity)
+static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
 {
-	struct page *page;
 	struct bio_vec *bv;
-	int i;
+	struct bvec_iter_all iter_all;

-	bio_for_each_segment_all(bv, bio, i) {
-		page = bv->bv_page;
+	/*
+	 * Update and unlock the bio's pagecache pages, and put the
+	 * decompression context for any compressed pages.
+	 */
+	bio_for_each_segment_all(bv, bio, iter_all) {
+		struct page *page = bv->bv_page;

-#ifdef CONFIG_F2FS_FS_COMPRESSION
-		if (compr && f2fs_is_compressed_page(page)) {
-			f2fs_decompress_pages(bio, page, verity);
+		if (f2fs_is_compressed_page(page)) {
+			if (bio->bi_status)
+				f2fs_end_read_compressed_page(page, true, 0,
+							in_task);
+			f2fs_put_page_dic(page, in_task);
 			continue;
 		}
-		if (verity)
-			continue;
-#endif

-		/* PG_error was set if any post_read step failed */
+		/* PG_error was set if decryption or verity failed. */
 		if (bio->bi_status || PageError(page)) {
 			ClearPageUptodate(page);
 			/* will re-read again later */
@@ -156,106 +150,109 @@
 		dec_page_count(F2FS_P_SB(page), __read_io_type(page));
 		unlock_page(page);
 	}
+
+	if (bio->bi_private)
+		mempool_free(bio->bi_private, bio_post_read_ctx_pool);
+	bio_put(bio);
 }

-static void f2fs_release_read_bio(struct bio *bio);
-static void __f2fs_read_end_io(struct bio *bio, bool compr, bool verity)
-{
-	if (!compr)
-		__read_end_io(bio, false, verity);
-	f2fs_release_read_bio(bio);
-}
-
-static void f2fs_decompress_bio(struct bio *bio, bool verity)
-{
-	__read_end_io(bio, true, verity);
-}
-
-static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
-
-static void f2fs_decrypt_work(struct bio_post_read_ctx *ctx)
-{
-	fscrypt_decrypt_bio(ctx->bio);
-}
-
-static void f2fs_decompress_work(struct bio_post_read_ctx *ctx)
-{
-	f2fs_decompress_bio(ctx->bio, ctx->enabled_steps & (1 << STEP_VERITY));
-}
-
-#ifdef CONFIG_F2FS_FS_COMPRESSION
-static void f2fs_verify_pages(struct page **rpages, unsigned int cluster_size)
-{
-	f2fs_decompress_end_io(rpages, cluster_size, false, true);
-}
-
-static void f2fs_verify_bio(struct bio *bio)
-{
-	struct bio_vec *bv;
-	int i;
-
-	bio_for_each_segment_all(bv, bio, i) {
-		struct page *page = bv->bv_page;
-		struct decompress_io_ctx *dic;
-
-		dic = (struct decompress_io_ctx *)page_private(page);
-
-		if (dic) {
-			if (refcount_dec_not_one(&dic->ref))
-				continue;
-			f2fs_verify_pages(dic->rpages,
-						dic->cluster_size);
-			f2fs_free_dic(dic);
-			continue;
-		}
-
-		if (bio->bi_status || PageError(page))
-			goto clear_uptodate;
-
-		if (fsverity_verify_page(page)) {
-			SetPageUptodate(page);
-			goto unlock;
-		}
-clear_uptodate:
-		ClearPageUptodate(page);
-		ClearPageError(page);
-unlock:
-		dec_page_count(F2FS_P_SB(page), __read_io_type(page));
-		unlock_page(page);
-	}
-}
-#endif
-
-static void f2fs_verity_work(struct work_struct *work)
+static void f2fs_verify_bio(struct work_struct *work)
 {
 	struct bio_post_read_ctx *ctx =
 		container_of(work, struct bio_post_read_ctx, work);
 	struct bio *bio = ctx->bio;
-#ifdef CONFIG_F2FS_FS_COMPRESSION
-	unsigned int enabled_steps = ctx->enabled_steps;
-#endif
+	bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);

 	/*
 	 * fsverity_verify_bio() may call readpages() again, and while verity
-	 * will be disabled for this, decryption may still be needed, resulting
-	 * in another bio_post_read_ctx being allocated. So to prevent
-	 * deadlocks we need to release the current ctx to the mempool first.
-	 * This assumes that verity is the last post-read step.
+	 * will be disabled for this, decryption and/or decompression may still
+	 * be needed, resulting in another bio_post_read_ctx being allocated.
+	 * So to prevent deadlocks we need to release the current ctx to the
+	 * mempool first. This assumes that verity is the last post-read step.
 	 */
 	mempool_free(ctx, bio_post_read_ctx_pool);
 	bio->bi_private = NULL;

-#ifdef CONFIG_F2FS_FS_COMPRESSION
-	/* previous step is decompression */
-	if (enabled_steps & (1 << STEP_DECOMPRESS)) {
-		f2fs_verify_bio(bio);
-		f2fs_release_read_bio(bio);
-		return;
-	}
-#endif
+	/*
+	 * Verify the bio's pages with fs-verity. Exclude compressed pages,
+	 * as those were handled separately by f2fs_end_read_compressed_page().
+	 */
+	if (may_have_compressed_pages) {
+		struct bio_vec *bv;
+		struct bvec_iter_all iter_all;

-	fsverity_verify_bio(bio);
-	__f2fs_read_end_io(bio, false, false);
+		bio_for_each_segment_all(bv, bio, iter_all) {
+			struct page *page = bv->bv_page;
+
+			if (!f2fs_is_compressed_page(page) &&
+			    !PageError(page) && !fsverity_verify_page(page))
+				SetPageError(page);
+		}
+	} else {
+		fsverity_verify_bio(bio);
+	}
+
+	f2fs_finish_read_bio(bio, true);
+}
+
+/*
+ * If the bio's data needs to be verified with fs-verity, then enqueue the
+ * verity work for the bio. Otherwise finish the bio now.
+ *
+ * Note that to avoid deadlocks, the verity work can't be done on the
+ * decryption/decompression workqueue. This is because verifying the data pages
+ * can involve reading verity metadata pages from the file, and these verity
+ * metadata pages may be encrypted and/or compressed.
+ */
+static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task)
+{
+	struct bio_post_read_ctx *ctx = bio->bi_private;
+
+	if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
+		INIT_WORK(&ctx->work, f2fs_verify_bio);
+		fsverity_enqueue_verify_work(&ctx->work);
+	} else {
+		f2fs_finish_read_bio(bio, in_task);
+	}
+}
+
+/*
+ * Handle STEP_DECOMPRESS by decompressing any compressed clusters whose last
+ * remaining page was read by @ctx->bio.
+ *
+ * Note that a bio may span clusters (even a mix of compressed and uncompressed
+ * clusters) or be for just part of a cluster. STEP_DECOMPRESS just indicates
+ * that the bio includes at least one compressed page. The actual decompression
+ * is done on a per-cluster basis, not a per-bio basis.
+ */
+static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx,
+		bool in_task)
+{
+	struct bio_vec *bv;
+	struct bvec_iter_all iter_all;
+	bool all_compressed = true;
+	block_t blkaddr = SECTOR_TO_BLOCK(ctx->bio->bi_iter.bi_sector);
+
+	bio_for_each_segment_all(bv, ctx->bio, iter_all) {
+		struct page *page = bv->bv_page;
+
+		/* PG_error was set if decryption failed. */
+		if (f2fs_is_compressed_page(page))
+			f2fs_end_read_compressed_page(page, PageError(page),
+						blkaddr, in_task);
+		else
+			all_compressed = false;
+
+		blkaddr++;
+	}
+
+	/*
+	 * Optimization: if all the bio's pages are compressed, then scheduling
+	 * the per-bio verity work is unnecessary, as verity will be fully
+	 * handled at the compression cluster level.
+	 */
+	if (all_compressed)
+		ctx->enabled_steps &= ~STEP_VERITY;
 }

 static void f2fs_post_read_work(struct work_struct *work)
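Reviewer note: f2fs_handle_step_decompress() recovers each page's block address from the bio's starting sector and then simply increments, which is safe because page_is_mergeable() only ever merges physically contiguous blocks into one bio. A standalone sketch of that arithmetic — not kernel code — assuming f2fs's usual 4 KiB blocks and 512-byte sectors:

    #include <stdio.h>
    #include <stdint.h>

    #define SECTOR_SHIFT    9
    #define BLK_SHIFT       12
    #define SECTOR_TO_BLOCK(s)      ((s) >> (BLK_SHIFT - SECTOR_SHIFT))

    int main(void)
    {
            uint64_t bi_sector = 8192;      /* hypothetical start sector of a bio */
            unsigned int nr_pages = 4;      /* one 4 KiB page per block */
            uint64_t blkaddr = SECTOR_TO_BLOCK(bi_sector);

            /* pages in one bio are physically contiguous, so the walk is blkaddr++ */
            for (unsigned int i = 0; i < nr_pages; i++)
                    printf("page %u -> block %llu\n", i,
                           (unsigned long long)(blkaddr + i));
            return 0;
    }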
@@ -263,107 +260,75 @@
 	struct bio_post_read_ctx *ctx =
 		container_of(work, struct bio_post_read_ctx, work);

-	if (ctx->enabled_steps & (1 << STEP_DECRYPT))
-		f2fs_decrypt_work(ctx);
+	if (ctx->enabled_steps & STEP_DECRYPT)
+		fscrypt_decrypt_bio(ctx->bio);

-	if (ctx->enabled_steps & (1 << STEP_DECOMPRESS))
-		f2fs_decompress_work(ctx);
+	if (ctx->enabled_steps & STEP_DECOMPRESS)
+		f2fs_handle_step_decompress(ctx, true);

-	if (ctx->enabled_steps & (1 << STEP_VERITY)) {
-		INIT_WORK(&ctx->work, f2fs_verity_work);
-		fsverity_enqueue_verify_work(&ctx->work);
-		return;
-	}
-
-	__f2fs_read_end_io(ctx->bio,
-			ctx->enabled_steps & (1 << STEP_DECOMPRESS), false);
-}
-
-static void f2fs_enqueue_post_read_work(struct f2fs_sb_info *sbi,
-						struct work_struct *work)
-{
-	queue_work(sbi->post_read_wq, work);
-}
-
-static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
-{
-	/*
-	 * We use different work queues for decryption and for verity because
-	 * verity may require reading metadata pages that need decryption, and
-	 * we shouldn't recurse to the same workqueue.
-	 */
-
-	if (ctx->enabled_steps & (1 << STEP_DECRYPT) ||
-		ctx->enabled_steps & (1 << STEP_DECOMPRESS)) {
-		INIT_WORK(&ctx->work, f2fs_post_read_work);
-		f2fs_enqueue_post_read_work(ctx->sbi, &ctx->work);
-		return;
-	}
-
-	if (ctx->enabled_steps & (1 << STEP_VERITY)) {
-		INIT_WORK(&ctx->work, f2fs_verity_work);
-		fsverity_enqueue_verify_work(&ctx->work);
-		return;
-	}
-
-	__f2fs_read_end_io(ctx->bio, false, false);
-}
-
-static bool f2fs_bio_post_read_required(struct bio *bio)
-{
-	return bio->bi_private;
+	f2fs_verify_and_finish_bio(ctx->bio, true);
 }

 static void f2fs_read_end_io(struct bio *bio)
 {
-	struct page *first_page = bio->bi_io_vec[0].bv_page;
-	struct f2fs_sb_info *sbi = F2FS_P_SB(first_page);
+	struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
+	struct bio_post_read_ctx *ctx = bio->bi_private;
+	bool intask = in_task();

 	if (time_to_inject(sbi, FAULT_READ_IO)) {
 		f2fs_show_injection_info(sbi, FAULT_READ_IO);
 		bio->bi_status = BLK_STS_IOERR;
 	}

-	if (f2fs_bio_post_read_required(bio)) {
-		struct bio_post_read_ctx *ctx = bio->bi_private;
-
-		bio_post_read_processing(ctx);
+	if (bio->bi_status) {
+		f2fs_finish_read_bio(bio, intask);
 		return;
 	}

-	if (first_page != NULL &&
-		__read_io_type(first_page) == F2FS_RD_DATA) {
-		trace_android_fs_dataread_end(first_page->mapping->host,
-						page_offset(first_page),
-						bio->bi_iter.bi_size);
+	if (ctx) {
+		unsigned int enabled_steps = ctx->enabled_steps &
+					(STEP_DECRYPT | STEP_DECOMPRESS);
+
+		/*
+		 * If decompression is the only step left, handle it here
+		 * instead of bouncing through the post-read workqueue.
+		 */
+		if (enabled_steps == STEP_DECOMPRESS &&
+				!f2fs_low_mem_mode(sbi)) {
+			f2fs_handle_step_decompress(ctx, intask);
+		} else if (enabled_steps) {
+			INIT_WORK(&ctx->work, f2fs_post_read_work);
+			queue_work(ctx->sbi->post_read_wq, &ctx->work);
+			return;
+		}
 	}

-	__f2fs_read_end_io(bio, false, false);
+	f2fs_verify_and_finish_bio(bio, intask);
 }

 static void f2fs_write_end_io(struct bio *bio)
 {
 	struct f2fs_sb_info *sbi = bio->bi_private;
 	struct bio_vec *bvec;
-	int i;
+	struct bvec_iter_all iter_all;

 	if (time_to_inject(sbi, FAULT_WRITE_IO)) {
 		f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
 		bio->bi_status = BLK_STS_IOERR;
 	}

-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, iter_all) {
 		struct page *page = bvec->bv_page;
 		enum count_type type = WB_DATA_TYPE(page);

-		if (IS_DUMMY_WRITTEN_PAGE(page)) {
-			set_page_private(page, (unsigned long)NULL);
-			ClearPagePrivate(page);
+		if (page_private_dummy(page)) {
+			clear_page_private_dummy(page);
 			unlock_page(page);
 			mempool_free(page, sbi->write_io_dummy);

 			if (unlikely(bio->bi_status))
-				f2fs_stop_checkpoint(sbi, true);
+				f2fs_stop_checkpoint(sbi, true,
+						STOP_CP_REASON_WRITE_FAIL);
 			continue;
 		}

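Reviewer note on the reworked completion path: f2fs_read_end_io() now finishes errored bios immediately, handles a decompress-only bio inline when memory allows, and only bounces to the post-read workqueue when decryption (or decrypt+decompress) is required; verity, if enabled, is always deferred to its own workqueue afterwards. A standalone model of that decision order — not kernel code; f2fs_low_mem_mode() is treated as a plain flag here:

    #include <stdbool.h>
    #include <stdio.h>

    enum { STEP_DECRYPT = 1 << 0, STEP_DECOMPRESS = 1 << 1, STEP_VERITY = 1 << 2 };

    static void read_end_io(unsigned int steps, bool io_error, bool low_mem)
    {
            unsigned int enabled = steps & (STEP_DECRYPT | STEP_DECOMPRESS);

            if (io_error) {
                    puts("finish bio (error)");       /* no post-processing */
                    return;
            }
            if (enabled == STEP_DECOMPRESS && !low_mem) {
                    puts("decompress inline");        /* no workqueue bounce */
            } else if (enabled) {
                    puts("queue f2fs_post_read_work");/* decrypt (+decompress) in wq */
                    return;                           /* the wq does the verity step */
            }
            if (steps & STEP_VERITY)
                    puts("enqueue verity work");
            else
                    puts("finish bio");
    }

    int main(void)
    {
            read_end_io(STEP_DECOMPRESS, false, false);            /* inline + finish */
            read_end_io(STEP_DECRYPT | STEP_VERITY, false, false); /* wq path */
            read_end_io(STEP_VERITY, true, false);                 /* error path */
            return 0;
    }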
@@ -379,7 +344,8 @@
 	if (unlikely(bio->bi_status)) {
 		mapping_set_error(page->mapping, -EIO);
 		if (type == F2FS_WB_CP_DATA)
-			f2fs_stop_checkpoint(sbi, true);
+			f2fs_stop_checkpoint(sbi, true,
+					STOP_CP_REASON_WRITE_FAIL);
 	}

 	f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
@@ -388,7 +354,7 @@
 		dec_page_count(sbi, type);
 		if (f2fs_in_warm_node_list(sbi, page))
 			f2fs_del_fsync_node_entry(sbi, page);
-		clear_cold_data(page);
+		clear_page_private_gcing(page);
 		end_page_writeback(page);
 	}
 	if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
@@ -449,7 +415,7 @@
 	struct f2fs_sb_info *sbi = fio->sbi;
 	struct bio *bio;

-	bio = f2fs_bio_alloc(sbi, npages, true);
+	bio = bio_alloc_bioset(GFP_NOIO, npages, &f2fs_bioset);

 	f2fs_target_device(sbi, fio->new_blkaddr, bio);
 	if (is_read_io(fio->op)) {
@@ -510,7 +476,7 @@
 	if (f2fs_lfs_mode(sbi) && current->plug)
 		blk_finish_plug(current->plug);

-	if (F2FS_IO_ALIGNED(sbi))
+	if (!F2FS_IO_ALIGNED(sbi))
 		goto submit_io;

 	start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
@@ -526,10 +492,11 @@
 				GFP_NOIO | __GFP_NOFAIL);
 		f2fs_bug_on(sbi, !page);

-		zero_user_segment(page, 0, PAGE_SIZE);
-		SetPagePrivate(page);
-		set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
 		lock_page(page);
+
+		zero_user_segment(page, 0, PAGE_SIZE);
+		set_page_private_dummy(page);
+
 		if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
 			f2fs_bug_on(sbi, 1);
 	}
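Reviewer note: this hunk reorders the dummy-page setup so the page is locked before its private state is set, and switches to the new set_page_private_dummy() helper; the surrounding (unchanged) code still pads write bios out to the IO-alignment unit with zeroed mempool pages. A standalone sketch of the padding arithmetic, assuming a hypothetical 8-block alignment unit — not kernel code:

    #include <stdio.h>

    int main(void)
    {
            unsigned int io_size = 8;       /* F2FS_IO_SIZE in blocks (hypothetical) */
            unsigned int bio_blocks;

            for (bio_blocks = 5; bio_blocks <= 8; bio_blocks++) {
                    unsigned int start = bio_blocks % io_size;
                    /* a bio already on the boundary needs no dummy pages */
                    unsigned int pad = start ? io_size - start : 0;

                    printf("%u blocks in bio -> %u dummy pages\n", bio_blocks, pad);
            }
            return 0;
    }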
@@ -546,32 +513,6 @@
 	else
 		trace_f2fs_submit_write_bio(sbi->sb, type, bio);
 	submit_bio(bio);
-}
-
-static void __f2fs_submit_read_bio(struct f2fs_sb_info *sbi,
-				struct bio *bio, enum page_type type)
-{
-	if (trace_android_fs_dataread_start_enabled() && (type == DATA)) {
-		struct page *first_page = bio->bi_io_vec[0].bv_page;
-
-		if (first_page != NULL &&
-			__read_io_type(first_page) == F2FS_RD_DATA) {
-			char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
-
-			path = android_fstrace_get_pathname(pathbuf,
-						MAX_TRACE_PATHBUF_LEN,
-						first_page->mapping->host);
-
-			trace_android_fs_dataread_start(
-				first_page->mapping->host,
-				page_offset(first_page),
-				bio->bi_iter.bi_size,
-				current->pid,
-				path,
-				current->comm);
-		}
-	}
-	__submit_bio(sbi, bio, type);
 }

 void f2fs_submit_bio(struct f2fs_sb_info *sbi,
@@ -631,7 +572,7 @@
 		struct page *page, nid_t ino)
 {
 	struct bio_vec *bvec;
-	int i;
+	struct bvec_iter_all iter_all;

 	if (!bio)
 		return false;
@@ -639,7 +580,7 @@
 	if (!inode && !page && !ino)
 		return true;

-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, iter_all) {
 		struct page *target = bvec->bv_page;

 		if (fscrypt_is_bounce_page(target)) {
@@ -670,7 +611,7 @@
 	enum page_type btype = PAGE_TYPE_OF_BIO(type);
 	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

-	down_write(&io->io_rwsem);
+	f2fs_down_write(&io->io_rwsem);

 	/* change META to META_FLUSH in the checkpoint procedure */
 	if (type >= META_FLUSH) {
@@ -681,7 +622,7 @@
 		io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
 	}
 	__submit_merged_bio(io);
-	up_write(&io->io_rwsem);
+	f2fs_up_write(&io->io_rwsem);
 }

 static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
@@ -696,9 +637,9 @@
 		enum page_type btype = PAGE_TYPE_OF_BIO(type);
 		struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

-		down_read(&io->io_rwsem);
+		f2fs_down_read(&io->io_rwsem);
 		ret = __has_merged_page(io->bio, inode, page, ino);
-		up_read(&io->io_rwsem);
+		f2fs_up_read(&io->io_rwsem);
 	}
 	if (ret)
 		__f2fs_submit_merged_write(sbi, type, temp);
@@ -744,7 +685,6 @@
 		return -EFSCORRUPTED;

 	trace_f2fs_submit_page_bio(page, fio);
-	f2fs_trace_ios(fio, 0);

 	/* Allocate a new bio */
 	bio = __bio_alloc(fio, 1);
@@ -758,7 +698,7 @@
 	}

 	if (fio->io_wbc && !is_read_io(fio->op))
-		wbc_account_io(fio->io_wbc, page, PAGE_SIZE);
+		wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);

 	__attach_io_flag(fio);
 	bio_set_op_attrs(bio, fio->op, fio->op_flags);
@@ -766,16 +706,16 @@
 	inc_page_count(fio->sbi, is_read_io(fio->op) ?
 			__read_io_type(page): WB_DATA_TYPE(fio->page));

-	if (is_read_io(fio->op))
-		__f2fs_submit_read_bio(fio->sbi, bio, fio->type);
-	else
-		__submit_bio(fio->sbi, bio, fio->type);
+	__submit_bio(fio->sbi, bio, fio->type);
 	return 0;
 }

 static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
 				block_t last_blkaddr, block_t cur_blkaddr)
 {
+	if (unlikely(sbi->max_io_bytes &&
+			bio->bi_iter.bi_size >= sbi->max_io_bytes))
+		return false;
 	if (last_blkaddr + 1 != cur_blkaddr)
 		return false;
 	return __same_bdev(sbi, cur_blkaddr, bio);
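Reviewer note: page_is_mergeable() gains an early bail-out so a bio is never grown past the new per-superblock sbi->max_io_bytes cap (0 means uncapped). A standalone model of the test, with the __same_bdev() device check omitted — not kernel code:

    #include <stdbool.h>
    #include <stdio.h>

    struct bio_model { unsigned int bi_size; };

    static bool page_is_mergeable(unsigned int max_io_bytes, struct bio_model *bio,
                                  unsigned long long last_blkaddr,
                                  unsigned long long cur_blkaddr)
    {
            /* new check: never grow a bio past the configured cap */
            if (max_io_bytes && bio->bi_size >= max_io_bytes)
                    return false;
            /* pre-existing check: blocks must be physically contiguous */
            return last_blkaddr + 1 == cur_blkaddr;
    }

    int main(void)
    {
            struct bio_model bio = { .bi_size = 128 * 1024 };

            printf("%d\n", page_is_mergeable(128 * 1024, &bio, 100, 101)); /* 0: at cap */
            printf("%d\n", page_is_mergeable(0, &bio, 100, 101));          /* 1: no cap */
            printf("%d\n", page_is_mergeable(0, &bio, 100, 200));          /* 0: gap */
            return 0;
    }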
@@ -823,9 +763,9 @@
 	if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
 		f2fs_bug_on(sbi, 1);

-	down_write(&io->bio_list_lock);
+	f2fs_down_write(&io->bio_list_lock);
 	list_add_tail(&be->list, &io->bio_list);
-	up_write(&io->bio_list_lock);
+	f2fs_up_write(&io->bio_list_lock);
 }

 static void del_bio_entry(struct bio_entry *be)
@@ -847,16 +787,17 @@
 		struct list_head *head = &io->bio_list;
 		struct bio_entry *be;

-		down_write(&io->bio_list_lock);
+		f2fs_down_write(&io->bio_list_lock);
 		list_for_each_entry(be, head, list) {
 			if (be->bio != *bio)
 				continue;

 			found = true;

-			if (page_is_mergeable(sbi, *bio, *fio->last_block,
-					fio->new_blkaddr) &&
-			    f2fs_crypt_mergeable_bio(*bio,
+			f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
+							    *fio->last_block,
+							    fio->new_blkaddr));
+			if (f2fs_crypt_mergeable_bio(*bio,
 					fio->page->mapping->host,
 					fio->page->index, fio) &&
 			    bio_add_page(*bio, page, PAGE_SIZE, 0) ==
@@ -870,7 +811,7 @@
 			__submit_bio(sbi, *bio, DATA);
 			break;
 		}
-		up_write(&io->bio_list_lock);
+		f2fs_up_write(&io->bio_list_lock);
 	}

 	if (ret) {
@@ -888,6 +829,8 @@
 	bool found = false;
 	struct bio *target = bio ? *bio : NULL;

+	f2fs_bug_on(sbi, !target && !page);
+
 	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
 		struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
 		struct list_head *head = &io->bio_list;
@@ -896,7 +839,7 @@
 		if (list_empty(head))
 			continue;

-		down_read(&io->bio_list_lock);
+		f2fs_down_read(&io->bio_list_lock);
 		list_for_each_entry(be, head, list) {
 			if (target)
 				found = (target == be->bio);
@@ -906,14 +849,14 @@
 			if (found)
 				break;
 		}
-		up_read(&io->bio_list_lock);
+		f2fs_up_read(&io->bio_list_lock);

 		if (!found)
 			continue;

 		found = false;

-		down_write(&io->bio_list_lock);
+		f2fs_down_write(&io->bio_list_lock);
 		list_for_each_entry(be, head, list) {
 			if (target)
 				found = (target == be->bio);
@@ -926,7 +869,7 @@
 				break;
 			}
 		}
-		up_write(&io->bio_list_lock);
+		f2fs_up_write(&io->bio_list_lock);
 	}

 	if (found)
@@ -948,15 +891,16 @@
 		return -EFSCORRUPTED;

 	trace_f2fs_submit_page_bio(page, fio);
-	f2fs_trace_ios(fio, 0);

+	if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
+						fio->new_blkaddr))
+		f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
 alloc_new:
 	if (!bio) {
 		bio = __bio_alloc(fio, BIO_MAX_PAGES);
-		f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
-				fio->page->index, fio,
-				GFP_NOIO);
 		__attach_io_flag(fio);
+		f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
+				fio->page->index, fio, GFP_NOIO);
 		bio_set_op_attrs(bio, fio->op, fio->op_flags);

 		add_bio_entry(fio->sbi, bio, page, fio->temp);
@@ -966,7 +910,7 @@
 	}

 	if (fio->io_wbc)
-		wbc_account_io(fio->io_wbc, page, PAGE_SIZE);
+		wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);

 	inc_page_count(fio->sbi, WB_DATA_TYPE(page));

@@ -985,7 +929,7 @@

 	f2fs_bug_on(sbi, is_read_io(fio->op));

-	down_write(&io->io_rwsem);
+	f2fs_down_write(&io->io_rwsem);
 next:
 	if (fio->in_list) {
 		spin_lock(&io->io_lock);
@@ -1017,7 +961,7 @@
 	    (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
 			fio->new_blkaddr) ||
 	     !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
-			fio->page->index, fio)))
+			bio_page->index, fio)))
 		__submit_merged_bio(io);
 alloc_new:
 	if (io->bio == NULL) {
@@ -1030,8 +974,7 @@
 		}
 		io->bio = __bio_alloc(fio, BIO_MAX_PAGES);
 		f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
-				fio->page->index, fio,
-				GFP_NOIO);
+				bio_page->index, fio, GFP_NOIO);
 		io->fio = *fio;
 	}
@@ -1041,10 +984,9 @@
 	}

 	if (fio->io_wbc)
-		wbc_account_io(fio->io_wbc, bio_page, PAGE_SIZE);
+		wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);

 	io->last_block_in_bio = fio->new_blkaddr;
-	f2fs_trace_ios(fio, 0);

 	trace_f2fs_submit_page_write(fio->page, fio);
 skip:
@@ -1054,13 +996,7 @@
 	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
 				!f2fs_is_checkpoint_ready(sbi))
 		__submit_merged_bio(io);
-	up_write(&io->io_rwsem);
-}
-
-static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
-{
-	return fsverity_active(inode) &&
-	       idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
+	f2fs_up_write(&io->io_rwsem);
 }

 static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
@@ -1072,8 +1008,9 @@
 	struct bio_post_read_ctx *ctx;
 	unsigned int post_read_steps = 0;

-	bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES),
-								for_write);
+	bio = bio_alloc_bioset(for_write ? GFP_NOIO : GFP_KERNEL,
+			       min_t(int, nr_pages, BIO_MAX_PAGES),
+			       &f2fs_bioset);
 	if (!bio)
 		return ERR_PTR(-ENOMEM);

@@ -1084,13 +1021,19 @@
 	bio_set_op_attrs(bio, REQ_OP_READ, op_flag);

 	if (fscrypt_inode_uses_fs_layer_crypto(inode))
-		post_read_steps |= 1 << STEP_DECRYPT;
-	if (f2fs_compressed_file(inode))
-		post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
-	if (f2fs_need_verity(inode, first_idx))
-		post_read_steps |= 1 << STEP_VERITY;
+		post_read_steps |= STEP_DECRYPT;

-	if (post_read_steps) {
+	if (f2fs_need_verity(inode, first_idx))
+		post_read_steps |= STEP_VERITY;
+
+	/*
+	 * STEP_DECOMPRESS is handled specially, since a compressed file might
+	 * contain both compressed and uncompressed clusters. We'll allocate a
+	 * bio_post_read_ctx if the file is compressed, but the caller is
+	 * responsible for enabling STEP_DECOMPRESS if it's actually needed.
+	 */
+
+	if (post_read_steps || f2fs_compressed_file(inode)) {
 		/* Due to the mempool, this never fails. */
 		ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
 		ctx->bio = bio;
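Reviewer note: the bio_post_read_ctx is now allocated whenever the file is compressed, even if enabled_steps is still 0 at this point; f2fs_read_multi_pages() sets STEP_DECOMPRESS later, per bio, once it knows the bio actually carries compressed pages. A standalone model of the allocation decision — not kernel code:

    #include <stdbool.h>
    #include <stdio.h>

    enum { STEP_DECRYPT = 1 << 0, STEP_VERITY = 1 << 2 };

    static bool needs_ctx(bool fs_layer_crypto, bool verity, bool compressed,
                          unsigned int *steps)
    {
            *steps = 0;
            if (fs_layer_crypto)
                    *steps |= STEP_DECRYPT;
            if (verity)
                    *steps |= STEP_VERITY;
            /* a compressed file always gets a ctx: the caller may still add
             * STEP_DECOMPRESS for the clusters that are actually compressed */
            return *steps || compressed;
    }

    int main(void)
    {
            unsigned int steps;

            printf("%d\n", needs_ctx(false, false, true, &steps));  /* 1, steps == 0 */
            printf("%d\n", needs_ctx(true, false, false, &steps));  /* 1, decrypt */
            printf("%d\n", needs_ctx(false, false, false, &steps)); /* 0 */
            return 0;
    }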
@@ -1102,21 +1045,15 @@
 	return bio;
 }

-static void f2fs_release_read_bio(struct bio *bio)
-{
-	if (bio->bi_private)
-		mempool_free(bio->bi_private, bio_post_read_ctx_pool);
-	bio_put(bio);
-}
-
 /* This can handle encryption stuffs */
 static int f2fs_submit_page_read(struct inode *inode, struct page *page,
-				block_t blkaddr, bool for_write)
+				block_t blkaddr, int op_flags, bool for_write)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct bio *bio;

-	bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0, page->index, for_write);
+	bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
+					page->index, for_write);
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);

@@ -1130,7 +1067,7 @@
 	ClearPageError(page);
 	inc_page_count(sbi, F2FS_RD_DATA);
 	f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
-	__f2fs_submit_read_bio(sbi, bio, DATA);
+	__submit_bio(sbi, bio, DATA);
 	return 0;
 }

@@ -1166,7 +1103,7 @@
 {
 	dn->data_blkaddr = blkaddr;
 	f2fs_set_data_blkaddr(dn);
-	f2fs_update_extent_cache(dn);
+	f2fs_update_read_extent_cache(dn);
 }

 /* dn->ofs_in_node will be returned with up-to-date last block pointer */
@@ -1190,6 +1127,7 @@

 	for (; count > 0; dn->ofs_in_node++) {
 		block_t blkaddr = f2fs_data_blkaddr(dn);
+
 		if (blkaddr == NULL_ADDR) {
 			dn->data_blkaddr = NEW_ADDR;
 			__set_data_blkaddr(dn);
@@ -1231,10 +1169,10 @@

 int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
 {
-	struct extent_info ei = {0,0,0};
+	struct extent_info ei = {0, };
 	struct inode *inode = dn->inode;

-	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+	if (f2fs_lookup_read_extent_cache(inode, index, &ei)) {
 		dn->data_blkaddr = ei.blk + index - ei.fofs;
 		return 0;
 	}
@@ -1248,14 +1186,14 @@
 	struct address_space *mapping = inode->i_mapping;
 	struct dnode_of_data dn;
 	struct page *page;
-	struct extent_info ei = {0,0,0};
+	struct extent_info ei = {0, };
 	int err;

 	page = f2fs_grab_cache_page(mapping, index, for_write);
 	if (!page)
 		return ERR_PTR(-ENOMEM);

-	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+	if (f2fs_lookup_read_extent_cache(inode, index, &ei)) {
 		dn.data_blkaddr = ei.blk + index - ei.fofs;
 		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
 						DATA_GENERIC_ENHANCE_READ)) {
@@ -1303,7 +1241,8 @@
 		return page;
 	}

-	err = f2fs_submit_page_read(inode, page, dn.data_blkaddr, for_write);
+	err = f2fs_submit_page_read(inode, page, dn.data_blkaddr,
+						op_flags, for_write);
 	if (err)
 		goto put_err;
 	return page;
@@ -1437,7 +1376,7 @@
 	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
 		return -EPERM;

-	err = f2fs_get_node_info(sbi, dn->nid, &ni);
+	err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
 	if (err)
 		return err;

@@ -1452,10 +1391,12 @@
 	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
 	old_blkaddr = dn->data_blkaddr;
 	f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
-					&sum, seg_type, NULL, false);
-	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
+					&sum, seg_type, NULL);
+	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
 		invalidate_mapping_pages(META_MAPPING(sbi),
 					old_blkaddr, old_blkaddr);
+		f2fs_invalidate_compress_page(sbi, old_blkaddr);
+	}
 	f2fs_update_data_blkaddr(dn, dn->data_blkaddr);

 	/*
@@ -1512,13 +1453,13 @@
 	return err;
 }

-void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
+void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
 {
 	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
 		if (lock)
-			down_read(&sbi->node_change);
+			f2fs_down_read(&sbi->node_change);
 		else
-			up_read(&sbi->node_change);
+			f2fs_up_read(&sbi->node_change);
 	} else {
 		if (lock)
 			f2fs_lock_op(sbi);
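Reviewer note: aside from being renamed for use outside this file, f2fs_do_map_lock() keeps the same shape — F2FS_GET_BLOCK_PRE_AIO only takes the node_change rwsem (shared), while every other flag takes the full f2fs_lock_op()/f2fs_unlock_op() pair; the down_read/up_read calls simply move to the f2fs_* rwsem wrappers used throughout this patch. A standalone model of the selection — not kernel code:

    #include <stdbool.h>
    #include <stdio.h>

    enum { F2FS_GET_BLOCK_PRE_AIO, F2FS_GET_BLOCK_DIO, F2FS_GET_BLOCK_BMAP };

    static void do_map_lock(int flag, bool lock)
    {
            if (flag == F2FS_GET_BLOCK_PRE_AIO)
                    printf("node_change rwsem: %s (read)\n", lock ? "down" : "up");
            else
                    printf("cp rwsem: %s\n", lock ? "f2fs_lock_op" : "f2fs_unlock_op");
    }

    int main(void)
    {
            do_map_lock(F2FS_GET_BLOCK_PRE_AIO, true);
            do_map_lock(F2FS_GET_BLOCK_PRE_AIO, false);
            do_map_lock(F2FS_GET_BLOCK_DIO, true);
            return 0;
    }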
@@ -1543,7 +1484,7 @@
 	int err = 0, ofs = 1;
 	unsigned int ofs_in_node, last_ofs_in_node;
 	blkcnt_t prealloc;
-	struct extent_info ei = {0,0,0};
+	struct extent_info ei = {0, };
 	block_t blkaddr;
 	unsigned int start_pgofs;

@@ -1557,7 +1498,7 @@
 	pgofs = (pgoff_t)map->m_lblk;
 	end = pgofs + maxblocks;

-	if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
+	if (!create && f2fs_lookup_read_extent_cache(inode, pgofs, &ei)) {
 		if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
 							map->m_may_create)
 			goto next_dnode;
@@ -1577,7 +1518,7 @@

 next_dnode:
 	if (map->m_may_create)
-		__do_map_lock(sbi, flag, true);
+		f2fs_do_map_lock(sbi, flag, true);

 	/* When reading holes, we need its node page */
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -1585,7 +1526,21 @@
 	if (err) {
 		if (flag == F2FS_GET_BLOCK_BMAP)
 			map->m_pblk = 0;
+
 		if (err == -ENOENT) {
+			/*
+			 * There is one exceptional case that read_node_page()
+			 * may return -ENOENT due to filesystem has been
+			 * shutdown or cp_error, so force to convert error
+			 * number to EIO for such case.
+			 */
+			if (map->m_may_create &&
+				(is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
+				f2fs_cp_error(sbi))) {
+				err = -EIO;
+				goto unlock_out;
+			}
+
 			err = 0;
 			if (map->m_next_pgofs)
 				*map->m_next_pgofs =
@@ -1717,7 +1672,7 @@
 	if (map->m_flags & F2FS_MAP_MAPPED) {
 		unsigned int ofs = start_pgofs - map->m_lblk;

-		f2fs_update_extent_cache_range(&dn,
+		f2fs_update_read_extent_cache_range(&dn,
 				start_pgofs, map->m_pblk + ofs,
 				map->m_len - ofs);
 	}
@@ -1726,7 +1681,7 @@
 	f2fs_put_dnode(&dn);

 	if (map->m_may_create) {
-		__do_map_lock(sbi, flag, false);
+		f2fs_do_map_lock(sbi, flag, false);
 		f2fs_balance_fs(sbi, dn.node_changed);
 	}
 	goto next_dnode;
@@ -1742,7 +1697,7 @@
 	if (map->m_flags & F2FS_MAP_MAPPED) {
 		unsigned int ofs = start_pgofs - map->m_lblk;

-		f2fs_update_extent_cache_range(&dn,
+		f2fs_update_read_extent_cache_range(&dn,
 				start_pgofs, map->m_pblk + ofs,
 				map->m_len - ofs);
 	}
@@ -1752,7 +1707,7 @@
 	f2fs_put_dnode(&dn);
 unlock_out:
 	if (map->m_may_create) {
-		__do_map_lock(sbi, flag, false);
+		f2fs_do_map_lock(sbi, flag, false);
 		f2fs_balance_fs(sbi, dn.node_changed);
 	}
 out:
@@ -1786,6 +1741,16 @@
 	return true;
 }

+static inline u64 bytes_to_blks(struct inode *inode, u64 bytes)
+{
+	return (bytes >> inode->i_blkbits);
+}
+
+static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
+{
+	return (blks << inode->i_blkbits);
+}
+
 static int __get_data_block(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh, int create, int flag,
 			pgoff_t *next_pgofs, int seg_type, bool may_write)
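Reviewer note: these two helpers replace the old logical_to_blk()/blk_to_logical() pair (removed further down) and are used uniformly for byte/block conversion from here on. A standalone check of the arithmetic, assuming f2fs's usual 4 KiB block size (i_blkbits == 12):

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t bytes_to_blks(unsigned int blkbits, uint64_t bytes)
    {
            return bytes >> blkbits;
    }

    static uint64_t blks_to_bytes(unsigned int blkbits, uint64_t blks)
    {
            return blks << blkbits;
    }

    int main(void)
    {
            /* i_blkbits is 12 for f2fs's 4 KiB blocks */
            printf("%llu\n", (unsigned long long)bytes_to_blks(12, 8192)); /* 2 */
            printf("%llu\n", (unsigned long long)blks_to_bytes(12, 3));    /* 12288 */
            return 0;
    }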
@@ -1794,7 +1759,7 @@
 	int err;

 	map.m_lblk = iblock;
-	map.m_len = bh->b_size >> inode->i_blkbits;
+	map.m_len = bytes_to_blks(inode, bh->b_size);
 	map.m_next_pgofs = next_pgofs;
 	map.m_next_extent = NULL;
 	map.m_seg_type = seg_type;
@@ -1804,18 +1769,9 @@
 	if (!err) {
 		map_bh(bh, inode->i_sb, map.m_pblk);
 		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
-		bh->b_size = (u64)map.m_len << inode->i_blkbits;
+		bh->b_size = blks_to_bytes(inode, map.m_len);
 	}
 	return err;
-}
-
-static int get_data_block(struct inode *inode, sector_t iblock,
-			struct buffer_head *bh_result, int create, int flag,
-			pgoff_t *next_pgofs)
-{
-	return __get_data_block(inode, iblock, bh_result, create,
-							flag, next_pgofs,
-							NO_CHECK_TYPE, create);
 }

 static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
@@ -1824,7 +1780,7 @@
 	return __get_data_block(inode, iblock, bh_result, create,
 				F2FS_GET_BLOCK_DIO, NULL,
 				f2fs_rw_hint_to_seg_type(inode->i_write_hint),
-				IS_SWAPFILE(inode) ? false : true);
+				true);
 }

 static int get_data_block_dio(struct inode *inode, sector_t iblock,
@@ -1834,28 +1790,6 @@
 				F2FS_GET_BLOCK_DIO, NULL,
 				f2fs_rw_hint_to_seg_type(inode->i_write_hint),
 				false);
-}
-
-static int get_data_block_bmap(struct inode *inode, sector_t iblock,
-			struct buffer_head *bh_result, int create)
-{
-	/* Block number less than F2FS MAX BLOCKS */
-	if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
-		return -EFBIG;
-
-	return __get_data_block(inode, iblock, bh_result, create,
-						F2FS_GET_BLOCK_BMAP, NULL,
-						NO_CHECK_TYPE, create);
-}
-
-static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
-{
-	return (offset >> inode->i_blkbits);
-}
-
-static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
-{
-	return (blk << inode->i_blkbits);
 }

 static int f2fs_xattr_fiemap(struct inode *inode,
@@ -1877,13 +1811,13 @@
 	if (!page)
 		return -ENOMEM;

-	err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+	err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
 	if (err) {
 		f2fs_put_page(page, 1);
 		return err;
 	}

-	phys = (__u64)blk_to_logical(inode, ni.blk_addr);
+	phys = blks_to_bytes(inode, ni.blk_addr);
 	offset = offsetof(struct f2fs_inode, i_addr) +
 				sizeof(__le32) * (DEF_ADDRS_PER_INODE -
 					get_inline_xattr_addrs(inode));
@@ -1899,6 +1833,7 @@
 			flags |= FIEMAP_EXTENT_LAST;

 		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
+		trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
 		if (err || err == 1)
 			return err;
 	}
@@ -1908,13 +1843,13 @@
 		if (!page)
 			return -ENOMEM;

-		err = f2fs_get_node_info(sbi, xnid, &ni);
+		err = f2fs_get_node_info(sbi, xnid, &ni, false);
 		if (err) {
 			f2fs_put_page(page, 1);
 			return err;
 		}

-		phys = (__u64)blk_to_logical(inode, ni.blk_addr);
+		phys = blks_to_bytes(inode, ni.blk_addr);
 		len = inode->i_sb->s_blocksize;

 		f2fs_put_page(page, 1);
@@ -1922,8 +1857,10 @@
 		flags = FIEMAP_EXTENT_LAST;
 	}

-	if (phys)
+	if (phys) {
 		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
+		trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
+	}

 	return (err < 0 ? err : 0);
 }
@@ -1950,14 +1887,16 @@
 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		u64 start, u64 len)
 {
-	struct buffer_head map_bh;
+	struct f2fs_map_blocks map;
 	sector_t start_blk, last_blk;
 	pgoff_t next_pgofs;
 	u64 logical = 0, phys = 0, size = 0;
 	u32 flags = 0;
 	int ret = 0;
-	bool compr_cluster = false;
+	bool compr_cluster = false, compr_appended;
 	unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
+	unsigned int count_in_cluster = 0;
+	loff_t maxbytes;

 	if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
 		ret = f2fs_precache_extents(inode);
@@ -1965,11 +1904,20 @@
 			return ret;
 	}

-	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR);
+	ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR);
 	if (ret)
 		return ret;

 	inode_lock(inode);
+
+	maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS;
+	if (start > maxbytes) {
+		ret = -EFBIG;
+		goto out;
+	}
+
+	if (len > maxbytes || (maxbytes - len) < start)
+		len = maxbytes - start;

 	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
 		ret = f2fs_xattr_fiemap(inode, fieinfo);
@@ -1982,41 +1930,55 @@
 		goto out;
 	}

-	if (logical_to_blk(inode, len) == 0)
-		len = blk_to_logical(inode, 1);
+	if (bytes_to_blks(inode, len) == 0)
+		len = blks_to_bytes(inode, 1);

-	start_blk = logical_to_blk(inode, start);
-	last_blk = logical_to_blk(inode, start + len - 1);
+	start_blk = bytes_to_blks(inode, start);
+	last_blk = bytes_to_blks(inode, start + len - 1);

 next:
-	memset(&map_bh, 0, sizeof(struct buffer_head));
-	map_bh.b_size = len;
+	memset(&map, 0, sizeof(map));
+	map.m_lblk = start_blk;
+	map.m_len = bytes_to_blks(inode, len);
+	map.m_next_pgofs = &next_pgofs;
+	map.m_seg_type = NO_CHECK_TYPE;

-	if (compr_cluster)
-		map_bh.b_size = blk_to_logical(inode, cluster_size - 1);
+	if (compr_cluster) {
+		map.m_lblk += 1;
+		map.m_len = cluster_size - count_in_cluster;
+	}

-	ret = get_data_block(inode, start_blk, &map_bh, 0,
-					F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
+	ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
 	if (ret)
 		goto out;

 	/* HOLE */
-	if (!buffer_mapped(&map_bh)) {
+	if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) {
 		start_blk = next_pgofs;

-		if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
+		if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode,
 					max_inode_blocks(inode)))
 			goto prep_next;

 		flags |= FIEMAP_EXTENT_LAST;
 	}

+	compr_appended = false;
+	/* In a case of compressed cluster, append this to the last extent */
+	if (compr_cluster && ((map.m_flags & F2FS_MAP_UNWRITTEN) ||
+			!(map.m_flags & F2FS_MAP_FLAGS))) {
+		compr_appended = true;
+		goto skip_fill;
+	}
+
 	if (size) {
+		flags |= FIEMAP_EXTENT_MERGED;
 		if (IS_ENCRYPTED(inode))
 			flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;

 		ret = fiemap_fill_next_extent(fieinfo, logical,
 				phys, size, flags);
+		trace_f2fs_fiemap(inode, logical, phys, size, flags, ret);
 		if (ret)
 			goto out;
 		size = 0;
@@ -2025,38 +1987,36 @@
 	if (start_blk > last_blk)
 		goto out;

-	if (compr_cluster) {
-		compr_cluster = false;
-
-
-		logical = blk_to_logical(inode, start_blk - 1);
-		phys = blk_to_logical(inode, map_bh.b_blocknr);
-		size = blk_to_logical(inode, cluster_size);
-
-		flags |= FIEMAP_EXTENT_ENCODED;
-
-		start_blk += cluster_size - 1;
-
-		if (start_blk > last_blk)
-			goto out;
-
-		goto prep_next;
-	}
-
-	if (map_bh.b_blocknr == COMPRESS_ADDR) {
+skip_fill:
+	if (map.m_pblk == COMPRESS_ADDR) {
 		compr_cluster = true;
-		start_blk++;
-		goto prep_next;
+		count_in_cluster = 1;
+	} else if (compr_appended) {
+		unsigned int appended_blks = cluster_size -
+						count_in_cluster + 1;
+		size += blks_to_bytes(inode, appended_blks);
+		start_blk += appended_blks;
+		compr_cluster = false;
+	} else {
+		logical = blks_to_bytes(inode, start_blk);
+		phys = __is_valid_data_blkaddr(map.m_pblk) ?
+			blks_to_bytes(inode, map.m_pblk) : 0;
+		size = blks_to_bytes(inode, map.m_len);
+		flags = 0;
+
+		if (compr_cluster) {
+			flags = FIEMAP_EXTENT_ENCODED;
+			count_in_cluster += map.m_len;
+			if (count_in_cluster == cluster_size) {
+				compr_cluster = false;
+				size += blks_to_bytes(inode, 1);
+			}
+		} else if (map.m_flags & F2FS_MAP_UNWRITTEN) {
+			flags = FIEMAP_EXTENT_UNWRITTEN;
+		}
+
+		start_blk += bytes_to_blks(inode, size);
 	}
-
-	logical = blk_to_logical(inode, start_blk);
-	phys = blk_to_logical(inode, map_bh.b_blocknr);
-	size = map_bh.b_size;
-	flags = 0;
-	if (buffer_unwritten(&map_bh))
-		flags = FIEMAP_EXTENT_UNWRITTEN;
-
-	start_blk += logical_to_blk(inode, size);

 prep_next:
 	cond_resched();
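Reviewer note on the reworked compressed-cluster walk in f2fs_fiemap(): a COMPRESS_ADDR block starts a cluster (count_in_cluster = 1); each subsequent mapped run grows the pending extent and the counter; when the counter reaches the cluster size, one more block's worth of bytes is added for the header block and the extent is flushed as FIEMAP_EXTENT_ENCODED; unwritten/hole results inside a cluster are appended to the last extent via the compr_appended path. A worked example of the size accounting for one fully mapped cluster, assuming a 4-block cluster and 4 KiB blocks — not kernel code:

    #include <stdio.h>

    int main(void)
    {
            const unsigned int cluster_size = 4;    /* blocks per cluster */
            const unsigned long long blksz = 4096;
            unsigned int count_in_cluster;
            unsigned long long size;

            count_in_cluster = 1;                   /* saw the COMPRESS_ADDR block */
            size = 3 * blksz;                       /* one mapped run: map.m_len == 3 */
            count_in_cluster += 3;
            if (count_in_cluster == cluster_size)   /* cluster complete: account for */
                    size += blksz;                  /* the header block as well */

            /* 16384 bytes == cluster_size * blksz, reported as one encoded extent */
            printf("encoded extent: %llu bytes\n", size);
            return 0;
    }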
@@ -2089,8 +2049,7 @@
 		bool is_readahead)
 {
 	struct bio *bio = *bio_ret;
-	const unsigned blkbits = inode->i_blkbits;
-	const unsigned blocksize = 1 << blkbits;
+	const unsigned blocksize = blks_to_bytes(inode, 1);
 	sector_t block_in_file;
 	sector_t last_block;
 	sector_t last_block_in_file;
@@ -2099,8 +2058,8 @@

 	block_in_file = (sector_t)page_index(page);
 	last_block = block_in_file + nr_pages;
-	last_block_in_file = (f2fs_readpage_limit(inode) + blocksize - 1) >>
-							blkbits;
+	last_block_in_file = bytes_to_blks(inode,
+			f2fs_readpage_limit(inode) + blocksize - 1);
 	if (last_block > last_block_in_file)
 		last_block = last_block_in_file;

@@ -2163,7 +2122,7 @@
 				*last_block_in_bio, block_nr) ||
 		!f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
 submit_and_realloc:
-		__f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
+		__submit_bio(F2FS_I_SB(inode), bio, DATA);
 		bio = NULL;
 	}
 	if (bio == NULL) {
@@ -2193,7 +2152,7 @@
 	goto out;
 confused:
 	if (bio) {
-		__f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
+		__submit_bio(F2FS_I_SB(inode), bio, DATA);
 		bio = NULL;
 	}
 	unlock_page(page);
@@ -2213,16 +2172,17 @@
 	struct bio *bio = *bio_ret;
 	unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
 	sector_t last_block_in_file;
-	const unsigned blkbits = inode->i_blkbits;
-	const unsigned blocksize = 1 << blkbits;
+	const unsigned blocksize = blks_to_bytes(inode, 1);
 	struct decompress_io_ctx *dic = NULL;
+	struct extent_info ei = {};
+	bool from_dnode = true;
 	int i;
 	int ret = 0;

 	f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));

-	last_block_in_file = (f2fs_readpage_limit(inode) +
-					blocksize - 1) >> blkbits;
+	last_block_in_file = bytes_to_blks(inode,
+			f2fs_readpage_limit(inode) + blocksize - 1);

 	/* get rid of pages beyond EOF */
 	for (i = 0; i < cc->cluster_size; i++) {
@@ -2238,6 +2198,8 @@
 			continue;
 		}
 		unlock_page(page);
+		if (for_write)
+			put_page(page);
 		cc->rpages[i] = NULL;
 		cc->nr_rpages--;
 	}
@@ -2246,20 +2208,26 @@
 	if (f2fs_cluster_is_empty(cc))
 		goto out;

+	if (f2fs_lookup_read_extent_cache(inode, start_idx, &ei))
+		from_dnode = false;
+
+	if (!from_dnode)
+		goto skip_reading_dnode;
+
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
 	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
 	if (ret)
 		goto out;

-	/* cluster was overwritten as normal cluster */
-	if (dn.data_blkaddr != COMPRESS_ADDR)
-		goto out;
+	f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);

+skip_reading_dnode:
 	for (i = 1; i < cc->cluster_size; i++) {
 		block_t blkaddr;

-		blkaddr = data_blkaddr(dn.inode, dn.node_page,
-						dn.ofs_in_node + i);
+		blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
+					dn.ofs_in_node + i) :
+					ei.blk + i - 1;

 		if (!__is_valid_data_blkaddr(blkaddr))
 			break;
@@ -2269,6 +2237,9 @@
 			goto out_put_dnode;
 		}
 		cc->nr_cpages++;
+
+		if (!from_dnode && i >= ei.c_len)
+			break;
 	}

 	/* nothing to decompress */
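Reviewer note: f2fs_read_multi_pages() can now take a compressed cluster's physical layout from the read extent cache instead of walking the dnode — on a cache hit the dnode lookup is skipped entirely (skip_reading_dnode), block addresses are computed as ei.blk + i - 1, and ei.c_len bounds how many compressed pages exist. A standalone sketch of that address selection — not kernel code; ei_blk and c_len mirror the patch's extent_info fields:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long ei_blk = 5000;   /* hypothetical extent start block */
            unsigned int c_len = 3;             /* compressed blocks in the extent */
            unsigned int cluster_size = 4;
            unsigned int i;

            for (i = 1; i < cluster_size; i++) {
                    /* on a cache hit the addresses are ei.blk + i - 1; no dnode walk */
                    printf("cpage %u -> block %llu\n", i, ei_blk + i - 1);
                    if (i >= c_len)
                            break;              /* only c_len compressed pages exist */
            }
            return 0;
    }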
@@ -2283,13 +2254,22 @@
 		goto out_put_dnode;
 	}

-	for (i = 0; i < dic->nr_cpages; i++) {
+	for (i = 0; i < cc->nr_cpages; i++) {
 		struct page *page = dic->cpages[i];
 		block_t blkaddr;
 		struct bio_post_read_ctx *ctx;

-		blkaddr = data_blkaddr(dn.inode, dn.node_page,
-						dn.ofs_in_node + i + 1);
+		blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
+					dn.ofs_in_node + i + 1) :
+					ei.blk + i;
+
+		f2fs_wait_on_block_writeback(inode, blkaddr);
+
+		if (f2fs_load_compressed_page(sbi, page, blkaddr)) {
+			if (atomic_dec_and_test(&dic->remaining_pages))
+				f2fs_decompress_cluster(dic, true);
+			continue;
+		}

 		if (bio && (!page_is_mergeable(sbi, bio,
 					*last_block_in_bio, blkaddr) ||
@@ -2305,29 +2285,19 @@
 					page->index, for_write);
 			if (IS_ERR(bio)) {
 				ret = PTR_ERR(bio);
-				dic->failed = true;
-				if (refcount_sub_and_test(dic->nr_cpages - i,
-							&dic->ref)) {
-					f2fs_decompress_end_io(dic->rpages,
-							cc->cluster_size, true,
-							false);
-					f2fs_free_dic(dic);
-				}
+				f2fs_decompress_end_io(dic, ret, true);
 				f2fs_put_dnode(&dn);
 				*bio_ret = NULL;
 				return ret;
 			}
 		}

-		f2fs_wait_on_block_writeback(inode, blkaddr);
-
 		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
 			goto submit_and_realloc;

-		/* tag STEP_DECOMPRESS to handle IO in wq */
 		ctx = bio->bi_private;
-		if (!(ctx->enabled_steps & (1 << STEP_DECOMPRESS)))
-			ctx->enabled_steps |= 1 << STEP_DECOMPRESS;
+		ctx->enabled_steps |= STEP_DECOMPRESS;
+		refcount_inc(&dic->refcnt);

 		inc_page_count(sbi, F2FS_RD_DATA);
 		f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
@@ -2336,15 +2306,23 @@
 		*last_block_in_bio = blkaddr;
 	}

-	f2fs_put_dnode(&dn);
+	if (from_dnode)
+		f2fs_put_dnode(&dn);

 	*bio_ret = bio;
 	return 0;

 out_put_dnode:
-	f2fs_put_dnode(&dn);
+	if (from_dnode)
+		f2fs_put_dnode(&dn);
 out:
-	f2fs_decompress_end_io(cc->rpages, cc->cluster_size, true, false);
+	for (i = 0; i < cc->cluster_size; i++) {
+		if (cc->rpages[i]) {
+			ClearPageUptodate(cc->rpages[i]);
+			ClearPageError(cc->rpages[i]);
+			unlock_page(cc->rpages[i]);
+		}
+	}
 	*bio_ret = bio;
 	return ret;
 }
@@ -2353,19 +2331,12 @@
 /*
 * This function was originally taken from fs/mpage.c, and customized for f2fs.
 * Major change was from block_size == page_size in f2fs by default.
- *
- * Note that the aops->readpages() function is ONLY used for read-ahead. If
- * this function ever deviates from doing just read-ahead, it should either
- * use ->readpage() or do the necessary surgery to decouple ->readpages()
- * from read-ahead.
 */
-int f2fs_mpage_readpages(struct address_space *mapping,
-			struct list_head *pages, struct page *page,
-			unsigned nr_pages, bool is_readahead)
+static int f2fs_mpage_readpages(struct inode *inode,
+		struct readahead_control *rac, struct page *page)
 {
 	struct bio *bio = NULL;
 	sector_t last_block_in_bio = 0;
-	struct inode *inode = mapping->host;
 	struct f2fs_map_blocks map;
 #ifdef CONFIG_F2FS_FS_COMPRESSION
 	struct compress_ctx cc = {
@@ -2379,6 +2350,7 @@
 		.nr_cpages = 0,
 	};
 #endif
+	unsigned nr_pages = rac ? readahead_count(rac) : 1;
 	unsigned max_nr_pages = nr_pages;
 	int ret = 0;

@@ -2392,15 +2364,9 @@
 	map.m_may_create = false;

 	for (; nr_pages; nr_pages--) {
-		if (pages) {
-			page = list_last_entry(pages, struct page, lru);
-
+		if (rac) {
+			page = readahead_page(rac);
 			prefetchw(&page->flags);
-			list_del(&page->lru);
-			if (add_to_page_cache_lru(page, mapping,
-						  page_index(page),
-						  readahead_gfp_mask(mapping)))
-				goto next_page;
 		}

 #ifdef CONFIG_F2FS_FS_COMPRESSION
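Reviewer note: this is the mm-driven conversion from ->readpages() (a caller-supplied page list that the filesystem had to insert into the page cache itself) to ->readahead() (pages already locked and in the page cache, consumed via readahead_page()). A standalone model of the new loop shape — not kernel code; readahead_page() here just hands back page indexes:

    #include <stdio.h>

    struct rac_model {
            long next_index;
            unsigned int count;
    };

    static long readahead_page(struct rac_model *rac)
    {
            /* the kernel returns a locked struct page; the index is enough here */
            return rac->next_index++;
    }

    int main(void)
    {
            struct rac_model rac = { .next_index = 100, .count = 4 };
            unsigned int nr_pages;

            for (nr_pages = rac.count; nr_pages; nr_pages--)
                    printf("submit read for page %ld\n", readahead_page(&rac));
            return 0;
    }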
....@@ -2410,8 +2376,8 @@
24102376 ret = f2fs_read_multi_pages(&cc, &bio,
24112377 max_nr_pages,
24122378 &last_block_in_bio,
2413
- is_readahead, false);
2414
- f2fs_destroy_compress_ctx(&cc);
2379
+ rac != NULL, false);
2380
+ f2fs_destroy_compress_ctx(&cc, false);
24152381 if (ret)
24162382 goto set_error_page;
24172383 }
....@@ -2433,7 +2399,7 @@
24332399 #endif
24342400
24352401 ret = f2fs_read_single_page(inode, page, max_nr_pages, &map,
2436
- &bio, &last_block_in_bio, is_readahead);
2402
+ &bio, &last_block_in_bio, rac);
24372403 if (ret) {
24382404 #ifdef CONFIG_F2FS_FS_COMPRESSION
24392405 set_error_page:
....@@ -2442,8 +2408,10 @@
24422408 zero_user_segment(page, 0, PAGE_SIZE);
24432409 unlock_page(page);
24442410 }
2411
+#ifdef CONFIG_F2FS_FS_COMPRESSION
24452412 next_page:
2446
- if (pages)
2413
+#endif
2414
+ if (rac)
24472415 put_page(page);
24482416
24492417 #ifdef CONFIG_F2FS_FS_COMPRESSION
....@@ -2453,16 +2421,15 @@
24532421 ret = f2fs_read_multi_pages(&cc, &bio,
24542422 max_nr_pages,
24552423 &last_block_in_bio,
2456
- is_readahead, false);
2457
- f2fs_destroy_compress_ctx(&cc);
2424
+ rac != NULL, false);
2425
+ f2fs_destroy_compress_ctx(&cc, false);
24582426 }
24592427 }
24602428 #endif
24612429 }
2462
- BUG_ON(pages && !list_empty(pages));
24632430 if (bio)
2464
- __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
2465
- return pages ? 0 : ret;
2431
+ __submit_bio(F2FS_I_SB(inode), bio, DATA);
2432
+ return ret;
24662433 }
24672434
24682435 static int f2fs_read_data_page(struct file *file, struct page *page)
....@@ -2481,28 +2448,24 @@
24812448 if (f2fs_has_inline_data(inode))
24822449 ret = f2fs_read_inline_data(inode, page);
24832450 if (ret == -EAGAIN)
2484
- ret = f2fs_mpage_readpages(page_file_mapping(page),
2485
- NULL, page, 1, false);
2451
+ ret = f2fs_mpage_readpages(inode, NULL, page);
24862452 return ret;
24872453 }
24882454
2489
-static int f2fs_read_data_pages(struct file *file,
2490
- struct address_space *mapping,
2491
- struct list_head *pages, unsigned nr_pages)
2455
+static void f2fs_readahead(struct readahead_control *rac)
24922456 {
2493
- struct inode *inode = mapping->host;
2494
- struct page *page = list_last_entry(pages, struct page, lru);
2457
+ struct inode *inode = rac->mapping->host;
24952458
2496
- trace_f2fs_readpages(inode, page, nr_pages);
2459
+ trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac));
24972460
24982461 if (!f2fs_is_compress_backend_ready(inode))
2499
- return 0;
2462
+ return;
25002463
25012464 /* If the file has inline data, skip readpages */
25022465 if (f2fs_has_inline_data(inode))
2503
- return 0;
2466
+ return;
25042467
2505
- return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
2468
+ f2fs_mpage_readpages(inode, rac, NULL);
25062469 }
25072470
25082471 int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
....@@ -2552,6 +2515,9 @@
25522515 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
25532516 unsigned int policy = SM_I(sbi)->ipu_policy;
25542517
2518
+ if (policy & (0x1 << F2FS_IPU_HONOR_OPU_WRITE) &&
2519
+ is_inode_flag_set(inode, FI_OPU_WRITE))
2520
+ return false;
25552521 if (policy & (0x1 << F2FS_IPU_FORCE))
25562522 return true;
25572523 if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
....@@ -2586,11 +2552,15 @@
25862552
25872553 bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
25882554 {
2555
+ /* swap file is migrating in aligned write mode */
2556
+ if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2557
+ return false;
2558
+
25892559 if (f2fs_is_pinned_file(inode))
25902560 return true;
25912561
25922562 /* if this is cold file, we should overwrite to avoid fragmentation */
2593
- if (file_is_cold(inode))
2563
+ if (file_is_cold(inode) && !is_inode_flag_set(inode, FI_OPU_WRITE))
25942564 return true;
25952565
25962566 return check_inplace_update_policy(inode, fio);
....@@ -2613,10 +2583,18 @@
26132583 return true;
26142584 if (f2fs_is_atomic_file(inode))
26152585 return true;
2586
+
2587
+ /* swap file is migrating in aligned write mode */
2588
+ if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2589
+ return true;
2590
+
2591
+ if (is_inode_flag_set(inode, FI_OPU_WRITE))
2592
+ return true;
2593
+
26162594 if (fio) {
2617
- if (is_cold_data(fio->page))
2595
+ if (page_private_gcing(fio->page))
26182596 return true;
2619
- if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
2597
+ if (page_private_dummy(fio->page))
26202598 return true;
26212599 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
26222600 f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
....@@ -2640,14 +2618,14 @@
26402618 struct page *page = fio->page;
26412619 struct inode *inode = page->mapping->host;
26422620 struct dnode_of_data dn;
2643
- struct extent_info ei = {0,0,0};
2621
+ struct extent_info ei = {0, };
26442622 struct node_info ni;
26452623 bool ipu_force = false;
26462624 int err = 0;
26472625
26482626 set_new_dnode(&dn, inode, NULL, NULL, 0);
26492627 if (need_inplace_update(fio) &&
2650
- f2fs_lookup_extent_cache(inode, page->index, &ei)) {
2628
+ f2fs_lookup_read_extent_cache(inode, page->index, &ei)) {
26512629 fio->old_blkaddr = ei.blk + page->index - ei.fofs;
26522630
26532631 if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
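
This hunk (and the write_begin hunk further down) reads a block address out of a cached extent with ei.blk + index - ei.fofs: an extent maps file offsets [fofs, fofs + len) to disk blocks starting at blk. A worked example of that arithmetic; struct demo_extent is a simplified stand-in for struct extent_info:

#include <stdio.h>

/* Simplified stand-in for struct extent_info: `len` blocks at file
 * offset `fofs`, stored contiguously on disk starting at `blk`. */
struct demo_extent {
	unsigned int fofs;
	unsigned int blk;
	unsigned int len;
};

int main(void)
{
	struct demo_extent ei = { .fofs = 100, .blk = 5000, .len = 8 };
	unsigned int index = 103;	/* page index inside the extent */

	if (index >= ei.fofs && index < ei.fofs + ei.len)
		printf("old_blkaddr = %u\n",
		       ei.blk + index - ei.fofs);	/* prints 5003 */
	return 0;
}
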
....@@ -2672,7 +2650,7 @@
26722650 /* This page is already truncated */
26732651 if (fio->old_blkaddr == NULL_ADDR) {
26742652 ClearPageUptodate(page);
2675
- clear_cold_data(page);
2653
+ clear_page_private_gcing(page);
26762654 goto out_writepage;
26772655 }
26782656 got_it:
....@@ -2719,7 +2697,7 @@
27192697 fio->need_lock = LOCK_REQ;
27202698 }
27212699
2722
- err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
2700
+ err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false);
27232701 if (err)
27242702 goto out_writepage;
27252703
....@@ -2754,7 +2732,8 @@
27542732 sector_t *last_block,
27552733 struct writeback_control *wbc,
27562734 enum iostat_type io_type,
2757
- int compr_blocks)
2735
+ int compr_blocks,
2736
+ bool allow_balance)
27582737 {
27592738 struct inode *inode = page->mapping->host;
27602739 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
....@@ -2777,6 +2756,7 @@
27772756 .submitted = false,
27782757 .compr_blocks = compr_blocks,
27792758 .need_lock = LOCK_RETRY,
2759
+ .post_read = f2fs_post_read_required(inode),
27802760 .io_type = io_type,
27812761 .io_wbc = wbc,
27822762 .bio = bio,
....@@ -2792,7 +2772,8 @@
27922772	 * don't drop any dirty dentry pages for keeping latest
27932773 * directory structure.
27942774 */
2795
- if (S_ISDIR(inode->i_mode))
2775
+ if (S_ISDIR(inode->i_mode) &&
2776
+ !is_sbi_flag_set(sbi, SBI_IS_CLOSE))
27962777 goto redirty_out;
27972778 goto out;
27982779 }
....@@ -2825,8 +2806,20 @@
28252806
28262807 /* Dentry/quota blocks are controlled by checkpoint */
28272808 if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
2809
+ /*
2810
+ * We need to wait for node_write to avoid block allocation during
2811
+		 * checkpoint. This can only happen for quota writes, which can
2812
+		 * otherwise trigger the discard race condition below.
2813
+ */
2814
+ if (IS_NOQUOTA(inode))
2815
+ f2fs_down_read(&sbi->node_write);
2816
+
28282817 fio.need_lock = LOCK_DONE;
28292818 err = f2fs_do_write_data_page(&fio);
2819
+
2820
+ if (IS_NOQUOTA(inode))
2821
+ f2fs_up_read(&sbi->node_write);
2822
+
28302823 goto done;
28312824 }
28322825
....@@ -2869,7 +2862,7 @@
28692862 inode_dec_dirty_pages(inode);
28702863 if (err) {
28712864 ClearPageUptodate(page);
2872
- clear_cold_data(page);
2865
+ clear_page_private_gcing(page);
28732866 }
28742867
28752868 if (wbc->for_reclaim) {
....@@ -2880,12 +2873,13 @@
28802873 }
28812874 unlock_page(page);
28822875 if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
2883
- !F2FS_I(inode)->cp_task)
2876
+ !F2FS_I(inode)->wb_task && allow_balance)
28842877 f2fs_balance_fs(sbi, need_balance_fs);
28852878
28862879 if (unlikely(f2fs_cp_error(sbi))) {
28872880 f2fs_submit_merged_write(sbi, DATA);
2888
- f2fs_submit_merged_ipu_write(sbi, bio, NULL);
2881
+ if (bio && *bio)
2882
+ f2fs_submit_merged_ipu_write(sbi, bio, NULL);
28892883 submitted = NULL;
28902884 }
28912885
....@@ -2927,7 +2921,7 @@
29272921 #endif
29282922
29292923 return f2fs_write_single_data_page(page, NULL, NULL, NULL,
2930
- wbc, FS_DATA_IO, 0);
2924
+ wbc, FS_DATA_IO, 0, true);
29312925 }
29322926
29332927 /*
....@@ -2962,12 +2956,11 @@
29622956 };
29632957 #endif
29642958 int nr_pages;
2965
- pgoff_t uninitialized_var(writeback_index);
29662959 pgoff_t index;
29672960 pgoff_t end; /* Inclusive */
29682961 pgoff_t done_index;
29692962 int range_whole = 0;
2970
- int tag;
2963
+ xa_mark_t tag;
29712964 int nwritten = 0;
29722965 int submitted = 0;
29732966 int i;
....@@ -2981,8 +2974,7 @@
29812974 clear_inode_flag(mapping->host, FI_HOT_DATA);
29822975
29832976 if (wbc->range_cyclic) {
2984
- writeback_index = mapping->writeback_index; /* prev offset */
2985
- index = writeback_index;
2977
+ index = mapping->writeback_index; /* prev offset */
29862978 end = -1;
29872979 } else {
29882980 index = wbc->range_start >> PAGE_SHIFT;
....@@ -3097,7 +3089,8 @@
30973089 }
30983090 #endif
30993091 ret = f2fs_write_single_data_page(page, &submitted,
3100
- &bio, &last_block, wbc, io_type, 0);
3092
+ &bio, &last_block, wbc, io_type,
3093
+ 0, true);
31013094 if (ret == AOP_WRITEPAGE_ACTIVATE)
31023095 unlock_page(page);
31033096 #ifdef CONFIG_F2FS_FS_COMPRESSION
....@@ -3152,6 +3145,8 @@
31523145 retry = 0;
31533146 }
31543147 }
3148
+ if (f2fs_compressed_file(inode))
3149
+ f2fs_destroy_compress_ctx(&cc, false);
31553150 #endif
31563151 if (retry) {
31573152 index = 0;
....@@ -3177,7 +3172,7 @@
31773172 struct writeback_control *wbc)
31783173 {
31793174 /* to avoid deadlock in path of data flush */
3180
- if (F2FS_I(inode)->cp_task)
3175
+ if (F2FS_I(inode)->wb_task)
31813176 return false;
31823177
31833178 if (!S_ISREG(inode->i_mode))
....@@ -3185,7 +3180,7 @@
31853180 if (IS_NOQUOTA(inode))
31863181 return false;
31873182
3188
- if (f2fs_compressed_file(inode))
3183
+ if (f2fs_need_compress_data(inode))
31893184 return true;
31903185 if (wbc->sync_mode != WB_SYNC_ALL)
31913186 return true;
....@@ -3222,8 +3217,8 @@
32223217 f2fs_available_free_memory(sbi, DIRTY_DENTS))
32233218 goto skip_write;
32243219
3225
- /* skip writing during file defragment */
3226
- if (is_inode_flag_set(inode, FI_DO_DEFRAG))
3220
+ /* skip writing in file defragment preparing stage */
3221
+ if (is_inode_flag_set(inode, FI_SKIP_WRITES))
32273222 goto skip_write;
32283223
32293224 trace_f2fs_writepages(mapping->host, wbc, DATA);
....@@ -3231,8 +3226,12 @@
32312226	/* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
32323227 if (wbc->sync_mode == WB_SYNC_ALL)
32333228 atomic_inc(&sbi->wb_sync_req[DATA]);
3234
- else if (atomic_read(&sbi->wb_sync_req[DATA]))
3229
+ else if (atomic_read(&sbi->wb_sync_req[DATA])) {
3230
+ /* to avoid potential deadlock */
3231
+ if (current->plug)
3232
+ blk_finish_plug(current->plug);
32353233 goto skip_write;
3234
+ }
32363235
32373236 if (__should_serialize_io(inode, wbc)) {
32383237 mutex_lock(&sbi->writepages);
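
The new blk_finish_plug() call above addresses a subtle hang: bios queued on this task's block plug are only issued when the plug is flushed, so a task that backs off behind the WB_SYNC_ALL writer while still holding plugged bios could stall that writer indefinitely. A hedged sketch of the pattern; demo_flush_plug_before_backoff() is a hypothetical helper, not an f2fs function:

#include <linux/blkdev.h>
#include <linux/sched.h>

/* Flush any bios this task has plugged before it defers to another
 * writer, so that writer can never end up waiting on them. */
static void demo_flush_plug_before_backoff(void)
{
	if (current->plug)
		blk_finish_plug(current->plug);
}
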
....@@ -3282,14 +3281,14 @@
32823281
32833282 /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
32843283 if (to > i_size && !f2fs_verity_in_progress(inode)) {
3285
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3286
- down_write(&F2FS_I(inode)->i_mmap_sem);
3284
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3285
+ f2fs_down_write(&F2FS_I(inode)->i_mmap_sem);
32873286
32883287 truncate_pagecache(inode, i_size);
32893288 f2fs_truncate_blocks(inode, i_size, true);
32903289
3291
- up_write(&F2FS_I(inode)->i_mmap_sem);
3292
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3290
+ f2fs_up_write(&F2FS_I(inode)->i_mmap_sem);
3291
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
32933292 }
32943293 }
32953294
....@@ -3302,7 +3301,7 @@
33023301 struct dnode_of_data dn;
33033302 struct page *ipage;
33043303 bool locked = false;
3305
- struct extent_info ei = {0,0,0};
3304
+ struct extent_info ei = {0, };
33063305 int err = 0;
33073306 int flag;
33083307
....@@ -3323,7 +3322,7 @@
33233322
33243323 if (f2fs_has_inline_data(inode) ||
33253324 (pos & PAGE_MASK) >= i_size_read(inode)) {
3326
- __do_map_lock(sbi, flag, true);
3325
+ f2fs_do_map_lock(sbi, flag, true);
33273326 locked = true;
33283327 }
33293328
....@@ -3342,7 +3341,7 @@
33423341 f2fs_do_read_inline_data(page, ipage);
33433342 set_inode_flag(inode, FI_DATA_EXIST);
33443343 if (inode->i_nlink)
3345
- set_inline_node(ipage);
3344
+ set_page_private_inline(ipage);
33463345 } else {
33473346 err = f2fs_convert_inline_page(&dn, page);
33483347 if (err)
....@@ -3353,14 +3352,14 @@
33533352 } else if (locked) {
33543353 err = f2fs_get_block(&dn, index);
33553354 } else {
3356
- if (f2fs_lookup_extent_cache(inode, index, &ei)) {
3355
+ if (f2fs_lookup_read_extent_cache(inode, index, &ei)) {
33573356 dn.data_blkaddr = ei.blk + index - ei.fofs;
33583357 } else {
33593358 /* hole case */
33603359 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
33613360 if (err || dn.data_blkaddr == NULL_ADDR) {
33623361 f2fs_put_dnode(&dn);
3363
- __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
3362
+ f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
33643363 true);
33653364 WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
33663365 locked = true;
....@@ -3376,7 +3375,7 @@
33763375 f2fs_put_dnode(&dn);
33773376 unlock_out:
33783377 if (locked)
3379
- __do_map_lock(sbi, flag, false);
3378
+ f2fs_do_map_lock(sbi, flag, false);
33803379 return err;
33813380 }
33823381
....@@ -3392,7 +3391,13 @@
33923391 block_t blkaddr = NULL_ADDR;
33933392 int err = 0;
33943393
3395
- if (trace_android_fs_datawrite_start_enabled()) {
3394
+ /*
3395
+	 * Should avoid quota operations which can cause a deadlock:
3396
+ * kswapd -> f2fs_evict_inode -> dquot_drop ->
3397
+ * f2fs_dquot_commit -> f2fs_write_begin ->
3398
+ * d_obtain_alias -> __d_alloc -> kmem_cache_alloc(GFP_KERNEL)
3399
+ */
3400
+ if (trace_android_fs_datawrite_start_enabled() && !IS_NOQUOTA(inode)) {
33963401 char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
33973402
33983403 path = android_fstrace_get_pathname(pathbuf,
....@@ -3433,6 +3438,9 @@
34333438 int ret;
34343439
34353440 *fsdata = NULL;
3441
+
3442
+	if (len == PAGE_SIZE && !f2fs_is_atomic_file(inode))
3443
+ goto repeat;
34363444
34373445 ret = f2fs_prepare_compress_overwrite(inode, pagep,
34383446 index, fsdata);
....@@ -3498,7 +3506,7 @@
34983506 err = -EFSCORRUPTED;
34993507 goto fail;
35003508 }
3501
- err = f2fs_submit_page_read(inode, page, blkaddr, true);
3509
+ err = f2fs_submit_page_read(inode, page, blkaddr, 0, true);
35023510 if (err)
35033511 goto fail;
35043512
....@@ -3549,6 +3557,10 @@
35493557 if (f2fs_compressed_file(inode) && fsdata) {
35503558 f2fs_compress_write_end(inode, fsdata, page->index, copied);
35513559 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3560
+
3561
+ if (pos + copied > i_size_read(inode) &&
3562
+ !f2fs_verity_in_progress(inode))
3563
+ f2fs_i_size_write(inode, pos + copied);
35523564 return copied;
35533565 }
35543566 #endif
....@@ -3600,7 +3612,7 @@
36003612 bio->bi_private = dio->orig_private;
36013613 bio->bi_end_io = dio->orig_end_io;
36023614
3603
- kvfree(dio);
3615
+ kfree(dio);
36043616
36053617 bio_endio(bio);
36063618 }
....@@ -3686,21 +3698,21 @@
36863698 iocb->ki_hint = WRITE_LIFE_NOT_SET;
36873699
36883700 if (iocb->ki_flags & IOCB_NOWAIT) {
3689
- if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
3701
+ if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[rw])) {
36903702 iocb->ki_hint = hint;
36913703 err = -EAGAIN;
36923704 goto out;
36933705 }
3694
- if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
3695
- up_read(&fi->i_gc_rwsem[rw]);
3706
+ if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
3707
+ f2fs_up_read(&fi->i_gc_rwsem[rw]);
36963708 iocb->ki_hint = hint;
36973709 err = -EAGAIN;
36983710 goto out;
36993711 }
37003712 } else {
3701
- down_read(&fi->i_gc_rwsem[rw]);
3713
+ f2fs_down_read(&fi->i_gc_rwsem[rw]);
37023714 if (do_opu)
3703
- down_read(&fi->i_gc_rwsem[READ]);
3715
+ f2fs_down_read(&fi->i_gc_rwsem[READ]);
37043716 }
37053717
37063718 err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
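
The IOCB_NOWAIT branch above is the standard non-blocking locking pattern: trylock both i_gc_rwsem locks, return -EAGAIN on contention, and drop the first lock when only the second fails. A generic sketch with plain rw_semaphores (the f2fs_down_read*() wrappers seen in the hunk layer extra lockdep bookkeeping over these primitives):

#include <linux/rwsem.h>
#include <linux/errno.h>
#include <linux/types.h>

/* Take two read locks, blocking or trylock-with--EAGAIN; on a partial
 * failure the first lock is released so nothing leaks. */
static int demo_lock_pair(struct rw_semaphore *a, struct rw_semaphore *b,
			  bool nowait)
{
	if (!nowait) {
		down_read(a);
		down_read(b);
		return 0;
	}
	if (!down_read_trylock(a))
		return -EAGAIN;
	if (!down_read_trylock(b)) {
		up_read(a);
		return -EAGAIN;
	}
	return 0;
}
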
....@@ -3710,9 +3722,9 @@
37103722 DIO_SKIP_HOLES);
37113723
37123724 if (do_opu)
3713
- up_read(&fi->i_gc_rwsem[READ]);
3725
+ f2fs_up_read(&fi->i_gc_rwsem[READ]);
37143726
3715
- up_read(&fi->i_gc_rwsem[rw]);
3727
+ f2fs_up_read(&fi->i_gc_rwsem[rw]);
37163728
37173729 if (rw == WRITE) {
37183730 if (whint_mode == WHINT_MODE_OFF)
....@@ -3722,12 +3734,18 @@
37223734 err);
37233735 if (!do_opu)
37243736 set_inode_flag(inode, FI_UPDATE_WRITE);
3737
+ } else if (err == -EIOCBQUEUED) {
3738
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
3739
+ count - iov_iter_count(iter));
37253740 } else if (err < 0) {
37263741 f2fs_write_failed(mapping, offset + count);
37273742 }
37283743 } else {
37293744 if (err > 0)
37303745 f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
3746
+ else if (err == -EIOCBQUEUED)
3747
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
3748
+ count - iov_iter_count(iter));
37313749 }
37323750
37333751 out:
....@@ -3764,12 +3782,20 @@
37643782 }
37653783 }
37663784
3767
- clear_cold_data(page);
3785
+ clear_page_private_gcing(page);
37683786
3769
- if (IS_ATOMIC_WRITTEN_PAGE(page))
3787
+ if (test_opt(sbi, COMPRESS_CACHE)) {
3788
+ if (f2fs_compressed_file(inode))
3789
+ f2fs_invalidate_compress_pages(sbi, inode->i_ino);
3790
+ if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
3791
+ clear_page_private_data(page);
3792
+ }
3793
+
3794
+ if (page_private_atomic(page))
37703795 return f2fs_drop_inmem_page(inode, page);
37713796
3772
- f2fs_clear_page_private(page);
3797
+ detach_page_private(page);
3798
+ set_page_private(page, 0);
37733799 }
37743800
37753801 int f2fs_release_page(struct page *page, gfp_t wait)
....@@ -3779,11 +3805,23 @@
37793805 return 0;
37803806
37813807 /* This is atomic written page, keep Private */
3782
- if (IS_ATOMIC_WRITTEN_PAGE(page))
3808
+ if (page_private_atomic(page))
37833809 return 0;
37843810
3785
- clear_cold_data(page);
3786
- f2fs_clear_page_private(page);
3811
+ if (test_opt(F2FS_P_SB(page), COMPRESS_CACHE)) {
3812
+ struct f2fs_sb_info *sbi = F2FS_P_SB(page);
3813
+ struct inode *inode = page->mapping->host;
3814
+
3815
+ if (f2fs_compressed_file(inode))
3816
+ f2fs_invalidate_compress_pages(sbi, inode->i_ino);
3817
+ if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
3818
+ clear_page_private_data(page);
3819
+ }
3820
+
3821
+ clear_page_private_gcing(page);
3822
+
3823
+ detach_page_private(page);
3824
+ set_page_private(page, 0);
37873825 return 1;
37883826 }
37893827
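
Both the invalidate and release paths above now end with detach_page_private() plus set_page_private(page, 0). For reference, a small sketch of the helper pair's contract as provided by linux/pagemap.h; demo_page_private() is illustrative only:

#include <linux/mm.h>
#include <linux/pagemap.h>

/* attach_page_private() stores data in page->private, sets PG_private
 * and takes a page reference; detach_page_private() undoes all three
 * and returns the stored pointer. */
static void demo_page_private(struct page *page, void *data)
{
	attach_page_private(page, data);
	WARN_ON(detach_page_private(page) != data);
}
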
....@@ -3799,7 +3837,7 @@
37993837 return __set_page_dirty_nobuffers(page);
38003838
38013839 if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
3802
- if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
3840
+ if (!page_private_atomic(page)) {
38033841 f2fs_register_inmem_page(inode, page);
38043842 return 1;
38053843 }
....@@ -3841,10 +3879,9 @@
38413879 }
38423880
38433881 f2fs_put_dnode(&dn);
3844
-
38453882 return blknr;
38463883 #else
3847
- return -EOPNOTSUPP;
3884
+ return 0;
38483885 #endif
38493886 }
38503887
....@@ -3852,18 +3889,36 @@
38523889 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
38533890 {
38543891 struct inode *inode = mapping->host;
3892
+ sector_t blknr = 0;
38553893
38563894 if (f2fs_has_inline_data(inode))
3857
- return 0;
3895
+ goto out;
38583896
38593897 /* make sure allocating whole blocks */
38603898 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
38613899 filemap_write_and_wait(mapping);
38623900
3863
- if (f2fs_compressed_file(inode))
3864
- return f2fs_bmap_compress(inode, block);
3901
+	/* a valid block number is always less than the max file blocks */
3902
+ if (unlikely(block >= max_file_blocks(inode)))
3903
+ goto out;
38653904
3866
- return generic_block_bmap(mapping, block, get_data_block_bmap);
3905
+ if (f2fs_compressed_file(inode)) {
3906
+ blknr = f2fs_bmap_compress(inode, block);
3907
+ } else {
3908
+ struct f2fs_map_blocks map;
3909
+
3910
+ memset(&map, 0, sizeof(map));
3911
+ map.m_lblk = block;
3912
+ map.m_len = 1;
3913
+ map.m_next_pgofs = NULL;
3914
+ map.m_seg_type = NO_CHECK_TYPE;
3915
+
3916
+ if (!f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_BMAP))
3917
+ blknr = map.m_pblk;
3918
+ }
3919
+out:
3920
+ trace_f2fs_bmap(inode, block, blknr);
3921
+ return blknr;
38673922 }
38683923
38693924 #ifdef CONFIG_MIGRATION
....@@ -3874,7 +3929,7 @@
38743929 {
38753930 int rc, extra_count;
38763931 struct f2fs_inode_info *fi = F2FS_I(mapping->host);
3877
- bool atomic_written = IS_ATOMIC_WRITTEN_PAGE(page);
3932
+ bool atomic_written = page_private_atomic(page);
38783933
38793934 BUG_ON(PageWriteback(page));
38803935
....@@ -3889,7 +3944,7 @@
38893944 /* one extra reference was held for atomic_write page */
38903945 extra_count = atomic_written ? 1 : 0;
38913946 rc = migrate_page_move_mapping(mapping, newpage,
3892
- page, NULL, mode, extra_count);
3947
+ page, extra_count);
38933948 if (rc != MIGRATEPAGE_SUCCESS) {
38943949 if (atomic_written)
38953950 mutex_unlock(&fi->inmem_lock);
....@@ -3898,6 +3953,7 @@
38983953
38993954 if (atomic_written) {
39003955 struct inmem_pages *cur;
3956
+
39013957 list_for_each_entry(cur, &fi->inmem_pages, list)
39023958 if (cur->page == page) {
39033959 cur->page = newpage;
....@@ -3908,9 +3964,16 @@
39083964 get_page(newpage);
39093965 }
39103966
3967
+	/* make sure the new page starts with no stale private field */
3968
+ set_page_private(newpage, 0);
39113969 if (PagePrivate(page)) {
3912
- f2fs_set_page_private(newpage, page_private(page));
3913
- f2fs_clear_page_private(page);
3970
+ set_page_private(newpage, page_private(page));
3971
+ SetPagePrivate(newpage);
3972
+ get_page(newpage);
3973
+
3974
+ set_page_private(page, 0);
3975
+ ClearPagePrivate(page);
3976
+ put_page(page);
39143977 }
39153978
39163979 if (mode != MIGRATE_SYNC_NO_COPY)
....@@ -3923,97 +3986,172 @@
39233986 #endif
39243987
39253988 #ifdef CONFIG_SWAP
3926
-/* Copied from generic_swapfile_activate() to check any holes */
3989
+static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
3990
+ unsigned int blkcnt)
3991
+{
3992
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3993
+ unsigned int blkofs;
3994
+ unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
3995
+ unsigned int secidx = start_blk / blk_per_sec;
3996
+ unsigned int end_sec = secidx + blkcnt / blk_per_sec;
3997
+ int ret = 0;
3998
+
3999
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
4000
+ f2fs_down_write(&F2FS_I(inode)->i_mmap_sem);
4001
+
4002
+ set_inode_flag(inode, FI_ALIGNED_WRITE);
4003
+ set_inode_flag(inode, FI_OPU_WRITE);
4004
+
4005
+ for (; secidx < end_sec; secidx++) {
4006
+ f2fs_down_write(&sbi->pin_sem);
4007
+
4008
+ f2fs_lock_op(sbi);
4009
+ f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
4010
+ f2fs_unlock_op(sbi);
4011
+
4012
+ set_inode_flag(inode, FI_SKIP_WRITES);
4013
+
4014
+ for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
4015
+ struct page *page;
4016
+ unsigned int blkidx = secidx * blk_per_sec + blkofs;
4017
+
4018
+ page = f2fs_get_lock_data_page(inode, blkidx, true);
4019
+ if (IS_ERR(page)) {
4020
+ f2fs_up_write(&sbi->pin_sem);
4021
+ ret = PTR_ERR(page);
4022
+ goto done;
4023
+ }
4024
+
4025
+ set_page_dirty(page);
4026
+ f2fs_put_page(page, 1);
4027
+ }
4028
+
4029
+ clear_inode_flag(inode, FI_SKIP_WRITES);
4030
+
4031
+ ret = filemap_fdatawrite(inode->i_mapping);
4032
+
4033
+ f2fs_up_write(&sbi->pin_sem);
4034
+
4035
+ if (ret)
4036
+ break;
4037
+ }
4038
+
4039
+done:
4040
+ clear_inode_flag(inode, FI_SKIP_WRITES);
4041
+ clear_inode_flag(inode, FI_OPU_WRITE);
4042
+ clear_inode_flag(inode, FI_ALIGNED_WRITE);
4043
+
4044
+ f2fs_up_write(&F2FS_I(inode)->i_mmap_sem);
4045
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
4046
+
4047
+ return ret;
4048
+}
4049
+
39274050 static int check_swap_activate(struct swap_info_struct *sis,
39284051 struct file *swap_file, sector_t *span)
39294052 {
39304053 struct address_space *mapping = swap_file->f_mapping;
39314054 struct inode *inode = mapping->host;
3932
- unsigned blocks_per_page;
3933
- unsigned long page_no;
3934
- unsigned blkbits;
3935
- sector_t probe_block;
3936
- sector_t last_block;
3937
- sector_t lowest_block = -1;
3938
- sector_t highest_block = 0;
4055
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4056
+ sector_t cur_lblock;
4057
+ sector_t last_lblock;
4058
+ sector_t pblock;
4059
+ sector_t lowest_pblock = -1;
4060
+ sector_t highest_pblock = 0;
39394061 int nr_extents = 0;
3940
- int ret;
3941
-
3942
- blkbits = inode->i_blkbits;
3943
- blocks_per_page = PAGE_SIZE >> blkbits;
4062
+ unsigned long nr_pblocks;
4063
+ unsigned int blks_per_sec = BLKS_PER_SEC(sbi);
4064
+ unsigned int sec_blks_mask = BLKS_PER_SEC(sbi) - 1;
4065
+ unsigned int not_aligned = 0;
4066
+ int ret = 0;
39444067
39454068 /*
39464069 * Map all the blocks into the extent list. This code doesn't try
39474070 * to be very smart.
39484071 */
3949
- probe_block = 0;
3950
- page_no = 0;
3951
- last_block = i_size_read(inode) >> blkbits;
3952
- while ((probe_block + blocks_per_page) <= last_block &&
3953
- page_no < sis->max) {
3954
- unsigned block_in_page;
3955
- sector_t first_block;
4072
+ cur_lblock = 0;
4073
+ last_lblock = bytes_to_blks(inode, i_size_read(inode));
39564074
4075
+ while (cur_lblock < last_lblock && cur_lblock < sis->max) {
4076
+ struct f2fs_map_blocks map;
4077
+retry:
39574078 cond_resched();
39584079
3959
- first_block = bmap(inode, probe_block);
3960
- if (first_block == 0)
3961
- goto bad_bmap;
4080
+ memset(&map, 0, sizeof(map));
4081
+ map.m_lblk = cur_lblock;
4082
+ map.m_len = last_lblock - cur_lblock;
4083
+ map.m_next_pgofs = NULL;
4084
+ map.m_next_extent = NULL;
4085
+ map.m_seg_type = NO_CHECK_TYPE;
4086
+ map.m_may_create = false;
39624087
3963
- /*
3964
- * It must be PAGE_SIZE aligned on-disk
3965
- */
3966
- if (first_block & (blocks_per_page - 1)) {
3967
- probe_block++;
3968
- goto reprobe;
4088
+ ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
4089
+ if (ret)
4090
+ goto out;
4091
+
4092
+ /* hole */
4093
+ if (!(map.m_flags & F2FS_MAP_FLAGS)) {
4094
+ f2fs_err(sbi, "Swapfile has holes");
4095
+ ret = -EINVAL;
4096
+ goto out;
39694097 }
39704098
3971
- for (block_in_page = 1; block_in_page < blocks_per_page;
3972
- block_in_page++) {
3973
- sector_t block;
4099
+ pblock = map.m_pblk;
4100
+ nr_pblocks = map.m_len;
39744101
3975
- block = bmap(inode, probe_block + block_in_page);
3976
- if (block == 0)
3977
- goto bad_bmap;
3978
- if (block != first_block + block_in_page) {
3979
- /* Discontiguity */
3980
- probe_block++;
3981
- goto reprobe;
4102
+ if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
4103
+ nr_pblocks & sec_blks_mask) {
4104
+ not_aligned++;
4105
+
4106
+ nr_pblocks = roundup(nr_pblocks, blks_per_sec);
4107
+ if (cur_lblock + nr_pblocks > sis->max)
4108
+ nr_pblocks -= blks_per_sec;
4109
+
4110
+ if (!nr_pblocks) {
4111
+ /* this extent is last one */
4112
+ nr_pblocks = map.m_len;
4113
+ f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
4114
+ goto next;
39824115 }
3983
- }
39844116
3985
- first_block >>= (PAGE_SHIFT - blkbits);
3986
- if (page_no) { /* exclude the header page */
3987
- if (first_block < lowest_block)
3988
- lowest_block = first_block;
3989
- if (first_block > highest_block)
3990
- highest_block = first_block;
4117
+ ret = f2fs_migrate_blocks(inode, cur_lblock,
4118
+ nr_pblocks);
4119
+ if (ret)
4120
+ goto out;
4121
+ goto retry;
4122
+ }
4123
+next:
4124
+ if (cur_lblock + nr_pblocks >= sis->max)
4125
+ nr_pblocks = sis->max - cur_lblock;
4126
+
4127
+ if (cur_lblock) { /* exclude the header page */
4128
+ if (pblock < lowest_pblock)
4129
+ lowest_pblock = pblock;
4130
+ if (pblock + nr_pblocks - 1 > highest_pblock)
4131
+ highest_pblock = pblock + nr_pblocks - 1;
39914132 }
39924133
39934134 /*
39944135		 * We found a contiguous, suitably aligned run of blocks
39954136 */
3996
- ret = add_swap_extent(sis, page_no, 1, first_block);
4137
+ ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
39974138 if (ret < 0)
39984139 goto out;
39994140 nr_extents += ret;
4000
- page_no++;
4001
- probe_block += blocks_per_page;
4002
-reprobe:
4003
- continue;
4141
+ cur_lblock += nr_pblocks;
40044142 }
40054143 ret = nr_extents;
4006
- *span = 1 + highest_block - lowest_block;
4007
- if (page_no == 0)
4008
- page_no = 1; /* force Empty message */
4009
- sis->max = page_no;
4010
- sis->pages = page_no - 1;
4011
- sis->highest_bit = page_no - 1;
4144
+ *span = 1 + highest_pblock - lowest_pblock;
4145
+ if (cur_lblock == 0)
4146
+ cur_lblock = 1; /* force Empty message */
4147
+ sis->max = cur_lblock;
4148
+ sis->pages = cur_lblock - 1;
4149
+ sis->highest_bit = cur_lblock - 1;
40124150 out:
4151
+ if (not_aligned)
4152
+ f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%u * N)",
4153
+ not_aligned, blks_per_sec * F2FS_BLKSIZE);
40134154 return ret;
4014
-bad_bmap:
4015
- pr_err("swapon: swapfile has holes\n");
4016
- return -EINVAL;
40174155 }
40184156
40194157 static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
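
check_swap_activate() above accepts an extent only if its start block and length are section-aligned, migrating the data otherwise; because a section holds a power-of-two number of blocks, the test is a mask with blks_per_sec - 1 (the kernel first rebases pblock against the main area's start block). A standalone worked example of that alignment check:

#include <stdbool.h>
#include <stdio.h>

/* Alignment test assuming blks_per_sec is a power of two, as f2fs
 * sections are; pblock here is already rebased to the main area. */
static bool sec_aligned(unsigned long pblock, unsigned long nr_pblocks,
			unsigned int blks_per_sec)
{
	unsigned int mask = blks_per_sec - 1;

	return !((pblock & mask) || (nr_pblocks & mask));
}

int main(void)
{
	/* with 512 blocks per section: */
	printf("%d\n", sec_aligned(1024, 512, 512));	/* 1: aligned */
	printf("%d\n", sec_aligned(1000, 512, 512));	/* 0: bad start */
	printf("%d\n", sec_aligned(1024, 300, 512));	/* 0: bad length */
	return 0;
}
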
....@@ -4028,19 +4166,26 @@
40284166 if (f2fs_readonly(F2FS_I_SB(inode)->sb))
40294167 return -EROFS;
40304168
4169
+ if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
4170
+ f2fs_err(F2FS_I_SB(inode),
4171
+ "Swapfile not supported in LFS mode");
4172
+ return -EINVAL;
4173
+ }
4174
+
40314175 ret = f2fs_convert_inline_inode(inode);
40324176 if (ret)
40334177 return ret;
40344178
4035
- if (f2fs_disable_compressed_file(inode))
4179
+ if (!f2fs_disable_compressed_file(inode))
40364180 return -EINVAL;
4181
+
4182
+ f2fs_precache_extents(inode);
40374183
40384184 ret = check_swap_activate(sis, file, span);
40394185 if (ret < 0)
40404186 return ret;
40414187
40424188 set_inode_flag(inode, FI_PIN_FILE);
4043
- f2fs_precache_extents(inode);
40444189 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
40454190 return ret;
40464191 }
....@@ -4065,7 +4210,7 @@
40654210
40664211 const struct address_space_operations f2fs_dblock_aops = {
40674212 .readpage = f2fs_read_data_page,
4068
- .readpages = f2fs_read_data_pages,
4213
+ .readahead = f2fs_readahead,
40694214 .writepage = f2fs_write_data_page,
40704215 .writepages = f2fs_write_data_pages,
40714216 .write_begin = f2fs_write_begin,
....@@ -4082,13 +4227,13 @@
40824227 #endif
40834228 };
40844229
4085
-void f2fs_clear_radix_tree_dirty_tag(struct page *page)
4230
+void f2fs_clear_page_cache_dirty_tag(struct page *page)
40864231 {
40874232 struct address_space *mapping = page_mapping(page);
40884233 unsigned long flags;
40894234
40904235 xa_lock_irqsave(&mapping->i_pages, flags);
4091
- radix_tree_tag_clear(&mapping->i_pages, page_index(page),
4236
+ __xa_clear_mark(&mapping->i_pages, page_index(page),
40924237 PAGECACHE_TAG_DIRTY);
40934238 xa_unlock_irqrestore(&mapping->i_pages, flags);
40944239 }
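
The closing hunk swaps the radix-tree call for its xarray equivalent. A short sketch of the locking contract, assuming only linux/xarray.h: the __-prefixed __xa_clear_mark() expects the caller to hold xa_lock, exactly as the rewritten helper does (PAGECACHE_TAG_DIRTY is an xa_mark_t in this API):

#include <linux/xarray.h>

/* Clear a search mark on one xarray index; the locked variant must
 * run under the array's own xa_lock. */
static void demo_clear_mark(struct xarray *xa, unsigned long index)
{
	unsigned long flags;

	xa_lock_irqsave(xa, flags);
	__xa_clear_mark(xa, index, XA_MARK_0);
	xa_unlock_irqrestore(xa, flags);
}
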