hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/fs/f2fs/node.c
....@@ -17,7 +17,6 @@
1717 #include "node.h"
1818 #include "segment.h"
1919 #include "xattr.h"
20
-#include "trace.h"
2120 #include <trace/events/f2fs.h>
2221
2322 #define on_f2fs_build_free_nids(nm_i) mutex_is_locked(&(nm_i)->build_lock) /* is nm_i->build_lock currently locked? */
....@@ -44,10 +43,14 @@
4443 bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
4544 {
4645 struct f2fs_nm_info *nm_i = NM_I(sbi);
46
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
4747 struct sysinfo val;
4848 unsigned long avail_ram;
4949 unsigned long mem_size = 0;
5050 bool res = false;
51
+
52
+ if (!nm_i)
53
+ return true;
5154
5255 si_meminfo(&val);
5356
....@@ -55,15 +58,15 @@
5558 avail_ram = val.totalram - val.totalhigh;
5659
5760 /*
58
- * give 25%, 25%, 50%, 50%, 50% memory for each components respectively
61
+ * give 25%, 25%, 50%, 50%, 25%, 25% memory for each component respectively
5962 */
6063 if (type == FREE_NIDS) {
6164 mem_size = (nm_i->nid_cnt[FREE_NID] *
6265 sizeof(struct free_nid)) >> PAGE_SHIFT;
6366 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
6467 } else if (type == NAT_ENTRIES) {
65
- mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
66
- PAGE_SHIFT;
68
+ mem_size = (nm_i->nat_cnt[TOTAL_NAT] *
69
+ sizeof(struct nat_entry)) >> PAGE_SHIFT;
6770 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
6871 if (excess_cached_nats(sbi))
6972 res = false;
....@@ -80,16 +83,38 @@
8083 sizeof(struct ino_entry);
8184 mem_size >>= PAGE_SHIFT;
8285 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
83
- } else if (type == EXTENT_CACHE) {
84
- mem_size = (atomic_read(&sbi->total_ext_tree) *
86
+ } else if (type == READ_EXTENT_CACHE || type == AGE_EXTENT_CACHE) {
87
+ enum extent_type etype = type == READ_EXTENT_CACHE ?
88
+ EX_READ : EX_BLOCK_AGE;
89
+ struct extent_tree_info *eti = &sbi->extent_tree[etype];
90
+
91
+ mem_size = (atomic_read(&eti->total_ext_tree) *
8592 sizeof(struct extent_tree) +
86
- atomic_read(&sbi->total_ext_node) *
93
+ atomic_read(&eti->total_ext_node) *
8794 sizeof(struct extent_node)) >> PAGE_SHIFT;
88
- res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
95
+ res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
8996 } else if (type == INMEM_PAGES) {
9097 /* it allows 20% of total_ram for inmemory pages */
9198 mem_size = get_pages(sbi, F2FS_INMEM_PAGES);
9299 res = mem_size < (val.totalram / 5);
100
+ } else if (type == DISCARD_CACHE) {
101
+ mem_size = (atomic_read(&dcc->discard_cmd_cnt) *
102
+ sizeof(struct discard_cmd)) >> PAGE_SHIFT;
103
+ res = mem_size < (avail_ram * nm_i->ram_thresh / 100);
104
+ } else if (type == COMPRESS_PAGE) {
105
+#ifdef CONFIG_F2FS_FS_COMPRESSION
106
+ unsigned long free_ram = val.freeram;
107
+
108
+ /*
109
+ * if free memory is lower than the watermark or the cached page count
110
+ * exceeds the threshold, deny caching of the compress page.
111
+ */
112
+ res = (free_ram > avail_ram * sbi->compress_watermark / 100) &&
113
+ (COMPRESS_MAPPING(sbi)->nrpages <
114
+ free_ram * sbi->compress_percent / 100);
115
+#else
116
+ res = false;
117
+#endif
93118 } else {
94119 if (!sbi->sb->s_bdi->wb.dirty_exceeded)
95120 return true;
....@@ -100,7 +125,7 @@
100125 static void clear_node_page_dirty(struct page *page)
101126 {
102127 if (PageDirty(page)) {
103
- f2fs_clear_radix_tree_dirty_tag(page);
128
+ f2fs_clear_page_cache_dirty_tag(page);
104129 clear_page_dirty_for_io(page);
105130 dec_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
106131 }
....@@ -109,7 +134,7 @@
109134
110135 static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
111136 {
112
- return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid));
137
+ return f2fs_get_meta_page_retry(sbi, current_nat_addr(sbi, nid));
113138 }
114139
115140 static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
....@@ -177,7 +202,8 @@
177202 list_add_tail(&ne->list, &nm_i->nat_entries);
178203 spin_unlock(&nm_i->nat_list_lock);
179204
180
- nm_i->nat_cnt++;
205
+ nm_i->nat_cnt[TOTAL_NAT]++;
206
+ nm_i->nat_cnt[RECLAIMABLE_NAT]++;
181207 return ne;
182208 }
183209
....@@ -207,7 +233,8 @@
207233 static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
208234 {
209235 radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
210
- nm_i->nat_cnt--;
236
+ nm_i->nat_cnt[TOTAL_NAT]--;
237
+ nm_i->nat_cnt[RECLAIMABLE_NAT]--;
211238 __free_nat_entry(e);
212239 }
213240
....@@ -253,7 +280,8 @@
253280 if (get_nat_flag(ne, IS_DIRTY))
254281 goto refresh_list;
255282
256
- nm_i->dirty_nat_cnt++;
283
+ nm_i->nat_cnt[DIRTY_NAT]++;
284
+ nm_i->nat_cnt[RECLAIMABLE_NAT]--;
257285 set_nat_flag(ne, IS_DIRTY, true);
258286 refresh_list:
259287 spin_lock(&nm_i->nat_list_lock);
....@@ -273,7 +301,8 @@
273301
274302 set_nat_flag(ne, IS_DIRTY, false);
275303 set->entry_cnt--;
276
- nm_i->dirty_nat_cnt--;
304
+ nm_i->nat_cnt[DIRTY_NAT]--;
305
+ nm_i->nat_cnt[RECLAIMABLE_NAT]++;
277306 }
278307
279308 static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
....@@ -355,14 +384,14 @@
355384 struct nat_entry *e;
356385 bool need = false;
357386
358
- down_read(&nm_i->nat_tree_lock);
387
+ f2fs_down_read(&nm_i->nat_tree_lock);
359388 e = __lookup_nat_cache(nm_i, nid);
360389 if (e) {
361390 if (!get_nat_flag(e, IS_CHECKPOINTED) &&
362391 !get_nat_flag(e, HAS_FSYNCED_INODE))
363392 need = true;
364393 }
365
- up_read(&nm_i->nat_tree_lock);
394
+ f2fs_up_read(&nm_i->nat_tree_lock);
366395 return need;
367396 }
368397
....@@ -372,11 +401,11 @@
372401 struct nat_entry *e;
373402 bool is_cp = true;
374403
375
- down_read(&nm_i->nat_tree_lock);
404
+ f2fs_down_read(&nm_i->nat_tree_lock);
376405 e = __lookup_nat_cache(nm_i, nid);
377406 if (e && !get_nat_flag(e, IS_CHECKPOINTED))
378407 is_cp = false;
379
- up_read(&nm_i->nat_tree_lock);
408
+ f2fs_up_read(&nm_i->nat_tree_lock);
380409 return is_cp;
381410 }
382411
....@@ -386,13 +415,13 @@
386415 struct nat_entry *e;
387416 bool need_update = true;
388417
389
- down_read(&nm_i->nat_tree_lock);
418
+ f2fs_down_read(&nm_i->nat_tree_lock);
390419 e = __lookup_nat_cache(nm_i, ino);
391420 if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
392421 (get_nat_flag(e, IS_CHECKPOINTED) ||
393422 get_nat_flag(e, HAS_FSYNCED_INODE)))
394423 need_update = false;
395
- up_read(&nm_i->nat_tree_lock);
424
+ f2fs_up_read(&nm_i->nat_tree_lock);
396425 return need_update;
397426 }
398427
....@@ -403,11 +432,15 @@
403432 struct f2fs_nm_info *nm_i = NM_I(sbi);
404433 struct nat_entry *new, *e;
405434
435
+ /* Let's mitigate lock contention of nat_tree_lock during checkpoint */
436
+ if (f2fs_rwsem_is_locked(&sbi->cp_global_sem))
437
+ return;
438
+
406439 new = __alloc_nat_entry(nid, false);
407440 if (!new)
408441 return;
409442
410
- down_write(&nm_i->nat_tree_lock);
443
+ f2fs_down_write(&nm_i->nat_tree_lock);
411444 e = __lookup_nat_cache(nm_i, nid);
412445 if (!e)
413446 e = __init_nat_entry(nm_i, new, ne, false);
....@@ -416,7 +449,7 @@
416449 nat_get_blkaddr(e) !=
417450 le32_to_cpu(ne->block_addr) ||
418451 nat_get_version(e) != ne->version);
419
- up_write(&nm_i->nat_tree_lock);
452
+ f2fs_up_write(&nm_i->nat_tree_lock);
420453 if (e != new)
421454 __free_nat_entry(new);
422455 }
....@@ -428,7 +461,7 @@
428461 struct nat_entry *e;
429462 struct nat_entry *new = __alloc_nat_entry(ni->nid, true);
430463
431
- down_write(&nm_i->nat_tree_lock);
464
+ f2fs_down_write(&nm_i->nat_tree_lock);
432465 e = __lookup_nat_cache(nm_i, ni->nid);
433466 if (!e) {
434467 e = __init_nat_entry(nm_i, new, NULL, true);
....@@ -459,6 +492,7 @@
459492 /* increment version no as node is removed */
460493 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
461494 unsigned char version = nat_get_version(e);
495
+
462496 nat_set_version(e, inc_node_version(version));
463497 }
464498
....@@ -476,7 +510,7 @@
476510 set_nat_flag(e, HAS_FSYNCED_INODE, true);
477511 set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
478512 }
479
- up_write(&nm_i->nat_tree_lock);
513
+ f2fs_up_write(&nm_i->nat_tree_lock);
480514 }
481515
482516 int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
....@@ -484,7 +518,7 @@
484518 struct f2fs_nm_info *nm_i = NM_I(sbi);
485519 int nr = nr_shrink;
486520
487
- if (!down_write_trylock(&nm_i->nat_tree_lock))
521
+ if (!f2fs_down_write_trylock(&nm_i->nat_tree_lock))
488522 return 0;
489523
490524 spin_lock(&nm_i->nat_list_lock);
....@@ -506,12 +540,12 @@
506540 }
507541 spin_unlock(&nm_i->nat_list_lock);
508542
509
- up_write(&nm_i->nat_tree_lock);
543
+ f2fs_up_write(&nm_i->nat_tree_lock);
510544 return nr - nr_shrink;
511545 }
512546
513547 int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
514
- struct node_info *ni)
548
+ struct node_info *ni, bool checkpoint_context)
515549 {
516550 struct f2fs_nm_info *nm_i = NM_I(sbi);
517551 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
....@@ -526,36 +560,46 @@
526560 int i;
527561
528562 ni->nid = nid;
529
-
563
+retry:
530564 /* Check nat cache */
531
- down_read(&nm_i->nat_tree_lock);
565
+ f2fs_down_read(&nm_i->nat_tree_lock);
532566 e = __lookup_nat_cache(nm_i, nid);
533567 if (e) {
534568 ni->ino = nat_get_ino(e);
535569 ni->blk_addr = nat_get_blkaddr(e);
536570 ni->version = nat_get_version(e);
537
- up_read(&nm_i->nat_tree_lock);
571
+ f2fs_up_read(&nm_i->nat_tree_lock);
538572 return 0;
539573 }
540574
541
- memset(&ne, 0, sizeof(struct f2fs_nat_entry));
575
+ /*
576
+ * Check current segment summary by trying to grab journal_rwsem first.
577
+ * This sem is on the critical path of checkpoint, which requires
578
+ * the above nat_tree_lock. Therefore, if we fail to grab it here, we
579
+ * should drop nat_tree_lock and retry so as not to bother checkpoint.
580
+ */
581
+ if (!f2fs_rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) {
582
+ down_read(&curseg->journal_rwsem);
583
+ } else if (f2fs_rwsem_is_contended(&nm_i->nat_tree_lock) ||
584
+ !down_read_trylock(&curseg->journal_rwsem)) {
585
+ f2fs_up_read(&nm_i->nat_tree_lock);
586
+ goto retry;
587
+ }
542588
543
- /* Check current segment summary */
544
- down_read(&curseg->journal_rwsem);
545589 i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
546590 if (i >= 0) {
547591 ne = nat_in_journal(journal, i);
548592 node_info_from_raw_nat(ni, &ne);
549593 }
550
- up_read(&curseg->journal_rwsem);
594
+ up_read(&curseg->journal_rwsem);
551595 if (i >= 0) {
552
- up_read(&nm_i->nat_tree_lock);
596
+ f2fs_up_read(&nm_i->nat_tree_lock);
553597 goto cache;
554598 }
555599
556600 /* Fill node_info from nat page */
557601 index = current_nat_addr(sbi, nid);
558
- up_read(&nm_i->nat_tree_lock);
602
+ f2fs_up_read(&nm_i->nat_tree_lock);
559603
560604 page = f2fs_get_meta_page(sbi, index);
561605 if (IS_ERR(page))
....@@ -618,8 +662,10 @@
618662 switch (dn->max_level) {
619663 case 3:
620664 base += 2 * indirect_blks;
665
+ fallthrough;
621666 case 2:
622667 base += 2 * direct_blks;
668
+ fallthrough;
623669 case 1:
624670 base += direct_index;
625671 break;
....@@ -804,6 +850,26 @@
804850 dn->ofs_in_node = offset[level];
805851 dn->node_page = npage[level];
806852 dn->data_blkaddr = f2fs_data_blkaddr(dn);
853
+
854
+ if (is_inode_flag_set(dn->inode, FI_COMPRESSED_FILE) &&
855
+ f2fs_sb_has_readonly(sbi)) {
856
+ unsigned int c_len = f2fs_cluster_blocks_are_contiguous(dn);
857
+ block_t blkaddr;
858
+
859
+ if (!c_len)
860
+ goto out;
861
+
862
+ blkaddr = f2fs_data_blkaddr(dn);
863
+ if (blkaddr == COMPRESS_ADDR)
864
+ blkaddr = data_blkaddr(dn->inode, dn->node_page,
865
+ dn->ofs_in_node + 1);
866
+
867
+ f2fs_update_read_extent_tree_range_compressed(dn->inode,
868
+ index, blkaddr,
869
+ F2FS_I(dn->inode)->i_cluster_size,
870
+ c_len);
871
+ }
872
+out:
807873 return 0;
808874
809875 release_pages:
....@@ -828,7 +894,7 @@
828894 int err;
829895 pgoff_t index;
830896
831
- err = f2fs_get_node_info(sbi, dn->nid, &ni);
897
+ err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
832898 if (err)
833899 return err;
834900
....@@ -868,7 +934,7 @@
868934
869935 /* get direct node */
870936 page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid);
871
- if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
937
+ if (PTR_ERR(page) == -ENOENT)
872938 return 1;
873939 else if (IS_ERR(page))
874940 return PTR_ERR(page);
....@@ -878,8 +944,10 @@
878944 dn->ofs_in_node = 0;
879945 f2fs_truncate_data_blocks(dn);
880946 err = truncate_node(dn);
881
- if (err)
947
+ if (err) {
948
+ f2fs_put_page(page, 1);
882949 return err;
950
+ }
883951
884952 return 1;
885953 }
....@@ -1039,8 +1107,10 @@
10391107 trace_f2fs_truncate_inode_blocks_enter(inode, from);
10401108
10411109 level = get_node_path(inode, from, offset, noffset);
1042
- if (level < 0)
1110
+ if (level < 0) {
1111
+ trace_f2fs_truncate_inode_blocks_exit(inode, level);
10431112 return level;
1113
+ }
10441114
10451115 page = f2fs_get_node_page(sbi, inode->i_ino);
10461116 if (IS_ERR(page)) {
....@@ -1225,12 +1295,16 @@
12251295 goto fail;
12261296
12271297 #ifdef CONFIG_F2FS_CHECK_FS
1228
- err = f2fs_get_node_info(sbi, dn->nid, &new_ni);
1298
+ err = f2fs_get_node_info(sbi, dn->nid, &new_ni, false);
12291299 if (err) {
12301300 dec_valid_node_count(sbi, dn->inode, !ofs);
12311301 goto fail;
12321302 }
1233
- f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
1303
+ if (unlikely(new_ni.blk_addr != NULL_ADDR)) {
1304
+ err = -EFSCORRUPTED;
1305
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
1306
+ goto fail;
1307
+ }
12341308 #endif
12351309 new_ni.nid = dn->nid;
12361310 new_ni.ino = dn->inode->i_ino;
....@@ -1287,12 +1361,12 @@
12871361 return LOCKED_PAGE;
12881362 }
12891363
1290
- err = f2fs_get_node_info(sbi, page->index, &ni);
1364
+ err = f2fs_get_node_info(sbi, page->index, &ni, false);
12911365 if (err)
12921366 return err;
12931367
1294
- if (unlikely(ni.blk_addr == NULL_ADDR) ||
1295
- is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) {
1368
+ /* NEW_ADDR can be seen, after cp_error drops some dirty node pages */
1369
+ if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR)) {
12961370 ClearPageUptodate(page);
12971371 return -ENOENT;
12981372 }
....@@ -1320,9 +1394,7 @@
13201394 if (f2fs_check_nid_range(sbi, nid))
13211395 return;
13221396
1323
- rcu_read_lock();
1324
- apage = radix_tree_lookup(&NODE_MAPPING(sbi)->i_pages, nid);
1325
- rcu_read_unlock();
1397
+ apage = xa_load(&NODE_MAPPING(sbi)->i_pages, nid);
13261398 if (apage)
13271399 return;
13281400
....@@ -1378,11 +1450,12 @@
13781450 goto out_err;
13791451 }
13801452 page_hit:
1381
- if(unlikely(nid != nid_of_node(page))) {
1453
+ if (unlikely(nid != nid_of_node(page))) {
13821454 f2fs_warn(sbi, "inconsistent node block, nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
13831455 nid, nid_of_node(page), ino_of_node(page),
13841456 ofs_of_node(page), cpver_of_node(page),
13851457 next_blkaddr_of_node(page));
1458
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
13861459 err = -EINVAL;
13871460 out_err:
13881461 ClearPageUptodate(page);
....@@ -1542,21 +1615,21 @@
15421615 nid = nid_of_node(page);
15431616 f2fs_bug_on(sbi, page->index != nid);
15441617
1545
- if (f2fs_get_node_info(sbi, nid, &ni))
1618
+ if (f2fs_get_node_info(sbi, nid, &ni, !do_balance))
15461619 goto redirty_out;
15471620
15481621 if (wbc->for_reclaim) {
1549
- if (!down_read_trylock(&sbi->node_write))
1622
+ if (!f2fs_down_read_trylock(&sbi->node_write))
15501623 goto redirty_out;
15511624 } else {
1552
- down_read(&sbi->node_write);
1625
+ f2fs_down_read(&sbi->node_write);
15531626 }
15541627
15551628 /* This page is already truncated */
15561629 if (unlikely(ni.blk_addr == NULL_ADDR)) {
15571630 ClearPageUptodate(page);
15581631 dec_page_count(sbi, F2FS_DIRTY_NODES);
1559
- up_read(&sbi->node_write);
1632
+ f2fs_up_read(&sbi->node_write);
15601633 unlock_page(page);
15611634 return 0;
15621635 }
....@@ -1564,7 +1637,7 @@
15641637 if (__is_valid_data_blkaddr(ni.blk_addr) &&
15651638 !f2fs_is_valid_blkaddr(sbi, ni.blk_addr,
15661639 DATA_GENERIC_ENHANCE)) {
1567
- up_read(&sbi->node_write);
1640
+ f2fs_up_read(&sbi->node_write);
15681641 goto redirty_out;
15691642 }
15701643
....@@ -1585,7 +1658,7 @@
15851658 f2fs_do_write_node_page(nid, &fio);
15861659 set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
15871660 dec_page_count(sbi, F2FS_DIRTY_NODES);
1588
- up_read(&sbi->node_write);
1661
+ f2fs_up_read(&sbi->node_write);
15891662
15901663 if (wbc->for_reclaim) {
15911664 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE);
....@@ -1726,7 +1799,7 @@
17261799 set_dentry_mark(page,
17271800 f2fs_need_dentry_mark(sbi, ino));
17281801 }
1729
- /* may be written by other thread */
1802
+ /* may be written by other thread */
17301803 if (!PageDirty(page))
17311804 set_page_dirty(page);
17321805 }
....@@ -1770,7 +1843,7 @@
17701843 out:
17711844 if (nwritten)
17721845 f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE);
1773
- return ret ? -EIO: 0;
1846
+ return ret ? -EIO : 0;
17741847 }
17751848
17761849 static int f2fs_match_ino(struct inode *inode, unsigned long ino, void *data)
....@@ -1814,12 +1887,11 @@
18141887 return true;
18151888 }
18161889
1817
-int f2fs_flush_inline_data(struct f2fs_sb_info *sbi)
1890
+void f2fs_flush_inline_data(struct f2fs_sb_info *sbi)
18181891 {
18191892 pgoff_t index = 0;
18201893 struct pagevec pvec;
18211894 int nr_pages;
1822
- int ret = 0;
18231895
18241896 pagevec_init(&pvec);
18251897
....@@ -1847,8 +1919,8 @@
18471919 }
18481920
18491921 /* flush inline_data, if it's async context. */
1850
- if (is_inline_node(page)) {
1851
- clear_inline_node(page);
1922
+ if (page_private_inline(page)) {
1923
+ clear_page_private_inline(page);
18521924 unlock_page(page);
18531925 flush_inline_data(sbi, ino_of_node(page));
18541926 continue;
....@@ -1858,7 +1930,6 @@
18581930 pagevec_release(&pvec);
18591931 cond_resched();
18601932 }
1861
- return ret;
18621933 }
18631934
18641935 int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
....@@ -1924,9 +1995,13 @@
19241995 goto continue_unlock;
19251996 }
19261997
1927
- /* flush inline_data, if it's async context. */
1928
- if (do_balance && is_inline_node(page)) {
1929
- clear_inline_node(page);
1998
+ /* flush inline_data/inode, if it's async context. */
1999
+ if (!do_balance)
2000
+ goto write_node;
2001
+
2002
+ /* flush inline_data */
2003
+ if (page_private_inline(page)) {
2004
+ clear_page_private_inline(page);
19302005 unlock_page(page);
19312006 flush_inline_data(sbi, ino_of_node(page));
19322007 goto lock_node;
....@@ -1938,7 +2013,7 @@
19382013 if (flush_dirty_inode(page))
19392014 goto lock_node;
19402015 }
1941
-
2016
+write_node:
19422017 f2fs_wait_on_page_writeback(page, NODE, true, true);
19432018
19442019 if (!clear_page_dirty_for_io(page))
....@@ -2046,8 +2121,12 @@
20462121
20472122 if (wbc->sync_mode == WB_SYNC_ALL)
20482123 atomic_inc(&sbi->wb_sync_req[NODE]);
2049
- else if (atomic_read(&sbi->wb_sync_req[NODE]))
2124
+ else if (atomic_read(&sbi->wb_sync_req[NODE])) {
2125
+ /* to avoid potential deadlock */
2126
+ if (current->plug)
2127
+ blk_finish_plug(current->plug);
20502128 goto skip_write;
2129
+ }
20512130
20522131 trace_f2fs_writepages(mapping->host, wbc, NODE);
20532132
....@@ -2080,8 +2159,7 @@
20802159 if (!PageDirty(page)) {
20812160 __set_page_dirty_nobuffers(page);
20822161 inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
2083
- f2fs_set_page_private(page, 0);
2084
- f2fs_trace_pid(page);
2162
+ set_page_private_reference(page);
20852163 return 1;
20862164 }
20872165 return 0;
....@@ -2097,7 +2175,7 @@
20972175 .invalidatepage = f2fs_invalidate_page,
20982176 .releasepage = f2fs_release_page,
20992177 #ifdef CONFIG_MIGRATION
2100
- .migratepage = f2fs_migrate_page,
2178
+ .migratepage = f2fs_migrate_page,
21012179 #endif
21022180 };
21032181
....@@ -2108,18 +2186,16 @@
21082186 }
21092187
21102188 static int __insert_free_nid(struct f2fs_sb_info *sbi,
2111
- struct free_nid *i, enum nid_state state)
2189
+ struct free_nid *i)
21122190 {
21132191 struct f2fs_nm_info *nm_i = NM_I(sbi);
2114
-
21152192 int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
2193
+
21162194 if (err)
21172195 return err;
21182196
2119
- f2fs_bug_on(sbi, state != i->state);
2120
- nm_i->nid_cnt[state]++;
2121
- if (state == FREE_NID)
2122
- list_add_tail(&i->list, &nm_i->free_nid_list);
2197
+ nm_i->nid_cnt[FREE_NID]++;
2198
+ list_add_tail(&i->list, &nm_i->free_nid_list);
21232199 return 0;
21242200 }
21252201
....@@ -2241,7 +2317,7 @@
22412317 }
22422318 }
22432319 ret = true;
2244
- err = __insert_free_nid(sbi, i, FREE_NID);
2320
+ err = __insert_free_nid(sbi, i);
22452321 err_out:
22462322 if (update) {
22472323 update_free_nid_bitmap(sbi, nid, ret, build);
....@@ -2335,7 +2411,7 @@
23352411 unsigned int i, idx;
23362412 nid_t nid;
23372413
2338
- down_read(&nm_i->nat_tree_lock);
2414
+ f2fs_down_read(&nm_i->nat_tree_lock);
23392415
23402416 for (i = 0; i < nm_i->nat_blocks; i++) {
23412417 if (!test_bit_le(i, nm_i->nat_block_bitmap))
....@@ -2358,7 +2434,7 @@
23582434 out:
23592435 scan_curseg_cache(sbi);
23602436
2361
- up_read(&nm_i->nat_tree_lock);
2437
+ f2fs_up_read(&nm_i->nat_tree_lock);
23622438 }
23632439
23642440 static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
....@@ -2393,7 +2469,7 @@
23932469 f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
23942470 META_NAT, true);
23952471
2396
- down_read(&nm_i->nat_tree_lock);
2472
+ f2fs_down_read(&nm_i->nat_tree_lock);
23972473
23982474 while (1) {
23992475 if (!test_bit_le(NAT_BLOCK_OFFSET(nid),
....@@ -2408,7 +2484,7 @@
24082484 }
24092485
24102486 if (ret) {
2411
- up_read(&nm_i->nat_tree_lock);
2487
+ f2fs_up_read(&nm_i->nat_tree_lock);
24122488 f2fs_err(sbi, "NAT is corrupt, run fsck to fix it");
24132489 return ret;
24142490 }
....@@ -2428,7 +2504,7 @@
24282504 /* find free nids from current sum_pages */
24292505 scan_curseg_cache(sbi);
24302506
2431
- up_read(&nm_i->nat_tree_lock);
2507
+ f2fs_up_read(&nm_i->nat_tree_lock);
24322508
24332509 f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
24342510 nm_i->ra_nid_pages, META_NAT, false);
....@@ -2588,9 +2664,15 @@
25882664
25892665 ri = F2FS_INODE(page);
25902666 if (ri->i_inline & F2FS_INLINE_XATTR) {
2591
- set_inode_flag(inode, FI_INLINE_XATTR);
2667
+ if (!f2fs_has_inline_xattr(inode)) {
2668
+ set_inode_flag(inode, FI_INLINE_XATTR);
2669
+ stat_inc_inline_xattr(inode);
2670
+ }
25922671 } else {
2593
- clear_inode_flag(inode, FI_INLINE_XATTR);
2672
+ if (f2fs_has_inline_xattr(inode)) {
2673
+ stat_dec_inline_xattr(inode);
2674
+ clear_inode_flag(inode, FI_INLINE_XATTR);
2675
+ }
25942676 goto update_inode;
25952677 }
25962678
....@@ -2620,7 +2702,7 @@
26202702 goto recover_xnid;
26212703
26222704 /* 1: invalidate the previous xattr nid */
2623
- err = f2fs_get_node_info(sbi, prev_xnid, &ni);
2705
+ err = f2fs_get_node_info(sbi, prev_xnid, &ni, false);
26242706 if (err)
26252707 return err;
26262708
....@@ -2660,7 +2742,7 @@
26602742 struct page *ipage;
26612743 int err;
26622744
2663
- err = f2fs_get_node_info(sbi, ino, &old_ni);
2745
+ err = f2fs_get_node_info(sbi, ino, &old_ni, false);
26642746 if (err)
26652747 return err;
26662748
....@@ -2684,7 +2766,7 @@
26842766 src = F2FS_INODE(page);
26852767 dst = F2FS_INODE(ipage);
26862768
2687
- memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src);
2769
+ memcpy(dst, src, offsetof(struct f2fs_inode, i_ext));
26882770 dst->i_size = 0;
26892771 dst->i_blocks = cpu_to_le64(1);
26902772 dst->i_links = cpu_to_le32(1);
....@@ -2945,17 +3027,20 @@
29453027 LIST_HEAD(sets);
29463028 int err = 0;
29473029
2948
- /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */
3030
+ /*
3031
+ * during unmount, let's flush nat_bits before checking
3032
+ * nat_cnt[DIRTY_NAT].
3033
+ */
29493034 if (enabled_nat_bits(sbi, cpc)) {
2950
- down_write(&nm_i->nat_tree_lock);
3035
+ f2fs_down_write(&nm_i->nat_tree_lock);
29513036 remove_nats_in_journal(sbi);
2952
- up_write(&nm_i->nat_tree_lock);
3037
+ f2fs_up_write(&nm_i->nat_tree_lock);
29533038 }
29543039
2955
- if (!nm_i->dirty_nat_cnt)
3040
+ if (!nm_i->nat_cnt[DIRTY_NAT])
29563041 return 0;
29573042
2958
- down_write(&nm_i->nat_tree_lock);
3043
+ f2fs_down_write(&nm_i->nat_tree_lock);
29593044
29603045 /*
29613046 * if there is not enough space in journal to store dirty nat
....@@ -2963,12 +3048,14 @@
29633048 * into nat entry set.
29643049 */
29653050 if (enabled_nat_bits(sbi, cpc) ||
2966
- !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
3051
+ !__has_cursum_space(journal,
3052
+ nm_i->nat_cnt[DIRTY_NAT], NAT_JOURNAL))
29673053 remove_nats_in_journal(sbi);
29683054
29693055 while ((found = __gang_lookup_nat_set(nm_i,
29703056 set_idx, SETVEC_SIZE, setvec))) {
29713057 unsigned idx;
3058
+
29723059 set_idx = setvec[found - 1]->set + 1;
29733060 for (idx = 0; idx < found; idx++)
29743061 __adjust_nat_entry_set(setvec[idx], &sets,
....@@ -2982,7 +3069,7 @@
29823069 break;
29833070 }
29843071
2985
- up_write(&nm_i->nat_tree_lock);
3072
+ f2fs_up_write(&nm_i->nat_tree_lock);
29863073 /* Allow dirty nats by node block allocation in write_begin */
29873074
29883075 return err;
....@@ -3087,7 +3174,6 @@
30873174 F2FS_RESERVED_NODE_NUM;
30883175 nm_i->nid_cnt[FREE_NID] = 0;
30893176 nm_i->nid_cnt[PREALLOC_NID] = 0;
3090
- nm_i->nat_cnt = 0;
30913177 nm_i->ram_thresh = DEF_RAM_THRESHOLD;
30923178 nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
30933179 nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
....@@ -3101,14 +3187,11 @@
31013187
31023188 mutex_init(&nm_i->build_lock);
31033189 spin_lock_init(&nm_i->nid_list_lock);
3104
- init_rwsem(&nm_i->nat_tree_lock);
3190
+ init_f2fs_rwsem(&nm_i->nat_tree_lock);
31053191
31063192 nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
31073193 nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
31083194 version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
3109
- if (!version_bitmap)
3110
- return -EFAULT;
3111
-
31123195 nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size,
31133196 GFP_KERNEL);
31143197 if (!nm_i->nat_bitmap)
....@@ -3210,7 +3293,7 @@
32103293 spin_unlock(&nm_i->nid_list_lock);
32113294
32123295 /* destroy nat cache */
3213
- down_write(&nm_i->nat_tree_lock);
3296
+ f2fs_down_write(&nm_i->nat_tree_lock);
32143297 while ((found = __gang_lookup_nat_cache(nm_i,
32153298 nid, NATVEC_SIZE, natvec))) {
32163299 unsigned idx;
....@@ -3224,7 +3307,7 @@
32243307 __del_from_nat_cache(nm_i, natvec[idx]);
32253308 }
32263309 }
3227
- f2fs_bug_on(sbi, nm_i->nat_cnt);
3310
+ f2fs_bug_on(sbi, nm_i->nat_cnt[TOTAL_NAT]);
32283311
32293312 /* destroy nat set cache */
32303313 nid = 0;
....@@ -3240,7 +3323,7 @@
32403323 kmem_cache_free(nat_entry_set_slab, setvec[idx]);
32413324 }
32423325 }
3243
- up_write(&nm_i->nat_tree_lock);
3326
+ f2fs_up_write(&nm_i->nat_tree_lock);
32443327
32453328 kvfree(nm_i->nat_block_bitmap);
32463329 if (nm_i->free_nid_bitmap) {
....@@ -3258,7 +3341,7 @@
32583341 kvfree(nm_i->nat_bitmap_mir);
32593342 #endif
32603343 sbi->nm_info = NULL;
3261
- kvfree(nm_i);
3344
+ kfree(nm_i);
32623345 }
32633346
32643347 int __init f2fs_create_node_manager_caches(void)