2024-05-10 9999e48639b3cecb08ffb37358bcba3b48161b29
kernel/fs/f2fs/extent_cache.c
@@ -6,6 +6,10 @@
  * Copyright (c) 2015 Samsung Electronics
  * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
  *          Chao Yu <chao2.yu@samsung.com>
+ *
+ * block_age-based extent cache added by:
+ * Copyright (c) 2022 xiaomi Co., Ltd.
+ *             http://www.xiaomi.com/
  */
 
 #include <linux/fs.h>
@@ -14,6 +18,111 @@
 #include "f2fs.h"
 #include "node.h"
 #include <trace/events/f2fs.h>
+
+static void __set_extent_info(struct extent_info *ei,
+				unsigned int fofs, unsigned int len,
+				block_t blk, bool keep_clen,
+				unsigned long age, unsigned long last_blocks,
+				enum extent_type type)
+{
+	ei->fofs = fofs;
+	ei->len = len;
+
+	if (type == EX_READ) {
+		ei->blk = blk;
+		if (keep_clen)
+			return;
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+		ei->c_len = 0;
+#endif
+	} else if (type == EX_BLOCK_AGE) {
+		ei->age = age;
+		ei->last_blocks = last_blocks;
+	}
+}
+
+static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
+{
+	if (type == EX_READ)
+		return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) &&
+			S_ISREG(inode->i_mode);
+	if (type == EX_BLOCK_AGE)
+		return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) &&
+			(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode));
+	return false;
+}
+
+static bool __may_extent_tree(struct inode *inode, enum extent_type type)
+{
+	/*
+	 * for recovered files during mount do not create extents
+	 * if shrinker is not registered.
+	 */
+	if (list_empty(&F2FS_I_SB(inode)->s_list))
+		return false;
+
+	if (!__init_may_extent_tree(inode, type))
+		return false;
+
+	if (type == EX_READ) {
+		if (is_inode_flag_set(inode, FI_NO_EXTENT))
+			return false;
+		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
+				!f2fs_sb_has_readonly(F2FS_I_SB(inode)))
+			return false;
+	} else if (type == EX_BLOCK_AGE) {
+		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
+			return false;
+		if (file_is_cold(inode))
+			return false;
+	}
+	return true;
+}
+
+static void __try_update_largest_extent(struct extent_tree *et,
+						struct extent_node *en)
+{
+	if (et->type != EX_READ)
+		return;
+	if (en->ei.len <= et->largest.len)
+		return;
+
+	et->largest = en->ei;
+	et->largest_updated = true;
+}
+
+static bool __is_extent_mergeable(struct extent_info *back,
+		struct extent_info *front, enum extent_type type)
+{
+	if (type == EX_READ) {
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+		if (back->c_len && back->len != back->c_len)
+			return false;
+		if (front->c_len && front->len != front->c_len)
+			return false;
+#endif
+		return (back->fofs + back->len == front->fofs &&
+				back->blk + back->len == front->blk);
+	} else if (type == EX_BLOCK_AGE) {
+		return (back->fofs + back->len == front->fofs &&
+			abs(back->age - front->age) <= SAME_AGE_REGION &&
+			abs(back->last_blocks - front->last_blocks) <=
+						SAME_AGE_REGION);
+	}
+	return false;
+}
+
+static bool __is_back_mergeable(struct extent_info *cur,
+		struct extent_info *back, enum extent_type type)
+{
+	return __is_extent_mergeable(back, cur, type);
+}
+
+static bool __is_front_mergeable(struct extent_info *cur,
+		struct extent_info *front, enum extent_type type)
+{
+	return __is_extent_mergeable(cur, front, type);
+}
 
 static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re,
 						unsigned int ofs)
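For reference, the helpers added in the hunk above define when two cached extents can be merged: read extents (EX_READ) merge only when both the file-offset range and the on-disk block range are contiguous, while block-age extents (EX_BLOCK_AGE) merge when the offsets are contiguous and the age/last_blocks counters differ by at most SAME_AGE_REGION. Below is a minimal standalone sketch of the read-extent rule, illustration only and not part of the patch; the struct is a simplified stand-in, not the kernel's struct extent_info.

#include <stdbool.h>
#include <stdint.h>

/* simplified stand-in for the EX_READ fields of struct extent_info */
struct read_extent {
	uint32_t fofs;	/* starting file offset, in blocks */
	uint32_t len;	/* number of blocks covered */
	uint32_t blk;	/* starting block address on disk */
};

/* back/front are mergeable only if both ranges touch end-to-start */
static bool read_extents_mergeable(const struct read_extent *back,
				   const struct read_extent *front)
{
	return back->fofs + back->len == front->fofs &&
	       back->blk + back->len == front->blk;
}

int main(void)
{
	struct read_extent a = { .fofs = 0, .len = 4, .blk = 100 };
	struct read_extent b = { .fofs = 4, .len = 2, .blk = 104 };

	/*
	 * contiguous both logically (blocks 0..3 then 4..5) and physically
	 * (100..103 then 104..105), so this evaluates to true
	 */
	return read_extents_mergeable(&a, &b) ? 0 : 1;
}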
@@ -56,6 +165,29 @@
 		return __lookup_rb_tree_slow(root, ofs);
 
 	return re;
+}
+
+struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi,
+					struct rb_root_cached *root,
+					struct rb_node **parent,
+					unsigned long long key, bool *leftmost)
+{
+	struct rb_node **p = &root->rb_root.rb_node;
+	struct rb_entry *re;
+
+	while (*p) {
+		*parent = *p;
+		re = rb_entry(*parent, struct rb_entry, rb_node);
+
+		if (key < re->key) {
+			p = &(*p)->rb_left;
+		} else {
+			p = &(*p)->rb_right;
+			*leftmost = false;
+		}
+	}
+
+	return p;
 }
 
 struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
@@ -166,7 +298,7 @@
 }
 
 bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
-				struct rb_root_cached *root)
+				struct rb_root_cached *root, bool check_key)
 {
 #ifdef CONFIG_F2FS_CHECK_FS
 	struct rb_node *cur = rb_first_cached(root), *next;
@@ -183,13 +315,23 @@
 		cur_re = rb_entry(cur, struct rb_entry, rb_node);
 		next_re = rb_entry(next, struct rb_entry, rb_node);
 
+		if (check_key) {
+			if (cur_re->key > next_re->key) {
+				f2fs_info(sbi, "inconsistent rbtree, "
+					"cur(%llu) next(%llu)",
+					cur_re->key, next_re->key);
+				return false;
+			}
+			goto next;
+		}
+
 		if (cur_re->ofs + cur_re->len > next_re->ofs) {
 			f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)",
 				cur_re->ofs, cur_re->len,
 				next_re->ofs, next_re->len);
 			return false;
 		}
-
+next:
 		cur = next;
 	}
 #endif
@@ -204,6 +346,7 @@
 				struct rb_node *parent, struct rb_node **p,
 				bool leftmost)
 {
+	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 	struct extent_node *en;
 
 	en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
@@ -217,16 +360,18 @@
 	rb_link_node(&en->rb_node, parent, p);
 	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
 	atomic_inc(&et->node_cnt);
-	atomic_inc(&sbi->total_ext_node);
+	atomic_inc(&eti->total_ext_node);
 	return en;
 }
 
 static void __detach_extent_node(struct f2fs_sb_info *sbi,
 				struct extent_tree *et, struct extent_node *en)
 {
+	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
+
 	rb_erase_cached(&en->rb_node, &et->root);
 	atomic_dec(&et->node_cnt);
-	atomic_dec(&sbi->total_ext_node);
+	atomic_dec(&eti->total_ext_node);
 
 	if (et->cached_en == en)
 		et->cached_en = NULL;
@@ -242,58 +387,48 @@
 static void __release_extent_node(struct f2fs_sb_info *sbi,
 				struct extent_tree *et, struct extent_node *en)
 {
-	spin_lock(&sbi->extent_lock);
+	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
+
+	spin_lock(&eti->extent_lock);
 	f2fs_bug_on(sbi, list_empty(&en->list));
 	list_del_init(&en->list);
-	spin_unlock(&sbi->extent_lock);
+	spin_unlock(&eti->extent_lock);
 
 	__detach_extent_node(sbi, et, en);
 }
 
-static struct extent_tree *__grab_extent_tree(struct inode *inode)
+static struct extent_tree *__grab_extent_tree(struct inode *inode,
+						enum extent_type type)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct extent_tree_info *eti = &sbi->extent_tree[type];
 	struct extent_tree *et;
 	nid_t ino = inode->i_ino;
 
-	mutex_lock(&sbi->extent_tree_lock);
-	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
+	mutex_lock(&eti->extent_tree_lock);
+	et = radix_tree_lookup(&eti->extent_tree_root, ino);
 	if (!et) {
 		et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
-		f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
+		f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
 		memset(et, 0, sizeof(struct extent_tree));
 		et->ino = ino;
+		et->type = type;
 		et->root = RB_ROOT_CACHED;
 		et->cached_en = NULL;
 		rwlock_init(&et->lock);
 		INIT_LIST_HEAD(&et->list);
 		atomic_set(&et->node_cnt, 0);
-		atomic_inc(&sbi->total_ext_tree);
+		atomic_inc(&eti->total_ext_tree);
 	} else {
-		atomic_dec(&sbi->total_zombie_tree);
+		atomic_dec(&eti->total_zombie_tree);
 		list_del_init(&et->list);
 	}
-	mutex_unlock(&sbi->extent_tree_lock);
+	mutex_unlock(&eti->extent_tree_lock);
 
 	/* never died until evict_inode */
-	F2FS_I(inode)->extent_tree = et;
+	F2FS_I(inode)->extent_tree[type] = et;
 
 	return et;
-}
-
-static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
-				struct extent_tree *et, struct extent_info *ei)
-{
-	struct rb_node **p = &et->root.rb_root.rb_node;
-	struct extent_node *en;
-
-	en = __attach_extent_node(sbi, et, ei, NULL, p, true);
-	if (!en)
-		return NULL;
-
-	et->largest = en->ei;
-	et->cached_en = en;
-	return en;
 }
 
 static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
@@ -324,73 +459,89 @@
 	}
 }
 
-/* return true, if inode page is changed */
-static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
+void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
+	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
 	struct extent_tree *et;
 	struct extent_node *en;
 	struct extent_info ei;
 
-	if (!f2fs_may_extent_tree(inode)) {
-		/* drop largest extent */
+	if (!__may_extent_tree(inode, EX_READ)) {
+		/* drop largest read extent */
 		if (i_ext && i_ext->len) {
+			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
 			i_ext->len = 0;
-			return true;
+			set_page_dirty(ipage);
 		}
-		return false;
+		goto out;
 	}
 
-	et = __grab_extent_tree(inode);
+	et = __grab_extent_tree(inode, EX_READ);
 
 	if (!i_ext || !i_ext->len)
-		return false;
+		goto out;
 
-	get_extent_info(&ei, i_ext);
+	get_read_extent_info(&ei, i_ext);
 
 	write_lock(&et->lock);
 	if (atomic_read(&et->node_cnt))
-		goto out;
+		goto unlock_out;
 
-	en = __init_extent_tree(sbi, et, &ei);
+	en = __attach_extent_node(sbi, et, &ei, NULL,
+				&et->root.rb_root.rb_node, true);
 	if (en) {
-		spin_lock(&sbi->extent_lock);
-		list_add_tail(&en->list, &sbi->extent_list);
-		spin_unlock(&sbi->extent_lock);
+		et->largest = en->ei;
+		et->cached_en = en;
+
+		spin_lock(&eti->extent_lock);
+		list_add_tail(&en->list, &eti->extent_list);
+		spin_unlock(&eti->extent_lock);
 	}
-out:
+unlock_out:
 	write_unlock(&et->lock);
-	return false;
-}
-
-bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
-{
-	bool ret = __f2fs_init_extent_tree(inode, i_ext);
-
-	if (!F2FS_I(inode)->extent_tree)
+out:
+	if (!F2FS_I(inode)->extent_tree[EX_READ])
 		set_inode_flag(inode, FI_NO_EXTENT);
-
-	return ret;
 }
 
-static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
-					struct extent_info *ei)
+void f2fs_init_age_extent_tree(struct inode *inode)
+{
+	if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
+		return;
+	__grab_extent_tree(inode, EX_BLOCK_AGE);
+}
+
+void f2fs_init_extent_tree(struct inode *inode)
+{
+	/* initialize read cache */
+	if (__init_may_extent_tree(inode, EX_READ))
+		__grab_extent_tree(inode, EX_READ);
+
+	/* initialize block age cache */
+	if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
+		__grab_extent_tree(inode, EX_BLOCK_AGE);
+}
+
+static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
+			struct extent_info *ei, enum extent_type type)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et = F2FS_I(inode)->extent_tree;
+	struct extent_tree_info *eti = &sbi->extent_tree[type];
+	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 	struct extent_node *en;
 	bool ret = false;
 
-	f2fs_bug_on(sbi, !et);
-
 	if (!et)
-		return ret;
+		return false;
 
-	trace_f2fs_lookup_extent_tree_start(inode, pgofs);
+	trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);
 
 	read_lock(&et->lock);
 
-	if (et->largest.fofs <= pgofs &&
+	if (type == EX_READ &&
+			et->largest.fofs <= pgofs &&
 			et->largest.fofs + et->largest.len > pgofs) {
 		*ei = et->largest;
 		ret = true;
@@ -404,23 +555,26 @@
 		goto out;
 
 	if (en == et->cached_en)
-		stat_inc_cached_node_hit(sbi);
+		stat_inc_cached_node_hit(sbi, type);
 	else
-		stat_inc_rbtree_node_hit(sbi);
+		stat_inc_rbtree_node_hit(sbi, type);
 
 	*ei = en->ei;
-	spin_lock(&sbi->extent_lock);
+	spin_lock(&eti->extent_lock);
 	if (!list_empty(&en->list)) {
-		list_move_tail(&en->list, &sbi->extent_list);
+		list_move_tail(&en->list, &eti->extent_list);
 		et->cached_en = en;
 	}
-	spin_unlock(&sbi->extent_lock);
+	spin_unlock(&eti->extent_lock);
 	ret = true;
 out:
-	stat_inc_total_hit(sbi);
+	stat_inc_total_hit(sbi, type);
 	read_unlock(&et->lock);
 
-	trace_f2fs_lookup_extent_tree_end(inode, pgofs, ei);
+	if (type == EX_READ)
+		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
+	else if (type == EX_BLOCK_AGE)
+		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
 	return ret;
 }
 
@@ -429,18 +583,20 @@
 				struct extent_node *prev_ex,
 				struct extent_node *next_ex)
 {
+	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 	struct extent_node *en = NULL;
 
-	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) {
+	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
 		prev_ex->ei.len += ei->len;
 		ei = &prev_ex->ei;
 		en = prev_ex;
 	}
 
-	if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
+	if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
 		next_ex->ei.fofs = ei->fofs;
-		next_ex->ei.blk = ei->blk;
 		next_ex->ei.len += ei->len;
+		if (et->type == EX_READ)
+			next_ex->ei.blk = ei->blk;
 		if (en)
 			__release_extent_node(sbi, et, prev_ex);
 
@@ -452,12 +608,12 @@
 
 	__try_update_largest_extent(et, en);
 
-	spin_lock(&sbi->extent_lock);
+	spin_lock(&eti->extent_lock);
 	if (!list_empty(&en->list)) {
-		list_move_tail(&en->list, &sbi->extent_list);
+		list_move_tail(&en->list, &eti->extent_list);
 		et->cached_en = en;
 	}
-	spin_unlock(&sbi->extent_lock);
+	spin_unlock(&eti->extent_lock);
 	return en;
 }
 
@@ -467,6 +623,7 @@
 				struct rb_node *insert_parent,
 				bool leftmost)
 {
+	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 	struct rb_node **p;
 	struct rb_node *parent = NULL;
 	struct extent_node *en = NULL;
@@ -489,47 +646,54 @@
 	__try_update_largest_extent(et, en);
 
 	/* update in global extent list */
-	spin_lock(&sbi->extent_lock);
-	list_add_tail(&en->list, &sbi->extent_list);
+	spin_lock(&eti->extent_lock);
+	list_add_tail(&en->list, &eti->extent_list);
 	et->cached_en = en;
-	spin_unlock(&sbi->extent_lock);
+	spin_unlock(&eti->extent_lock);
 	return en;
 }
 
-static void f2fs_update_extent_tree_range(struct inode *inode,
-				pgoff_t fofs, block_t blkaddr, unsigned int len)
+static void __update_extent_tree_range(struct inode *inode,
+			struct extent_info *tei, enum extent_type type)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et = F2FS_I(inode)->extent_tree;
+	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 	struct extent_node *en = NULL, *en1 = NULL;
 	struct extent_node *prev_en = NULL, *next_en = NULL;
 	struct extent_info ei, dei, prev;
 	struct rb_node **insert_p = NULL, *insert_parent = NULL;
+	unsigned int fofs = tei->fofs, len = tei->len;
 	unsigned int end = fofs + len;
-	unsigned int pos = (unsigned int)fofs;
 	bool updated = false;
 	bool leftmost = false;
 
 	if (!et)
 		return;
 
-	trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len);
+	if (type == EX_READ)
+		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
+						tei->blk, 0);
+	else if (type == EX_BLOCK_AGE)
+		trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
+						tei->age, tei->last_blocks);
 
 	write_lock(&et->lock);
 
-	if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
-		write_unlock(&et->lock);
-		return;
+	if (type == EX_READ) {
+		if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
+			write_unlock(&et->lock);
+			return;
+		}
+
+		prev = et->largest;
+		dei.len = 0;
+
+		/*
+		 * drop largest extent before lookup, in case it's already
+		 * been shrunk from extent tree
+		 */
+		__drop_largest_extent(et, fofs, len);
 	}
-
-	prev = et->largest;
-	dei.len = 0;
-
-	/*
-	 * drop largest extent before lookup, in case it's already
-	 * been shrunk from extent tree
-	 */
-	__drop_largest_extent(et, fofs, len);
 
 	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
 	en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root,
@@ -550,26 +714,32 @@
 
 		dei = en->ei;
 		org_end = dei.fofs + dei.len;
-		f2fs_bug_on(sbi, pos >= org_end);
+		f2fs_bug_on(sbi, fofs >= org_end);
 
-		if (pos > dei.fofs && pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
-			en->ei.len = pos - en->ei.fofs;
+		if (fofs > dei.fofs && (type != EX_READ ||
+				fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
+			en->ei.len = fofs - en->ei.fofs;
 			prev_en = en;
 			parts = 1;
 		}
 
-		if (end < org_end && org_end - end >= F2FS_MIN_EXTENT_LEN) {
+		if (end < org_end && (type != EX_READ ||
+				org_end - end >= F2FS_MIN_EXTENT_LEN)) {
 			if (parts) {
-				set_extent_info(&ei, end,
-						end - dei.fofs + dei.blk,
-						org_end - end);
+				__set_extent_info(&ei,
+					end, org_end - end,
+					end - dei.fofs + dei.blk, false,
+					dei.age, dei.last_blocks,
+					type);
 				en1 = __insert_extent_tree(sbi, et, &ei,
 							NULL, NULL, true);
 				next_en = en1;
 			} else {
-				en->ei.fofs = end;
-				en->ei.blk += end - dei.fofs;
-				en->ei.len -= end - dei.fofs;
+				__set_extent_info(&en->ei,
+					end, en->ei.len - (end - dei.fofs),
+					en->ei.blk + (end - dei.fofs), true,
+					dei.age, dei.last_blocks,
+					type);
 				next_en = en;
 			}
 			parts++;
@@ -599,10 +769,15 @@
 		en = next_en;
 	}
 
-	/* 3. update extent in extent cache */
-	if (blkaddr) {
+	if (type == EX_BLOCK_AGE)
+		goto update_age_extent_cache;
 
-		set_extent_info(&ei, fofs, blkaddr, len);
+	/* 3. update extent in read extent cache */
+	BUG_ON(type != EX_READ);
+
+	if (tei->blk) {
+		__set_extent_info(&ei, fofs, len, tei->blk, false,
+				0, 0, EX_READ);
 		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
 			__insert_extent_tree(sbi, et, &ei,
 					insert_p, insert_parent, leftmost);
@@ -624,31 +799,182 @@
 		et->largest_updated = false;
 		updated = true;
 	}
+	goto out_read_extent_cache;
+update_age_extent_cache:
+	if (!tei->last_blocks)
+		goto out_read_extent_cache;
 
+	__set_extent_info(&ei, fofs, len, 0, false,
+			tei->age, tei->last_blocks, EX_BLOCK_AGE);
+	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
+		__insert_extent_tree(sbi, et, &ei,
+				insert_p, insert_parent, leftmost);
+out_read_extent_cache:
 	write_unlock(&et->lock);
 
 	if (updated)
 		f2fs_mark_inode_dirty_sync(inode, true);
 }
 
-unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
+				pgoff_t fofs, block_t blkaddr, unsigned int llen,
+				unsigned int c_len)
 {
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
+	struct extent_node *en = NULL;
+	struct extent_node *prev_en = NULL, *next_en = NULL;
+	struct extent_info ei;
+	struct rb_node **insert_p = NULL, *insert_parent = NULL;
+	bool leftmost = false;
+
+	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
+						blkaddr, c_len);
+
+	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
+	if (is_inode_flag_set(inode, FI_NO_EXTENT))
+		return;
+
+	write_lock(&et->lock);
+
+	en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root,
+				(struct rb_entry *)et->cached_en, fofs,
+				(struct rb_entry **)&prev_en,
+				(struct rb_entry **)&next_en,
+				&insert_p, &insert_parent, false,
+				&leftmost);
+	if (en)
+		goto unlock_out;
+
+	__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ);
+	ei.c_len = c_len;
+
+	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
+		__insert_extent_tree(sbi, et, &ei,
+				insert_p, insert_parent, leftmost);
+unlock_out:
+	write_unlock(&et->lock);
+}
+#endif
+
+static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi,
+						unsigned long long new,
+						unsigned long long old)
+{
+	unsigned int rem_old, rem_new;
+	unsigned long long res;
+	unsigned int weight = sbi->last_age_weight;
+
+	res = div_u64_rem(new, 100, &rem_new) * (100 - weight)
+		+ div_u64_rem(old, 100, &rem_old) * weight;
+
+	if (rem_new)
+		res += rem_new * (100 - weight) / 100;
+	if (rem_old)
+		res += rem_old * weight / 100;
+
+	return res;
+}
+
+/* This returns a new age and allocated blocks in ei */
+static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
+						block_t blkaddr)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	loff_t f_size = i_size_read(inode);
+	unsigned long long cur_blocks =
+				atomic64_read(&sbi->allocated_data_blocks);
+	struct extent_info tei = *ei;	/* only fofs and len are valid */
+
+	/*
+	 * When I/O is not aligned to a PAGE_SIZE, update will happen to the last
+	 * file block even in seq write. So don't record age for newly last file
+	 * block here.
+	 */
+	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
+			blkaddr == NEW_ADDR)
+		return -EINVAL;
+
+	if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) {
+		unsigned long long cur_age;
+
+		if (cur_blocks >= tei.last_blocks)
+			cur_age = cur_blocks - tei.last_blocks;
+		else
+			/* allocated_data_blocks overflow */
+			cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks;
+
+		if (tei.age)
+			ei->age = __calculate_block_age(sbi, cur_age, tei.age);
+		else
+			ei->age = cur_age;
+		ei->last_blocks = cur_blocks;
+		WARN_ON(ei->age > cur_blocks);
+		return 0;
+	}
+
+	f2fs_bug_on(sbi, blkaddr == NULL_ADDR);
+
+	/* the data block was allocated for the first time */
+	if (blkaddr == NEW_ADDR)
+		goto out;
+
+	if (__is_valid_data_blkaddr(blkaddr) &&
+	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
+		f2fs_bug_on(sbi, 1);
+		return -EINVAL;
+	}
+out:
+	/*
+	 * init block age with zero, this can happen when the block age extent
+	 * was reclaimed due to memory constraint or system reboot
+	 */
+	ei->age = 0;
+	ei->last_blocks = cur_blocks;
+	return 0;
+}
+
+static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
+{
+	struct extent_info ei = {};
+
+	if (!__may_extent_tree(dn->inode, type))
+		return;
+
+	ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
+								dn->ofs_in_node;
+	ei.len = 1;
+
+	if (type == EX_READ) {
+		if (dn->data_blkaddr == NEW_ADDR)
+			ei.blk = NULL_ADDR;
+		else
+			ei.blk = dn->data_blkaddr;
+	} else if (type == EX_BLOCK_AGE) {
+		if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr))
+			return;
+	}
+	__update_extent_tree_range(dn->inode, &ei, type);
+}
+
+static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
+					enum extent_type type)
+{
+	struct extent_tree_info *eti = &sbi->extent_tree[type];
 	struct extent_tree *et, *next;
 	struct extent_node *en;
 	unsigned int node_cnt = 0, tree_cnt = 0;
 	int remained;
 
-	if (!test_opt(sbi, EXTENT_CACHE))
-		return 0;
-
-	if (!atomic_read(&sbi->total_zombie_tree))
+	if (!atomic_read(&eti->total_zombie_tree))
 		goto free_node;
 
-	if (!mutex_trylock(&sbi->extent_tree_lock))
+	if (!mutex_trylock(&eti->extent_tree_lock))
 		goto out;
 
 	/* 1. remove unreferenced extent tree */
-	list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
+	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
 		if (atomic_read(&et->node_cnt)) {
 			write_lock(&et->lock);
 			node_cnt += __free_extent_tree(sbi, et);
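For reference, __calculate_block_age() in the hunk above blends the newly observed age with the previously cached one as a weighted average, roughly age = new * (100 - weight) / 100 + old * weight / 100, where weight comes from sbi->last_age_weight. div_u64_rem() divides each term by 100 before multiplying so the intermediate products stay within 64 bits, and the remainders are folded back in afterwards. Below is a small userspace sketch of the same arithmetic, illustration only; the weight value 30 is an assumption standing in for LAST_AGE_WEIGHT.

#include <stdint.h>
#include <stdio.h>

/*
 * Standalone sketch of the weighted average performed by
 * __calculate_block_age(); plain 64-bit division and remainder
 * stand in for div_u64_rem().
 */
static uint64_t blended_age(uint64_t new_delta, uint64_t old_age,
			    unsigned int weight)
{
	uint64_t res = (new_delta / 100) * (100 - weight) +
		       (old_age / 100) * weight;

	/* fold the remainders back in, as the kernel code does */
	res += (new_delta % 100) * (100 - weight) / 100;
	res += (old_age % 100) * weight / 100;
	return res;
}

int main(void)
{
	/* e.g. 1000 blocks allocated since the last write, previous age 2000 */
	printf("%llu\n", (unsigned long long)blended_age(1000, 2000, 30));
	/* prints 1300: 70% of the new delta plus 30% of the old age */
	return 0;
}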
@@ -656,61 +982,137 @@
 		}
 		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
 		list_del_init(&et->list);
-		radix_tree_delete(&sbi->extent_tree_root, et->ino);
+		radix_tree_delete(&eti->extent_tree_root, et->ino);
 		kmem_cache_free(extent_tree_slab, et);
-		atomic_dec(&sbi->total_ext_tree);
-		atomic_dec(&sbi->total_zombie_tree);
+		atomic_dec(&eti->total_ext_tree);
+		atomic_dec(&eti->total_zombie_tree);
 		tree_cnt++;
 
 		if (node_cnt + tree_cnt >= nr_shrink)
 			goto unlock_out;
 		cond_resched();
 	}
-	mutex_unlock(&sbi->extent_tree_lock);
+	mutex_unlock(&eti->extent_tree_lock);
 
 free_node:
 	/* 2. remove LRU extent entries */
-	if (!mutex_trylock(&sbi->extent_tree_lock))
+	if (!mutex_trylock(&eti->extent_tree_lock))
 		goto out;
 
 	remained = nr_shrink - (node_cnt + tree_cnt);
 
-	spin_lock(&sbi->extent_lock);
+	spin_lock(&eti->extent_lock);
 	for (; remained > 0; remained--) {
-		if (list_empty(&sbi->extent_list))
+		if (list_empty(&eti->extent_list))
 			break;
-		en = list_first_entry(&sbi->extent_list,
+		en = list_first_entry(&eti->extent_list,
 					struct extent_node, list);
 		et = en->et;
 		if (!write_trylock(&et->lock)) {
 			/* refresh this extent node's position in extent list */
-			list_move_tail(&en->list, &sbi->extent_list);
+			list_move_tail(&en->list, &eti->extent_list);
 			continue;
 		}
 
 		list_del_init(&en->list);
-		spin_unlock(&sbi->extent_lock);
+		spin_unlock(&eti->extent_lock);
 
 		__detach_extent_node(sbi, et, en);
 
 		write_unlock(&et->lock);
 		node_cnt++;
-		spin_lock(&sbi->extent_lock);
+		spin_lock(&eti->extent_lock);
 	}
-	spin_unlock(&sbi->extent_lock);
+	spin_unlock(&eti->extent_lock);
 
 unlock_out:
-	mutex_unlock(&sbi->extent_tree_lock);
+	mutex_unlock(&eti->extent_tree_lock);
 out:
-	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
+	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);
 
 	return node_cnt + tree_cnt;
 }
 
-unsigned int f2fs_destroy_extent_node(struct inode *inode)
+/* read extent cache operations */
+bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
+				struct extent_info *ei)
+{
+	if (!__may_extent_tree(inode, EX_READ))
+		return false;
+
+	return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
+}
+
+void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
+{
+	return __update_extent_cache(dn, EX_READ);
+}
+
+void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
+				pgoff_t fofs, block_t blkaddr, unsigned int len)
+{
+	struct extent_info ei = {
+		.fofs = fofs,
+		.len = len,
+		.blk = blkaddr,
+	};
+
+	if (!__may_extent_tree(dn->inode, EX_READ))
+		return;
+
+	__update_extent_tree_range(dn->inode, &ei, EX_READ);
+}
+
+unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
+{
+	if (!test_opt(sbi, READ_EXTENT_CACHE))
+		return 0;
+
+	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
+}
+
+/* block age extent cache operations */
+bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
+				struct extent_info *ei)
+{
+	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
+		return false;
+
+	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
+}
+
+void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
+{
+	return __update_extent_cache(dn, EX_BLOCK_AGE);
+}
+
+void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
+				pgoff_t fofs, unsigned int len)
+{
+	struct extent_info ei = {
+		.fofs = fofs,
+		.len = len,
+	};
+
+	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
+		return;
+
+	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
+}
+
+unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
+{
+	if (!test_opt(sbi, AGE_EXTENT_CACHE))
+		return 0;
+
+	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
+}
+
+static unsigned int __destroy_extent_node(struct inode *inode,
+					enum extent_type type)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et = F2FS_I(inode)->extent_tree;
+	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 	unsigned int node_cnt = 0;
 
 	if (!et || !atomic_read(&et->node_cnt))
@@ -723,32 +1125,47 @@
 	return node_cnt;
 }
 
-void f2fs_drop_extent_tree(struct inode *inode)
+void f2fs_destroy_extent_node(struct inode *inode)
+{
+	__destroy_extent_node(inode, EX_READ);
+	__destroy_extent_node(inode, EX_BLOCK_AGE);
+}
+
+static void __drop_extent_tree(struct inode *inode, enum extent_type type)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et = F2FS_I(inode)->extent_tree;
+	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 	bool updated = false;
 
-	if (!f2fs_may_extent_tree(inode))
+	if (!__may_extent_tree(inode, type))
 		return;
 
-	set_inode_flag(inode, FI_NO_EXTENT);
-
 	write_lock(&et->lock);
+	set_inode_flag(inode, FI_NO_EXTENT);
 	__free_extent_tree(sbi, et);
-	if (et->largest.len) {
-		et->largest.len = 0;
-		updated = true;
+	if (type == EX_READ) {
+		set_inode_flag(inode, FI_NO_EXTENT);
+		if (et->largest.len) {
+			et->largest.len = 0;
+			updated = true;
+		}
 	}
 	write_unlock(&et->lock);
 	if (updated)
 		f2fs_mark_inode_dirty_sync(inode, true);
 }
 
-void f2fs_destroy_extent_tree(struct inode *inode)
+void f2fs_drop_extent_tree(struct inode *inode)
+{
+	__drop_extent_tree(inode, EX_READ);
+	__drop_extent_tree(inode, EX_BLOCK_AGE);
+}
+
+static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et = F2FS_I(inode)->extent_tree;
+	struct extent_tree_info *eti = &sbi->extent_tree[type];
+	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 	unsigned int node_cnt = 0;
 
 	if (!et)
@@ -756,76 +1173,57 @@
 
 	if (inode->i_nlink && !is_bad_inode(inode) &&
 				atomic_read(&et->node_cnt)) {
-		mutex_lock(&sbi->extent_tree_lock);
-		list_add_tail(&et->list, &sbi->zombie_list);
-		atomic_inc(&sbi->total_zombie_tree);
-		mutex_unlock(&sbi->extent_tree_lock);
+		mutex_lock(&eti->extent_tree_lock);
+		list_add_tail(&et->list, &eti->zombie_list);
+		atomic_inc(&eti->total_zombie_tree);
+		mutex_unlock(&eti->extent_tree_lock);
 		return;
 	}
 
 	/* free all extent info belong to this extent tree */
-	node_cnt = f2fs_destroy_extent_node(inode);
+	node_cnt = __destroy_extent_node(inode, type);
 
 	/* delete extent tree entry in radix tree */
-	mutex_lock(&sbi->extent_tree_lock);
+	mutex_lock(&eti->extent_tree_lock);
 	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
-	radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
+	radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
 	kmem_cache_free(extent_tree_slab, et);
-	atomic_dec(&sbi->total_ext_tree);
-	mutex_unlock(&sbi->extent_tree_lock);
+	atomic_dec(&eti->total_ext_tree);
+	mutex_unlock(&eti->extent_tree_lock);
 
-	F2FS_I(inode)->extent_tree = NULL;
+	F2FS_I(inode)->extent_tree[type] = NULL;
 
-	trace_f2fs_destroy_extent_tree(inode, node_cnt);
+	trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
 }
 
-bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
-		struct extent_info *ei)
+void f2fs_destroy_extent_tree(struct inode *inode)
 {
-	if (!f2fs_may_extent_tree(inode))
-		return false;
-
-	return f2fs_lookup_extent_tree(inode, pgofs, ei);
+	__destroy_extent_tree(inode, EX_READ);
+	__destroy_extent_tree(inode, EX_BLOCK_AGE);
 }
 
-void f2fs_update_extent_cache(struct dnode_of_data *dn)
+static void __init_extent_tree_info(struct extent_tree_info *eti)
 {
-	pgoff_t fofs;
-	block_t blkaddr;
-
-	if (!f2fs_may_extent_tree(dn->inode))
-		return;
-
-	if (dn->data_blkaddr == NEW_ADDR)
-		blkaddr = NULL_ADDR;
-	else
-		blkaddr = dn->data_blkaddr;
-
-	fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
-						dn->ofs_in_node;
-	f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1);
-}
-
-void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
-				pgoff_t fofs, block_t blkaddr, unsigned int len)
-
-{
-	if (!f2fs_may_extent_tree(dn->inode))
-		return;
-
-	f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len);
+	INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
+	mutex_init(&eti->extent_tree_lock);
+	INIT_LIST_HEAD(&eti->extent_list);
+	spin_lock_init(&eti->extent_lock);
+	atomic_set(&eti->total_ext_tree, 0);
+	INIT_LIST_HEAD(&eti->zombie_list);
+	atomic_set(&eti->total_zombie_tree, 0);
+	atomic_set(&eti->total_ext_node, 0);
 }
 
 void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
 {
-	INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
-	mutex_init(&sbi->extent_tree_lock);
-	INIT_LIST_HEAD(&sbi->extent_list);
-	spin_lock_init(&sbi->extent_lock);
-	atomic_set(&sbi->total_ext_tree, 0);
-	INIT_LIST_HEAD(&sbi->zombie_list);
-	atomic_set(&sbi->total_zombie_tree, 0);
-	atomic_set(&sbi->total_ext_node, 0);
+	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
+	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);
+
+	/* initialize for block age extents */
+	atomic64_set(&sbi->allocated_data_blocks, 0);
+	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
+	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
+	sbi->last_age_weight = LAST_AGE_WEIGHT;
 }
 
 int __init f2fs_create_extent_cache(void)