hc
2023-12-08 01573e231f18eb2d99162747186f59511f56b64d
kernel/fs/f2fs/segment.h
....@@ -16,13 +16,21 @@
1616 #define DEF_MAX_RECLAIM_PREFREE_SEGMENTS 4096 /* 8GB in maximum */
1717
1818 #define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */
19
+#define F2FS_MIN_META_SEGMENTS 8 /* SB + 2 (CP + SIT + NAT) + SSA */
1920
2021 /* L: Logical segment # in volume, R: Relative segment # in main area */
2122 #define GET_L2R_SEGNO(free_i, segno) ((segno) - (free_i)->start_segno)
2223 #define GET_R2L_SEGNO(free_i, segno) ((segno) + (free_i)->start_segno)
2324
2425 #define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA)
25
-#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE)
26
+#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
27
+#define SE_PAGETYPE(se) ((IS_NODESEG((se)->type) ? NODE : DATA))
28
+
29
+static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
30
+ unsigned short seg_type)
31
+{
32
+ f2fs_bug_on(sbi, seg_type >= NR_PERSISTENT_LOG);
33
+}
2634
2735 #define IS_HOT(t) ((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
2836 #define IS_WARM(t) ((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
....@@ -34,7 +42,9 @@
3442 ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
3543 ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
3644 ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
37
- ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
45
+ ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) || \
46
+ ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno) || \
47
+ ((seg) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno))
3848
3949 #define IS_CURSEC(sbi, secno) \
4050 (((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
....@@ -48,7 +58,11 @@
4858 ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \
4959 (sbi)->segs_per_sec) || \
5060 ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
51
- (sbi)->segs_per_sec)) \
61
+ (sbi)->segs_per_sec) || \
62
+ ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno / \
63
+ (sbi)->segs_per_sec) || \
64
+ ((secno) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno / \
65
+ (sbi)->segs_per_sec))
5266
5367 #define MAIN_BLKADDR(sbi) \
5468 (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \
....@@ -132,20 +146,25 @@
132146 * In the victim_sel_policy->alloc_mode, there are three block allocation modes.
133147 * LFS writes data sequentially with cleaning operations.
134148 * SSR (Slack Space Recycle) reuses obsolete space without cleaning operations.
149
+ * AT_SSR (Age Threshold based Slack Space Recycle) merges fragments into
150
+ * fragmented segment which has similar aging degree.
135151 */
136152 enum {
137153 LFS = 0,
138
- SSR
154
+ SSR,
155
+ AT_SSR,
139156 };
140157
141158 /*
142159 * In the victim_sel_policy->gc_mode, there are three gc, aka cleaning, modes.
143160 * GC_CB is based on cost-benefit algorithm.
144161 * GC_GREEDY is based on greedy algorithm.
162
+ * GC_AT is based on age-threshold algorithm.
145163 */
146164 enum {
147165 GC_CB = 0,
148166 GC_GREEDY,
167
+ GC_AT,
149168 ALLOC_NEXT,
150169 FLUSH_DEVICE,
151170 MAX_GC_POLICY,
....@@ -154,24 +173,28 @@
154173 /*
155174 * BG_GC means the background cleaning job.
156175 * FG_GC means the on-demand cleaning job.
157
- * FORCE_FG_GC means on-demand cleaning job in background.
158176 */
159177 enum {
160178 BG_GC = 0,
161179 FG_GC,
162
- FORCE_FG_GC,
163180 };
164181
165182 /* for a function parameter to select a victim segment */
166183 struct victim_sel_policy {
167184 int alloc_mode; /* LFS, SSR or AT_SSR */
168185 int gc_mode; /* GC_CB, GC_GREEDY or GC_AT */
169
- unsigned long *dirty_segmap; /* dirty segment bitmap */
170
- unsigned int max_search; /* maximum # of segments to search */
186
+ unsigned long *dirty_bitmap; /* dirty segment/section bitmap */
187
+ unsigned int max_search; /*
188
+ * maximum # of segments/sections
189
+ * to search
190
+ */
171191 unsigned int offset; /* last scanned bitmap offset */
172192 unsigned int ofs_unit; /* bitmap search unit */
173193 unsigned int min_cost; /* minimum cost */
194
+ unsigned long long oldest_age; /* oldest age of segments having the same min cost */
174195 unsigned int min_segno; /* segment # having min. cost */
196
+ unsigned long long age; /* mtime of GCed section */
197
+ unsigned long long age_threshold;/* age threshold */
175198 };
176199
177200 struct seg_entry {
....@@ -184,7 +207,7 @@
184207 unsigned char *cur_valid_map_mir; /* mirror of current valid bitmap */
185208 #endif
186209 /*
187
- * # of valid blocks and the validity bitmap stored in the the last
210
+ * # of valid blocks and the validity bitmap stored in the last
188211 * checkpoint pack. This information is used by the SSR mode.
189212 */
190213 unsigned char *ckpt_valid_map; /* validity bitmap of blocks last cp */
....@@ -237,6 +260,8 @@
237260 unsigned long long mounted_time; /* mount time */
238261 unsigned long long min_mtime; /* min. modification time */
239262 unsigned long long max_mtime; /* max. modification time */
263
+ unsigned long long dirty_min_mtime; /* rerange candidates in GC_AT */
264
+ unsigned long long dirty_max_mtime; /* rerange candidates in GC_AT */
240265
241266 unsigned int last_victim[MAX_GC_POLICY]; /* last victim segment # */
242267 };
....@@ -266,6 +291,7 @@
266291 struct dirty_seglist_info {
267292 const struct victim_selection *v_ops; /* victim selection operation */
268293 unsigned long *dirty_segmap[NR_DIRTY_TYPE];
294
+ unsigned long *dirty_secmap;
269295 struct mutex seglist_lock; /* lock for segment bitmaps */
270296 int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */
271297 unsigned long *victim_secmap; /* background GC victims */
....@@ -274,7 +300,7 @@
274300 /* victim selection function for cleaning and SSR */
275301 struct victim_selection {
276302 int (*get_victim)(struct f2fs_sb_info *, unsigned int *,
277
- int, int, char);
303
+ int, int, char, unsigned long long);
278304 };
279305
280306 /* for active log information */
....@@ -284,10 +310,12 @@
284310 struct rw_semaphore journal_rwsem; /* protect journal area */
285311 struct f2fs_journal *journal; /* cached journal info */
286312 unsigned char alloc_type; /* current allocation type */
313
+ unsigned short seg_type; /* segment type like CURSEG_XXX_TYPE */
287314 unsigned int segno; /* current segment number */
288315 unsigned short next_blkoff; /* next block offset to write */
289316 unsigned int zone; /* current zone number */
290317 unsigned int next_segno; /* preallocated segment */
318
+ bool inited; /* indicate inmem log is inited */
291319 };
292320
293321 struct sit_entry_set {
....@@ -301,8 +329,6 @@
301329 */
302330 static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
303331 {
304
- if (type == CURSEG_COLD_DATA_PINNED)
305
- type = CURSEG_COLD_DATA;
306332 return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
307333 }
308334
....@@ -334,8 +360,20 @@
334360 }
335361
336362 static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
337
- unsigned int segno)
363
+ unsigned int segno, bool use_section)
338364 {
365
+ if (use_section && __is_large_section(sbi)) {
366
+ unsigned int start_segno = START_SEGNO(segno);
367
+ unsigned int blocks = 0;
368
+ int i;
369
+
370
+ for (i = 0; i < sbi->segs_per_sec; i++, start_segno++) {
371
+ struct seg_entry *se = get_seg_entry(sbi, start_segno);
372
+
373
+ blocks += se->ckpt_valid_blocks;
374
+ }
375
+ return blocks;
376
+ }
339377 return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
340378 }
341379
....@@ -407,6 +445,7 @@
407445 unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
408446 unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
409447 unsigned int next;
448
+ unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno);
410449
411450 spin_lock(&free_i->segmap_lock);
412451 clear_bit(segno, free_i->free_segmap);
....@@ -414,7 +453,7 @@
414453
415454 next = find_next_bit(free_i->free_segmap,
416455 start_segno + sbi->segs_per_sec, start_segno);
417
- if (next >= start_segno + sbi->segs_per_sec) {
456
+ if (next >= start_segno + usable_segs) {
418457 clear_bit(secno, free_i->free_secmap);
419458 free_i->free_sections++;
420459 }
....@@ -434,22 +473,23 @@
434473 }
435474
436475 static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
437
- unsigned int segno)
476
+ unsigned int segno, bool inmem)
438477 {
439478 struct free_segmap_info *free_i = FREE_I(sbi);
440479 unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
441480 unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
442481 unsigned int next;
482
+ unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno);
443483
444484 spin_lock(&free_i->segmap_lock);
445485 if (test_and_clear_bit(segno, free_i->free_segmap)) {
446486 free_i->free_segments++;
447487
448
- if (IS_CURSEC(sbi, secno))
488
+ if (!inmem && IS_CURSEC(sbi, secno))
449489 goto skip_free;
450490 next = find_next_bit(free_i->free_segmap,
451491 start_segno + sbi->segs_per_sec, start_segno);
452
- if (next >= start_segno + sbi->segs_per_sec) {
492
+ if (next >= start_segno + usable_segs) {
453493 if (test_and_clear_bit(secno, free_i->free_secmap))
454494 free_i->free_sections++;
455495 }
....@@ -496,9 +536,10 @@
496536 return FREE_I(sbi)->free_segments;
497537 }
498538
499
-static inline int reserved_segments(struct f2fs_sb_info *sbi)
539
+static inline unsigned int reserved_segments(struct f2fs_sb_info *sbi)
500540 {
501
- return SM_I(sbi)->reserved_segments;
541
+ return SM_I(sbi)->reserved_segments +
542
+ SM_I(sbi)->additional_reserved_segments;
502543 }
503544
504545 static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
....@@ -528,22 +569,21 @@
528569
529570 static inline int reserved_sections(struct f2fs_sb_info *sbi)
530571 {
531
- return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi));
572
+ return GET_SEC_FROM_SEG(sbi, reserved_segments(sbi));
532573 }
533574
534
-static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi)
575
+static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
576
+ unsigned int node_blocks, unsigned int dent_blocks)
535577 {
536
- unsigned int node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) +
537
- get_pages(sbi, F2FS_DIRTY_DENTS);
538
- unsigned int dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS);
578
+
539579 unsigned int segno, left_blocks;
540580 int i;
541581
542582 /* check current node segment */
543583 for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) {
544584 segno = CURSEG_I(sbi, i)->segno;
545
- left_blocks = sbi->blocks_per_seg -
546
- get_seg_entry(sbi, segno)->ckpt_valid_blocks;
585
+ left_blocks = f2fs_usable_blks_in_seg(sbi, segno) -
586
+ get_seg_entry(sbi, segno)->ckpt_valid_blocks;
547587
548588 if (node_blocks > left_blocks)
549589 return false;
....@@ -551,7 +591,7 @@
551591
552592 /* check current data segment */
553593 segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno;
554
- left_blocks = sbi->blocks_per_seg -
594
+ left_blocks = f2fs_usable_blks_in_seg(sbi, segno) -
555595 get_seg_entry(sbi, segno)->ckpt_valid_blocks;
556596 if (dent_blocks > left_blocks)
557597 return false;
....@@ -561,19 +601,28 @@
561601 static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
562602 int freed, int needed)
563603 {
564
- int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
565
- int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
566
- int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
604
+ unsigned int total_node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) +
605
+ get_pages(sbi, F2FS_DIRTY_DENTS) +
606
+ get_pages(sbi, F2FS_DIRTY_IMETA);
607
+ unsigned int total_dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS);
608
+ unsigned int node_secs = total_node_blocks / BLKS_PER_SEC(sbi);
609
+ unsigned int dent_secs = total_dent_blocks / BLKS_PER_SEC(sbi);
610
+ unsigned int node_blocks = total_node_blocks % BLKS_PER_SEC(sbi);
611
+ unsigned int dent_blocks = total_dent_blocks % BLKS_PER_SEC(sbi);
612
+ unsigned int free, need_lower, need_upper;
567613
568614 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
569615 return false;
570616
571
- if (free_sections(sbi) + freed == reserved_sections(sbi) + needed &&
572
- has_curseg_enough_space(sbi))
617
+ free = free_sections(sbi) + freed;
618
+ need_lower = node_secs + dent_secs + reserved_sections(sbi) + needed;
619
+ need_upper = need_lower + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0);
620
+
621
+ if (free > need_upper)
573622 return false;
574
- return (free_sections(sbi) + freed) <=
575
- (node_secs + 2 * dent_secs + imeta_secs +
576
- reserved_sections(sbi) + needed);
623
+ else if (free <= need_lower)
624
+ return true;
625
+ return !has_curseg_enough_space(sbi, node_blocks, dent_blocks);
577626 }
578627
579628 static inline bool f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi)
....@@ -610,7 +659,9 @@
610659 * pages over min_fsync_blocks. (=default option)
611660 * F2FS_IPU_ASYNC - do IPU given by asynchronous write requests.
612661 * F2FS_IPU_NOCACHE - disable IPU bio cache.
613
- * F2FS_IPUT_DISABLE - disable IPU. (=default option in LFS mode)
662
+ * F2FS_IPU_HONOR_OPU_WRITE - use OPU write prior to IPU write if inode has
663
+ * FI_OPU_WRITE flag.
664
+ * F2FS_IPU_DISABLE - disable IPU. (=default option in LFS mode)
614665 */
615666 #define DEF_MIN_IPU_UTIL 70
616667 #define DEF_MIN_FSYNC_BLOCKS 8
....@@ -626,6 +677,7 @@
626677 F2FS_IPU_FSYNC,
627678 F2FS_IPU_ASYNC,
628679 F2FS_IPU_NOCACHE,
680
+ F2FS_IPU_HONOR_OPU_WRITE,
629681 };
630682
631683 static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi,
....@@ -673,21 +725,22 @@
673725 bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
674726 int valid_blocks = 0;
675727 int cur_pos = 0, next_pos;
728
+ unsigned int usable_blks_per_seg = f2fs_usable_blks_in_seg(sbi, segno);
676729
677730 /* check bitmap with valid block count */
678731 do {
679732 if (is_valid) {
680733 next_pos = find_next_zero_bit_le(&raw_sit->valid_map,
681
- sbi->blocks_per_seg,
734
+ usable_blks_per_seg,
682735 cur_pos);
683736 valid_blocks += next_pos - cur_pos;
684737 } else
685738 next_pos = find_next_bit_le(&raw_sit->valid_map,
686
- sbi->blocks_per_seg,
739
+ usable_blks_per_seg,
687740 cur_pos);
688741 cur_pos = next_pos;
689742 is_valid = !is_valid;
690
- } while (cur_pos < sbi->blocks_per_seg);
743
+ } while (cur_pos < usable_blks_per_seg);
691744
692745 if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) {
693746 f2fs_err(sbi, "Mismatch valid blocks %d vs. %d",
....@@ -696,8 +749,13 @@
696749 return -EFSCORRUPTED;
697750 }
698751
752
+ if (usable_blks_per_seg < sbi->blocks_per_seg)
753
+ f2fs_bug_on(sbi, find_next_bit_le(&raw_sit->valid_map,
754
+ sbi->blocks_per_seg,
755
+ usable_blks_per_seg) != sbi->blocks_per_seg);
756
+
699757 /* check segment usage, and check boundary of a given segment number */
700
- if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
758
+ if (unlikely(GET_SIT_VBLOCKS(raw_sit) > usable_blks_per_seg
701759 || segno > TOTAL_SEGS(sbi) - 1)) {
702760 f2fs_err(sbi, "Wrong valid blocks %d or segno %u",
703761 GET_SIT_VBLOCKS(raw_sit), segno);