hc
2024-10-12 a5969cabbb4660eab42b6ef0412cbbd1200cf14d
kernel/fs/f2fs/segment.h
@@ -16,13 +16,21 @@
 #define DEF_MAX_RECLAIM_PREFREE_SEGMENTS 4096 /* 8GB in maximum */
 
 #define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */
+#define F2FS_MIN_META_SEGMENTS 8 /* SB + 2 (CP + SIT + NAT) + SSA */
 
 /* L: Logical segment # in volume, R: Relative segment # in main area */
 #define GET_L2R_SEGNO(free_i, segno) ((segno) - (free_i)->start_segno)
 #define GET_R2L_SEGNO(free_i, segno) ((segno) + (free_i)->start_segno)
 
 #define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA)
-#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE)
+#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
+#define SE_PAGETYPE(se) ((IS_NODESEG((se)->type) ? NODE : DATA))
+
+static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
+                                                unsigned short seg_type)
+{
+        f2fs_bug_on(sbi, seg_type >= NR_PERSISTENT_LOG);
+}
 
 #define IS_HOT(t) ((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
 #define IS_WARM(t) ((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
@@ -34,7 +42,9 @@
         ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
         ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
         ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
-        ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
+        ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) || \
+        ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno) || \
+        ((seg) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno))
 
 #define IS_CURSEC(sbi, secno) \
        (((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
@@ -48,7 +58,11 @@
        ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \
          (sbi)->segs_per_sec) || \
        ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
-         (sbi)->segs_per_sec)) \
+         (sbi)->segs_per_sec) || \
+       ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno / \
+         (sbi)->segs_per_sec) || \
+       ((secno) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno / \
+         (sbi)->segs_per_sec))
 
 #define MAIN_BLKADDR(sbi) \
        (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \
@@ -87,6 +101,12 @@
                 GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
 #define BLKS_PER_SEC(sbi) \
        ((sbi)->segs_per_sec * (sbi)->blocks_per_seg)
+#define CAP_BLKS_PER_SEC(sbi) \
+       ((sbi)->segs_per_sec * (sbi)->blocks_per_seg - \
+        (sbi)->unusable_blocks_per_sec)
+#define CAP_SEGS_PER_SEC(sbi) \
+       ((sbi)->segs_per_sec - ((sbi)->unusable_blocks_per_sec >>\
+        (sbi)->log_blocks_per_seg))
 #define GET_SEC_FROM_SEG(sbi, segno) \
        (((segno) == -1) ? -1: (segno) / (sbi)->segs_per_sec)
 #define GET_SEG_FROM_SEC(sbi, secno) \
@@ -132,20 +152,25 @@
  * In the victim_sel_policy->alloc_mode, there are two block allocation modes.
  * LFS writes data sequentially with cleaning operations.
  * SSR (Slack Space Recycle) reuses obsolete space without cleaning operations.
+ * AT_SSR (Age Threshold based Slack Space Recycle) merges fragments into
+ * fragmented segment which has similar aging degree.
  */
 enum {
        LFS = 0,
-       SSR
+       SSR,
+       AT_SSR,
 };
 
 /*
  * In the victim_sel_policy->gc_mode, there are two gc, aka cleaning, modes.
  * GC_CB is based on cost-benefit algorithm.
  * GC_GREEDY is based on greedy algorithm.
+ * GC_AT is based on age-threshold algorithm.
  */
 enum {
        GC_CB = 0,
        GC_GREEDY,
+       GC_AT,
        ALLOC_NEXT,
        FLUSH_DEVICE,
        MAX_GC_POLICY,
@@ -154,24 +179,28 @@
 /*
  * BG_GC means the background cleaning job.
  * FG_GC means the on-demand cleaning job.
- * FORCE_FG_GC means on-demand cleaning job in background.
  */
 enum {
        BG_GC = 0,
        FG_GC,
-       FORCE_FG_GC,
 };
 
 /* for a function parameter to select a victim segment */
 struct victim_sel_policy {
        int alloc_mode; /* LFS or SSR */
        int gc_mode; /* GC_CB or GC_GREEDY */
-       unsigned long *dirty_segmap; /* dirty segment bitmap */
-       unsigned int max_search; /* maximum # of segments to search */
+       unsigned long *dirty_bitmap; /* dirty segment/section bitmap */
+       unsigned int max_search; /*
+                                 * maximum # of segments/sections
+                                 * to search
+                                 */
        unsigned int offset; /* last scanned bitmap offset */
        unsigned int ofs_unit; /* bitmap search unit */
        unsigned int min_cost; /* minimum cost */
+       unsigned long long oldest_age; /* oldest age of segments having the same min cost */
        unsigned int min_segno; /* segment # having min. cost */
+       unsigned long long age; /* mtime of GCed section*/
+       unsigned long long age_threshold;/* age threshold */
 };
 
 struct seg_entry {
@@ -184,7 +213,7 @@
        unsigned char *cur_valid_map_mir; /* mirror of current valid bitmap */
 #endif
        /*
-        * # of valid blocks and the validity bitmap stored in the the last
+        * # of valid blocks and the validity bitmap stored in the last
         * checkpoint pack. This information is used by the SSR mode.
         */
        unsigned char *ckpt_valid_map; /* validity bitmap of blocks last cp */
@@ -237,6 +266,8 @@
        unsigned long long mounted_time; /* mount time */
        unsigned long long min_mtime; /* min. modification time */
        unsigned long long max_mtime; /* max. modification time */
+       unsigned long long dirty_min_mtime; /* rerange candidates in GC_AT */
+       unsigned long long dirty_max_mtime; /* rerange candidates in GC_AT */
 
        unsigned int last_victim[MAX_GC_POLICY]; /* last victim segment # */
 };
@@ -266,15 +297,19 @@
 struct dirty_seglist_info {
        const struct victim_selection *v_ops; /* victim selction operation */
        unsigned long *dirty_segmap[NR_DIRTY_TYPE];
+       unsigned long *dirty_secmap;
        struct mutex seglist_lock; /* lock for segment bitmaps */
        int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */
        unsigned long *victim_secmap; /* background GC victims */
+       unsigned long *pinned_secmap; /* pinned victims from foreground GC */
+       unsigned int pinned_secmap_cnt; /* count of victims which has pinned data */
+       bool enable_pin_section; /* enable pinning section */
 };
 
 /* victim selection function for cleaning and SSR */
 struct victim_selection {
        int (*get_victim)(struct f2fs_sb_info *, unsigned int *,
-                       int, int, char);
+                       int, int, char, unsigned long long);
 };
 
 /* for active log information */
@@ -284,10 +319,12 @@
        struct rw_semaphore journal_rwsem; /* protect journal area */
        struct f2fs_journal *journal; /* cached journal info */
        unsigned char alloc_type; /* current allocation type */
+       unsigned short seg_type; /* segment type like CURSEG_XXX_TYPE */
        unsigned int segno; /* current segment number */
        unsigned short next_blkoff; /* next block offset to write */
        unsigned int zone; /* current zone number */
        unsigned int next_segno; /* preallocated segment */
+       bool inited; /* indicate inmem log is inited */
 };
 
 struct sit_entry_set {
@@ -301,8 +338,6 @@
  */
 static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
 {
-       if (type == CURSEG_COLD_DATA_PINNED)
-               type = CURSEG_COLD_DATA;
        return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
 }
 
@@ -334,8 +369,20 @@
 }
 
 static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
-                               unsigned int segno)
+                               unsigned int segno, bool use_section)
 {
+       if (use_section && __is_large_section(sbi)) {
+               unsigned int start_segno = START_SEGNO(segno);
+               unsigned int blocks = 0;
+               int i;
+
+               for (i = 0; i < sbi->segs_per_sec; i++, start_segno++) {
+                       struct seg_entry *se = get_seg_entry(sbi, start_segno);
+
+                       blocks += se->ckpt_valid_blocks;
+               }
+               return blocks;
+       }
        return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
 }
 
@@ -407,6 +454,7 @@
        unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
        unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
        unsigned int next;
+       unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno);
 
        spin_lock(&free_i->segmap_lock);
        clear_bit(segno, free_i->free_segmap);
@@ -414,7 +462,7 @@
 
        next = find_next_bit(free_i->free_segmap,
                        start_segno + sbi->segs_per_sec, start_segno);
-       if (next >= start_segno + sbi->segs_per_sec) {
+       if (next >= start_segno + usable_segs) {
                clear_bit(secno, free_i->free_secmap);
                free_i->free_sections++;
        }
@@ -434,22 +482,23 @@
 }
 
 static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
-               unsigned int segno)
+               unsigned int segno, bool inmem)
 {
        struct free_segmap_info *free_i = FREE_I(sbi);
        unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
        unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
        unsigned int next;
+       unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno);
 
        spin_lock(&free_i->segmap_lock);
        if (test_and_clear_bit(segno, free_i->free_segmap)) {
                free_i->free_segments++;
 
-               if (IS_CURSEC(sbi, secno))
+               if (!inmem && IS_CURSEC(sbi, secno))
                        goto skip_free;
                next = find_next_bit(free_i->free_segmap,
                                start_segno + sbi->segs_per_sec, start_segno);
-               if (next >= start_segno + sbi->segs_per_sec) {
+               if (next >= start_segno + usable_segs) {
                        if (test_and_clear_bit(secno, free_i->free_secmap))
                                free_i->free_sections++;
                }
@@ -496,9 +545,10 @@
        return FREE_I(sbi)->free_segments;
 }
 
-static inline int reserved_segments(struct f2fs_sb_info *sbi)
+static inline unsigned int reserved_segments(struct f2fs_sb_info *sbi)
 {
-       return SM_I(sbi)->reserved_segments;
+       return SM_I(sbi)->reserved_segments +
+                       SM_I(sbi)->additional_reserved_segments;
 }
 
 static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
@@ -528,22 +578,21 @@
 
 static inline int reserved_sections(struct f2fs_sb_info *sbi)
 {
-       return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi));
+       return GET_SEC_FROM_SEG(sbi, reserved_segments(sbi));
 }
 
-static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi)
+static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
+                       unsigned int node_blocks, unsigned int dent_blocks)
 {
-       unsigned int node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) +
-                                       get_pages(sbi, F2FS_DIRTY_DENTS);
-       unsigned int dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS);
+
        unsigned int segno, left_blocks;
        int i;
 
        /* check current node segment */
        for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) {
                segno = CURSEG_I(sbi, i)->segno;
-               left_blocks = sbi->blocks_per_seg -
-                       get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+               left_blocks = f2fs_usable_blks_in_seg(sbi, segno) -
+                               get_seg_entry(sbi, segno)->ckpt_valid_blocks;
 
                if (node_blocks > left_blocks)
                        return false;
@@ -551,7 +600,7 @@
 
        /* check current data segment */
        segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno;
-       left_blocks = sbi->blocks_per_seg -
+       left_blocks = f2fs_usable_blks_in_seg(sbi, segno) -
                        get_seg_entry(sbi, segno)->ckpt_valid_blocks;
        if (dent_blocks > left_blocks)
                return false;
@@ -561,19 +610,28 @@
 static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
                                        int freed, int needed)
 {
-       int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
-       int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
-       int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
+       unsigned int total_node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) +
+                                       get_pages(sbi, F2FS_DIRTY_DENTS) +
+                                       get_pages(sbi, F2FS_DIRTY_IMETA);
+       unsigned int total_dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS);
+       unsigned int node_secs = total_node_blocks / BLKS_PER_SEC(sbi);
+       unsigned int dent_secs = total_dent_blocks / BLKS_PER_SEC(sbi);
+       unsigned int node_blocks = total_node_blocks % BLKS_PER_SEC(sbi);
+       unsigned int dent_blocks = total_dent_blocks % BLKS_PER_SEC(sbi);
+       unsigned int free, need_lower, need_upper;
 
        if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
                return false;
 
-       if (free_sections(sbi) + freed == reserved_sections(sbi) + needed &&
-                       has_curseg_enough_space(sbi))
+       free = free_sections(sbi) + freed;
+       need_lower = node_secs + dent_secs + reserved_sections(sbi) + needed;
+       need_upper = need_lower + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0);
+
+       if (free > need_upper)
                return false;
-       return (free_sections(sbi) + freed) <=
-               (node_secs + 2 * dent_secs + imeta_secs +
-               reserved_sections(sbi) + needed);
+       else if (free <= need_lower)
+               return true;
+       return !has_curseg_enough_space(sbi, node_blocks, dent_blocks);
 }
 
 static inline bool f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi)
@@ -610,7 +668,9 @@
  * pages over min_fsync_blocks. (=default option)
  * F2FS_IPU_ASYNC - do IPU given by asynchronous write requests.
  * F2FS_IPU_NOCACHE - disable IPU bio cache.
- * F2FS_IPUT_DISABLE - disable IPU. (=default option in LFS mode)
+ * F2FS_IPU_HONOR_OPU_WRITE - use OPU write prior to IPU write if inode has
+ *                            FI_OPU_WRITE flag.
+ * F2FS_IPU_DISABLE - disable IPU. (=default option in LFS mode)
  */
 #define DEF_MIN_IPU_UTIL 70
 #define DEF_MIN_FSYNC_BLOCKS 8
@@ -626,6 +686,7 @@
        F2FS_IPU_FSYNC,
        F2FS_IPU_ASYNC,
        F2FS_IPU_NOCACHE,
+       F2FS_IPU_HONOR_OPU_WRITE,
 };
 
 static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi,
@@ -673,21 +734,22 @@
        bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
        int valid_blocks = 0;
        int cur_pos = 0, next_pos;
+       unsigned int usable_blks_per_seg = f2fs_usable_blks_in_seg(sbi, segno);
 
        /* check bitmap with valid block count */
        do {
                if (is_valid) {
                        next_pos = find_next_zero_bit_le(&raw_sit->valid_map,
-                                       sbi->blocks_per_seg,
+                                       usable_blks_per_seg,
                                        cur_pos);
                        valid_blocks += next_pos - cur_pos;
                } else
                        next_pos = find_next_bit_le(&raw_sit->valid_map,
-                                       sbi->blocks_per_seg,
+                                       usable_blks_per_seg,
                                        cur_pos);
                cur_pos = next_pos;
                is_valid = !is_valid;
-       } while (cur_pos < sbi->blocks_per_seg);
+       } while (cur_pos < usable_blks_per_seg);
 
        if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) {
                f2fs_err(sbi, "Mismatch valid blocks %d vs. %d",
@@ -696,8 +758,13 @@
                return -EFSCORRUPTED;
        }
 
+       if (usable_blks_per_seg < sbi->blocks_per_seg)
+               f2fs_bug_on(sbi, find_next_bit_le(&raw_sit->valid_map,
+                               sbi->blocks_per_seg,
+                               usable_blks_per_seg) != sbi->blocks_per_seg);
+
        /* check segment usage, and check boundary of a given segment number */
-       if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
+       if (unlikely(GET_SIT_VBLOCKS(raw_sit) > usable_blks_per_seg
                || segno > TOTAL_SEGS(sbi) - 1)) {
                f2fs_err(sbi, "Wrong valid blocks %d or segno %u",
                        GET_SIT_VBLOCKS(raw_sit), segno);