hc
2024-05-13 9d77db3c730780c8ef5ccd4b66403ff5675cfe4e
kernel/fs/f2fs/gc.c
....@@ -7,6 +7,7 @@
77 */
88 #include <linux/fs.h>
99 #include <linux/module.h>
10
+#include <linux/mount.h>
1011 #include <linux/backing-dev.h>
1112 #include <linux/init.h>
1213 #include <linux/f2fs_fs.h>
....@@ -21,23 +22,33 @@
2122 #include "gc.h"
2223 #include <trace/events/f2fs.h>
2324
25
+static struct kmem_cache *victim_entry_slab;
26
+
27
+static unsigned int count_bits(const unsigned long *addr,
28
+ unsigned int offset, unsigned int len);
29
+
2430 static int gc_thread_func(void *data)
2531 {
2632 struct f2fs_sb_info *sbi = data;
2733 struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
2834 wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
35
+ wait_queue_head_t *fggc_wq = &sbi->gc_thread->fggc_wq;
2936 unsigned int wait_ms;
3037
3138 wait_ms = gc_th->min_sleep_time;
3239
3340 set_freezable();
3441 do {
35
- bool sync_mode;
42
+ bool sync_mode, foreground = false;
3643
3744 wait_event_interruptible_timeout(*wq,
3845 kthread_should_stop() || freezing(current) ||
46
+ waitqueue_active(fggc_wq) ||
3947 gc_th->gc_wake,
4048 msecs_to_jiffies(wait_ms));
49
+
50
+ if (test_opt(sbi, GC_MERGE) && waitqueue_active(fggc_wq))
51
+ foreground = true;
4152
4253 /* give it a try one time */
4354 if (gc_th->gc_wake)
....@@ -58,7 +69,8 @@
5869
5970 if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
6071 f2fs_show_injection_info(sbi, FAULT_CHECKPOINT);
61
- f2fs_stop_checkpoint(sbi, false);
72
+ f2fs_stop_checkpoint(sbi, false,
73
+ STOP_CP_REASON_FAULT_INJECT);
6274 }
6375
6476 if (!sb_start_write_trylock(sbi->sb)) {
....@@ -79,20 +91,24 @@
7991 * invalidated soon after by user update or deletion.
8092 * So, I'd like to wait some time to collect dirty segments.
8193 */
82
- if (sbi->gc_mode == GC_URGENT) {
94
+ if (sbi->gc_mode == GC_URGENT_HIGH ||
95
+ sbi->gc_mode == GC_URGENT_MID) {
8396 wait_ms = gc_th->urgent_sleep_time;
84
- down_write(&sbi->gc_lock);
97
+ f2fs_down_write(&sbi->gc_lock);
8598 goto do_gc;
8699 }
87100
88
- if (!down_write_trylock(&sbi->gc_lock)) {
101
+ if (foreground) {
102
+ f2fs_down_write(&sbi->gc_lock);
103
+ goto do_gc;
104
+ } else if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
89105 stat_other_skip_bggc_count(sbi);
90106 goto next;
91107 }
92108
93109 if (!is_idle(sbi, GC_TIME)) {
94110 increase_sleep_time(gc_th, &wait_ms);
95
- up_write(&sbi->gc_lock);
111
+ f2fs_up_write(&sbi->gc_lock);
96112 stat_io_skip_bggc_count(sbi);
97113 goto next;
98114 }
....@@ -102,13 +118,21 @@
102118 else
103119 increase_sleep_time(gc_th, &wait_ms);
104120 do_gc:
105
- stat_inc_bggc_count(sbi->stat_info);
121
+ if (!foreground)
122
+ stat_inc_bggc_count(sbi->stat_info);
106123
107124 sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC;
108125
126
+ /* foreground GC has been triggered via f2fs_balance_fs() */
127
+ if (foreground)
128
+ sync_mode = false;
129
+
109130 /* if return value is not zero, no victim was selected */
110
- if (f2fs_gc(sbi, sync_mode, true, NULL_SEGNO))
131
+ if (f2fs_gc(sbi, sync_mode, !foreground, false, NULL_SEGNO))
111132 wait_ms = gc_th->no_gc_sleep_time;
133
+
134
+ if (foreground)
135
+ wake_up_all(&gc_th->fggc_wq);
112136
113137 trace_f2fs_background_gc(sbi->sb, wait_ms,
114138 prefree_segments(sbi), free_segments(sbi));
....@@ -139,15 +163,16 @@
139163 gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
140164 gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;
141165
142
- gc_th->gc_wake= 0;
166
+ gc_th->gc_wake = 0;
143167
144168 sbi->gc_thread = gc_th;
145169 init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
170
+ init_waitqueue_head(&sbi->gc_thread->fggc_wq);
146171 sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
147172 "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
148173 if (IS_ERR(gc_th->f2fs_gc_task)) {
149174 err = PTR_ERR(gc_th->f2fs_gc_task);
150
- kvfree(gc_th);
175
+ kfree(gc_th);
151176 sbi->gc_thread = NULL;
152177 }
153178 out:
....@@ -157,26 +182,41 @@
157182 void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
158183 {
159184 struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
185
+
160186 if (!gc_th)
161187 return;
162188 kthread_stop(gc_th->f2fs_gc_task);
163
- kvfree(gc_th);
189
+ wake_up_all(&gc_th->fggc_wq);
190
+ kfree(gc_th);
164191 sbi->gc_thread = NULL;
165192 }
166193
167194 static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
168195 {
169
- int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
196
+ int gc_mode;
197
+
198
+ if (gc_type == BG_GC) {
199
+ if (sbi->am.atgc_enabled)
200
+ gc_mode = GC_AT;
201
+ else
202
+ gc_mode = GC_CB;
203
+ } else {
204
+ gc_mode = GC_GREEDY;
205
+ }
170206
171207 switch (sbi->gc_mode) {
172208 case GC_IDLE_CB:
173209 gc_mode = GC_CB;
174210 break;
175211 case GC_IDLE_GREEDY:
176
- case GC_URGENT:
212
+ case GC_URGENT_HIGH:
177213 gc_mode = GC_GREEDY;
178214 break;
215
+ case GC_IDLE_AT:
216
+ gc_mode = GC_AT;
217
+ break;
179218 }
219
+
180220 return gc_mode;
181221 }
182222
....@@ -187,14 +227,25 @@
187227
188228 if (p->alloc_mode == SSR) {
189229 p->gc_mode = GC_GREEDY;
190
- p->dirty_segmap = dirty_i->dirty_segmap[type];
230
+ p->dirty_bitmap = dirty_i->dirty_segmap[type];
231
+ p->max_search = dirty_i->nr_dirty[type];
232
+ p->ofs_unit = 1;
233
+ } else if (p->alloc_mode == AT_SSR) {
234
+ p->gc_mode = GC_GREEDY;
235
+ p->dirty_bitmap = dirty_i->dirty_segmap[type];
191236 p->max_search = dirty_i->nr_dirty[type];
192237 p->ofs_unit = 1;
193238 } else {
194239 p->gc_mode = select_gc_type(sbi, gc_type);
195
- p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
196
- p->max_search = dirty_i->nr_dirty[DIRTY];
197240 p->ofs_unit = sbi->segs_per_sec;
241
+ if (__is_large_section(sbi)) {
242
+ p->dirty_bitmap = dirty_i->dirty_secmap;
243
+ p->max_search = count_bits(p->dirty_bitmap,
244
+ 0, MAIN_SECS(sbi));
245
+ } else {
246
+ p->dirty_bitmap = dirty_i->dirty_segmap[DIRTY];
247
+ p->max_search = dirty_i->nr_dirty[DIRTY];
248
+ }
198249 }
199250
200251 /*
....@@ -202,7 +253,8 @@
202253 * foreground GC and urgent GC cases.
203254 */
204255 if (gc_type != FG_GC &&
205
- (sbi->gc_mode != GC_URGENT) &&
256
+ (sbi->gc_mode != GC_URGENT_HIGH) &&
257
+ (p->gc_mode != GC_AT && p->alloc_mode != AT_SSR) &&
206258 p->max_search > sbi->max_victim_search)
207259 p->max_search = sbi->max_victim_search;
208260
....@@ -220,9 +272,15 @@
220272 /* SSR allocates in a segment unit */
221273 if (p->alloc_mode == SSR)
222274 return sbi->blocks_per_seg;
275
+ else if (p->alloc_mode == AT_SSR)
276
+ return UINT_MAX;
277
+
278
+ /* LFS */
223279 if (p->gc_mode == GC_GREEDY)
224280 return 2 * sbi->blocks_per_seg * p->ofs_unit;
225281 else if (p->gc_mode == GC_CB)
282
+ return UINT_MAX;
283
+ else if (p->gc_mode == GC_AT)
226284 return UINT_MAX;
227285 else /* No other gc_mode */
228286 return 0;
....@@ -257,13 +315,14 @@
257315 unsigned char age = 0;
258316 unsigned char u;
259317 unsigned int i;
318
+ unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi, segno);
260319
261
- for (i = 0; i < sbi->segs_per_sec; i++)
320
+ for (i = 0; i < usable_segs_per_sec; i++)
262321 mtime += get_seg_entry(sbi, start + i)->mtime;
263322 vblocks = get_valid_blocks(sbi, segno, true);
264323
265
- mtime = div_u64(mtime, sbi->segs_per_sec);
266
- vblocks = div_u64(vblocks, sbi->segs_per_sec);
324
+ mtime = div_u64(mtime, usable_segs_per_sec);
325
+ vblocks = div_u64(vblocks, usable_segs_per_sec);
267326
268327 u = (vblocks * 100) >> sbi->log_blocks_per_seg;
269328
....@@ -288,8 +347,11 @@
288347 /* alloc_mode == LFS */
289348 if (p->gc_mode == GC_GREEDY)
290349 return get_valid_blocks(sbi, segno, true);
291
- else
350
+ else if (p->gc_mode == GC_CB)
292351 return get_cb_cost(sbi, segno);
352
+
353
+ f2fs_bug_on(sbi, 1);
354
+ return 0;
293355 }
294356
295357 static unsigned int count_bits(const unsigned long *addr,
....@@ -304,6 +366,317 @@
304366 return sum;
305367 }
306368
369
+static struct victim_entry *attach_victim_entry(struct f2fs_sb_info *sbi,
370
+ unsigned long long mtime, unsigned int segno,
371
+ struct rb_node *parent, struct rb_node **p,
372
+ bool left_most)
373
+{
374
+ struct atgc_management *am = &sbi->am;
375
+ struct victim_entry *ve;
376
+
377
+ ve = f2fs_kmem_cache_alloc(victim_entry_slab, GFP_NOFS);
378
+
379
+ ve->mtime = mtime;
380
+ ve->segno = segno;
381
+
382
+ rb_link_node(&ve->rb_node, parent, p);
383
+ rb_insert_color_cached(&ve->rb_node, &am->root, left_most);
384
+
385
+ list_add_tail(&ve->list, &am->victim_list);
386
+
387
+ am->victim_count++;
388
+
389
+ return ve;
390
+}
391
+
392
+static void insert_victim_entry(struct f2fs_sb_info *sbi,
393
+ unsigned long long mtime, unsigned int segno)
394
+{
395
+ struct atgc_management *am = &sbi->am;
396
+ struct rb_node **p;
397
+ struct rb_node *parent = NULL;
398
+ bool left_most = true;
399
+
400
+ p = f2fs_lookup_rb_tree_ext(sbi, &am->root, &parent, mtime, &left_most);
401
+ attach_victim_entry(sbi, mtime, segno, parent, p, left_most);
402
+}
403
+
404
+static void add_victim_entry(struct f2fs_sb_info *sbi,
405
+ struct victim_sel_policy *p, unsigned int segno)
406
+{
407
+ struct sit_info *sit_i = SIT_I(sbi);
408
+ unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
409
+ unsigned int start = GET_SEG_FROM_SEC(sbi, secno);
410
+ unsigned long long mtime = 0;
411
+ unsigned int i;
412
+
413
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
414
+ if (p->gc_mode == GC_AT &&
415
+ get_valid_blocks(sbi, segno, true) == 0)
416
+ return;
417
+ }
418
+
419
+ for (i = 0; i < sbi->segs_per_sec; i++)
420
+ mtime += get_seg_entry(sbi, start + i)->mtime;
421
+ mtime = div_u64(mtime, sbi->segs_per_sec);
422
+
423
+ /* Handle the case where the user has changed the system time */
424
+ if (mtime < sit_i->min_mtime)
425
+ sit_i->min_mtime = mtime;
426
+ if (mtime > sit_i->max_mtime)
427
+ sit_i->max_mtime = mtime;
428
+ if (mtime < sit_i->dirty_min_mtime)
429
+ sit_i->dirty_min_mtime = mtime;
430
+ if (mtime > sit_i->dirty_max_mtime)
431
+ sit_i->dirty_max_mtime = mtime;
432
+
433
+ /* don't choose young section as candidate */
434
+ if (sit_i->dirty_max_mtime - mtime < p->age_threshold)
435
+ return;
436
+
437
+ insert_victim_entry(sbi, mtime, segno);
438
+}
439
+
440
+static struct rb_node *lookup_central_victim(struct f2fs_sb_info *sbi,
441
+ struct victim_sel_policy *p)
442
+{
443
+ struct atgc_management *am = &sbi->am;
444
+ struct rb_node *parent = NULL;
445
+ bool left_most;
446
+
447
+ f2fs_lookup_rb_tree_ext(sbi, &am->root, &parent, p->age, &left_most);
448
+
449
+ return parent;
450
+}
451
+
452
+static void atgc_lookup_victim(struct f2fs_sb_info *sbi,
453
+ struct victim_sel_policy *p)
454
+{
455
+ struct sit_info *sit_i = SIT_I(sbi);
456
+ struct atgc_management *am = &sbi->am;
457
+ struct rb_root_cached *root = &am->root;
458
+ struct rb_node *node;
459
+ struct rb_entry *re;
460
+ struct victim_entry *ve;
461
+ unsigned long long total_time;
462
+ unsigned long long age, u, accu;
463
+ unsigned long long max_mtime = sit_i->dirty_max_mtime;
464
+ unsigned long long min_mtime = sit_i->dirty_min_mtime;
465
+ unsigned int sec_blocks = BLKS_PER_SEC(sbi);
466
+ unsigned int vblocks;
467
+ unsigned int dirty_threshold = max(am->max_candidate_count,
468
+ am->candidate_ratio *
469
+ am->victim_count / 100);
470
+ unsigned int age_weight = am->age_weight;
471
+ unsigned int cost;
472
+ unsigned int iter = 0;
473
+
474
+ if (max_mtime < min_mtime)
475
+ return;
476
+
477
+ max_mtime += 1;
478
+ total_time = max_mtime - min_mtime;
479
+
480
+ accu = div64_u64(ULLONG_MAX, total_time);
481
+ accu = min_t(unsigned long long, div_u64(accu, 100),
482
+ DEFAULT_ACCURACY_CLASS);
483
+
484
+ node = rb_first_cached(root);
485
+next:
486
+ re = rb_entry_safe(node, struct rb_entry, rb_node);
487
+ if (!re)
488
+ return;
489
+
490
+ ve = (struct victim_entry *)re;
491
+
492
+ if (ve->mtime >= max_mtime || ve->mtime < min_mtime)
493
+ goto skip;
494
+
495
+ /* age = 10000 * x% * 60 */
496
+ age = div64_u64(accu * (max_mtime - ve->mtime), total_time) *
497
+ age_weight;
498
+
499
+ vblocks = get_valid_blocks(sbi, ve->segno, true);
500
+ f2fs_bug_on(sbi, !vblocks || vblocks == sec_blocks);
501
+
502
+ /* u = 10000 * x% * 40 */
503
+ u = div64_u64(accu * (sec_blocks - vblocks), sec_blocks) *
504
+ (100 - age_weight);
505
+
506
+ f2fs_bug_on(sbi, age + u >= UINT_MAX);
507
+
508
+ cost = UINT_MAX - (age + u);
509
+ iter++;
510
+
511
+ if (cost < p->min_cost ||
512
+ (cost == p->min_cost && age > p->oldest_age)) {
513
+ p->min_cost = cost;
514
+ p->oldest_age = age;
515
+ p->min_segno = ve->segno;
516
+ }
517
+skip:
518
+ if (iter < dirty_threshold) {
519
+ node = rb_next(node);
520
+ goto next;
521
+ }
522
+}
523
+
524
+/*
525
+ * select candidates around source section in range of
526
+ * [target - dirty_threshold, target + dirty_threshold]
527
+ */
528
+static void atssr_lookup_victim(struct f2fs_sb_info *sbi,
529
+ struct victim_sel_policy *p)
530
+{
531
+ struct sit_info *sit_i = SIT_I(sbi);
532
+ struct atgc_management *am = &sbi->am;
533
+ struct rb_node *node;
534
+ struct rb_entry *re;
535
+ struct victim_entry *ve;
536
+ unsigned long long age;
537
+ unsigned long long max_mtime = sit_i->dirty_max_mtime;
538
+ unsigned long long min_mtime = sit_i->dirty_min_mtime;
539
+ unsigned int seg_blocks = sbi->blocks_per_seg;
540
+ unsigned int vblocks;
541
+ unsigned int dirty_threshold = max(am->max_candidate_count,
542
+ am->candidate_ratio *
543
+ am->victim_count / 100);
544
+ unsigned int cost;
545
+ unsigned int iter = 0;
546
+ int stage = 0;
547
+
548
+ if (max_mtime < min_mtime)
549
+ return;
550
+ max_mtime += 1;
551
+next_stage:
552
+ node = lookup_central_victim(sbi, p);
553
+next_node:
554
+ re = rb_entry_safe(node, struct rb_entry, rb_node);
555
+ if (!re) {
556
+ if (stage == 0)
557
+ goto skip_stage;
558
+ return;
559
+ }
560
+
561
+ ve = (struct victim_entry *)re;
562
+
563
+ if (ve->mtime >= max_mtime || ve->mtime < min_mtime)
564
+ goto skip_node;
565
+
566
+ age = max_mtime - ve->mtime;
567
+
568
+ vblocks = get_seg_entry(sbi, ve->segno)->ckpt_valid_blocks;
569
+ f2fs_bug_on(sbi, !vblocks);
570
+
571
+ /* rare case */
572
+ if (vblocks == seg_blocks)
573
+ goto skip_node;
574
+
575
+ iter++;
576
+
577
+ age = max_mtime - abs(p->age - age);
578
+ cost = UINT_MAX - vblocks;
579
+
580
+ if (cost < p->min_cost ||
581
+ (cost == p->min_cost && age > p->oldest_age)) {
582
+ p->min_cost = cost;
583
+ p->oldest_age = age;
584
+ p->min_segno = ve->segno;
585
+ }
586
+skip_node:
587
+ if (iter < dirty_threshold) {
588
+ if (stage == 0)
589
+ node = rb_prev(node);
590
+ else if (stage == 1)
591
+ node = rb_next(node);
592
+ goto next_node;
593
+ }
594
+skip_stage:
595
+ if (stage < 1) {
596
+ stage++;
597
+ iter = 0;
598
+ goto next_stage;
599
+ }
600
+}
601
+static void lookup_victim_by_age(struct f2fs_sb_info *sbi,
602
+ struct victim_sel_policy *p)
603
+{
604
+ f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
605
+ &sbi->am.root, true));
606
+
607
+ if (p->gc_mode == GC_AT)
608
+ atgc_lookup_victim(sbi, p);
609
+ else if (p->alloc_mode == AT_SSR)
610
+ atssr_lookup_victim(sbi, p);
611
+ else
612
+ f2fs_bug_on(sbi, 1);
613
+}
614
+
615
+static void release_victim_entry(struct f2fs_sb_info *sbi)
616
+{
617
+ struct atgc_management *am = &sbi->am;
618
+ struct victim_entry *ve, *tmp;
619
+
620
+ list_for_each_entry_safe(ve, tmp, &am->victim_list, list) {
621
+ list_del(&ve->list);
622
+ kmem_cache_free(victim_entry_slab, ve);
623
+ am->victim_count--;
624
+ }
625
+
626
+ am->root = RB_ROOT_CACHED;
627
+
628
+ f2fs_bug_on(sbi, am->victim_count);
629
+ f2fs_bug_on(sbi, !list_empty(&am->victim_list));
630
+}
631
+
632
+static bool f2fs_pin_section(struct f2fs_sb_info *sbi, unsigned int segno)
633
+{
634
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
635
+ unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
636
+
637
+ if (!dirty_i->enable_pin_section)
638
+ return false;
639
+ if (!test_and_set_bit(secno, dirty_i->pinned_secmap))
640
+ dirty_i->pinned_secmap_cnt++;
641
+ return true;
642
+}
643
+
644
+static bool f2fs_pinned_section_exists(struct dirty_seglist_info *dirty_i)
645
+{
646
+ return dirty_i->pinned_secmap_cnt;
647
+}
648
+
649
+static bool f2fs_section_is_pinned(struct dirty_seglist_info *dirty_i,
650
+ unsigned int secno)
651
+{
652
+ return dirty_i->enable_pin_section &&
653
+ f2fs_pinned_section_exists(dirty_i) &&
654
+ test_bit(secno, dirty_i->pinned_secmap);
655
+}
656
+
657
+static void f2fs_unpin_all_sections(struct f2fs_sb_info *sbi, bool enable)
658
+{
659
+ unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
660
+
661
+ if (f2fs_pinned_section_exists(DIRTY_I(sbi))) {
662
+ memset(DIRTY_I(sbi)->pinned_secmap, 0, bitmap_size);
663
+ DIRTY_I(sbi)->pinned_secmap_cnt = 0;
664
+ }
665
+ DIRTY_I(sbi)->enable_pin_section = enable;
666
+}
667
+
668
+static int f2fs_gc_pinned_control(struct inode *inode, int gc_type,
669
+ unsigned int segno)
670
+{
671
+ if (!f2fs_is_pinned_file(inode))
672
+ return 0;
673
+ if (gc_type != FG_GC)
674
+ return -EBUSY;
675
+ if (!f2fs_pin_section(F2FS_I_SB(inode), segno))
676
+ f2fs_pin_file_control(inode, true);
677
+ return -EAGAIN;
678
+}
679
+
307680 /*
308681 * This function is called from two paths.
309682 * One is garbage collection and the other is SSR segment selection.
....@@ -313,31 +686,51 @@
313686 * which has minimum valid blocks and removes it from dirty seglist.
314687 */
315688 static int get_victim_by_default(struct f2fs_sb_info *sbi,
316
- unsigned int *result, int gc_type, int type, char alloc_mode)
689
+ unsigned int *result, int gc_type, int type,
690
+ char alloc_mode, unsigned long long age)
317691 {
318692 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
319693 struct sit_info *sm = SIT_I(sbi);
320694 struct victim_sel_policy p;
321695 unsigned int secno, last_victim;
322696 unsigned int last_segment;
323
- unsigned int nsearched = 0;
697
+ unsigned int nsearched;
698
+ bool is_atgc;
699
+ int ret = 0;
324700
325701 mutex_lock(&dirty_i->seglist_lock);
326702 last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec;
327703
328704 p.alloc_mode = alloc_mode;
329
- select_policy(sbi, gc_type, type, &p);
705
+ p.age = age;
706
+ p.age_threshold = sbi->am.age_threshold;
330707
708
+retry:
709
+ select_policy(sbi, gc_type, type, &p);
331710 p.min_segno = NULL_SEGNO;
711
+ p.oldest_age = 0;
332712 p.min_cost = get_max_cost(sbi, &p);
333713
714
+ is_atgc = (p.gc_mode == GC_AT || p.alloc_mode == AT_SSR);
715
+ nsearched = 0;
716
+
717
+ if (is_atgc)
718
+ SIT_I(sbi)->dirty_min_mtime = ULLONG_MAX;
719
+
334720 if (*result != NULL_SEGNO) {
335
- if (get_valid_blocks(sbi, *result, false) &&
336
- !sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
721
+ if (!get_valid_blocks(sbi, *result, false)) {
722
+ ret = -ENODATA;
723
+ goto out;
724
+ }
725
+
726
+ if (sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
727
+ ret = -EBUSY;
728
+ else
337729 p.min_segno = *result;
338730 goto out;
339731 }
340732
733
+ ret = -ENODATA;
341734 if (p.max_search == 0)
342735 goto out;
343736
....@@ -365,10 +758,14 @@
365758 }
366759
367760 while (1) {
368
- unsigned long cost;
369
- unsigned int segno;
761
+ unsigned long cost, *dirty_bitmap;
762
+ unsigned int unit_no, segno;
370763
371
- segno = find_next_bit(p.dirty_segmap, last_segment, p.offset);
764
+ dirty_bitmap = p.dirty_bitmap;
765
+ unit_no = find_next_bit(dirty_bitmap,
766
+ last_segment / p.ofs_unit,
767
+ p.offset / p.ofs_unit);
768
+ segno = unit_no * p.ofs_unit;
372769 if (segno >= last_segment) {
373770 if (sm->last_victim[p.gc_mode]) {
374771 last_segment =
....@@ -381,14 +778,7 @@
381778 }
382779
383780 p.offset = segno + p.ofs_unit;
384
- if (p.ofs_unit > 1) {
385
- p.offset -= segno % p.ofs_unit;
386
- nsearched += count_bits(p.dirty_segmap,
387
- p.offset - p.ofs_unit,
388
- p.ofs_unit);
389
- } else {
390
- nsearched++;
391
- }
781
+ nsearched++;
392782
393783 #ifdef CONFIG_F2FS_CHECK_FS
394784 /*
....@@ -404,13 +794,37 @@
404794
405795 if (sec_usage_check(sbi, secno))
406796 goto next;
797
+
407798 /* Don't touch checkpointed data */
408
- if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
409
- get_ckpt_valid_blocks(sbi, segno) &&
410
- p.alloc_mode != SSR))
411
- goto next;
799
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
800
+ if (p.alloc_mode == LFS) {
801
+ /*
802
+ * LFS is set to find source section during GC.
803
+ * The victim should have no checkpointed data.
804
+ */
805
+ if (get_ckpt_valid_blocks(sbi, segno, true))
806
+ goto next;
807
+ } else {
808
+ /*
809
+ * SSR | AT_SSR are set to find target segment
810
+ * for writes, which can be filled up by checkpointed
811
+ * and newly written blocks.
812
+ */
813
+ if (!f2fs_segment_has_free_slot(sbi, segno))
814
+ goto next;
815
+ }
816
+ }
817
+
412818 if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
413819 goto next;
820
+
821
+ if (gc_type == FG_GC && f2fs_section_is_pinned(dirty_i, secno))
822
+ goto next;
823
+
824
+ if (is_atgc) {
825
+ add_victim_entry(sbi, &p, segno);
826
+ goto next;
827
+ }
414828
415829 cost = get_gc_cost(sbi, segno, &p);
416830
....@@ -421,14 +835,28 @@
421835 next:
422836 if (nsearched >= p.max_search) {
423837 if (!sm->last_victim[p.gc_mode] && segno <= last_victim)
424
- sm->last_victim[p.gc_mode] = last_victim + 1;
838
+ sm->last_victim[p.gc_mode] =
839
+ last_victim + p.ofs_unit;
425840 else
426
- sm->last_victim[p.gc_mode] = segno + 1;
841
+ sm->last_victim[p.gc_mode] = segno + p.ofs_unit;
427842 sm->last_victim[p.gc_mode] %=
428843 (MAIN_SECS(sbi) * sbi->segs_per_sec);
429844 break;
430845 }
431846 }
847
+
848
+ /* get victim for GC_AT/AT_SSR */
849
+ if (is_atgc) {
850
+ lookup_victim_by_age(sbi, &p);
851
+ release_victim_entry(sbi);
852
+ }
853
+
854
+ if (is_atgc && p.min_segno == NULL_SEGNO &&
855
+ sm->elapsed_time < p.age_threshold) {
856
+ p.age_threshold = 0;
857
+ goto retry;
858
+ }
859
+
432860 if (p.min_segno != NULL_SEGNO) {
433861 got_it:
434862 *result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
....@@ -440,6 +868,7 @@
440868 else
441869 set_bit(secno, dirty_i->victim_secmap);
442870 }
871
+ ret = 0;
443872
444873 }
445874 out:
....@@ -449,7 +878,7 @@
449878 prefree_segments(sbi), free_segments(sbi));
450879 mutex_unlock(&dirty_i->seglist_lock);
451880
452
- return (p.min_segno == NULL_SEGNO) ? 0 : 1;
881
+ return ret;
453882 }
454883
455884 static const struct victim_selection default_v_ops = {
....@@ -484,6 +913,7 @@
484913 static void put_gc_inode(struct gc_inode_list *gc_list)
485914 {
486915 struct inode_entry *ie, *next_ie;
916
+
487917 list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) {
488918 radix_tree_delete(&gc_list->iroot, ie->inode->i_ino);
489919 iput(ie->inode);
....@@ -520,6 +950,7 @@
520950 int phase = 0;
521951 bool fggc = (gc_type == FG_GC);
522952 int submitted = 0;
953
+ unsigned int usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
523954
524955 start_addr = START_BLOCK(sbi, segno);
525956
....@@ -529,7 +960,7 @@
529960 if (fggc && phase == 2)
530961 atomic_inc(&sbi->wb_sync_req[NODE]);
531962
532
- for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
963
+ for (off = 0; off < usable_blks_in_seg; off++, entry++) {
533964 nid_t nid = le32_to_cpu(entry->nid);
534965 struct page *node_page;
535966 struct node_info ni;
....@@ -564,7 +995,7 @@
564995 continue;
565996 }
566997
567
- if (f2fs_get_node_info(sbi, nid, &ni)) {
998
+ if (f2fs_get_node_info(sbi, nid, &ni, false)) {
568999 f2fs_put_page(node_page, 1);
5691000 continue;
5701001 }
....@@ -607,9 +1038,11 @@
6071038 bidx = node_ofs - 1;
6081039 } else if (node_ofs <= indirect_blks) {
6091040 int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
1041
+
6101042 bidx = node_ofs - 2 - dec;
6111043 } else {
6121044 int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
1045
+
6131046 bidx = node_ofs - 5 - dec;
6141047 }
6151048 return bidx * ADDRS_PER_BLOCK(inode) + ADDRS_PER_INODE(inode);
....@@ -620,7 +1053,7 @@
6201053 {
6211054 struct page *node_page;
6221055 nid_t nid;
623
- unsigned int ofs_in_node;
1056
+ unsigned int ofs_in_node, max_addrs, base;
6241057 block_t source_blkaddr;
6251058
6261059 nid = le32_to_cpu(sum->nid);
....@@ -630,7 +1063,7 @@
6301063 if (IS_ERR(node_page))
6311064 return false;
6321065
633
- if (f2fs_get_node_info(sbi, nid, dni)) {
1066
+ if (f2fs_get_node_info(sbi, nid, dni, false)) {
6341067 f2fs_put_page(node_page, 1);
6351068 return false;
6361069 }
....@@ -641,8 +1074,25 @@
6411074 set_sbi_flag(sbi, SBI_NEED_FSCK);
6421075 }
6431076
644
- if (f2fs_check_nid_range(sbi, dni->ino))
1077
+ if (f2fs_check_nid_range(sbi, dni->ino)) {
1078
+ f2fs_put_page(node_page, 1);
6451079 return false;
1080
+ }
1081
+
1082
+ if (IS_INODE(node_page)) {
1083
+ base = offset_in_addr(F2FS_INODE(node_page));
1084
+ max_addrs = DEF_ADDRS_PER_INODE;
1085
+ } else {
1086
+ base = 0;
1087
+ max_addrs = DEF_ADDRS_PER_BLOCK;
1088
+ }
1089
+
1090
+ if (base + ofs_in_node >= max_addrs) {
1091
+ f2fs_err(sbi, "Inconsistent blkaddr offset: base:%u, ofs_in_node:%u, max:%u, ino:%u, nid:%u",
1092
+ base, ofs_in_node, max_addrs, dni->ino, dni->nid);
1093
+ f2fs_put_page(node_page, 1);
1094
+ return false;
1095
+ }
6461096
6471097 *nofs = ofs_of_node(node_page);
6481098 source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node);
....@@ -655,8 +1105,8 @@
6551105
6561106 if (unlikely(check_valid_map(sbi, segno, offset))) {
6571107 if (!test_and_set_bit(segno, SIT_I(sbi)->invalid_segmap)) {
658
- f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u\n",
659
- blkaddr, source_blkaddr, segno);
1108
+ f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u",
1109
+ blkaddr, source_blkaddr, segno);
6601110 f2fs_bug_on(sbi, 1);
6611111 }
6621112 }
....@@ -672,7 +1122,7 @@
6721122 struct address_space *mapping = inode->i_mapping;
6731123 struct dnode_of_data dn;
6741124 struct page *page;
675
- struct extent_info ei = {0, 0, 0};
1125
+ struct extent_info ei = {0, };
6761126 struct f2fs_io_info fio = {
6771127 .sbi = sbi,
6781128 .ino = inode->i_ino,
....@@ -690,7 +1140,7 @@
6901140 if (!page)
6911141 return -ENOMEM;
6921142
693
- if (f2fs_lookup_extent_cache(inode, index, &ei)) {
1143
+ if (f2fs_lookup_read_extent_cache(inode, index, &ei)) {
6941144 dn.data_blkaddr = ei.blk + index - ei.fofs;
6951145 if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
6961146 DATA_GENERIC_ENHANCE_READ))) {
....@@ -778,6 +1228,9 @@
7781228 block_t newaddr;
7791229 int err = 0;
7801230 bool lfs_mode = f2fs_lfs_mode(fio.sbi);
1231
+ int type = fio.sbi->am.atgc_enabled && (gc_type == BG_GC) &&
1232
+ (fio.sbi->gc_mode != GC_URGENT_HIGH) ?
1233
+ CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA;
7811234
7821235 /* do not read out */
7831236 page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
....@@ -796,11 +1249,9 @@
7961249 goto out;
7971250 }
7981251
799
- if (f2fs_is_pinned_file(inode)) {
800
- f2fs_pin_file_control(inode, true);
801
- err = -EAGAIN;
1252
+ err = f2fs_gc_pinned_control(inode, gc_type, segno);
1253
+ if (err)
8021254 goto out;
803
- }
8041255
8051256 set_new_dnode(&dn, inode, NULL, NULL, 0);
8061257 err = f2fs_get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
....@@ -821,23 +1272,23 @@
8211272
8221273 f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
8231274
824
- err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
1275
+ err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false);
8251276 if (err)
8261277 goto put_out;
827
-
828
- set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
8291278
8301279 /* read page */
8311280 fio.page = page;
8321281 fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
8331282
8341283 if (lfs_mode)
835
- down_write(&fio.sbi->io_order_lock);
1284
+ f2fs_down_write(&fio.sbi->io_order_lock);
8361285
8371286 mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi),
8381287 fio.old_blkaddr, false);
839
- if (!mpage)
1288
+ if (!mpage) {
1289
+ err = -ENOMEM;
8401290 goto up_out;
1291
+ }
8411292
8421293 fio.encrypted_page = mpage;
8431294
....@@ -861,8 +1312,11 @@
8611312 }
8621313 }
8631314
1315
+ set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
1316
+
1317
+ /* allocate block address */
8641318 f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
865
- &sum, CURSEG_COLD_DATA, NULL, false);
1319
+ &sum, type, NULL);
8661320
8671321 fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
8681322 newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
....@@ -879,6 +1333,7 @@
8791333 f2fs_put_page(mpage, 1);
8801334 invalidate_mapping_pages(META_MAPPING(fio.sbi),
8811335 fio.old_blkaddr, fio.old_blkaddr);
1336
+ f2fs_invalidate_compress_page(fio.sbi, fio.old_blkaddr);
8821337
8831338 set_page_dirty(fio.encrypted_page);
8841339 if (clear_page_dirty_for_io(fio.encrypted_page))
....@@ -886,9 +1341,6 @@
8861341
8871342 set_page_writeback(fio.encrypted_page);
8881343 ClearPageError(page);
889
-
890
- /* allocate block address */
891
- f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
8921344
8931345 fio.op = REQ_OP_WRITE;
8941346 fio.op_flags = REQ_SYNC;
....@@ -912,10 +1364,10 @@
9121364 recover_block:
9131365 if (err)
9141366 f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr,
915
- true, true);
1367
+ true, true, true);
9161368 up_out:
9171369 if (lfs_mode)
918
- up_write(&fio.sbi->io_order_lock);
1370
+ f2fs_up_write(&fio.sbi->io_order_lock);
9191371 put_out:
9201372 f2fs_put_dnode(&dn);
9211373 out:
....@@ -944,12 +1396,9 @@
9441396 err = -EAGAIN;
9451397 goto out;
9461398 }
947
- if (f2fs_is_pinned_file(inode)) {
948
- if (gc_type == FG_GC)
949
- f2fs_pin_file_control(inode, true);
950
- err = -EAGAIN;
1399
+ err = f2fs_gc_pinned_control(inode, gc_type, segno);
1400
+ if (err)
9511401 goto out;
952
- }
9531402
9541403 if (gc_type == BG_GC) {
9551404 if (PageWriteback(page)) {
....@@ -957,7 +1406,7 @@
9571406 goto out;
9581407 }
9591408 set_page_dirty(page);
960
- set_cold_data(page);
1409
+ set_page_private_gcing(page);
9611410 } else {
9621411 struct f2fs_io_info fio = {
9631412 .sbi = F2FS_I_SB(inode),
....@@ -983,11 +1432,11 @@
9831432 f2fs_remove_dirty_inode(inode);
9841433 }
9851434
986
- set_cold_data(page);
1435
+ set_page_private_gcing(page);
9871436
9881437 err = f2fs_do_write_data_page(&fio);
9891438 if (err) {
990
- clear_cold_data(page);
1439
+ clear_page_private_gcing(page);
9911440 if (err == -ENOMEM) {
9921441 congestion_wait(BLK_RW_ASYNC,
9931442 DEFAULT_IO_TIMEOUT);
....@@ -1010,7 +1459,8 @@
10101459 * the victim data block is ignored.
10111460 */
10121461 static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
1013
- struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
1462
+ struct gc_inode_list *gc_list, unsigned int segno, int gc_type,
1463
+ bool force_migrate)
10141464 {
10151465 struct super_block *sb = sbi->sb;
10161466 struct f2fs_summary *entry;
....@@ -1018,13 +1468,14 @@
10181468 int off;
10191469 int phase = 0;
10201470 int submitted = 0;
1471
+ unsigned int usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
10211472
10221473 start_addr = START_BLOCK(sbi, segno);
10231474
10241475 next_step:
10251476 entry = sum;
10261477
1027
- for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
1478
+ for (off = 0; off < usable_blks_in_seg; off++, entry++) {
10281479 struct page *data_page;
10291480 struct inode *inode;
10301481 struct node_info dni; /* dnode info for the data */
....@@ -1038,8 +1489,8 @@
10381489 * race condition along with SSR block allocation.
10391490 */
10401491 if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) ||
1041
- get_valid_blocks(sbi, segno, true) ==
1042
- BLKS_PER_SEC(sbi))
1492
+ (!force_migrate && get_valid_blocks(sbi, segno, true) ==
1493
+ BLKS_PER_SEC(sbi)))
10431494 return submitted;
10441495
10451496 if (check_valid_map(sbi, segno, off) == 0)
....@@ -1068,13 +1519,19 @@
10681519 ofs_in_node = le16_to_cpu(entry->ofs_in_node);
10691520
10701521 if (phase == 3) {
1522
+ int err;
1523
+
10711524 inode = f2fs_iget(sb, dni.ino);
1072
- if (IS_ERR(inode) || is_bad_inode(inode)) {
1073
- set_sbi_flag(sbi, SBI_NEED_FSCK);
1525
+ if (IS_ERR(inode) || is_bad_inode(inode))
10741526 continue;
1527
+
1528
+ err = f2fs_gc_pinned_control(inode, gc_type, segno);
1529
+ if (err == -EAGAIN) {
1530
+ iput(inode);
1531
+ return submitted;
10751532 }
10761533
1077
- if (!down_write_trylock(
1534
+ if (!f2fs_down_write_trylock(
10781535 &F2FS_I(inode)->i_gc_rwsem[WRITE])) {
10791536 iput(inode);
10801537 sbi->skipped_gc_rwsem++;
....@@ -1087,7 +1544,7 @@
10871544 if (f2fs_post_read_required(inode)) {
10881545 int err = ra_data_block(inode, start_bidx);
10891546
1090
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1547
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
10911548 if (err) {
10921549 iput(inode);
10931550 continue;
....@@ -1098,7 +1555,7 @@
10981555
10991556 data_page = f2fs_get_read_data_page(inode,
11001557 start_bidx, REQ_RAHEAD, true);
1101
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1558
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
11021559 if (IS_ERR(data_page)) {
11031560 iput(inode);
11041561 continue;
....@@ -1117,14 +1574,14 @@
11171574 int err;
11181575
11191576 if (S_ISREG(inode->i_mode)) {
1120
- if (!down_write_trylock(&fi->i_gc_rwsem[READ])) {
1577
+ if (!f2fs_down_write_trylock(&fi->i_gc_rwsem[READ])) {
11211578 sbi->skipped_gc_rwsem++;
11221579 continue;
11231580 }
1124
- if (!down_write_trylock(
1581
+ if (!f2fs_down_write_trylock(
11251582 &fi->i_gc_rwsem[WRITE])) {
11261583 sbi->skipped_gc_rwsem++;
1127
- up_write(&fi->i_gc_rwsem[READ]);
1584
+ f2fs_up_write(&fi->i_gc_rwsem[READ]);
11281585 continue;
11291586 }
11301587 locked = true;
....@@ -1147,8 +1604,8 @@
11471604 submitted++;
11481605
11491606 if (locked) {
1150
- up_write(&fi->i_gc_rwsem[WRITE]);
1151
- up_write(&fi->i_gc_rwsem[READ]);
1607
+ f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
1608
+ f2fs_up_write(&fi->i_gc_rwsem[READ]);
11521609 }
11531610
11541611 stat_inc_data_blk_count(sbi, 1, gc_type);
....@@ -1169,14 +1626,15 @@
11691626
11701627 down_write(&sit_i->sentry_lock);
11711628 ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
1172
- NO_CHECK_TYPE, LFS);
1629
+ NO_CHECK_TYPE, LFS, 0);
11731630 up_write(&sit_i->sentry_lock);
11741631 return ret;
11751632 }
11761633
11771634 static int do_garbage_collect(struct f2fs_sb_info *sbi,
11781635 unsigned int start_segno,
1179
- struct gc_inode_list *gc_list, int gc_type)
1636
+ struct gc_inode_list *gc_list, int gc_type,
1637
+ bool force_migrate)
11801638 {
11811639 struct page *sum_page;
11821640 struct f2fs_summary_block *sum;
....@@ -1190,6 +1648,17 @@
11901648
11911649 if (__is_large_section(sbi))
11921650 end_segno = rounddown(end_segno, sbi->segs_per_sec);
1651
+
1652
+ /*
1653
+ * zone-capacity can be less than zone-size in zoned devices,
1654
+ * resulting in less than expected usable segments in the zone,
1655
+ * calculate the end segno in the zone which can be garbage collected
1656
+ */
1657
+ if (f2fs_sb_has_blkzoned(sbi))
1658
+ end_segno -= sbi->segs_per_sec -
1659
+ f2fs_usable_segs_in_sec(sbi, segno);
1660
+
1661
+ sanity_check_seg_type(sbi, get_seg_entry(sbi, segno)->type);
11931662
11941663 /* readahead multi ssa blocks those have contiguous address */
11951664 if (__is_large_section(sbi))
....@@ -1236,7 +1705,8 @@
12361705 f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT",
12371706 segno, type, GET_SUM_TYPE((&sum->footer)));
12381707 set_sbi_flag(sbi, SBI_NEED_FSCK);
1239
- f2fs_stop_checkpoint(sbi, false);
1708
+ f2fs_stop_checkpoint(sbi, false,
1709
+ STOP_CP_REASON_CORRUPTED_SUMMARY);
12401710 goto skip;
12411711 }
12421712
....@@ -1252,9 +1722,11 @@
12521722 gc_type);
12531723 else
12541724 submitted += gc_data_segment(sbi, sum->entries, gc_list,
1255
- segno, gc_type);
1725
+ segno, gc_type,
1726
+ force_migrate);
12561727
12571728 stat_inc_seg_count(sbi, type, gc_type);
1729
+ sbi->gc_reclaimed_segs[sbi->gc_mode]++;
12581730 migrated++;
12591731
12601732 freed:
....@@ -1262,8 +1734,9 @@
12621734 get_valid_blocks(sbi, segno, false) == 0)
12631735 seg_freed++;
12641736
1265
- if (__is_large_section(sbi) && segno + 1 < end_segno)
1266
- sbi->next_victim_seg[gc_type] = segno + 1;
1737
+ if (__is_large_section(sbi))
1738
+ sbi->next_victim_seg[gc_type] =
1739
+ (segno + 1 < end_segno) ? segno + 1 : NULL_SEGNO;
12671740 skip:
12681741 f2fs_put_page(sum_page, 0);
12691742 }
....@@ -1280,7 +1753,7 @@
12801753 }
12811754
12821755 int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
1283
- bool background, unsigned int segno)
1756
+ bool background, bool force, unsigned int segno)
12841757 {
12851758 int gc_type = sync ? FG_GC : BG_GC;
12861759 int sec_freed = 0, seg_freed = 0, total_freed = 0;
....@@ -1338,13 +1811,21 @@
13381811 ret = -EINVAL;
13391812 goto stop;
13401813 }
1341
- if (!__get_victim(sbi, &segno, gc_type)) {
1342
- ret = -ENODATA;
1814
+retry:
1815
+ ret = __get_victim(sbi, &segno, gc_type);
1816
+ if (ret) {
1817
+ /* allow to search victim from sections has pinned data */
1818
+ if (ret == -ENODATA && gc_type == FG_GC &&
1819
+ f2fs_pinned_section_exists(DIRTY_I(sbi))) {
1820
+ f2fs_unpin_all_sections(sbi, false);
1821
+ goto retry;
1822
+ }
13431823 goto stop;
13441824 }
13451825
1346
- seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
1347
- if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
1826
+ seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type, force);
1827
+ if (gc_type == FG_GC &&
1828
+ seg_freed == f2fs_usable_segs_in_sec(sbi, segno))
13481829 sec_freed++;
13491830 total_freed += seg_freed;
13501831
....@@ -1362,26 +1843,37 @@
13621843 if (sync)
13631844 goto stop;
13641845
1365
- if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
1366
- if (skipped_round <= MAX_SKIP_GC_COUNT ||
1367
- skipped_round * 2 < round) {
1368
- segno = NULL_SEGNO;
1369
- goto gc_more;
1370
- }
1846
+ if (!has_not_enough_free_secs(sbi, sec_freed, 0))
1847
+ goto stop;
13711848
1372
- if (first_skipped < last_skipped &&
1373
- (last_skipped - first_skipped) >
1374
- sbi->skipped_gc_rwsem) {
1375
- f2fs_drop_inmem_pages_all(sbi, true);
1376
- segno = NULL_SEGNO;
1377
- goto gc_more;
1378
- }
1379
- if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))
1849
+ if (skipped_round <= MAX_SKIP_GC_COUNT || skipped_round * 2 < round) {
1850
+
1851
+ /* Write checkpoint to reclaim prefree segments */
1852
+ if (free_sections(sbi) < NR_CURSEG_PERSIST_TYPE &&
1853
+ prefree_segments(sbi) &&
1854
+ !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
13801855 ret = f2fs_write_checkpoint(sbi, &cpc);
1856
+ if (ret)
1857
+ goto stop;
1858
+ }
1859
+ segno = NULL_SEGNO;
1860
+ goto gc_more;
13811861 }
1862
+ if (first_skipped < last_skipped &&
1863
+ (last_skipped - first_skipped) >
1864
+ sbi->skipped_gc_rwsem) {
1865
+ f2fs_drop_inmem_pages_all(sbi, true);
1866
+ segno = NULL_SEGNO;
1867
+ goto gc_more;
1868
+ }
1869
+ if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))
1870
+ ret = f2fs_write_checkpoint(sbi, &cpc);
13821871 stop:
13831872 SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0;
13841873 SIT_I(sbi)->last_victim[FLUSH_DEVICE] = init_segno;
1874
+
1875
+ if (gc_type == FG_GC)
1876
+ f2fs_unpin_all_sections(sbi, true);
13851877
13861878 trace_f2fs_gc_end(sbi->sb, ret, total_freed, sec_freed,
13871879 get_pages(sbi, F2FS_DIRTY_NODES),
....@@ -1392,13 +1884,45 @@
13921884 reserved_segments(sbi),
13931885 prefree_segments(sbi));
13941886
1395
- up_write(&sbi->gc_lock);
1887
+ f2fs_up_write(&sbi->gc_lock);
13961888
13971889 put_gc_inode(&gc_list);
13981890
13991891 if (sync && !ret)
14001892 ret = sec_freed ? 0 : -EAGAIN;
14011893 return ret;
1894
+}
1895
+
1896
+int __init f2fs_create_garbage_collection_cache(void)
1897
+{
1898
+ victim_entry_slab = f2fs_kmem_cache_create("f2fs_victim_entry",
1899
+ sizeof(struct victim_entry));
1900
+ if (!victim_entry_slab)
1901
+ return -ENOMEM;
1902
+ return 0;
1903
+}
1904
+
1905
+void f2fs_destroy_garbage_collection_cache(void)
1906
+{
1907
+ kmem_cache_destroy(victim_entry_slab);
1908
+}
1909
+
1910
+static void init_atgc_management(struct f2fs_sb_info *sbi)
1911
+{
1912
+ struct atgc_management *am = &sbi->am;
1913
+
1914
+ if (test_opt(sbi, ATGC) &&
1915
+ SIT_I(sbi)->elapsed_time >= DEF_GC_THREAD_AGE_THRESHOLD)
1916
+ am->atgc_enabled = true;
1917
+
1918
+ am->root = RB_ROOT_CACHED;
1919
+ INIT_LIST_HEAD(&am->victim_list);
1920
+ am->victim_count = 0;
1921
+
1922
+ am->candidate_ratio = DEF_GC_THREAD_CANDIDATE_RATIO;
1923
+ am->max_candidate_count = DEF_GC_THREAD_MAX_CANDIDATE_COUNT;
1924
+ am->age_weight = DEF_GC_THREAD_AGE_WEIGHT;
1925
+ am->age_threshold = DEF_GC_THREAD_AGE_THRESHOLD;
14021926 }
14031927
14041928 void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
....@@ -1411,6 +1935,8 @@
14111935 if (f2fs_is_multi_device(sbi) && !__is_large_section(sbi))
14121936 SIT_I(sbi)->last_victim[ALLOC_NEXT] =
14131937 GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
1938
+
1939
+ init_atgc_management(sbi);
14141940 }
14151941
14161942 static int free_segment_range(struct f2fs_sb_info *sbi,
....@@ -1438,8 +1964,8 @@
14381964 mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
14391965
14401966 /* Move out cursegs from the target range */
1441
- for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
1442
- allocate_segment_for_resize(sbi, type, start, end);
1967
+ for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
1968
+ f2fs_allocate_segment_for_resize(sbi, type, start, end);
14431969
14441970 /* do GC to move out valid blocks in the range */
14451971 for (segno = start; segno <= end; segno += sbi->segs_per_sec) {
....@@ -1448,7 +1974,7 @@
14481974 .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
14491975 };
14501976
1451
- do_garbage_collect(sbi, segno, &gc_list, FG_GC);
1977
+ do_garbage_collect(sbi, segno, &gc_list, FG_GC, true);
14521978 put_gc_inode(&gc_list);
14531979
14541980 if (!gc_only && get_valid_blocks(sbi, segno, true)) {
....@@ -1487,7 +2013,7 @@
14872013 long long block_count;
14882014 int segs = secs * sbi->segs_per_sec;
14892015
1490
- down_write(&sbi->sb_lock);
2016
+ f2fs_down_write(&sbi->sb_lock);
14912017
14922018 section_count = le32_to_cpu(raw_sb->section_count);
14932019 segment_count = le32_to_cpu(raw_sb->segment_count);
....@@ -1508,7 +2034,7 @@
15082034 cpu_to_le32(dev_segs + segs);
15092035 }
15102036
1511
- up_write(&sbi->sb_lock);
2037
+ f2fs_up_write(&sbi->sb_lock);
15122038 }
15132039
15142040 static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
....@@ -1539,8 +2065,9 @@
15392065 }
15402066 }
15412067
1542
-int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
2068
+int f2fs_resize_fs(struct file *filp, __u64 block_count)
15432069 {
2070
+ struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
15442071 __u64 old_block_count, shrunk_blocks;
15452072 struct cp_control cpc = { CP_RESIZE, 0, 0, 0 };
15462073 unsigned int secs;
....@@ -1578,26 +2105,51 @@
15782105 return -EINVAL;
15792106 }
15802107
2108
+ err = mnt_want_write_file(filp);
2109
+ if (err)
2110
+ return err;
2111
+
15812112 shrunk_blocks = old_block_count - block_count;
15822113 secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi));
15832114
15842115 /* stop other GC */
1585
- if (!down_write_trylock(&sbi->gc_lock))
1586
- return -EAGAIN;
2116
+ if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
2117
+ err = -EAGAIN;
2118
+ goto out_drop_write;
2119
+ }
15872120
15882121 /* stop CP to protect MAIN_SEC in free_segment_range */
15892122 f2fs_lock_op(sbi);
2123
+
2124
+ spin_lock(&sbi->stat_lock);
2125
+ if (shrunk_blocks + valid_user_blocks(sbi) +
2126
+ sbi->current_reserved_blocks + sbi->unusable_block_count +
2127
+ F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count)
2128
+ err = -ENOSPC;
2129
+ spin_unlock(&sbi->stat_lock);
2130
+
2131
+ if (err)
2132
+ goto out_unlock;
2133
+
15902134 err = free_segment_range(sbi, secs, true);
2135
+
2136
+out_unlock:
15912137 f2fs_unlock_op(sbi);
1592
- up_write(&sbi->gc_lock);
2138
+ f2fs_up_write(&sbi->gc_lock);
2139
+out_drop_write:
2140
+ mnt_drop_write_file(filp);
15932141 if (err)
15942142 return err;
15952143
1596
- set_sbi_flag(sbi, SBI_IS_RESIZEFS);
1597
-
15982144 freeze_super(sbi->sb);
1599
- down_write(&sbi->gc_lock);
1600
- mutex_lock(&sbi->cp_mutex);
2145
+
2146
+ if (f2fs_readonly(sbi->sb)) {
2147
+ thaw_super(sbi->sb);
2148
+ return -EROFS;
2149
+ }
2150
+
2151
+ f2fs_down_write(&sbi->gc_lock);
2152
+ f2fs_down_write(&sbi->cp_global_sem);
16012153
16022154 spin_lock(&sbi->stat_lock);
16032155 if (shrunk_blocks + valid_user_blocks(sbi) +
....@@ -1610,6 +2162,7 @@
16102162 if (err)
16112163 goto out_err;
16122164
2165
+ set_sbi_flag(sbi, SBI_IS_RESIZEFS);
16132166 err = free_segment_range(sbi, secs, false);
16142167 if (err)
16152168 goto recover_out;
....@@ -1633,6 +2186,7 @@
16332186 f2fs_commit_super(sbi, false);
16342187 }
16352188 recover_out:
2189
+ clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
16362190 if (err) {
16372191 set_sbi_flag(sbi, SBI_NEED_FSCK);
16382192 f2fs_err(sbi, "resize_fs failed, should run fsck to repair!");
....@@ -1642,9 +2196,8 @@
16422196 spin_unlock(&sbi->stat_lock);
16432197 }
16442198 out_err:
1645
- mutex_unlock(&sbi->cp_mutex);
1646
- up_write(&sbi->gc_lock);
2199
+ f2fs_up_write(&sbi->cp_global_sem);
2200
+ f2fs_up_write(&sbi->gc_lock);
16472201 thaw_super(sbi->sb);
1648
- clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
16492202 return err;
16502203 }