forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-05-13 9d77db3c730780c8ef5ccd4b66403ff5675cfe4e
kernel/drivers/md/bcache/writeback.c
@@ -17,6 +17,15 @@
 #include <linux/sched/clock.h>
 #include <trace/events/bcache.h>
 
+static void update_gc_after_writeback(struct cache_set *c)
+{
+        if (c->gc_after_writeback != (BCH_ENABLE_AUTO_GC) ||
+            c->gc_stats.in_use < BCH_AUTO_GC_DIRTY_THRESHOLD)
+                return;
+
+        c->gc_after_writeback |= BCH_DO_AUTO_GC;
+}
+
 /* Rate limiting */
 static uint64_t __calc_target_rate(struct cached_dev *dc)
 {
@@ -26,7 +35,7 @@
          * This is the size of the cache, minus the amount used for
          * flash-only devices
          */
-        uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size -
+        uint64_t cache_sectors = c->nbuckets * c->cache->sb.bucket_size -
                         atomic_long_read(&c->flash_dev_dirty_sectors);
 
         /*
@@ -110,24 +119,65 @@
         dc->writeback_rate_target = target;
 }
 
+static bool idle_counter_exceeded(struct cache_set *c)
+{
+        int counter, dev_nr;
+
+        /*
+         * If c->idle_counter overflows (idle for a really long time),
+         * reset it to 0 and do not set the maximum rate this time, for
+         * code simplicity.
+         */
+        counter = atomic_inc_return(&c->idle_counter);
+        if (counter <= 0) {
+                atomic_set(&c->idle_counter, 0);
+                return false;
+        }
+
+        dev_nr = atomic_read(&c->attached_dev_nr);
+        if (dev_nr == 0)
+                return false;
+
+        /*
+         * c->idle_counter is increased by the writeback threads of all
+         * attached backing devices. To represent a rough time period,
+         * the counter should be divided by dev_nr; otherwise the idle
+         * time could not grow larger as more backing devices are
+         * attached.
+         * The following calculation is equivalent to checking
+         *      (counter / dev_nr) < (dev_nr * 6)
+         */
+        if (counter < (dev_nr * dev_nr * 6))
+                return false;
+
+        return true;
+}
+
+/*
+ * idle_counter is increased every time update_writeback_rate() is
+ * called. If all backing devices attached to the same cache set have
+ * identical dc->writeback_rate_update_seconds values, it takes about 6
+ * rounds of update_writeback_rate() on each backing device before
+ * c->at_max_writeback_rate is set to 1, and then the maximum writeback
+ * rate is set for each dc->writeback_rate.rate.
+ * In order to avoid the extra locking cost of counting the exact number
+ * of dirty cached devices, c->attached_dev_nr is used to calculate the
+ * idle threshold. It might be bigger if not all cached devices are in
+ * writeback mode, but it still works well with a limited number of
+ * extra rounds of update_writeback_rate().
+ */
 static bool set_at_max_writeback_rate(struct cache_set *c,
                                       struct cached_dev *dc)
 {
-        /*
-         * Idle_counter is increased everytime when update_writeback_rate() is
-         * called. If all backing devices attached to the same cache set have
-         * identical dc->writeback_rate_update_seconds values, it is about 6
-         * rounds of update_writeback_rate() on each backing device before
-         * c->at_max_writeback_rate is set to 1, and then max wrteback rate set
-         * to each dc->writeback_rate.rate.
-         * In order to avoid extra locking cost for counting exact dirty cached
-         * devices number, c->attached_dev_nr is used to calculate the idle
-         * throushold. It might be bigger if not all cached device are in write-
-         * back mode, but it still works well with limited extra rounds of
-         * update_writeback_rate().
-         */
-        if (atomic_inc_return(&c->idle_counter) <
-            atomic_read(&c->attached_dev_nr) * 6)
+        /* Don't set max writeback rate if it is disabled */
+        if (!c->idle_max_writeback_rate_enabled)
+                return false;
+
+        /* Don't set max writeback rate if gc is running */
+        if (!c->gc_mark_valid)
+                return false;
+
+        if (!idle_counter_exceeded(c))
                 return false;
 
         if (atomic_read(&c->at_max_writeback_rate) != 1)
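The multiplication form used in idle_counter_exceeded() simply avoids a runtime division: for positive integers, counter < dev_nr * dev_nr * 6 holds exactly when counter / dev_nr < dev_nr * 6. A small standalone check of that equivalence; the helper names below are illustrative and not part of bcache:

#include <assert.h>
#include <stdio.h>

/* Multiplication form, as used in the patch. */
static int idle_exceeded_mul(int counter, int dev_nr)
{
        return counter >= dev_nr * dev_nr * 6;
}

/* Division form, as described in the code comment. */
static int idle_exceeded_div(int counter, int dev_nr)
{
        return (counter / dev_nr) >= dev_nr * 6;
}

int main(void)
{
        /* Exhaustively confirm both forms agree for small positive inputs. */
        for (int dev_nr = 1; dev_nr <= 16; dev_nr++)
                for (int counter = 1; counter <= 10000; counter++)
                        assert(idle_exceeded_mul(counter, dev_nr) ==
                               idle_exceeded_div(counter, dev_nr));

        printf("multiplication and division forms agree\n");
        return 0;
}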
@@ -141,13 +191,10 @@
         dc->writeback_rate_change = 0;
 
         /*
-         * Check c->idle_counter and c->at_max_writeback_rate agagain in case
-         * new I/O arrives during before set_at_max_writeback_rate() returns.
-         * Then the writeback rate is set to 1, and its new value should be
-         * decided via __update_writeback_rate().
+         * In case new I/O arrives before
+         * set_at_max_writeback_rate() returns.
          */
-        if ((atomic_read(&c->idle_counter) <
-             atomic_read(&c->attached_dev_nr) * 6) ||
+        if (!idle_counter_exceeded(c) ||
             !atomic_read(&c->at_max_writeback_rate))
                 return false;
 
@@ -167,7 +214,7 @@
          */
         set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
         /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-        smp_mb();
+        smp_mb__after_atomic();
 
         /*
          * CACHE_SET_IO_DISABLE might be set via sysfs interface,
@@ -177,7 +224,7 @@
             test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
                 clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
                 /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-                smp_mb();
+                smp_mb__after_atomic();
                 return;
         }
 
@@ -191,6 +238,7 @@
         if (!set_at_max_writeback_rate(c, dc)) {
                 down_read(&dc->writeback_lock);
                 __update_writeback_rate(dc);
+                update_gc_after_writeback(c);
                 up_read(&dc->writeback_lock);
         }
 }
@@ -212,7 +260,7 @@
          */
         clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
         /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-        smp_mb();
+        smp_mb__after_atomic();
 }
 
 static unsigned int writeback_delay(struct cached_dev *dc,
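All three smp_mb() to smp_mb__after_atomic() conversions in this file follow the same pattern: the barrier sits immediately after an atomic bitop (set_bit()/clear_bit()) and pairs with a test_bit() elsewhere, so the cheaper after-atomic form is sufficient; on architectures whose atomic bitops already imply full ordering it reduces to a compiler barrier. A minimal sketch of that publish/observe pairing; my_flags, MY_DW_RUNNING and the helpers below are hypothetical stand-ins, not bcache code:

/*
 * Illustrative sketch only: a flag published with an atomic bitop plus
 * smp_mb__after_atomic(), observed with test_bit().
 */
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/types.h>

#define MY_DW_RUNNING   0

static unsigned long my_flags;

static void my_work_start(void)
{
        set_bit(MY_DW_RUNNING, &my_flags);
        /*
         * Order the bitop before later loads/stores; pairs with the
         * test_bit() in my_work_is_running(). Cheaper than a full
         * smp_mb() on architectures whose atomic bitops already imply
         * full ordering.
         */
        smp_mb__after_atomic();
}

static void my_work_done(void)
{
        clear_bit(MY_DW_RUNNING, &my_flags);
        smp_mb__after_atomic();
}

static bool my_work_is_running(void)
{
        return test_bit(MY_DW_RUNNING, &my_flags);
}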
@@ -442,10 +490,8 @@
         for (i = 0; i < nk; i++) {
                 w = keys[i];
 
-                io = kzalloc(sizeof(struct dirty_io) +
-                             sizeof(struct bio_vec) *
-                             DIV_ROUND_UP(KEY_SIZE(&w->key),
-                                          PAGE_SECTORS),
+                io = kzalloc(struct_size(io, bio.bi_inline_vecs,
+                                DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS)),
                              GFP_KERNEL);
                 if (!io)
                         goto err;
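struct_size(io, bio.bi_inline_vecs, n) evaluates to sizeof(*io) plus room for n trailing bio_vec elements, with the multiply and add checked for overflow (saturating to SIZE_MAX so the allocation fails instead of silently wrapping). A hedged sketch of the same idiom on a made-up structure, not the real struct dirty_io layout:

/*
 * Hypothetical flexible-array allocation using struct_size(); the
 * demo_io type below is invented for illustration.
 */
#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/types.h>

struct demo_io {
        unsigned int    nr_entries;
        u64             entries[];      /* flexible array member */
};

static struct demo_io *demo_io_alloc(unsigned int nr, gfp_t gfp)
{
        struct demo_io *io;

        /*
         * Same as kzalloc(sizeof(*io) + nr * sizeof(io->entries[0]), gfp),
         * except that an overflowing nr saturates to SIZE_MAX and the
         * allocation fails instead of returning a too-short buffer.
         */
        io = kzalloc(struct_size(io, entries, nr), gfp);
        if (io)
                io->nr_entries = nr;
        return io;
}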
@@ -693,6 +739,23 @@
                         up_write(&dc->writeback_lock);
                         break;
                 }
+
+                /*
+                 * When the dirty data ratio is high (e.g. 50%+), there
+                 * might be heavy bucket fragmentation after writeback
+                 * finishes, which hurts subsequent write performance.
+                 * If users really care about write performance they
+                 * may set BCH_ENABLE_AUTO_GC via sysfs; then when
+                 * BCH_DO_AUTO_GC is set, the garbage collection thread
+                 * will be woken up here. After moving gc, the shrunk
+                 * btree and discarded free buckets of SSD space may be
+                 * helpful for subsequent write requests.
+                 */
+                if (c->gc_after_writeback ==
+                    (BCH_ENABLE_AUTO_GC|BCH_DO_AUTO_GC)) {
+                        c->gc_after_writeback &= ~BCH_DO_AUTO_GC;
+                        force_wake_up_gc(c);
+                }
         }
 
         up_write(&dc->writeback_lock);
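Taken together with update_gc_after_writeback() from the first hunk, the two flag bits form a small handshake: the user opts in through sysfs (BCH_ENABLE_AUTO_GC), the rate-update path latches BCH_DO_AUTO_GC once dirty bucket usage crosses BCH_AUTO_GC_DIRTY_THRESHOLD, and the writeback thread above consumes the latched bit and wakes gc once the dirty data has drained. A condensed, hedged restatement of that flow; the single helper below is illustrative, assumes bcache's internal headers, and is not part of the patch:

/* Condensed restatement of the auto-gc flag lifecycle in this patch;
 * assumes bcache's internal definitions (bcache.h, writeback.h).
 */
static void auto_gc_lifecycle(struct cache_set *c)
{
        /* 1. User opts in via sysfs: gc_after_writeback = BCH_ENABLE_AUTO_GC. */

        /*
         * 2. Each writeback-rate update latches the request when dirty
         *    bucket usage is high (update_gc_after_writeback()).
         */
        if (c->gc_after_writeback == BCH_ENABLE_AUTO_GC &&
            c->gc_stats.in_use >= BCH_AUTO_GC_DIRTY_THRESHOLD)
                c->gc_after_writeback |= BCH_DO_AUTO_GC;

        /*
         * 3. When the writeback thread finds no dirty data left, it
         *    consumes the request and kicks gc (the hunk above).
         */
        if (c->gc_after_writeback == (BCH_ENABLE_AUTO_GC | BCH_DO_AUTO_GC)) {
                c->gc_after_writeback &= ~BCH_DO_AUTO_GC;
                force_wake_up_gc(c);
        }
}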
@@ -724,13 +787,11 @@
 
 /* Init */
 #define INIT_KEYS_EACH_TIME     500000
-#define INIT_KEYS_SLEEP_MS      100
 
 struct sectors_dirty_init {
         struct btree_op op;
         unsigned int    inode;
         size_t          count;
-        struct bkey     start;
 };
 
 static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
@@ -746,16 +807,15 @@
                                              KEY_START(k), KEY_SIZE(k));
 
         op->count++;
-        if (atomic_read(&b->c->search_inflight) &&
-            !(op->count % INIT_KEYS_EACH_TIME)) {
-                bkey_copy_key(&op->start, k);
-                return -EAGAIN;
-        }
+        if (!(op->count % INIT_KEYS_EACH_TIME))
+                cond_resched();
 
         return MAP_CONTINUE;
 }
 
-void bch_sectors_dirty_init(struct bcache_device *d)
+static int bch_root_node_dirty_init(struct cache_set *c,
+                                    struct bcache_device *d,
+                                    struct bkey *k)
 {
         struct sectors_dirty_init op;
         int ret;
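The sectors_dirty_init_fn() change above replaces the old bail-out-and-retry throttling (return -EAGAIN every INIT_KEYS_EACH_TIME keys, sleep INIT_KEYS_SLEEP_MS, restart from a saved key) with an in-place cond_resched(), which keeps long key walks preemption-friendly without rewalking the btree. A minimal sketch of that pattern; the iterator type and helpers below are hypothetical:

/* Minimal sketch of periodic yielding in a long kernel loop; the
 * iterator here is hypothetical, not the bcache btree walker.
 */
#include <linux/sched.h>
#include <linux/types.h>

#define KEYS_PER_RESCHED        500000

struct my_iter;
bool my_iter_next(struct my_iter *it);  /* hypothetical iterator step */

static unsigned long count_keys(struct my_iter *it)
{
        unsigned long n = 0;

        while (my_iter_next(it)) {
                n++;
                /*
                 * Give other runnable tasks a chance instead of bailing
                 * out and restarting the walk later.
                 */
                if (!(n % KEYS_PER_RESCHED))
                        cond_resched();
        }
        return n;
}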
@@ -763,19 +823,158 @@
         bch_btree_op_init(&op.op, -1);
         op.inode = d->id;
         op.count = 0;
-        op.start = KEY(op.inode, 0, 0);
 
-        do {
-                ret = bch_btree_map_keys(&op.op, d->c, &op.start,
-                                         sectors_dirty_init_fn, 0);
-                if (ret == -EAGAIN)
-                        schedule_timeout_interruptible(
-                                msecs_to_jiffies(INIT_KEYS_SLEEP_MS));
-                else if (ret < 0) {
-                        pr_warn("sectors dirty init failed, ret=%d!", ret);
-                        break;
+        ret = bcache_btree(map_keys_recurse,
+                           k,
+                           c->root,
+                           &op.op,
+                           &KEY(op.inode, 0, 0),
+                           sectors_dirty_init_fn,
+                           0);
+        if (ret < 0)
+                pr_warn("sectors dirty init failed, ret=%d!\n", ret);
+
+        /*
+         * The op may be added to the cache_set's btree_cache_wait list
+         * in mca_cannibalize(); make sure it is removed from that list
+         * and that btree_cache_alloc_lock is released before the op
+         * memory is freed.
+         * Otherwise the btree_cache_wait list will be corrupted.
+         */
+        bch_cannibalize_unlock(c);
+        finish_wait(&c->btree_cache_wait, &(&op.op)->wait);
+
+        return ret;
+}
+
+static int bch_dirty_init_thread(void *arg)
+{
+        struct dirty_init_thrd_info *info = arg;
+        struct bch_dirty_init_state *state = info->state;
+        struct cache_set *c = state->c;
+        struct btree_iter iter;
+        struct bkey *k, *p;
+        int cur_idx, prev_idx, skip_nr;
+
+        k = p = NULL;
+        cur_idx = prev_idx = 0;
+
+        bch_btree_iter_init(&c->root->keys, &iter, NULL);
+        k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
+        BUG_ON(!k);
+
+        p = k;
+
+        while (k) {
+                spin_lock(&state->idx_lock);
+                cur_idx = state->key_idx;
+                state->key_idx++;
+                spin_unlock(&state->idx_lock);
+
+                skip_nr = cur_idx - prev_idx;
+
+                while (skip_nr) {
+                        k = bch_btree_iter_next_filter(&iter,
+                                                       &c->root->keys,
+                                                       bch_ptr_bad);
+                        if (k)
+                                p = k;
+                        else {
+                                atomic_set(&state->enough, 1);
+                                /* Update state->enough earlier */
+                                smp_mb__after_atomic();
+                                goto out;
+                        }
+                        skip_nr--;
                 }
-        } while (ret == -EAGAIN);
+
+                if (p) {
+                        if (bch_root_node_dirty_init(c, state->d, p) < 0)
+                                goto out;
+                }
+
+                p = NULL;
+                prev_idx = cur_idx;
+        }
+
+out:
+        /* In order to wake up state->wait in time */
+        smp_mb__before_atomic();
+        if (atomic_dec_and_test(&state->started))
+                wake_up(&state->wait);
+
+        return 0;
+}
+
+static int bch_btre_dirty_init_thread_nr(void)
+{
+        int n = num_online_cpus()/2;
+
+        if (n == 0)
+                n = 1;
+        else if (n > BCH_DIRTY_INIT_THRD_MAX)
+                n = BCH_DIRTY_INIT_THRD_MAX;
+
+        return n;
+}
+
+void bch_sectors_dirty_init(struct bcache_device *d)
+{
+        int i;
+        struct bkey *k = NULL;
+        struct btree_iter iter;
+        struct sectors_dirty_init op;
+        struct cache_set *c = d->c;
+        struct bch_dirty_init_state state;
+
+        /* Just count root keys if no leaf node */
+        rw_lock(0, c->root, c->root->level);
+        if (c->root->level == 0) {
+                bch_btree_op_init(&op.op, -1);
+                op.inode = d->id;
+                op.count = 0;
+
+                for_each_key_filter(&c->root->keys,
+                                    k, &iter, bch_ptr_invalid)
+                        sectors_dirty_init_fn(&op.op, c->root, k);
+
+                rw_unlock(0, c->root);
+                return;
+        }
+
+        memset(&state, 0, sizeof(struct bch_dirty_init_state));
+        state.c = c;
+        state.d = d;
+        state.total_threads = bch_btre_dirty_init_thread_nr();
+        state.key_idx = 0;
+        spin_lock_init(&state.idx_lock);
+        atomic_set(&state.started, 0);
+        atomic_set(&state.enough, 0);
+        init_waitqueue_head(&state.wait);
+
+        for (i = 0; i < state.total_threads; i++) {
+                /* Fetch the latest state.enough earlier */
+                smp_mb__before_atomic();
+                if (atomic_read(&state.enough))
+                        break;
+
+                state.infos[i].state = &state;
+                state.infos[i].thread =
+                        kthread_run(bch_dirty_init_thread, &state.infos[i],
+                                    "bch_dirtcnt[%d]", i);
+                if (IS_ERR(state.infos[i].thread)) {
+                        pr_err("fails to run thread bch_dirty_init[%d]\n", i);
+                        for (--i; i >= 0; i--)
+                                kthread_stop(state.infos[i].thread);
+                        goto out;
+                }
+                atomic_inc(&state.started);
+        }
+
+out:
+        /* Must wait for all threads to stop. */
+        wait_event(state.wait, atomic_read(&state.started) == 0);
+        rw_unlock(0, c->root);
 }
 
 void bch_cached_dev_writeback_init(struct cached_dev *dc)
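The rewritten bch_sectors_dirty_init() spreads the dirty-sector recount over up to num_online_cpus()/2 kthreads (capped at BCH_DIRTY_INIT_THRD_MAX): each worker claims the next value of the shared key_idx under idx_lock, advances its private iterator forward to that root-node key, recounts the subtree below it, and sets state->enough once the iterator runs out. A compact userspace model of that claim-and-skip sharding, purely illustrative and independent of the bcache data structures:

/* Userspace model of the claim-and-skip work sharding used by the new
 * bch_sectors_dirty_init(): threads pull the next index from a shared
 * counter and each processes only the items it claimed.
 * Build with: gcc -pthread sharding_demo.c
 */
#include <pthread.h>
#include <stdio.h>

#define NR_ITEMS        32
#define NR_THREADS      4

static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER;
static int key_idx;                     /* next unclaimed item */
static int owner[NR_ITEMS];             /* which thread processed each item */

static void *worker(void *arg)
{
        int tid = (int)(long)arg;

        for (;;) {
                int cur;

                pthread_mutex_lock(&idx_lock);
                cur = key_idx++;        /* claim the next item */
                pthread_mutex_unlock(&idx_lock);

                if (cur >= NR_ITEMS)    /* "iterator" exhausted */
                        break;
                owner[cur] = tid;       /* "process" the claimed item */
        }
        return NULL;
}

int main(void)
{
        pthread_t thr[NR_THREADS];

        for (int i = 0; i < NR_THREADS; i++)
                pthread_create(&thr[i], NULL, worker, (void *)(long)i);
        for (int i = 0; i < NR_THREADS; i++)
                pthread_join(thr[i], NULL);

        for (int i = 0; i < NR_ITEMS; i++)
                printf("item %2d handled by thread %d\n", i, owner[i]);
        return 0;
}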