forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-09 95099d4622f8cb224d94e314c7a8e0df60b13f87
kernel/drivers/md/bcache/writeback.c
@@ -17,6 +17,15 @@
 #include <linux/sched/clock.h>
 #include <trace/events/bcache.h>
 
+static void update_gc_after_writeback(struct cache_set *c)
+{
+        if (c->gc_after_writeback != (BCH_ENABLE_AUTO_GC) ||
+            c->gc_stats.in_use < BCH_AUTO_GC_DIRTY_THRESHOLD)
+                return;
+
+        c->gc_after_writeback |= BCH_DO_AUTO_GC;
+}
+
 /* Rate limiting */
 static uint64_t __calc_target_rate(struct cached_dev *dc)
 {
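For context on the new helper above: c->gc_after_writeback is a small bit field shared between the sysfs code and the writeback thread. The sketch below shows the layout this hunk assumes; the macro names come from the diff itself, while the numeric values and the threshold default mirror the upstream bcache writeback.h and may differ in this tree.

/* Assumed flag layout for c->gc_after_writeback (sketch of writeback.h;
 * values are illustrative and may differ in this tree).
 */
#define BCH_ENABLE_AUTO_GC              1       /* user opted in via sysfs */
#define BCH_DO_AUTO_GC                  2       /* latched once dirty use crosses the threshold */

/* Wake gc only once this much of the cache is in use (percent). */
#define BCH_AUTO_GC_DIRTY_THRESHOLD     50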
@@ -26,7 +35,7 @@
 	 * This is the size of the cache, minus the amount used for
 	 * flash-only devices
 	 */
-        uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size -
+        uint64_t cache_sectors = c->nbuckets * c->cache->sb.bucket_size -
                                 atomic_long_read(&c->flash_dev_dirty_sectors);
 
 	/*
@@ -110,24 +119,65 @@
         dc->writeback_rate_target = target;
 }
 
+static bool idle_counter_exceeded(struct cache_set *c)
+{
+        int counter, dev_nr;
+
+        /*
+         * If c->idle_counter overflows (idle for a really long time),
+         * reset it to 0 and do not set the maximum rate this time, for
+         * code simplicity.
+         */
+        counter = atomic_inc_return(&c->idle_counter);
+        if (counter <= 0) {
+                atomic_set(&c->idle_counter, 0);
+                return false;
+        }
+
+        dev_nr = atomic_read(&c->attached_dev_nr);
+        if (dev_nr == 0)
+                return false;
+
+        /*
+         * c->idle_counter is increased by the writeback threads of all
+         * attached backing devices. In order to represent a rough time
+         * period, the counter should be divided by dev_nr; otherwise
+         * the idle time cannot grow larger as more backing devices are
+         * attached.
+         * The following calculation is equivalent to checking
+         * (counter / dev_nr) < (dev_nr * 6)
+         */
+        if (counter < (dev_nr * dev_nr * 6))
+                return false;
+
+        return true;
+}
+
+/*
+ * c->idle_counter is increased every time update_writeback_rate() is
+ * called. If all backing devices attached to the same cache set have
+ * identical dc->writeback_rate_update_seconds values, it takes about 6
+ * rounds of update_writeback_rate() on each backing device before
+ * c->at_max_writeback_rate is set to 1, and then the max writeback
+ * rate is set for each dc->writeback_rate.rate.
+ * In order to avoid the extra locking cost of counting the exact
+ * number of dirty cached devices, c->attached_dev_nr is used to
+ * calculate the idle threshold. It might be bigger if not all cached
+ * devices are in writeback mode, but it still works well with a
+ * limited number of extra rounds of update_writeback_rate().
+ */
 static bool set_at_max_writeback_rate(struct cache_set *c,
                                       struct cached_dev *dc)
 {
-        /*
-         * Idle_counter is increased everytime when update_writeback_rate() is
-         * called. If all backing devices attached to the same cache set have
-         * identical dc->writeback_rate_update_seconds values, it is about 6
-         * rounds of update_writeback_rate() on each backing device before
-         * c->at_max_writeback_rate is set to 1, and then max wrteback rate set
-         * to each dc->writeback_rate.rate.
-         * In order to avoid extra locking cost for counting exact dirty cached
-         * devices number, c->attached_dev_nr is used to calculate the idle
-         * throushold. It might be bigger if not all cached device are in write-
-         * back mode, but it still works well with limited extra rounds of
-         * update_writeback_rate().
-         */
-        if (atomic_inc_return(&c->idle_counter) <
-            atomic_read(&c->attached_dev_nr) * 6)
+        /* Don't set max writeback rate if it is disabled */
+        if (!c->idle_max_writeback_rate_enabled)
+                return false;
+
+        /* Don't set max writeback rate if gc is running */
+        if (!c->gc_mark_valid)
+                return false;
+
+        if (!idle_counter_exceeded(c))
                 return false;
 
         if (atomic_read(&c->at_max_writeback_rate) != 1)
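The threshold check at the end of idle_counter_exceeded() is written without a division: for non-negative counter and positive dev_nr, (counter / dev_nr) < (dev_nr * 6) holds exactly when counter < dev_nr * dev_nr * 6. A tiny standalone C check of that equivalence, using a hypothetical helper that is not part of the patch:

#include <assert.h>

/* Same predicate as idle_counter_exceeded(), isolated for illustration. */
static int idle_threshold_hit(int counter, int dev_nr)
{
        return counter >= dev_nr * dev_nr * 6;
}

int main(void)
{
        /* One backing device: the max rate kicks in after ~6 counted rounds. */
        assert(!idle_threshold_hit(5, 1));
        assert(idle_threshold_hit(6, 1));

        /* Four backing devices: the counter must reach 4 * 4 * 6 = 96. */
        assert(!idle_threshold_hit(95, 4));
        assert(idle_threshold_hit(96, 4));
        return 0;
}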
@@ -141,13 +191,10 @@
         dc->writeback_rate_change = 0;
 
         /*
-         * Check c->idle_counter and c->at_max_writeback_rate agagain in case
-         * new I/O arrives during before set_at_max_writeback_rate() returns.
-         * Then the writeback rate is set to 1, and its new value should be
-         * decided via __update_writeback_rate().
+         * In case new I/O arrives before
+         * set_at_max_writeback_rate() returns.
          */
-        if ((atomic_read(&c->idle_counter) <
-            atomic_read(&c->attached_dev_nr) * 6) ||
+        if (!idle_counter_exceeded(c) ||
             !atomic_read(&c->at_max_writeback_rate))
                 return false;
 
@@ -167,7 +214,7 @@
         */
        set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
        /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-       smp_mb();
+       smp_mb__after_atomic();
 
        /*
         * CACHE_SET_IO_DISABLE might be set via sysfs interface,
@@ -177,7 +224,7 @@
            test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
                clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
                /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-               smp_mb();
+               smp_mb__after_atomic();
                return;
        }
 
@@ -191,6 +238,7 @@
                if (!set_at_max_writeback_rate(c, dc)) {
                        down_read(&dc->writeback_lock);
                        __update_writeback_rate(dc);
+                       update_gc_after_writeback(c);
                        up_read(&dc->writeback_lock);
                }
        }
@@ -212,7 +260,7 @@
         */
        clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
        /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
-       smp_mb();
+       smp_mb__after_atomic();
 }
 
 static unsigned int writeback_delay(struct cached_dev *dc,
@@ -442,10 +490,8 @@
                for (i = 0; i < nk; i++) {
                        w = keys[i];
 
-                       io = kzalloc(sizeof(struct dirty_io) +
-                                    sizeof(struct bio_vec) *
-                                    DIV_ROUND_UP(KEY_SIZE(&w->key),
-                                                 PAGE_SECTORS),
+                       io = kzalloc(struct_size(io, bio.bi_inline_vecs,
+                                    DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS)),
                                     GFP_KERNEL);
                        if (!io)
                                goto err;
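struct_size() from <linux/overflow.h> computes the same sizeof(struct dirty_io) + n * sizeof(struct bio_vec) as the removed open-coded expression, but saturates to SIZE_MAX if the multiplication or addition overflows, so kzalloc() fails instead of returning an undersized buffer. A minimal sketch of the pattern on a hypothetical structure (not the bcache types):

#include <linux/bvec.h>
#include <linux/overflow.h>
#include <linux/slab.h>

/* Hypothetical example type with a trailing flexible array member. */
struct demo_io {
        unsigned int    nr_vecs;
        struct bio_vec  vecs[];
};

static struct demo_io *demo_io_alloc(unsigned int nr)
{
        struct demo_io *io;

        /* sizeof(*io) + nr * sizeof(io->vecs[0]), overflow-checked. */
        io = kzalloc(struct_size(io, vecs, nr), GFP_KERNEL);
        if (io)
                io->nr_vecs = nr;
        return io;
}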
@@ -693,6 +739,23 @@
                                up_write(&dc->writeback_lock);
                                break;
                        }
+
+                       /*
+                        * When the dirty data ratio is high (e.g. 50%+), there might
+                        * be heavy bucket fragmentation after writeback
+                        * finishes, which hurts subsequent write performance.
+                        * If users really care about write performance they
+                        * may set BCH_ENABLE_AUTO_GC via sysfs; then, when
+                        * BCH_DO_AUTO_GC is set, the garbage collection thread
+                        * will be woken up here. After moving gc, the shrunk
+                        * btree and the discarded free bucket SSD space may be
+                        * helpful for subsequent write requests.
+                        */
+                       if (c->gc_after_writeback ==
+                           (BCH_ENABLE_AUTO_GC|BCH_DO_AUTO_GC)) {
+                               c->gc_after_writeback &= ~BCH_DO_AUTO_GC;
+                               force_wake_up_gc(c);
+                       }
                }
 
                up_write(&dc->writeback_lock);
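force_wake_up_gc() is not defined in this file; in upstream bcache it is a small inline helper in btree.h. The sketch below, reproduced from the upstream helper as remembered and so possibly differing in this tree, shows why clearing BCH_DO_AUTO_GC and calling it is enough to start a gc pass:

/* Sketch of the upstream helper from btree.h; may differ in this tree. */
static inline void force_wake_up_gc(struct cache_set *c)
{
        /*
         * The gc thread only agrees to run once sectors_to_gc drops
         * below zero, so force it negative before waking the thread;
         * a bare wake_up_gc() would otherwise be ignored.
         */
        atomic_set(&c->sectors_to_gc, -1);
        wake_up_gc(c);
}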
@@ -724,13 +787,11 @@
 
 /* Init */
 #define INIT_KEYS_EACH_TIME	500000
-#define INIT_KEYS_SLEEP_MS	100
 
 struct sectors_dirty_init {
        struct btree_op op;
        unsigned int inode;
        size_t count;
-       struct bkey start;
 };
 
 static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
@@ -746,16 +807,15 @@
                                     KEY_START(k), KEY_SIZE(k));
 
        op->count++;
-       if (atomic_read(&b->c->search_inflight) &&
-           !(op->count % INIT_KEYS_EACH_TIME)) {
-               bkey_copy_key(&op->start, k);
-               return -EAGAIN;
-       }
+       if (!(op->count % INIT_KEYS_EACH_TIME))
+               cond_resched();
 
        return MAP_CONTINUE;
 }
 
-void bch_sectors_dirty_init(struct bcache_device *d)
+static int bch_root_node_dirty_init(struct cache_set *c,
+                                    struct bcache_device *d,
+                                    struct bkey *k)
 {
        struct sectors_dirty_init op;
        int ret;
@@ -763,19 +823,148 @@
        bch_btree_op_init(&op.op, -1);
        op.inode = d->id;
        op.count = 0;
-       op.start = KEY(op.inode, 0, 0);
 
-       do {
-               ret = bch_btree_map_keys(&op.op, d->c, &op.start,
-                                        sectors_dirty_init_fn, 0);
-               if (ret == -EAGAIN)
-                       schedule_timeout_interruptible(
-                               msecs_to_jiffies(INIT_KEYS_SLEEP_MS));
-               else if (ret < 0) {
-                       pr_warn("sectors dirty init failed, ret=%d!", ret);
-                       break;
+       ret = bcache_btree(map_keys_recurse,
+                          k,
+                          c->root,
+                          &op.op,
+                          &KEY(op.inode, 0, 0),
+                          sectors_dirty_init_fn,
+                          0);
+       if (ret < 0)
+               pr_warn("sectors dirty init failed, ret=%d!\n", ret);
+
+       return ret;
+}
+
+static int bch_dirty_init_thread(void *arg)
+{
+       struct dirty_init_thrd_info *info = arg;
+       struct bch_dirty_init_state *state = info->state;
+       struct cache_set *c = state->c;
+       struct btree_iter iter;
+       struct bkey *k, *p;
+       int cur_idx, prev_idx, skip_nr;
+
+       k = p = NULL;
+       cur_idx = prev_idx = 0;
+
+       bch_btree_iter_init(&c->root->keys, &iter, NULL);
+       k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
+       BUG_ON(!k);
+
+       p = k;
+
+       while (k) {
+               spin_lock(&state->idx_lock);
+               cur_idx = state->key_idx;
+               state->key_idx++;
+               spin_unlock(&state->idx_lock);
+
+               skip_nr = cur_idx - prev_idx;
+
+               while (skip_nr) {
+                       k = bch_btree_iter_next_filter(&iter,
+                                                      &c->root->keys,
+                                                      bch_ptr_bad);
+                       if (k)
+                               p = k;
+                       else {
+                               atomic_set(&state->enough, 1);
+                               /* Update state->enough earlier */
+                               smp_mb__after_atomic();
+                               goto out;
+                       }
+                       skip_nr--;
                }
-       } while (ret == -EAGAIN);
+
+               if (p) {
+                       if (bch_root_node_dirty_init(c, state->d, p) < 0)
+                               goto out;
+               }
+
+               p = NULL;
+               prev_idx = cur_idx;
+       }
+
+out:
+       /* In order to wake up state->wait in time */
+       smp_mb__before_atomic();
+       if (atomic_dec_and_test(&state->started))
+               wake_up(&state->wait);
+
+       return 0;
+}
+
+static int bch_btre_dirty_init_thread_nr(void)
+{
+       int n = num_online_cpus()/2;
+
+       if (n == 0)
+               n = 1;
+       else if (n > BCH_DIRTY_INIT_THRD_MAX)
+               n = BCH_DIRTY_INIT_THRD_MAX;
+
+       return n;
+}
+
+void bch_sectors_dirty_init(struct bcache_device *d)
+{
+       int i;
+       struct bkey *k = NULL;
+       struct btree_iter iter;
+       struct sectors_dirty_init op;
+       struct cache_set *c = d->c;
+       struct bch_dirty_init_state state;
+
+       /* Just count root keys if no leaf node */
+       rw_lock(0, c->root, c->root->level);
+       if (c->root->level == 0) {
+               bch_btree_op_init(&op.op, -1);
+               op.inode = d->id;
+               op.count = 0;
+
+               for_each_key_filter(&c->root->keys,
+                                   k, &iter, bch_ptr_invalid)
+                       sectors_dirty_init_fn(&op.op, c->root, k);
+
+               rw_unlock(0, c->root);
+               return;
+       }
+
+       memset(&state, 0, sizeof(struct bch_dirty_init_state));
+       state.c = c;
+       state.d = d;
+       state.total_threads = bch_btre_dirty_init_thread_nr();
+       state.key_idx = 0;
+       spin_lock_init(&state.idx_lock);
+       atomic_set(&state.started, 0);
+       atomic_set(&state.enough, 0);
+       init_waitqueue_head(&state.wait);
+
+       for (i = 0; i < state.total_threads; i++) {
+               /* Fetch latest state.enough earlier */
+               smp_mb__before_atomic();
+               if (atomic_read(&state.enough))
+                       break;
+
+               state.infos[i].state = &state;
+               state.infos[i].thread =
+                       kthread_run(bch_dirty_init_thread, &state.infos[i],
+                                   "bch_dirtcnt[%d]", i);
+               if (IS_ERR(state.infos[i].thread)) {
+                       pr_err("fails to run thread bch_dirty_init[%d]\n", i);
+                       for (--i; i >= 0; i--)
+                               kthread_stop(state.infos[i].thread);
+                       goto out;
+               }
+               atomic_inc(&state.started);
+       }
+
+out:
+       /* Must wait for all threads to stop. */
+       wait_event(state.wait, atomic_read(&state.started) == 0);
+       rw_unlock(0, c->root);
 }
 
 void bch_cached_dev_writeback_init(struct cached_dev *dc)
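The multi-threaded dirty-sector init added above relies on two small structures declared in writeback.h. The sketch below is inferred from the fields the diff actually uses (state->c, state->d, key_idx, idx_lock, started, enough, wait, infos[i].state/thread) and mirrors the upstream declarations; the exact field order and the BCH_DIRTY_INIT_THRD_MAX value may differ in this tree.

/* Companion declarations assumed by bch_sectors_dirty_init() above
 * (sketch of writeback.h; may differ in this tree).
 */
#define BCH_DIRTY_INIT_THRD_MAX         12

struct bch_dirty_init_state;

struct dirty_init_thrd_info {
        struct bch_dirty_init_state     *state;
        struct task_struct              *thread;
};

struct bch_dirty_init_state {
        struct cache_set                *c;
        struct bcache_device            *d;
        int                             total_threads;
        int                             key_idx;        /* next root-node key to hand out */
        spinlock_t                      idx_lock;
        atomic_t                        started;        /* running worker threads */
        atomic_t                        enough;         /* set once the key space is exhausted */
        wait_queue_head_t               wait;
        struct dirty_init_thrd_info     infos[BCH_DIRTY_INIT_THRD_MAX];
};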