| .. | .. |
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
| 1 | 2 | /* |
| 2 | 3 | * Block rq-qos base io controller |
| 3 | 4 | * |
| .. | .. |
| 85 | 86 | struct blk_iolatency { |
| 86 | 87 | struct rq_qos rqos; |
| 87 | 88 | struct timer_list timer; |
| 88 | | - atomic_t enabled; |
| 89 | + |
| 90 | + /* |
| 91 | + * ->enabled is the master enable switch gating the throttling logic and |
| 92 | + * inflight tracking. The number of cgroups which have iolat enabled is |
| 93 | + * tracked in ->enable_cnt, and ->enabled is flipped on/off accordingly |
| 94 | + * from ->enable_work with the request_queue frozen. For details, see |
| 95 | + * blkiolatency_enable_work_fn(). |
| 96 | + */ |
| 97 | + bool enabled; |
| 98 | + atomic_t enable_cnt; |
| 99 | + struct work_struct enable_work; |
| 89 | 100 | }; |
| 90 | 101 | |
| 91 | 102 | static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos) |
| 92 | 103 | { |
| 93 | 104 | return container_of(rqos, struct blk_iolatency, rqos); |
| 94 | | -} |
| 95 | | - |
| 96 | | -static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat) |
| 97 | | -{ |
| 98 | | - return atomic_read(&blkiolat->enabled) > 0; |
| 99 | 105 | } |
| 100 | 106 | |
| 101 | 107 | struct child_latency_info { |
| .. | .. |
| 117 | 123 | atomic_t scale_cookie; |
| 118 | 124 | }; |
| 119 | 125 | |
| 126 | +struct percentile_stats { |
| 127 | + u64 total; |
| 128 | + u64 missed; |
| 129 | +}; |
| 130 | + |
| 131 | +struct latency_stat { |
| 132 | + union { |
| 133 | + struct percentile_stats ps; |
| 134 | + struct blk_rq_stat rqs; |
| 135 | + }; |
| 136 | +}; |
| 137 | + |
| 120 | 138 | struct iolatency_grp { |
| 121 | 139 | struct blkg_policy_data pd; |
| 122 | | - struct blk_rq_stat __percpu *stats; |
| 140 | + struct latency_stat __percpu *stats; |
| 141 | + struct latency_stat cur_stat; |
| 123 | 142 | struct blk_iolatency *blkiolat; |
| 124 | 143 | struct rq_depth rq_depth; |
| 125 | 144 | struct rq_wait rq_wait; |
| .. | .. |
| 134 | 153 | /* Our current number of IO's for the last summation. */ |
| 135 | 154 | u64 nr_samples; |
| 136 | 155 | |
| 156 | + bool ssd; |
| 137 | 157 | struct child_latency_info child_lat; |
| 138 | 158 | }; |
| 139 | 159 | |
| .. | .. |
| 174 | 194 | return pd_to_blkg(&iolat->pd); |
| 175 | 195 | } |
| 176 | 196 | |
| 177 | | -static inline bool iolatency_may_queue(struct iolatency_grp *iolat, |
| 178 | | - wait_queue_entry_t *wait, |
| 179 | | - bool first_block) |
| 197 | +static inline void latency_stat_init(struct iolatency_grp *iolat, |
| 198 | + struct latency_stat *stat) |
| 180 | 199 | { |
| 181 | | - struct rq_wait *rqw = &iolat->rq_wait; |
| 200 | + if (iolat->ssd) { |
| 201 | + stat->ps.total = 0; |
| 202 | + stat->ps.missed = 0; |
| 203 | + } else |
| 204 | + blk_rq_stat_init(&stat->rqs); |
| 205 | +} |
| 182 | 206 | |
| 183 | | - if (first_block && waitqueue_active(&rqw->wait) && |
| 184 | | - rqw->wait.head.next != &wait->entry) |
| 185 | | - return false; |
| 207 | +static inline void latency_stat_sum(struct iolatency_grp *iolat, |
| 208 | + struct latency_stat *sum, |
| 209 | + struct latency_stat *stat) |
| 210 | +{ |
| 211 | + if (iolat->ssd) { |
| 212 | + sum->ps.total += stat->ps.total; |
| 213 | + sum->ps.missed += stat->ps.missed; |
| 214 | + } else |
| 215 | + blk_rq_stat_sum(&sum->rqs, &stat->rqs); |
| 216 | +} |
| 217 | + |
| 218 | +static inline void latency_stat_record_time(struct iolatency_grp *iolat, |
| 219 | + u64 req_time) |
| 220 | +{ |
| 221 | + struct latency_stat *stat = get_cpu_ptr(iolat->stats); |
| 222 | + if (iolat->ssd) { |
| 223 | + if (req_time >= iolat->min_lat_nsec) |
| 224 | + stat->ps.missed++; |
| 225 | + stat->ps.total++; |
| 226 | + } else |
| 227 | + blk_rq_stat_add(&stat->rqs, req_time); |
| 228 | + put_cpu_ptr(stat); |
| 229 | +} |
| 230 | + |
| 231 | +static inline bool latency_sum_ok(struct iolatency_grp *iolat, |
| 232 | + struct latency_stat *stat) |
| 233 | +{ |
| 234 | + if (iolat->ssd) { |
| 235 | + u64 thresh = div64_u64(stat->ps.total, 10); |
| 236 | + thresh = max(thresh, 1ULL); |
| 237 | + return stat->ps.missed < thresh; |
| 238 | + } |
| 239 | + return stat->rqs.mean <= iolat->min_lat_nsec; |
| 240 | +} |
| 241 | + |
| 242 | +static inline u64 latency_stat_samples(struct iolatency_grp *iolat, |
| 243 | + struct latency_stat *stat) |
| 244 | +{ |
| 245 | + if (iolat->ssd) |
| 246 | + return stat->ps.total; |
| 247 | + return stat->rqs.nr_samples; |
| 248 | +} |
| 249 | + |
| 250 | +static inline void iolat_update_total_lat_avg(struct iolatency_grp *iolat, |
| 251 | + struct latency_stat *stat) |
| 252 | +{ |
| 253 | + int exp_idx; |
| 254 | + |
| 255 | + if (iolat->ssd) |
| 256 | + return; |
| 257 | + |
| 258 | + /* |
| 259 | + * calc_load() takes in a number stored in fixed point representation. |
| 260 | + * Because we are using this for IO time in ns, the values stored |
| 261 | + * are significantly larger than the FIXED_1 denominator (2048). |
| 262 | + * Therefore, rounding errors in the calculation are negligible and |
| 263 | + * can be ignored. |
| 264 | + */ |
| 265 | + exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1, |
| 266 | + div64_u64(iolat->cur_win_nsec, |
| 267 | + BLKIOLATENCY_EXP_BUCKET_SIZE)); |
| 268 | + iolat->lat_avg = calc_load(iolat->lat_avg, |
| 269 | + iolatency_exp_factors[exp_idx], |
| 270 | + stat->rqs.mean); |
| 271 | +} |
| 272 | + |
| 273 | +static void iolat_cleanup_cb(struct rq_wait *rqw, void *private_data) |
| 274 | +{ |
| 275 | + atomic_dec(&rqw->inflight); |
| 276 | + wake_up(&rqw->wait); |
| 277 | +} |
| 278 | + |
| 279 | +static bool iolat_acquire_inflight(struct rq_wait *rqw, void *private_data) |
| 280 | +{ |
| 281 | + struct iolatency_grp *iolat = private_data; |
| 186 | 282 | return rq_wait_inc_below(rqw, iolat->rq_depth.max_depth); |
| 187 | 283 | } |
| 188 | 284 | |
| 189 | 285 | static void __blkcg_iolatency_throttle(struct rq_qos *rqos, |
| 190 | 286 | struct iolatency_grp *iolat, |
| 191 | | - spinlock_t *lock, bool issue_as_root, |
| 287 | + bool issue_as_root, |
| 192 | 288 | bool use_memdelay) |
| 193 | | - __releases(lock) |
| 194 | | - __acquires(lock) |
| 195 | 289 | { |
| 196 | 290 | struct rq_wait *rqw = &iolat->rq_wait; |
| 197 | 291 | unsigned use_delay = atomic_read(&lat_to_blkg(iolat)->use_delay); |
| 198 | | - DEFINE_WAIT(wait); |
| 199 | | - bool first_block = true; |
| 200 | 292 | |
| 201 | 293 | if (use_delay) |
| 202 | 294 | blkcg_schedule_throttle(rqos->q, use_memdelay); |
| .. | .. |
| 213 | 305 | return; |
| 214 | 306 | } |
| 215 | 307 | |
| 216 | | - if (iolatency_may_queue(iolat, &wait, first_block)) |
| 217 | | - return; |
| 218 | | - |
| 219 | | - do { |
| 220 | | - prepare_to_wait_exclusive(&rqw->wait, &wait, |
| 221 | | - TASK_UNINTERRUPTIBLE); |
| 222 | | - |
| 223 | | - if (iolatency_may_queue(iolat, &wait, first_block)) |
| 224 | | - break; |
| 225 | | - first_block = false; |
| 226 | | - |
| 227 | | - if (lock) { |
| 228 | | - spin_unlock_irq(lock); |
| 229 | | - io_schedule(); |
| 230 | | - spin_lock_irq(lock); |
| 231 | | - } else { |
| 232 | | - io_schedule(); |
| 233 | | - } |
| 234 | | - } while (1); |
| 235 | | - |
| 236 | | - finish_wait(&rqw->wait, &wait); |
| 308 | + rq_qos_wait(rqw, iolat, iolat_acquire_inflight, iolat_cleanup_cb); |
| 237 | 309 | } |
| 238 | 310 | |
| 239 | 311 | #define SCALE_DOWN_FACTOR 2 |
| .. | .. |
| 257 | 329 | struct child_latency_info *lat_info, |
| 258 | 330 | bool up) |
| 259 | 331 | { |
| 260 | | - unsigned long qd = blk_queue_depth(blkiolat->rqos.q); |
| 332 | + unsigned long qd = blkiolat->rqos.q->nr_requests; |
| 261 | 333 | unsigned long scale = scale_amount(qd, up); |
| 262 | 334 | unsigned long old = atomic_read(&lat_info->scale_cookie); |
| 263 | 335 | unsigned long max_scale = qd << 1; |
| .. | .. |
| 297 | 369 | */ |
| 298 | 370 | static void scale_change(struct iolatency_grp *iolat, bool up) |
| 299 | 371 | { |
| 300 | | - unsigned long qd = blk_queue_depth(iolat->blkiolat->rqos.q); |
| 372 | + unsigned long qd = iolat->blkiolat->rqos.q->nr_requests; |
| 301 | 373 | unsigned long scale = scale_amount(qd, up); |
| 302 | 374 | unsigned long old = iolat->rq_depth.max_depth; |
| 303 | | - bool changed = false; |
| 304 | 375 | |
| 305 | 376 | if (old > qd) |
| 306 | 377 | old = qd; |
| .. | .. |
| 310 | 381 | return; |
| 311 | 382 | |
| 312 | 383 | if (old < qd) { |
| 313 | | - changed = true; |
| 314 | 384 | old += scale; |
| 315 | 385 | old = min(old, qd); |
| 316 | 386 | iolat->rq_depth.max_depth = old; |
| 317 | 387 | wake_up_all(&iolat->rq_wait.wait); |
| 318 | 388 | } |
| 319 | | - } else if (old > 1) { |
| 389 | + } else { |
| 320 | 390 | old >>= 1; |
| 321 | | - changed = true; |
| 322 | 391 | iolat->rq_depth.max_depth = max(old, 1UL); |
| 323 | 392 | } |
| 324 | 393 | } |
| .. | .. |
| 371 | 440 | * scale down event. |
| 372 | 441 | */ |
| 373 | 442 | samples_thresh = lat_info->nr_samples * 5; |
| 374 | | - samples_thresh = div64_u64(samples_thresh, 100); |
| 443 | + samples_thresh = max(1ULL, div64_u64(samples_thresh, 100)); |
| 375 | 444 | if (iolat->nr_samples <= samples_thresh) |
| 376 | 445 | return; |
| 377 | 446 | } |
| .. | .. |
| 393 | 462 | scale_change(iolat, direction > 0); |
| 394 | 463 | } |
| 395 | 464 | |
| 396 | | -static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio, |
| 397 | | - spinlock_t *lock) |
| 465 | +static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio) |
| 398 | 466 | { |
| 399 | 467 | struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); |
| 400 | | - struct blkcg *blkcg; |
| 401 | | - struct blkcg_gq *blkg; |
| 402 | | - struct request_queue *q = rqos->q; |
| 468 | + struct blkcg_gq *blkg = bio->bi_blkg; |
| 403 | 469 | bool issue_as_root = bio_issue_as_root_blkg(bio); |
| 404 | 470 | |
| 405 | | - if (!blk_iolatency_enabled(blkiolat)) |
| 471 | + if (!blkiolat->enabled) |
| 406 | 472 | return; |
| 407 | 473 | |
| 408 | | - rcu_read_lock(); |
| 409 | | - blkcg = bio_blkcg(bio); |
| 410 | | - bio_associate_blkcg(bio, &blkcg->css); |
| 411 | | - blkg = blkg_lookup(blkcg, q); |
| 412 | | - if (unlikely(!blkg)) { |
| 413 | | - if (!lock) |
| 414 | | - spin_lock_irq(q->queue_lock); |
| 415 | | - blkg = blkg_lookup_create(blkcg, q); |
| 416 | | - if (IS_ERR(blkg)) |
| 417 | | - blkg = NULL; |
| 418 | | - if (!lock) |
| 419 | | - spin_unlock_irq(q->queue_lock); |
| 420 | | - } |
| 421 | | - if (!blkg) |
| 422 | | - goto out; |
| 423 | | - |
| 424 | | - bio_issue_init(&bio->bi_issue, bio_sectors(bio)); |
| 425 | | - bio_associate_blkg(bio, blkg); |
| 426 | | -out: |
| 427 | | - rcu_read_unlock(); |
| 428 | 474 | while (blkg && blkg->parent) { |
| 429 | 475 | struct iolatency_grp *iolat = blkg_to_lat(blkg); |
| 430 | 476 | if (!iolat) { |
| .. | .. |
| 433 | 479 | } |
| 434 | 480 | |
| 435 | 481 | check_scale_change(iolat); |
| 436 | | - __blkcg_iolatency_throttle(rqos, iolat, lock, issue_as_root, |
| 482 | + __blkcg_iolatency_throttle(rqos, iolat, issue_as_root, |
| 437 | 483 | (bio->bi_opf & REQ_SWAP) == REQ_SWAP); |
| 438 | 484 | blkg = blkg->parent; |
| 439 | 485 | } |
| .. | .. |
| 445 | 491 | struct bio_issue *issue, u64 now, |
| 446 | 492 | bool issue_as_root) |
| 447 | 493 | { |
| 448 | | - struct blk_rq_stat *rq_stat; |
| 449 | 494 | u64 start = bio_issue_time(issue); |
| 450 | 495 | u64 req_time; |
| 451 | 496 | |
| .. | .. |
| 471 | 516 | return; |
| 472 | 517 | } |
| 473 | 518 | |
| 474 | | - rq_stat = get_cpu_ptr(iolat->stats); |
| 475 | | - blk_rq_stat_add(rq_stat, req_time); |
| 476 | | - put_cpu_ptr(rq_stat); |
| 519 | + latency_stat_record_time(iolat, req_time); |
| 477 | 520 | } |
| 478 | 521 | |
| 479 | 522 | #define BLKIOLATENCY_MIN_ADJUST_TIME (500 * NSEC_PER_MSEC) |
| .. | .. |
| 484 | 527 | struct blkcg_gq *blkg = lat_to_blkg(iolat); |
| 485 | 528 | struct iolatency_grp *parent; |
| 486 | 529 | struct child_latency_info *lat_info; |
| 487 | | - struct blk_rq_stat stat; |
| 530 | + struct latency_stat stat; |
| 488 | 531 | unsigned long flags; |
| 489 | | - int cpu, exp_idx; |
| 532 | + int cpu; |
| 490 | 533 | |
| 491 | | - blk_rq_stat_init(&stat); |
| 534 | + latency_stat_init(iolat, &stat); |
| 492 | 535 | preempt_disable(); |
| 493 | 536 | for_each_online_cpu(cpu) { |
| 494 | | - struct blk_rq_stat *s; |
| 537 | + struct latency_stat *s; |
| 495 | 538 | s = per_cpu_ptr(iolat->stats, cpu); |
| 496 | | - blk_rq_stat_sum(&stat, s); |
| 497 | | - blk_rq_stat_init(s); |
| 539 | + latency_stat_sum(iolat, &stat, s); |
| 540 | + latency_stat_init(iolat, s); |
| 498 | 541 | } |
| 499 | 542 | preempt_enable(); |
| 500 | 543 | |
| .. | .. |
| 504 | 547 | |
| 505 | 548 | lat_info = &parent->child_lat; |
| 506 | 549 | |
| 507 | | - /* |
| 508 | | - * calc_load() takes in a number stored in fixed point representation. |
| 509 | | - * Because we are using this for IO time in ns, the values stored |
| 510 | | - * are significantly larger than the FIXED_1 denominator (2048). |
| 511 | | - * Therefore, rounding errors in the calculation are negligible and |
| 512 | | - * can be ignored. |
| 513 | | - */ |
| 514 | | - exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1, |
| 515 | | - div64_u64(iolat->cur_win_nsec, |
| 516 | | - BLKIOLATENCY_EXP_BUCKET_SIZE)); |
| 517 | | - iolat->lat_avg = calc_load(iolat->lat_avg, |
| 518 | | - iolatency_exp_factors[exp_idx], |
| 519 | | - stat.mean); |
| 550 | + iolat_update_total_lat_avg(iolat, &stat); |
| 520 | 551 | |
| 521 | 552 | /* Everything is ok and we don't need to adjust the scale. */ |
| 522 | | - if (stat.mean <= iolat->min_lat_nsec && |
| 553 | + if (latency_sum_ok(iolat, &stat) && |
| 523 | 554 | atomic_read(&lat_info->scale_cookie) == DEFAULT_SCALE_COOKIE) |
| 524 | 555 | return; |
| 525 | 556 | |
| 526 | 557 | /* Somebody beat us to the punch, just bail. */ |
| 527 | 558 | spin_lock_irqsave(&lat_info->lock, flags); |
| 559 | + |
| 560 | + latency_stat_sum(iolat, &iolat->cur_stat, &stat); |
| 528 | 561 | lat_info->nr_samples -= iolat->nr_samples; |
| 529 | | - lat_info->nr_samples += stat.nr_samples; |
| 530 | | - iolat->nr_samples = stat.nr_samples; |
| 562 | + lat_info->nr_samples += latency_stat_samples(iolat, &iolat->cur_stat); |
| 563 | + iolat->nr_samples = latency_stat_samples(iolat, &iolat->cur_stat); |
| 531 | 564 | |
| 532 | 565 | if ((lat_info->last_scale_event >= now || |
| 533 | | - now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME) && |
| 534 | | - lat_info->scale_lat <= iolat->min_lat_nsec) |
| 566 | + now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME)) |
| 535 | 567 | goto out; |
| 536 | 568 | |
| 537 | | - if (stat.mean <= iolat->min_lat_nsec && |
| 538 | | - stat.nr_samples >= BLKIOLATENCY_MIN_GOOD_SAMPLES) { |
| 569 | + if (latency_sum_ok(iolat, &iolat->cur_stat) && |
| 570 | + latency_sum_ok(iolat, &stat)) { |
| 571 | + if (latency_stat_samples(iolat, &iolat->cur_stat) < |
| 572 | + BLKIOLATENCY_MIN_GOOD_SAMPLES) |
| 573 | + goto out; |
| 539 | 574 | if (lat_info->scale_grp == iolat) { |
| 540 | 575 | lat_info->last_scale_event = now; |
| 541 | 576 | scale_cookie_change(iolat->blkiolat, lat_info, true); |
| 542 | 577 | } |
| 543 | | - } else if (stat.mean > iolat->min_lat_nsec) { |
| 578 | + } else if (lat_info->scale_lat == 0 || |
| 579 | + lat_info->scale_lat >= iolat->min_lat_nsec) { |
| 544 | 580 | lat_info->last_scale_event = now; |
| 545 | 581 | if (!lat_info->scale_grp || |
| 546 | 582 | lat_info->scale_lat > iolat->min_lat_nsec) { |
| .. | .. |
| 549 | 585 | } |
| 550 | 586 | scale_cookie_change(iolat->blkiolat, lat_info, false); |
| 551 | 587 | } |
| 588 | + latency_stat_init(iolat, &iolat->cur_stat); |
| 552 | 589 | out: |
| 553 | 590 | spin_unlock_irqrestore(&lat_info->lock, flags); |
| 554 | 591 | } |
| .. | .. |
| 559 | 596 | struct rq_wait *rqw; |
| 560 | 597 | struct iolatency_grp *iolat; |
| 561 | 598 | u64 window_start; |
| 562 | | - u64 now = ktime_to_ns(ktime_get()); |
| 599 | + u64 now; |
| 563 | 600 | bool issue_as_root = bio_issue_as_root_blkg(bio); |
| 564 | | - bool enabled = false; |
| 565 | 601 | int inflight = 0; |
| 566 | 602 | |
| 567 | 603 | blkg = bio->bi_blkg; |
| 568 | | - if (!blkg) |
| 604 | + if (!blkg || !bio_flagged(bio, BIO_TRACKED)) |
| 569 | 605 | return; |
| 570 | 606 | |
| 571 | 607 | iolat = blkg_to_lat(bio->bi_blkg); |
| 572 | 608 | if (!iolat) |
| 573 | 609 | return; |
| 574 | 610 | |
| 575 | | - enabled = blk_iolatency_enabled(iolat->blkiolat); |
| 576 | | - if (!enabled) |
| 611 | + if (!iolat->blkiolat->enabled) |
| 577 | 612 | return; |
| 578 | 613 | |
| 614 | + now = ktime_to_ns(ktime_get()); |
| 579 | 615 | while (blkg && blkg->parent) { |
| 580 | 616 | iolat = blkg_to_lat(blkg); |
| 581 | 617 | if (!iolat) { |
| .. | .. |
| 611 | 647 | struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); |
| 612 | 648 | |
| 613 | 649 | del_timer_sync(&blkiolat->timer); |
| 650 | + flush_work(&blkiolat->enable_work); |
| 614 | 651 | blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency); |
| 615 | 652 | kfree(blkiolat); |
| 616 | 653 | } |
| .. | .. |
| 640 | 677 | * We could be exiting, don't access the pd unless we have a |
| 641 | 678 | * ref on the blkg. |
| 642 | 679 | */ |
| 643 | | - if (!blkg_try_get(blkg)) |
| 680 | + if (!blkg_tryget(blkg)) |
| 644 | 681 | continue; |
| 645 | 682 | |
| 646 | 683 | iolat = blkg_to_lat(blkg); |
| .. | .. |
| 682 | 719 | rcu_read_unlock(); |
| 683 | 720 | } |
| 684 | 721 | |
| 722 | +/** |
| 723 | + * blkiolatency_enable_work_fn - Enable or disable iolatency on the device |
| 724 | + * @work: enable_work of the blk_iolatency of interest |
| 725 | + * |
| 726 | + * iolatency needs to keep track of the number of in-flight IOs per cgroup. This |
| 727 | + * is relatively expensive as it involves walking up the hierarchy twice for |
| 728 | + * every IO. Thus, if iolatency is not enabled in any cgroup for the device, we |
| 729 | + * want to disable the in-flight tracking. |
| 730 | + * |
| 731 | + * We have to make sure that the counting is balanced - we don't want to leak |
| 732 | + * the in-flight counts by disabling accounting in the completion path while IOs |
| 733 | + * are in flight. This is achieved by ensuring that no IO is in flight by |
| 734 | + * freezing the queue while flipping ->enabled. As this requires a sleepable |
| 735 | + * context, ->enabled flipping is punted to this work function. |
| 736 | + */ |
| 737 | +static void blkiolatency_enable_work_fn(struct work_struct *work) |
| 738 | +{ |
| 739 | + struct blk_iolatency *blkiolat = container_of(work, struct blk_iolatency, |
| 740 | + enable_work); |
| 741 | + bool enabled; |
| 742 | + |
| 743 | + /* |
| 744 | + * There can only be one instance of this function running for @blkiolat |
| 745 | + * and it's guaranteed to be executed at least once after the latest |
| 746 | + * ->enable_cnt modification. Acting on the latest ->enable_cnt is |
| 747 | + * sufficient. |
| 748 | + * |
| 749 | + * Also, we know @blkiolat is safe to access as ->enable_work is flushed |
| 750 | + * in blkcg_iolatency_exit(). |
| 751 | + */ |
| 752 | + enabled = atomic_read(&blkiolat->enable_cnt); |
| 753 | + if (enabled != blkiolat->enabled) { |
| 754 | + blk_mq_freeze_queue(blkiolat->rqos.q); |
| 755 | + blkiolat->enabled = enabled; |
| 756 | + blk_mq_unfreeze_queue(blkiolat->rqos.q); |
| 757 | + } |
| 758 | +} |
| 759 | + |
| 685 | 760 | int blk_iolatency_init(struct request_queue *q) |
| 686 | 761 | { |
| 687 | 762 | struct blk_iolatency *blkiolat; |
| .. | .. |
| 693 | 768 | return -ENOMEM; |
| 694 | 769 | |
| 695 | 770 | rqos = &blkiolat->rqos; |
| 696 | | - rqos->id = RQ_QOS_CGROUP; |
| 771 | + rqos->id = RQ_QOS_LATENCY; |
| 697 | 772 | rqos->ops = &blkcg_iolatency_ops; |
| 698 | 773 | rqos->q = q; |
| 699 | 774 | |
| .. | .. |
| 707 | 782 | } |
| 708 | 783 | |
| 709 | 784 | timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0); |
| 785 | + INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn); |
| 710 | 786 | |
| 711 | 787 | return 0; |
| 712 | 788 | } |
| 713 | 789 | |
| 714 | | -/* |
| 715 | | - * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise |
| 716 | | - * return 0. |
| 717 | | - */ |
| 718 | | -static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) |
| 790 | +static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) |
| 719 | 791 | { |
| 720 | 792 | struct iolatency_grp *iolat = blkg_to_lat(blkg); |
| 793 | + struct blk_iolatency *blkiolat = iolat->blkiolat; |
| 721 | 794 | u64 oldval = iolat->min_lat_nsec; |
| 722 | 795 | |
| 723 | 796 | iolat->min_lat_nsec = val; |
| .. | .. |
| 725 | 798 | iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec, |
| 726 | 799 | BLKIOLATENCY_MAX_WIN_SIZE); |
| 727 | 800 | |
| 728 | | - if (!oldval && val) |
| 729 | | - return 1; |
| 801 | + if (!oldval && val) { |
| 802 | + if (atomic_inc_return(&blkiolat->enable_cnt) == 1) |
| 803 | + schedule_work(&blkiolat->enable_work); |
| 804 | + } |
| 730 | 805 | if (oldval && !val) { |
| 731 | 806 | blkcg_clear_delay(blkg); |
| 732 | | - return -1; |
| 807 | + if (atomic_dec_return(&blkiolat->enable_cnt) == 0) |
| 808 | + schedule_work(&blkiolat->enable_work); |
| 733 | 809 | } |
| 734 | | - return 0; |
| 735 | 810 | } |
| 736 | 811 | |
| 737 | 812 | static void iolatency_clear_scaling(struct blkcg_gq *blkg) |
| .. | .. |
| 757 | 832 | { |
| 758 | 833 | struct blkcg *blkcg = css_to_blkcg(of_css(of)); |
| 759 | 834 | struct blkcg_gq *blkg; |
| 760 | | - struct blk_iolatency *blkiolat; |
| 761 | 835 | struct blkg_conf_ctx ctx; |
| 762 | 836 | struct iolatency_grp *iolat; |
| 763 | 837 | char *p, *tok; |
| 764 | 838 | u64 lat_val = 0; |
| 765 | 839 | u64 oldval; |
| 766 | 840 | int ret; |
| 767 | | - int enable = 0; |
| 768 | 841 | |
| 769 | 842 | ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx); |
| 770 | 843 | if (ret) |
| 771 | 844 | return ret; |
| 772 | 845 | |
| 773 | 846 | iolat = blkg_to_lat(ctx.blkg); |
| 774 | | - blkiolat = iolat->blkiolat; |
| 775 | 847 | p = ctx.body; |
| 776 | 848 | |
| 777 | 849 | ret = -EINVAL; |
| .. | .. |
| 800 | 872 | blkg = ctx.blkg; |
| 801 | 873 | oldval = iolat->min_lat_nsec; |
| 802 | 874 | |
| 803 | | - enable = iolatency_set_min_lat_nsec(blkg, lat_val); |
| 804 | | - if (enable) { |
| 805 | | - if (!blk_get_queue(blkg->q)) { |
| 806 | | - ret = -ENODEV; |
| 807 | | - goto out; |
| 808 | | - } |
| 809 | | - |
| 810 | | - blkg_get(blkg); |
| 811 | | - } |
| 812 | | - |
| 813 | | - if (oldval != iolat->min_lat_nsec) { |
| 875 | + iolatency_set_min_lat_nsec(blkg, lat_val); |
| 876 | + if (oldval != iolat->min_lat_nsec) |
| 814 | 877 | iolatency_clear_scaling(blkg); |
| 815 | | - } |
| 816 | | - |
| 817 | 878 | ret = 0; |
| 818 | 879 | out: |
| 819 | 880 | blkg_conf_finish(&ctx); |
| 820 | | - if (ret == 0 && enable) { |
| 821 | | - struct iolatency_grp *tmp = blkg_to_lat(blkg); |
| 822 | | - struct blk_iolatency *blkiolat = tmp->blkiolat; |
| 823 | | - |
| 824 | | - blk_mq_freeze_queue(blkg->q); |
| 825 | | - |
| 826 | | - if (enable == 1) |
| 827 | | - atomic_inc(&blkiolat->enabled); |
| 828 | | - else if (enable == -1) |
| 829 | | - atomic_dec(&blkiolat->enabled); |
| 830 | | - else |
| 831 | | - WARN_ON_ONCE(1); |
| 832 | | - |
| 833 | | - blk_mq_unfreeze_queue(blkg->q); |
| 834 | | - |
| 835 | | - blkg_put(blkg); |
| 836 | | - blk_put_queue(blkg->q); |
| 837 | | - } |
| 838 | 881 | return ret ?: nbytes; |
| 839 | 882 | } |
| 840 | 883 | |
| .. | .. |
| 859 | 902 | return 0; |
| 860 | 903 | } |
| 861 | 904 | |
| 905 | +static size_t iolatency_ssd_stat(struct iolatency_grp *iolat, char *buf, |
| 906 | + size_t size) |
| 907 | +{ |
| 908 | + struct latency_stat stat; |
| 909 | + int cpu; |
| 910 | + |
| 911 | + latency_stat_init(iolat, &stat); |
| 912 | + preempt_disable(); |
| 913 | + for_each_online_cpu(cpu) { |
| 914 | + struct latency_stat *s; |
| 915 | + s = per_cpu_ptr(iolat->stats, cpu); |
| 916 | + latency_stat_sum(iolat, &stat, s); |
| 917 | + } |
| 918 | + preempt_enable(); |
| 919 | + |
| 920 | + if (iolat->rq_depth.max_depth == UINT_MAX) |
| 921 | + return scnprintf(buf, size, " missed=%llu total=%llu depth=max", |
| 922 | + (unsigned long long)stat.ps.missed, |
| 923 | + (unsigned long long)stat.ps.total); |
| 924 | + return scnprintf(buf, size, " missed=%llu total=%llu depth=%u", |
| 925 | + (unsigned long long)stat.ps.missed, |
| 926 | + (unsigned long long)stat.ps.total, |
| 927 | + iolat->rq_depth.max_depth); |
| 928 | +} |
| 929 | + |
| 862 | 930 | static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf, |
| 863 | 931 | size_t size) |
| 864 | 932 | { |
| 865 | 933 | struct iolatency_grp *iolat = pd_to_lat(pd); |
| 866 | | - unsigned long long avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC); |
| 867 | | - unsigned long long cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC); |
| 934 | + unsigned long long avg_lat; |
| 935 | + unsigned long long cur_win; |
| 868 | 936 | |
| 937 | + if (!blkcg_debug_stats) |
| 938 | + return 0; |
| 939 | + |
| 940 | + if (iolat->ssd) |
| 941 | + return iolatency_ssd_stat(iolat, buf, size); |
| 942 | + |
| 943 | + avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC); |
| 944 | + cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC); |
| 869 | 945 | if (iolat->rq_depth.max_depth == UINT_MAX) |
| 870 | 946 | return scnprintf(buf, size, " depth=max avg_lat=%llu win=%llu", |
| 871 | 947 | avg_lat, cur_win); |
| .. | .. |
| 875 | 951 | } |
| 876 | 952 | |
| 877 | 953 | |
| 878 | | -static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp, int node) |
| 954 | +static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp, |
| 955 | + struct request_queue *q, |
| 956 | + struct blkcg *blkcg) |
| 879 | 957 | { |
| 880 | 958 | struct iolatency_grp *iolat; |
| 881 | 959 | |
| 882 | | - iolat = kzalloc_node(sizeof(*iolat), gfp, node); |
| 960 | + iolat = kzalloc_node(sizeof(*iolat), gfp, q->node); |
| 883 | 961 | if (!iolat) |
| 884 | 962 | return NULL; |
| 885 | | - iolat->stats = __alloc_percpu_gfp(sizeof(struct blk_rq_stat), |
| 886 | | - __alignof__(struct blk_rq_stat), gfp); |
| 963 | + iolat->stats = __alloc_percpu_gfp(sizeof(struct latency_stat), |
| 964 | + __alignof__(struct latency_stat), gfp); |
| 887 | 965 | if (!iolat->stats) { |
| 888 | 966 | kfree(iolat); |
| 889 | 967 | return NULL; |
| .. | .. |
| 900 | 978 | u64 now = ktime_to_ns(ktime_get()); |
| 901 | 979 | int cpu; |
| 902 | 980 | |
| 981 | + if (blk_queue_nonrot(blkg->q)) |
| 982 | + iolat->ssd = true; |
| 983 | + else |
| 984 | + iolat->ssd = false; |
| 985 | + |
| 903 | 986 | for_each_possible_cpu(cpu) { |
| 904 | | - struct blk_rq_stat *stat; |
| 987 | + struct latency_stat *stat; |
| 905 | 988 | stat = per_cpu_ptr(iolat->stats, cpu); |
| 906 | | - blk_rq_stat_init(stat); |
| 989 | + latency_stat_init(iolat, stat); |
| 907 | 990 | } |
| 908 | 991 | |
| 992 | + latency_stat_init(iolat, &iolat->cur_stat); |
| 909 | 993 | rq_wait_init(&iolat->rq_wait); |
| 910 | 994 | spin_lock_init(&iolat->child_lat.lock); |
| 911 | | - iolat->rq_depth.queue_depth = blk_queue_depth(blkg->q); |
| 995 | + iolat->rq_depth.queue_depth = blkg->q->nr_requests; |
| 912 | 996 | iolat->rq_depth.max_depth = UINT_MAX; |
| 913 | 997 | iolat->rq_depth.default_depth = iolat->rq_depth.queue_depth; |
| 914 | 998 | iolat->blkiolat = blkiolat; |
| .. | .. |
| 934 | 1018 | { |
| 935 | 1019 | struct iolatency_grp *iolat = pd_to_lat(pd); |
| 936 | 1020 | struct blkcg_gq *blkg = lat_to_blkg(iolat); |
| 937 | | - struct blk_iolatency *blkiolat = iolat->blkiolat; |
| 938 | | - int ret; |
| 939 | 1021 | |
| 940 | | - ret = iolatency_set_min_lat_nsec(blkg, 0); |
| 941 | | - if (ret == 1) |
| 942 | | - atomic_inc(&blkiolat->enabled); |
| 943 | | - if (ret == -1) |
| 944 | | - atomic_dec(&blkiolat->enabled); |
| 1022 | + iolatency_set_min_lat_nsec(blkg, 0); |
| 945 | 1023 | iolatency_clear_scaling(blkg); |
| 946 | 1024 | } |
| 947 | 1025 | |
| .. | .. |
| 978 | 1056 | |
| 979 | 1057 | static void __exit iolatency_exit(void) |
| 980 | 1058 | { |
| 981 | | - return blkcg_policy_unregister(&blkcg_policy_iolatency); |
| 1059 | + blkcg_policy_unregister(&blkcg_policy_iolatency); |
| 982 | 1060 | } |
| 983 | 1061 | |
| 984 | 1062 | module_init(iolatency_init); |
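
The ssd-mode window check introduced above (latency_sum_ok()) treats a sampling window as healthy when fewer than 10% of the sampled IOs missed the configured latency target, with the threshold clamped to at least one IO so that a single miss in a tiny window still counts against the group. As a quick sanity check of that arithmetic outside the kernel, here is a minimal userspace C sketch: the struct and helper names mirror the patch for readability, but nothing below is kernel API, and div64_u64()/max() are replaced with plain C.

```c
/*
 * Userspace illustration of the ssd-mode "good window" check from the
 * patch above: a window is ok when missed < max(total / 10, 1).
 * Names mirror the kernel code but this is not kernel API.
 */
#include <stdint.h>
#include <stdio.h>

struct percentile_stats {
	uint64_t total;		/* IOs sampled in this window */
	uint64_t missed;	/* IOs that exceeded the latency target */
};

static int latency_sum_ok(const struct percentile_stats *ps)
{
	uint64_t thresh = ps->total / 10;	/* 10% of the window */

	if (thresh < 1)
		thresh = 1;			/* clamp for tiny windows */
	return ps->missed < thresh;
}

int main(void)
{
	struct percentile_stats good = { .total = 1000, .missed = 42 };
	struct percentile_stats bad  = { .total = 1000, .missed = 150 };
	struct percentile_stats tiny = { .total = 5,    .missed = 1 };

	printf("good window ok: %d\n", latency_sum_ok(&good));	/* 1 */
	printf("bad window ok:  %d\n", latency_sum_ok(&bad));	/* 0 */
	printf("tiny window ok: %d\n", latency_sum_ok(&tiny));	/* 0: thresh clamps to 1 */
	return 0;
}
```

Rotational devices keep the pre-patch behaviour instead: the window's mean request time is compared against min_lat_nsec and fed into the calc_load()-based running average, while nonrotational (ssd) devices use the missed/total ratio shown here.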
|---|