2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/block/blk-iolatency.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Block rq-qos base io controller
  *
@@ -85,17 +86,22 @@
 struct blk_iolatency {
	struct rq_qos rqos;
	struct timer_list timer;
-	atomic_t enabled;
+
+	/*
+	 * ->enabled is the master enable switch gating the throttling logic and
+	 * inflight tracking. The number of cgroups which have iolat enabled is
+	 * tracked in ->enable_cnt, and ->enable is flipped on/off accordingly
+	 * from ->enable_work with the request_queue frozen. For details, See
+	 * blkiolatency_enable_work_fn().
+	 */
+	bool enabled;
+	atomic_t enable_cnt;
+	struct work_struct enable_work;
 };
 
 static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
 {
	return container_of(rqos, struct blk_iolatency, rqos);
-}
-
-static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat)
-{
-	return atomic_read(&blkiolat->enabled) > 0;
 }
 
 struct child_latency_info {
@@ -117,9 +123,22 @@
	atomic_t scale_cookie;
 };
 
+struct percentile_stats {
+	u64 total;
+	u64 missed;
+};
+
+struct latency_stat {
+	union {
+		struct percentile_stats ps;
+		struct blk_rq_stat rqs;
+	};
+};
+
 struct iolatency_grp {
	struct blkg_policy_data pd;
-	struct blk_rq_stat __percpu *stats;
+	struct latency_stat __percpu *stats;
+	struct latency_stat cur_stat;
	struct blk_iolatency *blkiolat;
	struct rq_depth rq_depth;
	struct rq_wait rq_wait;
@@ -134,6 +153,7 @@
	/* Our current number of IO's for the last summation. */
	u64 nr_samples;
 
+	bool ssd;
	struct child_latency_info child_lat;
 };
 
@@ -174,29 +194,101 @@
	return pd_to_blkg(&iolat->pd);
 }
 
-static inline bool iolatency_may_queue(struct iolatency_grp *iolat,
-				       wait_queue_entry_t *wait,
-				       bool first_block)
+static inline void latency_stat_init(struct iolatency_grp *iolat,
+				     struct latency_stat *stat)
 {
-	struct rq_wait *rqw = &iolat->rq_wait;
+	if (iolat->ssd) {
+		stat->ps.total = 0;
+		stat->ps.missed = 0;
+	} else
+		blk_rq_stat_init(&stat->rqs);
+}
 
-	if (first_block && waitqueue_active(&rqw->wait) &&
-	    rqw->wait.head.next != &wait->entry)
-		return false;
+static inline void latency_stat_sum(struct iolatency_grp *iolat,
+				    struct latency_stat *sum,
+				    struct latency_stat *stat)
+{
+	if (iolat->ssd) {
+		sum->ps.total += stat->ps.total;
+		sum->ps.missed += stat->ps.missed;
+	} else
+		blk_rq_stat_sum(&sum->rqs, &stat->rqs);
+}
+
+static inline void latency_stat_record_time(struct iolatency_grp *iolat,
+					    u64 req_time)
+{
+	struct latency_stat *stat = get_cpu_ptr(iolat->stats);
+	if (iolat->ssd) {
+		if (req_time >= iolat->min_lat_nsec)
+			stat->ps.missed++;
+		stat->ps.total++;
+	} else
+		blk_rq_stat_add(&stat->rqs, req_time);
+	put_cpu_ptr(stat);
+}
+
+static inline bool latency_sum_ok(struct iolatency_grp *iolat,
+				  struct latency_stat *stat)
+{
+	if (iolat->ssd) {
+		u64 thresh = div64_u64(stat->ps.total, 10);
+		thresh = max(thresh, 1ULL);
+		return stat->ps.missed < thresh;
+	}
+	return stat->rqs.mean <= iolat->min_lat_nsec;
+}
+
+static inline u64 latency_stat_samples(struct iolatency_grp *iolat,
+				       struct latency_stat *stat)
+{
+	if (iolat->ssd)
+		return stat->ps.total;
+	return stat->rqs.nr_samples;
+}
+
+static inline void iolat_update_total_lat_avg(struct iolatency_grp *iolat,
+					      struct latency_stat *stat)
+{
+	int exp_idx;
+
+	if (iolat->ssd)
+		return;
+
+	/*
+	 * calc_load() takes in a number stored in fixed point representation.
+	 * Because we are using this for IO time in ns, the values stored
+	 * are significantly larger than the FIXED_1 denominator (2048).
+	 * Therefore, rounding errors in the calculation are negligible and
+	 * can be ignored.
+	 */
+	exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1,
+			div64_u64(iolat->cur_win_nsec,
+				  BLKIOLATENCY_EXP_BUCKET_SIZE));
+	iolat->lat_avg = calc_load(iolat->lat_avg,
+				   iolatency_exp_factors[exp_idx],
+				   stat->rqs.mean);
+}
+
+static void iolat_cleanup_cb(struct rq_wait *rqw, void *private_data)
+{
+	atomic_dec(&rqw->inflight);
+	wake_up(&rqw->wait);
+}
+
+static bool iolat_acquire_inflight(struct rq_wait *rqw, void *private_data)
+{
+	struct iolatency_grp *iolat = private_data;
	return rq_wait_inc_below(rqw, iolat->rq_depth.max_depth);
 }
 
 static void __blkcg_iolatency_throttle(struct rq_qos *rqos,
				       struct iolatency_grp *iolat,
-				       spinlock_t *lock, bool issue_as_root,
+				       bool issue_as_root,
				       bool use_memdelay)
-	__releases(lock)
-	__acquires(lock)
 {
	struct rq_wait *rqw = &iolat->rq_wait;
	unsigned use_delay = atomic_read(&lat_to_blkg(iolat)->use_delay);
-	DEFINE_WAIT(wait);
-	bool first_block = true;
 
	if (use_delay)
		blkcg_schedule_throttle(rqos->q, use_memdelay);
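The hunk above replaces the old open-coded waitqueue logic with per-mode stat helpers and rq_qos callbacks. The key arithmetic lives in latency_sum_ok(): on SSDs a window passes when fewer than 10% of its IOs (with a floor of one) missed the target, while rotational devices compare the window's mean latency against the target. Purely as an editorial illustration, the following userspace C sketch models the SSD threshold check; ssd_window_ok() and its main() harness are invented names, not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Model of the SSD branch of latency_sum_ok(): the window is "ok" when
 * fewer than max(total / 10, 1) of the sampled IOs missed the target. */
static int ssd_window_ok(uint64_t total, uint64_t missed)
{
	uint64_t thresh = total / 10;

	if (thresh < 1)
		thresh = 1;
	return missed < thresh;
}

int main(void)
{
	/* 100 IOs: 9 misses still passes, 10 misses trips the threshold. */
	printf("%d %d\n", ssd_window_ok(100, 9), ssd_window_ok(100, 10));
	return 0;
}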
@@ -213,27 +305,7 @@
		return;
	}
 
-	if (iolatency_may_queue(iolat, &wait, first_block))
-		return;
-
-	do {
-		prepare_to_wait_exclusive(&rqw->wait, &wait,
-					  TASK_UNINTERRUPTIBLE);
-
-		if (iolatency_may_queue(iolat, &wait, first_block))
-			break;
-		first_block = false;
-
-		if (lock) {
-			spin_unlock_irq(lock);
-			io_schedule();
-			spin_lock_irq(lock);
-		} else {
-			io_schedule();
-		}
-	} while (1);
-
-	finish_wait(&rqw->wait, &wait);
+	rq_qos_wait(rqw, iolat, iolat_acquire_inflight, iolat_cleanup_cb);
 }
 
 #define SCALE_DOWN_FACTOR 2
@@ -257,7 +329,7 @@
				 struct child_latency_info *lat_info,
				 bool up)
 {
-	unsigned long qd = blk_queue_depth(blkiolat->rqos.q);
+	unsigned long qd = blkiolat->rqos.q->nr_requests;
	unsigned long scale = scale_amount(qd, up);
	unsigned long old = atomic_read(&lat_info->scale_cookie);
	unsigned long max_scale = qd << 1;
@@ -297,10 +369,9 @@
  */
 static void scale_change(struct iolatency_grp *iolat, bool up)
 {
-	unsigned long qd = blk_queue_depth(iolat->blkiolat->rqos.q);
+	unsigned long qd = iolat->blkiolat->rqos.q->nr_requests;
	unsigned long scale = scale_amount(qd, up);
	unsigned long old = iolat->rq_depth.max_depth;
-	bool changed = false;
 
	if (old > qd)
		old = qd;
@@ -310,15 +381,13 @@
			return;
 
		if (old < qd) {
-			changed = true;
			old += scale;
			old = min(old, qd);
			iolat->rq_depth.max_depth = old;
			wake_up_all(&iolat->rq_wait.wait);
		}
-	} else if (old > 1) {
+	} else {
		old >>= 1;
-		changed = true;
		iolat->rq_depth.max_depth = max(old, 1UL);
	}
 }
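These hunks switch the depth calculations from blk_queue_depth() to q->nr_requests and simplify scale_change(): scaling up still walks max_depth toward nr_requests in scale_amount()-sized steps (scale_amount() itself is not shown in this diff), while scaling down now unconditionally halves the depth with a floor of 1, and the unused "changed" bookkeeping is dropped. A hedged userspace sketch of that direction logic, with the invented helper model_scale() standing in for scale_change():

#include <stdio.h>

/* Rough model of scale_change() after this patch: "scale" plays the role
 * of scale_amount(qd, up), which is computed elsewhere in the file. */
static unsigned long model_scale(unsigned long depth, unsigned long qd,
				 unsigned long scale, int up)
{
	if (depth > qd)
		depth = qd;
	if (up) {
		if (depth < qd) {
			depth += scale;
			if (depth > qd)
				depth = qd;
		}
	} else {
		depth >>= 1;		/* always halve on scale-down... */
		if (depth < 1)
			depth = 1;	/* ...but never below 1 */
	}
	return depth;
}

int main(void)
{
	/* qd=128, scale=16: 64 -> 80 on scale-up, 64 -> 32 on scale-down. */
	printf("%lu %lu\n", model_scale(64, 128, 16, 1),
	       model_scale(64, 128, 16, 0));
	return 0;
}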
@@ -371,7 +440,7 @@
		 * scale down event.
		 */
		samples_thresh = lat_info->nr_samples * 5;
-		samples_thresh = div64_u64(samples_thresh, 100);
+		samples_thresh = max(1ULL, div64_u64(samples_thresh, 100));
		if (iolat->nr_samples <= samples_thresh)
			return;
	}
@@ -393,38 +462,15 @@
	scale_change(iolat, direction > 0);
 }
 
-static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
-				     spinlock_t *lock)
+static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio)
 {
	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
-	struct blkcg *blkcg;
-	struct blkcg_gq *blkg;
-	struct request_queue *q = rqos->q;
+	struct blkcg_gq *blkg = bio->bi_blkg;
	bool issue_as_root = bio_issue_as_root_blkg(bio);
 
-	if (!blk_iolatency_enabled(blkiolat))
+	if (!blkiolat->enabled)
		return;
 
-	rcu_read_lock();
-	blkcg = bio_blkcg(bio);
-	bio_associate_blkcg(bio, &blkcg->css);
-	blkg = blkg_lookup(blkcg, q);
-	if (unlikely(!blkg)) {
-		if (!lock)
-			spin_lock_irq(q->queue_lock);
-		blkg = blkg_lookup_create(blkcg, q);
-		if (IS_ERR(blkg))
-			blkg = NULL;
-		if (!lock)
-			spin_unlock_irq(q->queue_lock);
-	}
-	if (!blkg)
-		goto out;
-
-	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
-	bio_associate_blkg(bio, blkg);
-out:
-	rcu_read_unlock();
	while (blkg && blkg->parent) {
		struct iolatency_grp *iolat = blkg_to_lat(blkg);
		if (!iolat) {
@@ -433,7 +479,7 @@
		}
 
		check_scale_change(iolat);
-		__blkcg_iolatency_throttle(rqos, iolat, lock, issue_as_root,
+		__blkcg_iolatency_throttle(rqos, iolat, issue_as_root,
					   (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
		blkg = blkg->parent;
	}
@@ -445,7 +491,6 @@
			      struct bio_issue *issue, u64 now,
			      bool issue_as_root)
 {
-	struct blk_rq_stat *rq_stat;
	u64 start = bio_issue_time(issue);
	u64 req_time;
 
@@ -471,9 +516,7 @@
		return;
	}
 
-	rq_stat = get_cpu_ptr(iolat->stats);
-	blk_rq_stat_add(rq_stat, req_time);
-	put_cpu_ptr(rq_stat);
+	latency_stat_record_time(iolat, req_time);
 }
 
 #define BLKIOLATENCY_MIN_ADJUST_TIME (500 * NSEC_PER_MSEC)
@@ -484,17 +527,17 @@
	struct blkcg_gq *blkg = lat_to_blkg(iolat);
	struct iolatency_grp *parent;
	struct child_latency_info *lat_info;
-	struct blk_rq_stat stat;
+	struct latency_stat stat;
	unsigned long flags;
-	int cpu, exp_idx;
+	int cpu;
 
-	blk_rq_stat_init(&stat);
+	latency_stat_init(iolat, &stat);
	preempt_disable();
	for_each_online_cpu(cpu) {
-		struct blk_rq_stat *s;
+		struct latency_stat *s;
		s = per_cpu_ptr(iolat->stats, cpu);
-		blk_rq_stat_sum(&stat, s);
-		blk_rq_stat_init(s);
+		latency_stat_sum(iolat, &stat, s);
+		latency_stat_init(iolat, s);
	}
	preempt_enable();
 
@@ -504,43 +547,36 @@
 
	lat_info = &parent->child_lat;
 
-	/*
-	 * calc_load() takes in a number stored in fixed point representation.
-	 * Because we are using this for IO time in ns, the values stored
-	 * are significantly larger than the FIXED_1 denominator (2048).
-	 * Therefore, rounding errors in the calculation are negligible and
-	 * can be ignored.
-	 */
-	exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1,
-			div64_u64(iolat->cur_win_nsec,
-				  BLKIOLATENCY_EXP_BUCKET_SIZE));
-	iolat->lat_avg = calc_load(iolat->lat_avg,
-				   iolatency_exp_factors[exp_idx],
-				   stat.mean);
+	iolat_update_total_lat_avg(iolat, &stat);
 
	/* Everything is ok and we don't need to adjust the scale. */
-	if (stat.mean <= iolat->min_lat_nsec &&
+	if (latency_sum_ok(iolat, &stat) &&
	    atomic_read(&lat_info->scale_cookie) == DEFAULT_SCALE_COOKIE)
		return;
 
	/* Somebody beat us to the punch, just bail. */
	spin_lock_irqsave(&lat_info->lock, flags);
+
+	latency_stat_sum(iolat, &iolat->cur_stat, &stat);
	lat_info->nr_samples -= iolat->nr_samples;
-	lat_info->nr_samples += stat.nr_samples;
-	iolat->nr_samples = stat.nr_samples;
+	lat_info->nr_samples += latency_stat_samples(iolat, &iolat->cur_stat);
+	iolat->nr_samples = latency_stat_samples(iolat, &iolat->cur_stat);
 
	if ((lat_info->last_scale_event >= now ||
-	    now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME) &&
-	    lat_info->scale_lat <= iolat->min_lat_nsec)
+	    now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME))
		goto out;
 
-	if (stat.mean <= iolat->min_lat_nsec &&
-	    stat.nr_samples >= BLKIOLATENCY_MIN_GOOD_SAMPLES) {
+	if (latency_sum_ok(iolat, &iolat->cur_stat) &&
+	    latency_sum_ok(iolat, &stat)) {
+		if (latency_stat_samples(iolat, &iolat->cur_stat) <
+		    BLKIOLATENCY_MIN_GOOD_SAMPLES)
+			goto out;
		if (lat_info->scale_grp == iolat) {
			lat_info->last_scale_event = now;
			scale_cookie_change(iolat->blkiolat, lat_info, true);
		}
-	} else if (stat.mean > iolat->min_lat_nsec) {
+	} else if (lat_info->scale_lat == 0 ||
+		   lat_info->scale_lat >= iolat->min_lat_nsec) {
		lat_info->last_scale_event = now;
		if (!lat_info->scale_grp ||
		    lat_info->scale_lat > iolat->min_lat_nsec) {
@@ -549,6 +585,7 @@
		}
		scale_cookie_change(iolat->blkiolat, lat_info, false);
	}
+	latency_stat_init(iolat, &iolat->cur_stat);
 out:
	spin_unlock_irqrestore(&lat_info->lock, flags);
 }
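The latency-checking rework above moves the calc_load() smoothing into iolat_update_total_lat_avg() (rotational devices only) and accumulates per-window results in cur_stat so scale decisions are made on at least BLKIOLATENCY_MIN_GOOD_SAMPLES worth of data. The smoothing itself is an exponentially weighted moving average in the kernel's FIXED_1 = 2048 fixed-point base, as the quoted comment explains. Below is a small userspace sketch of one smoothing step; the decay factor and sample values are made up for the demo (the kernel picks its factor from iolatency_exp_factors[] based on the window size).

#include <stdint.h>
#include <stdio.h>

#define FIXED_1 2048	/* fixed-point base used by the kernel's calc_load() */

/* One EWMA step: keep exp/FIXED_1 of the old average and blend in the new
 * window mean with the remaining weight. Rounding is ignored here, which
 * the patch's comment notes is negligible for nanosecond-scale values. */
static uint64_t ewma_step(uint64_t avg, uint64_t exp, uint64_t sample)
{
	return (avg * exp + sample * (FIXED_1 - exp)) / FIXED_1;
}

int main(void)
{
	uint64_t avg = 0, exp = 1884;			/* illustrative factor */
	uint64_t means[] = { 500000, 800000, 400000 };	/* window means, ns */

	for (int i = 0; i < 3; i++) {
		avg = ewma_step(avg, exp, means[i]);
		printf("window %d: avg_lat=%llu ns\n", i,
		       (unsigned long long)avg);
	}
	return 0;
}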
@@ -559,23 +596,22 @@
	struct rq_wait *rqw;
	struct iolatency_grp *iolat;
	u64 window_start;
-	u64 now = ktime_to_ns(ktime_get());
+	u64 now;
	bool issue_as_root = bio_issue_as_root_blkg(bio);
-	bool enabled = false;
	int inflight = 0;
 
	blkg = bio->bi_blkg;
-	if (!blkg)
+	if (!blkg || !bio_flagged(bio, BIO_TRACKED))
		return;
 
	iolat = blkg_to_lat(bio->bi_blkg);
	if (!iolat)
		return;
 
-	enabled = blk_iolatency_enabled(iolat->blkiolat);
-	if (!enabled)
+	if (!iolat->blkiolat->enabled)
		return;
 
+	now = ktime_to_ns(ktime_get());
	while (blkg && blkg->parent) {
		iolat = blkg_to_lat(blkg);
		if (!iolat) {
@@ -611,6 +647,7 @@
	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
 
	del_timer_sync(&blkiolat->timer);
+	flush_work(&blkiolat->enable_work);
	blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency);
	kfree(blkiolat);
 }
@@ -640,7 +677,7 @@
		 * We could be exiting, don't access the pd unless we have a
		 * ref on the blkg.
		 */
-		if (!blkg_try_get(blkg))
+		if (!blkg_tryget(blkg))
			continue;
 
		iolat = blkg_to_lat(blkg);
@@ -682,6 +719,44 @@
	rcu_read_unlock();
 }
 
+/**
+ * blkiolatency_enable_work_fn - Enable or disable iolatency on the device
+ * @work: enable_work of the blk_iolatency of interest
+ *
+ * iolatency needs to keep track of the number of in-flight IOs per cgroup. This
+ * is relatively expensive as it involves walking up the hierarchy twice for
+ * every IO. Thus, if iolatency is not enabled in any cgroup for the device, we
+ * want to disable the in-flight tracking.
+ *
+ * We have to make sure that the counting is balanced - we don't want to leak
+ * the in-flight counts by disabling accounting in the completion path while IOs
+ * are in flight. This is achieved by ensuring that no IO is in flight by
+ * freezing the queue while flipping ->enabled. As this requires a sleepable
+ * context, ->enabled flipping is punted to this work function.
+ */
+static void blkiolatency_enable_work_fn(struct work_struct *work)
+{
+	struct blk_iolatency *blkiolat = container_of(work, struct blk_iolatency,
+						      enable_work);
+	bool enabled;
+
+	/*
+	 * There can only be one instance of this function running for @blkiolat
+	 * and it's guaranteed to be executed at least once after the latest
+	 * ->enabled_cnt modification. Acting on the latest ->enable_cnt is
+	 * sufficient.
+	 *
+	 * Also, we know @blkiolat is safe to access as ->enable_work is flushed
+	 * in blkcg_iolatency_exit().
+	 */
+	enabled = atomic_read(&blkiolat->enable_cnt);
+	if (enabled != blkiolat->enabled) {
+		blk_mq_freeze_queue(blkiolat->rqos.q);
+		blkiolat->enabled = enabled;
+		blk_mq_unfreeze_queue(blkiolat->rqos.q);
+	}
+}
+
 int blk_iolatency_init(struct request_queue *q)
 {
	struct blk_iolatency *blkiolat;
@@ -693,7 +768,7 @@
		return -ENOMEM;
 
	rqos = &blkiolat->rqos;
-	rqos->id = RQ_QOS_CGROUP;
+	rqos->id = RQ_QOS_LATENCY;
	rqos->ops = &blkcg_iolatency_ops;
	rqos->q = q;
 
@@ -707,17 +782,15 @@
	}
 
	timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);
+	INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn);
 
	return 0;
 }
 
-/*
- * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise
- * return 0.
- */
-static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
+static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
 {
	struct iolatency_grp *iolat = blkg_to_lat(blkg);
+	struct blk_iolatency *blkiolat = iolat->blkiolat;
	u64 oldval = iolat->min_lat_nsec;
 
	iolat->min_lat_nsec = val;
@@ -725,13 +798,15 @@
	iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec,
				    BLKIOLATENCY_MAX_WIN_SIZE);
 
-	if (!oldval && val)
-		return 1;
+	if (!oldval && val) {
+		if (atomic_inc_return(&blkiolat->enable_cnt) == 1)
+			schedule_work(&blkiolat->enable_work);
+	}
	if (oldval && !val) {
		blkcg_clear_delay(blkg);
-		return -1;
+		if (atomic_dec_return(&blkiolat->enable_cnt) == 0)
+			schedule_work(&blkiolat->enable_work);
	}
-	return 0;
 }
 
 static void iolatency_clear_scaling(struct blkcg_gq *blkg)
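With the atomic_t ->enabled counter gone, iolatency_set_min_lat_nsec() above only bumps ->enable_cnt and kicks ->enable_work on the 0->1 and 1->0 transitions; the work item (shown earlier in this diff) then freezes the queue and flips ->enabled. A minimal userspace model of that edge-triggering follows; schedule_enable_work() is a stub standing in for schedule_work(), and the atomic_fetch_* comparisons are the userspace counterpart of the kernel's atomic_inc_return()/atomic_dec_return() checks.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int enable_cnt;

static void schedule_enable_work(bool enable)
{
	/* Stand-in for schedule_work(&blkiolat->enable_work). */
	printf("enable_work scheduled, target state: %d\n", enable);
}

/* Model of the new transition logic: only the first cgroup to set a
 * latency target and the last one to clear it trigger the deferred flip. */
static void set_min_lat(unsigned long long oldval, unsigned long long val)
{
	if (!oldval && val) {
		if (atomic_fetch_add(&enable_cnt, 1) == 0)	/* 0 -> 1 */
			schedule_enable_work(true);
	}
	if (oldval && !val) {
		if (atomic_fetch_sub(&enable_cnt, 1) == 1)	/* 1 -> 0 */
			schedule_enable_work(false);
	}
}

int main(void)
{
	set_min_lat(0, 1000000);	/* first enabler: work scheduled */
	set_min_lat(0, 2000000);	/* second enabler: nothing */
	set_min_lat(2000000, 0);	/* one user remains: nothing */
	set_min_lat(1000000, 0);	/* last user gone: work scheduled */
	return 0;
}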
@@ -757,21 +832,18 @@
 {
	struct blkcg *blkcg = css_to_blkcg(of_css(of));
	struct blkcg_gq *blkg;
-	struct blk_iolatency *blkiolat;
	struct blkg_conf_ctx ctx;
	struct iolatency_grp *iolat;
	char *p, *tok;
	u64 lat_val = 0;
	u64 oldval;
	int ret;
-	int enable = 0;
 
	ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
	if (ret)
		return ret;
 
	iolat = blkg_to_lat(ctx.blkg);
-	blkiolat = iolat->blkiolat;
	p = ctx.body;
 
	ret = -EINVAL;
@@ -800,41 +872,12 @@
	blkg = ctx.blkg;
	oldval = iolat->min_lat_nsec;
 
-	enable = iolatency_set_min_lat_nsec(blkg, lat_val);
-	if (enable) {
-		if (!blk_get_queue(blkg->q)) {
-			ret = -ENODEV;
-			goto out;
-		}
-
-		blkg_get(blkg);
-	}
-
-	if (oldval != iolat->min_lat_nsec) {
+	iolatency_set_min_lat_nsec(blkg, lat_val);
+	if (oldval != iolat->min_lat_nsec)
		iolatency_clear_scaling(blkg);
-	}
-
	ret = 0;
 out:
	blkg_conf_finish(&ctx);
-	if (ret == 0 && enable) {
-		struct iolatency_grp *tmp = blkg_to_lat(blkg);
-		struct blk_iolatency *blkiolat = tmp->blkiolat;
-
-		blk_mq_freeze_queue(blkg->q);
-
-		if (enable == 1)
-			atomic_inc(&blkiolat->enabled);
-		else if (enable == -1)
-			atomic_dec(&blkiolat->enabled);
-		else
-			WARN_ON_ONCE(1);
-
-		blk_mq_unfreeze_queue(blkg->q);
-
-		blkg_put(blkg);
-		blk_put_queue(blkg->q);
-	}
	return ret ?: nbytes;
 }
 
@@ -859,13 +902,46 @@
	return 0;
 }
 
+static size_t iolatency_ssd_stat(struct iolatency_grp *iolat, char *buf,
+				 size_t size)
+{
+	struct latency_stat stat;
+	int cpu;
+
+	latency_stat_init(iolat, &stat);
+	preempt_disable();
+	for_each_online_cpu(cpu) {
+		struct latency_stat *s;
+		s = per_cpu_ptr(iolat->stats, cpu);
+		latency_stat_sum(iolat, &stat, s);
+	}
+	preempt_enable();
+
+	if (iolat->rq_depth.max_depth == UINT_MAX)
+		return scnprintf(buf, size, " missed=%llu total=%llu depth=max",
+				 (unsigned long long)stat.ps.missed,
+				 (unsigned long long)stat.ps.total);
+	return scnprintf(buf, size, " missed=%llu total=%llu depth=%u",
+			 (unsigned long long)stat.ps.missed,
+			 (unsigned long long)stat.ps.total,
+			 iolat->rq_depth.max_depth);
+}
+
 static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf,
				size_t size)
 {
	struct iolatency_grp *iolat = pd_to_lat(pd);
-	unsigned long long avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC);
-	unsigned long long cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC);
+	unsigned long long avg_lat;
+	unsigned long long cur_win;
 
+	if (!blkcg_debug_stats)
+		return 0;
+
+	if (iolat->ssd)
+		return iolatency_ssd_stat(iolat, buf, size);
+
+	avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC);
+	cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC);
	if (iolat->rq_depth.max_depth == UINT_MAX)
		return scnprintf(buf, size, " depth=max avg_lat=%llu win=%llu",
				 avg_lat, cur_win);
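For SSDs the per-cgroup debug output added above switches from avg_lat/win to a " missed=%llu total=%llu depth=..." fragment, and the whole stat line is now emitted only when blkcg_debug_stats is set. As a hedged illustration of consuming that fragment from an io.stat line: the field names come from the format strings in the hunk, while the parser itself and its sample input are invented, and the "depth=max" form is not handled.

#include <stdio.h>

/* Parse the SSD-style fragment appended by iolatency_ssd_stat() and
 * report the miss ratio it implies. */
int main(void)
{
	const char *frag = "missed=12 total=4096 depth=64";
	unsigned long long missed, total;
	unsigned int depth;

	if (sscanf(frag, "missed=%llu total=%llu depth=%u",
		   &missed, &total, &depth) == 3 && total)
		printf("missed %.2f%% of %llu IOs at depth %u\n",
		       100.0 * missed / total, total, depth);
	return 0;
}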
@@ -875,15 +951,17 @@
 }
 
 
-static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp, int node)
+static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
+						   struct request_queue *q,
+						   struct blkcg *blkcg)
 {
	struct iolatency_grp *iolat;
 
-	iolat = kzalloc_node(sizeof(*iolat), gfp, node);
+	iolat = kzalloc_node(sizeof(*iolat), gfp, q->node);
	if (!iolat)
		return NULL;
-	iolat->stats = __alloc_percpu_gfp(sizeof(struct blk_rq_stat),
-				       __alignof__(struct blk_rq_stat), gfp);
+	iolat->stats = __alloc_percpu_gfp(sizeof(struct latency_stat),
+				       __alignof__(struct latency_stat), gfp);
	if (!iolat->stats) {
		kfree(iolat);
		return NULL;
@@ -900,15 +978,21 @@
	u64 now = ktime_to_ns(ktime_get());
	int cpu;
 
+	if (blk_queue_nonrot(blkg->q))
+		iolat->ssd = true;
+	else
+		iolat->ssd = false;
+
	for_each_possible_cpu(cpu) {
-		struct blk_rq_stat *stat;
+		struct latency_stat *stat;
		stat = per_cpu_ptr(iolat->stats, cpu);
-		blk_rq_stat_init(stat);
+		latency_stat_init(iolat, stat);
	}
 
+	latency_stat_init(iolat, &iolat->cur_stat);
	rq_wait_init(&iolat->rq_wait);
	spin_lock_init(&iolat->child_lat.lock);
-	iolat->rq_depth.queue_depth = blk_queue_depth(blkg->q);
+	iolat->rq_depth.queue_depth = blkg->q->nr_requests;
	iolat->rq_depth.max_depth = UINT_MAX;
	iolat->rq_depth.default_depth = iolat->rq_depth.queue_depth;
	iolat->blkiolat = blkiolat;
@@ -934,14 +1018,8 @@
 {
	struct iolatency_grp *iolat = pd_to_lat(pd);
	struct blkcg_gq *blkg = lat_to_blkg(iolat);
-	struct blk_iolatency *blkiolat = iolat->blkiolat;
-	int ret;
 
-	ret = iolatency_set_min_lat_nsec(blkg, 0);
-	if (ret == 1)
-		atomic_inc(&blkiolat->enabled);
-	if (ret == -1)
-		atomic_dec(&blkiolat->enabled);
+	iolatency_set_min_lat_nsec(blkg, 0);
	iolatency_clear_scaling(blkg);
 }
 
@@ -978,7 +1056,7 @@
 
 static void __exit iolatency_exit(void)
 {
-	return blkcg_policy_unregister(&blkcg_policy_iolatency);
+	blkcg_policy_unregister(&blkcg_policy_iolatency);
 }
 
 module_init(iolatency_init);