2023-12-08 01573e231f18eb2d99162747186f59511f56b64d
kernel/include/linux/blk-cgroup.h
@@ -14,13 +14,19 @@
  * Nauman Rafique <nauman@google.com>
  */
 
-#include <linux/kthread.h>
+#include <linux/cgroup.h>
+#include <linux/percpu.h>
 #include <linux/percpu_counter.h>
+#include <linux/u64_stats_sync.h>
 #include <linux/seq_file.h>
 #include <linux/radix-tree.h>
 #include <linux/blkdev.h>
 #include <linux/atomic.h>
 #include <linux/kthread.h>
+#include <linux/fs.h>
+#ifndef __GENKSYMS__
+#include <linux/blk-mq.h>
+#endif
 
 /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
 #define BLKG_STAT_CPU_BATCH (INT_MAX / 2)
@@ -30,15 +36,12 @@
 
 #ifdef CONFIG_BLK_CGROUP
 
-enum blkg_rwstat_type {
-        BLKG_RWSTAT_READ,
-        BLKG_RWSTAT_WRITE,
-        BLKG_RWSTAT_SYNC,
-        BLKG_RWSTAT_ASYNC,
-        BLKG_RWSTAT_DISCARD,
+enum blkg_iostat_type {
+        BLKG_IOSTAT_READ,
+        BLKG_IOSTAT_WRITE,
+        BLKG_IOSTAT_DISCARD,
 
-        BLKG_RWSTAT_NR,
-        BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
+        BLKG_IOSTAT_NR,
 };
 
 struct blkcg_gq;
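
Illustrative sketch (not part of this patch): one way a caller could map a bio/request op onto the new blkg_iostat_type buckets, mirroring the read/write/discard selection the old rwstat code did. The helper name example_op_to_iostat() is hypothetical; op_is_write() and op_is_discard() are the existing helpers from blk_types.h.

/* hypothetical helper -- picks the blkg_iostat_type bucket for an op */
static inline int example_op_to_iostat(unsigned int op)
{
        if (op_is_discard(op))
                return BLKG_IOSTAT_DISCARD;
        if (op_is_write(op))
                return BLKG_IOSTAT_WRITE;
        return BLKG_IOSTAT_READ;
}
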
@@ -46,6 +49,7 @@
 struct blkcg {
         struct cgroup_subsys_state css;
         spinlock_t lock;
+        refcount_t online_pin;
 
         struct radix_tree_root blkg_tree;
         struct blkcg_gq __rcu *blkg_hint;
@@ -56,23 +60,18 @@
         struct list_head all_blkcgs_node;
 #ifdef CONFIG_CGROUP_WRITEBACK
         struct list_head cgwb_list;
-        refcount_t cgwb_refcnt;
 #endif
 };
 
-/*
- * blkg_[rw]stat->aux_cnt is excluded for local stats but included for
- * recursive. Used to carry stats of dead children, and, for blkg_rwstat,
- * to carry result values from read and sum operations.
- */
-struct blkg_stat {
-        struct percpu_counter cpu_cnt;
-        atomic64_t aux_cnt;
+struct blkg_iostat {
+        u64 bytes[BLKG_IOSTAT_NR];
+        u64 ios[BLKG_IOSTAT_NR];
 };
 
-struct blkg_rwstat {
-        struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR];
-        atomic64_t aux_cnt[BLKG_RWSTAT_NR];
+struct blkg_iostat_set {
+        struct u64_stats_sync sync;
+        struct blkg_iostat cur;
+        struct blkg_iostat last;
 };
 
 /*
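
Illustrative sketch (not part of this patch): with the percpu_counter based blkg_rwstat gone, readers of a blkg_iostat_set are expected to take a consistent snapshot through the embedded u64_stats_sync. The function below is a hypothetical reader, assuming updaters wrap writes to ->cur in u64_stats_update_begin()/u64_stats_update_end() on ->sync.

/* hypothetical reader-side snapshot of one blkg_iostat_set */
static void example_iostat_snapshot(struct blkg_iostat_set *bis,
                                    struct blkg_iostat *out)
{
        unsigned int seq;

        do {
                seq = u64_stats_fetch_begin(&bis->sync);
                *out = bis->cur;        /* copies bytes[] and ios[] */
        } while (u64_stats_fetch_retry(&bis->sync, seq));
}
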
@@ -113,43 +112,39 @@
         struct hlist_node blkcg_node;
         struct blkcg *blkcg;
 
-        /*
-         * Each blkg gets congested separately and the congestion state is
-         * propagated to the matching bdi_writeback_congested.
-         */
-        struct bdi_writeback_congested *wb_congested;
-
         /* all non-root blkcg_gq's are guaranteed to have access to parent */
         struct blkcg_gq *parent;
 
-        /* request allocation list for this blkcg-q pair */
-        struct request_list rl;
-
         /* reference count */
-        atomic_t refcnt;
+        struct percpu_ref refcnt;
 
         /* is this blkg online? protected by both blkcg and q locks */
         bool online;
 
-        struct blkg_rwstat stat_bytes;
-        struct blkg_rwstat stat_ios;
+        struct blkg_iostat_set __percpu *iostat_cpu;
+        struct blkg_iostat_set iostat;
 
         struct blkg_policy_data *pd[BLKCG_MAX_POLS];
 
-        struct rcu_head rcu_head;
+        spinlock_t async_bio_lock;
+        struct bio_list async_bios;
+        struct work_struct async_bio_work;
 
         atomic_t use_delay;
         atomic64_t delay_nsec;
         atomic64_t delay_start;
         u64 last_delay;
         int last_use;
+
+        struct rcu_head rcu_head;
 };
 
 typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
 typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
 typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
 typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
-typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, int node);
+typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp,
+                struct request_queue *q, struct blkcg *blkcg);
 typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
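
Illustrative sketch (not part of this patch): the pd_alloc_fn prototype now receives the request_queue and blkcg instead of a bare NUMA node, so a policy can size or place its per-blkg data accordingly. Everything named example_* below is hypothetical.

struct example_pd {
        struct blkg_policy_data pd;     /* embedded, handed back to blk-cgroup */
        u64 nr_ios;
};

static struct blkg_policy_data *example_pd_alloc(gfp_t gfp,
                                                 struct request_queue *q,
                                                 struct blkcg *blkcg)
{
        struct example_pd *epd;

        epd = kzalloc_node(sizeof(*epd), gfp, q->node);
        if (!epd)
                return NULL;
        return &epd->pd;
}
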
@@ -181,13 +176,11 @@
 
 extern struct blkcg blkcg_root;
 extern struct cgroup_subsys_state * const blkcg_root_css;
+extern bool blkcg_debug_stats;
 
 struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
                                       struct request_queue *q, bool update_hint);
-struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
-                                    struct request_queue *q);
 int blkcg_init_queue(struct request_queue *q);
-void blkcg_drain_queue(struct request_queue *q);
 void blkcg_exit_queue(struct request_queue *q);
 
 /* Blkio controller policy registration */
@@ -205,20 +198,6 @@
                        const struct blkcg_policy *pol, int data,
                        bool show_total);
 u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
-u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
-                         const struct blkg_rwstat *rwstat);
-u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off);
-u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
-                       int off);
-int blkg_print_stat_bytes(struct seq_file *sf, void *v);
-int blkg_print_stat_ios(struct seq_file *sf, void *v);
-int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v);
-int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v);
-
-u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
-                            struct blkcg_policy *pol, int off);
-struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
-                                             struct blkcg_policy *pol, int off);
 
 struct blkg_conf_ctx {
         struct gendisk *disk;
@@ -226,26 +205,67 @@
         char *body;
 };
 
+struct gendisk *blkcg_conf_get_disk(char **inputp);
 int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
                    char *input, struct blkg_conf_ctx *ctx);
 void blkg_conf_finish(struct blkg_conf_ctx *ctx);
 
+/**
+ * blkcg_css - find the current css
+ *
+ * Find the css associated with either the kthread or the current task.
+ * This may return a dying css, so it is up to the caller to use tryget logic
+ * to confirm it is alive and well.
+ */
+static inline struct cgroup_subsys_state *blkcg_css(void)
+{
+        struct cgroup_subsys_state *css;
+
+        css = kthread_blkcg();
+        if (css)
+                return css;
+        return task_css(current, io_cgrp_id);
+}
 
 static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
 {
         return css ? container_of(css, struct blkcg, css) : NULL;
 }
 
+/**
+ * __bio_blkcg - internal, inconsistent version to get blkcg
+ *
+ * DO NOT USE.
+ * This function is inconsistent and consequently is dangerous to use. The
+ * first part of the function returns a blkcg where a reference is owned by the
+ * bio. This means it does not need to be rcu protected as it cannot go away
+ * with the bio owning a reference to it. However, the latter potentially gets
+ * it from task_css(). This can race against task migration and the cgroup
+ * dying. It is also semantically different as it must be called rcu protected
+ * and is susceptible to failure when trying to get a reference to it.
+ * Therefore, it is not ok to assume that *_get() will always succeed on the
+ * blkcg returned here.
+ */
+static inline struct blkcg *__bio_blkcg(struct bio *bio)
+{
+        if (bio && bio->bi_blkg)
+                return bio->bi_blkg->blkcg;
+        return css_to_blkcg(blkcg_css());
+}
+
+/**
+ * bio_blkcg - grab the blkcg associated with a bio
+ * @bio: target bio
+ *
+ * This returns the blkcg associated with a bio, %NULL if not associated.
+ * Callers are expected to either handle %NULL or know association has been
+ * done prior to calling this.
+ */
 static inline struct blkcg *bio_blkcg(struct bio *bio)
 {
-        struct cgroup_subsys_state *css;
-
-        if (bio && bio->bi_css)
-                return css_to_blkcg(bio->bi_css);
-        css = kthread_blkcg();
-        if (css)
-                return css_to_blkcg(css);
-        return css_to_blkcg(task_css(current, io_cgrp_id));
+        if (bio && bio->bi_blkg)
+                return bio->bi_blkg->blkcg;
+        return NULL;
 }
 
 static inline bool blk_cgroup_congested(void)
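
Illustrative sketch (not part of this patch): with bio_blkcg() now returning NULL for unassociated bios, callers that used to rely on the implicit task_css() fallback must either tolerate NULL or use __bio_blkcg() (discouraged, see above). The helper below is hypothetical; blkcg_root is the extern declared in this header.

/* hypothetical NULL-tolerant caller */
static inline bool example_bio_is_root(struct bio *bio)
{
        struct blkcg *blkcg = bio_blkcg(bio);

        return !blkcg || blkcg == &blkcg_root;
}
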
@@ -328,16 +348,12 @@
  * @q: request_queue of interest
  *
  * Lookup blkg for the @blkcg - @q pair. This function should be called
- * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
- * - see blk_queue_bypass_start() for details.
+ * under RCU read lock.
  */
 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
                                            struct request_queue *q)
 {
         WARN_ON_ONCE(!rcu_read_lock_held());
-
-        if (unlikely(blk_queue_bypass(q)))
-                return NULL;
         return __blkg_lookup(blkcg, q, false);
 }
 
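
Illustrative sketch (not part of this patch): with the bypass check gone, blkg_lookup() is a plain RCU-protected lookup; the result is only stable while the RCU read lock is held, or until a reference is taken with blkg_tryget() (further down). The caller below is hypothetical.

static bool example_blkg_exists(struct blkcg *blkcg, struct request_queue *q)
{
        bool ret;

        rcu_read_lock();
        ret = blkg_lookup(blkcg, q) != NULL;
        rcu_read_unlock();
        return ret;
}
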
@@ -389,46 +405,37 @@
 
 extern void blkcg_destroy_blkgs(struct blkcg *blkcg);
 
-#ifdef CONFIG_CGROUP_WRITEBACK
-
 /**
- * blkcg_cgwb_get - get a reference for blkcg->cgwb_list
+ * blkcg_pin_online - pin online state
  * @blkcg: blkcg of interest
  *
- * This is used to track the number of active wb's related to a blkcg.
+ * While pinned, a blkcg is kept online. This is primarily used to
+ * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline
+ * while an associated cgwb is still active.
  */
-static inline void blkcg_cgwb_get(struct blkcg *blkcg)
+static inline void blkcg_pin_online(struct blkcg *blkcg)
 {
-        refcount_inc(&blkcg->cgwb_refcnt);
+        refcount_inc(&blkcg->online_pin);
 }
 
 /**
- * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list
+ * blkcg_unpin_online - unpin online state
  * @blkcg: blkcg of interest
  *
- * This is used to track the number of active wb's related to a blkcg.
- * When this count goes to zero, all active wb has finished so the
+ * This is primarily used to impedance-match blkg and cgwb lifetimes so
+ * that blkg doesn't go offline while an associated cgwb is still active.
+ * When this count goes to zero, all active cgwbs have finished so the
 * blkcg can continue destruction by calling blkcg_destroy_blkgs().
- * This work may occur in cgwb_release_workfn() on the cgwb_release
- * workqueue.
 */
-static inline void blkcg_cgwb_put(struct blkcg *blkcg)
+static inline void blkcg_unpin_online(struct blkcg *blkcg)
 {
-        if (refcount_dec_and_test(&blkcg->cgwb_refcnt))
+        do {
+                if (!refcount_dec_and_test(&blkcg->online_pin))
+                        break;
                 blkcg_destroy_blkgs(blkcg);
+                blkcg = blkcg_parent(blkcg);
+        } while (blkcg);
 }
-
-#else
-
-static inline void blkcg_cgwb_get(struct blkcg *blkcg) { }
-
-static inline void blkcg_cgwb_put(struct blkcg *blkcg)
-{
-        /* wb isn't being accounted, so trigger destruction right away */
-        blkcg_destroy_blkgs(blkcg);
-}
-
-#endif
 
 /**
  * blkg_path - format cgroup path of blkg
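
Illustrative sketch (not part of this patch): the intended pairing for blkcg_pin_online()/blkcg_unpin_online(), e.g. for a cgwb-style object that must keep the blkcg's blkgs online for its own lifetime. struct example_obj and both helpers are hypothetical.

struct example_obj {
        struct blkcg *blkcg;
};

static void example_obj_attach(struct example_obj *obj, struct blkcg *blkcg)
{
        obj->blkcg = blkcg;
        blkcg_pin_online(blkcg);        /* keep blkgs online while obj lives */
}

static void example_obj_release(struct example_obj *obj)
{
        /* the last unpin may cascade blkcg_destroy_blkgs() up the hierarchy */
        blkcg_unpin_online(obj->blkcg);
}
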
@@ -451,26 +458,20 @@
  */
 static inline void blkg_get(struct blkcg_gq *blkg)
 {
-        WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
-        atomic_inc(&blkg->refcnt);
+        percpu_ref_get(&blkg->refcnt);
 }
 
 /**
- * blkg_try_get - try and get a blkg reference
+ * blkg_tryget - try and get a blkg reference
  * @blkg: blkg to get
  *
  * This is for use when doing an RCU lookup of the blkg. We may be in the midst
 * of freeing this blkg, so we can only use it if the refcnt is not zero.
  */
-static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
+static inline bool blkg_tryget(struct blkcg_gq *blkg)
 {
-        if (atomic_inc_not_zero(&blkg->refcnt))
-                return blkg;
-        return NULL;
+        return blkg && percpu_ref_tryget(&blkg->refcnt);
 }
-
-
-void __blkg_release_rcu(struct rcu_head *rcu);
 
 /**
  * blkg_put - put a blkg reference
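
Illustrative sketch (not part of this patch): the lookup-and-pin pattern the percpu_ref based blkg_tryget() is meant for; note that it accepts NULL, so the lookup result can be passed straight in. The caller below is hypothetical.

static struct blkcg_gq *example_lookup_and_get(struct blkcg *blkcg,
                                               struct request_queue *q)
{
        struct blkcg_gq *blkg;

        rcu_read_lock();
        blkg = blkg_lookup(blkcg, q);
        if (!blkg_tryget(blkg))         /* handles NULL and dying blkgs */
                blkg = NULL;
        rcu_read_unlock();
        return blkg;                    /* caller drops it with blkg_put() */
}
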
@@ -478,9 +479,7 @@
  */
 static inline void blkg_put(struct blkcg_gq *blkg)
 {
-        WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
-        if (atomic_dec_and_test(&blkg->refcnt))
-                call_rcu(&blkg->rcu_head, __blkg_release_rcu);
+        percpu_ref_put(&blkg->refcnt);
 }
 
 /**
@@ -515,331 +514,25 @@
         if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
                                       (p_blkg)->q, false)))
 
-/**
- * blk_get_rl - get request_list to use
- * @q: request_queue of interest
- * @bio: bio which will be attached to the allocated request (may be %NULL)
- *
- * The caller wants to allocate a request from @q to use for @bio. Find
- * the request_list to use and obtain a reference on it. Should be called
- * under queue_lock. This function is guaranteed to return non-%NULL
- * request_list.
- */
-static inline struct request_list *blk_get_rl(struct request_queue *q,
-                                              struct bio *bio)
+bool __blkcg_punt_bio_submit(struct bio *bio);
+
+static inline bool blkcg_punt_bio_submit(struct bio *bio)
 {
-        struct blkcg *blkcg;
-        struct blkcg_gq *blkg;
-
-        rcu_read_lock();
-
-        blkcg = bio_blkcg(bio);
-
-        /* bypass blkg lookup and use @q->root_rl directly for root */
-        if (blkcg == &blkcg_root)
-                goto root_rl;
-
-        /*
-         * Try to use blkg->rl. blkg lookup may fail under memory pressure
-         * or if either the blkcg or queue is going away. Fall back to
-         * root_rl in such cases.
-         */
-        blkg = blkg_lookup(blkcg, q);
-        if (unlikely(!blkg))
-                goto root_rl;
-
-        blkg_get(blkg);
-        rcu_read_unlock();
-        return &blkg->rl;
-root_rl:
-        rcu_read_unlock();
-        return &q->root_rl;
-}
-
-/**
- * blk_put_rl - put request_list
- * @rl: request_list to put
- *
- * Put the reference acquired by blk_get_rl(). Should be called under
- * queue_lock.
- */
-static inline void blk_put_rl(struct request_list *rl)
-{
-        if (rl->blkg->blkcg != &blkcg_root)
-                blkg_put(rl->blkg);
-}
-
-/**
- * blk_rq_set_rl - associate a request with a request_list
- * @rq: request of interest
- * @rl: target request_list
- *
- * Associate @rq with @rl so that accounting and freeing can know the
- * request_list @rq came from.
- */
-static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl)
-{
-        rq->rl = rl;
-}
-
-/**
- * blk_rq_rl - return the request_list a request came from
- * @rq: request of interest
- *
- * Return the request_list @rq is allocated from.
- */
-static inline struct request_list *blk_rq_rl(struct request *rq)
-{
-        return rq->rl;
-}
-
-struct request_list *__blk_queue_next_rl(struct request_list *rl,
-                                         struct request_queue *q);
-/**
- * blk_queue_for_each_rl - iterate through all request_lists of a request_queue
- *
- * Should be used under queue_lock.
- */
-#define blk_queue_for_each_rl(rl, q) \
-        for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))
-
-static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp)
-{
-        int ret;
-
-        ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
-        if (ret)
-                return ret;
-
-        atomic64_set(&stat->aux_cnt, 0);
-        return 0;
-}
-
-static inline void blkg_stat_exit(struct blkg_stat *stat)
-{
-        percpu_counter_destroy(&stat->cpu_cnt);
-}
-
-/**
- * blkg_stat_add - add a value to a blkg_stat
- * @stat: target blkg_stat
- * @val: value to add
- *
- * Add @val to @stat. The caller must ensure that IRQ on the same CPU
- * don't re-enter this function for the same counter.
- */
-static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
-{
-        percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
-}
-
-/**
- * blkg_stat_read - read the current value of a blkg_stat
- * @stat: blkg_stat to read
- */
-static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
-{
-        return percpu_counter_sum_positive(&stat->cpu_cnt);
-}
-
-/**
- * blkg_stat_reset - reset a blkg_stat
- * @stat: blkg_stat to reset
- */
-static inline void blkg_stat_reset(struct blkg_stat *stat)
-{
-        percpu_counter_set(&stat->cpu_cnt, 0);
-        atomic64_set(&stat->aux_cnt, 0);
-}
-
-/**
- * blkg_stat_add_aux - add a blkg_stat into another's aux count
- * @to: the destination blkg_stat
- * @from: the source
- *
- * Add @from's count including the aux one to @to's aux count.
- */
-static inline void blkg_stat_add_aux(struct blkg_stat *to,
-                                     struct blkg_stat *from)
-{
-        atomic64_add(blkg_stat_read(from) + atomic64_read(&from->aux_cnt),
-                     &to->aux_cnt);
-}
-
-static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
-{
-        int i, ret;
-
-        for (i = 0; i < BLKG_RWSTAT_NR; i++) {
-                ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
-                if (ret) {
-                        while (--i >= 0)
-                                percpu_counter_destroy(&rwstat->cpu_cnt[i]);
-                        return ret;
-                }
-                atomic64_set(&rwstat->aux_cnt[i], 0);
-        }
-        return 0;
-}
-
-static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
-{
-        int i;
-
-        for (i = 0; i < BLKG_RWSTAT_NR; i++)
-                percpu_counter_destroy(&rwstat->cpu_cnt[i]);
-}
-
-/**
- * blkg_rwstat_add - add a value to a blkg_rwstat
- * @rwstat: target blkg_rwstat
- * @op: REQ_OP and flags
- * @val: value to add
- *
- * Add @val to @rwstat. The counters are chosen according to @rw. The
- * caller is responsible for synchronizing calls to this function.
- */
-static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
-                                   unsigned int op, uint64_t val)
-{
-        struct percpu_counter *cnt;
-
-        if (op_is_discard(op))
-                cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
-        else if (op_is_write(op))
-                cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
+        if (bio->bi_opf & REQ_CGROUP_PUNT)
+                return __blkcg_punt_bio_submit(bio);
         else
-                cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
-
-        percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
-
-        if (op_is_sync(op))
-                cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
-        else
-                cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
-
-        percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
+                return false;
 }
 
-/**
- * blkg_rwstat_read - read the current values of a blkg_rwstat
- * @rwstat: blkg_rwstat to read
- *
- * Read the current snapshot of @rwstat and return it in the aux counts.
- */
-static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
+static inline void blkcg_bio_issue_init(struct bio *bio)
 {
-        struct blkg_rwstat result;
-        int i;
-
-        for (i = 0; i < BLKG_RWSTAT_NR; i++)
-                atomic64_set(&result.aux_cnt[i],
-                             percpu_counter_sum_positive(&rwstat->cpu_cnt[i]));
-        return result;
-}
-
-/**
- * blkg_rwstat_total - read the total count of a blkg_rwstat
- * @rwstat: blkg_rwstat to read
- *
- * Return the total count of @rwstat regardless of the IO direction. This
- * function can be called without synchronization and takes care of u64
- * atomicity.
- */
-static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
-{
-        struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);
-
-        return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
-                atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
-}
-
-/**
- * blkg_rwstat_reset - reset a blkg_rwstat
- * @rwstat: blkg_rwstat to reset
- */
-static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
-{
-        int i;
-
-        for (i = 0; i < BLKG_RWSTAT_NR; i++) {
-                percpu_counter_set(&rwstat->cpu_cnt[i], 0);
-                atomic64_set(&rwstat->aux_cnt[i], 0);
-        }
-}
-
-/**
- * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
- * @to: the destination blkg_rwstat
- * @from: the source
- *
- * Add @from's count including the aux one to @to's aux count.
- */
-static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
-                                       struct blkg_rwstat *from)
-{
-        u64 sum[BLKG_RWSTAT_NR];
-        int i;
-
-        for (i = 0; i < BLKG_RWSTAT_NR; i++)
-                sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]);
-
-        for (i = 0; i < BLKG_RWSTAT_NR; i++)
-                atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]),
-                             &to->aux_cnt[i]);
-}
-
-#ifdef CONFIG_BLK_DEV_THROTTLING
-extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
-                           struct bio *bio);
-#else
-static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
-                                  struct bio *bio) { return false; }
-#endif
-
-static inline bool blkcg_bio_issue_check(struct request_queue *q,
-                                         struct bio *bio)
-{
-        struct blkcg *blkcg;
-        struct blkcg_gq *blkg;
-        bool throtl = false;
-
-        rcu_read_lock();
-        blkcg = bio_blkcg(bio);
-
-        /* associate blkcg if bio hasn't attached one */
-        bio_associate_blkcg(bio, &blkcg->css);
-
-        blkg = blkg_lookup(blkcg, q);
-        if (unlikely(!blkg)) {
-                spin_lock_irq(q->queue_lock);
-                blkg = blkg_lookup_create(blkcg, q);
-                if (IS_ERR(blkg))
-                        blkg = NULL;
-                spin_unlock_irq(q->queue_lock);
-        }
-
-        throtl = blk_throtl_bio(q, blkg, bio);
-
-        if (!throtl) {
-                blkg = blkg ?: q->root_blkg;
-                /*
-                 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
-                 * is a split bio and we would have already accounted for the
-                 * size of the bio.
-                 */
-                if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
-                        blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
-                                        bio->bi_iter.bi_size);
-                blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
-        }
-
-        rcu_read_unlock();
-        return !throtl;
+        bio_issue_init(&bio->bi_issue, bio_sectors(bio));
 }
 
 static inline void blkcg_use_delay(struct blkcg_gq *blkg)
 {
+        if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
+                return;
         if (atomic_add_return(1, &blkg->use_delay) == 1)
                 atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
 }
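
Illustrative sketch (not part of this patch): how a submission path is expected to use the two new hooks above -- punt REQ_CGROUP_PUNT bios to the blkg's async_bio_work first, and stamp the issue size/time on everything that proceeds. example_submit() and submit_to_driver() are stand-ins, not real kernel functions.

void submit_to_driver(struct bio *bio);         /* stand-in declaration */

static void example_submit(struct bio *bio)
{
        if (blkcg_punt_bio_submit(bio))
                return;                 /* ownership moved to the punt worker */

        blkcg_bio_issue_init(bio);      /* records size + timestamp in bi_issue */
        submit_to_driver(bio);
}
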
@@ -848,6 +541,8 @@
 {
         int old = atomic_read(&blkg->use_delay);
 
+        if (WARN_ON_ONCE(old < 0))
+                return 0;
         if (old == 0)
                 return 0;
 
@@ -872,22 +567,57 @@
         return 1;
 }
 
+/**
+ * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount
+ * @blkg: target blkg
+ * @delay: delay duration in nsecs
+ *
+ * When enabled with this function, the delay is not decayed and must be
+ * explicitly cleared with blkcg_clear_delay(). Must not be mixed with
+ * blkcg_[un]use_delay() and blkcg_add_delay() usages.
+ */
+static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay)
+{
+        int old = atomic_read(&blkg->use_delay);
+
+        /* We only want 1 person setting the congestion count for this blkg. */
+        if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old)
+                atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
+
+        atomic64_set(&blkg->delay_nsec, delay);
+}
+
+/**
+ * blkcg_clear_delay - Disable allocator delay mechanism
+ * @blkg: target blkg
+ *
+ * Disable use_delay mechanism. See blkcg_set_delay().
+ */
 static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
 {
         int old = atomic_read(&blkg->use_delay);
-        if (!old)
-                return;
+
         /* We only want 1 person clearing the congestion count for this blkg. */
-        while (old) {
-                int cur = atomic_cmpxchg(&blkg->use_delay, old, 0);
-                if (cur == old) {
-                        atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
-                        break;
-                }
-                old = cur;
-        }
+        if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old)
+                atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
 }
 
+/**
+ * blk_cgroup_mergeable - Determine whether to allow or disallow merges
+ * @rq: request to merge into
+ * @bio: bio to merge
+ *
+ * @bio and @rq should belong to the same cgroup and their issue_as_root should
+ * match. The latter is necessary as we don't want to throttle e.g. a metadata
+ * update because it happens to be next to a regular IO.
+ */
+static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio)
+{
+        return rq->bio->bi_blkg == bio->bi_blkg &&
+                bio_issue_as_root_blkg(rq->bio) == bio_issue_as_root_blkg(bio);
+}
+
+void blk_cgroup_bio_start(struct bio *bio);
 void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
 void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
 void blkcg_maybe_throttle_current(void);
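
Illustrative sketch (not part of this patch): the blkcg_set_delay()/blkcg_clear_delay() contract -- a fixed, non-decaying delay that is held until explicitly cleared and must not be mixed with blkcg_[un]use_delay()/blkcg_add_delay(). The hypothetical throttler below arms a 1ms delay and later disarms it; NSEC_PER_MSEC comes from linux/time64.h.

static void example_throttle_on(struct blkcg_gq *blkg)
{
        blkcg_set_delay(blkg, NSEC_PER_MSEC);   /* 1ms, held until cleared */
}

static void example_throttle_off(struct blkcg_gq *blkg)
{
        blkcg_clear_delay(blkg);
}
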
@@ -921,7 +651,6 @@
 static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
 { return NULL; }
 static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
-static inline void blkcg_drain_queue(struct request_queue *q) { }
 static inline void blkcg_exit_queue(struct request_queue *q) { }
 static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
 static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
@@ -930,6 +659,7 @@
 static inline void blkcg_deactivate_policy(struct request_queue *q,
                                            const struct blkcg_policy *pol) { }
 
+static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
 static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
 
 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
@@ -939,14 +669,10 @@
 static inline void blkg_get(struct blkcg_gq *blkg) { }
 static inline void blkg_put(struct blkcg_gq *blkg) { }
 
-static inline struct request_list *blk_get_rl(struct request_queue *q,
-                                              struct bio *bio) { return &q->root_rl; }
-static inline void blk_put_rl(struct request_list *rl) { }
-static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
-static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
-
-static inline bool blkcg_bio_issue_check(struct request_queue *q,
-                                         struct bio *bio) { return true; }
+static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
+static inline void blkcg_bio_issue_init(struct bio *bio) { }
+static inline void blk_cgroup_bio_start(struct bio *bio) { }
+static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; }
 
 #define blk_queue_for_each_rl(rl, q) \
         for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)