.. | ..
14 | 14 |  * Nauman Rafique <nauman@google.com>
15 | 15 |  */
16 | 16 |
17 | | -#include <linux/kthread.h>
| 17 | +#include <linux/cgroup.h>
| 18 | +#include <linux/percpu.h>
18 | 19 | #include <linux/percpu_counter.h>
| 20 | +#include <linux/u64_stats_sync.h>
19 | 21 | #include <linux/seq_file.h>
20 | 22 | #include <linux/radix-tree.h>
21 | 23 | #include <linux/blkdev.h>
22 | 24 | #include <linux/atomic.h>
23 | 25 | #include <linux/kthread.h>
| 26 | +#include <linux/fs.h>
| 27 | +#ifndef __GENKSYMS__
| 28 | +#include <linux/blk-mq.h>
| 29 | +#endif
24 | 30 |
25 | 31 | /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
26 | 32 | #define BLKG_STAT_CPU_BATCH (INT_MAX / 2)
.. | ..
30 | 36 |
31 | 37 | #ifdef CONFIG_BLK_CGROUP
32 | 38 |
33 | | -enum blkg_rwstat_type {
34 | | -        BLKG_RWSTAT_READ,
35 | | -        BLKG_RWSTAT_WRITE,
36 | | -        BLKG_RWSTAT_SYNC,
37 | | -        BLKG_RWSTAT_ASYNC,
38 | | -        BLKG_RWSTAT_DISCARD,
| 39 | +enum blkg_iostat_type {
| 40 | +        BLKG_IOSTAT_READ,
| 41 | +        BLKG_IOSTAT_WRITE,
| 42 | +        BLKG_IOSTAT_DISCARD,
39 | 43 |
40 | | -        BLKG_RWSTAT_NR,
41 | | -        BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
| 44 | +        BLKG_IOSTAT_NR,
42 | 45 | };
43 | 46 |
44 | 47 | struct blkcg_gq;
.. | ..
46 | 49 | struct blkcg {
47 | 50 |         struct cgroup_subsys_state css;
48 | 51 |         spinlock_t lock;
| 52 | +        refcount_t online_pin;
49 | 53 |
50 | 54 |         struct radix_tree_root blkg_tree;
51 | 55 |         struct blkcg_gq __rcu *blkg_hint;
.. | ..
56 | 60 |         struct list_head all_blkcgs_node;
57 | 61 | #ifdef CONFIG_CGROUP_WRITEBACK
58 | 62 |         struct list_head cgwb_list;
59 | | -        refcount_t cgwb_refcnt;
60 | 63 | #endif
61 | 64 | };
62 | 65 |
63 | | -/*
64 | | - * blkg_[rw]stat->aux_cnt is excluded for local stats but included for
65 | | - * recursive. Used to carry stats of dead children, and, for blkg_rwstat,
66 | | - * to carry result values from read and sum operations.
67 | | - */
68 | | -struct blkg_stat {
69 | | -        struct percpu_counter cpu_cnt;
70 | | -        atomic64_t aux_cnt;
| 66 | +struct blkg_iostat {
| 67 | +        u64 bytes[BLKG_IOSTAT_NR];
| 68 | +        u64 ios[BLKG_IOSTAT_NR];
71 | 69 | };
72 | 70 |
73 | | -struct blkg_rwstat {
74 | | -        struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR];
75 | | -        atomic64_t aux_cnt[BLKG_RWSTAT_NR];
| 71 | +struct blkg_iostat_set {
| 72 | +        struct u64_stats_sync sync;
| 73 | +        struct blkg_iostat cur;
| 74 | +        struct blkg_iostat last;
76 | 75 | };
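
The new blkg_iostat_set replaces percpu_counter pairs with plain u64 arrays guarded by u64_stats_sync. A minimal reader sketch (a hypothetical helper, not part of this patch) — writers bracket updates with u64_stats_update_begin()/u64_stats_update_end(), and readers retry until they get an untorn snapshot:

static void iostat_snapshot(struct blkg_iostat_set *bis, struct blkg_iostat *out)
{
        unsigned int seq;

        do {
                seq = u64_stats_fetch_begin(&bis->sync);
                *out = bis->cur;        /* copies all bytes[]/ios[] counters */
        } while (u64_stats_fetch_retry(&bis->sync, seq));
}
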
77 | 76 |
78 | 77 | /*
.. | ..
113 | 112 |         struct hlist_node blkcg_node;
114 | 113 |         struct blkcg *blkcg;
115 | 114 |
116 | | -        /*
117 | | -         * Each blkg gets congested separately and the congestion state is
118 | | -         * propagated to the matching bdi_writeback_congested.
119 | | -         */
120 | | -        struct bdi_writeback_congested *wb_congested;
121 | | -
122 | 115 |         /* all non-root blkcg_gq's are guaranteed to have access to parent */
123 | 116 |         struct blkcg_gq *parent;
124 | 117 |
125 | | -        /* request allocation list for this blkcg-q pair */
126 | | -        struct request_list rl;
127 | | -
128 | 118 |         /* reference count */
129 | | -        atomic_t refcnt;
| 119 | +        struct percpu_ref refcnt;
130 | 120 |
131 | 121 |         /* is this blkg online? protected by both blkcg and q locks */
132 | 122 |         bool online;
133 | 123 |
134 | | -        struct blkg_rwstat stat_bytes;
135 | | -        struct blkg_rwstat stat_ios;
| 124 | +        struct blkg_iostat_set __percpu *iostat_cpu;
| 125 | +        struct blkg_iostat_set iostat;
136 | 126 |
137 | 127 |         struct blkg_policy_data *pd[BLKCG_MAX_POLS];
138 | 128 |
139 | | -        struct rcu_head rcu_head;
| 129 | +        spinlock_t async_bio_lock;
| 130 | +        struct bio_list async_bios;
| 131 | +        struct work_struct async_bio_work;
140 | 132 |
141 | 133 |         atomic_t use_delay;
142 | 134 |         atomic64_t delay_nsec;
143 | 135 |         atomic64_t delay_start;
144 | 136 |         u64 last_delay;
145 | 137 |         int last_use;
| 138 | +
| 139 | +        struct rcu_head rcu_head;
146 | 140 | };
147 | 141 |
148 | 142 | typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
149 | 143 | typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
150 | 144 | typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
151 | 145 | typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
152 | | -typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, int node);
| 146 | +typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp,
| 147 | +                struct request_queue *q, struct blkcg *blkcg);
153 | 148 | typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
154 | 149 | typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
155 | 150 | typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
.. | ..
181 | 176 |
182 | 177 | extern struct blkcg blkcg_root;
183 | 178 | extern struct cgroup_subsys_state * const blkcg_root_css;
| 179 | +extern bool blkcg_debug_stats;
184 | 180 |
185 | 181 | struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
186 | 182 |                 struct request_queue *q, bool update_hint);
187 | | -struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
188 | | -                struct request_queue *q);
189 | 183 | int blkcg_init_queue(struct request_queue *q);
190 | | -void blkcg_drain_queue(struct request_queue *q);
191 | 184 | void blkcg_exit_queue(struct request_queue *q);
192 | 185 |
193 | 186 | /* Blkio controller policy registration */
.. | ..
205 | 198 |                 const struct blkcg_policy *pol, int data,
206 | 199 |                 bool show_total);
207 | 200 | u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
208 | | -u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
209 | | -                const struct blkg_rwstat *rwstat);
210 | | -u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off);
211 | | -u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
212 | | -                int off);
213 | | -int blkg_print_stat_bytes(struct seq_file *sf, void *v);
214 | | -int blkg_print_stat_ios(struct seq_file *sf, void *v);
215 | | -int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v);
216 | | -int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v);
217 | | -
218 | | -u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
219 | | -                struct blkcg_policy *pol, int off);
220 | | -struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
221 | | -                struct blkcg_policy *pol, int off);
222 | 201 |
223 | 202 | struct blkg_conf_ctx {
224 | 203 |         struct gendisk *disk;
.. | ..
226 | 205 |         char *body;
227 | 206 | };
228 | 207 |
| 208 | +struct gendisk *blkcg_conf_get_disk(char **inputp);
229 | 209 | int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
230 | 210 |                 char *input, struct blkg_conf_ctx *ctx);
231 | 211 | void blkg_conf_finish(struct blkg_conf_ctx *ctx);
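
For context, a typical policy write handler built on this API might look like the following sketch (example_policy and the single u64 body are assumptions, not part of this patch):

static int example_set_limit(struct blkcg *blkcg, char *input)
{
        struct blkg_conf_ctx ctx;
        u64 v;
        int ret;

        /* parses the "MAJ:MIN" prefix, pins the disk, looks up the blkg */
        ret = blkg_conf_prep(blkcg, &example_policy, input, &ctx);
        if (ret)
                return ret;

        ret = kstrtou64(ctx.body, 10, &v);
        /* on success, apply v to ctx.blkg's policy data here */

        blkg_conf_finish(&ctx);         /* drops the lock and disk reference */
        return ret;
}
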
232 | 212 |
| 213 | +/**
| 214 | + * blkcg_css - find the current css
| 215 | + *
| 216 | + * Find the css associated with either the kthread or the current task.
| 217 | + * This may return a dying css, so it is up to the caller to use tryget logic
| 218 | + * to confirm it is alive and well.
| 219 | + */
| 220 | +static inline struct cgroup_subsys_state *blkcg_css(void)
| 221 | +{
| 222 | +        struct cgroup_subsys_state *css;
| 223 | +
| 224 | +        css = kthread_blkcg();
| 225 | +        if (css)
| 226 | +                return css;
| 227 | +        return task_css(current, io_cgrp_id);
| 228 | +}
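
Because the returned css may already be dying, a caller that needs to hold it beyond the RCU section has to confirm liveness first; a hypothetical caller sketch:

static struct cgroup_subsys_state *get_live_blkcg_css(void)
{
        struct cgroup_subsys_state *css;

        rcu_read_lock();
        css = blkcg_css();
        if (!css_tryget_online(css))    /* may fail on a dying css */
                css = NULL;
        rcu_read_unlock();
        return css;                     /* css_put() when done */
}
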
233 | 229 |
234 | 230 | static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
235 | 231 | {
236 | 232 |         return css ? container_of(css, struct blkcg, css) : NULL;
237 | 233 | }
238 | 234 |
| 235 | +/**
| 236 | + * __bio_blkcg - internal, inconsistent version to get blkcg
| 237 | + *
| 238 | + * DO NOT USE.
| 239 | + * This function is inconsistent and consequently is dangerous to use. The
| 240 | + * first part of the function returns a blkcg where a reference is owned by the
| 241 | + * bio. This means it does not need to be rcu protected as it cannot go away
| 242 | + * with the bio owning a reference to it. However, the latter potentially gets
| 243 | + * it from task_css(). This can race against task migration and the cgroup
| 244 | + * dying. It is also semantically different as it must be called rcu protected
| 245 | + * and is susceptible to failure when trying to get a reference to it.
| 246 | + * Therefore, it is not ok to assume that *_get() will always succeed on the
| 247 | + * blkcg returned here.
| 248 | + */
| 249 | +static inline struct blkcg *__bio_blkcg(struct bio *bio)
| 250 | +{
| 251 | +        if (bio && bio->bi_blkg)
| 252 | +                return bio->bi_blkg->blkcg;
| 253 | +        return css_to_blkcg(blkcg_css());
| 254 | +}
| 255 | +
| 256 | +/**
| 257 | + * bio_blkcg - grab the blkcg associated with a bio
| 258 | + * @bio: target bio
| 259 | + *
| 260 | + * This returns the blkcg associated with a bio, %NULL if not associated.
| 261 | + * Callers are expected to either handle %NULL or know association has been
| 262 | + * done prior to calling this.
| 263 | + */
239 | 264 | static inline struct blkcg *bio_blkcg(struct bio *bio)
240 | 265 | {
241 | | -        struct cgroup_subsys_state *css;
242 | | -
243 | | -        if (bio && bio->bi_css)
244 | | -                return css_to_blkcg(bio->bi_css);
245 | | -        css = kthread_blkcg();
246 | | -        if (css)
247 | | -                return css_to_blkcg(css);
248 | | -        return css_to_blkcg(task_css(current, io_cgrp_id));
| 266 | +        if (bio && bio->bi_blkg)
| 267 | +                return bio->bi_blkg->blkcg;
| 268 | +        return NULL;
249 | 269 | }
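
A sketch of the expected %NULL handling at a call site (hypothetical helper; the fallback mirrors __bio_blkcg() and inherits its race caveats):

static struct blkcg *bio_or_task_blkcg(struct bio *bio)
{
        struct blkcg *blkcg = bio_blkcg(bio);

        if (!blkcg)     /* bio was never associated with a blkg */
                blkcg = css_to_blkcg(blkcg_css());
        return blkcg;
}
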
250 | 270 |
251 | 271 | static inline bool blk_cgroup_congested(void)
.. | ..
328 | 348 |  * @q: request_queue of interest
329 | 349 |  *
330 | 350 |  * Lookup blkg for the @blkcg - @q pair. This function should be called
331 | | - * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
332 | | - * - see blk_queue_bypass_start() for details.
| 351 | + * under RCU read lock.
333 | 352 |  */
334 | 353 | static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
335 | 354 |                 struct request_queue *q)
336 | 355 | {
337 | 356 |         WARN_ON_ONCE(!rcu_read_lock_held());
338 | | -
339 | | -        if (unlikely(blk_queue_bypass(q)))
340 | | -                return NULL;
341 | 357 |         return __blkg_lookup(blkcg, q, false);
342 | 358 | }
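
Typical usage stays entirely inside one RCU read section unless a reference is taken; a sketch (racy snapshot, for illustration only):

static bool example_blkg_online(struct blkcg *blkcg, struct request_queue *q)
{
        struct blkcg_gq *blkg;
        bool online = false;

        rcu_read_lock();
        blkg = blkg_lookup(blkcg, q);
        if (blkg)
                online = blkg->online;  /* blkg valid only in this section */
        rcu_read_unlock();
        return online;
}
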
343 | 359 |
.. | ..
389 | 405 |
390 | 406 | extern void blkcg_destroy_blkgs(struct blkcg *blkcg);
391 | 407 |
392 | | -#ifdef CONFIG_CGROUP_WRITEBACK
393 | | -
394 | 408 | /**
395 | | - * blkcg_cgwb_get - get a reference for blkcg->cgwb_list
| 409 | + * blkcg_pin_online - pin online state
396 | 410 |  * @blkcg: blkcg of interest
397 | 411 |  *
398 | | - * This is used to track the number of active wb's related to a blkcg.
| 412 | + * While pinned, a blkcg is kept online. This is primarily used to
| 413 | + * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline
| 414 | + * while an associated cgwb is still active.
399 | 415 |  */
400 | | -static inline void blkcg_cgwb_get(struct blkcg *blkcg)
| 416 | +static inline void blkcg_pin_online(struct blkcg *blkcg)
401 | 417 | {
402 | | -        refcount_inc(&blkcg->cgwb_refcnt);
| 418 | +        refcount_inc(&blkcg->online_pin);
403 | 419 | }
404 | 420 |
405 | 421 | /**
406 | | - * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list
| 422 | + * blkcg_unpin_online - unpin online state
407 | 423 |  * @blkcg: blkcg of interest
408 | 424 |  *
409 | | - * This is used to track the number of active wb's related to a blkcg.
410 | | - * When this count goes to zero, all active wb has finished so the
| 425 | + * This is primarily used to impedance-match blkg and cgwb lifetimes so
| 426 | + * that blkg doesn't go offline while an associated cgwb is still active.
| 427 | + * When this count goes to zero, all active cgwbs have finished so the
411 | 428 |  * blkcg can continue destruction by calling blkcg_destroy_blkgs().
412 | | - * This work may occur in cgwb_release_workfn() on the cgwb_release
413 | | - * workqueue.
414 | 429 |  */
415 | | -static inline void blkcg_cgwb_put(struct blkcg *blkcg)
| 430 | +static inline void blkcg_unpin_online(struct blkcg *blkcg)
416 | 431 | {
417 | | -        if (refcount_dec_and_test(&blkcg->cgwb_refcnt))
| 432 | +        do {
| 433 | +                if (!refcount_dec_and_test(&blkcg->online_pin))
| 434 | +                        break;
418 | 435 |                 blkcg_destroy_blkgs(blkcg);
| 436 | +                blkcg = blkcg_parent(blkcg);
| 437 | +        } while (blkcg);
419 | 438 | }
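
The intended pairing, roughly as the cgwb code uses it (hypothetical call sites): pin while a writeback structure holds the blkcg, unpin on release; the final unpin now also cascades destruction up the ancestor chain:

static void example_cgwb_create(struct blkcg *blkcg)
{
        blkcg_pin_online(blkcg);        /* keep blkgs online while the cgwb lives */
}

static void example_cgwb_release(struct blkcg *blkcg)
{
        blkcg_unpin_online(blkcg);      /* last unpin destroys blkgs, then parents */
}
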
420 | | -
421 | | -#else
422 | | -
423 | | -static inline void blkcg_cgwb_get(struct blkcg *blkcg) { }
424 | | -
425 | | -static inline void blkcg_cgwb_put(struct blkcg *blkcg)
426 | | -{
427 | | -        /* wb isn't being accounted, so trigger destruction right away */
428 | | -        blkcg_destroy_blkgs(blkcg);
429 | | -}
430 | | -
431 | | -#endif
432 | 439 |
433 | 440 | /**
434 | 441 |  * blkg_path - format cgroup path of blkg
.. | ..
451 | 458 |  */
452 | 459 | static inline void blkg_get(struct blkcg_gq *blkg)
453 | 460 | {
454 | | -        WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
455 | | -        atomic_inc(&blkg->refcnt);
| 461 | +        percpu_ref_get(&blkg->refcnt);
456 | 462 | }
457 | 463 |
458 | 464 | /**
459 | | - * blkg_try_get - try and get a blkg reference
| 465 | + * blkg_tryget - try and get a blkg reference
460 | 466 |  * @blkg: blkg to get
461 | 467 |  *
462 | 468 |  * This is for use when doing an RCU lookup of the blkg. We may be in the midst
463 | 469 |  * of freeing this blkg, so we can only use it if the refcnt is not zero.
464 | 470 |  */
465 | | -static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
| 471 | +static inline bool blkg_tryget(struct blkcg_gq *blkg)
466 | 472 | {
467 | | -        if (atomic_inc_not_zero(&blkg->refcnt))
468 | | -                return blkg;
469 | | -        return NULL;
| 473 | +        return blkg && percpu_ref_tryget(&blkg->refcnt);
470 | 474 | }
471 | | -
472 | | -
473 | | -void __blkg_release_rcu(struct rcu_head *rcu);
474 | 475 |
475 | 476 | /**
476 | 477 |  * blkg_put - put a blkg reference
.. | ..
478 | 479 |  */
479 | 480 | static inline void blkg_put(struct blkcg_gq *blkg)
480 | 481 | {
481 | | -        WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
482 | | -        if (atomic_dec_and_test(&blkg->refcnt))
483 | | -                call_rcu(&blkg->rcu_head, __blkg_release_rcu);
| 482 | +        percpu_ref_put(&blkg->refcnt);
484 | 483 | }
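
With the percpu_ref conversion, the RCU lookup-and-pin pattern becomes (sketch):

static struct blkcg_gq *example_pin_blkg(struct blkcg *blkcg,
                                         struct request_queue *q)
{
        struct blkcg_gq *blkg;

        rcu_read_lock();
        blkg = blkg_lookup(blkcg, q);
        if (!blkg_tryget(blkg))         /* NULL-safe; fails if being freed */
                blkg = NULL;
        rcu_read_unlock();
        return blkg;                    /* release with blkg_put() */
}
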
485 | 484 |
486 | 485 | /**
.. | ..
515 | 514 |         if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
516 | 515 |                                       (p_blkg)->q, false)))
517 | 516 |
518 | | -/**
519 | | - * blk_get_rl - get request_list to use
520 | | - * @q: request_queue of interest
521 | | - * @bio: bio which will be attached to the allocated request (may be %NULL)
522 | | - *
523 | | - * The caller wants to allocate a request from @q to use for @bio. Find
524 | | - * the request_list to use and obtain a reference on it. Should be called
525 | | - * under queue_lock. This function is guaranteed to return non-%NULL
526 | | - * request_list.
527 | | - */
528 | | -static inline struct request_list *blk_get_rl(struct request_queue *q,
529 | | -                struct bio *bio)
| 517 | +bool __blkcg_punt_bio_submit(struct bio *bio);
| 518 | +
| 519 | +static inline bool blkcg_punt_bio_submit(struct bio *bio)
530 | 520 | {
531 | | -        struct blkcg *blkcg;
532 | | -        struct blkcg_gq *blkg;
533 | | -
534 | | -        rcu_read_lock();
535 | | -
536 | | -        blkcg = bio_blkcg(bio);
537 | | -
538 | | -        /* bypass blkg lookup and use @q->root_rl directly for root */
539 | | -        if (blkcg == &blkcg_root)
540 | | -                goto root_rl;
541 | | -
542 | | -        /*
543 | | -         * Try to use blkg->rl. blkg lookup may fail under memory pressure
544 | | -         * or if either the blkcg or queue is going away. Fall back to
545 | | -         * root_rl in such cases.
546 | | -         */
547 | | -        blkg = blkg_lookup(blkcg, q);
548 | | -        if (unlikely(!blkg))
549 | | -                goto root_rl;
550 | | -
551 | | -        blkg_get(blkg);
552 | | -        rcu_read_unlock();
553 | | -        return &blkg->rl;
554 | | -root_rl:
555 | | -        rcu_read_unlock();
556 | | -        return &q->root_rl;
557 | | -}
558 | | -
559 | | -/**
560 | | - * blk_put_rl - put request_list
561 | | - * @rl: request_list to put
562 | | - *
563 | | - * Put the reference acquired by blk_get_rl(). Should be called under
564 | | - * queue_lock.
565 | | - */
566 | | -static inline void blk_put_rl(struct request_list *rl)
567 | | -{
568 | | -        if (rl->blkg->blkcg != &blkcg_root)
569 | | -                blkg_put(rl->blkg);
570 | | -}
571 | | -
572 | | -/**
573 | | - * blk_rq_set_rl - associate a request with a request_list
574 | | - * @rq: request of interest
575 | | - * @rl: target request_list
576 | | - *
577 | | - * Associate @rq with @rl so that accounting and freeing can know the
578 | | - * request_list @rq came from.
579 | | - */
580 | | -static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl)
581 | | -{
582 | | -        rq->rl = rl;
583 | | -}
584 | | -
585 | | -/**
586 | | - * blk_rq_rl - return the request_list a request came from
587 | | - * @rq: request of interest
588 | | - *
589 | | - * Return the request_list @rq is allocated from.
590 | | - */
591 | | -static inline struct request_list *blk_rq_rl(struct request *rq)
592 | | -{
593 | | -        return rq->rl;
594 | | -}
595 | | -
596 | | -struct request_list *__blk_queue_next_rl(struct request_list *rl,
597 | | -                struct request_queue *q);
598 | | -/**
599 | | - * blk_queue_for_each_rl - iterate through all request_lists of a request_queue
600 | | - *
601 | | - * Should be used under queue_lock.
602 | | - */
603 | | -#define blk_queue_for_each_rl(rl, q) \
604 | | -        for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))
605 | | -
606 | | -static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp)
607 | | -{
608 | | -        int ret;
609 | | -
610 | | -        ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
611 | | -        if (ret)
612 | | -                return ret;
613 | | -
614 | | -        atomic64_set(&stat->aux_cnt, 0);
615 | | -        return 0;
616 | | -}
617 | | -
618 | | -static inline void blkg_stat_exit(struct blkg_stat *stat)
619 | | -{
620 | | -        percpu_counter_destroy(&stat->cpu_cnt);
621 | | -}
622 | | -
623 | | -/**
624 | | - * blkg_stat_add - add a value to a blkg_stat
625 | | - * @stat: target blkg_stat
626 | | - * @val: value to add
627 | | - *
628 | | - * Add @val to @stat. The caller must ensure that IRQ on the same CPU
629 | | - * don't re-enter this function for the same counter.
630 | | - */
631 | | -static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
632 | | -{
633 | | -        percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
634 | | -}
635 | | -
636 | | -/**
637 | | - * blkg_stat_read - read the current value of a blkg_stat
638 | | - * @stat: blkg_stat to read
639 | | - */
640 | | -static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
641 | | -{
642 | | -        return percpu_counter_sum_positive(&stat->cpu_cnt);
643 | | -}
644 | | -
645 | | -/**
646 | | - * blkg_stat_reset - reset a blkg_stat
647 | | - * @stat: blkg_stat to reset
648 | | - */
649 | | -static inline void blkg_stat_reset(struct blkg_stat *stat)
650 | | -{
651 | | -        percpu_counter_set(&stat->cpu_cnt, 0);
652 | | -        atomic64_set(&stat->aux_cnt, 0);
653 | | -}
654 | | -
655 | | -/**
656 | | - * blkg_stat_add_aux - add a blkg_stat into another's aux count
657 | | - * @to: the destination blkg_stat
658 | | - * @from: the source
659 | | - *
660 | | - * Add @from's count including the aux one to @to's aux count.
661 | | - */
662 | | -static inline void blkg_stat_add_aux(struct blkg_stat *to,
663 | | -                struct blkg_stat *from)
664 | | -{
665 | | -        atomic64_add(blkg_stat_read(from) + atomic64_read(&from->aux_cnt),
666 | | -                     &to->aux_cnt);
667 | | -}
668 | | -
669 | | -static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
670 | | -{
671 | | -        int i, ret;
672 | | -
673 | | -        for (i = 0; i < BLKG_RWSTAT_NR; i++) {
674 | | -                ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
675 | | -                if (ret) {
676 | | -                        while (--i >= 0)
677 | | -                                percpu_counter_destroy(&rwstat->cpu_cnt[i]);
678 | | -                        return ret;
679 | | -                }
680 | | -                atomic64_set(&rwstat->aux_cnt[i], 0);
681 | | -        }
682 | | -        return 0;
683 | | -}
684 | | -
685 | | -static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
686 | | -{
687 | | -        int i;
688 | | -
689 | | -        for (i = 0; i < BLKG_RWSTAT_NR; i++)
690 | | -                percpu_counter_destroy(&rwstat->cpu_cnt[i]);
691 | | -}
692 | | -
693 | | -/**
694 | | - * blkg_rwstat_add - add a value to a blkg_rwstat
695 | | - * @rwstat: target blkg_rwstat
696 | | - * @op: REQ_OP and flags
697 | | - * @val: value to add
698 | | - *
699 | | - * Add @val to @rwstat. The counters are chosen according to @rw. The
700 | | - * caller is responsible for synchronizing calls to this function.
701 | | - */
702 | | -static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
703 | | -                unsigned int op, uint64_t val)
704 | | -{
705 | | -        struct percpu_counter *cnt;
706 | | -
707 | | -        if (op_is_discard(op))
708 | | -                cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
709 | | -        else if (op_is_write(op))
710 | | -                cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
| 521 | +        if (bio->bi_opf & REQ_CGROUP_PUNT)
| 522 | +                return __blkcg_punt_bio_submit(bio);
711 | 523 |         else
712 | | -                cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
713 | | -
714 | | -        percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
715 | | -
716 | | -        if (op_is_sync(op))
717 | | -                cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
718 | | -        else
719 | | -                cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
720 | | -
721 | | -        percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
| 524 | +                return false;
722 | 525 | }
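
Roughly how the submission path consumes this (paraphrased sketch): a REQ_CGROUP_PUNT bio is bounced to the blkg's async worker and the caller stops:

static blk_qc_t example_submit(struct bio *bio)
{
        if (blkcg_punt_bio_submit(bio))
                return BLK_QC_T_NONE;   /* queued on blkg->async_bios */
        return generic_make_request(bio);
}
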
723 | 526 |
724 | | -/**
725 | | - * blkg_rwstat_read - read the current values of a blkg_rwstat
726 | | - * @rwstat: blkg_rwstat to read
727 | | - *
728 | | - * Read the current snapshot of @rwstat and return it in the aux counts.
729 | | - */
730 | | -static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
| 527 | +static inline void blkcg_bio_issue_init(struct bio *bio)
731 | 528 | {
732 | | -        struct blkg_rwstat result;
733 | | -        int i;
734 | | -
735 | | -        for (i = 0; i < BLKG_RWSTAT_NR; i++)
736 | | -                atomic64_set(&result.aux_cnt[i],
737 | | -                             percpu_counter_sum_positive(&rwstat->cpu_cnt[i]));
738 | | -        return result;
739 | | -}
740 | | -
741 | | -/**
742 | | - * blkg_rwstat_total - read the total count of a blkg_rwstat
743 | | - * @rwstat: blkg_rwstat to read
744 | | - *
745 | | - * Return the total count of @rwstat regardless of the IO direction. This
746 | | - * function can be called without synchronization and takes care of u64
747 | | - * atomicity.
748 | | - */
749 | | -static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
750 | | -{
751 | | -        struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);
752 | | -
753 | | -        return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
754 | | -                atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
755 | | -}
756 | | -
757 | | -/**
758 | | - * blkg_rwstat_reset - reset a blkg_rwstat
759 | | - * @rwstat: blkg_rwstat to reset
760 | | - */
761 | | -static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
762 | | -{
763 | | -        int i;
764 | | -
765 | | -        for (i = 0; i < BLKG_RWSTAT_NR; i++) {
766 | | -                percpu_counter_set(&rwstat->cpu_cnt[i], 0);
767 | | -                atomic64_set(&rwstat->aux_cnt[i], 0);
768 | | -        }
769 | | -}
770 | | -
771 | | -/**
772 | | - * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
773 | | - * @to: the destination blkg_rwstat
774 | | - * @from: the source
775 | | - *
776 | | - * Add @from's count including the aux one to @to's aux count.
777 | | - */
778 | | -static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
779 | | -                struct blkg_rwstat *from)
780 | | -{
781 | | -        u64 sum[BLKG_RWSTAT_NR];
782 | | -        int i;
783 | | -
784 | | -        for (i = 0; i < BLKG_RWSTAT_NR; i++)
785 | | -                sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]);
786 | | -
787 | | -        for (i = 0; i < BLKG_RWSTAT_NR; i++)
788 | | -                atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]),
789 | | -                             &to->aux_cnt[i]);
790 | | -}
791 | | -
792 | | -#ifdef CONFIG_BLK_DEV_THROTTLING
793 | | -extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
794 | | -                struct bio *bio);
795 | | -#else
796 | | -static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
797 | | -                struct bio *bio) { return false; }
798 | | -#endif
799 | | -
800 | | -static inline bool blkcg_bio_issue_check(struct request_queue *q,
801 | | -                struct bio *bio)
802 | | -{
803 | | -        struct blkcg *blkcg;
804 | | -        struct blkcg_gq *blkg;
805 | | -        bool throtl = false;
806 | | -
807 | | -        rcu_read_lock();
808 | | -        blkcg = bio_blkcg(bio);
809 | | -
810 | | -        /* associate blkcg if bio hasn't attached one */
811 | | -        bio_associate_blkcg(bio, &blkcg->css);
812 | | -
813 | | -        blkg = blkg_lookup(blkcg, q);
814 | | -        if (unlikely(!blkg)) {
815 | | -                spin_lock_irq(q->queue_lock);
816 | | -                blkg = blkg_lookup_create(blkcg, q);
817 | | -                if (IS_ERR(blkg))
818 | | -                        blkg = NULL;
819 | | -                spin_unlock_irq(q->queue_lock);
820 | | -        }
821 | | -
822 | | -        throtl = blk_throtl_bio(q, blkg, bio);
823 | | -
824 | | -        if (!throtl) {
825 | | -                blkg = blkg ?: q->root_blkg;
826 | | -                /*
827 | | -                 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
828 | | -                 * is a split bio and we would have already accounted for the
829 | | -                 * size of the bio.
830 | | -                 */
831 | | -                if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
832 | | -                        blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
833 | | -                                        bio->bi_iter.bi_size);
834 | | -                blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
835 | | -        }
836 | | -
837 | | -        rcu_read_unlock();
838 | | -        return !throtl;
| 529 | +        bio_issue_init(&bio->bi_issue, bio_sectors(bio));
839 | 530 | }
840 | 531 |
841 | 532 | static inline void blkcg_use_delay(struct blkcg_gq *blkg)
842 | 533 | {
| 534 | +        if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
| 535 | +                return;
843 | 536 |         if (atomic_add_return(1, &blkg->use_delay) == 1)
844 | 537 |                 atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
845 | 538 | }
.. | ..
848 | 541 | {
849 | 542 |         int old = atomic_read(&blkg->use_delay);
850 | 543 |
| 544 | +        if (WARN_ON_ONCE(old < 0))
| 545 | +                return 0;
851 | 546 |         if (old == 0)
852 | 547 |                 return 0;
853 | 548 |
.. | ..
872 | 567 |         return 1;
873 | 568 | }
874 | 569 |
| 570 | +/**
| 571 | + * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount
| 572 | + * @blkg: target blkg
| 573 | + * @delay: delay duration in nsecs
| 574 | + *
| 575 | + * When enabled with this function, the delay is not decayed and must be
| 576 | + * explicitly cleared with blkcg_clear_delay(). Must not be mixed with
| 577 | + * blkcg_[un]use_delay() and blkcg_add_delay() usages.
| 578 | + */
| 579 | +static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay)
| 580 | +{
| 581 | +        int old = atomic_read(&blkg->use_delay);
| 582 | +
| 583 | +        /* We only want 1 person setting the congestion count for this blkg. */
| 584 | +        if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old)
| 585 | +                atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
| 586 | +
| 587 | +        atomic64_set(&blkg->delay_nsec, delay);
| 588 | +}
| 589 | +
| 590 | +/**
| 591 | + * blkcg_clear_delay - Disable allocator delay mechanism
| 592 | + * @blkg: target blkg
| 593 | + *
| 594 | + * Disable use_delay mechanism. See blkcg_set_delay().
| 595 | + */
875 | 596 | static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
876 | 597 | {
877 | 598 |         int old = atomic_read(&blkg->use_delay);
878 | | -        if (!old)
879 | | -                return;
| 599 | +
880 | 600 |         /* We only want 1 person clearing the congestion count for this blkg. */
881 | | -        while (old) {
882 | | -                int cur = atomic_cmpxchg(&blkg->use_delay, old, 0);
883 | | -                if (cur == old) {
884 | | -                        atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
885 | | -                        break;
886 | | -                }
887 | | -                old = cur;
888 | | -        }
| 601 | +        if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old)
| 602 | +                atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
889 | 603 | }
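
A usage sketch for the fixed-delay mode (hypothetical caller; the 10ms value is arbitrary). Unlike blkcg_add_delay(), the delay is held until explicitly cleared:

static void example_throttle(struct blkcg_gq *blkg, bool on)
{
        /* must not be mixed with blkcg_[un]use_delay()/blkcg_add_delay() */
        if (on)
                blkcg_set_delay(blkg, 10 * NSEC_PER_MSEC);
        else
                blkcg_clear_delay(blkg);
}
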
890 | 604 |
| 605 | +/**
| 606 | + * blk_cgroup_mergeable - Determine whether to allow or disallow merges
| 607 | + * @rq: request to merge into
| 608 | + * @bio: bio to merge
| 609 | + *
| 610 | + * @bio and @rq should belong to the same cgroup and their issue_as_root should
| 611 | + * match. The latter is necessary as we don't want to throttle e.g. a metadata
| 612 | + * update because it happens to be next to a regular IO.
| 613 | + */
| 614 | +static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio)
| 615 | +{
| 616 | +        return rq->bio->bi_blkg == bio->bi_blkg &&
| 617 | +                bio_issue_as_root_blkg(rq->bio) == bio_issue_as_root_blkg(bio);
| 618 | +}
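
A sketch of the intended merge-path gate (hypothetical caller):

static bool example_allow_merge(struct request *rq, struct bio *bio)
{
        /* refuse cross-cgroup or issue_as_root-mismatched merges */
        if (!blk_cgroup_mergeable(rq, bio))
                return false;
        return true;    /* remaining elevator/limit checks go here */
}
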
| 619 | +
| 620 | +void blk_cgroup_bio_start(struct bio *bio);
891 | 621 | void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
892 | 622 | void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
893 | 623 | void blkcg_maybe_throttle_current(void);
.. | ..
921 | 651 | static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
922 | 652 | { return NULL; }
923 | 653 | static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
924 | | -static inline void blkcg_drain_queue(struct request_queue *q) { }
925 | 654 | static inline void blkcg_exit_queue(struct request_queue *q) { }
926 | 655 | static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
927 | 656 | static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
.. | ..
930 | 659 | static inline void blkcg_deactivate_policy(struct request_queue *q,
931 | 660 |                 const struct blkcg_policy *pol) { }
932 | 661 |
| 662 | +static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
933 | 663 | static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
934 | 664 |
935 | 665 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
.. | ..
939 | 669 | static inline void blkg_get(struct blkcg_gq *blkg) { }
940 | 670 | static inline void blkg_put(struct blkcg_gq *blkg) { }
941 | 671 |
942 | | -static inline struct request_list *blk_get_rl(struct request_queue *q,
943 | | -                struct bio *bio) { return &q->root_rl; }
944 | | -static inline void blk_put_rl(struct request_list *rl) { }
945 | | -static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
946 | | -static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
947 | | -
948 | | -static inline bool blkcg_bio_issue_check(struct request_queue *q,
949 | | -                struct bio *bio) { return true; }
| 672 | +static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
| 673 | +static inline void blkcg_bio_issue_init(struct bio *bio) { }
| 674 | +static inline void blk_cgroup_bio_start(struct bio *bio) { }
| 675 | +static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; }
950 | 676 |
951 | 677 | #define blk_queue_for_each_rl(rl, q) \
952 | 678 |         for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)