@@ -1 +1 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Common Block IO controller cgroup interface
  *
@@ -28 +29 @@
 #include <linux/ctype.h>
 #include <linux/blk-cgroup.h>
 #include <linux/tracehook.h>
+#include <linux/psi.h>
 #include "blk.h"
+#include "blk-ioprio.h"
 
 #define MAX_KEY_LEN 100
 
@@ -46 +49 @@
 EXPORT_SYMBOL_GPL(blkcg_root);
 
 struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css;
+EXPORT_SYMBOL_GPL(blkcg_root_css);
 
 static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
 
 static LIST_HEAD(all_blkcgs);    /* protected by blkcg_pol_mutex */
 
-static bool blkcg_debug_stats = false;
+bool blkcg_debug_stats = false;
+static struct workqueue_struct *blkcg_punt_bio_wq;
 
 static bool blkcg_policy_enabled(struct request_queue *q,
                                  const struct blkcg_policy *pol)
@@ -76 +81 @@
         if (blkg->pd[i])
             blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
 
-    if (blkg->blkcg != &blkcg_root)
-        blk_exit_rl(blkg->q, &blkg->rl);
-
-    blkg_rwstat_exit(&blkg->stat_ios);
-    blkg_rwstat_exit(&blkg->stat_bytes);
+    free_percpu(blkg->iostat_cpu);
+    percpu_ref_exit(&blkg->refcnt);
     kfree(blkg);
+}
+
+static void __blkg_release(struct rcu_head *rcu)
+{
+    struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
+
+    WARN_ON(!bio_list_empty(&blkg->async_bios));
+
+    /* release the blkcg and parent blkg refs this blkg has been holding */
+    css_put(&blkg->blkcg->css);
+    if (blkg->parent)
+        blkg_put(blkg->parent);
+    blkg_free(blkg);
+}
+
+/*
+ * A group is RCU protected, but having an rcu lock does not mean that one
+ * can access all the fields of blkg and assume these are valid.  For
+ * example, don't try to follow throtl_data and request queue links.
+ *
+ * Having a reference to blkg under an rcu allows accesses to only values
+ * local to groups like group stats and group rate limits.
+ */
+static void blkg_release(struct percpu_ref *ref)
+{
+    struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt);
+
+    call_rcu(&blkg->rcu_head, __blkg_release);
+}
+
+static void blkg_async_bio_workfn(struct work_struct *work)
+{
+    struct blkcg_gq *blkg = container_of(work, struct blkcg_gq,
+                                         async_bio_work);
+    struct bio_list bios = BIO_EMPTY_LIST;
+    struct bio *bio;
+    struct blk_plug plug;
+    bool need_plug = false;
+
+    /* as long as there are pending bios, @blkg can't go away */
+    spin_lock_bh(&blkg->async_bio_lock);
+    bio_list_merge(&bios, &blkg->async_bios);
+    bio_list_init(&blkg->async_bios);
+    spin_unlock_bh(&blkg->async_bio_lock);
+
+    /* start plug only when bio_list contains at least 2 bios */
+    if (bios.head && bios.head->bi_next) {
+        need_plug = true;
+        blk_start_plug(&plug);
+    }
+    while ((bio = bio_list_pop(&bios)))
+        submit_bio(bio);
+    if (need_plug)
+        blk_finish_plug(&plug);
 }
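The workfn above splices the shared list out under the lock, then submits with the lock dropped, plugging only when there is more than one bio to amortize the batch setup. A minimal userspace sketch of the same splice-and-batch pattern, with a pthread mutex standing in for the spinlock (struct item and the function names are illustrative, not kernel API):

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct item { struct item *next; int id; };

static pthread_mutex_t pending_lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *pending;            /* producers push here */

static void submit(struct item *it) { printf("submit %d\n", it->id); }

/* Splice the shared list out under the lock, then work on it unlocked. */
static void drain_pending(void)
{
    struct item *batch;
    bool batched;

    pthread_mutex_lock(&pending_lock);
    batch = pending;                    /* take the whole list... */
    pending = NULL;                     /* ...and leave an empty one behind */
    pthread_mutex_unlock(&pending_lock);

    /* batch setup only pays off when there are at least two items */
    batched = batch && batch->next;
    if (batched)
        printf("start batch\n");
    while (batch) {
        struct item *next = batch->next;
        submit(batch);
        batch = next;
    }
    if (batched)
        printf("finish batch\n");
}

int main(void)
{
    struct item a = { NULL, 1 }, b = { &a, 2 };

    pending = &b;                       /* two queued items */
    drain_pending();
    return 0;
}
```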
 
 /**
@@ -96 +152 @@
                        gfp_t gfp_mask)
 {
     struct blkcg_gq *blkg;
-    int i;
+    int i, cpu;
 
     /* alloc and init base part */
     blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
     if (!blkg)
         return NULL;
 
-    if (blkg_rwstat_init(&blkg->stat_bytes, gfp_mask) ||
-        blkg_rwstat_init(&blkg->stat_ios, gfp_mask))
+    if (percpu_ref_init(&blkg->refcnt, blkg_release, 0, gfp_mask))
+        goto err_free;
+
+    blkg->iostat_cpu = alloc_percpu_gfp(struct blkg_iostat_set, gfp_mask);
+    if (!blkg->iostat_cpu)
         goto err_free;
 
     blkg->q = q;
     INIT_LIST_HEAD(&blkg->q_node);
+    spin_lock_init(&blkg->async_bio_lock);
+    bio_list_init(&blkg->async_bios);
+    INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
     blkg->blkcg = blkcg;
-    atomic_set(&blkg->refcnt, 1);
 
-    /* root blkg uses @q->root_rl, init rl only for !root blkgs */
-    if (blkcg != &blkcg_root) {
-        if (blk_init_rl(&blkg->rl, q, gfp_mask))
-            goto err_free;
-        blkg->rl.blkg = blkg;
-    }
+    u64_stats_init(&blkg->iostat.sync);
+    for_each_possible_cpu(cpu)
+        u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync);
 
     for (i = 0; i < BLKCG_MAX_POLS; i++) {
         struct blkcg_policy *pol = blkcg_policy[i];
@@ -127 +185 @@
             continue;
 
         /* alloc per-policy data and attach it to blkg */
-        pd = pol->pd_alloc_fn(gfp_mask, q->node);
+        pd = pol->pd_alloc_fn(gfp_mask, q, blkcg);
         if (!pd)
             goto err_free;
 
@@ -157 +215 @@
     blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
     if (blkg && blkg->q == q) {
         if (update_hint) {
-            lockdep_assert_held(q->queue_lock);
+            lockdep_assert_held(&q->queue_lock);
             rcu_assign_pointer(blkcg->blkg_hint, blkg);
         }
         return blkg;
@@ -176 +234 @@
                                     struct blkcg_gq *new_blkg)
 {
     struct blkcg_gq *blkg;
-    struct bdi_writeback_congested *wb_congested;
     int i, ret;
 
     WARN_ON_ONCE(!rcu_read_lock_held());
-    lockdep_assert_held(q->queue_lock);
+    lockdep_assert_held(&q->queue_lock);
+
+    /* request_queue is dying, do not create/recreate a blkg */
+    if (blk_queue_dying(q)) {
+        ret = -ENODEV;
+        goto err_free_blkg;
+    }
 
     /* blkg holds a reference to blkcg */
     if (!css_tryget_online(&blkcg->css)) {
@@ -188 +251 @@
         goto err_free_blkg;
     }
 
-    wb_congested = wb_congested_get_create(q->backing_dev_info,
-                                           blkcg->css.id,
-                                           GFP_NOWAIT | __GFP_NOWARN);
-    if (!wb_congested) {
-        ret = -ENOMEM;
-        goto err_put_css;
-    }
-
     /* allocate */
     if (!new_blkg) {
         new_blkg = blkg_alloc(blkcg, q, GFP_NOWAIT | __GFP_NOWARN);
         if (unlikely(!new_blkg)) {
             ret = -ENOMEM;
-            goto err_put_congested;
+            goto err_put_css;
         }
     }
     blkg = new_blkg;
-    blkg->wb_congested = wb_congested;
 
     /* link parent */
     if (blkcg_parent(blkcg)) {
         blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
         if (WARN_ON_ONCE(!blkg->parent)) {
             ret = -ENODEV;
-            goto err_put_congested;
+            goto err_put_css;
         }
         blkg_get(blkg->parent);
     }
@@ -249 +303 @@
     blkg_put(blkg);
     return ERR_PTR(ret);
 
-err_put_congested:
-    wb_congested_put(wb_congested);
 err_put_css:
     css_put(&blkcg->css);
 err_free_blkg:
@@ -266 +318 @@
  * Lookup blkg for the @blkcg - @q pair.  If it doesn't exist, try to
  * create one.  blkg creation is performed recursively from blkcg_root such
  * that all non-root blkg's have access to the parent blkg.  This function
- * should be called under RCU read lock and @q->queue_lock.
+ * should be called under RCU read lock and takes @q->queue_lock.
  *
- * Returns pointer to the looked up or created blkg on success, ERR_PTR()
- * value on error.  If @q is dead, returns ERR_PTR(-EINVAL).  If @q is not
- * dead and bypassing, returns ERR_PTR(-EBUSY).
+ * Returns the blkg or the closest blkg if blkg_create() fails as it walks
+ * down from root.
  */
-struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
-                                    struct request_queue *q)
+static struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
+                                           struct request_queue *q)
 {
     struct blkcg_gq *blkg;
+    unsigned long flags;
 
     WARN_ON_ONCE(!rcu_read_lock_held());
-    lockdep_assert_held(q->queue_lock);
 
-    /*
-     * This could be the first entry point of blkcg implementation and
-     * we shouldn't allow anything to go through for a bypassing queue.
-     */
-    if (unlikely(blk_queue_bypass(q)))
-        return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
-
-    blkg = __blkg_lookup(blkcg, q, true);
+    blkg = blkg_lookup(blkcg, q);
     if (blkg)
         return blkg;
 
+    spin_lock_irqsave(&q->queue_lock, flags);
+    blkg = __blkg_lookup(blkcg, q, true);
+    if (blkg)
+        goto found;
+
     /*
      * Create blkgs walking down from blkcg_root to @blkcg, so that all
-     * non-root blkgs have access to their parents.
+     * non-root blkgs have access to their parents.  Returns the closest
+     * blkg to the intended blkg should blkg_create() fail.
      */
     while (true) {
         struct blkcg *pos = blkcg;
         struct blkcg *parent = blkcg_parent(blkcg);
+        struct blkcg_gq *ret_blkg = q->root_blkg;
 
-        while (parent && !__blkg_lookup(parent, q, false)) {
+        while (parent) {
+            blkg = __blkg_lookup(parent, q, false);
+            if (blkg) {
+                /* remember closest blkg */
+                ret_blkg = blkg;
+                break;
+            }
             pos = parent;
             parent = blkcg_parent(parent);
         }
 
         blkg = blkg_create(pos, q, NULL);
-        if (pos == blkcg || IS_ERR(blkg))
-            return blkg;
+        if (IS_ERR(blkg)) {
+            blkg = ret_blkg;
+            break;
+        }
+        if (pos == blkcg)
+            break;
     }
+
+found:
+    spin_unlock_irqrestore(&q->queue_lock, flags);
+    return blkg;
 }
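The rewritten blkg_lookup_create() first climbs toward the root to find the deepest ancestor that already has a blkg, then creates the missing ones top-down, one per outer-loop iteration, and degrades to returning the closest existing blkg when creation fails. A hedged sketch of that walk on a generic parent-linked tree (all names here are hypothetical, not kernel API):

```c
#include <stdlib.h>

struct node {
    struct node *parent;
    void *payload;              /* NULL until "created" for this queue */
};

/* Hypothetical allocator standing in for blkg_create(); may fail. */
static void *create_payload(struct node *n) { (void)n; return malloc(1); }

/*
 * Ensure every node on the path root..target has a payload, creating
 * top-down so a child is never created before its parent.  On failure,
 * return the closest ancestor that already has one (the "closest blkg").
 */
static struct node *ensure_path(struct node *target, struct node *root)
{
    if (target->payload)
        return target;

    while (1) {
        struct node *pos = target;
        struct node *parent = target->parent;
        struct node *closest = root;        /* fallback answer */

        /* climb until we find the deepest already-created ancestor */
        while (parent) {
            if (parent->payload) {
                closest = parent;
                break;
            }
            pos = parent;
            parent = parent->parent;
        }

        /* create the highest missing node on the path */
        pos->payload = create_payload(pos);
        if (!pos->payload)
            return closest;                 /* degrade gracefully */
        if (pos == target)
            return target;                  /* whole path exists now */
    }
}
```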
 
 static void blkg_destroy(struct blkcg_gq *blkg)
 {
     struct blkcg *blkcg = blkg->blkcg;
-    struct blkcg_gq *parent = blkg->parent;
     int i;
 
-    lockdep_assert_held(blkg->q->queue_lock);
+    lockdep_assert_held(&blkg->q->queue_lock);
     lockdep_assert_held(&blkcg->lock);
 
     /* Something wrong if we are trying to remove same group twice */
@@ -328 +392 @@
 
         if (blkg->pd[i] && pol->pd_offline_fn)
             pol->pd_offline_fn(blkg->pd[i]);
-    }
-
-    if (parent) {
-        blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
-        blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
     }
 
     blkg->online = false;
@@ -353 +412 @@
      * Put the reference taken at the time of creation so that when all
      * queues are gone, group can be destroyed.
      */
-    blkg_put(blkg);
+    percpu_ref_kill(&blkg->refcnt);
 }
 
 /**
@@ -366 +425 @@
 {
     struct blkcg_gq *blkg, *n;
 
-    lockdep_assert_held(q->queue_lock);
-
+    spin_lock_irq(&q->queue_lock);
     list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
         struct blkcg *blkcg = blkg->blkcg;
 
@@ -377 +435 @@
     }
 
     q->root_blkg = NULL;
-    q->root_rl.blkg = NULL;
-}
-
-/*
- * A group is RCU protected, but having an rcu lock does not mean that one
- * can access all the fields of blkg and assume these are valid.  For
- * example, don't try to follow throtl_data and request queue links.
- *
- * Having a reference to blkg under an rcu allows accesses to only values
- * local to groups like group stats and group rate limits.
- */
-void __blkg_release_rcu(struct rcu_head *rcu_head)
-{
-    struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
-
-    /* release the blkcg and parent blkg refs this blkg has been holding */
-    css_put(&blkg->blkcg->css);
-    if (blkg->parent)
-        blkg_put(blkg->parent);
-
-    wb_congested_put(blkg->wb_congested);
-
-    blkg_free(blkg);
-}
-EXPORT_SYMBOL_GPL(__blkg_release_rcu);
-
-/*
- * The next function used by blk_queue_for_each_rl().  It's a bit tricky
- * because the root blkg uses @q->root_rl instead of its own rl.
- */
-struct request_list *__blk_queue_next_rl(struct request_list *rl,
-                                         struct request_queue *q)
-{
-    struct list_head *ent;
-    struct blkcg_gq *blkg;
-
-    /*
-     * Determine the current blkg list_head.  The first entry is
-     * root_rl which is off @q->blkg_list and mapped to the head.
-     */
-    if (rl == &q->root_rl) {
-        ent = &q->blkg_list;
-        /* There are no more block groups, hence no request lists */
-        if (list_empty(ent))
-            return NULL;
-    } else {
-        blkg = container_of(rl, struct blkcg_gq, rl);
-        ent = &blkg->q_node;
-    }
-
-    /* walk to the next list_head, skip root blkcg */
-    ent = ent->next;
-    if (ent == &q->root_blkg->q_node)
-        ent = ent->next;
-    if (ent == &q->blkg_list)
-        return NULL;
-
-    blkg = container_of(ent, struct blkcg_gq, q_node);
-    return &blkg->rl;
+    spin_unlock_irq(&q->queue_lock);
 }
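The new lifetime scheme is a two-stage release: blkg_destroy() only kills the percpu refcount, and the actual free happens from the release callback once the last reference drops, further deferred through call_rcu(). A compressed userspace analogue using a plain C11 atomic counter (the obj_* names are illustrative, and a direct free stands in for the RCU-deferred one):

```c
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
    atomic_long refcnt;
    void (*release)(struct obj *);      /* runs when the count hits zero */
};

static void obj_get(struct obj *o) { atomic_fetch_add(&o->refcnt, 1); }

static void obj_put(struct obj *o)
{
    if (atomic_fetch_sub(&o->refcnt, 1) == 1)
        o->release(o);                  /* last ref gone: stage two */
}

/* "kill" = drop the initial reference taken at creation time */
static void obj_kill(struct obj *o) { obj_put(o); }

static void obj_release(struct obj *o)
{
    /* the kernel defers this via call_rcu(); here we free directly */
    printf("releasing\n");
    free(o);
}

int main(void)
{
    struct obj *o = malloc(sizeof(*o));

    atomic_init(&o->refcnt, 1);         /* creation reference */
    o->release = obj_release;

    obj_get(o);                         /* a user still holds it */
    obj_kill(o);                        /* unlink: object now dying */
    obj_put(o);                         /* last user drops: release runs */
    return 0;
}
```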
 
 static int blkcg_reset_stats(struct cgroup_subsys_state *css,
@@ -443 +443 @@
 {
     struct blkcg *blkcg = css_to_blkcg(css);
     struct blkcg_gq *blkg;
-    int i;
+    int i, cpu;
 
     mutex_lock(&blkcg_pol_mutex);
     spin_lock_irq(&blkcg->lock);
@@ -454 +454 @@
      * anyway.  If you get hit by a race, retry.
      */
     hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
-        blkg_rwstat_reset(&blkg->stat_bytes);
-        blkg_rwstat_reset(&blkg->stat_ios);
+        for_each_possible_cpu(cpu) {
+            struct blkg_iostat_set *bis =
+                per_cpu_ptr(blkg->iostat_cpu, cpu);
+            memset(bis, 0, sizeof(*bis));
+        }
+        memset(&blkg->iostat, 0, sizeof(blkg->iostat));
 
         for (i = 0; i < BLKCG_MAX_POLS; i++) {
             struct blkcg_policy *pol = blkcg_policy[i];
@@ -477 +481 @@
         return bdi_dev_name(blkg->q->backing_dev_info);
     return NULL;
 }
-EXPORT_SYMBOL_GPL(blkg_dev_name);
 
 /**
  * blkcg_print_blkgs - helper for printing per-blkg data
@@ -508 +511 @@
 
     rcu_read_lock();
     hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
-        spin_lock_irq(blkg->q->queue_lock);
+        spin_lock_irq(&blkg->q->queue_lock);
         if (blkcg_policy_enabled(blkg->q, pol))
             total += prfill(sf, blkg->pd[pol->plid], data);
-        spin_unlock_irq(blkg->q->queue_lock);
+        spin_unlock_irq(&blkg->q->queue_lock);
     }
     rcu_read_unlock();
 
@@ -540 +543 @@
 }
 EXPORT_SYMBOL_GPL(__blkg_prfill_u64);
 
-/**
- * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
- * @sf: seq_file to print to
- * @pd: policy private data of interest
- * @rwstat: rwstat to print
- *
- * Print @rwstat to @sf for the device assocaited with @pd.
- */
-u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
-                         const struct blkg_rwstat *rwstat)
-{
-    static const char *rwstr[] = {
-        [BLKG_RWSTAT_READ]    = "Read",
-        [BLKG_RWSTAT_WRITE]   = "Write",
-        [BLKG_RWSTAT_SYNC]    = "Sync",
-        [BLKG_RWSTAT_ASYNC]   = "Async",
-        [BLKG_RWSTAT_DISCARD] = "Discard",
-    };
-    const char *dname = blkg_dev_name(pd->blkg);
-    u64 v;
-    int i;
-
-    if (!dname)
-        return 0;
-
-    for (i = 0; i < BLKG_RWSTAT_NR; i++)
-        seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
-                   (unsigned long long)atomic64_read(&rwstat->aux_cnt[i]));
-
-    v = atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_READ]) +
-        atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_WRITE]) +
-        atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_DISCARD]);
-    seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
-    return v;
-}
-EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat);
-
-/**
- * blkg_prfill_stat - prfill callback for blkg_stat
- * @sf: seq_file to print to
- * @pd: policy private data of interest
- * @off: offset to the blkg_stat in @pd
- *
- * prfill callback for printing a blkg_stat.
- */
-u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off)
-{
-    return __blkg_prfill_u64(sf, pd, blkg_stat_read((void *)pd + off));
-}
-EXPORT_SYMBOL_GPL(blkg_prfill_stat);
-
-/**
- * blkg_prfill_rwstat - prfill callback for blkg_rwstat
- * @sf: seq_file to print to
- * @pd: policy private data of interest
- * @off: offset to the blkg_rwstat in @pd
- *
- * prfill callback for printing a blkg_rwstat.
- */
-u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
-                       int off)
-{
-    struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd + off);
-
-    return __blkg_prfill_rwstat(sf, pd, &rwstat);
-}
-EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
-
-static u64 blkg_prfill_rwstat_field(struct seq_file *sf,
-                                    struct blkg_policy_data *pd, int off)
-{
-    struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd->blkg + off);
-
-    return __blkg_prfill_rwstat(sf, pd, &rwstat);
-}
-
-/**
- * blkg_print_stat_bytes - seq_show callback for blkg->stat_bytes
- * @sf: seq_file to print to
- * @v: unused
- *
- * To be used as cftype->seq_show to print blkg->stat_bytes.
- * cftype->private must be set to the blkcg_policy.
- */
-int blkg_print_stat_bytes(struct seq_file *sf, void *v)
-{
-    blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
-                      blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private,
-                      offsetof(struct blkcg_gq, stat_bytes), true);
-    return 0;
-}
-EXPORT_SYMBOL_GPL(blkg_print_stat_bytes);
-
-/**
- * blkg_print_stat_bytes - seq_show callback for blkg->stat_ios
- * @sf: seq_file to print to
- * @v: unused
- *
- * To be used as cftype->seq_show to print blkg->stat_ios.  cftype->private
- * must be set to the blkcg_policy.
- */
-int blkg_print_stat_ios(struct seq_file *sf, void *v)
-{
-    blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
-                      blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private,
-                      offsetof(struct blkcg_gq, stat_ios), true);
-    return 0;
-}
-EXPORT_SYMBOL_GPL(blkg_print_stat_ios);
-
-static u64 blkg_prfill_rwstat_field_recursive(struct seq_file *sf,
-                                              struct blkg_policy_data *pd,
-                                              int off)
-{
-    struct blkg_rwstat rwstat = blkg_rwstat_recursive_sum(pd->blkg,
-                                                          NULL, off);
-    return __blkg_prfill_rwstat(sf, pd, &rwstat);
-}
-
-/**
- * blkg_print_stat_bytes_recursive - recursive version of blkg_print_stat_bytes
- * @sf: seq_file to print to
- * @v: unused
- */
-int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v)
-{
-    blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
-                      blkg_prfill_rwstat_field_recursive,
-                      (void *)seq_cft(sf)->private,
-                      offsetof(struct blkcg_gq, stat_bytes), true);
-    return 0;
-}
-EXPORT_SYMBOL_GPL(blkg_print_stat_bytes_recursive);
-
-/**
- * blkg_print_stat_ios_recursive - recursive version of blkg_print_stat_ios
- * @sf: seq_file to print to
- * @v: unused
- */
-int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v)
-{
-    blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
-                      blkg_prfill_rwstat_field_recursive,
-                      (void *)seq_cft(sf)->private,
-                      offsetof(struct blkcg_gq, stat_ios), true);
-    return 0;
-}
-EXPORT_SYMBOL_GPL(blkg_print_stat_ios_recursive);
-
-/**
- * blkg_stat_recursive_sum - collect hierarchical blkg_stat
- * @blkg: blkg of interest
- * @pol: blkcg_policy which contains the blkg_stat
- * @off: offset to the blkg_stat in blkg_policy_data or @blkg
- *
- * Collect the blkg_stat specified by @blkg, @pol and @off and all its
- * online descendants and their aux counts.  The caller must be holding the
- * queue lock for online tests.
- *
- * If @pol is NULL, blkg_stat is at @off bytes into @blkg; otherwise, it is
- * at @off bytes into @blkg's blkg_policy_data of the policy.
- */
-u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
-                            struct blkcg_policy *pol, int off)
-{
-    struct blkcg_gq *pos_blkg;
-    struct cgroup_subsys_state *pos_css;
-    u64 sum = 0;
-
-    lockdep_assert_held(blkg->q->queue_lock);
-
-    rcu_read_lock();
-    blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
-        struct blkg_stat *stat;
-
-        if (!pos_blkg->online)
-            continue;
-
-        if (pol)
-            stat = (void *)blkg_to_pd(pos_blkg, pol) + off;
-        else
-            stat = (void *)blkg + off;
-
-        sum += blkg_stat_read(stat) + atomic64_read(&stat->aux_cnt);
-    }
-    rcu_read_unlock();
-
-    return sum;
-}
-EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum);
-
-/**
- * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
- * @blkg: blkg of interest
- * @pol: blkcg_policy which contains the blkg_rwstat
- * @off: offset to the blkg_rwstat in blkg_policy_data or @blkg
- *
- * Collect the blkg_rwstat specified by @blkg, @pol and @off and all its
- * online descendants and their aux counts.  The caller must be holding the
- * queue lock for online tests.
- *
- * If @pol is NULL, blkg_rwstat is at @off bytes into @blkg; otherwise, it
- * is at @off bytes into @blkg's blkg_policy_data of the policy.
- */
-struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
-                                             struct blkcg_policy *pol, int off)
-{
-    struct blkcg_gq *pos_blkg;
-    struct cgroup_subsys_state *pos_css;
-    struct blkg_rwstat sum = { };
-    int i;
-
-    lockdep_assert_held(blkg->q->queue_lock);
-
-    rcu_read_lock();
-    blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
-        struct blkg_rwstat *rwstat;
-
-        if (!pos_blkg->online)
-            continue;
-
-        if (pol)
-            rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off;
-        else
-            rwstat = (void *)pos_blkg + off;
-
-        for (i = 0; i < BLKG_RWSTAT_NR; i++)
-            atomic64_add(atomic64_read(&rwstat->aux_cnt[i]) +
-                         percpu_counter_sum_positive(&rwstat->cpu_cnt[i]),
-                         &sum.aux_cnt[i]);
-    }
-    rcu_read_unlock();
-
-    return sum;
-}
-EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
-
 /* Performs queue bypass and policy enabled checks then looks up blkg. */
 static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
                                           const struct blkcg_policy *pol,
                                           struct request_queue *q)
 {
     WARN_ON_ONCE(!rcu_read_lock_held());
-    lockdep_assert_held(q->queue_lock);
+    lockdep_assert_held(&q->queue_lock);
 
     if (!blkcg_policy_enabled(q, pol))
         return ERR_PTR(-EOPNOTSUPP);
-
-    /*
-     * This could be the first entry point of blkcg implementation and
-     * we shouldn't allow anything to go through for a bypassing queue.
-     */
-    if (unlikely(blk_queue_bypass(q)))
-        return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
-
     return __blkg_lookup(blkcg, q, true /* update_hint */);
+}
+
+/**
+ * blkcg_conf_get_disk - parse MAJ:MIN and get the matching gendisk
+ * @inputp: input string pointer
+ *
+ * Parse the device node prefix part, MAJ:MIN, of per-blkg config update
+ * from @input and get and return the matching gendisk.  *@inputp is
+ * updated to point past the device node prefix.  Returns an ERR_PTR()
+ * value on error.
+ *
+ * Use this function iff blkg_conf_prep() can't be used for some reason.
+ */
+struct gendisk *blkcg_conf_get_disk(char **inputp)
+{
+    char *input = *inputp;
+    unsigned int major, minor;
+    struct gendisk *disk;
+    int key_len, part;
+
+    if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
+        return ERR_PTR(-EINVAL);
+
+    input += key_len;
+    if (!isspace(*input))
+        return ERR_PTR(-EINVAL);
+    input = skip_spaces(input);
+
+    disk = get_gendisk(MKDEV(major, minor), &part);
+    if (!disk)
+        return ERR_PTR(-ENODEV);
+    if (part) {
+        put_disk_and_module(disk);
+        return ERR_PTR(-ENODEV);
+    }
+
+    *inputp = input;
+    return disk;
 }
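For reference, the same MAJ:MIN parse can be exercised in userspace; this standalone sketch mirrors the sscanf("%u:%u%n") trick for learning how many characters the prefix consumed, with a hand-rolled loop in place of the kernel's skip_spaces():

```c
#include <ctype.h>
#include <stdio.h>

/* Parse "MAJ:MIN <body>" the way the kernel helper does. */
static int parse_majmin(const char *input, unsigned *maj, unsigned *min,
                        const char **body)
{
    int key_len;

    if (sscanf(input, "%u:%u%n", maj, min, &key_len) != 2)
        return -1;
    input += key_len;
    if (!isspace((unsigned char)*input))    /* a separator must follow */
        return -1;
    while (isspace((unsigned char)*input))  /* skip_spaces() equivalent */
        input++;
    *body = input;
    return 0;
}

int main(void)
{
    unsigned maj, min;
    const char *body;

    if (parse_majmin("8:16 rbps=1048576", &maj, &min, &body) == 0)
        printf("dev %u:%u, body \"%s\"\n", maj, min, body);
    return 0;
}
```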
 
 /**
@@ -812 +608 @@
  */
 int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
                    char *input, struct blkg_conf_ctx *ctx)
-    __acquires(rcu) __acquires(disk->queue->queue_lock)
+    __acquires(rcu) __acquires(&disk->queue->queue_lock)
 {
     struct gendisk *disk;
     struct request_queue *q;
     struct blkcg_gq *blkg;
-    unsigned int major, minor;
-    int key_len, part, ret;
-    char *body;
+    int ret;
 
-    if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
-        return -EINVAL;
-
-    body = input + key_len;
-    if (!isspace(*body))
-        return -EINVAL;
-    body = skip_spaces(body);
-
-    disk = get_gendisk(MKDEV(major, minor), &part);
-    if (!disk)
-        return -ENODEV;
-    if (part) {
-        ret = -ENODEV;
-        goto fail;
-    }
+    disk = blkcg_conf_get_disk(&input);
+    if (IS_ERR(disk))
+        return PTR_ERR(disk);
 
     q = disk->queue;
 
     rcu_read_lock();
-    spin_lock_irq(q->queue_lock);
+    spin_lock_irq(&q->queue_lock);
 
     blkg = blkg_lookup_check(blkcg, pol, q);
     if (IS_ERR(blkg)) {
@@ -867 +649 @@
         }
 
         /* Drop locks to do new blkg allocation with GFP_KERNEL. */
-        spin_unlock_irq(q->queue_lock);
+        spin_unlock_irq(&q->queue_lock);
         rcu_read_unlock();
 
         new_blkg = blkg_alloc(pos, q, GFP_KERNEL);
@@ -883 +665 @@
         }
 
         rcu_read_lock();
-        spin_lock_irq(q->queue_lock);
+        spin_lock_irq(&q->queue_lock);
 
         blkg = blkg_lookup_check(pos, pol, q);
         if (IS_ERR(blkg)) {
@@ -896 +678 @@
             blkg_free(new_blkg);
         } else {
             blkg = blkg_create(pos, q, new_blkg);
-            if (unlikely(IS_ERR(blkg))) {
+            if (IS_ERR(blkg)) {
                 ret = PTR_ERR(blkg);
                 goto fail_preloaded;
             }
@@ -910 +692 @@
 success:
     ctx->disk = disk;
     ctx->blkg = blkg;
-    ctx->body = body;
+    ctx->body = input;
     return 0;
 
 fail_preloaded:
     radix_tree_preload_end();
 fail_unlock:
-    spin_unlock_irq(q->queue_lock);
+    spin_unlock_irq(&q->queue_lock);
     rcu_read_unlock();
 fail:
     put_disk_and_module(disk);
@@ -942 +724 @@
  * with blkg_conf_prep().
  */
 void blkg_conf_finish(struct blkg_conf_ctx *ctx)
-    __releases(ctx->disk->queue->queue_lock) __releases(rcu)
+    __releases(&ctx->disk->queue->queue_lock) __releases(rcu)
 {
-    spin_unlock_irq(ctx->disk->queue->queue_lock);
+    spin_unlock_irq(&ctx->disk->queue->queue_lock);
     rcu_read_unlock();
     put_disk_and_module(ctx->disk);
 }
 EXPORT_SYMBOL_GPL(blkg_conf_finish);
+
+static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+    int i;
+
+    for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+        dst->bytes[i] = src->bytes[i];
+        dst->ios[i] = src->ios[i];
+    }
+}
+
+static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+    int i;
+
+    for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+        dst->bytes[i] += src->bytes[i];
+        dst->ios[i] += src->ios[i];
+    }
+}
+
+static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+    int i;
+
+    for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+        dst->bytes[i] -= src->bytes[i];
+        dst->ios[i] -= src->ios[i];
+    }
+}
+
+static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+{
+    struct blkcg *blkcg = css_to_blkcg(css);
+    struct blkcg_gq *blkg;
+
+    rcu_read_lock();
+
+    hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
+        struct blkcg_gq *parent = blkg->parent;
+        struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
+        struct blkg_iostat cur, delta;
+        unsigned int seq;
+
+        /* fetch the current per-cpu values */
+        do {
+            seq = u64_stats_fetch_begin(&bisc->sync);
+            blkg_iostat_set(&cur, &bisc->cur);
+        } while (u64_stats_fetch_retry(&bisc->sync, seq));
+
+        /* propagate percpu delta to global */
+        u64_stats_update_begin(&blkg->iostat.sync);
+        blkg_iostat_set(&delta, &cur);
+        blkg_iostat_sub(&delta, &bisc->last);
+        blkg_iostat_add(&blkg->iostat.cur, &delta);
+        blkg_iostat_add(&bisc->last, &delta);
+        u64_stats_update_end(&blkg->iostat.sync);
+
+        /* propagate global delta to parent */
+        if (parent) {
+            u64_stats_update_begin(&parent->iostat.sync);
+            blkg_iostat_set(&delta, &blkg->iostat.cur);
+            blkg_iostat_sub(&delta, &blkg->iostat.last);
+            blkg_iostat_add(&parent->iostat.cur, &delta);
+            blkg_iostat_add(&blkg->iostat.last, &delta);
+            u64_stats_update_end(&parent->iostat.sync);
+        }
+    }
+
+    rcu_read_unlock();
+}
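blkcg_rstat_flush() keeps a cur/last pair at every level so that a flush forwards only the delta accumulated since the previous flush, which makes repeated flushes idempotent. The arithmetic in isolation, on a toy struct with a single counter standing in for the bytes/ios arrays:

```c
#include <stdio.h>

/* Toy counter with the same cur/last split the flush code relies on. */
struct stat {
    long long cur;      /* running total at this level */
    long long last;     /* portion already pushed to the parent */
    struct stat *parent;
};

/* Push only what accumulated since the previous flush up one level. */
static void propagate(struct stat *s)
{
    long long delta = s->cur - s->last;

    if (s->parent)
        s->parent->cur += delta;
    s->last += delta;   /* remember what has been forwarded */
}

int main(void)
{
    struct stat root = { 0, 0, NULL };
    struct stat child = { 0, 0, &root };

    child.cur += 100;   /* I/O charged to the child */
    propagate(&child);
    child.cur += 50;    /* more I/O later */
    propagate(&child);  /* only the new 50 moves up */

    printf("root=%lld child=%lld\n", root.cur, child.cur); /* 150 150 */
    return 0;
}
```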
+
+/*
+ * The rstat algorithms intentionally don't handle the root cgroup to avoid
+ * incurring overhead when no cgroups are defined.  For that reason,
+ * cgroup_rstat_flush in blkcg_print_stat does not actually fill out the
+ * iostat in the root cgroup's blkcg_gq.
+ *
+ * However, we would like to re-use the printing code between the root and
+ * non-root cgroups to the extent possible.  For that reason, we simulate
+ * flushing the root cgroup's stats by explicitly filling in the iostat
+ * with disk level statistics.
+ */
+static void blkcg_fill_root_iostats(void)
+{
+    struct class_dev_iter iter;
+    struct device *dev;
+
+    class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+    while ((dev = class_dev_iter_next(&iter))) {
+        struct gendisk *disk = dev_to_disk(dev);
+        struct hd_struct *part = disk_get_part(disk, 0);
+        struct blkcg_gq *blkg = blk_queue_root_blkg(disk->queue);
+        struct blkg_iostat tmp;
+        int cpu;
+
+        memset(&tmp, 0, sizeof(tmp));
+        for_each_possible_cpu(cpu) {
+            struct disk_stats *cpu_dkstats;
+
+            cpu_dkstats = per_cpu_ptr(part->dkstats, cpu);
+            tmp.ios[BLKG_IOSTAT_READ] +=
+                cpu_dkstats->ios[STAT_READ];
+            tmp.ios[BLKG_IOSTAT_WRITE] +=
+                cpu_dkstats->ios[STAT_WRITE];
+            tmp.ios[BLKG_IOSTAT_DISCARD] +=
+                cpu_dkstats->ios[STAT_DISCARD];
+            // convert sectors to bytes
+            tmp.bytes[BLKG_IOSTAT_READ] +=
+                cpu_dkstats->sectors[STAT_READ] << 9;
+            tmp.bytes[BLKG_IOSTAT_WRITE] +=
+                cpu_dkstats->sectors[STAT_WRITE] << 9;
+            tmp.bytes[BLKG_IOSTAT_DISCARD] +=
+                cpu_dkstats->sectors[STAT_DISCARD] << 9;
+
+            u64_stats_update_begin(&blkg->iostat.sync);
+            blkg_iostat_set(&blkg->iostat.cur, &tmp);
+            u64_stats_update_end(&blkg->iostat.sync);
+        }
+        disk_put_part(part);
+    }
+}
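blkcg_fill_root_iostats() just sums every CPU's disk_stats slot and converts sectors to bytes with a shift by 9 (one sector = 512 bytes). The aggregation step on its own, with a fixed fake per-CPU array:

```c
#include <stdio.h>

#define NR_CPUS 4
#define SECTOR_SHIFT 9          /* one sector = 512 bytes */

/* Per-CPU counters like struct disk_stats, one slot per CPU. */
struct cpu_stats {
    unsigned long long ios;
    unsigned long long sectors;
};

int main(void)
{
    struct cpu_stats stats[NR_CPUS] = {
        { 10, 80 }, { 3, 24 }, { 0, 0 }, { 7, 56 },
    };
    unsigned long long ios = 0, bytes = 0;
    int cpu;

    /* Sum every CPU's slot, converting sectors to bytes as we go. */
    for (cpu = 0; cpu < NR_CPUS; cpu++) {
        ios += stats[cpu].ios;
        bytes += stats[cpu].sectors << SECTOR_SHIFT;
    }
    printf("ios=%llu bytes=%llu\n", ios, bytes);  /* ios=20 bytes=81920 */
    return 0;
}
```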
 
 static int blkcg_print_stat(struct seq_file *sf, void *v)
 {
     struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
     struct blkcg_gq *blkg;
 
+    if (!seq_css(sf)->parent)
+        blkcg_fill_root_iostats();
+    else
+        cgroup_rstat_flush(blkcg->css.cgroup);
+
     rcu_read_lock();
 
     hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
+        struct blkg_iostat_set *bis = &blkg->iostat;
         const char *dname;
         char *buf;
-        struct blkg_rwstat rwstat;
         u64 rbytes, wbytes, rios, wios, dbytes, dios;
         size_t size = seq_get_buf(sf, &buf), off = 0;
         int i;
         bool has_stats = false;
+        unsigned seq;
 
-        spin_lock_irq(blkg->q->queue_lock);
+        spin_lock_irq(&blkg->q->queue_lock);
 
         if (!blkg->online)
             goto skip;
@@ -983 +893 @@
          */
         off += scnprintf(buf+off, size-off, "%s ", dname);
 
-        rwstat = blkg_rwstat_recursive_sum(blkg, NULL,
-                    offsetof(struct blkcg_gq, stat_bytes));
-        rbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]);
-        wbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]);
-        dbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_DISCARD]);
+        do {
+            seq = u64_stats_fetch_begin(&bis->sync);
 
-        rwstat = blkg_rwstat_recursive_sum(blkg, NULL,
-                    offsetof(struct blkcg_gq, stat_ios));
-        rios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]);
-        wios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]);
-        dios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_DISCARD]);
+            rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
+            wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE];
+            dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD];
+            rios = bis->cur.ios[BLKG_IOSTAT_READ];
+            wios = bis->cur.ios[BLKG_IOSTAT_WRITE];
+            dios = bis->cur.ios[BLKG_IOSTAT_DISCARD];
+        } while (u64_stats_fetch_retry(&bis->sync, seq));
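The do/while pair above is the u64_stats sequence-counter read protocol: retry the snapshot whenever a writer was active during the read. A self-contained C11 sketch of the underlying seqcount idea (illustrative only; a strictly race-free production version would also need atomic or otherwise annotated accesses for the payload fields):

```c
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Writers bump the counter to odd before writing and back to even after;
 * readers retry if they saw an odd value or a change across the read. */
struct seq_stat {
    atomic_uint seq;
    uint64_t bytes;
    uint64_t ios;
};

static void writer_update(struct seq_stat *s, uint64_t nbytes)
{
    atomic_fetch_add_explicit(&s->seq, 1, memory_order_release); /* odd */
    s->bytes += nbytes;
    s->ios += 1;
    atomic_fetch_add_explicit(&s->seq, 1, memory_order_release); /* even */
}

static void reader_snapshot(struct seq_stat *s, uint64_t *bytes,
                            uint64_t *ios)
{
    unsigned begin;

    do {
        begin = atomic_load_explicit(&s->seq, memory_order_acquire);
        *bytes = s->bytes;
        *ios = s->ios;
    } while ((begin & 1) ||
             begin != atomic_load_explicit(&s->seq, memory_order_acquire));
}

int main(void)
{
    struct seq_stat s = { 0 };
    uint64_t b, i;

    writer_update(&s, 4096);
    reader_snapshot(&s, &b, &i);
    printf("bytes=%llu ios=%llu\n",
           (unsigned long long)b, (unsigned long long)i);
    return 0;
}
```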
 
         if (rbytes || wbytes || rios || wios) {
             has_stats = true;
@@ -1003 +912 @@
                              dbytes, dios);
         }
 
-        if (!blkcg_debug_stats)
-            goto next;
-
-        if (atomic_read(&blkg->use_delay)) {
+        if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
             has_stats = true;
             off += scnprintf(buf+off, size-off,
                              " use_delay=%d delay_nsec=%llu",
@@ -1026 +932 @@
             has_stats = true;
             off += written;
         }
-next:
+
         if (has_stats) {
             if (off < size - 1) {
                 off += scnprintf(buf+off, size-off, "\n");
@@ -1036 +942 @@
             }
         }
 skip:
-        spin_unlock_irq(blkg->q->queue_lock);
+        spin_unlock_irq(&blkg->q->queue_lock);
     }
 
     rcu_read_unlock();
@@ -1046 +952 @@
 static struct cftype blkcg_files[] = {
     {
         .name = "stat",
-        .flags = CFTYPE_NOT_ON_ROOT,
         .seq_show = blkcg_print_stat,
     },
     { }    /* terminate */
@@ -1096 +1001 @@
     /* this prevents anyone from attaching or migrating to this blkcg */
     wb_blkcg_offline(blkcg);
 
-    /* put the base cgwb reference allowing step 2 to be triggered */
-    blkcg_cgwb_put(blkcg);
+    /* put the base online pin allowing step 2 to be triggered */
+    blkcg_unpin_online(blkcg);
 }
 
 /**
@@ -1113 +1018 @@
  */
 void blkcg_destroy_blkgs(struct blkcg *blkcg)
 {
+    might_sleep();
+
     spin_lock_irq(&blkcg->lock);
 
     while (!hlist_empty(&blkcg->blkg_list)) {
@@ -1120 +1027 @@
                                         struct blkcg_gq, blkcg_node);
         struct request_queue *q = blkg->q;
 
-        if (spin_trylock(q->queue_lock)) {
-            blkg_destroy(blkg);
-            spin_unlock(q->queue_lock);
-        } else {
+        if (need_resched() || !spin_trylock(&q->queue_lock)) {
+            /*
+             * Given that the system can accumulate a huge number
+             * of blkgs in pathological cases, check to see if we
+             * need to reschedule to avoid a softlockup.
+             */
             spin_unlock_irq(&blkcg->lock);
-            cpu_relax();
+            cond_resched();
             spin_lock_irq(&blkcg->lock);
+            continue;
         }
+
+        blkg_destroy(blkg);
+        spin_unlock(&q->queue_lock);
     }
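The loop above never spins on a contended queue_lock while holding blkcg->lock: it backs off entirely, reschedules, and retries. The same trylock-or-back-off shape with pthreads (struct work and destroy_all() are made up for the sketch; sched_yield() is a rough cond_resched() analogue):

```c
#include <pthread.h>
#include <sched.h>

struct work { struct work *next; pthread_mutex_t lock; };

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct work *head;

/* Drain a queue of items whose per-item lock may be contended: take it
 * opportunistically, and when that fails drop the outer lock and yield
 * instead of spinning with both locks implicated. */
static void destroy_all(void)
{
    pthread_mutex_lock(&list_lock);
    while (head) {
        struct work *w = head;

        if (pthread_mutex_trylock(&w->lock) != 0) {
            /* contended: release everything, let others run, retry */
            pthread_mutex_unlock(&list_lock);
            sched_yield();
            pthread_mutex_lock(&list_lock);
            continue;
        }
        head = w->next;                 /* actually "destroy" the item */
        pthread_mutex_unlock(&w->lock);
    }
    pthread_mutex_unlock(&list_lock);
}
```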
 
     spin_unlock_irq(&blkcg->lock);
@@ -1196 +1109 @@
     }
 
     spin_lock_init(&blkcg->lock);
+    refcount_set(&blkcg->online_pin, 1);
     INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT | __GFP_NOWARN);
     INIT_HLIST_HEAD(&blkcg->blkg_list);
 #ifdef CONFIG_CGROUP_WRITEBACK
     INIT_LIST_HEAD(&blkcg->cgwb_list);
-    refcount_set(&blkcg->cgwb_refcnt, 1);
 #endif
     list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);
 
@@ -1219 +1132 @@
     return ret;
 }
 
+static int blkcg_css_online(struct cgroup_subsys_state *css)
+{
+    struct blkcg *blkcg = css_to_blkcg(css);
+    struct blkcg *parent = blkcg_parent(blkcg);
+
+    /*
+     * blkcg_pin_online() is used to delay blkcg offline so that blkgs
+     * don't go offline while cgwbs are still active on them.  Pin the
+     * parent so that offline always happens towards the root.
+     */
+    if (parent)
+        blkcg_pin_online(parent);
+    return 0;
+}
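The online-pin protocol: every blkcg starts with one pin, and a child pins its parent at css_online time, so the unpin cascade can only complete leaf-to-root. A toy model (pin/unpin are illustrative stand-ins for blkcg_pin_online()/blkcg_unpin_online(), not the kernel API):

```c
#include <stdio.h>

/* Each node starts with one "online" pin; children pin their parent so
 * that going offline can only complete bottom-up, towards the root. */
struct node {
    struct node *parent;
    int online_pin;
    const char *name;
};

static void pin(struct node *n) { n->online_pin++; }

static void unpin(struct node *n)
{
    while (n && --n->online_pin == 0) {
        printf("%s goes offline\n", n->name);
        n = n->parent;          /* drop the pin we held on the parent */
    }
}

int main(void)
{
    struct node root = { NULL, 1, "root" };
    struct node child = { &root, 1, "child" };

    pin(child.parent);          /* css_online: child pins its parent */
    unpin(&root);               /* root can't go offline yet... */
    unpin(&child);              /* ...until the child does first */
    return 0;
}
```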
+
 /**
  * blkcg_init_queue - initialize blkcg part of request queue
  * @q: request_queue to initialize
  *
- * Called from blk_alloc_queue_node(). Responsible for initializing blkcg
+ * Called from blk_alloc_queue(). Responsible for initializing blkcg
  * part of new request_queue @q.
  *
  * RETURNS:
@@ -1243 +1171 @@
 
     /* Make sure the root blkg exists. */
     rcu_read_lock();
-    spin_lock_irq(q->queue_lock);
+    spin_lock_irq(&q->queue_lock);
     blkg = blkg_create(&blkcg_root, q, new_blkg);
     if (IS_ERR(blkg))
         goto err_unlock;
     q->root_blkg = blkg;
-    q->root_rl.blkg = blkg;
-    spin_unlock_irq(q->queue_lock);
+    spin_unlock_irq(&q->queue_lock);
     rcu_read_unlock();
 
     if (preloaded)
         radix_tree_preload_end();
 
-    ret = blk_iolatency_init(q);
-    if (ret) {
-        spin_lock_irq(q->queue_lock);
-        blkg_destroy_all(q);
-        spin_unlock_irq(q->queue_lock);
-        return ret;
-    }
+    ret = blk_ioprio_init(q);
+    if (ret)
+        goto err_destroy_all;
 
     ret = blk_throtl_init(q);
-    if (ret) {
-        spin_lock_irq(q->queue_lock);
-        blkg_destroy_all(q);
-        spin_unlock_irq(q->queue_lock);
-    }
-    return ret;
+    if (ret)
+        goto err_destroy_all;
 
+    ret = blk_iolatency_init(q);
+    if (ret) {
+        blk_throtl_exit(q);
+        goto err_destroy_all;
+    }
+
+    return 0;
+
+err_destroy_all:
+    blkg_destroy_all(q);
+    return ret;
 err_unlock:
-    spin_unlock_irq(q->queue_lock);
+    spin_unlock_irq(&q->queue_lock);
     rcu_read_unlock();
     if (preloaded)
         radix_tree_preload_end();
@@ -1280 +1210 @@
 }
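The rewritten blkcg_init_queue() funnels all post-blkg failures into a single err_destroy_all label, with blk_iolatency_init()'s extra blk_throtl_exit() done before the jump. The cleanup-label shape in miniature (all names hypothetical, malloc standing in for the subsystem init calls):

```c
#include <stdlib.h>

struct queue { void *a, *b, *c; };

/* Three-step init mirroring the error unwinding above: later steps
 * funnel failures into one label that undoes the shared state, while
 * step-specific teardown happens before the jump. */
static int init_queue(struct queue *q)
{
    int ret = -1;

    q->a = malloc(16);              /* "root blkg" analogue */
    if (!q->a)
        return ret;

    q->b = malloc(16);              /* second subsystem */
    if (!q->b)
        goto err_destroy_all;

    q->c = malloc(16);              /* third subsystem */
    if (!q->c) {
        free(q->b);                 /* undo only what this path set up */
        goto err_destroy_all;
    }
    return 0;

err_destroy_all:
    free(q->a);                     /* shared cleanup for all failures */
    return ret;
}
```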
 
 /**
- * blkcg_drain_queue - drain blkcg part of request_queue
- * @q: request_queue to drain
- *
- * Called from blk_drain_queue().  Responsible for draining blkcg part.
- */
-void blkcg_drain_queue(struct request_queue *q)
-{
-    lockdep_assert_held(q->queue_lock);
-
-    /*
-     * @q could be exiting and already have destroyed all blkgs as
-     * indicated by NULL root_blkg.  If so, don't confuse policies.
-     */
-    if (!q->root_blkg)
-        return;
-
-    blk_throtl_drain(q);
-}
-
-/**
  * blkcg_exit_queue - exit and release blkcg part of request_queue
  * @q: request_queue being released
  *
- * Called from blk_release_queue(). Responsible for exiting blkcg part.
+ * Called from blk_exit_queue(). Responsible for exiting blkcg part.
  */
 void blkcg_exit_queue(struct request_queue *q)
 {
-    spin_lock_irq(q->queue_lock);
     blkg_destroy_all(q);
-    spin_unlock_irq(q->queue_lock);
-
     blk_throtl_exit(q);
 }
 
@@ -1369 +1276 @@
 
 struct cgroup_subsys io_cgrp_subsys = {
     .css_alloc = blkcg_css_alloc,
+    .css_online = blkcg_css_online,
     .css_offline = blkcg_css_offline,
     .css_free = blkcg_css_free,
     .can_attach = blkcg_can_attach,
+    .css_rstat_flush = blkcg_rstat_flush,
     .bind = blkcg_bind,
     .dfl_cftypes = blkcg_files,
     .legacy_cftypes = blkcg_legacy_files,
@@ -1408 +1317 @@
                                   const struct blkcg_policy *pol)
 {
     struct blkg_policy_data *pd_prealloc = NULL;
-    struct blkcg_gq *blkg;
+    struct blkcg_gq *blkg, *pinned_blkg = NULL;
     int ret;
 
     if (blkcg_policy_enabled(q, pol))
         return 0;
 
-    if (q->mq_ops)
+    if (queue_is_mq(q))
         blk_mq_freeze_queue(q);
-    else
-        blk_queue_bypass_start(q);
-pd_prealloc:
-    if (!pd_prealloc) {
-        pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node);
-        if (!pd_prealloc) {
-            ret = -ENOMEM;
-            goto out_bypass_end;
-        }
-    }
+retry:
+    spin_lock_irq(&q->queue_lock);
 
-    spin_lock_irq(q->queue_lock);
-
-    list_for_each_entry(blkg, &q->blkg_list, q_node) {
+    /* blkg_list is pushed at the head, reverse walk to allocate parents first */
+    list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
         struct blkg_policy_data *pd;
 
         if (blkg->pd[pol->plid])
             continue;
 
-        pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q->node);
-        if (!pd)
-            swap(pd, pd_prealloc);
+        /* If prealloc matches, use it; otherwise try GFP_NOWAIT */
+        if (blkg == pinned_blkg) {
+            pd = pd_prealloc;
+            pd_prealloc = NULL;
+        } else {
+            pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q,
+                                  blkg->blkcg);
+        }
+
         if (!pd) {
-            spin_unlock_irq(q->queue_lock);
-            goto pd_prealloc;
+            /*
+             * GFP_NOWAIT failed.  Free the existing one and
+             * prealloc for @blkg w/ GFP_KERNEL.
+             */
+            if (pinned_blkg)
+                blkg_put(pinned_blkg);
+            blkg_get(blkg);
+            pinned_blkg = blkg;
+
+            spin_unlock_irq(&q->queue_lock);
+
+            if (pd_prealloc)
+                pol->pd_free_fn(pd_prealloc);
+            pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q,
+                                           blkg->blkcg);
+            if (pd_prealloc)
+                goto retry;
+            else
+                goto enomem;
         }
 
         blkg->pd[pol->plid] = pd;
         pd->blkg = blkg;
         pd->plid = pol->plid;
-        if (pol->pd_init_fn)
-            pol->pd_init_fn(pd);
     }
+
+    /* all allocated, init in the same order */
+    if (pol->pd_init_fn)
+        list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
+            pol->pd_init_fn(blkg->pd[pol->plid]);
+
+    if (pol->pd_online_fn)
+        list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
+            pol->pd_online_fn(blkg->pd[pol->plid]);
 
     __set_bit(pol->plid, q->blkcg_pols);
     ret = 0;
 
-    spin_unlock_irq(q->queue_lock);
-out_bypass_end:
-    if (q->mq_ops)
+    spin_unlock_irq(&q->queue_lock);
+out:
+    if (queue_is_mq(q))
         blk_mq_unfreeze_queue(q);
-    else
-        blk_queue_bypass_end(q);
+    if (pinned_blkg)
+        blkg_put(pinned_blkg);
     if (pd_prealloc)
         pol->pd_free_fn(pd_prealloc);
     return ret;
+
+enomem:
+    /* alloc failed, nothing's initialized yet, free everything */
+    spin_lock_irq(&q->queue_lock);
+    list_for_each_entry(blkg, &q->blkg_list, q_node) {
+        struct blkcg *blkcg = blkg->blkcg;
+
+        spin_lock(&blkcg->lock);
+        if (blkg->pd[pol->plid]) {
+            pol->pd_free_fn(blkg->pd[pol->plid]);
+            blkg->pd[pol->plid] = NULL;
+        }
+        spin_unlock(&blkcg->lock);
+    }
+    spin_unlock_irq(&q->queue_lock);
+    ret = -ENOMEM;
+    goto out;
 }
 EXPORT_SYMBOL_GPL(blkcg_activate_policy);
1467 | 1414 | |
---|
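The allocation scheme above is worth seeing in isolation: pd's are allocated opportunistically with GFP_NOWAIT under the queue lock; when that fails, the target blkg is pinned, the lock is dropped, a pd is preallocated with GFP_KERNEL, and the walk restarts. Below is a minimal, self-contained sketch of that lock/prealloc/retry pattern; the my_item/my_data types are hypothetical stand-ins for blkg/pd, and the real code must additionally pin the blkg across the retry because pd_alloc_fn() allocates per-blkcg.

/* sketch only -- needs <linux/list.h>, <linux/slab.h>, <linux/spinlock.h> */
struct my_data { int dummy; };			/* hypothetical pd stand-in */
struct my_item {				/* hypothetical blkg stand-in */
	struct list_head node;
	struct my_data *data;
};

static int attach_data_all(spinlock_t *lock, struct list_head *items)
{
	struct my_data *prealloc = NULL;	/* carried across retries */
	struct my_item *it;

retry:
	spin_lock_irq(lock);
	list_for_each_entry(it, items, node) {
		struct my_data *d;

		if (it->data)			/* filled on an earlier pass */
			continue;

		if (prealloc) {
			d = prealloc;
			prealloc = NULL;
		} else {
			d = kzalloc(sizeof(*d), GFP_NOWAIT | __GFP_NOWARN);
		}
		if (!d) {
			/* can't sleep under a spinlock: drop it, prealloc
			 * with GFP_KERNEL, and restart the walk */
			spin_unlock_irq(lock);
			prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL);
			if (!prealloc)
				return -ENOMEM;	/* the real code unwinds first */
			goto retry;
		}
		it->data = d;
	}
	spin_unlock_irq(lock);

	kfree(prealloc);	/* leftover if the slot got filled meanwhile */
	return 0;
}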
.. | .. |
---|
1481 | 1428 | if (!blkcg_policy_enabled(q, pol)) |
---|
1482 | 1429 | return; |
---|
1483 | 1430 | |
---|
1484 | | - if (q->mq_ops) |
---|
| 1431 | + if (queue_is_mq(q)) |
---|
1485 | 1432 | blk_mq_freeze_queue(q); |
---|
1486 | | - else |
---|
1487 | | - blk_queue_bypass_start(q); |
---|
1488 | 1433 | |
---|
1489 | | - spin_lock_irq(q->queue_lock); |
---|
| 1434 | + spin_lock_irq(&q->queue_lock); |
---|
1490 | 1435 | |
---|
1491 | 1436 | __clear_bit(pol->plid, q->blkcg_pols); |
---|
1492 | 1437 | |
---|
1493 | 1438 | list_for_each_entry(blkg, &q->blkg_list, q_node) { |
---|
| 1439 | + struct blkcg *blkcg = blkg->blkcg; |
---|
| 1440 | + |
---|
| 1441 | + spin_lock(&blkcg->lock); |
---|
1494 | 1442 | if (blkg->pd[pol->plid]) { |
---|
1495 | 1443 | if (pol->pd_offline_fn) |
---|
1496 | 1444 | pol->pd_offline_fn(blkg->pd[pol->plid]); |
---|
1497 | 1445 | pol->pd_free_fn(blkg->pd[pol->plid]); |
---|
1498 | 1446 | blkg->pd[pol->plid] = NULL; |
---|
1499 | 1447 | } |
---|
| 1448 | + spin_unlock(&blkcg->lock); |
---|
1500 | 1449 | } |
---|
1501 | 1450 | |
---|
1502 | | - spin_unlock_irq(q->queue_lock); |
---|
| 1451 | + spin_unlock_irq(&q->queue_lock); |
---|
1503 | 1452 | |
---|
1504 | | - if (q->mq_ops) |
---|
| 1453 | + if (queue_is_mq(q)) |
---|
1505 | 1454 | blk_mq_unfreeze_queue(q); |
---|
1506 | | - else |
---|
1507 | | - blk_queue_bypass_end(q); |
---|
1508 | 1455 | } |
---|
1509 | 1456 | EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); |
---|
1510 | 1457 | |
---|
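For context, a skeletal policy shows what activate/deactivate are driving: pd_alloc_fn()/pd_free_fn() manage one blkg_policy_data per (blkg, policy) pair, and the init/online/offline hooks bracket its lifetime. A hedged sketch; the example_ names are hypothetical, while the struct blkcg_policy fields are the ones exercised above.

struct example_pd {
	struct blkg_policy_data pd;	/* embedded; container_of() back */
	u64 budget;			/* hypothetical per-group state */
};

static struct example_pd *to_example_pd(struct blkg_policy_data *pd)
{
	return container_of(pd, struct example_pd, pd);
}

static struct blkg_policy_data *example_pd_alloc(gfp_t gfp,
		struct request_queue *q, struct blkcg *blkcg)
{
	struct example_pd *epd = kzalloc_node(sizeof(*epd), gfp, q->node);

	return epd ? &epd->pd : NULL;
}

static void example_pd_init(struct blkg_policy_data *pd)
{
	to_example_pd(pd)->budget = 0;
}

static void example_pd_free(struct blkg_policy_data *pd)
{
	kfree(to_example_pd(pd));
}

static struct blkcg_policy example_policy = {
	.pd_alloc_fn	= example_pd_alloc,
	.pd_init_fn	= example_pd_init,
	.pd_free_fn	= example_pd_free,
};

blkcg_policy_register(&example_policy) would assign ->plid; blkcg_activate_policy(q, &example_policy) then populates a pd for every blkg on the queue exactly as in the loops above, and blkcg_deactivate_policy() tears them down under the per-blkcg locks.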
.. | .. |
---|
1554 | 1501 | blkcg->cpd[pol->plid] = cpd; |
---|
1555 | 1502 | cpd->blkcg = blkcg; |
---|
1556 | 1503 | cpd->plid = pol->plid; |
---|
1557 | | - pol->cpd_init_fn(cpd); |
---|
| 1504 | + if (pol->cpd_init_fn) |
---|
| 1505 | + pol->cpd_init_fn(cpd); |
---|
1558 | 1506 | } |
---|
1559 | 1507 | } |
---|
1560 | 1508 | |
---|
.. | .. |
---|
1627 | 1575 | } |
---|
1628 | 1576 | EXPORT_SYMBOL_GPL(blkcg_policy_unregister); |
---|
1629 | 1577 | |
---|
| 1578 | +bool __blkcg_punt_bio_submit(struct bio *bio) |
---|
| 1579 | +{ |
---|
| 1580 | + struct blkcg_gq *blkg = bio->bi_blkg; |
---|
| 1581 | + |
---|
| 1582 | + /* consume the flag first */ |
---|
| 1583 | + bio->bi_opf &= ~REQ_CGROUP_PUNT; |
---|
| 1584 | + |
---|
| 1585 | + /* never bounce for the root cgroup */ |
---|
| 1586 | + if (!blkg->parent) |
---|
| 1587 | + return false; |
---|
| 1588 | + |
---|
| 1589 | + spin_lock_bh(&blkg->async_bio_lock); |
---|
| 1590 | + bio_list_add(&blkg->async_bios, bio); |
---|
| 1591 | + spin_unlock_bh(&blkg->async_bio_lock); |
---|
| 1592 | + |
---|
| 1593 | + queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work); |
---|
| 1594 | + return true; |
---|
| 1595 | +} |
---|
| 1596 | + |
---|
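__blkcg_punt_bio_submit() is only the slow path. The fast-path gate lives in blk-cgroup.h as an inline that tests REQ_CGROUP_PUNT before doing any work; roughly:

static inline bool blkcg_punt_bio_submit(struct bio *bio)
{
	if (bio->bi_opf & REQ_CGROUP_PUNT)
		return __blkcg_punt_bio_submit(bio);
	else
		return false;
}

The submission path calls this early and gives up ownership of the bio when it returns true; blkg_async_bio_workfn() later replays the punted bios from the blkcg_punt_bio_wq worker, and since the flag was consumed above they are not punted again.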
1630 | 1597 | /* |
---|
1631 | 1598 | * Scale the accumulated delay based on how long it has been since we updated |
---|
1632 | 1599 | * the delay. We only call this when we are adding delay, in case it's been a |
---|
.. | .. |
---|
1636 | 1603 | static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now) |
---|
1637 | 1604 | { |
---|
1638 | 1605 | u64 old = atomic64_read(&blkg->delay_start); |
---|
| 1606 | + |
---|
| 1607 | + /* negative use_delay means no scaling, see blkcg_set_delay() */ |
---|
| 1608 | + if (atomic_read(&blkg->use_delay) < 0) |
---|
| 1609 | + return; |
---|
1639 | 1610 | |
---|
1640 | 1611 | /* |
---|
1641 | 1612 | * We only want to scale down every second. The idea here is that we |
---|
.. | .. |
---|
1688 | 1659 | */ |
---|
1689 | 1660 | static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay) |
---|
1690 | 1661 | { |
---|
| 1662 | + unsigned long pflags; |
---|
| 1663 | + bool clamp = false;
---|
1691 | 1664 | u64 now = ktime_to_ns(ktime_get()); |
---|
1692 | 1665 | u64 exp; |
---|
1693 | 1666 | u64 delay_nsec = 0; |
---|
1694 | 1667 | int tok; |
---|
1695 | 1668 | |
---|
1696 | 1669 | while (blkg->parent) { |
---|
1697 | | - if (atomic_read(&blkg->use_delay)) { |
---|
| 1670 | + int use_delay = atomic_read(&blkg->use_delay); |
---|
| 1671 | + |
---|
| 1672 | + if (use_delay) { |
---|
| 1673 | + u64 this_delay; |
---|
| 1674 | + |
---|
1698 | 1675 | blkcg_scale_delay(blkg, now); |
---|
1699 | | - delay_nsec = max_t(u64, delay_nsec, |
---|
1700 | | - atomic64_read(&blkg->delay_nsec)); |
---|
| 1676 | + this_delay = atomic64_read(&blkg->delay_nsec); |
---|
| 1677 | + if (this_delay > delay_nsec) { |
---|
| 1678 | + delay_nsec = this_delay; |
---|
| 1679 | + clamp = use_delay > 0; |
---|
| 1680 | + } |
---|
1701 | 1681 | } |
---|
1702 | 1682 | blkg = blkg->parent; |
---|
1703 | 1683 | } |
---|
.. | .. |
---|
1709 | 1689 | * Let's not sleep for all eternity if we've amassed a huge delay. |
---|
1710 | 1690 | * Swapping or metadata IO can accumulate tens of seconds worth of
---|
1711 | 1691 | * delay, and we want userspace to be able to do _something_ so cap the |
---|
1712 | | - * delays at 1 second. If there's 10's of seconds worth of delay then |
---|
1713 | | - * the tasks will be delayed for 1 second for every syscall. |
---|
| 1692 | + * delays at 0.25s. If there are tens of seconds worth of delay then the
---|
| 1693 | + * tasks will be delayed for 0.25 seconds for every syscall. If
---|
| 1694 | + * blkcg_set_delay() was used as indicated by negative use_delay, the |
---|
| 1695 | + * caller is responsible for regulating the range. |
---|
1714 | 1696 | */ |
---|
1715 | | - delay_nsec = min_t(u64, delay_nsec, 250 * NSEC_PER_MSEC); |
---|
| 1697 | + if (clamp) |
---|
| 1698 | + delay_nsec = min_t(u64, delay_nsec, 250 * NSEC_PER_MSEC); |
---|
1716 | 1699 | |
---|
1717 | | - /* |
---|
1718 | | - * TODO: the use_memdelay flag is going to be for the upcoming psi stuff |
---|
1719 | | - * that hasn't landed upstream yet. Once that stuff is in place we need |
---|
1720 | | - * to do a psi_memstall_enter/leave if memdelay is set. |
---|
1721 | | - */ |
---|
| 1700 | + if (use_memdelay) |
---|
| 1701 | + psi_memstall_enter(&pflags); |
---|
1722 | 1702 | |
---|
1723 | 1703 | exp = ktime_add_ns(now, delay_nsec); |
---|
1724 | 1704 | tok = io_schedule_prepare(); |
---|
.. | .. |
---|
1728 | 1708 | break; |
---|
1729 | 1709 | } while (!fatal_signal_pending(current)); |
---|
1730 | 1710 | io_schedule_finish(tok); |
---|
| 1711 | + |
---|
| 1712 | + if (use_memdelay) |
---|
| 1713 | + psi_memstall_leave(&pflags); |
---|
1731 | 1714 | } |
---|
1732 | 1715 | |
---|
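The psi_memstall_enter()/psi_memstall_leave() bracket added above is the generic way to charge a voluntary sleep to memory pressure. A standalone sketch of the pairing, with the hrtimer sleep standing in for the elided loop (sleep_as_memstall() is illustrative, not kernel API; needs <linux/psi.h>, <linux/sched.h>, <linux/ktime.h>):

static void sleep_as_memstall(u64 delay_nsec)
{
	unsigned long pflags;
	ktime_t exp = ktime_add_ns(ktime_get(), delay_nsec);

	psi_memstall_enter(&pflags);	/* task now counts as memstalled */
	set_current_state(TASK_KILLABLE);
	schedule_hrtimeout(&exp, HRTIMER_MODE_ABS);
	psi_memstall_leave(&pflags);	/* restore previous stall state */
}

pflags must be the same variable across the two calls; it saves whether the task was already in a memstall section so the pair nests correctly.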
1733 | 1716 | /** |
---|
.. | .. |
---|
1766 | 1749 | blkg = blkg_lookup(blkcg, q); |
---|
1767 | 1750 | if (!blkg) |
---|
1768 | 1751 | goto out; |
---|
1769 | | - blkg = blkg_try_get(blkg); |
---|
1770 | | - if (!blkg) |
---|
| 1752 | + if (!blkg_tryget(blkg)) |
---|
1771 | 1753 | goto out; |
---|
1772 | 1754 | rcu_read_unlock(); |
---|
1773 | 1755 | |
---|
.. | .. |
---|
1779 | 1761 | rcu_read_unlock(); |
---|
1780 | 1762 | blk_put_queue(q); |
---|
1781 | 1763 | } |
---|
1782 | | -EXPORT_SYMBOL_GPL(blkcg_maybe_throttle_current); |
---|
1783 | 1764 | |
---|
1784 | 1765 | /** |
---|
1785 | 1766 | * blkcg_schedule_throttle - this task needs to check for throttling |
---|
1786 | | - * @q - the request queue IO was submitted on |
---|
1787 | | - * @use_memdelay - do we charge this to memory delay for PSI |
---|
| 1767 | + * @q: the request queue IO was submitted on |
---|
| 1768 | + * @use_memdelay: do we charge this to memory delay for PSI |
---|
1788 | 1769 | * |
---|
1789 | 1770 | * This is called by the IO controller when we know there's delay accumulated |
---|
1790 | 1771 | * for the blkg for this task. We do not pass the blkg because there are places |
---|
.. | .. |
---|
1817 | 1798 | |
---|
1818 | 1799 | /** |
---|
1819 | 1800 | * blkcg_add_delay - add delay to this blkg |
---|
1820 | | - * @now - the current time in nanoseconds |
---|
1821 | | - * @delta - how many nanoseconds of delay to add |
---|
| 1801 | + * @blkg: blkg of interest |
---|
| 1802 | + * @now: the current time in nanoseconds |
---|
| 1803 | + * @delta: how many nanoseconds of delay to add |
---|
1822 | 1804 | * |
---|
1823 | 1805 | * Charge @delta to the blkg's current delay accumulation. This is used to |
---|
1824 | 1806 | * throttle tasks if an IO controller thinks we need more throttling. |
---|
1825 | 1807 | */ |
---|
1826 | 1808 | void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta) |
---|
1827 | 1809 | { |
---|
| 1810 | + if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0)) |
---|
| 1811 | + return; |
---|
1828 | 1812 | blkcg_scale_delay(blkg, now); |
---|
1829 | 1813 | atomic64_add(delta, &blkg->delay_nsec); |
---|
1830 | 1814 | } |
---|
1831 | | -EXPORT_SYMBOL_GPL(blkcg_add_delay); |
---|
| 1815 | + |
---|
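The WARN added to blkcg_add_delay() enforces the split between the two delay modes that blkcg_scale_delay() now distinguishes. A hedged usage sketch from a controller's point of view; blkcg_use_delay() and blkcg_set_delay() are the blk-cgroup.h helpers referenced above, while the my_ wrappers are hypothetical:

/* mode 1: positive use_delay -- delay accumulates, is decayed by
 * blkcg_scale_delay() and clamped to 0.25s per syscall */
static void my_charge_overage(struct blkcg_gq *blkg, u64 now, u64 over_ns)
{
	blkcg_use_delay(blkg);
	blkcg_add_delay(blkg, now, over_ns);
}

/* mode 2: negative use_delay -- an absolute delay owned by the caller,
 * neither scaled nor clamped */
static void my_set_absolute_delay(struct blkcg_gq *blkg, u64 delay_ns)
{
	blkcg_set_delay(blkg, delay_ns);
}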
| 1816 | +/** |
---|
| 1817 | + * blkg_tryget_closest - try to get a blkg ref on the closest blkg
---|
| 1818 | + * @bio: target bio |
---|
| 1819 | + * @css: target css |
---|
| 1820 | + * |
---|
| 1821 | + * As the failure mode here is to walk up the blkg tree, this ensures that the
---|
| 1822 | + * blkg->parent pointers are always valid. This returns the blkg that it ended |
---|
| 1823 | + * up taking a reference on or %NULL if no reference was taken. |
---|
| 1824 | + */ |
---|
| 1825 | +static inline struct blkcg_gq *blkg_tryget_closest(struct bio *bio, |
---|
| 1826 | + struct cgroup_subsys_state *css) |
---|
| 1827 | +{ |
---|
| 1828 | + struct blkcg_gq *blkg, *ret_blkg = NULL; |
---|
| 1829 | + |
---|
| 1830 | + rcu_read_lock(); |
---|
| 1831 | + blkg = blkg_lookup_create(css_to_blkcg(css), bio->bi_disk->queue); |
---|
| 1832 | + while (blkg) { |
---|
| 1833 | + if (blkg_tryget(blkg)) { |
---|
| 1834 | + ret_blkg = blkg; |
---|
| 1835 | + break; |
---|
| 1836 | + } |
---|
| 1837 | + blkg = blkg->parent; |
---|
| 1838 | + } |
---|
| 1839 | + rcu_read_unlock(); |
---|
| 1840 | + |
---|
| 1841 | + return ret_blkg; |
---|
| 1842 | +} |
---|
| 1843 | + |
---|
| 1844 | +/** |
---|
| 1845 | + * bio_associate_blkg_from_css - associate a bio with a specified css |
---|
| 1846 | + * @bio: target bio |
---|
| 1847 | + * @css: target css |
---|
| 1848 | + * |
---|
| 1849 | + * Associate @bio with the blkg found by combining the css's blkg and the |
---|
| 1850 | + * request_queue of @bio. An association failure is handled by walking up
---|
| 1851 | + * the blkg tree. Therefore, the blkg associated can be anything between
---|
| 1852 | + * the css's blkg and q->root_blkg. This situation only happens when a
---|
| 1853 | + * cgroup is dying and then the remaining bios spill to the closest alive blkg.
---|
| 1854 | + * |
---|
| 1855 | + * A reference will be taken on the blkg and will be released when @bio is |
---|
| 1856 | + * freed. |
---|
| 1857 | + */ |
---|
| 1858 | +void bio_associate_blkg_from_css(struct bio *bio, |
---|
| 1859 | + struct cgroup_subsys_state *css) |
---|
| 1860 | +{ |
---|
| 1861 | + if (bio->bi_blkg) |
---|
| 1862 | + blkg_put(bio->bi_blkg); |
---|
| 1863 | + |
---|
| 1864 | + if (css && css->parent) { |
---|
| 1865 | + bio->bi_blkg = blkg_tryget_closest(bio, css); |
---|
| 1866 | + } else { |
---|
| 1867 | + blkg_get(bio->bi_disk->queue->root_blkg); |
---|
| 1868 | + bio->bi_blkg = bio->bi_disk->queue->root_blkg; |
---|
| 1869 | + } |
---|
| 1870 | +} |
---|
| 1871 | +EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); |
---|
| 1872 | + |
---|
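A typical caller uses this to charge a bio to a cgroup other than the submitter's, e.g. writeback issued on behalf of a memcg (whose real path goes through wbc_init_bio()). A hedged sketch; my_submit_on_behalf() is hypothetical:

static void my_submit_on_behalf(struct bio *bio,
				struct cgroup_subsys_state *victim_css)
{
	/* drops any prior association and takes a ref on the new blkg,
	 * or on the nearest live ancestor if @victim_css is dying */
	bio_associate_blkg_from_css(bio, victim_css);
	submit_bio(bio);
	/* the blkg ref travels with the bio and is put when it is freed */
}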
| 1873 | +/** |
---|
| 1874 | + * bio_associate_blkg - associate a bio with a blkg |
---|
| 1875 | + * @bio: target bio |
---|
| 1876 | + * |
---|
| 1877 | + * Associate @bio with the blkg found from the bio's css and request_queue. |
---|
| 1878 | + * If one is not found, blkg_lookup_create() creates the blkg. If a blkg is
---|
| 1879 | + * already associated, the css is reused and association redone as the |
---|
| 1880 | + * request_queue may have changed. |
---|
| 1881 | + */ |
---|
| 1882 | +void bio_associate_blkg(struct bio *bio) |
---|
| 1883 | +{ |
---|
| 1884 | + struct cgroup_subsys_state *css; |
---|
| 1885 | + |
---|
| 1886 | + rcu_read_lock(); |
---|
| 1887 | + |
---|
| 1888 | + if (bio->bi_blkg) |
---|
| 1889 | + css = &bio_blkcg(bio)->css; |
---|
| 1890 | + else |
---|
| 1891 | + css = blkcg_css(); |
---|
| 1892 | + |
---|
| 1893 | + bio_associate_blkg_from_css(bio, css); |
---|
| 1894 | + |
---|
| 1895 | + rcu_read_unlock(); |
---|
| 1896 | +} |
---|
| 1897 | +EXPORT_SYMBOL_GPL(bio_associate_blkg); |
---|
| 1898 | + |
---|
| 1899 | +/** |
---|
| 1900 | + * bio_clone_blkg_association - clone blkg association from src to dst bio |
---|
| 1901 | + * @dst: destination bio |
---|
| 1902 | + * @src: source bio |
---|
| 1903 | + */ |
---|
| 1904 | +void bio_clone_blkg_association(struct bio *dst, struct bio *src) |
---|
| 1905 | +{ |
---|
| 1906 | + if (src->bi_blkg) |
---|
| 1907 | + bio_associate_blkg_from_css(dst, &bio_blkcg(src)->css); |
---|
| 1908 | +} |
---|
| 1909 | +EXPORT_SYMBOL_GPL(bio_clone_blkg_association); |
---|
| 1910 | + |
---|
| 1911 | +static int blk_cgroup_io_type(struct bio *bio) |
---|
| 1912 | +{ |
---|
| 1913 | + if (op_is_discard(bio->bi_opf)) |
---|
| 1914 | + return BLKG_IOSTAT_DISCARD; |
---|
| 1915 | + if (op_is_write(bio->bi_opf)) |
---|
| 1916 | + return BLKG_IOSTAT_WRITE; |
---|
| 1917 | + return BLKG_IOSTAT_READ; |
---|
| 1918 | +} |
---|
| 1919 | + |
---|
| 1920 | +void blk_cgroup_bio_start(struct bio *bio) |
---|
| 1921 | +{ |
---|
| 1922 | + int rwd = blk_cgroup_io_type(bio), cpu; |
---|
| 1923 | + struct blkg_iostat_set *bis; |
---|
| 1924 | + |
---|
| 1925 | + cpu = get_cpu(); |
---|
| 1926 | + bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu); |
---|
| 1927 | + u64_stats_update_begin(&bis->sync); |
---|
| 1928 | + |
---|
| 1929 | + /* |
---|
| 1930 | + * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split |
---|
| 1931 | + * bio and we would have already accounted for the size of the bio. |
---|
| 1932 | + */ |
---|
| 1933 | + if (!bio_flagged(bio, BIO_CGROUP_ACCT)) { |
---|
| 1934 | + bio_set_flag(bio, BIO_CGROUP_ACCT); |
---|
| 1935 | + bis->cur.bytes[rwd] += bio->bi_iter.bi_size; |
---|
| 1936 | + } |
---|
| 1937 | + bis->cur.ios[rwd]++; |
---|
| 1938 | + |
---|
| 1939 | + u64_stats_update_end(&bis->sync); |
---|
| 1940 | + if (cgroup_subsys_on_dfl(io_cgrp_subsys)) |
---|
| 1941 | + cgroup_rstat_updated(bio->bi_blkg->blkcg->css.cgroup, cpu); |
---|
| 1942 | + put_cpu(); |
---|
| 1943 | +} |
---|
| 1944 | + |
---|
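On the reader side, each CPU's blkg_iostat has to be sampled under the same u64_stats seqcount that blk_cgroup_bio_start() writes under, so the 64-bit counters read consistently on 32-bit kernels. The real consumer is the cgroup rstat flush; this standalone summing loop is a hedged illustration (needs <linux/percpu.h>, <linux/u64_stats_sync.h>):

static u64 my_read_bytes(struct blkcg_gq *blkg, int rwd)
{
	u64 sum = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct blkg_iostat_set *bis =
			per_cpu_ptr(blkg->iostat_cpu, cpu);
		unsigned int seq;
		u64 v;

		do {
			seq = u64_stats_fetch_begin(&bis->sync);
			v = bis->cur.bytes[rwd];
		} while (u64_stats_fetch_retry(&bis->sync, seq));
		sum += v;
	}
	return sum;
}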
| 1945 | +static int __init blkcg_init(void) |
---|
| 1946 | +{ |
---|
| 1947 | + blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio", |
---|
| 1948 | + WQ_MEM_RECLAIM | WQ_FREEZABLE | |
---|
| 1949 | + WQ_UNBOUND | WQ_SYSFS, 0); |
---|
| 1950 | + if (!blkcg_punt_bio_wq) |
---|
| 1951 | + return -ENOMEM; |
---|
| 1952 | + return 0; |
---|
| 1953 | +} |
---|
| 1954 | +subsys_initcall(blkcg_init); |
---|
1832 | 1955 | |
---|
1833 | 1956 | module_param(blkcg_debug_stats, bool, 0644); |
---|
1834 | 1957 | MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not"); |
---|