| .. | .. | |
|---|---|---|
| | 1 | +// SPDX-License-Identifier: GPL-2.0-only |
| 1 | 2 | /* |
| 2 | 3 | * Copyright (C) 2016 Facebook |
| 3 | 4 | * Copyright (C) 2013-2014 Jens Axboe |
| 4 | | - * |
| 5 | | - * This program is free software; you can redistribute it and/or |
| 6 | | - * modify it under the terms of the GNU General Public |
| 7 | | - * License v2 as published by the Free Software Foundation. |
| 8 | | - * |
| 9 | | - * This program is distributed in the hope that it will be useful, |
| 10 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 12 | | - * General Public License for more details. |
| 13 | | - * |
| 14 | | - * You should have received a copy of the GNU General Public License |
| 15 | | - * along with this program. If not, see <https://www.gnu.org/licenses/>. |
| 16 | 5 | */ |
| 17 | 6 | |
| 18 | 7 | #include <linux/sched.h> |
| 19 | 8 | #include <linux/random.h> |
| 20 | 9 | #include <linux/sbitmap.h> |
| 21 | 10 | #include <linux/seq_file.h> |
| | 11 | + |
| | 12 | +/* |
| | 13 | + * See if we have deferred clears that we can batch move |
| | 14 | + */ |
| | 15 | +static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index) |
| | 16 | +{ |
| | 17 | + unsigned long mask, val; |
| | 18 | + bool ret = false; |
| | 19 | + unsigned long flags; |
| | 20 | + |
| | 21 | + spin_lock_irqsave(&sb->map[index].swap_lock, flags); |
| | 22 | + |
| | 23 | + if (!sb->map[index].cleared) |
| | 24 | + goto out_unlock; |
| | 25 | + |
| | 26 | + /* |
| | 27 | + * First get a stable cleared mask, setting the old mask to 0. |
| | 28 | + */ |
| | 29 | + mask = xchg(&sb->map[index].cleared, 0); |
| | 30 | + |
| | 31 | + /* |
| | 32 | + * Now clear the masked bits in our free word |
| | 33 | + */ |
| | 34 | + do { |
| | 35 | + val = sb->map[index].word; |
| | 36 | + } while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val); |
| | 37 | + |
| | 38 | + ret = true; |
| | 39 | +out_unlock: |
| | 40 | + spin_unlock_irqrestore(&sb->map[index].swap_lock, flags); |
| | 41 | + return ret; |
| | 42 | +} |
| 22 | 43 | |
| 23 | 44 | int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, |
| 24 | 45 | gfp_t flags, int node) |
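The new `sbitmap_deferred_clear()` is the core of this patch: freed bits are first parked in a per-word `cleared` mask and only folded back into the allocation `word` in one batch, so the free fast path stays off the contended word. Below is a minimal userspace sketch of that fold step, assuming C11 atomics and a pthread mutex in place of the kernel's `xchg()`/`cmpxchg()` and `swap_lock`; the `word_pair` type and every name in it are illustrative, not the kernel API.

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct word_pair {
    _Atomic unsigned long word;     /* bits currently handed out */
    _Atomic unsigned long cleared;  /* bits freed but not yet folded back */
    pthread_mutex_t swap_lock;      /* serializes the fold itself */
};

/* Fold the deferred 'cleared' bits back into 'word' in one batch. */
static bool deferred_clear(struct word_pair *w)
{
    bool ret = false;

    pthread_mutex_lock(&w->swap_lock);

    if (atomic_load(&w->cleared)) {
        /* Grab a stable snapshot of the cleared mask and reset it. */
        unsigned long mask = atomic_exchange(&w->cleared, 0);
        /* Drop those bits from the allocation word with a CAS loop. */
        unsigned long val = atomic_load(&w->word);

        while (!atomic_compare_exchange_weak(&w->word, &val, val & ~mask))
            ;
        ret = true;
    }

    pthread_mutex_unlock(&w->swap_lock);
    return ret;
}
```

Returning `false` when nothing was pending lets callers use the fold as a "did anything change?" test, which the allocation retry loop added later in this patch relies on.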
| .. | .. | |
|---|---|---|
| 59 | 80 | for (i = 0; i < sb->map_nr; i++) { |
| 60 | 81 | sb->map[i].depth = min(depth, bits_per_word); |
| 61 | 82 | depth -= sb->map[i].depth; |
| | 83 | + spin_lock_init(&sb->map[i].swap_lock); |
| 62 | 84 | } |
| 63 | 85 | return 0; |
| 64 | 86 | } |
| .. | .. | |
|---|---|---|
| 68 | 90 | { |
| 69 | 91 | unsigned int bits_per_word = 1U << sb->shift; |
| 70 | 92 | unsigned int i; |
| | 93 | + |
| | 94 | + for (i = 0; i < sb->map_nr; i++) |
| | 95 | + sbitmap_deferred_clear(sb, i); |
| 71 | 96 | |
| 72 | 97 | sb->depth = depth; |
| 73 | 98 | sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); |
| .. | .. | |
|---|---|---|
| 111 | 136 | return nr; |
| 112 | 137 | } |
| 113 | 138 | |
| | 139 | +static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index, |
| | 140 | + unsigned int alloc_hint, bool round_robin) |
| | 141 | +{ |
| | 142 | + int nr; |
| | 143 | + |
| | 144 | + do { |
| | 145 | + nr = __sbitmap_get_word(&sb->map[index].word, |
| | 146 | + sb->map[index].depth, alloc_hint, |
| | 147 | + !round_robin); |
| | 148 | + if (nr != -1) |
| | 149 | + break; |
| | 150 | + if (!sbitmap_deferred_clear(sb, index)) |
| | 151 | + break; |
| | 152 | + } while (1); |
| | 153 | + |
| | 154 | + return nr; |
| | 155 | +} |
| | 156 | + |
| 114 | 157 | int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin) |
| 115 | 158 | { |
| 116 | 159 | unsigned int i, index; |
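`sbitmap_find_bit_in_index()` gives the per-word search a second chance: when a word looks full it folds any deferred clears and retries, and gives up only once a fold produces nothing new. A hedged sketch of that retry shape, building on the `word_pair`/`deferred_clear()` sketch above and using a simplified lowest-free-bit search in place of `__sbitmap_get_word()` (no hint or wrap handling, 64-bit `unsigned long` assumed):

```c
/* Claim a free bit in one word, folding deferred clears when it looks full.
 * Returns the claimed bit index, or -1 if the word is genuinely full. */
static int find_bit_in_word(struct word_pair *w, unsigned int depth)
{
    for (;;) {
        unsigned long val = atomic_load(&w->word);
        unsigned long depth_mask = (depth >= 64) ? ~0UL : (1UL << depth) - 1;
        unsigned long free_mask = ~val & depth_mask;

        if (free_mask) {
            int nr = __builtin_ctzl(free_mask);    /* lowest free bit */

            /* Try to claim it; if another thread won, look again. */
            if (!(atomic_fetch_or(&w->word, 1UL << nr) & (1UL << nr)))
                return nr;
            continue;
        }

        /* Word looks full: retry only if folding freed something. */
        if (!deferred_clear(w))
            return -1;
    }
}
```

The property mirrored from the patch is the exit condition: the loop can only return -1 after `deferred_clear()` reports that no deferred bits were left to fold.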
| .. | .. | |
|---|---|---|
| 118 | 161 | |
| 119 | 162 | index = SB_NR_TO_INDEX(sb, alloc_hint); |
| 120 | 163 | |
| | 164 | + /* |
| | 165 | + * Unless we're doing round robin tag allocation, just use the |
| | 166 | + * alloc_hint to find the right word index. No point in looping |
| | 167 | + * twice in find_next_zero_bit() for that case. |
| | 168 | + */ |
| | 169 | + if (round_robin) |
| | 170 | + alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); |
| | 171 | + else |
| | 172 | + alloc_hint = 0; |
| | 173 | + |
| 121 | 174 | for (i = 0; i < sb->map_nr; i++) { |
| 122 | | - nr = __sbitmap_get_word(&sb->map[index].word, |
| 123 | | - sb->map[index].depth, |
| 124 | | - SB_NR_TO_BIT(sb, alloc_hint), |
| 125 | | - !round_robin); |
| | 175 | + nr = sbitmap_find_bit_in_index(sb, index, alloc_hint, |
| | 176 | + round_robin); |
| 126 | 177 | if (nr != -1) { |
| 127 | 178 | nr += index << sb->shift; |
| 128 | 179 | break; |
| 129 | 180 | } |
| 130 | 181 | |
| 131 | 182 | /* Jump to next index. */ |
| 132 | | - index++; |
| 133 | | - alloc_hint = index << sb->shift; |
| 134 | | - |
| 135 | | - if (index >= sb->map_nr) { |
| | 183 | + alloc_hint = 0; |
| | 184 | + if (++index >= sb->map_nr) |
| 136 | 185 | index = 0; |
| 137 | | - alloc_hint = 0; |
| 138 | | - } |
| 139 | 186 | } |
| 140 | 187 | |
| 141 | 188 | return nr; |
| .. | .. | |
|---|---|---|
| 151 | 198 | index = SB_NR_TO_INDEX(sb, alloc_hint); |
| 152 | 199 | |
| 153 | 200 | for (i = 0; i < sb->map_nr; i++) { |
| | 201 | +again: |
| 154 | 202 | nr = __sbitmap_get_word(&sb->map[index].word, |
| 155 | 203 | min(sb->map[index].depth, shallow_depth), |
| 156 | 204 | SB_NR_TO_BIT(sb, alloc_hint), true); |
| .. | .. | |
|---|---|---|
| 158 | 206 | nr += index << sb->shift; |
| 159 | 207 | break; |
| 160 | 208 | } |
| | 209 | + |
| | 210 | + if (sbitmap_deferred_clear(sb, index)) |
| | 211 | + goto again; |
| 161 | 212 | |
| 162 | 213 | /* Jump to next index. */ |
| 163 | 214 | index++; |
| .. | .. | |
|---|---|---|
| 178 | 229 | unsigned int i; |
| 179 | 230 | |
| 180 | 231 | for (i = 0; i < sb->map_nr; i++) { |
| 181 | | - if (sb->map[i].word) |
| | 232 | + if (sb->map[i].word & ~sb->map[i].cleared) |
| 182 | 233 | return true; |
| 183 | 234 | } |
| 184 | 235 | return false; |
| 185 | 236 | } |
| 186 | 237 | EXPORT_SYMBOL_GPL(sbitmap_any_bit_set); |
| 187 | 238 | |
| 188 | | -bool sbitmap_any_bit_clear(const struct sbitmap *sb) |
| 189 | | -{ |
| 190 | | - unsigned int i; |
| 191 | | - |
| 192 | | - for (i = 0; i < sb->map_nr; i++) { |
| 193 | | - const struct sbitmap_word *word = &sb->map[i]; |
| 194 | | - unsigned long ret; |
| 195 | | - |
| 196 | | - ret = find_first_zero_bit(&word->word, word->depth); |
| 197 | | - if (ret < word->depth) |
| 198 | | - return true; |
| 199 | | - } |
| 200 | | - return false; |
| 201 | | -} |
| 202 | | -EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear); |
| 203 | | - |
| 204 | | -unsigned int sbitmap_weight(const struct sbitmap *sb) |
| | 239 | +static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set) |
| 205 | 240 | { |
| 206 | 241 | unsigned int i, weight = 0; |
| 207 | 242 | |
| 208 | 243 | for (i = 0; i < sb->map_nr; i++) { |
| 209 | 244 | const struct sbitmap_word *word = &sb->map[i]; |
| 210 | 245 | |
| 211 | | - weight += bitmap_weight(&word->word, word->depth); |
| | 246 | + if (set) |
| | 247 | + weight += bitmap_weight(&word->word, word->depth); |
| | 248 | + else |
| | 249 | + weight += bitmap_weight(&word->cleared, word->depth); |
| 212 | 250 | } |
| 213 | 251 | return weight; |
| 214 | 252 | } |
| 215 | | -EXPORT_SYMBOL_GPL(sbitmap_weight); |
| | 253 | + |
| | 254 | +static unsigned int sbitmap_weight(const struct sbitmap *sb) |
| | 255 | +{ |
| | 256 | + return __sbitmap_weight(sb, true); |
| | 257 | +} |
| | 258 | + |
| | 259 | +static unsigned int sbitmap_cleared(const struct sbitmap *sb) |
| | 260 | +{ |
| | 261 | + return __sbitmap_weight(sb, false); |
| | 262 | +} |
| 216 | 263 | |
| 217 | 264 | void sbitmap_show(struct sbitmap *sb, struct seq_file *m) |
| 218 | 265 | { |
| 219 | 266 | seq_printf(m, "depth=%u\n", sb->depth); |
| 220 | | - seq_printf(m, "busy=%u\n", sbitmap_weight(sb)); |
| | 267 | + seq_printf(m, "busy=%u\n", sbitmap_weight(sb) - sbitmap_cleared(sb)); |
| | 268 | + seq_printf(m, "cleared=%u\n", sbitmap_cleared(sb)); |
| 221 | 269 | seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift); |
| 222 | 270 | seq_printf(m, "map_nr=%u\n", sb->map_nr); |
| 223 | 271 | } |
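Because freed-but-unfolded bits are still set in `word`, the debug output now subtracts the pending `cleared` count to report how many bits are really busy. Assuming a bit is only ever marked in `cleared` while it is still set in `word` (which is how the deferred free path behaves until the next fold), `weight - cleared` is the same number as the popcount of `word & ~cleared`. A tiny hedged illustration of that accounting, using a GCC/Clang builtin rather than the kernel's `bitmap_weight()`:

```c
/* Bits truly in use in one word: set in 'word' and not pending in 'cleared'.
 * Assumes every cleared bit is still set in 'word' until the next fold, so
 * this equals popcount(word) - popcount(cleared). */
static unsigned int busy_bits(unsigned long word, unsigned long cleared)
{
    return (unsigned int)__builtin_popcountl(word & ~cleared);
}
```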
| .. | .. | |
|---|---|---|
| 244 | 292 | |
| 245 | 293 | for (i = 0; i < sb->map_nr; i++) { |
| 246 | 294 | unsigned long word = READ_ONCE(sb->map[i].word); |
| | 295 | + unsigned long cleared = READ_ONCE(sb->map[i].cleared); |
| 247 | 296 | unsigned int word_bits = READ_ONCE(sb->map[i].depth); |
| | 297 | + |
| | 298 | + word &= ~cleared; |
| 248 | 299 | |
| 249 | 300 | while (word_bits > 0) { |
| 250 | 301 | unsigned int bits = min(8 - byte_bits, word_bits); |
| .. | .. | |
|---|---|---|
| 325 | 376 | sbq->min_shallow_depth = UINT_MAX; |
| 326 | 377 | sbq->wake_batch = sbq_calc_wake_batch(sbq, depth); |
| 327 | 378 | atomic_set(&sbq->wake_index, 0); |
| | 379 | + atomic_set(&sbq->ws_active, 0); |
| 328 | 380 | |
| 329 | 381 | sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); |
| 330 | 382 | if (!sbq->ws) { |
| .. | .. | |
|---|---|---|
| 440 | 492 | { |
| 441 | 493 | int i, wake_index; |
| 442 | 494 | |
| | 495 | + if (!atomic_read(&sbq->ws_active)) |
| | 496 | + return NULL; |
| | 497 | + |
| 443 | 498 | wake_index = atomic_read(&sbq->wake_index); |
| 444 | 499 | for (i = 0; i < SBQ_WAIT_QUEUES; i++) { |
| 445 | 500 | struct sbq_wait_state *ws = &sbq->ws[wake_index]; |
| 446 | 501 | |
| 447 | 502 | if (waitqueue_active(&ws->wait)) { |
| 448 | | - int o = atomic_read(&sbq->wake_index); |
| 449 | | - |
| 450 | | - if (wake_index != o) |
| 451 | | - atomic_cmpxchg(&sbq->wake_index, o, wake_index); |
| | 503 | + if (wake_index != atomic_read(&sbq->wake_index)) |
| | 504 | + atomic_set(&sbq->wake_index, wake_index); |
| 452 | 505 | return ws; |
| 453 | 506 | } |
| 454 | 507 | |
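`ws_active` counts how many waiters are currently registered, so `sbq_wake_ptr()` can now return before scanning all `SBQ_WAIT_QUEUES` wait queues when nobody is sleeping. The `wake_index` update is also relaxed from `atomic_cmpxchg()` to a plain `atomic_set()`: it is only a scan-start hint, so losing a race on it is harmless. A hedged userspace sketch of that early-out shape with C11 atomics (the struct layout, the per-queue `sleepers` counter, and the modulo wrap are illustrative stand-ins for the kernel's wait queues and `sbq_index_inc()`):

```c
#include <stdatomic.h>
#include <stddef.h>

#define NUM_WAIT_QUEUES 8

struct waitq {
    _Atomic int sleepers;          /* non-zero while someone waits here */
};

struct queue {
    struct waitq ws[NUM_WAIT_QUEUES];
    _Atomic int ws_active;         /* total registered waiters */
    _Atomic int wake_index;        /* hint: where the last sleeper was found */
};

/* Pick a wait queue with sleepers, or NULL; cheap when nobody waits. */
static struct waitq *wake_ptr(struct queue *q)
{
    /* Fast path: no registered waiters anywhere, skip the scan. */
    if (!atomic_load(&q->ws_active))
        return NULL;

    int wake_index = atomic_load(&q->wake_index);

    for (int i = 0; i < NUM_WAIT_QUEUES; i++) {
        struct waitq *ws = &q->ws[wake_index];

        if (atomic_load(&ws->sleepers)) {
            /* A plain store is enough: wake_index is only a hint. */
            if (wake_index != atomic_load(&q->wake_index))
                atomic_store(&q->wake_index, wake_index);
            return ws;
        }
        wake_index = (wake_index + 1) % NUM_WAIT_QUEUES;
    }
    return NULL;
}
```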
| .. | .. | |
|---|---|---|
| 509 | 562 | void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, |
| 510 | 563 | unsigned int cpu) |
| 511 | 564 | { |
| 512 | | - sbitmap_clear_bit_unlock(&sbq->sb, nr); |
| | 565 | + /* |
| | 566 | + * Once the clear bit is set, the bit may be allocated out. |
| | 567 | + * |
| | 568 | + * Orders READ/WRITE on the associated instance (such as request |
| | 569 | + * of blk_mq) by this bit for avoiding race with re-allocation, |
| | 570 | + * and its pair is the memory barrier implied in __sbitmap_get_word. |
| | 571 | + * |
| | 572 | + * One invariant is that the clear bit has to be zero when the bit |
| | 573 | + * is in use. |
| | 574 | + */ |
| | 575 | + smp_mb__before_atomic(); |
| | 576 | + sbitmap_deferred_clear_bit(&sbq->sb, nr); |
| | 577 | + |
| 513 | 578 | /* |
| 514 | 579 | * Pairs with the memory barrier in set_current_state() to ensure the |
| 515 | 580 | * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker |
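Setting a bit in `cleared` effectively publishes it for re-allocation, so every store made to the object that bit protects (a blk-mq request, for example) has to be visible before that point; `smp_mb__before_atomic()` provides that ordering on the free side, paired with the barrier implied on the allocation side. A minimal userspace analogue of the same requirement, assuming C11 release/acquire on a single tag (illustrative only; the kernel path uses a full barrier plus atomic bitops across many tags):

```c
#include <stdatomic.h>

struct tag {
    int payload;           /* data owned by whoever holds the tag */
    _Atomic int free;      /* 1 when the tag may be re-allocated */
};

/* Free side: make the payload writes visible before publishing the tag. */
static void put_tag(struct tag *t, int last_value)
{
    t->payload = last_value;
    atomic_store_explicit(&t->free, 1, memory_order_release);
}

/* Allocation side: claiming the tag must see the previous owner's writes. */
static int get_tag(struct tag *t)
{
    int expected = 1;

    while (!atomic_compare_exchange_weak_explicit(&t->free, &expected, 0,
                                                  memory_order_acquire,
                                                  memory_order_relaxed))
        expected = 1;      /* not free yet (or spurious failure): retry */
    return t->payload;     /* ordered after put_tag()'s payload store */
}
```

Without the release/acquire pair, `get_tag()` could see the tag as free yet read a stale `payload`, which is exactly the re-allocation race the new comment warns about.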
| .. | .. | |
|---|---|---|
| 564 | 629 | |
| 565 | 630 | seq_printf(m, "wake_batch=%u\n", sbq->wake_batch); |
| 566 | 631 | seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index)); |
| | 632 | + seq_printf(m, "ws_active=%d\n", atomic_read(&sbq->ws_active)); |
| 567 | 633 | |
| 568 | 634 | seq_puts(m, "ws={\n"); |
| 569 | 635 | for (i = 0; i < SBQ_WAIT_QUEUES; i++) { |
| .. | .. | |
|---|---|---|
| 579 | 645 | seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth); |
| 580 | 646 | } |
| 581 | 647 | EXPORT_SYMBOL_GPL(sbitmap_queue_show); |
| | 648 | + |
| | 649 | +void sbitmap_add_wait_queue(struct sbitmap_queue *sbq, |
| | 650 | + struct sbq_wait_state *ws, |
| | 651 | + struct sbq_wait *sbq_wait) |
| | 652 | +{ |
| | 653 | + if (!sbq_wait->sbq) { |
| | 654 | + sbq_wait->sbq = sbq; |
| | 655 | + atomic_inc(&sbq->ws_active); |
| | 656 | + add_wait_queue(&ws->wait, &sbq_wait->wait); |
| | 657 | + } |
| | 658 | +} |
| | 659 | +EXPORT_SYMBOL_GPL(sbitmap_add_wait_queue); |
| | 660 | + |
| | 661 | +void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait) |
| | 662 | +{ |
| | 663 | + list_del_init(&sbq_wait->wait.entry); |
| | 664 | + if (sbq_wait->sbq) { |
| | 665 | + atomic_dec(&sbq_wait->sbq->ws_active); |
| | 666 | + sbq_wait->sbq = NULL; |
| | 667 | + } |
| | 668 | +} |
| | 669 | +EXPORT_SYMBOL_GPL(sbitmap_del_wait_queue); |
| | 670 | + |
| | 671 | +void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq, |
| | 672 | + struct sbq_wait_state *ws, |
| | 673 | + struct sbq_wait *sbq_wait, int state) |
| | 674 | +{ |
| | 675 | + if (!sbq_wait->sbq) { |
| | 676 | + atomic_inc(&sbq->ws_active); |
| | 677 | + sbq_wait->sbq = sbq; |
| | 678 | + } |
| | 679 | + prepare_to_wait_exclusive(&ws->wait, &sbq_wait->wait, state); |
| | 680 | +} |
| | 681 | +EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait); |
| | 682 | + |
| | 683 | +void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, |
| | 684 | + struct sbq_wait *sbq_wait) |
| | 685 | +{ |
| | 686 | + finish_wait(&ws->wait, &sbq_wait->wait); |
| | 687 | + if (sbq_wait->sbq) { |
| | 688 | + atomic_dec(&sbq->ws_active); |
| | 689 | + sbq_wait->sbq = NULL; |
| | 690 | + } |
| | 691 | +} |
| | 692 | +EXPORT_SYMBOL_GPL(sbitmap_finish_wait); |
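The new wait helpers keep `ws_active` consistent with the number of registered waiters: `sbq_wait->sbq` doubles as an "already counted" flag, so a task that re-arms its wait entry repeatedly from a wait loop is only counted once, and the matching del/finish call drops the count exactly once. A hedged sketch of just that bookkeeping pattern (plain C11 atomics; the actual wait-queue add/remove is elided):

```c
#include <stdatomic.h>
#include <stddef.h>

struct queue {
    _Atomic int ws_active;      /* waiters currently registered */
};

struct waiter {
    struct queue *sbq;          /* non-NULL while counted in ws_active */
    /* ...the wait-queue entry itself would live here... */
};

/* May be called many times before the matching finish; counts once. */
static void waiter_prepare(struct queue *q, struct waiter *w)
{
    if (!w->sbq) {
        w->sbq = q;
        atomic_fetch_add(&q->ws_active, 1);
    }
    /* ...add to / re-arm the wait queue here... */
}

/* Undoes the accounting exactly once, even after repeated prepares. */
static void waiter_finish(struct waiter *w)
{
    /* ...remove from the wait queue here... */
    if (w->sbq) {
        atomic_fetch_sub(&w->sbq->ws_active, 1);
        w->sbq = NULL;
    }
}
```

This accounting is what lets the `sbq_wake_ptr()` fast path earlier in the patch trust a zero `ws_active` as "no one is waiting".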
|---|