+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2016 Facebook
  * Copyright (C) 2013-2014 Jens Axboe
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
 
 #include <linux/sched.h>
 #include <linux/random.h>
 #include <linux/sbitmap.h>
 #include <linux/seq_file.h>
+
+/*
+ * See if we have deferred clears that we can batch move
+ */
+static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index)
+{
+	unsigned long mask, val;
+	bool ret = false;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sb->map[index].swap_lock, flags);
+
+	if (!sb->map[index].cleared)
+		goto out_unlock;
+
+	/*
+	 * First get a stable cleared mask, setting the old mask to 0.
+	 */
+	mask = xchg(&sb->map[index].cleared, 0);
+
+	/*
+	 * Now clear the masked bits in our free word
+	 */
+	do {
+		val = sb->map[index].word;
+	} while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val);
+
+	ret = true;
+out_unlock:
+	spin_unlock_irqrestore(&sb->map[index].swap_lock, flags);
+	return ret;
+}
 
 int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
 		      gfp_t flags, int node)
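
The core of the change is this two-word scheme: freeing a bit no longer clears it in ->word; it is parked in the new ->cleared mask and folded back in batches under swap_lock. A minimal user-space sketch of the same idea, with C11 atomics standing in for the kernel's xchg()/cmpxchg() (the toy_* names are illustrative, not part of the patch):

#include <stdatomic.h>
#include <stdbool.h>

/* Toy model of one sbitmap word: allocated bits live in word, freed
 * bits are parked in cleared until an allocator folds them back. */
struct toy_word {
	_Atomic unsigned long word;	/* 1 = allocated */
	_Atomic unsigned long cleared;	/* 1 = freed, not yet folded back */
};

/* Fold parked clears back into word, as sbitmap_deferred_clear() does.
 * Returns true if any bits were reclaimed.  (The real code also
 * serializes callers with swap_lock; the CAS loop is still needed
 * because allocators set bits in word locklessly.) */
static bool toy_deferred_clear(struct toy_word *w)
{
	unsigned long mask, val;

	mask = atomic_exchange(&w->cleared, 0);	/* stable snapshot */
	if (!mask)
		return false;

	val = atomic_load(&w->word);
	while (!atomic_compare_exchange_weak(&w->word, &val, val & ~mask))
		;	/* word changed under us: retry with updated val */
	return true;
}
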
...
 	for (i = 0; i < sb->map_nr; i++) {
 		sb->map[i].depth = min(depth, bits_per_word);
 		depth -= sb->map[i].depth;
+		spin_lock_init(&sb->map[i].swap_lock);
 	}
 	return 0;
 }
...
 {
 	unsigned int bits_per_word = 1U << sb->shift;
 	unsigned int i;
+
+	for (i = 0; i < sb->map_nr; i++)
+		sbitmap_deferred_clear(sb, i);
 
 	sb->depth = depth;
 	sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
...
 	return nr;
 }
 
+static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
+				     unsigned int alloc_hint, bool round_robin)
+{
+	int nr;
+
+	do {
+		nr = __sbitmap_get_word(&sb->map[index].word,
+					sb->map[index].depth, alloc_hint,
+					!round_robin);
+		if (nr != -1)
+			break;
+		if (!sbitmap_deferred_clear(sb, index))
+			break;
+	} while (1);
+
+	return nr;
+}
+
 int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
 {
 	unsigned int i, index;
...
 
 	index = SB_NR_TO_INDEX(sb, alloc_hint);
 
+	/*
+	 * Unless we're doing round robin tag allocation, just use the
+	 * alloc_hint to find the right word index. No point in looping
+	 * twice in find_next_zero_bit() for that case.
+	 */
+	if (round_robin)
+		alloc_hint = SB_NR_TO_BIT(sb, alloc_hint);
+	else
+		alloc_hint = 0;
+
 	for (i = 0; i < sb->map_nr; i++) {
-		nr = __sbitmap_get_word(&sb->map[index].word,
-					sb->map[index].depth,
-					SB_NR_TO_BIT(sb, alloc_hint),
-					!round_robin);
+		nr = sbitmap_find_bit_in_index(sb, index, alloc_hint,
+					       round_robin);
 		if (nr != -1) {
 			nr += index << sb->shift;
 			break;
 		}
 
 		/* Jump to next index. */
-		index++;
-		alloc_hint = index << sb->shift;
-
-		if (index >= sb->map_nr) {
+		alloc_hint = 0;
+		if (++index >= sb->map_nr)
 			index = 0;
-			alloc_hint = 0;
-		}
 	}
 
 	return nr;
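
The retry in sbitmap_find_bit_in_index() is worth pausing on: a word can look exhausted while every free bit in it is merely parked in ->cleared, so a failed scan is only final once a deferred-clear pass reclaims nothing. Continuing the toy model from the first sketch (again illustrative, not the patch's code):

/* Allocate the lowest free bit, folding parked clears back in when the
 * word looks full.  Returns the bit number, or -1 if truly exhausted. */
static int toy_alloc(struct toy_word *w, unsigned int depth)
{
	unsigned long depth_mask = depth < 64 ? (1UL << depth) - 1 : ~0UL;

	do {
		unsigned long val = atomic_load(&w->word);
		unsigned long free = ~val & depth_mask;

		while (free) {
			unsigned int bit = __builtin_ctzl(free);

			if (atomic_compare_exchange_strong(&w->word, &val,
							   val | (1UL << bit)))
				return bit;
			free = ~val & depth_mask;	/* lost a race, rescan */
		}
	} while (toy_deferred_clear(w));	/* looked full: reclaim, retry */

	return -1;
}

The same reasoning is behind the goto again added to the shallow-get path in the next hunk.
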
...
 	index = SB_NR_TO_INDEX(sb, alloc_hint);
 
 	for (i = 0; i < sb->map_nr; i++) {
+again:
 		nr = __sbitmap_get_word(&sb->map[index].word,
 					min(sb->map[index].depth, shallow_depth),
 					SB_NR_TO_BIT(sb, alloc_hint), true);
...
 			nr += index << sb->shift;
 			break;
 		}
+
+		if (sbitmap_deferred_clear(sb, index))
+			goto again;
 
 		/* Jump to next index. */
 		index++;
...
 	unsigned int i;
 
 	for (i = 0; i < sb->map_nr; i++) {
-		if (sb->map[i].word)
+		if (sb->map[i].word & ~sb->map[i].cleared)
 			return true;
 	}
 	return false;
 }
 EXPORT_SYMBOL_GPL(sbitmap_any_bit_set);
 
-bool sbitmap_any_bit_clear(const struct sbitmap *sb)
-{
-	unsigned int i;
-
-	for (i = 0; i < sb->map_nr; i++) {
-		const struct sbitmap_word *word = &sb->map[i];
-		unsigned long ret;
-
-		ret = find_first_zero_bit(&word->word, word->depth);
-		if (ret < word->depth)
-			return true;
-	}
-	return false;
-}
-EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear);
-
-unsigned int sbitmap_weight(const struct sbitmap *sb)
+static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set)
 {
 	unsigned int i, weight = 0;
 
 	for (i = 0; i < sb->map_nr; i++) {
 		const struct sbitmap_word *word = &sb->map[i];
 
-		weight += bitmap_weight(&word->word, word->depth);
+		if (set)
+			weight += bitmap_weight(&word->word, word->depth);
+		else
+			weight += bitmap_weight(&word->cleared, word->depth);
 	}
 	return weight;
 }
-EXPORT_SYMBOL_GPL(sbitmap_weight);
+
+static unsigned int sbitmap_weight(const struct sbitmap *sb)
+{
+	return __sbitmap_weight(sb, true);
+}
+
+static unsigned int sbitmap_cleared(const struct sbitmap *sb)
+{
+	return __sbitmap_weight(sb, false);
+}
 
 void sbitmap_show(struct sbitmap *sb, struct seq_file *m)
 {
 	seq_printf(m, "depth=%u\n", sb->depth);
-	seq_printf(m, "busy=%u\n", sbitmap_weight(sb));
+	seq_printf(m, "busy=%u\n", sbitmap_weight(sb) - sbitmap_cleared(sb));
+	seq_printf(m, "cleared=%u\n", sbitmap_cleared(sb));
 	seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift);
 	seq_printf(m, "map_nr=%u\n", sb->map_nr);
 }
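
With clears deferred, the raw set-bit count over-reports: bits parked in ->cleared are still set in ->word, which is why sbitmap_show() now prints busy as weight minus cleared (and why sbitmap_any_bit_set() masks ->cleared above). Given the patch's invariant that a cleared bit is always still set in ->word, that difference equals the popcount of word & ~cleared. A small stand-alone illustration (toy_busy() is an illustrative name):

#include <stdio.h>

static unsigned int toy_busy(const unsigned long *word,
			     const unsigned long *cleared, int nr)
{
	unsigned int busy = 0;

	for (int i = 0; i < nr; i++)
		busy += __builtin_popcountl(word[i] & ~cleared[i]);
	return busy;
}

int main(void)
{
	unsigned long word[]    = { 0xffUL };	/* 8 bits allocated... */
	unsigned long cleared[] = { 0x0fUL };	/* ...4 freed, not yet folded */

	printf("busy=%u\n", toy_busy(word, cleared, 1));	/* prints 4 */
	return 0;
}
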
...
 
 	for (i = 0; i < sb->map_nr; i++) {
 		unsigned long word = READ_ONCE(sb->map[i].word);
+		unsigned long cleared = READ_ONCE(sb->map[i].cleared);
 		unsigned int word_bits = READ_ONCE(sb->map[i].depth);
+
+		word &= ~cleared;
 
 		while (word_bits > 0) {
 			unsigned int bits = min(8 - byte_bits, word_bits);
...
 	sbq->min_shallow_depth = UINT_MAX;
 	sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
 	atomic_set(&sbq->wake_index, 0);
+	atomic_set(&sbq->ws_active, 0);
 
 	sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
 	if (!sbq->ws) {
...
 {
 	int i, wake_index;
 
+	if (!atomic_read(&sbq->ws_active))
+		return NULL;
+
 	wake_index = atomic_read(&sbq->wake_index);
 	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
 		struct sbq_wait_state *ws = &sbq->ws[wake_index];
 
 		if (waitqueue_active(&ws->wait)) {
-			int o = atomic_read(&sbq->wake_index);
-
-			if (wake_index != o)
-				atomic_cmpxchg(&sbq->wake_index, o, wake_index);
+			if (wake_index != atomic_read(&sbq->wake_index))
+				atomic_set(&sbq->wake_index, wake_index);
 			return ws;
 		}
 
...
 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
 			 unsigned int cpu)
 {
-	sbitmap_clear_bit_unlock(&sbq->sb, nr);
+	/*
+	 * Once the clear bit is set, the bit may be allocated out.
+	 *
+	 * The barrier orders READ/WRITE on the associated instance (such
+	 * as a blk-mq request) against re-allocation of this bit; its
+	 * pair is the memory barrier implied in __sbitmap_get_word().
+	 *
+	 * One invariant is that the clear bit has to be zero when the bit
+	 * is in use.
+	 */
+	smp_mb__before_atomic();
+	sbitmap_deferred_clear_bit(&sbq->sb, nr);
+
 	/*
 	 * Pairs with the memory barrier in set_current_state() to ensure the
 	 * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
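
The freeing fast path is now a single atomic OR into ->cleared rather than a locked clear of ->word; the smp_mb__before_atomic() keeps writes to the object the bit guards visible before the bit can be recycled. In the toy model that looks roughly like this, with a C11 release fence standing in (loosely) for the kernel's stronger full barrier:

/* Toy free path: park the bit in cleared.  The fence orders prior
 * stores to the bit's payload before the bit becomes reclaimable. */
static void toy_queue_clear(struct toy_word *w, unsigned int bit)
{
	atomic_thread_fence(memory_order_release);	/* ~smp_mb__before_atomic() */
	atomic_fetch_or(&w->cleared, 1UL << bit);
}
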
...
 
 	seq_printf(m, "wake_batch=%u\n", sbq->wake_batch);
 	seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index));
+	seq_printf(m, "ws_active=%d\n", atomic_read(&sbq->ws_active));
 
 	seq_puts(m, "ws={\n");
 	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
...
 	seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_show);
+
+void sbitmap_add_wait_queue(struct sbitmap_queue *sbq,
+			    struct sbq_wait_state *ws,
+			    struct sbq_wait *sbq_wait)
+{
+	if (!sbq_wait->sbq) {
+		sbq_wait->sbq = sbq;
+		atomic_inc(&sbq->ws_active);
+		add_wait_queue(&ws->wait, &sbq_wait->wait);
+	}
+}
+EXPORT_SYMBOL_GPL(sbitmap_add_wait_queue);
+
+void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait)
+{
+	list_del_init(&sbq_wait->wait.entry);
+	if (sbq_wait->sbq) {
+		atomic_dec(&sbq_wait->sbq->ws_active);
+		sbq_wait->sbq = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(sbitmap_del_wait_queue);
+
+void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
+			     struct sbq_wait_state *ws,
+			     struct sbq_wait *sbq_wait, int state)
+{
+	if (!sbq_wait->sbq) {
+		atomic_inc(&sbq->ws_active);
+		sbq_wait->sbq = sbq;
+	}
+	prepare_to_wait_exclusive(&ws->wait, &sbq_wait->wait, state);
+}
+EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait);
+
+void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
+			 struct sbq_wait *sbq_wait)
+{
+	finish_wait(&ws->wait, &sbq_wait->wait);
+	if (sbq_wait->sbq) {
+		atomic_dec(&sbq->ws_active);
+		sbq_wait->sbq = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(sbitmap_finish_wait);