hc
2024-01-05 071106ecf68c401173c58808b1cf5f68cc50d390
kernel/lib/sbitmap.c
....@@ -1,24 +1,45 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Copyright (C) 2016 Facebook
34 * Copyright (C) 2013-2014 Jens Axboe
4
- *
5
- * This program is free software; you can redistribute it and/or
6
- * modify it under the terms of the GNU General Public
7
- * License v2 as published by the Free Software Foundation.
8
- *
9
- * This program is distributed in the hope that it will be useful,
10
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
- * General Public License for more details.
13
- *
14
- * You should have received a copy of the GNU General Public License
15
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
165 */
176
187 #include <linux/sched.h>
198 #include <linux/random.h>
209 #include <linux/sbitmap.h>
2110 #include <linux/seq_file.h>
11
+
12
+/*
13
+ * See if we have deferred clears that we can batch move
14
+ */
15
+static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index)
16
+{
17
+ unsigned long mask, val;
18
+ bool ret = false;
19
+ unsigned long flags;
20
+
21
+ spin_lock_irqsave(&sb->map[index].swap_lock, flags);
22
+
23
+ if (!sb->map[index].cleared)
24
+ goto out_unlock;
25
+
26
+ /*
27
+ * First get a stable cleared mask, setting the old mask to 0.
28
+ */
29
+ mask = xchg(&sb->map[index].cleared, 0);
30
+
31
+ /*
32
+ * Now clear the masked bits in our free word
33
+ */
34
+ do {
35
+ val = sb->map[index].word;
36
+ } while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val);
37
+
38
+ ret = true;
39
+out_unlock:
40
+ spin_unlock_irqrestore(&sb->map[index].swap_lock, flags);
41
+ return ret;
42
+}
2243
2344 int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
2445 gfp_t flags, int node)
....@@ -59,6 +80,7 @@
5980 for (i = 0; i < sb->map_nr; i++) {
6081 sb->map[i].depth = min(depth, bits_per_word);
6182 depth -= sb->map[i].depth;
83
+ spin_lock_init(&sb->map[i].swap_lock);
6284 }
6385 return 0;
6486 }
....@@ -68,6 +90,9 @@
6890 {
6991 unsigned int bits_per_word = 1U << sb->shift;
7092 unsigned int i;
93
+
94
+ for (i = 0; i < sb->map_nr; i++)
95
+ sbitmap_deferred_clear(sb, i);
7196
7297 sb->depth = depth;
7398 sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
....@@ -111,6 +136,24 @@
111136 return nr;
112137 }
113138
139
+static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
140
+ unsigned int alloc_hint, bool round_robin)
141
+{
142
+ int nr;
143
+
144
+ do {
145
+ nr = __sbitmap_get_word(&sb->map[index].word,
146
+ sb->map[index].depth, alloc_hint,
147
+ !round_robin);
148
+ if (nr != -1)
149
+ break;
150
+ if (!sbitmap_deferred_clear(sb, index))
151
+ break;
152
+ } while (1);
153
+
154
+ return nr;
155
+}
156
+
114157 int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
115158 {
116159 unsigned int i, index;
....@@ -118,24 +161,28 @@
118161
119162 index = SB_NR_TO_INDEX(sb, alloc_hint);
120163
164
+ /*
165
+ * Unless we're doing round robin tag allocation, just use the
166
+ * alloc_hint to find the right word index. No point in looping
167
+ * twice in find_next_zero_bit() for that case.
168
+ */
169
+ if (round_robin)
170
+ alloc_hint = SB_NR_TO_BIT(sb, alloc_hint);
171
+ else
172
+ alloc_hint = 0;
173
+
121174 for (i = 0; i < sb->map_nr; i++) {
122
- nr = __sbitmap_get_word(&sb->map[index].word,
123
- sb->map[index].depth,
124
- SB_NR_TO_BIT(sb, alloc_hint),
125
- !round_robin);
175
+ nr = sbitmap_find_bit_in_index(sb, index, alloc_hint,
176
+ round_robin);
126177 if (nr != -1) {
127178 nr += index << sb->shift;
128179 break;
129180 }
130181
131182 /* Jump to next index. */
132
- index++;
133
- alloc_hint = index << sb->shift;
134
-
135
- if (index >= sb->map_nr) {
183
+ alloc_hint = 0;
184
+ if (++index >= sb->map_nr)
136185 index = 0;
137
- alloc_hint = 0;
138
- }
139186 }
140187
141188 return nr;
....@@ -151,6 +198,7 @@
151198 index = SB_NR_TO_INDEX(sb, alloc_hint);
152199
153200 for (i = 0; i < sb->map_nr; i++) {
201
+again:
154202 nr = __sbitmap_get_word(&sb->map[index].word,
155203 min(sb->map[index].depth, shallow_depth),
156204 SB_NR_TO_BIT(sb, alloc_hint), true);
....@@ -158,6 +206,9 @@
158206 nr += index << sb->shift;
159207 break;
160208 }
209
+
210
+ if (sbitmap_deferred_clear(sb, index))
211
+ goto again;
161212
162213 /* Jump to next index. */
163214 index++;
....@@ -178,46 +229,43 @@
178229 unsigned int i;
179230
180231 for (i = 0; i < sb->map_nr; i++) {
181
- if (sb->map[i].word)
232
+ if (sb->map[i].word & ~sb->map[i].cleared)
182233 return true;
183234 }
184235 return false;
185236 }
186237 EXPORT_SYMBOL_GPL(sbitmap_any_bit_set);
187238
188
-bool sbitmap_any_bit_clear(const struct sbitmap *sb)
189
-{
190
- unsigned int i;
191
-
192
- for (i = 0; i < sb->map_nr; i++) {
193
- const struct sbitmap_word *word = &sb->map[i];
194
- unsigned long ret;
195
-
196
- ret = find_first_zero_bit(&word->word, word->depth);
197
- if (ret < word->depth)
198
- return true;
199
- }
200
- return false;
201
-}
202
-EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear);
203
-
204
-unsigned int sbitmap_weight(const struct sbitmap *sb)
239
+static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set)
205240 {
206241 unsigned int i, weight = 0;
207242
208243 for (i = 0; i < sb->map_nr; i++) {
209244 const struct sbitmap_word *word = &sb->map[i];
210245
211
- weight += bitmap_weight(&word->word, word->depth);
246
+ if (set)
247
+ weight += bitmap_weight(&word->word, word->depth);
248
+ else
249
+ weight += bitmap_weight(&word->cleared, word->depth);
212250 }
213251 return weight;
214252 }
215
-EXPORT_SYMBOL_GPL(sbitmap_weight);
253
+
254
+static unsigned int sbitmap_weight(const struct sbitmap *sb)
255
+{
256
+ return __sbitmap_weight(sb, true);
257
+}
258
+
259
+static unsigned int sbitmap_cleared(const struct sbitmap *sb)
260
+{
261
+ return __sbitmap_weight(sb, false);
262
+}
216263
217264 void sbitmap_show(struct sbitmap *sb, struct seq_file *m)
218265 {
219266 seq_printf(m, "depth=%u\n", sb->depth);
220
- seq_printf(m, "busy=%u\n", sbitmap_weight(sb));
267
+ seq_printf(m, "busy=%u\n", sbitmap_weight(sb) - sbitmap_cleared(sb));
268
+ seq_printf(m, "cleared=%u\n", sbitmap_cleared(sb));
221269 seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift);
222270 seq_printf(m, "map_nr=%u\n", sb->map_nr);
223271 }
....@@ -244,7 +292,10 @@
244292
245293 for (i = 0; i < sb->map_nr; i++) {
246294 unsigned long word = READ_ONCE(sb->map[i].word);
295
+ unsigned long cleared = READ_ONCE(sb->map[i].cleared);
247296 unsigned int word_bits = READ_ONCE(sb->map[i].depth);
297
+
298
+ word &= ~cleared;
248299
249300 while (word_bits > 0) {
250301 unsigned int bits = min(8 - byte_bits, word_bits);
....@@ -325,6 +376,7 @@
325376 sbq->min_shallow_depth = UINT_MAX;
326377 sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
327378 atomic_set(&sbq->wake_index, 0);
379
+ atomic_set(&sbq->ws_active, 0);
328380
329381 sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
330382 if (!sbq->ws) {
....@@ -440,15 +492,16 @@
440492 {
441493 int i, wake_index;
442494
495
+ if (!atomic_read(&sbq->ws_active))
496
+ return NULL;
497
+
443498 wake_index = atomic_read(&sbq->wake_index);
444499 for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
445500 struct sbq_wait_state *ws = &sbq->ws[wake_index];
446501
447502 if (waitqueue_active(&ws->wait)) {
448
- int o = atomic_read(&sbq->wake_index);
449
-
450
- if (wake_index != o)
451
- atomic_cmpxchg(&sbq->wake_index, o, wake_index);
503
+ if (wake_index != atomic_read(&sbq->wake_index))
504
+ atomic_set(&sbq->wake_index, wake_index);
452505 return ws;
453506 }
454507
....@@ -509,7 +562,19 @@
509562 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
510563 unsigned int cpu)
511564 {
512
- sbitmap_clear_bit_unlock(&sbq->sb, nr);
565
+ /*
566
+	 * Once the clear bit is set, the bit may be re-allocated.
567
+ *
568
+	 * Orders READ/WRITE on the associated instance (such as request
569
+ * of blk_mq) by this bit for avoiding race with re-allocation,
570
+ * and its pair is the memory barrier implied in __sbitmap_get_word.
571
+ *
572
+ * One invariant is that the clear bit has to be zero when the bit
573
+ * is in use.
574
+ */
575
+ smp_mb__before_atomic();
576
+ sbitmap_deferred_clear_bit(&sbq->sb, nr);
577
+
513578 /*
514579 * Pairs with the memory barrier in set_current_state() to ensure the
515580 * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
....@@ -564,6 +629,7 @@
564629
565630 seq_printf(m, "wake_batch=%u\n", sbq->wake_batch);
566631 seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index));
632
+ seq_printf(m, "ws_active=%d\n", atomic_read(&sbq->ws_active));
567633
568634 seq_puts(m, "ws={\n");
569635 for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
....@@ -579,3 +645,48 @@
579645 seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth);
580646 }
581647 EXPORT_SYMBOL_GPL(sbitmap_queue_show);
648
+
649
+void sbitmap_add_wait_queue(struct sbitmap_queue *sbq,
650
+ struct sbq_wait_state *ws,
651
+ struct sbq_wait *sbq_wait)
652
+{
653
+ if (!sbq_wait->sbq) {
654
+ sbq_wait->sbq = sbq;
655
+ atomic_inc(&sbq->ws_active);
656
+ add_wait_queue(&ws->wait, &sbq_wait->wait);
657
+ }
658
+}
659
+EXPORT_SYMBOL_GPL(sbitmap_add_wait_queue);
660
+
661
+void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait)
662
+{
663
+ list_del_init(&sbq_wait->wait.entry);
664
+ if (sbq_wait->sbq) {
665
+ atomic_dec(&sbq_wait->sbq->ws_active);
666
+ sbq_wait->sbq = NULL;
667
+ }
668
+}
669
+EXPORT_SYMBOL_GPL(sbitmap_del_wait_queue);
670
+
671
+void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
672
+ struct sbq_wait_state *ws,
673
+ struct sbq_wait *sbq_wait, int state)
674
+{
675
+ if (!sbq_wait->sbq) {
676
+ atomic_inc(&sbq->ws_active);
677
+ sbq_wait->sbq = sbq;
678
+ }
679
+ prepare_to_wait_exclusive(&ws->wait, &sbq_wait->wait, state);
680
+}
681
+EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait);
682
+
683
+void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
684
+ struct sbq_wait *sbq_wait)
685
+{
686
+ finish_wait(&ws->wait, &sbq_wait->wait);
687
+ if (sbq_wait->sbq) {
688
+ atomic_dec(&sbq->ws_active);
689
+ sbq_wait->sbq = NULL;
690
+ }
691
+}
692
+EXPORT_SYMBOL_GPL(sbitmap_finish_wait);