2024-01-05  071106ecf68c401173c58808b1cf5f68cc50d390
--- a/kernel/lib/rhashtable.c
+++ b/kernel/lib/rhashtable.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Resizable, Scalable, Concurrent Hash Table
  *
@@ -8,10 +9,6 @@
  * Code partially derived from nft_hash
  * Rewritten with rehash code from br_multicast plus single list
  * pointer as suggested by Josh Triplett
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/atomic.h>
@@ -31,11 +28,10 @@
 
 #define HASH_DEFAULT_SIZE 64UL
 #define HASH_MIN_SIZE 4U
-#define BUCKET_LOCKS_PER_CPU 32UL
 
 union nested_table {
         union nested_table __rcu *table;
-        struct rhash_head __rcu *bucket;
+        struct rhash_lock_head __rcu *bucket;
 };
 
 static u32 head_hashfn(struct rhashtable *ht,
@@ -56,14 +52,25 @@
 
 int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
 {
-        spinlock_t *lock = rht_bucket_lock(tbl, hash);
-
-        return (debug_locks) ? lockdep_is_held(lock) : 1;
+        if (!debug_locks)
+                return 1;
+        if (unlikely(tbl->nest))
+                return 1;
+        return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]);
 }
 EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
 #else
 #define ASSERT_RHT_MUTEX(HT)
 #endif
+
+static inline union nested_table *nested_table_top(
+        const struct bucket_table *tbl)
+{
+        /* The top-level bucket entry does not need RCU protection
+         * because it's set at the same time as tbl->nest.
+         */
+        return (void *)rcu_dereference_protected(tbl->buckets[0], 1);
+}
 
 static void nested_table_free(union nested_table *ntbl, unsigned int size)
 {
@@ -71,7 +78,7 @@
         const unsigned int len = 1 << shift;
         unsigned int i;
 
-        ntbl = rcu_dereference_raw(ntbl->table);
+        ntbl = rcu_dereference_protected(ntbl->table, 1);
         if (!ntbl)
                 return;
 
@@ -91,7 +98,7 @@
         union nested_table *ntbl;
         unsigned int i;
 
-        ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
+        ntbl = nested_table_top(tbl);
 
         for (i = 0; i < len; i++)
                 nested_table_free(ntbl + i, size);
@@ -104,7 +111,6 @@
         if (tbl->nest)
                 nested_bucket_table_free(tbl);
 
-        free_bucket_spinlocks(tbl->locks);
         kvfree(tbl);
 }
 
@@ -131,9 +137,11 @@
                         INIT_RHT_NULLS_HEAD(ntbl[i].bucket);
         }
 
-        rcu_assign_pointer(*prev, ntbl);
-
-        return ntbl;
+        if (cmpxchg((union nested_table **)prev, NULL, ntbl) == NULL)
+                return ntbl;
+        /* Raced with another thread. */
+        kfree(ntbl);
+        return rcu_dereference(*prev);
 }
 
 static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
@@ -169,11 +177,11 @@
                                                gfp_t gfp)
 {
         struct bucket_table *tbl = NULL;
-        size_t size, max_locks;
+        size_t size;
         int i;
+        static struct lock_class_key __key;
 
-        size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
-        tbl = kvzalloc(size, gfp);
+        tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp);
 
         size = nbuckets;
 
@@ -185,18 +193,11 @@
         if (tbl == NULL)
                 return NULL;
 
+        lockdep_init_map(&tbl->dep_map, "rhashtable_bucket", &__key, 0);
+
         tbl->size = size;
 
-        max_locks = size >> 1;
-        if (tbl->nest)
-                max_locks = min_t(size_t, max_locks, 1U << tbl->nest);
-
-        if (alloc_bucket_spinlocks(&tbl->locks, &tbl->locks_mask, max_locks,
-                                   ht->p.locks_mul, gfp) < 0) {
-                bucket_table_free(tbl);
-                return NULL;
-        }
-
+        rcu_head_init(&tbl->rcu);
         INIT_LIST_HEAD(&tbl->walkers);
 
         tbl->hash_rnd = get_random_u32();
@@ -220,14 +221,15 @@
         return new_tbl;
 }
 
-static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
+static int rhashtable_rehash_one(struct rhashtable *ht,
+                                 struct rhash_lock_head __rcu **bkt,
+                                 unsigned int old_hash)
 {
         struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
         struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl);
-        struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash);
         int err = -EAGAIN;
         struct rhash_head *head, *next, *entry;
-        spinlock_t *new_bucket_lock;
+        struct rhash_head __rcu **pprev = NULL;
         unsigned int new_hash;
 
         if (new_tbl->nest)
@@ -235,7 +237,8 @@
 
         err = -ENOENT;
 
-        rht_for_each(entry, old_tbl, old_hash) {
+        rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash),
+                          old_tbl, old_hash) {
                 err = 0;
                 next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
 
@@ -250,18 +253,19 @@
 
         new_hash = head_hashfn(ht, new_tbl, entry);
 
-        new_bucket_lock = rht_bucket_lock(new_tbl, new_hash);
+        rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], SINGLE_DEPTH_NESTING);
 
-        spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
-        head = rht_dereference_bucket(new_tbl->buckets[new_hash],
-                                      new_tbl, new_hash);
+        head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash);
 
         RCU_INIT_POINTER(entry->next, head);
 
-        rcu_assign_pointer(new_tbl->buckets[new_hash], entry);
-        spin_unlock(new_bucket_lock);
+        rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry);
 
-        rcu_assign_pointer(*pprev, next);
+        if (pprev)
+                rcu_assign_pointer(*pprev, next);
+        else
+                /* Need to preserve the bit lock. */
+                rht_assign_locked(bkt, next);
 
 out:
         return err;
@@ -271,20 +275,19 @@
                                     unsigned int old_hash)
 {
         struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
-        spinlock_t *old_bucket_lock;
+        struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash);
         int err;
 
-        old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);
+        if (!bkt)
+                return 0;
+        rht_lock(old_tbl, bkt);
 
-        spin_lock_bh(old_bucket_lock);
-        while (!(err = rhashtable_rehash_one(ht, old_hash)))
+        while (!(err = rhashtable_rehash_one(ht, bkt, old_hash)))
                 ;
 
-        if (err == -ENOENT) {
-                old_tbl->rehash++;
+        if (err == -ENOENT)
                 err = 0;
-        }
-        spin_unlock_bh(old_bucket_lock);
+        rht_unlock(old_tbl, bkt);
 
         return err;
 }
@@ -299,7 +302,8 @@
          * rcu_assign_pointer().
          */
 
-        if (cmpxchg(&old_tbl->future_tbl, NULL, new_tbl) != NULL)
+        if (cmpxchg((struct bucket_table **)&old_tbl->future_tbl, NULL,
+                    new_tbl) != NULL)
                 return -EEXIST;
 
         return 0;
@@ -330,13 +334,16 @@
         spin_lock(&ht->lock);
         list_for_each_entry(walker, &old_tbl->walkers, list)
                 walker->tbl = NULL;
-        spin_unlock(&ht->lock);
 
         /* Wait for readers. All new readers will see the new
          * table, and thus no references to the old table will
          * remain.
+         * We do this inside the locked region so that
+         * rhashtable_walk_stop() can use rcu_head_after_call_rcu()
+         * to check if it should not re-link the table.
          */
         call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
+        spin_unlock(&ht->lock);
 
         return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
 }
@@ -478,6 +485,7 @@
 }
 
 static void *rhashtable_lookup_one(struct rhashtable *ht,
+                                   struct rhash_lock_head __rcu **bkt,
                                    struct bucket_table *tbl, unsigned int hash,
                                    const void *key, struct rhash_head *obj)
 {
@@ -485,13 +493,12 @@
                 .ht = ht,
                 .key = key,
         };
-        struct rhash_head __rcu **pprev;
+        struct rhash_head __rcu **pprev = NULL;
         struct rhash_head *head;
         int elasticity;
 
         elasticity = RHT_ELASTICITY;
-        pprev = rht_bucket_var(tbl, hash);
-        rht_for_each_continue(head, *pprev, tbl, hash) {
+        rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
                 struct rhlist_head *list;
                 struct rhlist_head *plist;
 
@@ -513,7 +520,11 @@
                 RCU_INIT_POINTER(list->next, plist);
                 head = rht_dereference_bucket(head->next, tbl, hash);
                 RCU_INIT_POINTER(list->rhead.next, head);
-                rcu_assign_pointer(*pprev, obj);
+                if (pprev)
+                        rcu_assign_pointer(*pprev, obj);
+                else
+                        /* Need to preserve the bit lock */
+                        rht_assign_locked(bkt, obj);
 
                 return NULL;
         }
@@ -524,13 +535,11 @@
         return ERR_PTR(-ENOENT);
 }
 
-static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
-                                                  struct bucket_table *tbl,
-                                                  unsigned int hash,
-                                                  struct rhash_head *obj,
-                                                  void *data)
+static struct bucket_table *rhashtable_insert_one(
+        struct rhashtable *ht, struct rhash_lock_head __rcu **bkt,
+        struct bucket_table *tbl, unsigned int hash, struct rhash_head *obj,
+        void *data)
 {
-        struct rhash_head __rcu **pprev;
         struct bucket_table *new_tbl;
         struct rhash_head *head;
 
@@ -553,11 +562,7 @@
         if (unlikely(rht_grow_above_100(ht, tbl)))
                 return ERR_PTR(-EAGAIN);
 
-        pprev = rht_bucket_insert(ht, tbl, hash);
-        if (!pprev)
-                return ERR_PTR(-ENOMEM);
-
-        head = rht_dereference_bucket(*pprev, tbl, hash);
+        head = rht_ptr(bkt, tbl, hash);
 
         RCU_INIT_POINTER(obj->next, head);
         if (ht->rhlist) {
@@ -567,7 +572,10 @@
                 RCU_INIT_POINTER(list->next, NULL);
         }
 
-        rcu_assign_pointer(*pprev, obj);
+        /* bkt is always the head of the list, so it holds
+         * the lock, which we need to preserve
+         */
+        rht_assign_locked(bkt, obj);
 
         atomic_inc(&ht->nelems);
         if (rht_grow_above_75(ht, tbl))
@@ -581,47 +589,35 @@
 {
         struct bucket_table *new_tbl;
         struct bucket_table *tbl;
+        struct rhash_lock_head __rcu **bkt;
         unsigned int hash;
-        spinlock_t *lock;
         void *data;
 
-        tbl = rcu_dereference(ht->tbl);
+        new_tbl = rcu_dereference(ht->tbl);
 
-        /* All insertions must grab the oldest table containing
-         * the hashed bucket that is yet to be rehashed.
-         */
-        for (;;) {
-                hash = rht_head_hashfn(ht, tbl, obj, ht->p);
-                lock = rht_bucket_lock(tbl, hash);
-                spin_lock_bh(lock);
-
-                if (tbl->rehash <= hash)
-                        break;
-
-                spin_unlock_bh(lock);
-                tbl = rht_dereference_rcu(tbl->future_tbl, ht);
-        }
-
-        data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
-        new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
-        if (PTR_ERR(new_tbl) != -EEXIST)
-                data = ERR_CAST(new_tbl);
-
-        while (!IS_ERR_OR_NULL(new_tbl)) {
+        do {
                 tbl = new_tbl;
                 hash = rht_head_hashfn(ht, tbl, obj, ht->p);
-                spin_lock_nested(rht_bucket_lock(tbl, hash),
-                                 SINGLE_DEPTH_NESTING);
+                if (rcu_access_pointer(tbl->future_tbl))
+                        /* Failure is OK */
+                        bkt = rht_bucket_var(tbl, hash);
+                else
+                        bkt = rht_bucket_insert(ht, tbl, hash);
+                if (bkt == NULL) {
+                        new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+                        data = ERR_PTR(-EAGAIN);
+                } else {
+                        rht_lock(tbl, bkt);
+                        data = rhashtable_lookup_one(ht, bkt, tbl,
+                                                     hash, key, obj);
+                        new_tbl = rhashtable_insert_one(ht, bkt, tbl,
+                                                        hash, obj, data);
+                        if (PTR_ERR(new_tbl) != -EEXIST)
+                                data = ERR_CAST(new_tbl);
 
-                data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
-                new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
-                if (PTR_ERR(new_tbl) != -EEXIST)
-                        data = ERR_CAST(new_tbl);
-
-                spin_unlock(rht_bucket_lock(tbl, hash));
-        }
-
-        spin_unlock_bh(lock);
+                        rht_unlock(tbl, bkt);
+                }
+        } while (!IS_ERR_OR_NULL(new_tbl));
 
         if (PTR_ERR(data) == -EAGAIN)
                 data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
@@ -686,7 +682,7 @@
  * rhashtable_walk_exit - Free an iterator
  * @iter: Hash table Iterator
  *
- * This function frees resources allocated by rhashtable_walk_init.
+ * This function frees resources allocated by rhashtable_walk_enter.
  */
 void rhashtable_walk_exit(struct rhashtable_iter *iter)
 {
@@ -943,10 +939,11 @@
         ht = iter->ht;
 
         spin_lock(&ht->lock);
-        if (tbl->rehash < tbl->size)
-                list_add(&iter->walker.list, &tbl->walkers);
-        else
+        if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu))
+                /* This bucket table is being freed, don't re-link it. */
                 iter->walker.tbl = NULL;
+        else
+                list_add(&iter->walker.list, &tbl->walkers);
         spin_unlock(&ht->lock);
 
 out:
@@ -1045,11 +1042,6 @@
         ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
 
         size = rounded_hashtable_size(&ht->p);
-
-        if (params->locks_mul)
-                ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
-        else
-                ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;
 
         ht->key_len = ht->p.key_len;
         if (!params->hashfn) {
@@ -1152,7 +1144,7 @@
                         struct rhash_head *pos, *next;
 
                         cond_resched();
-                        for (pos = rht_dereference(*rht_bucket(tbl, i), ht),
+                        for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)),
                              next = !rht_is_a_nulls(pos) ?
                                         rht_dereference(pos->next, ht) : NULL;
                              !rht_is_a_nulls(pos);
@@ -1179,18 +1171,16 @@
 }
 EXPORT_SYMBOL_GPL(rhashtable_destroy);
 
-struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
-                                            unsigned int hash)
+struct rhash_lock_head __rcu **__rht_bucket_nested(
+        const struct bucket_table *tbl, unsigned int hash)
 {
         const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
-        static struct rhash_head __rcu *rhnull =
-                (struct rhash_head __rcu *)NULLS_MARKER(0);
         unsigned int index = hash & ((1 << tbl->nest) - 1);
         unsigned int size = tbl->size >> tbl->nest;
         unsigned int subhash = hash;
         union nested_table *ntbl;
 
-        ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
+        ntbl = nested_table_top(tbl);
         ntbl = rht_dereference_bucket_rcu(ntbl[index].table, tbl, hash);
         subhash >>= tbl->nest;
 
@@ -1203,23 +1193,33 @@
         }
 
         if (!ntbl)
-                return &rhnull;
+                return NULL;
 
         return &ntbl[subhash].bucket;
 
 }
+EXPORT_SYMBOL_GPL(__rht_bucket_nested);
+
+struct rhash_lock_head __rcu **rht_bucket_nested(
+        const struct bucket_table *tbl, unsigned int hash)
+{
+        static struct rhash_lock_head __rcu *rhnull;
+
+        if (!rhnull)
+                INIT_RHT_NULLS_HEAD(rhnull);
+        return __rht_bucket_nested(tbl, hash) ?: &rhnull;
+}
 EXPORT_SYMBOL_GPL(rht_bucket_nested);
 
-struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
-                                                   struct bucket_table *tbl,
-                                                   unsigned int hash)
+struct rhash_lock_head __rcu **rht_bucket_nested_insert(
+        struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
 {
         const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
         unsigned int index = hash & ((1 << tbl->nest) - 1);
         unsigned int size = tbl->size >> tbl->nest;
         union nested_table *ntbl;
 
-        ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
+        ntbl = nested_table_top(tbl);
         hash >>= tbl->nest;
         ntbl = nested_table_alloc(ht, &ntbl[index].table,
                                   size <= (1 << shift));
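
For illustration only: the hunks above replace rhashtable's separate per-bucket spinlock array (BUCKET_LOCKS_PER_CPU, tbl->locks, the locks_mul parameter) with a bit spinlock stored in bit 0 of each bucket pointer (struct rhash_lock_head, accessed through rht_lock()/rht_unlock(), rht_ptr(), rht_assign_locked() and rht_assign_unlock()). The userspace sketch below shows only that core idea, a pointer whose low bit doubles as the bucket lock, using GCC/Clang __atomic builtins. It is not the kernel implementation; the names bucket_t, bucket_lock(), bucket_unlock_assign() and bucket_head() are invented here purely for the example, and the kernel instead uses bit_spin_lock() plus RCU-aware accessors.

/*
 * Standalone userspace sketch (NOT lib/rhashtable.c): the bucket head is a
 * single pointer whose least-significant bit doubles as a per-bucket
 * spinlock, so no separate lock array is needed.
 */
#include <stdint.h>
#include <stdio.h>

struct node {
        struct node *next;
        int key;
};

/* Bucket head: a pointer value whose bit 0 is used as the lock bit. */
typedef uintptr_t bucket_t;

#define LOCK_BIT ((uintptr_t)1)

static void bucket_lock(bucket_t *bkt)
{
        /* Spin until this caller is the one that sets bit 0. */
        while (__atomic_fetch_or(bkt, LOCK_BIT, __ATOMIC_ACQUIRE) & LOCK_BIT)
                ;       /* busy-wait; a real implementation would relax the CPU */
}

static void bucket_unlock_assign(bucket_t *bkt, struct node *head)
{
        /* Publish the new head and drop the lock bit in one release store,
         * mirroring what rht_assign_unlock() does for a real bucket.
         */
        __atomic_store_n(bkt, (uintptr_t)head, __ATOMIC_RELEASE);
}

static struct node *bucket_head(bucket_t *bkt)
{
        /* Mask off the lock bit to recover the real pointer (cf. rht_ptr()). */
        return (struct node *)(__atomic_load_n(bkt, __ATOMIC_ACQUIRE) & ~LOCK_BIT);
}

int main(void)
{
        static struct node n1 = { .next = NULL, .key = 1 };
        bucket_t bkt = 0;

        bucket_lock(&bkt);
        n1.next = bucket_head(&bkt);    /* insert at head while holding the bit lock */
        bucket_unlock_assign(&bkt, &n1);

        printf("head key = %d\n", bucket_head(&bkt)->key);
        return 0;
}

Because every bucket head is already a pointer, this scheme provides one lock per bucket at no extra memory cost, which is why the patch can delete alloc_bucket_spinlocks()/free_bucket_spinlocks() and the locks_mul tuning knob entirely.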