hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/lib/idr.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 #include <linux/bitmap.h>
23 #include <linux/bug.h>
34 #include <linux/export.h>
....@@ -5,8 +6,6 @@
56 #include <linux/slab.h>
67 #include <linux/spinlock.h>
78 #include <linux/xarray.h>
8
-
9
-DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap);
109
1110 /**
1211 * idr_alloc_u32() - Allocate an ID.
....@@ -39,10 +38,8 @@
3938 unsigned int base = idr->idr_base;
4039 unsigned int id = *nextid;
4140
42
- if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
43
- return -EINVAL;
44
- if (WARN_ON_ONCE(!(idr->idr_rt.gfp_mask & ROOT_IS_IDR)))
45
- idr->idr_rt.gfp_mask |= IDR_RT_MARKER;
41
+ if (WARN_ON_ONCE(!(idr->idr_rt.xa_flags & ROOT_IS_IDR)))
42
+ idr->idr_rt.xa_flags |= IDR_RT_MARKER;
4643
4744 id = (id < base) ? 0 : id - base;
4845 radix_tree_iter_init(&iter, id);
....@@ -103,7 +100,7 @@
103100 * @end: The maximum ID (exclusive).
104101 * @gfp: Memory allocation flags.
105102 *
106
- * Allocates an unused ID in the range specified by @nextid and @end. If
103
+ * Allocates an unused ID in the range specified by @start and @end. If
107104 * @end is <= 0, it is treated as one larger than %INT_MAX. This allows
108105 * callers to use @start + N as @end as long as N is within integer range.
109106 * The search for an unused ID will start at the last ID allocated and will
....@@ -240,10 +237,9 @@
240237 entry = rcu_dereference_raw(*slot);
241238 if (!entry)
242239 continue;
243
- if (!radix_tree_deref_retry(entry))
240
+ if (!xa_is_internal(entry))
244241 break;
245
- if (slot != (void *)&idr->idr_rt.rnode &&
246
- entry != (void *)RADIX_TREE_INTERNAL_NODE)
242
+ if (slot != &idr->idr_rt.xa_head && !xa_is_retry(entry))
247243 break;
248244 slot = radix_tree_iter_retry(&iter);
249245 }
....@@ -297,15 +293,13 @@
297293 void __rcu **slot = NULL;
298294 void *entry;
299295
300
- if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
301
- return ERR_PTR(-EINVAL);
302296 id -= idr->idr_base;
303297
304298 entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot);
305299 if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE))
306300 return ERR_PTR(-ENOENT);
307301
308
- __radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL);
302
+ __radix_tree_replace(&idr->idr_rt, node, slot, ptr);
309303
310304 return entry;
311305 }
....@@ -326,6 +320,9 @@
326320 * free the individual IDs in it. You can use ida_is_empty() to find
327321 * out whether the IDA has any IDs currently allocated.
328322 *
323
+ * The IDA handles its own locking. It is safe to call any of the IDA
324
+ * functions without synchronisation in your code.
325
+ *
329326 * IDs are currently limited to the range [0-INT_MAX]. If this is an awkward
330327 * limitation, it should be quite straightforward to raise the maximum.
331328 */
....@@ -333,189 +330,37 @@
333330 /*
334331 * Developer's notes:
335332 *
336
- * The IDA uses the functionality provided by the IDR & radix tree to store
337
- * bitmaps in each entry. The IDR_FREE tag means there is at least one bit
338
- * free, unlike the IDR where it means at least one entry is free.
333
+ * The IDA uses the functionality provided by the XArray to store bitmaps in
334
+ * each entry. The XA_FREE_MARK is only cleared when all bits in the bitmap
335
+ * have been set.
339336 *
340
- * I considered telling the radix tree that each slot is an order-10 node
341
- * and storing the bit numbers in the radix tree, but the radix tree can't
342
- * allow a single multiorder entry at index 0, which would significantly
343
- * increase memory consumption for the IDA. So instead we divide the index
344
- * by the number of bits in the leaf bitmap before doing a radix tree lookup.
337
+ * I considered telling the XArray that each slot is an order-10 node
338
+ * and indexing by bit number, but the XArray can't allow a single multi-index
339
+ * entry in the head, which would significantly increase memory consumption
340
+ * for the IDA. So instead we divide the index by the number of bits in the
341
+ * leaf bitmap before doing a radix tree lookup.
345342 *
346343 * As an optimisation, if there are only a few low bits set in any given
347
- * leaf, instead of allocating a 128-byte bitmap, we use the 'exceptional
348
- * entry' functionality of the radix tree to store BITS_PER_LONG - 2 bits
349
- * directly in the entry. By being really tricksy, we could store
350
- * BITS_PER_LONG - 1 bits, but there're diminishing returns after optimising
351
- * for 0-3 allocated IDs.
344
+ * leaf, instead of allocating a 128-byte bitmap, we store the bits
345
+ * as a value entry. Value entries never have the XA_FREE_MARK cleared
346
+ * because we can always convert them into a bitmap entry.
352347 *
353
- * We allow the radix tree 'exceptional' count to get out of date. Nothing
354
- * in the IDA nor the radix tree code checks it. If it becomes important
355
- * to maintain an accurate exceptional count, switch the rcu_assign_pointer()
356
- * calls to radix_tree_iter_replace() which will correct the exceptional
357
- * count.
348
+ * It would be possible to optimise further; once we've run out of a
349
+ * single 128-byte bitmap, we currently switch to a 576-byte node, put
350
+ * the 128-byte bitmap in the first entry and then start allocating extra
351
+ * 128-byte entries. We could instead use the 512 bytes of the node's
352
+ * data as a bitmap before moving to that scheme. I do not believe this
353
+ * is a worthwhile optimisation; Rasmus Villemoes surveyed the current
354
+ * users of the IDA and almost none of them use more than 1024 entries.
355
+ * Those that do use more than the 8192 IDs that the 512 bytes would
356
+ * provide.
358357 *
359
- * The IDA always requires a lock to alloc/free. If we add a 'test_bit'
358
+ * The IDA always uses a lock to alloc/free. If we add a 'test_bit'
360359 * equivalent, it will still need locking. Going to RCU lookup would require
361360 * using RCU to free bitmaps, and that's not trivial without embedding an
362361 * RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte
363362 * bitmap, which is excessive.
364363 */
365
-
366
-#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS - 1)
367
-
368
-static int ida_get_new_above(struct ida *ida, int start)
369
-{
370
- struct radix_tree_root *root = &ida->ida_rt;
371
- void __rcu **slot;
372
- struct radix_tree_iter iter;
373
- struct ida_bitmap *bitmap;
374
- unsigned long index;
375
- unsigned bit, ebit;
376
- int new;
377
-
378
- index = start / IDA_BITMAP_BITS;
379
- bit = start % IDA_BITMAP_BITS;
380
- ebit = bit + RADIX_TREE_EXCEPTIONAL_SHIFT;
381
-
382
- slot = radix_tree_iter_init(&iter, index);
383
- for (;;) {
384
- if (slot)
385
- slot = radix_tree_next_slot(slot, &iter,
386
- RADIX_TREE_ITER_TAGGED);
387
- if (!slot) {
388
- slot = idr_get_free(root, &iter, GFP_NOWAIT, IDA_MAX);
389
- if (IS_ERR(slot)) {
390
- if (slot == ERR_PTR(-ENOMEM))
391
- return -EAGAIN;
392
- return PTR_ERR(slot);
393
- }
394
- }
395
- if (iter.index > index) {
396
- bit = 0;
397
- ebit = RADIX_TREE_EXCEPTIONAL_SHIFT;
398
- }
399
- new = iter.index * IDA_BITMAP_BITS;
400
- bitmap = rcu_dereference_raw(*slot);
401
- if (radix_tree_exception(bitmap)) {
402
- unsigned long tmp = (unsigned long)bitmap;
403
- ebit = find_next_zero_bit(&tmp, BITS_PER_LONG, ebit);
404
- if (ebit < BITS_PER_LONG) {
405
- tmp |= 1UL << ebit;
406
- rcu_assign_pointer(*slot, (void *)tmp);
407
- return new + ebit -
408
- RADIX_TREE_EXCEPTIONAL_SHIFT;
409
- }
410
- bitmap = this_cpu_xchg(ida_bitmap, NULL);
411
- if (!bitmap)
412
- return -EAGAIN;
413
- bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT;
414
- rcu_assign_pointer(*slot, bitmap);
415
- }
416
-
417
- if (bitmap) {
418
- bit = find_next_zero_bit(bitmap->bitmap,
419
- IDA_BITMAP_BITS, bit);
420
- new += bit;
421
- if (new < 0)
422
- return -ENOSPC;
423
- if (bit == IDA_BITMAP_BITS)
424
- continue;
425
-
426
- __set_bit(bit, bitmap->bitmap);
427
- if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
428
- radix_tree_iter_tag_clear(root, &iter,
429
- IDR_FREE);
430
- } else {
431
- new += bit;
432
- if (new < 0)
433
- return -ENOSPC;
434
- if (ebit < BITS_PER_LONG) {
435
- bitmap = (void *)((1UL << ebit) |
436
- RADIX_TREE_EXCEPTIONAL_ENTRY);
437
- radix_tree_iter_replace(root, &iter, slot,
438
- bitmap);
439
- return new;
440
- }
441
- bitmap = this_cpu_xchg(ida_bitmap, NULL);
442
- if (!bitmap)
443
- return -EAGAIN;
444
- __set_bit(bit, bitmap->bitmap);
445
- radix_tree_iter_replace(root, &iter, slot, bitmap);
446
- }
447
-
448
- return new;
449
- }
450
-}
451
-
452
-static void ida_remove(struct ida *ida, int id)
453
-{
454
- unsigned long index = id / IDA_BITMAP_BITS;
455
- unsigned offset = id % IDA_BITMAP_BITS;
456
- struct ida_bitmap *bitmap;
457
- unsigned long *btmp;
458
- struct radix_tree_iter iter;
459
- void __rcu **slot;
460
-
461
- slot = radix_tree_iter_lookup(&ida->ida_rt, &iter, index);
462
- if (!slot)
463
- goto err;
464
-
465
- bitmap = rcu_dereference_raw(*slot);
466
- if (radix_tree_exception(bitmap)) {
467
- btmp = (unsigned long *)slot;
468
- offset += RADIX_TREE_EXCEPTIONAL_SHIFT;
469
- if (offset >= BITS_PER_LONG)
470
- goto err;
471
- } else {
472
- btmp = bitmap->bitmap;
473
- }
474
- if (!test_bit(offset, btmp))
475
- goto err;
476
-
477
- __clear_bit(offset, btmp);
478
- radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE);
479
- if (radix_tree_exception(bitmap)) {
480
- if (rcu_dereference_raw(*slot) ==
481
- (void *)RADIX_TREE_EXCEPTIONAL_ENTRY)
482
- radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
483
- } else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) {
484
- kfree(bitmap);
485
- radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
486
- }
487
- return;
488
- err:
489
- WARN(1, "ida_free called for id=%d which is not allocated.\n", id);
490
-}
491
-
492
-/**
493
- * ida_destroy() - Free all IDs.
494
- * @ida: IDA handle.
495
- *
496
- * Calling this function frees all IDs and releases all resources used
497
- * by an IDA. When this call returns, the IDA is empty and can be reused
498
- * or freed. If the IDA is already empty, there is no need to call this
499
- * function.
500
- *
501
- * Context: Any context.
502
- */
503
-void ida_destroy(struct ida *ida)
504
-{
505
- unsigned long flags;
506
- struct radix_tree_iter iter;
507
- void __rcu **slot;
508
-
509
- xa_lock_irqsave(&ida->ida_rt, flags);
510
- radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) {
511
- struct ida_bitmap *bitmap = rcu_dereference_raw(*slot);
512
- if (!radix_tree_exception(bitmap))
513
- kfree(bitmap);
514
- radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
515
- }
516
- xa_unlock_irqrestore(&ida->ida_rt, flags);
517
-}
518
-EXPORT_SYMBOL(ida_destroy);
519364
520365 /**
521366 * ida_alloc_range() - Allocate an unused ID.
....@@ -527,15 +372,18 @@
527372 * Allocate an ID between @min and @max, inclusive. The allocated ID will
528373 * not exceed %INT_MAX, even if @max is larger.
529374 *
530
- * Context: Any context.
375
+ * Context: Any context. It is safe to call this function without
376
+ * locking in your code.
531377 * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
532378 * or %-ENOSPC if there are no free IDs.
533379 */
534380 int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max,
535381 gfp_t gfp)
536382 {
537
- int id = 0;
383
+ XA_STATE(xas, &ida->xa, min / IDA_BITMAP_BITS);
384
+ unsigned bit = min % IDA_BITMAP_BITS;
538385 unsigned long flags;
386
+ struct ida_bitmap *bitmap, *alloc = NULL;
539387
540388 if ((int)min < 0)
541389 return -ENOSPC;
....@@ -543,22 +391,88 @@
543391 if ((int)max < 0)
544392 max = INT_MAX;
545393
546
-again:
547
- xa_lock_irqsave(&ida->ida_rt, flags);
548
- id = ida_get_new_above(ida, min);
549
- if (id > (int)max) {
550
- ida_remove(ida, id);
551
- id = -ENOSPC;
552
- }
553
- xa_unlock_irqrestore(&ida->ida_rt, flags);
394
+retry:
395
+ xas_lock_irqsave(&xas, flags);
396
+next:
397
+ bitmap = xas_find_marked(&xas, max / IDA_BITMAP_BITS, XA_FREE_MARK);
398
+ if (xas.xa_index > min / IDA_BITMAP_BITS)
399
+ bit = 0;
400
+ if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
401
+ goto nospc;
554402
555
- if (unlikely(id == -EAGAIN)) {
556
- if (!ida_pre_get(ida, gfp))
557
- return -ENOMEM;
558
- goto again;
403
+ if (xa_is_value(bitmap)) {
404
+ unsigned long tmp = xa_to_value(bitmap);
405
+
406
+ if (bit < BITS_PER_XA_VALUE) {
407
+ bit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, bit);
408
+ if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
409
+ goto nospc;
410
+ if (bit < BITS_PER_XA_VALUE) {
411
+ tmp |= 1UL << bit;
412
+ xas_store(&xas, xa_mk_value(tmp));
413
+ goto out;
414
+ }
415
+ }
416
+ bitmap = alloc;
417
+ if (!bitmap)
418
+ bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT);
419
+ if (!bitmap)
420
+ goto alloc;
421
+ bitmap->bitmap[0] = tmp;
422
+ xas_store(&xas, bitmap);
423
+ if (xas_error(&xas)) {
424
+ bitmap->bitmap[0] = 0;
425
+ goto out;
426
+ }
559427 }
560428
561
- return id;
429
+ if (bitmap) {
430
+ bit = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, bit);
431
+ if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
432
+ goto nospc;
433
+ if (bit == IDA_BITMAP_BITS)
434
+ goto next;
435
+
436
+ __set_bit(bit, bitmap->bitmap);
437
+ if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
438
+ xas_clear_mark(&xas, XA_FREE_MARK);
439
+ } else {
440
+ if (bit < BITS_PER_XA_VALUE) {
441
+ bitmap = xa_mk_value(1UL << bit);
442
+ } else {
443
+ bitmap = alloc;
444
+ if (!bitmap)
445
+ bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT);
446
+ if (!bitmap)
447
+ goto alloc;
448
+ __set_bit(bit, bitmap->bitmap);
449
+ }
450
+ xas_store(&xas, bitmap);
451
+ }
452
+out:
453
+ xas_unlock_irqrestore(&xas, flags);
454
+ if (xas_nomem(&xas, gfp)) {
455
+ xas.xa_index = min / IDA_BITMAP_BITS;
456
+ bit = min % IDA_BITMAP_BITS;
457
+ goto retry;
458
+ }
459
+ if (bitmap != alloc)
460
+ kfree(alloc);
461
+ if (xas_error(&xas))
462
+ return xas_error(&xas);
463
+ return xas.xa_index * IDA_BITMAP_BITS + bit;
464
+alloc:
465
+ xas_unlock_irqrestore(&xas, flags);
466
+ alloc = kzalloc(sizeof(*bitmap), gfp);
467
+ if (!alloc)
468
+ return -ENOMEM;
469
+ xas_set(&xas, min / IDA_BITMAP_BITS);
470
+ bit = min % IDA_BITMAP_BITS;
471
+ goto retry;
472
+nospc:
473
+ xas_unlock_irqrestore(&xas, flags);
474
+ kfree(alloc);
475
+ return -ENOSPC;
562476 }
563477 EXPORT_SYMBOL(ida_alloc_range);
564478
....@@ -567,15 +481,119 @@
567481 * @ida: IDA handle.
568482 * @id: Previously allocated ID.
569483 *
570
- * Context: Any context.
484
+ * Context: Any context. It is safe to call this function without
485
+ * locking in your code.
571486 */
572487 void ida_free(struct ida *ida, unsigned int id)
573488 {
489
+ XA_STATE(xas, &ida->xa, id / IDA_BITMAP_BITS);
490
+ unsigned bit = id % IDA_BITMAP_BITS;
491
+ struct ida_bitmap *bitmap;
574492 unsigned long flags;
575493
576
- BUG_ON((int)id < 0);
577
- xa_lock_irqsave(&ida->ida_rt, flags);
578
- ida_remove(ida, id);
579
- xa_unlock_irqrestore(&ida->ida_rt, flags);
494
+ if ((int)id < 0)
495
+ return;
496
+
497
+ xas_lock_irqsave(&xas, flags);
498
+ bitmap = xas_load(&xas);
499
+
500
+ if (xa_is_value(bitmap)) {
501
+ unsigned long v = xa_to_value(bitmap);
502
+ if (bit >= BITS_PER_XA_VALUE)
503
+ goto err;
504
+ if (!(v & (1UL << bit)))
505
+ goto err;
506
+ v &= ~(1UL << bit);
507
+ if (!v)
508
+ goto delete;
509
+ xas_store(&xas, xa_mk_value(v));
510
+ } else {
511
+ if (!test_bit(bit, bitmap->bitmap))
512
+ goto err;
513
+ __clear_bit(bit, bitmap->bitmap);
514
+ xas_set_mark(&xas, XA_FREE_MARK);
515
+ if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) {
516
+ kfree(bitmap);
517
+delete:
518
+ xas_store(&xas, NULL);
519
+ }
520
+ }
521
+ xas_unlock_irqrestore(&xas, flags);
522
+ return;
523
+ err:
524
+ xas_unlock_irqrestore(&xas, flags);
525
+ WARN(1, "ida_free called for id=%d which is not allocated.\n", id);
580526 }
581527 EXPORT_SYMBOL(ida_free);
528
+
529
+/**
530
+ * ida_destroy() - Free all IDs.
531
+ * @ida: IDA handle.
532
+ *
533
+ * Calling this function frees all IDs and releases all resources used
534
+ * by an IDA. When this call returns, the IDA is empty and can be reused
535
+ * or freed. If the IDA is already empty, there is no need to call this
536
+ * function.
537
+ *
538
+ * Context: Any context. It is safe to call this function without
539
+ * locking in your code.
540
+ */
541
+void ida_destroy(struct ida *ida)
542
+{
543
+ XA_STATE(xas, &ida->xa, 0);
544
+ struct ida_bitmap *bitmap;
545
+ unsigned long flags;
546
+
547
+ xas_lock_irqsave(&xas, flags);
548
+ xas_for_each(&xas, bitmap, ULONG_MAX) {
549
+ if (!xa_is_value(bitmap))
550
+ kfree(bitmap);
551
+ xas_store(&xas, NULL);
552
+ }
553
+ xas_unlock_irqrestore(&xas, flags);
554
+}
555
+EXPORT_SYMBOL(ida_destroy);
556
+
557
+#ifndef __KERNEL__
558
+extern void xa_dump_index(unsigned long index, unsigned int shift);
559
+#define IDA_CHUNK_SHIFT ilog2(IDA_BITMAP_BITS)
560
+
561
+static void ida_dump_entry(void *entry, unsigned long index)
562
+{
563
+ unsigned long i;
564
+
565
+ if (!entry)
566
+ return;
567
+
568
+ if (xa_is_node(entry)) {
569
+ struct xa_node *node = xa_to_node(entry);
570
+ unsigned int shift = node->shift + IDA_CHUNK_SHIFT +
571
+ XA_CHUNK_SHIFT;
572
+
573
+ xa_dump_index(index * IDA_BITMAP_BITS, shift);
574
+ xa_dump_node(node);
575
+ for (i = 0; i < XA_CHUNK_SIZE; i++)
576
+ ida_dump_entry(node->slots[i],
577
+ index | (i << node->shift));
578
+ } else if (xa_is_value(entry)) {
579
+ xa_dump_index(index * IDA_BITMAP_BITS, ilog2(BITS_PER_LONG));
580
+ pr_cont("value: data %lx [%px]\n", xa_to_value(entry), entry);
581
+ } else {
582
+ struct ida_bitmap *bitmap = entry;
583
+
584
+ xa_dump_index(index * IDA_BITMAP_BITS, IDA_CHUNK_SHIFT);
585
+ pr_cont("bitmap: %p data", bitmap);
586
+ for (i = 0; i < IDA_BITMAP_LONGS; i++)
587
+ pr_cont(" %lx", bitmap->bitmap[i]);
588
+ pr_cont("\n");
589
+ }
590
+}
591
+
592
+static void ida_dump(struct ida *ida)
593
+{
594
+ struct xarray *xa = &ida->xa;
595
+ pr_debug("ida: %p node %p free %d\n", ida, xa->xa_head,
596
+ xa->xa_flags >> ROOT_TAG_SHIFT);
597
+ ida_dump_entry(xa->xa_head, 0);
598
+}
599
+#endif