hc
2024-05-10 10ebd8556b7990499c896a550e3d416b444211e6
kernel/kernel/bpf/lpm_trie.c
....@@ -1,12 +1,9 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Longest prefix match list implementation
34 *
45 * Copyright (c) 2016,2017 Daniel Mack
56 * Copyright (c) 2016 David Herrmann
6
- *
7
- * This file is subject to the terms and conditions of version 2 of the GNU
8
- * General Public License. See the file COPYING in the main directory of the
9
- * Linux distribution for more details.
107 */
118
129 #include <linux/bpf.h>
....@@ -28,7 +25,7 @@
2825 struct lpm_trie_node __rcu *child[2];
2926 u32 prefixlen;
3027 u32 flags;
31
- u8 data[0];
28
+ u8 data[];
3229 };
3330
3431 struct lpm_trie {
....@@ -37,7 +34,7 @@
3734 size_t n_entries;
3835 size_t max_prefixlen;
3936 size_t data_size;
40
- raw_spinlock_t lock;
37
+ spinlock_t lock;
4138 };
4239
4340 /* This trie implements a longest prefix match algorithm that can be used to
....@@ -168,20 +165,59 @@
168165 const struct lpm_trie_node *node,
169166 const struct bpf_lpm_trie_key *key)
170167 {
171
- size_t prefixlen = 0;
172
- size_t i;
168
+ u32 limit = min(node->prefixlen, key->prefixlen);
169
+ u32 prefixlen = 0, i = 0;
173170
174
- for (i = 0; i < trie->data_size; i++) {
175
- size_t b;
171
+ BUILD_BUG_ON(offsetof(struct lpm_trie_node, data) % sizeof(u32));
172
+ BUILD_BUG_ON(offsetof(struct bpf_lpm_trie_key, data) % sizeof(u32));
176173
177
- b = 8 - fls(node->data[i] ^ key->data[i]);
178
- prefixlen += b;
174
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(CONFIG_64BIT)
179175
180
- if (prefixlen >= node->prefixlen || prefixlen >= key->prefixlen)
181
- return min(node->prefixlen, key->prefixlen);
176
+ /* data_size >= 16 has very small probability.
177
+ * We do not use a loop for optimal code generation.
178
+ */
179
+ if (trie->data_size >= 8) {
180
+ u64 diff = be64_to_cpu(*(__be64 *)node->data ^
181
+ *(__be64 *)key->data);
182182
183
- if (b < 8)
184
- break;
183
+ prefixlen = 64 - fls64(diff);
184
+ if (prefixlen >= limit)
185
+ return limit;
186
+ if (diff)
187
+ return prefixlen;
188
+ i = 8;
189
+ }
190
+#endif
191
+
192
+ while (trie->data_size >= i + 4) {
193
+ u32 diff = be32_to_cpu(*(__be32 *)&node->data[i] ^
194
+ *(__be32 *)&key->data[i]);
195
+
196
+ prefixlen += 32 - fls(diff);
197
+ if (prefixlen >= limit)
198
+ return limit;
199
+ if (diff)
200
+ return prefixlen;
201
+ i += 4;
202
+ }
203
+
204
+ if (trie->data_size >= i + 2) {
205
+ u16 diff = be16_to_cpu(*(__be16 *)&node->data[i] ^
206
+ *(__be16 *)&key->data[i]);
207
+
208
+ prefixlen += 16 - fls(diff);
209
+ if (prefixlen >= limit)
210
+ return limit;
211
+ if (diff)
212
+ return prefixlen;
213
+ i += 2;
214
+ }
215
+
216
+ if (trie->data_size >= i + 1) {
217
+ prefixlen += 8 - fls(node->data[i] ^ key->data[i]);
218
+
219
+ if (prefixlen >= limit)
220
+ return limit;
185221 }
186222
187223 return prefixlen;
....@@ -279,7 +315,7 @@
279315 if (key->prefixlen > trie->max_prefixlen)
280316 return -EINVAL;
281317
282
- raw_spin_lock_irqsave(&trie->lock, irq_flags);
318
+ spin_lock_irqsave(&trie->lock, irq_flags);
283319
284320 /* Allocate and fill a new node */
285321
....@@ -386,7 +422,7 @@
386422 kfree(im_node);
387423 }
388424
389
- raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
425
+ spin_unlock_irqrestore(&trie->lock, irq_flags);
390426
391427 return ret;
392428 }
....@@ -406,7 +442,7 @@
406442 if (key->prefixlen > trie->max_prefixlen)
407443 return -EINVAL;
408444
409
- raw_spin_lock_irqsave(&trie->lock, irq_flags);
445
+ spin_lock_irqsave(&trie->lock, irq_flags);
410446
411447 /* Walk the tree looking for an exact key/length match and keeping
412448 * track of the path we traverse. We will need to know the node
....@@ -482,7 +518,7 @@
482518 kfree_rcu(node, rcu);
483519
484520 out:
485
- raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
521
+ spin_unlock_irqrestore(&trie->lock, irq_flags);
486522
487523 return ret;
488524 }
....@@ -499,7 +535,7 @@
499535 #define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN)
500536
501537 #define LPM_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE | \
502
- BPF_F_RDONLY | BPF_F_WRONLY)
538
+ BPF_F_ACCESS_MASK)
503539
504540 static struct bpf_map *trie_alloc(union bpf_attr *attr)
505541 {
....@@ -507,13 +543,14 @@
507543 u64 cost = sizeof(*trie), cost_per_node;
508544 int ret;
509545
510
- if (!capable(CAP_SYS_ADMIN))
546
+ if (!bpf_capable())
511547 return ERR_PTR(-EPERM);
512548
513549 /* check sanity of attributes */
514550 if (attr->max_entries == 0 ||
515551 !(attr->map_flags & BPF_F_NO_PREALLOC) ||
516552 attr->map_flags & ~LPM_CREATE_FLAG_MASK ||
553
+ !bpf_map_flags_access_ok(attr->map_flags) ||
517554 attr->key_size < LPM_KEY_SIZE_MIN ||
518555 attr->key_size > LPM_KEY_SIZE_MAX ||
519556 attr->value_size < LPM_VAL_SIZE_MIN ||
....@@ -533,18 +570,12 @@
533570 cost_per_node = sizeof(struct lpm_trie_node) +
534571 attr->value_size + trie->data_size;
535572 cost += (u64) attr->max_entries * cost_per_node;
536
- if (cost >= U32_MAX - PAGE_SIZE) {
537
- ret = -E2BIG;
538
- goto out_err;
539
- }
540573
541
- trie->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
542
-
543
- ret = bpf_map_precharge_memlock(trie->map.pages);
574
+ ret = bpf_map_charge_init(&trie->map.memory, cost);
544575 if (ret)
545576 goto out_err;
546577
547
- raw_spin_lock_init(&trie->lock);
578
+ spin_lock_init(&trie->lock);
548579
549580 return &trie->map;
550581 out_err:
....@@ -557,11 +588,6 @@
557588 struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
558589 struct lpm_trie_node __rcu **slot;
559590 struct lpm_trie_node *node;
560
-
561
- /* Wait for outstanding programs to complete
562
- * update/lookup/delete/get_next_key and free the trie.
563
- */
564
- synchronize_rcu();
565591
566592 /* Always start at the root and walk down to a node that has no
567593 * children. Then free that node, nullify its reference in the parent
....@@ -695,6 +721,7 @@
695721 }
696722
697723 static int trie_check_btf(const struct bpf_map *map,
724
+ const struct btf *btf,
698725 const struct btf_type *key_type,
699726 const struct btf_type *value_type)
700727 {
....@@ -703,7 +730,9 @@
703730 -EINVAL : 0;
704731 }
705732
733
+static int trie_map_btf_id;
706734 const struct bpf_map_ops trie_map_ops = {
735
+ .map_meta_equal = bpf_map_meta_equal,
707736 .map_alloc = trie_alloc,
708737 .map_free = trie_free,
709738 .map_get_next_key = trie_get_next_key,
....@@ -711,4 +740,6 @@
711740 .map_update_elem = trie_update_elem,
712741 .map_delete_elem = trie_delete_elem,
713742 .map_check_btf = trie_check_btf,
743
+ .map_btf_name = "lpm_trie",
744
+ .map_btf_id = &trie_map_btf_id,
714745 };