2024-05-13 9d77db3c730780c8ef5ccd4b66403ff5675cfe4e
kernel/kernel/bpf/hashtab.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
 */
 #include <linux/bpf.h>
 #include <linux/btf.h>
@@ -17,17 +9,81 @@
 #include <linux/rculist_nulls.h>
 #include <linux/random.h>
 #include <uapi/linux/btf.h>
+#include <linux/rcupdate_trace.h>
 #include "percpu_freelist.h"
 #include "bpf_lru_list.h"
 #include "map_in_map.h"

 #define HTAB_CREATE_FLAG_MASK \
 (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \
- BPF_F_RDONLY | BPF_F_WRONLY)
+ BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)

+#define BATCH_OPS(_name) \
+ .map_lookup_batch = \
+ _name##_map_lookup_batch, \
+ .map_lookup_and_delete_batch = \
+ _name##_map_lookup_and_delete_batch, \
+ .map_update_batch = \
+ generic_map_update_batch, \
+ .map_delete_batch = \
+ generic_map_delete_batch
+
+/*
+ * The bucket lock has two protection scopes:
+ *
+ * 1) Serializing concurrent operations from BPF programs on differrent
+ * CPUs
+ *
+ * 2) Serializing concurrent operations from BPF programs and sys_bpf()
+ *
+ * BPF programs can execute in any context including perf, kprobes and
+ * tracing. As there are almost no limits where perf, kprobes and tracing
+ * can be invoked from the lock operations need to be protected against
+ * deadlocks. Deadlocks can be caused by recursion and by an invocation in
+ * the lock held section when functions which acquire this lock are invoked
+ * from sys_bpf(). BPF recursion is prevented by incrementing the per CPU
+ * variable bpf_prog_active, which prevents BPF programs attached to perf
+ * events, kprobes and tracing to be invoked before the prior invocation
+ * from one of these contexts completed. sys_bpf() uses the same mechanism
+ * by pinning the task to the current CPU and incrementing the recursion
+ * protection accross the map operation.
+ *
+ * This has subtle implications on PREEMPT_RT. PREEMPT_RT forbids certain
+ * operations like memory allocations (even with GFP_ATOMIC) from atomic
+ * contexts. This is required because even with GFP_ATOMIC the memory
+ * allocator calls into code pathes which acquire locks with long held lock
+ * sections. To ensure the deterministic behaviour these locks are regular
+ * spinlocks, which are converted to 'sleepable' spinlocks on RT. The only
+ * true atomic contexts on an RT kernel are the low level hardware
+ * handling, scheduling, low level interrupt handling, NMIs etc. None of
+ * these contexts should ever do memory allocations.
+ *
+ * As regular device interrupt handlers and soft interrupts are forced into
+ * thread context, the existing code which does
+ * spin_lock*(); alloc(GPF_ATOMIC); spin_unlock*();
+ * just works.
+ *
+ * In theory the BPF locks could be converted to regular spinlocks as well,
+ * but the bucket locks and percpu_freelist locks can be taken from
+ * arbitrary contexts (perf, kprobes, tracepoints) which are required to be
+ * atomic contexts even on RT. These mechanisms require preallocated maps,
+ * so there is no need to invoke memory allocations within the lock held
+ * sections.
+ *
+ * BPF maps which need dynamic allocation are only used from (forced)
+ * thread context on RT and can therefore use regular spinlocks which in
+ * turn allows to invoke memory allocations from the lock held section.
+ *
+ * On a non RT kernel this distinction is neither possible nor required.
+ * spinlock maps to raw_spinlock and the extra code is optimized out by the
+ * compiler.
+ */
 struct bucket {
 struct hlist_nulls_head head;
- raw_spinlock_t lock;
+ union {
+ raw_spinlock_t raw_lock;
+ spinlock_t lock;
+ };
 };

 struct bpf_htab {
@@ -54,6 +110,7 @@
 union {
 struct bpf_htab *htab;
 struct pcpu_freelist_node fnode;
+ struct htab_elem *batch_flink;
 };
 };
 };
@@ -62,8 +119,53 @@
 struct bpf_lru_node lru_node;
 };
 u32 hash;
- char key[0] __aligned(8);
+ char key[] __aligned(8);
 };
+
+static inline bool htab_is_prealloc(const struct bpf_htab *htab)
+{
+ return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
+}
+
+static inline bool htab_use_raw_lock(const struct bpf_htab *htab)
+{
+ return (!IS_ENABLED(CONFIG_PREEMPT_RT) || htab_is_prealloc(htab));
+}
+
+static void htab_init_buckets(struct bpf_htab *htab)
+{
+ unsigned i;
+
+ for (i = 0; i < htab->n_buckets; i++) {
+ INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
+ if (htab_use_raw_lock(htab))
+ raw_spin_lock_init(&htab->buckets[i].raw_lock);
+ else
+ spin_lock_init(&htab->buckets[i].lock);
+ }
+}
+
+static inline unsigned long htab_lock_bucket(const struct bpf_htab *htab,
+ struct bucket *b)
+{
+ unsigned long flags;
+
+ if (htab_use_raw_lock(htab))
+ raw_spin_lock_irqsave(&b->raw_lock, flags);
+ else
+ spin_lock_irqsave(&b->lock, flags);
+ return flags;
+}
+
+static inline void htab_unlock_bucket(const struct bpf_htab *htab,
+ struct bucket *b,
+ unsigned long flags)
+{
+ if (htab_use_raw_lock(htab))
+ raw_spin_unlock_irqrestore(&b->raw_lock, flags);
+ else
+ spin_unlock_irqrestore(&b->lock, flags);
+}

 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);

@@ -77,11 +179,6 @@
 {
 return htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
-}
-
-static bool htab_is_prealloc(const struct bpf_htab *htab)
-{
- return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
 }

 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
@@ -124,6 +221,17 @@
 bpf_map_area_free(htab->elems);
 }

+/* The LRU list has a lock (lru_lock). Each htab bucket has a lock
+ * (bucket_lock). If both locks need to be acquired together, the lock
+ * order is always lru_lock -> bucket_lock and this only happens in
+ * bpf_lru_list.c logic. For example, certain code path of
+ * bpf_lru_pop_free(), which is called by function prealloc_lru_pop(),
+ * will acquire lru_lock first followed by acquiring bucket_lock.
+ *
+ * In hashtab.c, to avoid deadlock, lock acquisition of
+ * bucket_lock followed by lru_lock is not allowed. In such cases,
+ * bucket_lock needs to be released first before acquiring lru_lock.
+ */
 static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
 u32 hash)
 {
@@ -244,6 +352,7 @@
 */
 bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
 bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
+ bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED);
 int numa_node = bpf_map_attr_numa_node(attr);

 BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
@@ -251,14 +360,18 @@
 BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
 offsetof(struct htab_elem, hash_node.pprev));

- if (lru && !capable(CAP_SYS_ADMIN))
+ if (lru && !bpf_capable())
 /* LRU implementation is much complicated than other
- * maps. Hence, limit to CAP_SYS_ADMIN for now.
+ * maps. Hence, limit to CAP_BPF.
 */
 return -EPERM;

- if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK)
- /* reserved bits should not be used */
+ if (zero_seed && !capable(CAP_SYS_ADMIN))
+ /* Guard against local DoS, and discourage production use. */
+ return -EPERM;
+
+ if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK ||
+ !bpf_map_flags_access_ok(attr->map_flags))
 return -EINVAL;

 if (!lru && percpu_lru)
@@ -309,8 +422,8 @@
 bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
 bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
 struct bpf_htab *htab;
- int err, i;
 u64 cost;
+ int err;

 htab = kzalloc(sizeof(*htab), GFP_USER);
 if (!htab)
@@ -355,14 +468,8 @@
 else
 cost += (u64) htab->elem_size * num_possible_cpus();

- if (cost >= U32_MAX - PAGE_SIZE)
- /* make sure page count doesn't overflow */
- goto free_htab;
-
- htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-
- /* if map size is larger than memlock limit, reject it early */
- err = bpf_map_precharge_memlock(htab->map.pages);
+ /* if map size is larger than memlock limit, reject it */
+ err = bpf_map_charge_init(&htab->map.memory, cost);
 if (err)
 goto free_htab;

@@ -371,13 +478,14 @@
 sizeof(struct bucket),
 htab->map.numa_node);
 if (!htab->buckets)
- goto free_htab;
+ goto free_charge;

- htab->hashrnd = get_random_int();
- for (i = 0; i < htab->n_buckets; i++) {
- INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
- raw_spin_lock_init(&htab->buckets[i].lock);
- }
+ if (htab->map.map_flags & BPF_F_ZERO_SEED)
+ htab->hashrnd = 0;
+ else
+ htab->hashrnd = get_random_int();
+
+ htab_init_buckets(htab);

 if (prealloc) {
 err = prealloc_init(htab);
@@ -400,6 +508,8 @@
 prealloc_destroy(htab);
 free_buckets:
 bpf_map_area_free(htab->buckets);
+free_charge:
+ bpf_map_charge_finish(&htab->map.memory);
 free_htab:
 kfree(htab);
 return ERR_PTR(err);
@@ -468,8 +578,7 @@
 struct htab_elem *l;
 u32 hash, key_size;

- /* Must be called with rcu_read_lock. */
- WARN_ON_ONCE(!rcu_read_lock_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());

 key_size = map->key_size;

@@ -503,7 +612,7 @@
 * bpf_prog
 * __htab_map_lookup_elem
 */
-static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
 struct bpf_insn *insn = insn_buf;
 const int ret = BPF_REG_0;
@@ -542,7 +651,7 @@
 return __htab_lru_map_lookup_elem(map, key, false);
 }

-static u32 htab_lru_map_gen_lookup(struct bpf_map *map,
+static int htab_lru_map_gen_lookup(struct bpf_map *map,
 struct bpf_insn *insn_buf)
 {
 struct bpf_insn *insn = insn_buf;
@@ -583,7 +692,7 @@
 b = __select_bucket(htab, tgt_l->hash);
 head = &b->head;

- raw_spin_lock_irqsave(&b->lock, flags);
+ flags = htab_lock_bucket(htab, b);

 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
 if (l == tgt_l) {
@@ -591,7 +700,7 @@
 break;
 }

- raw_spin_unlock_irqrestore(&b->lock, flags);
+ htab_unlock_bucket(htab, b, flags);

 return l == tgt_l;
 }
@@ -712,19 +821,36 @@
 }
 }

+static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
+ void *value, bool onallcpus)
+{
+ /* When using prealloc and not setting the initial value on all cpus,
+ * zero-fill element values for other cpus (just as what happens when
+ * not using prealloc). Otherwise, bpf program has no way to ensure
+ * known initial values for cpus other than current one
+ * (onallcpus=false always when coming from bpf prog).
+ */
+ if (htab_is_prealloc(htab) && !onallcpus) {
+ u32 size = round_up(htab->map.value_size, 8);
+ int current_cpu = raw_smp_processor_id();
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ if (cpu == current_cpu)
+ bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value,
+ size);
+ else
+ memset(per_cpu_ptr(pptr, cpu), 0, size);
+ }
+ } else {
+ pcpu_copy_value(htab, pptr, value, onallcpus);
+ }
+}
+
 static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
 {
 return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
 BITS_PER_LONG == 64;
-}
-
-static u32 htab_size_value(const struct bpf_htab *htab, bool percpu)
-{
- u32 size = htab->map.value_size;
-
- if (percpu || fd_htab_map_needs_adjust(htab))
- size = round_up(size, 8);
- return size;
 }

 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
@@ -732,7 +858,7 @@
 bool percpu, bool onallcpus,
 struct htab_elem *old_elem)
 {
- u32 size = htab_size_value(htab, percpu);
+ u32 size = htab->map.value_size;
 bool prealloc = htab_is_prealloc(htab);
 struct htab_elem *l_new, **pl_new;
 void __percpu *pptr;
@@ -771,10 +897,13 @@
 l_new = ERR_PTR(-ENOMEM);
 goto dec_count;
 }
+ check_and_init_map_lock(&htab->map,
+ l_new->key + round_up(key_size, 8));
 }

 memcpy(l_new->key, key, key_size);
 if (percpu) {
+ size = round_up(size, 8);
 if (prealloc) {
 pptr = htab_elem_get_ptr(l_new, key_size);
 } else {
@@ -788,12 +917,17 @@
 }
 }

- pcpu_copy_value(htab, pptr, value, onallcpus);
+ pcpu_init_value(htab, pptr, value, onallcpus);

 if (!prealloc)
 htab_elem_set_ptr(l_new, key_size, pptr);
- } else {
+ } else if (fd_htab_map_needs_adjust(htab)) {
+ size = round_up(size, 8);
 memcpy(l_new->key + round_up(key_size, 8), value, size);
+ } else {
+ copy_map_value(&htab->map,
+ l_new->key + round_up(key_size, 8),
+ value);
 }

 l_new->hash = hash;
@@ -806,11 +940,11 @@
 static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
 u64 map_flags)
 {
- if (l_old && map_flags == BPF_NOEXIST)
+ if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
 /* elem already exists */
 return -EEXIST;

- if (!l_old && map_flags == BPF_EXIST)
+ if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
 /* elem doesn't exist, cannot update it */
 return -ENOENT;

@@ -829,11 +963,11 @@
 u32 key_size, hash;
 int ret;

- if (unlikely(map_flags > BPF_EXIST))
+ if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
 /* unknown flags */
 return -EINVAL;

- WARN_ON_ONCE(!rcu_read_lock_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());

 key_size = map->key_size;

@@ -842,14 +976,49 @@
 b = __select_bucket(htab, hash);
 head = &b->head;

- /* bpf_map_update_elem() can be called in_irq() */
- raw_spin_lock_irqsave(&b->lock, flags);
+ if (unlikely(map_flags & BPF_F_LOCK)) {
+ if (unlikely(!map_value_has_spin_lock(map)))
+ return -EINVAL;
+ /* find an element without taking the bucket lock */
+ l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
+ htab->n_buckets);
+ ret = check_flags(htab, l_old, map_flags);
+ if (ret)
+ return ret;
+ if (l_old) {
+ /* grab the element lock and update value in place */
+ copy_map_value_locked(map,
+ l_old->key + round_up(key_size, 8),
+ value, false);
+ return 0;
+ }
+ /* fall through, grab the bucket lock and lookup again.
+ * 99.9% chance that the element won't be found,
+ * but second lookup under lock has to be done.
+ */
+ }
+
+ flags = htab_lock_bucket(htab, b);

 l_old = lookup_elem_raw(head, hash, key, key_size);

 ret = check_flags(htab, l_old, map_flags);
 if (ret)
 goto err;
+
+ if (unlikely(l_old && (map_flags & BPF_F_LOCK))) {
+ /* first lookup without the bucket lock didn't find the element,
+ * but second lookup with the bucket lock found it.
+ * This case is highly unlikely, but has to be dealt with:
+ * grab the element lock in addition to the bucket lock
+ * and update element in place
+ */
+ copy_map_value_locked(map,
+ l_old->key + round_up(key_size, 8),
+ value, false);
+ ret = 0;
+ goto err;
+ }

 l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
 l_old);
@@ -870,7 +1039,7 @@
 }
 ret = 0;
 err:
- raw_spin_unlock_irqrestore(&b->lock, flags);
+ htab_unlock_bucket(htab, b, flags);
 return ret;
 }

@@ -889,7 +1058,7 @@
 /* unknown flags */
 return -EINVAL;

- WARN_ON_ONCE(!rcu_read_lock_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());

 key_size = map->key_size;

@@ -908,8 +1077,7 @@
 return -ENOMEM;
 memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size);

- /* bpf_map_update_elem() can be called in_irq() */
- raw_spin_lock_irqsave(&b->lock, flags);
+ flags = htab_lock_bucket(htab, b);

 l_old = lookup_elem_raw(head, hash, key, key_size);

@@ -928,7 +1096,7 @@
 ret = 0;

 err:
- raw_spin_unlock_irqrestore(&b->lock, flags);
+ htab_unlock_bucket(htab, b, flags);

 if (ret)
 bpf_lru_push_free(&htab->lru, &l_new->lru_node);
@@ -963,8 +1131,7 @@
 b = __select_bucket(htab, hash);
 head = &b->head;

- /* bpf_map_update_elem() can be called in_irq() */
- raw_spin_lock_irqsave(&b->lock, flags);
+ flags = htab_lock_bucket(htab, b);

 l_old = lookup_elem_raw(head, hash, key, key_size);

@@ -987,7 +1154,7 @@
 }
 ret = 0;
 err:
- raw_spin_unlock_irqrestore(&b->lock, flags);
+ htab_unlock_bucket(htab, b, flags);
 return ret;
 }

@@ -1027,8 +1194,7 @@
 return -ENOMEM;
 }

- /* bpf_map_update_elem() can be called in_irq() */
- raw_spin_lock_irqsave(&b->lock, flags);
+ flags = htab_lock_bucket(htab, b);

 l_old = lookup_elem_raw(head, hash, key, key_size);

@@ -1043,14 +1209,14 @@
 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
 value, onallcpus);
 } else {
- pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
+ pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
 value, onallcpus);
 hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 l_new = NULL;
 }
 ret = 0;
 err:
- raw_spin_unlock_irqrestore(&b->lock, flags);
+ htab_unlock_bucket(htab, b, flags);
 if (l_new)
 bpf_lru_push_free(&htab->lru, &l_new->lru_node);
 return ret;
@@ -1080,7 +1246,7 @@
 u32 hash, key_size;
 int ret = -ENOENT;

- WARN_ON_ONCE(!rcu_read_lock_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());

 key_size = map->key_size;

@@ -1088,7 +1254,7 @@
 b = __select_bucket(htab, hash);
 head = &b->head;

- raw_spin_lock_irqsave(&b->lock, flags);
+ flags = htab_lock_bucket(htab, b);

 l = lookup_elem_raw(head, hash, key, key_size);

@@ -1098,7 +1264,7 @@
 ret = 0;
 }

- raw_spin_unlock_irqrestore(&b->lock, flags);
+ htab_unlock_bucket(htab, b, flags);
 return ret;
 }

@@ -1112,7 +1278,7 @@
 u32 hash, key_size;
 int ret = -ENOENT;

- WARN_ON_ONCE(!rcu_read_lock_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());

 key_size = map->key_size;

@@ -1120,7 +1286,7 @@
 b = __select_bucket(htab, hash);
 head = &b->head;

- raw_spin_lock_irqsave(&b->lock, flags);
+ flags = htab_lock_bucket(htab, b);

 l = lookup_elem_raw(head, hash, key, key_size);

@@ -1129,7 +1295,7 @@
 ret = 0;
 }

- raw_spin_unlock_irqrestore(&b->lock, flags);
+ htab_unlock_bucket(htab, b, flags);
 if (l)
 bpf_lru_push_free(&htab->lru, &l->lru_node);
 return ret;
@@ -1156,12 +1322,10 @@
 {
 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);

- /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
- * so the programs (can be more than one that used this map) were
- * disconnected from events. Wait for outstanding critical sections in
- * these programs to complete
+ /* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback.
+ * bpf_free_used_maps() is called after bpf prog is no longer executing.
+ * There is no need to synchronize_rcu() here to protect map elements.
 */
- synchronize_rcu();

 /* some of free_htab_elem() callbacks for elements of this map may
 * not have executed. Wait for them.
@@ -1198,7 +1362,476 @@
 rcu_read_unlock();
 }

+static int
+__htab_map_lookup_and_delete_batch(struct bpf_map *map,
+ const union bpf_attr *attr,
+ union bpf_attr __user *uattr,
+ bool do_delete, bool is_lru_map,
+ bool is_percpu)
+{
+ struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+ u32 bucket_cnt, total, key_size, value_size, roundup_key_size;
+ void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val;
+ void __user *uvalues = u64_to_user_ptr(attr->batch.values);
+ void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
+ void *ubatch = u64_to_user_ptr(attr->batch.in_batch);
+ u32 batch, max_count, size, bucket_size;
+ struct htab_elem *node_to_free = NULL;
+ u64 elem_map_flags, map_flags;
+ struct hlist_nulls_head *head;
+ struct hlist_nulls_node *n;
+ unsigned long flags = 0;
+ bool locked = false;
+ struct htab_elem *l;
+ struct bucket *b;
+ int ret = 0;
+
+ elem_map_flags = attr->batch.elem_flags;
+ if ((elem_map_flags & ~BPF_F_LOCK) ||
+ ((elem_map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
+ return -EINVAL;
+
+ map_flags = attr->batch.flags;
+ if (map_flags)
+ return -EINVAL;
+
+ max_count = attr->batch.count;
+ if (!max_count)
+ return 0;
+
+ if (put_user(0, &uattr->batch.count))
+ return -EFAULT;
+
+ batch = 0;
+ if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch)))
+ return -EFAULT;
+
+ if (batch >= htab->n_buckets)
+ return -ENOENT;
+
+ key_size = htab->map.key_size;
+ roundup_key_size = round_up(htab->map.key_size, 8);
+ value_size = htab->map.value_size;
+ size = round_up(value_size, 8);
+ if (is_percpu)
+ value_size = size * num_possible_cpus();
+ total = 0;
+ /* while experimenting with hash tables with sizes ranging from 10 to
+ * 1000, it was observed that a bucket can have upto 5 entries.
+ */
+ bucket_size = 5;
+
+alloc:
+ /* We cannot do copy_from_user or copy_to_user inside
+ * the rcu_read_lock. Allocate enough space here.
+ */
+ keys = kvmalloc_array(key_size, bucket_size, GFP_USER | __GFP_NOWARN);
+ values = kvmalloc_array(value_size, bucket_size, GFP_USER | __GFP_NOWARN);
+ if (!keys || !values) {
+ ret = -ENOMEM;
+ goto after_loop;
+ }
+
+again:
+ bpf_disable_instrumentation();
+ rcu_read_lock();
+again_nocopy:
+ dst_key = keys;
+ dst_val = values;
+ b = &htab->buckets[batch];
+ head = &b->head;
+ /* do not grab the lock unless need it (bucket_cnt > 0). */
+ if (locked)
+ flags = htab_lock_bucket(htab, b);
+
+ bucket_cnt = 0;
+ hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
+ bucket_cnt++;
+
+ if (bucket_cnt && !locked) {
+ locked = true;
+ goto again_nocopy;
+ }
+
+ if (bucket_cnt > (max_count - total)) {
+ if (total == 0)
+ ret = -ENOSPC;
+ /* Note that since bucket_cnt > 0 here, it is implicit
+ * that the locked was grabbed, so release it.
+ */
+ htab_unlock_bucket(htab, b, flags);
+ rcu_read_unlock();
+ bpf_enable_instrumentation();
+ goto after_loop;
+ }
+
+ if (bucket_cnt > bucket_size) {
+ bucket_size = bucket_cnt;
+ /* Note that since bucket_cnt > 0 here, it is implicit
+ * that the locked was grabbed, so release it.
+ */
+ htab_unlock_bucket(htab, b, flags);
+ rcu_read_unlock();
+ bpf_enable_instrumentation();
+ kvfree(keys);
+ kvfree(values);
+ goto alloc;
+ }
+
+ /* Next block is only safe to run if you have grabbed the lock */
+ if (!locked)
+ goto next_batch;
+
+ hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
+ memcpy(dst_key, l->key, key_size);
+
+ if (is_percpu) {
+ int off = 0, cpu;
+ void __percpu *pptr;
+
+ pptr = htab_elem_get_ptr(l, map->key_size);
+ for_each_possible_cpu(cpu) {
+ bpf_long_memcpy(dst_val + off,
+ per_cpu_ptr(pptr, cpu), size);
+ off += size;
+ }
+ } else {
+ value = l->key + roundup_key_size;
+ if (elem_map_flags & BPF_F_LOCK)
+ copy_map_value_locked(map, dst_val, value,
+ true);
+ else
+ copy_map_value(map, dst_val, value);
+ check_and_init_map_lock(map, dst_val);
+ }
+ if (do_delete) {
+ hlist_nulls_del_rcu(&l->hash_node);
+
+ /* bpf_lru_push_free() will acquire lru_lock, which
+ * may cause deadlock. See comments in function
+ * prealloc_lru_pop(). Let us do bpf_lru_push_free()
+ * after releasing the bucket lock.
+ */
+ if (is_lru_map) {
+ l->batch_flink = node_to_free;
+ node_to_free = l;
+ } else {
+ free_htab_elem(htab, l);
+ }
+ }
+ dst_key += key_size;
+ dst_val += value_size;
+ }
+
+ htab_unlock_bucket(htab, b, flags);
+ locked = false;
+
+ while (node_to_free) {
+ l = node_to_free;
+ node_to_free = node_to_free->batch_flink;
+ bpf_lru_push_free(&htab->lru, &l->lru_node);
+ }
+
+next_batch:
+ /* If we are not copying data, we can go to next bucket and avoid
+ * unlocking the rcu.
+ */
+ if (!bucket_cnt && (batch + 1 < htab->n_buckets)) {
+ batch++;
+ goto again_nocopy;
+ }
+
+ rcu_read_unlock();
+ bpf_enable_instrumentation();
+ if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys,
+ key_size * bucket_cnt) ||
+ copy_to_user(uvalues + total * value_size, values,
+ value_size * bucket_cnt))) {
+ ret = -EFAULT;
+ goto after_loop;
+ }
+
+ total += bucket_cnt;
+ batch++;
+ if (batch >= htab->n_buckets) {
+ ret = -ENOENT;
+ goto after_loop;
+ }
+ goto again;
+
+after_loop:
+ if (ret == -EFAULT)
+ goto out;
+
+ /* copy # of entries and next batch */
+ ubatch = u64_to_user_ptr(attr->batch.out_batch);
+ if (copy_to_user(ubatch, &batch, sizeof(batch)) ||
+ put_user(total, &uattr->batch.count))
+ ret = -EFAULT;
+
+out:
+ kvfree(keys);
+ kvfree(values);
+ return ret;
+}
+
+static int
+htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+ false, true);
+}
+
+static int
+htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
+ const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+ false, true);
+}
+
+static int
+htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+ false, false);
+}
+
+static int
+htab_map_lookup_and_delete_batch(struct bpf_map *map,
+ const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+ false, false);
+}
+
+static int
+htab_lru_percpu_map_lookup_batch(struct bpf_map *map,
+ const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+ true, true);
+}
+
+static int
+htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
+ const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+ true, true);
+}
+
+static int
+htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+ true, false);
+}
+
+static int
+htab_lru_map_lookup_and_delete_batch(struct bpf_map *map,
+ const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+ true, false);
+}
+
+struct bpf_iter_seq_hash_map_info {
+ struct bpf_map *map;
+ struct bpf_htab *htab;
+ void *percpu_value_buf; // non-zero means percpu hash
+ u32 bucket_id;
+ u32 skip_elems;
+};
+
+static struct htab_elem *
+bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info,
+ struct htab_elem *prev_elem)
+{
+ const struct bpf_htab *htab = info->htab;
+ u32 skip_elems = info->skip_elems;
+ u32 bucket_id = info->bucket_id;
+ struct hlist_nulls_head *head;
+ struct hlist_nulls_node *n;
+ struct htab_elem *elem;
+ struct bucket *b;
+ u32 i, count;
+
+ if (bucket_id >= htab->n_buckets)
+ return NULL;
+
+ /* try to find next elem in the same bucket */
+ if (prev_elem) {
+ /* no update/deletion on this bucket, prev_elem should be still valid
+ * and we won't skip elements.
+ */
+ n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node));
+ elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node);
+ if (elem)
+ return elem;
+
+ /* not found, unlock and go to the next bucket */
+ b = &htab->buckets[bucket_id++];
+ rcu_read_unlock();
+ skip_elems = 0;
+ }
+
+ for (i = bucket_id; i < htab->n_buckets; i++) {
+ b = &htab->buckets[i];
+ rcu_read_lock();
+
+ count = 0;
+ head = &b->head;
+ hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
+ if (count >= skip_elems) {
+ info->bucket_id = i;
+ info->skip_elems = count;
+ return elem;
+ }
+ count++;
+ }
+
+ rcu_read_unlock();
+ skip_elems = 0;
+ }
+
+ info->bucket_id = i;
+ info->skip_elems = 0;
+ return NULL;
+}
+
+static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct bpf_iter_seq_hash_map_info *info = seq->private;
+ struct htab_elem *elem;
+
+ elem = bpf_hash_map_seq_find_next(info, NULL);
+ if (!elem)
+ return NULL;
+
+ if (*pos == 0)
+ ++*pos;
+ return elem;
+}
+
+static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bpf_iter_seq_hash_map_info *info = seq->private;
+
+ ++*pos;
+ ++info->skip_elems;
+ return bpf_hash_map_seq_find_next(info, v);
+}
+
+static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem)
+{
+ struct bpf_iter_seq_hash_map_info *info = seq->private;
+ u32 roundup_key_size, roundup_value_size;
+ struct bpf_iter__bpf_map_elem ctx = {};
+ struct bpf_map *map = info->map;
+ struct bpf_iter_meta meta;
+ int ret = 0, off = 0, cpu;
+ struct bpf_prog *prog;
+ void __percpu *pptr;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, elem == NULL);
+ if (prog) {
+ ctx.meta = &meta;
+ ctx.map = info->map;
+ if (elem) {
+ roundup_key_size = round_up(map->key_size, 8);
+ ctx.key = elem->key;
+ if (!info->percpu_value_buf) {
+ ctx.value = elem->key + roundup_key_size;
+ } else {
+ roundup_value_size = round_up(map->value_size, 8);
+ pptr = htab_elem_get_ptr(elem, map->key_size);
+ for_each_possible_cpu(cpu) {
+ bpf_long_memcpy(info->percpu_value_buf + off,
+ per_cpu_ptr(pptr, cpu),
+ roundup_value_size);
+ off += roundup_value_size;
+ }
+ ctx.value = info->percpu_value_buf;
+ }
+ }
+ ret = bpf_iter_run_prog(prog, &ctx);
+ }
+
+ return ret;
+}
+
+static int bpf_hash_map_seq_show(struct seq_file *seq, void *v)
+{
+ return __bpf_hash_map_seq_show(seq, v);
+}
+
+static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v)
+{
+ if (!v)
+ (void)__bpf_hash_map_seq_show(seq, NULL);
+ else
+ rcu_read_unlock();
+}
+
+static int bpf_iter_init_hash_map(void *priv_data,
+ struct bpf_iter_aux_info *aux)
+{
+ struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
+ struct bpf_map *map = aux->map;
+ void *value_buf;
+ u32 buf_size;
+
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+ buf_size = round_up(map->value_size, 8) * num_possible_cpus();
+ value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
+ if (!value_buf)
+ return -ENOMEM;
+
+ seq_info->percpu_value_buf = value_buf;
+ }
+
+ bpf_map_inc_with_uref(map);
+ seq_info->map = map;
+ seq_info->htab = container_of(map, struct bpf_htab, map);
+ return 0;
+}
+
+static void bpf_iter_fini_hash_map(void *priv_data)
+{
+ struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
+
+ bpf_map_put_with_uref(seq_info->map);
+ kfree(seq_info->percpu_value_buf);
+}
+
+static const struct seq_operations bpf_hash_map_seq_ops = {
+ .start = bpf_hash_map_seq_start,
+ .next = bpf_hash_map_seq_next,
+ .stop = bpf_hash_map_seq_stop,
+ .show = bpf_hash_map_seq_show,
+};
+
+static const struct bpf_iter_seq_info iter_seq_info = {
+ .seq_ops = &bpf_hash_map_seq_ops,
+ .init_seq_private = bpf_iter_init_hash_map,
+ .fini_seq_private = bpf_iter_fini_hash_map,
+ .seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info),
+};
+
+static int htab_map_btf_id;
 const struct bpf_map_ops htab_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
 .map_alloc_check = htab_map_alloc_check,
 .map_alloc = htab_map_alloc,
 .map_free = htab_map_free,
@@ -1208,9 +1841,15 @@
 .map_delete_elem = htab_map_delete_elem,
 .map_gen_lookup = htab_map_gen_lookup,
 .map_seq_show_elem = htab_map_seq_show_elem,
+ BATCH_OPS(htab),
+ .map_btf_name = "bpf_htab",
+ .map_btf_id = &htab_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
 };

+static int htab_lru_map_btf_id;
 const struct bpf_map_ops htab_lru_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
 .map_alloc_check = htab_map_alloc_check,
 .map_alloc = htab_map_alloc,
 .map_free = htab_map_free,
@@ -1221,6 +1860,10 @@
 .map_delete_elem = htab_lru_map_delete_elem,
 .map_gen_lookup = htab_lru_map_gen_lookup,
 .map_seq_show_elem = htab_map_seq_show_elem,
+ BATCH_OPS(htab_lru),
+ .map_btf_name = "bpf_htab",
+ .map_btf_id = &htab_lru_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
 };

 /* Called from eBPF program */
@@ -1296,7 +1939,38 @@
 return ret;
 }

+static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key,
+ struct seq_file *m)
+{
+ struct htab_elem *l;
+ void __percpu *pptr;
+ int cpu;
+
+ rcu_read_lock();
+
+ l = __htab_map_lookup_elem(map, key);
+ if (!l) {
+ rcu_read_unlock();
+ return;
+ }
+
+ btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
+ seq_puts(m, ": {\n");
+ pptr = htab_elem_get_ptr(l, map->key_size);
+ for_each_possible_cpu(cpu) {
+ seq_printf(m, "\tcpu%d: ", cpu);
+ btf_type_seq_show(map->btf, map->btf_value_type_id,
+ per_cpu_ptr(pptr, cpu), m);
+ seq_puts(m, "\n");
+ }
+ seq_puts(m, "}\n");
+
+ rcu_read_unlock();
+}
+
+static int htab_percpu_map_btf_id;
 const struct bpf_map_ops htab_percpu_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
 .map_alloc_check = htab_map_alloc_check,
 .map_alloc = htab_map_alloc,
 .map_free = htab_map_free,
@@ -1304,9 +1978,16 @@
 .map_lookup_elem = htab_percpu_map_lookup_elem,
 .map_update_elem = htab_percpu_map_update_elem,
 .map_delete_elem = htab_map_delete_elem,
+ .map_seq_show_elem = htab_percpu_map_seq_show_elem,
+ BATCH_OPS(htab_percpu),
+ .map_btf_name = "bpf_htab",
+ .map_btf_id = &htab_percpu_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
 };

+static int htab_lru_percpu_map_btf_id;
 const struct bpf_map_ops htab_lru_percpu_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
 .map_alloc_check = htab_map_alloc_check,
 .map_alloc = htab_map_alloc,
 .map_free = htab_map_free,
@@ -1314,6 +1995,11 @@
 .map_lookup_elem = htab_lru_percpu_map_lookup_elem,
 .map_update_elem = htab_lru_percpu_map_update_elem,
 .map_delete_elem = htab_lru_map_delete_elem,
+ .map_seq_show_elem = htab_percpu_map_seq_show_elem,
+ BATCH_OPS(htab_lru_percpu),
+ .map_btf_name = "bpf_htab",
+ .map_btf_id = &htab_lru_percpu_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
 };

 static int fd_htab_map_alloc_check(union bpf_attr *attr)
@@ -1412,7 +2098,7 @@
 return READ_ONCE(*inner_map);
 }

-static u32 htab_of_map_gen_lookup(struct bpf_map *map,
+static int htab_of_map_gen_lookup(struct bpf_map *map,
 struct bpf_insn *insn_buf)
 {
 struct bpf_insn *insn = insn_buf;
@@ -1436,6 +2122,7 @@
 fd_htab_map_free(map);
 }

+static int htab_of_maps_map_btf_id;
 const struct bpf_map_ops htab_of_maps_map_ops = {
 .map_alloc_check = fd_htab_map_alloc_check,
 .map_alloc = htab_of_map_alloc,
@@ -1448,4 +2135,6 @@
 .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
 .map_gen_lookup = htab_of_map_gen_lookup,
 .map_check_btf = map_check_no_btf,
+ .map_btf_name = "bpf_htab",
+ .map_btf_id = &htab_of_maps_map_btf_id,
 };
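
Not part of the diff above: for context, a minimal user-space sketch of the batch lookup interface that the BATCH_OPS() wiring in this change hooks up for hash maps. It assumes a reasonably recent libbpf (bpf_map_create() and bpf_map_lookup_batch() from <bpf/bpf.h>) and enough privilege to create BPF maps; the map name "batch_demo", the fixed buffer size of 64, and the trimmed error handling are illustrative choices, not anything taken from the commit.

/* Sketch: walk a BPF_MAP_TYPE_HASH map with BPF_MAP_LOOKUP_BATCH.
 * Build with something like: gcc demo.c -lbpf (libbpf assumed installed).
 */
#include <stdbool.h>
#include <stdio.h>
#include <bpf/bpf.h>

int main(void)
{
	__u32 keys[64], vals[64], in_batch = 0, out_batch = 0, count;
	bool first = true;
	int fd, i, err;

	fd = bpf_map_create(BPF_MAP_TYPE_HASH, "batch_demo",
			    sizeof(__u32), sizeof(__u32), 64, NULL);
	if (fd < 0)
		return 1;

	/* Populate the map with a few entries. */
	for (i = 0; i < 64; i++) {
		__u32 k = i, v = i * 10;
		bpf_map_update_elem(fd, &k, &v, BPF_ANY);
	}

	/* The kernel returns an opaque "next bucket" cursor in out_batch;
	 * the final batch copies its entries and then reports ENOENT.
	 */
	for (;;) {
		count = 64;
		err = bpf_map_lookup_batch(fd, first ? NULL : &in_batch,
					   &out_batch, keys, vals, &count, NULL);
		first = false;
		for (i = 0; i < (int)count; i++)
			printf("key %u -> val %u\n", keys[i], vals[i]);
		if (err) /* ENOENT signals the iteration is finished */
			break;
		in_batch = out_batch;
	}
	return 0;
}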