...
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
  */
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
 #include <linux/bpf_lirc.h>
+#include <linux/bpf_verifier.h>
 #include <linux/btf.h>
 #include <linux/syscalls.h>
 #include <linux/slab.h>
...
 #include <linux/cred.h>
 #include <linux/timekeeping.h>
 #include <linux/ctype.h>
-#include <linux/btf.h>
 #include <linux/nospec.h>
+#include <linux/audit.h>
+#include <uapi/linux/btf.h>
+#include <linux/pgtable.h>
+#include <linux/bpf_lsm.h>
+#include <linux/poll.h>
+#include <linux/bpf-netns.h>
+#include <linux/rcupdate_trace.h>
 
-#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
-			  (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
-			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
-			  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+#include <trace/hooks/syscall_check.h>
+
+#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
+			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
+			  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+#define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)
 #define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
-#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))
+#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \
+			IS_FD_HASH(map))
 
 #define BPF_OBJ_FLAG_MASK	(BPF_F_RDONLY | BPF_F_WRONLY)
 
...
 static DEFINE_SPINLOCK(prog_idr_lock);
 static DEFINE_IDR(map_idr);
 static DEFINE_SPINLOCK(map_idr_lock);
+static DEFINE_IDR(link_idr);
+static DEFINE_SPINLOCK(link_idr_lock);
 
 int sysctl_unprivileged_bpf_disabled __read_mostly =
	IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;
 
 static const struct bpf_map_ops * const bpf_map_types[] = {
-#define BPF_PROG_TYPE(_id, _ops)
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
 #define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
+#define BPF_LINK_TYPE(_id, _name)
 #include <linux/bpf_types.h>
 #undef BPF_PROG_TYPE
 #undef BPF_MAP_TYPE
+#undef BPF_LINK_TYPE
 };
 
 /*
...
			     size_t expected_size,
			     size_t actual_size)
 {
-	unsigned char __user *addr;
-	unsigned char __user *end;
-	unsigned char val;
-	int err;
+	unsigned char __user *addr = uaddr + expected_size;
+	int res;
 
	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
		return -E2BIG;
 
-	if (unlikely(!access_ok(VERIFY_READ, uaddr, actual_size)))
-		return -EFAULT;
-
	if (actual_size <= expected_size)
		return 0;
 
-	addr = uaddr + expected_size;
-	end = uaddr + actual_size;
-
-	for (; addr < end; addr++) {
-		err = get_user(val, addr);
-		if (err)
-			return err;
-		if (val)
-			return -E2BIG;
-	}
-
-	return 0;
+	res = check_zeroed_user(addr, actual_size - expected_size);
+	if (res < 0)
+		return res;
+	return res ? 0 : -E2BIG;
 }
 
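
check_zeroed_user() collapses the old byte-by-byte get_user() loop into a single call. The contract it enforces is the usual uapi extensibility rule: a newer userspace may hand the kernel a larger struct than the kernel knows about, provided every byte past the kernel's expected_size is zero. A hedged illustration of the caller side (the struct name and fields are hypothetical):

struct hypo_attr_v2 {
	__u32 fd;	/* known to old kernels */
	__u32 flags;	/* known to old kernels */
	__u64 new_opt;	/* added in v2 of the interface */
};

/* A v2 caller must zero-initialize the whole struct, so that a v1
 * kernel -- which only expects the first 8 bytes -- sees a zeroed
 * tail and accepts the call instead of returning -E2BIG.
 */
struct hypo_attr_v2 attr = { .fd = 3 };	/* new_opt stays 0 */
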
 const struct bpf_map_ops bpf_map_offload_ops = {
+	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc = bpf_map_offload_map_alloc,
	.map_free = bpf_map_offload_map_free,
	.map_check_btf = map_check_no_btf,
...
	return map;
 }
 
-void *bpf_map_area_alloc(size_t size, int numa_node)
+static void bpf_map_write_active_inc(struct bpf_map *map)
 {
-	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
-	 * trigger under memory pressure as we really just want to
-	 * fail instead.
+	atomic64_inc(&map->writecnt);
+}
+
+static void bpf_map_write_active_dec(struct bpf_map *map)
+{
+	atomic64_dec(&map->writecnt);
+}
+
+bool bpf_map_write_active(const struct bpf_map *map)
+{
+	return atomic64_read(&map->writecnt) != 0;
+}
+
+static u32 bpf_map_value_size(struct bpf_map *map)
+{
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
+		return round_up(map->value_size, 8) * num_possible_cpus();
+	else if (IS_FD_MAP(map))
+		return sizeof(u32);
+	else
+		return map->value_size;
+}
+
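
bpf_map_value_size() is the kernel-side source of truth for how large the syscall value buffer must be, and userspace has to mirror it: for per-cpu maps the buffer holds one 8-byte-aligned slot per possible CPU. A hedged sketch (the possible-CPU count should really be parsed from /sys/devices/system/cpu/possible, as libbpf's libbpf_num_possible_cpus() does; sysconf() is only an approximation here):

#include <stdlib.h>
#include <unistd.h>

/* Size of the user buffer for BPF_MAP_LOOKUP_ELEM on a
 * BPF_MAP_TYPE_PERCPU_* map: round value_size up to 8 bytes,
 * then one slot per possible CPU.
 */
static size_t percpu_value_buf_size(size_t value_size)
{
	long ncpu = sysconf(_SC_NPROCESSORS_CONF); /* approximation */

	return ((value_size + 7) & ~7UL) * (size_t)ncpu;
}

/* usage: void *buf = malloc(percpu_value_buf_size(value_size)); */
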
+static void maybe_wait_bpf_programs(struct bpf_map *map)
+{
+	/* Wait for any running BPF programs to complete so that
+	 * userspace, when we return to it, knows that all programs
+	 * that could be running use the new map value.
	 */
-	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
+	if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
+	    map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+		synchronize_rcu();
+}
+
+static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
+				void *value, __u64 flags)
+{
+	int err;
+
+	/* Need to create a kthread, thus must support schedule */
+	if (bpf_map_is_dev_bound(map)) {
+		return bpf_map_offload_update_elem(map, key, value, flags);
+	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
+		   map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
+		return map->ops->map_update_elem(map, key, value, flags);
+	} else if (map->map_type == BPF_MAP_TYPE_SOCKHASH ||
+		   map->map_type == BPF_MAP_TYPE_SOCKMAP) {
+		return sock_map_update_elem_sys(map, key, value, flags);
+	} else if (IS_FD_PROG_ARRAY(map)) {
+		return bpf_fd_array_map_update_elem(map, f.file, key, value,
+						    flags);
+	}
+
+	bpf_disable_instrumentation();
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+		err = bpf_percpu_hash_update(map, key, value, flags);
+	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
+		err = bpf_percpu_array_update(map, key, value, flags);
+	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
+		err = bpf_percpu_cgroup_storage_update(map, key, value,
+						       flags);
+	} else if (IS_FD_ARRAY(map)) {
+		rcu_read_lock();
+		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
+						   flags);
+		rcu_read_unlock();
+	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+		rcu_read_lock();
+		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
+						  flags);
+		rcu_read_unlock();
+	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
+		/* rcu_read_lock() is not needed */
+		err = bpf_fd_reuseport_array_update_elem(map, key, value,
+							 flags);
+	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+		   map->map_type == BPF_MAP_TYPE_STACK) {
+		err = map->ops->map_push_elem(map, value, flags);
+	} else {
+		rcu_read_lock();
+		err = map->ops->map_update_elem(map, key, value, flags);
+		rcu_read_unlock();
+	}
+	bpf_enable_instrumentation();
+	maybe_wait_bpf_programs(map);
+
+	return err;
+}
+
+static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
+			      __u64 flags)
+{
+	void *ptr;
+	int err;
+
+	if (bpf_map_is_dev_bound(map))
+		return bpf_map_offload_lookup_elem(map, key, value);
+
+	bpf_disable_instrumentation();
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+		err = bpf_percpu_hash_copy(map, key, value);
+	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
+		err = bpf_percpu_array_copy(map, key, value);
+	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
+		err = bpf_percpu_cgroup_storage_copy(map, key, value);
+	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
+		err = bpf_stackmap_copy(map, key, value);
+	} else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
+		err = bpf_fd_array_map_lookup_elem(map, key, value);
+	} else if (IS_FD_HASH(map)) {
+		err = bpf_fd_htab_map_lookup_elem(map, key, value);
+	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
+		err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
+	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+		   map->map_type == BPF_MAP_TYPE_STACK) {
+		err = map->ops->map_peek_elem(map, value);
+	} else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
+		/* struct_ops map requires directly updating "value" */
+		err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
+	} else {
+		rcu_read_lock();
+		if (map->ops->map_lookup_elem_sys_only)
+			ptr = map->ops->map_lookup_elem_sys_only(map, key);
+		else
+			ptr = map->ops->map_lookup_elem(map, key);
+		if (IS_ERR(ptr)) {
+			err = PTR_ERR(ptr);
+		} else if (!ptr) {
+			err = -ENOENT;
+		} else {
+			err = 0;
+			if (flags & BPF_F_LOCK)
+				/* lock 'ptr' and copy everything but lock */
+				copy_map_value_locked(map, value, ptr, true);
+			else
+				copy_map_value(map, value, ptr);
+			/* mask lock, since value wasn't zero inited */
+			check_and_init_map_lock(map, value);
+		}
+		rcu_read_unlock();
+	}
+
+	bpf_enable_instrumentation();
+	maybe_wait_bpf_programs(map);
+
+	return err;
+}
+
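
bpf_map_copy_value() is where the BPF_F_LOCK flag takes effect: the element's struct bpf_spin_lock is held while everything except the lock word is copied out, and the lock word in the user buffer is then masked. A hedged userspace sketch of a locked lookup over raw bpf(2):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Atomically snapshot one element of a map whose value embeds a
 * struct bpf_spin_lock (see the BTF checks in map_check_btf()).
 */
static int lookup_locked(int map_fd, const void *key, void *value)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key   = (__u64)(unsigned long)key;
	attr.value = (__u64)(unsigned long)value;
	attr.flags = BPF_F_LOCK; /* -EINVAL unless the value has a spin lock */

	return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
}
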
+static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
+{
+	/* We really just want to fail instead of triggering OOM killer
+	 * under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
+	 * which is used for lower order allocation requests.
+	 *
+	 * It has been observed that higher order allocation requests done by
+	 * vmalloc with __GFP_NORETRY being set might fail due to not trying
+	 * to reclaim memory from the page cache, thus we set
+	 * __GFP_RETRY_MAYFAIL to avoid such situations.
+	 */
+
+	const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO;
+	unsigned int flags = 0;
+	unsigned long align = 1;
	void *area;
 
-	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
-		area = kmalloc_node(size, GFP_USER | flags, numa_node);
+	if (size >= SIZE_MAX)
+		return NULL;
+
+	/* kmalloc()'ed memory can't be mmap()'ed */
+	if (mmapable) {
+		BUG_ON(!PAGE_ALIGNED(size));
+		align = SHMLBA;
+		flags = VM_USERMAP;
+	} else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
+		area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY,
+				    numa_node);
		if (area != NULL)
			return area;
	}
 
-	return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
-					   __builtin_return_address(0));
+	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
+			gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL,
+			flags, numa_node, __builtin_return_address(0));
+}
+
+void *bpf_map_area_alloc(u64 size, int numa_node)
+{
+	return __bpf_map_area_alloc(size, numa_node, false);
+}
+
+void *bpf_map_area_mmapable_alloc(u64 size, int numa_node)
+{
+	return __bpf_map_area_alloc(size, numa_node, true);
 }
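
The allocator tries kmalloc_node() first for requests at or below PAGE_ALLOC_COSTLY_ORDER pages and falls back to vmalloc space otherwise; mmapable requests must skip kmalloc entirely because only vmalloc areas tagged VM_USERMAP may be remapped into userspace. A hedged sketch of how a map implementation might use the mmapable variant (the helper name and sizing are illustrative, loosely modeled on the array map):

/* Illustrative only: allocate a page-aligned, user-mmapable value
 * region for a map. bpf_map_area_mmapable_alloc() BUG()s on sizes
 * that are not PAGE_ALIGNED, so round up first.
 */
static void *alloc_mmapable_values(u32 value_size, u32 max_entries,
				   int numa_node)
{
	u64 size = (u64)value_size * max_entries;

	return bpf_map_area_mmapable_alloc(round_up(size, PAGE_SIZE),
					   numa_node);
}
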
 
 void bpf_map_area_free(void *area)
 {
	kvfree(area);
+}
+
+static u32 bpf_map_flags_retain_permanent(u32 flags)
+{
+	/* Some map creation flags are not tied to the map object but
+	 * rather to the map fd instead, so they have no meaning upon
+	 * map object inspection since multiple file descriptors with
+	 * different (access) properties can exist here. Thus, given
+	 * this has zero meaning for the map itself, lets clear these
+	 * from here.
+	 */
+	return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY);
 }
 
 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
...
	map->key_size = attr->key_size;
	map->value_size = attr->value_size;
	map->max_entries = attr->max_entries;
-	map->map_flags = attr->map_flags;
+	map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags);
	map->numa_node = bpf_map_attr_numa_node(attr);
-}
-
-int bpf_map_precharge_memlock(u32 pages)
-{
-	struct user_struct *user = get_current_user();
-	unsigned long memlock_limit, cur;
-
-	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-	cur = atomic_long_read(&user->locked_vm);
-	free_uid(user);
-	if (cur + pages > memlock_limit)
-		return -EPERM;
-	return 0;
 }
 
 static int bpf_charge_memlock(struct user_struct *user, u32 pages)
...
 
 static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
 {
-	atomic_long_sub(pages, &user->locked_vm);
+	if (user)
+		atomic_long_sub(pages, &user->locked_vm);
 }
 
-static int bpf_map_init_memlock(struct bpf_map *map)
+int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size)
 {
-	struct user_struct *user = get_current_user();
+	u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;
+	struct user_struct *user;
	int ret;
 
-	ret = bpf_charge_memlock(user, map->pages);
+	if (size >= U32_MAX - PAGE_SIZE)
+		return -E2BIG;
+
+	user = get_current_user();
+	ret = bpf_charge_memlock(user, pages);
	if (ret) {
		free_uid(user);
		return ret;
	}
-	map->user = user;
-	return ret;
+
+	mem->pages = pages;
+	mem->user = user;
+
+	return 0;
 }
 
-static void bpf_map_release_memlock(struct bpf_map *map)
+void bpf_map_charge_finish(struct bpf_map_memory *mem)
 {
-	struct user_struct *user = map->user;
-	bpf_uncharge_memlock(user, map->pages);
-	free_uid(user);
+	bpf_uncharge_memlock(mem->user, mem->pages);
+	free_uid(mem->user);
+}
+
+void bpf_map_charge_move(struct bpf_map_memory *dst,
+			 struct bpf_map_memory *src)
+{
+	*dst = *src;
+
+	/* Make sure src will not be used for the redundant uncharging. */
+	memset(src, 0, sizeof(struct bpf_map_memory));
 }
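
The old init/release pair becomes a struct bpf_map_memory that can be charged, moved, and finished independently of the map object's lifetime; that decoupling is what lets bpf_map_free_deferred() below uncharge only after ->map_free() has run. A hedged sketch of the intended call pattern inside a map implementation's ->map_alloc() (struct demo_map and the cost formula are illustrative):

struct demo_map {
	struct bpf_map map;
};

/* Illustrative ->map_alloc() skeleton: charge the memlock budget up
 * front, move the charge into the map on success, finish (uncharge)
 * on any error path.
 */
static struct bpf_map *demo_map_alloc(union bpf_attr *attr)
{
	u64 cost = sizeof(struct demo_map) +
		   (u64)attr->max_entries * attr->value_size;
	struct bpf_map_memory mem;
	struct demo_map *dmap;
	int err;

	err = bpf_map_charge_init(&mem, cost);
	if (err)
		return ERR_PTR(err);

	dmap = bpf_map_area_alloc(sizeof(*dmap), bpf_map_attr_numa_node(attr));
	if (!dmap) {
		bpf_map_charge_finish(&mem);	/* undo the charge */
		return ERR_PTR(-ENOMEM);
	}

	bpf_map_charge_move(&dmap->map.memory, &mem); /* map owns it now */
	return &dmap->map;
}
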
 
 int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
 {
	int ret;
 
-	ret = bpf_charge_memlock(map->user, pages);
+	ret = bpf_charge_memlock(map->memory.user, pages);
	if (ret)
		return ret;
-	map->pages += pages;
+	map->memory.pages += pages;
	return ret;
 }
 
 void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
 {
-	bpf_uncharge_memlock(map->user, pages);
-	map->pages -= pages;
+	bpf_uncharge_memlock(map->memory.user, pages);
+	map->memory.pages -= pages;
 }
 
 static int bpf_map_alloc_id(struct bpf_map *map)
...
 static void bpf_map_free_deferred(struct work_struct *work)
 {
	struct bpf_map *map = container_of(work, struct bpf_map, work);
+	struct bpf_map_memory mem;
 
-	bpf_map_release_memlock(map);
+	bpf_map_charge_move(&mem, &map->memory);
	security_bpf_map_free(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
+	bpf_map_charge_finish(&mem);
 }
 
 static void bpf_map_put_uref(struct bpf_map *map)
 {
-	if (atomic_dec_and_test(&map->usercnt)) {
+	if (atomic64_dec_and_test(&map->usercnt)) {
		if (map->ops->map_release_uref)
			map->ops->map_release_uref(map);
	}
...
 */
 static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
 {
-	if (atomic_dec_and_test(&map->refcnt)) {
+	if (atomic64_dec_and_test(&map->refcnt)) {
		/* bpf_map_free_id() must be called first */
		bpf_map_free_id(map, do_idr_lock);
		btf_put(map->btf);
...
	return 0;
 }
 
+static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
+{
+	fmode_t mode = f.file->f_mode;
+
+	/* Our file permissions may have been overridden by global
+	 * map permissions facing syscall side.
+	 */
+	if (READ_ONCE(map->frozen))
+		mode &= ~FMODE_CAN_WRITE;
+	return mode;
+}
+
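
map_get_sys_perms() is the enforcement point for BPF_MAP_FREEZE: once map->frozen is set, every fd loses FMODE_CAN_WRITE on the syscall path, no matter how it was opened. A hedged userspace sketch (error handling elided):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int map_freeze(int map_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;

	/* After this succeeds, BPF_MAP_UPDATE_ELEM / BPF_MAP_DELETE_ELEM
	 * on any fd for this map fail with -EPERM; programs and the
	 * lookup path can still read it.
	 */
	return syscall(__NR_bpf, BPF_MAP_FREEZE, &attr, sizeof(attr));
}
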
 #ifdef CONFIG_PROC_FS
 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 {
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
-	u32 owner_prog_type = 0;
-	u32 owner_jited = 0;
+	u32 type = 0, jited = 0;
 
	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
-		owner_prog_type = array->owner_prog_type;
-		owner_jited = array->owner_jited;
+		spin_lock(&array->aux->owner.lock);
+		type  = array->aux->owner.type;
+		jited = array->aux->owner.jited;
+		spin_unlock(&array->aux->owner.lock);
	}
 
	seq_printf(m,
...
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n"
-		   "map_id:\t%u\n",
+		   "map_id:\t%u\n"
+		   "frozen:\t%u\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
-		   map->pages * 1ULL << PAGE_SHIFT,
-		   map->id);
-
-	if (owner_prog_type) {
-		seq_printf(m, "owner_prog_type:\t%u\n",
-			   owner_prog_type);
-		seq_printf(m, "owner_jited:\t%u\n",
-			   owner_jited);
+		   map->memory.pages * 1ULL << PAGE_SHIFT,
+		   map->id,
+		   READ_ONCE(map->frozen));
+	if (type) {
+		seq_printf(m, "owner_prog_type:\t%u\n", type);
+		seq_printf(m, "owner_jited:\t%u\n", jited);
	}
 }
 #endif
...
	return -EINVAL;
 }
 
+/* called for any extra memory-mapped regions (except initial) */
+static void bpf_map_mmap_open(struct vm_area_struct *vma)
+{
+	struct bpf_map *map = vma->vm_file->private_data;
+
+	if (vma->vm_flags & VM_MAYWRITE)
+		bpf_map_write_active_inc(map);
+}
+
+/* called for all unmapped memory region (including initial) */
+static void bpf_map_mmap_close(struct vm_area_struct *vma)
+{
+	struct bpf_map *map = vma->vm_file->private_data;
+
+	if (vma->vm_flags & VM_MAYWRITE)
+		bpf_map_write_active_dec(map);
+}
+
+static const struct vm_operations_struct bpf_map_default_vmops = {
+	.open		= bpf_map_mmap_open,
+	.close		= bpf_map_mmap_close,
+};
+
+static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct bpf_map *map = filp->private_data;
+	int err;
+
+	if (!map->ops->map_mmap || map_value_has_spin_lock(map))
+		return -ENOTSUPP;
+
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	mutex_lock(&map->freeze_mutex);
+
+	if (vma->vm_flags & VM_WRITE) {
+		if (map->frozen) {
+			err = -EPERM;
+			goto out;
+		}
+		/* map is meant to be read-only, so do not allow mapping as
+		 * writable, because it's possible to leak a writable page
+		 * reference and allows user-space to still modify it after
+		 * freezing, while verifier will assume contents do not change
+		 */
+		if (map->map_flags & BPF_F_RDONLY_PROG) {
+			err = -EACCES;
+			goto out;
+		}
+	}
+
+	/* set default open/close callbacks */
+	vma->vm_ops = &bpf_map_default_vmops;
+	vma->vm_private_data = map;
+	vma->vm_flags &= ~VM_MAYEXEC;
+	if (!(vma->vm_flags & VM_WRITE))
+		/* disallow re-mapping with PROT_WRITE */
+		vma->vm_flags &= ~VM_MAYWRITE;
+
+	err = map->ops->map_mmap(map, vma);
+	if (err)
+		goto out;
+
+	if (vma->vm_flags & VM_MAYWRITE)
+		bpf_map_write_active_inc(map);
+out:
+	mutex_unlock(&map->freeze_mutex);
+	return err;
+}
+
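
bpf_map_mmap() ties mmap permissions to the freeze state under freeze_mutex, and the vm open/close hooks keep writecnt in step with live writable mappings (which the freeze path checks later). A hedged userspace sketch of mapping a BPF_F_MMAPABLE array map (error handling abbreviated; the kernel rounds the region to page granularity):

#include <linux/bpf.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Create an array map whose values are read/written through mmap()
 * instead of BPF_MAP_LOOKUP/UPDATE_ELEM syscalls.
 */
static void *mmap_array_map(int *out_fd, __u32 value_size, __u32 nr)
{
	union bpf_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_ARRAY;
	attr.key_size    = sizeof(__u32);
	attr.value_size  = value_size;
	attr.max_entries = nr;
	attr.map_flags   = BPF_F_MMAPABLE;

	fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	if (fd < 0)
		return MAP_FAILED;

	*out_fd = fd;
	/* must be MAP_SHARED; PROT_WRITE fails once the map is frozen */
	return mmap(NULL, (size_t)value_size * nr, PROT_READ | PROT_WRITE,
		    MAP_SHARED, fd, 0);
}
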
+static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts)
+{
+	struct bpf_map *map = filp->private_data;
+
+	if (map->ops->map_poll)
+		return map->ops->map_poll(map, filp, pts);
+
+	return EPOLLERR;
+}
+
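
map_poll is optional per map type; in kernels of this vintage it is chiefly the ring buffer map (BPF_MAP_TYPE_RINGBUF) that implements it, so a map fd can sit in an epoll set and wake consumers when data is committed. A hedged sketch:

#include <sys/epoll.h>

/* Register a ringbuf map fd with epoll; map types without a
 * ->map_poll answer with EPOLLERR, so waiting on them is pointless.
 */
static int watch_ringbuf(int epfd, int ringbuf_map_fd)
{
	struct epoll_event ev = {
		.events = EPOLLIN,
		.data.fd = ringbuf_map_fd,
	};

	return epoll_ctl(epfd, EPOLL_CTL_ADD, ringbuf_map_fd, &ev);
}
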
 const struct file_operations bpf_map_fops = {
 #ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
...
	.release	= bpf_map_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
+	.mmap		= bpf_map_mmap,
+	.poll		= bpf_map_poll,
 };
 
 int bpf_map_new_fd(struct bpf_map *map, int flags)
...
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL
 
-/* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes.
- * Return 0 on success and < 0 on error.
+/* dst and src must have at least "size" number of bytes.
+ * Return strlen on success and < 0 on error.
 */
-static int bpf_obj_name_cpy(char *dst, const char *src)
+int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
 {
-	const char *end = src + BPF_OBJ_NAME_LEN;
+	const char *end = src + size;
+	const char *orig_src = src;
 
-	memset(dst, 0, BPF_OBJ_NAME_LEN);
-
-	/* Copy all isalnum() and '_' char */
+	memset(dst, 0, size);
+	/* Copy all isalnum(), '_' and '.' chars. */
	while (src < end && *src) {
-		if (!isalnum(*src) && *src != '_')
+		if (!isalnum(*src) &&
+		    *src != '_' && *src != '.')
			return -EINVAL;
		*dst++ = *src++;
	}
 
-	/* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */
+	/* No '\0' found in "size" number of bytes */
	if (src == end)
		return -EINVAL;
 
-	return 0;
+	return src - orig_src;
 }
 
 int map_check_no_btf(const struct bpf_map *map,
+		     const struct btf *btf,
		     const struct btf_type *key_type,
		     const struct btf_type *value_type)
 {
	return -ENOTSUPP;
 }
 
-static int map_check_btf(const struct bpf_map *map, const struct btf *btf,
+static int map_check_btf(struct bpf_map *map, const struct btf *btf,
			 u32 btf_key_id, u32 btf_value_id)
 {
	const struct btf_type *key_type, *value_type;
	u32 key_size, value_size;
	int ret = 0;
 
-	key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
-	if (!key_type || key_size != map->key_size)
-		return -EINVAL;
+	/* Some maps allow key to be unspecified. */
+	if (btf_key_id) {
+		key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
+		if (!key_type || key_size != map->key_size)
+			return -EINVAL;
+	} else {
+		key_type = btf_type_by_id(btf, 0);
+		if (!map->ops->map_check_btf)
+			return -EINVAL;
+	}
 
	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
	if (!value_type || value_size != map->value_size)
		return -EINVAL;
 
+	map->spin_lock_off = btf_find_spin_lock(btf, value_type);
+
+	if (map_value_has_spin_lock(map)) {
+		if (map->map_flags & BPF_F_RDONLY_PROG)
+			return -EACCES;
+		if (map->map_type != BPF_MAP_TYPE_HASH &&
+		    map->map_type != BPF_MAP_TYPE_ARRAY &&
+		    map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
+		    map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
+		    map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
+			return -ENOTSUPP;
+		if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
+		    map->value_size) {
+			WARN_ONCE(1,
+				  "verifier bug spin_lock_off %d value_size %d\n",
+				  map->spin_lock_off, map->value_size);
+			return -EFAULT;
+		}
+	}
+
	if (map->ops->map_check_btf)
-		ret = map->ops->map_check_btf(map, key_type, value_type);
+		ret = map->ops->map_check_btf(map, btf, key_type, value_type);
 
	return ret;
 }
 
-#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
+#define BPF_MAP_CREATE_LAST_FIELD btf_vmlinux_value_type_id
 /* called via syscall */
 static int map_create(union bpf_attr *attr)
 {
	int numa_node = bpf_map_attr_numa_node(attr);
+	struct bpf_map_memory mem;
	struct bpf_map *map;
	int f_flags;
	int err;
...
	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;
+
+	if (attr->btf_vmlinux_value_type_id) {
+		if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS ||
+		    attr->btf_key_type_id || attr->btf_value_type_id)
+			return -EINVAL;
+	} else if (attr->btf_key_type_id && !attr->btf_value_type_id) {
+		return -EINVAL;
+	}
 
	f_flags = bpf_get_file_flag(attr->map_flags);
	if (f_flags < 0)
...
	if (IS_ERR(map))
		return PTR_ERR(map);
 
-	err = bpf_obj_name_cpy(map->name, attr->map_name);
-	if (err)
-		goto free_map_nouncharge;
+	err = bpf_obj_name_cpy(map->name, attr->map_name,
+			       sizeof(attr->map_name));
+	if (err < 0)
+		goto free_map;
 
-	atomic_set(&map->refcnt, 1);
-	atomic_set(&map->usercnt, 1);
+	atomic64_set(&map->refcnt, 1);
+	atomic64_set(&map->usercnt, 1);
+	mutex_init(&map->freeze_mutex);
 
-	if (attr->btf_key_type_id || attr->btf_value_type_id) {
+	map->spin_lock_off = -EINVAL;
+	if (attr->btf_key_type_id || attr->btf_value_type_id ||
+	    /* Even the map's value is a kernel's struct,
+	     * the bpf_prog.o must have BTF to begin with
+	     * to figure out the corresponding kernel's
+	     * counter part. Thus, attr->btf_fd has
+	     * to be valid also.
+	     */
+	    attr->btf_vmlinux_value_type_id) {
		struct btf *btf;
-
-		if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
-			err = -EINVAL;
-			goto free_map_nouncharge;
-		}
 
		btf = btf_get_by_fd(attr->btf_fd);
		if (IS_ERR(btf)) {
			err = PTR_ERR(btf);
-			goto free_map_nouncharge;
+			goto free_map;
		}
-
-		err = map_check_btf(map, btf, attr->btf_key_type_id,
-				    attr->btf_value_type_id);
-		if (err) {
-			btf_put(btf);
-			goto free_map_nouncharge;
-		}
-
		map->btf = btf;
+
+		if (attr->btf_value_type_id) {
+			err = map_check_btf(map, btf, attr->btf_key_type_id,
+					    attr->btf_value_type_id);
+			if (err)
+				goto free_map;
+		}
+
		map->btf_key_type_id = attr->btf_key_type_id;
		map->btf_value_type_id = attr->btf_value_type_id;
+		map->btf_vmlinux_value_type_id =
+			attr->btf_vmlinux_value_type_id;
	}
 
	err = security_bpf_map_alloc(map);
	if (err)
-		goto free_map_nouncharge;
-
-	err = bpf_map_init_memlock(map);
-	if (err)
-		goto free_map_sec;
+		goto free_map;
 
	err = bpf_map_alloc_id(map);
	if (err)
-		goto free_map;
+		goto free_map_sec;
 
	err = bpf_map_new_fd(map, f_flags);
	if (err < 0) {
...
 
	return err;
 
-free_map:
-	bpf_map_release_memlock(map);
 free_map_sec:
	security_bpf_map_free(map);
-free_map_nouncharge:
+free_map:
	btf_put(map->btf);
+	bpf_map_charge_move(&mem, &map->memory);
	map->ops->map_free(map);
+	bpf_map_charge_finish(&mem);
	return err;
 }
 
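
With the reworked attribute validation, a BTF-annotated map supplies btf_fd plus key/value type ids (the key id may now be 0 for maps that allow an unspecified key), while struct_ops maps instead pass btf_vmlinux_value_type_id alone. A hedged raw-syscall sketch of the common BTF-annotated case (the type ids would normally come from libbpf after loading the object's BTF; sizes must match what map_check_btf() derives from those types):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int create_btf_hash(int btf_fd, __u32 key_tid, __u32 val_tid,
			   __u32 key_sz, __u32 val_sz)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_HASH;
	attr.key_size    = key_sz;	/* must match the BTF key type size */
	attr.value_size  = val_sz;	/* ditto for the value type */
	attr.max_entries = 1024;
	attr.btf_fd            = btf_fd;
	attr.btf_key_type_id   = key_tid;
	attr.btf_value_type_id = val_tid;
	/* '.' in names is accepted since bpf_obj_name_cpy() above */
	strncpy(attr.map_name, "demo.hash", sizeof(attr.map_name) - 1);

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}
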
---|
.. | .. |
---|
596 | 927 | return f.file->private_data; |
---|
597 | 928 | } |
---|
598 | 929 | |
---|
599 | | -/* prog's and map's refcnt limit */ |
---|
600 | | -#define BPF_MAX_REFCNT 32768 |
---|
601 | | - |
---|
602 | | -struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref) |
---|
| 930 | +void bpf_map_inc(struct bpf_map *map) |
---|
603 | 931 | { |
---|
604 | | - if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) { |
---|
605 | | - atomic_dec(&map->refcnt); |
---|
606 | | - return ERR_PTR(-EBUSY); |
---|
607 | | - } |
---|
608 | | - if (uref) |
---|
609 | | - atomic_inc(&map->usercnt); |
---|
610 | | - return map; |
---|
| 932 | + atomic64_inc(&map->refcnt); |
---|
611 | 933 | } |
---|
612 | 934 | EXPORT_SYMBOL_GPL(bpf_map_inc); |
---|
| 935 | + |
---|
| 936 | +void bpf_map_inc_with_uref(struct bpf_map *map) |
---|
| 937 | +{ |
---|
| 938 | + atomic64_inc(&map->refcnt); |
---|
| 939 | + atomic64_inc(&map->usercnt); |
---|
| 940 | +} |
---|
| 941 | +EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref); |
---|
| 942 | + |
---|
| 943 | +struct bpf_map *bpf_map_get(u32 ufd) |
---|
| 944 | +{ |
---|
| 945 | + struct fd f = fdget(ufd); |
---|
| 946 | + struct bpf_map *map; |
---|
| 947 | + |
---|
| 948 | + map = __bpf_map_get(f); |
---|
| 949 | + if (IS_ERR(map)) |
---|
| 950 | + return map; |
---|
| 951 | + |
---|
| 952 | + bpf_map_inc(map); |
---|
| 953 | + fdput(f); |
---|
| 954 | + |
---|
| 955 | + return map; |
---|
| 956 | +} |
---|
613 | 957 | |
---|
614 | 958 | struct bpf_map *bpf_map_get_with_uref(u32 ufd) |
---|
615 | 959 | { |
---|
.. | .. |
---|
620 | 964 | if (IS_ERR(map)) |
---|
621 | 965 | return map; |
---|
622 | 966 | |
---|
623 | | - map = bpf_map_inc(map, true); |
---|
| 967 | + bpf_map_inc_with_uref(map); |
---|
624 | 968 | fdput(f); |
---|
625 | 969 | |
---|
626 | 970 | return map; |
---|
627 | 971 | } |
---|
628 | 972 | |
---|
629 | 973 | /* map_idr_lock should have been held */ |
---|
630 | | -static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, |
---|
631 | | - bool uref) |
---|
| 974 | +static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref) |
---|
632 | 975 | { |
---|
633 | 976 | int refold; |
---|
634 | 977 | |
---|
635 | | - refold = atomic_fetch_add_unless(&map->refcnt, 1, 0); |
---|
636 | | - |
---|
637 | | - if (refold >= BPF_MAX_REFCNT) { |
---|
638 | | - __bpf_map_put(map, false); |
---|
639 | | - return ERR_PTR(-EBUSY); |
---|
640 | | - } |
---|
641 | | - |
---|
| 978 | + refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0); |
---|
642 | 979 | if (!refold) |
---|
643 | 980 | return ERR_PTR(-ENOENT); |
---|
644 | | - |
---|
645 | 981 | if (uref) |
---|
646 | | - atomic_inc(&map->usercnt); |
---|
| 982 | + atomic64_inc(&map->usercnt); |
---|
647 | 983 | |
---|
648 | 984 | return map; |
---|
649 | 985 | } |
---|
| 986 | + |
---|
| 987 | +struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map) |
---|
| 988 | +{ |
---|
| 989 | + spin_lock_bh(&map_idr_lock); |
---|
| 990 | + map = __bpf_map_inc_not_zero(map, false); |
---|
| 991 | + spin_unlock_bh(&map_idr_lock); |
---|
| 992 | + |
---|
| 993 | + return map; |
---|
| 994 | +} |
---|
| 995 | +EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero); |
---|
650 | 996 | |
---|
651 | 997 | int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) |
---|
652 | 998 | { |
---|
653 | 999 | return -ENOTSUPP; |
---|
654 | 1000 | } |
---|
655 | 1001 | |
---|
| 1002 | +static void *__bpf_copy_key(void __user *ukey, u64 key_size) |
---|
| 1003 | +{ |
---|
| 1004 | + if (key_size) |
---|
| 1005 | + return memdup_user(ukey, key_size); |
---|
| 1006 | + |
---|
| 1007 | + if (ukey) |
---|
| 1008 | + return ERR_PTR(-EINVAL); |
---|
| 1009 | + |
---|
| 1010 | + return NULL; |
---|
| 1011 | +} |
---|
| 1012 | + |
---|
656 | 1013 | /* last field in 'union bpf_attr' used by this command */ |
---|
657 | | -#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value |
---|
| 1014 | +#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags |
---|
658 | 1015 | |
---|
659 | 1016 | static int map_lookup_elem(union bpf_attr *attr) |
---|
660 | 1017 | { |
---|
.. | .. |
---|
662 | 1019 | void __user *uvalue = u64_to_user_ptr(attr->value); |
---|
663 | 1020 | int ufd = attr->map_fd; |
---|
664 | 1021 | struct bpf_map *map; |
---|
665 | | - void *key, *value, *ptr; |
---|
| 1022 | + void *key, *value; |
---|
666 | 1023 | u32 value_size; |
---|
667 | 1024 | struct fd f; |
---|
668 | 1025 | int err; |
---|
.. | .. |
---|
670 | 1027 | if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) |
---|
671 | 1028 | return -EINVAL; |
---|
672 | 1029 | |
---|
| 1030 | + if (attr->flags & ~BPF_F_LOCK) |
---|
| 1031 | + return -EINVAL; |
---|
| 1032 | + |
---|
673 | 1033 | f = fdget(ufd); |
---|
674 | 1034 | map = __bpf_map_get(f); |
---|
675 | 1035 | if (IS_ERR(map)) |
---|
676 | 1036 | return PTR_ERR(map); |
---|
677 | | - |
---|
678 | | - if (!(f.file->f_mode & FMODE_CAN_READ)) { |
---|
| 1037 | + if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { |
---|
679 | 1038 | err = -EPERM; |
---|
680 | 1039 | goto err_put; |
---|
681 | 1040 | } |
---|
682 | 1041 | |
---|
683 | | - key = memdup_user(ukey, map->key_size); |
---|
| 1042 | + if ((attr->flags & BPF_F_LOCK) && |
---|
| 1043 | + !map_value_has_spin_lock(map)) { |
---|
| 1044 | + err = -EINVAL; |
---|
| 1045 | + goto err_put; |
---|
| 1046 | + } |
---|
| 1047 | + |
---|
| 1048 | + key = __bpf_copy_key(ukey, map->key_size); |
---|
684 | 1049 | if (IS_ERR(key)) { |
---|
685 | 1050 | err = PTR_ERR(key); |
---|
686 | 1051 | goto err_put; |
---|
687 | 1052 | } |
---|
688 | 1053 | |
---|
689 | | - if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || |
---|
690 | | - map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || |
---|
691 | | - map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) |
---|
692 | | - value_size = round_up(map->value_size, 8) * num_possible_cpus(); |
---|
693 | | - else if (IS_FD_MAP(map)) |
---|
694 | | - value_size = sizeof(u32); |
---|
695 | | - else |
---|
696 | | - value_size = map->value_size; |
---|
| 1054 | + value_size = bpf_map_value_size(map); |
---|
697 | 1055 | |
---|
698 | 1056 | err = -ENOMEM; |
---|
699 | 1057 | value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); |
---|
700 | 1058 | if (!value) |
---|
701 | 1059 | goto free_key; |
---|
702 | 1060 | |
---|
703 | | - if (bpf_map_is_dev_bound(map)) { |
---|
704 | | - err = bpf_map_offload_lookup_elem(map, key, value); |
---|
705 | | - goto done; |
---|
706 | | - } |
---|
707 | | - |
---|
708 | | - preempt_disable(); |
---|
709 | | - this_cpu_inc(bpf_prog_active); |
---|
710 | | - if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || |
---|
711 | | - map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { |
---|
712 | | - err = bpf_percpu_hash_copy(map, key, value); |
---|
713 | | - } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { |
---|
714 | | - err = bpf_percpu_array_copy(map, key, value); |
---|
715 | | - } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { |
---|
716 | | - err = bpf_stackmap_copy(map, key, value); |
---|
717 | | - } else if (IS_FD_ARRAY(map)) { |
---|
718 | | - err = bpf_fd_array_map_lookup_elem(map, key, value); |
---|
719 | | - } else if (IS_FD_HASH(map)) { |
---|
720 | | - err = bpf_fd_htab_map_lookup_elem(map, key, value); |
---|
721 | | - } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { |
---|
722 | | - err = bpf_fd_reuseport_array_lookup_elem(map, key, value); |
---|
723 | | - } else { |
---|
724 | | - rcu_read_lock(); |
---|
725 | | - if (map->ops->map_lookup_elem_sys_only) |
---|
726 | | - ptr = map->ops->map_lookup_elem_sys_only(map, key); |
---|
727 | | - else |
---|
728 | | - ptr = map->ops->map_lookup_elem(map, key); |
---|
729 | | - if (ptr) |
---|
730 | | - memcpy(value, ptr, value_size); |
---|
731 | | - rcu_read_unlock(); |
---|
732 | | - err = ptr ? 0 : -ENOENT; |
---|
733 | | - } |
---|
734 | | - this_cpu_dec(bpf_prog_active); |
---|
735 | | - preempt_enable(); |
---|
736 | | - |
---|
737 | | -done: |
---|
| 1061 | + err = bpf_map_copy_value(map, key, value, attr->flags); |
---|
738 | 1062 | if (err) |
---|
739 | 1063 | goto free_value; |
---|
740 | 1064 | |
---|
.. | .. |
---|
753 | 1077 | return err; |
---|
754 | 1078 | } |
---|
755 | 1079 | |
---|
756 | | -static void maybe_wait_bpf_programs(struct bpf_map *map) |
---|
757 | | -{ |
---|
758 | | - /* Wait for any running BPF programs to complete so that |
---|
759 | | - * userspace, when we return to it, knows that all programs |
---|
760 | | - * that could be running use the new map value. |
---|
761 | | - */ |
---|
762 | | - if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || |
---|
763 | | - map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) |
---|
764 | | - synchronize_rcu(); |
---|
765 | | -} |
---|
766 | 1080 | |
---|
767 | 1081 | #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags |
---|
768 | 1082 | |
---|
.. | .. |
---|
784 | 1098 | map = __bpf_map_get(f); |
---|
785 | 1099 | if (IS_ERR(map)) |
---|
786 | 1100 | return PTR_ERR(map); |
---|
787 | | - |
---|
788 | | - if (!(f.file->f_mode & FMODE_CAN_WRITE)) { |
---|
| 1101 | + bpf_map_write_active_inc(map); |
---|
| 1102 | + if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { |
---|
789 | 1103 | err = -EPERM; |
---|
790 | 1104 | goto err_put; |
---|
791 | 1105 | } |
---|
792 | 1106 | |
---|
793 | | - key = memdup_user(ukey, map->key_size); |
---|
| 1107 | + if ((attr->flags & BPF_F_LOCK) && |
---|
| 1108 | + !map_value_has_spin_lock(map)) { |
---|
| 1109 | + err = -EINVAL; |
---|
| 1110 | + goto err_put; |
---|
| 1111 | + } |
---|
| 1112 | + |
---|
| 1113 | + key = __bpf_copy_key(ukey, map->key_size); |
---|
794 | 1114 | if (IS_ERR(key)) { |
---|
795 | 1115 | err = PTR_ERR(key); |
---|
796 | 1116 | goto err_put; |
---|
.. | .. |
---|
798 | 1118 | |
---|
799 | 1119 | if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || |
---|
800 | 1120 | map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || |
---|
801 | | - map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) |
---|
| 1121 | + map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || |
---|
| 1122 | + map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) |
---|
802 | 1123 | value_size = round_up(map->value_size, 8) * num_possible_cpus(); |
---|
803 | 1124 | else |
---|
804 | 1125 | value_size = map->value_size; |
---|
.. | .. |
---|
812 | 1133 | if (copy_from_user(value, uvalue, value_size) != 0) |
---|
813 | 1134 | goto free_value; |
---|
814 | 1135 | |
---|
815 | | - /* Need to create a kthread, thus must support schedule */ |
---|
816 | | - if (bpf_map_is_dev_bound(map)) { |
---|
817 | | - err = bpf_map_offload_update_elem(map, key, value, attr->flags); |
---|
818 | | - goto out; |
---|
819 | | - } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || |
---|
820 | | - map->map_type == BPF_MAP_TYPE_SOCKHASH || |
---|
821 | | - map->map_type == BPF_MAP_TYPE_SOCKMAP) { |
---|
822 | | - err = map->ops->map_update_elem(map, key, value, attr->flags); |
---|
823 | | - goto out; |
---|
824 | | - } |
---|
| 1136 | + err = bpf_map_update_value(map, f, key, value, attr->flags); |
---|
825 | 1137 | |
---|
826 | | - /* must increment bpf_prog_active to avoid kprobe+bpf triggering from |
---|
827 | | - * inside bpf map update or delete otherwise deadlocks are possible |
---|
828 | | - */ |
---|
829 | | - preempt_disable(); |
---|
830 | | - __this_cpu_inc(bpf_prog_active); |
---|
831 | | - if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || |
---|
832 | | - map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { |
---|
833 | | - err = bpf_percpu_hash_update(map, key, value, attr->flags); |
---|
834 | | - } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { |
---|
835 | | - err = bpf_percpu_array_update(map, key, value, attr->flags); |
---|
836 | | - } else if (IS_FD_ARRAY(map)) { |
---|
837 | | - rcu_read_lock(); |
---|
838 | | - err = bpf_fd_array_map_update_elem(map, f.file, key, value, |
---|
839 | | - attr->flags); |
---|
840 | | - rcu_read_unlock(); |
---|
841 | | - } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { |
---|
842 | | - rcu_read_lock(); |
---|
843 | | - err = bpf_fd_htab_map_update_elem(map, f.file, key, value, |
---|
844 | | - attr->flags); |
---|
845 | | - rcu_read_unlock(); |
---|
846 | | - } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { |
---|
847 | | - /* rcu_read_lock() is not needed */ |
---|
848 | | - err = bpf_fd_reuseport_array_update_elem(map, key, value, |
---|
849 | | - attr->flags); |
---|
850 | | - } else { |
---|
851 | | - rcu_read_lock(); |
---|
852 | | - err = map->ops->map_update_elem(map, key, value, attr->flags); |
---|
853 | | - rcu_read_unlock(); |
---|
854 | | - } |
---|
855 | | - __this_cpu_dec(bpf_prog_active); |
---|
856 | | - preempt_enable(); |
---|
857 | | - maybe_wait_bpf_programs(map); |
---|
858 | | -out: |
---|
859 | 1138 | free_value: |
---|
860 | 1139 | kfree(value); |
---|
861 | 1140 | free_key: |
---|
862 | 1141 | kfree(key); |
---|
863 | 1142 | err_put: |
---|
| 1143 | + bpf_map_write_active_dec(map); |
---|
864 | 1144 | fdput(f); |
---|
865 | 1145 | return err; |
---|
866 | 1146 | } |
---|
.. | .. |
---|
883 | 1163 | map = __bpf_map_get(f); |
---|
884 | 1164 | if (IS_ERR(map)) |
---|
885 | 1165 | return PTR_ERR(map); |
---|
886 | | - |
---|
887 | | - if (!(f.file->f_mode & FMODE_CAN_WRITE)) { |
---|
| 1166 | + bpf_map_write_active_inc(map); |
---|
| 1167 | + if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { |
---|
888 | 1168 | err = -EPERM; |
---|
889 | 1169 | goto err_put; |
---|
890 | 1170 | } |
---|
891 | 1171 | |
---|
892 | | - key = memdup_user(ukey, map->key_size); |
---|
| 1172 | + key = __bpf_copy_key(ukey, map->key_size); |
---|
893 | 1173 | if (IS_ERR(key)) { |
---|
894 | 1174 | err = PTR_ERR(key); |
---|
895 | 1175 | goto err_put; |
---|
.. | .. |
---|
898 | 1178 | if (bpf_map_is_dev_bound(map)) { |
---|
899 | 1179 | err = bpf_map_offload_delete_elem(map, key); |
---|
900 | 1180 | goto out; |
---|
| 1181 | + } else if (IS_FD_PROG_ARRAY(map) || |
---|
| 1182 | + map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { |
---|
| 1183 | + /* These maps require sleepable context */ |
---|
| 1184 | + err = map->ops->map_delete_elem(map, key); |
---|
| 1185 | + goto out; |
---|
901 | 1186 | } |
---|
902 | 1187 | |
---|
903 | | - preempt_disable(); |
---|
904 | | - __this_cpu_inc(bpf_prog_active); |
---|
| 1188 | + bpf_disable_instrumentation(); |
---|
905 | 1189 | rcu_read_lock(); |
---|
906 | 1190 | err = map->ops->map_delete_elem(map, key); |
---|
907 | 1191 | rcu_read_unlock(); |
---|
908 | | - __this_cpu_dec(bpf_prog_active); |
---|
909 | | - preempt_enable(); |
---|
| 1192 | + bpf_enable_instrumentation(); |
---|
910 | 1193 | maybe_wait_bpf_programs(map); |
---|
911 | 1194 | out: |
---|
912 | 1195 | kfree(key); |
---|
913 | 1196 | err_put: |
---|
| 1197 | + bpf_map_write_active_dec(map); |
---|
914 | 1198 | fdput(f); |
---|
915 | 1199 | return err; |
---|
916 | 1200 | } |
---|
.. | .. |
---|
935 | 1219 | map = __bpf_map_get(f); |
---|
936 | 1220 | if (IS_ERR(map)) |
---|
937 | 1221 | return PTR_ERR(map); |
---|
938 | | - |
---|
939 | | - if (!(f.file->f_mode & FMODE_CAN_READ)) { |
---|
| 1222 | + if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { |
---|
940 | 1223 | err = -EPERM; |
---|
941 | 1224 | goto err_put; |
---|
942 | 1225 | } |
---|
943 | 1226 | |
---|
944 | 1227 | if (ukey) { |
---|
945 | | - key = memdup_user(ukey, map->key_size); |
---|
| 1228 | + key = __bpf_copy_key(ukey, map->key_size); |
---|
946 | 1229 | if (IS_ERR(key)) { |
---|
947 | 1230 | err = PTR_ERR(key); |
---|
948 | 1231 | goto err_put; |
---|
.. | .. |
---|
983 | 1266 | return err; |
---|
984 | 1267 | } |
---|
985 | 1268 | |
---|
| 1269 | +int generic_map_delete_batch(struct bpf_map *map, |
---|
| 1270 | + const union bpf_attr *attr, |
---|
| 1271 | + union bpf_attr __user *uattr) |
---|
| 1272 | +{ |
---|
| 1273 | + void __user *keys = u64_to_user_ptr(attr->batch.keys); |
---|
| 1274 | + u32 cp, max_count; |
---|
| 1275 | + int err = 0; |
---|
| 1276 | + void *key; |
---|
| 1277 | + |
---|
| 1278 | + if (attr->batch.elem_flags & ~BPF_F_LOCK) |
---|
| 1279 | + return -EINVAL; |
---|
| 1280 | + |
---|
| 1281 | + if ((attr->batch.elem_flags & BPF_F_LOCK) && |
---|
| 1282 | + !map_value_has_spin_lock(map)) { |
---|
| 1283 | + return -EINVAL; |
---|
| 1284 | + } |
---|
| 1285 | + |
---|
| 1286 | + max_count = attr->batch.count; |
---|
| 1287 | + if (!max_count) |
---|
| 1288 | + return 0; |
---|
| 1289 | + |
---|
| 1290 | + key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN); |
---|
| 1291 | + if (!key) |
---|
| 1292 | + return -ENOMEM; |
---|
| 1293 | + |
---|
| 1294 | + for (cp = 0; cp < max_count; cp++) { |
---|
| 1295 | + err = -EFAULT; |
---|
| 1296 | + if (copy_from_user(key, keys + cp * map->key_size, |
---|
| 1297 | + map->key_size)) |
---|
| 1298 | + break; |
---|
| 1299 | + |
---|
| 1300 | + if (bpf_map_is_dev_bound(map)) { |
---|
| 1301 | + err = bpf_map_offload_delete_elem(map, key); |
---|
| 1302 | + break; |
---|
| 1303 | + } |
---|
| 1304 | + |
---|
| 1305 | + bpf_disable_instrumentation(); |
---|
| 1306 | + rcu_read_lock(); |
---|
| 1307 | + err = map->ops->map_delete_elem(map, key); |
---|
| 1308 | + rcu_read_unlock(); |
---|
| 1309 | + bpf_enable_instrumentation(); |
---|
| 1310 | + maybe_wait_bpf_programs(map); |
---|
| 1311 | + if (err) |
---|
| 1312 | + break; |
---|
| 1313 | + cond_resched(); |
---|
| 1314 | + } |
---|
| 1315 | + if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) |
---|
| 1316 | + err = -EFAULT; |
---|
| 1317 | + |
---|
| 1318 | + kfree(key); |
---|
| 1319 | + return err; |
---|
| 1320 | +} |
---|
| 1321 | + |
---|
| 1322 | +int generic_map_update_batch(struct bpf_map *map, |
---|
| 1323 | + const union bpf_attr *attr, |
---|
| 1324 | + union bpf_attr __user *uattr) |
---|
| 1325 | +{ |
---|
| 1326 | + void __user *values = u64_to_user_ptr(attr->batch.values); |
---|
| 1327 | + void __user *keys = u64_to_user_ptr(attr->batch.keys); |
---|
| 1328 | + u32 value_size, cp, max_count; |
---|
| 1329 | + int ufd = attr->batch.map_fd; |
---|
| 1330 | + void *key, *value; |
---|
| 1331 | + struct fd f; |
---|
| 1332 | + int err = 0; |
---|
| 1333 | + |
---|
| 1334 | + if (attr->batch.elem_flags & ~BPF_F_LOCK) |
---|
| 1335 | + return -EINVAL; |
---|
| 1336 | + |
---|
| 1337 | + if ((attr->batch.elem_flags & BPF_F_LOCK) && |
---|
| 1338 | + !map_value_has_spin_lock(map)) { |
---|
| 1339 | + return -EINVAL; |
---|
| 1340 | + } |
---|
| 1341 | + |
---|
| 1342 | + value_size = bpf_map_value_size(map); |
---|
| 1343 | + |
---|
| 1344 | + max_count = attr->batch.count; |
---|
| 1345 | + if (!max_count) |
---|
| 1346 | + return 0; |
---|
| 1347 | + |
---|
| 1348 | + key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN); |
---|
| 1349 | + if (!key) |
---|
| 1350 | + return -ENOMEM; |
---|
| 1351 | + |
---|
| 1352 | + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); |
---|
| 1353 | + if (!value) { |
---|
| 1354 | + kfree(key); |
---|
| 1355 | + return -ENOMEM; |
---|
| 1356 | + } |
---|
| 1357 | + |
---|
| 1358 | + f = fdget(ufd); /* bpf_map_do_batch() guarantees ufd is valid */ |
---|
| 1359 | + for (cp = 0; cp < max_count; cp++) { |
---|
| 1360 | + err = -EFAULT; |
---|
| 1361 | + if (copy_from_user(key, keys + cp * map->key_size, |
---|
| 1362 | + map->key_size) || |
---|
| 1363 | + copy_from_user(value, values + cp * value_size, value_size)) |
---|
| 1364 | + break; |
---|
| 1365 | + |
---|
| 1366 | + err = bpf_map_update_value(map, f, key, value, |
---|
| 1367 | + attr->batch.elem_flags); |
---|
| 1368 | + |
---|
| 1369 | + if (err) |
---|
| 1370 | + break; |
---|
| 1371 | + cond_resched(); |
---|
| 1372 | + } |
---|
| 1373 | + |
---|
| 1374 | + if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) |
---|
| 1375 | + err = -EFAULT; |
---|
| 1376 | + |
---|
| 1377 | + kfree(value); |
---|
| 1378 | + kfree(key); |
---|
| 1379 | + fdput(f); |
---|
| 1380 | + return err; |
---|
| 1381 | +} |
---|
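[Editor's note] The update side is symmetric. A hedged sketch (includes as in the delete-batch example above; keys and values are assumed to be tightly packed arrays of map->key_size and value_size entries):

static int map_update_batch(int map_fd, const void *keys, const void *values,
			    uint32_t *count, uint64_t elem_flags)
{
	union bpf_attr attr;
	int err;

	memset(&attr, 0, sizeof(attr));
	attr.batch.map_fd = map_fd;
	attr.batch.keys = (uint64_t)(unsigned long)keys;
	attr.batch.values = (uint64_t)(unsigned long)values;
	attr.batch.count = *count;
	attr.batch.elem_flags = elem_flags;	/* 0, or BPF_F_LOCK */

	err = syscall(__NR_bpf, BPF_MAP_UPDATE_BATCH, &attr, sizeof(attr));
	*count = attr.batch.count;		/* elements processed */
	return err ? -errno : 0;
}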
| 1382 | + |
---|
| 1383 | +#define MAP_LOOKUP_RETRIES 3 |
---|
| 1384 | + |
---|
| 1385 | +int generic_map_lookup_batch(struct bpf_map *map, |
---|
| 1386 | + const union bpf_attr *attr, |
---|
| 1387 | + union bpf_attr __user *uattr) |
---|
| 1388 | +{ |
---|
| 1389 | + void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch); |
---|
| 1390 | + void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); |
---|
| 1391 | + void __user *values = u64_to_user_ptr(attr->batch.values); |
---|
| 1392 | + void __user *keys = u64_to_user_ptr(attr->batch.keys); |
---|
| 1393 | + void *buf, *buf_prevkey, *prev_key, *key, *value; |
---|
| 1394 | + int err, retry = MAP_LOOKUP_RETRIES; |
---|
| 1395 | + u32 value_size, cp, max_count; |
---|
| 1396 | + |
---|
| 1397 | + if (attr->batch.elem_flags & ~BPF_F_LOCK) |
---|
| 1398 | + return -EINVAL; |
---|
| 1399 | + |
---|
| 1400 | + if ((attr->batch.elem_flags & BPF_F_LOCK) && |
---|
| 1401 | + !map_value_has_spin_lock(map)) |
---|
| 1402 | + return -EINVAL; |
---|
| 1403 | + |
---|
| 1404 | + value_size = bpf_map_value_size(map); |
---|
| 1405 | + |
---|
| 1406 | + max_count = attr->batch.count; |
---|
| 1407 | + if (!max_count) |
---|
| 1408 | + return 0; |
---|
| 1409 | + |
---|
| 1410 | + if (put_user(0, &uattr->batch.count)) |
---|
| 1411 | + return -EFAULT; |
---|
| 1412 | + |
---|
| 1413 | + buf_prevkey = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN); |
---|
| 1414 | + if (!buf_prevkey) |
---|
| 1415 | + return -ENOMEM; |
---|
| 1416 | + |
---|
| 1417 | + buf = kmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN); |
---|
| 1418 | + if (!buf) { |
---|
| 1419 | + kfree(buf_prevkey); |
---|
| 1420 | + return -ENOMEM; |
---|
| 1421 | + } |
---|
| 1422 | + |
---|
| 1423 | + err = -EFAULT; |
---|
| 1424 | + prev_key = NULL; |
---|
| 1425 | + if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size)) |
---|
| 1426 | + goto free_buf; |
---|
| 1427 | + key = buf; |
---|
| 1428 | + value = key + map->key_size; |
---|
| 1429 | + if (ubatch) |
---|
| 1430 | + prev_key = buf_prevkey; |
---|
| 1431 | + |
---|
| 1432 | + for (cp = 0; cp < max_count;) { |
---|
| 1433 | + rcu_read_lock(); |
---|
| 1434 | + err = map->ops->map_get_next_key(map, prev_key, key); |
---|
| 1435 | + rcu_read_unlock(); |
---|
| 1436 | + if (err) |
---|
| 1437 | + break; |
---|
| 1438 | + err = bpf_map_copy_value(map, key, value, |
---|
| 1439 | + attr->batch.elem_flags); |
---|
| 1440 | + |
---|
| 1441 | + if (err == -ENOENT) { |
---|
| 1442 | + if (retry) { |
---|
| 1443 | + retry--; |
---|
| 1444 | + continue; |
---|
| 1445 | + } |
---|
| 1446 | + err = -EINTR; |
---|
| 1447 | + break; |
---|
| 1448 | + } |
---|
| 1449 | + |
---|
| 1450 | + if (err) |
---|
| 1451 | + goto free_buf; |
---|
| 1452 | + |
---|
| 1453 | + if (copy_to_user(keys + cp * map->key_size, key, |
---|
| 1454 | + map->key_size)) { |
---|
| 1455 | + err = -EFAULT; |
---|
| 1456 | + goto free_buf; |
---|
| 1457 | + } |
---|
| 1458 | + if (copy_to_user(values + cp * value_size, value, value_size)) { |
---|
| 1459 | + err = -EFAULT; |
---|
| 1460 | + goto free_buf; |
---|
| 1461 | + } |
---|
| 1462 | + |
---|
| 1463 | + if (!prev_key) |
---|
| 1464 | + prev_key = buf_prevkey; |
---|
| 1465 | + |
---|
| 1466 | + swap(prev_key, key); |
---|
| 1467 | + retry = MAP_LOOKUP_RETRIES; |
---|
| 1468 | + cp++; |
---|
| 1469 | + cond_resched(); |
---|
| 1470 | + } |
---|
| 1471 | + |
---|
| 1472 | + if (err == -EFAULT) |
---|
| 1473 | + goto free_buf; |
---|
| 1474 | + |
---|
| 1475 | + if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) || |
---|
| 1476 | + (cp && copy_to_user(uobatch, prev_key, map->key_size)))) |
---|
| 1477 | + err = -EFAULT; |
---|
| 1478 | + |
---|
| 1479 | +free_buf: |
---|
| 1480 | + kfree(buf_prevkey); |
---|
| 1481 | + kfree(buf); |
---|
| 1482 | + return err; |
---|
| 1483 | +} |
---|
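[Editor's note] Putting the cursor contract above into caller terms: in_batch == NULL starts from the first key, out_batch returns the resume cursor (a buffer of map->key_size bytes), ENOENT means the map is exhausted, and EINTR is possible if elements keep disappearing past the retry limit. A hedged sketch, again reusing the includes from the delete-batch example:

static int map_dump_all(int map_fd, void *keys, void *values,
			void *cursor, uint32_t batch_sz)
{
	union bpf_attr attr;
	void *in = NULL;			/* NULL: start of the map */
	int err;

	memset(&attr, 0, sizeof(attr));
	attr.batch.map_fd = map_fd;
	attr.batch.out_batch = (uint64_t)(unsigned long)cursor;
	attr.batch.keys = (uint64_t)(unsigned long)keys;
	attr.batch.values = (uint64_t)(unsigned long)values;

	do {
		attr.batch.in_batch = (uint64_t)(unsigned long)in;
		attr.batch.count = batch_sz;
		err = syscall(__NR_bpf, BPF_MAP_LOOKUP_BATCH, &attr, sizeof(attr));
		if (err && errno != ENOENT)
			return -errno;
		/* attr.batch.count entries of keys[]/values[] are valid here */
		in = cursor;			/* resume from returned cursor */
	} while (!err);

	return 0;				/* ENOENT: whole map visited */
}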
| 1484 | + |
---|
| 1485 | +#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value |
---|
| 1486 | + |
---|
| 1487 | +static int map_lookup_and_delete_elem(union bpf_attr *attr) |
---|
| 1488 | +{ |
---|
| 1489 | + void __user *ukey = u64_to_user_ptr(attr->key); |
---|
| 1490 | + void __user *uvalue = u64_to_user_ptr(attr->value); |
---|
| 1491 | + int ufd = attr->map_fd; |
---|
| 1492 | + struct bpf_map *map; |
---|
| 1493 | + void *key, *value; |
---|
| 1494 | + u32 value_size; |
---|
| 1495 | + struct fd f; |
---|
| 1496 | + int err; |
---|
| 1497 | + |
---|
| 1498 | + if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) |
---|
| 1499 | + return -EINVAL; |
---|
| 1500 | + |
---|
| 1501 | + f = fdget(ufd); |
---|
| 1502 | + map = __bpf_map_get(f); |
---|
| 1503 | + if (IS_ERR(map)) |
---|
| 1504 | + return PTR_ERR(map); |
---|
| 1505 | + bpf_map_write_active_inc(map); |
---|
| 1506 | + if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || |
---|
| 1507 | + !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { |
---|
| 1508 | + err = -EPERM; |
---|
| 1509 | + goto err_put; |
---|
| 1510 | + } |
---|
| 1511 | + |
---|
| 1512 | + key = __bpf_copy_key(ukey, map->key_size); |
---|
| 1513 | + if (IS_ERR(key)) { |
---|
| 1514 | + err = PTR_ERR(key); |
---|
| 1515 | + goto err_put; |
---|
| 1516 | + } |
---|
| 1517 | + |
---|
| 1518 | + value_size = map->value_size; |
---|
| 1519 | + |
---|
| 1520 | + err = -ENOMEM; |
---|
| 1521 | + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); |
---|
| 1522 | + if (!value) |
---|
| 1523 | + goto free_key; |
---|
| 1524 | + |
---|
| 1525 | + if (map->map_type == BPF_MAP_TYPE_QUEUE || |
---|
| 1526 | + map->map_type == BPF_MAP_TYPE_STACK) { |
---|
| 1527 | + err = map->ops->map_pop_elem(map, value); |
---|
| 1528 | + } else { |
---|
| 1529 | + err = -ENOTSUPP; |
---|
| 1530 | + } |
---|
| 1531 | + |
---|
| 1532 | + if (err) |
---|
| 1533 | + goto free_value; |
---|
| 1534 | + |
---|
| 1535 | + if (copy_to_user(uvalue, value, value_size) != 0) { |
---|
| 1536 | + err = -EFAULT; |
---|
| 1537 | + goto free_value; |
---|
| 1538 | + } |
---|
| 1539 | + |
---|
| 1540 | + err = 0; |
---|
| 1541 | + |
---|
| 1542 | +free_value: |
---|
| 1543 | + kfree(value); |
---|
| 1544 | +free_key: |
---|
| 1545 | + kfree(key); |
---|
| 1546 | +err_put: |
---|
| 1547 | + bpf_map_write_active_dec(map); |
---|
| 1548 | + fdput(f); |
---|
| 1549 | + return err; |
---|
| 1550 | +} |
---|
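[Editor's note] Since only queue and stack maps reach the map_pop_elem() branch, this command is effectively a destructive pop. A hedged sketch (includes as above; value must hold map->value_size bytes):

static int queue_pop(int map_fd, void *value)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.value = (uint64_t)(unsigned long)value;

	return syscall(__NR_bpf, BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr,
		       sizeof(attr)) ? -errno : 0;
}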
| 1551 | + |
---|
| 1552 | +#define BPF_MAP_FREEZE_LAST_FIELD map_fd |
---|
| 1553 | + |
---|
| 1554 | +static int map_freeze(const union bpf_attr *attr) |
---|
| 1555 | +{ |
---|
| 1556 | + int err = 0, ufd = attr->map_fd; |
---|
| 1557 | + struct bpf_map *map; |
---|
| 1558 | + struct fd f; |
---|
| 1559 | + |
---|
| 1560 | + if (CHECK_ATTR(BPF_MAP_FREEZE)) |
---|
| 1561 | + return -EINVAL; |
---|
| 1562 | + |
---|
| 1563 | + f = fdget(ufd); |
---|
| 1564 | + map = __bpf_map_get(f); |
---|
| 1565 | + if (IS_ERR(map)) |
---|
| 1566 | + return PTR_ERR(map); |
---|
| 1567 | + |
---|
| 1568 | + if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { |
---|
| 1569 | + fdput(f); |
---|
| 1570 | + return -ENOTSUPP; |
---|
| 1571 | + } |
---|
| 1572 | + |
---|
| 1573 | + mutex_lock(&map->freeze_mutex); |
---|
| 1574 | + if (bpf_map_write_active(map)) { |
---|
| 1575 | + err = -EBUSY; |
---|
| 1576 | + goto err_put; |
---|
| 1577 | + } |
---|
| 1578 | + if (READ_ONCE(map->frozen)) { |
---|
| 1579 | + err = -EBUSY; |
---|
| 1580 | + goto err_put; |
---|
| 1581 | + } |
---|
| 1582 | + if (!bpf_capable()) { |
---|
| 1583 | + err = -EPERM; |
---|
| 1584 | + goto err_put; |
---|
| 1585 | + } |
---|
| 1586 | + |
---|
| 1587 | + WRITE_ONCE(map->frozen, true); |
---|
| 1588 | +err_put: |
---|
| 1589 | + mutex_unlock(&map->freeze_mutex); |
---|
| 1590 | + fdput(f); |
---|
| 1591 | + return err; |
---|
| 1592 | +} |
---|
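[Editor's note] From user space the freeze command carries nothing but the map FD. A hedged sketch; after a successful freeze, syscall-side writes are refused while program-side reads keep working, and a second freeze fails with EBUSY:

static int map_freeze_fd(int map_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;

	return syscall(__NR_bpf, BPF_MAP_FREEZE, &attr, sizeof(attr)) ? -errno : 0;
}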
| 1593 | + |
---|
986 | 1594 | static const struct bpf_prog_ops * const bpf_prog_types[] = { |
---|
987 | | -#define BPF_PROG_TYPE(_id, _name) \ |
---|
| 1595 | +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ |
---|
988 | 1596 | [_id] = & _name ## _prog_ops, |
---|
989 | 1597 | #define BPF_MAP_TYPE(_id, _ops) |
---|
| 1598 | +#define BPF_LINK_TYPE(_id, _name) |
---|
990 | 1599 | #include <linux/bpf_types.h> |
---|
991 | 1600 | #undef BPF_PROG_TYPE |
---|
992 | 1601 | #undef BPF_MAP_TYPE |
---|
| 1602 | +#undef BPF_LINK_TYPE |
---|
993 | 1603 | }; |
---|
994 | 1604 | |
---|
995 | 1605 | static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) |
---|
.. | .. |
---|
1011 | 1621 | return 0; |
---|
1012 | 1622 | } |
---|
1013 | 1623 | |
---|
1014 | | -/* drop refcnt on maps used by eBPF program and free auxilary data */ |
---|
1015 | | -static void free_used_maps(struct bpf_prog_aux *aux) |
---|
| 1624 | +enum bpf_audit { |
---|
| 1625 | + BPF_AUDIT_LOAD, |
---|
| 1626 | + BPF_AUDIT_UNLOAD, |
---|
| 1627 | + BPF_AUDIT_MAX, |
---|
| 1628 | +}; |
---|
| 1629 | + |
---|
| 1630 | +static const char * const bpf_audit_str[BPF_AUDIT_MAX] = { |
---|
| 1631 | + [BPF_AUDIT_LOAD] = "LOAD", |
---|
| 1632 | + [BPF_AUDIT_UNLOAD] = "UNLOAD", |
---|
| 1633 | +}; |
---|
| 1634 | + |
---|
| 1635 | +static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) |
---|
1016 | 1636 | { |
---|
1017 | | - int i; |
---|
| 1637 | + struct audit_context *ctx = NULL; |
---|
| 1638 | + struct audit_buffer *ab; |
---|
1018 | 1639 | |
---|
1019 | | - if (aux->cgroup_storage) |
---|
1020 | | - bpf_cgroup_storage_release(aux->prog, aux->cgroup_storage); |
---|
1021 | | - |
---|
1022 | | - for (i = 0; i < aux->used_map_cnt; i++) |
---|
1023 | | - bpf_map_put(aux->used_maps[i]); |
---|
1024 | | - |
---|
1025 | | - kfree(aux->used_maps); |
---|
| 1640 | + if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX)) |
---|
| 1641 | + return; |
---|
| 1642 | + if (audit_enabled == AUDIT_OFF) |
---|
| 1643 | + return; |
---|
| 1644 | + if (op == BPF_AUDIT_LOAD) |
---|
| 1645 | + ctx = audit_context(); |
---|
| 1646 | + ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF); |
---|
| 1647 | + if (unlikely(!ab)) |
---|
| 1648 | + return; |
---|
| 1649 | + audit_log_format(ab, "prog-id=%u op=%s", |
---|
| 1650 | + prog->aux->id, bpf_audit_str[op]); |
---|
| 1651 | + audit_log_end(ab); |
---|
1026 | 1652 | } |
---|
1027 | 1653 | |
---|
1028 | 1654 | int __bpf_prog_charge(struct user_struct *user, u32 pages) |
---|
.. | .. |
---|
1117 | 1743 | { |
---|
1118 | 1744 | struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); |
---|
1119 | 1745 | |
---|
1120 | | - free_used_maps(aux); |
---|
| 1746 | + kvfree(aux->func_info); |
---|
| 1747 | + kfree(aux->func_info_aux); |
---|
1121 | 1748 | bpf_prog_uncharge_memlock(aux->prog); |
---|
1122 | 1749 | security_bpf_prog_free(aux); |
---|
1123 | 1750 | bpf_prog_free(aux->prog); |
---|
1124 | 1751 | } |
---|
1125 | 1752 | |
---|
| 1753 | +static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) |
---|
| 1754 | +{ |
---|
| 1755 | + bpf_prog_kallsyms_del_all(prog); |
---|
| 1756 | + btf_put(prog->aux->btf); |
---|
| 1757 | + bpf_prog_free_linfo(prog); |
---|
| 1758 | + |
---|
| 1759 | + if (deferred) { |
---|
| 1760 | + if (prog->aux->sleepable) |
---|
| 1761 | + call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu); |
---|
| 1762 | + else |
---|
| 1763 | + call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); |
---|
| 1764 | + } else { |
---|
| 1765 | + __bpf_prog_put_rcu(&prog->aux->rcu); |
---|
| 1766 | + } |
---|
| 1767 | +} |
---|
| 1768 | + |
---|
1126 | 1769 | static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) |
---|
1127 | 1770 | { |
---|
1128 | | - if (atomic_dec_and_test(&prog->aux->refcnt)) { |
---|
| 1771 | + if (atomic64_dec_and_test(&prog->aux->refcnt)) { |
---|
| 1772 | + perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); |
---|
| 1773 | + bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); |
---|
1129 | 1774 | /* bpf_prog_free_id() must be called first */ |
---|
1130 | 1775 | bpf_prog_free_id(prog, do_idr_lock); |
---|
1131 | | - bpf_prog_kallsyms_del_all(prog); |
---|
1132 | | - |
---|
1133 | | - call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); |
---|
| 1776 | + __bpf_prog_put_noref(prog, true); |
---|
1134 | 1777 | } |
---|
1135 | 1778 | } |
---|
1136 | 1779 | |
---|
.. | .. |
---|
1148 | 1791 | return 0; |
---|
1149 | 1792 | } |
---|
1150 | 1793 | |
---|
| 1794 | +static void bpf_prog_get_stats(const struct bpf_prog *prog, |
---|
| 1795 | + struct bpf_prog_stats *stats) |
---|
| 1796 | +{ |
---|
| 1797 | + u64 nsecs = 0, cnt = 0; |
---|
| 1798 | + int cpu; |
---|
| 1799 | + |
---|
| 1800 | + for_each_possible_cpu(cpu) { |
---|
| 1801 | + const struct bpf_prog_stats *st; |
---|
| 1802 | + unsigned int start; |
---|
| 1803 | + u64 tnsecs, tcnt; |
---|
| 1804 | + |
---|
| 1805 | + st = per_cpu_ptr(prog->aux->stats, cpu); |
---|
| 1806 | + do { |
---|
| 1807 | + start = u64_stats_fetch_begin_irq(&st->syncp); |
---|
| 1808 | + tnsecs = st->nsecs; |
---|
| 1809 | + tcnt = st->cnt; |
---|
| 1810 | + } while (u64_stats_fetch_retry_irq(&st->syncp, start)); |
---|
| 1811 | + nsecs += tnsecs; |
---|
| 1812 | + cnt += tcnt; |
---|
| 1813 | + } |
---|
| 1814 | + stats->nsecs = nsecs; |
---|
| 1815 | + stats->cnt = cnt; |
---|
| 1816 | +} |
---|
| 1817 | + |
---|
1151 | 1818 | #ifdef CONFIG_PROC_FS |
---|
1152 | 1819 | static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) |
---|
1153 | 1820 | { |
---|
1154 | 1821 | const struct bpf_prog *prog = filp->private_data; |
---|
1155 | 1822 | char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; |
---|
| 1823 | + struct bpf_prog_stats stats; |
---|
1156 | 1824 | |
---|
| 1825 | + bpf_prog_get_stats(prog, &stats); |
---|
1157 | 1826 | bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); |
---|
1158 | 1827 | seq_printf(m, |
---|
1159 | 1828 | "prog_type:\t%u\n" |
---|
1160 | 1829 | "prog_jited:\t%u\n" |
---|
1161 | 1830 | "prog_tag:\t%s\n" |
---|
1162 | 1831 | "memlock:\t%llu\n" |
---|
1163 | | - "prog_id:\t%u\n", |
---|
| 1832 | + "prog_id:\t%u\n" |
---|
| 1833 | + "run_time_ns:\t%llu\n" |
---|
| 1834 | + "run_cnt:\t%llu\n", |
---|
1164 | 1835 | prog->type, |
---|
1165 | 1836 | prog->jited, |
---|
1166 | 1837 | prog_tag, |
---|
1167 | 1838 | prog->pages * 1ULL << PAGE_SHIFT, |
---|
1168 | | - prog->aux->id); |
---|
| 1839 | + prog->aux->id, |
---|
| 1840 | + stats.nsecs, |
---|
| 1841 | + stats.cnt); |
---|
1169 | 1842 | } |
---|
1170 | 1843 | #endif |
---|
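[Editor's note] The per-CPU totals gathered by bpf_prog_get_stats() surface as the two new fdinfo fields above. A hedged sketch of reading them back from procfs (note run_time_ns/run_cnt stay zero unless the kernel.bpf_stats_enabled sysctl is on):

#include <stdio.h>
#include <string.h>

static void print_prog_stats(int prog_fd)
{
	char path[64], line[128];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", prog_fd);
	f = fopen(path, "r");
	if (!f)
		return;
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "run_time_ns", 11) ||
		    !strncmp(line, "run_cnt", 7))
			fputs(line, stdout);	/* the two fields added above */
	fclose(f);
}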
1171 | 1844 | |
---|
.. | .. |
---|
1202 | 1875 | return f.file->private_data; |
---|
1203 | 1876 | } |
---|
1204 | 1877 | |
---|
1205 | | -struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) |
---|
| 1878 | +void bpf_prog_add(struct bpf_prog *prog, int i) |
---|
1206 | 1879 | { |
---|
1207 | | - if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) { |
---|
1208 | | - atomic_sub(i, &prog->aux->refcnt); |
---|
1209 | | - return ERR_PTR(-EBUSY); |
---|
1210 | | - } |
---|
1211 | | - return prog; |
---|
| 1880 | + atomic64_add(i, &prog->aux->refcnt); |
---|
1212 | 1881 | } |
---|
1213 | 1882 | EXPORT_SYMBOL_GPL(bpf_prog_add); |
---|
1214 | 1883 | |
---|
.. | .. |
---|
1219 | 1888 | * path holds a reference to the program, thus atomic_sub() can |
---|
1220 | 1889 | * be safely used in such cases! |
---|
1221 | 1890 | */ |
---|
1222 | | - WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0); |
---|
| 1891 | + WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0); |
---|
1223 | 1892 | } |
---|
1224 | 1893 | EXPORT_SYMBOL_GPL(bpf_prog_sub); |
---|
1225 | 1894 | |
---|
1226 | | -struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) |
---|
| 1895 | +void bpf_prog_inc(struct bpf_prog *prog) |
---|
1227 | 1896 | { |
---|
1228 | | - return bpf_prog_add(prog, 1); |
---|
| 1897 | + atomic64_inc(&prog->aux->refcnt); |
---|
1229 | 1898 | } |
---|
1230 | 1899 | EXPORT_SYMBOL_GPL(bpf_prog_inc); |
---|
1231 | 1900 | |
---|
.. | .. |
---|
1234 | 1903 | { |
---|
1235 | 1904 | int refold; |
---|
1236 | 1905 | |
---|
1237 | | - refold = atomic_fetch_add_unless(&prog->aux->refcnt, 1, 0); |
---|
1238 | | - |
---|
1239 | | - if (refold >= BPF_MAX_REFCNT) { |
---|
1240 | | - __bpf_prog_put(prog, false); |
---|
1241 | | - return ERR_PTR(-EBUSY); |
---|
1242 | | - } |
---|
| 1906 | + refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0); |
---|
1243 | 1907 | |
---|
1244 | 1908 | if (!refold) |
---|
1245 | 1909 | return ERR_PTR(-ENOENT); |
---|
.. | .. |
---|
1277 | 1941 | goto out; |
---|
1278 | 1942 | } |
---|
1279 | 1943 | |
---|
1280 | | - prog = bpf_prog_inc(prog); |
---|
| 1944 | + bpf_prog_inc(prog); |
---|
1281 | 1945 | out: |
---|
1282 | 1946 | fdput(f); |
---|
1283 | 1947 | return prog; |
---|
.. | .. |
---|
1322 | 1986 | } |
---|
1323 | 1987 | |
---|
1324 | 1988 | static int |
---|
1325 | | -bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type, |
---|
1326 | | - enum bpf_attach_type expected_attach_type) |
---|
| 1989 | +bpf_prog_load_check_attach(enum bpf_prog_type prog_type, |
---|
| 1990 | + enum bpf_attach_type expected_attach_type, |
---|
| 1991 | + u32 btf_id, u32 prog_fd) |
---|
1327 | 1992 | { |
---|
| 1993 | + if (btf_id) { |
---|
| 1994 | + if (btf_id > BTF_MAX_TYPE) |
---|
| 1995 | + return -EINVAL; |
---|
| 1996 | + |
---|
| 1997 | + switch (prog_type) { |
---|
| 1998 | + case BPF_PROG_TYPE_TRACING: |
---|
| 1999 | + case BPF_PROG_TYPE_LSM: |
---|
| 2000 | + case BPF_PROG_TYPE_STRUCT_OPS: |
---|
| 2001 | + case BPF_PROG_TYPE_EXT: |
---|
| 2002 | + break; |
---|
| 2003 | + default: |
---|
| 2004 | + return -EINVAL; |
---|
| 2005 | + } |
---|
| 2006 | + } |
---|
| 2007 | + |
---|
| 2008 | + if (prog_fd && prog_type != BPF_PROG_TYPE_TRACING && |
---|
| 2009 | + prog_type != BPF_PROG_TYPE_EXT) |
---|
| 2010 | + return -EINVAL; |
---|
| 2011 | + |
---|
1328 | 2012 | switch (prog_type) { |
---|
1329 | 2013 | case BPF_PROG_TYPE_CGROUP_SOCK: |
---|
1330 | 2014 | switch (expected_attach_type) { |
---|
1331 | 2015 | case BPF_CGROUP_INET_SOCK_CREATE: |
---|
| 2016 | + case BPF_CGROUP_INET_SOCK_RELEASE: |
---|
1332 | 2017 | case BPF_CGROUP_INET4_POST_BIND: |
---|
1333 | 2018 | case BPF_CGROUP_INET6_POST_BIND: |
---|
1334 | 2019 | return 0; |
---|
.. | .. |
---|
1341 | 2026 | case BPF_CGROUP_INET6_BIND: |
---|
1342 | 2027 | case BPF_CGROUP_INET4_CONNECT: |
---|
1343 | 2028 | case BPF_CGROUP_INET6_CONNECT: |
---|
| 2029 | + case BPF_CGROUP_INET4_GETPEERNAME: |
---|
| 2030 | + case BPF_CGROUP_INET6_GETPEERNAME: |
---|
| 2031 | + case BPF_CGROUP_INET4_GETSOCKNAME: |
---|
| 2032 | + case BPF_CGROUP_INET6_GETSOCKNAME: |
---|
1344 | 2033 | case BPF_CGROUP_UDP4_SENDMSG: |
---|
1345 | 2034 | case BPF_CGROUP_UDP6_SENDMSG: |
---|
1346 | 2035 | case BPF_CGROUP_UDP4_RECVMSG: |
---|
.. | .. |
---|
1349 | 2038 | default: |
---|
1350 | 2039 | return -EINVAL; |
---|
1351 | 2040 | } |
---|
| 2041 | + case BPF_PROG_TYPE_CGROUP_SKB: |
---|
| 2042 | + switch (expected_attach_type) { |
---|
| 2043 | + case BPF_CGROUP_INET_INGRESS: |
---|
| 2044 | + case BPF_CGROUP_INET_EGRESS: |
---|
| 2045 | + return 0; |
---|
| 2046 | + default: |
---|
| 2047 | + return -EINVAL; |
---|
| 2048 | + } |
---|
| 2049 | + case BPF_PROG_TYPE_CGROUP_SOCKOPT: |
---|
| 2050 | + switch (expected_attach_type) { |
---|
| 2051 | + case BPF_CGROUP_SETSOCKOPT: |
---|
| 2052 | + case BPF_CGROUP_GETSOCKOPT: |
---|
| 2053 | + return 0; |
---|
| 2054 | + default: |
---|
| 2055 | + return -EINVAL; |
---|
| 2056 | + } |
---|
| 2057 | + case BPF_PROG_TYPE_SK_LOOKUP: |
---|
| 2058 | + if (expected_attach_type == BPF_SK_LOOKUP) |
---|
| 2059 | + return 0; |
---|
| 2060 | + return -EINVAL; |
---|
| 2061 | + case BPF_PROG_TYPE_EXT: |
---|
| 2062 | + if (expected_attach_type) |
---|
| 2063 | + return -EINVAL; |
---|
| 2064 | + fallthrough; |
---|
1352 | 2065 | default: |
---|
1353 | 2066 | return 0; |
---|
1354 | 2067 | } |
---|
1355 | 2068 | } |
---|
1356 | 2069 | |
---|
1357 | | -/* last field in 'union bpf_attr' used by this command */ |
---|
1358 | | -#define BPF_PROG_LOAD_LAST_FIELD expected_attach_type |
---|
| 2070 | +static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) |
---|
| 2071 | +{ |
---|
| 2072 | + switch (prog_type) { |
---|
| 2073 | + case BPF_PROG_TYPE_SCHED_CLS: |
---|
| 2074 | + case BPF_PROG_TYPE_SCHED_ACT: |
---|
| 2075 | + case BPF_PROG_TYPE_XDP: |
---|
| 2076 | + case BPF_PROG_TYPE_LWT_IN: |
---|
| 2077 | + case BPF_PROG_TYPE_LWT_OUT: |
---|
| 2078 | + case BPF_PROG_TYPE_LWT_XMIT: |
---|
| 2079 | + case BPF_PROG_TYPE_LWT_SEG6LOCAL: |
---|
| 2080 | + case BPF_PROG_TYPE_SK_SKB: |
---|
| 2081 | + case BPF_PROG_TYPE_SK_MSG: |
---|
| 2082 | + case BPF_PROG_TYPE_LIRC_MODE2: |
---|
| 2083 | + case BPF_PROG_TYPE_FLOW_DISSECTOR: |
---|
| 2084 | + case BPF_PROG_TYPE_CGROUP_DEVICE: |
---|
| 2085 | + case BPF_PROG_TYPE_CGROUP_SOCK: |
---|
| 2086 | + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: |
---|
| 2087 | + case BPF_PROG_TYPE_CGROUP_SOCKOPT: |
---|
| 2088 | + case BPF_PROG_TYPE_CGROUP_SYSCTL: |
---|
| 2089 | + case BPF_PROG_TYPE_SOCK_OPS: |
---|
| 2090 | + case BPF_PROG_TYPE_EXT: /* extends any prog */ |
---|
| 2091 | + return true; |
---|
| 2092 | + case BPF_PROG_TYPE_CGROUP_SKB: |
---|
| 2093 | + /* always unpriv */ |
---|
| 2094 | + case BPF_PROG_TYPE_SK_REUSEPORT: |
---|
| 2095 | + /* equivalent to SOCKET_FILTER. need CAP_BPF only */ |
---|
| 2096 | + default: |
---|
| 2097 | + return false; |
---|
| 2098 | + } |
---|
| 2099 | +} |
---|
1359 | 2100 | |
---|
1360 | | -static int bpf_prog_load(union bpf_attr *attr) |
---|
| 2101 | +static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) |
---|
| 2102 | +{ |
---|
| 2103 | + switch (prog_type) { |
---|
| 2104 | + case BPF_PROG_TYPE_KPROBE: |
---|
| 2105 | + case BPF_PROG_TYPE_TRACEPOINT: |
---|
| 2106 | + case BPF_PROG_TYPE_PERF_EVENT: |
---|
| 2107 | + case BPF_PROG_TYPE_RAW_TRACEPOINT: |
---|
| 2108 | + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: |
---|
| 2109 | + case BPF_PROG_TYPE_TRACING: |
---|
| 2110 | + case BPF_PROG_TYPE_LSM: |
---|
| 2111 | + case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */ |
---|
| 2112 | + case BPF_PROG_TYPE_EXT: /* extends any prog */ |
---|
| 2113 | + return true; |
---|
| 2114 | + default: |
---|
| 2115 | + return false; |
---|
| 2116 | + } |
---|
| 2117 | +} |
---|
| 2118 | + |
---|
| 2119 | +/* last field in 'union bpf_attr' used by this command */ |
---|
| 2120 | +#define BPF_PROG_LOAD_LAST_FIELD attach_prog_fd |
---|
| 2121 | + |
---|
| 2122 | +static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) |
---|
1361 | 2123 | { |
---|
1362 | 2124 | enum bpf_prog_type type = attr->prog_type; |
---|
1363 | 2125 | struct bpf_prog *prog; |
---|
.. | .. |
---|
1368 | 2130 | if (CHECK_ATTR(BPF_PROG_LOAD)) |
---|
1369 | 2131 | return -EINVAL; |
---|
1370 | 2132 | |
---|
1371 | | - if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | BPF_F_ANY_ALIGNMENT)) |
---|
| 2133 | + if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | |
---|
| 2134 | + BPF_F_ANY_ALIGNMENT | |
---|
| 2135 | + BPF_F_TEST_STATE_FREQ | |
---|
| 2136 | + BPF_F_SLEEPABLE | |
---|
| 2137 | + BPF_F_TEST_RND_HI32)) |
---|
1372 | 2138 | return -EINVAL; |
---|
1373 | 2139 | |
---|
1374 | 2140 | if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && |
---|
1375 | 2141 | (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && |
---|
1376 | | - !capable(CAP_SYS_ADMIN)) |
---|
| 2142 | + !bpf_capable()) |
---|
1377 | 2143 | return -EPERM; |
---|
1378 | 2144 | |
---|
1379 | 2145 | /* copy eBPF program license from user space */ |
---|
.. | .. |
---|
1385 | 2151 | /* eBPF programs must be GPL compatible to use GPL-ed functions */ |
---|
1386 | 2152 | is_gpl = license_is_gpl_compatible(license); |
---|
1387 | 2153 | |
---|
1388 | | - if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS) |
---|
| 2154 | + if (attr->insn_cnt == 0 || |
---|
| 2155 | + attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) |
---|
1389 | 2156 | return -E2BIG; |
---|
1390 | | - |
---|
1391 | | - if (type == BPF_PROG_TYPE_KPROBE && |
---|
1392 | | - attr->kern_version != LINUX_VERSION_CODE) |
---|
1393 | | - return -EINVAL; |
---|
1394 | | - |
---|
1395 | 2157 | if (type != BPF_PROG_TYPE_SOCKET_FILTER && |
---|
1396 | 2158 | type != BPF_PROG_TYPE_CGROUP_SKB && |
---|
1397 | | - !capable(CAP_SYS_ADMIN)) |
---|
| 2159 | + !bpf_capable()) |
---|
| 2160 | + return -EPERM; |
---|
| 2161 | + |
---|
| 2162 | + if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN)) |
---|
| 2163 | + return -EPERM; |
---|
| 2164 | + if (is_perfmon_prog_type(type) && !perfmon_capable()) |
---|
1398 | 2165 | return -EPERM; |
---|
1399 | 2166 | |
---|
1400 | 2167 | bpf_prog_load_fixup_attach_type(attr); |
---|
1401 | | - if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type)) |
---|
| 2168 | + if (bpf_prog_load_check_attach(type, attr->expected_attach_type, |
---|
| 2169 | + attr->attach_btf_id, |
---|
| 2170 | + attr->attach_prog_fd)) |
---|
1402 | 2171 | return -EINVAL; |
---|
1403 | 2172 | |
---|
1404 | 2173 | /* plain bpf_prog allocation */ |
---|
.. | .. |
---|
1407 | 2176 | return -ENOMEM; |
---|
1408 | 2177 | |
---|
1409 | 2178 | prog->expected_attach_type = attr->expected_attach_type; |
---|
| 2179 | + prog->aux->attach_btf_id = attr->attach_btf_id; |
---|
| 2180 | + if (attr->attach_prog_fd) { |
---|
| 2181 | + struct bpf_prog *dst_prog; |
---|
| 2182 | + |
---|
| 2183 | + dst_prog = bpf_prog_get(attr->attach_prog_fd); |
---|
| 2184 | + if (IS_ERR(dst_prog)) { |
---|
| 2185 | + err = PTR_ERR(dst_prog); |
---|
| 2186 | + goto free_prog_nouncharge; |
---|
| 2187 | + } |
---|
| 2188 | + prog->aux->dst_prog = dst_prog; |
---|
| 2189 | + } |
---|
1410 | 2190 | |
---|
1411 | 2191 | prog->aux->offload_requested = !!attr->prog_ifindex; |
---|
| 2192 | + prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; |
---|
1412 | 2193 | |
---|
1413 | 2194 | err = security_bpf_prog_alloc(prog->aux); |
---|
1414 | 2195 | if (err) |
---|
.. | .. |
---|
1428 | 2209 | prog->orig_prog = NULL; |
---|
1429 | 2210 | prog->jited = 0; |
---|
1430 | 2211 | |
---|
1431 | | - atomic_set(&prog->aux->refcnt, 1); |
---|
| 2212 | + atomic64_set(&prog->aux->refcnt, 1); |
---|
1432 | 2213 | prog->gpl_compatible = is_gpl ? 1 : 0; |
---|
1433 | 2214 | |
---|
1434 | 2215 | if (bpf_prog_is_dev_bound(prog->aux)) { |
---|
.. | .. |
---|
1442 | 2223 | if (err < 0) |
---|
1443 | 2224 | goto free_prog; |
---|
1444 | 2225 | |
---|
1445 | | - prog->aux->load_time = ktime_get_boot_ns(); |
---|
1446 | | - err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name); |
---|
1447 | | - if (err) |
---|
| 2226 | + prog->aux->load_time = ktime_get_boottime_ns(); |
---|
| 2227 | + err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name, |
---|
| 2228 | + sizeof(attr->prog_name)); |
---|
| 2229 | + if (err < 0) |
---|
1448 | 2230 | goto free_prog; |
---|
1449 | 2231 | |
---|
1450 | 2232 | /* run eBPF verifier */ |
---|
1451 | | - err = bpf_check(&prog, attr); |
---|
| 2233 | + err = bpf_check(&prog, attr, uattr); |
---|
1452 | 2234 | if (err < 0) |
---|
1453 | 2235 | goto free_used_maps; |
---|
1454 | 2236 | |
---|
.. | .. |
---|
1475 | 2257 | * be using bpf_prog_put() given the program is exposed. |
---|
1476 | 2258 | */ |
---|
1477 | 2259 | bpf_prog_kallsyms_add(prog); |
---|
| 2260 | + perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0); |
---|
| 2261 | + bpf_audit_prog(prog, BPF_AUDIT_LOAD); |
---|
1478 | 2262 | |
---|
1479 | 2263 | err = bpf_prog_new_fd(prog); |
---|
1480 | 2264 | if (err < 0) |
---|
.. | .. |
---|
1482 | 2266 | return err; |
---|
1483 | 2267 | |
---|
1484 | 2268 | free_used_maps: |
---|
1485 | | - bpf_prog_kallsyms_del_subprogs(prog); |
---|
1486 | | - free_used_maps(prog->aux); |
---|
| 2269 | + /* In case we have subprogs, we need to wait for a grace |
---|
| 2270 | + * period before we can tear down JIT memory since symbols |
---|
| 2271 | + * are already exposed under kallsyms. |
---|
| 2272 | + */ |
---|
| 2273 | + __bpf_prog_put_noref(prog, prog->aux->func_cnt); |
---|
| 2274 | + return err; |
---|
1487 | 2275 | free_prog: |
---|
1488 | 2276 | bpf_prog_uncharge_memlock(prog); |
---|
1489 | 2277 | free_prog_sec: |
---|
.. | .. |
---|
1513 | 2301 | attr->file_flags); |
---|
1514 | 2302 | } |
---|
1515 | 2303 | |
---|
1516 | | -struct bpf_raw_tracepoint { |
---|
1517 | | - struct bpf_raw_event_map *btp; |
---|
1518 | | - struct bpf_prog *prog; |
---|
1519 | | -}; |
---|
1520 | | - |
---|
1521 | | -static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp) |
---|
| 2304 | +void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, |
---|
| 2305 | + const struct bpf_link_ops *ops, struct bpf_prog *prog) |
---|
1522 | 2306 | { |
---|
1523 | | - struct bpf_raw_tracepoint *raw_tp = filp->private_data; |
---|
| 2307 | + atomic64_set(&link->refcnt, 1); |
---|
| 2308 | + link->type = type; |
---|
| 2309 | + link->id = 0; |
---|
| 2310 | + link->ops = ops; |
---|
| 2311 | + link->prog = prog; |
---|
| 2312 | +} |
---|
1524 | 2313 | |
---|
1525 | | - if (raw_tp->prog) { |
---|
1526 | | - bpf_probe_unregister(raw_tp->btp, raw_tp->prog); |
---|
1527 | | - bpf_prog_put(raw_tp->prog); |
---|
| 2314 | +static void bpf_link_free_id(int id) |
---|
| 2315 | +{ |
---|
| 2316 | + if (!id) |
---|
| 2317 | + return; |
---|
| 2318 | + |
---|
| 2319 | + spin_lock_bh(&link_idr_lock); |
---|
| 2320 | + idr_remove(&link_idr, id); |
---|
| 2321 | + spin_unlock_bh(&link_idr_lock); |
---|
| 2322 | +} |
---|
| 2323 | + |
---|
| 2324 | +/* Clean up bpf_link and corresponding anon_inode file and FD. After |
---|
| 2325 | + * anon_inode is created, bpf_link can't be just kfree()'d due to deferred |
---|
| 2326 | + * anon_inode's release() call. This helper marks bpf_link as |
---|
| 2327 | + * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt |
---|
| 2328 | + * is not decremented, it's the responsibility of the calling code that failed |
---|
| 2329 | + * to complete bpf_link initialization. |
---|
| 2330 | + */ |
---|
| 2331 | +void bpf_link_cleanup(struct bpf_link_primer *primer) |
---|
| 2332 | +{ |
---|
| 2333 | + primer->link->prog = NULL; |
---|
| 2334 | + bpf_link_free_id(primer->id); |
---|
| 2335 | + fput(primer->file); |
---|
| 2336 | + put_unused_fd(primer->fd); |
---|
| 2337 | +} |
---|
| 2338 | + |
---|
| 2339 | +void bpf_link_inc(struct bpf_link *link) |
---|
| 2340 | +{ |
---|
| 2341 | + atomic64_inc(&link->refcnt); |
---|
| 2342 | +} |
---|
| 2343 | + |
---|
| 2344 | +/* bpf_link_free is guaranteed to be called from process context */ |
---|
| 2345 | +static void bpf_link_free(struct bpf_link *link) |
---|
| 2346 | +{ |
---|
| 2347 | + bpf_link_free_id(link->id); |
---|
| 2348 | + if (link->prog) { |
---|
| 2349 | + /* detach BPF program, clean up used resources */ |
---|
| 2350 | + link->ops->release(link); |
---|
| 2351 | + bpf_prog_put(link->prog); |
---|
1528 | 2352 | } |
---|
1529 | | - kfree(raw_tp); |
---|
| 2353 | + /* free bpf_link and its containing memory */ |
---|
| 2354 | + link->ops->dealloc(link); |
---|
| 2355 | +} |
---|
| 2356 | + |
---|
| 2357 | +static void bpf_link_put_deferred(struct work_struct *work) |
---|
| 2358 | +{ |
---|
| 2359 | + struct bpf_link *link = container_of(work, struct bpf_link, work); |
---|
| 2360 | + |
---|
| 2361 | + bpf_link_free(link); |
---|
| 2362 | +} |
---|
| 2363 | + |
---|
| 2364 | +/* bpf_link_put can be called from atomic context, but ensures that resources |
---|
| 2365 | + * are freed from process context |
---|
| 2366 | + */ |
---|
| 2367 | +void bpf_link_put(struct bpf_link *link) |
---|
| 2368 | +{ |
---|
| 2369 | + if (!atomic64_dec_and_test(&link->refcnt)) |
---|
| 2370 | + return; |
---|
| 2371 | + |
---|
| 2372 | + if (in_atomic()) { |
---|
| 2373 | + INIT_WORK(&link->work, bpf_link_put_deferred); |
---|
| 2374 | + schedule_work(&link->work); |
---|
| 2375 | + } else { |
---|
| 2376 | + bpf_link_free(link); |
---|
| 2377 | + } |
---|
| 2378 | +} |
---|
| 2379 | + |
---|
| 2380 | +static int bpf_link_release(struct inode *inode, struct file *filp) |
---|
| 2381 | +{ |
---|
| 2382 | + struct bpf_link *link = filp->private_data; |
---|
| 2383 | + |
---|
| 2384 | + bpf_link_put(link); |
---|
1530 | 2385 | return 0; |
---|
1531 | 2386 | } |
---|
1532 | 2387 | |
---|
1533 | | -static const struct file_operations bpf_raw_tp_fops = { |
---|
1534 | | - .release = bpf_raw_tracepoint_release, |
---|
| 2388 | +#ifdef CONFIG_PROC_FS |
---|
| 2389 | +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) |
---|
| 2390 | +#define BPF_MAP_TYPE(_id, _ops) |
---|
| 2391 | +#define BPF_LINK_TYPE(_id, _name) [_id] = #_name, |
---|
| 2392 | +static const char *bpf_link_type_strs[] = { |
---|
| 2393 | + [BPF_LINK_TYPE_UNSPEC] = "<invalid>", |
---|
| 2394 | +#include <linux/bpf_types.h> |
---|
| 2395 | +}; |
---|
| 2396 | +#undef BPF_PROG_TYPE |
---|
| 2397 | +#undef BPF_MAP_TYPE |
---|
| 2398 | +#undef BPF_LINK_TYPE |
---|
| 2399 | + |
---|
| 2400 | +static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) |
---|
| 2401 | +{ |
---|
| 2402 | + const struct bpf_link *link = filp->private_data; |
---|
| 2403 | + const struct bpf_prog *prog = link->prog; |
---|
| 2404 | + char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; |
---|
| 2405 | + |
---|
| 2406 | + bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); |
---|
| 2407 | + seq_printf(m, |
---|
| 2408 | + "link_type:\t%s\n" |
---|
| 2409 | + "link_id:\t%u\n" |
---|
| 2410 | + "prog_tag:\t%s\n" |
---|
| 2411 | + "prog_id:\t%u\n", |
---|
| 2412 | + bpf_link_type_strs[link->type], |
---|
| 2413 | + link->id, |
---|
| 2414 | + prog_tag, |
---|
| 2415 | + prog->aux->id); |
---|
| 2416 | + if (link->ops->show_fdinfo) |
---|
| 2417 | + link->ops->show_fdinfo(link, m); |
---|
| 2418 | +} |
---|
| 2419 | +#endif |
---|
| 2420 | + |
---|
| 2421 | +static const struct file_operations bpf_link_fops = { |
---|
| 2422 | +#ifdef CONFIG_PROC_FS |
---|
| 2423 | + .show_fdinfo = bpf_link_show_fdinfo, |
---|
| 2424 | +#endif |
---|
| 2425 | + .release = bpf_link_release, |
---|
1535 | 2426 | .read = bpf_dummy_read, |
---|
1536 | 2427 | .write = bpf_dummy_write, |
---|
| 2428 | +}; |
---|
| 2429 | + |
---|
| 2430 | +static int bpf_link_alloc_id(struct bpf_link *link) |
---|
| 2431 | +{ |
---|
| 2432 | + int id; |
---|
| 2433 | + |
---|
| 2434 | + idr_preload(GFP_KERNEL); |
---|
| 2435 | + spin_lock_bh(&link_idr_lock); |
---|
| 2436 | + id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC); |
---|
| 2437 | + spin_unlock_bh(&link_idr_lock); |
---|
| 2438 | + idr_preload_end(); |
---|
| 2439 | + |
---|
| 2440 | + return id; |
---|
| 2441 | +} |
---|
| 2442 | + |
---|
| 2443 | +/* Prepare bpf_link to be exposed to user-space by allocating anon_inode file, |
---|
| 2444 | + * reserving unused FD and allocating ID from link_idr. This is to be paired |
---|
| 2445 | + * with bpf_link_settle() to install FD and ID and expose bpf_link to |
---|
| 2446 | + * user-space, if bpf_link is successfully attached. If not, bpf_link and |
---|
| 2447 | + * pre-allocated resources are to be freed with bpf_link_cleanup() call. All the |
---|
| 2448 | + * transient state is passed around in struct bpf_link_primer. |
---|
| 2449 | + * This is the preferred way to create and initialize bpf_link, especially when |
---|
| 2450 | + * there are complicated and expensive operations in between creating bpf_link |
---|
| 2451 | + * itself and attaching it to a BPF hook. By using bpf_link_prime() and |
---|
| 2452 | + * bpf_link_settle() kernel code using bpf_link doesn't have to perform |
---|
| 2453 | + * expensive (and potentially failing) roll back operations in the rare case |
---|
| 2454 | + * that file, FD, or ID can't be allocated. |
---|
| 2455 | + */ |
---|
| 2456 | +int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer) |
---|
| 2457 | +{ |
---|
| 2458 | + struct file *file; |
---|
| 2459 | + int fd, id; |
---|
| 2460 | + |
---|
| 2461 | + fd = get_unused_fd_flags(O_CLOEXEC); |
---|
| 2462 | + if (fd < 0) |
---|
| 2463 | + return fd; |
---|
| 2464 | + |
---|
| 2465 | + |
---|
| 2466 | + id = bpf_link_alloc_id(link); |
---|
| 2467 | + if (id < 0) { |
---|
| 2468 | + put_unused_fd(fd); |
---|
| 2469 | + return id; |
---|
| 2470 | + } |
---|
| 2471 | + |
---|
| 2472 | + file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC); |
---|
| 2473 | + if (IS_ERR(file)) { |
---|
| 2474 | + bpf_link_free_id(id); |
---|
| 2475 | + put_unused_fd(fd); |
---|
| 2476 | + return PTR_ERR(file); |
---|
| 2477 | + } |
---|
| 2478 | + |
---|
| 2479 | + primer->link = link; |
---|
| 2480 | + primer->file = file; |
---|
| 2481 | + primer->fd = fd; |
---|
| 2482 | + primer->id = id; |
---|
| 2483 | + return 0; |
---|
| 2484 | +} |
---|
| 2485 | + |
---|
| 2486 | +int bpf_link_settle(struct bpf_link_primer *primer) |
---|
| 2487 | +{ |
---|
| 2488 | + /* make bpf_link fetchable by ID */ |
---|
| 2489 | + spin_lock_bh(&link_idr_lock); |
---|
| 2490 | + primer->link->id = primer->id; |
---|
| 2491 | + spin_unlock_bh(&link_idr_lock); |
---|
| 2492 | + /* make bpf_link fetchable by FD */ |
---|
| 2493 | + fd_install(primer->fd, primer->file); |
---|
| 2494 | + /* pass through installed FD */ |
---|
| 2495 | + return primer->fd; |
---|
| 2496 | +} |
---|
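[Editor's note] To make the intended call sequence concrete, a hedged kernel-side sketch of a hypothetical link type (the foo_* names do not exist; bpf_raw_tracepoint_open() further down is a real instance of the same pattern):

struct foo_link {
	struct bpf_link link;
	/* hook-specific state would live here */
};

static int foo_link_attach(struct bpf_prog *prog)
{
	struct bpf_link_primer link_primer;
	struct foo_link *link;
	int err;

	link = kzalloc(sizeof(*link), GFP_USER);
	if (!link)
		return -ENOMEM;
	bpf_link_init(&link->link, BPF_LINK_TYPE_UNSPEC,
		      &foo_link_lops, prog);	/* foo_link_lops: hypothetical */

	err = bpf_link_prime(&link->link, &link_primer);
	if (err) {
		kfree(link);			/* not yet primed: plain kfree */
		return err;
	}

	err = foo_hook_attach(link);		/* the failure-prone step */
	if (err) {
		bpf_link_cleanup(&link_primer);	/* frees FD/ID/file, not prog */
		return err;
	}

	return bpf_link_settle(&link_primer);	/* expose FD + ID to userspace */
}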
| 2497 | + |
---|
| 2498 | +int bpf_link_new_fd(struct bpf_link *link) |
---|
| 2499 | +{ |
---|
| 2500 | + return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC); |
---|
| 2501 | +} |
---|
| 2502 | + |
---|
| 2503 | +struct bpf_link *bpf_link_get_from_fd(u32 ufd) |
---|
| 2504 | +{ |
---|
| 2505 | + struct fd f = fdget(ufd); |
---|
| 2506 | + struct bpf_link *link; |
---|
| 2507 | + |
---|
| 2508 | + if (!f.file) |
---|
| 2509 | + return ERR_PTR(-EBADF); |
---|
| 2510 | + if (f.file->f_op != &bpf_link_fops) { |
---|
| 2511 | + fdput(f); |
---|
| 2512 | + return ERR_PTR(-EINVAL); |
---|
| 2513 | + } |
---|
| 2514 | + |
---|
| 2515 | + link = f.file->private_data; |
---|
| 2516 | + bpf_link_inc(link); |
---|
| 2517 | + fdput(f); |
---|
| 2518 | + |
---|
| 2519 | + return link; |
---|
| 2520 | +} |
---|
| 2521 | + |
---|
| 2522 | +struct bpf_tracing_link { |
---|
| 2523 | + struct bpf_link link; |
---|
| 2524 | + enum bpf_attach_type attach_type; |
---|
| 2525 | + struct bpf_trampoline *trampoline; |
---|
| 2526 | + struct bpf_prog *tgt_prog; |
---|
| 2527 | +}; |
---|
| 2528 | + |
---|
| 2529 | +static void bpf_tracing_link_release(struct bpf_link *link) |
---|
| 2530 | +{ |
---|
| 2531 | + struct bpf_tracing_link *tr_link = |
---|
| 2532 | + container_of(link, struct bpf_tracing_link, link); |
---|
| 2533 | + |
---|
| 2534 | + WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog, |
---|
| 2535 | + tr_link->trampoline)); |
---|
| 2536 | + |
---|
| 2537 | + bpf_trampoline_put(tr_link->trampoline); |
---|
| 2538 | + |
---|
| 2539 | + /* tgt_prog is NULL if target is a kernel function */ |
---|
| 2540 | + if (tr_link->tgt_prog) |
---|
| 2541 | + bpf_prog_put(tr_link->tgt_prog); |
---|
| 2542 | +} |
---|
| 2543 | + |
---|
| 2544 | +static void bpf_tracing_link_dealloc(struct bpf_link *link) |
---|
| 2545 | +{ |
---|
| 2546 | + struct bpf_tracing_link *tr_link = |
---|
| 2547 | + container_of(link, struct bpf_tracing_link, link); |
---|
| 2548 | + |
---|
| 2549 | + kfree(tr_link); |
---|
| 2550 | +} |
---|
| 2551 | + |
---|
| 2552 | +static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, |
---|
| 2553 | + struct seq_file *seq) |
---|
| 2554 | +{ |
---|
| 2555 | + struct bpf_tracing_link *tr_link = |
---|
| 2556 | + container_of(link, struct bpf_tracing_link, link); |
---|
| 2557 | + |
---|
| 2558 | + seq_printf(seq, |
---|
| 2559 | + "attach_type:\t%d\n", |
---|
| 2560 | + tr_link->attach_type); |
---|
| 2561 | +} |
---|
| 2562 | + |
---|
| 2563 | +static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, |
---|
| 2564 | + struct bpf_link_info *info) |
---|
| 2565 | +{ |
---|
| 2566 | + struct bpf_tracing_link *tr_link = |
---|
| 2567 | + container_of(link, struct bpf_tracing_link, link); |
---|
| 2568 | + |
---|
| 2569 | + info->tracing.attach_type = tr_link->attach_type; |
---|
| 2570 | + |
---|
| 2571 | + return 0; |
---|
| 2572 | +} |
---|
| 2573 | + |
---|
| 2574 | +static const struct bpf_link_ops bpf_tracing_link_lops = { |
---|
| 2575 | + .release = bpf_tracing_link_release, |
---|
| 2576 | + .dealloc = bpf_tracing_link_dealloc, |
---|
| 2577 | + .show_fdinfo = bpf_tracing_link_show_fdinfo, |
---|
| 2578 | + .fill_link_info = bpf_tracing_link_fill_link_info, |
---|
| 2579 | +}; |
---|
| 2580 | + |
---|
| 2581 | +static int bpf_tracing_prog_attach(struct bpf_prog *prog, |
---|
| 2582 | + int tgt_prog_fd, |
---|
| 2583 | + u32 btf_id) |
---|
| 2584 | +{ |
---|
| 2585 | + struct bpf_link_primer link_primer; |
---|
| 2586 | + struct bpf_prog *tgt_prog = NULL; |
---|
| 2587 | + struct bpf_trampoline *tr = NULL; |
---|
| 2588 | + struct bpf_tracing_link *link; |
---|
| 2589 | + u64 key = 0; |
---|
| 2590 | + int err; |
---|
| 2591 | + |
---|
| 2592 | + switch (prog->type) { |
---|
| 2593 | + case BPF_PROG_TYPE_TRACING: |
---|
| 2594 | + if (prog->expected_attach_type != BPF_TRACE_FENTRY && |
---|
| 2595 | + prog->expected_attach_type != BPF_TRACE_FEXIT && |
---|
| 2596 | + prog->expected_attach_type != BPF_MODIFY_RETURN) { |
---|
| 2597 | + err = -EINVAL; |
---|
| 2598 | + goto out_put_prog; |
---|
| 2599 | + } |
---|
| 2600 | + break; |
---|
| 2601 | + case BPF_PROG_TYPE_EXT: |
---|
| 2602 | + if (prog->expected_attach_type != 0) { |
---|
| 2603 | + err = -EINVAL; |
---|
| 2604 | + goto out_put_prog; |
---|
| 2605 | + } |
---|
| 2606 | + break; |
---|
| 2607 | + case BPF_PROG_TYPE_LSM: |
---|
| 2608 | + if (prog->expected_attach_type != BPF_LSM_MAC) { |
---|
| 2609 | + err = -EINVAL; |
---|
| 2610 | + goto out_put_prog; |
---|
| 2611 | + } |
---|
| 2612 | + break; |
---|
| 2613 | + default: |
---|
| 2614 | + err = -EINVAL; |
---|
| 2615 | + goto out_put_prog; |
---|
| 2616 | + } |
---|
| 2617 | + |
---|
| 2618 | + if (!!tgt_prog_fd != !!btf_id) { |
---|
| 2619 | + err = -EINVAL; |
---|
| 2620 | + goto out_put_prog; |
---|
| 2621 | + } |
---|
| 2622 | + |
---|
| 2623 | + if (tgt_prog_fd) { |
---|
| 2624 | + /* For now we only allow new targets for BPF_PROG_TYPE_EXT */ |
---|
| 2625 | + if (prog->type != BPF_PROG_TYPE_EXT) { |
---|
| 2626 | + err = -EINVAL; |
---|
| 2627 | + goto out_put_prog; |
---|
| 2628 | + } |
---|
| 2629 | + |
---|
| 2630 | + tgt_prog = bpf_prog_get(tgt_prog_fd); |
---|
| 2631 | + if (IS_ERR(tgt_prog)) { |
---|
| 2632 | + err = PTR_ERR(tgt_prog); |
---|
| 2633 | + tgt_prog = NULL; |
---|
| 2634 | + goto out_put_prog; |
---|
| 2635 | + } |
---|
| 2636 | + |
---|
| 2637 | + key = bpf_trampoline_compute_key(tgt_prog, btf_id); |
---|
| 2638 | + } |
---|
| 2639 | + |
---|
| 2640 | + link = kzalloc(sizeof(*link), GFP_USER); |
---|
| 2641 | + if (!link) { |
---|
| 2642 | + err = -ENOMEM; |
---|
| 2643 | + goto out_put_prog; |
---|
| 2644 | + } |
---|
| 2645 | + bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING, |
---|
| 2646 | + &bpf_tracing_link_lops, prog); |
---|
| 2647 | + link->attach_type = prog->expected_attach_type; |
---|
| 2648 | + |
---|
| 2649 | + mutex_lock(&prog->aux->dst_mutex); |
---|
| 2650 | + |
---|
| 2651 | + /* There are a few possible cases here: |
---|
| 2652 | + * |
---|
| 2653 | + * - if prog->aux->dst_trampoline is set, the program was just loaded |
---|
| 2654 | + * and not yet attached to anything, so we can use the values stored |
---|
| 2655 | + * in prog->aux |
---|
| 2656 | + * |
---|
| 2657 | + * - if prog->aux->dst_trampoline is NULL, the program has already been |
---|
| 2658 | + * attached to a target and its initial target was cleared (below) |
---|
| 2659 | + * |
---|
| 2660 | + * - if tgt_prog != NULL, the caller specified tgt_prog_fd + |
---|
| 2661 | + * target_btf_id using the link_create API. |
---|
| 2662 | + * |
---|
| 2663 | + * - if tgt_prog == NULL, this function was called using the old |
---|
| 2664 | + * raw_tracepoint_open API, and we need a target from prog->aux |
---|
| 2665 | + * |
---|
| 2666 | + * The combination of no saved target in prog->aux, and no target |
---|
| 2667 | + * specified on load is illegal, and we reject that here. |
---|
| 2668 | + */ |
---|
| 2669 | + if (!prog->aux->dst_trampoline && !tgt_prog) { |
---|
| 2670 | + err = -ENOENT; |
---|
| 2671 | + goto out_unlock; |
---|
| 2672 | + } |
---|
| 2673 | + |
---|
| 2674 | + if (!prog->aux->dst_trampoline || |
---|
| 2675 | + (key && key != prog->aux->dst_trampoline->key)) { |
---|
| 2676 | + /* If there is no saved target, or the specified target is |
---|
| 2677 | + * different from the destination specified at load time, we |
---|
| 2678 | + * need a new trampoline and a check for compatibility |
---|
| 2679 | + */ |
---|
| 2680 | + struct bpf_attach_target_info tgt_info = {}; |
---|
| 2681 | + |
---|
| 2682 | + err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, |
---|
| 2683 | + &tgt_info); |
---|
| 2684 | + if (err) |
---|
| 2685 | + goto out_unlock; |
---|
| 2686 | + |
---|
| 2687 | + tr = bpf_trampoline_get(key, &tgt_info); |
---|
| 2688 | + if (!tr) { |
---|
| 2689 | + err = -ENOMEM; |
---|
| 2690 | + goto out_unlock; |
---|
| 2691 | + } |
---|
| 2692 | + } else { |
---|
| 2693 | + /* The caller didn't specify a target, or the target was the |
---|
| 2694 | + * same as the destination supplied during program load. This |
---|
| 2695 | + * means we can reuse the trampoline and reference from program |
---|
| 2696 | + * load time, and there is no need to allocate a new one. This |
---|
| 2697 | + * can only happen once for any program, as the saved values in |
---|
| 2698 | + * prog->aux are cleared below. |
---|
| 2699 | + */ |
---|
| 2700 | + tr = prog->aux->dst_trampoline; |
---|
| 2701 | + tgt_prog = prog->aux->dst_prog; |
---|
| 2702 | + } |
---|
| 2703 | + |
---|
| 2704 | + err = bpf_link_prime(&link->link, &link_primer); |
---|
| 2705 | + if (err) |
---|
| 2706 | + goto out_unlock; |
---|
| 2707 | + |
---|
| 2708 | + err = bpf_trampoline_link_prog(prog, tr); |
---|
| 2709 | + if (err) { |
---|
| 2710 | + bpf_link_cleanup(&link_primer); |
---|
| 2711 | + link = NULL; |
---|
| 2712 | + goto out_unlock; |
---|
| 2713 | + } |
---|
| 2714 | + |
---|
| 2715 | + link->tgt_prog = tgt_prog; |
---|
| 2716 | + link->trampoline = tr; |
---|
| 2717 | + |
---|
| 2718 | + /* Always clear the trampoline and target prog from prog->aux to make |
---|
| 2719 | + * sure the original attach destination is not kept alive after a |
---|
| 2720 | + * program is (re-)attached to another target. |
---|
| 2721 | + */ |
---|
| 2722 | + if (prog->aux->dst_prog && |
---|
| 2723 | + (tgt_prog_fd || tr != prog->aux->dst_trampoline)) |
---|
| 2724 | + /* got extra prog ref from syscall, or attaching to different prog */ |
---|
| 2725 | + bpf_prog_put(prog->aux->dst_prog); |
---|
| 2726 | + if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline) |
---|
| 2727 | + /* we allocated a new trampoline, so free the old one */ |
---|
| 2728 | + bpf_trampoline_put(prog->aux->dst_trampoline); |
---|
| 2729 | + |
---|
| 2730 | + prog->aux->dst_prog = NULL; |
---|
| 2731 | + prog->aux->dst_trampoline = NULL; |
---|
| 2732 | + mutex_unlock(&prog->aux->dst_mutex); |
---|
| 2733 | + |
---|
| 2734 | + return bpf_link_settle(&link_primer); |
---|
| 2735 | +out_unlock: |
---|
| 2736 | + if (tr && tr != prog->aux->dst_trampoline) |
---|
| 2737 | + bpf_trampoline_put(tr); |
---|
| 2738 | + mutex_unlock(&prog->aux->dst_mutex); |
---|
| 2739 | + kfree(link); |
---|
| 2740 | +out_put_prog: |
---|
| 2741 | + if (tgt_prog_fd && tgt_prog) |
---|
| 2742 | + bpf_prog_put(tgt_prog); |
---|
| 2743 | + return err; |
---|
| 2744 | +} |
---|
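[Editor's note] At this point in the series the user-space entry into bpf_tracing_prog_attach() is BPF_RAW_TRACEPOINT_OPEN with a NULL name, relying on the attach_btf_id fixed at load time. A hedged sketch (includes as in the batch examples):

/* Returns a bpf_link FD on success; closing it detaches the program. */
static int fentry_attach(int tracing_prog_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.raw_tracepoint.prog_fd = tracing_prog_fd;
	/* .name stays 0: the target was pinned by attach_btf_id at load */

	return syscall(__NR_bpf, BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
}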
| 2745 | + |
---|
| 2746 | +struct bpf_raw_tp_link { |
---|
| 2747 | + struct bpf_link link; |
---|
| 2748 | + struct bpf_raw_event_map *btp; |
---|
| 2749 | +}; |
---|
| 2750 | + |
---|
| 2751 | +static void bpf_raw_tp_link_release(struct bpf_link *link) |
---|
| 2752 | +{ |
---|
| 2753 | + struct bpf_raw_tp_link *raw_tp = |
---|
| 2754 | + container_of(link, struct bpf_raw_tp_link, link); |
---|
| 2755 | + |
---|
| 2756 | + bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog); |
---|
| 2757 | + bpf_put_raw_tracepoint(raw_tp->btp); |
---|
| 2758 | +} |
---|
| 2759 | + |
---|
| 2760 | +static void bpf_raw_tp_link_dealloc(struct bpf_link *link) |
---|
| 2761 | +{ |
---|
| 2762 | + struct bpf_raw_tp_link *raw_tp = |
---|
| 2763 | + container_of(link, struct bpf_raw_tp_link, link); |
---|
| 2764 | + |
---|
| 2765 | + kfree(raw_tp); |
---|
| 2766 | +} |
---|
| 2767 | + |
---|
| 2768 | +static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link, |
---|
| 2769 | + struct seq_file *seq) |
---|
| 2770 | +{ |
---|
| 2771 | + struct bpf_raw_tp_link *raw_tp_link = |
---|
| 2772 | + container_of(link, struct bpf_raw_tp_link, link); |
---|
| 2773 | + |
---|
| 2774 | + seq_printf(seq, |
---|
| 2775 | + "tp_name:\t%s\n", |
---|
| 2776 | + raw_tp_link->btp->tp->name); |
---|
| 2777 | +} |
---|
| 2778 | + |
---|
| 2779 | +static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, |
---|
| 2780 | + struct bpf_link_info *info) |
---|
| 2781 | +{ |
---|
| 2782 | + struct bpf_raw_tp_link *raw_tp_link = |
---|
| 2783 | + container_of(link, struct bpf_raw_tp_link, link); |
---|
| 2784 | + char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name); |
---|
| 2785 | + const char *tp_name = raw_tp_link->btp->tp->name; |
---|
| 2786 | + u32 ulen = info->raw_tracepoint.tp_name_len; |
---|
| 2787 | + size_t tp_len = strlen(tp_name); |
---|
| 2788 | + |
---|
| 2789 | + if (!ulen ^ !ubuf) |
---|
| 2790 | + return -EINVAL; |
---|
| 2791 | + |
---|
| 2792 | + info->raw_tracepoint.tp_name_len = tp_len + 1; |
---|
| 2793 | + |
---|
| 2794 | + if (!ubuf) |
---|
| 2795 | + return 0; |
---|
| 2796 | + |
---|
| 2797 | + if (ulen >= tp_len + 1) { |
---|
| 2798 | + if (copy_to_user(ubuf, tp_name, tp_len + 1)) |
---|
| 2799 | + return -EFAULT; |
---|
| 2800 | + } else { |
---|
| 2801 | + char zero = '\0'; |
---|
| 2802 | + |
---|
| 2803 | + if (copy_to_user(ubuf, tp_name, ulen - 1)) |
---|
| 2804 | + return -EFAULT; |
---|
| 2805 | + if (put_user(zero, ubuf + ulen - 1)) |
---|
| 2806 | + return -EFAULT; |
---|
| 2807 | + return -ENOSPC; |
---|
| 2808 | + } |
---|
| 2809 | + |
---|
| 2810 | + return 0; |
---|
| 2811 | +} |
---|
| 2812 | + |
---|
| 2813 | +static const struct bpf_link_ops bpf_raw_tp_link_lops = { |
---|
| 2814 | + .release = bpf_raw_tp_link_release, |
---|
| 2815 | + .dealloc = bpf_raw_tp_link_dealloc, |
---|
| 2816 | + .show_fdinfo = bpf_raw_tp_link_show_fdinfo, |
---|
| 2817 | + .fill_link_info = bpf_raw_tp_link_fill_link_info, |
---|
1537 | 2818 | }; |
---|
1538 | 2819 | |
---|
1539 | 2820 | #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd |
---|
1540 | 2821 | |
---|
1541 | 2822 | static int bpf_raw_tracepoint_open(const union bpf_attr *attr) |
---|
1542 | 2823 | { |
---|
1543 | | - struct bpf_raw_tracepoint *raw_tp; |
---|
| 2824 | + struct bpf_link_primer link_primer; |
---|
| 2825 | + struct bpf_raw_tp_link *link; |
---|
1544 | 2826 | struct bpf_raw_event_map *btp; |
---|
1545 | 2827 | struct bpf_prog *prog; |
---|
1546 | | - char tp_name[128]; |
---|
1547 | | - int tp_fd, err; |
---|
| 2828 | + const char *tp_name; |
---|
| 2829 | + char buf[128]; |
---|
| 2830 | + int err; |
---|
1548 | 2831 | |
---|
1549 | | - if (strncpy_from_user(tp_name, u64_to_user_ptr(attr->raw_tracepoint.name), |
---|
1550 | | - sizeof(tp_name) - 1) < 0) |
---|
1551 | | - return -EFAULT; |
---|
1552 | | - tp_name[sizeof(tp_name) - 1] = 0; |
---|
| 2832 | + if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) |
---|
| 2833 | + return -EINVAL; |
---|
1553 | 2834 | |
---|
1554 | | - btp = bpf_find_raw_tracepoint(tp_name); |
---|
1555 | | - if (!btp) |
---|
1556 | | - return -ENOENT; |
---|
| 2835 | + prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); |
---|
| 2836 | + if (IS_ERR(prog)) |
---|
| 2837 | + return PTR_ERR(prog); |
---|
1557 | 2838 | |
---|
1558 | | - raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER); |
---|
1559 | | - if (!raw_tp) |
---|
1560 | | - return -ENOMEM; |
---|
1561 | | - raw_tp->btp = btp; |
---|
1562 | | - |
---|
1563 | | - prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd, |
---|
1564 | | - BPF_PROG_TYPE_RAW_TRACEPOINT); |
---|
1565 | | - if (IS_ERR(prog)) { |
---|
1566 | | - err = PTR_ERR(prog); |
---|
1567 | | - goto out_free_tp; |
---|
1568 | | - } |
---|
1569 | | - |
---|
1570 | | - err = bpf_probe_register(raw_tp->btp, prog); |
---|
1571 | | - if (err) |
---|
| 2839 | + switch (prog->type) { |
---|
| 2840 | + case BPF_PROG_TYPE_TRACING: |
---|
| 2841 | + case BPF_PROG_TYPE_EXT: |
---|
| 2842 | + case BPF_PROG_TYPE_LSM: |
---|
| 2843 | + if (attr->raw_tracepoint.name) { |
---|
| 2844 | + /* The attach point for this category of programs |
---|
| 2845 | + * should be specified via btf_id during program load. |
---|
| 2846 | + */ |
---|
| 2847 | + err = -EINVAL; |
---|
| 2848 | + goto out_put_prog; |
---|
| 2849 | + } |
---|
| 2850 | + if (prog->type == BPF_PROG_TYPE_TRACING && |
---|
| 2851 | + prog->expected_attach_type == BPF_TRACE_RAW_TP) { |
---|
| 2852 | + tp_name = prog->aux->attach_func_name; |
---|
| 2853 | + break; |
---|
| 2854 | + } |
---|
| 2855 | + err = bpf_tracing_prog_attach(prog, 0, 0); |
---|
| 2856 | + if (err >= 0) |
---|
| 2857 | + return err; |
---|
1572 | 2858 | goto out_put_prog; |
---|
1573 | | - |
---|
1574 | | - raw_tp->prog = prog; |
---|
1575 | | - tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp, |
---|
1576 | | - O_CLOEXEC); |
---|
1577 | | - if (tp_fd < 0) { |
---|
1578 | | - bpf_probe_unregister(raw_tp->btp, prog); |
---|
1579 | | - err = tp_fd; |
---|
| 2859 | + case BPF_PROG_TYPE_RAW_TRACEPOINT: |
---|
| 2860 | + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: |
---|
| 2861 | + if (strncpy_from_user(buf, |
---|
| 2862 | + u64_to_user_ptr(attr->raw_tracepoint.name), |
---|
| 2863 | + sizeof(buf) - 1) < 0) { |
---|
| 2864 | + err = -EFAULT; |
---|
| 2865 | + goto out_put_prog; |
---|
| 2866 | + } |
---|
| 2867 | + buf[sizeof(buf) - 1] = 0; |
---|
| 2868 | + tp_name = buf; |
---|
| 2869 | + break; |
---|
| 2870 | + default: |
---|
| 2871 | + err = -EINVAL; |
---|
1580 | 2872 | goto out_put_prog; |
---|
1581 | 2873 | } |
---|
1582 | | - return tp_fd; |
---|
1583 | 2874 | |
---|
| 2875 | + btp = bpf_get_raw_tracepoint(tp_name); |
---|
| 2876 | + if (!btp) { |
---|
| 2877 | + err = -ENOENT; |
---|
| 2878 | + goto out_put_prog; |
---|
| 2879 | + } |
---|
| 2880 | + |
---|
| 2881 | + link = kzalloc(sizeof(*link), GFP_USER); |
---|
| 2882 | + if (!link) { |
---|
| 2883 | + err = -ENOMEM; |
---|
| 2884 | + goto out_put_btp; |
---|
| 2885 | + } |
---|
| 2886 | + bpf_link_init(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT, |
---|
| 2887 | + &bpf_raw_tp_link_lops, prog); |
---|
| 2888 | + link->btp = btp; |
---|
| 2889 | + |
---|
| 2890 | + err = bpf_link_prime(&link->link, &link_primer); |
---|
| 2891 | + if (err) { |
---|
| 2892 | + kfree(link); |
---|
| 2893 | + goto out_put_btp; |
---|
| 2894 | + } |
---|
| 2895 | + |
---|
| 2896 | + err = bpf_probe_register(link->btp, prog); |
---|
| 2897 | + if (err) { |
---|
| 2898 | + bpf_link_cleanup(&link_primer); |
---|
| 2899 | + goto out_put_btp; |
---|
| 2900 | + } |
---|
| 2901 | + |
---|
| 2902 | + return bpf_link_settle(&link_primer); |
---|
| 2903 | + |
---|
| 2904 | +out_put_btp: |
---|
| 2905 | + bpf_put_raw_tracepoint(btp); |
---|
1584 | 2906 | out_put_prog: |
---|
1585 | 2907 | bpf_prog_put(prog); |
---|
1586 | | -out_free_tp: |
---|
1587 | | - kfree(raw_tp); |
---|
1588 | 2908 | return err; |
---|
1589 | 2909 | } |
---|
1590 | 2910 | |
---|
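[Editor's note] And the classic named path through the same command, for BPF_PROG_TYPE_RAW_TRACEPOINT programs; a hedged sketch, where "sched_switch" is just an illustrative tracepoint name:

static int raw_tp_attach(int prog_fd, const char *tp_name)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.raw_tracepoint.name = (uint64_t)(unsigned long)tp_name;
	attr.raw_tracepoint.prog_fd = prog_fd;

	return syscall(__NR_bpf, BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
}

/* e.g. raw_tp_attach(prog_fd, "sched_switch"); */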
.. | .. |
---|
1594 | 2914 | switch (prog->type) { |
---|
1595 | 2915 | case BPF_PROG_TYPE_CGROUP_SOCK: |
---|
1596 | 2916 | case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: |
---|
| 2917 | + case BPF_PROG_TYPE_CGROUP_SOCKOPT: |
---|
| 2918 | + case BPF_PROG_TYPE_SK_LOOKUP: |
---|
1597 | 2919 | return attach_type == prog->expected_attach_type ? 0 : -EINVAL; |
---|
| 2920 | + case BPF_PROG_TYPE_CGROUP_SKB: |
---|
| 2921 | + if (!capable(CAP_NET_ADMIN)) |
---|
| 2922 | + /* cg-skb progs can be loaded by unpriv user. |
---|
| 2923 | + * check permissions at attach time. |
---|
| 2924 | + */ |
---|
| 2925 | + return -EPERM; |
---|
| 2926 | + return prog->enforce_expected_attach_type && |
---|
| 2927 | + prog->expected_attach_type != attach_type ? |
---|
| 2928 | + -EINVAL : 0; |
---|
1598 | 2929 | default: |
---|
1599 | 2930 | return 0; |
---|
1600 | 2931 | } |
---|
1601 | 2932 | } |
---|
1602 | 2933 | |
---|
1603 | | -#define BPF_PROG_ATTACH_LAST_FIELD attach_flags |
---|
| 2934 | +static enum bpf_prog_type |
---|
| 2935 | +attach_type_to_prog_type(enum bpf_attach_type attach_type) |
---|
| 2936 | +{ |
---|
| 2937 | + switch (attach_type) { |
---|
| 2938 | + case BPF_CGROUP_INET_INGRESS: |
---|
| 2939 | + case BPF_CGROUP_INET_EGRESS: |
---|
| 2940 | + return BPF_PROG_TYPE_CGROUP_SKB; |
---|
| 2941 | + case BPF_CGROUP_INET_SOCK_CREATE: |
---|
| 2942 | + case BPF_CGROUP_INET_SOCK_RELEASE: |
---|
| 2943 | + case BPF_CGROUP_INET4_POST_BIND: |
---|
| 2944 | + case BPF_CGROUP_INET6_POST_BIND: |
---|
| 2945 | + return BPF_PROG_TYPE_CGROUP_SOCK; |
---|
| 2946 | + case BPF_CGROUP_INET4_BIND: |
---|
| 2947 | + case BPF_CGROUP_INET6_BIND: |
---|
| 2948 | + case BPF_CGROUP_INET4_CONNECT: |
---|
| 2949 | + case BPF_CGROUP_INET6_CONNECT: |
---|
| 2950 | + case BPF_CGROUP_INET4_GETPEERNAME: |
---|
| 2951 | + case BPF_CGROUP_INET6_GETPEERNAME: |
---|
| 2952 | + case BPF_CGROUP_INET4_GETSOCKNAME: |
---|
| 2953 | + case BPF_CGROUP_INET6_GETSOCKNAME: |
---|
| 2954 | + case BPF_CGROUP_UDP4_SENDMSG: |
---|
| 2955 | + case BPF_CGROUP_UDP6_SENDMSG: |
---|
| 2956 | + case BPF_CGROUP_UDP4_RECVMSG: |
---|
| 2957 | + case BPF_CGROUP_UDP6_RECVMSG: |
---|
| 2958 | + return BPF_PROG_TYPE_CGROUP_SOCK_ADDR; |
---|
| 2959 | + case BPF_CGROUP_SOCK_OPS: |
---|
| 2960 | + return BPF_PROG_TYPE_SOCK_OPS; |
---|
| 2961 | + case BPF_CGROUP_DEVICE: |
---|
| 2962 | + return BPF_PROG_TYPE_CGROUP_DEVICE; |
---|
| 2963 | + case BPF_SK_MSG_VERDICT: |
---|
| 2964 | + return BPF_PROG_TYPE_SK_MSG; |
---|
| 2965 | + case BPF_SK_SKB_STREAM_PARSER: |
---|
| 2966 | + case BPF_SK_SKB_STREAM_VERDICT: |
---|
| 2967 | + return BPF_PROG_TYPE_SK_SKB; |
---|
| 2968 | + case BPF_LIRC_MODE2: |
---|
| 2969 | + return BPF_PROG_TYPE_LIRC_MODE2; |
---|
| 2970 | + case BPF_FLOW_DISSECTOR: |
---|
| 2971 | + return BPF_PROG_TYPE_FLOW_DISSECTOR; |
---|
| 2972 | + case BPF_CGROUP_SYSCTL: |
---|
| 2973 | + return BPF_PROG_TYPE_CGROUP_SYSCTL; |
---|
| 2974 | + case BPF_CGROUP_GETSOCKOPT: |
---|
| 2975 | + case BPF_CGROUP_SETSOCKOPT: |
---|
| 2976 | + return BPF_PROG_TYPE_CGROUP_SOCKOPT; |
---|
| 2977 | + case BPF_TRACE_ITER: |
---|
| 2978 | + return BPF_PROG_TYPE_TRACING; |
---|
| 2979 | + case BPF_SK_LOOKUP: |
---|
| 2980 | + return BPF_PROG_TYPE_SK_LOOKUP; |
---|
| 2981 | + case BPF_XDP: |
---|
| 2982 | + return BPF_PROG_TYPE_XDP; |
---|
| 2983 | + default: |
---|
| 2984 | + return BPF_PROG_TYPE_UNSPEC; |
---|
| 2985 | + } |
---|
| 2986 | +} |
---|
| 2987 | + |
---|
| 2988 | +#define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd |
---|
1604 | 2989 | |
---|
1605 | 2990 | #define BPF_F_ATTACH_MASK \ |
---|
1606 | | - (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI) |
---|
| 2991 | + (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE) |
---|
1607 | 2992 | |
---|
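BPF_F_REPLACE joins the attach-flag mask, paired with the new replace_bpf_fd last field above, so a caller can atomically swap one multi-attached cgroup program for another instead of detaching and re-attaching. A hedged fragment (all FDs are illustrative; includes as in the raw_tp_open sketch):

    union bpf_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.target_fd      = cgroup_fd;       /* open cgroup directory FD */
    attr.attach_bpf_fd  = new_prog_fd;
    attr.attach_type    = BPF_CGROUP_INET_EGRESS;
    attr.attach_flags   = BPF_F_ALLOW_MULTI | BPF_F_REPLACE;
    attr.replace_bpf_fd = old_prog_fd;     /* program being swapped out */
    syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));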
1608 | 2993 | static int bpf_prog_attach(const union bpf_attr *attr) |
---|
1609 | 2994 | { |
---|
.. | .. |
---|
1611 | 2996 | struct bpf_prog *prog; |
---|
1612 | 2997 | int ret; |
---|
1613 | 2998 | |
---|
1614 | | - if (!capable(CAP_NET_ADMIN)) |
---|
1615 | | - return -EPERM; |
---|
1616 | | - |
---|
1617 | 2999 | if (CHECK_ATTR(BPF_PROG_ATTACH)) |
---|
1618 | 3000 | return -EINVAL; |
---|
1619 | 3001 | |
---|
1620 | 3002 | if (attr->attach_flags & ~BPF_F_ATTACH_MASK) |
---|
1621 | 3003 | return -EINVAL; |
---|
1622 | 3004 | |
---|
1623 | | - switch (attr->attach_type) { |
---|
1624 | | - case BPF_CGROUP_INET_INGRESS: |
---|
1625 | | - case BPF_CGROUP_INET_EGRESS: |
---|
1626 | | - ptype = BPF_PROG_TYPE_CGROUP_SKB; |
---|
1627 | | - break; |
---|
1628 | | - case BPF_CGROUP_INET_SOCK_CREATE: |
---|
1629 | | - case BPF_CGROUP_INET4_POST_BIND: |
---|
1630 | | - case BPF_CGROUP_INET6_POST_BIND: |
---|
1631 | | - ptype = BPF_PROG_TYPE_CGROUP_SOCK; |
---|
1632 | | - break; |
---|
1633 | | - case BPF_CGROUP_INET4_BIND: |
---|
1634 | | - case BPF_CGROUP_INET6_BIND: |
---|
1635 | | - case BPF_CGROUP_INET4_CONNECT: |
---|
1636 | | - case BPF_CGROUP_INET6_CONNECT: |
---|
1637 | | - case BPF_CGROUP_UDP4_SENDMSG: |
---|
1638 | | - case BPF_CGROUP_UDP6_SENDMSG: |
---|
1639 | | - case BPF_CGROUP_UDP4_RECVMSG: |
---|
1640 | | - case BPF_CGROUP_UDP6_RECVMSG: |
---|
1641 | | - ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; |
---|
1642 | | - break; |
---|
1643 | | - case BPF_CGROUP_SOCK_OPS: |
---|
1644 | | - ptype = BPF_PROG_TYPE_SOCK_OPS; |
---|
1645 | | - break; |
---|
1646 | | - case BPF_CGROUP_DEVICE: |
---|
1647 | | - ptype = BPF_PROG_TYPE_CGROUP_DEVICE; |
---|
1648 | | - break; |
---|
1649 | | - case BPF_SK_MSG_VERDICT: |
---|
1650 | | - ptype = BPF_PROG_TYPE_SK_MSG; |
---|
1651 | | - break; |
---|
1652 | | - case BPF_SK_SKB_STREAM_PARSER: |
---|
1653 | | - case BPF_SK_SKB_STREAM_VERDICT: |
---|
1654 | | - ptype = BPF_PROG_TYPE_SK_SKB; |
---|
1655 | | - break; |
---|
1656 | | - case BPF_LIRC_MODE2: |
---|
1657 | | - ptype = BPF_PROG_TYPE_LIRC_MODE2; |
---|
1658 | | - break; |
---|
1659 | | - default: |
---|
| 3005 | + ptype = attach_type_to_prog_type(attr->attach_type); |
---|
| 3006 | + if (ptype == BPF_PROG_TYPE_UNSPEC) |
---|
1660 | 3007 | return -EINVAL; |
---|
1661 | | - } |
---|
1662 | 3008 | |
---|
1663 | 3009 | prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); |
---|
1664 | 3010 | if (IS_ERR(prog)) |
---|
.. | .. |
---|
1672 | 3018 | switch (ptype) { |
---|
1673 | 3019 | case BPF_PROG_TYPE_SK_SKB: |
---|
1674 | 3020 | case BPF_PROG_TYPE_SK_MSG: |
---|
1675 | | - ret = sockmap_get_from_fd(attr, ptype, prog); |
---|
| 3021 | + ret = sock_map_get_from_fd(attr, prog); |
---|
1676 | 3022 | break; |
---|
1677 | 3023 | case BPF_PROG_TYPE_LIRC_MODE2: |
---|
1678 | 3024 | ret = lirc_prog_attach(attr, prog); |
---|
1679 | 3025 | break; |
---|
1680 | | - default: |
---|
| 3026 | + case BPF_PROG_TYPE_FLOW_DISSECTOR: |
---|
| 3027 | + ret = netns_bpf_prog_attach(attr, prog); |
---|
| 3028 | + break; |
---|
| 3029 | + case BPF_PROG_TYPE_CGROUP_DEVICE: |
---|
| 3030 | + case BPF_PROG_TYPE_CGROUP_SKB: |
---|
| 3031 | + case BPF_PROG_TYPE_CGROUP_SOCK: |
---|
| 3032 | + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: |
---|
| 3033 | + case BPF_PROG_TYPE_CGROUP_SOCKOPT: |
---|
| 3034 | + case BPF_PROG_TYPE_CGROUP_SYSCTL: |
---|
| 3035 | + case BPF_PROG_TYPE_SOCK_OPS: |
---|
1681 | 3036 | ret = cgroup_bpf_prog_attach(attr, ptype, prog); |
---|
| 3037 | + break; |
---|
| 3038 | + default: |
---|
| 3039 | + ret = -EINVAL; |
---|
1682 | 3040 | } |
---|
1683 | 3041 | |
---|
1684 | 3042 | if (ret) |
---|
.. | .. |
---|
1692 | 3050 | { |
---|
1693 | 3051 | enum bpf_prog_type ptype; |
---|
1694 | 3052 | |
---|
1695 | | - if (!capable(CAP_NET_ADMIN)) |
---|
1696 | | - return -EPERM; |
---|
1697 | | - |
---|
1698 | 3053 | if (CHECK_ATTR(BPF_PROG_DETACH)) |
---|
1699 | 3054 | return -EINVAL; |
---|
1700 | 3055 | |
---|
1701 | | - switch (attr->attach_type) { |
---|
1702 | | - case BPF_CGROUP_INET_INGRESS: |
---|
1703 | | - case BPF_CGROUP_INET_EGRESS: |
---|
1704 | | - ptype = BPF_PROG_TYPE_CGROUP_SKB; |
---|
1705 | | - break; |
---|
1706 | | - case BPF_CGROUP_INET_SOCK_CREATE: |
---|
1707 | | - case BPF_CGROUP_INET4_POST_BIND: |
---|
1708 | | - case BPF_CGROUP_INET6_POST_BIND: |
---|
1709 | | - ptype = BPF_PROG_TYPE_CGROUP_SOCK; |
---|
1710 | | - break; |
---|
1711 | | - case BPF_CGROUP_INET4_BIND: |
---|
1712 | | - case BPF_CGROUP_INET6_BIND: |
---|
1713 | | - case BPF_CGROUP_INET4_CONNECT: |
---|
1714 | | - case BPF_CGROUP_INET6_CONNECT: |
---|
1715 | | - case BPF_CGROUP_UDP4_SENDMSG: |
---|
1716 | | - case BPF_CGROUP_UDP6_SENDMSG: |
---|
1717 | | - case BPF_CGROUP_UDP4_RECVMSG: |
---|
1718 | | - case BPF_CGROUP_UDP6_RECVMSG: |
---|
1719 | | - ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; |
---|
1720 | | - break; |
---|
1721 | | - case BPF_CGROUP_SOCK_OPS: |
---|
1722 | | - ptype = BPF_PROG_TYPE_SOCK_OPS; |
---|
1723 | | - break; |
---|
1724 | | - case BPF_CGROUP_DEVICE: |
---|
1725 | | - ptype = BPF_PROG_TYPE_CGROUP_DEVICE; |
---|
1726 | | - break; |
---|
1727 | | - case BPF_SK_MSG_VERDICT: |
---|
1728 | | - return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, NULL); |
---|
1729 | | - case BPF_SK_SKB_STREAM_PARSER: |
---|
1730 | | - case BPF_SK_SKB_STREAM_VERDICT: |
---|
1731 | | - return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL); |
---|
1732 | | - case BPF_LIRC_MODE2: |
---|
| 3056 | + ptype = attach_type_to_prog_type(attr->attach_type); |
---|
| 3057 | + |
---|
| 3058 | + switch (ptype) { |
---|
| 3059 | + case BPF_PROG_TYPE_SK_MSG: |
---|
| 3060 | + case BPF_PROG_TYPE_SK_SKB: |
---|
| 3061 | + return sock_map_prog_detach(attr, ptype); |
---|
| 3062 | + case BPF_PROG_TYPE_LIRC_MODE2: |
---|
1733 | 3063 | return lirc_prog_detach(attr); |
---|
| 3064 | + case BPF_PROG_TYPE_FLOW_DISSECTOR: |
---|
| 3065 | + return netns_bpf_prog_detach(attr, ptype); |
---|
| 3066 | + case BPF_PROG_TYPE_CGROUP_DEVICE: |
---|
| 3067 | + case BPF_PROG_TYPE_CGROUP_SKB: |
---|
| 3068 | + case BPF_PROG_TYPE_CGROUP_SOCK: |
---|
| 3069 | + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: |
---|
| 3070 | + case BPF_PROG_TYPE_CGROUP_SOCKOPT: |
---|
| 3071 | + case BPF_PROG_TYPE_CGROUP_SYSCTL: |
---|
| 3072 | + case BPF_PROG_TYPE_SOCK_OPS: |
---|
| 3073 | + return cgroup_bpf_prog_detach(attr, ptype); |
---|
1734 | 3074 | default: |
---|
1735 | 3075 | return -EINVAL; |
---|
1736 | 3076 | } |
---|
1737 | | - |
---|
1738 | | - return cgroup_bpf_prog_detach(attr, ptype); |
---|
1739 | 3077 | } |
---|
1740 | 3078 | |
---|
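bpf_prog_detach() now mirrors the attach path: the blanket CAP_NET_ADMIN check is gone (permissions are enforced per attach point instead), and the long attach-type switch collapses into attach_type_to_prog_type() plus a dispatch on program type. Detaching is therefore symmetric with attaching from userspace; a fragment with illustrative FDs:

    union bpf_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.target_fd     = cgroup_fd;
    attr.attach_bpf_fd = prog_fd;
    attr.attach_type   = BPF_CGROUP_INET_EGRESS;
    syscall(__NR_bpf, BPF_PROG_DETACH, &attr, sizeof(attr));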
1741 | 3079 | #define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt |
---|
.. | .. |
---|
1754 | 3092 | case BPF_CGROUP_INET_INGRESS: |
---|
1755 | 3093 | case BPF_CGROUP_INET_EGRESS: |
---|
1756 | 3094 | case BPF_CGROUP_INET_SOCK_CREATE: |
---|
| 3095 | + case BPF_CGROUP_INET_SOCK_RELEASE: |
---|
1757 | 3096 | case BPF_CGROUP_INET4_BIND: |
---|
1758 | 3097 | case BPF_CGROUP_INET6_BIND: |
---|
1759 | 3098 | case BPF_CGROUP_INET4_POST_BIND: |
---|
1760 | 3099 | case BPF_CGROUP_INET6_POST_BIND: |
---|
1761 | 3100 | case BPF_CGROUP_INET4_CONNECT: |
---|
1762 | 3101 | case BPF_CGROUP_INET6_CONNECT: |
---|
| 3102 | + case BPF_CGROUP_INET4_GETPEERNAME: |
---|
| 3103 | + case BPF_CGROUP_INET6_GETPEERNAME: |
---|
| 3104 | + case BPF_CGROUP_INET4_GETSOCKNAME: |
---|
| 3105 | + case BPF_CGROUP_INET6_GETSOCKNAME: |
---|
1763 | 3106 | case BPF_CGROUP_UDP4_SENDMSG: |
---|
1764 | 3107 | case BPF_CGROUP_UDP6_SENDMSG: |
---|
1765 | 3108 | case BPF_CGROUP_UDP4_RECVMSG: |
---|
1766 | 3109 | case BPF_CGROUP_UDP6_RECVMSG: |
---|
1767 | 3110 | case BPF_CGROUP_SOCK_OPS: |
---|
1768 | 3111 | case BPF_CGROUP_DEVICE: |
---|
1769 | | - break; |
---|
| 3112 | + case BPF_CGROUP_SYSCTL: |
---|
| 3113 | + case BPF_CGROUP_GETSOCKOPT: |
---|
| 3114 | + case BPF_CGROUP_SETSOCKOPT: |
---|
| 3115 | + return cgroup_bpf_prog_query(attr, uattr); |
---|
1770 | 3116 | case BPF_LIRC_MODE2: |
---|
1771 | 3117 | return lirc_prog_query(attr, uattr); |
---|
| 3118 | + case BPF_FLOW_DISSECTOR: |
---|
| 3119 | + case BPF_SK_LOOKUP: |
---|
| 3120 | + return netns_bpf_prog_query(attr, uattr); |
---|
1772 | 3121 | default: |
---|
1773 | 3122 | return -EINVAL; |
---|
1774 | 3123 | } |
---|
1775 | | - |
---|
1776 | | - return cgroup_bpf_prog_query(attr, uattr); |
---|
1777 | 3124 | } |
---|
1778 | 3125 | |
---|
1779 | | -#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration |
---|
| 3126 | +#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu |
---|
1780 | 3127 | |
---|
1781 | 3128 | static int bpf_prog_test_run(const union bpf_attr *attr, |
---|
1782 | 3129 | union bpf_attr __user *uattr) |
---|
.. | .. |
---|
1784 | 3131 | struct bpf_prog *prog; |
---|
1785 | 3132 | int ret = -ENOTSUPP; |
---|
1786 | 3133 | |
---|
1787 | | - if (!capable(CAP_SYS_ADMIN)) |
---|
1788 | | - return -EPERM; |
---|
1789 | 3134 | if (CHECK_ATTR(BPF_PROG_TEST_RUN)) |
---|
| 3135 | + return -EINVAL; |
---|
| 3136 | + |
---|
| 3137 | + if ((attr->test.ctx_size_in && !attr->test.ctx_in) || |
---|
| 3138 | + (!attr->test.ctx_size_in && attr->test.ctx_in)) |
---|
| 3139 | + return -EINVAL; |
---|
| 3140 | + |
---|
| 3141 | + if ((attr->test.ctx_size_out && !attr->test.ctx_out) || |
---|
| 3142 | + (!attr->test.ctx_size_out && attr->test.ctx_out)) |
---|
1790 | 3143 | return -EINVAL; |
---|
1791 | 3144 | |
---|
1792 | 3145 | prog = bpf_prog_get(attr->test.prog_fd); |
---|
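BPF_PROG_TEST_RUN gains ctx_in/ctx_out support (the last attr field moves from test.duration to test.cpu), and the new checks require a context pointer and its size to be supplied together or not at all. A fragment feeding a context in; the __sk_buff value is arbitrary:

    union bpf_attr attr;
    struct __sk_buff ctx_in = { .mark = 42 };      /* illustrative */

    memset(&attr, 0, sizeof(attr));
    attr.test.prog_fd     = prog_fd;
    attr.test.ctx_in      = (__u64)(unsigned long)&ctx_in;
    attr.test.ctx_size_in = sizeof(ctx_in);        /* must pair with ctx_in */
    syscall(__NR_bpf, BPF_PROG_TEST_RUN, &attr, sizeof(attr));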
.. | .. |
---|
1828 | 3181 | return err; |
---|
1829 | 3182 | } |
---|
1830 | 3183 | |
---|
| 3184 | +struct bpf_map *bpf_map_get_curr_or_next(u32 *id) |
---|
| 3185 | +{ |
---|
| 3186 | + struct bpf_map *map; |
---|
| 3187 | + |
---|
| 3188 | + spin_lock_bh(&map_idr_lock); |
---|
| 3189 | +again: |
---|
| 3190 | + map = idr_get_next(&map_idr, id); |
---|
| 3191 | + if (map) { |
---|
| 3192 | + map = __bpf_map_inc_not_zero(map, false); |
---|
| 3193 | + if (IS_ERR(map)) { |
---|
| 3194 | + (*id)++; |
---|
| 3195 | + goto again; |
---|
| 3196 | + } |
---|
| 3197 | + } |
---|
| 3198 | + spin_unlock_bh(&map_idr_lock); |
---|
| 3199 | + |
---|
| 3200 | + return map; |
---|
| 3201 | +} |
---|
| 3202 | + |
---|
| 3203 | +struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id) |
---|
| 3204 | +{ |
---|
| 3205 | + struct bpf_prog *prog; |
---|
| 3206 | + |
---|
| 3207 | + spin_lock_bh(&prog_idr_lock); |
---|
| 3208 | +again: |
---|
| 3209 | + prog = idr_get_next(&prog_idr, id); |
---|
| 3210 | + if (prog) { |
---|
| 3211 | + prog = bpf_prog_inc_not_zero(prog); |
---|
| 3212 | + if (IS_ERR(prog)) { |
---|
| 3213 | + (*id)++; |
---|
| 3214 | + goto again; |
---|
| 3215 | + } |
---|
| 3216 | + } |
---|
| 3217 | + spin_unlock_bh(&prog_idr_lock); |
---|
| 3218 | + |
---|
| 3219 | + return prog; |
---|
| 3220 | +} |
---|
| 3221 | + |
---|
1831 | 3222 | #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id |
---|
| 3223 | + |
---|
| 3224 | +struct bpf_prog *bpf_prog_by_id(u32 id) |
---|
| 3225 | +{ |
---|
| 3226 | + struct bpf_prog *prog; |
---|
| 3227 | + |
---|
| 3228 | + if (!id) |
---|
| 3229 | + return ERR_PTR(-ENOENT); |
---|
| 3230 | + |
---|
| 3231 | + spin_lock_bh(&prog_idr_lock); |
---|
| 3232 | + prog = idr_find(&prog_idr, id); |
---|
| 3233 | + if (prog) |
---|
| 3234 | + prog = bpf_prog_inc_not_zero(prog); |
---|
| 3235 | + else |
---|
| 3236 | + prog = ERR_PTR(-ENOENT); |
---|
| 3237 | + spin_unlock_bh(&prog_idr_lock); |
---|
| 3238 | + return prog; |
---|
| 3239 | +} |
---|
1832 | 3240 | |
---|
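bpf_map_get_curr_or_next() and bpf_prog_get_curr_or_next() back the new map and prog iterators: under the IDR lock they return the object at or after *id, but only if a reference can still be taken, silently skipping objects whose refcount already hit zero. The same pattern in miniature; obj and obj_idr are hypothetical, not kernel API:

    again:
            obj = idr_get_next(&obj_idr, &id);
            if (obj && !refcount_inc_not_zero(&obj->refcnt)) {
                    id++;           /* skip the dying object and retry */
                    goto again;
            }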
1833 | 3241 | static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) |
---|
1834 | 3242 | { |
---|
.. | .. |
---|
1842 | 3250 | if (!capable(CAP_SYS_ADMIN)) |
---|
1843 | 3251 | return -EPERM; |
---|
1844 | 3252 | |
---|
1845 | | - spin_lock_bh(&prog_idr_lock); |
---|
1846 | | - prog = idr_find(&prog_idr, id); |
---|
1847 | | - if (prog) |
---|
1848 | | - prog = bpf_prog_inc_not_zero(prog); |
---|
1849 | | - else |
---|
1850 | | - prog = ERR_PTR(-ENOENT); |
---|
1851 | | - spin_unlock_bh(&prog_idr_lock); |
---|
1852 | | - |
---|
| 3253 | + prog = bpf_prog_by_id(id); |
---|
1853 | 3254 | if (IS_ERR(prog)) |
---|
1854 | 3255 | return PTR_ERR(prog); |
---|
1855 | 3256 | |
---|
.. | .. |
---|
1883 | 3284 | spin_lock_bh(&map_idr_lock); |
---|
1884 | 3285 | map = idr_find(&map_idr, id); |
---|
1885 | 3286 | if (map) |
---|
1886 | | - map = bpf_map_inc_not_zero(map, true); |
---|
| 3287 | + map = __bpf_map_inc_not_zero(map, true); |
---|
1887 | 3288 | else |
---|
1888 | 3289 | map = ERR_PTR(-ENOENT); |
---|
1889 | 3290 | spin_unlock_bh(&map_idr_lock); |
---|
.. | .. |
---|
1899 | 3300 | } |
---|
1900 | 3301 | |
---|
1901 | 3302 | static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, |
---|
1902 | | - unsigned long addr) |
---|
| 3303 | + unsigned long addr, u32 *off, |
---|
| 3304 | + u32 *type) |
---|
1903 | 3305 | { |
---|
| 3306 | + const struct bpf_map *map; |
---|
1904 | 3307 | int i; |
---|
1905 | 3308 | |
---|
1906 | | - for (i = 0; i < prog->aux->used_map_cnt; i++) |
---|
1907 | | - if (prog->aux->used_maps[i] == (void *)addr) |
---|
1908 | | - return prog->aux->used_maps[i]; |
---|
1909 | | - return NULL; |
---|
| 3309 | + mutex_lock(&prog->aux->used_maps_mutex); |
---|
| 3310 | + for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) { |
---|
| 3311 | + map = prog->aux->used_maps[i]; |
---|
| 3312 | + if (map == (void *)addr) { |
---|
| 3313 | + *type = BPF_PSEUDO_MAP_FD; |
---|
| 3314 | + goto out; |
---|
| 3315 | + } |
---|
| 3316 | + if (!map->ops->map_direct_value_meta) |
---|
| 3317 | + continue; |
---|
| 3318 | + if (!map->ops->map_direct_value_meta(map, addr, off)) { |
---|
| 3319 | + *type = BPF_PSEUDO_MAP_VALUE; |
---|
| 3320 | + goto out; |
---|
| 3321 | + } |
---|
| 3322 | + } |
---|
| 3323 | + map = NULL; |
---|
| 3324 | + |
---|
| 3325 | +out: |
---|
| 3326 | + mutex_unlock(&prog->aux->used_maps_mutex); |
---|
| 3327 | + return map; |
---|
1910 | 3328 | } |
---|
1911 | 3329 | |
---|
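bpf_map_from_imm() now walks used_maps under the new used_maps_mutex and can also resolve an address that points into a map's value area via map_direct_value_meta(), reporting BPF_PSEUDO_MAP_VALUE plus an offset rather than matching only whole-map pointers. For a dumped ld_imm64 this means userspace sees, roughly:

    /* After bpf_insn_prepare_dump() rewrites a ld_imm64 (sketch): */
    insns[i].src_reg = BPF_PSEUDO_MAP_VALUE;  /* or BPF_PSEUDO_MAP_FD */
    insns[i].imm     = map->id;   /* stable map ID, not a kernel pointer */
    insns[i + 1].imm = off;       /* offset into the map value, else 0 */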
1912 | 3330 | static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, |
---|
.. | .. |
---|
1914 | 3332 | { |
---|
1915 | 3333 | const struct bpf_map *map; |
---|
1916 | 3334 | struct bpf_insn *insns; |
---|
| 3335 | + u32 off, type; |
---|
1917 | 3336 | u64 imm; |
---|
| 3337 | + u8 code; |
---|
1918 | 3338 | int i; |
---|
1919 | 3339 | |
---|
1920 | 3340 | insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), |
---|
.. | .. |
---|
1923 | 3343 | return insns; |
---|
1924 | 3344 | |
---|
1925 | 3345 | for (i = 0; i < prog->len; i++) { |
---|
1926 | | - if (insns[i].code == (BPF_JMP | BPF_TAIL_CALL)) { |
---|
| 3346 | + code = insns[i].code; |
---|
| 3347 | + |
---|
| 3348 | + if (code == (BPF_JMP | BPF_TAIL_CALL)) { |
---|
1927 | 3349 | insns[i].code = BPF_JMP | BPF_CALL; |
---|
1928 | 3350 | insns[i].imm = BPF_FUNC_tail_call; |
---|
1929 | 3351 | /* fall-through */ |
---|
1930 | 3352 | } |
---|
1931 | | - if (insns[i].code == (BPF_JMP | BPF_CALL) || |
---|
1932 | | - insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) { |
---|
1933 | | - if (insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) |
---|
| 3353 | + if (code == (BPF_JMP | BPF_CALL) || |
---|
| 3354 | + code == (BPF_JMP | BPF_CALL_ARGS)) { |
---|
| 3355 | + if (code == (BPF_JMP | BPF_CALL_ARGS)) |
---|
1934 | 3356 | insns[i].code = BPF_JMP | BPF_CALL; |
---|
1935 | 3357 | if (!bpf_dump_raw_ok(f_cred)) |
---|
1936 | 3358 | insns[i].imm = 0; |
---|
1937 | 3359 | continue; |
---|
1938 | 3360 | } |
---|
1939 | | - |
---|
1940 | | - if (insns[i].code != (BPF_LD | BPF_IMM | BPF_DW)) |
---|
1941 | | - continue; |
---|
1942 | | - |
---|
1943 | | - imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; |
---|
1944 | | - map = bpf_map_from_imm(prog, imm); |
---|
1945 | | - if (map) { |
---|
1946 | | - insns[i].src_reg = BPF_PSEUDO_MAP_FD; |
---|
1947 | | - insns[i].imm = map->id; |
---|
1948 | | - insns[i + 1].imm = 0; |
---|
| 3361 | + if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) { |
---|
| 3362 | + insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM; |
---|
1949 | 3363 | continue; |
---|
1950 | 3364 | } |
---|
1951 | 3365 | |
---|
1952 | | - if (!bpf_dump_raw_ok(f_cred) && |
---|
1953 | | - imm == (unsigned long)prog->aux) { |
---|
1954 | | - insns[i].imm = 0; |
---|
1955 | | - insns[i + 1].imm = 0; |
---|
| 3366 | + if (code != (BPF_LD | BPF_IMM | BPF_DW)) |
---|
| 3367 | + continue; |
---|
| 3368 | + |
---|
| 3369 | + imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; |
---|
| 3370 | + map = bpf_map_from_imm(prog, imm, &off, &type); |
---|
| 3371 | + if (map) { |
---|
| 3372 | + insns[i].src_reg = type; |
---|
| 3373 | + insns[i].imm = map->id; |
---|
| 3374 | + insns[i + 1].imm = off; |
---|
1956 | 3375 | continue; |
---|
1957 | 3376 | } |
---|
1958 | 3377 | } |
---|
1959 | 3378 | |
---|
1960 | 3379 | return insns; |
---|
| 3380 | +} |
---|
| 3381 | + |
---|
| 3382 | +static int set_info_rec_size(struct bpf_prog_info *info) |
---|
| 3383 | +{ |
---|
| 3384 | + /* |
---|
| 3385 | + * Ensure info.*_rec_size is the same as the kernel's expected size, |
---|
| 3386 | + * |
---|
| 3387 | + * or |
---|
| 3388 | + * |
---|
| 3389 | + * only allow a zero *_rec_size when the matching _cnt is also |
---|
| 3390 | + * zero. In that case, the kernel will write the expected |
---|
| 3391 | + * _rec_size back into the info. |
---|
| 3392 | + */ |
---|
| 3393 | + |
---|
| 3394 | + if ((info->nr_func_info || info->func_info_rec_size) && |
---|
| 3395 | + info->func_info_rec_size != sizeof(struct bpf_func_info)) |
---|
| 3396 | + return -EINVAL; |
---|
| 3397 | + |
---|
| 3398 | + if ((info->nr_line_info || info->line_info_rec_size) && |
---|
| 3399 | + info->line_info_rec_size != sizeof(struct bpf_line_info)) |
---|
| 3400 | + return -EINVAL; |
---|
| 3401 | + |
---|
| 3402 | + if ((info->nr_jited_line_info || info->jited_line_info_rec_size) && |
---|
| 3403 | + info->jited_line_info_rec_size != sizeof(__u64)) |
---|
| 3404 | + return -EINVAL; |
---|
| 3405 | + |
---|
| 3406 | + info->func_info_rec_size = sizeof(struct bpf_func_info); |
---|
| 3407 | + info->line_info_rec_size = sizeof(struct bpf_line_info); |
---|
| 3408 | + info->jited_line_info_rec_size = sizeof(__u64); |
---|
| 3409 | + |
---|
| 3410 | + return 0; |
---|
1961 | 3411 | } |
---|
1962 | 3412 | |
---|
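set_info_rec_size() pins the BTF-related record sizes: a caller must either pass exactly the sizes the kernel expects or leave both the size and the count zero, in which case the kernel reports the expected sizes back. That enables the usual two-pass query via libbpf's bpf_obj_get_info_by_fd() from <bpf/bpf.h>; prog_fd is illustrative:

    struct bpf_prog_info info = {};
    __u32 len = sizeof(info);

    /* First pass: all *_rec_size/_cnt zero; the kernel fills in the
     * expected record sizes and counts. */
    bpf_obj_get_info_by_fd(prog_fd, &info, &len);
    /* info.func_info_rec_size == sizeof(struct bpf_func_info) here; a
     * second pass would allocate buffers of that size and query again. */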
1963 | 3413 | static int bpf_prog_get_info_by_fd(struct file *file, |
---|
.. | .. |
---|
1968 | 3418 | struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); |
---|
1969 | 3419 | struct bpf_prog_info info; |
---|
1970 | 3420 | u32 info_len = attr->info.info_len; |
---|
| 3421 | + struct bpf_prog_stats stats; |
---|
1971 | 3422 | char __user *uinsns; |
---|
1972 | 3423 | u32 ulen; |
---|
1973 | 3424 | int err; |
---|
.. | .. |
---|
1991 | 3442 | memcpy(info.tag, prog->tag, sizeof(prog->tag)); |
---|
1992 | 3443 | memcpy(info.name, prog->aux->name, sizeof(prog->aux->name)); |
---|
1993 | 3444 | |
---|
| 3445 | + mutex_lock(&prog->aux->used_maps_mutex); |
---|
1994 | 3446 | ulen = info.nr_map_ids; |
---|
1995 | 3447 | info.nr_map_ids = prog->aux->used_map_cnt; |
---|
1996 | 3448 | ulen = min_t(u32, info.nr_map_ids, ulen); |
---|
.. | .. |
---|
2000 | 3452 | |
---|
2001 | 3453 | for (i = 0; i < ulen; i++) |
---|
2002 | 3454 | if (put_user(prog->aux->used_maps[i]->id, |
---|
2003 | | - &user_map_ids[i])) |
---|
| 3455 | + &user_map_ids[i])) { |
---|
| 3456 | + mutex_unlock(&prog->aux->used_maps_mutex); |
---|
2004 | 3457 | return -EFAULT; |
---|
| 3458 | + } |
---|
2005 | 3459 | } |
---|
| 3460 | + mutex_unlock(&prog->aux->used_maps_mutex); |
---|
2006 | 3461 | |
---|
2007 | | - if (!capable(CAP_SYS_ADMIN)) { |
---|
| 3462 | + err = set_info_rec_size(&info); |
---|
| 3463 | + if (err) |
---|
| 3464 | + return err; |
---|
| 3465 | + |
---|
| 3466 | + bpf_prog_get_stats(prog, &stats); |
---|
| 3467 | + info.run_time_ns = stats.nsecs; |
---|
| 3468 | + info.run_cnt = stats.cnt; |
---|
| 3469 | + |
---|
| 3470 | + if (!bpf_capable()) { |
---|
2008 | 3471 | info.jited_prog_len = 0; |
---|
2009 | 3472 | info.xlated_prog_len = 0; |
---|
2010 | 3473 | info.nr_jited_ksyms = 0; |
---|
2011 | 3474 | info.nr_jited_func_lens = 0; |
---|
| 3475 | + info.nr_func_info = 0; |
---|
| 3476 | + info.nr_line_info = 0; |
---|
| 3477 | + info.nr_jited_line_info = 0; |
---|
2012 | 3478 | goto done; |
---|
2013 | 3479 | } |
---|
2014 | 3480 | |
---|
.. | .. |
---|
2089 | 3555 | } |
---|
2090 | 3556 | |
---|
2091 | 3557 | ulen = info.nr_jited_ksyms; |
---|
2092 | | - info.nr_jited_ksyms = prog->aux->func_cnt; |
---|
2093 | | - if (info.nr_jited_ksyms && ulen) { |
---|
| 3558 | + info.nr_jited_ksyms = prog->aux->func_cnt ? : 1; |
---|
| 3559 | + if (ulen) { |
---|
2094 | 3560 | if (bpf_dump_raw_ok(file->f_cred)) { |
---|
| 3561 | + unsigned long ksym_addr; |
---|
2095 | 3562 | u64 __user *user_ksyms; |
---|
2096 | | - ulong ksym_addr; |
---|
2097 | 3563 | u32 i; |
---|
2098 | 3564 | |
---|
2099 | 3565 | /* copy the address of the kernel symbol |
---|
.. | .. |
---|
2101 | 3567 | */ |
---|
2102 | 3568 | ulen = min_t(u32, info.nr_jited_ksyms, ulen); |
---|
2103 | 3569 | user_ksyms = u64_to_user_ptr(info.jited_ksyms); |
---|
2104 | | - for (i = 0; i < ulen; i++) { |
---|
2105 | | - ksym_addr = (ulong) prog->aux->func[i]->bpf_func; |
---|
2106 | | - ksym_addr &= PAGE_MASK; |
---|
2107 | | - if (put_user((u64) ksym_addr, &user_ksyms[i])) |
---|
| 3570 | + if (prog->aux->func_cnt) { |
---|
| 3571 | + for (i = 0; i < ulen; i++) { |
---|
| 3572 | + ksym_addr = (unsigned long) |
---|
| 3573 | + prog->aux->func[i]->bpf_func; |
---|
| 3574 | + if (put_user((u64) ksym_addr, |
---|
| 3575 | + &user_ksyms[i])) |
---|
| 3576 | + return -EFAULT; |
---|
| 3577 | + } |
---|
| 3578 | + } else { |
---|
| 3579 | + ksym_addr = (unsigned long) prog->bpf_func; |
---|
| 3580 | + if (put_user((u64) ksym_addr, &user_ksyms[0])) |
---|
2108 | 3581 | return -EFAULT; |
---|
2109 | 3582 | } |
---|
2110 | 3583 | } else { |
---|
.. | .. |
---|
2113 | 3586 | } |
---|
2114 | 3587 | |
---|
2115 | 3588 | ulen = info.nr_jited_func_lens; |
---|
2116 | | - info.nr_jited_func_lens = prog->aux->func_cnt; |
---|
2117 | | - if (info.nr_jited_func_lens && ulen) { |
---|
| 3589 | + info.nr_jited_func_lens = prog->aux->func_cnt ? : 1; |
---|
| 3590 | + if (ulen) { |
---|
2118 | 3591 | if (bpf_dump_raw_ok(file->f_cred)) { |
---|
2119 | 3592 | u32 __user *user_lens; |
---|
2120 | 3593 | u32 func_len, i; |
---|
.. | .. |
---|
2122 | 3595 | /* copy the JITed image lengths for each function */ |
---|
2123 | 3596 | ulen = min_t(u32, info.nr_jited_func_lens, ulen); |
---|
2124 | 3597 | user_lens = u64_to_user_ptr(info.jited_func_lens); |
---|
2125 | | - for (i = 0; i < ulen; i++) { |
---|
2126 | | - func_len = prog->aux->func[i]->jited_len; |
---|
2127 | | - if (put_user(func_len, &user_lens[i])) |
---|
| 3598 | + if (prog->aux->func_cnt) { |
---|
| 3599 | + for (i = 0; i < ulen; i++) { |
---|
| 3600 | + func_len = |
---|
| 3601 | + prog->aux->func[i]->jited_len; |
---|
| 3602 | + if (put_user(func_len, &user_lens[i])) |
---|
| 3603 | + return -EFAULT; |
---|
| 3604 | + } |
---|
| 3605 | + } else { |
---|
| 3606 | + func_len = prog->jited_len; |
---|
| 3607 | + if (put_user(func_len, &user_lens[0])) |
---|
2128 | 3608 | return -EFAULT; |
---|
2129 | 3609 | } |
---|
2130 | 3610 | } else { |
---|
2131 | 3611 | info.jited_func_lens = 0; |
---|
| 3612 | + } |
---|
| 3613 | + } |
---|
| 3614 | + |
---|
| 3615 | + if (prog->aux->btf) |
---|
| 3616 | + info.btf_id = btf_id(prog->aux->btf); |
---|
| 3617 | + |
---|
| 3618 | + ulen = info.nr_func_info; |
---|
| 3619 | + info.nr_func_info = prog->aux->func_info_cnt; |
---|
| 3620 | + if (info.nr_func_info && ulen) { |
---|
| 3621 | + char __user *user_finfo; |
---|
| 3622 | + |
---|
| 3623 | + user_finfo = u64_to_user_ptr(info.func_info); |
---|
| 3624 | + ulen = min_t(u32, info.nr_func_info, ulen); |
---|
| 3625 | + if (copy_to_user(user_finfo, prog->aux->func_info, |
---|
| 3626 | + info.func_info_rec_size * ulen)) |
---|
| 3627 | + return -EFAULT; |
---|
| 3628 | + } |
---|
| 3629 | + |
---|
| 3630 | + ulen = info.nr_line_info; |
---|
| 3631 | + info.nr_line_info = prog->aux->nr_linfo; |
---|
| 3632 | + if (info.nr_line_info && ulen) { |
---|
| 3633 | + __u8 __user *user_linfo; |
---|
| 3634 | + |
---|
| 3635 | + user_linfo = u64_to_user_ptr(info.line_info); |
---|
| 3636 | + ulen = min_t(u32, info.nr_line_info, ulen); |
---|
| 3637 | + if (copy_to_user(user_linfo, prog->aux->linfo, |
---|
| 3638 | + info.line_info_rec_size * ulen)) |
---|
| 3639 | + return -EFAULT; |
---|
| 3640 | + } |
---|
| 3641 | + |
---|
| 3642 | + ulen = info.nr_jited_line_info; |
---|
| 3643 | + if (prog->aux->jited_linfo) |
---|
| 3644 | + info.nr_jited_line_info = prog->aux->nr_linfo; |
---|
| 3645 | + else |
---|
| 3646 | + info.nr_jited_line_info = 0; |
---|
| 3647 | + if (info.nr_jited_line_info && ulen) { |
---|
| 3648 | + if (bpf_dump_raw_ok(file->f_cred)) { |
---|
| 3649 | + __u64 __user *user_linfo; |
---|
| 3650 | + u32 i; |
---|
| 3651 | + |
---|
| 3652 | + user_linfo = u64_to_user_ptr(info.jited_line_info); |
---|
| 3653 | + ulen = min_t(u32, info.nr_jited_line_info, ulen); |
---|
| 3654 | + for (i = 0; i < ulen; i++) { |
---|
| 3655 | + if (put_user((__u64)(long)prog->aux->jited_linfo[i], |
---|
| 3656 | + &user_linfo[i])) |
---|
| 3657 | + return -EFAULT; |
---|
| 3658 | + } |
---|
| 3659 | + } else { |
---|
| 3660 | + info.jited_line_info = 0; |
---|
| 3661 | + } |
---|
| 3662 | + } |
---|
| 3663 | + |
---|
| 3664 | + ulen = info.nr_prog_tags; |
---|
| 3665 | + info.nr_prog_tags = prog->aux->func_cnt ? : 1; |
---|
| 3666 | + if (ulen) { |
---|
| 3667 | + __u8 __user (*user_prog_tags)[BPF_TAG_SIZE]; |
---|
| 3668 | + u32 i; |
---|
| 3669 | + |
---|
| 3670 | + user_prog_tags = u64_to_user_ptr(info.prog_tags); |
---|
| 3671 | + ulen = min_t(u32, info.nr_prog_tags, ulen); |
---|
| 3672 | + if (prog->aux->func_cnt) { |
---|
| 3673 | + for (i = 0; i < ulen; i++) { |
---|
| 3674 | + if (copy_to_user(user_prog_tags[i], |
---|
| 3675 | + prog->aux->func[i]->tag, |
---|
| 3676 | + BPF_TAG_SIZE)) |
---|
| 3677 | + return -EFAULT; |
---|
| 3678 | + } |
---|
| 3679 | + } else { |
---|
| 3680 | + if (copy_to_user(user_prog_tags[0], |
---|
| 3681 | + prog->tag, BPF_TAG_SIZE)) |
---|
| 3682 | + return -EFAULT; |
---|
2132 | 3683 | } |
---|
2133 | 3684 | } |
---|
2134 | 3685 | |
---|
.. | .. |
---|
2169 | 3720 | info.btf_key_type_id = map->btf_key_type_id; |
---|
2170 | 3721 | info.btf_value_type_id = map->btf_value_type_id; |
---|
2171 | 3722 | } |
---|
| 3723 | + info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; |
---|
2172 | 3724 | |
---|
2173 | 3725 | if (bpf_map_is_dev_bound(map)) { |
---|
2174 | 3726 | err = bpf_map_offload_info_fill(&info, map); |
---|
.. | .. |
---|
2199 | 3751 | return btf_get_info_by_fd(btf, attr, uattr); |
---|
2200 | 3752 | } |
---|
2201 | 3753 | |
---|
| 3754 | +static int bpf_link_get_info_by_fd(struct file *file, |
---|
| 3755 | + struct bpf_link *link, |
---|
| 3756 | + const union bpf_attr *attr, |
---|
| 3757 | + union bpf_attr __user *uattr) |
---|
| 3758 | +{ |
---|
| 3759 | + struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info); |
---|
| 3760 | + struct bpf_link_info info; |
---|
| 3761 | + u32 info_len = attr->info.info_len; |
---|
| 3762 | + int err; |
---|
| 3763 | + |
---|
| 3764 | + err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len); |
---|
| 3765 | + if (err) |
---|
| 3766 | + return err; |
---|
| 3767 | + info_len = min_t(u32, sizeof(info), info_len); |
---|
| 3768 | + |
---|
| 3769 | + memset(&info, 0, sizeof(info)); |
---|
| 3770 | + if (copy_from_user(&info, uinfo, info_len)) |
---|
| 3771 | + return -EFAULT; |
---|
| 3772 | + |
---|
| 3773 | + info.type = link->type; |
---|
| 3774 | + info.id = link->id; |
---|
| 3775 | + info.prog_id = link->prog->aux->id; |
---|
| 3776 | + |
---|
| 3777 | + if (link->ops->fill_link_info) { |
---|
| 3778 | + err = link->ops->fill_link_info(link, &info); |
---|
| 3779 | + if (err) |
---|
| 3780 | + return err; |
---|
| 3781 | + } |
---|
| 3782 | + |
---|
| 3783 | + if (copy_to_user(uinfo, &info, info_len) || |
---|
| 3784 | + put_user(info_len, &uattr->info.info_len)) |
---|
| 3785 | + return -EFAULT; |
---|
| 3786 | + |
---|
| 3787 | + return 0; |
---|
| 3788 | +} |
---|
| 3789 | + |
---|
| 3790 | + |
---|
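bpf_link_get_info_by_fd() gives link FDs the same BPF_OBJ_GET_INFO_BY_FD treatment as progs, maps and BTF: the common fields (type, id, prog_id) are filled centrally and per-type details are delegated to the fill_link_info callback. Querying it from userspace, as a fragment with an illustrative link_fd:

    struct bpf_link_info info = {};
    __u32 len = sizeof(info);

    if (!bpf_obj_get_info_by_fd(link_fd, &info, &len))
            printf("link %u: prog %u, type %u\n",
                   info.id, info.prog_id, info.type);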
2202 | 3791 | #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info |
---|
2203 | 3792 | |
---|
2204 | 3793 | static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, |
---|
.. | .. |
---|
2223 | 3812 | uattr); |
---|
2224 | 3813 | else if (f.file->f_op == &btf_fops) |
---|
2225 | 3814 | err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr); |
---|
| 3815 | + else if (f.file->f_op == &bpf_link_fops) |
---|
| 3816 | + err = bpf_link_get_info_by_fd(f.file, f.file->private_data, |
---|
| 3817 | + attr, uattr); |
---|
2226 | 3818 | else |
---|
2227 | 3819 | err = -EINVAL; |
---|
2228 | 3820 | |
---|
.. | .. |
---|
2237 | 3829 | if (CHECK_ATTR(BPF_BTF_LOAD)) |
---|
2238 | 3830 | return -EINVAL; |
---|
2239 | 3831 | |
---|
2240 | | - if (!capable(CAP_SYS_ADMIN)) |
---|
| 3832 | + if (!bpf_capable()) |
---|
2241 | 3833 | return -EPERM; |
---|
2242 | 3834 | |
---|
2243 | 3835 | return btf_new_fd(attr); |
---|
.. | .. |
---|
2325 | 3917 | if (attr->task_fd_query.flags != 0) |
---|
2326 | 3918 | return -EINVAL; |
---|
2327 | 3919 | |
---|
| 3920 | + rcu_read_lock(); |
---|
2328 | 3921 | task = get_pid_task(find_vpid(pid), PIDTYPE_PID); |
---|
| 3922 | + rcu_read_unlock(); |
---|
2329 | 3923 | if (!task) |
---|
2330 | 3924 | return -ENOENT; |
---|
2331 | 3925 | |
---|
.. | .. |
---|
2347 | 3941 | if (err) |
---|
2348 | 3942 | goto out; |
---|
2349 | 3943 | |
---|
2350 | | - if (file->f_op == &bpf_raw_tp_fops) { |
---|
2351 | | - struct bpf_raw_tracepoint *raw_tp = file->private_data; |
---|
2352 | | - struct bpf_raw_event_map *btp = raw_tp->btp; |
---|
| 3944 | + if (file->f_op == &bpf_link_fops) { |
---|
| 3945 | + struct bpf_link *link = file->private_data; |
---|
2353 | 3946 | |
---|
2354 | | - err = bpf_task_fd_query_copy(attr, uattr, |
---|
2355 | | - raw_tp->prog->aux->id, |
---|
2356 | | - BPF_FD_TYPE_RAW_TRACEPOINT, |
---|
2357 | | - btp->tp->name, 0, 0); |
---|
2358 | | - goto put_file; |
---|
| 3947 | + if (link->ops == &bpf_raw_tp_link_lops) { |
---|
| 3948 | + struct bpf_raw_tp_link *raw_tp = |
---|
| 3949 | + container_of(link, struct bpf_raw_tp_link, link); |
---|
| 3950 | + struct bpf_raw_event_map *btp = raw_tp->btp; |
---|
| 3951 | + |
---|
| 3952 | + err = bpf_task_fd_query_copy(attr, uattr, |
---|
| 3953 | + raw_tp->link.prog->aux->id, |
---|
| 3954 | + BPF_FD_TYPE_RAW_TRACEPOINT, |
---|
| 3955 | + btp->tp->name, 0, 0); |
---|
| 3956 | + goto put_file; |
---|
| 3957 | + } |
---|
| 3958 | + goto out_not_supp; |
---|
2359 | 3959 | } |
---|
2360 | 3960 | |
---|
2361 | 3961 | event = perf_get_event(file); |
---|
.. | .. |
---|
2375 | 3975 | goto put_file; |
---|
2376 | 3976 | } |
---|
2377 | 3977 | |
---|
| 3978 | +out_not_supp: |
---|
2378 | 3979 | err = -ENOTSUPP; |
---|
2379 | 3980 | put_file: |
---|
2380 | 3981 | fput(file); |
---|
.. | .. |
---|
2382 | 3983 | return err; |
---|
2383 | 3984 | } |
---|
2384 | 3985 | |
---|
| 3986 | +#define BPF_MAP_BATCH_LAST_FIELD batch.flags |
---|
| 3987 | + |
---|
| 3988 | +#define BPF_DO_BATCH(fn) \ |
---|
| 3989 | + do { \ |
---|
| 3990 | + if (!fn) { \ |
---|
| 3991 | + err = -ENOTSUPP; \ |
---|
| 3992 | + goto err_put; \ |
---|
| 3993 | + } \ |
---|
| 3994 | + err = fn(map, attr, uattr); \ |
---|
| 3995 | + } while (0) |
---|
| 3996 | + |
---|
| 3997 | +static int bpf_map_do_batch(const union bpf_attr *attr, |
---|
| 3998 | + union bpf_attr __user *uattr, |
---|
| 3999 | + int cmd) |
---|
| 4000 | +{ |
---|
| 4001 | + bool has_read = cmd == BPF_MAP_LOOKUP_BATCH || |
---|
| 4002 | + cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH; |
---|
| 4003 | + bool has_write = cmd != BPF_MAP_LOOKUP_BATCH; |
---|
| 4004 | + struct bpf_map *map; |
---|
| 4005 | + int err, ufd; |
---|
| 4006 | + struct fd f; |
---|
| 4007 | + |
---|
| 4008 | + if (CHECK_ATTR(BPF_MAP_BATCH)) |
---|
| 4009 | + return -EINVAL; |
---|
| 4010 | + |
---|
| 4011 | + ufd = attr->batch.map_fd; |
---|
| 4012 | + f = fdget(ufd); |
---|
| 4013 | + map = __bpf_map_get(f); |
---|
| 4014 | + if (IS_ERR(map)) |
---|
| 4015 | + return PTR_ERR(map); |
---|
| 4016 | + if (has_write) |
---|
| 4017 | + bpf_map_write_active_inc(map); |
---|
| 4018 | + if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { |
---|
| 4019 | + err = -EPERM; |
---|
| 4020 | + goto err_put; |
---|
| 4021 | + } |
---|
| 4022 | + if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { |
---|
| 4023 | + err = -EPERM; |
---|
| 4024 | + goto err_put; |
---|
| 4025 | + } |
---|
| 4026 | + |
---|
| 4027 | + if (cmd == BPF_MAP_LOOKUP_BATCH) |
---|
| 4028 | + BPF_DO_BATCH(map->ops->map_lookup_batch); |
---|
| 4029 | + else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) |
---|
| 4030 | + BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch); |
---|
| 4031 | + else if (cmd == BPF_MAP_UPDATE_BATCH) |
---|
| 4032 | + BPF_DO_BATCH(map->ops->map_update_batch); |
---|
| 4033 | + else |
---|
| 4034 | + BPF_DO_BATCH(map->ops->map_delete_batch); |
---|
| 4035 | +err_put: |
---|
| 4036 | + if (has_write) |
---|
| 4037 | + bpf_map_write_active_dec(map); |
---|
| 4038 | + fdput(f); |
---|
| 4039 | + return err; |
---|
| 4040 | +} |
---|
| 4041 | + |
---|
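bpf_map_do_batch() multiplexes the four new batch commands, takes the map's write-active count for mutating commands, and checks FMODE_CAN_READ/FMODE_CAN_WRITE per command; BPF_DO_BATCH turns a missing per-map callback into -ENOTSUPP. From userspace, lookups are paged with an opaque batch token, for example through libbpf's bpf_map_lookup_batch(); the buffer sizes below are assumptions:

    __u32 out_batch, count = 64;   /* up to 64 entries per call */

    /* First call: in_batch == NULL starts from the beginning; pass
     * &out_batch as in_batch on later calls to continue the walk. */
    bpf_map_lookup_batch(map_fd, NULL, &out_batch,
                         keys, vals, &count, NULL);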
| 4042 | +static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) |
---|
| 4043 | +{ |
---|
| 4044 | + if (attr->link_create.attach_type != prog->expected_attach_type) |
---|
| 4045 | + return -EINVAL; |
---|
| 4046 | + |
---|
| 4047 | + if (prog->expected_attach_type == BPF_TRACE_ITER) |
---|
| 4048 | + return bpf_iter_link_attach(attr, prog); |
---|
| 4049 | + else if (prog->type == BPF_PROG_TYPE_EXT) |
---|
| 4050 | + return bpf_tracing_prog_attach(prog, |
---|
| 4051 | + attr->link_create.target_fd, |
---|
| 4052 | + attr->link_create.target_btf_id); |
---|
| 4053 | + return -EINVAL; |
---|
| 4054 | +} |
---|
| 4055 | + |
---|
| 4056 | +#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len |
---|
| 4057 | +static int link_create(union bpf_attr *attr) |
---|
| 4058 | +{ |
---|
| 4059 | + enum bpf_prog_type ptype; |
---|
| 4060 | + struct bpf_prog *prog; |
---|
| 4061 | + int ret; |
---|
| 4062 | + |
---|
| 4063 | + if (CHECK_ATTR(BPF_LINK_CREATE)) |
---|
| 4064 | + return -EINVAL; |
---|
| 4065 | + |
---|
| 4066 | + prog = bpf_prog_get(attr->link_create.prog_fd); |
---|
| 4067 | + if (IS_ERR(prog)) |
---|
| 4068 | + return PTR_ERR(prog); |
---|
| 4069 | + |
---|
| 4070 | + ret = bpf_prog_attach_check_attach_type(prog, |
---|
| 4071 | + attr->link_create.attach_type); |
---|
| 4072 | + if (ret) |
---|
| 4073 | + goto out; |
---|
| 4074 | + |
---|
| 4075 | + if (prog->type == BPF_PROG_TYPE_EXT) { |
---|
| 4076 | + ret = tracing_bpf_link_attach(attr, prog); |
---|
| 4077 | + goto out; |
---|
| 4078 | + } |
---|
| 4079 | + |
---|
| 4080 | + ptype = attach_type_to_prog_type(attr->link_create.attach_type); |
---|
| 4081 | + if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) { |
---|
| 4082 | + ret = -EINVAL; |
---|
| 4083 | + goto out; |
---|
| 4084 | + } |
---|
| 4085 | + |
---|
| 4086 | + switch (ptype) { |
---|
| 4087 | + case BPF_PROG_TYPE_CGROUP_SKB: |
---|
| 4088 | + case BPF_PROG_TYPE_CGROUP_SOCK: |
---|
| 4089 | + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: |
---|
| 4090 | + case BPF_PROG_TYPE_SOCK_OPS: |
---|
| 4091 | + case BPF_PROG_TYPE_CGROUP_DEVICE: |
---|
| 4092 | + case BPF_PROG_TYPE_CGROUP_SYSCTL: |
---|
| 4093 | + case BPF_PROG_TYPE_CGROUP_SOCKOPT: |
---|
| 4094 | + ret = cgroup_bpf_link_attach(attr, prog); |
---|
| 4095 | + break; |
---|
| 4096 | + case BPF_PROG_TYPE_TRACING: |
---|
| 4097 | + ret = tracing_bpf_link_attach(attr, prog); |
---|
| 4098 | + break; |
---|
| 4099 | + case BPF_PROG_TYPE_FLOW_DISSECTOR: |
---|
| 4100 | + case BPF_PROG_TYPE_SK_LOOKUP: |
---|
| 4101 | + ret = netns_bpf_link_create(attr, prog); |
---|
| 4102 | + break; |
---|
| 4103 | +#ifdef CONFIG_NET |
---|
| 4104 | + case BPF_PROG_TYPE_XDP: |
---|
| 4105 | + ret = bpf_xdp_link_attach(attr, prog); |
---|
| 4106 | + break; |
---|
| 4107 | +#endif |
---|
| 4108 | + default: |
---|
| 4109 | + ret = -EINVAL; |
---|
| 4110 | + } |
---|
| 4111 | + |
---|
| 4112 | +out: |
---|
| 4113 | + if (ret < 0) |
---|
| 4114 | + bpf_prog_put(prog); |
---|
| 4115 | + return ret; |
---|
| 4116 | +} |
---|
| 4117 | + |
---|
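link_create() is the generic BPF_LINK_CREATE entry point: it checks that the attach type matches the program, special-cases EXT and tracing programs, and otherwise dispatches by program type to the cgroup, netns or XDP link implementations. A minimal cgroup example with illustrative FDs:

    union bpf_attr attr;
    int link_fd;

    memset(&attr, 0, sizeof(attr));
    attr.link_create.prog_fd     = prog_fd;
    attr.link_create.target_fd   = cgroup_fd;
    attr.link_create.attach_type = BPF_CGROUP_INET_INGRESS;
    /* On success the returned FD owns the attachment; dropping the
     * last reference detaches the program automatically. */
    link_fd = syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));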
| 4118 | +#define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd |
---|
| 4119 | + |
---|
| 4120 | +static int link_update(union bpf_attr *attr) |
---|
| 4121 | +{ |
---|
| 4122 | + struct bpf_prog *old_prog = NULL, *new_prog; |
---|
| 4123 | + struct bpf_link *link; |
---|
| 4124 | + u32 flags; |
---|
| 4125 | + int ret; |
---|
| 4126 | + |
---|
| 4127 | + if (CHECK_ATTR(BPF_LINK_UPDATE)) |
---|
| 4128 | + return -EINVAL; |
---|
| 4129 | + |
---|
| 4130 | + flags = attr->link_update.flags; |
---|
| 4131 | + if (flags & ~BPF_F_REPLACE) |
---|
| 4132 | + return -EINVAL; |
---|
| 4133 | + |
---|
| 4134 | + link = bpf_link_get_from_fd(attr->link_update.link_fd); |
---|
| 4135 | + if (IS_ERR(link)) |
---|
| 4136 | + return PTR_ERR(link); |
---|
| 4137 | + |
---|
| 4138 | + new_prog = bpf_prog_get(attr->link_update.new_prog_fd); |
---|
| 4139 | + if (IS_ERR(new_prog)) { |
---|
| 4140 | + ret = PTR_ERR(new_prog); |
---|
| 4141 | + goto out_put_link; |
---|
| 4142 | + } |
---|
| 4143 | + |
---|
| 4144 | + if (flags & BPF_F_REPLACE) { |
---|
| 4145 | + old_prog = bpf_prog_get(attr->link_update.old_prog_fd); |
---|
| 4146 | + if (IS_ERR(old_prog)) { |
---|
| 4147 | + ret = PTR_ERR(old_prog); |
---|
| 4148 | + old_prog = NULL; |
---|
| 4149 | + goto out_put_progs; |
---|
| 4150 | + } |
---|
| 4151 | + } else if (attr->link_update.old_prog_fd) { |
---|
| 4152 | + ret = -EINVAL; |
---|
| 4153 | + goto out_put_progs; |
---|
| 4154 | + } |
---|
| 4155 | + |
---|
| 4156 | + if (link->ops->update_prog) |
---|
| 4157 | + ret = link->ops->update_prog(link, new_prog, old_prog); |
---|
| 4158 | + else |
---|
| 4159 | + ret = -EINVAL; |
---|
| 4160 | + |
---|
| 4161 | +out_put_progs: |
---|
| 4162 | + if (old_prog) |
---|
| 4163 | + bpf_prog_put(old_prog); |
---|
| 4164 | + if (ret) |
---|
| 4165 | + bpf_prog_put(new_prog); |
---|
| 4166 | +out_put_link: |
---|
| 4167 | + bpf_link_put(link); |
---|
| 4168 | + return ret; |
---|
| 4169 | +} |
---|
| 4170 | + |
---|
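link_update() swaps the program behind an existing link with no detach/attach window. With BPF_F_REPLACE the caller must also name the program it expects to be replacing; without it, old_prog_fd must be zero. Fragment:

    union bpf_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.link_update.link_fd     = link_fd;
    attr.link_update.new_prog_fd = new_prog_fd;
    attr.link_update.flags       = BPF_F_REPLACE;  /* optional */
    attr.link_update.old_prog_fd = old_prog_fd;    /* required with REPLACE */
    syscall(__NR_bpf, BPF_LINK_UPDATE, &attr, sizeof(attr));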
| 4171 | +#define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd |
---|
| 4172 | + |
---|
| 4173 | +static int link_detach(union bpf_attr *attr) |
---|
| 4174 | +{ |
---|
| 4175 | + struct bpf_link *link; |
---|
| 4176 | + int ret; |
---|
| 4177 | + |
---|
| 4178 | + if (CHECK_ATTR(BPF_LINK_DETACH)) |
---|
| 4179 | + return -EINVAL; |
---|
| 4180 | + |
---|
| 4181 | + link = bpf_link_get_from_fd(attr->link_detach.link_fd); |
---|
| 4182 | + if (IS_ERR(link)) |
---|
| 4183 | + return PTR_ERR(link); |
---|
| 4184 | + |
---|
| 4185 | + if (link->ops->detach) |
---|
| 4186 | + ret = link->ops->detach(link); |
---|
| 4187 | + else |
---|
| 4188 | + ret = -EOPNOTSUPP; |
---|
| 4189 | + |
---|
| 4190 | + bpf_link_put(link); |
---|
| 4191 | + return ret; |
---|
| 4192 | +} |
---|
| 4193 | + |
---|
| 4194 | +static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) |
---|
| 4195 | +{ |
---|
| 4196 | + return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT); |
---|
| 4197 | +} |
---|
| 4198 | + |
---|
| 4199 | +struct bpf_link *bpf_link_by_id(u32 id) |
---|
| 4200 | +{ |
---|
| 4201 | + struct bpf_link *link; |
---|
| 4202 | + |
---|
| 4203 | + if (!id) |
---|
| 4204 | + return ERR_PTR(-ENOENT); |
---|
| 4205 | + |
---|
| 4206 | + spin_lock_bh(&link_idr_lock); |
---|
| 4207 | + /* before link is "settled", ID is 0, pretend it doesn't exist yet */ |
---|
| 4208 | + link = idr_find(&link_idr, id); |
---|
| 4209 | + if (link) { |
---|
| 4210 | + if (link->id) |
---|
| 4211 | + link = bpf_link_inc_not_zero(link); |
---|
| 4212 | + else |
---|
| 4213 | + link = ERR_PTR(-EAGAIN); |
---|
| 4214 | + } else { |
---|
| 4215 | + link = ERR_PTR(-ENOENT); |
---|
| 4216 | + } |
---|
| 4217 | + spin_unlock_bh(&link_idr_lock); |
---|
| 4218 | + return link; |
---|
| 4219 | +} |
---|
| 4220 | + |
---|
| 4221 | +#define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id |
---|
| 4222 | + |
---|
| 4223 | +static int bpf_link_get_fd_by_id(const union bpf_attr *attr) |
---|
| 4224 | +{ |
---|
| 4225 | + struct bpf_link *link; |
---|
| 4226 | + u32 id = attr->link_id; |
---|
| 4227 | + int fd; |
---|
| 4228 | + |
---|
| 4229 | + if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID)) |
---|
| 4230 | + return -EINVAL; |
---|
| 4231 | + |
---|
| 4232 | + if (!capable(CAP_SYS_ADMIN)) |
---|
| 4233 | + return -EPERM; |
---|
| 4234 | + |
---|
| 4235 | + link = bpf_link_by_id(id); |
---|
| 4236 | + if (IS_ERR(link)) |
---|
| 4237 | + return PTR_ERR(link); |
---|
| 4238 | + |
---|
| 4239 | + fd = bpf_link_new_fd(link); |
---|
| 4240 | + if (fd < 0) |
---|
| 4241 | + bpf_link_put(link); |
---|
| 4242 | + |
---|
| 4243 | + return fd; |
---|
| 4244 | +} |
---|
| 4245 | + |
---|
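bpf_link_by_id() mirrors the prog/map ID lookups, but a link whose ID is still 0 has not been settled yet, so it is reported as -EAGAIN rather than handed out half-constructed. Enumerating links from userspace follows the same ID-walking pattern as progs and maps (fragment; BPF_LINK_GET_NEXT_ID requires CAP_SYS_ADMIN):

    __u32 id = 0;
    union bpf_attr attr;

    for (;;) {
            memset(&attr, 0, sizeof(attr));
            attr.start_id = id;
            if (syscall(__NR_bpf, BPF_LINK_GET_NEXT_ID,
                        &attr, sizeof(attr)) < 0)
                    break;          /* -ENOENT: no more links */
            id = attr.next_id;
            /* ... BPF_LINK_GET_FD_BY_ID with attr.link_id = id ... */
    }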
| 4246 | +DEFINE_MUTEX(bpf_stats_enabled_mutex); |
---|
| 4247 | + |
---|
| 4248 | +static int bpf_stats_release(struct inode *inode, struct file *file) |
---|
| 4249 | +{ |
---|
| 4250 | + mutex_lock(&bpf_stats_enabled_mutex); |
---|
| 4251 | + static_key_slow_dec(&bpf_stats_enabled_key.key); |
---|
| 4252 | + mutex_unlock(&bpf_stats_enabled_mutex); |
---|
| 4253 | + return 0; |
---|
| 4254 | +} |
---|
| 4255 | + |
---|
| 4256 | +static const struct file_operations bpf_stats_fops = { |
---|
| 4257 | + .release = bpf_stats_release, |
---|
| 4258 | +}; |
---|
| 4259 | + |
---|
| 4260 | +static int bpf_enable_runtime_stats(void) |
---|
| 4261 | +{ |
---|
| 4262 | + int fd; |
---|
| 4263 | + |
---|
| 4264 | + mutex_lock(&bpf_stats_enabled_mutex); |
---|
| 4265 | + |
---|
| 4266 | + /* Set a very high limit to avoid overflow */ |
---|
| 4267 | + if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) { |
---|
| 4268 | + mutex_unlock(&bpf_stats_enabled_mutex); |
---|
| 4269 | + return -EBUSY; |
---|
| 4270 | + } |
---|
| 4271 | + |
---|
| 4272 | + fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC); |
---|
| 4273 | + if (fd >= 0) |
---|
| 4274 | + static_key_slow_inc(&bpf_stats_enabled_key.key); |
---|
| 4275 | + |
---|
| 4276 | + mutex_unlock(&bpf_stats_enabled_mutex); |
---|
| 4277 | + return fd; |
---|
| 4278 | +} |
---|
| 4279 | + |
---|
| 4280 | +#define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type |
---|
| 4281 | + |
---|
| 4282 | +static int bpf_enable_stats(union bpf_attr *attr) |
---|
| 4283 | +{ |
---|
| 4284 | + |
---|
| 4285 | + if (CHECK_ATTR(BPF_ENABLE_STATS)) |
---|
| 4286 | + return -EINVAL; |
---|
| 4287 | + |
---|
| 4288 | + if (!capable(CAP_SYS_ADMIN)) |
---|
| 4289 | + return -EPERM; |
---|
| 4290 | + |
---|
| 4291 | + switch (attr->enable_stats.type) { |
---|
| 4292 | + case BPF_STATS_RUN_TIME: |
---|
| 4293 | + return bpf_enable_runtime_stats(); |
---|
| 4294 | + default: |
---|
| 4295 | + break; |
---|
| 4296 | + } |
---|
| 4297 | + return -EINVAL; |
---|
| 4298 | +} |
---|
| 4299 | + |
---|
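BPF_ENABLE_STATS ties runtime statistics to an FD: bpf_enable_runtime_stats() bumps the bpf_stats_enabled static key and returns an anon-inode FD whose release decrements it again, so stats stay enabled exactly as long as at least one such FD is open. Fragment:

    union bpf_attr attr;
    int stats_fd;

    memset(&attr, 0, sizeof(attr));
    attr.enable_stats.type = BPF_STATS_RUN_TIME;
    stats_fd = syscall(__NR_bpf, BPF_ENABLE_STATS, &attr, sizeof(attr));
    /* ... run the workload; run_time_ns/run_cnt now accumulate and are
     * reported through bpf_prog_get_info_by_fd() above ... */
    close(stats_fd);        /* last holder turns stats back off */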
| 4300 | +#define BPF_ITER_CREATE_LAST_FIELD iter_create.flags |
---|
| 4301 | + |
---|
| 4302 | +static int bpf_iter_create(union bpf_attr *attr) |
---|
| 4303 | +{ |
---|
| 4304 | + struct bpf_link *link; |
---|
| 4305 | + int err; |
---|
| 4306 | + |
---|
| 4307 | + if (CHECK_ATTR(BPF_ITER_CREATE)) |
---|
| 4308 | + return -EINVAL; |
---|
| 4309 | + |
---|
| 4310 | + if (attr->iter_create.flags) |
---|
| 4311 | + return -EINVAL; |
---|
| 4312 | + |
---|
| 4313 | + link = bpf_link_get_from_fd(attr->iter_create.link_fd); |
---|
| 4314 | + if (IS_ERR(link)) |
---|
| 4315 | + return PTR_ERR(link); |
---|
| 4316 | + |
---|
| 4317 | + err = bpf_iter_new_fd(link); |
---|
| 4318 | + bpf_link_put(link); |
---|
| 4319 | + |
---|
| 4320 | + return err; |
---|
| 4321 | +} |
---|
| 4322 | + |
---|
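bpf_iter_create() turns an attached BPF_TRACE_ITER link into a readable FD; each read() drives the iterator program over the kernel objects it targets. Fragment with an illustrative iter_link_fd:

    union bpf_attr attr;
    char buf[4096];
    ssize_t n;
    int iter_fd;

    memset(&attr, 0, sizeof(attr));
    attr.iter_create.link_fd = iter_link_fd;   /* flags must be 0 */
    iter_fd = syscall(__NR_bpf, BPF_ITER_CREATE, &attr, sizeof(attr));
    while ((n = read(iter_fd, buf, sizeof(buf))) > 0)
            write(STDOUT_FILENO, buf, n);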
| 4323 | +#define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags |
---|
| 4324 | + |
---|
| 4325 | +static int bpf_prog_bind_map(union bpf_attr *attr) |
---|
| 4326 | +{ |
---|
| 4327 | + struct bpf_prog *prog; |
---|
| 4328 | + struct bpf_map *map; |
---|
| 4329 | + struct bpf_map **used_maps_old, **used_maps_new; |
---|
| 4330 | + int i, ret = 0; |
---|
| 4331 | + |
---|
| 4332 | + if (CHECK_ATTR(BPF_PROG_BIND_MAP)) |
---|
| 4333 | + return -EINVAL; |
---|
| 4334 | + |
---|
| 4335 | + if (attr->prog_bind_map.flags) |
---|
| 4336 | + return -EINVAL; |
---|
| 4337 | + |
---|
| 4338 | + prog = bpf_prog_get(attr->prog_bind_map.prog_fd); |
---|
| 4339 | + if (IS_ERR(prog)) |
---|
| 4340 | + return PTR_ERR(prog); |
---|
| 4341 | + |
---|
| 4342 | + map = bpf_map_get(attr->prog_bind_map.map_fd); |
---|
| 4343 | + if (IS_ERR(map)) { |
---|
| 4344 | + ret = PTR_ERR(map); |
---|
| 4345 | + goto out_prog_put; |
---|
| 4346 | + } |
---|
| 4347 | + |
---|
| 4348 | + mutex_lock(&prog->aux->used_maps_mutex); |
---|
| 4349 | + |
---|
| 4350 | + used_maps_old = prog->aux->used_maps; |
---|
| 4351 | + |
---|
| 4352 | + for (i = 0; i < prog->aux->used_map_cnt; i++) |
---|
| 4353 | + if (used_maps_old[i] == map) { |
---|
| 4354 | + bpf_map_put(map); |
---|
| 4355 | + goto out_unlock; |
---|
| 4356 | + } |
---|
| 4357 | + |
---|
| 4358 | + used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1, |
---|
| 4359 | + sizeof(used_maps_new[0]), |
---|
| 4360 | + GFP_KERNEL); |
---|
| 4361 | + if (!used_maps_new) { |
---|
| 4362 | + ret = -ENOMEM; |
---|
| 4363 | + goto out_unlock; |
---|
| 4364 | + } |
---|
| 4365 | + |
---|
| 4366 | + memcpy(used_maps_new, used_maps_old, |
---|
| 4367 | + sizeof(used_maps_old[0]) * prog->aux->used_map_cnt); |
---|
| 4368 | + used_maps_new[prog->aux->used_map_cnt] = map; |
---|
| 4369 | + |
---|
| 4370 | + prog->aux->used_map_cnt++; |
---|
| 4371 | + prog->aux->used_maps = used_maps_new; |
---|
| 4372 | + |
---|
| 4373 | + kfree(used_maps_old); |
---|
| 4374 | + |
---|
| 4375 | +out_unlock: |
---|
| 4376 | + mutex_unlock(&prog->aux->used_maps_mutex); |
---|
| 4377 | + |
---|
| 4378 | + if (ret) |
---|
| 4379 | + bpf_map_put(map); |
---|
| 4380 | +out_prog_put: |
---|
| 4381 | + bpf_prog_put(prog); |
---|
| 4382 | + return ret; |
---|
| 4383 | +} |
---|
| 4384 | + |
---|
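bpf_prog_bind_map() lets userspace append a map to a program's used_maps array even though no instruction references it, so the map lives at least as long as the program (handy for metadata maps); binding a map that is already bound succeeds as a no-op. Fragment:

    union bpf_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.prog_bind_map.prog_fd = prog_fd;
    attr.prog_bind_map.map_fd  = map_fd;   /* e.g. a metadata map */
    syscall(__NR_bpf, BPF_PROG_BIND_MAP, &attr, sizeof(attr));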
2385 | 4385 | SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) |
---|
2386 | 4386 | { |
---|
2387 | 4387 | union bpf_attr attr; |
---|
2388 | 4388 | int err; |
---|
2389 | 4389 | |
---|
2390 | | - if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN)) |
---|
| 4390 | + if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) |
---|
2391 | 4391 | return -EPERM; |
---|
2392 | 4392 | |
---|
2393 | 4393 | err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); |
---|
.. | .. |
---|
2399 | 4399 | memset(&attr, 0, sizeof(attr)); |
---|
2400 | 4400 | if (copy_from_user(&attr, uattr, size) != 0) |
---|
2401 | 4401 | return -EFAULT; |
---|
| 4402 | + |
---|
| 4403 | + trace_android_vh_check_bpf_syscall(cmd, &attr, size); |
---|
2402 | 4404 | |
---|
2403 | 4405 | err = security_bpf(cmd, &attr, size); |
---|
2404 | 4406 | if (err < 0) |
---|
.. | .. |
---|
2420 | 4422 | case BPF_MAP_GET_NEXT_KEY: |
---|
2421 | 4423 | err = map_get_next_key(&attr); |
---|
2422 | 4424 | break; |
---|
| 4425 | + case BPF_MAP_FREEZE: |
---|
| 4426 | + err = map_freeze(&attr); |
---|
| 4427 | + break; |
---|
2423 | 4428 | case BPF_PROG_LOAD: |
---|
2424 | | - err = bpf_prog_load(&attr); |
---|
| 4429 | + err = bpf_prog_load(&attr, uattr); |
---|
2425 | 4430 | break; |
---|
2426 | 4431 | case BPF_OBJ_PIN: |
---|
2427 | 4432 | err = bpf_obj_pin(&attr); |
---|
.. | .. |
---|
2449 | 4454 | err = bpf_obj_get_next_id(&attr, uattr, |
---|
2450 | 4455 | &map_idr, &map_idr_lock); |
---|
2451 | 4456 | break; |
---|
| 4457 | + case BPF_BTF_GET_NEXT_ID: |
---|
| 4458 | + err = bpf_obj_get_next_id(&attr, uattr, |
---|
| 4459 | + &btf_idr, &btf_idr_lock); |
---|
| 4460 | + break; |
---|
2452 | 4461 | case BPF_PROG_GET_FD_BY_ID: |
---|
2453 | 4462 | err = bpf_prog_get_fd_by_id(&attr); |
---|
2454 | 4463 | break; |
---|
.. | .. |
---|
2470 | 4479 | case BPF_TASK_FD_QUERY: |
---|
2471 | 4480 | err = bpf_task_fd_query(&attr, uattr); |
---|
2472 | 4481 | break; |
---|
| 4482 | + case BPF_MAP_LOOKUP_AND_DELETE_ELEM: |
---|
| 4483 | + err = map_lookup_and_delete_elem(&attr); |
---|
| 4484 | + break; |
---|
| 4485 | + case BPF_MAP_LOOKUP_BATCH: |
---|
| 4486 | + err = bpf_map_do_batch(&attr, uattr, BPF_MAP_LOOKUP_BATCH); |
---|
| 4487 | + break; |
---|
| 4488 | + case BPF_MAP_LOOKUP_AND_DELETE_BATCH: |
---|
| 4489 | + err = bpf_map_do_batch(&attr, uattr, |
---|
| 4490 | + BPF_MAP_LOOKUP_AND_DELETE_BATCH); |
---|
| 4491 | + break; |
---|
| 4492 | + case BPF_MAP_UPDATE_BATCH: |
---|
| 4493 | + err = bpf_map_do_batch(&attr, uattr, BPF_MAP_UPDATE_BATCH); |
---|
| 4494 | + break; |
---|
| 4495 | + case BPF_MAP_DELETE_BATCH: |
---|
| 4496 | + err = bpf_map_do_batch(&attr, uattr, BPF_MAP_DELETE_BATCH); |
---|
| 4497 | + break; |
---|
| 4498 | + case BPF_LINK_CREATE: |
---|
| 4499 | + err = link_create(&attr); |
---|
| 4500 | + break; |
---|
| 4501 | + case BPF_LINK_UPDATE: |
---|
| 4502 | + err = link_update(&attr); |
---|
| 4503 | + break; |
---|
| 4504 | + case BPF_LINK_GET_FD_BY_ID: |
---|
| 4505 | + err = bpf_link_get_fd_by_id(&attr); |
---|
| 4506 | + break; |
---|
| 4507 | + case BPF_LINK_GET_NEXT_ID: |
---|
| 4508 | + err = bpf_obj_get_next_id(&attr, uattr, |
---|
| 4509 | + &link_idr, &link_idr_lock); |
---|
| 4510 | + break; |
---|
| 4511 | + case BPF_ENABLE_STATS: |
---|
| 4512 | + err = bpf_enable_stats(&attr); |
---|
| 4513 | + break; |
---|
| 4514 | + case BPF_ITER_CREATE: |
---|
| 4515 | + err = bpf_iter_create(&attr); |
---|
| 4516 | + break; |
---|
| 4517 | + case BPF_LINK_DETACH: |
---|
| 4518 | + err = link_detach(&attr); |
---|
| 4519 | + break; |
---|
| 4520 | + case BPF_PROG_BIND_MAP: |
---|
| 4521 | + err = bpf_prog_bind_map(&attr); |
---|
| 4522 | + break; |
---|
2473 | 4523 | default: |
---|
2474 | 4524 | err = -EINVAL; |
---|
2475 | 4525 | break; |
---|