...
 #include <linux/slab.h>
 #include <linux/bpf.h>
 #include <linux/bpf_perf_event.h>
+#include <linux/btf.h>
 #include <linux/filter.h>
 #include <linux/uaccess.h>
 #include <linux/ctype.h>
 #include <linux/kprobes.h>
+#include <linux/spinlock.h>
 #include <linux/syscalls.h>
 #include <linux/error-injection.h>
+#include <linux/btf_ids.h>
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/btf.h>
+
+#include <asm/tlb.h>
 
 #include "trace_probe.h"
 #include "trace.h"
 
+#define CREATE_TRACE_POINTS
+#include "bpf_trace.h"
+
+#define bpf_event_rcu_dereference(p) \
+	rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))
+
+#ifdef CONFIG_MODULES
+struct bpf_trace_module {
+	struct module *module;
+	struct list_head list;
+};
+
+static LIST_HEAD(bpf_trace_modules);
+static DEFINE_MUTEX(bpf_module_mutex);
+
+static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
+{
+	struct bpf_raw_event_map *btp, *ret = NULL;
+	struct bpf_trace_module *btm;
+	unsigned int i;
+
+	mutex_lock(&bpf_module_mutex);
+	list_for_each_entry(btm, &bpf_trace_modules, list) {
+		for (i = 0; i < btm->module->num_bpf_raw_events; ++i) {
+			btp = &btm->module->bpf_raw_events[i];
+			if (!strcmp(btp->tp->name, name)) {
+				if (try_module_get(btm->module))
+					ret = btp;
+				goto out;
+			}
+		}
+	}
+out:
+	mutex_unlock(&bpf_module_mutex);
+	return ret;
+}
+#else
+static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
+{
+	return NULL;
+}
+#endif /* CONFIG_MODULES */
+
 u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+
+static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
+				  u64 flags, const struct btf **btf,
+				  s32 *btf_id);
 
 /**
  * trace_call_bpf - invoke BPF program
...
 {
 	unsigned int ret;
 
-	if (in_nmi()) /* not supported yet */
-		return 1;
-
-	preempt_disable();
+	cant_sleep();
 
 	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
 		/*
...
 
 out:
 	__this_cpu_dec(bpf_prog_active);
-	preempt_enable();
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(trace_call_bpf);
 
 #ifdef CONFIG_BPF_KPROBE_OVERRIDE
 BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
...
 };
 #endif
 
-BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
+static __always_inline int
+bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr)
 {
 	int ret;
 
-	ret = probe_kernel_read(dst, unsafe_ptr, size);
+	ret = copy_from_user_nofault(dst, unsafe_ptr, size);
 	if (unlikely(ret < 0))
 		memset(dst, 0, size);
-
 	return ret;
 }
 
-static const struct bpf_func_proto bpf_probe_read_proto = {
-	.func		= bpf_probe_read,
+BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
+	   const void __user *, unsafe_ptr)
+{
+	return bpf_probe_read_user_common(dst, size, unsafe_ptr);
+}
+
+const struct bpf_func_proto bpf_probe_read_user_proto = {
+	.func		= bpf_probe_read_user,
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
...
 	.arg3_type	= ARG_ANYTHING,
 };
 
-BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
+static __always_inline int
+bpf_probe_read_user_str_common(void *dst, u32 size,
+			       const void __user *unsafe_ptr)
+{
+	int ret;
+
+	/*
+	 * NB: We rely on strncpy_from_user() not copying junk past the NUL
+	 * terminator into `dst`.
+	 *
+	 * strncpy_from_user() does long-sized strides in the fast path. If the
+	 * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
+	 * then there could be junk after the NUL in `dst`. If user takes `dst`
+	 * and keys a hash map with it, then semantically identical strings can
+	 * occupy multiple entries in the map.
+	 */
+	ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
+	if (unlikely(ret < 0))
+		memset(dst, 0, size);
+	return ret;
+}
+
+BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
+	   const void __user *, unsafe_ptr)
+{
+	return bpf_probe_read_user_str_common(dst, size, unsafe_ptr);
+}
+
+const struct bpf_func_proto bpf_probe_read_user_str_proto = {
+	.func		= bpf_probe_read_user_str,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+static __always_inline int
+bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
+{
+	int ret;
+
+	ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
+	if (unlikely(ret < 0))
+		memset(dst, 0, size);
+	return ret;
+}
+
+BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
+	   const void *, unsafe_ptr)
+{
+	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
+}
+
+const struct bpf_func_proto bpf_probe_read_kernel_proto = {
+	.func		= bpf_probe_read_kernel,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+static __always_inline int
+bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
+{
+	int ret;
+
+	/*
+	 * The strncpy_from_kernel_nofault() call will likely not fill the
+	 * entire buffer, but that's okay in this circumstance as we're probing
+	 * arbitrary memory anyway similar to bpf_probe_read_*() and might
+	 * as well probe the stack. Thus, memory is explicitly cleared
+	 * only in error case, so that improper users ignoring return
+	 * code altogether don't copy garbage; otherwise length of string
+	 * is returned that can be used for bpf_perf_event_output() et al.
+	 */
+	ret = strncpy_from_kernel_nofault(dst, unsafe_ptr, size);
+	if (unlikely(ret < 0))
+		memset(dst, 0, size);
+	return ret;
+}
+
+BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
+	   const void *, unsafe_ptr)
+{
+	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
+}
+
+const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
+	.func		= bpf_probe_read_kernel_str,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_ANYTHING,
+};
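
/*
 * Usage sketch (illustrative, not part of the patch): the NUL-handling
 * comment above matters because BPF programs often key hash maps with the
 * copied string. A minimal BPF-side pattern, assuming a hypothetical
 * "counts" hash map keyed by a 16-byte string:
 *
 *	char key[16];
 *	long n = bpf_probe_read_kernel_str(key, sizeof(key), name_ptr);
 *	if (n > 0)	// n = string length including the trailing NUL
 *		bpf_map_update_elem(&counts, &key, &one, BPF_ANY);
 *
 * Junk bytes after the NUL would make semantically equal keys land in
 * distinct map entries; the masking guarantee above prevents that.
 */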
+
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
+	   const void *, unsafe_ptr)
+{
+	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
+		return bpf_probe_read_user_common(dst, size,
+				(__force void __user *)unsafe_ptr);
+	}
+	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
+}
+
+static const struct bpf_func_proto bpf_probe_read_compat_proto = {
+	.func		= bpf_probe_read_compat,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
+	   const void *, unsafe_ptr)
+{
+	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
+		return bpf_probe_read_user_str_common(dst, size,
+				(__force void __user *)unsafe_ptr);
+	}
+	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
+}
+
+static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
+	.func		= bpf_probe_read_compat_str,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_ANYTHING,
+};
+#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */
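
/*
 * Sketch of the dispatch rule above (editorial, hedged): on kernels with
 * CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE, the legacy
 * bpf_probe_read()/bpf_probe_read_str() semantics reduce to one address
 * test:
 *
 *	if ((unsigned long)ptr < TASK_SIZE)
 *		copy_from_user_nofault(dst, ptr, size);	  // user pointer
 *	else
 *		copy_from_kernel_nofault(dst, ptr, size); // kernel pointer
 *
 * New programs should call the explicit _user/_kernel variants instead of
 * relying on this heuristic.
 */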
+
+BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
 	   u32, size)
 {
 	/*
...
 	 * access_ok() should prevent writing to non-user memory, but in
 	 * some situations (nommu, temporary switch, etc) access_ok() does
 	 * not provide enough validation, hence the check on KERNEL_DS.
+	 *
+	 * nmi_uaccess_okay() ensures the probe is not run in an interim
+	 * state, when the task or mm are switched. This is specifically
+	 * required to prevent the use of temporary mm.
 	 */
 
 	if (unlikely(in_interrupt() ||
...
 		return -EPERM;
 	if (unlikely(uaccess_kernel()))
 		return -EPERM;
-	if (!access_ok(VERIFY_WRITE, unsafe_ptr, size))
+	if (unlikely(!nmi_uaccess_okay()))
 		return -EPERM;
 
-	return probe_kernel_write(unsafe_ptr, src, size);
+	return copy_to_user_nofault(unsafe_ptr, src, size);
 }
 
 static const struct bpf_func_proto bpf_probe_write_user_proto = {
...
 
 static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
 {
+	if (!capable(CAP_SYS_ADMIN))
+		return NULL;
+
 	pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
 			    current->comm, task_pid_nr(current));
 
 	return &bpf_probe_write_user_proto;
 }
 
+static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
+				  size_t bufsz)
+{
+	void __user *user_ptr = (__force void __user *)unsafe_ptr;
+
+	buf[0] = 0;
+
+	switch (fmt_ptype) {
+	case 's':
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+		if ((unsigned long)unsafe_ptr < TASK_SIZE) {
+			strncpy_from_user_nofault(buf, user_ptr, bufsz);
+			break;
+		}
+		fallthrough;
+#endif
+	case 'k':
+		strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
+		break;
+	case 'u':
+		strncpy_from_user_nofault(buf, user_ptr, bufsz);
+		break;
+	}
+}
+
+static DEFINE_RAW_SPINLOCK(trace_printk_lock);
+
+#define BPF_TRACE_PRINTK_SIZE	1024
+
+static __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...)
+{
+	static char buf[BPF_TRACE_PRINTK_SIZE];
+	unsigned long flags;
+	va_list ap;
+	int ret;
+
+	raw_spin_lock_irqsave(&trace_printk_lock, flags);
+	va_start(ap, fmt);
+	ret = vsnprintf(buf, sizeof(buf), fmt, ap);
+	va_end(ap);
+	/* vsnprintf() will not append null for zero-length strings */
+	if (ret == 0)
+		buf[0] = '\0';
+	trace_bpf_trace_printk(buf);
+	raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
+
+	return ret;
+}
+
 /*
  * Only limited trace_printk() conversion specifiers allowed:
- * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
+ * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s
  */
 BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
 	   u64, arg2, u64, arg3)
 {
+	int i, mod[3] = {}, fmt_cnt = 0;
+	char buf[64], fmt_ptype;
+	void *unsafe_ptr = NULL;
 	bool str_seen = false;
-	int mod[3] = {};
-	int fmt_cnt = 0;
-	u64 unsafe_addr;
-	char buf[64];
-	int i;
 
 	/*
 	 * bpf_check()->check_func_arg()->check_stack_boundary()
...
 		if (fmt[i] == 'l') {
 			mod[fmt_cnt]++;
 			i++;
-		} else if (fmt[i] == 'p' || fmt[i] == 's') {
+		} else if (fmt[i] == 'p') {
 			mod[fmt_cnt]++;
+			if ((fmt[i + 1] == 'k' ||
+			     fmt[i + 1] == 'u') &&
+			    fmt[i + 2] == 's') {
+				fmt_ptype = fmt[i + 1];
+				i += 2;
+				goto fmt_str;
+			}
+
+			if (fmt[i + 1] == 'B') {
+				i++;
+				goto fmt_next;
+			}
+
 			/* disallow any further format extensions */
 			if (fmt[i + 1] != 0 &&
 			    !isspace(fmt[i + 1]) &&
 			    !ispunct(fmt[i + 1]))
 				return -EINVAL;
-			fmt_cnt++;
-			if (fmt[i] == 's') {
-				if (str_seen)
-					/* allow only one '%s' per fmt string */
-					return -EINVAL;
-				str_seen = true;
 
-				switch (fmt_cnt) {
-				case 1:
-					unsafe_addr = arg1;
-					arg1 = (long) buf;
-					break;
-				case 2:
-					unsafe_addr = arg2;
-					arg2 = (long) buf;
-					break;
-				case 3:
-					unsafe_addr = arg3;
-					arg3 = (long) buf;
-					break;
-				}
-				buf[0] = 0;
-				strncpy_from_unsafe(buf,
-						    (void *) (long) unsafe_addr,
-						    sizeof(buf));
+			goto fmt_next;
+		} else if (fmt[i] == 's') {
+			mod[fmt_cnt]++;
+			fmt_ptype = fmt[i];
+fmt_str:
+			if (str_seen)
+				/* allow only one '%s' per fmt string */
+				return -EINVAL;
+			str_seen = true;
+
+			if (fmt[i + 1] != 0 &&
+			    !isspace(fmt[i + 1]) &&
+			    !ispunct(fmt[i + 1]))
+				return -EINVAL;
+
+			switch (fmt_cnt) {
+			case 0:
+				unsafe_ptr = (void *)(long)arg1;
+				arg1 = (long)buf;
+				break;
+			case 1:
+				unsafe_ptr = (void *)(long)arg2;
+				arg2 = (long)buf;
+				break;
+			case 2:
+				unsafe_ptr = (void *)(long)arg3;
+				arg3 = (long)buf;
+				break;
 			}
-			continue;
+
+			bpf_trace_copy_string(buf, unsafe_ptr, fmt_ptype,
+					      sizeof(buf));
+			goto fmt_next;
 		}
 
 		if (fmt[i] == 'l') {
...
 		if (fmt[i] != 'i' && fmt[i] != 'd' &&
 		    fmt[i] != 'u' && fmt[i] != 'x')
 			return -EINVAL;
+fmt_next:
 		fmt_cnt++;
 	}
...
 	 */
 #define __BPF_TP_EMIT()	__BPF_ARG3_TP()
 #define __BPF_TP(...)						\
-	__trace_printk(0 /* Fake ip */,				\
-		       fmt, ##__VA_ARGS__)
+	bpf_do_trace_printk(fmt, ##__VA_ARGS__)
 
 #define __BPF_ARG1_TP(...)					\
 	((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64))	\
...
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
 {
 	/*
-	 * this program might be calling bpf_trace_printk,
-	 * so allocate per-cpu printk buffers
+	 * This program might be calling bpf_trace_printk,
+	 * so enable the associated bpf_trace/bpf_trace_printk event.
+	 * Repeat this each time as it is possible a user has
+	 * disabled bpf_trace_printk events. By loading a program
+	 * calling bpf_trace_printk() however the user has expressed
+	 * the intent to see such events.
	 */
-	trace_printk_init_buffers();
+	if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
+		pr_warn_ratelimited("could not enable bpf_trace_printk events");
 
 	return &bpf_trace_printk_proto;
 }
+
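/*
 * Usage sketch (illustrative): from the BPF side the helper is reached
 * through the bpf_trace_printk() wrapper in libbpf's bpf_helpers.h; the
 * format string must live on the program stack, and at most one %s (or
 * %pks/%pus) is accepted, per the parser above:
 *
 *	char fmt[] = "pid %d name %pks\n";
 *	bpf_trace_printk(fmt, sizeof(fmt),
 *			 bpf_get_current_pid_tgid() >> 32, name_ptr);
 *
 * After this patch the text is emitted through the
 * bpf_trace/bpf_trace_printk trace event, so it still appears in the
 * tracefs trace_pipe once that event is enabled.
 */
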
+#define MAX_SEQ_PRINTF_VARARGS		12
+#define MAX_SEQ_PRINTF_MAX_MEMCPY	6
+#define MAX_SEQ_PRINTF_STR_LEN		128
+
+struct bpf_seq_printf_buf {
+	char buf[MAX_SEQ_PRINTF_MAX_MEMCPY][MAX_SEQ_PRINTF_STR_LEN];
+};
+static DEFINE_PER_CPU(struct bpf_seq_printf_buf, bpf_seq_printf_buf);
+static DEFINE_PER_CPU(int, bpf_seq_printf_buf_used);
+
+BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
+	   const void *, data, u32, data_len)
+{
+	int err = -EINVAL, fmt_cnt = 0, memcpy_cnt = 0;
+	int i, buf_used, copy_size, num_args;
+	u64 params[MAX_SEQ_PRINTF_VARARGS];
+	struct bpf_seq_printf_buf *bufs;
+	const u64 *args = data;
+
+	buf_used = this_cpu_inc_return(bpf_seq_printf_buf_used);
+	if (WARN_ON_ONCE(buf_used > 1)) {
+		err = -EBUSY;
+		goto out;
+	}
+
+	bufs = this_cpu_ptr(&bpf_seq_printf_buf);
+
+	/*
+	 * bpf_check()->check_func_arg()->check_stack_boundary()
+	 * guarantees that fmt points to bpf program stack,
+	 * fmt_size bytes of it were initialized and fmt_size > 0
+	 */
+	if (fmt[--fmt_size] != 0)
+		goto out;
+
+	if (data_len & 7)
+		goto out;
+
+	for (i = 0; i < fmt_size; i++) {
+		if (fmt[i] == '%') {
+			if (fmt[i + 1] == '%')
+				i++;
+			else if (!data || !data_len)
+				goto out;
+		}
+	}
+
+	num_args = data_len / 8;
+
+	/* check format string for allowed specifiers */
+	for (i = 0; i < fmt_size; i++) {
+		/* only printable ascii for now. */
+		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (fmt[i] != '%')
+			continue;
+
+		if (fmt[i + 1] == '%') {
+			i++;
+			continue;
+		}
+
+		if (fmt_cnt >= MAX_SEQ_PRINTF_VARARGS) {
+			err = -E2BIG;
+			goto out;
+		}
+
+		if (fmt_cnt >= num_args) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
+		i++;
+
+		/* skip optional "[0 +-][num]" width formating field */
+		while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||
+		       fmt[i] == ' ')
+			i++;
+		if (fmt[i] >= '1' && fmt[i] <= '9') {
+			i++;
+			while (fmt[i] >= '0' && fmt[i] <= '9')
+				i++;
+		}
+
+		if (fmt[i] == 's') {
+			void *unsafe_ptr;
+
+			/* try our best to copy */
+			if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
+				err = -E2BIG;
+				goto out;
+			}
+
+			unsafe_ptr = (void *)(long)args[fmt_cnt];
+			err = strncpy_from_kernel_nofault(bufs->buf[memcpy_cnt],
+					unsafe_ptr, MAX_SEQ_PRINTF_STR_LEN);
+			if (err < 0)
+				bufs->buf[memcpy_cnt][0] = '\0';
+			params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
+
+			fmt_cnt++;
+			memcpy_cnt++;
+			continue;
+		}
+
+		if (fmt[i] == 'p') {
+			if (fmt[i + 1] == 0 ||
+			    fmt[i + 1] == 'K' ||
+			    fmt[i + 1] == 'x' ||
+			    fmt[i + 1] == 'B') {
+				/* just kernel pointers */
+				params[fmt_cnt] = args[fmt_cnt];
+				fmt_cnt++;
+				continue;
+			}
+
+			/* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
+			if (fmt[i + 1] != 'i' && fmt[i + 1] != 'I') {
+				err = -EINVAL;
+				goto out;
+			}
+			if (fmt[i + 2] != '4' && fmt[i + 2] != '6') {
+				err = -EINVAL;
+				goto out;
+			}
+
+			if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
+				err = -E2BIG;
+				goto out;
+			}
+
+
+			copy_size = (fmt[i + 2] == '4') ? 4 : 16;
+
+			err = copy_from_kernel_nofault(bufs->buf[memcpy_cnt],
+						(void *) (long) args[fmt_cnt],
+						copy_size);
+			if (err < 0)
+				memset(bufs->buf[memcpy_cnt], 0, copy_size);
+			params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
+
+			i += 2;
+			fmt_cnt++;
+			memcpy_cnt++;
+			continue;
+		}
+
+		if (fmt[i] == 'l') {
+			i++;
+			if (fmt[i] == 'l')
+				i++;
+		}
+
+		if (fmt[i] != 'i' && fmt[i] != 'd' &&
+		    fmt[i] != 'u' && fmt[i] != 'x' &&
+		    fmt[i] != 'X') {
+			err = -EINVAL;
+			goto out;
+		}
+
+		params[fmt_cnt] = args[fmt_cnt];
+		fmt_cnt++;
+	}
+
+	/* Maximumly we can have MAX_SEQ_PRINTF_VARARGS parameter, just give
+	 * all of them to seq_printf().
+	 */
+	seq_printf(m, fmt, params[0], params[1], params[2], params[3],
+		   params[4], params[5], params[6], params[7], params[8],
+		   params[9], params[10], params[11]);
+
+	err = seq_has_overflowed(m) ? -EOVERFLOW : 0;
+out:
+	this_cpu_dec(bpf_seq_printf_buf_used);
+	return err;
+}
+
+BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file)
+
+static const struct bpf_func_proto bpf_seq_printf_proto = {
+	.func		= bpf_seq_printf,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg1_btf_id	= &btf_seq_file_ids[0],
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_PTR_TO_MEM_OR_NULL,
+	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
+};
+
+BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len)
+{
+	return seq_write(m, data, len) ? -EOVERFLOW : 0;
+}
+
+static const struct bpf_func_proto bpf_seq_write_proto = {
+	.func		= bpf_seq_write,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg1_btf_id	= &btf_seq_file_ids[0],
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+};
+
+BPF_CALL_4(bpf_seq_printf_btf, struct seq_file *, m, struct btf_ptr *, ptr,
+	   u32, btf_ptr_size, u64, flags)
+{
+	const struct btf *btf;
+	s32 btf_id;
+	int ret;
+
+	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
+	if (ret)
+		return ret;
+
+	return btf_type_seq_show_flags(btf, btf_id, ptr->ptr, m, flags);
+}
+
+static const struct bpf_func_proto bpf_seq_printf_btf_proto = {
+	.func		= bpf_seq_printf_btf,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg1_btf_id	= &btf_seq_file_ids[0],
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+};
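
/*
 * Usage sketch (illustrative): these protos serve BPF iterator programs
 * (BPF_TRACE_ITER). With libbpf's BPF_SEQ_PRINTF() macro, which packs its
 * varargs into the data/data_len pair validated above, a task iterator
 * might look like:
 *
 *	SEC("iter/task")
 *	int dump_task(struct bpf_iter__task *ctx)
 *	{
 *		struct seq_file *m = ctx->meta->seq;
 *		struct task_struct *t = ctx->task;
 *
 *		if (t)
 *			BPF_SEQ_PRINTF(m, "%d %s\n", t->pid, t->comm);
 *		return 0;
 *	}
 */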
 
 static __always_inline int
 get_map_perf_counter(struct bpf_map *map, u64 flags,
...
 	if (unlikely(event->oncpu != cpu))
 		return -EOPNOTSUPP;
 
-	perf_event_output(event, sd, regs);
-	return 0;
+	return perf_event_output(event, sd, regs);
 }
 
 /*
...
 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
 };
 
-static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
-static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd);
+static DEFINE_PER_CPU(int, bpf_event_output_nest_level);
+struct bpf_nested_pt_regs {
+	struct pt_regs regs[3];
+};
+static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs);
+static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);
 
 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
 {
-	struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd);
-	struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
+	int nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
 	struct perf_raw_frag frag = {
 		.copy		= ctx_copy,
 		.size		= ctx_size,
...
 			.data	= meta,
 		},
 	};
+	struct perf_sample_data *sd;
+	struct pt_regs *regs;
+	u64 ret;
+
+	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
+		ret = -EBUSY;
+		goto out;
+	}
+	sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - 1]);
+	regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]);
 
 	perf_fetch_caller_regs(regs);
 	perf_sample_data_init(sd, 0, 0);
 	sd->raw = &raw;
 
-	return __bpf_perf_event_output(regs, map, flags, sd);
+	ret = __bpf_perf_event_output(regs, map, flags, sd);
+out:
+	this_cpu_dec(bpf_event_output_nest_level);
+	return ret;
 }
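
/*
 * Design note with a generic sketch (editorial): the nest level bounds the
 * contexts that can preempt one another on one CPU (roughly task ->
 * softirq -> hardirq/NMI), which is why three pt_regs/sample slots
 * suffice:
 *
 *	int lvl = this_cpu_inc_return(nest_level);	// 1, 2 or 3
 *	if (lvl > ARRAY_SIZE(slots))
 *		goto out;		// -EBUSY rather than corrupting
 *	use(this_cpu_ptr(&slots[lvl - 1]));	// private scratch slot
 * out:
 *	this_cpu_dec(nest_level);
 *
 * (nest_level/slots/use are placeholder names for the pattern.)
 */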
 
 BPF_CALL_0(bpf_get_current_task)
...
 	return (long) current;
 }
 
-static const struct bpf_func_proto bpf_get_current_task_proto = {
+const struct bpf_func_proto bpf_get_current_task_proto = {
 	.func		= bpf_get_current_task,
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
...
 	.arg2_type	= ARG_ANYTHING,
 };
 
-BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
-	   const void *, unsafe_ptr)
-{
-	int ret;
-
-	/*
-	 * The strncpy_from_unsafe() call will likely not fill the entire
-	 * buffer, but that's okay in this circumstance as we're probing
-	 * arbitrary memory anyway similar to bpf_probe_read() and might
-	 * as well probe the stack. Thus, memory is explicitly cleared
-	 * only in error case, so that improper users ignoring return
-	 * code altogether don't copy garbage; otherwise length of string
-	 * is returned that can be used for bpf_perf_event_output() et al.
-	 */
-	ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
-	if (unlikely(ret < 0))
-		memset(dst, 0, size);
-
-	return ret;
-}
-
-static const struct bpf_func_proto bpf_probe_read_str_proto = {
-	.func		= bpf_probe_read_str,
-	.gpl_only	= true,
-	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
-	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
-	.arg3_type	= ARG_ANYTHING,
+struct send_signal_irq_work {
+	struct irq_work irq_work;
+	struct task_struct *task;
+	u32 sig;
+	enum pid_type type;
 };
 
-static const struct bpf_func_proto *
-tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
+
+static void do_bpf_send_signal(struct irq_work *entry)
+{
+	struct send_signal_irq_work *work;
+
+	work = container_of(entry, struct send_signal_irq_work, irq_work);
+	group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type);
+}
+
+static int bpf_send_signal_common(u32 sig, enum pid_type type)
+{
+	struct send_signal_irq_work *work = NULL;
+
+	/* Similar to bpf_probe_write_user, task needs to be
+	 * in a sound condition and kernel memory access be
+	 * permitted in order to send signal to the current
+	 * task.
+	 */
+	if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
+		return -EPERM;
+	if (unlikely(uaccess_kernel()))
+		return -EPERM;
+	if (unlikely(!nmi_uaccess_okay()))
+		return -EPERM;
+
+	if (irqs_disabled()) {
+		/* Do an early check on signal validity. Otherwise,
+		 * the error is lost in deferred irq_work.
+		 */
+		if (unlikely(!valid_signal(sig)))
+			return -EINVAL;
+
+		work = this_cpu_ptr(&send_signal_work);
+		if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY)
+			return -EBUSY;
+
+		/* Add the current task, which is the target of sending signal,
+		 * to the irq_work. The current task may change when queued
+		 * irq works get executed.
+		 */
+		work->task = current;
+		work->sig = sig;
+		work->type = type;
+		irq_work_queue(&work->irq_work);
+		return 0;
+	}
+
+	return group_send_sig_info(sig, SEND_SIG_PRIV, current, type);
+}
+
+BPF_CALL_1(bpf_send_signal, u32, sig)
+{
+	return bpf_send_signal_common(sig, PIDTYPE_TGID);
+}
+
+static const struct bpf_func_proto bpf_send_signal_proto = {
+	.func		= bpf_send_signal,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_1(bpf_send_signal_thread, u32, sig)
+{
+	return bpf_send_signal_common(sig, PIDTYPE_PID);
+}
+
+static const struct bpf_func_proto bpf_send_signal_thread_proto = {
+	.func		= bpf_send_signal_thread,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_ANYTHING,
+};
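
/*
 * Usage sketch (illustrative): a tracing program can signal the task that
 * triggered the hook, e.g. to enforce a policy decision:
 *
 *	SEC("kprobe/some_hooked_function")	// placeholder attach point
 *	int probe(struct pt_regs *ctx)
 *	{
 *		if (policy_says_kill())		// hypothetical check
 *			bpf_send_signal(9);	// SIGKILL, whole thread group
 *		return 0;
 *	}
 *
 * bpf_send_signal_thread() differs only in targeting the current thread
 * (PIDTYPE_PID) instead of the thread group (PIDTYPE_TGID).
 */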
+
+BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz)
+{
+	long len;
+	char *p;
+
+	if (!sz)
+		return 0;
+
+	p = d_path(path, buf, sz);
+	if (IS_ERR(p)) {
+		len = PTR_ERR(p);
+	} else {
+		len = buf + sz - p;
+		memmove(buf, p, len);
+	}
+
+	return len;
+}
+
+BTF_SET_START(btf_allowlist_d_path)
+#ifdef CONFIG_SECURITY
+BTF_ID(func, security_file_permission)
+BTF_ID(func, security_inode_getattr)
+BTF_ID(func, security_file_open)
+#endif
+#ifdef CONFIG_SECURITY_PATH
+BTF_ID(func, security_path_truncate)
+#endif
+BTF_ID(func, vfs_truncate)
+BTF_ID(func, vfs_fallocate)
+BTF_ID(func, dentry_open)
+BTF_ID(func, vfs_getattr)
+BTF_ID(func, filp_close)
+BTF_SET_END(btf_allowlist_d_path)
+
+static bool bpf_d_path_allowed(const struct bpf_prog *prog)
+{
+	return btf_id_set_contains(&btf_allowlist_d_path, prog->aux->attach_btf_id);
+}
+
+BTF_ID_LIST_SINGLE(bpf_d_path_btf_ids, struct, path)
+
+static const struct bpf_func_proto bpf_d_path_proto = {
+	.func		= bpf_d_path,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg1_btf_id	= &bpf_d_path_btf_ids[0],
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.allowed	= bpf_d_path_allowed,
+};
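
/*
 * Usage sketch (illustrative): bpf_d_path() is only handed out when the
 * program attaches to one of the allowlisted hooks above, e.g. an fentry
 * program on filp_close:
 *
 *	SEC("fentry/filp_close")
 *	int BPF_PROG(trace_close, struct file *filp)
 *	{
 *		char buf[256];
 *		long n = bpf_d_path(&filp->f_path, buf, sizeof(buf));
 *
 *		if (n > 0)
 *			bpf_printk("close: %s", buf);
 *		return 0;
 *	}
 *
 * On failure the helper passes through the negative error from d_path().
 */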
+
+#define BTF_F_ALL	(BTF_F_COMPACT  | BTF_F_NONAME | \
+			 BTF_F_PTR_RAW | BTF_F_ZERO)
+
+static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
+				  u64 flags, const struct btf **btf,
+				  s32 *btf_id)
+{
+	const struct btf_type *t;
+
+	if (unlikely(flags & ~(BTF_F_ALL)))
+		return -EINVAL;
+
+	if (btf_ptr_size != sizeof(struct btf_ptr))
+		return -EINVAL;
+
+	*btf = bpf_get_btf_vmlinux();
+
+	if (IS_ERR_OR_NULL(*btf))
+		return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL;
+
+	if (ptr->type_id > 0)
+		*btf_id = ptr->type_id;
+	else
+		return -EINVAL;
+
+	if (*btf_id > 0)
+		t = btf_type_by_id(*btf, *btf_id);
+	if (*btf_id <= 0 || !t)
+		return -ENOENT;
+
+	return 0;
+}
+
+BPF_CALL_5(bpf_snprintf_btf, char *, str, u32, str_size, struct btf_ptr *, ptr,
+	   u32, btf_ptr_size, u64, flags)
+{
+	const struct btf *btf;
+	s32 btf_id;
+	int ret;
+
+	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
+	if (ret)
+		return ret;
+
+	return btf_type_snprintf_show(btf, btf_id, ptr->ptr, str, str_size,
+				      flags);
+}
+
+const struct bpf_func_proto bpf_snprintf_btf_proto = {
+	.func		= bpf_snprintf_btf,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_MEM,
+	.arg2_type	= ARG_CONST_SIZE,
+	.arg3_type	= ARG_PTR_TO_MEM,
+	.arg4_type	= ARG_CONST_SIZE,
+	.arg5_type	= ARG_ANYTHING,
+};
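
/*
 * Usage sketch (illustrative): a program fills a struct btf_ptr with an
 * object pointer and its BTF type id — obtainable at compile time via
 * libbpf's bpf_core_type_id_kernel() — and renders it as text:
 *
 *	struct btf_ptr p = {
 *		.ptr	 = task,	// e.g. a struct task_struct *
 *		.type_id = bpf_core_type_id_kernel(struct task_struct),
 *	};
 *	char out[512];
 *
 *	bpf_snprintf_btf(out, sizeof(out), &p, sizeof(p), BTF_F_COMPACT);
 */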
+
+const struct bpf_func_proto *
+bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_map_lookup_elem:
...
 		return &bpf_map_update_elem_proto;
 	case BPF_FUNC_map_delete_elem:
 		return &bpf_map_delete_elem_proto;
-	case BPF_FUNC_probe_read:
-		return &bpf_probe_read_proto;
+	case BPF_FUNC_map_push_elem:
+		return &bpf_map_push_elem_proto;
+	case BPF_FUNC_map_pop_elem:
+		return &bpf_map_pop_elem_proto;
+	case BPF_FUNC_map_peek_elem:
+		return &bpf_map_peek_elem_proto;
 	case BPF_FUNC_ktime_get_ns:
 		return &bpf_ktime_get_ns_proto;
 	case BPF_FUNC_ktime_get_boot_ns:
...
 		return &bpf_get_numa_node_id_proto;
 	case BPF_FUNC_perf_event_read:
 		return &bpf_perf_event_read_proto;
-	case BPF_FUNC_probe_write_user:
-		return bpf_get_probe_write_proto();
 	case BPF_FUNC_current_task_under_cgroup:
 		return &bpf_current_task_under_cgroup_proto;
 	case BPF_FUNC_get_prandom_u32:
 		return &bpf_get_prandom_u32_proto;
+	case BPF_FUNC_probe_write_user:
+		return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ?
+		       NULL : bpf_get_probe_write_proto();
+	case BPF_FUNC_probe_read_user:
+		return &bpf_probe_read_user_proto;
+	case BPF_FUNC_probe_read_kernel:
+		return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+		       NULL : &bpf_probe_read_kernel_proto;
+	case BPF_FUNC_probe_read_user_str:
+		return &bpf_probe_read_user_str_proto;
+	case BPF_FUNC_probe_read_kernel_str:
+		return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+		       NULL : &bpf_probe_read_kernel_str_proto;
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+	case BPF_FUNC_probe_read:
+		return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+		       NULL : &bpf_probe_read_compat_proto;
 	case BPF_FUNC_probe_read_str:
-		return &bpf_probe_read_str_proto;
+		return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+		       NULL : &bpf_probe_read_compat_str_proto;
+#endif
 #ifdef CONFIG_CGROUPS
 	case BPF_FUNC_get_current_cgroup_id:
 		return &bpf_get_current_cgroup_id_proto;
 #endif
+	case BPF_FUNC_send_signal:
+		return &bpf_send_signal_proto;
+	case BPF_FUNC_send_signal_thread:
+		return &bpf_send_signal_thread_proto;
+	case BPF_FUNC_perf_event_read_value:
+		return &bpf_perf_event_read_value_proto;
+	case BPF_FUNC_get_ns_current_pid_tgid:
+		return &bpf_get_ns_current_pid_tgid_proto;
+	case BPF_FUNC_ringbuf_output:
+		return &bpf_ringbuf_output_proto;
+	case BPF_FUNC_ringbuf_reserve:
+		return &bpf_ringbuf_reserve_proto;
+	case BPF_FUNC_ringbuf_submit:
+		return &bpf_ringbuf_submit_proto;
+	case BPF_FUNC_ringbuf_discard:
+		return &bpf_ringbuf_discard_proto;
+	case BPF_FUNC_ringbuf_query:
+		return &bpf_ringbuf_query_proto;
+	case BPF_FUNC_jiffies64:
+		return &bpf_jiffies64_proto;
+	case BPF_FUNC_get_task_stack:
+		return &bpf_get_task_stack_proto;
+	case BPF_FUNC_copy_from_user:
+		return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
+	case BPF_FUNC_snprintf_btf:
+		return &bpf_snprintf_btf_proto;
+	case BPF_FUNC_per_cpu_ptr:
+		return &bpf_per_cpu_ptr_proto;
+	case BPF_FUNC_this_cpu_ptr:
+		return &bpf_this_cpu_ptr_proto;
 	default:
 		return NULL;
 	}
...
 		return &bpf_get_stackid_proto;
 	case BPF_FUNC_get_stack:
 		return &bpf_get_stack_proto;
-	case BPF_FUNC_perf_event_read_value:
-		return &bpf_perf_event_read_value_proto;
 #ifdef CONFIG_BPF_KPROBE_OVERRIDE
 	case BPF_FUNC_override_return:
 		return &bpf_override_return_proto;
 #endif
 	default:
-		return tracing_func_proto(func_id, prog);
+		return bpf_tracing_func_proto(func_id, prog);
 	}
 }
 
...
 	case BPF_FUNC_get_stack:
 		return &bpf_get_stack_proto_tp;
 	default:
-		return tracing_func_proto(func_id, prog);
+		return bpf_tracing_func_proto(func_id, prog);
 	}
 }
 
...
 	.arg3_type	= ARG_CONST_SIZE,
 };
 
+BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx,
+	   void *, buf, u32, size, u64, flags)
+{
+	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
+	struct perf_branch_stack *br_stack = ctx->data->br_stack;
+	u32 to_copy;
+
+	if (unlikely(flags & ~BPF_F_GET_BRANCH_RECORDS_SIZE))
+		return -EINVAL;
+
+	if (unlikely(!br_stack))
+		return -ENOENT;
+
+	if (flags & BPF_F_GET_BRANCH_RECORDS_SIZE)
+		return br_stack->nr * br_entry_size;
+
+	if (!buf || (size % br_entry_size != 0))
+		return -EINVAL;
+
+	to_copy = min_t(u32, br_stack->nr * br_entry_size, size);
+	memcpy(buf, br_stack->entries, to_copy);
+
+	return to_copy;
+}
+
+static const struct bpf_func_proto bpf_read_branch_records_proto = {
+	.func		= bpf_read_branch_records,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM_OR_NULL,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+};
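
/*
 * Usage sketch (illustrative): perf_event programs can either query the
 * size of the branch stack or copy it out; the buffer length must be a
 * multiple of sizeof(struct perf_branch_entry):
 *
 *	struct perf_branch_entry ents[16];	// capacity picked by caller
 *	long n;
 *
 *	n = bpf_read_branch_records(ctx, NULL, 0,
 *				    BPF_F_GET_BRANCH_RECORDS_SIZE);
 *	if (n > 0)	// n bytes of records are available
 *		n = bpf_read_branch_records(ctx, ents, sizeof(ents), 0);
 *	// on success n is the number of bytes actually copied
 */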
+
 static const struct bpf_func_proto *
 pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
...
 	case BPF_FUNC_perf_event_output:
 		return &bpf_perf_event_output_proto_tp;
 	case BPF_FUNC_get_stackid:
-		return &bpf_get_stackid_proto_tp;
+		return &bpf_get_stackid_proto_pe;
 	case BPF_FUNC_get_stack:
-		return &bpf_get_stack_proto_tp;
+		return &bpf_get_stack_proto_pe;
 	case BPF_FUNC_perf_prog_read_value:
 		return &bpf_perf_prog_read_value_proto;
+	case BPF_FUNC_read_branch_records:
+		return &bpf_read_branch_records_proto;
 	default:
-		return tracing_func_proto(func_id, prog);
+		return bpf_tracing_func_proto(func_id, prog);
 	}
 }
 
...
 	.arg4_type	= ARG_PTR_TO_MEM,
 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
 };
+
+extern const struct bpf_func_proto bpf_skb_output_proto;
+extern const struct bpf_func_proto bpf_xdp_output_proto;
 
 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
 	   struct bpf_map *, map, u64, flags)
...
 	case BPF_FUNC_get_stack:
 		return &bpf_get_stack_proto_raw_tp;
 	default:
-		return tracing_func_proto(func_id, prog);
+		return bpf_tracing_func_proto(func_id, prog);
+	}
+}
+
+const struct bpf_func_proto *
+tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+#ifdef CONFIG_NET
+	case BPF_FUNC_skb_output:
+		return &bpf_skb_output_proto;
+	case BPF_FUNC_xdp_output:
+		return &bpf_xdp_output_proto;
+	case BPF_FUNC_skc_to_tcp6_sock:
+		return &bpf_skc_to_tcp6_sock_proto;
+	case BPF_FUNC_skc_to_tcp_sock:
+		return &bpf_skc_to_tcp_sock_proto;
+	case BPF_FUNC_skc_to_tcp_timewait_sock:
+		return &bpf_skc_to_tcp_timewait_sock_proto;
+	case BPF_FUNC_skc_to_tcp_request_sock:
+		return &bpf_skc_to_tcp_request_sock_proto;
+	case BPF_FUNC_skc_to_udp6_sock:
+		return &bpf_skc_to_udp6_sock_proto;
+#endif
+	case BPF_FUNC_seq_printf:
+		return prog->expected_attach_type == BPF_TRACE_ITER ?
+		       &bpf_seq_printf_proto :
+		       NULL;
+	case BPF_FUNC_seq_write:
+		return prog->expected_attach_type == BPF_TRACE_ITER ?
+		       &bpf_seq_write_proto :
+		       NULL;
+	case BPF_FUNC_seq_printf_btf:
+		return prog->expected_attach_type == BPF_TRACE_ITER ?
+		       &bpf_seq_printf_btf_proto :
+		       NULL;
+	case BPF_FUNC_d_path:
+		return &bpf_d_path_proto;
+	default:
+		return raw_tp_prog_func_proto(func_id, prog);
 	}
 }
 
...
 				    const struct bpf_prog *prog,
 				    struct bpf_insn_access_aux *info)
 {
-	/* largest tracepoint in the kernel has 12 args */
-	if (off < 0 || off >= sizeof(__u64) * 12)
+	if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
 		return false;
 	if (type != BPF_READ)
 		return false;
...
 	return true;
 }
 
+static bool tracing_prog_is_valid_access(int off, int size,
+					 enum bpf_access_type type,
+					 const struct bpf_prog *prog,
+					 struct bpf_insn_access_aux *info)
+{
+	if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
+		return false;
+	if (type != BPF_READ)
+		return false;
+	if (off % size != 0)
+		return false;
+	return btf_ctx_access(off, size, type, prog, info);
+}
+
+int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog,
+				     const union bpf_attr *kattr,
+				     union bpf_attr __user *uattr)
+{
+	return -ENOTSUPP;
+}
+
 const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
 	.get_func_proto  = raw_tp_prog_func_proto,
 	.is_valid_access = raw_tp_prog_is_valid_access,
 };
 
 const struct bpf_prog_ops raw_tracepoint_prog_ops = {
+#ifdef CONFIG_NET
+	.test_run = bpf_prog_test_run_raw_tp,
+#endif
+};
+
+const struct bpf_verifier_ops tracing_verifier_ops = {
+	.get_func_proto  = tracing_prog_func_proto,
+	.is_valid_access = tracing_prog_is_valid_access,
+};
+
+const struct bpf_prog_ops tracing_prog_ops = {
+	.test_run = bpf_prog_test_run_tracing,
+};
+
+static bool raw_tp_writable_prog_is_valid_access(int off, int size,
+						 enum bpf_access_type type,
+						 const struct bpf_prog *prog,
+						 struct bpf_insn_access_aux *info)
+{
+	if (off == 0) {
+		if (size != sizeof(u64) || type != BPF_READ)
+			return false;
+		info->reg_type = PTR_TO_TP_BUFFER;
+	}
+	return raw_tp_prog_is_valid_access(off, size, type, prog, info);
+}
+
+const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
+	.get_func_proto  = raw_tp_prog_func_proto,
+	.is_valid_access = raw_tp_writable_prog_is_valid_access,
+};
+
+const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
 };
 
 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
...
 int perf_event_attach_bpf_prog(struct perf_event *event,
 			       struct bpf_prog *prog)
 {
-	struct bpf_prog_array __rcu *old_array;
+	struct bpf_prog_array *old_array;
 	struct bpf_prog_array *new_array;
 	int ret = -EEXIST;
 
...
 	if (event->prog)
 		goto unlock;
 
-	old_array = event->tp_event->prog_array;
+	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
 	if (old_array &&
 	    bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
 		ret = -E2BIG;
...
 
 void perf_event_detach_bpf_prog(struct perf_event *event)
 {
-	struct bpf_prog_array __rcu *old_array;
+	struct bpf_prog_array *old_array;
 	struct bpf_prog_array *new_array;
 	int ret;
 
...
 	if (!event->prog)
 		goto unlock;
 
-	old_array = event->tp_event->prog_array;
+	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
 	ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
 	if (ret == -ENOENT)
 		goto unlock;
...
 {
 	struct perf_event_query_bpf __user *uquery = info;
 	struct perf_event_query_bpf query = {};
+	struct bpf_prog_array *progs;
 	u32 *ids, prog_cnt, ids_len;
 	int ret;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!perfmon_capable())
 		return -EPERM;
 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
 		return -EINVAL;
...
 	 */
 
 	mutex_lock(&bpf_event_mutex);
-	ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
-				       ids,
-				       ids_len,
-				       &prog_cnt);
+	progs = bpf_event_rcu_dereference(event->tp_event->prog_array);
+	ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt);
 	mutex_unlock(&bpf_event_mutex);
 
 	if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
...
 extern struct bpf_raw_event_map __start__bpf_raw_tp[];
 extern struct bpf_raw_event_map __stop__bpf_raw_tp[];
 
-struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
+struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
 {
 	struct bpf_raw_event_map *btp = __start__bpf_raw_tp;
 
...
 		if (!strcmp(btp->tp->name, name))
 			return btp;
 	}
-	return NULL;
+
+	return bpf_get_raw_tracepoint_module(name);
+}
+
+void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
+{
+	struct module *mod;
+
+	preempt_disable();
+	mod = __module_address((unsigned long)btp);
+	module_put(mod);
+	preempt_enable();
 }
 
 static __always_inline
 void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
 {
+	cant_sleep();
 	rcu_read_lock();
-	preempt_disable();
 	(void) BPF_PROG_RUN(prog, args);
-	preempt_enable();
 	rcu_read_unlock();
 }
 
...
 	 * available in this tracepoint
 	 */
 	if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
+		return -EINVAL;
+
+	if (prog->aux->max_tp_access > btp->writable_size)
 		return -EINVAL;
 
 	return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func,
...
 
 	return err;
 }
+
+static int __init send_signal_irq_work_init(void)
+{
+	int cpu;
+	struct send_signal_irq_work *work;
+
+	for_each_possible_cpu(cpu) {
+		work = per_cpu_ptr(&send_signal_work, cpu);
+		init_irq_work(&work->irq_work, do_bpf_send_signal);
+	}
+	return 0;
+}
+
+subsys_initcall(send_signal_irq_work_init);
+
+#ifdef CONFIG_MODULES
+static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
+			    void *module)
+{
+	struct bpf_trace_module *btm, *tmp;
+	struct module *mod = module;
+	int ret = 0;
+
+	if (mod->num_bpf_raw_events == 0 ||
+	    (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
+		goto out;
+
+	mutex_lock(&bpf_module_mutex);
+
+	switch (op) {
+	case MODULE_STATE_COMING:
+		btm = kzalloc(sizeof(*btm), GFP_KERNEL);
+		if (btm) {
+			btm->module = module;
+			list_add(&btm->list, &bpf_trace_modules);
+		} else {
+			ret = -ENOMEM;
+		}
+		break;
+	case MODULE_STATE_GOING:
+		list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) {
+			if (btm->module == module) {
+				list_del(&btm->list);
+				kfree(btm);
+				break;
+			}
+		}
+		break;
+	}
+
+	mutex_unlock(&bpf_module_mutex);
+
+out:
+	return notifier_from_errno(ret);
+}
+
+static struct notifier_block bpf_module_nb = {
+	.notifier_call = bpf_event_notify,
+};
+
+static int __init bpf_event_init(void)
+{
+	register_module_notifier(&bpf_module_nb);
+	return 0;
+}
+
+fs_initcall(bpf_event_init);
+#endif /* CONFIG_MODULES */
---|