@@ ... @@
  * Mode 2 allows user-defined system call filters in the form
  * of Berkeley Packet Filters/Linux Socket Filters.
  */
+#define pr_fmt(fmt) "seccomp: " fmt

 #include <linux/refcount.h>
 #include <linux/audit.h>
@@ ... @@
 #include <linux/syscalls.h>
 #include <linux/sysctl.h>

-/* Not exposed in headers: strictly internal use only. */
-#define SECCOMP_MODE_DEAD (SECCOMP_MODE_FILTER + 1)
-
 #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
 #include <asm/syscall.h>
 #endif

 #ifdef CONFIG_SECCOMP_FILTER
+#include <linux/file.h>
 #include <linux/filter.h>
 #include <linux/pid.h>
 #include <linux/ptrace.h>
 #include <linux/capability.h>
 #include <linux/tracehook.h>
 #include <linux/uaccess.h>
+#include <linux/anon_inodes.h>
+#include <linux/lockdep.h>
+
+/*
+ * When SECCOMP_IOCTL_NOTIF_ID_VALID was first introduced, it had the
+ * wrong direction flag in the ioctl number. This is the broken one,
+ * which the kernel needs to keep supporting until all userspaces stop
+ * using the wrong command number.
+ */
+#define SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR	SECCOMP_IOR(2, __u64)
+
+enum notify_state {
+	SECCOMP_NOTIFY_INIT,
+	SECCOMP_NOTIFY_SENT,
+	SECCOMP_NOTIFY_REPLIED,
+};
+
+struct seccomp_knotif {
---|
+	/* The task whose filter triggered the notification. */
---|
+	struct task_struct *task;
+
+	/* The "cookie" for this request; this is unique for this filter. */
+	u64 id;
+
---|
+	/*
+	 * The seccomp data. This pointer is valid the entire time this
+	 * notification is active, since it comes from __seccomp_filter,
+	 * whose frame outlives the whole notification lifecycle.
+	 */
---|
+	const struct seccomp_data *data;
+
+	/*
+	 * Notification states. When SECCOMP_RET_USER_NOTIF is returned, a
+	 * struct seccomp_knotif is created and starts out in INIT. Once the
+	 * handler reads the notification off of an FD, it transitions to SENT.
+	 * If a signal is received the state transitions back to INIT and
+	 * another message is sent. When the userspace handler replies, state
+	 * transitions to REPLIED.
+	 */
+	enum notify_state state;
+
+	/* The return values, only valid when in SECCOMP_NOTIFY_REPLIED */
+	int error;
+	long val;
+	u32 flags;
+
+	/*
+	 * Signals when this has changed states, such as the listener
+	 * dying, a new seccomp addfd message, or changing to REPLIED
+	 */
+	struct completion ready;
+
+	struct list_head list;
+
+	/* outstanding addfd requests */
+	struct list_head addfd;
+};
+
+/**
+ * struct seccomp_kaddfd - container for seccomp_addfd ioctl messages
+ *
+ * @file: A reference to the file to install in the other task
+ * @fd: The fd number to install it at. If the fd number is -1, it means the
+ *      installing process should allocate the fd as normal.
+ * @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
+ *         is allowed.
+ * @ret: The return value of the installing process. It is set to the fd num
+ *       upon success (>= 0).
+ * @completion: Indicates that the installing process has completed fd
+ *              installation, or gone away (either due to successful
+ *              reply, or signal)
+ *
+ */
+struct seccomp_kaddfd {
+	struct file *file;
+	int fd;
+	unsigned int flags;
+
+	/* To only be set on reply */
+	int ret;
+	struct completion completion;
+	struct list_head list;
+};
+
+/**
+ * struct notification - container for seccomp userspace notifications. Since
+ * most seccomp filters will not have notification listeners attached and this
+ * structure is fairly large, we store the notification-specific stuff in a
+ * separate structure.
+ *
+ * @request: A semaphore that users of this notification can wait on for
+ *           changes. Actual reads and writes are still controlled with
+ *           filter->notify_lock.
+ * @next_id: The id of the next request.
+ * @notifications: A list of struct seccomp_knotif elements.
+ */
+struct notification {
+	struct semaphore request;
+	u64 next_id;
+	struct list_head notifications;
+};
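
[For orientation: struct seccomp_knotif above never leaves the kernel. What the listener actually reads and writes are the UAPI counterparts from include/uapi/linux/seccomp.h, reproduced here as a reference sketch (field order as in the mainline header):]

	struct seccomp_notif {
		__u64 id;		/* matches seccomp_knotif.id */
		__u32 pid;		/* task_pid_vnr() of the blocked task */
		__u32 flags;
		struct seccomp_data data;
	};

	struct seccomp_notif_resp {
		__u64 id;
		__s64 val;		/* becomes seccomp_knotif.val */
		__s32 error;		/* becomes seccomp_knotif.error */
		__u32 flags;
	};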
---|
+
+#ifdef SECCOMP_ARCH_NATIVE
+/**
+ * struct action_cache - per-filter cache of seccomp actions per
+ * arch/syscall pair
+ *
+ * @allow_native: A bitmap where each bit represents whether the
+ *		  filter will always allow the syscall, for the
+ *		  native architecture.
+ * @allow_compat: A bitmap where each bit represents whether the
+ *		  filter will always allow the syscall, for the
+ *		  compat architecture.
+ */
+struct action_cache {
+	DECLARE_BITMAP(allow_native, SECCOMP_ARCH_NATIVE_NR);
+#ifdef SECCOMP_ARCH_COMPAT
+	DECLARE_BITMAP(allow_compat, SECCOMP_ARCH_COMPAT_NR);
+#endif
+};
+#else
+struct action_cache { };
+
+static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
+					     const struct seccomp_data *sd)
+{
+	return false;
+}
+
+static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter)
+{
+}
+#endif /* SECCOMP_ARCH_NATIVE */

 /**
  * struct seccomp_filter - container for seccomp BPF programs
  *
- * @usage: reference count to manage the object lifetime.
- *         get/put helpers should be used when accessing an instance
- *         outside of a lifetime-guarded section. In general, this
- *         is only needed for handling filters shared across tasks.
+ * @refs: Reference count to manage the object lifetime.
+ *	  A filter's reference count is incremented for each directly
+ *	  attached task, once for the dependent filter, and if
+ *	  requested for the user notifier. When @refs reaches zero,
+ *	  the filter can be freed.
+ * @users: A filter's @users count is incremented for each directly
+ *	   attached task (filter installation, fork(), thread_sync),
+ *	   and once for the dependent filter (tracked in filter->prev).
+ *	   When it reaches zero it indicates that no direct or indirect
+ *	   users of that filter exist. No new tasks can get associated with
+ *	   this filter after reaching 0. The @users count is always smaller
+ *	   or equal to @refs. Hence, reaching 0 for @users does not mean
+ *	   the filter can be freed.
+ * @cache: cache of arch/syscall mappings to actions
  * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged
  * @prev: points to a previously installed, or inherited, filter
  * @prog: the BPF program to evaluate
+ * @notif: the struct that holds all notification related information
+ * @notify_lock: A lock for all notification-related accesses.
+ * @wqh: A wait queue for poll if a notifier is in use.
  *
  * seccomp_filter objects are organized in a tree linked via the @prev
  * pointer. For any task, it appears to be a singly-linked list starting
@@ ... @@
  * how namespaces work.
  *
  * seccomp_filter objects should never be modified after being attached
- * to a task_struct (other than @usage).
+ * to a task_struct (other than @refs).
  */
 struct seccomp_filter {
-	refcount_t usage;
+	refcount_t refs;
+	refcount_t users;
 	bool log;
+	struct action_cache cache;
 	struct seccomp_filter *prev;
 	struct bpf_prog *prog;
+	struct notification *notif;
+	struct mutex notify_lock;
+	wait_queue_head_t wqh;
 };

 /* Limit any path through the tree to 256KB worth of instructions. */
@@ ... @@
  */
 static void populate_seccomp_data(struct seccomp_data *sd)
 {
+	/*
---|
+	 * Instead of using current_pt_regs(), we're already doing the work
---|
+	 * to safely fetch "current", so just use "task" everywhere below.
+	 */
 	struct task_struct *task = current;
 	struct pt_regs *regs = task_pt_regs(task);
 	unsigned long args[6];

 	sd->nr = syscall_get_nr(task, regs);
-	sd->arch = syscall_get_arch();
-	syscall_get_arguments(task, regs, 0, 6, args);
+	sd->arch = syscall_get_arch(task);
+	syscall_get_arguments(task, regs, args);
 	sd->args[0] = args[0];
 	sd->args[1] = args[1];
 	sd->args[2] = args[2];
@@ ... @@
 	return 0;
 }

+#ifdef SECCOMP_ARCH_NATIVE
+static inline bool seccomp_cache_check_allow_bitmap(const void *bitmap,
+						    size_t bitmap_size,
+						    int syscall_nr)
+{
+	if (unlikely(syscall_nr < 0 || syscall_nr >= bitmap_size))
+		return false;
+	syscall_nr = array_index_nospec(syscall_nr, bitmap_size);
+
+	return test_bit(syscall_nr, bitmap);
+}
+
+/**
+ * seccomp_cache_check_allow - lookup seccomp cache
+ * @sfilter: The seccomp filter
+ * @sd: The seccomp data to lookup the cache with
+ *
+ * Returns true if the seccomp_data is cached and allowed.
+ */
+static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
+					     const struct seccomp_data *sd)
+{
+	int syscall_nr = sd->nr;
+	const struct action_cache *cache = &sfilter->cache;
+
+#ifndef SECCOMP_ARCH_COMPAT
+	/* A native-only architecture doesn't need to check sd->arch. */
+	return seccomp_cache_check_allow_bitmap(cache->allow_native,
+						SECCOMP_ARCH_NATIVE_NR,
+						syscall_nr);
+#else
+	if (likely(sd->arch == SECCOMP_ARCH_NATIVE))
+		return seccomp_cache_check_allow_bitmap(cache->allow_native,
+							SECCOMP_ARCH_NATIVE_NR,
+							syscall_nr);
+	if (likely(sd->arch == SECCOMP_ARCH_COMPAT))
+		return seccomp_cache_check_allow_bitmap(cache->allow_compat,
+							SECCOMP_ARCH_COMPAT_NR,
+							syscall_nr);
+#endif /* SECCOMP_ARCH_COMPAT */
+
+	WARN_ON_ONCE(true);
+	return false;
+}
+#endif /* SECCOMP_ARCH_NATIVE */
+
 /**
  * seccomp_run_filters - evaluates all seccomp filters against @sd
  * @sd: optional seccomp data to be passed to filters
@@ ... @@
 static u32 seccomp_run_filters(const struct seccomp_data *sd,
 			       struct seccomp_filter **match)
 {
-	struct seccomp_data sd_local;
 	u32 ret = SECCOMP_RET_ALLOW;
 	/* Make sure cross-thread synced filter points somewhere sane. */
 	struct seccomp_filter *f =
 			READ_ONCE(current->seccomp.filter);

 	/* Ensure unexpected behavior doesn't result in failing open. */
-	if (unlikely(WARN_ON(f == NULL)))
+	if (WARN_ON(f == NULL))
 		return SECCOMP_RET_KILL_PROCESS;

-	if (!sd) {
-		populate_seccomp_data(&sd_local);
-		sd = &sd_local;
-	}
+	if (seccomp_cache_check_allow(f, sd))
+		return SECCOMP_RET_ALLOW;

 	/*
 	 * All filters in the list are evaluated and the lowest BPF return
 	 * value always takes priority (ignoring the DATA).
 	 */
 	for (; f; f = f->prev) {
-		u32 cur_ret = BPF_PROG_RUN(f->prog, sd);
+		u32 cur_ret = bpf_prog_run_pin_on_cpu(f->prog, sd);

 		if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) {
 			ret = cur_ret;
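
[A hedged worked example of the "lowest return value wins" rule in this loop, using the mainline SECCOMP_RET_* values; not part of the patch:]

	u32 a = SECCOMP_RET_ALLOW;	/* 0x7fff0000 */
	u32 b = SECCOMP_RET_ERRNO | 5;	/* 0x00050005: ERRNO action, data 5 */

	/*
	 * ACTION_ONLY() masks with SECCOMP_RET_ACTION_FULL (0xffff0000) and
	 * compares as s32, so the ERRNO action (0x00050000) is lower than
	 * ALLOW (0x7fff0000) and wins, while SECCOMP_RET_KILL_PROCESS
	 * (0x80000000) becomes negative as s32 and outranks everything.
	 */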
---|
@@ ... @@
---|
 	 * Expects sighand and cred_guard_mutex locks to be held.
 	 *
 	 * Returns 0 on success, -ve on error, or the pid of a thread which was
-	 * either not in the correct seccomp mode or it did not have an ancestral
+	 * either not in the correct seccomp mode or did not have an ancestral
 	 * seccomp filter.
 	 */
 static inline pid_t seccomp_can_sync_threads(void)
@@ ... @@
 		/* Return the first thread that cannot be synchronized. */
 		failed = task_pid_vnr(thread);
 		/* If the pid cannot be resolved, then return -ESRCH */
-		if (unlikely(WARN_ON(failed == 0)))
+		if (WARN_ON(failed == 0))
 			failed = -ESRCH;
 		return failed;
 	}

 	return 0;
+}
+
+static inline void seccomp_filter_free(struct seccomp_filter *filter)
+{
+	if (filter) {
+		bpf_prog_destroy(filter->prog);
+		kfree(filter);
+	}
+}
+
+static void __seccomp_filter_orphan(struct seccomp_filter *orig)
+{
+	while (orig && refcount_dec_and_test(&orig->users)) {
+		if (waitqueue_active(&orig->wqh))
+			wake_up_poll(&orig->wqh, EPOLLHUP);
+		orig = orig->prev;
+	}
+}
+
+static void __put_seccomp_filter(struct seccomp_filter *orig)
+{
+	/* Clean up single-reference branches iteratively. */
+	while (orig && refcount_dec_and_test(&orig->refs)) {
+		struct seccomp_filter *freeme = orig;
+		orig = orig->prev;
+		seccomp_filter_free(freeme);
+	}
+}
+
+static void __seccomp_filter_release(struct seccomp_filter *orig)
+{
+	/* Notify about any unused filters in the task's former filter tree. */
+	__seccomp_filter_orphan(orig);
+	/* Finally drop all references to the task's former tree. */
+	__put_seccomp_filter(orig);
+}
+
+/**
+ * seccomp_filter_release - Detach the task from its filter tree,
+ *			    drop its reference count, and notify
+ *			    about unused filters
+ *
---|
+ * This function should only be called when the task is exiting as
+ * it detaches it from its filter tree. Because of that, the READ_ONCE()
+ * and barriers that would normally be needed are unnecessary here.
---|
+ */
+void seccomp_filter_release(struct task_struct *tsk)
+{
+	struct seccomp_filter *orig = tsk->seccomp.filter;
+
+	/* Detach task from its filter tree. */
+	tsk->seccomp.filter = NULL;
+	__seccomp_filter_release(orig);
 }

 /**
@@ ... @@

 	/* Get a task reference for the new leaf node. */
 	get_seccomp_filter(caller);
+
 	/*
 	 * Drop the task reference to the shared ancestor since
 	 * current's path will hold a reference. (This also
 	 * allows a put before the assignment.)
 	 */
-	put_seccomp_filter(thread);
+	__seccomp_filter_release(thread->seccomp.filter);
+
+	/* Make our new filter tree visible. */
 	smp_store_release(&thread->seccomp.filter,
 			  caller->seccomp.filter);
+	atomic_set(&thread->seccomp.filter_count,
+		   atomic_read(&caller->seccomp.filter_count));

 	/*
 	 * Don't let an unprivileged task work around
@@ ... @@
 {
 	struct seccomp_filter *sfilter;
 	int ret;
-	const bool save_orig = IS_ENABLED(CONFIG_CHECKPOINT_RESTORE);
+	const bool save_orig =
+#if defined(CONFIG_CHECKPOINT_RESTORE) || defined(SECCOMP_ARCH_NATIVE)
+		true;
+#else
+		false;
+#endif

 	if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
 		return ERR_PTR(-EINVAL);
@@ ... @@
 	if (!sfilter)
 		return ERR_PTR(-ENOMEM);

+	mutex_init(&sfilter->notify_lock);
 	ret = bpf_prog_create_from_user(&sfilter->prog, fprog,
 					seccomp_check_filter, save_orig);
 	if (ret < 0) {
@@ ... @@
 		return ERR_PTR(ret);
 	}

-	refcount_set(&sfilter->usage, 1);
+	refcount_set(&sfilter->refs, 1);
+	refcount_set(&sfilter->users, 1);
+	init_waitqueue_head(&sfilter->wqh);

 	return sfilter;
 }
@@ ... @@
 	return filter;
 }

+#ifdef SECCOMP_ARCH_NATIVE
+/**
+ * seccomp_is_const_allow - check if filter is constant allow with given data
+ * @fprog: The BPF programs
+ * @sd: The seccomp data to check against, only syscall number and arch
+ *      number are considered constant.
+ */
+static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
+				   struct seccomp_data *sd)
+{
+	unsigned int reg_value = 0;
+	unsigned int pc;
+	bool op_res;
+
+	if (WARN_ON_ONCE(!fprog))
+		return false;
+
+	for (pc = 0; pc < fprog->len; pc++) {
+		struct sock_filter *insn = &fprog->filter[pc];
+		u16 code = insn->code;
+		u32 k = insn->k;
+
+		switch (code) {
+		case BPF_LD | BPF_W | BPF_ABS:
+			switch (k) {
+			case offsetof(struct seccomp_data, nr):
+				reg_value = sd->nr;
+				break;
+			case offsetof(struct seccomp_data, arch):
+				reg_value = sd->arch;
+				break;
+			default:
+				/* can't optimize (non-constant value load) */
+				return false;
+			}
+			break;
+		case BPF_RET | BPF_K:
+			/* reached return with constant values only, check allow */
+			return k == SECCOMP_RET_ALLOW;
+		case BPF_JMP | BPF_JA:
+			pc += insn->k;
+			break;
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JSET | BPF_K:
+			switch (BPF_OP(code)) {
+			case BPF_JEQ:
+				op_res = reg_value == k;
+				break;
+			case BPF_JGE:
+				op_res = reg_value >= k;
+				break;
+			case BPF_JGT:
+				op_res = reg_value > k;
+				break;
+			case BPF_JSET:
+				op_res = !!(reg_value & k);
+				break;
+			default:
+				/* can't optimize (unknown jump) */
+				return false;
+			}
+
+			pc += op_res ? insn->jt : insn->jf;
+			break;
+		case BPF_ALU | BPF_AND | BPF_K:
+			reg_value &= k;
+			break;
+		default:
+			/* can't optimize (unknown insn) */
+			return false;
+		}
+	}
+
+	/* ran off the end of the filter?! */
+	WARN_ON(1);
+	return false;
+}
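
[As a hedged illustration of what the emulator above can prove (a hand-written example, not taken from the patch): for any fixed (arch, nr) pair, every instruction in this classic allow-list touches only constants, so seccomp_is_const_allow() reaches a BPF_RET and returns a definite answer — true for __NR_read, false otherwise. A filter that loads sd->args[] would instead bail out at the "non-constant value load" case.]

	#include <stddef.h>
	#include <errno.h>
	#include <sys/syscall.h>
	#include <linux/audit.h>
	#include <linux/filter.h>
	#include <linux/seccomp.h>

	struct sock_filter allowlist[] = {
		/* Kill outright under an unexpected architecture. */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
			 offsetof(struct seccomp_data, arch)),
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
		/* Allow read(2); fail everything else with EPERM. */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
			 offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_read, 0, 1),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | EPERM),
	};

[Note the cache only records "always allow": the constant ALLOW for __NR_read keeps its bit set, while the constant ERRNO results clear theirs, since only SECCOMP_RET_ALLOW is a cacheable action.]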
---|
+
+static void seccomp_cache_prepare_bitmap(struct seccomp_filter *sfilter,
+					 void *bitmap, const void *bitmap_prev,
+					 size_t bitmap_size, int arch)
+{
+	struct sock_fprog_kern *fprog = sfilter->prog->orig_prog;
+	struct seccomp_data sd;
+	int nr;
+
+	if (bitmap_prev) {
+		/* The new filter must be as restrictive as the last. */
+		bitmap_copy(bitmap, bitmap_prev, bitmap_size);
+	} else {
+		/* Before any filters, all syscalls are always allowed. */
+		bitmap_fill(bitmap, bitmap_size);
+	}
+
+	for (nr = 0; nr < bitmap_size; nr++) {
+		/* No bitmap change: not a cacheable action. */
+		if (!test_bit(nr, bitmap))
+			continue;
+
+		sd.nr = nr;
+		sd.arch = arch;
+
+		/* No bitmap change: continue to always allow. */
+		if (seccomp_is_const_allow(fprog, &sd))
+			continue;
+
+		/*
+		 * Not a cacheable action: always run filters.
+		 * atomic clear_bit() not needed, filter not visible yet.
+		 */
+		__clear_bit(nr, bitmap);
+	}
+}
+
---|
+/**
+ * seccomp_cache_prepare - emulate the filter to find cacheable syscalls
+ * @sfilter: The seccomp filter
+ */
---|
+static void seccomp_cache_prepare(struct seccomp_filter *sfilter)
+{
+	struct action_cache *cache = &sfilter->cache;
+	const struct action_cache *cache_prev =
+		sfilter->prev ? &sfilter->prev->cache : NULL;
+
+	seccomp_cache_prepare_bitmap(sfilter, cache->allow_native,
+				     cache_prev ? cache_prev->allow_native : NULL,
+				     SECCOMP_ARCH_NATIVE_NR,
+				     SECCOMP_ARCH_NATIVE);
+
+#ifdef SECCOMP_ARCH_COMPAT
+	seccomp_cache_prepare_bitmap(sfilter, cache->allow_compat,
+				     cache_prev ? cache_prev->allow_compat : NULL,
+				     SECCOMP_ARCH_COMPAT_NR,
+				     SECCOMP_ARCH_COMPAT);
+#endif /* SECCOMP_ARCH_COMPAT */
+}
+#endif /* SECCOMP_ARCH_NATIVE */
+
 /**
  * seccomp_attach_filter: validate and attach filter
  * @flags:  flags to change filter behavior
@@ ... @@
  *
  * Caller must be holding current->sighand->siglock lock.
  *
- * Returns 0 on success, -ve on error.
+ * Returns 0 on success, -ve on error, or
+ *   - in TSYNC mode: the pid of a thread which was either not in the correct
+ *     seccomp mode or did not have an ancestral seccomp filter
+ *   - in NEW_LISTENER mode: the fd of the new listener
  */
 static long seccomp_attach_filter(unsigned int flags,
 				  struct seccomp_filter *filter)
@@ ... @@
 		int ret;

 		ret = seccomp_can_sync_threads();
-		if (ret)
-			return ret;
+		if (ret) {
+			if (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
+				return -ESRCH;
+			else
+				return ret;
+		}
 	}

 	/* Set log flag, if present. */
@@ ... @@
 	 * task reference.
 	 */
 	filter->prev = current->seccomp.filter;
+	seccomp_cache_prepare(filter);
 	current->seccomp.filter = filter;
+	atomic_inc(&current->seccomp.filter_count);

 	/* Now that the new filter is in place, synchronize to all threads. */
 	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
@@ ... @@

 static void __get_seccomp_filter(struct seccomp_filter *filter)
 {
-	/* Reference count is bounded by the number of total processes. */
-	refcount_inc(&filter->usage);
+	refcount_inc(&filter->refs);
 }

 /* get_seccomp_filter - increments the reference count of the filter on @tsk */
@@ ... @@
 	if (!orig)
 		return;
 	__get_seccomp_filter(orig);
+	refcount_inc(&orig->users);
 }

-static inline void seccomp_filter_free(struct seccomp_filter *filter)
-{
-	if (filter) {
-		bpf_prog_destroy(filter->prog);
-		kfree(filter);
-	}
-}
-
-static void __put_seccomp_filter(struct seccomp_filter *orig)
-{
-	/* Clean up single-reference branches iteratively. */
-	while (orig && refcount_dec_and_test(&orig->usage)) {
-		struct seccomp_filter *freeme = orig;
-		orig = orig->prev;
-		seccomp_filter_free(freeme);
-	}
-}
-
-/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
-void put_seccomp_filter(struct task_struct *tsk)
-{
-	__put_seccomp_filter(tsk->seccomp.filter);
-}
-
-static void seccomp_init_siginfo(siginfo_t *info, int syscall, int reason)
+static void seccomp_init_siginfo(kernel_siginfo_t *info, int syscall, int reason)
 {
 	clear_siginfo(info);
 	info->si_signo = SIGSYS;
 	info->si_code = SYS_SECCOMP;
 	info->si_call_addr = (void __user *)KSTK_EIP(current);
 	info->si_errno = reason;
-	info->si_arch = syscall_get_arch();
+	info->si_arch = syscall_get_arch(current);
 	info->si_syscall = syscall;
 }

@@ ... @@
  */
 static void seccomp_send_sigsys(int syscall, int reason)
 {
-	struct siginfo info;
+	struct kernel_siginfo info;
 	seccomp_init_siginfo(&info, syscall, reason);
-	force_sig_info(SIGSYS, &info, current);
+	force_sig_info(&info);
 }
 #endif	/* CONFIG_SECCOMP_FILTER */

@@ ... @@
 #define SECCOMP_LOG_TRACE		(1 << 4)
 #define SECCOMP_LOG_LOG			(1 << 5)
 #define SECCOMP_LOG_ALLOW		(1 << 6)
+#define SECCOMP_LOG_USER_NOTIF		(1 << 7)

 static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS |
 				    SECCOMP_LOG_KILL_THREAD  |
 				    SECCOMP_LOG_TRAP  |
 				    SECCOMP_LOG_ERRNO |
+				    SECCOMP_LOG_USER_NOTIF |
 				    SECCOMP_LOG_TRACE |
 				    SECCOMP_LOG_LOG;

@@ ... @@
 		break;
 	case SECCOMP_RET_TRACE:
 		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
+		break;
+	case SECCOMP_RET_USER_NOTIF:
+		log = requested && seccomp_actions_logged & SECCOMP_LOG_USER_NOTIF;
 		break;
 	case SECCOMP_RET_LOG:
 		log = seccomp_actions_logged & SECCOMP_LOG_LOG;
@@ ... @@
  */
 static const int mode1_syscalls[] = {
 	__NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
-	0, /* null terminated */
+	-1, /* negative terminated */
 };

 static void __secure_computing_strict(int this_syscall)
 {
-	const int *syscall_whitelist = mode1_syscalls;
+	const int *allowed_syscalls = mode1_syscalls;
 #ifdef CONFIG_COMPAT
 	if (in_compat_syscall())
-		syscall_whitelist = get_compat_mode1_syscalls();
+		allowed_syscalls = get_compat_mode1_syscalls();
 #endif
 	do {
-		if (*syscall_whitelist == this_syscall)
+		if (*allowed_syscalls == this_syscall)
 			return;
-	} while (*++syscall_whitelist);
+	} while (*++allowed_syscalls != -1);

 #ifdef SECCOMP_DEBUG
 	dump_stack();
 #endif
-	current->seccomp.mode = SECCOMP_MODE_DEAD;
 	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
 	do_exit(SIGKILL);
 }
@@ ... @@
 #else

 #ifdef CONFIG_SECCOMP_FILTER
+static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
+{
+	/*
+	 * Note: overflow is ok here, the id just needs to be unique per
+	 * filter.
+	 */
+	lockdep_assert_held(&filter->notify_lock);
+	return filter->notif->next_id++;
+}
+
+static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd)
+{
+	/*
+	 * Remove the notification, and reset the list pointers, indicating
+	 * that it has been handled.
+	 */
+	list_del_init(&addfd->list);
+	addfd->ret = receive_fd_replace(addfd->fd, addfd->file, addfd->flags);
+	complete(&addfd->completion);
+}
+
+static int seccomp_do_user_notification(int this_syscall,
+					struct seccomp_filter *match,
+					const struct seccomp_data *sd)
+{
+	int err;
+	u32 flags = 0;
+	long ret = 0;
+	struct seccomp_knotif n = {};
+	struct seccomp_kaddfd *addfd, *tmp;
+
+	mutex_lock(&match->notify_lock);
+	err = -ENOSYS;
+	if (!match->notif)
+		goto out;
+
+	n.task = current;
+	n.state = SECCOMP_NOTIFY_INIT;
+	n.data = sd;
+	n.id = seccomp_next_notify_id(match);
+	init_completion(&n.ready);
+	list_add(&n.list, &match->notif->notifications);
+	INIT_LIST_HEAD(&n.addfd);
+
+	up(&match->notif->request);
+	wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
+
+	/*
+	 * This is where we wait for a reply from userspace.
+	 */
+	do {
+		mutex_unlock(&match->notify_lock);
+		err = wait_for_completion_interruptible(&n.ready);
+		mutex_lock(&match->notify_lock);
+		if (err != 0)
+			goto interrupted;
+
+		addfd = list_first_entry_or_null(&n.addfd,
+						 struct seccomp_kaddfd, list);
---|
+		/* Check if we were woken up by an addfd message */
---|
+		if (addfd)
+			seccomp_handle_addfd(addfd);
+
+	} while (n.state != SECCOMP_NOTIFY_REPLIED);
+
+	ret = n.val;
+	err = n.error;
+	flags = n.flags;
+
+interrupted:
+	/* If there were any pending addfd calls, clear them out */
+	list_for_each_entry_safe(addfd, tmp, &n.addfd, list) {
+		/* The process went away before we got a chance to handle it */
+		addfd->ret = -ESRCH;
+		list_del_init(&addfd->list);
+		complete(&addfd->completion);
+	}
+
+	/*
+	 * Note that it's possible the listener died in between the time when
+	 * we were notified of a response (or a signal) and when we were able to
+	 * re-acquire the lock, so only delete from the list if the
+	 * notification actually exists.
+	 *
+	 * Also note that this test is only valid because there's no way to
+	 * *reattach* to a notifier right now. If one is added, we'll need to
+	 * keep track of the notif itself and make sure they match here.
+	 */
+	if (match->notif)
+		list_del(&n.list);
+out:
+	mutex_unlock(&match->notify_lock);
+
+	/* Userspace requests to continue the syscall. */
+	if (flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE)
+		return 0;
+
+	syscall_set_return_value(current, current_pt_regs(),
+				 err, ret);
+	return -1;
+}
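
[The kernel half above blocks in wait_for_completion_interruptible() until the listener answers. A minimal userspace sketch of the other half follows; it is not part of the patch, and handle_notifications() with its blanket-deny policy is illustrative only:]

	#include <errno.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/seccomp.h>

	static int handle_notifications(int listener)
	{
		struct seccomp_notif req;
		struct seccomp_notif_resp resp;

		for (;;) {
			/* RECV rejects non-zeroed buffers (check_zeroed_user() above). */
			memset(&req, 0, sizeof(req));
			if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req) < 0)
				return -1;

			memset(&resp, 0, sizeof(resp));
			resp.id = req.id;
			resp.error = -EPERM;	/* deny the syscall, as an example */

			/*
			 * Guard against pid recycling: the id is only valid while
			 * the request is still in the SENT state.
			 */
			if (ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id) == 0)
				ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
		}
	}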
---|
+
 static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 			    const bool recheck_after_trace)
 {
 	u32 filter_ret, action;
 	struct seccomp_filter *match = NULL;
 	int data;
+	struct seccomp_data sd_local;

 	/*
 	 * Make sure that any changes to mode from another thread have
 	 * been seen after TIF_SECCOMP was seen.
 	 */
 	rmb();
+
+	if (!sd) {
+		populate_seccomp_data(&sd_local);
+		sd = &sd_local;
+	}

 	filter_ret = seccomp_run_filters(sd, &match);
 	data = filter_ret & SECCOMP_RET_DATA;
@@ ... @@
 		/* Set low-order bits as an errno, capped at MAX_ERRNO. */
 		if (data > MAX_ERRNO)
 			data = MAX_ERRNO;
-		syscall_set_return_value(current, task_pt_regs(current),
+		syscall_set_return_value(current, current_pt_regs(),
 					 -data, 0);
 		goto skip;

 	case SECCOMP_RET_TRAP:
 		/* Show the handler the original registers. */
-		syscall_rollback(current, task_pt_regs(current));
+		syscall_rollback(current, current_pt_regs());
 		/* Let the filter pass back 16 bits of data. */
 		seccomp_send_sigsys(this_syscall, data);
 		goto skip;
@@ ... @@
 		/* ENOSYS these calls if there is no tracer attached. */
 		if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
 			syscall_set_return_value(current,
-						 task_pt_regs(current),
+						 current_pt_regs(),
 						 -ENOSYS, 0);
 			goto skip;
 		}
@@ ... @@
 		if (fatal_signal_pending(current))
 			goto skip;
 		/* Check if the tracer forced the syscall to be skipped. */
-		this_syscall = syscall_get_nr(current, task_pt_regs(current));
+		this_syscall = syscall_get_nr(current, current_pt_regs());
 		if (this_syscall < 0)
 			goto skip;

@@ ... @@
 		 */
 		if (__seccomp_filter(this_syscall, NULL, true))
 			return -1;
+
+		return 0;
+
+	case SECCOMP_RET_USER_NOTIF:
+		if (seccomp_do_user_notification(this_syscall, match, sd))
+			goto skip;

 		return 0;

@@ ... @@
 	case SECCOMP_RET_KILL_THREAD:
 	case SECCOMP_RET_KILL_PROCESS:
 	default:
-		current->seccomp.mode = SECCOMP_MODE_DEAD;
 		seccomp_log(this_syscall, SIGSYS, action, true);
 		/* Dump core only if this is the last remaining thread. */
-		if (action == SECCOMP_RET_KILL_PROCESS ||
+		if (action != SECCOMP_RET_KILL_THREAD ||
 		    get_nr_threads(current) == 1) {
-			siginfo_t info;
+			kernel_siginfo_t info;

 			/* Show the original registers in the dump. */
-			syscall_rollback(current, task_pt_regs(current));
+			syscall_rollback(current, current_pt_regs());
 			/* Trigger a manual coredump since do_exit skips it. */
 			seccomp_init_siginfo(&info, this_syscall, data);
 			do_coredump(&info);
 		}
-		if (action == SECCOMP_RET_KILL_PROCESS)
-			do_group_exit(SIGSYS);
-		else
+		if (action == SECCOMP_RET_KILL_THREAD)
 			do_exit(SIGSYS);
+		else
+			do_group_exit(SIGSYS);
 	}

 	unreachable();
@@ ... @@
 		return 0;

 	this_syscall = sd ? sd->nr :
-		syscall_get_nr(current, task_pt_regs(current));
+		syscall_get_nr(current, current_pt_regs());

 	switch (mode) {
 	case SECCOMP_MODE_STRICT:
@@ ... @@
 		return 0;
 	case SECCOMP_MODE_FILTER:
 		return __seccomp_filter(this_syscall, sd, false);
-	/* Surviving SECCOMP_RET_KILL_* must be proactively impossible. */
-	case SECCOMP_MODE_DEAD:
-		WARN_ON_ONCE(1);
-		do_exit(SIGKILL);
-		return -1;
 	default:
 		BUG();
 	}
@@ ... @@
 }

 #ifdef CONFIG_SECCOMP_FILTER
+static void seccomp_notify_free(struct seccomp_filter *filter)
+{
+	kfree(filter->notif);
+	filter->notif = NULL;
+}
+
+static void seccomp_notify_detach(struct seccomp_filter *filter)
+{
+	struct seccomp_knotif *knotif;
+
+	if (!filter)
+		return;
+
+	mutex_lock(&filter->notify_lock);
+
+	/*
+	 * If this file is being closed because e.g. the task who owned it
+	 * died, let's wake everyone up who was waiting on us.
+	 */
+	list_for_each_entry(knotif, &filter->notif->notifications, list) {
+		if (knotif->state == SECCOMP_NOTIFY_REPLIED)
+			continue;
+
+		knotif->state = SECCOMP_NOTIFY_REPLIED;
+		knotif->error = -ENOSYS;
+		knotif->val = 0;
+
+		/*
+		 * We do not need to wake up any pending addfd messages, as
+		 * the notifier will do that for us, as this just looks
+		 * like a standard reply.
+		 */
+		complete(&knotif->ready);
+	}
+
+	seccomp_notify_free(filter);
+	mutex_unlock(&filter->notify_lock);
+}
+
+static int seccomp_notify_release(struct inode *inode, struct file *file)
+{
+	struct seccomp_filter *filter = file->private_data;
+
+	seccomp_notify_detach(filter);
+	__put_seccomp_filter(filter);
+	return 0;
+}
+
---|
+/* must be called with notify_lock held */
---|
+static inline struct seccomp_knotif *
+find_notification(struct seccomp_filter *filter, u64 id)
+{
+	struct seccomp_knotif *cur;
+
+	lockdep_assert_held(&filter->notify_lock);
+
+	list_for_each_entry(cur, &filter->notif->notifications, list) {
+		if (cur->id == id)
+			return cur;
+	}
+
+	return NULL;
+}
+
+
+static long seccomp_notify_recv(struct seccomp_filter *filter,
+				void __user *buf)
+{
+	struct seccomp_knotif *knotif = NULL, *cur;
+	struct seccomp_notif unotif;
+	ssize_t ret;
+
+	/* Verify that we're not given garbage to keep struct extensible. */
+	ret = check_zeroed_user(buf, sizeof(unotif));
+	if (ret < 0)
+		return ret;
+	if (!ret)
+		return -EINVAL;
+
+	memset(&unotif, 0, sizeof(unotif));
+
+	ret = down_interruptible(&filter->notif->request);
+	if (ret < 0)
+		return ret;
+
+	mutex_lock(&filter->notify_lock);
+	list_for_each_entry(cur, &filter->notif->notifications, list) {
+		if (cur->state == SECCOMP_NOTIFY_INIT) {
+			knotif = cur;
+			break;
+		}
+	}
+
---|
+	/*
+	 * If we didn't find a notification, it could be that the task was
+	 * interrupted by a fatal signal between the time we were woken and
+	 * when we were able to acquire the notify_lock.
+	 */
---|
+	if (!knotif) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	unotif.id = knotif->id;
+	unotif.pid = task_pid_vnr(knotif->task);
+	unotif.data = *(knotif->data);
+
+	knotif->state = SECCOMP_NOTIFY_SENT;
+	wake_up_poll(&filter->wqh, EPOLLOUT | EPOLLWRNORM);
+	ret = 0;
+out:
+	mutex_unlock(&filter->notify_lock);
+
+	if (ret == 0 && copy_to_user(buf, &unotif, sizeof(unotif))) {
+		ret = -EFAULT;
+
+		/*
+		 * Userspace screwed up. To make sure that we keep this
+		 * notification alive, let's reset it back to INIT. It
+		 * may have died when we released the lock, so we need to make
+		 * sure it's still around.
+		 */
+		mutex_lock(&filter->notify_lock);
+		knotif = find_notification(filter, unotif.id);
+		if (knotif) {
+			knotif->state = SECCOMP_NOTIFY_INIT;
+			up(&filter->notif->request);
+		}
+		mutex_unlock(&filter->notify_lock);
+	}
+
+	return ret;
+}
+
+static long seccomp_notify_send(struct seccomp_filter *filter,
+				void __user *buf)
+{
+	struct seccomp_notif_resp resp = {};
+	struct seccomp_knotif *knotif;
+	long ret;
+
+	if (copy_from_user(&resp, buf, sizeof(resp)))
+		return -EFAULT;
+
+	if (resp.flags & ~SECCOMP_USER_NOTIF_FLAG_CONTINUE)
+		return -EINVAL;
+
+	if ((resp.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE) &&
+	    (resp.error || resp.val))
+		return -EINVAL;
+
+	ret = mutex_lock_interruptible(&filter->notify_lock);
+	if (ret < 0)
+		return ret;
+
+	knotif = find_notification(filter, resp.id);
+	if (!knotif) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	/* Allow exactly one reply. */
+	if (knotif->state != SECCOMP_NOTIFY_SENT) {
+		ret = -EINPROGRESS;
+		goto out;
+	}
+
+	ret = 0;
+	knotif->state = SECCOMP_NOTIFY_REPLIED;
+	knotif->error = resp.error;
+	knotif->val = resp.val;
+	knotif->flags = resp.flags;
+	complete(&knotif->ready);
+out:
+	mutex_unlock(&filter->notify_lock);
+	return ret;
+}
+
+static long seccomp_notify_id_valid(struct seccomp_filter *filter,
+				    void __user *buf)
+{
+	struct seccomp_knotif *knotif;
+	u64 id;
+	long ret;
+
+	if (copy_from_user(&id, buf, sizeof(id)))
+		return -EFAULT;
+
+	ret = mutex_lock_interruptible(&filter->notify_lock);
+	if (ret < 0)
+		return ret;
+
+	knotif = find_notification(filter, id);
+	if (knotif && knotif->state == SECCOMP_NOTIFY_SENT)
+		ret = 0;
+	else
+		ret = -ENOENT;
+
+	mutex_unlock(&filter->notify_lock);
+	return ret;
+}
+
+static long seccomp_notify_addfd(struct seccomp_filter *filter,
+				 struct seccomp_notif_addfd __user *uaddfd,
+				 unsigned int size)
+{
+	struct seccomp_notif_addfd addfd;
+	struct seccomp_knotif *knotif;
+	struct seccomp_kaddfd kaddfd;
+	int ret;
+
+	BUILD_BUG_ON(sizeof(addfd) < SECCOMP_NOTIFY_ADDFD_SIZE_VER0);
+	BUILD_BUG_ON(sizeof(addfd) != SECCOMP_NOTIFY_ADDFD_SIZE_LATEST);
+
+	if (size < SECCOMP_NOTIFY_ADDFD_SIZE_VER0 || size >= PAGE_SIZE)
+		return -EINVAL;
+
+	ret = copy_struct_from_user(&addfd, sizeof(addfd), uaddfd, size);
+	if (ret)
+		return ret;
+
+	if (addfd.newfd_flags & ~O_CLOEXEC)
+		return -EINVAL;
+
+	if (addfd.flags & ~SECCOMP_ADDFD_FLAG_SETFD)
+		return -EINVAL;
+
+	if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD))
+		return -EINVAL;
+
+	kaddfd.file = fget(addfd.srcfd);
+	if (!kaddfd.file)
+		return -EBADF;
+
+	kaddfd.flags = addfd.newfd_flags;
+	kaddfd.fd = (addfd.flags & SECCOMP_ADDFD_FLAG_SETFD) ?
+		    addfd.newfd : -1;
+	init_completion(&kaddfd.completion);
+
+	ret = mutex_lock_interruptible(&filter->notify_lock);
+	if (ret < 0)
+		goto out;
+
+	knotif = find_notification(filter, addfd.id);
+	if (!knotif) {
+		ret = -ENOENT;
+		goto out_unlock;
+	}
+
+	/*
+	 * We do not want to allow for FD injection to occur before the
+	 * notification has been picked up by a userspace handler, or after
+	 * the notification has been replied to.
+	 */
+	if (knotif->state != SECCOMP_NOTIFY_SENT) {
+		ret = -EINPROGRESS;
+		goto out_unlock;
+	}
+
+	list_add(&kaddfd.list, &knotif->addfd);
+	complete(&knotif->ready);
+	mutex_unlock(&filter->notify_lock);
+
+	/* Now we wait for it to be processed or be interrupted */
+	ret = wait_for_completion_interruptible(&kaddfd.completion);
+	if (ret == 0) {
+		/*
+		 * We had a successful completion. The other side has already
+		 * removed us from the addfd queue, and
+		 * wait_for_completion_interruptible has a memory barrier upon
+		 * success that lets us read this value directly without
+		 * locking.
+		 */
+		ret = kaddfd.ret;
+		goto out;
+	}
+
+	mutex_lock(&filter->notify_lock);
+	/*
+	 * Even though we were woken up by a signal and not a successful
---|
+	 * completion, a completion may have happened in the meantime.
---|
+	 *
+	 * We need to check again if the addfd request has been handled,
+	 * and if not, we will remove it from the queue.
+	 */
+	if (list_empty(&kaddfd.list))
+		ret = kaddfd.ret;
+	else
+		list_del(&kaddfd.list);
+
+out_unlock:
+	mutex_unlock(&filter->notify_lock);
+out:
+	fput(kaddfd.file);
+
+	return ret;
+}
+
+static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
+				 unsigned long arg)
+{
+	struct seccomp_filter *filter = file->private_data;
+	void __user *buf = (void __user *)arg;
+
+	/* Fixed-size ioctls */
+	switch (cmd) {
+	case SECCOMP_IOCTL_NOTIF_RECV:
+		return seccomp_notify_recv(filter, buf);
+	case SECCOMP_IOCTL_NOTIF_SEND:
+		return seccomp_notify_send(filter, buf);
+	case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
+	case SECCOMP_IOCTL_NOTIF_ID_VALID:
+		return seccomp_notify_id_valid(filter, buf);
+	}
+
+	/* Extensible Argument ioctls */
+#define EA_IOCTL(cmd)	((cmd) & ~(IOC_INOUT | IOCSIZE_MASK))
+	switch (EA_IOCTL(cmd)) {
+	case EA_IOCTL(SECCOMP_IOCTL_NOTIF_ADDFD):
+		return seccomp_notify_addfd(filter, buf, _IOC_SIZE(cmd));
+	default:
+		return -EINVAL;
+	}
+}
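
[EA_IOCTL() strips the direction and size bits so that any sized revision of struct seccomp_notif_addfd routes to the same handler, with _IOC_SIZE(cmd) telling copy_struct_from_user() how much to copy. A hedged sketch of the caller side; send_fd_to_target() is illustrative, and id/srcfd are assumed to come from the recv loop shown earlier:]

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/seccomp.h>

	static int send_fd_to_target(int listener, __u64 id, int srcfd)
	{
		struct seccomp_notif_addfd addfd = {
			.id = id,		/* notification being serviced */
			.srcfd = srcfd,		/* fd to install in the target */
			.newfd = 0,		/* unused without ..._FLAG_SETFD */
			.newfd_flags = O_CLOEXEC, /* the only flag accepted above */
			.flags = 0,		/* 0: kernel picks the fd number */
		};

		/* On success, returns the fd number now installed in the target. */
		return ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
	}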
---|
+
+static __poll_t seccomp_notify_poll(struct file *file,
+				    struct poll_table_struct *poll_tab)
+{
+	struct seccomp_filter *filter = file->private_data;
+	__poll_t ret = 0;
+	struct seccomp_knotif *cur;
+
+	poll_wait(file, &filter->wqh, poll_tab);
+
+	if (mutex_lock_interruptible(&filter->notify_lock) < 0)
+		return EPOLLERR;
+
+	list_for_each_entry(cur, &filter->notif->notifications, list) {
+		if (cur->state == SECCOMP_NOTIFY_INIT)
+			ret |= EPOLLIN | EPOLLRDNORM;
+		if (cur->state == SECCOMP_NOTIFY_SENT)
+			ret |= EPOLLOUT | EPOLLWRNORM;
+		if ((ret & EPOLLIN) && (ret & EPOLLOUT))
+			break;
+	}
+
+	mutex_unlock(&filter->notify_lock);
+
+	if (refcount_read(&filter->users) == 0)
+		ret |= EPOLLHUP;
+
+	return ret;
+}
+
+static const struct file_operations seccomp_notify_ops = {
+	.poll = seccomp_notify_poll,
+	.release = seccomp_notify_release,
+	.unlocked_ioctl = seccomp_notify_ioctl,
+	.compat_ioctl = seccomp_notify_ioctl,
+};
+
+static struct file *init_listener(struct seccomp_filter *filter)
+{
+	struct file *ret;
+
+	ret = ERR_PTR(-ENOMEM);
+	filter->notif = kzalloc(sizeof(*(filter->notif)), GFP_KERNEL);
+	if (!filter->notif)
+		goto out;
+
+	sema_init(&filter->notif->request, 0);
+	filter->notif->next_id = get_random_u64();
+	INIT_LIST_HEAD(&filter->notif->notifications);
+
+	ret = anon_inode_getfile("seccomp notify", &seccomp_notify_ops,
+				 filter, O_RDWR);
+	if (IS_ERR(ret))
+		goto out_notif;
+
+	/* The file has a reference to it now */
+	__get_seccomp_filter(filter);
+
+out_notif:
+	if (IS_ERR(ret))
+		seccomp_notify_free(filter);
+out:
+	return ret;
+}
| 1738 | +
| 1739 | +/*
| 1740 | + * Does @new_child have a listener while an ancestor also has a listener?
| 1741 | + * If so, we'll want to reject this filter.
| 1742 | + * This only has to be tested for the current process, even in the TSYNC case,
| 1743 | + * because TSYNC installs @new_child with the same parent on all threads.
| 1744 | + * Note that @new_child is not hooked up to its parent at this point yet, so
| 1745 | + * we use current->seccomp.filter.
| 1746 | + */
| 1747 | +static bool has_duplicate_listener(struct seccomp_filter *new_child)
| 1748 | +{
| 1749 | + struct seccomp_filter *cur;
| 1750 | +
| 1751 | + /* must be protected against concurrent TSYNC */
| 1752 | + lockdep_assert_held(&current->sighand->siglock);
| 1753 | +
| 1754 | + if (!new_child->notif)
| 1755 | + return false;
| 1756 | + for (cur = current->seccomp.filter; cur; cur = cur->prev) {
| 1757 | + if (cur->notif)
| 1758 | + return true;
| 1759 | + }
| 1760 | +
| 1761 | + return false;
| 1762 | +}
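
The consequence is that a task may hold at most one listener anywhere in its filter chain; attaching a second NEW_LISTENER filter fails. A hedged sketch, assuming prog points at an already-built classic-BPF program and no_new_privs is set:

    #include <errno.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/filter.h>
    #include <linux/seccomp.h>

    /* Returns 1 if the second listener was refused, as expected. */
    static int second_listener_is_rejected(struct sock_fprog *prog)
    {
            int fd1 = syscall(SYS_seccomp, SECCOMP_SET_MODE_FILTER,
                              SECCOMP_FILTER_FLAG_NEW_LISTENER, prog);
            int fd2 = syscall(SYS_seccomp, SECCOMP_SET_MODE_FILTER,
                              SECCOMP_FILTER_FLAG_NEW_LISTENER, prog);

            /* has_duplicate_listener() turns the second attach into -EBUSY. */
            return fd1 >= 0 && fd2 < 0 && errno == EBUSY;
    }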
| 1763 | +
848 | 1764 | /**
849 | 1765 | * seccomp_set_mode_filter: internal function for setting seccomp filter
850 | 1766 | * @flags: flags to change filter behavior
.. | .. |
864 | 1780 | const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
865 | 1781 | struct seccomp_filter *prepared = NULL;
866 | 1782 | long ret = -EINVAL;
| 1783 | + int listener = -1;
| 1784 | + struct file *listener_f = NULL;
867 | 1785 |
868 | 1786 | /* Validate flags. */
869 | 1787 | if (flags & ~SECCOMP_FILTER_FLAG_MASK)
| 1788 | + return -EINVAL;
| 1789 | +
| 1790 | + /*
| 1791 | + * In the successful case, NEW_LISTENER returns the new listener fd.
| 1792 | + * But in the failure case, TSYNC returns the thread that died. If you
| 1793 | + * combine these two flags, there's no way to tell whether something
| 1794 | + * succeeded or failed. So, disallow the combination unless the user
| 1795 | + * has explicitly opted in to TSYNC failures being reported as -ESRCH.
| 1796 | + */
| 1797 | + if ((flags & SECCOMP_FILTER_FLAG_TSYNC) &&
| 1798 | + (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) &&
| 1799 | + ((flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH) == 0))
870 | 1800 | return -EINVAL;
871 | 1801 |
872 | 1802 | /* Prepare the new filter before holding any locks. */
.. | .. |
874 | 1804 | if (IS_ERR(prepared))
875 | 1805 | return PTR_ERR(prepared);
876 | 1806 |
| 1807 | + if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
| 1808 | + listener = get_unused_fd_flags(O_CLOEXEC);
| 1809 | + if (listener < 0) {
| 1810 | + ret = listener;
| 1811 | + goto out_free;
| 1812 | + }
| 1813 | +
| 1814 | + listener_f = init_listener(prepared);
| 1815 | + if (IS_ERR(listener_f)) {
| 1816 | + put_unused_fd(listener);
| 1817 | + ret = PTR_ERR(listener_f);
| 1818 | + goto out_free;
| 1819 | + }
| 1820 | + }
| 1821 | +
877 | 1822 | /*
878 | 1823 | * Make sure we cannot change seccomp or nnp state via TSYNC
879 | 1824 | * while another thread is in the middle of calling exec.
880 | 1825 | */
881 | 1826 | if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
882 | 1827 | mutex_lock_killable(&current->signal->cred_guard_mutex))
883 | | - goto out_free;
| 1828 | + goto out_put_fd;
884 | 1829 |
885 | 1830 | spin_lock_irq(&current->sighand->siglock);
886 | 1831 |
887 | 1832 | if (!seccomp_may_assign_mode(seccomp_mode))
888 | 1833 | goto out;
| 1834 | +
| 1835 | + if (has_duplicate_listener(prepared)) {
| 1836 | + ret = -EBUSY;
| 1837 | + goto out;
| 1838 | + }
889 | 1839 |
890 | 1840 | ret = seccomp_attach_filter(flags, prepared);
891 | 1841 | if (ret)
.. | .. |
898 | 1848 | spin_unlock_irq(&current->sighand->siglock);
899 | 1849 | if (flags & SECCOMP_FILTER_FLAG_TSYNC)
900 | 1850 | mutex_unlock(&current->signal->cred_guard_mutex);
| 1851 | +out_put_fd:
| 1852 | + if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
| 1853 | + if (ret) {
| 1854 | + listener_f->private_data = NULL;
| 1855 | + fput(listener_f);
| 1856 | + put_unused_fd(listener);
| 1857 | + seccomp_notify_detach(prepared);
| 1858 | + } else {
| 1859 | + fd_install(listener, listener_f);
| 1860 | + ret = listener;
| 1861 | + }
| 1862 | + }
901 | 1863 |out_free:
902 | 1864 | seccomp_filter_free(prepared);
903 | 1865 | return ret;
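
From userspace, the success path above means the return value of seccomp(2) itself is the listener fd (opened O_CLOEXEC), which is exactly why the TSYNC combination is only allowed together with TSYNC_ESRCH. A hedged sketch, assuming the caller has set no_new_privs and built prog:

    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/filter.h>
    #include <linux/seccomp.h>

    static int install_with_listener(struct sock_fprog *prog)
    {
            /*
             * TSYNC + NEW_LISTENER is only unambiguous with TSYNC_ESRCH:
             * failures are then reported as -ESRCH instead of a thread id
             * that could be mistaken for a listener fd.
             */
            unsigned int flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
                                 SECCOMP_FILTER_FLAG_TSYNC |
                                 SECCOMP_FILTER_FLAG_TSYNC_ESRCH;

            /* On success, the return value is the new listener fd. */
            return syscall(SYS_seccomp, SECCOMP_SET_MODE_FILTER, flags, prog);
    }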
.. | .. |
922 | 1884 | case SECCOMP_RET_KILL_THREAD:
923 | 1885 | case SECCOMP_RET_TRAP:
924 | 1886 | case SECCOMP_RET_ERRNO:
| 1887 | + case SECCOMP_RET_USER_NOTIF:
925 | 1888 | case SECCOMP_RET_TRACE:
926 | 1889 | case SECCOMP_RET_LOG:
927 | 1890 | case SECCOMP_RET_ALLOW:
.. | .. |
933 | 1896 | return 0;
934 | 1897 | }
935 | 1898 |
| 1899 | +static long seccomp_get_notif_sizes(void __user *usizes)
| 1900 | +{
| 1901 | + struct seccomp_notif_sizes sizes = {
| 1902 | + .seccomp_notif = sizeof(struct seccomp_notif),
| 1903 | + .seccomp_notif_resp = sizeof(struct seccomp_notif_resp),
| 1904 | + .seccomp_data = sizeof(struct seccomp_data),
| 1905 | + };
| 1906 | +
| 1907 | + if (copy_to_user(usizes, &sizes, sizeof(sizes)))
| 1908 | + return -EFAULT;
| 1909 | +
| 1910 | + return 0;
| 1911 | +}
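
Userspace is expected to call SECCOMP_GET_NOTIF_SIZES before using the notification API, so that a supervisor built against older headers can notice when the kernel's structures have grown. A hedged sketch:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/seccomp.h>

    int main(void)
    {
            struct seccomp_notif_sizes sizes;

            /* flags must be zero for this operation. */
            if (syscall(SYS_seccomp, SECCOMP_GET_NOTIF_SIZES, 0, &sizes) < 0)
                    return 1;

            printf("notif=%hu resp=%hu data=%hu\n", sizes.seccomp_notif,
                   sizes.seccomp_notif_resp, sizes.seccomp_data);
            return 0;
    }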
| 1912 | +
936 | 1913 | /* Common entry point for both prctl and syscall. */
937 | 1914 | static long do_seccomp(unsigned int op, unsigned int flags,
938 | | - const char __user *uargs)
| 1915 | + void __user *uargs)
939 | 1916 | {
940 | 1917 | switch (op) {
941 | 1918 | case SECCOMP_SET_MODE_STRICT:
.. | .. |
949 | 1926 | return -EINVAL;
950 | 1927 |
951 | 1928 | return seccomp_get_action_avail(uargs);
| 1929 | + case SECCOMP_GET_NOTIF_SIZES:
| 1930 | + if (flags != 0)
| 1931 | + return -EINVAL;
| 1932 | +
| 1933 | + return seccomp_get_notif_sizes(uargs);
952 | 1934 | default:
953 | 1935 | return -EINVAL;
954 | 1936 | }
955 | 1937 | }
956 | 1938 |
957 | 1939 |SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
958 | | - const char __user *, uargs)
| 1940 | + void __user *, uargs)
959 | 1941 | {
960 | 1942 | return do_seccomp(op, flags, uargs);
961 | 1943 | }
.. | .. |
967 | 1949 | *
968 | 1950 | * Returns 0 on success or -EINVAL on failure.
969 | 1951 | */
970 | | -long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
| 1952 | +long prctl_set_seccomp(unsigned long seccomp_mode, void __user *filter)
971 | 1953 | {
972 | 1954 | unsigned int op;
973 | | - char __user *uargs;
| 1955 | + void __user *uargs;
974 | 1956 |
975 | 1957 | switch (seccomp_mode) {
976 | 1958 | case SECCOMP_MODE_STRICT:
.. | .. |
1122 | 2104 | #define SECCOMP_RET_KILL_THREAD_NAME "kill_thread"
1123 | 2105 | #define SECCOMP_RET_TRAP_NAME "trap"
1124 | 2106 | #define SECCOMP_RET_ERRNO_NAME "errno"
| 2107 | +#define SECCOMP_RET_USER_NOTIF_NAME "user_notif"
1125 | 2108 | #define SECCOMP_RET_TRACE_NAME "trace"
1126 | 2109 | #define SECCOMP_RET_LOG_NAME "log"
1127 | 2110 | #define SECCOMP_RET_ALLOW_NAME "allow"
.. | .. |
1131 | 2114 | SECCOMP_RET_KILL_THREAD_NAME " "
1132 | 2115 | SECCOMP_RET_TRAP_NAME " "
1133 | 2116 | SECCOMP_RET_ERRNO_NAME " "
| 2117 | + SECCOMP_RET_USER_NOTIF_NAME " "
1134 | 2118 | SECCOMP_RET_TRACE_NAME " "
1135 | 2119 | SECCOMP_RET_LOG_NAME " "
1136 | 2120 | SECCOMP_RET_ALLOW_NAME;
.. | .. |
1145 | 2129 | { SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME },
1146 | 2130 | { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
1147 | 2131 | { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
| 2132 | + { SECCOMP_LOG_USER_NOTIF, SECCOMP_RET_USER_NOTIF_NAME },
1148 | 2133 | { SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
1149 | 2134 | { SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME },
1150 | 2135 | { SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME },
.. | .. |
1217 | 2202 | return true;
1218 | 2203 | }
1219 | 2204 |
1220 | | -static int read_actions_logged(struct ctl_table *ro_table, void __user *buffer,
| 2205 | +static int read_actions_logged(struct ctl_table *ro_table, void *buffer,
1221 | 2206 | size_t *lenp, loff_t *ppos)
1222 | 2207 | {
1223 | 2208 | char names[sizeof(seccomp_actions_avail)];
.. | .. |
1235 | 2220 | return proc_dostring(&table, 0, buffer, lenp, ppos);
1236 | 2221 | }
1237 | 2222 |
1238 | | -static int write_actions_logged(struct ctl_table *ro_table, void __user *buffer,
| 2223 | +static int write_actions_logged(struct ctl_table *ro_table, void *buffer,
1239 | 2224 | size_t *lenp, loff_t *ppos, u32 *actions_logged)
1240 | 2225 | {
1241 | 2226 | char names[sizeof(seccomp_actions_avail)];
.. | .. |
1297 | 2282 | }
1298 | 2283 |
1299 | 2284 | static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
1300 | | - void __user *buffer, size_t *lenp,
| 2285 | + void *buffer, size_t *lenp,
1301 | 2286 | loff_t *ppos)
1302 | 2287 | {
1303 | 2288 | int ret;
.. | .. |
1343 | 2328 |
1344 | 2329 | hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table);
1345 | 2330 | if (!hdr)
1346 | | - pr_warn("seccomp: sysctl registration failed\n");
| 2331 | + pr_warn("sysctl registration failed\n");
1347 | 2332 | else
1348 | 2333 | kmemleak_not_leak(hdr);
1349 | 2334 |
---|