From 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5 Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 22 Oct 2024 10:36:11 +0000 Subject: [PATCH] 修改4g拨号为QMI,需要在系统里后台执行quectel-CM --- kernel/tools/testing/selftests/seccomp/seccomp_bpf.c | 1823 ++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 files changed, 1,494 insertions(+), 329 deletions(-) diff --git a/kernel/tools/testing/selftests/seccomp/seccomp_bpf.c b/kernel/tools/testing/selftests/seccomp/seccomp_bpf.c index 14cad65..413a7b9 100644 --- a/kernel/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/kernel/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1,10 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2012 The Chromium OS Authors. All rights reserved. - * Use of this source code is governed by the GPLv2 license. * * Test code for seccomp bpf. */ +#define _GNU_SOURCE #include <sys/types.h> /* @@ -34,18 +35,29 @@ #include <stdbool.h> #include <string.h> #include <time.h> +#include <limits.h> #include <linux/elf.h> #include <sys/uio.h> #include <sys/utsname.h> #include <sys/fcntl.h> #include <sys/mman.h> #include <sys/times.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <linux/kcmp.h> +#include <sys/resource.h> -#define _GNU_SOURCE #include <unistd.h> #include <sys/syscall.h> +#include <poll.h> #include "../kselftest_harness.h" +#include "../clone3/clone3_selftests.h" + +/* Attempt to de-conflict with the selftests tree. */ +#ifndef SKIP +#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) +#endif #ifndef PR_SET_PTRACER # define PR_SET_PTRACER 0x59616d61 @@ -109,12 +121,20 @@ # define __NR_seccomp 383 # elif defined(__aarch64__) # define __NR_seccomp 277 +# elif defined(__riscv) +# define __NR_seccomp 277 +# elif defined(__csky__) +# define __NR_seccomp 277 # elif defined(__hppa__) # define __NR_seccomp 338 # elif defined(__powerpc__) # define __NR_seccomp 358 # elif defined(__s390__) # define __NR_seccomp 348 +# elif defined(__xtensa__) +# define __NR_seccomp 337 +# elif defined(__sh__) +# define __NR_seccomp 372 # else # warning "seccomp syscall number unknown for this architecture" # define __NR_seccomp 0xffff @@ -131,6 +151,10 @@ #ifndef SECCOMP_GET_ACTION_AVAIL #define SECCOMP_GET_ACTION_AVAIL 2 +#endif + +#ifndef SECCOMP_GET_NOTIF_SIZES +#define SECCOMP_GET_NOTIF_SIZES 3 #endif #ifndef SECCOMP_FILTER_FLAG_TSYNC @@ -154,6 +178,92 @@ }; #endif +#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER +#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) +#endif + +#ifndef SECCOMP_RET_USER_NOTIF +#define SECCOMP_RET_USER_NOTIF 0x7fc00000U + +#define SECCOMP_IOC_MAGIC '!' +#define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) +#define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) +#define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type) +#define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type) + +/* Flags for seccomp notification fd ioctl. */ +#define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) +#define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ + struct seccomp_notif_resp) +#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64) + +struct seccomp_notif { + __u64 id; + __u32 pid; + __u32 flags; + struct seccomp_data data; +}; + +struct seccomp_notif_resp { + __u64 id; + __s64 val; + __s32 error; + __u32 flags; +}; + +struct seccomp_notif_sizes { + __u16 seccomp_notif; + __u16 seccomp_notif_resp; + __u16 seccomp_data; +}; +#endif + +#ifndef SECCOMP_IOCTL_NOTIF_ADDFD +/* On success, the return value is the remote process's added fd number */ +#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \ + struct seccomp_notif_addfd) + +/* valid flags for seccomp_notif_addfd */ +#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ + +struct seccomp_notif_addfd { + __u64 id; + __u32 flags; + __u32 srcfd; + __u32 newfd; + __u32 newfd_flags; +}; +#endif + +struct seccomp_notif_addfd_small { + __u64 id; + char weird[4]; +}; +#define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL \ + SECCOMP_IOW(3, struct seccomp_notif_addfd_small) + +struct seccomp_notif_addfd_big { + union { + struct seccomp_notif_addfd addfd; + char buf[sizeof(struct seccomp_notif_addfd) + 8]; + }; +}; +#define SECCOMP_IOCTL_NOTIF_ADDFD_BIG \ + SECCOMP_IOWR(3, struct seccomp_notif_addfd_big) + +#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY +#define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 +#define PTRACE_EVENTMSG_SYSCALL_EXIT 2 +#endif + +#ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE +#define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001 +#endif + +#ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH +#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4) +#endif + #ifndef seccomp int seccomp(unsigned int op, unsigned int flags, void *args) { @@ -173,6 +283,40 @@ #define SIBLING_EXIT_UNKILLED 0xbadbeef #define SIBLING_EXIT_FAILURE 0xbadface #define SIBLING_EXIT_NEWPRIVS 0xbadfeed + +static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2) +{ +#ifdef __NR_kcmp + errno = 0; + return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2); +#else + errno = ENOSYS; + return -1; +#endif +} + +/* Have TH_LOG report actual location filecmp() is used. */ +#define filecmp(pid1, pid2, fd1, fd2) ({ \ + int _ret; \ + \ + _ret = __filecmp(pid1, pid2, fd1, fd2); \ + if (_ret != 0) { \ + if (_ret < 0 && errno == ENOSYS) { \ + TH_LOG("kcmp() syscall missing (test is less accurate)");\ + _ret = 0; \ + } \ + } \ + _ret; }) + +TEST(kcmp) +{ + int ret; + + ret = __filecmp(getpid(), getpid(), 1, 1); + EXPECT_EQ(ret, 0); + if (ret != 0 && errno == ENOSYS) + SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)"); +} TEST(mode_strict_support) { @@ -630,8 +774,15 @@ return (void *)SIBLING_EXIT_UNKILLED; } +enum kill_t { + KILL_THREAD, + KILL_PROCESS, + RET_UNKNOWN +}; + /* Prepare a thread that will kill itself or both of us. */ -void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) +void kill_thread_or_group(struct __test_metadata *_metadata, + enum kill_t kill_how) { pthread_t thread; void *status; @@ -647,11 +798,12 @@ .len = (unsigned short)ARRAY_SIZE(filter_thread), .filter = filter_thread, }; + int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAA; struct sock_filter filter_process[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS), + BPF_STMT(BPF_RET|BPF_K, kill), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; struct sock_fprog prog_process = { @@ -664,13 +816,15 @@ } ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, - kill_process ? &prog_process : &prog_thread)); + kill_how == KILL_THREAD ? &prog_thread + : &prog_process)); /* * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS * flag cannot be downgraded by a new filter. */ - ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); + if (kill_how == KILL_PROCESS) + ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); /* Start a thread that will exit immediately. */ ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false)); @@ -698,7 +852,7 @@ child_pid = fork(); ASSERT_LE(0, child_pid); if (child_pid == 0) { - kill_thread_or_group(_metadata, false); + kill_thread_or_group(_metadata, KILL_THREAD); _exit(38); } @@ -717,7 +871,7 @@ child_pid = fork(); ASSERT_LE(0, child_pid); if (child_pid == 0) { - kill_thread_or_group(_metadata, true); + kill_thread_or_group(_metadata, KILL_PROCESS); _exit(38); } @@ -725,6 +879,27 @@ /* If the entire process was killed, we'll see SIGSYS. */ ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_EQ(SIGSYS, WTERMSIG(status)); +} + +TEST(KILL_unknown) +{ + int status; + pid_t child_pid; + + child_pid = fork(); + ASSERT_LE(0, child_pid); + if (child_pid == 0) { + kill_thread_or_group(_metadata, RET_UNKNOWN); + _exit(38); + } + + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + + /* If the entire process was killed, we'll see SIGSYS. */ + EXPECT_TRUE(WIFSIGNALED(status)) { + TH_LOG("Unknown SECCOMP_RET is only killing the thread?"); + } ASSERT_EQ(SIGSYS, WTERMSIG(status)); } @@ -776,7 +951,7 @@ ASSERT_EQ(0, ret); EXPECT_EQ(parent, syscall(__NR_getppid)); - EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(-1, read(-1, NULL, 0)); EXPECT_EQ(E2BIG, errno); } @@ -795,7 +970,7 @@ EXPECT_EQ(parent, syscall(__NR_getppid)); /* "errno" of 0 is ok. */ - EXPECT_EQ(0, read(0, NULL, 0)); + EXPECT_EQ(0, read(-1, NULL, 0)); } /* @@ -816,7 +991,7 @@ ASSERT_EQ(0, ret); EXPECT_EQ(parent, syscall(__NR_getppid)); - EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(-1, read(-1, NULL, 0)); EXPECT_EQ(4095, errno); } @@ -847,11 +1022,11 @@ ASSERT_EQ(0, ret); EXPECT_EQ(parent, syscall(__NR_getppid)); - EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(-1, read(-1, NULL, 0)); EXPECT_EQ(12, errno); } -FIXTURE_DATA(TRAP) { +FIXTURE(TRAP) { struct sock_fprog prog; }; @@ -962,7 +1137,7 @@ EXPECT_NE(0, (unsigned long)sigsys->_call_addr); } -FIXTURE_DATA(precedence) { +FIXTURE(precedence) { struct sock_fprog allow; struct sock_fprog log; struct sock_fprog trace; @@ -1408,6 +1583,7 @@ return tracer_pid; } + void teardown_trace_fixture(struct __test_metadata *_metadata, pid_t tracer) { @@ -1451,7 +1627,7 @@ EXPECT_EQ(0, ret); } -FIXTURE_DATA(TRACE_poke) { +FIXTURE(TRACE_poke) { struct sock_fprog prog; pid_t tracer; long poked; @@ -1522,45 +1698,157 @@ } #if defined(__x86_64__) -# define ARCH_REGS struct user_regs_struct -# define SYSCALL_NUM orig_rax -# define SYSCALL_RET rax +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).orig_rax +# define SYSCALL_RET(_regs) (_regs).rax #elif defined(__i386__) -# define ARCH_REGS struct user_regs_struct -# define SYSCALL_NUM orig_eax -# define SYSCALL_RET eax +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).orig_eax +# define SYSCALL_RET(_regs) (_regs).eax #elif defined(__arm__) -# define ARCH_REGS struct pt_regs -# define SYSCALL_NUM ARM_r7 -# define SYSCALL_RET ARM_r0 +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) (_regs).ARM_r7 +# ifndef PTRACE_SET_SYSCALL +# define PTRACE_SET_SYSCALL 23 +# endif +# define SYSCALL_NUM_SET(_regs, _nr) \ + EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr)) +# define SYSCALL_RET(_regs) (_regs).ARM_r0 #elif defined(__aarch64__) -# define ARCH_REGS struct user_pt_regs -# define SYSCALL_NUM regs[8] -# define SYSCALL_RET regs[0] +# define ARCH_REGS struct user_pt_regs +# define SYSCALL_NUM(_regs) (_regs).regs[8] +# ifndef NT_ARM_SYSTEM_CALL +# define NT_ARM_SYSTEM_CALL 0x404 +# endif +# define SYSCALL_NUM_SET(_regs, _nr) \ + do { \ + struct iovec __v; \ + typeof(_nr) __nr = (_nr); \ + __v.iov_base = &__nr; \ + __v.iov_len = sizeof(__nr); \ + EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee, \ + NT_ARM_SYSTEM_CALL, &__v)); \ + } while (0) +# define SYSCALL_RET(_regs) (_regs).regs[0] +#elif defined(__riscv) && __riscv_xlen == 64 +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).a7 +# define SYSCALL_RET(_regs) (_regs).a0 +#elif defined(__csky__) +# define ARCH_REGS struct pt_regs +# if defined(__CSKYABIV2__) +# define SYSCALL_NUM(_regs) (_regs).regs[3] +# else +# define SYSCALL_NUM(_regs) (_regs).regs[9] +# endif +# define SYSCALL_RET(_regs) (_regs).a0 #elif defined(__hppa__) -# define ARCH_REGS struct user_regs_struct -# define SYSCALL_NUM gr[20] -# define SYSCALL_RET gr[28] +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).gr[20] +# define SYSCALL_RET(_regs) (_regs).gr[28] #elif defined(__powerpc__) -# define ARCH_REGS struct pt_regs -# define SYSCALL_NUM gpr[0] -# define SYSCALL_RET gpr[3] +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) (_regs).gpr[0] +# define SYSCALL_RET(_regs) (_regs).gpr[3] +# define SYSCALL_RET_SET(_regs, _val) \ + do { \ + typeof(_val) _result = (_val); \ + if ((_regs.trap & 0xfff0) == 0x3000) { \ + /* \ + * scv 0 system call uses -ve result \ + * for error, so no need to adjust. \ + */ \ + SYSCALL_RET(_regs) = _result; \ + } else { \ + /* \ + * A syscall error is signaled by the \ + * CR0 SO bit and the code is stored as \ + * a positive value. \ + */ \ + if (_result < 0) { \ + SYSCALL_RET(_regs) = -_result; \ + (_regs).ccr |= 0x10000000; \ + } else { \ + SYSCALL_RET(_regs) = _result; \ + (_regs).ccr &= ~0x10000000; \ + } \ + } \ + } while (0) +# define SYSCALL_RET_SET_ON_PTRACE_EXIT #elif defined(__s390__) -# define ARCH_REGS s390_regs -# define SYSCALL_NUM gprs[2] -# define SYSCALL_RET gprs[2] +# define ARCH_REGS s390_regs +# define SYSCALL_NUM(_regs) (_regs).gprs[2] +# define SYSCALL_RET_SET(_regs, _val) \ + TH_LOG("Can't modify syscall return on this architecture") #elif defined(__mips__) -# define ARCH_REGS struct pt_regs -# define SYSCALL_NUM regs[2] -# define SYSCALL_SYSCALL_NUM regs[4] -# define SYSCALL_RET regs[2] -# define SYSCALL_NUM_RET_SHARE_REG +# include <asm/unistd_nr_n32.h> +# include <asm/unistd_nr_n64.h> +# include <asm/unistd_nr_o32.h> +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) \ + ({ \ + typeof((_regs).regs[2]) _nr; \ + if ((_regs).regs[2] == __NR_O32_Linux) \ + _nr = (_regs).regs[4]; \ + else \ + _nr = (_regs).regs[2]; \ + _nr; \ + }) +# define SYSCALL_NUM_SET(_regs, _nr) \ + do { \ + if ((_regs).regs[2] == __NR_O32_Linux) \ + (_regs).regs[4] = _nr; \ + else \ + (_regs).regs[2] = _nr; \ + } while (0) +# define SYSCALL_RET_SET(_regs, _val) \ + TH_LOG("Can't modify syscall return on this architecture") +#elif defined(__xtensa__) +# define ARCH_REGS struct user_pt_regs +# define SYSCALL_NUM(_regs) (_regs).syscall +/* + * On xtensa syscall return value is in the register + * a2 of the current window which is not fixed. + */ +#define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2] +#elif defined(__sh__) +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) (_regs).regs[3] +# define SYSCALL_RET(_regs) (_regs).regs[0] #else # error "Do not know how to find your architecture's registers and syscalls" #endif +/* + * Most architectures can change the syscall by just updating the + * associated register. This is the default if not defined above. + */ +#ifndef SYSCALL_NUM_SET +# define SYSCALL_NUM_SET(_regs, _nr) \ + do { \ + SYSCALL_NUM(_regs) = (_nr); \ + } while (0) +#endif +/* + * Most architectures can change the syscall return value by just + * writing to the SYSCALL_RET register. This is the default if not + * defined above. If an architecture cannot set the return value + * (for example when the syscall and return value register is + * shared), report it with TH_LOG() in an arch-specific definition + * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined. + */ +#if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET) +# error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch" +#endif +#ifndef SYSCALL_RET_SET +# define SYSCALL_RET_SET(_regs, _val) \ + do { \ + SYSCALL_RET(_regs) = (_val); \ + } while (0) +#endif + /* When the syscall return can't be changed, stub out the tests for it. */ -#ifdef SYSCALL_NUM_RET_SHARE_REG +#ifndef SYSCALL_RET # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) #else # define EXPECT_SYSCALL_RETURN(val, action) \ @@ -1575,115 +1863,95 @@ } while (0) #endif -/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for +/* + * Some architectures (e.g. powerpc) can only set syscall + * return values on syscall exit during ptrace. + */ +const bool ptrace_entry_set_syscall_nr = true; +const bool ptrace_entry_set_syscall_ret = +#ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT + true; +#else + false; +#endif + +/* + * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). */ #if defined(__x86_64__) || defined(__i386__) || defined(__mips__) -#define HAVE_GETREGS +# define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs)) +# define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs)) +#else +# define ARCH_GETREGS(_regs) ({ \ + struct iovec __v; \ + __v.iov_base = &(_regs); \ + __v.iov_len = sizeof(_regs); \ + ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v); \ + }) +# define ARCH_SETREGS(_regs) ({ \ + struct iovec __v; \ + __v.iov_base = &(_regs); \ + __v.iov_len = sizeof(_regs); \ + ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v); \ + }) #endif /* Architecture-specific syscall fetching routine. */ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) { ARCH_REGS regs; -#ifdef HAVE_GETREGS - EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, ®s)) { - TH_LOG("PTRACE_GETREGS failed"); + + EXPECT_EQ(0, ARCH_GETREGS(regs)) { return -1; } -#else - struct iovec iov; - iov.iov_base = ®s; - iov.iov_len = sizeof(regs); - EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) { - TH_LOG("PTRACE_GETREGSET failed"); - return -1; - } -#endif - -#if defined(__mips__) - if (regs.SYSCALL_NUM == __NR_O32_Linux) - return regs.SYSCALL_SYSCALL_NUM; -#endif - return regs.SYSCALL_NUM; + return SYSCALL_NUM(regs); } /* Architecture-specific syscall changing routine. */ -void change_syscall(struct __test_metadata *_metadata, - pid_t tracee, int syscall, int result) +void __change_syscall(struct __test_metadata *_metadata, + pid_t tracee, long *syscall, long *ret) { - int ret; - ARCH_REGS regs; -#ifdef HAVE_GETREGS - ret = ptrace(PTRACE_GETREGS, tracee, 0, ®s); -#else - struct iovec iov; - iov.iov_base = ®s; - iov.iov_len = sizeof(regs); - ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); -#endif - EXPECT_EQ(0, ret) {} + ARCH_REGS orig, regs; -#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ - defined(__s390__) || defined(__hppa__) - { - regs.SYSCALL_NUM = syscall; + /* Do not get/set registers if we have nothing to do. */ + if (!syscall && !ret) + return; + + EXPECT_EQ(0, ARCH_GETREGS(regs)) { + return; } -#elif defined(__mips__) - { - if (regs.SYSCALL_NUM == __NR_O32_Linux) - regs.SYSCALL_SYSCALL_NUM = syscall; - else - regs.SYSCALL_NUM = syscall; - } + orig = regs; -#elif defined(__arm__) -# ifndef PTRACE_SET_SYSCALL -# define PTRACE_SET_SYSCALL 23 -# endif - { - ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall); - EXPECT_EQ(0, ret); - } + if (syscall) + SYSCALL_NUM_SET(regs, *syscall); -#elif defined(__aarch64__) -# ifndef NT_ARM_SYSTEM_CALL -# define NT_ARM_SYSTEM_CALL 0x404 -# endif - { - iov.iov_base = &syscall; - iov.iov_len = sizeof(syscall); - ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL, - &iov); - EXPECT_EQ(0, ret); - } + if (ret) + SYSCALL_RET_SET(regs, *ret); -#else - ASSERT_EQ(1, 0) { - TH_LOG("How is the syscall changed on this architecture?"); - } -#endif - - /* If syscall is skipped, change return value. */ - if (syscall == -1) -#ifdef SYSCALL_NUM_RET_SHARE_REG - TH_LOG("Can't modify syscall return on this architecture"); -#else - regs.SYSCALL_RET = result; -#endif - -#ifdef HAVE_GETREGS - ret = ptrace(PTRACE_SETREGS, tracee, 0, ®s); -#else - iov.iov_base = ®s; - iov.iov_len = sizeof(regs); - ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov); -#endif - EXPECT_EQ(0, ret); + /* Flush any register changes made. */ + if (memcmp(&orig, ®s, sizeof(orig)) != 0) + EXPECT_EQ(0, ARCH_SETREGS(regs)); } -void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee, +/* Change only syscall number. */ +void change_syscall_nr(struct __test_metadata *_metadata, + pid_t tracee, long syscall) +{ + __change_syscall(_metadata, tracee, &syscall, NULL); +} + +/* Change syscall return value (and set syscall number to -1). */ +void change_syscall_ret(struct __test_metadata *_metadata, + pid_t tracee, long ret) +{ + long syscall = -1; + + __change_syscall(_metadata, tracee, &syscall, &ret); +} + +void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, int status, void *args) { int ret; @@ -1698,17 +1966,17 @@ case 0x1002: /* change getpid to getppid. */ EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, __NR_getppid, 0); + change_syscall_nr(_metadata, tracee, __NR_getppid); break; case 0x1003: /* skip gettid with valid return code. */ EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, -1, 45000); + change_syscall_ret(_metadata, tracee, 45000); break; case 0x1004: /* skip openat with error. */ EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, -1, -ESRCH); + change_syscall_ret(_metadata, tracee, -ESRCH); break; case 0x1005: /* do nothing (allow getppid) */ @@ -1723,36 +1991,92 @@ } +FIXTURE(TRACE_syscall) { + struct sock_fprog prog; + pid_t tracer, mytid, mypid, parent; + long syscall_nr; +}; + void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, int status, void *args) { - int ret, nr; + int ret; unsigned long msg; static bool entry; + long syscall_nr_val, syscall_ret_val; + long *syscall_nr = NULL, *syscall_ret = NULL; + FIXTURE_DATA(TRACE_syscall) *self = args; - /* Make sure we got an empty message. */ + /* + * The traditional way to tell PTRACE_SYSCALL entry/exit + * is by counting. + */ + entry = !entry; + + /* Make sure we got an appropriate message. */ ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); EXPECT_EQ(0, ret); - EXPECT_EQ(0, msg); + EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY + : PTRACE_EVENTMSG_SYSCALL_EXIT, msg); - /* The only way to tell PTRACE_SYSCALL entry/exit is by counting. */ - entry = !entry; - if (!entry) + /* + * Some architectures only support setting return values during + * syscall exit under ptrace, and on exit the syscall number may + * no longer be available. Therefore, save the initial sycall + * number here, so it can be examined during both entry and exit + * phases. + */ + if (entry) + self->syscall_nr = get_syscall(_metadata, tracee); + + /* + * Depending on the architecture's syscall setting abilities, we + * pick which things to set during this phase (entry or exit). + */ + if (entry == ptrace_entry_set_syscall_nr) + syscall_nr = &syscall_nr_val; + if (entry == ptrace_entry_set_syscall_ret) + syscall_ret = &syscall_ret_val; + + /* Now handle the actual rewriting cases. */ + switch (self->syscall_nr) { + case __NR_getpid: + syscall_nr_val = __NR_getppid; + /* Never change syscall return for this case. */ + syscall_ret = NULL; + break; + case __NR_gettid: + syscall_nr_val = -1; + syscall_ret_val = 45000; + break; + case __NR_openat: + syscall_nr_val = -1; + syscall_ret_val = -ESRCH; + break; + default: + /* Unhandled, do nothing. */ return; + } - nr = get_syscall(_metadata, tracee); - - if (nr == __NR_getpid) - change_syscall(_metadata, tracee, __NR_getppid, 0); - if (nr == __NR_gettid) - change_syscall(_metadata, tracee, -1, 45000); - if (nr == __NR_openat) - change_syscall(_metadata, tracee, -1, -ESRCH); + __change_syscall(_metadata, tracee, syscall_nr, syscall_ret); } -FIXTURE_DATA(TRACE_syscall) { - struct sock_fprog prog; - pid_t tracer, mytid, mypid, parent; +FIXTURE_VARIANT(TRACE_syscall) { + /* + * All of the SECCOMP_RET_TRACE behaviors can be tested with either + * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL. + * This indicates if we should use SECCOMP_RET_TRACE (false), or + * ptrace (true). + */ + bool use_ptrace; +}; + +FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) { + .use_ptrace = true, +}; + +FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) { + .use_ptrace = false, }; FIXTURE_SETUP(TRACE_syscall) @@ -1770,12 +2094,11 @@ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; - - memset(&self->prog, 0, sizeof(self->prog)); - self->prog.filter = malloc(sizeof(filter)); - ASSERT_NE(NULL, self->prog.filter); - memcpy(self->prog.filter, filter, sizeof(filter)); - self->prog.len = (unsigned short)ARRAY_SIZE(filter); + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; /* Prepare some testable syscall results. */ self->mytid = syscall(__NR_gettid); @@ -1793,60 +2116,48 @@ ASSERT_NE(self->parent, self->mypid); /* Launch tracer. */ - self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL, - false); + self->tracer = setup_trace_fixture(_metadata, + variant->use_ptrace ? tracer_ptrace + : tracer_seccomp, + self, variant->use_ptrace); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + if (variant->use_ptrace) + return; + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); } FIXTURE_TEARDOWN(TRACE_syscall) { teardown_trace_fixture(_metadata, self->tracer); - if (self->prog.filter) - free(self->prog.filter); } -TEST_F(TRACE_syscall, ptrace_syscall_redirected) +TEST(negative_ENOSYS) { - /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ - teardown_trace_fixture(_metadata, self->tracer); - self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, - true); - - /* Tracer will redirect getpid to getppid. */ - EXPECT_NE(self->mypid, syscall(__NR_getpid)); + /* + * There should be no difference between an "internal" skip + * and userspace asking for syscall "-1". + */ + errno = 0; + EXPECT_EQ(-1, syscall(-1)); + EXPECT_EQ(errno, ENOSYS); + /* And no difference for "still not valid but not -1". */ + errno = 0; + EXPECT_EQ(-1, syscall(-101)); + EXPECT_EQ(errno, ENOSYS); } -TEST_F(TRACE_syscall, ptrace_syscall_errno) +TEST_F(TRACE_syscall, negative_ENOSYS) { - /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ - teardown_trace_fixture(_metadata, self->tracer); - self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, - true); - - /* Tracer should skip the open syscall, resulting in ESRCH. */ - EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); -} - -TEST_F(TRACE_syscall, ptrace_syscall_faked) -{ - /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ - teardown_trace_fixture(_metadata, self->tracer); - self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, - true); - - /* Tracer should skip the gettid syscall, resulting fake pid. */ - EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); + negative_ENOSYS(_metadata); } TEST_F(TRACE_syscall, syscall_allowed) { - long ret; - - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); - ASSERT_EQ(0, ret); - /* getppid works as expected (no changes). */ EXPECT_EQ(self->parent, syscall(__NR_getppid)); EXPECT_NE(self->mypid, syscall(__NR_getppid)); @@ -1854,14 +2165,6 @@ TEST_F(TRACE_syscall, syscall_redirected) { - long ret; - - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); - ASSERT_EQ(0, ret); - /* getpid has been redirected to getppid as expected. */ EXPECT_EQ(self->parent, syscall(__NR_getpid)); EXPECT_NE(self->mypid, syscall(__NR_getpid)); @@ -1869,33 +2172,17 @@ TEST_F(TRACE_syscall, syscall_errno) { - long ret; - - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); - ASSERT_EQ(0, ret); - - /* openat has been skipped and an errno return. */ + /* Tracer should skip the open syscall, resulting in ESRCH. */ EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); } TEST_F(TRACE_syscall, syscall_faked) { - long ret; - - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); - ASSERT_EQ(0, ret); - - /* gettid has been skipped and an altered return value stored. */ + /* Tracer skips the gettid syscall and store altered return value. */ EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); } -TEST_F(TRACE_syscall, skip_after_RET_TRACE) +TEST_F(TRACE_syscall, skip_after) { struct sock_filter filter[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, @@ -1910,14 +2197,7 @@ }; long ret; - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - /* Install fixture filter. */ - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); - ASSERT_EQ(0, ret); - - /* Install "errno on getppid" filter. */ + /* Install additional "errno on getppid" filter. */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); ASSERT_EQ(0, ret); @@ -1927,7 +2207,7 @@ EXPECT_EQ(EPERM, errno); } -TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS) +TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS) { struct sock_filter filter[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, @@ -1942,77 +2222,7 @@ }; long ret; - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - /* Install fixture filter. */ - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); - ASSERT_EQ(0, ret); - - /* Install "death on getppid" filter. */ - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); - ASSERT_EQ(0, ret); - - /* Tracer will redirect getpid to getppid, and we should die. */ - EXPECT_NE(self->mypid, syscall(__NR_getpid)); -} - -TEST_F(TRACE_syscall, skip_after_ptrace) -{ - struct sock_filter filter[] = { - BPF_STMT(BPF_LD|BPF_W|BPF_ABS, - offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), - }; - struct sock_fprog prog = { - .len = (unsigned short)ARRAY_SIZE(filter), - .filter = filter, - }; - long ret; - - /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ - teardown_trace_fixture(_metadata, self->tracer); - self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, - true); - - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - /* Install "errno on getppid" filter. */ - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); - ASSERT_EQ(0, ret); - - /* Tracer will redirect getpid to getppid, and we should see EPERM. */ - EXPECT_EQ(-1, syscall(__NR_getpid)); - EXPECT_EQ(EPERM, errno); -} - -TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS) -{ - struct sock_filter filter[] = { - BPF_STMT(BPF_LD|BPF_W|BPF_ABS, - offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), - }; - struct sock_fprog prog = { - .len = (unsigned short)ARRAY_SIZE(filter), - .filter = filter, - }; - long ret; - - /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ - teardown_trace_fixture(_metadata, self->tracer); - self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, - true); - - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - /* Install "death on getppid" filter. */ + /* Install additional "death on getppid" filter. */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); ASSERT_EQ(0, ret); @@ -2119,12 +2329,17 @@ { unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, SECCOMP_FILTER_FLAG_LOG, - SECCOMP_FILTER_FLAG_SPEC_ALLOW }; - unsigned int flag, all_flags; + SECCOMP_FILTER_FLAG_SPEC_ALLOW, + SECCOMP_FILTER_FLAG_NEW_LISTENER, + SECCOMP_FILTER_FLAG_TSYNC_ESRCH }; + unsigned int exclusive[] = { + SECCOMP_FILTER_FLAG_TSYNC, + SECCOMP_FILTER_FLAG_NEW_LISTENER }; + unsigned int flag, all_flags, exclusive_mask; int i; long ret; - /* Test detection of known-good filter flags */ + /* Test detection of individual known-good filter flags */ for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { int bits = 0; @@ -2151,16 +2366,29 @@ all_flags |= flag; } - /* Test detection of all known-good filter flags */ - ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL); - EXPECT_EQ(-1, ret); - EXPECT_EQ(EFAULT, errno) { - TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", - all_flags); + /* + * Test detection of all known-good filter flags combined. But + * for the exclusive flags we need to mask them out and try them + * individually for the "all flags" testing. + */ + exclusive_mask = 0; + for (i = 0; i < ARRAY_SIZE(exclusive); i++) + exclusive_mask |= exclusive[i]; + for (i = 0; i < ARRAY_SIZE(exclusive); i++) { + flag = all_flags & ~exclusive_mask; + flag |= exclusive[i]; + + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", + flag); + } } - /* Test detection of an unknown filter flag */ + /* Test detection of an unknown filter flags, without exclusives. */ flag = -1; + flag &= ~exclusive_mask; ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); EXPECT_EQ(-1, ret); EXPECT_EQ(EINVAL, errno) { @@ -2237,7 +2465,7 @@ } \ } while (0) -FIXTURE_DATA(TSYNC) { +FIXTURE(TSYNC) { struct sock_fprog root_prog, apply_prog; struct tsync_sibling sibling[TSYNC_SIBLINGS]; sem_t started; @@ -2347,7 +2575,7 @@ ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); if (!ret) return (void *)SIBLING_EXIT_NEWPRIVS; - read(0, NULL, 0); + read(-1, NULL, 0); return (void *)SIBLING_EXIT_UNKILLED; } @@ -2561,10 +2789,60 @@ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); } +TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err) +{ + long ret, flags; + void *status; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); + } + self->sibling[0].diverge = 1; + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + flags = SECCOMP_FILTER_FLAG_TSYNC | \ + SECCOMP_FILTER_FLAG_TSYNC_ESRCH; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog); + ASSERT_EQ(ESRCH, errno) { + TH_LOG("Did not return ESRCH for diverged sibling."); + } + ASSERT_EQ(-1, ret) { + TH_LOG("Did not fail on diverged sibling."); + } + + /* Wake the threads */ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + + /* Ensure they are both unkilled. */ + PTHREAD_JOIN(self->sibling[0].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); + PTHREAD_JOIN(self->sibling[1].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); +} + TEST_F(TSYNC, two_siblings_not_under_filter) { long ret, sib; void *status; + struct timespec delay = { .tv_nsec = 100000000 }; ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); @@ -2618,7 +2896,7 @@ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); /* Poll for actual task death. pthread_join doesn't guarantee it. */ while (!kill(self->sibling[sib].system_tid, 0)) - sleep(0.1); + nanosleep(&delay, NULL); /* Switch to the remaining sibling */ sib = !sib; @@ -2643,7 +2921,7 @@ EXPECT_EQ(0, (long)status); /* Poll for actual task death. pthread_join doesn't guarantee it. */ while (!kill(self->sibling[sib].system_tid, 0)) - sleep(0.1); + nanosleep(&delay, NULL); ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &self->apply_prog); @@ -2664,12 +2942,13 @@ offsetof(struct seccomp_data, nr)), #ifdef __NR_sigreturn - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0), #endif - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0), BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0), /* Allow __NR_write for easy logging. */ @@ -2756,7 +3035,8 @@ ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); ASSERT_EQ(0x100, msg); - EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid)); + ret = get_syscall(_metadata, child_pid); + EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep); /* Might as well check siginfo for sanity while we're here. */ ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); @@ -2773,9 +3053,14 @@ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); ASSERT_EQ(true, WIFSTOPPED(status)); ASSERT_EQ(SIGSTOP, WSTOPSIG(status)); - /* Verify signal delivery came from parent now. */ ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); - EXPECT_EQ(getpid(), info.si_pid); + /* + * There is no siginfo on SIGSTOP any more, so we can't verify + * signal delivery came from parent now (getpid() == info.si_pid). + * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com + * At least verify the SIGSTOP via PTRACE_GETSIGINFO. + */ + EXPECT_EQ(SIGSTOP, info.si_signo); /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */ ASSERT_EQ(0, kill(child_pid, SIGCONT)); @@ -2922,7 +3207,7 @@ /* Only real root can get metadata. */ if (geteuid()) { - XFAIL(return, "get_metadata requires real root"); + SKIP(return, "get_metadata requires real root"); return; } @@ -2940,11 +3225,11 @@ }; /* one with log, one without */ - ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, + EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, &prog)); - ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog)); + EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog)); - ASSERT_EQ(0, close(pipefd[0])); + EXPECT_EQ(0, close(pipefd[0])); ASSERT_EQ(1, write(pipefd[1], "1", 1)); ASSERT_EQ(0, close(pipefd[1])); @@ -2965,7 +3250,7 @@ ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); EXPECT_EQ(sizeof(md), ret) { if (errno == EINVAL) - XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)"); + SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)"); } EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG); @@ -2981,9 +3266,890 @@ ASSERT_EQ(0, kill(pid, SIGKILL)); } +static int user_notif_syscall(int nr, unsigned int flags) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog); +} + +#define USER_NOTIF_MAGIC INT_MAX +TEST(user_notification_basic) +{ + pid_t pid; + long ret; + int status, listener; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + struct pollfd pollfd; + + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + pid = fork(); + ASSERT_GE(pid, 0); + + /* Check that we get -ENOSYS with no listener attached */ + if (pid == 0) { + if (user_notif_syscall(__NR_getppid, 0) < 0) + exit(1); + ret = syscall(__NR_getppid); + exit(ret >= 0 || errno != ENOSYS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + /* Add some no-op filters for grins. */ + EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); + EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); + EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); + EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); + + /* Check that the basic notification machinery works */ + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + /* Installing a second listener in the chain should EBUSY */ + EXPECT_EQ(user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER), + -1); + EXPECT_EQ(errno, EBUSY); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + ret = syscall(__NR_getppid); + exit(ret != USER_NOTIF_MAGIC); + } + + pollfd.fd = listener; + pollfd.events = POLLIN | POLLOUT; + + EXPECT_GT(poll(&pollfd, 1, -1), 0); + EXPECT_EQ(pollfd.revents, POLLIN); + + /* Test that we can't pass garbage to the kernel. */ + memset(&req, 0, sizeof(req)); + req.pid = -1; + errno = 0; + ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + if (ret) { + req.pid = 0; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + } + + pollfd.fd = listener; + pollfd.events = POLLIN | POLLOUT; + + EXPECT_GT(poll(&pollfd, 1, -1), 0); + EXPECT_EQ(pollfd.revents, POLLOUT); + + EXPECT_EQ(req.data.nr, __NR_getppid); + + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + + /* check that we make sure flags == 0 */ + resp.flags = 1; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); + EXPECT_EQ(errno, EINVAL); + + resp.flags = 0; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +TEST(user_notification_with_tsync) +{ + int ret; + unsigned int flags; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* these were exclusive */ + flags = SECCOMP_FILTER_FLAG_NEW_LISTENER | + SECCOMP_FILTER_FLAG_TSYNC; + ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags)); + ASSERT_EQ(EINVAL, errno); + + /* but now they're not */ + flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH; + ret = user_notif_syscall(__NR_getppid, flags); + close(ret); + ASSERT_LE(0, ret); +} + +TEST(user_notification_kill_in_middle) +{ + pid_t pid; + long ret; + int listener; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + /* + * Check that nothing bad happens when we kill the task in the middle + * of a syscall. + */ + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + ret = syscall(__NR_getppid); + exit(ret != USER_NOTIF_MAGIC); + } + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0); + + EXPECT_EQ(kill(pid, SIGKILL), 0); + EXPECT_EQ(waitpid(pid, NULL, 0), pid); + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1); + + resp.id = req.id; + ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp); + EXPECT_EQ(ret, -1); + EXPECT_EQ(errno, ENOENT); +} + +static int handled = -1; + +static void signal_handler(int signal) +{ + if (write(handled, "c", 1) != 1) + perror("write from signal"); +} + +TEST(user_notification_signal) +{ + pid_t pid; + long ret; + int status, listener, sk_pair[2]; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + char c; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); + + listener = user_notif_syscall(__NR_gettid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + close(sk_pair[0]); + handled = sk_pair[1]; + if (signal(SIGUSR1, signal_handler) == SIG_ERR) { + perror("signal"); + exit(1); + } + /* + * ERESTARTSYS behavior is a bit hard to test, because we need + * to rely on a signal that has not yet been handled. Let's at + * least check that the error code gets propagated through, and + * hope that it doesn't break when there is actually a signal :) + */ + ret = syscall(__NR_gettid); + exit(!(ret == -1 && errno == 512)); + } + + close(sk_pair[1]); + + memset(&req, 0, sizeof(req)); + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + + EXPECT_EQ(kill(pid, SIGUSR1), 0); + + /* + * Make sure the signal really is delivered, which means we're not + * stuck in the user notification code any more and the notification + * should be dead. + */ + EXPECT_EQ(read(sk_pair[0], &c, 1), 1); + + resp.id = req.id; + resp.error = -EPERM; + resp.val = 0; + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); + EXPECT_EQ(errno, ENOENT); + + memset(&req, 0, sizeof(req)); + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + + resp.id = req.id; + resp.error = -512; /* -ERESTARTSYS */ + resp.val = 0; + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +TEST(user_notification_closed_listener) +{ + pid_t pid; + long ret; + int status, listener; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + /* + * Check that we get an ENOSYS when the listener is closed. + */ + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) { + close(listener); + ret = syscall(__NR_getppid); + exit(ret != -1 && errno != ENOSYS); + } + + close(listener); + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +/* + * Check that a pid in a child namespace still shows up as valid in ours. + */ +TEST(user_notification_child_pid_ns) +{ + pid_t pid; + int status, listener; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + + ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) { + if (errno == EINVAL) + SKIP(return, "kernel missing CLONE_NEWUSER support"); + }; + + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) + exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + EXPECT_EQ(req.pid, pid); + + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + close(listener); +} + +/* + * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e. + * invalid. + */ +TEST(user_notification_sibling_pid_ns) +{ + pid_t pid, pid2; + int status, listener; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + + ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + ASSERT_EQ(unshare(CLONE_NEWPID), 0); + + pid2 = fork(); + ASSERT_GE(pid2, 0); + + if (pid2 == 0) + exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); + + EXPECT_EQ(waitpid(pid2, &status, 0), pid2); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + exit(WEXITSTATUS(status)); + } + + /* Create the sibling ns, and sibling in it. */ + ASSERT_EQ(unshare(CLONE_NEWPID), 0) { + if (errno == EPERM) + SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); + } + ASSERT_EQ(errno, 0); + + pid2 = fork(); + ASSERT_GE(pid2, 0); + + if (pid2 == 0) { + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + /* + * The pid should be 0, i.e. the task is in some namespace that + * we can't "see". + */ + EXPECT_EQ(req.pid, 0); + + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + exit(0); + } + + close(listener); + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + EXPECT_EQ(waitpid(pid2, &status, 0), pid2); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +TEST(user_notification_fault_recv) +{ + pid_t pid; + int status, listener; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + + ASSERT_EQ(unshare(CLONE_NEWUSER), 0); + + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) + exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); + + /* Do a bad recv() */ + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1); + EXPECT_EQ(errno, EFAULT); + + /* We should still be able to receive this notification, though. */ + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + EXPECT_EQ(req.pid, pid); + + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +TEST(seccomp_get_notif_sizes) +{ + struct seccomp_notif_sizes sizes; + + ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0); + EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif)); + EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp)); +} + +TEST(user_notification_continue) +{ + pid_t pid; + long ret; + int status, listener; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + struct pollfd pollfd; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + int dup_fd, pipe_fds[2]; + pid_t self; + + ASSERT_GE(pipe(pipe_fds), 0); + + dup_fd = dup(pipe_fds[0]); + ASSERT_GE(dup_fd, 0); + EXPECT_NE(pipe_fds[0], dup_fd); + + self = getpid(); + ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0); + exit(0); + } + + pollfd.fd = listener; + pollfd.events = POLLIN | POLLOUT; + + EXPECT_GT(poll(&pollfd, 1, -1), 0); + EXPECT_EQ(pollfd.revents, POLLIN); + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + + pollfd.fd = listener; + pollfd.events = POLLIN | POLLOUT; + + EXPECT_GT(poll(&pollfd, 1, -1), 0); + EXPECT_EQ(pollfd.revents, POLLOUT); + + EXPECT_EQ(req.data.nr, __NR_dup); + + resp.id = req.id; + resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE; + + /* + * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other + * args be set to 0. + */ + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); + EXPECT_EQ(errno, EINVAL); + + resp.error = USER_NOTIF_MAGIC; + resp.val = 0; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); + EXPECT_EQ(errno, EINVAL); + + resp.error = 0; + resp.val = 0; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) { + if (errno == EINVAL) + SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE"); + } + +skip: + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)) { + if (WEXITSTATUS(status) == 2) { + SKIP(return, "Kernel does not support kcmp() syscall"); + return; + } + } +} + +TEST(user_notification_filter_empty) +{ + pid_t pid; + long ret; + int status; + struct pollfd pollfd; + struct __clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + int listener; + + listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER); + if (listener < 0) + _exit(EXIT_FAILURE); + + if (dup2(listener, 200) != 200) + _exit(EXIT_FAILURE); + + close(listener); + + _exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + /* + * The seccomp filter has become unused so we should be notified once + * the kernel gets around to cleaning up task struct. + */ + pollfd.fd = 200; + pollfd.events = POLLHUP; + + EXPECT_GT(poll(&pollfd, 1, 2000), 0); + EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); +} + +static void *do_thread(void *data) +{ + return NULL; +} + +TEST(user_notification_filter_empty_threaded) +{ + pid_t pid; + long ret; + int status; + struct pollfd pollfd; + struct __clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + pid_t pid1, pid2; + int listener, status; + pthread_t thread; + + listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); + if (listener < 0) + _exit(EXIT_FAILURE); + + if (dup2(listener, 200) != 200) + _exit(EXIT_FAILURE); + + close(listener); + + pid1 = fork(); + if (pid1 < 0) + _exit(EXIT_FAILURE); + + if (pid1 == 0) + _exit(EXIT_SUCCESS); + + pid2 = fork(); + if (pid2 < 0) + _exit(EXIT_FAILURE); + + if (pid2 == 0) + _exit(EXIT_SUCCESS); + + if (pthread_create(&thread, NULL, do_thread, NULL) || + pthread_join(thread, NULL)) + _exit(EXIT_FAILURE); + + if (pthread_create(&thread, NULL, do_thread, NULL) || + pthread_join(thread, NULL)) + _exit(EXIT_FAILURE); + + if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) || + WEXITSTATUS(status)) + _exit(EXIT_FAILURE); + + if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) || + WEXITSTATUS(status)) + _exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + /* + * The seccomp filter has become unused so we should be notified once + * the kernel gets around to cleaning up task struct. + */ + pollfd.fd = 200; + pollfd.events = POLLHUP; + + EXPECT_GT(poll(&pollfd, 1, 2000), 0); + EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); +} + +TEST(user_notification_addfd) +{ + pid_t pid; + long ret; + int status, listener, memfd, fd; + struct seccomp_notif_addfd addfd = {}; + struct seccomp_notif_addfd_small small = {}; + struct seccomp_notif_addfd_big big = {}; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + /* 100 ms */ + struct timespec delay = { .tv_nsec = 100000000 }; + + memfd = memfd_create("test", 0); + ASSERT_GE(memfd, 0); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* Check that the basic notification machinery works */ + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + if (syscall(__NR_getppid) != USER_NOTIF_MAGIC) + exit(1); + exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); + } + + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + + addfd.srcfd = memfd; + addfd.newfd = 0; + addfd.id = req.id; + addfd.flags = 0x0; + + /* Verify bad newfd_flags cannot be set */ + addfd.newfd_flags = ~O_CLOEXEC; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EINVAL); + addfd.newfd_flags = O_CLOEXEC; + + /* Verify bad flags cannot be set */ + addfd.flags = 0xff; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EINVAL); + addfd.flags = 0; + + /* Verify that remote_fd cannot be set without setting flags */ + addfd.newfd = 1; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EINVAL); + addfd.newfd = 0; + + /* Verify small size cannot be set */ + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1); + EXPECT_EQ(errno, EINVAL); + + /* Verify we can't send bits filled in unknown buffer area */ + memset(&big, 0xAA, sizeof(big)); + big.addfd = addfd; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1); + EXPECT_EQ(errno, E2BIG); + + + /* Verify we can set an arbitrary remote fd */ + fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); + /* + * The child has fds 0(stdin), 1(stdout), 2(stderr), 3(memfd), + * 4(listener), so the newly allocated fd should be 5. + */ + EXPECT_EQ(fd, 5); + EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); + + /* Verify we can set an arbitrary remote fd with large size */ + memset(&big, 0x0, sizeof(big)); + big.addfd = addfd; + fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big); + EXPECT_EQ(fd, 6); + + /* Verify we can set a specific remote fd */ + addfd.newfd = 42; + addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; + fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); + EXPECT_EQ(fd, 42); + EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); + + /* Resume syscall */ + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + /* + * This sets the ID of the ADD FD to the last request plus 1. The + * notification ID increments 1 per notification. + */ + addfd.id = req.id + 1; + + /* This spins until the underlying notification is generated */ + while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 && + errno != -EINPROGRESS) + nanosleep(&delay, NULL); + + memset(&req, 0, sizeof(req)); + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + ASSERT_EQ(addfd.id, req.id); + + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + /* Wait for child to finish. */ + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + close(memfd); +} + +TEST(user_notification_addfd_rlimit) +{ + pid_t pid; + long ret; + int status, listener, memfd; + struct seccomp_notif_addfd addfd = {}; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + const struct rlimit lim = { + .rlim_cur = 0, + .rlim_max = 0, + }; + + memfd = memfd_create("test", 0); + ASSERT_GE(memfd, 0); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* Check that the basic notification machinery works */ + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) + exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); + + + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + + ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0); + + addfd.srcfd = memfd; + addfd.newfd_flags = O_CLOEXEC; + addfd.newfd = 0; + addfd.id = req.id; + addfd.flags = 0; + + /* Should probably spot check /proc/sys/fs/file-nr */ + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EMFILE); + + addfd.newfd = 100; + addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EBADF); + + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + /* Wait for child to finish. */ + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + close(memfd); +} + /* * TODO: - * - add microbenchmarks * - expand NNP testing * - better arch-specific TRACE and TRAP handlers. * - endianness checking when appropriate @@ -2991,7 +4157,6 @@ * - arch value testing (x86 modes especially) * - verify that FILTER_FLAG_LOG filters generate log messages * - verify that RET_LOG generates log messages - * - ... */ TEST_HARNESS_MAIN -- Gitblit v1.6.2