hc
2023-12-09 b22da3d8526a935aa31e086e63f60ff3246cb61c
kernel/tools/testing/selftests/seccomp/seccomp_bpf.c
....@@ -1,10 +1,11 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3
- * Use of this source code is governed by the GPLv2 license.
44 *
55 * Test code for seccomp bpf.
66 */
77
8
+#define _GNU_SOURCE
89 #include <sys/types.h>
910
1011 /*
....@@ -34,18 +35,29 @@
3435 #include <stdbool.h>
3536 #include <string.h>
3637 #include <time.h>
38
+#include <limits.h>
3739 #include <linux/elf.h>
3840 #include <sys/uio.h>
3941 #include <sys/utsname.h>
4042 #include <sys/fcntl.h>
4143 #include <sys/mman.h>
4244 #include <sys/times.h>
45
+#include <sys/socket.h>
46
+#include <sys/ioctl.h>
47
+#include <linux/kcmp.h>
48
+#include <sys/resource.h>
4349
44
-#define _GNU_SOURCE
4550 #include <unistd.h>
4651 #include <sys/syscall.h>
52
+#include <poll.h>
4753
4854 #include "../kselftest_harness.h"
55
+#include "../clone3/clone3_selftests.h"
56
+
57
+/* Attempt to de-conflict with the selftests tree. */
58
+#ifndef SKIP
59
+#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
60
+#endif
4961
5062 #ifndef PR_SET_PTRACER
5163 # define PR_SET_PTRACER 0x59616d61
....@@ -109,12 +121,20 @@
109121 # define __NR_seccomp 383
110122 # elif defined(__aarch64__)
111123 # define __NR_seccomp 277
124
+# elif defined(__riscv)
125
+# define __NR_seccomp 277
126
+# elif defined(__csky__)
127
+# define __NR_seccomp 277
112128 # elif defined(__hppa__)
113129 # define __NR_seccomp 338
114130 # elif defined(__powerpc__)
115131 # define __NR_seccomp 358
116132 # elif defined(__s390__)
117133 # define __NR_seccomp 348
134
+# elif defined(__xtensa__)
135
+# define __NR_seccomp 337
136
+# elif defined(__sh__)
137
+# define __NR_seccomp 372
118138 # else
119139 # warning "seccomp syscall number unknown for this architecture"
120140 # define __NR_seccomp 0xffff
....@@ -131,6 +151,10 @@
131151
132152 #ifndef SECCOMP_GET_ACTION_AVAIL
133153 #define SECCOMP_GET_ACTION_AVAIL 2
154
+#endif
155
+
156
+#ifndef SECCOMP_GET_NOTIF_SIZES
157
+#define SECCOMP_GET_NOTIF_SIZES 3
134158 #endif
135159
136160 #ifndef SECCOMP_FILTER_FLAG_TSYNC
....@@ -154,6 +178,92 @@
154178 };
155179 #endif
156180
181
+#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
182
+#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
183
+#endif
184
+
185
+#ifndef SECCOMP_RET_USER_NOTIF
186
+#define SECCOMP_RET_USER_NOTIF 0x7fc00000U
187
+
188
+#define SECCOMP_IOC_MAGIC '!'
189
+#define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr)
190
+#define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type)
191
+#define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type)
192
+#define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type)
193
+
194
+/* Flags for seccomp notification fd ioctl. */
195
+#define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif)
196
+#define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \
197
+ struct seccomp_notif_resp)
198
+#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64)
199
+
200
+struct seccomp_notif {
201
+ __u64 id;
202
+ __u32 pid;
203
+ __u32 flags;
204
+ struct seccomp_data data;
205
+};
206
+
207
+struct seccomp_notif_resp {
208
+ __u64 id;
209
+ __s64 val;
210
+ __s32 error;
211
+ __u32 flags;
212
+};
213
+
214
+struct seccomp_notif_sizes {
215
+ __u16 seccomp_notif;
216
+ __u16 seccomp_notif_resp;
217
+ __u16 seccomp_data;
218
+};
219
+#endif
220
+
221
+#ifndef SECCOMP_IOCTL_NOTIF_ADDFD
222
+/* On success, the return value is the remote process's added fd number */
223
+#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \
224
+ struct seccomp_notif_addfd)
225
+
226
+/* valid flags for seccomp_notif_addfd */
227
+#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
228
+
229
+struct seccomp_notif_addfd {
230
+ __u64 id;
231
+ __u32 flags;
232
+ __u32 srcfd;
233
+ __u32 newfd;
234
+ __u32 newfd_flags;
235
+};
236
+#endif
237
+
238
+struct seccomp_notif_addfd_small {
239
+ __u64 id;
240
+ char weird[4];
241
+};
242
+#define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL \
243
+ SECCOMP_IOW(3, struct seccomp_notif_addfd_small)
244
+
245
+struct seccomp_notif_addfd_big {
246
+ union {
247
+ struct seccomp_notif_addfd addfd;
248
+ char buf[sizeof(struct seccomp_notif_addfd) + 8];
249
+ };
250
+};
251
+#define SECCOMP_IOCTL_NOTIF_ADDFD_BIG \
252
+ SECCOMP_IOWR(3, struct seccomp_notif_addfd_big)
253
+
254
+#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
255
+#define PTRACE_EVENTMSG_SYSCALL_ENTRY 1
256
+#define PTRACE_EVENTMSG_SYSCALL_EXIT 2
257
+#endif
258
+
259
+#ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE
260
+#define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
261
+#endif
262
+
263
+#ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH
264
+#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
265
+#endif
266
+
157267 #ifndef seccomp
158268 int seccomp(unsigned int op, unsigned int flags, void *args)
159269 {
....@@ -173,6 +283,40 @@
173283 #define SIBLING_EXIT_UNKILLED 0xbadbeef
174284 #define SIBLING_EXIT_FAILURE 0xbadface
175285 #define SIBLING_EXIT_NEWPRIVS 0xbadfeed
286
+
287
+static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
288
+{
289
+#ifdef __NR_kcmp
290
+ errno = 0;
291
+ return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
292
+#else
293
+ errno = ENOSYS;
294
+ return -1;
295
+#endif
296
+}
297
+
298
+/* Have TH_LOG report the actual location where filecmp() is used. */
299
+#define filecmp(pid1, pid2, fd1, fd2) ({ \
300
+ int _ret; \
301
+ \
302
+ _ret = __filecmp(pid1, pid2, fd1, fd2); \
303
+ if (_ret != 0) { \
304
+ if (_ret < 0 && errno == ENOSYS) { \
305
+ TH_LOG("kcmp() syscall missing (test is less accurate)");\
306
+ _ret = 0; \
307
+ } \
308
+ } \
309
+ _ret; })
310
+
311
+TEST(kcmp)
312
+{
313
+ int ret;
314
+
315
+ ret = __filecmp(getpid(), getpid(), 1, 1);
316
+ EXPECT_EQ(ret, 0);
317
+ if (ret != 0 && errno == ENOSYS)
318
+ SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)");
319
+}
176320
177321 TEST(mode_strict_support)
178322 {
....@@ -630,8 +774,15 @@
630774 return (void *)SIBLING_EXIT_UNKILLED;
631775 }
632776
777
+enum kill_t {
778
+ KILL_THREAD,
779
+ KILL_PROCESS,
780
+ RET_UNKNOWN
781
+};
782
+
633783 /* Prepare a thread that will kill itself or both of us. */
634
-void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
784
+void kill_thread_or_group(struct __test_metadata *_metadata,
785
+ enum kill_t kill_how)
635786 {
636787 pthread_t thread;
637788 void *status;
....@@ -647,11 +798,12 @@
647798 .len = (unsigned short)ARRAY_SIZE(filter_thread),
648799 .filter = filter_thread,
649800 };
801
+ int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAA;
650802 struct sock_filter filter_process[] = {
651803 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
652804 offsetof(struct seccomp_data, nr)),
653805 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
654
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
806
+ BPF_STMT(BPF_RET|BPF_K, kill),
655807 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
656808 };
657809 struct sock_fprog prog_process = {
....@@ -664,13 +816,15 @@
664816 }
665817
666818 ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
667
- kill_process ? &prog_process : &prog_thread));
819
+ kill_how == KILL_THREAD ? &prog_thread
820
+ : &prog_process));
668821
669822 /*
670823 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
671824 * flag cannot be downgraded by a new filter.
672825 */
673
- ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
826
+ if (kill_how == KILL_PROCESS)
827
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
674828
675829 /* Start a thread that will exit immediately. */
676830 ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
....@@ -698,7 +852,7 @@
698852 child_pid = fork();
699853 ASSERT_LE(0, child_pid);
700854 if (child_pid == 0) {
701
- kill_thread_or_group(_metadata, false);
855
+ kill_thread_or_group(_metadata, KILL_THREAD);
702856 _exit(38);
703857 }
704858
....@@ -717,7 +871,7 @@
717871 child_pid = fork();
718872 ASSERT_LE(0, child_pid);
719873 if (child_pid == 0) {
720
- kill_thread_or_group(_metadata, true);
874
+ kill_thread_or_group(_metadata, KILL_PROCESS);
721875 _exit(38);
722876 }
723877
....@@ -725,6 +879,27 @@
725879
726880 /* If the entire process was killed, we'll see SIGSYS. */
727881 ASSERT_TRUE(WIFSIGNALED(status));
882
+ ASSERT_EQ(SIGSYS, WTERMSIG(status));
883
+}
884
+
885
+TEST(KILL_unknown)
886
+{
887
+ int status;
888
+ pid_t child_pid;
889
+
890
+ child_pid = fork();
891
+ ASSERT_LE(0, child_pid);
892
+ if (child_pid == 0) {
893
+ kill_thread_or_group(_metadata, RET_UNKNOWN);
894
+ _exit(38);
895
+ }
896
+
897
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
898
+
899
+ /* If the entire process was killed, we'll see SIGSYS. */
900
+ EXPECT_TRUE(WIFSIGNALED(status)) {
901
+ TH_LOG("Unknown SECCOMP_RET is only killing the thread?");
902
+ }
728903 ASSERT_EQ(SIGSYS, WTERMSIG(status));
729904 }
730905
....@@ -776,7 +951,7 @@
776951 ASSERT_EQ(0, ret);
777952
778953 EXPECT_EQ(parent, syscall(__NR_getppid));
779
- EXPECT_EQ(-1, read(0, NULL, 0));
954
+ EXPECT_EQ(-1, read(-1, NULL, 0));
780955 EXPECT_EQ(E2BIG, errno);
781956 }
782957
....@@ -795,7 +970,7 @@
795970
796971 EXPECT_EQ(parent, syscall(__NR_getppid));
797972 /* "errno" of 0 is ok. */
798
- EXPECT_EQ(0, read(0, NULL, 0));
973
+ EXPECT_EQ(0, read(-1, NULL, 0));
799974 }
800975
801976 /*
....@@ -816,7 +991,7 @@
816991 ASSERT_EQ(0, ret);
817992
818993 EXPECT_EQ(parent, syscall(__NR_getppid));
819
- EXPECT_EQ(-1, read(0, NULL, 0));
994
+ EXPECT_EQ(-1, read(-1, NULL, 0));
820995 EXPECT_EQ(4095, errno);
821996 }
822997
....@@ -847,11 +1022,11 @@
8471022 ASSERT_EQ(0, ret);
8481023
8491024 EXPECT_EQ(parent, syscall(__NR_getppid));
850
- EXPECT_EQ(-1, read(0, NULL, 0));
1025
+ EXPECT_EQ(-1, read(-1, NULL, 0));
8511026 EXPECT_EQ(12, errno);
8521027 }
8531028
854
-FIXTURE_DATA(TRAP) {
1029
+FIXTURE(TRAP) {
8551030 struct sock_fprog prog;
8561031 };
8571032
....@@ -962,7 +1137,7 @@
9621137 EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
9631138 }
9641139
965
-FIXTURE_DATA(precedence) {
1140
+FIXTURE(precedence) {
9661141 struct sock_fprog allow;
9671142 struct sock_fprog log;
9681143 struct sock_fprog trace;
....@@ -1408,6 +1583,7 @@
14081583
14091584 return tracer_pid;
14101585 }
1586
+
14111587 void teardown_trace_fixture(struct __test_metadata *_metadata,
14121588 pid_t tracer)
14131589 {
....@@ -1451,7 +1627,7 @@
14511627 EXPECT_EQ(0, ret);
14521628 }
14531629
1454
-FIXTURE_DATA(TRACE_poke) {
1630
+FIXTURE(TRACE_poke) {
14551631 struct sock_fprog prog;
14561632 pid_t tracer;
14571633 long poked;
....@@ -1522,45 +1698,157 @@
15221698 }
15231699
15241700 #if defined(__x86_64__)
1525
-# define ARCH_REGS struct user_regs_struct
1526
-# define SYSCALL_NUM orig_rax
1527
-# define SYSCALL_RET rax
1701
+# define ARCH_REGS struct user_regs_struct
1702
+# define SYSCALL_NUM(_regs) (_regs).orig_rax
1703
+# define SYSCALL_RET(_regs) (_regs).rax
15281704 #elif defined(__i386__)
1529
-# define ARCH_REGS struct user_regs_struct
1530
-# define SYSCALL_NUM orig_eax
1531
-# define SYSCALL_RET eax
1705
+# define ARCH_REGS struct user_regs_struct
1706
+# define SYSCALL_NUM(_regs) (_regs).orig_eax
1707
+# define SYSCALL_RET(_regs) (_regs).eax
15321708 #elif defined(__arm__)
1533
-# define ARCH_REGS struct pt_regs
1534
-# define SYSCALL_NUM ARM_r7
1535
-# define SYSCALL_RET ARM_r0
1709
+# define ARCH_REGS struct pt_regs
1710
+# define SYSCALL_NUM(_regs) (_regs).ARM_r7
1711
+# ifndef PTRACE_SET_SYSCALL
1712
+# define PTRACE_SET_SYSCALL 23
1713
+# endif
1714
+# define SYSCALL_NUM_SET(_regs, _nr) \
1715
+ EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr))
1716
+# define SYSCALL_RET(_regs) (_regs).ARM_r0
15361717 #elif defined(__aarch64__)
1537
-# define ARCH_REGS struct user_pt_regs
1538
-# define SYSCALL_NUM regs[8]
1539
-# define SYSCALL_RET regs[0]
1718
+# define ARCH_REGS struct user_pt_regs
1719
+# define SYSCALL_NUM(_regs) (_regs).regs[8]
1720
+# ifndef NT_ARM_SYSTEM_CALL
1721
+# define NT_ARM_SYSTEM_CALL 0x404
1722
+# endif
1723
+# define SYSCALL_NUM_SET(_regs, _nr) \
1724
+ do { \
1725
+ struct iovec __v; \
1726
+ typeof(_nr) __nr = (_nr); \
1727
+ __v.iov_base = &__nr; \
1728
+ __v.iov_len = sizeof(__nr); \
1729
+ EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee, \
1730
+ NT_ARM_SYSTEM_CALL, &__v)); \
1731
+ } while (0)
1732
+# define SYSCALL_RET(_regs) (_regs).regs[0]
1733
+#elif defined(__riscv) && __riscv_xlen == 64
1734
+# define ARCH_REGS struct user_regs_struct
1735
+# define SYSCALL_NUM(_regs) (_regs).a7
1736
+# define SYSCALL_RET(_regs) (_regs).a0
1737
+#elif defined(__csky__)
1738
+# define ARCH_REGS struct pt_regs
1739
+# if defined(__CSKYABIV2__)
1740
+# define SYSCALL_NUM(_regs) (_regs).regs[3]
1741
+# else
1742
+# define SYSCALL_NUM(_regs) (_regs).regs[9]
1743
+# endif
1744
+# define SYSCALL_RET(_regs) (_regs).a0
15401745 #elif defined(__hppa__)
1541
-# define ARCH_REGS struct user_regs_struct
1542
-# define SYSCALL_NUM gr[20]
1543
-# define SYSCALL_RET gr[28]
1746
+# define ARCH_REGS struct user_regs_struct
1747
+# define SYSCALL_NUM(_regs) (_regs).gr[20]
1748
+# define SYSCALL_RET(_regs) (_regs).gr[28]
15441749 #elif defined(__powerpc__)
1545
-# define ARCH_REGS struct pt_regs
1546
-# define SYSCALL_NUM gpr[0]
1547
-# define SYSCALL_RET gpr[3]
1750
+# define ARCH_REGS struct pt_regs
1751
+# define SYSCALL_NUM(_regs) (_regs).gpr[0]
1752
+# define SYSCALL_RET(_regs) (_regs).gpr[3]
1753
+# define SYSCALL_RET_SET(_regs, _val) \
1754
+ do { \
1755
+ typeof(_val) _result = (_val); \
1756
+ if ((_regs.trap & 0xfff0) == 0x3000) { \
1757
+ /* \
1758
+ * scv 0 system call uses -ve result \
1759
+ * for error, so no need to adjust. \
1760
+ */ \
1761
+ SYSCALL_RET(_regs) = _result; \
1762
+ } else { \
1763
+ /* \
1764
+ * A syscall error is signaled by the \
1765
+ * CR0 SO bit and the code is stored as \
1766
+ * a positive value. \
1767
+ */ \
1768
+ if (_result < 0) { \
1769
+ SYSCALL_RET(_regs) = -_result; \
1770
+ (_regs).ccr |= 0x10000000; \
1771
+ } else { \
1772
+ SYSCALL_RET(_regs) = _result; \
1773
+ (_regs).ccr &= ~0x10000000; \
1774
+ } \
1775
+ } \
1776
+ } while (0)
1777
+# define SYSCALL_RET_SET_ON_PTRACE_EXIT
15481778 #elif defined(__s390__)
1549
-# define ARCH_REGS s390_regs
1550
-# define SYSCALL_NUM gprs[2]
1551
-# define SYSCALL_RET gprs[2]
1779
+# define ARCH_REGS s390_regs
1780
+# define SYSCALL_NUM(_regs) (_regs).gprs[2]
1781
+# define SYSCALL_RET_SET(_regs, _val) \
1782
+ TH_LOG("Can't modify syscall return on this architecture")
15521783 #elif defined(__mips__)
1553
-# define ARCH_REGS struct pt_regs
1554
-# define SYSCALL_NUM regs[2]
1555
-# define SYSCALL_SYSCALL_NUM regs[4]
1556
-# define SYSCALL_RET regs[2]
1557
-# define SYSCALL_NUM_RET_SHARE_REG
1784
+# include <asm/unistd_nr_n32.h>
1785
+# include <asm/unistd_nr_n64.h>
1786
+# include <asm/unistd_nr_o32.h>
1787
+# define ARCH_REGS struct pt_regs
1788
+# define SYSCALL_NUM(_regs) \
1789
+ ({ \
1790
+ typeof((_regs).regs[2]) _nr; \
1791
+ if ((_regs).regs[2] == __NR_O32_Linux) \
1792
+ _nr = (_regs).regs[4]; \
1793
+ else \
1794
+ _nr = (_regs).regs[2]; \
1795
+ _nr; \
1796
+ })
1797
+# define SYSCALL_NUM_SET(_regs, _nr) \
1798
+ do { \
1799
+ if ((_regs).regs[2] == __NR_O32_Linux) \
1800
+ (_regs).regs[4] = _nr; \
1801
+ else \
1802
+ (_regs).regs[2] = _nr; \
1803
+ } while (0)
1804
+# define SYSCALL_RET_SET(_regs, _val) \
1805
+ TH_LOG("Can't modify syscall return on this architecture")
1806
+#elif defined(__xtensa__)
1807
+# define ARCH_REGS struct user_pt_regs
1808
+# define SYSCALL_NUM(_regs) (_regs).syscall
1809
+/*
1810
+ * On xtensa syscall return value is in the register
1811
+ * a2 of the current window which is not fixed.
1812
+ */
1813
+#define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2]
1814
+#elif defined(__sh__)
1815
+# define ARCH_REGS struct pt_regs
1816
+# define SYSCALL_NUM(_regs) (_regs).regs[3]
1817
+# define SYSCALL_RET(_regs) (_regs).regs[0]
15581818 #else
15591819 # error "Do not know how to find your architecture's registers and syscalls"
15601820 #endif
15611821
1822
+/*
1823
+ * Most architectures can change the syscall by just updating the
1824
+ * associated register. This is the default if not defined above.
1825
+ */
1826
+#ifndef SYSCALL_NUM_SET
1827
+# define SYSCALL_NUM_SET(_regs, _nr) \
1828
+ do { \
1829
+ SYSCALL_NUM(_regs) = (_nr); \
1830
+ } while (0)
1831
+#endif
1832
+/*
1833
+ * Most architectures can change the syscall return value by just
1834
+ * writing to the SYSCALL_RET register. This is the default if not
1835
+ * defined above. If an architecture cannot set the return value
1836
+ * (for example when the syscall and return value register is
1837
+ * shared), report it with TH_LOG() in an arch-specific definition
1838
+ * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined.
1839
+ */
1840
+#if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET)
1841
+# error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch"
1842
+#endif
1843
+#ifndef SYSCALL_RET_SET
1844
+# define SYSCALL_RET_SET(_regs, _val) \
1845
+ do { \
1846
+ SYSCALL_RET(_regs) = (_val); \
1847
+ } while (0)
1848
+#endif
1849
+
15621850 /* When the syscall return can't be changed, stub out the tests for it. */
1563
-#ifdef SYSCALL_NUM_RET_SHARE_REG
1851
+#ifndef SYSCALL_RET
15641852 # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action)
15651853 #else
15661854 # define EXPECT_SYSCALL_RETURN(val, action) \
....@@ -1575,115 +1863,95 @@
15751863 } while (0)
15761864 #endif
15771865
1578
-/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
1866
+/*
1867
+ * Some architectures (e.g. powerpc) can only set syscall
1868
+ * return values on syscall exit during ptrace.
1869
+ */
1870
+const bool ptrace_entry_set_syscall_nr = true;
1871
+const bool ptrace_entry_set_syscall_ret =
1872
+#ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT
1873
+ true;
1874
+#else
1875
+ false;
1876
+#endif
1877
+
1878
+/*
1879
+ * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
15791880 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
15801881 */
15811882 #if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
1582
-#define HAVE_GETREGS
1883
+# define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs))
1884
+# define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs))
1885
+#else
1886
+# define ARCH_GETREGS(_regs) ({ \
1887
+ struct iovec __v; \
1888
+ __v.iov_base = &(_regs); \
1889
+ __v.iov_len = sizeof(_regs); \
1890
+ ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v); \
1891
+ })
1892
+# define ARCH_SETREGS(_regs) ({ \
1893
+ struct iovec __v; \
1894
+ __v.iov_base = &(_regs); \
1895
+ __v.iov_len = sizeof(_regs); \
1896
+ ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v); \
1897
+ })
15831898 #endif
15841899
15851900 /* Architecture-specific syscall fetching routine. */
15861901 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
15871902 {
15881903 ARCH_REGS regs;
1589
-#ifdef HAVE_GETREGS
1590
- EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
1591
- TH_LOG("PTRACE_GETREGS failed");
1904
+
1905
+ EXPECT_EQ(0, ARCH_GETREGS(regs)) {
15921906 return -1;
15931907 }
1594
-#else
1595
- struct iovec iov;
15961908
1597
- iov.iov_base = &regs;
1598
- iov.iov_len = sizeof(regs);
1599
- EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1600
- TH_LOG("PTRACE_GETREGSET failed");
1601
- return -1;
1602
- }
1603
-#endif
1604
-
1605
-#if defined(__mips__)
1606
- if (regs.SYSCALL_NUM == __NR_O32_Linux)
1607
- return regs.SYSCALL_SYSCALL_NUM;
1608
-#endif
1609
- return regs.SYSCALL_NUM;
1909
+ return SYSCALL_NUM(regs);
16101910 }
16111911
16121912 /* Architecture-specific syscall changing routine. */
1613
-void change_syscall(struct __test_metadata *_metadata,
1614
- pid_t tracee, int syscall, int result)
1913
+void __change_syscall(struct __test_metadata *_metadata,
1914
+ pid_t tracee, long *syscall, long *ret)
16151915 {
1616
- int ret;
1617
- ARCH_REGS regs;
1618
-#ifdef HAVE_GETREGS
1619
- ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
1620
-#else
1621
- struct iovec iov;
1622
- iov.iov_base = &regs;
1623
- iov.iov_len = sizeof(regs);
1624
- ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1625
-#endif
1626
- EXPECT_EQ(0, ret) {}
1916
+ ARCH_REGS orig, regs;
16271917
1628
-#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
1629
- defined(__s390__) || defined(__hppa__)
1630
- {
1631
- regs.SYSCALL_NUM = syscall;
1918
+ /* Do not get/set registers if we have nothing to do. */
1919
+ if (!syscall && !ret)
1920
+ return;
1921
+
1922
+ EXPECT_EQ(0, ARCH_GETREGS(regs)) {
1923
+ return;
16321924 }
1633
-#elif defined(__mips__)
1634
- {
1635
- if (regs.SYSCALL_NUM == __NR_O32_Linux)
1636
- regs.SYSCALL_SYSCALL_NUM = syscall;
1637
- else
1638
- regs.SYSCALL_NUM = syscall;
1639
- }
1925
+ orig = regs;
16401926
1641
-#elif defined(__arm__)
1642
-# ifndef PTRACE_SET_SYSCALL
1643
-# define PTRACE_SET_SYSCALL 23
1644
-# endif
1645
- {
1646
- ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1647
- EXPECT_EQ(0, ret);
1648
- }
1927
+ if (syscall)
1928
+ SYSCALL_NUM_SET(regs, *syscall);
16491929
1650
-#elif defined(__aarch64__)
1651
-# ifndef NT_ARM_SYSTEM_CALL
1652
-# define NT_ARM_SYSTEM_CALL 0x404
1653
-# endif
1654
- {
1655
- iov.iov_base = &syscall;
1656
- iov.iov_len = sizeof(syscall);
1657
- ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
1658
- &iov);
1659
- EXPECT_EQ(0, ret);
1660
- }
1930
+ if (ret)
1931
+ SYSCALL_RET_SET(regs, *ret);
16611932
1662
-#else
1663
- ASSERT_EQ(1, 0) {
1664
- TH_LOG("How is the syscall changed on this architecture?");
1665
- }
1666
-#endif
1667
-
1668
- /* If syscall is skipped, change return value. */
1669
- if (syscall == -1)
1670
-#ifdef SYSCALL_NUM_RET_SHARE_REG
1671
- TH_LOG("Can't modify syscall return on this architecture");
1672
-#else
1673
- regs.SYSCALL_RET = result;
1674
-#endif
1675
-
1676
-#ifdef HAVE_GETREGS
1677
- ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
1678
-#else
1679
- iov.iov_base = &regs;
1680
- iov.iov_len = sizeof(regs);
1681
- ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1682
-#endif
1683
- EXPECT_EQ(0, ret);
1933
+ /* Flush any register changes made. */
1934
+ if (memcmp(&orig, &regs, sizeof(orig)) != 0)
1935
+ EXPECT_EQ(0, ARCH_SETREGS(regs));
16841936 }
16851937
1686
-void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1938
+/* Change only syscall number. */
1939
+void change_syscall_nr(struct __test_metadata *_metadata,
1940
+ pid_t tracee, long syscall)
1941
+{
1942
+ __change_syscall(_metadata, tracee, &syscall, NULL);
1943
+}
1944
+
1945
+/* Change syscall return value (and set syscall number to -1). */
1946
+void change_syscall_ret(struct __test_metadata *_metadata,
1947
+ pid_t tracee, long ret)
1948
+{
1949
+ long syscall = -1;
1950
+
1951
+ __change_syscall(_metadata, tracee, &syscall, &ret);
1952
+}
1953
+
1954
+void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
16871955 int status, void *args)
16881956 {
16891957 int ret;
....@@ -1698,17 +1966,17 @@
16981966 case 0x1002:
16991967 /* change getpid to getppid. */
17001968 EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1701
- change_syscall(_metadata, tracee, __NR_getppid, 0);
1969
+ change_syscall_nr(_metadata, tracee, __NR_getppid);
17021970 break;
17031971 case 0x1003:
17041972 /* skip gettid with valid return code. */
17051973 EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1706
- change_syscall(_metadata, tracee, -1, 45000);
1974
+ change_syscall_ret(_metadata, tracee, 45000);
17071975 break;
17081976 case 0x1004:
17091977 /* skip openat with error. */
17101978 EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
1711
- change_syscall(_metadata, tracee, -1, -ESRCH);
1979
+ change_syscall_ret(_metadata, tracee, -ESRCH);
17121980 break;
17131981 case 0x1005:
17141982 /* do nothing (allow getppid) */
....@@ -1723,36 +1991,92 @@
17231991
17241992 }
17251993
1994
+FIXTURE(TRACE_syscall) {
1995
+ struct sock_fprog prog;
1996
+ pid_t tracer, mytid, mypid, parent;
1997
+ long syscall_nr;
1998
+};
1999
+
17262000 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
17272001 int status, void *args)
17282002 {
1729
- int ret, nr;
2003
+ int ret;
17302004 unsigned long msg;
17312005 static bool entry;
2006
+ long syscall_nr_val, syscall_ret_val;
2007
+ long *syscall_nr = NULL, *syscall_ret = NULL;
2008
+ FIXTURE_DATA(TRACE_syscall) *self = args;
17322009
1733
- /* Make sure we got an empty message. */
2010
+ /*
2011
+ * The traditional way to tell PTRACE_SYSCALL entry/exit
2012
+ * is by counting.
2013
+ */
2014
+ entry = !entry;
2015
+
2016
+ /* Make sure we got an appropriate message. */
17342017 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
17352018 EXPECT_EQ(0, ret);
1736
- EXPECT_EQ(0, msg);
2019
+ EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
2020
+ : PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
17372021
1738
- /* The only way to tell PTRACE_SYSCALL entry/exit is by counting. */
1739
- entry = !entry;
1740
- if (!entry)
2022
+ /*
2023
+ * Some architectures only support setting return values during
2024
+ * syscall exit under ptrace, and on exit the syscall number may
2025
+ * no longer be available. Therefore, save the initial syscall
2026
+ * number here, so it can be examined during both entry and exit
2027
+ * phases.
2028
+ */
2029
+ if (entry)
2030
+ self->syscall_nr = get_syscall(_metadata, tracee);
2031
+
2032
+ /*
2033
+ * Depending on the architecture's syscall setting abilities, we
2034
+ * pick which things to set during this phase (entry or exit).
2035
+ */
2036
+ if (entry == ptrace_entry_set_syscall_nr)
2037
+ syscall_nr = &syscall_nr_val;
2038
+ if (entry == ptrace_entry_set_syscall_ret)
2039
+ syscall_ret = &syscall_ret_val;
2040
+
2041
+ /* Now handle the actual rewriting cases. */
2042
+ switch (self->syscall_nr) {
2043
+ case __NR_getpid:
2044
+ syscall_nr_val = __NR_getppid;
2045
+ /* Never change syscall return for this case. */
2046
+ syscall_ret = NULL;
2047
+ break;
2048
+ case __NR_gettid:
2049
+ syscall_nr_val = -1;
2050
+ syscall_ret_val = 45000;
2051
+ break;
2052
+ case __NR_openat:
2053
+ syscall_nr_val = -1;
2054
+ syscall_ret_val = -ESRCH;
2055
+ break;
2056
+ default:
2057
+ /* Unhandled, do nothing. */
17412058 return;
2059
+ }
17422060
1743
- nr = get_syscall(_metadata, tracee);
1744
-
1745
- if (nr == __NR_getpid)
1746
- change_syscall(_metadata, tracee, __NR_getppid, 0);
1747
- if (nr == __NR_gettid)
1748
- change_syscall(_metadata, tracee, -1, 45000);
1749
- if (nr == __NR_openat)
1750
- change_syscall(_metadata, tracee, -1, -ESRCH);
2061
+ __change_syscall(_metadata, tracee, syscall_nr, syscall_ret);
17512062 }
17522063
1753
-FIXTURE_DATA(TRACE_syscall) {
1754
- struct sock_fprog prog;
1755
- pid_t tracer, mytid, mypid, parent;
2064
+FIXTURE_VARIANT(TRACE_syscall) {
2065
+ /*
2066
+ * All of the SECCOMP_RET_TRACE behaviors can be tested with either
2067
+ * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL.
2068
+ * This indicates if we should use SECCOMP_RET_TRACE (false), or
2069
+ * ptrace (true).
2070
+ */
2071
+ bool use_ptrace;
2072
+};
2073
+
2074
+FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) {
2075
+ .use_ptrace = true,
2076
+};
2077
+
2078
+FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) {
2079
+ .use_ptrace = false,
17562080 };
17572081
17582082 FIXTURE_SETUP(TRACE_syscall)
....@@ -1770,12 +2094,11 @@
17702094 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
17712095 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
17722096 };
1773
-
1774
- memset(&self->prog, 0, sizeof(self->prog));
1775
- self->prog.filter = malloc(sizeof(filter));
1776
- ASSERT_NE(NULL, self->prog.filter);
1777
- memcpy(self->prog.filter, filter, sizeof(filter));
1778
- self->prog.len = (unsigned short)ARRAY_SIZE(filter);
2097
+ struct sock_fprog prog = {
2098
+ .len = (unsigned short)ARRAY_SIZE(filter),
2099
+ .filter = filter,
2100
+ };
2101
+ long ret;
17792102
17802103 /* Prepare some testable syscall results. */
17812104 self->mytid = syscall(__NR_gettid);
....@@ -1793,60 +2116,48 @@
17932116 ASSERT_NE(self->parent, self->mypid);
17942117
17952118 /* Launch tracer. */
1796
- self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
1797
- false);
2119
+ self->tracer = setup_trace_fixture(_metadata,
2120
+ variant->use_ptrace ? tracer_ptrace
2121
+ : tracer_seccomp,
2122
+ self, variant->use_ptrace);
2123
+
2124
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2125
+ ASSERT_EQ(0, ret);
2126
+
2127
+ if (variant->use_ptrace)
2128
+ return;
2129
+
2130
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2131
+ ASSERT_EQ(0, ret);
17982132 }
17992133
18002134 FIXTURE_TEARDOWN(TRACE_syscall)
18012135 {
18022136 teardown_trace_fixture(_metadata, self->tracer);
1803
- if (self->prog.filter)
1804
- free(self->prog.filter);
18052137 }
18062138
1807
-TEST_F(TRACE_syscall, ptrace_syscall_redirected)
2139
+TEST(negative_ENOSYS)
18082140 {
1809
- /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1810
- teardown_trace_fixture(_metadata, self->tracer);
1811
- self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1812
- true);
1813
-
1814
- /* Tracer will redirect getpid to getppid. */
1815
- EXPECT_NE(self->mypid, syscall(__NR_getpid));
2141
+ /*
2142
+ * There should be no difference between an "internal" skip
2143
+ * and userspace asking for syscall "-1".
2144
+ */
2145
+ errno = 0;
2146
+ EXPECT_EQ(-1, syscall(-1));
2147
+ EXPECT_EQ(errno, ENOSYS);
2148
+ /* And no difference for "still not valid but not -1". */
2149
+ errno = 0;
2150
+ EXPECT_EQ(-1, syscall(-101));
2151
+ EXPECT_EQ(errno, ENOSYS);
18162152 }
18172153
1818
-TEST_F(TRACE_syscall, ptrace_syscall_errno)
2154
+TEST_F(TRACE_syscall, negative_ENOSYS)
18192155 {
1820
- /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1821
- teardown_trace_fixture(_metadata, self->tracer);
1822
- self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1823
- true);
1824
-
1825
- /* Tracer should skip the open syscall, resulting in ESRCH. */
1826
- EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
1827
-}
1828
-
1829
-TEST_F(TRACE_syscall, ptrace_syscall_faked)
1830
-{
1831
- /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1832
- teardown_trace_fixture(_metadata, self->tracer);
1833
- self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1834
- true);
1835
-
1836
- /* Tracer should skip the gettid syscall, resulting fake pid. */
1837
- EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
2156
+ negative_ENOSYS(_metadata);
18382157 }
18392158
18402159 TEST_F(TRACE_syscall, syscall_allowed)
18412160 {
1842
- long ret;
1843
-
1844
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1845
- ASSERT_EQ(0, ret);
1846
-
1847
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1848
- ASSERT_EQ(0, ret);
1849
-
18502161 /* getppid works as expected (no changes). */
18512162 EXPECT_EQ(self->parent, syscall(__NR_getppid));
18522163 EXPECT_NE(self->mypid, syscall(__NR_getppid));
....@@ -1854,14 +2165,6 @@
18542165
18552166 TEST_F(TRACE_syscall, syscall_redirected)
18562167 {
1857
- long ret;
1858
-
1859
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1860
- ASSERT_EQ(0, ret);
1861
-
1862
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1863
- ASSERT_EQ(0, ret);
1864
-
18652168 /* getpid has been redirected to getppid as expected. */
18662169 EXPECT_EQ(self->parent, syscall(__NR_getpid));
18672170 EXPECT_NE(self->mypid, syscall(__NR_getpid));
....@@ -1869,33 +2172,17 @@
18692172
18702173 TEST_F(TRACE_syscall, syscall_errno)
18712174 {
1872
- long ret;
1873
-
1874
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1875
- ASSERT_EQ(0, ret);
1876
-
1877
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1878
- ASSERT_EQ(0, ret);
1879
-
1880
- /* openat has been skipped and an errno return. */
2175
+ /* Tracer should skip the open syscall, resulting in ESRCH. */
18812176 EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
18822177 }
18832178
18842179 TEST_F(TRACE_syscall, syscall_faked)
18852180 {
1886
- long ret;
1887
-
1888
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1889
- ASSERT_EQ(0, ret);
1890
-
1891
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1892
- ASSERT_EQ(0, ret);
1893
-
1894
- /* gettid has been skipped and an altered return value stored. */
2181
+ /* Tracer skips the gettid syscall and stores an altered return value. */
18952182 EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
18962183 }
18972184
1898
-TEST_F(TRACE_syscall, skip_after_RET_TRACE)
2185
+TEST_F(TRACE_syscall, skip_after)
18992186 {
19002187 struct sock_filter filter[] = {
19012188 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
....@@ -1910,14 +2197,7 @@
19102197 };
19112198 long ret;
19122199
1913
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1914
- ASSERT_EQ(0, ret);
1915
-
1916
- /* Install fixture filter. */
1917
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1918
- ASSERT_EQ(0, ret);
1919
-
1920
- /* Install "errno on getppid" filter. */
2200
+ /* Install additional "errno on getppid" filter. */
19212201 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
19222202 ASSERT_EQ(0, ret);
19232203
....@@ -1927,7 +2207,7 @@
19272207 EXPECT_EQ(EPERM, errno);
19282208 }
19292209
1930
-TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
2210
+TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS)
19312211 {
19322212 struct sock_filter filter[] = {
19332213 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
....@@ -1942,77 +2222,7 @@
19422222 };
19432223 long ret;
19442224
1945
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1946
- ASSERT_EQ(0, ret);
1947
-
1948
- /* Install fixture filter. */
1949
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1950
- ASSERT_EQ(0, ret);
1951
-
1952
- /* Install "death on getppid" filter. */
1953
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1954
- ASSERT_EQ(0, ret);
1955
-
1956
- /* Tracer will redirect getpid to getppid, and we should die. */
1957
- EXPECT_NE(self->mypid, syscall(__NR_getpid));
1958
-}
1959
-
1960
-TEST_F(TRACE_syscall, skip_after_ptrace)
1961
-{
1962
- struct sock_filter filter[] = {
1963
- BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1964
- offsetof(struct seccomp_data, nr)),
1965
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1966
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1967
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1968
- };
1969
- struct sock_fprog prog = {
1970
- .len = (unsigned short)ARRAY_SIZE(filter),
1971
- .filter = filter,
1972
- };
1973
- long ret;
1974
-
1975
- /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1976
- teardown_trace_fixture(_metadata, self->tracer);
1977
- self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1978
- true);
1979
-
1980
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1981
- ASSERT_EQ(0, ret);
1982
-
1983
- /* Install "errno on getppid" filter. */
1984
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1985
- ASSERT_EQ(0, ret);
1986
-
1987
- /* Tracer will redirect getpid to getppid, and we should see EPERM. */
1988
- EXPECT_EQ(-1, syscall(__NR_getpid));
1989
- EXPECT_EQ(EPERM, errno);
1990
-}
1991
-
1992
-TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
1993
-{
1994
- struct sock_filter filter[] = {
1995
- BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1996
- offsetof(struct seccomp_data, nr)),
1997
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1998
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1999
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2000
- };
2001
- struct sock_fprog prog = {
2002
- .len = (unsigned short)ARRAY_SIZE(filter),
2003
- .filter = filter,
2004
- };
2005
- long ret;
2006
-
2007
- /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
2008
- teardown_trace_fixture(_metadata, self->tracer);
2009
- self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2010
- true);
2011
-
2012
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2013
- ASSERT_EQ(0, ret);
2014
-
2015
- /* Install "death on getppid" filter. */
2225
+ /* Install additional "death on getppid" filter. */
20162226 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
20172227 ASSERT_EQ(0, ret);
20182228
....@@ -2119,12 +2329,17 @@
21192329 {
21202330 unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
21212331 SECCOMP_FILTER_FLAG_LOG,
2122
- SECCOMP_FILTER_FLAG_SPEC_ALLOW };
2123
- unsigned int flag, all_flags;
2332
+ SECCOMP_FILTER_FLAG_SPEC_ALLOW,
2333
+ SECCOMP_FILTER_FLAG_NEW_LISTENER,
2334
+ SECCOMP_FILTER_FLAG_TSYNC_ESRCH };
2335
+ unsigned int exclusive[] = {
2336
+ SECCOMP_FILTER_FLAG_TSYNC,
2337
+ SECCOMP_FILTER_FLAG_NEW_LISTENER };
2338
+ unsigned int flag, all_flags, exclusive_mask;
21242339 int i;
21252340 long ret;
21262341
2127
- /* Test detection of known-good filter flags */
2342
+ /* Test detection of individual known-good filter flags */
21282343 for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
21292344 int bits = 0;
21302345
....@@ -2151,16 +2366,29 @@
21512366 all_flags |= flag;
21522367 }
21532368
2154
- /* Test detection of all known-good filter flags */
2155
- ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL);
2156
- EXPECT_EQ(-1, ret);
2157
- EXPECT_EQ(EFAULT, errno) {
2158
- TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
2159
- all_flags);
2369
+ /*
2370
+ * Test detection of all known-good filter flags combined. But
2371
+ * for the exclusive flags we need to mask them out and try them
2372
+ * individually for the "all flags" testing.
2373
+ */
2374
+ exclusive_mask = 0;
2375
+ for (i = 0; i < ARRAY_SIZE(exclusive); i++)
2376
+ exclusive_mask |= exclusive[i];
2377
+ for (i = 0; i < ARRAY_SIZE(exclusive); i++) {
2378
+ flag = all_flags & ~exclusive_mask;
2379
+ flag |= exclusive[i];
2380
+
2381
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2382
+ EXPECT_EQ(-1, ret);
2383
+ EXPECT_EQ(EFAULT, errno) {
2384
+ TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
2385
+ flag);
2386
+ }
21602387 }
21612388
2162
- /* Test detection of an unknown filter flag */
2389
+ /* Test detection of unknown filter flags, without exclusives. */
21632390 flag = -1;
2391
+ flag &= ~exclusive_mask;
21642392 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
21652393 EXPECT_EQ(-1, ret);
21662394 EXPECT_EQ(EINVAL, errno) {
....@@ -2237,7 +2465,7 @@
22372465 } \
22382466 } while (0)
22392467
2240
-FIXTURE_DATA(TSYNC) {
2468
+FIXTURE(TSYNC) {
22412469 struct sock_fprog root_prog, apply_prog;
22422470 struct tsync_sibling sibling[TSYNC_SIBLINGS];
22432471 sem_t started;
....@@ -2347,7 +2575,7 @@
23472575 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
23482576 if (!ret)
23492577 return (void *)SIBLING_EXIT_NEWPRIVS;
2350
- read(0, NULL, 0);
2578
+ read(-1, NULL, 0);
23512579 return (void *)SIBLING_EXIT_UNKILLED;
23522580 }
23532581
....@@ -2561,10 +2789,60 @@
25612789 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
25622790 }
25632791
2792
+TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err)
2793
+{
2794
+ long ret, flags;
2795
+ void *status;
2796
+
2797
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2798
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2799
+ }
2800
+
2801
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2802
+ ASSERT_NE(ENOSYS, errno) {
2803
+ TH_LOG("Kernel does not support seccomp syscall!");
2804
+ }
2805
+ ASSERT_EQ(0, ret) {
2806
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2807
+ }
2808
+ self->sibling[0].diverge = 1;
2809
+ tsync_start_sibling(&self->sibling[0]);
2810
+ tsync_start_sibling(&self->sibling[1]);
2811
+
2812
+ while (self->sibling_count < TSYNC_SIBLINGS) {
2813
+ sem_wait(&self->started);
2814
+ self->sibling_count++;
2815
+ }
2816
+
2817
+ flags = SECCOMP_FILTER_FLAG_TSYNC | \
2818
+ SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
2819
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog);
2820
+ ASSERT_EQ(ESRCH, errno) {
2821
+ TH_LOG("Did not return ESRCH for diverged sibling.");
2822
+ }
2823
+ ASSERT_EQ(-1, ret) {
2824
+ TH_LOG("Did not fail on diverged sibling.");
2825
+ }
2826
+
2827
+ /* Wake the threads */
2828
+ pthread_mutex_lock(&self->mutex);
2829
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2830
+ TH_LOG("cond broadcast non-zero");
2831
+ }
2832
+ pthread_mutex_unlock(&self->mutex);
2833
+
2834
+ /* Ensure they are both unkilled. */
2835
+ PTHREAD_JOIN(self->sibling[0].tid, &status);
2836
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2837
+ PTHREAD_JOIN(self->sibling[1].tid, &status);
2838
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2839
+}
2840
+
25642841 TEST_F(TSYNC, two_siblings_not_under_filter)
25652842 {
25662843 long ret, sib;
25672844 void *status;
2845
+ struct timespec delay = { .tv_nsec = 100000000 };
25682846
25692847 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
25702848 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
....@@ -2618,7 +2896,7 @@
26182896 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
26192897 /* Poll for actual task death. pthread_join doesn't guarantee it. */
26202898 while (!kill(self->sibling[sib].system_tid, 0))
2621
- sleep(0.1);
2899
+ nanosleep(&delay, NULL);
26222900 /* Switch to the remaining sibling */
26232901 sib = !sib;
26242902
....@@ -2643,7 +2921,7 @@
26432921 EXPECT_EQ(0, (long)status);
26442922 /* Poll for actual task death. pthread_join doesn't guarantee it. */
26452923 while (!kill(self->sibling[sib].system_tid, 0))
2646
- sleep(0.1);
2924
+ nanosleep(&delay, NULL);
26472925
26482926 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
26492927 &self->apply_prog);
....@@ -2664,12 +2942,13 @@
26642942 offsetof(struct seccomp_data, nr)),
26652943
26662944 #ifdef __NR_sigreturn
2667
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
2945
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0),
26682946 #endif
2669
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
2670
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
2671
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
2672
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
2947
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0),
2948
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0),
2949
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0),
2950
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0),
2951
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0),
26732952 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
26742953
26752954 /* Allow __NR_write for easy logging. */
....@@ -2756,7 +3035,8 @@
27563035 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
27573036 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
27583037 ASSERT_EQ(0x100, msg);
2759
- EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));
3038
+ ret = get_syscall(_metadata, child_pid);
3039
+ EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep);
27603040
27613041 /* Might as well check siginfo for sanity while we're here. */
27623042 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
....@@ -2773,9 +3053,14 @@
27733053 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
27743054 ASSERT_EQ(true, WIFSTOPPED(status));
27753055 ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
2776
- /* Verify signal delivery came from parent now. */
27773056 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2778
- EXPECT_EQ(getpid(), info.si_pid);
3057
+ /*
3058
+ * There is no siginfo on SIGSTOP any more, so we can't verify
3059
+ * signal delivery came from parent now (getpid() == info.si_pid).
3060
+ * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com
3061
+ * At least verify the SIGSTOP via PTRACE_GETSIGINFO.
3062
+ */
3063
+ EXPECT_EQ(SIGSTOP, info.si_signo);
27793064
27803065 /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
27813066 ASSERT_EQ(0, kill(child_pid, SIGCONT));
....@@ -2922,7 +3207,7 @@
29223207
29233208 /* Only real root can get metadata. */
29243209 if (geteuid()) {
2925
- XFAIL(return, "get_metadata requires real root");
3210
+ SKIP(return, "get_metadata requires real root");
29263211 return;
29273212 }
29283213
....@@ -2940,11 +3225,11 @@
29403225 };
29413226
29423227 /* one with log, one without */
2943
- ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
3228
+ EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
29443229 SECCOMP_FILTER_FLAG_LOG, &prog));
2945
- ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
3230
+ EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
29463231
2947
- ASSERT_EQ(0, close(pipefd[0]));
3232
+ EXPECT_EQ(0, close(pipefd[0]));
29483233 ASSERT_EQ(1, write(pipefd[1], "1", 1));
29493234 ASSERT_EQ(0, close(pipefd[1]));
29503235
....@@ -2965,7 +3250,7 @@
29653250 ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
29663251 EXPECT_EQ(sizeof(md), ret) {
29673252 if (errno == EINVAL)
2968
- XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
3253
+ SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
29693254 }
29703255
29713256 EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
....@@ -2981,9 +3266,890 @@
29813266 ASSERT_EQ(0, kill(pid, SIGKILL));
29823267 }
29833268
3269
+static int user_notif_syscall(int nr, unsigned int flags)
3270
+{
3271
+ struct sock_filter filter[] = {
3272
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
3273
+ offsetof(struct seccomp_data, nr)),
3274
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
3275
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
3276
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3277
+ };
3278
+
3279
+ struct sock_fprog prog = {
3280
+ .len = (unsigned short)ARRAY_SIZE(filter),
3281
+ .filter = filter,
3282
+ };
3283
+
3284
+ return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
3285
+}
3286
+
3287
+#define USER_NOTIF_MAGIC INT_MAX
3288
+TEST(user_notification_basic)
3289
+{
3290
+ pid_t pid;
3291
+ long ret;
3292
+ int status, listener;
3293
+ struct seccomp_notif req = {};
3294
+ struct seccomp_notif_resp resp = {};
3295
+ struct pollfd pollfd;
3296
+
3297
+ struct sock_filter filter[] = {
3298
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3299
+ };
3300
+ struct sock_fprog prog = {
3301
+ .len = (unsigned short)ARRAY_SIZE(filter),
3302
+ .filter = filter,
3303
+ };
3304
+
3305
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3306
+ ASSERT_EQ(0, ret) {
3307
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3308
+ }
3309
+
3310
+ pid = fork();
3311
+ ASSERT_GE(pid, 0);
3312
+
3313
+ /* Check that we get -ENOSYS with no listener attached */
3314
+ if (pid == 0) {
3315
+ if (user_notif_syscall(__NR_getppid, 0) < 0)
3316
+ exit(1);
3317
+ ret = syscall(__NR_getppid);
3318
+ exit(ret >= 0 || errno != ENOSYS);
3319
+ }
3320
+
3321
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
3322
+ EXPECT_EQ(true, WIFEXITED(status));
3323
+ EXPECT_EQ(0, WEXITSTATUS(status));
3324
+
3325
+ /* Add some no-op filters for grins. */
3326
+ EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3327
+ EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3328
+ EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3329
+ EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3330
+
3331
+ /* Check that the basic notification machinery works */
3332
+ listener = user_notif_syscall(__NR_getppid,
3333
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
3334
+ ASSERT_GE(listener, 0);
3335
+
3336
+ /* Installing a second listener in the chain should EBUSY */
3337
+ EXPECT_EQ(user_notif_syscall(__NR_getppid,
3338
+ SECCOMP_FILTER_FLAG_NEW_LISTENER),
3339
+ -1);
3340
+ EXPECT_EQ(errno, EBUSY);
3341
+
3342
+ pid = fork();
3343
+ ASSERT_GE(pid, 0);
3344
+
3345
+ if (pid == 0) {
3346
+ ret = syscall(__NR_getppid);
3347
+ exit(ret != USER_NOTIF_MAGIC);
3348
+ }
3349
+
3350
+ pollfd.fd = listener;
3351
+ pollfd.events = POLLIN | POLLOUT;
3352
+
3353
+ EXPECT_GT(poll(&pollfd, 1, -1), 0);
3354
+ EXPECT_EQ(pollfd.revents, POLLIN);
3355
+
3356
+ /* Test that we can't pass garbage to the kernel. */
3357
+ memset(&req, 0, sizeof(req));
3358
+ req.pid = -1;
3359
+ errno = 0;
3360
+ ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
3361
+ EXPECT_EQ(-1, ret);
3362
+ EXPECT_EQ(EINVAL, errno);
3363
+
3364
+ if (ret) {
3365
+ req.pid = 0;
3366
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3367
+ }
3368
+
3369
+ pollfd.fd = listener;
3370
+ pollfd.events = POLLIN | POLLOUT;
3371
+
3372
+ EXPECT_GT(poll(&pollfd, 1, -1), 0);
3373
+ EXPECT_EQ(pollfd.revents, POLLOUT);
3374
+
3375
+ EXPECT_EQ(req.data.nr, __NR_getppid);
3376
+
3377
+ resp.id = req.id;
3378
+ resp.error = 0;
3379
+ resp.val = USER_NOTIF_MAGIC;
3380
+
3381
+ /* Check that a response with nonzero flags is rejected with EINVAL. */
3382
+ resp.flags = 1;
3383
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3384
+ EXPECT_EQ(errno, EINVAL);
3385
+
3386
+ resp.flags = 0;
3387
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3388
+
3389
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
3390
+ EXPECT_EQ(true, WIFEXITED(status));
3391
+ EXPECT_EQ(0, WEXITSTATUS(status));
3392
+}
3393
+
3394
+TEST(user_notification_with_tsync)
3395
+{
3396
+ int ret;
3397
+ unsigned int flags;
3398
+
3399
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3400
+ ASSERT_EQ(0, ret) {
3401
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3402
+ }
3403
+
3404
+ /* these were exclusive */
3405
+ flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
3406
+ SECCOMP_FILTER_FLAG_TSYNC;
3407
+ ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags));
3408
+ ASSERT_EQ(EINVAL, errno);
3409
+
3410
+ /* but now they're not */
3411
+ flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
3412
+ ret = user_notif_syscall(__NR_getppid, flags);
3413
+ close(ret);
3414
+ ASSERT_LE(0, ret);
3415
+}
3416
+
3417
+TEST(user_notification_kill_in_middle)
3418
+{
3419
+ pid_t pid;
3420
+ long ret;
3421
+ int listener;
3422
+ struct seccomp_notif req = {};
3423
+ struct seccomp_notif_resp resp = {};
3424
+
3425
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3426
+ ASSERT_EQ(0, ret) {
3427
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3428
+ }
3429
+
3430
+ listener = user_notif_syscall(__NR_getppid,
3431
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
3432
+ ASSERT_GE(listener, 0);
3433
+
3434
+ /*
3435
+ * Check that nothing bad happens when we kill the task in the middle
3436
+ * of a syscall.
3437
+ */
3438
+ pid = fork();
3439
+ ASSERT_GE(pid, 0);
3440
+
3441
+ if (pid == 0) {
3442
+ ret = syscall(__NR_getppid);
3443
+ exit(ret != USER_NOTIF_MAGIC);
3444
+ }
3445
+
3446
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3447
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0);
3448
+
3449
+ EXPECT_EQ(kill(pid, SIGKILL), 0);
3450
+ EXPECT_EQ(waitpid(pid, NULL, 0), pid);
3451
+
3452
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1);
3453
+
3454
+ resp.id = req.id;
3455
+ ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
3456
+ EXPECT_EQ(ret, -1);
3457
+ EXPECT_EQ(errno, ENOENT);
3458
+}
3459
+
3460
+static int handled = -1;
3461
+
3462
+static void signal_handler(int signal)
3463
+{
3464
+ if (write(handled, "c", 1) != 1)
3465
+ perror("write from signal");
3466
+}
3467
+
3468
+TEST(user_notification_signal)
3469
+{
3470
+ pid_t pid;
3471
+ long ret;
3472
+ int status, listener, sk_pair[2];
3473
+ struct seccomp_notif req = {};
3474
+ struct seccomp_notif_resp resp = {};
3475
+ char c;
3476
+
3477
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3478
+ ASSERT_EQ(0, ret) {
3479
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3480
+ }
3481
+
3482
+ ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
3483
+
3484
+ listener = user_notif_syscall(__NR_gettid,
3485
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
3486
+ ASSERT_GE(listener, 0);
3487
+
3488
+ pid = fork();
3489
+ ASSERT_GE(pid, 0);
3490
+
3491
+ if (pid == 0) {
3492
+ close(sk_pair[0]);
3493
+ handled = sk_pair[1];
3494
+ if (signal(SIGUSR1, signal_handler) == SIG_ERR) {
3495
+ perror("signal");
3496
+ exit(1);
3497
+ }
3498
+ /*
3499
+ * ERESTARTSYS behavior is a bit hard to test, because we need
3500
+ * to rely on a signal that has not yet been handled. Let's at
3501
+ * least check that the error code gets propagated through, and
3502
+ * hope that it doesn't break when there is actually a signal :)
3503
+ */
3504
+ ret = syscall(__NR_gettid);
3505
+ exit(!(ret == -1 && errno == 512));
3506
+ }
3507
+
3508
+ close(sk_pair[1]);
3509
+
3510
+ memset(&req, 0, sizeof(req));
3511
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3512
+
3513
+ EXPECT_EQ(kill(pid, SIGUSR1), 0);
3514
+
3515
+ /*
3516
+ * Make sure the signal really is delivered, which means we're not
3517
+ * stuck in the user notification code any more and the notification
3518
+ * should be dead.
3519
+ */
3520
+ EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
3521
+
3522
+ resp.id = req.id;
3523
+ resp.error = -EPERM;
3524
+ resp.val = 0;
3525
+
3526
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3527
+ EXPECT_EQ(errno, ENOENT);
3528
+
3529
+ memset(&req, 0, sizeof(req));
3530
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3531
+
3532
+ resp.id = req.id;
3533
+ resp.error = -512; /* -ERESTARTSYS */
3534
+ resp.val = 0;
3535
+
3536
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3537
+
3538
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
3539
+ EXPECT_EQ(true, WIFEXITED(status));
3540
+ EXPECT_EQ(0, WEXITSTATUS(status));
3541
+}
3542
+
3543
+TEST(user_notification_closed_listener)
3544
+{
3545
+ pid_t pid;
3546
+ long ret;
3547
+ int status, listener;
3548
+
3549
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3550
+ ASSERT_EQ(0, ret) {
3551
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3552
+ }
3553
+
3554
+ listener = user_notif_syscall(__NR_getppid,
3555
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
3556
+ ASSERT_GE(listener, 0);
3557
+
3558
+ /*
3559
+ * Check that we get an ENOSYS when the listener is closed.
3560
+ */
3561
+ pid = fork();
3562
+ ASSERT_GE(pid, 0);
3563
+ if (pid == 0) {
3564
+ close(listener);
3565
+ ret = syscall(__NR_getppid);
3566
+ exit(ret != -1 && errno != ENOSYS);
3567
+ }
3568
+
3569
+ close(listener);
3570
+
3571
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
3572
+ EXPECT_EQ(true, WIFEXITED(status));
3573
+ EXPECT_EQ(0, WEXITSTATUS(status));
3574
+}
3575
+
3576
+/*
3577
+ * Check that a pid in a child namespace still shows up as valid in ours.
3578
+ */
3579
+TEST(user_notification_child_pid_ns)
3580
+{
3581
+ pid_t pid;
3582
+ int status, listener;
3583
+ struct seccomp_notif req = {};
3584
+ struct seccomp_notif_resp resp = {};
3585
+
3586
+ ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) {
3587
+ if (errno == EINVAL)
3588
+ SKIP(return, "kernel missing CLONE_NEWUSER support");
3589
+ };
3590
+
3591
+ listener = user_notif_syscall(__NR_getppid,
3592
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
3593
+ ASSERT_GE(listener, 0);
3594
+
3595
+ pid = fork();
3596
+ ASSERT_GE(pid, 0);
3597
+
3598
+ if (pid == 0)
3599
+ exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3600
+
3601
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3602
+ EXPECT_EQ(req.pid, pid);
3603
+
3604
+ resp.id = req.id;
3605
+ resp.error = 0;
3606
+ resp.val = USER_NOTIF_MAGIC;
3607
+
3608
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3609
+
3610
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
3611
+ EXPECT_EQ(true, WIFEXITED(status));
3612
+ EXPECT_EQ(0, WEXITSTATUS(status));
3613
+ close(listener);
3614
+}
3615
+
3616
+/*
3617
+ * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e.
3618
+ * invalid.
3619
+ */
3620
+TEST(user_notification_sibling_pid_ns)
3621
+{
3622
+ pid_t pid, pid2;
3623
+ int status, listener;
3624
+ struct seccomp_notif req = {};
3625
+ struct seccomp_notif_resp resp = {};
3626
+
3627
+ ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) {
3628
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3629
+ }
3630
+
3631
+ listener = user_notif_syscall(__NR_getppid,
3632
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
3633
+ ASSERT_GE(listener, 0);
3634
+
3635
+ pid = fork();
3636
+ ASSERT_GE(pid, 0);
3637
+
3638
+ if (pid == 0) {
3639
+ ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3640
+
3641
+ pid2 = fork();
3642
+ ASSERT_GE(pid2, 0);
3643
+
3644
+ if (pid2 == 0)
3645
+ exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3646
+
3647
+ EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3648
+ EXPECT_EQ(true, WIFEXITED(status));
3649
+ EXPECT_EQ(0, WEXITSTATUS(status));
3650
+ exit(WEXITSTATUS(status));
3651
+ }
3652
+
3653
+ /* Create the sibling ns, and sibling in it. */
3654
+ ASSERT_EQ(unshare(CLONE_NEWPID), 0) {
3655
+ if (errno == EPERM)
3656
+ SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN");
3657
+ }
3658
+ ASSERT_EQ(errno, 0);
3659
+
3660
+ pid2 = fork();
3661
+ ASSERT_GE(pid2, 0);
3662
+
3663
+ if (pid2 == 0) {
3664
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3665
+ /*
3666
+ * The pid should be 0, i.e. the task is in some namespace that
3667
+ * we can't "see".
3668
+ */
3669
+ EXPECT_EQ(req.pid, 0);
3670
+
3671
+ resp.id = req.id;
3672
+ resp.error = 0;
3673
+ resp.val = USER_NOTIF_MAGIC;
3674
+
3675
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3676
+ exit(0);
3677
+ }
3678
+
3679
+ close(listener);
3680
+
3681
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
3682
+ EXPECT_EQ(true, WIFEXITED(status));
3683
+ EXPECT_EQ(0, WEXITSTATUS(status));
3684
+
3685
+ EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3686
+ EXPECT_EQ(true, WIFEXITED(status));
3687
+ EXPECT_EQ(0, WEXITSTATUS(status));
3688
+}
3689
+
3690
+TEST(user_notification_fault_recv)
3691
+{
3692
+ pid_t pid;
3693
+ int status, listener;
3694
+ struct seccomp_notif req = {};
3695
+ struct seccomp_notif_resp resp = {};
3696
+
3697
+ ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
3698
+
3699
+ listener = user_notif_syscall(__NR_getppid,
3700
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
3701
+ ASSERT_GE(listener, 0);
3702
+
3703
+ pid = fork();
3704
+ ASSERT_GE(pid, 0);
3705
+
3706
+ if (pid == 0)
3707
+ exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3708
+
3709
+ /* Do a bad recv() */
3710
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1);
3711
+ EXPECT_EQ(errno, EFAULT);
3712
+
3713
+ /* We should still be able to receive this notification, though. */
3714
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3715
+ EXPECT_EQ(req.pid, pid);
3716
+
3717
+ resp.id = req.id;
3718
+ resp.error = 0;
3719
+ resp.val = USER_NOTIF_MAGIC;
3720
+
3721
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3722
+
3723
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
3724
+ EXPECT_EQ(true, WIFEXITED(status));
3725
+ EXPECT_EQ(0, WEXITSTATUS(status));
3726
+}
3727
+
3728
+TEST(seccomp_get_notif_sizes)
3729
+{
3730
+ struct seccomp_notif_sizes sizes;
3731
+
3732
+ ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0);
3733
+ EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif));
3734
+ EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
3735
+}
3736
+
3737
+TEST(user_notification_continue)
3738
+{
3739
+ pid_t pid;
3740
+ long ret;
3741
+ int status, listener;
3742
+ struct seccomp_notif req = {};
3743
+ struct seccomp_notif_resp resp = {};
3744
+ struct pollfd pollfd;
3745
+
3746
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3747
+ ASSERT_EQ(0, ret) {
3748
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3749
+ }
3750
+
3751
+ listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
3752
+ ASSERT_GE(listener, 0);
3753
+
3754
+ pid = fork();
3755
+ ASSERT_GE(pid, 0);
3756
+
3757
+ if (pid == 0) {
3758
+ int dup_fd, pipe_fds[2];
3759
+ pid_t self;
3760
+
3761
+ ASSERT_GE(pipe(pipe_fds), 0);
3762
+
3763
+ dup_fd = dup(pipe_fds[0]);
3764
+ ASSERT_GE(dup_fd, 0);
3765
+ EXPECT_NE(pipe_fds[0], dup_fd);
3766
+
3767
+ self = getpid();
3768
+ ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0);
3769
+ exit(0);
3770
+ }
3771
+
3772
+ pollfd.fd = listener;
3773
+ pollfd.events = POLLIN | POLLOUT;
3774
+
3775
+ EXPECT_GT(poll(&pollfd, 1, -1), 0);
3776
+ EXPECT_EQ(pollfd.revents, POLLIN);
3777
+
3778
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3779
+
3780
+ pollfd.fd = listener;
3781
+ pollfd.events = POLLIN | POLLOUT;
3782
+
3783
+ EXPECT_GT(poll(&pollfd, 1, -1), 0);
3784
+ EXPECT_EQ(pollfd.revents, POLLOUT);
3785
+
3786
+ EXPECT_EQ(req.data.nr, __NR_dup);
3787
+
3788
+ resp.id = req.id;
3789
+ resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
3790
+
3791
+ /*
3792
+ * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE requires the
3793
+ * error and val fields of the response to be 0.
3794
+ */
3795
+ resp.error = 0;
3796
+ resp.val = USER_NOTIF_MAGIC;
3797
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3798
+ EXPECT_EQ(errno, EINVAL);
3799
+
3800
+ resp.error = USER_NOTIF_MAGIC;
3801
+ resp.val = 0;
3802
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3803
+ EXPECT_EQ(errno, EINVAL);
3804
+
3805
+ resp.error = 0;
3806
+ resp.val = 0;
3807
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) {
3808
+ if (errno == EINVAL)
3809
+ SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
3810
+ }
3811
+
3812
+skip:
3813
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
3814
+ EXPECT_EQ(true, WIFEXITED(status));
3815
+ EXPECT_EQ(0, WEXITSTATUS(status)) {
3816
+ if (WEXITSTATUS(status) == 2) {
3817
+ SKIP(return, "Kernel does not support kcmp() syscall");
3818
+ return;
3819
+ }
3820
+ }
3821
+}
3822
+
3823
+TEST(user_notification_filter_empty)
3824
+{
3825
+ pid_t pid;
3826
+ long ret;
3827
+ int status;
3828
+ struct pollfd pollfd;
3829
+ struct __clone_args args = {
3830
+ .flags = CLONE_FILES,
3831
+ .exit_signal = SIGCHLD,
3832
+ };
3833
+
3834
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3835
+ ASSERT_EQ(0, ret) {
3836
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3837
+ }
3838
+
3839
+ pid = sys_clone3(&args, sizeof(args));
3840
+ ASSERT_GE(pid, 0);
3841
+
3842
+ if (pid == 0) {
3843
+ int listener;
3844
+
3845
+ listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
3846
+ if (listener < 0)
3847
+ _exit(EXIT_FAILURE);
3848
+
3849
+ if (dup2(listener, 200) != 200)
3850
+ _exit(EXIT_FAILURE);
3851
+
3852
+ close(listener);
3853
+
3854
+ _exit(EXIT_SUCCESS);
3855
+ }
3856
+
3857
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
3858
+ EXPECT_EQ(true, WIFEXITED(status));
3859
+ EXPECT_EQ(0, WEXITSTATUS(status));
3860
+
3861
+ /*
3862
+ * The seccomp filter has become unused so we should be notified once
3863
+ * the kernel gets around to cleaning up the task struct.
3864
+ */
3865
+ pollfd.fd = 200;
3866
+ pollfd.events = POLLHUP;
3867
+
3868
+ EXPECT_GT(poll(&pollfd, 1, 2000), 0);
3869
+ EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
3870
+}
3871
+
3872
+static void *do_thread(void *data)
3873
+{
3874
+ return NULL;
3875
+}
3876
+
3877
+TEST(user_notification_filter_empty_threaded)
3878
+{
3879
+ pid_t pid;
3880
+ long ret;
3881
+ int status;
3882
+ struct pollfd pollfd;
3883
+ struct __clone_args args = {
3884
+ .flags = CLONE_FILES,
3885
+ .exit_signal = SIGCHLD,
3886
+ };
3887
+
3888
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3889
+ ASSERT_EQ(0, ret) {
3890
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3891
+ }
3892
+
3893
+ pid = sys_clone3(&args, sizeof(args));
3894
+ ASSERT_GE(pid, 0);
3895
+
3896
+ if (pid == 0) {
3897
+ pid_t pid1, pid2;
3898
+ int listener, status;
3899
+ pthread_t thread;
3900
+
3901
+ listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
3902
+ if (listener < 0)
3903
+ _exit(EXIT_FAILURE);
3904
+
3905
+ if (dup2(listener, 200) != 200)
3906
+ _exit(EXIT_FAILURE);
3907
+
3908
+ close(listener);
3909
+
3910
+ pid1 = fork();
3911
+ if (pid1 < 0)
3912
+ _exit(EXIT_FAILURE);
3913
+
3914
+ if (pid1 == 0)
3915
+ _exit(EXIT_SUCCESS);
3916
+
3917
+ pid2 = fork();
3918
+ if (pid2 < 0)
3919
+ _exit(EXIT_FAILURE);
3920
+
3921
+ if (pid2 == 0)
3922
+ _exit(EXIT_SUCCESS);
3923
+
3924
+ if (pthread_create(&thread, NULL, do_thread, NULL) ||
3925
+ pthread_join(thread, NULL))
3926
+ _exit(EXIT_FAILURE);
3927
+
3928
+ if (pthread_create(&thread, NULL, do_thread, NULL) ||
3929
+ pthread_join(thread, NULL))
3930
+ _exit(EXIT_FAILURE);
3931
+
3932
+ if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) ||
3933
+ WEXITSTATUS(status))
3934
+ _exit(EXIT_FAILURE);
3935
+
3936
+ if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) ||
3937
+ WEXITSTATUS(status))
3938
+ _exit(EXIT_FAILURE);
3939
+
3940
+ exit(EXIT_SUCCESS);
3941
+ }
3942
+
3943
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
3944
+ EXPECT_EQ(true, WIFEXITED(status));
3945
+ EXPECT_EQ(0, WEXITSTATUS(status));
3946
+
3947
+ /*
3948
+ * The seccomp filter has become unused so we should be notified once
3949
+ * the kernel gets around to cleaning up task struct.
3950
+ */
3951
+ pollfd.fd = 200;
3952
+ pollfd.events = POLLHUP;
3953
+
3954
+ EXPECT_GT(poll(&pollfd, 1, 2000), 0);
3955
+ EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
3956
+}
3957
+
3958
+TEST(user_notification_addfd)
3959
+{
3960
+ pid_t pid;
3961
+ long ret;
3962
+ int status, listener, memfd, fd;
3963
+ struct seccomp_notif_addfd addfd = {};
3964
+ struct seccomp_notif_addfd_small small = {};
3965
+ struct seccomp_notif_addfd_big big = {};
3966
+ struct seccomp_notif req = {};
3967
+ struct seccomp_notif_resp resp = {};
3968
+ /* 100 ms */
3969
+ struct timespec delay = { .tv_nsec = 100000000 };
3970
+
3971
+ memfd = memfd_create("test", 0);
3972
+ ASSERT_GE(memfd, 0);
3973
+
3974
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3975
+ ASSERT_EQ(0, ret) {
3976
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3977
+ }
3978
+
3979
+ /* Check that the basic notification machinery works */
3980
+ listener = user_notif_syscall(__NR_getppid,
3981
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
3982
+ ASSERT_GE(listener, 0);
3983
+
3984
+ pid = fork();
3985
+ ASSERT_GE(pid, 0);
3986
+
3987
+ if (pid == 0) {
3988
+ if (syscall(__NR_getppid) != USER_NOTIF_MAGIC)
3989
+ exit(1);
3990
+ exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3991
+ }
3992
+
3993
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3994
+
3995
+ addfd.srcfd = memfd;
3996
+ addfd.newfd = 0;
3997
+ addfd.id = req.id;
3998
+ addfd.flags = 0x0;
3999
+
4000
+ /* Verify bad newfd_flags cannot be set */
4001
+ addfd.newfd_flags = ~O_CLOEXEC;
4002
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4003
+ EXPECT_EQ(errno, EINVAL);
4004
+ addfd.newfd_flags = O_CLOEXEC;
4005
+
4006
+ /* Verify bad flags cannot be set */
4007
+ addfd.flags = 0xff;
4008
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4009
+ EXPECT_EQ(errno, EINVAL);
4010
+ addfd.flags = 0;
4011
+
4012
+ /* Verify that remote_fd cannot be set without setting flags */
4013
+ addfd.newfd = 1;
4014
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4015
+ EXPECT_EQ(errno, EINVAL);
4016
+ addfd.newfd = 0;
4017
+
4018
+ /* Verify small size cannot be set */
4019
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1);
4020
+ EXPECT_EQ(errno, EINVAL);
4021
+
4022
+ /* Verify we can't send bits filled in unknown buffer area */
4023
+ memset(&big, 0xAA, sizeof(big));
4024
+ big.addfd = addfd;
4025
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1);
4026
+ EXPECT_EQ(errno, E2BIG);
4027
+
4028
+
4029
+ /* Verify we can set an arbitrary remote fd */
4030
+ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
4031
+ /*
4032
+ * The child has fds 0(stdin), 1(stdout), 2(stderr), 3(memfd),
4033
+ * 4(listener), so the newly allocated fd should be 5.
4034
+ */
4035
+ EXPECT_EQ(fd, 5);
4036
+ EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
4037
+
4038
+ /* Verify we can set an arbitrary remote fd with large size */
4039
+ memset(&big, 0x0, sizeof(big));
4040
+ big.addfd = addfd;
4041
+ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
4042
+ EXPECT_EQ(fd, 6);
4043
+
4044
+ /* Verify we can set a specific remote fd */
4045
+ addfd.newfd = 42;
4046
+ addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
4047
+ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
4048
+ EXPECT_EQ(fd, 42);
4049
+ EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
4050
+
4051
+ /* Resume syscall */
4052
+ resp.id = req.id;
4053
+ resp.error = 0;
4054
+ resp.val = USER_NOTIF_MAGIC;
4055
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
4056
+
4057
+ /*
4058
+ * This sets the ID of the ADD FD to the last request plus 1. The
4059
+ * notification ID increments 1 per notification.
4060
+ */
4061
+ addfd.id = req.id + 1;
4062
+
4063
+ /* This spins until the underlying notification is generated */
4064
+ while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
4065
+ errno != -EINPROGRESS)
4066
+ nanosleep(&delay, NULL);
4067
+
4068
+ memset(&req, 0, sizeof(req));
4069
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
4070
+ ASSERT_EQ(addfd.id, req.id);
4071
+
4072
+ resp.id = req.id;
4073
+ resp.error = 0;
4074
+ resp.val = USER_NOTIF_MAGIC;
4075
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
4076
+
4077
+ /* Wait for child to finish. */
4078
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
4079
+ EXPECT_EQ(true, WIFEXITED(status));
4080
+ EXPECT_EQ(0, WEXITSTATUS(status));
4081
+
4082
+ close(memfd);
4083
+}
4084
+
4085
+TEST(user_notification_addfd_rlimit)
4086
+{
4087
+ pid_t pid;
4088
+ long ret;
4089
+ int status, listener, memfd;
4090
+ struct seccomp_notif_addfd addfd = {};
4091
+ struct seccomp_notif req = {};
4092
+ struct seccomp_notif_resp resp = {};
4093
+ const struct rlimit lim = {
4094
+ .rlim_cur = 0,
4095
+ .rlim_max = 0,
4096
+ };
4097
+
4098
+ memfd = memfd_create("test", 0);
4099
+ ASSERT_GE(memfd, 0);
4100
+
4101
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
4102
+ ASSERT_EQ(0, ret) {
4103
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
4104
+ }
4105
+
4106
+ /* Check that the basic notification machinery works */
4107
+ listener = user_notif_syscall(__NR_getppid,
4108
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
4109
+ ASSERT_GE(listener, 0);
4110
+
4111
+ pid = fork();
4112
+ ASSERT_GE(pid, 0);
4113
+
4114
+ if (pid == 0)
4115
+ exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
4116
+
4117
+
4118
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
4119
+
4120
+ ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0);
4121
+
4122
+ addfd.srcfd = memfd;
4123
+ addfd.newfd_flags = O_CLOEXEC;
4124
+ addfd.newfd = 0;
4125
+ addfd.id = req.id;
4126
+ addfd.flags = 0;
4127
+
4128
+ /* Should probably spot check /proc/sys/fs/file-nr */
4129
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4130
+ EXPECT_EQ(errno, EMFILE);
4131
+
4132
+ addfd.newfd = 100;
4133
+ addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
4134
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4135
+ EXPECT_EQ(errno, EBADF);
4136
+
4137
+ resp.id = req.id;
4138
+ resp.error = 0;
4139
+ resp.val = USER_NOTIF_MAGIC;
4140
+
4141
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
4142
+
4143
+ /* Wait for child to finish. */
4144
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
4145
+ EXPECT_EQ(true, WIFEXITED(status));
4146
+ EXPECT_EQ(0, WEXITSTATUS(status));
4147
+
4148
+ close(memfd);
4149
+}
4150
+
29844151 /*
29854152 * TODO:
2986
- * - add microbenchmarks
29874153 * - expand NNP testing
29884154 * - better arch-specific TRACE and TRAP handlers.
29894155 * - endianness checking when appropriate
....@@ -2991,7 +4157,6 @@
29914157 * - arch value testing (x86 modes especially)
29924158 * - verify that FILTER_FLAG_LOG filters generate log messages
29934159 * - verify that RET_LOG generates log messages
2994
- * - ...
29954160 */
29964161
29974162 TEST_HARNESS_MAIN