.. | .. |
---|
4 | 4 | */ |
---|
5 | 5 | #define _GNU_SOURCE |
---|
6 | 6 | #include <assert.h> |
---|
| 7 | +#include <limits.h> |
---|
| 8 | +#include <stdbool.h> |
---|
| 9 | +#include <stddef.h> |
---|
7 | 10 | #include <stdio.h> |
---|
8 | 11 | #include <stdlib.h> |
---|
9 | 12 | #include <time.h> |
---|
10 | 13 | #include <unistd.h> |
---|
11 | 14 | #include <linux/filter.h> |
---|
12 | 15 | #include <linux/seccomp.h> |
---|
| 16 | +#include <sys/param.h> |
---|
13 | 17 | #include <sys/prctl.h> |
---|
14 | 18 | #include <sys/syscall.h> |
---|
15 | 19 | #include <sys/types.h> |
---|
.. | .. |
---|
18 | 22 | |
---|
19 | 23 | unsigned long long timing(clockid_t clk_id, unsigned long long samples) |
---|
20 | 24 | { |
---|
21 | | - pid_t pid, ret; |
---|
22 | | - unsigned long long i; |
---|
23 | 25 | struct timespec start, finish; |
---|
| 26 | + unsigned long long i; |
---|
| 27 | + pid_t pid, ret; |
---|
24 | 28 | |
---|
25 | 29 | pid = getpid(); |
---|
26 | 30 | assert(clock_gettime(clk_id, &start) == 0); |
---|
.. | .. |
---|
31 | 35 | assert(clock_gettime(clk_id, &finish) == 0); |
---|
32 | 36 | |
---|
33 | 37 | i = finish.tv_sec - start.tv_sec; |
---|
34 | | - i *= 1000000000; |
---|
| 38 | + i *= 1000000000ULL; |
---|
35 | 39 | i += finish.tv_nsec - start.tv_nsec; |
---|
36 | 40 | |
---|
37 | | - printf("%lu.%09lu - %lu.%09lu = %llu\n", |
---|
| 41 | + printf("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n", |
---|
38 | 42 | finish.tv_sec, finish.tv_nsec, |
---|
39 | 43 | start.tv_sec, start.tv_nsec, |
---|
40 | | - i); |
---|
| 44 | + i, (double)i / 1000000000.0); |
---|
41 | 45 | |
---|
42 | 46 | return i; |
---|
43 | 47 | } |
---|
44 | 48 | |
---|
45 | 49 | unsigned long long calibrate(void) |
---|
46 | 50 | { |
---|
47 | | - unsigned long long i; |
---|
| 51 | + struct timespec start, finish; |
---|
| 52 | + unsigned long long i, samples, step = 9973; |
---|
| 53 | + pid_t pid, ret; |
---|
| 54 | + int seconds = 15; |
---|
48 | 55 | |
---|
49 | | - printf("Calibrating reasonable sample size...\n"); |
---|
| 56 | + printf("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds); |
---|
50 | 57 | |
---|
51 | | - for (i = 5; ; i++) { |
---|
52 | | - unsigned long long samples = 1 << i; |
---|
| 58 | + samples = 0; |
---|
| 59 | + pid = getpid(); |
---|
| 60 | + assert(clock_gettime(CLOCK_MONOTONIC, &start) == 0); |
---|
| 61 | + do { |
---|
| 62 | + for (i = 0; i < step; i++) { |
---|
| 63 | + ret = syscall(__NR_getpid); |
---|
| 64 | + assert(pid == ret); |
---|
| 65 | + } |
---|
| 66 | + assert(clock_gettime(CLOCK_MONOTONIC, &finish) == 0); |
---|
53 | 67 | |
---|
54 | | - /* Find something that takes more than 5 seconds to run. */ |
---|
55 | | - if (timing(CLOCK_REALTIME, samples) / 1000000000ULL > 5) |
---|
56 | | - return samples; |
---|
| 68 | + samples += step; |
---|
| 69 | + i = finish.tv_sec - start.tv_sec; |
---|
| 70 | + i *= 1000000000ULL; |
---|
| 71 | + i += finish.tv_nsec - start.tv_nsec; |
---|
| 72 | + } while (i < 1000000000ULL); |
---|
| 73 | + |
---|
| 74 | + return samples * seconds; |
---|
| 75 | +} |
---|
| 76 | + |
---|
| 77 | +bool approx(int i_one, int i_two) |
---|
| 78 | +{ |
---|
| 79 | + double one = i_one, one_bump = one * 0.01; |
---|
| 80 | + double two = i_two, two_bump = two * 0.01; |
---|
| 81 | + |
---|
| 82 | + one_bump = one + MAX(one_bump, 2.0); |
---|
| 83 | + two_bump = two + MAX(two_bump, 2.0); |
---|
| 84 | + |
---|
| 85 | + /* Equal to, or within 1% or 2 digits */ |
---|
| 86 | + if (one == two || |
---|
| 87 | + (one > two && one <= two_bump) || |
---|
| 88 | + (two > one && two <= one_bump)) |
---|
| 89 | + return true; |
---|
| 90 | + return false; |
---|
| 91 | +} |
---|
| 92 | + |
---|
| 93 | +bool le(int i_one, int i_two) |
---|
| 94 | +{ |
---|
| 95 | + if (i_one <= i_two) |
---|
| 96 | + return true; |
---|
| 97 | + return false; |
---|
| 98 | +} |
---|
| 99 | + |
---|
| 100 | +long compare(const char *name_one, const char *name_eval, const char *name_two, |
---|
| 101 | + unsigned long long one, bool (*eval)(int, int), unsigned long long two) |
---|
| 102 | +{ |
---|
| 103 | + bool good; |
---|
| 104 | + |
---|
| 105 | + printf("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two, |
---|
| 106 | + (long long)one, name_eval, (long long)two); |
---|
| 107 | + if (one > INT_MAX) { |
---|
| 108 | + printf("Miscalculation! Measurement went negative: %lld\n", (long long)one); |
---|
| 109 | + return 1; |
---|
57 | 110 | } |
---|
| 111 | + if (two > INT_MAX) { |
---|
| 112 | + printf("Miscalculation! Measurement went negative: %lld\n", (long long)two); |
---|
| 113 | + return 1; |
---|
| 114 | + } |
---|
| 115 | + |
---|
| 116 | + good = eval(one, two); |
---|
| 117 | + printf("%s\n", good ? "✔️" : "❌"); |
---|
| 118 | + |
---|
| 119 | + return good ? 0 : 1; |
---|
58 | 120 | } |
---|
59 | 121 | |
---|
60 | 122 | int main(int argc, char *argv[]) |
---|
61 | 123 | { |
---|
| 124 | + struct sock_filter bitmap_filter[] = { |
---|
| 125 | + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), |
---|
| 126 | + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
---|
| 127 | + }; |
---|
| 128 | + struct sock_fprog bitmap_prog = { |
---|
| 129 | + .len = (unsigned short)ARRAY_SIZE(bitmap_filter), |
---|
| 130 | + .filter = bitmap_filter, |
---|
| 131 | + }; |
---|
62 | 132 | struct sock_filter filter[] = { |
---|
| 133 | + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, args[0])), |
---|
63 | 134 | BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), |
---|
64 | 135 | }; |
---|
65 | 136 | struct sock_fprog prog = { |
---|
66 | 137 | .len = (unsigned short)ARRAY_SIZE(filter), |
---|
67 | 138 | .filter = filter, |
---|
68 | 139 | }; |
---|
69 | | - long ret; |
---|
70 | | - unsigned long long samples; |
---|
71 | | - unsigned long long native, filtered; |
---|
| 140 | + |
---|
| 141 | + long ret, bits; |
---|
| 142 | + unsigned long long samples, calc; |
---|
| 143 | + unsigned long long native, filter1, filter2, bitmap1, bitmap2; |
---|
| 144 | + unsigned long long entry, per_filter1, per_filter2; |
---|
| 145 | + |
---|
| 146 | + printf("Current BPF sysctl settings:\n"); |
---|
| 147 | + system("sysctl net.core.bpf_jit_enable"); |
---|
| 148 | + system("sysctl net.core.bpf_jit_harden"); |
---|
72 | 149 | |
---|
73 | 150 | if (argc > 1) |
---|
74 | 151 | samples = strtoull(argv[1], NULL, 0); |
---|
75 | 152 | else |
---|
76 | 153 | samples = calibrate(); |
---|
77 | 154 | |
---|
78 | | - printf("Benchmarking %llu samples...\n", samples); |
---|
| 155 | + printf("Benchmarking %llu syscalls...\n", samples); |
---|
79 | 156 | |
---|
| 157 | + /* Native call */ |
---|
80 | 158 | native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; |
---|
81 | 159 | printf("getpid native: %llu ns\n", native); |
---|
82 | 160 | |
---|
83 | 161 | ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); |
---|
84 | 162 | assert(ret == 0); |
---|
85 | 163 | |
---|
| 164 | + /* One filter resulting in a bitmap */ |
---|
| 165 | + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog); |
---|
| 166 | + assert(ret == 0); |
---|
| 167 | + |
---|
| 168 | + bitmap1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; |
---|
| 169 | + printf("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1); |
---|
| 170 | + |
---|
| 171 | + /* Second filter resulting in a bitmap */ |
---|
| 172 | + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog); |
---|
| 173 | + assert(ret == 0); |
---|
| 174 | + |
---|
| 175 | + bitmap2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; |
---|
| 176 | + printf("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2); |
---|
| 177 | + |
---|
| 178 | + /* Third filter, can no longer be converted to bitmap */ |
---|
86 | 179 | ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); |
---|
87 | 180 | assert(ret == 0); |
---|
88 | 181 | |
---|
89 | | - filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; |
---|
90 | | - printf("getpid RET_ALLOW: %llu ns\n", filtered); |
---|
| 182 | + filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; |
---|
| 183 | + printf("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1); |
---|
91 | 184 | |
---|
92 | | - printf("Estimated seccomp overhead per syscall: %llu ns\n", |
---|
93 | | - filtered - native); |
---|
| 185 | + /* Fourth filter, can not be converted to bitmap because of filter 3 */ |
---|
| 186 | + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog); |
---|
| 187 | + assert(ret == 0); |
---|
94 | 188 | |
---|
95 | | - if (filtered == native) |
---|
96 | | - printf("Trying running again with more samples.\n"); |
---|
| 189 | + filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; |
---|
| 190 | + printf("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2); |
---|
97 | 191 | |
---|
| 192 | + /* Estimations */ |
---|
| 193 | +#define ESTIMATE(fmt, var, what) do { \ |
---|
| 194 | + var = (what); \ |
---|
| 195 | + printf("Estimated " fmt ": %llu ns\n", var); \ |
---|
| 196 | + if (var > INT_MAX) \ |
---|
| 197 | + goto more_samples; \ |
---|
| 198 | + } while (0) |
---|
| 199 | + |
---|
| 200 | + ESTIMATE("total seccomp overhead for 1 bitmapped filter", calc, |
---|
| 201 | + bitmap1 - native); |
---|
| 202 | + ESTIMATE("total seccomp overhead for 2 bitmapped filters", calc, |
---|
| 203 | + bitmap2 - native); |
---|
| 204 | + ESTIMATE("total seccomp overhead for 3 full filters", calc, |
---|
| 205 | + filter1 - native); |
---|
| 206 | + ESTIMATE("total seccomp overhead for 4 full filters", calc, |
---|
| 207 | + filter2 - native); |
---|
| 208 | + ESTIMATE("seccomp entry overhead", entry, |
---|
| 209 | + bitmap1 - native - (bitmap2 - bitmap1)); |
---|
| 210 | + ESTIMATE("seccomp per-filter overhead (last 2 diff)", per_filter1, |
---|
| 211 | + filter2 - filter1); |
---|
| 212 | + ESTIMATE("seccomp per-filter overhead (filters / 4)", per_filter2, |
---|
| 213 | + (filter2 - native - entry) / 4); |
---|
| 214 | + |
---|
| 215 | + printf("Expectations:\n"); |
---|
| 216 | + ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1); |
---|
| 217 | + bits = compare("native", "≤", "1 filter", native, le, filter1); |
---|
| 218 | + if (bits) |
---|
| 219 | + goto more_samples; |
---|
| 220 | + |
---|
| 221 | + ret |= compare("per-filter (last 2 diff)", "≈", "per-filter (filters / 4)", |
---|
| 222 | + per_filter1, approx, per_filter2); |
---|
| 223 | + |
---|
| 224 | + bits = compare("1 bitmapped", "≈", "2 bitmapped", |
---|
| 225 | + bitmap1 - native, approx, bitmap2 - native); |
---|
| 226 | + if (bits) { |
---|
| 227 | + printf("Skipping constant action bitmap expectations: they appear unsupported.\n"); |
---|
| 228 | + goto out; |
---|
| 229 | + } |
---|
| 230 | + |
---|
| 231 | + ret |= compare("entry", "≈", "1 bitmapped", entry, approx, bitmap1 - native); |
---|
| 232 | + ret |= compare("entry", "≈", "2 bitmapped", entry, approx, bitmap2 - native); |
---|
| 233 | + ret |= compare("native + entry + (per filter * 4)", "≈", "4 filters total", |
---|
| 234 | + entry + (per_filter1 * 4) + native, approx, filter2); |
---|
| 235 | + if (ret == 0) |
---|
| 236 | + goto out; |
---|
| 237 | + |
---|
| 238 | +more_samples: |
---|
| 239 | + printf("Saw unexpected benchmark result. Try running again with more samples?\n"); |
---|
| 240 | +out: |
---|
98 | 241 | return 0; |
---|
99 | 242 | } |
---|