hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/tools/testing/selftests/vm/userfaultfd.c
....@@ -1,10 +1,8 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Stress userfaultfd syscall.
34 *
45 * Copyright (C) 2015 Red Hat, Inc.
5
- *
6
- * This work is licensed under the terms of the GNU GPL, version 2. See
7
- * the COPYING file in the top-level directory.
86 *
97 * This test allocates two virtual areas and bounces the physical
108 * memory across the two virtual areas (from area_src to area_dst)
....@@ -34,18 +32,6 @@
3432 * per-CPU threads 1 by triggering userfaults inside
3533 * pthread_mutex_lock will also verify the atomicity of the memory
3634 * transfer (UFFDIO_COPY).
37
- *
38
- * The program takes two parameters: the amounts of physical memory in
39
- * megabytes (MiB) of the area and the number of bounces to execute.
40
- *
41
- * # 100MiB 99999 bounces
42
- * ./userfaultfd 100 99999
43
- *
44
- * # 1GiB 99 bounces
45
- * ./userfaultfd 1000 99
46
- *
47
- * # 10MiB-~6GiB 999 bounces, continue forever unless an error triggers
48
- * while ./userfaultfd $[RANDOM % 6000 + 10] 999; do true; done
4935 */
5036
5137 #define _GNU_SOURCE
....@@ -60,6 +46,7 @@
6046 #include <signal.h>
6147 #include <poll.h>
6248 #include <string.h>
49
+#include <linux/mman.h>
6350 #include <sys/mman.h>
6451 #include <sys/syscall.h>
6552 #include <sys/ioctl.h>
....@@ -68,6 +55,7 @@
6855 #include <linux/userfaultfd.h>
6956 #include <setjmp.h>
7057 #include <stdbool.h>
58
+#include <assert.h>
7159
7260 #include "../kselftest.h"
7361
....@@ -90,15 +78,29 @@
9078 #define ALARM_INTERVAL_SECS 10
9179 static volatile bool test_uffdio_copy_eexist = true;
9280 static volatile bool test_uffdio_zeropage_eexist = true;
81
+/* Whether to test uffd write-protection */
82
+static bool test_uffdio_wp = false;
83
+/* Whether to test uffd minor faults */
84
+static bool test_uffdio_minor = false;
9385
9486 static bool map_shared;
87
+static int shm_fd;
9588 static int huge_fd;
9689 static char *huge_fd_off0;
9790 static unsigned long long *count_verify;
98
-static int uffd, uffd_flags, finished, *pipefd;
91
+static int uffd = -1;
92
+static int uffd_flags, finished, *pipefd;
9993 static char *area_src, *area_src_alias, *area_dst, *area_dst_alias;
10094 static char *zeropage;
10195 pthread_attr_t attr;
96
+
97
+/* Userfaultfd test statistics */
98
+struct uffd_stats {
99
+ int cpu;
100
+ unsigned long missing_faults;
101
+ unsigned long wp_faults;
102
+ unsigned long minor_faults;
103
+};
102104
103105 /* pthread_mutex_t starts at page offset 0 */
104106 #define area_mutex(___area, ___nr) \
....@@ -115,81 +117,145 @@
115117 ~(unsigned long)(sizeof(unsigned long long) \
116118 - 1)))
117119
118
-static int anon_release_pages(char *rel_area)
119
-{
120
- int ret = 0;
120
+const char *examples =
121
+ "# Run anonymous memory test on 100MiB region with 99999 bounces:\n"
122
+ "./userfaultfd anon 100 99999\n\n"
123
+ "# Run share memory test on 1GiB region with 99 bounces:\n"
124
+ "./userfaultfd shmem 1000 99\n\n"
125
+ "# Run hugetlb memory test on 256MiB region with 50 bounces (using /dev/hugepages/hugefile):\n"
126
+ "./userfaultfd hugetlb 256 50 /dev/hugepages/hugefile\n\n"
127
+ "# Run the same hugetlb test but using shmem:\n"
128
+ "./userfaultfd hugetlb_shared 256 50 /dev/hugepages/hugefile\n\n"
129
+ "# 10MiB-~6GiB 999 bounces anonymous test, "
130
+ "continue forever unless an error triggers\n"
131
+ "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n";
121132
122
- if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) {
123
- perror("madvise");
124
- ret = 1;
133
+static void usage(void)
134
+{
135
+ fprintf(stderr, "\nUsage: ./userfaultfd <test type> <MiB> <bounces> "
136
+ "[hugetlbfs_file]\n\n");
137
+ fprintf(stderr, "Supported <test type>: anon, hugetlb, "
138
+ "hugetlb_shared, shmem\n\n");
139
+ fprintf(stderr, "Examples:\n\n");
140
+ fprintf(stderr, "%s", examples);
141
+ exit(1);
142
+}
143
+
144
+#define _err(fmt, ...) \
145
+ do { \
146
+ int ret = errno; \
147
+ fprintf(stderr, "ERROR: " fmt, ##__VA_ARGS__); \
148
+ fprintf(stderr, " (errno=%d, line=%d)\n", \
149
+ ret, __LINE__); \
150
+ } while (0)
151
+
152
+#define err(fmt, ...) \
153
+ do { \
154
+ _err(fmt, ##__VA_ARGS__); \
155
+ exit(1); \
156
+ } while (0)
157
+
158
+static void uffd_stats_reset(struct uffd_stats *uffd_stats,
159
+ unsigned long n_cpus)
160
+{
161
+ int i;
162
+
163
+ for (i = 0; i < n_cpus; i++) {
164
+ uffd_stats[i].cpu = i;
165
+ uffd_stats[i].missing_faults = 0;
166
+ uffd_stats[i].wp_faults = 0;
167
+ uffd_stats[i].minor_faults = 0;
168
+ }
169
+}
170
+
171
+static void uffd_stats_report(struct uffd_stats *stats, int n_cpus)
172
+{
173
+ int i;
174
+ unsigned long long miss_total = 0, wp_total = 0, minor_total = 0;
175
+
176
+ for (i = 0; i < n_cpus; i++) {
177
+ miss_total += stats[i].missing_faults;
178
+ wp_total += stats[i].wp_faults;
179
+ minor_total += stats[i].minor_faults;
125180 }
126181
127
- return ret;
182
+ printf("userfaults: ");
183
+ if (miss_total) {
184
+ printf("%llu missing (", miss_total);
185
+ for (i = 0; i < n_cpus; i++)
186
+ printf("%lu+", stats[i].missing_faults);
187
+ printf("\b) ");
188
+ }
189
+ if (wp_total) {
190
+ printf("%llu wp (", wp_total);
191
+ for (i = 0; i < n_cpus; i++)
192
+ printf("%lu+", stats[i].wp_faults);
193
+ printf("\b) ");
194
+ }
195
+ if (minor_total) {
196
+ printf("%llu minor (", minor_total);
197
+ for (i = 0; i < n_cpus; i++)
198
+ printf("%lu+", stats[i].minor_faults);
199
+ printf("\b)");
200
+ }
201
+ printf("\n");
202
+}
203
+
204
+static void anon_release_pages(char *rel_area)
205
+{
206
+ if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED))
207
+ err("madvise(MADV_DONTNEED) failed");
128208 }
129209
130210 static void anon_allocate_area(void **alloc_area)
131211 {
132212 *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
133213 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
134
- if (*alloc_area == MAP_FAILED) {
135
- fprintf(stderr, "mmap of anonymous memory failed");
136
- *alloc_area = NULL;
137
- }
214
+ if (*alloc_area == MAP_FAILED)
215
+ err("posix_memalign() failed");
138216 }
139217
140218 static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
141219 {
142220 }
143221
144
-/* HugeTLB memory */
145
-static int hugetlb_release_pages(char *rel_area)
222
+static void hugetlb_release_pages(char *rel_area)
146223 {
147
- int ret = 0;
148
-
149224 if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
150
- rel_area == huge_fd_off0 ? 0 :
151
- nr_pages * page_size,
152
- nr_pages * page_size)) {
153
- perror("fallocate");
154
- ret = 1;
155
- }
156
-
157
- return ret;
225
+ rel_area == huge_fd_off0 ? 0 : nr_pages * page_size,
226
+ nr_pages * page_size))
227
+ err("fallocate() failed");
158228 }
159
-
160229
161230 static void hugetlb_allocate_area(void **alloc_area)
162231 {
163232 void *area_alias = NULL;
164233 char **alloc_area_alias;
234
+
165235 *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
166236 (map_shared ? MAP_SHARED : MAP_PRIVATE) |
167237 MAP_HUGETLB,
168238 huge_fd, *alloc_area == area_src ? 0 :
169239 nr_pages * page_size);
170
- if (*alloc_area == MAP_FAILED) {
171
- fprintf(stderr, "mmap of hugetlbfs file failed\n");
172
- *alloc_area = NULL;
173
- }
240
+ if (*alloc_area == MAP_FAILED)
241
+ err("mmap of hugetlbfs file failed");
174242
175243 if (map_shared) {
176244 area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
177245 MAP_SHARED | MAP_HUGETLB,
178246 huge_fd, *alloc_area == area_src ? 0 :
179247 nr_pages * page_size);
180
- if (area_alias == MAP_FAILED) {
181
- if (munmap(*alloc_area, nr_pages * page_size) < 0)
182
- perror("hugetlb munmap"), exit(1);
183
- *alloc_area = NULL;
184
- return;
185
- }
248
+ if (area_alias == MAP_FAILED)
249
+ err("mmap of hugetlb file alias failed");
186250 }
251
+
187252 if (*alloc_area == area_src) {
188253 huge_fd_off0 = *alloc_area;
189254 alloc_area_alias = &area_src_alias;
190255 } else {
191256 alloc_area_alias = &area_dst_alias;
192257 }
258
+
193259 if (area_alias)
194260 *alloc_area_alias = area_alias;
195261 }
....@@ -207,39 +273,54 @@
207273 *start = (unsigned long) area_dst_alias + offset;
208274 }
209275
210
-/* Shared memory */
211
-static int shmem_release_pages(char *rel_area)
276
+static void shmem_release_pages(char *rel_area)
212277 {
213
- int ret = 0;
214
-
215
- if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) {
216
- perror("madvise");
217
- ret = 1;
218
- }
219
-
220
- return ret;
278
+ if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE))
279
+ err("madvise(MADV_REMOVE) failed");
221280 }
222281
223282 static void shmem_allocate_area(void **alloc_area)
224283 {
284
+ void *area_alias = NULL;
285
+ bool is_src = alloc_area == (void **)&area_src;
286
+ unsigned long offset = is_src ? 0 : nr_pages * page_size;
287
+
225288 *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
226
- MAP_ANONYMOUS | MAP_SHARED, -1, 0);
227
- if (*alloc_area == MAP_FAILED) {
228
- fprintf(stderr, "shared memory mmap failed\n");
229
- *alloc_area = NULL;
230
- }
289
+ MAP_SHARED, shm_fd, offset);
290
+ if (*alloc_area == MAP_FAILED)
291
+ err("mmap of memfd failed");
292
+
293
+ area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
294
+ MAP_SHARED, shm_fd, offset);
295
+ if (area_alias == MAP_FAILED)
296
+ err("mmap of memfd alias failed");
297
+
298
+ if (is_src)
299
+ area_src_alias = area_alias;
300
+ else
301
+ area_dst_alias = area_alias;
302
+}
303
+
304
+static void shmem_alias_mapping(__u64 *start, size_t len, unsigned long offset)
305
+{
306
+ *start = (unsigned long)area_dst_alias + offset;
231307 }
232308
233309 struct uffd_test_ops {
234310 unsigned long expected_ioctls;
235311 void (*allocate_area)(void **alloc_area);
236
- int (*release_pages)(char *rel_area);
312
+ void (*release_pages)(char *rel_area);
237313 void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset);
238314 };
239315
240
-#define ANON_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \
316
+#define SHMEM_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \
241317 (1 << _UFFDIO_COPY) | \
242318 (1 << _UFFDIO_ZEROPAGE))
319
+
320
+#define ANON_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \
321
+ (1 << _UFFDIO_COPY) | \
322
+ (1 << _UFFDIO_ZEROPAGE) | \
323
+ (1 << _UFFDIO_WRITEPROTECT))
243324
244325 static struct uffd_test_ops anon_uffd_test_ops = {
245326 .expected_ioctls = ANON_EXPECTED_IOCTLS,
....@@ -249,20 +330,142 @@
249330 };
250331
251332 static struct uffd_test_ops shmem_uffd_test_ops = {
252
- .expected_ioctls = ANON_EXPECTED_IOCTLS,
333
+ .expected_ioctls = SHMEM_EXPECTED_IOCTLS,
253334 .allocate_area = shmem_allocate_area,
254335 .release_pages = shmem_release_pages,
255
- .alias_mapping = noop_alias_mapping,
336
+ .alias_mapping = shmem_alias_mapping,
256337 };
257338
258339 static struct uffd_test_ops hugetlb_uffd_test_ops = {
259
- .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC,
340
+ .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC & ~(1 << _UFFDIO_CONTINUE),
260341 .allocate_area = hugetlb_allocate_area,
261342 .release_pages = hugetlb_release_pages,
262343 .alias_mapping = hugetlb_alias_mapping,
263344 };
264345
265346 static struct uffd_test_ops *uffd_test_ops;
347
+
348
+static void userfaultfd_open(uint64_t *features)
349
+{
350
+ struct uffdio_api uffdio_api;
351
+
352
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
353
+ if (uffd < 0)
354
+ err("userfaultfd syscall not available in this kernel");
355
+ uffd_flags = fcntl(uffd, F_GETFD, NULL);
356
+
357
+ uffdio_api.api = UFFD_API;
358
+ uffdio_api.features = *features;
359
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api))
360
+ err("UFFDIO_API failed.\nPlease make sure to "
361
+ "run with either root or ptrace capability.");
362
+ if (uffdio_api.api != UFFD_API)
363
+ err("UFFDIO_API error: %" PRIu64, (uint64_t)uffdio_api.api);
364
+
365
+ *features = uffdio_api.features;
366
+}
367
+
368
+static inline void munmap_area(void **area)
369
+{
370
+ if (*area)
371
+ if (munmap(*area, nr_pages * page_size))
372
+ err("munmap");
373
+
374
+ *area = NULL;
375
+}
376
+
377
+static void uffd_test_ctx_clear(void)
378
+{
379
+ size_t i;
380
+
381
+ if (pipefd) {
382
+ for (i = 0; i < nr_cpus * 2; ++i) {
383
+ if (close(pipefd[i]))
384
+ err("close pipefd");
385
+ }
386
+ free(pipefd);
387
+ pipefd = NULL;
388
+ }
389
+
390
+ if (count_verify) {
391
+ free(count_verify);
392
+ count_verify = NULL;
393
+ }
394
+
395
+ if (uffd != -1) {
396
+ if (close(uffd))
397
+ err("close uffd");
398
+ uffd = -1;
399
+ }
400
+
401
+ huge_fd_off0 = NULL;
402
+ munmap_area((void **)&area_src);
403
+ munmap_area((void **)&area_src_alias);
404
+ munmap_area((void **)&area_dst);
405
+ munmap_area((void **)&area_dst_alias);
406
+}
407
+
408
+static void uffd_test_ctx_init_ext(uint64_t *features)
409
+{
410
+ unsigned long nr, cpu;
411
+
412
+ uffd_test_ctx_clear();
413
+
414
+ uffd_test_ops->allocate_area((void **)&area_src);
415
+ uffd_test_ops->allocate_area((void **)&area_dst);
416
+
417
+ userfaultfd_open(features);
418
+
419
+ count_verify = malloc(nr_pages * sizeof(unsigned long long));
420
+ if (!count_verify)
421
+ err("count_verify");
422
+
423
+ for (nr = 0; nr < nr_pages; nr++) {
424
+ *area_mutex(area_src, nr) =
425
+ (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;
426
+ count_verify[nr] = *area_count(area_src, nr) = 1;
427
+ /*
428
+ * In the transition between 255 to 256, powerpc will
429
+ * read out of order in my_bcmp and see both bytes as
430
+ * zero, so leave a placeholder below always non-zero
431
+ * after the count, to keep my_bcmp from triggering false
432
+ * positives.
433
+ */
434
+ *(area_count(area_src, nr) + 1) = 1;
435
+ }
436
+
437
+ /*
438
+ * After initialization of area_src, we must explicitly release pages
439
+ * for area_dst to make sure it's fully empty. Otherwise we could have
440
+ * some area_dst pages be erroneously initialized with zero pages,
441
+ * hence we could hit memory corruption later in the test.
442
+ *
443
+ * One example is when THP is globally enabled, above allocate_area()
444
+ * calls could have the two areas merged into a single VMA (as they
445
+ * will have the same VMA flags so they're mergeable). When we
446
+ * initialize the area_src above, it's possible that some part of
447
+ * area_dst could have been faulted in via one huge THP that will be
448
+ * shared between area_src and area_dst. As a result, some of
449
+ * area_dst might not be trapped by missing userfaults.
450
+ *
451
+ * This release_pages() will guarantee even if that happened, we'll
452
+ * proactively split the thp and drop any accidentally initialized
453
+ * pages within area_dst.
454
+ */
455
+ uffd_test_ops->release_pages(area_dst);
456
+
457
+ pipefd = malloc(sizeof(int) * nr_cpus * 2);
458
+ if (!pipefd)
459
+ err("pipefd");
460
+ for (cpu = 0; cpu < nr_cpus; cpu++)
461
+ if (pipe2(&pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK))
462
+ err("pipe");
463
+}
464
+
465
+static inline void uffd_test_ctx_init(uint64_t features)
466
+{
467
+ uffd_test_ctx_init_ext(&features);
468
+}
266469
267470 static int my_bcmp(char *str1, char *str2, size_t n)
268471 {
....@@ -271,6 +474,45 @@
271474 if (str1[i] != str2[i])
272475 return 1;
273476 return 0;
477
+}
478
+
479
+static void wp_range(int ufd, __u64 start, __u64 len, bool wp)
480
+{
481
+ struct uffdio_writeprotect prms = { 0 };
482
+
483
+ /* Write protection page faults */
484
+ prms.range.start = start;
485
+ prms.range.len = len;
486
+ /* Write-protect if wp is set; otherwise undo write-protect, do wakeup after that */
487
+ prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0;
488
+
489
+ if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms))
490
+ err("clear WP failed: address=0x%"PRIx64, (uint64_t)start);
491
+}
492
+
493
+static void continue_range(int ufd, __u64 start, __u64 len)
494
+{
495
+ struct uffdio_continue req;
496
+ int ret;
497
+
498
+ req.range.start = start;
499
+ req.range.len = len;
500
+ req.mode = 0;
501
+
502
+ if (ioctl(ufd, UFFDIO_CONTINUE, &req))
503
+ err("UFFDIO_CONTINUE failed for address 0x%" PRIx64,
504
+ (uint64_t)start);
505
+
506
+ /*
507
+ * Error handling within the kernel for continue is subtly different
508
+ * from copy or zeropage, so it may be a source of bugs. Trigger an
509
+ * error (-EEXIST) on purpose, to verify doing so doesn't cause a BUG.
510
+ */
511
+ req.mapped = 0;
512
+ ret = ioctl(ufd, UFFDIO_CONTINUE, &req);
513
+ if (ret >= 0 || req.mapped != -EEXIST)
514
+ err("failed to exercise UFFDIO_CONTINUE error handling, ret=%d, mapped=%" PRId64,
515
+ ret, (int64_t) req.mapped);
274516 }
275517
276518 static void *locking_thread(void *arg)
....@@ -282,7 +524,6 @@
282524 unsigned long long count;
283525 char randstate[64];
284526 unsigned int seed;
285
- time_t start;
286527
287528 if (bounces & BOUNCE_RANDOM) {
288529 seed = (unsigned int) time(NULL) - bounces;
....@@ -291,7 +532,7 @@
291532 bzero(&rand, sizeof(rand));
292533 bzero(&randstate, sizeof(randstate));
293534 if (initstate_r(seed, randstate, sizeof(randstate), &rand))
294
- fprintf(stderr, "srandom_r error\n"), exit(1);
535
+ err("initstate_r failed");
295536 } else {
296537 page_nr = -bounces;
297538 if (!(bounces & BOUNCE_RACINGFAULTS))
....@@ -301,84 +542,25 @@
301542 while (!finished) {
302543 if (bounces & BOUNCE_RANDOM) {
303544 if (random_r(&rand, &rand_nr))
304
- fprintf(stderr, "random_r 1 error\n"), exit(1);
545
+ err("random_r failed");
305546 page_nr = rand_nr;
306547 if (sizeof(page_nr) > sizeof(rand_nr)) {
307548 if (random_r(&rand, &rand_nr))
308
- fprintf(stderr, "random_r 2 error\n"), exit(1);
549
+ err("random_r failed");
309550 page_nr |= (((unsigned long) rand_nr) << 16) <<
310551 16;
311552 }
312553 } else
313554 page_nr += 1;
314555 page_nr %= nr_pages;
315
-
316
- start = time(NULL);
317
- if (bounces & BOUNCE_VERIFY) {
318
- count = *area_count(area_dst, page_nr);
319
- if (!count)
320
- fprintf(stderr,
321
- "page_nr %lu wrong count %Lu %Lu\n",
322
- page_nr, count,
323
- count_verify[page_nr]), exit(1);
324
-
325
-
326
- /*
327
- * We can't use bcmp (or memcmp) because that
328
- * returns 0 erroneously if the memory is
329
- * changing under it (even if the end of the
330
- * page is never changing and always
331
- * different).
332
- */
333
-#if 1
334
- if (!my_bcmp(area_dst + page_nr * page_size, zeropage,
335
- page_size))
336
- fprintf(stderr,
337
- "my_bcmp page_nr %lu wrong count %Lu %Lu\n",
338
- page_nr, count,
339
- count_verify[page_nr]), exit(1);
340
-#else
341
- unsigned long loops;
342
-
343
- loops = 0;
344
- /* uncomment the below line to test with mutex */
345
- /* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */
346
- while (!bcmp(area_dst + page_nr * page_size, zeropage,
347
- page_size)) {
348
- loops += 1;
349
- if (loops > 10)
350
- break;
351
- }
352
- /* uncomment below line to test with mutex */
353
- /* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */
354
- if (loops) {
355
- fprintf(stderr,
356
- "page_nr %lu all zero thread %lu %p %lu\n",
357
- page_nr, cpu, area_dst + page_nr * page_size,
358
- loops);
359
- if (loops > 10)
360
- exit(1);
361
- }
362
-#endif
363
- }
364
-
365556 pthread_mutex_lock(area_mutex(area_dst, page_nr));
366557 count = *area_count(area_dst, page_nr);
367
- if (count != count_verify[page_nr]) {
368
- fprintf(stderr,
369
- "page_nr %lu memory corruption %Lu %Lu\n",
370
- page_nr, count,
371
- count_verify[page_nr]), exit(1);
372
- }
558
+ if (count != count_verify[page_nr])
559
+ err("page_nr %lu memory corruption %llu %llu",
560
+ page_nr, count, count_verify[page_nr]);
373561 count++;
374562 *area_count(area_dst, page_nr) = count_verify[page_nr] = count;
375563 pthread_mutex_unlock(area_mutex(area_dst, page_nr));
376
-
377
- if (time(NULL) - start > 1)
378
- fprintf(stderr,
379
- "userfault too slow %ld "
380
- "possible false positive with overcommit\n",
381
- time(NULL) - start);
382564 }
383565
384566 return NULL;
....@@ -393,11 +575,11 @@
393575 if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
394576 /* real retval in ufdio_copy.copy */
395577 if (uffdio_copy->copy != -EEXIST)
396
- fprintf(stderr, "UFFDIO_COPY retry error %Ld\n",
397
- uffdio_copy->copy), exit(1);
578
+ err("UFFDIO_COPY retry error: %"PRId64,
579
+ (int64_t)uffdio_copy->copy);
398580 } else {
399
- fprintf(stderr, "UFFDIO_COPY retry unexpected %Ld\n",
400
- uffdio_copy->copy), exit(1);
581
+ err("UFFDIO_COPY retry unexpected: %"PRId64,
582
+ (int64_t)uffdio_copy->copy);
401583 }
402584 }
403585
....@@ -406,21 +588,22 @@
406588 struct uffdio_copy uffdio_copy;
407589
408590 if (offset >= nr_pages * page_size)
409
- fprintf(stderr, "unexpected offset %lu\n",
410
- offset), exit(1);
591
+ err("unexpected offset %lu\n", offset);
411592 uffdio_copy.dst = (unsigned long) area_dst + offset;
412593 uffdio_copy.src = (unsigned long) area_src + offset;
413594 uffdio_copy.len = page_size;
414
- uffdio_copy.mode = 0;
595
+ if (test_uffdio_wp)
596
+ uffdio_copy.mode = UFFDIO_COPY_MODE_WP;
597
+ else
598
+ uffdio_copy.mode = 0;
415599 uffdio_copy.copy = 0;
416600 if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
417601 /* real retval in ufdio_copy.copy */
418602 if (uffdio_copy.copy != -EEXIST)
419
- fprintf(stderr, "UFFDIO_COPY error %Ld\n",
420
- uffdio_copy.copy), exit(1);
603
+ err("UFFDIO_COPY error: %"PRId64,
604
+ (int64_t)uffdio_copy.copy);
421605 } else if (uffdio_copy.copy != page_size) {
422
- fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
423
- uffdio_copy.copy), exit(1);
606
+ err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy);
424607 } else {
425608 if (test_uffdio_copy_eexist && retry) {
426609 test_uffdio_copy_eexist = false;
....@@ -441,16 +624,80 @@
441624 return __copy_page(ufd, offset, false);
442625 }
443626
627
+static int uffd_read_msg(int ufd, struct uffd_msg *msg)
628
+{
629
+ int ret = read(uffd, msg, sizeof(*msg));
630
+
631
+ if (ret != sizeof(*msg)) {
632
+ if (ret < 0) {
633
+ if (errno == EAGAIN)
634
+ return 1;
635
+ err("blocking read error");
636
+ } else {
637
+ err("short read");
638
+ }
639
+ }
640
+
641
+ return 0;
642
+}
643
+
644
+static void uffd_handle_page_fault(struct uffd_msg *msg,
645
+ struct uffd_stats *stats)
646
+{
647
+ unsigned long offset;
648
+
649
+ if (msg->event != UFFD_EVENT_PAGEFAULT)
650
+ err("unexpected msg event %u", msg->event);
651
+
652
+ if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) {
653
+ /* Write protect page faults */
654
+ wp_range(uffd, msg->arg.pagefault.address, page_size, false);
655
+ stats->wp_faults++;
656
+ } else if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) {
657
+ uint8_t *area;
658
+ int b;
659
+
660
+ /*
661
+ * Minor page faults
662
+ *
663
+ * To prove we can modify the original range for testing
664
+ * purposes, we're going to bit flip this range before
665
+ * continuing.
666
+ *
667
+ * Note that this requires all minor page fault tests to operate on
668
+ * area_dst (non-UFFD-registered) and area_dst_alias
669
+ * (UFFD-registered).
670
+ */
671
+
672
+ area = (uint8_t *)(area_dst +
673
+ ((char *)msg->arg.pagefault.address -
674
+ area_dst_alias));
675
+ for (b = 0; b < page_size; ++b)
676
+ area[b] = ~area[b];
677
+ continue_range(uffd, msg->arg.pagefault.address, page_size);
678
+ stats->minor_faults++;
679
+ } else {
680
+ /* Missing page faults */
681
+ if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
682
+ err("unexpected write fault");
683
+
684
+ offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
685
+ offset &= ~(page_size-1);
686
+
687
+ if (copy_page(uffd, offset))
688
+ stats->missing_faults++;
689
+ }
690
+}
691
+
444692 static void *uffd_poll_thread(void *arg)
445693 {
446
- unsigned long cpu = (unsigned long) arg;
694
+ struct uffd_stats *stats = (struct uffd_stats *)arg;
695
+ unsigned long cpu = stats->cpu;
447696 struct pollfd pollfd[2];
448697 struct uffd_msg msg;
449698 struct uffdio_register uffd_reg;
450699 int ret;
451
- unsigned long offset;
452700 char tmp_chr;
453
- unsigned long userfaults = 0;
454701
455702 pollfd[0].fd = uffd;
456703 pollfd[0].events = POLLIN;
....@@ -459,38 +706,23 @@
459706
460707 for (;;) {
461708 ret = poll(pollfd, 2, -1);
462
- if (!ret)
463
- fprintf(stderr, "poll error %d\n", ret), exit(1);
464
- if (ret < 0)
465
- perror("poll"), exit(1);
709
+ if (ret <= 0)
710
+ err("poll error: %d", ret);
466711 if (pollfd[1].revents & POLLIN) {
467712 if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
468
- fprintf(stderr, "read pipefd error\n"),
469
- exit(1);
713
+ err("read pipefd error");
470714 break;
471715 }
472716 if (!(pollfd[0].revents & POLLIN))
473
- fprintf(stderr, "pollfd[0].revents %d\n",
474
- pollfd[0].revents), exit(1);
475
- ret = read(uffd, &msg, sizeof(msg));
476
- if (ret < 0) {
477
- if (errno == EAGAIN)
478
- continue;
479
- perror("nonblocking read error"), exit(1);
480
- }
717
+ err("pollfd[0].revents %d", pollfd[0].revents);
718
+ if (uffd_read_msg(uffd, &msg))
719
+ continue;
481720 switch (msg.event) {
482721 default:
483
- fprintf(stderr, "unexpected msg event %u\n",
484
- msg.event), exit(1);
722
+ err("unexpected msg event %u\n", msg.event);
485723 break;
486724 case UFFD_EVENT_PAGEFAULT:
487
- if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
488
- fprintf(stderr, "unexpected write fault\n"), exit(1);
489
- offset = (char *)(unsigned long)msg.arg.pagefault.address -
490
- area_dst;
491
- offset &= ~(page_size-1);
492
- if (copy_page(uffd, offset))
493
- userfaults++;
725
+ uffd_handle_page_fault(&msg, stats);
494726 break;
495727 case UFFD_EVENT_FORK:
496728 close(uffd);
....@@ -502,74 +734,74 @@
502734 uffd_reg.range.len = msg.arg.remove.end -
503735 msg.arg.remove.start;
504736 if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
505
- fprintf(stderr, "remove failure\n"), exit(1);
737
+ err("remove failure");
506738 break;
507739 case UFFD_EVENT_REMAP:
508740 area_dst = (char *)(unsigned long)msg.arg.remap.to;
509741 break;
510742 }
511743 }
512
- return (void *)userfaults;
744
+
745
+ return NULL;
513746 }
514747
515748 pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER;
516749
517750 static void *uffd_read_thread(void *arg)
518751 {
519
- unsigned long *this_cpu_userfaults;
752
+ struct uffd_stats *stats = (struct uffd_stats *)arg;
520753 struct uffd_msg msg;
521
- unsigned long offset;
522
- int ret;
523
-
524
- this_cpu_userfaults = (unsigned long *) arg;
525
- *this_cpu_userfaults = 0;
526754
527755 pthread_mutex_unlock(&uffd_read_mutex);
528756 /* from here cancellation is ok */
529757
530758 for (;;) {
531
- ret = read(uffd, &msg, sizeof(msg));
532
- if (ret != sizeof(msg)) {
533
- if (ret < 0)
534
- perror("blocking read error"), exit(1);
535
- else
536
- fprintf(stderr, "short read\n"), exit(1);
537
- }
538
- if (msg.event != UFFD_EVENT_PAGEFAULT)
539
- fprintf(stderr, "unexpected msg event %u\n",
540
- msg.event), exit(1);
541
- if (bounces & BOUNCE_VERIFY &&
542
- msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
543
- fprintf(stderr, "unexpected write fault\n"), exit(1);
544
- offset = (char *)(unsigned long)msg.arg.pagefault.address -
545
- area_dst;
546
- offset &= ~(page_size-1);
547
- if (copy_page(uffd, offset))
548
- (*this_cpu_userfaults)++;
759
+ if (uffd_read_msg(uffd, &msg))
760
+ continue;
761
+ uffd_handle_page_fault(&msg, stats);
549762 }
550
- return (void *)NULL;
763
+
764
+ return NULL;
551765 }
552766
553767 static void *background_thread(void *arg)
554768 {
555769 unsigned long cpu = (unsigned long) arg;
556
- unsigned long page_nr;
770
+ unsigned long page_nr, start_nr, mid_nr, end_nr;
557771
558
- for (page_nr = cpu * nr_pages_per_cpu;
559
- page_nr < (cpu+1) * nr_pages_per_cpu;
560
- page_nr++)
772
+ start_nr = cpu * nr_pages_per_cpu;
773
+ end_nr = (cpu+1) * nr_pages_per_cpu;
774
+ mid_nr = (start_nr + end_nr) / 2;
775
+
776
+ /* Copy the first half of the pages */
777
+ for (page_nr = start_nr; page_nr < mid_nr; page_nr++)
778
+ copy_page_retry(uffd, page_nr * page_size);
779
+
780
+ /*
781
+ * If we need to test uffd-wp, set it up now. Then we'll have
782
+ * at least the first half of the pages mapped already which
783
+ * can be write-protected for testing
784
+ */
785
+ if (test_uffdio_wp)
786
+ wp_range(uffd, (unsigned long)area_dst + start_nr * page_size,
787
+ nr_pages_per_cpu * page_size, true);
788
+
789
+ /*
790
+ * Continue the 2nd half of the page copying, handling write
791
+ * protection faults if any
792
+ */
793
+ for (page_nr = mid_nr; page_nr < end_nr; page_nr++)
561794 copy_page_retry(uffd, page_nr * page_size);
562795
563796 return NULL;
564797 }
565798
566
-static int stress(unsigned long *userfaults)
799
+static int stress(struct uffd_stats *uffd_stats)
567800 {
568801 unsigned long cpu;
569802 pthread_t locking_threads[nr_cpus];
570803 pthread_t uffd_threads[nr_cpus];
571804 pthread_t background_threads[nr_cpus];
572
- void **_userfaults = (void **) userfaults;
573805
574806 finished = 0;
575807 for (cpu = 0; cpu < nr_cpus; cpu++) {
....@@ -578,12 +810,13 @@
578810 return 1;
579811 if (bounces & BOUNCE_POLL) {
580812 if (pthread_create(&uffd_threads[cpu], &attr,
581
- uffd_poll_thread, (void *)cpu))
813
+ uffd_poll_thread,
814
+ (void *)&uffd_stats[cpu]))
582815 return 1;
583816 } else {
584817 if (pthread_create(&uffd_threads[cpu], &attr,
585818 uffd_read_thread,
586
- &_userfaults[cpu]))
819
+ (void *)&uffd_stats[cpu]))
587820 return 1;
588821 pthread_mutex_lock(&uffd_read_mutex);
589822 }
....@@ -604,17 +837,20 @@
604837 * UFFDIO_COPY without writing zero pages into area_dst
605838 * because the background threads already completed).
606839 */
607
- if (uffd_test_ops->release_pages(area_src))
608
- return 1;
840
+ uffd_test_ops->release_pages(area_src);
841
+
842
+ finished = 1;
843
+ for (cpu = 0; cpu < nr_cpus; cpu++)
844
+ if (pthread_join(locking_threads[cpu], NULL))
845
+ return 1;
609846
610847 for (cpu = 0; cpu < nr_cpus; cpu++) {
611848 char c;
612849 if (bounces & BOUNCE_POLL) {
613
- if (write(pipefd[cpu*2+1], &c, 1) != 1) {
614
- fprintf(stderr, "pipefd write error\n");
615
- return 1;
616
- }
617
- if (pthread_join(uffd_threads[cpu], &_userfaults[cpu]))
850
+ if (write(pipefd[cpu*2+1], &c, 1) != 1)
851
+ err("pipefd write error");
852
+ if (pthread_join(uffd_threads[cpu],
853
+ (void *)&uffd_stats[cpu]))
618854 return 1;
619855 } else {
620856 if (pthread_cancel(uffd_threads[cpu]))
....@@ -622,37 +858,6 @@
622858 if (pthread_join(uffd_threads[cpu], NULL))
623859 return 1;
624860 }
625
- }
626
-
627
- finished = 1;
628
- for (cpu = 0; cpu < nr_cpus; cpu++)
629
- if (pthread_join(locking_threads[cpu], NULL))
630
- return 1;
631
-
632
- return 0;
633
-}
634
-
635
-static int userfaultfd_open(int features)
636
-{
637
- struct uffdio_api uffdio_api;
638
-
639
- uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
640
- if (uffd < 0) {
641
- fprintf(stderr,
642
- "userfaultfd syscall not available in this kernel\n");
643
- return 1;
644
- }
645
- uffd_flags = fcntl(uffd, F_GETFD, NULL);
646
-
647
- uffdio_api.api = UFFD_API;
648
- uffdio_api.features = features;
649
- if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
650
- fprintf(stderr, "UFFDIO_API\n");
651
- return 1;
652
- }
653
- if (uffdio_api.api != UFFD_API) {
654
- fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api);
655
- return 1;
656861 }
657862
658863 return 0;
....@@ -709,25 +914,35 @@
709914 memset(&act, 0, sizeof(act));
710915 act.sa_sigaction = sighndl;
711916 act.sa_flags = SA_SIGINFO;
712
- if (sigaction(SIGBUS, &act, 0)) {
713
- perror("sigaction");
714
- return 1;
715
- }
917
+ if (sigaction(SIGBUS, &act, 0))
918
+ err("sigaction");
716919 lastnr = (unsigned long)-1;
717920 }
718921
719922 for (nr = 0; nr < split_nr_pages; nr++) {
923
+ int steps = 1;
924
+ unsigned long offset = nr * page_size;
925
+
720926 if (signal_test) {
721927 if (sigsetjmp(*sigbuf, 1) != 0) {
722
- if (nr == lastnr) {
723
- fprintf(stderr, "Signal repeated\n");
724
- return 1;
725
- }
928
+ if (steps == 1 && nr == lastnr)
929
+ err("Signal repeated");
726930
727931 lastnr = nr;
728932 if (signal_test == 1) {
729
- if (copy_page(uffd, nr * page_size))
730
- signalled++;
933
+ if (steps == 1) {
934
+ /* This is a MISSING request */
935
+ steps++;
936
+ if (copy_page(uffd, offset))
937
+ signalled++;
938
+ } else {
939
+ /* This is a WP request */
940
+ assert(steps == 2);
941
+ wp_range(uffd,
942
+ (__u64)area_dst +
943
+ offset,
944
+ page_size, false);
945
+ }
731946 } else {
732947 signalled++;
733948 continue;
....@@ -736,12 +951,14 @@
736951 }
737952
738953 count = *area_count(area_dst, nr);
739
- if (count != count_verify[nr]) {
740
- fprintf(stderr,
741
- "nr %lu memory corruption %Lu %Lu\n",
742
- nr, count,
743
- count_verify[nr]), exit(1);
744
- }
954
+ if (count != count_verify[nr])
955
+ err("nr %lu memory corruption %llu %llu\n",
956
+ nr, count, count_verify[nr]);
957
+ /*
958
+ * Trigger write protection, if there is any, by writing
959
+ * the same value back.
960
+ */
961
+ *area_count(area_dst, nr) = count;
745962 }
746963
747964 if (signal_test)
....@@ -753,25 +970,28 @@
753970 area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size,
754971 MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
755972 if (area_dst == MAP_FAILED)
756
- perror("mremap"), exit(1);
973
+ err("mremap");
974
+ /* Reset area_src since we just clobbered it */
975
+ area_src = NULL;
757976
758977 for (; nr < nr_pages; nr++) {
759978 count = *area_count(area_dst, nr);
760979 if (count != count_verify[nr]) {
761
- fprintf(stderr,
762
- "nr %lu memory corruption %Lu %Lu\n",
763
- nr, count,
764
- count_verify[nr]), exit(1);
980
+ err("nr %lu memory corruption %llu %llu\n",
981
+ nr, count, count_verify[nr]);
765982 }
983
+ /*
984
+ * Trigger write protection, if there is any, by writing
985
+ * the same value back.
986
+ */
987
+ *area_count(area_dst, nr) = count;
766988 }
767989
768
- if (uffd_test_ops->release_pages(area_dst))
769
- return 1;
990
+ uffd_test_ops->release_pages(area_dst);
770991
771
- for (nr = 0; nr < nr_pages; nr++) {
992
+ for (nr = 0; nr < nr_pages; nr++)
772993 if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
773
- fprintf(stderr, "nr %lu is not zero\n", nr), exit(1);
774
- }
994
+ err("nr %lu is not zero", nr);
775995
776996 return 0;
777997 }
....@@ -785,11 +1005,11 @@
7851005 offset);
7861006 if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
7871007 if (uffdio_zeropage->zeropage != -EEXIST)
788
- fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n",
789
- uffdio_zeropage->zeropage), exit(1);
1008
+ err("UFFDIO_ZEROPAGE error: %"PRId64,
1009
+ (int64_t)uffdio_zeropage->zeropage);
7901010 } else {
791
- fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n",
792
- uffdio_zeropage->zeropage), exit(1);
1011
+ err("UFFDIO_ZEROPAGE error: %"PRId64,
1012
+ (int64_t)uffdio_zeropage->zeropage);
7931013 }
7941014 }
7951015
....@@ -798,35 +1018,26 @@
7981018 struct uffdio_zeropage uffdio_zeropage;
7991019 int ret;
8001020 unsigned long has_zeropage;
1021
+ __s64 res;
8011022
8021023 has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE);
8031024
8041025 if (offset >= nr_pages * page_size)
805
- fprintf(stderr, "unexpected offset %lu\n",
806
- offset), exit(1);
1026
+ err("unexpected offset %lu", offset);
8071027 uffdio_zeropage.range.start = (unsigned long) area_dst + offset;
8081028 uffdio_zeropage.range.len = page_size;
8091029 uffdio_zeropage.mode = 0;
8101030 ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
1031
+ res = uffdio_zeropage.zeropage;
8111032 if (ret) {
8121033 /* real retval in ufdio_zeropage.zeropage */
813
- if (has_zeropage) {
814
- if (uffdio_zeropage.zeropage == -EEXIST)
815
- fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"),
816
- exit(1);
817
- else
818
- fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n",
819
- uffdio_zeropage.zeropage), exit(1);
820
- } else {
821
- if (uffdio_zeropage.zeropage != -EINVAL)
822
- fprintf(stderr,
823
- "UFFDIO_ZEROPAGE not -EINVAL %Ld\n",
824
- uffdio_zeropage.zeropage), exit(1);
825
- }
1034
+ if (has_zeropage)
1035
+ err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)res);
1036
+ else if (res != -EINVAL)
1037
+ err("UFFDIO_ZEROPAGE not -EINVAL");
8261038 } else if (has_zeropage) {
827
- if (uffdio_zeropage.zeropage != page_size) {
828
- fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
829
- uffdio_zeropage.zeropage), exit(1);
1039
+ if (res != page_size) {
1040
+ err("UFFDIO_ZEROPAGE unexpected size");
8301041 } else {
8311042 if (test_uffdio_zeropage_eexist && retry) {
8321043 test_uffdio_zeropage_eexist = false;
....@@ -835,11 +1046,8 @@
8351046 }
8361047 return 1;
8371048 }
838
- } else {
839
- fprintf(stderr,
840
- "UFFDIO_ZEROPAGE succeeded %Ld\n",
841
- uffdio_zeropage.zeropage), exit(1);
842
- }
1049
+ } else
1050
+ err("UFFDIO_ZEROPAGE succeeded");
8431051
8441052 return 0;
8451053 }
....@@ -858,30 +1066,24 @@
8581066 printf("testing UFFDIO_ZEROPAGE: ");
8591067 fflush(stdout);
8601068
861
- if (uffd_test_ops->release_pages(area_dst))
862
- return 1;
1069
+ uffd_test_ctx_init(0);
8631070
864
- if (userfaultfd_open(0) < 0)
865
- return 1;
8661071 uffdio_register.range.start = (unsigned long) area_dst;
8671072 uffdio_register.range.len = nr_pages * page_size;
8681073 uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
1074
+ if (test_uffdio_wp)
1075
+ uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
8691076 if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
870
- fprintf(stderr, "register failure\n"), exit(1);
1077
+ err("register failure");
8711078
8721079 expected_ioctls = uffd_test_ops->expected_ioctls;
873
- if ((uffdio_register.ioctls & expected_ioctls) !=
874
- expected_ioctls)
875
- fprintf(stderr,
876
- "unexpected missing ioctl for anon memory\n"),
877
- exit(1);
1080
+ if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls)
1081
+ err("unexpected missing ioctl for anon memory");
8781082
879
- if (uffdio_zeropage(uffd, 0)) {
1083
+ if (uffdio_zeropage(uffd, 0))
8801084 if (my_bcmp(area_dst, zeropage, page_size))
881
- fprintf(stderr, "zeropage is not zero\n"), exit(1);
882
- }
1085
+ err("zeropage is not zero");
8831086
884
- close(uffd);
8851087 printf("done.\n");
8861088 return 0;
8871089 }
....@@ -890,60 +1092,54 @@
8901092 {
8911093 struct uffdio_register uffdio_register;
8921094 unsigned long expected_ioctls;
893
- unsigned long userfaults;
8941095 pthread_t uffd_mon;
8951096 int err, features;
8961097 pid_t pid;
8971098 char c;
1099
+ struct uffd_stats stats = { 0 };
8981100
8991101 printf("testing events (fork, remap, remove): ");
9001102 fflush(stdout);
9011103
902
- if (uffd_test_ops->release_pages(area_dst))
903
- return 1;
904
-
9051104 features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
9061105 UFFD_FEATURE_EVENT_REMOVE;
907
- if (userfaultfd_open(features) < 0)
908
- return 1;
1106
+ uffd_test_ctx_init(features);
1107
+
9091108 fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
9101109
9111110 uffdio_register.range.start = (unsigned long) area_dst;
9121111 uffdio_register.range.len = nr_pages * page_size;
9131112 uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
1113
+ if (test_uffdio_wp)
1114
+ uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
9141115 if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
915
- fprintf(stderr, "register failure\n"), exit(1);
1116
+ err("register failure");
9161117
9171118 expected_ioctls = uffd_test_ops->expected_ioctls;
918
- if ((uffdio_register.ioctls & expected_ioctls) !=
919
- expected_ioctls)
920
- fprintf(stderr,
921
- "unexpected missing ioctl for anon memory\n"),
922
- exit(1);
1119
+ if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls)
1120
+ err("unexpected missing ioctl for anon memory");
9231121
924
- if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL))
925
- perror("uffd_poll_thread create"), exit(1);
1122
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
1123
+ err("uffd_poll_thread create");
9261124
9271125 pid = fork();
9281126 if (pid < 0)
929
- perror("fork"), exit(1);
1127
+ err("fork");
9301128
9311129 if (!pid)
932
- return faulting_process(0);
1130
+ exit(faulting_process(0));
9331131
9341132 waitpid(pid, &err, 0);
9351133 if (err)
936
- fprintf(stderr, "faulting process failed\n"), exit(1);
937
-
1134
+ err("faulting process failed");
9381135 if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
939
- perror("pipe write"), exit(1);
940
- if (pthread_join(uffd_mon, (void **)&userfaults))
1136
+ err("pipe write");
1137
+ if (pthread_join(uffd_mon, NULL))
9411138 return 1;
9421139
943
- close(uffd);
944
- printf("userfaults: %ld\n", userfaults);
1140
+ uffd_stats_report(&stats, 1);
9451141
946
- return userfaults != nr_pages;
1142
+ return stats.missing_faults != nr_pages;
9471143 }
9481144
9491145 static int userfaultfd_sig_test(void)
....@@ -955,119 +1151,155 @@
9551151 int err, features;
9561152 pid_t pid;
9571153 char c;
1154
+ struct uffd_stats stats = { 0 };
9581155
9591156 printf("testing signal delivery: ");
9601157 fflush(stdout);
9611158
962
- if (uffd_test_ops->release_pages(area_dst))
963
- return 1;
964
-
9651159 features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS;
966
- if (userfaultfd_open(features) < 0)
967
- return 1;
1160
+ uffd_test_ctx_init(features);
1161
+
9681162 fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
9691163
9701164 uffdio_register.range.start = (unsigned long) area_dst;
9711165 uffdio_register.range.len = nr_pages * page_size;
9721166 uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
1167
+ if (test_uffdio_wp)
1168
+ uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
9731169 if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
974
- fprintf(stderr, "register failure\n"), exit(1);
1170
+ err("register failure");
9751171
9761172 expected_ioctls = uffd_test_ops->expected_ioctls;
977
- if ((uffdio_register.ioctls & expected_ioctls) !=
978
- expected_ioctls)
979
- fprintf(stderr,
980
- "unexpected missing ioctl for anon memory\n"),
981
- exit(1);
1173
+ if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls)
1174
+ err("unexpected missing ioctl for anon memory");
9821175
9831176 if (faulting_process(1))
984
- fprintf(stderr, "faulting process failed\n"), exit(1);
1177
+ err("faulting process failed");
9851178
986
- if (uffd_test_ops->release_pages(area_dst))
987
- return 1;
1179
+ uffd_test_ops->release_pages(area_dst);
9881180
989
- if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL))
990
- perror("uffd_poll_thread create"), exit(1);
1181
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
1182
+ err("uffd_poll_thread create");
9911183
9921184 pid = fork();
9931185 if (pid < 0)
994
- perror("fork"), exit(1);
1186
+ err("fork");
9951187
9961188 if (!pid)
9971189 exit(faulting_process(2));
9981190
9991191 waitpid(pid, &err, 0);
10001192 if (err)
1001
- fprintf(stderr, "faulting process failed\n"), exit(1);
1002
-
1193
+ err("faulting process failed");
10031194 if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
1004
- perror("pipe write"), exit(1);
1195
+ err("pipe write");
10051196 if (pthread_join(uffd_mon, (void **)&userfaults))
10061197 return 1;
10071198
10081199 printf("done.\n");
10091200 if (userfaults)
1010
- fprintf(stderr, "Signal test failed, userfaults: %ld\n",
1011
- userfaults);
1012
- close(uffd);
1201
+ err("Signal test failed, userfaults: %ld", userfaults);
1202
+
10131203 return userfaults != 0;
10141204 }
1205
+
1206
+static int userfaultfd_minor_test(void)
1207
+{
1208
+ struct uffdio_register uffdio_register;
1209
+ unsigned long expected_ioctls;
1210
+ unsigned long p;
1211
+ pthread_t uffd_mon;
1212
+ uint8_t expected_byte;
1213
+ void *expected_page;
1214
+ char c;
1215
+ struct uffd_stats stats = { 0 };
1216
+ uint64_t req_features, features_out;
1217
+
1218
+ if (!test_uffdio_minor)
1219
+ return 0;
1220
+
1221
+ printf("testing minor faults: ");
1222
+ fflush(stdout);
1223
+
1224
+ if (test_type == TEST_HUGETLB)
1225
+ req_features = UFFD_FEATURE_MINOR_HUGETLBFS;
1226
+ else if (test_type == TEST_SHMEM)
1227
+ req_features = UFFD_FEATURE_MINOR_SHMEM;
1228
+ else
1229
+ return 1;
1230
+
1231
+ features_out = req_features;
1232
+ uffd_test_ctx_init_ext(&features_out);
1233
+ /* If kernel reports required features aren't supported, skip test. */
1234
+ if ((features_out & req_features) != req_features) {
1235
+ printf("skipping test due to lack of feature support\n");
1236
+ fflush(stdout);
1237
+ return 0;
1238
+ }
1239
+
1240
+ uffdio_register.range.start = (unsigned long)area_dst_alias;
1241
+ uffdio_register.range.len = nr_pages * page_size;
1242
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MINOR;
1243
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
1244
+ err("register failure");
1245
+
1246
+ expected_ioctls = uffd_test_ops->expected_ioctls;
1247
+ expected_ioctls |= 1 << _UFFDIO_CONTINUE;
1248
+ if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls)
1249
+ err("unexpected missing ioctl(s)");
1250
+
1251
+ /*
1252
+ * After registering with UFFD, populate the non-UFFD-registered side of
1253
+ * the shared mapping. This should *not* trigger any UFFD minor faults.
1254
+ */
1255
+ for (p = 0; p < nr_pages; ++p) {
1256
+ memset(area_dst + (p * page_size), p % ((uint8_t)-1),
1257
+ page_size);
1258
+ }
1259
+
1260
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
1261
+ err("uffd_poll_thread create");
1262
+
1263
+ /*
1264
+ * Read each of the pages back using the UFFD-registered mapping. We
1265
+ * expect that the first time we touch a page, it will result in a minor
1266
+ * fault. uffd_poll_thread will resolve the fault by bit-flipping the
1267
+ * page's contents, and then issuing a CONTINUE ioctl.
1268
+ */
1269
+
1270
+ if (posix_memalign(&expected_page, page_size, page_size))
1271
+ err("out of memory");
1272
+
1273
+ for (p = 0; p < nr_pages; ++p) {
1274
+ expected_byte = ~((uint8_t)(p % ((uint8_t)-1)));
1275
+ memset(expected_page, expected_byte, page_size);
1276
+ if (my_bcmp(expected_page, area_dst_alias + (p * page_size),
1277
+ page_size))
1278
+ err("unexpected page contents after minor fault");
1279
+ }
1280
+
1281
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
1282
+ err("pipe write");
1283
+ if (pthread_join(uffd_mon, NULL))
1284
+ return 1;
1285
+
1286
+ uffd_stats_report(&stats, 1);
1287
+
1288
+ return stats.missing_faults != 0 || stats.minor_faults != nr_pages;
1289
+}
1290
+
10151291 static int userfaultfd_stress(void)
10161292 {
10171293 void *area;
10181294 char *tmp_area;
10191295 unsigned long nr;
10201296 struct uffdio_register uffdio_register;
1021
- unsigned long cpu;
1022
- int err;
1023
- unsigned long userfaults[nr_cpus];
1297
+ struct uffd_stats uffd_stats[nr_cpus];
10241298
1025
- uffd_test_ops->allocate_area((void **)&area_src);
1026
- if (!area_src)
1027
- return 1;
1028
- uffd_test_ops->allocate_area((void **)&area_dst);
1029
- if (!area_dst)
1030
- return 1;
1299
+ uffd_test_ctx_init(0);
10311300
1032
- if (userfaultfd_open(0) < 0)
1033
- return 1;
1034
-
1035
- count_verify = malloc(nr_pages * sizeof(unsigned long long));
1036
- if (!count_verify) {
1037
- perror("count_verify");
1038
- return 1;
1039
- }
1040
-
1041
- for (nr = 0; nr < nr_pages; nr++) {
1042
- *area_mutex(area_src, nr) = (pthread_mutex_t)
1043
- PTHREAD_MUTEX_INITIALIZER;
1044
- count_verify[nr] = *area_count(area_src, nr) = 1;
1045
- /*
1046
- * In the transition between 255 to 256, powerpc will
1047
- * read out of order in my_bcmp and see both bytes as
1048
- * zero, so leave a placeholder below always non-zero
1049
- * after the count, to avoid my_bcmp to trigger false
1050
- * positives.
1051
- */
1052
- *(area_count(area_src, nr) + 1) = 1;
1053
- }
1054
-
1055
- pipefd = malloc(sizeof(int) * nr_cpus * 2);
1056
- if (!pipefd) {
1057
- perror("pipefd");
1058
- return 1;
1059
- }
1060
- for (cpu = 0; cpu < nr_cpus; cpu++) {
1061
- if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) {
1062
- perror("pipe");
1063
- return 1;
1064
- }
1065
- }
1066
-
1067
- if (posix_memalign(&area, page_size, page_size)) {
1068
- fprintf(stderr, "out of memory\n");
1069
- return 1;
1070
- }
1301
+ if (posix_memalign(&area, page_size, page_size))
1302
+ err("out of memory");
10711303 zeropage = area;
10721304 bzero(zeropage, page_size);
10731305
....@@ -1076,7 +1308,6 @@
10761308 pthread_attr_init(&attr);
10771309 pthread_attr_setstacksize(&attr, 16*1024*1024);
10781310
1079
- err = 0;
10801311 while (bounces--) {
10811312 unsigned long expected_ioctls;
10821313
....@@ -1101,25 +1332,20 @@
11011332 uffdio_register.range.start = (unsigned long) area_dst;
11021333 uffdio_register.range.len = nr_pages * page_size;
11031334 uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
1104
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
1105
- fprintf(stderr, "register failure\n");
1106
- return 1;
1107
- }
1335
+ if (test_uffdio_wp)
1336
+ uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
1337
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
1338
+ err("register failure");
11081339 expected_ioctls = uffd_test_ops->expected_ioctls;
11091340 if ((uffdio_register.ioctls & expected_ioctls) !=
1110
- expected_ioctls) {
1111
- fprintf(stderr,
1112
- "unexpected missing ioctl for anon memory\n");
1113
- return 1;
1114
- }
1341
+ expected_ioctls)
1342
+ err("unexpected missing ioctl for anon memory");
11151343
11161344 if (area_dst_alias) {
11171345 uffdio_register.range.start = (unsigned long)
11181346 area_dst_alias;
1119
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
1120
- fprintf(stderr, "register failure alias\n");
1121
- return 1;
1122
- }
1347
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
1348
+ err("register failure alias");
11231349 }
11241350
11251351 /*
....@@ -1146,41 +1372,36 @@
11461372 * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's
11471373 * required to MADV_DONTNEED here.
11481374 */
1149
- if (uffd_test_ops->release_pages(area_dst))
1150
- return 1;
1375
+ uffd_test_ops->release_pages(area_dst);
1376
+
1377
+ uffd_stats_reset(uffd_stats, nr_cpus);
11511378
11521379 /* bounce pass */
1153
- if (stress(userfaults))
1380
+ if (stress(uffd_stats))
11541381 return 1;
11551382
1383
+ /* Clear all the write protections, if there are any */
1384
+ if (test_uffdio_wp)
1385
+ wp_range(uffd, (unsigned long)area_dst,
1386
+ nr_pages * page_size, false);
1387
+
11561388 /* unregister */
1157
- if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) {
1158
- fprintf(stderr, "unregister failure\n");
1159
- return 1;
1160
- }
1389
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range))
1390
+ err("unregister failure");
11611391 if (area_dst_alias) {
11621392 uffdio_register.range.start = (unsigned long) area_dst;
11631393 if (ioctl(uffd, UFFDIO_UNREGISTER,
1164
- &uffdio_register.range)) {
1165
- fprintf(stderr, "unregister failure alias\n");
1166
- return 1;
1167
- }
1394
+ &uffdio_register.range))
1395
+ err("unregister failure alias");
11681396 }
11691397
11701398 /* verification */
1171
- if (bounces & BOUNCE_VERIFY) {
1172
- for (nr = 0; nr < nr_pages; nr++) {
1173
- if (*area_count(area_dst, nr) != count_verify[nr]) {
1174
- fprintf(stderr,
1175
- "error area_count %Lu %Lu %lu\n",
1176
- *area_count(area_src, nr),
1177
- count_verify[nr],
1178
- nr);
1179
- err = 1;
1180
- bounces = 0;
1181
- }
1182
- }
1183
- }
1399
+ if (bounces & BOUNCE_VERIFY)
1400
+ for (nr = 0; nr < nr_pages; nr++)
1401
+ if (*area_count(area_dst, nr) != count_verify[nr])
1402
+ err("error area_count %llu %llu %lu\n",
1403
+ *area_count(area_src, nr),
1404
+ count_verify[nr], nr);
11841405
11851406 /* prepare next bounce */
11861407 tmp_area = area_src;
....@@ -1191,18 +1412,11 @@
11911412 area_src_alias = area_dst_alias;
11921413 area_dst_alias = tmp_area;
11931414
1194
- printf("userfaults:");
1195
- for (cpu = 0; cpu < nr_cpus; cpu++)
1196
- printf(" %lu", userfaults[cpu]);
1197
- printf("\n");
1415
+ uffd_stats_report(uffd_stats, nr_cpus);
11981416 }
11991417
1200
- if (err)
1201
- return err;
1202
-
1203
- close(uffd);
12041418 return userfaultfd_zeropage_test() || userfaultfd_sig_test()
1205
- || userfaultfd_events_test();
1419
+ || userfaultfd_events_test() || userfaultfd_minor_test();
12061420 }
12071421
12081422 /*
....@@ -1234,6 +1448,8 @@
12341448 if (!strcmp(type, "anon")) {
12351449 test_type = TEST_ANON;
12361450 uffd_test_ops = &anon_uffd_test_ops;
1451
+ /* Only enable write-protect test for anonymous test */
1452
+ test_uffdio_wp = true;
12371453 } else if (!strcmp(type, "hugetlb")) {
12381454 test_type = TEST_HUGETLB;
12391455 uffd_test_ops = &hugetlb_uffd_test_ops;
....@@ -1241,12 +1457,15 @@
12411457 map_shared = true;
12421458 test_type = TEST_HUGETLB;
12431459 uffd_test_ops = &hugetlb_uffd_test_ops;
1460
+ /* Minor faults require shared hugetlb; only enable here. */
1461
+ test_uffdio_minor = true;
12441462 } else if (!strcmp(type, "shmem")) {
12451463 map_shared = true;
12461464 test_type = TEST_SHMEM;
12471465 uffd_test_ops = &shmem_uffd_test_ops;
1466
+ test_uffdio_minor = true;
12481467 } else {
1249
- fprintf(stderr, "Unknown test type: %s\n", type), exit(1);
1468
+ err("Unknown test type: %s", type);
12501469 }
12511470
12521471 if (test_type == TEST_HUGETLB)
....@@ -1255,11 +1474,10 @@
12551474 page_size = sysconf(_SC_PAGE_SIZE);
12561475
12571476 if (!page_size)
1258
- fprintf(stderr, "Unable to determine page size\n"),
1259
- exit(2);
1477
+ err("Unable to determine page size");
12601478 if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
12611479 > page_size)
1262
- fprintf(stderr, "Impossible to run this test\n"), exit(2);
1480
+ err("Impossible to run this test");
12631481 }
12641482
12651483 static void sigalrm(int sig)
....@@ -1274,11 +1492,10 @@
12741492 int main(int argc, char **argv)
12751493 {
12761494 if (argc < 4)
1277
- fprintf(stderr, "Usage: <test type> <MiB> <bounces> [hugetlbfs_file]\n"),
1278
- exit(1);
1495
+ usage();
12791496
12801497 if (signal(SIGALRM, sigalrm) == SIG_ERR)
1281
- fprintf(stderr, "failed to arm SIGALRM"), exit(1);
1498
+ err("failed to arm SIGALRM");
12821499 alarm(ALARM_INTERVAL_SECS);
12831500
12841501 set_test_type(argv[1]);
....@@ -1287,32 +1504,35 @@
12871504 nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size /
12881505 nr_cpus;
12891506 if (!nr_pages_per_cpu) {
1290
- fprintf(stderr, "invalid MiB\n");
1291
- fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
1507
+ _err("invalid MiB");
1508
+ usage();
12921509 }
12931510
12941511 bounces = atoi(argv[3]);
12951512 if (bounces <= 0) {
1296
- fprintf(stderr, "invalid bounces\n");
1297
- fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
1513
+ _err("invalid bounces");
1514
+ usage();
12981515 }
12991516 nr_pages = nr_pages_per_cpu * nr_cpus;
13001517
13011518 if (test_type == TEST_HUGETLB) {
13021519 if (argc < 5)
1303
- fprintf(stderr, "Usage: hugetlb <MiB> <bounces> <hugetlbfs_file>\n"),
1304
- exit(1);
1520
+ usage();
13051521 huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755);
1306
- if (huge_fd < 0) {
1307
- fprintf(stderr, "Open of %s failed", argv[3]);
1308
- perror("open");
1309
- exit(1);
1310
- }
1311
- if (ftruncate(huge_fd, 0)) {
1312
- fprintf(stderr, "ftruncate %s to size 0 failed", argv[3]);
1313
- perror("ftruncate");
1314
- exit(1);
1315
- }
1522
+ if (huge_fd < 0)
1523
+ err("Open of %s failed", argv[4]);
1524
+ if (ftruncate(huge_fd, 0))
1525
+ err("ftruncate %s to size 0 failed", argv[4]);
1526
+ } else if (test_type == TEST_SHMEM) {
1527
+ shm_fd = memfd_create(argv[0], 0);
1528
+ if (shm_fd < 0)
1529
+ err("memfd_create");
1530
+ if (ftruncate(shm_fd, nr_pages * page_size * 2))
1531
+ err("ftruncate");
1532
+ if (fallocate(shm_fd,
1533
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0,
1534
+ nr_pages * page_size * 2))
1535
+ err("fallocate");
13161536 }
13171537 printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
13181538 nr_pages, nr_pages_per_cpu);