hc
2023-12-09 b22da3d8526a935aa31e086e63f60ff3246cb61c
kernel/tools/testing/selftests/x86/fsgsbase.c
....@@ -1,7 +1,7 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * fsgsbase.c, an fsgsbase test
34 * Copyright (c) 2014-2016 Andy Lutomirski
4
- * GPL v2
55 */
66
77 #define _GNU_SOURCE
....@@ -23,6 +23,10 @@
2323 #include <pthread.h>
2424 #include <asm/ldt.h>
2525 #include <sys/mman.h>
26
+#include <stddef.h>
27
+#include <sys/ptrace.h>
28
+#include <sys/wait.h>
29
+#include <setjmp.h>
2630
2731 #ifndef __x86_64__
2832 # error This test is 64-bit only
....@@ -30,6 +34,8 @@
3034
3135 static volatile sig_atomic_t want_segv;
3236 static volatile unsigned long segv_addr;
37
+
38
+static unsigned short *shared_scratch;
3339
3440 static int nerrs;
3541
....@@ -69,6 +75,43 @@
6975
7076 ctx->uc_mcontext.gregs[REG_RIP] += 4; /* Skip the faulting mov */
7177
78
+}
79
+
80
+static jmp_buf jmpbuf;
81
+
82
+static void sigill(int sig, siginfo_t *si, void *ctx_void)
83
+{
84
+ siglongjmp(jmpbuf, 1);
85
+}
86
+
87
+static bool have_fsgsbase;
88
+
89
+static inline unsigned long rdgsbase(void)
90
+{
91
+ unsigned long gsbase;
92
+
93
+ asm volatile("rdgsbase %0" : "=r" (gsbase) :: "memory");
94
+
95
+ return gsbase;
96
+}
97
+
98
+static inline unsigned long rdfsbase(void)
99
+{
100
+ unsigned long fsbase;
101
+
102
+ asm volatile("rdfsbase %0" : "=r" (fsbase) :: "memory");
103
+
104
+ return fsbase;
105
+}
106
+
107
+static inline void wrgsbase(unsigned long gsbase)
108
+{
109
+ asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory");
110
+}
111
+
112
+static inline void wrfsbase(unsigned long fsbase)
113
+{
114
+ asm volatile("wrfsbase %0" :: "r" (fsbase) : "memory");
72115 }
73116
74117 enum which_base { FS, GS };
....@@ -199,16 +242,13 @@
199242 to_set, hard_zero ? " and clear gs" : "", sel);
200243 }
201244
202
-void do_unexpected_base(void)
245
+static __thread int set_thread_area_entry_number = -1;
246
+
247
+static unsigned short load_gs(void)
203248 {
204249 /*
205
- * The goal here is to try to arrange for GS == 0, GSBASE !=
206
- * 0, and for the the kernel the think that GSBASE == 0.
207
- *
208
- * To make the test as reliable as possible, this uses
209
- * explicit descriptorss. (This is not the only way. This
210
- * could use ARCH_SET_GS with a low, nonzero base, but the
211
- * relevant side effect of ARCH_SET_GS could change.)
250
+ * Sets GS != 0 and GSBASE != 0 but arranges for the kernel to think
251
+ * that GSBASE == 0 (i.e. thread.gsbase == 0).
212252 */
213253
214254 /* Step 1: tell the kernel that we have GSBASE == 0. */
....@@ -228,8 +268,9 @@
228268 .useable = 0
229269 };
230270 if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
231
- printf("\tother thread: using LDT slot 0\n");
271
+ printf("\tusing LDT slot 0\n");
232272 asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7));
273
+ return 0x7;
233274 } else {
234275 /* No modify_ldt for us (configured out, perhaps) */
235276
....@@ -239,30 +280,56 @@
239280 MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
240281 memcpy(low_desc, &desc, sizeof(desc));
241282
242
- low_desc->entry_number = -1;
283
+ low_desc->entry_number = set_thread_area_entry_number;
243284
244285 /* 32-bit set_thread_area */
245286 long ret;
246287 asm volatile ("int $0x80"
247
- : "=a" (ret) : "a" (243), "b" (low_desc)
288
+ : "=a" (ret), "+m" (*low_desc)
289
+ : "a" (243), "b" (low_desc)
248290 : "r8", "r9", "r10", "r11");
249291 memcpy(&desc, low_desc, sizeof(desc));
250292 munmap(low_desc, sizeof(desc));
251293
252294 if (ret != 0) {
253295 printf("[NOTE]\tcould not create a segment -- test won't do anything\n");
254
- return;
296
+ return 0;
255297 }
256
- printf("\tother thread: using GDT slot %d\n", desc.entry_number);
257
- asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)((desc.entry_number << 3) | 0x3)));
298
+ printf("\tusing GDT slot %d\n", desc.entry_number);
299
+ set_thread_area_entry_number = desc.entry_number;
300
+
301
+ unsigned short gs = (unsigned short)((desc.entry_number << 3) | 0x3);
302
+ asm volatile ("mov %0, %%gs" : : "rm" (gs));
303
+ return gs;
258304 }
305
+}
259306
260
- /*
261
- * Step 3: set the selector back to zero. On AMD chips, this will
262
- * preserve GSBASE.
263
- */
307
+void test_wrbase(unsigned short index, unsigned long base)
308
+{
309
+ unsigned short newindex;
310
+ unsigned long newbase;
264311
265
- asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
312
+ printf("[RUN]\tGS = 0x%hx, GSBASE = 0x%lx\n", index, base);
313
+
314
+ asm volatile ("mov %0, %%gs" : : "rm" (index));
315
+ wrgsbase(base);
316
+
317
+ remote_base = 0;
318
+ ftx = 1;
319
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
320
+ while (ftx != 0)
321
+ syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
322
+
323
+ asm volatile ("mov %%gs, %0" : "=rm" (newindex));
324
+ newbase = rdgsbase();
325
+
326
+ if (newindex == index && newbase == base) {
327
+ printf("[OK]\tIndex and base were preserved\n");
328
+ } else {
329
+ printf("[FAIL]\tAfter switch, GS = 0x%hx and GSBASE = 0x%lx\n",
330
+ newindex, newbase);
331
+ nerrs++;
332
+ }
266333 }
267334
268335 static void *threadproc(void *ctx)
....@@ -273,12 +340,19 @@
273340 if (ftx == 3)
274341 return NULL;
275342
276
- if (ftx == 1)
343
+ if (ftx == 1) {
277344 do_remote_base();
278
- else if (ftx == 2)
279
- do_unexpected_base();
280
- else
345
+ } else if (ftx == 2) {
346
+ /*
347
+ * On AMD chips, this causes GSBASE != 0, GS == 0, and
348
+ * thread.gsbase == 0.
349
+ */
350
+
351
+ load_gs();
352
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
353
+ } else {
281354 errx(1, "helper thread got bad command");
355
+ }
282356
283357 ftx = 0;
284358 syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
....@@ -367,9 +441,169 @@
367441 }
368442 }
369443
444
+#define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r)
445
+
446
+static void test_ptrace_write_gs_read_base(void)
447
+{
448
+ int status;
449
+ pid_t child = fork();
450
+
451
+ if (child < 0)
452
+ err(1, "fork");
453
+
454
+ if (child == 0) {
455
+ printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n");
456
+
457
+ printf("[RUN]\tARCH_SET_GS to 1\n");
458
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0)
459
+ err(1, "ARCH_SET_GS");
460
+
461
+ if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
462
+ err(1, "PTRACE_TRACEME");
463
+
464
+ raise(SIGTRAP);
465
+ _exit(0);
466
+ }
467
+
468
+ wait(&status);
469
+
470
+ if (WSTOPSIG(status) == SIGTRAP) {
471
+ unsigned long base;
472
+ unsigned long gs_offset = USER_REGS_OFFSET(gs);
473
+ unsigned long base_offset = USER_REGS_OFFSET(gs_base);
474
+
475
+ /* Read the initial base. It should be 1. */
476
+ base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
477
+ if (base == 1) {
478
+ printf("[OK]\tGSBASE started at 1\n");
479
+ } else {
480
+ nerrs++;
481
+ printf("[FAIL]\tGSBASE started at 0x%lx\n", base);
482
+ }
483
+
484
+ printf("[RUN]\tSet GS = 0x7, read GSBASE\n");
485
+
486
+ /* Poke an LDT selector into GS. */
487
+ if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0)
488
+ err(1, "PTRACE_POKEUSER");
489
+
490
+ /* And read the base. */
491
+ base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
492
+
493
+ if (base == 0 || base == 1) {
494
+ printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base);
495
+ } else {
496
+ nerrs++;
497
+ printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base);
498
+ }
499
+ }
500
+
501
+ ptrace(PTRACE_CONT, child, NULL, NULL);
502
+
503
+ wait(&status);
504
+ if (!WIFEXITED(status))
505
+ printf("[WARN]\tChild didn't exit cleanly.\n");
506
+}
507
+
508
+static void test_ptrace_write_gsbase(void)
509
+{
510
+ int status;
511
+ pid_t child = fork();
512
+
513
+ if (child < 0)
514
+ err(1, "fork");
515
+
516
+ if (child == 0) {
517
+ printf("[RUN]\tPTRACE_POKE(), write GSBASE from ptracer\n");
518
+
519
+ *shared_scratch = load_gs();
520
+
521
+ if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
522
+ err(1, "PTRACE_TRACEME");
523
+
524
+ raise(SIGTRAP);
525
+ _exit(0);
526
+ }
527
+
528
+ wait(&status);
529
+
530
+ if (WSTOPSIG(status) == SIGTRAP) {
531
+ unsigned long gs, base;
532
+ unsigned long gs_offset = USER_REGS_OFFSET(gs);
533
+ unsigned long base_offset = USER_REGS_OFFSET(gs_base);
534
+
535
+ gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL);
536
+
537
+ if (gs != *shared_scratch) {
538
+ nerrs++;
539
+ printf("[FAIL]\tGS is not prepared with nonzero\n");
540
+ goto END;
541
+ }
542
+
543
+ if (ptrace(PTRACE_POKEUSER, child, base_offset, 0xFF) != 0)
544
+ err(1, "PTRACE_POKEUSER");
545
+
546
+ gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL);
547
+ base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
548
+
549
+ /*
550
+ * In a non-FSGSBASE system, the nonzero selector will load
551
+ * GSBASE (again). But what is tested here is whether the
552
+ * selector value is changed or not by the GSBASE write in
553
+ * a ptracer.
554
+ */
555
+ if (gs != *shared_scratch) {
556
+ nerrs++;
557
+ printf("[FAIL]\tGS changed to %lx\n", gs);
558
+
559
+ /*
560
+ * On older kernels, poking a nonzero value into the
561
+ * base would zero the selector. On newer kernels,
562
+ * this behavior has changed -- poking the base
563
+ * changes only the base and, if FSGSBASE is not
564
+ * available, this may have no effect once the tracee
565
+ * is resumed.
566
+ */
567
+ if (gs == 0)
568
+ printf("\tNote: this is expected behavior on older kernels.\n");
569
+ } else if (have_fsgsbase && (base != 0xFF)) {
570
+ nerrs++;
571
+ printf("[FAIL]\tGSBASE changed to %lx\n", base);
572
+ } else {
573
+ printf("[OK]\tGS remained 0x%hx", *shared_scratch);
574
+ if (have_fsgsbase)
575
+ printf(" and GSBASE changed to 0xFF");
576
+ printf("\n");
577
+ }
578
+ }
579
+
580
+END:
581
+ ptrace(PTRACE_CONT, child, NULL, NULL);
582
+ wait(&status);
583
+ if (!WIFEXITED(status))
584
+ printf("[WARN]\tChild didn't exit cleanly.\n");
585
+}
586
+
370587 int main()
371588 {
372589 pthread_t thread;
590
+
591
+ shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
592
+ MAP_ANONYMOUS | MAP_SHARED, -1, 0);
593
+
594
+ /* Do these tests before we have an LDT. */
595
+ test_ptrace_write_gs_read_base();
596
+
597
+ /* Probe FSGSBASE */
598
+ sethandler(SIGILL, sigill, 0);
599
+ if (sigsetjmp(jmpbuf, 1) == 0) {
600
+ rdfsbase();
601
+ have_fsgsbase = true;
602
+ printf("\tFSGSBASE instructions are enabled\n");
603
+ } else {
604
+ printf("\tFSGSBASE instructions are disabled\n");
605
+ }
606
+ clearhandler(SIGILL);
373607
374608 sethandler(SIGSEGV, sigsegv, 0);
375609
....@@ -417,11 +651,28 @@
417651
418652 test_unexpected_base();
419653
654
+ if (have_fsgsbase) {
655
+ unsigned short ss;
656
+
657
+ asm volatile ("mov %%ss, %0" : "=rm" (ss));
658
+
659
+ test_wrbase(0, 0);
660
+ test_wrbase(0, 1);
661
+ test_wrbase(0, 0x200000000);
662
+ test_wrbase(0, 0xffffffffffffffff);
663
+ test_wrbase(ss, 0);
664
+ test_wrbase(ss, 1);
665
+ test_wrbase(ss, 0x200000000);
666
+ test_wrbase(ss, 0xffffffffffffffff);
667
+ }
668
+
420669 ftx = 3; /* Kill the thread. */
421670 syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
422671
423672 if (pthread_join(thread, NULL) != 0)
424673 err(1, "pthread_join");
425674
675
+ test_ptrace_write_gsbase();
676
+
426677 return nerrs == 0 ? 0 : 1;
427678 }