| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * fsgsbase.c, an fsgsbase test |
|---|
| 3 | 4 | * Copyright (c) 2014-2016 Andy Lutomirski |
|---|
| 4 | | - * GPL v2 |
|---|
| 5 | 5 | */ |
|---|
| 6 | 6 | |
|---|
| 7 | 7 | #define _GNU_SOURCE |
|---|
| .. | .. |
|---|
| 23 | 23 | #include <pthread.h> |
|---|
| 24 | 24 | #include <asm/ldt.h> |
|---|
| 25 | 25 | #include <sys/mman.h> |
|---|
| 26 | +#include <stddef.h> |
|---|
| 27 | +#include <sys/ptrace.h> |
|---|
| 28 | +#include <sys/wait.h> |
|---|
| 29 | +#include <setjmp.h> |
|---|
| 26 | 30 | |
|---|
| 27 | 31 | #ifndef __x86_64__ |
|---|
| 28 | 32 | # error This test is 64-bit only |
|---|
| .. | .. |
|---|
| 30 | 34 | |
|---|
| 31 | 35 | static volatile sig_atomic_t want_segv; |
|---|
| 32 | 36 | static volatile unsigned long segv_addr; |
|---|
| 37 | + |
|---|
| 38 | +static unsigned short *shared_scratch; |
|---|
| 33 | 39 | |
|---|
| 34 | 40 | static int nerrs; |
|---|
| 35 | 41 | |
|---|
| .. | .. |
|---|
| 69 | 75 | |
|---|
| 70 | 76 | ctx->uc_mcontext.gregs[REG_RIP] += 4; /* Skip the faulting mov */ |
|---|
| 71 | 77 | |
|---|
| 78 | +} |
|---|
| 79 | + |
|---|
/*
 * Jump target used to recover from SIGILL.
 *
 * Declared as sigjmp_buf because it is filled in by sigsetjmp() and
 * consumed by siglongjmp(); POSIX defines that pairing for sigjmp_buf,
 * not for plain jmp_buf.
 */
static sigjmp_buf jmpbuf;

/*
 * SIGILL handler: abandon the faulting instruction by jumping back to the
 * sigsetjmp() point recorded in jmpbuf, which then observes a return
 * value of 1.  None of the handler arguments are used.
 */
static void sigill(int sig, siginfo_t *si, void *ctx_void)
{
	siglongjmp(jmpbuf, 1);
}
|---|
| 86 | + |
|---|
| 87 | +static bool have_fsgsbase; |
|---|
| 88 | + |
|---|
| 89 | +static inline unsigned long rdgsbase(void) |
|---|
| 90 | +{ |
|---|
| 91 | + unsigned long gsbase; |
|---|
| 92 | + |
|---|
| 93 | + asm volatile("rdgsbase %0" : "=r" (gsbase) :: "memory"); |
|---|
| 94 | + |
|---|
| 95 | + return gsbase; |
|---|
| 96 | +} |
|---|
| 97 | + |
|---|
| 98 | +static inline unsigned long rdfsbase(void) |
|---|
| 99 | +{ |
|---|
| 100 | + unsigned long fsbase; |
|---|
| 101 | + |
|---|
| 102 | + asm volatile("rdfsbase %0" : "=r" (fsbase) :: "memory"); |
|---|
| 103 | + |
|---|
| 104 | + return fsbase; |
|---|
| 105 | +} |
|---|
| 106 | + |
|---|
| 107 | +static inline void wrgsbase(unsigned long gsbase) |
|---|
| 108 | +{ |
|---|
| 109 | + asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory"); |
|---|
| 110 | +} |
|---|
| 111 | + |
|---|
| 112 | +static inline void wrfsbase(unsigned long fsbase) |
|---|
| 113 | +{ |
|---|
| 114 | + asm volatile("wrfsbase %0" :: "r" (fsbase) : "memory"); |
|---|
| 72 | 115 | } |
|---|
| 73 | 116 | |
|---|
| 74 | 117 | enum which_base { FS, GS }; |
|---|
| .. | .. |
|---|
| 199 | 242 | to_set, hard_zero ? " and clear gs" : "", sel); |
|---|
| 200 | 243 | } |
|---|
| 201 | 244 | |
|---|
| 202 | | -void do_unexpected_base(void) |
|---|
| 245 | +static __thread int set_thread_area_entry_number = -1; |
|---|
| 246 | + |
|---|
| 247 | +static unsigned short load_gs(void) |
|---|
| 203 | 248 | { |
|---|
| 204 | 249 | /* |
|---|
| 205 | | - * The goal here is to try to arrange for GS == 0, GSBASE != |
|---|
| 206 | | - * 0, and for the the kernel the think that GSBASE == 0. |
|---|
| 207 | | - * |
|---|
| 208 | | - * To make the test as reliable as possible, this uses |
|---|
| 209 | | - * explicit descriptorss. (This is not the only way. This |
|---|
| 210 | | - * could use ARCH_SET_GS with a low, nonzero base, but the |
|---|
| 211 | | - * relevant side effect of ARCH_SET_GS could change.) |
|---|
| 250 | + * Sets GS != 0 and GSBASE != 0 but arranges for the kernel to think |
|---|
| 251 | + * that GSBASE == 0 (i.e. thread.gsbase == 0). |
|---|
| 212 | 252 | */ |
|---|
| 213 | 253 | |
|---|
| 214 | 254 | /* Step 1: tell the kernel that we have GSBASE == 0. */ |
|---|
| .. | .. |
|---|
| 228 | 268 | .useable = 0 |
|---|
| 229 | 269 | }; |
|---|
| 230 | 270 | if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) { |
|---|
| 231 | | - printf("\tother thread: using LDT slot 0\n"); |
|---|
| 271 | + printf("\tusing LDT slot 0\n"); |
|---|
| 232 | 272 | asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7)); |
|---|
| 273 | + return 0x7; |
|---|
| 233 | 274 | } else { |
|---|
| 234 | 275 | /* No modify_ldt for us (configured out, perhaps) */ |
|---|
| 235 | 276 | |
|---|
| .. | .. |
|---|
| 239 | 280 | MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0); |
|---|
| 240 | 281 | memcpy(low_desc, &desc, sizeof(desc)); |
|---|
| 241 | 282 | |
|---|
| 242 | | - low_desc->entry_number = -1; |
|---|
| 283 | + low_desc->entry_number = set_thread_area_entry_number; |
|---|
| 243 | 284 | |
|---|
| 244 | 285 | /* 32-bit set_thread_area */ |
|---|
| 245 | 286 | long ret; |
|---|
| 246 | 287 | asm volatile ("int $0x80" |
|---|
| 247 | | - : "=a" (ret) : "a" (243), "b" (low_desc) |
|---|
| 288 | + : "=a" (ret), "+m" (*low_desc) |
|---|
| 289 | + : "a" (243), "b" (low_desc) |
|---|
| 248 | 290 | : "r8", "r9", "r10", "r11"); |
|---|
| 249 | 291 | memcpy(&desc, low_desc, sizeof(desc)); |
|---|
| 250 | 292 | munmap(low_desc, sizeof(desc)); |
|---|
| 251 | 293 | |
|---|
| 252 | 294 | if (ret != 0) { |
|---|
| 253 | 295 | printf("[NOTE]\tcould not create a segment -- test won't do anything\n"); |
|---|
| 254 | | - return; |
|---|
| 296 | + return 0; |
|---|
| 255 | 297 | } |
|---|
| 256 | | - printf("\tother thread: using GDT slot %d\n", desc.entry_number); |
|---|
| 257 | | - asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)((desc.entry_number << 3) | 0x3))); |
|---|
| 298 | + printf("\tusing GDT slot %d\n", desc.entry_number); |
|---|
| 299 | + set_thread_area_entry_number = desc.entry_number; |
|---|
| 300 | + |
|---|
| 301 | + unsigned short gs = (unsigned short)((desc.entry_number << 3) | 0x3); |
|---|
| 302 | + asm volatile ("mov %0, %%gs" : : "rm" (gs)); |
|---|
| 303 | + return gs; |
|---|
| 258 | 304 | } |
|---|
| 305 | +} |
|---|
| 259 | 306 | |
|---|
| 260 | | - /* |
|---|
| 261 | | - * Step 3: set the selector back to zero. On AMD chips, this will |
|---|
| 262 | | - * preserve GSBASE. |
|---|
| 263 | | - */ |
|---|
| 307 | +void test_wrbase(unsigned short index, unsigned long base) |
|---|
| 308 | +{ |
|---|
| 309 | + unsigned short newindex; |
|---|
| 310 | + unsigned long newbase; |
|---|
| 264 | 311 | |
|---|
| 265 | | - asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0)); |
|---|
| 312 | + printf("[RUN]\tGS = 0x%hx, GSBASE = 0x%lx\n", index, base); |
|---|
| 313 | + |
|---|
| 314 | + asm volatile ("mov %0, %%gs" : : "rm" (index)); |
|---|
| 315 | + wrgsbase(base); |
|---|
| 316 | + |
|---|
| 317 | + remote_base = 0; |
|---|
| 318 | + ftx = 1; |
|---|
| 319 | + syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); |
|---|
| 320 | + while (ftx != 0) |
|---|
| 321 | + syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0); |
|---|
| 322 | + |
|---|
| 323 | + asm volatile ("mov %%gs, %0" : "=rm" (newindex)); |
|---|
| 324 | + newbase = rdgsbase(); |
|---|
| 325 | + |
|---|
| 326 | + if (newindex == index && newbase == base) { |
|---|
| 327 | + printf("[OK]\tIndex and base were preserved\n"); |
|---|
| 328 | + } else { |
|---|
| 329 | + printf("[FAIL]\tAfter switch, GS = 0x%hx and GSBASE = 0x%lx\n", |
|---|
| 330 | + newindex, newbase); |
|---|
| 331 | + nerrs++; |
|---|
| 332 | + } |
|---|
| 266 | 333 | } |
|---|
| 267 | 334 | |
|---|
| 268 | 335 | static void *threadproc(void *ctx) |
|---|
| .. | .. |
|---|
| 273 | 340 | if (ftx == 3) |
|---|
| 274 | 341 | return NULL; |
|---|
| 275 | 342 | |
|---|
| 276 | | - if (ftx == 1) |
|---|
| 343 | + if (ftx == 1) { |
|---|
| 277 | 344 | do_remote_base(); |
|---|
| 278 | | - else if (ftx == 2) |
|---|
| 279 | | - do_unexpected_base(); |
|---|
| 280 | | - else |
|---|
| 345 | + } else if (ftx == 2) { |
|---|
| 346 | + /* |
|---|
| 347 | + * On AMD chips, this causes GSBASE != 0, GS == 0, and |
|---|
| 348 | + * thread.gsbase == 0. |
|---|
| 349 | + */ |
|---|
| 350 | + |
|---|
| 351 | + load_gs(); |
|---|
| 352 | + asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0)); |
|---|
| 353 | + } else { |
|---|
| 281 | 354 | errx(1, "helper thread got bad command"); |
|---|
| 355 | + } |
|---|
| 282 | 356 | |
|---|
| 283 | 357 | ftx = 0; |
|---|
| 284 | 358 | syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); |
|---|
| .. | .. |
|---|
| 367 | 441 | } |
|---|
| 368 | 442 | } |
|---|
| 369 | 443 | |
|---|
| 444 | +#define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r) |
|---|
| 445 | + |
|---|
| 446 | +static void test_ptrace_write_gs_read_base(void) |
|---|
| 447 | +{ |
|---|
| 448 | + int status; |
|---|
| 449 | + pid_t child = fork(); |
|---|
| 450 | + |
|---|
| 451 | + if (child < 0) |
|---|
| 452 | + err(1, "fork"); |
|---|
| 453 | + |
|---|
| 454 | + if (child == 0) { |
|---|
| 455 | + printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n"); |
|---|
| 456 | + |
|---|
| 457 | + printf("[RUN]\tARCH_SET_GS to 1\n"); |
|---|
| 458 | + if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0) |
|---|
| 459 | + err(1, "ARCH_SET_GS"); |
|---|
| 460 | + |
|---|
| 461 | + if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) |
|---|
| 462 | + err(1, "PTRACE_TRACEME"); |
|---|
| 463 | + |
|---|
| 464 | + raise(SIGTRAP); |
|---|
| 465 | + _exit(0); |
|---|
| 466 | + } |
|---|
| 467 | + |
|---|
| 468 | + wait(&status); |
|---|
| 469 | + |
|---|
| 470 | + if (WSTOPSIG(status) == SIGTRAP) { |
|---|
| 471 | + unsigned long base; |
|---|
| 472 | + unsigned long gs_offset = USER_REGS_OFFSET(gs); |
|---|
| 473 | + unsigned long base_offset = USER_REGS_OFFSET(gs_base); |
|---|
| 474 | + |
|---|
| 475 | + /* Read the initial base. It should be 1. */ |
|---|
| 476 | + base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL); |
|---|
| 477 | + if (base == 1) { |
|---|
| 478 | + printf("[OK]\tGSBASE started at 1\n"); |
|---|
| 479 | + } else { |
|---|
| 480 | + nerrs++; |
|---|
| 481 | + printf("[FAIL]\tGSBASE started at 0x%lx\n", base); |
|---|
| 482 | + } |
|---|
| 483 | + |
|---|
| 484 | + printf("[RUN]\tSet GS = 0x7, read GSBASE\n"); |
|---|
| 485 | + |
|---|
| 486 | + /* Poke an LDT selector into GS. */ |
|---|
| 487 | + if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0) |
|---|
| 488 | + err(1, "PTRACE_POKEUSER"); |
|---|
| 489 | + |
|---|
| 490 | + /* And read the base. */ |
|---|
| 491 | + base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL); |
|---|
| 492 | + |
|---|
| 493 | + if (base == 0 || base == 1) { |
|---|
| 494 | + printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base); |
|---|
| 495 | + } else { |
|---|
| 496 | + nerrs++; |
|---|
| 497 | + printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base); |
|---|
| 498 | + } |
|---|
| 499 | + } |
|---|
| 500 | + |
|---|
| 501 | + ptrace(PTRACE_CONT, child, NULL, NULL); |
|---|
| 502 | + |
|---|
| 503 | + wait(&status); |
|---|
| 504 | + if (!WIFEXITED(status)) |
|---|
| 505 | + printf("[WARN]\tChild didn't exit cleanly.\n"); |
|---|
| 506 | +} |
|---|
| 507 | + |
|---|
| 508 | +static void test_ptrace_write_gsbase(void) |
|---|
| 509 | +{ |
|---|
| 510 | + int status; |
|---|
| 511 | + pid_t child = fork(); |
|---|
| 512 | + |
|---|
| 513 | + if (child < 0) |
|---|
| 514 | + err(1, "fork"); |
|---|
| 515 | + |
|---|
| 516 | + if (child == 0) { |
|---|
| 517 | + printf("[RUN]\tPTRACE_POKE(), write GSBASE from ptracer\n"); |
|---|
| 518 | + |
|---|
| 519 | + *shared_scratch = load_gs(); |
|---|
| 520 | + |
|---|
| 521 | + if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) |
|---|
| 522 | + err(1, "PTRACE_TRACEME"); |
|---|
| 523 | + |
|---|
| 524 | + raise(SIGTRAP); |
|---|
| 525 | + _exit(0); |
|---|
| 526 | + } |
|---|
| 527 | + |
|---|
| 528 | + wait(&status); |
|---|
| 529 | + |
|---|
| 530 | + if (WSTOPSIG(status) == SIGTRAP) { |
|---|
| 531 | + unsigned long gs, base; |
|---|
| 532 | + unsigned long gs_offset = USER_REGS_OFFSET(gs); |
|---|
| 533 | + unsigned long base_offset = USER_REGS_OFFSET(gs_base); |
|---|
| 534 | + |
|---|
| 535 | + gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL); |
|---|
| 536 | + |
|---|
| 537 | + if (gs != *shared_scratch) { |
|---|
| 538 | + nerrs++; |
|---|
| 539 | + printf("[FAIL]\tGS is not prepared with nonzero\n"); |
|---|
| 540 | + goto END; |
|---|
| 541 | + } |
|---|
| 542 | + |
|---|
| 543 | + if (ptrace(PTRACE_POKEUSER, child, base_offset, 0xFF) != 0) |
|---|
| 544 | + err(1, "PTRACE_POKEUSER"); |
|---|
| 545 | + |
|---|
| 546 | + gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL); |
|---|
| 547 | + base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL); |
|---|
| 548 | + |
|---|
| 549 | + /* |
|---|
| 550 | + * In a non-FSGSBASE system, the nonzero selector will load |
|---|
| 551 | + * GSBASE (again). But what is tested here is whether the |
|---|
| 552 | + * selector value is changed or not by the GSBASE write in |
|---|
| 553 | + * a ptracer. |
|---|
| 554 | + */ |
|---|
| 555 | + if (gs != *shared_scratch) { |
|---|
| 556 | + nerrs++; |
|---|
| 557 | + printf("[FAIL]\tGS changed to %lx\n", gs); |
|---|
| 558 | + |
|---|
| 559 | + /* |
|---|
| 560 | + * On older kernels, poking a nonzero value into the |
|---|
| 561 | + * base would zero the selector. On newer kernels, |
|---|
| 562 | + * this behavior has changed -- poking the base |
|---|
| 563 | + * changes only the base and, if FSGSBASE is not |
|---|
| 564 | + * available, this may have no effect once the tracee |
|---|
| 565 | + * is resumed. |
|---|
| 566 | + */ |
|---|
| 567 | + if (gs == 0) |
|---|
| 568 | + printf("\tNote: this is expected behavior on older kernels.\n"); |
|---|
| 569 | + } else if (have_fsgsbase && (base != 0xFF)) { |
|---|
| 570 | + nerrs++; |
|---|
| 571 | + printf("[FAIL]\tGSBASE changed to %lx\n", base); |
|---|
| 572 | + } else { |
|---|
| 573 | + printf("[OK]\tGS remained 0x%hx", *shared_scratch); |
|---|
| 574 | + if (have_fsgsbase) |
|---|
| 575 | + printf(" and GSBASE changed to 0xFF"); |
|---|
| 576 | + printf("\n"); |
|---|
| 577 | + } |
|---|
| 578 | + } |
|---|
| 579 | + |
|---|
| 580 | +END: |
|---|
| 581 | + ptrace(PTRACE_CONT, child, NULL, NULL); |
|---|
| 582 | + wait(&status); |
|---|
| 583 | + if (!WIFEXITED(status)) |
|---|
| 584 | + printf("[WARN]\tChild didn't exit cleanly.\n"); |
|---|
| 585 | +} |
|---|
| 586 | + |
|---|
| 370 | 587 | int main() |
|---|
| 371 | 588 | { |
|---|
| 372 | 589 | pthread_t thread; |
|---|
| 590 | + |
|---|
| 591 | + shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE, |
|---|
| 592 | + MAP_ANONYMOUS | MAP_SHARED, -1, 0); |
|---|
| 593 | + |
|---|
| 594 | + /* Do these tests before we have an LDT. */ |
|---|
| 595 | + test_ptrace_write_gs_read_base(); |
|---|
| 596 | + |
|---|
| 597 | + /* Probe FSGSBASE */ |
|---|
| 598 | + sethandler(SIGILL, sigill, 0); |
|---|
| 599 | + if (sigsetjmp(jmpbuf, 1) == 0) { |
|---|
| 600 | + rdfsbase(); |
|---|
| 601 | + have_fsgsbase = true; |
|---|
| 602 | + printf("\tFSGSBASE instructions are enabled\n"); |
|---|
| 603 | + } else { |
|---|
| 604 | + printf("\tFSGSBASE instructions are disabled\n"); |
|---|
| 605 | + } |
|---|
| 606 | + clearhandler(SIGILL); |
|---|
| 373 | 607 | |
|---|
| 374 | 608 | sethandler(SIGSEGV, sigsegv, 0); |
|---|
| 375 | 609 | |
|---|
| .. | .. |
|---|
| 417 | 651 | |
|---|
| 418 | 652 | test_unexpected_base(); |
|---|
| 419 | 653 | |
|---|
| 654 | + if (have_fsgsbase) { |
|---|
| 655 | + unsigned short ss; |
|---|
| 656 | + |
|---|
| 657 | + asm volatile ("mov %%ss, %0" : "=rm" (ss)); |
|---|
| 658 | + |
|---|
| 659 | + test_wrbase(0, 0); |
|---|
| 660 | + test_wrbase(0, 1); |
|---|
| 661 | + test_wrbase(0, 0x200000000); |
|---|
| 662 | + test_wrbase(0, 0xffffffffffffffff); |
|---|
| 663 | + test_wrbase(ss, 0); |
|---|
| 664 | + test_wrbase(ss, 1); |
|---|
| 665 | + test_wrbase(ss, 0x200000000); |
|---|
| 666 | + test_wrbase(ss, 0xffffffffffffffff); |
|---|
| 667 | + } |
|---|
| 668 | + |
|---|
| 420 | 669 | ftx = 3; /* Kill the thread. */ |
|---|
| 421 | 670 | syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); |
|---|
| 422 | 671 | |
|---|
| 423 | 672 | if (pthread_join(thread, NULL) != 0) |
|---|
| 424 | 673 | err(1, "pthread_join"); |
|---|
| 425 | 674 | |
|---|
| 675 | + test_ptrace_write_gsbase(); |
|---|
| 676 | + |
|---|
| 426 | 677 | return nerrs == 0 ? 0 : 1; |
|---|
| 427 | 678 | } |
|---|