.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * fsgsbase.c, an fsgsbase test |
---|
3 | 4 | * Copyright (c) 2014-2016 Andy Lutomirski |
---|
4 | | - * GPL v2 |
---|
5 | 5 | */ |
---|
6 | 6 | |
---|
7 | 7 | #define _GNU_SOURCE |
---|
.. | .. |
---|
23 | 23 | #include <pthread.h> |
---|
24 | 24 | #include <asm/ldt.h> |
---|
25 | 25 | #include <sys/mman.h> |
---|
| 26 | +#include <stddef.h> |
---|
| 27 | +#include <sys/ptrace.h> |
---|
| 28 | +#include <sys/wait.h> |
---|
| 29 | +#include <setjmp.h> |
---|
26 | 30 | |
---|
27 | 31 | #ifndef __x86_64__ |
---|
28 | 32 | # error This test is 64-bit only |
---|
.. | .. |
---|
30 | 34 | |
---|
31 | 35 | static volatile sig_atomic_t want_segv; |
---|
32 | 36 | static volatile unsigned long segv_addr; |
---|
| 37 | + |
---|
| 38 | +static unsigned short *shared_scratch; |
---|
33 | 39 | |
---|
34 | 40 | static int nerrs; |
---|
35 | 41 | |
---|
.. | .. |
---|
69 | 75 | |
---|
70 | 76 | ctx->uc_mcontext.gregs[REG_RIP] += 4; /* Skip the faulting mov */ |
---|
71 | 77 | |
---|
| 78 | +} |
---|
| 79 | + |
---|
/*
 * Jump target used to recover from SIGILL.  main() arms it with
 * sigsetjmp(jmpbuf, 1) and the handler below unwinds to it with
 * siglongjmp(), so it is declared as sigjmp_buf, the type POSIX specifies
 * for that pair of functions (plain jmp_buf belongs to setjmp()/longjmp()).
 */
static sigjmp_buf jmpbuf;

/*
 * SIGILL handler: abandon the faulting instruction and resume at the
 * sigsetjmp() call site, which then observes a return value of 1.
 * None of the handler arguments are used.
 */
static void sigill(int sig, siginfo_t *si, void *ctx_void)
{
	siglongjmp(jmpbuf, 1);
}
---|
| 86 | + |
---|
| 87 | +static bool have_fsgsbase; |
---|
| 88 | + |
---|
/*
 * Read the GS base of the calling thread with the RDGSBASE instruction.
 *
 * main() probes the FSGSBASE instructions under a SIGILL handler and records
 * the outcome in have_fsgsbase; this helper does no checking of its own.
 */
static inline unsigned long rdgsbase(void)
{
	unsigned long value;

	__asm__ volatile("rdgsbase %[value]"
			 : [value] "=r" (value)
			 :
			 : "memory");
	return value;
}
---|
| 97 | + |
---|
/*
 * Read the FS base of the calling thread with the RDFSBASE instruction.
 *
 * main() uses this call as its FSGSBASE probe: it runs it with a SIGILL
 * handler installed and treats a clean return as "instructions enabled".
 */
static inline unsigned long rdfsbase(void)
{
	unsigned long value;

	__asm__ volatile("rdfsbase %[value]"
			 : [value] "=r" (value)
			 :
			 : "memory");
	return value;
}
---|
| 106 | + |
---|
/*
 * Set the GS base of the calling thread to @gsbase with the WRGSBASE
 * instruction.  The selector in GS is not touched by this helper.
 */
static inline void wrgsbase(unsigned long gsbase)
{
	__asm__ volatile("wrgsbase %[base]"
			 :
			 : [base] "r" (gsbase)
			 : "memory");
}
---|
| 111 | + |
---|
/*
 * Set the FS base of the calling thread to @fsbase with the WRFSBASE
 * instruction.  The selector in FS is not touched by this helper.
 */
static inline void wrfsbase(unsigned long fsbase)
{
	__asm__ volatile("wrfsbase %[base]"
			 :
			 : [base] "r" (fsbase)
			 : "memory");
}
---|
73 | 116 | |
---|
74 | 117 | enum which_base { FS, GS }; |
---|
.. | .. |
---|
199 | 242 | to_set, hard_zero ? " and clear gs" : "", sel); |
---|
200 | 243 | } |
---|
201 | 244 | |
---|
202 | | -void do_unexpected_base(void) |
---|
| 245 | +static __thread int set_thread_area_entry_number = -1; |
---|
| 246 | + |
---|
| 247 | +static unsigned short load_gs(void) |
---|
203 | 248 | { |
---|
204 | 249 | /* |
---|
205 | | - * The goal here is to try to arrange for GS == 0, GSBASE != |
---|
206 | | - * 0, and for the the kernel the think that GSBASE == 0. |
---|
207 | | - * |
---|
208 | | - * To make the test as reliable as possible, this uses |
---|
209 | | - * explicit descriptorss. (This is not the only way. This |
---|
210 | | - * could use ARCH_SET_GS with a low, nonzero base, but the |
---|
211 | | - * relevant side effect of ARCH_SET_GS could change.) |
---|
| 250 | + * Sets GS != 0 and GSBASE != 0 but arranges for the kernel to think |
---|
| 251 | + * that GSBASE == 0 (i.e. thread.gsbase == 0). |
---|
212 | 252 | */ |
---|
213 | 253 | |
---|
214 | 254 | /* Step 1: tell the kernel that we have GSBASE == 0. */ |
---|
.. | .. |
---|
228 | 268 | .useable = 0 |
---|
229 | 269 | }; |
---|
230 | 270 | if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) { |
---|
231 | | - printf("\tother thread: using LDT slot 0\n"); |
---|
| 271 | + printf("\tusing LDT slot 0\n"); |
---|
232 | 272 | asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7)); |
---|
| 273 | + return 0x7; |
---|
233 | 274 | } else { |
---|
234 | 275 | /* No modify_ldt for us (configured out, perhaps) */ |
---|
235 | 276 | |
---|
.. | .. |
---|
239 | 280 | MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0); |
---|
240 | 281 | memcpy(low_desc, &desc, sizeof(desc)); |
---|
241 | 282 | |
---|
242 | | - low_desc->entry_number = -1; |
---|
| 283 | + low_desc->entry_number = set_thread_area_entry_number; |
---|
243 | 284 | |
---|
244 | 285 | /* 32-bit set_thread_area */ |
---|
245 | 286 | long ret; |
---|
246 | 287 | asm volatile ("int $0x80" |
---|
247 | | - : "=a" (ret) : "a" (243), "b" (low_desc) |
---|
| 288 | + : "=a" (ret), "+m" (*low_desc) |
---|
| 289 | + : "a" (243), "b" (low_desc) |
---|
248 | 290 | : "r8", "r9", "r10", "r11"); |
---|
249 | 291 | memcpy(&desc, low_desc, sizeof(desc)); |
---|
250 | 292 | munmap(low_desc, sizeof(desc)); |
---|
251 | 293 | |
---|
252 | 294 | if (ret != 0) { |
---|
253 | 295 | printf("[NOTE]\tcould not create a segment -- test won't do anything\n"); |
---|
254 | | - return; |
---|
| 296 | + return 0; |
---|
255 | 297 | } |
---|
256 | | - printf("\tother thread: using GDT slot %d\n", desc.entry_number); |
---|
257 | | - asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)((desc.entry_number << 3) | 0x3))); |
---|
| 298 | + printf("\tusing GDT slot %d\n", desc.entry_number); |
---|
| 299 | + set_thread_area_entry_number = desc.entry_number; |
---|
| 300 | + |
---|
| 301 | + unsigned short gs = (unsigned short)((desc.entry_number << 3) | 0x3); |
---|
| 302 | + asm volatile ("mov %0, %%gs" : : "rm" (gs)); |
---|
| 303 | + return gs; |
---|
258 | 304 | } |
---|
| 305 | +} |
---|
259 | 306 | |
---|
260 | | - /* |
---|
261 | | - * Step 3: set the selector back to zero. On AMD chips, this will |
---|
262 | | - * preserve GSBASE. |
---|
263 | | - */ |
---|
| 307 | +void test_wrbase(unsigned short index, unsigned long base) |
---|
| 308 | +{ |
---|
| 309 | + unsigned short newindex; |
---|
| 310 | + unsigned long newbase; |
---|
264 | 311 | |
---|
265 | | - asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0)); |
---|
| 312 | + printf("[RUN]\tGS = 0x%hx, GSBASE = 0x%lx\n", index, base); |
---|
| 313 | + |
---|
| 314 | + asm volatile ("mov %0, %%gs" : : "rm" (index)); |
---|
| 315 | + wrgsbase(base); |
---|
| 316 | + |
---|
| 317 | + remote_base = 0; |
---|
| 318 | + ftx = 1; |
---|
| 319 | + syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); |
---|
| 320 | + while (ftx != 0) |
---|
| 321 | + syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0); |
---|
| 322 | + |
---|
| 323 | + asm volatile ("mov %%gs, %0" : "=rm" (newindex)); |
---|
| 324 | + newbase = rdgsbase(); |
---|
| 325 | + |
---|
| 326 | + if (newindex == index && newbase == base) { |
---|
| 327 | + printf("[OK]\tIndex and base were preserved\n"); |
---|
| 328 | + } else { |
---|
| 329 | + printf("[FAIL]\tAfter switch, GS = 0x%hx and GSBASE = 0x%lx\n", |
---|
| 330 | + newindex, newbase); |
---|
| 331 | + nerrs++; |
---|
| 332 | + } |
---|
266 | 333 | } |
---|
267 | 334 | |
---|
268 | 335 | static void *threadproc(void *ctx) |
---|
.. | .. |
---|
273 | 340 | if (ftx == 3) |
---|
274 | 341 | return NULL; |
---|
275 | 342 | |
---|
276 | | - if (ftx == 1) |
---|
| 343 | + if (ftx == 1) { |
---|
277 | 344 | do_remote_base(); |
---|
278 | | - else if (ftx == 2) |
---|
279 | | - do_unexpected_base(); |
---|
280 | | - else |
---|
| 345 | + } else if (ftx == 2) { |
---|
| 346 | + /* |
---|
| 347 | + * On AMD chips, this causes GSBASE != 0, GS == 0, and |
---|
| 348 | + * thread.gsbase == 0. |
---|
| 349 | + */ |
---|
| 350 | + |
---|
| 351 | + load_gs(); |
---|
| 352 | + asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0)); |
---|
| 353 | + } else { |
---|
281 | 354 | errx(1, "helper thread got bad command"); |
---|
| 355 | + } |
---|
282 | 356 | |
---|
283 | 357 | ftx = 0; |
---|
284 | 358 | syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); |
---|
.. | .. |
---|
367 | 441 | } |
---|
368 | 442 | } |
---|
369 | 443 | |
---|
| 444 | +#define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r) |
---|
| 445 | + |
---|
| 446 | +static void test_ptrace_write_gs_read_base(void) |
---|
| 447 | +{ |
---|
| 448 | + int status; |
---|
| 449 | + pid_t child = fork(); |
---|
| 450 | + |
---|
| 451 | + if (child < 0) |
---|
| 452 | + err(1, "fork"); |
---|
| 453 | + |
---|
| 454 | + if (child == 0) { |
---|
| 455 | + printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n"); |
---|
| 456 | + |
---|
| 457 | + printf("[RUN]\tARCH_SET_GS to 1\n"); |
---|
| 458 | + if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0) |
---|
| 459 | + err(1, "ARCH_SET_GS"); |
---|
| 460 | + |
---|
| 461 | + if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) |
---|
| 462 | + err(1, "PTRACE_TRACEME"); |
---|
| 463 | + |
---|
| 464 | + raise(SIGTRAP); |
---|
| 465 | + _exit(0); |
---|
| 466 | + } |
---|
| 467 | + |
---|
| 468 | + wait(&status); |
---|
| 469 | + |
---|
| 470 | + if (WSTOPSIG(status) == SIGTRAP) { |
---|
| 471 | + unsigned long base; |
---|
| 472 | + unsigned long gs_offset = USER_REGS_OFFSET(gs); |
---|
| 473 | + unsigned long base_offset = USER_REGS_OFFSET(gs_base); |
---|
| 474 | + |
---|
| 475 | + /* Read the initial base. It should be 1. */ |
---|
| 476 | + base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL); |
---|
| 477 | + if (base == 1) { |
---|
| 478 | + printf("[OK]\tGSBASE started at 1\n"); |
---|
| 479 | + } else { |
---|
| 480 | + nerrs++; |
---|
| 481 | + printf("[FAIL]\tGSBASE started at 0x%lx\n", base); |
---|
| 482 | + } |
---|
| 483 | + |
---|
| 484 | + printf("[RUN]\tSet GS = 0x7, read GSBASE\n"); |
---|
| 485 | + |
---|
| 486 | + /* Poke an LDT selector into GS. */ |
---|
| 487 | + if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0) |
---|
| 488 | + err(1, "PTRACE_POKEUSER"); |
---|
| 489 | + |
---|
| 490 | + /* And read the base. */ |
---|
| 491 | + base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL); |
---|
| 492 | + |
---|
| 493 | + if (base == 0 || base == 1) { |
---|
| 494 | + printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base); |
---|
| 495 | + } else { |
---|
| 496 | + nerrs++; |
---|
| 497 | + printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base); |
---|
| 498 | + } |
---|
| 499 | + } |
---|
| 500 | + |
---|
| 501 | + ptrace(PTRACE_CONT, child, NULL, NULL); |
---|
| 502 | + |
---|
| 503 | + wait(&status); |
---|
| 504 | + if (!WIFEXITED(status)) |
---|
| 505 | + printf("[WARN]\tChild didn't exit cleanly.\n"); |
---|
| 506 | +} |
---|
| 507 | + |
---|
| 508 | +static void test_ptrace_write_gsbase(void) |
---|
| 509 | +{ |
---|
| 510 | + int status; |
---|
| 511 | + pid_t child = fork(); |
---|
| 512 | + |
---|
| 513 | + if (child < 0) |
---|
| 514 | + err(1, "fork"); |
---|
| 515 | + |
---|
| 516 | + if (child == 0) { |
---|
| 517 | + printf("[RUN]\tPTRACE_POKE(), write GSBASE from ptracer\n"); |
---|
| 518 | + |
---|
| 519 | + *shared_scratch = load_gs(); |
---|
| 520 | + |
---|
| 521 | + if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) |
---|
| 522 | + err(1, "PTRACE_TRACEME"); |
---|
| 523 | + |
---|
| 524 | + raise(SIGTRAP); |
---|
| 525 | + _exit(0); |
---|
| 526 | + } |
---|
| 527 | + |
---|
| 528 | + wait(&status); |
---|
| 529 | + |
---|
| 530 | + if (WSTOPSIG(status) == SIGTRAP) { |
---|
| 531 | + unsigned long gs, base; |
---|
| 532 | + unsigned long gs_offset = USER_REGS_OFFSET(gs); |
---|
| 533 | + unsigned long base_offset = USER_REGS_OFFSET(gs_base); |
---|
| 534 | + |
---|
| 535 | + gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL); |
---|
| 536 | + |
---|
| 537 | + if (gs != *shared_scratch) { |
---|
| 538 | + nerrs++; |
---|
| 539 | + printf("[FAIL]\tGS is not prepared with nonzero\n"); |
---|
| 540 | + goto END; |
---|
| 541 | + } |
---|
| 542 | + |
---|
| 543 | + if (ptrace(PTRACE_POKEUSER, child, base_offset, 0xFF) != 0) |
---|
| 544 | + err(1, "PTRACE_POKEUSER"); |
---|
| 545 | + |
---|
| 546 | + gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL); |
---|
| 547 | + base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL); |
---|
| 548 | + |
---|
| 549 | + /* |
---|
| 550 | + * In a non-FSGSBASE system, the nonzero selector will load |
---|
| 551 | + * GSBASE (again). But what is tested here is whether the |
---|
| 552 | + * selector value is changed or not by the GSBASE write in |
---|
| 553 | + * a ptracer. |
---|
| 554 | + */ |
---|
| 555 | + if (gs != *shared_scratch) { |
---|
| 556 | + nerrs++; |
---|
| 557 | + printf("[FAIL]\tGS changed to %lx\n", gs); |
---|
| 558 | + |
---|
| 559 | + /* |
---|
| 560 | + * On older kernels, poking a nonzero value into the |
---|
| 561 | + * base would zero the selector. On newer kernels, |
---|
| 562 | + * this behavior has changed -- poking the base |
---|
| 563 | + * changes only the base and, if FSGSBASE is not |
---|
| 564 | + * available, this may have no effect once the tracee |
---|
| 565 | + * is resumed. |
---|
| 566 | + */ |
---|
| 567 | + if (gs == 0) |
---|
| 568 | + printf("\tNote: this is expected behavior on older kernels.\n"); |
---|
| 569 | + } else if (have_fsgsbase && (base != 0xFF)) { |
---|
| 570 | + nerrs++; |
---|
| 571 | + printf("[FAIL]\tGSBASE changed to %lx\n", base); |
---|
| 572 | + } else { |
---|
| 573 | + printf("[OK]\tGS remained 0x%hx", *shared_scratch); |
---|
| 574 | + if (have_fsgsbase) |
---|
| 575 | + printf(" and GSBASE changed to 0xFF"); |
---|
| 576 | + printf("\n"); |
---|
| 577 | + } |
---|
| 578 | + } |
---|
| 579 | + |
---|
| 580 | +END: |
---|
| 581 | + ptrace(PTRACE_CONT, child, NULL, NULL); |
---|
| 582 | + wait(&status); |
---|
| 583 | + if (!WIFEXITED(status)) |
---|
| 584 | + printf("[WARN]\tChild didn't exit cleanly.\n"); |
---|
| 585 | +} |
---|
| 586 | + |
---|
370 | 587 | int main() |
---|
371 | 588 | { |
---|
372 | 589 | pthread_t thread; |
---|
| 590 | + |
---|
| 591 | + shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE, |
---|
| 592 | + MAP_ANONYMOUS | MAP_SHARED, -1, 0); |
---|
| 593 | + |
---|
| 594 | + /* Do these tests before we have an LDT. */ |
---|
| 595 | + test_ptrace_write_gs_read_base(); |
---|
| 596 | + |
---|
| 597 | + /* Probe FSGSBASE */ |
---|
| 598 | + sethandler(SIGILL, sigill, 0); |
---|
| 599 | + if (sigsetjmp(jmpbuf, 1) == 0) { |
---|
| 600 | + rdfsbase(); |
---|
| 601 | + have_fsgsbase = true; |
---|
| 602 | + printf("\tFSGSBASE instructions are enabled\n"); |
---|
| 603 | + } else { |
---|
| 604 | + printf("\tFSGSBASE instructions are disabled\n"); |
---|
| 605 | + } |
---|
| 606 | + clearhandler(SIGILL); |
---|
373 | 607 | |
---|
374 | 608 | sethandler(SIGSEGV, sigsegv, 0); |
---|
375 | 609 | |
---|
.. | .. |
---|
417 | 651 | |
---|
418 | 652 | test_unexpected_base(); |
---|
419 | 653 | |
---|
| 654 | + if (have_fsgsbase) { |
---|
| 655 | + unsigned short ss; |
---|
| 656 | + |
---|
| 657 | + asm volatile ("mov %%ss, %0" : "=rm" (ss)); |
---|
| 658 | + |
---|
| 659 | + test_wrbase(0, 0); |
---|
| 660 | + test_wrbase(0, 1); |
---|
| 661 | + test_wrbase(0, 0x200000000); |
---|
| 662 | + test_wrbase(0, 0xffffffffffffffff); |
---|
| 663 | + test_wrbase(ss, 0); |
---|
| 664 | + test_wrbase(ss, 1); |
---|
| 665 | + test_wrbase(ss, 0x200000000); |
---|
| 666 | + test_wrbase(ss, 0xffffffffffffffff); |
---|
| 667 | + } |
---|
| 668 | + |
---|
420 | 669 | ftx = 3; /* Kill the thread. */ |
---|
421 | 670 | syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); |
---|
422 | 671 | |
---|
423 | 672 | if (pthread_join(thread, NULL) != 0) |
---|
424 | 673 | err(1, "pthread_join"); |
---|
425 | 674 | |
---|
| 675 | + test_ptrace_write_gsbase(); |
---|
| 676 | + |
---|
426 | 677 | return nerrs == 0 ? 0 : 1; |
---|
427 | 678 | } |
---|