| .. | .. |
|---|
| 5 | 5 | */ |
|---|
| 6 | 6 | #include <linux/sched/task_stack.h> |
|---|
| 7 | 7 | #include <linux/stacktrace.h> |
|---|
| 8 | +#include <linux/security.h> |
|---|
| 8 | 9 | #include <linux/kallsyms.h> |
|---|
| 9 | 10 | #include <linux/seq_file.h> |
|---|
| 10 | 11 | #include <linux/spinlock.h> |
|---|
| .. | .. |
|---|
| 18 | 19 | |
|---|
| 19 | 20 | #include "trace.h" |
|---|
| 20 | 21 | |
|---|
| 21 | | -static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] = |
|---|
| 22 | | - { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX }; |
|---|
| 23 | | -unsigned stack_trace_index[STACK_TRACE_ENTRIES]; |
|---|
| 22 | +#define STACK_TRACE_ENTRIES 500 |
|---|
| 24 | 23 | |
|---|
| 25 | | -/* |
|---|
| 26 | | - * Reserve one entry for the passed in ip. This will allow |
|---|
| 27 | | - * us to remove most or all of the stack size overhead |
|---|
| 28 | | - * added by the stack tracer itself. |
|---|
| 29 | | - */ |
|---|
| 30 | | -struct stack_trace stack_trace_max = { |
|---|
| 31 | | - .max_entries = STACK_TRACE_ENTRIES - 1, |
|---|
| 32 | | - .entries = &stack_dump_trace[0], |
|---|
| 33 | | -}; |
|---|
| 24 | +static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES]; |
|---|
| 25 | +static unsigned stack_trace_index[STACK_TRACE_ENTRIES]; |
|---|
| 34 | 26 | |
|---|
| 35 | | -unsigned long stack_trace_max_size; |
|---|
| 36 | | -arch_spinlock_t stack_trace_max_lock = |
|---|
| 27 | +static unsigned int stack_trace_nr_entries; |
|---|
| 28 | +static unsigned long stack_trace_max_size; |
|---|
| 29 | +static arch_spinlock_t stack_trace_max_lock = |
|---|
| 37 | 30 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; |
|---|
| 38 | 31 | |
|---|
| 39 | 32 | DEFINE_PER_CPU(int, disable_stack_tracer); |
|---|
| 40 | 33 | static DEFINE_MUTEX(stack_sysctl_mutex); |
|---|
| 41 | 34 | |
|---|
| 42 | 35 | int stack_tracer_enabled; |
|---|
| 43 | | -static int last_stack_tracer_enabled; |
|---|
| 44 | 36 | |
|---|
| 45 | | -void stack_trace_print(void) |
|---|
| 37 | +static void print_max_stack(void) |
|---|
| 46 | 38 | { |
|---|
| 47 | 39 | long i; |
|---|
| 48 | 40 | int size; |
|---|
| 49 | 41 | |
|---|
| 50 | 42 | pr_emerg(" Depth Size Location (%d entries)\n" |
|---|
| 51 | 43 | " ----- ---- --------\n", |
|---|
| 52 | | - stack_trace_max.nr_entries); |
|---|
| 44 | + stack_trace_nr_entries); |
|---|
| 53 | 45 | |
|---|
| 54 | | - for (i = 0; i < stack_trace_max.nr_entries; i++) { |
|---|
| 55 | | - if (stack_dump_trace[i] == ULONG_MAX) |
|---|
| 56 | | - break; |
|---|
| 57 | | - if (i+1 == stack_trace_max.nr_entries || |
|---|
| 58 | | - stack_dump_trace[i+1] == ULONG_MAX) |
|---|
| 46 | + for (i = 0; i < stack_trace_nr_entries; i++) { |
|---|
| 47 | + if (i + 1 == stack_trace_nr_entries) |
|---|
| 59 | 48 | size = stack_trace_index[i]; |
|---|
| 60 | 49 | else |
|---|
| 61 | 50 | size = stack_trace_index[i] - stack_trace_index[i+1]; |
|---|
| .. | .. |
|---|
| 66 | 55 | } |
|---|
| 67 | 56 | |
|---|
| 68 | 57 | /* |
|---|
| 69 | | - * When arch-specific code overrides this function, the following |
|---|
| 70 | | - * data should be filled up, assuming stack_trace_max_lock is held to |
|---|
| 71 | | - * prevent concurrent updates. |
|---|
| 72 | | - * stack_trace_index[] |
|---|
| 73 | | - * stack_trace_max |
|---|
| 74 | | - * stack_trace_max_size |
|---|
| 58 | + * The stack tracer looks for a new maximum stack usage at each function call. |
|---|
| 59 | + * It registers a callback with ftrace, and in that callback it examines the |
|---|
| 60 | + * stack size. It derives the stack size from the variable passed in, which is |
|---|
| 61 | + * the address of a local variable in the stack_trace_call() callback function. |
|---|
| 62 | + * The stack size is the distance from that local variable's address to the top |
|---|
| 63 | + * of the current stack (a stand-alone sketch follows). If that size is smaller |
|---|
| 64 | + * than the currently saved max stack size, nothing more is done. |
|---|
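As a rough illustration of that size calculation, here is a minimal stand-alone sketch. The THREAD_SIZE masking mirrors what the kernel does for its own THREAD_SIZE-aligned stacks; a user-space stack has no such alignment, so the printed number and all names here are illustrative only.

```c
#include <stdio.h>

#define THREAD_SIZE 16384	/* assumed per-task stack size */

/*
 * Distance from a stack address to the top of its (assumed)
 * THREAD_SIZE-aligned stack region: roughly the "size" the
 * tracer compares against the saved maximum.
 */
static unsigned long stack_used(unsigned long sp)
{
	return THREAD_SIZE - (sp & (THREAD_SIZE - 1));
}

int main(void)
{
	unsigned long dummy;	/* stand-in for the callback's local variable */

	printf("approx stack used: %lu bytes\n",
	       stack_used((unsigned long)&dummy));
	return 0;
}
```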
| 65 | + * |
|---|
| 66 | + * If the size of the stack is greater than the maximum recorded size, then the |
|---|
| 67 | + * following algorithm takes place. |
|---|
| 68 | + * |
|---|
| 69 | + * For architectures (like x86) that store the function's return address before |
|---|
| 70 | + * saving the function's local variables, the stack will look something like |
|---|
| 71 | + * this: |
|---|
| 72 | + * |
|---|
| 73 | + * [ top of stack ] |
|---|
| 74 | + * 0: sys call entry frame |
|---|
| 75 | + * 10: return addr to entry code |
|---|
| 76 | + * 11: start of sys_foo frame |
|---|
| 77 | + * 20: return addr to sys_foo |
|---|
| 78 | + * 21: start of kernel_func_bar frame |
|---|
| 79 | + * 30: return addr to kernel_func_bar |
|---|
| 80 | + * 31: [ do trace stack here ] |
|---|
| 81 | + * |
|---|
| 82 | + * stack_trace_save() is called, returning the return addresses it finds in the |
|---|
| 83 | + * current stack, which would be (from the bottom of the stack to the top): |
|---|
| 84 | + * |
|---|
| 85 | + * return addr to kernel_func_bar |
|---|
| 86 | + * return addr to sys_foo |
|---|
| 87 | + * return addr to entry code |
|---|
| 88 | + * |
|---|
| 89 | + * Now, to figure out the size of each function's local variables, the stack is |
|---|
| 90 | + * searched for these return-address values. When a match is found, the address |
|---|
| 91 | + * is added to stack_dump_trace[] and its offset into the stack is saved in the |
|---|
| 92 | + * stack_trace_index[] array. The above example would show (sketch below): |
|---|
| 93 | + * |
|---|
| 94 | + * stack_dump_trace[] | stack_trace_index[] |
|---|
| 95 | + * ------------------ + ------------------- |
|---|
| 96 | + * return addr to kernel_func_bar | 30 |
|---|
| 97 | + * return addr to sys_foo | 20 |
|---|
| 98 | + * return addr to entry | 10 |
|---|
| 99 | + * |
|---|
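To make the search concrete, the sketch below plants three hypothetical return-address values at the offsets from the layout above, then scans from the deepest point toward the top of the stack, recording each match's offset. All values and names are illustrative, not real kernel data.

```c
#include <stdio.h>

#define DEPTH 31	/* deepest offset in the layout above */

int main(void)
{
	/* hypothetical return-address values, planted per the layout */
	unsigned long stack[DEPTH + 1] = { 0 };
	unsigned long trace[] = { 0x1000, 0x2000, 0x3000 };
	const char *name[] = { "kernel_func_bar", "sys_foo", "entry" };
	unsigned int idx[3], x = 0, i = 0;
	int p;

	stack[30] = trace[0];	/* return addr to kernel_func_bar */
	stack[20] = trace[1];	/* return addr to sys_foo */
	stack[10] = trace[2];	/* return addr to entry code */

	/* scan from the trace point toward the top of the stack */
	for (p = DEPTH; p >= 0 && i < 3; p--) {
		if (stack[p] == trace[i]) {
			idx[x++] = p;	/* offset into the stack */
			i++;
		}
	}

	for (i = 0; i < x; i++)
		printf("return addr to %-15s | %u\n", name[i], idx[i]);
	return 0;
}
```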
| 100 | + * The print_max_stack() function above uses these values to print the size of |
|---|
| 101 | + * each function's portion of the stack, roughly as follows: |
|---|
| 102 | + * |
|---|
| 103 | + * for (i = 0; i < nr_entries; i++) { |
|---|
| 104 | + * size = i == nr_entries - 1 ? stack_trace_index[i] : |
|---|
| 105 | + * stack_trace_index[i] - stack_trace_index[i+1] |
|---|
| 106 | + * print "%d %d %d %s\n", i, stack_trace_index[i], size, stack_dump_trace[i] |
|---|
| 107 | + * } |
|---|
| 108 | + * |
|---|
| 109 | + * The above shows (a runnable version of this loop follows the table): |
|---|
| 110 | + * |
|---|
| 111 | + * entry depth size location |
|---|
| 112 | + * ----- ----- ---- -------- |
|---|
| 113 | + * 0 30 10 kernel_func_bar |
|---|
| 114 | + * 1 20 10 sys_foo |
|---|
| 115 | + * 2 10 10 entry code |
|---|
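The loop above is nearly compilable as-is. A runnable stand-alone version, hard-coding the example index values from the layout (illustrative only), reproduces the table:

```c
#include <stdio.h>

int main(void)
{
	const char *stack_dump_trace[] = {
		"kernel_func_bar", "sys_foo", "entry code"
	};
	unsigned int stack_trace_index[] = { 30, 20, 10 };
	unsigned int nr_entries = 3, i, size;

	for (i = 0; i < nr_entries; i++) {
		/* the last entry's size is its full remaining depth */
		if (i + 1 == nr_entries)
			size = stack_trace_index[i];
		else
			size = stack_trace_index[i] - stack_trace_index[i + 1];
		printf("%u %u %u %s\n", i, stack_trace_index[i], size,
		       stack_dump_trace[i]);
	}
	return 0;
}
```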
| 116 | + * |
|---|
| 117 | + * Now, for architectures that may save the return address after the function's |
|---|
| 118 | + * local variables (saving the link register before calling nested functions), |
|---|
| 119 | + * the stack will look a little different: |
|---|
| 120 | + * |
|---|
| 121 | + * [ top of stack ] |
|---|
| 122 | + * 0: sys call entry frame |
|---|
| 123 | + * 10: start of sys_foo frame |
|---|
| 124 | + * 19: return addr to entry code << lr saved before calling kernel_func_bar |
|---|
| 125 | + * 20: start of kernel_func_bar frame |
|---|
| 126 | + * 29: return addr to sys_foo << lr saved before calling next function |
|---|
| 127 | + * 30: [ do trace stack here ] |
|---|
| 128 | + * |
|---|
| 129 | + * Although the functions returned by stack_trace_save() may be the same, their |
|---|
| 130 | + * placement in the stack will be different. Using the same algorithm as above |
|---|
| 131 | + * would yield: |
|---|
| 132 | + * |
|---|
| 133 | + * stack_dump_trace[] | stack_trace_index[] |
|---|
| 134 | + * ------------------ + ------------------- |
|---|
| 135 | + * return addr to kernel_func_bar | 30 |
|---|
| 136 | + * return addr to sys_foo | 29 |
|---|
| 137 | + * return addr to entry | 19 |
|---|
| 138 | + * |
|---|
| 139 | + * Where the mapping is off by one: |
|---|
| 140 | + * |
|---|
| 141 | + * kernel_func_bar's stack frame size is 29 - 19, not 30 - 29! |
|---|
| 142 | + * |
|---|
| 143 | + * To fix this, if the architecture defines ARCH_FTRACE_SHIFT_STACK_TRACER, the |
|---|
| 144 | + * values in stack_trace_index[] are shifted by one and the number of stack |
|---|
| 145 | + * trace entries is decremented by one (the shift is sketched below): |
|---|
| 146 | + * |
|---|
| 147 | + * stack_dump_trace[] | stack_trace_index[] |
|---|
| 148 | + * ------------------ + ------------------- |
|---|
| 149 | + * return addr to kernel_func_bar | 29 |
|---|
| 150 | + * return addr to sys_foo | 19 |
|---|
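A stand-alone sketch of that shift, using the example values above (names and numbers illustrative); the memmove() mirrors the ARCH_FTRACE_SHIFT_STACK_TRACER block later in this file:

```c
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *trace[] = { "kernel_func_bar", "sys_foo", "entry" };
	unsigned int stack_trace_index[] = { 30, 29, 19 };
	unsigned int x = 3, i;

	/* drop the first offset so each entry pairs with its own frame */
	if (x > 1) {
		memmove(&stack_trace_index[0], &stack_trace_index[1],
			sizeof(stack_trace_index[0]) * (x - 1));
		x--;
	}

	for (i = 0; i < x; i++)
		printf("return addr to %-15s | %u\n",
		       trace[i], stack_trace_index[i]);
	return 0;
}
```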
| 151 | + * |
|---|
| 152 | + * Although the entry function is no longer displayed, its stack size is still |
|---|
| 153 | + * included in that of the first function shown (sys_foo). |
|---|
| 75 | 154 | */ |
|---|
| 76 | | -void __weak |
|---|
| 77 | | -check_stack(unsigned long ip, unsigned long *stack) |
|---|
| 155 | +static void check_stack(unsigned long ip, unsigned long *stack) |
|---|
| 78 | 156 | { |
|---|
| 79 | 157 | unsigned long this_size, flags; unsigned long *p, *top, *start; |
|---|
| 80 | 158 | static int tracer_frame; |
|---|
| .. | .. |
|---|
| 110 | 188 | |
|---|
| 111 | 189 | stack_trace_max_size = this_size; |
|---|
| 112 | 190 | |
|---|
| 113 | | - stack_trace_max.nr_entries = 0; |
|---|
| 114 | | - stack_trace_max.skip = 3; |
|---|
| 115 | | - |
|---|
| 116 | | - save_stack_trace(&stack_trace_max); |
|---|
| 191 | + stack_trace_nr_entries = stack_trace_save(stack_dump_trace, |
|---|
| 192 | + ARRAY_SIZE(stack_dump_trace) - 1, |
|---|
| 193 | + 0); |
|---|
| 117 | 194 | |
|---|
| 118 | 195 | /* Skip over the overhead of the stack tracer itself */ |
|---|
| 119 | | - for (i = 0; i < stack_trace_max.nr_entries; i++) { |
|---|
| 196 | + for (i = 0; i < stack_trace_nr_entries; i++) { |
|---|
| 120 | 197 | if (stack_dump_trace[i] == ip) |
|---|
| 121 | 198 | break; |
|---|
| 122 | 199 | } |
|---|
| .. | .. |
|---|
| 125 | 202 | * Some archs may not have the passed in ip in the dump. |
|---|
| 126 | 203 | * If that happens, we need to show everything. |
|---|
| 127 | 204 | */ |
|---|
| 128 | | - if (i == stack_trace_max.nr_entries) |
|---|
| 205 | + if (i == stack_trace_nr_entries) |
|---|
| 129 | 206 | i = 0; |
|---|
| 130 | 207 | |
|---|
| 131 | 208 | /* |
|---|
| .. | .. |
|---|
| 143 | 220 | * loop will only happen once. This code only takes place |
|---|
| 144 | 221 | * on a new max, so it is far from a fast path. |
|---|
| 145 | 222 | */ |
|---|
| 146 | | - while (i < stack_trace_max.nr_entries) { |
|---|
| 223 | + while (i < stack_trace_nr_entries) { |
|---|
| 147 | 224 | int found = 0; |
|---|
| 148 | 225 | |
|---|
| 149 | 226 | stack_trace_index[x] = this_size; |
|---|
| 150 | 227 | p = start; |
|---|
| 151 | 228 | |
|---|
| 152 | | - for (; p < top && i < stack_trace_max.nr_entries; p++) { |
|---|
| 153 | | - if (stack_dump_trace[i] == ULONG_MAX) |
|---|
| 154 | | - break; |
|---|
| 229 | + for (; p < top && i < stack_trace_nr_entries; p++) { |
|---|
| 155 | 230 | /* |
|---|
| 156 | 231 | * The READ_ONCE_NOCHECK is used to let KASAN know that |
|---|
| 157 | 232 | * this is not a stack-out-of-bounds error. |
|---|
| .. | .. |
|---|
| 182 | 257 | i++; |
|---|
| 183 | 258 | } |
|---|
| 184 | 259 | |
|---|
| 185 | | - stack_trace_max.nr_entries = x; |
|---|
| 186 | | - for (; x < i; x++) |
|---|
| 187 | | - stack_dump_trace[x] = ULONG_MAX; |
|---|
| 260 | +#ifdef ARCH_FTRACE_SHIFT_STACK_TRACER |
|---|
| 261 | + /* |
|---|
| 262 | + * Some archs will store the link register before calling |
|---|
| 263 | + * nested functions. This means the saved return address |
|---|
| 264 | + * comes after the local storage, and we need to shift |
|---|
| 265 | + * for that. |
|---|
| 266 | + */ |
|---|
| 267 | + if (x > 1) { |
|---|
| 268 | + memmove(&stack_trace_index[0], &stack_trace_index[1], |
|---|
| 269 | + sizeof(stack_trace_index[0]) * (x - 1)); |
|---|
| 270 | + x--; |
|---|
| 271 | + } |
|---|
| 272 | +#endif |
|---|
| 273 | + |
|---|
| 274 | + stack_trace_nr_entries = x; |
|---|
| 188 | 275 | |
|---|
| 189 | 276 | if (task_stack_end_corrupted(current)) { |
|---|
| 190 | | - stack_trace_print(); |
|---|
| 277 | + print_max_stack(); |
|---|
| 191 | 278 | BUG(); |
|---|
| 192 | 279 | } |
|---|
| 193 | 280 | |
|---|
| .. | .. |
|---|
| 291 | 378 | { |
|---|
| 292 | 379 | long n = *pos - 1; |
|---|
| 293 | 380 | |
|---|
| 294 | | - if (n > stack_trace_max.nr_entries || stack_dump_trace[n] == ULONG_MAX) |
|---|
| 381 | + if (n >= stack_trace_nr_entries) |
|---|
| 295 | 382 | return NULL; |
|---|
| 296 | 383 | |
|---|
| 297 | 384 | m->private = (void *)n; |
|---|
| .. | .. |
|---|
| 355 | 442 | seq_printf(m, " Depth Size Location" |
|---|
| 356 | 443 | " (%d entries)\n" |
|---|
| 357 | 444 | " ----- ---- --------\n", |
|---|
| 358 | | - stack_trace_max.nr_entries); |
|---|
| 445 | + stack_trace_nr_entries); |
|---|
| 359 | 446 | |
|---|
| 360 | 447 | if (!stack_tracer_enabled && !stack_trace_max_size) |
|---|
| 361 | 448 | print_disabled(m); |
|---|
| .. | .. |
|---|
| 365 | 452 | |
|---|
| 366 | 453 | i = *(long *)v; |
|---|
| 367 | 454 | |
|---|
| 368 | | - if (i >= stack_trace_max.nr_entries || |
|---|
| 369 | | - stack_dump_trace[i] == ULONG_MAX) |
|---|
| 455 | + if (i >= stack_trace_nr_entries) |
|---|
| 370 | 456 | return 0; |
|---|
| 371 | 457 | |
|---|
| 372 | | - if (i+1 == stack_trace_max.nr_entries || |
|---|
| 373 | | - stack_dump_trace[i+1] == ULONG_MAX) |
|---|
| 458 | + if (i + 1 == stack_trace_nr_entries) |
|---|
| 374 | 459 | size = stack_trace_index[i]; |
|---|
| 375 | 460 | else |
|---|
| 376 | 461 | size = stack_trace_index[i] - stack_trace_index[i+1]; |
|---|
| .. | .. |
|---|
| 391 | 476 | |
|---|
| 392 | 477 | static int stack_trace_open(struct inode *inode, struct file *file) |
|---|
| 393 | 478 | { |
|---|
| 479 | + int ret; |
|---|
| 480 | + |
|---|
| 481 | + ret = security_locked_down(LOCKDOWN_TRACEFS); |
|---|
| 482 | + if (ret) |
|---|
| 483 | + return ret; |
|---|
| 484 | + |
|---|
| 394 | 485 | return seq_open(file, &stack_trace_seq_ops); |
|---|
| 395 | 486 | } |
|---|
| 396 | 487 | |
|---|
| .. | .. |
|---|
| 408 | 499 | { |
|---|
| 409 | 500 | struct ftrace_ops *ops = inode->i_private; |
|---|
| 410 | 501 | |
|---|
| 502 | + /* Checks for tracefs lockdown */ |
|---|
| 411 | 503 | return ftrace_regex_open(ops, FTRACE_ITER_FILTER, |
|---|
| 412 | 504 | inode, file); |
|---|
| 413 | 505 | } |
|---|
| .. | .. |
|---|
| 423 | 515 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
|---|
| 424 | 516 | |
|---|
| 425 | 517 | int |
|---|
| 426 | | -stack_trace_sysctl(struct ctl_table *table, int write, |
|---|
| 427 | | - void __user *buffer, size_t *lenp, |
|---|
| 428 | | - loff_t *ppos) |
|---|
| 518 | +stack_trace_sysctl(struct ctl_table *table, int write, void *buffer, |
|---|
| 519 | + size_t *lenp, loff_t *ppos) |
|---|
| 429 | 520 | { |
|---|
| 521 | + int was_enabled; |
|---|
| 430 | 522 | int ret; |
|---|
| 431 | 523 | |
|---|
| 432 | 524 | mutex_lock(&stack_sysctl_mutex); |
|---|
| 525 | + was_enabled = !!stack_tracer_enabled; |
|---|
| 433 | 526 | |
|---|
| 434 | 527 | ret = proc_dointvec(table, write, buffer, lenp, ppos); |
|---|
| 435 | 528 | |
|---|
| 436 | | - if (ret || !write || |
|---|
| 437 | | - (last_stack_tracer_enabled == !!stack_tracer_enabled)) |
|---|
| 529 | + if (ret || !write || (was_enabled == !!stack_tracer_enabled)) |
|---|
| 438 | 530 | goto out; |
|---|
| 439 | | - |
|---|
| 440 | | - last_stack_tracer_enabled = !!stack_tracer_enabled; |
|---|
| 441 | 531 | |
|---|
| 442 | 532 | if (stack_tracer_enabled) |
|---|
| 443 | 533 | register_ftrace_function(&trace_ops); |
|---|
| 444 | 534 | else |
|---|
| 445 | 535 | unregister_ftrace_function(&trace_ops); |
|---|
| 446 | | - |
|---|
| 447 | 536 | out: |
|---|
| 448 | 537 | mutex_unlock(&stack_sysctl_mutex); |
|---|
| 449 | 538 | return ret; |
|---|
| .. | .. |
|---|
| 453 | 542 | |
|---|
| 454 | 543 | static __init int enable_stacktrace(char *str) |
|---|
| 455 | 544 | { |
|---|
| 456 | | - if (strncmp(str, "_filter=", 8) == 0) |
|---|
| 457 | | - strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE); |
|---|
| 545 | + int len; |
|---|
| 546 | + |
|---|
| 547 | + if ((len = str_has_prefix(str, "_filter="))) |
|---|
| 548 | + strncpy(stack_trace_filter_buf, str + len, COMMAND_LINE_SIZE); |
|---|
| 458 | 549 | |
|---|
| 459 | 550 | stack_tracer_enabled = 1; |
|---|
| 460 | | - last_stack_tracer_enabled = 1; |
|---|
| 461 | 551 | return 1; |
|---|
| 462 | 552 | } |
|---|
| 463 | 553 | __setup("stacktrace", enable_stacktrace); |
|---|
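str_has_prefix() returns the length of the prefix on a match and 0 otherwise, which is why its return value can double as the offset of the filter text above. A minimal user-space model of that behavior (a sketch, not the kernel implementation):

```c
#include <stdio.h>
#include <string.h>

/* user-space model of the kernel's str_has_prefix() semantics */
static size_t str_has_prefix(const char *str, const char *prefix)
{
	size_t len = strlen(prefix);

	return strncmp(str, prefix, len) == 0 ? len : 0;
}

int main(void)
{
	const char *arg = "_filter=schedule";
	size_t len = str_has_prefix(arg, "_filter=");

	if (len)
		printf("filter: %s\n", arg + len);	/* filter: schedule */
	return 0;
}
```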
| 464 | 554 | |
|---|
| 465 | 555 | static __init int stack_trace_init(void) |
|---|
| 466 | 556 | { |
|---|
| 467 | | - struct dentry *d_tracer; |
|---|
| 557 | + int ret; |
|---|
| 468 | 558 | |
|---|
| 469 | | - d_tracer = tracing_init_dentry(); |
|---|
| 470 | | - if (IS_ERR(d_tracer)) |
|---|
| 559 | + ret = tracing_init_dentry(); |
|---|
| 560 | + if (ret) |
|---|
| 471 | 561 | return 0; |
|---|
| 472 | 562 | |
|---|
| 473 | | - trace_create_file("stack_max_size", 0644, d_tracer, |
|---|
| 563 | + trace_create_file("stack_max_size", 0644, NULL, |
|---|
| 474 | 564 | &stack_trace_max_size, &stack_max_size_fops); |
|---|
| 475 | 565 | |
|---|
| 476 | | - trace_create_file("stack_trace", 0444, d_tracer, |
|---|
| 566 | + trace_create_file("stack_trace", 0444, NULL, |
|---|
| 477 | 567 | NULL, &stack_trace_fops); |
|---|
| 478 | 568 | |
|---|
| 479 | 569 | #ifdef CONFIG_DYNAMIC_FTRACE |
|---|
| 480 | | - trace_create_file("stack_trace_filter", 0644, d_tracer, |
|---|
| 570 | + trace_create_file("stack_trace_filter", 0644, NULL, |
|---|
| 481 | 571 | &trace_ops, &stack_trace_filter_fops); |
|---|
| 482 | 572 | #endif |
|---|
| 483 | 573 | |
|---|