@@ -5,6 +5,7 @@
  */
 #include <linux/sched/task_stack.h>
 #include <linux/stacktrace.h>
+#include <linux/security.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
 #include <linux/spinlock.h>
@@ -18,44 +19,32 @@
 
 #include "trace.h"
 
-static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
-	 { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
-unsigned stack_trace_index[STACK_TRACE_ENTRIES];
+#define STACK_TRACE_ENTRIES 500
 
-/*
- * Reserve one entry for the passed in ip. This will allow
- * us to remove most or all of the stack size overhead
- * added by the stack tracer itself.
- */
-struct stack_trace stack_trace_max = {
-	.max_entries		= STACK_TRACE_ENTRIES - 1,
-	.entries		= &stack_dump_trace[0],
-};
+static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES];
+static unsigned stack_trace_index[STACK_TRACE_ENTRIES];
 
-unsigned long stack_trace_max_size;
-arch_spinlock_t stack_trace_max_lock =
+static unsigned int stack_trace_nr_entries;
+static unsigned long stack_trace_max_size;
+static arch_spinlock_t stack_trace_max_lock =
 	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 
 DEFINE_PER_CPU(int, disable_stack_tracer);
 static DEFINE_MUTEX(stack_sysctl_mutex);
 
 int stack_tracer_enabled;
-static int last_stack_tracer_enabled;
 
-void stack_trace_print(void)
+static void print_max_stack(void)
 {
 	long i;
 	int size;
 
 	pr_emerg("        Depth    Size   Location    (%d entries)\n"
 		 "        -----    ----   --------\n",
-		 stack_trace_max.nr_entries);
+		 stack_trace_nr_entries);
 
-	for (i = 0; i < stack_trace_max.nr_entries; i++) {
-		if (stack_dump_trace[i] == ULONG_MAX)
-			break;
-		if (i+1 == stack_trace_max.nr_entries ||
-		    stack_dump_trace[i+1] == ULONG_MAX)
+	for (i = 0; i < stack_trace_nr_entries; i++) {
+		if (i + 1 == stack_trace_nr_entries)
 			size = stack_trace_index[i];
 		else
 			size = stack_trace_index[i] - stack_trace_index[i+1];
@@ -66,15 +55,104 @@
 }
 
 /*
- * When arch-specific code overrides this function, the following
- * data should be filled up, assuming stack_trace_max_lock is held to
- * prevent concurrent updates.
- *     stack_trace_index[]
- *     stack_trace_max
- *     stack_trace_max_size
+ * The stack tracer looks for a maximum stack at each call from a function. It
+ * registers a callback with ftrace, and in that callback it examines the stack
+ * size. It determines the stack size from the variable passed in, which is the
+ * address of a local variable in the stack_trace_call() callback function.
+ * The stack size is the distance from the address of that local variable to
+ * the top of the current stack. If that size is smaller than the currently
+ * saved max stack size, nothing more is done.
+ *
+ * If the size of the stack is greater than the maximum recorded size, then the
+ * following algorithm takes place.
+ *
+ * For architectures (like x86) that store the function's return address before
+ * saving the function's local variables, the stack will look something like
+ * this:
+ *
+ *   [ top of stack ]
+ *    0: sys call entry frame
+ *   10: return addr to entry code
+ *   11: start of sys_foo frame
+ *   20: return addr to sys_foo
+ *   21: start of kernel_func_bar frame
+ *   30: return addr to kernel_func_bar
+ *   31: [ do trace stack here ]
+ *
+ * save_stack_trace() is called, returning all the functions it finds in the
+ * current stack, which would be (from the bottom of the stack to the top):
+ *
+ *   return addr to kernel_func_bar
+ *   return addr to sys_foo
+ *   return addr to entry code
+ *
+ * Now, to figure out the size of each function's local variables, a search of
+ * the stack is made for these values. When a match is made, it is added to the
+ * stack_dump_trace[] array. The offset into the stack is saved in the
+ * stack_trace_index[] array. The above example would show:
+ *
+ *        stack_dump_trace[]        |   stack_trace_index[]
+ *        ------------------        +   -------------------
+ *  return addr to kernel_func_bar  |     30
+ *  return addr to sys_foo          |     20
+ *  return addr to entry            |     10
+ *
+ * The print_max_stack() function above uses these values to print the size of
+ * each function's portion of the stack.
+ *
+ *  for (i = 0; i < nr_entries; i++) {
+ *     size = i == nr_entries - 1 ? stack_trace_index[i] :
+ *                     stack_trace_index[i] - stack_trace_index[i+1]
+ *     print "%d %d %d %s\n", i, stack_trace_index[i], size, stack_dump_trace[i]
+ *  }
+ *
+ * The above would show
+ *
+ *        depth size location
+ *        ----- ---- --------
+ *     0    30   10   kernel_func_bar
+ *     1    20   10   sys_foo
+ *     2    10   10   entry code
+ *
+ * Now for architectures that might save the return address after the function's
+ * local variables (saving the link register before calling nested functions),
+ * this will cause the stack to look a little different:
+ *
+ *   [ top of stack ]
+ *    0: sys call entry frame
+ *   10: start of sys_foo_frame
+ *   19: return addr to entry code << lr saved before calling kernel_func_bar
+ *   20: start of kernel_func_bar frame
+ *   29: return addr to sys_foo_frame << lr saved before calling next function
+ *   30: [ do trace stack here ]
+ *
+ * Although the functions returned by save_stack_trace() may be the same, the
+ * placement in the stack will be different. Using the same algorithm as above
+ * would yield:
+ *
+ *        stack_dump_trace[]        |   stack_trace_index[]
+ *        ------------------        +   -------------------
+ *  return addr to kernel_func_bar  |     30
+ *  return addr to sys_foo          |     29
+ *  return addr to entry            |     19
+ *
+ * Where the mapping is off by one:
+ *
+ *   kernel_func_bar stack frame size is 29 - 19, not 30 - 29!
+ *
+ * To fix this, if the architecture sets ARCH_RET_ADDR_AFTER_LOCAL_VARS, the
+ * values in stack_trace_index[] are shifted by one and the number of stack
+ * trace entries is decremented by one.
+ *
+ *        stack_dump_trace[]        |   stack_trace_index[]
+ *        ------------------        +   -------------------
+ *  return addr to kernel_func_bar  |     29
+ *  return addr to sys_foo          |     19
+ *
+ * Although the entry function is not displayed, the first function (sys_foo)
+ * will still include its stack size.
  */
-void __weak
-check_stack(unsigned long ip, unsigned long *stack)
+static void check_stack(unsigned long ip, unsigned long *stack)
 {
 	unsigned long this_size, flags; unsigned long *p, *top, *start;
 	static int tracer_frame;
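
To make the size arithmetic in the comment above concrete, here is a minimal userspace sketch (not part of the patch) that applies the same computation to the hypothetical x86 example values; the arrays and their contents are taken from the comment, not from a live kernel:

#include <stdio.h>

/* Values from the x86 example in the comment above (hypothetical). */
static const char *stack_dump_trace[] = {
	"kernel_func_bar", "sys_foo", "entry code",
};
static unsigned stack_trace_index[] = { 30, 20, 10 };
static unsigned stack_trace_nr_entries = 3;

int main(void)
{
	unsigned i, size;

	printf("        Depth    Size   Location\n");
	for (i = 0; i < stack_trace_nr_entries; i++) {
		/* The deepest (last) entry's size is its full depth. */
		if (i + 1 == stack_trace_nr_entries)
			size = stack_trace_index[i];
		else
			size = stack_trace_index[i] - stack_trace_index[i + 1];
		printf("%3u) %8u %5u   %s\n", i, stack_trace_index[i],
		       size, stack_dump_trace[i]);
	}
	return 0;
}

Running it reproduces the "depth size location" table from the comment: each function's share of the stack is the difference between adjacent offsets.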
@@ -110,13 +188,12 @@
 
 	stack_trace_max_size = this_size;
 
-	stack_trace_max.nr_entries = 0;
-	stack_trace_max.skip = 3;
-
-	save_stack_trace(&stack_trace_max);
+	stack_trace_nr_entries = stack_trace_save(stack_dump_trace,
+						  ARRAY_SIZE(stack_dump_trace) - 1,
+						  0);
 
 	/* Skip over the overhead of the stack tracer itself */
-	for (i = 0; i < stack_trace_max.nr_entries; i++) {
+	for (i = 0; i < stack_trace_nr_entries; i++) {
 		if (stack_dump_trace[i] == ip)
 			break;
 	}
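
The hunk above is the heart of the conversion: stack_trace_save() returns the number of entries it stored, which is what lets the patch drop the ULONG_MAX sentinel and all the scans for it. A minimal sketch of the interface, using a hypothetical my_dump_stack() helper (kernel-side pseudocode under those assumptions, not part of the patch):

#include <linux/kernel.h>
#include <linux/stacktrace.h>

#define MY_MAX_ENTRIES 64

/* Hypothetical helper: log the current call chain via stack_trace_save(). */
static void my_dump_stack(void)
{
	unsigned long entries[MY_MAX_ENTRIES];
	unsigned int nr, i;

	/* Fills entries[] and returns how many were stored; no terminator. */
	nr = stack_trace_save(entries, MY_MAX_ENTRIES, 0);

	for (i = 0; i < nr; i++)
		pr_info("  %pS\n", (void *)entries[i]);
}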
@@ -125,7 +202,7 @@
 	 * Some archs may not have the passed in ip in the dump.
 	 * If that happens, we need to show everything.
 	 */
-	if (i == stack_trace_max.nr_entries)
+	if (i == stack_trace_nr_entries)
 		i = 0;
 
 	/*
@@ -143,15 +220,13 @@
 	 * loop will only happen once. This code only takes place
 	 * on a new max, so it is far from a fast path.
 	 */
-	while (i < stack_trace_max.nr_entries) {
+	while (i < stack_trace_nr_entries) {
 		int found = 0;
 
 		stack_trace_index[x] = this_size;
 		p = start;
 
-		for (; p < top && i < stack_trace_max.nr_entries; p++) {
-			if (stack_dump_trace[i] == ULONG_MAX)
-				break;
+		for (; p < top && i < stack_trace_nr_entries; p++) {
 			/*
 			 * The READ_ONCE_NOCHECK is used to let KASAN know that
 			 * this is not a stack-out-of-bounds error.
@@ -182,12 +257,24 @@
 		i++;
 	}
 
-	stack_trace_max.nr_entries = x;
-	for (; x < i; x++)
-		stack_dump_trace[x] = ULONG_MAX;
+#ifdef ARCH_FTRACE_SHIFT_STACK_TRACER
+	/*
+	 * Some archs will store the link register before calling
+	 * nested functions. This means the saved return address
+	 * comes after the local storage, and we need to shift
+	 * for that.
+	 */
+	if (x > 1) {
+		memmove(&stack_trace_index[0], &stack_trace_index[1],
+			sizeof(stack_trace_index[0]) * (x - 1));
+		x--;
+	}
+#endif
+
+	stack_trace_nr_entries = x;
 
 	if (task_stack_end_corrupted(current)) {
-		stack_trace_print();
+		print_max_stack();
 		BUG();
 	}
 
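
The memmove() above is exactly the shift described in the long comment. A standalone userspace sketch (using the hypothetical offsets from the link-register example) shows the before/after:

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Offsets from the link-register example in the comment: 30, 29, 19. */
	unsigned stack_trace_index[] = { 30, 29, 19 };
	unsigned x = sizeof(stack_trace_index) / sizeof(stack_trace_index[0]);
	unsigned i;

	/* Same shift as under ARCH_FTRACE_SHIFT_STACK_TRACER: drop the first
	 * offset so each entry lines up with its own frame.
	 */
	if (x > 1) {
		memmove(&stack_trace_index[0], &stack_trace_index[1],
			sizeof(stack_trace_index[0]) * (x - 1));
		x--;
	}

	for (i = 0; i < x; i++)
		printf("stack_trace_index[%u] = %u\n", i, stack_trace_index[i]);
	/* Prints 29 and 19: kernel_func_bar's frame is now 29 - 19 = 10. */
	return 0;
}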
@@ -291,7 +378,7 @@
 {
 	long n = *pos - 1;
 
-	if (n > stack_trace_max.nr_entries || stack_dump_trace[n] == ULONG_MAX)
+	if (n >= stack_trace_nr_entries)
 		return NULL;
 
 	m->private = (void *)n;
@@ -355,7 +442,7 @@
 	seq_printf(m, "        Depth    Size   Location"
 		   "    (%d entries)\n"
 		   "        -----    ----   --------\n",
-		   stack_trace_max.nr_entries);
+		   stack_trace_nr_entries);
 
 	if (!stack_tracer_enabled && !stack_trace_max_size)
 		print_disabled(m);
@@ -365,12 +452,10 @@
 
 	i = *(long *)v;
 
-	if (i >= stack_trace_max.nr_entries ||
-	    stack_dump_trace[i] == ULONG_MAX)
+	if (i >= stack_trace_nr_entries)
 		return 0;
 
-	if (i+1 == stack_trace_max.nr_entries ||
-	    stack_dump_trace[i+1] == ULONG_MAX)
+	if (i + 1 == stack_trace_nr_entries)
 		size = stack_trace_index[i];
 	else
 		size = stack_trace_index[i] - stack_trace_index[i+1];
@@ -391,6 +476,12 @@
 
 static int stack_trace_open(struct inode *inode, struct file *file)
 {
+	int ret;
+
+	ret = security_locked_down(LOCKDOWN_TRACEFS);
+	if (ret)
+		return ret;
+
 	return seq_open(file, &stack_trace_seq_ops);
 }
 
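
security_locked_down() returns 0 when access is permitted and a negative errno when the kernel is locked down, so the check doubles as the open handler's early return. The same prologue can guard any tracefs-style reader; a sketch with a hypothetical my_seq_ops and my_trace_open() (assumptions, not code from the patch):

#include <linux/security.h>
#include <linux/seq_file.h>

static const struct seq_operations my_seq_ops;	/* hypothetical */

static int my_trace_open(struct inode *inode, struct file *file)
{
	int ret;

	/* Refuse to expose kernel addresses when the kernel is locked down. */
	ret = security_locked_down(LOCKDOWN_TRACEFS);
	if (ret)
		return ret;

	return seq_open(file, &my_seq_ops);
}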
@@ -408,6 +499,7 @@
 {
 	struct ftrace_ops *ops = inode->i_private;
 
+	/* Checks for tracefs lockdown */
 	return ftrace_regex_open(ops, FTRACE_ITER_FILTER,
 				 inode, file);
 }
@@ -423,27 +515,24 @@
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 int
-stack_trace_sysctl(struct ctl_table *table, int write,
-		   void __user *buffer, size_t *lenp,
-		   loff_t *ppos)
+stack_trace_sysctl(struct ctl_table *table, int write, void *buffer,
+		   size_t *lenp, loff_t *ppos)
 {
+	int was_enabled;
 	int ret;
 
 	mutex_lock(&stack_sysctl_mutex);
+	was_enabled = !!stack_tracer_enabled;
 
 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
 
-	if (ret || !write ||
-	    (last_stack_tracer_enabled == !!stack_tracer_enabled))
+	if (ret || !write || (was_enabled == !!stack_tracer_enabled))
 		goto out;
-
-	last_stack_tracer_enabled = !!stack_tracer_enabled;
 
 	if (stack_tracer_enabled)
 		register_ftrace_function(&trace_ops);
 	else
 		unregister_ftrace_function(&trace_ops);
-
 out:
 	mutex_unlock(&stack_sysctl_mutex);
 	return ret;
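
Replacing the file-scope last_stack_tracer_enabled with an on-stack snapshot keeps the old/new comparison entirely inside the mutex-protected region. The resulting pattern, sketched with hypothetical my_feature_* names (everything here except proc_dointvec() and the locking primitives is an assumption):

#include <linux/mutex.h>
#include <linux/sysctl.h>

static DEFINE_MUTEX(my_sysctl_mutex);
static int my_feature_enabled;

void my_feature_start(void);	/* hypothetical */
void my_feature_stop(void);	/* hypothetical */

int my_feature_sysctl(struct ctl_table *table, int write, void *buffer,
		      size_t *lenp, loff_t *ppos)
{
	int was_enabled;
	int ret;

	mutex_lock(&my_sysctl_mutex);
	was_enabled = !!my_feature_enabled;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	/* Only act when a successful write actually flipped the flag. */
	if (ret || !write || (was_enabled == !!my_feature_enabled))
		goto out;

	if (my_feature_enabled)
		my_feature_start();
	else
		my_feature_stop();
out:
	mutex_unlock(&my_sysctl_mutex);
	return ret;
}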
@@ -453,31 +542,32 @@
 
 static __init int enable_stacktrace(char *str)
 {
-	if (strncmp(str, "_filter=", 8) == 0)
-		strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE);
+	int len;
+
+	if ((len = str_has_prefix(str, "_filter=")))
+		strncpy(stack_trace_filter_buf, str + len, COMMAND_LINE_SIZE);
 
 	stack_tracer_enabled = 1;
-	last_stack_tracer_enabled = 1;
 	return 1;
 }
 __setup("stacktrace", enable_stacktrace);
 
 static __init int stack_trace_init(void)
 {
-	struct dentry *d_tracer;
+	int ret;
 
-	d_tracer = tracing_init_dentry();
-	if (IS_ERR(d_tracer))
+	ret = tracing_init_dentry();
+	if (ret)
 		return 0;
 
-	trace_create_file("stack_max_size", 0644, d_tracer,
+	trace_create_file("stack_max_size", 0644, NULL,
 			  &stack_trace_max_size, &stack_max_size_fops);
 
-	trace_create_file("stack_trace", 0444, d_tracer,
+	trace_create_file("stack_trace", 0444, NULL,
 			  NULL, &stack_trace_fops);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-	trace_create_file("stack_trace_filter", 0644, d_tracer,
+	trace_create_file("stack_trace_filter", 0644, NULL,
 			  &trace_ops, &stack_trace_filter_fops);
 #endif
 
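
str_has_prefix() returns the length of the matched prefix (0 on a mismatch), so a single call both tests the prefix and yields the offset of its argument, replacing the error-prone strncmp(str, "_filter=", 8) / str + 8 pair where the literal and the magic length could drift apart. A short sketch with a hypothetical parser function:

#include <linux/printk.h>
#include <linux/string.h>

/* Hypothetical parser for a boot option of the form "_filter=<glob>". */
static void parse_stacktrace_option(const char *str)
{
	int len;

	len = str_has_prefix(str, "_filter=");
	if (len)
		pr_info("stack tracer filter: %s\n", str + len);
}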
---|