From 61598093bbdd283a7edc367d900f223070ead8d2 Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Fri, 10 May 2024 07:43:03 +0000 Subject: [PATCH] add ax88772C AX88772C_eeprom_tools --- kernel/kernel/trace/ftrace.c | 1787 ++++++++++++++++++++++++++++++++++++++--------------------- 1 files changed, 1,140 insertions(+), 647 deletions(-) diff --git a/kernel/kernel/trace/ftrace.c b/kernel/kernel/trace/ftrace.c index 35f8765..31fec92 100644 --- a/kernel/kernel/trace/ftrace.c +++ b/kernel/kernel/trace/ftrace.c @@ -18,8 +18,8 @@ #include <linux/clocksource.h> #include <linux/sched/task.h> #include <linux/kallsyms.h> +#include <linux/security.h> #include <linux/seq_file.h> -#include <linux/suspend.h> #include <linux/tracefs.h> #include <linux/hardirq.h> #include <linux/kthread.h> @@ -41,6 +41,7 @@ #include <asm/sections.h> #include <asm/setup.h> +#include "ftrace_internal.h" #include "trace_output.h" #include "trace_stat.h" @@ -61,8 +62,6 @@ }) /* hash bits for specific function selection */ -#define FTRACE_HASH_BITS 7 -#define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS) #define FTRACE_HASH_DEFAULT_BITS 10 #define FTRACE_HASH_MAX_BITS 12 @@ -70,15 +69,16 @@ #define INIT_OPS_HASH(opsname) \ .func_hash = &opsname.local_hash, \ .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), -#define ASSIGN_OPS_HASH(opsname, val) \ - .func_hash = val, \ - .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), #else #define INIT_OPS_HASH(opsname) -#define ASSIGN_OPS_HASH(opsname, val) #endif -static struct ftrace_ops ftrace_list_end __read_mostly = { +enum { + FTRACE_MODIFY_ENABLE_FL = (1 << 0), + FTRACE_MODIFY_MAY_SLEEP_FL = (1 << 1), +}; + +struct ftrace_ops ftrace_list_end __read_mostly = { .func = ftrace_stub, .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB, INIT_OPS_HASH(ftrace_list_end) @@ -102,7 +102,7 @@ tr = ops->private; - return tr->function_pids != NULL; + return tr->function_pids != NULL || tr->function_no_pids != NULL; } static void ftrace_update_trampoline(struct ftrace_ops *ops); @@ -113,41 +113,20 @@ */ static int ftrace_disabled __read_mostly; -static DEFINE_MUTEX(ftrace_lock); +DEFINE_MUTEX(ftrace_lock); -static struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end; +struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; -static struct ftrace_ops global_ops; +struct ftrace_ops global_ops; #if ARCH_SUPPORTS_FTRACE_OPS static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *regs); #else /* See comment below, where ftrace_ops_list_func is defined */ -static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs); -#define ftrace_ops_list_func ftrace_ops_no_ops +static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); +#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) #endif - -/* - * Traverse the ftrace_global_list, invoking all entries. The reason that we - * can use rcu_dereference_raw_notrace() is that elements removed from this list - * are simply leaked, so there is no need to interact with a grace-period - * mechanism. The rcu_dereference_raw_notrace() calls are needed to handle - * concurrent insertions into the ftrace_global_list. - * - * Silly Alpha and silly pointer-speculation compiler optimizations! - */ -#define do_for_each_ftrace_op(op, list) \ - op = rcu_dereference_raw_notrace(list); \ - do - -/* - * Optimized for just a single item in the list (as that is the normal case). - */ -#define while_for_each_ftrace_op(op) \ - while (likely(op = rcu_dereference_raw_notrace((op)->next)) && \ - unlikely((op) != &ftrace_list_end)) static inline void ftrace_ops_init(struct ftrace_ops *ops) { @@ -164,22 +143,18 @@ struct ftrace_ops *op, struct pt_regs *regs) { struct trace_array *tr = op->private; + int pid; - if (tr && this_cpu_read(tr->trace_buffer.data->ftrace_ignore_pid)) - return; + if (tr) { + pid = this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid); + if (pid == FTRACE_PID_IGNORE) + return; + if (pid != FTRACE_PID_TRACE && + pid != current->pid) + return; + } op->saved_func(ip, parent_ip, op, regs); -} - -static void ftrace_sync(struct work_struct *work) -{ - /* - * This function is just a stub to implement a hard force - * of synchronize_sched(). This requires synchronizing - * tasks even in userspace and idle. - * - * Yes, function tracing is rude. - */ } static void ftrace_sync_ipi(void *data) @@ -187,18 +162,6 @@ /* Probably not needed, but do it anyway */ smp_rmb(); } - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -static void update_function_graph_func(void); - -/* Both enabled by default (can be cleared by function_graph tracer flags */ -static bool fgraph_sleep_time = true; -static bool fgraph_graph_time = true; - -#else -static inline void update_function_graph_func(void) { } -#endif - static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops) { @@ -267,7 +230,7 @@ /* * For static tracing, we need to be a bit more careful. * The function change takes affect immediately. Thus, - * we need to coorditate the setting of the function_trace_ops + * we need to coordinate the setting of the function_trace_ops * with the setting of the ftrace_trace_function. * * Set the function to the list ops, which will call the @@ -279,7 +242,7 @@ * Make sure all CPUs see this. Yes this is slow, but static * tracing is slow and nasty to have enabled. */ - schedule_on_each_cpu(ftrace_sync); + synchronize_rcu_tasks_rude(); /* Now all cpus are using the list ops. */ function_trace_op = set_function_trace_op; /* Make sure the function_trace_op is visible on all CPUs */ @@ -336,7 +299,7 @@ static void ftrace_update_trampoline(struct ftrace_ops *ops); -static int __register_ftrace_function(struct ftrace_ops *ops) +int __register_ftrace_function(struct ftrace_ops *ops) { if (ops->flags & FTRACE_OPS_FL_DELETED) return -EINVAL; @@ -357,6 +320,8 @@ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED) ops->flags |= FTRACE_OPS_FL_SAVE_REGS; #endif + if (!ftrace_enabled && (ops->flags & FTRACE_OPS_FL_PERMANENT)) + return -EBUSY; if (!core_kernel_data((unsigned long)ops)) ops->flags |= FTRACE_OPS_FL_DYNAMIC; @@ -377,7 +342,7 @@ return 0; } -static int __unregister_ftrace_function(struct ftrace_ops *ops) +int __unregister_ftrace_function(struct ftrace_ops *ops) { int ret; @@ -494,10 +459,10 @@ #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* function graph compares on total time */ -static int function_stat_cmp(void *p1, void *p2) +static int function_stat_cmp(const void *p1, const void *p2) { - struct ftrace_profile *a = p1; - struct ftrace_profile *b = p2; + const struct ftrace_profile *a = p1; + const struct ftrace_profile *b = p2; if (a->time < b->time) return -1; @@ -508,10 +473,10 @@ } #else /* not function graph compares against hits */ -static int function_stat_cmp(void *p1, void *p2) +static int function_stat_cmp(const void *p1, const void *p2) { - struct ftrace_profile *a = p1; - struct ftrace_profile *b = p2; + const struct ftrace_profile *a = p1; + const struct ftrace_profile *b = p2; if (a->counter < b->counter) return -1; @@ -817,9 +782,16 @@ } #ifdef CONFIG_FUNCTION_GRAPH_TRACER +static bool fgraph_graph_time = true; + +void ftrace_graph_graph_time_control(bool enable) +{ + fgraph_graph_time = enable; +} + static int profile_graph_entry(struct ftrace_graph_ent *trace) { - int index = current->curr_ret_stack; + struct ftrace_ret_stack *ret_stack; function_profile_call(trace->func, 0, NULL, NULL); @@ -827,14 +799,16 @@ if (!current->ret_stack) return 0; - if (index >= 0 && index < FTRACE_RETFUNC_DEPTH) - current->ret_stack[index].subtime = 0; + ret_stack = ftrace_graph_get_ret_stack(current, 0); + if (ret_stack) + ret_stack->subtime = 0; return 1; } static void profile_graph_return(struct ftrace_graph_ret *trace) { + struct ftrace_ret_stack *ret_stack; struct ftrace_profile_stat *stat; unsigned long long calltime; struct ftrace_profile *rec; @@ -852,16 +826,15 @@ calltime = trace->rettime - trace->calltime; if (!fgraph_graph_time) { - int index; - - index = current->curr_ret_stack; /* Append this call time to the parent time to subtract */ - if (index) - current->ret_stack[index - 1].subtime += calltime; + ret_stack = ftrace_graph_get_ret_stack(current, 1); + if (ret_stack) + ret_stack->subtime += calltime; - if (current->ret_stack[index].subtime < calltime) - calltime -= current->ret_stack[index].subtime; + ret_stack = ftrace_graph_get_ret_stack(current, 0); + if (ret_stack && ret_stack->subtime < calltime) + calltime -= ret_stack->subtime; else calltime = 0; } @@ -876,15 +849,19 @@ local_irq_restore(flags); } +static struct fgraph_ops fprofiler_ops = { + .entryfunc = &profile_graph_entry, + .retfunc = &profile_graph_return, +}; + static int register_ftrace_profiler(void) { - return register_ftrace_graph(&profile_graph_return, - &profile_graph_entry); + return register_ftrace_graph(&fprofiler_ops); } static void unregister_ftrace_profiler(void) { - unregister_ftrace_graph(); + unregister_ftrace_graph(&fprofiler_ops); } #else static struct ftrace_ops ftrace_profile_ops __read_mostly = { @@ -936,7 +913,7 @@ ftrace_profile_enabled = 0; /* * unregister_ftrace_profiler calls stop_machine - * so this acts like an synchronize_sched. + * so this acts like an synchronize_rcu. */ unregister_ftrace_profiler(); } @@ -1023,12 +1000,6 @@ } #endif /* CONFIG_FUNCTION_PROFILER */ -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -static int ftrace_graph_active; -#else -# define ftrace_graph_active 0 -#endif - #ifdef CONFIG_DYNAMIC_FTRACE static struct ftrace_ops *removed_ops; @@ -1042,11 +1013,6 @@ #ifndef CONFIG_FTRACE_MCOUNT_RECORD # error Dynamic ftrace depends on MCOUNT_RECORD #endif - -struct ftrace_func_entry { - struct hlist_node hlist; - unsigned long ip; -}; struct ftrace_func_probe { struct ftrace_probe_ops *probe_ops; @@ -1069,7 +1035,7 @@ }; #define EMPTY_HASH ((struct ftrace_hash *)&empty_hash) -static struct ftrace_ops global_ops = { +struct ftrace_ops global_ops = { .func = ftrace_stub, .local_hash.notrace_hash = EMPTY_HASH, .local_hash.filter_hash = EMPTY_HASH, @@ -1088,7 +1054,7 @@ /* * Some of the ops may be dynamically allocated, - * they are freed after a synchronize_sched(). + * they are freed after a synchronize_rcu(). */ preempt_disable_notrace(); @@ -1125,14 +1091,11 @@ struct ftrace_page *next; struct dyn_ftrace *records; int index; - int size; + int order; }; #define ENTRY_SIZE sizeof(struct dyn_ftrace) #define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE) - -/* estimate from running different kernels */ -#define NR_TO_INIT 10000 static struct ftrace_page *ftrace_pages_start; static struct ftrace_page *ftrace_pages; @@ -1288,7 +1251,7 @@ { if (!hash || hash == EMPTY_HASH) return; - call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu); + call_rcu(&hash->rcu, __free_ftrace_hash_rcu); } void ftrace_free_filter(struct ftrace_ops *ops) @@ -1332,6 +1295,7 @@ if (!ftrace_mod) return -ENOMEM; + INIT_LIST_HEAD(&ftrace_mod->list); ftrace_mod->func = kstrdup(func, GFP_KERNEL); ftrace_mod->module = kstrdup(module, GFP_KERNEL); ftrace_mod->enable = enable; @@ -1395,28 +1359,20 @@ static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, struct ftrace_hash *new_hash); -static struct ftrace_hash * -__ftrace_hash_move(struct ftrace_hash *src) +static struct ftrace_hash *dup_hash(struct ftrace_hash *src, int size) { struct ftrace_func_entry *entry; - struct hlist_node *tn; - struct hlist_head *hhd; struct ftrace_hash *new_hash; - int size = src->count; + struct hlist_head *hhd; + struct hlist_node *tn; int bits = 0; int i; /* - * If the new source is empty, just return the empty_hash. + * Use around half the size (max bit of it), but + * a minimum of 2 is fine (as size of 0 or 1 both give 1 for bits). */ - if (ftrace_hash_empty(src)) - return EMPTY_HASH; - - /* - * Make the hash size about 1/2 the # found - */ - for (size /= 2; size; size >>= 1) - bits++; + bits = fls(size / 2); /* Don't allocate too much */ if (bits > FTRACE_HASH_MAX_BITS) @@ -1436,8 +1392,21 @@ __add_hash_entry(new_hash, entry); } } - return new_hash; +} + +static struct ftrace_hash * +__ftrace_hash_move(struct ftrace_hash *src) +{ + int size = src->count; + + /* + * If the new source is empty, just return the empty_hash. + */ + if (ftrace_hash_empty(src)) + return EMPTY_HASH; + + return dup_hash(src, size); } static int @@ -1483,7 +1452,7 @@ { /* * The function record is a match if it exists in the filter - * hash and not in the notrace hash. Note, an emty hash is + * hash and not in the notrace hash. Note, an empty hash is * considered a match for the filter hash, but an empty * notrace hash is considered not in the notrace hash. */ @@ -1503,9 +1472,9 @@ * the ip is not in the ops->notrace_hash. * * This needs to be called with preemption disabled as - * the hashes are freed with call_rcu_sched(). + * the hashes are freed with call_rcu(). */ -static int +int ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) { struct ftrace_ops_hash hash; @@ -1559,6 +1528,29 @@ return 0; } +static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end) +{ + struct ftrace_page *pg; + struct dyn_ftrace *rec = NULL; + struct dyn_ftrace key; + + key.ip = start; + key.flags = end; /* overload flags, as it is unsigned long */ + + for (pg = ftrace_pages_start; pg; pg = pg->next) { + if (pg->index == 0 || + end < pg->records[0].ip || + start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE)) + continue; + rec = bsearch(&key, pg->records, pg->index, + sizeof(struct dyn_ftrace), + ftrace_cmp_recs); + if (rec) + break; + } + return rec; +} + /** * ftrace_location_range - return the first address of a traced location * if it touches the given ip range @@ -1573,23 +1565,11 @@ */ unsigned long ftrace_location_range(unsigned long start, unsigned long end) { - struct ftrace_page *pg; struct dyn_ftrace *rec; - struct dyn_ftrace key; - key.ip = start; - key.flags = end; /* overload flags, as it is unsigned long */ - - for (pg = ftrace_pages_start; pg; pg = pg->next) { - if (end < pg->records[0].ip || - start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE)) - continue; - rec = bsearch(&key, pg->records, pg->index, - sizeof(struct dyn_ftrace), - ftrace_cmp_recs); - if (rec) - return rec->ip; - } + rec = lookup_rec(start, end); + if (rec) + return rec->ip; return 0; } @@ -1742,6 +1722,9 @@ if (FTRACE_WARN_ON(ftrace_rec_count(rec) == FTRACE_REF_MAX)) return false; + if (ops->flags & FTRACE_OPS_FL_DIRECT) + rec->flags |= FTRACE_FL_DIRECT; + /* * If there's only a single callback registered to a * function, and the ops has a trampoline registered @@ -1768,6 +1751,15 @@ if (FTRACE_WARN_ON(ftrace_rec_count(rec) == 0)) return false; rec->flags--; + + /* + * Only the internal direct_ops should have the + * DIRECT flag set. Thus, if it is removing a + * function, then that function should no longer + * be direct. + */ + if (ops->flags & FTRACE_OPS_FL_DIRECT) + rec->flags &= ~FTRACE_FL_DIRECT; /* * If the rec had REGS enabled and the ops that is @@ -1803,7 +1795,7 @@ count++; /* Must match FTRACE_UPDATE_CALLS in ftrace_modify_all_code() */ - update |= ftrace_test_record(rec, 1) != FTRACE_UPDATE_IGNORE; + update |= ftrace_test_record(rec, true) != FTRACE_UPDATE_IGNORE; /* Shortcut, if we handled all records, we are done. */ if (!all && count == hash->count) @@ -1981,7 +1973,7 @@ char ins[MCOUNT_INSN_SIZE]; int i; - if (probe_kernel_read(ins, p, MCOUNT_INSN_SIZE)) { + if (copy_from_kernel_nofault(ins, p, MCOUNT_INSN_SIZE)) { printk(KERN_CONT "%s[FAULT] %px\n", fmt, p); return; } @@ -2025,22 +2017,22 @@ * modifying the code. @failed should be one of either: * EFAULT - if the problem happens on reading the @ip address * EINVAL - if what is read at @ip is not what was expected - * EPERM - if the problem happens on writting to the @ip address + * EPERM - if the problem happens on writing to the @ip address */ void ftrace_bug(int failed, struct dyn_ftrace *rec) { unsigned long ip = rec ? rec->ip : 0; + pr_info("------------[ ftrace bug ]------------\n"); + switch (failed) { case -EFAULT: - FTRACE_WARN_ON_ONCE(1); pr_info("ftrace faulted on modifying "); - print_ip_sym(ip); + print_ip_sym(KERN_INFO, ip); break; case -EINVAL: - FTRACE_WARN_ON_ONCE(1); pr_info("ftrace failed to modify "); - print_ip_sym(ip); + print_ip_sym(KERN_INFO, ip); print_ip_ins(" actual: ", (unsigned char *)ip); pr_cont("\n"); if (ftrace_expected) { @@ -2049,14 +2041,12 @@ } break; case -EPERM: - FTRACE_WARN_ON_ONCE(1); pr_info("ftrace faulted on writing "); - print_ip_sym(ip); + print_ip_sym(KERN_INFO, ip); break; default: - FTRACE_WARN_ON_ONCE(1); pr_info("ftrace faulted on unknown error "); - print_ip_sym(ip); + print_ip_sym(KERN_INFO, ip); } print_bug_type(); if (rec) { @@ -2081,9 +2071,11 @@ ip = ftrace_get_addr_curr(rec); pr_cont("\n expected tramp: %lx\n", ip); } + + FTRACE_WARN_ON_ONCE(1); } -static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) +static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update) { unsigned long flag = 0UL; @@ -2110,15 +2102,34 @@ * If enabling and the REGS flag does not match the REGS_EN, or * the TRAMP flag doesn't match the TRAMP_EN, then do not ignore * this record. Set flags to fail the compare against ENABLED. + * Same for direct calls. */ if (flag) { - if (!(rec->flags & FTRACE_FL_REGS) != + if (!(rec->flags & FTRACE_FL_REGS) != !(rec->flags & FTRACE_FL_REGS_EN)) flag |= FTRACE_FL_REGS; - if (!(rec->flags & FTRACE_FL_TRAMP) != + if (!(rec->flags & FTRACE_FL_TRAMP) != !(rec->flags & FTRACE_FL_TRAMP_EN)) flag |= FTRACE_FL_TRAMP; + + /* + * Direct calls are special, as count matters. + * We must test the record for direct, if the + * DIRECT and DIRECT_EN do not match, but only + * if the count is 1. That's because, if the + * count is something other than one, we do not + * want the direct enabled (it will be done via the + * direct helper). But if DIRECT_EN is set, and + * the count is not one, we need to clear it. + */ + if (ftrace_rec_count(rec) == 1) { + if (!(rec->flags & FTRACE_FL_DIRECT) != + !(rec->flags & FTRACE_FL_DIRECT_EN)) + flag |= FTRACE_FL_DIRECT; + } else if (rec->flags & FTRACE_FL_DIRECT_EN) { + flag |= FTRACE_FL_DIRECT; + } } /* If the state of this record hasn't changed, then do nothing */ @@ -2142,6 +2153,25 @@ rec->flags |= FTRACE_FL_TRAMP_EN; else rec->flags &= ~FTRACE_FL_TRAMP_EN; + } + if (flag & FTRACE_FL_DIRECT) { + /* + * If there's only one user (direct_ops helper) + * then we can call the direct function + * directly (no ftrace trampoline). + */ + if (ftrace_rec_count(rec) == 1) { + if (rec->flags & FTRACE_FL_DIRECT) + rec->flags |= FTRACE_FL_DIRECT_EN; + else + rec->flags &= ~FTRACE_FL_DIRECT_EN; + } else { + /* + * Can only call directly if there's + * only one callback to the function. + */ + rec->flags &= ~FTRACE_FL_DIRECT_EN; + } } } @@ -2172,7 +2202,7 @@ * and REGS states. The _EN flags must be disabled though. */ rec->flags &= ~(FTRACE_FL_ENABLED | FTRACE_FL_TRAMP_EN | - FTRACE_FL_REGS_EN); + FTRACE_FL_REGS_EN | FTRACE_FL_DIRECT_EN); } ftrace_bug_type = FTRACE_BUG_NOP; @@ -2182,28 +2212,28 @@ /** * ftrace_update_record, set a record that now is tracing or not * @rec: the record to update - * @enable: set to 1 if the record is tracing, zero to force disable + * @enable: set to true if the record is tracing, false to force disable * * The records that represent all functions that can be traced need * to be updated when tracing has been enabled. */ -int ftrace_update_record(struct dyn_ftrace *rec, int enable) +int ftrace_update_record(struct dyn_ftrace *rec, bool enable) { - return ftrace_check_record(rec, enable, 1); + return ftrace_check_record(rec, enable, true); } /** * ftrace_test_record, check if the record has been enabled or not * @rec: the record to test - * @enable: set to 1 to check if enabled, 0 if it is disabled + * @enable: set to true to check if enabled, false if it is disabled * * The arch code may need to test if a record is already set to * tracing to determine how to modify the function code that it * represents. */ -int ftrace_test_record(struct dyn_ftrace *rec, int enable) +int ftrace_test_record(struct dyn_ftrace *rec, bool enable) { - return ftrace_check_record(rec, enable, 0); + return ftrace_check_record(rec, enable, false); } static struct ftrace_ops * @@ -2255,7 +2285,7 @@ if (hash_contains_ip(ip, op->func_hash)) return op; - } + } return NULL; } @@ -2345,19 +2375,77 @@ return NULL; } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +/* Protected by rcu_tasks for reading, and direct_mutex for writing */ +static struct ftrace_hash *direct_functions = EMPTY_HASH; +static DEFINE_MUTEX(direct_mutex); +int ftrace_direct_func_count; + +/* + * Search the direct_functions hash to see if the given instruction pointer + * has a direct caller attached to it. + */ +unsigned long ftrace_find_rec_direct(unsigned long ip) +{ + struct ftrace_func_entry *entry; + + entry = __ftrace_lookup_ip(direct_functions, ip); + if (!entry) + return 0; + + return entry->direct; +} + +static void call_direct_funcs(unsigned long ip, unsigned long pip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + unsigned long addr; + + addr = ftrace_find_rec_direct(ip); + if (!addr) + return; + + arch_ftrace_set_direct_caller(regs, addr); +} + +struct ftrace_ops direct_ops = { + .func = call_direct_funcs, + .flags = FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_RECURSION_SAFE + | FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS + | FTRACE_OPS_FL_PERMANENT, + /* + * By declaring the main trampoline as this trampoline + * it will never have one allocated for it. Allocated + * trampolines should not call direct functions. + * The direct_ops should only be called by the builtin + * ftrace_regs_caller trampoline. + */ + .trampoline = FTRACE_REGS_ADDR, +}; +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + /** * ftrace_get_addr_new - Get the call address to set to * @rec: The ftrace record descriptor * * If the record has the FTRACE_FL_REGS set, that means that it * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS - * is not not set, then it wants to convert to the normal callback. + * is not set, then it wants to convert to the normal callback. * * Returns the address of the trampoline to set to */ unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec) { struct ftrace_ops *ops; + unsigned long addr; + + if ((rec->flags & FTRACE_FL_DIRECT) && + (ftrace_rec_count(rec) == 1)) { + addr = ftrace_find_rec_direct(rec->ip); + if (addr) + return addr; + WARN_ON_ONCE(1); + } /* Trampolines take precedence over regs */ if (rec->flags & FTRACE_FL_TRAMP) { @@ -2390,6 +2478,15 @@ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec) { struct ftrace_ops *ops; + unsigned long addr; + + /* Direct calls take precedence over trampolines */ + if (rec->flags & FTRACE_FL_DIRECT_EN) { + addr = ftrace_find_rec_direct(rec->ip); + if (addr) + return addr; + WARN_ON_ONCE(1); + } /* Trampolines take precedence over regs */ if (rec->flags & FTRACE_FL_TRAMP_EN) { @@ -2410,7 +2507,7 @@ } static int -__ftrace_replace_code(struct dyn_ftrace *rec, int enable) +__ftrace_replace_code(struct dyn_ftrace *rec, bool enable) { unsigned long ftrace_old_addr; unsigned long ftrace_addr; @@ -2442,13 +2539,15 @@ return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr); } - return -1; /* unknow ftrace bug */ + return -1; /* unknown ftrace bug */ } -void __weak ftrace_replace_code(int enable) +void __weak ftrace_replace_code(int mod_flags) { struct dyn_ftrace *rec; struct ftrace_page *pg; + bool enable = mod_flags & FTRACE_MODIFY_ENABLE_FL; + int schedulable = mod_flags & FTRACE_MODIFY_MAY_SLEEP_FL; int failed; if (unlikely(ftrace_disabled)) @@ -2465,6 +2564,8 @@ /* Stop processing */ return; } + if (schedulable) + cond_resched(); } while_for_each_ftrace_rec(); } @@ -2541,14 +2642,14 @@ } static int -ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) +ftrace_nop_initialize(struct module *mod, struct dyn_ftrace *rec) { int ret; if (unlikely(ftrace_disabled)) return 0; - ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR); + ret = ftrace_init_nop(mod, rec); if (ret) { ftrace_bug_type = FTRACE_BUG_INIT; ftrace_bug(ret, rec); @@ -2578,7 +2679,11 @@ void ftrace_modify_all_code(int command) { int update = command & FTRACE_UPDATE_TRACE_FUNC; + int mod_flags = 0; int err = 0; + + if (command & FTRACE_MAY_SLEEP) + mod_flags = FTRACE_MODIFY_MAY_SLEEP_FL; /* * If the ftrace_caller calls a ftrace_ops func directly, @@ -2597,9 +2702,9 @@ } if (command & FTRACE_UPDATE_CALLS) - ftrace_replace_code(1); + ftrace_replace_code(mod_flags | FTRACE_MODIFY_ENABLE_FL); else if (command & FTRACE_DISABLE_CALLS) - ftrace_replace_code(0); + ftrace_replace_code(mod_flags); if (update && ftrace_trace_function != ftrace_ops_list_func) { function_trace_op = set_function_trace_op; @@ -2692,6 +2797,51 @@ { } +/* List of trace_ops that have allocated trampolines */ +static LIST_HEAD(ftrace_ops_trampoline_list); + +static void ftrace_add_trampoline_to_kallsyms(struct ftrace_ops *ops) +{ + lockdep_assert_held(&ftrace_lock); + list_add_rcu(&ops->list, &ftrace_ops_trampoline_list); +} + +static void ftrace_remove_trampoline_from_kallsyms(struct ftrace_ops *ops) +{ + lockdep_assert_held(&ftrace_lock); + list_del_rcu(&ops->list); + synchronize_rcu(); +} + +/* + * "__builtin__ftrace" is used as a module name in /proc/kallsyms for symbols + * for pages allocated for ftrace purposes, even though "__builtin__ftrace" is + * not a module. + */ +#define FTRACE_TRAMPOLINE_MOD "__builtin__ftrace" +#define FTRACE_TRAMPOLINE_SYM "ftrace_trampoline" + +static void ftrace_trampoline_free(struct ftrace_ops *ops) +{ + if (ops && (ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP) && + ops->trampoline) { + /* + * Record the text poke event before the ksymbol unregister + * event. + */ + perf_event_text_poke((void *)ops->trampoline, + (void *)ops->trampoline, + ops->trampoline_size, NULL, 0); + perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL, + ops->trampoline, ops->trampoline_size, + true, FTRACE_TRAMPOLINE_SYM); + /* Remove from kallsyms after the perf events */ + ftrace_remove_trampoline_from_kallsyms(ops); + } + + arch_ftrace_trampoline_free(ops); +} + static void ftrace_startup_enable(int command) { if (saved_ftrace_func != ftrace_trace_function) { @@ -2712,7 +2862,7 @@ update_all_ops = false; } -static int ftrace_startup(struct ftrace_ops *ops, int command) +int ftrace_startup(struct ftrace_ops *ops, int command) { int ret; @@ -2741,6 +2891,8 @@ __unregister_ftrace_function(ops); ftrace_start_up--; ops->flags &= ~FTRACE_OPS_FL_ENABLED; + if (ops->flags & FTRACE_OPS_FL_DYNAMIC) + ftrace_trampoline_free(ops); return ret; } @@ -2749,12 +2901,22 @@ ftrace_startup_enable(command); + /* + * If ftrace is in an undefined state, we just remove ops from list + * to prevent the NULL pointer, instead of totally rolling it back and + * free trampoline, because those actions could cause further damage. + */ + if (unlikely(ftrace_disabled)) { + __unregister_ftrace_function(ops); + return -ENODEV; + } + ops->flags &= ~FTRACE_OPS_FL_ADDING; return 0; } -static int ftrace_shutdown(struct ftrace_ops *ops, int command) +int ftrace_shutdown(struct ftrace_ops *ops, int command) { int ret; @@ -2786,18 +2948,8 @@ command |= FTRACE_UPDATE_TRACE_FUNC; } - if (!command || !ftrace_enabled) { - /* - * If these are dynamic or per_cpu ops, they still - * need their data freed. Since, function tracing is - * not currently active, we can just free them - * without synchronizing all CPUs. - */ - if (ops->flags & FTRACE_OPS_FL_DYNAMIC) - goto free_ops; - - return 0; - } + if (!command || !ftrace_enabled) + goto out; /* * If the ops uses a trampoline, then it needs to be @@ -2834,6 +2986,7 @@ removed_ops = NULL; ops->flags &= ~FTRACE_OPS_FL_REMOVING; +out: /* * Dynamic ops may be freed, we must make sure that all * callers are done before leaving this function. @@ -2849,20 +3002,19 @@ * infrastructure to do the synchronization, thus we must do it * ourselves. */ - schedule_on_each_cpu(ftrace_sync); + synchronize_rcu_tasks_rude(); /* - * When the kernel is preeptive, tasks can be preempted + * When the kernel is preemptive, tasks can be preempted * while on a ftrace trampoline. Just scheduling a task on * a CPU is not good enough to flush them. Calling * synchornize_rcu_tasks() will wait for those tasks to * execute and either schedule voluntarily or enter user space. */ - if (IS_ENABLED(CONFIG_PREEMPT)) + if (IS_ENABLED(CONFIG_PREEMPTION)) synchronize_rcu_tasks(); - free_ops: - arch_ftrace_trampoline_free(ops); + ftrace_trampoline_free(ops); } return 0; @@ -2904,6 +3056,8 @@ static u64 ftrace_update_time; unsigned long ftrace_update_tot_cnt; +unsigned long ftrace_number_of_pages; +unsigned long ftrace_number_of_groups; static inline int ops_traces_mod(struct ftrace_ops *ops) { @@ -2986,7 +3140,7 @@ * to the NOP instructions. */ if (!__is_defined(CC_USING_NOP_MCOUNT) && - !ftrace_code_disable(mod, p)) + !ftrace_nop_initialize(mod, p)) break; update_cnt++; @@ -3003,18 +3157,20 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count) { int order; + int pages; int cnt; if (WARN_ON(!count)) return -EINVAL; - order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE)); + pages = DIV_ROUND_UP(count, ENTRIES_PER_PAGE); + order = get_count_order(pages); /* * We want to fill as much as possible. No more than a page * may be empty. */ - while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE) + if (!is_power_of_2(pages)) order--; again: @@ -3024,12 +3180,15 @@ /* if we can't allocate this size, try something smaller */ if (!order) return -ENOMEM; - order >>= 1; + order--; goto again; } + ftrace_number_of_pages += 1 << order; + ftrace_number_of_groups++; + cnt = (PAGE_SIZE << order) / ENTRY_SIZE; - pg->size = cnt; + pg->order = order; if (cnt > count) cnt = count; @@ -3037,16 +3196,31 @@ return cnt; } +static void ftrace_free_pages(struct ftrace_page *pages) +{ + struct ftrace_page *pg = pages; + + while (pg) { + if (pg->records) { + free_pages((unsigned long)pg->records, pg->order); + ftrace_number_of_pages -= 1 << pg->order; + } + pages = pg->next; + kfree(pg); + pg = pages; + ftrace_number_of_groups--; + } +} + static struct ftrace_page * ftrace_allocate_pages(unsigned long num_to_init) { struct ftrace_page *start_pg; struct ftrace_page *pg; - int order; int cnt; if (!num_to_init) - return 0; + return NULL; start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL); if (!pg) @@ -3076,14 +3250,7 @@ return start_pg; free_pages: - pg = start_pg; - while (pg) { - order = get_count_order(pg->size / ENTRIES_PER_PAGE); - free_pages((unsigned long)pg->records, order); - start_pg = pg->next; - kfree(pg); - pg = start_pg; - } + ftrace_free_pages(start_pg); pr_info("ftrace: FAILED to allocate memory for functions\n"); return NULL; } @@ -3493,10 +3660,11 @@ if (iter->flags & FTRACE_ITER_ENABLED) { struct ftrace_ops *ops; - seq_printf(m, " (%ld)%s%s", + seq_printf(m, " (%ld)%s%s%s", ftrace_rec_count(rec), rec->flags & FTRACE_FL_REGS ? " R" : " ", - rec->flags & FTRACE_FL_IPMODIFY ? " I" : " "); + rec->flags & FTRACE_FL_IPMODIFY ? " I" : " ", + rec->flags & FTRACE_FL_DIRECT ? " D" : " "); if (rec->flags & FTRACE_FL_TRAMP_EN) { ops = ftrace_find_tramp_ops_any(rec); if (ops) { @@ -3512,7 +3680,14 @@ } else { add_trampoline_func(m, NULL, rec); } - } + if (rec->flags & FTRACE_FL_DIRECT) { + unsigned long direct; + + direct = ftrace_find_rec_direct(rec->ip); + if (direct) + seq_printf(m, "\n\tdirect-->%pS", (void *)direct); + } + } seq_putc(m, '\n'); @@ -3530,6 +3705,11 @@ ftrace_avail_open(struct inode *inode, struct file *file) { struct ftrace_iterator *iter; + int ret; + + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; if (unlikely(ftrace_disabled)) return -ENODEV; @@ -3548,6 +3728,15 @@ ftrace_enabled_open(struct inode *inode, struct file *file) { struct ftrace_iterator *iter; + + /* + * This shows us what functions are currently being + * traced and by what. Not sure if we want lockdown + * to hide such critical information for an admin. + * Although, perhaps it can show information we don't + * want people to see, but if something is tracing + * something, we probably want to know about it. + */ iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); if (!iter) @@ -3591,7 +3780,7 @@ if (unlikely(ftrace_disabled)) return -ENODEV; - if (tr && trace_array_get(tr) < 0) + if (tracing_check_open_get_tr(tr)) return -ENODEV; iter = kzalloc(sizeof(*iter), GFP_KERNEL); @@ -3669,6 +3858,7 @@ { struct ftrace_ops *ops = inode->i_private; + /* Checks for tracefs lockdown */ return ftrace_regex_open(ops, FTRACE_ITER_FILTER | FTRACE_ITER_DO_PROBES, inode, file); @@ -3679,6 +3869,7 @@ { struct ftrace_ops *ops = inode->i_private; + /* Checks for tracefs lockdown */ return ftrace_regex_open(ops, FTRACE_ITER_NOTRACE, inode, file); } @@ -3759,6 +3950,31 @@ } static int +add_rec_by_index(struct ftrace_hash *hash, struct ftrace_glob *func_g, + int clear_filter) +{ + long index = simple_strtoul(func_g->search, NULL, 0); + struct ftrace_page *pg; + struct dyn_ftrace *rec; + + /* The index starts at 1 */ + if (--index < 0) + return 0; + + do_for_each_ftrace_rec(pg, rec) { + if (pg->index <= index) { + index -= pg->index; + /* this is a double loop, break goes to the next page */ + break; + } + rec = &pg->records[index]; + enter_record(hash, rec, clear_filter); + return 1; + } while_for_each_ftrace_rec(); + return 0; +} + +static int ftrace_match_record(struct dyn_ftrace *rec, struct ftrace_glob *func_g, struct ftrace_glob *mod_g, int exclude_mod) { @@ -3825,6 +4041,11 @@ if (unlikely(ftrace_disabled)) goto out_unlock; + + if (func_g.type == MATCH_INDEX) { + found = add_rec_by_index(hash, &func_g, clear_filter); + goto out_unlock; + } do_for_each_ftrace_rec(pg, rec) { @@ -3906,7 +4127,7 @@ static bool module_exists(const char *module) { /* All modules have the symbol __this_module */ - const char this_mod[] = "__this_module"; + static const char this_mod[] = "__this_module"; char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2]; unsigned long val; int n; @@ -4183,7 +4404,7 @@ * @ip: The instruction pointer address to map @data to * @data: The data to map to @ip * - * Returns 0 on succes otherwise an error. + * Returns 0 on success otherwise an error. */ int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper, unsigned long ip, void *data) @@ -4213,7 +4434,7 @@ * @ip: The instruction pointer address to remove the data from * * Returns the data if it is found, otherwise NULL. - * Note, if the data pointer is used as the data itself, (see + * Note, if the data pointer is used as the data itself, (see * ftrace_func_mapper_find_ip(), then the return value may be meaningless, * if the data pointer was set to zero. */ @@ -4351,7 +4572,7 @@ /* * Note, there's a small window here that the func_hash->filter_hash - * may be NULL or empty. Need to be carefule when reading the loop. + * may be NULL or empty. Need to be careful when reading the loop. */ mutex_lock(&probe->ops.func_hash->regex_lock); @@ -4552,7 +4773,7 @@ if (ftrace_enabled && !ftrace_hash_empty(hash)) ftrace_run_modify_code(&probe->ops, FTRACE_UPDATE_CALLS, &old_hash_ops); - synchronize_sched(); + synchronize_rcu(); hlist_for_each_entry_safe(entry, tmp, &hhd, hlist) { hlist_del(&entry->hlist); @@ -4794,8 +5015,404 @@ ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove, int reset, int enable) { - return ftrace_set_hash(ops, 0, 0, ip, remove, reset, enable); + return ftrace_set_hash(ops, NULL, 0, ip, remove, reset, enable); } + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + +struct ftrace_direct_func { + struct list_head next; + unsigned long addr; + int count; +}; + +static LIST_HEAD(ftrace_direct_funcs); + +/** + * ftrace_find_direct_func - test an address if it is a registered direct caller + * @addr: The address of a registered direct caller + * + * This searches to see if a ftrace direct caller has been registered + * at a specific address, and if so, it returns a descriptor for it. + * + * This can be used by architecture code to see if an address is + * a direct caller (trampoline) attached to a fentry/mcount location. + * This is useful for the function_graph tracer, as it may need to + * do adjustments if it traced a location that also has a direct + * trampoline attached to it. + */ +struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr) +{ + struct ftrace_direct_func *entry; + bool found = false; + + /* May be called by fgraph trampoline (protected by rcu tasks) */ + list_for_each_entry_rcu(entry, &ftrace_direct_funcs, next) { + if (entry->addr == addr) { + found = true; + break; + } + } + if (found) + return entry; + + return NULL; +} + +static struct ftrace_direct_func *ftrace_alloc_direct_func(unsigned long addr) +{ + struct ftrace_direct_func *direct; + + direct = kmalloc(sizeof(*direct), GFP_KERNEL); + if (!direct) + return NULL; + direct->addr = addr; + direct->count = 0; + list_add_rcu(&direct->next, &ftrace_direct_funcs); + ftrace_direct_func_count++; + return direct; +} + +/** + * register_ftrace_direct - Call a custom trampoline directly + * @ip: The address of the nop at the beginning of a function + * @addr: The address of the trampoline to call at @ip + * + * This is used to connect a direct call from the nop location (@ip) + * at the start of ftrace traced functions. The location that it calls + * (@addr) must be able to handle a direct call, and save the parameters + * of the function being traced, and restore them (or inject new ones + * if needed), before returning. + * + * Returns: + * 0 on success + * -EBUSY - Another direct function is already attached (there can be only one) + * -ENODEV - @ip does not point to a ftrace nop location (or not supported) + * -ENOMEM - There was an allocation failure. + */ +int register_ftrace_direct(unsigned long ip, unsigned long addr) +{ + struct ftrace_direct_func *direct; + struct ftrace_func_entry *entry; + struct ftrace_hash *free_hash = NULL; + struct dyn_ftrace *rec; + int ret = -EBUSY; + + mutex_lock(&direct_mutex); + + /* See if there's a direct function at @ip already */ + if (ftrace_find_rec_direct(ip)) + goto out_unlock; + + ret = -ENODEV; + rec = lookup_rec(ip, ip); + if (!rec) + goto out_unlock; + + /* + * Check if the rec says it has a direct call but we didn't + * find one earlier? + */ + if (WARN_ON(rec->flags & FTRACE_FL_DIRECT)) + goto out_unlock; + + /* Make sure the ip points to the exact record */ + if (ip != rec->ip) { + ip = rec->ip; + /* Need to check this ip for a direct. */ + if (ftrace_find_rec_direct(ip)) + goto out_unlock; + } + + ret = -ENOMEM; + if (ftrace_hash_empty(direct_functions) || + direct_functions->count > 2 * (1 << direct_functions->size_bits)) { + struct ftrace_hash *new_hash; + int size = ftrace_hash_empty(direct_functions) ? 0 : + direct_functions->count + 1; + + if (size < 32) + size = 32; + + new_hash = dup_hash(direct_functions, size); + if (!new_hash) + goto out_unlock; + + free_hash = direct_functions; + direct_functions = new_hash; + } + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto out_unlock; + + direct = ftrace_find_direct_func(addr); + if (!direct) { + direct = ftrace_alloc_direct_func(addr); + if (!direct) { + kfree(entry); + goto out_unlock; + } + } + + entry->ip = ip; + entry->direct = addr; + __add_hash_entry(direct_functions, entry); + + ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0); + + if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) { + ret = register_ftrace_function(&direct_ops); + if (ret) + ftrace_set_filter_ip(&direct_ops, ip, 1, 0); + } + + if (ret) { + remove_hash_entry(direct_functions, entry); + kfree(entry); + if (!direct->count) { + list_del_rcu(&direct->next); + synchronize_rcu_tasks(); + kfree(direct); + if (free_hash) + free_ftrace_hash(free_hash); + free_hash = NULL; + ftrace_direct_func_count--; + } + } else { + direct->count++; + } + out_unlock: + mutex_unlock(&direct_mutex); + + if (free_hash) { + synchronize_rcu_tasks(); + free_ftrace_hash(free_hash); + } + + return ret; +} +EXPORT_SYMBOL_GPL(register_ftrace_direct); + +static struct ftrace_func_entry *find_direct_entry(unsigned long *ip, + struct dyn_ftrace **recp) +{ + struct ftrace_func_entry *entry; + struct dyn_ftrace *rec; + + rec = lookup_rec(*ip, *ip); + if (!rec) + return NULL; + + entry = __ftrace_lookup_ip(direct_functions, rec->ip); + if (!entry) { + WARN_ON(rec->flags & FTRACE_FL_DIRECT); + return NULL; + } + + WARN_ON(!(rec->flags & FTRACE_FL_DIRECT)); + + /* Passed in ip just needs to be on the call site */ + *ip = rec->ip; + + if (recp) + *recp = rec; + + return entry; +} + +int unregister_ftrace_direct(unsigned long ip, unsigned long addr) +{ + struct ftrace_direct_func *direct; + struct ftrace_func_entry *entry; + int ret = -ENODEV; + + mutex_lock(&direct_mutex); + + entry = find_direct_entry(&ip, NULL); + if (!entry) + goto out_unlock; + + if (direct_functions->count == 1) + unregister_ftrace_function(&direct_ops); + + ret = ftrace_set_filter_ip(&direct_ops, ip, 1, 0); + + WARN_ON(ret); + + remove_hash_entry(direct_functions, entry); + + direct = ftrace_find_direct_func(addr); + if (!WARN_ON(!direct)) { + /* This is the good path (see the ! before WARN) */ + direct->count--; + WARN_ON(direct->count < 0); + if (!direct->count) { + list_del_rcu(&direct->next); + synchronize_rcu_tasks(); + kfree(direct); + kfree(entry); + ftrace_direct_func_count--; + } + } + out_unlock: + mutex_unlock(&direct_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(unregister_ftrace_direct); + +static struct ftrace_ops stub_ops = { + .func = ftrace_stub, +}; + +/** + * ftrace_modify_direct_caller - modify ftrace nop directly + * @entry: The ftrace hash entry of the direct helper for @rec + * @rec: The record representing the function site to patch + * @old_addr: The location that the site at @rec->ip currently calls + * @new_addr: The location that the site at @rec->ip should call + * + * An architecture may overwrite this function to optimize the + * changing of the direct callback on an ftrace nop location. + * This is called with the ftrace_lock mutex held, and no other + * ftrace callbacks are on the associated record (@rec). Thus, + * it is safe to modify the ftrace record, where it should be + * currently calling @old_addr directly, to call @new_addr. + * + * Safety checks should be made to make sure that the code at + * @rec->ip is currently calling @old_addr. And this must + * also update entry->direct to @new_addr. + */ +int __weak ftrace_modify_direct_caller(struct ftrace_func_entry *entry, + struct dyn_ftrace *rec, + unsigned long old_addr, + unsigned long new_addr) +{ + unsigned long ip = rec->ip; + int ret; + + /* + * The ftrace_lock was used to determine if the record + * had more than one registered user to it. If it did, + * we needed to prevent that from changing to do the quick + * switch. But if it did not (only a direct caller was attached) + * then this function is called. But this function can deal + * with attached callers to the rec that we care about, and + * since this function uses standard ftrace calls that take + * the ftrace_lock mutex, we need to release it. + */ + mutex_unlock(&ftrace_lock); + + /* + * By setting a stub function at the same address, we force + * the code to call the iterator and the direct_ops helper. + * This means that @ip does not call the direct call, and + * we can simply modify it. + */ + ret = ftrace_set_filter_ip(&stub_ops, ip, 0, 0); + if (ret) + goto out_lock; + + ret = register_ftrace_function(&stub_ops); + if (ret) { + ftrace_set_filter_ip(&stub_ops, ip, 1, 0); + goto out_lock; + } + + entry->direct = new_addr; + + /* + * By removing the stub, we put back the direct call, calling + * the @new_addr. + */ + unregister_ftrace_function(&stub_ops); + ftrace_set_filter_ip(&stub_ops, ip, 1, 0); + + out_lock: + mutex_lock(&ftrace_lock); + + return ret; +} + +/** + * modify_ftrace_direct - Modify an existing direct call to call something else + * @ip: The instruction pointer to modify + * @old_addr: The address that the current @ip calls directly + * @new_addr: The address that the @ip should call + * + * This modifies a ftrace direct caller at an instruction pointer without + * having to disable it first. The direct call will switch over to the + * @new_addr without missing anything. + * + * Returns: zero on success. Non zero on error, which includes: + * -ENODEV : the @ip given has no direct caller attached + * -EINVAL : the @old_addr does not match the current direct caller + */ +int modify_ftrace_direct(unsigned long ip, + unsigned long old_addr, unsigned long new_addr) +{ + struct ftrace_direct_func *direct, *new_direct = NULL; + struct ftrace_func_entry *entry; + struct dyn_ftrace *rec; + int ret = -ENODEV; + + mutex_lock(&direct_mutex); + + mutex_lock(&ftrace_lock); + entry = find_direct_entry(&ip, &rec); + if (!entry) + goto out_unlock; + + ret = -EINVAL; + if (entry->direct != old_addr) + goto out_unlock; + + direct = ftrace_find_direct_func(old_addr); + if (WARN_ON(!direct)) + goto out_unlock; + if (direct->count > 1) { + ret = -ENOMEM; + new_direct = ftrace_alloc_direct_func(new_addr); + if (!new_direct) + goto out_unlock; + direct->count--; + new_direct->count++; + } else { + direct->addr = new_addr; + } + + /* + * If there's no other ftrace callback on the rec->ip location, + * then it can be changed directly by the architecture. + * If there is another caller, then we just need to change the + * direct caller helper to point to @new_addr. + */ + if (ftrace_rec_count(rec) == 1) { + ret = ftrace_modify_direct_caller(entry, rec, old_addr, new_addr); + } else { + entry->direct = new_addr; + ret = 0; + } + + if (ret) { + direct->addr = old_addr; + if (unlikely(new_direct)) { + direct->count++; + list_del_rcu(&new_direct->next); + synchronize_rcu_tasks(); + kfree(new_direct); + ftrace_direct_func_count--; + } + } + + out_unlock: + mutex_unlock(&ftrace_lock); + mutex_unlock(&direct_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(modify_ftrace_direct); +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ /** * ftrace_set_filter_ip - set a function to filter on in ftrace by address @@ -4967,7 +5584,7 @@ struct ftrace_hash *hash; hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); - if (WARN_ON(!hash)) + if (MEM_FAIL(!hash, "Failed to allocate hash\n")) return; while (buf) { @@ -5045,8 +5662,12 @@ if (filter_hash) { orig_hash = &iter->ops->func_hash->filter_hash; - if (iter->tr && !list_empty(&iter->tr->mod_trace)) - iter->hash->flags |= FTRACE_HASH_FL_MOD; + if (iter->tr) { + if (list_empty(&iter->tr->mod_trace)) + iter->hash->flags &= ~FTRACE_HASH_FL_MOD; + else + iter->hash->flags |= FTRACE_HASH_FL_MOD; + } } else orig_hash = &iter->ops->func_hash->notrace_hash; @@ -5220,8 +5841,12 @@ __ftrace_graph_open(struct inode *inode, struct file *file, struct ftrace_graph_data *fgd) { - int ret = 0; + int ret; struct ftrace_hash *new_hash = NULL; + + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; if (file->f_mode & FMODE_WRITE) { const int size_bits = FTRACE_HASH_DEFAULT_BITS; @@ -5382,7 +6007,7 @@ * infrastructure to do the synchronization, thus we must do it * ourselves. */ - schedule_on_each_cpu(ftrace_sync); + synchronize_rcu_tasks_rude(); free_ftrace_hash(old_hash); } @@ -5514,7 +6139,7 @@ /* * The name "destroy_filter_files" is really a misnomer. Although - * in the future, it may actualy delete the files, but this is + * in the future, it may actually delete the files, but this is * really intended to make sure the ops passed in are disabled * and that when this function returns, the caller is free to * free the ops. @@ -5567,13 +6192,15 @@ return 0; } -static int __norecordmcount ftrace_process_locs(struct module *mod, - unsigned long *start, - unsigned long *end) +static int ftrace_process_locs(struct module *mod, + unsigned long *start, + unsigned long *end) { + struct ftrace_page *pg_unuse = NULL; struct ftrace_page *start_pg; struct ftrace_page *pg; struct dyn_ftrace *rec; + unsigned long skipped = 0; unsigned long count; unsigned long *p; unsigned long addr; @@ -5619,6 +6246,7 @@ p = start; pg = start_pg; while (p < end) { + unsigned long end_offset; addr = ftrace_call_adjust(*p++); /* * Some architecture linkers will pad between @@ -5626,10 +6254,13 @@ * object files to satisfy alignments. * Skip any NULL pointers. */ - if (!addr) + if (!addr) { + skipped++; continue; + } - if (pg->index == pg->size) { + end_offset = (pg->index+1) * sizeof(pg->records[0]); + if (end_offset > PAGE_SIZE << pg->order) { /* We should have allocated enough */ if (WARN_ON(!pg->next)) break; @@ -5640,8 +6271,10 @@ rec->ip = addr; } - /* We should have used all pages */ - WARN_ON(pg->next); + if (pg->next) { + pg_unuse = pg->next; + pg->next = NULL; + } /* Assign the last page to ftrace_pages */ ftrace_pages = pg; @@ -5663,6 +6296,11 @@ out: mutex_unlock(&ftrace_lock); + /* We should have used all pages unless we skipped some */ + if (pg_unuse) { + WARN_ON(!skipped); + ftrace_free_pages(pg_unuse); + } return ret; } @@ -5683,6 +6321,27 @@ unsigned int num_funcs; }; +static int ftrace_get_trampoline_kallsym(unsigned int symnum, + unsigned long *value, char *type, + char *name, char *module_name, + int *exported) +{ + struct ftrace_ops *op; + + list_for_each_entry_rcu(op, &ftrace_ops_trampoline_list, list) { + if (!op->trampoline || symnum--) + continue; + *value = op->trampoline; + *type = 't'; + strlcpy(name, FTRACE_TRAMPOLINE_SYM, KSYM_NAME_LEN); + strlcpy(module_name, FTRACE_TRAMPOLINE_MOD, MODULE_NAME_LEN); + *exported = 0; + return 0; + } + + return -ERANGE; +} + #ifdef CONFIG_MODULES #define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next) @@ -5696,9 +6355,17 @@ for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) { if (ops_references_rec(ops, rec)) { + if (WARN_ON_ONCE(ops->flags & FTRACE_OPS_FL_DIRECT)) + continue; + if (WARN_ON_ONCE(ops->flags & FTRACE_OPS_FL_IPMODIFY)) + continue; cnt++; if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) rec->flags |= FTRACE_FL_REGS; + if (cnt == 1 && ops->trampoline) + rec->flags |= FTRACE_FL_TRAMP; + else + rec->flags &= ~FTRACE_FL_TRAMP; } } @@ -5769,7 +6436,6 @@ struct ftrace_page **last_pg; struct ftrace_page *tmp_page = NULL; struct ftrace_page *pg; - int order; mutex_lock(&ftrace_lock); @@ -5779,7 +6445,7 @@ list_for_each_entry_safe(mod_map, n, &ftrace_mod_maps, list) { if (mod_map->mod == mod) { list_del_rcu(&mod_map->list); - call_rcu_sched(&mod_map->rcu, ftrace_free_mod_map); + call_rcu(&mod_map->rcu, ftrace_free_mod_map); break; } } @@ -5820,10 +6486,13 @@ /* Needs to be called outside of ftrace_lock */ clear_mod_from_hashes(pg); - order = get_count_order(pg->size / ENTRIES_PER_PAGE); - free_pages((unsigned long)pg->records, order); + if (pg->records) { + free_pages((unsigned long)pg->records, pg->order); + ftrace_number_of_pages -= 1 << pg->order; + } tmp_page = pg->next; kfree(pg); + ftrace_number_of_groups--; } } @@ -5840,7 +6509,7 @@ /* * If the tracing is enabled, go ahead and enable the record. * - * The reason not to enable the record immediatelly is the + * The reason not to enable the record immediately is the * inherent check of ftrace_make_nop/ftrace_make_call for * correct previous instructions. Making first the NOP * conversion puts the module to the correct state, thus @@ -5999,7 +6668,7 @@ struct ftrace_mod_map *mod_map; const char *ret = NULL; - /* mod_map is freed via call_rcu_sched() */ + /* mod_map is freed via call_rcu() */ preempt_disable(); list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) { ret = ftrace_func_address_lookup(mod_map, addr, size, off, sym); @@ -6020,6 +6689,7 @@ { struct ftrace_mod_map *mod_map; struct ftrace_mod_func *mod_func; + int ret; preempt_disable(); list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) { @@ -6046,8 +6716,10 @@ WARN_ON(1); break; } + ret = ftrace_get_trampoline_kallsym(symnum, value, type, name, + module_name, exported); preempt_enable(); - return -ERANGE; + return ret; } #else @@ -6058,6 +6730,18 @@ unsigned long start, unsigned long end) { return NULL; +} +int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value, + char *type, char *name, char *module_name, + int *exported) +{ + int ret; + + preempt_disable(); + ret = ftrace_get_trampoline_kallsym(symnum, value, type, name, + module_name, exported); + preempt_enable(); + return ret; } #endif /* CONFIG_MODULES */ @@ -6072,11 +6756,7 @@ { struct ftrace_func_entry *entry; - if (ftrace_hash_empty(hash)) - return; - - entry = __ftrace_lookup_ip(hash, func->ip); - + entry = ftrace_lookup_ip(hash, func->ip); /* * Do not allow this rec to match again. * Yeah, it may waste some memory, but will be removed @@ -6110,7 +6790,7 @@ func = kmalloc(sizeof(*func), GFP_KERNEL); if (!func) { - WARN_ONCE(1, "alloc failure, ftrace filter could be stale\n"); + MEM_FAIL(1, "alloc failure, ftrace filter could be stale\n"); return; } @@ -6129,7 +6809,6 @@ struct ftrace_mod_map *mod_map = NULL; struct ftrace_init_func *func, *func_next; struct list_head clear_hash; - int order; INIT_LIST_HEAD(&clear_hash); @@ -6167,8 +6846,11 @@ ftrace_update_tot_cnt--; if (!pg->index) { *last_pg = pg->next; - order = get_count_order(pg->size / ENTRIES_PER_PAGE); - free_pages((unsigned long)pg->records, order); + if (pg->records) { + free_pages((unsigned long)pg->records, pg->order); + ftrace_number_of_pages -= 1 << pg->order; + } + ftrace_number_of_groups--; kfree(pg); pg = container_of(last_pg, struct ftrace_page, next); if (!(*last_pg)) @@ -6216,13 +6898,16 @@ } pr_info("ftrace: allocating %ld entries in %ld pages\n", - count, count / ENTRIES_PER_PAGE + 1); + count, DIV_ROUND_UP(count, ENTRIES_PER_PAGE)); last_ftrace_enabled = ftrace_enabled = 1; ret = ftrace_process_locs(NULL, __start_mcount_loc, __stop_mcount_loc); + + pr_info("ftrace: allocated %ld pages with %ld groups\n", + ftrace_number_of_pages, ftrace_number_of_groups); set_ftrace_early_filters(); @@ -6238,7 +6923,24 @@ static void ftrace_update_trampoline(struct ftrace_ops *ops) { + unsigned long trampoline = ops->trampoline; + arch_ftrace_update_trampoline(ops); + if (ops->trampoline && ops->trampoline != trampoline && + (ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) { + /* Add to kallsyms before the perf events */ + ftrace_add_trampoline_to_kallsyms(ops); + perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL, + ops->trampoline, ops->trampoline_size, false, + FTRACE_TRAMPOLINE_SYM); + /* + * Record the perf text poke event after the ksymbol register + * event. + */ + perf_event_text_poke((void *)ops->trampoline, NULL, 0, + (void *)ops->trampoline, + ops->trampoline_size); + } } void ftrace_init_trace_array(struct trace_array *tr) @@ -6249,7 +6951,7 @@ } #else -static struct ftrace_ops global_ops = { +struct ftrace_ops global_ops = { .func = ftrace_stub, .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED | @@ -6266,30 +6968,9 @@ static inline int ftrace_init_dyn_tracefs(struct dentry *d_tracer) { return 0; } static inline void ftrace_startup_enable(int command) { } static inline void ftrace_startup_all(int command) { } -/* Keep as macros so we do not need to define the commands */ -# define ftrace_startup(ops, command) \ - ({ \ - int ___ret = __register_ftrace_function(ops); \ - if (!___ret) \ - (ops)->flags |= FTRACE_OPS_FL_ENABLED; \ - ___ret; \ - }) -# define ftrace_shutdown(ops, command) \ - ({ \ - int ___ret = __unregister_ftrace_function(ops); \ - if (!___ret) \ - (ops)->flags &= ~FTRACE_OPS_FL_ENABLED; \ - ___ret; \ - }) # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) - -static inline int -ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) -{ - return 1; -} static void ftrace_update_trampoline(struct ftrace_ops *ops) { @@ -6334,11 +7015,14 @@ /* * Some of the ops may be dynamically allocated, - * they must be freed after a synchronize_sched(). + * they must be freed after a synchronize_rcu(). */ preempt_disable_notrace(); do_for_each_ftrace_op(op, ftrace_ops_list) { + /* Stub functions don't need to be called nor tested */ + if (op->flags & FTRACE_OPS_FL_STUB) + continue; /* * Check the following for each ops before calling their func: * if RCU flag is set, then rcu_is_watching() must be true @@ -6383,8 +7067,7 @@ } NOKPROBE_SYMBOL(ftrace_ops_list_func); #else -static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) +static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) { __ftrace_ops_list_func(ip, parent_ip, NULL, NULL); } @@ -6445,11 +7128,17 @@ { struct trace_array *tr = data; struct trace_pid_list *pid_list; + struct trace_pid_list *no_pid_list; pid_list = rcu_dereference_sched(tr->function_pids); + no_pid_list = rcu_dereference_sched(tr->function_no_pids); - this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid, - trace_ignore_this_task(pid_list, next)); + if (trace_ignore_this_task(pid_list, no_pid_list, next)) + this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, + FTRACE_PID_IGNORE); + else + this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, + next->pid); } static void @@ -6462,6 +7151,9 @@ pid_list = rcu_dereference_sched(tr->function_pids); trace_filter_add_remove_task(pid_list, self, task); + + pid_list = rcu_dereference_sched(tr->function_no_pids); + trace_filter_add_remove_task(pid_list, self, task); } static void @@ -6471,6 +7163,9 @@ struct trace_array *tr = data; pid_list = rcu_dereference_sched(tr->function_pids); + trace_filter_add_remove_task(pid_list, NULL, task); + + pid_list = rcu_dereference_sched(tr->function_no_pids); trace_filter_add_remove_task(pid_list, NULL, task); } @@ -6489,42 +7184,57 @@ } } -static void clear_ftrace_pids(struct trace_array *tr) +static void clear_ftrace_pids(struct trace_array *tr, int type) { struct trace_pid_list *pid_list; + struct trace_pid_list *no_pid_list; int cpu; pid_list = rcu_dereference_protected(tr->function_pids, lockdep_is_held(&ftrace_lock)); - if (!pid_list) + no_pid_list = rcu_dereference_protected(tr->function_no_pids, + lockdep_is_held(&ftrace_lock)); + + /* Make sure there's something to do */ + if (!pid_type_enabled(type, pid_list, no_pid_list)) return; - unregister_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); + /* See if the pids still need to be checked after this */ + if (!still_need_pid_events(type, pid_list, no_pid_list)) { + unregister_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); + for_each_possible_cpu(cpu) + per_cpu_ptr(tr->array_buffer.data, cpu)->ftrace_ignore_pid = FTRACE_PID_TRACE; + } - for_each_possible_cpu(cpu) - per_cpu_ptr(tr->trace_buffer.data, cpu)->ftrace_ignore_pid = false; + if (type & TRACE_PIDS) + rcu_assign_pointer(tr->function_pids, NULL); - rcu_assign_pointer(tr->function_pids, NULL); + if (type & TRACE_NO_PIDS) + rcu_assign_pointer(tr->function_no_pids, NULL); /* Wait till all users are no longer using pid filtering */ - synchronize_sched(); + synchronize_rcu(); - trace_free_pid_list(pid_list); + if ((type & TRACE_PIDS) && pid_list) + trace_free_pid_list(pid_list); + + if ((type & TRACE_NO_PIDS) && no_pid_list) + trace_free_pid_list(no_pid_list); } void ftrace_clear_pids(struct trace_array *tr) { mutex_lock(&ftrace_lock); - clear_ftrace_pids(tr); + clear_ftrace_pids(tr, TRACE_PIDS | TRACE_NO_PIDS); mutex_unlock(&ftrace_lock); } -static void ftrace_pid_reset(struct trace_array *tr) +static void ftrace_pid_reset(struct trace_array *tr, int type) { mutex_lock(&ftrace_lock); - clear_ftrace_pids(tr); + clear_ftrace_pids(tr, type); ftrace_update_pid_func(); ftrace_startup_all(0); @@ -6588,21 +7298,71 @@ .show = fpid_show, }; -static int -ftrace_pid_open(struct inode *inode, struct file *file) +static void *fnpid_start(struct seq_file *m, loff_t *pos) + __acquires(RCU) { + struct trace_pid_list *pid_list; + struct trace_array *tr = m->private; + + mutex_lock(&ftrace_lock); + rcu_read_lock_sched(); + + pid_list = rcu_dereference_sched(tr->function_no_pids); + + if (!pid_list) + return !(*pos) ? FTRACE_NO_PIDS : NULL; + + return trace_pid_start(pid_list, pos); +} + +static void *fnpid_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct trace_array *tr = m->private; + struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_no_pids); + + if (v == FTRACE_NO_PIDS) { + (*pos)++; + return NULL; + } + return trace_pid_next(pid_list, v, pos); +} + +static const struct seq_operations ftrace_no_pid_sops = { + .start = fnpid_start, + .next = fnpid_next, + .stop = fpid_stop, + .show = fpid_show, +}; + +static int pid_open(struct inode *inode, struct file *file, int type) +{ + const struct seq_operations *seq_ops; struct trace_array *tr = inode->i_private; struct seq_file *m; int ret = 0; - if (trace_array_get(tr) < 0) - return -ENODEV; + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) - ftrace_pid_reset(tr); + ftrace_pid_reset(tr, type); - ret = seq_open(file, &ftrace_pid_sops); + switch (type) { + case TRACE_PIDS: + seq_ops = &ftrace_pid_sops; + break; + case TRACE_NO_PIDS: + seq_ops = &ftrace_no_pid_sops; + break; + default: + trace_array_put(tr); + WARN_ON_ONCE(1); + return -EINVAL; + } + + ret = seq_open(file, seq_ops); if (ret < 0) { trace_array_put(tr); } else { @@ -6614,10 +7374,23 @@ return ret; } +static int +ftrace_pid_open(struct inode *inode, struct file *file) +{ + return pid_open(inode, file, TRACE_PIDS); +} + +static int +ftrace_no_pid_open(struct inode *inode, struct file *file) +{ + return pid_open(inode, file, TRACE_NO_PIDS); +} + static void ignore_task_cpu(void *data) { struct trace_array *tr = data; struct trace_pid_list *pid_list; + struct trace_pid_list *no_pid_list; /* * This function is called by on_each_cpu() while the @@ -6625,18 +7398,25 @@ */ pid_list = rcu_dereference_protected(tr->function_pids, mutex_is_locked(&ftrace_lock)); + no_pid_list = rcu_dereference_protected(tr->function_no_pids, + mutex_is_locked(&ftrace_lock)); - this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid, - trace_ignore_this_task(pid_list, current)); + if (trace_ignore_this_task(pid_list, no_pid_list, current)) + this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, + FTRACE_PID_IGNORE); + else + this_cpu_write(tr->array_buffer.data->ftrace_ignore_pid, + current->pid); } static ssize_t -ftrace_pid_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) +pid_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos, int type) { struct seq_file *m = filp->private_data; struct trace_array *tr = m->private; - struct trace_pid_list *filtered_pids = NULL; + struct trace_pid_list *filtered_pids; + struct trace_pid_list *other_pids; struct trace_pid_list *pid_list; ssize_t ret; @@ -6645,19 +7425,43 @@ mutex_lock(&ftrace_lock); - filtered_pids = rcu_dereference_protected(tr->function_pids, + switch (type) { + case TRACE_PIDS: + filtered_pids = rcu_dereference_protected(tr->function_pids, lockdep_is_held(&ftrace_lock)); + other_pids = rcu_dereference_protected(tr->function_no_pids, + lockdep_is_held(&ftrace_lock)); + break; + case TRACE_NO_PIDS: + filtered_pids = rcu_dereference_protected(tr->function_no_pids, + lockdep_is_held(&ftrace_lock)); + other_pids = rcu_dereference_protected(tr->function_pids, + lockdep_is_held(&ftrace_lock)); + break; + default: + ret = -EINVAL; + WARN_ON_ONCE(1); + goto out; + } ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt); if (ret < 0) goto out; - rcu_assign_pointer(tr->function_pids, pid_list); + switch (type) { + case TRACE_PIDS: + rcu_assign_pointer(tr->function_pids, pid_list); + break; + case TRACE_NO_PIDS: + rcu_assign_pointer(tr->function_no_pids, pid_list); + break; + } + if (filtered_pids) { - synchronize_sched(); + synchronize_rcu(); trace_free_pid_list(filtered_pids); - } else if (pid_list) { + } else if (pid_list && !other_pids) { /* Register a probe to set whether to ignore the tracing of a task */ register_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); } @@ -6680,6 +7484,20 @@ return ret; } +static ssize_t +ftrace_pid_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS); +} + +static ssize_t +ftrace_no_pid_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS); +} + static int ftrace_pid_release(struct inode *inode, struct file *file) { @@ -6698,10 +7516,20 @@ .release = ftrace_pid_release, }; +static const struct file_operations ftrace_no_pid_fops = { + .open = ftrace_no_pid_open, + .write = ftrace_no_pid_write, + .read = seq_read, + .llseek = tracing_lseek, + .release = ftrace_pid_release, +}; + void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer) { trace_create_file("set_ftrace_pid", 0644, d_tracer, tr, &ftrace_pid_fops); + trace_create_file("set_ftrace_notrace_pid", 0644, d_tracer, + tr, &ftrace_no_pid_fops); } void __init ftrace_init_tracefs_toplevel(struct trace_array *tr, @@ -6781,10 +7609,21 @@ } EXPORT_SYMBOL_GPL(unregister_ftrace_function); +static bool is_permanent_ops_registered(void) +{ + struct ftrace_ops *op; + + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (op->flags & FTRACE_OPS_FL_PERMANENT) + return true; + } while_for_each_ftrace_op(op); + + return false; +} + int ftrace_enable_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret = -ENODEV; @@ -6798,8 +7637,6 @@ if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) goto out; - last_ftrace_enabled = !!ftrace_enabled; - if (ftrace_enabled) { /* we are starting ftrace again */ @@ -6810,364 +7647,20 @@ ftrace_startup_sysctl(); } else { + if (is_permanent_ops_registered()) { + ftrace_enabled = true; + ret = -EBUSY; + goto out; + } + /* stopping ftrace calls (just send to ftrace_stub) */ ftrace_trace_function = ftrace_stub; ftrace_shutdown_sysctl(); } + last_ftrace_enabled = !!ftrace_enabled; out: mutex_unlock(&ftrace_lock); return ret; } - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - -static struct ftrace_ops graph_ops = { - .func = ftrace_stub, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | - FTRACE_OPS_FL_INITIALIZED | - FTRACE_OPS_FL_PID | - FTRACE_OPS_FL_STUB, -#ifdef FTRACE_GRAPH_TRAMP_ADDR - .trampoline = FTRACE_GRAPH_TRAMP_ADDR, - /* trampoline_size is only needed for dynamically allocated tramps */ -#endif - ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash) -}; - -void ftrace_graph_sleep_time_control(bool enable) -{ - fgraph_sleep_time = enable; -} - -void ftrace_graph_graph_time_control(bool enable) -{ - fgraph_graph_time = enable; -} - -void ftrace_graph_return_stub(struct ftrace_graph_ret *trace) -{ -} - -int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) -{ - return 0; -} - -/* The callbacks that hook a function */ -trace_func_graph_ret_t ftrace_graph_return = ftrace_graph_return_stub; -trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub; -static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub; - -/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ -static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) -{ - int i; - int ret = 0; - int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE; - struct task_struct *g, *t; - - for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) { - ret_stack_list[i] = - kmalloc_array(FTRACE_RETFUNC_DEPTH, - sizeof(struct ftrace_ret_stack), - GFP_KERNEL); - if (!ret_stack_list[i]) { - start = 0; - end = i; - ret = -ENOMEM; - goto free; - } - } - - read_lock(&tasklist_lock); - do_each_thread(g, t) { - if (start == end) { - ret = -EAGAIN; - goto unlock; - } - - if (t->ret_stack == NULL) { - atomic_set(&t->trace_overrun, 0); - t->curr_ret_stack = -1; - t->curr_ret_depth = -1; - /* Make sure the tasks see the -1 first: */ - smp_wmb(); - t->ret_stack = ret_stack_list[start++]; - } - } while_each_thread(g, t); - -unlock: - read_unlock(&tasklist_lock); -free: - for (i = start; i < end; i++) - kfree(ret_stack_list[i]); - return ret; -} - -static void -ftrace_graph_probe_sched_switch(void *ignore, bool preempt, - struct task_struct *prev, struct task_struct *next) -{ - unsigned long long timestamp; - int index; - - /* - * Does the user want to count the time a function was asleep. - * If so, do not update the time stamps. - */ - if (fgraph_sleep_time) - return; - - timestamp = trace_clock_local(); - - prev->ftrace_timestamp = timestamp; - - /* only process tasks that we timestamped */ - if (!next->ftrace_timestamp) - return; - - /* - * Update all the counters in next to make up for the - * time next was sleeping. - */ - timestamp -= next->ftrace_timestamp; - - for (index = next->curr_ret_stack; index >= 0; index--) - next->ret_stack[index].calltime += timestamp; -} - -/* Allocate a return stack for each task */ -static int start_graph_tracing(void) -{ - struct ftrace_ret_stack **ret_stack_list; - int ret, cpu; - - ret_stack_list = kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE, - sizeof(struct ftrace_ret_stack *), - GFP_KERNEL); - - if (!ret_stack_list) - return -ENOMEM; - - /* The cpu_boot init_task->ret_stack will never be freed */ - for_each_online_cpu(cpu) { - if (!idle_task(cpu)->ret_stack) - ftrace_graph_init_idle_task(idle_task(cpu), cpu); - } - - do { - ret = alloc_retstack_tasklist(ret_stack_list); - } while (ret == -EAGAIN); - - if (!ret) { - ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); - if (ret) - pr_info("ftrace_graph: Couldn't activate tracepoint" - " probe to kernel_sched_switch\n"); - } - - kfree(ret_stack_list); - return ret; -} - -/* - * Hibernation protection. - * The state of the current task is too much unstable during - * suspend/restore to disk. We want to protect against that. - */ -static int -ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state, - void *unused) -{ - switch (state) { - case PM_HIBERNATION_PREPARE: - pause_graph_tracing(); - break; - - case PM_POST_HIBERNATION: - unpause_graph_tracing(); - break; - } - return NOTIFY_DONE; -} - -static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace) -{ - if (!ftrace_ops_test(&global_ops, trace->func, NULL)) - return 0; - return __ftrace_graph_entry(trace); -} - -/* - * The function graph tracer should only trace the functions defined - * by set_ftrace_filter and set_ftrace_notrace. If another function - * tracer ops is registered, the graph tracer requires testing the - * function against the global ops, and not just trace any function - * that any ftrace_ops registered. - */ -static void update_function_graph_func(void) -{ - struct ftrace_ops *op; - bool do_test = false; - - /* - * The graph and global ops share the same set of functions - * to test. If any other ops is on the list, then - * the graph tracing needs to test if its the function - * it should call. - */ - do_for_each_ftrace_op(op, ftrace_ops_list) { - if (op != &global_ops && op != &graph_ops && - op != &ftrace_list_end) { - do_test = true; - /* in double loop, break out with goto */ - goto out; - } - } while_for_each_ftrace_op(op); - out: - if (do_test) - ftrace_graph_entry = ftrace_graph_entry_test; - else - ftrace_graph_entry = __ftrace_graph_entry; -} - -static struct notifier_block ftrace_suspend_notifier = { - .notifier_call = ftrace_suspend_notifier_call, -}; - -int register_ftrace_graph(trace_func_graph_ret_t retfunc, - trace_func_graph_ent_t entryfunc) -{ - int ret = 0; - - mutex_lock(&ftrace_lock); - - /* we currently allow only one tracer registered at a time */ - if (ftrace_graph_active) { - ret = -EBUSY; - goto out; - } - - register_pm_notifier(&ftrace_suspend_notifier); - - ftrace_graph_active++; - ret = start_graph_tracing(); - if (ret) { - ftrace_graph_active--; - goto out; - } - - ftrace_graph_return = retfunc; - - /* - * Update the indirect function to the entryfunc, and the - * function that gets called to the entry_test first. Then - * call the update fgraph entry function to determine if - * the entryfunc should be called directly or not. - */ - __ftrace_graph_entry = entryfunc; - ftrace_graph_entry = ftrace_graph_entry_test; - update_function_graph_func(); - - ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET); -out: - mutex_unlock(&ftrace_lock); - return ret; -} - -void unregister_ftrace_graph(void) -{ - mutex_lock(&ftrace_lock); - - if (unlikely(!ftrace_graph_active)) - goto out; - - ftrace_graph_active--; - ftrace_graph_return = ftrace_graph_return_stub; - ftrace_graph_entry = ftrace_graph_entry_stub; - __ftrace_graph_entry = ftrace_graph_entry_stub; - ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET); - unregister_pm_notifier(&ftrace_suspend_notifier); - unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); - - out: - mutex_unlock(&ftrace_lock); -} - -static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack); - -static void -graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack) -{ - atomic_set(&t->trace_overrun, 0); - t->ftrace_timestamp = 0; - /* make curr_ret_stack visible before we add the ret_stack */ - smp_wmb(); - t->ret_stack = ret_stack; -} - -/* - * Allocate a return stack for the idle task. May be the first - * time through, or it may be done by CPU hotplug online. - */ -void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) -{ - t->curr_ret_stack = -1; - t->curr_ret_depth = -1; - /* - * The idle task has no parent, it either has its own - * stack or no stack at all. - */ - if (t->ret_stack) - WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu)); - - if (ftrace_graph_active) { - struct ftrace_ret_stack *ret_stack; - - ret_stack = per_cpu(idle_ret_stack, cpu); - if (!ret_stack) { - ret_stack = - kmalloc_array(FTRACE_RETFUNC_DEPTH, - sizeof(struct ftrace_ret_stack), - GFP_KERNEL); - if (!ret_stack) - return; - per_cpu(idle_ret_stack, cpu) = ret_stack; - } - graph_init_task(t, ret_stack); - } -} - -/* Allocate a return stack for newly created task */ -void ftrace_graph_init_task(struct task_struct *t) -{ - /* Make sure we do not use the parent ret_stack */ - t->ret_stack = NULL; - t->curr_ret_stack = -1; - t->curr_ret_depth = -1; - - if (ftrace_graph_active) { - struct ftrace_ret_stack *ret_stack; - - ret_stack = kmalloc_array(FTRACE_RETFUNC_DEPTH, - sizeof(struct ftrace_ret_stack), - GFP_KERNEL); - if (!ret_stack) - return; - graph_init_task(t, ret_stack); - } -} - -void ftrace_graph_exit_task(struct task_struct *t) -{ - struct ftrace_ret_stack *ret_stack = t->ret_stack; - - t->ret_stack = NULL; - /* NULL must become visible to IRQs before we free it: */ - barrier(); - - kfree(ret_stack); -} -#endif -- Gitblit v1.6.2