2023-12-11  d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
--- a/kernel/kernel/trace/trace_events.c
+++ b/kernel/kernel/trace/trace_events.c
@@ -12,6 +12,7 @@
 #define pr_fmt(fmt) fmt
 
 #include <linux/workqueue.h>
+#include <linux/security.h>
 #include <linux/spinlock.h>
 #include <linux/kthread.h>
 #include <linux/tracefs.h>
@@ -23,6 +24,7 @@
 #include <linux/delay.h>
 
 #include <trace/events/sched.h>
+#include <trace/syscall.h>
 
 #include <asm/setup.h>
 
@@ -36,6 +38,7 @@
 LIST_HEAD(ftrace_events);
 static LIST_HEAD(ftrace_generic_fields);
 static LIST_HEAD(ftrace_common_fields);
+static bool eventdir_initialized;
 
 #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
 
@@ -69,14 +72,6 @@
 
 #define while_for_each_event_file()		\
 	}
-
-static struct list_head *
-trace_get_fields(struct trace_event_call *event_call)
-{
-	if (!event_call->class->get_fields)
-		return &event_call->class->fields;
-	return event_call->class->get_fields(event_call);
-}
 
 static struct ftrace_event_field *
 __find_event_field(struct list_head *head, char *name)
@@ -173,6 +168,7 @@
 
 	__generic_field(int, CPU, FILTER_CPU);
 	__generic_field(int, cpu, FILTER_CPU);
+	__generic_field(int, common_cpu, FILTER_CPU);
 	__generic_field(char *, COMM, FILTER_COMM);
 	__generic_field(char *, comm, FILTER_COMM);
 
@@ -188,6 +184,8 @@
 	__common_field(unsigned char, flags);
 	__common_field(unsigned char, preempt_count);
 	__common_field(int, pid);
+	__common_field(unsigned char, migrate_disable);
+	__common_field(unsigned char, preempt_lazy_count);
 
 	return ret;
 }
@@ -238,13 +236,16 @@
 {
 	struct trace_array *tr = trace_file->tr;
 	struct trace_array_cpu *data;
+	struct trace_pid_list *no_pid_list;
 	struct trace_pid_list *pid_list;
 
 	pid_list = rcu_dereference_raw(tr->filtered_pids);
-	if (!pid_list)
+	no_pid_list = rcu_dereference_raw(tr->filtered_no_pids);
+
+	if (!pid_list && !no_pid_list)
 		return false;
 
-	data = this_cpu_ptr(tr->trace_buffer.data);
+	data = this_cpu_ptr(tr->array_buffer.data);
 
 	return data->ignore_pid;
 }
@@ -260,25 +261,23 @@
 	    trace_event_ignore_this_pid(trace_file))
 		return NULL;
 
-	local_save_flags(fbuffer->flags);
-	fbuffer->pc = preempt_count();
 	/*
-	 * If CONFIG_PREEMPT is enabled, then the tracepoint itself disables
+	 * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables
	 * preemption (adding one to the preempt_count). Since we are
	 * interested in the preempt_count at the time the tracepoint was
	 * hit, we need to subtract one to offset the increment.
	 */
-	if (IS_ENABLED(CONFIG_PREEMPT))
-		fbuffer->pc--;
+	fbuffer->trace_ctx = tracing_gen_ctx_dec();
 	fbuffer->trace_file = trace_file;
 
 	fbuffer->event =
 		trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file,
 						event_call->event.type, len,
-						fbuffer->flags, fbuffer->pc);
+						fbuffer->trace_ctx);
 	if (!fbuffer->event)
 		return NULL;
 
+	fbuffer->regs = NULL;
 	fbuffer->entry = ring_buffer_event_data(fbuffer->event);
 	return fbuffer->entry;
 }
@@ -515,6 +514,9 @@
 
 	pid_list = rcu_dereference_raw(tr->filtered_pids);
 	trace_filter_add_remove_task(pid_list, NULL, task);
+
+	pid_list = rcu_dereference_raw(tr->filtered_no_pids);
+	trace_filter_add_remove_task(pid_list, NULL, task);
 }
 
 static void
@@ -526,6 +528,9 @@
 	struct trace_array *tr = data;
 
 	pid_list = rcu_dereference_sched(tr->filtered_pids);
+	trace_filter_add_remove_task(pid_list, self, task);
+
+	pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 	trace_filter_add_remove_task(pid_list, self, task);
 }
 
@@ -549,13 +554,23 @@
 		    struct task_struct *prev, struct task_struct *next)
 {
 	struct trace_array *tr = data;
+	struct trace_pid_list *no_pid_list;
 	struct trace_pid_list *pid_list;
+	bool ret;
 
 	pid_list = rcu_dereference_sched(tr->filtered_pids);
+	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 
-	this_cpu_write(tr->trace_buffer.data->ignore_pid,
-		       trace_ignore_this_task(pid_list, prev) &&
-		       trace_ignore_this_task(pid_list, next));
+	/*
+	 * Sched switch is funny, as we only want to ignore it
+	 * in the notrace case if both prev and next should be ignored.
+	 */
+	ret = trace_ignore_this_task(NULL, no_pid_list, prev) &&
+		trace_ignore_this_task(NULL, no_pid_list, next);
+
+	this_cpu_write(tr->array_buffer.data->ignore_pid, ret ||
+		       (trace_ignore_this_task(pid_list, NULL, prev) &&
+			trace_ignore_this_task(pid_list, NULL, next)));
 }
 
 static void
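
For clarity, the pre-switch probe above combines the two lists asymmetrically. The following standalone predicate is a minimal sketch of the same decision, assuming the three-argument trace_ignore_this_task() this change introduces; the helper name is hypothetical and does not exist in the tree:

/* Hypothetical helper restating the probe's decision. */
static bool sched_switch_is_ignored(struct trace_pid_list *pid_list,
				    struct trace_pid_list *no_pid_list,
				    struct task_struct *prev,
				    struct task_struct *next)
{
	/* Notrace: ignore only if *both* tasks are on the notrace list. */
	bool both_notraced = trace_ignore_this_task(NULL, no_pid_list, prev) &&
			     trace_ignore_this_task(NULL, no_pid_list, next);
	/* Positive filter: ignore only if *neither* task is filtered in. */
	bool both_filtered_out = trace_ignore_this_task(pid_list, NULL, prev) &&
				 trace_ignore_this_task(pid_list, NULL, next);

	return both_notraced || both_filtered_out;
}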
@@ -563,58 +578,55 @@
 		    struct task_struct *prev, struct task_struct *next)
 {
 	struct trace_array *tr = data;
+	struct trace_pid_list *no_pid_list;
 	struct trace_pid_list *pid_list;
 
 	pid_list = rcu_dereference_sched(tr->filtered_pids);
+	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 
-	this_cpu_write(tr->trace_buffer.data->ignore_pid,
-		       trace_ignore_this_task(pid_list, next));
+	this_cpu_write(tr->array_buffer.data->ignore_pid,
+		       trace_ignore_this_task(pid_list, no_pid_list, next));
 }
 
 static void
 event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
 {
 	struct trace_array *tr = data;
+	struct trace_pid_list *no_pid_list;
 	struct trace_pid_list *pid_list;
 
 	/* Nothing to do if we are already tracing */
-	if (!this_cpu_read(tr->trace_buffer.data->ignore_pid))
+	if (!this_cpu_read(tr->array_buffer.data->ignore_pid))
 		return;
 
 	pid_list = rcu_dereference_sched(tr->filtered_pids);
+	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 
-	this_cpu_write(tr->trace_buffer.data->ignore_pid,
-		       trace_ignore_this_task(pid_list, task));
+	this_cpu_write(tr->array_buffer.data->ignore_pid,
+		       trace_ignore_this_task(pid_list, no_pid_list, task));
 }
 
 static void
 event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
 {
 	struct trace_array *tr = data;
+	struct trace_pid_list *no_pid_list;
 	struct trace_pid_list *pid_list;
 
 	/* Nothing to do if we are not tracing */
-	if (this_cpu_read(tr->trace_buffer.data->ignore_pid))
+	if (this_cpu_read(tr->array_buffer.data->ignore_pid))
 		return;
 
 	pid_list = rcu_dereference_sched(tr->filtered_pids);
+	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 
 	/* Set tracing if current is enabled */
-	this_cpu_write(tr->trace_buffer.data->ignore_pid,
-		       trace_ignore_this_task(pid_list, current));
+	this_cpu_write(tr->array_buffer.data->ignore_pid,
+		       trace_ignore_this_task(pid_list, no_pid_list, current));
 }
 
-static void __ftrace_clear_event_pids(struct trace_array *tr)
+static void unregister_pid_events(struct trace_array *tr)
 {
-	struct trace_pid_list *pid_list;
-	struct trace_event_file *file;
-	int cpu;
-
-	pid_list = rcu_dereference_protected(tr->filtered_pids,
-					     lockdep_is_held(&event_mutex));
-	if (!pid_list)
-		return;
-
 	unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
 	unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);
 
@@ -626,26 +638,55 @@
 
 	unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
 	unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
+}
 
-	list_for_each_entry(file, &tr->events, list) {
-		clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
+static void __ftrace_clear_event_pids(struct trace_array *tr, int type)
+{
+	struct trace_pid_list *pid_list;
+	struct trace_pid_list *no_pid_list;
+	struct trace_event_file *file;
+	int cpu;
+
+	pid_list = rcu_dereference_protected(tr->filtered_pids,
+					     lockdep_is_held(&event_mutex));
+	no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
+					     lockdep_is_held(&event_mutex));
+
+	/* Make sure there's something to do */
+	if (!pid_type_enabled(type, pid_list, no_pid_list))
+		return;
+
+	if (!still_need_pid_events(type, pid_list, no_pid_list)) {
+		unregister_pid_events(tr);
+
+		list_for_each_entry(file, &tr->events, list) {
+			clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
+		}
+
+		for_each_possible_cpu(cpu)
+			per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false;
 	}
 
-	for_each_possible_cpu(cpu)
-		per_cpu_ptr(tr->trace_buffer.data, cpu)->ignore_pid = false;
+	if (type & TRACE_PIDS)
+		rcu_assign_pointer(tr->filtered_pids, NULL);
 
-	rcu_assign_pointer(tr->filtered_pids, NULL);
+	if (type & TRACE_NO_PIDS)
+		rcu_assign_pointer(tr->filtered_no_pids, NULL);
 
 	/* Wait till all users are no longer using pid filtering */
 	tracepoint_synchronize_unregister();
 
-	trace_free_pid_list(pid_list);
+	if ((type & TRACE_PIDS) && pid_list)
+		trace_free_pid_list(pid_list);
+
+	if ((type & TRACE_NO_PIDS) && no_pid_list)
+		trace_free_pid_list(no_pid_list);
 }
 
-static void ftrace_clear_event_pids(struct trace_array *tr)
+static void ftrace_clear_event_pids(struct trace_array *tr, int type)
 {
 	mutex_lock(&event_mutex);
-	__ftrace_clear_event_pids(tr);
+	__ftrace_clear_event_pids(tr, type);
 	mutex_unlock(&event_mutex);
 }
 
@@ -704,7 +745,7 @@
 		return;
 
 	if (!--dir->nr_events) {
-		tracefs_remove_recursive(dir->entry);
+		tracefs_remove(dir->entry);
 		list_del(&dir->list);
 		__put_system_dir(dir);
 	}
@@ -723,7 +764,7 @@
 	}
 	spin_unlock(&dir->d_lock);
 
-	tracefs_remove_recursive(dir);
+	tracefs_remove(dir);
 }
 
 	list_del(&file->list);
@@ -795,7 +836,7 @@
 	return ret;
 }
 
-static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
+int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
 {
 	char *event = NULL, *sub = NULL, *match;
 	int ret;
@@ -857,6 +898,32 @@
 	return __ftrace_set_clr_event(tr, NULL, system, event, set);
 }
 EXPORT_SYMBOL_GPL(trace_set_clr_event);
+
+/**
+ * trace_array_set_clr_event - enable or disable an event for a trace array.
+ * @tr: concerned trace array.
+ * @system: system name to match (NULL for any system)
+ * @event: event name to match (NULL for all events, within system)
+ * @enable: true to enable, false to disable
+ *
+ * This is a way for other parts of the kernel to enable or disable
+ * event recording.
+ *
+ * Returns 0 on success, -EINVAL if the parameters do not match any
+ * registered events.
+ */
+int trace_array_set_clr_event(struct trace_array *tr, const char *system,
+		const char *event, bool enable)
+{
+	int set;
+
+	if (!tr)
+		return -ENOENT;
+
+	set = (enable == true) ? 1 : 0;
+	return __ftrace_set_clr_event(tr, NULL, system, event, set);
+}
+EXPORT_SYMBOL_GPL(trace_array_set_clr_event);
 
 /* 128 should be much more than enough */
 #define EVENT_BUF_SIZE		127
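
For context, a minimal sketch of how an in-kernel caller might use the newly exported trace_array_set_clr_event(). The instance name and the use of trace_array_get_by_name()/trace_array_put() (from the same in-kernel trace API, not shown in this diff) are assumptions:

#include <linux/trace.h>

static int example_enable_sched_switch(void)
{
	struct trace_array *tr;
	int ret;

	tr = trace_array_get_by_name("example");	/* assumed instance name */
	if (!tr)
		return -ENOMEM;

	/* Enable sched:sched_switch on this instance only. */
	ret = trace_array_set_clr_event(tr, "sched", "sched_switch", true);
	if (ret)
		trace_array_put(tr);	/* drop the reference on failure */
	return ret;
}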
@@ -992,15 +1059,32 @@
 }
 
 static void *
-p_next(struct seq_file *m, void *v, loff_t *pos)
+__next(struct seq_file *m, void *v, loff_t *pos, int type)
 {
 	struct trace_array *tr = m->private;
-	struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);
+	struct trace_pid_list *pid_list;
+
+	if (type == TRACE_PIDS)
+		pid_list = rcu_dereference_sched(tr->filtered_pids);
+	else
+		pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 
 	return trace_pid_next(pid_list, v, pos);
 }
 
-static void *p_start(struct seq_file *m, loff_t *pos)
+static void *
+p_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	return __next(m, v, pos, TRACE_PIDS);
+}
+
+static void *
+np_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	return __next(m, v, pos, TRACE_NO_PIDS);
+}
+
+static void *__start(struct seq_file *m, loff_t *pos, int type)
 	__acquires(RCU)
 {
 	struct trace_pid_list *pid_list;
@@ -1015,12 +1099,27 @@
 	mutex_lock(&event_mutex);
 	rcu_read_lock_sched();
 
-	pid_list = rcu_dereference_sched(tr->filtered_pids);
+	if (type == TRACE_PIDS)
+		pid_list = rcu_dereference_sched(tr->filtered_pids);
+	else
+		pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 
 	if (!pid_list)
 		return NULL;
 
 	return trace_pid_start(pid_list, pos);
+}
+
+static void *p_start(struct seq_file *m, loff_t *pos)
+	__acquires(RCU)
+{
+	return __start(m, pos, TRACE_PIDS);
+}
+
+static void *np_start(struct seq_file *m, loff_t *pos)
+	__acquires(RCU)
+{
+	return __start(m, pos, TRACE_NO_PIDS);
 }
 
 static void p_stop(struct seq_file *m, void *p)
@@ -1254,7 +1353,7 @@
 	 */
 	array_descriptor = strchr(field->type, '[');
 
-	if (!strncmp(field->type, "__data_loc", 10))
+	if (str_has_prefix(field->type, "__data_loc"))
 		array_descriptor = NULL;
 
 	if (!array_descriptor)
@@ -1303,6 +1402,8 @@
 {
 	struct seq_file *m;
 	int ret;
+
+	/* Do we want to hide event format files on tracefs lockdown? */
 
 	ret = seq_open(file, &trace_format_seq_ops);
 	if (ret < 0)
@@ -1450,28 +1551,17 @@
 	struct trace_array *tr = inode->i_private;
 	int ret;
 
-	if (tracing_is_disabled())
-		return -ENODEV;
-
-	if (trace_array_get(tr) < 0)
-		return -ENODEV;
-
 	/* Make a temporary dir that has no system but points to tr */
 	dir = kzalloc(sizeof(*dir), GFP_KERNEL);
-	if (!dir) {
-		trace_array_put(tr);
+	if (!dir)
 		return -ENOMEM;
-	}
 
-	dir->tr = tr;
-
-	ret = tracing_open_generic(inode, filp);
+	ret = tracing_open_generic_tr(inode, filp);
 	if (ret < 0) {
-		trace_array_put(tr);
 		kfree(dir);
 		return ret;
 	}
-
+	dir->tr = tr;
 	filp->private_data = dir;
 
 	return 0;
@@ -1577,6 +1667,7 @@
 {
 	struct trace_array *tr = data;
 	struct trace_pid_list *pid_list;
+	struct trace_pid_list *no_pid_list;
 
 	/*
 	 * This function is called by on_each_cpu() while the
@@ -1584,18 +1675,50 @@
 	 */
 	pid_list = rcu_dereference_protected(tr->filtered_pids,
 					     mutex_is_locked(&event_mutex));
+	no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
+					     mutex_is_locked(&event_mutex));
 
-	this_cpu_write(tr->trace_buffer.data->ignore_pid,
-		       trace_ignore_this_task(pid_list, current));
+	this_cpu_write(tr->array_buffer.data->ignore_pid,
+		       trace_ignore_this_task(pid_list, no_pid_list, current));
+}
+
+static void register_pid_events(struct trace_array *tr)
+{
+	/*
+	 * Register a probe that is called before all other probes
+	 * to set ignore_pid if next or prev do not match.
+	 * Register a probe this is called after all other probes
+	 * to only keep ignore_pid set if next pid matches.
+	 */
+	register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
+					 tr, INT_MAX);
+	register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
+					 tr, 0);
+
+	register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
+					 tr, INT_MAX);
+	register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
+					 tr, 0);
+
+	register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
+					     tr, INT_MAX);
+	register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
+					     tr, 0);
+
+	register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
+					 tr, INT_MAX);
+	register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
+					 tr, 0);
 }
 
 static ssize_t
-ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
-		       size_t cnt, loff_t *ppos)
+event_pid_write(struct file *filp, const char __user *ubuf,
+		size_t cnt, loff_t *ppos, int type)
 {
 	struct seq_file *m = filp->private_data;
 	struct trace_array *tr = m->private;
 	struct trace_pid_list *filtered_pids = NULL;
+	struct trace_pid_list *other_pids = NULL;
 	struct trace_pid_list *pid_list;
 	struct trace_event_file *file;
 	ssize_t ret;
@@ -1609,14 +1732,26 @@
 
 	mutex_lock(&event_mutex);
 
-	filtered_pids = rcu_dereference_protected(tr->filtered_pids,
-					     lockdep_is_held(&event_mutex));
+	if (type == TRACE_PIDS) {
+		filtered_pids = rcu_dereference_protected(tr->filtered_pids,
+							  lockdep_is_held(&event_mutex));
+		other_pids = rcu_dereference_protected(tr->filtered_no_pids,
+							  lockdep_is_held(&event_mutex));
+	} else {
+		filtered_pids = rcu_dereference_protected(tr->filtered_no_pids,
+							  lockdep_is_held(&event_mutex));
+		other_pids = rcu_dereference_protected(tr->filtered_pids,
+							  lockdep_is_held(&event_mutex));
+	}
 
 	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
 	if (ret < 0)
 		goto out;
 
-	rcu_assign_pointer(tr->filtered_pids, pid_list);
+	if (type == TRACE_PIDS)
+		rcu_assign_pointer(tr->filtered_pids, pid_list);
+	else
+		rcu_assign_pointer(tr->filtered_no_pids, pid_list);
 
 	list_for_each_entry(file, &tr->events, list) {
 		set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
@@ -1625,32 +1760,8 @@
 	if (filtered_pids) {
 		tracepoint_synchronize_unregister();
 		trace_free_pid_list(filtered_pids);
-	} else if (pid_list) {
-		/*
-		 * Register a probe that is called before all other probes
-		 * to set ignore_pid if next or prev do not match.
-		 * Register a probe this is called after all other probes
-		 * to only keep ignore_pid set if next pid matches.
-		 */
-		register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
-						 tr, INT_MAX);
-		register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
-						 tr, 0);
-
-		register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
-						 tr, INT_MAX);
-		register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
-						 tr, 0);
-
-		register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
-						     tr, INT_MAX);
-		register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
-						     tr, 0);
-
-		register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
-						 tr, INT_MAX);
-		register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
-						 tr, 0);
+	} else if (pid_list && !other_pids) {
+		register_pid_events(tr);
 	}
 
 	/*
@@ -1669,9 +1780,24 @@
 	return ret;
 }
 
+static ssize_t
+ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
+		       size_t cnt, loff_t *ppos)
+{
+	return event_pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS);
+}
+
+static ssize_t
+ftrace_event_npid_write(struct file *filp, const char __user *ubuf,
+			size_t cnt, loff_t *ppos)
+{
+	return event_pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS);
+}
+
 static int ftrace_event_avail_open(struct inode *inode, struct file *file);
 static int ftrace_event_set_open(struct inode *inode, struct file *file);
 static int ftrace_event_set_pid_open(struct inode *inode, struct file *file);
+static int ftrace_event_set_npid_open(struct inode *inode, struct file *file);
 static int ftrace_event_release(struct inode *inode, struct file *file);
 
 static const struct seq_operations show_event_seq_ops = {
@@ -1695,6 +1821,13 @@
 	.stop = p_stop,
 };
 
+static const struct seq_operations show_set_no_pid_seq_ops = {
+	.start = np_start,
+	.next = np_next,
+	.show = trace_pid_show,
+	.stop = p_stop,
+};
+
 static const struct file_operations ftrace_avail_fops = {
 	.open = ftrace_event_avail_open,
 	.read = seq_read,
@@ -1714,6 +1847,14 @@
 	.open = ftrace_event_set_pid_open,
 	.read = seq_read,
 	.write = ftrace_event_pid_write,
+	.llseek = seq_lseek,
+	.release = ftrace_event_release,
+};
+
+static const struct file_operations ftrace_set_event_notrace_pid_fops = {
+	.open = ftrace_event_set_npid_open,
+	.read = seq_read,
+	.write = ftrace_event_npid_write,
 	.llseek = seq_lseek,
 	.release = ftrace_event_release,
 };
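
These file_operations back the new set_event_notrace_pid tracefs file created later in this diff; it mirrors set_event_pid but excludes the listed PIDs instead of filtering them in. A minimal userspace sketch, assuming the usual /sys/kernel/tracing mount point:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[32];
	int fd = open("/sys/kernel/tracing/set_event_notrace_pid", O_WRONLY);

	if (fd < 0)
		return 1;
	/* Events generated by this PID are now hidden from event tracing. */
	snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
	write(fd, buf, strlen(buf));
	close(fd);
	return 0;
}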
@@ -1781,6 +1922,10 @@
 	struct seq_file *m;
 	int ret;
 
+	ret = security_locked_down(LOCKDOWN_TRACEFS);
+	if (ret)
+		return ret;
+
 	ret = seq_open(file, seq_ops);
 	if (ret < 0)
 		return ret;
@@ -1805,6 +1950,7 @@
 {
 	const struct seq_operations *seq_ops = &show_event_seq_ops;
 
+	/* Checks for tracefs lockdown */
 	return ftrace_event_open(inode, file, seq_ops);
 }
 
@@ -1815,8 +1961,9 @@
 	struct trace_array *tr = inode->i_private;
 	int ret;
 
-	if (trace_array_get(tr) < 0)
-		return -ENODEV;
+	ret = tracing_check_open_get_tr(tr);
+	if (ret)
+		return ret;
 
 	if ((file->f_mode & FMODE_WRITE) &&
 	    (file->f_flags & O_TRUNC))
@@ -1835,12 +1982,34 @@
 	struct trace_array *tr = inode->i_private;
 	int ret;
 
-	if (trace_array_get(tr) < 0)
-		return -ENODEV;
+	ret = tracing_check_open_get_tr(tr);
+	if (ret)
+		return ret;
 
 	if ((file->f_mode & FMODE_WRITE) &&
 	    (file->f_flags & O_TRUNC))
-		ftrace_clear_event_pids(tr);
+		ftrace_clear_event_pids(tr, TRACE_PIDS);
+
+	ret = ftrace_event_open(inode, file, seq_ops);
+	if (ret < 0)
+		trace_array_put(tr);
+	return ret;
+}
+
+static int
+ftrace_event_set_npid_open(struct inode *inode, struct file *file)
+{
+	const struct seq_operations *seq_ops = &show_set_no_pid_seq_ops;
+	struct trace_array *tr = inode->i_private;
+	int ret;
+
+	ret = tracing_check_open_get_tr(tr);
+	if (ret)
+		return ret;
+
+	if ((file->f_mode & FMODE_WRITE) &&
+	    (file->f_flags & O_TRUNC))
+		ftrace_clear_event_pids(tr, TRACE_NO_PIDS);
 
 	ret = ftrace_event_open(inode, file, seq_ops);
 	if (ret < 0)
@@ -1957,11 +2126,47 @@
 }
 
 static int
+event_define_fields(struct trace_event_call *call)
+{
+	struct list_head *head;
+	int ret = 0;
+
+	/*
+	 * Other events may have the same class. Only update
+	 * the fields if they are not already defined.
+	 */
+	head = trace_get_fields(call);
+	if (list_empty(head)) {
+		struct trace_event_fields *field = call->class->fields_array;
+		unsigned int offset = sizeof(struct trace_entry);
+
+		for (; field->type; field++) {
+			if (field->type == TRACE_FUNCTION_TYPE) {
+				field->define_fields(call);
+				break;
+			}
+
+			offset = ALIGN(offset, field->align);
+			ret = trace_define_field(call, field->type, field->name,
+						 offset, field->size,
+						 field->is_signed, field->filter_type);
+			if (WARN_ON_ONCE(ret)) {
+				pr_err("error code is %d\n", ret);
+				break;
+			}
+
+			offset += field->size;
+		}
+	}
+
+	return ret;
+}
+
+static int
 event_create_dir(struct dentry *parent, struct trace_event_file *file)
 {
 	struct trace_event_call *call = file->event_call;
 	struct trace_array *tr = file->tr;
-	struct list_head *head;
 	struct dentry *d_events;
 	const char *name;
 	int ret;
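
The new event_define_fields() walks call->class->fields_array, a table terminated by an entry whose ->type is NULL and normally generated by the TRACE_EVENT() macros. An illustrative, hand-written entry (not taken from this diff) shows the shape the loop expects:

/* Illustrative only: real tables come from the TRACE_EVENT() macros. */
static struct trace_event_fields example_fields[] = {
	{
		.type		= "unsigned long",
		.name		= "ip",
		.size		= sizeof(unsigned long),
		.align		= __alignof__(unsigned long),
		.is_signed	= 0,
		.filter_type	= FILTER_OTHER,
	},
	{}	/* field->type == NULL ends the walk in event_define_fields() */
};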
@@ -1995,18 +2200,10 @@
 			  &ftrace_event_id_fops);
 #endif
 
-	/*
-	 * Other events may have the same class. Only update
-	 * the fields if they are not already defined.
-	 */
-	head = trace_get_fields(call);
-	if (list_empty(head)) {
-		ret = call->class->define_fields(call);
-		if (ret < 0) {
-			pr_warn("Could not initialize trace point events/%s\n",
-				name);
-			return -1;
-		}
+	ret = event_define_fields(call);
+	if (ret < 0) {
+		pr_warn("Could not initialize trace point events/%s\n", name);
+		return ret;
 	}
 
 	/*
@@ -2025,8 +2222,18 @@
 	trace_create_file("hist", 0444, file->dir, file,
 			  &event_hist_fops);
 #endif
+#ifdef CONFIG_HIST_TRIGGERS_DEBUG
+	trace_create_file("hist_debug", 0444, file->dir, file,
+			  &event_hist_debug_fops);
+#endif
 	trace_create_file("format", 0444, file->dir, call,
 			  &ftrace_event_format_fops);
+
+#ifdef CONFIG_TRACE_EVENT_INJECT
+	if (call->event.type && call->class->reg)
+		trace_create_file("inject", 0200, file->dir, file,
+				  &event_inject_fops);
+#endif
 
 	return 0;
 }
@@ -2255,6 +2462,7 @@
 trace_create_new_event(struct trace_event_call *call,
 		       struct trace_array *tr)
 {
+	struct trace_pid_list *no_pid_list;
 	struct trace_pid_list *pid_list;
 	struct trace_event_file *file;
 
@@ -2264,8 +2472,10 @@
 
 	pid_list = rcu_dereference_protected(tr->filtered_pids,
 					     lockdep_is_held(&event_mutex));
+	no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
+					     lockdep_is_held(&event_mutex));
 
-	if (pid_list)
+	if (pid_list || no_pid_list)
 		file->flags |= EVENT_FILE_FL_PID_FILTER;
 
 	file->event_call = call;
@@ -2288,7 +2498,10 @@
 	if (!file)
 		return -ENOMEM;
 
-	return event_create_dir(tr->event_dir, file);
+	if (eventdir_initialized)
+		return event_create_dir(tr->event_dir, file);
+	else
+		return event_define_fields(call);
 }
 
 /*
@@ -2296,7 +2509,7 @@
  * for enabling events at boot. We want to enable events before
  * the filesystem is initialized.
  */
-static __init int
+static int
 __trace_early_add_new_event(struct trace_event_call *call,
 			    struct trace_array *tr)
 {
@@ -2306,13 +2519,14 @@
 	if (!file)
 		return -ENOMEM;
 
-	return 0;
+	return event_define_fields(call);
 }
 
 struct ftrace_module_file_ops;
 static void __add_event_to_tracers(struct trace_event_call *call);
 
-int trace_add_event_call_nolock(struct trace_event_call *call)
+/* Add an additional event_call dynamically */
+int trace_add_event_call(struct trace_event_call *call)
 {
 	int ret;
 	lockdep_assert_held(&event_mutex);
@@ -2324,17 +2538,6 @@
 	__add_event_to_tracers(call);
 
 	mutex_unlock(&trace_types_lock);
-	return ret;
-}
-
-/* Add an additional event_call dynamically */
-int trace_add_event_call(struct trace_event_call *call)
-{
-	int ret;
-
-	mutex_lock(&event_mutex);
-	ret = trace_add_event_call_nolock(call);
-	mutex_unlock(&event_mutex);
 	return ret;
 }
 
@@ -2368,7 +2571,10 @@
 	 * TRACE_REG_UNREGISTER.
 	 */
 	if (file->flags & EVENT_FILE_FL_ENABLED)
-		return -EBUSY;
+		goto busy;
+
+	if (file->flags & EVENT_FILE_FL_WAS_ENABLED)
+		tr->clear_trace = true;
 	/*
 	 * The do_for_each_event_file_safe() is
 	 * a double loop. After finding the call for this
@@ -2381,10 +2587,16 @@
 	__trace_remove_event_call(call);
 
 	return 0;
+ busy:
+	/* No need to clear the trace now */
+	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+		tr->clear_trace = false;
+	}
+	return -EBUSY;
 }
 
-/* no event_mutex version */
-int trace_remove_event_call_nolock(struct trace_event_call *call)
+/* Remove an event_call */
+int trace_remove_event_call(struct trace_event_call *call)
 {
 	int ret;
 
@@ -2395,18 +2607,6 @@
 	ret = probe_remove_event_call(call);
 	up_write(&trace_event_sem);
 	mutex_unlock(&trace_types_lock);
-
-	return ret;
-}
-
-/* Remove an event_call */
-int trace_remove_event_call(struct trace_event_call *call)
-{
-	int ret;
-
-	mutex_lock(&event_mutex);
-	ret = trace_remove_event_call_nolock(call);
-	mutex_unlock(&event_mutex);
 
 	return ret;
 }
@@ -2481,7 +2681,7 @@
 	mutex_unlock(&trace_types_lock);
 	mutex_unlock(&event_mutex);
 
-	return 0;
+	return NOTIFY_OK;
 }
 
 static struct notifier_block trace_module_nb = {
@@ -2541,6 +2741,91 @@
 
 	return file;
 }
+
+/**
+ * trace_get_event_file - Find and return a trace event file
+ * @instance: The name of the trace instance containing the event
+ * @system: The name of the system containing the event
+ * @event: The name of the event
+ *
+ * Return a trace event file given the trace instance name, trace
+ * system, and trace event name.  If the instance name is NULL, it
+ * refers to the top-level trace array.
+ *
+ * This function will look it up and return it if found, after calling
+ * trace_array_get() to prevent the instance from going away, and
+ * increment the event's module refcount to prevent it from being
+ * removed.
+ *
+ * To release the file, call trace_put_event_file(), which will call
+ * trace_array_put() and decrement the event's module refcount.
+ *
+ * Return: The trace event on success, ERR_PTR otherwise.
+ */
+struct trace_event_file *trace_get_event_file(const char *instance,
+					      const char *system,
+					      const char *event)
+{
+	struct trace_array *tr = top_trace_array();
+	struct trace_event_file *file = NULL;
+	int ret = -EINVAL;
+
+	if (instance) {
+		tr = trace_array_find_get(instance);
+		if (!tr)
+			return ERR_PTR(-ENOENT);
+	} else {
+		ret = trace_array_get(tr);
+		if (ret)
+			return ERR_PTR(ret);
+	}
+
+	mutex_lock(&event_mutex);
+
+	file = find_event_file(tr, system, event);
+	if (!file) {
+		trace_array_put(tr);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Don't let event modules unload while in use */
+	ret = try_module_get(file->event_call->mod);
+	if (!ret) {
+		trace_array_put(tr);
+		ret = -EBUSY;
+		goto out;
+	}
+
+	ret = 0;
+ out:
+	mutex_unlock(&event_mutex);
+
+	if (ret)
+		file = ERR_PTR(ret);
+
+	return file;
+}
+EXPORT_SYMBOL_GPL(trace_get_event_file);
+
+/**
+ * trace_put_event_file - Release a file from trace_get_event_file()
+ * @file: The trace event file
+ *
+ * If a file was retrieved using trace_get_event_file(), this should
+ * be called when it's no longer needed.  It will cancel the previous
+ * trace_array_get() called by that function, and decrement the
+ * event's module refcount.
+ */
+void trace_put_event_file(struct trace_event_file *file)
+{
+	mutex_lock(&event_mutex);
+	module_put(file->event_call->mod);
+	mutex_unlock(&event_mutex);
+
+	trace_array_put(file->tr);
+}
+EXPORT_SYMBOL_GPL(trace_put_event_file);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
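
A minimal sketch of the get/put pairing documented in the kernel-doc above; the system and event names are placeholders:

static int example_use_event_file(void)
{
	struct trace_event_file *file;

	/* A NULL instance name means the top-level trace array. */
	file = trace_get_event_file(NULL, "sched", "sched_switch");
	if (IS_ERR(file))
		return PTR_ERR(file);

	/* ... use the file, e.g. as a trigger or probe target ... */

	trace_put_event_file(file);
	return 0;
}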
@@ -2866,14 +3151,13 @@
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 /*
- * The top level array has already had its trace_event_file
- * descriptors created in order to allow for early events to
- * be recorded. This function is called after the tracefs has been
- * initialized, and we now have to create the files associated
- * to the events.
+ * The top level array and trace arrays created by boot-time tracing
+ * have already had its trace_event_file descriptors created in order
+ * to allow for early events to be recorded.
+ * This function is called after the tracefs has been initialized,
+ * and we now have to create the files associated to the events.
 */
-static __init void
-__trace_early_add_event_dirs(struct trace_array *tr)
+static void __trace_early_add_event_dirs(struct trace_array *tr)
 {
 	struct trace_event_file *file;
 	int ret;
@@ -2888,13 +3172,12 @@
 }
 
 /*
- * For early boot up, the top trace array requires to have
- * a list of events that can be enabled. This must be done before
- * the filesystem is set up in order to allow events to be traced
- * early.
+ * For early boot up, the top trace array and the trace arrays created
+ * by boot-time tracing require to have a list of events that can be
+ * enabled. This must be done before the filesystem is set up in order
+ * to allow events to be traced early.
 */
-static __init void
-__trace_early_add_events(struct trace_array *tr)
+void __trace_early_add_events(struct trace_array *tr)
 {
 	struct trace_event_call *call;
 	int ret;
@@ -2938,7 +3221,7 @@
 {
 	strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
 	ring_buffer_expanded = true;
-	tracing_selftest_disabled = true;
+	disable_tracing_selftest("running event tracing");
 
 	return 1;
 }
@@ -2977,6 +3260,11 @@
 			    tr, &ftrace_set_event_pid_fops);
 	if (!entry)
 		pr_warn("Could not create tracefs 'set_event_pid' entry\n");
+
+	entry = tracefs_create_file("set_event_notrace_pid", 0644, parent,
+				    tr, &ftrace_set_event_notrace_pid_fops);
+	if (!entry)
+		pr_warn("Could not create tracefs 'set_event_notrace_pid' entry\n");
 
 	/* ring buffer internal formats */
 	entry = trace_create_file("header_page", 0444, d_events,
@@ -3020,7 +3308,11 @@
 		goto out;
 
 	down_write(&trace_event_sem);
-	__trace_add_event_dirs(tr);
+	/* If tr already has the event list, it is initialized in early boot. */
+	if (unlikely(!list_empty(&tr->events)))
+		__trace_early_add_event_dirs(tr);
+	else
+		__trace_add_event_dirs(tr);
 	up_write(&trace_event_sem);
 
 out:
@@ -3061,7 +3353,7 @@
 	clear_event_triggers(tr);
 
 	/* Clear the pid list */
-	__ftrace_clear_event_pids(tr);
+	__ftrace_clear_event_pids(tr, TRACE_PIDS | TRACE_NO_PIDS);
 
 	/* Disable any running events */
 	__ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);
@@ -3071,7 +3363,7 @@
 
 	down_write(&trace_event_sem);
 	__trace_remove_event_dirs(tr);
-	tracefs_remove_recursive(tr->event_dir);
+	tracefs_remove(tr->event_dir);
 	up_write(&trace_event_sem);
 
 	tr->event_dir = NULL;
@@ -3176,10 +3468,21 @@
 
 early_initcall(event_trace_enable_again);
 
+/* Init fields which doesn't related to the tracefs */
+static __init int event_trace_init_fields(void)
+{
+	if (trace_define_generic_fields())
+		pr_warn("tracing: Failed to allocated generic fields");
+
+	if (trace_define_common_fields())
+		pr_warn("tracing: Failed to allocate common fields");
+
+	return 0;
+}
+
 __init int event_trace_init(void)
 {
 	struct trace_array *tr;
-	struct dentry *d_tracer;
 	struct dentry *entry;
 	int ret;
 
@@ -3187,22 +3490,12 @@
 	if (!tr)
 		return -ENODEV;
 
-	d_tracer = tracing_init_dentry();
-	if (IS_ERR(d_tracer))
-		return 0;
-
-	entry = tracefs_create_file("available_events", 0444, d_tracer,
+	entry = tracefs_create_file("available_events", 0444, NULL,
 				    tr, &ftrace_avail_fops);
 	if (!entry)
 		pr_warn("Could not create tracefs 'available_events' entry\n");
 
-	if (trace_define_generic_fields())
-		pr_warn("tracing: Failed to allocated generic fields");
-
-	if (trace_define_common_fields())
-		pr_warn("tracing: Failed to allocate common fields");
-
-	ret = early_event_add_tracer(d_tracer, tr);
+	ret = early_event_add_tracer(NULL, tr);
 	if (ret)
 		return ret;
 
@@ -3211,6 +3504,9 @@
 	if (ret)
 		pr_warn("Failed to register trace events module notifier\n");
 #endif
+
+	eventdir_initialized = true;
+
 	return 0;
 }
 
@@ -3219,9 +3515,10 @@
 	event_trace_memsetup();
 	init_ftrace_syscalls();
 	event_trace_enable();
+	event_trace_init_fields();
 }
 
-#ifdef CONFIG_FTRACE_STARTUP_TEST
+#ifdef CONFIG_EVENT_TRACE_STARTUP_TEST
 
 static DEFINE_SPINLOCK(test_spinlock);
 static DEFINE_SPINLOCK(test_spinlock_irq);
@@ -3398,15 +3695,14 @@
 function_test_events_call(unsigned long ip, unsigned long parent_ip,
 			  struct ftrace_ops *op, struct pt_regs *pt_regs)
 {
+	struct trace_buffer *buffer;
 	struct ring_buffer_event *event;
-	struct ring_buffer *buffer;
 	struct ftrace_entry *entry;
-	unsigned long flags;
+	unsigned int trace_ctx;
 	long disabled;
 	int cpu;
-	int pc;
 
-	pc = preempt_count();
+	trace_ctx = tracing_gen_ctx();
 	preempt_disable_notrace();
 	cpu = raw_smp_processor_id();
 	disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
@@ -3414,11 +3710,9 @@
 	if (disabled != 1)
 		goto out;
 
-	local_save_flags(flags);
-
 	event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file,
 						TRACE_FN, sizeof(*entry),
-						flags, pc);
+						trace_ctx);
 	if (!event)
 		goto out;
 	entry = ring_buffer_event_data(event);
@@ -3426,7 +3720,7 @@
 	entry->ip			= ip;
 	entry->parent_ip		= parent_ip;
 	event_trigger_unlock_commit(&event_trace_file, buffer, event,
-				    entry, flags, pc);
+				    entry, trace_ctx);
 out:
 	atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
 	preempt_enable_notrace();