2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/kernel/trace/trace_events.c
@@ -12,6 +12,7 @@
 #define pr_fmt(fmt) fmt
 
 #include <linux/workqueue.h>
+#include <linux/security.h>
 #include <linux/spinlock.h>
 #include <linux/kthread.h>
 #include <linux/tracefs.h>
@@ -23,6 +24,7 @@
 #include <linux/delay.h>
 
 #include <trace/events/sched.h>
+#include <trace/syscall.h>
 
 #include <asm/setup.h>
 
@@ -36,6 +38,7 @@
 LIST_HEAD(ftrace_events);
 static LIST_HEAD(ftrace_generic_fields);
 static LIST_HEAD(ftrace_common_fields);
+static bool eventdir_initialized;
 
 #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
 
@@ -69,14 +72,6 @@
 
 #define while_for_each_event_file()		\
 	}
-
-static struct list_head *
-trace_get_fields(struct trace_event_call *event_call)
-{
-	if (!event_call->class->get_fields)
-		return &event_call->class->fields;
-	return event_call->class->get_fields(event_call);
-}
 
 static struct ftrace_event_field *
 __find_event_field(struct list_head *head, char *name)
@@ -173,6 +168,7 @@
 
 	__generic_field(int, CPU, FILTER_CPU);
 	__generic_field(int, cpu, FILTER_CPU);
+	__generic_field(int, common_cpu, FILTER_CPU);
 	__generic_field(char *, COMM, FILTER_COMM);
 	__generic_field(char *, comm, FILTER_COMM);
 
@@ -240,13 +236,16 @@
 {
 	struct trace_array *tr = trace_file->tr;
 	struct trace_array_cpu *data;
+	struct trace_pid_list *no_pid_list;
 	struct trace_pid_list *pid_list;
 
 	pid_list = rcu_dereference_raw(tr->filtered_pids);
-	if (!pid_list)
+	no_pid_list = rcu_dereference_raw(tr->filtered_no_pids);
+
+	if (!pid_list && !no_pid_list)
 		return false;
 
-	data = this_cpu_ptr(tr->trace_buffer.data);
+	data = this_cpu_ptr(tr->array_buffer.data);
 
 	return data->ignore_pid;
 }
@@ -262,25 +261,23 @@
 	    trace_event_ignore_this_pid(trace_file))
 		return NULL;
 
-	local_save_flags(fbuffer->flags);
-	fbuffer->pc = preempt_count();
 	/*
-	 * If CONFIG_PREEMPT is enabled, then the tracepoint itself disables
+	 * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables
 	 * preemption (adding one to the preempt_count). Since we are
 	 * interested in the preempt_count at the time the tracepoint was
 	 * hit, we need to subtract one to offset the increment.
 	 */
-	if (IS_ENABLED(CONFIG_PREEMPT))
-		fbuffer->pc--;
+	fbuffer->trace_ctx = tracing_gen_ctx_dec();
 	fbuffer->trace_file = trace_file;
 
 	fbuffer->event =
 		trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file,
 						event_call->event.type, len,
-						fbuffer->flags, fbuffer->pc);
+						fbuffer->trace_ctx);
 	if (!fbuffer->event)
 		return NULL;
 
+	fbuffer->regs = NULL;
 	fbuffer->entry = ring_buffer_event_data(fbuffer->event);
 	return fbuffer->entry;
 }
@@ -517,6 +514,9 @@
 
 	pid_list = rcu_dereference_raw(tr->filtered_pids);
 	trace_filter_add_remove_task(pid_list, NULL, task);
+
+	pid_list = rcu_dereference_raw(tr->filtered_no_pids);
+	trace_filter_add_remove_task(pid_list, NULL, task);
 }
 
 static void
@@ -528,6 +528,9 @@
 	struct trace_array *tr = data;
 
 	pid_list = rcu_dereference_sched(tr->filtered_pids);
+	trace_filter_add_remove_task(pid_list, self, task);
+
+	pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 	trace_filter_add_remove_task(pid_list, self, task);
 }
 
@@ -551,13 +554,23 @@
 			  struct task_struct *prev, struct task_struct *next)
 {
 	struct trace_array *tr = data;
+	struct trace_pid_list *no_pid_list;
 	struct trace_pid_list *pid_list;
+	bool ret;
 
 	pid_list = rcu_dereference_sched(tr->filtered_pids);
+	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 
-	this_cpu_write(tr->trace_buffer.data->ignore_pid,
-		       trace_ignore_this_task(pid_list, prev) &&
-		       trace_ignore_this_task(pid_list, next));
+	/*
+	 * Sched switch is funny, as we only want to ignore it
+	 * in the notrace case if both prev and next should be ignored.
+	 */
+	ret = trace_ignore_this_task(NULL, no_pid_list, prev) &&
+	      trace_ignore_this_task(NULL, no_pid_list, next);
+
+	this_cpu_write(tr->array_buffer.data->ignore_pid, ret ||
+		       (trace_ignore_this_task(pid_list, NULL, prev) &&
+			trace_ignore_this_task(pid_list, NULL, next)));
 }
 
 static void
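Note on the hunk above: the two lists combine asymmetrically. With only set_event_notrace_pid populated, a sched_switch event is dropped only when *both* prev and next are on the notrace list; with only set_event_pid populated, it is dropped only when *neither* task matches. A standalone sketch of that predicate, with invented helper and parameter names and plain bools standing in for the real pid-list lookups:

    /* Illustrative only -- not part of this patch. */
    static bool sched_switch_ignored(bool prev_matches_pid_list,
    				 bool next_matches_pid_list,
    				 bool prev_on_notrace_list,
    				 bool next_on_notrace_list)
    {
    	/* notrace case: ignore only if both tasks are excluded */
    	bool notrace_ignores = prev_on_notrace_list && next_on_notrace_list;

    	/* positive case: ignore only if neither task is selected */
    	bool pid_ignores = !prev_matches_pid_list && !next_matches_pid_list;

    	return notrace_ignores || pid_ignores;
    }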
@@ -565,58 +578,55 @@
 			  struct task_struct *prev, struct task_struct *next)
 {
 	struct trace_array *tr = data;
+	struct trace_pid_list *no_pid_list;
 	struct trace_pid_list *pid_list;
 
 	pid_list = rcu_dereference_sched(tr->filtered_pids);
+	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 
-	this_cpu_write(tr->trace_buffer.data->ignore_pid,
-		       trace_ignore_this_task(pid_list, next));
+	this_cpu_write(tr->array_buffer.data->ignore_pid,
+		       trace_ignore_this_task(pid_list, no_pid_list, next));
 }
 
 static void
 event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
 {
 	struct trace_array *tr = data;
+	struct trace_pid_list *no_pid_list;
 	struct trace_pid_list *pid_list;
 
 	/* Nothing to do if we are already tracing */
-	if (!this_cpu_read(tr->trace_buffer.data->ignore_pid))
+	if (!this_cpu_read(tr->array_buffer.data->ignore_pid))
 		return;
 
 	pid_list = rcu_dereference_sched(tr->filtered_pids);
+	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 
-	this_cpu_write(tr->trace_buffer.data->ignore_pid,
-		       trace_ignore_this_task(pid_list, task));
+	this_cpu_write(tr->array_buffer.data->ignore_pid,
+		       trace_ignore_this_task(pid_list, no_pid_list, task));
 }
 
 static void
 event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
 {
 	struct trace_array *tr = data;
+	struct trace_pid_list *no_pid_list;
 	struct trace_pid_list *pid_list;
 
 	/* Nothing to do if we are not tracing */
-	if (this_cpu_read(tr->trace_buffer.data->ignore_pid))
+	if (this_cpu_read(tr->array_buffer.data->ignore_pid))
 		return;
 
 	pid_list = rcu_dereference_sched(tr->filtered_pids);
+	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 
 	/* Set tracing if current is enabled */
-	this_cpu_write(tr->trace_buffer.data->ignore_pid,
-		       trace_ignore_this_task(pid_list, current));
+	this_cpu_write(tr->array_buffer.data->ignore_pid,
+		       trace_ignore_this_task(pid_list, no_pid_list, current));
 }
 
-static void __ftrace_clear_event_pids(struct trace_array *tr)
+static void unregister_pid_events(struct trace_array *tr)
 {
-	struct trace_pid_list *pid_list;
-	struct trace_event_file *file;
-	int cpu;
-
-	pid_list = rcu_dereference_protected(tr->filtered_pids,
-					     lockdep_is_held(&event_mutex));
-	if (!pid_list)
-		return;
-
 	unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
 	unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);
 
@@ -628,26 +638,55 @@
 
 	unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
 	unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
+}
 
-	list_for_each_entry(file, &tr->events, list) {
-		clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
+static void __ftrace_clear_event_pids(struct trace_array *tr, int type)
+{
+	struct trace_pid_list *pid_list;
+	struct trace_pid_list *no_pid_list;
+	struct trace_event_file *file;
+	int cpu;
+
+	pid_list = rcu_dereference_protected(tr->filtered_pids,
+					     lockdep_is_held(&event_mutex));
+	no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
+					     lockdep_is_held(&event_mutex));
+
+	/* Make sure there's something to do */
+	if (!pid_type_enabled(type, pid_list, no_pid_list))
+		return;
+
+	if (!still_need_pid_events(type, pid_list, no_pid_list)) {
+		unregister_pid_events(tr);
+
+		list_for_each_entry(file, &tr->events, list) {
+			clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
+		}
+
+		for_each_possible_cpu(cpu)
+			per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false;
 	}
 
-	for_each_possible_cpu(cpu)
-		per_cpu_ptr(tr->trace_buffer.data, cpu)->ignore_pid = false;
+	if (type & TRACE_PIDS)
+		rcu_assign_pointer(tr->filtered_pids, NULL);
 
-	rcu_assign_pointer(tr->filtered_pids, NULL);
+	if (type & TRACE_NO_PIDS)
+		rcu_assign_pointer(tr->filtered_no_pids, NULL);
 
 	/* Wait till all users are no longer using pid filtering */
 	tracepoint_synchronize_unregister();
 
-	trace_free_pid_list(pid_list);
+	if ((type & TRACE_PIDS) && pid_list)
+		trace_free_pid_list(pid_list);
+
+	if ((type & TRACE_NO_PIDS) && no_pid_list)
+		trace_free_pid_list(no_pid_list);
 }
 
-static void ftrace_clear_event_pids(struct trace_array *tr)
+static void ftrace_clear_event_pids(struct trace_array *tr, int type)
 {
 	mutex_lock(&event_mutex);
-	__ftrace_clear_event_pids(tr);
+	__ftrace_clear_event_pids(tr, type);
 	mutex_unlock(&event_mutex);
 }
 
@@ -706,7 +745,7 @@
 		return;
 
 	if (!--dir->nr_events) {
-		tracefs_remove_recursive(dir->entry);
+		tracefs_remove(dir->entry);
 		list_del(&dir->list);
 		__put_system_dir(dir);
 	}
@@ -725,7 +764,7 @@
 		}
 		spin_unlock(&dir->d_lock);
 
-		tracefs_remove_recursive(dir);
+		tracefs_remove(dir);
 	}
 
 	list_del(&file->list);
@@ -797,7 +836,7 @@
 	return ret;
 }
 
-static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
+int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
 {
 	char *event = NULL, *sub = NULL, *match;
 	int ret;
@@ -859,6 +898,32 @@
 	return __ftrace_set_clr_event(tr, NULL, system, event, set);
 }
 EXPORT_SYMBOL_GPL(trace_set_clr_event);
+
+/**
+ * trace_array_set_clr_event - enable or disable an event for a trace array.
+ * @tr: concerned trace array.
+ * @system: system name to match (NULL for any system)
+ * @event: event name to match (NULL for all events, within system)
+ * @enable: true to enable, false to disable
+ *
+ * This is a way for other parts of the kernel to enable or disable
+ * event recording.
+ *
+ * Returns 0 on success, -EINVAL if the parameters do not match any
+ * registered events.
+ */
+int trace_array_set_clr_event(struct trace_array *tr, const char *system,
+		const char *event, bool enable)
+{
+	int set;
+
+	if (!tr)
+		return -ENOENT;
+
+	set = (enable == true) ? 1 : 0;
+	return __ftrace_set_clr_event(tr, NULL, system, event, set);
+}
+EXPORT_SYMBOL_GPL(trace_array_set_clr_event);
 
 /* 128 should be much more than enough */
 #define EVENT_BUF_SIZE 127
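As a usage sketch for the newly exported trace_array_set_clr_event(): a module can enable an event in its own trace instance roughly as follows. This assumes a kernel where trace_array_get_by_name() takes just the instance name; the instance name is made up for illustration.

    #include <linux/module.h>
    #include <linux/trace.h>

    static struct trace_array *tr;

    static int __init example_init(void)
    {
    	int ret;

    	tr = trace_array_get_by_name("example-instance");
    	if (!tr)
    		return -ENOMEM;

    	/* Start recording sched:sched_switch into that instance. */
    	ret = trace_array_set_clr_event(tr, "sched", "sched_switch", true);
    	if (ret)
    		trace_array_put(tr);
    	return ret;
    }

    static void __exit example_exit(void)
    {
    	trace_array_set_clr_event(tr, "sched", "sched_switch", false);
    	trace_array_put(tr);
    }

    module_init(example_init);
    module_exit(example_exit);
    MODULE_LICENSE("GPL");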
@@ -994,15 +1059,32 @@
 }
 
 static void *
-p_next(struct seq_file *m, void *v, loff_t *pos)
+__next(struct seq_file *m, void *v, loff_t *pos, int type)
 {
 	struct trace_array *tr = m->private;
-	struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);
+	struct trace_pid_list *pid_list;
+
+	if (type == TRACE_PIDS)
+		pid_list = rcu_dereference_sched(tr->filtered_pids);
+	else
+		pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 
 	return trace_pid_next(pid_list, v, pos);
 }
 
-static void *p_start(struct seq_file *m, loff_t *pos)
+static void *
+p_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	return __next(m, v, pos, TRACE_PIDS);
+}
+
+static void *
+np_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	return __next(m, v, pos, TRACE_NO_PIDS);
+}
+
+static void *__start(struct seq_file *m, loff_t *pos, int type)
 	__acquires(RCU)
 {
 	struct trace_pid_list *pid_list;
@@ -1017,12 +1099,27 @@
 	mutex_lock(&event_mutex);
 	rcu_read_lock_sched();
 
-	pid_list = rcu_dereference_sched(tr->filtered_pids);
+	if (type == TRACE_PIDS)
+		pid_list = rcu_dereference_sched(tr->filtered_pids);
+	else
+		pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 
 	if (!pid_list)
 		return NULL;
 
 	return trace_pid_start(pid_list, pos);
+}
+
+static void *p_start(struct seq_file *m, loff_t *pos)
+	__acquires(RCU)
+{
+	return __start(m, pos, TRACE_PIDS);
+}
+
+static void *np_start(struct seq_file *m, loff_t *pos)
+	__acquires(RCU)
+{
+	return __start(m, pos, TRACE_NO_PIDS);
 }
 
 static void p_stop(struct seq_file *m, void *p)
@@ -1256,7 +1353,7 @@
 	 */
 	array_descriptor = strchr(field->type, '[');
 
-	if (!strncmp(field->type, "__data_loc", 10))
+	if (str_has_prefix(field->type, "__data_loc"))
 		array_descriptor = NULL;
 
 	if (!array_descriptor)
@@ -1305,6 +1402,8 @@
 {
 	struct seq_file *m;
 	int ret;
+
+	/* Do we want to hide event format files on tracefs lockdown? */
 
 	ret = seq_open(file, &trace_format_seq_ops);
 	if (ret < 0)
@@ -1452,28 +1551,17 @@
 	struct trace_array *tr = inode->i_private;
 	int ret;
 
-	if (tracing_is_disabled())
-		return -ENODEV;
-
-	if (trace_array_get(tr) < 0)
-		return -ENODEV;
-
 	/* Make a temporary dir that has no system but points to tr */
 	dir = kzalloc(sizeof(*dir), GFP_KERNEL);
-	if (!dir) {
-		trace_array_put(tr);
+	if (!dir)
 		return -ENOMEM;
-	}
 
-	dir->tr = tr;
-
-	ret = tracing_open_generic(inode, filp);
+	ret = tracing_open_generic_tr(inode, filp);
 	if (ret < 0) {
-		trace_array_put(tr);
 		kfree(dir);
 		return ret;
 	}
-
+	dir->tr = tr;
 	filp->private_data = dir;
 
 	return 0;
@@ -1579,6 +1667,7 @@
 {
 	struct trace_array *tr = data;
 	struct trace_pid_list *pid_list;
+	struct trace_pid_list *no_pid_list;
 
 	/*
 	 * This function is called by on_each_cpu() while the
@@ -1586,18 +1675,50 @@
 	 */
 	pid_list = rcu_dereference_protected(tr->filtered_pids,
 					     mutex_is_locked(&event_mutex));
+	no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
+					     mutex_is_locked(&event_mutex));
 
-	this_cpu_write(tr->trace_buffer.data->ignore_pid,
-		       trace_ignore_this_task(pid_list, current));
+	this_cpu_write(tr->array_buffer.data->ignore_pid,
+		       trace_ignore_this_task(pid_list, no_pid_list, current));
+}
+
+static void register_pid_events(struct trace_array *tr)
+{
+	/*
+	 * Register a probe that is called before all other probes
+	 * to set ignore_pid if next or prev do not match.
+	 * Register a probe this is called after all other probes
+	 * to only keep ignore_pid set if next pid matches.
+	 */
+	register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
+					 tr, INT_MAX);
+	register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
+					 tr, 0);
+
+	register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
+					 tr, INT_MAX);
+	register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
+					 tr, 0);
+
+	register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
+					 tr, INT_MAX);
+	register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
+					 tr, 0);
+
+	register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
+					 tr, INT_MAX);
+	register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
+					 tr, 0);
 }
 
 static ssize_t
-ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
-		       size_t cnt, loff_t *ppos)
+event_pid_write(struct file *filp, const char __user *ubuf,
+		size_t cnt, loff_t *ppos, int type)
 {
 	struct seq_file *m = filp->private_data;
 	struct trace_array *tr = m->private;
 	struct trace_pid_list *filtered_pids = NULL;
+	struct trace_pid_list *other_pids = NULL;
 	struct trace_pid_list *pid_list;
 	struct trace_event_file *file;
 	ssize_t ret;
@@ -1611,14 +1732,26 @@
 
 	mutex_lock(&event_mutex);
 
-	filtered_pids = rcu_dereference_protected(tr->filtered_pids,
-					     lockdep_is_held(&event_mutex));
+	if (type == TRACE_PIDS) {
+		filtered_pids = rcu_dereference_protected(tr->filtered_pids,
+							  lockdep_is_held(&event_mutex));
+		other_pids = rcu_dereference_protected(tr->filtered_no_pids,
+							  lockdep_is_held(&event_mutex));
+	} else {
+		filtered_pids = rcu_dereference_protected(tr->filtered_no_pids,
+							  lockdep_is_held(&event_mutex));
+		other_pids = rcu_dereference_protected(tr->filtered_pids,
+							  lockdep_is_held(&event_mutex));
+	}
 
 	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
 	if (ret < 0)
 		goto out;
 
-	rcu_assign_pointer(tr->filtered_pids, pid_list);
+	if (type == TRACE_PIDS)
+		rcu_assign_pointer(tr->filtered_pids, pid_list);
+	else
+		rcu_assign_pointer(tr->filtered_no_pids, pid_list);
 
 	list_for_each_entry(file, &tr->events, list) {
 		set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
@@ -1627,32 +1760,8 @@
 	if (filtered_pids) {
 		tracepoint_synchronize_unregister();
 		trace_free_pid_list(filtered_pids);
-	} else if (pid_list) {
-		/*
-		 * Register a probe that is called before all other probes
-		 * to set ignore_pid if next or prev do not match.
-		 * Register a probe this is called after all other probes
-		 * to only keep ignore_pid set if next pid matches.
-		 */
-		register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
-						 tr, INT_MAX);
-		register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
-						 tr, 0);
-
-		register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
-						 tr, INT_MAX);
-		register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
-						 tr, 0);
-
-		register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
-						 tr, INT_MAX);
-		register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
-						 tr, 0);
-
-		register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
-						 tr, INT_MAX);
-		register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
-						 tr, 0);
+	} else if (pid_list && !other_pids) {
+		register_pid_events(tr);
 	}
 
 	/*
@@ -1671,9 +1780,24 @@
 	return ret;
 }
 
+static ssize_t
+ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
+		       size_t cnt, loff_t *ppos)
+{
+	return event_pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS);
+}
+
+static ssize_t
+ftrace_event_npid_write(struct file *filp, const char __user *ubuf,
+			size_t cnt, loff_t *ppos)
+{
+	return event_pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS);
+}
+
 static int ftrace_event_avail_open(struct inode *inode, struct file *file);
 static int ftrace_event_set_open(struct inode *inode, struct file *file);
 static int ftrace_event_set_pid_open(struct inode *inode, struct file *file);
+static int ftrace_event_set_npid_open(struct inode *inode, struct file *file);
 static int ftrace_event_release(struct inode *inode, struct file *file);
 
 static const struct seq_operations show_event_seq_ops = {
@@ -1697,6 +1821,13 @@
 	.stop = p_stop,
 };
 
+static const struct seq_operations show_set_no_pid_seq_ops = {
+	.start = np_start,
+	.next = np_next,
+	.show = trace_pid_show,
+	.stop = p_stop,
+};
+
 static const struct file_operations ftrace_avail_fops = {
 	.open = ftrace_event_avail_open,
 	.read = seq_read,
@@ -1716,6 +1847,14 @@
 	.open = ftrace_event_set_pid_open,
 	.read = seq_read,
 	.write = ftrace_event_pid_write,
+	.llseek = seq_lseek,
+	.release = ftrace_event_release,
+};
+
+static const struct file_operations ftrace_set_event_notrace_pid_fops = {
+	.open = ftrace_event_set_npid_open,
+	.read = seq_read,
+	.write = ftrace_event_npid_write,
 	.llseek = seq_lseek,
 	.release = ftrace_event_release,
 };
@@ -1783,6 +1922,10 @@
 	struct seq_file *m;
 	int ret;
 
+	ret = security_locked_down(LOCKDOWN_TRACEFS);
+	if (ret)
+		return ret;
+
 	ret = seq_open(file, seq_ops);
 	if (ret < 0)
 		return ret;
@@ -1807,6 +1950,7 @@
 {
 	const struct seq_operations *seq_ops = &show_event_seq_ops;
 
+	/* Checks for tracefs lockdown */
 	return ftrace_event_open(inode, file, seq_ops);
 }
 
@@ -1817,8 +1961,9 @@
 	struct trace_array *tr = inode->i_private;
 	int ret;
 
-	if (trace_array_get(tr) < 0)
-		return -ENODEV;
+	ret = tracing_check_open_get_tr(tr);
+	if (ret)
+		return ret;
 
 	if ((file->f_mode & FMODE_WRITE) &&
 	    (file->f_flags & O_TRUNC))
@@ -1837,12 +1982,34 @@
 	struct trace_array *tr = inode->i_private;
 	int ret;
 
-	if (trace_array_get(tr) < 0)
-		return -ENODEV;
+	ret = tracing_check_open_get_tr(tr);
+	if (ret)
+		return ret;
 
 	if ((file->f_mode & FMODE_WRITE) &&
 	    (file->f_flags & O_TRUNC))
-		ftrace_clear_event_pids(tr);
+		ftrace_clear_event_pids(tr, TRACE_PIDS);
+
+	ret = ftrace_event_open(inode, file, seq_ops);
+	if (ret < 0)
+		trace_array_put(tr);
+	return ret;
+}
+
+static int
+ftrace_event_set_npid_open(struct inode *inode, struct file *file)
+{
+	const struct seq_operations *seq_ops = &show_set_no_pid_seq_ops;
+	struct trace_array *tr = inode->i_private;
+	int ret;
+
+	ret = tracing_check_open_get_tr(tr);
+	if (ret)
+		return ret;
+
+	if ((file->f_mode & FMODE_WRITE) &&
+	    (file->f_flags & O_TRUNC))
+		ftrace_clear_event_pids(tr, TRACE_NO_PIDS);
 
 	ret = ftrace_event_open(inode, file, seq_ops);
 	if (ret < 0)
@@ -1959,11 +2126,47 @@
 }
 
 static int
+event_define_fields(struct trace_event_call *call)
+{
+	struct list_head *head;
+	int ret = 0;
+
+	/*
+	 * Other events may have the same class. Only update
+	 * the fields if they are not already defined.
+	 */
+	head = trace_get_fields(call);
+	if (list_empty(head)) {
+		struct trace_event_fields *field = call->class->fields_array;
+		unsigned int offset = sizeof(struct trace_entry);
+
+		for (; field->type; field++) {
+			if (field->type == TRACE_FUNCTION_TYPE) {
+				field->define_fields(call);
+				break;
+			}
+
+			offset = ALIGN(offset, field->align);
+			ret = trace_define_field(call, field->type, field->name,
+						 offset, field->size,
+						 field->is_signed, field->filter_type);
+			if (WARN_ON_ONCE(ret)) {
+				pr_err("error code is %d\n", ret);
+				break;
+			}
+
+			offset += field->size;
+		}
+	}
+
+	return ret;
+}
+
+static int
 event_create_dir(struct dentry *parent, struct trace_event_file *file)
 {
 	struct trace_event_call *call = file->event_call;
 	struct trace_array *tr = file->tr;
-	struct list_head *head;
 	struct dentry *d_events;
 	const char *name;
 	int ret;
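For context on the hunk above: event_define_fields() walks call->class->fields_array, a sentinel-terminated table of struct trace_event_fields. A sketch of what such a table might look like for a hypothetical two-field event (event and field names invented, and the exact struct layout assumed from kernels of this era):

    /* Illustrative fields_array; the loop above stops at the empty sentinel. */
    static struct trace_event_fields example_event_fields[] = {
    	{ .type = "unsigned long", .name = "ip",
    	  .size = sizeof(unsigned long),
    	  .align = __alignof__(unsigned long),
    	  .is_signed = 0, .filter_type = FILTER_OTHER },
    	{ .type = "int", .name = "retval",
    	  .size = sizeof(int), .align = __alignof__(int),
    	  .is_signed = 1, .filter_type = FILTER_OTHER },
    	{}	/* field->type == NULL terminates the walk */
    };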
@@ -1997,18 +2200,10 @@
 			   &ftrace_event_id_fops);
 #endif
 
-	/*
-	 * Other events may have the same class. Only update
-	 * the fields if they are not already defined.
-	 */
-	head = trace_get_fields(call);
-	if (list_empty(head)) {
-		ret = call->class->define_fields(call);
-		if (ret < 0) {
-			pr_warn("Could not initialize trace point events/%s\n",
-				name);
-			return -1;
-		}
+	ret = event_define_fields(call);
+	if (ret < 0) {
+		pr_warn("Could not initialize trace point events/%s\n", name);
+		return ret;
 	}
 
 	/*
@@ -2027,8 +2222,18 @@
 	trace_create_file("hist", 0444, file->dir, file,
 			  &event_hist_fops);
 #endif
+#ifdef CONFIG_HIST_TRIGGERS_DEBUG
+	trace_create_file("hist_debug", 0444, file->dir, file,
+			  &event_hist_debug_fops);
+#endif
 	trace_create_file("format", 0444, file->dir, call,
 			  &ftrace_event_format_fops);
+
+#ifdef CONFIG_TRACE_EVENT_INJECT
+	if (call->event.type && call->class->reg)
+		trace_create_file("inject", 0200, file->dir, file,
+				  &event_inject_fops);
+#endif
 
 	return 0;
 }
@@ -2257,6 +2462,7 @@
 trace_create_new_event(struct trace_event_call *call,
 		       struct trace_array *tr)
 {
+	struct trace_pid_list *no_pid_list;
 	struct trace_pid_list *pid_list;
 	struct trace_event_file *file;
 
@@ -2266,8 +2472,10 @@
 
 	pid_list = rcu_dereference_protected(tr->filtered_pids,
 					     lockdep_is_held(&event_mutex));
+	no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
+					     lockdep_is_held(&event_mutex));
 
-	if (pid_list)
+	if (pid_list || no_pid_list)
 		file->flags |= EVENT_FILE_FL_PID_FILTER;
 
 	file->event_call = call;
@@ -2290,7 +2498,10 @@
 	if (!file)
 		return -ENOMEM;
 
-	return event_create_dir(tr->event_dir, file);
+	if (eventdir_initialized)
+		return event_create_dir(tr->event_dir, file);
+	else
+		return event_define_fields(call);
 }
 
 /*
@@ -2298,7 +2509,7 @@
  * for enabling events at boot. We want to enable events before
 * the filesystem is initialized.
 */
-static __init int
+static int
 __trace_early_add_new_event(struct trace_event_call *call,
 			    struct trace_array *tr)
 {
@@ -2308,13 +2519,14 @@
 	if (!file)
 		return -ENOMEM;
 
-	return 0;
+	return event_define_fields(call);
 }
 
 struct ftrace_module_file_ops;
 static void __add_event_to_tracers(struct trace_event_call *call);
 
-int trace_add_event_call_nolock(struct trace_event_call *call)
+/* Add an additional event_call dynamically */
+int trace_add_event_call(struct trace_event_call *call)
 {
 	int ret;
 	lockdep_assert_held(&event_mutex);
@@ -2326,17 +2538,6 @@
 	__add_event_to_tracers(call);
 
 	mutex_unlock(&trace_types_lock);
-	return ret;
-}
-
-/* Add an additional event_call dynamically */
-int trace_add_event_call(struct trace_event_call *call)
-{
-	int ret;
-
-	mutex_lock(&event_mutex);
-	ret = trace_add_event_call_nolock(call);
-	mutex_unlock(&event_mutex);
 	return ret;
 }
 
@@ -2370,7 +2571,10 @@
 	 * TRACE_REG_UNREGISTER.
 	 */
 	if (file->flags & EVENT_FILE_FL_ENABLED)
-		return -EBUSY;
+		goto busy;
+
+	if (file->flags & EVENT_FILE_FL_WAS_ENABLED)
+		tr->clear_trace = true;
 	/*
 	 * The do_for_each_event_file_safe() is
 	 * a double loop. After finding the call for this
@@ -2383,10 +2587,16 @@
 	__trace_remove_event_call(call);
 
 	return 0;
+ busy:
+	/* No need to clear the trace now */
+	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+		tr->clear_trace = false;
+	}
+	return -EBUSY;
 }
 
-/* no event_mutex version */
-int trace_remove_event_call_nolock(struct trace_event_call *call)
+/* Remove an event_call */
+int trace_remove_event_call(struct trace_event_call *call)
 {
 	int ret;
 
@@ -2397,18 +2607,6 @@
 	ret = probe_remove_event_call(call);
 	up_write(&trace_event_sem);
 	mutex_unlock(&trace_types_lock);
-
-	return ret;
-}
-
-/* Remove an event_call */
-int trace_remove_event_call(struct trace_event_call *call)
-{
-	int ret;
-
-	mutex_lock(&event_mutex);
-	ret = trace_remove_event_call_nolock(call);
-	mutex_unlock(&event_mutex);
 
 	return ret;
 }
@@ -2483,7 +2681,7 @@
 	mutex_unlock(&trace_types_lock);
 	mutex_unlock(&event_mutex);
 
-	return 0;
+	return NOTIFY_OK;
 }
 
 static struct notifier_block trace_module_nb = {
@@ -2543,6 +2741,91 @@
 
 	return file;
 }
+
+/**
+ * trace_get_event_file - Find and return a trace event file
+ * @instance: The name of the trace instance containing the event
+ * @system: The name of the system containing the event
+ * @event: The name of the event
+ *
+ * Return a trace event file given the trace instance name, trace
+ * system, and trace event name. If the instance name is NULL, it
+ * refers to the top-level trace array.
+ *
+ * This function will look it up and return it if found, after calling
+ * trace_array_get() to prevent the instance from going away, and
+ * increment the event's module refcount to prevent it from being
+ * removed.
+ *
+ * To release the file, call trace_put_event_file(), which will call
+ * trace_array_put() and decrement the event's module refcount.
+ *
+ * Return: The trace event on success, ERR_PTR otherwise.
+ */
+struct trace_event_file *trace_get_event_file(const char *instance,
+					      const char *system,
+					      const char *event)
+{
+	struct trace_array *tr = top_trace_array();
+	struct trace_event_file *file = NULL;
+	int ret = -EINVAL;
+
+	if (instance) {
+		tr = trace_array_find_get(instance);
+		if (!tr)
+			return ERR_PTR(-ENOENT);
+	} else {
+		ret = trace_array_get(tr);
+		if (ret)
+			return ERR_PTR(ret);
+	}
+
+	mutex_lock(&event_mutex);
+
+	file = find_event_file(tr, system, event);
+	if (!file) {
+		trace_array_put(tr);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Don't let event modules unload while in use */
+	ret = try_module_get(file->event_call->mod);
+	if (!ret) {
+		trace_array_put(tr);
+		ret = -EBUSY;
+		goto out;
+	}
+
+	ret = 0;
+ out:
+	mutex_unlock(&event_mutex);
+
+	if (ret)
+		file = ERR_PTR(ret);
+
+	return file;
+}
+EXPORT_SYMBOL_GPL(trace_get_event_file);
+
+/**
+ * trace_put_event_file - Release a file from trace_get_event_file()
+ * @file: The trace event file
+ *
+ * If a file was retrieved using trace_get_event_file(), this should
+ * be called when it's no longer needed. It will cancel the previous
+ * trace_array_get() called by that function, and decrement the
+ * event's module refcount.
+ */
+void trace_put_event_file(struct trace_event_file *file)
+{
+	mutex_lock(&event_mutex);
+	module_put(file->event_call->mod);
+	mutex_unlock(&event_mutex);
+
+	trace_array_put(file->tr);
+}
+EXPORT_SYMBOL_GPL(trace_put_event_file);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
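A usage sketch for the pair added above (error handling abbreviated; the event names are arbitrary examples):

    static int example_use_event(void)
    {
    	struct trace_event_file *file;

    	file = trace_get_event_file(NULL, "sched", "sched_switch");
    	if (IS_ERR(file))
    		return PTR_ERR(file);

    	/*
    	 * The instance and the event's module are now pinned, so the
    	 * file can be used safely, e.g. to attach an event trigger.
    	 */

    	trace_put_event_file(file);
    	return 0;
    }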
@@ -2868,14 +3151,13 @@
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 /*
- * The top level array has already had its trace_event_file
- * descriptors created in order to allow for early events to
- * be recorded. This function is called after the tracefs has been
- * initialized, and we now have to create the files associated
- * to the events.
+ * The top level array and trace arrays created by boot-time tracing
+ * have already had its trace_event_file descriptors created in order
+ * to allow for early events to be recorded.
+ * This function is called after the tracefs has been initialized,
+ * and we now have to create the files associated to the events.
 */
-static __init void
-__trace_early_add_event_dirs(struct trace_array *tr)
+static void __trace_early_add_event_dirs(struct trace_array *tr)
 {
 	struct trace_event_file *file;
 	int ret;
@@ -2890,13 +3172,12 @@
 }
 
 /*
- * For early boot up, the top trace array requires to have
- * a list of events that can be enabled. This must be done before
- * the filesystem is set up in order to allow events to be traced
- * early.
+ * For early boot up, the top trace array and the trace arrays created
+ * by boot-time tracing require to have a list of events that can be
+ * enabled. This must be done before the filesystem is set up in order
+ * to allow events to be traced early.
 */
-static __init void
-__trace_early_add_events(struct trace_array *tr)
+void __trace_early_add_events(struct trace_array *tr)
 {
 	struct trace_event_call *call;
 	int ret;
@@ -2940,7 +3221,7 @@
 {
 	strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
 	ring_buffer_expanded = true;
-	tracing_selftest_disabled = true;
+	disable_tracing_selftest("running event tracing");
 
 	return 1;
 }
@@ -2979,6 +3260,11 @@
 				    tr, &ftrace_set_event_pid_fops);
 	if (!entry)
 		pr_warn("Could not create tracefs 'set_event_pid' entry\n");
+
+	entry = tracefs_create_file("set_event_notrace_pid", 0644, parent,
+				    tr, &ftrace_set_event_notrace_pid_fops);
+	if (!entry)
+		pr_warn("Could not create tracefs 'set_event_notrace_pid' entry\n");
 
 	/* ring buffer internal formats */
 	entry = trace_create_file("header_page", 0444, d_events,
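From userspace, the new file is driven the same way as set_event_pid. A minimal sketch, assuming tracefs is mounted at /sys/kernel/tracing:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/types.h>
    #include <unistd.h>

    /* Exclude one PID (and, with the event-fork option set, its children)
     * from event tracing by writing it to set_event_notrace_pid. */
    static int exclude_pid_from_events(pid_t pid)
    {
    	char buf[32];
    	int fd, len;

    	fd = open("/sys/kernel/tracing/set_event_notrace_pid", O_WRONLY);
    	if (fd < 0)
    		return -1;

    	len = snprintf(buf, sizeof(buf), "%d\n", (int)pid);
    	if (write(fd, buf, len) != len) {
    		close(fd);
    		return -1;
    	}
    	return close(fd);
    }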
@@ -3022,7 +3308,11 @@
 		goto out;
 
 	down_write(&trace_event_sem);
-	__trace_add_event_dirs(tr);
+	/* If tr already has the event list, it is initialized in early boot. */
+	if (unlikely(!list_empty(&tr->events)))
+		__trace_early_add_event_dirs(tr);
+	else
+		__trace_add_event_dirs(tr);
 	up_write(&trace_event_sem);
 
  out:
@@ -3063,7 +3353,7 @@
 	clear_event_triggers(tr);
 
 	/* Clear the pid list */
-	__ftrace_clear_event_pids(tr);
+	__ftrace_clear_event_pids(tr, TRACE_PIDS | TRACE_NO_PIDS);
 
 	/* Disable any running events */
 	__ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);
@@ -3073,7 +3363,7 @@
 
 	down_write(&trace_event_sem);
 	__trace_remove_event_dirs(tr);
-	tracefs_remove_recursive(tr->event_dir);
+	tracefs_remove(tr->event_dir);
 	up_write(&trace_event_sem);
 
 	tr->event_dir = NULL;
@@ -3178,10 +3468,21 @@
 
 early_initcall(event_trace_enable_again);
 
+/* Init fields which doesn't related to the tracefs */
+static __init int event_trace_init_fields(void)
+{
+	if (trace_define_generic_fields())
+		pr_warn("tracing: Failed to allocated generic fields");
+
+	if (trace_define_common_fields())
+		pr_warn("tracing: Failed to allocate common fields");
+
+	return 0;
+}
+
 __init int event_trace_init(void)
 {
 	struct trace_array *tr;
-	struct dentry *d_tracer;
 	struct dentry *entry;
 	int ret;
 
@@ -3189,22 +3490,12 @@
 	if (!tr)
 		return -ENODEV;
 
-	d_tracer = tracing_init_dentry();
-	if (IS_ERR(d_tracer))
-		return 0;
-
-	entry = tracefs_create_file("available_events", 0444, d_tracer,
+	entry = tracefs_create_file("available_events", 0444, NULL,
 				    tr, &ftrace_avail_fops);
 	if (!entry)
 		pr_warn("Could not create tracefs 'available_events' entry\n");
 
-	if (trace_define_generic_fields())
-		pr_warn("tracing: Failed to allocated generic fields");
-
-	if (trace_define_common_fields())
-		pr_warn("tracing: Failed to allocate common fields");
-
-	ret = early_event_add_tracer(d_tracer, tr);
+	ret = early_event_add_tracer(NULL, tr);
 	if (ret)
 		return ret;
 
@@ -3213,6 +3504,9 @@
 	if (ret)
 		pr_warn("Failed to register trace events module notifier\n");
 #endif
+
+	eventdir_initialized = true;
+
 	return 0;
 }
 
@@ -3221,9 +3515,10 @@
 	event_trace_memsetup();
 	init_ftrace_syscalls();
 	event_trace_enable();
+	event_trace_init_fields();
 }
 
-#ifdef CONFIG_FTRACE_STARTUP_TEST
+#ifdef CONFIG_EVENT_TRACE_STARTUP_TEST
 
 static DEFINE_SPINLOCK(test_spinlock);
 static DEFINE_SPINLOCK(test_spinlock_irq);
@@ -3400,15 +3695,14 @@
 function_test_events_call(unsigned long ip, unsigned long parent_ip,
 			  struct ftrace_ops *op, struct pt_regs *pt_regs)
 {
+	struct trace_buffer *buffer;
 	struct ring_buffer_event *event;
-	struct ring_buffer *buffer;
 	struct ftrace_entry *entry;
-	unsigned long flags;
+	unsigned int trace_ctx;
 	long disabled;
 	int cpu;
-	int pc;
 
-	pc = preempt_count();
+	trace_ctx = tracing_gen_ctx();
 	preempt_disable_notrace();
 	cpu = raw_smp_processor_id();
 	disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
@@ -3416,11 +3710,9 @@
 	if (disabled != 1)
 		goto out;
 
-	local_save_flags(flags);
-
 	event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file,
 						TRACE_FN, sizeof(*entry),
-						flags, pc);
+						trace_ctx);
 	if (!event)
 		goto out;
 	entry = ring_buffer_event_data(event);
@@ -3428,7 +3720,7 @@
 	entry->ip = ip;
 	entry->parent_ip = parent_ip;
 	event_trigger_unlock_commit(&event_trace_file, buffer, event,
-				    entry, flags, pc);
+				    entry, trace_ctx);
 out:
 	atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
 	preempt_enable_notrace();