2024-09-20 cf4ce59b3b70238352c7f1729f0f7223214828ad
kernel/kernel/trace/trace.c
....@@ -17,6 +17,7 @@
1717 #include <linux/stacktrace.h>
1818 #include <linux/writeback.h>
1919 #include <linux/kallsyms.h>
20
+#include <linux/security.h>
2021 #include <linux/seq_file.h>
2122 #include <linux/notifier.h>
2223 #include <linux/irqflags.h>
....@@ -44,6 +45,10 @@
4445 #include <linux/trace.h>
4546 #include <linux/sched/clock.h>
4647 #include <linux/sched/rt.h>
48
+#include <linux/fsnotify.h>
49
+#include <linux/irq_work.h>
50
+#include <linux/workqueue.h>
51
+#include <trace/hooks/ftrace_dump.h>
4752
4853 #include "trace.h"
4954 #include "trace_output.h"
....@@ -64,9 +69,20 @@
6469 static bool __read_mostly tracing_selftest_running;
6570
6671 /*
67
- * If a tracer is running, we do not want to run SELFTEST.
72
+ * If boot-time tracing including tracers/events via kernel cmdline
73
+ * is running, we do not want to run SELFTEST.
6874 */
6975 bool __read_mostly tracing_selftest_disabled;
76
+
77
+#ifdef CONFIG_FTRACE_STARTUP_TEST
78
+void __init disable_tracing_selftest(const char *reason)
79
+{
80
+ if (!tracing_selftest_disabled) {
81
+ tracing_selftest_disabled = true;
82
+ pr_info("Ftrace startup test is disabled due to %s\n", reason);
83
+ }
84
+}
85
+#endif
7086
7187 /* Pipe tracepoints to printk */
7288 struct trace_iterator *tracepoint_print_iter;
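The disable_tracing_selftest() helper added above is what later hunks call instead of open-coding the flag write plus printk (see the register_tracer() hunk further down). A minimal sketch of a caller, assuming the usual static-inline stub exists when CONFIG_FTRACE_STARTUP_TEST is off; the parameter name and reason string are illustrative:

static int __init my_boot_tracer_setup(char *str)
{
	/* A tracer chosen on the kernel command line makes the startup tests moot. */
	disable_tracing_selftest("running a tracer from the command line");
	return 1;	/* tell __setup() the parameter was consumed */
}
__setup("my_tracer=", my_boot_tracer_setup);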
....@@ -158,7 +174,10 @@
158174 static union trace_eval_map_item *trace_eval_maps;
159175 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160176
161
-static int tracing_set_tracer(struct trace_array *tr, const char *buf);
177
+int tracing_set_tracer(struct trace_array *tr, const char *buf);
178
+static void ftrace_trace_userstack(struct trace_array *tr,
179
+ struct trace_buffer *buffer,
180
+ unsigned long flags, int pc);
162181
163182 #define MAX_TRACER_SIZE 100
164183 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
....@@ -215,7 +234,7 @@
215234 static int __init set_trace_boot_options(char *str)
216235 {
217236 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218
- return 0;
237
+ return 1;
219238 }
220239 __setup("trace_options=", set_trace_boot_options);
221240
....@@ -226,7 +245,7 @@
226245 {
227246 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228247 trace_boot_clock = trace_boot_clock_buf;
229
- return 0;
248
+ return 1;
230249 }
231250 __setup("trace_clock=", set_trace_boot_clock);
232251
....@@ -248,6 +267,145 @@
248267 do_div(nsec, 1000);
249268 return nsec;
250269 }
270
+
271
+static void
272
+trace_process_export(struct trace_export *export,
273
+ struct ring_buffer_event *event, int flag)
274
+{
275
+ struct trace_entry *entry;
276
+ unsigned int size = 0;
277
+
278
+ if (export->flags & flag) {
279
+ entry = ring_buffer_event_data(event);
280
+ size = ring_buffer_event_length(event);
281
+ export->write(export, entry, size);
282
+ }
283
+}
284
+
285
+static DEFINE_MUTEX(ftrace_export_lock);
286
+
287
+static struct trace_export __rcu *ftrace_exports_list __read_mostly;
288
+
289
+static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
290
+static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
291
+static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
292
+
293
+static inline void ftrace_exports_enable(struct trace_export *export)
294
+{
295
+ if (export->flags & TRACE_EXPORT_FUNCTION)
296
+ static_branch_inc(&trace_function_exports_enabled);
297
+
298
+ if (export->flags & TRACE_EXPORT_EVENT)
299
+ static_branch_inc(&trace_event_exports_enabled);
300
+
301
+ if (export->flags & TRACE_EXPORT_MARKER)
302
+ static_branch_inc(&trace_marker_exports_enabled);
303
+}
304
+
305
+static inline void ftrace_exports_disable(struct trace_export *export)
306
+{
307
+ if (export->flags & TRACE_EXPORT_FUNCTION)
308
+ static_branch_dec(&trace_function_exports_enabled);
309
+
310
+ if (export->flags & TRACE_EXPORT_EVENT)
311
+ static_branch_dec(&trace_event_exports_enabled);
312
+
313
+ if (export->flags & TRACE_EXPORT_MARKER)
314
+ static_branch_dec(&trace_marker_exports_enabled);
315
+}
316
+
317
+static void ftrace_exports(struct ring_buffer_event *event, int flag)
318
+{
319
+ struct trace_export *export;
320
+
321
+ preempt_disable_notrace();
322
+
323
+ export = rcu_dereference_raw_check(ftrace_exports_list);
324
+ while (export) {
325
+ trace_process_export(export, event, flag);
326
+ export = rcu_dereference_raw_check(export->next);
327
+ }
328
+
329
+ preempt_enable_notrace();
330
+}
331
+
332
+static inline void
333
+add_trace_export(struct trace_export **list, struct trace_export *export)
334
+{
335
+ rcu_assign_pointer(export->next, *list);
336
+ /*
337
+ * We are entering export into the list but another
338
+ * CPU might be walking that list. We need to make sure
339
+ * the export->next pointer is valid before another CPU sees
340
+ * the export pointer included into the list.
341
+ */
342
+ rcu_assign_pointer(*list, export);
343
+}
344
+
345
+static inline int
346
+rm_trace_export(struct trace_export **list, struct trace_export *export)
347
+{
348
+ struct trace_export **p;
349
+
350
+ for (p = list; *p != NULL; p = &(*p)->next)
351
+ if (*p == export)
352
+ break;
353
+
354
+ if (*p != export)
355
+ return -1;
356
+
357
+ rcu_assign_pointer(*p, (*p)->next);
358
+
359
+ return 0;
360
+}
361
+
362
+static inline void
363
+add_ftrace_export(struct trace_export **list, struct trace_export *export)
364
+{
365
+ ftrace_exports_enable(export);
366
+
367
+ add_trace_export(list, export);
368
+}
369
+
370
+static inline int
371
+rm_ftrace_export(struct trace_export **list, struct trace_export *export)
372
+{
373
+ int ret;
374
+
375
+ ret = rm_trace_export(list, export);
376
+ ftrace_exports_disable(export);
377
+
378
+ return ret;
379
+}
380
+
381
+int register_ftrace_export(struct trace_export *export)
382
+{
383
+ if (WARN_ON_ONCE(!export->write))
384
+ return -1;
385
+
386
+ mutex_lock(&ftrace_export_lock);
387
+
388
+ add_ftrace_export(&ftrace_exports_list, export);
389
+
390
+ mutex_unlock(&ftrace_export_lock);
391
+
392
+ return 0;
393
+}
394
+EXPORT_SYMBOL_GPL(register_ftrace_export);
395
+
396
+int unregister_ftrace_export(struct trace_export *export)
397
+{
398
+ int ret;
399
+
400
+ mutex_lock(&ftrace_export_lock);
401
+
402
+ ret = rm_ftrace_export(&ftrace_exports_list, export);
403
+
404
+ mutex_unlock(&ftrace_export_lock);
405
+
406
+ return ret;
407
+}
408
+EXPORT_SYMBOL_GPL(unregister_ftrace_export);
251409
252410 /* trace_flags holds trace_options default values */
253411 #define TRACE_DEFAULT_FLAGS \
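The hunk above moves the trace_export machinery earlier in the file and splits the single static key into per-type keys (function, event, marker). The exported entry points are register_ftrace_export()/unregister_ftrace_export(); a hedged sketch of a consumer module follows, assuming the trace_export::write() prototype and TRACE_EXPORT_* flags from include/linux/trace.h. Everything prefixed my_ is invented for illustration.

#include <linux/module.h>
#include <linux/trace.h>

static void my_export_write(struct trace_export *export, const void *entry,
			    unsigned int size)
{
	/* A real consumer would push the raw trace entry to its transport here. */
	(void)entry;
	(void)size;
}

static struct trace_export my_export = {
	.write	= my_export_write,
	.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
};

static int __init my_export_init(void)
{
	return register_ftrace_export(&my_export);
}
module_init(my_export_init);

static void __exit my_export_exit(void)
{
	unregister_ftrace_export(&my_export);
}
module_exit(my_export_exit);
MODULE_LICENSE("GPL");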
....@@ -299,15 +457,44 @@
299457 this_tr->ref--;
300458 }
301459
460
+/**
461
+ * trace_array_put - Decrement the reference counter for this trace array.
462
+ *
463
+ * NOTE: Use this when we no longer need the trace array returned by
464
+ * trace_array_get_by_name(). This ensures the trace array can be later
465
+ * destroyed.
466
+ *
467
+ */
302468 void trace_array_put(struct trace_array *this_tr)
303469 {
470
+ if (!this_tr)
471
+ return;
472
+
304473 mutex_lock(&trace_types_lock);
305474 __trace_array_put(this_tr);
306475 mutex_unlock(&trace_types_lock);
307476 }
477
+EXPORT_SYMBOL_GPL(trace_array_put);
478
+
479
+int tracing_check_open_get_tr(struct trace_array *tr)
480
+{
481
+ int ret;
482
+
483
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
484
+ if (ret)
485
+ return ret;
486
+
487
+ if (tracing_disabled)
488
+ return -ENODEV;
489
+
490
+ if (tr && trace_array_get(tr) < 0)
491
+ return -ENODEV;
492
+
493
+ return 0;
494
+}
308495
309496 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310
- struct ring_buffer *buffer,
497
+ struct trace_buffer *buffer,
311498 struct ring_buffer_event *event)
312499 {
313500 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
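tracing_check_open_get_tr(), added just above, bundles the lockdown check, the tracing_disabled check and the trace_array reference grab that tracefs open handlers otherwise repeat. A minimal sketch of the intended open/release pairing; the handler names are illustrative, only tracing_check_open_get_tr() and trace_array_put() come from the patch:

static int my_trace_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	ret = tracing_check_open_get_tr(tr);	/* lockdown + !tracing_disabled + get ref */
	if (ret)
		return ret;

	filp->private_data = tr;
	return 0;
}

static int my_trace_release(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);			/* balance the reference taken at open */
	return 0;
}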
....@@ -355,20 +542,26 @@
355542 * Returns false if @task should be traced.
356543 */
357544 bool
358
-trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
545
+trace_ignore_this_task(struct trace_pid_list *filtered_pids,
546
+ struct trace_pid_list *filtered_no_pids,
547
+ struct task_struct *task)
359548 {
360549 /*
361
- * Return false, because if filtered_pids does not exist,
362
- * all pids are good to trace.
550
+ * If filtered_no_pids is not empty, and the task's pid is listed
551
+ * in filtered_no_pids, then return true.
552
+ * Otherwise, if filtered_pids is empty, that means we can
553
+ * trace all tasks. If it has content, then only trace pids
554
+ * within filtered_pids.
363555 */
364
- if (!filtered_pids)
365
- return false;
366556
367
- return !trace_find_filtered_pid(filtered_pids, task->pid);
557
+ return (filtered_pids &&
558
+ !trace_find_filtered_pid(filtered_pids, task->pid)) ||
559
+ (filtered_no_pids &&
560
+ trace_find_filtered_pid(filtered_no_pids, task->pid));
368561 }
369562
370563 /**
371
- * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
564
+ * trace_filter_add_remove_task - Add or remove a task from a pid_list
372565 * @pid_list: The list to modify
373566 * @self: The current task for fork or NULL for exit
374567 * @task: The task to add or remove
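For reference, the trace_ignore_this_task() predicate earlier in this hunk reduces to the cases below (a worked summary added here, not text from the patch):

/*
 *   pid is in filtered_no_pids                       -> ignore (returns true)
 *   filtered_pids is non-empty and pid is not in it  -> ignore (returns true)
 *   otherwise (lists NULL/empty, or pid is allowed)  -> trace  (returns false)
 */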
....@@ -572,7 +765,7 @@
572765 return read;
573766 }
574767
575
-static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
768
+static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
576769 {
577770 u64 ts;
578771
....@@ -588,7 +781,7 @@
588781
589782 u64 ftrace_now(int cpu)
590783 {
591
- return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
784
+ return buffer_ftrace_now(&global_trace.array_buffer, cpu);
592785 }
593786
594787 /**
....@@ -716,22 +909,22 @@
716909 #endif
717910
718911 #ifdef CONFIG_STACKTRACE
719
-static void __ftrace_trace_stack(struct ring_buffer *buffer,
912
+static void __ftrace_trace_stack(struct trace_buffer *buffer,
720913 unsigned long flags,
721914 int skip, int pc, struct pt_regs *regs);
722915 static inline void ftrace_trace_stack(struct trace_array *tr,
723
- struct ring_buffer *buffer,
916
+ struct trace_buffer *buffer,
724917 unsigned long flags,
725918 int skip, int pc, struct pt_regs *regs);
726919
727920 #else
728
-static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
921
+static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
729922 unsigned long flags,
730923 int skip, int pc, struct pt_regs *regs)
731924 {
732925 }
733926 static inline void ftrace_trace_stack(struct trace_array *tr,
734
- struct ring_buffer *buffer,
927
+ struct trace_buffer *buffer,
735928 unsigned long flags,
736929 int skip, int pc, struct pt_regs *regs)
737930 {
....@@ -745,12 +938,11 @@
745938 {
746939 struct trace_entry *ent = ring_buffer_event_data(event);
747940
748
- tracing_generic_entry_update(ent, flags, pc);
749
- ent->type = type;
941
+ tracing_generic_entry_update(ent, type, flags, pc);
750942 }
751943
752944 static __always_inline struct ring_buffer_event *
753
-__trace_buffer_lock_reserve(struct ring_buffer *buffer,
945
+__trace_buffer_lock_reserve(struct trace_buffer *buffer,
754946 int type,
755947 unsigned long len,
756948 unsigned long flags, int pc)
....@@ -766,8 +958,8 @@
766958
767959 void tracer_tracing_on(struct trace_array *tr)
768960 {
769
- if (tr->trace_buffer.buffer)
770
- ring_buffer_record_on(tr->trace_buffer.buffer);
961
+ if (tr->array_buffer.buffer)
962
+ ring_buffer_record_on(tr->array_buffer.buffer);
771963 /*
772964 * This flag is looked at when buffers haven't been allocated
773965 * yet, or by some tracers (like irqsoff), that just want to
....@@ -795,7 +987,7 @@
795987
796988
797989 static __always_inline void
798
-__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
990
+__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
799991 {
800992 __this_cpu_write(trace_taskinfo_save, true);
801993
....@@ -818,7 +1010,7 @@
8181010 int __trace_puts(unsigned long ip, const char *str, int size)
8191011 {
8201012 struct ring_buffer_event *event;
821
- struct ring_buffer *buffer;
1013
+ struct trace_buffer *buffer;
8221014 struct print_entry *entry;
8231015 unsigned long irq_flags;
8241016 int alloc;
....@@ -835,11 +1027,14 @@
8351027 alloc = sizeof(*entry) + size + 2; /* possible \n added */
8361028
8371029 local_save_flags(irq_flags);
838
- buffer = global_trace.trace_buffer.buffer;
1030
+ buffer = global_trace.array_buffer.buffer;
1031
+ ring_buffer_nest_start(buffer);
8391032 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
8401033 irq_flags, pc);
841
- if (!event)
842
- return 0;
1034
+ if (!event) {
1035
+ size = 0;
1036
+ goto out;
1037
+ }
8431038
8441039 entry = ring_buffer_event_data(event);
8451040 entry->ip = ip;
....@@ -855,7 +1050,8 @@
8551050
8561051 __buffer_unlock_commit(buffer, event);
8571052 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
858
-
1053
+ out:
1054
+ ring_buffer_nest_end(buffer);
8591055 return size;
8601056 }
8611057 EXPORT_SYMBOL_GPL(__trace_puts);
....@@ -868,10 +1064,11 @@
8681064 int __trace_bputs(unsigned long ip, const char *str)
8691065 {
8701066 struct ring_buffer_event *event;
871
- struct ring_buffer *buffer;
1067
+ struct trace_buffer *buffer;
8721068 struct bputs_entry *entry;
8731069 unsigned long irq_flags;
8741070 int size = sizeof(struct bputs_entry);
1071
+ int ret = 0;
8751072 int pc;
8761073
8771074 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
....@@ -883,11 +1080,13 @@
8831080 return 0;
8841081
8851082 local_save_flags(irq_flags);
886
- buffer = global_trace.trace_buffer.buffer;
1083
+ buffer = global_trace.array_buffer.buffer;
1084
+
1085
+ ring_buffer_nest_start(buffer);
8871086 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
8881087 irq_flags, pc);
8891088 if (!event)
890
- return 0;
1089
+ goto out;
8911090
8921091 entry = ring_buffer_event_data(event);
8931092 entry->ip = ip;
....@@ -896,12 +1095,16 @@
8961095 __buffer_unlock_commit(buffer, event);
8971096 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
8981097
899
- return 1;
1098
+ ret = 1;
1099
+ out:
1100
+ ring_buffer_nest_end(buffer);
1101
+ return ret;
9001102 }
9011103 EXPORT_SYMBOL_GPL(__trace_bputs);
9021104
9031105 #ifdef CONFIG_TRACER_SNAPSHOT
904
-void tracing_snapshot_instance(struct trace_array *tr)
1106
+static void tracing_snapshot_instance_cond(struct trace_array *tr,
1107
+ void *cond_data)
9051108 {
9061109 struct tracer *tracer = tr->current_trace;
9071110 unsigned long flags;
....@@ -927,8 +1130,13 @@
9271130 }
9281131
9291132 local_irq_save(flags);
930
- update_max_tr(tr, current, smp_processor_id());
1133
+ update_max_tr(tr, current, smp_processor_id(), cond_data);
9311134 local_irq_restore(flags);
1135
+}
1136
+
1137
+void tracing_snapshot_instance(struct trace_array *tr)
1138
+{
1139
+ tracing_snapshot_instance_cond(tr, NULL);
9321140 }
9331141
9341142 /**
....@@ -953,9 +1161,59 @@
9531161 }
9541162 EXPORT_SYMBOL_GPL(tracing_snapshot);
9551163
956
-static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
957
- struct trace_buffer *size_buf, int cpu_id);
958
-static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1164
+/**
1165
+ * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1166
+ * @tr: The tracing instance to snapshot
1167
+ * @cond_data: The data to be tested conditionally, and possibly saved
1168
+ *
1169
+ * This is the same as tracing_snapshot() except that the snapshot is
1170
+ * conditional - the snapshot will only happen if the
1171
+ * cond_snapshot.update() implementation receiving the cond_data
1172
+ * returns true, which means that the trace array's cond_snapshot
1173
+ * update() operation used the cond_data to determine whether the
1174
+ * snapshot should be taken, and if it was, presumably saved it along
1175
+ * with the snapshot.
1176
+ */
1177
+void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1178
+{
1179
+ tracing_snapshot_instance_cond(tr, cond_data);
1180
+}
1181
+EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1182
+
1183
+/**
1184
+ * tracing_snapshot_cond_data - get the user data associated with a snapshot
1185
+ * @tr: The tracing instance
1186
+ *
1187
+ * When the user enables a conditional snapshot using
1188
+ * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1189
+ * with the snapshot. This accessor is used to retrieve it.
1190
+ *
1191
+ * Should not be called from cond_snapshot.update(), since it takes
1192
+ * the tr->max_lock lock, which the code calling
1193
+ * cond_snapshot.update() has already done.
1194
+ *
1195
+ * Returns the cond_data associated with the trace array's snapshot.
1196
+ */
1197
+void *tracing_cond_snapshot_data(struct trace_array *tr)
1198
+{
1199
+ void *cond_data = NULL;
1200
+
1201
+ local_irq_disable();
1202
+ arch_spin_lock(&tr->max_lock);
1203
+
1204
+ if (tr->cond_snapshot)
1205
+ cond_data = tr->cond_snapshot->cond_data;
1206
+
1207
+ arch_spin_unlock(&tr->max_lock);
1208
+ local_irq_enable();
1209
+
1210
+ return cond_data;
1211
+}
1212
+EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1213
+
1214
+static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1215
+ struct array_buffer *size_buf, int cpu_id);
1216
+static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
9591217
9601218 int tracing_alloc_snapshot_instance(struct trace_array *tr)
9611219 {
....@@ -965,7 +1223,7 @@
9651223
9661224 /* allocate spare buffer */
9671225 ret = resize_buffer_duplicate_size(&tr->max_buffer,
968
- &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1226
+ &tr->array_buffer, RING_BUFFER_ALL_CPUS);
9691227 if (ret < 0)
9701228 return ret;
9711229
....@@ -1032,12 +1290,115 @@
10321290 tracing_snapshot();
10331291 }
10341292 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1293
+
1294
+/**
1295
+ * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1296
+ * @tr: The tracing instance
1297
+ * @cond_data: User data to associate with the snapshot
1298
+ * @update: Implementation of the cond_snapshot update function
1299
+ *
1300
+ * Check whether the conditional snapshot for the given instance has
1301
+ * already been enabled, or if the current tracer is already using a
1302
+ * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1303
+ * save the cond_data and update function inside.
1304
+ *
1305
+ * Returns 0 if successful, error otherwise.
1306
+ */
1307
+int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1308
+ cond_update_fn_t update)
1309
+{
1310
+ struct cond_snapshot *cond_snapshot;
1311
+ int ret = 0;
1312
+
1313
+ cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1314
+ if (!cond_snapshot)
1315
+ return -ENOMEM;
1316
+
1317
+ cond_snapshot->cond_data = cond_data;
1318
+ cond_snapshot->update = update;
1319
+
1320
+ mutex_lock(&trace_types_lock);
1321
+
1322
+ ret = tracing_alloc_snapshot_instance(tr);
1323
+ if (ret)
1324
+ goto fail_unlock;
1325
+
1326
+ if (tr->current_trace->use_max_tr) {
1327
+ ret = -EBUSY;
1328
+ goto fail_unlock;
1329
+ }
1330
+
1331
+ /*
1332
+ * The cond_snapshot can only change to NULL without the
1333
+ * trace_types_lock. We don't care if we race with it going
1334
+ * to NULL, but we want to make sure that it's not set to
1335
+ * something other than NULL when we get here, which we can
1336
+ * do safely with only holding the trace_types_lock and not
1337
+ * having to take the max_lock.
1338
+ */
1339
+ if (tr->cond_snapshot) {
1340
+ ret = -EBUSY;
1341
+ goto fail_unlock;
1342
+ }
1343
+
1344
+ local_irq_disable();
1345
+ arch_spin_lock(&tr->max_lock);
1346
+ tr->cond_snapshot = cond_snapshot;
1347
+ arch_spin_unlock(&tr->max_lock);
1348
+ local_irq_enable();
1349
+
1350
+ mutex_unlock(&trace_types_lock);
1351
+
1352
+ return ret;
1353
+
1354
+ fail_unlock:
1355
+ mutex_unlock(&trace_types_lock);
1356
+ kfree(cond_snapshot);
1357
+ return ret;
1358
+}
1359
+EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1360
+
1361
+/**
1362
+ * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1363
+ * @tr: The tracing instance
1364
+ *
1365
+ * Check whether the conditional snapshot for the given instance is
1366
+ * enabled; if so, free the cond_snapshot associated with it,
1367
+ * otherwise return -EINVAL.
1368
+ *
1369
+ * Returns 0 if successful, error otherwise.
1370
+ */
1371
+int tracing_snapshot_cond_disable(struct trace_array *tr)
1372
+{
1373
+ int ret = 0;
1374
+
1375
+ local_irq_disable();
1376
+ arch_spin_lock(&tr->max_lock);
1377
+
1378
+ if (!tr->cond_snapshot)
1379
+ ret = -EINVAL;
1380
+ else {
1381
+ kfree(tr->cond_snapshot);
1382
+ tr->cond_snapshot = NULL;
1383
+ }
1384
+
1385
+ arch_spin_unlock(&tr->max_lock);
1386
+ local_irq_enable();
1387
+
1388
+ return ret;
1389
+}
1390
+EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
10351391 #else
10361392 void tracing_snapshot(void)
10371393 {
10381394 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
10391395 }
10401396 EXPORT_SYMBOL_GPL(tracing_snapshot);
1397
+void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1398
+{
1399
+ WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1400
+}
1401
+EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
10411402 int tracing_alloc_snapshot(void)
10421403 {
10431404 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
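The conditional-snapshot API added earlier in this hunk (tracing_snapshot_cond_enable(), tracing_snapshot_cond(), tracing_cond_snapshot_data(), tracing_snapshot_cond_disable()) is used roughly as sketched below. All my_* names are invented; only the tracing_* calls and the cond_update_fn_t callback shape come from the patch, and the sketch ignores concurrency for brevity:

struct my_cond {
	u64 this_latency;
	u64 worst_latency;
};

static struct my_cond my_cond_data;

/* cond_update_fn_t: decide whether this particular snapshot should be taken. */
static bool my_update(struct trace_array *tr, void *cond_data)
{
	struct my_cond *cond = cond_data;

	if (cond->this_latency <= cond->worst_latency)
		return false;				/* nothing new, skip the buffer swap */

	cond->worst_latency = cond->this_latency;	/* saved alongside the snapshot */
	return true;
}

static int my_enable(struct trace_array *tr)
{
	return tracing_snapshot_cond_enable(tr, &my_cond_data, my_update);
}

static void my_hot_path(struct trace_array *tr, u64 latency)
{
	my_cond_data.this_latency = latency;
	/* Swaps buffers only if my_update() returns true for this cond_data. */
	tracing_snapshot_cond(tr, &my_cond_data);
}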
....@@ -1050,12 +1411,27 @@
10501411 tracing_snapshot();
10511412 }
10521413 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1414
+void *tracing_cond_snapshot_data(struct trace_array *tr)
1415
+{
1416
+ return NULL;
1417
+}
1418
+EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1419
+int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1420
+{
1421
+ return -ENODEV;
1422
+}
1423
+EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1424
+int tracing_snapshot_cond_disable(struct trace_array *tr)
1425
+{
1426
+ return false;
1427
+}
1428
+EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
10531429 #endif /* CONFIG_TRACER_SNAPSHOT */
10541430
10551431 void tracer_tracing_off(struct trace_array *tr)
10561432 {
1057
- if (tr->trace_buffer.buffer)
1058
- ring_buffer_record_off(tr->trace_buffer.buffer);
1433
+ if (tr->array_buffer.buffer)
1434
+ ring_buffer_record_off(tr->array_buffer.buffer);
10591435 /*
10601436 * This flag is looked at when buffers haven't been allocated
10611437 * yet, or by some tracers (like irqsoff), that just want to
....@@ -1085,8 +1461,11 @@
10851461
10861462 void disable_trace_on_warning(void)
10871463 {
1088
- if (__disable_trace_on_warning)
1464
+ if (__disable_trace_on_warning) {
1465
+ trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1466
+ "Disabling tracing due to warning\n");
10891467 tracing_off();
1468
+ }
10901469 }
10911470
10921471 /**
....@@ -1097,8 +1476,8 @@
10971476 */
10981477 bool tracer_tracing_is_on(struct trace_array *tr)
10991478 {
1100
- if (tr->trace_buffer.buffer)
1101
- return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1479
+ if (tr->array_buffer.buffer)
1480
+ return ring_buffer_record_is_on(tr->array_buffer.buffer);
11021481 return !tr->buffer_disabled;
11031482 }
11041483
....@@ -1118,10 +1497,12 @@
11181497 if (!str)
11191498 return 0;
11201499 buf_size = memparse(str, &str);
1121
- /* nr_entries can not be zero */
1122
- if (buf_size == 0)
1123
- return 0;
1124
- trace_buf_size = buf_size;
1500
+ /*
1501
+ * nr_entries can not be zero and the startup
1502
+ * tests require some buffer space. Therefore
1503
+ * ensure we have at least 4096 bytes of buffer.
1504
+ */
1505
+ trace_buf_size = max(4096UL, buf_size);
11251506 return 1;
11261507 }
11271508 __setup("trace_buf_size=", set_buf_size);
....@@ -1315,6 +1696,73 @@
13151696 }
13161697
13171698 unsigned long __read_mostly tracing_thresh;
1699
+static const struct file_operations tracing_max_lat_fops;
1700
+
1701
+#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1702
+ defined(CONFIG_FSNOTIFY)
1703
+
1704
+static struct workqueue_struct *fsnotify_wq;
1705
+
1706
+static void latency_fsnotify_workfn(struct work_struct *work)
1707
+{
1708
+ struct trace_array *tr = container_of(work, struct trace_array,
1709
+ fsnotify_work);
1710
+ fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1711
+}
1712
+
1713
+static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1714
+{
1715
+ struct trace_array *tr = container_of(iwork, struct trace_array,
1716
+ fsnotify_irqwork);
1717
+ queue_work(fsnotify_wq, &tr->fsnotify_work);
1718
+}
1719
+
1720
+static void trace_create_maxlat_file(struct trace_array *tr,
1721
+ struct dentry *d_tracer)
1722
+{
1723
+ INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1724
+ init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1725
+ tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1726
+ d_tracer, &tr->max_latency,
1727
+ &tracing_max_lat_fops);
1728
+}
1729
+
1730
+__init static int latency_fsnotify_init(void)
1731
+{
1732
+ fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1733
+ WQ_UNBOUND | WQ_HIGHPRI, 0);
1734
+ if (!fsnotify_wq) {
1735
+ pr_err("Unable to allocate tr_max_lat_wq\n");
1736
+ return -ENOMEM;
1737
+ }
1738
+ return 0;
1739
+}
1740
+
1741
+late_initcall_sync(latency_fsnotify_init);
1742
+
1743
+void latency_fsnotify(struct trace_array *tr)
1744
+{
1745
+ if (!fsnotify_wq)
1746
+ return;
1747
+ /*
1748
+ * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1749
+ * possible that we are called from __schedule() or do_idle(), which
1750
+ * could cause a deadlock.
1751
+ */
1752
+ irq_work_queue(&tr->fsnotify_irqwork);
1753
+}
1754
+
1755
+/*
1756
+ * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1757
+ * defined(CONFIG_FSNOTIFY)
1758
+ */
1759
+#else
1760
+
1761
+#define trace_create_maxlat_file(tr, d_tracer) \
1762
+ trace_create_file("tracing_max_latency", 0644, d_tracer, \
1763
+ &tr->max_latency, &tracing_max_lat_fops)
1764
+
1765
+#endif
13181766
13191767 #ifdef CONFIG_TRACER_MAX_TRACE
13201768 /*
....@@ -1325,8 +1773,8 @@
13251773 static void
13261774 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
13271775 {
1328
- struct trace_buffer *trace_buf = &tr->trace_buffer;
1329
- struct trace_buffer *max_buf = &tr->max_buffer;
1776
+ struct array_buffer *trace_buf = &tr->array_buffer;
1777
+ struct array_buffer *max_buf = &tr->max_buffer;
13301778 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
13311779 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
13321780
....@@ -1337,7 +1785,7 @@
13371785 max_data->critical_start = data->critical_start;
13381786 max_data->critical_end = data->critical_end;
13391787
1340
- memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1788
+ strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
13411789 max_data->pid = tsk->pid;
13421790 /*
13431791 * If tsk == current, then use current_uid(), as that does not use
....@@ -1354,6 +1802,7 @@
13541802
13551803 /* record this tasks comm */
13561804 tracing_record_cmdline(tsk);
1805
+ latency_fsnotify(tr);
13571806 }
13581807
13591808 /**
....@@ -1361,12 +1810,14 @@
13611810 * @tr: tracer
13621811 * @tsk: the task with the latency
13631812 * @cpu: The cpu that initiated the trace.
1813
+ * @cond_data: User data associated with a conditional snapshot
13641814 *
13651815 * Flip the buffers between the @tr and the max_tr and record information
13661816 * about which task was the cause of this latency.
13671817 */
13681818 void
1369
-update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1819
+update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1820
+ void *cond_data)
13701821 {
13711822 if (tr->stop_count)
13721823 return;
....@@ -1381,23 +1832,29 @@
13811832
13821833 arch_spin_lock(&tr->max_lock);
13831834
1384
- /* Inherit the recordable setting from trace_buffer */
1385
- if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1835
+ /* Inherit the recordable setting from array_buffer */
1836
+ if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
13861837 ring_buffer_record_on(tr->max_buffer.buffer);
13871838 else
13881839 ring_buffer_record_off(tr->max_buffer.buffer);
13891840
1390
- swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1841
+#ifdef CONFIG_TRACER_SNAPSHOT
1842
+ if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1843
+ goto out_unlock;
1844
+#endif
1845
+ swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
13911846
13921847 __update_max_tr(tr, tsk, cpu);
1848
+
1849
+ out_unlock:
13931850 arch_spin_unlock(&tr->max_lock);
13941851 }
13951852
13961853 /**
13971854 * update_max_tr_single - only copy one trace over, and reset the rest
1398
- * @tr - tracer
1399
- * @tsk - task with the latency
1400
- * @cpu - the cpu of the buffer to copy.
1855
+ * @tr: tracer
1856
+ * @tsk: task with the latency
1857
+ * @cpu: the cpu of the buffer to copy.
14011858 *
14021859 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
14031860 */
....@@ -1418,7 +1875,7 @@
14181875
14191876 arch_spin_lock(&tr->max_lock);
14201877
1421
- ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1878
+ ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
14221879
14231880 if (ret == -EBUSY) {
14241881 /*
....@@ -1426,9 +1883,10 @@
14261883 * place on this CPU. We fail to record, but we reset
14271884 * the max trace buffer (no one writes directly to it)
14281885 * and flag that it failed.
1886
+ * Another reason is resize is in progress.
14291887 */
14301888 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1431
- "Failed to swap buffers due to commit in progress\n");
1889
+ "Failed to swap buffers due to commit or resize in progress\n");
14321890 }
14331891
14341892 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
....@@ -1438,13 +1896,13 @@
14381896 }
14391897 #endif /* CONFIG_TRACER_MAX_TRACE */
14401898
1441
-static int wait_on_pipe(struct trace_iterator *iter, bool full)
1899
+static int wait_on_pipe(struct trace_iterator *iter, int full)
14421900 {
14431901 /* Iterators are static, they should be filled or empty */
14441902 if (trace_buffer_iter(iter, iter->cpu_file))
14451903 return 0;
14461904
1447
- return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1905
+ return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
14481906 full);
14491907 }
14501908
....@@ -1495,7 +1953,7 @@
14951953 * internal tracing to verify that everything is in order.
14961954 * If we fail, we do not register this tracer.
14971955 */
1498
- tracing_reset_online_cpus(&tr->trace_buffer);
1956
+ tracing_reset_online_cpus(&tr->array_buffer);
14991957
15001958 tr->current_trace = type;
15011959
....@@ -1521,7 +1979,7 @@
15211979 return -1;
15221980 }
15231981 /* Only reset on passing, to avoid touching corrupted buffers */
1524
- tracing_reset_online_cpus(&tr->trace_buffer);
1982
+ tracing_reset_online_cpus(&tr->array_buffer);
15251983
15261984 #ifdef CONFIG_TRACER_MAX_TRACE
15271985 if (type->use_max_tr) {
....@@ -1555,6 +2013,10 @@
15552013
15562014 tracing_selftest_running = true;
15572015 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2016
+ /* This loop can take minutes when sanitizers are enabled, so
2017
+ * let's make sure we allow RCU processing.
2018
+ */
2019
+ cond_resched();
15582020 ret = run_tracer_selftest(p->type);
15592021 /* If the test fails, then warn and remove from available_tracers */
15602022 if (ret < 0) {
....@@ -1593,7 +2055,7 @@
15932055
15942056 /**
15952057 * register_tracer - register a tracer with the ftrace system.
1596
- * @type - the plugin for the tracer
2058
+ * @type: the plugin for the tracer
15972059 *
15982060 * Register a new plugin tracer.
15992061 */
....@@ -1610,6 +2072,12 @@
16102072 if (strlen(type->name) >= MAX_TRACER_SIZE) {
16112073 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
16122074 return -1;
2075
+ }
2076
+
2077
+ if (security_locked_down(LOCKDOWN_TRACEFS)) {
2078
+ pr_warn("Can not register tracer %s due to lockdown\n",
2079
+ type->name);
2080
+ return -EPERM;
16132081 }
16142082
16152083 mutex_lock(&trace_types_lock);
....@@ -1670,19 +2138,15 @@
16702138 apply_trace_boot_options();
16712139
16722140 /* disable other selftests, since this will break it. */
1673
- tracing_selftest_disabled = true;
1674
-#ifdef CONFIG_FTRACE_STARTUP_TEST
1675
- printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1676
- type->name);
1677
-#endif
2141
+ disable_tracing_selftest("running a tracer");
16782142
16792143 out_unlock:
16802144 return ret;
16812145 }
16822146
1683
-void tracing_reset(struct trace_buffer *buf, int cpu)
2147
+static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
16842148 {
1685
- struct ring_buffer *buffer = buf->buffer;
2149
+ struct trace_buffer *buffer = buf->buffer;
16862150
16872151 if (!buffer)
16882152 return;
....@@ -1690,16 +2154,15 @@
16902154 ring_buffer_record_disable(buffer);
16912155
16922156 /* Make sure all commits have finished */
1693
- synchronize_sched();
2157
+ synchronize_rcu();
16942158 ring_buffer_reset_cpu(buffer, cpu);
16952159
16962160 ring_buffer_record_enable(buffer);
16972161 }
16982162
1699
-void tracing_reset_online_cpus(struct trace_buffer *buf)
2163
+void tracing_reset_online_cpus(struct array_buffer *buf)
17002164 {
1701
- struct ring_buffer *buffer = buf->buffer;
1702
- int cpu;
2165
+ struct trace_buffer *buffer = buf->buffer;
17032166
17042167 if (!buffer)
17052168 return;
....@@ -1707,30 +2170,38 @@
17072170 ring_buffer_record_disable(buffer);
17082171
17092172 /* Make sure all commits have finished */
1710
- synchronize_sched();
2173
+ synchronize_rcu();
17112174
17122175 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
17132176
1714
- for_each_online_cpu(cpu)
1715
- ring_buffer_reset_cpu(buffer, cpu);
2177
+ ring_buffer_reset_online_cpus(buffer);
17162178
17172179 ring_buffer_record_enable(buffer);
17182180 }
17192181
17202182 /* Must have trace_types_lock held */
1721
-void tracing_reset_all_online_cpus(void)
2183
+void tracing_reset_all_online_cpus_unlocked(void)
17222184 {
17232185 struct trace_array *tr;
2186
+
2187
+ lockdep_assert_held(&trace_types_lock);
17242188
17252189 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
17262190 if (!tr->clear_trace)
17272191 continue;
17282192 tr->clear_trace = false;
1729
- tracing_reset_online_cpus(&tr->trace_buffer);
2193
+ tracing_reset_online_cpus(&tr->array_buffer);
17302194 #ifdef CONFIG_TRACER_MAX_TRACE
17312195 tracing_reset_online_cpus(&tr->max_buffer);
17322196 #endif
17332197 }
2198
+}
2199
+
2200
+void tracing_reset_all_online_cpus(void)
2201
+{
2202
+ mutex_lock(&trace_types_lock);
2203
+ tracing_reset_all_online_cpus_unlocked();
2204
+ mutex_unlock(&trace_types_lock);
17342205 }
17352206
17362207 /*
....@@ -1744,6 +2215,11 @@
17442215
17452216 #define SAVED_CMDLINES_DEFAULT 128
17462217 #define NO_CMDLINE_MAP UINT_MAX
2218
+/*
2219
+ * Preemption must be disabled before acquiring trace_cmdline_lock.
2220
+ * The various trace_arrays' max_lock must be acquired in a context
2221
+ * where interrupt is disabled.
2222
+ */
17472223 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
17482224 struct saved_cmdlines_buffer {
17492225 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
....@@ -1761,7 +2237,7 @@
17612237
17622238 static inline void set_cmdline(int idx, const char *cmdline)
17632239 {
1764
- memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2240
+ strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
17652241 }
17662242
17672243 static int allocate_cmdlines_buffer(unsigned int val,
....@@ -1820,7 +2296,7 @@
18202296 */
18212297 void tracing_start(void)
18222298 {
1823
- struct ring_buffer *buffer;
2299
+ struct trace_buffer *buffer;
18242300 unsigned long flags;
18252301
18262302 if (tracing_disabled)
....@@ -1839,7 +2315,7 @@
18392315 /* Prevent the buffers from switching */
18402316 arch_spin_lock(&global_trace.max_lock);
18412317
1842
- buffer = global_trace.trace_buffer.buffer;
2318
+ buffer = global_trace.array_buffer.buffer;
18432319 if (buffer)
18442320 ring_buffer_record_enable(buffer);
18452321
....@@ -1857,7 +2333,7 @@
18572333
18582334 static void tracing_start_tr(struct trace_array *tr)
18592335 {
1860
- struct ring_buffer *buffer;
2336
+ struct trace_buffer *buffer;
18612337 unsigned long flags;
18622338
18632339 if (tracing_disabled)
....@@ -1878,7 +2354,7 @@
18782354 goto out;
18792355 }
18802356
1881
- buffer = tr->trace_buffer.buffer;
2357
+ buffer = tr->array_buffer.buffer;
18822358 if (buffer)
18832359 ring_buffer_record_enable(buffer);
18842360
....@@ -1894,7 +2370,7 @@
18942370 */
18952371 void tracing_stop(void)
18962372 {
1897
- struct ring_buffer *buffer;
2373
+ struct trace_buffer *buffer;
18982374 unsigned long flags;
18992375
19002376 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
....@@ -1904,7 +2380,7 @@
19042380 /* Prevent the buffers from switching */
19052381 arch_spin_lock(&global_trace.max_lock);
19062382
1907
- buffer = global_trace.trace_buffer.buffer;
2383
+ buffer = global_trace.array_buffer.buffer;
19082384 if (buffer)
19092385 ring_buffer_record_disable(buffer);
19102386
....@@ -1922,7 +2398,7 @@
19222398
19232399 static void tracing_stop_tr(struct trace_array *tr)
19242400 {
1925
- struct ring_buffer *buffer;
2401
+ struct trace_buffer *buffer;
19262402 unsigned long flags;
19272403
19282404 /* If global, we need to also stop the max tracer */
....@@ -1933,7 +2409,7 @@
19332409 if (tr->stop_count++)
19342410 goto out;
19352411
1936
- buffer = tr->trace_buffer.buffer;
2412
+ buffer = tr->array_buffer.buffer;
19372413 if (buffer)
19382414 ring_buffer_record_disable(buffer);
19392415
....@@ -1956,7 +2432,11 @@
19562432 * the lock, but we also don't want to spin
19572433 * nor do we want to disable interrupts,
19582434 * so if we miss here, then better luck next time.
2435
+ *
2436
+ * This is called within the scheduler and wakeup paths, so interrupts
2437
+ * had better be disabled and the run queue lock held.
19592438 */
2439
+ lockdep_assert_preemption_disabled();
19602440 if (!arch_spin_trylock(&trace_cmdline_lock))
19612441 return 0;
19622442
....@@ -2064,9 +2544,9 @@
20642544 /**
20652545 * tracing_record_taskinfo - record the task info of a task
20662546 *
2067
- * @task - task to record
2068
- * @flags - TRACE_RECORD_CMDLINE for recording comm
2069
- * - TRACE_RECORD_TGID for recording tgid
2547
+ * @task: task to record
2548
+ * @flags: TRACE_RECORD_CMDLINE for recording comm
2549
+ * TRACE_RECORD_TGID for recording tgid
20702550 */
20712551 void tracing_record_taskinfo(struct task_struct *task, int flags)
20722552 {
....@@ -2092,10 +2572,10 @@
20922572 /**
20932573 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
20942574 *
2095
- * @prev - previous task during sched_switch
2096
- * @next - next task during sched_switch
2097
- * @flags - TRACE_RECORD_CMDLINE for recording comm
2098
- * TRACE_RECORD_TGID for recording tgid
2575
+ * @prev: previous task during sched_switch
2576
+ * @next: next task during sched_switch
2577
+ * @flags: TRACE_RECORD_CMDLINE for recording comm
2578
+ * TRACE_RECORD_TGID for recording tgid
20992579 */
21002580 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
21012581 struct task_struct *next, int flags)
....@@ -2145,13 +2625,14 @@
21452625 EXPORT_SYMBOL_GPL(trace_handle_return);
21462626
21472627 void
2148
-tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2149
- int pc)
2628
+tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2629
+ unsigned long flags, int pc)
21502630 {
21512631 struct task_struct *tsk = current;
21522632
21532633 entry->preempt_count = pc & 0xff;
21542634 entry->pid = (tsk) ? tsk->pid : 0;
2635
+ entry->type = type;
21552636 entry->flags =
21562637 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
21572638 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
....@@ -2167,7 +2648,7 @@
21672648 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
21682649
21692650 struct ring_buffer_event *
2170
-trace_buffer_lock_reserve(struct ring_buffer *buffer,
2651
+trace_buffer_lock_reserve(struct trace_buffer *buffer,
21712652 int type,
21722653 unsigned long len,
21732654 unsigned long flags, int pc)
....@@ -2217,7 +2698,7 @@
22172698
22182699 preempt_disable();
22192700 if (cpu == smp_processor_id() &&
2220
- this_cpu_read(trace_buffered_event) !=
2701
+ __this_cpu_read(trace_buffered_event) !=
22212702 per_cpu(trace_buffered_event, cpu))
22222703 WARN_ON_ONCE(1);
22232704 preempt_enable();
....@@ -2267,7 +2748,7 @@
22672748 preempt_enable();
22682749
22692750 /* Wait for all current users to finish */
2270
- synchronize_sched();
2751
+ synchronize_rcu();
22712752
22722753 for_each_tracing_cpu(cpu) {
22732754 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
....@@ -2286,10 +2767,10 @@
22862767 preempt_enable();
22872768 }
22882769
2289
-static struct ring_buffer *temp_buffer;
2770
+static struct trace_buffer *temp_buffer;
22902771
22912772 struct ring_buffer_event *
2292
-trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2773
+trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
22932774 struct trace_event_file *trace_file,
22942775 int type, unsigned long len,
22952776 unsigned long flags, int pc)
....@@ -2297,7 +2778,7 @@
22972778 struct ring_buffer_event *entry;
22982779 int val;
22992780
2300
- *current_rb = trace_file->tr->trace_buffer.buffer;
2781
+ *current_rb = trace_file->tr->array_buffer.buffer;
23012782
23022783 if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
23032784 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
....@@ -2317,7 +2798,7 @@
23172798 /*
23182799 * If tracing is off, but we have triggers enabled
23192800 * we still need to look at the event data. Use the temp_buffer
2320
- * to store the trace event for the tigger to use. It's recusive
2801
+ * to store the trace event for the trigger to use. It's recursive
23212802 * safe and will not be recorded anywhere.
23222803 */
23232804 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
....@@ -2329,12 +2810,13 @@
23292810 }
23302811 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
23312812
2332
-static DEFINE_SPINLOCK(tracepoint_iter_lock);
2813
+static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
23332814 static DEFINE_MUTEX(tracepoint_printk_mutex);
23342815
23352816 static void output_printk(struct trace_event_buffer *fbuffer)
23362817 {
23372818 struct trace_event_call *event_call;
2819
+ struct trace_event_file *file;
23382820 struct trace_event *event;
23392821 unsigned long flags;
23402822 struct trace_iterator *iter = tracepoint_print_iter;
....@@ -2348,20 +2830,26 @@
23482830 !event_call->event.funcs->trace)
23492831 return;
23502832
2833
+ file = fbuffer->trace_file;
2834
+ if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2835
+ (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2836
+ !filter_match_preds(file->filter, fbuffer->entry)))
2837
+ return;
2838
+
23512839 event = &fbuffer->trace_file->event_call->event;
23522840
2353
- spin_lock_irqsave(&tracepoint_iter_lock, flags);
2841
+ raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
23542842 trace_seq_init(&iter->seq);
23552843 iter->ent = fbuffer->entry;
23562844 event_call->event.funcs->trace(iter, 0, event);
23572845 trace_seq_putc(&iter->seq, 0);
23582846 printk("%s", iter->seq.buffer);
23592847
2360
- spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2848
+ raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
23612849 }
23622850
23632851 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2364
- void __user *buffer, size_t *lenp,
2852
+ void *buffer, size_t *lenp,
23652853 loff_t *ppos)
23662854 {
23672855 int save_tracepoint_printk;
....@@ -2398,9 +2886,11 @@
23982886 if (static_key_false(&tracepoint_printk_key.key))
23992887 output_printk(fbuffer);
24002888
2401
- event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2889
+ if (static_branch_unlikely(&trace_event_exports_enabled))
2890
+ ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2891
+ event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
24022892 fbuffer->event, fbuffer->entry,
2403
- fbuffer->flags, fbuffer->pc);
2893
+ fbuffer->flags, fbuffer->pc, fbuffer->regs);
24042894 }
24052895 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
24062896
....@@ -2414,7 +2904,7 @@
24142904 # define STACK_SKIP 3
24152905
24162906 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2417
- struct ring_buffer *buffer,
2907
+ struct trace_buffer *buffer,
24182908 struct ring_buffer_event *event,
24192909 unsigned long flags, int pc,
24202910 struct pt_regs *regs)
....@@ -2435,134 +2925,11 @@
24352925 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
24362926 */
24372927 void
2438
-trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2928
+trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
24392929 struct ring_buffer_event *event)
24402930 {
24412931 __buffer_unlock_commit(buffer, event);
24422932 }
2443
-
2444
-static void
2445
-trace_process_export(struct trace_export *export,
2446
- struct ring_buffer_event *event)
2447
-{
2448
- struct trace_entry *entry;
2449
- unsigned int size = 0;
2450
-
2451
- entry = ring_buffer_event_data(event);
2452
- size = ring_buffer_event_length(event);
2453
- export->write(export, entry, size);
2454
-}
2455
-
2456
-static DEFINE_MUTEX(ftrace_export_lock);
2457
-
2458
-static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2459
-
2460
-static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2461
-
2462
-static inline void ftrace_exports_enable(void)
2463
-{
2464
- static_branch_enable(&ftrace_exports_enabled);
2465
-}
2466
-
2467
-static inline void ftrace_exports_disable(void)
2468
-{
2469
- static_branch_disable(&ftrace_exports_enabled);
2470
-}
2471
-
2472
-void ftrace_exports(struct ring_buffer_event *event)
2473
-{
2474
- struct trace_export *export;
2475
-
2476
- preempt_disable_notrace();
2477
-
2478
- export = rcu_dereference_raw_notrace(ftrace_exports_list);
2479
- while (export) {
2480
- trace_process_export(export, event);
2481
- export = rcu_dereference_raw_notrace(export->next);
2482
- }
2483
-
2484
- preempt_enable_notrace();
2485
-}
2486
-
2487
-static inline void
2488
-add_trace_export(struct trace_export **list, struct trace_export *export)
2489
-{
2490
- rcu_assign_pointer(export->next, *list);
2491
- /*
2492
- * We are entering export into the list but another
2493
- * CPU might be walking that list. We need to make sure
2494
- * the export->next pointer is valid before another CPU sees
2495
- * the export pointer included into the list.
2496
- */
2497
- rcu_assign_pointer(*list, export);
2498
-}
2499
-
2500
-static inline int
2501
-rm_trace_export(struct trace_export **list, struct trace_export *export)
2502
-{
2503
- struct trace_export **p;
2504
-
2505
- for (p = list; *p != NULL; p = &(*p)->next)
2506
- if (*p == export)
2507
- break;
2508
-
2509
- if (*p != export)
2510
- return -1;
2511
-
2512
- rcu_assign_pointer(*p, (*p)->next);
2513
-
2514
- return 0;
2515
-}
2516
-
2517
-static inline void
2518
-add_ftrace_export(struct trace_export **list, struct trace_export *export)
2519
-{
2520
- if (*list == NULL)
2521
- ftrace_exports_enable();
2522
-
2523
- add_trace_export(list, export);
2524
-}
2525
-
2526
-static inline int
2527
-rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2528
-{
2529
- int ret;
2530
-
2531
- ret = rm_trace_export(list, export);
2532
- if (*list == NULL)
2533
- ftrace_exports_disable();
2534
-
2535
- return ret;
2536
-}
2537
-
2538
-int register_ftrace_export(struct trace_export *export)
2539
-{
2540
- if (WARN_ON_ONCE(!export->write))
2541
- return -1;
2542
-
2543
- mutex_lock(&ftrace_export_lock);
2544
-
2545
- add_ftrace_export(&ftrace_exports_list, export);
2546
-
2547
- mutex_unlock(&ftrace_export_lock);
2548
-
2549
- return 0;
2550
-}
2551
-EXPORT_SYMBOL_GPL(register_ftrace_export);
2552
-
2553
-int unregister_ftrace_export(struct trace_export *export)
2554
-{
2555
- int ret;
2556
-
2557
- mutex_lock(&ftrace_export_lock);
2558
-
2559
- ret = rm_ftrace_export(&ftrace_exports_list, export);
2560
-
2561
- mutex_unlock(&ftrace_export_lock);
2562
-
2563
- return ret;
2564
-}
2565
-EXPORT_SYMBOL_GPL(unregister_ftrace_export);
25662933
25672934 void
25682935 trace_function(struct trace_array *tr,
....@@ -2570,7 +2937,7 @@
25702937 int pc)
25712938 {
25722939 struct trace_event_call *call = &event_function;
2573
- struct ring_buffer *buffer = tr->trace_buffer.buffer;
2940
+ struct trace_buffer *buffer = tr->array_buffer.buffer;
25742941 struct ring_buffer_event *event;
25752942 struct ftrace_entry *entry;
25762943
....@@ -2583,35 +2950,41 @@
25832950 entry->parent_ip = parent_ip;
25842951
25852952 if (!call_filter_check_discard(call, entry, buffer, event)) {
2586
- if (static_branch_unlikely(&ftrace_exports_enabled))
2587
- ftrace_exports(event);
2953
+ if (static_branch_unlikely(&trace_function_exports_enabled))
2954
+ ftrace_exports(event, TRACE_EXPORT_FUNCTION);
25882955 __buffer_unlock_commit(buffer, event);
25892956 }
25902957 }
25912958
25922959 #ifdef CONFIG_STACKTRACE
25932960
2594
-#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2961
+/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2962
+#define FTRACE_KSTACK_NESTING 4
2963
+
2964
+#define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2965
+
25952966 struct ftrace_stack {
2596
- unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
2967
+ unsigned long calls[FTRACE_KSTACK_ENTRIES];
25972968 };
25982969
2599
-static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2970
+
2971
+struct ftrace_stacks {
2972
+ struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
2973
+};
2974
+
2975
+static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
26002976 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
26012977
2602
-static void __ftrace_trace_stack(struct ring_buffer *buffer,
2978
+static void __ftrace_trace_stack(struct trace_buffer *buffer,
26032979 unsigned long flags,
26042980 int skip, int pc, struct pt_regs *regs)
26052981 {
26062982 struct trace_event_call *call = &event_kernel_stack;
26072983 struct ring_buffer_event *event;
2984
+ unsigned int size, nr_entries;
2985
+ struct ftrace_stack *fstack;
26082986 struct stack_entry *entry;
2609
- struct stack_trace trace;
2610
- int use_stack;
2611
- int size = FTRACE_STACK_ENTRIES;
2612
-
2613
- trace.nr_entries = 0;
2614
- trace.skip = skip;
2987
+ int stackidx;
26152988
26162989 /*
26172990 * Add one, for this function and the call to save_stack_trace()
....@@ -2619,43 +2992,37 @@
26192992 */
26202993 #ifndef CONFIG_UNWINDER_ORC
26212994 if (!regs)
2622
- trace.skip++;
2995
+ skip++;
26232996 #endif
26242997
2625
- /*
2626
- * Since events can happen in NMIs there's no safe way to
2627
- * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2628
- * or NMI comes in, it will just have to use the default
2629
- * FTRACE_STACK_SIZE.
2630
- */
26312998 preempt_disable_notrace();
26322999
2633
- use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
3000
+ stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3001
+
3002
+ /* This should never happen. If it does, yell once and skip */
3003
+ if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3004
+ goto out;
3005
+
26343006 /*
2635
- * We don't need any atomic variables, just a barrier.
2636
- * If an interrupt comes in, we don't care, because it would
2637
- * have exited and put the counter back to what we want.
2638
- * We just need a barrier to keep gcc from moving things
2639
- * around.
3007
+ * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3008
+ * interrupt will either see the value pre increment or post
3009
+ * increment. If the interrupt happens pre increment it will have
3010
+ * restored the counter when it returns. We just need a barrier to
3011
+ * keep gcc from moving things around.
26403012 */
26413013 barrier();
2642
- if (use_stack == 1) {
2643
- trace.entries = this_cpu_ptr(ftrace_stack.calls);
2644
- trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
26453014
2646
- if (regs)
2647
- save_stack_trace_regs(regs, &trace);
2648
- else
2649
- save_stack_trace(&trace);
3015
+ fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3016
+ size = ARRAY_SIZE(fstack->calls);
26503017
2651
- if (trace.nr_entries > size)
2652
- size = trace.nr_entries;
2653
- } else
2654
- /* From now on, use_stack is a boolean */
2655
- use_stack = 0;
3018
+ if (regs) {
3019
+ nr_entries = stack_trace_save_regs(regs, fstack->calls,
3020
+ size, skip);
3021
+ } else {
3022
+ nr_entries = stack_trace_save(fstack->calls, size, skip);
3023
+ }
26563024
2657
- size *= sizeof(unsigned long);
2658
-
3025
+ size = nr_entries * sizeof(unsigned long);
26593026 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
26603027 (sizeof(*entry) - sizeof(entry->caller)) + size,
26613028 flags, pc);
....@@ -2663,21 +3030,8 @@
26633030 goto out;
26643031 entry = ring_buffer_event_data(event);
26653032
2666
- memset(&entry->caller, 0, size);
2667
-
2668
- if (use_stack)
2669
- memcpy(&entry->caller, trace.entries,
2670
- trace.nr_entries * sizeof(unsigned long));
2671
- else {
2672
- trace.max_entries = FTRACE_STACK_ENTRIES;
2673
- trace.entries = entry->caller;
2674
- if (regs)
2675
- save_stack_trace_regs(regs, &trace);
2676
- else
2677
- save_stack_trace(&trace);
2678
- }
2679
-
2680
- entry->size = trace.nr_entries;
3033
+ memcpy(&entry->caller, fstack->calls, size);
3034
+ entry->size = nr_entries;
26813035
26823036 if (!call_filter_check_discard(call, entry, buffer, event))
26833037 __buffer_unlock_commit(buffer, event);
....@@ -2691,7 +3045,7 @@
26913045 }
26923046
26933047 static inline void ftrace_trace_stack(struct trace_array *tr,
2694
- struct ring_buffer *buffer,
3048
+ struct trace_buffer *buffer,
26953049 unsigned long flags,
26963050 int skip, int pc, struct pt_regs *regs)
26973051 {
....@@ -2704,7 +3058,7 @@
27043058 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
27053059 int pc)
27063060 {
2707
- struct ring_buffer *buffer = tr->trace_buffer.buffer;
3061
+ struct trace_buffer *buffer = tr->array_buffer.buffer;
27083062
27093063 if (rcu_is_watching()) {
27103064 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
....@@ -2742,20 +3096,21 @@
27423096 /* Skip 1 to skip this function. */
27433097 skip++;
27443098 #endif
2745
- __ftrace_trace_stack(global_trace.trace_buffer.buffer,
3099
+ __ftrace_trace_stack(global_trace.array_buffer.buffer,
27463100 flags, skip, preempt_count(), NULL);
27473101 }
3102
+EXPORT_SYMBOL_GPL(trace_dump_stack);
27483103
3104
+#ifdef CONFIG_USER_STACKTRACE_SUPPORT
27493105 static DEFINE_PER_CPU(int, user_stack_count);
27503106
2751
-void
3107
+static void
27523108 ftrace_trace_userstack(struct trace_array *tr,
2753
- struct ring_buffer *buffer, unsigned long flags, int pc)
3109
+ struct trace_buffer *buffer, unsigned long flags, int pc)
27543110 {
27553111 struct trace_event_call *call = &event_user_stack;
27563112 struct ring_buffer_event *event;
27573113 struct userstack_entry *entry;
2758
- struct stack_trace trace;
27593114
27603115 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
27613116 return;
....@@ -2786,12 +3141,7 @@
27863141 entry->tgid = current->tgid;
27873142 memset(&entry->caller, 0, sizeof(entry->caller));
27883143
2789
- trace.nr_entries = 0;
2790
- trace.max_entries = FTRACE_STACK_ENTRIES;
2791
- trace.skip = 0;
2792
- trace.entries = entry->caller;
2793
-
2794
- save_stack_trace_user(&trace);
3144
+ stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
27953145 if (!call_filter_check_discard(call, entry, buffer, event))
27963146 __buffer_unlock_commit(buffer, event);
27973147
....@@ -2800,13 +3150,13 @@
28003150 out:
28013151 preempt_enable();
28023152 }
2803
-
2804
-#ifdef UNUSED
2805
-static void __trace_userstack(struct trace_array *tr, unsigned long flags)
3153
+#else /* CONFIG_USER_STACKTRACE_SUPPORT */
3154
+static void ftrace_trace_userstack(struct trace_array *tr,
3155
+ struct trace_buffer *buffer,
3156
+ unsigned long flags, int pc)
28063157 {
2807
- ftrace_trace_userstack(tr, flags, preempt_count());
28083158 }
2809
-#endif /* UNUSED */
3159
+#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
28103160
28113161 #endif /* CONFIG_STACKTRACE */
28123162
....@@ -2847,8 +3197,11 @@
28473197 {
28483198 struct trace_buffer_struct __percpu *buffers;
28493199
3200
+ if (trace_percpu_buffer)
3201
+ return 0;
3202
+
28503203 buffers = alloc_percpu(struct trace_buffer_struct);
2851
- if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3204
+ if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
28523205 return -ENOMEM;
28533206
28543207 trace_percpu_buffer = buffers;
....@@ -2893,9 +3246,10 @@
28933246 * directly here. If the global_trace.buffer is already
28943247 * allocated here, then this was called by module code.
28953248 */
2896
- if (global_trace.trace_buffer.buffer)
3249
+ if (global_trace.array_buffer.buffer)
28973250 tracing_start_cmdline_record();
28983251 }
3252
+EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
28993253
29003254 void trace_printk_start_comm(void)
29013255 {
....@@ -2918,13 +3272,15 @@
29183272
29193273 /**
29203274 * trace_vbprintk - write binary msg to tracing buffer
2921
- *
3275
+ * @ip: The address of the caller
3276
+ * @fmt: The string format to write to the buffer
3277
+ * @args: Arguments for @fmt
29223278 */
29233279 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
29243280 {
29253281 struct trace_event_call *call = &event_bprint;
29263282 struct ring_buffer_event *event;
2927
- struct ring_buffer *buffer;
3283
+ struct trace_buffer *buffer;
29283284 struct trace_array *tr = &global_trace;
29293285 struct bprint_entry *entry;
29303286 unsigned long flags;
....@@ -2949,11 +3305,12 @@
29493305 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
29503306
29513307 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2952
- goto out;
3308
+ goto out_put;
29533309
29543310 local_save_flags(flags);
29553311 size = sizeof(*entry) + sizeof(u32) * len;
2956
- buffer = tr->trace_buffer.buffer;
3312
+ buffer = tr->array_buffer.buffer;
3313
+ ring_buffer_nest_start(buffer);
29573314 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
29583315 flags, pc);
29593316 if (!event)
....@@ -2969,6 +3326,8 @@
29693326 }
29703327
29713328 out:
3329
+ ring_buffer_nest_end(buffer);
3330
+out_put:
29723331 put_trace_buf();
29733332
29743333 out_nobuffer:
....@@ -2981,7 +3340,7 @@
29813340
29823341 __printf(3, 0)
29833342 static int
2984
-__trace_array_vprintk(struct ring_buffer *buffer,
3343
+__trace_array_vprintk(struct trace_buffer *buffer,
29853344 unsigned long ip, const char *fmt, va_list args)
29863345 {
29873346 struct trace_event_call *call = &event_print;
....@@ -3011,6 +3370,7 @@
30113370
30123371 local_save_flags(flags);
30133372 size = sizeof(*entry) + len + 1;
3373
+ ring_buffer_nest_start(buffer);
30143374 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
30153375 flags, pc);
30163376 if (!event)
....@@ -3025,6 +3385,7 @@
30253385 }
30263386
30273387 out:
3388
+ ring_buffer_nest_end(buffer);
30283389 put_trace_buf();
30293390
30303391 out_nobuffer:
....@@ -3038,9 +3399,29 @@
30383399 int trace_array_vprintk(struct trace_array *tr,
30393400 unsigned long ip, const char *fmt, va_list args)
30403401 {
3041
- return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3402
+ return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
30423403 }
30433404
3405
+/**
3406
+ * trace_array_printk - Print a message to a specific instance
3407
+ * @tr: The instance trace_array descriptor
3408
+ * @ip: The instruction pointer that this is called from.
3409
+ * @fmt: The format to print (printf format)
3410
+ *
3411
+ * If a subsystem sets up its own instance, they have the right to
3412
+ * printk strings into their tracing instance buffer using this
3413
+ * function. Note, this function will not write into the top level
3414
+ * buffer (use trace_printk() for that), as the top level buffer
3415
+ * should only contain events that can be individually disabled.
3416
+ * trace_printk() is only used for debugging a kernel, and should
3417
+ * never be incorporated in normal use.
3418
+ *
3419
+ * trace_array_printk() can be used, as it will not add noise to the
3420
+ * top level tracing buffer.
3421
+ *
3422
+ * Note, trace_array_init_printk() must be called on @tr before this
3423
+ * can be used.
3424
+ */
30443425 __printf(3, 0)
30453426 int trace_array_printk(struct trace_array *tr,
30463427 unsigned long ip, const char *fmt, ...)
....@@ -3048,20 +3429,46 @@
30483429 int ret;
30493430 va_list ap;
30503431
3051
- if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3052
- return 0;
3053
-
30543432 if (!tr)
30553433 return -ENOENT;
3434
+
3435
+ /* This is only allowed for created instances */
3436
+ if (tr == &global_trace)
3437
+ return 0;
3438
+
3439
+ if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3440
+ return 0;
30563441
30573442 va_start(ap, fmt);
30583443 ret = trace_array_vprintk(tr, ip, fmt, ap);
30593444 va_end(ap);
30603445 return ret;
30613446 }
3447
+EXPORT_SYMBOL_GPL(trace_array_printk);
3448
+
3449
+/**
3450
+ * trace_array_init_printk - Initialize buffers for trace_array_printk()
3451
+ * @tr: The trace array to initialize the buffers for
3452
+ *
3453
+ * As trace_array_printk() only writes into instances, they are OK to
3454
+ * have in the kernel (unlike trace_printk()). This needs to be called
3455
+ * before trace_array_printk() can be used on a trace_array.
3456
+ */
3457
+int trace_array_init_printk(struct trace_array *tr)
3458
+{
3459
+ if (!tr)
3460
+ return -ENOENT;
3461
+
3462
+ /* This is only allowed for created instances */
3463
+ if (tr == &global_trace)
3464
+ return -EINVAL;
3465
+
3466
+ return alloc_percpu_trace_buffer();
3467
+}
3468
+EXPORT_SYMBOL_GPL(trace_array_init_printk);
30623469
30633470 __printf(3, 4)
3064
-int trace_array_printk_buf(struct ring_buffer *buffer,
3471
+int trace_array_printk_buf(struct trace_buffer *buffer,
30653472 unsigned long ip, const char *fmt, ...)
30663473 {
30673474 int ret;
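The two kernel-doc blocks above describe the intended flow: a subsystem creates its own instance, calls trace_array_init_printk() once, and then prints with trace_array_printk() so the top-level buffer stays clean. A minimal sketch of such a caller follows; it is not part of this patch, the "mysys" instance name is an assumption, and so is the use of trace_array_get_by_name() to obtain the instance.

/* Hedged sketch only, not from this patch. */
#include <linux/module.h>
#include <linux/trace.h>

static struct trace_array *mysys_tr;	/* hypothetical private instance */

static int __init mysys_trace_init(void)
{
	/* Creates (or looks up) instances/mysys and takes a reference. */
	mysys_tr = trace_array_get_by_name("mysys");
	if (!mysys_tr)
		return -ENOMEM;

	/* Required once before trace_array_printk() may be used on this instance. */
	if (trace_array_init_printk(mysys_tr)) {
		trace_array_put(mysys_tr);
		return -ENODEV;
	}

	trace_array_printk(mysys_tr, _THIS_IP_, "mysys tracing ready\n");
	return 0;
}

static void __exit mysys_trace_exit(void)
{
	trace_array_put(mysys_tr);
}

module_init(mysys_trace_init);
module_exit(mysys_trace_exit);
MODULE_LICENSE("GPL");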
....@@ -3089,7 +3496,7 @@
30893496
30903497 iter->idx++;
30913498 if (buf_iter)
3092
- ring_buffer_read(buf_iter, NULL);
3499
+ ring_buffer_iter_advance(buf_iter);
30933500 }
30943501
30953502 static struct trace_entry *
....@@ -3099,11 +3506,15 @@
30993506 struct ring_buffer_event *event;
31003507 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
31013508
3102
- if (buf_iter)
3509
+ if (buf_iter) {
31033510 event = ring_buffer_iter_peek(buf_iter, ts);
3104
- else
3105
- event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3511
+ if (lost_events)
3512
+ *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3513
+ (unsigned long)-1 : 0;
3514
+ } else {
3515
+ event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
31063516 lost_events);
3517
+ }
31073518
31083519 if (event) {
31093520 iter->ent_size = ring_buffer_event_length(event);
....@@ -3117,7 +3528,7 @@
31173528 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
31183529 unsigned long *missing_events, u64 *ent_ts)
31193530 {
3120
- struct ring_buffer *buffer = iter->trace_buffer->buffer;
3531
+ struct trace_buffer *buffer = iter->array_buffer->buffer;
31213532 struct trace_entry *ent, *next = NULL;
31223533 unsigned long lost_events = 0, next_lost = 0;
31233534 int cpu_file = iter->cpu_file;
....@@ -3173,11 +3584,53 @@
31733584 return next;
31743585 }
31753586
3587
+#define STATIC_TEMP_BUF_SIZE 128
3588
+static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3589
+
31763590 /* Find the next real entry, without updating the iterator itself */
31773591 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
31783592 int *ent_cpu, u64 *ent_ts)
31793593 {
3180
- return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3594
+ /* __find_next_entry will reset ent_size */
3595
+ int ent_size = iter->ent_size;
3596
+ struct trace_entry *entry;
3597
+
3598
+ /*
3599
+ * If called from ftrace_dump(), then the iter->temp buffer
3600
+ * will be the static_temp_buf and not created from kmalloc.
3601
+ * If the entry size is greater than the buffer, we can
3602
+ * not save it. Just return NULL in that case. This is only
3603
+ * used to add markers when two consecutive events' time
3604
+ * stamps have a large delta. See trace_print_lat_context()
3605
+ */
3606
+ if (iter->temp == static_temp_buf &&
3607
+ STATIC_TEMP_BUF_SIZE < ent_size)
3608
+ return NULL;
3609
+
3610
+ /*
3611
+ * The __find_next_entry() may call peek_next_entry(), which may
3612
+ * call ring_buffer_peek() that may make the contents of iter->ent
3613
+ * undefined. Need to copy iter->ent now.
3614
+ */
3615
+ if (iter->ent && iter->ent != iter->temp) {
3616
+ if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3617
+ !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3618
+ void *temp;
3619
+ temp = kmalloc(iter->ent_size, GFP_KERNEL);
3620
+ if (!temp)
3621
+ return NULL;
3622
+ kfree(iter->temp);
3623
+ iter->temp = temp;
3624
+ iter->temp_size = iter->ent_size;
3625
+ }
3626
+ memcpy(iter->temp, iter->ent, iter->ent_size);
3627
+ iter->ent = iter->temp;
3628
+ }
3629
+ entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3630
+ /* Put back the original ent_size */
3631
+ iter->ent_size = ent_size;
3632
+
3633
+ return entry;
31813634 }
31823635
31833636 /* Find the next real entry, and increment the iterator to the next entry */
....@@ -3194,7 +3647,7 @@
31943647
31953648 static void trace_consume(struct trace_iterator *iter)
31963649 {
3197
- ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3650
+ ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
31983651 &iter->lost_events);
31993652 }
32003653
....@@ -3227,12 +3680,11 @@
32273680
32283681 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
32293682 {
3230
- struct ring_buffer_event *event;
32313683 struct ring_buffer_iter *buf_iter;
32323684 unsigned long entries = 0;
32333685 u64 ts;
32343686
3235
- per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3687
+ per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
32363688
32373689 buf_iter = trace_buffer_iter(iter, cpu);
32383690 if (!buf_iter)
....@@ -3245,14 +3697,14 @@
32453697 * that a reset never took place on a cpu. This is evident
32463698 * by the timestamp being before the start of the buffer.
32473699 */
3248
- while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3249
- if (ts >= iter->trace_buffer->time_start)
3700
+ while (ring_buffer_iter_peek(buf_iter, &ts)) {
3701
+ if (ts >= iter->array_buffer->time_start)
32503702 break;
32513703 entries++;
3252
- ring_buffer_read(buf_iter, NULL);
3704
+ ring_buffer_iter_advance(buf_iter);
32533705 }
32543706
3255
- per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3707
+ per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
32563708 }
32573709
32583710 /*
....@@ -3275,8 +3727,15 @@
32753727 * will point to the same string as current_trace->name.
32763728 */
32773729 mutex_lock(&trace_types_lock);
3278
- if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3730
+ if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
3731
+ /* Close iter->trace before switching to the new current tracer */
3732
+ if (iter->trace->close)
3733
+ iter->trace->close(iter);
32793734 *iter->trace = *tr->current_trace;
3735
+ /* Reopen the new current tracer */
3736
+ if (iter->trace->open)
3737
+ iter->trace->open(iter);
3738
+ }
32803739 mutex_unlock(&trace_types_lock);
32813740
32823741 #ifdef CONFIG_TRACER_MAX_TRACE
....@@ -3331,46 +3790,81 @@
33313790 }
33323791
33333792 static void
3334
-get_total_entries(struct trace_buffer *buf,
3335
- unsigned long *total, unsigned long *entries)
3793
+get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3794
+ unsigned long *entries, int cpu)
33363795 {
33373796 unsigned long count;
3797
+
3798
+ count = ring_buffer_entries_cpu(buf->buffer, cpu);
3799
+ /*
3800
+ * If this buffer has skipped entries, then we hold all
3801
+ * entries for the trace and we need to ignore the
3802
+ * ones before the time stamp.
3803
+ */
3804
+ if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3805
+ count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3806
+ /* total is the same as the entries */
3807
+ *total = count;
3808
+ } else
3809
+ *total = count +
3810
+ ring_buffer_overrun_cpu(buf->buffer, cpu);
3811
+ *entries = count;
3812
+}
3813
+
3814
+static void
3815
+get_total_entries(struct array_buffer *buf,
3816
+ unsigned long *total, unsigned long *entries)
3817
+{
3818
+ unsigned long t, e;
33383819 int cpu;
33393820
33403821 *total = 0;
33413822 *entries = 0;
33423823
33433824 for_each_tracing_cpu(cpu) {
3344
- count = ring_buffer_entries_cpu(buf->buffer, cpu);
3345
- /*
3346
- * If this buffer has skipped entries, then we hold all
3347
- * entries for the trace and we need to ignore the
3348
- * ones before the time stamp.
3349
- */
3350
- if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3351
- count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3352
- /* total is the same as the entries */
3353
- *total += count;
3354
- } else
3355
- *total += count +
3356
- ring_buffer_overrun_cpu(buf->buffer, cpu);
3357
- *entries += count;
3825
+ get_total_entries_cpu(buf, &t, &e, cpu);
3826
+ *total += t;
3827
+ *entries += e;
33583828 }
3829
+}
3830
+
3831
+unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3832
+{
3833
+ unsigned long total, entries;
3834
+
3835
+ if (!tr)
3836
+ tr = &global_trace;
3837
+
3838
+ get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3839
+
3840
+ return entries;
3841
+}
3842
+
3843
+unsigned long trace_total_entries(struct trace_array *tr)
3844
+{
3845
+ unsigned long total, entries;
3846
+
3847
+ if (!tr)
3848
+ tr = &global_trace;
3849
+
3850
+ get_total_entries(&tr->array_buffer, &total, &entries);
3851
+
3852
+ return entries;
33593853 }
33603854
33613855 static void print_lat_help_header(struct seq_file *m)
33623856 {
3363
- seq_puts(m, "# _------=> CPU# \n"
3364
- "# / _-----=> irqs-off \n"
3365
- "# | / _----=> need-resched \n"
3366
- "# || / _---=> hardirq/softirq \n"
3367
- "# ||| / _--=> preempt-depth \n"
3368
- "# |||| / delay \n"
3369
- "# cmd pid ||||| time | caller \n"
3370
- "# \\ / ||||| \\ | / \n");
3857
+ seq_puts(m, "# _------=> CPU# \n"
3858
+ "# / _-----=> irqs-off \n"
3859
+ "# | / _----=> need-resched \n"
3860
+ "# || / _---=> hardirq/softirq \n"
3861
+ "# ||| / _--=> preempt-depth \n"
3862
+ "# |||| / delay \n"
3863
+ "# cmd pid ||||| time | caller \n"
3864
+ "# \\ / ||||| \\ | / \n");
33713865 }
33723866
3373
-static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3867
+static void print_event_info(struct array_buffer *buf, struct seq_file *m)
33743868 {
33753869 unsigned long total;
33763870 unsigned long entries;
....@@ -3381,47 +3875,40 @@
33813875 seq_puts(m, "#\n");
33823876 }
33833877
3384
-static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3878
+static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
33853879 unsigned int flags)
33863880 {
33873881 bool tgid = flags & TRACE_ITER_RECORD_TGID;
33883882
33893883 print_event_info(buf, m);
33903884
3391
- seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? "TGID " : "");
3392
- seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
3885
+ seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
3886
+ seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
33933887 }
33943888
3395
-static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3889
+static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
33963890 unsigned int flags)
33973891 {
33983892 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3399
- const char tgid_space[] = " ";
3400
- const char space[] = " ";
3893
+ const char *space = " ";
3894
+ int prec = tgid ? 12 : 2;
34013895
34023896 print_event_info(buf, m);
34033897
3404
- seq_printf(m, "# %s _-----=> irqs-off\n",
3405
- tgid ? tgid_space : space);
3406
- seq_printf(m, "# %s / _----=> need-resched\n",
3407
- tgid ? tgid_space : space);
3408
- seq_printf(m, "# %s| / _---=> hardirq/softirq\n",
3409
- tgid ? tgid_space : space);
3410
- seq_printf(m, "# %s|| / _--=> preempt-depth\n",
3411
- tgid ? tgid_space : space);
3412
- seq_printf(m, "# %s||| / delay\n",
3413
- tgid ? tgid_space : space);
3414
- seq_printf(m, "# TASK-PID %sCPU# |||| TIMESTAMP FUNCTION\n",
3415
- tgid ? " TGID " : space);
3416
- seq_printf(m, "# | | %s | |||| | |\n",
3417
- tgid ? " | " : space);
3898
+ seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
3899
+ seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
3900
+ seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
3901
+ seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
3902
+ seq_printf(m, "# %.*s||| / delay\n", prec, space);
3903
+ seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
3904
+ seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
34183905 }
34193906
34203907 void
34213908 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
34223909 {
34233910 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3424
- struct trace_buffer *buf = iter->trace_buffer;
3911
+ struct array_buffer *buf = iter->array_buffer;
34253912 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
34263913 struct tracer *type = iter->trace;
34273914 unsigned long entries;
....@@ -3448,6 +3935,8 @@
34483935 "desktop",
34493936 #elif defined(CONFIG_PREEMPT)
34503937 "preempt",
3938
+#elif defined(CONFIG_PREEMPT_RT)
3939
+ "preempt_rt",
34513940 #else
34523941 "unknown",
34533942 #endif
....@@ -3494,7 +3983,7 @@
34943983 cpumask_test_cpu(iter->cpu, iter->started))
34953984 return;
34963985
3497
- if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3986
+ if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
34983987 return;
34993988
35003989 if (cpumask_available(iter->started))
....@@ -3628,7 +4117,7 @@
36284117 if (!ring_buffer_iter_empty(buf_iter))
36294118 return 0;
36304119 } else {
3631
- if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
4120
+ if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
36324121 return 0;
36334122 }
36344123 return 1;
....@@ -3640,7 +4129,7 @@
36404129 if (!ring_buffer_iter_empty(buf_iter))
36414130 return 0;
36424131 } else {
3643
- if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
4132
+ if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
36444133 return 0;
36454134 }
36464135 }
....@@ -3656,8 +4145,12 @@
36564145 enum print_line_t ret;
36574146
36584147 if (iter->lost_events) {
3659
- trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3660
- iter->cpu, iter->lost_events);
4148
+ if (iter->lost_events == (unsigned long)-1)
4149
+ trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4150
+ iter->cpu);
4151
+ else
4152
+ trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4153
+ iter->cpu, iter->lost_events);
36614154 if (trace_seq_has_overflowed(&iter->seq))
36624155 return TRACE_TYPE_PARTIAL_LINE;
36634156 }
....@@ -3730,10 +4223,10 @@
37304223 } else {
37314224 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
37324225 if (trace_flags & TRACE_ITER_IRQ_INFO)
3733
- print_func_help_header_irq(iter->trace_buffer,
4226
+ print_func_help_header_irq(iter->array_buffer,
37344227 m, trace_flags);
37354228 else
3736
- print_func_help_header(iter->trace_buffer, m,
4229
+ print_func_help_header(iter->array_buffer, m,
37374230 trace_flags);
37384231 }
37394232 }
....@@ -3873,6 +4366,18 @@
38734366 goto release;
38744367
38754368 /*
4369
+ * trace_find_next_entry() may need to save off iter->ent.
4370
+ * It will place it into the iter->temp buffer. As most
4371
+ * events are less than 128 bytes, allocate a buffer of that size.
4372
+ * If one is greater, then trace_find_next_entry() will
4373
+ * allocate a new buffer to adjust for the bigger iter->ent.
4374
+ * It's not critical if it fails to get allocated here.
4375
+ */
4376
+ iter->temp = kmalloc(128, GFP_KERNEL);
4377
+ if (iter->temp)
4378
+ iter->temp_size = 128;
4379
+
4380
+ /*
38764381 * We make a copy of the current tracer to avoid concurrent
38774382 * changes on it while we are reading.
38784383 */
....@@ -3891,35 +4396,38 @@
38914396 #ifdef CONFIG_TRACER_MAX_TRACE
38924397 /* Currently only the top directory has a snapshot */
38934398 if (tr->current_trace->print_max || snapshot)
3894
- iter->trace_buffer = &tr->max_buffer;
4399
+ iter->array_buffer = &tr->max_buffer;
38954400 else
38964401 #endif
3897
- iter->trace_buffer = &tr->trace_buffer;
4402
+ iter->array_buffer = &tr->array_buffer;
38984403 iter->snapshot = snapshot;
38994404 iter->pos = -1;
39004405 iter->cpu_file = tracing_get_cpu(inode);
39014406 mutex_init(&iter->mutex);
39024407
39034408 /* Notify the tracer early; before we stop tracing. */
3904
- if (iter->trace && iter->trace->open)
4409
+ if (iter->trace->open)
39054410 iter->trace->open(iter);
39064411
39074412 /* Annotate start of buffers if we had overruns */
3908
- if (ring_buffer_overruns(iter->trace_buffer->buffer))
4413
+ if (ring_buffer_overruns(iter->array_buffer->buffer))
39094414 iter->iter_flags |= TRACE_FILE_ANNOTATE;
39104415
39114416 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
39124417 if (trace_clocks[tr->clock_id].in_ns)
39134418 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
39144419
3915
- /* stop the trace while dumping if we are not opening "snapshot" */
3916
- if (!iter->snapshot)
4420
+ /*
4421
+ * If pause-on-trace is enabled, then stop the trace while
4422
+ * dumping, unless this is the "snapshot" file
4423
+ */
4424
+ if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
39174425 tracing_stop_tr(tr);
39184426
39194427 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
39204428 for_each_tracing_cpu(cpu) {
39214429 iter->buffer_iter[cpu] =
3922
- ring_buffer_read_prepare(iter->trace_buffer->buffer,
4430
+ ring_buffer_read_prepare(iter->array_buffer->buffer,
39234431 cpu, GFP_KERNEL);
39244432 }
39254433 ring_buffer_read_prepare_sync();
....@@ -3930,7 +4438,7 @@
39304438 } else {
39314439 cpu = iter->cpu_file;
39324440 iter->buffer_iter[cpu] =
3933
- ring_buffer_read_prepare(iter->trace_buffer->buffer,
4441
+ ring_buffer_read_prepare(iter->array_buffer->buffer,
39344442 cpu, GFP_KERNEL);
39354443 ring_buffer_read_prepare_sync();
39364444 ring_buffer_read_start(iter->buffer_iter[cpu]);
....@@ -3944,6 +4452,7 @@
39444452 fail:
39454453 mutex_unlock(&trace_types_lock);
39464454 kfree(iter->trace);
4455
+ kfree(iter->temp);
39474456 kfree(iter->buffer_iter);
39484457 release:
39494458 seq_release_private(inode, file);
....@@ -3952,8 +4461,11 @@
39524461
39534462 int tracing_open_generic(struct inode *inode, struct file *filp)
39544463 {
3955
- if (tracing_disabled)
3956
- return -ENODEV;
4464
+ int ret;
4465
+
4466
+ ret = tracing_check_open_get_tr(NULL);
4467
+ if (ret)
4468
+ return ret;
39574469
39584470 filp->private_data = inode->i_private;
39594471 return 0;
....@@ -3968,17 +4480,43 @@
39684480 * Open and update trace_array ref count.
39694481 * Must have the current trace_array passed to it.
39704482 */
3971
-static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4483
+int tracing_open_generic_tr(struct inode *inode, struct file *filp)
39724484 {
39734485 struct trace_array *tr = inode->i_private;
4486
+ int ret;
39744487
3975
- if (tracing_disabled)
3976
- return -ENODEV;
3977
-
3978
- if (trace_array_get(tr) < 0)
3979
- return -ENODEV;
4488
+ ret = tracing_check_open_get_tr(tr);
4489
+ if (ret)
4490
+ return ret;
39804491
39814492 filp->private_data = inode->i_private;
4493
+
4494
+ return 0;
4495
+}
4496
+
4497
+/*
4498
+ * The private pointer of the inode is the trace_event_file.
4499
+ * Update the tr ref count associated to it.
4500
+ */
4501
+int tracing_open_file_tr(struct inode *inode, struct file *filp)
4502
+{
4503
+ struct trace_event_file *file = inode->i_private;
4504
+ int ret;
4505
+
4506
+ ret = tracing_check_open_get_tr(file->tr);
4507
+ if (ret)
4508
+ return ret;
4509
+
4510
+ filp->private_data = inode->i_private;
4511
+
4512
+ return 0;
4513
+}
4514
+
4515
+int tracing_release_file_tr(struct inode *inode, struct file *filp)
4516
+{
4517
+ struct trace_event_file *file = inode->i_private;
4518
+
4519
+ trace_array_put(file->tr);
39824520
39834521 return 0;
39844522 }
....@@ -4007,7 +4545,7 @@
40074545 if (iter->trace && iter->trace->close)
40084546 iter->trace->close(iter);
40094547
4010
- if (!iter->snapshot)
4548
+ if (!iter->snapshot && tr->stop_count)
40114549 /* reenable tracing if it was previously enabled */
40124550 tracing_start_tr(tr);
40134551
....@@ -4017,6 +4555,7 @@
40174555
40184556 mutex_destroy(&iter->mutex);
40194557 free_cpumask_var(iter->started);
4558
+ kfree(iter->temp);
40204559 kfree(iter->trace);
40214560 kfree(iter->buffer_iter);
40224561 seq_release_private(inode, file);
....@@ -4045,15 +4584,16 @@
40454584 {
40464585 struct trace_array *tr = inode->i_private;
40474586 struct trace_iterator *iter;
4048
- int ret = 0;
4587
+ int ret;
40494588
4050
- if (trace_array_get(tr) < 0)
4051
- return -ENODEV;
4589
+ ret = tracing_check_open_get_tr(tr);
4590
+ if (ret)
4591
+ return ret;
40524592
40534593 /* If this file was open for write, then erase contents */
40544594 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
40554595 int cpu = tracing_get_cpu(inode);
4056
- struct trace_buffer *trace_buf = &tr->trace_buffer;
4596
+ struct array_buffer *trace_buf = &tr->array_buffer;
40574597
40584598 #ifdef CONFIG_TRACER_MAX_TRACE
40594599 if (tr->current_trace->print_max)
....@@ -4063,7 +4603,7 @@
40634603 if (cpu == RING_BUFFER_ALL_CPUS)
40644604 tracing_reset_online_cpus(trace_buf);
40654605 else
4066
- tracing_reset(trace_buf, cpu);
4606
+ tracing_reset_cpu(trace_buf, cpu);
40674607 }
40684608
40694609 if (file->f_mode & FMODE_READ) {
....@@ -4164,11 +4704,9 @@
41644704 struct seq_file *m;
41654705 int ret;
41664706
4167
- if (tracing_disabled)
4168
- return -ENODEV;
4169
-
4170
- if (trace_array_get(tr) < 0)
4171
- return -ENODEV;
4707
+ ret = tracing_check_open_get_tr(tr);
4708
+ if (ret)
4709
+ return ret;
41724710
41734711 ret = seq_open(file, &show_traces_seq_ops);
41744712 if (ret) {
....@@ -4212,6 +4750,8 @@
42124750 static const struct file_operations tracing_fops = {
42134751 .open = tracing_open,
42144752 .read = seq_read,
4753
+ .read_iter = seq_read_iter,
4754
+ .splice_read = generic_file_splice_read,
42154755 .write = tracing_write_stub,
42164756 .llseek = tracing_lseek,
42174757 .release = tracing_release,
....@@ -4252,20 +4792,13 @@
42524792 return count;
42534793 }
42544794
4255
-static ssize_t
4256
-tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4257
- size_t count, loff_t *ppos)
4795
+int tracing_set_cpumask(struct trace_array *tr,
4796
+ cpumask_var_t tracing_cpumask_new)
42584797 {
4259
- struct trace_array *tr = file_inode(filp)->i_private;
4260
- cpumask_var_t tracing_cpumask_new;
4261
- int err, cpu;
4798
+ int cpu;
42624799
4263
- if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4264
- return -ENOMEM;
4265
-
4266
- err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4267
- if (err)
4268
- goto err_unlock;
4800
+ if (!tr)
4801
+ return -EINVAL;
42694802
42704803 local_irq_disable();
42714804 arch_spin_lock(&tr->max_lock);
....@@ -4276,24 +4809,53 @@
42764809 */
42774810 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
42784811 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4279
- atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4280
- ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4812
+ atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4813
+ ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4814
+#ifdef CONFIG_TRACER_MAX_TRACE
4815
+ ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
4816
+#endif
42814817 }
42824818 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
42834819 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4284
- atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4285
- ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4820
+ atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4821
+ ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4822
+#ifdef CONFIG_TRACER_MAX_TRACE
4823
+ ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
4824
+#endif
42864825 }
42874826 }
42884827 arch_spin_unlock(&tr->max_lock);
42894828 local_irq_enable();
42904829
42914830 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4831
+
4832
+ return 0;
4833
+}
4834
+
4835
+static ssize_t
4836
+tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4837
+ size_t count, loff_t *ppos)
4838
+{
4839
+ struct trace_array *tr = file_inode(filp)->i_private;
4840
+ cpumask_var_t tracing_cpumask_new;
4841
+ int err;
4842
+
4843
+ if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4844
+ return -ENOMEM;
4845
+
4846
+ err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4847
+ if (err)
4848
+ goto err_free;
4849
+
4850
+ err = tracing_set_cpumask(tr, tracing_cpumask_new);
4851
+ if (err)
4852
+ goto err_free;
4853
+
42924854 free_cpumask_var(tracing_cpumask_new);
42934855
42944856 return count;
42954857
4296
-err_unlock:
4858
+err_free:
42974859 free_cpumask_var(tracing_cpumask_new);
42984860
42994861 return err;
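tracing_set_cpumask() is now split out of the file write handler, so in-kernel callers can change an instance's tracing CPU mask directly. A hedged sketch of such a caller is below; the wrapper name and the choice of CPU 0 are illustrative only, not part of this patch.

/* Hedged sketch only: limit tracing of an instance to CPU 0. */
#include <linux/cpumask.h>
#include <linux/gfp.h>
#include "trace.h"

static int mysys_trace_cpu0_only(struct trace_array *tr)
{
	cpumask_var_t new_mask;
	int err;

	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_clear(new_mask);
	cpumask_set_cpu(0, new_mask);

	/* Disables ring-buffer recording on every CPU cleared in new_mask. */
	err = tracing_set_cpumask(tr, new_mask);

	/* The mask is copied into tr->tracing_cpumask, so it can be freed here. */
	free_cpumask_var(new_mask);
	return err;
}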
....@@ -4435,7 +4997,7 @@
44354997 ftrace_pid_follow_fork(tr, enabled);
44364998
44374999 if (mask == TRACE_ITER_OVERWRITE) {
4438
- ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
5000
+ ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
44395001 #ifdef CONFIG_TRACER_MAX_TRACE
44405002 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
44415003 #endif
....@@ -4449,19 +5011,21 @@
44495011 return 0;
44505012 }
44515013
4452
-static int trace_set_options(struct trace_array *tr, char *option)
5014
+int trace_set_options(struct trace_array *tr, char *option)
44535015 {
44545016 char *cmp;
44555017 int neg = 0;
44565018 int ret;
44575019 size_t orig_len = strlen(option);
5020
+ int len;
44585021
44595022 cmp = strstrip(option);
44605023
4461
- if (strncmp(cmp, "no", 2) == 0) {
5024
+ len = str_has_prefix(cmp, "no");
5025
+ if (len)
44625026 neg = 1;
4463
- cmp += 2;
4464
- }
5027
+
5028
+ cmp += len;
44655029
44665030 mutex_lock(&event_mutex);
44675031 mutex_lock(&trace_types_lock);
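The switch from strncmp() to str_has_prefix() above works because str_has_prefix() returns the length of the prefix on a match and 0 otherwise, so a single return value both sets neg and advances cmp past the "no". A small illustration of that convention (not part of the patch; the helper name is made up):

#include <linux/string.h>
#include <linux/types.h>

/* Illustration only: mirrors the "no<option>" parsing used above. */
static size_t option_negation_len(const char *option, bool *neg)
{
	size_t len = str_has_prefix(option, "no");	/* 2 on match, 0 otherwise */

	*neg = len != 0;	/* "noblock" -> true, "block" -> false */
	return len;		/* caller then does: option += len */
}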
....@@ -4537,11 +5101,9 @@
45375101 struct trace_array *tr = inode->i_private;
45385102 int ret;
45395103
4540
- if (tracing_disabled)
4541
- return -ENODEV;
4542
-
4543
- if (trace_array_get(tr) < 0)
4544
- return -ENODEV;
5104
+ ret = tracing_check_open_get_tr(tr);
5105
+ if (ret)
5106
+ return ret;
45455107
45465108 ret = single_open(file, tracing_trace_options_show, inode->i_private);
45475109 if (ret < 0)
....@@ -4568,6 +5130,7 @@
45685130 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
45695131 " current_tracer\t- function and latency tracers\n"
45705132 " available_tracers\t- list of configured tracers for current_tracer\n"
5133
+ " error_log\t- error log for failed commands (that support it)\n"
45715134 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
45725135 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
45735136 " trace_clock\t\t-change the clock used to order events\n"
....@@ -4588,7 +5151,7 @@
45885151 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
45895152 "\t\t\t Remove sub-buffer with rmdir\n"
45905153 " trace_options\t\t- Set format or modify how tracing happens\n"
4591
- "\t\t\t Disable an option by adding a suffix 'no' to the\n"
5154
+ "\t\t\t Disable an option by prefixing 'no' to the\n"
45925155 "\t\t\t option name\n"
45935156 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
45945157 #ifdef CONFIG_DYNAMIC_FTRACE
....@@ -4632,6 +5195,8 @@
46325195 #ifdef CONFIG_FUNCTION_TRACER
46335196 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
46345197 "\t\t (function)\n"
5198
+ " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5199
+ "\t\t (function)\n"
46355200 #endif
46365201 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
46375202 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
....@@ -4653,31 +5218,49 @@
46535218 "\t\t\t traces\n"
46545219 #endif
46555220 #endif /* CONFIG_STACK_TRACER */
5221
+#ifdef CONFIG_DYNAMIC_EVENTS
5222
+ " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5223
+ "\t\t\t Write into this file to define/undefine new trace events.\n"
5224
+#endif
46565225 #ifdef CONFIG_KPROBE_EVENTS
4657
- " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
5226
+ " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
46585227 "\t\t\t Write into this file to define/undefine new trace events.\n"
46595228 #endif
46605229 #ifdef CONFIG_UPROBE_EVENTS
4661
- " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
5230
+ " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
46625231 "\t\t\t Write into this file to define/undefine new trace events.\n"
46635232 #endif
46645233 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
46655234 "\t accepts: event-definitions (one definition per line)\n"
46665235 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
46675236 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5237
+#ifdef CONFIG_HIST_TRIGGERS
5238
+ "\t s:[synthetic/]<event> <field> [<field>]\n"
5239
+#endif
46685240 "\t -:[<group>/]<event>\n"
46695241 #ifdef CONFIG_KPROBE_EVENTS
46705242 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4671
- "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5243
+ "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
46725244 #endif
46735245 #ifdef CONFIG_UPROBE_EVENTS
4674
- "\t place: <path>:<offset>\n"
5246
+ " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
46755247 #endif
46765248 "\t args: <name>=fetcharg[:type]\n"
46775249 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4678
- "\t $stack<index>, $stack, $retval, $comm\n"
4679
- "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4680
- "\t b<bit-width>@<bit-offset>/<container-size>\n"
5250
+#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5251
+ "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5252
+#else
5253
+ "\t $stack<index>, $stack, $retval, $comm,\n"
5254
+#endif
5255
+ "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5256
+ "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5257
+ "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5258
+ "\t <type>\\[<array-size>\\]\n"
5259
+#ifdef CONFIG_HIST_TRIGGERS
5260
+ "\t field: <stype> <name>;\n"
5261
+ "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5262
+ "\t [unsigned] char/int/long\n"
5263
+#endif
46815264 #endif
46825265 " events/\t\t- Directory containing all trace event subsystems:\n"
46835266 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
....@@ -4730,6 +5313,7 @@
47305313 "\t [:size=#entries]\n"
47315314 "\t [:pause][:continue][:clear]\n"
47325315 "\t [:name=histname1]\n"
5316
+ "\t [:<handler>.<action>]\n"
47335317 "\t [if <filter>]\n\n"
47345318 "\t Note, special fields can be used as well:\n"
47355319 "\t common_timestamp - to record current timestamp\n"
....@@ -4774,8 +5358,26 @@
47745358 "\t unchanged.\n\n"
47755359 "\t The enable_hist and disable_hist triggers can be used to\n"
47765360 "\t have one event conditionally start and stop another event's\n"
4777
- "\t already-attached hist trigger. The syntax is analagous to\n"
4778
- "\t the enable_event and disable_event triggers.\n"
5361
+ "\t already-attached hist trigger. The syntax is analogous to\n"
5362
+ "\t the enable_event and disable_event triggers.\n\n"
5363
+ "\t Hist trigger handlers and actions are executed whenever a\n"
5364
+ "\t a histogram entry is added or updated. They take the form:\n\n"
5365
+ "\t <handler>.<action>\n\n"
5366
+ "\t The available handlers are:\n\n"
5367
+ "\t onmatch(matching.event) - invoke on addition or update\n"
5368
+ "\t onmax(var) - invoke if var exceeds current max\n"
5369
+ "\t onchange(var) - invoke action if var changes\n\n"
5370
+ "\t The available actions are:\n\n"
5371
+ "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5372
+ "\t save(field,...) - save current event fields\n"
5373
+#ifdef CONFIG_TRACER_SNAPSHOT
5374
+ "\t snapshot() - snapshot the trace buffer\n\n"
5375
+#endif
5376
+#ifdef CONFIG_SYNTH_EVENTS
5377
+ " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5378
+ "\t Write into this file to define/undefine new synthetic events.\n"
5379
+ "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5380
+#endif
47795381 #endif
47805382 ;
47815383
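The handler/action text above is abstract on its own; the wakeup-latency example from the histogram documentation shows how onmax($var).save(...) is typically combined across two events. A hedged userspace sketch follows; the event, field and variable names follow that documented example and are not taken from this patch.

/* Hedged userspace sketch: onmax().save() across sched_waking/sched_switch. */
#include <stdio.h>

static int append_line(const char *path, const char *line)
{
	FILE *f = fopen(path, "a");	/* append so existing triggers survive */

	if (!f)
		return -1;
	fputs(line, f);
	return fclose(f);
}

int main(void)
{
	const char *ev = "/sys/kernel/tracing/events/sched";
	char path[128];

	/* Save a per-pid timestamp variable when the task is woken up. */
	snprintf(path, sizeof(path), "%s/sched_waking/trigger", ev);
	append_line(path, "hist:keys=pid:ts0=common_timestamp.usecs\n");

	/*
	 * On sched_switch, compute the wakeup latency; onmax($wakeup_lat)
	 * runs the save() action each time a new maximum is observed.
	 */
	snprintf(path, sizeof(path), "%s/sched_switch/trigger", ev);
	append_line(path,
		    "hist:keys=next_pid:"
		    "wakeup_lat=common_timestamp.usecs-$ts0:"
		    "onmax($wakeup_lat).save(next_comm,prev_pid,prev_comm)\n");
	return 0;
}

Reading the sched_switch hist file afterwards should then show the saved fields alongside the recorded maximum.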
....@@ -4833,8 +5435,11 @@
48335435
48345436 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
48355437 {
4836
- if (tracing_disabled)
4837
- return -ENODEV;
5438
+ int ret;
5439
+
5440
+ ret = tracing_check_open_get_tr(NULL);
5441
+ if (ret)
5442
+ return ret;
48385443
48395444 return seq_open(filp, &tracing_saved_tgids_seq_ops);
48405445 }
....@@ -4910,8 +5515,11 @@
49105515
49115516 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
49125517 {
4913
- if (tracing_disabled)
4914
- return -ENODEV;
5518
+ int ret;
5519
+
5520
+ ret = tracing_check_open_get_tr(NULL);
5521
+ if (ret)
5522
+ return ret;
49155523
49165524 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
49175525 }
....@@ -4930,9 +5538,11 @@
49305538 char buf[64];
49315539 int r;
49325540
5541
+ preempt_disable();
49335542 arch_spin_lock(&trace_cmdline_lock);
49345543 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
49355544 arch_spin_unlock(&trace_cmdline_lock);
5545
+ preempt_enable();
49365546
49375547 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
49385548 }
....@@ -4957,10 +5567,12 @@
49575567 return -ENOMEM;
49585568 }
49595569
5570
+ preempt_disable();
49605571 arch_spin_lock(&trace_cmdline_lock);
49615572 savedcmd_temp = savedcmd;
49625573 savedcmd = s;
49635574 arch_spin_unlock(&trace_cmdline_lock);
5575
+ preempt_enable();
49645576 free_saved_cmdlines_buffer(savedcmd_temp);
49655577
49665578 return 0;
....@@ -5019,14 +5631,12 @@
50195631 * Paranoid! If ptr points to end, we don't want to increment past it.
50205632 * This really should never happen.
50215633 */
5634
+ (*pos)++;
50225635 ptr = update_eval_map(ptr);
50235636 if (WARN_ON_ONCE(!ptr))
50245637 return NULL;
50255638
50265639 ptr++;
5027
-
5028
- (*pos)++;
5029
-
50305640 ptr = update_eval_map(ptr);
50315641
50325642 return ptr;
....@@ -5075,8 +5685,11 @@
50755685
50765686 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
50775687 {
5078
- if (tracing_disabled)
5079
- return -ENODEV;
5688
+ int ret;
5689
+
5690
+ ret = tracing_check_open_get_tr(NULL);
5691
+ if (ret)
5692
+ return ret;
50805693
50815694 return seq_open(filp, &tracing_eval_map_seq_ops);
50825695 }
....@@ -5189,11 +5802,11 @@
51895802
51905803 int tracer_init(struct tracer *t, struct trace_array *tr)
51915804 {
5192
- tracing_reset_online_cpus(&tr->trace_buffer);
5805
+ tracing_reset_online_cpus(&tr->array_buffer);
51935806 return t->init(tr);
51945807 }
51955808
5196
-static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5809
+static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
51975810 {
51985811 int cpu;
51995812
....@@ -5203,8 +5816,8 @@
52035816
52045817 #ifdef CONFIG_TRACER_MAX_TRACE
52055818 /* resize @tr's buffer to the size of @size_tr's entries */
5206
-static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5207
- struct trace_buffer *size_buf, int cpu_id)
5819
+static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5820
+ struct array_buffer *size_buf, int cpu_id)
52085821 {
52095822 int cpu, ret = 0;
52105823
....@@ -5242,10 +5855,10 @@
52425855 ring_buffer_expanded = true;
52435856
52445857 /* May be called before buffers are initialized */
5245
- if (!tr->trace_buffer.buffer)
5858
+ if (!tr->array_buffer.buffer)
52465859 return 0;
52475860
5248
- ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5861
+ ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
52495862 if (ret < 0)
52505863 return ret;
52515864
....@@ -5256,8 +5869,8 @@
52565869
52575870 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
52585871 if (ret < 0) {
5259
- int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5260
- &tr->trace_buffer, cpu);
5872
+ int r = resize_buffer_duplicate_size(&tr->array_buffer,
5873
+ &tr->array_buffer, cpu);
52615874 if (r < 0) {
52625875 /*
52635876 * AARGH! We are left with different
....@@ -5288,15 +5901,15 @@
52885901 #endif /* CONFIG_TRACER_MAX_TRACE */
52895902
52905903 if (cpu == RING_BUFFER_ALL_CPUS)
5291
- set_buffer_entries(&tr->trace_buffer, size);
5904
+ set_buffer_entries(&tr->array_buffer, size);
52925905 else
5293
- per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5906
+ per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
52945907
52955908 return ret;
52965909 }
52975910
5298
-static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5299
- unsigned long size, int cpu_id)
5911
+ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5912
+ unsigned long size, int cpu_id)
53005913 {
53015914 int ret = size;
53025915
....@@ -5366,16 +5979,22 @@
53665979 tr->current_trace = &nop_trace;
53675980 }
53685981
5982
+static bool tracer_options_updated;
5983
+
53695984 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
53705985 {
53715986 /* Only enable if the directory has been created already. */
53725987 if (!tr->dir)
53735988 return;
53745989
5990
+ /* Only create trace option files after update_tracer_options finish */
5991
+ if (!tracer_options_updated)
5992
+ return;
5993
+
53755994 create_trace_option_files(tr, t);
53765995 }
53775996
5378
-static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5997
+int tracing_set_tracer(struct trace_array *tr, const char *buf)
53795998 {
53805999 struct tracer *t;
53816000 #ifdef CONFIG_TRACER_MAX_TRACE
....@@ -5404,6 +6023,18 @@
54046023 if (t == tr->current_trace)
54056024 goto out;
54066025
6026
+#ifdef CONFIG_TRACER_SNAPSHOT
6027
+ if (t->use_max_tr) {
6028
+ local_irq_disable();
6029
+ arch_spin_lock(&tr->max_lock);
6030
+ if (tr->cond_snapshot)
6031
+ ret = -EBUSY;
6032
+ arch_spin_unlock(&tr->max_lock);
6033
+ local_irq_enable();
6034
+ if (ret)
6035
+ goto out;
6036
+ }
6037
+#endif
54076038 /* Some tracers won't work on kernel command line */
54086039 if (system_state < SYSTEM_RUNNING && t->noboot) {
54096040 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
....@@ -5418,7 +6049,7 @@
54186049 }
54196050
54206051 /* If trace pipe files are being read, we can't change the tracer */
5421
- if (tr->current_trace->ref) {
6052
+ if (tr->trace_ref) {
54226053 ret = -EBUSY;
54236054 goto out;
54246055 }
....@@ -5430,11 +6061,11 @@
54306061 if (tr->current_trace->reset)
54316062 tr->current_trace->reset(tr);
54326063
5433
- /* Current trace needs to be nop_trace before synchronize_sched */
5434
- tr->current_trace = &nop_trace;
5435
-
54366064 #ifdef CONFIG_TRACER_MAX_TRACE
5437
- had_max_tr = tr->allocated_snapshot;
6065
+ had_max_tr = tr->current_trace->use_max_tr;
6066
+
6067
+ /* Current trace needs to be nop_trace before synchronize_rcu */
6068
+ tr->current_trace = &nop_trace;
54386069
54396070 if (had_max_tr && !t->use_max_tr) {
54406071 /*
....@@ -5444,17 +6075,17 @@
54446075 * The update_max_tr is called from interrupts disabled
54456076 * so a synchronized_sched() is sufficient.
54466077 */
5447
- synchronize_sched();
6078
+ synchronize_rcu();
54486079 free_snapshot(tr);
54496080 }
5450
-#endif
54516081
5452
-#ifdef CONFIG_TRACER_MAX_TRACE
5453
- if (t->use_max_tr && !had_max_tr) {
6082
+ if (t->use_max_tr && !tr->allocated_snapshot) {
54546083 ret = tracing_alloc_snapshot_instance(tr);
54556084 if (ret < 0)
54566085 goto out;
54576086 }
6087
+#else
6088
+ tr->current_trace = &nop_trace;
54586089 #endif
54596090
54606091 if (t->init) {
....@@ -5589,13 +6220,11 @@
55896220 {
55906221 struct trace_array *tr = inode->i_private;
55916222 struct trace_iterator *iter;
5592
- int ret = 0;
6223
+ int ret;
55936224
5594
- if (tracing_disabled)
5595
- return -ENODEV;
5596
-
5597
- if (trace_array_get(tr) < 0)
5598
- return -ENODEV;
6225
+ ret = tracing_check_open_get_tr(tr);
6226
+ if (ret)
6227
+ return ret;
55996228
56006229 mutex_lock(&trace_types_lock);
56016230
....@@ -5626,7 +6255,7 @@
56266255 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
56276256
56286257 iter->tr = tr;
5629
- iter->trace_buffer = &tr->trace_buffer;
6258
+ iter->array_buffer = &tr->array_buffer;
56306259 iter->cpu_file = tracing_get_cpu(inode);
56316260 mutex_init(&iter->mutex);
56326261 filp->private_data = iter;
....@@ -5636,7 +6265,7 @@
56366265
56376266 nonseekable_open(inode, filp);
56386267
5639
- tr->current_trace->ref++;
6268
+ tr->trace_ref++;
56406269 out:
56416270 mutex_unlock(&trace_types_lock);
56426271 return ret;
....@@ -5655,7 +6284,7 @@
56556284
56566285 mutex_lock(&trace_types_lock);
56576286
5658
- tr->current_trace->ref--;
6287
+ tr->trace_ref--;
56596288
56606289 if (iter->trace->pipe_close)
56616290 iter->trace->pipe_close(iter);
....@@ -5663,6 +6292,7 @@
56636292 mutex_unlock(&trace_types_lock);
56646293
56656294 free_cpumask_var(iter->started);
6295
+ kfree(iter->temp);
56666296 mutex_destroy(&iter->mutex);
56676297 kfree(iter);
56686298
....@@ -5686,8 +6316,8 @@
56866316 */
56876317 return EPOLLIN | EPOLLRDNORM;
56886318 else
5689
- return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5690
- filp, poll_table);
6319
+ return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6320
+ filp, poll_table, iter->tr->buffer_percent);
56916321 }
56926322
56936323 static __poll_t
....@@ -5724,7 +6354,7 @@
57246354
57256355 mutex_unlock(&iter->mutex);
57266356
5727
- ret = wait_on_pipe(iter, false);
6357
+ ret = wait_on_pipe(iter, 0);
57286358
57296359 mutex_lock(&iter->mutex);
57306360
....@@ -5795,7 +6425,20 @@
57956425
57966426 ret = print_trace_line(iter);
57976427 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5798
- /* don't print partial lines */
6428
+ /*
6429
+ * If one print_trace_line() fills the entire trace_seq in one shot,
6430
+ * trace_seq_to_user() will return -EBUSY because save_len == 0.
6431
+ * In this case, we need to consume it; otherwise, the loop will peek
6432
+ * this event next time, resulting in an infinite loop.
6433
+ */
6434
+ if (save_len == 0) {
6435
+ iter->seq.full = 0;
6436
+ trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6437
+ trace_consume(iter);
6438
+ break;
6439
+ }
6440
+
6441
+ /* In other cases, don't print partial lines */
57996442 iter->seq.seq.len = save_len;
58006443 break;
58016444 }
....@@ -5839,14 +6482,6 @@
58396482 {
58406483 __free_page(spd->pages[idx]);
58416484 }
5842
-
5843
-static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5844
- .can_merge = 0,
5845
- .confirm = generic_pipe_buf_confirm,
5846
- .release = generic_pipe_buf_release,
5847
- .steal = generic_pipe_buf_steal,
5848
- .get = generic_pipe_buf_get,
5849
-};
58506485
58516486 static size_t
58526487 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
....@@ -5909,7 +6544,7 @@
59096544 .partial = partial_def,
59106545 .nr_pages = 0, /* This gets updated below. */
59116546 .nr_pages_max = PIPE_DEF_BUFFERS,
5912
- .ops = &tracing_pipe_buf_ops,
6547
+ .ops = &default_pipe_buf_ops,
59136548 .spd_release = tracing_spd_release_pipe,
59146549 };
59156550 ssize_t ret;
....@@ -6004,8 +6639,8 @@
60046639 for_each_tracing_cpu(cpu) {
60056640 /* fill in the size from first enabled cpu */
60066641 if (size == 0)
6007
- size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6008
- if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6642
+ size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6643
+ if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
60096644 buf_size_same = 0;
60106645 break;
60116646 }
....@@ -6021,7 +6656,7 @@
60216656 } else
60226657 r = sprintf(buf, "X\n");
60236658 } else
6024
- r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6659
+ r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
60256660
60266661 mutex_unlock(&trace_types_lock);
60276662
....@@ -6068,7 +6703,7 @@
60686703
60696704 mutex_lock(&trace_types_lock);
60706705 for_each_tracing_cpu(cpu) {
6071
- size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6706
+ size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
60726707 if (!ring_buffer_expanded)
60736708 expanded_size += trace_buf_size >> 10;
60746709 }
....@@ -6118,16 +6753,16 @@
61186753 struct trace_array *tr = filp->private_data;
61196754 struct ring_buffer_event *event;
61206755 enum event_trigger_type tt = ETT_NONE;
6121
- struct ring_buffer *buffer;
6756
+ struct trace_buffer *buffer;
61226757 struct print_entry *entry;
61236758 unsigned long irq_flags;
6124
- const char faulted[] = "<faulted>";
61256759 ssize_t written;
61266760 int size;
61276761 int len;
61286762
61296763 /* Used in tracing_mark_raw_write() as well */
6130
-#define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6764
+#define FAULTED_STR "<faulted>"
6765
+#define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
61316766
61326767 if (tracing_disabled)
61336768 return -EINVAL;
....@@ -6147,7 +6782,7 @@
61476782 if (cnt < FAULTED_SIZE)
61486783 size += FAULTED_SIZE - cnt;
61496784
6150
- buffer = tr->trace_buffer.buffer;
6785
+ buffer = tr->array_buffer.buffer;
61516786 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
61526787 irq_flags, preempt_count());
61536788 if (unlikely(!event))
....@@ -6159,12 +6794,11 @@
61596794
61606795 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
61616796 if (len) {
6162
- memcpy(&entry->buf, faulted, FAULTED_SIZE);
6797
+ memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
61636798 cnt = FAULTED_SIZE;
61646799 written = -EFAULT;
61656800 } else
61666801 written = cnt;
6167
- len = cnt;
61686802
61696803 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
61706804 /* do not add \n before testing triggers, but add \0 */
....@@ -6178,6 +6812,8 @@
61786812 } else
61796813 entry->buf[cnt] = '\0';
61806814
6815
+ if (static_branch_unlikely(&trace_marker_exports_enabled))
6816
+ ftrace_exports(event, TRACE_EXPORT_MARKER);
61816817 __buffer_unlock_commit(buffer, event);
61826818
61836819 if (tt)
....@@ -6198,9 +6834,8 @@
61986834 {
61996835 struct trace_array *tr = filp->private_data;
62006836 struct ring_buffer_event *event;
6201
- struct ring_buffer *buffer;
6837
+ struct trace_buffer *buffer;
62026838 struct raw_data_entry *entry;
6203
- const char faulted[] = "<faulted>";
62046839 unsigned long irq_flags;
62056840 ssize_t written;
62066841 int size;
....@@ -6228,7 +6863,7 @@
62286863 if (cnt < FAULT_SIZE_ID)
62296864 size += FAULT_SIZE_ID - cnt;
62306865
6231
- buffer = tr->trace_buffer.buffer;
6866
+ buffer = tr->array_buffer.buffer;
62326867 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
62336868 irq_flags, preempt_count());
62346869 if (!event)
....@@ -6240,7 +6875,7 @@
62406875 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
62416876 if (len) {
62426877 entry->id = -1;
6243
- memcpy(&entry->buf, faulted, FAULTED_SIZE);
6878
+ memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
62446879 written = -EFAULT;
62456880 } else
62466881 written = cnt;
....@@ -6283,13 +6918,13 @@
62836918
62846919 tr->clock_id = i;
62856920
6286
- ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6921
+ ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
62876922
62886923 /*
62896924 * New clock may not be consistent with the previous clock.
62906925 * Reset the buffer so that it doesn't have incomparable timestamps.
62916926 */
6292
- tracing_reset_online_cpus(&tr->trace_buffer);
6927
+ tracing_reset_online_cpus(&tr->array_buffer);
62936928
62946929 #ifdef CONFIG_TRACER_MAX_TRACE
62956930 if (tr->max_buffer.buffer)
....@@ -6335,11 +6970,9 @@
63356970 struct trace_array *tr = inode->i_private;
63366971 int ret;
63376972
6338
- if (tracing_disabled)
6339
- return -ENODEV;
6340
-
6341
- if (trace_array_get(tr))
6342
- return -ENODEV;
6973
+ ret = tracing_check_open_get_tr(tr);
6974
+ if (ret)
6975
+ return ret;
63436976
63446977 ret = single_open(file, tracing_clock_show, inode->i_private);
63456978 if (ret < 0)
....@@ -6354,7 +6987,7 @@
63546987
63556988 mutex_lock(&trace_types_lock);
63566989
6357
- if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6990
+ if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
63586991 seq_puts(m, "delta [absolute]\n");
63596992 else
63606993 seq_puts(m, "[delta] absolute\n");
....@@ -6369,11 +7002,9 @@
63697002 struct trace_array *tr = inode->i_private;
63707003 int ret;
63717004
6372
- if (tracing_disabled)
6373
- return -ENODEV;
6374
-
6375
- if (trace_array_get(tr))
6376
- return -ENODEV;
7005
+ ret = tracing_check_open_get_tr(tr);
7006
+ if (ret)
7007
+ return ret;
63777008
63787009 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
63797010 if (ret < 0)
....@@ -6401,7 +7032,7 @@
64017032 goto out;
64027033 }
64037034
6404
- ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
7035
+ ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
64057036
64067037 #ifdef CONFIG_TRACER_MAX_TRACE
64077038 if (tr->max_buffer.buffer)
....@@ -6426,10 +7057,11 @@
64267057 struct trace_array *tr = inode->i_private;
64277058 struct trace_iterator *iter;
64287059 struct seq_file *m;
6429
- int ret = 0;
7060
+ int ret;
64307061
6431
- if (trace_array_get(tr) < 0)
6432
- return -ENODEV;
7062
+ ret = tracing_check_open_get_tr(tr);
7063
+ if (ret)
7064
+ return ret;
64337065
64347066 if (file->f_mode & FMODE_READ) {
64357067 iter = __tracing_open(inode, file, true);
....@@ -6449,7 +7081,7 @@
64497081 ret = 0;
64507082
64517083 iter->tr = tr;
6452
- iter->trace_buffer = &tr->max_buffer;
7084
+ iter->array_buffer = &tr->max_buffer;
64537085 iter->cpu_file = tracing_get_cpu(inode);
64547086 m->private = iter;
64557087 file->private_data = m;
....@@ -6459,6 +7091,11 @@
64597091 trace_array_put(tr);
64607092
64617093 return ret;
7094
+}
7095
+
7096
+static void tracing_swap_cpu_buffer(void *tr)
7097
+{
7098
+ update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
64627099 }
64637100
64647101 static ssize_t
....@@ -6486,6 +7123,15 @@
64867123 goto out;
64877124 }
64887125
7126
+ local_irq_disable();
7127
+ arch_spin_lock(&tr->max_lock);
7128
+ if (tr->cond_snapshot)
7129
+ ret = -EBUSY;
7130
+ arch_spin_unlock(&tr->max_lock);
7131
+ local_irq_enable();
7132
+ if (ret)
7133
+ goto out;
7134
+
64897135 switch (val) {
64907136 case 0:
64917137 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
....@@ -6505,25 +7151,27 @@
65057151 #endif
65067152 if (tr->allocated_snapshot)
65077153 ret = resize_buffer_duplicate_size(&tr->max_buffer,
6508
- &tr->trace_buffer, iter->cpu_file);
7154
+ &tr->array_buffer, iter->cpu_file);
65097155 else
65107156 ret = tracing_alloc_snapshot_instance(tr);
65117157 if (ret < 0)
65127158 break;
6513
- local_irq_disable();
65147159 /* Now, we're going to swap */
6515
- if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6516
- update_max_tr(tr, current, smp_processor_id());
6517
- else
6518
- update_max_tr_single(tr, current, iter->cpu_file);
6519
- local_irq_enable();
7160
+ if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7161
+ local_irq_disable();
7162
+ update_max_tr(tr, current, smp_processor_id(), NULL);
7163
+ local_irq_enable();
7164
+ } else {
7165
+ smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7166
+ (void *)tr, 1);
7167
+ }
65207168 break;
65217169 default:
65227170 if (tr->allocated_snapshot) {
65237171 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
65247172 tracing_reset_online_cpus(&tr->max_buffer);
65257173 else
6526
- tracing_reset(&tr->max_buffer, iter->cpu_file);
7174
+ tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
65277175 }
65287176 break;
65297177 }
....@@ -6567,6 +7215,7 @@
65677215 struct ftrace_buffer_info *info;
65687216 int ret;
65697217
7218
+ /* The following checks for tracefs lockdown */
65707219 ret = tracing_buffers_open(inode, filp);
65717220 if (ret < 0)
65727221 return ret;
....@@ -6579,7 +7228,7 @@
65797228 }
65807229
65817230 info->iter.snapshot = true;
6582
- info->iter.trace_buffer = &info->iter.tr->max_buffer;
7231
+ info->iter.array_buffer = &info->iter.tr->max_buffer;
65837232
65847233 return ret;
65857234 }
....@@ -6604,10 +7253,11 @@
66047253 #endif
66057254
66067255 static const struct file_operations set_tracer_fops = {
6607
- .open = tracing_open_generic,
7256
+ .open = tracing_open_generic_tr,
66087257 .read = tracing_set_trace_read,
66097258 .write = tracing_set_trace_write,
66107259 .llseek = generic_file_llseek,
7260
+ .release = tracing_release_generic_tr,
66117261 };
66127262
66137263 static const struct file_operations tracing_pipe_fops = {
....@@ -6688,19 +7338,263 @@
66887338
66897339 #endif /* CONFIG_TRACER_SNAPSHOT */
66907340
7341
+#define TRACING_LOG_ERRS_MAX 8
7342
+#define TRACING_LOG_LOC_MAX 128
7343
+
7344
+#define CMD_PREFIX " Command: "
7345
+
7346
+struct err_info {
7347
+ const char **errs; /* ptr to loc-specific array of err strings */
7348
+ u8 type; /* index into errs -> specific err string */
7349
+ u8 pos; /* MAX_FILTER_STR_VAL = 256 */
7350
+ u64 ts;
7351
+};
7352
+
7353
+struct tracing_log_err {
7354
+ struct list_head list;
7355
+ struct err_info info;
7356
+ char loc[TRACING_LOG_LOC_MAX]; /* err location */
7357
+ char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7358
+};
7359
+
7360
+static DEFINE_MUTEX(tracing_err_log_lock);
7361
+
7362
+static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7363
+{
7364
+ struct tracing_log_err *err;
7365
+
7366
+ if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7367
+ err = kzalloc(sizeof(*err), GFP_KERNEL);
7368
+ if (!err)
7369
+ err = ERR_PTR(-ENOMEM);
7370
+ else
7371
+ tr->n_err_log_entries++;
7372
+
7373
+ return err;
7374
+ }
7375
+
7376
+ err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7377
+ list_del(&err->list);
7378
+
7379
+ return err;
7380
+}
7381
+
7382
+/**
7383
+ * err_pos - find the position of a string within a command for error careting
7384
+ * @cmd: The tracing command that caused the error
7385
+ * @str: The string to position the caret at within @cmd
7386
+ *
7387
+ * Finds the position of the first occurrence of @str within @cmd. The
7388
+ * return value can be passed to tracing_log_err() for caret placement
7389
+ * within @cmd.
7390
+ *
7391
+ * Returns the index within @cmd of the first occurrence of @str or 0
7392
+ * if @str was not found.
7393
+ */
7394
+unsigned int err_pos(char *cmd, const char *str)
7395
+{
7396
+ char *found;
7397
+
7398
+ if (WARN_ON(!strlen(cmd)))
7399
+ return 0;
7400
+
7401
+ found = strstr(cmd, str);
7402
+ if (found)
7403
+ return found - cmd;
7404
+
7405
+ return 0;
7406
+}
7407
+
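
Editorial note: err_pos() simply reports the byte offset of the first occurrence of @str inside @cmd, which later becomes the caret column in tracing/error_log. A small illustrative use (the command string and field name are made up):

	static unsigned int demo_caret_pos(void)
	{
		char cmd[] = "keys=pid:vals=hitcount:bad_field";

		/* "bad_field" starts at offset 23, so the caret will sit under it. */
		return err_pos(cmd, "bad_field");
	}
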
7408
+/**
7409
+ * tracing_log_err - write an error to the tracing error log
7410
+ * @tr: The associated trace array for the error (NULL for top level array)
7411
+ * @loc: A string describing where the error occurred
7412
+ * @cmd: The tracing command that caused the error
7413
+ * @errs: The array of loc-specific static error strings
7414
+ * @type: The index into errs[], which produces the specific static err string
7415
+ * @pos: The position the caret should be placed in the cmd
7416
+ *
7417
+ * Writes an error into tracing/error_log of the form:
7418
+ *
7419
+ * <loc>: error: <text>
7420
+ * Command: <cmd>
7421
+ * ^
7422
+ *
7423
+ * tracing/error_log is a small log file containing the last
7424
+ * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7425
+ * unless there has been a tracing error, and the error log can be
7426
+ * cleared and have its memory freed by writing the empty string in
7427
+ * truncation mode to it i.e. echo > tracing/error_log.
7428
+ *
7429
+ * NOTE: the @errs array along with the @type param are used to
7430
+ * produce a static error string - this string is not copied and saved
7431
+ * when the error is logged - only a pointer to it is saved. See
7432
+ * existing callers for examples of how static strings are typically
7433
+ * defined for use with tracing_log_err().
7434
+ */
7435
+void tracing_log_err(struct trace_array *tr,
7436
+ const char *loc, const char *cmd,
7437
+ const char **errs, u8 type, u8 pos)
7438
+{
7439
+ struct tracing_log_err *err;
7440
+
7441
+ if (!tr)
7442
+ tr = &global_trace;
7443
+
7444
+ mutex_lock(&tracing_err_log_lock);
7445
+ err = get_tracing_log_err(tr);
7446
+ if (PTR_ERR(err) == -ENOMEM) {
7447
+ mutex_unlock(&tracing_err_log_lock);
7448
+ return;
7449
+ }
7450
+
7451
+ snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7452
+ snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7453
+
7454
+ err->info.errs = errs;
7455
+ err->info.type = type;
7456
+ err->info.pos = pos;
7457
+ err->info.ts = local_clock();
7458
+
7459
+ list_add_tail(&err->list, &tr->err_log);
7460
+ mutex_unlock(&tracing_err_log_lock);
7461
+}
7462
+
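
Editorial note: as the kernel-doc says, callers keep an array of static error strings and hand tracing_log_err() an index into it; only a pointer to the chosen string is stored in the eight-entry log. A hedged sketch of the usual calling convention (the error table, location string and command are invented for illustration, not an existing caller):

	/* Static error strings; the log entry only keeps a pointer into this. */
	static const char *demo_errs[] = {
		"Field not found",
		"Too many arguments",
	};

	enum { DEMO_ERR_FIELD_NOT_FOUND, DEMO_ERR_TOO_MANY_ARGS };

	static void demo_report_bad_field(struct trace_array *tr, char *cmd,
					  const char *field)
	{
		/*
		 * Produces an error_log entry roughly like:
		 *
		 *   [  123.456789] demo: error: Field not found
		 *     Command: keys=pid:bad_field
		 *                       ^
		 */
		tracing_log_err(tr, "demo", cmd, demo_errs,
				DEMO_ERR_FIELD_NOT_FOUND, err_pos(cmd, field));
	}
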
7463
+static void clear_tracing_err_log(struct trace_array *tr)
7464
+{
7465
+ struct tracing_log_err *err, *next;
7466
+
7467
+ mutex_lock(&tracing_err_log_lock);
7468
+ list_for_each_entry_safe(err, next, &tr->err_log, list) {
7469
+ list_del(&err->list);
7470
+ kfree(err);
7471
+ }
7472
+
7473
+ tr->n_err_log_entries = 0;
7474
+ mutex_unlock(&tracing_err_log_lock);
7475
+}
7476
+
7477
+static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7478
+{
7479
+ struct trace_array *tr = m->private;
7480
+
7481
+ mutex_lock(&tracing_err_log_lock);
7482
+
7483
+ return seq_list_start(&tr->err_log, *pos);
7484
+}
7485
+
7486
+static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7487
+{
7488
+ struct trace_array *tr = m->private;
7489
+
7490
+ return seq_list_next(v, &tr->err_log, pos);
7491
+}
7492
+
7493
+static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7494
+{
7495
+ mutex_unlock(&tracing_err_log_lock);
7496
+}
7497
+
7498
+static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7499
+{
7500
+ u8 i;
7501
+
7502
+ for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7503
+ seq_putc(m, ' ');
7504
+ for (i = 0; i < pos; i++)
7505
+ seq_putc(m, ' ');
7506
+ seq_puts(m, "^\n");
7507
+}
7508
+
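
Editorial note: tracing_err_log_show_pos() does the caret placement arithmetic. It emits sizeof(CMD_PREFIX) - 1 spaces (the 10 visible characters of " Command: "), then @pos more spaces, then a single '^'. Because the command itself is printed immediately after CMD_PREFIX on the previous line, the caret lands under cmd[pos]; with pos == 4, for example, 10 + 4 = 14 spaces are written and the caret points at the fifth character of the command.
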
7509
+static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7510
+{
7511
+ struct tracing_log_err *err = v;
7512
+
7513
+ if (err) {
7514
+ const char *err_text = err->info.errs[err->info.type];
7515
+ u64 sec = err->info.ts;
7516
+ u32 nsec;
7517
+
7518
+ nsec = do_div(sec, NSEC_PER_SEC);
7519
+ seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7520
+ err->loc, err_text);
7521
+ seq_printf(m, "%s", err->cmd);
7522
+ tracing_err_log_show_pos(m, err->info.pos);
7523
+ }
7524
+
7525
+ return 0;
7526
+}
7527
+
7528
+static const struct seq_operations tracing_err_log_seq_ops = {
7529
+ .start = tracing_err_log_seq_start,
7530
+ .next = tracing_err_log_seq_next,
7531
+ .stop = tracing_err_log_seq_stop,
7532
+ .show = tracing_err_log_seq_show
7533
+};
7534
+
7535
+static int tracing_err_log_open(struct inode *inode, struct file *file)
7536
+{
7537
+ struct trace_array *tr = inode->i_private;
7538
+ int ret = 0;
7539
+
7540
+ ret = tracing_check_open_get_tr(tr);
7541
+ if (ret)
7542
+ return ret;
7543
+
7544
+ /* If this file was opened for write, then erase contents */
7545
+ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7546
+ clear_tracing_err_log(tr);
7547
+
7548
+ if (file->f_mode & FMODE_READ) {
7549
+ ret = seq_open(file, &tracing_err_log_seq_ops);
7550
+ if (!ret) {
7551
+ struct seq_file *m = file->private_data;
7552
+ m->private = tr;
7553
+ } else {
7554
+ trace_array_put(tr);
7555
+ }
7556
+ }
7557
+ return ret;
7558
+}
7559
+
7560
+static ssize_t tracing_err_log_write(struct file *file,
7561
+ const char __user *buffer,
7562
+ size_t count, loff_t *ppos)
7563
+{
7564
+ return count;
7565
+}
7566
+
7567
+static int tracing_err_log_release(struct inode *inode, struct file *file)
7568
+{
7569
+ struct trace_array *tr = inode->i_private;
7570
+
7571
+ trace_array_put(tr);
7572
+
7573
+ if (file->f_mode & FMODE_READ)
7574
+ seq_release(inode, file);
7575
+
7576
+ return 0;
7577
+}
7578
+
7579
+static const struct file_operations tracing_err_log_fops = {
7580
+ .open = tracing_err_log_open,
7581
+ .write = tracing_err_log_write,
7582
+ .read = seq_read,
7583
+ .llseek = tracing_lseek,
7584
+ .release = tracing_err_log_release,
7585
+};
7586
+
66917587 static int tracing_buffers_open(struct inode *inode, struct file *filp)
66927588 {
66937589 struct trace_array *tr = inode->i_private;
66947590 struct ftrace_buffer_info *info;
66957591 int ret;
66967592
6697
- if (tracing_disabled)
6698
- return -ENODEV;
7593
+ ret = tracing_check_open_get_tr(tr);
7594
+ if (ret)
7595
+ return ret;
66997596
6700
- if (trace_array_get(tr) < 0)
6701
- return -ENODEV;
6702
-
6703
- info = kzalloc(sizeof(*info), GFP_KERNEL);
7597
+ info = kvzalloc(sizeof(*info), GFP_KERNEL);
67047598 if (!info) {
67057599 trace_array_put(tr);
67067600 return -ENOMEM;
....@@ -6711,14 +7605,14 @@
67117605 info->iter.tr = tr;
67127606 info->iter.cpu_file = tracing_get_cpu(inode);
67137607 info->iter.trace = tr->current_trace;
6714
- info->iter.trace_buffer = &tr->trace_buffer;
7608
+ info->iter.array_buffer = &tr->array_buffer;
67157609 info->spare = NULL;
67167610 /* Force reading ring buffer for first read */
67177611 info->read = (unsigned int)-1;
67187612
67197613 filp->private_data = info;
67207614
6721
- tr->current_trace->ref++;
7615
+ tr->trace_ref++;
67227616
67237617 mutex_unlock(&trace_types_lock);
67247618
....@@ -6756,7 +7650,7 @@
67567650 #endif
67577651
67587652 if (!info->spare) {
6759
- info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
7653
+ info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
67607654 iter->cpu_file);
67617655 if (IS_ERR(info->spare)) {
67627656 ret = PTR_ERR(info->spare);
....@@ -6774,7 +7668,7 @@
67747668
67757669 again:
67767670 trace_access_lock(iter->cpu_file);
6777
- ret = ring_buffer_read_page(iter->trace_buffer->buffer,
7671
+ ret = ring_buffer_read_page(iter->array_buffer->buffer,
67787672 &info->spare,
67797673 count,
67807674 iter->cpu_file, 0);
....@@ -6785,7 +7679,7 @@
67857679 if ((filp->f_flags & O_NONBLOCK))
67867680 return -EAGAIN;
67877681
6788
- ret = wait_on_pipe(iter, false);
7682
+ ret = wait_on_pipe(iter, 0);
67897683 if (ret)
67907684 return ret;
67917685
....@@ -6819,14 +7713,14 @@
68197713
68207714 mutex_lock(&trace_types_lock);
68217715
6822
- iter->tr->current_trace->ref--;
7716
+ iter->tr->trace_ref--;
68237717
68247718 __trace_array_put(iter->tr);
68257719
68267720 if (info->spare)
6827
- ring_buffer_free_read_page(iter->trace_buffer->buffer,
7721
+ ring_buffer_free_read_page(iter->array_buffer->buffer,
68287722 info->spare_cpu, info->spare);
6829
- kfree(info);
7723
+ kvfree(info);
68307724
68317725 mutex_unlock(&trace_types_lock);
68327726
....@@ -6834,7 +7728,7 @@
68347728 }
68357729
68367730 struct buffer_ref {
6837
- struct ring_buffer *buffer;
7731
+ struct trace_buffer *buffer;
68387732 void *page;
68397733 int cpu;
68407734 refcount_t refcount;
....@@ -6871,10 +7765,7 @@
68717765
68727766 /* Pipe buffer operations for a buffer. */
68737767 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6874
- .can_merge = 0,
6875
- .confirm = generic_pipe_buf_confirm,
68767768 .release = buffer_pipe_buf_release,
6877
- .steal = generic_pipe_buf_nosteal,
68787769 .get = buffer_pipe_buf_get,
68797770 };
68807771
....@@ -6930,7 +7821,7 @@
69307821
69317822 again:
69327823 trace_access_lock(iter->cpu_file);
6933
- entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7824
+ entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
69347825
69357826 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
69367827 struct page *page;
....@@ -6943,7 +7834,7 @@
69437834 }
69447835
69457836 refcount_set(&ref->refcount, 1);
6946
- ref->buffer = iter->trace_buffer->buffer;
7837
+ ref->buffer = iter->array_buffer->buffer;
69477838 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
69487839 if (IS_ERR(ref->page)) {
69497840 ret = PTR_ERR(ref->page);
....@@ -6971,7 +7862,7 @@
69717862 spd.nr_pages++;
69727863 *ppos += PAGE_SIZE;
69737864
6974
- entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7865
+ entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
69757866 }
69767867
69777868 trace_access_unlock(iter->cpu_file);
....@@ -6986,7 +7877,7 @@
69867877 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
69877878 goto out;
69887879
6989
- ret = wait_on_pipe(iter, true);
7880
+ ret = wait_on_pipe(iter, iter->tr->buffer_percent);
69907881 if (ret)
69917882 goto out;
69927883
....@@ -7015,7 +7906,7 @@
70157906 {
70167907 struct inode *inode = file_inode(filp);
70177908 struct trace_array *tr = inode->i_private;
7018
- struct trace_buffer *trace_buf = &tr->trace_buffer;
7909
+ struct array_buffer *trace_buf = &tr->array_buffer;
70197910 int cpu = tracing_get_cpu(inode);
70207911 struct trace_seq *s;
70217912 unsigned long cnt;
....@@ -7086,14 +7977,23 @@
70867977 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
70877978 size_t cnt, loff_t *ppos)
70887979 {
7089
- unsigned long *p = filp->private_data;
7090
- char buf[64]; /* Not too big for a shallow stack */
7980
+ ssize_t ret;
7981
+ char *buf;
70917982 int r;
70927983
7093
- r = scnprintf(buf, 63, "%ld", *p);
7094
- buf[r++] = '\n';
7984
+ /* 256 should be plenty to hold the amount needed */
7985
+ buf = kmalloc(256, GFP_KERNEL);
7986
+ if (!buf)
7987
+ return -ENOMEM;
70957988
7096
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7989
+ r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7990
+ ftrace_update_tot_cnt,
7991
+ ftrace_number_of_pages,
7992
+ ftrace_number_of_groups);
7993
+
7994
+ ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7995
+ kfree(buf);
7996
+ return ret;
70977997 }
70987998
70997999 static const struct file_operations tracing_dyn_info_fops = {
....@@ -7287,7 +8187,7 @@
72878187
72888188 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
72898189
7290
- WARN_ONCE(!tr->percpu_dir,
8190
+ MEM_FAIL(!tr->percpu_dir,
72918191 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
72928192
72938193 return tr->percpu_dir;
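
Editorial note: MEM_FAIL(), which also replaces WARN_ONCE()/WARN_ON() in several hunks below, is defined in trace.h rather than in this file; it reports the failure and also evaluates to the condition, so it can still be used inside an if (). A rough sketch of the kind of helper assumed here (illustrative, not the verbatim trace.h definition):

	/* Illustrative only: report a failure once and hand the condition back. */
	#define MEM_FAIL(condition, fmt, ...)				\
	({								\
		int __cond = !!(condition);				\
									\
		if (unlikely(__cond))					\
			pr_err_once("ERROR: " fmt, ##__VA_ARGS__);	\
		unlikely(__cond);					\
	})
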
....@@ -7396,12 +8296,33 @@
73968296 return cnt;
73978297 }
73988298
8299
+static int tracing_open_options(struct inode *inode, struct file *filp)
8300
+{
8301
+ struct trace_option_dentry *topt = inode->i_private;
8302
+ int ret;
8303
+
8304
+ ret = tracing_check_open_get_tr(topt->tr);
8305
+ if (ret)
8306
+ return ret;
8307
+
8308
+ filp->private_data = inode->i_private;
8309
+ return 0;
8310
+}
8311
+
8312
+static int tracing_release_options(struct inode *inode, struct file *file)
8313
+{
8314
+ struct trace_option_dentry *topt = file->private_data;
8315
+
8316
+ trace_array_put(topt->tr);
8317
+ return 0;
8318
+}
73998319
74008320 static const struct file_operations trace_options_fops = {
7401
- .open = tracing_open_generic,
8321
+ .open = tracing_open_options,
74028322 .read = trace_options_read,
74038323 .write = trace_options_write,
74048324 .llseek = generic_file_llseek,
8325
+ .release = tracing_release_options,
74058326 };
74068327
74078328 /*
....@@ -7608,7 +8529,7 @@
76088529 for (cnt = 0; opts[cnt].name; cnt++) {
76098530 create_trace_option_file(tr, &topts[cnt], flags,
76108531 &opts[cnt]);
7611
- WARN_ONCE(topts[cnt].entry == NULL,
8532
+ MEM_FAIL(topts[cnt].entry == NULL,
76128533 "Failed to create trace option: %s",
76138534 opts[cnt].name);
76148535 }
....@@ -7665,7 +8586,7 @@
76658586 size_t cnt, loff_t *ppos)
76668587 {
76678588 struct trace_array *tr = filp->private_data;
7668
- struct ring_buffer *buffer = tr->trace_buffer.buffer;
8589
+ struct trace_buffer *buffer = tr->array_buffer.buffer;
76698590 unsigned long val;
76708591 int ret;
76718592
....@@ -7702,13 +8623,57 @@
77028623 .llseek = default_llseek,
77038624 };
77048625
7705
-struct dentry *trace_instance_dir;
8626
+static ssize_t
8627
+buffer_percent_read(struct file *filp, char __user *ubuf,
8628
+ size_t cnt, loff_t *ppos)
8629
+{
8630
+ struct trace_array *tr = filp->private_data;
8631
+ char buf[64];
8632
+ int r;
8633
+
8634
+ r = tr->buffer_percent;
8635
+ r = sprintf(buf, "%d\n", r);
8636
+
8637
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8638
+}
8639
+
8640
+static ssize_t
8641
+buffer_percent_write(struct file *filp, const char __user *ubuf,
8642
+ size_t cnt, loff_t *ppos)
8643
+{
8644
+ struct trace_array *tr = filp->private_data;
8645
+ unsigned long val;
8646
+ int ret;
8647
+
8648
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8649
+ if (ret)
8650
+ return ret;
8651
+
8652
+ if (val > 100)
8653
+ return -EINVAL;
8654
+
8655
+ tr->buffer_percent = val;
8656
+
8657
+ (*ppos)++;
8658
+
8659
+ return cnt;
8660
+}
8661
+
8662
+static const struct file_operations buffer_percent_fops = {
8663
+ .open = tracing_open_generic_tr,
8664
+ .read = buffer_percent_read,
8665
+ .write = buffer_percent_write,
8666
+ .release = tracing_release_generic_tr,
8667
+ .llseek = default_llseek,
8668
+};
8669
+
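
Editorial note: buffer_percent takes a value from 0 to 100 and, as the splice hunk earlier shows (wait_on_pipe(iter, iter->tr->buffer_percent)), is passed down as the "full" watermark a reader waits for; 0 keeps the old behaviour of waking on any data, and init_tracer_tracefs() below defaults it to 50. A hedged sketch of the fullness test this implies; ring_buffer_nr_pages() and ring_buffer_nr_dirty_pages() are real ring-buffer helpers, but the actual wake-up logic lives in ring_buffer.c, not in this diff:

	/* Illustrative: has the per-CPU buffer reached the requested fill level? */
	static bool buffer_full_enough(struct trace_buffer *buffer, int cpu, int full)
	{
		size_t nr_pages = ring_buffer_nr_pages(buffer, cpu);
		size_t dirty = ring_buffer_nr_dirty_pages(buffer, cpu);

		if (!full)	/* 0 means "wake on any data" */
			return true;

		return dirty * 100 >= (size_t)full * nr_pages;
	}
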
8670
+static struct dentry *trace_instance_dir;
77068671
77078672 static void
77088673 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
77098674
77108675 static int
7711
-allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
8676
+allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
77128677 {
77138678 enum ring_buffer_flags rb_flags;
77148679
....@@ -7728,8 +8693,8 @@
77288693 }
77298694
77308695 /* Allocate the first page for all buffers */
7731
- set_buffer_entries(&tr->trace_buffer,
7732
- ring_buffer_size(tr->trace_buffer.buffer, 0));
8696
+ set_buffer_entries(&tr->array_buffer,
8697
+ ring_buffer_size(tr->array_buffer.buffer, 0));
77338698
77348699 return 0;
77358700 }
....@@ -7738,18 +8703,18 @@
77388703 {
77398704 int ret;
77408705
7741
- ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
8706
+ ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
77428707 if (ret)
77438708 return ret;
77448709
77458710 #ifdef CONFIG_TRACER_MAX_TRACE
77468711 ret = allocate_trace_buffer(tr, &tr->max_buffer,
77478712 allocate_snapshot ? size : 1);
7748
- if (WARN_ON(ret)) {
7749
- ring_buffer_free(tr->trace_buffer.buffer);
7750
- tr->trace_buffer.buffer = NULL;
7751
- free_percpu(tr->trace_buffer.data);
7752
- tr->trace_buffer.data = NULL;
8713
+ if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8714
+ ring_buffer_free(tr->array_buffer.buffer);
8715
+ tr->array_buffer.buffer = NULL;
8716
+ free_percpu(tr->array_buffer.data);
8717
+ tr->array_buffer.data = NULL;
77538718 return -ENOMEM;
77548719 }
77558720 tr->allocated_snapshot = allocate_snapshot;
....@@ -7761,22 +8726,10 @@
77618726 allocate_snapshot = false;
77628727 #endif
77638728
7764
- /*
7765
- * Because of some magic with the way alloc_percpu() works on
7766
- * x86_64, we need to synchronize the pgd of all the tables,
7767
- * otherwise the trace events that happen in x86_64 page fault
7768
- * handlers can't cope with accessing the chance that a
7769
- * alloc_percpu()'d memory might be touched in the page fault trace
7770
- * event. Oh, and we need to audit all other alloc_percpu() and vmalloc()
7771
- * calls in tracing, because something might get triggered within a
7772
- * page fault trace event!
7773
- */
7774
- vmalloc_sync_mappings();
7775
-
77768729 return 0;
77778730 }
77788731
7779
-static void free_trace_buffer(struct trace_buffer *buf)
8732
+static void free_trace_buffer(struct array_buffer *buf)
77808733 {
77818734 if (buf->buffer) {
77828735 ring_buffer_free(buf->buffer);
....@@ -7791,7 +8744,7 @@
77918744 if (!tr)
77928745 return;
77938746
7794
- free_trace_buffer(&tr->trace_buffer);
8747
+ free_trace_buffer(&tr->array_buffer);
77958748
77968749 #ifdef CONFIG_TRACER_MAX_TRACE
77978750 free_trace_buffer(&tr->max_buffer);
....@@ -7818,28 +8771,68 @@
78188771 static void update_tracer_options(struct trace_array *tr)
78198772 {
78208773 mutex_lock(&trace_types_lock);
8774
+ tracer_options_updated = true;
78218775 __update_tracer_options(tr);
78228776 mutex_unlock(&trace_types_lock);
78238777 }
78248778
7825
-static int instance_mkdir(const char *name)
8779
+/* Must have trace_types_lock held */
8780
+struct trace_array *trace_array_find(const char *instance)
8781
+{
8782
+ struct trace_array *tr, *found = NULL;
8783
+
8784
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8785
+ if (tr->name && strcmp(tr->name, instance) == 0) {
8786
+ found = tr;
8787
+ break;
8788
+ }
8789
+ }
8790
+
8791
+ return found;
8792
+}
8793
+
8794
+struct trace_array *trace_array_find_get(const char *instance)
8795
+{
8796
+ struct trace_array *tr;
8797
+
8798
+ mutex_lock(&trace_types_lock);
8799
+ tr = trace_array_find(instance);
8800
+ if (tr)
8801
+ tr->ref++;
8802
+ mutex_unlock(&trace_types_lock);
8803
+
8804
+ return tr;
8805
+}
8806
+
8807
+static int trace_array_create_dir(struct trace_array *tr)
8808
+{
8809
+ int ret;
8810
+
8811
+ tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8812
+ if (!tr->dir)
8813
+ return -EINVAL;
8814
+
8815
+ ret = event_trace_add_tracer(tr->dir, tr);
8816
+ if (ret) {
8817
+ tracefs_remove(tr->dir);
8818
+ return ret;
8819
+ }
8820
+
8821
+ init_tracer_tracefs(tr, tr->dir);
8822
+ __update_tracer_options(tr);
8823
+
8824
+ return ret;
8825
+}
8826
+
8827
+static struct trace_array *trace_array_create(const char *name)
78268828 {
78278829 struct trace_array *tr;
78288830 int ret;
78298831
7830
- mutex_lock(&event_mutex);
7831
- mutex_lock(&trace_types_lock);
7832
-
7833
- ret = -EEXIST;
7834
- list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7835
- if (tr->name && strcmp(tr->name, name) == 0)
7836
- goto out_unlock;
7837
- }
7838
-
78398832 ret = -ENOMEM;
78408833 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
78418834 if (!tr)
7842
- goto out_unlock;
8835
+ return ERR_PTR(ret);
78438836
78448837 tr->name = kstrdup(name, GFP_KERNEL);
78458838 if (!tr->name)
....@@ -7861,70 +8854,112 @@
78618854 INIT_LIST_HEAD(&tr->systems);
78628855 INIT_LIST_HEAD(&tr->events);
78638856 INIT_LIST_HEAD(&tr->hist_vars);
8857
+ INIT_LIST_HEAD(&tr->err_log);
78648858
78658859 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
78668860 goto out_free_tr;
78678861
7868
- tr->dir = tracefs_create_dir(name, trace_instance_dir);
7869
- if (!tr->dir)
8862
+ if (ftrace_allocate_ftrace_ops(tr) < 0)
78708863 goto out_free_tr;
7871
-
7872
- ret = event_trace_add_tracer(tr->dir, tr);
7873
- if (ret) {
7874
- tracefs_remove_recursive(tr->dir);
7875
- goto out_free_tr;
7876
- }
78778864
78788865 ftrace_init_trace_array(tr);
78798866
7880
- init_tracer_tracefs(tr, tr->dir);
78818867 init_trace_flags_index(tr);
7882
- __update_tracer_options(tr);
8868
+
8869
+ if (trace_instance_dir) {
8870
+ ret = trace_array_create_dir(tr);
8871
+ if (ret)
8872
+ goto out_free_tr;
8873
+ } else
8874
+ __trace_early_add_events(tr);
78838875
78848876 list_add(&tr->list, &ftrace_trace_arrays);
78858877
7886
- mutex_unlock(&trace_types_lock);
7887
- mutex_unlock(&event_mutex);
8878
+ tr->ref++;
78888879
7889
- return 0;
8880
+ return tr;
78908881
78918882 out_free_tr:
8883
+ ftrace_free_ftrace_ops(tr);
78928884 free_trace_buffers(tr);
78938885 free_cpumask_var(tr->tracing_cpumask);
78948886 kfree(tr->name);
78958887 kfree(tr);
78968888
7897
- out_unlock:
7898
- mutex_unlock(&trace_types_lock);
7899
- mutex_unlock(&event_mutex);
7900
-
7901
- return ret;
7902
-
8889
+ return ERR_PTR(ret);
79038890 }
79048891
7905
-static int instance_rmdir(const char *name)
8892
+static int instance_mkdir(const char *name)
79068893 {
79078894 struct trace_array *tr;
7908
- int found = 0;
79098895 int ret;
7910
- int i;
79118896
79128897 mutex_lock(&event_mutex);
79138898 mutex_lock(&trace_types_lock);
79148899
7915
- ret = -ENODEV;
7916
- list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7917
- if (tr->name && strcmp(tr->name, name) == 0) {
7918
- found = 1;
7919
- break;
7920
- }
7921
- }
7922
- if (!found)
8900
+ ret = -EEXIST;
8901
+ if (trace_array_find(name))
79238902 goto out_unlock;
79248903
7925
- ret = -EBUSY;
7926
- if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7927
- goto out_unlock;
8904
+ tr = trace_array_create(name);
8905
+
8906
+ ret = PTR_ERR_OR_ZERO(tr);
8907
+
8908
+out_unlock:
8909
+ mutex_unlock(&trace_types_lock);
8910
+ mutex_unlock(&event_mutex);
8911
+ return ret;
8912
+}
8913
+
8914
+/**
8915
+ * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8916
+ * @name: The name of the trace array to be looked up/created.
8917
+ *
8918
+ * Returns pointer to trace array with given name.
8919
+ * NULL, if it cannot be created.
8920
+ *
8921
+ * NOTE: This function increments the reference counter associated with the
8922
+ * trace array returned. This makes sure it cannot be freed while in use.
8923
+ * Use trace_array_put() once the trace array is no longer needed.
8924
+ * If the trace_array is to be freed, trace_array_destroy() needs to
8925
+ * be called after the trace_array_put(), or simply let user space delete
8926
+ * it from the tracefs instances directory. But until the
8927
+ * trace_array_put() is called, user space can not delete it.
8928
+ *
8929
+ */
8930
+struct trace_array *trace_array_get_by_name(const char *name)
8931
+{
8932
+ struct trace_array *tr;
8933
+
8934
+ mutex_lock(&event_mutex);
8935
+ mutex_lock(&trace_types_lock);
8936
+
8937
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8938
+ if (tr->name && strcmp(tr->name, name) == 0)
8939
+ goto out_unlock;
8940
+ }
8941
+
8942
+ tr = trace_array_create(name);
8943
+
8944
+ if (IS_ERR(tr))
8945
+ tr = NULL;
8946
+out_unlock:
8947
+ if (tr)
8948
+ tr->ref++;
8949
+
8950
+ mutex_unlock(&trace_types_lock);
8951
+ mutex_unlock(&event_mutex);
8952
+ return tr;
8953
+}
8954
+EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8955
+
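
Editorial note: the kernel-doc above spells out the refcounting contract: the array returned by trace_array_get_by_name() is pinned until trace_array_put(), and an instance created this way is only freed by trace_array_destroy() (or by removing it through tracefs) after that put. A minimal sketch of how a module might use the API (the instance name is illustrative):

	static struct trace_array *demo_tr;

	static int __init demo_init(void)
	{
		/* Looks up "demo" under instances/, creating it if needed. */
		demo_tr = trace_array_get_by_name("demo");
		if (!demo_tr)
			return -ENOMEM;
		return 0;
	}

	static void __exit demo_exit(void)
	{
		/* Drop our reference first, then tear the instance down. */
		trace_array_put(demo_tr);
		trace_array_destroy(demo_tr);
	}
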
8956
+static int __remove_instance(struct trace_array *tr)
8957
+{
8958
+ int i;
8959
+
8960
+ /* Reference counter for a newly created trace array = 1. */
8961
+ if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8962
+ return -EBUSY;
79288963
79298964 list_del(&tr->list);
79308965
....@@ -7939,8 +8974,9 @@
79398974 event_trace_del_tracer(tr);
79408975 ftrace_clear_pids(tr);
79418976 ftrace_destroy_function_files(tr);
7942
- tracefs_remove_recursive(tr->dir);
8977
+ tracefs_remove(tr->dir);
79438978 free_trace_buffers(tr);
8979
+ clear_tracing_err_log(tr);
79448980
79458981 for (i = 0; i < tr->nr_topts; i++) {
79468982 kfree(tr->topts[i].topts);
....@@ -7951,9 +8987,50 @@
79518987 kfree(tr->name);
79528988 kfree(tr);
79538989
7954
- ret = 0;
8990
+ return 0;
8991
+}
79558992
7956
- out_unlock:
8993
+int trace_array_destroy(struct trace_array *this_tr)
8994
+{
8995
+ struct trace_array *tr;
8996
+ int ret;
8997
+
8998
+ if (!this_tr)
8999
+ return -EINVAL;
9000
+
9001
+ mutex_lock(&event_mutex);
9002
+ mutex_lock(&trace_types_lock);
9003
+
9004
+ ret = -ENODEV;
9005
+
9006
+ /* Making sure trace array exists before destroying it. */
9007
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9008
+ if (tr == this_tr) {
9009
+ ret = __remove_instance(tr);
9010
+ break;
9011
+ }
9012
+ }
9013
+
9014
+ mutex_unlock(&trace_types_lock);
9015
+ mutex_unlock(&event_mutex);
9016
+
9017
+ return ret;
9018
+}
9019
+EXPORT_SYMBOL_GPL(trace_array_destroy);
9020
+
9021
+static int instance_rmdir(const char *name)
9022
+{
9023
+ struct trace_array *tr;
9024
+ int ret;
9025
+
9026
+ mutex_lock(&event_mutex);
9027
+ mutex_lock(&trace_types_lock);
9028
+
9029
+ ret = -ENODEV;
9030
+ tr = trace_array_find(name);
9031
+ if (tr)
9032
+ ret = __remove_instance(tr);
9033
+
79579034 mutex_unlock(&trace_types_lock);
79589035 mutex_unlock(&event_mutex);
79599036
....@@ -7962,11 +9039,27 @@
79629039
79639040 static __init void create_trace_instances(struct dentry *d_tracer)
79649041 {
9042
+ struct trace_array *tr;
9043
+
79659044 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
79669045 instance_mkdir,
79679046 instance_rmdir);
7968
- if (WARN_ON(!trace_instance_dir))
9047
+ if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
79699048 return;
9049
+
9050
+ mutex_lock(&event_mutex);
9051
+ mutex_lock(&trace_types_lock);
9052
+
9053
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9054
+ if (!tr->name)
9055
+ continue;
9056
+ if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9057
+ "Failed to create instance directory\n"))
9058
+ break;
9059
+ }
9060
+
9061
+ mutex_unlock(&trace_types_lock);
9062
+ mutex_unlock(&event_mutex);
79709063 }
79719064
79729065 static void
....@@ -8023,20 +9116,27 @@
80239116 trace_create_file("timestamp_mode", 0444, d_tracer, tr,
80249117 &trace_time_stamp_mode_fops);
80259118
9119
+ tr->buffer_percent = 50;
9120
+
9121
+ trace_create_file("buffer_percent", 0444, d_tracer,
9122
+ tr, &buffer_percent_fops);
9123
+
80269124 create_trace_options_dir(tr);
80279125
80289126 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8029
- trace_create_file("tracing_max_latency", 0644, d_tracer,
8030
- &tr->max_latency, &tracing_max_lat_fops);
9127
+ trace_create_maxlat_file(tr, d_tracer);
80319128 #endif
80329129
80339130 if (ftrace_create_function_files(tr, d_tracer))
8034
- WARN(1, "Could not allocate function filter files");
9131
+ MEM_FAIL(1, "Could not allocate function filter files");
80359132
80369133 #ifdef CONFIG_TRACER_SNAPSHOT
80379134 trace_create_file("snapshot", 0644, d_tracer,
80389135 tr, &snapshot_fops);
80399136 #endif
9137
+
9138
+ trace_create_file("error_log", 0644, d_tracer,
9139
+ tr, &tracing_err_log_fops);
80409140
80419141 for_each_tracing_cpu(cpu)
80429142 tracing_init_tracefs_percpu(tr, cpu);
....@@ -8044,6 +9144,7 @@
80449144 ftrace_init_tracefs(tr, d_tracer);
80459145 }
80469146
9147
+#ifndef CONFIG_TRACEFS_DISABLE_AUTOMOUNT
80479148 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
80489149 {
80499150 struct vfsmount *mnt;
....@@ -8065,6 +9166,7 @@
80659166
80669167 return mnt;
80679168 }
9169
+#endif
80689170
80699171 /**
80709172 * tracing_init_dentry - initialize top level trace array
....@@ -8073,19 +9175,23 @@
80739175 * directory. It is called via fs_initcall() by any of the boot up code
80749176 * and expects to return the dentry of the top level tracing directory.
80759177 */
8076
-struct dentry *tracing_init_dentry(void)
9178
+int tracing_init_dentry(void)
80779179 {
80789180 struct trace_array *tr = &global_trace;
80799181
9182
+ if (security_locked_down(LOCKDOWN_TRACEFS)) {
9183
+ pr_warn("Tracing disabled due to lockdown\n");
9184
+ return -EPERM;
9185
+ }
9186
+
80809187 /* The top level trace array uses NULL as parent */
80819188 if (tr->dir)
8082
- return NULL;
9189
+ return 0;
80839190
8084
- if (WARN_ON(!tracefs_initialized()) ||
8085
- (IS_ENABLED(CONFIG_DEBUG_FS) &&
8086
- WARN_ON(!debugfs_initialized())))
8087
- return ERR_PTR(-ENODEV);
9191
+ if (WARN_ON(!tracefs_initialized()))
9192
+ return -ENODEV;
80889193
9194
+#ifndef CONFIG_TRACEFS_DISABLE_AUTOMOUNT
80899195 /*
80909196 * As there may still be users that expect the tracing
80919197 * files to exist in debugfs/tracing, we must automount
....@@ -8094,12 +9200,11 @@
80949200 */
80959201 tr->dir = debugfs_create_automount("tracing", NULL,
80969202 trace_automount, NULL);
8097
- if (!tr->dir) {
8098
- pr_warn_once("Could not create debugfs directory 'tracing'\n");
8099
- return ERR_PTR(-ENOMEM);
8100
- }
9203
+#else
9204
+ tr->dir = ERR_PTR(-ENODEV);
9205
+#endif
81019206
8102
- return NULL;
9207
+ return 0;
81039208 }
81049209
81059210 extern struct trace_eval_map *__start_ftrace_eval_maps[];
....@@ -8175,7 +9280,7 @@
81759280 break;
81769281 }
81779282
8178
- return 0;
9283
+ return NOTIFY_OK;
81799284 }
81809285
81819286 static struct notifier_block trace_module_nb = {
....@@ -8186,48 +9291,48 @@
81869291
81879292 static __init int tracer_init_tracefs(void)
81889293 {
8189
- struct dentry *d_tracer;
9294
+ int ret;
81909295
81919296 trace_access_lock_init();
81929297
8193
- d_tracer = tracing_init_dentry();
8194
- if (IS_ERR(d_tracer))
9298
+ ret = tracing_init_dentry();
9299
+ if (ret)
81959300 return 0;
81969301
81979302 event_trace_init();
81989303
8199
- init_tracer_tracefs(&global_trace, d_tracer);
8200
- ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
9304
+ init_tracer_tracefs(&global_trace, NULL);
9305
+ ftrace_init_tracefs_toplevel(&global_trace, NULL);
82019306
8202
- trace_create_file("tracing_thresh", 0644, d_tracer,
9307
+ trace_create_file("tracing_thresh", 0644, NULL,
82039308 &global_trace, &tracing_thresh_fops);
82049309
8205
- trace_create_file("README", 0444, d_tracer,
9310
+ trace_create_file("README", 0444, NULL,
82069311 NULL, &tracing_readme_fops);
82079312
8208
- trace_create_file("saved_cmdlines", 0444, d_tracer,
9313
+ trace_create_file("saved_cmdlines", 0444, NULL,
82099314 NULL, &tracing_saved_cmdlines_fops);
82109315
8211
- trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9316
+ trace_create_file("saved_cmdlines_size", 0644, NULL,
82129317 NULL, &tracing_saved_cmdlines_size_fops);
82139318
8214
- trace_create_file("saved_tgids", 0444, d_tracer,
9319
+ trace_create_file("saved_tgids", 0444, NULL,
82159320 NULL, &tracing_saved_tgids_fops);
82169321
82179322 trace_eval_init();
82189323
8219
- trace_create_eval_file(d_tracer);
9324
+ trace_create_eval_file(NULL);
82209325
82219326 #ifdef CONFIG_MODULES
82229327 register_module_notifier(&trace_module_nb);
82239328 #endif
82249329
82259330 #ifdef CONFIG_DYNAMIC_FTRACE
8226
- trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8227
- &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
9331
+ trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9332
+ NULL, &tracing_dyn_info_fops);
82289333 #endif
82299334
8230
- create_trace_instances(d_tracer);
9335
+ create_trace_instances(NULL);
82319336
82329337 update_tracer_options(&global_trace);
82339338
....@@ -8237,8 +9342,17 @@
82379342 static int trace_panic_handler(struct notifier_block *this,
82389343 unsigned long event, void *unused)
82399344 {
9345
+ bool ftrace_check = false;
9346
+
9347
+ trace_android_vh_ftrace_oops_enter(&ftrace_check);
9348
+
9349
+ if (ftrace_check)
9350
+ return NOTIFY_OK;
9351
+
82409352 if (ftrace_dump_on_oops)
82419353 ftrace_dump(ftrace_dump_on_oops);
9354
+
9355
+ trace_android_vh_ftrace_oops_exit(&ftrace_check);
82429356 return NOTIFY_OK;
82439357 }
82449358
....@@ -8252,6 +9366,13 @@
82529366 unsigned long val,
82539367 void *data)
82549368 {
9369
+ bool ftrace_check = false;
9370
+
9371
+ trace_android_vh_ftrace_oops_enter(&ftrace_check);
9372
+
9373
+ if (ftrace_check)
9374
+ return NOTIFY_OK;
9375
+
82559376 switch (val) {
82569377 case DIE_OOPS:
82579378 if (ftrace_dump_on_oops)
....@@ -8260,6 +9381,8 @@
82609381 default:
82619382 break;
82629383 }
9384
+
9385
+ trace_android_vh_ftrace_oops_exit(&ftrace_check);
82639386 return NOTIFY_OK;
82649387 }
82659388
....@@ -8284,6 +9407,8 @@
82849407 void
82859408 trace_printk_seq(struct trace_seq *s)
82869409 {
9410
+ bool dump_printk = true;
9411
+
82879412 /* Probably should print a warning here. */
82889413 if (s->seq.len >= TRACE_MAX_PRINT)
82899414 s->seq.len = TRACE_MAX_PRINT;
....@@ -8299,7 +9424,9 @@
82999424 /* should be zero ended, but we are paranoid. */
83009425 s->buffer[s->seq.len] = 0;
83019426
8302
- printk(KERN_TRACE "%s", s->buffer);
9427
+ trace_android_vh_ftrace_dump_buffer(s, &dump_printk);
9428
+ if (dump_printk)
9429
+ printk(KERN_TRACE "%s", s->buffer);
83039430
83049431 trace_seq_init(s);
83059432 }
....@@ -8309,13 +9436,13 @@
83099436 iter->tr = &global_trace;
83109437 iter->trace = iter->tr->current_trace;
83119438 iter->cpu_file = RING_BUFFER_ALL_CPUS;
8312
- iter->trace_buffer = &global_trace.trace_buffer;
9439
+ iter->array_buffer = &global_trace.array_buffer;
83139440
83149441 if (iter->trace && iter->trace->open)
83159442 iter->trace->open(iter);
83169443
83179444 /* Annotate start of buffers if we had overruns */
8318
- if (ring_buffer_overruns(iter->trace_buffer->buffer))
9445
+ if (ring_buffer_overruns(iter->array_buffer->buffer))
83199446 iter->iter_flags |= TRACE_FILE_ANNOTATE;
83209447
83219448 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
....@@ -8332,6 +9459,8 @@
83329459 unsigned int old_userobj;
83339460 unsigned long flags;
83349461 int cnt = 0, cpu;
9462
+ bool ftrace_check = false;
9463
+ unsigned long size;
83359464
83369465 /* Only allow one dump user at a time. */
83379466 if (atomic_inc_return(&dump_running) != 1) {
....@@ -8354,15 +9483,23 @@
83549483
83559484 /* Simulate the iterator */
83569485 trace_init_global_iter(&iter);
9486
+ /* Can not use kmalloc for iter.temp */
9487
+ iter.temp = static_temp_buf;
9488
+ iter.temp_size = STATIC_TEMP_BUF_SIZE;
83579489
83589490 for_each_tracing_cpu(cpu) {
8359
- atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
9491
+ atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9492
+ size = ring_buffer_size(iter.array_buffer->buffer, cpu);
9493
+ trace_android_vh_ftrace_size_check(size, &ftrace_check);
83609494 }
83619495
83629496 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
83639497
83649498 /* don't look at user memory in panic mode */
83659499 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9500
+
9501
+ if (ftrace_check)
9502
+ goto out_enable;
83669503
83679504 switch (oops_dump_mode) {
83689505 case DUMP_ALL:
....@@ -8387,13 +9524,14 @@
83879524 }
83889525
83899526 /*
8390
- * We need to stop all tracing on all CPUS to read the
9527
+ * We need to stop all tracing on all CPUS to read
83919528 * the next buffer. This is a bit expensive, but is
83929529 * not done often. We fill all what we can read,
83939530 * and then release the locks again.
83949531 */
83959532
83969533 while (!trace_empty(&iter)) {
9534
+ ftrace_check = true;
83979535
83989536 if (!cnt)
83999537 printk(KERN_TRACE "---------------------------------\n");
....@@ -8401,7 +9539,9 @@
84019539 cnt++;
84029540
84039541 trace_iterator_reset(&iter);
8404
- iter.iter_flags |= TRACE_FILE_LAT_FMT;
9542
+ trace_android_vh_ftrace_format_check(&ftrace_check);
9543
+ if (ftrace_check)
9544
+ iter.iter_flags |= TRACE_FILE_LAT_FMT;
84059545
84069546 if (trace_find_next_entry_inc(&iter) != NULL) {
84079547 int ret;
....@@ -8424,7 +9564,7 @@
84249564 tr->trace_flags |= old_userobj;
84259565
84269566 for_each_tracing_cpu(cpu) {
8427
- atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
9567
+ atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
84289568 }
84299569 atomic_dec(&dump_running);
84309570 printk_nmi_direct_exit();
....@@ -8523,8 +9663,14 @@
85239663 int ring_buf_size;
85249664 int ret = -ENOMEM;
85259665
9666
+
9667
+ if (security_locked_down(LOCKDOWN_TRACEFS)) {
9668
+ pr_warn("Tracing disabled due to lockdown\n");
9669
+ return -EPERM;
9670
+ }
9671
+
85269672 /*
8527
- * Make sure we don't accidently add more trace options
9673
+ * Make sure we don't accidentally add more trace options
85289674 * than we have bits for.
85299675 */
85309676 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
....@@ -8553,7 +9699,7 @@
85539699
85549700 /*
85559701 * The prepare callbacks allocates some memory for the ring buffer. We
8556
- * don't free the buffer if the if the CPU goes down. If we were to free
9702
+ * don't free the buffer if the CPU goes down. If we were to free
85579703 * the buffer, then the user would lose any trace that was in the
85589704 * buffer. The memory will be removed once the "instance" is removed.
85599705 */
....@@ -8573,8 +9719,7 @@
85739719
85749720 /* TODO: make the number of buffers hot pluggable with CPUS */
85759721 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8576
- printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8577
- WARN_ON(1);
9722
+ MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
85789723 goto out_free_savedcmd;
85799724 }
85809725
....@@ -8619,6 +9764,7 @@
86199764 INIT_LIST_HEAD(&global_trace.systems);
86209765 INIT_LIST_HEAD(&global_trace.events);
86219766 INIT_LIST_HEAD(&global_trace.hist_vars);
9767
+ INIT_LIST_HEAD(&global_trace.err_log);
86229768 list_add(&global_trace.list, &ftrace_trace_arrays);
86239769
86249770 apply_trace_boot_options();
....@@ -8646,12 +9792,15 @@
86469792 if (tracepoint_printk) {
86479793 tracepoint_print_iter =
86489794 kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8649
- if (WARN_ON(!tracepoint_print_iter))
9795
+ if (MEM_FAIL(!tracepoint_print_iter,
9796
+ "Failed to allocate trace iterator\n"))
86509797 tracepoint_printk = 0;
86519798 else
86529799 static_key_enable(&tracepoint_printk_key.key);
86539800 }
86549801 tracer_alloc_buffers();
9802
+
9803
+ init_events();
86559804 }
86569805
86579806 void __init trace_init(void)
....@@ -8686,6 +9835,11 @@
86869835 {
86879836 /* sched_clock_stable() is determined in late_initcall */
86889837 if (!trace_boot_clock && !sched_clock_stable()) {
9838
+ if (security_locked_down(LOCKDOWN_TRACEFS)) {
9839
+ pr_warn("Can not set tracing clock due to lockdown\n");
9840
+ return -EPERM;
9841
+ }
9842
+
86899843 printk(KERN_WARNING
86909844 "Unstable clock detected, switching default tracing clock to \"global\"\n"
86919845 "If you want to keep using the local clock, then add:\n"