2023-12-11 6778948f9de86c3cfaf36725a7c87dcff9ba247f
kernel/kernel/trace/trace_functions_graph.c
@@ -16,33 +16,6 @@
 #include "trace.h"
 #include "trace_output.h"
 
-static bool kill_ftrace_graph;
-
-/**
- * ftrace_graph_is_dead - returns true if ftrace_graph_stop() was called
- *
- * ftrace_graph_stop() is called when a severe error is detected in
- * the function graph tracing. This function is called by the critical
- * paths of function graph to keep those paths from doing any more harm.
- */
-bool ftrace_graph_is_dead(void)
-{
-	return kill_ftrace_graph;
-}
-
-/**
- * ftrace_graph_stop - set to permanently disable function graph tracincg
- *
- * In case of an error int function graph tracing, this is called
- * to try to keep function graph tracing from causing any more harm.
- * Usually this is pretty severe and this is called to try to at least
- * get a warning out to the user.
- */
-void ftrace_graph_stop(void)
-{
-	kill_ftrace_graph = true;
-}
-
 /* When set, irq functions will be ignored */
 static int ftrace_graph_skip_irqs;
 
@@ -87,8 +60,12 @@
 	{ TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) },
 	/* Include sleep time (scheduled out) between entry and return */
 	{ TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },
+
+#ifdef CONFIG_FUNCTION_PROFILER
 	/* Include time within nested functions */
 	{ TRACER_OPT(graph-time, TRACE_GRAPH_GRAPH_TIME) },
+#endif
+
 	{ } /* Empty entry */
 };
 
@@ -117,258 +94,6 @@
 print_graph_duration(struct trace_array *tr, unsigned long long duration,
 		     struct trace_seq *s, u32 flags);
 
-/* Add a function return address to the trace stack on thread info.*/
-static int
-ftrace_push_return_trace(unsigned long ret, unsigned long func,
-			 unsigned long frame_pointer, unsigned long *retp)
-{
-	unsigned long long calltime;
-	int index;
-
-	if (unlikely(ftrace_graph_is_dead()))
-		return -EBUSY;
-
-	if (!current->ret_stack)
-		return -EBUSY;
-
-	/*
-	 * We must make sure the ret_stack is tested before we read
-	 * anything else.
-	 */
-	smp_rmb();
-
-	/* The return trace stack is full */
-	if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
-		atomic_inc(&current->trace_overrun);
-		return -EBUSY;
-	}
-
-	/*
-	 * The curr_ret_stack is an index to ftrace return stack of
-	 * current task. Its value should be in [0, FTRACE_RETFUNC_
-	 * DEPTH) when the function graph tracer is used. To support
-	 * filtering out specific functions, it makes the index
-	 * negative by subtracting huge value (FTRACE_NOTRACE_DEPTH)
-	 * so when it sees a negative index the ftrace will ignore
-	 * the record. And the index gets recovered when returning
-	 * from the filtered function by adding the FTRACE_NOTRACE_
-	 * DEPTH and then it'll continue to record functions normally.
-	 *
-	 * The curr_ret_stack is initialized to -1 and get increased
-	 * in this function. So it can be less than -1 only if it was
-	 * filtered out via ftrace_graph_notrace_addr() which can be
-	 * set from set_graph_notrace file in tracefs by user.
-	 */
-	if (current->curr_ret_stack < -1)
-		return -EBUSY;
-
-	calltime = trace_clock_local();
-
-	index = ++current->curr_ret_stack;
-	if (ftrace_graph_notrace_addr(func))
-		current->curr_ret_stack -= FTRACE_NOTRACE_DEPTH;
-	barrier();
-	current->ret_stack[index].ret = ret;
-	current->ret_stack[index].func = func;
-	current->ret_stack[index].calltime = calltime;
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
-	current->ret_stack[index].fp = frame_pointer;
-#endif
-#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
-	current->ret_stack[index].retp = retp;
-#endif
-	return 0;
-}
-
-int function_graph_enter(unsigned long ret, unsigned long func,
-			 unsigned long frame_pointer, unsigned long *retp)
-{
-	struct ftrace_graph_ent trace;
-
-	trace.func = func;
-	trace.depth = ++current->curr_ret_depth;
-
-	if (ftrace_push_return_trace(ret, func,
-				     frame_pointer, retp))
-		goto out;
-
-	/* Only trace if the calling function expects to */
-	if (!ftrace_graph_entry(&trace))
-		goto out_ret;
-
-	return 0;
- out_ret:
-	current->curr_ret_stack--;
- out:
-	current->curr_ret_depth--;
-	return -EBUSY;
-}
-
-/* Retrieve a function return address to the trace stack on thread info.*/
-static void
-ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
-			unsigned long frame_pointer)
-{
-	int index;
-
-	index = current->curr_ret_stack;
-
-	/*
-	 * A negative index here means that it's just returned from a
-	 * notrace'd function. Recover index to get an original
-	 * return address. See ftrace_push_return_trace().
-	 *
-	 * TODO: Need to check whether the stack gets corrupted.
-	 */
-	if (index < 0)
-		index += FTRACE_NOTRACE_DEPTH;
-
-	if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) {
-		ftrace_graph_stop();
-		WARN_ON(1);
-		/* Might as well panic, otherwise we have no where to go */
-		*ret = (unsigned long)panic;
-		return;
-	}
-
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
-	/*
-	 * The arch may choose to record the frame pointer used
-	 * and check it here to make sure that it is what we expect it
-	 * to be. If gcc does not set the place holder of the return
-	 * address in the frame pointer, and does a copy instead, then
-	 * the function graph trace will fail. This test detects this
-	 * case.
-	 *
-	 * Currently, x86_32 with optimize for size (-Os) makes the latest
-	 * gcc do the above.
-	 *
-	 * Note, -mfentry does not use frame pointers, and this test
-	 * is not needed if CC_USING_FENTRY is set.
-	 */
-	if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
-		ftrace_graph_stop();
-		WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
-		     " from func %ps return to %lx\n",
-		     current->ret_stack[index].fp,
-		     frame_pointer,
-		     (void *)current->ret_stack[index].func,
-		     current->ret_stack[index].ret);
-		*ret = (unsigned long)panic;
-		return;
-	}
-#endif
-
-	*ret = current->ret_stack[index].ret;
-	trace->func = current->ret_stack[index].func;
-	trace->calltime = current->ret_stack[index].calltime;
-	trace->overrun = atomic_read(&current->trace_overrun);
-	trace->depth = current->curr_ret_depth--;
-	/*
-	 * We still want to trace interrupts coming in if
-	 * max_depth is set to 1. Make sure the decrement is
-	 * seen before ftrace_graph_return.
-	 */
-	barrier();
-}
-
-/*
- * Send the trace to the ring-buffer.
- * @return the original return address.
- */
-unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
-{
-	struct ftrace_graph_ret trace;
-	unsigned long ret;
-
-	ftrace_pop_return_trace(&trace, &ret, frame_pointer);
-	trace.rettime = trace_clock_local();
-	ftrace_graph_return(&trace);
-	/*
-	 * The ftrace_graph_return() may still access the current
-	 * ret_stack structure, we need to make sure the update of
-	 * curr_ret_stack is after that.
-	 */
-	barrier();
-	current->curr_ret_stack--;
-	/*
-	 * The curr_ret_stack can be less than -1 only if it was
-	 * filtered out and it's about to return from the function.
-	 * Recover the index and continue to trace normal functions.
-	 */
-	if (current->curr_ret_stack < -1) {
-		current->curr_ret_stack += FTRACE_NOTRACE_DEPTH;
-		return ret;
-	}
-
-	if (unlikely(!ret)) {
-		ftrace_graph_stop();
-		WARN_ON(1);
-		/* Might as well panic. What else to do? */
-		ret = (unsigned long)panic;
-	}
-
-	return ret;
-}
-
-/**
- * ftrace_graph_ret_addr - convert a potentially modified stack return address
- * to its original value
- *
- * This function can be called by stack unwinding code to convert a found stack
- * return address ('ret') to its original value, in case the function graph
- * tracer has modified it to be 'return_to_handler'. If the address hasn't
- * been modified, the unchanged value of 'ret' is returned.
- *
- * 'idx' is a state variable which should be initialized by the caller to zero
- * before the first call.
- *
- * 'retp' is a pointer to the return address on the stack. It's ignored if
- * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined.
- */
-#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
-unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
-				    unsigned long ret, unsigned long *retp)
-{
-	int index = task->curr_ret_stack;
-	int i;
-
-	if (ret != (unsigned long)return_to_handler)
-		return ret;
-
-	if (index < -1)
-		index += FTRACE_NOTRACE_DEPTH;
-
-	if (index < 0)
-		return ret;
-
-	for (i = 0; i <= index; i++)
-		if (task->ret_stack[i].retp == retp)
-			return task->ret_stack[i].ret;
-
-	return ret;
-}
-#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
-unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
-				    unsigned long ret, unsigned long *retp)
-{
-	int task_idx;
-
-	if (ret != (unsigned long)return_to_handler)
-		return ret;
-
-	task_idx = task->curr_ret_stack;
-
-	if (!task->ret_stack || task_idx < *idx)
-		return ret;
-
-	task_idx -= *idx;
-	(*idx)++;
-
-	return task->ret_stack[task_idx].ret;
-}
-#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
-
 int __trace_graph_entry(struct trace_array *tr,
 			struct ftrace_graph_ent *trace,
 			unsigned long flags,
@@ -376,7 +101,7 @@
 {
 	struct trace_event_call *call = &event_funcgraph_entry;
 	struct ring_buffer_event *event;
-	struct ring_buffer *buffer = tr->trace_buffer.buffer;
+	struct trace_buffer *buffer = tr->array_buffer.buffer;
 	struct ftrace_graph_ent_entry *entry;
 
 	event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
@@ -409,6 +134,25 @@
 	int cpu;
 	int pc;
 
+	if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT))
+		return 0;
+
+	/*
+	 * Do not trace a function if it's filtered by set_graph_notrace.
+	 * Make the index of ret stack negative to indicate that it should
+	 * ignore further functions. But it needs its own ret stack entry
+	 * to recover the original index in order to continue tracing after
+	 * returning from the function.
+	 */
+	if (ftrace_graph_notrace_addr(trace->func)) {
+		trace_recursion_set(TRACE_GRAPH_NOTRACE_BIT);
+		/*
+		 * Need to return 1 to have the return called
+		 * that will clear the NOTRACE bit.
+		 */
+		return 1;
+	}
+
 	if (!ftrace_trace_task(tr))
 		return 0;
 
@@ -419,16 +163,6 @@
 		return 0;
 
 	/*
-	 * Do not trace a function if it's filtered by set_graph_notrace.
-	 * Make the index of ret stack negative to indicate that it should
-	 * ignore further functions. But it needs its own ret stack entry
-	 * to recover the original index in order to continue tracing after
-	 * returning from the function.
-	 */
-	if (ftrace_graph_notrace_addr(trace->func))
-		return 1;
-
-	/*
 	 * Stop here if tracing_threshold is set. We only write function return
 	 * events to the ring buffer.
 	 */
@@ -437,7 +171,7 @@
 
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
-	data = per_cpu_ptr(tr->trace_buffer.data, cpu);
+	data = per_cpu_ptr(tr->array_buffer.data, cpu);
 	disabled = atomic_inc_return(&data->disabled);
 	if (likely(disabled == 1)) {
 		pc = preempt_count();
@@ -487,7 +221,7 @@
 {
 	struct trace_event_call *call = &event_funcgraph_exit;
 	struct ring_buffer_event *event;
-	struct ring_buffer *buffer = tr->trace_buffer.buffer;
+	struct trace_buffer *buffer = tr->array_buffer.buffer;
 	struct ftrace_graph_ret_entry *entry;
 
 	event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
@@ -511,9 +245,14 @@
 
 	ftrace_graph_addr_finish(trace);
 
+	if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) {
+		trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT);
+		return;
+	}
+
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
-	data = per_cpu_ptr(tr->trace_buffer.data, cpu);
+	data = per_cpu_ptr(tr->array_buffer.data, cpu);
 	disabled = atomic_inc_return(&data->disabled);
 	if (likely(disabled == 1)) {
 		pc = preempt_count();
@@ -536,6 +275,11 @@
 {
 	ftrace_graph_addr_finish(trace);
 
+	if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) {
+		trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT);
+		return;
+	}
+
 	if (tracing_thresh &&
 	    (trace->rettime - trace->calltime < tracing_thresh))
 		return;
@@ -543,17 +287,25 @@
 		trace_graph_return(trace);
 }
 
+static struct fgraph_ops funcgraph_thresh_ops = {
+	.entryfunc = &trace_graph_entry,
+	.retfunc = &trace_graph_thresh_return,
+};
+
+static struct fgraph_ops funcgraph_ops = {
+	.entryfunc = &trace_graph_entry,
+	.retfunc = &trace_graph_return,
+};
+
 static int graph_trace_init(struct trace_array *tr)
 {
 	int ret;
 
 	set_graph_array(tr);
 	if (tracing_thresh)
-		ret = register_ftrace_graph(&trace_graph_thresh_return,
-					    &trace_graph_entry);
+		ret = register_ftrace_graph(&funcgraph_thresh_ops);
 	else
-		ret = register_ftrace_graph(&trace_graph_return,
-					    &trace_graph_entry);
+		ret = register_ftrace_graph(&funcgraph_ops);
 	if (ret)
 		return ret;
 	tracing_start_cmdline_record();
@@ -564,7 +316,10 @@
 static void graph_trace_reset(struct trace_array *tr)
 {
 	tracing_stop_cmdline_record();
-	unregister_ftrace_graph();
+	if (tracing_thresh)
+		unregister_ftrace_graph(&funcgraph_thresh_ops);
+	else
+		unregister_ftrace_graph(&funcgraph_ops);
 }
 
 static int graph_trace_update_thresh(struct trace_array *tr)
@@ -622,6 +377,7 @@
 {
 	trace_seq_putc(s, ' ');
 	trace_print_lat_fmt(s, entry);
+	trace_seq_puts(s, " | ");
 }
 
 /* If the pid changed since the last trace, output this event */
@@ -688,9 +444,9 @@
 		 * We need to consume the current entry to see
 		 * the next one.
 		 */
-		ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu,
+		ring_buffer_consume(iter->array_buffer->buffer, iter->cpu,
 				    NULL, NULL);
-		event = ring_buffer_peek(iter->trace_buffer->buffer, iter->cpu,
+		event = ring_buffer_peek(iter->array_buffer->buffer, iter->cpu,
 					 NULL, NULL);
 	}
 
@@ -726,7 +482,7 @@
 
 	/* this is a leaf, now advance the iterator */
 	if (ring_iter)
-		ring_buffer_read(ring_iter, NULL);
+		ring_buffer_iter_advance(ring_iter);
 
 	return next;
 }
@@ -740,6 +496,17 @@
 
 	trace_seq_printf(s, "%5lu.%06lu | ",
 			 (unsigned long)t, usecs_rem);
+}
+
+static void
+print_graph_rel_time(struct trace_iterator *iter, struct trace_seq *s)
+{
+	unsigned long long usecs;
+
+	usecs = iter->ts - iter->array_buffer->time_start;
+	do_div(usecs, NSEC_PER_USEC);
+
+	trace_seq_printf(s, "%9llu us | ", usecs);
 }
 
 static void
@@ -758,6 +525,10 @@
 	/* Absolute time */
 	if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
 		print_graph_abs_time(iter->ts, s);
+
+	/* Relative time */
+	if (flags & TRACE_GRAPH_PRINT_REL_TIME)
+		print_graph_rel_time(iter, s);
 
 	/* Cpu */
 	if (flags & TRACE_GRAPH_PRINT_CPU)
@@ -874,10 +645,6 @@
 
 		cpu_data = per_cpu_ptr(data->cpu_data, cpu);
 
-		/* If a graph tracer ignored set_graph_notrace */
-		if (call->depth < -1)
-			call->depth += FTRACE_NOTRACE_DEPTH;
-
 		/*
 		 * Comments display at + 1 to depth. Since
 		 * this is a leaf function, keep the comments
@@ -919,10 +686,6 @@
 	if (data) {
 		struct fgraph_cpu_data *cpu_data;
 		int cpu = iter->cpu;
-
-		/* If a graph tracer ignored set_graph_notrace */
-		if (call->depth < -1)
-			call->depth += FTRACE_NOTRACE_DEPTH;
 
 		cpu_data = per_cpu_ptr(data->cpu_data, cpu);
 		cpu_data->depth = call->depth;
@@ -974,6 +737,10 @@
 	/* Absolute time */
 	if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
 		print_graph_abs_time(iter->ts, s);
+
+	/* Relative time */
+	if (flags & TRACE_GRAPH_PRINT_REL_TIME)
+		print_graph_rel_time(iter, s);
 
 	/* Cpu */
 	if (flags & TRACE_GRAPH_PRINT_CPU)
@@ -1351,6 +1118,8 @@
 
 	if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
 		size += 16;
+	if (flags & TRACE_GRAPH_PRINT_REL_TIME)
+		size += 16;
 	if (flags & TRACE_GRAPH_PRINT_CPU)
 		size += 4;
 	if (flags & TRACE_GRAPH_PRINT_PROC)
@@ -1375,12 +1144,14 @@
 	seq_putc(s, '#');
 	if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
 		seq_puts(s, " TIME ");
+	if (flags & TRACE_GRAPH_PRINT_REL_TIME)
+		seq_puts(s, " REL TIME ");
 	if (flags & TRACE_GRAPH_PRINT_CPU)
 		seq_puts(s, " CPU");
 	if (flags & TRACE_GRAPH_PRINT_PROC)
 		seq_puts(s, " TASK/PID ");
 	if (lat)
-		seq_puts(s, "||||");
+		seq_puts(s, "|||| ");
 	if (flags & TRACE_GRAPH_PRINT_DURATION)
 		seq_puts(s, " DURATION ");
 	seq_puts(s, " FUNCTION CALLS\n");
@@ -1389,12 +1160,14 @@
 	seq_putc(s, '#');
 	if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
 		seq_puts(s, " | ");
+	if (flags & TRACE_GRAPH_PRINT_REL_TIME)
+		seq_puts(s, " | ");
 	if (flags & TRACE_GRAPH_PRINT_CPU)
 		seq_puts(s, " | ");
 	if (flags & TRACE_GRAPH_PRINT_PROC)
 		seq_puts(s, " | | ");
 	if (lat)
-		seq_puts(s, "||||");
+		seq_puts(s, "|||| ");
 	if (flags & TRACE_GRAPH_PRINT_DURATION)
 		seq_puts(s, " | | ");
 	seq_puts(s, " | | | |\n");
@@ -1563,13 +1336,13 @@
 
 static __init int init_graph_tracefs(void)
 {
-	struct dentry *d_tracer;
+	int ret;
 
-	d_tracer = tracing_init_dentry();
-	if (IS_ERR(d_tracer))
+	ret = tracing_init_dentry();
+	if (ret)
 		return 0;
 
-	trace_create_file("max_graph_depth", 0644, d_tracer,
+	trace_create_file("max_graph_depth", 0644, NULL,
 			  NULL, &graph_depth_fops);
 
 	return 0;