hc
2023-12-09 b22da3d8526a935aa31e086e63f60ff3246cb61c
kernel/tools/perf/builtin-sched.c
....@@ -1,11 +1,12 @@
11 // SPDX-License-Identifier: GPL-2.0
22 #include "builtin.h"
33 #include "perf.h"
4
+#include "perf-sys.h"
45
5
-#include "util/util.h"
6
+#include "util/cpumap.h"
67 #include "util/evlist.h"
7
-#include "util/cache.h"
88 #include "util/evsel.h"
9
+#include "util/evsel_fprintf.h"
910 #include "util/symbol.h"
1011 #include "util/thread.h"
1112 #include "util/header.h"
....@@ -15,16 +16,20 @@
1516 #include "util/thread_map.h"
1617 #include "util/color.h"
1718 #include "util/stat.h"
19
+#include "util/string2.h"
1820 #include "util/callchain.h"
1921 #include "util/time-utils.h"
2022
23
+#include <subcmd/pager.h>
2124 #include <subcmd/parse-options.h>
2225 #include "util/trace-event.h"
2326
2427 #include "util/debug.h"
28
+#include "util/event.h"
2529
2630 #include <linux/kernel.h>
2731 #include <linux/log2.h>
32
+#include <linux/zalloc.h>
2833 #include <sys/prctl.h>
2934 #include <sys/resource.h>
3035 #include <inttypes.h>
....@@ -34,15 +39,20 @@
3439 #include <pthread.h>
3540 #include <math.h>
3641 #include <api/fs/fs.h>
42
+#include <perf/cpumap.h>
3743 #include <linux/time64.h>
44
+#include <linux/err.h>
3845
39
-#include "sane_ctype.h"
46
+#include <linux/ctype.h>
4047
4148 #define PR_SET_NAME 15 /* Set process name */
4249 #define MAX_CPUS 4096
4350 #define COMM_LEN 20
4451 #define SYM_LEN 129
4552 #define MAX_PID 1024000
53
+
54
+static const char *cpu_list;
55
+static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
4656
4757 struct sched_atom;
4858
....@@ -120,7 +130,8 @@
120130 struct thread *thread;
121131 struct rb_node node;
122132 u64 max_lat;
123
- u64 max_lat_at;
133
+ u64 max_lat_start;
134
+ u64 max_lat_end;
124135 u64 total_lat;
125136 u64 nb_atoms;
126137 u64 total_runtime;
....@@ -132,13 +143,13 @@
132143 struct perf_sched;
133144
134145 struct trace_sched_handler {
135
- int (*switch_event)(struct perf_sched *sched, struct perf_evsel *evsel,
146
+ int (*switch_event)(struct perf_sched *sched, struct evsel *evsel,
136147 struct perf_sample *sample, struct machine *machine);
137148
138
- int (*runtime_event)(struct perf_sched *sched, struct perf_evsel *evsel,
149
+ int (*runtime_event)(struct perf_sched *sched, struct evsel *evsel,
139150 struct perf_sample *sample, struct machine *machine);
140151
141
- int (*wakeup_event)(struct perf_sched *sched, struct perf_evsel *evsel,
152
+ int (*wakeup_event)(struct perf_sched *sched, struct evsel *evsel,
142153 struct perf_sample *sample, struct machine *machine);
143154
144155 /* PERF_RECORD_FORK event, not sched_process_fork tracepoint */
....@@ -146,7 +157,7 @@
146157 struct machine *machine);
147158
148159 int (*migrate_task_event)(struct perf_sched *sched,
149
- struct perf_evsel *evsel,
160
+ struct evsel *evsel,
150161 struct perf_sample *sample,
151162 struct machine *machine);
152163 };
....@@ -158,11 +169,11 @@
158169 DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);
159170 int *comp_cpus;
160171 bool comp;
161
- struct thread_map *color_pids;
172
+ struct perf_thread_map *color_pids;
162173 const char *color_pids_str;
163
- struct cpu_map *color_cpus;
174
+ struct perf_cpu_map *color_cpus;
164175 const char *color_cpus_str;
165
- struct cpu_map *cpus;
176
+ struct perf_cpu_map *cpus;
166177 const char *cpus_str;
167178 };
168179
....@@ -213,7 +224,7 @@
213224 u64 all_runtime;
214225 u64 all_count;
215226 u64 cpu_last_switched[MAX_CPUS];
216
- struct rb_root atom_root, sorted_atom_root, merged_atom_root;
227
+ struct rb_root_cached atom_root, sorted_atom_root, merged_atom_root;
217228 struct list_head sort_list, cmp_pid;
218229 bool force;
219230 bool skip_merge;
....@@ -271,7 +282,7 @@
271282 struct idle_thread_runtime {
272283 struct thread_runtime tr;
273284 struct thread *last_thread;
274
- struct rb_root sorted_root;
285
+ struct rb_root_cached sorted_root;
275286 struct callchain_root callchain;
276287 struct callchain_cursor cursor;
277288 };
....@@ -798,11 +809,11 @@
798809
799810 static int
800811 replay_wakeup_event(struct perf_sched *sched,
801
- struct perf_evsel *evsel, struct perf_sample *sample,
812
+ struct evsel *evsel, struct perf_sample *sample,
802813 struct machine *machine __maybe_unused)
803814 {
804
- const char *comm = perf_evsel__strval(evsel, sample, "comm");
805
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
815
+ const char *comm = evsel__strval(evsel, sample, "comm");
816
+ const u32 pid = evsel__intval(evsel, sample, "pid");
806817 struct task_desc *waker, *wakee;
807818
808819 if (verbose > 0) {
....@@ -819,15 +830,15 @@
819830 }
820831
821832 static int replay_switch_event(struct perf_sched *sched,
822
- struct perf_evsel *evsel,
833
+ struct evsel *evsel,
823834 struct perf_sample *sample,
824835 struct machine *machine __maybe_unused)
825836 {
826
- const char *prev_comm = perf_evsel__strval(evsel, sample, "prev_comm"),
827
- *next_comm = perf_evsel__strval(evsel, sample, "next_comm");
828
- const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
829
- next_pid = perf_evsel__intval(evsel, sample, "next_pid");
830
- const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
837
+ const char *prev_comm = evsel__strval(evsel, sample, "prev_comm"),
838
+ *next_comm = evsel__strval(evsel, sample, "next_comm");
839
+ const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
840
+ next_pid = evsel__intval(evsel, sample, "next_pid");
841
+ const u64 prev_state = evsel__intval(evsel, sample, "prev_state");
831842 struct task_desc *prev, __maybe_unused *next;
832843 u64 timestamp0, timestamp = sample->time;
833844 int cpu = sample->cpu;
....@@ -950,10 +961,10 @@
950961 }
951962
952963 static struct work_atoms *
953
-thread_atoms_search(struct rb_root *root, struct thread *thread,
964
+thread_atoms_search(struct rb_root_cached *root, struct thread *thread,
954965 struct list_head *sort_list)
955966 {
956
- struct rb_node *node = root->rb_node;
967
+ struct rb_node *node = root->rb_root.rb_node;
957968 struct work_atoms key = { .thread = thread };
958969
959970 while (node) {
....@@ -976,10 +987,11 @@
976987 }
977988
978989 static void
979
-__thread_latency_insert(struct rb_root *root, struct work_atoms *data,
990
+__thread_latency_insert(struct rb_root_cached *root, struct work_atoms *data,
980991 struct list_head *sort_list)
981992 {
982
- struct rb_node **new = &(root->rb_node), *parent = NULL;
993
+ struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL;
994
+ bool leftmost = true;
983995
984996 while (*new) {
985997 struct work_atoms *this;
....@@ -992,12 +1004,14 @@
9921004
9931005 if (cmp > 0)
9941006 new = &((*new)->rb_left);
995
- else
1007
+ else {
9961008 new = &((*new)->rb_right);
1009
+ leftmost = false;
1010
+ }
9971011 }
9981012
9991013 rb_link_node(&data->node, parent, new);
1000
- rb_insert_color(&data->node, root);
1014
+ rb_insert_color_cached(&data->node, root, leftmost);
10011015 }
10021016
10031017 static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread)
....@@ -1083,19 +1097,20 @@
10831097 atoms->total_lat += delta;
10841098 if (delta > atoms->max_lat) {
10851099 atoms->max_lat = delta;
1086
- atoms->max_lat_at = timestamp;
1100
+ atoms->max_lat_start = atom->wake_up_time;
1101
+ atoms->max_lat_end = timestamp;
10871102 }
10881103 atoms->nb_atoms++;
10891104 }
10901105
10911106 static int latency_switch_event(struct perf_sched *sched,
1092
- struct perf_evsel *evsel,
1107
+ struct evsel *evsel,
10931108 struct perf_sample *sample,
10941109 struct machine *machine)
10951110 {
1096
- const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
1097
- next_pid = perf_evsel__intval(evsel, sample, "next_pid");
1098
- const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
1111
+ const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
1112
+ next_pid = evsel__intval(evsel, sample, "next_pid");
1113
+ const u64 prev_state = evsel__intval(evsel, sample, "prev_state");
10991114 struct work_atoms *out_events, *in_events;
11001115 struct thread *sched_out, *sched_in;
11011116 u64 timestamp0, timestamp = sample->time;
....@@ -1159,12 +1174,12 @@
11591174 }
11601175
11611176 static int latency_runtime_event(struct perf_sched *sched,
1162
- struct perf_evsel *evsel,
1177
+ struct evsel *evsel,
11631178 struct perf_sample *sample,
11641179 struct machine *machine)
11651180 {
1166
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
1167
- const u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1181
+ const u32 pid = evsel__intval(evsel, sample, "pid");
1182
+ const u64 runtime = evsel__intval(evsel, sample, "runtime");
11681183 struct thread *thread = machine__findnew_thread(machine, -1, pid);
11691184 struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
11701185 u64 timestamp = sample->time;
....@@ -1194,11 +1209,11 @@
11941209 }
11951210
11961211 static int latency_wakeup_event(struct perf_sched *sched,
1197
- struct perf_evsel *evsel,
1212
+ struct evsel *evsel,
11981213 struct perf_sample *sample,
11991214 struct machine *machine)
12001215 {
1201
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
1216
+ const u32 pid = evsel__intval(evsel, sample, "pid");
12021217 struct work_atoms *atoms;
12031218 struct work_atom *atom;
12041219 struct thread *wakee;
....@@ -1255,11 +1270,11 @@
12551270 }
12561271
12571272 static int latency_migrate_task_event(struct perf_sched *sched,
1258
- struct perf_evsel *evsel,
1273
+ struct evsel *evsel,
12591274 struct perf_sample *sample,
12601275 struct machine *machine)
12611276 {
1262
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
1277
+ const u32 pid = evsel__intval(evsel, sample, "pid");
12631278 u64 timestamp = sample->time;
12641279 struct work_atoms *atoms;
12651280 struct work_atom *atom;
....@@ -1309,7 +1324,7 @@
13091324 int i;
13101325 int ret;
13111326 u64 avg;
1312
- char max_lat_at[32];
1327
+ char max_lat_start[32], max_lat_end[32];
13131328
13141329 if (!work_list->nb_atoms)
13151330 return;
....@@ -1331,13 +1346,14 @@
13311346 printf(" ");
13321347
13331348 avg = work_list->total_lat / work_list->nb_atoms;
1334
- timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at));
1349
+ timestamp__scnprintf_usec(work_list->max_lat_start, max_lat_start, sizeof(max_lat_start));
1350
+ timestamp__scnprintf_usec(work_list->max_lat_end, max_lat_end, sizeof(max_lat_end));
13351351
1336
- printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n",
1352
+ printf("|%11.3f ms |%9" PRIu64 " | avg:%8.3f ms | max:%8.3f ms | max start: %12s s | max end: %12s s\n",
13371353 (double)work_list->total_runtime / NSEC_PER_MSEC,
13381354 work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
13391355 (double)work_list->max_lat / NSEC_PER_MSEC,
1340
- max_lat_at);
1356
+ max_lat_start, max_lat_end);
13411357 }
13421358
13431359 static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
....@@ -1447,15 +1463,15 @@
14471463 static void perf_sched__sort_lat(struct perf_sched *sched)
14481464 {
14491465 struct rb_node *node;
1450
- struct rb_root *root = &sched->atom_root;
1466
+ struct rb_root_cached *root = &sched->atom_root;
14511467 again:
14521468 for (;;) {
14531469 struct work_atoms *data;
1454
- node = rb_first(root);
1470
+ node = rb_first_cached(root);
14551471 if (!node)
14561472 break;
14571473
1458
- rb_erase(node, root);
1474
+ rb_erase_cached(node, root);
14591475 data = rb_entry(node, struct work_atoms, node);
14601476 __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list);
14611477 }
....@@ -1466,7 +1482,7 @@
14661482 }
14671483
14681484 static int process_sched_wakeup_event(struct perf_tool *tool,
1469
- struct perf_evsel *evsel,
1485
+ struct evsel *evsel,
14701486 struct perf_sample *sample,
14711487 struct machine *machine)
14721488 {
....@@ -1510,10 +1526,10 @@
15101526 return thread;
15111527 }
15121528
1513
-static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
1529
+static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
15141530 struct perf_sample *sample, struct machine *machine)
15151531 {
1516
- const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
1532
+ const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
15171533 struct thread *sched_in;
15181534 struct thread_runtime *tr;
15191535 int new_shortname;
....@@ -1651,14 +1667,14 @@
16511667 }
16521668
16531669 static int process_sched_switch_event(struct perf_tool *tool,
1654
- struct perf_evsel *evsel,
1670
+ struct evsel *evsel,
16551671 struct perf_sample *sample,
16561672 struct machine *machine)
16571673 {
16581674 struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
16591675 int this_cpu = sample->cpu, err = 0;
1660
- u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
1661
- next_pid = perf_evsel__intval(evsel, sample, "next_pid");
1676
+ u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
1677
+ next_pid = evsel__intval(evsel, sample, "next_pid");
16621678
16631679 if (sched->curr_pid[this_cpu] != (u32)-1) {
16641680 /*
....@@ -1677,7 +1693,7 @@
16771693 }
16781694
16791695 static int process_sched_runtime_event(struct perf_tool *tool,
1680
- struct perf_evsel *evsel,
1696
+ struct evsel *evsel,
16811697 struct perf_sample *sample,
16821698 struct machine *machine)
16831699 {
....@@ -1707,7 +1723,7 @@
17071723 }
17081724
17091725 static int process_sched_migrate_task_event(struct perf_tool *tool,
1710
- struct perf_evsel *evsel,
1726
+ struct evsel *evsel,
17111727 struct perf_sample *sample,
17121728 struct machine *machine)
17131729 {
....@@ -1720,14 +1736,14 @@
17201736 }
17211737
17221738 typedef int (*tracepoint_handler)(struct perf_tool *tool,
1723
- struct perf_evsel *evsel,
1739
+ struct evsel *evsel,
17241740 struct perf_sample *sample,
17251741 struct machine *machine);
17261742
17271743 static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_unused,
17281744 union perf_event *event __maybe_unused,
17291745 struct perf_sample *sample,
1730
- struct perf_evsel *evsel,
1746
+ struct evsel *evsel,
17311747 struct machine *machine)
17321748 {
17331749 int err = 0;
....@@ -1773,7 +1789,7 @@
17731789
17741790 static int perf_sched__read_events(struct perf_sched *sched)
17751791 {
1776
- const struct perf_evsel_str_handler handlers[] = {
1792
+ const struct evsel_str_handler handlers[] = {
17771793 { "sched:sched_switch", process_sched_switch_event, },
17781794 { "sched:sched_stat_runtime", process_sched_runtime_event, },
17791795 { "sched:sched_wakeup", process_sched_wakeup_event, },
....@@ -1782,18 +1798,16 @@
17821798 };
17831799 struct perf_session *session;
17841800 struct perf_data data = {
1785
- .file = {
1786
- .path = input_name,
1787
- },
1788
- .mode = PERF_DATA_MODE_READ,
1789
- .force = sched->force,
1801
+ .path = input_name,
1802
+ .mode = PERF_DATA_MODE_READ,
1803
+ .force = sched->force,
17901804 };
17911805 int rc = -1;
17921806
17931807 session = perf_session__new(&data, false, &sched->tool);
1794
- if (session == NULL) {
1795
- pr_debug("No Memory for session\n");
1796
- return -1;
1808
+ if (IS_ERR(session)) {
1809
+ pr_debug("Error creating perf session");
1810
+ return PTR_ERR(session);
17971811 }
17981812
17991813 symbol__init(&session->header.env);
....@@ -1837,7 +1851,7 @@
18371851 * returns runtime data for event, allocating memory for it the
18381852 * first time it is used.
18391853 */
1840
-static struct evsel_runtime *perf_evsel__get_runtime(struct perf_evsel *evsel)
1854
+static struct evsel_runtime *evsel__get_runtime(struct evsel *evsel)
18411855 {
18421856 struct evsel_runtime *r = evsel->priv;
18431857
....@@ -1852,10 +1866,9 @@
18521866 /*
18531867 * save last time event was seen per cpu
18541868 */
1855
-static void perf_evsel__save_time(struct perf_evsel *evsel,
1856
- u64 timestamp, u32 cpu)
1869
+static void evsel__save_time(struct evsel *evsel, u64 timestamp, u32 cpu)
18571870 {
1858
- struct evsel_runtime *r = perf_evsel__get_runtime(evsel);
1871
+ struct evsel_runtime *r = evsel__get_runtime(evsel);
18591872
18601873 if (r == NULL)
18611874 return;
....@@ -1879,9 +1892,9 @@
18791892 }
18801893
18811894 /* returns last time this event was seen on the given cpu */
1882
-static u64 perf_evsel__get_time(struct perf_evsel *evsel, u32 cpu)
1895
+static u64 evsel__get_time(struct evsel *evsel, u32 cpu)
18831896 {
1884
- struct evsel_runtime *r = perf_evsel__get_runtime(evsel);
1897
+ struct evsel_runtime *r = evsel__get_runtime(evsel);
18851898
18861899 if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu))
18871900 return 0;
....@@ -1986,19 +1999,22 @@
19861999 }
19872000
19882001 static void timehist_print_sample(struct perf_sched *sched,
1989
- struct perf_evsel *evsel,
2002
+ struct evsel *evsel,
19902003 struct perf_sample *sample,
19912004 struct addr_location *al,
19922005 struct thread *thread,
19932006 u64 t, int state)
19942007 {
19952008 struct thread_runtime *tr = thread__priv(thread);
1996
- const char *next_comm = perf_evsel__strval(evsel, sample, "next_comm");
1997
- const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
2009
+ const char *next_comm = evsel__strval(evsel, sample, "next_comm");
2010
+ const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
19982011 u32 max_cpus = sched->max_cpu + 1;
19992012 char tstr[64];
20002013 char nstr[30];
20012014 u64 wait_time;
2015
+
2016
+ if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
2017
+ return;
20022018
20032019 timestamp__scnprintf_usec(t, tstr, sizeof(tstr));
20042020 printf("%15s [%04d] ", tstr, sample->cpu);
....@@ -2048,7 +2064,7 @@
20482064 EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE |
20492065 EVSEL__PRINT_CALLCHAIN_ARROW |
20502066 EVSEL__PRINT_SKIP_IGNORED,
2051
- &callchain_cursor, stdout);
2067
+ &callchain_cursor, symbol_conf.bt_stop_list, stdout);
20522068
20532069 out:
20542070 printf("\n");
....@@ -2119,18 +2135,18 @@
21192135 }
21202136
21212137 static bool is_idle_sample(struct perf_sample *sample,
2122
- struct perf_evsel *evsel)
2138
+ struct evsel *evsel)
21232139 {
21242140 /* pid 0 == swapper == idle task */
2125
- if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0)
2126
- return perf_evsel__intval(evsel, sample, "prev_pid") == 0;
2141
+ if (strcmp(evsel__name(evsel), "sched:sched_switch") == 0)
2142
+ return evsel__intval(evsel, sample, "prev_pid") == 0;
21272143
21282144 return sample->pid == 0;
21292145 }
21302146
21312147 static void save_task_callchain(struct perf_sched *sched,
21322148 struct perf_sample *sample,
2133
- struct perf_evsel *evsel,
2149
+ struct evsel *evsel,
21342150 struct machine *machine)
21352151 {
21362152 struct callchain_cursor *cursor = &callchain_cursor;
....@@ -2164,7 +2180,7 @@
21642180 if (node == NULL)
21652181 break;
21662182
2167
- sym = node->sym;
2183
+ sym = node->ms.sym;
21682184 if (sym) {
21692185 if (!strcmp(sym->name, "schedule") ||
21702186 !strcmp(sym->name, "__schedule") ||
....@@ -2284,7 +2300,7 @@
22842300 static struct thread *timehist_get_thread(struct perf_sched *sched,
22852301 struct perf_sample *sample,
22862302 struct machine *machine,
2287
- struct perf_evsel *evsel)
2303
+ struct evsel *evsel)
22882304 {
22892305 struct thread *thread;
22902306
....@@ -2320,7 +2336,7 @@
23202336 itr->last_thread = thread;
23212337
23222338 /* copy task callchain when entering to idle */
2323
- if (perf_evsel__intval(evsel, sample, "next_pid") == 0)
2339
+ if (evsel__intval(evsel, sample, "next_pid") == 0)
23242340 save_idle_callchain(sched, itr, sample);
23252341 }
23262342 }
....@@ -2330,7 +2346,7 @@
23302346
23312347 static bool timehist_skip_sample(struct perf_sched *sched,
23322348 struct thread *thread,
2333
- struct perf_evsel *evsel,
2349
+ struct evsel *evsel,
23342350 struct perf_sample *sample)
23352351 {
23362352 bool rc = false;
....@@ -2341,10 +2357,10 @@
23412357 }
23422358
23432359 if (sched->idle_hist) {
2344
- if (strcmp(perf_evsel__name(evsel), "sched:sched_switch"))
2360
+ if (strcmp(evsel__name(evsel), "sched:sched_switch"))
23452361 rc = true;
2346
- else if (perf_evsel__intval(evsel, sample, "prev_pid") != 0 &&
2347
- perf_evsel__intval(evsel, sample, "next_pid") != 0)
2362
+ else if (evsel__intval(evsel, sample, "prev_pid") != 0 &&
2363
+ evsel__intval(evsel, sample, "next_pid") != 0)
23482364 rc = true;
23492365 }
23502366
....@@ -2352,7 +2368,7 @@
23522368 }
23532369
23542370 static void timehist_print_wakeup_event(struct perf_sched *sched,
2355
- struct perf_evsel *evsel,
2371
+ struct evsel *evsel,
23562372 struct perf_sample *sample,
23572373 struct machine *machine,
23582374 struct thread *awakened)
....@@ -2385,9 +2401,18 @@
23852401 printf("\n");
23862402 }
23872403
2404
+static int timehist_sched_wakeup_ignore(struct perf_tool *tool __maybe_unused,
2405
+ union perf_event *event __maybe_unused,
2406
+ struct evsel *evsel __maybe_unused,
2407
+ struct perf_sample *sample __maybe_unused,
2408
+ struct machine *machine __maybe_unused)
2409
+{
2410
+ return 0;
2411
+}
2412
+
23882413 static int timehist_sched_wakeup_event(struct perf_tool *tool,
23892414 union perf_event *event __maybe_unused,
2390
- struct perf_evsel *evsel,
2415
+ struct evsel *evsel,
23912416 struct perf_sample *sample,
23922417 struct machine *machine)
23932418 {
....@@ -2395,7 +2420,7 @@
23952420 struct thread *thread;
23962421 struct thread_runtime *tr = NULL;
23972422 /* want pid of awakened task not pid in sample */
2398
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
2423
+ const u32 pid = evsel__intval(evsel, sample, "pid");
23992424
24002425 thread = machine__findnew_thread(machine, 0, pid);
24012426 if (thread == NULL)
....@@ -2417,7 +2442,7 @@
24172442 }
24182443
24192444 static void timehist_print_migration_event(struct perf_sched *sched,
2420
- struct perf_evsel *evsel,
2445
+ struct evsel *evsel,
24212446 struct perf_sample *sample,
24222447 struct machine *machine,
24232448 struct thread *migrated)
....@@ -2431,8 +2456,8 @@
24312456 return;
24322457
24332458 max_cpus = sched->max_cpu + 1;
2434
- ocpu = perf_evsel__intval(evsel, sample, "orig_cpu");
2435
- dcpu = perf_evsel__intval(evsel, sample, "dest_cpu");
2459
+ ocpu = evsel__intval(evsel, sample, "orig_cpu");
2460
+ dcpu = evsel__intval(evsel, sample, "dest_cpu");
24362461
24372462 thread = machine__findnew_thread(machine, sample->pid, sample->tid);
24382463 if (thread == NULL)
....@@ -2471,7 +2496,7 @@
24712496
24722497 static int timehist_migrate_task_event(struct perf_tool *tool,
24732498 union perf_event *event __maybe_unused,
2474
- struct perf_evsel *evsel,
2499
+ struct evsel *evsel,
24752500 struct perf_sample *sample,
24762501 struct machine *machine)
24772502 {
....@@ -2479,7 +2504,7 @@
24792504 struct thread *thread;
24802505 struct thread_runtime *tr = NULL;
24812506 /* want pid of migrated task not pid in sample */
2482
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
2507
+ const u32 pid = evsel__intval(evsel, sample, "pid");
24832508
24842509 thread = machine__findnew_thread(machine, 0, pid);
24852510 if (thread == NULL)
....@@ -2499,7 +2524,7 @@
24992524
25002525 static int timehist_sched_change_event(struct perf_tool *tool,
25012526 union perf_event *event,
2502
- struct perf_evsel *evsel,
2527
+ struct evsel *evsel,
25032528 struct perf_sample *sample,
25042529 struct machine *machine)
25052530 {
....@@ -2510,8 +2535,7 @@
25102535 struct thread_runtime *tr = NULL;
25112536 u64 tprev, t = sample->time;
25122537 int rc = 0;
2513
- int state = perf_evsel__intval(evsel, sample, "prev_state");
2514
-
2538
+ int state = evsel__intval(evsel, sample, "prev_state");
25152539
25162540 if (machine__resolve(machine, &al, sample) < 0) {
25172541 pr_err("problem processing %d event. skipping it\n",
....@@ -2535,7 +2559,7 @@
25352559 goto out;
25362560 }
25372561
2538
- tprev = perf_evsel__get_time(evsel, sample->cpu);
2562
+ tprev = evsel__get_time(evsel, sample->cpu);
25392563
25402564 /*
25412565 * If start time given:
....@@ -2563,7 +2587,8 @@
25632587 }
25642588
25652589 if (!sched->idle_hist || thread->tid == 0) {
2566
- timehist_update_runtime_stats(tr, t, tprev);
2590
+ if (!cpu_list || test_bit(sample->cpu, cpu_bitmap))
2591
+ timehist_update_runtime_stats(tr, t, tprev);
25672592
25682593 if (sched->idle_hist) {
25692594 struct idle_thread_runtime *itr = (void *)tr;
....@@ -2618,14 +2643,14 @@
26182643 tr->ready_to_run = 0;
26192644 }
26202645
2621
- perf_evsel__save_time(evsel, sample->time, sample->cpu);
2646
+ evsel__save_time(evsel, sample->time, sample->cpu);
26222647
26232648 return rc;
26242649 }
26252650
26262651 static int timehist_sched_switch_event(struct perf_tool *tool,
26272652 union perf_event *event,
2628
- struct perf_evsel *evsel,
2653
+ struct evsel *evsel,
26292654 struct perf_sample *sample,
26302655 struct machine *machine __maybe_unused)
26312656 {
....@@ -2641,7 +2666,7 @@
26412666
26422667 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
26432668 printf("%15s ", tstr);
2644
- printf("lost %" PRIu64 " events on cpu %d\n", event->lost.lost, sample->cpu);
2669
+ printf("lost %" PRI_lu64 " events on cpu %d\n", event->lost.lost, sample->cpu);
26452670
26462671 return 0;
26472672 }
....@@ -2762,12 +2787,12 @@
27622787 return ret;
27632788 }
27642789
2765
-static size_t timehist_print_idlehist_callchain(struct rb_root *root)
2790
+static size_t timehist_print_idlehist_callchain(struct rb_root_cached *root)
27662791 {
27672792 size_t ret = 0;
27682793 FILE *fp = stdout;
27692794 struct callchain_node *chain;
2770
- struct rb_node *rb_node = rb_first(root);
2795
+ struct rb_node *rb_node = rb_first_cached(root);
27712796
27722797 printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains");
27732798 printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line,
....@@ -2836,6 +2861,9 @@
28362861
28372862 printf("\nIdle stats:\n");
28382863 for (i = 0; i < idle_max_cpu; ++i) {
2864
+ if (cpu_list && !test_bit(i, cpu_bitmap))
2865
+ continue;
2866
+
28392867 t = idle_threads[i];
28402868 if (!t)
28412869 continue;
....@@ -2868,7 +2896,7 @@
28682896 if (itr == NULL)
28692897 continue;
28702898
2871
- callchain_param.sort(&itr->sorted_root, &itr->callchain,
2899
+ callchain_param.sort(&itr->sorted_root.rb_root, &itr->callchain,
28722900 0, &callchain_param);
28732901
28742902 printf(" CPU %2d:", i);
....@@ -2895,14 +2923,14 @@
28952923
28962924 typedef int (*sched_handler)(struct perf_tool *tool,
28972925 union perf_event *event,
2898
- struct perf_evsel *evsel,
2926
+ struct evsel *evsel,
28992927 struct perf_sample *sample,
29002928 struct machine *machine);
29012929
29022930 static int perf_timehist__process_sample(struct perf_tool *tool,
29032931 union perf_event *event,
29042932 struct perf_sample *sample,
2905
- struct perf_evsel *evsel,
2933
+ struct evsel *evsel,
29062934 struct machine *machine)
29072935 {
29082936 struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
....@@ -2922,13 +2950,13 @@
29222950 }
29232951
29242952 static int timehist_check_attr(struct perf_sched *sched,
2925
- struct perf_evlist *evlist)
2953
+ struct evlist *evlist)
29262954 {
2927
- struct perf_evsel *evsel;
2955
+ struct evsel *evsel;
29282956 struct evsel_runtime *er;
29292957
2930
- list_for_each_entry(evsel, &evlist->entries, node) {
2931
- er = perf_evsel__get_runtime(evsel);
2958
+ list_for_each_entry(evsel, &evlist->core.entries, core.node) {
2959
+ er = evsel__get_runtime(evsel);
29322960 if (er == NULL) {
29332961 pr_err("Failed to allocate memory for evsel runtime data\n");
29342962 return -1;
....@@ -2946,24 +2974,23 @@
29462974
29472975 static int perf_sched__timehist(struct perf_sched *sched)
29482976 {
2949
- const struct perf_evsel_str_handler handlers[] = {
2977
+ struct evsel_str_handler handlers[] = {
29502978 { "sched:sched_switch", timehist_sched_switch_event, },
29512979 { "sched:sched_wakeup", timehist_sched_wakeup_event, },
2980
+ { "sched:sched_waking", timehist_sched_wakeup_event, },
29522981 { "sched:sched_wakeup_new", timehist_sched_wakeup_event, },
29532982 };
2954
- const struct perf_evsel_str_handler migrate_handlers[] = {
2983
+ const struct evsel_str_handler migrate_handlers[] = {
29552984 { "sched:sched_migrate_task", timehist_migrate_task_event, },
29562985 };
29572986 struct perf_data data = {
2958
- .file = {
2959
- .path = input_name,
2960
- },
2961
- .mode = PERF_DATA_MODE_READ,
2962
- .force = sched->force,
2987
+ .path = input_name,
2988
+ .mode = PERF_DATA_MODE_READ,
2989
+ .force = sched->force,
29632990 };
29642991
29652992 struct perf_session *session;
2966
- struct perf_evlist *evlist;
2993
+ struct evlist *evlist;
29672994 int err = -1;
29682995
29692996 /*
....@@ -2985,8 +3012,14 @@
29853012 symbol_conf.use_callchain = sched->show_callchain;
29863013
29873014 session = perf_session__new(&data, false, &sched->tool);
2988
- if (session == NULL)
2989
- return -ENOMEM;
3015
+ if (IS_ERR(session))
3016
+ return PTR_ERR(session);
3017
+
3018
+ if (cpu_list) {
3019
+ err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
3020
+ if (err < 0)
3021
+ goto out;
3022
+ }
29903023
29913024 evlist = session->evlist;
29923025
....@@ -3001,6 +3034,11 @@
30013034 goto out;
30023035
30033036 setup_pager();
3037
+
3038
+ /* prefer sched_waking if it is captured */
3039
+ if (perf_evlist__find_tracepoint_by_name(session->evlist,
3040
+ "sched:sched_waking"))
3041
+ handlers[1].handler = timehist_sched_wakeup_ignore;
30043042
30053043 /* setup per-evsel handlers */
30063044 if (perf_session__set_tracepoints_handlers(session, handlers))
....@@ -3074,11 +3112,12 @@
30743112 }
30753113 }
30763114
3077
-static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data)
3115
+static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *data)
30783116 {
3079
- struct rb_node **new = &(root->rb_node), *parent = NULL;
3117
+ struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL;
30803118 struct work_atoms *this;
30813119 const char *comm = thread__comm_str(data->thread), *this_comm;
3120
+ bool leftmost = true;
30823121
30833122 while (*new) {
30843123 int cmp;
....@@ -3092,6 +3131,7 @@
30923131 new = &((*new)->rb_left);
30933132 } else if (cmp < 0) {
30943133 new = &((*new)->rb_right);
3134
+ leftmost = false;
30953135 } else {
30963136 this->num_merged++;
30973137 this->total_runtime += data->total_runtime;
....@@ -3100,7 +3140,8 @@
31003140 list_splice(&data->work_list, &this->work_list);
31013141 if (this->max_lat < data->max_lat) {
31023142 this->max_lat = data->max_lat;
3103
- this->max_lat_at = data->max_lat_at;
3143
+ this->max_lat_start = data->max_lat_start;
3144
+ this->max_lat_end = data->max_lat_end;
31043145 }
31053146 zfree(&data);
31063147 return;
....@@ -3109,7 +3150,7 @@
31093150
31103151 data->num_merged++;
31113152 rb_link_node(&data->node, parent, new);
3112
- rb_insert_color(&data->node, root);
3153
+ rb_insert_color_cached(&data->node, root, leftmost);
31133154 }
31143155
31153156 static void perf_sched__merge_lat(struct perf_sched *sched)
....@@ -3120,8 +3161,8 @@
31203161 if (sched->skip_merge)
31213162 return;
31223163
3123
- while ((node = rb_first(&sched->atom_root))) {
3124
- rb_erase(node, &sched->atom_root);
3164
+ while ((node = rb_first_cached(&sched->atom_root))) {
3165
+ rb_erase_cached(node, &sched->atom_root);
31253166 data = rb_entry(node, struct work_atoms, node);
31263167 __merge_work_atoms(&sched->merged_atom_root, data);
31273168 }
....@@ -3139,11 +3180,11 @@
31393180 perf_sched__merge_lat(sched);
31403181 perf_sched__sort_lat(sched);
31413182
3142
- printf("\n -----------------------------------------------------------------------------------------------------------------\n");
3143
- printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
3144
- printf(" -----------------------------------------------------------------------------------------------------------------\n");
3183
+ printf("\n -------------------------------------------------------------------------------------------------------------------------------------------\n");
3184
+ printf(" Task | Runtime ms | Switches | Avg delay ms | Max delay ms | Max delay start | Max delay end |\n");
3185
+ printf(" -------------------------------------------------------------------------------------------------------------------------------------------\n");
31453186
3146
- next = rb_first(&sched->sorted_atom_root);
3187
+ next = rb_first_cached(&sched->sorted_atom_root);
31473188
31483189 while (next) {
31493190 struct work_atoms *work_list;
....@@ -3168,7 +3209,7 @@
31683209
31693210 static int setup_map_cpus(struct perf_sched *sched)
31703211 {
3171
- struct cpu_map *map;
3212
+ struct perf_cpu_map *map;
31723213
31733214 sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
31743215
....@@ -3181,7 +3222,7 @@
31813222 if (!sched->map.cpus_str)
31823223 return 0;
31833224
3184
- map = cpu_map__new(sched->map.cpus_str);
3225
+ map = perf_cpu_map__new(sched->map.cpus_str);
31853226 if (!map) {
31863227 pr_err("failed to get cpus map from %s\n", sched->map.cpus_str);
31873228 return -1;
....@@ -3193,7 +3234,7 @@
31933234
31943235 static int setup_color_pids(struct perf_sched *sched)
31953236 {
3196
- struct thread_map *map;
3237
+ struct perf_thread_map *map;
31973238
31983239 if (!sched->map.color_pids_str)
31993240 return 0;
....@@ -3210,12 +3251,12 @@
32103251
32113252 static int setup_color_cpus(struct perf_sched *sched)
32123253 {
3213
- struct cpu_map *map;
3254
+ struct perf_cpu_map *map;
32143255
32153256 if (!sched->map.color_cpus_str)
32163257 return 0;
32173258
3218
- map = cpu_map__new(sched->map.color_cpus_str);
3259
+ map = perf_cpu_map__new(sched->map.color_cpus_str);
32193260 if (!map) {
32203261 pr_err("failed to get thread map from %s\n", sched->map.color_cpus_str);
32213262 return -1;
....@@ -3296,6 +3337,16 @@
32963337 sort_dimension__add("pid", &sched->cmp_pid);
32973338 }
32983339
3340
+static bool schedstat_events_exposed(void)
3341
+{
3342
+ /*
3343
+ * Use "sched:sched_stat_wait" as a representative probe: the schedstat
3344
+ * tracepoints count as exposed unless its format lookup returns an error.
3345
+ */
3346
+ return IS_ERR(trace_event__tp_format("sched", "sched_stat_wait")) ?
3347
+ false : true;
3348
+}
3349
+
32993350 static int __cmd_record(int argc, const char **argv)
33003351 {
33013352 unsigned int rec_argc, i, j;
....@@ -3307,17 +3358,33 @@
33073358 "-m", "1024",
33083359 "-c", "1",
33093360 "-e", "sched:sched_switch",
3310
- "-e", "sched:sched_stat_wait",
3311
- "-e", "sched:sched_stat_sleep",
3312
- "-e", "sched:sched_stat_iowait",
33133361 "-e", "sched:sched_stat_runtime",
33143362 "-e", "sched:sched_process_fork",
3315
- "-e", "sched:sched_wakeup",
33163363 "-e", "sched:sched_wakeup_new",
33173364 "-e", "sched:sched_migrate_task",
33183365 };
33193366
3320
- rec_argc = ARRAY_SIZE(record_args) + argc - 1;
3367
+ /*
3368
+ * The tracepoints trace_sched_stat_{wait, sleep, iowait} are
3369
+ * not exposed to userspace if CONFIG_SCHEDSTATS is not set.
3370
+ * To keep "perf sched record" from failing in that case, record
3371
+ * the schedstat events only when they are actually available.
3372
+ */
3373
+ const char * const schedstat_args[] = {
3374
+ "-e", "sched:sched_stat_wait",
3375
+ "-e", "sched:sched_stat_sleep",
3376
+ "-e", "sched:sched_stat_iowait",
3377
+ };
3378
+ unsigned int schedstat_argc = schedstat_events_exposed() ?
3379
+ ARRAY_SIZE(schedstat_args) : 0;
3380
+
3381
+ struct tep_event *waking_event;
3382
+
3383
+ /*
3384
+ * +2 for either "-e", "sched:sched_wakeup" or
3385
+ * "-e", "sched:sched_waking"
3386
+ */
3387
+ rec_argc = ARRAY_SIZE(record_args) + 2 + schedstat_argc + argc - 1;
33213388 rec_argv = calloc(rec_argc + 1, sizeof(char *));
33223389
33233390 if (rec_argv == NULL)
....@@ -3325,6 +3392,16 @@
33253392
33263393 for (i = 0; i < ARRAY_SIZE(record_args); i++)
33273394 rec_argv[i] = strdup(record_args[i]);
3395
+
3396
+ rec_argv[i++] = "-e";
3397
+ waking_event = trace_event__tp_format("sched", "sched_waking");
3398
+ if (!IS_ERR(waking_event))
3399
+ rec_argv[i++] = strdup("sched:sched_waking");
3400
+ else
3401
+ rec_argv[i++] = strdup("sched:sched_wakeup");
3402
+
3403
+ for (j = 0; j < schedstat_argc; j++)
3404
+ rec_argv[i++] = strdup(schedstat_args[j]);
33283405
33293406 for (j = 1; j < (unsigned int)argc; j++, i++)
33303407 rec_argv[i] = argv[j];
....@@ -3336,7 +3413,7 @@
33363413
33373414 int cmd_sched(int argc, const char **argv)
33383415 {
3339
- const char default_sort_order[] = "avg, max, switch, runtime";
3416
+ static const char default_sort_order[] = "avg, max, switch, runtime";
33403417 struct perf_sched sched = {
33413418 .tool = {
33423419 .sample = perf_sched__process_tracepoint_sample,
....@@ -3421,6 +3498,7 @@
34213498 "analyze events only for given process id(s)"),
34223499 OPT_STRING('t', "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
34233500 "analyze events only for given thread id(s)"),
3501
+ OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
34243502 OPT_PARENT(sched_options)
34253503 };
34263504