Skip to content

Commit 53be502

Browse files
namhyungacmel
authored andcommitted
perf ftrace: Add 'latency' subcommand
The perf ftrace latency is to get a histogram of function execution time. Users should give a function name using -T option. This is implemented using function_graph tracer with the given function only. And it parses the output to extract the time. $ sudo perf ftrace latency -a -T mutex_lock sleep 1 # DURATION | COUNT | GRAPH | 0 - 1 us | 4596 | ######################## | 1 - 2 us | 1680 | ######### | 2 - 4 us | 1106 | ##### | 4 - 8 us | 546 | ## | 8 - 16 us | 562 | ### | 16 - 32 us | 1 | | 32 - 64 us | 0 | | 64 - 128 us | 0 | | 128 - 256 us | 0 | | 256 - 512 us | 0 | | 512 - 1024 us | 0 | | 1 - 2 ms | 0 | | 2 - 4 ms | 0 | | 4 - 8 ms | 0 | | 8 - 16 ms | 0 | | 16 - 32 ms | 0 | | 32 - 64 ms | 0 | | 64 - 128 ms | 0 | | 128 - 256 ms | 0 | | 256 - 512 ms | 0 | | 512 - 1024 ms | 0 | | 1 - ... s | 0 | | Committer testing: Latency for the __handle_mm_fault kernel function, system wide for 1 second, see how one can go from the usual 'perf ftrace' output, now the same as for the 'perf ftrace trace' subcommand, to the new 'perf ftrace latency' subcommand: # perf ftrace -T __handle_mm_fault -a sleep 1 | wc -l 709 # perf ftrace -T __handle_mm_fault -a sleep 1 | wc -l 510 # perf ftrace -T __handle_mm_fault -a sleep 1 | head -20 # tracer: function # # entries-in-buffer/entries-written: 0/0 #P:32 # # TASK-PID CPU# TIMESTAMP FUNCTION # | | | | | perf-exec-1685104 [007] 90638.894613: __handle_mm_fault <-handle_mm_fault perf-exec-1685104 [007] 90638.894620: __handle_mm_fault <-handle_mm_fault perf-exec-1685104 [007] 90638.894622: __handle_mm_fault <-handle_mm_fault perf-exec-1685104 [007] 90638.894635: __handle_mm_fault <-handle_mm_fault perf-exec-1685104 [007] 90638.894688: __handle_mm_fault <-handle_mm_fault perf-exec-1685104 [007] 90638.894702: __handle_mm_fault <-handle_mm_fault perf-exec-1685104 [007] 90638.894714: __handle_mm_fault <-handle_mm_fault perf-exec-1685104 [007] 90638.894728: __handle_mm_fault <-handle_mm_fault perf-exec-1685104 [007] 90638.894740: __handle_mm_fault <-handle_mm_fault perf-exec-1685104 [007] 90638.894751: __handle_mm_fault <-handle_mm_fault sleep-1685104 [007] 90638.894962: __handle_mm_fault <-handle_mm_fault sleep-1685104 [007] 90638.894977: __handle_mm_fault <-handle_mm_fault sleep-1685104 [007] 90638.894983: __handle_mm_fault <-handle_mm_fault sleep-1685104 [007] 90638.894995: __handle_mm_fault <-handle_mm_fault # perf ftrace latency -T __handle_mm_fault -a sleep 1 # DURATION | COUNT | GRAPH | 0 - 1 us | 125 | ###### | 1 - 2 us | 249 | ############# | 2 - 4 us | 455 | ######################## | 4 - 8 us | 37 | # | 8 - 16 us | 0 | | 16 - 32 us | 0 | | 32 - 64 us | 0 | | 64 - 128 us | 0 | | 128 - 256 us | 0 | | 256 - 512 us | 0 | | 512 - 1024 us | 0 | | 1 - 2 ms | 0 | | 2 - 4 ms | 0 | | 4 - 8 ms | 0 | | 8 - 16 ms | 0 | | 16 - 32 ms | 0 | | 32 - 64 ms | 0 | | 64 - 128 ms | 0 | | 128 - 256 ms | 0 | | 256 - 512 ms | 0 | | 512 - 1024 ms | 0 | | 1 - ... s | 0 | | # Signed-off-by: Namhyung Kim <[email protected]> Tested-by: Arnaldo Carvalho de Melo <[email protected]> Cc: Andi Kleen <[email protected]> Cc: Athira Jajeev <[email protected]> Cc: Changbin Du <[email protected]> Cc: Ian Rogers <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: Jiri Olsa <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Song Liu <[email protected]> Cc: Stephane Eranian <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent a9b8ae8 commit 53be502

File tree

1 file changed

+276
-9
lines changed

1 file changed

+276
-9
lines changed

tools/perf/builtin-ftrace.c

Lines changed: 276 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
#include <signal.h>
1414
#include <stdlib.h>
1515
#include <fcntl.h>
16+
#include <math.h>
1617
#include <poll.h>
18+
#include <ctype.h>
1719
#include <linux/capability.h>
1820
#include <linux/string.h>
1921

@@ -702,6 +704,224 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace)
702704
return (done && !workload_exec_errno) ? 0 : -1;
703705
}
704706

707+
#define NUM_BUCKET 22 /* 20 + 2 (for outliers in both direction) */
708+
709+
static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
710+
{
711+
char *p, *q;
712+
char *unit;
713+
double num;
714+
int i;
715+
716+
/* ensure NUL termination */
717+
buf[len] = '\0';
718+
719+
/* handle data line by line */
720+
for (p = buf; (q = strchr(p, '\n')) != NULL; p = q + 1) {
721+
*q = '\0';
722+
/* move it to the line buffer */
723+
strcat(linebuf, p);
724+
725+
/*
726+
* parse trace output to get function duration like in
727+
*
728+
* # tracer: function_graph
729+
* #
730+
* # CPU DURATION FUNCTION CALLS
731+
* # | | | | | | |
732+
* 1) + 10.291 us | do_filp_open();
733+
* 1) 4.889 us | do_filp_open();
734+
* 1) 6.086 us | do_filp_open();
735+
*
736+
*/
737+
if (linebuf[0] == '#')
738+
goto next;
739+
740+
/* ignore CPU */
741+
p = strchr(linebuf, ')');
742+
if (p == NULL)
743+
p = linebuf;
744+
745+
while (*p && !isdigit(*p) && (*p != '|'))
746+
p++;
747+
748+
/* no duration */
749+
if (*p == '\0' || *p == '|')
750+
goto next;
751+
752+
num = strtod(p, &unit);
753+
if (!unit || strncmp(unit, " us", 3))
754+
goto next;
755+
756+
i = log2(num);
757+
if (i < 0)
758+
i = 0;
759+
if (i >= NUM_BUCKET)
760+
i = NUM_BUCKET - 1;
761+
762+
buckets[i]++;
763+
764+
next:
765+
/* empty the line buffer for the next output */
766+
linebuf[0] = '\0';
767+
}
768+
769+
/* preserve any remaining output (before newline) */
770+
strcat(linebuf, p);
771+
}
772+
773+
static void display_histogram(int buckets[])
774+
{
775+
int i;
776+
int total = 0;
777+
int bar_total = 46; /* to fit in 80 column */
778+
char bar[] = "###############################################";
779+
int bar_len;
780+
781+
for (i = 0; i < NUM_BUCKET; i++)
782+
total += buckets[i];
783+
784+
if (total == 0) {
785+
printf("No data found\n");
786+
return;
787+
}
788+
789+
printf("# %14s | %10s | %-*s |\n",
790+
" DURATION ", "COUNT", bar_total, "GRAPH");
791+
792+
bar_len = buckets[0] * bar_total / total;
793+
printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
794+
0, 1, "us", buckets[0], bar_len, bar, bar_total - bar_len, "");
795+
796+
for (i = 1; i < NUM_BUCKET - 1; i++) {
797+
int start = (1 << (i - 1));
798+
int stop = 1 << i;
799+
const char *unit = "us";
800+
801+
if (start >= 1024) {
802+
start >>= 10;
803+
stop >>= 10;
804+
unit = "ms";
805+
}
806+
bar_len = buckets[i] * bar_total / total;
807+
printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
808+
start, stop, unit, buckets[i], bar_len, bar,
809+
bar_total - bar_len, "");
810+
}
811+
812+
bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
813+
printf(" %4d - %-4s %s | %10d | %.*s%*s |\n",
814+
1, "...", " s", buckets[NUM_BUCKET - 1], bar_len, bar,
815+
bar_total - bar_len, "");
816+
817+
}
818+
819+
static int __cmd_latency(struct perf_ftrace *ftrace)
820+
{
821+
char *trace_file;
822+
int trace_fd;
823+
char buf[4096];
824+
char line[256];
825+
struct pollfd pollfd = {
826+
.events = POLLIN,
827+
};
828+
int buckets[NUM_BUCKET] = { };
829+
830+
if (!(perf_cap__capable(CAP_PERFMON) ||
831+
perf_cap__capable(CAP_SYS_ADMIN))) {
832+
pr_err("ftrace only works for %s!\n",
833+
#ifdef HAVE_LIBCAP_SUPPORT
834+
"users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
835+
#else
836+
"root"
837+
#endif
838+
);
839+
return -1;
840+
}
841+
842+
if (reset_tracing_files(ftrace) < 0) {
843+
pr_err("failed to reset ftrace\n");
844+
goto out;
845+
}
846+
847+
/* reset ftrace buffer */
848+
if (write_tracing_file("trace", "0") < 0)
849+
goto out;
850+
851+
if (set_tracing_options(ftrace) < 0)
852+
goto out_reset;
853+
854+
/* force to use the function_graph tracer to track duration */
855+
if (write_tracing_file("current_tracer", "function_graph") < 0) {
856+
pr_err("failed to set current_tracer to function_graph\n");
857+
goto out_reset;
858+
}
859+
860+
trace_file = get_tracing_file("trace_pipe");
861+
if (!trace_file) {
862+
pr_err("failed to open trace_pipe\n");
863+
goto out_reset;
864+
}
865+
866+
trace_fd = open(trace_file, O_RDONLY);
867+
868+
put_tracing_file(trace_file);
869+
870+
if (trace_fd < 0) {
871+
pr_err("failed to open trace_pipe\n");
872+
goto out_reset;
873+
}
874+
875+
fcntl(trace_fd, F_SETFL, O_NONBLOCK);
876+
pollfd.fd = trace_fd;
877+
878+
if (write_tracing_file("tracing_on", "1") < 0) {
879+
pr_err("can't enable tracing\n");
880+
goto out_close_fd;
881+
}
882+
883+
evlist__start_workload(ftrace->evlist);
884+
885+
line[0] = '\0';
886+
while (!done) {
887+
if (poll(&pollfd, 1, -1) < 0)
888+
break;
889+
890+
if (pollfd.revents & POLLIN) {
891+
int n = read(trace_fd, buf, sizeof(buf) - 1);
892+
if (n < 0)
893+
break;
894+
895+
make_histogram(buckets, buf, n, line);
896+
}
897+
}
898+
899+
write_tracing_file("tracing_on", "0");
900+
901+
if (workload_exec_errno) {
902+
const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
903+
pr_err("workload failed: %s\n", emsg);
904+
goto out_close_fd;
905+
}
906+
907+
/* read remaining buffer contents */
908+
while (true) {
909+
int n = read(trace_fd, buf, sizeof(buf) - 1);
910+
if (n <= 0)
911+
break;
912+
make_histogram(buckets, buf, n, line);
913+
}
914+
915+
display_histogram(buckets);
916+
917+
out_close_fd:
918+
close(trace_fd);
919+
out_reset:
920+
reset_tracing_files(ftrace);
921+
out:
922+
return (done && !workload_exec_errno) ? 0 : -1;
923+
}
924+
705925
static int perf_ftrace_config(const char *var, const char *value, void *cb)
706926
{
707927
struct perf_ftrace *ftrace = cb;
@@ -864,6 +1084,12 @@ static int parse_graph_tracer_opts(const struct option *opt,
8641084
return 0;
8651085
}
8661086

1087+
enum perf_ftrace_subcommand {
1088+
PERF_FTRACE_NONE,
1089+
PERF_FTRACE_TRACE,
1090+
PERF_FTRACE_LATENCY,
1091+
};
1092+
8671093
int cmd_ftrace(int argc, const char **argv)
8681094
{
8691095
int ret;
@@ -915,14 +1141,21 @@ int cmd_ftrace(int argc, const char **argv)
9151141
"Number of milliseconds to wait before starting tracing after program start"),
9161142
OPT_PARENT(common_options),
9171143
};
1144+
const struct option latency_options[] = {
1145+
OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
1146+
"Show latency of given function", parse_filter_func),
1147+
OPT_PARENT(common_options),
1148+
};
1149+
const struct option *options = ftrace_options;
9181150

9191151
const char * const ftrace_usage[] = {
9201152
"perf ftrace [<options>] [<command>]",
9211153
"perf ftrace [<options>] -- [<command>] [<options>]",
922-
"perf ftrace trace [<options>] [<command>]",
923-
"perf ftrace trace [<options>] -- [<command>] [<options>]",
1154+
"perf ftrace {trace|latency} [<options>] [<command>]",
1155+
"perf ftrace {trace|latency} [<options>] -- [<command>] [<options>]",
9241156
NULL
9251157
};
1158+
enum perf_ftrace_subcommand subcmd = PERF_FTRACE_NONE;
9261159

9271160
INIT_LIST_HEAD(&ftrace.filters);
9281161
INIT_LIST_HEAD(&ftrace.notrace);
@@ -938,15 +1171,29 @@ int cmd_ftrace(int argc, const char **argv)
9381171
if (ret < 0)
9391172
return -1;
9401173

941-
if (argc > 1 && !strcmp(argv[1], "trace")) {
942-
argc--;
943-
argv++;
1174+
if (argc > 1) {
1175+
if (!strcmp(argv[1], "trace")) {
1176+
subcmd = PERF_FTRACE_TRACE;
1177+
} else if (!strcmp(argv[1], "latency")) {
1178+
subcmd = PERF_FTRACE_LATENCY;
1179+
options = latency_options;
1180+
}
1181+
1182+
if (subcmd != PERF_FTRACE_NONE) {
1183+
argc--;
1184+
argv++;
1185+
}
9441186
}
1187+
/* for backward compatibility */
1188+
if (subcmd == PERF_FTRACE_NONE)
1189+
subcmd = PERF_FTRACE_TRACE;
9451190

946-
argc = parse_options(argc, argv, ftrace_options, ftrace_usage,
1191+
argc = parse_options(argc, argv, options, ftrace_usage,
9471192
PARSE_OPT_STOP_AT_NON_OPTION);
948-
if (!argc && target__none(&ftrace.target))
949-
ftrace.target.system_wide = true;
1193+
if (argc < 0) {
1194+
ret = -EINVAL;
1195+
goto out_delete_filters;
1196+
}
9501197

9511198
ret = target__validate(&ftrace.target);
9521199
if (ret) {
@@ -975,7 +1222,27 @@ int cmd_ftrace(int argc, const char **argv)
9751222
goto out_delete_evlist;
9761223
}
9771224

978-
ret = __cmd_ftrace(&ftrace);
1225+
switch (subcmd) {
1226+
case PERF_FTRACE_TRACE:
1227+
if (!argc && target__none(&ftrace.target))
1228+
ftrace.target.system_wide = true;
1229+
ret = __cmd_ftrace(&ftrace);
1230+
break;
1231+
case PERF_FTRACE_LATENCY:
1232+
if (list_empty(&ftrace.filters)) {
1233+
pr_err("Should provide a function to measure\n");
1234+
parse_options_usage(ftrace_usage, options, "T", 1);
1235+
ret = -EINVAL;
1236+
goto out_delete_evlist;
1237+
}
1238+
ret = __cmd_latency(&ftrace);
1239+
break;
1240+
case PERF_FTRACE_NONE:
1241+
default:
1242+
pr_err("Invalid subcommand\n");
1243+
ret = -EINVAL;
1244+
break;
1245+
}
9791246

9801247
out_delete_evlist:
9811248
evlist__delete(ftrace.evlist);

0 commit comments

Comments
 (0)