Skip to content

Commit 5def33d

Browse files
Daniel Bristot de Oliveira authored and rostedt committed
rtla/timerlat: Add auto-analysis support to timerlat top
Currently, timerlat top displays the timerlat tracer latency results, saving the intuitive timerlat trace for the developer to analyze. This patch goes a step forward in the automation of the scheduling latency analysis by providing a summary of the root cause of a latency higher than the passed "stop tracing" parameter if the trace stops. The output is intuitive enough for non-expert users to have a general idea of the root cause by looking at each factor's contribution percentage while keeping the technical detail in the output for more expert users to start an in-depth debug or to correlate a root cause with an existing one. The terminology is in line with recent industry and academic publications to facilitate the understanding of both audiences. Here is one example of tool output: ----------------------------------------- %< ----------------------------------------------------- # taskset -c 0 timerlat -a 40 -c 1-23 -q Timer Latency 0 00:00:12 | IRQ Timer Latency (us) | Thread Timer Latency (us) CPU COUNT | cur min avg max | cur min avg max 1 #12322 | 0 0 1 15 | 10 3 9 31 2 #12322 | 3 0 1 12 | 10 3 9 23 3 #12322 | 1 0 1 21 | 8 2 8 34 4 #12322 | 1 0 1 17 | 10 2 11 33 5 #12322 | 0 0 1 12 | 8 3 8 25 6 #12322 | 1 0 1 14 | 16 3 11 35 7 #12322 | 0 0 1 14 | 9 2 8 29 8 #12322 | 1 0 1 22 | 9 3 9 34 9 #12322 | 0 0 1 14 | 8 2 8 24 10 #12322 | 1 0 0 12 | 9 3 8 24 11 #12322 | 0 0 0 15 | 6 2 7 29 12 #12321 | 1 0 0 13 | 5 3 8 23 13 #12319 | 0 0 1 14 | 9 3 9 26 14 #12321 | 1 0 0 13 | 6 2 8 24 15 #12321 | 1 0 1 15 | 12 3 11 27 16 #12318 | 0 0 1 13 | 7 3 10 24 17 #12319 | 0 0 1 13 | 11 3 9 25 18 #12318 | 0 0 0 12 | 8 2 8 20 19 #12319 | 0 0 1 18 | 10 2 9 28 20 #12317 | 0 0 0 20 | 9 3 8 34 21 #12318 | 0 0 0 13 | 8 3 8 28 22 #12319 | 0 0 1 11 | 8 3 10 22 23 #12320 | 28 0 1 28 | 41 3 11 41 rtla timerlat hit stop tracing ## CPU 23 hit stop tracing, analyzing it ## IRQ handler delay: 27.49 us (65.52 %) IRQ latency: 28.13 us Timerlat IRQ duration: 9.59 us (22.85 %) Blocking thread: 3.79 
us (9.03 %) objtool:49256 3.79 us Blocking thread stacktrace -> timerlat_irq -> __hrtimer_run_queues -> hrtimer_interrupt -> __sysvec_apic_timer_interrupt -> sysvec_apic_timer_interrupt -> asm_sysvec_apic_timer_interrupt -> _raw_spin_unlock_irqrestore -> cgroup_rstat_flush_locked -> cgroup_rstat_flush_irqsafe -> mem_cgroup_flush_stats -> mem_cgroup_wb_stats -> balance_dirty_pages -> balance_dirty_pages_ratelimited_flags -> btrfs_buffered_write -> btrfs_do_write_iter -> vfs_write -> __x64_sys_pwrite64 -> do_syscall_64 -> entry_SYSCALL_64_after_hwframe ------------------------------------------------------------------------ Thread latency: 41.96 us (100%) The system has exit from idle latency! Max timerlat IRQ latency from idle: 17.48 us in cpu 4 Saving trace to timerlat_trace.txt ----------------------------------------- >% ----------------------------------------------------- In this case, the major factor was the delay suffered by the IRQ handler that handles timerlat wakeup: 65.52 %. This can be caused by the current thread masking interrupts, which can be seen in the blocking thread stacktrace: the current thread (objtool:49256) disabled interrupts via raw spin lock operations inside mem cgroup, while doing write syscall in a btrfs file system. 
A simple search for the function name on Google shows that this is a legit case for disabling the interrupts: cgroup: Use irqsave in cgroup_rstat_flush_locked() lore.kernel.org/linux-mm/[email protected]/ The output also prints other reasons for the latency root cause, such as: - an IRQ that happened before the IRQ handler that caused delays - The interference from NMI, IRQ, Softirq, and Threads The details about how these factors affect the scheduling latency can be found here: https://bristot.me/demystifying-the-real-time-linux-latency/ Link: https://lkml.kernel.org/r/3d45f40e630317f51ac6d678e2d96d310e495729.1675179318.git.bristot@kernel.org Cc: Daniel Bristot de Oliveira <[email protected]> Cc: Jonathan Corbet <[email protected]> Signed-off-by: Daniel Bristot de Oliveira <[email protected]> Signed-off-by: Steven Rostedt (Google) <[email protected]>
1 parent 27e348b commit 5def33d

File tree

1 file changed

+44
-2
lines changed

1 file changed

+44
-2
lines changed

tools/tracing/rtla/src/timerlat_top.c

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@
1010
#include <unistd.h>
1111
#include <stdio.h>
1212
#include <time.h>
13+
#include <errno.h>
1314

1415
#include "utils.h"
1516
#include "osnoise.h"
1617
#include "timerlat.h"
18+
#include "timerlat_aa.h"
1719

1820
struct timerlat_top_params {
1921
char *cpus;
@@ -30,6 +32,8 @@ struct timerlat_top_params {
3032
int quiet;
3133
int set_sched;
3234
int dma_latency;
35+
int no_aa;
36+
int dump_tasks;
3337
struct sched_attr sched_param;
3438
struct trace_events *events;
3539
};
@@ -130,17 +134,22 @@ timerlat_top_handler(struct trace_seq *s, struct tep_record *record,
130134
struct tep_event *event, void *context)
131135
{
132136
struct trace_instance *trace = context;
137+
struct timerlat_top_params *params;
133138
unsigned long long latency, thread;
134139
struct osnoise_tool *top;
135140
int cpu = record->cpu;
136141

137142
top = container_of(trace, struct osnoise_tool, trace);
143+
params = top->params;
138144

139145
tep_get_field_val(s, event, "context", record, &thread, 1);
140146
tep_get_field_val(s, event, "timer_latency", record, &latency, 1);
141147

142148
timerlat_top_update(top, cpu, thread, latency);
143149

150+
if (!params->no_aa)
151+
timerlat_aa_handler(s, record, event, context);
152+
144153
return 0;
145154
}
146155

@@ -281,11 +290,13 @@ static void timerlat_top_usage(char *usage)
281290
" -c/--cpus cpus: run the tracer only on the given cpus",
282291
" -d/--duration time[m|h|d]: duration of the session in seconds",
283292
" -D/--debug: print debug info",
293+
" --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)",
284294
" -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]",
285295
" -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed",
286296
" --filter <command>: enable a trace event filter to the previous -e event",
287297
" --trigger <command>: enable a trace event trigger to the previous -e event",
288298
" -n/--nano: display data in nanoseconds",
299+
" --no-aa: disable auto-analysis, reducing rtla timerlat cpu usage",
289300
" -q/--quiet print only a summary at the end",
290301
" --dma-latency us: set /dev/cpu_dma_latency latency <us> to reduce exit from idle latency",
291302
" -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters",
@@ -349,13 +360,15 @@ static struct timerlat_top_params
349360
{"trigger", required_argument, 0, '0'},
350361
{"filter", required_argument, 0, '1'},
351362
{"dma-latency", required_argument, 0, '2'},
363+
{"no-aa", no_argument, 0, '3'},
364+
{"dump-tasks", no_argument, 0, '4'},
352365
{0, 0, 0, 0}
353366
};
354367

355368
/* getopt_long stores the option index here. */
356369
int option_index = 0;
357370

358-
c = getopt_long(argc, argv, "a:c:d:De:hi:np:P:qs:t::T:0:1:2:",
371+
c = getopt_long(argc, argv, "a:c:d:De:hi:np:P:qs:t::T:0:1:2:34",
359372
long_options, &option_index);
360373

361374
/* detect the end of the options. */
@@ -368,13 +381,13 @@ static struct timerlat_top_params
368381

369382
/* set thread stop to auto_thresh */
370383
params->stop_total_us = auto_thresh;
384+
params->stop_us = auto_thresh;
371385

372386
/* get stack trace */
373387
params->print_stack = auto_thresh;
374388

375389
/* set trace */
376390
params->trace_output = "timerlat_trace.txt";
377-
378391
break;
379392
case 'c':
380393
retval = parse_cpu_list(optarg, &params->monitored_cpus);
@@ -437,6 +450,7 @@ static struct timerlat_top_params
437450
params->trace_output = &optarg[1];
438451
else
439452
params->trace_output = "timerlat_trace.txt";
453+
440454
break;
441455
case '0': /* trigger */
442456
if (params->events) {
@@ -467,6 +481,12 @@ static struct timerlat_top_params
467481
exit(EXIT_FAILURE);
468482
}
469483
break;
484+
case '3': /* no-aa */
485+
params->no_aa = 1;
486+
break;
487+
case '4':
488+
params->dump_tasks = 1;
489+
break;
470490
default:
471491
timerlat_top_usage("Invalid option");
472492
}
@@ -477,6 +497,12 @@ static struct timerlat_top_params
477497
exit(EXIT_FAILURE);
478498
}
479499

500+
/*
501+
* Auto analysis only happens if stop tracing, thus:
502+
*/
503+
if (!params->stop_us && !params->stop_total_us)
504+
params->no_aa = 1;
505+
480506
return params;
481507
}
482508

@@ -547,6 +573,7 @@ static struct osnoise_tool
547573
{
548574
struct osnoise_tool *top;
549575
int nr_cpus;
576+
int retval;
550577

551578
nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
552579

@@ -563,6 +590,16 @@ static struct osnoise_tool
563590
tep_register_event_handler(top->trace.tep, -1, "ftrace", "timerlat",
564591
timerlat_top_handler, top);
565592

593+
/*
594+
* If no auto analysis, we are ready.
595+
*/
596+
if (params->no_aa)
597+
return top;
598+
599+
retval = timerlat_aa_init(top, nr_cpus, params->dump_tasks);
600+
if (retval)
601+
goto out_err;
602+
566603
return top;
567604

568605
out_err:
@@ -688,6 +725,10 @@ int timerlat_top_main(int argc, char *argv[])
688725

689726
if (trace_is_off(&top->trace, &record->trace)) {
690727
printf("rtla timerlat hit stop tracing\n");
728+
729+
if (!params->no_aa)
730+
timerlat_auto_analysis(params->stop_us, params->stop_total_us);
731+
691732
if (params->trace_output) {
692733
printf(" Saving trace to %s\n", params->trace_output);
693734
save_trace_to_file(record->trace.inst, params->trace_output);
@@ -701,6 +742,7 @@ int timerlat_top_main(int argc, char *argv[])
701742
params->events = NULL;
702743
out_free:
703744
timerlat_free_top(top->data);
745+
timerlat_aa_destroy();
704746
osnoise_destroy_tool(record);
705747
osnoise_destroy_tool(top);
706748
free(params);

0 commit comments

Comments
 (0)