Commit a485ea9
tracing: Fix irqsoff and wakeup latency tracers when using function graph
The function graph tracer has become generic so that kretprobes and BPF
can use it along with function graph tracing itself. Some of the
infrastructure was specific to function graph tracing, such as recording
the calltime and return time of the functions. Calling the clock code on
a high-volume function does add overhead.

The calculation of the calltime was therefore removed from the generic
code and placed into the function graph tracer itself, so that the other
users did not incur this overhead, as they do not need that timestamp.
The calltime field was still kept in the generic return entry structure,
and the function graph return callback filled it in, as that structure
is passed to other code.

But this broke both the irqsoff and wakeup latency tracers, which still
depend on the trace structure containing the calltime when the
display-graph option is set, as they use some of the same functions that
the function graph tracer uses. Now the calltime was never set and was
just zero. This caused the computed function time to be the absolute
value of the return timestamp and not the length of the function.

 # cd /sys/kernel/tracing
 # echo 1 > options/display-graph
 # echo irqsoff > current_tracer

The tracers went from:

 #   REL TIME      CPU  TASK/PID       ||||  DURATION                  FUNCTION CALLS
 #      |          |     |    |        ||||   |   |                     |   |   |   |
         0 us |   4)  <idle>-0    |  d..1. |  0.000 us |  irqentry_enter();
         3 us |   4)  <idle>-0    |  d..2. |            |  irq_enter_rcu() {
         4 us |   4)  <idle>-0    |  d..2. |  0.431 us |  preempt_count_add();
         5 us |   4)  <idle>-0    |  d.h2. |            |  tick_irq_enter() {
         5 us |   4)  <idle>-0    |  d.h2. |  0.433 us |  tick_check_oneshot_broadcast_this_cpu();
         6 us |   4)  <idle>-0    |  d.h2. |  2.426 us |  ktime_get();
         9 us |   4)  <idle>-0    |  d.h2. |            |  tick_nohz_stop_idle() {
        10 us |   4)  <idle>-0    |  d.h2. |  0.398 us |  nr_iowait_cpu();
        11 us |   4)  <idle>-0    |  d.h1. |  1.903 us |  }
        11 us |   4)  <idle>-0    |  d.h2. |            |  tick_do_update_jiffies64() {
        12 us |   4)  <idle>-0    |  d.h2. |            |  _raw_spin_lock() {
        12 us |   4)  <idle>-0    |  d.h2. |  0.360 us |  preempt_count_add();
        13 us |   4)  <idle>-0    |  d.h3. |  0.354 us |  do_raw_spin_lock();
        14 us |   4)  <idle>-0    |  d.h2. |  2.207 us |  }
        15 us |   4)  <idle>-0    |  d.h3. |  0.428 us |  calc_global_load();
        16 us |   4)  <idle>-0    |  d.h3. |            |  _raw_spin_unlock() {
        16 us |   4)  <idle>-0    |  d.h3. |  0.380 us |  do_raw_spin_unlock();
        17 us |   4)  <idle>-0    |  d.h3. |  0.334 us |  preempt_count_sub();
        18 us |   4)  <idle>-0    |  d.h1. |  1.768 us |  }
        18 us |   4)  <idle>-0    |  d.h2. |            |  update_wall_time() {
 [..]

To:

 #   REL TIME      CPU  TASK/PID       ||||  DURATION                  FUNCTION CALLS
 #      |          |     |    |        ||||   |   |                     |   |   |   |
         0 us |   5)  <idle>-0    |  d.s2. |  0.000 us      |  _raw_spin_lock_irqsave();
         0 us |   5)  <idle>-0    |  d.s3. |  312159583 us  |  preempt_count_add();
         2 us |   5)  <idle>-0    |  d.s4. |  312159585 us  |  do_raw_spin_lock();
         3 us |   5)  <idle>-0    |  d.s4. |                |  _raw_spin_unlock() {
         3 us |   5)  <idle>-0    |  d.s4. |  312159586 us  |  do_raw_spin_unlock();
         4 us |   5)  <idle>-0    |  d.s4. |  312159587 us  |  preempt_count_sub();
         4 us |   5)  <idle>-0    |  d.s2. |  312159587 us  |  }
         5 us |   5)  <idle>-0    |  d.s3. |                |  _raw_spin_lock() {
         5 us |   5)  <idle>-0    |  d.s3. |  312159588 us  |  preempt_count_add();
         6 us |   5)  <idle>-0    |  d.s4. |  312159589 us  |  do_raw_spin_lock();
         7 us |   5)  <idle>-0    |  d.s3. |  312159590 us  |  }
         8 us |   5)  <idle>-0    |  d.s4. |  312159591 us  |  calc_wheel_index();
         9 us |   5)  <idle>-0    |  d.s4. |                |  enqueue_timer() {
         9 us |   5)  <idle>-0    |  d.s4. |                |  wake_up_nohz_cpu() {
        11 us |   5)  <idle>-0    |  d.s4. |                |  native_smp_send_reschedule() {
        11 us |   5)  <idle>-0    |  d.s4. |  312171987 us  |  default_send_IPI_single_phys();
     12408 us |   5)  <idle>-0    |  d.s3. |  312171990 us  |  }
     12408 us |   5)  <idle>-0    |  d.s3. |  312171991 us  |  }
     12409 us |   5)  <idle>-0    |  d.s3. |  312171991 us  |  }

Here the time printed for each function is the return timestamp minus
zero, not the length of time the function actually ran.

Have these tracers also save the calltime in the fgraph data section and
retrieve it again on return to get the correct timings back.

Cc: Masami Hiramatsu <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>
Cc: Mark Rutland <[email protected]>
Link: https://lore.kernel.org/[email protected]
Fixes: f1f36e2 ("ftrace: Have calltime be saved in the fgraph storage")
Signed-off-by: Steven Rostedt (Google) <[email protected]>
1 parent 5bc55a3 commit a485ea9
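To make the bogus numbers above concrete: the graph output derives each
DURATION as the return timestamp minus the saved calltime, so a calltime
that is never filled in (zero) makes the printed "duration" equal the
absolute return timestamp. A toy userspace illustration of that
arithmetic (not kernel code; the value is taken from the broken trace
above):

#include <stdio.h>

int main(void)
{
	/* Regression: the generic fgraph code no longer set calltime. */
	unsigned long long calltime = 0;
	/* Absolute trace-clock value when the function returned (us). */
	unsigned long long rettime = 312159583ULL;

	/* duration = rettime - calltime = rettime - 0 = rettime */
	printf("duration = %llu us\n", rettime - calltime);
	return 0;
}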

2 files changed, +28 -0 lines changed

kernel/trace/trace_irqsoff.c

Lines changed: 14 additions & 0 deletions
@@ -182,6 +182,7 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace,
 	struct trace_array_cpu *data;
 	unsigned long flags;
 	unsigned int trace_ctx;
+	u64 *calltime;
 	int ret;
 
 	if (ftrace_graph_ignore_func(gops, trace))
@@ -199,6 +200,12 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace,
 	if (!func_prolog_dec(tr, &data, &flags))
 		return 0;
 
+	calltime = fgraph_reserve_data(gops->idx, sizeof(*calltime));
+	if (!calltime)
+		return 0;
+
+	*calltime = trace_clock_local();
+
 	trace_ctx = tracing_gen_ctx_flags(flags);
 	ret = __trace_graph_entry(tr, trace, trace_ctx);
 	atomic_dec(&data->disabled);
@@ -213,12 +220,19 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace,
 	struct trace_array_cpu *data;
 	unsigned long flags;
 	unsigned int trace_ctx;
+	u64 *calltime;
+	int size;
 
 	ftrace_graph_addr_finish(gops, trace);
 
 	if (!func_prolog_dec(tr, &data, &flags))
 		return;
 
+	calltime = fgraph_retrieve_data(gops->idx, &size);
+	if (!calltime)
+		return;
+	trace->calltime = *calltime;
+
 	trace_ctx = tracing_gen_ctx_flags(flags);
 	__trace_graph_return(tr, trace, trace_ctx);
 	atomic_dec(&data->disabled);
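The hunks above add the same save/retrieve pattern to both the entry and
return handlers, and the wakeup tracer below repeats it. A minimal
consolidated sketch of that pattern, for reference: the
fgraph_reserve_data(), fgraph_retrieve_data(), and trace_clock_local()
calls are the real kernel APIs used in this diff; the callback names
my_graph_entry()/my_graph_return() are hypothetical, and the signatures
mirror the tracer callbacks in this commit.

#include <linux/ftrace.h>
#include <linux/trace_clock.h>

static int my_graph_entry(struct ftrace_graph_ent *trace,
			  struct fgraph_ops *gops)
{
	u64 *calltime;

	/* Reserve per-call storage in the fgraph data section. */
	calltime = fgraph_reserve_data(gops->idx, sizeof(*calltime));
	if (!calltime)
		return 0;	/* no room: do not hook this return */

	/* Stamp the call time ourselves; generic code no longer does. */
	*calltime = trace_clock_local();
	return 1;
}

static void my_graph_return(struct ftrace_graph_ret *trace,
			    struct fgraph_ops *gops)
{
	u64 *calltime;
	int size;

	/* Read back what the entry callback stored for this call. */
	calltime = fgraph_retrieve_data(gops->idx, &size);
	if (!calltime)
		return;

	/* Restore the field the duration calculation depends on. */
	trace->calltime = *calltime;
}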

kernel/trace/trace_sched_wakeup.c

Lines changed: 14 additions & 0 deletions
@@ -118,6 +118,7 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace,
 	struct trace_array *tr = wakeup_trace;
 	struct trace_array_cpu *data;
 	unsigned int trace_ctx;
+	u64 *calltime;
 	int ret = 0;
 
 	if (ftrace_graph_ignore_func(gops, trace))
@@ -135,6 +136,12 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace,
 	if (!func_prolog_preempt_disable(tr, &data, &trace_ctx))
 		return 0;
 
+	calltime = fgraph_reserve_data(gops->idx, sizeof(*calltime));
+	if (!calltime)
+		return 0;
+
+	*calltime = trace_clock_local();
+
 	ret = __trace_graph_entry(tr, trace, trace_ctx);
 	atomic_dec(&data->disabled);
 	preempt_enable_notrace();
@@ -148,12 +155,19 @@ static void wakeup_graph_return(struct ftrace_graph_ret *trace,
 	struct trace_array *tr = wakeup_trace;
 	struct trace_array_cpu *data;
 	unsigned int trace_ctx;
+	u64 *calltime;
+	int size;
 
 	ftrace_graph_addr_finish(gops, trace);
 
 	if (!func_prolog_preempt_disable(tr, &data, &trace_ctx))
 		return;
 
+	calltime = fgraph_retrieve_data(gops->idx, &size);
+	if (!calltime)
+		return;
+	trace->calltime = *calltime;
+
 	__trace_graph_return(tr, trace, trace_ctx);
 	atomic_dec(&data->disabled);
 
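Both files wire callbacks like these into a static struct fgraph_ops. As
a closing usage note, a hedged sketch of how such an ops instance is
activated, reusing the hypothetical my_graph_entry()/my_graph_return()
from the sketch after the first file: register_ftrace_graph() and
unregister_ftrace_graph() are the real kernel entry points, and
registration assigns gops->idx, the slot the reserve/retrieve calls key
their per-call data on; the remaining names are illustrative.

#include <linux/ftrace.h>

static struct fgraph_ops my_gops = {
	.entryfunc = my_graph_entry,	/* from the sketch above */
	.retfunc   = my_graph_return,
};

static int my_tracer_start(void)
{
	/* Claims an fgraph slot and sets my_gops.idx on success. */
	return register_ftrace_graph(&my_gops);
}

static void my_tracer_stop(void)
{
	unregister_ftrace_graph(&my_gops);
}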
