Skip to content

Commit 658eb5a

Browse files
Wang Yaxinakpm00
authored andcommitted
delayacct: add delay max to record delay peak
Introduce the use cases of delay max, which can help quickly detect potential abnormal delays in the system and record the types and specific details of delay spikes. Problem ======== Delay accounting can track the average delay of processes to show system workload. However, when a process experiences a significant delay, maybe a delay spike, which adversely affects performance, getdelays can only display the average system delay over a period of time. Yet, average delay is unhelpful for diagnosing delay peak. It is not even possible to determine which type of delay has spiked, as this information might be masked by the average delay. Solution ========= the 'delay max' can display delay peak since the system's startup, which can record potential abnormal delays over time, including the type of delay and the maximum delay. This is helpful for quickly identifying crash caused by delay. Use case ========= bash# ./getdelays -d -p 244 print delayacct stats ON PID 244 CPU count real total virtual total delay total delay average delay max 68 192000000 213676651 705643 0.010ms 0.306381ms IO count delay total delay average delay max 0 0 0.000ms 0.000000ms SWAP count delay total delay average delay max 0 0 0.000ms 0.000000ms RECLAIM count delay total delay average delay max 0 0 0.000ms 0.000000ms THRASHING count delay total delay average delay max 0 0 0.000ms 0.000000ms COMPACT count delay total delay average delay max 0 0 0.000ms 0.000000ms WPCOPY count delay total delay average delay max 235 15648284 0.067ms 0.263842ms IRQ count delay total delay average delay max 0 0 0.000ms 0.000000ms [[email protected]: update docs and fix some spelling errors] Link: https://lkml.kernel.org/r/[email protected] Link: https://lkml.kernel.org/r/[email protected] Co-developed-by: Wang Yong <[email protected]> Signed-off-by: Wang Yong <[email protected]> Co-developed-by: xu xin <[email protected]> Signed-off-by: xu xin <[email protected]> Co-developed-by: Wang Yaxin <[email protected]> Signed-off-by: Wang Yaxin <[email protected]> Signed-off-by: Kun Jiang <[email protected]> Cc: Balbir Singh <[email protected]> Cc: David Hildenbrand <[email protected]> Cc: Fan Yu <[email protected]> Cc: Peilin He <[email protected]> Cc: tuqiang <[email protected]> Cc: Yang Yang <[email protected]> Cc: ye xingchen <[email protected]> Cc: Yunkai Zhang <[email protected]> Signed-off-by: Andrew Morton <[email protected]>
1 parent 1e18572 commit 658eb5a

File tree

7 files changed

+105
-57
lines changed

7 files changed

+105
-57
lines changed

Documentation/accounting/delay-accounting.rst

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -100,29 +100,29 @@ Get delays, since system boot, for pid 10::
100100
# ./getdelays -d -p 10
101101
(output similar to next case)
102102

103-
Get sum of delays, since system boot, for all pids with tgid 5::
103+
Get sum and peak of delays, since system boot, for all pids with tgid 242::
104104

105-
# ./getdelays -d -t 5
105+
bash-4.4# ./getdelays -d -t 242
106106
print delayacct stats ON
107-
TGID 5
108-
109-
110-
CPU count real total virtual total delay total delay average
111-
8 7000000 6872122 3382277 0.423ms
112-
IO count delay total delay average
113-
0 0 0.000ms
114-
SWAP count delay total delay average
115-
0 0 0.000ms
116-
RECLAIM count delay total delay average
117-
0 0 0.000ms
118-
THRASHING count delay total delay average
119-
0 0 0.000ms
120-
COMPACT count delay total delay average
121-
0 0 0.000ms
122-
WPCOPY count delay total delay average
123-
0 0 0.000ms
124-
IRQ count delay total delay average
125-
0 0 0.000ms
107+
TGID 242
108+
109+
110+
CPU count real total virtual total delay total delay average delay max
111+
239 296000000 307724885 1127792 0.005ms 0.238382ms
112+
IO count delay total delay average delay max
113+
0 0 0.000ms 0.000000ms
114+
SWAP count delay total delay average delay max
115+
0 0 0.000ms 0.000000ms
116+
RECLAIM count delay total delay average delay max
117+
0 0 0.000ms 0.000000ms
118+
THRASHING count delay total delay average delay max
119+
0 0 0.000ms 0.000000ms
120+
COMPACT count delay total delay average delay max
121+
0 0 0.000ms 0.000000ms
122+
WPCOPY count delay total delay average delay max
123+
230 19100476 0.083ms 0.383822ms
124+
IRQ count delay total delay average delay max
125+
0 0 0.000ms 0.000000ms
126126

127127
Get IO accounting for pid 1, it works only with -p::
128128

include/linux/delayacct.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,25 +29,32 @@ struct task_delay_info {
2929
* XXX_delay contains the accumulated delay time in nanoseconds.
3030
*/
3131
u64 blkio_start;
32+
u64 blkio_delay_max;
3233
u64 blkio_delay; /* wait for sync block io completion */
3334
u64 swapin_start;
35+
u64 swapin_delay_max;
3436
u64 swapin_delay; /* wait for swapin */
3537
u32 blkio_count; /* total count of the number of sync block */
3638
/* io operations performed */
3739
u32 swapin_count; /* total count of swapin */
3840

3941
u64 freepages_start;
42+
u64 freepages_delay_max;
4043
u64 freepages_delay; /* wait for memory reclaim */
4144

4245
u64 thrashing_start;
46+
u64 thrashing_delay_max;
4347
u64 thrashing_delay; /* wait for thrashing page */
4448

4549
u64 compact_start;
50+
u64 compact_delay_max;
4651
u64 compact_delay; /* wait for memory compact */
4752

4853
u64 wpcopy_start;
54+
u64 wpcopy_delay_max;
4955
u64 wpcopy_delay; /* wait for write-protect copy */
5056

57+
u64 irq_delay_max;
5158
u64 irq_delay; /* wait for IRQ/SOFTIRQ */
5259

5360
u32 freepages_count; /* total count of memory reclaim */

include/linux/sched.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,9 @@ struct sched_info {
398398
/* Time spent waiting on a runqueue: */
399399
unsigned long long run_delay;
400400

401+
/* Max time spent waiting on a runqueue: */
402+
unsigned long long max_run_delay;
403+
401404
/* Timestamps: */
402405

403406
/* When did we last run on a CPU? */

include/uapi/linux/taskstats.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ struct taskstats {
7272
*/
7373
__u64 cpu_count __attribute__((aligned(8)));
7474
__u64 cpu_delay_total;
75+
__u64 cpu_delay_max;
7576

7677
/* Following four fields atomically updated using task->delays->lock */
7778

@@ -80,10 +81,12 @@ struct taskstats {
8081
*/
8182
__u64 blkio_count;
8283
__u64 blkio_delay_total;
84+
__u64 blkio_delay_max;
8385

8486
/* Delay waiting for page fault I/O (swap in only) */
8587
__u64 swapin_count;
8688
__u64 swapin_delay_total;
89+
__u64 swapin_delay_max;
8790

8891
/* cpu "wall-clock" running time
8992
* On some architectures, value will adjust for cpu time stolen
@@ -166,17 +169,20 @@ struct taskstats {
166169
/* Delay waiting for memory reclaim */
167170
__u64 freepages_count;
168171
__u64 freepages_delay_total;
172+
__u64 freepages_delay_max;
169173

170174
/* Delay waiting for thrashing page */
171175
__u64 thrashing_count;
172176
__u64 thrashing_delay_total;
177+
__u64 thrashing_delay_max;
173178

174179
/* v10: 64-bit btime to avoid overflow */
175180
__u64 ac_btime64; /* 64-bit begin time */
176181

177182
/* v11: Delay waiting for memory compact */
178183
__u64 compact_count;
179184
__u64 compact_delay_total;
185+
__u64 compact_delay_max;
180186

181187
/* v12 begin */
182188
__u32 ac_tgid; /* thread group ID */
@@ -198,10 +204,13 @@ struct taskstats {
198204
/* v13: Delay waiting for write-protect copy */
199205
__u64 wpcopy_count;
200206
__u64 wpcopy_delay_total;
207+
__u64 wpcopy_delay_max;
201208

202209
/* v14: Delay waiting for IRQ/SOFTIRQ */
203210
__u64 irq_count;
204211
__u64 irq_delay_total;
212+
__u64 irq_delay_max;
213+
/* v15: add Delay max */
205214
};
206215

207216

kernel/delayacct.c

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,9 @@ void __delayacct_tsk_init(struct task_struct *tsk)
9393

9494
/*
9595
* Finish delay accounting for a statistic using its timestamps (@start),
96-
* accumalator (@total) and @count
96+
* accumulator (@total) and @count
9797
*/
98-
static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count)
98+
static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, u64 *max)
9999
{
100100
s64 ns = local_clock() - *start;
101101
unsigned long flags;
@@ -104,6 +104,8 @@ static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *cou
104104
raw_spin_lock_irqsave(lock, flags);
105105
*total += ns;
106106
(*count)++;
107+
if (ns > *max)
108+
*max = ns;
107109
raw_spin_unlock_irqrestore(lock, flags);
108110
}
109111
}
@@ -122,7 +124,8 @@ void __delayacct_blkio_end(struct task_struct *p)
122124
delayacct_end(&p->delays->lock,
123125
&p->delays->blkio_start,
124126
&p->delays->blkio_delay,
125-
&p->delays->blkio_count);
127+
&p->delays->blkio_count,
128+
&p->delays->blkio_delay_max);
126129
}
127130

128131
int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
@@ -153,31 +156,38 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
153156

154157
d->cpu_count += t1;
155158

159+
d->cpu_delay_max = tsk->sched_info.max_run_delay;
156160
tmp = (s64)d->cpu_delay_total + t2;
157161
d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
158-
159162
tmp = (s64)d->cpu_run_virtual_total + t3;
163+
160164
d->cpu_run_virtual_total =
161165
(tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp;
162166

163167
if (!tsk->delays)
164168
return 0;
165169

166170
/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
167-
168171
raw_spin_lock_irqsave(&tsk->delays->lock, flags);
172+
d->blkio_delay_max = tsk->delays->blkio_delay_max;
169173
tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
170174
d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
175+
d->swapin_delay_max = tsk->delays->swapin_delay_max;
171176
tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
172177
d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
178+
d->freepages_delay_max = tsk->delays->freepages_delay_max;
173179
tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
174180
d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
181+
d->thrashing_delay_max = tsk->delays->thrashing_delay_max;
175182
tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay;
176183
d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
184+
d->compact_delay_max = tsk->delays->compact_delay_max;
177185
tmp = d->compact_delay_total + tsk->delays->compact_delay;
178186
d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp;
187+
d->wpcopy_delay_max = tsk->delays->wpcopy_delay_max;
179188
tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay;
180189
d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp;
190+
d->irq_delay_max = tsk->delays->irq_delay_max;
181191
tmp = d->irq_delay_total + tsk->delays->irq_delay;
182192
d->irq_delay_total = (tmp < d->irq_delay_total) ? 0 : tmp;
183193
d->blkio_count += tsk->delays->blkio_count;
@@ -213,7 +223,8 @@ void __delayacct_freepages_end(void)
213223
delayacct_end(&current->delays->lock,
214224
&current->delays->freepages_start,
215225
&current->delays->freepages_delay,
216-
&current->delays->freepages_count);
226+
&current->delays->freepages_count,
227+
&current->delays->freepages_delay_max);
217228
}
218229

219230
void __delayacct_thrashing_start(bool *in_thrashing)
@@ -235,7 +246,8 @@ void __delayacct_thrashing_end(bool *in_thrashing)
235246
delayacct_end(&current->delays->lock,
236247
&current->delays->thrashing_start,
237248
&current->delays->thrashing_delay,
238-
&current->delays->thrashing_count);
249+
&current->delays->thrashing_count,
250+
&current->delays->thrashing_delay_max);
239251
}
240252

241253
void __delayacct_swapin_start(void)
@@ -248,7 +260,8 @@ void __delayacct_swapin_end(void)
248260
delayacct_end(&current->delays->lock,
249261
&current->delays->swapin_start,
250262
&current->delays->swapin_delay,
251-
&current->delays->swapin_count);
263+
&current->delays->swapin_count,
264+
&current->delays->swapin_delay_max);
252265
}
253266

254267
void __delayacct_compact_start(void)
@@ -261,7 +274,8 @@ void __delayacct_compact_end(void)
261274
delayacct_end(&current->delays->lock,
262275
&current->delays->compact_start,
263276
&current->delays->compact_delay,
264-
&current->delays->compact_count);
277+
&current->delays->compact_count,
278+
&current->delays->compact_delay_max);
265279
}
266280

267281
void __delayacct_wpcopy_start(void)
@@ -274,7 +288,8 @@ void __delayacct_wpcopy_end(void)
274288
delayacct_end(&current->delays->lock,
275289
&current->delays->wpcopy_start,
276290
&current->delays->wpcopy_delay,
277-
&current->delays->wpcopy_count);
291+
&current->delays->wpcopy_count,
292+
&current->delays->wpcopy_delay_max);
278293
}
279294

280295
void __delayacct_irq(struct task_struct *task, u32 delta)
@@ -284,6 +299,8 @@ void __delayacct_irq(struct task_struct *task, u32 delta)
284299
raw_spin_lock_irqsave(&task->delays->lock, flags);
285300
task->delays->irq_delay += delta;
286301
task->delays->irq_count++;
302+
if (delta > task->delays->irq_delay_max)
303+
task->delays->irq_delay_max = delta;
287304
raw_spin_unlock_irqrestore(&task->delays->lock, flags);
288305
}
289306

kernel/sched/stats.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,8 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
244244
delta = rq_clock(rq) - t->sched_info.last_queued;
245245
t->sched_info.last_queued = 0;
246246
t->sched_info.run_delay += delta;
247-
247+
if (delta > t->sched_info.max_run_delay)
248+
t->sched_info.max_run_delay = delta;
248249
rq_sched_info_dequeue(rq, delta);
249250
}
250251

@@ -266,6 +267,8 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t)
266267
t->sched_info.run_delay += delta;
267268
t->sched_info.last_arrival = now;
268269
t->sched_info.pcount++;
270+
if (delta > t->sched_info.max_run_delay)
271+
t->sched_info.max_run_delay = delta;
269272

270273
rq_sched_info_arrive(rq, delta);
271274
}

tools/accounting/getdelays.c

Lines changed: 34 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -192,60 +192,69 @@ static int get_family_id(int sd)
192192
}
193193

194194
#define average_ms(t, c) (t / 1000000ULL / (c ? c : 1))
195+
#define delay_max_ms(t) (t / 1000000ULL)
195196

196197
static void print_delayacct(struct taskstats *t)
197198
{
198-
printf("\n\nCPU %15s%15s%15s%15s%15s\n"
199-
" %15llu%15llu%15llu%15llu%15.3fms\n"
200-
"IO %15s%15s%15s\n"
201-
" %15llu%15llu%15.3fms\n"
202-
"SWAP %15s%15s%15s\n"
203-
" %15llu%15llu%15.3fms\n"
204-
"RECLAIM %12s%15s%15s\n"
205-
" %15llu%15llu%15.3fms\n"
206-
"THRASHING%12s%15s%15s\n"
207-
" %15llu%15llu%15.3fms\n"
208-
"COMPACT %12s%15s%15s\n"
209-
" %15llu%15llu%15.3fms\n"
210-
"WPCOPY %12s%15s%15s\n"
211-
" %15llu%15llu%15.3fms\n"
212-
"IRQ %15s%15s%15s\n"
213-
" %15llu%15llu%15.3fms\n",
199+
printf("\n\nCPU %15s%15s%15s%15s%15s%15s\n"
200+
" %15llu%15llu%15llu%15llu%15.3fms%13.6fms\n"
201+
"IO %15s%15s%15s%15s\n"
202+
" %15llu%15llu%15.3fms%13.6fms\n"
203+
"SWAP %15s%15s%15s%15s\n"
204+
" %15llu%15llu%15.3fms%13.6fms\n"
205+
"RECLAIM %12s%15s%15s%15s\n"
206+
" %15llu%15llu%15.3fms%13.6fms\n"
207+
"THRASHING%12s%15s%15s%15s\n"
208+
" %15llu%15llu%15.3fms%13.6fms\n"
209+
"COMPACT %12s%15s%15s%15s\n"
210+
" %15llu%15llu%15.3fms%13.6fms\n"
211+
"WPCOPY %12s%15s%15s%15s\n"
212+
" %15llu%15llu%15.3fms%13.6fms\n"
213+
"IRQ %15s%15s%15s%15s\n"
214+
" %15llu%15llu%15.3fms%13.6fms\n",
214215
"count", "real total", "virtual total",
215-
"delay total", "delay average",
216+
"delay total", "delay average", "delay max",
216217
(unsigned long long)t->cpu_count,
217218
(unsigned long long)t->cpu_run_real_total,
218219
(unsigned long long)t->cpu_run_virtual_total,
219220
(unsigned long long)t->cpu_delay_total,
220221
average_ms((double)t->cpu_delay_total, t->cpu_count),
221-
"count", "delay total", "delay average",
222+
delay_max_ms((double)t->cpu_delay_max),
223+
"count", "delay total", "delay average", "delay max",
222224
(unsigned long long)t->blkio_count,
223225
(unsigned long long)t->blkio_delay_total,
224226
average_ms((double)t->blkio_delay_total, t->blkio_count),
225-
"count", "delay total", "delay average",
227+
delay_max_ms((double)t->blkio_delay_max),
228+
"count", "delay total", "delay average", "delay max",
226229
(unsigned long long)t->swapin_count,
227230
(unsigned long long)t->swapin_delay_total,
228231
average_ms((double)t->swapin_delay_total, t->swapin_count),
229-
"count", "delay total", "delay average",
232+
delay_max_ms((double)t->swapin_delay_max),
233+
"count", "delay total", "delay average", "delay max",
230234
(unsigned long long)t->freepages_count,
231235
(unsigned long long)t->freepages_delay_total,
232236
average_ms((double)t->freepages_delay_total, t->freepages_count),
233-
"count", "delay total", "delay average",
237+
delay_max_ms((double)t->freepages_delay_max),
238+
"count", "delay total", "delay average", "delay max",
234239
(unsigned long long)t->thrashing_count,
235240
(unsigned long long)t->thrashing_delay_total,
236241
average_ms((double)t->thrashing_delay_total, t->thrashing_count),
237-
"count", "delay total", "delay average",
242+
delay_max_ms((double)t->thrashing_delay_max),
243+
"count", "delay total", "delay average", "delay max",
238244
(unsigned long long)t->compact_count,
239245
(unsigned long long)t->compact_delay_total,
240246
average_ms((double)t->compact_delay_total, t->compact_count),
241-
"count", "delay total", "delay average",
247+
delay_max_ms((double)t->compact_delay_max),
248+
"count", "delay total", "delay average", "delay max",
242249
(unsigned long long)t->wpcopy_count,
243250
(unsigned long long)t->wpcopy_delay_total,
244251
average_ms((double)t->wpcopy_delay_total, t->wpcopy_count),
245-
"count", "delay total", "delay average",
252+
delay_max_ms((double)t->wpcopy_delay_max),
253+
"count", "delay total", "delay average", "delay max",
246254
(unsigned long long)t->irq_count,
247255
(unsigned long long)t->irq_delay_total,
248-
average_ms((double)t->irq_delay_total, t->irq_count));
256+
average_ms((double)t->irq_delay_total, t->irq_count),
257+
delay_max_ms((double)t->irq_delay_max));
249258
}
250259

251260
static void task_context_switch_counts(struct taskstats *t)

0 commit comments

Comments
 (0)