Skip to content

Commit a4b6817

Browse files
Daeho JeongJaegeuk Kim
authored andcommitted
f2fs: introduce periodic iostat io latency traces
Whenever we notice some sluggish issues on our machines, we are always curious about how well all types of I/O in the f2fs filesystem are handled. But, it's hard to get this kind of real data. First of all, we need to reproduce the issue while turning on the profiling tool like blktrace, but the issue doesn't happen again easily. Second, with the intervention of any tools, the overall timing of the issue will be slightly changed and it sometimes makes us hard to figure it out. So, I added the feature printing out IO latency statistics tracepoint events, which are minimal things to understand filesystem's I/O related behaviors, into F2FS_IOSTAT kernel config. With "iostat_enable" sysfs node on, we can get this statistics info in a periodic way and it would cause the least overhead. [samples] f2fs_ckpt-254:1-507 [003] .... 2842.439683: f2fs_iostat_latency: dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count], rd_data [136/1/801], rd_node [136/1/1704], rd_meta [4/2/4], wr_sync_data [164/16/3331], wr_sync_node [152/3/648], wr_sync_meta [160/2/4243], wr_async_data [24/13/15], wr_async_node [0/0/0], wr_async_meta [0/0/0] f2fs_ckpt-254:1-507 [002] .... 2845.450514: f2fs_iostat_latency: dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count], rd_data [60/3/456], rd_node [60/3/1258], rd_meta [0/0/1], wr_sync_data [120/12/2285], wr_sync_node [88/5/428], wr_sync_meta [52/6/2990], wr_async_data [4/1/3], wr_async_node [0/0/0], wr_async_meta [0/0/0] Signed-off-by: Daeho Jeong <[email protected]> Reviewed-by: Chao Yu <[email protected]> Signed-off-by: Jaegeuk Kim <[email protected]>
1 parent 5211874 commit a4b6817

File tree

6 files changed

+315
-6
lines changed

6 files changed

+315
-6
lines changed

fs/f2fs/data.c

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,10 @@ static void f2fs_post_read_work(struct work_struct *work)
271271
static void f2fs_read_end_io(struct bio *bio)
272272
{
273273
struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
274-
struct bio_post_read_ctx *ctx = bio->bi_private;
274+
struct bio_post_read_ctx *ctx;
275+
276+
iostat_update_and_unbind_ctx(bio, 0);
277+
ctx = bio->bi_private;
275278

276279
if (time_to_inject(sbi, FAULT_READ_IO)) {
277280
f2fs_show_injection_info(sbi, FAULT_READ_IO);
@@ -293,10 +296,13 @@ static void f2fs_read_end_io(struct bio *bio)
293296

294297
static void f2fs_write_end_io(struct bio *bio)
295298
{
296-
struct f2fs_sb_info *sbi = bio->bi_private;
299+
struct f2fs_sb_info *sbi;
297300
struct bio_vec *bvec;
298301
struct bvec_iter_all iter_all;
299302

303+
iostat_update_and_unbind_ctx(bio, 1);
304+
sbi = bio->bi_private;
305+
300306
if (time_to_inject(sbi, FAULT_WRITE_IO)) {
301307
f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
302308
bio->bi_status = BLK_STS_IOERR;
@@ -400,6 +406,8 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
400406
bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
401407
fio->type, fio->temp);
402408
}
409+
iostat_alloc_and_bind_ctx(sbi, bio, NULL);
410+
403411
if (fio->io_wbc)
404412
wbc_init_bio(fio->io_wbc, bio);
405413

@@ -481,6 +489,8 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
481489
trace_f2fs_submit_read_bio(sbi->sb, type, bio);
482490
else
483491
trace_f2fs_submit_write_bio(sbi->sb, type, bio);
492+
493+
iostat_update_submit_ctx(bio, type);
484494
submit_bio(bio);
485495
}
486496

@@ -972,7 +982,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
972982
{
973983
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
974984
struct bio *bio;
975-
struct bio_post_read_ctx *ctx;
985+
struct bio_post_read_ctx *ctx = NULL;
976986
unsigned int post_read_steps = 0;
977987

978988
bio = bio_alloc_bioset(for_write ? GFP_NOIO : GFP_KERNEL,
@@ -1008,6 +1018,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
10081018
ctx->fs_blkaddr = blkaddr;
10091019
bio->bi_private = ctx;
10101020
}
1021+
iostat_alloc_and_bind_ctx(sbi, bio, ctx);
10111022

10121023
return bio;
10131024
}
@@ -2253,7 +2264,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
22532264
if (bio_add_page(bio, page, blocksize, 0) < blocksize)
22542265
goto submit_and_realloc;
22552266

2256-
ctx = bio->bi_private;
2267+
ctx = get_post_read_ctx(bio);
22572268
ctx->enabled_steps |= STEP_DECOMPRESS;
22582269
refcount_inc(&dic->refcnt);
22592270

fs/f2fs/f2fs.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1781,6 +1781,10 @@ struct f2fs_sb_info {
17811781
bool iostat_enable;
17821782
unsigned long iostat_next_period;
17831783
unsigned int iostat_period_ms;
1784+
1785+
/* For io latency related statistics info in one iostat period */
1786+
spinlock_t iostat_lat_lock;
1787+
struct iostat_lat_info *iostat_io_lat;
17841788
#endif
17851789
};
17861790

fs/f2fs/iostat.c

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
#include "iostat.h"
1515
#include <trace/events/f2fs.h>
1616

17+
#define NUM_PREALLOC_IOSTAT_CTXS 128
18+
static struct kmem_cache *bio_iostat_ctx_cache;
19+
static mempool_t *bio_iostat_ctx_pool;
20+
1721
int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset)
1822
{
1923
struct super_block *sb = seq->private;
@@ -81,6 +85,32 @@ int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset)
8185
return 0;
8286
}
8387

88+
static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
89+
{
90+
int io, idx = 0;
91+
unsigned int cnt;
92+
struct f2fs_iostat_latency iostat_lat[MAX_IO_TYPE][NR_PAGE_TYPE];
93+
struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
94+
95+
spin_lock_irq(&sbi->iostat_lat_lock);
96+
for (idx = 0; idx < MAX_IO_TYPE; idx++) {
97+
for (io = 0; io < NR_PAGE_TYPE; io++) {
98+
cnt = io_lat->bio_cnt[idx][io];
99+
iostat_lat[idx][io].peak_lat =
100+
jiffies_to_msecs(io_lat->peak_lat[idx][io]);
101+
iostat_lat[idx][io].cnt = cnt;
102+
iostat_lat[idx][io].avg_lat = cnt ?
103+
jiffies_to_msecs(io_lat->sum_lat[idx][io]) / cnt : 0;
104+
io_lat->sum_lat[idx][io] = 0;
105+
io_lat->peak_lat[idx][io] = 0;
106+
io_lat->bio_cnt[idx][io] = 0;
107+
}
108+
}
109+
spin_unlock_irq(&sbi->iostat_lat_lock);
110+
111+
trace_f2fs_iostat_latency(sbi, iostat_lat);
112+
}
113+
84114
static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
85115
{
86116
unsigned long long iostat_diff[NR_IO_TYPE];
@@ -106,10 +136,13 @@ static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
106136
spin_unlock(&sbi->iostat_lock);
107137

108138
trace_f2fs_iostat(sbi, iostat_diff);
139+
140+
__record_iostat_latency(sbi);
109141
}
110142

111143
void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
112144
{
145+
struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
113146
int i;
114147

115148
spin_lock(&sbi->iostat_lock);
@@ -118,6 +151,10 @@ void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
118151
sbi->prev_rw_iostat[i] = 0;
119152
}
120153
spin_unlock(&sbi->iostat_lock);
154+
155+
spin_lock_irq(&sbi->iostat_lat_lock);
156+
memset(io_lat, 0, sizeof(struct iostat_lat_info));
157+
spin_unlock_irq(&sbi->iostat_lat_lock);
121158
}
122159

123160
void f2fs_update_iostat(struct f2fs_sb_info *sbi,
@@ -143,12 +180,108 @@ void f2fs_update_iostat(struct f2fs_sb_info *sbi,
143180
f2fs_record_iostat(sbi);
144181
}
145182

183+
static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
184+
int rw, bool is_sync)
185+
{
186+
unsigned long ts_diff;
187+
unsigned int iotype = iostat_ctx->type;
188+
unsigned long flags;
189+
struct f2fs_sb_info *sbi = iostat_ctx->sbi;
190+
struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
191+
int idx;
192+
193+
if (!sbi->iostat_enable)
194+
return;
195+
196+
ts_diff = jiffies - iostat_ctx->submit_ts;
197+
if (iotype >= META_FLUSH)
198+
iotype = META;
199+
200+
if (rw == 0) {
201+
idx = READ_IO;
202+
} else {
203+
if (is_sync)
204+
idx = WRITE_SYNC_IO;
205+
else
206+
idx = WRITE_ASYNC_IO;
207+
}
208+
209+
spin_lock_irqsave(&sbi->iostat_lat_lock, flags);
210+
io_lat->sum_lat[idx][iotype] += ts_diff;
211+
io_lat->bio_cnt[idx][iotype]++;
212+
if (ts_diff > io_lat->peak_lat[idx][iotype])
213+
io_lat->peak_lat[idx][iotype] = ts_diff;
214+
spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
215+
}
216+
217+
void iostat_update_and_unbind_ctx(struct bio *bio, int rw)
218+
{
219+
struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
220+
bool is_sync = bio->bi_opf & REQ_SYNC;
221+
222+
if (rw == 0)
223+
bio->bi_private = iostat_ctx->post_read_ctx;
224+
else
225+
bio->bi_private = iostat_ctx->sbi;
226+
__update_iostat_latency(iostat_ctx, rw, is_sync);
227+
mempool_free(iostat_ctx, bio_iostat_ctx_pool);
228+
}
229+
230+
void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
231+
struct bio *bio, struct bio_post_read_ctx *ctx)
232+
{
233+
struct bio_iostat_ctx *iostat_ctx;
234+
/* Due to the mempool, this never fails. */
235+
iostat_ctx = mempool_alloc(bio_iostat_ctx_pool, GFP_NOFS);
236+
iostat_ctx->sbi = sbi;
237+
iostat_ctx->submit_ts = 0;
238+
iostat_ctx->type = 0;
239+
iostat_ctx->post_read_ctx = ctx;
240+
bio->bi_private = iostat_ctx;
241+
}
242+
243+
int __init f2fs_init_iostat_processing(void)
244+
{
245+
bio_iostat_ctx_cache =
246+
kmem_cache_create("f2fs_bio_iostat_ctx",
247+
sizeof(struct bio_iostat_ctx), 0, 0, NULL);
248+
if (!bio_iostat_ctx_cache)
249+
goto fail;
250+
bio_iostat_ctx_pool =
251+
mempool_create_slab_pool(NUM_PREALLOC_IOSTAT_CTXS,
252+
bio_iostat_ctx_cache);
253+
if (!bio_iostat_ctx_pool)
254+
goto fail_free_cache;
255+
return 0;
256+
257+
fail_free_cache:
258+
kmem_cache_destroy(bio_iostat_ctx_cache);
259+
fail:
260+
return -ENOMEM;
261+
}
262+
263+
void f2fs_destroy_iostat_processing(void)
264+
{
265+
mempool_destroy(bio_iostat_ctx_pool);
266+
kmem_cache_destroy(bio_iostat_ctx_cache);
267+
}
268+
146269
int f2fs_init_iostat(struct f2fs_sb_info *sbi)
147270
{
148271
/* init iostat info */
149272
spin_lock_init(&sbi->iostat_lock);
273+
spin_lock_init(&sbi->iostat_lat_lock);
150274
sbi->iostat_enable = false;
151275
sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS;
276+
sbi->iostat_io_lat = f2fs_kzalloc(sbi, sizeof(struct iostat_lat_info),
277+
GFP_KERNEL);
278+
if (!sbi->iostat_io_lat)
279+
return -ENOMEM;
152280

153281
return 0;
154282
}
283+
284+
void f2fs_destroy_iostat(struct f2fs_sb_info *sbi)
285+
{
286+
kfree(sbi->iostat_io_lat);
287+
}

fs/f2fs/iostat.h

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,22 +6,79 @@
66
#ifndef __F2FS_IOSTAT_H__
77
#define __F2FS_IOSTAT_H__
88

9+
struct bio_post_read_ctx;
10+
911
#ifdef CONFIG_F2FS_IOSTAT
1012

1113
#define DEFAULT_IOSTAT_PERIOD_MS 3000
1214
#define MIN_IOSTAT_PERIOD_MS 100
1315
/* maximum period of iostat tracing is 1 day */
1416
#define MAX_IOSTAT_PERIOD_MS 8640000
1517

18+
enum {
19+
READ_IO,
20+
WRITE_SYNC_IO,
21+
WRITE_ASYNC_IO,
22+
MAX_IO_TYPE,
23+
};
24+
25+
struct iostat_lat_info {
26+
unsigned long sum_lat[MAX_IO_TYPE][NR_PAGE_TYPE]; /* sum of io latencies */
27+
unsigned long peak_lat[MAX_IO_TYPE][NR_PAGE_TYPE]; /* peak io latency */
28+
unsigned int bio_cnt[MAX_IO_TYPE][NR_PAGE_TYPE]; /* bio count */
29+
};
30+
1631
extern int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
1732
void *offset);
1833
extern void f2fs_reset_iostat(struct f2fs_sb_info *sbi);
1934
extern void f2fs_update_iostat(struct f2fs_sb_info *sbi,
2035
enum iostat_type type, unsigned long long io_bytes);
36+
37+
struct bio_iostat_ctx {
38+
struct f2fs_sb_info *sbi;
39+
unsigned long submit_ts;
40+
enum page_type type;
41+
struct bio_post_read_ctx *post_read_ctx;
42+
};
43+
44+
static inline void iostat_update_submit_ctx(struct bio *bio,
45+
enum page_type type)
46+
{
47+
struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
48+
49+
iostat_ctx->submit_ts = jiffies;
50+
iostat_ctx->type = type;
51+
}
52+
53+
static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
54+
{
55+
struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
56+
57+
return iostat_ctx->post_read_ctx;
58+
}
59+
60+
extern void iostat_update_and_unbind_ctx(struct bio *bio, int rw);
61+
extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
62+
struct bio *bio, struct bio_post_read_ctx *ctx);
63+
extern int f2fs_init_iostat_processing(void);
64+
extern void f2fs_destroy_iostat_processing(void);
2165
extern int f2fs_init_iostat(struct f2fs_sb_info *sbi);
66+
extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
2267
#else
2368
static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
2469
enum iostat_type type, unsigned long long io_bytes) {}
70+
static inline void iostat_update_and_unbind_ctx(struct bio *bio, int rw) {}
71+
static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
72+
struct bio *bio, struct bio_post_read_ctx *ctx) {}
73+
static inline void iostat_update_submit_ctx(struct bio *bio,
74+
enum page_type type) {}
75+
static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
76+
{
77+
return bio->bi_private;
78+
}
79+
static inline int f2fs_init_iostat_processing(void) { return 0; }
80+
static inline void f2fs_destroy_iostat_processing(void) {}
2581
static inline int f2fs_init_iostat(struct f2fs_sb_info *sbi) { return 0; }
82+
static inline void f2fs_destroy_iostat(struct f2fs_sb_info *sbi) {}
2683
#endif
2784
#endif /* __F2FS_IOSTAT_H__ */

fs/f2fs/super.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1574,6 +1574,7 @@ static void f2fs_put_super(struct super_block *sb)
15741574
#endif
15751575
fscrypt_free_dummy_policy(&F2FS_OPTION(sbi).dummy_enc_policy);
15761576
destroy_percpu_info(sbi);
1577+
f2fs_destroy_iostat(sbi);
15771578
for (i = 0; i < NR_PAGE_TYPE; i++)
15781579
kvfree(sbi->write_io[i]);
15791580
#ifdef CONFIG_UNICODE
@@ -4001,7 +4002,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
40014002

40024003
err = init_percpu_info(sbi);
40034004
if (err)
4004-
goto free_bio_info;
4005+
goto free_iostat;
40054006

40064007
if (F2FS_IO_ALIGNED(sbi)) {
40074008
sbi->write_io_dummy =
@@ -4334,6 +4335,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
43344335
mempool_destroy(sbi->write_io_dummy);
43354336
free_percpu:
43364337
destroy_percpu_info(sbi);
4338+
free_iostat:
4339+
f2fs_destroy_iostat(sbi);
43374340
free_bio_info:
43384341
for (i = 0; i < NR_PAGE_TYPE; i++)
43394342
kvfree(sbi->write_io[i]);
@@ -4476,9 +4479,12 @@ static int __init init_f2fs_fs(void)
44764479
err = f2fs_init_post_read_processing();
44774480
if (err)
44784481
goto free_root_stats;
4479-
err = f2fs_init_bio_entry_cache();
4482+
err = f2fs_init_iostat_processing();
44804483
if (err)
44814484
goto free_post_read;
4485+
err = f2fs_init_bio_entry_cache();
4486+
if (err)
4487+
goto free_iostat;
44824488
err = f2fs_init_bioset();
44834489
if (err)
44844490
goto free_bio_enrty_cache;
@@ -4500,6 +4506,8 @@ static int __init init_f2fs_fs(void)
45004506
f2fs_destroy_bioset();
45014507
free_bio_enrty_cache:
45024508
f2fs_destroy_bio_entry_cache();
4509+
free_iostat:
4510+
f2fs_destroy_iostat_processing();
45034511
free_post_read:
45044512
f2fs_destroy_post_read_processing();
45054513
free_root_stats:
@@ -4534,6 +4542,7 @@ static void __exit exit_f2fs_fs(void)
45344542
f2fs_destroy_compress_mempool();
45354543
f2fs_destroy_bioset();
45364544
f2fs_destroy_bio_entry_cache();
4545+
f2fs_destroy_iostat_processing();
45374546
f2fs_destroy_post_read_processing();
45384547
f2fs_destroy_root_stats();
45394548
unregister_filesystem(&f2fs_fs_type);

0 commit comments

Comments
 (0)