Skip to content

Commit 9e0af23

Browse files
Liu Bo and masoncl
authored and committed
Btrfs: fix task hang under heavy compressed write
This has been reported and discussed for a long time, and this hang occurs in both 3.15 and 3.16. Btrfs has migrated to the kernel workqueue, but that migration introduced this hang. Btrfs has a kind of work that is queued in an ordered way, which means that its ordered_func() must be processed in FIFO order, so it usually looks like -- normal_work_helper(arg) work = container_of(arg, struct btrfs_work, normal_work); work->func() <---- (we name it work X) for ordered_work in wq->ordered_list ordered_work->ordered_func() ordered_work->ordered_free() The hang is a rare case: first, when we find free space, we get an uncached block group, then we go to read its free space cache inode for free space information, so it will file a readahead request btrfs_readpages() for page that is not in page cache __do_readpage() submit_extent_page() btrfs_submit_bio_hook() btrfs_bio_wq_end_io() submit_bio() end_workqueue_bio() <--(ret by the 1st endio) queue a work (named work Y) for the 2nd, also the real, endio() So the hang occurs when work Y's work_struct and work X's work_struct happen to share the same address. A bit more explanation: A,B,C -- struct btrfs_work arg -- struct work_struct kthread: worker_thread() pick up a work_struct from @workList process_one_work(arg) worker->current_work = arg; <-- arg is A->normal_work worker->current_func(arg) normal_work_helper(arg) A = container_of(arg, struct btrfs_work, normal_work); A->func() A->ordered_func() A->ordered_free() <-- A gets freed B->ordered_func() submit_compressed_extents() find_free_extent() load_free_space_inode() ...
<-- (the above readahead stack) end_workqueue_bio() btrfs_queue_work(work C) B->ordered_free() If work A has a high priority in wq->ordered_list and there are more ordered works queued after it, such as B->ordered_func(), its memory could have been freed before normal_work_helper() returns, which means that the kernel workqueue code worker_thread() still has its worker->current_work pointer set to work A->normal_work's address, i.e. arg's address. Meanwhile, work C is allocated after work A is freed, so work C->normal_work and work A->normal_work are likely to share the same address (I confirmed this with ftrace output, so I'm not just guessing; it's rare though). When another kthread picks up work C->normal_work to process, and finds that our kthread is processing it (see find_worker_executing_work()), it will treat work C as a collision and skip it, which ends up with nobody processing work C. So the situation is that our kthread is waiting forever on work C. Besides, there are other cases that can lead to deadlock, but the real problem is that all btrfs workqueues share one work->func -- normal_work_helper -- so this patch makes each workqueue have its own helper function, which is only a wrapper of normal_work_helper. With this patch, I no longer hit the above hang. Signed-off-by: Liu Bo <[email protected]> Signed-off-by: Chris Mason <[email protected]>
1 parent f6dc45c commit 9e0af23

File tree

12 files changed

+141
-61
lines changed

12 files changed

+141
-61
lines changed

fs/btrfs/async-thread.c

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
#include <linux/list.h>
2323
#include <linux/spinlock.h>
2424
#include <linux/freezer.h>
25-
#include <linux/workqueue.h>
2625
#include "async-thread.h"
2726
#include "ctree.h"
2827

@@ -55,8 +54,39 @@ struct btrfs_workqueue {
5554
struct __btrfs_workqueue *high;
5655
};
5756

58-
static inline struct __btrfs_workqueue
59-
*__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
57+
static void normal_work_helper(struct btrfs_work *work);
58+
59+
#define BTRFS_WORK_HELPER(name) \
60+
void btrfs_##name(struct work_struct *arg) \
61+
{ \
62+
struct btrfs_work *work = container_of(arg, struct btrfs_work, \
63+
normal_work); \
64+
normal_work_helper(work); \
65+
}
66+
67+
BTRFS_WORK_HELPER(worker_helper);
68+
BTRFS_WORK_HELPER(delalloc_helper);
69+
BTRFS_WORK_HELPER(flush_delalloc_helper);
70+
BTRFS_WORK_HELPER(cache_helper);
71+
BTRFS_WORK_HELPER(submit_helper);
72+
BTRFS_WORK_HELPER(fixup_helper);
73+
BTRFS_WORK_HELPER(endio_helper);
74+
BTRFS_WORK_HELPER(endio_meta_helper);
75+
BTRFS_WORK_HELPER(endio_meta_write_helper);
76+
BTRFS_WORK_HELPER(endio_raid56_helper);
77+
BTRFS_WORK_HELPER(rmw_helper);
78+
BTRFS_WORK_HELPER(endio_write_helper);
79+
BTRFS_WORK_HELPER(freespace_write_helper);
80+
BTRFS_WORK_HELPER(delayed_meta_helper);
81+
BTRFS_WORK_HELPER(readahead_helper);
82+
BTRFS_WORK_HELPER(qgroup_rescan_helper);
83+
BTRFS_WORK_HELPER(extent_refs_helper);
84+
BTRFS_WORK_HELPER(scrub_helper);
85+
BTRFS_WORK_HELPER(scrubwrc_helper);
86+
BTRFS_WORK_HELPER(scrubnc_helper);
87+
88+
static struct __btrfs_workqueue *
89+
__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
6090
int thresh)
6191
{
6292
struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
@@ -232,13 +262,11 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
232262
spin_unlock_irqrestore(lock, flags);
233263
}
234264

235-
static void normal_work_helper(struct work_struct *arg)
265+
static void normal_work_helper(struct btrfs_work *work)
236266
{
237-
struct btrfs_work *work;
238267
struct __btrfs_workqueue *wq;
239268
int need_order = 0;
240269

241-
work = container_of(arg, struct btrfs_work, normal_work);
242270
/*
243271
* We should not touch things inside work in the following cases:
244272
* 1) after work->func() if it has no ordered_free
@@ -262,15 +290,15 @@ static void normal_work_helper(struct work_struct *arg)
262290
trace_btrfs_all_work_done(work);
263291
}
264292

265-
void btrfs_init_work(struct btrfs_work *work,
293+
void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
266294
btrfs_func_t func,
267295
btrfs_func_t ordered_func,
268296
btrfs_func_t ordered_free)
269297
{
270298
work->func = func;
271299
work->ordered_func = ordered_func;
272300
work->ordered_free = ordered_free;
273-
INIT_WORK(&work->normal_work, normal_work_helper);
301+
INIT_WORK(&work->normal_work, uniq_func);
274302
INIT_LIST_HEAD(&work->ordered_list);
275303
work->flags = 0;
276304
}

fs/btrfs/async-thread.h

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,14 @@
1919

2020
#ifndef __BTRFS_ASYNC_THREAD_
2121
#define __BTRFS_ASYNC_THREAD_
22+
#include <linux/workqueue.h>
2223

2324
struct btrfs_workqueue;
2425
/* Internal use only */
2526
struct __btrfs_workqueue;
2627
struct btrfs_work;
2728
typedef void (*btrfs_func_t)(struct btrfs_work *arg);
29+
typedef void (*btrfs_work_func_t)(struct work_struct *arg);
2830

2931
struct btrfs_work {
3032
btrfs_func_t func;
@@ -38,11 +40,35 @@ struct btrfs_work {
3840
unsigned long flags;
3941
};
4042

43+
#define BTRFS_WORK_HELPER_PROTO(name) \
44+
void btrfs_##name(struct work_struct *arg)
45+
46+
BTRFS_WORK_HELPER_PROTO(worker_helper);
47+
BTRFS_WORK_HELPER_PROTO(delalloc_helper);
48+
BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper);
49+
BTRFS_WORK_HELPER_PROTO(cache_helper);
50+
BTRFS_WORK_HELPER_PROTO(submit_helper);
51+
BTRFS_WORK_HELPER_PROTO(fixup_helper);
52+
BTRFS_WORK_HELPER_PROTO(endio_helper);
53+
BTRFS_WORK_HELPER_PROTO(endio_meta_helper);
54+
BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper);
55+
BTRFS_WORK_HELPER_PROTO(endio_raid56_helper);
56+
BTRFS_WORK_HELPER_PROTO(rmw_helper);
57+
BTRFS_WORK_HELPER_PROTO(endio_write_helper);
58+
BTRFS_WORK_HELPER_PROTO(freespace_write_helper);
59+
BTRFS_WORK_HELPER_PROTO(delayed_meta_helper);
60+
BTRFS_WORK_HELPER_PROTO(readahead_helper);
61+
BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper);
62+
BTRFS_WORK_HELPER_PROTO(extent_refs_helper);
63+
BTRFS_WORK_HELPER_PROTO(scrub_helper);
64+
BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
65+
BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
66+
4167
struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
4268
int flags,
4369
int max_active,
4470
int thresh);
45-
void btrfs_init_work(struct btrfs_work *work,
71+
void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
4672
btrfs_func_t func,
4773
btrfs_func_t ordered_func,
4874
btrfs_func_t ordered_free);

fs/btrfs/delayed-inode.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1395,8 +1395,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
13951395
return -ENOMEM;
13961396

13971397
async_work->delayed_root = delayed_root;
1398-
btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root,
1399-
NULL, NULL);
1398+
btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
1399+
btrfs_async_run_delayed_root, NULL, NULL);
14001400
async_work->nr = nr;
14011401

14021402
btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work);

fs/btrfs/disk-io.c

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@
3939
#include "btrfs_inode.h"
4040
#include "volumes.h"
4141
#include "print-tree.h"
42-
#include "async-thread.h"
4342
#include "locking.h"
4443
#include "tree-log.h"
4544
#include "free-space-cache.h"
@@ -693,35 +692,41 @@ static void end_workqueue_bio(struct bio *bio, int err)
693692
{
694693
struct end_io_wq *end_io_wq = bio->bi_private;
695694
struct btrfs_fs_info *fs_info;
695+
struct btrfs_workqueue *wq;
696+
btrfs_work_func_t func;
696697

697698
fs_info = end_io_wq->info;
698699
end_io_wq->error = err;
699-
btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
700700

701701
if (bio->bi_rw & REQ_WRITE) {
702-
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
703-
btrfs_queue_work(fs_info->endio_meta_write_workers,
704-
&end_io_wq->work);
705-
else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
706-
btrfs_queue_work(fs_info->endio_freespace_worker,
707-
&end_io_wq->work);
708-
else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
709-
btrfs_queue_work(fs_info->endio_raid56_workers,
710-
&end_io_wq->work);
711-
else
712-
btrfs_queue_work(fs_info->endio_write_workers,
713-
&end_io_wq->work);
702+
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
703+
wq = fs_info->endio_meta_write_workers;
704+
func = btrfs_endio_meta_write_helper;
705+
} else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) {
706+
wq = fs_info->endio_freespace_worker;
707+
func = btrfs_freespace_write_helper;
708+
} else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
709+
wq = fs_info->endio_raid56_workers;
710+
func = btrfs_endio_raid56_helper;
711+
} else {
712+
wq = fs_info->endio_write_workers;
713+
func = btrfs_endio_write_helper;
714+
}
714715
} else {
715-
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
716-
btrfs_queue_work(fs_info->endio_raid56_workers,
717-
&end_io_wq->work);
718-
else if (end_io_wq->metadata)
719-
btrfs_queue_work(fs_info->endio_meta_workers,
720-
&end_io_wq->work);
721-
else
722-
btrfs_queue_work(fs_info->endio_workers,
723-
&end_io_wq->work);
716+
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
717+
wq = fs_info->endio_raid56_workers;
718+
func = btrfs_endio_raid56_helper;
719+
} else if (end_io_wq->metadata) {
720+
wq = fs_info->endio_meta_workers;
721+
func = btrfs_endio_meta_helper;
722+
} else {
723+
wq = fs_info->endio_workers;
724+
func = btrfs_endio_helper;
725+
}
724726
}
727+
728+
btrfs_init_work(&end_io_wq->work, func, end_workqueue_fn, NULL, NULL);
729+
btrfs_queue_work(wq, &end_io_wq->work);
725730
}
726731

727732
/*
@@ -828,7 +833,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
828833
async->submit_bio_start = submit_bio_start;
829834
async->submit_bio_done = submit_bio_done;
830835

831-
btrfs_init_work(&async->work, run_one_async_start,
836+
btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start,
832837
run_one_async_done, run_one_async_free);
833838

834839
async->bio_flags = bio_flags;

fs/btrfs/extent-tree.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -552,7 +552,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
552552
caching_ctl->block_group = cache;
553553
caching_ctl->progress = cache->key.objectid;
554554
atomic_set(&caching_ctl->count, 1);
555-
btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
555+
btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
556+
caching_thread, NULL, NULL);
556557

557558
spin_lock(&cache->lock);
558559
/*
@@ -2749,8 +2750,8 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root,
27492750
async->sync = 0;
27502751
init_completion(&async->wait);
27512752

2752-
btrfs_init_work(&async->work, delayed_ref_async_start,
2753-
NULL, NULL);
2753+
btrfs_init_work(&async->work, btrfs_extent_refs_helper,
2754+
delayed_ref_async_start, NULL, NULL);
27542755

27552756
btrfs_queue_work(root->fs_info->extent_workers, &async->work);
27562757

fs/btrfs/inode.c

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1096,8 +1096,10 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
10961096
async_cow->end = cur_end;
10971097
INIT_LIST_HEAD(&async_cow->extents);
10981098

1099-
btrfs_init_work(&async_cow->work, async_cow_start,
1100-
async_cow_submit, async_cow_free);
1099+
btrfs_init_work(&async_cow->work,
1100+
btrfs_delalloc_helper,
1101+
async_cow_start, async_cow_submit,
1102+
async_cow_free);
11011103

11021104
nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >>
11031105
PAGE_CACHE_SHIFT;
@@ -1881,7 +1883,8 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
18811883

18821884
SetPageChecked(page);
18831885
page_cache_get(page);
1884-
btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
1886+
btrfs_init_work(&fixup->work, btrfs_fixup_helper,
1887+
btrfs_writepage_fixup_worker, NULL, NULL);
18851888
fixup->page = page;
18861889
btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work);
18871890
return -EBUSY;
@@ -2822,7 +2825,8 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
28222825
struct inode *inode = page->mapping->host;
28232826
struct btrfs_root *root = BTRFS_I(inode)->root;
28242827
struct btrfs_ordered_extent *ordered_extent = NULL;
2825-
struct btrfs_workqueue *workers;
2828+
struct btrfs_workqueue *wq;
2829+
btrfs_work_func_t func;
28262830

28272831
trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
28282832

@@ -2831,13 +2835,17 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
28312835
end - start + 1, uptodate))
28322836
return 0;
28332837

2834-
btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL);
2838+
if (btrfs_is_free_space_inode(inode)) {
2839+
wq = root->fs_info->endio_freespace_worker;
2840+
func = btrfs_freespace_write_helper;
2841+
} else {
2842+
wq = root->fs_info->endio_write_workers;
2843+
func = btrfs_endio_write_helper;
2844+
}
28352845

2836-
if (btrfs_is_free_space_inode(inode))
2837-
workers = root->fs_info->endio_freespace_worker;
2838-
else
2839-
workers = root->fs_info->endio_write_workers;
2840-
btrfs_queue_work(workers, &ordered_extent->work);
2846+
btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
2847+
NULL);
2848+
btrfs_queue_work(wq, &ordered_extent->work);
28412849

28422850
return 0;
28432851
}
@@ -7208,7 +7216,8 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
72087216
if (!ret)
72097217
goto out_test;
72107218

7211-
btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL);
7219+
btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
7220+
finish_ordered_fn, NULL, NULL);
72127221
btrfs_queue_work(root->fs_info->endio_write_workers,
72137222
&ordered->work);
72147223
out_test:
@@ -8535,7 +8544,9 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
85358544
work->inode = inode;
85368545
work->wait = wait;
85378546
work->delay_iput = delay_iput;
8538-
btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL);
8547+
WARN_ON_ONCE(!inode);
8548+
btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
8549+
btrfs_run_delalloc_work, NULL, NULL);
85398550

85408551
return work;
85418552
}

fs/btrfs/ordered-data.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
615615
spin_unlock(&root->ordered_extent_lock);
616616

617617
btrfs_init_work(&ordered->flush_work,
618+
btrfs_flush_delalloc_helper,
618619
btrfs_run_ordered_extent_work, NULL, NULL);
619620
list_add_tail(&ordered->work_list, &works);
620621
btrfs_queue_work(root->fs_info->flush_workers,

fs/btrfs/qgroup.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2720,6 +2720,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
27202720
memset(&fs_info->qgroup_rescan_work, 0,
27212721
sizeof(fs_info->qgroup_rescan_work));
27222722
btrfs_init_work(&fs_info->qgroup_rescan_work,
2723+
btrfs_qgroup_rescan_helper,
27232724
btrfs_qgroup_rescan_worker, NULL, NULL);
27242725

27252726
if (ret) {

fs/btrfs/raid56.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1416,15 +1416,17 @@ static void raid_rmw_end_io(struct bio *bio, int err)
14161416

14171417
static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
14181418
{
1419-
btrfs_init_work(&rbio->work, rmw_work, NULL, NULL);
1419+
btrfs_init_work(&rbio->work, btrfs_rmw_helper,
1420+
rmw_work, NULL, NULL);
14201421

14211422
btrfs_queue_work(rbio->fs_info->rmw_workers,
14221423
&rbio->work);
14231424
}
14241425

14251426
static void async_read_rebuild(struct btrfs_raid_bio *rbio)
14261427
{
1427-
btrfs_init_work(&rbio->work, read_rebuild_work, NULL, NULL);
1428+
btrfs_init_work(&rbio->work, btrfs_rmw_helper,
1429+
read_rebuild_work, NULL, NULL);
14281430

14291431
btrfs_queue_work(rbio->fs_info->rmw_workers,
14301432
&rbio->work);
@@ -1665,7 +1667,8 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
16651667
plug = container_of(cb, struct btrfs_plug_cb, cb);
16661668

16671669
if (from_schedule) {
1668-
btrfs_init_work(&plug->work, unplug_work, NULL, NULL);
1670+
btrfs_init_work(&plug->work, btrfs_rmw_helper,
1671+
unplug_work, NULL, NULL);
16691672
btrfs_queue_work(plug->info->rmw_workers,
16701673
&plug->work);
16711674
return;

fs/btrfs/reada.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -798,7 +798,8 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info)
798798
/* FIXME we cannot handle this properly right now */
799799
BUG();
800800
}
801-
btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL);
801+
btrfs_init_work(&rmw->work, btrfs_readahead_helper,
802+
reada_start_machine_worker, NULL, NULL);
802803
rmw->fs_info = fs_info;
803804

804805
btrfs_queue_work(fs_info->readahead_workers, &rmw->work);

0 commit comments

Comments
 (0)