Skip to content

Commit 2f3186d

Browse files
adam900710 authored and kdave committed
btrfs: introduce end_bio_subpage_eb_writepage() function
The new function, end_bio_subpage_eb_writepage(), will handle the metadata writeback endio. The major differences involved are: - How to grab extent buffer Now page::private is a pointer to btrfs_subpage, we can no longer grab extent buffer directly. Thus we need to use the bv_offset to locate the extent buffer manually and iterate through the whole range. - Use btrfs_subpage_end_writeback() caller This helper will handle the subpage writeback for us. Since this function is executed under endio context, when grabbing extent buffers it can't grab eb->refs_lock as that lock is not designed to be grabbed under hardirq context. So here introduce a helper, find_extent_buffer_nolock(), for such situation, and convert find_extent_buffer() to use that helper. Signed-off-by: Qu Wenruo <[email protected]> Signed-off-by: David Sterba <[email protected]>
1 parent fb686c6 commit 2f3186d

File tree

1 file changed

+106
-29
lines changed

1 file changed

+106
-29
lines changed

fs/btrfs/extent_io.c

Lines changed: 106 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4080,13 +4080,98 @@ static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
40804080
}
40814081
}
40824082

4083+
/*
4084+
* The endio specific version which won't touch any unsafe spinlock in endio
4085+
* context.
4086+
*/
4087+
static struct extent_buffer *find_extent_buffer_nolock(
4088+
struct btrfs_fs_info *fs_info, u64 start)
4089+
{
4090+
struct extent_buffer *eb;
4091+
4092+
rcu_read_lock();
4093+
eb = radix_tree_lookup(&fs_info->buffer_radix,
4094+
start >> fs_info->sectorsize_bits);
4095+
if (eb && atomic_inc_not_zero(&eb->refs)) {
4096+
rcu_read_unlock();
4097+
return eb;
4098+
}
4099+
rcu_read_unlock();
4100+
return NULL;
4101+
}
4102+
4103+
/*
 * The endio function for subpage extent buffer write.
 *
 * Unlike end_bio_extent_buffer_writepage(), we only call end_page_writeback()
 * after all extent buffers in the page has finished their writeback.
 *
 * Runs in endio (possibly hardirq) context, hence it must not take any
 * sleeping lock nor eb->refs_lock (see find_extent_buffer_nolock() and the
 * manual atomic_dec() below).
 */
static void end_bio_subpage_eb_writepage(struct btrfs_fs_info *fs_info,
					 struct bio *bio)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	ASSERT(!bio_flagged(bio, BIO_CLONED));
	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *page = bvec->bv_page;
		/* Logical bytenr range covered by this segment, inclusive */
		u64 bvec_start = page_offset(page) + bvec->bv_offset;
		u64 bvec_end = bvec_start + bvec->bv_len - 1;
		u64 cur_bytenr = bvec_start;

		/* Segment must cover whole tree blocks */
		ASSERT(IS_ALIGNED(bvec->bv_len, fs_info->nodesize));

		/* Iterate through all extent buffers in the range */
		while (cur_bytenr <= bvec_end) {
			struct extent_buffer *eb;
			int done;

			/*
			 * Here we can't use find_extent_buffer(), as it may
			 * try to lock eb->refs_lock, which is not safe in endio
			 * context.
			 */
			eb = find_extent_buffer_nolock(fs_info, cur_bytenr);
			ASSERT(eb);

			/* Advance to the next tree block in the segment */
			cur_bytenr = eb->start + eb->len;

			ASSERT(test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags));
			/*
			 * For subpage the whole eb lives in one page, so this
			 * completion must bring io_pages to zero.
			 */
			done = atomic_dec_and_test(&eb->io_pages);
			ASSERT(done);

			if (bio->bi_status ||
			    test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
				ClearPageUptodate(page);
				set_btree_ioerr(page, eb);
			}

			/*
			 * Subpage helper only ends page writeback once every
			 * eb in the page has finished its writeback.
			 */
			btrfs_subpage_clear_writeback(fs_info, page, eb->start,
						      eb->len);
			end_extent_buffer_writeback(eb);
			/*
			 * free_extent_buffer() will grab spinlock which is not
			 * safe in endio context. Thus here we manually dec
			 * the ref.
			 */
			atomic_dec(&eb->refs);
		}
	}
	bio_put(bio);
}
4162+
40834163
static void end_bio_extent_buffer_writepage(struct bio *bio)
40844164
{
4165+
struct btrfs_fs_info *fs_info;
40854166
struct bio_vec *bvec;
40864167
struct extent_buffer *eb;
40874168
int done;
40884169
struct bvec_iter_all iter_all;
40894170

4171+
fs_info = btrfs_sb(bio_first_page_all(bio)->mapping->host->i_sb);
4172+
if (fs_info->sectorsize < PAGE_SIZE)
4173+
return end_bio_subpage_eb_writepage(fs_info, bio);
4174+
40904175
ASSERT(!bio_flagged(bio, BIO_CLONED));
40914176
bio_for_each_segment_all(bvec, bio, iter_all) {
40924177
struct page *page = bvec->bv_page;
/*
 * Look up the extent buffer covering @start and return it with an extra
 * reference, or NULL if not present.  Process-context counterpart of
 * find_extent_buffer_nolock(): may take eb->refs_lock, so do NOT call
 * from endio context.
 */
struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
					 u64 start)
{
	struct extent_buffer *eb;

	eb = find_extent_buffer_nolock(fs_info, start);
	if (!eb)
		return NULL;
	/*
	 * Lock our eb's refs_lock to avoid races with free_extent_buffer().
	 * When we get our eb it might be flagged with EXTENT_BUFFER_STALE and
	 * another task running free_extent_buffer() might have seen that flag
	 * set, eb->refs == 2, that the buffer isn't under IO (dirty and
	 * writeback flags not set) and it's still in the tree (flag
	 * EXTENT_BUFFER_TREE_REF set), therefore being in the process of
	 * decrementing the extent buffer's reference count twice.  So here we
	 * could race and increment the eb's reference count, clear its stale
	 * flag, mark it as dirty and drop our reference before the other task
	 * finishes executing free_extent_buffer, which would later result in
	 * an attempt to free an extent buffer that is dirty.
	 */
	if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
		/*
		 * Empty lock/unlock pair: acts purely as a barrier that
		 * serializes us against a free_extent_buffer() holding
		 * refs_lock, so it finishes before we proceed.
		 */
		spin_lock(&eb->refs_lock);
		spin_unlock(&eb->refs_lock);
	}
	mark_extent_buffer_accessed(eb, NULL);
	return eb;
}
54995576

55005577
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS

0 commit comments

Comments
 (0)