
Commit cc423f6

Merge tag 'for-6.5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"Mainly core changes, refactoring and optimizations. Performance is improved in some areas, overall there may be a cumulative improvement due to refactoring that removed lookups in the IO path or simplified IO submission tracking.

Core:

 - submit IO synchronously for fast checksums (crc32c and xxhash), remove high priority worker kthread
 - read extent buffer in one go, simplify IO tracking, bio submission and locking
 - remove additional tracking of redirtied extent buffers, originally added for zoned mode but actually not needed
 - track ordered extent pointer in bio to avoid rbtree lookups during IO
 - scrub: use recovered data stripes as cache to avoid unnecessary reads
 - in zoned mode, optimize logical to physical mappings of extents
 - remove PageError handling, not set by VFS nor writeback
 - cleanups, refactoring, better structure packing
 - lots of error handling improvements
 - more assertions, lockdep annotations
 - print assertion failure with the exact line where it happens
 - tracepoint updates
 - more debugging prints

Performance:

 - speedup in fsync(), better tracking of inode logged status can avoid transaction commit
 - IO path structures track logical offsets in data structures and do not need to look them up

User visible changes:

 - don't commit transaction for every created subvolume, this can reduce time when many subvolumes are created in a batch
 - print affected files when relocation fails
 - trigger orphan file cleanup during START_SYNC ioctl

Notable fixes:

 - fix crash when disabling quota and relocation
 - fix crashes when removing roots from dirty list
 - fix transaction abort during relocation when converting from newer profiles not covered by fallback
 - in zoned mode, stop reclaiming block groups if filesystem becomes read-only
 - fix rare race condition in tree mod log rewind that can miss some btree node slots
 - with enabled fsverity, drop up-to-date page bit in case the verification fails"

* tag 'for-6.5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (194 commits)
  btrfs: fix race between quota disable and relocation
  btrfs: add comment to struct btrfs_fs_info::dirty_cowonly_roots
  btrfs: fix race when deleting free space root from the dirty cow roots list
  btrfs: fix race when deleting quota root from the dirty cow roots list
  btrfs: tracepoints: also show actual number of the outstanding extents
  btrfs: update i_version in update_dev_time
  btrfs: make btrfs_compressed_bioset static
  btrfs: add handling for RAID1C23/DUP to btrfs_reduce_alloc_profile
  btrfs: scrub: remove btrfs_fs_info::scrub_wr_completion_workers
  btrfs: scrub: remove scrub_ctx::csum_list member
  btrfs: do not BUG_ON after failure to migrate space during truncation
  btrfs: do not BUG_ON on failure to get dir index for new snapshot
  btrfs: send: do not BUG_ON() on unexpected symlink data extent
  btrfs: do not BUG_ON() when dropping inode items from log root
  btrfs: replace BUG_ON() at split_item() with proper error handling
  btrfs: do not BUG_ON() on tree mod log failures at btrfs_del_ptr()
  btrfs: do not BUG_ON() on tree mod log failures at insert_ptr()
  btrfs: do not BUG_ON() on tree mod log failure at insert_new_root()
  btrfs: do not BUG_ON() on tree mod log failures at push_nodes_for_insert()
  btrfs: abort transaction at update_ref_for_cow() when ref count is zero
  ...
2 parents e940efa + 8a4a0b2 commit cc423f6
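Of the core changes above, tracking the ordered extent pointer in the bio is the one most visible in the hunks below: a per-bio rbtree lookup at submission time becomes a refcounted pointer copy at split time. A minimal before/after sketch, using only names that appear in the fs/btrfs/bio.c diff:

/* Before: each zone-append submission looked up the ordered extent in
 * the inode's rbtree. */
ordered = btrfs_lookup_ordered_extent(bbio->inode, bbio->file_offset);
if (WARN_ON_ONCE(!ordered))
	return BLK_STS_IOERR;

/* After: a split bio inherits the cached pointer and takes an extra
 * reference, so the I/O path does no rbtree walk at all. */
if (bbio_has_ordered_extent(bbio)) {
	refcount_inc(&orig_bbio->ordered->refs);
	bbio->ordered = orig_bbio->ordered;
}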


75 files changed: 2737 additions, 2667 deletions

fs/btrfs/async-thread.c

Lines changed: 39 additions & 5 deletions

@@ -71,6 +71,16 @@ bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq)
 	return atomic_read(&wq->pending) > wq->thresh * 2;
 }
 
+static void btrfs_init_workqueue(struct btrfs_workqueue *wq,
+				 struct btrfs_fs_info *fs_info)
+{
+	wq->fs_info = fs_info;
+	atomic_set(&wq->pending, 0);
+	INIT_LIST_HEAD(&wq->ordered_list);
+	spin_lock_init(&wq->list_lock);
+	spin_lock_init(&wq->thres_lock);
+}
+
 struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
 					      const char *name, unsigned int flags,
 					      int limit_active, int thresh)
@@ -80,9 +90,9 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
 	if (!ret)
 		return NULL;
 
-	ret->fs_info = fs_info;
+	btrfs_init_workqueue(ret, fs_info);
+
 	ret->limit_active = limit_active;
-	atomic_set(&ret->pending, 0);
 	if (thresh == 0)
 		thresh = DFT_THRESHOLD;
 	/* For low threshold, disabling threshold is a better choice */
@@ -106,9 +116,33 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
 		return NULL;
 	}
 
-	INIT_LIST_HEAD(&ret->ordered_list);
-	spin_lock_init(&ret->list_lock);
-	spin_lock_init(&ret->thres_lock);
+	trace_btrfs_workqueue_alloc(ret, name);
+	return ret;
+}
+
+struct btrfs_workqueue *btrfs_alloc_ordered_workqueue(
+		struct btrfs_fs_info *fs_info, const char *name,
+		unsigned int flags)
+{
+	struct btrfs_workqueue *ret;
+
+	ret = kzalloc(sizeof(*ret), GFP_KERNEL);
+	if (!ret)
+		return NULL;
+
+	btrfs_init_workqueue(ret, fs_info);
+
+	/* Ordered workqueues don't allow @max_active adjustments. */
+	ret->limit_active = 1;
+	ret->current_active = 1;
+	ret->thresh = NO_THRESHOLD;
+
+	ret->normal_wq = alloc_ordered_workqueue("btrfs-%s", flags, name);
+	if (!ret->normal_wq) {
+		kfree(ret);
+		return NULL;
+	}
+
 	trace_btrfs_workqueue_alloc(ret, name);
 	return ret;
 }

fs/btrfs/async-thread.h

Lines changed: 3 additions & 0 deletions

@@ -31,6 +31,9 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
 					      unsigned int flags,
 					      int limit_active,
 					      int thresh);
+struct btrfs_workqueue *btrfs_alloc_ordered_workqueue(
+		struct btrfs_fs_info *fs_info, const char *name,
+		unsigned int flags);
 void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
 		     btrfs_func_t ordered_func, btrfs_func_t ordered_free);
 void btrfs_queue_work(struct btrfs_workqueue *wq,
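A hypothetical caller of the new helper might look like the sketch below. Only btrfs_alloc_ordered_workqueue() itself comes from this diff; the fixup_workers field name is an illustrative assumption:

/* Sketch: allocate a strictly ordered btrfs workqueue at mount time.
 * The fs_info->fixup_workers field is hypothetical here. */
fs_info->fixup_workers = btrfs_alloc_ordered_workqueue(fs_info, "fixup", 0);
if (!fs_info->fixup_workers)
	return -ENOMEM;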

fs/btrfs/bio.c

Lines changed: 68 additions & 54 deletions

@@ -27,6 +27,17 @@ struct btrfs_failed_bio {
 	atomic_t repair_count;
 };
 
+/* Is this a data path I/O that needs storage layer checksum and repair? */
+static inline bool is_data_bbio(struct btrfs_bio *bbio)
+{
+	return bbio->inode && is_data_inode(&bbio->inode->vfs_inode);
+}
+
+static bool bbio_has_ordered_extent(struct btrfs_bio *bbio)
+{
+	return is_data_bbio(bbio) && btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE;
+}
+
 /*
  * Initialize a btrfs_bio structure. This skips the embedded bio itself as it
  * is already initialized by the block layer.
@@ -61,20 +72,6 @@ struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
 	return bbio;
 }
 
-static blk_status_t btrfs_bio_extract_ordered_extent(struct btrfs_bio *bbio)
-{
-	struct btrfs_ordered_extent *ordered;
-	int ret;
-
-	ordered = btrfs_lookup_ordered_extent(bbio->inode, bbio->file_offset);
-	if (WARN_ON_ONCE(!ordered))
-		return BLK_STS_IOERR;
-	ret = btrfs_extract_ordered_extent(bbio, ordered);
-	btrfs_put_ordered_extent(ordered);
-
-	return errno_to_blk_status(ret);
-}
-
 static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
 					 struct btrfs_bio *orig_bbio,
 					 u64 map_length, bool use_append)
@@ -95,13 +92,41 @@ static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
 	btrfs_bio_init(bbio, fs_info, NULL, orig_bbio);
 	bbio->inode = orig_bbio->inode;
 	bbio->file_offset = orig_bbio->file_offset;
-	if (!(orig_bbio->bio.bi_opf & REQ_BTRFS_ONE_ORDERED))
-		orig_bbio->file_offset += map_length;
-
+	orig_bbio->file_offset += map_length;
+	if (bbio_has_ordered_extent(bbio)) {
+		refcount_inc(&orig_bbio->ordered->refs);
+		bbio->ordered = orig_bbio->ordered;
+	}
 	atomic_inc(&orig_bbio->pending_ios);
 	return bbio;
 }
 
+/* Free a bio that was never submitted to the underlying device. */
+static void btrfs_cleanup_bio(struct btrfs_bio *bbio)
+{
+	if (bbio_has_ordered_extent(bbio))
+		btrfs_put_ordered_extent(bbio->ordered);
+	bio_put(&bbio->bio);
+}
+
+static void __btrfs_bio_end_io(struct btrfs_bio *bbio)
+{
+	if (bbio_has_ordered_extent(bbio)) {
+		struct btrfs_ordered_extent *ordered = bbio->ordered;
+
+		bbio->end_io(bbio);
+		btrfs_put_ordered_extent(ordered);
+	} else {
+		bbio->end_io(bbio);
+	}
+}
+
+void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
+{
+	bbio->bio.bi_status = status;
+	__btrfs_bio_end_io(bbio);
+}
+
 static void btrfs_orig_write_end_io(struct bio *bio);
 
 static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio,
@@ -130,12 +155,12 @@ static void btrfs_orig_bbio_end_io(struct btrfs_bio *bbio)
 
 		if (bbio->bio.bi_status)
 			btrfs_bbio_propagate_error(bbio, orig_bbio);
-		bio_put(&bbio->bio);
+		btrfs_cleanup_bio(bbio);
 		bbio = orig_bbio;
 	}
 
 	if (atomic_dec_and_test(&bbio->pending_ios))
-		bbio->end_io(bbio);
+		__btrfs_bio_end_io(bbio);
 }
 
 static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
@@ -327,7 +352,7 @@ static void btrfs_end_bio_work(struct work_struct *work)
 	struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
 
 	/* Metadata reads are checked and repaired by the submitter. */
-	if (bbio->inode && !(bbio->bio.bi_opf & REQ_META))
+	if (is_data_bbio(bbio))
 		btrfs_check_read_bio(bbio, bbio->bio.bi_private);
 	else
 		btrfs_orig_bbio_end_io(bbio);
@@ -348,7 +373,7 @@ static void btrfs_simple_end_io(struct bio *bio)
 		INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
 		queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
 	} else {
-		if (bio_op(bio) == REQ_OP_ZONE_APPEND)
+		if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status)
 			btrfs_record_physical_zoned(bbio);
 		btrfs_orig_bbio_end_io(bbio);
 	}
@@ -361,8 +386,7 @@ static void btrfs_raid56_end_io(struct bio *bio)
 
 	btrfs_bio_counter_dec(bioc->fs_info);
 	bbio->mirror_num = bioc->mirror_num;
-	if (bio_op(bio) == REQ_OP_READ && bbio->inode &&
-	    !(bbio->bio.bi_opf & REQ_META))
+	if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio))
 		btrfs_check_read_bio(bbio, NULL);
 	else
 		btrfs_orig_bbio_end_io(bbio);
@@ -472,13 +496,12 @@ static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
 static void __btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
 			       struct btrfs_io_stripe *smap, int mirror_num)
 {
-	/* Do not leak our private flag into the block layer. */
-	bio->bi_opf &= ~REQ_BTRFS_ONE_ORDERED;
-
 	if (!bioc) {
 		/* Single mirror read/write fast path. */
 		btrfs_bio(bio)->mirror_num = mirror_num;
 		bio->bi_iter.bi_sector = smap->physical >> SECTOR_SHIFT;
+		if (bio_op(bio) != REQ_OP_READ)
+			btrfs_bio(bio)->orig_physical = smap->physical;
 		bio->bi_private = smap->dev;
 		bio->bi_end_io = btrfs_simple_end_io;
 		btrfs_submit_dev_bio(smap->dev, bio);
@@ -574,27 +597,20 @@ static void run_one_async_free(struct btrfs_work *work)
 
 static bool should_async_write(struct btrfs_bio *bbio)
 {
-	/*
-	 * If the I/O is not issued by fsync and friends, (->sync_writers != 0),
-	 * then try to defer the submission to a workqueue to parallelize the
-	 * checksum calculation.
-	 */
-	if (atomic_read(&bbio->inode->sync_writers))
+	/* Submit synchronously if the checksum implementation is fast. */
+	if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
 		return false;
 
 	/*
-	 * Submit metadata writes synchronously if the checksum implementation
-	 * is fast, or we are on a zoned device that wants I/O to be submitted
-	 * in order.
+	 * Try to defer the submission to a workqueue to parallelize the
+	 * checksum calculation unless the I/O is issued synchronously.
 	 */
-	if (bbio->bio.bi_opf & REQ_META) {
-		struct btrfs_fs_info *fs_info = bbio->fs_info;
+	if (op_is_sync(bbio->bio.bi_opf))
+		return false;
 
-		if (btrfs_is_zoned(fs_info))
-			return false;
-		if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
-			return false;
-	}
+	/* Zoned devices require I/O to be submitted in order. */
+	if ((bbio->bio.bi_opf & REQ_META) && btrfs_is_zoned(bbio->fs_info))
+		return false;
 
 	return true;
 }
@@ -622,10 +638,7 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
 
 	btrfs_init_work(&async->work, run_one_async_start, run_one_async_done,
 			run_one_async_free);
-	if (op_is_sync(bbio->bio.bi_opf))
-		btrfs_queue_work(fs_info->hipri_workers, &async->work);
-	else
-		btrfs_queue_work(fs_info->workers, &async->work);
+	btrfs_queue_work(fs_info->workers, &async->work);
 	return true;
 }
 
@@ -635,7 +648,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
 	struct btrfs_fs_info *fs_info = bbio->fs_info;
 	struct btrfs_bio *orig_bbio = bbio;
 	struct bio *bio = &bbio->bio;
-	u64 logical = bio->bi_iter.bi_sector << 9;
+	u64 logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
 	u64 length = bio->bi_iter.bi_size;
 	u64 map_length = length;
 	bool use_append = btrfs_use_zone_append(bbio);
@@ -645,8 +658,8 @@
 	int error;
 
 	btrfs_bio_counter_inc_blocked(fs_info);
-	error = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
-				  &bioc, &smap, &mirror_num, 1);
+	error = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
+				&bioc, &smap, &mirror_num, 1);
 	if (error) {
 		ret = errno_to_blk_status(error);
 		goto fail;
@@ -665,7 +678,7 @@
 	 * Save the iter for the end_io handler and preload the checksums for
 	 * data reads.
 	 */
-	if (bio_op(bio) == REQ_OP_READ && inode && !(bio->bi_opf & REQ_META)) {
+	if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio)) {
 		bbio->saved_iter = bio->bi_iter;
 		ret = btrfs_lookup_bio_sums(bbio);
 		if (ret)
@@ -676,9 +689,6 @@
 		if (use_append) {
 			bio->bi_opf &= ~REQ_OP_WRITE;
 			bio->bi_opf |= REQ_OP_ZONE_APPEND;
-			ret = btrfs_bio_extract_ordered_extent(bbio);
-			if (ret)
-				goto fail_put_bio;
 		}
 
 	/*
@@ -695,6 +705,10 @@
 			ret = btrfs_bio_csum(bbio);
 			if (ret)
 				goto fail_put_bio;
+		} else if (use_append) {
+			ret = btrfs_alloc_dummy_sum(bbio);
+			if (ret)
+				goto fail_put_bio;
 		}
 	}
 
@@ -704,7 +718,7 @@
 
 fail_put_bio:
	if (map_length < length)
-		bio_put(bio);
+		btrfs_cleanup_bio(bbio);
 fail:
 	btrfs_bio_counter_dec(fs_info);
 	btrfs_bio_end_io(orig_bbio, ret);
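With btrfs_bio_end_io() now out of line (see fs/btrfs/bio.h below), ending a bio also drops the cached ordered extent reference for data writes via __btrfs_bio_end_io(). A hedged sketch of an early-error caller follows; the surrounding function and its arguments are assumptions, only btrfs_bio_alloc() and btrfs_bio_end_io() come from this commit:

/* Hypothetical early-failure path using the reworked end_io API. */
static void example_fail_bio(struct btrfs_fs_info *fs_info,
			     btrfs_bio_end_io_t end_io, void *priv)
{
	struct btrfs_bio *bbio;

	bbio = btrfs_bio_alloc(1, REQ_OP_READ, fs_info, end_io, priv);
	/* ... setup fails before submission; ending the bio is enough,
	 * any ordered extent reference is put internally. */
	btrfs_bio_end_io(bbio, BLK_STS_IOERR);
}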

fs/btrfs/bio.h

Lines changed: 17 additions & 12 deletions

@@ -39,16 +39,29 @@ struct btrfs_bio {
 
 	union {
 		/*
-		 * Data checksumming and original I/O information for internal
-		 * use in the btrfs_submit_bio machinery.
+		 * For data reads: checksumming and original I/O information.
+		 * (for internal use in the btrfs_submit_bio machinery only)
 		 */
 		struct {
 			u8 *csum;
 			u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
 			struct bvec_iter saved_iter;
 		};
 
-		/* For metadata parentness verification. */
+		/*
+		 * For data writes:
+		 * - ordered extent covering the bio
+		 * - pointer to the checksums for this bio
+		 * - original physical address from the allocator
+		 *   (for zone append only)
+		 */
+		struct {
+			struct btrfs_ordered_extent *ordered;
+			struct btrfs_ordered_sum *sums;
+			u64 orig_physical;
+		};
+
+		/* For metadata reads: parentness verification. */
 		struct btrfs_tree_parent_check parent_check;
 	};
 
@@ -84,15 +97,7 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
 struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
 				  struct btrfs_fs_info *fs_info,
 				  btrfs_bio_end_io_t end_io, void *private);
-
-static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
-{
-	bbio->bio.bi_status = status;
-	bbio->end_io(bbio);
-}
-
-/* Bio only refers to one ordered extent. */
-#define REQ_BTRFS_ONE_ORDERED	REQ_DRV
+void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status);
 
 /* Submit using blkcg_punt_bio_submit. */
 #define REQ_BTRFS_CGROUP_PUNT	REQ_FS_PRIVATE
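The new write-side block joins the union because a given bbio is only ever one of a data read, a data write, or a metadata read, so the three state groups can overlap in memory. A miniature of the same pattern (illustrative C, not btrfs code):

/* Exactly one member group is live over a given object's lifetime,
 * so the groups share storage instead of growing the struct. */
struct example_bio_state {
	union {
		struct { u8 *csum; } read;	/* live for data reads */
		struct { u64 physical; } write;	/* live for data writes */
		int check;			/* live for metadata reads */
	};
};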
