@@ -4080,13 +4080,98 @@ static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
 	}
 }
 
+/*
+ * The endio-specific version, which won't touch any unsafe spinlock in endio
+ * context.
+ */
+static struct extent_buffer *find_extent_buffer_nolock(
+		struct btrfs_fs_info *fs_info, u64 start)
+{
+	struct extent_buffer *eb;
+
+	rcu_read_lock();
+	eb = radix_tree_lookup(&fs_info->buffer_radix,
+			       start >> fs_info->sectorsize_bits);
+	if (eb && atomic_inc_not_zero(&eb->refs)) {
+		rcu_read_unlock();
+		return eb;
+	}
+	rcu_read_unlock();
+	return NULL;
+}
+
+/*
+ * The endio function for subpage extent buffer write.
+ *
+ * Unlike end_bio_extent_buffer_writepage(), we only call end_page_writeback()
+ * after all extent buffers in the page have finished their writeback.
+ */
+static void end_bio_subpage_eb_writepage(struct btrfs_fs_info *fs_info,
+					 struct bio *bio)
+{
+	struct bio_vec *bvec;
+	struct bvec_iter_all iter_all;
+
+	ASSERT(!bio_flagged(bio, BIO_CLONED));
+	bio_for_each_segment_all(bvec, bio, iter_all) {
+		struct page *page = bvec->bv_page;
+		u64 bvec_start = page_offset(page) + bvec->bv_offset;
+		u64 bvec_end = bvec_start + bvec->bv_len - 1;
+		u64 cur_bytenr = bvec_start;
+
+		ASSERT(IS_ALIGNED(bvec->bv_len, fs_info->nodesize));
+
+		/* Iterate through all extent buffers in the range */
+		while (cur_bytenr <= bvec_end) {
+			struct extent_buffer *eb;
+			int done;
+
+			/*
+			 * Here we can't use find_extent_buffer(), as it may
+			 * try to lock eb->refs_lock, which is not safe in endio
+			 * context.
+			 */
+			eb = find_extent_buffer_nolock(fs_info, cur_bytenr);
+			ASSERT(eb);
+
+			cur_bytenr = eb->start + eb->len;
+
+			ASSERT(test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags));
+			done = atomic_dec_and_test(&eb->io_pages);
+			ASSERT(done);
+
+			if (bio->bi_status ||
+			    test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
+				ClearPageUptodate(page);
+				set_btree_ioerr(page, eb);
+			}
+
+			btrfs_subpage_clear_writeback(fs_info, page, eb->start,
+						      eb->len);
+			end_extent_buffer_writeback(eb);
+			/*
+			 * free_extent_buffer() will grab a spinlock, which is
+			 * not safe in endio context. Thus here we manually
+			 * decrement the ref.
+			 */
+			atomic_dec(&eb->refs);
+		}
+	}
+	bio_put(bio);
+}
+
 static void end_bio_extent_buffer_writepage(struct bio *bio)
 {
+	struct btrfs_fs_info *fs_info;
 	struct bio_vec *bvec;
 	struct extent_buffer *eb;
 	int done;
 	struct bvec_iter_all iter_all;
 
+	fs_info = btrfs_sb(bio_first_page_all(bio)->mapping->host->i_sb);
+	if (fs_info->sectorsize < PAGE_SIZE)
+		return end_bio_subpage_eb_writepage(fs_info, bio);
+
 	ASSERT(!bio_flagged(bio, BIO_CLONED));
 	bio_for_each_segment_all(bvec, bio, iter_all) {
 		struct page *page = bvec->bv_page;
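For readers unfamiliar with the pattern, below is a minimal userspace sketch of the conditional refcount increment that find_extent_buffer_nolock() builds on. This is plain C11, not kernel code, and ref_get_not_zero() and struct object are invented names: under RCU a lookup may observe an object whose last reference is concurrently being dropped, so the increment must fail once the count has reached zero rather than resurrect the object. The kernel's atomic_inc_not_zero() provides exactly this semantic.

/*
 * Userspace sketch (assumed names, not kernel code) of the
 * "increment only if non-zero" refcount pattern.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct object {
	atomic_int refs;
};

/* Rough userspace equivalent of the kernel's atomic_inc_not_zero(). */
static bool ref_get_not_zero(struct object *obj)
{
	int old = atomic_load(&obj->refs);

	while (old != 0) {
		/* On failure, old is reloaded and the zero check reruns. */
		if (atomic_compare_exchange_weak(&obj->refs, &old, old + 1))
			return true;	/* took a reference */
	}
	return false;	/* object already dying: lookup must return NULL */
}

int main(void)
{
	struct object live = { .refs = 1 };
	struct object dying = { .refs = 0 };

	printf("live:  %d\n", ref_get_not_zero(&live));	/* 1: ref taken */
	printf("dying: %d\n", ref_get_not_zero(&dying));	/* 0: lookup fails */
	return 0;
}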
@@ -5465,36 +5550,28 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
 {
 	struct extent_buffer *eb;
 
-	rcu_read_lock();
-	eb = radix_tree_lookup(&fs_info->buffer_radix,
-			       start >> fs_info->sectorsize_bits);
-	if (eb && atomic_inc_not_zero(&eb->refs)) {
-		rcu_read_unlock();
-		/*
-		 * Lock our eb's refs_lock to avoid races with
-		 * free_extent_buffer. When we get our eb it might be flagged
-		 * with EXTENT_BUFFER_STALE and another task running
-		 * free_extent_buffer might have seen that flag set,
-		 * eb->refs == 2, that the buffer isn't under IO (dirty and
-		 * writeback flags not set) and it's still in the tree (flag
-		 * EXTENT_BUFFER_TREE_REF set), therefore being in the process
-		 * of decrementing the extent buffer's reference count twice.
-		 * So here we could race and increment the eb's reference count,
-		 * clear its stale flag, mark it as dirty and drop our reference
-		 * before the other task finishes executing free_extent_buffer,
-		 * which would later result in an attempt to free an extent
-		 * buffer that is dirty.
-		 */
-		if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
-			spin_lock(&eb->refs_lock);
-			spin_unlock(&eb->refs_lock);
-		}
-		mark_extent_buffer_accessed(eb, NULL);
-		return eb;
+	eb = find_extent_buffer_nolock(fs_info, start);
+	if (!eb)
+		return NULL;
+	/*
+	 * Lock our eb's refs_lock to avoid races with free_extent_buffer().
+	 * When we get our eb it might be flagged with EXTENT_BUFFER_STALE and
+	 * another task running free_extent_buffer() might have seen that flag
+	 * set, eb->refs == 2, that the buffer isn't under IO (dirty and
+	 * writeback flags not set) and it's still in the tree (flag
+	 * EXTENT_BUFFER_TREE_REF set), therefore being in the process of
+	 * decrementing the extent buffer's reference count twice. So here we
+	 * could race and increment the eb's reference count, clear its stale
+	 * flag, mark it as dirty and drop our reference before the other task
+	 * finishes executing free_extent_buffer(), which would later result in
+	 * an attempt to free an extent buffer that is dirty.
+	 */
+	if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
+		spin_lock(&eb->refs_lock);
+		spin_unlock(&eb->refs_lock);
 	}
-	rcu_read_unlock();
-
-	return NULL;
+	mark_extent_buffer_accessed(eb, NULL);
+	return eb;
 }
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
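The subpage endio path in the first hunk only ends page-level writeback once every extent buffer in the page is done; btrfs tracks this per-page state (conceptually a bitmap of in-flight buffer ranges) through btrfs_subpage_clear_writeback(). Below is a simplified standalone model of that accounting in plain C; page_state and subpage_clear_writeback are invented names for illustration, not the btrfs implementation.

/*
 * Simplified model (assumed names, not kernel code): a 64K page holding
 * four 16K extent buffers, each tracked by one bit, where page writeback
 * may only end when the last bit clears.
 */
#include <stdint.h>
#include <stdio.h>

struct page_state {
	uint32_t writeback_bitmap;	/* one bit per in-flight extent buffer */
};

/* Clear one buffer's bit; report whether the whole page is now clean. */
static int subpage_clear_writeback(struct page_state *ps, unsigned int idx)
{
	ps->writeback_bitmap &= ~(1u << idx);
	return ps->writeback_bitmap == 0;	/* last writer finished */
}

int main(void)
{
	/* Four extent buffers in one page, all under writeback. */
	struct page_state ps = { .writeback_bitmap = 0xf };

	for (unsigned int i = 0; i < 4; i++) {
		if (subpage_clear_writeback(&ps, i))
			printf("eb %u done: end_page_writeback()\n", i);
		else
			printf("eb %u done: page still under writeback\n", i);
	}
	return 0;
}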