@@ -3607,6 +3607,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
 	list_add(&eb->leak_list, &buffers);
 	spin_unlock_irqrestore(&leak_lock, flags);
 #endif
+	spin_lock_init(&eb->refs_lock);
 	atomic_set(&eb->refs, 1);
 	atomic_set(&eb->pages_reading, 0);
 
@@ -3654,6 +3655,8 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
 			 */
 			if (PagePrivate(page) &&
 			    page->private == (unsigned long)eb) {
+				BUG_ON(PageDirty(page));
+				BUG_ON(PageWriteback(page));
 				/*
 				 * We need to make sure we haven't be attached
 				 * to a new eb.
@@ -3763,7 +3766,6 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 		if (!atomic_inc_not_zero(&exists->refs)) {
 			spin_unlock(&tree->buffer_lock);
 			radix_tree_preload_end();
-			synchronize_rcu();
 			exists = NULL;
 			goto again;
 		}
@@ -3772,7 +3774,10 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 		goto free_eb;
 	}
 	/* add one reference for the tree */
+	spin_lock(&eb->refs_lock);
 	atomic_inc(&eb->refs);
+	set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags);
+	spin_unlock(&eb->refs_lock);
 	spin_unlock(&tree->buffer_lock);
 	radix_tree_preload_end();
 
@@ -3823,15 +3828,143 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
 	return NULL;
 }
 
+static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
+{
+	struct extent_buffer *eb =
+			container_of(head, struct extent_buffer, rcu_head);
+
+	__free_extent_buffer(eb);
+}
+
+static int extent_buffer_under_io(struct extent_buffer *eb,
+				  struct page *locked_page)
+{
+	unsigned long num_pages, i;
+
+	num_pages = num_extent_pages(eb->start, eb->len);
+	for (i = 0; i < num_pages; i++) {
+		struct page *page = eb->pages[i];
+		int need_unlock = 0;
+
+		if (!page)
+			continue;
+
+		if (page != locked_page) {
+			if (!trylock_page(page))
+				return 1;
+			need_unlock = 1;
+		}
+
+		if (PageDirty(page) || PageWriteback(page)) {
+			if (need_unlock)
+				unlock_page(page);
+			return 1;
+		}
+		if (need_unlock)
+			unlock_page(page);
+	}
+
+	return 0;
+}
+
+/* Expects to have eb->eb_lock already held */
+static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
+{
+	WARN_ON(atomic_read(&eb->refs) == 0);
+	if (atomic_dec_and_test(&eb->refs)) {
+		struct extent_io_tree *tree = eb->tree;
+		int ret;
+
+		spin_unlock(&eb->refs_lock);
+
+		might_sleep_if(mask & __GFP_WAIT);
+		ret = clear_extent_bit(tree, eb->start,
+				       eb->start + eb->len - 1, -1, 0, 0,
+				       NULL, mask);
+		if (ret < 0) {
+			unsigned long num_pages, i;
+
+			num_pages = num_extent_pages(eb->start, eb->len);
+			/*
+			 * We failed to clear the state bits which likely means
+			 * ENOMEM, so just re-up the eb ref and continue, we
+			 * will get freed later on via releasepage or something
+			 * else and will be ok.
+			 */
+			spin_lock(&eb->tree->mapping->private_lock);
+			spin_lock(&eb->refs_lock);
+			set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags);
+			atomic_inc(&eb->refs);
+
+			/*
+			 * We may have started to reclaim the pages for a newly
+			 * allocated eb, make sure we own all of them again.
+			 */
+			for (i = 0; i < num_pages; i++) {
+				struct page *page = eb->pages[i];
+
+				if (!page) {
+					WARN_ON(1);
+					continue;
+				}
+
+				BUG_ON(!PagePrivate(page));
+				if (page->private != (unsigned long)eb) {
+					ClearPagePrivate(page);
+					page_cache_release(page);
+					attach_extent_buffer_page(eb, page);
+				}
+			}
+			spin_unlock(&eb->refs_lock);
+			spin_unlock(&eb->tree->mapping->private_lock);
+			return;
+		}
+
+		spin_lock(&tree->buffer_lock);
+		radix_tree_delete(&tree->buffer,
+				  eb->start >> PAGE_CACHE_SHIFT);
+		spin_unlock(&tree->buffer_lock);
+
+		/* Should be safe to release our pages at this point */
+		btrfs_release_extent_buffer_page(eb, 0);
+
+		call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
+		return;
+	}
+	spin_unlock(&eb->refs_lock);
+}
+
 void free_extent_buffer(struct extent_buffer *eb)
 {
 	if (!eb)
 		return;
 
-	if (!atomic_dec_and_test(&eb->refs))
+	spin_lock(&eb->refs_lock);
+	if (atomic_read(&eb->refs) == 2 &&
+	    test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
+	    !extent_buffer_under_io(eb, NULL) &&
+	    test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
+		atomic_dec(&eb->refs);
+
+	/*
+	 * I know this is terrible, but it's temporary until we stop tracking
+	 * the uptodate bits and such for the extent buffers.
+	 */
+	release_extent_buffer(eb, GFP_ATOMIC);
+}
+
+void free_extent_buffer_stale(struct extent_buffer *eb)
+{
+	if (!eb)
 		return;
 
-	WARN_ON(1);
+	spin_lock(&eb->refs_lock);
+	set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
+
+	if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb, NULL) &&
+	    test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
+		atomic_dec(&eb->refs);
+	release_extent_buffer(eb, GFP_NOFS);
 }
 
 int clear_extent_buffer_dirty(struct extent_io_tree *tree,
@@ -3874,6 +4007,7 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
 
 	was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
 	num_pages = num_extent_pages(eb->start, eb->len);
+	WARN_ON(atomic_read(&eb->refs) == 0);
 	for (i = 0; i < num_pages; i++)
 		__set_page_dirty_nobuffers(extent_buffer_page(eb, i));
 	return was_dirty;
@@ -4440,45 +4574,48 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 	}
 }
 
-static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
-{
-	struct extent_buffer *eb =
-			container_of(head, struct extent_buffer, rcu_head);
-
-	__free_extent_buffer(eb);
-}
-
-int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
+int try_release_extent_buffer(struct page *page, gfp_t mask)
 {
-	u64 start = page_offset(page);
-	struct extent_buffer *eb = (struct extent_buffer *)page->private;
-	int ret = 1;
+	struct extent_buffer *eb;
 
-	if (!PagePrivate(page) || !eb)
+	/*
+	 * We need to make sure nobody is attaching this page to an eb right
+	 * now.
+	 */
+	spin_lock(&page->mapping->private_lock);
+	if (!PagePrivate(page)) {
+		spin_unlock(&page->mapping->private_lock);
 		return 1;
+	}
 
-	spin_lock(&tree->buffer_lock);
-	if (atomic_read(&eb->refs) > 1 ||
-	    test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
-		ret = 0;
-		goto out;
+	eb = (struct extent_buffer *)page->private;
+	BUG_ON(!eb);
+
+	/*
+	 * This is a little awful but should be ok, we need to make sure that
+	 * the eb doesn't disappear out from under us while we're looking at
+	 * this page.
+	 */
+	spin_lock(&eb->refs_lock);
+	if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb, page)) {
+		spin_unlock(&eb->refs_lock);
+		spin_unlock(&page->mapping->private_lock);
+		return 0;
 	}
+	spin_unlock(&page->mapping->private_lock);
+
+	if ((mask & GFP_NOFS) == GFP_NOFS)
+		mask = GFP_NOFS;
 
 	/*
-	 * set @eb->refs to 0 if it is already 1, and then release the @eb.
-	 * Or go back.
+	 * If tree ref isn't set then we know the ref on this eb is a real ref,
+	 * so just return, this page will likely be freed soon anyway.
 	 */
-	if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
-		ret = 0;
-		goto out;
+	if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
+		spin_unlock(&eb->refs_lock);
+		return 0;
 	}
-	radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
-	btrfs_release_extent_buffer_page(eb, 0);
-out:
-	spin_unlock(&tree->buffer_lock);
+	release_extent_buffer(eb, mask);
 
-	/* at this point we can safely release the extent buffer */
-	if (atomic_read(&eb->refs) == 0)
-		call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
-	return ret;
+	return 1;
 }
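
The hunks above replace the old single atomic refcount with a scheme in which the radix tree's reference is tagged with EXTENT_BUFFER_TREE_REF, callers take plain references, and the tree reference can be dropped exactly once: either by free_extent_buffer_stale() when a block has been deleted, or by the releasepage path under memory pressure. The following is a minimal, single-threaded user-space sketch of that lifecycle, not kernel code: the struct and function names (eb_model, eb_alloc, eb_release, eb_free_stale, eb_try_release) are invented for illustration, and the eb->refs_lock serialization the kernel relies on is deliberately omitted.

/*
 * Illustrative user-space model of the new extent buffer refcounting.
 * NOT kernel code; names and layout are assumptions made for this sketch.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct eb_model {
	atomic_int  refs;      /* models eb->refs */
	atomic_bool tree_ref;  /* models the EXTENT_BUFFER_TREE_REF bit */
	atomic_bool stale;     /* models the EXTENT_BUFFER_STALE bit */
};

static struct eb_model *eb_alloc(void)
{
	struct eb_model *eb = malloc(sizeof(*eb));

	/* one reference for the radix tree (flagged) plus one for the caller */
	atomic_init(&eb->refs, 2);
	atomic_init(&eb->tree_ref, true);
	atomic_init(&eb->stale, false);
	return eb;
}

static void eb_release(struct eb_model *eb)
{
	/* drop one reference and free the buffer when the last one goes */
	if (atomic_fetch_sub(&eb->refs, 1) == 1) {
		printf("last reference dropped, buffer freed\n");
		free(eb);
	}
}

static void eb_free_stale(struct eb_model *eb)
{
	/* like free_extent_buffer_stale(): mark the buffer stale and, if only
	 * the tree and this caller still hold references, steal the tree
	 * reference; the kernel does these checks under eb->refs_lock */
	atomic_store(&eb->stale, true);
	if (atomic_load(&eb->refs) == 2 &&
	    atomic_exchange(&eb->tree_ref, false))
		atomic_fetch_sub(&eb->refs, 1);
	eb_release(eb);
}

static bool eb_try_release(struct eb_model *eb)
{
	/* like try_release_extent_buffer(): reclaim only when the tree
	 * reference is the last one left, clearing the flag so the tree
	 * reference can never be dropped twice */
	if (atomic_load(&eb->refs) != 1)
		return false;
	if (!atomic_exchange(&eb->tree_ref, false))
		return false;
	eb_release(eb);
	return true;
}

int main(void)
{
	struct eb_model *eb = eb_alloc();

	/* normal lifetime: the caller drops its reference, reclaim later
	 * steals the tree reference and frees the buffer */
	eb_release(eb);
	printf("try_release: %s\n", eb_try_release(eb) ? "freed" : "busy");

	/* stale lifetime: the block was deleted, so the caller drops the
	 * tree reference and its own reference in one call */
	eb = eb_alloc();
	eb_free_stale(eb);
	return 0;
}

Built with any C11 compiler, the first scenario reports "freed" because the tree reference was the last holder when reclaim ran, while the stale scenario frees the buffer immediately because both the tree reference and the caller's reference are dropped in the same call.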