@@ -743,8 +743,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
743
743
static struct {
744
744
struct list_head idle_ws ;
745
745
spinlock_t ws_lock ;
746
- int num_ws ;
747
- atomic_t alloc_ws ;
746
+ /* Number of free workspaces */
747
+ int free_ws ;
748
+ /* Total number of allocated workspaces */
749
+ atomic_t total_ws ;
750
+ /* Waiters for a free workspace */
748
751
wait_queue_head_t ws_wait ;
749
752
} btrfs_comp_ws [BTRFS_COMPRESS_TYPES ];
750
753
@@ -758,16 +761,34 @@ void __init btrfs_init_compress(void)
758
761
int i ;
759
762
760
763
for (i = 0 ; i < BTRFS_COMPRESS_TYPES ; i ++ ) {
764
+ struct list_head * workspace ;
765
+
761
766
INIT_LIST_HEAD (& btrfs_comp_ws [i ].idle_ws );
762
767
spin_lock_init (& btrfs_comp_ws [i ].ws_lock );
763
- atomic_set (& btrfs_comp_ws [i ].alloc_ws , 0 );
768
+ atomic_set (& btrfs_comp_ws [i ].total_ws , 0 );
764
769
init_waitqueue_head (& btrfs_comp_ws [i ].ws_wait );
770
+
771
+ /*
772
+ * Preallocate one workspace for each compression type so
773
+ * we can guarantee forward progress in the worst case
774
+ */
775
+ workspace = btrfs_compress_op [i ]-> alloc_workspace ();
776
+ if (IS_ERR (workspace )) {
777
+ printk (KERN_WARNING
778
+ "BTRFS: cannot preallocate compression workspace, will try later" );
779
+ } else {
780
+ atomic_set (& btrfs_comp_ws [i ].total_ws , 1 );
781
+ btrfs_comp_ws [i ].free_ws = 1 ;
782
+ list_add (workspace , & btrfs_comp_ws [i ].idle_ws );
783
+ }
765
784
}
766
785
}
767
786
768
787
/*
769
- * this finds an available workspace or allocates a new one
770
- * ERR_PTR is returned if things go bad.
788
+ * This finds an available workspace or allocates a new one.
789
+ * If it's not possible to allocate a new one, waits until there's one.
790
+ * Preallocation provides a forward progress guarantee and we do not return
791
+ * errors.
771
792
*/
772
793
static struct list_head * find_workspace (int type )
773
794
{
@@ -777,36 +798,58 @@ static struct list_head *find_workspace(int type)
777
798
778
799
struct list_head * idle_ws = & btrfs_comp_ws [idx ].idle_ws ;
779
800
spinlock_t * ws_lock = & btrfs_comp_ws [idx ].ws_lock ;
780
- atomic_t * alloc_ws = & btrfs_comp_ws [idx ].alloc_ws ;
801
+ atomic_t * total_ws = & btrfs_comp_ws [idx ].total_ws ;
781
802
wait_queue_head_t * ws_wait = & btrfs_comp_ws [idx ].ws_wait ;
782
- int * num_ws = & btrfs_comp_ws [idx ].num_ws ;
803
+ int * free_ws = & btrfs_comp_ws [idx ].free_ws ;
783
804
again :
784
805
spin_lock (ws_lock );
785
806
if (!list_empty (idle_ws )) {
786
807
workspace = idle_ws -> next ;
787
808
list_del (workspace );
788
- (* num_ws )-- ;
809
+ (* free_ws )-- ;
789
810
spin_unlock (ws_lock );
790
811
return workspace ;
791
812
792
813
}
793
- if (atomic_read (alloc_ws ) > cpus ) {
814
+ if (atomic_read (total_ws ) > cpus ) {
794
815
DEFINE_WAIT (wait );
795
816
796
817
spin_unlock (ws_lock );
797
818
prepare_to_wait (ws_wait , & wait , TASK_UNINTERRUPTIBLE );
798
- if (atomic_read (alloc_ws ) > cpus && !* num_ws )
819
+ if (atomic_read (total_ws ) > cpus && !* free_ws )
799
820
schedule ();
800
821
finish_wait (ws_wait , & wait );
801
822
goto again ;
802
823
}
803
- atomic_inc (alloc_ws );
824
+ atomic_inc (total_ws );
804
825
spin_unlock (ws_lock );
805
826
806
827
workspace = btrfs_compress_op [idx ]-> alloc_workspace ();
807
828
if (IS_ERR (workspace )) {
808
- atomic_dec (alloc_ws );
829
+ atomic_dec (total_ws );
809
830
wake_up (ws_wait );
831
+
832
+ /*
833
+ * Do not return the error but go back to waiting. There's a
834
+ * workspace preallocated for each type and the compression
835
+ * time is bounded so we get to a workspace eventually. This
836
+ * makes our caller's life easier.
837
+ *
838
+ * To prevent silent and low-probability deadlocks (when the
839
+ * initial preallocation fails), check if there are any
840
+ * workspaces at all.
841
+ */
842
+ if (atomic_read (total_ws ) == 0 ) {
843
+ static DEFINE_RATELIMIT_STATE (_rs ,
844
+ /* once per minute */ 60 * HZ ,
845
+ /* no burst */ 1 );
846
+
847
+ if (__ratelimit (& _rs )) {
848
+ printk (KERN_WARNING
849
+ "no compression workspaces, low memory, retrying" );
850
+ }
851
+ }
852
+ goto again ;
810
853
}
811
854
return workspace ;
812
855
}
@@ -820,21 +863,21 @@ static void free_workspace(int type, struct list_head *workspace)
820
863
int idx = type - 1 ;
821
864
struct list_head * idle_ws = & btrfs_comp_ws [idx ].idle_ws ;
822
865
spinlock_t * ws_lock = & btrfs_comp_ws [idx ].ws_lock ;
823
- atomic_t * alloc_ws = & btrfs_comp_ws [idx ].alloc_ws ;
866
+ atomic_t * total_ws = & btrfs_comp_ws [idx ].total_ws ;
824
867
wait_queue_head_t * ws_wait = & btrfs_comp_ws [idx ].ws_wait ;
825
- int * num_ws = & btrfs_comp_ws [idx ].num_ws ;
868
+ int * free_ws = & btrfs_comp_ws [idx ].free_ws ;
826
869
827
870
spin_lock (ws_lock );
828
- if (* num_ws < num_online_cpus ()) {
871
+ if (* free_ws < num_online_cpus ()) {
829
872
list_add (workspace , idle_ws );
830
- (* num_ws )++ ;
873
+ (* free_ws )++ ;
831
874
spin_unlock (ws_lock );
832
875
goto wake ;
833
876
}
834
877
spin_unlock (ws_lock );
835
878
836
879
btrfs_compress_op [idx ]-> free_workspace (workspace );
837
- atomic_dec (alloc_ws );
880
+ atomic_dec (total_ws );
838
881
wake :
839
882
/*
840
883
* Make sure counter is updated before we wake up waiters.
@@ -857,7 +900,7 @@ static void free_workspaces(void)
857
900
workspace = btrfs_comp_ws [i ].idle_ws .next ;
858
901
list_del (workspace );
859
902
btrfs_compress_op [i ]-> free_workspace (workspace );
860
- atomic_dec (& btrfs_comp_ws [i ].alloc_ws );
903
+ atomic_dec (& btrfs_comp_ws [i ].total_ws );
861
904
}
862
905
}
863
906
}
@@ -894,8 +937,6 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
894
937
int ret ;
895
938
896
939
workspace = find_workspace (type );
897
- if (IS_ERR (workspace ))
898
- return PTR_ERR (workspace );
899
940
900
941
ret = btrfs_compress_op [type - 1 ]-> compress_pages (workspace , mapping ,
901
942
start , len , pages ,
@@ -930,8 +971,6 @@ static int btrfs_decompress_biovec(int type, struct page **pages_in,
930
971
int ret ;
931
972
932
973
workspace = find_workspace (type );
933
- if (IS_ERR (workspace ))
934
- return PTR_ERR (workspace );
935
974
936
975
ret = btrfs_compress_op [type - 1 ]-> decompress_biovec (workspace , pages_in ,
937
976
disk_start ,
@@ -952,8 +991,6 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
952
991
int ret ;
953
992
954
993
workspace = find_workspace (type );
955
- if (IS_ERR (workspace ))
956
- return PTR_ERR (workspace );
957
994
958
995
ret = btrfs_compress_op [type - 1 ]-> decompress (workspace , data_in ,
959
996
dest_page , start_byte ,
0 commit comments