@@ -1830,8 +1830,15 @@ compact_file(File, State = #gc_state { file_summary_ets = FileSummaryEts,
1830
1830
% % Open the file.
1831
1831
FileName = filenum_to_name (File ),
1832
1832
{ok , Fd } = file :open (form_filename (Dir , FileName ), [read , write , binary , raw ]),
1833
- % % Load the messages.
1834
- Messages = load_and_vacuum_message_file (File , State ),
1833
+ % % Load the messages. It's possible to get 0 messages here;
1834
+ % % that's OK. That means we have little to do as the file is
1835
+ % % about to be deleted.
1836
+ {Messages , _ } = scan_and_vacuum_message_file (File , State ),
1837
+ % % Blank holes. We must do this first otherwise the file is left
1838
+ % % with data that may confuse the code (for example data that looks
1839
+ % % like a message, isn't a message, but spans over a real message).
1840
+ % % We blank more than is likely required but better safe than sorry.
1841
+ blank_holes_in_file (Fd , Messages ),
1835
1842
% % Compact the file.
1836
1843
{ok , TruncateSize , IndexUpdates } = do_compact_file (Fd , 0 , Messages , lists :reverse (Messages ), []),
1837
1844
% % Sync and close the file.
@@ -1876,6 +1883,32 @@ compact_file(File, State = #gc_state { file_summary_ets = FileSummaryEts,
1876
1883
garbage_collect (),
1877
1884
ok .
1878
1885
1886
%% Overwrite with zeroes every hole that precedes or separates the
%% messages listed in Messages, in the file open on Fd.
%%
%% Fd must be opened for writing in raw mode; Messages is a list of
%% #msg_location{} records, expected in ascending offset order (the
%% pairwise comparison in blank_holes_in_file1/2 relies on it).
%% An empty list is accepted and nothing is written.
%%
%% Returns ok. Any failing file:pwrite/3 crashes with badmatch.
%%
%% We must special case the blanking of the beginning of the file,
%% because there is no previous message to compare the first one with.
blank_holes_in_file(Fd, [#msg_location{ offset = Offset }|_] = Messages)
        when Offset =/= 0 ->
    Bytes = <<0:Offset/unit:8>>,
    ok = file:pwrite(Fd, 0, Bytes),
    %% The first message must stay in the list: the helper detects holes
    %% by comparing adjacent entries, so dropping it here would leave
    %% the hole between the first and the second message unblanked.
    blank_holes_in_file1(Fd, Messages);
blank_holes_in_file(Fd, Messages) ->
    blank_holes_in_file1(Fd, Messages).

%% Walk the list pairwise; whenever the end of one message is strictly
%% before the start of the next, zero the bytes in between.
blank_holes_in_file1(Fd, [
        #msg_location{ offset = OneOffset, total_size = OneSize },
        #msg_location{ offset = TwoOffset } = Two
        |Tail]) when OneOffset + OneSize < TwoOffset ->
    Offset = OneOffset + OneSize,
    Size = TwoOffset - Offset,
    Bytes = <<0:Size/unit:8>>,
    ok = file:pwrite(Fd, Offset, Bytes),
    %% Two is kept at the head so it is compared with its own successor.
    blank_holes_in_file1(Fd, [Two|Tail]);
%% We either have only one message left, or contiguous messages.
blank_holes_in_file1(Fd, [_|Tail]) ->
    blank_holes_in_file1(Fd, Tail);
%% No need to blank the hole past the last message as we will
%% not write there (no confusion possible) and truncate afterwards.
blank_holes_in_file1(_, []) ->
    ok.
1911
+
1879
1912
% % If the message at the end fits into the hole we have found, we copy it there.
1880
1913
% % We will do the ets:updates after the data is synced to disk.
1881
1914
do_compact_file (Fd , Offset , Start = [# msg_location { offset = StartMsgOffset }|_ ],
@@ -1962,27 +1995,7 @@ delete_file(File, State = #gc_state { file_summary_ets = FileSummaryEts,
1962
1995
ok
1963
1996
end .
1964
1997
1965
- load_and_vacuum_message_file (File , State = # gc_state { dir = Dir }) ->
1966
- % % Messages here will be end-of-file at start-of-list
1967
- {ok , Messages , _FileSize } =
1968
- scan_file_for_valid_messages (Dir , filenum_to_name (File )),
1969
- % % foldl will reverse so will end up with msgs in ascending offset order
1970
- lists :foldl (
1971
- fun ({MsgId , TotalSize , Offset }, Acc ) ->
1972
- case index_lookup (MsgId , State ) of
1973
- # msg_location { file = File , total_size = TotalSize ,
1974
- offset = Offset , ref_count = 0 } = Entry ->
1975
- ok = index_delete_object (Entry , State ),
1976
- Acc ;
1977
- # msg_location { file = File , total_size = TotalSize ,
1978
- offset = Offset } = Entry ->
1979
- [ Entry | Acc ];
1980
- _ ->
1981
- Acc
1982
- end
1983
- end , [], Messages ).
1984
-
1985
- scan_and_vacuum_message_file (File , State = # gc_state { dir = Dir }) ->
1998
+ scan_and_vacuum_message_file (File , State = # gc_state { dir = Dir }) ->
1986
1999
% % Messages here will be end-of-file at start-of-list
1987
2000
{ok , Messages , _FileSize } =
1988
2001
scan_file_for_valid_messages (Dir , filenum_to_name (File )),
@@ -1997,7 +2010,15 @@ scan_and_vacuum_message_file(File, State = #gc_state { dir = Dir }) ->
1997
2010
# msg_location { file = File , total_size = TotalSize ,
1998
2011
offset = Offset } = Entry ->
1999
2012
{[ Entry | List ], TotalSize + Size };
2000
- _ ->
2013
+ % % Fan-out may remove the entry but also write a new
2014
+ % % entry in a different file when it needs to write
2015
+ % % a message and the existing reference is in a file
2016
+ % % that's about to be deleted. So we explicitly accept
2017
+ % % these cases and ignore this message.
2018
+ # msg_location { file = OtherFile , total_size = TotalSize }
2019
+ when File =/= OtherFile ->
2020
+ Acc ;
2021
+ not_found ->
2001
2022
Acc
2002
2023
end
2003
2024
end , {[], 0 }, Messages ).
0 commit comments