Skip to content

Commit 2f52f04

Browse files
Loïc Hoguinmergify[bot]
authored andcommitted
CQ: Fix shared store crashes
Crashes could happen because compaction would wrongly write over valid messages, or truncate over valid messages, because when looking for messages into the files it would encounter leftover data that made it look like there was a message, which prompted compaction to not look for the real messages hidden within. To avoid this we ensure that there can't be leftover data as a result of compaction. We get this guarantee by blanking data in the holes in the file before we start copying messages closer to the start of the file. This requires us to do a few more writes but we know that the only data in the files at any point are valid messages. Note that it's possible that some of the messages in the files are no longer referenced; that's OK. We filter them out after scanning the file. This was also a good time to merge two almost identical scan functions, and be more explicit about what messages should be dropped after scanning the file (the messages no longer in the ets index and the fan-out messages that ended up re-written in a more recent file). (cherry picked from commit fcd011f)
1 parent 78d9cc9 commit 2f52f04

File tree

2 files changed

+46
-25
lines changed

2 files changed

+46
-25
lines changed

deps/rabbit/src/rabbit_msg_store.erl

Lines changed: 45 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1830,8 +1830,15 @@ compact_file(File, State = #gc_state { file_summary_ets = FileSummaryEts,
18301830
%% Open the file.
18311831
FileName = filenum_to_name(File),
18321832
{ok, Fd} = file:open(form_filename(Dir, FileName), [read, write, binary, raw]),
1833-
%% Load the messages.
1834-
Messages = load_and_vacuum_message_file(File, State),
1833+
%% Load the messages. It's possible to get 0 messages here;
1834+
%% that's OK. That means we have little to do as the file is
1835+
%% about to be deleted.
1836+
{Messages, _} = scan_and_vacuum_message_file(File, State),
1837+
%% Blank holes. We must do this first otherwise the file is left
1838+
%% with data that may confuse the code (for example data that looks
1839+
%% like a message, isn't a message, but spans over a real message).
1840+
%% We blank more than is likely required but better safe than sorry.
1841+
blank_holes_in_file(Fd, Messages),
18351842
%% Compact the file.
18361843
{ok, TruncateSize, IndexUpdates} = do_compact_file(Fd, 0, Messages, lists:reverse(Messages), []),
18371844
%% Sync and close the file.
@@ -1876,6 +1883,32 @@ compact_file(File, State = #gc_state { file_summary_ets = FileSummaryEts,
18761883
garbage_collect(),
18771884
ok.
18781885

1886+
%% We must special case the blanking of the beginning of the file.
1887+
blank_holes_in_file(Fd, [#msg_location{ offset = Offset }|Tail])
1888+
when Offset =/= 0 ->
1889+
Bytes = <<0:Offset/unit:8>>,
1890+
ok = file:pwrite(Fd, 0, Bytes),
1891+
blank_holes_in_file1(Fd, Tail);
1892+
blank_holes_in_file(Fd, Messages) ->
1893+
blank_holes_in_file1(Fd, Messages).
1894+
1895+
blank_holes_in_file1(Fd, [
1896+
#msg_location{ offset = OneOffset, total_size = OneSize },
1897+
#msg_location{ offset = TwoOffset } = Two
1898+
|Tail]) when OneOffset + OneSize < TwoOffset ->
1899+
Offset = OneOffset + OneSize,
1900+
Size = TwoOffset - Offset,
1901+
Bytes = <<0:Size/unit:8>>,
1902+
ok = file:pwrite(Fd, Offset, Bytes),
1903+
blank_holes_in_file1(Fd, [Two|Tail]);
1904+
%% We either have only one message left, or contiguous messages.
1905+
blank_holes_in_file1(Fd, [_|Tail]) ->
1906+
blank_holes_in_file1(Fd, Tail);
1907+
%% No need to blank the hole past the last message as we will
1908+
%% not write there (no confusion possible) and truncate afterwards.
1909+
blank_holes_in_file1(_, []) ->
1910+
ok.
1911+
18791912
%% If the message at the end fits into the hole we have found, we copy it there.
18801913
%% We will do the ets:updates after the data is synced to disk.
18811914
do_compact_file(Fd, Offset, Start = [#msg_location{ offset = StartMsgOffset }|_],
@@ -1962,27 +1995,7 @@ delete_file(File, State = #gc_state { file_summary_ets = FileSummaryEts,
19621995
ok
19631996
end.
19641997

1965-
load_and_vacuum_message_file(File, State = #gc_state{ dir = Dir }) ->
1966-
%% Messages here will be end-of-file at start-of-list
1967-
{ok, Messages, _FileSize} =
1968-
scan_file_for_valid_messages(Dir, filenum_to_name(File)),
1969-
%% foldl will reverse so will end up with msgs in ascending offset order
1970-
lists:foldl(
1971-
fun ({MsgId, TotalSize, Offset}, Acc) ->
1972-
case index_lookup(MsgId, State) of
1973-
#msg_location { file = File, total_size = TotalSize,
1974-
offset = Offset, ref_count = 0 } = Entry ->
1975-
ok = index_delete_object(Entry, State),
1976-
Acc;
1977-
#msg_location { file = File, total_size = TotalSize,
1978-
offset = Offset } = Entry ->
1979-
[ Entry | Acc ];
1980-
_ ->
1981-
Acc
1982-
end
1983-
end, [], Messages).
1984-
1985-
scan_and_vacuum_message_file(File, State = #gc_state { dir = Dir }) ->
1998+
scan_and_vacuum_message_file(File, State = #gc_state{ dir = Dir }) ->
19861999
%% Messages here will be end-of-file at start-of-list
19872000
{ok, Messages, _FileSize} =
19882001
scan_file_for_valid_messages(Dir, filenum_to_name(File)),
@@ -1997,7 +2010,15 @@ scan_and_vacuum_message_file(File, State = #gc_state { dir = Dir }) ->
19972010
#msg_location { file = File, total_size = TotalSize,
19982011
offset = Offset } = Entry ->
19992012
{[ Entry | List ], TotalSize + Size};
2000-
_ ->
2013+
%% Fan-out may remove the entry but also write a new
2014+
%% entry in a different file when it needs to write
2015+
%% a message and the existing reference is in a file
2016+
%% that's about to be deleted. So we explicitly accept
2017+
%% these cases and ignore this message.
2018+
#msg_location { file = OtherFile, total_size = TotalSize }
2019+
when File =/= OtherFile ->
2020+
Acc;
2021+
not_found ->
20012022
Acc
20022023
end
20032024
end, {[], 0}, Messages).

deps/rabbit/test/backing_queue_SUITE.erl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
% This Source Code Form is subject to the terms of the Mozilla Public
1+
%% This Source Code Form is subject to the terms of the Mozilla Public
22
%% License, v. 2.0. If a copy of the MPL was not distributed with this
33
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
44
%%

0 commit comments

Comments
 (0)