Skip to content

Commit b604bb3

Browse files
committed
Fix rabbit_fifo poison handling
By ensuring the delivery count is retained when "dehydrating" the state in preparation for snapshotting. Now the entire message header map is stored which will take additional space w.r.t to keynamne duplication although this can be optimised. [#163513253]
1 parent 55a7c55 commit b604bb3

File tree

2 files changed

+123
-54
lines changed

2 files changed

+123
-54
lines changed

src/rabbit_fifo.erl

Lines changed: 67 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,8 @@
8181
%% in enqueue messages. Used to ensure ordering of messages send from the
8282
%% same process
8383

84-
-type msg_header() :: #{delivery_count => non_neg_integer()}.
84+
-type msg_header() :: #{size := msg_size(),
85+
delivery_count => non_neg_integer()}.
8586
%% The message header map:
8687
%% delivery_count: the number of unsuccessful delivery attempts.
8788
%% A non-zero value indicates a previous attempt.
@@ -94,7 +95,7 @@
9495

9596
-type indexed_msg() :: {ra_index(), msg()}.
9697

97-
-type prefix_msg() :: {'$prefix_msg', msg_size()}.
98+
-type prefix_msg() :: {'$prefix_msg', msg_header()}.
9899

99100
-type delivery_msg() :: {msg_id(), msg()}.
100101
%% A tuple consisting of the message id and the headered message.
@@ -242,8 +243,8 @@
242243
%% overflow calculations).
243244
%% This is done so that consumers are still served in a deterministic
244245
%% order on recovery.
245-
prefix_msgs = {[], []} :: {Return :: [msg_size()],
246-
PrefixMsgs :: [msg_size()]},
246+
prefix_msgs = {[], []} :: {Return :: [msg_header()],
247+
PrefixMsgs :: [msg_header()]},
247248
msg_bytes_enqueue = 0 :: non_neg_integer(),
248249
msg_bytes_checkout = 0 :: non_neg_integer(),
249250
max_length :: maybe(non_neg_integer()),
@@ -970,11 +971,9 @@ maybe_return_all(ConsumerId, #consumer{checked_out = Checked0} = Consumer, Cons1
970971
end.
971972

972973
apply_enqueue(#{index := RaftIdx} = Meta, From, Seq, RawMsg, State0) ->
973-
Bytes = message_size(RawMsg),
974974
case maybe_enqueue(RaftIdx, From, Seq, RawMsg, [], State0) of
975975
{ok, State1, Effects1} ->
976-
State2 = append_to_master_index(RaftIdx,
977-
add_bytes_enqueue(Bytes, State1)),
976+
State2 = append_to_master_index(RaftIdx, State1),
978977
{State, ok, Effects} = checkout(Meta, State2, Effects1),
979978
{maybe_store_dehydrated_state(RaftIdx, State), ok, Effects};
980979
{duplicate, State, Effects} ->
@@ -991,7 +990,7 @@ drop_head(#state{ra_indexes = Indexes0} = State0, Effects0) ->
991990
Effects = dead_letter_effects(maxlen, maps:put(none, FullMsg, #{}),
992991
State, Effects0),
993992
{State, Effects};
994-
{{'$prefix_msg', Bytes}, State1} ->
993+
{{'$prefix_msg', #{size := Bytes}}, State1} ->
995994
State = add_bytes_drop(Bytes, State1),
996995
{State, Effects0};
997996
empty ->
@@ -1001,12 +1000,14 @@ drop_head(#state{ra_indexes = Indexes0} = State0, Effects0) ->
10011000
enqueue(RaftIdx, RawMsg, #state{messages = Messages,
10021001
low_msg_num = LowMsgNum,
10031002
next_msg_num = NextMsgNum} = State0) ->
1004-
Msg = {RaftIdx, {#{}, RawMsg}}, % indexed message with header map
1005-
State0#state{messages = Messages#{NextMsgNum => Msg},
1006-
% this is probably only done to record it when low_msg_num
1007-
% is undefined
1008-
low_msg_num = min(LowMsgNum, NextMsgNum),
1009-
next_msg_num = NextMsgNum + 1}.
1003+
Size = message_size(RawMsg),
1004+
Msg = {RaftIdx, {#{size => Size}, RawMsg}}, % indexed message with header map
1005+
State = add_bytes_enqueue(Size, State0),
1006+
State#state{messages = Messages#{NextMsgNum => Msg},
1007+
% this is probably only done to record it when low_msg_num
1008+
% is undefined
1009+
low_msg_num = min(LowMsgNum, NextMsgNum),
1010+
next_msg_num = NextMsgNum + 1}.
10101011

10111012
append_to_master_index(RaftIdx,
10121013
#state{ra_indexes = Indexes0} = State0) ->
@@ -1088,11 +1089,14 @@ return(Meta, ConsumerId, MsgNumMsgs, Con0, Checked,
10881089
credit = increase_credit(Con0, length(MsgNumMsgs))},
10891090
{Cons, SQ, Effects1} = update_or_remove_sub(ConsumerId, Con, Cons0,
10901091
SQ0, Effects0),
1091-
{State1, Effects2} = lists:foldl(fun({'$prefix_msg', _} = Msg, {S0, E0}) ->
1092-
return_one(0, Msg, S0, E0, ConsumerId, Con);
1093-
({MsgNum, Msg}, {S0, E0}) ->
1094-
return_one(MsgNum, Msg, S0, E0, ConsumerId, Con)
1095-
end, {State0, Effects1}, MsgNumMsgs),
1092+
{State1, Effects2} = lists:foldl(
1093+
fun({'$prefix_msg', _} = Msg, {S0, E0}) ->
1094+
return_one(0, Msg, S0, E0,
1095+
ConsumerId, Con);
1096+
({MsgNum, Msg}, {S0, E0}) ->
1097+
return_one(MsgNum, Msg, S0, E0,
1098+
ConsumerId, Con)
1099+
end, {State0, Effects1}, MsgNumMsgs),
10961100
checkout(Meta, State1#state{consumers = Cons,
10971101
service_queue = SQ},
10981102
Effects2).
@@ -1152,9 +1156,8 @@ dead_letter_effects(_Reason, _Discarded,
11521156
Effects;
11531157
dead_letter_effects(Reason, Discarded,
11541158
#state{dead_letter_handler = {Mod, Fun, Args}}, Effects) ->
1155-
DeadLetters = maps:fold(fun(_, {_, {_, {_Header, Msg}}},
1156-
% MsgId, MsgIdID, RaftId, Header
1157-
Acc) -> [{Reason, Msg} | Acc]
1159+
DeadLetters = maps:fold(fun(_, {_, {_, {_Header, Msg}}}, Acc) ->
1160+
[{Reason, Msg} | Acc]
11581161
end, [], Discarded),
11591162
[{mod_call, Mod, Fun, Args ++ [DeadLetters]} | Effects].
11601163

@@ -1200,24 +1203,44 @@ find_next_cursor(Smallest, Cursors0, Potential) ->
12001203
{Potential, Cursors0}
12011204
end.
12021205

1203-
return_one(0, {'$prefix_msg', _} = Msg,
1204-
#state{returns = Returns} = State0, Effects, _ConsumerId, _Con) ->
1205-
{add_bytes_return(Msg,
1206-
State0#state{returns = lqueue:in(Msg, Returns)}), Effects};
1206+
return_one(0, {'$prefix_msg', Header0},
1207+
#state{returns = Returns,
1208+
delivery_limit = DeliveryLimit} = State0, Effects0,
1209+
ConsumerId, Con) ->
1210+
Header = maps:update_with(delivery_count,
1211+
fun (C) -> C+1 end,
1212+
1, Header0),
1213+
Msg = {'$prefix_msg', Header},
1214+
case maps:get(delivery_count, Header) of
1215+
DeliveryCount when DeliveryCount > DeliveryLimit ->
1216+
Checked = Con#consumer.checked_out,
1217+
{State1, Effects} = complete(ConsumerId, [], 1, Con, Checked,
1218+
Effects0, State0),
1219+
{add_bytes_settle(Msg, State1), Effects};
1220+
_ ->
1221+
%% this should not affect the release cursor in any way
1222+
{add_bytes_return(Msg,
1223+
State0#state{returns = lqueue:in(Msg, Returns)}),
1224+
Effects0}
1225+
end;
12071226
return_one(MsgNum, {RaftId, {Header0, RawMsg}},
12081227
#state{returns = Returns,
1209-
delivery_limit = DeliveryLimit} = State0, Effects0, ConsumerId, Con) ->
1228+
delivery_limit = DeliveryLimit} = State0,
1229+
Effects0, ConsumerId, Con) ->
12101230
Header = maps:update_with(delivery_count,
12111231
fun (C) -> C+1 end,
12121232
1, Header0),
1233+
Msg = {RaftId, {Header, RawMsg}},
12131234
case maps:get(delivery_count, Header) of
12141235
DeliveryCount when DeliveryCount > DeliveryLimit ->
1215-
Effects = dead_letter_effects(rejected, maps:put(none, {MsgNum, {RaftId, {Header, RawMsg}}}, #{}), State0, Effects0),
1216-
Checked = maps:without([MsgNum], Con#consumer.checked_out),
1217-
{State1, Effects1} = complete(ConsumerId, [RaftId], 1, Con, Checked, Effects, State0),
1236+
DlMsg = {MsgNum, Msg},
1237+
Effects = dead_letter_effects(rejected, maps:put(none, DlMsg, #{}),
1238+
State0, Effects0),
1239+
Checked = Con#consumer.checked_out,
1240+
{State1, Effects1} = complete(ConsumerId, [RaftId], 1, Con, Checked,
1241+
Effects, State0),
12181242
{add_bytes_settle(RawMsg, State1), Effects1};
12191243
_ ->
1220-
Msg = {RaftId, {Header, RawMsg}},
12211244
%% this should not affect the release cursor in any way
12221245
{add_bytes_return(RawMsg,
12231246
State0#state{returns = lqueue:in({MsgNum, Msg}, Returns)}), Effects0}
@@ -1293,9 +1316,9 @@ append_send_msg_effects(Effects0, AccMap) ->
12931316
%%
12941317
%% When we return it is always done to the current return queue
12951318
%% for both prefix messages and current messages
1296-
take_next_msg(#state{prefix_msgs = {[Bytes | Rem], P}} = State) ->
1319+
take_next_msg(#state{prefix_msgs = {[Header | Rem], P}} = State) ->
12971320
%% there are prefix returns, these should be served first
1298-
{{'$prefix_msg', Bytes},
1321+
{{'$prefix_msg', Header},
12991322
State#state{prefix_msgs = {Rem, P}}};
13001323
take_next_msg(#state{returns = Returns,
13011324
low_msg_num = Low0,
@@ -1325,9 +1348,9 @@ take_next_msg(#state{returns = Returns,
13251348
end
13261349
end;
13271350
empty ->
1328-
[Bytes | Rem] = P,
1351+
[Header | Rem] = P,
13291352
%% There are prefix msgs
1330-
{{'$prefix_msg', Bytes},
1353+
{{'$prefix_msg', Header},
13311354
State#state{prefix_msgs = {R, Rem}}}
13321355
end.
13331356

@@ -1486,15 +1509,15 @@ dehydrate_state(#state{messages = Messages,
14861509
returns = Returns,
14871510
prefix_msgs = {PrefRet0, PrefMsg0}} = State) ->
14881511
%% TODO: optimise this function as far as possible
1489-
PrefRet = lists:foldl(fun ({'$prefix_msg', Bytes}, Acc) ->
1490-
[Bytes | Acc];
1491-
({_, {_, {_, Raw}}}, Acc) ->
1492-
[message_size(Raw) | Acc]
1512+
PrefRet = lists:foldl(fun ({'$prefix_msg', Header}, Acc) ->
1513+
[Header | Acc];
1514+
({_, {_, {Header, _}}}, Acc) ->
1515+
[Header | Acc]
14931516
end,
14941517
lists:reverse(PrefRet0),
14951518
lqueue:to_list(Returns)),
1496-
PrefMsgs = lists:foldl(fun ({_, {_RaftIdx, {_H, Raw}}}, Acc) ->
1497-
[message_size(Raw) | Acc]
1519+
PrefMsgs = lists:foldl(fun ({_, {_RaftIdx, {Header, _}}}, Acc) ->
1520+
[Header| Acc]
14981521
end,
14991522
lists:reverse(PrefMsg0),
15001523
lists:sort(maps:to_list(Messages))),
@@ -1512,8 +1535,8 @@ dehydrate_state(#state{messages = Messages,
15121535
dehydrate_consumer(#consumer{checked_out = Checked0} = Con) ->
15131536
Checked = maps:map(fun (_, {'$prefix_msg', _} = M) ->
15141537
M;
1515-
(_, {_, {_, {_, Raw}}}) ->
1516-
{'$prefix_msg', message_size(Raw)}
1538+
(_, {_, {_, {Header, _}}}) ->
1539+
{'$prefix_msg', Header}
15171540
end, Checked0),
15181541
Con#consumer{checked_out = Checked}.
15191542

@@ -1591,7 +1614,7 @@ add_bytes_return(Msg, #state{msg_bytes_checkout = Checkout,
15911614
message_size(#basic_message{content = Content}) ->
15921615
#content{payload_fragments_rev = PFR} = Content,
15931616
iolist_size(PFR);
1594-
message_size({'$prefix_msg', B}) ->
1617+
message_size({'$prefix_msg', #{size := B}}) ->
15951618
B;
15961619
message_size(B) when is_binary(B) ->
15971620
byte_size(B);

test/rabbit_fifo_prop_SUITE.erl

Lines changed: 56 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
-export([
66
]).
77

8-
-include_lib("common_test/include/ct.hrl").
98
-include_lib("proper/include/proper.hrl").
9+
-include_lib("common_test/include/ct.hrl").
1010
-include_lib("eunit/include/eunit.hrl").
1111

1212
%%%===================================================================
@@ -36,7 +36,9 @@ all_tests() ->
3636
scenario12,
3737
scenario13,
3838
scenario14,
39-
scenario15
39+
scenario15,
40+
scenario16,
41+
fake_pid
4042
].
4143

4244
groups() ->
@@ -251,20 +253,59 @@ scenario15(_Config) ->
251253
delivery_limit => 1}, Commands),
252254
ok.
253255

256+
scenario16(_Config) ->
257+
C1Pid = c:pid(0,883,1),
258+
C1 = {<<>>, C1Pid},
259+
C2 = {<<>>, c:pid(0,882,1)},
260+
E = c:pid(0,176,1),
261+
Commands = [
262+
make_checkout(C1, {auto,1,simple_prefetch}),
263+
make_enqueue(E, 1, msg1),
264+
make_checkout(C2, {auto,1,simple_prefetch}),
265+
{down, C1Pid, noproc}, %% msg1 allocated to C2
266+
make_return(C2, [0]), %% msg1 returned
267+
make_enqueue(E, 2, <<>>),
268+
make_settle(C2, [0])
269+
],
270+
run_snapshot_test(#{name => ?FUNCTION_NAME,
271+
delivery_limit => 1}, Commands),
272+
ok.
273+
274+
fake_pid(_Config) ->
275+
Pid = fake_external_pid(<<"mynode@banana">>),
276+
?assertNotEqual(node(Pid), node()),
277+
?assert(is_pid(Pid)),
278+
ok.
279+
280+
fake_external_pid(Node) when is_binary(Node) ->
281+
ThisNodeSize = size(term_to_binary(node())) + 1,
282+
Pid = spawn(fun () -> ok end),
283+
%% drop the local node data from a local pid
284+
<<_:ThisNodeSize/binary, LocalPidData/binary>> = term_to_binary(Pid),
285+
S = size(Node),
286+
%% replace it with the incoming node binary
287+
Final = <<131,103, 100, 0, S, Node/binary, LocalPidData/binary>>,
288+
binary_to_term(Final).
289+
254290
snapshots(_Config) ->
255291
run_proper(
256292
fun () ->
257293
?FORALL({Length, Bytes, SingleActiveConsumer, DeliveryLimit},
258294
frequency([{10, {0, 0, false, 0}},
259-
{5, {non_neg_integer(), non_neg_integer(),
260-
boolean(), non_neg_integer()}}]),
261-
?FORALL(O, ?LET(Ops, log_gen(200), expand(Ops)),
262-
collect({Length, Bytes},
295+
{5, {oneof([range(1, 10), undefined]),
296+
oneof([range(1, 1000), undefined]),
297+
boolean(),
298+
oneof([range(1, 3), undefined])
299+
}}]),
300+
?FORALL(O, ?LET(Ops, log_gen(250), expand(Ops)),
301+
collect({log_size, length(O)},
263302
snapshots_prop(
264303
config(?FUNCTION_NAME,
265-
Length, Bytes,
266-
SingleActiveConsumer, DeliveryLimit), O))))
267-
end, [], 2000).
304+
Length,
305+
Bytes,
306+
SingleActiveConsumer,
307+
DeliveryLimit), O))))
308+
end, [], 2500).
268309

269310
config(Name, Length, Bytes, SingleActive, DeliveryLimit) ->
270311
#{name => Name,
@@ -305,7 +346,10 @@ log_gen(Size) ->
305346
]))))).
306347

307348
pid_gen() ->
308-
?LET(_, integer(), spawn(fun () -> ok end)).
349+
?LET(Node, oneof([atom_to_binary(node(), utf8),
350+
<<"fakenode@fake">>,
351+
<<"fakenode@fake2">>
352+
]), fake_external_pid(Node)).
309353

310354
down_gen(Pid) ->
311355
?LET(E, {down, Pid, oneof([noconnection, noproc])}, E).
@@ -493,6 +537,7 @@ run_snapshot_test0(Conf, Commands) ->
493537
State = rabbit_fifo:normalize(State0),
494538

495539
[begin
540+
% ct:pal("release_cursor: ~b~n", [SnapIdx]),
496541
%% drop all entries below and including the snapshot
497542
Filtered = lists:dropwhile(fun({X, _}) when X =< SnapIdx -> true;
498543
(_) -> false
@@ -506,6 +551,7 @@ run_snapshot_test0(Conf, Commands) ->
506551
ct:pal("Snapshot tests failed run log:~n"
507552
"~p~n from ~n~p~n Entries~n~p~n",
508553
[Filtered, SnapState, Entries]),
554+
ct:pal("Expected~n~p~nGot:~n~p", [State, S]),
509555
?assertEqual(State, S)
510556
end
511557
end || {release_cursor, SnapIdx, SnapState} <- Effects],

0 commit comments

Comments
 (0)