Skip to content

Commit c651536

Browse files
committed
rabbitmq_ct_helpers: Find crashes in RabbitMQ logs
If we find some crashes after stopping a node, the crash reports are logged in Common Test HTML report and an assertion is fired. Here is an example of a crash report found: The console output: * [khepri_store] policy_ttl (00:00.574) operator_policy_ttl (00:00.178) operator_retroactive_policy_ttl (00:00.154) operator_retroactive_policy_publish_ttl (00:01.677) policy_SUITE > khepri_store {error, {{assertEqual, [{module,rabbit_ct_broker_helpers}, {line,1050}, {expression,"CrashesCount"}, {expected,0}, {value,1}]}, [{rabbit_ct_broker_helpers,find_crashes_in_logs,1, [{file,"rabbit_ct_broker_helpers.erl"},{line,1050}]}, {rabbit_ct_broker_helpers,stop_rabbitmq_nodes,1, [{file,"rabbit_ct_broker_helpers.erl"},{line,1019}]}, {rabbit_ct_helpers,run_steps,2, [{file,"rabbit_ct_helpers.erl"},{line,136}]}, {test_server,ts_tc,3,[{file,"test_server.erl"},{line,1783}]}, {test_server,run_test_case_eval1,6, [{file,"test_server.erl"},{line,1380}]}, {test_server,run_test_case_eval,9, [{file,"test_server.erl"},{line,1224}]}]}} The Common Test HTML (truncated) report: *** User 2022-06-10 14:58:50.687 *** Looking up any crash reports in the node log files. If we find some, they will appear below: *** User 2022-06-10 14:58:50.716 *** gen_server termination: 2022-06-10 14:58:25.785158+02:00 [error] <0.1387.0> ** Generic server rabbit_node_monitor terminating 2022-06-10 14:58:25.785158+02:00 [error] <0.1387.0> ** Last message in was {'DOWN',#Ref<0.2117588230.1026555905.48245>,process, 2022-06-10 14:58:25.785158+02:00 [error] <0.1387.0> {rabbit,'rmq-ct-khepri_store-1-21000@localhost'}, 2022-06-10 14:58:25.785158+02:00 [error] <0.1387.0> normal} (...) *** User 2022-06-10 14:58:50.717 *** Found 1 crash reports
1 parent 97dd198 commit c651536

File tree

1 file changed

+67
-1
lines changed

1 file changed

+67
-1
lines changed

deps/rabbitmq_ct_helpers/src/rabbit_ct_broker_helpers.erl

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -785,10 +785,12 @@ query_node(Config, NodeConfig) ->
785785
[rabbit, plugins_dir]),
786786
{ok, EnabledPluginsFile} = rpc(Config, Nodename, application, get_env,
787787
[rabbit, enabled_plugins_file]),
788+
LogLocations = rpc(Config, Nodename, rabbit, log_locations, []),
788789
Vars0 = [{pid_file, PidFile},
789790
{mnesia_dir, MnesiaDir},
790791
{plugins_dir, PluginsDir},
791-
{enabled_plugins_file, EnabledPluginsFile}],
792+
{enabled_plugins_file, EnabledPluginsFile},
793+
{log_locations, LogLocations}],
792794
Vars = try
793795
EnabledFeatureFlagsFile = rpc(Config, Nodename,
794796
rabbit_feature_flags,
@@ -999,6 +1001,7 @@ stop_rabbitmq_nodes(Config) ->
9991001
fun(NodeConfig) ->
10001002
stop_rabbitmq_node(Config, NodeConfig)
10011003
end),
1004+
find_crashes_in_logs(NodeConfigs),
10021005
proplists:delete(rmq_nodes, Config).
10031006

10041007
stop_rabbitmq_node(Config, NodeConfig) ->
@@ -1019,6 +1022,69 @@ stop_rabbitmq_node(Config, NodeConfig) ->
10191022
end,
10201023
NodeConfig.
10211024

1025+
find_crashes_in_logs(NodeConfigs) ->
1026+
ct:pal(
1027+
"Looking up any crash reports in the nodes' log files. If we find "
1028+
"some, they will appear below:"),
1029+
CrashesCount = lists:foldl(
1030+
fun(NodeConfig, Total) ->
1031+
Count = count_crashes_in_logs(NodeConfig),
1032+
Total + Count
1033+
end, 0, NodeConfigs),
1034+
ct:pal("Found ~b crash report(s)", [CrashesCount]),
1035+
?assertEqual(0, CrashesCount).
1036+
1037+
count_crashes_in_logs(NodeConfig) ->
1038+
LogLocations = ?config(log_locations, NodeConfig),
1039+
lists:foldl(
1040+
fun(LogLocation, Total) ->
1041+
Count = count_crashes_in_log(LogLocation),
1042+
Total + Count
1043+
end, 0, LogLocations).
1044+
1045+
count_crashes_in_log(LogLocation) ->
1046+
case file:read_file(LogLocation) of
1047+
{ok, Content} -> count_crashes_in_content(Content);
1048+
_ -> 0
1049+
end.
1050+
1051+
count_crashes_in_content(Content) ->
1052+
ReOpts = [multiline],
1053+
Lines = re:split(Content, "^", ReOpts),
1054+
count_gen_server_terminations(Lines).
1055+
1056+
count_gen_server_terminations(Lines) ->
1057+
count_gen_server_terminations(Lines, 0).
1058+
1059+
count_gen_server_terminations([Line | Rest], Count) ->
1060+
ReOpts = [{capture, all_but_first, list}],
1061+
Ret = re:run(
1062+
Line,
1063+
"(<[0-9.]+> )[*]{2} Generic server .+ terminating$",
1064+
ReOpts),
1065+
case Ret of
1066+
{match, [Prefix]} ->
1067+
capture_gen_server_termination(Rest, Prefix, [Line], Count + 1);
1068+
nomatch ->
1069+
count_gen_server_terminations(Rest, Count)
1070+
end;
1071+
count_gen_server_terminations([], Count) ->
1072+
Count.
1073+
1074+
capture_gen_server_termination([Line | Rest] = Lines, Prefix, Acc, Count) ->
1075+
ReOpts = [{capture, none}],
1076+
Ret = re:run(Line, Prefix ++ "( |\\*|$)", ReOpts),
1077+
case Ret of
1078+
match ->
1079+
capture_gen_server_termination(Rest, Prefix, [Line | Acc], Count);
1080+
nomatch ->
1081+
ct:pal("gen_server termination:~n~n~s", [lists:reverse(Acc)]),
1082+
count_gen_server_terminations(Lines, Count)
1083+
end;
1084+
capture_gen_server_termination([] = Rest, _Prefix, Acc, Count) ->
1085+
ct:pal("gen_server termination:~n~n~s", [lists:reverse(Acc)]),
1086+
count_gen_server_terminations(Rest, Count).
1087+
10221088
%% -------------------------------------------------------------------
10231089
%% Helpers for partition simulation
10241090
%% -------------------------------------------------------------------

0 commit comments

Comments
 (0)