@@ -1008,6 +1008,31 @@ stop_rabbitmq_nodes(Config) ->
1008
1008
fun (NodeConfig ) ->
1009
1009
stop_rabbitmq_node (Config , NodeConfig )
1010
1010
end ),
1011
+ % % Except if disabled, we search for crashes logged in the test nodes after
1012
+ % % they are stopped. If we find some, we log them again in the common_test
1013
+ % % logs and throw an exception to make the test fail.
1014
+ FindCrashes = case rabbit_ct_helpers :get_config (Config , find_crashes ) of
1015
+ true ->
1016
+ true ;
1017
+ false ->
1018
+ false ;
1019
+ undefined ->
1020
+ case os :getenv (" FIND_CRASHES" ) of
1021
+ undefined -> true ;
1022
+ " 1" -> true ;
1023
+ " yes" -> true ;
1024
+ " true" -> true ;
1025
+ _ -> false
1026
+ end
1027
+ end ,
1028
+ case FindCrashes of
1029
+ true ->
1030
+ % % TODO: Make the ignore list configurable.
1031
+ IgnoredCrashes = [" ** force_vhost_failure" ],
1032
+ find_crashes_in_logs (NodeConfigs , IgnoredCrashes );
1033
+ false ->
1034
+ ok
1035
+ end ,
1011
1036
proplists :delete (rmq_nodes , Config ).
1012
1037
1013
1038
stop_rabbitmq_node (Config , NodeConfig ) ->
@@ -1029,6 +1054,84 @@ stop_rabbitmq_node(Config, NodeConfig) ->
1029
1054
end ,
1030
1055
NodeConfig .
1031
1056
1057
%% Scans the log files of every node in NodeConfigs for gen_server
%% termination reports, prints the total to the common_test log and
%% asserts that the total is zero.
%%
%% NodeConfigs:    list of per-node configurations, each handed to
%%                 count_crashes_in_logs/2.
%% IgnoredCrashes: passed through unchanged to count_crashes_in_logs/2.
%%
%% Raises the ?assertEqual failure when at least one report was counted.
find_crashes_in_logs(NodeConfigs, IgnoredCrashes) ->
    ct:pal(
      " Looking up any crash reports in the nodes' log files. If we find "
      " some, they will appear below:"),
    %% Nodes are visited in list order, so reports are printed in that
    %% order too.
    PerNodeCounts = lists:map(
                      fun(NodeConfig) ->
                              count_crashes_in_logs(NodeConfig, IgnoredCrashes)
                      end, NodeConfigs),
    CrashesCount = lists:sum(PerNodeCounts),
    ct:pal(" Found ~b crash report(s)", [CrashesCount]),
    ?assertEqual(0, CrashesCount).
1069
+
1070
%% Returns the number of crash reports found across all log files of
%% one node.
%%
%% The list of files is read from the log_locations key of NodeConfig;
%% each entry is counted by count_crashes_in_log/2 and the per-file
%% counts are added up.
count_crashes_in_logs(NodeConfig, IgnoredCrashes) ->
    LogLocations = ?config(log_locations, NodeConfig),
    PerFileCounts = lists:map(
                      fun(LogLocation) ->
                              count_crashes_in_log(LogLocation, IgnoredCrashes)
                      end, LogLocations),
    lists:sum(PerFileCounts).
1077
+
1078
%% Returns the number of crash reports found in the file at LogLocation.
%%
%% The file is read in one go with file:read_file/1. Anything other than
%% a successful read counts as zero crashes, so a location that cannot
%% be read never fails the caller.
count_crashes_in_log(LogLocation, IgnoredCrashes) ->
    ReadResult = file:read_file(LogLocation),
    case ReadResult of
        {ok, Content} ->
            count_crashes_in_content(Content, IgnoredCrashes);
        _ ->
            %% Best effort: an unreadable location contributes nothing.
            0
    end.
1083
+
1084
%% Returns the number of gen_server termination reports found in
%% Content, the raw content of one log file.
%%
%% Content is cut into lines, which are then handed to
%% count_gen_server_terminations/2 together with IgnoredCrashes.
count_crashes_in_content(Content, IgnoredCrashes) ->
    %% Split at every line start. The pattern must be exactly "^": with
    %% the multiline option it is a zero-width match at each line start,
    %% so no character of Content is consumed by the split. Any literal
    %% character placed before the anchor could never be followed by a
    %% line start, and Content would then come back as a single
    %% unsplit chunk.
    Lines = re:split(Content, "^", [multiline]),
    count_gen_server_terminations(Lines, IgnoredCrashes).
1088
+
1089
%% Counts the gen_server termination reports found in Lines, starting
%% from a count of zero.
count_gen_server_terminations(Lines, IgnoredCrashes) ->
    count_gen_server_terminations(Lines, 0, IgnoredCrashes).

%% Walks Lines looking for the first line of a termination report.
%%
%% When such a line is found, the text captured by the parenthesised
%% group (the "<...> " part preceding the "**" marker) becomes the prefix
%% that capture_gen_server_termination/5 uses to collect the rest of the
%% report. Lines that do not start a report are skipped. Count is
%% returned once Lines is exhausted.
count_gen_server_terminations([], Count, _IgnoredCrashes) ->
    Count;
count_gen_server_terminations([Line | Rest], Count, IgnoredCrashes) ->
    %% NOTE(review): the pattern begins with a literal space, so the
    %% "<...>" group must be preceded by a space on the line. Confirm this
    %% is intended and matches the actual log format.
    HeaderRe = " (<[0-9.]+> )[*]{2} Generic server .+ terminating$",
    case re:run(Line, HeaderRe, [{capture, all_but_first, list}]) of
        {match, [Prefix]} ->
            capture_gen_server_termination(
              Rest, Prefix, [Line], Count, IgnoredCrashes);
        nomatch ->
            count_gen_server_terminations(Rest, Count, IgnoredCrashes)
    end.
1107
+
1108
%% Collects the lines that belong to the termination report whose first
%% line has already been seen.
%%
%% Prefix: text captured from the report's first line; a following line
%%         belongs to the report as long as it matches Prefix followed
%%         by the continuation pattern below.
%% Acc:    lines of the report gathered so far, most recent first.
%%
%% Three outcomes are possible for the next line:
%%   - it continues the report and its captured suffix is listed in
%%     IgnoredCrashes: the whole report is dropped without being counted
%%     and scanning resumes from that line;
%%   - it continues the report otherwise: it is accumulated;
%%   - it does not continue the report (or there are no lines left): the
%%     report is complete and is handed to found_gen_server_termiation/4.
capture_gen_server_termination(
  [], _Prefix, Acc, Count, IgnoredCrashes) ->
    found_gen_server_termiation(
      lists:reverse(Acc), [], Count, IgnoredCrashes);
capture_gen_server_termination(
  [Line | Rest] = Lines, Prefix, Acc, Count, IgnoredCrashes) ->
    %% NOTE(review): Prefix is spliced into the regex unescaped, and the
    %% pattern adds a literal space right after it. Confirm both against
    %% the actual log format.
    ContinuationRe = Prefix ++ " ( .*|\\ *.*|)$",
    case re:run(Line, ContinuationRe, [{capture, all_but_first, list}]) of
        {match, [Suffix]} ->
            case lists:member(Suffix, IgnoredCrashes) of
                true ->
                    count_gen_server_terminations(
                      Lines, Count, IgnoredCrashes);
                false ->
                    capture_gen_server_termination(
                      Rest, Prefix, [Line | Acc], Count, IgnoredCrashes)
            end;
        nomatch ->
            found_gen_server_termiation(
              lists:reverse(Acc), Lines, Count, IgnoredCrashes)
    end.
1130
+
1131
%% Reports one complete termination report and carries on scanning.
%%
%% Message is the list of lines making up the report, in log order; it
%% is printed to the common_test log. Scanning then continues over the
%% remaining Lines with Count incremented by one.
%%
%% NOTE(review): the function name is misspelled ("termiation"); it is
%% kept as is because callers refer to it by this name.
found_gen_server_termiation(Message, Lines, Count, IgnoredCrashes) ->
    ct:pal(" gen_server termination:~n~n~s ", [Message]),
    UpdatedCount = Count + 1,
    count_gen_server_terminations(Lines, UpdatedCount, IgnoredCrashes).
1134
+
1032
1135
% % -------------------------------------------------------------------
1033
1136
% % Helpers for partition simulation
1034
1137
% % -------------------------------------------------------------------
@@ -1346,6 +1449,8 @@ delete_vhost(Config, Node, VHost) ->
1346
1449
%% Calls rabbit_vhost:delete(VHost, Username) on Node through rpc/5.
%%
%% The call is wrapped in an old-style `catch`, so an exception raised
%% by the call is returned as a value instead of propagating to the
%% caller.
delete_vhost(Config, Node, VHost, Username) ->
    DeleteArgs = [VHost, Username],
    catch rpc(Config, Node, rabbit_vhost, delete, DeleteArgs).
1348
1451
1452
%% Exit reason that force_vhost_failure/4 passes to erlang:exit/2 when it
%% kills the message store process of a vhost.
+ -define (FORCE_VHOST_FAILURE_REASON , force_vhost_failure ).
1453
+
1349
1454
%% Convenience form of force_vhost_failure/3 that passes 0 as the node
%% argument.
force_vhost_failure(Config, VHost) ->
    DefaultNode = 0,
    force_vhost_failure(Config, DefaultNode, VHost).
1350
1455
1351
1456
force_vhost_failure (Config , Node , VHost ) ->
@@ -1359,7 +1464,8 @@ force_vhost_failure(Config, Node, VHost, Attempts) ->
1359
1464
try
1360
1465
MessageStorePid = get_message_store_pid (Config , Node , VHost ),
1361
1466
rpc (Config , Node ,
1362
- erlang , exit , [MessageStorePid , force_vhost_failure ]),
1467
+ erlang , exit ,
1468
+ [MessageStorePid , ? FORCE_VHOST_FAILURE_REASON ]),
1363
1469
% % Give it a time to fail
1364
1470
timer :sleep (300 ),
1365
1471
force_vhost_failure (Config , Node , VHost , Attempts - 1 )
0 commit comments