Skip to content

Commit 00c77e0

Browse files
committed
Skip the `maintenance` test in mixed version mode
This test fails when MQTT client ID tracking is performed in Ra, and the higher version node gets compiled with a different OTP version (26) than the lower version node (25).

The reason is described in 83eede7:

```
An interesting side note learned here is that the compiled file rabbit_mqtt_collector must not be changed. This commit only modifies function specs. However as soon as the compiled code is changed, this module becomes a new version. The new version causes the anonymous ra query function to fail in mixed clusters: When the old node does a ra:leader_query where the leader is on the new node, the query function fails on the new node with `badfun` because the new node does not have the same module version. For more context, read: https://web.archive.org/web/20181017104411/http://www.javalimit.com/2010/05/passing-funs-to-other-erlang-nodes.html
```

We shouldn’t use an anonymous function for ra:leader_query or ra:consistent_query. Instead we should use the {M,F,A} form.
https://github.com/rabbitmq/ra/blob/9e5d437a0a76cc126f396be93645a290e758ac75/src/ra.erl#L102-L103

In MQTT the anonymous function is used in
https://github.com/rabbitmq/rabbitmq-server/blob/bcb95c949d8622fb7ba6d3c1b4a12fdb73b0fa66/deps/rabbitmq_mqtt/src/rabbit_mqtt_collector.erl#L50

This causes the query to return a bad fun error (silently ignored in https://github.com/rabbitmq/rabbitmq-server/blob/bcb95c949d8622fb7ba6d3c1b4a12fdb73b0fa66/deps/rabbitmq_mqtt/src/rabbit_mqtt_collector.erl#L70-L71 ) when executed on a different node and either:

1.) Any code in file rabbit_mqtt_collector.erl changed, or
2.) The code gets compiled with a different OTP version.

2.) is the reason for a failing mixed version test in #8553 because both higher and lower versions run OTP 26, but the higher version node got compiled with 26 while the lower version node got compiled with 25.

The same file compiled with OTP 26.0.1

```
1> rabbit_mqtt_collector:module_info(attributes).
[{vsn,[30045739264236496640687548892374951597]}]
```

compiled with OTP 25.3.2

```
1> rabbit_mqtt_collector:module_info(attributes).
[{vsn,[168144385419873449889532520247510637232]}]
```

We skip this test because the impact is very low: with feature flag delete_ra_cluster_mqtt_node disabled, the only consequence is that maintenance mode will not close all MQTT client connections.
1 parent 2a27132 commit 00c77e0

File tree

1 file changed

+29
-20
lines changed

1 file changed

+29
-20
lines changed

deps/rabbitmq_mqtt/test/shared_SUITE.erl

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1276,29 +1276,38 @@ cli_list_queues(Config) ->
12761276
ok = emqtt:disconnect(C).
12771277

12781278
maintenance(Config) ->
1279-
C0 = connect(<<"client-0">>, Config, 0, []),
1280-
C1a = connect(<<"client-1a">>, Config, 1, []),
1281-
C1b = connect(<<"client-1b">>, Config, 1, []),
1282-
ClientsNode1 = [C1a, C1b],
1279+
case is_feature_flag_enabled(Config, delete_ra_cluster_mqtt_node) of
1280+
false ->
1281+
%% When either file rabbit_mqtt_collector changes or different OTP versions
1282+
%% are used for compilation, the rabbit_mqtt_collector module version will
1283+
%% change and cause a bad fun error when executing ra:leader_query/2 remotely.
1284+
{skip, "Anonymous fun as used in ra:leader_query/2 errors when executing "
1285+
"remotely with a different module version"};
1286+
true ->
1287+
C0 = connect(<<"client-0">>, Config, 0, []),
1288+
C1a = connect(<<"client-1a">>, Config, 1, []),
1289+
C1b = connect(<<"client-1b">>, Config, 1, []),
1290+
ClientsNode1 = [C1a, C1b],
12831291

1284-
timer:sleep(500),
1292+
timer:sleep(500),
12851293

1286-
ok = drain_node(Config, 2),
1287-
ok = revive_node(Config, 2),
1288-
timer:sleep(500),
1289-
[?assert(erlang:is_process_alive(C)) || C <- [C0, C1a, C1b]],
1294+
ok = drain_node(Config, 2),
1295+
ok = revive_node(Config, 2),
1296+
timer:sleep(500),
1297+
[?assert(erlang:is_process_alive(C)) || C <- [C0, C1a, C1b]],
12901298

1291-
process_flag(trap_exit, true),
1292-
ok = drain_node(Config, 1),
1293-
[await_exit(Pid) || Pid <- ClientsNode1],
1294-
[assert_v5_disconnect_reason_code(Config, ?RC_SERVER_SHUTTING_DOWN) || _ <- ClientsNode1],
1295-
ok = revive_node(Config, 1),
1296-
?assert(erlang:is_process_alive(C0)),
1297-
1298-
ok = drain_node(Config, 0),
1299-
await_exit(C0),
1300-
assert_v5_disconnect_reason_code(Config, ?RC_SERVER_SHUTTING_DOWN),
1301-
ok = revive_node(Config, 0).
1299+
process_flag(trap_exit, true),
1300+
ok = drain_node(Config, 1),
1301+
[await_exit(Pid) || Pid <- ClientsNode1],
1302+
[assert_v5_disconnect_reason_code(Config, ?RC_SERVER_SHUTTING_DOWN) || _ <- ClientsNode1],
1303+
ok = revive_node(Config, 1),
1304+
?assert(erlang:is_process_alive(C0)),
1305+
1306+
ok = drain_node(Config, 0),
1307+
await_exit(C0),
1308+
assert_v5_disconnect_reason_code(Config, ?RC_SERVER_SHUTTING_DOWN),
1309+
ok = revive_node(Config, 0)
1310+
end.
13021311

13031312
keepalive(Config) ->
13041313
KeepaliveSecs = 1,

0 commit comments

Comments
 (0)