Skip to content

Commit c4519dd

Browse files
Merge branch 'stable'
Conflicts: Makefile
2 parents 5366742 + bafb801 commit c4519dd

File tree

3 files changed

+68
-13
lines changed

3 files changed

+68
-13
lines changed

Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,9 @@ define PROJECT_ENV
115115
{background_gc_enabled, false},
116116
{background_gc_target_interval, 60000},
117117
%% rabbitmq-server-589
118-
{proxy_protocol, false}
118+
{proxy_protocol, false},
119+
{disk_monitor_failure_retries, 10},
120+
{disk_monitor_failure_retry_interval, 120000}
119121
]
120122
endef
121123

src/rabbit_disk_monitor.erl

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,12 @@
6565
alarmed,
6666
%% is monitoring enabled? false on unsupported
6767
%% platforms
68-
enabled
68+
enabled,
69+
%% number of retries to enable monitoring if it fails
70+
%% on start-up
71+
retries,
72+
%% Interval between retries
73+
interval
6974
}).
7075

7176
%%----------------------------------------------------------------------------
@@ -114,20 +119,17 @@ start_link(Args) ->
114119

115120
init([Limit]) ->
116121
Dir = dir(),
122+
{ok, Retries} = application:get_env(rabbit, disk_monitor_failure_retries),
123+
{ok, Interval} = application:get_env(rabbit, disk_monitor_failure_retry_interval),
117124
State = #state{dir = Dir,
118125
min_interval = ?DEFAULT_MIN_DISK_CHECK_INTERVAL,
119126
max_interval = ?DEFAULT_MAX_DISK_CHECK_INTERVAL,
120127
alarmed = false,
121-
enabled = true},
122-
case {catch get_disk_free(Dir),
123-
vm_memory_monitor:get_total_memory()} of
124-
{N1, N2} when is_integer(N1), is_integer(N2) ->
125-
{ok, start_timer(set_disk_limits(State, Limit))};
126-
Err ->
127-
rabbit_log:info("Disabling disk free space monitoring "
128-
"on unsupported platform:~n~p~n", [Err]),
129-
{ok, State#state{enabled = false}}
130-
end.
128+
enabled = true,
129+
limit = Limit,
130+
retries = Retries,
131+
interval = Interval},
132+
{ok, enable(State)}.
131133

132134
handle_call(get_disk_free_limit, _From, State = #state{limit = Limit}) ->
133135
{reply, Limit, State};
@@ -161,6 +163,8 @@ handle_call(_Request, _From, State) ->
161163
handle_cast(_Request, State) ->
162164
{noreply, State}.
163165

166+
handle_info(try_enable, #state{retries = Retries} = State) ->
167+
{noreply, enable(State#state{retries = Retries - 1})};
164168
handle_info(update, State) ->
165169
{noreply, start_timer(internal_update(State))};
166170

@@ -246,7 +250,7 @@ interpret_limit(Absolute) ->
246250

247251
emit_update_info(StateStr, CurrentFree, Limit) ->
248252
rabbit_log:info(
249-
"Disk free space ~s. Free bytes:~p Limit:~p~n",
253+
"Free disk space is ~s. Free bytes: ~p. Limit: ~p~n",
250254
[StateStr, CurrentFree, Limit]).
251255

252256
start_timer(State) ->
@@ -261,3 +265,20 @@ interval(#state{limit = Limit,
261265
max_interval = MaxInterval}) ->
262266
IdealInterval = 2 * (Actual - Limit) / ?FAST_RATE,
263267
trunc(erlang:max(MinInterval, erlang:min(MaxInterval, IdealInterval))).
268+
269+
enable(#state{retries = 0} = State) ->
270+
State;
271+
enable(#state{dir = Dir, interval = Interval, limit = Limit, retries = Retries}
272+
= State) ->
273+
case {catch get_disk_free(Dir),
274+
vm_memory_monitor:get_total_memory()} of
275+
{N1, N2} when is_integer(N1), is_integer(N2) ->
276+
rabbit_log:info("Enabling free disk space monitoring~n", []),
277+
start_timer(set_disk_limits(State, Limit));
278+
Err ->
279+
rabbit_log:info("Free disk space monitor encountered an error "
280+
"(e.g. failed to parse output from OS tools): ~p, retries left: ~s~n",
281+
[Err, Retries]),
282+
timer:send_after(Interval, self(), try_enable),
283+
State#state{enabled = false}
284+
end.

test/unit_inbroker_non_parallel_SUITE.erl

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ groups() ->
3535
app_management, %% Restart RabbitMQ.
3636
channel_statistics, %% Expect specific statistics.
3737
disk_monitor, %% Replace rabbit_misc module.
38+
disk_monitor_enable,
3839
file_handle_cache, %% Change FHC limit.
3940
head_message_timestamp_statistics, %% Expect specific statistics.
4041
log_management, %% Check log files.
@@ -631,6 +632,37 @@ disk_monitor1(_Config) ->
631632
meck:unload(rabbit_misc),
632633
passed.
633634

635+
%% Runs disk_monitor_enable1/1 on the broker through
%% rabbit_ct_broker_helpers:rpc/5 (node argument 0).
%%
%% The remote helper returns `passed` on success and `skipped` on non-Unix
%% hosts. `skipped` is translated into Common Test's `{skip, Reason}` so the
%% case is reported as skipped instead of failing with a badmatch. Any other
%% result crashes with case_clause and therefore still fails the test case.
disk_monitor_enable(Config) ->
    case rabbit_ct_broker_helpers:rpc(Config, 0,
           ?MODULE, disk_monitor_enable1, [Config]) of
        passed ->
            passed;
        skipped ->
            {skip, "disk monitor enable test only runs on Unix"}
    end.
638+
639+
%% Broker-side entry point for the disk monitor re-enable test.
%% Delegates to disk_monitor_enable1/0 on Unix hosts; every other OS family
%% returns `skipped` without running the test.
disk_monitor_enable1(_Config) ->
    case os:type() of
        {unix, _OsName} -> disk_monitor_enable1();
        %% Windows (and anything else non-Unix) is not tested.
        _NonUnix        -> skipped
    end.
647+
648+
%% Checks that the disk monitor starts reporting free space once the OS tool
%% output becomes parseable.
%%
%% Steps:
%%   1. Mock rabbit_misc:os_cmd/1 to return "\n" and restart the disk
%%      monitor with a short retry interval; get_disk_free/0 is expected to
%%      report `undefined`.
%%   2. Swap the mock for a well-formed `df`-style output and wait for a
%%      retry; get_disk_free/0 is expected to report the "Available" column
%%      (in 1024-byte blocks) converted to bytes.
%%
%% Cleanup (unloading the mock and restoring the retry settings) runs in an
%% `after` section, so a failed assertion does not leave rabbit_misc mocked
%% or the application environment modified for later test cases.
disk_monitor_enable1() ->
    ok = meck:new(rabbit_misc, [passthrough]),
    try
        ok = meck:expect(rabbit_misc, os_cmd, fun(_) -> "\n" end),
        application:set_env(rabbit, disk_monitor_failure_retries, 20000),
        application:set_env(rabbit, disk_monitor_failure_retry_interval, 100),
        ok = rabbit_sup:stop_child(rabbit_disk_monitor_sup),
        ok = rabbit_sup:start_delayed_restartable_child(rabbit_disk_monitor, [1000]),
        undefined = rabbit_disk_monitor:get_disk_free(),
        Cmd = "Filesystem 1024-blocks Used Available Capacity iused ifree %iused Mounted on\n/dev/disk1 975798272 234783364 740758908 25% 58759839 185189727 24% /\n",
        ok = meck:expect(rabbit_misc, os_cmd, fun(_) -> Cmd end),
        %% Give the monitor time for several 100 ms retries to pick up the
        %% new mocked output.
        timer:sleep(1000),
        Bytes = 740758908 * 1024,
        Bytes = rabbit_disk_monitor:get_disk_free(),
        passed
    after
        meck:unload(rabbit_misc),
        %% Restore the retry settings to the values the test has always
        %% reset them to.
        application:set_env(rabbit, disk_monitor_failure_retries, 10),
        application:set_env(rabbit, disk_monitor_failure_retry_interval, 120000)
    end.
665+
634666
%% ---------------------------------------------------------------------------
635667
%% rabbitmqctl helpers.
636668
%% ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)