Skip to content

Commit ad82c0b

Browse files
author
Loïc Hoguin
committed
Improve memory and disk usage when upgrading to new index
Also add progress output in the logs so we know things are happening.
1 parent cf080b9 commit ad82c0b

File tree

3 files changed

+73
-9
lines changed

3 files changed

+73
-9
lines changed

deps/rabbit/src/rabbit_classic_queue_index_v2.erl

Lines changed: 33 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -302,25 +302,35 @@ recover_segment(State, ContainsCheckFun, CountersRef, Fd,
302302
Unacked - 1, LocBytes0)
303303
end.
304304

305-
recover_index_v1_clean(State = #mqistate{ queue_name = Name }, Terms, IsMsgStoreClean,
305+
recover_index_v1_clean(State0 = #mqistate{ queue_name = Name }, Terms, IsMsgStoreClean,
306306
ContainsCheckFun, OnSyncFun, OnSyncMsgFun) ->
307+
#resource{virtual_host = VHost, name = QName} = Name,
308+
logger:info("Converting clean queue ~s on vhost ~s to the new index format", [QName, VHost]),
307309
{_, _, V1State} = rabbit_queue_index:recover(Name, Terms, IsMsgStoreClean,
308310
ContainsCheckFun, OnSyncFun, OnSyncMsgFun),
309311
%% We will ignore the counter results because on clean shutdown
310312
%% we do not need to calculate the values again. This lets us
311313
%% share code with dirty recovery.
312314
DummyCountersRef = counters:new(2, []),
313-
recover_index_v1_common(State, Terms, V1State, DummyCountersRef).
315+
State = recover_index_v1_common(State0, Terms, V1State, DummyCountersRef),
316+
logger:info("Queue ~s on vhost ~s converted ~b total messages to the new index format",
317+
[QName, VHost, counters:get(DummyCountersRef, ?RECOVER_COUNT)]),
318+
State.
314319

315-
recover_index_v1_dirty(State = #mqistate{ queue_name = Name }, Terms, IsMsgStoreClean,
320+
recover_index_v1_dirty(State0 = #mqistate{ queue_name = Name }, Terms, IsMsgStoreClean,
316321
ContainsCheckFun, OnSyncFun, OnSyncMsgFun,
317322
CountersRef) ->
323+
#resource{virtual_host = VHost, name = QName} = Name,
324+
logger:info("Converting dirty queue ~s on vhost ~s to the new index format", [QName, VHost]),
318325
%% We ignore the count and bytes returned here because we cannot trust
319326
%% rabbit_queue_index: it has a bug that may lead to more bytes being
320327
%% returned than it really has.
321328
{_, _, V1State} = rabbit_queue_index:recover(Name, Terms, IsMsgStoreClean,
322329
ContainsCheckFun, OnSyncFun, OnSyncMsgFun),
323-
recover_index_v1_common(State, Terms, V1State, CountersRef).
330+
State = recover_index_v1_common(State0, Terms, V1State, CountersRef),
331+
logger:info("Queue ~s on vhost ~s converted ~b total messages to the new index format",
332+
[QName, VHost, counters:get(CountersRef, ?RECOVER_COUNT)]),
333+
State.
324334

325335
recover_index_v1_common(State0 = #mqistate{ queue_name = #resource{ virtual_host = VHost },
326336
dir = Dir }, Terms, V1State, CountersRef) ->
@@ -359,16 +369,22 @@ recover_index_v1_common(State0 = #mqistate{ queue_name = #resource{ virtual_host
359369

360370
recover_index_v1_loop(State, _, _, _, HiSeqId, HiSeqId) ->
361371
State;
362-
recover_index_v1_loop(State0, MSClient, V1State0, CountersRef, LoSeqId, HiSeqId) ->
372+
recover_index_v1_loop(State0 = #mqistate{ queue_name = Name },
373+
MSClient, V1State0, CountersRef, LoSeqId, HiSeqId) ->
363374
UpSeqId = lists:min([rabbit_queue_index:next_segment_boundary(LoSeqId),
364375
HiSeqId]),
365376
{Messages, V1State} = rabbit_queue_index:read(LoSeqId, UpSeqId, V1State0),
366-
counters:add(CountersRef, ?RECOVER_COUNT, length(Messages)),
367-
State = lists:foldl(fun({MsgOrId, SeqId, Props, IsPersistent, IsDelivered}, State1) ->
377+
%% We do a garbage collect immediately after the old index read
378+
%% and ack because they may have created a lot of garbage.
379+
garbage_collect(),
380+
MessagesCount = length(Messages),
381+
counters:add(CountersRef, ?RECOVER_COUNT, MessagesCount),
382+
State2 = lists:foldl(fun({MsgOrId, SeqId, Props, IsPersistent, IsDelivered}, State1) ->
368383
%% We must move embedded messages to the message store.
369384
MsgId = case MsgOrId of
370385
Msg = #basic_message{ id = MsgId0 } ->
371-
rabbit_msg_store:write(MsgId0, Msg, MSClient),
386+
%% We must do a synchronous write to avoid overloading the message store.
387+
rabbit_msg_store:sync_write(MsgId0, Msg, MSClient),
372388
MsgId0;
373389
MsgId0 ->
374390
MsgId0
@@ -378,6 +394,15 @@ recover_index_v1_loop(State0, MSClient, V1State0, CountersRef, LoSeqId, HiSeqId)
378394
counters:add(CountersRef, ?RECOVER_BYTES, Props#message_properties.size),
379395
publish(MsgId, SeqId, Props, IsPersistent, IsDelivered, infinity, State1)
380396
end, State0, Messages),
397+
State = flush(State2),
398+
%% We have written everything to disk. We can delete the old segment file
399+
%% to free up much needed space, to avoid doubling disk usage during the upgrade.
400+
rabbit_queue_index:delete_segment_file_for_seq_id(LoSeqId, V1State),
401+
%% Log some progress to keep the user aware of what's going on, as moving
402+
%% embedded messages can take quite some time.
403+
#resource{virtual_host = VHost, name = QName} = Name,
404+
logger:info("Queue ~s on vhost ~s converted ~b more messages to the new index format",
405+
[QName, VHost, MessagesCount]),
381406
recover_index_v1_loop(State, MSClient, V1State, CountersRef, UpSeqId, HiSeqId).
382407

383408
-spec terminate(rabbit_types:vhost(), [any()], State) -> State when State::mqistate().

deps/rabbit/src/rabbit_msg_store.erl

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
-export([scan_file_for_valid_messages/1]). %% salvage tool
2121

2222
-export([transform_dir/3, force_recovery/2]). %% upgrade
23+
-export([sync_write/3]). %% Used when upgrading to the modern index.
2324

2425
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
2526
code_change/3, prioritise_call/4, prioritise_cast/3,
@@ -515,6 +516,17 @@ write_flow(MsgId, Msg,
515516

516517
write(MsgId, Msg, CState) -> client_write(MsgId, Msg, noflow, CState).
517518

519+
-spec sync_write(rabbit_types:msg_id(), msg(), client_msstate()) -> 'ok'.
520+
521+
%% Used when upgrading to the modern index.
522+
sync_write(MsgId, Msg,
523+
CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts,
524+
client_ref = CRef }) ->
525+
file_handle_cache_stats:update(msg_store_write),
526+
ok = client_update_flying(+1, MsgId, CState),
527+
ok = update_msg_cache(CurFileCacheEts, MsgId, Msg),
528+
ok = server_call(CState, {write, CRef, MsgId, noflow}).
529+
518530
-spec read(rabbit_types:msg_id(), client_msstate()) ->
519531
{rabbit_types:ok(msg()) | 'not_found', client_msstate()}.
520532

@@ -867,7 +879,12 @@ handle_call({read, MsgId}, From, State) ->
867879

868880
handle_call({contains, MsgId}, From, State) ->
869881
State1 = contains_message(MsgId, From, State),
870-
noreply(State1).
882+
noreply(State1);
883+
884+
%% Used when upgrading to the modern index.
885+
handle_call(Write = {write, _, _, _}, _From, State) ->
886+
{noreply, State1, _} = handle_cast(Write, State),
887+
reply(ok, State1).
871888

872889
handle_cast({client_dying, CRef},
873890
State = #msstate { dying_clients = DyingClients,

deps/rabbit/src/rabbit_queue_index.erl

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,9 @@
3131
%% after a non-clean shutdown.
3232
-export([queue_index_walker_reader/2]).
3333

34+
%% Used to upgrade to the modern index.
35+
-export([delete_segment_file_for_seq_id/2]).
36+
3437
-define(CLEAN_FILENAME, "clean.dot").
3538

3639
%%----------------------------------------------------------------------------
@@ -1543,3 +1546,22 @@ cleanup_global_recovery_terms() ->
15431546
update_recovery_term(#resource{virtual_host = VHost} = QueueName, Term) ->
15441547
Key = queue_name_to_dir_name(QueueName),
15451548
rabbit_recovery_terms:store(VHost, Key, Term).
1549+
1550+
1551+
%%----------------------------------------------------------------------------
1552+
%% Upgrade to the modern index
1553+
%%----------------------------------------------------------------------------
1554+
1555+
%% This function is only used when upgrading to the new index.
1556+
%% We delete the segment file without updating the state.
1557+
%% We will drop the state later on so we don't care much
1558+
%% about how accurate it is as long as we can read from
1559+
%% subsequent segment files.
1560+
delete_segment_file_for_seq_id(SeqId, #qistate { segments = Segments }) ->
1561+
{Seg, _} = seq_id_to_seg_and_rel_seq_id(SeqId),
1562+
case segment_find(Seg, Segments) of
1563+
{ok, #segment { path = Path }} ->
1564+
ok = rabbit_file:delete(Path);
1565+
error ->
1566+
ok
1567+
end.

0 commit comments

Comments
 (0)