Skip to content

Commit 67bdc01

Browse files
committed
Add a health check for testing readiness to serve clients
1 parent 028b692 commit 67bdc01

File tree

4 files changed

+137
-1
lines changed

4 files changed

+137
-1
lines changed

deps/rabbitmq_management/priv/www/api/index.html

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1264,6 +1264,29 @@ <h2>Reference</h2>
12641264
503 Service Unavailable.
12651265
</td>
12661266
</tr>
1267+
<tr>
1268+
<td>X</td>
1269+
<td></td>
1270+
<td></td>
1271+
<td></td>
1272+
<td class="path">/api/health/checks/ready-to-serve-clients</td>
1273+
<td>
1274+
<p>
1275+
Responds with a 200 OK if the target node is ready to serve clients, otherwise
1276+
responds with a 503 Service Unavailable. This check combines:
1277+
</p>
1278+
<ol>
1279+
<li>/api/health/checks/is-in-service</li>
1280+
<li>/api/health/checks/protocol-listener/amqp or /api/health/checks/protocol-listener/amqps</li>
1281+
<li>/api/health/checks/below-node-connection-limit</li>
1282+
</ol>
1283+
<p>
1284+
So this check will only return 200 OK if the target node is in service,
1285+
an AMQP or AMQPS listener is available and the target node has fewer active
1286+
AMQP and AMQPS connections than its configured limit.
1287+
</p>
1288+
</td>
1289+
</tr>
12671290
<tr>
12681291
<td>X</td>
12691292
<td></td>

deps/rabbitmq_management/src/rabbit_mgmt_dispatcher.erl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ dispatcher() ->
209209
{"/health/checks/node-is-quorum-critical", rabbit_mgmt_wm_health_check_node_is_quorum_critical, []},
210210
{"/health/checks/is-in-service", rabbit_mgmt_wm_health_check_is_in_service, []},
211211
{"/health/checks/below-node-connection-limit", rabbit_mgmt_wm_health_check_below_node_connection_limit, []},
212+
{"/health/checks/ready-to-serve-clients", rabbit_mgmt_wm_health_check_ready_to_serve_clients, []},
212213
{"/reset", rabbit_mgmt_wm_reset, []},
213214
{"/reset/:node", rabbit_mgmt_wm_reset, []},
214215
{"/rebalance/queues", rabbit_mgmt_wm_rebalance_queues, [{queues, all}]},
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
%% This Source Code Form is subject to the terms of the Mozilla Public
2+
%% License, v. 2.0. If a copy of the MPL was not distributed with this
3+
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
4+
%%
5+
%% Copyright (c) 2025 Broadcom. All Rights Reserved. The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. All rights reserved.
6+
%%
7+
8+
%% A composite health check that combines:
9+
%% * GET /api/health/checks/is-in-service
10+
%% * GET /api/health/checks/protocol-listener/amqp or /api/health/checks/protocol-listener/amqps
11+
%% * GET /api/health/checks/below-node-connection-limit
12+
13+
-module(rabbit_mgmt_wm_health_check_ready_to_serve_clients).
14+
15+
-export([init/2]).
16+
-export([to_json/2, content_types_provided/2]).
17+
-export([variances/2]).
18+
19+
-include("rabbit_mgmt.hrl").
20+
-include_lib("rabbitmq_management_agent/include/rabbit_mgmt_records.hrl").
21+
22+
%% cowboy_rest entry point. Decorates the request with the common permission
%% headers and then the no-cache headers, and switches to the cowboy_rest
%% protocol with a fresh #context{} as handler state. The incoming handler
%% options are ignored.
init(Req, _State) ->
    WithPermissionHeaders =
        rabbit_mgmt_headers:set_common_permission_headers(Req, ?MODULE),
    WithNoCacheHeaders =
        rabbit_mgmt_headers:set_no_cache_headers(WithPermissionHeaders, ?MODULE),
    {cowboy_rest, WithNoCacheHeaders, #context{}}.
27+
28+
%% cowboy_rest callback: names the request headers the response varies on.
%% The request and context are passed through untouched.
variances(Req, Context) ->
    VaryHeaders = [<<"accept-encoding">>, <<"origin">>],
    {VaryHeaders, Req, Context}.
30+
31+
%% cowboy_rest callback: advertises the content types this resource can
%% produce, all of them rendered by to_json/2. The request and context are
%% passed through untouched.
content_types_provided(ReqData, Context) ->
    Responders = rabbit_mgmt_util:responder_map(to_json),
    {Responders, ReqData, Context}.
33+
34+
%% Renders the outcome of check/0.
%%
%% * {ok, Body}    -> the body is returned through rabbit_mgmt_util:reply/3,
%%                    i.e. the regular cowboy_rest response path.
%% * {error, Body} -> the body is still rendered through
%%                    rabbit_mgmt_util:reply/3, but the response is then sent
%%                    explicitly with ?HEALTH_CHECK_FAILURE_STATUS and
%%                    cowboy_rest processing is stopped.
to_json(ReqData, Context) ->
    Outcome = check(),
    case Outcome of
        {error, FailureBody} ->
            {Rendered, ReqData1, Context1} =
                rabbit_mgmt_util:reply(FailureBody, ReqData, Context),
            FailedReq = cowboy_req:reply(
                          ?HEALTH_CHECK_FAILURE_STATUS, #{}, Rendered, ReqData1),
            {stop, FailedReq, Context1};
        {ok, SuccessBody} ->
            rabbit_mgmt_util:reply(SuccessBody, ReqData, Context)
    end.
46+
47+
%% Composite readiness check. Evaluates, in order, and stops at the first
%% failure:
%%
%%   1. rabbit:is_serving/0 must return true;
%%   2. at least one of the amqp / 'amqp/ssl' protocols must have a Ranch
%%      listener ref;
%%   3. the summed active connection count of those listeners must be
%%      strictly below the `connection_max` setting of the rabbit app.
%%
%% Returns {ok, Body} when all three hold and {error, Body} otherwise, where
%% Body is a map with a `status` key (ok | failed), a `reason` binary on
%% failure, and `limit` / `connections` whenever step 3 was evaluated.
check() ->
    case rabbit:is_serving() of
        true ->
            check_listeners(amqp_ranch_refs());
        false ->
            {error, #{status => failed,
                      reason => <<"the rabbit node is not currently available to serve">>}}
    end.

%% Ranch refs for the amqp and 'amqp/ssl' protocols, in that order, leaving
%% out any protocol for which the lookup returned `undefined`.
amqp_ranch_refs() ->
    [Ref || Protocol <- [amqp, 'amqp/ssl'],
            Ref <- [rabbit_networking:ranch_ref_of_protocol(Protocol)],
            Ref =/= undefined].

%% Fails when there is no listener at all; otherwise moves on to the
%% connection limit check.
check_listeners([]) ->
    {error, #{status => failed,
              reason => <<"no active listeners for AMQP/AMQPS">>}};
check_listeners([_ | _] = RanchRefs) ->
    check_connection_limit(RanchRefs).

%% Compares the total number of active connections across RanchRefs with the
%% configured limit.
check_connection_limit(RanchRefs) ->
    ActiveConns = lists:sum([active_connections(Ref) || Ref <- RanchRefs]),
    Limit = rabbit_misc:get_env(rabbit, connection_max, infinity),
    %% With the default `infinity` this comparison is always true: in Erlang
    %% term order every number sorts before every atom.
    case ActiveConns < Limit of
        true ->
            {ok, #{status => ok,
                   limit => Limit,
                   connections => ActiveConns}};
        false ->
            {error, #{status => failed,
                      reason => <<"node connection limit is reached">>,
                      limit => Limit,
                      connections => ActiveConns}}
    end.

%% Active connection count Ranch reports for a single listener.
active_connections(RanchRef) ->
    #{active_connections := Count} = ranch:info(RanchRef),
    Count.

deps/rabbitmq_management/test/rabbit_mgmt_http_health_checks_SUITE.erl

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ all_tests() -> [
5252
port_listener_test,
5353
certificate_expiration_test,
5454
is_in_service_test,
55-
below_node_connection_limit_test
55+
below_node_connection_limit_test,
56+
ready_to_serve_clients_test
5657
].
5758

5859
%% -------------------------------------------------------------------
@@ -498,6 +499,36 @@ below_node_connection_limit_test(Config) ->
498499

499500
passed.
500501

502+
%% Exercises GET /health/checks/ready-to-serve-clients against node 0:
%%
%%   1. on an untouched node the check passes;
%%   2. while the node is marked as being drained the check fails;
%%   3. once `connection_max` connections are open the check fails and
%%      reports the limit and the connection count.
%%
%% Every state change made to the broker is undone in an `after` section, so
%% a failing assertion does not leave the node drained, connection-limited,
%% or holding open connections for whichever test case runs next.
ready_to_serve_clients_test(Config) ->
    Path = "/health/checks/ready-to-serve-clients",
    Check0 = http_get(Config, Path, ?OK),
    ?assertEqual(<<"ok">>, maps:get(status, Check0)),

    true = rabbit_ct_broker_helpers:mark_as_being_drained(Config, 0),
    try
        Body0 = http_get_failed(Config, Path),
        ?assertEqual(<<"failed">>, maps:get(<<"status">>, Body0))
    after
        %% Put the node back in service even if the assertion above failed.
        true = rabbit_ct_broker_helpers:unmark_as_being_drained(Config, 0)
    end,

    %% Set the connection limit low and open 'limit' connections.
    Limit = 10,
    rabbit_ct_broker_helpers:rpc(
      Config, 0, application, set_env, [rabbit, connection_max, Limit]),
    Connections = [rabbit_ct_client_helpers:open_unmanaged_connection(Config, 0)
                   || _ <- lists:seq(1, Limit)],
    try
        true = lists:all(fun(E) -> is_pid(E) end, Connections),
        %% One connection past the limit must be refused.
        {error, not_allowed} =
            rabbit_ct_client_helpers:open_unmanaged_connection(Config, 0),

        Body1 = http_get_failed(Config, Path),
        ?assertEqual(<<"failed">>, maps:get(<<"status">>, Body1)),
        ?assertEqual(Limit, maps:get(<<"limit">>, Body1)),
        ?assertEqual(Limit, maps:get(<<"connections">>, Body1))
    after
        %% Clean up the connections and reset the limit. Closing is best
        %% effort: an element may be an error tuple rather than a pid if
        %% opening that connection failed.
        [catch rabbit_ct_client_helpers:close_connection(C) || C <- Connections],
        rabbit_ct_broker_helpers:rpc(
          Config, 0, application, set_env, [rabbit, connection_max, infinity])
    end,

    passed.
531+
501532
http_get_failed(Config, Path) ->
502533
{ok, {{_, Code, _}, _, ResBody}} = req(Config, get, Path, [auth_header("guest", "guest")]),
503534
ct:pal("GET ~s: ~w ~w", [Path, Code, ResBody]),

0 commit comments

Comments
 (0)