rabbitmq · michaelklishin · May 9, 2025 · May 9, 2025 · May 9, 2025
diff --git a/deps/rabbitmq_management/priv/www/api/index.html b/deps/rabbitmq_management/priv/www/api/index.html
@@ -1252,6 +1252,41 @@ <h2>Reference</h2>
           Service Unavailable.
         </td>
       </tr>
+      <tr>
+        <td>X</td>
+        <td></td>
+        <td></td>
+        <td></td>
+        <td class="path">/api/health/checks/below-node-connection-limit</td>
+        <td>
+          Responds a 200 OK if the target node has fewer connections to the AMQP
+          and AMQPS ports than the configured maximum, otherwise responds with a
+          503 Service Unavailable.
+        </td>
+      </tr>
+      <tr>
+        <td>X</td>
+        <td></td>
+        <td></td>
+        <td></td>
+        <td class="path">/api/health/checks/ready-to-serve-clients</td>
+        <td>
+          <p>
+            Responds a 200 OK if the target node is ready to serve clients, otherwise
+            responds with a 503 Service Unavailable. This check combines:
+          </p>
+          <ol>
+            <li>/api/health/checks/is-in-service</li>
+            <li>/api/health/checks/protocol-listener/amqp or /api/health/checks/protocol-listener/amqps</li>
+            <li>/api/health/checks/below-node-connection-limit</li>
+          </ol>
+          <p>
+            So this check will only return 200 OK if the target node is in service,
+            an AMQP or AMQPS listener is available and the target node has fewer active
+            AMQP and AMQPS connections than its configured limit.
+          </p>
+        </td>
+      </tr>
       <tr>
         <td>X</td>
         <td></td>

diff --git a/deps/rabbitmq_management/src/rabbit_mgmt_dispatcher.erl b/deps/rabbitmq_management/src/rabbit_mgmt_dispatcher.erl
@@ -208,6 +208,8 @@ dispatcher() ->
      {"/health/checks/quorum-queues-without-elected-leaders/vhost/:vhost/pattern/:pattern",  rabbit_mgmt_wm_health_check_quorum_queues_without_elected_leaders, []},
      {"/health/checks/node-is-quorum-critical",                rabbit_mgmt_wm_health_check_node_is_quorum_critical, []},
      {"/health/checks/is-in-service",                          rabbit_mgmt_wm_health_check_is_in_service, []},
+     {"/health/checks/below-node-connection-limit",            rabbit_mgmt_wm_health_check_below_node_connection_limit, []},
+     {"/health/checks/ready-to-serve-clients",                 rabbit_mgmt_wm_health_check_ready_to_serve_clients, []},
      {"/reset",                                                rabbit_mgmt_wm_reset, []},
      {"/reset/:node",                                          rabbit_mgmt_wm_reset, []},
      {"/rebalance/queues",                                     rabbit_mgmt_wm_rebalance_queues, [{queues, all}]},

diff --git a/deps/rabbitmq_management/src/rabbit_mgmt_wm_health_check_below_node_connection_limit.erl b/deps/rabbitmq_management/src/rabbit_mgmt_wm_health_check_below_node_connection_limit.erl
@@ -0,0 +1,71 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2025 Broadcom. All Rights Reserved. The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. All rights reserved.
+%%
+
+%% Health check that passes while the number of active AMQP and AMQPS
+%% connections on this node is below the `connection_max` limit:
+%% * GET /api/health/checks/below-node-connection-limit
+
+-module(rabbit_mgmt_wm_health_check_below_node_connection_limit).
+
+-export([init/2]).
+-export([to_json/2, content_types_provided/2]).
+-export([variances/2]).
+
+-include("rabbit_mgmt.hrl").
+-include_lib("rabbitmq_management_agent/include/rabbit_mgmt_records.hrl").
+
+%% Cowboy entry point: adds the common permission and no-cache headers and
+%% hands the request over to cowboy_rest with a fresh #context{}.
+init(Req0, _Opts) ->
+    Req1 = rabbit_mgmt_headers:set_common_permission_headers(Req0, ?MODULE),
+    Req2 = rabbit_mgmt_headers:set_no_cache_headers(Req1, ?MODULE),
+    {cowboy_rest, Req2, #context{}}.
+
+%% Request headers the response varies on.
+variances(ReqData, Context) ->
+    Headers = [<<"accept-encoding">>, <<"origin">>],
+    {Headers, ReqData, Context}.
+
+%% All responses are rendered by to_json/2.
+content_types_provided(ReqData, Context) ->
+    Responders = rabbit_mgmt_util:responder_map(to_json),
+    {Responders, ReqData, Context}.
+
+%% Sums the active AMQP and AMQPS connections and compares the total with the
+%% `connection_max` setting of the `rabbit` application (default: infinity).
+to_json(ReqData, Context) ->
+    Counts = [protocol_connection_count(P) || P <- [amqp, 'amqp/ssl']],
+    Total = lists:sum(Counts),
+    Max = rabbit_misc:get_env(rabbit, connection_max, infinity),
+    %% Numbers sort before atoms in the Erlang term order, so every integer
+    %% count compares as lower than the atom `infinity`.
+    respond(Total < Max, Total, Max, ReqData, Context).
+
+%% Below the limit: regular reply. Otherwise the same kind of body is sent
+%% with the health check failure status and request processing is stopped.
+respond(true, Total, Max, ReqData, Context) ->
+    Body = #{status => ok, limit => Max, connections => Total},
+    rabbit_mgmt_util:reply(Body, ReqData, Context);
+respond(false, Total, Max, ReqData, Context) ->
+    Body = #{status => failed,
+             reason => <<"node connection limit is reached">>,
+             limit => Max,
+             connections => Total},
+    {Payload, ReqData1, Context1} = rabbit_mgmt_util:reply(Body, ReqData, Context),
+    Failed = cowboy_req:reply(?HEALTH_CHECK_FAILURE_STATUS, #{}, Payload, ReqData1),
+    {stop, Failed, Context1}.
+
+%% Active connection count Ranch reports for the listener of Protocol, or 0
+%% when rabbit_networking:ranch_ref_of_protocol/1 returns `undefined`.
+protocol_connection_count(Protocol) ->
+    active_connections(rabbit_networking:ranch_ref_of_protocol(Protocol)).
+
+active_connections(undefined) ->
+    0;
+active_connections(RanchRef) ->
+    #{active_connections := Count} = ranch:info(RanchRef),
+    Count.
diff --git a/deps/rabbitmq_management/src/rabbit_mgmt_wm_health_check_ready_to_serve_clients.erl b/deps/rabbitmq_management/src/rabbit_mgmt_wm_health_check_ready_to_serve_clients.erl
@@ -0,0 +1,92 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2025 Broadcom. All Rights Reserved. The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. All rights reserved.
+%%
+
+%% A composite health check that combines:
+%% * GET /api/health/checks/is-in-service
+%% * GET /api/health/checks/protocol-listener/amqp or
+%%   GET /api/health/checks/protocol-listener/amqps
+%% * GET /api/health/checks/below-node-connection-limit
+
+-module(rabbit_mgmt_wm_health_check_ready_to_serve_clients).
+
+-export([init/2]).
+-export([to_json/2, content_types_provided/2]).
+-export([variances/2]).
+
+-include("rabbit_mgmt.hrl").
+-include_lib("rabbitmq_management_agent/include/rabbit_mgmt_records.hrl").
+
+%% Cowboy entry point: adds the common permission and no-cache headers and
+%% hands the request over to cowboy_rest with a fresh #context{}.
+init(Req, _State) ->
+    Req1 = rabbit_mgmt_headers:set_no_cache_headers(
+             rabbit_mgmt_headers:set_common_permission_headers(
+               Req, ?MODULE), ?MODULE),
+    {cowboy_rest, Req1, #context{}}.
+
+%% Request headers the response varies on.
+variances(Req, Context) ->
+    {[<<"accept-encoding">>, <<"origin">>], Req, Context}.
+
+%% All responses are rendered by to_json/2.
+content_types_provided(ReqData, Context) ->
+    {rabbit_mgmt_util:responder_map(to_json), ReqData, Context}.
+
+%% Replies with the body produced by check/0: a regular reply on success,
+%% or the same kind of body with the health check failure status otherwise.
+to_json(ReqData, Context) ->
+    case check() of
+        {ok, Body} ->
+            rabbit_mgmt_util:reply(Body, ReqData, Context);
+        {error, Body} ->
+            {Response, ReqData1, Context1} = rabbit_mgmt_util:reply(
+                                               Body, ReqData, Context),
+            {stop,
+             cowboy_req:reply(
+               ?HEALTH_CHECK_FAILURE_STATUS, #{}, Response, ReqData1),
+             Context1}
+    end.
+
+%% Runs the three checks in order and stops at the first one that fails.
+%% Returns {ok, Body} when all of them pass and {error, Body} otherwise,
+%% where Body is the map to_json/2 replies with.
+check() ->
+    check_in_service(rabbit:is_serving()).
+
+check_in_service(true) ->
+    RanchRefs0 = [rabbit_networking:ranch_ref_of_protocol(Protocol)
+                  || Protocol <- [amqp, 'amqp/ssl']],
+    check_listeners([Ref || Ref <- RanchRefs0, Ref =/= undefined]);
+check_in_service(false) ->
+    {error, #{status => failed,
+              reason => <<"the rabbit node is not currently available to serve">>}}.
+
+%% One listener (AMQP or AMQPS) is enough for this check to pass.
+check_listeners([_ | _] = RanchRefs) ->
+    ActiveConns = lists:sum([active_connections(Ref) || Ref <- RanchRefs]),
+    Limit = rabbit_misc:get_env(rabbit, connection_max, infinity),
+    check_connection_limit(ActiveConns, Limit);
+check_listeners([]) ->
+    {error, #{status => failed,
+              reason => <<"no active listeners for AMQP/AMQPS">>}}.
+
+%% Numbers sort before atoms in the Erlang term order, so every integer
+%% count compares as lower than the default limit, the atom `infinity`.
+check_connection_limit(ActiveConns, Limit) when ActiveConns < Limit ->
+    {ok, #{status => ok,
+           limit => Limit,
+           connections => ActiveConns}};
+check_connection_limit(ActiveConns, Limit) ->
+    {error, #{status => failed,
+              reason => <<"node connection limit is reached">>,
+              limit => Limit,
+              connections => ActiveConns}}.
+
+%% Active connection count Ranch reports for the given listener reference.
+active_connections(RanchRef) ->
+    #{active_connections := Count} = ranch:info(RanchRef),
+    Count.
diff --git a/deps/rabbitmq_management/test/rabbit_mgmt_http_health_checks_SUITE.erl b/deps/rabbitmq_management/test/rabbit_mgmt_http_health_checks_SUITE.erl
@@ -51,7 +51,9 @@ all_tests() -> [
                 protocol_listener_test,
                 port_listener_test,
                 certificate_expiration_test,
-                is_in_service_test
+                is_in_service_test,
+                below_node_connection_limit_test,
+                ready_to_serve_clients_test
                ].
 
 %% -------------------------------------------------------------------
@@ -470,8 +472,80 @@ is_in_service_test(Config) ->
 
     passed.
 
+below_node_connection_limit_test(Config) ->
+    Path = "/health/checks/below-node-connection-limit",
+    Check0 = http_get(Config, Path, ?OK),
+    ?assertEqual(<<"ok">>, maps:get(status, Check0)),
+    ?assertEqual(0, maps:get(connections, Check0)),
+    ?assertEqual(<<"infinity">>, maps:get(limit, Check0)),
+
+    Limit = 10,
+    with_node_connection_limit_reached(
+      Config, Limit,
+      fun() ->
+              Body0 = http_get_failed(Config, Path),
+              ?assertEqual(<<"failed">>, maps:get(<<"status">>, Body0)),
+              ?assertEqual(Limit, maps:get(<<"limit">>, Body0)),
+              ?assertEqual(Limit, maps:get(<<"connections">>, Body0))
+      end),
+
+    passed.
+
+ready_to_serve_clients_test(Config) ->
+    Path = "/health/checks/ready-to-serve-clients",
+    Check0 = http_get(Config, Path, ?OK),
+    ?assertEqual(<<"ok">>, maps:get(status, Check0)),
+
+    %% Always undo the drain mark, even when an assertion fails.
+    true = rabbit_ct_broker_helpers:mark_as_being_drained(Config, 0),
+    try
+        Body0 = http_get_failed(Config, Path),
+        ?assertEqual(<<"failed">>, maps:get(<<"status">>, Body0))
+    after
+        true = rabbit_ct_broker_helpers:unmark_as_being_drained(Config, 0)
+    end,
+
+    Limit = 10,
+    with_node_connection_limit_reached(
+      Config, Limit,
+      fun() ->
+              Body1 = http_get_failed(Config, Path),
+              ?assertEqual(<<"failed">>, maps:get(<<"status">>, Body1)),
+              ?assertEqual(Limit, maps:get(<<"limit">>, Body1)),
+              ?assertEqual(Limit, maps:get(<<"connections">>, Body1))
+      end),
+
+    passed.
+
+%% Sets the node connection limit to Limit, opens Limit connections, checks
+%% that one more is refused and then runs Fun. The connections are closed and
+%% the limit is reset to infinity afterwards, even when Fun fails, so that a
+%% failed assertion cannot leave the node limited for the following tests.
+with_node_connection_limit_reached(Config, Limit, Fun) ->
+    rabbit_ct_broker_helpers:rpc(
+      Config, 0, application, set_env, [rabbit, connection_max, Limit]),
+    try
+        with_open_connections(Config, Limit, Fun)
+    after
+        rabbit_ct_broker_helpers:rpc(
+          Config, 0, application, set_env, [rabbit, connection_max, infinity])
+    end.
+
+with_open_connections(Config, Count, Fun) ->
+    Connections = [rabbit_ct_client_helpers:open_unmanaged_connection(Config, 0)
+                   || _ <- lists:seq(1, Count)],
+    try
+        true = lists:all(fun erlang:is_pid/1, Connections),
+        {error, not_allowed} =
+            rabbit_ct_client_helpers:open_unmanaged_connection(Config, 0),
+        Fun()
+    after
+        [catch rabbit_ct_client_helpers:close_connection(C) || C <- Connections]
+    end.
+
 http_get_failed(Config, Path) ->
     {ok, {{_, Code, _}, _, ResBody}} = req(Config, get, Path, [auth_header("guest", "guest")]),
+    ct:pal("GET ~s: ~w ~tp", [Path, Code, ResBody]),
     ?assertEqual(Code, ?HEALTH_CHECK_FAILURE_STATUS),
     rabbit_json:decode(rabbit_data_coercion:to_binary(ResBody)).