|
| 1 | +%% This Source Code Form is subject to the terms of the Mozilla Public |
| 2 | +%% License, v. 2.0. If a copy of the MPL was not distributed with this |
| 3 | +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. |
| 4 | +%% |
| 5 | +%% Copyright (c) 2007-2021 VMware, Inc. or its affiliates. All rights reserved. |
| 6 | +%% |
| 7 | +-module(prometheus_rabbitmq_alarm_metrics_collector). |
| 8 | + |
| 9 | +-export([register/0, deregister_cleanup/1, collect_mf/2]). |
| 10 | + |
| 11 | +-import(prometheus_model_helpers, [create_mf/4, untyped_metric/1]). |
| 12 | + |
| 13 | +-include_lib("prometheus/include/prometheus.hrl"). |
| 14 | + |
| 15 | +-behaviour(prometheus_collector). |
| 16 | + |
| 17 | +-define(METRIC_NAME_PREFIX, "rabbitmq_alarms_"). |
| 18 | + |
| 19 | +%%==================================================================== |
| 20 | +%% Collector API |
| 21 | +%%==================================================================== |
| 22 | + |
%% @doc Registers this module as a collector by calling
%% prometheus_registry:register_collector/1 with ?MODULE.
%% The result is matched against `ok', so any other return value
%% raises a badmatch error.
-spec register() -> ok.
register() ->
    ok = prometheus_registry:register_collector(?MODULE).
| 25 | + |
%% @doc No-op cleanup hook: the argument is ignored and `ok' is returned.
%% Presumably the deregistration callback of the prometheus_collector
%% behaviour declared above - confirm against that behaviour's definition.
-spec deregister_cleanup(term()) -> ok.
deregister_cleanup(_Registry) ->
    ok.
| 28 | + |
%% @doc Emits one untyped metric family per alarm kind this collector knows
%% about (file descriptor limit, disk limit, memory limit). Each metric is 1
%% when rabbit_alarm:get_local_alarms/1 reports an alarm of that kind and 0
%% otherwise.
%%
%% If the alarm lookup exits with a timeout, or returns something that is
%% not a list, the failure is logged and no alarm metrics are emitted for
%% this scrape; `ok' is still returned.
-spec collect_mf(_Registry, Callback) -> ok
    when _Registry :: prometheus_registry:registry(),
         Callback :: prometheus_collector:callback().
collect_mf(_Registry, Callback) ->
    %% Only the lookup itself is protected by the `try': the `of' clauses
    %% run outside of it, so the timeout handler below can only ever report
    %% a timeout of get_local_alarms/1 and never one raised by Callback.
    try rabbit_alarm:get_local_alarms(500) of %% TODO: figure out timeout
        Alarms when is_list(Alarms) ->
            ActiveAlarms = active_alarms(Alarms),
            emit_alarm_metric(Callback,
                              ?METRIC_NAME(<<"file_descriptor_limit">>),
                              <<"is 1 if file descriptor limit alarm is in effect">>,
                              file_descriptor_limit,
                              ActiveAlarms),
            emit_alarm_metric(Callback,
                              ?METRIC_NAME(<<"disk_limit">>),
                              <<"is 1 if disk alarm is in effect">>,
                              disk_limit,
                              ActiveAlarms),
            emit_alarm_metric(Callback,
                              ?METRIC_NAME(<<"memory_limit">>),
                              <<"is 1 if memory alarm is in effect">>,
                              memory_limit,
                              ActiveAlarms),
            ok;
        Error ->
            rabbit_log:error("alarm_metrics_collector failed to emit metrics: "
                             "rabbit_alarm:get_local_alarms returned ~p",
                             [Error]),
            %% No alarm metrics are rendered for this scrape. That breaks
            %% continuity of the series, but it does not crash the whole
            %% scraping endpoint.
            ok
    catch
        exit:{timeout, _} ->
            rabbit_log:error("alarm_metrics_collector failed to emit metrics: "
                             "rabbit_alarm:get_local_alarms timed out"),
            %% No alarm metrics are rendered for this scrape. That breaks
            %% continuity of the series, but it does not crash the whole
            %% scraping endpoint.
            ok
    end.

%% Builds a map with one key (set to 1) per alarm kind present in Alarms.
active_alarms(Alarms) ->
    lists:foldl(fun note_alarm/2, #{}, Alarms).

%% Marks the alarm kind matched by a single alarm entry as active.
note_alarm({{resource_limit, disk, _}, _}, Acc) ->
    Acc#{disk_limit => 1};
note_alarm({{resource_limit, memory, _}, _}, Acc) ->
    Acc#{memory_limit => 1};
note_alarm({file_descriptor_limit, _}, Acc) ->
    Acc#{file_descriptor_limit => 1};
note_alarm(_Unknown, Acc) ->
    %% Alarms this collector has no metric for are skipped. Without this
    %% clause they would raise function_clause, which the timeout-only
    %% handler in collect_mf/2 does not catch.
    Acc.

%% Passes Callback one untyped metric family named Name, whose single
%% metric is the value stored under Key in ActiveAlarms, or 0 when absent.
emit_alarm_metric(Callback, Name, Help, Key, ActiveAlarms) ->
    Value = maps:get(Key, ActiveAlarms, 0),
    Callback(create_mf(Name, Help, untyped, [untyped_metric(Value)])).
0 commit comments