Skip to content

Commit 966fb62

Browse files
authored
Merge pull request #1044 from stackhpc/bond-single-link-alert
Add alert to detect bonds with a single link
2 parents 03bf3f6 + 6cf594d commit 966fb62

File tree

3 files changed

+27
-0
lines changed

3 files changed

+27
-0
lines changed

etc/kayobe/kolla/config/prometheus/system.rules

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,22 @@ groups:
104104
annotations:
105105
summary: Host network bond degraded (instance {{ $labels.instance }})
106106
description: "Bond {{ $labels.master }} degraded on {{ $labels.instance }}"
107+
{% endraw %}
107108

109+
{% if alertmanager_warn_network_bond_single_link | bool %}
110+
{% raw %}
111+
- alert: HostNetworkBondSingleLink
112+
expr: node_bonding_slaves == 1
113+
for: 2m
114+
labels:
115+
severity: warning
116+
annotations:
117+
summary: Host network bond with a single link (instance {{ $labels.instance }})
118+
description: "Bond {{ $labels.master }} configured with a single link on {{ $labels.instance }}"
119+
{% endraw %}
120+
{% endif %}
121+
122+
{% raw %}
108123
- alert: HostConntrackLimit
109124
expr: node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 0.8
110125
for: 5m

etc/kayobe/stackhpc-monitoring.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
# of free memory is lower than this value an alert will be triggered.
99
alertmanager_low_memory_threshold_gib: 5
1010

11+
# Whether to raise an alert if any network bond is configured with a single
12+
# link. Change to false to disable this alert.
13+
alertmanager_warn_network_bond_single_link: true
14+
1115
###############################################################################
1216
# Exporter configuration
1317

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
---
2+
features:
3+
- |
4+
Adds a new Prometheus alert ``HostNetworkBondSingleLink`` which will be
5+
raised when a bond is configured with only one member. This can happen when
6+
NetworkManager detects that a bond member is down at boot time. This alert
7+
can be disabled by setting ``alertmanager_warn_network_bond_single_link``
8+
to ``false``.

0 commit comments

Comments
 (0)