File tree Expand file tree Collapse file tree 2 files changed +13
-5
lines changed Expand file tree Collapse file tree 2 files changed +13
-5
lines changed Original file line number Diff line number Diff line change @@ -137,25 +137,25 @@ groups:
137
137
annotations:
138
138
description: "Root volume (OSD and MON store) is dangerously full: {{ $value | humanize }}% free."
139
139
140
- # alert on nic packet errors and drops rates > 1 packet/s
140
+ # alert on NIC packet drop and error rates above alertmanager_packet_drop_threshold and alertmanager_packet_errors_threshold packets/s, respectively
141
141
- alert: NetworkPacketsDropped
142
- expr: irate(node_network_receive_drop_total{device!~"lo|br.*|.*-ovs|tap.*"}[5m]) + irate(node_network_transmit_drop_total{device!~"lo|br.*|.*-ovs|tap.*"}[5m]) > 1
142
+ expr: irate(node_network_receive_drop_total{device!~"lo|br.*|.*-ovs|tap.*"}[5m]) + irate(node_network_transmit_drop_total{device!~"lo|br.*|.*-ovs|tap.*"}[5m]) > {% endraw %}{{ alertmanager_packet_drop_threshold }}{% raw %}
143
143
labels:
144
144
severity: warning
145
145
annotations:
146
146
description: >
147
- Node {{ $labels.instance }} experiences packet drop > 1
147
+ Node {{ $labels.instance }} experiences packet drop > {% endraw %}{{ alertmanager_packet_drop_threshold }}{% raw %}
148
148
packet/s on interface {{ $labels.device }}.
149
149
150
150
- alert: NetworkPacketErrors
151
151
expr: |
152
152
irate(node_network_receive_errs_total{device!="lo"}[5m]) +
153
- irate(node_network_transmit_errs_total{device!="lo"}[5m]) > 1
153
+ irate(node_network_transmit_errs_total{device!="lo"}[5m]) > {% endraw %}{{ alertmanager_packet_errors_threshold }}{% raw %}
154
154
labels:
155
155
severity: warning
156
156
annotations:
157
157
description: >
158
- Node {{ $labels.instance }} experiences packet errors > 1
158
+ Node {{ $labels.instance }} experiences packet errors > {% endraw %}{{ alertmanager_packet_errors_threshold }}{% raw %}
159
159
packet/s on interface {{ $labels.device }}.
160
160
161
161
- alert: StorageFillingUp
Original file line number Diff line number Diff line change @@ -18,6 +18,14 @@ alertmanager_warn_network_bond_single_link: true
18
18
alertmanager_node_free_swap_warning_threshold_ratio : 0.25
19
19
alertmanager_node_free_swap_critical_threshold_ratio : 0.1
20
20
21
+ # Threshold to trigger an alert for dropped packets, measured in packets/s
22
+ # as an instantaneous rate (irate) within a 5-minute lookback window.
23
+ alertmanager_packet_drop_threshold : 1
24
+
25
+ # Threshold to trigger an alert for packet receive/transmit errors, measured in
26
+ # packets/s as an instantaneous rate (irate) within a 5-minute lookback window.
27
+ alertmanager_packet_errors_threshold : 1
28
+
21
29
# ##############################################################################
22
30
# Exporter configuration
23
31
You can’t perform that action at this time.
0 commit comments