File tree Expand file tree Collapse file tree 3 files changed +118
-3
lines changed
grafana/dashboards/openstack Expand file tree Collapse file tree 3 files changed +118
-3
lines changed Original file line number Diff line number Diff line change 637
637
"overrides" : []
638
638
},
639
639
"gridPos" : {
640
- "h" : 12 ,
641
- "w" : 20 ,
640
+ "h" : 13 ,
641
+ "w" : 9 ,
642
642
"x" : 0 ,
643
643
"y" : 17
644
644
},
674
674
],
675
675
"title" : " Disk Temperatures" ,
676
676
"type" : " timeseries"
677
+ },
678
+ {
679
+ "datasource" : {
680
+ "type" : " prometheus" ,
681
+ "uid" : " ${datasource}"
682
+ },
683
+ "description" : " The data written to the disk in the last 24h period divided by the physical capacity of the disk" ,
684
+ "fieldConfig" : {
685
+ "defaults" : {
686
+ "color" : {
687
+ "mode" : " palette-classic"
688
+ },
689
+ "custom" : {
690
+ "axisCenteredZero" : false ,
691
+ "axisColorMode" : " text" ,
692
+ "axisLabel" : " " ,
693
+ "axisPlacement" : " auto" ,
694
+ "barAlignment" : 0 ,
695
+ "drawStyle" : " line" ,
696
+ "fillOpacity" : 0 ,
697
+ "gradientMode" : " none" ,
698
+ "hideFrom" : {
699
+ "legend" : false ,
700
+ "tooltip" : false ,
701
+ "viz" : false
702
+ },
703
+ "lineInterpolation" : " linear" ,
704
+ "lineWidth" : 1 ,
705
+ "pointSize" : 5 ,
706
+ "scaleDistribution" : {
707
+ "type" : " linear"
708
+ },
709
+ "showPoints" : " auto" ,
710
+ "spanNulls" : false ,
711
+ "stacking" : {
712
+ "group" : " A" ,
713
+ "mode" : " none"
714
+ },
715
+ "thresholdsStyle" : {
716
+ "mode" : " off"
717
+ }
718
+ },
719
+ "mappings" : [],
720
+ "thresholds" : {
721
+ "mode" : " absolute" ,
722
+ "steps" : [
723
+ {
724
+ "color" : " green" ,
725
+ "value" : null
726
+ }
727
+ ]
728
+ }
729
+ },
730
+ "overrides" : []
731
+ },
732
+ "gridPos" : {
733
+ "h" : 13 ,
734
+ "w" : 10 ,
735
+ "x" : 9 ,
736
+ "y" : 17
737
+ },
738
+ "id" : 9 ,
739
+ "options" : {
740
+ "legend" : {
741
+ "calcs" : [],
742
+ "displayMode" : " list" ,
743
+ "placement" : " bottom" ,
744
+ "showLegend" : true
745
+ },
746
+ "tooltip" : {
747
+ "mode" : " single" ,
748
+ "sort" : " none"
749
+ }
750
+ },
751
+ "targets" : [
752
+ {
753
+ "datasource" : {
754
+ "type" : " prometheus" ,
755
+ "uid" : " ${datasource}"
756
+ },
757
+ "editorMode" : " code" ,
758
+ "expr" : " delta(nvme_data_units_written_total{instance=~\" $node\" }[24h])*512000 / nvme_physical_size_bytes{instance=~\" $node\" }" ,
759
+ "legendFormat" : " {{instance}} - {{device}}" ,
760
+ "range" : true ,
761
+ "refId" : " A"
762
+ }
763
+ ],
764
+ "title" : " DWPD" ,
765
+ "type" : " timeseries"
677
766
}
678
767
],
679
768
"refresh" : false ,
Original file line number Diff line number Diff line change @@ -13,4 +13,20 @@ groups:
13
13
summary: "SMART monitor reports bad disk on (instance {{ $labels.instance }})"
14
14
description: "{{ $labels.instance }} is reporting unhealthy for the disk at {{ $labels.disk }}. Disk serial number is: {{ $labels.serial_number }}"
15
15
16
- {% endraw %}
16
+ - alert: DWPDTooHigh
17
+ expr: (delta(nvme_data_units_written_total[30d])*512000 / nvme_physical_size_bytes) / 30 > 1
18
+ labels:
19
+ severity: alert
20
+ annotations:
21
+ summary: "High 30-Day Average DWPD for {{ $labels.instance }}"
22
+ description: "The 30-day average for Drive Writes Per Day for disk {{ $labels.device }} on {{ $labels.instance }} exceeds 1 DWPD"
23
+
24
+ - alert: DWPDTooHighWarning
25
+ expr: (delta(nvme_data_units_written_total[7d])*512000 / nvme_physical_size_bytes) / 7 > 1
26
+ labels:
27
+ severity: warning
28
+ annotations:
29
+ summary: "High 7-Day Average DWPD for {{ $labels.instance }}"
30
+ description: "The 7-day average for Drive Writes Per Day for disk {{ $labels.device }} on {{ $labels.instance }} exceeds 1 DWPD"
31
+
32
+ {% endraw %}
Original file line number Diff line number Diff line change
1
+ ---
2
+ features :
3
+ - |
4
+ Adds a panel in the Hardware Overview dashboard to show DWPD (drive writes
5
+ per day) for NVMe drives. This is calculated by dividing the total bytes written
6
+ in the past 24 hours by the drive capacity. This is currently only
7
+ supported on NVMe drives.
8
+ - |
9
+ Adds a warning alert that fires when the 7-day average exceeds 1 DWPD, and a
10
+ higher-severity alert that fires when the 30-day average exceeds 1 DWPD.
You can’t perform that action at this time.
0 commit comments