@@ -21,6 +21,43 @@ if ! command -v nvme >/dev/null 2>&1; then
21
21
exit 1
22
22
fi
23
23
24
# jq is used to parse the JSON emitted by nvme-cli and the DWPD ratings file,
# so refuse to run without it. The message is prefixed with the script's
# basename and goes to stderr.
if ! command -v jq >/dev/null 2>&1; then
  echo "${0##*/}: jq is required but not installed. Aborting." >&2
  exit 1
fi
28
+
29
# Path to the DWPD ratings JSON file
dwpd_file="/opt/kayobe/etc/monitoring/dwpd_ratings.json"

# Rated DWPD values keyed by model name. Starts empty; load_dwpd_ratings
# fills it, and models without an entry fall back to a default at lookup time.
declare -A rated_dwpd=()
33
+
34
#######################################
# Populate the rated_dwpd associative array from the DWPD ratings JSON file.
#
# The elements of the file's top-level JSON value are iterated; each object
# element contributes its "model_name" as the key and its "rated_dwpd" as
# the value. Entries are skipped when the model name is missing/empty or
# when the rating is not a plain non-negative decimal number.
#
# Globals:   dwpd_file (read), rated_dwpd (written; must already be declared
#            as an associative array)
# Arguments: none
# Outputs:   warnings on stderr when the file is missing, cannot be parsed,
#            or contains an unusable rating
# Returns:   0 always, so a bad ratings file never aborts the collector
#######################################
load_dwpd_ratings() {
  local pairs key value
  local -r number_re='^[0-9]+([.][0-9]+)?$'

  if [[ ! -f "$dwpd_file" ]]; then
    echo "Warning: DWPD ratings file not found at '$dwpd_file'. Defaulting to rated_dwpd=1." >&2
    return 0
  fi

  # A single jq pass emits two lines per object entry: the model name, then
  # the rating. Missing fields come out as the literal text "null".
  if ! pairs="$(jq -r '.[] | objects | (.model_name | tostring), (.rated_dwpd | tostring)' "$dwpd_file")"; then
    echo "Warning: could not parse DWPD ratings file '$dwpd_file'. Defaulting to rated_dwpd=1." >&2
    return 0
  fi

  while IFS= read -r key && IFS= read -r value; do
    # Strip trailing whitespace only; model names may contain inner spaces.
    key="${key%"${key##*[![:space:]]}"}"
    value="${value%"${value##*[![:space:]]}"}"

    # Ignore entries without a usable model name.
    if [[ -z "$key" || "$key" == "null" ]]; then
      continue
    fi

    # The rating is later printed as a metric sample value, so it must be numeric.
    if [[ ! "$value" =~ $number_re ]]; then
      echo "Warning: ignoring non-numeric rated_dwpd '$value' for model '$key' in '$dwpd_file'." >&2
      continue
    fi

    rated_dwpd["$key"]="$value"
  done <<< "$pairs"

  return 0
}
57
+
58
+
59
+ load_dwpd_ratings
60
+
24
61
output_format_awk=" $(
25
62
cat << 'OUTPUTAWK '
26
63
BEGIN { v = "" }
@@ -44,58 +81,70 @@ format_output() {
44
81
# Report the nvme-cli version as an info-style metric.
nvme_version="$(nvme version | awk '$1 == "nvme" {print $3}')"
echo "nvmecli{version=\"${nvme_version}\"} 1" | format_output

# Escape backslashes and double quotes so that an arbitrary string can be
# embedded inside a double-quoted label value.
# Arguments: $1 - raw label value
# Outputs:   the escaped value on stdout (no trailing newline)
nvme_escape_label_value() {
  local text="$1"
  text="${text//\\/\\\\}"
  text="${text//\"/\\\"}"
  printf '%s' "$text"
}

# Print one sample line of the form: <metric>{<labels>} <value>
# Arguments: $1 - metric name, $2 - pre-rendered label list, $3 - sample value
nvme_emit_metric() {
  printf '%s{%s} %s\n' "$1" "$2" "$3"
}

# Get devices (DevicePath, PhysicalSize, ModelNumber and SerialNumber),
# one compact JSON object per line.
device_info="$(nvme list -o json | jq -c '.Devices[] | {DevicePath, PhysicalSize, ModelNumber, SerialNumber}')"

# Convert device_info to an array. An empty listing must give an empty array:
# a here-string of "" would otherwise yield a single empty element and make
# the loop below run once for a device that does not exist.
device_info_array=()
if [[ -n "$device_info" ]]; then
  mapfile -t device_info_array <<< "$device_info"
fi

# Loop through the NVMe devices
for device_data in "${device_info_array[@]}"; do
  device="$(jq -r '.DevicePath' <<< "$device_data")"
  if [[ -z "$device" || "$device" == "null" ]]; then
    # Nothing to query without a device path.
    continue
  fi
  disk="${device##*/}"
  model_name="$(jq -r '.ModelNumber' <<< "$device_data")"
  serial_number="$(jq -r '.SerialNumber' <<< "$device_data")"
  physical_size="$(jq -r '.PhysicalSize' <<< "$device_data")"

  # Every metric for this device carries the same label set.
  labels="device=\"${disk}\""
  labels+=",model=\"$(nvme_escape_label_value "$model_name")\""
  labels+=",serial_number=\"$(nvme_escape_label_value "$serial_number")\""

  # Get the rated DWPD from the dictionary or default to 1 if not found.
  # Trailing whitespace is dropped from the lookup key only (the label keeps
  # the reported name); an empty key is never used as a subscript.
  model_key="${model_name%"${model_name##*[![:space:]]}"}"
  value_rated_dwpd=1
  if [[ -n "$model_key" ]]; then
    value_rated_dwpd="${rated_dwpd["$model_key"]:-1}"
  fi

  nvme_emit_metric physical_size_bytes "$labels" "$physical_size"

  # Without a SMART log only the metrics that do not depend on it are
  # reported, instead of printing samples with empty values.
  if ! json_check="$(nvme smart-log -o json "${device}")" || [[ -z "$json_check" ]]; then
    echo "${0##*/}: could not read SMART log for ${device}; skipping its SMART metrics." >&2
    nvme_emit_metric rated_dwpd "$labels" "$value_rated_dwpd"
    continue
  fi

  # The temperature value in JSON is in Kelvin, we want Celsius
  nvme_emit_metric temperature_celsius "$labels" "$(jq '.temperature - 273' <<< "$json_check")"

  nvme_emit_metric rated_dwpd "$labels" "$value_rated_dwpd"

  # The next three are divided by 100 to be reported as ratios.
  nvme_emit_metric available_spare_ratio "$labels" "$(jq '.avail_spare / 100' <<< "$json_check")"
  nvme_emit_metric available_spare_threshold_ratio "$labels" "$(jq '.spare_thresh / 100' <<< "$json_check")"
  nvme_emit_metric percentage_used_ratio "$labels" "$(jq '.percent_used / 100' <<< "$json_check")"

  # The remaining fields are passed through unchanged.
  nvme_emit_metric critical_warning_total "$labels" "$(jq '.critical_warning' <<< "$json_check")"
  nvme_emit_metric media_errors_total "$labels" "$(jq '.media_errors' <<< "$json_check")"
  nvme_emit_metric num_err_log_entries_total "$labels" "$(jq '.num_err_log_entries' <<< "$json_check")"
  nvme_emit_metric power_cycles_total "$labels" "$(jq '.power_cycles' <<< "$json_check")"
  nvme_emit_metric power_on_hours_total "$labels" "$(jq '.power_on_hours' <<< "$json_check")"
  nvme_emit_metric controller_busy_time_seconds "$labels" "$(jq '.controller_busy_time' <<< "$json_check")"
  nvme_emit_metric data_units_written_total "$labels" "$(jq '.data_units_written' <<< "$json_check")"
  nvme_emit_metric data_units_read_total "$labels" "$(jq '.data_units_read' <<< "$json_check")"
  nvme_emit_metric host_read_commands_total "$labels" "$(jq '.host_read_commands' <<< "$json_check")"
  nvme_emit_metric host_write_commands_total "$labels" "$(jq '.host_write_commands' <<< "$json_check")"
done | format_output
0 commit comments