Skip to content

Commit d0063e7

Browse files
committed
Convert smartmon script to python
1 parent 5c1eb95 commit d0063e7

File tree

1 file changed

+156
-0
lines changed

1 file changed

+156
-0
lines changed
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
#!/usr/bin/env python3
2+
3+
import subprocess
4+
import json
5+
from datetime import datetime
6+
7+
# Absolute path of the smartctl binary that this script invokes.
SMARTCTL_PATH = "/usr/sbin/smartctl"
8+
9+
def run_command(command, parse_json=False):
    """Run *command* and return what it wrote to standard output.

    The exit status is intentionally not checked: callers only consume
    stdout and decide for themselves whether it is usable.

    Args:
        command: Argument list handed to ``subprocess.run`` (no shell).
        parse_json: When true, decode stdout as JSON and return the decoded
            document; otherwise return stdout with surrounding whitespace
            stripped.

    Returns:
        The decoded JSON value when ``parse_json`` is true, else a string.

    Raises:
        json.JSONDecodeError: If ``parse_json`` is true and stdout is not
            valid JSON. A program that could not be started at all is
            treated as having produced empty output, so it ends up here too
            instead of escaping as an unhandled ``OSError``.
    """
    try:
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=False,
        )
    except OSError as exc:
        # Missing or non-executable binary: report it, then behave as if the
        # command printed nothing so existing fallbacks in callers apply.
        import sys

        print(f"failed to run {command!r}: {exc}", file=sys.stderr)
        stdout = ""
    else:
        stdout = result.stdout

    if parse_json:
        return json.loads(stdout)
    return stdout.strip()
15+
16+
# Normalised (lower snake case) attribute names that are exported. Built once
# at import time instead of on every call.
_SMARTMON_ATTRS = frozenset({
    "airflow_temperature_cel", "command_timeout", "current_pending_sector",
    "end_to_end_error", "erase_fail_count", "g_sense_error_rate",
    "hardware_ecc_recovered", "host_reads_32mib", "host_reads_mib",
    "host_writes_32mib", "host_writes_mib", "load_cycle_count",
    "media_wearout_indicator", "nand_writes_1gib", "offline_uncorrectable",
    "power_cycle_count", "power_on_hours", "program_fail_cnt_total",
    "program_fail_count", "raw_read_error_rate", "reallocated_event_count",
    "reallocated_sector_ct", "reported_uncorrect", "runtime_bad_block",
    "sata_downshift_count", "seek_error_rate", "spin_retry_count",
    "spin_up_time", "start_stop_count", "temperature_case",
    "temperature_celsius", "temperature_internal", "total_lbas_read",
    "total_lbas_written", "udma_crc_error_count", "unsafe_shutdown_count",
    "unused_rsvd_blk_cnt_tot", "wear_leveling_count",
    "workld_host_reads_perc", "workld_media_wear_indic", "workload_minutes",
    "critical_warning", "temperature", "available_spare",
    "available_spare_threshold", "percentage_used", "data_units_read",
    "data_units_written", "host_reads", "host_writes",
    "controller_busy_time", "power_cycles", "unsafe_shutdowns",
    "media_errors", "num_err_log_entries", "warning_temp_time",
    "critical_comp_time",
})


def _is_sample_value(value):
    """Return True if *value* is a plain number usable as a sample value."""
    # bool is a subclass of int but would render as "True"/"False".
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _flat_attribute_metrics(section, labels):
    """Build ``name{labels} value`` lines from a flat name -> number mapping.

    Anything that is not a mapping yields no lines, and entries whose name is
    not in the allow-list or whose value is not numeric are skipped.
    """
    if not isinstance(section, dict):
        return []
    lines = []
    for raw_name, value in section.items():
        attr_name = str(raw_name).replace(' ', '_').lower()
        if attr_name in _SMARTMON_ATTRS and _is_sample_value(value):
            lines.append(f"{attr_name}{{{labels}}} {value}")
    return lines


def parse_smartctl_attributes(disk, disk_type, serial, json_data):
    """Convert a decoded smartctl attribute report into metric lines.

    Exactly one section of *json_data* is used, checked in this order:
    ``nvme_smart_health_information_log``, ``scsi_grown_defect_list``, then
    ``ata_smart_attributes.table``.

    Args:
        disk: Device name, used as the ``disk`` label.
        disk_type: Device type, used as the ``type`` label.
        serial: Serial number, used as the ``serial_number`` label.
        json_data: Decoded JSON document (a dict).

    Returns:
        A list of ``name{labels} value`` strings. ATA attributes produce up
        to four lines each (``_value``, ``_worst``, ``_threshold``,
        ``_raw_value``) carrying an extra ``smart_id`` label. Samples whose
        value is missing or non-numeric are omitted rather than emitted with
        an empty value.
    """
    labels = f'disk="{disk}",type="{disk_type}",serial_number="{serial}"'

    if 'nvme_smart_health_information_log' in json_data:
        return _flat_attribute_metrics(
            json_data['nvme_smart_health_information_log'], labels
        )

    if 'scsi_grown_defect_list' in json_data:
        # NOTE(review): smartctl appears to report this key as a bare number,
        # on which the previous ``.items()`` call raised AttributeError. A
        # non-mapping value now simply contributes no metrics - confirm
        # whether a dedicated metric should be exported for it.
        return _flat_attribute_metrics(
            json_data.get('scsi_grown_defect_list', {}), labels
        )

    ata_section = json_data.get('ata_smart_attributes')
    if not isinstance(ata_section, dict) or 'table' not in ata_section:
        return []

    metrics = []
    for attr in ata_section['table']:
        attr_name = str(attr.get('name', '')).replace('-', '_').lower()
        if attr_name not in _SMARTMON_ATTRS:
            continue
        attr_id = attr.get('id', '')
        raw = attr.get('raw') or {}
        samples = (
            ('value', attr.get('value')),
            ('worst', attr.get('worst')),
            ('threshold', attr.get('thresh')),
            ('raw_value', raw.get('value')),
        )
        for suffix, value in samples:
            if _is_sample_value(value):
                metrics.append(
                    f'{attr_name}_{suffix}{{{labels},smart_id="{attr_id}"}} {value}'
                )
    return metrics
59+
60+
def parse_smartctl_info(disk, disk_type, json_data):
    """Build the device-info and SMART status metric lines for one device.

    Args:
        disk: Device name, used as the ``disk`` label.
        disk_type: Device type, used as the ``type`` label.
        json_data: Decoded JSON document (a dict) describing the device.

    Returns:
        A list of metric lines: ``device_info`` (always, value 1),
        ``device_smart_available`` (always), ``device_smart_enabled`` (only
        when SMART is available) and ``device_smart_healthy`` (only when
        ``smart_status`` contains ``passed``).
    """

    def escape(value):
        # Backslash, double quote and newline must be escaped inside a label
        # value, otherwise e.g. a model name containing a quote yields an
        # unparseable line.
        text = str(value)
        return text.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')

    info = json_data.get('device') or {}
    smart_status = json_data.get('smart_status') or {}
    # NOTE(review): smartctl is believed to report availability/enablement
    # under "smart_support" rather than "smart_status"; the old lookup is kept
    # as a fallback so documents shaped the previous way still work. Confirm
    # against the JSON produced by the deployed smartctl version.
    smart_support = json_data.get('smart_support') or {}
    available = bool(
        smart_support.get('available', smart_status.get('available', False))
    )
    enabled = bool(
        smart_support.get('enabled', smart_status.get('enabled', False))
    )

    # A null serial number would otherwise fail on .lower().
    serial = str(json_data.get('serial_number') or '').lower()

    labels = {
        'disk': disk,
        'type': disk_type,
        'vendor': info.get('vendor', ''),
        'product': info.get('product', ''),
        'revision': info.get('revision', ''),
        'lun_id': info.get('lun_id', ''),
        'model_family': json_data.get('model_family', ''),
        'device_model': json_data.get('model_name', ''),
        'serial_number': serial,
        'firmware_version': json_data.get('firmware_version', ''),
    }
    label_str = ','.join(f'{k}="{escape(v)}"' for k, v in labels.items())
    # Reduced label set shared by the three status metrics.
    short_labels = (
        f'disk="{escape(disk)}",type="{escape(disk_type)}",'
        f'serial_number="{escape(serial)}"'
    )

    metrics = [
        f'device_info{{{label_str}}} 1',
        f'device_smart_available{{{short_labels}}} {int(available)}',
    ]
    if available:
        metrics.append(f'device_smart_enabled{{{short_labels}}} {int(enabled)}')
        if 'passed' in smart_status:
            healthy = int(bool(smart_status.get('passed', False)))
            metrics.append(f'device_smart_healthy{{{short_labels}}} {healthy}')
    return metrics
85+
86+
def format_output(metrics):
    """Render metric lines as text with ``smartmon_``-prefixed names.

    The lines are sorted as plain strings, which keeps all samples of one
    metric adjacent. A ``# HELP`` / ``# TYPE`` (gauge) header pair is written
    each time the metric name - the text before the first ``{`` - differs
    from that of the previous line.

    Args:
        metrics: Iterable of ``name{labels} value`` strings without prefix.

    Returns:
        The newline-joined text; an empty string when there are no metrics.
    """
    ordered = list(metrics)
    ordered.sort()

    lines = []
    previous_name = ""
    for sample in ordered:
        name, _, _ = sample.partition('{')
        if name != previous_name:
            lines += [
                f"# HELP smartmon_{name} SMART metric {name}",
                f"# TYPE smartmon_{name} gauge",
            ]
            previous_name = name
        lines.append(f"smartmon_{sample}")
    return '\n'.join(lines)
97+
98+
def main():
    """Collect metrics from every scanned device and print them.

    Steps:
      1. Ask smartctl for its version (``smartctl_version`` metric, with
         ``"unknown"`` when it cannot be determined).
      2. Enumerate devices with ``--scan-open``.
      3. For each device record the run time, probe with ``-n standby`` and
         skip the device when it is reported in standby or the probe output
         is unusable, then gather info/health and attribute metrics.
      4. Print the formatted result to standard output.

    A smartctl invocation whose output cannot be decoded, or which cannot be
    started at all, never aborts the run: the affected step falls back to
    "unknown" / no devices / skipping the device.
    """
    # Function-scope import: only this function needs it.
    import time

    # Failures that mean "no usable output from smartctl".
    unusable = (json.JSONDecodeError, OSError)

    try:
        version_output = run_command([SMARTCTL_PATH, '-j'], parse_json=True)
    except unusable:
        version_parts = []
    else:
        version_parts = version_output.get('smartctl', {}).get('version', [])
    if version_parts:
        smartctl_version_str = '.'.join(map(str, version_parts))
    else:
        smartctl_version_str = "unknown"
    metrics = [f'smartctl_version{{version="{smartctl_version_str}"}} 1']

    try:
        device_list_output = run_command(
            [SMARTCTL_PATH, '--scan-open', '-j'], parse_json=True
        )
    except unusable:
        scanned = []
    else:
        scanned = device_list_output.get('devices', [])
    # Entries without a name cannot be queried, so they are dropped.
    devices = [
        (entry.get('name', ''), entry.get('type', 'auto'))
        for entry in scanned
        if entry.get('name', '')
    ]

    for disk, disk_type in devices:
        # time.time() is the real Unix time. The previous
        # datetime.utcnow().timestamp() interpreted the naive UTC value as
        # local time, shifting the result by the host's UTC offset.
        run_timestamp = int(time.time())
        metrics.append(
            f'smartctl_run{{disk="{disk}",type="{disk_type}"}} {run_timestamp}'
        )

        try:
            standby_output = run_command(
                [SMARTCTL_PATH, '-n', 'standby', '-d', disk_type, '-j', disk],
                parse_json=True,
            )
        except unusable:
            # Assume the device is inactive if the probe output is unusable.
            active = 0
        else:
            active = 0 if standby_output.get('power_mode', '') == 'standby' else 1

        metrics.append(f'device_active{{disk="{disk}",type="{disk_type}"}} {active}')
        if not active:
            continue

        try:
            info_output = run_command(
                [SMARTCTL_PATH, '-i', '-H', '-d', disk_type, '-j', disk],
                parse_json=True,
            )
        except unusable:
            continue
        metrics.extend(parse_smartctl_info(disk, disk_type, info_output))
        serial_number = info_output.get('serial_number', '').lower()

        try:
            attributes_output = run_command(
                [SMARTCTL_PATH, '-A', '-d', disk_type, '-j', disk],
                parse_json=True,
            )
        except unusable:
            continue
        metrics.extend(
            parse_smartctl_attributes(disk, disk_type, serial_number, attributes_output)
        )

    print(format_output(metrics))
154+
155+
# Script entry point: run the collector only when executed directly.
if __name__ == "__main__":
    raise SystemExit(main())

0 commit comments

Comments
 (0)