check disk wearout levels

This commit is contained in:
Alex 2024-11-09 13:57:32 +03:00
parent 93f5404bc1
commit 92ce59d6a3
9 changed files with 1560 additions and 8 deletions

View file

@ -20,14 +20,15 @@ class AlertType(StrEnum):
LOGIN = "LOGIN" # TODO LOGIN = "LOGIN" # TODO
SMART = "SMART" # TODO SMART = "SMART" # TODO
RAID = "RAID" RAID = "RAID"
DISKS = "DISKS"
UPS = "UPS" UPS = "UPS"
UPDATE = "UPDATE" UPDATE = "UPDATE"
class Severity(Enum): class Severity(StrEnum):
INFO = 1 INFO = "INFO"
WARNING = 2 WARNING = "WARNING"
CRITICAL = 3 CRITICAL = "CRITICAL"
@dataclass @dataclass

View file

@ -7,7 +7,21 @@
] ]
}, },
"raid": { "raid": {
"lvs": ["Data/lvol0"] "lvs": [
"Data/lvol0"
]
},
"wearout": {
"disks": [
{
"name": "/dev/sda",
"severity": "WARNING"
},
{
"name": "/dev/nvme0",
"severity": "CRITICAL"
}
]
} }
} }
} }

View file

@ -4,7 +4,7 @@ from datetime import timedelta
from alerting import alerts from alerting import alerts
from misc import cvars, docker_registry, sensors, vuln from misc import cvars, docker_registry, sensors, vuln
from misc.disks import LVAttr from misc.disks import LVAttr, WearoutIndicator, get_wearout_reading
IS_TESTING = False IS_TESTING = False
@ -236,3 +236,35 @@ def raid_check() -> list[alerts.Alert]:
) )
return alert_list return alert_list
def disk_wearout_check() -> list[alerts.Alert]:
check_config = cvars.config.get()["checks"]["wearout"]
alert_list = []
for disk in check_config["disks"]:
try:
wearout_reading = get_wearout_reading(disk["name"])
except Exception as exc:
alert_list.append(
alerts.Alert(
alert_type=alerts.AlertType.ERROR,
message=f"Could not check wearout for disk {disk['name']}: {repr(exc)}, see logs",
severity=alerts.Severity.CRITICAL,
)
)
logging.error(traceback.format_exc())
continue
if wearout_reading.current_reading < wearout_reading.threshold_reading:
match wearout_reading.indicator:
case WearoutIndicator.REALLOCATED_SECTORS:
message = f"Disk {disk['name']} has reallocated sectors (curr {wearout_reading.current_reading}, thresh {wearout_reading.threshold_reading})"
case WearoutIndicator.SPARE_BLOCKS:
message = f"Disk {disk['name']} has too few spare blocks (curr {wearout_reading.current_reading}, thresh {wearout_reading.threshold_reading})"
alert_list.append(
alerts.Alert(
alert_type=alerts.AlertType.DISKS, message=message, severity=alerts.Severity[disk["severity"]]
)
)
return alert_list

View file

@ -1,7 +1,7 @@
import json import json
import subprocess import subprocess
from dataclasses import dataclass from dataclasses import dataclass
from enum import StrEnum from enum import Enum, StrEnum
from typing import Optional, Self from typing import Optional, Self
@ -132,3 +132,58 @@ class LVAttr:
json_obj = json.loads(subprocess.run(["lvs", "--reportformat=json", name], capture_output=True).stdout) json_obj = json.loads(subprocess.run(["lvs", "--reportformat=json", name], capture_output=True).stdout)
attr_str = json_obj["report"][0]["lv"][0]["lv_attr"] attr_str = json_obj["report"][0]["lv"][0]["lv_attr"]
return cls.from_str(attr_str, name) return cls.from_str(attr_str, name)
class WearoutIndicator(Enum):
REALLOCATED_SECTORS = 0
SPARE_BLOCKS = 1
@dataclass
class WearoutReading:
indicator: WearoutIndicator
current_reading: int
threshold_reading: int
def _get_wearout_reading_from_smartctl_output(smartctl_output: dict) -> WearoutReading:
disk_protocol = smartctl_output["device"]["protocol"]
rotation_rate = smartctl_output.get("rotation_rate", 0)
match rotation_rate:
case 0: # assuming non-rotating media is an SSD
indicator = WearoutIndicator.SPARE_BLOCKS
match disk_protocol:
case "ATA":
attr_table = smartctl_output["ata_smart_attributes"]["table"]
for a in attr_table:
if a["name"] == "Available_Reservd_Space":
value = a["value"]
threshold = a["thresh"]
break
else:
raise Exception(f"no Available_Reservd_Space on ATA SSD")
case "NVMe":
value = smartctl_output["nvme_smart_health_information_log"]["available_spare"]
threshold = smartctl_output["nvme_smart_health_information_log"]["available_spare_threshold"]
case _:
indicator = WearoutIndicator.REALLOCATED_SECTORS
match disk_protocol:
case "ATA":
attr_table = smartctl_output["ata_smart_attributes"]["table"]
for a in attr_table:
if a["name"] == "Reallocated_Sector_Ct":
value = a["value"]
threshold = a["thresh"]
break
else:
raise Exception(f"no Reallocated_Sector_Ct on ATA HDD")
case "NVMe": # ? NVMe HDDs are very rare, if they even exist
raise NotImplementedError
return WearoutReading(indicator, current_reading=value, threshold_reading=threshold)
def get_wearout_reading(disk: str) -> WearoutReading:
    """Run ``smartctl -ja`` on ``disk`` and turn its JSON output into a wearout reading.

    Args:
        disk: Device path handed to smartctl as-is.

    Returns:
        The reading extracted from the smartctl report.
    """
    completed = subprocess.run(["smartctl", "-ja", disk], capture_output=True)
    report = json.loads(completed.stdout.decode("utf-8"))
    return _get_wearout_reading_from_smartctl_output(report)

View file

@ -38,6 +38,7 @@ async def main():
interval_checker(checks.ram_check, datetime.timedelta(minutes=1)), interval_checker(checks.ram_check, datetime.timedelta(minutes=1)),
interval_checker(checks.vuln_check, datetime.timedelta(days=1)), interval_checker(checks.vuln_check, datetime.timedelta(days=1)),
interval_checker(checks.raid_check, datetime.timedelta(days=1)), interval_checker(checks.raid_check, datetime.timedelta(days=1)),
interval_checker(checks.disk_wearout_check, datetime.timedelta(days=1)),
scheduled_checker( scheduled_checker(
checks.docker_registry_check, period=datetime.timedelta(days=1), when=datetime.time(hour=0, minute=0) checks.docker_registry_check, period=datetime.timedelta(days=1), when=datetime.time(hour=0, minute=0)
), ),

594
tests/smartctl_ata_hdd.json Normal file
View file

@ -0,0 +1,594 @@
{
"json_format_version": [
1,
0
],
"smartctl": {
"version": [
7,
4
],
"pre_release": false,
"svn_revision": "5530",
"platform_info": "x86_64-linux-6.11.3-arch1-1",
"build_info": "(local build)",
"argv": [
"smartctl",
"-ja",
"/dev/sda"
],
"drive_database_version": {
"string": "7.3/5528"
},
"exit_status": 0
},
"local_time": {
"time_t": 1731149584,
"asctime": "Sat Nov 9 13:53:04 2024 MSK"
},
"device": {
"name": "/dev/sda",
"info_name": "/dev/sda [SAT]",
"type": "sat",
"protocol": "ATA"
},
"model_name": "WDC WD20EARZ-00C5XB0",
"serial_number": "WD-WX32D83C15U7",
"wwn": {
"naa": 5,
"oui": 5358,
"id": 8959374949
},
"firmware_version": "01.01A01",
"user_capacity": {
"blocks": 3907029168,
"bytes": 2000398934016
},
"logical_block_size": 512,
"physical_block_size": 4096,
"rotation_rate": 5400,
"form_factor": {
"ata_value": 2,
"name": "3.5 inches"
},
"trim": {
"supported": false
},
"in_smartctl_database": false,
"ata_version": {
"string": "ACS-3 T13/2161-D revision 5",
"major_value": 2046,
"minor_value": 109
},
"sata_version": {
"string": "SATA 3.1",
"value": 126
},
"interface_speed": {
"max": {
"sata_value": 14,
"string": "6.0 Gb/s",
"units_per_second": 60,
"bits_per_unit": 100000000
},
"current": {
"sata_value": 3,
"string": "6.0 Gb/s",
"units_per_second": 60,
"bits_per_unit": 100000000
}
},
"smart_support": {
"available": true,
"enabled": true
},
"smart_status": {
"passed": true
},
"ata_smart_data": {
"offline_data_collection": {
"status": {
"value": 0,
"string": "was never started"
},
"completion_seconds": 19380
},
"self_test": {
"status": {
"value": 0,
"string": "completed without error",
"passed": true
},
"polling_minutes": {
"short": 2,
"extended": 208,
"conveyance": 5
}
},
"capabilities": {
"values": [
123,
3
],
"exec_offline_immediate_supported": true,
"offline_is_aborted_upon_new_cmd": false,
"offline_surface_scan_supported": true,
"self_tests_supported": true,
"conveyance_self_test_supported": true,
"selective_self_test_supported": true,
"attribute_autosave_enabled": true,
"error_logging_supported": true,
"gp_logging_supported": true
}
},
"ata_sct_capabilities": {
"value": 12341,
"error_recovery_control_supported": false,
"feature_control_supported": true,
"data_table_supported": true
},
"ata_smart_attributes": {
"revision": 16,
"table": [
{
"id": 1,
"name": "Raw_Read_Error_Rate",
"value": 200,
"worst": 200,
"thresh": 51,
"when_failed": "",
"flags": {
"value": 47,
"string": "POSR-K ",
"prefailure": true,
"updated_online": true,
"performance": true,
"error_rate": true,
"event_count": false,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 3,
"name": "Spin_Up_Time",
"value": 175,
"worst": 175,
"thresh": 21,
"when_failed": "",
"flags": {
"value": 39,
"string": "POS--K ",
"prefailure": true,
"updated_online": true,
"performance": true,
"error_rate": false,
"event_count": false,
"auto_keep": true
},
"raw": {
"value": 2241,
"string": "2241"
}
},
{
"id": 4,
"name": "Start_Stop_Count",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 11,
"string": "11"
}
},
{
"id": 5,
"name": "Reallocated_Sector_Ct",
"value": 200,
"worst": 200,
"thresh": 140,
"when_failed": "",
"flags": {
"value": 51,
"string": "PO--CK ",
"prefailure": true,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 7,
"name": "Seek_Error_Rate",
"value": 100,
"worst": 253,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 46,
"string": "-OSR-K ",
"prefailure": false,
"updated_online": true,
"performance": true,
"error_rate": true,
"event_count": false,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 9,
"name": "Power_On_Hours",
"value": 96,
"worst": 96,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 3275,
"string": "3275"
}
},
{
"id": 10,
"name": "Spin_Retry_Count",
"value": 100,
"worst": 253,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 11,
"name": "Calibration_Retry_Count",
"value": 100,
"worst": 253,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 12,
"name": "Power_Cycle_Count",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 10,
"string": "10"
}
},
{
"id": 192,
"name": "Power-Off_Retract_Count",
"value": 200,
"worst": 200,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 6,
"string": "6"
}
},
{
"id": 193,
"name": "Load_Cycle_Count",
"value": 200,
"worst": 200,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 28,
"string": "28"
}
},
{
"id": 194,
"name": "Temperature_Celsius",
"value": 112,
"worst": 105,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 34,
"string": "-O---K ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": false,
"auto_keep": true
},
"raw": {
"value": 31,
"string": "31"
}
},
{
"id": 196,
"name": "Reallocated_Event_Count",
"value": 200,
"worst": 200,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 197,
"name": "Current_Pending_Sector",
"value": 200,
"worst": 200,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 198,
"name": "Offline_Uncorrectable",
"value": 100,
"worst": 253,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 48,
"string": "----CK ",
"prefailure": false,
"updated_online": false,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 199,
"name": "UDMA_CRC_Error_Count",
"value": 200,
"worst": 200,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 200,
"name": "Multi_Zone_Error_Rate",
"value": 100,
"worst": 253,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 8,
"string": "---R-- ",
"prefailure": false,
"updated_online": false,
"performance": false,
"error_rate": true,
"event_count": false,
"auto_keep": false
},
"raw": {
"value": 0,
"string": "0"
}
}
]
},
"power_on_time": {
"hours": 3275
},
"power_cycle_count": 10,
"temperature": {
"current": 31
},
"ata_smart_error_log": {
"summary": {
"revision": 1,
"count": 0
}
},
"ata_smart_self_test_log": {
"standard": {
"revision": 1,
"table": [
{
"type": {
"value": 3,
"string": "Conveyance offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 0
}
],
"count": 1,
"error_count_total": 0,
"error_count_outdated": 0
}
},
"ata_smart_selective_self_test_log": {
"revision": 1,
"table": [
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
},
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
},
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
},
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
},
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
}
],
"flags": {
"value": 0,
"remainder_scan_enabled": false
},
"power_up_scan_resume_minutes": 0
}
}

680
tests/smartctl_ata_ssd.json Normal file
View file

@ -0,0 +1,680 @@
{
"json_format_version": [
1,
0
],
"smartctl": {
"version": [
7,
4
],
"pre_release": false,
"svn_revision": "5530",
"platform_info": "x86_64-linux-6.11.6-arch1-1",
"build_info": "(local build)",
"argv": [
"smartctl",
"-ja",
"/dev/sda"
],
"drive_database_version": {
"string": "7.3/5528"
},
"exit_status": 0
},
"local_time": {
"time_t": 1731149676,
"asctime": "Sat Nov 9 13:54:36 2024 MSK"
},
"device": {
"name": "/dev/sda",
"info_name": "/dev/sda [SAT]",
"type": "sat",
"protocol": "ATA"
},
"model_family": "WD Blue / Red / Green SSDs",
"model_name": "WDC WDS100T2G0A-00JH30",
"serial_number": "20299A802244",
"wwn": {
"naa": 5,
"oui": 6980,
"id": 37501727029
},
"firmware_version": "UH510000",
"user_capacity": {
"blocks": 1953529856,
"bytes": 1000207286272
},
"logical_block_size": 512,
"physical_block_size": 512,
"rotation_rate": 0,
"form_factor": {
"ata_value": 3,
"name": "2.5 inches"
},
"trim": {
"supported": true,
"deterministic": true,
"zeroed": false
},
"in_smartctl_database": true,
"ata_version": {
"string": "ACS-2 T13/2015-D revision 3",
"major_value": 1008,
"minor_value": 272
},
"sata_version": {
"string": "SATA 3.2",
"value": 255
},
"interface_speed": {
"max": {
"sata_value": 14,
"string": "6.0 Gb/s",
"units_per_second": 60,
"bits_per_unit": 100000000
},
"current": {
"sata_value": 3,
"string": "6.0 Gb/s",
"units_per_second": 60,
"bits_per_unit": 100000000
}
},
"smart_support": {
"available": true,
"enabled": true
},
"smart_status": {
"passed": true
},
"ata_smart_data": {
"offline_data_collection": {
"status": {
"value": 0,
"string": "was never started"
},
"completion_seconds": 120
},
"self_test": {
"status": {
"value": 0,
"string": "completed without error",
"passed": true
},
"polling_minutes": {
"short": 2,
"extended": 182
}
},
"capabilities": {
"values": [
21,
3
],
"exec_offline_immediate_supported": true,
"offline_is_aborted_upon_new_cmd": true,
"offline_surface_scan_supported": false,
"self_tests_supported": true,
"conveyance_self_test_supported": false,
"selective_self_test_supported": false,
"attribute_autosave_enabled": true,
"error_logging_supported": true,
"gp_logging_supported": true
}
},
"ata_smart_attributes": {
"revision": 1,
"table": [
{
"id": 5,
"name": "Reallocated_Sector_Ct",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 9,
"name": "Power_On_Hours",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 7976,
"string": "7976"
}
},
{
"id": 12,
"name": "Power_Cycle_Count",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 1218,
"string": "1218"
}
},
{
"id": 165,
"name": "Block_Erase_Count",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 2063,
"string": "2063"
}
},
{
"id": 166,
"name": "Minimum_PE_Cycles_TLC",
"value": 100,
"worst": 100,
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 15,
"string": "15"
}
},
{
"id": 167,
"name": "Max_Bad_Blocks_per_Die",
"value": 100,
"worst": 100,
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 168,
"name": "Maximum_PE_Cycles_TLC",
"value": 100,
"worst": 100,
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 36,
"string": "36"
}
},
{
"id": 169,
"name": "Total_Bad_Blocks",
"value": 100,
"worst": 100,
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 1075,
"string": "1075"
}
},
{
"id": 170,
"name": "Grown_Bad_Blocks",
"value": 100,
"worst": 100,
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 171,
"name": "Program_Fail_Count",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 172,
"name": "Erase_Fail_Count",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 173,
"name": "Average_PE_Cycles_TLC",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 15,
"string": "15"
}
},
{
"id": 174,
"name": "Unexpected_Power_Loss",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 144,
"string": "144"
}
},
{
"id": 184,
"name": "End-to-End_Error",
"value": 100,
"worst": 100,
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 187,
"name": "Reported_Uncorrect",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 188,
"name": "Command_Timeout",
"value": 100,
"worst": 100,
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 194,
"name": "Temperature_Celsius",
"value": 70,
"worst": 58,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 34,
"string": "-O---K ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": false,
"auto_keep": true
},
"raw": {
"value": 249108103198,
"string": "30 (Min/Max 0/58)"
}
},
{
"id": 199,
"name": "UDMA_CRC_Error_Count",
"value": 100,
"worst": 100,
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 230,
"name": "Media_Wearout_Indicator",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 5879860561241,
"string": "0x055903000559"
}
},
{
"id": 232,
"name": "Available_Reservd_Space",
"value": 100,
"worst": 100,
"thresh": 5,
"when_failed": "",
"flags": {
"value": 51,
"string": "PO--CK ",
"prefailure": true,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 100,
"string": "100"
}
},
{
"id": 233,
"name": "NAND_GB_Written_TLC",
"value": 100,
"worst": 100,
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 15654,
"string": "15654"
}
},
{
"id": 234,
"name": "NAND_GB_Written_SLC",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 32244,
"string": "32244"
}
},
{
"id": 241,
"name": "Host_Writes_GiB",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 48,
"string": "----CK ",
"prefailure": false,
"updated_online": false,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 13847,
"string": "13847"
}
},
{
"id": 242,
"name": "Host_Reads_GiB",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 48,
"string": "----CK ",
"prefailure": false,
"updated_online": false,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 31195,
"string": "31195"
}
},
{
"id": 244,
"name": "Temp_Throttle_Status",
"value": 0,
"worst": 100,
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
}
]
},
"power_on_time": {
"hours": 7976
},
"power_cycle_count": 1218,
"temperature": {
"current": 30
},
"ata_smart_error_log": {
"summary": {
"revision": 1,
"count": 0
}
},
"ata_smart_self_test_log": {
"standard": {
"revision": 1,
"count": 0
}
}
}

View file

@ -0,0 +1,145 @@
{
"json_format_version": [
1,
0
],
"smartctl": {
"version": [
7,
4
],
"pre_release": false,
"svn_revision": "5530",
"platform_info": "x86_64-linux-6.11.3-arch1-1",
"build_info": "(local build)",
"argv": [
"smartctl",
"-ja",
"/dev/nvme0"
],
"exit_status": 0
},
"local_time": {
"time_t": 1731149045,
"asctime": "Sat Nov 9 13:44:05 2024 MSK"
},
"device": {
"name": "/dev/nvme0",
"info_name": "/dev/nvme0",
"type": "nvme",
"protocol": "NVMe"
},
"model_name": "Samsung SSD 970 EVO Plus 1TB",
"serial_number": "S4EWNM0W921977B",
"firmware_version": "2B2QEXM7",
"nvme_pci_vendor": {
"id": 5197,
"subsystem_id": 5197
},
"nvme_ieee_oui_identifier": 9528,
"nvme_total_capacity": 1000204886016,
"nvme_unallocated_capacity": 0,
"nvme_controller_id": 4,
"nvme_version": {
"string": "1.3",
"value": 66304
},
"nvme_number_of_namespaces": 1,
"nvme_namespaces": [
{
"id": 1,
"size": {
"blocks": 1953525168,
"bytes": 1000204886016
},
"capacity": {
"blocks": 1953525168,
"bytes": 1000204886016
},
"utilization": {
"blocks": 686279048,
"bytes": 351374872576
},
"formatted_lba_size": 512,
"eui64": {
"oui": 9528,
"ext_id": 383083641036
}
}
],
"user_capacity": {
"blocks": 1953525168,
"bytes": 1000204886016
},
"logical_block_size": 512,
"smart_support": {
"available": true,
"enabled": true
},
"smart_status": {
"passed": true,
"nvme": {
"value": 0
}
},
"nvme_smart_health_information_log": {
"critical_warning": 0,
"temperature": 47,
"available_spare": 100,
"available_spare_threshold": 10,
"percentage_used": 0,
"data_units_read": 111588,
"data_units_written": 1802957,
"host_reads": 2570341,
"host_writes": 36266417,
"controller_busy_time": 133,
"power_cycles": 31,
"power_on_hours": 432,
"unsafe_shutdowns": 18,
"media_errors": 0,
"num_err_log_entries": 63,
"warning_temp_time": 0,
"critical_comp_time": 0,
"temperature_sensors": [
47,
51
]
},
"temperature": {
"current": 47
},
"power_cycle_count": 31,
"power_on_time": {
"hours": 432
},
"nvme_error_information_log": {
"size": 64,
"read": 16,
"unread": 0,
"table": [
{
"error_count": 63,
"submission_queue_id": 0,
"command_id": 8,
"status_field": {
"value": 8194,
"do_not_retry": false,
"status_code_type": 0,
"status_code": 2,
"string": "Invalid Field in Command"
},
"phase_tag": false,
"lba": {
"value": 0
},
"nsid": 0
}
]
},
"nvme_self_test_log": {
"current_self_test_operation": {
"value": 0,
"string": "No self-test in progress"
}
}
}

View file

@ -1,6 +1,12 @@
import json
import unittest import unittest
from misc.disks import LVAttr from misc.disks import (
LVAttr,
WearoutIndicator,
WearoutReading,
_get_wearout_reading_from_smartctl_output,
)
class TestDisks(unittest.TestCase): class TestDisks(unittest.TestCase):
@ -22,6 +28,30 @@ class TestDisks(unittest.TestCase):
), ),
) )
def test_wearout_reading_nvme_ssd(self):
    """The NVMe SSD fixture yields a spare-blocks reading of 100 against a threshold of 10."""
    with open("tests/smartctl_nvme_ssd.json") as fixture:
        report = json.load(fixture)
    actual = _get_wearout_reading_from_smartctl_output(report)
    expected = WearoutReading(
        indicator=WearoutIndicator.SPARE_BLOCKS,
        current_reading=100,
        threshold_reading=10,
    )
    self.assertEqual(actual, expected)
def test_wearout_reading_ata_hdd(self):
    """The ATA HDD fixture yields a reallocated-sectors reading of 200 against a threshold of 140."""
    with open("tests/smartctl_ata_hdd.json") as fixture:
        report = json.load(fixture)
    actual = _get_wearout_reading_from_smartctl_output(report)
    expected = WearoutReading(
        indicator=WearoutIndicator.REALLOCATED_SECTORS,
        current_reading=200,
        threshold_reading=140,
    )
    self.assertEqual(actual, expected)
def test_wearout_reading_ata_ssd(self):
    """The ATA SSD fixture yields a spare-blocks reading of 100 against a threshold of 5."""
    with open("tests/smartctl_ata_ssd.json") as fixture:
        report = json.load(fixture)
    actual = _get_wearout_reading_from_smartctl_output(report)
    expected = WearoutReading(
        indicator=WearoutIndicator.SPARE_BLOCKS,
        current_reading=100,
        threshold_reading=5,
    )
    self.assertEqual(actual, expected)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()