From 47c110f83eaef6c12b0f4b3b823aebdd328d03c4 Mon Sep 17 00:00:00 2001 From: Alex Date: Sat, 9 Nov 2024 12:22:12 +0300 Subject: [PATCH 1/5] check disk health --- alerting/alerts.py | 2 +- config.example.json | 3 + misc/checks.py | 74 +++++++++++++++++++++++- misc/disks.py | 134 ++++++++++++++++++++++++++++++++++++++++++++ service.py | 1 + tests/test_disks.py | 27 +++++++++ 6 files changed, 239 insertions(+), 2 deletions(-) create mode 100644 misc/disks.py create mode 100644 tests/test_disks.py diff --git a/alerting/alerts.py b/alerting/alerts.py index ae57492..a658b46 100644 --- a/alerting/alerts.py +++ b/alerting/alerts.py @@ -19,7 +19,7 @@ class AlertType(StrEnum): VULN = "VULN" LOGIN = "LOGIN" # TODO SMART = "SMART" # TODO - RAID = "RAID" # TODO + RAID = "RAID" UPS = "UPS" UPDATE = "UPDATE" diff --git a/config.example.json b/config.example.json index 21a8897..a3ee930 100644 --- a/config.example.json +++ b/config.example.json @@ -5,6 +5,9 @@ "images": [ "gitlab/gitlab-ce" ] + }, + "raid": { + "lvs": ["Data/lvol0"] } } } diff --git a/misc/checks.py b/misc/checks.py index ed4458b..64fe817 100644 --- a/misc/checks.py +++ b/misc/checks.py @@ -1,7 +1,10 @@ +import logging +import traceback from datetime import timedelta from alerting import alerts -from misc import docker_registry, sensors, vuln +from misc import cvars, docker_registry, sensors, vuln +from misc.disks import LVAttr from misc.enums import UPSStatus IS_TESTING = False @@ -169,3 +172,72 @@ async def docker_registry_check() -> list[alerts.Alert]: ) ) return alert_list + + +def raid_check() -> list[alerts.Alert]: + check_config = cvars.config.get()["checks"]["raid"] + alert_list = [] + for lv in check_config["lvs"]: + try: + lv_attr = LVAttr.from_cli(lv) + except Exception as exc: + alert_list.append( + alerts.Alert( + alert_type=alerts.AlertType.ERROR, + message=f"Could not check RAID LV {lv}: {repr(exc)}, see logs", + severity=alerts.Severity.CRITICAL, + ) + ) + logging.error(traceback.format_exc()) + continue + + # sanity check + if lv_attr.vol_type not in [LVAttr.VolType.RAID, LVAttr.VolType.RAID_NOSYNC]: + alert_list.append( + alerts.Alert( + alert_type=alerts.AlertType.ERROR, + message=f"LV {lv} is not of RAID type", + severity=alerts.Severity.CRITICAL, + ) + ) + continue + + match lv_attr.health: + case LVAttr.Health.PARTIAL: + alert_list.append( + alerts.Alert( + alert_type=alerts.AlertType.RAID, + message=f"LV {lv} operating in partial mode; one of PVs has failed", + severity=alerts.Severity.CRITICAL, + ) + ) + continue + case LVAttr.Health.UNKNOWN: + alert_list.append( + alerts.Alert( + alert_type=alerts.AlertType.RAID, + message=f"LV {lv}'s state is unknown", + severity=alerts.Severity.CRITICAL, + ) + ) + continue + case LVAttr.Health.REFRESH_NEEDED: + alert_list.append( + alerts.Alert( + alert_type=alerts.AlertType.RAID, + message=f"LV {lv} has suffered a write error; run a refresh or replace the failing PV", + severity=alerts.Severity.WARNING, + ) + ) + continue + case LVAttr.Health.MISMATCHES: + alert_list.append( + alerts.Alert( + alert_type=alerts.AlertType.RAID, + message=f"LV {lv} is partially incoherent; run a repairing scrub operation", + severity=alerts.Severity.WARNING, + ) + ) + continue + + return alert_list diff --git a/misc/disks.py b/misc/disks.py new file mode 100644 index 0000000..f5a74df --- /dev/null +++ b/misc/disks.py @@ -0,0 +1,134 @@ +import json +import subprocess +from dataclasses import dataclass +from enum import StrEnum +from typing import Optional, Self + + +@dataclass +class LVAttr: + """https://man.archlinux.org/man/lvs.8#NOTES""" + + class VolType(StrEnum): + CACHE = "C" + MIRRORED = "m" + MIRRORED_NOSYNC = "M" + ORIGIN = "o" + ORIGIN_MERGING_SNAPSHOT = "O" + INTEGRITY = "g" + RAID = "r" + RAID_NOSYNC = "R" + SNAPSHOT = "s" + MERGING_SNAPSHOT = "S" + PVMOVE = "p" + VIRTUAL = "v" + IMAGE = "i" + IMAGE_OUT_OF_SYNC = "I" + MIRROR_LOG = "l" + CONVERTING = "c" + THIN = "V" + THIN_POOL = "t" + THIN_POOL_DATA = "T" + VDO_POOL = "d" + VDO_POOL_DATA = "D" + METADATA = "e" + + class Permissions(StrEnum): + WRITABLE = "w" + READONLY = "r" + READONLY_ACTIVATED = "R" + + class AllocationPolicy(StrEnum): + ANYWHERE = "a" + ANYWHERE_LOCKED = "A" + CONTIGUOUS = "c" + CONTIGUOUS_LOCKED = "C" + INHERITED = "i" + INHERITED_LOCKED = "I" + CLING = "l" + CLING_LOCKED = "L" + NORMAL = "n" + NORMAL_LOCKED = "N" + + class State(StrEnum): + ACTIVE = "a" + HISTORICAL = "h" + SUSPENDED = "s" + INVALID_SNAPSHOT = "I" + INVALID_SUSPENDED_SNAPSHOT = "S" + SNAPSHOT_MERGE_FAILED = "m" + SUSPENDED_SNAPSHOT_MERGE_FAILED = "M" + DEVICE_PRESENT_NO_TABLES = "d" + DEVICE_PRESENT_INACTIVE_TABLE = "i" + THIN_POOL_CHECK_NEEDED = "c" + SUSPENDED_THIN_POOL_CHECK_NEEDED = "C" + UNKNOWN = "X" + + class IsOpen(StrEnum): + OPEN = "o" + CLOSED = "-" + UNKNOWN = "X" + + class TargetType(StrEnum): + CACHE = "C" + MIRROR = "m" + RAID = "r" + SNAPSHOT = "s" + THIN = "t" + UNKNOWN = "u" + VIRTUAL = "v" + + class Health(StrEnum): + # for all + PARTIAL = "p" + UNKNOWN = "X" + OK = "-" + + # for RAID + REFRESH_NEEDED = "r" + MISMATCHES = "m" + WRITEMOSTLY = "w" + RESHAPING = "s" + REMOVE = "R" + + # for thin pools and LVs + FAILED = "F" + OUT_OF_SPACE = "D" + METADATA_READ_ONLY = "M" + + # for writecache + ERROR = "E" + + vol_type: VolType + permissions: Permissions + allocation_policy: AllocationPolicy + fixed_minor: bool + state: State + is_open: IsOpen + target_type: TargetType + zero_before_use: bool + health: Health + skip_activation: bool + name: Optional[str] = None + + @classmethod + def from_str(cls, attr_str: str, name: Optional[str] = None) -> Self: + kwargs = {} + kwargs["vol_type"] = cls.VolType(attr_str[0]) + kwargs["permissions"] = cls.Permissions(attr_str[1]) + kwargs["allocation_policy"] = cls.AllocationPolicy(attr_str[2]) + kwargs["fixed_minor"] = True if attr_str[3] == "m" else False + kwargs["state"] = cls.State(attr_str[4]) + kwargs["is_open"] = cls.IsOpen(attr_str[5]) + kwargs["target_type"] = cls.TargetType(attr_str[6]) + kwargs["zero_before_use"] = True if attr_str[7] == "z" else False + kwargs["health"] = cls.Health(attr_str[8]) + kwargs["skip_activation"] = True if attr_str[9] == "k" else False + kwargs["name"] = name + return cls(**kwargs) + + @classmethod + def from_cli(cls, name: str) -> Self: + json_obj = json.loads(subprocess.run(["lvs", "--reportformat=json", name], capture_output=True).stdout) + attr_str = json_obj["report"][0]["lv"][0]["lv_attr"] + return cls.from_str(attr_str, name) diff --git a/service.py b/service.py index 2d843fa..017d6bf 100755 --- a/service.py +++ b/service.py @@ -37,6 +37,7 @@ async def main(): interval_checker(checks.ups_check, datetime.timedelta(minutes=5)), interval_checker(checks.ram_check, datetime.timedelta(minutes=1)), interval_checker(checks.vuln_check, datetime.timedelta(days=1)), + interval_checker(checks.raid_check, datetime.timedelta(days=1)), scheduled_checker( checks.docker_registry_check, period=datetime.timedelta(days=1), when=datetime.time(hour=0, minute=0) ), diff --git a/tests/test_disks.py b/tests/test_disks.py new file mode 100644 index 0000000..e431a9d --- /dev/null +++ b/tests/test_disks.py @@ -0,0 +1,27 @@ +import unittest + +from misc.disks import LVAttr + + +class TestDisks(unittest.TestCase): + def test_lv_attr_declaration(self): + self.assertEqual( + LVAttr.from_str("rwi-aor---", "Data/lvol0"), + LVAttr( + vol_type=LVAttr.VolType.RAID, + permissions=LVAttr.Permissions.WRITABLE, + allocation_policy=LVAttr.AllocationPolicy.INHERITED, + fixed_minor=False, + state=LVAttr.State.ACTIVE, + is_open=LVAttr.IsOpen.OPEN, + target_type=LVAttr.TargetType.RAID, + zero_before_use=False, + health=LVAttr.Health.OK, + skip_activation=False, + name="Data/lvol0", + ), + ) + + +if __name__ == "__main__": + unittest.main() From 071d8afb9f0399b9b01a4a2d0d4d9f792924d3db Mon Sep 17 00:00:00 2001 From: Alex Date: Sat, 9 Nov 2024 12:48:53 +0300 Subject: [PATCH 2/5] move upsstatus enum to sensors --- misc/checks.py | 9 ++++----- misc/enums.py | 21 --------------------- misc/sensors.py | 23 +++++++++++++++++++++-- 3 files changed, 25 insertions(+), 28 deletions(-) delete mode 100644 misc/enums.py diff --git a/misc/checks.py b/misc/checks.py index 64fe817..c4ccb0d 100644 --- a/misc/checks.py +++ b/misc/checks.py @@ -5,7 +5,6 @@ from datetime import timedelta from alerting import alerts from misc import cvars, docker_registry, sensors, vuln from misc.disks import LVAttr -from misc.enums import UPSStatus IS_TESTING = False @@ -126,13 +125,13 @@ async def ups_check() -> list[alerts.Alert]: ) for status in sensor.ups_status: - if IS_TESTING or status == UPSStatus.UPS_OVERLOAD: + if IS_TESTING or status == sensors.UPSStatus.UPS_OVERLOAD: alert_list.append( alerts.Alert( alert_type=alerts.AlertType.UPS, message=f"UPS is overloaded!", severity=alerts.Severity.CRITICAL ) ) - elif IS_TESTING or status == UPSStatus.ON_BATTERY: + elif IS_TESTING or status == sensors.UPSStatus.ON_BATTERY: alert_list.append( alerts.Alert( alert_type=alerts.AlertType.UPS, @@ -140,7 +139,7 @@ async def ups_check() -> list[alerts.Alert]: severity=alerts.Severity.INFO, ) ) - elif IS_TESTING or status == UPSStatus.UPS_TRIM: + elif IS_TESTING or status == sensors.UPSStatus.UPS_TRIM: alert_list.append( alerts.Alert( alert_type=alerts.AlertType.UPS, @@ -148,7 +147,7 @@ async def ups_check() -> list[alerts.Alert]: severity=alerts.Severity.INFO, ) ) - elif IS_TESTING or status == UPSStatus.UPS_BOOST: + elif IS_TESTING or status == sensors.UPSStatus.UPS_BOOST: alert_list.append( alerts.Alert( alert_type=alerts.AlertType.UPS, diff --git a/misc/enums.py b/misc/enums.py deleted file mode 100644 index 2ea1fc1..0000000 --- a/misc/enums.py +++ /dev/null @@ -1,21 +0,0 @@ -from enum import StrEnum - - -class UPSStatus(StrEnum): - """https://networkupstools.org/docs/developer-guide.chunked/new-drivers.html#_status_data""" - - ON_LINE = "OL" - ON_BATTERY = "OB" - BATTERY_LOW = "LB" - BATTERY_HIGH = "HB" - BATTERY_REPLACE = "RB" - BATTERY_CHARGING = "CHRG" - BATTERY_DISCHARGING = "DISCHRG" - UPS_BYPASS = "BYPASS" - """Battery and connected devices are not protected from power outage!""" - UPS_OFFLINE = "OFF" - UPS_OVERLOAD = "OVER" - UPS_CALIBRATION = "CAL" - UPS_TRIM = "TRIM" - UPS_BOOST = "BOOST" - UPS_FSD = "FSD" diff --git a/misc/sensors.py b/misc/sensors.py index f5082cc..6a05a54 100644 --- a/misc/sensors.py +++ b/misc/sensors.py @@ -1,12 +1,11 @@ import subprocess from dataclasses import dataclass +from enum import StrEnum from psutil import cpu_percent, sensors_temperatures, virtual_memory from alerting import alerts -from .enums import UPSStatus - @dataclass class TemperatureSensor: @@ -32,6 +31,26 @@ class RamSensor: critical_avail: int = 2 * 1024**3 +class UPSStatus(StrEnum): + """https://networkupstools.org/docs/developer-guide.chunked/new-drivers.html#_status_data""" + + ON_LINE = "OL" + ON_BATTERY = "OB" + BATTERY_LOW = "LB" + BATTERY_HIGH = "HB" + BATTERY_REPLACE = "RB" + BATTERY_CHARGING = "CHRG" + BATTERY_DISCHARGING = "DISCHRG" + UPS_BYPASS = "BYPASS" + """Battery and connected devices are not protected from power outage!""" + UPS_OFFLINE = "OFF" + UPS_OVERLOAD = "OVER" + UPS_CALIBRATION = "CAL" + UPS_TRIM = "TRIM" + UPS_BOOST = "BOOST" + UPS_FSD = "FSD" + + @dataclass class UPSSensor: ups_status: list[UPSStatus] = None From 93f5404bc121dd727e5f187fa7a915f41eb2bb1a Mon Sep 17 00:00:00 2001 From: Alex Date: Sat, 9 Nov 2024 12:53:35 +0300 Subject: [PATCH 3/5] remove useless continue clauses from checks --- misc/checks.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/misc/checks.py b/misc/checks.py index c4ccb0d..1c1b151 100644 --- a/misc/checks.py +++ b/misc/checks.py @@ -210,7 +210,6 @@ def raid_check() -> list[alerts.Alert]: severity=alerts.Severity.CRITICAL, ) ) - continue case LVAttr.Health.UNKNOWN: alert_list.append( alerts.Alert( @@ -219,7 +218,6 @@ def raid_check() -> list[alerts.Alert]: severity=alerts.Severity.CRITICAL, ) ) - continue case LVAttr.Health.REFRESH_NEEDED: alert_list.append( alerts.Alert( @@ -228,7 +226,6 @@ def raid_check() -> list[alerts.Alert]: severity=alerts.Severity.WARNING, ) ) - continue case LVAttr.Health.MISMATCHES: alert_list.append( alerts.Alert( @@ -237,6 +234,5 @@ def raid_check() -> list[alerts.Alert]: severity=alerts.Severity.WARNING, ) ) - continue return alert_list From 92ce59d6a30f634bd5d58c263b2a41886ad2ec27 Mon Sep 17 00:00:00 2001 From: Alex Date: Sat, 9 Nov 2024 13:57:32 +0300 Subject: [PATCH 4/5] check disk wearout levels --- alerting/alerts.py | 9 +- config.example.json | 16 +- misc/checks.py | 34 +- misc/disks.py | 57 ++- service.py | 1 + tests/smartctl_ata_hdd.json | 594 ++++++++++++++++++++++++++++++ tests/smartctl_ata_ssd.json | 680 +++++++++++++++++++++++++++++++++++ tests/smartctl_nvme_ssd.json | 145 ++++++++ tests/test_disks.py | 32 +- 9 files changed, 1560 insertions(+), 8 deletions(-) create mode 100644 tests/smartctl_ata_hdd.json create mode 100644 tests/smartctl_ata_ssd.json create mode 100644 tests/smartctl_nvme_ssd.json diff --git a/alerting/alerts.py b/alerting/alerts.py index a658b46..7d4e76c 100644 --- a/alerting/alerts.py +++ b/alerting/alerts.py @@ -20,14 +20,15 @@ class AlertType(StrEnum): LOGIN = "LOGIN" # TODO SMART = "SMART" # TODO RAID = "RAID" + DISKS = "DISKS" UPS = "UPS" UPDATE = "UPDATE" -class Severity(Enum): - INFO = 1 - WARNING = 2 - CRITICAL = 3 +class Severity(StrEnum): + INFO = "INFO" + WARNING = "WARNING" + CRITICAL = "CRITICAL" @dataclass diff --git a/config.example.json b/config.example.json index a3ee930..6aabe09 100644 --- a/config.example.json +++ b/config.example.json @@ -7,7 +7,21 @@ ] }, "raid": { - "lvs": ["Data/lvol0"] + "lvs": [ + "Data/lvol0" + ] + }, + "wearout": { + "disks": [ + { + "name": "/dev/sda", + "severity": "WARNING" + }, + { + "name": "/dev/nvme0", + "severity": "CRITICAL" + } + ] } } } diff --git a/misc/checks.py b/misc/checks.py index 1c1b151..4dcda77 100644 --- a/misc/checks.py +++ b/misc/checks.py @@ -4,7 +4,7 @@ from datetime import timedelta from alerting import alerts from misc import cvars, docker_registry, sensors, vuln -from misc.disks import LVAttr +from misc.disks import LVAttr, WearoutIndicator, get_wearout_reading IS_TESTING = False @@ -236,3 +236,35 @@ def raid_check() -> list[alerts.Alert]: ) return alert_list + + +def disk_wearout_check() -> list[alerts.Alert]: + check_config = cvars.config.get()["checks"]["wearout"] + alert_list = [] + for disk in check_config["disks"]: + try: + wearout_reading = get_wearout_reading(disk["name"]) + except Exception as exc: + alert_list.append( + alerts.Alert( + alert_type=alerts.AlertType.ERROR, + message=f"Could not check wearout for disk {disk['name']}: {repr(exc)}, see logs", + severity=alerts.Severity.CRITICAL, + ) + ) + logging.error(traceback.format_exc()) + continue + + if wearout_reading.current_reading < wearout_reading.threshold_reading: + match wearout_reading.indicator: + case WearoutIndicator.REALLOCATED_SECTORS: + message = f"Disk {disk['name']} has reallocated sectors (curr {wearout_reading.current_reading}, thresh {wearout_reading.threshold_reading})" + case WearoutIndicator.SPARE_BLOCKS: + message = f"Disk {disk['name']} has too few spare blocks (curr {wearout_reading.current_reading}, thresh {wearout_reading.threshold_reading})" + alert_list.append( + alerts.Alert( + alert_type=alerts.AlertType.DISKS, message=message, severity=alerts.Severity[disk["severity"]] + ) + ) + + return alert_list diff --git a/misc/disks.py b/misc/disks.py index f5a74df..c88eccd 100644 --- a/misc/disks.py +++ b/misc/disks.py @@ -1,7 +1,7 @@ import json import subprocess from dataclasses import dataclass -from enum import StrEnum +from enum import Enum, StrEnum from typing import Optional, Self @@ -132,3 +132,58 @@ class LVAttr: json_obj = json.loads(subprocess.run(["lvs", "--reportformat=json", name], capture_output=True).stdout) attr_str = json_obj["report"][0]["lv"][0]["lv_attr"] return cls.from_str(attr_str, name) + + +class WearoutIndicator(Enum): + REALLOCATED_SECTORS = 0 + SPARE_BLOCKS = 1 + + +@dataclass +class WearoutReading: + indicator: WearoutIndicator + current_reading: int + threshold_reading: int + + +def _get_wearout_reading_from_smartctl_output(smartctl_output: dict) -> WearoutReading: + disk_protocol = smartctl_output["device"]["protocol"] + rotation_rate = smartctl_output.get("rotation_rate", 0) + match rotation_rate: + case 0: # assuming non-rotating media is an SSD + indicator = WearoutIndicator.SPARE_BLOCKS + match disk_protocol: + case "ATA": + attr_table = smartctl_output["ata_smart_attributes"]["table"] + for a in attr_table: + if a["name"] == "Available_Reservd_Space": + value = a["value"] + threshold = a["thresh"] + break + else: + raise Exception(f"no Available_Reservd_Space on ATA SSD") + case "NVMe": + value = smartctl_output["nvme_smart_health_information_log"]["available_spare"] + threshold = smartctl_output["nvme_smart_health_information_log"]["available_spare_threshold"] + case _: + indicator = WearoutIndicator.REALLOCATED_SECTORS + match disk_protocol: + case "ATA": + attr_table = smartctl_output["ata_smart_attributes"]["table"] + for a in attr_table: + if a["name"] == "Reallocated_Sector_Ct": + value = a["value"] + threshold = a["thresh"] + break + else: + raise Exception(f"no Reallocated_Sector_Ct on ATA HDD") + case "NVMe": # ? NVMe HDDs are very rare, if they even exist + raise NotImplementedError + + return WearoutReading(indicator, current_reading=value, threshold_reading=threshold) + + +def get_wearout_reading(disk: str) -> WearoutReading: + smartctl_output = json.loads(subprocess.run(["smartctl", "-ja", disk], capture_output=True).stdout.decode("utf-8")) + wearout_reading = _get_wearout_reading_from_smartctl_output(smartctl_output) + return wearout_reading diff --git a/service.py b/service.py index 017d6bf..47cd9b0 100755 --- a/service.py +++ b/service.py @@ -38,6 +38,7 @@ async def main(): interval_checker(checks.ram_check, datetime.timedelta(minutes=1)), interval_checker(checks.vuln_check, datetime.timedelta(days=1)), interval_checker(checks.raid_check, datetime.timedelta(days=1)), + interval_checker(checks.disk_wearout_check, datetime.timedelta(days=1)), scheduled_checker( checks.docker_registry_check, period=datetime.timedelta(days=1), when=datetime.time(hour=0, minute=0) ), diff --git a/tests/smartctl_ata_hdd.json b/tests/smartctl_ata_hdd.json new file mode 100644 index 0000000..a03afd1 --- /dev/null +++ b/tests/smartctl_ata_hdd.json @@ -0,0 +1,594 @@ +{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 4 + ], + "pre_release": false, + "svn_revision": "5530", + "platform_info": "x86_64-linux-6.11.3-arch1-1", + "build_info": "(local build)", + "argv": [ + "smartctl", + "-ja", + "/dev/sda" + ], + "drive_database_version": { + "string": "7.3/5528" + }, + "exit_status": 0 + }, + "local_time": { + "time_t": 1731149584, + "asctime": "Sat Nov 9 13:53:04 2024 MSK" + }, + "device": { + "name": "/dev/sda", + "info_name": "/dev/sda [SAT]", + "type": "sat", + "protocol": "ATA" + }, + "model_name": "WDC WD20EARZ-00C5XB0", + "serial_number": "WD-WX32D83C15U7", + "wwn": { + "naa": 5, + "oui": 5358, + "id": 8959374949 + }, + "firmware_version": "01.01A01", + "user_capacity": { + "blocks": 3907029168, + "bytes": 2000398934016 + }, + "logical_block_size": 512, + "physical_block_size": 4096, + "rotation_rate": 5400, + "form_factor": { + "ata_value": 2, + "name": "3.5 inches" + }, + "trim": { + "supported": false + }, + "in_smartctl_database": false, + "ata_version": { + "string": "ACS-3 T13/2161-D revision 5", + "major_value": 2046, + "minor_value": 109 + }, + "sata_version": { + "string": "SATA 3.1", + "value": 126 + }, + "interface_speed": { + "max": { + "sata_value": 14, + "string": "6.0 Gb/s", + "units_per_second": 60, + "bits_per_unit": 100000000 + }, + "current": { + "sata_value": 3, + "string": "6.0 Gb/s", + "units_per_second": 60, + "bits_per_unit": 100000000 + } + }, + "smart_support": { + "available": true, + "enabled": true + }, + "smart_status": { + "passed": true + }, + "ata_smart_data": { + "offline_data_collection": { + "status": { + "value": 0, + "string": "was never started" + }, + "completion_seconds": 19380 + }, + "self_test": { + "status": { + "value": 0, + "string": "completed without error", + "passed": true + }, + "polling_minutes": { + "short": 2, + "extended": 208, + "conveyance": 5 + } + }, + "capabilities": { + "values": [ + 123, + 3 + ], + "exec_offline_immediate_supported": true, + "offline_is_aborted_upon_new_cmd": false, + "offline_surface_scan_supported": true, + "self_tests_supported": true, + "conveyance_self_test_supported": true, + "selective_self_test_supported": true, + "attribute_autosave_enabled": true, + "error_logging_supported": true, + "gp_logging_supported": true + } + }, + "ata_sct_capabilities": { + "value": 12341, + "error_recovery_control_supported": false, + "feature_control_supported": true, + "data_table_supported": true + }, + "ata_smart_attributes": { + "revision": 16, + "table": [ + { + "id": 1, + "name": "Raw_Read_Error_Rate", + "value": 200, + "worst": 200, + "thresh": 51, + "when_failed": "", + "flags": { + "value": 47, + "string": "POSR-K ", + "prefailure": true, + "updated_online": true, + "performance": true, + "error_rate": true, + "event_count": false, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 3, + "name": "Spin_Up_Time", + "value": 175, + "worst": 175, + "thresh": 21, + "when_failed": "", + "flags": { + "value": 39, + "string": "POS--K ", + "prefailure": true, + "updated_online": true, + "performance": true, + "error_rate": false, + "event_count": false, + "auto_keep": true + }, + "raw": { + "value": 2241, + "string": "2241" + } + }, + { + "id": 4, + "name": "Start_Stop_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 11, + "string": "11" + } + }, + { + "id": 5, + "name": "Reallocated_Sector_Ct", + "value": 200, + "worst": 200, + "thresh": 140, + "when_failed": "", + "flags": { + "value": 51, + "string": "PO--CK ", + "prefailure": true, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 7, + "name": "Seek_Error_Rate", + "value": 100, + "worst": 253, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 46, + "string": "-OSR-K ", + "prefailure": false, + "updated_online": true, + "performance": true, + "error_rate": true, + "event_count": false, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 9, + "name": "Power_On_Hours", + "value": 96, + "worst": 96, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 3275, + "string": "3275" + } + }, + { + "id": 10, + "name": "Spin_Retry_Count", + "value": 100, + "worst": 253, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 11, + "name": "Calibration_Retry_Count", + "value": 100, + "worst": 253, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 12, + "name": "Power_Cycle_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 10, + "string": "10" + } + }, + { + "id": 192, + "name": "Power-Off_Retract_Count", + "value": 200, + "worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 6, + "string": "6" + } + }, + { + "id": 193, + "name": "Load_Cycle_Count", + "value": 200, + "worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 28, + "string": "28" + } + }, + { + "id": 194, + "name": "Temperature_Celsius", + "value": 112, + "worst": 105, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 34, + "string": "-O---K ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": true + }, + "raw": { + "value": 31, + "string": "31" + } + }, + { + "id": 196, + "name": "Reallocated_Event_Count", + "value": 200, + "worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 197, + "name": "Current_Pending_Sector", + "value": 200, + "worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 198, + "name": "Offline_Uncorrectable", + "value": 100, + "worst": 253, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 48, + "string": "----CK ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 199, + "name": "UDMA_CRC_Error_Count", + "value": 200, + "worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 200, + "name": "Multi_Zone_Error_Rate", + "value": 100, + "worst": 253, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 8, + "string": "---R-- ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": true, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + } + ] + }, + "power_on_time": { + "hours": 3275 + }, + "power_cycle_count": 10, + "temperature": { + "current": 31 + }, + "ata_smart_error_log": { + "summary": { + "revision": 1, + "count": 0 + } + }, + "ata_smart_self_test_log": { + "standard": { + "revision": 1, + "table": [ + { + "type": { + "value": 3, + "string": "Conveyance offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 0 + } + ], + "count": 1, + "error_count_total": 0, + "error_count_outdated": 0 + } + }, + "ata_smart_selective_self_test_log": { + "revision": 1, + "table": [ + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + } + ], + "flags": { + "value": 0, + "remainder_scan_enabled": false + }, + "power_up_scan_resume_minutes": 0 + } +} diff --git a/tests/smartctl_ata_ssd.json b/tests/smartctl_ata_ssd.json new file mode 100644 index 0000000..13760ce --- /dev/null +++ b/tests/smartctl_ata_ssd.json @@ -0,0 +1,680 @@ +{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 4 + ], + "pre_release": false, + "svn_revision": "5530", + "platform_info": "x86_64-linux-6.11.6-arch1-1", + "build_info": "(local build)", + "argv": [ + "smartctl", + "-ja", + "/dev/sda" + ], + "drive_database_version": { + "string": "7.3/5528" + }, + "exit_status": 0 + }, + "local_time": { + "time_t": 1731149676, + "asctime": "Sat Nov 9 13:54:36 2024 MSK" + }, + "device": { + "name": "/dev/sda", + "info_name": "/dev/sda [SAT]", + "type": "sat", + "protocol": "ATA" + }, + "model_family": "WD Blue / Red / Green SSDs", + "model_name": "WDC WDS100T2G0A-00JH30", + "serial_number": "20299A802244", + "wwn": { + "naa": 5, + "oui": 6980, + "id": 37501727029 + }, + "firmware_version": "UH510000", + "user_capacity": { + "blocks": 1953529856, + "bytes": 1000207286272 + }, + "logical_block_size": 512, + "physical_block_size": 512, + "rotation_rate": 0, + "form_factor": { + "ata_value": 3, + "name": "2.5 inches" + }, + "trim": { + "supported": true, + "deterministic": true, + "zeroed": false + }, + "in_smartctl_database": true, + "ata_version": { + "string": "ACS-2 T13/2015-D revision 3", + "major_value": 1008, + "minor_value": 272 + }, + "sata_version": { + "string": "SATA 3.2", + "value": 255 + }, + "interface_speed": { + "max": { + "sata_value": 14, + "string": "6.0 Gb/s", + "units_per_second": 60, + "bits_per_unit": 100000000 + }, + "current": { + "sata_value": 3, + "string": "6.0 Gb/s", + "units_per_second": 60, + "bits_per_unit": 100000000 + } + }, + "smart_support": { + "available": true, + "enabled": true + }, + "smart_status": { + "passed": true + }, + "ata_smart_data": { + "offline_data_collection": { + "status": { + "value": 0, + "string": "was never started" + }, + "completion_seconds": 120 + }, + "self_test": { + "status": { + "value": 0, + "string": "completed without error", + "passed": true + }, + "polling_minutes": { + "short": 2, + "extended": 182 + } + }, + "capabilities": { + "values": [ + 21, + 3 + ], + "exec_offline_immediate_supported": true, + "offline_is_aborted_upon_new_cmd": true, + "offline_surface_scan_supported": false, + "self_tests_supported": true, + "conveyance_self_test_supported": false, + "selective_self_test_supported": false, + "attribute_autosave_enabled": true, + "error_logging_supported": true, + "gp_logging_supported": true + } + }, + "ata_smart_attributes": { + "revision": 1, + "table": [ + { + "id": 5, + "name": "Reallocated_Sector_Ct", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 9, + "name": "Power_On_Hours", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 7976, + "string": "7976" + } + }, + { + "id": 12, + "name": "Power_Cycle_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 1218, + "string": "1218" + } + }, + { + "id": 165, + "name": "Block_Erase_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 2063, + "string": "2063" + } + }, + { + "id": 166, + "name": "Minimum_PE_Cycles_TLC", + "value": 100, + "worst": 100, + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 15, + "string": "15" + } + }, + { + "id": 167, + "name": "Max_Bad_Blocks_per_Die", + "value": 100, + "worst": 100, + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 168, + "name": "Maximum_PE_Cycles_TLC", + "value": 100, + "worst": 100, + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 36, + "string": "36" + } + }, + { + "id": 169, + "name": "Total_Bad_Blocks", + "value": 100, + "worst": 100, + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 1075, + "string": "1075" + } + }, + { + "id": 170, + "name": "Grown_Bad_Blocks", + "value": 100, + "worst": 100, + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 171, + "name": "Program_Fail_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 172, + "name": "Erase_Fail_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 173, + "name": "Average_PE_Cycles_TLC", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 15, + "string": "15" + } + }, + { + "id": 174, + "name": "Unexpected_Power_Loss", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 144, + "string": "144" + } + }, + { + "id": 184, + "name": "End-to-End_Error", + "value": 100, + "worst": 100, + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 187, + "name": "Reported_Uncorrect", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 188, + "name": "Command_Timeout", + "value": 100, + "worst": 100, + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 194, + "name": "Temperature_Celsius", + "value": 70, + "worst": 58, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 34, + "string": "-O---K ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": true + }, + "raw": { + "value": 249108103198, + "string": "30 (Min/Max 0/58)" + } + }, + { + "id": 199, + "name": "UDMA_CRC_Error_Count", + "value": 100, + "worst": 100, + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 230, + "name": "Media_Wearout_Indicator", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 5879860561241, + "string": "0x055903000559" + } + }, + { + "id": 232, + "name": "Available_Reservd_Space", + "value": 100, + "worst": 100, + "thresh": 5, + "when_failed": "", + "flags": { + "value": 51, + "string": "PO--CK ", + "prefailure": true, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 100, + "string": "100" + } + }, + { + "id": 233, + "name": "NAND_GB_Written_TLC", + "value": 100, + "worst": 100, + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 15654, + "string": "15654" + } + }, + { + "id": 234, + "name": "NAND_GB_Written_SLC", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 32244, + "string": "32244" + } + }, + { + "id": 241, + "name": "Host_Writes_GiB", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 48, + "string": "----CK ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 13847, + "string": "13847" + } + }, + { + "id": 242, + "name": "Host_Reads_GiB", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 48, + "string": "----CK ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 31195, + "string": "31195" + } + }, + { + "id": 244, + "name": "Temp_Throttle_Status", + "value": 0, + "worst": 100, + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + } + ] + }, + "power_on_time": { + "hours": 7976 + }, + "power_cycle_count": 1218, + "temperature": { + "current": 30 + }, + "ata_smart_error_log": { + "summary": { + "revision": 1, + "count": 0 + } + }, + "ata_smart_self_test_log": { + "standard": { + "revision": 1, + "count": 0 + } + } +} diff --git a/tests/smartctl_nvme_ssd.json b/tests/smartctl_nvme_ssd.json new file mode 100644 index 0000000..fac63de --- /dev/null +++ b/tests/smartctl_nvme_ssd.json @@ -0,0 +1,145 @@ +{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 4 + ], + "pre_release": false, + "svn_revision": "5530", + "platform_info": "x86_64-linux-6.11.3-arch1-1", + "build_info": "(local build)", + "argv": [ + "smartctl", + "-ja", + "/dev/nvme0" + ], + "exit_status": 0 + }, + "local_time": { + "time_t": 1731149045, + "asctime": "Sat Nov 9 13:44:05 2024 MSK" + }, + "device": { + "name": "/dev/nvme0", + "info_name": "/dev/nvme0", + "type": "nvme", + "protocol": "NVMe" + }, + "model_name": "Samsung SSD 970 EVO Plus 1TB", + "serial_number": "S4EWNM0W921977B", + "firmware_version": "2B2QEXM7", + "nvme_pci_vendor": { + "id": 5197, + "subsystem_id": 5197 + }, + "nvme_ieee_oui_identifier": 9528, + "nvme_total_capacity": 1000204886016, + "nvme_unallocated_capacity": 0, + "nvme_controller_id": 4, + "nvme_version": { + "string": "1.3", + "value": 66304 + }, + "nvme_number_of_namespaces": 1, + "nvme_namespaces": [ + { + "id": 1, + "size": { + "blocks": 1953525168, + "bytes": 1000204886016 + }, + "capacity": { + "blocks": 1953525168, + "bytes": 1000204886016 + }, + "utilization": { + "blocks": 686279048, + "bytes": 351374872576 + }, + "formatted_lba_size": 512, + "eui64": { + "oui": 9528, + "ext_id": 383083641036 + } + } + ], + "user_capacity": { + "blocks": 1953525168, + "bytes": 1000204886016 + }, + "logical_block_size": 512, + "smart_support": { + "available": true, + "enabled": true + }, + "smart_status": { + "passed": true, + "nvme": { + "value": 0 + } + }, + "nvme_smart_health_information_log": { + "critical_warning": 0, + "temperature": 47, + "available_spare": 100, + "available_spare_threshold": 10, + "percentage_used": 0, + "data_units_read": 111588, + "data_units_written": 1802957, + "host_reads": 2570341, + "host_writes": 36266417, + "controller_busy_time": 133, + "power_cycles": 31, + "power_on_hours": 432, + "unsafe_shutdowns": 18, + "media_errors": 0, + "num_err_log_entries": 63, + "warning_temp_time": 0, + "critical_comp_time": 0, + "temperature_sensors": [ + 47, + 51 + ] + }, + "temperature": { + "current": 47 + }, + "power_cycle_count": 31, + "power_on_time": { + "hours": 432 + }, + "nvme_error_information_log": { + "size": 64, + "read": 16, + "unread": 0, + "table": [ + { + "error_count": 63, + "submission_queue_id": 0, + "command_id": 8, + "status_field": { + "value": 8194, + "do_not_retry": false, + "status_code_type": 0, + "status_code": 2, + "string": "Invalid Field in Command" + }, + "phase_tag": false, + "lba": { + "value": 0 + }, + "nsid": 0 + } + ] + }, + "nvme_self_test_log": { + "current_self_test_operation": { + "value": 0, + "string": "No self-test in progress" + } + } +} diff --git a/tests/test_disks.py b/tests/test_disks.py index e431a9d..0765b76 100644 --- a/tests/test_disks.py +++ b/tests/test_disks.py @@ -1,6 +1,12 @@ +import json import unittest -from misc.disks import LVAttr +from misc.disks import ( + LVAttr, + WearoutIndicator, + WearoutReading, + _get_wearout_reading_from_smartctl_output, +) class TestDisks(unittest.TestCase): @@ -22,6 +28,30 @@ class TestDisks(unittest.TestCase): ), ) + def test_wearout_reading_nvme_ssd(self): + with open("tests/smartctl_nvme_ssd.json") as f: + smartctl_output = json.load(f) + self.assertEqual( + _get_wearout_reading_from_smartctl_output(smartctl_output), + WearoutReading(indicator=WearoutIndicator.SPARE_BLOCKS, current_reading=100, threshold_reading=10), + ) + + def test_wearout_reading_ata_hdd(self): + with open("tests/smartctl_ata_hdd.json") as f: + smartctl_output = json.load(f) + self.assertEqual( + _get_wearout_reading_from_smartctl_output(smartctl_output), + WearoutReading(indicator=WearoutIndicator.REALLOCATED_SECTORS, current_reading=200, threshold_reading=140), + ) + + def test_wearout_reading_ata_ssd(self): + with open("tests/smartctl_ata_ssd.json") as f: + smartctl_output = json.load(f) + self.assertEqual( + _get_wearout_reading_from_smartctl_output(smartctl_output), + WearoutReading(indicator=WearoutIndicator.SPARE_BLOCKS, current_reading=100, threshold_reading=5), + ) + if __name__ == "__main__": unittest.main() From 6fc68c20c57c12e74fa4550b80ffbbbe5066f9d0 Mon Sep 17 00:00:00 2001 From: Alex Date: Sat, 9 Nov 2024 14:06:43 +0300 Subject: [PATCH 5/5] don't forget to test alerts themselves --- misc/checks.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/misc/checks.py b/misc/checks.py index 4dcda77..8fa1c64 100644 --- a/misc/checks.py +++ b/misc/checks.py @@ -201,6 +201,15 @@ def raid_check() -> list[alerts.Alert]: ) continue + if IS_TESTING: + alert_list.append( + alerts.Alert( + alert_type=alerts.AlertType.RAID, + message=f"Test alert: LV {lv} health is {lv_attr.health}", + severity=alerts.Severity.INFO, + ) + ) + match lv_attr.health: case LVAttr.Health.PARTIAL: alert_list.append( @@ -255,7 +264,7 @@ def disk_wearout_check() -> list[alerts.Alert]: logging.error(traceback.format_exc()) continue - if wearout_reading.current_reading < wearout_reading.threshold_reading: + if IS_TESTING or wearout_reading.current_reading < wearout_reading.threshold_reading: match wearout_reading.indicator: case WearoutIndicator.REALLOCATED_SECTORS: message = f"Disk {disk['name']} has reallocated sectors (curr {wearout_reading.current_reading}, thresh {wearout_reading.threshold_reading})"