mirror of
https://forgejo.altau.su/lego/lego-monitoring.git
synced 2026-03-10 04:41:10 +00:00
check disk wearout levels
This commit is contained in:
parent
93f5404bc1
commit
92ce59d6a3
9 changed files with 1560 additions and 8 deletions
|
|
@ -4,7 +4,7 @@ from datetime import timedelta
|
|||
|
||||
from alerting import alerts
|
||||
from misc import cvars, docker_registry, sensors, vuln
|
||||
from misc.disks import LVAttr
|
||||
from misc.disks import LVAttr, WearoutIndicator, get_wearout_reading
|
||||
|
||||
IS_TESTING = False
|
||||
|
||||
|
|
@ -236,3 +236,35 @@ def raid_check() -> list[alerts.Alert]:
|
|||
)
|
||||
|
||||
return alert_list
|
||||
|
||||
|
||||
def disk_wearout_check() -> list[alerts.Alert]:
|
||||
check_config = cvars.config.get()["checks"]["wearout"]
|
||||
alert_list = []
|
||||
for disk in check_config["disks"]:
|
||||
try:
|
||||
wearout_reading = get_wearout_reading(disk["name"])
|
||||
except Exception as exc:
|
||||
alert_list.append(
|
||||
alerts.Alert(
|
||||
alert_type=alerts.AlertType.ERROR,
|
||||
message=f"Could not check wearout for disk {disk['name']}: {repr(exc)}, see logs",
|
||||
severity=alerts.Severity.CRITICAL,
|
||||
)
|
||||
)
|
||||
logging.error(traceback.format_exc())
|
||||
continue
|
||||
|
||||
if wearout_reading.current_reading < wearout_reading.threshold_reading:
|
||||
match wearout_reading.indicator:
|
||||
case WearoutIndicator.REALLOCATED_SECTORS:
|
||||
message = f"Disk {disk['name']} has reallocated sectors (curr {wearout_reading.current_reading}, thresh {wearout_reading.threshold_reading})"
|
||||
case WearoutIndicator.SPARE_BLOCKS:
|
||||
message = f"Disk {disk['name']} has too few spare blocks (curr {wearout_reading.current_reading}, thresh {wearout_reading.threshold_reading})"
|
||||
alert_list.append(
|
||||
alerts.Alert(
|
||||
alert_type=alerts.AlertType.DISKS, message=message, severity=alerts.Severity[disk["severity"]]
|
||||
)
|
||||
)
|
||||
|
||||
return alert_list
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue