use NestedDeserializableDataclass for config

This commit is contained in:
Alex 2025-01-07 01:43:39 +03:00
parent 96664684f8
commit 3eb358d618
13 changed files with 188 additions and 130 deletions

View file

@ -1,34 +1,14 @@
import json import json
from dataclasses import dataclass from dataclasses import dataclass
from enum import Enum, StrEnum
from typing import Optional from typing import Optional
import aiofiles import aiofiles
import nio import nio
from alerting.common import CONFIG_FILE from alerting.enum import AlertType, Severity
from misc import cvars from misc import cvars
from misc.common import CONFIG_FILE
from misc.config import get_config
class AlertType(StrEnum):
TEST = "TEST"
ERROR = "ERROR"
RAM = "RAM"
CPU = "CPU"
TEMP = "TEMP"
VULN = "VULN"
LOGIN = "LOGIN" # TODO
SMART = "SMART" # TODO
RAID = "RAID"
DISKS = "DISKS"
UPS = "UPS"
UPDATE = "UPDATE"
class Severity(StrEnum):
INFO = "INFO"
WARNING = "WARNING"
CRITICAL = "CRITICAL"
@dataclass @dataclass
@ -44,11 +24,11 @@ async def get_client() -> nio.AsyncClient:
Returns a Matrix client. Returns a Matrix client.
It is better to call get_client once and use it for multiple send_alert calls It is better to call get_client once and use it for multiple send_alert calls
""" """
matrix_cfg = cvars.config.get()["matrix"] matrix_cfg = cvars.config.get().matrix
client = nio.AsyncClient(matrix_cfg["homeserver"]) client = nio.AsyncClient(matrix_cfg.homeserver)
client.access_token = matrix_cfg["access_token"] client.access_token = matrix_cfg.access_token
client.user_id = matrix_cfg["user_id"] client.user_id = matrix_cfg.user_id
client.device_id = matrix_cfg["device_id"] client.device_id = matrix_cfg.device_id
return client return client
@ -72,15 +52,13 @@ async def send_alert(alert: Alert) -> None:
try: try:
client = cvars.matrix_client.get() client = cvars.matrix_client.get()
except LookupError: # being called standalone except LookupError: # being called standalone
async with aiofiles.open(CONFIG_FILE) as f: cvars.config.set(get_config())
contents = await f.read()
cvars.config.set(json.loads(contents))
temp_client = True temp_client = True
client = await get_client() client = await get_client()
cvars.matrix_client.set(client) cvars.matrix_client.set(client)
else: else:
temp_client = False temp_client = False
room_id = cvars.config.get()["matrix"]["room_id"] room_id = cvars.config.get().matrix.room_id
message, html_message = format_message(alert) message, html_message = format_message(alert)
content = { content = {
"msgtype": "m.text", "msgtype": "m.text",

22
alerting/enum.py Normal file
View file

@ -0,0 +1,22 @@
from enum import StrEnum
class AlertType(StrEnum):
TEST = "TEST"
ERROR = "ERROR"
RAM = "RAM"
CPU = "CPU"
TEMP = "TEMP"
VULN = "VULN"
LOGIN = "LOGIN" # TODO
SMART = "SMART" # TODO
RAID = "RAID"
DISKS = "DISKS"
UPS = "UPS"
UPDATE = "UPDATE"
class Severity(StrEnum):
INFO = "INFO"
WARNING = "WARNING"
CRITICAL = "CRITICAL"

View file

@ -4,9 +4,10 @@ import getpass
import json import json
import os import os
from common import CONFIG_FILE
from nio import AsyncClient, LoginResponse from nio import AsyncClient, LoginResponse
from misc.common import CONFIG_FILE
async def main() -> None: async def main() -> None:
try: try:

View file

@ -3,6 +3,7 @@ import traceback
from datetime import timedelta from datetime import timedelta
from alerting import alerts from alerting import alerts
from alerting.enum import AlertType, Severity
from misc import cvars, docker_registry, sensors, vuln from misc import cvars, docker_registry, sensors, vuln
from misc.disks import LVAttr, WearoutIndicator, get_wearout_reading from misc.disks import LVAttr, WearoutIndicator, get_wearout_reading
@ -18,15 +19,15 @@ def temp_check() -> list[alerts.Alert]:
continue # little valuable info and too low limits there, might as well ignore it continue # little valuable info and too low limits there, might as well ignore it
if sensor.critical_temp is not None and (IS_TESTING or sensor.current_temp > sensor.critical_temp): if sensor.critical_temp is not None and (IS_TESTING or sensor.current_temp > sensor.critical_temp):
alert = alerts.Alert( alert = alerts.Alert(
alert_type=alerts.AlertType("TEMP"), alert_type=AlertType("TEMP"),
message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {sensor.critical_temp}°C", message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {sensor.critical_temp}°C",
severity=alerts.Severity.CRITICAL, severity=Severity.CRITICAL,
) )
elif sensor.highest_temp is not None and (IS_TESTING or sensor.current_temp > sensor.highest_temp): elif sensor.highest_temp is not None and (IS_TESTING or sensor.current_temp > sensor.highest_temp):
alert = alerts.Alert( alert = alerts.Alert(
alert_type=alerts.AlertType("TEMP"), alert_type=AlertType("TEMP"),
message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {sensor.highest_temp}°C", message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {sensor.highest_temp}°C",
severity=alerts.Severity.WARNING, severity=Severity.WARNING,
) )
else: else:
continue continue
@ -38,15 +39,15 @@ def cpu_check() -> list[alerts.Alert]:
sensor = sensors.Sensors.get_cpu() sensor = sensors.Sensors.get_cpu()
if IS_TESTING or sensor.current_load > sensor.critical_load: if IS_TESTING or sensor.current_load > sensor.critical_load:
alert = alerts.Alert( alert = alerts.Alert(
alert_type=alerts.AlertType("CPU"), alert_type=AlertType("CPU"),
message=f"{sensor.current_load}% > {sensor.critical_load}%", message=f"{sensor.current_load}% > {sensor.critical_load}%",
severity=alerts.Severity.CRITICAL, severity=Severity.CRITICAL,
) )
elif IS_TESTING or sensor.current_load > sensor.highest_load: elif IS_TESTING or sensor.current_load > sensor.highest_load:
alert = alerts.Alert( alert = alerts.Alert(
alert_type=alerts.AlertType("CPU"), alert_type=AlertType("CPU"),
message=f"{sensor.current_load}% > {sensor.highest_load}%", message=f"{sensor.current_load}% > {sensor.highest_load}%",
severity=alerts.Severity.WARNING, severity=Severity.WARNING,
) )
else: else:
return [] return []
@ -57,15 +58,15 @@ def ram_check() -> list[alerts.Alert]:
sensor = sensors.Sensors.get_ram() sensor = sensors.Sensors.get_ram()
if IS_TESTING or sensor.current_avail < sensor.critical_avail: if IS_TESTING or sensor.current_avail < sensor.critical_avail:
alert = alerts.Alert( alert = alerts.Alert(
alert_type=alerts.AlertType("RAM"), alert_type=AlertType("RAM"),
message=f"{(sensor.current_avail / 1024**3):.2f} GiB < {(sensor.critical_avail / 1024**3):.2f} GiB", message=f"{(sensor.current_avail / 1024**3):.2f} GiB < {(sensor.critical_avail / 1024**3):.2f} GiB",
severity=alerts.Severity.CRITICAL, severity=Severity.CRITICAL,
) )
elif IS_TESTING or sensor.current_avail < sensor.warning_avail: elif IS_TESTING or sensor.current_avail < sensor.warning_avail:
alert = alerts.Alert( alert = alerts.Alert(
alert_type=alerts.AlertType("RAM"), alert_type=AlertType("RAM"),
message=f"{(sensor.current_avail / 1024**3):.2f} GiB < {(sensor.warning_avail / 1024**3):.2f} GiB", message=f"{(sensor.current_avail / 1024**3):.2f} GiB < {(sensor.warning_avail / 1024**3):.2f} GiB",
severity=alerts.Severity.WARNING, severity=Severity.WARNING,
) )
else: else:
return [] return []
@ -79,18 +80,18 @@ async def vuln_check() -> list[alerts.Alert]:
if IS_TESTING or v.fixed or v.severity in (vuln.Severity.HIGH, vuln.Severity.CRITICAL): if IS_TESTING or v.fixed or v.severity in (vuln.Severity.HIGH, vuln.Severity.CRITICAL):
match v.severity: match v.severity:
case vuln.Severity.LOW: case vuln.Severity.LOW:
severity = alerts.Severity.INFO severity = Severity.INFO
case vuln.Severity.MEDIUM: case vuln.Severity.MEDIUM:
severity = alerts.Severity.WARNING severity = Severity.WARNING
case vuln.Severity.HIGH | vuln.Severity.CRITICAL: case vuln.Severity.HIGH | vuln.Severity.CRITICAL:
severity = alerts.Severity.CRITICAL severity = Severity.CRITICAL
message = f"{v.id}: {v.vuln_type} in {','.join(v.packages)}" message = f"{v.id}: {v.vuln_type} in {','.join(v.packages)}"
html_message = f"<a href='{v.link}'>{v.id}</a>: {v.vuln_type} in {','.join(v.packages)}" html_message = f"<a href='{v.link}'>{v.id}</a>: {v.vuln_type} in {','.join(v.packages)}"
if v.fixed: if v.fixed:
message.append(f" -- update to {v.fixed} ASAP") message.append(f" -- update to {v.fixed} ASAP")
html_message.append(f" -- update to {v.fixed} ASAP") html_message.append(f" -- update to {v.fixed} ASAP")
alert = alerts.Alert( alert = alerts.Alert(
alert_type=alerts.AlertType.VULN, alert_type=AlertType.VULN,
message=message, message=message,
html_message=html_message, html_message=html_message,
severity=severity, severity=severity,
@ -110,49 +111,47 @@ async def ups_check() -> list[alerts.Alert]:
if IS_TESTING or sensor.battery_charge_percentage < sensor.battery_critical_percentage: if IS_TESTING or sensor.battery_charge_percentage < sensor.battery_critical_percentage:
alert_list.append( alert_list.append(
alerts.Alert( alerts.Alert(
alert_type=alerts.AlertType.UPS, alert_type=AlertType.UPS,
message=f"Battery is under {sensor.battery_critical_percentage}%\n{sensor.battery_charge_percentage}% ({timedelta(seconds=sensor.battery_runtime)}) remaining.", message=f"Battery is under {sensor.battery_critical_percentage}%\n{sensor.battery_charge_percentage}% ({timedelta(seconds=sensor.battery_runtime)}) remaining.",
severity=alerts.Severity.CRITICAL, severity=Severity.CRITICAL,
) )
) )
elif IS_TESTING or sensor.battery_charge_percentage < sensor.battery_warning_percentage: elif IS_TESTING or sensor.battery_charge_percentage < sensor.battery_warning_percentage:
alert_list.append( alert_list.append(
alerts.Alert( alerts.Alert(
alert_type=alerts.AlertType.UPS, alert_type=AlertType.UPS,
message=f"Battery is under {sensor.battery_warning_percentage}%\n{sensor.battery_charge_percentage}% ({timedelta(seconds=sensor.battery_runtime)}) remaining.", message=f"Battery is under {sensor.battery_warning_percentage}%\n{sensor.battery_charge_percentage}% ({timedelta(seconds=sensor.battery_runtime)}) remaining.",
severity=alerts.Severity.WARNING, severity=Severity.WARNING,
) )
) )
for status in sensor.ups_status: for status in sensor.ups_status:
if IS_TESTING or status == sensors.UPSStatus.UPS_OVERLOAD: if IS_TESTING or status == sensors.UPSStatus.UPS_OVERLOAD:
alert_list.append( alert_list.append(
alerts.Alert( alerts.Alert(alert_type=AlertType.UPS, message=f"UPS is overloaded!", severity=Severity.CRITICAL)
alert_type=alerts.AlertType.UPS, message=f"UPS is overloaded!", severity=alerts.Severity.CRITICAL
)
) )
elif IS_TESTING or status == sensors.UPSStatus.ON_BATTERY: elif IS_TESTING or status == sensors.UPSStatus.ON_BATTERY:
alert_list.append( alert_list.append(
alerts.Alert( alerts.Alert(
alert_type=alerts.AlertType.UPS, alert_type=AlertType.UPS,
message=f"UPS is on battery.\n{sensor.battery_charge_percentage}% ({timedelta(seconds=sensor.battery_runtime)}) remaining.", message=f"UPS is on battery.\n{sensor.battery_charge_percentage}% ({timedelta(seconds=sensor.battery_runtime)}) remaining.",
severity=alerts.Severity.INFO, severity=Severity.INFO,
) )
) )
elif IS_TESTING or status == sensors.UPSStatus.UPS_TRIM: elif IS_TESTING or status == sensors.UPSStatus.UPS_TRIM:
alert_list.append( alert_list.append(
alerts.Alert( alerts.Alert(
alert_type=alerts.AlertType.UPS, alert_type=AlertType.UPS,
message=f"Overvoltage detected: trimming voltage to nominal.", message=f"Overvoltage detected: trimming voltage to nominal.",
severity=alerts.Severity.INFO, severity=Severity.INFO,
) )
) )
elif IS_TESTING or status == sensors.UPSStatus.UPS_BOOST: elif IS_TESTING or status == sensors.UPSStatus.UPS_BOOST:
alert_list.append( alert_list.append(
alerts.Alert( alerts.Alert(
alert_type=alerts.AlertType.UPS, alert_type=AlertType.UPS,
message=f"Undervoltage detected: boosting voltage to nominal.", message=f"Undervoltage detected: boosting voltage to nominal.",
severity=alerts.Severity.INFO, severity=Severity.INFO,
) )
) )
@ -165,26 +164,26 @@ async def docker_registry_check() -> list[alerts.Alert]:
for image in updated_images: for image in updated_images:
alert_list.append( alert_list.append(
alerts.Alert( alerts.Alert(
alert_type=alerts.AlertType.UPDATE, alert_type=AlertType.UPDATE,
message=f"{image} docker image: new version available", message=f"{image} docker image: new version available",
severity=alerts.Severity.INFO, severity=Severity.INFO,
) )
) )
return alert_list return alert_list
def raid_check() -> list[alerts.Alert]: def raid_check() -> list[alerts.Alert]:
check_config = cvars.config.get()["checks"]["raid"] check_config = cvars.config.get().checks.raid
alert_list = [] alert_list = []
for lv in check_config["lvs"]: for lv in check_config.lvs:
try: try:
lv_attr = LVAttr.from_cli(lv) lv_attr = LVAttr.from_cli(lv)
except Exception as exc: except Exception as exc:
alert_list.append( alert_list.append(
alerts.Alert( alerts.Alert(
alert_type=alerts.AlertType.ERROR, alert_type=AlertType.ERROR,
message=f"Could not check RAID LV {lv}: {repr(exc)}, see logs", message=f"Could not check RAID LV {lv}: {repr(exc)}, see logs",
severity=alerts.Severity.CRITICAL, severity=Severity.CRITICAL,
) )
) )
logging.error(traceback.format_exc()) logging.error(traceback.format_exc())
@ -194,9 +193,9 @@ def raid_check() -> list[alerts.Alert]:
if lv_attr.vol_type not in [LVAttr.VolType.RAID, LVAttr.VolType.RAID_NOSYNC]: if lv_attr.vol_type not in [LVAttr.VolType.RAID, LVAttr.VolType.RAID_NOSYNC]:
alert_list.append( alert_list.append(
alerts.Alert( alerts.Alert(
alert_type=alerts.AlertType.ERROR, alert_type=AlertType.ERROR,
message=f"LV {lv} is not of RAID type", message=f"LV {lv} is not of RAID type",
severity=alerts.Severity.CRITICAL, severity=Severity.CRITICAL,
) )
) )
continue continue
@ -204,9 +203,9 @@ def raid_check() -> list[alerts.Alert]:
if IS_TESTING: if IS_TESTING:
alert_list.append( alert_list.append(
alerts.Alert( alerts.Alert(
alert_type=alerts.AlertType.RAID, alert_type=AlertType.RAID,
message=f"Test alert: LV {lv} health is {lv_attr.health}", message=f"Test alert: LV {lv} health is {lv_attr.health}",
severity=alerts.Severity.INFO, severity=Severity.INFO,
) )
) )
@ -214,33 +213,33 @@ def raid_check() -> list[alerts.Alert]:
case LVAttr.Health.PARTIAL: case LVAttr.Health.PARTIAL:
alert_list.append( alert_list.append(
alerts.Alert( alerts.Alert(
alert_type=alerts.AlertType.RAID, alert_type=AlertType.RAID,
message=f"LV {lv} operating in partial mode; one of PVs has failed", message=f"LV {lv} operating in partial mode; one of PVs has failed",
severity=alerts.Severity.CRITICAL, severity=Severity.CRITICAL,
) )
) )
case LVAttr.Health.UNKNOWN: case LVAttr.Health.UNKNOWN:
alert_list.append( alert_list.append(
alerts.Alert( alerts.Alert(
alert_type=alerts.AlertType.RAID, alert_type=AlertType.RAID,
message=f"LV {lv}'s state is unknown", message=f"LV {lv}'s state is unknown",
severity=alerts.Severity.CRITICAL, severity=Severity.CRITICAL,
) )
) )
case LVAttr.Health.REFRESH_NEEDED: case LVAttr.Health.REFRESH_NEEDED:
alert_list.append( alert_list.append(
alerts.Alert( alerts.Alert(
alert_type=alerts.AlertType.RAID, alert_type=AlertType.RAID,
message=f"LV {lv} has suffered a write error; run a refresh or replace the failing PV", message=f"LV {lv} has suffered a write error; run a refresh or replace the failing PV",
severity=alerts.Severity.WARNING, severity=Severity.WARNING,
) )
) )
case LVAttr.Health.MISMATCHES: case LVAttr.Health.MISMATCHES:
alert_list.append( alert_list.append(
alerts.Alert( alerts.Alert(
alert_type=alerts.AlertType.RAID, alert_type=AlertType.RAID,
message=f"LV {lv} is partially incoherent; run a repairing scrub operation", message=f"LV {lv} is partially incoherent; run a repairing scrub operation",
severity=alerts.Severity.WARNING, severity=Severity.WARNING,
) )
) )
@ -248,17 +247,17 @@ def raid_check() -> list[alerts.Alert]:
def disk_wearout_check() -> list[alerts.Alert]: def disk_wearout_check() -> list[alerts.Alert]:
check_config = cvars.config.get()["checks"]["wearout"] check_config = cvars.config.get().checks.wearout
alert_list = [] alert_list = []
for disk in check_config["disks"]: for disk in check_config.disks:
try: try:
wearout_reading = get_wearout_reading(disk["name"]) wearout_reading = get_wearout_reading(disk.name)
except Exception as exc: except Exception as exc:
alert_list.append( alert_list.append(
alerts.Alert( alerts.Alert(
alert_type=alerts.AlertType.ERROR, alert_type=AlertType.ERROR,
message=f"Could not check wearout for disk {disk['name']}: {repr(exc)}, see logs", message=f"Could not check wearout for disk {disk.name}: {repr(exc)}, see logs",
severity=alerts.Severity.CRITICAL, severity=Severity.CRITICAL,
) )
) )
logging.error(traceback.format_exc()) logging.error(traceback.format_exc())
@ -267,13 +266,11 @@ def disk_wearout_check() -> list[alerts.Alert]:
if IS_TESTING or wearout_reading.current_reading < wearout_reading.threshold_reading: if IS_TESTING or wearout_reading.current_reading < wearout_reading.threshold_reading:
match wearout_reading.indicator: match wearout_reading.indicator:
case WearoutIndicator.REALLOCATED_SECTORS: case WearoutIndicator.REALLOCATED_SECTORS:
message = f"Disk {disk['name']} has reallocated sectors (curr {wearout_reading.current_reading}, thresh {wearout_reading.threshold_reading})" message = f"Disk {disk.name} has reallocated sectors (curr {wearout_reading.current_reading}, thresh {wearout_reading.threshold_reading})"
case WearoutIndicator.SPARE_BLOCKS: case WearoutIndicator.SPARE_BLOCKS:
message = f"Disk {disk['name']} has too few spare blocks (curr {wearout_reading.current_reading}, thresh {wearout_reading.threshold_reading})" message = f"Disk {disk.name} has too few spare blocks (curr {wearout_reading.current_reading}, thresh {wearout_reading.threshold_reading})"
alert_list.append( alert_list.append(
alerts.Alert( alerts.Alert(alert_type=AlertType.DISKS, message=message, severity=Severity[disk.severity])
alert_type=alerts.AlertType.DISKS, message=message, severity=alerts.Severity[disk["severity"]]
)
) )
return alert_list return alert_list

58
misc/config.py Normal file
View file

@ -0,0 +1,58 @@
import json
from dataclasses import dataclass
from alt_utils import NestedDeserializableDataclass
from alerting.enum import Severity
from misc.common import CONFIG_FILE
@dataclass
class MatrixConfig:
    """Settings of the ``matrix`` section of the configuration."""

    # Homeserver address the Matrix client is created with.
    homeserver: str
    # Identity and credentials assigned onto the Matrix client.
    user_id: str
    device_id: str
    access_token: str
    # Room that alerts are sent to.
    room_id: str
@dataclass
class CheckDockerRegistryConfig:
    """Settings of the ``checks.docker_registry`` section of the configuration."""

    # Base URL handed to the Docker Hub client.
    hub_url: str
    # Image references to check for updates.
    images: list[str]
@dataclass
class CheckRaidConfig:
    """Settings of the ``checks.raid`` section of the configuration."""

    # Logical volumes whose RAID health should be checked.
    lvs: list[str]
@dataclass
class CheckWearoutDiskConfig:
    """One disk entry of the ``checks.wearout`` section of the configuration."""

    # Disk name used to obtain the wearout reading.
    name: str
    # Severity assigned to the wearout alert raised for this disk.
    severity: Severity
@dataclass
class CheckWearoutConfig(NestedDeserializableDataclass):
    """Settings of the ``checks.wearout`` section of the configuration."""

    # Disks whose wearout indicators should be checked.
    disks: list[CheckWearoutDiskConfig]
@dataclass
class ChecksConfig(NestedDeserializableDataclass):
    """Settings of the ``checks`` section: one sub-section per configurable check."""

    docker_registry: CheckDockerRegistryConfig
    raid: CheckRaidConfig
    wearout: CheckWearoutConfig
@dataclass
class Config(NestedDeserializableDataclass):
    """Root of the application configuration.

    NOTE(review): ``from_dict`` is presumably inherited from
    ``NestedDeserializableDataclass`` (alt_utils) -- confirm against that
    library's documentation.
    """

    # Matrix connection settings.
    matrix: MatrixConfig
    # Per-check settings.
    checks: ChecksConfig
def get_config() -> Config:
    """Load the configuration file and deserialize it into a ``Config``.

    Reads the JSON document stored at ``CONFIG_FILE`` and hands the parsed
    dictionary to ``Config.from_dict``.

    Returns:
        The object produced by ``Config.from_dict``.

    Raises:
        OSError: If ``CONFIG_FILE`` cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; be explicit instead of relying on the locale's
    # default encoding, which differs between systems.
    with open(CONFIG_FILE, encoding="utf-8") as f:
        cfg_dict = json.load(f)
    return Config.from_dict(cfg_dict)

View file

@ -2,5 +2,7 @@ from contextvars import ContextVar
import nio import nio
config: ContextVar[dict] = ContextVar("config") from misc.config import Config
config: ContextVar[Config] = ContextVar("config")
matrix_client: ContextVar[nio.AsyncClient] = ContextVar("matrix_client") matrix_client: ContextVar[nio.AsyncClient] = ContextVar("matrix_client")

View file

@ -11,6 +11,7 @@ from urllib.parse import urlparse
import uplink import uplink
from alerting import alerts from alerting import alerts
from alerting.enum import AlertType, Severity
from misc import cvars from misc import cvars
@ -67,12 +68,12 @@ class DockerRegistryClient(uplink.Consumer):
async def get_updated_images() -> list[str]: async def get_updated_images() -> list[str]:
check_config = cvars.config.get()["checks"]["docker_registry"] check_config = cvars.config.get().checks.docker_registry
hub_client = DockerHubClient(base_url=check_config["hub_url"], client=uplink.AiohttpClient()) hub_client = DockerHubClient(base_url=check_config.hub_url, client=uplink.AiohttpClient())
now = datetime.datetime.now(datetime.timezone.utc) now = datetime.datetime.now(datetime.timezone.utc)
updated_images = [] updated_images = []
for image in check_config["images"]: for image in check_config.images:
image_split = image.split("/") image_split = image.split("/")
match len(image_split): match len(image_split):
case 2: case 2:
@ -84,9 +85,9 @@ async def get_updated_images() -> list[str]:
except Exception as exc: except Exception as exc:
await alerts.send_alert( await alerts.send_alert(
alerts.Alert( alerts.Alert(
alert_type=alerts.AlertType.ERROR, alert_type=AlertType.ERROR,
message=f"Could not query Docker Hub: {repr(exc)}, see logs", message=f"Could not query Docker Hub: {repr(exc)}, see logs",
severity=alerts.Severity.CRITICAL, severity=Severity.CRITICAL,
) )
) )
logging.error(traceback.format_exc()) logging.error(traceback.format_exc())
@ -111,9 +112,9 @@ async def get_updated_images() -> list[str]:
except Exception as exc: except Exception as exc:
await alerts.send_alert( await alerts.send_alert(
alerts.Alert( alerts.Alert(
alert_type=alerts.AlertType.ERROR, alert_type=AlertType.ERROR,
message=f"Could not query Docker registry {registry}: {repr(exc)}, see logs", message=f"Could not query Docker registry {registry}: {repr(exc)}, see logs",
severity=alerts.Severity.CRITICAL, severity=Severity.CRITICAL,
) )
) )
logging.error(traceback.format_exc()) logging.error(traceback.format_exc())

View file

@ -5,6 +5,7 @@ from enum import StrEnum
from psutil import cpu_percent, sensors_temperatures, virtual_memory from psutil import cpu_percent, sensors_temperatures, virtual_memory
from alerting import alerts from alerting import alerts
from alerting.enum import AlertType, Severity
@dataclass @dataclass
@ -138,9 +139,9 @@ class Sensors:
except FileNotFoundError: except FileNotFoundError:
await alerts.send_alert( await alerts.send_alert(
alerts.Alert( alerts.Alert(
alert_type=alerts.AlertType.ERROR, alert_type=AlertType.ERROR,
message="upsc is not installed!", message="upsc is not installed!",
severity=alerts.Severity.CRITICAL, severity=Severity.CRITICAL,
) )
) )
return None return None

View file

@ -7,6 +7,7 @@ from enum import StrEnum
from typing import Optional from typing import Optional
from alerting import alerts from alerting import alerts
from alerting.enum import AlertType, Severity
class Severity(StrEnum): class Severity(StrEnum):
@ -48,18 +49,18 @@ async def get_vulns() -> list[Vulnerability]:
except FileNotFoundError: except FileNotFoundError:
await alerts.send_alert( await alerts.send_alert(
alerts.Alert( alerts.Alert(
alert_type=alerts.AlertType.ERROR, alert_type=AlertType.ERROR,
message="arch-audit not installed!", message="arch-audit not installed!",
severity=alerts.Severity.CRITICAL, severity=Severity.CRITICAL,
) )
) )
return [] return []
except Exception as exc: except Exception as exc:
await alerts.send_alert( await alerts.send_alert(
alerts.Alert( alerts.Alert(
alert_type=alerts.AlertType.ERROR, alert_type=AlertType.ERROR,
message=f"arch-audit failed to run: {repr(exc)}, see logs", message=f"arch-audit failed to run: {repr(exc)}, see logs",
severity=alerts.Severity.CRITICAL, severity=Severity.CRITICAL,
) )
) )
logging.error(traceback.format_exc()) logging.error(traceback.format_exc())

View file

@ -3,3 +3,4 @@ psutil==5.9.8
matrix-nio[e2e]==0.24.0 matrix-nio[e2e]==0.24.0
uplink[aiohttp]==0.9.7 uplink[aiohttp]==0.9.7
setuptools==75.2.0 setuptools==75.2.0
alt-utils==0.0.4

View file

@ -4,25 +4,26 @@ from os import environ
from sys import argv from sys import argv
from alerting import alerts from alerting import alerts
from alerting.enum import AlertType, Severity
type_priority_map = { type_priority_map = {
"ONLINE": alerts.Severity.INFO, # UPS is back online "ONLINE": Severity.INFO, # UPS is back online
"ONBATT": alerts.Severity.WARNING, # UPS is on battery "ONBATT": Severity.WARNING, # UPS is on battery
"LOWBATT": alerts.Severity.CRITICAL, # UPS is on battery and has a low battery (is critical) "LOWBATT": Severity.CRITICAL, # UPS is on battery and has a low battery (is critical)
"FSD": alerts.Severity.CRITICAL, # UPS is being shutdown by the primary (FSD = "Forced Shutdown") "FSD": Severity.CRITICAL, # UPS is being shutdown by the primary (FSD = "Forced Shutdown")
"COMMOK": alerts.Severity.INFO, # Communications established with the UPS "COMMOK": Severity.INFO, # Communications established with the UPS
"COMMBAD": alerts.Severity.WARNING, # Communications lost to the UPS "COMMBAD": Severity.WARNING, # Communications lost to the UPS
"SHUTDOWN": alerts.Severity.CRITICAL, # The system is being shutdown "SHUTDOWN": Severity.CRITICAL, # The system is being shutdown
"REPLBATT": alerts.Severity.WARNING, # The UPS battery is bad and needs to be replaced "REPLBATT": Severity.WARNING, # The UPS battery is bad and needs to be replaced
"NOCOMM": alerts.Severity.WARNING, # A UPS is unavailable (cant be contacted for monitoring) "NOCOMM": Severity.WARNING, # A UPS is unavailable (cant be contacted for monitoring)
"NOPARENT": alerts.Severity.CRITICAL, # upsmon parent process died - shutdown impossible "NOPARENT": Severity.CRITICAL, # upsmon parent process died - shutdown impossible
"CAL": alerts.Severity.INFO, # UPS calibration in progress "CAL": Severity.INFO, # UPS calibration in progress
"NOTCAL": alerts.Severity.INFO, # UPS calibration finished "NOTCAL": Severity.INFO, # UPS calibration finished
"OFF": alerts.Severity.CRITICAL, # UPS administratively OFF or asleep "OFF": Severity.CRITICAL, # UPS administratively OFF or asleep
"NOTOFF": alerts.Severity.INFO, # UPS no longer administratively OFF or asleep "NOTOFF": Severity.INFO, # UPS no longer administratively OFF or asleep
"BYPASS": alerts.Severity.WARNING, # UPS on bypass (powered, not protecting) "BYPASS": Severity.WARNING, # UPS on bypass (powered, not protecting)
"NOTBYPASS": alerts.Severity.INFO, # UPS no longer on bypass "NOTBYPASS": Severity.INFO, # UPS no longer on bypass
None: alerts.Severity.CRITICAL, # unknown alert type None: Severity.CRITICAL, # unknown alert type
} }
@ -33,7 +34,7 @@ async def main():
message = argv[1] message = argv[1]
typestr = environ.get("NOTIFYTYPE", None) typestr = environ.get("NOTIFYTYPE", None)
severity = type_priority_map[typestr] severity = type_priority_map[typestr]
alert = alerts.Alert(alert_type=alerts.AlertType.UPS, message=message, severity=severity) alert = alerts.Alert(alert_type=AlertType.UPS, message=message, severity=severity)
await alerts.send_alert(alert) await alerts.send_alert(alert)

View file

@ -1,16 +1,13 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import asyncio import asyncio
import datetime import datetime
import json
import logging import logging
import signal import signal
import aiofiles
from alerting import alerts from alerting import alerts
from alerting.common import CONFIG_FILE
from misc import checks, cvars from misc import checks, cvars
from misc.checkers import interval_checker, scheduled_checker from misc.checkers import interval_checker, scheduled_checker
from misc.config import get_config
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
@ -25,9 +22,7 @@ def stop_gracefully(signum, frame):
async def main(): async def main():
signal.signal(signal.SIGTERM, stop_gracefully) signal.signal(signal.SIGTERM, stop_gracefully)
async with aiofiles.open(CONFIG_FILE) as f: cvars.config.set(get_config())
contents = await f.read()
cvars.config.set(json.loads(contents))
client = await alerts.get_client() client = await alerts.get_client()
cvars.matrix_client.set(client) cvars.matrix_client.set(client)