add temp monitoring

This commit is contained in:
Alex Tau 2025-05-02 15:25:27 +03:00
parent 19ee6f487b
commit 758438382d
13 changed files with 272 additions and 25 deletions

View file

@ -1,12 +1,16 @@
import argparse
import asyncio
import datetime
import logging
import signal
import time
from . import checks
from .alerting import alerts
from .checks.temp.sensors import print_readings
from .config import load_config
from .core import cvars
from .core.config import load_config
from .core.checkers import interval_checker
stopping = False
@ -27,11 +31,21 @@ async def async_main():
prog="lego-monitoring",
description="Lego-monitoring service",
)
parser.add_argument("-c", "--config", required=True)
parser.add_argument("-c", "--config", help="config file")
parser.add_argument("--print-temp", help="print temp sensor readings and exit", action="store_true")
args = parser.parse_args()
config_path = parser.parse_args().config
config = load_config(config_path)
cvars.config.set(config)
if args.config:
config_path = parser.parse_args().config
config = load_config(config_path)
cvars.config.set(config)
if args.print_temp:
print_readings()
raise SystemExit
if not args.config:
raise RuntimeError("--config must be specified in standard operating mode")
tg_client = await alerts.get_client()
cvars.tg_client.set(tg_client)
@ -41,10 +55,11 @@ async def async_main():
alerts.send_start_alert(),
],
"stop": [], # this is checked later
"temp": [interval_checker(checks.temp_check, datetime.timedelta(minutes=5))],
}
checkers = []
for enabled_set in config.enabled_checker_sets:
for enabled_set in config.enabled_check_sets:
for checker in checker_sets[enabled_set]:
checkers.append(checker)
@ -57,7 +72,7 @@ async def async_main():
checker_tasks.add(task)
while True:
if stopping:
if "stop" in config.enabled_checker_sets:
if "stop" in config.enabled_check_sets:
await alerts.send_stop_alert()
await tg_client.disconnect()
raise SystemExit

View file

@ -57,7 +57,7 @@ async def send_start_alert() -> None:
await send_alert(
Alert(
alert_type=AlertType.BOOT,
message=f"Service running with enabled checkers: {', '.join(config.enabled_checker_sets)}",
message=f"Service running with enabled checks: {', '.join(config.enabled_check_sets)}",
severity=Severity.INFO,
)
)

View file

@ -3,11 +3,11 @@ from enum import StrEnum
class AlertType(StrEnum):
BOOT = "BOOT"
TEMP = "TEMP"
TEST = "TEST"
# ERROR = "ERROR"
# RAM = "RAM"
# CPU = "CPU"
# TEMP = "TEMP"
# VULN = "VULN"
# LOGIN = "LOGIN"
# SMART = "SMART" # TODO

View file

@ -0,0 +1 @@
from .temp import temp_check

View file

@ -0,0 +1,29 @@
from lego_monitoring.alerting import alerts
from lego_monitoring.alerting.enum import AlertType, Severity
from . import sensors
# Debug switch: when True, every reading that has a threshold configured raises
# an alert regardless of its current temperature.
IS_TESTING = False


def temp_check() -> list[alerts.Alert]:
    """Compare every temperature reading against its thresholds.

    A reading whose current temperature exceeds its critical threshold yields
    a CRITICAL alert; otherwise, one that exceeds its warning threshold yields
    a WARNING alert. Thresholds that are ``None`` are not checked.

    Returns:
        The alerts raised by this pass, in the order the readings were visited.
    """
    raised = []
    for sensor_name, sensor_readings in sensors.get_readings().items():
        for reading in sensor_readings:
            critical = reading.critical_temp
            warning = reading.warning_temp
            # The critical threshold takes precedence over the warning one.
            if critical is not None and (IS_TESTING or reading.current_temp > critical):
                threshold, level = critical, Severity.CRITICAL
            elif warning is not None and (IS_TESTING or reading.current_temp > warning):
                threshold, level = warning, Severity.WARNING
            else:
                continue
            raised.append(
                alerts.Alert(
                    alert_type=AlertType.TEMP,
                    message=f"{sensor_name} {reading.label}: {reading.current_temp}°C > {threshold}°C",
                    severity=level,
                )
            )
    return raised

View file

@ -0,0 +1,66 @@
from dataclasses import dataclass
from typing import Optional
from psutil import sensors_temperatures
from lego_monitoring.config.checks.temp import TempSensorConfig
from lego_monitoring.core import cvars
@dataclass
class TemperatureReading:
    """One temperature measurement together with its alert thresholds."""

    # Display label of the reading.
    label: str
    # Temperature measured right now.
    current_temp: float
    # Threshold above which a warning applies; None when there is none.
    warning_temp: Optional[float]
    # Threshold above which the reading is critical; None when there is none.
    critical_temp: Optional[float]
def print_readings():
    """Print every sensor returned by ``get_readings()`` and its readings to stdout."""
    for sensor_name, entries in get_readings().items():
        print(f"*** Sensor {sensor_name}***\n")
        for entry in entries:
            # One call per reading; sep="\n" puts each field on its own line.
            print(
                f"Label: {entry.label}",
                f"Current temp: {entry.current_temp}",
                f"Warning temp: {entry.warning_temp}",
                f"Critical temp: {entry.critical_temp}\n",
                sep="\n",
            )
def get_readings() -> dict[str, list[TemperatureReading]]:
    """Collect temperature readings from psutil and apply configured overrides.

    Per-sensor and per-reading configuration is taken from
    ``cvars.config`` (``checks.temp.sensors``). Sensors and readings without a
    config entry are reported exactly as psutil returns them. Disabled sensors
    and disabled readings are omitted.

    Returns:
        A mapping from sensor name (the configured friendly name when one is
        set, otherwise the psutil name) to the list of its readings.
    """
    try:
        config: dict[str, TempSensorConfig] = cvars.config.get().checks.temp.sensors
    except LookupError:
        # The config context variable has no value: use no overrides at all.
        config = {}

    sensor_readings: dict[str, list[TemperatureReading]] = {}
    for sensor, readings in sensors_temperatures().items():
        sensor_config = config.get(sensor)
        if sensor_config is not None and not sensor_config.enabled:
            continue

        if sensor_config is not None:
            friendly_name = sensor_config.name if sensor_config.name else sensor
            reading_configs = sensor_config.readings
        else:
            friendly_name = sensor
            reading_configs = {}

        # setdefault rather than assignment: if two sensors resolve to the same
        # friendly name their readings are merged instead of the later sensor
        # silently discarding the earlier one's readings.
        friendly_readings = sensor_readings.setdefault(friendly_name, [])

        for r in readings:
            reading_config = reading_configs.get(r.label)
            if reading_config is None:
                # No override for this reading: pass psutil's values through.
                friendly_readings.append(
                    TemperatureReading(
                        label=r.label,
                        current_temp=r.current,
                        warning_temp=r.high,
                        critical_temp=r.critical,
                    )
                )
                continue
            if not reading_config.enabled:
                continue
            friendly_readings.append(
                TemperatureReading(
                    label=reading_config.label if reading_config.label else r.label,
                    current_temp=r.current,
                    # Compare against None, not truthiness, so an explicit
                    # threshold of 0 is honoured instead of being replaced.
                    warning_temp=(
                        reading_config.warning_temp if reading_config.warning_temp is not None else r.high
                    ),
                    critical_temp=(
                        reading_config.critical_temp if reading_config.critical_temp is not None else r.critical
                    ),
                )
            )
    return sensor_readings

View file

@ -3,6 +3,13 @@ from dataclasses import dataclass
from alt_utils import NestedDeserializableDataclass
from .checks.temp import TempCheckConfig
@dataclass
class ChecksConfig(NestedDeserializableDataclass):
    """Container for the per-check configuration sections."""

    # Settings for the temperature check.
    temp: TempCheckConfig
@dataclass
class TelegramConfig:
@ -12,7 +19,8 @@ class TelegramConfig:
@dataclass
class Config(NestedDeserializableDataclass):
enabled_checker_sets: list[str]
enabled_check_sets: list[str]
checks: ChecksConfig
telegram: TelegramConfig

View file

@ -0,0 +1,24 @@
from dataclasses import dataclass
from typing import Optional
from alt_utils import NestedDeserializableDataclass
@dataclass
class TempReadingConfig:
    """Configuration overrides for a single reading of a temperature sensor."""

    # Optional replacement for the reading's label.
    label: Optional[str]
    # Whether this reading is enabled.
    enabled: bool
    # Optional warning threshold override.
    warning_temp: Optional[float]
    # Optional critical threshold override.
    critical_temp: Optional[float]
@dataclass
class TempSensorConfig(NestedDeserializableDataclass):
    """Configuration for one temperature sensor."""

    # Optional friendly name for the sensor.
    name: Optional[str]
    # Whether this sensor is enabled.
    enabled: bool
    # Per-reading overrides, keyed by reading label.
    readings: dict[str, TempReadingConfig]
@dataclass
class TempCheckConfig(NestedDeserializableDataclass):
    """Configuration section for the temperature check."""

    # Per-sensor settings, keyed by sensor name.
    sensors: dict[str, TempSensorConfig]

View file

@ -2,7 +2,7 @@ from contextvars import ContextVar
from telethon import TelegramClient
from .config import Config
from ..config import Config
config: ContextVar[Config] = ContextVar("config")
tg_client: ContextVar[TelegramClient] = ContextVar("tg_client")