mirror of
https://forgejo.altau.su/lego/lego-monitoring.git
synced 2026-03-10 04:41:10 +00:00
add temp monitoring
This commit is contained in:
parent
19ee6f487b
commit
758438382d
13 changed files with 272 additions and 25 deletions
|
|
@ -1,12 +1,16 @@
|
|||
import argparse
|
||||
import asyncio
|
||||
import datetime
|
||||
import logging
|
||||
import signal
|
||||
import time
|
||||
|
||||
from . import checks
|
||||
from .alerting import alerts
|
||||
from .checks.temp.sensors import print_readings
|
||||
from .config import load_config
|
||||
from .core import cvars
|
||||
from .core.config import load_config
|
||||
from .core.checkers import interval_checker
|
||||
|
||||
stopping = False
|
||||
|
||||
|
|
@ -27,11 +31,21 @@ async def async_main():
|
|||
prog="lego-monitoring",
|
||||
description="Lego-monitoring service",
|
||||
)
|
||||
parser.add_argument("-c", "--config", required=True)
|
||||
parser.add_argument("-c", "--config", help="config file")
|
||||
parser.add_argument("--print-temp", help="print temp sensor readings and exit", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
config_path = parser.parse_args().config
|
||||
config = load_config(config_path)
|
||||
cvars.config.set(config)
|
||||
if args.config:
|
||||
config_path = parser.parse_args().config
|
||||
config = load_config(config_path)
|
||||
cvars.config.set(config)
|
||||
|
||||
if args.print_temp:
|
||||
print_readings()
|
||||
raise SystemExit
|
||||
|
||||
if not args.config:
|
||||
raise RuntimeError("--config must be specified in standard operating mode")
|
||||
|
||||
tg_client = await alerts.get_client()
|
||||
cvars.tg_client.set(tg_client)
|
||||
|
|
@ -41,10 +55,11 @@ async def async_main():
|
|||
alerts.send_start_alert(),
|
||||
],
|
||||
"stop": [], # this is checked later
|
||||
"temp": [interval_checker(checks.temp_check, datetime.timedelta(minutes=5))],
|
||||
}
|
||||
|
||||
checkers = []
|
||||
for enabled_set in config.enabled_checker_sets:
|
||||
for enabled_set in config.enabled_check_sets:
|
||||
for checker in checker_sets[enabled_set]:
|
||||
checkers.append(checker)
|
||||
|
||||
|
|
@ -57,7 +72,7 @@ async def async_main():
|
|||
checker_tasks.add(task)
|
||||
while True:
|
||||
if stopping:
|
||||
if "stop" in config.enabled_checker_sets:
|
||||
if "stop" in config.enabled_check_sets:
|
||||
await alerts.send_stop_alert()
|
||||
await tg_client.disconnect()
|
||||
raise SystemExit
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ async def send_start_alert() -> None:
|
|||
await send_alert(
|
||||
Alert(
|
||||
alert_type=AlertType.BOOT,
|
||||
message=f"Service running with enabled checkers: {', '.join(config.enabled_checker_sets)}",
|
||||
message=f"Service running with enabled checks: {', '.join(config.enabled_check_sets)}",
|
||||
severity=Severity.INFO,
|
||||
)
|
||||
)
|
||||
|
|
|
|||
|
|
@ -3,11 +3,11 @@ from enum import StrEnum
|
|||
|
||||
class AlertType(StrEnum):
|
||||
BOOT = "BOOT"
|
||||
TEMP = "TEMP"
|
||||
TEST = "TEST"
|
||||
# ERROR = "ERROR"
|
||||
# RAM = "RAM"
|
||||
# CPU = "CPU"
|
||||
# TEMP = "TEMP"
|
||||
# VULN = "VULN"
|
||||
# LOGIN = "LOGIN"
|
||||
# SMART = "SMART" # TODO
|
||||
|
|
|
|||
1
src/lego_monitoring/checks/__init__.py
Normal file
1
src/lego_monitoring/checks/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
from .temp import temp_check
|
||||
29
src/lego_monitoring/checks/temp/__init__.py
Normal file
29
src/lego_monitoring/checks/temp/__init__.py
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
from lego_monitoring.alerting import alerts
|
||||
from lego_monitoring.alerting.enum import AlertType, Severity
|
||||
|
||||
from . import sensors
|
||||
|
||||
IS_TESTING = False
|
||||
|
||||
|
||||
def temp_check() -> list[alerts.Alert]:
    """Build alerts for every temperature reading that exceeds a threshold.

    Readings come from ``sensors.get_readings()``. For each reading the
    critical threshold is tried first, then the warning threshold; at most
    one alert is produced per reading. A threshold of ``None`` is ignored.
    When the module-level ``IS_TESTING`` flag is true, every threshold that
    is set triggers regardless of the current temperature.

    Returns:
        The alerts raised, in the order the readings were visited.
    """
    raised = []
    for sensor_name, sensor_readings in sensors.get_readings().items():
        for reading in sensor_readings:
            # Ordered most severe first so a critical breach wins over a warning.
            thresholds = (
                (reading.critical_temp, Severity.CRITICAL),
                (reading.warning_temp, Severity.WARNING),
            )
            for limit, severity in thresholds:
                if limit is None:
                    continue
                if not IS_TESTING and not (reading.current_temp > limit):
                    continue
                raised.append(
                    alerts.Alert(
                        alert_type=AlertType.TEMP,
                        message=f"{sensor_name} {reading.label}: {reading.current_temp}°C > {limit}°C",
                        severity=severity,
                    )
                )
                break
    return raised
|
||||
66
src/lego_monitoring/checks/temp/sensors.py
Normal file
66
src/lego_monitoring/checks/temp/sensors.py
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
from psutil import sensors_temperatures
|
||||
|
||||
from lego_monitoring.config.checks.temp import TempSensorConfig
|
||||
from lego_monitoring.core import cvars
|
||||
|
||||
|
||||
@dataclass
class TemperatureReading:
    """One temperature value of a sensor together with its alert thresholds."""

    # Display label of the reading.
    label: str
    # Temperature currently reported for this reading.
    current_temp: float
    # Threshold above which a warning is raised; None when there is none.
    warning_temp: Optional[float]
    # Threshold above which a critical alert is raised; None when there is none.
    critical_temp: Optional[float]
|
||||
|
||||
|
||||
def print_readings():
    """Print all readings returned by ``get_readings()`` to stdout, grouped by sensor."""
    for sensor_name, entries in get_readings().items():
        print(f"*** Sensor {sensor_name}***\n")
        for entry in entries:
            # One print per reading; the trailing "\n" plus print's own newline
            # leaves a blank line after each reading.
            print(
                "\n".join(
                    (
                        f"Label: {entry.label}",
                        f"Current temp: {entry.current_temp}",
                        f"Warning temp: {entry.warning_temp}",
                        f"Critical temp: {entry.critical_temp}\n",
                    )
                )
            )
|
||||
|
||||
|
||||
def get_readings() -> dict[str, list[TemperatureReading]]:
    """Collect current temperature readings, applying per-sensor configuration.

    Sensors and readings are taken from ``psutil.sensors_temperatures()``.
    If a configuration is available in ``cvars.config``, it may disable
    whole sensors or individual readings, rename them, and override the
    warning/critical thresholds reported by the hardware. Without a
    configuration the hardware-reported values are used unchanged.

    Returns:
        A mapping of sensor display name to the list of its enabled
        readings. Sensors that resolve to the same display name have
        their readings merged into one list.
    """
    try:
        config: dict[str, TempSensorConfig] = cvars.config.get().checks.temp.sensors
    except LookupError:
        # No config was set in this context; fall back to raw sensor data.
        config = {}

    sensor_readings: dict[str, list[TemperatureReading]] = {}

    for sensor, readings in sensors_temperatures().items():
        sensor_config = config.get(sensor)
        if sensor_config is not None and not sensor_config.enabled:
            continue

        if sensor_config is not None and sensor_config.name:
            sensor_friendly_name = sensor_config.name
        else:
            sensor_friendly_name = sensor
        reading_configs = sensor_config.readings if sensor_config is not None else {}

        # setdefault keeps readings already collected under the same display
        # name instead of discarding them when two sensors share a name.
        collected = sensor_readings.setdefault(sensor_friendly_name, [])

        for r in readings:
            config_r = reading_configs.get(r.label)
            if config_r is None:
                # Reading is not configured: report hardware values as-is.
                collected.append(
                    TemperatureReading(
                        label=r.label,
                        current_temp=r.current,
                        warning_temp=r.high,
                        critical_temp=r.critical,
                    )
                )
                continue

            if not config_r.enabled:
                continue

            # Compare thresholds against None explicitly so that a configured
            # threshold of 0 is honoured rather than treated as "unset".
            warning_temp = config_r.warning_temp if config_r.warning_temp is not None else r.high
            critical_temp = config_r.critical_temp if config_r.critical_temp is not None else r.critical
            collected.append(
                TemperatureReading(
                    label=config_r.label or r.label,
                    current_temp=r.current,
                    warning_temp=warning_temp,
                    critical_temp=critical_temp,
                )
            )

    return sensor_readings
|
||||
|
|
@ -3,6 +3,13 @@ from dataclasses import dataclass
|
|||
|
||||
from alt_utils import NestedDeserializableDataclass
|
||||
|
||||
from .checks.temp import TempCheckConfig
|
||||
|
||||
|
||||
@dataclass
class ChecksConfig(NestedDeserializableDataclass):
    """Configuration container for the individual checks."""

    # Settings for the temperature check.
    temp: TempCheckConfig
|
||||
|
||||
|
||||
@dataclass
|
||||
class TelegramConfig:
|
||||
|
|
@ -12,7 +19,8 @@ class TelegramConfig:
|
|||
|
||||
@dataclass
|
||||
class Config(NestedDeserializableDataclass):
|
||||
enabled_checker_sets: list[str]
|
||||
enabled_check_sets: list[str]
|
||||
checks: ChecksConfig
|
||||
telegram: TelegramConfig
|
||||
|
||||
|
||||
24
src/lego_monitoring/config/checks/temp.py
Normal file
24
src/lego_monitoring/config/checks/temp.py
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
from alt_utils import NestedDeserializableDataclass
|
||||
|
||||
|
||||
@dataclass
class TempReadingConfig:
    """Per-reading overrides for a temperature sensor."""

    # Display label to use instead of the one reported by the sensor.
    label: Optional[str]
    # Disabled readings are left out of the collected readings.
    enabled: bool
    # Warning threshold override; None leaves the sensor-reported value in effect.
    warning_temp: Optional[float]
    # Critical threshold override; None leaves the sensor-reported value in effect.
    critical_temp: Optional[float]
|
||||
|
||||
|
||||
@dataclass
class TempSensorConfig(NestedDeserializableDataclass):
    """Per-sensor settings for the temperature check."""

    # Display name to use instead of the sensor's own name.
    name: Optional[str]
    # Disabled sensors are skipped entirely when readings are collected.
    enabled: bool
    # Per-reading overrides, keyed by the label the sensor reports.
    readings: dict[str, TempReadingConfig]
|
||||
|
||||
|
||||
@dataclass
class TempCheckConfig(NestedDeserializableDataclass):
    """Settings for the temperature check."""

    # Per-sensor settings, keyed by sensor name.
    sensors: dict[str, TempSensorConfig]
|
||||
|
|
@ -2,7 +2,7 @@ from contextvars import ContextVar
|
|||
|
||||
from telethon import TelegramClient
|
||||
|
||||
from .config import Config
|
||||
from ..config import Config
|
||||
|
||||
config: ContextVar[Config] = ContextVar("config")
|
||||
tg_client: ContextVar[TelegramClient] = ContextVar("tg_client")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue