alerting service

This commit is contained in:
Alex 2024-06-16 16:18:25 +03:00
parent 84ab61eb00
commit fcc02da845
5 changed files with 143 additions and 55 deletions

25
misc/checks.py Normal file
View file

@ -0,0 +1,25 @@
from alerting import alerts
from misc import sensors
def temp_check() -> list[alerts.Alert]:
    """Build an alert for every temperature sensor that exceeds a threshold.

    Each sensor returned by ``sensors.Sensors.get_temperatures()`` is compared
    against its thresholds, most severe first:

    * above ``critical_temp`` -> ``Severity.CRITICAL``
    * above ``highest_temp``  -> ``Severity.WARNING``

    A threshold that is ``None`` is skipped, so a sensor with no thresholds
    never produces an alert.

    Returns:
        A list of ``alerts.Alert`` objects, one per offending sensor. The list
        is empty when every sensor is within limits.
    """
    alert_list = []
    temps = sensors.Sensors.get_temperatures()
    for sensor_list in temps.values():
        for sensor in sensor_list:
            if sensor.critical_temp is not None and sensor.current_temp > sensor.critical_temp:
                threshold = sensor.critical_temp
                severity = alerts.Severity.CRITICAL
            elif sensor.highest_temp is not None and sensor.current_temp > sensor.highest_temp:
                # Report the threshold that was actually exceeded; critical_temp
                # may be None here or simply not the limit being compared.
                threshold = sensor.highest_temp
                severity = alerts.Severity.WARNING
            else:
                continue
            alert_list.append(
                alerts.Alert(
                    alert_type=alerts.AlertType("TEMP"),
                    message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {threshold}°C",
                    severity=severity,
                )
            )
    return alert_list

60
misc/sensors.py Normal file
View file

@ -0,0 +1,60 @@
from dataclasses import dataclass
from psutil import sensors_temperatures
@dataclass
class TemperatureSensor:
sensor_type: str
sensor_label: str
current_temp: float
highest_temp: float | None = None
critical_temp: float | None = None
class Sensors:
    """Namespace for helpers that read hardware sensors via psutil."""

    @staticmethod
    def get_temperatures() -> dict[str, list[TemperatureSensor]]:
        """Collect temperature readings grouped by psutil sensor type.

        Every sensor type reported by ``psutil.sensors_temperatures()`` gets a
        key in the result, but only ``nvme``, ``amdgpu`` and ``k10temp`` are
        translated into ``TemperatureSensor`` entries; any other type maps to
        an empty list.

        Returns:
            Mapping of sensor type name to the list of readings for it.
        """
        readings = {}
        for s_type, entries in sensors_temperatures().items():
            bucket = readings.setdefault(s_type, [])
            if s_type == "nvme":
                # NVMe drives expose several sensors, each with its own
                # thresholds, so every entry is kept.
                bucket.extend(
                    TemperatureSensor(
                        sensor_type=s_type,
                        sensor_label=entry.label,
                        current_temp=entry.current,
                        highest_temp=entry.high,
                        critical_temp=entry.critical,
                    )
                    for entry in entries
                )
            elif s_type == "amdgpu":
                # Only the first reading is used and no thresholds are set.
                gpu = TemperatureSensor(
                    sensor_type=s_type,
                    sensor_label="Integrated GPU",
                    current_temp=entries[0].current,
                )
                bucket.append(gpu)
            elif s_type == "k10temp":
                cpu = TemperatureSensor(
                    sensor_type=s_type,
                    sensor_label="AMD CPU",
                    current_temp=entries[0].current,
                    critical_temp=95.0,  # hardcoded because we have R9 7900X
                )
                bucket.append(cpu)
        return readings
if __name__ == "__main__":
    # Manual smoke test: iterating the returned mapping yields its keys, so
    # this prints one detected sensor type per line.
    for sensor_type in Sensors.get_temperatures():
        print(sensor_type)

View file

@ -2,7 +2,7 @@
from colorama import Back, Fore, Style from colorama import Back, Fore, Style
from sensors import Sensors from misc.sensors import Sensors
def pretty_print(): def pretty_print():

View file

@ -1,54 +0,0 @@
from dataclasses import dataclass
from psutil import sensors_temperatures
@dataclass
class TemperatureSensor:
sensor_type: str
sensor_label: str
current_temp: float
highest_temp: float | None = None
critical_temp: float | None = None
class Sensors:
    """Namespace for helpers that read hardware sensors via psutil."""

    @staticmethod
    def get_temperatures() -> dict[str, list[TemperatureSensor]]:
        """Return temperature readings keyed by psutil sensor type.

        All sensor types reported by ``psutil.sensors_temperatures()`` appear
        as keys; only ``nvme``, ``amdgpu`` and ``k10temp`` are converted into
        ``TemperatureSensor`` entries, every other type keeps an empty list.
        """
        readings = {}
        for s_type, entries in sensors_temperatures().items():
            bucket = readings.setdefault(s_type, [])
            if s_type == "nvme":
                # Keep every NVMe sensor along with its own thresholds.
                bucket.extend(
                    TemperatureSensor(
                        sensor_type=s_type,
                        sensor_label=entry.label,
                        current_temp=entry.current,
                        highest_temp=entry.high,
                        critical_temp=entry.critical,
                    )
                    for entry in entries
                )
            elif s_type == "amdgpu":
                # Only the first reading is used and no thresholds are set.
                bucket.append(
                    TemperatureSensor(
                        sensor_type=s_type,
                        sensor_label="Integrated GPU",
                        current_temp=entries[0].current,
                    )
                )
            elif s_type == "k10temp":
                bucket.append(
                    TemperatureSensor(
                        sensor_type=s_type,
                        sensor_label="AMD CPU",
                        current_temp=entries[0].current,
                        critical_temp=95.0,  # hardcoded because we have R9 7900X
                    )
                )
        return readings
if __name__ == "__main__":
    # Manual smoke test: iterating the returned mapping yields its keys, so
    # this prints one detected sensor type per line.
    for sensor_type in Sensors.get_temperatures():
        print(sensor_type)

57
service.py Executable file
View file

@ -0,0 +1,57 @@
#!/usr/bin/env python3
import asyncio
import logging
import signal
from typing import Callable, Coroutine
import nio
from alerting import alerts
from misc import checks
# Emit INFO-level records so each check run and its alert count are logged.
logging.basicConfig(level=logging.INFO)

# Shutdown flag: flipped to True by stop_gracefully() and polled by main().
stopping: bool = False
def stop_gracefully(signum, frame):
    """Signal handler that requests shutdown by raising the ``stopping`` flag.

    The handler does nothing else; ``main`` polls the flag and performs the
    actual shutdown. ``signum`` and ``frame`` are the standard signal-handler
    arguments and are not used.
    """
    global stopping
    stopping = True
async def checker(check: Callable | Coroutine, interval_secs: int, client: nio.AsyncClient, *args, **kwargs):
    """Run ``check`` periodically and send every alert it produces.

    Args:
        check: Either a callable (sync, or async returning a coroutine) that is
            invoked as ``check(*args, **kwargs)`` on every cycle, or an already
            created coroutine object. A coroutine object can be awaited only
            once, so in that case a single cycle is performed and the checker
            returns instead of looping.
        interval_secs: Seconds to sleep between cycles.
        client: Client handed to ``alerts.send_alert`` for delivery.
        *args: Positional arguments forwarded to ``check``.
        **kwargs: Keyword arguments forwarded to ``check``.

    Raises:
        TypeError: If ``check`` is neither callable nor a coroutine object.
    """
    is_callable = callable(check)
    if not is_callable and not isinstance(check, Coroutine):
        raise TypeError(f"check is {type(check)}, neither function nor coroutine")

    # functools.partial objects and callable instances have no __name__.
    check_name = getattr(check, "__name__", repr(check))
    logging.info("Calling %s", check_name)
    while True:
        if is_callable:
            result = check(*args, **kwargs)
            if isinstance(result, Coroutine):
                result = await result
        else:
            result = await check
        logging.info("Got %d alerts", len(result))
        for alert in result:
            await alerts.send_alert(alert, client)
        if not is_callable:
            # Awaiting the same coroutine object again would raise
            # RuntimeError, so a bare coroutine yields exactly one cycle.
            logging.warning("%s is a coroutine object and cannot be re-run; stopping this checker", check_name)
            return
        await asyncio.sleep(interval_secs)
async def main():
    """Start all periodic checkers and run until SIGTERM is received.

    Installs ``stop_gracefully`` as the SIGTERM handler, obtains the client
    from ``alerts.get_client()`` and runs every checker as a task in one
    ``asyncio.TaskGroup``. The ``stopping`` flag is polled every 3 seconds.

    Raises:
        SystemExit: Once the ``stopping`` flag has been set.
    """
    signal.signal(signal.SIGTERM, stop_gracefully)
    client = await alerts.get_client()
    try:
        async with asyncio.TaskGroup() as tg:
            # Built here so no coroutine is left un-awaited if setup above fails.
            checkers = (checker(checks.temp_check, 5 * 60, client),)
            for c in checkers:
                # The TaskGroup keeps its own strong references to the tasks.
                tg.create_task(c)
            while not stopping:
                await asyncio.sleep(3)
            # Checkers never finish on their own; raising inside the group
            # makes it cancel them before the exception propagates.
            raise SystemExit
    finally:
        # Close the client on every exit path (SIGTERM, checker failure,
        # cancellation), and only after the checkers have stopped using it.
        await client.close()
if __name__ == "__main__":
    # Script entry point: drive the async service on a fresh event loop.
    asyncio.run(main())