mirror of
https://forgejo.altau.su/lego/lego-monitoring.git
synced 2026-03-10 04:41:10 +00:00
alerting service
This commit is contained in:
parent
84ab61eb00
commit
fcc02da845
5 changed files with 143 additions and 55 deletions
25
misc/checks.py
Normal file
25
misc/checks.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
from alerting import alerts
|
||||
from misc import sensors
|
||||
|
||||
|
||||
def temp_check() -> list[alerts.Alert]:
    """Build alerts for every temperature sensor that exceeds a threshold.

    Readings are taken from ``sensors.Sensors.get_temperatures()``. For each
    sensor:

    * a reading above ``critical_temp`` produces a ``CRITICAL`` alert;
    * otherwise a reading above ``highest_temp`` produces a ``WARNING`` alert;
    * a threshold that is ``None`` is never compared against.

    Returns:
        A list with one ``alerts.Alert`` per offending sensor; empty when no
        sensor is above its thresholds.
    """
    alert_list = []
    temps = sensors.Sensors.get_temperatures()
    for sensor_list in temps.values():
        for sensor in sensor_list:
            if sensor.critical_temp is not None and sensor.current_temp > sensor.critical_temp:
                threshold = sensor.critical_temp
                severity = alerts.Severity.CRITICAL
            elif sensor.highest_temp is not None and sensor.current_temp > sensor.highest_temp:
                # Report the threshold that was actually crossed; critical_temp
                # may be None or not yet reached on this branch.
                threshold = sensor.highest_temp
                severity = alerts.Severity.WARNING
            else:
                continue
            alert_list.append(
                alerts.Alert(
                    alert_type=alerts.AlertType("TEMP"),
                    message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {threshold}°C",
                    severity=severity,
                )
            )
    return alert_list
|
||||
60
misc/sensors.py
Normal file
60
misc/sensors.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
from dataclasses import dataclass
|
||||
|
||||
from psutil import sensors_temperatures
|
||||
|
||||
|
||||
@dataclass
|
||||
class TemperatureSensor:
|
||||
sensor_type: str
|
||||
sensor_label: str
|
||||
current_temp: float
|
||||
highest_temp: float | None = None
|
||||
critical_temp: float | None = None
|
||||
|
||||
|
||||
class Sensors:
    """Static helpers for reading hardware sensors."""

    @staticmethod
    def get_temperatures() -> dict[str, list[TemperatureSensor]]:
        """Return temperature readings from psutil, grouped by sensor type.

        Every sensor type reported by ``sensors_temperatures()`` gets a key in
        the result. Only "nvme", "amdgpu" and "k10temp" entries are converted
        to ``TemperatureSensor`` objects; any other type maps to an empty list.
        """
        raw_readings = sensors_temperatures()
        grouped = {}

        for chip, readings in raw_readings.items():
            bucket = grouped.setdefault(chip, [])
            if chip == "nvme":
                # Every NVMe entry is kept, with the thresholds psutil reports.
                for reading in readings:
                    bucket.append(
                        TemperatureSensor(
                            sensor_type=chip,
                            sensor_label=reading.label,
                            current_temp=reading.current,
                            highest_temp=reading.high,
                            critical_temp=reading.critical,
                        )
                    )
            elif chip == "amdgpu":
                # Only the first entry is used; no thresholds are recorded.
                bucket.append(
                    TemperatureSensor(
                        sensor_type=chip,
                        sensor_label="Integrated GPU",
                        current_temp=readings[0].current,
                    )
                )
            elif chip == "k10temp":
                bucket.append(
                    TemperatureSensor(
                        sensor_type=chip,
                        sensor_label="AMD CPU",
                        current_temp=readings[0].current,
                        critical_temp=95.0,  # hardcoded because we have R9 7900X
                    )
                )

        return grouped
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual check: print each sensor type key returned by get_temperatures().
    for sensor_type in Sensors.get_temperatures():
        print(sensor_type)
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
from colorama import Back, Fore, Style
|
||||
|
||||
from sensors import Sensors
|
||||
from misc.sensors import Sensors
|
||||
|
||||
|
||||
def pretty_print():
|
||||
|
|
|
|||
54
sensors.py
54
sensors.py
|
|
@ -1,54 +0,0 @@
|
|||
from dataclasses import dataclass
|
||||
from psutil import sensors_temperatures
|
||||
|
||||
|
||||
@dataclass
|
||||
class TemperatureSensor:
|
||||
sensor_type: str
|
||||
sensor_label: str
|
||||
current_temp: float
|
||||
highest_temp: float | None = None
|
||||
critical_temp: float | None = None
|
||||
|
||||
|
||||
class Sensors:
|
||||
@staticmethod
|
||||
def get_temperatures() -> dict[str, list[TemperatureSensor]]:
|
||||
psutil_temp_sensors = sensors_temperatures()
|
||||
|
||||
temp_sensors = {}
|
||||
|
||||
for s_type, sensors in psutil_temp_sensors.items():
|
||||
if s_type not in temp_sensors.keys():
|
||||
temp_sensors[s_type] = []
|
||||
match(s_type):
|
||||
case "nvme":
|
||||
for sensor in sensors:
|
||||
temp_sensors[s_type].append(TemperatureSensor(
|
||||
sensor_type=s_type,
|
||||
sensor_label=sensor.label,
|
||||
current_temp=sensor.current,
|
||||
highest_temp=sensor.high,
|
||||
critical_temp=sensor.critical
|
||||
))
|
||||
case "amdgpu":
|
||||
temp_sensors[s_type].append(TemperatureSensor(
|
||||
sensor_type=s_type,
|
||||
sensor_label="Integrated GPU",
|
||||
current_temp=sensors[0].current,
|
||||
))
|
||||
case "k10temp":
|
||||
temp_sensors[s_type].append(TemperatureSensor(
|
||||
sensor_type=s_type,
|
||||
sensor_label="AMD CPU",
|
||||
current_temp=sensors[0].current,
|
||||
critical_temp=95.0 # hardcoded because we have R9 7900X
|
||||
))
|
||||
|
||||
|
||||
return temp_sensors
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Running the module directly prints the sensor type keys, one per line.
    for type_name in Sensors.get_temperatures():
        print(type_name)
|
||||
57
service.py
Executable file
57
service.py
Executable file
|
|
@ -0,0 +1,57 @@
|
|||
#!/usr/bin/env python3
|
||||
import asyncio
|
||||
import logging
|
||||
import signal
|
||||
from typing import Callable, Coroutine
|
||||
|
||||
import nio
|
||||
|
||||
from alerting import alerts
|
||||
from misc import checks
|
||||
|
||||
# Configure the root logger to emit records at INFO level and above.
logging.basicConfig(level=logging.INFO)

# Shutdown flag: set to True by stop_gracefully() and polled by main().
stopping: bool = False
|
||||
|
||||
|
||||
def stop_gracefully(signum, frame):
    """Signal handler that requests shutdown.

    Sets the module-level ``stopping`` flag to ``True``. ``signum`` and
    ``frame`` are the arguments passed to signal handlers; neither is used.
    """
    global stopping
    stopping = True
|
||||
|
||||
|
||||
async def checker(check: Callable | Coroutine, interval_secs: int, client: nio.AsyncClient, *args, **kwargs):
    """Run ``check`` periodically and send every alert it returns.

    Args:
        check: Either a callable (sync, or returning a coroutine) that is
            invoked with ``*args``/``**kwargs`` on every iteration, or an
            already-created coroutine object.
        interval_secs: Seconds to sleep between two runs of ``check``.
        client: Client handed to ``alerts.send_alert`` for each alert.
        *args: Positional arguments forwarded to a callable ``check``.
        **kwargs: Keyword arguments forwarded to a callable ``check``.

    Raises:
        TypeError: If ``check`` is neither callable nor a coroutine object.

    A coroutine object can be awaited only once, so when ``check`` is one it
    is run a single time and the checker then returns instead of looping
    (awaiting it again would raise ``RuntimeError``).
    """
    logging.info(f"Calling {check.__name__}")

    one_shot = not isinstance(check, Callable)
    if one_shot and not isinstance(check, Coroutine):
        raise TypeError(f"check is {type(check)}, neither function nor coroutine")

    while True:
        if one_shot:
            result = await check
        else:
            result = check(*args, **kwargs)
            # The callable may be an async function; resolve its coroutine.
            if isinstance(result, Coroutine):
                result = await result

        logging.info(f"Got {len(result)} alerts")
        for alert in result:
            await alerts.send_alert(alert, client)

        if one_shot:
            # The coroutine is exhausted and cannot be re-awaited.
            logging.warning(f"{check.__name__} is a coroutine object and cannot be re-run; stopping this checker")
            return

        await asyncio.sleep(interval_secs)
|
||||
|
||||
|
||||
async def main():
    """Run the alerting service until the ``stopping`` flag is set.

    Installs ``stop_gracefully`` as the SIGTERM handler, obtains a client from
    ``alerts.get_client()``, and runs the checkers as tasks in a task group.
    The ``stopping`` flag is polled every 3 seconds.

    The client is closed in a ``finally`` block, so it is released both on a
    requested shutdown and when a checker task fails and the task group
    propagates the error.

    Raises:
        SystemExit: Once ``stopping`` has been set.
    """
    signal.signal(signal.SIGTERM, stop_gracefully)
    client = await alerts.get_client()
    try:
        checkers = (checker(checks.temp_check, 5 * 60, client),)
        async with asyncio.TaskGroup() as tg:
            # The task group keeps its own references to the tasks it creates.
            for c in checkers:
                tg.create_task(c)
            while not stopping:
                await asyncio.sleep(3)
            # Leaving the task group with SystemExit cancels the checker tasks
            # and then re-raises the SystemExit.
            raise SystemExit
    finally:
        await client.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the service coroutine on a fresh event loop.
    service = main()
    asyncio.run(service)
|
||||
Loading…
Add table
Add a link
Reference in a new issue