mirror of
https://forgejo.altau.su/lego/lego-monitoring.git
synced 2026-03-10 04:41:10 +00:00
alerting service
This commit is contained in:
parent
84ab61eb00
commit
fcc02da845
5 changed files with 143 additions and 55 deletions
25
misc/checks.py
Normal file
25
misc/checks.py
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
from alerting import alerts
|
||||||
|
from misc import sensors
|
||||||
|
|
||||||
|
|
||||||
|
def temp_check() -> list[alerts.Alert]:
    """Compare every temperature reading against its sensor's thresholds.

    A reading above ``critical_temp`` produces a CRITICAL alert; otherwise a
    reading above ``highest_temp`` produces a WARNING alert. A threshold that
    is ``None`` is never considered exceeded.

    Returns:
        One alert per sensor that exceeds a threshold, in the order the
        sensors are returned by ``sensors.Sensors.get_temperatures()``.
        The list is empty when nothing is over its limit.
    """
    alert_list = []
    for sensor_list in sensors.Sensors.get_temperatures().values():
        for sensor in sensor_list:
            if sensor.critical_temp is not None and sensor.current_temp > sensor.critical_temp:
                severity = alerts.Severity.CRITICAL
                threshold = sensor.critical_temp
            elif sensor.highest_temp is not None and sensor.current_temp > sensor.highest_temp:
                severity = alerts.Severity.WARNING
                # Report the limit that was actually crossed; critical_temp
                # may be None (or simply a different value) on this branch.
                threshold = sensor.highest_temp
            else:
                continue
            alert_list.append(
                alerts.Alert(
                    alert_type=alerts.AlertType("TEMP"),
                    message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {threshold}°C",
                    severity=severity,
                )
            )
    return alert_list
|
||||||
60
misc/sensors.py
Normal file
60
misc/sensors.py
Normal file
|
|
@ -0,0 +1,60 @@
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from psutil import sensors_temperatures
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class TemperatureSensor:
|
||||||
|
sensor_type: str
|
||||||
|
sensor_label: str
|
||||||
|
current_temp: float
|
||||||
|
highest_temp: float | None = None
|
||||||
|
critical_temp: float | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class Sensors:
    """Static helpers that read hardware sensors."""

    @staticmethod
    def get_temperatures() -> dict[str, list[TemperatureSensor]]:
        """Read temperatures via ``sensors_temperatures()`` and normalize them.

        Every sensor type reported becomes a key in the result. Only the
        "nvme", "amdgpu" and "k10temp" types are translated into
        ``TemperatureSensor`` entries; any other type maps to an empty list.

        Returns:
            Mapping of sensor type name to its list of readings.
        """
        readings_by_type = {}

        for chip, entries in sensors_temperatures().items():
            readings = []
            if chip == "nvme":
                # Every nvme entry is kept, with the limits it reports.
                readings = [
                    TemperatureSensor(
                        sensor_type=chip,
                        sensor_label=entry.label,
                        current_temp=entry.current,
                        highest_temp=entry.high,
                        critical_temp=entry.critical,
                    )
                    for entry in entries
                ]
            elif chip == "amdgpu":
                # Only the first entry is used, with no thresholds attached.
                readings.append(
                    TemperatureSensor(
                        sensor_type=chip,
                        sensor_label="Integrated GPU",
                        current_temp=entries[0].current,
                    )
                )
            elif chip == "k10temp":
                readings.append(
                    TemperatureSensor(
                        sensor_type=chip,
                        sensor_label="AMD CPU",
                        current_temp=entries[0].current,
                        critical_temp=95.0,  # hardcoded because we have R9 7900X
                    )
                )
            readings_by_type[chip] = readings

        return readings_by_type
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Iterating the returned dict yields its keys, so this prints one
    # sensor type name per line.
    for sensor_type in Sensors.get_temperatures():
        print(sensor_type)
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
from colorama import Back, Fore, Style
|
from colorama import Back, Fore, Style
|
||||||
|
|
||||||
from sensors import Sensors
|
from misc.sensors import Sensors
|
||||||
|
|
||||||
|
|
||||||
def pretty_print():
|
def pretty_print():
|
||||||
|
|
|
||||||
54
sensors.py
54
sensors.py
|
|
@ -1,54 +0,0 @@
|
||||||
from dataclasses import dataclass
|
|
||||||
from psutil import sensors_temperatures
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class TemperatureSensor:
|
|
||||||
sensor_type: str
|
|
||||||
sensor_label: str
|
|
||||||
current_temp: float
|
|
||||||
highest_temp: float | None = None
|
|
||||||
critical_temp: float | None = None
|
|
||||||
|
|
||||||
|
|
||||||
class Sensors:
|
|
||||||
@staticmethod
|
|
||||||
def get_temperatures() -> dict[str, list[TemperatureSensor]]:
|
|
||||||
psutil_temp_sensors = sensors_temperatures()
|
|
||||||
|
|
||||||
temp_sensors = {}
|
|
||||||
|
|
||||||
for s_type, sensors in psutil_temp_sensors.items():
|
|
||||||
if s_type not in temp_sensors.keys():
|
|
||||||
temp_sensors[s_type] = []
|
|
||||||
match(s_type):
|
|
||||||
case "nvme":
|
|
||||||
for sensor in sensors:
|
|
||||||
temp_sensors[s_type].append(TemperatureSensor(
|
|
||||||
sensor_type=s_type,
|
|
||||||
sensor_label=sensor.label,
|
|
||||||
current_temp=sensor.current,
|
|
||||||
highest_temp=sensor.high,
|
|
||||||
critical_temp=sensor.critical
|
|
||||||
))
|
|
||||||
case "amdgpu":
|
|
||||||
temp_sensors[s_type].append(TemperatureSensor(
|
|
||||||
sensor_type=s_type,
|
|
||||||
sensor_label="Integrated GPU",
|
|
||||||
current_temp=sensors[0].current,
|
|
||||||
))
|
|
||||||
case "k10temp":
|
|
||||||
temp_sensors[s_type].append(TemperatureSensor(
|
|
||||||
sensor_type=s_type,
|
|
||||||
sensor_label="AMD CPU",
|
|
||||||
current_temp=sensors[0].current,
|
|
||||||
critical_temp=95.0 # hardcoded because we have R9 7900X
|
|
||||||
))
|
|
||||||
|
|
||||||
|
|
||||||
return temp_sensors
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Prints the keys of the returned mapping, one per line.
    for type_name in Sensors.get_temperatures():
        print(type_name)
|
|
||||||
57
service.py
Executable file
57
service.py
Executable file
|
|
@ -0,0 +1,57 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import signal
|
||||||
|
from typing import Callable, Coroutine
|
||||||
|
|
||||||
|
import nio
|
||||||
|
|
||||||
|
from alerting import alerts
|
||||||
|
from misc import checks
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
|
# Shutdown flag: set by the signal handler, polled by main().
stopping = False


def stop_gracefully(signum, frame):
    """Signal handler that requests shutdown by setting ``stopping``.

    Both arguments are required by the signal-handler signature and are
    ignored here.
    """
    global stopping
    stopping = True
|
||||||
|
|
||||||
|
|
||||||
|
async def checker(check: Callable | Coroutine, interval_secs: int, client: nio.AsyncClient, *args, **kwargs):
    """Run ``check`` periodically and send every alert it returns.

    Args:
        check: A function (sync or async) called as ``check(*args, **kwargs)``
            on every cycle, or an already-created coroutine object.
        interval_secs: Seconds to sleep between cycles.
        client: Passed to ``alerts.send_alert`` together with each alert.
        *args, **kwargs: Forwarded to ``check`` when it is callable.

    Raises:
        TypeError: If ``check`` is neither callable nor a coroutine object.

    Note:
        A coroutine object can be awaited only once, so when ``check`` is one
        it is run for a single cycle and the checker then returns. Pass the
        coroutine *function* to get periodic execution.
    """
    # functools.partial objects and some callables have no __name__.
    check_name = getattr(check, "__name__", repr(check))
    logging.info("Calling %s", check_name)

    one_shot = isinstance(check, Coroutine)
    if not one_shot and not callable(check):
        raise TypeError(f"check is {type(check)}, neither function nor coroutine")

    while True:
        if one_shot:
            result = await check
        else:
            result = check(*args, **kwargs)
            # An async check function returns a coroutine that still has to run.
            if isinstance(result, Coroutine):
                result = await result

        logging.info("Got %d alerts", len(result))
        for alert in result:
            await alerts.send_alert(alert, client)

        if one_shot:
            # Awaiting the same coroutine object again would raise
            # RuntimeError, so stop instead of crashing on the next cycle.
            logging.warning("%s is a coroutine object and cannot be re-run; stopping this checker", check_name)
            return
        await asyncio.sleep(interval_secs)
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """Start all checkers and run them until SIGTERM is received.

    Installs ``stop_gracefully`` as the SIGTERM handler, obtains the client
    from ``alerts.get_client()``, and runs every checker as a task in one
    ``asyncio.TaskGroup``. The ``stopping`` flag is polled every 3 seconds.

    Raises:
        SystemExit: Once ``stopping`` has been set.
    """
    signal.signal(signal.SIGTERM, stop_gracefully)
    client = await alerts.get_client()
    try:
        checkers = (checker(checks.temp_check, 5 * 60, client),)
        async with asyncio.TaskGroup() as tg:
            # The task group keeps its own references to the tasks.
            for c in checkers:
                tg.create_task(c)
            while not stopping:
                await asyncio.sleep(3)
            # Leaving the group with an exception cancels the checker tasks
            # and waits for them before the exception propagates.
            raise SystemExit
    finally:
        # Close the client only after the checkers have finished, and also
        # when a checker crashed and the group was torn down.
        await client.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: run the service coroutine to completion.
    asyncio.run(main())
|
||||||
Loading…
Add table
Add a link
Reference in a new issue