diff --git a/.gitignore b/.gitignore index 0e5ac79..2db7150 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .venv -__pycache__ \ No newline at end of file +__pycache__ +alerting/credentials.json diff --git a/README.md b/README.md index 94cbb2d..6bb4e40 100644 --- a/README.md +++ b/README.md @@ -1 +1,18 @@ # lego-monitoring + +## Prerequisites + +* `pacman -S libolm` +* `pip -r requirements.txt` + +## Configuring + +* Run `alerting/login.py` once to generate credentials file +* Invite the bot account to the room (you have to accept the invite manually) +* Set room ID in `alerting/common.py` + +## Running + +* `prettyprint.py` -- check and print all sensors +* `service.py` -- launch service +* `lego-monitoring.service` is a systemd unit that starts `service.py` diff --git a/alerting/alerts.py b/alerting/alerts.py new file mode 100644 index 0000000..78d71fd --- /dev/null +++ b/alerting/alerts.py @@ -0,0 +1,78 @@ +import json +from dataclasses import dataclass +from enum import Enum, StrEnum +from typing import Optional + +import aiofiles +import nio + +from alerting.common import CREDS_FILE, ROOM_ID + + +class AlertType(StrEnum): + TEST = "TEST" + RAM = "RAM" # TODO + CPU = "CPU" # TODO + TEMP = "TEMP" + LOGIN = "LOGIN" # TODO + SMART = "SMART" # TODO + RAID = "RAID" # TODO + + +class Severity(Enum): + INFO = 1 + WARNING = 2 + CRITICAL = 3 + + +@dataclass +class Alert: + alert_type: AlertType + message: str + severity: Severity + + +async def get_client() -> nio.AsyncClient: + """ + Returns a Matrix client. 
+ It is better to call get_client once and use it for multiple send_alert calls + """ + async with aiofiles.open(CREDS_FILE) as f: + contents = await f.read() + creds = json.loads(contents) + client = nio.AsyncClient(creds["homeserver"]) + client.access_token = creds["access_token"] + client.user_id = creds["user_id"] + client.device_id = creds["device_id"] + return client + + +def format_message(alert: Alert) -> str: + match alert.severity: + case Severity.INFO: + severity_emoji = "ℹ️" + case Severity.WARNING: + severity_emoji = "⚠️" + case Severity.CRITICAL: + severity_emoji = "🆘" + message = f"{severity_emoji} {alert.alert_type} Alert\n{alert.message}" + return message + + +async def send_alert(alert: Alert, client: Optional[nio.AsyncClient] = None) -> None: + if client is None: + temp_client = True + client = await get_client() + else: + temp_client = False + message = format_message(alert) + await client.room_send( + room_id=ROOM_ID, + message_type="m.room.message", + content={ + "msgtype": "m.text", + "body": message, + }, + ) + if temp_client: + await client.close() diff --git a/alerting/common.py b/alerting/common.py new file mode 100644 index 0000000..15d517b --- /dev/null +++ b/alerting/common.py @@ -0,0 +1,8 @@ +import os +from pathlib import Path + +CREDS_FILE = (Path(os.path.dirname(os.path.realpath(__file__))) / "credentials.json").resolve() +HOMESERVER = "https://matrix.altau.su" +USER_ID = "@AlertBot:altau.su" +DEVICE_NAME = "lego" +ROOM_ID = "!aSCaiSJfLHslrJrHiJ:altau.su" diff --git a/alerting/login.py b/alerting/login.py new file mode 100755 index 0000000..bd20dd2 --- /dev/null +++ b/alerting/login.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 + +import asyncio +import getpass +import json +import os +import stat + +from common import CREDS_FILE, DEVICE_NAME, HOMESERVER, USER_ID +from nio import AsyncClient, LoginResponse + + +async def main() -> None: + if os.path.exists(CREDS_FILE): + print(f"Creds already configured in {CREDS_FILE}") + raise 
def temp_check() -> list[alerts.Alert]:
    """
    Checks every temperature sensor and returns one alert per overheating sensor.

    A reading above the sensor's critical_temp gives a CRITICAL alert; otherwise
    a reading above its highest_temp gives a WARNING alert. Sensors with neither
    threshold set, or reading at or below their thresholds, produce nothing.
    """
    alert_list: list[alerts.Alert] = []
    for sensor_list in sensors.Sensors.get_temperatures().values():
        for sensor in sensor_list:
            if sensor.critical_temp is not None and sensor.current_temp > sensor.critical_temp:
                limit = sensor.critical_temp
                severity = alerts.Severity.CRITICAL
            elif sensor.highest_temp is not None and sensor.current_temp > sensor.highest_temp:
                # Report the threshold that was actually exceeded; critical_temp
                # may be None or higher than the current reading here.
                limit = sensor.highest_temp
                severity = alerts.Severity.WARNING
            else:
                continue
            alert_list.append(
                alerts.Alert(
                    alert_type=alerts.AlertType.TEMP,
                    message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {limit}°C",
                    severity=severity,
                )
            )
    return alert_list
+matrix-nio[e2e]==0.24.0 diff --git a/sensors.py b/sensors.py deleted file mode 100644 index ef46f9f..0000000 --- a/sensors.py +++ /dev/null @@ -1,54 +0,0 @@ -from dataclasses import dataclass -from psutil import sensors_temperatures - - -@dataclass -class TemperatureSensor: - sensor_type: str - sensor_label: str - current_temp: float - highest_temp: float | None = None - critical_temp: float | None = None - - -class Sensors: - @staticmethod - def get_temperatures() -> dict[str, list[TemperatureSensor]]: - psutil_temp_sensors = sensors_temperatures() - - temp_sensors = {} - - for s_type, sensors in psutil_temp_sensors.items(): - if s_type not in temp_sensors.keys(): - temp_sensors[s_type] = [] - match(s_type): - case "nvme": - for sensor in sensors: - temp_sensors[s_type].append(TemperatureSensor( - sensor_type=s_type, - sensor_label=sensor.label, - current_temp=sensor.current, - highest_temp=sensor.high, - critical_temp=sensor.critical - )) - case "amdgpu": - temp_sensors[s_type].append(TemperatureSensor( - sensor_type=s_type, - sensor_label="Integrated GPU", - current_temp=sensors[0].current, - )) - case "k10temp": - temp_sensors[s_type].append(TemperatureSensor( - sensor_type=s_type, - sensor_label="AMD CPU", - current_temp=sensors[0].current, - critical_temp=95.0 # hardcoded because we have R9 7900X - )) - - - return temp_sensors - - -if __name__ == "__main__": - for i in Sensors.get_temperatures(): - print(i) diff --git a/service.py b/service.py new file mode 100755 index 0000000..07d9fab --- /dev/null +++ b/service.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +import asyncio +import logging +import signal +from typing import Callable, Coroutine + +import nio + +from alerting import alerts +from misc import checks + +logging.basicConfig(level=logging.INFO) + +stopping = False + + +def stop_gracefully(signum, frame): + global stopping + stopping = True + + +async def checker(check: Callable | Coroutine, interval_secs: int, client: nio.AsyncClient, *args, 
**kwargs): + while True: + logging.info(f"Calling {check.__name__}") + if isinstance(check, Callable): + result = check(*args, **kwargs) + if isinstance(result, Coroutine): + result = await result + elif isinstance(check, Coroutine): + result = await check + else: + raise TypeError(f"check is {type(check)}, neither function nor coroutine") + logging.info(f"Got {len(result)} alerts") + for alert in result: + await alerts.send_alert(alert, client) + await asyncio.sleep(interval_secs) + + +async def main(): + signal.signal(signal.SIGTERM, stop_gracefully) + client = await alerts.get_client() + checkers = (checker(checks.temp_check, 5 * 60, client),) + async with asyncio.TaskGroup() as tg: + checker_tasks: set[asyncio.Task] = set() + for c in checkers: + task = tg.create_task(c) + checker_tasks.add(task) + while True: + if stopping: + await client.close() + raise SystemExit + else: + await asyncio.sleep(3) + + +if __name__ == "__main__": + asyncio.run(main())