mirror of
https://forgejo.altau.su/lego/lego-monitoring.git
synced 2026-03-10 04:41:10 +00:00
Merge branch 'alert-sender' into 'main'
Alert sender service See merge request lego/lego-monitoring!2
This commit is contained in:
commit
a4bbe835d3
12 changed files with 300 additions and 56 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -1,2 +1,3 @@
|
||||||
.venv
|
.venv
|
||||||
__pycache__
|
__pycache__
|
||||||
|
alerting/credentials.json
|
||||||
|
|
|
||||||
17
README.md
17
README.md
|
|
@ -1 +1,18 @@
|
||||||
# lego-monitoring
|
# lego-monitoring
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
* `pacman -S libolm`
|
||||||
|
* `pip install -r requirements.txt`
|
||||||
|
|
||||||
|
## Configuring
|
||||||
|
|
||||||
|
* Run `alerting/login.py` once to generate credentials file
|
||||||
|
* Invite the bot account to the room (you have to accept the invite manually)
|
||||||
|
* Set room ID in `alerting/common.py`
|
||||||
|
|
||||||
|
## Running
|
||||||
|
|
||||||
|
* `prettyprint.py` -- check and print all sensors
|
||||||
|
* `service.py` -- launch service
|
||||||
|
* `lego-monitoring.service` is a systemd unit that starts `service.py`
|
||||||
|
|
|
||||||
78
alerting/alerts.py
Normal file
78
alerting/alerts.py
Normal file
|
|
@ -0,0 +1,78 @@
|
||||||
|
import json
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from enum import Enum, StrEnum
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import aiofiles
|
||||||
|
import nio
|
||||||
|
|
||||||
|
from alerting.common import CREDS_FILE, ROOM_ID
|
||||||
|
|
||||||
|
|
||||||
|
class AlertType(StrEnum):
|
||||||
|
TEST = "TEST"
|
||||||
|
RAM = "RAM" # TODO
|
||||||
|
CPU = "CPU" # TODO
|
||||||
|
TEMP = "TEMP"
|
||||||
|
LOGIN = "LOGIN" # TODO
|
||||||
|
SMART = "SMART" # TODO
|
||||||
|
RAID = "RAID" # TODO
|
||||||
|
|
||||||
|
|
||||||
|
class Severity(Enum):
    """How serious an alert is; a larger value means a more serious alert."""

    INFO = 1
    WARNING = 2
    CRITICAL = 3
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Alert:
    """A single notification to be delivered to the alert room."""

    # Category of the alert (e.g. AlertType.TEMP).
    alert_type: AlertType
    # Human-readable details placed under the alert header.
    message: str
    # Seriousness of the alert; selects the emoji in the formatted message.
    severity: Severity
|
||||||
|
|
||||||
|
|
||||||
|
async def get_client() -> nio.AsyncClient:
    """
    Build a Matrix client from the saved credentials file.

    Reads CREDS_FILE as JSON and copies the stored access token, user ID and
    device ID onto a new ``nio.AsyncClient`` for the stored homeserver.

    It is better to call get_client once and reuse the result for multiple
    send_alert calls.
    """
    async with aiofiles.open(CREDS_FILE) as creds_file:
        creds = json.loads(await creds_file.read())

    matrix_client = nio.AsyncClient(creds["homeserver"])
    # Restore the saved session instead of logging in again.
    for field in ("access_token", "user_id", "device_id"):
        setattr(matrix_client, field, creds[field])
    return matrix_client
|
||||||
|
|
||||||
|
|
||||||
|
def format_message(alert: Alert) -> str:
|
||||||
|
match alert.severity:
|
||||||
|
case Severity.INFO:
|
||||||
|
severity_emoji = "ℹ️"
|
||||||
|
case Severity.WARNING:
|
||||||
|
severity_emoji = "⚠️"
|
||||||
|
case Severity.CRITICAL:
|
||||||
|
severity_emoji = "🆘"
|
||||||
|
message = f"{severity_emoji} {alert.alert_type} Alert\n{alert.message}"
|
||||||
|
return message
|
||||||
|
|
||||||
|
|
||||||
|
async def send_alert(alert: Alert, client: Optional[nio.AsyncClient] = None) -> None:
    """
    Send an alert to the configured room (ROOM_ID) as an ``m.text`` message.

    :param alert: the alert to format and send.
    :param client: an existing Matrix client to reuse. When omitted, a
        temporary client is created via get_client() and closed again before
        returning, even if sending fails.
    """
    # Format first: a formatting failure then never leaves a client open.
    message = format_message(alert)

    temp_client = client is None
    if temp_client:
        client = await get_client()
    try:
        await client.room_send(
            room_id=ROOM_ID,
            message_type="m.room.message",
            content={
                "msgtype": "m.text",
                "body": message,
            },
        )
    finally:
        # Only close clients created here; a caller-supplied one stays open.
        if temp_client:
            await client.close()
|
||||||
8
alerting/common.py
Normal file
8
alerting/common.py
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Absolute path of credentials.json, located in the same directory as this module.
CREDS_FILE = (Path(os.path.realpath(__file__)).parent / "credentials.json").resolve()
|
||||||
|
HOMESERVER = "https://matrix.altau.su"
|
||||||
|
USER_ID = "@AlertBot:altau.su"
|
||||||
|
DEVICE_NAME = "lego"
|
||||||
|
ROOM_ID = "!aSCaiSJfLHslrJrHiJ:altau.su"
|
||||||
41
alerting/login.py
Executable file
41
alerting/login.py
Executable file
|
|
@ -0,0 +1,41 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import getpass
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import stat
|
||||||
|
|
||||||
|
from common import CREDS_FILE, DEVICE_NAME, HOMESERVER, USER_ID
|
||||||
|
from nio import AsyncClient, LoginResponse
|
||||||
|
|
||||||
|
|
||||||
|
async def main() -> None:
    """
    Log in interactively once and store the resulting session in CREDS_FILE.

    Exits immediately when the credentials file already exists. Otherwise
    prompts for the password, logs in as USER_ID on HOMESERVER and, on success,
    writes homeserver, user ID, device ID and access token as JSON to a file
    readable and writable by its owner only.

    :raises SystemExit: if CREDS_FILE already exists.
    :raises Exception: if the server did not answer with a LoginResponse.
    """
    if os.path.exists(CREDS_FILE):
        print(f"Creds already configured in {CREDS_FILE}")
        raise SystemExit

    matrix = AsyncClient(HOMESERVER, USER_ID)
    password = getpass.getpass()
    resp = await matrix.login(password, device_name=DEVICE_NAME)
    await matrix.close()

    if not isinstance(resp, LoginResponse):
        raise Exception(f"Failed to log in: {resp}")

    # Create the file empty and restrict its mode before the token is written.
    open(CREDS_FILE, "w").close()
    os.chmod(CREDS_FILE, stat.S_IRUSR | stat.S_IWUSR)
    with open(CREDS_FILE, "w") as creds_out:
        session = {
            "homeserver": HOMESERVER,
            "user_id": resp.user_id,
            "device_id": resp.device_id,
            "access_token": resp.access_token,
        }
        json.dump(session, creds_out)
    print(f"Logged in as {resp.user_id}. Credentials saved to {CREDS_FILE}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
10
lego-monitoring.service
Normal file
10
lego-monitoring.service
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
[Unit]
|
||||||
|
After=docker.service
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
ExecStart=/opt/lego-monitoring/.venv/bin/python /opt/lego-monitoring/service.py
|
||||||
|
Type=exec
|
||||||
|
Restart=always
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
25
misc/checks.py
Normal file
25
misc/checks.py
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
from alerting import alerts
|
||||||
|
from misc import sensors
|
||||||
|
|
||||||
|
|
||||||
|
def temp_check() -> list[alerts.Alert]:
    """
    Compare every temperature sensor against its thresholds.

    A reading above the sensor's critical threshold yields a CRITICAL alert;
    otherwise a reading above its "high" threshold yields a WARNING alert.
    Sensors without the relevant threshold, or within limits, yield nothing.

    :return: list of TEMP alerts, one per offending sensor (possibly empty).
    """
    alert_list = []
    temps = sensors.Sensors.get_temperatures()
    for sensor_list in temps.values():
        for sensor in sensor_list:
            if sensor.critical_temp is not None and sensor.current_temp > sensor.critical_temp:
                threshold = sensor.critical_temp
                severity = alerts.Severity.CRITICAL
            elif sensor.highest_temp is not None and sensor.current_temp > sensor.highest_temp:
                # Report the threshold that was actually exceeded; the message
                # used to print critical_temp here, which may even be None.
                threshold = sensor.highest_temp
                severity = alerts.Severity.WARNING
            else:
                continue
            alert_list.append(
                alerts.Alert(
                    alert_type=alerts.AlertType.TEMP,
                    message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {threshold}°C",
                    severity=severity,
                )
            )
    return alert_list
|
||||||
60
misc/sensors.py
Normal file
60
misc/sensors.py
Normal file
|
|
@ -0,0 +1,60 @@
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from psutil import sensors_temperatures
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class TemperatureSensor:
|
||||||
|
sensor_type: str
|
||||||
|
sensor_label: str
|
||||||
|
current_temp: float
|
||||||
|
highest_temp: float | None = None
|
||||||
|
critical_temp: float | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class Sensors:
    """Namespace for reading hardware sensors."""

    @staticmethod
    def get_temperatures() -> dict[str, list[TemperatureSensor]]:
        """
        Collect temperature readings grouped by psutil sensor type.

        Every type reported by psutil gets a key in the result. Only "nvme",
        "amdgpu" and "k10temp" are translated into TemperatureSensor entries;
        any other type maps to an empty list.
        """
        readings_by_type: dict[str, list[TemperatureSensor]] = {}

        for chip, readings in sensors_temperatures().items():
            collected = readings_by_type.setdefault(chip, [])

            if chip == "nvme":
                # Every NVMe reading is kept, with the thresholds psutil reports.
                collected.extend(
                    TemperatureSensor(
                        sensor_type=chip,
                        sensor_label=reading.label,
                        current_temp=reading.current,
                        highest_temp=reading.high,
                        critical_temp=reading.critical,
                    )
                    for reading in readings
                )
            elif chip == "amdgpu":
                # Only the first reading is used and no thresholds are set.
                collected.append(
                    TemperatureSensor(
                        sensor_type=chip,
                        sensor_label="Integrated GPU",
                        current_temp=readings[0].current,
                    )
                )
            elif chip == "k10temp":
                collected.append(
                    TemperatureSensor(
                        sensor_type=chip,
                        sensor_label="AMD CPU",
                        current_temp=readings[0].current,
                        critical_temp=95.0,  # hardcoded because we have R9 7900X
                    )
                )

        return readings_by_type
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
for i in Sensors.get_temperatures():
|
||||||
|
print(i)
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
from colorama import Back, Fore, Style
|
from colorama import Back, Fore, Style
|
||||||
|
|
||||||
from sensors import Sensors
|
from misc.sensors import Sensors
|
||||||
|
|
||||||
|
|
||||||
def pretty_print():
|
def pretty_print():
|
||||||
|
|
@ -1,2 +1,3 @@
|
||||||
colorama==0.4.6
|
colorama==0.4.6
|
||||||
psutil==5.9.8
|
psutil==5.9.8
|
||||||
|
matrix-nio[e2e]==0.24.0
|
||||||
|
|
|
||||||
54
sensors.py
54
sensors.py
|
|
@ -1,54 +0,0 @@
|
||||||
from dataclasses import dataclass
|
|
||||||
from psutil import sensors_temperatures
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class TemperatureSensor:
|
|
||||||
sensor_type: str
|
|
||||||
sensor_label: str
|
|
||||||
current_temp: float
|
|
||||||
highest_temp: float | None = None
|
|
||||||
critical_temp: float | None = None
|
|
||||||
|
|
||||||
|
|
||||||
class Sensors:
|
|
||||||
@staticmethod
|
|
||||||
def get_temperatures() -> dict[str, list[TemperatureSensor]]:
|
|
||||||
psutil_temp_sensors = sensors_temperatures()
|
|
||||||
|
|
||||||
temp_sensors = {}
|
|
||||||
|
|
||||||
for s_type, sensors in psutil_temp_sensors.items():
|
|
||||||
if s_type not in temp_sensors.keys():
|
|
||||||
temp_sensors[s_type] = []
|
|
||||||
match(s_type):
|
|
||||||
case "nvme":
|
|
||||||
for sensor in sensors:
|
|
||||||
temp_sensors[s_type].append(TemperatureSensor(
|
|
||||||
sensor_type=s_type,
|
|
||||||
sensor_label=sensor.label,
|
|
||||||
current_temp=sensor.current,
|
|
||||||
highest_temp=sensor.high,
|
|
||||||
critical_temp=sensor.critical
|
|
||||||
))
|
|
||||||
case "amdgpu":
|
|
||||||
temp_sensors[s_type].append(TemperatureSensor(
|
|
||||||
sensor_type=s_type,
|
|
||||||
sensor_label="Integrated GPU",
|
|
||||||
current_temp=sensors[0].current,
|
|
||||||
))
|
|
||||||
case "k10temp":
|
|
||||||
temp_sensors[s_type].append(TemperatureSensor(
|
|
||||||
sensor_type=s_type,
|
|
||||||
sensor_label="AMD CPU",
|
|
||||||
current_temp=sensors[0].current,
|
|
||||||
critical_temp=95.0 # hardcoded because we have R9 7900X
|
|
||||||
))
|
|
||||||
|
|
||||||
|
|
||||||
return temp_sensors
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
for i in Sensors.get_temperatures():
|
|
||||||
print(i)
|
|
||||||
57
service.py
Executable file
57
service.py
Executable file
|
|
@ -0,0 +1,57 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import signal
|
||||||
|
from typing import Callable, Coroutine
|
||||||
|
|
||||||
|
import nio
|
||||||
|
|
||||||
|
from alerting import alerts
|
||||||
|
from misc import checks
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
|
stopping = False
|
||||||
|
|
||||||
|
|
||||||
|
def stop_gracefully(signum, frame):
    """Signal handler: set the module-level ``stopping`` flag.

    Both arguments are supplied by the ``signal`` module and are ignored.
    """
    global stopping
    stopping = True
|
||||||
|
|
||||||
|
|
||||||
|
async def checker(check: Callable | Coroutine, interval_secs: int, client: nio.AsyncClient, *args, **kwargs):
    """
    Run ``check`` forever, sending every alert it returns.

    Each cycle calls ``check(*args, **kwargs)``, awaits the result if it is a
    coroutine, sends each returned alert through ``client`` and then sleeps
    for ``interval_secs`` seconds.

    :param check: a function or coroutine function returning a sized iterable
        of alerts.
    :param interval_secs: pause between two runs of the check, in seconds.
    :param client: Matrix client used for all alerts of this checker.
    :raises TypeError: if ``check`` is not callable. This includes an already
        created coroutine object: it can be awaited only once, so it could
        never survive the second cycle of this loop.
    """
    if not isinstance(check, Callable):
        if isinstance(check, Coroutine):
            # Close it so Python does not warn about a never-awaited coroutine.
            check.close()
            raise TypeError(
                "check is a coroutine object, which can only be awaited once; "
                "pass the coroutine function itself instead"
            )
        raise TypeError(f"check is {type(check)}, neither function nor coroutine")

    # Callables such as functools.partial objects have no __name__.
    check_name = getattr(check, "__name__", repr(check))

    while True:
        logging.info(f"Calling {check_name}")
        result = check(*args, **kwargs)
        if isinstance(result, Coroutine):
            result = await result
        logging.info(f"Got {len(result)} alerts")
        for alert in result:
            await alerts.send_alert(alert, client)
        await asyncio.sleep(interval_secs)
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """
    Start all checkers and keep running until SIGTERM is received.

    Installs ``stop_gracefully`` as the SIGTERM handler, creates one shared
    Matrix client and runs every checker as a task in a TaskGroup. The
    ``stopping`` flag is polled every 3 seconds; once it is set, SystemExit is
    raised inside the group, which cancels the checker tasks.

    The client is closed in ``finally``, so it is released both on a graceful
    stop and when a checker fails, and only after the checker tasks have
    finished using it.

    :raises SystemExit: after SIGTERM has been received.
    """
    signal.signal(signal.SIGTERM, stop_gracefully)
    client = await alerts.get_client()
    checkers = (checker(checks.temp_check, 5 * 60, client),)
    try:
        async with asyncio.TaskGroup() as tg:
            # The TaskGroup keeps its own references to the tasks it creates.
            for c in checkers:
                tg.create_task(c)
            while not stopping:
                await asyncio.sleep(3)
            raise SystemExit
    finally:
        await client.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
Loading…
Add table
Add a link
Reference in a new issue