mirror of
https://forgejo.altau.su/lego/lego-monitoring.git
synced 2026-03-10 04:41:10 +00:00
add temp monitoring
This commit is contained in:
parent
19ee6f487b
commit
758438382d
13 changed files with 272 additions and 25 deletions
|
|
@ -7,17 +7,21 @@ package:
|
|||
...
|
||||
}:
|
||||
|
||||
let
|
||||
tempSensorOptions = (import ./submodules/tempSensorOptions.nix) { inherit lib; };
|
||||
in
|
||||
{
|
||||
options.services.lego-monitoring = {
|
||||
enable = lib.mkEnableOption "lego-monitoring service.";
|
||||
|
||||
enabledCheckerSets = lib.mkOption {
|
||||
enabledCheckSets = lib.mkOption {
|
||||
type = lib.types.listOf (lib.types.enum [
|
||||
"start"
|
||||
"stop"
|
||||
"temp"
|
||||
]);
|
||||
default = [ ];
|
||||
description = "List of enabled checker sets. Each checker set is a module which checks something and generates alerts based on check results.";
|
||||
description = "List of enabled check sets. Each check set is a module which checks something and generates alerts based on check results.";
|
||||
};
|
||||
|
||||
telegram = {
|
||||
|
|
@ -30,17 +34,61 @@ package:
|
|||
description = "ID of chat where to send alerts.";
|
||||
};
|
||||
};
|
||||
|
||||
checks = {
|
||||
temp = {
|
||||
sensors = lib.mkOption {
|
||||
type = lib.types.attrsOf (lib.types.submodule tempSensorOptions);
|
||||
default = { };
|
||||
description = ''
|
||||
Temp sensor override definitions. Sensors not defined here, or missing options in definitions, will be read with default parameters.
|
||||
|
||||
To get list of sensors and their default configurations, run `lego-monitoring --print-temp`.'';
|
||||
example = lib.literalExpression ''
|
||||
{
|
||||
amdgpu.readings.edge.label = "Integrated GPU";
|
||||
k10temp.readings = {
|
||||
Tctl = {
|
||||
label = "AMD CPU";
|
||||
criticalTemp = 95.0;
|
||||
};
|
||||
Tccd1.enabled = false;
|
||||
Tccd2.enabled = false;
|
||||
};
|
||||
nvme.readings = {
|
||||
"Sensor 1".enabled = false;
|
||||
"Sensor 2".enabled = false;
|
||||
};
|
||||
}
|
||||
'';
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
config = let
|
||||
cfg = config.services.lego-monitoring;
|
||||
json = pkgs.formats.json {};
|
||||
serviceConfigFile = json.generate "config.json" {
|
||||
enabled_checker_sets = cfg.enabledCheckerSets;
|
||||
enabled_check_sets = cfg.enabledCheckSets;
|
||||
telegram = with cfg.telegram; {
|
||||
creds_secret_path = credsSecretPath;
|
||||
room_id = roomId;
|
||||
};
|
||||
checks = {
|
||||
temp.sensors = lib.mapAttrs (_: sensorCfg: {
|
||||
|
||||
inherit (sensorCfg) name enabled;
|
||||
readings = lib.mapAttrs (_: readingCfg: {
|
||||
|
||||
inherit (readingCfg) label enabled;
|
||||
warning_temp = readingCfg.warningTemp;
|
||||
critical_temp = readingCfg.criticalTemp;
|
||||
|
||||
}) sensorCfg.readings;
|
||||
|
||||
}) cfg.checks.temp.sensors;
|
||||
};
|
||||
};
|
||||
in lib.mkIf cfg.enable {
|
||||
systemd.services.lego-monitoring = {
|
||||
|
|
|
|||
49
modules/submodules/tempSensorOptions.nix
Normal file
49
modules/submodules/tempSensorOptions.nix
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
# Submodule describing the override options of a single temperature sensor.
# Takes `lib` and returns a module suitable for `lib.types.submodule`.
{
  lib,
}:

let
  inherit (lib) mkOption types;

  # Overrides for one reading (one labelled temperature value) of a sensor.
  # Every nullable option defaults to null, i.e. "no override".
  tempReadingOptions = {
    options = {
      label = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = "Friendly label of the reading.";
      };
      enabled = mkOption {
        type = types.bool;
        default = true;
        description = "Whether this reading is enabled.";
      };
      warningTemp = mkOption {
        type = types.nullOr types.float;
        default = null;
        description = "Warning temperature threshold.";
      };
      criticalTemp = mkOption {
        type = types.nullOr types.float;
        default = null;
        description = "Critical temperature threshold.";
      };
    };
  };
in
{
  options = {
    name = mkOption {
      type = types.nullOr types.str;
      default = null;
      description = "Friendly name of the sensor.";
    };
    enabled = mkOption {
      type = types.bool;
      default = true;
      description = "Whether sensor is enabled.";
    };
    # Attribute names are the reading labels; values are the overrides above.
    readings = mkOption {
      type = types.attrsOf (types.submodule tempReadingOptions);
      default = { };
      description = "Overrides for specific readings of the sensor, by label.";
    };
  };
}
|
||||
|
|
@ -6,6 +6,7 @@ readme = "README.md"
|
|||
requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
"alt-utils>=0.0.6",
|
||||
"psutil>=7.0.0",
|
||||
"telethon>=1.40.0",
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,16 @@
|
|||
import argparse
|
||||
import asyncio
|
||||
import datetime
|
||||
import logging
|
||||
import signal
|
||||
import time
|
||||
|
||||
from . import checks
|
||||
from .alerting import alerts
|
||||
from .checks.temp.sensors import print_readings
|
||||
from .config import load_config
|
||||
from .core import cvars
|
||||
from .core.config import load_config
|
||||
from .core.checkers import interval_checker
|
||||
|
||||
stopping = False
|
||||
|
||||
|
|
@ -27,11 +31,21 @@ async def async_main():
|
|||
prog="lego-monitoring",
|
||||
description="Lego-monitoring service",
|
||||
)
|
||||
parser.add_argument("-c", "--config", required=True)
|
||||
parser.add_argument("-c", "--config", help="config file")
|
||||
parser.add_argument("--print-temp", help="print temp sensor readings and exit", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
config_path = parser.parse_args().config
|
||||
config = load_config(config_path)
|
||||
cvars.config.set(config)
|
||||
if args.config:
|
||||
config_path = parser.parse_args().config
|
||||
config = load_config(config_path)
|
||||
cvars.config.set(config)
|
||||
|
||||
if args.print_temp:
|
||||
print_readings()
|
||||
raise SystemExit
|
||||
|
||||
if not args.config:
|
||||
raise RuntimeError("--config must be specified in standard operating mode")
|
||||
|
||||
tg_client = await alerts.get_client()
|
||||
cvars.tg_client.set(tg_client)
|
||||
|
|
@ -41,10 +55,11 @@ async def async_main():
|
|||
alerts.send_start_alert(),
|
||||
],
|
||||
"stop": [], # this is checked later
|
||||
"temp": [interval_checker(checks.temp_check, datetime.timedelta(minutes=5))],
|
||||
}
|
||||
|
||||
checkers = []
|
||||
for enabled_set in config.enabled_checker_sets:
|
||||
for enabled_set in config.enabled_check_sets:
|
||||
for checker in checker_sets[enabled_set]:
|
||||
checkers.append(checker)
|
||||
|
||||
|
|
@ -57,7 +72,7 @@ async def async_main():
|
|||
checker_tasks.add(task)
|
||||
while True:
|
||||
if stopping:
|
||||
if "stop" in config.enabled_checker_sets:
|
||||
if "stop" in config.enabled_check_sets:
|
||||
await alerts.send_stop_alert()
|
||||
await tg_client.disconnect()
|
||||
raise SystemExit
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ async def send_start_alert() -> None:
|
|||
await send_alert(
|
||||
Alert(
|
||||
alert_type=AlertType.BOOT,
|
||||
message=f"Service running with enabled checkers: {', '.join(config.enabled_checker_sets)}",
|
||||
message=f"Service running with enabled checks: {', '.join(config.enabled_check_sets)}",
|
||||
severity=Severity.INFO,
|
||||
)
|
||||
)
|
||||
|
|
|
|||
|
|
@ -3,11 +3,11 @@ from enum import StrEnum
|
|||
|
||||
class AlertType(StrEnum):
|
||||
BOOT = "BOOT"
|
||||
TEMP = "TEMP"
|
||||
TEST = "TEST"
|
||||
# ERROR = "ERROR"
|
||||
# RAM = "RAM"
|
||||
# CPU = "CPU"
|
||||
# TEMP = "TEMP"
|
||||
# VULN = "VULN"
|
||||
# LOGIN = "LOGIN"
|
||||
# SMART = "SMART" # TODO
|
||||
|
|
|
|||
1
src/lego_monitoring/checks/__init__.py
Normal file
1
src/lego_monitoring/checks/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
from .temp import temp_check
|
||||
29
src/lego_monitoring/checks/temp/__init__.py
Normal file
29
src/lego_monitoring/checks/temp/__init__.py
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
from lego_monitoring.alerting import alerts
|
||||
from lego_monitoring.alerting.enum import AlertType, Severity
|
||||
|
||||
from . import sensors
|
||||
|
||||
IS_TESTING = False
|
||||
|
||||
|
||||
def temp_check() -> list[alerts.Alert]:
    """Build alerts for every temperature reading above one of its thresholds.

    The critical threshold is checked first; the warning threshold is only
    considered when the critical one is unset or not exceeded. Readings with
    no applicable threshold produce no alert. When ``IS_TESTING`` is true,
    any reading that has a threshold set triggers an alert regardless of its
    current temperature.

    Returns:
        The alerts generated for the current sensor readings (possibly empty).
    """
    triggered = []
    for sensor_name, sensor_readings in sensors.get_readings().items():
        for reading in sensor_readings:
            current = reading.current_temp
            critical = reading.critical_temp
            warning = reading.warning_temp

            if critical is not None and (IS_TESTING or current > critical):
                severity, threshold = Severity.CRITICAL, critical
            elif warning is not None and (IS_TESTING or current > warning):
                severity, threshold = Severity.WARNING, warning
            else:
                continue

            triggered.append(
                alerts.Alert(
                    alert_type=AlertType.TEMP,
                    message=f"{sensor_name} {reading.label}: {current}°C > {threshold}°C",
                    severity=severity,
                )
            )
    return triggered
|
||||
66
src/lego_monitoring/checks/temp/sensors.py
Normal file
66
src/lego_monitoring/checks/temp/sensors.py
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
from psutil import sensors_temperatures
|
||||
|
||||
from lego_monitoring.config.checks.temp import TempSensorConfig
|
||||
from lego_monitoring.core import cvars
|
||||
|
||||
|
||||
@dataclass
class TemperatureReading:
    """One temperature value of a sensor, with the thresholds that apply to it."""

    # Label shown for this reading (configured override or the raw label).
    label: str
    # Temperature currently reported for this reading.
    current_temp: float
    # Warning threshold; None when neither config nor hardware provides one.
    warning_temp: Optional[float]
    # Critical threshold; None when neither config nor hardware provides one.
    critical_temp: Optional[float]
|
||||
|
||||
|
||||
def print_readings():
    """Print every sensor and its readings to stdout in a human-readable form.

    For each sensor a header line is printed, followed by the label, current
    temperature and both thresholds of each of its readings.
    """
    for sensor_name, entries in get_readings().items():
        print(f"*** Sensor {sensor_name}***\n")
        for entry in entries:
            # One call per reading; sep="\n" yields the same four lines.
            print(
                f"Label: {entry.label}",
                f"Current temp: {entry.current_temp}",
                f"Warning temp: {entry.warning_temp}",
                f"Critical temp: {entry.critical_temp}\n",
                sep="\n",
            )
|
||||
|
||||
|
||||
def get_readings() -> dict[str, list[TemperatureReading]]:
    """Collect current temperature readings with configured overrides applied.

    Readings come from ``psutil.sensors_temperatures()``. For each sensor and
    reading, the matching entry of ``checks.temp.sensors`` in the active
    config (if any) may disable it, rename it, or override its thresholds.
    Anything not configured is reported with the values psutil provides.

    Returns:
        Mapping of friendly sensor name to the list of its enabled readings.
        Sensors that are enabled but have no enabled readings map to an
        empty list.
    """
    try:
        config = cvars.config.get().checks.temp.sensors
    except LookupError:
        # No config has been set in this context (e.g. `--print-temp`
        # without `--config`): fall back to "no overrides".
        config: dict[str, TempSensorConfig] = {}

    sensor_readings: dict[str, list[TemperatureReading]] = {}

    for sensor, readings in sensors_temperatures().items():
        sensor_cfg = config.get(sensor)
        if sensor_cfg is not None and not sensor_cfg.enabled:
            continue

        if sensor_cfg is not None:
            friendly_name = sensor_cfg.name or sensor
            reading_overrides = sensor_cfg.readings
        else:
            friendly_name = sensor
            reading_overrides = {}

        # setdefault (not plain assignment): if two sensors resolve to the
        # same friendly name, their readings are merged instead of the later
        # sensor silently discarding the earlier one's readings.
        collected = sensor_readings.setdefault(friendly_name, [])

        for r in readings:
            override = reading_overrides.get(r.label)
            if override is None:
                collected.append(
                    TemperatureReading(
                        label=r.label,
                        current_temp=r.current,
                        warning_temp=r.high,
                        critical_temp=r.critical,
                    )
                )
                continue

            if not override.enabled:
                continue

            collected.append(
                TemperatureReading(
                    label=override.label or r.label,
                    current_temp=r.current,
                    # Compare against None explicitly so that a configured
                    # threshold of 0.0 is honoured rather than treated as unset.
                    warning_temp=r.high if override.warning_temp is None else override.warning_temp,
                    critical_temp=r.critical if override.critical_temp is None else override.critical_temp,
                )
            )

    return sensor_readings
|
||||
|
|
@ -3,6 +3,13 @@ from dataclasses import dataclass
|
|||
|
||||
from alt_utils import NestedDeserializableDataclass
|
||||
|
||||
from .checks.temp import TempCheckConfig
|
||||
|
||||
|
||||
@dataclass
class ChecksConfig(NestedDeserializableDataclass):
    """Per-check configuration sections (the ``checks`` key of the config)."""

    # Settings for the temperature check.
    temp: TempCheckConfig
|
||||
|
||||
|
||||
@dataclass
|
||||
class TelegramConfig:
|
||||
|
|
@ -12,7 +19,8 @@ class TelegramConfig:
|
|||
|
||||
@dataclass
|
||||
class Config(NestedDeserializableDataclass):
|
||||
enabled_checker_sets: list[str]
|
||||
enabled_check_sets: list[str]
|
||||
checks: ChecksConfig
|
||||
telegram: TelegramConfig
|
||||
|
||||
|
||||
24
src/lego_monitoring/config/checks/temp.py
Normal file
24
src/lego_monitoring/config/checks/temp.py
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
from alt_utils import NestedDeserializableDataclass
|
||||
|
||||
|
||||
@dataclass
class TempReadingConfig:
    """Configured overrides for a single reading of a temperature sensor."""

    # Friendly label to show instead of the raw one; None keeps the raw label.
    label: Optional[str]
    # When False, the reading is left out of the collected readings.
    enabled: bool
    # Warning threshold override; None falls back to the sensor-reported value.
    warning_temp: Optional[float]
    # Critical threshold override; None falls back to the sensor-reported value.
    critical_temp: Optional[float]
|
||||
|
||||
|
||||
@dataclass
class TempSensorConfig(NestedDeserializableDataclass):
    """Configured overrides for one temperature sensor."""

    # Friendly name to show instead of the raw sensor name; None keeps the raw name.
    name: Optional[str]
    # When False, the whole sensor is left out of the collected readings.
    enabled: bool
    # Per-reading overrides, keyed by the reading's raw label.
    readings: dict[str, TempReadingConfig]
|
||||
|
||||
|
||||
@dataclass
class TempCheckConfig(NestedDeserializableDataclass):
    """Configuration of the temperature check."""

    # Sensor overrides, keyed by the raw sensor name.
    sensors: dict[str, TempSensorConfig]
|
||||
|
|
@ -2,7 +2,7 @@ from contextvars import ContextVar
|
|||
|
||||
from telethon import TelegramClient
|
||||
|
||||
from .config import Config
|
||||
from ..config import Config
|
||||
|
||||
config: ContextVar[Config] = ContextVar("config")
|
||||
tg_client: ContextVar[TelegramClient] = ContextVar("tg_client")
|
||||
|
|
|
|||
28
uv.lock
generated
28
uv.lock
generated
|
|
@ -16,17 +16,32 @@ version = "0.1.0"
|
|||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "alt-utils" },
|
||||
{ name = "setuptools" },
|
||||
{ name = "psutil" },
|
||||
{ name = "telethon" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "alt-utils", specifier = ">=0.0.6" },
|
||||
{ name = "setuptools", specifier = ">=80.0.0" },
|
||||
{ name = "psutil", specifier = ">=7.0.0" },
|
||||
{ name = "telethon", specifier = ">=1.40.0" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "psutil"
|
||||
version = "7.0.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/2a/80/336820c1ad9286a4ded7e845b2eccfcb27851ab8ac6abece774a6ff4d3de/psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456", size = 497003 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ed/e6/2d26234410f8b8abdbf891c9da62bee396583f713fb9f3325a4760875d22/psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25", size = 238051 },
|
||||
{ url = "https://files.pythonhosted.org/packages/04/8b/30f930733afe425e3cbfc0e1468a30a18942350c1a8816acfade80c005c4/psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da", size = 239535 },
|
||||
{ url = "https://files.pythonhosted.org/packages/2a/ed/d362e84620dd22876b55389248e522338ed1bf134a5edd3b8231d7207f6d/psutil-7.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91", size = 275004 },
|
||||
{ url = "https://files.pythonhosted.org/packages/bf/b9/b0eb3f3cbcb734d930fdf839431606844a825b23eaf9a6ab371edac8162c/psutil-7.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34", size = 277986 },
|
||||
{ url = "https://files.pythonhosted.org/packages/eb/a2/709e0fe2f093556c17fbafda93ac032257242cabcc7ff3369e2cb76a97aa/psutil-7.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993", size = 279544 },
|
||||
{ url = "https://files.pythonhosted.org/packages/50/e6/eecf58810b9d12e6427369784efe814a1eec0f492084ce8eb8f4d89d6d61/psutil-7.0.0-cp37-abi3-win32.whl", hash = "sha256:ba3fcef7523064a6c9da440fc4d6bd07da93ac726b5733c29027d7dc95b39d99", size = 241053 },
|
||||
{ url = "https://files.pythonhosted.org/packages/50/1b/6921afe68c74868b4c9fa424dad3be35b095e16687989ebbb50ce4fceb7c/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", size = 244885 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyaes"
|
||||
version = "1.6.1"
|
||||
|
|
@ -54,15 +69,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "setuptools"
|
||||
version = "80.0.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/44/80/97e25f0f1e4067677806084b7382a6ff9979f3d15119375c475c288db9d7/setuptools-80.0.0.tar.gz", hash = "sha256:c40a5b3729d58dd749c0f08f1a07d134fb8a0a3d7f87dc33e7c5e1f762138650", size = 1354221 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/23/63/5517029d6696ddf2bd378d46f63f479be001c31b462303170a1da57650cb/setuptools-80.0.0-py3-none-any.whl", hash = "sha256:a38f898dcd6e5380f4da4381a87ec90bd0a7eec23d204a5552e80ee3cab6bd27", size = 1240907 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "telethon"
|
||||
version = "1.40.0"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue