From 758438382debc9df7a73d9e12458080cd2d4de48 Mon Sep 17 00:00:00 2001 From: Alex Tau Date: Fri, 2 May 2025 15:25:27 +0300 Subject: [PATCH] add temp monitoring --- modules/default.nix | 54 ++++++++++++++- modules/submodules/tempSensorOptions.nix | 49 ++++++++++++++ pyproject.toml | 1 + src/lego_monitoring/__init__.py | 29 ++++++-- src/lego_monitoring/alerting/alerts.py | 2 +- src/lego_monitoring/alerting/enum.py | 2 +- src/lego_monitoring/checks/__init__.py | 1 + src/lego_monitoring/checks/temp/__init__.py | 29 ++++++++ src/lego_monitoring/checks/temp/sensors.py | 66 +++++++++++++++++++ .../{core/config.py => config/__init__.py} | 10 ++- src/lego_monitoring/config/checks/temp.py | 24 +++++++ src/lego_monitoring/core/cvars.py | 2 +- uv.lock | 28 ++++---- 13 files changed, 272 insertions(+), 25 deletions(-) create mode 100644 modules/submodules/tempSensorOptions.nix create mode 100644 src/lego_monitoring/checks/__init__.py create mode 100644 src/lego_monitoring/checks/temp/__init__.py create mode 100644 src/lego_monitoring/checks/temp/sensors.py rename src/lego_monitoring/{core/config.py => config/__init__.py} (84%) create mode 100644 src/lego_monitoring/config/checks/temp.py diff --git a/modules/default.nix b/modules/default.nix index 516c50c..d7f3ab3 100644 --- a/modules/default.nix +++ b/modules/default.nix @@ -7,17 +7,21 @@ package: ... }: +let + tempSensorOptions = (import ./submodules/tempSensorOptions.nix) { inherit lib; }; +in { options.services.lego-monitoring = { enable = lib.mkEnableOption "lego-monitoring service."; - enabledCheckerSets = lib.mkOption { + enabledCheckSets = lib.mkOption { type = lib.types.listOf (lib.types.enum [ "start" "stop" + "temp" ]); default = [ ]; - description = "List of enabled checker sets. Each checker set is a module which checks something and generates alerts based on check results."; + description = "List of enabled check sets. Each check set is a module which checks something and generates alerts based on check results."; }; telegram = { @@ -30,17 +34,61 @@ package: description = "ID of chat where to send alerts."; }; }; + + checks = { + temp = { + sensors = lib.mkOption { + type = lib.types.attrsOf (lib.types.submodule tempSensorOptions); + default = { }; + description = '' + Temp sensor override definitions. Sensors not defined here, or missing options in definitions, will be read with default parameters. + + To get list of sensors and their default configurations, run `lego-monitoring --print-temp`.''; + example = lib.literalExpression '' + { + amdgpu.readings.edge.label = "Integrated GPU"; + k10temp.readings = { + Tctl = { + label = "AMD CPU"; + criticalTemp = 95.0; + }; + Tccd1.enabled = false; + Tccd2.enabled = false; + }; + nvme.readings = { + "Sensor 1".enabled = false; + "Sensor 2".enabled = false; + }; + } + ''; + }; + }; + }; }; config = let cfg = config.services.lego-monitoring; json = pkgs.formats.json {}; serviceConfigFile = json.generate "config.json" { - enabled_checker_sets = cfg.enabledCheckerSets; + enabled_check_sets = cfg.enabledCheckSets; telegram = with cfg.telegram; { creds_secret_path = credsSecretPath; room_id = roomId; }; + checks = { + temp.sensors = lib.mapAttrs (_: sensorCfg: { + + inherit (sensorCfg) name enabled; + readings = lib.mapAttrs (_: readingCfg: { + + inherit (readingCfg) label enabled; + warning_temp = readingCfg.warningTemp; + critical_temp = readingCfg.criticalTemp; + + }) sensorCfg.readings; + + }) cfg.checks.temp.sensors; + }; }; in lib.mkIf cfg.enable { systemd.services.lego-monitoring = { diff --git a/modules/submodules/tempSensorOptions.nix b/modules/submodules/tempSensorOptions.nix new file mode 100644 index 0000000..31d72fc --- /dev/null +++ b/modules/submodules/tempSensorOptions.nix @@ -0,0 +1,49 @@ +{ + lib, +}: + +let + tempReadingOptions = { + options = { + label = lib.mkOption { + type = lib.types.nullOr lib.types.str; + default = null; + description = "Friendly label of the reading."; + }; + enabled = lib.mkOption { + type = lib.types.bool; + default = true; + description = "Whether this reading is enabled."; + }; + warningTemp = lib.mkOption { + type = lib.types.nullOr lib.types.float; + default = null; + description = "Warning temperature threshold."; + }; + criticalTemp = lib.mkOption { + type = lib.types.nullOr lib.types.float; + default = null; + description = "Critical temperature threshold."; + }; + }; + }; +in +{ + options = { + name = lib.mkOption { + type = lib.types.nullOr lib.types.str; + default = null; + description = "Friendly name of the sensor."; + }; + enabled = lib.mkOption { + type = lib.types.bool; + default = true; + description = "Whether sensor is enabled."; + }; + readings = lib.mkOption { + type = lib.types.attrsOf (lib.types.submodule tempReadingOptions); + default = { }; + description = "Overrides for specific readings of the sensor, by label."; + }; + }; +} diff --git a/pyproject.toml b/pyproject.toml index 5ed90ee..f21fd01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,7 @@ readme = "README.md" requires-python = ">=3.12" dependencies = [ "alt-utils>=0.0.6", + "psutil>=7.0.0", "telethon>=1.40.0", ] diff --git a/src/lego_monitoring/__init__.py b/src/lego_monitoring/__init__.py index bd1d8f0..981951e 100644 --- a/src/lego_monitoring/__init__.py +++ b/src/lego_monitoring/__init__.py @@ -1,12 +1,16 @@ import argparse import asyncio +import datetime import logging import signal import time +from . import checks from .alerting import alerts +from .checks.temp.sensors import print_readings +from .config import load_config from .core import cvars -from .core.config import load_config +from .core.checkers import interval_checker stopping = False @@ -27,11 +31,21 @@ async def async_main(): prog="lego-monitoring", description="Lego-monitoring service", ) - parser.add_argument("-c", "--config", required=True) + parser.add_argument("-c", "--config", help="config file") + parser.add_argument("--print-temp", help="print temp sensor readings and exit", action="store_true") + args = parser.parse_args() - config_path = parser.parse_args().config - config = load_config(config_path) - cvars.config.set(config) + if args.config: + config_path = parser.parse_args().config + config = load_config(config_path) + cvars.config.set(config) + + if args.print_temp: + print_readings() + raise SystemExit + + if not args.config: + raise RuntimeError("--config must be specified in standard operating mode") tg_client = await alerts.get_client() cvars.tg_client.set(tg_client) @@ -41,10 +55,11 @@ async def async_main(): alerts.send_start_alert(), ], "stop": [], # this is checked later + "temp": [interval_checker(checks.temp_check, datetime.timedelta(minutes=5))], } checkers = [] - for enabled_set in config.enabled_checker_sets: + for enabled_set in config.enabled_check_sets: for checker in checker_sets[enabled_set]: checkers.append(checker) @@ -57,7 +72,7 @@ async def async_main(): checker_tasks.add(task) while True: if stopping: - if "stop" in config.enabled_checker_sets: + if "stop" in config.enabled_check_sets: await alerts.send_stop_alert() await tg_client.disconnect() raise SystemExit diff --git a/src/lego_monitoring/alerting/alerts.py b/src/lego_monitoring/alerting/alerts.py index e5b4ccf..2241b6e 100644 --- a/src/lego_monitoring/alerting/alerts.py +++ b/src/lego_monitoring/alerting/alerts.py @@ -57,7 +57,7 @@ async def send_start_alert() -> None: await send_alert( Alert( alert_type=AlertType.BOOT, - message=f"Service running with enabled checkers: {', '.join(config.enabled_checker_sets)}", + message=f"Service running with enabled checks: {', '.join(config.enabled_check_sets)}", severity=Severity.INFO, ) ) diff --git a/src/lego_monitoring/alerting/enum.py b/src/lego_monitoring/alerting/enum.py index 1cb6a08..2e121c7 100644 --- a/src/lego_monitoring/alerting/enum.py +++ b/src/lego_monitoring/alerting/enum.py @@ -3,11 +3,11 @@ from enum import StrEnum class AlertType(StrEnum): BOOT = "BOOT" + TEMP = "TEMP" TEST = "TEST" # ERROR = "ERROR" # RAM = "RAM" # CPU = "CPU" - # TEMP = "TEMP" # VULN = "VULN" # LOGIN = "LOGIN" # SMART = "SMART" # TODO diff --git a/src/lego_monitoring/checks/__init__.py b/src/lego_monitoring/checks/__init__.py new file mode 100644 index 0000000..af7a958 --- /dev/null +++ b/src/lego_monitoring/checks/__init__.py @@ -0,0 +1 @@ +from .temp import temp_check diff --git a/src/lego_monitoring/checks/temp/__init__.py b/src/lego_monitoring/checks/temp/__init__.py new file mode 100644 index 0000000..4f965dc --- /dev/null +++ b/src/lego_monitoring/checks/temp/__init__.py @@ -0,0 +1,29 @@ +from lego_monitoring.alerting import alerts +from lego_monitoring.alerting.enum import AlertType, Severity + +from . import sensors + +IS_TESTING = False + + +def temp_check() -> list[alerts.Alert]: + alert_list = [] + temps = sensors.get_readings() + for sensor, readings in temps.items(): + for r in readings: + if r.critical_temp is not None and (IS_TESTING or r.current_temp > r.critical_temp): + alert = alerts.Alert( + alert_type=AlertType.TEMP, + message=f"{sensor} {r.label}: {r.current_temp}°C > {r.critical_temp}°C", + severity=Severity.CRITICAL, + ) + elif r.warning_temp is not None and (IS_TESTING or r.current_temp > r.warning_temp): + alert = alerts.Alert( + alert_type=AlertType.TEMP, + message=f"{sensor} {r.label}: {r.current_temp}°C > {r.warning_temp}°C", + severity=Severity.WARNING, + ) + else: + continue + alert_list.append(alert) + return alert_list diff --git a/src/lego_monitoring/checks/temp/sensors.py b/src/lego_monitoring/checks/temp/sensors.py new file mode 100644 index 0000000..ac1d7e4 --- /dev/null +++ b/src/lego_monitoring/checks/temp/sensors.py @@ -0,0 +1,66 @@ +from dataclasses import dataclass +from typing import Optional + +from psutil import sensors_temperatures + +from lego_monitoring.config.checks.temp import TempSensorConfig +from lego_monitoring.core import cvars + + +@dataclass +class TemperatureReading: + label: str + current_temp: float + warning_temp: Optional[float] + critical_temp: Optional[float] + + +def print_readings(): + sensor_readings = get_readings() + for sensor, readings in sensor_readings.items(): + print(f"*** Sensor {sensor}***\n") + for r in readings: + print(f"Label: {r.label}") + print(f"Current temp: {r.current_temp}") + print(f"Warning temp: {r.warning_temp}") + print(f"Critical temp: {r.critical_temp}\n") + + +def get_readings() -> dict[str, list[TemperatureReading]]: + try: + config = cvars.config.get().checks.temp.sensors + except LookupError: + config: dict[str, TempSensorConfig] = {} + + psutil_temperatures = sensors_temperatures() + + sensor_readings = {} + for sensor, readings in psutil_temperatures.items(): + if sensor in config: + if not config[sensor].enabled: + continue + sensor_friendly_name = config[sensor].name if config[sensor].name else sensor + else: + sensor_friendly_name = sensor + + sensor_readings[sensor_friendly_name] = [] + + for r in readings: + try: + config_r = config[sensor].readings[r.label] + except KeyError: + friendly_r = TemperatureReading( + label=r.label, current_temp=r.current, warning_temp=r.high, critical_temp=r.critical + ) + else: + if not config_r.enabled: + continue + friendly_r = TemperatureReading( + label=config_r.label if config_r.label else r.label, + current_temp=r.current, + warning_temp=config_r.warning_temp if config_r.warning_temp else r.high, + critical_temp=config_r.critical_temp if config_r.critical_temp else r.critical, + ) + sensor_readings[sensor_friendly_name].append(friendly_r) + + return sensor_readings diff --git a/src/lego_monitoring/core/config.py b/src/lego_monitoring/config/__init__.py similarity index 84% rename from src/lego_monitoring/core/config.py rename to src/lego_monitoring/config/__init__.py index 71cc766..bebca3d 100644 --- a/src/lego_monitoring/core/config.py +++ b/src/lego_monitoring/config/__init__.py @@ -3,6 +3,13 @@ from dataclasses import dataclass from alt_utils import NestedDeserializableDataclass +from .checks.temp import TempCheckConfig + + +@dataclass +class ChecksConfig(NestedDeserializableDataclass): + temp: TempCheckConfig + @dataclass class TelegramConfig: @@ -12,7 +19,8 @@ class TelegramConfig: @dataclass class Config(NestedDeserializableDataclass): - enabled_checker_sets: list[str] + enabled_check_sets: list[str] + checks: ChecksConfig telegram: TelegramConfig diff --git a/src/lego_monitoring/config/checks/temp.py b/src/lego_monitoring/config/checks/temp.py new file mode 100644 index 0000000..dea6b8b --- /dev/null +++ b/src/lego_monitoring/config/checks/temp.py @@ -0,0 +1,24 @@ +from dataclasses import dataclass +from typing import Optional + +from alt_utils import NestedDeserializableDataclass + + +@dataclass +class TempReadingConfig: + label: Optional[str] + enabled: bool + warning_temp: Optional[float] + critical_temp: Optional[float] + + +@dataclass +class TempSensorConfig(NestedDeserializableDataclass): + name: Optional[str] + enabled: bool + readings: dict[str, TempReadingConfig] + + +@dataclass +class TempCheckConfig(NestedDeserializableDataclass): + sensors: dict[str, TempSensorConfig] diff --git a/src/lego_monitoring/core/cvars.py b/src/lego_monitoring/core/cvars.py index b1f7b50..a4781c5 100644 --- a/src/lego_monitoring/core/cvars.py +++ b/src/lego_monitoring/core/cvars.py @@ -2,7 +2,7 @@ from contextvars import ContextVar from telethon import TelegramClient -from .config import Config +from ..config import Config config: ContextVar[Config] = ContextVar("config") tg_client: ContextVar[TelegramClient] = ContextVar("tg_client") diff --git a/uv.lock b/uv.lock index 41ce1c9..2e89def 100644 --- a/uv.lock +++ b/uv.lock @@ -16,17 +16,32 @@ version = "0.1.0" source = { editable = "." } dependencies = [ { name = "alt-utils" }, - { name = "setuptools" }, + { name = "psutil" }, { name = "telethon" }, ] [package.metadata] requires-dist = [ { name = "alt-utils", specifier = ">=0.0.6" }, - { name = "setuptools", specifier = ">=80.0.0" }, + { name = "psutil", specifier = ">=7.0.0" }, { name = "telethon", specifier = ">=1.40.0" }, ] +[[package]] +name = "psutil" +version = "7.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2a/80/336820c1ad9286a4ded7e845b2eccfcb27851ab8ac6abece774a6ff4d3de/psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456", size = 497003 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ed/e6/2d26234410f8b8abdbf891c9da62bee396583f713fb9f3325a4760875d22/psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25", size = 238051 }, + { url = "https://files.pythonhosted.org/packages/04/8b/30f930733afe425e3cbfc0e1468a30a18942350c1a8816acfade80c005c4/psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da", size = 239535 }, + { url = "https://files.pythonhosted.org/packages/2a/ed/d362e84620dd22876b55389248e522338ed1bf134a5edd3b8231d7207f6d/psutil-7.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91", size = 275004 }, + { url = "https://files.pythonhosted.org/packages/bf/b9/b0eb3f3cbcb734d930fdf839431606844a825b23eaf9a6ab371edac8162c/psutil-7.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34", size = 277986 }, + { url = "https://files.pythonhosted.org/packages/eb/a2/709e0fe2f093556c17fbafda93ac032257242cabcc7ff3369e2cb76a97aa/psutil-7.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993", size = 279544 }, + { url = "https://files.pythonhosted.org/packages/50/e6/eecf58810b9d12e6427369784efe814a1eec0f492084ce8eb8f4d89d6d61/psutil-7.0.0-cp37-abi3-win32.whl", hash = "sha256:ba3fcef7523064a6c9da440fc4d6bd07da93ac726b5733c29027d7dc95b39d99", size = 241053 }, + { url = "https://files.pythonhosted.org/packages/50/1b/6921afe68c74868b4c9fa424dad3be35b095e16687989ebbb50ce4fceb7c/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", size = 244885 }, +] + [[package]] name = "pyaes" version = "1.6.1" @@ -54,15 +69,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696 }, ] -[[package]] -name = "setuptools" -version = "80.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/44/80/97e25f0f1e4067677806084b7382a6ff9979f3d15119375c475c288db9d7/setuptools-80.0.0.tar.gz", hash = "sha256:c40a5b3729d58dd749c0f08f1a07d134fb8a0a3d7f87dc33e7c5e1f762138650", size = 1354221 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/23/63/5517029d6696ddf2bd378d46f63f479be001c31b462303170a1da57650cb/setuptools-80.0.0-py3-none-any.whl", hash = "sha256:a38f898dcd6e5380f4da4381a87ec90bd0a7eec23d204a5552e80ee3cab6bd27", size = 1240907 }, -] - [[package]] name = "telethon" version = "1.40.0"