From 5095057a1381c3611a7f3027dd05dbfd6a9b4b61 Mon Sep 17 00:00:00 2001 From: Alex Tau Date: Sat, 10 May 2025 22:43:29 +0300 Subject: [PATCH] add cpu check --- docs/nixos-options.md | 44 ++++++++++++++++++++++- modules/default.nix | 7 +++- modules/options.nix | 14 ++++++++ pyproject.toml | 2 +- src/lego_monitoring/__init__.py | 2 +- src/lego_monitoring/alerting/enum.py | 4 +-- src/lego_monitoring/checks/__init__.py | 1 + src/lego_monitoring/checks/cpu.py | 30 ++++++++++++++++ src/lego_monitoring/config/__init__.py | 6 ++-- src/lego_monitoring/config/checks/cpu.py | 8 +++++ src/lego_monitoring/config/checks/temp.py | 18 +++++----- uv.lock | 8 ++--- 12 files changed, 123 insertions(+), 21 deletions(-) create mode 100644 src/lego_monitoring/checks/cpu.py create mode 100644 src/lego_monitoring/config/checks/cpu.py diff --git a/docs/nixos-options.md b/docs/nixos-options.md index 0e3a81f..1839ef9 100644 --- a/docs/nixos-options.md +++ b/docs/nixos-options.md @@ -33,7 +33,7 @@ List of enabled check sets\. Each check set is a module which checks something a *Type:* -list of (one of “start”, “stop”, “temp”, “vulnix”) +list of (one of “start”, “stop”, “temp”, “cpu”, “vulnix”) @@ -45,8 +45,50 @@ list of (one of “start”, “stop”, “temp”, “vulnix”) +## services\.lego-monitoring\.checks\.cpu\.criticalPercentage + +CPU load percentage for a critical alert to be sent\. Null means never generate a CPU critical alert\. + + + +*Type:* +null or floating point number + + + +*Default:* +` 90.0 ` + +*Declared by:* + - [modules/options\.nix](../modules/options.nix) + + + +## services\.lego-monitoring\.checks\.cpu\.warningPercentage + + + +CPU load percentage for a warning alert to be sent\. Null means never generate a CPU warning alert\. + + + +*Type:* +null or floating point number + + + +*Default:* +` 80.0 ` + +*Declared by:* + - [modules/options\.nix](../modules/options.nix) + + + ## services\.lego-monitoring\.checks\.temp\.sensors + + Temp sensor override definitions\. 
Sensors not defined here, or missing options in definitions, will be read with default parameters\. To get list of sensors and their default configurations, run ` lego-monitoring --print-temp `\. diff --git a/modules/default.nix b/modules/default.nix index d48361c..448fe11 100644 --- a/modules/default.nix +++ b/modules/default.nix @@ -11,7 +11,7 @@ package: imports = [ ./options.nix ]; - + config = let cfg = config.services.lego-monitoring; json = pkgs.formats.json {}; @@ -49,6 +49,11 @@ package: }) cfg.checks.temp.sensors; vulnix.whitelist_path = vulnixWhitelistFile; + + cpu = with cfg.checks.cpu; { + warning_percentage = warningPercentage; + critical_percentage = criticalPercentage; + }; }; }; in lib.mkIf cfg.enable { diff --git a/modules/options.nix b/modules/options.nix index 2a65787..27e9f82 100644 --- a/modules/options.nix +++ b/modules/options.nix @@ -16,6 +16,7 @@ in "start" "stop" "temp" + "cpu" "vulnix" ]); default = [ ]; @@ -76,6 +77,19 @@ in }''; }; }; + + cpu = { + warningPercentage = lib.mkOption { + type = lib.types.nullOr lib.types.float; + default = 80.0; + description = "CPU load percentage for a warning alert to be sent. Null means never generate a CPU warning alert."; + }; + criticalPercentage = lib.mkOption { + type = lib.types.nullOr lib.types.float; + default = 90.0; + description = "CPU load percentage for a critical alert to be sent. 
Null means never generate a CPU critical alert."; + }; + }; }; }; } diff --git a/pyproject.toml b/pyproject.toml index 87702ac..dcb602a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "Monitoring software for the lego server" readme = "README.md" requires-python = ">=3.12" dependencies = [ - "alt-utils>=0.0.7", + "alt-utils>=0.0.8", "psutil>=7.0.0", "telethon>=1.40.0", ] diff --git a/src/lego_monitoring/__init__.py b/src/lego_monitoring/__init__.py index 12191b0..f1299cc 100644 --- a/src/lego_monitoring/__init__.py +++ b/src/lego_monitoring/__init__.py @@ -3,7 +3,6 @@ import asyncio import datetime import logging import signal -import time from . import checks from .alerting import alerts @@ -57,6 +56,7 @@ async def async_main(): "stop": [], # this is checked later "temp": [interval_checker(checks.temp_check, datetime.timedelta(minutes=5))], "vulnix": [interval_checker(checks.vulnix_check, datetime.timedelta(days=3))], + "cpu": [interval_checker(checks.cpu_check, datetime.timedelta(minutes=5))], } checkers = [] diff --git a/src/lego_monitoring/alerting/enum.py b/src/lego_monitoring/alerting/enum.py index de6ca5e..0b92bb1 100644 --- a/src/lego_monitoring/alerting/enum.py +++ b/src/lego_monitoring/alerting/enum.py @@ -3,12 +3,12 @@ from enum import StrEnum class AlertType(StrEnum): BOOT = "BOOT" + CPU = "CPU" + ERROR = "ERROR" TEMP = "TEMP" TEST = "TEST" VULN = "VULN" - ERROR = "ERROR" # RAM = "RAM" - # CPU = "CPU" # LOGIN = "LOGIN" # SMART = "SMART" # TODO # RAID = "RAID" diff --git a/src/lego_monitoring/checks/__init__.py b/src/lego_monitoring/checks/__init__.py index df26ddf..8818d25 100644 --- a/src/lego_monitoring/checks/__init__.py +++ b/src/lego_monitoring/checks/__init__.py @@ -1,2 +1,3 @@ +from .cpu import cpu_check from .temp import temp_check from .vulnix import vulnix_check diff --git a/src/lego_monitoring/checks/cpu.py b/src/lego_monitoring/checks/cpu.py new file mode 100644 index 0000000..de46820 --- /dev/null +++ 
b/src/lego_monitoring/checks/cpu.py @@ -0,0 +1,30 @@ +from psutil import cpu_percent + +from lego_monitoring.alerting import alerts +from lego_monitoring.alerting.enum import AlertType, Severity +from lego_monitoring.core import cvars + +IS_TESTING = False + + +def cpu_check() -> list[alerts.Alert]: + percentage = cpu_percent() + config = cvars.config.get().checks.cpu + if config.critical_percentage and (IS_TESTING or percentage > config.critical_percentage): + return [ + alerts.Alert( + alert_type=AlertType.CPU, + message=f"CPU load: {percentage:.2f}% > {config.critical_percentage:.2f}%", + severity=Severity.CRITICAL, + ) + ] + elif config.warning_percentage and (IS_TESTING or percentage > config.warning_percentage): + return [ + alerts.Alert( + alert_type=AlertType.CPU, + message=f"CPU load: {percentage:.2f}% > {config.warning_percentage:.2f}%", + severity=Severity.WARNING, + ) + ] + else: + return [] diff --git a/src/lego_monitoring/config/__init__.py b/src/lego_monitoring/config/__init__.py index d6518bb..e08fdd9 100644 --- a/src/lego_monitoring/config/__init__.py +++ b/src/lego_monitoring/config/__init__.py @@ -1,15 +1,17 @@ import json -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Optional from alt_utils import NestedDeserializableDataclass +from .checks.cpu import CpuCheckConfig from .checks.temp import TempCheckConfig from .checks.vulnix import VulnixCheckConfig @dataclass class ChecksConfig(NestedDeserializableDataclass): + cpu: Optional[CpuCheckConfig] = None temp: Optional[TempCheckConfig] = None vulnix: Optional[VulnixCheckConfig] = None @@ -22,9 +24,9 @@ class TelegramConfig: @dataclass class Config(NestedDeserializableDataclass): - enabled_check_sets: list[str] checks: ChecksConfig telegram: TelegramConfig + enabled_check_sets: list[str] = field(default_factory=list) def load_config(filepath: str) -> Config: diff --git a/src/lego_monitoring/config/checks/cpu.py 
b/src/lego_monitoring/config/checks/cpu.py new file mode 100644 index 0000000..174687a --- /dev/null +++ b/src/lego_monitoring/config/checks/cpu.py @@ -0,0 +1,8 @@ +from dataclasses import dataclass +from typing import Optional + + +@dataclass +class CpuCheckConfig: + warning_percentage: Optional[float] = 80 + critical_percentage: Optional[float] = 90 diff --git a/src/lego_monitoring/config/checks/temp.py b/src/lego_monitoring/config/checks/temp.py index dea6b8b..da36d9f 100644 --- a/src/lego_monitoring/config/checks/temp.py +++ b/src/lego_monitoring/config/checks/temp.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Optional from alt_utils import NestedDeserializableDataclass @@ -6,19 +6,19 @@ from alt_utils import NestedDeserializableDataclass @dataclass class TempReadingConfig: - label: Optional[str] - enabled: bool - warning_temp: Optional[float] - critical_temp: Optional[float] + label: Optional[str] = None + enabled: bool = True + warning_temp: Optional[float] = None + critical_temp: Optional[float] = None @dataclass class TempSensorConfig(NestedDeserializableDataclass): - name: Optional[str] - enabled: bool - readings: dict[str, TempReadingConfig] + name: Optional[str] = None + enabled: bool = True + readings: dict[str, TempReadingConfig] = field(default_factory=dict) @dataclass class TempCheckConfig(NestedDeserializableDataclass): - sensors: dict[str, TempSensorConfig] + sensors: dict[str, TempSensorConfig] = field(default_factory=dict) diff --git a/uv.lock b/uv.lock index 2e89def..f5ec571 100644 --- a/uv.lock +++ b/uv.lock @@ -3,11 +3,11 @@ requires-python = ">=3.12" [[package]] name = "alt-utils" -version = "0.0.6" +version = "0.0.8" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/22/d2/b4a3ea37f773696b07a545e8964c37e98e4939d5f8e3dae949d2cd4e4f53/alt_utils-0.0.6.tar.gz", hash = 
"sha256:91b8ca633238e819848e1f8b351892f4c148c7fddef120d5e966e3a0b5d06f81", size = 6001 } +sdist = { url = "https://files.pythonhosted.org/packages/31/15/67246107a8c808a9e99b34fd0024bebe954a67f3c315821eae985b87db7f/alt_utils-0.0.8.tar.gz", hash = "sha256:4b2901df0be4af736210277d58e231d4c4bce597a8fc665a8dd3e7b582705081", size = 6103 } wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/27/0c963d6c64150e3fb2f98eb01773e2f9cf9b51f5b65632944bff67a68ec2/alt_utils-0.0.6-py3-none-any.whl", hash = "sha256:e4fd04394827eb49ae0d835f645ea03de1d9637a77acd5674a35890ae22abbef", size = 6260 }, + { url = "https://files.pythonhosted.org/packages/9a/5a/7fe15b55fa0ff5528643750c409cd14da005406aef312b32512d8a8487ab/alt_utils-0.0.8-py3-none-any.whl", hash = "sha256:af5549c49543ff4a02b735308bc2a5bfb7f20755620652fd969a648bbaecbc47", size = 6378 }, ] [[package]] @@ -22,7 +22,7 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "alt-utils", specifier = ">=0.0.6" }, + { name = "alt-utils", specifier = ">=0.0.8" }, { name = "psutil", specifier = ">=7.0.0" }, { name = "telethon", specifier = ">=1.40.0" }, ]