add cpu check

This commit is contained in:
Alex Tau 2025-05-10 22:43:29 +03:00
parent 8709b019ea
commit 5095057a13
12 changed files with 123 additions and 21 deletions

View file

@ -33,7 +33,7 @@ List of enabled check sets\. Each check set is a module which checks something a
*Type:*
list of (one of “start”, “stop”, “temp”, “vulnix”)
list of (one of “start”, “stop”, “temp”, “cpu”, “vulnix”)
@ -45,8 +45,50 @@ list of (one of “start”, “stop”, “temp”, “vulnix”)
## services\.lego-monitoring\.checks\.cpu\.criticalPercentage
CPU load percentage for a critical alert to be sent\. Null means never generate a CPU critical alert\.
*Type:*
null or floating point number
*Default:*
` 90.0 `
*Declared by:*
- [modules/options\.nix](../modules/options.nix)
## services\.lego-monitoring\.checks\.cpu\.warningPercentage
CPU load percentage for a warning alert to be sent\. Null means never generate a CPU warning alert\.
*Type:*
null or floating point number
*Default:*
` 80.0 `
*Declared by:*
- [modules/options\.nix](../modules/options.nix)
## services\.lego-monitoring\.checks\.temp\.sensors
Temp sensor override definitions\. Sensors not defined here, or missing options in definitions, will be read with default parameters\.
To get list of sensors and their default configurations, run ` lego-monitoring --print-temp `\.

View file

@ -49,6 +49,11 @@ package:
}) cfg.checks.temp.sensors;
vulnix.whitelist_path = vulnixWhitelistFile;
cpu = with cfg.checks.cpu; {
warning_percentage = warningPercentage;
critical_percentage = criticalPercentage;
};
};
};
in lib.mkIf cfg.enable {

View file

@ -16,6 +16,7 @@ in
"start"
"stop"
"temp"
"cpu"
"vulnix"
]);
default = [ ];
@ -76,6 +77,19 @@ in
}'';
};
};
cpu = {
# Warning threshold for the CPU check; null disables the warning level.
warningPercentage = lib.mkOption {
  type = lib.types.nullOr lib.types.float;
  default = 80.0;
  description = "CPU load percentage for a warning alert to be sent. Null means never generate a CPU warning alert.";
};
criticalPercentage = lib.mkOption {
type = lib.types.nullOr lib.types.float;
default = 90.0;
description = "CPU load percentage for a critical alert to be sent. Null means never generate a CPU critical alert.";
};
};
};
};
}

View file

@ -5,7 +5,7 @@ description = "Monitoring software for the lego server"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"alt-utils>=0.0.7",
"alt-utils>=0.0.8",
"psutil>=7.0.0",
"telethon>=1.40.0",
]

View file

@ -3,7 +3,6 @@ import asyncio
import datetime
import logging
import signal
import time
from . import checks
from .alerting import alerts
@ -57,6 +56,7 @@ async def async_main():
"stop": [], # this is checked later
"temp": [interval_checker(checks.temp_check, datetime.timedelta(minutes=5))],
"vulnix": [interval_checker(checks.vulnix_check, datetime.timedelta(days=3))],
"cpu": [interval_checker(checks.cpu_check, datetime.timedelta(minutes=5))],
}
checkers = []

View file

@ -3,12 +3,12 @@ from enum import StrEnum
class AlertType(StrEnum):
BOOT = "BOOT"
CPU = "CPU"
ERROR = "ERROR"
TEMP = "TEMP"
TEST = "TEST"
VULN = "VULN"
ERROR = "ERROR"
# RAM = "RAM"
# CPU = "CPU"
# LOGIN = "LOGIN"
# SMART = "SMART" # TODO
# RAID = "RAID"

View file

@ -1,2 +1,3 @@
from .cpu import cpu_check
from .temp import temp_check
from .vulnix import vulnix_check

View file

@ -0,0 +1,30 @@
from psutil import cpu_percent
from lego_monitoring.alerting import alerts
from lego_monitoring.alerting.enum import AlertType, Severity
from lego_monitoring.core import cvars
IS_TESTING = False
def cpu_check() -> list[alerts.Alert]:
    """Check the current CPU load against the configured thresholds.

    The critical threshold is evaluated before the warning threshold, so at
    most one alert is produced per call.

    Returns:
        A single-element list with a CRITICAL or WARNING CPU alert when the
        load exceeds the corresponding threshold, otherwise an empty list.
    """
    # Called without an interval, psutil reports utilisation relative to the
    # previous call (see the psutil documentation for cpu_percent).
    percentage = cpu_percent()
    config = cvars.config.get().checks.cpu

    # Most severe level first: a critical alert supersedes a warning.
    thresholds = (
        (Severity.CRITICAL, config.critical_percentage),
        (Severity.WARNING, config.warning_percentage),
    )
    for severity, threshold in thresholds:
        # Only None disables a level. Compare explicitly so that a threshold
        # of 0.0 is honoured rather than treated as "disabled" by truthiness.
        if threshold is None:
            continue
        # IS_TESTING forces an alert for every enabled level.
        if IS_TESTING or percentage > threshold:
            return [
                alerts.Alert(
                    alert_type=AlertType.CPU,
                    message=f"CPU load: {percentage:.2f}% > {threshold:.2f}%",
                    severity=severity,
                )
            ]
    return []

View file

@ -1,15 +1,17 @@
import json
from dataclasses import dataclass
from dataclasses import dataclass, field
from typing import Optional
from alt_utils import NestedDeserializableDataclass
from .checks.cpu import CpuCheckConfig
from .checks.temp import TempCheckConfig
from .checks.vulnix import VulnixCheckConfig
@dataclass
class ChecksConfig(NestedDeserializableDataclass):
cpu: Optional[CpuCheckConfig] = None
temp: Optional[TempCheckConfig] = None
vulnix: Optional[VulnixCheckConfig] = None
@ -22,9 +24,9 @@ class TelegramConfig:
@dataclass
class Config(NestedDeserializableDataclass):
enabled_check_sets: list[str]
checks: ChecksConfig
telegram: TelegramConfig
enabled_check_sets: list[str] = field(default_factory=list)
def load_config(filepath: str) -> Config:

View file

@ -0,0 +1,8 @@
from dataclasses import dataclass
from typing import Optional
@dataclass
class CpuCheckConfig:
    """Thresholds (in percent of CPU load) for the CPU check.

    A threshold of None means the corresponding alert level is never
    generated.
    """

    # Defaults are float literals so they match the annotated type and the
    # 80.0 / 90.0 defaults declared for the same options in the Nix module.
    warning_percentage: Optional[float] = 80.0
    critical_percentage: Optional[float] = 90.0

View file

@ -1,4 +1,4 @@
from dataclasses import dataclass
from dataclasses import dataclass, field
from typing import Optional
from alt_utils import NestedDeserializableDataclass
@ -6,19 +6,19 @@ from alt_utils import NestedDeserializableDataclass
@dataclass
class TempReadingConfig:
label: Optional[str]
enabled: bool
warning_temp: Optional[float]
critical_temp: Optional[float]
label: Optional[str] = None
enabled: bool = True
warning_temp: Optional[float] = None
critical_temp: Optional[float] = None
@dataclass
class TempSensorConfig(NestedDeserializableDataclass):
name: Optional[str]
enabled: bool
readings: dict[str, TempReadingConfig]
name: Optional[str] = None
enabled: bool = True
readings: dict[str, TempReadingConfig] = field(default_factory=dict)
@dataclass
class TempCheckConfig(NestedDeserializableDataclass):
sensors: dict[str, TempSensorConfig]
sensors: dict[str, TempSensorConfig] = field(default_factory=dict)

8
uv.lock generated
View file

@ -3,11 +3,11 @@ requires-python = ">=3.12"
[[package]]
name = "alt-utils"
version = "0.0.6"
version = "0.0.8"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/22/d2/b4a3ea37f773696b07a545e8964c37e98e4939d5f8e3dae949d2cd4e4f53/alt_utils-0.0.6.tar.gz", hash = "sha256:91b8ca633238e819848e1f8b351892f4c148c7fddef120d5e966e3a0b5d06f81", size = 6001 }
sdist = { url = "https://files.pythonhosted.org/packages/31/15/67246107a8c808a9e99b34fd0024bebe954a67f3c315821eae985b87db7f/alt_utils-0.0.8.tar.gz", hash = "sha256:4b2901df0be4af736210277d58e231d4c4bce597a8fc665a8dd3e7b582705081", size = 6103 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c1/27/0c963d6c64150e3fb2f98eb01773e2f9cf9b51f5b65632944bff67a68ec2/alt_utils-0.0.6-py3-none-any.whl", hash = "sha256:e4fd04394827eb49ae0d835f645ea03de1d9637a77acd5674a35890ae22abbef", size = 6260 },
{ url = "https://files.pythonhosted.org/packages/9a/5a/7fe15b55fa0ff5528643750c409cd14da005406aef312b32512d8a8487ab/alt_utils-0.0.8-py3-none-any.whl", hash = "sha256:af5549c49543ff4a02b735308bc2a5bfb7f20755620652fd969a648bbaecbc47", size = 6378 },
]
[[package]]
@ -22,7 +22,7 @@ dependencies = [
[package.metadata]
requires-dist = [
{ name = "alt-utils", specifier = ">=0.0.6" },
{ name = "alt-utils", specifier = ">=0.0.8" },
{ name = "psutil", specifier = ">=7.0.0" },
{ name = "telethon", specifier = ">=1.40.0" },
]