add cpu check

This commit is contained in:
Alex Tau 2025-05-10 22:43:29 +03:00
parent 8709b019ea
commit 5095057a13
12 changed files with 123 additions and 21 deletions

View file

@ -33,7 +33,7 @@ List of enabled check sets\. Each check set is a module which checks something a
*Type:* *Type:*
list of (one of “start”, “stop”, “temp”, “vulnix”) list of (one of “start”, “stop”, “temp”, “cpu”, “vulnix”)
@ -45,8 +45,50 @@ list of (one of “start”, “stop”, “temp”, “vulnix”)
## services\.lego-monitoring\.checks\.cpu\.criticalPercentage
CPU load percentage for a critical alert to be sent\. Null means never generate a CPU critical alert\.
*Type:*
null or floating point number
*Default:*
` 90.0 `
*Declared by:*
- [modules/options\.nix](../modules/options.nix)
## services\.lego-monitoring\.checks\.cpu\.warningPercentage
CPU load percentage for a warning alert to be sent\. Null means never generate a CPU warning alert\.
*Type:*
null or floating point number
*Default:*
` 80.0 `
*Declared by:*
- [modules/options\.nix](../modules/options.nix)
## services\.lego-monitoring\.checks\.temp\.sensors ## services\.lego-monitoring\.checks\.temp\.sensors
Temp sensor override definitions\. Sensors not defined here, or missing options in definitions, will be read with default parameters\. Temp sensor override definitions\. Sensors not defined here, or missing options in definitions, will be read with default parameters\.
To get list of sensors and their default configurations, run ` lego-monitoring --print-temp `\. To get list of sensors and their default configurations, run ` lego-monitoring --print-temp `\.

View file

@ -11,7 +11,7 @@ package:
imports = [ imports = [
./options.nix ./options.nix
]; ];
config = let config = let
cfg = config.services.lego-monitoring; cfg = config.services.lego-monitoring;
json = pkgs.formats.json {}; json = pkgs.formats.json {};
@ -49,6 +49,11 @@ package:
}) cfg.checks.temp.sensors; }) cfg.checks.temp.sensors;
vulnix.whitelist_path = vulnixWhitelistFile; vulnix.whitelist_path = vulnixWhitelistFile;
# Map the module's camelCase CPU options onto the snake_case keys of the
# service configuration, using explicit attribute paths instead of `with`.
cpu = {
  warning_percentage = cfg.checks.cpu.warningPercentage;
  critical_percentage = cfg.checks.cpu.criticalPercentage;
};
}; };
}; };
in lib.mkIf cfg.enable { in lib.mkIf cfg.enable {

View file

@ -16,6 +16,7 @@ in
"start" "start"
"stop" "stop"
"temp" "temp"
"cpu"
"vulnix" "vulnix"
]); ]);
default = [ ]; default = [ ];
@ -76,6 +77,19 @@ in
}''; }'';
}; };
}; };
# Thresholds for the "cpu" check set. Both options accept null, which
# disables the corresponding alert level.
cpu = {
  warningPercentage = lib.mkOption {
    type = lib.types.nullOr lib.types.float;
    default = 80.0;
    description = "CPU load percentage for a warning alert to be sent. Null means never generate a CPU warning alert.";
  };
  criticalPercentage = lib.mkOption {
    type = lib.types.nullOr lib.types.float;
    default = 90.0;
    description = "CPU load percentage for a critical alert to be sent. Null means never generate a CPU critical alert.";
  };
};
}; };
}; };
} }

View file

@ -5,7 +5,7 @@ description = "Monitoring software for the lego server"
readme = "README.md" readme = "README.md"
requires-python = ">=3.12" requires-python = ">=3.12"
dependencies = [ dependencies = [
"alt-utils>=0.0.7", "alt-utils>=0.0.8",
"psutil>=7.0.0", "psutil>=7.0.0",
"telethon>=1.40.0", "telethon>=1.40.0",
] ]

View file

@ -3,7 +3,6 @@ import asyncio
import datetime import datetime
import logging import logging
import signal import signal
import time
from . import checks from . import checks
from .alerting import alerts from .alerting import alerts
@ -57,6 +56,7 @@ async def async_main():
"stop": [], # this is checked later "stop": [], # this is checked later
"temp": [interval_checker(checks.temp_check, datetime.timedelta(minutes=5))], "temp": [interval_checker(checks.temp_check, datetime.timedelta(minutes=5))],
"vulnix": [interval_checker(checks.vulnix_check, datetime.timedelta(days=3))], "vulnix": [interval_checker(checks.vulnix_check, datetime.timedelta(days=3))],
"cpu": [interval_checker(checks.cpu_check, datetime.timedelta(minutes=5))],
} }
checkers = [] checkers = []

View file

@ -3,12 +3,12 @@ from enum import StrEnum
class AlertType(StrEnum): class AlertType(StrEnum):
BOOT = "BOOT" BOOT = "BOOT"
CPU = "CPU"
ERROR = "ERROR"
TEMP = "TEMP" TEMP = "TEMP"
TEST = "TEST" TEST = "TEST"
VULN = "VULN" VULN = "VULN"
ERROR = "ERROR"
# RAM = "RAM" # RAM = "RAM"
# CPU = "CPU"
# LOGIN = "LOGIN" # LOGIN = "LOGIN"
# SMART = "SMART" # TODO # SMART = "SMART" # TODO
# RAID = "RAID" # RAID = "RAID"

View file

@ -1,2 +1,3 @@
from .cpu import cpu_check
from .temp import temp_check from .temp import temp_check
from .vulnix import vulnix_check from .vulnix import vulnix_check

View file

@ -0,0 +1,30 @@
from psutil import cpu_percent
from lego_monitoring.alerting import alerts
from lego_monitoring.alerting.enum import AlertType, Severity
from lego_monitoring.core import cvars
IS_TESTING = False
def cpu_check() -> list[alerts.Alert]:
    """Compare the current CPU load against the configured thresholds.

    The critical threshold is evaluated before the warning threshold, so at
    most one alert is returned per call.

    A threshold of ``None`` disables that severity. Thresholds are compared
    with ``is not None`` rather than by truthiness so that an explicitly
    configured ``0`` is honoured as a real threshold instead of being
    treated as "disabled".

    When the module-level ``IS_TESTING`` flag is true, the first enabled
    threshold produces an alert regardless of the measured load.

    Returns:
        A one-element list with a CRITICAL or WARNING CPU alert, or an
        empty list when no enabled threshold is exceeded.
    """
    # Called without an interval, psutil reports utilisation since the
    # previous call; the very first call returns a meaningless 0.0.
    percentage = cpu_percent()
    # NOTE(review): ``checks.cpu`` appears to be optional in the config
    # schema - confirm it is always populated before this check runs.
    config = cvars.config.get().checks.cpu

    # Ordered from most to least severe: the first match wins.
    thresholds = (
        (Severity.CRITICAL, config.critical_percentage),
        (Severity.WARNING, config.warning_percentage),
    )
    for severity, threshold in thresholds:
        if threshold is None:
            continue
        if IS_TESTING or percentage > threshold:
            return [
                alerts.Alert(
                    alert_type=AlertType.CPU,
                    message=f"CPU load: {percentage:.2f}% > {threshold:.2f}%",
                    severity=severity,
                )
            ]
    return []

View file

@ -1,15 +1,17 @@
import json import json
from dataclasses import dataclass from dataclasses import dataclass, field
from typing import Optional from typing import Optional
from alt_utils import NestedDeserializableDataclass from alt_utils import NestedDeserializableDataclass
from .checks.cpu import CpuCheckConfig
from .checks.temp import TempCheckConfig from .checks.temp import TempCheckConfig
from .checks.vulnix import VulnixCheckConfig from .checks.vulnix import VulnixCheckConfig
@dataclass @dataclass
class ChecksConfig(NestedDeserializableDataclass): class ChecksConfig(NestedDeserializableDataclass):
cpu: Optional[CpuCheckConfig] = None
temp: Optional[TempCheckConfig] = None temp: Optional[TempCheckConfig] = None
vulnix: Optional[VulnixCheckConfig] = None vulnix: Optional[VulnixCheckConfig] = None
@ -22,9 +24,9 @@ class TelegramConfig:
@dataclass @dataclass
class Config(NestedDeserializableDataclass): class Config(NestedDeserializableDataclass):
enabled_check_sets: list[str]
checks: ChecksConfig checks: ChecksConfig
telegram: TelegramConfig telegram: TelegramConfig
enabled_check_sets: list[str] = field(default_factory=list)
def load_config(filepath: str) -> Config: def load_config(filepath: str) -> Config:

View file

@ -0,0 +1,8 @@
from dataclasses import dataclass
from typing import Optional
@dataclass
class CpuCheckConfig:
    """Thresholds for the CPU load check.

    Each value is a CPU load percentage. ``None`` disables the corresponding
    alert level. Defaults are float literals so that they match the
    ``Optional[float]`` annotations.
    """

    # Load percentage above which a warning alert is generated.
    warning_percentage: Optional[float] = 80.0
    # Load percentage above which a critical alert is generated.
    critical_percentage: Optional[float] = 90.0

View file

@ -1,4 +1,4 @@
from dataclasses import dataclass from dataclasses import dataclass, field
from typing import Optional from typing import Optional
from alt_utils import NestedDeserializableDataclass from alt_utils import NestedDeserializableDataclass
@ -6,19 +6,19 @@ from alt_utils import NestedDeserializableDataclass
@dataclass @dataclass
class TempReadingConfig: class TempReadingConfig:
label: Optional[str] label: Optional[str] = None
enabled: bool enabled: bool = True
warning_temp: Optional[float] warning_temp: Optional[float] = None
critical_temp: Optional[float] critical_temp: Optional[float] = None
@dataclass @dataclass
class TempSensorConfig(NestedDeserializableDataclass): class TempSensorConfig(NestedDeserializableDataclass):
name: Optional[str] name: Optional[str] = None
enabled: bool enabled: bool = True
readings: dict[str, TempReadingConfig] readings: dict[str, TempReadingConfig] = field(default_factory=dict)
@dataclass @dataclass
class TempCheckConfig(NestedDeserializableDataclass): class TempCheckConfig(NestedDeserializableDataclass):
sensors: dict[str, TempSensorConfig] sensors: dict[str, TempSensorConfig] = field(default_factory=dict)

8
uv.lock generated
View file

@ -3,11 +3,11 @@ requires-python = ">=3.12"
[[package]] [[package]]
name = "alt-utils" name = "alt-utils"
version = "0.0.6" version = "0.0.8"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/22/d2/b4a3ea37f773696b07a545e8964c37e98e4939d5f8e3dae949d2cd4e4f53/alt_utils-0.0.6.tar.gz", hash = "sha256:91b8ca633238e819848e1f8b351892f4c148c7fddef120d5e966e3a0b5d06f81", size = 6001 } sdist = { url = "https://files.pythonhosted.org/packages/31/15/67246107a8c808a9e99b34fd0024bebe954a67f3c315821eae985b87db7f/alt_utils-0.0.8.tar.gz", hash = "sha256:4b2901df0be4af736210277d58e231d4c4bce597a8fc665a8dd3e7b582705081", size = 6103 }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/c1/27/0c963d6c64150e3fb2f98eb01773e2f9cf9b51f5b65632944bff67a68ec2/alt_utils-0.0.6-py3-none-any.whl", hash = "sha256:e4fd04394827eb49ae0d835f645ea03de1d9637a77acd5674a35890ae22abbef", size = 6260 }, { url = "https://files.pythonhosted.org/packages/9a/5a/7fe15b55fa0ff5528643750c409cd14da005406aef312b32512d8a8487ab/alt_utils-0.0.8-py3-none-any.whl", hash = "sha256:af5549c49543ff4a02b735308bc2a5bfb7f20755620652fd969a648bbaecbc47", size = 6378 },
] ]
[[package]] [[package]]
@ -22,7 +22,7 @@ dependencies = [
[package.metadata] [package.metadata]
requires-dist = [ requires-dist = [
{ name = "alt-utils", specifier = ">=0.0.6" }, { name = "alt-utils", specifier = ">=0.0.8" },
{ name = "psutil", specifier = ">=7.0.0" }, { name = "psutil", specifier = ">=7.0.0" },
{ name = "telethon", specifier = ">=1.40.0" }, { name = "telethon", specifier = ">=1.40.0" },
] ]