mirror of
https://forgejo.altau.su/lego/lego-monitoring.git
synced 2026-03-09 20:31:10 +00:00
add cpu check
This commit is contained in:
parent
8709b019ea
commit
5095057a13
12 changed files with 123 additions and 21 deletions
|
|
@ -33,7 +33,7 @@ List of enabled check sets\. Each check set is a module which checks something a
|
|||
|
||||
|
||||
*Type:*
|
||||
list of (one of “start”, “stop”, “temp”, “vulnix”)
|
||||
list of (one of “start”, “stop”, “temp”, “cpu”, “vulnix”)
|
||||
|
||||
|
||||
|
||||
|
|
@ -45,8 +45,50 @@ list of (one of “start”, “stop”, “temp”, “vulnix”)
|
|||
|
||||
|
||||
|
||||
## services\.lego-monitoring\.checks\.cpu\.criticalPercentage
|
||||
|
||||
CPU load percentage for a critical alert to be sent\. Null means never generate a CPU critical alert\.
|
||||
|
||||
|
||||
|
||||
*Type:*
|
||||
null or floating point number
|
||||
|
||||
|
||||
|
||||
*Default:*
|
||||
` 90.0 `
|
||||
|
||||
*Declared by:*
|
||||
- [modules/options\.nix](../modules/options.nix)
|
||||
|
||||
|
||||
|
||||
## services\.lego-monitoring\.checks\.cpu\.warningPercentage
|
||||
|
||||
|
||||
|
||||
CPU load percentage for a warning alert to be sent\. Null means never generate a CPU warning alert\.
|
||||
|
||||
|
||||
|
||||
*Type:*
|
||||
null or floating point number
|
||||
|
||||
|
||||
|
||||
*Default:*
|
||||
` 80.0 `
|
||||
|
||||
*Declared by:*
|
||||
- [modules/options\.nix](../modules/options.nix)
|
||||
|
||||
|
||||
|
||||
## services\.lego-monitoring\.checks\.temp\.sensors
|
||||
|
||||
|
||||
|
||||
Temp sensor override definitions\. Sensors not defined here, or missing options in definitions, will be read with default parameters\.
|
||||
|
||||
To get list of sensors and their default configurations, run ` lego-monitoring --print-temp `\.
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ package:
|
|||
imports = [
|
||||
./options.nix
|
||||
];
|
||||
|
||||
|
||||
config = let
|
||||
cfg = config.services.lego-monitoring;
|
||||
json = pkgs.formats.json {};
|
||||
|
|
@ -49,6 +49,11 @@ package:
|
|||
}) cfg.checks.temp.sensors;
|
||||
|
||||
vulnix.whitelist_path = vulnixWhitelistFile;
|
||||
|
||||
cpu = with cfg.checks.cpu; {
|
||||
warning_percentage = warningPercentage;
|
||||
critical_percentage = criticalPercentage;
|
||||
};
|
||||
};
|
||||
};
|
||||
in lib.mkIf cfg.enable {
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ in
|
|||
"start"
|
||||
"stop"
|
||||
"temp"
|
||||
"cpu"
|
||||
"vulnix"
|
||||
]);
|
||||
default = [ ];
|
||||
|
|
@ -76,6 +77,19 @@ in
|
|||
}'';
|
||||
};
|
||||
};
|
||||
|
||||
# Thresholds for the "cpu" check set. Both options accept null to disable
# the corresponding alert severity.
cpu = {
  warningPercentage = lib.mkOption {
    type = lib.types.nullOr lib.types.float;
    default = 80.0;
    description = "CPU load percentage for a warning alert to be sent. Null means never generate a CPU warning alert.";
  };
  criticalPercentage = lib.mkOption {
    type = lib.types.nullOr lib.types.float;
    default = 90.0;
    description = "CPU load percentage for a critical alert to be sent. Null means never generate a CPU critical alert.";
  };
};
|
||||
};
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ description = "Monitoring software for the lego server"
|
|||
readme = "README.md"
|
||||
requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
"alt-utils>=0.0.7",
|
||||
"alt-utils>=0.0.8",
|
||||
"psutil>=7.0.0",
|
||||
"telethon>=1.40.0",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@ import asyncio
|
|||
import datetime
|
||||
import logging
|
||||
import signal
|
||||
import time
|
||||
|
||||
from . import checks
|
||||
from .alerting import alerts
|
||||
|
|
@ -57,6 +56,7 @@ async def async_main():
|
|||
"stop": [], # this is checked later
|
||||
"temp": [interval_checker(checks.temp_check, datetime.timedelta(minutes=5))],
|
||||
"vulnix": [interval_checker(checks.vulnix_check, datetime.timedelta(days=3))],
|
||||
"cpu": [interval_checker(checks.cpu_check, datetime.timedelta(minutes=5))],
|
||||
}
|
||||
|
||||
checkers = []
|
||||
|
|
|
|||
|
|
@ -3,12 +3,12 @@ from enum import StrEnum
|
|||
|
||||
class AlertType(StrEnum):
|
||||
BOOT = "BOOT"
|
||||
CPU = "CPU"
|
||||
ERROR = "ERROR"
|
||||
TEMP = "TEMP"
|
||||
TEST = "TEST"
|
||||
VULN = "VULN"
|
||||
ERROR = "ERROR"
|
||||
# RAM = "RAM"
|
||||
# CPU = "CPU"
|
||||
# LOGIN = "LOGIN"
|
||||
# SMART = "SMART" # TODO
|
||||
# RAID = "RAID"
|
||||
|
|
|
|||
|
|
@ -1,2 +1,3 @@
|
|||
from .cpu import cpu_check
|
||||
from .temp import temp_check
|
||||
from .vulnix import vulnix_check
|
||||
|
|
|
|||
30
src/lego_monitoring/checks/cpu.py
Normal file
30
src/lego_monitoring/checks/cpu.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
from psutil import cpu_percent
|
||||
|
||||
from lego_monitoring.alerting import alerts
|
||||
from lego_monitoring.alerting.enum import AlertType, Severity
|
||||
from lego_monitoring.core import cvars
|
||||
|
||||
IS_TESTING = False
|
||||
|
||||
|
||||
def cpu_check() -> list[alerts.Alert]:
    """Compare the current CPU load against the configured thresholds.

    Returns:
        A one-element list holding a CRITICAL alert when the load exceeds
        ``critical_percentage``; otherwise a one-element list holding a
        WARNING alert when the load exceeds ``warning_percentage``;
        otherwise an empty list. A threshold of ``None`` disables that
        severity. When ``IS_TESTING`` is true, the first threshold that is
        not ``None`` fires regardless of the measured load.
    """
    # NOTE(review): called without an interval, psutil reports utilisation
    # since the previous call, and the very first call returns a meaningless
    # 0.0 -- confirm against the psutil documentation.
    percentage = cpu_percent()
    config = cvars.config.get().checks.cpu

    # Ordered by priority: critical is evaluated first so it wins over warning.
    thresholds = (
        (config.critical_percentage, Severity.CRITICAL),
        (config.warning_percentage, Severity.WARNING),
    )
    for threshold, severity in thresholds:
        # Compare with None explicitly: only None means "disabled". A plain
        # truthiness test would also disable a legitimate threshold of 0.0.
        if threshold is not None and (IS_TESTING or percentage > threshold):
            return [
                alerts.Alert(
                    alert_type=AlertType.CPU,
                    message=f"CPU load: {percentage:.2f}% > {threshold:.2f}%",
                    severity=severity,
                )
            ]
    return []
|
||||
|
|
@ -1,15 +1,17 @@
|
|||
import json
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
from alt_utils import NestedDeserializableDataclass
|
||||
|
||||
from .checks.cpu import CpuCheckConfig
|
||||
from .checks.temp import TempCheckConfig
|
||||
from .checks.vulnix import VulnixCheckConfig
|
||||
|
||||
|
||||
@dataclass
|
||||
class ChecksConfig(NestedDeserializableDataclass):
|
||||
cpu: Optional[CpuCheckConfig] = None
|
||||
temp: Optional[TempCheckConfig] = None
|
||||
vulnix: Optional[VulnixCheckConfig] = None
|
||||
|
||||
|
|
@ -22,9 +24,9 @@ class TelegramConfig:
|
|||
|
||||
@dataclass
|
||||
class Config(NestedDeserializableDataclass):
|
||||
enabled_check_sets: list[str]
|
||||
checks: ChecksConfig
|
||||
telegram: TelegramConfig
|
||||
enabled_check_sets: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
def load_config(filepath: str) -> Config:
|
||||
|
|
|
|||
8
src/lego_monitoring/config/checks/cpu.py
Normal file
8
src/lego_monitoring/config/checks/cpu.py
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
class CpuCheckConfig:
    """Thresholds for the CPU load check.

    Each value is a CPU load percentage. ``None`` means an alert of that
    severity is never generated.
    """

    # CPU load percentage for a warning alert to be sent.
    warning_percentage: Optional[float] = 80.0
    # CPU load percentage for a critical alert to be sent.
    critical_percentage: Optional[float] = 90.0
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
from alt_utils import NestedDeserializableDataclass
|
||||
|
|
@ -6,19 +6,19 @@ from alt_utils import NestedDeserializableDataclass
|
|||
|
||||
@dataclass
|
||||
class TempReadingConfig:
|
||||
label: Optional[str]
|
||||
enabled: bool
|
||||
warning_temp: Optional[float]
|
||||
critical_temp: Optional[float]
|
||||
label: Optional[str] = None
|
||||
enabled: bool = True
|
||||
warning_temp: Optional[float] = None
|
||||
critical_temp: Optional[float] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class TempSensorConfig(NestedDeserializableDataclass):
|
||||
name: Optional[str]
|
||||
enabled: bool
|
||||
readings: dict[str, TempReadingConfig]
|
||||
name: Optional[str] = None
|
||||
enabled: bool = True
|
||||
readings: dict[str, TempReadingConfig] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
|
||||
class TempCheckConfig(NestedDeserializableDataclass):
|
||||
sensors: dict[str, TempSensorConfig]
|
||||
sensors: dict[str, TempSensorConfig] = field(default_factory=dict)
|
||||
|
|
|
|||
8
uv.lock
generated
8
uv.lock
generated
|
|
@ -3,11 +3,11 @@ requires-python = ">=3.12"
|
|||
|
||||
[[package]]
|
||||
name = "alt-utils"
|
||||
version = "0.0.6"
|
||||
version = "0.0.8"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/22/d2/b4a3ea37f773696b07a545e8964c37e98e4939d5f8e3dae949d2cd4e4f53/alt_utils-0.0.6.tar.gz", hash = "sha256:91b8ca633238e819848e1f8b351892f4c148c7fddef120d5e966e3a0b5d06f81", size = 6001 }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/31/15/67246107a8c808a9e99b34fd0024bebe954a67f3c315821eae985b87db7f/alt_utils-0.0.8.tar.gz", hash = "sha256:4b2901df0be4af736210277d58e231d4c4bce597a8fc665a8dd3e7b582705081", size = 6103 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c1/27/0c963d6c64150e3fb2f98eb01773e2f9cf9b51f5b65632944bff67a68ec2/alt_utils-0.0.6-py3-none-any.whl", hash = "sha256:e4fd04394827eb49ae0d835f645ea03de1d9637a77acd5674a35890ae22abbef", size = 6260 },
|
||||
{ url = "https://files.pythonhosted.org/packages/9a/5a/7fe15b55fa0ff5528643750c409cd14da005406aef312b32512d8a8487ab/alt_utils-0.0.8-py3-none-any.whl", hash = "sha256:af5549c49543ff4a02b735308bc2a5bfb7f20755620652fd969a648bbaecbc47", size = 6378 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -22,7 +22,7 @@ dependencies = [
|
|||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "alt-utils", specifier = ">=0.0.6" },
|
||||
{ name = "alt-utils", specifier = ">=0.0.8" },
|
||||
{ name = "psutil", specifier = ">=7.0.0" },
|
||||
{ name = "telethon", specifier = ">=1.40.0" },
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue