mirror of
https://forgejo.altau.su/lego/lego-monitoring.git
synced 2026-03-09 20:31:10 +00:00
add cpu check
This commit is contained in:
parent
8709b019ea
commit
5095057a13
12 changed files with 123 additions and 21 deletions
|
|
@ -33,7 +33,7 @@ List of enabled check sets\. Each check set is a module which checks something a
|
||||||
|
|
||||||
|
|
||||||
*Type:*
|
*Type:*
|
||||||
list of (one of “start”, “stop”, “temp”, “vulnix”)
|
list of (one of “start”, “stop”, “temp”, “cpu”, “vulnix”)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -45,8 +45,50 @@ list of (one of “start”, “stop”, “temp”, “vulnix”)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## services\.lego-monitoring\.checks\.cpu\.criticalPercentage
|
||||||
|
|
||||||
|
CPU load percentage for a critical alert to be sent\. Null means never generate a CPU critical alert\.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
*Type:*
|
||||||
|
null or floating point number
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
*Default:*
|
||||||
|
` 90.0 `
|
||||||
|
|
||||||
|
*Declared by:*
|
||||||
|
- [modules/options\.nix](../modules/options.nix)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## services\.lego-monitoring\.checks\.cpu\.warningPercentage
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CPU load percentage for a warning alert to be sent\. Null means never generate a CPU warning alert\.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
*Type:*
|
||||||
|
null or floating point number
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
*Default:*
|
||||||
|
` 80.0 `
|
||||||
|
|
||||||
|
*Declared by:*
|
||||||
|
- [modules/options\.nix](../modules/options.nix)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## services\.lego-monitoring\.checks\.temp\.sensors
|
## services\.lego-monitoring\.checks\.temp\.sensors
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Temp sensor override definitions\. Sensors not defined here, or missing options in definitions, will be read with default parameters\.
|
Temp sensor override definitions\. Sensors not defined here, or missing options in definitions, will be read with default parameters\.
|
||||||
|
|
||||||
To get list of sensors and their default configurations, run ` lego-monitoring --print-temp `\.
|
To get list of sensors and their default configurations, run ` lego-monitoring --print-temp `\.
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ package:
|
||||||
imports = [
|
imports = [
|
||||||
./options.nix
|
./options.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
config = let
|
config = let
|
||||||
cfg = config.services.lego-monitoring;
|
cfg = config.services.lego-monitoring;
|
||||||
json = pkgs.formats.json {};
|
json = pkgs.formats.json {};
|
||||||
|
|
@ -49,6 +49,11 @@ package:
|
||||||
}) cfg.checks.temp.sensors;
|
}) cfg.checks.temp.sensors;
|
||||||
|
|
||||||
vulnix.whitelist_path = vulnixWhitelistFile;
|
vulnix.whitelist_path = vulnixWhitelistFile;
|
||||||
|
|
||||||
|
cpu = with cfg.checks.cpu; {
|
||||||
|
warning_percentage = warningPercentage;
|
||||||
|
critical_percentage = criticalPercentage;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
in lib.mkIf cfg.enable {
|
in lib.mkIf cfg.enable {
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ in
|
||||||
"start"
|
"start"
|
||||||
"stop"
|
"stop"
|
||||||
"temp"
|
"temp"
|
||||||
|
"cpu"
|
||||||
"vulnix"
|
"vulnix"
|
||||||
]);
|
]);
|
||||||
default = [ ];
|
default = [ ];
|
||||||
|
|
@ -76,6 +77,19 @@ in
|
||||||
}'';
|
}'';
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
cpu = {
|
||||||
|
warningPercentage = lib.mkOption {
|
||||||
|
type = lib.types.nullOr lib.types.float;
|
||||||
|
default = 80.0;
|
||||||
|
description = "CPU load percentage for a warning alert to be sent. Null means never generate a CPU warning alert.";
|
||||||
|
};
|
||||||
|
criticalPercentage = lib.mkOption {
|
||||||
|
type = lib.types.nullOr lib.types.float;
|
||||||
|
default = 90.0;
|
||||||
|
description = "CPU load percentage for a critical alert to be sent. Null means never generate a CPU critical alert.";
|
||||||
|
};
|
||||||
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ description = "Monitoring software for the lego server"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.12"
|
requires-python = ">=3.12"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"alt-utils>=0.0.7",
|
"alt-utils>=0.0.8",
|
||||||
"psutil>=7.0.0",
|
"psutil>=7.0.0",
|
||||||
"telethon>=1.40.0",
|
"telethon>=1.40.0",
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@ import asyncio
|
||||||
import datetime
|
import datetime
|
||||||
import logging
|
import logging
|
||||||
import signal
|
import signal
|
||||||
import time
|
|
||||||
|
|
||||||
from . import checks
|
from . import checks
|
||||||
from .alerting import alerts
|
from .alerting import alerts
|
||||||
|
|
@ -57,6 +56,7 @@ async def async_main():
|
||||||
"stop": [], # this is checked later
|
"stop": [], # this is checked later
|
||||||
"temp": [interval_checker(checks.temp_check, datetime.timedelta(minutes=5))],
|
"temp": [interval_checker(checks.temp_check, datetime.timedelta(minutes=5))],
|
||||||
"vulnix": [interval_checker(checks.vulnix_check, datetime.timedelta(days=3))],
|
"vulnix": [interval_checker(checks.vulnix_check, datetime.timedelta(days=3))],
|
||||||
|
"cpu": [interval_checker(checks.cpu_check, datetime.timedelta(minutes=5))],
|
||||||
}
|
}
|
||||||
|
|
||||||
checkers = []
|
checkers = []
|
||||||
|
|
|
||||||
|
|
@ -3,12 +3,12 @@ from enum import StrEnum
|
||||||
|
|
||||||
class AlertType(StrEnum):
|
class AlertType(StrEnum):
|
||||||
BOOT = "BOOT"
|
BOOT = "BOOT"
|
||||||
|
CPU = "CPU"
|
||||||
|
ERROR = "ERROR"
|
||||||
TEMP = "TEMP"
|
TEMP = "TEMP"
|
||||||
TEST = "TEST"
|
TEST = "TEST"
|
||||||
VULN = "VULN"
|
VULN = "VULN"
|
||||||
ERROR = "ERROR"
|
|
||||||
# RAM = "RAM"
|
# RAM = "RAM"
|
||||||
# CPU = "CPU"
|
|
||||||
# LOGIN = "LOGIN"
|
# LOGIN = "LOGIN"
|
||||||
# SMART = "SMART" # TODO
|
# SMART = "SMART" # TODO
|
||||||
# RAID = "RAID"
|
# RAID = "RAID"
|
||||||
|
|
|
||||||
|
|
@ -1,2 +1,3 @@
|
||||||
|
from .cpu import cpu_check
|
||||||
from .temp import temp_check
|
from .temp import temp_check
|
||||||
from .vulnix import vulnix_check
|
from .vulnix import vulnix_check
|
||||||
|
|
|
||||||
30
src/lego_monitoring/checks/cpu.py
Normal file
30
src/lego_monitoring/checks/cpu.py
Normal file
|
|
@ -0,0 +1,30 @@
|
||||||
|
from psutil import cpu_percent
|
||||||
|
|
||||||
|
from lego_monitoring.alerting import alerts
|
||||||
|
from lego_monitoring.alerting.enum import AlertType, Severity
|
||||||
|
from lego_monitoring.core import cvars
|
||||||
|
|
||||||
|
IS_TESTING = False
|
||||||
|
|
||||||
|
|
||||||
|
def cpu_check() -> list[alerts.Alert]:
    """Compare the CPU load reported by psutil against the configured thresholds.

    Returns:
        A single-element list holding a CRITICAL alert when the load is above
        ``critical_percentage``; otherwise a single WARNING alert when the
        load is above ``warning_percentage``; otherwise an empty list.
        A threshold set to ``None`` disables alerts of that severity.
        When ``IS_TESTING`` is true, the first enabled threshold fires
        regardless of the measured load.
    """
    # NOTE(review): called without an interval, psutil measures utilisation
    # since the previous call, so the very first reading may not be
    # meaningful -- confirm against the psutil documentation.
    percentage = cpu_percent()
    config = cvars.config.get().checks.cpu

    # Ordered from most to least severe so that only the highest matching
    # severity is reported.
    thresholds = (
        (config.critical_percentage, Severity.CRITICAL),
        (config.warning_percentage, Severity.WARNING),
    )
    for threshold, severity in thresholds:
        # Compare with None explicitly: 0.0 is a legitimate threshold and
        # must not be mistaken for "disabled" (only null disables an alert).
        if threshold is not None and (IS_TESTING or percentage > threshold):
            return [
                alerts.Alert(
                    alert_type=AlertType.CPU,
                    message=f"CPU load: {percentage:.2f}% > {threshold:.2f}%",
                    severity=severity,
                )
            ]
    return []
|
||||||
|
|
@ -1,15 +1,17 @@
|
||||||
import json
|
import json
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass, field
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from alt_utils import NestedDeserializableDataclass
|
from alt_utils import NestedDeserializableDataclass
|
||||||
|
|
||||||
|
from .checks.cpu import CpuCheckConfig
|
||||||
from .checks.temp import TempCheckConfig
|
from .checks.temp import TempCheckConfig
|
||||||
from .checks.vulnix import VulnixCheckConfig
|
from .checks.vulnix import VulnixCheckConfig
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ChecksConfig(NestedDeserializableDataclass):
|
class ChecksConfig(NestedDeserializableDataclass):
|
||||||
|
cpu: Optional[CpuCheckConfig] = None
|
||||||
temp: Optional[TempCheckConfig] = None
|
temp: Optional[TempCheckConfig] = None
|
||||||
vulnix: Optional[VulnixCheckConfig] = None
|
vulnix: Optional[VulnixCheckConfig] = None
|
||||||
|
|
||||||
|
|
@ -22,9 +24,9 @@ class TelegramConfig:
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Config(NestedDeserializableDataclass):
|
class Config(NestedDeserializableDataclass):
|
||||||
enabled_check_sets: list[str]
|
|
||||||
checks: ChecksConfig
|
checks: ChecksConfig
|
||||||
telegram: TelegramConfig
|
telegram: TelegramConfig
|
||||||
|
enabled_check_sets: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
def load_config(filepath: str) -> Config:
|
def load_config(filepath: str) -> Config:
|
||||||
|
|
|
||||||
8
src/lego_monitoring/config/checks/cpu.py
Normal file
8
src/lego_monitoring/config/checks/cpu.py
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CpuCheckConfig:
    """Thresholds for the CPU load check.

    Both values are percentages. Setting a value to ``None`` disables the
    alert of the corresponding severity.
    """

    # Defaults are float literals so they match the declared field type and
    # the 80.0 / 90.0 defaults of the corresponding Nix options.
    warning_percentage: Optional[float] = 80.0
    critical_percentage: Optional[float] = 90.0
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass, field
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from alt_utils import NestedDeserializableDataclass
|
from alt_utils import NestedDeserializableDataclass
|
||||||
|
|
@ -6,19 +6,19 @@ from alt_utils import NestedDeserializableDataclass
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class TempReadingConfig:
|
class TempReadingConfig:
|
||||||
label: Optional[str]
|
label: Optional[str] = None
|
||||||
enabled: bool
|
enabled: bool = True
|
||||||
warning_temp: Optional[float]
|
warning_temp: Optional[float] = None
|
||||||
critical_temp: Optional[float]
|
critical_temp: Optional[float] = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class TempSensorConfig(NestedDeserializableDataclass):
|
class TempSensorConfig(NestedDeserializableDataclass):
|
||||||
name: Optional[str]
|
name: Optional[str] = None
|
||||||
enabled: bool
|
enabled: bool = True
|
||||||
readings: dict[str, TempReadingConfig]
|
readings: dict[str, TempReadingConfig] = field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class TempCheckConfig(NestedDeserializableDataclass):
|
class TempCheckConfig(NestedDeserializableDataclass):
|
||||||
sensors: dict[str, TempSensorConfig]
|
sensors: dict[str, TempSensorConfig] = field(default_factory=dict)
|
||||||
|
|
|
||||||
8
uv.lock
generated
8
uv.lock
generated
|
|
@ -3,11 +3,11 @@ requires-python = ">=3.12"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "alt-utils"
|
name = "alt-utils"
|
||||||
version = "0.0.6"
|
version = "0.0.8"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/22/d2/b4a3ea37f773696b07a545e8964c37e98e4939d5f8e3dae949d2cd4e4f53/alt_utils-0.0.6.tar.gz", hash = "sha256:91b8ca633238e819848e1f8b351892f4c148c7fddef120d5e966e3a0b5d06f81", size = 6001 }
|
sdist = { url = "https://files.pythonhosted.org/packages/31/15/67246107a8c808a9e99b34fd0024bebe954a67f3c315821eae985b87db7f/alt_utils-0.0.8.tar.gz", hash = "sha256:4b2901df0be4af736210277d58e231d4c4bce597a8fc665a8dd3e7b582705081", size = 6103 }
|
||||||
wheels = [
|
wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/c1/27/0c963d6c64150e3fb2f98eb01773e2f9cf9b51f5b65632944bff67a68ec2/alt_utils-0.0.6-py3-none-any.whl", hash = "sha256:e4fd04394827eb49ae0d835f645ea03de1d9637a77acd5674a35890ae22abbef", size = 6260 },
|
{ url = "https://files.pythonhosted.org/packages/9a/5a/7fe15b55fa0ff5528643750c409cd14da005406aef312b32512d8a8487ab/alt_utils-0.0.8-py3-none-any.whl", hash = "sha256:af5549c49543ff4a02b735308bc2a5bfb7f20755620652fd969a648bbaecbc47", size = 6378 },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -22,7 +22,7 @@ dependencies = [
|
||||||
|
|
||||||
[package.metadata]
|
[package.metadata]
|
||||||
requires-dist = [
|
requires-dist = [
|
||||||
{ name = "alt-utils", specifier = ">=0.0.6" },
|
{ name = "alt-utils", specifier = ">=0.0.8" },
|
||||||
{ name = "psutil", specifier = ">=7.0.0" },
|
{ name = "psutil", specifier = ">=7.0.0" },
|
||||||
{ name = "telethon", specifier = ">=1.40.0" },
|
{ name = "telethon", specifier = ">=1.40.0" },
|
||||||
]
|
]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue