mirror of
https://forgejo.altau.su/lego/lego-monitoring.git
synced 2026-03-10 04:41:10 +00:00
Merge branch 'ram-cpu-alerts' into 'main'
ram and cpu monitoring See merge request lego/lego-monitoring!1
This commit is contained in:
commit
56ebed516e
5 changed files with 106 additions and 9 deletions
|
|
@ -11,8 +11,8 @@ from alerting.common import CREDS_FILE, ROOM_ID
|
|||
|
||||
class AlertType(StrEnum):
|
||||
TEST = "TEST"
|
||||
RAM = "RAM" # TODO
|
||||
CPU = "CPU" # TODO
|
||||
RAM = "RAM"
|
||||
CPU = "CPU"
|
||||
TEMP = "TEMP"
|
||||
LOGIN = "LOGIN" # TODO
|
||||
SMART = "SMART" # TODO
|
||||
|
|
|
|||
|
|
@ -1,25 +1,67 @@
|
|||
from alerting import alerts
|
||||
from misc import sensors
|
||||
|
||||
IS_TESTING = False
|
||||
|
||||
def temp_check() -> set[alerts.Alert]:
|
||||
|
||||
def temp_check() -> list[alerts.Alert]:
|
||||
alert_list = []
|
||||
temps = sensors.Sensors.get_temperatures()
|
||||
for _, sensor_list in temps.items():
|
||||
for sensor in sensor_list:
|
||||
if sensor.critical_temp is not None and sensor.current_temp > sensor.critical_temp:
|
||||
if sensor.sensor_type == "nct6687":
|
||||
continue # little valuable info and too low limits there, might as well ignore it
|
||||
if sensor.critical_temp is not None and (IS_TESTING or sensor.current_temp > sensor.critical_temp):
|
||||
alert = alerts.Alert(
|
||||
alert_type=alerts.AlertType("TEMP"),
|
||||
message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {sensor.critical_temp}°C",
|
||||
severity=alerts.Severity.CRITICAL,
|
||||
)
|
||||
elif sensor.highest_temp is not None and sensor.current_temp > sensor.highest_temp:
|
||||
elif sensor.highest_temp is not None and (IS_TESTING or sensor.current_temp > sensor.highest_temp):
|
||||
alert = alerts.Alert(
|
||||
alert_type=alerts.AlertType("TEMP"),
|
||||
message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {sensor.critical_temp}°C",
|
||||
message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {sensor.highest_temp}°C",
|
||||
severity=alerts.Severity.WARNING,
|
||||
)
|
||||
else:
|
||||
continue
|
||||
alert_list.append(alert)
|
||||
return alert_list
|
||||
|
||||
|
||||
def cpu_check() -> list[alerts.Alert]:
    """Compare the current CPU load with its warning and critical limits.

    Returns:
        A one-element list with a CRITICAL alert when the load exceeds
        ``critical_load`` (or ``IS_TESTING`` is set), a one-element list with
        a WARNING alert when it only exceeds ``highest_load``, and an empty
        list when neither limit is exceeded.
    """
    cpu = sensors.Sensors.get_cpu()
    load = cpu.current_load

    # Testing mode always takes the critical branch, so the warning branch
    # is only ever reached with IS_TESTING unset.
    if IS_TESTING or load > cpu.critical_load:
        limit, severity = cpu.critical_load, alerts.Severity.CRITICAL
    elif load > cpu.highest_load:
        limit, severity = cpu.highest_load, alerts.Severity.WARNING
    else:
        return []

    return [
        alerts.Alert(
            alert_type=alerts.AlertType("CPU"),
            message=f"{load}% > {limit}%",
            severity=severity,
        )
    ]
|
||||
|
||||
|
||||
def ram_check() -> list[alerts.Alert]:
    """Compare the available RAM with its warning and critical floors.

    Returns:
        A one-element list with a CRITICAL alert when available memory is
        below ``critical_avail`` (or ``IS_TESTING`` is set), a one-element
        list with a WARNING alert when it is only below ``warning_avail``,
        and an empty list when memory is above both floors.
    """
    mem = sensors.Sensors.get_ram()
    avail = mem.current_avail

    # Testing mode always takes the critical branch, so the warning branch
    # is only ever reached with IS_TESTING unset.
    if IS_TESTING or avail < mem.critical_avail:
        floor, severity = mem.critical_avail, alerts.Severity.CRITICAL
    elif avail < mem.warning_avail:
        floor, severity = mem.warning_avail, alerts.Severity.WARNING
    else:
        return []

    # Both values are byte counts; the message reports them in GiB.
    return [
        alerts.Alert(
            alert_type=alerts.AlertType("RAM"),
            message=f"{(avail / 1024**3):.2f} GiB < {(floor / 1024**3):.2f} GiB",
            severity=severity,
        )
    ]
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from dataclasses import dataclass
|
||||
|
||||
from psutil import sensors_temperatures
|
||||
from psutil import cpu_percent, sensors_temperatures, virtual_memory
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -12,6 +12,21 @@ class TemperatureSensor:
|
|||
critical_temp: float | None = None
|
||||
|
||||
|
||||
@dataclass
class CpuSensor:
    """CPU load reading bundled with the limits used for alerting."""

    # Current CPU load, in percent.
    current_load: float
    # Loads above this value produce a WARNING alert in the CPU check.
    highest_load: float = 90
    # Loads above this value produce a CRITICAL alert in the CPU check.
    critical_load: float = 95
|
||||
|
||||
|
||||
@dataclass
class RamSensor:
    """Available-memory reading bundled with the floors used for alerting."""

    # Memory currently available, in bytes (reported to users in GiB).
    current_avail: int
    # Percentage figure printed next to the available amount.
    current_avail_percentage: float
    # Below this many bytes (4 GiB) the RAM check produces a WARNING alert.
    warning_avail: int = 4 * 1024**3
    # Below this many bytes (2 GiB) the RAM check produces a CRITICAL alert.
    critical_avail: int = 2 * 1024**3
|
||||
|
||||
|
||||
class Sensors:
|
||||
@staticmethod
|
||||
def get_temperatures() -> dict[str, list[TemperatureSensor]]:
|
||||
|
|
@ -51,9 +66,39 @@ class Sensors:
|
|||
critical_temp=95.0, # hardcoded because we have R9 7900X
|
||||
)
|
||||
)
|
||||
case "nct6687":
|
||||
lables = {
|
||||
"AMD TSI Addr 98h": "CPU",
|
||||
"Diode 0 (curr)": "System",
|
||||
"Thermistor 15": "VRM MOSFET",
|
||||
"Thermistor 1": "Platform Controller Hub (Peripherals)",
|
||||
"Thermistor 16": "CPU Socket",
|
||||
}
|
||||
|
||||
for sensor in sensors[:-2]:
|
||||
real_label = lables[sensor.label]
|
||||
temp_sensors[s_type].append(
|
||||
TemperatureSensor(
|
||||
sensor_type=s_type,
|
||||
sensor_label=real_label,
|
||||
current_temp=sensor.current,
|
||||
highest_temp=sensor.high or None,
|
||||
critical_temp=sensor.critical or None,
|
||||
)
|
||||
)
|
||||
|
||||
return temp_sensors
|
||||
|
||||
@staticmethod
def get_cpu() -> CpuSensor:
    """Read the current CPU load and wrap it in a ``CpuSensor``.

    ``cpu_percent()`` is called without an interval, so it does not block.
    Per the psutil documentation, it then reports utilisation since the
    previous call, and the very first call returns a meaningless ``0.0``.

    Returns:
        A ``CpuSensor`` carrying the reading and the default limits.
    """
    load = cpu_percent()
    return CpuSensor(current_load=load)
|
||||
|
||||
@staticmethod
def get_ram() -> RamSensor:
    """Read current memory statistics and wrap them in a ``RamSensor``.

    Returns:
        A ``RamSensor`` whose ``current_avail`` is the available memory in
        bytes and whose ``current_avail_percentage`` is the share of total
        memory that is still available.
    """
    ram = virtual_memory()
    # psutil's ``percent`` is the share of memory *in use*
    # ((total - available) / total * 100), so it is inverted here to get the
    # available share. Rounding to one decimal matches psutil's own
    # precision and hides float subtraction noise.
    avail_percentage = round(100 - ram.percent, 1)
    return RamSensor(
        current_avail=ram.available,
        current_avail_percentage=avail_percentage,
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
for i in Sensors.get_temperatures():
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from colorama import Back, Fore, Style
|
||||
from colorama import Back, Style
|
||||
|
||||
from misc.sensors import Sensors
|
||||
|
||||
|
|
@ -14,6 +14,12 @@ def pretty_print():
|
|||
for sensors in v:
|
||||
print(f"{sensors.sensor_label}: {sensors.current_temp}°C")
|
||||
|
||||
print()
|
||||
s = Sensors.get_cpu()
|
||||
print(f"Used CPU: {s.current_load}%")
|
||||
s = Sensors.get_ram()
|
||||
print(f"Available RAM: {(s.current_avail / 1024**3):.2f} ({s.current_avail_percentage}%) GiB")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pretty_print()
|
||||
|
|
|
|||
|
|
@ -39,7 +39,11 @@ async def checker(check: Callable | Coroutine, interval_secs: int, client: nio.A
|
|||
async def main():
|
||||
signal.signal(signal.SIGTERM, stop_gracefully)
|
||||
client = await alerts.get_client()
|
||||
checkers = (checker(checks.temp_check, 5 * 60, client),)
|
||||
checkers = (
|
||||
checker(checks.temp_check, 5 * 60, client),
|
||||
checker(checks.cpu_check, 5 * 60, client),
|
||||
checker(checks.ram_check, 1 * 60, client),
|
||||
)
|
||||
async with asyncio.TaskGroup() as tg:
|
||||
checker_tasks: set[asyncio.Task] = set()
|
||||
for c in checkers:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue