From 4fd3391c704ee7135a500c29c7057a10d698f058 Mon Sep 17 00:00:00 2001
From: Alex
Date: Sat, 27 Jul 2024 18:10:40 +0300
Subject: [PATCH] ram and cpu monitoring

---
 alerting/alerts.py |  4 ++--
 misc/checks.py     | 50 ++++++++++++++++++++++++++++++++++++++++++----
 misc/sensors.py    | 35 +++++++++++++++++++++++++++++++++-
 prettyprint.py     |  6 ++++++
 service.py         |  6 +++++-
 5 files changed, 93 insertions(+), 8 deletions(-)

diff --git a/alerting/alerts.py b/alerting/alerts.py
index 78d71fd..bc78a28 100644
--- a/alerting/alerts.py
+++ b/alerting/alerts.py
@@ -11,8 +11,8 @@ from alerting.common import CREDS_FILE, ROOM_ID
 
 class AlertType(StrEnum):
     TEST = "TEST"
-    RAM = "RAM"  # TODO
-    CPU = "CPU"  # TODO
+    RAM = "RAM"
+    CPU = "CPU"
     TEMP = "TEMP"
     LOGIN = "LOGIN"  # TODO
     SMART = "SMART"  # TODO
diff --git a/misc/checks.py b/misc/checks.py
index f74cc40..5e7cc1f 100644
--- a/misc/checks.py
+++ b/misc/checks.py
@@ -1,25 +1,67 @@
 from alerting import alerts
 from misc import sensors
 
+IS_TESTING = False
 
-def temp_check() -> set[alerts.Alert]:
+
+def temp_check() -> list[alerts.Alert]:
     alert_list = []
     temps = sensors.Sensors.get_temperatures()
     for _, sensor_list in temps.items():
         for sensor in sensor_list:
-            if sensor.critical_temp is not None and sensor.current_temp > sensor.critical_temp:
+            if sensor.sensor_type == "nct6687":
+                continue  # little valuable info and too low limits there, might as well ignore it
+            if sensor.critical_temp is not None and (IS_TESTING or sensor.current_temp > sensor.critical_temp):
                 alert = alerts.Alert(
                     alert_type=alerts.AlertType("TEMP"),
                     message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {sensor.critical_temp}°C",
                     severity=alerts.Severity.CRITICAL,
                 )
-            elif sensor.highest_temp is not None and sensor.current_temp > sensor.highest_temp:
+            elif sensor.highest_temp is not None and (IS_TESTING or sensor.current_temp > sensor.highest_temp):
                 alert = alerts.Alert(
                     alert_type=alerts.AlertType("TEMP"),
-                    message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {sensor.critical_temp}°C",
+                    message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {sensor.highest_temp}°C",
                     severity=alerts.Severity.WARNING,
                 )
             else:
                 continue
             alert_list.append(alert)
     return alert_list
+
+
+def cpu_check() -> list[alerts.Alert]:
+    sensor = sensors.Sensors.get_cpu()
+    if IS_TESTING or sensor.current_load > sensor.critical_load:
+        alert = alerts.Alert(
+            alert_type=alerts.AlertType("CPU"),
+            message=f"{sensor.current_load}% > {sensor.critical_load}%",
+            severity=alerts.Severity.CRITICAL,
+        )
+    elif IS_TESTING or sensor.current_load > sensor.highest_load:
+        alert = alerts.Alert(
+            alert_type=alerts.AlertType("CPU"),
+            message=f"{sensor.current_load}% > {sensor.highest_load}%",
+            severity=alerts.Severity.WARNING,
+        )
+    else:
+        return []
+    return [alert]
+
+
+def ram_check() -> list[alerts.Alert]:
+    sensor = sensors.Sensors.get_ram()
+    if IS_TESTING or sensor.current_avail < sensor.critical_avail:
+        alert = alerts.Alert(
+            alert_type=alerts.AlertType("RAM"),
+            message=f"{(sensor.current_avail / 1024**3):.2f} GiB < {(sensor.critical_avail / 1024**3):.2f} GiB",
+            severity=alerts.Severity.CRITICAL,
+        )
+    elif IS_TESTING or sensor.current_avail < sensor.warning_avail:
+        alert = alerts.Alert(
+            alert_type=alerts.AlertType("RAM"),
+            message=f"{(sensor.current_avail / 1024**3):.2f} GiB < {(sensor.warning_avail / 1024**3):.2f} GiB",
+            severity=alerts.Severity.WARNING,
+        )
+    else:
+        return []
+    return [alert]
diff --git a/misc/sensors.py b/misc/sensors.py
index 9400dc5..983207a 100644
--- a/misc/sensors.py
+++ b/misc/sensors.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 
-from psutil import sensors_temperatures
+from psutil import cpu_percent, sensors_temperatures, virtual_memory
 
 
 @dataclass
@@ -12,6 +12,20 @@ class TemperatureSensor:
     critical_temp: float | None = None
 
 
+@dataclass
+class CpuSensor:
+    current_load: float
+    highest_load: float = 90
+    critical_load: float = 95
+
+
+@dataclass
+class RamSensor:
+    current_avail: int
+    warning_avail: int = 4 * 1024**3
+    critical_avail: int = 2 * 1024**3
+
+
 class Sensors:
     @staticmethod
     def get_temperatures() -> dict[str, list[TemperatureSensor]]:
@@ -51,9 +65,28 @@ class Sensors:
                                 critical_temp=95.0,  # hardcoded because we have R9 7900X
                             )
                         )
+                case "nct6687":
+                    for sensor in sensors:
+                        temp_sensors[s_type].append(
+                            TemperatureSensor(
+                                sensor_type=s_type,
+                                sensor_label=sensor.label,
+                                current_temp=sensor.current,
+                                highest_temp=sensor.high or None,
+                                critical_temp=sensor.critical or None,
+                            )
+                        )
 
         return temp_sensors
 
+    @staticmethod
+    def get_cpu() -> CpuSensor:
+        return CpuSensor(current_load=cpu_percent())
+
+    @staticmethod
+    def get_ram() -> RamSensor:
+        return RamSensor(current_avail=virtual_memory().available)
+
 
 if __name__ == "__main__":
     for i in Sensors.get_temperatures():
diff --git a/prettyprint.py b/prettyprint.py
index 3edc3a9..8867d78 100755
--- a/prettyprint.py
+++ b/prettyprint.py
@@ -14,6 +14,12 @@ def pretty_print():
         for sensors in v:
             print(f"{sensors.sensor_label}: {sensors.current_temp}°C")
 
+    print()
+    s = Sensors.get_cpu()
+    print(f"Used CPU: {s.current_load}%")
+    s = Sensors.get_ram()
+    print(f"Available RAM: {(s.current_avail / 1024**3):.2f} GiB")
+
 
 if __name__ == "__main__":
     pretty_print()
diff --git a/service.py b/service.py
index 07d9fab..4f13ba5 100755
--- a/service.py
+++ b/service.py
@@ -39,7 +39,11 @@ async def checker(check: Callable | Coroutine, interval_secs: int, client: nio.A
 async def main():
     signal.signal(signal.SIGTERM, stop_gracefully)
     client = await alerts.get_client()
-    checkers = (checker(checks.temp_check, 5 * 60, client),)
+    checkers = (
+        checker(checks.temp_check, 5 * 60, client),
+        checker(checks.cpu_check, 5 * 60, client),
+        checker(checks.ram_check, 1 * 60, client),
+    )
     async with asyncio.TaskGroup() as tg:
         checker_tasks: set[asyncio.Task] = set()
         for c in checkers: