From 4fd3391c704ee7135a500c29c7057a10d698f058 Mon Sep 17 00:00:00 2001 From: Alex Date: Sat, 27 Jul 2024 18:10:40 +0300 Subject: [PATCH 1/5] ram and cpu monitoring --- alerting/alerts.py | 4 ++-- misc/checks.py | 50 ++++++++++++++++++++++++++++++++++++++++++---- misc/sensors.py | 35 +++++++++++++++++++++++++++++++- prettyprint.py | 6 ++++++ service.py | 6 +++++- 5 files changed, 93 insertions(+), 8 deletions(-) diff --git a/alerting/alerts.py b/alerting/alerts.py index 78d71fd..bc78a28 100644 --- a/alerting/alerts.py +++ b/alerting/alerts.py @@ -11,8 +11,8 @@ from alerting.common import CREDS_FILE, ROOM_ID class AlertType(StrEnum): TEST = "TEST" - RAM = "RAM" # TODO - CPU = "CPU" # TODO + RAM = "RAM" + CPU = "CPU" TEMP = "TEMP" LOGIN = "LOGIN" # TODO SMART = "SMART" # TODO diff --git a/misc/checks.py b/misc/checks.py index f74cc40..5e7cc1f 100644 --- a/misc/checks.py +++ b/misc/checks.py @@ -1,25 +1,67 @@ from alerting import alerts from misc import sensors +IS_TESTING = False -def temp_check() -> set[alerts.Alert]: + +def temp_check() -> list[alerts.Alert]: alert_list = [] temps = sensors.Sensors.get_temperatures() for _, sensor_list in temps.items(): for sensor in sensor_list: - if sensor.critical_temp is not None and sensor.current_temp > sensor.critical_temp: + if sensor.sensor_type == "nct6687": + continue # little valuable info and too low limits there, might as well ignore it + if sensor.critical_temp is not None and (IS_TESTING or sensor.current_temp > sensor.critical_temp): alert = alerts.Alert( alert_type=alerts.AlertType("TEMP"), message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {sensor.critical_temp}°C", severity=alerts.Severity.CRITICAL, ) - elif sensor.highest_temp is not None and sensor.current_temp > sensor.highest_temp: + elif sensor.highest_temp is not None and (IS_TESTING or sensor.current_temp > sensor.highest_temp): alert = alerts.Alert( alert_type=alerts.AlertType("TEMP"), - message=f"{sensor.sensor_type} 
{sensor.sensor_label}: {sensor.current_temp}°C > {sensor.critical_temp}°C", + message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {sensor.highest_temp}°C", severity=alerts.Severity.WARNING, ) else: continue alert_list.append(alert) return alert_list + + +def cpu_check() -> list[alerts.Alert]: + sensor = sensors.Sensors.get_cpu() + if IS_TESTING or sensor.current_load > sensor.critical_load: + alert = alerts.Alert( + alert_type=alerts.AlertType("CPU"), + message=f"{sensor.current_load}% > {sensor.critical_load}%", + severity=alerts.Severity.CRITICAL, + ) + elif IS_TESTING or sensor.current_load > sensor.highest_load: + alert = alerts.Alert( + alert_type=alerts.AlertType("CPU"), + message=f"{sensor.current_load}% > {sensor.highest_load}%", + severity=alerts.Severity.WARNING, + ) + else: + return [] + return [alert] + + +def ram_check() -> list[alerts.Alert]: + sensor = sensors.Sensors.get_ram() + if IS_TESTING or sensor.current_avail < sensor.critical_avail: + alert = alerts.Alert( + alert_type=alerts.AlertType("RAM"), + message=f"{(sensor.current_avail / 1024**3):.2f} GiB < {(sensor.critical_avail / 1024**3):.2f} GiB", + severity=alerts.Severity.CRITICAL, + ) + elif IS_TESTING or sensor.current_avail < sensor.warning_avail: + alert = alerts.Alert( + alert_type=alerts.AlertType("RAM"), + message=f"{(sensor.current_avail / 1024**3):.2f} GiB < {(sensor.warning_avail / 1024**3):.2f} GiB", + severity=alerts.Severity.WARNING, + ) + else: + return [] + return [alert] diff --git a/misc/sensors.py b/misc/sensors.py index 9400dc5..983207a 100644 --- a/misc/sensors.py +++ b/misc/sensors.py @@ -1,6 +1,6 @@ from dataclasses import dataclass -from psutil import sensors_temperatures +from psutil import cpu_percent, sensors_temperatures, virtual_memory @dataclass @@ -12,6 +12,20 @@ class TemperatureSensor: critical_temp: float | None = None +@dataclass +class CpuSensor: + current_load: float + highest_load: float = 90 + critical_load: float = 95 + + 
+@dataclass +class RamSensor: + current_avail: int + warning_avail: int = 4 * 1024**3 + critical_avail: int = 2 * 1024**3 + + class Sensors: @staticmethod def get_temperatures() -> dict[str, list[TemperatureSensor]]: @@ -51,9 +65,28 @@ class Sensors: critical_temp=95.0, # hardcoded because we have R9 7900X ) ) + case "nct6687": + for sensor in sensors: + temp_sensors[s_type].append( + TemperatureSensor( + sensor_type=s_type, + sensor_label=sensor.label, + current_temp=sensor.current, + highest_temp=sensor.high or None, + critical_temp=sensor.critical or None, + ) + ) return temp_sensors + @staticmethod + def get_cpu() -> CpuSensor: + return CpuSensor(current_load=cpu_percent()) + + @staticmethod + def get_ram() -> RamSensor: + return RamSensor(current_avail=virtual_memory().available) + if __name__ == "__main__": for i in Sensors.get_temperatures(): diff --git a/prettyprint.py b/prettyprint.py index 3edc3a9..8867d78 100755 --- a/prettyprint.py +++ b/prettyprint.py @@ -14,6 +14,12 @@ def pretty_print(): for sensors in v: print(f"{sensors.sensor_label}: {sensors.current_temp}°C") + print() + s = Sensors.get_cpu() + print(f"Used CPU: {s.current_load}%") + s = Sensors.get_ram() + print(f"Available RAM: {(s.current_avail / 1024**3):.2f} GiB") + if __name__ == "__main__": pretty_print() diff --git a/service.py b/service.py index 07d9fab..4f13ba5 100755 --- a/service.py +++ b/service.py @@ -39,7 +39,11 @@ async def checker(check: Callable | Coroutine, interval_secs: int, client: nio.A async def main(): signal.signal(signal.SIGTERM, stop_gracefully) client = await alerts.get_client() - checkers = (checker(checks.temp_check, 5 * 60, client),) + checkers = ( + checker(checks.temp_check, 5 * 60, client), + checker(checks.cpu_check, 5 * 60, client), + checker(checks.ram_check, 1 * 60, client), + ) async with asyncio.TaskGroup() as tg: checker_tasks: set[asyncio.Task] = set() for c in checkers: From 7455a7b7d6e9f265f5c9ac491c27bf39b9e9b122 Mon Sep 17 00:00:00 2001 From: 
saqriphnix Date: Sat, 27 Jul 2024 19:09:33 +0300 Subject: [PATCH 2/5] skip duplicate sensors --- misc/sensors.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/misc/sensors.py b/misc/sensors.py index 983207a..c416771 100644 --- a/misc/sensors.py +++ b/misc/sensors.py @@ -66,7 +66,10 @@ class Sensors: ) ) case "nct6687": - for sensor in sensors: + # skipping first 8 elements as they're not informative + # while the last elements duplicate the first ones, + # but with more understandable names + for sensor in sensors[7::]: temp_sensors[s_type].append( TemperatureSensor( sensor_type=s_type, From 4dbd81b352927d6afa5a591e6a06d883e8246b94 Mon Sep 17 00:00:00 2001 From: saqriphnix Date: Sat, 27 Jul 2024 19:16:40 +0300 Subject: [PATCH 3/5] RAM percentage recording. removed unnecessary sensors --- misc/sensors.py | 7 +++++-- prettyprint.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/misc/sensors.py b/misc/sensors.py index c416771..c6d55bd 100644 --- a/misc/sensors.py +++ b/misc/sensors.py @@ -22,6 +22,7 @@ class CpuSensor: @dataclass class RamSensor: current_avail: int + current_avail_percentage: float warning_avail: int = 4 * 1024**3 critical_avail: int = 2 * 1024**3 @@ -69,7 +70,7 @@ class Sensors: # skipping first 8 elements as they're not informative # while the last elements duplicate the first ones, # but with more understandable names - for sensor in sensors[7::]: + for sensor in sensors[7:-2:]: temp_sensors[s_type].append( TemperatureSensor( sensor_type=s_type, @@ -88,7 +89,9 @@ class Sensors: @staticmethod def get_ram() -> RamSensor: - return RamSensor(current_avail=virtual_memory().available) + ram = virtual_memory() + return RamSensor(current_avail=ram.available, + current_avail_percentage=ram.percent) if __name__ == "__main__": diff --git a/prettyprint.py b/prettyprint.py index 8867d78..0cdf5ef 100755 --- a/prettyprint.py +++ b/prettyprint.py @@ -18,7 +18,7 @@ def pretty_print(): s = Sensors.get_cpu() print(f"Used 
CPU: {s.current_load}%") s = Sensors.get_ram() - print(f"Available RAM: {(s.current_avail / 1024**3):.2f} GiB") + print(f"Available RAM: {(s.current_avail / 1024**3):.2f} ({s.current_avail_percentage}%) GiB") if __name__ == "__main__": From 630f3d0f9150f7966765a213553b272a7dafb76a Mon Sep 17 00:00:00 2001 From: saqriphnix Date: Sat, 27 Jul 2024 19:20:40 +0300 Subject: [PATCH 4/5] unused imports removal --- prettyprint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prettyprint.py b/prettyprint.py index 0cdf5ef..cb677ad 100755 --- a/prettyprint.py +++ b/prettyprint.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -from colorama import Back, Fore, Style +from colorama import Back, Style from misc.sensors import Sensors From 791176aae11e061d1637827aac42915fc81cd5f5 Mon Sep 17 00:00:00 2001 From: saqriphnix Date: Sat, 27 Jul 2024 19:43:50 +0300 Subject: [PATCH 5/5] replace labels --- misc/sensors.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/misc/sensors.py b/misc/sensors.py index c6d55bd..b1b3f1b 100644 --- a/misc/sensors.py +++ b/misc/sensors.py @@ -67,14 +67,20 @@ class Sensors: ) ) case "nct6687": - # skipping first 8 elements as they're not informative - # while the last elements duplicate the first ones, - # but with more understandable names - for sensor in sensors[7:-2:]: + lables = { + "AMD TSI Addr 98h": "CPU", + "Diode 0 (curr)": "System", + "Thermistor 15": "VRM MOSFET", + "Thermistor 1": "Platform Controller Hub (Peripherals)", + "Thermistor 16": "CPU Socket", + } + + for sensor in sensors[:-2]: + real_label = lables[sensor.label] temp_sensors[s_type].append( TemperatureSensor( sensor_type=s_type, - sensor_label=sensor.label, + sensor_label=real_label, current_temp=sensor.current, highest_temp=sensor.high or None, critical_temp=sensor.critical or None,