diff --git a/alerting/alerts.py b/alerting/alerts.py
index 78d71fd..bc78a28 100644
--- a/alerting/alerts.py
+++ b/alerting/alerts.py
@@ -11,8 +11,8 @@ from alerting.common import CREDS_FILE, ROOM_ID
 
 class AlertType(StrEnum):
     TEST = "TEST"
-    RAM = "RAM"  # TODO
-    CPU = "CPU"  # TODO
+    RAM = "RAM"
+    CPU = "CPU"
     TEMP = "TEMP"
     LOGIN = "LOGIN"  # TODO
     SMART = "SMART"  # TODO
diff --git a/misc/checks.py b/misc/checks.py
index f74cc40..5e7cc1f 100644
--- a/misc/checks.py
+++ b/misc/checks.py
@@ -1,25 +1,70 @@
 from alerting import alerts
 from misc import sensors
 
+# When True, checks skip the threshold comparisons, so they alert regardless of current readings.
+IS_TESTING = False
 
-def temp_check() -> set[alerts.Alert]:
+
+def temp_check() -> list[alerts.Alert]:
     alert_list = []
     temps = sensors.Sensors.get_temperatures()
     for _, sensor_list in temps.items():
         for sensor in sensor_list:
-            if sensor.critical_temp is not None and sensor.current_temp > sensor.critical_temp:
+            if sensor.sensor_type == "nct6687":
+                continue  # little valuable info and too low limits there, might as well ignore it
+            if sensor.critical_temp is not None and (IS_TESTING or sensor.current_temp > sensor.critical_temp):
                 alert = alerts.Alert(
                     alert_type=alerts.AlertType("TEMP"),
                     message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {sensor.critical_temp}°C",
                     severity=alerts.Severity.CRITICAL,
                 )
-            elif sensor.highest_temp is not None and sensor.current_temp > sensor.highest_temp:
+            elif sensor.highest_temp is not None and (IS_TESTING or sensor.current_temp > sensor.highest_temp):
                 alert = alerts.Alert(
                     alert_type=alerts.AlertType("TEMP"),
-                    message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {sensor.critical_temp}°C",
+                    message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {sensor.highest_temp}°C",
                     severity=alerts.Severity.WARNING,
                 )
             else:
                 continue
             alert_list.append(alert)
     return alert_list
+
+
+def cpu_check() -> list[alerts.Alert]:
+    """Return a one-element list with a CPU load alert, or an empty list when below both thresholds."""
+    sensor = sensors.Sensors.get_cpu()
+    if IS_TESTING or sensor.current_load > sensor.critical_load:
+        alert = alerts.Alert(
+            alert_type=alerts.AlertType("CPU"),
+            message=f"{sensor.current_load}% > {sensor.critical_load}%",
+            severity=alerts.Severity.CRITICAL,
+        )
+    elif sensor.current_load > sensor.highest_load:
+        alert = alerts.Alert(
+            alert_type=alerts.AlertType("CPU"),
+            message=f"{sensor.current_load}% > {sensor.highest_load}%",
+            severity=alerts.Severity.WARNING,
+        )
+    else:
+        return []
+    return [alert]
+
+
+def ram_check() -> list[alerts.Alert]:
+    """Return a one-element list with a low-RAM alert, or an empty list when enough RAM is available."""
+    sensor = sensors.Sensors.get_ram()
+    if IS_TESTING or sensor.current_avail < sensor.critical_avail:
+        alert = alerts.Alert(
+            alert_type=alerts.AlertType("RAM"),
+            message=f"{(sensor.current_avail / 1024**3):.2f} GiB < {(sensor.critical_avail / 1024**3):.2f} GiB",
+            severity=alerts.Severity.CRITICAL,
+        )
+    elif sensor.current_avail < sensor.warning_avail:
+        alert = alerts.Alert(
+            alert_type=alerts.AlertType("RAM"),
+            message=f"{(sensor.current_avail / 1024**3):.2f} GiB < {(sensor.warning_avail / 1024**3):.2f} GiB",
+            severity=alerts.Severity.WARNING,
+        )
+    else:
+        return []
+    return [alert]
diff --git a/misc/sensors.py b/misc/sensors.py
index 9400dc5..b1b3f1b 100644
--- a/misc/sensors.py
+++ b/misc/sensors.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 
-from psutil import sensors_temperatures
+from psutil import cpu_percent, sensors_temperatures, virtual_memory
 
 
 @dataclass
@@ -12,6 +12,23 @@ class TemperatureSensor:
     critical_temp: float | None = None
 
 
+@dataclass
+class CpuSensor:
+    """CPU load reading with warning (highest) and critical thresholds, all in percent."""
+    current_load: float
+    highest_load: float = 90
+    critical_load: float = 95
+
+
+@dataclass
+class RamSensor:
+    """Available RAM in bytes and percent, with warning and critical low-water marks in bytes."""
+    current_avail: int
+    current_avail_percentage: float
+    warning_avail: int = 4 * 1024**3
+    critical_avail: int = 2 * 1024**3
+
+
 class Sensors:
     @staticmethod
     def get_temperatures() -> dict[str, list[TemperatureSensor]]:
@@ -51,9 +68,50 @@ class Sensors:
                             critical_temp=95.0,  # hardcoded because we have R9 7900X
                         )
                     )
+                case "nct6687":
+                    labels = {
+                        "AMD TSI Addr 98h": "CPU",
+                        "Diode 0 (curr)": "System",
+                        "Thermistor 15": "VRM MOSFET",
+                        "Thermistor 1": "Platform Controller Hub (Peripherals)",
+                        "Thermistor 16": "CPU Socket",
+                    }
+
+                    for sensor in sensors[:-2]:
+                        # Unmapped labels fall back to the raw name instead of raising KeyError.
+                        real_label = labels.get(sensor.label, sensor.label)
+                        temp_sensors[s_type].append(
+                            TemperatureSensor(
+                                sensor_type=s_type,
+                                sensor_label=real_label,
+                                current_temp=sensor.current,
+                                highest_temp=sensor.high or None,
+                                critical_temp=sensor.critical or None,
+                            )
+                        )
 
         return temp_sensors
 
+    @staticmethod
+    def get_cpu(interval: float | None = None) -> CpuSensor:
+        """Return the system-wide CPU load in percent.
+
+        With ``interval=None`` psutil compares against the previous call, so the
+        first reading in a process is meaningless; one-shot callers should pass
+        a positive number of seconds to block and sample over that window.
+        """
+        return CpuSensor(current_load=cpu_percent(interval=interval))
+
+    @staticmethod
+    def get_ram() -> RamSensor:
+        """Return the available RAM in bytes and as a percentage of total memory."""
+        ram = virtual_memory()
+        return RamSensor(
+            current_avail=ram.available,
+            # psutil's ``percent`` is the *used* share, so invert it to get the available share.
+            current_avail_percentage=round(100 - ram.percent, 1),
+        )
+
 
 if __name__ == "__main__":
     for i in Sensors.get_temperatures():
diff --git a/prettyprint.py b/prettyprint.py
index 3edc3a9..cb677ad 100755
--- a/prettyprint.py
+++ b/prettyprint.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-from colorama import Back, Fore, Style
+from colorama import Back, Style
 
 from misc.sensors import Sensors
 
@@ -14,6 +14,13 @@ def pretty_print():
         for sensors in v:
             print(f"{sensors.sensor_label}: {sensors.current_temp}°C")
 
+    print()
+    # Sample over a short window: a first psutil reading without an interval is meaningless.
+    s = Sensors.get_cpu(interval=0.5)
+    print(f"Used CPU: {s.current_load}%")
+    s = Sensors.get_ram()
+    print(f"Available RAM: {(s.current_avail / 1024**3):.2f} GiB ({s.current_avail_percentage}%)")
+
 
 if __name__ == "__main__":
     pretty_print()
diff --git a/service.py b/service.py
index 07d9fab..4f13ba5 100755
--- a/service.py
+++ b/service.py
@@ -39,7 +39,11 @@ async def checker(check: Callable | Coroutine, interval_secs: int, client: nio.A
 async def main():
     signal.signal(signal.SIGTERM, stop_gracefully)
     client = await alerts.get_client()
-    checkers = (checker(checks.temp_check, 5 * 60, client),)
+    checkers = (
+        checker(checks.temp_check, 5 * 60, client),
+        checker(checks.cpu_check, 5 * 60, client),
+        checker(checks.ram_check, 1 * 60, client),
+    )
     async with asyncio.TaskGroup() as tg:
         checker_tasks: set[asyncio.Task] = set()
         for c in checkers: