ram and cpu monitoring

This commit is contained in:
Alex 2024-07-27 18:10:40 +03:00
parent 5bbcf95015
commit 4fd3391c70
5 changed files with 93 additions and 8 deletions

View file

@ -11,8 +11,8 @@ from alerting.common import CREDS_FILE, ROOM_ID
class AlertType(StrEnum): class AlertType(StrEnum):
TEST = "TEST" TEST = "TEST"
RAM = "RAM" # TODO RAM = "RAM"
CPU = "CPU" # TODO CPU = "CPU"
TEMP = "TEMP" TEMP = "TEMP"
LOGIN = "LOGIN" # TODO LOGIN = "LOGIN" # TODO
SMART = "SMART" # TODO SMART = "SMART" # TODO

View file

@ -1,25 +1,67 @@
from alerting import alerts from alerting import alerts
from misc import sensors from misc import sensors
IS_TESTING = False
def temp_check() -> list[alerts.Alert]:
    """Build alerts for every temperature sensor that is over one of its limits.

    A reading above ``critical_temp`` produces a CRITICAL alert; otherwise a
    reading above ``highest_temp`` produces a WARNING alert. A limit that is
    ``None`` is never compared against. When ``IS_TESTING`` is true the
    comparisons are bypassed, so every sensor that defines a limit alerts.

    Returns:
        The alerts raised by the current readings; empty when no sensor is
        over its limit.
    """
    alert_list = []
    # Only the sensor lists are needed; the mapping keys are not used.
    for sensor_list in sensors.Sensors.get_temperatures().values():
        for sensor in sensor_list:
            if sensor.sensor_type == "nct6687":
                # Readings from this chip carry little useful information and
                # its limits are too low, so it is ignored entirely.
                continue
            if sensor.critical_temp is not None and (IS_TESTING or sensor.current_temp > sensor.critical_temp):
                limit = sensor.critical_temp
                severity = alerts.Severity.CRITICAL
            elif sensor.highest_temp is not None and (IS_TESTING or sensor.current_temp > sensor.highest_temp):
                # Report the limit that was actually exceeded (highest, not critical).
                limit = sensor.highest_temp
                severity = alerts.Severity.WARNING
            else:
                continue
            alert_list.append(
                alerts.Alert(
                    alert_type=alerts.AlertType.TEMP,
                    message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {limit}°C",
                    severity=severity,
                )
            )
    return alert_list
def cpu_check() -> list[alerts.Alert]:
    """Report CPU load that exceeds the sensor's warning or critical limit.

    Returns:
        A one-element list holding a CRITICAL alert when the load is above
        ``critical_load``, a WARNING alert when it is only above
        ``highest_load``, and an empty list otherwise. With ``IS_TESTING``
        enabled a CRITICAL alert is always produced.
    """
    sensor = sensors.Sensors.get_cpu()
    if IS_TESTING or sensor.current_load > sensor.critical_load:
        limit = sensor.critical_load
        severity = alerts.Severity.CRITICAL
    elif sensor.current_load > sensor.highest_load:
        # IS_TESTING is not re-checked here: the branch above already
        # captures it, so it could never be reached in this one.
        limit = sensor.highest_load
        severity = alerts.Severity.WARNING
    else:
        return []
    return [
        alerts.Alert(
            alert_type=alerts.AlertType.CPU,
            message=f"{sensor.current_load}% > {limit}%",
            severity=severity,
        )
    ]
def ram_check() -> list[alerts.Alert]:
    """Report available RAM that has dropped below the warning or critical limit.

    Returns:
        A one-element list holding a CRITICAL alert when the available memory
        is below ``critical_avail``, a WARNING alert when it is only below
        ``warning_avail``, and an empty list otherwise. With ``IS_TESTING``
        enabled a CRITICAL alert is always produced.
    """
    sensor = sensors.Sensors.get_ram()
    if IS_TESTING or sensor.current_avail < sensor.critical_avail:
        limit = sensor.critical_avail
        severity = alerts.Severity.CRITICAL
    elif sensor.current_avail < sensor.warning_avail:
        # IS_TESTING is not re-checked here: the branch above already
        # captures it, so it could never be reached in this one.
        limit = sensor.warning_avail
        severity = alerts.Severity.WARNING
    else:
        return []
    gib = 1024**3  # the message divides both values by 1024**3 and labels them GiB
    return [
        alerts.Alert(
            alert_type=alerts.AlertType.RAM,
            message=f"{(sensor.current_avail / gib):.2f} GiB < {(limit / gib):.2f} GiB",
            severity=severity,
        )
    ]

View file

@ -1,6 +1,6 @@
from dataclasses import dataclass from dataclasses import dataclass
from psutil import sensors_temperatures from psutil import cpu_percent, sensors_temperatures, virtual_memory
@dataclass @dataclass
@ -12,6 +12,20 @@ class TemperatureSensor:
critical_temp: float | None = None critical_temp: float | None = None
@dataclass
class CpuSensor:
    """CPU load reading bundled with the limits it is compared against.

    All three values are rendered with a ``%`` suffix in the CPU alert text.
    """

    # Load reported at the time the sensor object was created.
    current_load: float
    # A load above this value is treated as a warning by the CPU check.
    highest_load: float = 90
    # A load above this value is treated as critical by the CPU check.
    critical_load: float = 95
@dataclass
class RamSensor:
    """Available-memory reading bundled with the limits it is compared against.

    The RAM alert text divides these values by ``1024**3`` and labels the
    result as GiB.
    """

    # Available memory reported at the time the sensor object was created.
    current_avail: int
    # Less available memory than this is treated as a warning (default 4 * 1024**3).
    warning_avail: int = 4 * 1024**3
    # Less available memory than this is treated as critical (default 2 * 1024**3).
    critical_avail: int = 2 * 1024**3
class Sensors: class Sensors:
@staticmethod @staticmethod
def get_temperatures() -> dict[str, list[TemperatureSensor]]: def get_temperatures() -> dict[str, list[TemperatureSensor]]:
@ -51,9 +65,28 @@ class Sensors:
critical_temp=95.0, # hardcoded because we have R9 7900X critical_temp=95.0, # hardcoded because we have R9 7900X
) )
) )
case "nct6687":
for sensor in sensors:
temp_sensors[s_type].append(
TemperatureSensor(
sensor_type=s_type,
sensor_label=sensor.label,
current_temp=sensor.current,
highest_temp=sensor.high or None,
critical_temp=sensor.critical or None,
)
)
return temp_sensors return temp_sensors
@staticmethod
def get_cpu() -> CpuSensor:
    """Return a ``CpuSensor`` holding the value reported by ``psutil.cpu_percent()``.

    The warning and critical limits keep their ``CpuSensor`` defaults.

    NOTE(review): psutil documents that a non-blocking ``cpu_percent()``
    call measures usage since the previous call and that the very first
    call returns a meaningless 0.0 — confirm that one-shot callers are fine
    with that.
    """
    load = cpu_percent()
    return CpuSensor(current_load=load)
@staticmethod
def get_ram() -> RamSensor:
    """Return a ``RamSensor`` holding ``psutil.virtual_memory().available``.

    The warning and critical limits keep their ``RamSensor`` defaults.
    """
    memory = virtual_memory()
    return RamSensor(current_avail=memory.available)
if __name__ == "__main__": if __name__ == "__main__":
for i in Sensors.get_temperatures(): for i in Sensors.get_temperatures():

View file

@ -14,6 +14,12 @@ def pretty_print():
for sensors in v: for sensors in v:
print(f"{sensors.sensor_label}: {sensors.current_temp}°C") print(f"{sensors.sensor_label}: {sensors.current_temp}°C")
print()
s = Sensors.get_cpu()
print(f"Used CPU: {s.current_load}%")
s = Sensors.get_ram()
print(f"Available RAM: {(s.current_avail / 1024**3):.2f} GiB")
if __name__ == "__main__": if __name__ == "__main__":
pretty_print() pretty_print()

View file

@ -39,7 +39,11 @@ async def checker(check: Callable | Coroutine, interval_secs: int, client: nio.A
async def main(): async def main():
signal.signal(signal.SIGTERM, stop_gracefully) signal.signal(signal.SIGTERM, stop_gracefully)
client = await alerts.get_client() client = await alerts.get_client()
checkers = (checker(checks.temp_check, 5 * 60, client),) checkers = (
checker(checks.temp_check, 5 * 60, client),
checker(checks.cpu_check, 5 * 60, client),
checker(checks.ram_check, 1 * 60, client),
)
async with asyncio.TaskGroup() as tg: async with asyncio.TaskGroup() as tg:
checker_tasks: set[asyncio.Task] = set() checker_tasks: set[asyncio.Task] = set()
for c in checkers: for c in checkers: