mirror of
https://forgejo.altau.su/lego/lego-monitoring.git
synced 2026-03-10 04:41:10 +00:00
RAM and CPU monitoring
This commit is contained in:
parent
5bbcf95015
commit
4fd3391c70
5 changed files with 93 additions and 8 deletions
|
|
@ -11,8 +11,8 @@ from alerting.common import CREDS_FILE, ROOM_ID
|
||||||
|
|
||||||
class AlertType(StrEnum):
|
class AlertType(StrEnum):
|
||||||
TEST = "TEST"
|
TEST = "TEST"
|
||||||
RAM = "RAM" # TODO
|
RAM = "RAM"
|
||||||
CPU = "CPU" # TODO
|
CPU = "CPU"
|
||||||
TEMP = "TEMP"
|
TEMP = "TEMP"
|
||||||
LOGIN = "LOGIN" # TODO
|
LOGIN = "LOGIN" # TODO
|
||||||
SMART = "SMART" # TODO
|
SMART = "SMART" # TODO
|
||||||
|
|
|
||||||
|
|
@ -1,25 +1,67 @@
|
||||||
from alerting import alerts
|
from alerting import alerts
|
||||||
from misc import sensors
|
from misc import sensors
|
||||||
|
|
||||||
|
# Debug switch read by the checks in this module: when True, the threshold
# comparisons are short-circuited so alerts are produced regardless of the
# measured values. Keep False in normal operation.
IS_TESTING = False
|
||||||
|
|
||||||
def temp_check() -> list[alerts.Alert]:
    """Build TEMP alerts for every temperature sensor that is over a limit.

    Sensors come from ``sensors.Sensors.get_temperatures()``, a mapping whose
    values are lists of temperature sensors. For each sensor:

    * ``nct6687`` sensors are skipped entirely;
    * if ``critical_temp`` is set and exceeded, a CRITICAL alert is produced;
    * otherwise, if ``highest_temp`` is set and exceeded, a WARNING alert is
      produced;
    * otherwise nothing is reported for that sensor.

    When ``IS_TESTING`` is true the "exceeded" part of both conditions is
    bypassed, so any sensor that has a limit set produces an alert.

    Returns:
        A list with one alert per offending sensor; empty when all sensors
        are within limits.
    """
    alert_list = []
    temps = sensors.Sensors.get_temperatures()
    # Only the sensor lists matter here; the mapping keys are not used.
    for sensor_list in temps.values():
        for sensor in sensor_list:
            if sensor.sensor_type == "nct6687":
                continue  # little valuable info and too low limits there, might as well ignore it

            if sensor.critical_temp is not None and (
                IS_TESTING or sensor.current_temp > sensor.critical_temp
            ):
                limit = sensor.critical_temp
                severity = alerts.Severity.CRITICAL
            elif sensor.highest_temp is not None and (
                IS_TESTING or sensor.current_temp > sensor.highest_temp
            ):
                # Report the limit that was actually crossed (highest_temp),
                # not critical_temp, which may be None in this branch.
                limit = sensor.highest_temp
                severity = alerts.Severity.WARNING
            else:
                continue

            alert_list.append(
                alerts.Alert(
                    alert_type=alerts.AlertType("TEMP"),
                    message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {limit}°C",
                    severity=severity,
                )
            )
    return alert_list
|
||||||
|
|
||||||
|
|
||||||
|
def cpu_check() -> list[alerts.Alert]:
    """Build a CPU alert when the current load is above a threshold.

    Takes one sample from ``sensors.Sensors.get_cpu()`` and compares its
    ``current_load`` with the sample's own ``critical_load`` and
    ``highest_load`` thresholds. The critical threshold is checked first, so
    at most one alert is produced per call.

    When ``IS_TESTING`` is true a CRITICAL alert is always produced.

    Returns:
        A one-element list holding the CRITICAL or WARNING alert, or an empty
        list when the load is within both thresholds.
    """
    sensor = sensors.Sensors.get_cpu()

    if IS_TESTING or sensor.current_load > sensor.critical_load:
        threshold = sensor.critical_load
        severity = alerts.Severity.CRITICAL
    # No IS_TESTING test here: testing mode is always caught by the branch
    # above, so repeating it in this condition would be dead code.
    elif sensor.current_load > sensor.highest_load:
        threshold = sensor.highest_load
        severity = alerts.Severity.WARNING
    else:
        return []

    return [
        alerts.Alert(
            alert_type=alerts.AlertType("CPU"),
            message=f"{sensor.current_load}% > {threshold}%",
            severity=severity,
        )
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def ram_check() -> list[alerts.Alert]:
    """Build a RAM alert when available memory falls below a threshold.

    Takes one sample from ``sensors.Sensors.get_ram()`` and compares its
    ``current_avail`` with the sample's own ``critical_avail`` and
    ``warning_avail`` thresholds. The critical threshold is checked first, so
    at most one alert is produced per call. The message divides both values
    by ``1024**3`` and labels them GiB.

    When ``IS_TESTING`` is true a CRITICAL alert is always produced.

    Returns:
        A one-element list holding the CRITICAL or WARNING alert, or an empty
        list when available memory is above both thresholds.
    """
    sensor = sensors.Sensors.get_ram()

    if IS_TESTING or sensor.current_avail < sensor.critical_avail:
        threshold = sensor.critical_avail
        severity = alerts.Severity.CRITICAL
    # No IS_TESTING test here: testing mode is always caught by the branch
    # above, so repeating it in this condition would be dead code.
    elif sensor.current_avail < sensor.warning_avail:
        threshold = sensor.warning_avail
        severity = alerts.Severity.WARNING
    else:
        return []

    return [
        alerts.Alert(
            alert_type=alerts.AlertType("RAM"),
            message=f"{(sensor.current_avail / 1024**3):.2f} GiB < {(threshold / 1024**3):.2f} GiB",
            severity=severity,
        )
    ]
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from psutil import sensors_temperatures
|
from psutil import cpu_percent, sensors_temperatures, virtual_memory
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -12,6 +12,20 @@ class TemperatureSensor:
|
||||||
critical_temp: float | None = None
|
critical_temp: float | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CpuSensor:
    """One CPU load reading bundled with the thresholds it is compared to."""

    # Load reading; Sensors.get_cpu() fills it from psutil's cpu_percent().
    current_load: float
    # Threshold above which the CPU check reports a warning.
    highest_load: float = 90
    # Threshold above which the CPU check reports a critical alert.
    critical_load: float = 95
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RamSensor:
    """One available-memory reading bundled with its alert thresholds."""

    # Reading; Sensors.get_ram() fills it from psutil's virtual_memory().available.
    current_avail: int
    # Below this the RAM check reports a warning (4 * 1024**3).
    warning_avail: int = 4 * 1024**3
    # Below this the RAM check reports a critical alert (2 * 1024**3).
    critical_avail: int = 2 * 1024**3
|
||||||
|
|
||||||
|
|
||||||
class Sensors:
|
class Sensors:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_temperatures() -> dict[str, list[TemperatureSensor]]:
|
def get_temperatures() -> dict[str, list[TemperatureSensor]]:
|
||||||
|
|
@ -51,9 +65,28 @@ class Sensors:
|
||||||
critical_temp=95.0, # hardcoded because we have R9 7900X
|
critical_temp=95.0, # hardcoded because we have R9 7900X
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
case "nct6687":
|
||||||
|
for sensor in sensors:
|
||||||
|
temp_sensors[s_type].append(
|
||||||
|
TemperatureSensor(
|
||||||
|
sensor_type=s_type,
|
||||||
|
sensor_label=sensor.label,
|
||||||
|
current_temp=sensor.current,
|
||||||
|
highest_temp=sensor.high or None,
|
||||||
|
critical_temp=sensor.critical or None,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
return temp_sensors
|
return temp_sensors
|
||||||
|
|
||||||
|
@staticmethod
def get_cpu() -> CpuSensor:
    """Sample CPU load and wrap it in a CpuSensor with default thresholds."""
    # NOTE(review): cpu_percent() is called without an interval; per psutil's
    # docs the value is then relative to the previous call, so the very first
    # reading may not be meaningful - confirm this is acceptable for callers.
    load = cpu_percent()
    return CpuSensor(current_load=load)
|
||||||
|
|
||||||
|
@staticmethod
def get_ram() -> RamSensor:
    """Sample available memory and wrap it in a RamSensor with default thresholds."""
    memory = virtual_memory()
    return RamSensor(current_avail=memory.available)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
for i in Sensors.get_temperatures():
|
for i in Sensors.get_temperatures():
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,12 @@ def pretty_print():
|
||||||
for sensors in v:
|
for sensors in v:
|
||||||
print(f"{sensors.sensor_label}: {sensors.current_temp}°C")
|
print(f"{sensors.sensor_label}: {sensors.current_temp}°C")
|
||||||
|
|
||||||
|
print()
|
||||||
|
s = Sensors.get_cpu()
|
||||||
|
print(f"Used CPU: {s.current_load}%")
|
||||||
|
s = Sensors.get_ram()
|
||||||
|
print(f"Available RAM: {(s.current_avail / 1024**3):.2f} GiB")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
pretty_print()
|
pretty_print()
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,11 @@ async def checker(check: Callable | Coroutine, interval_secs: int, client: nio.A
|
||||||
async def main():
|
async def main():
|
||||||
signal.signal(signal.SIGTERM, stop_gracefully)
|
signal.signal(signal.SIGTERM, stop_gracefully)
|
||||||
client = await alerts.get_client()
|
client = await alerts.get_client()
|
||||||
checkers = (checker(checks.temp_check, 5 * 60, client),)
|
checkers = (
|
||||||
|
checker(checks.temp_check, 5 * 60, client),
|
||||||
|
checker(checks.cpu_check, 5 * 60, client),
|
||||||
|
checker(checks.ram_check, 1 * 60, client),
|
||||||
|
)
|
||||||
async with asyncio.TaskGroup() as tg:
|
async with asyncio.TaskGroup() as tg:
|
||||||
checker_tasks: set[asyncio.Task] = set()
|
checker_tasks: set[asyncio.Task] = set()
|
||||||
for c in checkers:
|
for c in checkers:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue