Merge branch 'ups-monitoring' into 'main'

UPS monitoring

See merge request lego/lego-monitoring!3
This commit is contained in:
Alex Tau 2024-08-17 11:25:48 +00:00
commit 1e1ebbb809
5 changed files with 172 additions and 5 deletions

View file

@ -19,6 +19,7 @@ class AlertType(StrEnum):
LOGIN = "LOGIN" # TODO LOGIN = "LOGIN" # TODO
SMART = "SMART" # TODO SMART = "SMART" # TODO
RAID = "RAID" # TODO RAID = "RAID" # TODO
UPS = "UPS"
class Severity(Enum): class Severity(Enum):

View file

@ -1,5 +1,8 @@
from datetime import timedelta
from alerting import alerts from alerting import alerts
from misc import sensors, vuln from misc import sensors, vuln
from misc.enums import UPSStatus
IS_TESTING = False IS_TESTING = False
@ -92,3 +95,101 @@ async def vuln_check() -> list[alerts.Alert]:
) )
alert_list.append(alert) alert_list.append(alert)
return alert_list return alert_list
async def ups_check() -> list[alerts.Alert]:
    """Build alerts describing the current UPS battery level and status flags.

    Reads the UPS through ``sensors.Sensors.get_ups()`` and produces:

    * at most one battery-level alert (CRITICAL below the critical threshold,
      otherwise WARNING below the warning threshold);
    * one alert per reported status flag that has an entry in the status
      table below; flags without an entry produce no alert.

    When ``IS_TESTING`` is set, every alert this check can produce is emitted
    exactly once, regardless of the actual readings.

    Returns:
        The list of alerts; empty when no UPS reading is available or when
        there is nothing to report.
    """
    sensor = await sensors.Sensors.get_ups()
    if not sensor:
        # Always hand back a list so the result matches the declared type.
        return []

    alert_list: list[alerts.Alert] = []

    charge = sensor.battery_charge_percentage
    remaining = f"{charge}% ({timedelta(seconds=sensor.battery_runtime)}) remaining."

    # The charge stays None when the UPS did not report it; comparing None
    # with an int would raise TypeError, so treat "unknown" as "not low".
    charge_known = charge is not None
    is_critical = charge_known and charge < sensor.battery_critical_percentage
    is_warning = charge_known and charge < sensor.battery_warning_percentage

    if IS_TESTING or is_critical:
        alert_list.append(
            alerts.Alert(
                alert_type=alerts.AlertType.UPS,
                message=f"Battery is under {sensor.battery_critical_percentage}%\n{remaining}",
                severity=alerts.Severity.CRITICAL,
            )
        )
    # Outside of testing the warning is suppressed once the critical alert fired.
    if IS_TESTING or (is_warning and not is_critical):
        alert_list.append(
            alerts.Alert(
                alert_type=alerts.AlertType.UPS,
                message=f"Battery is under {sensor.battery_warning_percentage}%\n{remaining}",
                severity=alerts.Severity.WARNING,
            )
        )

    # Status flag -> (message, severity). Insertion order is the order in
    # which alerts are emitted in testing mode.
    status_alerts = {
        UPSStatus.BATTERY_REPLACE: ("UPS battery needs to be replaced ASAP!", alerts.Severity.CRITICAL),
        UPSStatus.UPS_OVERLOAD: ("UPS is overloaded!", alerts.Severity.CRITICAL),
        UPSStatus.UPS_BYPASS: (
            "BYPASS MODE: Battery and connected devices are not protected from power outage!",
            alerts.Severity.WARNING,
        ),
        UPSStatus.UPS_CALIBRATION: ("UPS is currently performing runtime calibration.", alerts.Severity.INFO),
        UPSStatus.ON_BATTERY: (f"UPS is on battery.\n{remaining}", alerts.Severity.INFO),
        UPSStatus.UPS_OFFLINE: ("UPS seems to be offline.", alerts.Severity.INFO),
        UPSStatus.UPS_TRIM: ("Overvoltage detected: trimming voltage to nominal.", alerts.Severity.INFO),
        UPSStatus.UPS_BOOST: ("Undervoltage detected: boosting voltage to nominal.", alerts.Severity.INFO),
        UPSStatus.UPS_FSD: ("Shutdown imminent!", alerts.Severity.CRITICAL),
    }

    # ups_status stays None when the UPS did not report a status line.
    reported = list(status_alerts) if IS_TESTING else (sensor.ups_status or [])
    for status in reported:
        entry = status_alerts.get(status)
        if entry is None:
            continue
        message, severity = entry
        alert_list.append(
            alerts.Alert(
                alert_type=alerts.AlertType.UPS,
                message=message,
                severity=severity,
            )
        )

    return alert_list

21
misc/enums.py Normal file
View file

@ -0,0 +1,21 @@
from enum import StrEnum
class UPSStatus(StrEnum):
"""https://networkupstools.org/docs/developer-guide.chunked/new-drivers.html#_status_data"""
ON_LINE = "OL"
ON_BATTERY = "OB"
BATTERY_LOW = "LB"
BATTERY_HIGH = "HB"
BATTERY_REPLACE = "RB"
BATTERY_CHARGING = "CHRG"
BATTERY_DISCHARGING = "DISCHRG"
UPS_BYPASS = "BYPASS"
"""Battery and connected devices are not protected from power outage!"""
UPS_OFFLINE = "OFF"
UPS_OVERLOAD = "OVER"
UPS_CALIBRATION = "CAL"
UPS_TRIM = "TRIM"
UPS_BOOST = "BOOST"
UPS_FSD = "FSD"

View file

@ -1,7 +1,11 @@
import subprocess
from dataclasses import dataclass from dataclasses import dataclass
from psutil import cpu_percent, sensors_temperatures, virtual_memory from psutil import cpu_percent, sensors_temperatures, virtual_memory
from .enums import UPSStatus
from alerting import alerts
@dataclass @dataclass
class TemperatureSensor: class TemperatureSensor:
@ -27,6 +31,15 @@ class RamSensor:
critical_avail: int = 2 * 1024**3 critical_avail: int = 2 * 1024**3
@dataclass
class UPSSensor:
    """Snapshot of UPS readings; fields are filled in by ``Sensors.get_ups``."""

    # Status flags parsed from the "ups.status" line; stays None when that
    # line was not present in the reading.
    ups_status: list[UPSStatus] | None = None
    # Value of the "battery.charge" line; stays None when it was not present.
    battery_charge_percentage: int | None = None
    # Charge thresholds (percent). Sensors.get_ups replaces these defaults
    # with its own constants when the matching threshold lines are present.
    battery_warning_percentage: int = 20
    battery_critical_percentage: int = 10
    # Value of the "battery.runtime" line; consumers pass it to
    # timedelta(seconds=...).
    battery_runtime: int = 1000
class Sensors: class Sensors:
@staticmethod @staticmethod
def get_temperatures() -> dict[str, list[TemperatureSensor]]: def get_temperatures() -> dict[str, list[TemperatureSensor]]:
@ -96,10 +109,40 @@ class Sensors:
@staticmethod
def get_ram() -> RamSensor:
    """Return a ``RamSensor`` filled from psutil's ``virtual_memory()``."""
    memory = virtual_memory()
    # NOTE(review): psutil documents `percent` as the share of memory in use,
    # while the field it feeds is named "avail" - confirm this is intended.
    return RamSensor(
        current_avail=memory.available,
        current_avail_percentage=memory.percent,
    )
@staticmethod
async def get_ups() -> None | UPSSensor:
try:
raw_data = subprocess.run(["upsc", "cp1300"], stdout=subprocess.PIPE, encoding="utf-8")
except FileNotFoundError:
await alerts.send_alert(alerts.Alert(
alert_type=alerts.AlertType.ERROR,
message="upsc is not installed!",
severity=alerts.Severity.CRITICAL
))
return None
if __name__ == "__main__": sensor_data = UPSSensor()
for i in Sensors.get_temperatures():
print(i) for line in raw_data.stdout.splitlines():
sensor, value = line.split(": ")[:2]
match sensor:
case "battery.charge":
sensor_data.battery_charge_percentage = int(value)
case "battery.charge.low":
# ? in case we need to evaluate critical% from sensor
# sensor_data.battery_critical_percentage = int(value)
sensor_data.battery_critical_percentage = 25
case "battery.charge.warning":
# ? in case we need to evaluate warning% from sensor
# sensor_data.battery_warning_percentage = int(value)
sensor_data.battery_warning_percentage = 50
case "battery.runtime":
sensor_data.battery_runtime = int(value)
case "ups.status":
sensor_data.ups_status = [UPSStatus(status) for status in value.split()]
case _:
...
return sensor_data

View file

@ -47,6 +47,7 @@ async def main():
checkers = ( checkers = (
checker(checks.temp_check, 5 * MINUTE, client), checker(checks.temp_check, 5 * MINUTE, client),
checker(checks.cpu_check, 5 * MINUTE, client), checker(checks.cpu_check, 5 * MINUTE, client),
checker(checks.ups_check, 5 * MINUTE, client),
checker(checks.ram_check, 1 * MINUTE, client), checker(checks.ram_check, 1 * MINUTE, client),
checker(checks.vuln_check, 1 * DAY, client), checker(checks.vuln_check, 1 * DAY, client),
) )